src/libcore/num/flt2dec/strategy/grisu.rs

   1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 /*!
  12 Rust adaptation of Grisu3 algorithm described in [1]. It uses about
  13 1KB of precomputed table, and in turn, it's very quick for most inputs.
  14
  15 [1] Florian Loitsch. 2010. Printing floating-point numbers quickly and
  16     accurately with integers. SIGPLAN Not. 45, 6 (June 2010), 233-243.
  17 */
  18
  19 use prelude::v1::*;
  20
  21 use num::flt2dec::{Decoded, MAX_SIG_DIGITS, round_up};
  22
  23 /// A custom 64-bit floating point type, representing `f * 2^e`.
  24 #[derive(Copy, Clone, Debug)]
  25 #[doc(hidden)]
  26 pub struct Fp {
  27     /// The integer mantissa.
  28     pub f: u64,
  29     /// The exponent in base 2.
  30     pub e: i16,
  31 }
  32
  33 impl Fp {
  34     /// Returns a correctly rounded product of itself and `other`.
  35     pub fn mul(&self, other: &Fp) -> Fp {
  36         const MASK: u64 = 0xffffffff;
  37         let a = self.f >> 32;
  38         let b = self.f & MASK;
  39         let c = other.f >> 32;
  40         let d = other.f & MASK;
  41         let ac = a * c;
  42         let bc = b * c;
  43         let ad = a * d;
  44         let bd = b * d;
  45         let tmp = (bd >> 32) + (ad & MASK) + (bc & MASK) + (1 << 31) /* round */;
  46         let f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
  47         let e = self.e + other.e + 64;
  48         Fp { f: f, e: e }
  49     }
  50
  51     /// Normalizes itself so that the resulting mantissa is at least `2^63`.
  52     pub fn normalize(&self) -> Fp {
  53         let mut f = self.f;
  54         let mut e = self.e;
  55         if f >> (64 - 32) == 0 { f <<= 32; e -= 32; }
  56         if f >> (64 - 16) == 0 { f <<= 16; e -= 16; }
  57         if f >> (64 -  8) == 0 { f <<=  8; e -=  8; }
  58         if f >> (64 -  4) == 0 { f <<=  4; e -=  4; }
  59         if f >> (64 -  2) == 0 { f <<=  2; e -=  2; }
  60         if f >> (64 -  1) == 0 { f <<=  1; e -=  1; }
  61         debug_assert!(f >= (1 >> 63));
  62         Fp { f: f, e: e }
  63     }
  64
  65     /// Normalizes itself to have the shared exponent.
  66     /// It can only decrease the exponent (and thus increase the mantissa).
  67     pub fn normalize_to(&self, e: i16) -> Fp {
  68         let edelta = self.e - e;
  69         assert!(edelta >= 0);
  70         let edelta = edelta as usize;
  71         assert_eq!(self.f << edelta >> edelta, self.f);
  72         Fp { f: self.f << edelta, e: e }
  73     }
  74 }
  75
  76 // see the comments in `format_shortest_opt` for the rationale.
  77 #[doc(hidden)] pub const ALPHA: i16 = -60;
  78 #[doc(hidden)] pub const GAMMA: i16 = -32;
  79
  80 /*
  81 # the following Python code generates this table:
  82 for i in xrange(-308, 333, 8):
  83     if i >= 0: f = 10**i; e = 0
  84     else: f = 2**(80-4*i) // 10**-i; e = 4 * i - 80
  85     l = f.bit_length()
  86     f = ((f << 64 >> (l-1)) + 1) >> 1; e += l - 64
  87     print '    (%#018x, %5d, %4d),' % (f, e, i)
  88 */
  89
  90 #[doc(hidden)]
  91 pub static CACHED_POW10: [(u64, i16, i16); 81] = [ // (f, e, k)
  92     (0xe61acf033d1a45df, -1087, -308),
  93     (0xab70fe17c79ac6ca, -1060, -300),
  94     (0xff77b1fcbebcdc4f, -1034, -292),
  95     (0xbe5691ef416bd60c, -1007, -284),
  96     (0x8dd01fad907ffc3c,  -980, -276),
  97     (0xd3515c2831559a83,  -954, -268),
  98     (0x9d71ac8fada6c9b5,  -927, -260),
  99     (0xea9c227723ee8bcb,  -901, -252),
 100     (0xaecc49914078536d,  -874, -244),
 101     (0x823c12795db6ce57,  -847, -236),
 102     (0xc21094364dfb5637,  -821, -228),
 103     (0x9096ea6f3848984f,  -794, -220),
 104     (0xd77485cb25823ac7,  -768, -212),
 105     (0xa086cfcd97bf97f4,  -741, -204),
 106     (0xef340a98172aace5,  -715, -196),
 107     (0xb23867fb2a35b28e,  -688, -188),
 108     (0x84c8d4dfd2c63f3b,  -661, -180),
 109     (0xc5dd44271ad3cdba,  -635, -172),
 110     (0x936b9fcebb25c996,  -608, -164),
 111     (0xdbac6c247d62a584,  -582, -156),
 112     (0xa3ab66580d5fdaf6,  -555, -148),
 113     (0xf3e2f893dec3f126,  -529, -140),
 114     (0xb5b5ada8aaff80b8,  -502, -132),
 115     (0x87625f056c7c4a8b,  -475, -124),
 116     (0xc9bcff6034c13053,  -449, -116),
 117     (0x964e858c91ba2655,  -422, -108),
 118     (0xdff9772470297ebd,  -396, -100),
 119     (0xa6dfbd9fb8e5b88f,  -369,  -92),
 120     (0xf8a95fcf88747d94,  -343,  -84),
 121     (0xb94470938fa89bcf,  -316,  -76),
 122     (0x8a08f0f8bf0f156b,  -289,  -68),
 123     (0xcdb02555653131b6,  -263,  -60),
 124     (0x993fe2c6d07b7fac,  -236,  -52),
 125     (0xe45c10c42a2b3b06,  -210,  -44),
 126     (0xaa242499697392d3,  -183,  -36),
 127     (0xfd87b5f28300ca0e,  -157,  -28),
 128     (0xbce5086492111aeb,  -130,  -20),
 129     (0x8cbccc096f5088cc,  -103,  -12),
 130     (0xd1b71758e219652c,   -77,   -4),
 131     (0x9c40000000000000,   -50,    4),
 132     (0xe8d4a51000000000,   -24,   12),
 133     (0xad78ebc5ac620000,     3,   20),
 134     (0x813f3978f8940984,    30,   28),
 135     (0xc097ce7bc90715b3,    56,   36),
 136     (0x8f7e32ce7bea5c70,    83,   44),
 137     (0xd5d238a4abe98068,   109,   52),
 138     (0x9f4f2726179a2245,   136,   60),
 139     (0xed63a231d4c4fb27,   162,   68),
 140     (0xb0de65388cc8ada8,   189,   76),
 141     (0x83c7088e1aab65db,   216,   84),
 142     (0xc45d1df942711d9a,   242,   92),
 143     (0x924d692ca61be758,   269,  100),
 144     (0xda01ee641a708dea,   295,  108),
 145     (0xa26da3999aef774a,   322,  116),
 146     (0xf209787bb47d6b85,   348,  124),
 147     (0xb454e4a179dd1877,   375,  132),
 148     (0x865b86925b9bc5c2,   402,  140),
 149     (0xc83553c5c8965d3d,   428,  148),
 150     (0x952ab45cfa97a0b3,   455,  156),
 151     (0xde469fbd99a05fe3,   481,  164),
 152     (0xa59bc234db398c25,   508,  172),
 153     (0xf6c69a72a3989f5c,   534,  180),
 154     (0xb7dcbf5354e9bece,   561,  188),
 155     (0x88fcf317f22241e2,   588,  196),
 156     (0xcc20ce9bd35c78a5,   614,  204),
 157     (0x98165af37b2153df,   641,  212),
 158     (0xe2a0b5dc971f303a,   667,  220),
 159     (0xa8d9d1535ce3b396,   694,  228),
 160     (0xfb9b7cd9a4a7443c,   720,  236),
 161     (0xbb764c4ca7a44410,   747,  244),
 162     (0x8bab8eefb6409c1a,   774,  252),
 163     (0xd01fef10a657842c,   800,  260),
 164     (0x9b10a4e5e9913129,   827,  268),
 165     (0xe7109bfba19c0c9d,   853,  276),
 166     (0xac2820d9623bf429,   880,  284),
 167     (0x80444b5e7aa7cf85,   907,  292),
 168     (0xbf21e44003acdd2d,   933,  300),
 169     (0x8e679c2f5e44ff8f,   960,  308),
 170     (0xd433179d9c8cb841,   986,  316),
 171     (0x9e19db92b4e31ba9,  1013,  324),
 172     (0xeb96bf6ebadf77d9,  1039,  332),
 173 ];
 174
 175 #[doc(hidden)] pub const CACHED_POW10_FIRST_E: i16 = -1087;
 176 #[doc(hidden)] pub const CACHED_POW10_LAST_E: i16 = 1039;
 177
 178 #[doc(hidden)]
 179 pub fn cached_power(alpha: i16, gamma: i16) -> (i16, Fp) {
 180     let offset = CACHED_POW10_FIRST_E as i32;
 181     let range = (CACHED_POW10.len() as i32) - 1;
 182     let domain = (CACHED_POW10_LAST_E - CACHED_POW10_FIRST_E) as i32;
 183     let idx = ((gamma as i32) - offset) * range / domain;
 184     let (f, e, k) = CACHED_POW10[idx as usize];
 185     debug_assert!(alpha <= e && e <= gamma);
 186     (k, Fp { f: f, e: e })
 187 }
 188
 189 /// Given `x > 0`, returns `(k, 10^k)` such that `10^k <= x < 10^(k+1)`.
 190 #[doc(hidden)]
 191 pub fn max_pow10_no_more_than(x: u32) -> (u8, u32) {
 192     debug_assert!(x > 0);
 193
 194     const X9: u32 = 10_0000_0000;
 195     const X8: u32 =  1_0000_0000;
 196     const X7: u32 =    1000_0000;
 197     const X6: u32 =     100_0000;
 198     const X5: u32 =      10_0000;
 199     const X4: u32 =       1_0000;
 200     const X3: u32 =         1000;
 201     const X2: u32 =          100;
 202     const X1: u32 =           10;
 203
 204     if x < X4 {
 205         if x < X2 { if x < X1 {(0,  1)} else {(1, X1)} }
 206         else      { if x < X3 {(2, X2)} else {(3, X3)} }
 207     } else {
 208         if x < X6      { if x < X5 {(4, X4)} else {(5, X5)} }
 209         else if x < X8 { if x < X7 {(6, X6)} else {(7, X7)} }
 210         else           { if x < X9 {(8, X8)} else {(9, X9)} }
 211     }
 212 }
 213
 214 /// The shortest mode implementation for Grisu.
 215 ///
 216 /// It returns `None` when it would return an inexact representation otherwise.
 217 pub fn format_shortest_opt(d: &Decoded,
 218                            buf: &mut [u8]) -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
 219     assert!(d.mant > 0);
 220     assert!(d.minus > 0);
 221     assert!(d.plus > 0);
 222     assert!(d.mant.checked_add(d.plus).is_some());
 223     assert!(d.mant.checked_sub(d.minus).is_some());
 224     assert!(buf.len() >= MAX_SIG_DIGITS);
 225     assert!(d.mant + d.plus < (1 << 61)); // we need at least three bits of additional precision
 226
 227     // start with the normalized values with the shared exponent
 228     let plus = Fp { f: d.mant + d.plus, e: d.exp }.normalize();
 229     let minus = Fp { f: d.mant - d.minus, e: d.exp }.normalize_to(plus.e);
 230     let v = Fp { f: d.mant, e: d.exp }.normalize_to(plus.e);
 231
 232     // find any `cached = 10^minusk` such that `ALPHA <= minusk + plus.e + 64 <= GAMMA`.
 233     // since `plus` is normalized, this means `2^(62 + ALPHA) <= plus * cached < 2^(64 + GAMMA)`;
 234     // given our choices of `ALPHA` and `GAMMA`, this puts `plus * cached` into `[4, 2^32)`.
 235     //
 236     // it is obviously desirable to maximize `GAMMA - ALPHA`,
 237     // so that we don't need many cached powers of 10, but there are some considerations:
 238     //
 239     // 1. we want to keep `floor(plus * cached)` within `u32` since it needs a costly division.
 240     //    (this is not really avoidable, remainder is required for accuracy estimation.)
 241     // 2. the remainder of `floor(plus * cached)` repeatedly gets multiplied by 10,
 242     //    and it should not overflow.
 243     //
 244     // the first gives `64 + GAMMA <= 32`, while the second gives `10 * 2^-ALPHA <= 2^64`;
 245     // -60 and -32 is the maximal range with this constraint, and V8 also uses them.
 246     let (minusk, cached) = cached_power(ALPHA - plus.e - 64, GAMMA - plus.e - 64);
 247
 248     // scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1).
 249     let plus = plus.mul(&cached);
 250     let minus = minus.mul(&cached);
 251     let v = v.mul(&cached);
 252     debug_assert_eq!(plus.e, minus.e);
 253     debug_assert_eq!(plus.e, v.e);
 254
 255     //         +- actual range of minus
 256     //   | <---|---------------------- unsafe region --------------------------> |
 257     //   |     |                                                                 |
 258     //   |  |<--->|  | <--------------- safe region ---------------> |           |
 259     //   |  |     |  |                                               |           |
 260     //   |1 ulp|1 ulp|                 |1 ulp|1 ulp|                 |1 ulp|1 ulp|
 261     //   |<--->|<--->|                 |<--->|<--->|                 |<--->|<--->|
 262     //   |-----|-----|-------...-------|-----|-----|-------...-------|-----|-----|
 263     //   |   minus   |                 |     v     |                 |   plus    |
 264     // minus1     minus0           v - 1 ulp   v + 1 ulp           plus0       plus1
 265     //
 266     // above `minus`, `v` and `plus` are *quantized* approximations (error < 1 ulp).
 267     // as we don't know the error is positive or negative, we use two approximations spaced equally
 268     // and have the maximal error of 2 ulps.
 269     //
 270     // the "unsafe region" is a liberal interval which we initially generate.
 271     // the "safe region" is a conservative interval which we only accept.
 272     // we start with the correct repr within the unsafe region, and try to find the closest repr
 273     // to `v` which is also within the safe region. if we can't, we give up.
 274     let plus1 = plus.f + 1;
 275 //  let plus0 = plus.f - 1; // only for explanation
 276 //  let minus0 = minus.f + 1; // only for explanation
 277     let minus1 = minus.f - 1;
 278     let e = -plus.e as usize; // shared exponent
 279
 280     // divide `plus1` into integral and fractional parts.
 281     // integral parts are guaranteed to fit in u32, since cached power guarantees `plus < 2^32`
 282     // and normalized `plus.f` is always less than `2^64 - 2^4` due to the precision requirement.
 283     let plus1int = (plus1 >> e) as u32;
 284     let plus1frac = plus1 & ((1 << e) - 1);
 285
 286     // calculate the largest `10^max_kappa` no more than `plus1` (thus `plus1 < 10^(max_kappa+1)`).
 287     // this is an upper bound of `kappa` below.
 288     let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(plus1int);
 289
 290     let mut i = 0;
 291     let exp = max_kappa as i16 - minusk + 1;
 292
 293     // Theorem 6.2: if `k` is the greatest integer s.t. `0 <= y mod 10^k <= y - x`,
 294     //              then `V = floor(y / 10^k) * 10^k` is in `[x, y]` and one of the shortest
 295     //              representations (with the minimal number of significant digits) in that range.
 296     //
 297     // find the digit length `kappa` between `(minus1, plus1)` as per Theorem 6.2.
 298     // Theorem 6.2 can be adopted to exclude `x` by requiring `y mod 10^k < y - x` instead.
 299     // (e.g. `x` = 32000, `y` = 32777; `kappa` = 2 since `y mod 10^3 = 777 < y - x = 777`.)
 300     // the algorithm relies on the later verification phase to exclude `y`.
 301     let delta1 = plus1 - minus1;
 302 //  let delta1int = (delta1 >> e) as usize; // only for explanation
 303     let delta1frac = delta1 & ((1 << e) - 1);
 304
 305     // render integral parts, while checking for the accuracy at each step.
 306     let mut kappa = max_kappa as i16;
 307     let mut ten_kappa = max_ten_kappa; // 10^kappa
 308     let mut remainder = plus1int; // digits yet to be rendered
 309     loop { // we always have at least one digit to render, as `plus1 >= 10^kappa`
 310         // invariants:
 311         // - `delta1int <= remainder < 10^(kappa+1)`
 312         // - `plus1int = d[0..n-1] * 10^(kappa+1) + remainder`
 313         //   (it follows that `remainder = plus1int % 10^(kappa+1)`)
 314
 315         // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
 316         let q = remainder / ten_kappa;
 317         let r = remainder % ten_kappa;
 318         debug_assert!(q < 10);
 319         buf[i] = b'0' + q as u8;
 320         i += 1;
 321
 322         let plus1rem = ((r as u64) << e) + plus1frac; // == (plus1 % 10^kappa) * 2^e
 323         if plus1rem < delta1 {
 324             // `plus1 % 10^kappa < delta1 = plus1 - minus1`; we've found the correct `kappa`.
 325             let ten_kappa = (ten_kappa as u64) << e; // scale 10^kappa back to the shared exponent
 326             return round_and_weed(&mut buf[..i], exp, plus1rem, delta1, plus1 - v.f, ten_kappa, 1);
 327         }
 328
 329         // break the loop when we have rendered all integral digits.
 330         // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
 331         if i > max_kappa as usize {
 332             debug_assert_eq!(ten_kappa, 1);
 333             debug_assert_eq!(kappa, 0);
 334             break;
 335         }
 336
 337         // restore invariants
 338         kappa -= 1;
 339         ten_kappa /= 10;
 340         remainder = r;
 341     }
 342
 343     // render fractional parts, while checking for the accuracy at each step.
 344     // this time we rely on repeated multiplications, as division will lose the precision.
 345     let mut remainder = plus1frac;
 346     let mut threshold = delta1frac;
 347     let mut ulp = 1;
 348     loop { // the next digit should be significant as we've tested that before breaking out
 349         // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
 350         // - `remainder < 2^e`
 351         // - `plus1frac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
 352
 353         remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
 354         threshold *= 10;
 355         ulp *= 10;
 356
 357         // divide `remainder` by `10^kappa`.
 358         // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
 359         let q = remainder >> e;
 360         let r = remainder & ((1 << e) - 1);
 361         debug_assert!(q < 10);
 362         buf[i] = b'0' + q as u8;
 363         i += 1;
 364
 365         if r < threshold {
 366             let ten_kappa = 1 << e; // implicit divisor
 367             return round_and_weed(&mut buf[..i], exp, r, threshold,
 368                                   (plus1 - v.f) * ulp, ten_kappa, ulp);
 369         }
 370
 371         // restore invariants
 372         kappa -= 1;
 373         remainder = r;
 374     }
 375
 376     // we've generated all significant digits of `plus1`, but not sure if it's the optimal one.
 377     // for example, if `minus1` is 3.14153... and `plus1` is 3.14158..., there are 5 different
 378     // shortest representation from 3.14154 to 3.14158 but we only have the greatest one.
 379     // we have to successively decrease the last digit and check if this is the optimal repr.
 380     // there are at most 9 candidates (..1 to ..9), so this is fairly quick. ("rounding" phase)
 381     //
 382     // the function checks if this "optimal" repr is actually within the ulp ranges,
 383     // and also, it is possible that the "second-to-optimal" repr can actually be optimal
 384     // due to the rounding error. in either cases this returns `None`. ("weeding" phase)
 385     //
 386     // all arguments here are scaled by the common (but implicit) value `k`, so that:
 387     // - `remainder = (plus1 % 10^kappa) * k`
 388     // - `threshold = (plus1 - minus1) * k` (and also, `remainder < threshold`)
 389     // - `plus1v = (plus1 - v) * k` (and also, `threshold > plus1v` from prior invariants)
 390     // - `ten_kappa = 10^kappa * k`
 391     // - `ulp = 2^-e * k`
 392     fn round_and_weed(buf: &mut [u8], exp: i16, remainder: u64, threshold: u64, plus1v: u64,
 393                       ten_kappa: u64, ulp: u64) -> Option<(usize, i16)> {
 394         assert!(!buf.is_empty());
 395
 396         // produce two approximations to `v` (actually `plus1 - v`) within 1.5 ulps.
 397         // the resulting representation should be the closest representation to both.
 398         //
 399         // here `plus1 - v` is used since calculations are done with respect to `plus1`
 400         // in order to avoid overflow/underflow (hence the seemingly swapped names).
 401         let plus1v_down = plus1v + ulp; // plus1 - (v - 1 ulp)
 402         let plus1v_up = plus1v - ulp; // plus1 - (v + 1 ulp)
 403
 404         // decrease the last digit and stop at the closest representation to `v + 1 ulp`.
 405         let mut plus1w = remainder; // plus1w(n) = plus1 - w(n)
 406         {
 407             let last = buf.last_mut().unwrap();
 408
 409             // we work with the approximated digits `w(n)`, which is initially equal to `plus1 -
 410             // plus1 % 10^kappa`. after running the loop body `n` times, `w(n) = plus1 -
 411             // plus1 % 10^kappa - n * 10^kappa`. we set `plus1w(n) = plus1 - w(n) =
 412             // plus1 % 10^kappa + n * 10^kappa` (thus `remainder = plus1w(0)`) to simplify checks.
 413             // note that `plus1w(n)` is always increasing.
 414             //
 415             // we have three conditions to terminate. any of them will make the loop unable to
 416             // proceed, but we then have at least one valid representation known to be closest to
 417             // `v + 1 ulp` anyway. we will denote them as TC1 through TC3 for brevity.
 418             //
 419             // TC1: `w(n) <= v + 1 ulp`, i.e. this is the last repr that can be the closest one.
 420             // this is equivalent to `plus1 - w(n) = plus1w(n) >= plus1 - (v + 1 ulp) = plus1v_up`.
 421             // combined with TC2 (which checks if `w(n+1)` is valid), this prevents the possible
 422             // overflow on the calculation of `plus1w(n)`.
 423             //
 424             // TC2: `w(n+1) < minus1`, i.e. the next repr definitely does not round to `v`.
 425             // this is equivalent to `plus1 - w(n) + 10^kappa = plus1w(n) + 10^kappa >
 426             // plus1 - minus1 = threshold`. the left hand side can overflow, but we know
 427             // `threshold > plus1v`, so if TC1 is false, `threshold - plus1w(n) >
 428             // threshold - (plus1v - 1 ulp) > 1 ulp` and we can safely test if
 429             // `threshold - plus1w(n) < 10^kappa` instead.
 430             //
 431             // TC3: `abs(w(n) - (v + 1 ulp)) <= abs(w(n+1) - (v + 1 ulp))`, i.e. the next repr is
 432             // no closer to `v + 1 ulp` than the current repr. given `z(n) = plus1v_up - plus1w(n)`,
 433             // this becomes `abs(z(n)) <= abs(z(n+1))`. again assuming that TC1 is false, we have
 434             // `z(n) > 0`. we have two cases to consider:
 435             //
 436             // - when `z(n+1) >= 0`: TC3 becomes `z(n) <= z(n+1)`. as `plus1w(n)` is increasing,
 437             //   `z(n)` should be decreasing and this is clearly false.
 438             // - when `z(n+1) < 0`:
 439             //   - TC3a: the precondition is `plus1v_up < plus1w(n) + 10^kappa`. assuming TC2 is
 440             //     false, `threshold >= plus1w(n) + 10^kappa` so it cannot overflow.
 441             //   - TC3b: TC3 becomes `z(n) <= -z(n+1)`, i.e. `plus1v_up - plus1w(n) >=
 442             //     plus1w(n+1) - plus1v_up = plus1w(n) + 10^kappa - plus1v_up`. the negated TC1
 443             //     gives `plus1v_up > plus1w(n)`, so it cannot overflow or underflow when
 444             //     combined with TC3a.
 445             //
 446             // consequently, we should stop when `TC1 || TC2 || (TC3a && TC3b)`. the following is
 447             // equal to its inverse, `!TC1 && !TC2 && (!TC3a || !TC3b)`.
 448             while plus1w < plus1v_up &&
 449                   threshold - plus1w >= ten_kappa &&
 450                   (plus1w + ten_kappa < plus1v_up ||
 451                    plus1v_up - plus1w >= plus1w + ten_kappa - plus1v_up) {
 452                 *last -= 1;
 453                 debug_assert!(*last > b'0'); // the shortest repr cannot end with `0`
 454                 plus1w += ten_kappa;
 455             }
 456         }
 457
 458         // check if this representation is also the closest representation to `v - 1 ulp`.
 459         //
 460         // this is simply same to the terminating conditions for `v + 1 ulp`, with all `plus1v_up`
 461         // replaced by `plus1v_down` instead. overflow analysis equally holds.
 462         if plus1w < plus1v_down &&
 463            threshold - plus1w >= ten_kappa &&
 464            (plus1w + ten_kappa < plus1v_down ||
 465             plus1v_down - plus1w >= plus1w + ten_kappa - plus1v_down) {
 466             return None;
 467         }
 468
 469         // now we have the closest representation to `v` between `plus1` and `minus1`.
 470         // this is too liberal, though, so we reject any `w(n)` not between `plus0` and `minus0`,
 471         // i.e. `plus1 - plus1w(n) <= minus0` or `plus1 - plus1w(n) >= plus0`. we utilize the facts
 472         // that `threshold = plus1 - minus1` and `plus1 - plus0 = minus0 - minus1 = 2 ulp`.
 473         if 2 * ulp <= plus1w && plus1w <= threshold - 4 * ulp {
 474             Some((buf.len(), exp))
 475         } else {
 476             None
 477         }
 478     }
 479 }
 480
 481 /// The shortest mode implementation for Grisu with Dragon fallback.
 482 ///
 483 /// This should be used for most cases.
 484 pub fn format_shortest(d: &Decoded, buf: &mut [u8]) -> (/*#digits*/ usize, /*exp*/ i16) {
 485     use num::flt2dec::strategy::dragon::format_shortest as fallback;
 486     match format_shortest_opt(d, buf) {
 487         Some(ret) => ret,
 488         None => fallback(d, buf),
 489     }
 490 }
 491
 492 /// The exact and fixed mode implementation for Grisu.
 493 ///
 494 /// It returns `None` when it would return an inexact representation otherwise.
 495 pub fn format_exact_opt(d: &Decoded, buf: &mut [u8], limit: i16)
 496                                 -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
 497     assert!(d.mant > 0);
 498     assert!(d.mant < (1 << 61)); // we need at least three bits of additional precision
 499     assert!(!buf.is_empty());
 500
 501     // normalize and scale `v`.
 502     let v = Fp { f: d.mant, e: d.exp }.normalize();
 503     let (minusk, cached) = cached_power(ALPHA - v.e - 64, GAMMA - v.e - 64);
 504     let v = v.mul(&cached);
 505
 506     // divide `v` into integral and fractional parts.
 507     let e = -v.e as usize;
 508     let vint = (v.f >> e) as u32;
 509     let vfrac = v.f & ((1 << e) - 1);
 510
 511     // both old `v` and new `v` (scaled by `10^-k`) has an error of < 1 ulp (Theorem 5.1).
 512     // as we don't know the error is positive or negative, we use two approximations
 513     // spaced equally and have the maximal error of 2 ulps (same to the shortest case).
 514     //
 515     // the goal is to find the exactly rounded series of digits that are common to
 516     // both `v - 1 ulp` and `v + 1 ulp`, so that we are maximally confident.
 517     // if this is not possible, we don't know which one is the correct output for `v`,
 518     // so we give up and fall back.
 519     //
 520     // `err` is defined as `1 ulp * 2^e` here (same to the ulp in `vfrac`),
 521     // and we will scale it whenever `v` gets scaled.
 522     let mut err = 1;
 523
 524     // calculate the largest `10^max_kappa` no more than `v` (thus `v < 10^(max_kappa+1)`).
 525     // this is an upper bound of `kappa` below.
 526     let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(vint);
 527
 528     let mut i = 0;
 529     let exp = max_kappa as i16 - minusk + 1;
 530
 531     // if we are working with the last-digit limitation, we need to shorten the buffer
 532     // before the actual rendering in order to avoid double rounding.
 533     // note that we have to enlarge the buffer again when rounding up happens!
 534     let len = if exp <= limit {
 535         // oops, we cannot even produce *one* digit.
 536         // this is possible when, say, we've got something like 9.5 and it's being rounded to 10.
 537         //
 538         // in principle we can immediately call `possibly_round` with an empty buffer,
 539         // but scaling `max_ten_kappa << e` by 10 can result in overflow.
 540         // thus we are being sloppy here and widen the error range by a factor of 10.
 541         // this will increase the false negative rate, but only very, *very* slightly;
 542         // it can only matter noticably when the mantissa is bigger than 60 bits.
 543         return possibly_round(buf, 0, exp, limit, v.f / 10, (max_ten_kappa as u64) << e, err << e);
 544     } else if ((exp as i32 - limit as i32) as usize) < buf.len() {
 545         (exp - limit) as usize
 546     } else {
 547         buf.len()
 548     };
 549     debug_assert!(len > 0);
 550
 551     // render integral parts.
 552     // the error is entirely fractional, so we don't need to check it in this part.
 553     let mut kappa = max_kappa as i16;
 554     let mut ten_kappa = max_ten_kappa; // 10^kappa
 555     let mut remainder = vint; // digits yet to be rendered
 556     loop { // we always have at least one digit to render
 557         // invariants:
 558         // - `remainder < 10^(kappa+1)`
 559         // - `vint = d[0..n-1] * 10^(kappa+1) + remainder`
 560         //   (it follows that `remainder = vint % 10^(kappa+1)`)
 561
 562         // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
 563         let q = remainder / ten_kappa;
 564         let r = remainder % ten_kappa;
 565         debug_assert!(q < 10);
 566         buf[i] = b'0' + q as u8;
 567         i += 1;
 568
 569         // is the buffer full? run the rounding pass with the remainder.
 570         if i == len {
 571             let vrem = ((r as u64) << e) + vfrac; // == (v % 10^kappa) * 2^e
 572             return possibly_round(buf, len, exp, limit, vrem, (ten_kappa as u64) << e, err << e);
 573         }
 574
 575         // break the loop when we have rendered all integral digits.
 576         // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
 577         if i > max_kappa as usize {
 578             debug_assert_eq!(ten_kappa, 1);
 579             debug_assert_eq!(kappa, 0);
 580             break;
 581         }
 582
 583         // restore invariants
 584         kappa -= 1;
 585         ten_kappa /= 10;
 586         remainder = r;
 587     }
 588
 589     // render fractional parts.
 590     //
 591     // in principle we can continue to the last available digit and check for the accuracy.
 592     // unfortunately we are working with the finite-sized integers, so we need some criterion
 593     // to detect the overflow. V8 uses `remainder > err`, which becomes false when
 594     // the first `i` significant digits of `v - 1 ulp` and `v` differ. however this rejects
 595     // too many otherwise valid input.
 596     //
 597     // since the later phase has a correct overflow detection, we instead use tighter criterion:
 598     // we continue til `err` exceeds `10^kappa / 2`, so that the range between `v - 1 ulp` and
 599     // `v + 1 ulp` definitely contains two or more rounded representations. this is same to
 600     // the first two comparisons from `possibly_round`, for the reference.
 601     let mut remainder = vfrac;
 602     let maxerr = 1 << (e - 1);
 603     while err < maxerr {
 604         // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
 605         // - `remainder < 2^e`
 606         // - `vfrac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
 607         // - `err = 10^(n-m)`
 608
 609         remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
 610         err *= 10; // won't overflow, `err * 10 < 2^e * 5 < 2^64`
 611
 612         // divide `remainder` by `10^kappa`.
 613         // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
 614         let q = remainder >> e;
 615         let r = remainder & ((1 << e) - 1);
 616         debug_assert!(q < 10);
 617         buf[i] = b'0' + q as u8;
 618         i += 1;
 619
 620         // is the buffer full? run the rounding pass with the remainder.
 621         if i == len {
 622             return possibly_round(buf, len, exp, limit, r, 1 << e, err);
 623         }
 624
 625         // restore invariants
 626         remainder = r;
 627     }
 628
 629     // further calculation is useless (`possibly_round` definitely fails), so we give up.
 630     return None;
 631
 632     // we've generated all requested digits of `v`, which should be also same to corresponding
 633     // digits of `v - 1 ulp`. now we check if there is a unique representation shared by
 634     // both `v - 1 ulp` and `v + 1 ulp`; this can be either same to generated digits, or
 635     // to the rounded-up version of those digits. if the range contains multiple representations
 636     // of the same length, we cannot be sure and should return `None` instead.
 637     //
 638     // all arguments here are scaled by the common (but implicit) value `k`, so that:
 639     // - `remainder = (v % 10^kappa) * k`
 640     // - `ten_kappa = 10^kappa * k`
 641     // - `ulp = 2^-e * k`
 642     fn possibly_round(buf: &mut [u8], mut len: usize, mut exp: i16, limit: i16,
 643                       remainder: u64, ten_kappa: u64, ulp: u64) -> Option<(usize, i16)> {
 644         debug_assert!(remainder < ten_kappa);
 645
 646         //           10^kappa
 647         //    :   :   :<->:   :
 648         //    :   :   :   :   :
 649         //    :|1 ulp|1 ulp|  :
 650         //    :|<--->|<--->|  :
 651         // ----|-----|-----|----
 652         //     |     v     |
 653         // v - 1 ulp   v + 1 ulp
 654         //
 655         // (for the reference, the dotted line indicates the exact value for
 656         // possible representations in given number of digits.)
 657         //
 658         // error is too large that there are at least three possible representations
 659         // between `v - 1 ulp` and `v + 1 ulp`. we cannot determine which one is correct.
 660         if ulp >= ten_kappa { return None; }
 661
 662         //    10^kappa
 663         //   :<------->:
 664         //   :         :
 665         //   : |1 ulp|1 ulp|
 666         //   : |<--->|<--->|
 667         // ----|-----|-----|----
 668         //     |     v     |
 669         // v - 1 ulp   v + 1 ulp
 670         //
 671         // in fact, 1/2 ulp is enough to introduce two possible representations.
 672         // (remember that we need a unique representation for both `v - 1 ulp` and `v + 1 ulp`.)
 673         // this won't overflow, as `ulp < ten_kappa` from the first check.
 674         if ten_kappa - ulp <= ulp { return None; }
 675
 676         //     remainder
 677         //       :<->|                           :
 678         //       :   |                           :
 679         //       :<--------- 10^kappa ---------->:
 680         //     | :   |                           :
 681         //     |1 ulp|1 ulp|                     :
 682         //     |<--->|<--->|                     :
 683         // ----|-----|-----|------------------------
 684         //     |     v     |
 685         // v - 1 ulp   v + 1 ulp
 686         //
 687         // if `v + 1 ulp` is closer to the rounded-down representation (which is already in `buf`),
 688         // then we can safely return. note that `v - 1 ulp` *can* be less than the current
 689         // representation, but as `1 ulp < 10^kappa / 2`, this condition is enough:
 690         // the distance between `v - 1 ulp` and the current representation
 691         // cannot exceed `10^kappa / 2`.
 692         //
 693         // the condition equals to `remainder + ulp < 10^kappa / 2`.
 694         // since this can easily overflow, first check if `remainder < 10^kappa / 2`.
 695         // we've already verified that `ulp < 10^kappa / 2`, so as long as
 696         // `10^kappa` did not overflow after all, the second check is fine.
 697         if ten_kappa - remainder > remainder && ten_kappa - 2 * remainder >= 2 * ulp {
 698             return Some((len, exp));
 699         }
 700
 701         //   :<------- remainder ------>|   :
 702         //   :                          |   :
 703         //   :<--------- 10^kappa --------->:
 704         //   :                    |     |   : |
 705         //   :                    |1 ulp|1 ulp|
 706         //   :                    |<--->|<--->|
 707         // -----------------------|-----|-----|-----
 708         //                        |     v     |
 709         //                    v - 1 ulp   v + 1 ulp
 710         //
 711         // on the other hands, if `v - 1 ulp` is closer to the rounded-up representation,
 712         // we should round up and return. for the same reason we don't need to check `v + 1 ulp`.
 713         //
 714         // the condition equals to `remainder - ulp >= 10^kappa / 2`.
 715         // again we first check if `remainder > ulp` (note that this is not `remainder >= ulp`,
 716         // as `10^kappa` is never zero). also note that `remainder - ulp <= 10^kappa`,
 717         // so the second check does not overflow.
 718         if remainder > ulp && ten_kappa - (remainder - ulp) <= remainder - ulp {
 719             if let Some(c) = round_up(buf, len) {
 720                 // only add an additional digit when we've been requested the fixed precision.
 721                 // we also need to check that, if the original buffer was empty,
 722                 // the additional digit can only be added when `exp == limit` (edge case).
 723                 exp += 1;
 724                 if exp > limit && len < buf.len() {
 725                     buf[len] = c;
 726                     len += 1;
 727                 }
 728             }
 729             return Some((len, exp));
 730         }
 731
 732         // otherwise we are doomed (i.e. some values between `v - 1 ulp` and `v + 1 ulp` are
 733         // rounding down and others are rounding up) and give up.
 734         None
 735     }
 736 }
 737
 738 /// The exact and fixed mode implementation for Grisu with Dragon fallback.
 739 ///
 740 /// This should be used for most cases.
 741 pub fn format_exact(d: &Decoded, buf: &mut [u8], limit: i16) -> (/*#digits*/ usize, /*exp*/ i16) {
 742     use num::flt2dec::strategy::dragon::format_exact as fallback;
 743     match format_exact_opt(d, buf, limit) {
 744         Some(ret) => ret,
 745         None => fallback(d, buf, limit),
 746     }
 747 }