src/libcore/num/flt2dec/strategy/grisu.rs

   1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 /*!
  12 Rust adaptation of Grisu3 algorithm described in [1]. It uses about
  13 1KB of precomputed table, and in turn, it's very quick for most inputs.
  14
  15 [1] Florian Loitsch. 2010. Printing floating-point numbers quickly and
  16     accurately with integers. SIGPLAN Not. 45, 6 (June 2010), 233-243.
  17 */
  18
  19 use prelude::v1::*;
  20
  21 use num::diy_float::Fp;
  22 use num::flt2dec::{Decoded, MAX_SIG_DIGITS, round_up};
  23
  24
  25 // see the comments in `format_shortest_opt` for the rationale.
  26 #[doc(hidden)] pub const ALPHA: i16 = -60;
  27 #[doc(hidden)] pub const GAMMA: i16 = -32;
  28
  29 /*
  30 # the following Python code generates this table:
  31 for i in xrange(-308, 333, 8):
  32     if i >= 0: f = 10**i; e = 0
  33     else: f = 2**(80-4*i) // 10**-i; e = 4 * i - 80
  34     l = f.bit_length()
  35     f = ((f << 64 >> (l-1)) + 1) >> 1; e += l - 64
  36     print '    (%#018x, %5d, %4d),' % (f, e, i)
  37 */
  38
  39 #[doc(hidden)]
  40 pub static CACHED_POW10: [(u64, i16, i16); 81] = [ // (f, e, k)
  41     (0xe61acf033d1a45df, -1087, -308),
  42     (0xab70fe17c79ac6ca, -1060, -300),
  43     (0xff77b1fcbebcdc4f, -1034, -292),
  44     (0xbe5691ef416bd60c, -1007, -284),
  45     (0x8dd01fad907ffc3c,  -980, -276),
  46     (0xd3515c2831559a83,  -954, -268),
  47     (0x9d71ac8fada6c9b5,  -927, -260),
  48     (0xea9c227723ee8bcb,  -901, -252),
  49     (0xaecc49914078536d,  -874, -244),
  50     (0x823c12795db6ce57,  -847, -236),
  51     (0xc21094364dfb5637,  -821, -228),
  52     (0x9096ea6f3848984f,  -794, -220),
  53     (0xd77485cb25823ac7,  -768, -212),
  54     (0xa086cfcd97bf97f4,  -741, -204),
  55     (0xef340a98172aace5,  -715, -196),
  56     (0xb23867fb2a35b28e,  -688, -188),
  57     (0x84c8d4dfd2c63f3b,  -661, -180),
  58     (0xc5dd44271ad3cdba,  -635, -172),
  59     (0x936b9fcebb25c996,  -608, -164),
  60     (0xdbac6c247d62a584,  -582, -156),
  61     (0xa3ab66580d5fdaf6,  -555, -148),
  62     (0xf3e2f893dec3f126,  -529, -140),
  63     (0xb5b5ada8aaff80b8,  -502, -132),
  64     (0x87625f056c7c4a8b,  -475, -124),
  65     (0xc9bcff6034c13053,  -449, -116),
  66     (0x964e858c91ba2655,  -422, -108),
  67     (0xdff9772470297ebd,  -396, -100),
  68     (0xa6dfbd9fb8e5b88f,  -369,  -92),
  69     (0xf8a95fcf88747d94,  -343,  -84),
  70     (0xb94470938fa89bcf,  -316,  -76),
  71     (0x8a08f0f8bf0f156b,  -289,  -68),
  72     (0xcdb02555653131b6,  -263,  -60),
  73     (0x993fe2c6d07b7fac,  -236,  -52),
  74     (0xe45c10c42a2b3b06,  -210,  -44),
  75     (0xaa242499697392d3,  -183,  -36),
  76     (0xfd87b5f28300ca0e,  -157,  -28),
  77     (0xbce5086492111aeb,  -130,  -20),
  78     (0x8cbccc096f5088cc,  -103,  -12),
  79     (0xd1b71758e219652c,   -77,   -4),
  80     (0x9c40000000000000,   -50,    4),
  81     (0xe8d4a51000000000,   -24,   12),
  82     (0xad78ebc5ac620000,     3,   20),
  83     (0x813f3978f8940984,    30,   28),
  84     (0xc097ce7bc90715b3,    56,   36),
  85     (0x8f7e32ce7bea5c70,    83,   44),
  86     (0xd5d238a4abe98068,   109,   52),
  87     (0x9f4f2726179a2245,   136,   60),
  88     (0xed63a231d4c4fb27,   162,   68),
  89     (0xb0de65388cc8ada8,   189,   76),
  90     (0x83c7088e1aab65db,   216,   84),
  91     (0xc45d1df942711d9a,   242,   92),
  92     (0x924d692ca61be758,   269,  100),
  93     (0xda01ee641a708dea,   295,  108),
  94     (0xa26da3999aef774a,   322,  116),
  95     (0xf209787bb47d6b85,   348,  124),
  96     (0xb454e4a179dd1877,   375,  132),
  97     (0x865b86925b9bc5c2,   402,  140),
  98     (0xc83553c5c8965d3d,   428,  148),
  99     (0x952ab45cfa97a0b3,   455,  156),
 100     (0xde469fbd99a05fe3,   481,  164),
 101     (0xa59bc234db398c25,   508,  172),
 102     (0xf6c69a72a3989f5c,   534,  180),
 103     (0xb7dcbf5354e9bece,   561,  188),
 104     (0x88fcf317f22241e2,   588,  196),
 105     (0xcc20ce9bd35c78a5,   614,  204),
 106     (0x98165af37b2153df,   641,  212),
 107     (0xe2a0b5dc971f303a,   667,  220),
 108     (0xa8d9d1535ce3b396,   694,  228),
 109     (0xfb9b7cd9a4a7443c,   720,  236),
 110     (0xbb764c4ca7a44410,   747,  244),
 111     (0x8bab8eefb6409c1a,   774,  252),
 112     (0xd01fef10a657842c,   800,  260),
 113     (0x9b10a4e5e9913129,   827,  268),
 114     (0xe7109bfba19c0c9d,   853,  276),
 115     (0xac2820d9623bf429,   880,  284),
 116     (0x80444b5e7aa7cf85,   907,  292),
 117     (0xbf21e44003acdd2d,   933,  300),
 118     (0x8e679c2f5e44ff8f,   960,  308),
 119     (0xd433179d9c8cb841,   986,  316),
 120     (0x9e19db92b4e31ba9,  1013,  324),
 121     (0xeb96bf6ebadf77d9,  1039,  332),
 122 ];
 123
 124 #[doc(hidden)] pub const CACHED_POW10_FIRST_E: i16 = -1087;
 125 #[doc(hidden)] pub const CACHED_POW10_LAST_E: i16 = 1039;
 126
 127 #[doc(hidden)]
 128 pub fn cached_power(alpha: i16, gamma: i16) -> (i16, Fp) {
 129     let offset = CACHED_POW10_FIRST_E as i32;
 130     let range = (CACHED_POW10.len() as i32) - 1;
 131     let domain = (CACHED_POW10_LAST_E - CACHED_POW10_FIRST_E) as i32;
 132     let idx = ((gamma as i32) - offset) * range / domain;
 133     let (f, e, k) = CACHED_POW10[idx as usize];
 134     debug_assert!(alpha <= e && e <= gamma);
 135     (k, Fp { f: f, e: e })
 136 }
 137
 138 /// Given `x > 0`, returns `(k, 10^k)` such that `10^k <= x < 10^(k+1)`.
 139 #[doc(hidden)]
 140 pub fn max_pow10_no_more_than(x: u32) -> (u8, u32) {
 141     debug_assert!(x > 0);
 142
 143     const X9: u32 = 10_0000_0000;
 144     const X8: u32 =  1_0000_0000;
 145     const X7: u32 =    1000_0000;
 146     const X6: u32 =     100_0000;
 147     const X5: u32 =      10_0000;
 148     const X4: u32 =       1_0000;
 149     const X3: u32 =         1000;
 150     const X2: u32 =          100;
 151     const X1: u32 =           10;
 152
 153     if x < X4 {
 154         if x < X2 { if x < X1 {(0,  1)} else {(1, X1)} }
 155         else      { if x < X3 {(2, X2)} else {(3, X3)} }
 156     } else {
 157         if x < X6      { if x < X5 {(4, X4)} else {(5, X5)} }
 158         else if x < X8 { if x < X7 {(6, X6)} else {(7, X7)} }
 159         else           { if x < X9 {(8, X8)} else {(9, X9)} }
 160     }
 161 }
 162
 163 /// The shortest mode implementation for Grisu.
 164 ///
 165 /// It returns `None` when it would return an inexact representation otherwise.
 166 pub fn format_shortest_opt(d: &Decoded,
 167                            buf: &mut [u8]) -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
 168     assert!(d.mant > 0);
 169     assert!(d.minus > 0);
 170     assert!(d.plus > 0);
 171     assert!(d.mant.checked_add(d.plus).is_some());
 172     assert!(d.mant.checked_sub(d.minus).is_some());
 173     assert!(buf.len() >= MAX_SIG_DIGITS);
 174     assert!(d.mant + d.plus < (1 << 61)); // we need at least three bits of additional precision
 175
 176     // start with the normalized values with the shared exponent
 177     let plus = Fp { f: d.mant + d.plus, e: d.exp }.normalize();
 178     let minus = Fp { f: d.mant - d.minus, e: d.exp }.normalize_to(plus.e);
 179     let v = Fp { f: d.mant, e: d.exp }.normalize_to(plus.e);
 180
 181     // find any `cached = 10^minusk` such that `ALPHA <= minusk + plus.e + 64 <= GAMMA`.
 182     // since `plus` is normalized, this means `2^(62 + ALPHA) <= plus * cached < 2^(64 + GAMMA)`;
 183     // given our choices of `ALPHA` and `GAMMA`, this puts `plus * cached` into `[4, 2^32)`.
 184     //
 185     // it is obviously desirable to maximize `GAMMA - ALPHA`,
 186     // so that we don't need many cached powers of 10, but there are some considerations:
 187     //
 188     // 1. we want to keep `floor(plus * cached)` within `u32` since it needs a costly division.
 189     //    (this is not really avoidable, remainder is required for accuracy estimation.)
 190     // 2. the remainder of `floor(plus * cached)` repeatedly gets multiplied by 10,
 191     //    and it should not overflow.
 192     //
 193     // the first gives `64 + GAMMA <= 32`, while the second gives `10 * 2^-ALPHA <= 2^64`;
 194     // -60 and -32 is the maximal range with this constraint, and V8 also uses them.
 195     let (minusk, cached) = cached_power(ALPHA - plus.e - 64, GAMMA - plus.e - 64);
 196
 197     // scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1).
 198     let plus = plus.mul(&cached);
 199     let minus = minus.mul(&cached);
 200     let v = v.mul(&cached);
 201     debug_assert_eq!(plus.e, minus.e);
 202     debug_assert_eq!(plus.e, v.e);
 203
 204     //         +- actual range of minus
 205     //   | <---|---------------------- unsafe region --------------------------> |
 206     //   |     |                                                                 |
 207     //   |  |<--->|  | <--------------- safe region ---------------> |           |
 208     //   |  |     |  |                                               |           |
 209     //   |1 ulp|1 ulp|                 |1 ulp|1 ulp|                 |1 ulp|1 ulp|
 210     //   |<--->|<--->|                 |<--->|<--->|                 |<--->|<--->|
 211     //   |-----|-----|-------...-------|-----|-----|-------...-------|-----|-----|
 212     //   |   minus   |                 |     v     |                 |   plus    |
 213     // minus1     minus0           v - 1 ulp   v + 1 ulp           plus0       plus1
 214     //
 215     // above `minus`, `v` and `plus` are *quantized* approximations (error < 1 ulp).
 216     // as we don't know the error is positive or negative, we use two approximations spaced equally
 217     // and have the maximal error of 2 ulps.
 218     //
 219     // the "unsafe region" is a liberal interval which we initially generate.
 220     // the "safe region" is a conservative interval which we only accept.
 221     // we start with the correct repr within the unsafe region, and try to find the closest repr
 222     // to `v` which is also within the safe region. if we can't, we give up.
 223     let plus1 = plus.f + 1;
 224 //  let plus0 = plus.f - 1; // only for explanation
 225 //  let minus0 = minus.f + 1; // only for explanation
 226     let minus1 = minus.f - 1;
 227     let e = -plus.e as usize; // shared exponent
 228
 229     // divide `plus1` into integral and fractional parts.
 230     // integral parts are guaranteed to fit in u32, since cached power guarantees `plus < 2^32`
 231     // and normalized `plus.f` is always less than `2^64 - 2^4` due to the precision requirement.
 232     let plus1int = (plus1 >> e) as u32;
 233     let plus1frac = plus1 & ((1 << e) - 1);
 234
 235     // calculate the largest `10^max_kappa` no more than `plus1` (thus `plus1 < 10^(max_kappa+1)`).
 236     // this is an upper bound of `kappa` below.
 237     let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(plus1int);
 238
 239     let mut i = 0;
 240     let exp = max_kappa as i16 - minusk + 1;
 241
 242     // Theorem 6.2: if `k` is the greatest integer s.t. `0 <= y mod 10^k <= y - x`,
 243     //              then `V = floor(y / 10^k) * 10^k` is in `[x, y]` and one of the shortest
 244     //              representations (with the minimal number of significant digits) in that range.
 245     //
 246     // find the digit length `kappa` between `(minus1, plus1)` as per Theorem 6.2.
 247     // Theorem 6.2 can be adopted to exclude `x` by requiring `y mod 10^k < y - x` instead.
 248     // (e.g. `x` = 32000, `y` = 32777; `kappa` = 2 since `y mod 10^3 = 777 < y - x = 777`.)
 249     // the algorithm relies on the later verification phase to exclude `y`.
 250     let delta1 = plus1 - minus1;
 251 //  let delta1int = (delta1 >> e) as usize; // only for explanation
 252     let delta1frac = delta1 & ((1 << e) - 1);
 253
 254     // render integral parts, while checking for the accuracy at each step.
 255     let mut kappa = max_kappa as i16;
 256     let mut ten_kappa = max_ten_kappa; // 10^kappa
 257     let mut remainder = plus1int; // digits yet to be rendered
 258     loop { // we always have at least one digit to render, as `plus1 >= 10^kappa`
 259         // invariants:
 260         // - `delta1int <= remainder < 10^(kappa+1)`
 261         // - `plus1int = d[0..n-1] * 10^(kappa+1) + remainder`
 262         //   (it follows that `remainder = plus1int % 10^(kappa+1)`)
 263
 264         // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
 265         let q = remainder / ten_kappa;
 266         let r = remainder % ten_kappa;
 267         debug_assert!(q < 10);
 268         buf[i] = b'0' + q as u8;
 269         i += 1;
 270
 271         let plus1rem = ((r as u64) << e) + plus1frac; // == (plus1 % 10^kappa) * 2^e
 272         if plus1rem < delta1 {
 273             // `plus1 % 10^kappa < delta1 = plus1 - minus1`; we've found the correct `kappa`.
 274             let ten_kappa = (ten_kappa as u64) << e; // scale 10^kappa back to the shared exponent
 275             return round_and_weed(&mut buf[..i], exp, plus1rem, delta1, plus1 - v.f, ten_kappa, 1);
 276         }
 277
 278         // break the loop when we have rendered all integral digits.
 279         // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
 280         if i > max_kappa as usize {
 281             debug_assert_eq!(ten_kappa, 1);
 282             debug_assert_eq!(kappa, 0);
 283             break;
 284         }
 285
 286         // restore invariants
 287         kappa -= 1;
 288         ten_kappa /= 10;
 289         remainder = r;
 290     }
 291
 292     // render fractional parts, while checking for the accuracy at each step.
 293     // this time we rely on repeated multiplications, as division will lose the precision.
 294     let mut remainder = plus1frac;
 295     let mut threshold = delta1frac;
 296     let mut ulp = 1;
 297     loop { // the next digit should be significant as we've tested that before breaking out
 298         // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
 299         // - `remainder < 2^e`
 300         // - `plus1frac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
 301
 302         remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
 303         threshold *= 10;
 304         ulp *= 10;
 305
 306         // divide `remainder` by `10^kappa`.
 307         // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
 308         let q = remainder >> e;
 309         let r = remainder & ((1 << e) - 1);
 310         debug_assert!(q < 10);
 311         buf[i] = b'0' + q as u8;
 312         i += 1;
 313
 314         if r < threshold {
 315             let ten_kappa = 1 << e; // implicit divisor
 316             return round_and_weed(&mut buf[..i], exp, r, threshold,
 317                                   (plus1 - v.f) * ulp, ten_kappa, ulp);
 318         }
 319
 320         // restore invariants
 321         kappa -= 1;
 322         remainder = r;
 323     }
 324
 325     // we've generated all significant digits of `plus1`, but not sure if it's the optimal one.
 326     // for example, if `minus1` is 3.14153... and `plus1` is 3.14158..., there are 5 different
 327     // shortest representation from 3.14154 to 3.14158 but we only have the greatest one.
 328     // we have to successively decrease the last digit and check if this is the optimal repr.
 329     // there are at most 9 candidates (..1 to ..9), so this is fairly quick. ("rounding" phase)
 330     //
 331     // the function checks if this "optimal" repr is actually within the ulp ranges,
 332     // and also, it is possible that the "second-to-optimal" repr can actually be optimal
 333     // due to the rounding error. in either cases this returns `None`. ("weeding" phase)
 334     //
 335     // all arguments here are scaled by the common (but implicit) value `k`, so that:
 336     // - `remainder = (plus1 % 10^kappa) * k`
 337     // - `threshold = (plus1 - minus1) * k` (and also, `remainder < threshold`)
 338     // - `plus1v = (plus1 - v) * k` (and also, `threshold > plus1v` from prior invariants)
 339     // - `ten_kappa = 10^kappa * k`
 340     // - `ulp = 2^-e * k`
 341     fn round_and_weed(buf: &mut [u8], exp: i16, remainder: u64, threshold: u64, plus1v: u64,
 342                       ten_kappa: u64, ulp: u64) -> Option<(usize, i16)> {
 343         assert!(!buf.is_empty());
 344
 345         // produce two approximations to `v` (actually `plus1 - v`) within 1.5 ulps.
 346         // the resulting representation should be the closest representation to both.
 347         //
 348         // here `plus1 - v` is used since calculations are done with respect to `plus1`
 349         // in order to avoid overflow/underflow (hence the seemingly swapped names).
 350         let plus1v_down = plus1v + ulp; // plus1 - (v - 1 ulp)
 351         let plus1v_up = plus1v - ulp; // plus1 - (v + 1 ulp)
 352
 353         // decrease the last digit and stop at the closest representation to `v + 1 ulp`.
 354         let mut plus1w = remainder; // plus1w(n) = plus1 - w(n)
 355         {
 356             let last = buf.last_mut().unwrap();
 357
 358             // we work with the approximated digits `w(n)`, which is initially equal to `plus1 -
 359             // plus1 % 10^kappa`. after running the loop body `n` times, `w(n) = plus1 -
 360             // plus1 % 10^kappa - n * 10^kappa`. we set `plus1w(n) = plus1 - w(n) =
 361             // plus1 % 10^kappa + n * 10^kappa` (thus `remainder = plus1w(0)`) to simplify checks.
 362             // note that `plus1w(n)` is always increasing.
 363             //
 364             // we have three conditions to terminate. any of them will make the loop unable to
 365             // proceed, but we then have at least one valid representation known to be closest to
 366             // `v + 1 ulp` anyway. we will denote them as TC1 through TC3 for brevity.
 367             //
 368             // TC1: `w(n) <= v + 1 ulp`, i.e. this is the last repr that can be the closest one.
 369             // this is equivalent to `plus1 - w(n) = plus1w(n) >= plus1 - (v + 1 ulp) = plus1v_up`.
 370             // combined with TC2 (which checks if `w(n+1)` is valid), this prevents the possible
 371             // overflow on the calculation of `plus1w(n)`.
 372             //
 373             // TC2: `w(n+1) < minus1`, i.e. the next repr definitely does not round to `v`.
 374             // this is equivalent to `plus1 - w(n) + 10^kappa = plus1w(n) + 10^kappa >
 375             // plus1 - minus1 = threshold`. the left hand side can overflow, but we know
 376             // `threshold > plus1v`, so if TC1 is false, `threshold - plus1w(n) >
 377             // threshold - (plus1v - 1 ulp) > 1 ulp` and we can safely test if
 378             // `threshold - plus1w(n) < 10^kappa` instead.
 379             //
 380             // TC3: `abs(w(n) - (v + 1 ulp)) <= abs(w(n+1) - (v + 1 ulp))`, i.e. the next repr is
 381             // no closer to `v + 1 ulp` than the current repr. given `z(n) = plus1v_up - plus1w(n)`,
 382             // this becomes `abs(z(n)) <= abs(z(n+1))`. again assuming that TC1 is false, we have
 383             // `z(n) > 0`. we have two cases to consider:
 384             //
 385             // - when `z(n+1) >= 0`: TC3 becomes `z(n) <= z(n+1)`. as `plus1w(n)` is increasing,
 386             //   `z(n)` should be decreasing and this is clearly false.
 387             // - when `z(n+1) < 0`:
 388             //   - TC3a: the precondition is `plus1v_up < plus1w(n) + 10^kappa`. assuming TC2 is
 389             //     false, `threshold >= plus1w(n) + 10^kappa` so it cannot overflow.
 390             //   - TC3b: TC3 becomes `z(n) <= -z(n+1)`, i.e. `plus1v_up - plus1w(n) >=
 391             //     plus1w(n+1) - plus1v_up = plus1w(n) + 10^kappa - plus1v_up`. the negated TC1
 392             //     gives `plus1v_up > plus1w(n)`, so it cannot overflow or underflow when
 393             //     combined with TC3a.
 394             //
 395             // consequently, we should stop when `TC1 || TC2 || (TC3a && TC3b)`. the following is
 396             // equal to its inverse, `!TC1 && !TC2 && (!TC3a || !TC3b)`.
 397             while plus1w < plus1v_up &&
 398                   threshold - plus1w >= ten_kappa &&
 399                   (plus1w + ten_kappa < plus1v_up ||
 400                    plus1v_up - plus1w >= plus1w + ten_kappa - plus1v_up) {
 401                 *last -= 1;
 402                 debug_assert!(*last > b'0'); // the shortest repr cannot end with `0`
 403                 plus1w += ten_kappa;
 404             }
 405         }
 406
 407         // check if this representation is also the closest representation to `v - 1 ulp`.
 408         //
 409         // this is simply same to the terminating conditions for `v + 1 ulp`, with all `plus1v_up`
 410         // replaced by `plus1v_down` instead. overflow analysis equally holds.
 411         if plus1w < plus1v_down &&
 412            threshold - plus1w >= ten_kappa &&
 413            (plus1w + ten_kappa < plus1v_down ||
 414             plus1v_down - plus1w >= plus1w + ten_kappa - plus1v_down) {
 415             return None;
 416         }
 417
 418         // now we have the closest representation to `v` between `plus1` and `minus1`.
 419         // this is too liberal, though, so we reject any `w(n)` not between `plus0` and `minus0`,
 420         // i.e. `plus1 - plus1w(n) <= minus0` or `plus1 - plus1w(n) >= plus0`. we utilize the facts
 421         // that `threshold = plus1 - minus1` and `plus1 - plus0 = minus0 - minus1 = 2 ulp`.
 422         if 2 * ulp <= plus1w && plus1w <= threshold - 4 * ulp {
 423             Some((buf.len(), exp))
 424         } else {
 425             None
 426         }
 427     }
 428 }
 429
 430 /// The shortest mode implementation for Grisu with Dragon fallback.
 431 ///
 432 /// This should be used for most cases.
 433 pub fn format_shortest(d: &Decoded, buf: &mut [u8]) -> (/*#digits*/ usize, /*exp*/ i16) {
 434     use num::flt2dec::strategy::dragon::format_shortest as fallback;
 435     match format_shortest_opt(d, buf) {
 436         Some(ret) => ret,
 437         None => fallback(d, buf),
 438     }
 439 }
 440
 441 /// The exact and fixed mode implementation for Grisu.
 442 ///
 443 /// It returns `None` when it would return an inexact representation otherwise.
 444 pub fn format_exact_opt(d: &Decoded, buf: &mut [u8], limit: i16)
 445                                 -> Option<(/*#digits*/ usize, /*exp*/ i16)> {
 446     assert!(d.mant > 0);
 447     assert!(d.mant < (1 << 61)); // we need at least three bits of additional precision
 448     assert!(!buf.is_empty());
 449
 450     // normalize and scale `v`.
 451     let v = Fp { f: d.mant, e: d.exp }.normalize();
 452     let (minusk, cached) = cached_power(ALPHA - v.e - 64, GAMMA - v.e - 64);
 453     let v = v.mul(&cached);
 454
 455     // divide `v` into integral and fractional parts.
 456     let e = -v.e as usize;
 457     let vint = (v.f >> e) as u32;
 458     let vfrac = v.f & ((1 << e) - 1);
 459
 460     // both old `v` and new `v` (scaled by `10^-k`) has an error of < 1 ulp (Theorem 5.1).
 461     // as we don't know the error is positive or negative, we use two approximations
 462     // spaced equally and have the maximal error of 2 ulps (same to the shortest case).
 463     //
 464     // the goal is to find the exactly rounded series of digits that are common to
 465     // both `v - 1 ulp` and `v + 1 ulp`, so that we are maximally confident.
 466     // if this is not possible, we don't know which one is the correct output for `v`,
 467     // so we give up and fall back.
 468     //
 469     // `err` is defined as `1 ulp * 2^e` here (same to the ulp in `vfrac`),
 470     // and we will scale it whenever `v` gets scaled.
 471     let mut err = 1;
 472
 473     // calculate the largest `10^max_kappa` no more than `v` (thus `v < 10^(max_kappa+1)`).
 474     // this is an upper bound of `kappa` below.
 475     let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(vint);
 476
 477     let mut i = 0;
 478     let exp = max_kappa as i16 - minusk + 1;
 479
 480     // if we are working with the last-digit limitation, we need to shorten the buffer
 481     // before the actual rendering in order to avoid double rounding.
 482     // note that we have to enlarge the buffer again when rounding up happens!
 483     let len = if exp <= limit {
 484         // oops, we cannot even produce *one* digit.
 485         // this is possible when, say, we've got something like 9.5 and it's being rounded to 10.
 486         //
 487         // in principle we can immediately call `possibly_round` with an empty buffer,
 488         // but scaling `max_ten_kappa << e` by 10 can result in overflow.
 489         // thus we are being sloppy here and widen the error range by a factor of 10.
 490         // this will increase the false negative rate, but only very, *very* slightly;
 491         // it can only matter noticably when the mantissa is bigger than 60 bits.
 492         return possibly_round(buf, 0, exp, limit, v.f / 10, (max_ten_kappa as u64) << e, err << e);
 493     } else if ((exp as i32 - limit as i32) as usize) < buf.len() {
 494         (exp - limit) as usize
 495     } else {
 496         buf.len()
 497     };
 498     debug_assert!(len > 0);
 499
 500     // render integral parts.
 501     // the error is entirely fractional, so we don't need to check it in this part.
 502     let mut kappa = max_kappa as i16;
 503     let mut ten_kappa = max_ten_kappa; // 10^kappa
 504     let mut remainder = vint; // digits yet to be rendered
 505     loop { // we always have at least one digit to render
 506         // invariants:
 507         // - `remainder < 10^(kappa+1)`
 508         // - `vint = d[0..n-1] * 10^(kappa+1) + remainder`
 509         //   (it follows that `remainder = vint % 10^(kappa+1)`)
 510
 511         // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
 512         let q = remainder / ten_kappa;
 513         let r = remainder % ten_kappa;
 514         debug_assert!(q < 10);
 515         buf[i] = b'0' + q as u8;
 516         i += 1;
 517
 518         // is the buffer full? run the rounding pass with the remainder.
 519         if i == len {
 520             let vrem = ((r as u64) << e) + vfrac; // == (v % 10^kappa) * 2^e
 521             return possibly_round(buf, len, exp, limit, vrem, (ten_kappa as u64) << e, err << e);
 522         }
 523
 524         // break the loop when we have rendered all integral digits.
 525         // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
 526         if i > max_kappa as usize {
 527             debug_assert_eq!(ten_kappa, 1);
 528             debug_assert_eq!(kappa, 0);
 529             break;
 530         }
 531
 532         // restore invariants
 533         kappa -= 1;
 534         ten_kappa /= 10;
 535         remainder = r;
 536     }
 537
 538     // render fractional parts.
 539     //
 540     // in principle we can continue to the last available digit and check for the accuracy.
 541     // unfortunately we are working with the finite-sized integers, so we need some criterion
 542     // to detect the overflow. V8 uses `remainder > err`, which becomes false when
 543     // the first `i` significant digits of `v - 1 ulp` and `v` differ. however this rejects
 544     // too many otherwise valid input.
 545     //
 546     // since the later phase has a correct overflow detection, we instead use tighter criterion:
 547     // we continue til `err` exceeds `10^kappa / 2`, so that the range between `v - 1 ulp` and
 548     // `v + 1 ulp` definitely contains two or more rounded representations. this is same to
 549     // the first two comparisons from `possibly_round`, for the reference.
 550     let mut remainder = vfrac;
 551     let maxerr = 1 << (e - 1);
 552     while err < maxerr {
 553         // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
 554         // - `remainder < 2^e`
 555         // - `vfrac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
 556         // - `err = 10^(n-m)`
 557
 558         remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
 559         err *= 10; // won't overflow, `err * 10 < 2^e * 5 < 2^64`
 560
 561         // divide `remainder` by `10^kappa`.
 562         // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
 563         let q = remainder >> e;
 564         let r = remainder & ((1 << e) - 1);
 565         debug_assert!(q < 10);
 566         buf[i] = b'0' + q as u8;
 567         i += 1;
 568
 569         // is the buffer full? run the rounding pass with the remainder.
 570         if i == len {
 571             return possibly_round(buf, len, exp, limit, r, 1 << e, err);
 572         }
 573
 574         // restore invariants
 575         remainder = r;
 576     }
 577
 578     // further calculation is useless (`possibly_round` definitely fails), so we give up.
 579     return None;
 580
 581     // we've generated all requested digits of `v`, which should be also same to corresponding
 582     // digits of `v - 1 ulp`. now we check if there is a unique representation shared by
 583     // both `v - 1 ulp` and `v + 1 ulp`; this can be either same to generated digits, or
 584     // to the rounded-up version of those digits. if the range contains multiple representations
 585     // of the same length, we cannot be sure and should return `None` instead.
 586     //
 587     // all arguments here are scaled by the common (but implicit) value `k`, so that:
 588     // - `remainder = (v % 10^kappa) * k`
 589     // - `ten_kappa = 10^kappa * k`
 590     // - `ulp = 2^-e * k`
 591     fn possibly_round(buf: &mut [u8], mut len: usize, mut exp: i16, limit: i16,
 592                       remainder: u64, ten_kappa: u64, ulp: u64) -> Option<(usize, i16)> {
 593         debug_assert!(remainder < ten_kappa);
 594
 595         //           10^kappa
 596         //    :   :   :<->:   :
 597         //    :   :   :   :   :
 598         //    :|1 ulp|1 ulp|  :
 599         //    :|<--->|<--->|  :
 600         // ----|-----|-----|----
 601         //     |     v     |
 602         // v - 1 ulp   v + 1 ulp
 603         //
 604         // (for the reference, the dotted line indicates the exact value for
 605         // possible representations in given number of digits.)
 606         //
 607         // error is too large that there are at least three possible representations
 608         // between `v - 1 ulp` and `v + 1 ulp`. we cannot determine which one is correct.
 609         if ulp >= ten_kappa { return None; }
 610
 611         //    10^kappa
 612         //   :<------->:
 613         //   :         :
 614         //   : |1 ulp|1 ulp|
 615         //   : |<--->|<--->|
 616         // ----|-----|-----|----
 617         //     |     v     |
 618         // v - 1 ulp   v + 1 ulp
 619         //
 620         // in fact, 1/2 ulp is enough to introduce two possible representations.
 621         // (remember that we need a unique representation for both `v - 1 ulp` and `v + 1 ulp`.)
 622         // this won't overflow, as `ulp < ten_kappa` from the first check.
 623         if ten_kappa - ulp <= ulp { return None; }
 624
 625         //     remainder
 626         //       :<->|                           :
 627         //       :   |                           :
 628         //       :<--------- 10^kappa ---------->:
 629         //     | :   |                           :
 630         //     |1 ulp|1 ulp|                     :
 631         //     |<--->|<--->|                     :
 632         // ----|-----|-----|------------------------
 633         //     |     v     |
 634         // v - 1 ulp   v + 1 ulp
 635         //
 636         // if `v + 1 ulp` is closer to the rounded-down representation (which is already in `buf`),
 637         // then we can safely return. note that `v - 1 ulp` *can* be less than the current
 638         // representation, but as `1 ulp < 10^kappa / 2`, this condition is enough:
 639         // the distance between `v - 1 ulp` and the current representation
 640         // cannot exceed `10^kappa / 2`.
 641         //
 642         // the condition equals to `remainder + ulp < 10^kappa / 2`.
 643         // since this can easily overflow, first check if `remainder < 10^kappa / 2`.
 644         // we've already verified that `ulp < 10^kappa / 2`, so as long as
 645         // `10^kappa` did not overflow after all, the second check is fine.
 646         if ten_kappa - remainder > remainder && ten_kappa - 2 * remainder >= 2 * ulp {
 647             return Some((len, exp));
 648         }
 649
 650         //   :<------- remainder ------>|   :
 651         //   :                          |   :
 652         //   :<--------- 10^kappa --------->:
 653         //   :                    |     |   : |
 654         //   :                    |1 ulp|1 ulp|
 655         //   :                    |<--->|<--->|
 656         // -----------------------|-----|-----|-----
 657         //                        |     v     |
 658         //                    v - 1 ulp   v + 1 ulp
 659         //
 660         // on the other hands, if `v - 1 ulp` is closer to the rounded-up representation,
 661         // we should round up and return. for the same reason we don't need to check `v + 1 ulp`.
 662         //
 663         // the condition equals to `remainder - ulp >= 10^kappa / 2`.
 664         // again we first check if `remainder > ulp` (note that this is not `remainder >= ulp`,
 665         // as `10^kappa` is never zero). also note that `remainder - ulp <= 10^kappa`,
 666         // so the second check does not overflow.
 667         if remainder > ulp && ten_kappa - (remainder - ulp) <= remainder - ulp {
 668             if let Some(c) = round_up(buf, len) {
 669                 // only add an additional digit when we've been requested the fixed precision.
 670                 // we also need to check that, if the original buffer was empty,
 671                 // the additional digit can only be added when `exp == limit` (edge case).
 672                 exp += 1;
 673                 if exp > limit && len < buf.len() {
 674                     buf[len] = c;
 675                     len += 1;
 676                 }
 677             }
 678             return Some((len, exp));
 679         }
 680
 681         // otherwise we are doomed (i.e. some values between `v - 1 ulp` and `v + 1 ulp` are
 682         // rounding down and others are rounding up) and give up.
 683         None
 684     }
 685 }
 686
 687 /// The exact and fixed mode implementation for Grisu with Dragon fallback.
 688 ///
 689 /// This should be used for most cases.
 690 pub fn format_exact(d: &Decoded, buf: &mut [u8], limit: i16) -> (/*#digits*/ usize, /*exp*/ i16) {
 691     use num::flt2dec::strategy::dragon::format_exact as fallback;
 692     match format_exact_opt(d, buf, limit) {
 693         Some(ret) => ret,
 694         None => fallback(d, buf, limit),
 695     }
 696 }