Sync rust-lang/portable-simd@03f6fbb21e6050da2a05b3ce8f480c020b384916

[rust.git] / src / etc / dec2flt_table.py
diff --git a/src/etc/dec2flt_table.py b/src/etc/dec2flt_table.py

old mode 100755 (executable)

new mode 100644 (file)

index ad2292e..aa5188d
--- a/src/etc/dec2flt_table.py
+++ b/src/etc/dec2flt_table.py
@@ -1,141 +1,110 @@
  #!/usr/bin/env python3
  
  """
-Generate powers of ten using William Clinger's ``AlgorithmM`` for use in
+Generate powers of five using Daniel Lemire's ``Eisel-Lemire algorithm`` for use in
  decimal to floating point conversions.
  
  Specifically, computes and outputs (as Rust code) a table of 10^e for some
-range of exponents e. The output is one array of 64 bit significands and
-another array of corresponding base two exponents. The approximations are
-normalized and rounded perfectly, i.e., within 0.5 ULP of the true value.
+range of exponents e. The output is one array of 128 bit significands.
+The base two exponents can be inferred using a logarithmic slope
+of the decimal exponent. The approximations are normalized and rounded perfectly,
+i.e., within 0.5 ULP of the true value.
  
-The representation ([u64], [i16]) instead of the more natural [(u64, i16)]
-is used because (u64, i16) has a ton of padding which would make the table
-even larger, and it's already uncomfortably large (6 KiB).
+Adapted from Daniel Lemire's fast_float ``table_generation.py``,
+available here: <https://github.com/fastfloat/fast_float/blob/main/script/table_generation.py>.
  """
  from __future__ import print_function
-from math import ceil, log
+from math import ceil, floor, log, log2
  from fractions import Fraction
-from collections import namedtuple
-
-
-N = 64  # Size of the significand field in bits
-MIN_SIG = 2 ** (N - 1)
-MAX_SIG = (2 ** N) - 1
-
-# Hand-rolled fp representation without arithmetic or any other operations.
-# The significand is normalized and always N bit, but the exponent is
-# unrestricted in range.
-Fp = namedtuple('Fp', 'sig exp')
-
-
-def algorithm_m(f, e):
-    assert f > 0
-    if e < 0:
-        u = f
-        v = 10 ** abs(e)
-    else:
-        u = f * 10 ** e
-        v = 1
-    k = 0
-    x = u // v
-    while True:
-        if x < MIN_SIG:
-            u <<= 1
-            k -= 1
-        elif x >= MAX_SIG:
-            v <<= 1
-            k += 1
-        else:
-            break
-        x = u // v
-    return ratio_to_float(u, v, k)
-
-
-def ratio_to_float(u, v, k):
-    q, r = divmod(u, v)
-    v_r = v - r
-    z = Fp(q, k)
-    if r < v_r:
-        return z
-    elif r > v_r:
-        return next_float(z)
-    elif q % 2 == 0:
-        return z
-    else:
-        return next_float(z)
-
-
-def next_float(z):
-    if z.sig == MAX_SIG:
-        return Fp(MIN_SIG, z.exp + 1)
-    else:
-        return Fp(z.sig + 1, z.exp)
-
-
-def error(f, e, z):
-    decimal = f * Fraction(10) ** e
-    binary = z.sig * Fraction(2) ** z.exp
-    abs_err = abs(decimal - binary)
-    # The unit in the last place has value z.exp
-    ulp_err = abs_err / Fraction(2) ** z.exp
-    return float(ulp_err)
-
+from collections import deque
  
  HEADER = """
-//! Tables of approximations of powers of ten.
+//! Pre-computed tables powers-of-5 for extended-precision representations.
+//!
+//! These tables enable fast scaling of the significant digits
+//! of a float to the decimal exponent, with minimal rounding
+//! errors, in a 128 or 192-bit representation.
+//!
  //! DO NOT MODIFY: Generated by `src/etc/dec2flt_table.py`
  """
  
+STATIC_WARNING = """
+// Use static to avoid long compile times: Rust compiler errors
+// can have the entire table compiled multiple times, and then
+// emit code multiple times, even if it's stripped out in
+// the final binary.
+"""
  
  def main():
+    min_exp = minimum_exponent(10)
+    max_exp = maximum_exponent(10)
+    bias = -minimum_exponent(5)
+
      print(HEADER.strip())
      print()
-    print_proper_powers()
-    print()
-    print_short_powers(32, 24)
+    print('pub const SMALLEST_POWER_OF_FIVE: i32 = {};'.format(min_exp))
+    print('pub const LARGEST_POWER_OF_FIVE: i32 = {};'.format(max_exp))
+    print('pub const N_POWERS_OF_FIVE: usize = ', end='')
+    print('(LARGEST_POWER_OF_FIVE - SMALLEST_POWER_OF_FIVE + 1) as usize;')
      print()
-    print_short_powers(64, 53)
+    print_proper_powers(min_exp, max_exp, bias)
+
+
+def minimum_exponent(base):
+    return ceil(log(5e-324, base) - log(0xFFFFFFFFFFFFFFFF, base))
+
  
+def maximum_exponent(base):
+    return floor(log(1.7976931348623157e+308, base))
  
-def print_proper_powers():
-    MIN_E = -305
-    MAX_E = 305
-    e_range = range(MIN_E, MAX_E+1)
+
+def print_proper_powers(min_exp, max_exp, bias):
+    powers = deque()
+
+    # Add negative exponents.
+    # 2^(2b)/(5^−q) with b=64 + int(math.ceil(log2(5^−q)))
      powers = []
-    for e in e_range:
-        z = algorithm_m(1, e)
-        err = error(1, e, z)
-        assert err < 0.5
-        powers.append(z)
-    print("pub const MIN_E: i16 = {};".format(MIN_E))
-    print("pub const MAX_E: i16 = {};".format(MAX_E))
-    print()
-    print("#[rustfmt::skip]")
-    typ = "([u64; {0}], [i16; {0}])".format(len(powers))
-    print("pub static POWERS: ", typ, " = (", sep='')
-    print("    [")
-    for z in powers:
-        print("        0x{:x},".format(z.sig))
-    print("    ],")
-    print("    [")
-    for z in powers:
-        print("        {},".format(z.exp))
-    print("    ],")
-    print(");")
-
-
-def print_short_powers(num_bits, significand_size):
-    max_sig = 2**significand_size - 1
-    # The fast path bails out for exponents >= ceil(log5(max_sig))
-    max_e = int(ceil(log(max_sig, 5)))
-    e_range = range(max_e)
-    typ = "[f{}; {}]".format(num_bits, len(e_range))
-    print("#[rustfmt::skip]")
-    print("pub const F", num_bits, "_SHORT_POWERS: ", typ, " = [", sep='')
-    for e in e_range:
-        print("    1e{},".format(e))
-    print("];")
+    for q in range(min_exp, 0):
+        power5 = 5 ** -q
+        z = 0
+        while (1 << z) < power5:
+            z += 1
+        if q >= -27:
+            b = z + 127
+            c = 2 ** b // power5 + 1
+            powers.append((c, q))
+        else:
+            b = 2 * z + 2 * 64
+            c = 2 ** b // power5 + 1
+            # truncate
+            while c >= (1<<128):
+                c //= 2
+            powers.append((c, q))
+
+    # Add positive exponents
+    for q in range(0, max_exp + 1):
+        power5 = 5 ** q
+        # move the most significant bit in position
+        while power5 < (1<<127):
+            power5 *= 2
+        # *truncate*
+        while power5 >= (1<<128):
+            power5 //= 2
+        powers.append((power5, q))
+
+    # Print the powers.
+    print(STATIC_WARNING.strip())
+    print('#[rustfmt::skip]')
+    typ = '[(u64, u64); N_POWERS_OF_FIVE]'
+    print('pub static POWER_OF_FIVE_128: {} = ['.format(typ))
+    lo_mask = (1 << 64) - 1
+    for c, exp in powers:
+        hi = '0x{:x}'.format(c // (1 << 64))
+        lo = '0x{:x}'.format(c % (1 << 64))
+        value = '    ({}, {}), '.format(hi, lo)
+        comment = '// {}^{}'.format(5, exp)
+        print(value.ljust(46, ' ') + comment)
+    print('];')
  
  
  if __name__ == '__main__':