Speed up dec2flt fast path with additional tables.

author Robin Kruppe <robin.kruppe@gmail.com>

Wed, 30 Dec 2015 13:01:42 +0000 (14:01 +0100)

committer Robin Kruppe <robin.kruppe@gmail.com>

Tue, 12 Jan 2016 21:25:16 +0000 (22:25 +0100)
author Robin Kruppe <robin.kruppe@gmail.com>
Wed, 30 Dec 2015 13:01:42 +0000 (14:01 +0100)
committer Robin Kruppe <robin.kruppe@gmail.com>
Tue, 12 Jan 2016 21:25:16 +0000 (22:25 +0100)
diff --git a/src/etc/dec2flt_table.py b/src/etc/dec2flt_table.py

index b0140fb24559dc0eca78542ec0b5c9ee6b8ff652..9fdab1fcfca28a33cd35d67731af7a28b40f0f72 100644 (file)
--- a/src/etc/dec2flt_table.py
+++ b/src/etc/dec2flt_table.py
@@ -25,6 +25,7 @@ even larger, and it's already uncomfortably large (6 KiB).
  """
  from __future__ import print_function
  import sys
+from math import ceil, log
  from fractions import Fraction
  from collections import namedtuple
  
@@ -33,7 +34,6 @@ N = 64  # Size of the significand field in bits
  MIN_SIG = 2 ** (N - 1)
  MAX_SIG = (2 ** N) - 1
  
-
  # Hand-rolled fp representation without arithmetic or any other operations.
  # The significand is normalized and always N bit, but the exponent is
  # unrestricted in range.
@@ -92,7 +92,7 @@ def error(f, e, z):
      ulp_err = abs_err / Fraction(2) ** z.exp
      return float(ulp_err)
  
-LICENSE = """
+HEADER = """
  // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
  // file at the top-level directory of this distribution and at
  // http://rust-lang.org/COPYRIGHT.
@@ -102,9 +102,23 @@ LICENSE = """
  // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
  // option. This file may not be copied, modified, or distributed
  // except according to those terms.
+
+//! Tables of approximations of powers of ten.
+//! DO NOT MODIFY: Generated by `src/etc/dec2flt_table.py`
  """
  
+
  def main():
+    print(HEADER.strip())
+    print()
+    print_proper_powers()
+    print()
+    print_short_powers(32, 24)
+    print()
+    print_short_powers(64, 53)
+
+
+def print_proper_powers():
      MIN_E = -305
      MAX_E = 305
      e_range = range(MIN_E, MAX_E+1)
@@ -114,13 +128,10 @@ def main():
          err = error(1, e, z)
          assert err < 0.5
          powers.append(z)
-    typ = "([u64; {0}], [i16; {0}])".format(len(e_range))
-    print(LICENSE.strip())
-    print("// Table of approximations of powers of ten.")
-    print("// DO NOT MODIFY: Generated by a src/etc/dec2flt_table.py")
      print("pub const MIN_E: i16 = {};".format(MIN_E))
      print("pub const MAX_E: i16 = {};".format(MAX_E))
      print()
+    typ = "([u64; {0}], [i16; {0}])".format(len(powers))
      print("pub const POWERS: ", typ, " = ([", sep='')
      for z in powers:
          print("    0x{:x},".format(z.sig))
@@ -130,5 +141,17 @@ def main():
      print("]);")
  
  
+def print_short_powers(num_bits, significand_size):
+    max_sig = 2**significand_size - 1
+    # The fast path bails out for exponents >= ceil(log5(max_sig))
+    max_e = int(ceil(log(max_sig, 5)))
+    e_range = range(max_e)
+    typ = "[f{}; {}]".format(num_bits, len(e_range))
+    print("pub const F", num_bits, "_SHORT_POWERS: ", typ, " = [", sep='')
+    for e in e_range:
+        print("    1e{},".format(e))
+    print("];")
+
+
  if __name__ == '__main__':
      main()
diff --git a/src/libcore/num/dec2flt/algorithm.rs b/src/libcore/num/dec2flt/algorithm.rs

index 1f0f06d746197151f704f818a50de782b6d67b91..82d3389edc478d8fa2fcbed631f483620c2c4ccf 100644 (file)
--- a/src/libcore/num/dec2flt/algorithm.rs
+++ b/src/libcore/num/dec2flt/algorithm.rs
@@ -60,17 +60,13 @@ pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Opt
      if f > T::max_sig() {
          return None;
      }
-    let e = e as i16; // Can't overflow because e.abs() <= LOG5_OF_EXP_N
      // The case e < 0 cannot be folded into the other branch. Negative powers result in
      // a repeating fractional part in binary, which is rounded, which causes real
      // (and occasionally quite significant!) errors in the final result.
-    // The case `e == 0`, however, is unnecessary for correctness. It's just measurably faster.
-    if e == 0 {
-        Some(T::from_int(f))
-    } else if e > 0 {
-        Some(T::from_int(f) * fp_to_float(power_of_ten(e)))
+    if e >= 0 {
+        Some(T::from_int(f) * T::short_fast_pow10(e as usize))
      } else {
-        Some(T::from_int(f) / fp_to_float(power_of_ten(-e)))
+        Some(T::from_int(f) / T::short_fast_pow10(e.abs() as usize))
      }
  }
  
diff --git a/src/libcore/num/dec2flt/rawfp.rs b/src/libcore/num/dec2flt/rawfp.rs

index 197589740032ad40e16ea2fb725f9ad871eb4206..2099c6a7baa7649960065f6688d4fae745339368 100644 (file)
--- a/src/libcore/num/dec2flt/rawfp.rs
+++ b/src/libcore/num/dec2flt/rawfp.rs
@@ -37,6 +37,7 @@
  use num::FpCategory::{Infinite, Zero, Subnormal, Normal, Nan};
  use num::Float;
  use num::dec2flt::num::{self, Big};
+use num::dec2flt::table;
  
  #[derive(Copy, Clone, Debug)]
  pub struct Unpacked {
@@ -73,6 +74,9 @@ pub trait RawFloat : Float + Copy + Debug + LowerExp
      /// represented, the other code in this module makes sure to never let that happen.
      fn from_int(x: u64) -> Self;
  
+    /// Get the value 10^e from a pre-computed table. Panics for e >= ceil_log5_of_max_sig().
+    fn short_fast_pow10(e: usize) -> Self;
+
      // FIXME Everything that follows should be associated constants, but taking the value of an
      // associated constant from a type parameter does not work (yet?)
      // A possible workaround is having a `FloatInfo` struct for all the constants, but so far
@@ -175,6 +179,10 @@ fn from_int(x: u64) -> f32 {
          x as f32
      }
  
+    fn short_fast_pow10(e: usize) -> Self {
+        table::F32_SHORT_POWERS[e]
+    }
+
      fn max_normal_digits() -> usize {
          35
      }
@@ -222,6 +230,10 @@ fn from_int(x: u64) -> f64 {
          x as f64
      }
  
+    fn short_fast_pow10(e: usize) -> Self {
+        table::F64_SHORT_POWERS[e]
+    }
+
      fn max_normal_digits() -> usize {
          305
      }
diff --git a/src/libcore/num/dec2flt/table.rs b/src/libcore/num/dec2flt/table.rs

index dd985fd155b850417cd3326d7ffcadac407f4b4f..cb8c94313d030cb526a427bdabbf19cd6d8794c0 100644 (file)
--- a/src/libcore/num/dec2flt/table.rs
+++ b/src/libcore/num/dec2flt/table.rs
@@ -7,8 +7,10 @@
  // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
  // option. This file may not be copied, modified, or distributed
  // except according to those terms.
-// Table of approximations of powers of ten.
-// DO NOT MODIFY: Generated by a src/etc/dec2flt_table.py
+
+//! Tables of approximations of powers of ten.
+//! DO NOT MODIFY: Generated by `src/etc/dec2flt_table.py`
+
  pub const MIN_E: i16 = -305;
  pub const MAX_E: i16 = 305;
  
@@ -1237,3 +1239,43 @@
      946,
      950,
  ]);
+
+pub const F32_SHORT_POWERS: [f32; 11] = [
+    1e0,
+    1e1,
+    1e2,
+    1e3,
+    1e4,
+    1e5,
+    1e6,
+    1e7,
+    1e8,
+    1e9,
+    1e10,
+];
+
+pub const F64_SHORT_POWERS: [f64; 23] = [
+    1e0,
+    1e1,
+    1e2,
+    1e3,
+    1e4,
+    1e5,
+    1e6,
+    1e7,
+    1e8,
+    1e9,
+    1e10,
+    1e11,
+    1e12,
+    1e13,
+    1e14,
+    1e15,
+    1e16,
+    1e17,
+    1e18,
+    1e19,
+    1e20,
+    1e21,
+    1e22,
+];
author	Robin Kruppe <robin.kruppe@gmail.com>
	Wed, 30 Dec 2015 13:01:42 +0000 (14:01 +0100)
committer	Robin Kruppe <robin.kruppe@gmail.com>
	Tue, 12 Jan 2016 21:25:16 +0000 (22:25 +0100)
src/etc/dec2flt_table.py		patch \| blob \| history
src/libcore/num/dec2flt/algorithm.rs		patch \| blob \| history
src/libcore/num/dec2flt/rawfp.rs		patch \| blob \| history
src/libcore/num/dec2flt/table.rs		patch \| blob \| history