1 //! Helper trait for generic float types.
3 use crate::fmt::{Debug, LowerExp};
4 use crate::num::FpCategory;
5 use crate::ops::{Add, Div, Mul, Neg};
7 /// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64`.
9 /// See the parent module's doc comment for why this is necessary.
11 /// Should **never ever** be implemented for other types or be used outside the dec2flt module.
28 const NEG_INFINITY: Self;
32 /// The number of bits in the significand, *excluding* the hidden bit.
33 const MANTISSA_EXPLICIT_BITS: usize;
35 // Round-to-even only happens for negative values of q
36 // when q ≥ −4 in the 64-bit case and when q ≥ −17 in
39 // When q ≥ 0, we have that 5^q ≤ 2m+1. In the 64-bit case, we
40 // have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case, we have
41 // 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
43 // When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
44 // so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
45 // or 2m+1 > 2^24 (32-bit case). Hence, we must have 2^53×5^−q < 2^64
46 // (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
47 // or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
49 // Thus we only need to round ties to even when
50 // q ∈ [−4, 23] (in the 64-bit case) or q ∈ [−17, 10]
51 // (in the 32-bit case). In both cases, the power of five (5^|q|)
52 // fits in a 64-bit word.
53 const MIN_EXPONENT_ROUND_TO_EVEN: i32;
54 const MAX_EXPONENT_ROUND_TO_EVEN: i32;
56 // Minimum exponent for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
57 const MIN_EXPONENT_FAST_PATH: i64;
59 // Maximum exponent for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
60 const MAX_EXPONENT_FAST_PATH: i64;
62 // Maximum exponent that can be represented for a disguised-fast path case.
63 // This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
64 const MAX_EXPONENT_DISGUISED_FAST_PATH: i64;
66 // Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
67 const MINIMUM_EXPONENT: i32;
69 // Largest exponent value `(1 << EXP_BITS) - 1`.
70 const INFINITE_POWER: i32;
72 // Index (in bits) of the sign.
73 const SIGN_INDEX: usize;
75 // Smallest decimal exponent for a non-zero value.
76 const SMALLEST_POWER_OF_TEN: i32;
78 // Largest decimal exponent for a non-infinite value.
79 const LARGEST_POWER_OF_TEN: i32;
81 // Maximum mantissa for the fast-path (`1 << 53` for f64).
82 const MAX_MANTISSA_FAST_PATH: u64 = 2_u64 << Self::MANTISSA_EXPLICIT_BITS;
84 /// Convert integer into float through an as cast.
85 /// This is only called in the fast-path algorithm, and therefore
86 /// will not lose precision, since the value will always be
87 /// <= Self::MAX_MANTISSA_FAST_PATH.
88 fn from_u64(v: u64) -> Self;
90 /// Performs a raw transmutation from an integer.
91 fn from_u64_bits(v: u64) -> Self;
93 /// Get a small power-of-ten for fast-path multiplication.
94 fn pow10_fast_path(exponent: usize) -> Self;
96 /// Returns the category that this number falls into.
97 fn classify(self) -> FpCategory;
99 /// Returns the mantissa, exponent and sign as integers.
100 fn integer_decode(self) -> (u64, i16, i8);
103 impl RawFloat for f32 {
104 const INFINITY: Self = f32::INFINITY;
105 const NEG_INFINITY: Self = f32::NEG_INFINITY;
106 const NAN: Self = f32::NAN;
107 const NEG_NAN: Self = -f32::NAN;
109 const MANTISSA_EXPLICIT_BITS: usize = 23;
110 const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
111 const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
112 const MIN_EXPONENT_FAST_PATH: i64 = -10; // assuming FLT_EVAL_METHOD = 0
113 const MAX_EXPONENT_FAST_PATH: i64 = 10;
114 const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 17;
115 const MINIMUM_EXPONENT: i32 = -127;
116 const INFINITE_POWER: i32 = 0xFF;
117 const SIGN_INDEX: usize = 31;
118 const SMALLEST_POWER_OF_TEN: i32 = -65;
119 const LARGEST_POWER_OF_TEN: i32 = 38;
121 fn from_u64(v: u64) -> Self {
122 debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
126 fn from_u64_bits(v: u64) -> Self {
127 f32::from_bits((v & 0xFFFFFFFF) as u32)
130 fn pow10_fast_path(exponent: usize) -> Self {
131 #[allow(clippy::use_self)]
132 const TABLE: [f32; 16] =
133 [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0.];
137 /// Returns the mantissa, exponent and sign as integers.
138 fn integer_decode(self) -> (u64, i16, i8) {
139 let bits = self.to_bits();
140 let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 };
141 let mut exponent: i16 = ((bits >> 23) & 0xff) as i16;
143 if exponent == 0 { (bits & 0x7fffff) << 1 } else { (bits & 0x7fffff) | 0x800000 };
144 // Exponent bias + mantissa shift
145 exponent -= 127 + 23;
146 (mantissa as u64, exponent, sign)
149 fn classify(self) -> FpCategory {
154 impl RawFloat for f64 {
155 const INFINITY: Self = f64::INFINITY;
156 const NEG_INFINITY: Self = f64::NEG_INFINITY;
157 const NAN: Self = f64::NAN;
158 const NEG_NAN: Self = -f64::NAN;
160 const MANTISSA_EXPLICIT_BITS: usize = 52;
161 const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
162 const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
163 const MIN_EXPONENT_FAST_PATH: i64 = -22; // assuming FLT_EVAL_METHOD = 0
164 const MAX_EXPONENT_FAST_PATH: i64 = 22;
165 const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 37;
166 const MINIMUM_EXPONENT: i32 = -1023;
167 const INFINITE_POWER: i32 = 0x7FF;
168 const SIGN_INDEX: usize = 63;
169 const SMALLEST_POWER_OF_TEN: i32 = -342;
170 const LARGEST_POWER_OF_TEN: i32 = 308;
172 fn from_u64(v: u64) -> Self {
173 debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
177 fn from_u64_bits(v: u64) -> Self {
181 fn pow10_fast_path(exponent: usize) -> Self {
182 const TABLE: [f64; 32] = [
183 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
184 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 0., 0., 0., 0., 0., 0., 0., 0., 0.,
189 /// Returns the mantissa, exponent and sign as integers.
190 fn integer_decode(self) -> (u64, i16, i8) {
191 let bits = self.to_bits();
192 let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 };
193 let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
194 let mantissa = if exponent == 0 {
195 (bits & 0xfffffffffffff) << 1
197 (bits & 0xfffffffffffff) | 0x10000000000000
199 // Exponent bias + mantissa shift
200 exponent -= 1023 + 52;
201 (mantissa, exponent, sign)
204 fn classify(self) -> FpCategory {