compiler/rustc_apfloat/src/ieee.rs

   1 use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO};
   2 use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd};
   3
   4 use core::cmp::{self, Ordering};
   5 use core::convert::TryFrom;
   6 use core::fmt::{self, Write};
   7 use core::marker::PhantomData;
   8 use core::mem;
   9 use core::ops::Neg;
  10 use smallvec::{smallvec, SmallVec};
  11
  12 #[must_use]
  13 pub struct IeeeFloat<S> {
  14     /// Absolute significand value (including the integer bit).
  15     sig: [Limb; 1],
  16
  17     /// The signed unbiased exponent of the value.
  18     exp: ExpInt,
  19
  20     /// What kind of floating point number this is.
  21     category: Category,
  22
  23     /// Sign bit of the number.
  24     sign: bool,
  25
  26     marker: PhantomData<S>,
  27 }
  28
  29 /// Fundamental unit of big integer arithmetic, but also
  30 /// large to store the largest significands by itself.
  31 type Limb = u128;
  32 const LIMB_BITS: usize = 128;
  33 fn limbs_for_bits(bits: usize) -> usize {
  34     (bits + LIMB_BITS - 1) / LIMB_BITS
  35 }
  36
  37 /// Enum that represents what fraction of the LSB truncated bits of an fp number
  38 /// represent.
  39 ///
  40 /// This essentially combines the roles of guard and sticky bits.
  41 #[must_use]
  42 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
  43 enum Loss {
  44     // Example of truncated bits:
  45     ExactlyZero,  // 000000
  46     LessThanHalf, // 0xxxxx  x's not all zero
  47     ExactlyHalf,  // 100000
  48     MoreThanHalf, // 1xxxxx  x's not all zero
  49 }
  50
  51 /// Represents floating point arithmetic semantics.
  52 pub trait Semantics: Sized {
  53     /// Total number of bits in the in-memory format.
  54     const BITS: usize;
  55
  56     /// Number of bits in the significand. This includes the integer bit.
  57     const PRECISION: usize;
  58
  59     /// The largest E such that 2<sup>E</sup> is representable; this matches the
  60     /// definition of IEEE 754.
  61     const MAX_EXP: ExpInt;
  62
  63     /// The smallest E such that 2<sup>E</sup> is a normalized number; this
  64     /// matches the definition of IEEE 754.
  65     const MIN_EXP: ExpInt = -Self::MAX_EXP + 1;
  66
  67     /// The significand bit that marks NaN as quiet.
  68     const QNAN_BIT: usize = Self::PRECISION - 2;
  69
  70     /// The significand bitpattern to mark a NaN as quiet.
  71     /// NOTE: for X87DoubleExtended we need to set two bits instead of 2.
  72     const QNAN_SIGNIFICAND: Limb = 1 << Self::QNAN_BIT;
  73
  74     fn from_bits(bits: u128) -> IeeeFloat<Self> {
  75         assert!(Self::BITS > Self::PRECISION);
  76
  77         let sign = bits & (1 << (Self::BITS - 1));
  78         let exponent = (bits & !sign) >> (Self::PRECISION - 1);
  79         let mut r = IeeeFloat {
  80             sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
  81             // Convert the exponent from its bias representation to a signed integer.
  82             exp: (exponent as ExpInt) - Self::MAX_EXP,
  83             category: Category::Zero,
  84             sign: sign != 0,
  85             marker: PhantomData,
  86         };
  87
  88         if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
  89             // Exponent, significand meaningless.
  90             r.category = Category::Zero;
  91         } else if r.exp == Self::MAX_EXP + 1 && r.sig == [0] {
  92             // Exponent, significand meaningless.
  93             r.category = Category::Infinity;
  94         } else if r.exp == Self::MAX_EXP + 1 && r.sig != [0] {
  95             // Sign, exponent, significand meaningless.
  96             r.category = Category::NaN;
  97         } else {
  98             r.category = Category::Normal;
  99             if r.exp == Self::MIN_EXP - 1 {
 100                 // Denormal.
 101                 r.exp = Self::MIN_EXP;
 102             } else {
 103                 // Set integer bit.
 104                 sig::set_bit(&mut r.sig, Self::PRECISION - 1);
 105             }
 106         }
 107
 108         r
 109     }
 110
 111     fn to_bits(x: IeeeFloat<Self>) -> u128 {
 112         assert!(Self::BITS > Self::PRECISION);
 113
 114         // Split integer bit from significand.
 115         let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
 116         let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1);
 117         let exponent = match x.category {
 118             Category::Normal => {
 119                 if x.exp == Self::MIN_EXP && !integer_bit {
 120                     // Denormal.
 121                     Self::MIN_EXP - 1
 122                 } else {
 123                     x.exp
 124                 }
 125             }
 126             Category::Zero => {
 127                 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
 128                 significand = 0;
 129                 Self::MIN_EXP - 1
 130             }
 131             Category::Infinity => {
 132                 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
 133                 significand = 0;
 134                 Self::MAX_EXP + 1
 135             }
 136             Category::NaN => Self::MAX_EXP + 1,
 137         };
 138
 139         // Convert the exponent from a signed integer to its bias representation.
 140         let exponent = (exponent + Self::MAX_EXP) as u128;
 141
 142         ((x.sign as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand
 143     }
 144 }
 145
 146 impl<S> Copy for IeeeFloat<S> {}
 147 impl<S> Clone for IeeeFloat<S> {
 148     fn clone(&self) -> Self {
 149         *self
 150     }
 151 }
 152
 153 macro_rules! ieee_semantics {
 154     ($($name:ident = $sem:ident($bits:tt : $exp_bits:tt)),*) => {
 155         $(pub struct $sem;)*
 156         $(pub type $name = IeeeFloat<$sem>;)*
 157         $(impl Semantics for $sem {
 158             const BITS: usize = $bits;
 159             const PRECISION: usize = ($bits - 1 - $exp_bits) + 1;
 160             const MAX_EXP: ExpInt = (1 << ($exp_bits - 1)) - 1;
 161         })*
 162     }
 163 }
 164
 165 ieee_semantics! {
 166     Half = HalfS(16:5),
 167     Single = SingleS(32:8),
 168     Double = DoubleS(64:11),
 169     Quad = QuadS(128:15)
 170 }
 171
 172 pub struct X87DoubleExtendedS;
 173 pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>;
 174 impl Semantics for X87DoubleExtendedS {
 175     const BITS: usize = 80;
 176     const PRECISION: usize = 64;
 177     const MAX_EXP: ExpInt = (1 << (15 - 1)) - 1;
 178
 179     /// For x87 extended precision, we want to make a NaN, not a
 180     /// pseudo-NaN. Maybe we should expose the ability to make
 181     /// pseudo-NaNs?
 182     const QNAN_SIGNIFICAND: Limb = 0b11 << Self::QNAN_BIT;
 183
 184     /// Integer bit is explicit in this format. Intel hardware (387 and later)
 185     /// does not support these bit patterns:
 186     ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
 187     ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
 188     ///  exponent = 0, integer bit 1 ("pseudodenormal")
 189     ///  exponent != 0 nor all 1's, integer bit 0 ("unnormal")
 190     /// At the moment, the first two are treated as NaNs, the second two as Normal.
 191     fn from_bits(bits: u128) -> IeeeFloat<Self> {
 192         let sign = bits & (1 << (Self::BITS - 1));
 193         let exponent = (bits & !sign) >> Self::PRECISION;
 194         let mut r = IeeeFloat {
 195             sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
 196             // Convert the exponent from its bias representation to a signed integer.
 197             exp: (exponent as ExpInt) - Self::MAX_EXP,
 198             category: Category::Zero,
 199             sign: sign != 0,
 200             marker: PhantomData,
 201         };
 202
 203         if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
 204             // Exponent, significand meaningless.
 205             r.category = Category::Zero;
 206         } else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] {
 207             // Exponent, significand meaningless.
 208             r.category = Category::Infinity;
 209         } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)] {
 210             // Sign, exponent, significand meaningless.
 211             r.category = Category::NaN;
 212         } else {
 213             r.category = Category::Normal;
 214             if r.exp == Self::MIN_EXP - 1 {
 215                 // Denormal.
 216                 r.exp = Self::MIN_EXP;
 217             }
 218         }
 219
 220         r
 221     }
 222
 223     fn to_bits(x: IeeeFloat<Self>) -> u128 {
 224         // Get integer bit from significand.
 225         let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
 226         let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1);
 227         let exponent = match x.category {
 228             Category::Normal => {
 229                 if x.exp == Self::MIN_EXP && !integer_bit {
 230                     // Denormal.
 231                     Self::MIN_EXP - 1
 232                 } else {
 233                     x.exp
 234                 }
 235             }
 236             Category::Zero => {
 237                 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
 238                 significand = 0;
 239                 Self::MIN_EXP - 1
 240             }
 241             Category::Infinity => {
 242                 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
 243                 significand = 1 << (Self::PRECISION - 1);
 244                 Self::MAX_EXP + 1
 245             }
 246             Category::NaN => Self::MAX_EXP + 1,
 247         };
 248
 249         // Convert the exponent from a signed integer to its bias representation.
 250         let exponent = (exponent + Self::MAX_EXP) as u128;
 251
 252         ((x.sign as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand
 253     }
 254 }
 255
 256 float_common_impls!(IeeeFloat<S>);
 257
 258 impl<S: Semantics> PartialEq for IeeeFloat<S> {
 259     fn eq(&self, rhs: &Self) -> bool {
 260         self.partial_cmp(rhs) == Some(Ordering::Equal)
 261     }
 262 }
 263
 264 impl<S: Semantics> PartialOrd for IeeeFloat<S> {
 265     fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
 266         match (self.category, rhs.category) {
 267             (Category::NaN, _) | (_, Category::NaN) => None,
 268
 269             (Category::Infinity, Category::Infinity) => Some((!self.sign).cmp(&(!rhs.sign))),
 270
 271             (Category::Zero, Category::Zero) => Some(Ordering::Equal),
 272
 273             (Category::Infinity, _) | (Category::Normal, Category::Zero) => {
 274                 Some((!self.sign).cmp(&self.sign))
 275             }
 276
 277             (_, Category::Infinity) | (Category::Zero, Category::Normal) => {
 278                 Some(rhs.sign.cmp(&(!rhs.sign)))
 279             }
 280
 281             (Category::Normal, Category::Normal) => {
 282                 // Two normal numbers. Do they have the same sign?
 283                 Some((!self.sign).cmp(&(!rhs.sign)).then_with(|| {
 284                     // Compare absolute values; invert result if negative.
 285                     let result = self.cmp_abs_normal(*rhs);
 286
 287                     if self.sign { result.reverse() } else { result }
 288                 }))
 289             }
 290         }
 291     }
 292 }
 293
 294 impl<S> Neg for IeeeFloat<S> {
 295     type Output = Self;
 296     fn neg(mut self) -> Self {
 297         self.sign = !self.sign;
 298         self
 299     }
 300 }
 301
 302 /// Prints this value as a decimal string.
 303 ///
 304 /// \param precision The maximum number of digits of
 305 ///   precision to output. If there are fewer digits available,
 306 ///   zero padding will not be used unless the value is
 307 ///   integral and small enough to be expressed in
 308 ///   precision digits. 0 means to use the natural
 309 ///   precision of the number.
 310 /// \param width The maximum number of zeros to
 311 ///   consider inserting before falling back to scientific
 312 ///   notation. 0 means to always use scientific notation.
 313 ///
 314 /// \param alternate Indicate whether to remove the trailing zero in
 315 ///   fraction part or not. Also setting this parameter to true forces
 316 ///   producing of output more similar to default printf behavior.
 317 ///   Specifically the lower e is used as exponent delimiter and exponent
 318 ///   always contains no less than two digits.
 319 ///
 320 /// Number       precision    width      Result
 321 /// ------       ---------    -----      ------
 322 /// 1.01E+4              5        2       10100
 323 /// 1.01E+4              4        2       1.01E+4
 324 /// 1.01E+4              5        1       1.01E+4
 325 /// 1.01E-2              5        2       0.0101
 326 /// 1.01E-2              4        2       0.0101
 327 /// 1.01E-2              4        1       1.01E-2
 328 impl<S: Semantics> fmt::Display for IeeeFloat<S> {
 329     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 330         let width = f.width().unwrap_or(3);
 331         let alternate = f.alternate();
 332
 333         match self.category {
 334             Category::Infinity => {
 335                 if self.sign {
 336                     return f.write_str("-Inf");
 337                 } else {
 338                     return f.write_str("+Inf");
 339                 }
 340             }
 341
 342             Category::NaN => return f.write_str("NaN"),
 343
 344             Category::Zero => {
 345                 if self.sign {
 346                     f.write_char('-')?;
 347                 }
 348
 349                 if width == 0 {
 350                     if alternate {
 351                         f.write_str("0.0")?;
 352                         if let Some(n) = f.precision() {
 353                             for _ in 1..n {
 354                                 f.write_char('0')?;
 355                             }
 356                         }
 357                         f.write_str("e+00")?;
 358                     } else {
 359                         f.write_str("0.0E+0")?;
 360                     }
 361                 } else {
 362                     f.write_char('0')?;
 363                 }
 364                 return Ok(());
 365             }
 366
 367             Category::Normal => {}
 368         }
 369
 370         if self.sign {
 371             f.write_char('-')?;
 372         }
 373
 374         // We use enough digits so the number can be round-tripped back to an
 375         // APFloat. The formula comes from "How to Print Floating-Point Numbers
 376         // Accurately" by Steele and White.
 377         // FIXME: Using a formula based purely on the precision is conservative;
 378         // we can print fewer digits depending on the actual value being printed.
 379
 380         // precision = 2 + floor(S::PRECISION / lg_2(10))
 381         let precision = f.precision().unwrap_or(2 + S::PRECISION * 59 / 196);
 382
 383         // Decompose the number into an APInt and an exponent.
 384         let mut exp = self.exp - (S::PRECISION as ExpInt - 1);
 385         let mut sig = vec![self.sig[0]];
 386
 387         // Ignore trailing binary zeros.
 388         let trailing_zeros = sig[0].trailing_zeros();
 389         let _: Loss = sig::shift_right(&mut sig, &mut exp, trailing_zeros as usize);
 390
 391         // Change the exponent from 2^e to 10^e.
 392         #[allow(clippy::comparison_chain)]
 393         if exp == 0 {
 394             // Nothing to do.
 395         } else if exp > 0 {
 396             // Just shift left.
 397             let shift = exp as usize;
 398             sig.resize(limbs_for_bits(S::PRECISION + shift), 0);
 399             sig::shift_left(&mut sig, &mut exp, shift);
 400         } else {
 401             // exp < 0
 402             let mut texp = -exp as usize;
 403
 404             // We transform this using the identity:
 405             //   (N)(2^-e) == (N)(5^e)(10^-e)
 406
 407             // Multiply significand by 5^e.
 408             //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
 409             let mut sig_scratch = vec![];
 410             let mut p5 = vec![];
 411             let mut p5_scratch = vec![];
 412             while texp != 0 {
 413                 if p5.is_empty() {
 414                     p5.push(5);
 415                 } else {
 416                     p5_scratch.resize(p5.len() * 2, 0);
 417                     let _: Loss =
 418                         sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
 419                     while p5_scratch.last() == Some(&0) {
 420                         p5_scratch.pop();
 421                     }
 422                     mem::swap(&mut p5, &mut p5_scratch);
 423                 }
 424                 if texp & 1 != 0 {
 425                     sig_scratch.resize(sig.len() + p5.len(), 0);
 426                     let _: Loss = sig::mul(
 427                         &mut sig_scratch,
 428                         &mut 0,
 429                         &sig,
 430                         &p5,
 431                         (sig.len() + p5.len()) * LIMB_BITS,
 432                     );
 433                     while sig_scratch.last() == Some(&0) {
 434                         sig_scratch.pop();
 435                     }
 436                     mem::swap(&mut sig, &mut sig_scratch);
 437                 }
 438                 texp >>= 1;
 439             }
 440         }
 441
 442         // Fill the buffer.
 443         let mut buffer = vec![];
 444
 445         // Ignore digits from the significand until it is no more
 446         // precise than is required for the desired precision.
 447         // 196/59 is a very slight overestimate of lg_2(10).
 448         let required = (precision * 196 + 58) / 59;
 449         let mut discard_digits = sig::omsb(&sig).saturating_sub(required) * 59 / 196;
 450         let mut in_trail = true;
 451         while !sig.is_empty() {
 452             // Perform short division by 10 to extract the rightmost digit.
 453             // rem <- sig % 10
 454             // sig <- sig / 10
 455             let mut rem = 0;
 456
 457             // Use 64-bit division and remainder, with 32-bit chunks from sig.
 458             sig::each_chunk(&mut sig, 32, |chunk| {
 459                 let chunk = chunk as u32;
 460                 let combined = ((rem as u64) << 32) | (chunk as u64);
 461                 rem = (combined % 10) as u8;
 462                 (combined / 10) as u32 as Limb
 463             });
 464
 465             // Reduce the sigificand to avoid wasting time dividing 0's.
 466             while sig.last() == Some(&0) {
 467                 sig.pop();
 468             }
 469
 470             let digit = rem;
 471
 472             // Ignore digits we don't need.
 473             if discard_digits > 0 {
 474                 discard_digits -= 1;
 475                 exp += 1;
 476                 continue;
 477             }
 478
 479             // Drop trailing zeros.
 480             if in_trail && digit == 0 {
 481                 exp += 1;
 482             } else {
 483                 in_trail = false;
 484                 buffer.push(b'0' + digit);
 485             }
 486         }
 487
 488         assert!(!buffer.is_empty(), "no characters in buffer!");
 489
 490         // Drop down to precision.
 491         // FIXME: don't do more precise calculations above than are required.
 492         if buffer.len() > precision {
 493             // The most significant figures are the last ones in the buffer.
 494             let mut first_sig = buffer.len() - precision;
 495
 496             // Round.
 497             // FIXME: this probably shouldn't use 'round half up'.
 498
 499             // Rounding down is just a truncation, except we also want to drop
 500             // trailing zeros from the new result.
 501             if buffer[first_sig - 1] < b'5' {
 502                 while first_sig < buffer.len() && buffer[first_sig] == b'0' {
 503                     first_sig += 1;
 504                 }
 505             } else {
 506                 // Rounding up requires a decimal add-with-carry. If we continue
 507                 // the carry, the newly-introduced zeros will just be truncated.
 508                 for x in &mut buffer[first_sig..] {
 509                     if *x == b'9' {
 510                         first_sig += 1;
 511                     } else {
 512                         *x += 1;
 513                         break;
 514                     }
 515                 }
 516             }
 517
 518             exp += first_sig as ExpInt;
 519             buffer.drain(..first_sig);
 520
 521             // If we carried through, we have exactly one digit of precision.
 522             if buffer.is_empty() {
 523                 buffer.push(b'1');
 524             }
 525         }
 526
 527         let digits = buffer.len();
 528
 529         // Check whether we should use scientific notation.
 530         let scientific = if width == 0 {
 531             true
 532         } else if exp >= 0 {
 533             // 765e3 --> 765000
 534             //              ^^^
 535             // But we shouldn't make the number look more precise than it is.
 536             exp as usize > width || digits + exp as usize > precision
 537         } else {
 538             // Power of the most significant digit.
 539             let msd = exp + (digits - 1) as ExpInt;
 540             if msd >= 0 {
 541                 // 765e-2 == 7.65
 542                 false
 543             } else {
 544                 // 765e-5 == 0.00765
 545                 //           ^ ^^
 546                 -msd as usize > width
 547             }
 548         };
 549
 550         // Scientific formatting is pretty straightforward.
 551         if scientific {
 552             exp += digits as ExpInt - 1;
 553
 554             f.write_char(buffer[digits - 1] as char)?;
 555             f.write_char('.')?;
 556             let truncate_zero = !alternate;
 557             if digits == 1 && truncate_zero {
 558                 f.write_char('0')?;
 559             } else {
 560                 for &d in buffer[..digits - 1].iter().rev() {
 561                     f.write_char(d as char)?;
 562                 }
 563             }
 564             // Fill with zeros up to precision.
 565             if !truncate_zero && precision > digits - 1 {
 566                 for _ in 0..=precision - digits {
 567                     f.write_char('0')?;
 568                 }
 569             }
 570             // For alternate we use lower 'e'.
 571             f.write_char(if alternate { 'e' } else { 'E' })?;
 572
 573             // Exponent always at least two digits if we do not truncate zeros.
 574             if truncate_zero {
 575                 write!(f, "{:+}", exp)?;
 576             } else {
 577                 write!(f, "{:+03}", exp)?;
 578             }
 579
 580             return Ok(());
 581         }
 582
 583         // Non-scientific, positive exponents.
 584         if exp >= 0 {
 585             for &d in buffer.iter().rev() {
 586                 f.write_char(d as char)?;
 587             }
 588             for _ in 0..exp {
 589                 f.write_char('0')?;
 590             }
 591             return Ok(());
 592         }
 593
 594         // Non-scientific, negative exponents.
 595         let unit_place = -exp as usize;
 596         if unit_place < digits {
 597             for &d in buffer[unit_place..].iter().rev() {
 598                 f.write_char(d as char)?;
 599             }
 600             f.write_char('.')?;
 601             for &d in buffer[..unit_place].iter().rev() {
 602                 f.write_char(d as char)?;
 603             }
 604         } else {
 605             f.write_str("0.")?;
 606             for _ in digits..unit_place {
 607                 f.write_char('0')?;
 608             }
 609             for &d in buffer.iter().rev() {
 610                 f.write_char(d as char)?;
 611             }
 612         }
 613
 614         Ok(())
 615     }
 616 }
 617
 618 impl<S: Semantics> fmt::Debug for IeeeFloat<S> {
 619     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 620         write!(
 621             f,
 622             "{}({:?} | {}{:?} * 2^{})",
 623             self,
 624             self.category,
 625             if self.sign { "-" } else { "+" },
 626             self.sig,
 627             self.exp
 628         )
 629     }
 630 }
 631
 632 impl<S: Semantics> Float for IeeeFloat<S> {
 633     const BITS: usize = S::BITS;
 634     const PRECISION: usize = S::PRECISION;
 635     const MAX_EXP: ExpInt = S::MAX_EXP;
 636     const MIN_EXP: ExpInt = S::MIN_EXP;
 637
 638     const ZERO: Self = IeeeFloat {
 639         sig: [0],
 640         exp: S::MIN_EXP - 1,
 641         category: Category::Zero,
 642         sign: false,
 643         marker: PhantomData,
 644     };
 645
 646     const INFINITY: Self = IeeeFloat {
 647         sig: [0],
 648         exp: S::MAX_EXP + 1,
 649         category: Category::Infinity,
 650         sign: false,
 651         marker: PhantomData,
 652     };
 653
 654     // FIXME(eddyb) remove when qnan becomes const fn.
 655     const NAN: Self = IeeeFloat {
 656         sig: [S::QNAN_SIGNIFICAND],
 657         exp: S::MAX_EXP + 1,
 658         category: Category::NaN,
 659         sign: false,
 660         marker: PhantomData,
 661     };
 662
 663     fn qnan(payload: Option<u128>) -> Self {
 664         IeeeFloat {
 665             sig: [S::QNAN_SIGNIFICAND
 666                 | payload.map_or(0, |payload| {
 667                     // Zero out the excess bits of the significand.
 668                     payload & ((1 << S::QNAN_BIT) - 1)
 669                 })],
 670             exp: S::MAX_EXP + 1,
 671             category: Category::NaN,
 672             sign: false,
 673             marker: PhantomData,
 674         }
 675     }
 676
 677     fn snan(payload: Option<u128>) -> Self {
 678         let mut snan = Self::qnan(payload);
 679
 680         // We always have to clear the QNaN bit to make it an SNaN.
 681         sig::clear_bit(&mut snan.sig, S::QNAN_BIT);
 682
 683         // If there are no bits set in the payload, we have to set
 684         // *something* to make it a NaN instead of an infinity;
 685         // conventionally, this is the next bit down from the QNaN bit.
 686         if snan.sig[0] & !S::QNAN_SIGNIFICAND == 0 {
 687             sig::set_bit(&mut snan.sig, S::QNAN_BIT - 1);
 688         }
 689
 690         snan
 691     }
 692
 693     fn largest() -> Self {
 694         // We want (in interchange format):
 695         //   exponent = 1..10
 696         //   significand = 1..1
 697         IeeeFloat {
 698             sig: [(1 << S::PRECISION) - 1],
 699             exp: S::MAX_EXP,
 700             category: Category::Normal,
 701             sign: false,
 702             marker: PhantomData,
 703         }
 704     }
 705
 706     // We want (in interchange format):
 707     //   exponent = 0..0
 708     //   significand = 0..01
 709     const SMALLEST: Self = IeeeFloat {
 710         sig: [1],
 711         exp: S::MIN_EXP,
 712         category: Category::Normal,
 713         sign: false,
 714         marker: PhantomData,
 715     };
 716
 717     fn smallest_normalized() -> Self {
 718         // We want (in interchange format):
 719         //   exponent = 0..0
 720         //   significand = 10..0
 721         IeeeFloat {
 722             sig: [1 << (S::PRECISION - 1)],
 723             exp: S::MIN_EXP,
 724             category: Category::Normal,
 725             sign: false,
 726             marker: PhantomData,
 727         }
 728     }
 729
 730     fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
 731         let status = match (self.category, rhs.category) {
 732             (Category::Infinity, Category::Infinity) => {
 733                 // Differently signed infinities can only be validly
 734                 // subtracted.
 735                 if self.sign != rhs.sign {
 736                     self = Self::NAN;
 737                     Status::INVALID_OP
 738                 } else {
 739                     Status::OK
 740                 }
 741             }
 742
 743             // Sign may depend on rounding mode; handled below.
 744             (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => {
 745                 Status::OK
 746             }
 747
 748             (Category::Zero, _) | (_, Category::NaN | Category::Infinity) => {
 749                 self = rhs;
 750                 Status::OK
 751             }
 752
 753             // This return code means it was not a simple case.
 754             (Category::Normal, Category::Normal) => {
 755                 let loss = sig::add_or_sub(
 756                     &mut self.sig,
 757                     &mut self.exp,
 758                     &mut self.sign,
 759                     &mut [rhs.sig[0]],
 760                     rhs.exp,
 761                     rhs.sign,
 762                 );
 763                 let status;
 764                 self = unpack!(status=, self.normalize(round, loss));
 765
 766                 // Can only be zero if we lost no fraction.
 767                 assert!(self.category != Category::Zero || loss == Loss::ExactlyZero);
 768
 769                 status
 770             }
 771         };
 772
 773         // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
 774         // positive zero unless rounding to minus infinity, except that
 775         // adding two like-signed zeroes gives that zero.
 776         if self.category == Category::Zero
 777             && (rhs.category != Category::Zero || self.sign != rhs.sign)
 778         {
 779             self.sign = round == Round::TowardNegative;
 780         }
 781
 782         status.and(self)
 783     }
 784
 785     fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
 786         self.sign ^= rhs.sign;
 787
 788         match (self.category, rhs.category) {
 789             (Category::NaN, _) => {
 790                 self.sign = false;
 791                 Status::OK.and(self)
 792             }
 793
 794             (_, Category::NaN) => {
 795                 self.sign = false;
 796                 self.category = Category::NaN;
 797                 self.sig = rhs.sig;
 798                 Status::OK.and(self)
 799             }
 800
 801             (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => {
 802                 Status::INVALID_OP.and(Self::NAN)
 803             }
 804
 805             (_, Category::Infinity) | (Category::Infinity, _) => {
 806                 self.category = Category::Infinity;
 807                 Status::OK.and(self)
 808             }
 809
 810             (Category::Zero, _) | (_, Category::Zero) => {
 811                 self.category = Category::Zero;
 812                 Status::OK.and(self)
 813             }
 814
 815             (Category::Normal, Category::Normal) => {
 816                 self.exp += rhs.exp;
 817                 let mut wide_sig = [0; 2];
 818                 let loss =
 819                     sig::mul(&mut wide_sig, &mut self.exp, &self.sig, &rhs.sig, S::PRECISION);
 820                 self.sig = [wide_sig[0]];
 821                 let mut status;
 822                 self = unpack!(status=, self.normalize(round, loss));
 823                 if loss != Loss::ExactlyZero {
 824                     status |= Status::INEXACT;
 825                 }
 826                 status.and(self)
 827             }
 828         }
 829     }
 830
 831     fn mul_add_r(mut self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> {
 832         // If and only if all arguments are normal do we need to do an
 833         // extended-precision calculation.
 834         if !self.is_finite_non_zero() || !multiplicand.is_finite_non_zero() || !addend.is_finite() {
 835             let mut status;
 836             self = unpack!(status=, self.mul_r(multiplicand, round));
 837
 838             // FS can only be Status::OK or Status::INVALID_OP. There is no more work
 839             // to do in the latter case. The IEEE-754R standard says it is
 840             // implementation-defined in this case whether, if ADDEND is a
 841             // quiet NaN, we raise invalid op; this implementation does so.
 842             //
 843             // If we need to do the addition we can do so with normal
 844             // precision.
 845             if status == Status::OK {
 846                 self = unpack!(status=, self.add_r(addend, round));
 847             }
 848             return status.and(self);
 849         }
 850
 851         // Post-multiplication sign, before addition.
 852         self.sign ^= multiplicand.sign;
 853
 854         // Allocate space for twice as many bits as the original significand, plus one
 855         // extra bit for the addition to overflow into.
 856         assert!(limbs_for_bits(S::PRECISION * 2 + 1) <= 2);
 857         let mut wide_sig = sig::widening_mul(self.sig[0], multiplicand.sig[0]);
 858
 859         let mut loss = Loss::ExactlyZero;
 860         let mut omsb = sig::omsb(&wide_sig);
 861         self.exp += multiplicand.exp;
 862
 863         // Assume the operands involved in the multiplication are single-precision
 864         // FP, and the two multiplicants are:
 865         //     lhs = a23 . a22 ... a0 * 2^e1
 866         //     rhs = b23 . b22 ... b0 * 2^e2
 867         // the result of multiplication is:
 868         //     lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
 869         // Note that there are three significant bits at the left-hand side of the
 870         // radix point: two for the multiplication, and an overflow bit for the
 871         // addition (that will always be zero at this point). Move the radix point
 872         // toward left by two bits, and adjust exponent accordingly.
 873         self.exp += 2;
 874
 875         if addend.is_non_zero() {
 876             // Normalize our MSB to one below the top bit to allow for overflow.
 877             let ext_precision = 2 * S::PRECISION + 1;
 878             if omsb != ext_precision - 1 {
 879                 assert!(ext_precision > omsb);
 880                 sig::shift_left(&mut wide_sig, &mut self.exp, (ext_precision - 1) - omsb);
 881             }
 882
 883             // The intermediate result of the multiplication has "2 * S::PRECISION"
 884             // significant bit; adjust the addend to be consistent with mul result.
 885             let mut ext_addend_sig = [addend.sig[0], 0];
 886
 887             // Extend the addend significand to ext_precision - 1. This guarantees
 888             // that the high bit of the significand is zero (same as wide_sig),
 889             // so the addition will overflow (if it does overflow at all) into the top bit.
 890             sig::shift_left(&mut ext_addend_sig, &mut 0, ext_precision - 1 - S::PRECISION);
 891             loss = sig::add_or_sub(
 892                 &mut wide_sig,
 893                 &mut self.exp,
 894                 &mut self.sign,
 895                 &mut ext_addend_sig,
 896                 addend.exp + 1,
 897                 addend.sign,
 898             );
 899
 900             omsb = sig::omsb(&wide_sig);
 901         }
 902
 903         // Convert the result having "2 * S::PRECISION" significant-bits back to the one
 904         // having "S::PRECISION" significant-bits. First, move the radix point from
 905         // position "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be
 906         // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION".
 907         self.exp -= S::PRECISION as ExpInt + 1;
 908
 909         // In case MSB resides at the left-hand side of radix point, shift the
 910         // mantissa right by some amount to make sure the MSB reside right before
 911         // the radix point (i.e., "MSB . rest-significant-bits").
 912         if omsb > S::PRECISION {
 913             let bits = omsb - S::PRECISION;
 914             loss = sig::shift_right(&mut wide_sig, &mut self.exp, bits).combine(loss);
 915         }
 916
 917         self.sig[0] = wide_sig[0];
 918
 919         let mut status;
 920         self = unpack!(status=, self.normalize(round, loss));
 921         if loss != Loss::ExactlyZero {
 922             status |= Status::INEXACT;
 923         }
 924
 925         // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
 926         // positive zero unless rounding to minus infinity, except that
 927         // adding two like-signed zeroes gives that zero.
 928         if self.category == Category::Zero
 929             && !status.intersects(Status::UNDERFLOW)
 930             && self.sign != addend.sign
 931         {
 932             self.sign = round == Round::TowardNegative;
 933         }
 934
 935         status.and(self)
 936     }
 937
 938     fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
 939         self.sign ^= rhs.sign;
 940
 941         match (self.category, rhs.category) {
 942             (Category::NaN, _) => {
 943                 self.sign = false;
 944                 Status::OK.and(self)
 945             }
 946
 947             (_, Category::NaN) => {
 948                 self.category = Category::NaN;
 949                 self.sig = rhs.sig;
 950                 self.sign = false;
 951                 Status::OK.and(self)
 952             }
 953
 954             (Category::Infinity, Category::Infinity) | (Category::Zero, Category::Zero) => {
 955                 Status::INVALID_OP.and(Self::NAN)
 956             }
 957
 958             (Category::Infinity | Category::Zero, _) => Status::OK.and(self),
 959
 960             (Category::Normal, Category::Infinity) => {
 961                 self.category = Category::Zero;
 962                 Status::OK.and(self)
 963             }
 964
 965             (Category::Normal, Category::Zero) => {
 966                 self.category = Category::Infinity;
 967                 Status::DIV_BY_ZERO.and(self)
 968             }
 969
 970             (Category::Normal, Category::Normal) => {
 971                 self.exp -= rhs.exp;
 972                 let dividend = self.sig[0];
 973                 let loss = sig::div(
 974                     &mut self.sig,
 975                     &mut self.exp,
 976                     &mut [dividend],
 977                     &mut [rhs.sig[0]],
 978                     S::PRECISION,
 979                 );
 980                 let mut status;
 981                 self = unpack!(status=, self.normalize(round, loss));
 982                 if loss != Loss::ExactlyZero {
 983                     status |= Status::INEXACT;
 984                 }
 985                 status.and(self)
 986             }
 987         }
 988     }
 989
 990     fn c_fmod(mut self, rhs: Self) -> StatusAnd<Self> {
 991         match (self.category, rhs.category) {
 992             (Category::NaN, _)
 993             | (Category::Zero, Category::Infinity | Category::Normal)
 994             | (Category::Normal, Category::Infinity) => Status::OK.and(self),
 995
 996             (_, Category::NaN) => {
 997                 self.sign = false;
 998                 self.category = Category::NaN;
 999                 self.sig = rhs.sig;
1000                 Status::OK.and(self)
1001             }
1002
1003             (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN),
1004
1005             (Category::Normal, Category::Normal) => {
1006                 while self.is_finite_non_zero()
1007                     && rhs.is_finite_non_zero()
1008                     && self.cmp_abs_normal(rhs) != Ordering::Less
1009                 {
1010                     let mut v = rhs.scalbn(self.ilogb() - rhs.ilogb());
1011                     if self.cmp_abs_normal(v) == Ordering::Less {
1012                         v = v.scalbn(-1);
1013                     }
1014                     v.sign = self.sign;
1015
1016                     let status;
1017                     self = unpack!(status=, self - v);
1018                     assert_eq!(status, Status::OK);
1019                 }
1020                 Status::OK.and(self)
1021             }
1022         }
1023     }
1024
1025     fn round_to_integral(self, round: Round) -> StatusAnd<Self> {
1026         // If the exponent is large enough, we know that this value is already
1027         // integral, and the arithmetic below would potentially cause it to saturate
1028         // to +/-Inf. Bail out early instead.
1029         if self.is_finite_non_zero() && self.exp + 1 >= S::PRECISION as ExpInt {
1030             return Status::OK.and(self);
1031         }
1032
1033         // The algorithm here is quite simple: we add 2^(p-1), where p is the
1034         // precision of our format, and then subtract it back off again. The choice
1035         // of rounding modes for the addition/subtraction determines the rounding mode
1036         // for our integral rounding as well.
1037         // NOTE: When the input value is negative, we do subtraction followed by
1038         // addition instead.
1039         assert!(S::PRECISION <= 128);
1040         let mut status;
1041         let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1)));
1042         let magic_const = magic_const.copy_sign(self);
1043
1044         if status != Status::OK {
1045             return status.and(self);
1046         }
1047
1048         let mut r = self;
1049         r = unpack!(status=, r.add_r(magic_const, round));
1050         if status != Status::OK && status != Status::INEXACT {
1051             return status.and(self);
1052         }
1053
1054         // Restore the input sign to handle 0.0/-0.0 cases correctly.
1055         r.sub_r(magic_const, round).map(|r| r.copy_sign(self))
1056     }
1057
1058     fn next_up(mut self) -> StatusAnd<Self> {
1059         // Compute nextUp(x), handling each float category separately.
1060         match self.category {
1061             Category::Infinity => {
1062                 if self.sign {
1063                     // nextUp(-inf) = -largest
1064                     Status::OK.and(-Self::largest())
1065                 } else {
1066                     // nextUp(+inf) = +inf
1067                     Status::OK.and(self)
1068                 }
1069             }
1070             Category::NaN => {
1071                 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
1072                 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
1073                 //                     change the payload.
1074                 if self.is_signaling() {
1075                     // For consistency, propagate the sign of the sNaN to the qNaN.
1076                     Status::INVALID_OP.and(Self::NAN.copy_sign(self))
1077                 } else {
1078                     Status::OK.and(self)
1079                 }
1080             }
1081             Category::Zero => {
1082                 // nextUp(pm 0) = +smallest
1083                 Status::OK.and(Self::SMALLEST)
1084             }
1085             Category::Normal => {
1086                 // nextUp(-smallest) = -0
1087                 if self.is_smallest() && self.sign {
1088                     return Status::OK.and(-Self::ZERO);
1089                 }
1090
1091                 // nextUp(largest) == INFINITY
1092                 if self.is_largest() && !self.sign {
1093                     return Status::OK.and(Self::INFINITY);
1094                 }
1095
1096                 // Excluding the integral bit. This allows us to test for binade boundaries.
1097                 let sig_mask = (1 << (S::PRECISION - 1)) - 1;
1098
1099                 // nextUp(normal) == normal + inc.
1100                 if self.sign {
1101                     // If we are negative, we need to decrement the significand.
1102
1103                     // We only cross a binade boundary that requires adjusting the exponent
1104                     // if:
1105                     //   1. exponent != S::MIN_EXP. This implies we are not in the
1106                     //   smallest binade or are dealing with denormals.
1107                     //   2. Our significand excluding the integral bit is all zeros.
1108                     let crossing_binade_boundary =
1109                         self.exp != S::MIN_EXP && self.sig[0] & sig_mask == 0;
1110
1111                     // Decrement the significand.
1112                     //
1113                     // We always do this since:
1114                     //   1. If we are dealing with a non-binade decrement, by definition we
1115                     //   just decrement the significand.
1116                     //   2. If we are dealing with a normal -> normal binade decrement, since
1117                     //   we have an explicit integral bit the fact that all bits but the
1118                     //   integral bit are zero implies that subtracting one will yield a
1119                     //   significand with 0 integral bit and 1 in all other spots. Thus we
1120                     //   must just adjust the exponent and set the integral bit to 1.
1121                     //   3. If we are dealing with a normal -> denormal binade decrement,
1122                     //   since we set the integral bit to 0 when we represent denormals, we
1123                     //   just decrement the significand.
1124                     sig::decrement(&mut self.sig);
1125
1126                     if crossing_binade_boundary {
1127                         // Our result is a normal number. Do the following:
1128                         // 1. Set the integral bit to 1.
1129                         // 2. Decrement the exponent.
1130                         sig::set_bit(&mut self.sig, S::PRECISION - 1);
1131                         self.exp -= 1;
1132                     }
1133                 } else {
1134                     // If we are positive, we need to increment the significand.
1135
1136                     // We only cross a binade boundary that requires adjusting the exponent if
1137                     // the input is not a denormal and all of said input's significand bits
1138                     // are set. If all of said conditions are true: clear the significand, set
1139                     // the integral bit to 1, and increment the exponent. If we have a
1140                     // denormal always increment since moving denormals and the numbers in the
1141                     // smallest normal binade have the same exponent in our representation.
1142                     let crossing_binade_boundary =
1143                         !self.is_denormal() && self.sig[0] & sig_mask == sig_mask;
1144
1145                     if crossing_binade_boundary {
1146                         self.sig = [0];
1147                         sig::set_bit(&mut self.sig, S::PRECISION - 1);
1148                         assert_ne!(
1149                             self.exp,
1150                             S::MAX_EXP,
1151                             "We can not increment an exponent beyond the MAX_EXP \
1152                              allowed by the given floating point semantics."
1153                         );
1154                         self.exp += 1;
1155                     } else {
1156                         sig::increment(&mut self.sig);
1157                     }
1158                 }
1159                 Status::OK.and(self)
1160             }
1161         }
1162     }
1163
1164     fn from_bits(input: u128) -> Self {
1165         // Dispatch to semantics.
1166         S::from_bits(input)
1167     }
1168
1169     fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> {
1170         IeeeFloat {
1171             sig: [input],
1172             exp: S::PRECISION as ExpInt - 1,
1173             category: Category::Normal,
1174             sign: false,
1175             marker: PhantomData,
1176         }
1177         .normalize(round, Loss::ExactlyZero)
1178     }
1179
1180     fn from_str_r(mut s: &str, mut round: Round) -> Result<StatusAnd<Self>, ParseError> {
1181         if s.is_empty() {
1182             return Err(ParseError("Invalid string length"));
1183         }
1184
1185         // Handle special cases.
1186         match s {
1187             "inf" | "INFINITY" => return Ok(Status::OK.and(Self::INFINITY)),
1188             "-inf" | "-INFINITY" => return Ok(Status::OK.and(-Self::INFINITY)),
1189             "nan" | "NaN" => return Ok(Status::OK.and(Self::NAN)),
1190             "-nan" | "-NaN" => return Ok(Status::OK.and(-Self::NAN)),
1191             _ => {}
1192         }
1193
1194         // Handle a leading minus sign.
1195         let minus = s.starts_with('-');
1196         if minus || s.starts_with('+') {
1197             s = &s[1..];
1198             if s.is_empty() {
1199                 return Err(ParseError("String has no digits"));
1200             }
1201         }
1202
1203         // Adjust the rounding mode for the absolute value below.
1204         if minus {
1205             round = -round;
1206         }
1207
1208         let r = if s.starts_with("0x") || s.starts_with("0X") {
1209             s = &s[2..];
1210             if s.is_empty() {
1211                 return Err(ParseError("Invalid string"));
1212             }
1213             Self::from_hexadecimal_string(s, round)?
1214         } else {
1215             Self::from_decimal_string(s, round)?
1216         };
1217
1218         Ok(r.map(|r| if minus { -r } else { r }))
1219     }
1220
1221     fn to_bits(self) -> u128 {
1222         // Dispatch to semantics.
1223         S::to_bits(self)
1224     }
1225
1226     fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> {
1227         // The result of trying to convert a number too large.
1228         let overflow = if self.sign {
1229             // Negative numbers cannot be represented as unsigned.
1230             0
1231         } else {
1232             // Largest unsigned integer of the given width.
1233             !0 >> (128 - width)
1234         };
1235
1236         *is_exact = false;
1237
1238         match self.category {
1239             Category::NaN => Status::INVALID_OP.and(0),
1240
1241             Category::Infinity => Status::INVALID_OP.and(overflow),
1242
1243             Category::Zero => {
1244                 // Negative zero can't be represented as an int.
1245                 *is_exact = !self.sign;
1246                 Status::OK.and(0)
1247             }
1248
1249             Category::Normal => {
1250                 let mut r = 0;
1251
1252                 // Step 1: place our absolute value, with any fraction truncated, in
1253                 // the destination.
1254                 let truncated_bits = if self.exp < 0 {
1255                     // Our absolute value is less than one; truncate everything.
1256                     // For exponent -1 the integer bit represents .5, look at that.
1257                     // For smaller exponents leftmost truncated bit is 0.
1258                     S::PRECISION - 1 + (-self.exp) as usize
1259                 } else {
1260                     // We want the most significant (exponent + 1) bits; the rest are
1261                     // truncated.
1262                     let bits = self.exp as usize + 1;
1263
1264                     // Hopelessly large in magnitude?
1265                     if bits > width {
1266                         return Status::INVALID_OP.and(overflow);
1267                     }
1268
1269                     if bits < S::PRECISION {
1270                         // We truncate (S::PRECISION - bits) bits.
1271                         r = self.sig[0] >> (S::PRECISION - bits);
1272                         S::PRECISION - bits
1273                     } else {
1274                         // We want at least as many bits as are available.
1275                         r = self.sig[0] << (bits - S::PRECISION);
1276                         0
1277                     }
1278                 };
1279
1280                 // Step 2: work out any lost fraction, and increment the absolute
1281                 // value if we would round away from zero.
1282                 let mut loss = Loss::ExactlyZero;
1283                 if truncated_bits > 0 {
1284                     loss = Loss::through_truncation(&self.sig, truncated_bits);
1285                     if loss != Loss::ExactlyZero
1286                         && self.round_away_from_zero(round, loss, truncated_bits)
1287                     {
1288                         r = r.wrapping_add(1);
1289                         if r == 0 {
1290                             return Status::INVALID_OP.and(overflow); // Overflow.
1291                         }
1292                     }
1293                 }
1294
1295                 // Step 3: check if we fit in the destination.
1296                 if r > overflow {
1297                     return Status::INVALID_OP.and(overflow);
1298                 }
1299
1300                 if loss == Loss::ExactlyZero {
1301                     *is_exact = true;
1302                     Status::OK.and(r)
1303                 } else {
1304                     Status::INEXACT.and(r)
1305                 }
1306             }
1307         }
1308     }
1309
1310     fn cmp_abs_normal(self, rhs: Self) -> Ordering {
1311         assert!(self.is_finite_non_zero());
1312         assert!(rhs.is_finite_non_zero());
1313
1314         // If exponents are equal, do an unsigned comparison of the significands.
1315         self.exp.cmp(&rhs.exp).then_with(|| sig::cmp(&self.sig, &rhs.sig))
1316     }
1317
1318     fn bitwise_eq(self, rhs: Self) -> bool {
1319         if self.category != rhs.category || self.sign != rhs.sign {
1320             return false;
1321         }
1322
1323         if self.category == Category::Zero || self.category == Category::Infinity {
1324             return true;
1325         }
1326
1327         if self.is_finite_non_zero() && self.exp != rhs.exp {
1328             return false;
1329         }
1330
1331         self.sig == rhs.sig
1332     }
1333
1334     fn is_negative(self) -> bool {
1335         self.sign
1336     }
1337
1338     fn is_denormal(self) -> bool {
1339         self.is_finite_non_zero()
1340             && self.exp == S::MIN_EXP
1341             && !sig::get_bit(&self.sig, S::PRECISION - 1)
1342     }
1343
1344     fn is_signaling(self) -> bool {
1345         // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
1346         // first bit of the trailing significand being 0.
1347         self.is_nan() && !sig::get_bit(&self.sig, S::QNAN_BIT)
1348     }
1349
1350     fn category(self) -> Category {
1351         self.category
1352     }
1353
1354     fn get_exact_inverse(self) -> Option<Self> {
1355         // Special floats and denormals have no exact inverse.
1356         if !self.is_finite_non_zero() {
1357             return None;
1358         }
1359
1360         // Check that the number is a power of two by making sure that only the
1361         // integer bit is set in the significand.
1362         if self.sig != [1 << (S::PRECISION - 1)] {
1363             return None;
1364         }
1365
1366         // Get the inverse.
1367         let mut reciprocal = Self::from_u128(1).value;
1368         let status;
1369         reciprocal = unpack!(status=, reciprocal / self);
1370         if status != Status::OK {
1371             return None;
1372         }
1373
1374         // Avoid multiplication with a denormal, it is not safe on all platforms and
1375         // may be slower than a normal division.
1376         if reciprocal.is_denormal() {
1377             return None;
1378         }
1379
1380         assert!(reciprocal.is_finite_non_zero());
1381         assert_eq!(reciprocal.sig, [1 << (S::PRECISION - 1)]);
1382
1383         Some(reciprocal)
1384     }
1385
1386     fn ilogb(mut self) -> ExpInt {
1387         if self.is_nan() {
1388             return IEK_NAN;
1389         }
1390         if self.is_zero() {
1391             return IEK_ZERO;
1392         }
1393         if self.is_infinite() {
1394             return IEK_INF;
1395         }
1396         if !self.is_denormal() {
1397             return self.exp;
1398         }
1399
1400         let sig_bits = (S::PRECISION - 1) as ExpInt;
1401         self.exp += sig_bits;
1402         self = self.normalize(Round::NearestTiesToEven, Loss::ExactlyZero).value;
1403         self.exp - sig_bits
1404     }
1405
1406     fn scalbn_r(mut self, exp: ExpInt, round: Round) -> Self {
1407         // If exp is wildly out-of-scale, simply adding it to self.exp will
1408         // overflow; clamp it to a safe range before adding, but ensure that the range
1409         // is large enough that the clamp does not change the result. The range we
1410         // need to support is the difference between the largest possible exponent and
1411         // the normalized exponent of half the smallest denormal.
1412
1413         let sig_bits = (S::PRECISION - 1) as i32;
1414         let max_change = S::MAX_EXP as i32 - (S::MIN_EXP as i32 - sig_bits) + 1;
1415
1416         // Clamp to one past the range ends to let normalize handle overflow.
1417         let exp_change = cmp::min(cmp::max(exp as i32, -max_change - 1), max_change);
1418         self.exp = self.exp.saturating_add(exp_change as ExpInt);
1419         self = self.normalize(round, Loss::ExactlyZero).value;
1420         if self.is_nan() {
1421             sig::set_bit(&mut self.sig, S::QNAN_BIT);
1422         }
1423         self
1424     }
1425
1426     fn frexp_r(mut self, exp: &mut ExpInt, round: Round) -> Self {
1427         *exp = self.ilogb();
1428
1429         // Quiet signalling nans.
1430         if *exp == IEK_NAN {
1431             sig::set_bit(&mut self.sig, S::QNAN_BIT);
1432             return self;
1433         }
1434
1435         if *exp == IEK_INF {
1436             return self;
1437         }
1438
1439         // 1 is added because frexp is defined to return a normalized fraction in
1440         // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
1441         if *exp == IEK_ZERO {
1442             *exp = 0;
1443         } else {
1444             *exp += 1;
1445         }
1446         self.scalbn_r(-*exp, round)
1447     }
1448 }
1449
1450 impl<S: Semantics, T: Semantics> FloatConvert<IeeeFloat<T>> for IeeeFloat<S> {
1451     fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd<IeeeFloat<T>> {
1452         let mut r = IeeeFloat {
1453             sig: self.sig,
1454             exp: self.exp,
1455             category: self.category,
1456             sign: self.sign,
1457             marker: PhantomData,
1458         };
1459
1460         // x86 has some unusual NaNs which cannot be represented in any other
1461         // format; note them here.
1462         fn is_x87_double_extended<S: Semantics>() -> bool {
1463             S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND
1464         }
1465         let x87_special_nan = is_x87_double_extended::<S>()
1466             && !is_x87_double_extended::<T>()
1467             && r.category == Category::NaN
1468             && (r.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND;
1469
1470         // If this is a truncation of a denormal number, and the target semantics
1471         // has larger exponent range than the source semantics (this can happen
1472         // when truncating from PowerPC double-double to double format), the
1473         // right shift could lose result mantissa bits. Adjust exponent instead
1474         // of performing excessive shift.
1475         let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt;
1476         if shift < 0 && r.is_finite_non_zero() {
1477             let mut exp_change = sig::omsb(&r.sig) as ExpInt - S::PRECISION as ExpInt;
1478             if r.exp + exp_change < T::MIN_EXP {
1479                 exp_change = T::MIN_EXP - r.exp;
1480             }
1481             if exp_change < shift {
1482                 exp_change = shift;
1483             }
1484             if exp_change < 0 {
1485                 shift -= exp_change;
1486                 r.exp += exp_change;
1487             }
1488         }
1489
1490         // If this is a truncation, perform the shift.
1491         let loss = if shift < 0 && (r.is_finite_non_zero() || r.category == Category::NaN) {
1492             sig::shift_right(&mut r.sig, &mut 0, -shift as usize)
1493         } else {
1494             Loss::ExactlyZero
1495         };
1496
1497         // If this is an extension, perform the shift.
1498         if shift > 0 && (r.is_finite_non_zero() || r.category == Category::NaN) {
1499             sig::shift_left(&mut r.sig, &mut 0, shift as usize);
1500         }
1501
1502         let status;
1503         if r.is_finite_non_zero() {
1504             r = unpack!(status=, r.normalize(round, loss));
1505             *loses_info = status != Status::OK;
1506         } else if r.category == Category::NaN {
1507             *loses_info = loss != Loss::ExactlyZero || x87_special_nan;
1508
1509             // For x87 extended precision, we want to make a NaN, not a special NaN if
1510             // the input wasn't special either.
1511             if !x87_special_nan && is_x87_double_extended::<T>() {
1512                 sig::set_bit(&mut r.sig, T::PRECISION - 1);
1513             }
1514
1515             // Convert of sNaN creates qNaN and raises an exception (invalid op).
1516             // This also guarantees that a sNaN does not become Inf on a truncation
1517             // that loses all payload bits.
1518             if self.is_signaling() {
1519                 // Quiet signaling NaN.
1520                 sig::set_bit(&mut r.sig, T::QNAN_BIT);
1521                 status = Status::INVALID_OP;
1522             } else {
1523                 status = Status::OK;
1524             }
1525         } else {
1526             *loses_info = false;
1527             status = Status::OK;
1528         }
1529
1530         status.and(r)
1531     }
1532 }
1533
1534 impl<S: Semantics> IeeeFloat<S> {
1535     /// Handle positive overflow. We either return infinity or
1536     /// the largest finite number. For negative overflow,
1537     /// negate the `round` argument before calling.
1538     fn overflow_result(round: Round) -> StatusAnd<Self> {
1539         match round {
1540             // Infinity?
1541             Round::NearestTiesToEven | Round::NearestTiesToAway | Round::TowardPositive => {
1542                 (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY)
1543             }
1544             // Otherwise we become the largest finite number.
1545             Round::TowardNegative | Round::TowardZero => Status::INEXACT.and(Self::largest()),
1546         }
1547     }
1548
1549     /// Returns `true` if, when truncating the current number, with `bit` the
1550     /// new LSB, with the given lost fraction and rounding mode, the result
1551     /// would need to be rounded away from zero (i.e., by increasing the
1552     /// signficand). This routine must work for `Category::Zero` of both signs, and
1553     /// `Category::Normal` numbers.
1554     fn round_away_from_zero(&self, round: Round, loss: Loss, bit: usize) -> bool {
1555         // NaNs and infinities should not have lost fractions.
1556         assert!(self.is_finite_non_zero() || self.is_zero());
1557
1558         // Current callers never pass this so we don't handle it.
1559         assert_ne!(loss, Loss::ExactlyZero);
1560
1561         match round {
1562             Round::NearestTiesToAway => loss == Loss::ExactlyHalf || loss == Loss::MoreThanHalf,
1563             Round::NearestTiesToEven => {
1564                 if loss == Loss::MoreThanHalf {
1565                     return true;
1566                 }
1567
1568                 // Our zeros don't have a significand to test.
1569                 if loss == Loss::ExactlyHalf && self.category != Category::Zero {
1570                     return sig::get_bit(&self.sig, bit);
1571                 }
1572
1573                 false
1574             }
1575             Round::TowardZero => false,
1576             Round::TowardPositive => !self.sign,
1577             Round::TowardNegative => self.sign,
1578         }
1579     }
1580
1581     fn normalize(mut self, round: Round, mut loss: Loss) -> StatusAnd<Self> {
1582         if !self.is_finite_non_zero() {
1583             return Status::OK.and(self);
1584         }
1585
1586         // Before rounding normalize the exponent of Category::Normal numbers.
1587         let mut omsb = sig::omsb(&self.sig);
1588
1589         if omsb > 0 {
1590             // OMSB is numbered from 1. We want to place it in the integer
1591             // bit numbered PRECISION if possible, with a compensating change in
1592             // the exponent.
1593             let mut final_exp = self.exp.saturating_add(omsb as ExpInt - S::PRECISION as ExpInt);
1594
1595             // If the resulting exponent is too high, overflow according to
1596             // the rounding mode.
1597             if final_exp > S::MAX_EXP {
1598                 let round = if self.sign { -round } else { round };
1599                 return Self::overflow_result(round).map(|r| r.copy_sign(self));
1600             }
1601
1602             // Subnormal numbers have exponent MIN_EXP, and their MSB
1603             // is forced based on that.
1604             if final_exp < S::MIN_EXP {
1605                 final_exp = S::MIN_EXP;
1606             }
1607
1608             // Shifting left is easy as we don't lose precision.
1609             if final_exp < self.exp {
1610                 assert_eq!(loss, Loss::ExactlyZero);
1611
1612                 let exp_change = (self.exp - final_exp) as usize;
1613                 sig::shift_left(&mut self.sig, &mut self.exp, exp_change);
1614
1615                 return Status::OK.and(self);
1616             }
1617
1618             // Shift right and capture any new lost fraction.
1619             if final_exp > self.exp {
1620                 let exp_change = (final_exp - self.exp) as usize;
1621                 loss = sig::shift_right(&mut self.sig, &mut self.exp, exp_change).combine(loss);
1622
1623                 // Keep OMSB up-to-date.
1624                 omsb = omsb.saturating_sub(exp_change);
1625             }
1626         }
1627
1628         // Now round the number according to round given the lost
1629         // fraction.
1630
1631         // As specified in IEEE 754, since we do not trap we do not report
1632         // underflow for exact results.
1633         if loss == Loss::ExactlyZero {
1634             // Canonicalize zeros.
1635             if omsb == 0 {
1636                 self.category = Category::Zero;
1637             }
1638
1639             return Status::OK.and(self);
1640         }
1641
1642         // Increment the significand if we're rounding away from zero.
1643         if self.round_away_from_zero(round, loss, 0) {
1644             if omsb == 0 {
1645                 self.exp = S::MIN_EXP;
1646             }
1647
1648             // We should never overflow.
1649             assert_eq!(sig::increment(&mut self.sig), 0);
1650             omsb = sig::omsb(&self.sig);
1651
1652             // Did the significand increment overflow?
1653             if omsb == S::PRECISION + 1 {
1654                 // Renormalize by incrementing the exponent and shifting our
1655                 // significand right one. However if we already have the
1656                 // maximum exponent we overflow to infinity.
1657                 if self.exp == S::MAX_EXP {
1658                     self.category = Category::Infinity;
1659
1660                     return (Status::OVERFLOW | Status::INEXACT).and(self);
1661                 }
1662
1663                 let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1);
1664
1665                 return Status::INEXACT.and(self);
1666             }
1667         }
1668
1669         // The normal case - we were and are not denormal, and any
1670         // significand increment above didn't overflow.
1671         if omsb == S::PRECISION {
1672             return Status::INEXACT.and(self);
1673         }
1674
1675         // We have a non-zero denormal.
1676         assert!(omsb < S::PRECISION);
1677
1678         // Canonicalize zeros.
1679         if omsb == 0 {
1680             self.category = Category::Zero;
1681         }
1682
1683         // The Category::Zero case is a denormal that underflowed to zero.
1684         (Status::UNDERFLOW | Status::INEXACT).and(self)
1685     }
1686
1687     fn from_hexadecimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
1688         let mut r = IeeeFloat {
1689             sig: [0],
1690             exp: 0,
1691             category: Category::Normal,
1692             sign: false,
1693             marker: PhantomData,
1694         };
1695
1696         let mut any_digits = false;
1697         let mut has_exp = false;
1698         let mut bit_pos = LIMB_BITS as isize;
1699         let mut loss = None;
1700
1701         // Without leading or trailing zeros, irrespective of the dot.
1702         let mut first_sig_digit = None;
1703         let mut dot = s.len();
1704
1705         for (p, c) in s.char_indices() {
1706             // Skip leading zeros and any (hexa)decimal point.
1707             if c == '.' {
1708                 if dot != s.len() {
1709                     return Err(ParseError("String contains multiple dots"));
1710                 }
1711                 dot = p;
1712             } else if let Some(hex_value) = c.to_digit(16) {
1713                 any_digits = true;
1714
1715                 if first_sig_digit.is_none() {
1716                     if hex_value == 0 {
1717                         continue;
1718                     }
1719                     first_sig_digit = Some(p);
1720                 }
1721
1722                 // Store the number while we have space.
1723                 bit_pos -= 4;
1724                 if bit_pos >= 0 {
1725                     r.sig[0] |= (hex_value as Limb) << bit_pos;
1726                 // If zero or one-half (the hexadecimal digit 8) are followed
1727                 // by non-zero, they're a little more than zero or one-half.
1728                 } else if let Some(ref mut loss) = loss {
1729                     if hex_value != 0 {
1730                         if *loss == Loss::ExactlyZero {
1731                             *loss = Loss::LessThanHalf;
1732                         }
1733                         if *loss == Loss::ExactlyHalf {
1734                             *loss = Loss::MoreThanHalf;
1735                         }
1736                     }
1737                 } else {
1738                     loss = Some(match hex_value {
1739                         0 => Loss::ExactlyZero,
1740                         1..=7 => Loss::LessThanHalf,
1741                         8 => Loss::ExactlyHalf,
1742                         9..=15 => Loss::MoreThanHalf,
1743                         _ => unreachable!(),
1744                     });
1745                 }
1746             } else if c == 'p' || c == 'P' {
1747                 if !any_digits {
1748                     return Err(ParseError("Significand has no digits"));
1749                 }
1750
1751                 if dot == s.len() {
1752                     dot = p;
1753                 }
1754
1755                 let mut chars = s[p + 1..].chars().peekable();
1756
1757                 // Adjust for the given exponent.
1758                 let exp_minus = chars.peek() == Some(&'-');
1759                 if exp_minus || chars.peek() == Some(&'+') {
1760                     chars.next();
1761                 }
1762
1763                 for c in chars {
1764                     if let Some(value) = c.to_digit(10) {
1765                         has_exp = true;
1766                         r.exp = r.exp.saturating_mul(10).saturating_add(value as ExpInt);
1767                     } else {
1768                         return Err(ParseError("Invalid character in exponent"));
1769                     }
1770                 }
1771                 if !has_exp {
1772                     return Err(ParseError("Exponent has no digits"));
1773                 }
1774
1775                 if exp_minus {
1776                     r.exp = -r.exp;
1777                 }
1778
1779                 break;
1780             } else {
1781                 return Err(ParseError("Invalid character in significand"));
1782             }
1783         }
1784         if !any_digits {
1785             return Err(ParseError("Significand has no digits"));
1786         }
1787
1788         // Hex floats require an exponent but not a hexadecimal point.
1789         if !has_exp {
1790             return Err(ParseError("Hex strings require an exponent"));
1791         }
1792
1793         // Ignore the exponent if we are zero.
1794         let first_sig_digit = match first_sig_digit {
1795             Some(p) => p,
1796             None => return Ok(Status::OK.and(Self::ZERO)),
1797         };
1798
1799         // Calculate the exponent adjustment implicit in the number of
1800         // significant digits and adjust for writing the significand starting
1801         // at the most significant nibble.
1802         let exp_adjustment = if dot > first_sig_digit {
1803             ExpInt::try_from(dot - first_sig_digit).unwrap()
1804         } else {
1805             -ExpInt::try_from(first_sig_digit - dot - 1).unwrap()
1806         };
1807         let exp_adjustment = exp_adjustment
1808             .saturating_mul(4)
1809             .saturating_sub(1)
1810             .saturating_add(S::PRECISION as ExpInt)
1811             .saturating_sub(LIMB_BITS as ExpInt);
1812         r.exp = r.exp.saturating_add(exp_adjustment);
1813
1814         Ok(r.normalize(round, loss.unwrap_or(Loss::ExactlyZero)))
1815     }
1816
1817     fn from_decimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
1818         // Given a normal decimal floating point number of the form
1819         //
1820         //   dddd.dddd[eE][+-]ddd
1821         //
1822         // where the decimal point and exponent are optional, fill out the
1823         // variables below. Exponent is appropriate if the significand is
1824         // treated as an integer, and normalized_exp if the significand
1825         // is taken to have the decimal point after a single leading
1826         // non-zero digit.
1827         //
1828         // If the value is zero, first_sig_digit is None.
1829
1830         let mut any_digits = false;
1831         let mut dec_exp = 0i32;
1832
1833         // Without leading or trailing zeros, irrespective of the dot.
1834         let mut first_sig_digit = None;
1835         let mut last_sig_digit = 0;
1836         let mut dot = s.len();
1837
1838         for (p, c) in s.char_indices() {
1839             if c == '.' {
1840                 if dot != s.len() {
1841                     return Err(ParseError("String contains multiple dots"));
1842                 }
1843                 dot = p;
1844             } else if let Some(dec_value) = c.to_digit(10) {
1845                 any_digits = true;
1846
1847                 if dec_value != 0 {
1848                     if first_sig_digit.is_none() {
1849                         first_sig_digit = Some(p);
1850                     }
1851                     last_sig_digit = p;
1852                 }
1853             } else if c == 'e' || c == 'E' {
1854                 if !any_digits {
1855                     return Err(ParseError("Significand has no digits"));
1856                 }
1857
1858                 if dot == s.len() {
1859                     dot = p;
1860                 }
1861
1862                 let mut chars = s[p + 1..].chars().peekable();
1863
1864                 // Adjust for the given exponent.
1865                 let exp_minus = chars.peek() == Some(&'-');
1866                 if exp_minus || chars.peek() == Some(&'+') {
1867                     chars.next();
1868                 }
1869
1870                 any_digits = false;
1871                 for c in chars {
1872                     if let Some(value) = c.to_digit(10) {
1873                         any_digits = true;
1874                         dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32);
1875                     } else {
1876                         return Err(ParseError("Invalid character in exponent"));
1877                     }
1878                 }
1879                 if !any_digits {
1880                     return Err(ParseError("Exponent has no digits"));
1881                 }
1882
1883                 if exp_minus {
1884                     dec_exp = -dec_exp;
1885                 }
1886
1887                 break;
1888             } else {
1889                 return Err(ParseError("Invalid character in significand"));
1890             }
1891         }
1892         if !any_digits {
1893             return Err(ParseError("Significand has no digits"));
1894         }
1895
1896         // Test if we have a zero number allowing for non-zero exponents.
1897         let first_sig_digit = match first_sig_digit {
1898             Some(p) => p,
1899             None => return Ok(Status::OK.and(Self::ZERO)),
1900         };
1901
1902         // Adjust the exponents for any decimal point.
1903         if dot > last_sig_digit {
1904             dec_exp = dec_exp.saturating_add((dot - last_sig_digit - 1) as i32);
1905         } else {
1906             dec_exp = dec_exp.saturating_sub((last_sig_digit - dot) as i32);
1907         }
1908         let significand_digits = last_sig_digit - first_sig_digit + 1
1909             - (dot > first_sig_digit && dot < last_sig_digit) as usize;
1910         let normalized_exp = dec_exp.saturating_add(significand_digits as i32 - 1);
1911
1912         // Handle the cases where exponents are obviously too large or too
1913         // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp
1914         // definitely overflows if
1915         //
1916         //       (dec_exp - 1) * L >= MAX_EXP
1917         //
1918         // and definitely underflows to zero where
1919         //
1920         //       (dec_exp + 1) * L <= MIN_EXP - PRECISION
1921         //
1922         // With integer arithmetic the tightest bounds for L are
1923         //
1924         //       93/28 < L < 196/59            [ numerator <= 256 ]
1925         //       42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
1926
1927         // Check for MAX_EXP.
1928         if normalized_exp.saturating_sub(1).saturating_mul(42039) >= 12655 * S::MAX_EXP as i32 {
1929             // Overflow and round.
1930             return Ok(Self::overflow_result(round));
1931         }
1932
1933         // Check for MIN_EXP.
1934         if normalized_exp.saturating_add(1).saturating_mul(28738)
1935             <= 8651 * (S::MIN_EXP as i32 - S::PRECISION as i32)
1936         {
1937             // Underflow to zero and round.
1938             let r =
1939                 if round == Round::TowardPositive { IeeeFloat::SMALLEST } else { IeeeFloat::ZERO };
1940             return Ok((Status::UNDERFLOW | Status::INEXACT).and(r));
1941         }
1942
1943         // A tight upper bound on number of bits required to hold an
1944         // N-digit decimal integer is N * 196 / 59. Allocate enough space
1945         // to hold the full significand, and an extra limb required by
1946         // tcMultiplyPart.
1947         let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59);
1948         let mut dec_sig: SmallVec<[Limb; 1]> = SmallVec::with_capacity(max_limbs);
1949
1950         // Convert to binary efficiently - we do almost all multiplication
1951         // in a Limb. When this would overflow do we do a single
1952         // bignum multiplication, and then revert again to multiplication
1953         // in a Limb.
1954         let mut chars = s[first_sig_digit..=last_sig_digit].chars();
1955         loop {
1956             let mut val = 0;
1957             let mut multiplier = 1;
1958
1959             loop {
1960                 let dec_value = match chars.next() {
1961                     Some('.') => continue,
1962                     Some(c) => c.to_digit(10).unwrap(),
1963                     None => break,
1964                 };
1965
1966                 multiplier *= 10;
1967                 val = val * 10 + dec_value as Limb;
1968
1969                 // The maximum number that can be multiplied by ten with any
1970                 // digit added without overflowing a Limb.
1971                 if multiplier > (!0 - 9) / 10 {
1972                     break;
1973                 }
1974             }
1975
1976             // If we've consumed no digits, we're done.
1977             if multiplier == 1 {
1978                 break;
1979             }
1980
1981             // Multiply out the current limb.
1982             let mut carry = val;
1983             for x in &mut dec_sig {
1984                 let [low, mut high] = sig::widening_mul(*x, multiplier);
1985
1986                 // Now add carry.
1987                 let (low, overflow) = low.overflowing_add(carry);
1988                 high += overflow as Limb;
1989
1990                 *x = low;
1991                 carry = high;
1992             }
1993
1994             // If we had carry, we need another limb (likely but not guaranteed).
1995             if carry > 0 {
1996                 dec_sig.push(carry);
1997             }
1998         }
1999
2000         // Calculate pow(5, abs(dec_exp)) into `pow5_full`.
2001         // The *_calc Vec's are reused scratch space, as an optimization.
2002         let (pow5_full, mut pow5_calc, mut sig_calc, mut sig_scratch_calc) = {
2003             let mut power = dec_exp.abs() as usize;
2004
2005             const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125];
2006
2007             let mut p5_scratch = smallvec![];
2008             let mut p5: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[4]];
2009
2010             let mut r_scratch = smallvec![];
2011             let mut r: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[power & 7]];
2012             power >>= 3;
2013
2014             while power > 0 {
2015                 // Calculate pow(5,pow(2,n+3)).
2016                 p5_scratch.resize(p5.len() * 2, 0);
2017                 let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
2018                 while p5_scratch.last() == Some(&0) {
2019                     p5_scratch.pop();
2020                 }
2021                 mem::swap(&mut p5, &mut p5_scratch);
2022
2023                 if power & 1 != 0 {
2024                     r_scratch.resize(r.len() + p5.len(), 0);
2025                     let _: Loss =
2026                         sig::mul(&mut r_scratch, &mut 0, &r, &p5, (r.len() + p5.len()) * LIMB_BITS);
2027                     while r_scratch.last() == Some(&0) {
2028                         r_scratch.pop();
2029                     }
2030                     mem::swap(&mut r, &mut r_scratch);
2031                 }
2032
2033                 power >>= 1;
2034             }
2035
2036             (r, r_scratch, p5, p5_scratch)
2037         };
2038
2039         // Attempt dec_sig * 10^dec_exp with increasing precision.
2040         let mut attempt = 0;
2041         loop {
2042             let calc_precision = (LIMB_BITS << attempt) - 1;
2043             attempt += 1;
2044
2045             let calc_normal_from_limbs = |sig: &mut SmallVec<[Limb; 1]>,
2046                                           limbs: &[Limb]|
2047              -> StatusAnd<ExpInt> {
2048                 sig.resize(limbs_for_bits(calc_precision), 0);
2049                 let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision);
2050
2051                 // Before rounding normalize the exponent of Category::Normal numbers.
2052                 let mut omsb = sig::omsb(sig);
2053
2054                 assert_ne!(omsb, 0);
2055
2056                 // OMSB is numbered from 1. We want to place it in the integer
2057                 // bit numbered PRECISION if possible, with a compensating change in
2058                 // the exponent.
2059                 let final_exp = exp.saturating_add(omsb as ExpInt - calc_precision as ExpInt);
2060
2061                 // Shifting left is easy as we don't lose precision.
2062                 if final_exp < exp {
2063                     assert_eq!(loss, Loss::ExactlyZero);
2064
2065                     let exp_change = (exp - final_exp) as usize;
2066                     sig::shift_left(sig, &mut exp, exp_change);
2067
2068                     return Status::OK.and(exp);
2069                 }
2070
2071                 // Shift right and capture any new lost fraction.
2072                 if final_exp > exp {
2073                     let exp_change = (final_exp - exp) as usize;
2074                     loss = sig::shift_right(sig, &mut exp, exp_change).combine(loss);
2075
2076                     // Keep OMSB up-to-date.
2077                     omsb = omsb.saturating_sub(exp_change);
2078                 }
2079
2080                 assert_eq!(omsb, calc_precision);
2081
2082                 // Now round the number according to round given the lost
2083                 // fraction.
2084
2085                 // As specified in IEEE 754, since we do not trap we do not report
2086                 // underflow for exact results.
2087                 if loss == Loss::ExactlyZero {
2088                     return Status::OK.and(exp);
2089                 }
2090
2091                 // Increment the significand if we're rounding away from zero.
2092                 if loss == Loss::MoreThanHalf || loss == Loss::ExactlyHalf && sig::get_bit(sig, 0) {
2093                     // We should never overflow.
2094                     assert_eq!(sig::increment(sig), 0);
2095                     omsb = sig::omsb(sig);
2096
2097                     // Did the significand increment overflow?
2098                     if omsb == calc_precision + 1 {
2099                         let _: Loss = sig::shift_right(sig, &mut exp, 1);
2100
2101                         return Status::INEXACT.and(exp);
2102                     }
2103                 }
2104
2105                 // The normal case - we were and are not denormal, and any
2106                 // significand increment above didn't overflow.
2107                 Status::INEXACT.and(exp)
2108             };
2109
2110             let status;
2111             let mut exp = unpack!(status=,
2112                 calc_normal_from_limbs(&mut sig_calc, &dec_sig));
2113             let pow5_status;
2114             let pow5_exp = unpack!(pow5_status=,
2115                 calc_normal_from_limbs(&mut pow5_calc, &pow5_full));
2116
2117             // Add dec_exp, as 10^n = 5^n * 2^n.
2118             exp += dec_exp as ExpInt;
2119
2120             let mut used_bits = S::PRECISION;
2121             let mut truncated_bits = calc_precision - used_bits;
2122
2123             let half_ulp_err1 = (status != Status::OK) as Limb;
2124             let (calc_loss, half_ulp_err2);
2125             if dec_exp >= 0 {
2126                 exp += pow5_exp;
2127
2128                 sig_scratch_calc.resize(sig_calc.len() + pow5_calc.len(), 0);
2129                 calc_loss = sig::mul(
2130                     &mut sig_scratch_calc,
2131                     &mut exp,
2132                     &sig_calc,
2133                     &pow5_calc,
2134                     calc_precision,
2135                 );
2136                 mem::swap(&mut sig_calc, &mut sig_scratch_calc);
2137
2138                 half_ulp_err2 = (pow5_status != Status::OK) as Limb;
2139             } else {
2140                 exp -= pow5_exp;
2141
2142                 sig_scratch_calc.resize(sig_calc.len(), 0);
2143                 calc_loss = sig::div(
2144                     &mut sig_scratch_calc,
2145                     &mut exp,
2146                     &mut sig_calc,
2147                     &mut pow5_calc,
2148                     calc_precision,
2149                 );
2150                 mem::swap(&mut sig_calc, &mut sig_scratch_calc);
2151
2152                 // Denormal numbers have less precision.
2153                 if exp < S::MIN_EXP {
2154                     truncated_bits += (S::MIN_EXP - exp) as usize;
2155                     used_bits = calc_precision.saturating_sub(truncated_bits);
2156                 }
2157                 // Extra half-ulp lost in reciprocal of exponent.
2158                 half_ulp_err2 =
2159                     2 * (pow5_status != Status::OK || calc_loss != Loss::ExactlyZero) as Limb;
2160             }
2161
2162             // Both sig::mul and sig::div return the
2163             // result with the integer bit set.
2164             assert!(sig::get_bit(&sig_calc, calc_precision - 1));
2165
2166             // The error from the true value, in half-ulps, on multiplying two
2167             // floating point numbers, which differ from the value they
2168             // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less
2169             // than the returned value.
2170             //
2171             // See "How to Read Floating Point Numbers Accurately" by William D Clinger.
2172             assert!(half_ulp_err1 < 2 || half_ulp_err2 < 2 || (half_ulp_err1 + half_ulp_err2 < 8));
2173
2174             let inexact = (calc_loss != Loss::ExactlyZero) as Limb;
2175             let half_ulp_err = if half_ulp_err1 + half_ulp_err2 == 0 {
2176                 inexact * 2 // <= inexact half-ulps.
2177             } else {
2178                 inexact + 2 * (half_ulp_err1 + half_ulp_err2)
2179             };
2180
2181             let ulps_from_boundary = {
2182                 let bits = calc_precision - used_bits - 1;
2183
2184                 let i = bits / LIMB_BITS;
2185                 let limb = sig_calc[i] & (!0 >> (LIMB_BITS - 1 - bits % LIMB_BITS));
2186                 let boundary = match round {
2187                     Round::NearestTiesToEven | Round::NearestTiesToAway => 1 << (bits % LIMB_BITS),
2188                     _ => 0,
2189                 };
2190                 if i == 0 {
2191                     let delta = limb.wrapping_sub(boundary);
2192                     cmp::min(delta, delta.wrapping_neg())
2193                 } else if limb == boundary {
2194                     if !sig::is_all_zeros(&sig_calc[1..i]) {
2195                         !0 // A lot.
2196                     } else {
2197                         sig_calc[0]
2198                     }
2199                 } else if limb == boundary.wrapping_sub(1) {
2200                     if sig_calc[1..i].iter().any(|&x| x.wrapping_neg() != 1) {
2201                         !0 // A lot.
2202                     } else {
2203                         sig_calc[0].wrapping_neg()
2204                     }
2205                 } else {
2206                     !0 // A lot.
2207                 }
2208             };
2209
2210             // Are we guaranteed to round correctly if we truncate?
2211             if ulps_from_boundary.saturating_mul(2) >= half_ulp_err {
2212                 let mut r = IeeeFloat {
2213                     sig: [0],
2214                     exp,
2215                     category: Category::Normal,
2216                     sign: false,
2217                     marker: PhantomData,
2218                 };
2219                 sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits);
2220                 // If we extracted less bits above we must adjust our exponent
2221                 // to compensate for the implicit right shift.
2222                 r.exp += (S::PRECISION - used_bits) as ExpInt;
2223                 let loss = Loss::through_truncation(&sig_calc, truncated_bits);
2224                 return Ok(r.normalize(round, loss));
2225             }
2226         }
2227     }
2228 }
2229
2230 impl Loss {
2231     /// Combine the effect of two lost fractions.
2232     fn combine(self, less_significant: Loss) -> Loss {
2233         let mut more_significant = self;
2234         if less_significant != Loss::ExactlyZero {
2235             if more_significant == Loss::ExactlyZero {
2236                 more_significant = Loss::LessThanHalf;
2237             } else if more_significant == Loss::ExactlyHalf {
2238                 more_significant = Loss::MoreThanHalf;
2239             }
2240         }
2241
2242         more_significant
2243     }
2244
2245     /// Returns the fraction lost were a bignum truncated losing the least
2246     /// significant `bits` bits.
2247     fn through_truncation(limbs: &[Limb], bits: usize) -> Loss {
2248         if bits == 0 {
2249             return Loss::ExactlyZero;
2250         }
2251
2252         let half_bit = bits - 1;
2253         let half_limb = half_bit / LIMB_BITS;
2254         let (half_limb, rest) = if half_limb < limbs.len() {
2255             (limbs[half_limb], &limbs[..half_limb])
2256         } else {
2257             (0, limbs)
2258         };
2259         let half = 1 << (half_bit % LIMB_BITS);
2260         let has_half = half_limb & half != 0;
2261         let has_rest = half_limb & (half - 1) != 0 || !sig::is_all_zeros(rest);
2262
2263         match (has_half, has_rest) {
2264             (false, false) => Loss::ExactlyZero,
2265             (false, true) => Loss::LessThanHalf,
2266             (true, false) => Loss::ExactlyHalf,
2267             (true, true) => Loss::MoreThanHalf,
2268         }
2269     }
2270 }
2271
2272 /// Implementation details of IeeeFloat significands, such as big integer arithmetic.
2273 /// As a rule of thumb, no functions in this module should dynamically allocate.
2274 mod sig {
2275     use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS};
2276     use core::cmp::Ordering;
2277     use core::iter;
2278     use core::mem;
2279
2280     pub(super) fn is_all_zeros(limbs: &[Limb]) -> bool {
2281         limbs.iter().all(|&l| l == 0)
2282     }
2283
2284     /// One, not zero, based LSB. That is, returns 0 for a zeroed significand.
2285     pub(super) fn olsb(limbs: &[Limb]) -> usize {
2286         limbs
2287             .iter()
2288             .enumerate()
2289             .find(|(_, &limb)| limb != 0)
2290             .map_or(0, |(i, limb)| i * LIMB_BITS + limb.trailing_zeros() as usize + 1)
2291     }
2292
2293     /// One, not zero, based MSB. That is, returns 0 for a zeroed significand.
2294     pub(super) fn omsb(limbs: &[Limb]) -> usize {
2295         limbs
2296             .iter()
2297             .enumerate()
2298             .rfind(|(_, &limb)| limb != 0)
2299             .map_or(0, |(i, limb)| (i + 1) * LIMB_BITS - limb.leading_zeros() as usize)
2300     }
2301
2302     /// Comparison (unsigned) of two significands.
2303     pub(super) fn cmp(a: &[Limb], b: &[Limb]) -> Ordering {
2304         assert_eq!(a.len(), b.len());
2305         for (a, b) in a.iter().zip(b).rev() {
2306             match a.cmp(b) {
2307                 Ordering::Equal => {}
2308                 o => return o,
2309             }
2310         }
2311
2312         Ordering::Equal
2313     }
2314
2315     /// Extracts the given bit.
2316     pub(super) fn get_bit(limbs: &[Limb], bit: usize) -> bool {
2317         limbs[bit / LIMB_BITS] & (1 << (bit % LIMB_BITS)) != 0
2318     }
2319
2320     /// Sets the given bit.
2321     pub(super) fn set_bit(limbs: &mut [Limb], bit: usize) {
2322         limbs[bit / LIMB_BITS] |= 1 << (bit % LIMB_BITS);
2323     }
2324
2325     /// Clear the given bit.
2326     pub(super) fn clear_bit(limbs: &mut [Limb], bit: usize) {
2327         limbs[bit / LIMB_BITS] &= !(1 << (bit % LIMB_BITS));
2328     }
2329
2330     /// Shifts `dst` left `bits` bits, subtract `bits` from its exponent.
2331     pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) {
2332         if bits > 0 {
2333             // Our exponent should not underflow.
2334             *exp = exp.checked_sub(bits as ExpInt).unwrap();
2335
2336             // Jump is the inter-limb jump; shift is the intra-limb shift.
2337             let jump = bits / LIMB_BITS;
2338             let shift = bits % LIMB_BITS;
2339
2340             for i in (0..dst.len()).rev() {
2341                 let mut limb;
2342
2343                 if i < jump {
2344                     limb = 0;
2345                 } else {
2346                     // dst[i] comes from the two limbs src[i - jump] and, if we have
2347                     // an intra-limb shift, src[i - jump - 1].
2348                     limb = dst[i - jump];
2349                     if shift > 0 {
2350                         limb <<= shift;
2351                         if i > jump {
2352                             limb |= dst[i - jump - 1] >> (LIMB_BITS - shift);
2353                         }
2354                     }
2355                 }
2356
2357                 dst[i] = limb;
2358             }
2359         }
2360     }
2361
2362     /// Shifts `dst` right `bits` bits noting lost fraction.
2363     pub(super) fn shift_right(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) -> Loss {
2364         let loss = Loss::through_truncation(dst, bits);
2365
2366         if bits > 0 {
2367             // Our exponent should not overflow.
2368             *exp = exp.checked_add(bits as ExpInt).unwrap();
2369
2370             // Jump is the inter-limb jump; shift is the intra-limb shift.
2371             let jump = bits / LIMB_BITS;
2372             let shift = bits % LIMB_BITS;
2373
2374             // Perform the shift. This leaves the most significant `bits` bits
2375             // of the result at zero.
2376             for i in 0..dst.len() {
2377                 let mut limb;
2378
2379                 if i + jump >= dst.len() {
2380                     limb = 0;
2381                 } else {
2382                     limb = dst[i + jump];
2383                     if shift > 0 {
2384                         limb >>= shift;
2385                         if i + jump + 1 < dst.len() {
2386                             limb |= dst[i + jump + 1] << (LIMB_BITS - shift);
2387                         }
2388                     }
2389                 }
2390
2391                 dst[i] = limb;
2392             }
2393         }
2394
2395         loss
2396     }
2397
2398     /// Copies the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB,
2399     /// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`.
2400     /// All high bits above `src_bits` in `dst` are zero-filled.
2401     pub(super) fn extract(dst: &mut [Limb], src: &[Limb], src_bits: usize, src_lsb: usize) {
2402         if src_bits == 0 {
2403             return;
2404         }
2405
2406         let dst_limbs = limbs_for_bits(src_bits);
2407         assert!(dst_limbs <= dst.len());
2408
2409         let src = &src[src_lsb / LIMB_BITS..];
2410         dst[..dst_limbs].copy_from_slice(&src[..dst_limbs]);
2411
2412         let shift = src_lsb % LIMB_BITS;
2413         let _: Loss = shift_right(&mut dst[..dst_limbs], &mut 0, shift);
2414
2415         // We now have (dst_limbs * LIMB_BITS - shift) bits from `src`
2416         // in `dst`.  If this is less that src_bits, append the rest, else
2417         // clear the high bits.
2418         let n = dst_limbs * LIMB_BITS - shift;
2419         if n < src_bits {
2420             let mask = (1 << (src_bits - n)) - 1;
2421             dst[dst_limbs - 1] |= (src[dst_limbs] & mask) << (n % LIMB_BITS);
2422         } else if n > src_bits && src_bits % LIMB_BITS > 0 {
2423             dst[dst_limbs - 1] &= (1 << (src_bits % LIMB_BITS)) - 1;
2424         }
2425
2426         // Clear high limbs.
2427         for x in &mut dst[dst_limbs..] {
2428             *x = 0;
2429         }
2430     }
2431
2432     /// We want the most significant PRECISION bits of `src`. There may not
2433     /// be that many; extract what we can.
2434     pub(super) fn from_limbs(dst: &mut [Limb], src: &[Limb], precision: usize) -> (Loss, ExpInt) {
2435         let omsb = omsb(src);
2436
2437         if precision <= omsb {
2438             extract(dst, src, precision, omsb - precision);
2439             (Loss::through_truncation(src, omsb - precision), omsb as ExpInt - 1)
2440         } else {
2441             extract(dst, src, omsb, 0);
2442             (Loss::ExactlyZero, precision as ExpInt - 1)
2443         }
2444     }
2445
2446     /// For every consecutive chunk of `bits` bits from `limbs`,
2447     /// going from most significant to the least significant bits,
2448     /// call `f` to transform those bits and store the result back.
2449     pub(super) fn each_chunk<F: FnMut(Limb) -> Limb>(limbs: &mut [Limb], bits: usize, mut f: F) {
2450         assert_eq!(LIMB_BITS % bits, 0);
2451         for limb in limbs.iter_mut().rev() {
2452             let mut r = 0;
2453             for i in (0..LIMB_BITS / bits).rev() {
2454                 r |= f((*limb >> (i * bits)) & ((1 << bits) - 1)) << (i * bits);
2455             }
2456             *limb = r;
2457         }
2458     }
2459
2460     /// Increment in-place, return the carry flag.
2461     pub(super) fn increment(dst: &mut [Limb]) -> Limb {
2462         for x in dst {
2463             *x = x.wrapping_add(1);
2464             if *x != 0 {
2465                 return 0;
2466             }
2467         }
2468
2469         1
2470     }
2471
2472     /// Decrement in-place, return the borrow flag.
2473     pub(super) fn decrement(dst: &mut [Limb]) -> Limb {
2474         for x in dst {
2475             *x = x.wrapping_sub(1);
2476             if *x != !0 {
2477                 return 0;
2478             }
2479         }
2480
2481         1
2482     }
2483
2484     /// `a += b + c` where `c` is zero or one. Returns the carry flag.
2485     pub(super) fn add(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
2486         assert!(c <= 1);
2487
2488         for (a, &b) in iter::zip(a, b) {
2489             let (r, overflow) = a.overflowing_add(b);
2490             let (r, overflow2) = r.overflowing_add(c);
2491             *a = r;
2492             c = (overflow | overflow2) as Limb;
2493         }
2494
2495         c
2496     }
2497
2498     /// `a -= b + c` where `c` is zero or one. Returns the borrow flag.
2499     pub(super) fn sub(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
2500         assert!(c <= 1);
2501
2502         for (a, &b) in iter::zip(a, b) {
2503             let (r, overflow) = a.overflowing_sub(b);
2504             let (r, overflow2) = r.overflowing_sub(c);
2505             *a = r;
2506             c = (overflow | overflow2) as Limb;
2507         }
2508
2509         c
2510     }
2511
2512     /// `a += b` or `a -= b`. Does not preserve `b`.
2513     pub(super) fn add_or_sub(
2514         a_sig: &mut [Limb],
2515         a_exp: &mut ExpInt,
2516         a_sign: &mut bool,
2517         b_sig: &mut [Limb],
2518         b_exp: ExpInt,
2519         b_sign: bool,
2520     ) -> Loss {
2521         // Are we bigger exponent-wise than the RHS?
2522         let bits = *a_exp - b_exp;
2523
2524         // Determine if the operation on the absolute values is effectively
2525         // an addition or subtraction.
2526         // Subtraction is more subtle than one might naively expect.
2527         if *a_sign ^ b_sign {
2528             let (reverse, loss);
2529
2530             #[allow(clippy::comparison_chain)]
2531             if bits == 0 {
2532                 reverse = cmp(a_sig, b_sig) == Ordering::Less;
2533                 loss = Loss::ExactlyZero;
2534             } else if bits > 0 {
2535                 loss = shift_right(b_sig, &mut 0, (bits - 1) as usize);
2536                 shift_left(a_sig, a_exp, 1);
2537                 reverse = false;
2538             } else {
2539                 loss = shift_right(a_sig, a_exp, (-bits - 1) as usize);
2540                 shift_left(b_sig, &mut 0, 1);
2541                 reverse = true;
2542             }
2543
2544             let borrow = (loss != Loss::ExactlyZero) as Limb;
2545             if reverse {
2546                 // The code above is intended to ensure that no borrow is necessary.
2547                 assert_eq!(sub(b_sig, a_sig, borrow), 0);
2548                 a_sig.copy_from_slice(b_sig);
2549                 *a_sign = !*a_sign;
2550             } else {
2551                 // The code above is intended to ensure that no borrow is necessary.
2552                 assert_eq!(sub(a_sig, b_sig, borrow), 0);
2553             }
2554
2555             // Invert the lost fraction - it was on the RHS and subtracted.
2556             match loss {
2557                 Loss::LessThanHalf => Loss::MoreThanHalf,
2558                 Loss::MoreThanHalf => Loss::LessThanHalf,
2559                 _ => loss,
2560             }
2561         } else {
2562             let loss = if bits > 0 {
2563                 shift_right(b_sig, &mut 0, bits as usize)
2564             } else {
2565                 shift_right(a_sig, a_exp, -bits as usize)
2566             };
2567             // We have a guard bit; generating a carry cannot happen.
2568             assert_eq!(add(a_sig, b_sig, 0), 0);
2569             loss
2570         }
2571     }
2572
2573     /// `[low, high] = a * b`.
2574     ///
2575     /// This cannot overflow, because
2576     ///
2577     /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)`
2578     ///
2579     /// which is less than n<sup>2</sup>.
2580     pub(super) fn widening_mul(a: Limb, b: Limb) -> [Limb; 2] {
2581         let mut wide = [0, 0];
2582
2583         if a == 0 || b == 0 {
2584             return wide;
2585         }
2586
2587         const HALF_BITS: usize = LIMB_BITS / 2;
2588
2589         let select = |limb, i| (limb >> (i * HALF_BITS)) & ((1 << HALF_BITS) - 1);
2590         for i in 0..2 {
2591             for j in 0..2 {
2592                 let mut x = [select(a, i) * select(b, j), 0];
2593                 shift_left(&mut x, &mut 0, (i + j) * HALF_BITS);
2594                 assert_eq!(add(&mut wide, &x, 0), 0);
2595             }
2596         }
2597
2598         wide
2599     }
2600
2601     /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction.
2602     pub(super) fn mul<'a>(
2603         dst: &mut [Limb],
2604         exp: &mut ExpInt,
2605         mut a: &'a [Limb],
2606         mut b: &'a [Limb],
2607         precision: usize,
2608     ) -> Loss {
2609         // Put the narrower number on the `a` for less loops below.
2610         if a.len() > b.len() {
2611             mem::swap(&mut a, &mut b);
2612         }
2613
2614         for x in &mut dst[..b.len()] {
2615             *x = 0;
2616         }
2617
2618         for i in 0..a.len() {
2619             let mut carry = 0;
2620             for j in 0..b.len() {
2621                 let [low, mut high] = widening_mul(a[i], b[j]);
2622
2623                 // Now add carry.
2624                 let (low, overflow) = low.overflowing_add(carry);
2625                 high += overflow as Limb;
2626
2627                 // And now `dst[i + j]`, and store the new low part there.
2628                 let (low, overflow) = low.overflowing_add(dst[i + j]);
2629                 high += overflow as Limb;
2630
2631                 dst[i + j] = low;
2632                 carry = high;
2633             }
2634             dst[i + b.len()] = carry;
2635         }
2636
2637         // Assume the operands involved in the multiplication are single-precision
2638         // FP, and the two multiplicants are:
2639         //     a = a23 . a22 ... a0 * 2^e1
2640         //     b = b23 . b22 ... b0 * 2^e2
2641         // the result of multiplication is:
2642         //     dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
2643         // Note that there are three significant bits at the left-hand side of the
2644         // radix point: two for the multiplication, and an overflow bit for the
2645         // addition (that will always be zero at this point). Move the radix point
2646         // toward left by two bits, and adjust exponent accordingly.
2647         *exp += 2;
2648
2649         // Convert the result having "2 * precision" significant-bits back to the one
2650         // having "precision" significant-bits. First, move the radix point from
2651         // poision "2*precision - 1" to "precision - 1". The exponent need to be
2652         // adjusted by "2*precision - 1" - "precision - 1" = "precision".
2653         *exp -= precision as ExpInt + 1;
2654
2655         // In case MSB resides at the left-hand side of radix point, shift the
2656         // mantissa right by some amount to make sure the MSB reside right before
2657         // the radix point (i.e., "MSB . rest-significant-bits").
2658         //
2659         // Note that the result is not normalized when "omsb < precision". So, the
2660         // caller needs to call IeeeFloat::normalize() if normalized value is
2661         // expected.
2662         let omsb = omsb(dst);
2663         if omsb <= precision { Loss::ExactlyZero } else { shift_right(dst, exp, omsb - precision) }
2664     }
2665
2666     /// `quotient = dividend / divisor`. Returns the lost fraction.
2667     /// Does not preserve `dividend` or `divisor`.
2668     pub(super) fn div(
2669         quotient: &mut [Limb],
2670         exp: &mut ExpInt,
2671         dividend: &mut [Limb],
2672         divisor: &mut [Limb],
2673         precision: usize,
2674     ) -> Loss {
2675         // Normalize the divisor.
2676         let bits = precision - omsb(divisor);
2677         shift_left(divisor, &mut 0, bits);
2678         *exp += bits as ExpInt;
2679
2680         // Normalize the dividend.
2681         let bits = precision - omsb(dividend);
2682         shift_left(dividend, exp, bits);
2683
2684         // Division by 1.
2685         let olsb_divisor = olsb(divisor);
2686         if olsb_divisor == precision {
2687             quotient.copy_from_slice(dividend);
2688             return Loss::ExactlyZero;
2689         }
2690
2691         // Ensure the dividend >= divisor initially for the loop below.
2692         // Incidentally, this means that the division loop below is
2693         // guaranteed to set the integer bit to one.
2694         if cmp(dividend, divisor) == Ordering::Less {
2695             shift_left(dividend, exp, 1);
2696             assert_ne!(cmp(dividend, divisor), Ordering::Less)
2697         }
2698
2699         // Helper for figuring out the lost fraction.
2700         let lost_fraction = |dividend: &[Limb], divisor: &[Limb]| match cmp(dividend, divisor) {
2701             Ordering::Greater => Loss::MoreThanHalf,
2702             Ordering::Equal => Loss::ExactlyHalf,
2703             Ordering::Less => {
2704                 if is_all_zeros(dividend) {
2705                     Loss::ExactlyZero
2706                 } else {
2707                     Loss::LessThanHalf
2708                 }
2709             }
2710         };
2711
2712         // Try to perform a (much faster) short division for small divisors.
2713         let divisor_bits = precision - (olsb_divisor - 1);
2714         macro_rules! try_short_div {
2715             ($W:ty, $H:ty, $half:expr) => {
2716                 if divisor_bits * 2 <= $half {
2717                     // Extract the small divisor.
2718                     let _: Loss = shift_right(divisor, &mut 0, olsb_divisor - 1);
2719                     let divisor = divisor[0] as $H as $W;
2720
2721                     // Shift the dividend to produce a quotient with the unit bit set.
2722                     let top_limb = *dividend.last().unwrap();
2723                     let mut rem = (top_limb >> (LIMB_BITS - (divisor_bits - 1))) as $H;
2724                     shift_left(dividend, &mut 0, divisor_bits - 1);
2725
2726                     // Apply short division in place on $H (of $half bits) chunks.
2727                     each_chunk(dividend, $half, |chunk| {
2728                         let chunk = chunk as $H;
2729                         let combined = ((rem as $W) << $half) | (chunk as $W);
2730                         rem = (combined % divisor) as $H;
2731                         (combined / divisor) as $H as Limb
2732                     });
2733                     quotient.copy_from_slice(dividend);
2734
2735                     return lost_fraction(&[(rem as Limb) << 1], &[divisor as Limb]);
2736                 }
2737             };
2738         }
2739
2740         try_short_div!(u32, u16, 16);
2741         try_short_div!(u64, u32, 32);
2742         try_short_div!(u128, u64, 64);
2743
2744         // Zero the quotient before setting bits in it.
2745         for x in &mut quotient[..limbs_for_bits(precision)] {
2746             *x = 0;
2747         }
2748
2749         // Long division.
2750         for bit in (0..precision).rev() {
2751             if cmp(dividend, divisor) != Ordering::Less {
2752                 sub(dividend, divisor, 0);
2753                 set_bit(quotient, bit);
2754             }
2755             shift_left(dividend, &mut 0, 1);
2756         }
2757
2758         lost_fraction(dividend, divisor)
2759     }
2760 }