src/librustc_apfloat/ieee.rs

   1 use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO};
   2 use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd};
   3
   4 use smallvec::{SmallVec, smallvec};
   5 use std::cmp::{self, Ordering};
   6 use std::convert::TryFrom;
   7 use std::fmt::{self, Write};
   8 use std::marker::PhantomData;
   9 use std::mem;
  10 use std::ops::Neg;
  11
  12 #[must_use]
  13 pub struct IeeeFloat<S> {
  14     /// Absolute significand value (including the integer bit).
  15     sig: [Limb; 1],
  16
  17     /// The signed unbiased exponent of the value.
  18     exp: ExpInt,
  19
  20     /// What kind of floating point number this is.
  21     category: Category,
  22
  23     /// Sign bit of the number.
  24     sign: bool,
  25
  26     marker: PhantomData<S>,
  27 }
  28
  29 /// Fundamental unit of big integer arithmetic, but also
  30 /// large to store the largest significands by itself.
  31 type Limb = u128;
  32 const LIMB_BITS: usize = 128;
  33 fn limbs_for_bits(bits: usize) -> usize {
  34     (bits + LIMB_BITS - 1) / LIMB_BITS
  35 }
  36
  37 /// Enum that represents what fraction of the LSB truncated bits of an fp number
  38 /// represent.
  39 ///
  40 /// This essentially combines the roles of guard and sticky bits.
  41 #[must_use]
  42 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
  43 enum Loss {
  44     // Example of truncated bits:
  45     ExactlyZero, // 000000
  46     LessThanHalf, // 0xxxxx  x's not all zero
  47     ExactlyHalf, // 100000
  48     MoreThanHalf, // 1xxxxx  x's not all zero
  49 }
  50
  51 /// Represents floating point arithmetic semantics.
  52 pub trait Semantics: Sized {
  53     /// Total number of bits in the in-memory format.
  54     const BITS: usize;
  55
  56     /// Number of bits in the significand. This includes the integer bit.
  57     const PRECISION: usize;
  58
  59     /// The largest E such that 2<sup>E</sup> is representable; this matches the
  60     /// definition of IEEE 754.
  61     const MAX_EXP: ExpInt;
  62
  63     /// The smallest E such that 2<sup>E</sup> is a normalized number; this
  64     /// matches the definition of IEEE 754.
  65     const MIN_EXP: ExpInt = -Self::MAX_EXP + 1;
  66
  67     /// The significand bit that marks NaN as quiet.
  68     const QNAN_BIT: usize = Self::PRECISION - 2;
  69
  70     /// The significand bitpattern to mark a NaN as quiet.
  71     /// NOTE: for X87DoubleExtended we need to set two bits instead of 2.
  72     const QNAN_SIGNIFICAND: Limb = 1 << Self::QNAN_BIT;
  73
  74     fn from_bits(bits: u128) -> IeeeFloat<Self> {
  75         assert!(Self::BITS > Self::PRECISION);
  76
  77         let sign = bits & (1 << (Self::BITS - 1));
  78         let exponent = (bits & !sign) >> (Self::PRECISION - 1);
  79         let mut r = IeeeFloat {
  80             sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
  81             // Convert the exponent from its bias representation to a signed integer.
  82             exp: (exponent as ExpInt) - Self::MAX_EXP,
  83             category: Category::Zero,
  84             sign: sign != 0,
  85             marker: PhantomData,
  86         };
  87
  88         if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
  89             // Exponent, significand meaningless.
  90             r.category = Category::Zero;
  91         } else if r.exp == Self::MAX_EXP + 1 && r.sig == [0] {
  92             // Exponent, significand meaningless.
  93             r.category = Category::Infinity;
  94         } else if r.exp == Self::MAX_EXP + 1 && r.sig != [0] {
  95             // Sign, exponent, significand meaningless.
  96             r.category = Category::NaN;
  97         } else {
  98             r.category = Category::Normal;
  99             if r.exp == Self::MIN_EXP - 1 {
 100                 // Denormal.
 101                 r.exp = Self::MIN_EXP;
 102             } else {
 103                 // Set integer bit.
 104                 sig::set_bit(&mut r.sig, Self::PRECISION - 1);
 105             }
 106         }
 107
 108         r
 109     }
 110
 111     fn to_bits(x: IeeeFloat<Self>) -> u128 {
 112         assert!(Self::BITS > Self::PRECISION);
 113
 114         // Split integer bit from significand.
 115         let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
 116         let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1);
 117         let exponent = match x.category {
 118             Category::Normal => {
 119                 if x.exp == Self::MIN_EXP && !integer_bit {
 120                     // Denormal.
 121                     Self::MIN_EXP - 1
 122                 } else {
 123                     x.exp
 124                 }
 125             }
 126             Category::Zero => {
 127                 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
 128                 significand = 0;
 129                 Self::MIN_EXP - 1
 130             }
 131             Category::Infinity => {
 132                 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
 133                 significand = 0;
 134                 Self::MAX_EXP + 1
 135             }
 136             Category::NaN => Self::MAX_EXP + 1,
 137         };
 138
 139         // Convert the exponent from a signed integer to its bias representation.
 140         let exponent = (exponent + Self::MAX_EXP) as u128;
 141
 142         ((x.sign as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand
 143     }
 144 }
 145
 146 impl<S> Copy for IeeeFloat<S> {}
 147 impl<S> Clone for IeeeFloat<S> {
 148     fn clone(&self) -> Self {
 149         *self
 150     }
 151 }
 152
 153 macro_rules! ieee_semantics {
 154     ($($name:ident = $sem:ident($bits:tt : $exp_bits:tt)),*) => {
 155         $(pub struct $sem;)*
 156         $(pub type $name = IeeeFloat<$sem>;)*
 157         $(impl Semantics for $sem {
 158             const BITS: usize = $bits;
 159             const PRECISION: usize = ($bits - 1 - $exp_bits) + 1;
 160             const MAX_EXP: ExpInt = (1 << ($exp_bits - 1)) - 1;
 161         })*
 162     }
 163 }
 164
 165 ieee_semantics! {
 166     Half = HalfS(16:5),
 167     Single = SingleS(32:8),
 168     Double = DoubleS(64:11),
 169     Quad = QuadS(128:15)
 170 }
 171
 172 pub struct X87DoubleExtendedS;
 173 pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>;
 174 impl Semantics for X87DoubleExtendedS {
 175     const BITS: usize = 80;
 176     const PRECISION: usize = 64;
 177     const MAX_EXP: ExpInt = (1 << (15 - 1)) - 1;
 178
 179     /// For x87 extended precision, we want to make a NaN, not a
 180     /// pseudo-NaN. Maybe we should expose the ability to make
 181     /// pseudo-NaNs?
 182     const QNAN_SIGNIFICAND: Limb = 0b11 << Self::QNAN_BIT;
 183
 184     /// Integer bit is explicit in this format. Intel hardware (387 and later)
 185     /// does not support these bit patterns:
 186     ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
 187     ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
 188     ///  exponent = 0, integer bit 1 ("pseudodenormal")
 189     ///  exponent != 0 nor all 1's, integer bit 0 ("unnormal")
 190     /// At the moment, the first two are treated as NaNs, the second two as Normal.
 191     fn from_bits(bits: u128) -> IeeeFloat<Self> {
 192         let sign = bits & (1 << (Self::BITS - 1));
 193         let exponent = (bits & !sign) >> Self::PRECISION;
 194         let mut r = IeeeFloat {
 195             sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
 196             // Convert the exponent from its bias representation to a signed integer.
 197             exp: (exponent as ExpInt) - Self::MAX_EXP,
 198             category: Category::Zero,
 199             sign: sign != 0,
 200             marker: PhantomData,
 201         };
 202
 203         if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
 204             // Exponent, significand meaningless.
 205             r.category = Category::Zero;
 206         } else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] {
 207             // Exponent, significand meaningless.
 208             r.category = Category::Infinity;
 209         } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)] {
 210             // Sign, exponent, significand meaningless.
 211             r.category = Category::NaN;
 212         } else {
 213             r.category = Category::Normal;
 214             if r.exp == Self::MIN_EXP - 1 {
 215                 // Denormal.
 216                 r.exp = Self::MIN_EXP;
 217             }
 218         }
 219
 220         r
 221     }
 222
 223     fn to_bits(x: IeeeFloat<Self>) -> u128 {
 224         // Get integer bit from significand.
 225         let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
 226         let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1);
 227         let exponent = match x.category {
 228             Category::Normal => {
 229                 if x.exp == Self::MIN_EXP && !integer_bit {
 230                     // Denormal.
 231                     Self::MIN_EXP - 1
 232                 } else {
 233                     x.exp
 234                 }
 235             }
 236             Category::Zero => {
 237                 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
 238                 significand = 0;
 239                 Self::MIN_EXP - 1
 240             }
 241             Category::Infinity => {
 242                 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
 243                 significand = 1 << (Self::PRECISION - 1);
 244                 Self::MAX_EXP + 1
 245             }
 246             Category::NaN => Self::MAX_EXP + 1,
 247         };
 248
 249         // Convert the exponent from a signed integer to its bias representation.
 250         let exponent = (exponent + Self::MAX_EXP) as u128;
 251
 252         ((x.sign as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand
 253     }
 254 }
 255
 256 float_common_impls!(IeeeFloat<S>);
 257
 258 impl<S: Semantics> PartialEq for IeeeFloat<S> {
 259     fn eq(&self, rhs: &Self) -> bool {
 260         self.partial_cmp(rhs) == Some(Ordering::Equal)
 261     }
 262 }
 263
 264 impl<S: Semantics> PartialOrd for IeeeFloat<S> {
 265     fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
 266         match (self.category, rhs.category) {
 267             (Category::NaN, _) |
 268             (_, Category::NaN) => None,
 269
 270             (Category::Infinity, Category::Infinity) => Some((!self.sign).cmp(&(!rhs.sign))),
 271
 272             (Category::Zero, Category::Zero) => Some(Ordering::Equal),
 273
 274             (Category::Infinity, _) |
 275             (Category::Normal, Category::Zero) => Some((!self.sign).cmp(&self.sign)),
 276
 277             (_, Category::Infinity) |
 278             (Category::Zero, Category::Normal) => Some(rhs.sign.cmp(&(!rhs.sign))),
 279
 280             (Category::Normal, Category::Normal) => {
 281                 // Two normal numbers. Do they have the same sign?
 282                 Some((!self.sign).cmp(&(!rhs.sign)).then_with(|| {
 283                     // Compare absolute values; invert result if negative.
 284                     let result = self.cmp_abs_normal(*rhs);
 285
 286                     if self.sign { result.reverse() } else { result }
 287                 }))
 288             }
 289         }
 290     }
 291 }
 292
 293 impl<S> Neg for IeeeFloat<S> {
 294     type Output = Self;
 295     fn neg(mut self) -> Self {
 296         self.sign = !self.sign;
 297         self
 298     }
 299 }
 300
 301 /// Prints this value as a decimal string.
 302 ///
 303 /// \param precision The maximum number of digits of
 304 ///   precision to output. If there are fewer digits available,
 305 ///   zero padding will not be used unless the value is
 306 ///   integral and small enough to be expressed in
 307 ///   precision digits. 0 means to use the natural
 308 ///   precision of the number.
 309 /// \param width The maximum number of zeros to
 310 ///   consider inserting before falling back to scientific
 311 ///   notation. 0 means to always use scientific notation.
 312 ///
 313 /// \param alternate Indicate whether to remove the trailing zero in
 314 ///   fraction part or not. Also setting this parameter to true forces
 315 ///   producing of output more similar to default printf behavior.
 316 ///   Specifically the lower e is used as exponent delimiter and exponent
 317 ///   always contains no less than two digits.
 318 ///
 319 /// Number       precision    width      Result
 320 /// ------       ---------    -----      ------
 321 /// 1.01E+4              5        2       10100
 322 /// 1.01E+4              4        2       1.01E+4
 323 /// 1.01E+4              5        1       1.01E+4
 324 /// 1.01E-2              5        2       0.0101
 325 /// 1.01E-2              4        2       0.0101
 326 /// 1.01E-2              4        1       1.01E-2
 327 impl<S: Semantics> fmt::Display for IeeeFloat<S> {
 328     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 329         let width = f.width().unwrap_or(3);
 330         let alternate = f.alternate();
 331
 332         match self.category {
 333             Category::Infinity => {
 334                 if self.sign {
 335                     return f.write_str("-Inf");
 336                 } else {
 337                     return f.write_str("+Inf");
 338                 }
 339             }
 340
 341             Category::NaN => return f.write_str("NaN"),
 342
 343             Category::Zero => {
 344                 if self.sign {
 345                     f.write_char('-')?;
 346                 }
 347
 348                 if width == 0 {
 349                     if alternate {
 350                         f.write_str("0.0")?;
 351                         if let Some(n) = f.precision() {
 352                             for _ in 1..n {
 353                                 f.write_char('0')?;
 354                             }
 355                         }
 356                         f.write_str("e+00")?;
 357                     } else {
 358                         f.write_str("0.0E+0")?;
 359                     }
 360                 } else {
 361                     f.write_char('0')?;
 362                 }
 363                 return Ok(());
 364             }
 365
 366             Category::Normal => {}
 367         }
 368
 369         if self.sign {
 370             f.write_char('-')?;
 371         }
 372
 373         // We use enough digits so the number can be round-tripped back to an
 374         // APFloat. The formula comes from "How to Print Floating-Point Numbers
 375         // Accurately" by Steele and White.
 376         // FIXME: Using a formula based purely on the precision is conservative;
 377         // we can print fewer digits depending on the actual value being printed.
 378
 379         // precision = 2 + floor(S::PRECISION / lg_2(10))
 380         let precision = f.precision().unwrap_or(2 + S::PRECISION * 59 / 196);
 381
 382         // Decompose the number into an APInt and an exponent.
 383         let mut exp = self.exp - (S::PRECISION as ExpInt - 1);
 384         let mut sig = vec![self.sig[0]];
 385
 386         // Ignore trailing binary zeros.
 387         let trailing_zeros = sig[0].trailing_zeros();
 388         let _: Loss = sig::shift_right(&mut sig, &mut exp, trailing_zeros as usize);
 389
 390         // Change the exponent from 2^e to 10^e.
 391         if exp == 0 {
 392             // Nothing to do.
 393         } else if exp > 0 {
 394             // Just shift left.
 395             let shift = exp as usize;
 396             sig.resize(limbs_for_bits(S::PRECISION + shift), 0);
 397             sig::shift_left(&mut sig, &mut exp, shift);
 398         } else {
 399             // exp < 0
 400             let mut texp = -exp as usize;
 401
 402             // We transform this using the identity:
 403             //   (N)(2^-e) == (N)(5^e)(10^-e)
 404
 405             // Multiply significand by 5^e.
 406             //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
 407             let mut sig_scratch = vec![];
 408             let mut p5 = vec![];
 409             let mut p5_scratch = vec![];
 410             while texp != 0 {
 411                 if p5.is_empty() {
 412                     p5.push(5);
 413                 } else {
 414                     p5_scratch.resize(p5.len() * 2, 0);
 415                     let _: Loss =
 416                         sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
 417                     while p5_scratch.last() == Some(&0) {
 418                         p5_scratch.pop();
 419                     }
 420                     mem::swap(&mut p5, &mut p5_scratch);
 421                 }
 422                 if texp & 1 != 0 {
 423                     sig_scratch.resize(sig.len() + p5.len(), 0);
 424                     let _: Loss = sig::mul(
 425                         &mut sig_scratch,
 426                         &mut 0,
 427                         &sig,
 428                         &p5,
 429                         (sig.len() + p5.len()) * LIMB_BITS,
 430                     );
 431                     while sig_scratch.last() == Some(&0) {
 432                         sig_scratch.pop();
 433                     }
 434                     mem::swap(&mut sig, &mut sig_scratch);
 435                 }
 436                 texp >>= 1;
 437             }
 438         }
 439
 440         // Fill the buffer.
 441         let mut buffer = vec![];
 442
 443         // Ignore digits from the significand until it is no more
 444         // precise than is required for the desired precision.
 445         // 196/59 is a very slight overestimate of lg_2(10).
 446         let required = (precision * 196 + 58) / 59;
 447         let mut discard_digits = sig::omsb(&sig).saturating_sub(required) * 59 / 196;
 448         let mut in_trail = true;
 449         while !sig.is_empty() {
 450             // Perform short division by 10 to extract the rightmost digit.
 451             // rem <- sig % 10
 452             // sig <- sig / 10
 453             let mut rem = 0;
 454
 455             // Use 64-bit division and remainder, with 32-bit chunks from sig.
 456             sig::each_chunk(&mut sig, 32, |chunk| {
 457                 let chunk = chunk as u32;
 458                 let combined = ((rem as u64) << 32) | (chunk as u64);
 459                 rem = (combined % 10) as u8;
 460                 (combined / 10) as u32 as Limb
 461             });
 462
 463             // Reduce the sigificand to avoid wasting time dividing 0's.
 464             while sig.last() == Some(&0) {
 465                 sig.pop();
 466             }
 467
 468             let digit = rem;
 469
 470             // Ignore digits we don't need.
 471             if discard_digits > 0 {
 472                 discard_digits -= 1;
 473                 exp += 1;
 474                 continue;
 475             }
 476
 477             // Drop trailing zeros.
 478             if in_trail && digit == 0 {
 479                 exp += 1;
 480             } else {
 481                 in_trail = false;
 482                 buffer.push(b'0' + digit);
 483             }
 484         }
 485
 486         assert!(!buffer.is_empty(), "no characters in buffer!");
 487
 488         // Drop down to precision.
 489         // FIXME: don't do more precise calculations above than are required.
 490         if buffer.len() > precision {
 491             // The most significant figures are the last ones in the buffer.
 492             let mut first_sig = buffer.len() - precision;
 493
 494             // Round.
 495             // FIXME: this probably shouldn't use 'round half up'.
 496
 497             // Rounding down is just a truncation, except we also want to drop
 498             // trailing zeros from the new result.
 499             if buffer[first_sig - 1] < b'5' {
 500                 while first_sig < buffer.len() && buffer[first_sig] == b'0' {
 501                     first_sig += 1;
 502                 }
 503             } else {
 504                 // Rounding up requires a decimal add-with-carry. If we continue
 505                 // the carry, the newly-introduced zeros will just be truncated.
 506                 for x in &mut buffer[first_sig..] {
 507                     if *x == b'9' {
 508                         first_sig += 1;
 509                     } else {
 510                         *x += 1;
 511                         break;
 512                     }
 513                 }
 514             }
 515
 516             exp += first_sig as ExpInt;
 517             buffer.drain(..first_sig);
 518
 519             // If we carried through, we have exactly one digit of precision.
 520             if buffer.is_empty() {
 521                 buffer.push(b'1');
 522             }
 523         }
 524
 525         let digits = buffer.len();
 526
 527         // Check whether we should use scientific notation.
 528         let scientific = if width == 0 {
 529             true
 530         } else if exp >= 0 {
 531             // 765e3 --> 765000
 532             //              ^^^
 533             // But we shouldn't make the number look more precise than it is.
 534             exp as usize > width || digits + exp as usize > precision
 535         } else {
 536             // Power of the most significant digit.
 537             let msd = exp + (digits - 1) as ExpInt;
 538             if msd >= 0 {
 539                 // 765e-2 == 7.65
 540                 false
 541             } else {
 542                 // 765e-5 == 0.00765
 543                 //           ^ ^^
 544                 -msd as usize > width
 545             }
 546         };
 547
 548         // Scientific formatting is pretty straightforward.
 549         if scientific {
 550             exp += digits as ExpInt - 1;
 551
 552             f.write_char(buffer[digits - 1] as char)?;
 553             f.write_char('.')?;
 554             let truncate_zero = !alternate;
 555             if digits == 1 && truncate_zero {
 556                 f.write_char('0')?;
 557             } else {
 558                 for &d in buffer[..digits - 1].iter().rev() {
 559                     f.write_char(d as char)?;
 560                 }
 561             }
 562             // Fill with zeros up to precision.
 563             if !truncate_zero && precision > digits - 1 {
 564                 for _ in 0..=precision - digits {
 565                     f.write_char('0')?;
 566                 }
 567             }
 568             // For alternate we use lower 'e'.
 569             f.write_char(if alternate { 'e' } else { 'E' })?;
 570
 571             // Exponent always at least two digits if we do not truncate zeros.
 572             if truncate_zero {
 573                 write!(f, "{:+}", exp)?;
 574             } else {
 575                 write!(f, "{:+03}", exp)?;
 576             }
 577
 578             return Ok(());
 579         }
 580
 581         // Non-scientific, positive exponents.
 582         if exp >= 0 {
 583             for &d in buffer.iter().rev() {
 584                 f.write_char(d as char)?;
 585             }
 586             for _ in 0..exp {
 587                 f.write_char('0')?;
 588             }
 589             return Ok(());
 590         }
 591
 592         // Non-scientific, negative exponents.
 593         let unit_place = -exp as usize;
 594         if unit_place < digits {
 595             for &d in buffer[unit_place..].iter().rev() {
 596                 f.write_char(d as char)?;
 597             }
 598             f.write_char('.')?;
 599             for &d in buffer[..unit_place].iter().rev() {
 600                 f.write_char(d as char)?;
 601             }
 602         } else {
 603             f.write_str("0.")?;
 604             for _ in digits..unit_place {
 605                 f.write_char('0')?;
 606             }
 607             for &d in buffer.iter().rev() {
 608                 f.write_char(d as char)?;
 609             }
 610         }
 611
 612         Ok(())
 613     }
 614 }
 615
 616 impl<S: Semantics> fmt::Debug for IeeeFloat<S> {
 617     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 618         write!(f, "{}({:?} | {}{:?} * 2^{})",
 619                self, self.category,
 620                if self.sign { "-" } else { "+" },
 621                self.sig,
 622                self.exp)
 623     }
 624 }
 625
 626 impl<S: Semantics> Float for IeeeFloat<S> {
 627     const BITS: usize = S::BITS;
 628     const PRECISION: usize = S::PRECISION;
 629     const MAX_EXP: ExpInt = S::MAX_EXP;
 630     const MIN_EXP: ExpInt = S::MIN_EXP;
 631
 632     const ZERO: Self = IeeeFloat {
 633         sig: [0],
 634         exp: S::MIN_EXP - 1,
 635         category: Category::Zero,
 636         sign: false,
 637         marker: PhantomData,
 638     };
 639
 640     const INFINITY: Self = IeeeFloat {
 641         sig: [0],
 642         exp: S::MAX_EXP + 1,
 643         category: Category::Infinity,
 644         sign: false,
 645         marker: PhantomData,
 646     };
 647
 648     // FIXME(eddyb) remove when qnan becomes const fn.
 649     const NAN: Self = IeeeFloat {
 650         sig: [S::QNAN_SIGNIFICAND],
 651         exp: S::MAX_EXP + 1,
 652         category: Category::NaN,
 653         sign: false,
 654         marker: PhantomData,
 655     };
 656
 657     fn qnan(payload: Option<u128>) -> Self {
 658         IeeeFloat {
 659             sig: [
 660                 S::QNAN_SIGNIFICAND |
 661                     payload.map_or(0, |payload| {
 662                         // Zero out the excess bits of the significand.
 663                         payload & ((1 << S::QNAN_BIT) - 1)
 664                     }),
 665             ],
 666             exp: S::MAX_EXP + 1,
 667             category: Category::NaN,
 668             sign: false,
 669             marker: PhantomData,
 670         }
 671     }
 672
 673     fn snan(payload: Option<u128>) -> Self {
 674         let mut snan = Self::qnan(payload);
 675
 676         // We always have to clear the QNaN bit to make it an SNaN.
 677         sig::clear_bit(&mut snan.sig, S::QNAN_BIT);
 678
 679         // If there are no bits set in the payload, we have to set
 680         // *something* to make it a NaN instead of an infinity;
 681         // conventionally, this is the next bit down from the QNaN bit.
 682         if snan.sig[0] & !S::QNAN_SIGNIFICAND == 0 {
 683             sig::set_bit(&mut snan.sig, S::QNAN_BIT - 1);
 684         }
 685
 686         snan
 687     }
 688
 689     fn largest() -> Self {
 690         // We want (in interchange format):
 691         //   exponent = 1..10
 692         //   significand = 1..1
 693         IeeeFloat {
 694             sig: [(1 << S::PRECISION) - 1],
 695             exp: S::MAX_EXP,
 696             category: Category::Normal,
 697             sign: false,
 698             marker: PhantomData,
 699         }
 700     }
 701
 702     // We want (in interchange format):
 703     //   exponent = 0..0
 704     //   significand = 0..01
 705     const SMALLEST: Self = IeeeFloat {
 706         sig: [1],
 707         exp: S::MIN_EXP,
 708         category: Category::Normal,
 709         sign: false,
 710         marker: PhantomData,
 711     };
 712
 713     fn smallest_normalized() -> Self {
 714         // We want (in interchange format):
 715         //   exponent = 0..0
 716         //   significand = 10..0
 717         IeeeFloat {
 718             sig: [1 << (S::PRECISION - 1)],
 719             exp: S::MIN_EXP,
 720             category: Category::Normal,
 721             sign: false,
 722             marker: PhantomData,
 723         }
 724     }
 725
 726     fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
 727         let status = match (self.category, rhs.category) {
 728             (Category::Infinity, Category::Infinity) => {
 729                 // Differently signed infinities can only be validly
 730                 // subtracted.
 731                 if self.sign != rhs.sign {
 732                     self = Self::NAN;
 733                     Status::INVALID_OP
 734                 } else {
 735                     Status::OK
 736                 }
 737             }
 738
 739             // Sign may depend on rounding mode; handled below.
 740             (_, Category::Zero) |
 741             (Category::NaN, _) |
 742             (Category::Infinity, Category::Normal) => Status::OK,
 743
 744             (Category::Zero, _) |
 745             (_, Category::NaN) |
 746             (_, Category::Infinity) => {
 747                 self = rhs;
 748                 Status::OK
 749             }
 750
 751             // This return code means it was not a simple case.
 752             (Category::Normal, Category::Normal) => {
 753                 let loss = sig::add_or_sub(
 754                     &mut self.sig,
 755                     &mut self.exp,
 756                     &mut self.sign,
 757                     &mut [rhs.sig[0]],
 758                     rhs.exp,
 759                     rhs.sign,
 760                 );
 761                 let status;
 762                 self = unpack!(status=, self.normalize(round, loss));
 763
 764                 // Can only be zero if we lost no fraction.
 765                 assert!(self.category != Category::Zero || loss == Loss::ExactlyZero);
 766
 767                 status
 768             }
 769         };
 770
 771         // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
 772         // positive zero unless rounding to minus infinity, except that
 773         // adding two like-signed zeroes gives that zero.
 774         if self.category == Category::Zero &&
 775             (rhs.category != Category::Zero || self.sign != rhs.sign)
 776         {
 777             self.sign = round == Round::TowardNegative;
 778         }
 779
 780         status.and(self)
 781     }
 782
 783     fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
 784         self.sign ^= rhs.sign;
 785
 786         match (self.category, rhs.category) {
 787             (Category::NaN, _) => {
 788                 self.sign = false;
 789                 Status::OK.and(self)
 790             }
 791
 792             (_, Category::NaN) => {
 793                 self.sign = false;
 794                 self.category = Category::NaN;
 795                 self.sig = rhs.sig;
 796                 Status::OK.and(self)
 797             }
 798
 799             (Category::Zero, Category::Infinity) |
 800             (Category::Infinity, Category::Zero) => Status::INVALID_OP.and(Self::NAN),
 801
 802             (_, Category::Infinity) |
 803             (Category::Infinity, _) => {
 804                 self.category = Category::Infinity;
 805                 Status::OK.and(self)
 806             }
 807
 808             (Category::Zero, _) |
 809             (_, Category::Zero) => {
 810                 self.category = Category::Zero;
 811                 Status::OK.and(self)
 812             }
 813
 814             (Category::Normal, Category::Normal) => {
 815                 self.exp += rhs.exp;
 816                 let mut wide_sig = [0; 2];
 817                 let loss = sig::mul(
 818                     &mut wide_sig,
 819                     &mut self.exp,
 820                     &self.sig,
 821                     &rhs.sig,
 822                     S::PRECISION,
 823                 );
 824                 self.sig = [wide_sig[0]];
 825                 let mut status;
 826                 self = unpack!(status=, self.normalize(round, loss));
 827                 if loss != Loss::ExactlyZero {
 828                     status |= Status::INEXACT;
 829                 }
 830                 status.and(self)
 831             }
 832         }
 833     }
 834
 835     fn mul_add_r(mut self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> {
 836         // If and only if all arguments are normal do we need to do an
 837         // extended-precision calculation.
 838         if !self.is_finite_non_zero() || !multiplicand.is_finite_non_zero() || !addend.is_finite() {
 839             let mut status;
 840             self = unpack!(status=, self.mul_r(multiplicand, round));
 841
 842             // FS can only be Status::OK or Status::INVALID_OP. There is no more work
 843             // to do in the latter case. The IEEE-754R standard says it is
 844             // implementation-defined in this case whether, if ADDEND is a
 845             // quiet NaN, we raise invalid op; this implementation does so.
 846             //
 847             // If we need to do the addition we can do so with normal
 848             // precision.
 849             if status == Status::OK {
 850                 self = unpack!(status=, self.add_r(addend, round));
 851             }
 852             return status.and(self);
 853         }
 854
 855         // Post-multiplication sign, before addition.
 856         self.sign ^= multiplicand.sign;
 857
 858         // Allocate space for twice as many bits as the original significand, plus one
 859         // extra bit for the addition to overflow into.
 860         assert!(limbs_for_bits(S::PRECISION * 2 + 1) <= 2);
 861         let mut wide_sig = sig::widening_mul(self.sig[0], multiplicand.sig[0]);
 862
 863         let mut loss = Loss::ExactlyZero;
 864         let mut omsb = sig::omsb(&wide_sig);
 865         self.exp += multiplicand.exp;
 866
 867         // Assume the operands involved in the multiplication are single-precision
 868         // FP, and the two multiplicants are:
 869         //     lhs = a23 . a22 ... a0 * 2^e1
 870         //     rhs = b23 . b22 ... b0 * 2^e2
 871         // the result of multiplication is:
 872         //     lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
 873         // Note that there are three significant bits at the left-hand side of the
 874         // radix point: two for the multiplication, and an overflow bit for the
 875         // addition (that will always be zero at this point). Move the radix point
 876         // toward left by two bits, and adjust exponent accordingly.
 877         self.exp += 2;
 878
 879         if addend.is_non_zero() {
 880             // Normalize our MSB to one below the top bit to allow for overflow.
 881             let ext_precision = 2 * S::PRECISION + 1;
 882             if omsb != ext_precision - 1 {
 883                 assert!(ext_precision > omsb);
 884                 sig::shift_left(&mut wide_sig, &mut self.exp, (ext_precision - 1) - omsb);
 885             }
 886
 887             // The intermediate result of the multiplication has "2 * S::PRECISION"
 888             // significant bit; adjust the addend to be consistent with mul result.
 889             let mut ext_addend_sig = [addend.sig[0], 0];
 890
 891             // Extend the addend significand to ext_precision - 1. This guarantees
 892             // that the high bit of the significand is zero (same as wide_sig),
 893             // so the addition will overflow (if it does overflow at all) into the top bit.
 894             sig::shift_left(
 895                 &mut ext_addend_sig,
 896                 &mut 0,
 897                 ext_precision - 1 - S::PRECISION,
 898             );
 899             loss = sig::add_or_sub(
 900                 &mut wide_sig,
 901                 &mut self.exp,
 902                 &mut self.sign,
 903                 &mut ext_addend_sig,
 904                 addend.exp + 1,
 905                 addend.sign,
 906             );
 907
 908             omsb = sig::omsb(&wide_sig);
 909         }
 910
 911         // Convert the result having "2 * S::PRECISION" significant-bits back to the one
 912         // having "S::PRECISION" significant-bits. First, move the radix point from
 913         // position "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be
 914         // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION".
 915         self.exp -= S::PRECISION as ExpInt + 1;
 916
 917         // In case MSB resides at the left-hand side of radix point, shift the
 918         // mantissa right by some amount to make sure the MSB reside right before
 919         // the radix point (i.e., "MSB . rest-significant-bits").
 920         if omsb > S::PRECISION {
 921             let bits = omsb - S::PRECISION;
 922             loss = sig::shift_right(&mut wide_sig, &mut self.exp, bits).combine(loss);
 923         }
 924
 925         self.sig[0] = wide_sig[0];
 926
 927         let mut status;
 928         self = unpack!(status=, self.normalize(round, loss));
 929         if loss != Loss::ExactlyZero {
 930             status |= Status::INEXACT;
 931         }
 932
 933         // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
 934         // positive zero unless rounding to minus infinity, except that
 935         // adding two like-signed zeroes gives that zero.
 936         if self.category == Category::Zero && !status.intersects(Status::UNDERFLOW) &&
 937             self.sign != addend.sign
 938         {
 939             self.sign = round == Round::TowardNegative;
 940         }
 941
 942         status.and(self)
 943     }
 944
 945     fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
 946         self.sign ^= rhs.sign;
 947
 948         match (self.category, rhs.category) {
 949             (Category::NaN, _) => {
 950                 self.sign = false;
 951                 Status::OK.and(self)
 952             }
 953
 954             (_, Category::NaN) => {
 955                 self.category = Category::NaN;
 956                 self.sig = rhs.sig;
 957                 self.sign = false;
 958                 Status::OK.and(self)
 959             }
 960
 961             (Category::Infinity, Category::Infinity) |
 962             (Category::Zero, Category::Zero) => Status::INVALID_OP.and(Self::NAN),
 963
 964             (Category::Infinity, _) |
 965             (Category::Zero, _) => Status::OK.and(self),
 966
 967             (Category::Normal, Category::Infinity) => {
 968                 self.category = Category::Zero;
 969                 Status::OK.and(self)
 970             }
 971
 972             (Category::Normal, Category::Zero) => {
 973                 self.category = Category::Infinity;
 974                 Status::DIV_BY_ZERO.and(self)
 975             }
 976
 977             (Category::Normal, Category::Normal) => {
 978                 self.exp -= rhs.exp;
 979                 let dividend = self.sig[0];
 980                 let loss = sig::div(
 981                     &mut self.sig,
 982                     &mut self.exp,
 983                     &mut [dividend],
 984                     &mut [rhs.sig[0]],
 985                     S::PRECISION,
 986                 );
 987                 let mut status;
 988                 self = unpack!(status=, self.normalize(round, loss));
 989                 if loss != Loss::ExactlyZero {
 990                     status |= Status::INEXACT;
 991                 }
 992                 status.and(self)
 993             }
 994         }
 995     }
 996
 997     fn c_fmod(mut self, rhs: Self) -> StatusAnd<Self> {
 998         match (self.category, rhs.category) {
 999             (Category::NaN, _) |
1000             (Category::Zero, Category::Infinity) |
1001             (Category::Zero, Category::Normal) |
1002             (Category::Normal, Category::Infinity) => Status::OK.and(self),
1003
1004             (_, Category::NaN) => {
1005                 self.sign = false;
1006                 self.category = Category::NaN;
1007                 self.sig = rhs.sig;
1008                 Status::OK.and(self)
1009             }
1010
1011             (Category::Infinity, _) |
1012             (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN),
1013
1014             (Category::Normal, Category::Normal) => {
1015                 while self.is_finite_non_zero() && rhs.is_finite_non_zero() &&
1016                     self.cmp_abs_normal(rhs) != Ordering::Less
1017                 {
1018                     let mut v = rhs.scalbn(self.ilogb() - rhs.ilogb());
1019                     if self.cmp_abs_normal(v) == Ordering::Less {
1020                         v = v.scalbn(-1);
1021                     }
1022                     v.sign = self.sign;
1023
1024                     let status;
1025                     self = unpack!(status=, self - v);
1026                     assert_eq!(status, Status::OK);
1027                 }
1028                 Status::OK.and(self)
1029             }
1030         }
1031     }
1032
1033     fn round_to_integral(self, round: Round) -> StatusAnd<Self> {
1034         // If the exponent is large enough, we know that this value is already
1035         // integral, and the arithmetic below would potentially cause it to saturate
1036         // to +/-Inf. Bail out early instead.
1037         if self.is_finite_non_zero() && self.exp + 1 >= S::PRECISION as ExpInt {
1038             return Status::OK.and(self);
1039         }
1040
1041         // The algorithm here is quite simple: we add 2^(p-1), where p is the
1042         // precision of our format, and then subtract it back off again. The choice
1043         // of rounding modes for the addition/subtraction determines the rounding mode
1044         // for our integral rounding as well.
1045         // NOTE: When the input value is negative, we do subtraction followed by
1046         // addition instead.
1047         assert!(S::PRECISION <= 128);
1048         let mut status;
1049         let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1)));
1050         let magic_const = magic_const.copy_sign(self);
1051
1052         if status != Status::OK {
1053             return status.and(self);
1054         }
1055
1056         let mut r = self;
1057         r = unpack!(status=, r.add_r(magic_const, round));
1058         if status != Status::OK && status != Status::INEXACT {
1059             return status.and(self);
1060         }
1061
1062         // Restore the input sign to handle 0.0/-0.0 cases correctly.
1063         r.sub_r(magic_const, round).map(|r| r.copy_sign(self))
1064     }
1065
1066     fn next_up(mut self) -> StatusAnd<Self> {
1067         // Compute nextUp(x), handling each float category separately.
1068         match self.category {
1069             Category::Infinity => {
1070                 if self.sign {
1071                     // nextUp(-inf) = -largest
1072                     Status::OK.and(-Self::largest())
1073                 } else {
1074                     // nextUp(+inf) = +inf
1075                     Status::OK.and(self)
1076                 }
1077             }
1078             Category::NaN => {
1079                 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
1080                 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
1081                 //                     change the payload.
1082                 if self.is_signaling() {
1083                     // For consistency, propagate the sign of the sNaN to the qNaN.
1084                     Status::INVALID_OP.and(Self::NAN.copy_sign(self))
1085                 } else {
1086                     Status::OK.and(self)
1087                 }
1088             }
1089             Category::Zero => {
1090                 // nextUp(pm 0) = +smallest
1091                 Status::OK.and(Self::SMALLEST)
1092             }
1093             Category::Normal => {
1094                 // nextUp(-smallest) = -0
1095                 if self.is_smallest() && self.sign {
1096                     return Status::OK.and(-Self::ZERO);
1097                 }
1098
1099                 // nextUp(largest) == INFINITY
1100                 if self.is_largest() && !self.sign {
1101                     return Status::OK.and(Self::INFINITY);
1102                 }
1103
1104                 // Excluding the integral bit. This allows us to test for binade boundaries.
1105                 let sig_mask = (1 << (S::PRECISION - 1)) - 1;
1106
1107                 // nextUp(normal) == normal + inc.
1108                 if self.sign {
1109                     // If we are negative, we need to decrement the significand.
1110
1111                     // We only cross a binade boundary that requires adjusting the exponent
1112                     // if:
1113                     //   1. exponent != S::MIN_EXP. This implies we are not in the
1114                     //   smallest binade or are dealing with denormals.
1115                     //   2. Our significand excluding the integral bit is all zeros.
1116                     let crossing_binade_boundary = self.exp != S::MIN_EXP &&
1117                         self.sig[0] & sig_mask == 0;
1118
1119                     // Decrement the significand.
1120                     //
1121                     // We always do this since:
1122                     //   1. If we are dealing with a non-binade decrement, by definition we
1123                     //   just decrement the significand.
1124                     //   2. If we are dealing with a normal -> normal binade decrement, since
1125                     //   we have an explicit integral bit the fact that all bits but the
1126                     //   integral bit are zero implies that subtracting one will yield a
1127                     //   significand with 0 integral bit and 1 in all other spots. Thus we
1128                     //   must just adjust the exponent and set the integral bit to 1.
1129                     //   3. If we are dealing with a normal -> denormal binade decrement,
1130                     //   since we set the integral bit to 0 when we represent denormals, we
1131                     //   just decrement the significand.
1132                     sig::decrement(&mut self.sig);
1133
1134                     if crossing_binade_boundary {
1135                         // Our result is a normal number. Do the following:
1136                         // 1. Set the integral bit to 1.
1137                         // 2. Decrement the exponent.
1138                         sig::set_bit(&mut self.sig, S::PRECISION - 1);
1139                         self.exp -= 1;
1140                     }
1141                 } else {
1142                     // If we are positive, we need to increment the significand.
1143
1144                     // We only cross a binade boundary that requires adjusting the exponent if
1145                     // the input is not a denormal and all of said input's significand bits
1146                     // are set. If all of said conditions are true: clear the significand, set
1147                     // the integral bit to 1, and increment the exponent. If we have a
1148                     // denormal always increment since moving denormals and the numbers in the
1149                     // smallest normal binade have the same exponent in our representation.
1150                     let crossing_binade_boundary = !self.is_denormal() &&
1151                         self.sig[0] & sig_mask == sig_mask;
1152
1153                     if crossing_binade_boundary {
1154                         self.sig = [0];
1155                         sig::set_bit(&mut self.sig, S::PRECISION - 1);
1156                         assert_ne!(
1157                             self.exp,
1158                             S::MAX_EXP,
1159                             "We can not increment an exponent beyond the MAX_EXP \
1160                              allowed by the given floating point semantics."
1161                         );
1162                         self.exp += 1;
1163                     } else {
1164                         sig::increment(&mut self.sig);
1165                     }
1166                 }
1167                 Status::OK.and(self)
1168             }
1169         }
1170     }
1171
1172     fn from_bits(input: u128) -> Self {
1173         // Dispatch to semantics.
1174         S::from_bits(input)
1175     }
1176
1177     fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> {
1178         IeeeFloat {
1179             sig: [input],
1180             exp: S::PRECISION as ExpInt - 1,
1181             category: Category::Normal,
1182             sign: false,
1183             marker: PhantomData,
1184         }.normalize(round, Loss::ExactlyZero)
1185     }
1186
1187     fn from_str_r(mut s: &str, mut round: Round) -> Result<StatusAnd<Self>, ParseError> {
1188         if s.is_empty() {
1189             return Err(ParseError("Invalid string length"));
1190         }
1191
1192         // Handle special cases.
1193         match s {
1194             "inf" | "INFINITY" => return Ok(Status::OK.and(Self::INFINITY)),
1195             "-inf" | "-INFINITY" => return Ok(Status::OK.and(-Self::INFINITY)),
1196             "nan" | "NaN" => return Ok(Status::OK.and(Self::NAN)),
1197             "-nan" | "-NaN" => return Ok(Status::OK.and(-Self::NAN)),
1198             _ => {}
1199         }
1200
1201         // Handle a leading minus sign.
1202         let minus = s.starts_with("-");
1203         if minus || s.starts_with("+") {
1204             s = &s[1..];
1205             if s.is_empty() {
1206                 return Err(ParseError("String has no digits"));
1207             }
1208         }
1209
1210         // Adjust the rounding mode for the absolute value below.
1211         if minus {
1212             round = -round;
1213         }
1214
1215         let r = if s.starts_with("0x") || s.starts_with("0X") {
1216             s = &s[2..];
1217             if s.is_empty() {
1218                 return Err(ParseError("Invalid string"));
1219             }
1220             Self::from_hexadecimal_string(s, round)?
1221         } else {
1222             Self::from_decimal_string(s, round)?
1223         };
1224
1225         Ok(r.map(|r| if minus { -r } else { r }))
1226     }
1227
1228     fn to_bits(self) -> u128 {
1229         // Dispatch to semantics.
1230         S::to_bits(self)
1231     }
1232
1233     fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> {
1234         // The result of trying to convert a number too large.
1235         let overflow = if self.sign {
1236             // Negative numbers cannot be represented as unsigned.
1237             0
1238         } else {
1239             // Largest unsigned integer of the given width.
1240             !0 >> (128 - width)
1241         };
1242
1243         *is_exact = false;
1244
1245         match self.category {
1246             Category::NaN => Status::INVALID_OP.and(0),
1247
1248             Category::Infinity => Status::INVALID_OP.and(overflow),
1249
1250             Category::Zero => {
1251                 // Negative zero can't be represented as an int.
1252                 *is_exact = !self.sign;
1253                 Status::OK.and(0)
1254             }
1255
1256             Category::Normal => {
1257                 let mut r = 0;
1258
1259                 // Step 1: place our absolute value, with any fraction truncated, in
1260                 // the destination.
1261                 let truncated_bits = if self.exp < 0 {
1262                     // Our absolute value is less than one; truncate everything.
1263                     // For exponent -1 the integer bit represents .5, look at that.
1264                     // For smaller exponents leftmost truncated bit is 0.
1265                     S::PRECISION - 1 + (-self.exp) as usize
1266                 } else {
1267                     // We want the most significant (exponent + 1) bits; the rest are
1268                     // truncated.
1269                     let bits = self.exp as usize + 1;
1270
1271                     // Hopelessly large in magnitude?
1272                     if bits > width {
1273                         return Status::INVALID_OP.and(overflow);
1274                     }
1275
1276                     if bits < S::PRECISION {
1277                         // We truncate (S::PRECISION - bits) bits.
1278                         r = self.sig[0] >> (S::PRECISION - bits);
1279                         S::PRECISION - bits
1280                     } else {
1281                         // We want at least as many bits as are available.
1282                         r = self.sig[0] << (bits - S::PRECISION);
1283                         0
1284                     }
1285                 };
1286
1287                 // Step 2: work out any lost fraction, and increment the absolute
1288                 // value if we would round away from zero.
1289                 let mut loss = Loss::ExactlyZero;
1290                 if truncated_bits > 0 {
1291                     loss = Loss::through_truncation(&self.sig, truncated_bits);
1292                     if loss != Loss::ExactlyZero &&
1293                         self.round_away_from_zero(round, loss, truncated_bits)
1294                     {
1295                         r = r.wrapping_add(1);
1296                         if r == 0 {
1297                             return Status::INVALID_OP.and(overflow); // Overflow.
1298                         }
1299                     }
1300                 }
1301
1302                 // Step 3: check if we fit in the destination.
1303                 if r > overflow {
1304                     return Status::INVALID_OP.and(overflow);
1305                 }
1306
1307                 if loss == Loss::ExactlyZero {
1308                     *is_exact = true;
1309                     Status::OK.and(r)
1310                 } else {
1311                     Status::INEXACT.and(r)
1312                 }
1313             }
1314         }
1315     }
1316
1317     fn cmp_abs_normal(self, rhs: Self) -> Ordering {
1318         assert!(self.is_finite_non_zero());
1319         assert!(rhs.is_finite_non_zero());
1320
1321         // If exponents are equal, do an unsigned comparison of the significands.
1322         self.exp.cmp(&rhs.exp).then_with(
1323             || sig::cmp(&self.sig, &rhs.sig),
1324         )
1325     }
1326
1327     fn bitwise_eq(self, rhs: Self) -> bool {
1328         if self.category != rhs.category || self.sign != rhs.sign {
1329             return false;
1330         }
1331
1332         if self.category == Category::Zero || self.category == Category::Infinity {
1333             return true;
1334         }
1335
1336         if self.is_finite_non_zero() && self.exp != rhs.exp {
1337             return false;
1338         }
1339
1340         self.sig == rhs.sig
1341     }
1342
1343     fn is_negative(self) -> bool {
1344         self.sign
1345     }
1346
1347     fn is_denormal(self) -> bool {
1348         self.is_finite_non_zero() && self.exp == S::MIN_EXP &&
1349             !sig::get_bit(&self.sig, S::PRECISION - 1)
1350     }
1351
1352     fn is_signaling(self) -> bool {
1353         // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
1354         // first bit of the trailing significand being 0.
1355         self.is_nan() && !sig::get_bit(&self.sig, S::QNAN_BIT)
1356     }
1357
1358     fn category(self) -> Category {
1359         self.category
1360     }
1361
1362     fn get_exact_inverse(self) -> Option<Self> {
1363         // Special floats and denormals have no exact inverse.
1364         if !self.is_finite_non_zero() {
1365             return None;
1366         }
1367
1368         // Check that the number is a power of two by making sure that only the
1369         // integer bit is set in the significand.
1370         if self.sig != [1 << (S::PRECISION - 1)] {
1371             return None;
1372         }
1373
1374         // Get the inverse.
1375         let mut reciprocal = Self::from_u128(1).value;
1376         let status;
1377         reciprocal = unpack!(status=, reciprocal / self);
1378         if status != Status::OK {
1379             return None;
1380         }
1381
1382         // Avoid multiplication with a denormal, it is not safe on all platforms and
1383         // may be slower than a normal division.
1384         if reciprocal.is_denormal() {
1385             return None;
1386         }
1387
1388         assert!(reciprocal.is_finite_non_zero());
1389         assert_eq!(reciprocal.sig, [1 << (S::PRECISION - 1)]);
1390
1391         Some(reciprocal)
1392     }
1393
1394     fn ilogb(mut self) -> ExpInt {
1395         if self.is_nan() {
1396             return IEK_NAN;
1397         }
1398         if self.is_zero() {
1399             return IEK_ZERO;
1400         }
1401         if self.is_infinite() {
1402             return IEK_INF;
1403         }
1404         if !self.is_denormal() {
1405             return self.exp;
1406         }
1407
1408         let sig_bits = (S::PRECISION - 1) as ExpInt;
1409         self.exp += sig_bits;
1410         self = self.normalize(Round::NearestTiesToEven, Loss::ExactlyZero)
1411             .value;
1412         self.exp - sig_bits
1413     }
1414
1415     fn scalbn_r(mut self, exp: ExpInt, round: Round) -> Self {
1416         // If exp is wildly out-of-scale, simply adding it to self.exp will
1417         // overflow; clamp it to a safe range before adding, but ensure that the range
1418         // is large enough that the clamp does not change the result. The range we
1419         // need to support is the difference between the largest possible exponent and
1420         // the normalized exponent of half the smallest denormal.
1421
1422         let sig_bits = (S::PRECISION - 1) as i32;
1423         let max_change = S::MAX_EXP as i32 - (S::MIN_EXP as i32 - sig_bits) + 1;
1424
1425         // Clamp to one past the range ends to let normalize handle overflow.
1426         let exp_change = cmp::min(cmp::max(exp as i32, -max_change - 1), max_change);
1427         self.exp = self.exp.saturating_add(exp_change as ExpInt);
1428         self = self.normalize(round, Loss::ExactlyZero).value;
1429         if self.is_nan() {
1430             sig::set_bit(&mut self.sig, S::QNAN_BIT);
1431         }
1432         self
1433     }
1434
1435     fn frexp_r(mut self, exp: &mut ExpInt, round: Round) -> Self {
1436         *exp = self.ilogb();
1437
1438         // Quiet signalling nans.
1439         if *exp == IEK_NAN {
1440             sig::set_bit(&mut self.sig, S::QNAN_BIT);
1441             return self;
1442         }
1443
1444         if *exp == IEK_INF {
1445             return self;
1446         }
1447
1448         // 1 is added because frexp is defined to return a normalized fraction in
1449         // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
1450         if *exp == IEK_ZERO {
1451             *exp = 0;
1452         } else {
1453             *exp += 1;
1454         }
1455         self.scalbn_r(-*exp, round)
1456     }
1457 }
1458
1459 impl<S: Semantics, T: Semantics> FloatConvert<IeeeFloat<T>> for IeeeFloat<S> {
1460     fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd<IeeeFloat<T>> {
1461         let mut r = IeeeFloat {
1462             sig: self.sig,
1463             exp: self.exp,
1464             category: self.category,
1465             sign: self.sign,
1466             marker: PhantomData,
1467         };
1468
1469         // x86 has some unusual NaNs which cannot be represented in any other
1470         // format; note them here.
1471         fn is_x87_double_extended<S: Semantics>() -> bool {
1472             S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND
1473         }
1474         let x87_special_nan = is_x87_double_extended::<S>() && !is_x87_double_extended::<T>() &&
1475             r.category == Category::NaN &&
1476             (r.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND;
1477
1478         // If this is a truncation of a denormal number, and the target semantics
1479         // has larger exponent range than the source semantics (this can happen
1480         // when truncating from PowerPC double-double to double format), the
1481         // right shift could lose result mantissa bits. Adjust exponent instead
1482         // of performing excessive shift.
1483         let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt;
1484         if shift < 0 && r.is_finite_non_zero() {
1485             let mut exp_change = sig::omsb(&r.sig) as ExpInt - S::PRECISION as ExpInt;
1486             if r.exp + exp_change < T::MIN_EXP {
1487                 exp_change = T::MIN_EXP - r.exp;
1488             }
1489             if exp_change < shift {
1490                 exp_change = shift;
1491             }
1492             if exp_change < 0 {
1493                 shift -= exp_change;
1494                 r.exp += exp_change;
1495             }
1496         }
1497
1498         // If this is a truncation, perform the shift.
1499         let loss = if shift < 0 && (r.is_finite_non_zero() || r.category == Category::NaN) {
1500             sig::shift_right(&mut r.sig, &mut 0, -shift as usize)
1501         } else {
1502             Loss::ExactlyZero
1503         };
1504
1505         // If this is an extension, perform the shift.
1506         if shift > 0 && (r.is_finite_non_zero() || r.category == Category::NaN) {
1507             sig::shift_left(&mut r.sig, &mut 0, shift as usize);
1508         }
1509
1510         let status;
1511         if r.is_finite_non_zero() {
1512             r = unpack!(status=, r.normalize(round, loss));
1513             *loses_info = status != Status::OK;
1514         } else if r.category == Category::NaN {
1515             *loses_info = loss != Loss::ExactlyZero || x87_special_nan;
1516
1517             // For x87 extended precision, we want to make a NaN, not a special NaN if
1518             // the input wasn't special either.
1519             if !x87_special_nan && is_x87_double_extended::<T>() {
1520                 sig::set_bit(&mut r.sig, T::PRECISION - 1);
1521             }
1522
1523             // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1524             // does not give you back the same bits. This is dubious, and we
1525             // don't currently do it. You're really supposed to get
1526             // an invalid operation signal at runtime, but nobody does that.
1527             status = Status::OK;
1528         } else {
1529             *loses_info = false;
1530             status = Status::OK;
1531         }
1532
1533         status.and(r)
1534     }
1535 }
1536
1537 impl<S: Semantics> IeeeFloat<S> {
1538     /// Handle positive overflow. We either return infinity or
1539     /// the largest finite number. For negative overflow,
1540     /// negate the `round` argument before calling.
1541     fn overflow_result(round: Round) -> StatusAnd<Self> {
1542         match round {
1543             // Infinity?
1544             Round::NearestTiesToEven | Round::NearestTiesToAway | Round::TowardPositive => {
1545                 (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY)
1546             }
1547             // Otherwise we become the largest finite number.
1548             Round::TowardNegative | Round::TowardZero => Status::INEXACT.and(Self::largest()),
1549         }
1550     }
1551
1552     /// Returns `true` if, when truncating the current number, with `bit` the
1553     /// new LSB, with the given lost fraction and rounding mode, the result
1554     /// would need to be rounded away from zero (i.e., by increasing the
1555     /// signficand). This routine must work for `Category::Zero` of both signs, and
1556     /// `Category::Normal` numbers.
1557     fn round_away_from_zero(&self, round: Round, loss: Loss, bit: usize) -> bool {
1558         // NaNs and infinities should not have lost fractions.
1559         assert!(self.is_finite_non_zero() || self.is_zero());
1560
1561         // Current callers never pass this so we don't handle it.
1562         assert_ne!(loss, Loss::ExactlyZero);
1563
1564         match round {
1565             Round::NearestTiesToAway => loss == Loss::ExactlyHalf || loss == Loss::MoreThanHalf,
1566             Round::NearestTiesToEven => {
1567                 if loss == Loss::MoreThanHalf {
1568                     return true;
1569                 }
1570
1571                 // Our zeros don't have a significand to test.
1572                 if loss == Loss::ExactlyHalf && self.category != Category::Zero {
1573                     return sig::get_bit(&self.sig, bit);
1574                 }
1575
1576                 false
1577             }
1578             Round::TowardZero => false,
1579             Round::TowardPositive => !self.sign,
1580             Round::TowardNegative => self.sign,
1581         }
1582     }
1583
1584     fn normalize(mut self, round: Round, mut loss: Loss) -> StatusAnd<Self> {
1585         if !self.is_finite_non_zero() {
1586             return Status::OK.and(self);
1587         }
1588
1589         // Before rounding normalize the exponent of Category::Normal numbers.
1590         let mut omsb = sig::omsb(&self.sig);
1591
1592         if omsb > 0 {
1593             // OMSB is numbered from 1. We want to place it in the integer
1594             // bit numbered PRECISION if possible, with a compensating change in
1595             // the exponent.
1596             let mut final_exp = self.exp.saturating_add(
1597                 omsb as ExpInt - S::PRECISION as ExpInt,
1598             );
1599
1600             // If the resulting exponent is too high, overflow according to
1601             // the rounding mode.
1602             if final_exp > S::MAX_EXP {
1603                 let round = if self.sign { -round } else { round };
1604                 return Self::overflow_result(round).map(|r| r.copy_sign(self));
1605             }
1606
1607             // Subnormal numbers have exponent MIN_EXP, and their MSB
1608             // is forced based on that.
1609             if final_exp < S::MIN_EXP {
1610                 final_exp = S::MIN_EXP;
1611             }
1612
1613             // Shifting left is easy as we don't lose precision.
1614             if final_exp < self.exp {
1615                 assert_eq!(loss, Loss::ExactlyZero);
1616
1617                 let exp_change = (self.exp - final_exp) as usize;
1618                 sig::shift_left(&mut self.sig, &mut self.exp, exp_change);
1619
1620                 return Status::OK.and(self);
1621             }
1622
1623             // Shift right and capture any new lost fraction.
1624             if final_exp > self.exp {
1625                 let exp_change = (final_exp - self.exp) as usize;
1626                 loss = sig::shift_right(&mut self.sig, &mut self.exp, exp_change).combine(loss);
1627
1628                 // Keep OMSB up-to-date.
1629                 omsb = omsb.saturating_sub(exp_change);
1630             }
1631         }
1632
1633         // Now round the number according to round given the lost
1634         // fraction.
1635
1636         // As specified in IEEE 754, since we do not trap we do not report
1637         // underflow for exact results.
1638         if loss == Loss::ExactlyZero {
1639             // Canonicalize zeros.
1640             if omsb == 0 {
1641                 self.category = Category::Zero;
1642             }
1643
1644             return Status::OK.and(self);
1645         }
1646
1647         // Increment the significand if we're rounding away from zero.
1648         if self.round_away_from_zero(round, loss, 0) {
1649             if omsb == 0 {
1650                 self.exp = S::MIN_EXP;
1651             }
1652
1653             // We should never overflow.
1654             assert_eq!(sig::increment(&mut self.sig), 0);
1655             omsb = sig::omsb(&self.sig);
1656
1657             // Did the significand increment overflow?
1658             if omsb == S::PRECISION + 1 {
1659                 // Renormalize by incrementing the exponent and shifting our
1660                 // significand right one. However if we already have the
1661                 // maximum exponent we overflow to infinity.
1662                 if self.exp == S::MAX_EXP {
1663                     self.category = Category::Infinity;
1664
1665                     return (Status::OVERFLOW | Status::INEXACT).and(self);
1666                 }
1667
1668                 let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1);
1669
1670                 return Status::INEXACT.and(self);
1671             }
1672         }
1673
1674         // The normal case - we were and are not denormal, and any
1675         // significand increment above didn't overflow.
1676         if omsb == S::PRECISION {
1677             return Status::INEXACT.and(self);
1678         }
1679
1680         // We have a non-zero denormal.
1681         assert!(omsb < S::PRECISION);
1682
1683         // Canonicalize zeros.
1684         if omsb == 0 {
1685             self.category = Category::Zero;
1686         }
1687
1688         // The Category::Zero case is a denormal that underflowed to zero.
1689         (Status::UNDERFLOW | Status::INEXACT).and(self)
1690     }
1691
1692     fn from_hexadecimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
1693         let mut r = IeeeFloat {
1694             sig: [0],
1695             exp: 0,
1696             category: Category::Normal,
1697             sign: false,
1698             marker: PhantomData,
1699         };
1700
1701         let mut any_digits = false;
1702         let mut has_exp = false;
1703         let mut bit_pos = LIMB_BITS as isize;
1704         let mut loss = None;
1705
1706         // Without leading or trailing zeros, irrespective of the dot.
1707         let mut first_sig_digit = None;
1708         let mut dot = s.len();
1709
1710         for (p, c) in s.char_indices() {
1711             // Skip leading zeros and any (hexa)decimal point.
1712             if c == '.' {
1713                 if dot != s.len() {
1714                     return Err(ParseError("String contains multiple dots"));
1715                 }
1716                 dot = p;
1717             } else if let Some(hex_value) = c.to_digit(16) {
1718                 any_digits = true;
1719
1720                 if first_sig_digit.is_none() {
1721                     if hex_value == 0 {
1722                         continue;
1723                     }
1724                     first_sig_digit = Some(p);
1725                 }
1726
1727                 // Store the number while we have space.
1728                 bit_pos -= 4;
1729                 if bit_pos >= 0 {
1730                     r.sig[0] |= (hex_value as Limb) << bit_pos;
1731                 // If zero or one-half (the hexadecimal digit 8) are followed
1732                 // by non-zero, they're a little more than zero or one-half.
1733                 } else if let Some(ref mut loss) = loss {
1734                     if hex_value != 0 {
1735                         if *loss == Loss::ExactlyZero {
1736                             *loss = Loss::LessThanHalf;
1737                         }
1738                         if *loss == Loss::ExactlyHalf {
1739                             *loss = Loss::MoreThanHalf;
1740                         }
1741                     }
1742                 } else {
1743                     loss = Some(match hex_value {
1744                         0 => Loss::ExactlyZero,
1745                         1..=7 => Loss::LessThanHalf,
1746                         8 => Loss::ExactlyHalf,
1747                         9..=15 => Loss::MoreThanHalf,
1748                         _ => unreachable!(),
1749                     });
1750                 }
1751             } else if c == 'p' || c == 'P' {
1752                 if !any_digits {
1753                     return Err(ParseError("Significand has no digits"));
1754                 }
1755
1756                 if dot == s.len() {
1757                     dot = p;
1758                 }
1759
1760                 let mut chars = s[p + 1..].chars().peekable();
1761
1762                 // Adjust for the given exponent.
1763                 let exp_minus = chars.peek() == Some(&'-');
1764                 if exp_minus || chars.peek() == Some(&'+') {
1765                     chars.next();
1766                 }
1767
1768                 for c in chars {
1769                     if let Some(value) = c.to_digit(10) {
1770                         has_exp = true;
1771                         r.exp = r.exp.saturating_mul(10).saturating_add(value as ExpInt);
1772                     } else {
1773                         return Err(ParseError("Invalid character in exponent"));
1774                     }
1775                 }
1776                 if !has_exp {
1777                     return Err(ParseError("Exponent has no digits"));
1778                 }
1779
1780                 if exp_minus {
1781                     r.exp = -r.exp;
1782                 }
1783
1784                 break;
1785             } else {
1786                 return Err(ParseError("Invalid character in significand"));
1787             }
1788         }
1789         if !any_digits {
1790             return Err(ParseError("Significand has no digits"));
1791         }
1792
1793         // Hex floats require an exponent but not a hexadecimal point.
1794         if !has_exp {
1795             return Err(ParseError("Hex strings require an exponent"));
1796         }
1797
1798         // Ignore the exponent if we are zero.
1799         let first_sig_digit = match first_sig_digit {
1800             Some(p) => p,
1801             None => return Ok(Status::OK.and(Self::ZERO)),
1802         };
1803
1804         // Calculate the exponent adjustment implicit in the number of
1805         // significant digits and adjust for writing the significand starting
1806         // at the most significant nibble.
1807         let exp_adjustment = if dot > first_sig_digit {
1808             ExpInt::try_from(dot - first_sig_digit).unwrap()
1809         } else {
1810             -ExpInt::try_from(first_sig_digit - dot - 1).unwrap()
1811         };
1812         let exp_adjustment = exp_adjustment
1813             .saturating_mul(4)
1814             .saturating_sub(1)
1815             .saturating_add(S::PRECISION as ExpInt)
1816             .saturating_sub(LIMB_BITS as ExpInt);
1817         r.exp = r.exp.saturating_add(exp_adjustment);
1818
1819         Ok(r.normalize(round, loss.unwrap_or(Loss::ExactlyZero)))
1820     }
1821
1822     fn from_decimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
1823         // Given a normal decimal floating point number of the form
1824         //
1825         //   dddd.dddd[eE][+-]ddd
1826         //
1827         // where the decimal point and exponent are optional, fill out the
1828         // variables below. Exponent is appropriate if the significand is
1829         // treated as an integer, and normalized_exp if the significand
1830         // is taken to have the decimal point after a single leading
1831         // non-zero digit.
1832         //
1833         // If the value is zero, first_sig_digit is None.
1834
1835         let mut any_digits = false;
1836         let mut dec_exp = 0i32;
1837
1838         // Without leading or trailing zeros, irrespective of the dot.
1839         let mut first_sig_digit = None;
1840         let mut last_sig_digit = 0;
1841         let mut dot = s.len();
1842
1843         for (p, c) in s.char_indices() {
1844             if c == '.' {
1845                 if dot != s.len() {
1846                     return Err(ParseError("String contains multiple dots"));
1847                 }
1848                 dot = p;
1849             } else if let Some(dec_value) = c.to_digit(10) {
1850                 any_digits = true;
1851
1852                 if dec_value != 0 {
1853                     if first_sig_digit.is_none() {
1854                         first_sig_digit = Some(p);
1855                     }
1856                     last_sig_digit = p;
1857                 }
1858             } else if c == 'e' || c == 'E' {
1859                 if !any_digits {
1860                     return Err(ParseError("Significand has no digits"));
1861                 }
1862
1863                 if dot == s.len() {
1864                     dot = p;
1865                 }
1866
1867                 let mut chars = s[p + 1..].chars().peekable();
1868
1869                 // Adjust for the given exponent.
1870                 let exp_minus = chars.peek() == Some(&'-');
1871                 if exp_minus || chars.peek() == Some(&'+') {
1872                     chars.next();
1873                 }
1874
1875                 any_digits = false;
1876                 for c in chars {
1877                     if let Some(value) = c.to_digit(10) {
1878                         any_digits = true;
1879                         dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32);
1880                     } else {
1881                         return Err(ParseError("Invalid character in exponent"));
1882                     }
1883                 }
1884                 if !any_digits {
1885                     return Err(ParseError("Exponent has no digits"));
1886                 }
1887
1888                 if exp_minus {
1889                     dec_exp = -dec_exp;
1890                 }
1891
1892                 break;
1893             } else {
1894                 return Err(ParseError("Invalid character in significand"));
1895             }
1896         }
1897         if !any_digits {
1898             return Err(ParseError("Significand has no digits"));
1899         }
1900
1901         // Test if we have a zero number allowing for non-zero exponents.
1902         let first_sig_digit = match first_sig_digit {
1903             Some(p) => p,
1904             None => return Ok(Status::OK.and(Self::ZERO)),
1905         };
1906
1907         // Adjust the exponents for any decimal point.
1908         if dot > last_sig_digit {
1909             dec_exp = dec_exp.saturating_add((dot - last_sig_digit - 1) as i32);
1910         } else {
1911             dec_exp = dec_exp.saturating_sub((last_sig_digit - dot) as i32);
1912         }
1913         let significand_digits = last_sig_digit - first_sig_digit + 1 -
1914             (dot > first_sig_digit && dot < last_sig_digit) as usize;
1915         let normalized_exp = dec_exp.saturating_add(significand_digits as i32 - 1);
1916
1917         // Handle the cases where exponents are obviously too large or too
1918         // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp
1919         // definitely overflows if
1920         //
1921         //       (dec_exp - 1) * L >= MAX_EXP
1922         //
1923         // and definitely underflows to zero where
1924         //
1925         //       (dec_exp + 1) * L <= MIN_EXP - PRECISION
1926         //
1927         // With integer arithmetic the tightest bounds for L are
1928         //
1929         //       93/28 < L < 196/59            [ numerator <= 256 ]
1930         //       42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
1931
1932         // Check for MAX_EXP.
1933         if normalized_exp.saturating_sub(1).saturating_mul(42039) >= 12655 * S::MAX_EXP as i32 {
1934             // Overflow and round.
1935             return Ok(Self::overflow_result(round));
1936         }
1937
1938         // Check for MIN_EXP.
1939         if normalized_exp.saturating_add(1).saturating_mul(28738) <=
1940             8651 * (S::MIN_EXP as i32 - S::PRECISION as i32)
1941         {
1942             // Underflow to zero and round.
1943             let r = if round == Round::TowardPositive {
1944                 IeeeFloat::SMALLEST
1945             } else {
1946                 IeeeFloat::ZERO
1947             };
1948             return Ok((Status::UNDERFLOW | Status::INEXACT).and(r));
1949         }
1950
1951         // A tight upper bound on number of bits required to hold an
1952         // N-digit decimal integer is N * 196 / 59. Allocate enough space
1953         // to hold the full significand, and an extra limb required by
1954         // tcMultiplyPart.
1955         let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59);
1956         let mut dec_sig: SmallVec<[Limb; 1]> = SmallVec::with_capacity(max_limbs);
1957
1958         // Convert to binary efficiently - we do almost all multiplication
1959         // in a Limb. When this would overflow do we do a single
1960         // bignum multiplication, and then revert again to multiplication
1961         // in a Limb.
1962         let mut chars = s[first_sig_digit..=last_sig_digit].chars();
1963         loop {
1964             let mut val = 0;
1965             let mut multiplier = 1;
1966
1967             loop {
1968                 let dec_value = match chars.next() {
1969                     Some('.') => continue,
1970                     Some(c) => c.to_digit(10).unwrap(),
1971                     None => break,
1972                 };
1973
1974                 multiplier *= 10;
1975                 val = val * 10 + dec_value as Limb;
1976
1977                 // The maximum number that can be multiplied by ten with any
1978                 // digit added without overflowing a Limb.
1979                 if multiplier > (!0 - 9) / 10 {
1980                     break;
1981                 }
1982             }
1983
1984             // If we've consumed no digits, we're done.
1985             if multiplier == 1 {
1986                 break;
1987             }
1988
1989             // Multiply out the current limb.
1990             let mut carry = val;
1991             for x in &mut dec_sig {
1992                 let [low, mut high] = sig::widening_mul(*x, multiplier);
1993
1994                 // Now add carry.
1995                 let (low, overflow) = low.overflowing_add(carry);
1996                 high += overflow as Limb;
1997
1998                 *x = low;
1999                 carry = high;
2000             }
2001
2002             // If we had carry, we need another limb (likely but not guaranteed).
2003             if carry > 0 {
2004                 dec_sig.push(carry);
2005             }
2006         }
2007
2008         // Calculate pow(5, abs(dec_exp)) into `pow5_full`.
2009         // The *_calc Vec's are reused scratch space, as an optimization.
2010         let (pow5_full, mut pow5_calc, mut sig_calc, mut sig_scratch_calc) = {
2011             let mut power = dec_exp.abs() as usize;
2012
2013             const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125];
2014
2015             let mut p5_scratch = smallvec![];
2016             let mut p5: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[4]];
2017
2018             let mut r_scratch = smallvec![];
2019             let mut r: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[power & 7]];
2020             power >>= 3;
2021
2022             while power > 0 {
2023                 // Calculate pow(5,pow(2,n+3)).
2024                 p5_scratch.resize(p5.len() * 2, 0);
2025                 let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
2026                 while p5_scratch.last() == Some(&0) {
2027                     p5_scratch.pop();
2028                 }
2029                 mem::swap(&mut p5, &mut p5_scratch);
2030
2031                 if power & 1 != 0 {
2032                     r_scratch.resize(r.len() + p5.len(), 0);
2033                     let _: Loss = sig::mul(
2034                         &mut r_scratch,
2035                         &mut 0,
2036                         &r,
2037                         &p5,
2038                         (r.len() + p5.len()) * LIMB_BITS,
2039                     );
2040                     while r_scratch.last() == Some(&0) {
2041                         r_scratch.pop();
2042                     }
2043                     mem::swap(&mut r, &mut r_scratch);
2044                 }
2045
2046                 power >>= 1;
2047             }
2048
2049             (r, r_scratch, p5, p5_scratch)
2050         };
2051
2052         // Attempt dec_sig * 10^dec_exp with increasing precision.
2053         let mut attempt = 0;
2054         loop {
2055             let calc_precision = (LIMB_BITS << attempt) - 1;
2056             attempt += 1;
2057
2058             let calc_normal_from_limbs = |sig: &mut SmallVec<[Limb; 1]>,
2059                                           limbs: &[Limb]|
2060              -> StatusAnd<ExpInt> {
2061                 sig.resize(limbs_for_bits(calc_precision), 0);
2062                 let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision);
2063
2064                 // Before rounding normalize the exponent of Category::Normal numbers.
2065                 let mut omsb = sig::omsb(sig);
2066
2067                 assert_ne!(omsb, 0);
2068
2069                 // OMSB is numbered from 1. We want to place it in the integer
2070                 // bit numbered PRECISION if possible, with a compensating change in
2071                 // the exponent.
2072                 let final_exp = exp.saturating_add(omsb as ExpInt - calc_precision as ExpInt);
2073
2074                 // Shifting left is easy as we don't lose precision.
2075                 if final_exp < exp {
2076                     assert_eq!(loss, Loss::ExactlyZero);
2077
2078                     let exp_change = (exp - final_exp) as usize;
2079                     sig::shift_left(sig, &mut exp, exp_change);
2080
2081                     return Status::OK.and(exp);
2082                 }
2083
2084                 // Shift right and capture any new lost fraction.
2085                 if final_exp > exp {
2086                     let exp_change = (final_exp - exp) as usize;
2087                     loss = sig::shift_right(sig, &mut exp, exp_change).combine(loss);
2088
2089                     // Keep OMSB up-to-date.
2090                     omsb = omsb.saturating_sub(exp_change);
2091                 }
2092
2093                 assert_eq!(omsb, calc_precision);
2094
2095                 // Now round the number according to round given the lost
2096                 // fraction.
2097
2098                 // As specified in IEEE 754, since we do not trap we do not report
2099                 // underflow for exact results.
2100                 if loss == Loss::ExactlyZero {
2101                     return Status::OK.and(exp);
2102                 }
2103
2104                 // Increment the significand if we're rounding away from zero.
2105                 if loss == Loss::MoreThanHalf || loss == Loss::ExactlyHalf && sig::get_bit(sig, 0) {
2106                     // We should never overflow.
2107                     assert_eq!(sig::increment(sig), 0);
2108                     omsb = sig::omsb(sig);
2109
2110                     // Did the significand increment overflow?
2111                     if omsb == calc_precision + 1 {
2112                         let _: Loss = sig::shift_right(sig, &mut exp, 1);
2113
2114                         return Status::INEXACT.and(exp);
2115                     }
2116                 }
2117
2118                 // The normal case - we were and are not denormal, and any
2119                 // significand increment above didn't overflow.
2120                 Status::INEXACT.and(exp)
2121             };
2122
2123             let status;
2124             let mut exp = unpack!(status=,
2125                 calc_normal_from_limbs(&mut sig_calc, &dec_sig));
2126             let pow5_status;
2127             let pow5_exp = unpack!(pow5_status=,
2128                 calc_normal_from_limbs(&mut pow5_calc, &pow5_full));
2129
2130             // Add dec_exp, as 10^n = 5^n * 2^n.
2131             exp += dec_exp as ExpInt;
2132
2133             let mut used_bits = S::PRECISION;
2134             let mut truncated_bits = calc_precision - used_bits;
2135
2136             let half_ulp_err1 = (status != Status::OK) as Limb;
2137             let (calc_loss, half_ulp_err2);
2138             if dec_exp >= 0 {
2139                 exp += pow5_exp;
2140
2141                 sig_scratch_calc.resize(sig_calc.len() + pow5_calc.len(), 0);
2142                 calc_loss = sig::mul(
2143                     &mut sig_scratch_calc,
2144                     &mut exp,
2145                     &sig_calc,
2146                     &pow5_calc,
2147                     calc_precision,
2148                 );
2149                 mem::swap(&mut sig_calc, &mut sig_scratch_calc);
2150
2151                 half_ulp_err2 = (pow5_status != Status::OK) as Limb;
2152             } else {
2153                 exp -= pow5_exp;
2154
2155                 sig_scratch_calc.resize(sig_calc.len(), 0);
2156                 calc_loss = sig::div(
2157                     &mut sig_scratch_calc,
2158                     &mut exp,
2159                     &mut sig_calc,
2160                     &mut pow5_calc,
2161                     calc_precision,
2162                 );
2163                 mem::swap(&mut sig_calc, &mut sig_scratch_calc);
2164
2165                 // Denormal numbers have less precision.
2166                 if exp < S::MIN_EXP {
2167                     truncated_bits += (S::MIN_EXP - exp) as usize;
2168                     used_bits = calc_precision.saturating_sub(truncated_bits);
2169                 }
2170                 // Extra half-ulp lost in reciprocal of exponent.
2171                 half_ulp_err2 = 2 *
2172                     (pow5_status != Status::OK || calc_loss != Loss::ExactlyZero) as Limb;
2173             }
2174
2175             // Both sig::mul and sig::div return the
2176             // result with the integer bit set.
2177             assert!(sig::get_bit(&sig_calc, calc_precision - 1));
2178
2179             // The error from the true value, in half-ulps, on multiplying two
2180             // floating point numbers, which differ from the value they
2181             // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less
2182             // than the returned value.
2183             //
2184             // See "How to Read Floating Point Numbers Accurately" by William D Clinger.
2185             assert!(
2186                 half_ulp_err1 < 2 || half_ulp_err2 < 2 || (half_ulp_err1 + half_ulp_err2 < 8)
2187             );
2188
2189             let inexact = (calc_loss != Loss::ExactlyZero) as Limb;
2190             let half_ulp_err = if half_ulp_err1 + half_ulp_err2 == 0 {
2191                 inexact * 2 // <= inexact half-ulps.
2192             } else {
2193                 inexact + 2 * (half_ulp_err1 + half_ulp_err2)
2194             };
2195
2196             let ulps_from_boundary = {
2197                 let bits = calc_precision - used_bits - 1;
2198
2199                 let i = bits / LIMB_BITS;
2200                 let limb = sig_calc[i] & (!0 >> (LIMB_BITS - 1 - bits % LIMB_BITS));
2201                 let boundary = match round {
2202                     Round::NearestTiesToEven | Round::NearestTiesToAway => 1 << (bits % LIMB_BITS),
2203                     _ => 0,
2204                 };
2205                 if i == 0 {
2206                     let delta = limb.wrapping_sub(boundary);
2207                     cmp::min(delta, delta.wrapping_neg())
2208                 } else if limb == boundary {
2209                     if !sig::is_all_zeros(&sig_calc[1..i]) {
2210                         !0 // A lot.
2211                     } else {
2212                         sig_calc[0]
2213                     }
2214                 } else if limb == boundary.wrapping_sub(1) {
2215                     if sig_calc[1..i].iter().any(|&x| x.wrapping_neg() != 1) {
2216                         !0 // A lot.
2217                     } else {
2218                         sig_calc[0].wrapping_neg()
2219                     }
2220                 } else {
2221                     !0 // A lot.
2222                 }
2223             };
2224
2225             // Are we guaranteed to round correctly if we truncate?
2226             if ulps_from_boundary.saturating_mul(2) >= half_ulp_err {
2227                 let mut r = IeeeFloat {
2228                     sig: [0],
2229                     exp,
2230                     category: Category::Normal,
2231                     sign: false,
2232                     marker: PhantomData,
2233                 };
2234                 sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits);
2235                 // If we extracted less bits above we must adjust our exponent
2236                 // to compensate for the implicit right shift.
2237                 r.exp += (S::PRECISION - used_bits) as ExpInt;
2238                 let loss = Loss::through_truncation(&sig_calc, truncated_bits);
2239                 return Ok(r.normalize(round, loss));
2240             }
2241         }
2242     }
2243 }
2244
2245 impl Loss {
2246     /// Combine the effect of two lost fractions.
2247     fn combine(self, less_significant: Loss) -> Loss {
2248         let mut more_significant = self;
2249         if less_significant != Loss::ExactlyZero {
2250             if more_significant == Loss::ExactlyZero {
2251                 more_significant = Loss::LessThanHalf;
2252             } else if more_significant == Loss::ExactlyHalf {
2253                 more_significant = Loss::MoreThanHalf;
2254             }
2255         }
2256
2257         more_significant
2258     }
2259
2260     /// Returns the fraction lost were a bignum truncated losing the least
2261     /// significant `bits` bits.
2262     fn through_truncation(limbs: &[Limb], bits: usize) -> Loss {
2263         if bits == 0 {
2264             return Loss::ExactlyZero;
2265         }
2266
2267         let half_bit = bits - 1;
2268         let half_limb = half_bit / LIMB_BITS;
2269         let (half_limb, rest) = if half_limb < limbs.len() {
2270             (limbs[half_limb], &limbs[..half_limb])
2271         } else {
2272             (0, limbs)
2273         };
2274         let half = 1 << (half_bit % LIMB_BITS);
2275         let has_half = half_limb & half != 0;
2276         let has_rest = half_limb & (half - 1) != 0 || !sig::is_all_zeros(rest);
2277
2278         match (has_half, has_rest) {
2279             (false, false) => Loss::ExactlyZero,
2280             (false, true) => Loss::LessThanHalf,
2281             (true, false) => Loss::ExactlyHalf,
2282             (true, true) => Loss::MoreThanHalf,
2283         }
2284     }
2285 }
2286
2287 /// Implementation details of IeeeFloat significands, such as big integer arithmetic.
2288 /// As a rule of thumb, no functions in this module should dynamically allocate.
2289 mod sig {
2290     use std::cmp::Ordering;
2291     use std::mem;
2292     use super::{ExpInt, Limb, LIMB_BITS, limbs_for_bits, Loss};
2293
2294     pub(super) fn is_all_zeros(limbs: &[Limb]) -> bool {
2295         limbs.iter().all(|&l| l == 0)
2296     }
2297
2298     /// One, not zero, based LSB. That is, returns 0 for a zeroed significand.
2299     pub(super) fn olsb(limbs: &[Limb]) -> usize {
2300         limbs.iter().enumerate().find(|(_, &limb)| limb != 0).map_or(0,
2301             |(i, limb)| i * LIMB_BITS + limb.trailing_zeros() as usize + 1)
2302     }
2303
2304     /// One, not zero, based MSB. That is, returns 0 for a zeroed significand.
2305     pub(super) fn omsb(limbs: &[Limb]) -> usize {
2306         limbs.iter().enumerate().rfind(|(_, &limb)| limb != 0).map_or(0,
2307             |(i, limb)| (i + 1) * LIMB_BITS - limb.leading_zeros() as usize)
2308     }
2309
2310     /// Comparison (unsigned) of two significands.
2311     pub(super) fn cmp(a: &[Limb], b: &[Limb]) -> Ordering {
2312         assert_eq!(a.len(), b.len());
2313         for (a, b) in a.iter().zip(b).rev() {
2314             match a.cmp(b) {
2315                 Ordering::Equal => {}
2316                 o => return o,
2317             }
2318         }
2319
2320         Ordering::Equal
2321     }
2322
2323     /// Extracts the given bit.
2324     pub(super) fn get_bit(limbs: &[Limb], bit: usize) -> bool {
2325         limbs[bit / LIMB_BITS] & (1 << (bit % LIMB_BITS)) != 0
2326     }
2327
2328     /// Sets the given bit.
2329     pub(super) fn set_bit(limbs: &mut [Limb], bit: usize) {
2330         limbs[bit / LIMB_BITS] |= 1 << (bit % LIMB_BITS);
2331     }
2332
2333     /// Clear the given bit.
2334     pub(super) fn clear_bit(limbs: &mut [Limb], bit: usize) {
2335         limbs[bit / LIMB_BITS] &= !(1 << (bit % LIMB_BITS));
2336     }
2337
2338     /// Shifts `dst` left `bits` bits, subtract `bits` from its exponent.
2339     pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) {
2340         if bits > 0 {
2341             // Our exponent should not underflow.
2342             *exp = exp.checked_sub(bits as ExpInt).unwrap();
2343
2344             // Jump is the inter-limb jump; shift is the intra-limb shift.
2345             let jump = bits / LIMB_BITS;
2346             let shift = bits % LIMB_BITS;
2347
2348             for i in (0..dst.len()).rev() {
2349                 let mut limb;
2350
2351                 if i < jump {
2352                     limb = 0;
2353                 } else {
2354                     // dst[i] comes from the two limbs src[i - jump] and, if we have
2355                     // an intra-limb shift, src[i - jump - 1].
2356                     limb = dst[i - jump];
2357                     if shift > 0 {
2358                         limb <<= shift;
2359                         if i > jump {
2360                             limb |= dst[i - jump - 1] >> (LIMB_BITS - shift);
2361                         }
2362                     }
2363                 }
2364
2365                 dst[i] = limb;
2366             }
2367         }
2368     }
2369
2370     /// Shifts `dst` right `bits` bits noting lost fraction.
2371     pub(super) fn shift_right(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) -> Loss {
2372         let loss = Loss::through_truncation(dst, bits);
2373
2374         if bits > 0 {
2375             // Our exponent should not overflow.
2376             *exp = exp.checked_add(bits as ExpInt).unwrap();
2377
2378             // Jump is the inter-limb jump; shift is the intra-limb shift.
2379             let jump = bits / LIMB_BITS;
2380             let shift = bits % LIMB_BITS;
2381
2382             // Perform the shift. This leaves the most significant `bits` bits
2383             // of the result at zero.
2384             for i in 0..dst.len() {
2385                 let mut limb;
2386
2387                 if i + jump >= dst.len() {
2388                     limb = 0;
2389                 } else {
2390                     limb = dst[i + jump];
2391                     if shift > 0 {
2392                         limb >>= shift;
2393                         if i + jump + 1 < dst.len() {
2394                             limb |= dst[i + jump + 1] << (LIMB_BITS - shift);
2395                         }
2396                     }
2397                 }
2398
2399                 dst[i] = limb;
2400             }
2401         }
2402
2403         loss
2404     }
2405
2406     /// Copies the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB,
2407     /// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`.
2408     /// All high bits above `src_bits` in `dst` are zero-filled.
2409     pub(super) fn extract(dst: &mut [Limb], src: &[Limb], src_bits: usize, src_lsb: usize) {
2410         if src_bits == 0 {
2411             return;
2412         }
2413
2414         let dst_limbs = limbs_for_bits(src_bits);
2415         assert!(dst_limbs <= dst.len());
2416
2417         let src = &src[src_lsb / LIMB_BITS..];
2418         dst[..dst_limbs].copy_from_slice(&src[..dst_limbs]);
2419
2420         let shift = src_lsb % LIMB_BITS;
2421         let _: Loss = shift_right(&mut dst[..dst_limbs], &mut 0, shift);
2422
2423         // We now have (dst_limbs * LIMB_BITS - shift) bits from `src`
2424         // in `dst`.  If this is less that src_bits, append the rest, else
2425         // clear the high bits.
2426         let n = dst_limbs * LIMB_BITS - shift;
2427         if n < src_bits {
2428             let mask = (1 << (src_bits - n)) - 1;
2429             dst[dst_limbs - 1] |= (src[dst_limbs] & mask) << (n % LIMB_BITS);
2430         } else if n > src_bits && src_bits % LIMB_BITS > 0 {
2431             dst[dst_limbs - 1] &= (1 << (src_bits % LIMB_BITS)) - 1;
2432         }
2433
2434         // Clear high limbs.
2435         for x in &mut dst[dst_limbs..] {
2436             *x = 0;
2437         }
2438     }
2439
2440     /// We want the most significant PRECISION bits of `src`. There may not
2441     /// be that many; extract what we can.
2442     pub(super) fn from_limbs(dst: &mut [Limb], src: &[Limb], precision: usize) -> (Loss, ExpInt) {
2443         let omsb = omsb(src);
2444
2445         if precision <= omsb {
2446             extract(dst, src, precision, omsb - precision);
2447             (
2448                 Loss::through_truncation(src, omsb - precision),
2449                 omsb as ExpInt - 1,
2450             )
2451         } else {
2452             extract(dst, src, omsb, 0);
2453             (Loss::ExactlyZero, precision as ExpInt - 1)
2454         }
2455     }
2456
2457     /// For every consecutive chunk of `bits` bits from `limbs`,
2458     /// going from most significant to the least significant bits,
2459     /// call `f` to transform those bits and store the result back.
2460     pub(super) fn each_chunk<F: FnMut(Limb) -> Limb>(limbs: &mut [Limb], bits: usize, mut f: F) {
2461         assert_eq!(LIMB_BITS % bits, 0);
2462         for limb in limbs.iter_mut().rev() {
2463             let mut r = 0;
2464             for i in (0..LIMB_BITS / bits).rev() {
2465                 r |= f((*limb >> (i * bits)) & ((1 << bits) - 1)) << (i * bits);
2466             }
2467             *limb = r;
2468         }
2469     }
2470
2471     /// Increment in-place, return the carry flag.
2472     pub(super) fn increment(dst: &mut [Limb]) -> Limb {
2473         for x in dst {
2474             *x = x.wrapping_add(1);
2475             if *x != 0 {
2476                 return 0;
2477             }
2478         }
2479
2480         1
2481     }
2482
2483     /// Decrement in-place, return the borrow flag.
2484     pub(super) fn decrement(dst: &mut [Limb]) -> Limb {
2485         for x in dst {
2486             *x = x.wrapping_sub(1);
2487             if *x != !0 {
2488                 return 0;
2489             }
2490         }
2491
2492         1
2493     }
2494
2495     /// `a += b + c` where `c` is zero or one. Returns the carry flag.
2496     pub(super) fn add(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
2497         assert!(c <= 1);
2498
2499         for (a, &b) in a.iter_mut().zip(b) {
2500             let (r, overflow) = a.overflowing_add(b);
2501             let (r, overflow2) = r.overflowing_add(c);
2502             *a = r;
2503             c = (overflow | overflow2) as Limb;
2504         }
2505
2506         c
2507     }
2508
2509     /// `a -= b + c` where `c` is zero or one. Returns the borrow flag.
2510     pub(super) fn sub(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
2511         assert!(c <= 1);
2512
2513         for (a, &b) in a.iter_mut().zip(b) {
2514             let (r, overflow) = a.overflowing_sub(b);
2515             let (r, overflow2) = r.overflowing_sub(c);
2516             *a = r;
2517             c = (overflow | overflow2) as Limb;
2518         }
2519
2520         c
2521     }
2522
2523     /// `a += b` or `a -= b`. Does not preserve `b`.
2524     pub(super) fn add_or_sub(
2525         a_sig: &mut [Limb],
2526         a_exp: &mut ExpInt,
2527         a_sign: &mut bool,
2528         b_sig: &mut [Limb],
2529         b_exp: ExpInt,
2530         b_sign: bool,
2531     ) -> Loss {
2532         // Are we bigger exponent-wise than the RHS?
2533         let bits = *a_exp - b_exp;
2534
2535         // Determine if the operation on the absolute values is effectively
2536         // an addition or subtraction.
2537         // Subtraction is more subtle than one might naively expect.
2538         if *a_sign ^ b_sign {
2539             let (reverse, loss);
2540
2541             if bits == 0 {
2542                 reverse = cmp(a_sig, b_sig) == Ordering::Less;
2543                 loss = Loss::ExactlyZero;
2544             } else if bits > 0 {
2545                 loss = shift_right(b_sig, &mut 0, (bits - 1) as usize);
2546                 shift_left(a_sig, a_exp, 1);
2547                 reverse = false;
2548             } else {
2549                 loss = shift_right(a_sig, a_exp, (-bits - 1) as usize);
2550                 shift_left(b_sig, &mut 0, 1);
2551                 reverse = true;
2552             }
2553
2554             let borrow = (loss != Loss::ExactlyZero) as Limb;
2555             if reverse {
2556                 // The code above is intended to ensure that no borrow is necessary.
2557                 assert_eq!(sub(b_sig, a_sig, borrow), 0);
2558                 a_sig.copy_from_slice(b_sig);
2559                 *a_sign = !*a_sign;
2560             } else {
2561                 // The code above is intended to ensure that no borrow is necessary.
2562                 assert_eq!(sub(a_sig, b_sig, borrow), 0);
2563             }
2564
2565             // Invert the lost fraction - it was on the RHS and subtracted.
2566             match loss {
2567                 Loss::LessThanHalf => Loss::MoreThanHalf,
2568                 Loss::MoreThanHalf => Loss::LessThanHalf,
2569                 _ => loss,
2570             }
2571         } else {
2572             let loss = if bits > 0 {
2573                 shift_right(b_sig, &mut 0, bits as usize)
2574             } else {
2575                 shift_right(a_sig, a_exp, -bits as usize)
2576             };
2577             // We have a guard bit; generating a carry cannot happen.
2578             assert_eq!(add(a_sig, b_sig, 0), 0);
2579             loss
2580         }
2581     }
2582
2583     /// `[low, high] = a * b`.
2584     ///
2585     /// This cannot overflow, because
2586     ///
2587     /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)`
2588     ///
2589     /// which is less than n<sup>2</sup>.
2590     pub(super) fn widening_mul(a: Limb, b: Limb) -> [Limb; 2] {
2591         let mut wide = [0, 0];
2592
2593         if a == 0 || b == 0 {
2594             return wide;
2595         }
2596
2597         const HALF_BITS: usize = LIMB_BITS / 2;
2598
2599         let select = |limb, i| (limb >> (i * HALF_BITS)) & ((1 << HALF_BITS) - 1);
2600         for i in 0..2 {
2601             for j in 0..2 {
2602                 let mut x = [select(a, i) * select(b, j), 0];
2603                 shift_left(&mut x, &mut 0, (i + j) * HALF_BITS);
2604                 assert_eq!(add(&mut wide, &x, 0), 0);
2605             }
2606         }
2607
2608         wide
2609     }
2610
2611     /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction.
2612     pub(super) fn mul<'a>(
2613         dst: &mut [Limb],
2614         exp: &mut ExpInt,
2615         mut a: &'a [Limb],
2616         mut b: &'a [Limb],
2617         precision: usize,
2618     ) -> Loss {
2619         // Put the narrower number on the `a` for less loops below.
2620         if a.len() > b.len() {
2621             mem::swap(&mut a, &mut b);
2622         }
2623
2624         for x in &mut dst[..b.len()] {
2625             *x = 0;
2626         }
2627
2628         for i in 0..a.len() {
2629             let mut carry = 0;
2630             for j in 0..b.len() {
2631                 let [low, mut high] = widening_mul(a[i], b[j]);
2632
2633                 // Now add carry.
2634                 let (low, overflow) = low.overflowing_add(carry);
2635                 high += overflow as Limb;
2636
2637                 // And now `dst[i + j]`, and store the new low part there.
2638                 let (low, overflow) = low.overflowing_add(dst[i + j]);
2639                 high += overflow as Limb;
2640
2641                 dst[i + j] = low;
2642                 carry = high;
2643             }
2644             dst[i + b.len()] = carry;
2645         }
2646
2647         // Assume the operands involved in the multiplication are single-precision
2648         // FP, and the two multiplicants are:
2649         //     a = a23 . a22 ... a0 * 2^e1
2650         //     b = b23 . b22 ... b0 * 2^e2
2651         // the result of multiplication is:
2652         //     dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
2653         // Note that there are three significant bits at the left-hand side of the
2654         // radix point: two for the multiplication, and an overflow bit for the
2655         // addition (that will always be zero at this point). Move the radix point
2656         // toward left by two bits, and adjust exponent accordingly.
2657         *exp += 2;
2658
2659         // Convert the result having "2 * precision" significant-bits back to the one
2660         // having "precision" significant-bits. First, move the radix point from
2661         // poision "2*precision - 1" to "precision - 1". The exponent need to be
2662         // adjusted by "2*precision - 1" - "precision - 1" = "precision".
2663         *exp -= precision as ExpInt + 1;
2664
2665         // In case MSB resides at the left-hand side of radix point, shift the
2666         // mantissa right by some amount to make sure the MSB reside right before
2667         // the radix point (i.e., "MSB . rest-significant-bits").
2668         //
2669         // Note that the result is not normalized when "omsb < precision". So, the
2670         // caller needs to call IeeeFloat::normalize() if normalized value is
2671         // expected.
2672         let omsb = omsb(dst);
2673         if omsb <= precision {
2674             Loss::ExactlyZero
2675         } else {
2676             shift_right(dst, exp, omsb - precision)
2677         }
2678     }
2679
2680     /// `quotient = dividend / divisor`. Returns the lost fraction.
2681     /// Does not preserve `dividend` or `divisor`.
2682     pub(super) fn div(
2683         quotient: &mut [Limb],
2684         exp: &mut ExpInt,
2685         dividend: &mut [Limb],
2686         divisor: &mut [Limb],
2687         precision: usize,
2688     ) -> Loss {
2689
2690         // Normalize the divisor.
2691         let bits = precision - omsb(divisor);
2692         shift_left(divisor, &mut 0, bits);
2693         *exp += bits as ExpInt;
2694
2695         // Normalize the dividend.
2696         let bits = precision - omsb(dividend);
2697         shift_left(dividend, exp, bits);
2698
2699         // Division by 1.
2700         let olsb_divisor = olsb(divisor);
2701         if olsb_divisor == precision {
2702             quotient.copy_from_slice(dividend);
2703             return Loss::ExactlyZero;
2704         }
2705
2706         // Ensure the dividend >= divisor initially for the loop below.
2707         // Incidentally, this means that the division loop below is
2708         // guaranteed to set the integer bit to one.
2709         if cmp(dividend, divisor) == Ordering::Less {
2710             shift_left(dividend, exp, 1);
2711             assert_ne!(cmp(dividend, divisor), Ordering::Less)
2712         }
2713
2714         // Helper for figuring out the lost fraction.
2715         let lost_fraction = |dividend: &[Limb], divisor: &[Limb]| {
2716             match cmp(dividend, divisor) {
2717                 Ordering::Greater => Loss::MoreThanHalf,
2718                 Ordering::Equal => Loss::ExactlyHalf,
2719                 Ordering::Less => {
2720                     if is_all_zeros(dividend) {
2721                         Loss::ExactlyZero
2722                     } else {
2723                         Loss::LessThanHalf
2724                     }
2725                 }
2726             }
2727         };
2728
2729         // Try to perform a (much faster) short division for small divisors.
2730         let divisor_bits = precision - (olsb_divisor - 1);
2731         macro_rules! try_short_div {
2732             ($W:ty, $H:ty, $half:expr) => {
2733                 if divisor_bits * 2 <= $half {
2734                     // Extract the small divisor.
2735                     let _: Loss = shift_right(divisor, &mut 0, olsb_divisor - 1);
2736                     let divisor = divisor[0] as $H as $W;
2737
2738                     // Shift the dividend to produce a quotient with the unit bit set.
2739                     let top_limb = *dividend.last().unwrap();
2740                     let mut rem = (top_limb >> (LIMB_BITS - (divisor_bits - 1))) as $H;
2741                     shift_left(dividend, &mut 0, divisor_bits - 1);
2742
2743                     // Apply short division in place on $H (of $half bits) chunks.
2744                     each_chunk(dividend, $half, |chunk| {
2745                         let chunk = chunk as $H;
2746                         let combined = ((rem as $W) << $half) | (chunk as $W);
2747                         rem = (combined % divisor) as $H;
2748                         (combined / divisor) as $H as Limb
2749                     });
2750                     quotient.copy_from_slice(dividend);
2751
2752                     return lost_fraction(&[(rem as Limb) << 1], &[divisor as Limb]);
2753                 }
2754             }
2755         }
2756
2757         try_short_div!(u32, u16, 16);
2758         try_short_div!(u64, u32, 32);
2759         try_short_div!(u128, u64, 64);
2760
2761         // Zero the quotient before setting bits in it.
2762         for x in &mut quotient[..limbs_for_bits(precision)] {
2763             *x = 0;
2764         }
2765
2766         // Long division.
2767         for bit in (0..precision).rev() {
2768             if cmp(dividend, divisor) != Ordering::Less {
2769                 sub(dividend, divisor, 0);
2770                 set_bit(quotient, bit);
2771             }
2772             shift_left(dividend, &mut 0, 1);
2773         }
2774
2775         lost_fraction(dividend, divisor)
2776     }
2777 }