1 // Copyright 2017 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Port of LLVM's APFloat software floating-point implementation from the
12 //! following C++ sources (please update commit hash when backporting):
13 //! https://github.com/llvm-mirror/llvm/tree/23efab2bbd424ed13495a420ad8641cb2c6c28f9
14 //! * `include/llvm/ADT/APFloat.h` -> `Float` and `FloatConvert` traits
15 //! * `lib/Support/APFloat.cpp` -> `ieee` and `ppc` modules
16 //! * `unittests/ADT/APFloatTest.cpp` -> `tests` directory
18 //! The port contains no unsafe code, global state, or side-effects in general,
19 //! and the only allocations are in the conversion to/from decimal strings.
21 //! Most of the API and the testcases are intact in some form or another,
22 //! with some ergonomic changes, such as idiomatic short names, returning
23 //! new values instead of mutating the receiver, and having separate method
24 //! variants that take a non-default rounding mode (with the suffix `_r`).
25 //! Comments have been preserved where possible, only slightly adapted.
27 //! Instead of keeping a pointer to a configuration struct and inspecting it
28 //! dynamically on every operation, types (e.g. `ieee::Double`), traits
29 //! (e.g. `ieee::Semantics`) and associated constants are employed for
30 //! increased type safety and performance.
32 //! On-heap bigints are replaced everywhere (except in decimal conversion),
33 //! with short arrays of `type Limb = u128` elements (instead of `u64`),
34 //! This allows fitting the largest supported significands in one integer
35 //! (`ieee::Quad` and `ppc::Fallback` use slightly less than 128 bits).
36 //! All of the functions in the `ieee::sig` module operate on slices.
40 //! This API is completely unstable and subject to change.
42 #![doc(html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
43 html_favicon_url = "https://doc.rust-lang.org/favicon.ico",
44 html_root_url = "https://doc.rust-lang.org/nightly/")]
46 #![forbid(unsafe_code)]
48 #![feature(const_max_value)]
49 #![feature(const_min_value)]
50 #![feature(i128_type)]
51 #![feature(slice_patterns)]
54 // See librustc_cratesio_shim/Cargo.toml for a comment explaining this.
55 #[allow(unused_extern_crates)]
56 extern crate rustc_cratesio_shim;
59 extern crate bitflags;
61 use std::cmp::Ordering;
63 use std::ops::{Neg, Add, Sub, Mul, Div, Rem};
64 use std::ops::{AddAssign, SubAssign, MulAssign, DivAssign, RemAssign};
65 use std::str::FromStr;
68 /// IEEE-754R 7: Default exception handling.
70 /// UNDERFLOW or OVERFLOW are always returned or-ed with INEXACT.
72 pub struct Status: u8 {
74 const INVALID_OP = 0x01;
75 const DIV_BY_ZERO = 0x02;
76 const OVERFLOW = 0x04;
77 const UNDERFLOW = 0x08;
83 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
84 pub struct StatusAnd<T> {
90 pub fn and<T>(self, value: T) -> StatusAnd<T> {
98 impl<T> StatusAnd<T> {
99 fn map<F: FnOnce(T) -> U, U>(self, f: F) -> StatusAnd<U> {
102 value: f(self.value),
108 macro_rules! unpack {
109 ($status:ident|=, $e:expr) => {
111 $crate::StatusAnd { status, value } => {
117 ($status:ident=, $e:expr) => {
119 $crate::StatusAnd { status, value } => {
127 /// Category of internally-represented number.
128 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
136 /// IEEE-754R 4.3: Rounding-direction attributes.
137 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
148 fn neg(self) -> Round {
150 Round::TowardPositive => Round::TowardNegative,
151 Round::TowardNegative => Round::TowardPositive,
152 Round::NearestTiesToEven | Round::TowardZero | Round::NearestTiesToAway => self,
157 /// A signed type to represent a floating point number's unbiased exponent.
158 pub type ExpInt = i16;
160 // \c ilogb error results.
161 pub const IEK_INF: ExpInt = ExpInt::max_value();
162 pub const IEK_NAN: ExpInt = ExpInt::min_value();
163 pub const IEK_ZERO: ExpInt = ExpInt::min_value() + 1;
165 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
166 pub struct ParseError(pub &'static str);
168 /// A self-contained host- and target-independent arbitrary-precision
169 /// floating-point software implementation.
171 /// `apfloat` uses significand bignum integer arithmetic as provided by functions
172 /// in the `ieee::sig`.
174 /// Written for clarity rather than speed, in particular with a view to use in
175 /// the front-end of a cross compiler so that target arithmetic can be correctly
176 /// performed on the host. Performance should nonetheless be reasonable,
177 /// particularly for its intended use. It may be useful as a base
178 /// implementation for a run-time library during development of a faster
179 /// target-specific one.
181 /// All 5 rounding modes in the IEEE-754R draft are handled correctly for all
182 /// implemented operations. Currently implemented operations are add, subtract,
183 /// multiply, divide, fused-multiply-add, conversion-to-float,
184 /// conversion-to-integer and conversion-from-integer. New rounding modes
185 /// (e.g. away from zero) can be added with three or four lines of code.
187 /// Four formats are built-in: IEEE single precision, double precision,
188 /// quadruple precision, and x87 80-bit extended double (when operating with
189 /// full extended precision). Adding a new format that obeys IEEE semantics
190 /// only requires adding two lines of code: a declaration and definition of the
193 /// All operations return the status of that operation as an exception bit-mask,
194 /// so multiple operations can be done consecutively with their results or-ed
195 /// together. The returned status can be useful for compiler diagnostics; e.g.,
196 /// inexact, underflow and overflow can be easily diagnosed on constant folding,
197 /// and compiler optimizers can determine what exceptions would be raised by
198 /// folding operations and optimize, or perhaps not optimize, accordingly.
200 /// At present, underflow tininess is detected after rounding; it should be
201 /// straight forward to add support for the before-rounding case too.
203 /// The library reads hexadecimal floating point numbers as per C99, and
204 /// correctly rounds if necessary according to the specified rounding mode.
205 /// Syntax is required to have been validated by the caller.
207 /// It also reads decimal floating point numbers and correctly rounds according
208 /// to the specified rounding mode.
210 /// Non-zero finite numbers are represented internally as a sign bit, a 16-bit
211 /// signed exponent, and the significand as an array of integer limbs. After
212 /// normalization of a number of precision P the exponent is within the range of
213 /// the format, and if the number is not denormal the P-th bit of the
214 /// significand is set as an explicit integer bit. For denormals the most
215 /// significant bit is shifted right so that the exponent is maintained at the
216 /// format's minimum, so that the smallest denormal has just the least
217 /// significant bit of the significand set. The sign of zeros and infinities
218 /// is significant; the exponent and significand of such numbers is not stored,
219 /// but has a known implicit (deterministic) value: 0 for the significands, 0
220 /// for zero exponent, all 1 bits for infinity exponent. For NaNs the sign and
221 /// significand are deterministic, although not really meaningful, and preserved
222 /// in non-conversion operations. The exponent is implicitly all 1 bits.
224 /// `apfloat` does not provide any exception handling beyond default exception
225 /// handling. We represent Signaling NaNs via IEEE-754R 2008 6.2.1 should clause
226 /// by encoding Signaling NaNs with the first bit of its trailing significand as
232 /// Some features that may or may not be worth adding:
234 /// Optional ability to detect underflow tininess before rounding.
236 /// New formats: x87 in single and double precision mode (IEEE apart from
237 /// extended exponent range) (hard).
239 /// New operations: sqrt, nexttoward.
244 + FromStr<Err = ParseError>
253 + Add<Output = StatusAnd<Self>>
254 + Sub<Output = StatusAnd<Self>>
255 + Mul<Output = StatusAnd<Self>>
256 + Div<Output = StatusAnd<Self>>
257 + Rem<Output = StatusAnd<Self>> {
258 /// Total number of bits in the in-memory format.
261 /// Number of bits in the significand. This includes the integer bit.
262 const PRECISION: usize;
264 /// The largest E such that 2^E is representable; this matches the
265 /// definition of IEEE 754.
266 const MAX_EXP: ExpInt;
268 /// The smallest E such that 2^E is a normalized number; this
269 /// matches the definition of IEEE 754.
270 const MIN_EXP: ExpInt;
275 /// Positive Infinity.
276 const INFINITY: Self;
278 /// NaN (Not a Number).
279 // FIXME(eddyb) provide a default when qnan becomes const fn.
282 /// Factory for QNaN values.
283 // FIXME(eddyb) should be const fn.
284 fn qnan(payload: Option<u128>) -> Self;
286 /// Factory for SNaN values.
287 // FIXME(eddyb) should be const fn.
288 fn snan(payload: Option<u128>) -> Self;
290 /// Largest finite number.
291 // FIXME(eddyb) should be const (but FloatPair::largest is nontrivial).
292 fn largest() -> Self;
294 /// Smallest (by magnitude) finite number.
295 /// Might be denormalized, which implies a relative loss of precision.
296 const SMALLEST: Self;
298 /// Smallest (by magnitude) normalized finite number.
299 // FIXME(eddyb) should be const (but FloatPair::smallest_normalized is nontrivial).
300 fn smallest_normalized() -> Self;
304 fn add_r(self, rhs: Self, round: Round) -> StatusAnd<Self>;
305 fn sub_r(self, rhs: Self, round: Round) -> StatusAnd<Self> {
306 self.add_r(-rhs, round)
308 fn mul_r(self, rhs: Self, round: Round) -> StatusAnd<Self>;
309 fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self>;
310 fn mul_add(self, multiplicand: Self, addend: Self) -> StatusAnd<Self> {
311 self.mul_add_r(multiplicand, addend, Round::NearestTiesToEven)
313 fn div_r(self, rhs: Self, round: Round) -> StatusAnd<Self>;
315 // This is not currently correct in all cases.
316 fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> {
320 v = unpack!(status=, v / rhs);
321 if status == Status::DIV_BY_ZERO {
322 return status.and(self);
325 assert!(Self::PRECISION < 128);
328 let x = unpack!(status=, v.to_i128_r(128, Round::NearestTiesToEven, &mut false));
329 if status == Status::INVALID_OP {
330 return status.and(self);
334 let mut v = unpack!(status=, Self::from_i128(x));
335 assert_eq!(status, Status::OK); // should always work
338 v = unpack!(status=, v * rhs);
339 assert_eq!(status - Status::INEXACT, Status::OK); // should not overflow or underflow
342 v = unpack!(status=, self - v);
343 assert_eq!(status - Status::INEXACT, Status::OK); // likewise
346 status.and(v.copy_sign(self)) // IEEE754 requires this
351 /// C fmod, or llvm frem.
352 fn c_fmod(self, rhs: Self) -> StatusAnd<Self>;
353 fn round_to_integral(self, round: Round) -> StatusAnd<Self>;
355 /// IEEE-754R 2008 5.3.1: nextUp.
356 fn next_up(self) -> StatusAnd<Self>;
358 /// IEEE-754R 2008 5.3.1: nextDown.
360 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
361 /// appropriate sign switching before/after the computation.
362 fn next_down(self) -> StatusAnd<Self> {
363 (-self).next_up().map(|r| -r)
366 fn abs(self) -> Self {
367 if self.is_negative() { -self } else { self }
369 fn copy_sign(self, rhs: Self) -> Self {
370 if self.is_negative() != rhs.is_negative() {
378 fn from_bits(input: u128) -> Self;
379 fn from_i128_r(input: i128, round: Round) -> StatusAnd<Self> {
381 Self::from_u128_r(input.wrapping_neg() as u128, -round).map(|r| -r)
383 Self::from_u128_r(input as u128, round)
386 fn from_i128(input: i128) -> StatusAnd<Self> {
387 Self::from_i128_r(input, Round::NearestTiesToEven)
389 fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self>;
390 fn from_u128(input: u128) -> StatusAnd<Self> {
391 Self::from_u128_r(input, Round::NearestTiesToEven)
393 fn from_str_r(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError>;
394 fn to_bits(self) -> u128;
396 /// Convert a floating point number to an integer according to the
397 /// rounding mode. In case of an invalid operation exception,
398 /// deterministic values are returned, namely zero for NaNs and the
399 /// minimal or maximal value respectively for underflow or overflow.
400 /// If the rounded value is in range but the floating point number is
401 /// not the exact integer, the C standard doesn't require an inexact
402 /// exception to be raised. IEEE-854 does require it so we do that.
404 /// Note that for conversions to integer type the C standard requires
405 /// round-to-zero to always be used.
407 /// The *is_exact output tells whether the result is exact, in the sense
408 /// that converting it back to the original floating point type produces
409 /// the original value. This is almost equivalent to result==Status::OK,
410 /// except for negative zeroes.
411 fn to_i128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<i128> {
413 if self.is_negative() {
415 // Negative zero can't be represented as an int.
418 let r = unpack!(status=, (-self).to_u128_r(width, -round, is_exact));
420 // Check for values that don't fit in the signed integer.
421 if r > (1 << (width - 1)) {
422 // Return the most negative integer for the given width.
424 Status::INVALID_OP.and(-1 << (width - 1))
426 status.and(r.wrapping_neg() as i128)
429 // Positive case is simpler, can pretend it's a smaller unsigned
430 // integer, and `to_u128` will take care of all the edge cases.
431 self.to_u128_r(width - 1, round, is_exact).map(
436 fn to_i128(self, width: usize) -> StatusAnd<i128> {
437 self.to_i128_r(width, Round::TowardZero, &mut true)
439 fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128>;
440 fn to_u128(self, width: usize) -> StatusAnd<u128> {
441 self.to_u128_r(width, Round::TowardZero, &mut true)
444 fn cmp_abs_normal(self, rhs: Self) -> Ordering;
446 /// Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
447 fn bitwise_eq(self, rhs: Self) -> bool;
449 // IEEE-754R 5.7.2 General operations.
451 /// Implements IEEE minNum semantics. Returns the smaller of the 2 arguments if
452 /// both are not NaN. If either argument is a NaN, returns the other argument.
453 fn min(self, other: Self) -> Self {
456 } else if other.is_nan() {
458 } else if other.partial_cmp(&self) == Some(Ordering::Less) {
465 /// Implements IEEE maxNum semantics. Returns the larger of the 2 arguments if
466 /// both are not NaN. If either argument is a NaN, returns the other argument.
467 fn max(self, other: Self) -> Self {
470 } else if other.is_nan() {
472 } else if self.partial_cmp(&other) == Some(Ordering::Less) {
479 /// IEEE-754R isSignMinus: Returns true if and only if the current value is
482 /// This applies to zeros and NaNs as well.
483 fn is_negative(self) -> bool;
485 /// IEEE-754R isNormal: Returns true if and only if the current value is normal.
487 /// This implies that the current value of the float is not zero, subnormal,
488 /// infinite, or NaN following the definition of normality from IEEE-754R.
489 fn is_normal(self) -> bool {
490 !self.is_denormal() && self.is_finite_non_zero()
493 /// Returns true if and only if the current value is zero, subnormal, or
496 /// This means that the value is not infinite or NaN.
497 fn is_finite(self) -> bool {
498 !self.is_nan() && !self.is_infinite()
501 /// Returns true if and only if the float is plus or minus zero.
502 fn is_zero(self) -> bool {
503 self.category() == Category::Zero
506 /// IEEE-754R isSubnormal(): Returns true if and only if the float is a
508 fn is_denormal(self) -> bool;
510 /// IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
511 fn is_infinite(self) -> bool {
512 self.category() == Category::Infinity
515 /// Returns true if and only if the float is a quiet or signaling NaN.
516 fn is_nan(self) -> bool {
517 self.category() == Category::NaN
520 /// Returns true if and only if the float is a signaling NaN.
521 fn is_signaling(self) -> bool;
525 fn category(self) -> Category;
526 fn is_non_zero(self) -> bool {
529 fn is_finite_non_zero(self) -> bool {
530 self.is_finite() && !self.is_zero()
532 fn is_pos_zero(self) -> bool {
533 self.is_zero() && !self.is_negative()
535 fn is_neg_zero(self) -> bool {
536 self.is_zero() && self.is_negative()
539 /// Returns true if and only if the number has the smallest possible non-zero
540 /// magnitude in the current semantics.
541 fn is_smallest(self) -> bool {
542 Self::SMALLEST.copy_sign(self).bitwise_eq(self)
545 /// Returns true if and only if the number has the largest possible finite
546 /// magnitude in the current semantics.
547 fn is_largest(self) -> bool {
548 Self::largest().copy_sign(self).bitwise_eq(self)
551 /// Returns true if and only if the number is an exact integer.
552 fn is_integer(self) -> bool {
553 // This could be made more efficient; I'm going for obviously correct.
554 if !self.is_finite() {
557 self.round_to_integral(Round::TowardZero).value.bitwise_eq(
562 /// If this value has an exact multiplicative inverse, return it.
563 fn get_exact_inverse(self) -> Option<Self>;
565 /// Returns the exponent of the internal representation of the Float.
567 /// Because the radix of Float is 2, this is equivalent to floor(log2(x)).
568 /// For special Float values, this returns special error codes:
570 /// NaN -> \c IEK_NAN
572 /// Inf -> \c IEK_INF
574 fn ilogb(self) -> ExpInt;
576 /// Returns: self * 2^exp for integral exponents.
577 fn scalbn_r(self, exp: ExpInt, round: Round) -> Self;
578 fn scalbn(self, exp: ExpInt) -> Self {
579 self.scalbn_r(exp, Round::NearestTiesToEven)
582 /// Equivalent of C standard library function.
584 /// While the C standard says exp is an unspecified value for infinity and nan,
585 /// this returns INT_MAX for infinities, and INT_MIN for NaNs (see `ilogb`).
586 fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self;
587 fn frexp(self, exp: &mut ExpInt) -> Self {
588 self.frexp_r(exp, Round::NearestTiesToEven)
592 pub trait FloatConvert<T: Float>: Float {
593 /// Convert a value of one floating point type to another.
594 /// The return value corresponds to the IEEE754 exceptions. *loses_info
595 /// records whether the transformation lost information, i.e. whether
596 /// converting the result back to the original type will produce the
597 /// original value (this is almost the same as return value==Status::OK,
598 /// but there are edge cases where this is not so).
599 fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd<T>;
600 fn convert(self, loses_info: &mut bool) -> StatusAnd<T> {
601 self.convert_r(Round::NearestTiesToEven, loses_info)
605 macro_rules! float_common_impls {
606 ($ty:ident<$t:tt>) => {
607 impl<$t> Default for $ty<$t> where Self: Float {
608 fn default() -> Self {
613 impl<$t> ::std::str::FromStr for $ty<$t> where Self: Float {
614 type Err = ParseError;
615 fn from_str(s: &str) -> Result<Self, ParseError> {
616 Self::from_str_r(s, Round::NearestTiesToEven).map(|x| x.value)
620 // Rounding ties to the nearest even, by default.
622 impl<$t> ::std::ops::Add for $ty<$t> where Self: Float {
623 type Output = StatusAnd<Self>;
624 fn add(self, rhs: Self) -> StatusAnd<Self> {
625 self.add_r(rhs, Round::NearestTiesToEven)
629 impl<$t> ::std::ops::Sub for $ty<$t> where Self: Float {
630 type Output = StatusAnd<Self>;
631 fn sub(self, rhs: Self) -> StatusAnd<Self> {
632 self.sub_r(rhs, Round::NearestTiesToEven)
636 impl<$t> ::std::ops::Mul for $ty<$t> where Self: Float {
637 type Output = StatusAnd<Self>;
638 fn mul(self, rhs: Self) -> StatusAnd<Self> {
639 self.mul_r(rhs, Round::NearestTiesToEven)
643 impl<$t> ::std::ops::Div for $ty<$t> where Self: Float {
644 type Output = StatusAnd<Self>;
645 fn div(self, rhs: Self) -> StatusAnd<Self> {
646 self.div_r(rhs, Round::NearestTiesToEven)
650 impl<$t> ::std::ops::Rem for $ty<$t> where Self: Float {
651 type Output = StatusAnd<Self>;
652 fn rem(self, rhs: Self) -> StatusAnd<Self> {
657 impl<$t> ::std::ops::AddAssign for $ty<$t> where Self: Float {
658 fn add_assign(&mut self, rhs: Self) {
659 *self = (*self + rhs).value;
663 impl<$t> ::std::ops::SubAssign for $ty<$t> where Self: Float {
664 fn sub_assign(&mut self, rhs: Self) {
665 *self = (*self - rhs).value;
669 impl<$t> ::std::ops::MulAssign for $ty<$t> where Self: Float {
670 fn mul_assign(&mut self, rhs: Self) {
671 *self = (*self * rhs).value;
675 impl<$t> ::std::ops::DivAssign for $ty<$t> where Self: Float {
676 fn div_assign(&mut self, rhs: Self) {
677 *self = (*self / rhs).value;
681 impl<$t> ::std::ops::RemAssign for $ty<$t> where Self: Float {
682 fn rem_assign(&mut self, rhs: Self) {
683 *self = (*self % rhs).value;