src/libstd/num/strconv.rs

   1 // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10 //
  11 // ignore-lexer-test FIXME #15679
  12
  13 #![allow(missing_docs)]
  14
  15 pub use self::ExponentFormat::*;
  16 pub use self::SignificantDigits::*;
  17 pub use self::SignFormat::*;
  18
  19 use char;
  20 use char::Char;
  21 use num;
  22 use num::{Int, Float, FPNaN, FPInfinite, ToPrimitive};
  23 use slice::{SlicePrelude, CloneSliceAllocPrelude};
  24 use str::StrPrelude;
  25 use string::String;
  26 use vec::Vec;
  27
/// A flag that specifies whether to use exponential (scientific) notation
/// when formatting a floating point number.
pub enum ExponentFormat {
    /// Do not use exponential notation.
    ExpNone,
    /// Use exponential notation with the exponent having a base of 10 and the
    /// exponent marker being `e` or `E`. For example, 1000 would be printed
    /// 1e3.
    ///
    /// `float_to_str_bytes_common` panics if this is combined with a radix
    /// above 14, because `e` is then a valid digit.
    ExpDec,
    /// Use exponential notation with the exponent having a base of 2 and the
    /// exponent marker being `p` or `P`. For example, 8 would be printed 1p3.
    ///
    /// `float_to_str_bytes_common` panics if this is combined with a radix
    /// above 25, because `p` is then a valid digit.
    ExpBin,
}
  40
/// The number of digits used for emitting the fractional part of a number, if
/// any.
pub enum SignificantDigits {
    /// All calculable digits will be printed.
    ///
    /// Note that bignums or fractions may cause a surprisingly large number
    /// of digits to be printed.
    DigAll,

    /// At most the given number of fractional digits will be printed; any
    /// trailing zeroes are removed. One additional digit is computed
    /// internally and used for rounding.
    DigMax(uint),

    /// Precisely the given number of fractional digits will be printed,
    /// including trailing zeroes. One additional digit is computed
    /// internally and used for rounding.
    DigExact(uint)
}
  57
/// How to emit the sign of a number.
///
/// `float_to_str_bytes_common` applies the same format to the exponent when
/// exponential notation is used.
pub enum SignFormat {
    /// No sign will be printed. The exponent sign will also be omitted.
    SignNone,
    /// `-` will be printed for negative values, but no sign will be emitted
    /// for positive numbers.
    SignNeg,
    /// `+` will be printed for positive values, and `-` will be printed for
    /// negative values.
    SignAll,
}
  69
  70 /// Converts an integral number to its string representation as a byte vector.
  71 /// This is meant to be a common base implementation for all integral string
  72 /// conversion functions like `to_string()` or `to_str_radix()`.
  73 ///
  74 /// # Arguments
  75 ///
  76 /// - `num`           - The number to convert. Accepts any number that
  77 ///                     implements the numeric traits.
  78 /// - `radix`         - Base to use. Accepts only the values 2-36.
  79 /// - `sign`          - How to emit the sign. Options are:
  80 ///     - `SignNone`: No sign at all. Basically emits `abs(num)`.
  81 ///     - `SignNeg`:  Only `-` on negative values.
  82 ///     - `SignAll`:  Both `+` on positive, and `-` on negative numbers.
  83 /// - `f`             - a callback which will be invoked for each ascii character
  84 ///                     which composes the string representation of this integer
  85 ///
  86 /// # Return value
  87 ///
  88 /// A tuple containing the byte vector, and a boolean flag indicating
  89 /// whether it represents a special value like `inf`, `-inf`, `NaN` or not.
  90 /// It returns a tuple because there can be ambiguity between a special value
  91 /// and a number representation at higher bases.
  92 ///
  93 /// # Panics
  94 ///
  95 /// - Panics if `radix` < 2 or `radix` > 36.
  96 fn int_to_str_bytes_common<T: Int>(num: T, radix: uint, sign: SignFormat, f: |u8|) {
  97     assert!(2 <= radix && radix <= 36);
  98
  99     let _0: T = Int::zero();
 100
 101     let neg = num < _0;
 102     let radix_gen: T = num::cast(radix).unwrap();
 103
 104     let mut deccum = num;
 105     // This is just for integral types, the largest of which is a u64. The
 106     // smallest base that we can have is 2, so the most number of digits we're
 107     // ever going to have is 64
 108     let mut buf = [0u8, ..64];
 109     let mut cur = 0;
 110
 111     // Loop at least once to make sure at least a `0` gets emitted.
 112     loop {
 113         // Calculate the absolute value of each digit instead of only
 114         // doing it once for the whole number because a
 115         // representable negative number doesn't necessary have an
 116         // representable additive inverse of the same type
 117         // (See twos complement). But we assume that for the
 118         // numbers [-35 .. 0] we always have [0 .. 35].
 119         let current_digit_signed = deccum % radix_gen;
 120         let current_digit = if current_digit_signed < _0 {
 121             _0 - current_digit_signed
 122         } else {
 123             current_digit_signed
 124         };
 125         buf[cur] = match current_digit.to_u8().unwrap() {
 126             i @ 0...9 => b'0' + i,
 127             i         => b'a' + (i - 10),
 128         };
 129         cur += 1;
 130
 131         deccum = deccum / radix_gen;
 132         // No more digits to calculate for the non-fractional part -> break
 133         if deccum == _0 { break; }
 134     }
 135
 136     // Decide what sign to put in front
 137     match sign {
 138         SignNeg | SignAll if neg => { f(b'-'); }
 139         SignAll => { f(b'+'); }
 140         _ => ()
 141     }
 142
 143     // We built the number in reverse order, so un-reverse it here
 144     while cur > 0 {
 145         cur -= 1;
 146         f(buf[cur]);
 147     }
 148 }
 149
 150 /**
 151  * Converts a number to its string representation as a byte vector.
 152  * This is meant to be a common base implementation for all numeric string
 153  * conversion functions like `to_string()` or `to_str_radix()`.
 154  *
 155  * # Arguments
 156  * - `num`           - The number to convert. Accepts any number that
 157  *                     implements the numeric traits.
 158  * - `radix`         - Base to use. Accepts only the values 2-36. If the exponential notation
 159  *                     is used, then this base is only used for the significand. The exponent
 160  *                     itself always printed using a base of 10.
 161  * - `negative_zero` - Whether to treat the special value `-0` as
 162  *                     `-0` or as `+0`.
 163  * - `sign`          - How to emit the sign. See `SignFormat`.
 164  * - `digits`        - The amount of digits to use for emitting the fractional
 165  *                     part, if any. See `SignificantDigits`.
 166  * - `exp_format`   - Whether or not to use the exponential (scientific) notation.
 167  *                    See `ExponentFormat`.
 168  * - `exp_capital`   - Whether or not to use a capital letter for the exponent sign, if
 169  *                     exponential notation is desired.
 170  *
 171  * # Return value
 172  * A tuple containing the byte vector, and a boolean flag indicating
 173  * whether it represents a special value like `inf`, `-inf`, `NaN` or not.
 174  * It returns a tuple because there can be ambiguity between a special value
 175  * and a number representation at higher bases.
 176  *
 177  * # Panics
 178  * - Panics if `radix` < 2 or `radix` > 36.
 179  * - Panics if `radix` > 14 and `exp_format` is `ExpDec` due to conflict
 180  *   between digit and exponent sign `'e'`.
 181  * - Panics if `radix` > 25 and `exp_format` is `ExpBin` due to conflict
 182  *   between digit and exponent sign `'p'`.
 183  */
 184 pub fn float_to_str_bytes_common<T: Float>(
 185         num: T, radix: uint, negative_zero: bool,
 186         sign: SignFormat, digits: SignificantDigits, exp_format: ExponentFormat, exp_upper: bool
 187         ) -> (Vec<u8>, bool) {
 188     assert!(2 <= radix && radix <= 36);
 189     match exp_format {
 190         ExpDec if radix >= DIGIT_E_RADIX       // decimal exponent 'e'
 191           => panic!("float_to_str_bytes_common: radix {} incompatible with \
 192                     use of 'e' as decimal exponent", radix),
 193         ExpBin if radix >= DIGIT_P_RADIX       // binary exponent 'p'
 194           => panic!("float_to_str_bytes_common: radix {} incompatible with \
 195                     use of 'p' as binary exponent", radix),
 196         _ => ()
 197     }
 198
 199     let _0: T = Float::zero();
 200     let _1: T = Float::one();
 201
 202     match num.classify() {
 203         FPNaN => { return (b"NaN".to_vec(), true); }
 204         FPInfinite if num > _0 => {
 205             return match sign {
 206                 SignAll => (b"+inf".to_vec(), true),
 207                 _       => (b"inf".to_vec(), true)
 208             };
 209         }
 210         FPInfinite if num < _0 => {
 211             return match sign {
 212                 SignNone => (b"inf".to_vec(), true),
 213                 _        => (b"-inf".to_vec(), true),
 214             };
 215         }
 216         _ => {}
 217     }
 218
 219     let neg = num < _0 || (negative_zero && _1 / num == Float::neg_infinity());
 220     let mut buf = Vec::new();
 221     let radix_gen: T = num::cast(radix as int).unwrap();
 222
 223     let (num, exp) = match exp_format {
 224         ExpNone => (num, 0i32),
 225         ExpDec | ExpBin => {
 226             if num == _0 {
 227                 (num, 0i32)
 228             } else {
 229                 let (exp, exp_base) = match exp_format {
 230                     ExpDec => (num.abs().log10().floor(), num::cast::<f64, T>(10.0f64).unwrap()),
 231                     ExpBin => (num.abs().log2().floor(), num::cast::<f64, T>(2.0f64).unwrap()),
 232                     ExpNone => unreachable!()
 233                 };
 234
 235                 (num / exp_base.powf(exp), num::cast::<T, i32>(exp).unwrap())
 236             }
 237         }
 238     };
 239
 240     // First emit the non-fractional part, looping at least once to make
 241     // sure at least a `0` gets emitted.
 242     let mut deccum = num.trunc();
 243     loop {
 244         // Calculate the absolute value of each digit instead of only
 245         // doing it once for the whole number because a
 246         // representable negative number doesn't necessary have an
 247         // representable additive inverse of the same type
 248         // (See twos complement). But we assume that for the
 249         // numbers [-35 .. 0] we always have [0 .. 35].
 250         let current_digit = (deccum % radix_gen).abs();
 251
 252         // Decrease the deccumulator one digit at a time
 253         deccum = deccum / radix_gen;
 254         deccum = deccum.trunc();
 255
 256         buf.push(char::from_digit(current_digit.to_int().unwrap() as uint, radix)
 257              .unwrap() as u8);
 258
 259         // No more digits to calculate for the non-fractional part -> break
 260         if deccum == _0 { break; }
 261     }
 262
 263     // If limited digits, calculate one digit more for rounding.
 264     let (limit_digits, digit_count, exact) = match digits {
 265         DigAll          => (false, 0u,      false),
 266         DigMax(count)   => (true,  count+1, false),
 267         DigExact(count) => (true,  count+1, true)
 268     };
 269
 270     // Decide what sign to put in front
 271     match sign {
 272         SignNeg | SignAll if neg => {
 273             buf.push(b'-');
 274         }
 275         SignAll => {
 276             buf.push(b'+');
 277         }
 278         _ => ()
 279     }
 280
 281     buf.reverse();
 282
 283     // Remember start of the fractional digits.
 284     // Points one beyond end of buf if none get generated,
 285     // or at the '.' otherwise.
 286     let start_fractional_digits = buf.len();
 287
 288     // Now emit the fractional part, if any
 289     deccum = num.fract();
 290     if deccum != _0 || (limit_digits && exact && digit_count > 0) {
 291         buf.push(b'.');
 292         let mut dig = 0u;
 293
 294         // calculate new digits while
 295         // - there is no limit and there are digits left
 296         // - or there is a limit, it's not reached yet and
 297         //   - it's exact
 298         //   - or it's a maximum, and there are still digits left
 299         while (!limit_digits && deccum != _0)
 300            || (limit_digits && dig < digit_count && (
 301                    exact
 302                 || (!exact && deccum != _0)
 303               )
 304         ) {
 305             // Shift first fractional digit into the integer part
 306             deccum = deccum * radix_gen;
 307
 308             // Calculate the absolute value of each digit.
 309             // See note in first loop.
 310             let current_digit = deccum.trunc().abs();
 311
 312             buf.push(char::from_digit(
 313                 current_digit.to_int().unwrap() as uint, radix).unwrap() as u8);
 314
 315             // Decrease the deccumulator one fractional digit at a time
 316             deccum = deccum.fract();
 317             dig += 1u;
 318         }
 319
 320         // If digits are limited, and that limit has been reached,
 321         // cut off the one extra digit, and depending on its value
 322         // round the remaining ones.
 323         if limit_digits && dig == digit_count {
 324             let ascii2value = |chr: u8| {
 325                 (chr as char).to_digit(radix).unwrap()
 326             };
 327             let value2ascii = |val: uint| {
 328                 char::from_digit(val, radix).unwrap() as u8
 329             };
 330
 331             let extra_digit = ascii2value(buf.pop().unwrap());
 332             if extra_digit >= radix / 2 { // -> need to round
 333                 let mut i: int = buf.len() as int - 1;
 334                 loop {
 335                     // If reached left end of number, have to
 336                     // insert additional digit:
 337                     if i < 0
 338                     || buf[i as uint] == b'-'
 339                     || buf[i as uint] == b'+' {
 340                         buf.insert((i + 1) as uint, value2ascii(1));
 341                         break;
 342                     }
 343
 344                     // Skip the '.'
 345                     if buf[i as uint] == b'.' { i -= 1; continue; }
 346
 347                     // Either increment the digit,
 348                     // or set to 0 if max and carry the 1.
 349                     let current_digit = ascii2value(buf[i as uint]);
 350                     if current_digit < (radix - 1) {
 351                         buf[i as uint] = value2ascii(current_digit+1);
 352                         break;
 353                     } else {
 354                         buf[i as uint] = value2ascii(0);
 355                         i -= 1;
 356                     }
 357                 }
 358             }
 359         }
 360     }
 361
 362     // if number of digits is not exact, remove all trailing '0's up to
 363     // and including the '.'
 364     if !exact {
 365         let buf_max_i = buf.len() - 1;
 366
 367         // index to truncate from
 368         let mut i = buf_max_i;
 369
 370         // discover trailing zeros of fractional part
 371         while i > start_fractional_digits && buf[i] == b'0' {
 372             i -= 1;
 373         }
 374
 375         // Only attempt to truncate digits if buf has fractional digits
 376         if i >= start_fractional_digits {
 377             // If buf ends with '.', cut that too.
 378             if buf[i] == b'.' { i -= 1 }
 379
 380             // only resize buf if we actually remove digits
 381             if i < buf_max_i {
 382                 buf = buf.slice(0, i + 1).to_vec();
 383             }
 384         }
 385     } // If exact and trailing '.', just cut that
 386     else {
 387         let max_i = buf.len() - 1;
 388         if buf[max_i] == b'.' {
 389             buf = buf.slice(0, max_i).to_vec();
 390         }
 391     }
 392
 393     match exp_format {
 394         ExpNone => (),
 395         _ => {
 396             buf.push(match exp_format {
 397                 ExpDec if exp_upper => 'E',
 398                 ExpDec if !exp_upper => 'e',
 399                 ExpBin if exp_upper => 'P',
 400                 ExpBin if !exp_upper => 'p',
 401                 _ => unreachable!()
 402             } as u8);
 403
 404             int_to_str_bytes_common(exp, 10, sign, |c| buf.push(c));
 405         }
 406     }
 407
 408     (buf, false)
 409 }
 410
 411 /**
 412  * Converts a number to its string representation. This is a wrapper for
 413  * `to_str_bytes_common()`, for details see there.
 414  */
 415 #[inline]
 416 pub fn float_to_str_common<T: Float>(
 417         num: T, radix: uint, negative_zero: bool,
 418         sign: SignFormat, digits: SignificantDigits, exp_format: ExponentFormat, exp_capital: bool
 419         ) -> (String, bool) {
 420     let (bytes, special) = float_to_str_bytes_common(num, radix,
 421                                negative_zero, sign, digits, exp_format, exp_capital);
 422     (String::from_utf8(bytes).unwrap(), special)
 423 }
 424
// Constants for float_to_str_bytes_common's input validation: each one is the
// minimum radix for which the given exponent marker character is also a
// valid digit, which would make exponential notation ambiguous.
// 'p' has digit value 25, so it is a digit from radix 26 upwards.
static DIGIT_P_RADIX: uint = ('p' as uint) - ('a' as uint) + 11u;
// 'e' has digit value 14, so it is a digit from radix 15 upwards.
static DIGIT_E_RADIX: uint = ('e' as uint) - ('a' as uint) + 11u;
 429
 430 #[cfg(test)]
 431 mod tests {
 432     use string::ToString;
 433
 434     #[test]
 435     fn test_int_to_str_overflow() {
 436         let mut i8_val: i8 = 127_i8;
 437         assert_eq!(i8_val.to_string(), "127".to_string());
 438
 439         i8_val += 1 as i8;
 440         assert_eq!(i8_val.to_string(), "-128".to_string());
 441
 442         let mut i16_val: i16 = 32_767_i16;
 443         assert_eq!(i16_val.to_string(), "32767".to_string());
 444
 445         i16_val += 1 as i16;
 446         assert_eq!(i16_val.to_string(), "-32768".to_string());
 447
 448         let mut i32_val: i32 = 2_147_483_647_i32;
 449         assert_eq!(i32_val.to_string(), "2147483647".to_string());
 450
 451         i32_val += 1 as i32;
 452         assert_eq!(i32_val.to_string(), "-2147483648".to_string());
 453
 454         let mut i64_val: i64 = 9_223_372_036_854_775_807_i64;
 455         assert_eq!(i64_val.to_string(), "9223372036854775807".to_string());
 456
 457         i64_val += 1 as i64;
 458         assert_eq!(i64_val.to_string(), "-9223372036854775808".to_string());
 459     }
 460 }
 461
// Benchmarks for number-to-string conversion. Each benchmark draws a fresh
// random value per iteration from a generator created with `weak_rng()` and
// formats it, discarding the resulting string.
#[cfg(test)]
mod bench {
    extern crate test;

    // Unsigned integers, formatted through `fmt::radix` in several bases.
    mod uint {
        use super::test::Bencher;
        use rand::{weak_rng, Rng};
        use std::fmt;

        // Formats `x` in the given base and drops the result.
        #[inline]
        fn to_string(x: uint, base: u8) {
            format!("{}", fmt::radix(x, base));
        }

        // Base 2.
        #[bench]
        fn to_str_bin(b: &mut Bencher) {
            let mut rng = weak_rng();
            b.iter(|| { to_string(rng.gen::<uint>(), 2); })
        }

        // Base 8.
        #[bench]
        fn to_str_oct(b: &mut Bencher) {
            let mut rng = weak_rng();
            b.iter(|| { to_string(rng.gen::<uint>(), 8); })
        }

        // Base 10.
        #[bench]
        fn to_str_dec(b: &mut Bencher) {
            let mut rng = weak_rng();
            b.iter(|| { to_string(rng.gen::<uint>(), 10); })
        }

        // Base 16.
        #[bench]
        fn to_str_hex(b: &mut Bencher) {
            let mut rng = weak_rng();
            b.iter(|| { to_string(rng.gen::<uint>(), 16); })
        }

        // Base 36, the largest radix used in these benchmarks.
        #[bench]
        fn to_str_base_36(b: &mut Bencher) {
            let mut rng = weak_rng();
            b.iter(|| { to_string(rng.gen::<uint>(), 36); })
        }
    }

    // Signed integers, formatted through `fmt::radix` in several bases.
    mod int {
        use super::test::Bencher;
        use rand::{weak_rng, Rng};
        use std::fmt;

        // Formats `x` in the given base and drops the result.
        #[inline]
        fn to_string(x: int, base: u8) {
            format!("{}", fmt::radix(x, base));
        }

        // Base 2.
        #[bench]
        fn to_str_bin(b: &mut Bencher) {
            let mut rng = weak_rng();
            b.iter(|| { to_string(rng.gen::<int>(), 2); })
        }

        // Base 8.
        #[bench]
        fn to_str_oct(b: &mut Bencher) {
            let mut rng = weak_rng();
            b.iter(|| { to_string(rng.gen::<int>(), 8); })
        }

        // Base 10.
        #[bench]
        fn to_str_dec(b: &mut Bencher) {
            let mut rng = weak_rng();
            b.iter(|| { to_string(rng.gen::<int>(), 10); })
        }

        // Base 16.
        #[bench]
        fn to_str_hex(b: &mut Bencher) {
            let mut rng = weak_rng();
            b.iter(|| { to_string(rng.gen::<int>(), 16); })
        }

        // Base 36, the largest radix used in these benchmarks.
        #[bench]
        fn to_str_base_36(b: &mut Bencher) {
            let mut rng = weak_rng();
            b.iter(|| { to_string(rng.gen::<int>(), 36); })
        }
    }

    // Floating point values, converted with `f64::to_string`.
    mod f64 {
        use super::test::Bencher;
        use rand::{weak_rng, Rng};
        use f64;

        // Converts one randomly generated value per iteration.
        #[bench]
        fn float_to_string(b: &mut Bencher) {
            let mut rng = weak_rng();
            b.iter(|| { f64::to_string(rng.gen()); })
        }
    }
}