From 82dbc2ea619cbfc98ca9ad2f9e06a5acd294cbe3 Mon Sep 17 00:00:00 2001 From: Robin Kruppe Date: Sun, 26 Jul 2015 15:24:08 +0200 Subject: [PATCH] Add optional, external tests for floating point parsing. Running these tests takes hours, so they are not run by @bors. --- src/etc/test-float-parse/_common.rs | 26 ++ src/etc/test-float-parse/few-ones.rs | 27 ++ src/etc/test-float-parse/huge-pow10.rs | 21 ++ src/etc/test-float-parse/many-digits.rs | 39 ++ src/etc/test-float-parse/rand-f64.rs | 32 ++ src/etc/test-float-parse/runtests.py | 399 +++++++++++++++++++++ src/etc/test-float-parse/short-decimals.rs | 29 ++ src/etc/test-float-parse/subnorm.rs | 23 ++ src/etc/test-float-parse/tiny-pow10.rs | 21 ++ src/etc/test-float-parse/u32-small.rs | 19 + src/etc/test-float-parse/u64-pow2.rs | 28 ++ 11 files changed, 664 insertions(+) create mode 100644 src/etc/test-float-parse/_common.rs create mode 100644 src/etc/test-float-parse/few-ones.rs create mode 100644 src/etc/test-float-parse/huge-pow10.rs create mode 100644 src/etc/test-float-parse/many-digits.rs create mode 100644 src/etc/test-float-parse/rand-f64.rs create mode 100644 src/etc/test-float-parse/runtests.py create mode 100644 src/etc/test-float-parse/short-decimals.rs create mode 100644 src/etc/test-float-parse/subnorm.rs create mode 100644 src/etc/test-float-parse/tiny-pow10.rs create mode 100644 src/etc/test-float-parse/u32-small.rs create mode 100644 src/etc/test-float-parse/u64-pow2.rs diff --git a/src/etc/test-float-parse/_common.rs b/src/etc/test-float-parse/_common.rs new file mode 100644 index 00000000000..b4a2a593e01 --- /dev/null +++ b/src/etc/test-float-parse/_common.rs @@ -0,0 +1,26 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. 
This file may not be copied, modified, or distributed +// except according to those terms. + +use std::io; +use std::io::prelude::*; +use std::mem::transmute; + +// Nothing up my sleeve: Just (PI - 3) in base 16. +#[allow(dead_code)] +pub const SEED: [u32; 3] = [0x243f_6a88, 0x85a3_08d3, 0x1319_8a2e]; + +pub fn validate(text: String) { + let mut out = io::stdout(); + let x: f64 = text.parse().unwrap(); + let f64_bytes: u64 = unsafe { transmute(x) }; + let x: f32 = text.parse().unwrap(); + let f32_bytes: u32 = unsafe { transmute(x) }; + writeln!(&mut out, "{:016x} {:08x} {}", f64_bytes, f32_bytes, text).unwrap(); +} diff --git a/src/etc/test-float-parse/few-ones.rs b/src/etc/test-float-parse/few-ones.rs new file mode 100644 index 00000000000..390f4da6f63 --- /dev/null +++ b/src/etc/test-float-parse/few-ones.rs @@ -0,0 +1,27 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +mod _common; + +use _common::validate; + +fn main() { + let mut pow = vec![]; + for i in 0..63 { + pow.push(1u64 << i); + } + for a in &pow { + for b in &pow { + for c in &pow { + validate((a | b | c).to_string()); + } + } + } +} diff --git a/src/etc/test-float-parse/huge-pow10.rs b/src/etc/test-float-parse/huge-pow10.rs new file mode 100644 index 00000000000..e62afc78515 --- /dev/null +++ b/src/etc/test-float-parse/huge-pow10.rs @@ -0,0 +1,21 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. 
This file may not be copied, modified, or distributed +// except according to those terms. + +mod _common; + +use _common::validate; + +fn main() { + for e in 300..310 { + for i in 0..100000 { + validate(format!("{}e{}", i, e)); + } + } +} diff --git a/src/etc/test-float-parse/many-digits.rs b/src/etc/test-float-parse/many-digits.rs new file mode 100644 index 00000000000..0cbf57183df --- /dev/null +++ b/src/etc/test-float-parse/many-digits.rs @@ -0,0 +1,39 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![feature(rand)] + +extern crate rand; + +mod _common; + +use std::char; +use rand::{IsaacRng, Rng, SeedableRng}; +use rand::distributions::{Range, Sample}; +use _common::{validate, SEED}; + +fn main() { + let mut rnd = IsaacRng::from_seed(&SEED); + let mut range = Range::new(0, 10); + for _ in 0..5_000_000u64 { + let num_digits = rnd.gen_range(100, 300); + let digits = gen_digits(num_digits, &mut range, &mut rnd); + validate(digits); + } +} + +fn gen_digits(n: u32, range: &mut Range, rnd: &mut R) -> String { + let mut s = String::new(); + for _ in 0..n { + let digit = char::from_digit(range.sample(rnd), 10).unwrap(); + s.push(digit); + } + s +} diff --git a/src/etc/test-float-parse/rand-f64.rs b/src/etc/test-float-parse/rand-f64.rs new file mode 100644 index 00000000000..762c3d95ec6 --- /dev/null +++ b/src/etc/test-float-parse/rand-f64.rs @@ -0,0 +1,32 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. 
This file may not be copied, modified, or distributed +// except according to those terms. + +#![feature(rand)] + +extern crate rand; + +mod _common; + +use _common::{validate, SEED}; +use rand::{IsaacRng, Rng, SeedableRng}; +use std::mem::transmute; + +fn main() { + let mut rnd = IsaacRng::from_seed(&SEED); + let mut i = 0; + while i < 10_000_000 { + let bits = rnd.next_u64(); + let x: f64 = unsafe { transmute(bits) }; + if x.is_finite() { + validate(format!("{:e}", x)); + i += 1; + } + } +} diff --git a/src/etc/test-float-parse/runtests.py b/src/etc/test-float-parse/runtests.py new file mode 100644 index 00000000000..17a1b769bd6 --- /dev/null +++ b/src/etc/test-float-parse/runtests.py @@ -0,0 +1,399 @@ +#!/usr/bin/python2.7 +# +# Copyright 2015 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. + +""" +Testing dec2flt +=============== +These are *really* extensive tests. Expect them to run for hours. Due to the +nature of the problem (the input is a string of arbitrary length), exhaustive +testing is not really possible. Instead, there are exhaustive tests for some +classes of inputs for which that is feasible and a bunch of deterministic and +random non-exhaustive tests for covering everything else. + +The actual testing (generating decimal strings and feeding them to dec2flt) is +performed by a set of stand-alone Rust programs. This script compiles, runs, +and supervises them. In particular, the programs report the strings they +generate and the floating point numbers they converted those strings to. + +You can run specific tests rather than all of them by giving their names +(without .rs extension) as command line parameters. 
+ +Verification +------------ +The tricky part is not generating those inputs but verifying the outputs. +Comparing with the result of Python's float() does not cut it because +(and this is apparently undocumented) although Python includes a version of +David Gay's code including the decimal-to-float part, it doesn't actually use +it for float() (only for round()) instead relying on the system scanf() which +is not necessarily completely accurate. + +Instead, we take the input and compute the true value with bignum arithmetic +(as a fraction, using the ``fractions`` module). + +Given an input string and the corresponding float computed via Rust, simply +decode the float into f * 2^k (for integers f, k) and the ULP. +We can now easily compute the error and check if it is within 0.5 ULP as it +should be. Zero and infinities are handled similarly: + +- If the approximation is 0.0, the exact value should be *less than or equal to* + half the smallest denormal float: the smallest denormal floating point + number has an odd mantissa (00...001) and thus half of that is rounded + to 00...00, i.e., zero. +- If the approximation is Inf, the exact value should be *greater than or equal* + to the largest finite float + 0.5 ULP: the largest finite float has an odd + mantissa (11...11), so that plus half an ULP is rounded up to the nearest + even number, which overflows. + +Implementation details +---------------------- +This directory contains a set of single-file Rust programs that perform +tests with a particular class of inputs. Each is compiled and run without +parameters, outputs (f64, f32, decimal) records to verify externally, and +in any case either exits gracefully or with a panic. + +If a test binary writes *anything at all* to stderr or exits with an +exit code that's not 0, the test fails. +The output on stdout is treated as (f64, f32, decimal) records, encoded thusly: + +- First come 16 hex digits: the bits of a binary64, followed by a space. 
+- Next come 8 hex digits: the bits of a binary32, followed by a space. +- Then the corresponding string input follows, in ASCII (no newline). +- The record is terminated with a newline. + +Incomplete records are an error. Not-a-Number bit patterns are invalid too. + +The tests run serially but the validation for a single test is parallelized +with ``multiprocessing``. Each test is launched as a subprocess. +One thread supervises it: it accepts and enqueues records to validate, observes +stderr, and waits for the process to exit. A set of worker processes perform +the validation work for the outputs enqueued there. Another thread listens +for error reports from the workers. + +Known issues +------------ +Some errors (e.g., NaN outputs) aren't handled very gracefully. +Also, if there is an exception or the process is interrupted (at least on +Windows) the worker processes are leaked and stick around forever. +They're only a few megabytes each, but still, this script should not be run +if you aren't prepared to manually kill a lot of orphaned processes. +""" +from __future__ import print_function +import sys +import os.path +import time +import struct +from fractions import Fraction +from collections import namedtuple +from subprocess import Popen, check_call, PIPE +from glob import glob +import multiprocessing +import Queue +import threading +import ctypes +import binascii + +NUM_WORKERS = 2 +UPDATE_EVERY_N = 50000 +INF = namedtuple('INF', '')() +NEG_INF = namedtuple('NEG_INF', '')() +ZERO = namedtuple('ZERO', '')() +MAILBOX = None # The queue for reporting errors to the main process. 
+STDOUT_LOCK = threading.Lock() +test_name = None +child_processes = [] +exit_status = 0 + +def msg(*args): + with STDOUT_LOCK: + print("[" + test_name + "]", *args) + sys.stdout.flush() + + +def write_errors(): + global exit_status + f = open("errors.txt", 'w') + have_seen_error = False + while True: + args = MAILBOX.get() + if args is None: + f.close() + break + print(*args, file=f) + f.flush() + if not have_seen_error: + have_seen_error = True + msg("Something is broken:", *args) + msg("Future errors logged to errors.txt") + exit_status = 101 + + +def rustc(test): + rs = test + '.rs' + exe = test + '.exe' # hopefully this makes it work on *nix + print("compiling", test) + sys.stdout.flush() + check_call(['rustc', rs, '-o', exe]) + + +def run(test): + global test_name + test_name = test + + t0 = time.clock() + msg("setting up supervisor") + exe = test + '.exe' + proc = Popen(exe, bufsize=1<<20 , stdin=PIPE, stdout=PIPE, stderr=PIPE) + done = multiprocessing.Value(ctypes.c_bool) + queue = multiprocessing.Queue(maxsize=5)#(maxsize=1024) + workers = [] + for n in range(NUM_WORKERS): + worker = multiprocessing.Process(name='Worker-' + str(n + 1), + target=init_worker, + args=[test, MAILBOX, queue, done]) + workers.append(worker) + child_processes.append(worker) + for worker in workers: + worker.start() + msg("running test") + interact(proc, queue) + with done.get_lock(): + done.value = True + for worker in workers: + worker.join() + msg("python is done") + assert queue.empty(), "did not validate everything" + dt = time.clock() - t0 + msg("took", round(dt, 3), "seconds") + + +def interact(proc, queue): + line = "" + n = 0 + while proc.poll() is None: + line = proc.stdout.readline() + if not line: + continue + assert line.endswith('\n'), "incomplete line: " + repr(line) + queue.put(line) + line = "" + n += 1 + if n % UPDATE_EVERY_N == 0: + msg("got", str(n // 1000) + "k", "records") + msg("rust is done. 
exit code:", proc.returncode) + rest, stderr = proc.communicate() + if stderr: + msg("rust stderr output:", stderr) + for line in rest.split('\n'): + if not line: + continue + queue.put(line) + + +def main(): + global MAILBOX + tests = [os.path.splitext(f)[0] for f in glob('*.rs') + if not f.startswith('_')] + whitelist = sys.argv[1:] + if whitelist: + tests = [test for test in tests if test in whitelist] + if not tests: + print("Error: No tests to run") + sys.exit(1) + # Compile first for quicker feedback + for test in tests: + rustc(test) + # Set up mailbox once for all tests + MAILBOX = multiprocessing.Queue() + mailman = threading.Thread(target=write_errors) + mailman.daemon = True + mailman.start() + for test in tests: + if whitelist and test not in whitelist: + continue + run(test) + MAILBOX.put(None) + mailman.join() + + +# ---- Worker thread code ---- + + +POW2 = { e: Fraction(2) ** e for e in range(-1100, 1100) } +HALF_ULP = { e: (Fraction(2) ** e)/2 for e in range(-1100, 1100) } +DONE_FLAG = None + + +def send_error_to_supervisor(*args): + MAILBOX.put(args) + + +def init_worker(test, mailbox, queue, done): + global test_name, MAILBOX, DONE_FLAG + test_name = test + MAILBOX = mailbox + DONE_FLAG = done + do_work(queue) + + +def is_done(): + with DONE_FLAG.get_lock(): + return DONE_FLAG.value + + +def do_work(queue): + while True: + try: + line = queue.get(timeout=0.01) + except Queue.Empty: + if queue.empty() and is_done(): + return + else: + continue + bin64, bin32, text = line.rstrip().split() + validate(bin64, bin32, text) + + +def decode_binary64(x): + """ + Turn a IEEE 754 binary64 into (mantissa, exponent), except 0.0 and + infinity (positive and negative), which return ZERO, INF, and NEG_INF + respectively. 
+ """ + x = binascii.unhexlify(x) + assert len(x) == 8, repr(x) + [bits] = struct.unpack(b'>Q', x) + if bits == 0: + return ZERO + exponent = (bits >> 52) & 0x7FF + negative = bits >> 63 + low_bits = bits & 0xFFFFFFFFFFFFF + if exponent == 0: + mantissa = low_bits + exponent += 1 + if mantissa == 0: + return ZERO + elif exponent == 0x7FF: + assert low_bits == 0, "NaN" + if negative: + return NEG_INF + else: + return INF + else: + mantissa = low_bits | (1 << 52) + exponent -= 1023 + 52 + if negative: + mantissa = -mantissa + return (mantissa, exponent) + + +def decode_binary32(x): + """ + Turn a IEEE 754 binary32 into (mantissa, exponent), except 0.0 and + infinity (positive and negative), which return ZERO, INF, and NEG_INF + respectively. + """ + x = binascii.unhexlify(x) + assert len(x) == 4, repr(x) + [bits] = struct.unpack(b'>I', x) + if bits == 0: + return ZERO + exponent = (bits >> 23) & 0xFF + negative = bits >> 31 + low_bits = bits & 0x7FFFFF + if exponent == 0: + mantissa = low_bits + exponent += 1 + if mantissa == 0: + return ZERO + elif exponent == 0xFF: + if negative: + return NEG_INF + else: + return INF + else: + mantissa = low_bits | (1 << 23) + exponent -= 127 + 23 + if negative: + mantissa = -mantissa + return (mantissa, exponent) + + +MIN_SUBNORMAL_DOUBLE = Fraction(2) ** -1074 +MIN_SUBNORMAL_SINGLE = Fraction(2) ** -149 # XXX unsure +MAX_DOUBLE = (2 - Fraction(2) ** -52) * (2 ** 1023) +MAX_SINGLE = (2 - Fraction(2) ** -23) * (2 ** 127) +MAX_ULP_DOUBLE = 1023 - 52 +MAX_ULP_SINGLE = 127 - 23 +DOUBLE_ZERO_CUTOFF = MIN_SUBNORMAL_DOUBLE / 2 +DOUBLE_INF_CUTOFF = MAX_DOUBLE + 2 ** (MAX_ULP_DOUBLE - 1) +SINGLE_ZERO_CUTOFF = MIN_SUBNORMAL_SINGLE / 2 +SINGLE_INF_CUTOFF = MAX_SINGLE + 2 ** (MAX_ULP_SINGLE - 1) + +def validate(bin64, bin32, text): + double = decode_binary64(bin64) + single = decode_binary32(bin32) + real = Fraction(text) + + if double is ZERO: + if real > DOUBLE_ZERO_CUTOFF: + record_special_error(text, "f64 zero") + elif double is INF: + if 
real < DOUBLE_INF_CUTOFF: + record_special_error(text, "f64 inf") + elif double is NEG_INF: + if -real < DOUBLE_INF_CUTOFF: + record_special_error(text, "f64 -inf") + elif len(double) == 2: + sig, k = double + validate_normal(text, real, sig, k, "f64") + else: + assert 0, "didn't handle binary64" + if single is ZERO: + if real > SINGLE_ZERO_CUTOFF: + record_special_error(text, "f32 zero") + elif single is INF: + if real < SINGLE_INF_CUTOFF: + record_special_error(text, "f32 inf") + elif single is NEG_INF: + if -real < SINGLE_INF_CUTOFF: + record_special_error(text, "f32 -inf") + elif len(single) == 2: + sig, k = single + validate_normal(text, real, sig, k, "f32") + else: + assert 0, "didn't handle binary32" + +def record_special_error(text, descr): + send_error_to_supervisor(text.strip(), "wrongly rounded to", descr) + + +def validate_normal(text, real, sig, k, kind): + approx = sig * POW2[k] + error = abs(approx - real) + if error > HALF_ULP[k]: + record_normal_error(text, error, k, kind) + + +def record_normal_error(text, error, k, kind): + one_ulp = HALF_ULP[k + 1] + assert one_ulp == 2 * HALF_ULP[k] + relative_error = error / one_ulp + text = text.strip() + try: + err_repr = float(relative_error) + except (ValueError, OverflowError): # a Fraction too large for a float raises OverflowError + err_repr = str(relative_error).replace('/', ' / ') # fall back to the exact fraction + send_error_to_supervisor(err_repr, "ULP error on", text, "(" + kind + ")") + + +if __name__ == '__main__': + main() diff --git a/src/etc/test-float-parse/short-decimals.rs b/src/etc/test-float-parse/short-decimals.rs new file mode 100644 index 00000000000..baefb9c9305 --- /dev/null +++ b/src/etc/test-float-parse/short-decimals.rs @@ -0,0 +1,29 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +mod _common; + +use _common::validate; + +fn main() { + // Skip e = 0 because u32-small already does those. + for e in 1..301 { + for i in 0..10000 { + // If it ends in zeros, the parser will strip those (and adjust the exponent), + // which almost always (except for exponents near +/- 300) results in an input + // equivalent to something we already generate in a different way. + if i % 10 == 0 { + continue; + } + validate(format!("{}e{}", i, e)); + validate(format!("{}e-{}", i, e)); + } + } +} diff --git a/src/etc/test-float-parse/subnorm.rs b/src/etc/test-float-parse/subnorm.rs new file mode 100644 index 00000000000..70682c9b218 --- /dev/null +++ b/src/etc/test-float-parse/subnorm.rs @@ -0,0 +1,23 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +mod _common; + +use std::mem::transmute; +use _common::validate; + +fn main() { + for bits in 0u32..(1 << 21) { + let single: f32 = unsafe { transmute(bits) }; + validate(format!("{:e}", single)); + let double: f64 = unsafe { transmute(bits as u64) }; + validate(format!("{:e}", double)); + } +} diff --git a/src/etc/test-float-parse/tiny-pow10.rs b/src/etc/test-float-parse/tiny-pow10.rs new file mode 100644 index 00000000000..a01c6d5a078 --- /dev/null +++ b/src/etc/test-float-parse/tiny-pow10.rs @@ -0,0 +1,21 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +mod _common; + +use _common::validate; + +fn main() { + for e in 301..327 { + for i in 0..100000 { + validate(format!("{}e-{}", i, e)); + } + } +} diff --git a/src/etc/test-float-parse/u32-small.rs b/src/etc/test-float-parse/u32-small.rs new file mode 100644 index 00000000000..a4e8488e745 --- /dev/null +++ b/src/etc/test-float-parse/u32-small.rs @@ -0,0 +1,19 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +mod _common; + +use _common::validate; + +fn main() { + for i in 0..(1 << 19) { + validate(i.to_string()); + } +} diff --git a/src/etc/test-float-parse/u64-pow2.rs b/src/etc/test-float-parse/u64-pow2.rs new file mode 100644 index 00000000000..a31304d3f68 --- /dev/null +++ b/src/etc/test-float-parse/u64-pow2.rs @@ -0,0 +1,28 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +mod _common; + +use _common::validate; +use std::u64; + +fn main() { + for exp in 19..64 { + let power: u64 = 1 << exp; + validate(power.to_string()); + for offset in 1..123 { + validate((power + offset).to_string()); + validate((power - offset).to_string()); + } + } + for offset in 0..123 { + validate((u64::MAX - offset).to_string()); + } +} -- 2.44.0