#!/usr/bin/env python3 """ Testing dec2flt =============== These are *really* extensive tests. Expect them to run for hours. Due to the nature of the problem (the input is a string of arbitrary length), exhaustive testing is not really possible. Instead, there are exhaustive tests for some classes of inputs for which that is feasible and a bunch of deterministic and random non-exhaustive tests for covering everything else. The actual tests (generating decimal strings and feeding them to dec2flt) is performed by a set of stand-along rust programs. This script compiles, runs, and supervises them. The programs report the strings they generate and the floating point numbers they converted those strings to, and this script checks that the results are correct. You can run specific tests rather than all of them by giving their names (without .rs extension) as command line parameters. Verification ------------ The tricky part is not generating those inputs but verifying the outputs. Comparing with the result of Python's float() does not cut it because (and this is apparently undocumented) although Python includes a version of Martin Gay's code including the decimal-to-float part, it doesn't actually use it for float() (only for round()) instead relying on the system scanf() which is not necessarily completely accurate. Instead, we take the input and compute the true value with bignum arithmetic (as a fraction, using the ``fractions`` module). Given an input string and the corresponding float computed via Rust, simply decode the float into f * 2^k (for integers f, k) and the ULP. We can now easily compute the error and check if it is within 0.5 ULP as it should be. Zero and infinites are handled similarly: - If the approximation is 0.0, the exact value should be *less or equal* half the smallest denormal float: the smallest denormal floating point number has an odd mantissa (00...001) and thus half of that is rounded to 00...00, i.e., zero. - If the approximation is Inf, the exact value should be *greater or equal* to the largest finite float + 0.5 ULP: the largest finite float has an odd mantissa (11...11), so that plus half an ULP is rounded up to the nearest even number, which overflows. Implementation details ---------------------- This directory contains a set of single-file Rust programs that perform tests with a particular class of inputs. Each is compiled and run without parameters, outputs (f64, f32, decimal) pairs to verify externally, and in any case either exits gracefully or with a panic. If a test binary writes *anything at all* to stderr or exits with an exit code that's not 0, the test fails. The output on stdout is treated as (f64, f32, decimal) record, encoded thusly: - First, the bits of the f64 encoded as an ASCII hex string. - Second, the bits of the f32 encoded as an ASCII hex string. - Then the corresponding string input, in ASCII - The record is terminated with a newline. Incomplete records are an error. Not-a-Number bit patterns are invalid too. The tests run serially but the validation for a single test is parallelized with ``multiprocessing``. Each test is launched as a subprocess. One thread supervises it: Accepts and enqueues records to validate, observe stderr, and waits for the process to exit. A set of worker processes perform the validation work for the outputs enqueued there. Another thread listens for progress updates from the workers. Known issues ------------ Some errors (e.g., NaN outputs) aren't handled very gracefully. Also, if there is an exception or the process is interrupted (at least on Windows) the worker processes are leaked and stick around forever. They're only a few megabytes each, but still, this script should not be run if you aren't prepared to manually kill a lot of orphaned processes. """ from __future__ import print_function import sys import os.path import time import struct from fractions import Fraction from collections import namedtuple from subprocess import Popen, check_call, PIPE from glob import glob import multiprocessing import threading import ctypes import binascii try: # Python 3 import queue as Queue except ImportError: # Python 2 import Queue NUM_WORKERS = 2 UPDATE_EVERY_N = 50000 INF = namedtuple('INF', '')() NEG_INF = namedtuple('NEG_INF', '')() ZERO = namedtuple('ZERO', '')() MAILBOX = None # The queue for reporting errors to the main process. STDOUT_LOCK = threading.Lock() test_name = None child_processes = [] exit_status = 0 def msg(*args): with STDOUT_LOCK: print("[" + test_name + "]", *args) sys.stdout.flush() def write_errors(): global exit_status f = open("errors.txt", 'w') have_seen_error = False while True: args = MAILBOX.get() if args is None: f.close() break print(*args, file=f) f.flush() if not have_seen_error: have_seen_error = True msg("Something is broken:", *args) msg("Future errors logged to errors.txt") exit_status = 101 def cargo(): print("compiling tests") sys.stdout.flush() check_call(['cargo', 'build', '--release']) def run(test): global test_name test_name = test t0 = time.perf_counter() msg("setting up supervisor") command = ['cargo', 'run', '--bin', test, '--release'] proc = Popen(command, bufsize=1<<20 , stdin=PIPE, stdout=PIPE, stderr=PIPE) done = multiprocessing.Value(ctypes.c_bool) queue = multiprocessing.Queue(maxsize=5)#(maxsize=1024) workers = [] for n in range(NUM_WORKERS): worker = multiprocessing.Process(name='Worker-' + str(n + 1), target=init_worker, args=[test, MAILBOX, queue, done]) workers.append(worker) child_processes.append(worker) for worker in workers: worker.start() msg("running test") interact(proc, queue) with done.get_lock(): done.value = True for worker in workers: worker.join() msg("python is done") assert queue.empty(), "did not validate everything" dt = time.perf_counter() - t0 msg("took", round(dt, 3), "seconds") def interact(proc, queue): n = 0 while proc.poll() is None: line = proc.stdout.readline() if not line: continue assert line.endswith(b'\n'), "incomplete line: " + repr(line) queue.put(line) n += 1 if n % UPDATE_EVERY_N == 0: msg("got", str(n // 1000) + "k", "records") msg("rust is done. exit code:", proc.returncode) rest, stderr = proc.communicate() if stderr: msg("rust stderr output:", stderr) for line in rest.split(b'\n'): if not line: continue queue.put(line) def main(): global MAILBOX files = glob('src/bin/*.rs') basenames = [os.path.basename(i) for i in files] all_tests = [os.path.splitext(f)[0] for f in basenames if not f.startswith('_')] args = sys.argv[1:] if args: tests = [test for test in all_tests if test in args] else: tests = all_tests if not tests: print("Error: No tests to run") sys.exit(1) # Compile first for quicker feedback cargo() # Set up mailbox once for all tests MAILBOX = multiprocessing.Queue() mailman = threading.Thread(target=write_errors) mailman.daemon = True mailman.start() for test in tests: run(test) MAILBOX.put(None) mailman.join() # ---- Worker thread code ---- POW2 = { e: Fraction(2) ** e for e in range(-1100, 1100) } HALF_ULP = { e: (Fraction(2) ** e)/2 for e in range(-1100, 1100) } DONE_FLAG = None def send_error_to_supervisor(*args): MAILBOX.put(args) def init_worker(test, mailbox, queue, done): global test_name, MAILBOX, DONE_FLAG test_name = test MAILBOX = mailbox DONE_FLAG = done do_work(queue) def is_done(): with DONE_FLAG.get_lock(): return DONE_FLAG.value def do_work(queue): while True: try: line = queue.get(timeout=0.01) except Queue.Empty: if queue.empty() and is_done(): return else: continue bin64, bin32, text = line.rstrip().split() validate(bin64, bin32, text.decode('utf-8')) def decode_binary64(x): """ Turn a IEEE 754 binary64 into (mantissa, exponent), except 0.0 and infinity (positive and negative), which return ZERO, INF, and NEG_INF respectively. """ x = binascii.unhexlify(x) assert len(x) == 8, repr(x) [bits] = struct.unpack(b'>Q', x) if bits == 0: return ZERO exponent = (bits >> 52) & 0x7FF negative = bits >> 63 low_bits = bits & 0xFFFFFFFFFFFFF if exponent == 0: mantissa = low_bits exponent += 1 if mantissa == 0: return ZERO elif exponent == 0x7FF: assert low_bits == 0, "NaN" if negative: return NEG_INF else: return INF else: mantissa = low_bits | (1 << 52) exponent -= 1023 + 52 if negative: mantissa = -mantissa return (mantissa, exponent) def decode_binary32(x): """ Turn a IEEE 754 binary32 into (mantissa, exponent), except 0.0 and infinity (positive and negative), which return ZERO, INF, and NEG_INF respectively. """ x = binascii.unhexlify(x) assert len(x) == 4, repr(x) [bits] = struct.unpack(b'>I', x) if bits == 0: return ZERO exponent = (bits >> 23) & 0xFF negative = bits >> 31 low_bits = bits & 0x7FFFFF if exponent == 0: mantissa = low_bits exponent += 1 if mantissa == 0: return ZERO elif exponent == 0xFF: if negative: return NEG_INF else: return INF else: mantissa = low_bits | (1 << 23) exponent -= 127 + 23 if negative: mantissa = -mantissa return (mantissa, exponent) MIN_SUBNORMAL_DOUBLE = Fraction(2) ** -1074 MIN_SUBNORMAL_SINGLE = Fraction(2) ** -149 # XXX unsure MAX_DOUBLE = (2 - Fraction(2) ** -52) * (2 ** 1023) MAX_SINGLE = (2 - Fraction(2) ** -23) * (2 ** 127) MAX_ULP_DOUBLE = 1023 - 52 MAX_ULP_SINGLE = 127 - 23 DOUBLE_ZERO_CUTOFF = MIN_SUBNORMAL_DOUBLE / 2 DOUBLE_INF_CUTOFF = MAX_DOUBLE + 2 ** (MAX_ULP_DOUBLE - 1) SINGLE_ZERO_CUTOFF = MIN_SUBNORMAL_SINGLE / 2 SINGLE_INF_CUTOFF = MAX_SINGLE + 2 ** (MAX_ULP_SINGLE - 1) def validate(bin64, bin32, text): try: double = decode_binary64(bin64) except AssertionError: print(bin64, bin32, text) raise single = decode_binary32(bin32) real = Fraction(text) if double is ZERO: if real > DOUBLE_ZERO_CUTOFF: record_special_error(text, "f64 zero") elif double is INF: if real < DOUBLE_INF_CUTOFF: record_special_error(text, "f64 inf") elif double is NEG_INF: if -real < DOUBLE_INF_CUTOFF: record_special_error(text, "f64 -inf") elif len(double) == 2: sig, k = double validate_normal(text, real, sig, k, "f64") else: assert 0, "didn't handle binary64" if single is ZERO: if real > SINGLE_ZERO_CUTOFF: record_special_error(text, "f32 zero") elif single is INF: if real < SINGLE_INF_CUTOFF: record_special_error(text, "f32 inf") elif single is NEG_INF: if -real < SINGLE_INF_CUTOFF: record_special_error(text, "f32 -inf") elif len(single) == 2: sig, k = single validate_normal(text, real, sig, k, "f32") else: assert 0, "didn't handle binary32" def record_special_error(text, descr): send_error_to_supervisor(text.strip(), "wrongly rounded to", descr) def validate_normal(text, real, sig, k, kind): approx = sig * POW2[k] error = abs(approx - real) if error > HALF_ULP[k]: record_normal_error(text, error, k, kind) def record_normal_error(text, error, k, kind): one_ulp = HALF_ULP[k + 1] assert one_ulp == 2 * HALF_ULP[k] relative_error = error / one_ulp text = text.strip() try: err_repr = float(relative_error) except ValueError: err_repr = str(err_repr).replace('/', ' / ') send_error_to_supervisor(err_repr, "ULP error on", text, "(" + kind + ")") if __name__ == '__main__': main()