src/etc/test-float-parse/runtests.py

   1 #!/usr/bin/env python3
   2
   3 """
   4 Testing dec2flt
   5 ===============
   6 These are *really* extensive tests. Expect them to run for hours. Due to the
   7 nature of the problem (the input is a string of arbitrary length), exhaustive
   8 testing is not really possible. Instead, there are exhaustive tests for some
   9 classes of inputs for which that is feasible and a bunch of deterministic and
  10 random non-exhaustive tests for covering everything else.
  11
  12 The actual tests (generating decimal strings and feeding them to dec2flt) is
  13 performed by a set of stand-along rust programs. This script compiles, runs,
  14 and supervises them. The programs report the strings they generate and the
  15 floating point numbers they converted those strings to, and this script
  16 checks that the results are correct.
  17
  18 You can run specific tests rather than all of them by giving their names
  19 (without .rs extension) as command line parameters.
  20
  21 Verification
  22 ------------
  23 The tricky part is not generating those inputs but verifying the outputs.
  24 Comparing with the result of Python's float() does not cut it because
  25 (and this is apparently undocumented) although Python includes a version of
  26 Martin Gay's code including the decimal-to-float part, it doesn't actually use
  27 it for float() (only for round()) instead relying on the system scanf() which
  28 is not necessarily completely accurate.
  29
  30 Instead, we take the input and compute the true value with bignum arithmetic
  31 (as a fraction, using the ``fractions`` module).
  32
  33 Given an input string and the corresponding float computed via Rust, simply
  34 decode the float into f * 2^k (for integers f, k) and the ULP.
  35 We can now easily compute the error and check if it is within 0.5 ULP as it
  36 should be. Zero and infinites are handled similarly:
  37
  38 - If the approximation is 0.0, the exact value should be *less or equal*
  39   half the smallest denormal float: the smallest denormal floating point
  40   number has an odd mantissa (00...001) and thus half of that is rounded
  41   to 00...00, i.e., zero.
  42 - If the approximation is Inf, the exact value should be *greater or equal*
  43   to the largest finite float + 0.5 ULP: the largest finite float has an odd
  44   mantissa (11...11), so that plus half an ULP is rounded up to the nearest
  45   even number, which overflows.
  46
  47 Implementation details
  48 ----------------------
  49 This directory contains a set of single-file Rust programs that perform
  50 tests with a particular class of inputs. Each is compiled and run without
  51 parameters, outputs (f64, f32, decimal) pairs to verify externally, and
  52 in any case either exits gracefully or with a panic.
  53
  54 If a test binary writes *anything at all* to stderr or exits with an
  55 exit code that's not 0, the test fails.
  56 The output on stdout is treated as (f64, f32, decimal) record, encoded thusly:
  57
  58 - First, the bits of the f64 encoded as an ASCII hex string.
  59 - Second, the bits of the f32 encoded as an ASCII hex string.
  60 - Then the corresponding string input, in ASCII
  61 - The record is terminated with a newline.
  62
  63 Incomplete records are an error. Not-a-Number bit patterns are invalid too.
  64
  65 The tests run serially but the validation for a single test is parallelized
  66 with ``multiprocessing``. Each test is launched as a subprocess.
  67 One thread supervises it: Accepts and enqueues records to validate, observe
  68 stderr, and waits for the process to exit. A set of worker processes perform
  69 the validation work for the outputs enqueued there. Another thread listens
  70 for progress updates from the workers.
  71
  72 Known issues
  73 ------------
  74 Some errors (e.g., NaN outputs) aren't handled very gracefully.
  75 Also, if there is an exception or the process is interrupted (at least on
  76 Windows) the worker processes are leaked and stick around forever.
  77 They're only a few megabytes each, but still, this script should not be run
  78 if you aren't prepared to manually kill a lot of orphaned processes.
  79 """
  80 from __future__ import print_function
  81 import sys
  82 import os.path
  83 import time
  84 import struct
  85 from fractions import Fraction
  86 from collections import namedtuple
  87 from subprocess import Popen, check_call, PIPE
  88 from glob import glob
  89 import multiprocessing
  90 import threading
  91 import ctypes
  92 import binascii
  93
  94 try:  # Python 3
  95     import queue as Queue
  96 except ImportError:  # Python 2
  97     import Queue
  98
  99 NUM_WORKERS = 2
 100 UPDATE_EVERY_N = 50000
 101 INF = namedtuple('INF', '')()
 102 NEG_INF = namedtuple('NEG_INF', '')()
 103 ZERO = namedtuple('ZERO', '')()
 104 MAILBOX = None  # The queue for reporting errors to the main process.
 105 STDOUT_LOCK = threading.Lock()
 106 test_name = None
 107 child_processes = []
 108 exit_status = 0
 109
 110 def msg(*args):
 111     with STDOUT_LOCK:
 112         print("[" + test_name + "]", *args)
 113         sys.stdout.flush()
 114
 115
 116 def write_errors():
 117     global exit_status
 118     f = open("errors.txt", 'w')
 119     have_seen_error = False
 120     while True:
 121         args = MAILBOX.get()
 122         if args is None:
 123             f.close()
 124             break
 125         print(*args, file=f)
 126         f.flush()
 127         if not have_seen_error:
 128             have_seen_error = True
 129             msg("Something is broken:", *args)
 130             msg("Future errors logged to errors.txt")
 131             exit_status = 101
 132
 133
 134 def rustc(test):
 135     rs = test + '.rs'
 136     exe = test + '.exe'  # hopefully this makes it work on *nix
 137     print("compiling", test)
 138     sys.stdout.flush()
 139     check_call(['rustc', rs, '-o', exe])
 140
 141
 142 def run(test):
 143     global test_name
 144     test_name = test
 145
 146     t0 = time.clock()
 147     msg("setting up supervisor")
 148     exe = test + '.exe'
 149     proc = Popen(exe, bufsize=1<<20 , stdin=PIPE, stdout=PIPE, stderr=PIPE)
 150     done = multiprocessing.Value(ctypes.c_bool)
 151     queue = multiprocessing.Queue(maxsize=5)#(maxsize=1024)
 152     workers = []
 153     for n in range(NUM_WORKERS):
 154         worker = multiprocessing.Process(name='Worker-' + str(n + 1),
 155                                          target=init_worker,
 156                                          args=[test, MAILBOX, queue, done])
 157         workers.append(worker)
 158         child_processes.append(worker)
 159     for worker in workers:
 160         worker.start()
 161     msg("running test")
 162     interact(proc, queue)
 163     with done.get_lock():
 164         done.value = True
 165     for worker in workers:
 166         worker.join()
 167     msg("python is done")
 168     assert queue.empty(), "did not validate everything"
 169     dt = time.clock() - t0
 170     msg("took", round(dt, 3), "seconds")
 171
 172
 173 def interact(proc, queue):
 174     n = 0
 175     while proc.poll() is None:
 176         line = proc.stdout.readline()
 177         if not line:
 178             continue
 179         assert line.endswith('\n'), "incomplete line: " + repr(line)
 180         queue.put(line)
 181         n += 1
 182         if n % UPDATE_EVERY_N == 0:
 183             msg("got", str(n // 1000) + "k", "records")
 184     msg("rust is done. exit code:", proc.returncode)
 185     rest, stderr = proc.communicate()
 186     if stderr:
 187         msg("rust stderr output:", stderr)
 188     for line in rest.split('\n'):
 189         if not line:
 190             continue
 191         queue.put(line)
 192
 193
 194 def main():
 195     global MAILBOX
 196     tests = [os.path.splitext(f)[0] for f in glob('*.rs')
 197                                     if not f.startswith('_')]
 198     whitelist = sys.argv[1:]
 199     if whitelist:
 200         tests = [test for test in tests if test in whitelist]
 201     if not tests:
 202         print("Error: No tests to run")
 203         sys.exit(1)
 204     # Compile first for quicker feedback
 205     for test in tests:
 206         rustc(test)
 207     # Set up mailbox once for all tests
 208     MAILBOX = multiprocessing.Queue()
 209     mailman = threading.Thread(target=write_errors)
 210     mailman.daemon = True
 211     mailman.start()
 212     for test in tests:
 213         if whitelist and test not in whitelist:
 214             continue
 215         run(test)
 216     MAILBOX.put(None)
 217     mailman.join()
 218
 219
 220 # ---- Worker thread code ----
 221
 222
 223 POW2 = { e: Fraction(2) ** e for e in range(-1100, 1100) }
 224 HALF_ULP = { e: (Fraction(2) ** e)/2 for e in range(-1100, 1100) }
 225 DONE_FLAG = None
 226
 227
 228 def send_error_to_supervisor(*args):
 229     MAILBOX.put(args)
 230
 231
 232 def init_worker(test, mailbox, queue, done):
 233     global test_name, MAILBOX, DONE_FLAG
 234     test_name = test
 235     MAILBOX = mailbox
 236     DONE_FLAG = done
 237     do_work(queue)
 238
 239
 240 def is_done():
 241     with DONE_FLAG.get_lock():
 242         return DONE_FLAG.value
 243
 244
 245 def do_work(queue):
 246     while True:
 247         try:
 248             line = queue.get(timeout=0.01)
 249         except Queue.Empty:
 250             if queue.empty() and is_done():
 251                 return
 252             else:
 253                 continue
 254         bin64, bin32, text = line.rstrip().split()
 255         validate(bin64, bin32, text)
 256
 257
 258 def decode_binary64(x):
 259     """
 260     Turn a IEEE 754 binary64 into (mantissa, exponent), except 0.0 and
 261     infinity (positive and negative), which return ZERO, INF, and NEG_INF
 262     respectively.
 263     """
 264     x = binascii.unhexlify(x)
 265     assert len(x) == 8, repr(x)
 266     [bits] = struct.unpack(b'>Q', x)
 267     if bits == 0:
 268         return ZERO
 269     exponent = (bits >> 52) & 0x7FF
 270     negative = bits >> 63
 271     low_bits = bits & 0xFFFFFFFFFFFFF
 272     if exponent == 0:
 273         mantissa = low_bits
 274         exponent += 1
 275         if mantissa == 0:
 276             return ZERO
 277     elif exponent == 0x7FF:
 278         assert low_bits == 0, "NaN"
 279         if negative:
 280             return NEG_INF
 281         else:
 282             return INF
 283     else:
 284         mantissa = low_bits | (1 << 52)
 285     exponent -= 1023 + 52
 286     if negative:
 287         mantissa = -mantissa
 288     return (mantissa, exponent)
 289
 290
 291 def decode_binary32(x):
 292     """
 293     Turn a IEEE 754 binary32 into (mantissa, exponent), except 0.0 and
 294     infinity (positive and negative), which return ZERO, INF, and NEG_INF
 295     respectively.
 296     """
 297     x = binascii.unhexlify(x)
 298     assert len(x) == 4, repr(x)
 299     [bits] = struct.unpack(b'>I', x)
 300     if bits == 0:
 301         return ZERO
 302     exponent = (bits >> 23) & 0xFF
 303     negative = bits >> 31
 304     low_bits = bits & 0x7FFFFF
 305     if exponent == 0:
 306         mantissa = low_bits
 307         exponent += 1
 308         if mantissa == 0:
 309             return ZERO
 310     elif exponent == 0xFF:
 311         if negative:
 312             return NEG_INF
 313         else:
 314             return INF
 315     else:
 316         mantissa = low_bits | (1 << 23)
 317     exponent -= 127 + 23
 318     if negative:
 319         mantissa = -mantissa
 320     return (mantissa, exponent)
 321
 322
 323 MIN_SUBNORMAL_DOUBLE = Fraction(2) ** -1074
 324 MIN_SUBNORMAL_SINGLE = Fraction(2) ** -149  # XXX unsure
 325 MAX_DOUBLE = (2 - Fraction(2) ** -52) * (2 ** 1023)
 326 MAX_SINGLE = (2 - Fraction(2) ** -23) * (2 ** 127)
 327 MAX_ULP_DOUBLE = 1023 - 52
 328 MAX_ULP_SINGLE = 127 - 23
 329 DOUBLE_ZERO_CUTOFF = MIN_SUBNORMAL_DOUBLE / 2
 330 DOUBLE_INF_CUTOFF = MAX_DOUBLE + 2 ** (MAX_ULP_DOUBLE - 1)
 331 SINGLE_ZERO_CUTOFF = MIN_SUBNORMAL_SINGLE / 2
 332 SINGLE_INF_CUTOFF = MAX_SINGLE + 2 ** (MAX_ULP_SINGLE - 1)
 333
 334 def validate(bin64, bin32, text):
 335     double = decode_binary64(bin64)
 336     single = decode_binary32(bin32)
 337     real = Fraction(text)
 338
 339     if double is ZERO:
 340         if real > DOUBLE_ZERO_CUTOFF:
 341             record_special_error(text, "f64 zero")
 342     elif double is INF:
 343         if real < DOUBLE_INF_CUTOFF:
 344             record_special_error(text, "f64 inf")
 345     elif double is NEG_INF:
 346         if -real < DOUBLE_INF_CUTOFF:
 347             record_special_error(text, "f64 -inf")
 348     elif len(double) == 2:
 349         sig, k = double
 350         validate_normal(text, real, sig, k, "f64")
 351     else:
 352         assert 0, "didn't handle binary64"
 353     if single is ZERO:
 354         if real > SINGLE_ZERO_CUTOFF:
 355             record_special_error(text, "f32 zero")
 356     elif single is INF:
 357         if real < SINGLE_INF_CUTOFF:
 358             record_special_error(text, "f32 inf")
 359     elif single is NEG_INF:
 360         if -real < SINGLE_INF_CUTOFF:
 361             record_special_error(text, "f32 -inf")
 362     elif len(single) == 2:
 363         sig, k = single
 364         validate_normal(text, real, sig, k, "f32")
 365     else:
 366         assert 0, "didn't handle binary32"
 367
 368 def record_special_error(text, descr):
 369     send_error_to_supervisor(text.strip(), "wrongly rounded to", descr)
 370
 371
 372 def validate_normal(text, real, sig, k, kind):
 373     approx = sig * POW2[k]
 374     error = abs(approx - real)
 375     if error > HALF_ULP[k]:
 376         record_normal_error(text, error, k, kind)
 377
 378
 379 def record_normal_error(text, error, k, kind):
 380     one_ulp = HALF_ULP[k + 1]
 381     assert one_ulp == 2 * HALF_ULP[k]
 382     relative_error = error / one_ulp
 383     text = text.strip()
 384     try:
 385         err_repr = float(relative_error)
 386     except ValueError:
 387         err_repr = str(err_repr).replace('/', ' / ')
 388     send_error_to_supervisor(err_repr, "ULP error on", text, "(" + kind + ")")
 389
 390
 391 if __name__ == '__main__':
 392     main()