src/etc/test-float-parse/runtests.py

   1 #!/usr/bin/env python3
   2
   3 """
   4 Testing dec2flt
   5 ===============
   6 These are *really* extensive tests. Expect them to run for hours. Due to the
   7 nature of the problem (the input is a string of arbitrary length), exhaustive
   8 testing is not really possible. Instead, there are exhaustive tests for some
   9 classes of inputs for which that is feasible and a bunch of deterministic and
  10 random non-exhaustive tests for covering everything else.
  11
   12 The actual tests (generating decimal strings and feeding them to dec2flt) are
   13 performed by a set of stand-alone Rust programs. This script compiles, runs,
  14 and supervises them. The programs report the strings they generate and the
  15 floating point numbers they converted those strings to, and this script
  16 checks that the results are correct.
  17
  18 You can run specific tests rather than all of them by giving their names
  19 (without .rs extension) as command line parameters.
  20
  21 Verification
  22 ------------
  23 The tricky part is not generating those inputs but verifying the outputs.
  24 Comparing with the result of Python's float() does not cut it because
  25 (and this is apparently undocumented) although Python includes a version of
   26 David Gay's code including the decimal-to-float part, it doesn't actually use
  27 it for float() (only for round()) instead relying on the system scanf() which
  28 is not necessarily completely accurate.
  29
  30 Instead, we take the input and compute the true value with bignum arithmetic
  31 (as a fraction, using the ``fractions`` module).
  32
  33 Given an input string and the corresponding float computed via Rust, simply
  34 decode the float into f * 2^k (for integers f, k) and the ULP.
  35 We can now easily compute the error and check if it is within 0.5 ULP as it
   36 should be. Zero and infinities are handled similarly:
  37
  38 - If the approximation is 0.0, the exact value should be *less or equal*
  39   half the smallest denormal float: the smallest denormal floating point
  40   number has an odd mantissa (00...001) and thus half of that is rounded
  41   to 00...00, i.e., zero.
  42 - If the approximation is Inf, the exact value should be *greater or equal*
  43   to the largest finite float + 0.5 ULP: the largest finite float has an odd
  44   mantissa (11...11), so that plus half an ULP is rounded up to the nearest
  45   even number, which overflows.
  46
  47 Implementation details
  48 ----------------------
  49 This directory contains a set of single-file Rust programs that perform
  50 tests with a particular class of inputs. Each is compiled and run without
  51 parameters, outputs (f64, f32, decimal) pairs to verify externally, and
  52 in any case either exits gracefully or with a panic.
  53
  54 If a test binary writes *anything at all* to stderr or exits with an
  55 exit code that's not 0, the test fails.
  56 The output on stdout is treated as (f64, f32, decimal) record, encoded thusly:
  57
  58 - First, the bits of the f64 encoded as an ASCII hex string.
  59 - Second, the bits of the f32 encoded as an ASCII hex string.
  60 - Then the corresponding string input, in ASCII
  61 - The record is terminated with a newline.
  62
  63 Incomplete records are an error. Not-a-Number bit patterns are invalid too.
  64
  65 The tests run serially but the validation for a single test is parallelized
  66 with ``multiprocessing``. Each test is launched as a subprocess.
   67 One thread supervises it: Accepts and enqueues records to validate, observes
  68 stderr, and waits for the process to exit. A set of worker processes perform
  69 the validation work for the outputs enqueued there. Another thread listens
  70 for progress updates from the workers.
  71
  72 Known issues
  73 ------------
  74 Some errors (e.g., NaN outputs) aren't handled very gracefully.
  75 Also, if there is an exception or the process is interrupted (at least on
  76 Windows) the worker processes are leaked and stick around forever.
  77 They're only a few megabytes each, but still, this script should not be run
  78 if you aren't prepared to manually kill a lot of orphaned processes.
  79 """
  80 from __future__ import print_function
  81 import sys
  82 import os.path
  83 import time
  84 import struct
  85 from fractions import Fraction
  86 from collections import namedtuple
  87 from subprocess import Popen, check_call, PIPE
  88 from glob import glob
  89 import multiprocessing
  90 import threading
  91 import ctypes
  92 import binascii
  93
  94 try:  # Python 3
  95     import queue as Queue
  96 except ImportError:  # Python 2
  97     import Queue
  98
  99 NUM_WORKERS = 2
 100 UPDATE_EVERY_N = 50000
 101 INF = namedtuple('INF', '')()
 102 NEG_INF = namedtuple('NEG_INF', '')()
 103 ZERO = namedtuple('ZERO', '')()
 104 MAILBOX = None  # The queue for reporting errors to the main process.
 105 STDOUT_LOCK = threading.Lock()
 106 test_name = None
 107 child_processes = []
 108 exit_status = 0
 109
 110 def msg(*args):
 111     with STDOUT_LOCK:
 112         print("[" + test_name + "]", *args)
 113         sys.stdout.flush()
 114
 115
 116 def write_errors():
 117     global exit_status
 118     f = open("errors.txt", 'w')
 119     have_seen_error = False
 120     while True:
 121         args = MAILBOX.get()
 122         if args is None:
 123             f.close()
 124             break
 125         print(*args, file=f)
 126         f.flush()
 127         if not have_seen_error:
 128             have_seen_error = True
 129             msg("Something is broken:", *args)
 130             msg("Future errors logged to errors.txt")
 131             exit_status = 101
 132
 133
 134 def cargo():
 135     print("compiling tests")
 136     sys.stdout.flush()
 137     check_call(['cargo', 'build', '--release'])
 138
 139
 140 def run(test):
 141     global test_name
 142     test_name = test
 143
 144     t0 = time.perf_counter()
 145     msg("setting up supervisor")
 146     command = ['cargo', 'run', '--bin', test, '--release']
 147     proc = Popen(command, bufsize=1<<20 , stdin=PIPE, stdout=PIPE, stderr=PIPE)
 148     done = multiprocessing.Value(ctypes.c_bool)
 149     queue = multiprocessing.Queue(maxsize=5)#(maxsize=1024)
 150     workers = []
 151     for n in range(NUM_WORKERS):
 152         worker = multiprocessing.Process(name='Worker-' + str(n + 1),
 153                                          target=init_worker,
 154                                          args=[test, MAILBOX, queue, done])
 155         workers.append(worker)
 156         child_processes.append(worker)
 157     for worker in workers:
 158         worker.start()
 159     msg("running test")
 160     interact(proc, queue)
 161     with done.get_lock():
 162         done.value = True
 163     for worker in workers:
 164         worker.join()
 165     msg("python is done")
 166     assert queue.empty(), "did not validate everything"
 167     dt = time.perf_counter() - t0
 168     msg("took", round(dt, 3), "seconds")
 169
 170
 171 def interact(proc, queue):
 172     n = 0
 173     while proc.poll() is None:
 174         line = proc.stdout.readline()
 175         if not line:
 176             continue
 177         assert line.endswith(b'\n'), "incomplete line: " + repr(line)
 178         queue.put(line)
 179         n += 1
 180         if n % UPDATE_EVERY_N == 0:
 181             msg("got", str(n // 1000) + "k", "records")
 182     msg("rust is done. exit code:", proc.returncode)
 183     rest, stderr = proc.communicate()
 184     if stderr:
 185         msg("rust stderr output:", stderr)
 186     for line in rest.split(b'\n'):
 187         if not line:
 188             continue
 189         queue.put(line)
 190
 191
 192 def main():
 193     global MAILBOX
 194     files = glob('src/bin/*.rs')
 195     basenames = [os.path.basename(i) for i in files]
 196     all_tests = [os.path.splitext(f)[0] for f in basenames if not f.startswith('_')]
 197     args = sys.argv[1:]
 198     if args:
 199         tests = [test for test in all_tests if test in args]
 200     else:
 201         tests = all_tests
 202     if not tests:
 203         print("Error: No tests to run")
 204         sys.exit(1)
 205     # Compile first for quicker feedback
 206     cargo()
 207     # Set up mailbox once for all tests
 208     MAILBOX = multiprocessing.Queue()
 209     mailman = threading.Thread(target=write_errors)
 210     mailman.daemon = True
 211     mailman.start()
 212     for test in tests:
 213         run(test)
 214     MAILBOX.put(None)
 215     mailman.join()
 216
 217
  218 # ---- Worker process code ----
 219
 220
 221 POW2 = { e: Fraction(2) ** e for e in range(-1100, 1100) }
 222 HALF_ULP = { e: (Fraction(2) ** e)/2 for e in range(-1100, 1100) }
 223 DONE_FLAG = None
 224
 225
 226 def send_error_to_supervisor(*args):
 227     MAILBOX.put(args)
 228
 229
 230 def init_worker(test, mailbox, queue, done):
 231     global test_name, MAILBOX, DONE_FLAG
 232     test_name = test
 233     MAILBOX = mailbox
 234     DONE_FLAG = done
 235     do_work(queue)
 236
 237
 238 def is_done():
 239     with DONE_FLAG.get_lock():
 240         return DONE_FLAG.value
 241
 242
 243 def do_work(queue):
 244     while True:
 245         try:
 246             line = queue.get(timeout=0.01)
 247         except Queue.Empty:
 248             if queue.empty() and is_done():
 249                 return
 250             else:
 251                 continue
 252         bin64, bin32, text = line.rstrip().split()
 253         validate(bin64, bin32, text.decode('utf-8'))
 254
 255
 256 def decode_binary64(x):
 257     """
 258     Turn a IEEE 754 binary64 into (mantissa, exponent), except 0.0 and
 259     infinity (positive and negative), which return ZERO, INF, and NEG_INF
 260     respectively.
 261     """
 262     x = binascii.unhexlify(x)
 263     assert len(x) == 8, repr(x)
 264     [bits] = struct.unpack(b'>Q', x)
 265     if bits == 0:
 266         return ZERO
 267     exponent = (bits >> 52) & 0x7FF
 268     negative = bits >> 63
 269     low_bits = bits & 0xFFFFFFFFFFFFF
 270     if exponent == 0:
 271         mantissa = low_bits
 272         exponent += 1
 273         if mantissa == 0:
 274             return ZERO
 275     elif exponent == 0x7FF:
 276         assert low_bits == 0, "NaN"
 277         if negative:
 278             return NEG_INF
 279         else:
 280             return INF
 281     else:
 282         mantissa = low_bits | (1 << 52)
 283     exponent -= 1023 + 52
 284     if negative:
 285         mantissa = -mantissa
 286     return (mantissa, exponent)
 287
 288
 289 def decode_binary32(x):
 290     """
 291     Turn a IEEE 754 binary32 into (mantissa, exponent), except 0.0 and
 292     infinity (positive and negative), which return ZERO, INF, and NEG_INF
 293     respectively.
 294     """
 295     x = binascii.unhexlify(x)
 296     assert len(x) == 4, repr(x)
 297     [bits] = struct.unpack(b'>I', x)
 298     if bits == 0:
 299         return ZERO
 300     exponent = (bits >> 23) & 0xFF
 301     negative = bits >> 31
 302     low_bits = bits & 0x7FFFFF
 303     if exponent == 0:
 304         mantissa = low_bits
 305         exponent += 1
 306         if mantissa == 0:
 307             return ZERO
 308     elif exponent == 0xFF:
 309         if negative:
 310             return NEG_INF
 311         else:
 312             return INF
 313     else:
 314         mantissa = low_bits | (1 << 23)
 315     exponent -= 127 + 23
 316     if negative:
 317         mantissa = -mantissa
 318     return (mantissa, exponent)
 319
 320
 321 MIN_SUBNORMAL_DOUBLE = Fraction(2) ** -1074
 322 MIN_SUBNORMAL_SINGLE = Fraction(2) ** -149  # XXX unsure
 323 MAX_DOUBLE = (2 - Fraction(2) ** -52) * (2 ** 1023)
 324 MAX_SINGLE = (2 - Fraction(2) ** -23) * (2 ** 127)
 325 MAX_ULP_DOUBLE = 1023 - 52
 326 MAX_ULP_SINGLE = 127 - 23
 327 DOUBLE_ZERO_CUTOFF = MIN_SUBNORMAL_DOUBLE / 2
 328 DOUBLE_INF_CUTOFF = MAX_DOUBLE + 2 ** (MAX_ULP_DOUBLE - 1)
 329 SINGLE_ZERO_CUTOFF = MIN_SUBNORMAL_SINGLE / 2
 330 SINGLE_INF_CUTOFF = MAX_SINGLE + 2 ** (MAX_ULP_SINGLE - 1)
 331
 332 def validate(bin64, bin32, text):
 333     try:
 334         double = decode_binary64(bin64)
 335     except AssertionError:
 336         print(bin64, bin32, text)
 337         raise
 338     single = decode_binary32(bin32)
 339     real = Fraction(text)
 340
 341     if double is ZERO:
 342         if real > DOUBLE_ZERO_CUTOFF:
 343             record_special_error(text, "f64 zero")
 344     elif double is INF:
 345         if real < DOUBLE_INF_CUTOFF:
 346             record_special_error(text, "f64 inf")
 347     elif double is NEG_INF:
 348         if -real < DOUBLE_INF_CUTOFF:
 349             record_special_error(text, "f64 -inf")
 350     elif len(double) == 2:
 351         sig, k = double
 352         validate_normal(text, real, sig, k, "f64")
 353     else:
 354         assert 0, "didn't handle binary64"
 355     if single is ZERO:
 356         if real > SINGLE_ZERO_CUTOFF:
 357             record_special_error(text, "f32 zero")
 358     elif single is INF:
 359         if real < SINGLE_INF_CUTOFF:
 360             record_special_error(text, "f32 inf")
 361     elif single is NEG_INF:
 362         if -real < SINGLE_INF_CUTOFF:
 363             record_special_error(text, "f32 -inf")
 364     elif len(single) == 2:
 365         sig, k = single
 366         validate_normal(text, real, sig, k, "f32")
 367     else:
 368         assert 0, "didn't handle binary32"
 369
 370 def record_special_error(text, descr):
 371     send_error_to_supervisor(text.strip(), "wrongly rounded to", descr)
 372
 373
 374 def validate_normal(text, real, sig, k, kind):
 375     approx = sig * POW2[k]
 376     error = abs(approx - real)
 377     if error > HALF_ULP[k]:
 378         record_normal_error(text, error, k, kind)
 379
 380
 381 def record_normal_error(text, error, k, kind):
 382     one_ulp = HALF_ULP[k + 1]
 383     assert one_ulp == 2 * HALF_ULP[k]
 384     relative_error = error / one_ulp
 385     text = text.strip()
 386     try:
 387         err_repr = float(relative_error)
 388     except ValueError:
 389         err_repr = str(err_repr).replace('/', ' / ')
 390     send_error_to_supervisor(err_repr, "ULP error on", text, "(" + kind + ")")
 391
 392
# Run the driver only when this file is executed as a script. The guard also
# keeps main() from running again if the module is imported, which is what
# multiprocessing does in worker processes on platforms that spawn them.
if __name__ == '__main__':
    main()