src/etc/test-float-parse/runtests.py

   1 #!/usr/bin/env python3
   2
   3 """
   4 Testing dec2flt
   5 ===============
   6 These are *really* extensive tests. Expect them to run for hours. Due to the
   7 nature of the problem (the input is a string of arbitrary length), exhaustive
   8 testing is not really possible. Instead, there are exhaustive tests for some
   9 classes of inputs for which that is feasible and a bunch of deterministic and
  10 random non-exhaustive tests for covering everything else.
  11
  12 The actual tests (generating decimal strings and feeding them to dec2flt) is
  13 performed by a set of stand-along rust programs. This script compiles, runs,
  14 and supervises them. The programs report the strings they generate and the
  15 floating point numbers they converted those strings to, and this script
  16 checks that the results are correct.
  17
  18 You can run specific tests rather than all of them by giving their names
  19 (without .rs extension) as command line parameters.
  20
  21 Verification
  22 ------------
  23 The tricky part is not generating those inputs but verifying the outputs.
  24 Comparing with the result of Python's float() does not cut it because
  25 (and this is apparently undocumented) although Python includes a version of
  26 Martin Gay's code including the decimal-to-float part, it doesn't actually use
  27 it for float() (only for round()) instead relying on the system scanf() which
  28 is not necessarily completely accurate.
  29
  30 Instead, we take the input and compute the true value with bignum arithmetic
  31 (as a fraction, using the ``fractions`` module).
  32
  33 Given an input string and the corresponding float computed via Rust, simply
  34 decode the float into f * 2^k (for integers f, k) and the ULP.
  35 We can now easily compute the error and check if it is within 0.5 ULP as it
  36 should be. Zero and infinites are handled similarly:
  37
  38 - If the approximation is 0.0, the exact value should be *less or equal*
  39   half the smallest denormal float: the smallest denormal floating point
  40   number has an odd mantissa (00...001) and thus half of that is rounded
  41   to 00...00, i.e., zero.
  42 - If the approximation is Inf, the exact value should be *greater or equal*
  43   to the largest finite float + 0.5 ULP: the largest finite float has an odd
  44   mantissa (11...11), so that plus half an ULP is rounded up to the nearest
  45   even number, which overflows.
  46
  47 Implementation details
  48 ----------------------
  49 This directory contains a set of single-file Rust programs that perform
  50 tests with a particular class of inputs. Each is compiled and run without
  51 parameters, outputs (f64, f32, decimal) pairs to verify externally, and
  52 in any case either exits gracefully or with a panic.
  53
  54 If a test binary writes *anything at all* to stderr or exits with an
  55 exit code that's not 0, the test fails.
  56 The output on stdout is treated as (f64, f32, decimal) record, encoded thusly:
  57
  58 - First, the bits of the f64 encoded as an ASCII hex string.
  59 - Second, the bits of the f32 encoded as an ASCII hex string.
  60 - Then the corresponding string input, in ASCII
  61 - The record is terminated with a newline.
  62
  63 Incomplete records are an error. Not-a-Number bit patterns are invalid too.
  64
  65 The tests run serially but the validation for a single test is parallelized
  66 with ``multiprocessing``. Each test is launched as a subprocess.
  67 One thread supervises it: Accepts and enqueues records to validate, observe
  68 stderr, and waits for the process to exit. A set of worker processes perform
  69 the validation work for the outputs enqueued there. Another thread listens
  70 for progress updates from the workers.
  71
  72 Known issues
  73 ------------
  74 Some errors (e.g., NaN outputs) aren't handled very gracefully.
  75 Also, if there is an exception or the process is interrupted (at least on
  76 Windows) the worker processes are leaked and stick around forever.
  77 They're only a few megabytes each, but still, this script should not be run
  78 if you aren't prepared to manually kill a lot of orphaned processes.
  79 """
  80 from __future__ import print_function
  81 import sys
  82 import os.path
  83 import time
  84 import struct
  85 from fractions import Fraction
  86 from collections import namedtuple
  87 from subprocess import Popen, check_call, PIPE
  88 from glob import glob
  89 import multiprocessing
  90 import threading
  91 import ctypes
  92 import binascii
  93
  94 try:  # Python 3
  95     import queue as Queue
  96 except ImportError:  # Python 2
  97     import Queue
  98
  99 NUM_WORKERS = 2
 100 UPDATE_EVERY_N = 50000
 101 INF = namedtuple('INF', '')()
 102 NEG_INF = namedtuple('NEG_INF', '')()
 103 ZERO = namedtuple('ZERO', '')()
 104 MAILBOX = None  # The queue for reporting errors to the main process.
 105 STDOUT_LOCK = threading.Lock()
 106 test_name = None
 107 child_processes = []
 108 exit_status = 0
 109
 110 def msg(*args):
 111     with STDOUT_LOCK:
 112         print("[" + test_name + "]", *args)
 113         sys.stdout.flush()
 114
 115
 116 def write_errors():
 117     global exit_status
 118     f = open("errors.txt", 'w')
 119     have_seen_error = False
 120     while True:
 121         args = MAILBOX.get()
 122         if args is None:
 123             f.close()
 124             break
 125         print(*args, file=f)
 126         f.flush()
 127         if not have_seen_error:
 128             have_seen_error = True
 129             msg("Something is broken:", *args)
 130             msg("Future errors logged to errors.txt")
 131             exit_status = 101
 132
 133
 134 def rustc(test):
 135     rs = test + '.rs'
 136     exe = test + '.exe'  # hopefully this makes it work on *nix
 137     print("compiling", test)
 138     sys.stdout.flush()
 139     check_call(['rustc', rs, '-o', exe])
 140
 141
 142 def run(test):
 143     global test_name
 144     test_name = test
 145
 146     t0 = time.clock()
 147     msg("setting up supervisor")
 148     exe = test + '.exe'
 149     proc = Popen(exe, bufsize=1<<20 , stdin=PIPE, stdout=PIPE, stderr=PIPE)
 150     done = multiprocessing.Value(ctypes.c_bool)
 151     queue = multiprocessing.Queue(maxsize=5)#(maxsize=1024)
 152     workers = []
 153     for n in range(NUM_WORKERS):
 154         worker = multiprocessing.Process(name='Worker-' + str(n + 1),
 155                                          target=init_worker,
 156                                          args=[test, MAILBOX, queue, done])
 157         workers.append(worker)
 158         child_processes.append(worker)
 159     for worker in workers:
 160         worker.start()
 161     msg("running test")
 162     interact(proc, queue)
 163     with done.get_lock():
 164         done.value = True
 165     for worker in workers:
 166         worker.join()
 167     msg("python is done")
 168     assert queue.empty(), "did not validate everything"
 169     dt = time.clock() - t0
 170     msg("took", round(dt, 3), "seconds")
 171
 172
 173 def interact(proc, queue):
 174     n = 0
 175     while proc.poll() is None:
 176         line = proc.stdout.readline()
 177         if not line:
 178             continue
 179         assert line.endswith('\n'), "incomplete line: " + repr(line)
 180         queue.put(line)
 181         n += 1
 182         if n % UPDATE_EVERY_N == 0:
 183             msg("got", str(n // 1000) + "k", "records")
 184     msg("rust is done. exit code:", proc.returncode)
 185     rest, stderr = proc.communicate()
 186     if stderr:
 187         msg("rust stderr output:", stderr)
 188     for line in rest.split('\n'):
 189         if not line:
 190             continue
 191         queue.put(line)
 192
 193
 194 def main():
 195     global MAILBOX
 196     all_tests = [os.path.splitext(f)[0] for f in glob('*.rs') if not f.startswith('_')]
 197     args = sys.argv[1:]
 198     if args:
 199         tests = [test for test in all_tests if test in args]
 200     else
 201         tests = all_tests
 202     if not tests:
 203         print("Error: No tests to run")
 204         sys.exit(1)
 205     # Compile first for quicker feedback
 206     for test in tests:
 207         rustc(test)
 208     # Set up mailbox once for all tests
 209     MAILBOX = multiprocessing.Queue()
 210     mailman = threading.Thread(target=write_errors)
 211     mailman.daemon = True
 212     mailman.start()
 213     for test in tests:
 214         run(test)
 215     MAILBOX.put(None)
 216     mailman.join()
 217
 218
 219 # ---- Worker thread code ----
 220
 221
 222 POW2 = { e: Fraction(2) ** e for e in range(-1100, 1100) }
 223 HALF_ULP = { e: (Fraction(2) ** e)/2 for e in range(-1100, 1100) }
 224 DONE_FLAG = None
 225
 226
 227 def send_error_to_supervisor(*args):
 228     MAILBOX.put(args)
 229
 230
 231 def init_worker(test, mailbox, queue, done):
 232     global test_name, MAILBOX, DONE_FLAG
 233     test_name = test
 234     MAILBOX = mailbox
 235     DONE_FLAG = done
 236     do_work(queue)
 237
 238
 239 def is_done():
 240     with DONE_FLAG.get_lock():
 241         return DONE_FLAG.value
 242
 243
 244 def do_work(queue):
 245     while True:
 246         try:
 247             line = queue.get(timeout=0.01)
 248         except Queue.Empty:
 249             if queue.empty() and is_done():
 250                 return
 251             else:
 252                 continue
 253         bin64, bin32, text = line.rstrip().split()
 254         validate(bin64, bin32, text)
 255
 256
 257 def decode_binary64(x):
 258     """
 259     Turn a IEEE 754 binary64 into (mantissa, exponent), except 0.0 and
 260     infinity (positive and negative), which return ZERO, INF, and NEG_INF
 261     respectively.
 262     """
 263     x = binascii.unhexlify(x)
 264     assert len(x) == 8, repr(x)
 265     [bits] = struct.unpack(b'>Q', x)
 266     if bits == 0:
 267         return ZERO
 268     exponent = (bits >> 52) & 0x7FF
 269     negative = bits >> 63
 270     low_bits = bits & 0xFFFFFFFFFFFFF
 271     if exponent == 0:
 272         mantissa = low_bits
 273         exponent += 1
 274         if mantissa == 0:
 275             return ZERO
 276     elif exponent == 0x7FF:
 277         assert low_bits == 0, "NaN"
 278         if negative:
 279             return NEG_INF
 280         else:
 281             return INF
 282     else:
 283         mantissa = low_bits | (1 << 52)
 284     exponent -= 1023 + 52
 285     if negative:
 286         mantissa = -mantissa
 287     return (mantissa, exponent)
 288
 289
 290 def decode_binary32(x):
 291     """
 292     Turn a IEEE 754 binary32 into (mantissa, exponent), except 0.0 and
 293     infinity (positive and negative), which return ZERO, INF, and NEG_INF
 294     respectively.
 295     """
 296     x = binascii.unhexlify(x)
 297     assert len(x) == 4, repr(x)
 298     [bits] = struct.unpack(b'>I', x)
 299     if bits == 0:
 300         return ZERO
 301     exponent = (bits >> 23) & 0xFF
 302     negative = bits >> 31
 303     low_bits = bits & 0x7FFFFF
 304     if exponent == 0:
 305         mantissa = low_bits
 306         exponent += 1
 307         if mantissa == 0:
 308             return ZERO
 309     elif exponent == 0xFF:
 310         if negative:
 311             return NEG_INF
 312         else:
 313             return INF
 314     else:
 315         mantissa = low_bits | (1 << 23)
 316     exponent -= 127 + 23
 317     if negative:
 318         mantissa = -mantissa
 319     return (mantissa, exponent)
 320
 321
 322 MIN_SUBNORMAL_DOUBLE = Fraction(2) ** -1074
 323 MIN_SUBNORMAL_SINGLE = Fraction(2) ** -149  # XXX unsure
 324 MAX_DOUBLE = (2 - Fraction(2) ** -52) * (2 ** 1023)
 325 MAX_SINGLE = (2 - Fraction(2) ** -23) * (2 ** 127)
 326 MAX_ULP_DOUBLE = 1023 - 52
 327 MAX_ULP_SINGLE = 127 - 23
 328 DOUBLE_ZERO_CUTOFF = MIN_SUBNORMAL_DOUBLE / 2
 329 DOUBLE_INF_CUTOFF = MAX_DOUBLE + 2 ** (MAX_ULP_DOUBLE - 1)
 330 SINGLE_ZERO_CUTOFF = MIN_SUBNORMAL_SINGLE / 2
 331 SINGLE_INF_CUTOFF = MAX_SINGLE + 2 ** (MAX_ULP_SINGLE - 1)
 332
 333 def validate(bin64, bin32, text):
 334     double = decode_binary64(bin64)
 335     single = decode_binary32(bin32)
 336     real = Fraction(text)
 337
 338     if double is ZERO:
 339         if real > DOUBLE_ZERO_CUTOFF:
 340             record_special_error(text, "f64 zero")
 341     elif double is INF:
 342         if real < DOUBLE_INF_CUTOFF:
 343             record_special_error(text, "f64 inf")
 344     elif double is NEG_INF:
 345         if -real < DOUBLE_INF_CUTOFF:
 346             record_special_error(text, "f64 -inf")
 347     elif len(double) == 2:
 348         sig, k = double
 349         validate_normal(text, real, sig, k, "f64")
 350     else:
 351         assert 0, "didn't handle binary64"
 352     if single is ZERO:
 353         if real > SINGLE_ZERO_CUTOFF:
 354             record_special_error(text, "f32 zero")
 355     elif single is INF:
 356         if real < SINGLE_INF_CUTOFF:
 357             record_special_error(text, "f32 inf")
 358     elif single is NEG_INF:
 359         if -real < SINGLE_INF_CUTOFF:
 360             record_special_error(text, "f32 -inf")
 361     elif len(single) == 2:
 362         sig, k = single
 363         validate_normal(text, real, sig, k, "f32")
 364     else:
 365         assert 0, "didn't handle binary32"
 366
 367 def record_special_error(text, descr):
 368     send_error_to_supervisor(text.strip(), "wrongly rounded to", descr)
 369
 370
 371 def validate_normal(text, real, sig, k, kind):
 372     approx = sig * POW2[k]
 373     error = abs(approx - real)
 374     if error > HALF_ULP[k]:
 375         record_normal_error(text, error, k, kind)
 376
 377
 378 def record_normal_error(text, error, k, kind):
 379     one_ulp = HALF_ULP[k + 1]
 380     assert one_ulp == 2 * HALF_ULP[k]
 381     relative_error = error / one_ulp
 382     text = text.strip()
 383     try:
 384         err_repr = float(relative_error)
 385     except ValueError:
 386         err_repr = str(err_repr).replace('/', ' / ')
 387     send_error_to_supervisor(err_repr, "ULP error on", text, "(" + kind + ")")
 388
 389
 390 if __name__ == '__main__':
 391     main()