sys/src/ape/lib/fmt/strtod.c

   1 /*
   2  * The authors of this software are Rob Pike and Ken Thompson.
   3  *              Copyright (c) 2002 by Lucent Technologies.
   4  * Permission to use, copy, modify, and distribute this software for any
   5  * purpose without fee is hereby granted, provided that this entire notice
   6  * is included in all copies of any software which is or includes a copy
   7  * or modification of this software and in all copies of the supporting
   8  * documentation for such software.
   9  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
  10  * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
  11  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  12  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  13  */
  14 #include <stdlib.h>
  15 #include <math.h>
  16 #include <ctype.h>
  17 #include <stdlib.h>
  18 #include <string.h>
  19 #include <errno.h>
  20 #include "fmt.h"
  21 #include "nan.h"
  22
  23 #ifndef nelem
  24 #define nelem(x)        (sizeof(x)/sizeof *(x))
  25 #endif
  26 #define nil ((void*)0)
  27 #define ulong _fmtulong
  28 typedef unsigned long ulong;
  29
  30 /*
  31  * This routine will convert to arbitrary precision
  32  * floating point entirely in multi-precision fixed.
  33  * The answer is the closest floating point number to
  34  * the given decimal number. Exactly half way are
  35  * rounded ala ieee rules.
  36  * Method is to scale input decimal between .500 and .999...
  37  * with external power of 2, then binary search for the
  38  * closest mantissa to this decimal number.
  39  * Nmant is is the required precision. (53 for ieee dp)
  40  * Nbits is the max number of bits/word. (must be <= 28)
  41  * Prec is calculated - the number of words of fixed mantissa.
  42  */
  43 enum
  44 {
  45         Nbits   = 28,                           /* bits safely represented in a ulong */
  46         Nmant   = 53,                           /* bits of precision required */
  47         Bias    = 1022,
  48         Prec    = (Nmant+Nbits+1)/Nbits,        /* words of Nbits each to represent mantissa */
  49         Sigbit  = 1<<(Prec*Nbits-Nmant),        /* first significant bit of Prec-th word */
  50         Ndig    = 1500,
  51         One     = (ulong)(1<<Nbits),
  52         Half    = (ulong)(One>>1),
  53         Maxe    = 310,
  54
  55         S0      = 0,            /* _            _S0     +S1     #S2     .S3 */
  56         S1,                     /* _+           #S2     .S3 */
  57         S2,                     /* _+#          #S2     .S4     eS5 */
  58         S3,                     /* _+.          #S4 */
  59         S4,                     /* _+#.#        #S4     eS5 */
  60         S5,                     /* _+#.#e       +S6     #S7 */
  61         S6,                     /* _+#.#e+      #S7 */
  62         S7,                     /* _+#.#e+#     #S7 */
  63
  64         Fsign   = 1<<0,         /* found - */
  65         Fesign  = 1<<1,         /* found e- */
  66         Fdpoint = 1<<2,         /* found . */
  67 };
  68
  69 static  int     xcmp(char*, char*);
  70 static  int     fpcmp(char*, ulong*);
  71 static  void    frnorm(ulong*);
  72 static  void    divascii(char*, int*, int*, int*);
  73 static  void    mulascii(char*, int*, int*, int*);
  74 static  void    divby(char*, int*, int);
  75 static ulong    umuldiv(ulong, ulong, ulong);
  76
  77 typedef struct  Tab     Tab;
  78 struct  Tab
  79 {
  80         int     bp;
  81         int     siz;
  82         char*   cmp;
  83 };
  84
  85 double
  86 fmtstrtod(const char *as, char **aas)
  87 {
  88         int na, ona, ex, dp, bp, c, i, flag, state;
  89         ulong low[Prec], hig[Prec], mid[Prec], num, den;
  90         double d;
  91         char *s, a[Ndig];
  92
  93         flag = 0;       /* Fsign, Fesign, Fdpoint */
  94         na = 0;         /* number of digits of a[] */
  95         dp = 0;         /* na of decimal point */
  96         ex = 0;         /* exonent */
  97
  98         state = S0;
  99         for(s=(char*)as;; s++) {
 100                 c = *s;
 101                 if(c >= '0' && c <= '9') {
 102                         switch(state) {
 103                         case S0:
 104                         case S1:
 105                         case S2:
 106                                 state = S2;
 107                                 break;
 108                         case S3:
 109                         case S4:
 110                                 state = S4;
 111                                 break;
 112
 113                         case S5:
 114                         case S6:
 115                         case S7:
 116                                 state = S7;
 117                                 ex = ex*10 + (c-'0');
 118                                 continue;
 119                         }
 120                         if(na == 0 && c == '0') {
 121                                 dp--;
 122                                 continue;
 123                         }
 124                         if(na < Ndig-50)
 125                                 a[na++] = c;
 126                         continue;
 127                 }
 128                 switch(c) {
 129                 case '\t':
 130                 case '\n':
 131                 case '\v':
 132                 case '\f':
 133                 case '\r':
 134                 case ' ':
 135                         if(state == S0)
 136                                 continue;
 137                         break;
 138                 case '-':
 139                         if(state == S0)
 140                                 flag |= Fsign;
 141                         else
 142                                 flag |= Fesign;
 143                 case '+':
 144                         if(state == S0)
 145                                 state = S1;
 146                         else
 147                         if(state == S5)
 148                                 state = S6;
 149                         else
 150                                 break;  /* syntax */
 151                         continue;
 152                 case '.':
 153                         flag |= Fdpoint;
 154                         dp = na;
 155                         if(state == S0 || state == S1) {
 156                                 state = S3;
 157                                 continue;
 158                         }
 159                         if(state == S2) {
 160                                 state = S4;
 161                                 continue;
 162                         }
 163                         break;
 164                 case 'e':
 165                 case 'E':
 166                         if(state == S2 || state == S4) {
 167                                 state = S5;
 168                                 continue;
 169                         }
 170                         break;
 171                 }
 172                 break;
 173         }
 174
 175         /*
 176          * clean up return char-pointer
 177          */
 178         switch(state) {
 179         case S0:
 180                 if(xcmp(s, "nan") == 0) {
 181                         if(aas != nil)
 182                                 *aas = s+3;
 183                         goto retnan;
 184                 }
 185         case S1:
 186                 if(xcmp(s, "infinity") == 0) {
 187                         if(aas != nil)
 188                                 *aas = s+8;
 189                         goto retinf;
 190                 }
 191                 if(xcmp(s, "inf") == 0) {
 192                         if(aas != nil)
 193                                 *aas = s+3;
 194                         goto retinf;
 195                 }
 196         case S3:
 197                 if(aas != nil)
 198                         *aas = (char*)as;
 199                 goto ret0;      /* no digits found */
 200         case S6:
 201                 s--;            /* back over +- */
 202         case S5:
 203                 s--;            /* back over e */
 204                 break;
 205         }
 206         if(aas != nil)
 207                 *aas = s;
 208
 209         if(flag & Fdpoint)
 210         while(na > 0 && a[na-1] == '0')
 211                 na--;
 212         if(na == 0)
 213                 goto ret0;      /* zero */
 214         a[na] = 0;
 215         if(!(flag & Fdpoint))
 216                 dp = na;
 217         if(flag & Fesign)
 218                 ex = -ex;
 219         dp += ex;
 220         if(dp < -Maxe-Nmant/3){ /* actually -Nmant*log(2)/log(10), but Nmant/3 close enough */
 221                 errno = ERANGE;
 222                 goto ret0;      /* underflow by exp */
 223         } else
 224         if(dp > +Maxe)
 225                 goto retinf;    /* overflow by exp */
 226
 227         /*
 228          * normalize the decimal ascii number
 229          * to range .[5-9][0-9]* e0
 230          */
 231         bp = 0;         /* binary exponent */
 232         while(dp > 0)
 233                 divascii(a, &na, &dp, &bp);
 234         while(dp < 0 || a[0] < '5')
 235                 mulascii(a, &na, &dp, &bp);
 236         a[na] = 0;
 237
 238         /*
 239          * very small numbers are represented using
 240          * bp = -Bias+1.  adjust accordingly.
 241          */
 242         if(bp < -Bias+1){
 243                 ona = na;
 244                 divby(a, &na, -bp-Bias+1);
 245                 if(na < ona){
 246                         memmove(a+ona-na, a, na);
 247                         memset(a, '0', ona-na);
 248                         na = ona;
 249                 }
 250                 a[na] = 0;
 251                 bp = -Bias+1;
 252         }
 253
 254         /* close approx by naive conversion */
 255         num = 0;
 256         den = 1;
 257         for(i=0; i<9 && (c=a[i]); i++) {
 258                 num = num*10 + (c-'0');
 259                 den *= 10;
 260         }
 261         low[0] = umuldiv(num, One, den);
 262         hig[0] = umuldiv(num+1, One, den);
 263         for(i=1; i<Prec; i++) {
 264                 low[i] = 0;
 265                 hig[i] = One-1;
 266         }
 267
 268         /* binary search for closest mantissa */
 269         for(;;) {
 270                 /* mid = (hig + low) / 2 */
 271                 c = 0;
 272                 for(i=0; i<Prec; i++) {
 273                         mid[i] = hig[i] + low[i];
 274                         if(c)
 275                                 mid[i] += One;
 276                         c = mid[i] & 1;
 277                         mid[i] >>= 1;
 278                 }
 279                 frnorm(mid);
 280
 281                 /* compare */
 282                 c = fpcmp(a, mid);
 283                 if(c > 0) {
 284                         c = 1;
 285                         for(i=0; i<Prec; i++)
 286                                 if(low[i] != mid[i]) {
 287                                         c = 0;
 288                                         low[i] = mid[i];
 289                                 }
 290                         if(c)
 291                                 break;  /* between mid and hig */
 292                         continue;
 293                 }
 294                 if(c < 0) {
 295                         for(i=0; i<Prec; i++)
 296                                 hig[i] = mid[i];
 297                         continue;
 298                 }
 299
 300                 /* only hard part is if even/odd roundings wants to go up */
 301                 c = mid[Prec-1] & (Sigbit-1);
 302                 if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0)
 303                         mid[Prec-1] -= c;
 304                 break;  /* exactly mid */
 305         }
 306
 307         /* normal rounding applies */
 308         c = mid[Prec-1] & (Sigbit-1);
 309         mid[Prec-1] -= c;
 310         if(c >= Sigbit/2) {
 311                 mid[Prec-1] += Sigbit;
 312                 frnorm(mid);
 313         }
 314         goto out;
 315
 316 ret0:
 317         return 0;
 318
 319 retnan:
 320         return __NaN();
 321
 322 retinf:
 323         /*
 324          * Unix strtod requires these.  Plan 9 would return Inf(0) or Inf(-1). */
 325         errno = ERANGE;
 326         if(flag & Fsign)
 327                 return -HUGE_VAL;
 328         return HUGE_VAL;
 329
 330 out:
 331         d = 0;
 332         for(i=0; i<Prec; i++)
 333                 d = d*One + mid[i];
 334         if(flag & Fsign)
 335                 d = -d;
 336         d = ldexp(d, bp - Prec*Nbits);
 337         if(d == 0){     /* underflow */
 338                 errno = ERANGE;
 339         }
 340         return d;
 341 }
 342
 343 static void
 344 frnorm(ulong *f)
 345 {
 346         int i, c;
 347
 348         c = 0;
 349         for(i=Prec-1; i>0; i--) {
 350                 f[i] += c;
 351                 c = f[i] >> Nbits;
 352                 f[i] &= One-1;
 353         }
 354         f[0] += c;
 355 }
 356
 357 static int
 358 fpcmp(char *a, ulong* f)
 359 {
 360         ulong tf[Prec];
 361         int i, d, c;
 362
 363         for(i=0; i<Prec; i++)
 364                 tf[i] = f[i];
 365
 366         for(;;) {
 367                 /* tf *= 10 */
 368                 for(i=0; i<Prec; i++)
 369                         tf[i] = tf[i]*10;
 370                 frnorm(tf);
 371                 d = (tf[0] >> Nbits) + '0';
 372                 tf[0] &= One-1;
 373
 374                 /* compare next digit */
 375                 c = *a;
 376                 if(c == 0) {
 377                         if('0' < d)
 378                                 return -1;
 379                         if(tf[0] != 0)
 380                                 goto cont;
 381                         for(i=1; i<Prec; i++)
 382                                 if(tf[i] != 0)
 383                                         goto cont;
 384                         return 0;
 385                 }
 386                 if(c > d)
 387                         return +1;
 388                 if(c < d)
 389                         return -1;
 390                 a++;
 391         cont:;
 392         }
 393 }
 394
 395 static void
 396 _divby(char *a, int *na, int b)
 397 {
 398         int n, c;
 399         char *p;
 400
 401         p = a;
 402         n = 0;
 403         while(n>>b == 0) {
 404                 c = *a++;
 405                 if(c == 0) {
 406                         while(n) {
 407                                 c = n*10;
 408                                 if(c>>b)
 409                                         break;
 410                                 n = c;
 411                         }
 412                         goto xx;
 413                 }
 414                 n = n*10 + c-'0';
 415                 (*na)--;
 416         }
 417         for(;;) {
 418                 c = n>>b;
 419                 n -= c<<b;
 420                 *p++ = c + '0';
 421                 c = *a++;
 422                 if(c == 0)
 423                         break;
 424                 n = n*10 + c-'0';
 425         }
 426         (*na)++;
 427 xx:
 428         while(n) {
 429                 n = n*10;
 430                 c = n>>b;
 431                 n -= c<<b;
 432                 *p++ = c + '0';
 433                 (*na)++;
 434         }
 435         *p = 0;
 436 }
 437
 438 static void
 439 divby(char *a, int *na, int b)
 440 {
 441         while(b > 9){
 442                 _divby(a, na, 9);
 443                 a[*na] = 0;
 444                 b -= 9;
 445         }
 446         if(b > 0)
 447                 _divby(a, na, b);
 448 }
 449
 450 static  Tab     tab1[] =
 451 {
 452          1,  0, "",
 453          3,  1, "7",
 454          6,  2, "63",
 455          9,  3, "511",
 456         13,  4, "8191",
 457         16,  5, "65535",
 458         19,  6, "524287",
 459         23,  7, "8388607",
 460         26,  8, "67108863",
 461         27,  9, "134217727",
 462 };
 463
 464 static void
 465 divascii(char *a, int *na, int *dp, int *bp)
 466 {
 467         int b, d;
 468         Tab *t;
 469
 470         d = *dp;
 471         if(d >= (int)(nelem(tab1)))
 472                 d = (int)(nelem(tab1))-1;
 473         t = tab1 + d;
 474         b = t->bp;
 475         if(memcmp(a, t->cmp, t->siz) > 0)
 476                 d--;
 477         *dp -= d;
 478         *bp += b;
 479         divby(a, na, b);
 480 }
 481
 482 static void
 483 mulby(char *a, char *p, char *q, int b)
 484 {
 485         int n, c;
 486
 487         n = 0;
 488         *p = 0;
 489         for(;;) {
 490                 q--;
 491                 if(q < a)
 492                         break;
 493                 c = *q - '0';
 494                 c = (c<<b) + n;
 495                 n = c/10;
 496                 c -= n*10;
 497                 p--;
 498                 *p = c + '0';
 499         }
 500         while(n) {
 501                 c = n;
 502                 n = c/10;
 503                 c -= n*10;
 504                 p--;
 505                 *p = c + '0';
 506         }
 507 }
 508
 509 static  Tab     tab2[] =
 510 {
 511          1,  1, "",                             /* dp = 0-0 */
 512          3,  3, "125",
 513          6,  5, "15625",
 514          9,  7, "1953125",
 515         13, 10, "1220703125",
 516         16, 12, "152587890625",
 517         19, 14, "19073486328125",
 518         23, 17, "11920928955078125",
 519         26, 19, "1490116119384765625",
 520         27, 19, "7450580596923828125",          /* dp 8-9 */
 521 };
 522
 523 static void
 524 mulascii(char *a, int *na, int *dp, int *bp)
 525 {
 526         char *p;
 527         int d, b;
 528         Tab *t;
 529
 530         d = -*dp;
 531         if(d >= (int)(nelem(tab2)))
 532                 d = (int)(nelem(tab2))-1;
 533         t = tab2 + d;
 534         b = t->bp;
 535         if(memcmp(a, t->cmp, t->siz) < 0)
 536                 d--;
 537         p = a + *na;
 538         *bp -= b;
 539         *dp += d;
 540         *na += d;
 541         mulby(a, p+d, p, b);
 542 }
 543
 544 static int
 545 xcmp(char *a, char *b)
 546 {
 547         int c1, c2;
 548
 549         while(c1 = *b++) {
 550                 c2 = *a++;
 551                 if(isupper(c2))
 552                         c2 = tolower(c2);
 553                 if(c1 != c2)
 554                         return 1;
 555         }
 556         return 0;
 557 }
 558
 559 static ulong
 560 umuldiv(ulong a, ulong b, ulong c)
 561 {
 562         return ((uvlong)a * (uvlong)b) / c;
 563 }