1 /* derived from /netlib/fp/dtoa.c assuming IEEE, Standard C */
2 /* kudos to dmg@bell-labs.com, gripes to ehg@bell-labs.com */
4 /* Let x be the exact mathematical number defined by a decimal
5 * string s. Then atof(s) is the round-nearest-even IEEE
6 * floating point value.
7 * Let y be an IEEE floating point value and let s be the string
8 * printed as %.17g. Then atof(s) is exactly y.
13 static Lock _dtoalk[2];
14 #define ACQUIRE_DTOA_LOCK(n) lock(&_dtoalk[n])
15 #define FREE_DTOA_LOCK(n) unlock(&_dtoalk[n])
16 #define PRIVATE_mem ((2000+sizeof(double)-1)/sizeof(double))
17 static double private_mem[PRIVATE_mem], *pmem_next = private_mem;
20 #define DBL_MAX_10_EXP 308
21 #define DBL_MAX_EXP 1024
23 #define Storeinc(a,b,c) (*a++ = b << 16 | c & 0xffff)
24 #define fpword0(x) ((x).hi)
25 #define fpword1(x) ((x).lo)
27 /* Ten_pmax = floor(P*log(2)/log(5)) */
28 /* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
29 /* Quick_max = floor((P-1)*log(FLT_RADIX)/log(10) - 1) */
30 /* Int_max = floor(P*log(FLT_RADIX)/log(10) - 1) */
34 #define Exp_msk1 0x100000
35 #define Exp_msk11 0x100000
36 #define Exp_mask 0x7ff00000
40 #define Exp_1 0x3ff00000
41 #define Exp_11 0x3ff00000
43 #define Frac_mask 0xfffff
44 #define Frac_mask1 0xfffff
47 #define Bndry_mask 0xfffff
48 #define Bndry_mask1 0xfffff
49 #define Sign_bit 0x80000000
56 #define rounded_product(a,b) a *= b
57 #define rounded_quotient(a,b) a /= b
59 #define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1))
60 #define Big1 0xffffffff
62 #define FFFFFFFF 0xffffffffUL
71 int k, maxwds, sign, wds;
75 typedef struct Bigint Bigint;
77 static Bigint *freelist[Kmax+1];
86 assert(k < nelem(freelist));
89 if (rv = freelist[k]) {
90 freelist[k] = rv->next;
93 len = (sizeof(Bigint) + (x - 1) * sizeof(unsigned int) + sizeof(double) -1)
95 if (pmem_next - private_mem + len <= PRIVATE_mem) {
96 rv = (Bigint * )pmem_next;
99 rv = (Bigint * )malloc(len * sizeof(double));
104 rv->sign = rv->wds = 0;
112 ACQUIRE_DTOA_LOCK(0);
113 v->next = freelist[v->k];
119 #define Bcopy(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \
120 y->wds*sizeof(int) + 2*sizeof(int))
123 multadd(Bigint *b, int m, int a) /* multiply by m and add a */
126 unsigned int carry, *x, y;
136 y = (xi & 0xffff) * m + carry;
137 z = (xi >> 16) * m + (y >> 16);
139 *x++ = (z << 16) + (y & 0xffff);
142 if (wds >= b->maxwds) {
143 b1 = Balloc(b->k + 1);
155 hi0bits(register unsigned int x)
159 if (!(x & 0xffff0000)) {
163 if (!(x & 0xff000000)) {
167 if (!(x & 0xf0000000)) {
171 if (!(x & 0xc0000000)) {
175 if (!(x & 0x80000000)) {
177 if (!(x & 0x40000000))
184 lo0bits(unsigned int *y)
187 register unsigned int x = *y;
238 mult(Bigint *a, Bigint *b)
242 unsigned int * x, *xa, *xae, *xb, *xbe, *xc, *xc0;
244 unsigned int carry, z;
247 if (a->wds < b->wds) {
259 for (x = c->x, xa = x + wc; x < xa; x++)
266 for (; xb < xbe; xb++, xc0++) {
267 if (y = *xb & 0xffff) {
272 z = (*x & 0xffff) * y + (*xc & 0xffff) + carry;
274 z2 = (*x++ >> 16) * y + (*xc >> 16) + carry;
286 z = (*x & 0xffff) * y + (*xc >> 16) + carry;
289 z2 = (*x++ >> 16) * y + (*xc & 0xffff) + carry;
295 for (xc0 = c->x, xc = xc0 + wc; wc > 0 && !*--xc; --wc)
304 pow5mult(Bigint *b, int k)
306 Bigint * b1, *p5, *p51;
308 static int p05[3] = {
312 b = multadd(b, p05[i-1], 0);
318 ACQUIRE_DTOA_LOCK(1);
333 if (!(p51 = p5->next)) {
334 ACQUIRE_DTOA_LOCK(1);
335 if (!(p51 = p5->next)) {
336 p51 = p5->next = mult(p5, p5);
347 lshift(Bigint *b, int k)
351 unsigned int * x, *x1, *xe, z;
356 for (i = b->maxwds; n1 > i; i <<= 1)
360 for (i = 0; i < n; i++)
383 cmp(Bigint *a, Bigint *b)
385 unsigned int * xa, *xa0, *xb, *xb0;
398 return * xa < *xb ? -1 : 1;
406 diff(Bigint *a, Bigint *b)
410 unsigned int * xa, *xae, *xb, *xbe, *xc;
411 unsigned int borrow, y;
439 y = (*xa & 0xffff) - (*xb & 0xffff) - borrow;
440 borrow = (y & 0x10000) >> 16;
441 z = (*xa++ >> 16) - (*xb++ >> 16) - borrow;
442 borrow = (z & 0x10000) >> 16;
446 y = (*xa & 0xffff) - borrow;
447 borrow = (y & 0x10000) >> 16;
448 z = (*xa++ >> 16) - borrow;
449 borrow = (z & 0x10000) >> 16;
459 b2d(Bigint *a, int *e)
461 unsigned int * xa, *xa0, w, y, z;
463 #define d0 fpword0(d)
464 #define d1 fpword1(d)
473 d0 = Exp_1 | y >> Ebits - k;
474 w = xa > xa0 ? *--xa : 0;
475 d1 = y << (32 - Ebits) + k | w >> Ebits - k;
478 z = xa > xa0 ? *--xa : 0;
480 d0 = Exp_1 | y << k | z >> 32 - k;
481 y = xa > xa0 ? *--xa : 0;
482 d1 = z << k | y >> 32 - k;
494 d2b(FPdbleword d, int *e, int *bits)
498 unsigned int * x, y, z;
506 d0 &= 0x7fffffff; /* clear sign bit, which we ignore */
507 de = (int)(d0 >> Exp_shift);
510 if (k = lo0bits(&y)) {
511 x[0] = y | z << 32 - k;
515 b->wds = (x[1] = z) ? 2 : 1;
522 *e = de - Bias - (P - 1) + k;
532 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
533 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
539 1e16, 1e32, 1e64, 1e128, 1e256 };
541 /* The factor of 2^53 in tinytens[4] helps us avoid setting the underflow */
542 /* flag unnecessarily. It leads to a song and dance at the end of strtod. */
543 #define Scale_Bit 0x10
546 #define NAN_WORD0 0x7ff80000
551 quorem(Bigint *b, Bigint *S)
554 unsigned int * bx, *bxe, q, *sx, *sxe;
555 unsigned int borrow, carry, y, ys;
556 unsigned int si, z, zs;
565 q = *bxe / (*sxe + 1); /* ensure q <= true quotient */
571 ys = (si & 0xffff) * q + carry;
572 zs = (si >> 16) * q + (ys >> 16);
574 y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
575 borrow = (y & 0x10000) >> 16;
576 z = (*bx >> 16) - (zs & 0xffff) - borrow;
577 borrow = (z & 0x10000) >> 16;
582 while (--bxe > bx && !*bxe)
587 if (cmp(b, S) >= 0) {
595 ys = (si & 0xffff) + carry;
596 zs = (si >> 16) + (ys >> 16);
598 y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
599 borrow = (y & 0x10000) >> 16;
600 z = (*bx >> 16) - (zs & 0xffff) - borrow;
601 borrow = (z & 0x10000) >> 16;
607 while (--bxe > bx && !*bxe)
620 j = sizeof(unsigned int);
622 sizeof(Bigint) - sizeof(unsigned int) - sizeof(int) + j <= i;
625 r = (int * )Balloc(k);
632 nrv_alloc(char *s, char **rve, int n)
636 t = rv = rv_alloc(n);
644 /* freedtoa(s) must be used to free values s returned by dtoa
645 * when MULTIPLE_THREADS is #defined. It should be used in all cases,
646 * but for consistency with earlier versions of dtoa, it is optional
647 * when MULTIPLE_THREADS is not defined.
653 Bigint * b = (Bigint * )((int *)s - 1);
654 b->maxwds = 1 << (b->k = *(int * )b);
658 /* dtoa for IEEE arithmetic (dmg): convert double to ASCII string.
660 * Inspired by "How to Print Floating-Point Numbers Accurately" by
661 * Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 92-101].
664 * 1. Rather than iterating, we use a simple numeric overestimate
665 * to determine k = floor(log10(d)). We scale relevant
666 * quantities using O(log2(k)) rather than O(k) multiplications.
667 * 2. For some modes > 2 (corresponding to ecvt and fcvt), we don't
668 * try to generate digits strictly left to right. Instead, we
669 * compute with fewer bits and propagate the carry if necessary
670 * when rounding the final digit up. This is often faster.
671 * 3. Under the assumption that input will be rounded nearest,
672 * mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22.
673 * That is, we allow equality in stopping tests when the
674 * round-nearest rule will give the same floating-point value
675 * as would satisfaction of the stopping test with strict
677 * 4. We remove common factors of powers of 2 from relevant
679 * 5. When converting floating-point integers less than 1e16,
680 * we use floating-point arithmetic rather than resorting
681 * to multiple-precision integers.
682 * 6. When asked to produce fewer than 15 digits, we first try
683 * to get by with floating-point arithmetic; we resort to
684 * multiple-precision integer arithmetic only if we cannot
685 * guarantee that the floating-point calculation has given
686 * the correctly rounded result. For k requested digits and
687 * "uniformly" distributed input, the probability is
688 * something like 10^(k-15) that we must resort to the int
693 dtoa(double _d, int mode, int ndigits, int *decpt, int *sign, char **rve)
695 /* Arguments ndigits, decpt, sign are similar to those
696 of ecvt and fcvt; trailing zeros are suppressed from
697 the returned string. If not null, *rve is set to point
698 to the end of the return value. If d is +-Infinity or NaN,
699 then *decpt is set to 9999.
702 0 ==> shortest string that yields d when read in
703 and rounded to nearest.
704 1 ==> like 0, but with Steele & White stopping rule;
705 e.g. with IEEE P754 arithmetic , mode 0 gives
706 1e23 whereas mode 1 gives 9.999999999999999e22.
707 2 ==> max(1,ndigits) significant digits. This gives a
708 return value similar to that of ecvt, except
709 that trailing zeros are suppressed.
710 3 ==> through ndigits past the decimal point. This
711 gives a return value similar to that from fcvt,
712 except that trailing zeros are suppressed, and
713 ndigits can be negative.
714 4-9 should give the same return values as 2-3, i.e.,
715 4 <= mode <= 9 ==> same return as mode
716 2 + (mode & 1). These modes are mainly for
717 debugging; often they run slower but sometimes
718 faster than modes 2-3.
719 4,5,8,9 ==> left-to-right digit generation.
720 6-9 ==> don't try fast floating-point estimate
723 Values of mode other than 0-9 are treated as mode 0.
725 Sufficient space is allocated to the return value
726 to hold the suppressed trailing zeros.
729 int bbits, b2, b5, be, dig, i, ieps, ilim, ilim0, ilim1,
730 j, j1, k, k0, k_check, leftright, m2, m5, s2, s5,
731 spec_case, try_quick;
733 Bigint * b, *b1, *delta, *mlo=nil, *mhi, *S;
735 FPdbleword d, d2, eps;
739 if (fpword0(d) & Sign_bit) {
740 /* set sign for everything, including 0's and NaNs */
742 fpword0(d) &= ~Sign_bit; /* clear sign bit */
746 if ((fpword0(d) & Exp_mask) == Exp_mask) {
747 /* Infinity or NaN */
749 if (!fpword1(d) && !(fpword0(d) & 0xfffff))
750 return nrv_alloc("Infinity", rve, 8);
751 return nrv_alloc("NaN", rve, 3);
755 return nrv_alloc("0", rve, 1);
758 b = d2b(d, &be, &bbits);
759 i = (int)(fpword0(d) >> Exp_shift1 & (Exp_mask >> Exp_shift1));
761 fpword0(d2) &= Frac_mask1;
762 fpword0(d2) |= Exp_11;
764 /* log(x) ~=~ log(1.5) + (x-1.5)/1.5
765 * log10(x) = log(x) / log(10)
766 * ~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10))
767 * log10(d) = (i-Bias)*log(2)/log(10) + log10(d2)
769 * This suggests computing an approximation k to log10(d) by
771 * k = (i - Bias)*0.301029995663981
772 * + ( (d2-1.5)*0.289529654602168 + 0.176091259055681 );
774 * We want k to be too large rather than too small.
775 * The error in the first-order Taylor series approximation
776 * is in our favor, so we just round up the constant enough
777 * to compensate for any error in the multiplication of
778 * (i - Bias) by 0.301029995663981; since |i - Bias| <= 1077,
779 * and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14,
780 * adding 1e-13 to the constant term more than suffices.
781 * Hence we adjust the constant term to 0.1760912590558.
782 * (We could get a more accurate k by invoking log10,
783 * but this is probably not worthwhile.)
787 ds = (d2.x - 1.5) * 0.289529654602168 + 0.1760912590558 + i * 0.301029995663981;
789 if (ds < 0. && ds != k)
790 k--; /* want k = floor(ds) */
792 if (k >= 0 && k <= Ten_pmax) {
815 if (mode < 0 || mode > 9)
837 ilim = ilim1 = i = ndigits;
849 s = s0 = rv_alloc(i);
851 if (ilim >= 0 && ilim <= Quick_max && try_quick) {
853 /* Try to get by with floating-point arithmetic. */
859 ieps = 2; /* conservative */
864 /* prevent overflows */
866 d.x /= bigtens[n_bigtens-1];
869 for (; j; j >>= 1, i++)
875 } else if (j1 = -k) {
876 d.x *= tens[j1 & 0xf];
877 for (j = j1 >> 4; j; j >>= 1, i++)
883 if (k_check && d.x < 1. && ilim > 0) {
891 eps.x = ieps * d.x + 7.;
892 fpword0(eps) -= (P - 1) * Exp_msk1;
902 /* Generate ilim digits, then fix them up. */
903 eps.x *= tens[ilim-1];
904 for (i = 1; ; i++, d.x *= 10.) {
909 if (d.x > 0.5 + eps.x)
911 else if (d.x < 0.5 - eps.x) {
927 /* Do we have a "small" integer? */
929 if (be >= 0 && k <= Int_max) {
932 if (ndigits < 0 && ilim <= 0) {
934 if (ilim < 0 || d.x <= 5 * ds)
944 if (d.x > ds || d.x == ds && L & 1) {
978 if ((i = ilim) < 0) {
987 if (m2 > 0 && s2 > 0) {
988 i = m2 < s2 ? m2 : s2;
996 mhi = pow5mult(mhi, m5);
1004 b = pow5mult(b, b5);
1008 S = pow5mult(S, s5);
1010 /* Check for special case that d is a normalized power of 2. */
1014 if (!fpword1(d) && !(fpword0(d) & Bndry_mask)
1016 /* The special case */
1023 /* Arrange for convenient computation of quotients:
1024 * shift left if necessary so divisor has 4 leading 0 bits.
1026 * Perhaps we should just compute leading 28 bits of S once
1027 * and for all and pass them and a shift to quorem, so it
1028 * can do shifts and ors to compute the numerator for q.
1030 if (i = ((s5 ? 32 - hi0bits(S->x[S->wds-1]) : 1) + s2) & 0x1f)
1048 if (cmp(b, S) < 0) {
1050 b = multadd(b, 10, 0); /* we botched the k estimate */
1052 mhi = multadd(mhi, 10, 0);
1056 if (ilim <= 0 && mode > 2) {
1057 if (ilim < 0 || cmp(b, S = multadd(S, 5, 0)) <= 0) {
1058 /* no digits, fcvt style */
1070 mhi = lshift(mhi, m2);
1072 /* Compute mlo -- check for special case
1073 * that d is a normalized power of 2.
1078 mhi = Balloc(mhi->k);
1080 mhi = lshift(mhi, Log2P);
1083 for (i = 1; ; i++) {
1084 dig = quorem(b, S) + '0';
1085 /* Do we yet have the shortest decimal string
1086 * that will round to d?
1089 delta = diff(S, mhi);
1090 j1 = delta->sign ? 1 : cmp(b, delta);
1092 if (j1 == 0 && !mode && !(fpword1(d) & 1)) {
1100 if (j < 0 || j == 0 && !mode
1101 && !(fpword1(d) & 1)
1106 if ((j1 > 0 || j1 == 0 && dig & 1)
1114 if (dig == '9') { /* possible if i == 1 */
1125 b = multadd(b, 10, 0);
1127 mlo = mhi = multadd(mhi, 10, 0);
1129 mlo = multadd(mlo, 10, 0);
1130 mhi = multadd(mhi, 10, 0);
1134 for (i = 1; ; i++) {
1135 *s++ = dig = quorem(b, S) + '0';
1138 b = multadd(b, 10, 0);
1141 /* Round off last digit */
1145 if (j > 0 || j == 0 && dig & 1) {
1162 if (mlo && mlo != mhi)