diff options
Diffstat (limited to 'modules/fdlibm/src')
-rw-r--r-- | modules/fdlibm/src/e_acos.cpp | 5 | ||||
-rw-r--r-- | modules/fdlibm/src/e_acosh.cpp | 5 | ||||
-rw-r--r-- | modules/fdlibm/src/e_asin.cpp | 5 | ||||
-rw-r--r-- | modules/fdlibm/src/e_atan2.cpp | 4 | ||||
-rw-r--r-- | modules/fdlibm/src/e_exp.cpp | 7 | ||||
-rw-r--r-- | modules/fdlibm/src/e_hypot.cpp | 7 | ||||
-rw-r--r-- | modules/fdlibm/src/e_pow.cpp | 54 | ||||
-rw-r--r-- | modules/fdlibm/src/e_sqrt.cpp | 446 | ||||
-rw-r--r-- | modules/fdlibm/src/fdlibm.h | 1 | ||||
-rw-r--r-- | modules/fdlibm/src/k_exp.cpp | 2 | ||||
-rw-r--r-- | modules/fdlibm/src/math_private.h | 129 | ||||
-rw-r--r-- | modules/fdlibm/src/moz.build | 18 | ||||
-rw-r--r-- | modules/fdlibm/src/s_asinh.cpp | 5 | ||||
-rw-r--r-- | modules/fdlibm/src/s_cbrt.cpp | 1 | ||||
-rw-r--r-- | modules/fdlibm/src/s_expm1.cpp | 2 | ||||
-rw-r--r-- | modules/fdlibm/src/s_fabs.cpp | 7 | ||||
-rw-r--r-- | modules/fdlibm/src/s_nearbyint.cpp | 2 | ||||
-rw-r--r-- | modules/fdlibm/src/s_scalbn.cpp | 12 |
18 files changed, 164 insertions, 548 deletions
diff --git a/modules/fdlibm/src/e_acos.cpp b/modules/fdlibm/src/e_acos.cpp index 12be296cb..4f497b3b3 100644 --- a/modules/fdlibm/src/e_acos.cpp +++ b/modules/fdlibm/src/e_acos.cpp @@ -38,6 +38,7 @@ * Function needed: sqrt */ +#include <cmath> #include <float.h> #include "math_private.h" @@ -87,13 +88,13 @@ __ieee754_acos(double x) z = (one+x)*0.5; p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); - s = sqrt(z); + s = std::sqrt(z); r = p/q; w = r*s-pio2_lo; return pi - 2.0*(s+w); } else { /* x > 0.5 */ z = (one-x)*0.5; - s = sqrt(z); + s = std::sqrt(z); df = s; SET_LOW_WORD(df,0); c = (z-df*df)/(s+df); diff --git a/modules/fdlibm/src/e_acosh.cpp b/modules/fdlibm/src/e_acosh.cpp index bdabcec3e..ce52d5aaa 100644 --- a/modules/fdlibm/src/e_acosh.cpp +++ b/modules/fdlibm/src/e_acosh.cpp @@ -29,6 +29,7 @@ * acosh(NaN) is NaN without signal. */ +#include <cmath> #include <float.h> #include "math_private.h" @@ -55,9 +56,9 @@ __ieee754_acosh(double x) return 0.0; /* acosh(1) = 0 */ } else if (hx > 0x40000000) { /* 2**28 > x > 2 */ t=x*x; - return __ieee754_log(2.0*x-one/(x+sqrt(t-one))); + return __ieee754_log(2.0*x-one/(x+std::sqrt(t-one))); } else { /* 1<x<2 */ t = x-one; - return log1p(t+sqrt(2.0*t+t*t)); + return log1p(t+std::sqrt(2.0*t+t*t)); } } diff --git a/modules/fdlibm/src/e_asin.cpp b/modules/fdlibm/src/e_asin.cpp index 396f49449..e896bde9e 100644 --- a/modules/fdlibm/src/e_asin.cpp +++ b/modules/fdlibm/src/e_asin.cpp @@ -6,7 +6,7 @@ * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice + * software is freely granted, provided that this notice * is preserved. * ==================================================== */ @@ -44,6 +44,7 @@ * */ +#include <cmath> #include <float.h> #include "math_private.h" @@ -95,7 +96,7 @@ __ieee754_asin(double x) t = w*0.5; p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4))); - s = sqrt(t); + s = std::sqrt(t); if(ix>=0x3FEF3333) { /* if |x| > 0.975 */ w = p/q; t = pio2_hi-(2.0*(s+s*w)-pio2_lo); diff --git a/modules/fdlibm/src/e_atan2.cpp b/modules/fdlibm/src/e_atan2.cpp index 9990072cf..f45ad187f 100644 --- a/modules/fdlibm/src/e_atan2.cpp +++ b/modules/fdlibm/src/e_atan2.cpp @@ -69,8 +69,8 @@ __ieee754_atan2(double y, double x) iy = hy&0x7fffffff; if(((ix|((lx|-lx)>>31))>0x7ff00000)|| ((iy|((ly|-ly)>>31))>0x7ff00000)) /* x or y is NaN */ - return x+y; - if((hx-0x3ff00000|lx)==0) return atan(y); /* x=1.0 */ + return nan_mix(x, y); + if(hx==0x3ff00000&&lx==0) return atan(y); /* x=1.0 */ m = ((hy>>31)&1)|((hx>>30)&2); /* 2*sign(x)+sign(y) */ /* when y = 0 */ diff --git a/modules/fdlibm/src/e_exp.cpp b/modules/fdlibm/src/e_exp.cpp index b31979134..92af819ce 100644 --- a/modules/fdlibm/src/e_exp.cpp +++ b/modules/fdlibm/src/e_exp.cpp @@ -96,6 +96,8 @@ P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */ P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ +static const double E = 2.7182818284590452354; /* e */ + static volatile double huge = 1.0e+300, twom1000= 9.33263618503218878990e-302; /* 2**-1000=0x01700000,0*/ @@ -127,6 +129,7 @@ __ieee754_exp(double x) /* default IEEE double exp */ /* argument reduction */ if(hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ if(hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ + if (x == 1.0) return E; hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb; } else { k = (int)(invln2*x+halF[xsb]); @@ -144,9 +147,9 @@ __ieee754_exp(double x) /* default IEEE double exp */ /* x is now in primary range */ t = x*x; if(k >= -1021) - INSERT_WORDS(twopk,0x3ff00000+(k<<20), 0); + INSERT_WORDS(twopk,((u_int32_t)(0x3ff+k))<<20, 0); else - INSERT_WORDS(twopk,0x3ff00000+((k+1000)<<20), 0); + INSERT_WORDS(twopk,((u_int32_t)(0x3ff+(k+1000)))<<20, 0); c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); if(k==0) return one-((x*c)/(c-2.0)-x); else y = one-((lo-(x*c)/(2.0-c))-hi); diff --git a/modules/fdlibm/src/e_hypot.cpp b/modules/fdlibm/src/e_hypot.cpp index f5c7037bb..a23571150 100644 --- a/modules/fdlibm/src/e_hypot.cpp +++ b/modules/fdlibm/src/e_hypot.cpp @@ -46,6 +46,7 @@ * than 1 ulps (units in the last place) */ +#include <cmath> #include <float.h> #include "math_private.h" @@ -69,7 +70,7 @@ __ieee754_hypot(double x, double y) if(ha >= 0x7ff00000) { /* Inf or NaN */ u_int32_t low; /* Use original arg order iff result is NaN; quieten sNaNs. */ - w = fabs(x+0.0)-fabs(y+0.0); + w = fabsl(x+0.0L)-fabs(y+0); GET_LOW_WORD(low,a); if(((ha&0xfffff)|low)==0) w = a; GET_LOW_WORD(low,b); @@ -105,7 +106,7 @@ __ieee754_hypot(double x, double y) t1 = 0; SET_HIGH_WORD(t1,ha); t2 = a-t1; - w = sqrt(t1*t1-(b*(-b)-t2*(a+t1))); + w = std::sqrt(t1*t1-(b*(-b)-t2*(a+t1))); } else { a = a+a; y1 = 0; @@ -114,7 +115,7 @@ __ieee754_hypot(double x, double y) t1 = 0; SET_HIGH_WORD(t1,ha+0x00100000); t2 = a - t1; - w = sqrt(t1*y1-(w*(-w)-(t1*y2+t2*b))); + w = std::sqrt(t1*y1-(w*(-w)-(t1*y2+t2*b))); } if(k!=0) { u_int32_t high; diff --git a/modules/fdlibm/src/e_pow.cpp b/modules/fdlibm/src/e_pow.cpp index 366e3933b..c18226b8a 100644 --- a/modules/fdlibm/src/e_pow.cpp +++ b/modules/fdlibm/src/e_pow.cpp @@ -4,7 +4,7 @@ * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. * * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice + * software is freely granted, provided that this notice * is preserved. * ==================================================== */ @@ -19,7 +19,7 @@ * 1. Compute and return log2(x) in two pieces: * log2(x) = w1 + w2, * where w1 has 53-24 = 29 bit trailing zeros. - * 2. Perform y*log2(x) = n+y' by simulating multi-precision + * 2. Perform y*log2(x) = n+y' by simulating multi-precision * arithmetic, where |y'|<=0.5. * 3. Return x**y = 2**n*exp(y'*log2) * @@ -47,16 +47,19 @@ * Accuracy: * pow(x,y) returns x**y nearly rounded. In particular * pow(integer,integer) - * always returns the correct integer provided it is + * always returns the correct integer provided it is * representable. * * Constants : - * The hexadecimal values are the intended ones for the following - * constants. The decimal values may be used, provided that the - * compiler will convert from decimal to binary accurately enough + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough * to produce the hexadecimal values shown. */ +#include <cmath> + +#include <float.h> #include "math_private.h" static const double @@ -64,6 +67,9 @@ bp[] = {1.0, 1.5,}, dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */ dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */ zero = 0.0, +half = 0.5, +qrtr = 0.25, +thrd = 3.3333333333333331e-01, /* 0x3fd55555, 0x55555555 */ one = 1.0, two = 2.0, two53 = 9007199254740992.0, /* 0x43400000, 0x00000000 */ @@ -106,15 +112,15 @@ __ieee754_pow(double x, double y) ix = hx&0x7fffffff; iy = hy&0x7fffffff; /* y==zero: x**0 = 1 */ - if((iy|ly)==0) return one; + if((iy|ly)==0) return one; /* x==1: 1**y = 1, even if y is NaN */ if (hx==0x3ff00000 && lx == 0) return one; /* y!=zero: result is NaN if either arg is NaN */ if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) || - iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0))) - return (x+0.0)+(y+0.0); + iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0))) + return nan_mix(x, y); /* determine if y is an odd int when x < 0 * yisint = 0 ... y is not an integer @@ -122,22 +128,22 @@ __ieee754_pow(double x, double y) * yisint = 2 ... y is an even int */ yisint = 0; - if(hx<0) { + if(hx<0) { if(iy>=0x43400000) yisint = 2; /* even integer y */ else if(iy>=0x3ff00000) { k = (iy>>20)-0x3ff; /* exponent */ if(k>20) { j = ly>>(52-k); - if((j<<(52-k))==ly) yisint = 2-(j&1); + if(((u_int32_t)j<<(52-k))==ly) yisint = 2-(j&1); } else if(ly==0) { j = iy>>(20-k); if((j<<(20-k))==iy) yisint = 2-(j&1); } - } - } + } + } /* special value of y */ - if(ly==0) { + if(ly==0) { if (iy==0x7ff00000) { /* y is +-inf */ if(((ix-0x3ff00000)|lx)==0) return one; /* (-1)**+-inf is 1 */ @@ -145,14 +151,14 @@ __ieee754_pow(double x, double y) return (hy>=0)? y: zero; else /* (|x|<1)**-,+inf = inf,0 */ return (hy<0)?-y: zero; - } + } if(iy==0x3ff00000) { /* y is +-1 */ if(hy<0) return one/x; else return x; } if(hy==0x40000000) return x*x; /* y is 2 */ if(hy==0x3fe00000) { /* y is 0.5 */ if(hx>=0) /* x >= +0 */ - return sqrt(x); + return std::sqrt(x); } } @@ -165,13 +171,13 @@ __ieee754_pow(double x, double y) if(hx<0) { if(((ix-0x3ff00000)|yisint)==0) { z = (z-z)/(z-z); /* (-1)**non-int is NaN */ - } else if(yisint==1) + } else if(yisint==1) z = -z; /* (x<0)**odd = -(|x|**odd) */ } return z; } } - + /* CYGNUS LOCAL + fdlibm-5.3 fix: This used to be n = (hx>>31)+1; but ANSI C says a right shift of a signed negative quantity is @@ -193,10 +199,10 @@ __ieee754_pow(double x, double y) /* over/underflow if x is not close to one */ if(ix<0x3fefffff) return (hy<0)? s*huge*huge:s*tiny*tiny; if(ix>0x3ff00000) return (hy>0)? s*huge*huge:s*tiny*tiny; - /* now |1-x| is tiny <= 2**-20, suffice to compute + /* now |1-x| is tiny <= 2**-20, suffice to compute log(x) by x-x^2/2+x^3/3-x^4/4 */ t = ax-one; /* t has 20 trailing zeros */ - w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25)); + w = (t*t)*(half-t*(thrd-t*qrtr)); u = ivln2_h*t; /* ivln2_h has 21 sig. bits */ v = t*ivln2_l-w*ivln2; t1 = u+v; @@ -233,9 +239,9 @@ __ieee754_pow(double x, double y) r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6))))); r += s_l*(s_h+ss); s2 = s_h*s_h; - t_h = 3.0+s2+r; + t_h = 3+s2+r; SET_LOW_WORD(t_h,0); - t_l = r-((t_h-3.0)-s2); + t_l = r-((t_h-3)-s2); /* u+v = ss*(1+...) */ u = s_h*t_h; v = s_l*t_h+t_l*ss; @@ -246,7 +252,7 @@ __ieee754_pow(double x, double y) z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */ z_l = cp_l*p_h+p_l*cp+dp_l[k]; /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ - t = (double)n; + t = n; t1 = (((z_h+z_l)+dp_h[k])+t); SET_LOW_WORD(t1,0); t2 = z_l-(((t1-t)-dp_h[k])-z_h); @@ -286,7 +292,7 @@ __ieee754_pow(double x, double y) n = ((n&0x000fffff)|0x00100000)>>(20-k); if(j<0) n = -n; p_h -= t; - } + } t = p_l+p_h; SET_LOW_WORD(t,0); u = t*lg2_h; diff --git a/modules/fdlibm/src/e_sqrt.cpp b/modules/fdlibm/src/e_sqrt.cpp deleted file mode 100644 index 681505390..000000000 --- a/modules/fdlibm/src/e_sqrt.cpp +++ /dev/null @@ -1,446 +0,0 @@ - -/* @(#)e_sqrt.c 1.3 95/01/18 */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -//#include <sys/cdefs.h> -//__FBSDID("$FreeBSD$"); - -/* __ieee754_sqrt(x) - * Return correctly rounded sqrt. - * ------------------------------------------ - * | Use the hardware sqrt if you have one | - * ------------------------------------------ - * Method: - * Bit by bit method using integer arithmetic. (Slow, but portable) - * 1. Normalization - * Scale x to y in [1,4) with even powers of 2: - * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then - * sqrt(x) = 2^k * sqrt(y) - * 2. Bit by bit computation - * Let q = sqrt(y) truncated to i bit after binary point (q = 1), - * i 0 - * i+1 2 - * s = 2*q , and y = 2 * ( y - q ). (1) - * i i i i - * - * To compute q from q , one checks whether - * i+1 i - * - * -(i+1) 2 - * (q + 2 ) <= y. (2) - * i - * -(i+1) - * If (2) is false, then q = q ; otherwise q = q + 2 . - * i+1 i i+1 i - * - * With some algebric manipulation, it is not difficult to see - * that (2) is equivalent to - * -(i+1) - * s + 2 <= y (3) - * i i - * - * The advantage of (3) is that s and y can be computed by - * i i - * the following recurrence formula: - * if (3) is false - * - * s = s , y = y ; (4) - * i+1 i i+1 i - * - * otherwise, - * -i -(i+1) - * s = s + 2 , y = y - s - 2 (5) - * i+1 i i+1 i i - * - * One may easily use induction to prove (4) and (5). - * Note. Since the left hand side of (3) contain only i+2 bits, - * it does not necessary to do a full (53-bit) comparison - * in (3). - * 3. Final rounding - * After generating the 53 bits result, we compute one more bit. - * Together with the remainder, we can decide whether the - * result is exact, bigger than 1/2ulp, or less than 1/2ulp - * (it will never equal to 1/2ulp). - * The rounding mode can be detected by checking whether - * huge + tiny is equal to huge, and whether huge - tiny is - * equal to huge for some floating point number "huge" and "tiny". - * - * Special cases: - * sqrt(+-0) = +-0 ... exact - * sqrt(inf) = inf - * sqrt(-ve) = NaN ... with invalid signal - * sqrt(NaN) = NaN ... with invalid signal for signaling NaN - * - * Other methods : see the appended file at the end of the program below. - *--------------- - */ - -#include <float.h> - -#include "math_private.h" - -static const double one = 1.0, tiny=1.0e-300; - -double -__ieee754_sqrt(double x) -{ - double z; - int32_t sign = (int)0x80000000; - int32_t ix0,s0,q,m,t,i; - u_int32_t r,t1,s1,ix1,q1; - - EXTRACT_WORDS(ix0,ix1,x); - - /* take care of Inf and NaN */ - if((ix0&0x7ff00000)==0x7ff00000) { - return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf - sqrt(-inf)=sNaN */ - } - /* take care of zero */ - if(ix0<=0) { - if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ - else if(ix0<0) - return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ - } - /* normalize x */ - m = (ix0>>20); - if(m==0) { /* subnormal x */ - while(ix0==0) { - m -= 21; - ix0 |= (ix1>>11); ix1 <<= 21; - } - for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; - m -= i-1; - ix0 |= (ix1>>(32-i)); - ix1 <<= i; - } - m -= 1023; /* unbias exponent */ - ix0 = (ix0&0x000fffff)|0x00100000; - if(m&1){ /* odd m, double x to make it even */ - ix0 += ix0 + ((ix1&sign)>>31); - ix1 += ix1; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ - ix0 += ix0 + ((ix1&sign)>>31); - ix1 += ix1; - q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ - r = 0x00200000; /* r = moving bit from right to left */ - - while(r!=0) { - t = s0+r; - if(t<=ix0) { - s0 = t+r; - ix0 -= t; - q += r; - } - ix0 += ix0 + ((ix1&sign)>>31); - ix1 += ix1; - r>>=1; - } - - r = sign; - while(r!=0) { - t1 = s1+r; - t = s0; - if((t<ix0)||((t==ix0)&&(t1<=ix1))) { - s1 = t1+r; - if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1; - ix0 -= t; - if (ix1 < t1) ix0 -= 1; - ix1 -= t1; - q1 += r; - } - ix0 += ix0 + ((ix1&sign)>>31); - ix1 += ix1; - r>>=1; - } - - /* use floating add to find out rounding direction */ - if((ix0|ix1)!=0) { - z = one-tiny; /* trigger inexact flag */ - if (z>=one) { - z = one+tiny; - if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;} - else if (z>one) { - if (q1==(u_int32_t)0xfffffffe) q+=1; - q1+=2; - } else - q1 += (q1&1); - } - } - ix0 = (q>>1)+0x3fe00000; - ix1 = q1>>1; - if ((q&1)==1) ix1 |= sign; - ix0 += (m <<20); - INSERT_WORDS(z,ix0,ix1); - return z; -} - -/* -Other methods (use floating-point arithmetic) -------------- -(This is a copy of a drafted paper by Prof W. Kahan -and K.C. Ng, written in May, 1986) - - Two algorithms are given here to implement sqrt(x) - (IEEE double precision arithmetic) in software. - Both supply sqrt(x) correctly rounded. The first algorithm (in - Section A) uses newton iterations and involves four divisions. - The second one uses reciproot iterations to avoid division, but - requires more multiplications. Both algorithms need the ability - to chop results of arithmetic operations instead of round them, - and the INEXACT flag to indicate when an arithmetic operation - is executed exactly with no roundoff error, all part of the - standard (IEEE 754-1985). The ability to perform shift, add, - subtract and logical AND operations upon 32-bit words is needed - too, though not part of the standard. - -A. sqrt(x) by Newton Iteration - - (1) Initial approximation - - Let x0 and x1 be the leading and the trailing 32-bit words of - a floating point number x (in IEEE double format) respectively - - 1 11 52 ...widths - ------------------------------------------------------ - x: |s| e | f | - ------------------------------------------------------ - msb lsb msb lsb ...order - - - ------------------------ ------------------------ - x0: |s| e | f1 | x1: | f2 | - ------------------------ ------------------------ - - By performing shifts and subtracts on x0 and x1 (both regarded - as integers), we obtain an 8-bit approximation of sqrt(x) as - follows. - - k := (x0>>1) + 0x1ff80000; - y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits - Here k is a 32-bit integer and T1[] is an integer array containing - correction terms. Now magically the floating value of y (y's - leading 32-bit word is y0, the value of its trailing word is 0) - approximates sqrt(x) to almost 8-bit. - - Value of T1: - static int T1[32]= { - 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592, - 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, - 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581, - 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,}; - - (2) Iterative refinement - - Apply Heron's rule three times to y, we have y approximates - sqrt(x) to within 1 ulp (Unit in the Last Place): - - y := (y+x/y)/2 ... almost 17 sig. bits - y := (y+x/y)/2 ... almost 35 sig. bits - y := y-(y-x/y)/2 ... within 1 ulp - - - Remark 1. - Another way to improve y to within 1 ulp is: - - y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x) - y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x) - - 2 - (x-y )*y - y := y + 2* ---------- ...within 1 ulp - 2 - 3y + x - - - This formula has one division fewer than the one above; however, - it requires more multiplications and additions. Also x must be - scaled in advance to avoid spurious overflow in evaluating the - expression 3y*y+x. Hence it is not recommended uless division - is slow. If division is very slow, then one should use the - reciproot algorithm given in section B. - - (3) Final adjustment - - By twiddling y's last bit it is possible to force y to be - correctly rounded according to the prevailing rounding mode - as follows. Let r and i be copies of the rounding mode and - inexact flag before entering the square root program. Also we - use the expression y+-ulp for the next representable floating - numbers (up and down) of y. Note that y+-ulp = either fixed - point y+-1, or multiply y by nextafter(1,+-inf) in chopped - mode. - - I := FALSE; ... reset INEXACT flag I - R := RZ; ... set rounding mode to round-toward-zero - z := x/y; ... chopped quotient, possibly inexact - If(not I) then { ... if the quotient is exact - if(z=y) { - I := i; ... restore inexact flag - R := r; ... restore rounded mode - return sqrt(x):=y. - } else { - z := z - ulp; ... special rounding - } - } - i := TRUE; ... sqrt(x) is inexact - If (r=RN) then z=z+ulp ... rounded-to-nearest - If (r=RP) then { ... round-toward-+inf - y = y+ulp; z=z+ulp; - } - y := y+z; ... chopped sum - y0:=y0-0x00100000; ... y := y/2 is correctly rounded. - I := i; ... restore inexact flag - R := r; ... restore rounded mode - return sqrt(x):=y. - - (4) Special cases - - Square root of +inf, +-0, or NaN is itself; - Square root of a negative number is NaN with invalid signal. - - -B. sqrt(x) by Reciproot Iteration - - (1) Initial approximation - - Let x0 and x1 be the leading and the trailing 32-bit words of - a floating point number x (in IEEE double format) respectively - (see section A). By performing shifs and subtracts on x0 and y0, - we obtain a 7.8-bit approximation of 1/sqrt(x) as follows. - - k := 0x5fe80000 - (x0>>1); - y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits - - Here k is a 32-bit integer and T2[] is an integer array - containing correction terms. Now magically the floating - value of y (y's leading 32-bit word is y0, the value of - its trailing word y1 is set to zero) approximates 1/sqrt(x) - to almost 7.8-bit. - - Value of T2: - static int T2[64]= { - 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, - 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, - 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, - 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, - 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, - 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, - 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, - 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,}; - - (2) Iterative refinement - - Apply Reciproot iteration three times to y and multiply the - result by x to get an approximation z that matches sqrt(x) - to about 1 ulp. To be exact, we will have - -1ulp < sqrt(x)-z<1.0625ulp. - - ... set rounding mode to Round-to-nearest - y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x) - y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x) - ... special arrangement for better accuracy - z := x*y ... 29 bits to sqrt(x), with z*y<1 - z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x) - - Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that - (a) the term z*y in the final iteration is always less than 1; - (b) the error in the final result is biased upward so that - -1 ulp < sqrt(x) - z < 1.0625 ulp - instead of |sqrt(x)-z|<1.03125ulp. - - (3) Final adjustment - - By twiddling y's last bit it is possible to force y to be - correctly rounded according to the prevailing rounding mode - as follows. Let r and i be copies of the rounding mode and - inexact flag before entering the square root program. Also we - use the expression y+-ulp for the next representable floating - numbers (up and down) of y. Note that y+-ulp = either fixed - point y+-1, or multiply y by nextafter(1,+-inf) in chopped - mode. - - R := RZ; ... set rounding mode to round-toward-zero - switch(r) { - case RN: ... round-to-nearest - if(x<= z*(z-ulp)...chopped) z = z - ulp; else - if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp; - break; - case RZ:case RM: ... round-to-zero or round-to--inf - R:=RP; ... reset rounding mod to round-to-+inf - if(x<z*z ... rounded up) z = z - ulp; else - if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp; - break; - case RP: ... round-to-+inf - if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else - if(x>z*z ...chopped) z = z+ulp; - break; - } - - Remark 3. The above comparisons can be done in fixed point. For - example, to compare x and w=z*z chopped, it suffices to compare - x1 and w1 (the trailing parts of x and w), regarding them as - two's complement integers. - - ...Is z an exact square root? - To determine whether z is an exact square root of x, let z1 be the - trailing part of z, and also let x0 and x1 be the leading and - trailing parts of x. - - If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0 - I := 1; ... Raise Inexact flag: z is not exact - else { - j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2 - k := z1 >> 26; ... get z's 25-th and 26-th - fraction bits - I := i or (k&j) or ((k&(j+j+1))!=(x1&3)); - } - R:= r ... restore rounded mode - return sqrt(x):=z. - - If multiplication is cheaper then the foregoing red tape, the - Inexact flag can be evaluated by - - I := i; - I := (z*z!=x) or I. - - Note that z*z can overwrite I; this value must be sensed if it is - True. - - Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be - zero. - - -------------------- - z1: | f2 | - -------------------- - bit 31 bit 0 - - Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd - or even of logb(x) have the following relations: - - ------------------------------------------------- - bit 27,26 of z1 bit 1,0 of x1 logb(x) - ------------------------------------------------- - 00 00 odd and even - 01 01 even - 10 10 odd - 10 00 even - 11 01 even - ------------------------------------------------- - - (4) Special cases (see (4) of Section A). - - */ - diff --git a/modules/fdlibm/src/fdlibm.h b/modules/fdlibm/src/fdlibm.h index 0ad215911..324e5d0b0 100644 --- a/modules/fdlibm/src/fdlibm.h +++ b/modules/fdlibm/src/fdlibm.h @@ -33,7 +33,6 @@ double log(double); double log10(double); double pow(double, double); -double sqrt(double); double fabs(double); double floor(double); diff --git a/modules/fdlibm/src/k_exp.cpp b/modules/fdlibm/src/k_exp.cpp index a0699fa4a..9394c8fd8 100644 --- a/modules/fdlibm/src/k_exp.cpp +++ b/modules/fdlibm/src/k_exp.cpp @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2011 David Schultz <das@FreeBSD.ORG> * All rights reserved. * diff --git a/modules/fdlibm/src/math_private.h b/modules/fdlibm/src/math_private.h index 86597e75f..d9ec44817 100644 --- a/modules/fdlibm/src/math_private.h +++ b/modules/fdlibm/src/math_private.h @@ -45,6 +45,47 @@ #define u_int64_t uint64_t #endif +/* A union which permits us to convert between a long double and + four 32 bit ints. */ + +#if MOZ_BIG_ENDIAN + +typedef union +{ + long double value; + struct { + u_int32_t mswhi; + u_int32_t mswlo; + u_int32_t lswhi; + u_int32_t lswlo; + } parts32; + struct { + u_int64_t msw; + u_int64_t lsw; + } parts64; +} ieee_quad_shape_type; + +#endif + +#if MOZ_LITTLE_ENDIAN + +typedef union +{ + long double value; + struct { + u_int32_t lswlo; + u_int32_t lswhi; + u_int32_t mswlo; + u_int32_t mswhi; + } parts32; + struct { + u_int64_t lsw; + u_int64_t msw; + } parts64; +} ieee_quad_shape_type; + +#endif + /* * A union which permits us to convert between a double and two 32 bit * ints. @@ -307,8 +348,9 @@ do { \ /* Support switching the mode to FP_PE if necessary. */ #if defined(__i386__) && !defined(NO_FPSETPREC) -#define ENTERI() \ - long double __retval; \ +#define ENTERI() ENTERIT(long double) +#define ENTERIT(returntype) \ + returntype __retval; \ fp_prec_t __oprec; \ \ if ((__oprec = fpgetprec()) != FP_PE) \ @@ -319,9 +361,22 @@ do { \ fpsetprec(__oprec); \ RETURNF(__retval); \ } while (0) +#define ENTERV() \ + fp_prec_t __oprec; \ + \ + if ((__oprec = fpgetprec()) != FP_PE) \ + fpsetprec(FP_PE) +#define RETURNV() do { \ + if (__oprec != FP_PE) \ + fpsetprec(__oprec); \ + return; \ +} while (0) #else -#define ENTERI(x) +#define ENTERI() +#define ENTERIT(x) #define RETURNI(x) RETURNF(x) +#define ENTERV() +#define RETURNV() return #endif /* Default return statement if hack*_t() is not used. */ @@ -436,6 +491,31 @@ do { \ */ void _scan_nan(uint32_t *__words, int __num_words, const char *__s); +/* + * Mix 0, 1 or 2 NaNs. First add 0 to each arg. This normally just turns + * signaling NaNs into quiet NaNs by setting a quiet bit. We do this + * because we want to never return a signaling NaN, and also because we + * don't want the quiet bit to affect the result. Then mix the converted + * args using the specified operation. + * + * When one arg is NaN, the result is typically that arg quieted. When both + * args are NaNs, the result is typically the quietening of the arg whose + * mantissa is largest after quietening. When neither arg is NaN, the + * result may be NaN because it is indeterminate, or finite for subsequent + * construction of a NaN as the indeterminate 0.0L/0.0L. + * + * Technical complications: the result in bits after rounding to the final + * precision might depend on the runtime precision and/or on compiler + * optimizations, especially when different register sets are used for + * different precisions. Try to make the result not depend on at least the + * runtime precision by always doing the main mixing step in long double + * precision. Try to reduce dependencies on optimizations by adding the + * the 0's in different precisions (unless everything is in long double + * precision). + */ +#define nan_mix(x, y) (nan_mix_op((x), (y), +)) +#define nan_mix_op(x, y, op) (((x) + 0.0L) op ((y) + 0)) + #ifdef _COMPLEX_H /* @@ -511,48 +591,6 @@ CMPLXL(long double x, long double y) #endif /* _COMPLEX_H */ -#ifdef __GNUCLIKE_ASM - -/* Asm versions of some functions. */ - -#ifdef __amd64__ -static __inline int -irint(double x) -{ - int n; - - asm("cvtsd2si %1,%0" : "=r" (n) : "x" (x)); - return (n); -} -#define HAVE_EFFICIENT_IRINT -#endif - -#ifdef __i386__ -static __inline int -irint(double x) -{ - int n; - - asm("fistl %0" : "=m" (n) : "t" (x)); - return (n); -} -#define HAVE_EFFICIENT_IRINT -#endif - -#if defined(__amd64__) || defined(__i386__) -static __inline int -irintl(long double x) -{ - int n; - - asm("fistl %0" : "=m" (n) : "t" (x)); - return (n); -} -#define HAVE_EFFICIENT_IRINTL -#endif - -#endif /* __GNUCLIKE_ASM */ - #ifdef DEBUG #if defined(__amd64__) || defined(__i386__) #define breakpoint() asm("int $3") @@ -759,7 +797,6 @@ irintl(long double x) #define log fdlibm::log #define log10 fdlibm::log10 #define pow fdlibm::pow -#define sqrt fdlibm::sqrt #define ceil fdlibm::ceil #define ceilf fdlibm::ceilf #define fabs fdlibm::fabs diff --git a/modules/fdlibm/src/moz.build b/modules/fdlibm/src/moz.build index b197881ac..be5bf3d9b 100644 --- a/modules/fdlibm/src/moz.build +++ b/modules/fdlibm/src/moz.build @@ -10,26 +10,35 @@ EXPORTS += [ FINAL_LIBRARY = 'js' -if CONFIG['GNU_CXX']: +if CONFIG['CC_TYPE'] in ('clang', 'gcc'): CXXFLAGS += [ '-Wno-parentheses', '-Wno-sign-compare', ] -if CONFIG['CLANG_CXX']: +if CONFIG['CC_TYPE'] == 'clang': CXXFLAGS += [ '-Wno-dangling-else', ] -if CONFIG['_MSC_VER']: +if CONFIG['CC_TYPE'] in ('msvc', 'clang-cl'): CXXFLAGS += [ - '-wd4018', # signed/unsigned mismatch '-wd4146', # unary minus operator applied to unsigned type '-wd4305', # truncation from 'double' to 'const float' '-wd4723', # potential divide by 0 '-wd4756', # overflow in constant arithmetic ] +if CONFIG['CC_TYPE'] == 'msvc': + CXXFLAGS += [ + '-wd4018', # signed/unsigned mismatch + ] + +if CONFIG['CC_TYPE'] == 'clang-cl': + CXXFLAGS += [ + '-Wno-sign-compare', # signed/unsigned mismatch + ] + SOURCES += [ 'e_acos.cpp', 'e_acosh.cpp', @@ -44,7 +53,6 @@ SOURCES += [ 'e_log2.cpp', 'e_pow.cpp', 'e_sinh.cpp', - 'e_sqrt.cpp', 'k_exp.cpp', 's_asinh.cpp', 's_atan.cpp', diff --git a/modules/fdlibm/src/s_asinh.cpp b/modules/fdlibm/src/s_asinh.cpp index 400fd89c1..7ecc396bb 100644 --- a/modules/fdlibm/src/s_asinh.cpp +++ b/modules/fdlibm/src/s_asinh.cpp @@ -24,6 +24,7 @@ * := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2))) */ +#include <cmath> #include <float.h> #include "math_private.h" @@ -48,10 +49,10 @@ asinh(double x) w = __ieee754_log(fabs(x))+ln2; } else if (ix>0x40000000) { /* 2**28 > |x| > 2.0 */ t = fabs(x); - w = __ieee754_log(2.0*t+one/(__ieee754_sqrt(x*x+one)+t)); + w = __ieee754_log(2.0*t+one/(std::sqrt(x*x+one)+t)); } else { /* 2.0 > |x| > 2**-28 */ t = x*x; - w =log1p(fabs(x)+t/(one+__ieee754_sqrt(one+t))); + w =log1p(fabs(x)+t/(one+std::sqrt(one+t))); } if(hx>0) return w; else return -w; } diff --git a/modules/fdlibm/src/s_cbrt.cpp b/modules/fdlibm/src/s_cbrt.cpp index a2de24af7..fe3747e81 100644 --- a/modules/fdlibm/src/s_cbrt.cpp +++ b/modules/fdlibm/src/s_cbrt.cpp @@ -15,6 +15,7 @@ //#include <sys/cdefs.h> //__FBSDID("$FreeBSD$"); +#include <float.h> #include "math_private.h" /* cbrt(x) diff --git a/modules/fdlibm/src/s_expm1.cpp b/modules/fdlibm/src/s_expm1.cpp index 4c19485de..90ebc1698 100644 --- a/modules/fdlibm/src/s_expm1.cpp +++ b/modules/fdlibm/src/s_expm1.cpp @@ -187,7 +187,7 @@ expm1(double x) e = hxs*((r1-t)/(6.0 - x*t)); if(k==0) return x - (x*e-hxs); /* c is 0 */ else { - INSERT_WORDS(twopk,0x3ff00000+(k<<20),0); /* 2^k */ + INSERT_WORDS(twopk,((u_int32_t)(0x3ff+k))<<20,0); /* 2^k */ e = (x*(e-c)-c); e -= hxs; if(k== -1) return 0.5*(x-e)-0.5; diff --git a/modules/fdlibm/src/s_fabs.cpp b/modules/fdlibm/src/s_fabs.cpp index 3bea0478a..6ca84d71b 100644 --- a/modules/fdlibm/src/s_fabs.cpp +++ b/modules/fdlibm/src/s_fabs.cpp @@ -1,4 +1,4 @@ - /* @(#)s_fabs.c 5.1 93/09/24 */ +/* @(#)s_fabs.c 5.1 93/09/24 */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. @@ -10,9 +10,8 @@ * ==================================================== */ -#ifndef lint - //static char rcsid[] = "$FreeBSD$"; -#endif +//#include <sys/cdefs.h> +//__FBSDID("$FreeBSD$"); /* * fabs(x) returns the absolute value of x. diff --git a/modules/fdlibm/src/s_nearbyint.cpp b/modules/fdlibm/src/s_nearbyint.cpp index 532bb5d8d..6c04212d3 100644 --- a/modules/fdlibm/src/s_nearbyint.cpp +++ b/modules/fdlibm/src/s_nearbyint.cpp @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2004 David Schultz <das@FreeBSD.ORG> * All rights reserved. * diff --git a/modules/fdlibm/src/s_scalbn.cpp b/modules/fdlibm/src/s_scalbn.cpp index 5dbf58c23..dfbcf5c57 100644 --- a/modules/fdlibm/src/s_scalbn.cpp +++ b/modules/fdlibm/src/s_scalbn.cpp @@ -10,9 +10,8 @@ * ==================================================== */ -#ifndef lint -//static char rcsid[] = "$FreeBSD$"; -#endif +//#include <sys/cdefs.h> +//__FBSDID("$FreeBSD$"); /* * scalbn (double x, int n) @@ -21,7 +20,6 @@ * exponentiation or a multiplication. */ -//#include <sys/cdefs.h> #include <float.h> #include "math_private.h" @@ -50,10 +48,12 @@ scalbn (double x, int n) if (k > 0x7fe) return huge*copysign(huge,x); /* overflow */ if (k > 0) /* normal result */ {SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); return x;} - if (k <= -54) + if (k <= -54) { if (n > 50000) /* in case integer overflow in n+k */ return huge*copysign(huge,x); /*overflow*/ - else return tiny*copysign(tiny,x); /*underflow*/ + else + return tiny*copysign(tiny,x); /*underflow*/ + } k += 54; /* subnormal result */ SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); return x*twom54; |