Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2 of the GNU General Public License as 00007 published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU General Public License along 00021 with this program; if not, write the Free Software Foundation, Inc., 59 00022 Temple Place - Suite 330, Boston MA 02111-1307, USA. 00023 00024 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00025 Mountain View, CA 94043, or: 00026 00027 http://www.sgi.com 00028 00029 For further information regarding this notice, see: 00030 00031 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00032 00033 */ 00034 00035 00036 /* ======================================================================= 00037 * ======================================================================= 00038 * 00039 * 00040 * ======================================================================= 00041 * ======================================================================= 00042 */ 00043 00044 00045 #include "defs.h" 00046 #include "quad.h" 00047 00048 /* note that this routine must be kept in sync with the corresponding libc 00049 * routine, q_div. 00050 */ 00051 00052 typedef union 00053 { 00054 struct 00055 { 00056 UINT32 hi; 00057 UINT32 lo; 00058 } word; 00059 00060 double d; 00061 } du; 00062 00063 00064 /* const1 is 1.0 + 2^(53 - 53/2), i.e. 1.0 + 2^27 */ 00065 00066 static const du const1 = 00067 {0x41a00000, 0x02000000}; 00068 00069 static const du twopm968 = 00070 {0x03700000, 0x00000000}; 00071 00072 static const du twopm54 = 00073 {0x3c900000, 0x00000000}; 00074 00075 static const du twop52 = 00076 {0x43300000, 0x00000000}; 00077 00078 static const du inf = 00079 {0x7ff00000, 0x00000000}; 00080 00081 extern QUAD c_q_div(QUAD, QUAD, INT *); 00082 00083 #pragma weak c_q_div = __c_q_div 00084 #define c_q_div __c_q_div 00085 00086 double fabs(double); 00087 #pragma intrinsic (fabs) 00088 00089 #define EXPBIAS 0x3ff 00090 00091 /* computes the quotient of two long doubles */ 00092 00093 QUAD 00094 c_q_div(QUAD x, QUAD y, INT *p_err ) 00095 { 00096 double xhi, xlo, yhi, ylo; 00097 INT32 n; 00098 INT64 ixhi, iyhi; 00099 INT64 xptxhi, xptyhi; 00100 INT64 ix, iy, iz; 00101 double c, cc, w, ww; 00102 double quarterulp; 00103 double xfactor, yfactor; 00104 double p, hc, tc; 00105 double hyhi, tyhi; 00106 double z, zz; 00107 QUAD u; 00108 QUAD result; 00109 00110 /* adapted from T. J. Dekker's div2 subroutine */ 00111 00112 #ifdef QUAD_DEBUG 00113 printf("c_q_div: xhi = %08x%08x\n", *(INT32 *)&xhi, *((INT32 *)&xhi + 1)); 00114 printf("c_q_div: xlo = %08x%08x\n", *(INT32 *)&xlo, *((INT32 *)&xlo + 1)); 00115 printf("c_q_div: yhi = %08x%08x\n", *(INT32 *)&yhi, *((INT32 *)&yhi + 1)); 00116 printf("c_q_div: ylo = %08x%08x\n", *(INT32 *)&ylo, *((INT32 *)&ylo + 1)); 00117 #endif 00118 00119 *p_err = 0; 00120 00121 xhi = x.hi; xlo = x.lo; 00122 yhi = y.hi; ylo = y.lo; 00123 00124 /* extract exponents of y and x for some quick screening */ 00125 00126 DBL2LL(yhi, iyhi); 00127 xptyhi = (iyhi >> DMANTWIDTH); 00128 xptyhi &= 0x7ff; 00129 00130 DBL2LL(xhi, ixhi); 00131 xptxhi = (ixhi >> DMANTWIDTH); 00132 xptxhi &= 0x7ff; 00133 00134 /* Avoid underflows and overflows in forming the products 00135 x*const1.d, y*const1.d, x*y, and hx*hy by scaling if 00136 necessary. x and y are also be scaled if tx*ty is 00137 a denormal. 00138 */ 00139 00140 if ( (0x30d < xptxhi) && (xptxhi < 0x4f1) && 00141 (0x30d < xptyhi) && (xptyhi < 0x4f1) 00142 ) 00143 { 00144 /* normal case */ 00145 00146 c = xhi/yhi; 00147 /* 00148 u.ld = __qprod(c, yhi); 00149 */ 00150 p = c*const1.d; 00151 00152 hc = (c - p) + p; 00153 tc = c - hc; 00154 00155 p = yhi*const1.d; 00156 00157 hyhi = (yhi - p) + p; 00158 tyhi = yhi - hyhi; 00159 00160 u.hi = c*yhi; 00161 u.lo = (((hc*hyhi - u.hi) + hc*tyhi) + hyhi*tc) + tc*tyhi; 00162 00163 cc = ((xhi - u.hi - u.lo + xlo) - c*ylo)/yhi; 00164 00165 z = c + cc; 00166 zz = (c - z) + cc; 00167 00168 #ifdef QUAD_DEBUG 00169 printf("c_q_div: z = %08x%08x\n", *(INT32 *)&z, *((INT32 *)&z + 1)); 00170 printf("c_q_div: zz = %08x%08x\n", *(INT32 *)&zz, *((INT32 *)&zz + 1)); 00171 #endif 00172 00173 /* if necessary, round zz so that the sum of z and zz has 00174 at most 107 significant bits 00175 */ 00176 00177 if ( fabs(z) >= twopm968.d ) 00178 { 00179 /* determine true exponent of z + zz as a 107 bit number */ 00180 00181 DBL2LL(z, iz); 00182 iz >>= DMANTWIDTH; 00183 iz <<= DMANTWIDTH; 00184 LL2DBL(iz, w); /* w = dtwofloor(z) */ 00185 00186 if ( (z == w) && ((z > 0.0 && zz < 0.0) || 00187 (z < 0.0 && zz > 0.0)) 00188 ) 00189 w *= 0.5; 00190 00191 /* round zz if it's less than 1/4 ulp of w */ 00192 00193 quarterulp = twopm54.d*fabs(w); 00194 00195 if ( fabs(zz) < quarterulp ) 00196 { 00197 if ( zz >= 0.0 ) 00198 { 00199 zz = (quarterulp + zz) - quarterulp; 00200 } 00201 else 00202 { 00203 zz = quarterulp + (zz - quarterulp); 00204 } 00205 00206 w = z + zz; 00207 ww = (z - w) + zz; 00208 00209 z = w; 00210 zz = ww; 00211 } 00212 } 00213 00214 result.hi = z; 00215 result.lo = zz; 00216 00217 #ifdef QUAD_DEBUG 00218 printf("c_q_div: result.hi = %08x%08x\n", *(INT32 *)&result.hi, *((INT32 *)&result.hi + 1)); 00219 printf("c_q_div: result.lo = %08x%08x\n", *(INT32 *)&result.lo, *((INT32 *)&result.lo + 1)); 00220 #endif 00221 00222 return ( result ); 00223 } 00224 00225 if ( (xptxhi < 0x7ff) && (xptyhi < 0x7ff) ) 00226 { 00227 if ( (xhi == 0.0) || (yhi == 0.0) ) 00228 { 00229 result.hi = xhi/yhi; 00230 result.lo = 0.0; 00231 00232 #ifdef QUAD_DEBUG 00233 printf("c_q_div: result.hi = %08x%08x\n", *(INT32 *)&result.hi, *((INT32 *)&result.hi + 1)); 00234 printf("c_q_div: result.lo = %08x%08x\n", *(INT32 *)&result.lo, *((INT32 *)&result.lo + 1)); 00235 #endif 00236 00237 return ( result ); 00238 } 00239 00240 xfactor = 1.0; 00241 yfactor = 1.0; 00242 00243 /* Scale x and y appropriately and use previous algorithm. 00244 Then unscale result. 00245 */ 00246 00247 if ( xptxhi <= 0x30d ) 00248 { 00249 xhi *= twop52.d; /* first, make sure x is normal */ 00250 xlo *= twop52.d; 00251 00252 /* now scale x so that its exponent is 0x30e */ 00253 00254 DBL2LL(xhi, ix); 00255 ix >>= DMANTWIDTH; 00256 n = (ix & 0x7ff); 00257 00258 ix = (0x30e - n) + EXPBIAS; 00259 ix <<= DMANTWIDTH; 00260 LL2DBL(ix, c); 00261 xhi *= c; 00262 xlo *= c; 00263 ix = (n - 52 - 0x30e) + EXPBIAS; 00264 ix <<= DMANTWIDTH; 00265 LL2DBL(ix, xfactor); 00266 } 00267 00268 if ( xptyhi <= 0x30d ) 00269 { 00270 yhi *= twop52.d; /* first, make sure y is normal */ 00271 ylo *= twop52.d; 00272 00273 /* now scale y so that its exponent is 0x30e */ 00274 00275 DBL2LL(yhi, iy); 00276 iy >>= DMANTWIDTH; 00277 n = (iy & 0x7ff); 00278 00279 iy = (0x30e - n) + EXPBIAS; 00280 iy <<= DMANTWIDTH; 00281 LL2DBL(iy, c); 00282 yhi *= c; 00283 ylo *= c; 00284 iy = (0x30e - n + 52) + EXPBIAS; 00285 iy <<= DMANTWIDTH; 00286 LL2DBL(iy, yfactor); 00287 } 00288 00289 if ( xptxhi >= 0x4f1 ) 00290 { 00291 /* scale x so that its exponent is 0x4f1 */ 00292 00293 DBL2LL(xhi, ix); 00294 ix >>= DMANTWIDTH; 00295 n = (ix & 0x7ff); 00296 00297 ix = (0x4f1 - n) + EXPBIAS; 00298 ix <<= DMANTWIDTH; 00299 LL2DBL(ix, c); 00300 xhi *= c; 00301 xlo *= c; 00302 ix = (n - 0x4f1) + EXPBIAS; 00303 ix <<= DMANTWIDTH; 00304 LL2DBL(ix, xfactor); 00305 } 00306 00307 if ( xptyhi >= 0x4f1 ) 00308 { 00309 /* scale y so that its exponent is 0x4f1 */ 00310 00311 DBL2LL(yhi, iy); 00312 iy >>= DMANTWIDTH; 00313 n = (iy & 0x7ff); 00314 00315 iy = (0x4f1 - n) + EXPBIAS; 00316 iy <<= DMANTWIDTH; 00317 LL2DBL(iy, c); 00318 yhi *= c; 00319 ylo *= c; 00320 iy = (0x4f1 - n) + EXPBIAS; 00321 iy <<= DMANTWIDTH; 00322 LL2DBL(iy, yfactor); 00323 } 00324 00325 c = xhi/yhi; 00326 00327 /* 00328 u.ld = __qprod(c, yhi); 00329 */ 00330 p = c*const1.d; 00331 00332 hc = (c - p) + p; 00333 tc = c - hc; 00334 00335 p = yhi*const1.d; 00336 00337 hyhi = (yhi - p) + p; 00338 tyhi = yhi - hyhi; 00339 00340 u.hi = c*yhi; 00341 u.lo = (((hc*hyhi - u.hi) + hc*tyhi) + hyhi*tc) + tc*tyhi; 00342 00343 cc = ((xhi - u.hi - u.lo + xlo) - c*ylo)/yhi; 00344 00345 z = c + cc; 00346 zz = (c - z) + cc; 00347 00348 #ifdef QUAD_DEBUG 00349 printf("c_q_div: z = %08x%08x\n", *(INT32 *)&z, *((INT32 *)&z + 1)); 00350 printf("c_q_div: zz = %08x%08x\n", *(INT32 *)&zz, *((INT32 *)&zz + 1)); 00351 #endif 00352 00353 /* if necessary, round zz so that the sum of z and zz has 00354 at most 107 significant bits 00355 */ 00356 00357 if ( fabs(z) >= twopm968.d ) 00358 { 00359 /* determine true exponent of z + zz as a 107 bit number */ 00360 00361 DBL2LL(z, iz); 00362 iz >>= DMANTWIDTH; 00363 iz <<= DMANTWIDTH; 00364 LL2DBL(iz, w); /* w = dtwofloor(z) */ 00365 00366 if ( (z == w) && ((z > 0.0 && zz < 0.0) || 00367 (z < 0.0 && zz > 0.0)) 00368 ) 00369 w *= 0.5; 00370 00371 /* round zz if it's less than 1/4 ulp of w */ 00372 00373 quarterulp = twopm54.d*fabs(w); 00374 00375 if ( fabs(zz) < quarterulp ) 00376 { 00377 if ( zz >= 0.0 ) 00378 { 00379 zz = (quarterulp + zz) - quarterulp; 00380 } 00381 else 00382 { 00383 zz = quarterulp + (zz - quarterulp); 00384 } 00385 00386 w = z + zz; 00387 ww = (z - w) + zz; 00388 00389 z = w; 00390 zz = ww; 00391 } 00392 } 00393 00394 if ( ((xfactor <= 1.0) && (1.0 <= yfactor)) || 00395 ((yfactor <= 1.0) && (1.0 <= xfactor)) 00396 ) 00397 { 00398 z = z*(xfactor*yfactor); 00399 } 00400 else 00401 { 00402 z *= xfactor; 00403 z *= yfactor; 00404 } 00405 00406 if ( (z == 0.0) || (fabs(z) == inf.d) ) 00407 { 00408 result.hi = z; 00409 result.lo = 0.0; 00410 00411 #ifdef QUAD_DEBUG 00412 printf("c_q_div: result.hi = %08x%08x\n", *(INT32 *)&result.hi, *((INT32 *)&result.hi + 1)); 00413 printf("c_q_div: result.lo = %08x%08x\n", *(INT32 *)&result.lo, *((INT32 *)&result.lo + 1)); 00414 #endif 00415 00416 return ( result ); 00417 } 00418 00419 if ( ((xfactor <= 1.0) && (1.0 <= yfactor)) || 00420 ((yfactor <= 1.0) && (1.0 <= xfactor)) 00421 ) 00422 { 00423 zz = zz*(xfactor*yfactor); 00424 } 00425 else 00426 { 00427 zz *= xfactor; 00428 zz *= yfactor; 00429 } 00430 00431 result.hi = z + zz; 00432 result.lo = (z - result.hi) + zz; 00433 00434 #ifdef QUAD_DEBUG 00435 printf("c_q_div: result.hi = %08x%08x\n", *(INT32 *)&result.hi, *((INT32 *)&result.hi + 1)); 00436 printf("c_q_div: result.lo = %08x%08x\n", *(INT32 *)&result.lo, *((INT32 *)&result.lo + 1)); 00437 #endif 00438 00439 return ( result ); 00440 } 00441 00442 result.hi = xhi/yhi; 00443 result.lo = 0.0; 00444 00445 #ifdef QUAD_DEBUG 00446 printf("c_q_div: result.hi = %08x%08x\n", *(INT32 *)&result.hi, *((INT32 *)&result.hi + 1)); 00447 printf("c_q_div: result.lo = %08x%08x\n", *(INT32 *)&result.lo, *((INT32 *)&result.lo + 1)); 00448 #endif 00449 00450 return ( result ); 00451 } 00452