Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2 of the GNU General Public License as 00007 published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU General Public License along 00021 with this program; if not, write the Free Software Foundation, Inc., 59 00022 Temple Place - Suite 330, Boston MA 02111-1307, USA. 00023 00024 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00025 Mountain View, CA 94043, or: 00026 00027 http://www.sgi.com 00028 00029 For further information regarding this notice, see: 00030 00031 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00032 00033 */ 00034 00035 00036 /* ======================================================================= 00037 * ======================================================================= 00038 * 00039 * 00040 * ======================================================================= 00041 * ======================================================================= 00042 */ 00043 00044 00045 #include "defs.h" 00046 #include "quad.h" 00047 00048 /* note that this routine must be kept in sync with the corresponding libc 00049 * routine, q_mul. 00050 */ 00051 00052 typedef union 00053 { 00054 struct 00055 { 00056 UINT32 hi; 00057 UINT32 lo; 00058 } word; 00059 00060 double d; 00061 } du; 00062 00063 00064 /* const1 is 1.0 + 2^(53 - 53/2), i.e. 1.0 + 2^27 */ 00065 00066 static const du const1 = 00067 {0x41a00000, 0x02000000}; 00068 00069 static const du twop590 = 00070 {0x64d00000, 0x00000000}; 00071 00072 static const du twopm590 = 00073 {0x1b100000, 0x00000000}; 00074 00075 static const du inf = 00076 {0x7ff00000, 0x00000000}; 00077 00078 #define NO 0 00079 #define YES 1 00080 00081 extern QUAD c_q_mul(QUAD, QUAD, INT *); 00082 00083 #pragma weak c_q_mul = __c_q_mul 00084 #define c_q_mul __c_q_mul 00085 00086 double fabs(double); 00087 #pragma intrinsic (fabs) 00088 00089 QUAD 00090 c_q_mul(QUAD x, QUAD y, INT *p_err ) 00091 { 00092 INT32 ixhi, iyhi; 00093 INT32 xptxhi, xptyhi; 00094 INT32 xpthi, xptlo; 00095 INT32 scaleup, scaledown; 00096 INT64 iz, iw, iqulp; 00097 double xhi, xlo, yhi, ylo; 00098 double xfactor, yfactor; 00099 double hx, tx, hy, ty; 00100 double p, q; 00101 double w, ww; 00102 double qulp; 00103 QUAD z, u; 00104 double rem; 00105 double a, v, vv; 00106 00107 /* Avoid underflows and overflows in forming the products 00108 xhi*const1.d, yhi*const1.d, xhi*yhi, and hx*hy by scaling if 00109 necessary. x and y should also be scaled if tx*ty is 00110 a denormal. 00111 */ 00112 00113 *p_err = 0; 00114 00115 xhi = x.hi; xlo = x.lo; 00116 yhi = y.hi; ylo = y.lo; 00117 00118 #ifdef QUAD_DEBUG 00119 printf("c_q_mul: xhi = %08x%08x\n", *(INT32 *)&xhi, *((INT32 *)&xhi + 1)); 00120 printf("c_q_mul: xlo = %08x%08x\n", *(INT32 *)&xlo, *((INT32 *)&xlo + 1)); 00121 printf("c_q_mul: yhi = %08x%08x\n", *(INT32 *)&yhi, *((INT32 *)&yhi + 1)); 00122 printf("c_q_mul: ylo = %08x%08x\n", *(INT32 *)&ylo, *((INT32 *)&ylo + 1)); 00123 #endif 00124 00125 00126 iyhi = *(INT32 *)&yhi; 00127 xptyhi = (iyhi >> 20); 00128 xptyhi &= 0x7ff; 00129 00130 ixhi = *(INT32 *)&xhi; 00131 xptxhi = (ixhi >> 20); 00132 xptxhi &= 0x7ff; 00133 00134 xpthi = xptxhi; 00135 xptlo = xptyhi; 00136 00137 if ( xptxhi < xptyhi ) 00138 { 00139 xptlo = xptxhi; 00140 xpthi = xptyhi; 00141 } 00142 00143 if ( (0x21a < xptlo) && (xpthi < 0x5fd) ) 00144 { 00145 /* normal case */ 00146 00147 /* Form the exact product of xhi and yhi using 00148 * Dekker's algorithm. 00149 */ 00150 00151 p = xhi*const1.d; 00152 00153 hx = (xhi - p) + p; 00154 tx = xhi - hx; 00155 00156 q = yhi*const1.d; 00157 00158 hy = (yhi - q) + q; 00159 ty = yhi - hy; 00160 00161 u.hi = xhi*yhi; 00162 u.lo = (((hx*hy - u.hi) + hx*ty) + hy*tx) + tx*ty; 00163 00164 /* Add the remaining pieces using the Kahan summation formula. */ 00165 00166 a = xhi*ylo; 00167 00168 ww = u.lo + a; 00169 rem = u.lo - ww + a; 00170 00171 v = u.hi + ww; 00172 vv = u.hi - v + ww; 00173 00174 a = xlo*yhi + rem; 00175 u.lo = vv + a; 00176 00177 rem = vv - u.lo + a; 00178 00179 w = v + u.lo; 00180 ww = v - w + u.lo; 00181 00182 a = xlo*ylo + rem; 00183 00184 vv = ww + a; 00185 00186 z.hi = w + vv; 00187 DBL2LL(z.hi, iz); 00188 z.lo = w - z.hi + vv; 00189 00190 /* if necessary, round z.lo so that the sum of z.hi and z.lo has at most 00191 107 significant bits 00192 */ 00193 00194 /* first, compute a quarter ulp of z */ 00195 00196 iw = (iz >> DMANTWIDTH); 00197 iqulp = (iw & 0x7ff); 00198 iqulp -= 54; 00199 iqulp <<= DMANTWIDTH; 00200 00201 if ( iqulp > 0 ) 00202 { 00203 LL2DBL( iqulp, qulp ); 00204 iw <<= DMANTWIDTH; 00205 00206 /* Note that the size of an ulp changes at a 00207 * power of two. 00208 */ 00209 00210 if ( iw == iz ) 00211 goto fix; 00212 00213 if ( fabs(z.lo) >= qulp ) 00214 { 00215 qulp = 0.0; 00216 } 00217 else if ( z.lo < 0.0 ) 00218 qulp = -qulp; 00219 00220 z.lo += qulp; 00221 z.lo -= qulp; 00222 } 00223 00224 #ifdef QUAD_DEBUG 00225 printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1)); 00226 printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1)); 00227 #endif 00228 00229 return ( z ); 00230 } 00231 else if ( (xptxhi == 0x7ff) || (xhi == 0.0) || (yhi == 0.0) ) 00232 { 00233 z.hi = xhi*yhi; 00234 z.lo = 0.0; 00235 00236 #ifdef QUAD_DEBUG 00237 printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1)); 00238 printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1)); 00239 #endif 00240 00241 return ( z ); 00242 } 00243 else 00244 { 00245 xfactor = 1.0; 00246 yfactor = 1.0; 00247 scaleup = scaledown = NO; 00248 00249 if ( xptxhi <= 0x21a ) 00250 { 00251 xhi *= twop590.d; 00252 xlo *= twop590.d; 00253 xfactor = twopm590.d; 00254 scaleup = YES; 00255 } 00256 00257 if ( xptyhi <= 0x21a ) 00258 { 00259 yhi *= twop590.d; 00260 ylo *= twop590.d; 00261 yfactor = twopm590.d; 00262 scaleup = YES; 00263 } 00264 00265 if ( xptxhi >= 0x5fd ) 00266 { 00267 xhi *= twopm590.d; 00268 xlo *= twopm590.d; 00269 xfactor = twop590.d; 00270 scaledown = YES; 00271 } 00272 00273 if ( xptyhi >= 0x5fd ) 00274 { 00275 yhi *= twopm590.d; 00276 ylo *= twopm590.d; 00277 yfactor = twop590.d; 00278 scaledown = YES; 00279 } 00280 00281 if ( (scaleup == YES) && (scaledown == YES) ) 00282 { 00283 xfactor = yfactor = 1.0; 00284 } 00285 00286 00287 /* Form the exact product of xhi and yhi using 00288 * Dekker's algorithm. 00289 */ 00290 00291 p = xhi*const1.d; 00292 00293 hx = (xhi - p) + p; 00294 tx = xhi - hx; 00295 00296 q = yhi*const1.d; 00297 00298 hy = (yhi - q) + q; 00299 ty = yhi - hy; 00300 00301 u.hi = xhi*yhi; 00302 u.lo = (((hx*hy - u.hi) + hx*ty) + hy*tx) + tx*ty; 00303 00304 /* Add the remaining pieces using the Kahan summation formula. */ 00305 00306 a = xhi*ylo; 00307 00308 ww = u.lo + a; 00309 rem = u.lo - ww + a; 00310 00311 v = u.hi + ww; 00312 vv = u.hi - v + ww; 00313 00314 a = xlo*yhi + rem; 00315 u.lo = vv + a; 00316 00317 rem = vv - u.lo + a; 00318 00319 w = v + u.lo; 00320 ww = v - w + u.lo; 00321 00322 a = xlo*ylo + rem; 00323 00324 vv = ww + a; 00325 00326 z.hi = w + vv; 00327 z.lo = w - z.hi + vv; 00328 00329 /* Rescale z.hi and z.lo before rounding */ 00330 00331 w = z.hi*xfactor; 00332 w *= yfactor; 00333 00334 if ( (w == 0.0) || (fabs(w) == inf.d) ) 00335 { 00336 z.hi = w; 00337 z.lo = 0.0; 00338 00339 #ifdef QUAD_DEBUG 00340 printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1)); 00341 printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1)); 00342 #endif 00343 00344 return ( z ); 00345 } 00346 00347 ww = z.lo*xfactor; 00348 ww *= yfactor; 00349 00350 z.hi = w + ww; 00351 DBL2LL(z.hi, iz); 00352 z.lo = w - z.hi + ww; 00353 00354 #ifdef QUAD_DEBUG 00355 printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1)); 00356 printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1)); 00357 #endif 00358 00359 /* if necessary, round z.lo so that the sum of z.hi and z.lo has at most 00360 107 significant bits 00361 */ 00362 00363 /* first, compute a quarter ulp of z */ 00364 00365 iw = (iz >> DMANTWIDTH); 00366 iqulp = (iw & 0x7ff); 00367 iqulp -= 54; 00368 iqulp <<= DMANTWIDTH; 00369 00370 if ( iqulp > 0 ) 00371 { 00372 LL2DBL( iqulp, qulp ); 00373 iw <<= DMANTWIDTH; 00374 00375 /* Note that the size of an ulp changes at a 00376 * power of two. 00377 */ 00378 00379 if ( iw == iz ) 00380 goto fix2; 00381 00382 back: 00383 if ( fabs(z.lo) >= qulp ) 00384 { 00385 qulp = 0.0; 00386 } 00387 else if ( z.lo < 0.0 ) 00388 qulp = -qulp; 00389 00390 z.lo += qulp; 00391 z.lo -= qulp; 00392 } 00393 00394 return ( z ); 00395 } 00396 00397 fix: 00398 if ( ((z.hi > 0.0) && (z.lo < 0.0)) || ((z.hi < 0.0) && (z.lo > 0.0)) ) 00399 qulp *= 0.5; 00400 00401 if ( fabs(z.lo) >= qulp ) 00402 { 00403 qulp = 0.0; 00404 } 00405 else if ( z.lo < 0.0 ) 00406 qulp = -qulp; 00407 00408 z.lo += qulp; 00409 z.lo -= qulp; 00410 00411 return ( z ); 00412 00413 fix2: 00414 if ( ((z.hi > 0.0) && (z.lo < 0.0)) || ((z.hi < 0.0) && (z.lo > 0.0)) ) 00415 qulp *= 0.5; 00416 00417 goto back; 00418 } 00419