Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2 of the GNU General Public License as 00007 published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU General Public License along 00021 with this program; if not, write the Free Software Foundation, Inc., 59 00022 Temple Place - Suite 330, Boston MA 02111-1307, USA. 00023 00024 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00025 Mountain View, CA 94043, or: 00026 00027 http://www.sgi.com 00028 00029 For further information regarding this notice, see: 00030 00031 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00032 00033 */ 00034 00035 00036 /* ==================================================================== 00037 * ==================================================================== 00038 * 00039 * 00040 * Revision history: 00041 * 29-jun-93 - Original Version 00042 * 00043 * Description: 128b fraction * 10^exp => 128b fraction * 2^bexp. 00044 * 00045 * ==================================================================== 00046 * ==================================================================== 00047 */ 00048 00049 static char *source_file = __FILE__; 00050 00051 #include "defs.h" 00052 00053 #define min(x,y) ((x)<(y)? (x): (y)) 00054 00055 /* Power of ten fractions */ 00056 00057 static const UINT64 tenpow[80][2] = { 00058 00059 0xa000000000000000ULL, 0x0000000000000000ULL, /* (10**1)/(2**4) */ 00060 0xc800000000000000ULL, 0x0000000000000000ULL, /* (10**2)/(2**7) */ 00061 0xfa00000000000000ULL, 0x0000000000000000ULL, /* (10**3)/(2**10) */ 00062 0x9c40000000000000ULL, 0x0000000000000000ULL, /* (10**4)/(2**14) */ 00063 0xc350000000000000ULL, 0x0000000000000000ULL, /* (10**5)/(2**17) */ 00064 0xf424000000000000ULL, 0x0000000000000000ULL, /* (10**6)/(2**20) */ 00065 0x9896800000000000ULL, 0x0000000000000000ULL, /* (10**7)/(2**24) */ 00066 0xbebc200000000000ULL, 0x0000000000000000ULL, /* (10**8)/(2**27) */ 00067 0xee6b280000000000ULL, 0x0000000000000000ULL, /* (10**9)/(2**30) */ 00068 0x9502f90000000000ULL, 0x0000000000000000ULL, /* (10**10)/(2**34) */ 00069 0xba43b74000000000ULL, 0x0000000000000000ULL, /* (10**11)/(2**37) */ 00070 0xe8d4a51000000000ULL, 0x0000000000000000ULL, /* (10**12)/(2**40) */ 00071 0x9184e72a00000000ULL, 0x0000000000000000ULL, /* (10**13)/(2**44) */ 00072 0xb5e620f480000000ULL, 0x0000000000000000ULL, /* (10**14)/(2**47) */ 00073 0xe35fa931a0000000ULL, 0x0000000000000000ULL, /* (10**15)/(2**50) */ 00074 0x8e1bc9bf04000000ULL, 0x0000000000000000ULL, /* (10**16)/(2**54) */ 00075 0xb1a2bc2ec5000000ULL, 0x0000000000000000ULL, /* (10**17)/(2**57) */ 00076 0xde0b6b3a76400000ULL, 0x0000000000000000ULL, /* (10**18)/(2**60) */ 00077 0x8ac7230489e80000ULL, 0x0000000000000000ULL, /* (10**19)/(2**64) */ 00078 0xad78ebc5ac620000ULL, 0x0000000000000000ULL, /* (10**20)/(2**67) */ 00079 0xd8d726b7177a8000ULL, 0x0000000000000000ULL, /* (10**21)/(2**70) */ 00080 0x878678326eac9000ULL, 0x0000000000000000ULL, /* (10**22)/(2**74) */ 00081 0xa968163f0a57b400ULL, 0x0000000000000000ULL, /* (10**23)/(2**77) */ 00082 0xd3c21bcecceda100ULL, 0x0000000000000000ULL, /* (10**24)/(2**80) */ 00083 0x84595161401484a0ULL, 0x0000000000000000ULL, /* (10**25)/(2**84) */ 00084 0xa56fa5b99019a5c8ULL, 0x0000000000000000ULL, /* (10**26)/(2**87) */ 00085 0xcecb8f27f4200f3aULL, 0x0000000000000000ULL, /* (10**27)/(2**90) */ 00086 0x813f3978f8940984ULL, 0x4000000000000000ULL, /* (10**28)/(2**94) */ 00087 0xa18f07d736b90be5ULL, 0x5000000000000000ULL, /* (10**29)/(2**97) */ 00088 0xc9f2c9cd04674edeULL, 0xa400000000000000ULL, /* (10**30)/(2**100) */ 00089 0xfc6f7c4045812296ULL, 0x4d00000000000000ULL, /* (10**31)/(2**103) */ 00090 00091 0x9dc5ada82b70b59dULL, 0xf020000000000000ULL, /* (10**32)/(2**107) */ 00092 0xc2781f49ffcfa6d5ULL, 0x3cbf6b71c76b25fbULL, /* (10**64)/(2**213) */ 00093 0xefb3ab16c59b14a2ULL, 0xc5cfe94ef3ea101eULL, /* (10**96)/(2**319) */ 00094 0x93ba47c980e98cdfULL, 0xc66f336c36b10137ULL, /* (10**128)/(2**426) */ 00095 0xb616a12b7fe617aaULL, 0x577b986b314d6009ULL, /* (10**160)/(2**532) */ 00096 0xe070f78d3927556aULL, 0x85bbe253f47b1417ULL, /* (10**192)/(2**638) */ 00097 0x8a5296ffe33cc92fULL, 0x82bd6b70d99aaa70ULL, /* (10**224)/(2**745) */ 00098 0xaa7eebfb9df9de8dULL, 0xddbb901b98feeab8ULL, /* (10**256)/(2**851) */ 00099 0xd226fc195c6a2f8cULL, 0x73832eec6fff3112ULL, /* (10**288)/(2**957) */ 00100 00101 0xccccccccccccccccULL, 0xcccccccccccccccdULL, /* (10**-1)/(2**-3) */ 00102 0xa3d70a3d70a3d70aULL, 0x3d70a3d70a3d70a4ULL, /* (10**-2)/(2**-6) */ 00103 0x83126e978d4fdf3bULL, 0x645a1cac083126e9ULL, /* (10**-3)/(2**-9) */ 00104 0xd1b71758e219652bULL, 0xd3c36113404ea4a9ULL, /* (10**-4)/(2**-13) */ 00105 0xa7c5ac471b478423ULL, 0x0fcf80dc33721d54ULL, /* (10**-5)/(2**-16) */ 00106 0x8637bd05af6c69b5ULL, 0xa63f9a49c2c1b110ULL, /* (10**-6)/(2**-19) */ 00107 0xd6bf94d5e57a42bcULL, 0x3d32907604691b4dULL, /* (10**-7)/(2**-23) */ 00108 0xabcc77118461cefcULL, 0xfdc20d2b36ba7c3dULL, /* (10**-8)/(2**-26) */ 00109 0x89705f4136b4a597ULL, 0x31680a88f8953031ULL, /* (10**-9)/(2**-29) */ 00110 0xdbe6fecebdedd5beULL, 0xb573440e5a884d1bULL, /* (10**-10)/(2**-33) */ 00111 0xafebff0bcb24aafeULL, 0xf78f69a51539d749ULL, /* (10**-11)/(2**-36) */ 00112 0x8cbccc096f5088cbULL, 0xf93f87b7442e45d4ULL, /* (10**-12)/(2**-39) */ 00113 0xe12e13424bb40e13ULL, 0x2865a5f206b06fbaULL, /* (10**-13)/(2**-43) */ 00114 0xb424dc35095cd80fULL, 0x538484c19ef38c94ULL, /* (10**-14)/(2**-46) */ 00115 0x901d7cf73ab0acd9ULL, 0x0f9d37014bf60a10ULL, /* (10**-15)/(2**-49) */ 00116 0xe69594bec44de15bULL, 0x4c2ebe687989a9b4ULL, /* (10**-16)/(2**-53) */ 00117 0xb877aa3236a4b449ULL, 0x09befeb9fad487c3ULL, /* (10**-17)/(2**-56) */ 00118 0x9392ee8e921d5d07ULL, 0x3aff322e62439fcfULL, /* (10**-18)/(2**-59) */ 00119 0xec1e4a7db69561a5ULL, 0x2b31e9e3d06c32e5ULL, /* (10**-19)/(2**-63) */ 00120 0xbce5086492111aeaULL, 0x88f4bb1ca6bcf584ULL, /* (10**-20)/(2**-66) */ 00121 0x971da05074da7beeULL, 0xd3f6fc16ebca5e03ULL, /* (10**-21)/(2**-69) */ 00122 0xf1c90080baf72cb1ULL, 0x5324c68b12dd6338ULL, /* (10**-22)/(2**-73) */ 00123 0xc16d9a0095928a27ULL, 0x75b7053c0f178294ULL, /* (10**-23)/(2**-76) */ 00124 0x9abe14cd44753b52ULL, 0xc4926a9672793543ULL, /* (10**-24)/(2**-79) */ 00125 0xf79687aed3eec551ULL, 0x3a83ddbd83f52205ULL, /* (10**-25)/(2**-83) */ 00126 0xc612062576589ddaULL, 0x95364afe032a819dULL, /* (10**-26)/(2**-86) */ 00127 0x9e74d1b791e07e48ULL, 0x775ea264cf55347eULL, /* (10**-27)/(2**-89) */ 00128 0xfd87b5f28300ca0dULL, 0x8bca9d6e188853fcULL, /* (10**-28)/(2**-93) */ 00129 0xcad2f7f5359a3b3eULL, 0x096ee45813a04330ULL, /* (10**-29)/(2**-96) */ 00130 0xa2425ff75e14fc31ULL, 0xa1258379a94d028dULL, /* (10**-30)/(2**-99) */ 00131 0x81ceb32c4b43fcf4ULL, 0x80eacf948770ced7ULL, /* (10**-31)/(2**-102) */ 00132 00133 0xcfb11ead453994baULL, 0x67de18eda5814af2ULL, /* (10**-32)/(2**-106) */ 00134 0xa87fea27a539e9a5ULL, 0x3f2398d747b36224ULL, /* (10**-64)/(2**-212) */ 00135 0x88b402f7fd75539bULL, 0x11dbcb0218ebb414ULL, /* (10**-96)/(2**-318) */ 00136 0xddd0467c64bce4a0ULL, 0xac7cb3f6d05ddbdfULL, /* (10**-128)/(2**-425) */ 00137 0xb3f4e093db73a093ULL, 0x59ed216765690f57ULL, /* (10**-160)/(2**-531) */ 00138 0x91ff83775423cc06ULL, 0x7b6306a34627ddcfULL, /* (10**-192)/(2**-637) */ 00139 0xece53cec4a314ebdULL, 0xa4f8bf5635246428ULL, /* (10**-224)/(2**-744) */ 00140 0xc0314325637a1939ULL, 0xfa911155fefb5309ULL, /* (10**-256)/(2**-850) */ 00141 0x9becce62836ac577ULL, 0x4ee367f9430aec33ULL /* (10**-288)/(2**-956) */ 00142 }; 00143 00144 static const INT32 twoexp[80] = { 00145 4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 00146 37, 40, 44, 47, 50, 54, 57, 60, 64, 67, 00147 70, 74, 77, 80, 84, 87, 90, 94, 97, 100, 103, 00148 00149 107, 213, 319, 426, 532, 638, 745, 851, 957, 00150 00151 -3, -6, -9, -13, -16, -19, -23, -26, -29, -33, 00152 -36, -39, -43, -46, -49, -53, -56, -59, -63, -66, 00153 -69, -73, -76, -79, -83, -86, -89, -93, -96, -99,-102, 00154 00155 -106,-212,-318,-425,-531,-637,-744,-850,-956 00156 }; 00157 00158 00159 #define TEN_1 0 /* offset to 10 ** 1 */ 00160 #define TEN_32 31 /* offset to 10 ** 32 */ 00161 #define TEN_M1 40 /* offset to 10 ** -1 */ 00162 #define TEN_M32 71 /* offset to 10 ** -32 */ 00163 #define TEN_LAST 80 /* offset one past last entry */ 00164 00165 #define HIBITULL (1ULL << 63) 00166 00167 extern c_qwmultu(UINT64 *z, UINT64 *x, const UINT64 *y); 00168 static void qnorm_and_round(UINT64 *, INT32 *, UINT64 *); 00169 00170 00171 /* ==================================================================== 00172 * 00173 * FunctionName: qnorm_and_round 00174 * 00175 * Description: normalize 256 bit fraction and round to 128 bits 00176 * 00177 * ==================================================================== 00178 */ 00179 static void qnorm_and_round(p, norm, prod) 00180 UINT64 p[2]; /* 128b fraction - out */ 00181 INT32 *norm; /* bits shifted - out */ 00182 UINT64 prod[4]; /* 256b fraction - in */ 00183 { 00184 *norm = 0; 00185 if( ! (prod[0] & HIBITULL) ) { 00186 /* leading bit is a zero 00187 * may have to normalize 00188 */ 00189 if(prod[0] == ~HIBITULL && 00190 prod[1] == ~0ULL && 00191 prod[2] >> 62 == 0x3 ) { /* normalization followed by round 00192 * would cause carry to create 00193 * extra bit, so don't normalize 00194 */ 00195 p[0] = HIBITULL; 00196 p[1] = 0ULL; 00197 return; 00198 } 00199 p[0] = prod[0]<<1 | prod[1]>>63; /* normalize */ 00200 p[1] = prod[1]<<1 | prod[2]>>63; 00201 *norm=1; 00202 prod[2] <<= 1; 00203 } 00204 else { 00205 p[0] = prod[0]; 00206 p[1] = prod[1]; 00207 } 00208 00209 if( prod[2] & HIBITULL ) { /* first guard bit a one */ 00210 if( (p[1] & 0x1ULL) || /* LSB on, round to even */ 00211 prod[2] != HIBITULL || /* not borderline for round to even */ 00212 prod[3] ) { 00213 00214 /* round */ 00215 p[1]++; 00216 if(p[1]==0) 00217 p[0]++; 00218 } 00219 } 00220 return; 00221 } 00222 00223 /* ==================================================================== 00224 * 00225 * FunctionName: c_qtenscale 00226 * 00227 * Description: 128b fraction * 10^exp => 128b fraction * 2^bexp. 00228 * Except for minor changes, this routine is the same as __qtenscale() 00229 * in libc. Included here, so we can do cross compilations on 00230 * machines not supporting quad precision. 00231 * 00232 * 00233 * ==================================================================== 00234 */ 00235 00236 void 00237 c_qtenscale(p,exp,bexp) 00238 UINT64 p[2]; /* 128b fraction - inout */ 00239 INT32 exp; /* power of ten exponent - in */ 00240 INT32 *bexp; /* power of two exponent - out */ 00241 { 00242 UINT64 prod[4]; /* 256b product */ 00243 INT32 exp_hi, exp_lo; /* exp = exp_hi*32 + exp_lo */ 00244 INT32 hi, lo, t1, t32; /* offsets in power of ten table */ 00245 INT32 norm; /* number of bits of normalization */ 00246 00247 *bexp = 0; 00248 if(exp > 0) { /* split exponent */ 00249 exp_hi = exp >> 5; 00250 exp_lo = exp & 0x1F; 00251 t1 = TEN_1; 00252 t32 = TEN_32; 00253 } 00254 else if(exp < 0) { 00255 #if QUAD_DEBUG 00256 printf("exp is negative: exp = %d\n",exp); 00257 #endif 00258 exp_hi = (-exp) >> 5; 00259 exp_lo = (-exp) & 0x1F; 00260 t1 = TEN_M1; 00261 t32 = TEN_M32; 00262 #if QUAD_DEBUG 00263 printf(" exp_hi=%d\n",exp_hi); 00264 printf(" exp_lo=%d\n",exp_lo); 00265 #endif 00266 } 00267 else { /* no scaling needed */ 00268 return; 00269 } 00270 while(exp_hi) { /* scale */ 00271 hi = min(exp_hi,9); /* only nine large powers of 10 */ 00272 exp_hi -= hi; /* could iterate in extreme case */ 00273 hi += t32-1; 00274 #if QUAD_DEBUG 00275 printf("about to scale hi\n"); 00276 printf(" hi=%d\n",hi); 00277 #endif 00278 c_qwmultu(prod, p, tenpow[hi]); 00279 #if QUAD_DEBUG 00280 printf("After c_qwmultu: prod = %llx %llx %llx %llx\n", 00281 prod[0], prod[1], prod[2], prod[3]); 00282 #endif 00283 qnorm_and_round(p, &norm, prod); 00284 #if QUAD_DEBUG 00285 printf("After qnorm_and_round: p = %llx %llx\n", 00286 p[0], p[1]); 00287 #endif 00288 *bexp += twoexp[hi] - norm; 00289 #if QUAD_DEBUG 00290 printf("New *bexp = %d\n", *bexp); 00291 #endif 00292 } 00293 if(exp_lo) { 00294 lo = t1 + exp_lo -1; 00295 #if QUAD_DEBUG 00296 printf("about to scale lo\n"); 00297 printf(" lo=%d\n",lo); 00298 #endif 00299 c_qwmultu(prod, p, tenpow[lo]); 00300 #if QUAD_DEBUG 00301 printf("After c_qwmultu: prod = %llx %llx %llx %llx\n", 00302 prod[0], prod[1], prod[2], prod[3]); 00303 #endif 00304 qnorm_and_round(p, &norm, prod); 00305 #if QUAD_DEBUG 00306 printf("After qnorm_and_round: p = %llx %llx\n", 00307 p[0], p[1]); 00308 #endif 00309 *bexp += twoexp[lo] - norm; 00310 #if QUAD_DEBUG 00311 printf("New *bexp = %d\n", *bexp); 00312 #endif 00313 } 00314 return; 00315 } 00316