Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
c_qtenscale.c
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2 of the GNU General Public License as
00007   published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if 
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU General Public License along
00021   with this program; if not, write the Free Software Foundation, Inc., 59
00022   Temple Place - Suite 330, Boston MA 02111-1307, USA.
00023 
00024   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00025   Mountain View, CA 94043, or:
00026 
00027   http://www.sgi.com
00028 
00029   For further information regarding this notice, see:
00030 
00031   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00032 
00033 */
00034 
00035 
00036 /* ====================================================================
00037  * ====================================================================
00038  *
00039  *
00040  * Revision history:
00041  *  29-jun-93 - Original Version
00042  *
00043  * Description: 128b fraction * 10^exp => 128b fraction * 2^bexp.
00044  *
00045  * ====================================================================
00046  * ====================================================================
00047  */
00048 
00049 static char *source_file = __FILE__;
00050 
00051 #include "defs.h"
00052 
00053 #define min(x,y) ((x)<(y)? (x): (y))
00054 
00055 /* Power of ten fractions */
00056 
00057 static const UINT64 tenpow[80][2] = {
00058 
00059 0xa000000000000000ULL, 0x0000000000000000ULL, /* (10**1)/(2**4) */
00060 0xc800000000000000ULL, 0x0000000000000000ULL, /* (10**2)/(2**7) */
00061 0xfa00000000000000ULL, 0x0000000000000000ULL, /* (10**3)/(2**10) */
00062 0x9c40000000000000ULL, 0x0000000000000000ULL, /* (10**4)/(2**14) */
00063 0xc350000000000000ULL, 0x0000000000000000ULL, /* (10**5)/(2**17) */
00064 0xf424000000000000ULL, 0x0000000000000000ULL, /* (10**6)/(2**20) */
00065 0x9896800000000000ULL, 0x0000000000000000ULL, /* (10**7)/(2**24) */
00066 0xbebc200000000000ULL, 0x0000000000000000ULL, /* (10**8)/(2**27) */
00067 0xee6b280000000000ULL, 0x0000000000000000ULL, /* (10**9)/(2**30) */
00068 0x9502f90000000000ULL, 0x0000000000000000ULL, /* (10**10)/(2**34) */
00069 0xba43b74000000000ULL, 0x0000000000000000ULL, /* (10**11)/(2**37) */
00070 0xe8d4a51000000000ULL, 0x0000000000000000ULL, /* (10**12)/(2**40) */
00071 0x9184e72a00000000ULL, 0x0000000000000000ULL, /* (10**13)/(2**44) */
00072 0xb5e620f480000000ULL, 0x0000000000000000ULL, /* (10**14)/(2**47) */
00073 0xe35fa931a0000000ULL, 0x0000000000000000ULL, /* (10**15)/(2**50) */
00074 0x8e1bc9bf04000000ULL, 0x0000000000000000ULL, /* (10**16)/(2**54) */
00075 0xb1a2bc2ec5000000ULL, 0x0000000000000000ULL, /* (10**17)/(2**57) */
00076 0xde0b6b3a76400000ULL, 0x0000000000000000ULL, /* (10**18)/(2**60) */
00077 0x8ac7230489e80000ULL, 0x0000000000000000ULL, /* (10**19)/(2**64) */
00078 0xad78ebc5ac620000ULL, 0x0000000000000000ULL, /* (10**20)/(2**67) */
00079 0xd8d726b7177a8000ULL, 0x0000000000000000ULL, /* (10**21)/(2**70) */
00080 0x878678326eac9000ULL, 0x0000000000000000ULL, /* (10**22)/(2**74) */
00081 0xa968163f0a57b400ULL, 0x0000000000000000ULL, /* (10**23)/(2**77) */
00082 0xd3c21bcecceda100ULL, 0x0000000000000000ULL, /* (10**24)/(2**80) */
00083 0x84595161401484a0ULL, 0x0000000000000000ULL, /* (10**25)/(2**84) */
00084 0xa56fa5b99019a5c8ULL, 0x0000000000000000ULL, /* (10**26)/(2**87) */
00085 0xcecb8f27f4200f3aULL, 0x0000000000000000ULL, /* (10**27)/(2**90) */
00086 0x813f3978f8940984ULL, 0x4000000000000000ULL, /* (10**28)/(2**94) */
00087 0xa18f07d736b90be5ULL, 0x5000000000000000ULL, /* (10**29)/(2**97) */
00088 0xc9f2c9cd04674edeULL, 0xa400000000000000ULL, /* (10**30)/(2**100) */
00089 0xfc6f7c4045812296ULL, 0x4d00000000000000ULL, /* (10**31)/(2**103) */
00090 
00091 0x9dc5ada82b70b59dULL, 0xf020000000000000ULL, /* (10**32)/(2**107) */
00092 0xc2781f49ffcfa6d5ULL, 0x3cbf6b71c76b25fbULL, /* (10**64)/(2**213) */
00093 0xefb3ab16c59b14a2ULL, 0xc5cfe94ef3ea101eULL, /* (10**96)/(2**319) */
00094 0x93ba47c980e98cdfULL, 0xc66f336c36b10137ULL, /* (10**128)/(2**426) */
00095 0xb616a12b7fe617aaULL, 0x577b986b314d6009ULL, /* (10**160)/(2**532) */
00096 0xe070f78d3927556aULL, 0x85bbe253f47b1417ULL, /* (10**192)/(2**638) */
00097 0x8a5296ffe33cc92fULL, 0x82bd6b70d99aaa70ULL, /* (10**224)/(2**745) */
00098 0xaa7eebfb9df9de8dULL, 0xddbb901b98feeab8ULL, /* (10**256)/(2**851) */
00099 0xd226fc195c6a2f8cULL, 0x73832eec6fff3112ULL, /* (10**288)/(2**957) */
00100 
00101 0xccccccccccccccccULL, 0xcccccccccccccccdULL, /* (10**-1)/(2**-3) */
00102 0xa3d70a3d70a3d70aULL, 0x3d70a3d70a3d70a4ULL, /* (10**-2)/(2**-6) */
00103 0x83126e978d4fdf3bULL, 0x645a1cac083126e9ULL, /* (10**-3)/(2**-9) */
00104 0xd1b71758e219652bULL, 0xd3c36113404ea4a9ULL, /* (10**-4)/(2**-13) */
00105 0xa7c5ac471b478423ULL, 0x0fcf80dc33721d54ULL, /* (10**-5)/(2**-16) */
00106 0x8637bd05af6c69b5ULL, 0xa63f9a49c2c1b110ULL, /* (10**-6)/(2**-19) */
00107 0xd6bf94d5e57a42bcULL, 0x3d32907604691b4dULL, /* (10**-7)/(2**-23) */
00108 0xabcc77118461cefcULL, 0xfdc20d2b36ba7c3dULL, /* (10**-8)/(2**-26) */
00109 0x89705f4136b4a597ULL, 0x31680a88f8953031ULL, /* (10**-9)/(2**-29) */
00110 0xdbe6fecebdedd5beULL, 0xb573440e5a884d1bULL, /* (10**-10)/(2**-33) */
00111 0xafebff0bcb24aafeULL, 0xf78f69a51539d749ULL, /* (10**-11)/(2**-36) */
00112 0x8cbccc096f5088cbULL, 0xf93f87b7442e45d4ULL, /* (10**-12)/(2**-39) */
00113 0xe12e13424bb40e13ULL, 0x2865a5f206b06fbaULL, /* (10**-13)/(2**-43) */
00114 0xb424dc35095cd80fULL, 0x538484c19ef38c94ULL, /* (10**-14)/(2**-46) */
00115 0x901d7cf73ab0acd9ULL, 0x0f9d37014bf60a10ULL, /* (10**-15)/(2**-49) */
00116 0xe69594bec44de15bULL, 0x4c2ebe687989a9b4ULL, /* (10**-16)/(2**-53) */
00117 0xb877aa3236a4b449ULL, 0x09befeb9fad487c3ULL, /* (10**-17)/(2**-56) */
00118 0x9392ee8e921d5d07ULL, 0x3aff322e62439fcfULL, /* (10**-18)/(2**-59) */
00119 0xec1e4a7db69561a5ULL, 0x2b31e9e3d06c32e5ULL, /* (10**-19)/(2**-63) */
00120 0xbce5086492111aeaULL, 0x88f4bb1ca6bcf584ULL, /* (10**-20)/(2**-66) */
00121 0x971da05074da7beeULL, 0xd3f6fc16ebca5e03ULL, /* (10**-21)/(2**-69) */
00122 0xf1c90080baf72cb1ULL, 0x5324c68b12dd6338ULL, /* (10**-22)/(2**-73) */
00123 0xc16d9a0095928a27ULL, 0x75b7053c0f178294ULL, /* (10**-23)/(2**-76) */
00124 0x9abe14cd44753b52ULL, 0xc4926a9672793543ULL, /* (10**-24)/(2**-79) */
00125 0xf79687aed3eec551ULL, 0x3a83ddbd83f52205ULL, /* (10**-25)/(2**-83) */
00126 0xc612062576589ddaULL, 0x95364afe032a819dULL, /* (10**-26)/(2**-86) */
00127 0x9e74d1b791e07e48ULL, 0x775ea264cf55347eULL, /* (10**-27)/(2**-89) */
00128 0xfd87b5f28300ca0dULL, 0x8bca9d6e188853fcULL, /* (10**-28)/(2**-93) */
00129 0xcad2f7f5359a3b3eULL, 0x096ee45813a04330ULL, /* (10**-29)/(2**-96) */
00130 0xa2425ff75e14fc31ULL, 0xa1258379a94d028dULL, /* (10**-30)/(2**-99) */
00131 0x81ceb32c4b43fcf4ULL, 0x80eacf948770ced7ULL, /* (10**-31)/(2**-102) */
00132 
00133 0xcfb11ead453994baULL, 0x67de18eda5814af2ULL, /* (10**-32)/(2**-106) */
00134 0xa87fea27a539e9a5ULL, 0x3f2398d747b36224ULL, /* (10**-64)/(2**-212) */
00135 0x88b402f7fd75539bULL, 0x11dbcb0218ebb414ULL, /* (10**-96)/(2**-318) */
00136 0xddd0467c64bce4a0ULL, 0xac7cb3f6d05ddbdfULL, /* (10**-128)/(2**-425) */
00137 0xb3f4e093db73a093ULL, 0x59ed216765690f57ULL, /* (10**-160)/(2**-531) */
00138 0x91ff83775423cc06ULL, 0x7b6306a34627ddcfULL, /* (10**-192)/(2**-637) */
00139 0xece53cec4a314ebdULL, 0xa4f8bf5635246428ULL, /* (10**-224)/(2**-744) */
00140 0xc0314325637a1939ULL, 0xfa911155fefb5309ULL, /* (10**-256)/(2**-850) */
00141 0x9becce62836ac577ULL, 0x4ee367f9430aec33ULL  /* (10**-288)/(2**-956) */
00142 };
00143 
00144 static const INT32 twoexp[80] = {
00145    4,   7,  10,  14,  17,  20,  24,  27,  30,  34,
00146   37,  40,  44,  47,  50,  54,  57,  60,  64,  67,
00147   70,  74,  77,  80,  84,  87,  90,  94,  97, 100, 103, 
00148 
00149  107, 213, 319, 426, 532, 638, 745, 851, 957, 
00150 
00151   -3,  -6,  -9, -13, -16, -19, -23, -26, -29, -33,
00152  -36, -39, -43, -46, -49, -53, -56, -59, -63, -66,
00153  -69, -73, -76, -79, -83, -86, -89, -93, -96, -99,-102,
00154 
00155 -106,-212,-318,-425,-531,-637,-744,-850,-956
00156 };
00157 
00158 
00159 #define TEN_1           0       /* offset to 10 **   1 */
00160 #define TEN_32          31      /* offset to 10 **  32 */
00161 #define TEN_M1          40      /* offset to 10 **  -1 */
00162 #define TEN_M32         71      /* offset to 10 ** -32 */
00163 #define TEN_LAST        80      /* offset one past last entry */
00164 
00165 #define HIBITULL (1ULL << 63)
00166 
00167 extern  c_qwmultu(UINT64 *z, UINT64 *x, const UINT64 *y);
00168 static void qnorm_and_round(UINT64 *, INT32 *, UINT64 *);
00169 
00170 
00171 /* ====================================================================
00172  *
00173  * FunctionName: qnorm_and_round
00174  *
00175  * Description: normalize 256 bit fraction and round to 128 bits
00176  *
00177  * ====================================================================
00178  */
00179 static void qnorm_and_round(p, norm, prod)
00180      UINT64 p[2];               /* 128b fraction        - out */
00181      INT32 *norm;               /* bits shifted         - out */
00182      UINT64 prod[4];            /* 256b fraction        - in  */
00183 {
00184   *norm = 0;
00185   if( ! (prod[0] & HIBITULL) ) { 
00186                                 /* leading bit is a zero 
00187                                  * may have to normalize 
00188                                  */
00189     if(prod[0] == ~HIBITULL &&
00190        prod[1] == ~0ULL &&
00191        prod[2] >> 62 == 0x3 ) { /* normalization followed by round
00192                                  * would cause carry to create
00193                                  * extra bit, so don't normalize 
00194                                  */
00195       p[0] = HIBITULL;
00196       p[1] = 0ULL;
00197       return;
00198     }
00199     p[0] = prod[0]<<1 | prod[1]>>63; /* normalize */
00200     p[1] = prod[1]<<1 | prod[2]>>63;
00201     *norm=1;
00202     prod[2] <<= 1;
00203   }
00204   else {
00205     p[0] = prod[0];
00206     p[1] = prod[1];
00207   }
00208 
00209   if( prod[2] & HIBITULL ) {    /* first guard bit a one */
00210     if( (p[1] & 0x1ULL) ||      /* LSB on, round to even */
00211        prod[2] != HIBITULL ||   /* not borderline for round to even */
00212        prod[3] ) {
00213 
00214       /* round */
00215       p[1]++;
00216       if(p[1]==0)
00217         p[0]++;
00218     }
00219   }
00220   return;
00221 }
00222 
00223 /* ====================================================================
00224  *
00225  * FunctionName: c_qtenscale
00226  *
00227  * Description: 128b fraction * 10^exp => 128b fraction * 2^bexp.
00228  * Except for minor changes, this routine is the same as __qtenscale()
00229  * in libc.  Included here, so we can do cross compilations on
00230  * machines not supporting quad precision.
00231  *
00232  *
00233  * ====================================================================
00234  */
00235 
00236 void 
00237 c_qtenscale(p,exp,bexp)
00238      UINT64 p[2];               /* 128b fraction         - inout */
00239      INT32      exp;            /* power of ten exponent - in    */
00240      INT32      *bexp;          /* power of two exponent - out   */
00241 {
00242   UINT64 prod[4];               /* 256b product */
00243   INT32 exp_hi, exp_lo;         /* exp = exp_hi*32 + exp_lo */
00244   INT32 hi, lo, t1, t32;        /* offsets in power of ten table */
00245   INT32 norm;                   /* number of bits of normalization */
00246 
00247   *bexp = 0;
00248   if(exp > 0) {                 /* split exponent */
00249     exp_hi = exp >> 5;
00250     exp_lo = exp & 0x1F;
00251     t1 = TEN_1;
00252     t32 = TEN_32;
00253   }
00254   else if(exp < 0) {
00255 #if QUAD_DEBUG
00256     printf("exp is negative: exp = %d\n",exp);
00257 #endif
00258     exp_hi = (-exp) >> 5;
00259     exp_lo = (-exp) & 0x1F;
00260     t1 = TEN_M1;
00261     t32 = TEN_M32;
00262 #if QUAD_DEBUG
00263     printf("                 exp_hi=%d\n",exp_hi);
00264     printf("                 exp_lo=%d\n",exp_lo);
00265 #endif
00266   }
00267   else {                        /* no scaling needed */
00268     return;
00269   }
00270   while(exp_hi) {               /* scale */
00271     hi = min(exp_hi,9);         /* only nine large powers of 10 */
00272     exp_hi -= hi;               /* could iterate in extreme case */
00273     hi += t32-1;
00274 #if QUAD_DEBUG
00275     printf("about to scale hi\n");
00276     printf("                 hi=%d\n",hi);
00277 #endif
00278     c_qwmultu(prod, p, tenpow[hi]);
00279 #if QUAD_DEBUG
00280     printf("After c_qwmultu: prod = %llx %llx %llx %llx\n", 
00281            prod[0], prod[1], prod[2], prod[3]);
00282 #endif
00283     qnorm_and_round(p, &norm, prod);
00284 #if QUAD_DEBUG
00285     printf("After qnorm_and_round: p = %llx %llx\n", 
00286            p[0], p[1]);
00287 #endif
00288     *bexp += twoexp[hi] - norm;
00289 #if QUAD_DEBUG
00290     printf("New *bexp = %d\n", *bexp);
00291 #endif
00292   }
00293   if(exp_lo) {
00294     lo = t1 + exp_lo -1;
00295 #if QUAD_DEBUG
00296     printf("about to scale lo\n");
00297     printf("                 lo=%d\n",lo);
00298 #endif
00299     c_qwmultu(prod, p, tenpow[lo]);
00300 #if QUAD_DEBUG
00301     printf("After c_qwmultu: prod = %llx %llx %llx %llx\n", 
00302            prod[0], prod[1], prod[2], prod[3]);
00303 #endif
00304     qnorm_and_round(p, &norm, prod);
00305 #if QUAD_DEBUG
00306     printf("After qnorm_and_round: p = %llx %llx\n", 
00307            p[0], p[1]);
00308 #endif
00309     *bexp += twoexp[lo] - norm;
00310 #if QUAD_DEBUG
00311     printf("New *bexp = %d\n", *bexp);
00312 #endif
00313   }
00314   return;
00315 }
00316 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines