Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2 of the GNU General Public License as 00007 published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU General Public License along 00021 with this program; if not, write the Free Software Foundation, Inc., 59 00022 Temple Place - Suite 330, Boston MA 02111-1307, USA. 00023 00024 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00025 Mountain View, CA 94043, or: 00026 00027 http://www.sgi.com 00028 00029 For further information regarding this notice, see: 00030 00031 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00032 00033 */ 00034 00035 00036 /* 00037 * Portable IEEE 754 floating-point multiplication evaluator 00038 */ 00039 00040 #include "arith.internal.h" 00041 00042 00043 00044 int 00045 ar_ifmul32 (AR_IEEE_32 *x, 00046 const AR_IEEE_32 *a, 00047 const AR_IEEE_32 *b, 00048 int roundmode) 00049 { 00050 00051 int i, res = AR_STAT_OK; 00052 unsigned long x_lbits, z_lbits, rbits, carry; 00053 signed int x_expo; 00054 AR_IEEE_32 x2, y, y2, z; 00055 00056 /* If either a or b is a NaN, it's the result. */ 00057 if (IS_IEEE32_NaN(a)) { 00058 *x = *a; 00059 return res | AR_STAT_UNDEFINED; 00060 } 00061 if (IS_IEEE32_NaN(b)) { 00062 *x = *b; 00063 return res | AR_STAT_UNDEFINED; 00064 } 00065 00066 /* Test for infinities */ 00067 if (b->expo > AR_IEEE32_MAX_EXPO) 00068 if (a->expo == 0 && !IS_IEEE32_NZ_COEFF(a)){ 00069 /* zero * inf = quiet NaN */ 00070 QNaNIEEE32 (x); 00071 return res | AR_STAT_UNDEFINED; 00072 } else { 00073 /* anything * infinity = infinity */ 00074 if (i = a->sign ^ b->sign) 00075 res |= AR_STAT_NEGATIVE; 00076 *x = *b; 00077 x->sign = i; 00078 return res | AR_STAT_OVERFLOW; 00079 } 00080 if (a->expo > AR_IEEE32_MAX_EXPO) 00081 if (b->expo == 0 && !IS_IEEE32_NZ_COEFF(b)){ 00082 /* infinity * zero = quiet NaN */ 00083 QNaNIEEE32 (x); 00084 return res | AR_STAT_UNDEFINED; 00085 } else { 00086 /* infinity * anything = infinity */ 00087 if (i = a->sign ^ b->sign) 00088 res |= AR_STAT_NEGATIVE; 00089 *x = *a; 00090 x->sign = i; 00091 return res | AR_STAT_OVERFLOW; 00092 } 00093 00094 /* Test for denorms (they have zero exponents) to determine the 00095 * values of the implicit normalization bits; make them explicit. 00096 */ 00097 if (ar_state_register.ar_denorms_trap && 00098 ((!a->expo && IS_IEEE32_NZ_COEFF(a)) || 00099 (!b->expo && IS_IEEE32_NZ_COEFF(b)))) { 00100 /* operand is a denorm and denorms cause a trap */ 00101 x->expo = AR_IEEE32_MAX_EXPO + 1; 00102 return res | AR_STAT_UNDEFINED; 00103 } 00104 ZEROIEEE32 (y); 00105 y.coeff1 = !!a->expo; 00106 y2 = *a; 00107 z = *b; 00108 z_lbits = !!b->expo; 00109 x_expo = a->expo + b->expo + !a->expo + !b->expo - AR_IEEE32_EXPO_BIAS; 00110 if (x_expo <= 0) 00111 x_expo--; 00112 i = a->sign ^ b->sign; 00113 00114 /* Sum the pyramid */ 00115 if (z.coeff1 & 1) { 00116 x2 = *a; 00117 *x = y; 00118 } 00119 else { 00120 ZEROIEEE32 (*x); 00121 ZEROIEEE32 (x2); 00122 } 00123 x->sign = i; 00124 x_lbits = z_lbits & y.coeff1; 00125 SHLEFTIEEE32_2 (y, y2); 00126 SHRIGHTIEEE32 (z); 00127 z.coeff0 |= z_lbits << (AR_IEEE32_C0_BITS - 1); 00128 for (i = 1; i < AR_IEEE32_COEFF_BITS + 1; i++) { 00129 00130 if (z.coeff1 & 1) { 00131 carry = 0; 00132 ADDIEEE32 (x2, carry, x2, y2); 00133 ADDIEEE32 (*x, carry, *x, y); 00134 x_lbits += carry; 00135 } 00136 00137 SHLEFTIEEE32_2 (y, y2); 00138 SHRIGHTIEEE32 (z); 00139 } 00140 00141 /* Extract rounding bits */ 00142 rbits = x2.coeff0 >> (AR_IEEE32_C0_BITS - AR_IEEE32_ROUND_BITS); 00143 if (x2.coeff0 & MASKR (AR_IEEE32_C0_BITS - AR_IEEE32_ROUND_BITS) | 00144 x2.coeff1) 00145 rbits |= 1; /* sticky bit */ 00146 00147 /* Normalize and round */ 00148 return ar_i32norm (x_expo, x_lbits, rbits, x, roundmode); 00149 } 00150 00151 00152 00153 int 00154 ar_ifmul64 (AR_IEEE_64 *x, 00155 const AR_IEEE_64 *a, 00156 const AR_IEEE_64 *b, 00157 int roundmode) 00158 { 00159 00160 int i, res = AR_STAT_OK; 00161 unsigned long x_lbits, z_lbits, rbits, carry; 00162 signed int x_expo; 00163 AR_IEEE_64 x2, y, y2, z; 00164 00165 /* If either a or b is a NaN, it's the result. */ 00166 if (IS_IEEE64_NaN(a)) { 00167 *x = *a; 00168 return res | AR_STAT_UNDEFINED; 00169 } 00170 if (IS_IEEE64_NaN(b)) { 00171 *x = *b; 00172 return res | AR_STAT_UNDEFINED; 00173 } 00174 00175 /* Test for infinities */ 00176 if (b->expo > AR_IEEE64_MAX_EXPO) 00177 if (a->expo == 0 && !IS_IEEE64_NZ_COEFF(a)) { 00178 /* zero * inf = quiet NaN */ 00179 QNaNIEEE64 (x); 00180 return res | AR_STAT_UNDEFINED; 00181 } else { 00182 /* anything * infinity = infinity */ 00183 if (i = a->sign ^ b->sign) 00184 res |= AR_STAT_NEGATIVE; 00185 *x = *b; 00186 x->sign = i; 00187 return res | AR_STAT_OVERFLOW; 00188 } 00189 if (a->expo > AR_IEEE64_MAX_EXPO) 00190 if (b->expo == 0 && !IS_IEEE64_NZ_COEFF(b)) { 00191 /* infinity * zero = quiet NaN */ 00192 QNaNIEEE64 (x); 00193 return res | AR_STAT_UNDEFINED; 00194 } else { 00195 /* infinity * anything = infinity */ 00196 if (i = a->sign ^ b->sign) 00197 res |= AR_STAT_NEGATIVE; 00198 *x = *a; 00199 x->sign = i; 00200 return res | AR_STAT_OVERFLOW; 00201 } 00202 00203 /* Test for denorms (they have zero exponents) to determine the 00204 * values of the implicit normalization bits; make them explicit. 00205 */ 00206 if (ar_state_register.ar_denorms_trap && 00207 ((!a->expo && IS_IEEE64_NZ_COEFF(a)) || 00208 (!b->expo && IS_IEEE64_NZ_COEFF(b)))) { 00209 /* operand is a denorm and denorms cause a trap */ 00210 x->expo = AR_IEEE64_MAX_EXPO + 1; 00211 return res | AR_STAT_UNDEFINED; 00212 } 00213 ZEROIEEE64 (y); 00214 y.coeff3 = !!a->expo; 00215 y2 = *a; 00216 z = *b; 00217 z_lbits = !!b->expo; 00218 x_expo = a->expo + b->expo + !a->expo + !b->expo - AR_IEEE64_EXPO_BIAS; 00219 if (x_expo <= 0) 00220 x_expo--; 00221 i = a->sign ^ b->sign; 00222 00223 /* Sum the pyramid */ 00224 if (z.coeff3 & 1) { 00225 x2 = *a; 00226 *x = y; 00227 } 00228 else { 00229 ZEROIEEE64 (*x); 00230 ZEROIEEE64 (x2); 00231 } 00232 x->sign = i; 00233 x_lbits = z_lbits & y.coeff3; 00234 SHLEFTIEEE64_2 (y, y2); 00235 SHRIGHTIEEE64 (z); 00236 z.coeff0 |= z_lbits << (AR_IEEE64_C0_BITS - 1); 00237 for (i = 1; i < AR_IEEE64_COEFF_BITS + 1; i++) { 00238 00239 if (z.coeff3 & 1) { 00240 carry = 0; 00241 ADDIEEE64 (x2, carry, x2, y2); 00242 ADDIEEE64 (*x, carry, *x, y); 00243 x_lbits += carry; 00244 } 00245 00246 SHLEFTIEEE64_2 (y, y2); 00247 SHRIGHTIEEE64 (z); 00248 } 00249 00250 /* Extract rounding bits */ 00251 rbits = x2.coeff0 >> (AR_IEEE64_C0_BITS - AR_IEEE64_ROUND_BITS); 00252 if (x2.coeff0 & MASKR (AR_IEEE64_C0_BITS - AR_IEEE64_ROUND_BITS) | 00253 x2.coeff1 | x2.coeff2 | x2.coeff3) 00254 rbits |= 1; /* sticky bit */ 00255 00256 /* Normalize and round */ 00257 return ar_i64norm (x_expo, x_lbits, rbits, x, roundmode); 00258 } 00259 00260 #ifdef __mips 00261 00262 int 00263 ar_ifmul128(AR_IEEE_128 *x, 00264 const AR_IEEE_128 *a, 00265 const AR_IEEE_128 *b, 00266 int roundmode) 00267 { 00268 AR_TYPE ty = AR_Float_IEEE_NR_128; 00269 *(long double *)x = *(long double *)a * *(long double *)b; 00270 return AR_status(x, &ty); 00271 } 00272 00273 #else 00274 00275 int 00276 ar_ifmul128(AR_IEEE_128 *x, 00277 const AR_IEEE_128 *a, 00278 const AR_IEEE_128 *b, 00279 int roundmode) 00280 { 00281 00282 int i, res = AR_STAT_OK; 00283 unsigned long x_lbits, z_lbits, rbits, carry; 00284 signed int x_expo; 00285 AR_IEEE_128 x2, y, y2, z; 00286 00287 /* 00288 * Use native arithmetic for MIPS. 00289 */ 00290 if (HOST_IS_MIPS) { 00291 AR_TYPE ty = AR_Float_IEEE_NR_128; 00292 00293 *(long double *)x = *(long double *)a * *(long double *)b; 00294 return AR_status((AR_DATA *) x, &ty); 00295 } 00296 00297 /* If either a or b is a NaN, it's the result. */ 00298 if (IS_IEEE128_NaN(a)) { 00299 *x = *a; 00300 return res | AR_STAT_UNDEFINED; 00301 } 00302 if (IS_IEEE128_NaN(b)) { 00303 *x = *b; 00304 return res | AR_STAT_UNDEFINED; 00305 } 00306 00307 /* Test for infinities */ 00308 if (b->expo > AR_IEEE128_MAX_EXPO) 00309 if (a->expo == 0 && !IS_IEEE128_NZ_COEFF(a)) { 00310 /* zero * inf = quiet NaN */ 00311 QNaNIEEE128 (x); 00312 return res | AR_STAT_UNDEFINED; 00313 } else { 00314 /* anything * infinity = infinity */ 00315 if (i = a->sign ^ b->sign) 00316 res |= AR_STAT_NEGATIVE; 00317 *x = *b; 00318 x->sign = i; 00319 return res | AR_STAT_OVERFLOW; 00320 } 00321 if (a->expo > AR_IEEE128_MAX_EXPO) 00322 if (b->expo == 0 && !IS_IEEE128_NZ_COEFF(b)) { 00323 /* infinity * zero = quietNaN */ 00324 QNaNIEEE128 (x); 00325 return res | AR_STAT_UNDEFINED; 00326 } else { 00327 /* infinity * anything = infinity */ 00328 if (i = a->sign ^ b->sign) 00329 res |= AR_STAT_NEGATIVE; 00330 *x = *a; 00331 x->sign = i; 00332 return res | AR_STAT_OVERFLOW; 00333 } 00334 00335 /* Test for denorms (they have zero exponents) to determine the 00336 * values of the implicit normalization bits; make them explicit. 00337 */ 00338 if (ar_state_register.ar_denorms_trap && 00339 ((!a->expo && IS_IEEE128_NZ_COEFF(a)) || 00340 (!b->expo && IS_IEEE128_NZ_COEFF(b)))) { 00341 /* operand is a denorm and denorms cause a trap */ 00342 x->expo = AR_IEEE128_MAX_EXPO + 1; 00343 return res | AR_STAT_UNDEFINED; 00344 } 00345 ZEROIEEE128 (y); 00346 y.coeff6 = !!a->expo; 00347 y2 = *a; 00348 z = *b; 00349 z_lbits = !!b->expo; 00350 x_expo = a->expo + b->expo + !a->expo + !b->expo - AR_IEEE128_EXPO_BIAS; 00351 if (x_expo <= 0) 00352 x_expo--; 00353 i = a->sign ^ b->sign; 00354 00355 /* Sum the pyramid */ 00356 if (z.coeff6 & 1) { 00357 x2 = *a; 00358 *x = y; 00359 } 00360 else { 00361 ZEROIEEE128 (*x); 00362 ZEROIEEE128 (x2); 00363 } 00364 x->sign = i; 00365 x_lbits = z_lbits & y.coeff6; 00366 SHLEFTIEEE128_2 (y, y2); 00367 SHRIGHTIEEE128 (z); 00368 z.coeff0 |= z_lbits << (AR_IEEE128_C0_BITS - 1); 00369 for (i = 1; i < AR_IEEE128_COEFF_BITS + 1; i++) { 00370 00371 if (z.coeff6 & 1) { 00372 carry = 0; 00373 ADDIEEE128 (x2, carry, x2, y2); 00374 ADDIEEE128 (*x, carry, *x, y); 00375 x_lbits += carry; 00376 } 00377 00378 SHLEFTIEEE128_2 (y, y2); 00379 SHRIGHTIEEE128 (z); 00380 } 00381 00382 /* Extract rounding bits */ 00383 rbits = x2.coeff0 >> (AR_IEEE128_C0_BITS - AR_IEEE128_ROUND_BITS); 00384 if (x2.coeff0 & MASKR (AR_IEEE128_C0_BITS - AR_IEEE128_ROUND_BITS) | 00385 x2.coeff1 | x2.coeff2 | x2.coeff3 | x2.coeff3 | x2.coeff5 | x2.coeff6) 00386 rbits |= 1; /* sticky bit */ 00387 00388 /* Normalize and round */ 00389 return ar_i128norm (x_expo, x_lbits, rbits, x, roundmode); 00390 } 00391 #endif 00392 00393 00394 static char USMID [] = "\n%Z%%M% %I% %G% %U%\n"; 00395 static char rcsid [] = "$Id: ieee_fmul.c,v 1.1.1.1 2002-05-22 20:06:19 dsystem Exp $";