Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
/*

  Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.

  This program is free software; you can redistribute it and/or modify it
  under the terms of version 2 of the GNU General Public License as
  published by the Free Software Foundation.

  This program is distributed in the hope that it would be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

  Further, this software is distributed without any warranty that it is
  free of the rightful claim of any third person regarding infringement
  or the like.  Any license provided herein, whether implied or
  otherwise, applies only to this software file.  Patent licenses, if
  any, provided herein do not apply to combinations of this program with
  other software, or any other product whatsoever.

  You should have received a copy of the GNU General Public License along
  with this program; if not, write the Free Software Foundation, Inc., 59
  Temple Place - Suite 330, Boston MA 02111-1307, USA.

  Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
  Mountain View, CA 94043, or:

  http://www.sgi.com

  For further information regarding this notice, see:

  http://oss.sgi.com/projects/GenInfo/NoticeExplan

*/


/*
 * Portable IEEE 754 floating-point addition and subtraction
 *
 * Each precision (32/64/128 bit) is handled by an add routine that
 * operates on the unpacked AR_IEEE_NN representation (sign, biased
 * exponent, coefficient split across coeffN bit-fields) and a thin
 * subtract routine that flips the sign of the second operand and
 * delegates to the add routine.  All routines return an AR_STAT_*
 * bit mask describing the result.
 */

#include "arith.internal.h"


/*
 * Add two unpacked IEEE single-precision values: *x = *a + *b.
 *
 *   x         - receives the result
 *   a, b      - operands (not modified)
 *   roundmode - rounding mode, passed through to ar_i32norm()
 *
 * Returns a bit mask of AR_STAT_OK / AR_STAT_NEGATIVE / AR_STAT_ZERO /
 * AR_STAT_OVERFLOW / AR_STAT_UNDEFINED status flags.
 */
int
ar_ifadd32 (AR_IEEE_32 *x,
	    const AR_IEEE_32 *a,
	    const AR_IEEE_32 *b,
	    int roundmode)
{

	int res = AR_STAT_OK;
	/* lbits   - the two bits immediately above x's coefficient
	 *           (implicit leading bit plus a possible carry out).
	 * y_lbits - implicit leading bit of the smaller operand y.
	 * rbits   - guard/sticky bits shifted out below the coefficient,
	 *           AR_IEEE32_ROUND_BITS wide, later fed to ar_i32norm. */
	unsigned int lbits, y_lbits, rbits;
	unsigned int shift, diffsign, carry, x_expo;
	AR_IEEE_32 y;

	/* Ensure that the first argument has the largest exponent,
	 * for simplicity.
	 */
	if (b->expo > a->expo)
		y = *a, *x = *b;
	else
		y = *b, *x = *a;
	x_expo = x->expo;

	/* If either x or y is a NaN, it's the result. */
	if (IS_IEEE32_NaN(x)) {
		return res | AR_STAT_UNDEFINED;
	}
	if (IS_IEEE32_NaN(&y)) {
		*x = y;
		return res | AR_STAT_UNDEFINED;
	}

	/* Test for infinities */
	if (x_expo > AR_IEEE32_MAX_EXPO)
		if (y.expo > AR_IEEE32_MAX_EXPO)
			if (x->sign == y.sign) {
				/* infinities of same sign - return infinity */
				if (x->sign)
					res |= AR_STAT_NEGATIVE;
				return res | AR_STAT_OVERFLOW;
			} else {
				/* infinities of different signs - quiet NaN */
				QNaNIEEE32 (x);
				return res | AR_STAT_UNDEFINED;
			}
		else {
			/* infinity + finite quantity - return infinity */
			if (x->sign)
				res |= AR_STAT_NEGATIVE;
			return res | AR_STAT_OVERFLOW;
		}
	else if (y.expo > AR_IEEE32_MAX_EXPO) {
		*x = y;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_OVERFLOW;
	}

	/* Test for denorms (they have zero exponents) to determine the
	 * values of the implicit normalization bits.
	 */
	if (ar_state_register.ar_denorms_trap &&
	    ((!x_expo && IS_IEEE32_NZ_COEFF(x)) ||
	     (!y.expo && IS_IEEE32_NZ_COEFF(&y)))) {
		/* operand is a denorm and denorms cause a trap */
		x->expo = AR_IEEE32_MAX_EXPO + 1;
		return res | AR_STAT_UNDEFINED;
	}
	/* Implicit leading bit is 1 for normalized numbers (nonzero
	 * exponent), 0 for zeros and denorms. */
	lbits = !!x_expo;
	y_lbits = !!y.expo;

	/* Shift the coefficient of the second argument down (right). We
	 * do this in parts so as not to overflow registers.
	 */
	rbits = 0;
	shift = (x_expo - lbits) - (y.expo - y_lbits);
	if (shift > AR_IEEE32_COEFF_BITS + AR_IEEE32_ROUND_BITS) {
		/* y is too small to affect anything but the sticky bit */
		rbits = !!(y.coeff0 | y.coeff1);
		y.sign = y.expo = y_lbits = 0;
		y.coeff0 = y.coeff1 = 0;
	} else
		for (; shift; shift--) {
			/* Sticky bit shifting: low bit of rbits accumulates
			 * every bit ever shifted past it. */
			rbits = (rbits & 1) | (rbits >> 1) |
				((y.coeff1 & 1) << (AR_IEEE32_ROUND_BITS-1));
			SHRIGHTIEEE32 (y);
			y.coeff0 |= y_lbits << (AR_IEEE32_C0_BITS - 1);
			y_lbits = 0;
		}

	/* If signs differ, complement the first argument; this is equivalent
	 * to negating and then subtracting one, in a two's complement sense.
	 */
	if (diffsign = (x->sign ^ y.sign)) {	/* assignment intended */
		lbits ^= MASKR (2);
		NOTIEEE32 (*x);
	}

	/* Compute sum of coefficients */
	if (diffsign) {
		/* complete the negation: add back the "subtracted one"
		 * at the bottom of the round bits */
		rbits += MASKR (AR_IEEE32_ROUND_BITS);
		carry = rbits >> AR_IEEE32_ROUND_BITS;
		rbits &= MASKR (AR_IEEE32_ROUND_BITS);
	} else
		carry = 0;
	ADDIEEE32 (*x, carry, *x, y);
	lbits = (carry + lbits + y_lbits) & MASKR (2);

	/* Opposite signs */
	if (diffsign)
		if (lbits & 2) {
			/* Complement: the subtraction came out negative,
			 * so negate the result back to sign-magnitude form */
			lbits ^= MASKR (2);
			NOTIEEE32 (*x);
			rbits ^= MASKR (AR_IEEE32_ROUND_BITS);
		} else {
			/* End-around carry */
			x->sign ^= 1;
			carry = 1 + rbits;
			rbits = carry & MASKR (AR_IEEE32_ROUND_BITS);
			carry >>= AR_IEEE32_ROUND_BITS;
			INCIEEE32 (*x, carry);
			lbits = (carry + lbits) & MASKR (2);
		}

	/* Shift rightward if carry, or if undenormalized */
	if (lbits & 2 || lbits && !x_expo) {
		rbits = (rbits >> 1) | rbits & 1 |
			((x->coeff1 & 1) << (AR_IEEE32_ROUND_BITS - 1));
		SHRIGHTIEEE32 (*x);
		x->coeff0 |= lbits << (AR_IEEE32_C0_BITS - 1);
		lbits >>= 1;
		if (x_expo)
			x_expo++;
		else
			/* NOTE(review): denorm inputs produced a normalized
			 * result; exponent presumably becomes 2 so that
			 * ar_i32norm lands on the right final exponent --
			 * confirm against ar_i32norm's contract. */
			x_expo = 2;
	}

	/* Check for a zero result, as a special case */
	if (!(lbits | x->coeff0 | x->coeff1 | rbits)) {
		x->sign &= !diffsign;	/* -0 + -0 == -0 */
		x->expo = 0;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_ZERO;
	}

	/* Normalize and round the intermediate result */
	return ar_i32norm (x_expo, lbits, rbits, x, roundmode);
}


/*
 * Subtract two unpacked IEEE single-precision values: *x = *a - *b.
 * Implemented as *a + (-*b) via ar_ifadd32, after screening NaNs so
 * the sign flip never touches a NaN operand.
 */
int
ar_ifsub32 (AR_IEEE_32 *x,
	    const AR_IEEE_32 *a,
	    const AR_IEEE_32 *b,
	    int roundmode)
{

	AR_IEEE_32 nb;

	/* If either a or b is a NaN, it's the result. */
	if (IS_IEEE32_NaN(a)) {
		*x = *a;
		return AR_STAT_UNDEFINED;
	}
	if (IS_IEEE32_NaN(b)) {
		*x = *b;
		return AR_STAT_UNDEFINED;
	}

	nb = *b;
	nb.sign ^= 1;
	return ar_ifadd32 (x, a, &nb, roundmode);
}


/*
 * Add two unpacked IEEE double-precision values: *x = *a + *b.
 * Same algorithm as ar_ifadd32; the coefficient spans coeff0..coeff3.
 * Returns an AR_STAT_* bit mask.
 */
int
ar_ifadd64 (AR_IEEE_64 *x,
	    const AR_IEEE_64 *a,
	    const AR_IEEE_64 *b,
	    int roundmode)
{

	int res = AR_STAT_OK;
	/* lbits/y_lbits/rbits: see ar_ifadd32 -- implicit leading bits
	 * and the sticky round bits below the coefficient. */
	unsigned int lbits, y_lbits, rbits;
	unsigned int shift, diffsign, carry, x_expo;
	AR_IEEE_64 y;

	/* Ensure that the first argument has the largest exponent,
	 * for simplicity.
	 */
	if (b->expo > a->expo)
		y = *a, *x = *b;
	else
		y = *b, *x = *a;
	x_expo = x->expo;

	/* If either x or y is a NaN, it's the result.
	 */
	if (IS_IEEE64_NaN(x)) {
		return res | AR_STAT_UNDEFINED;
	}
	if (IS_IEEE64_NaN(&y)) {
		*x = y;
		return res | AR_STAT_UNDEFINED;
	}

	/* Test for infinities */
	if (x_expo > AR_IEEE64_MAX_EXPO)
		if (y.expo > AR_IEEE64_MAX_EXPO)
			if (x->sign == y.sign) {
				/* infinities of same sign - return infinity */
				if (x->sign)
					res |= AR_STAT_NEGATIVE;
				return res | AR_STAT_OVERFLOW;
			} else {
				/* infinities of different signs - quiet NaN */
				QNaNIEEE64 (x);
				return res | AR_STAT_UNDEFINED;
			}
		else {
			/* infinity + finite quantity - return infinity */
			if (x->sign)
				res |= AR_STAT_NEGATIVE;
			return res | AR_STAT_OVERFLOW;
		}
	else if (y.expo > AR_IEEE64_MAX_EXPO) {
		*x = y;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_OVERFLOW;
	}

	/* Test for denorms (they have zero exponents) to determine the
	 * values of the implicit normalization bits.
	 */
	if (ar_state_register.ar_denorms_trap &&
	    ((!x_expo && IS_IEEE64_NZ_COEFF(x)) ||
	     (!y.expo && IS_IEEE64_NZ_COEFF(&y)))) {
		/* operand is a denorm and denorms cause a trap */
		x->expo = AR_IEEE64_MAX_EXPO + 1;
		return res | AR_STAT_UNDEFINED;
	}
	/* Implicit leading bit: 1 for normalized, 0 for zero/denorm */
	lbits = !!x_expo;
	y_lbits = !!y.expo;

	/* Shift the coefficient of the second argument down (right). We
	 * do this in parts so as not to overflow registers.
	 */
	rbits = 0;
	shift = (x_expo - lbits) - (y.expo - y_lbits);
	if (shift > AR_IEEE64_COEFF_BITS + AR_IEEE64_ROUND_BITS) {
		/* y only contributes a sticky bit */
		rbits = !!(y.coeff0 | y.coeff1 | y.coeff2 | y.coeff3);
		y.sign = y.expo = y_lbits = 0;
		y.coeff0 = y.coeff1 = y.coeff2 = y.coeff3 = 0;
	} else
		for (; shift; shift--) {
			/* Sticky bit shifting */
			rbits = (rbits & 1) | (rbits >> 1) |
				((y.coeff3 & 1) << (AR_IEEE64_ROUND_BITS-1));
			SHRIGHTIEEE64 (y);
			y.coeff0 |= y_lbits << (AR_IEEE64_C0_BITS - 1);
			y_lbits = 0;
		}

	/* If signs differ, complement the first argument; this is equivalent
	 * to negating and then subtracting one, in a two's complement sense.
	 */
	if (diffsign = (x->sign ^ y.sign)) {	/* assignment intended */
		lbits ^= MASKR (2);
		NOTIEEE64 (*x);
	}

	/* Compute sum of coefficients */
	if (diffsign) {
		rbits += MASKR (AR_IEEE64_ROUND_BITS);
		carry = rbits >> AR_IEEE64_ROUND_BITS;
		rbits &= MASKR (AR_IEEE64_ROUND_BITS);
	} else
		carry = 0;
	ADDIEEE64 (*x, carry, *x, y);
	lbits = (carry + lbits + y_lbits) & MASKR (2);

	/* Opposite signs */
	if (diffsign)
		if (lbits & 2) {
			/* Complement */
			lbits ^= MASKR (2);
			NOTIEEE64 (*x);
			rbits ^= MASKR (AR_IEEE64_ROUND_BITS);
		} else {
			/* End-around carry */
			x->sign ^= 1;
			carry = 1 + rbits;
			rbits = carry & MASKR (AR_IEEE64_ROUND_BITS);
			carry >>= AR_IEEE64_ROUND_BITS;
			INCIEEE64 (*x, carry);
			lbits = (carry + lbits) & MASKR (2);
		}

	/* Shift rightward if carry, or if undenormalized */
	if (lbits & 2 || lbits && !x_expo) {
		rbits = (rbits >> 1) | rbits & 1 |
			((x->coeff3 & 1) << (AR_IEEE64_ROUND_BITS - 1));
		SHRIGHTIEEE64 (*x);
		x->coeff0 |= lbits << (AR_IEEE64_C0_BITS - 1);
		lbits >>= 1;
		if (x_expo)
			x_expo++;
		else
			x_expo = 2;
	}

	/*
	   Check for a zero result, as a special case */
	if (!(lbits | x->coeff0|x->coeff1|x->coeff2|x->coeff3 | rbits)) {
		x->sign &= !diffsign;	/* -0 + -0 == -0 */
		x->expo = 0;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_ZERO;
	}

	/* Normalize and round the intermediate result */
	return ar_i64norm (x_expo, lbits, rbits, x, roundmode);
}


/*
 * Subtract two unpacked IEEE double-precision values: *x = *a - *b.
 * Implemented as *a + (-*b) via ar_ifadd64, after screening NaNs.
 */
int
ar_ifsub64 (AR_IEEE_64 *x,
	    const AR_IEEE_64 *a,
	    const AR_IEEE_64 *b,
	    int roundmode)
{

	AR_IEEE_64 nb;

	/* If either a or b is a NaN, it's the result. */
	if (IS_IEEE64_NaN(a)) {
		*x = *a;
		return AR_STAT_UNDEFINED;
	}
	if (IS_IEEE64_NaN(b)) {
		*x = *b;
		return AR_STAT_UNDEFINED;
	}

	nb = *b;
	nb.sign ^= 1;
	return ar_ifadd64 (x, a, &nb, roundmode);
}

#ifdef __mips
/* Use native addition for MIPS */
/*
 * Quad-precision add via the host's native long double arithmetic.
 * NOTE(review): casts AR_IEEE_128* to long double*; assumes the two
 * layouts coincide on MIPS -- a strict-aliasing/type-pun that only
 * holds on this platform.
 */
int
ar_ifadd128(AR_IEEE_128 *x,
	    const AR_IEEE_128 *a,
	    const AR_IEEE_128 *b,
	    int roundmode)
{
	AR_TYPE ty = AR_Float_IEEE_NR_128;
	*(long double *)x = *(long double *)a + *(long double *)b;
	return AR_status(x, &ty);
}

/*
 * Quad-precision subtract via the host's native long double arithmetic
 * (same type-pun caveat as ar_ifadd128 above).
 */
int
ar_ifsub128( AR_IEEE_128 *x,
	    const AR_IEEE_128 *a,
	    const AR_IEEE_128 *b,
	    int roundmode)
{
	AR_TYPE ty = AR_Float_IEEE_NR_128;
	*(long double *)x = *(long double *)a - *(long double *)b;
	return AR_status(x, &ty);
}

#else

/*
 * Add two unpacked IEEE quad-precision values: *x = *a + *b.
 * Same algorithm as ar_ifadd32/64; the coefficient spans
 * coeff0..coeff6.  Returns an AR_STAT_* bit mask.
 */
int
ar_ifadd128(AR_IEEE_128 *x,
	    const AR_IEEE_128 *a,
	    const AR_IEEE_128 *b,
	    int roundmode)
{

	int res = AR_STAT_OK;
	/* lbits/y_lbits/rbits: see ar_ifadd32 */
	unsigned int lbits, y_lbits, rbits;
	unsigned int shift, diffsign, carry, x_expo;
	AR_IEEE_128 y;

	/*
	 * Use native arithmetic for MIPS.
	 * NOTE(review): same long double type-pun as the __mips branch;
	 * valid only when HOST_IS_MIPS guarantees matching layout.
	 */
	if (HOST_IS_MIPS) {
		AR_TYPE ty = AR_Float_IEEE_NR_128;

		*(long double *)x = *(long double *)a + *(long double *)b;
		return AR_status((AR_DATA *) x, &ty);
	}

	/* Ensure that the first argument has the largest exponent,
	 * for simplicity.
	 */
	if (b->expo > a->expo)
		y = *a, *x = *b;
	else
		y = *b, *x = *a;
	x_expo = x->expo;

	/* If either x or y is a NaN, it's the result. */
	if (IS_IEEE128_NaN(x)) {
		return res | AR_STAT_UNDEFINED;
	}
	if (IS_IEEE128_NaN(&y)) {
		*x = y;
		return res | AR_STAT_UNDEFINED;
	}

	/* Test for infinities */
	if (x_expo > AR_IEEE128_MAX_EXPO)
		if (y.expo > AR_IEEE128_MAX_EXPO)
			if (x->sign == y.sign) {
				/* infinities of same sign - return infinity */
				if (x->sign)
					res |= AR_STAT_NEGATIVE;
				return res | AR_STAT_OVERFLOW;
			} else {
				/* infinities of different signs - quiet NaN */
				QNaNIEEE128 (x);
				return res | AR_STAT_UNDEFINED;
			}
		else {
			/* infinity + finite quantity - return infinity */
			if (x->sign)
				res |= AR_STAT_NEGATIVE;
			return res | AR_STAT_OVERFLOW;
		}
	else if (y.expo > AR_IEEE128_MAX_EXPO) {
		*x = y;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_OVERFLOW;
	}

	/* Test for denorms (they have zero exponents) to determine the
	 * values of the implicit normalization bits.
	 */
	if (ar_state_register.ar_denorms_trap &&
	    ((!x_expo && IS_IEEE128_NZ_COEFF(x)) ||
	     (!y.expo && IS_IEEE128_NZ_COEFF(&y)))) {
		/* operand is a denorm and denorms cause a trap */
		x->expo = AR_IEEE128_MAX_EXPO + 1;
		return res | AR_STAT_UNDEFINED;
	}
	/* Implicit leading bit: 1 for normalized, 0 for zero/denorm */
	lbits = !!x_expo;
	y_lbits = !!y.expo;

	/* Shift the coefficient of the second argument down (right). We
	 * do this in parts so as not to overflow registers.
	 */
	rbits = 0;
	shift = (x_expo - lbits) - (y.expo - y_lbits);
	if (shift > AR_IEEE128_COEFF_BITS + AR_IEEE128_ROUND_BITS) {
		/* y only contributes a sticky bit */
		rbits = !!(y.coeff0 | y.coeff1 | y.coeff2 |
			   y.coeff3 | y.coeff4 | y.coeff5 | y.coeff6);
		y.sign = y.expo = y_lbits = 0;
		y.coeff0 = y.coeff1 = y.coeff2 =
			y.coeff3 = y.coeff4 = y.coeff5 = y.coeff6 = 0;
	} else
		for (; shift; shift--) {
			/* Sticky bit shifting */
			rbits = (rbits & 1) | (rbits >> 1) |
				((y.coeff6 & 1) << (AR_IEEE128_ROUND_BITS-1));
			SHRIGHTIEEE128 (y);
			y.coeff0 |= y_lbits << (AR_IEEE128_C0_BITS - 1);
			y_lbits = 0;
		}

	/* If signs differ, complement the first argument; this is equivalent
	 * to negating and then subtracting one, in a two's complement sense.
	 */
	if (diffsign = (x->sign ^ y.sign)) {	/* assignment intended */
		lbits ^= MASKR (2);
		NOTIEEE128 (*x);
	}

	/* Compute sum of coefficients */
	if (diffsign) {
		rbits += MASKR (AR_IEEE128_ROUND_BITS);
		carry = rbits >> AR_IEEE128_ROUND_BITS;
		rbits &= MASKR (AR_IEEE128_ROUND_BITS);
	} else
		carry = 0;
	ADDIEEE128 (*x, carry, *x, y);
	lbits = (carry + lbits + y_lbits) & MASKR (2);

	/* Opposite signs */
	if (diffsign)
		if (lbits & 2) {
			/* Complement */
			lbits ^= MASKR (2);
			NOTIEEE128 (*x);
			rbits ^= MASKR (AR_IEEE128_ROUND_BITS);
		} else {
			/* End-around carry */
			x->sign ^= 1;
			carry = 1 + rbits;
			rbits = carry & MASKR (AR_IEEE128_ROUND_BITS);
			carry >>= AR_IEEE128_ROUND_BITS;
			INCIEEE128 (*x, carry);
			lbits = (carry + lbits) & MASKR (2);
		}

	/* Shift rightward if carry, or if undenormalized */
	if (lbits & 2 || lbits && !x_expo) {
		rbits = (rbits >> 1) | rbits & 1 |
			((x->coeff6 & 1) << (AR_IEEE128_ROUND_BITS - 1));
		SHRIGHTIEEE128 (*x);
		x->coeff0 |= lbits << (AR_IEEE128_C0_BITS - 1);
		lbits >>= 1;
		if (x_expo)
			x_expo++;
		else
			x_expo = 2;
	}

	/* Check for a zero result, as a special case */
	if (!(lbits | rbits | x->coeff0 | x->coeff1 | x->coeff2 |
	      x->coeff3 | x->coeff4 | x->coeff5 | x->coeff6)) {
		x->sign &= !diffsign;	/* -0 + -0 == -0 */
		x->expo = 0;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_ZERO;
	}

	/* Normalize and round the intermediate result */
	return ar_i128norm (x_expo, lbits, rbits, x, roundmode);
}


/*
 * Subtract two unpacked IEEE quad-precision values: *x = *a - *b.
 * Implemented as *a + (-*b) via ar_ifadd128, after screening NaNs.
 */
int
ar_ifsub128(AR_IEEE_128 *x,
	    const AR_IEEE_128 *a,
	    const AR_IEEE_128 *b,
	    int roundmode)
{

	AR_IEEE_128 nb;

	/* If either a or b is a NaN, it's the result. */
	if (IS_IEEE128_NaN(a)) {
		*x = *a;
		return AR_STAT_UNDEFINED;
	}
	if (IS_IEEE128_NaN(b)) {
		*x = *b;
		return AR_STAT_UNDEFINED;
	}

	nb = *b;
	nb.sign ^= 1;
	return ar_ifadd128 (x, a, &nb, roundmode);
}
#endif /* __mips */


static char USMID [] = "\n%Z%%M% %I% %G% %U%\n";
static char rcsid [] = "$Id: ieee_fadd.c,v 1.1.1.1 2002-05-22 20:06:18 dsystem Exp $";