Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
/*

  Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.

  This program is free software; you can redistribute it and/or modify it
  under the terms of version 2 of the GNU General Public License as
  published by the Free Software Foundation.

  This program is distributed in the hope that it would be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

  Further, this software is distributed without any warranty that it is
  free of the rightful claim of any third person regarding infringement
  or the like.  Any license provided herein, whether implied or
  otherwise, applies only to this software file.  Patent licenses, if
  any, provided herein do not apply to combinations of this program with
  other software, or any other product whatsoever.

  You should have received a copy of the GNU General Public License along
  with this program; if not, write the Free Software Foundation, Inc., 59
  Temple Place - Suite 330, Boston MA 02111-1307, USA.

  Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
  Mountain View, CA 94043, or:

  http://www.sgi.com

  For further information regarding this notice, see:

  http://oss.sgi.com/projects/GenInfo/NoticeExplan

*/


/*
 * Portable IEEE 754 floating-point addition and subtraction
 *
 * Each precision (32/64/128 bit) is handled by an add routine that
 * operates on the unpacked AR_IEEE_NN representation (sign, biased
 * exponent, coefficient split across coeffN bit-fields) and a thin
 * subtract routine that flips the sign of the second operand and
 * delegates to the add routine.  All routines return an AR_STAT_*
 * bit mask describing the result.
 */

#include "arith.internal.h"


/*
 * Add two unpacked IEEE single-precision values: *x = *a + *b.
 *
 *   x         - receives the result
 *   a, b      - operands (not modified)
 *   roundmode - rounding mode, passed through to ar_i32norm()
 *
 * Returns a bit mask of AR_STAT_OK / AR_STAT_NEGATIVE / AR_STAT_ZERO /
 * AR_STAT_OVERFLOW / AR_STAT_UNDEFINED status flags.
 */
int
ar_ifadd32 (AR_IEEE_32 *x,
	    const AR_IEEE_32 *a,
	    const AR_IEEE_32 *b,
	    int roundmode)
{

	int res = AR_STAT_OK;
	/* lbits   - the two bits immediately above x's coefficient
	 *           (implicit leading bit plus a possible carry out).
	 * y_lbits - implicit leading bit of the smaller operand y.
	 * rbits   - guard/sticky bits shifted out below the coefficient,
	 *           AR_IEEE32_ROUND_BITS wide, later fed to ar_i32norm. */
	unsigned int lbits, y_lbits, rbits;
	unsigned int shift, diffsign, carry, x_expo;
	AR_IEEE_32 y;

	/* Ensure that the first argument has the largest exponent,
	 * for simplicity.
	 */
	if (b->expo > a->expo)
		y = *a, *x = *b;
	else
		y = *b, *x = *a;
	x_expo = x->expo;

	/* If either x or y is a NaN, it's the result. */
	if (IS_IEEE32_NaN(x)) {
		return res | AR_STAT_UNDEFINED;
	}
	if (IS_IEEE32_NaN(&y)) {
		*x = y;
		return res | AR_STAT_UNDEFINED;
	}

	/* Test for infinities */
	if (x_expo > AR_IEEE32_MAX_EXPO)
		if (y.expo > AR_IEEE32_MAX_EXPO)
			if (x->sign == y.sign) {
				/* infinities of same sign - return infinity */
				if (x->sign)
					res |= AR_STAT_NEGATIVE;
				return res | AR_STAT_OVERFLOW;
			} else {
				/* infinities of different signs - quiet NaN */
				QNaNIEEE32 (x);
				return res | AR_STAT_UNDEFINED;
			}
		else {
			/* infinity + finite quantity - return infinity */
			if (x->sign)
				res |= AR_STAT_NEGATIVE;
			return res | AR_STAT_OVERFLOW;
		}
	else if (y.expo > AR_IEEE32_MAX_EXPO) {
		*x = y;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_OVERFLOW;
	}

	/* Test for denorms (they have zero exponents) to determine the
	 * values of the implicit normalization bits.
	 */
	if (ar_state_register.ar_denorms_trap &&
	    ((!x_expo && IS_IEEE32_NZ_COEFF(x)) ||
	     (!y.expo && IS_IEEE32_NZ_COEFF(&y)))) {
		/* operand is a denorm and denorms cause a trap */
		x->expo = AR_IEEE32_MAX_EXPO + 1;
		return res | AR_STAT_UNDEFINED;
	}
	/* Implicit leading bit is 1 for normalized numbers (nonzero
	 * exponent), 0 for zeros and denorms. */
	lbits = !!x_expo;
	y_lbits = !!y.expo;

	/* Shift the coefficient of the second argument down (right). We
	 * do this in parts so as not to overflow registers.
	 */
	rbits = 0;
	shift = (x_expo - lbits) - (y.expo - y_lbits);
	if (shift > AR_IEEE32_COEFF_BITS + AR_IEEE32_ROUND_BITS) {
		/* y is too small to affect anything but the sticky bit */
		rbits = !!(y.coeff0 | y.coeff1);
		y.sign = y.expo = y_lbits = 0;
		y.coeff0 = y.coeff1 = 0;
	} else
		for (; shift; shift--) {
			/* Sticky bit shifting: low bit of rbits accumulates
			 * every bit ever shifted past it. */
			rbits = (rbits & 1) | (rbits >> 1) |
				((y.coeff1 & 1) << (AR_IEEE32_ROUND_BITS-1));
			SHRIGHTIEEE32 (y);
			y.coeff0 |= y_lbits << (AR_IEEE32_C0_BITS - 1);
			y_lbits = 0;
		}

	/* If signs differ, complement the first argument; this is equivalent
	 * to negating and then subtracting one, in a two's complement sense.
	 */
	if (diffsign = (x->sign ^ y.sign)) {	/* assignment intended */
		lbits ^= MASKR (2);
		NOTIEEE32 (*x);
	}

	/* Compute sum of coefficients */
	if (diffsign) {
		/* complete the negation: add back the "subtracted one"
		 * at the bottom of the round bits */
		rbits += MASKR (AR_IEEE32_ROUND_BITS);
		carry = rbits >> AR_IEEE32_ROUND_BITS;
		rbits &= MASKR (AR_IEEE32_ROUND_BITS);
	} else
		carry = 0;
	ADDIEEE32 (*x, carry, *x, y);
	lbits = (carry + lbits + y_lbits) & MASKR (2);

	/* Opposite signs */
	if (diffsign)
		if (lbits & 2) {
			/* Complement: the subtraction came out negative,
			 * so negate the result back to sign-magnitude form */
			lbits ^= MASKR (2);
			NOTIEEE32 (*x);
			rbits ^= MASKR (AR_IEEE32_ROUND_BITS);
		} else {
			/* End-around carry */
			x->sign ^= 1;
			carry = 1 + rbits;
			rbits = carry & MASKR (AR_IEEE32_ROUND_BITS);
			carry >>= AR_IEEE32_ROUND_BITS;
			INCIEEE32 (*x, carry);
			lbits = (carry + lbits) & MASKR (2);
		}

	/* Shift rightward if carry, or if undenormalized */
	if (lbits & 2 || lbits && !x_expo) {
		rbits = (rbits >> 1) | rbits & 1 |
			((x->coeff1 & 1) << (AR_IEEE32_ROUND_BITS - 1));
		SHRIGHTIEEE32 (*x);
		x->coeff0 |= lbits << (AR_IEEE32_C0_BITS - 1);
		lbits >>= 1;
		if (x_expo)
			x_expo++;
		else
			/* NOTE(review): denorm inputs produced a normalized
			 * result; exponent presumably becomes 2 so that
			 * ar_i32norm lands on the right final exponent --
			 * confirm against ar_i32norm's contract. */
			x_expo = 2;
	}

	/* Check for a zero result, as a special case */
	if (!(lbits | x->coeff0 | x->coeff1 | rbits)) {
		x->sign &= !diffsign;	/* -0 + -0 == -0 */
		x->expo = 0;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_ZERO;
	}

	/* Normalize and round the intermediate result */
	return ar_i32norm (x_expo, lbits, rbits, x, roundmode);
}


/*
 * Subtract two unpacked IEEE single-precision values: *x = *a - *b.
 * Implemented as *a + (-*b) via ar_ifadd32, after screening NaNs so
 * the sign flip never touches a NaN operand.
 */
int
ar_ifsub32 (AR_IEEE_32 *x,
	    const AR_IEEE_32 *a,
	    const AR_IEEE_32 *b,
	    int roundmode)
{

	AR_IEEE_32 nb;

	/* If either a or b is a NaN, it's the result. */
	if (IS_IEEE32_NaN(a)) {
		*x = *a;
		return AR_STAT_UNDEFINED;
	}
	if (IS_IEEE32_NaN(b)) {
		*x = *b;
		return AR_STAT_UNDEFINED;
	}

	nb = *b;
	nb.sign ^= 1;
	return ar_ifadd32 (x, a, &nb, roundmode);
}


/*
 * Add two unpacked IEEE double-precision values: *x = *a + *b.
 * Same algorithm as ar_ifadd32; the coefficient spans coeff0..coeff3.
 * Returns an AR_STAT_* bit mask.
 */
int
ar_ifadd64 (AR_IEEE_64 *x,
	    const AR_IEEE_64 *a,
	    const AR_IEEE_64 *b,
	    int roundmode)
{

	int res = AR_STAT_OK;
	/* lbits/y_lbits/rbits: see ar_ifadd32 -- implicit leading bits
	 * and the sticky round bits below the coefficient. */
	unsigned int lbits, y_lbits, rbits;
	unsigned int shift, diffsign, carry, x_expo;
	AR_IEEE_64 y;

	/* Ensure that the first argument has the largest exponent,
	 * for simplicity.
	 */
	if (b->expo > a->expo)
		y = *a, *x = *b;
	else
		y = *b, *x = *a;
	x_expo = x->expo;

	/* If either x or y is a NaN, it's the result.
	 */
	if (IS_IEEE64_NaN(x)) {
		return res | AR_STAT_UNDEFINED;
	}
	if (IS_IEEE64_NaN(&y)) {
		*x = y;
		return res | AR_STAT_UNDEFINED;
	}

	/* Test for infinities */
	if (x_expo > AR_IEEE64_MAX_EXPO)
		if (y.expo > AR_IEEE64_MAX_EXPO)
			if (x->sign == y.sign) {
				/* infinities of same sign - return infinity */
				if (x->sign)
					res |= AR_STAT_NEGATIVE;
				return res | AR_STAT_OVERFLOW;
			} else {
				/* infinities of different signs - quiet NaN */
				QNaNIEEE64 (x);
				return res | AR_STAT_UNDEFINED;
			}
		else {
			/* infinity + finite quantity - return infinity */
			if (x->sign)
				res |= AR_STAT_NEGATIVE;
			return res | AR_STAT_OVERFLOW;
		}
	else if (y.expo > AR_IEEE64_MAX_EXPO) {
		*x = y;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_OVERFLOW;
	}

	/* Test for denorms (they have zero exponents) to determine the
	 * values of the implicit normalization bits.
	 */
	if (ar_state_register.ar_denorms_trap &&
	    ((!x_expo && IS_IEEE64_NZ_COEFF(x)) ||
	     (!y.expo && IS_IEEE64_NZ_COEFF(&y)))) {
		/* operand is a denorm and denorms cause a trap */
		x->expo = AR_IEEE64_MAX_EXPO + 1;
		return res | AR_STAT_UNDEFINED;
	}
	/* Implicit leading bit: 1 for normalized, 0 for zero/denorm */
	lbits = !!x_expo;
	y_lbits = !!y.expo;

	/* Shift the coefficient of the second argument down (right). We
	 * do this in parts so as not to overflow registers.
	 */
	rbits = 0;
	shift = (x_expo - lbits) - (y.expo - y_lbits);
	if (shift > AR_IEEE64_COEFF_BITS + AR_IEEE64_ROUND_BITS) {
		/* y only contributes a sticky bit */
		rbits = !!(y.coeff0 | y.coeff1 | y.coeff2 | y.coeff3);
		y.sign = y.expo = y_lbits = 0;
		y.coeff0 = y.coeff1 = y.coeff2 = y.coeff3 = 0;
	} else
		for (; shift; shift--) {
			/* Sticky bit shifting */
			rbits = (rbits & 1) | (rbits >> 1) |
				((y.coeff3 & 1) << (AR_IEEE64_ROUND_BITS-1));
			SHRIGHTIEEE64 (y);
			y.coeff0 |= y_lbits << (AR_IEEE64_C0_BITS - 1);
			y_lbits = 0;
		}

	/* If signs differ, complement the first argument; this is equivalent
	 * to negating and then subtracting one, in a two's complement sense.
	 */
	if (diffsign = (x->sign ^ y.sign)) {	/* assignment intended */
		lbits ^= MASKR (2);
		NOTIEEE64 (*x);
	}

	/* Compute sum of coefficients */
	if (diffsign) {
		rbits += MASKR (AR_IEEE64_ROUND_BITS);
		carry = rbits >> AR_IEEE64_ROUND_BITS;
		rbits &= MASKR (AR_IEEE64_ROUND_BITS);
	} else
		carry = 0;
	ADDIEEE64 (*x, carry, *x, y);
	lbits = (carry + lbits + y_lbits) & MASKR (2);

	/* Opposite signs */
	if (diffsign)
		if (lbits & 2) {
			/* Complement */
			lbits ^= MASKR (2);
			NOTIEEE64 (*x);
			rbits ^= MASKR (AR_IEEE64_ROUND_BITS);
		} else {
			/* End-around carry */
			x->sign ^= 1;
			carry = 1 + rbits;
			rbits = carry & MASKR (AR_IEEE64_ROUND_BITS);
			carry >>= AR_IEEE64_ROUND_BITS;
			INCIEEE64 (*x, carry);
			lbits = (carry + lbits) & MASKR (2);
		}

	/* Shift rightward if carry, or if undenormalized */
	if (lbits & 2 || lbits && !x_expo) {
		rbits = (rbits >> 1) | rbits & 1 |
			((x->coeff3 & 1) << (AR_IEEE64_ROUND_BITS - 1));
		SHRIGHTIEEE64 (*x);
		x->coeff0 |= lbits << (AR_IEEE64_C0_BITS - 1);
		lbits >>= 1;
		if (x_expo)
			x_expo++;
		else
			x_expo = 2;
	}

	/*
	   Check for a zero result, as a special case */
	if (!(lbits | x->coeff0|x->coeff1|x->coeff2|x->coeff3 | rbits)) {
		x->sign &= !diffsign;	/* -0 + -0 == -0 */
		x->expo = 0;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_ZERO;
	}

	/* Normalize and round the intermediate result */
	return ar_i64norm (x_expo, lbits, rbits, x, roundmode);
}


/*
 * Subtract two unpacked IEEE double-precision values: *x = *a - *b.
 * Implemented as *a + (-*b) via ar_ifadd64, after screening NaNs.
 */
int
ar_ifsub64 (AR_IEEE_64 *x,
	    const AR_IEEE_64 *a,
	    const AR_IEEE_64 *b,
	    int roundmode)
{

	AR_IEEE_64 nb;

	/* If either a or b is a NaN, it's the result. */
	if (IS_IEEE64_NaN(a)) {
		*x = *a;
		return AR_STAT_UNDEFINED;
	}
	if (IS_IEEE64_NaN(b)) {
		*x = *b;
		return AR_STAT_UNDEFINED;
	}

	nb = *b;
	nb.sign ^= 1;
	return ar_ifadd64 (x, a, &nb, roundmode);
}

#ifdef __mips
/* Use native addition for MIPS */
/*
 * Quad-precision add via the host's native long double arithmetic.
 * NOTE(review): casts AR_IEEE_128* to long double*; assumes the two
 * layouts coincide on MIPS -- a strict-aliasing/type-pun that only
 * holds on this platform.
 */
int
ar_ifadd128(AR_IEEE_128 *x,
	    const AR_IEEE_128 *a,
	    const AR_IEEE_128 *b,
	    int roundmode)
{
	AR_TYPE ty = AR_Float_IEEE_NR_128;
	*(long double *)x = *(long double *)a + *(long double *)b;
	return AR_status(x, &ty);
}

/*
 * Quad-precision subtract via the host's native long double arithmetic
 * (same type-pun caveat as ar_ifadd128 above).
 */
int
ar_ifsub128( AR_IEEE_128 *x,
	    const AR_IEEE_128 *a,
	    const AR_IEEE_128 *b,
	    int roundmode)
{
	AR_TYPE ty = AR_Float_IEEE_NR_128;
	*(long double *)x = *(long double *)a - *(long double *)b;
	return AR_status(x, &ty);
}

#else

/*
 * Add two unpacked IEEE quad-precision values: *x = *a + *b.
 * Same algorithm as ar_ifadd32/64; the coefficient spans
 * coeff0..coeff6.  Returns an AR_STAT_* bit mask.
 */
int
ar_ifadd128(AR_IEEE_128 *x,
	    const AR_IEEE_128 *a,
	    const AR_IEEE_128 *b,
	    int roundmode)
{

	int res = AR_STAT_OK;
	/* lbits/y_lbits/rbits: see ar_ifadd32 */
	unsigned int lbits, y_lbits, rbits;
	unsigned int shift, diffsign, carry, x_expo;
	AR_IEEE_128 y;

	/*
	 * Use native arithmetic for MIPS.
	 * NOTE(review): same long double type-pun as the __mips branch;
	 * valid only when HOST_IS_MIPS guarantees matching layout.
	 */
	if (HOST_IS_MIPS) {
		AR_TYPE ty = AR_Float_IEEE_NR_128;

		*(long double *)x = *(long double *)a + *(long double *)b;
		return AR_status((AR_DATA *) x, &ty);
	}

	/* Ensure that the first argument has the largest exponent,
	 * for simplicity.
	 */
	if (b->expo > a->expo)
		y = *a, *x = *b;
	else
		y = *b, *x = *a;
	x_expo = x->expo;

	/* If either x or y is a NaN, it's the result. */
	if (IS_IEEE128_NaN(x)) {
		return res | AR_STAT_UNDEFINED;
	}
	if (IS_IEEE128_NaN(&y)) {
		*x = y;
		return res | AR_STAT_UNDEFINED;
	}

	/* Test for infinities */
	if (x_expo > AR_IEEE128_MAX_EXPO)
		if (y.expo > AR_IEEE128_MAX_EXPO)
			if (x->sign == y.sign) {
				/* infinities of same sign - return infinity */
				if (x->sign)
					res |= AR_STAT_NEGATIVE;
				return res | AR_STAT_OVERFLOW;
			} else {
				/* infinities of different signs - quiet NaN */
				QNaNIEEE128 (x);
				return res | AR_STAT_UNDEFINED;
			}
		else {
			/* infinity + finite quantity - return infinity */
			if (x->sign)
				res |= AR_STAT_NEGATIVE;
			return res | AR_STAT_OVERFLOW;
		}
	else if (y.expo > AR_IEEE128_MAX_EXPO) {
		*x = y;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_OVERFLOW;
	}

	/* Test for denorms (they have zero exponents) to determine the
	 * values of the implicit normalization bits.
	 */
	if (ar_state_register.ar_denorms_trap &&
	    ((!x_expo && IS_IEEE128_NZ_COEFF(x)) ||
	     (!y.expo && IS_IEEE128_NZ_COEFF(&y)))) {
		/* operand is a denorm and denorms cause a trap */
		x->expo = AR_IEEE128_MAX_EXPO + 1;
		return res | AR_STAT_UNDEFINED;
	}
	/* Implicit leading bit: 1 for normalized, 0 for zero/denorm */
	lbits = !!x_expo;
	y_lbits = !!y.expo;

	/* Shift the coefficient of the second argument down (right). We
	 * do this in parts so as not to overflow registers.
	 */
	rbits = 0;
	shift = (x_expo - lbits) - (y.expo - y_lbits);
	if (shift > AR_IEEE128_COEFF_BITS + AR_IEEE128_ROUND_BITS) {
		/* y only contributes a sticky bit */
		rbits = !!(y.coeff0 | y.coeff1 | y.coeff2 |
			   y.coeff3 | y.coeff4 | y.coeff5 | y.coeff6);
		y.sign = y.expo = y_lbits = 0;
		y.coeff0 = y.coeff1 = y.coeff2 =
			y.coeff3 = y.coeff4 = y.coeff5 = y.coeff6 = 0;
	} else
		for (; shift; shift--) {
			/* Sticky bit shifting */
			rbits = (rbits & 1) | (rbits >> 1) |
				((y.coeff6 & 1) << (AR_IEEE128_ROUND_BITS-1));
			SHRIGHTIEEE128 (y);
			y.coeff0 |= y_lbits << (AR_IEEE128_C0_BITS - 1);
			y_lbits = 0;
		}

	/* If signs differ, complement the first argument; this is equivalent
	 * to negating and then subtracting one, in a two's complement sense.
	 */
	if (diffsign = (x->sign ^ y.sign)) {	/* assignment intended */
		lbits ^= MASKR (2);
		NOTIEEE128 (*x);
	}

	/* Compute sum of coefficients */
	if (diffsign) {
		rbits += MASKR (AR_IEEE128_ROUND_BITS);
		carry = rbits >> AR_IEEE128_ROUND_BITS;
		rbits &= MASKR (AR_IEEE128_ROUND_BITS);
	} else
		carry = 0;
	ADDIEEE128 (*x, carry, *x, y);
	lbits = (carry + lbits + y_lbits) & MASKR (2);

	/* Opposite signs */
	if (diffsign)
		if (lbits & 2) {
			/* Complement */
			lbits ^= MASKR (2);
			NOTIEEE128 (*x);
			rbits ^= MASKR (AR_IEEE128_ROUND_BITS);
		} else {
			/* End-around carry */
			x->sign ^= 1;
			carry = 1 + rbits;
			rbits = carry & MASKR (AR_IEEE128_ROUND_BITS);
			carry >>= AR_IEEE128_ROUND_BITS;
			INCIEEE128 (*x, carry);
			lbits = (carry + lbits) & MASKR (2);
		}

	/* Shift rightward if carry, or if undenormalized */
	if (lbits & 2 || lbits && !x_expo) {
		rbits = (rbits >> 1) | rbits & 1 |
			((x->coeff6 & 1) << (AR_IEEE128_ROUND_BITS - 1));
		SHRIGHTIEEE128 (*x);
		x->coeff0 |= lbits << (AR_IEEE128_C0_BITS - 1);
		lbits >>= 1;
		if (x_expo)
			x_expo++;
		else
			x_expo = 2;
	}

	/* Check for a zero result, as a special case */
	if (!(lbits | rbits | x->coeff0 | x->coeff1 | x->coeff2 |
	      x->coeff3 | x->coeff4 | x->coeff5 | x->coeff6)) {
		x->sign &= !diffsign;	/* -0 + -0 == -0 */
		x->expo = 0;
		if (x->sign)
			res |= AR_STAT_NEGATIVE;
		return res | AR_STAT_ZERO;
	}

	/* Normalize and round the intermediate result */
	return ar_i128norm (x_expo, lbits, rbits, x, roundmode);
}


/*
 * Subtract two unpacked IEEE quad-precision values: *x = *a - *b.
 * Implemented as *a + (-*b) via ar_ifadd128, after screening NaNs.
 */
int
ar_ifsub128(AR_IEEE_128 *x,
	    const AR_IEEE_128 *a,
	    const AR_IEEE_128 *b,
	    int roundmode)
{

	AR_IEEE_128 nb;

	/* If either a or b is a NaN, it's the result. */
	if (IS_IEEE128_NaN(a)) {
		*x = *a;
		return AR_STAT_UNDEFINED;
	}
	if (IS_IEEE128_NaN(b)) {
		*x = *b;
		return AR_STAT_UNDEFINED;
	}

	nb = *b;
	nb.sign ^= 1;
	return ar_ifadd128 (x, a, &nb, roundmode);
}
#endif /* __mips */


static char USMID [] = "\n%Z%%M% %I% %G% %U%\n";
static char rcsid [] = "$Id: ieee_fadd.c,v 1.1.1.1 2002-05-22 20:06:18 dsystem Exp $";