Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
ieee_fmul.c
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2 of the GNU General Public License as
00007   published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if 
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU General Public License along
00021   with this program; if not, write the Free Software Foundation, Inc., 59
00022   Temple Place - Suite 330, Boston MA 02111-1307, USA.
00023 
00024   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00025   Mountain View, CA 94043, or:
00026 
00027   http://www.sgi.com
00028 
00029   For further information regarding this notice, see:
00030 
00031   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00032 
00033 */
00034 
00035 
00036 /*
00037  *      Portable IEEE 754 floating-point multiplication evaluator
00038  */
00039 
00040 #include "arith.internal.h"
00041 
00042 
00043 
00044 int
00045 ar_ifmul32 (AR_IEEE_32 *x,
00046                         const AR_IEEE_32 *a,
00047                         const AR_IEEE_32 *b,
00048                         int roundmode)
00049 {
00050 
00051         int i, res = AR_STAT_OK;
00052         unsigned long x_lbits, z_lbits, rbits, carry;
00053         signed int x_expo;
00054         AR_IEEE_32 x2, y, y2, z;
00055 
00056         /* If either a or b is a NaN, it's the result. */
00057         if (IS_IEEE32_NaN(a)) {
00058                 *x = *a;
00059                 return res | AR_STAT_UNDEFINED;
00060         }
00061         if (IS_IEEE32_NaN(b)) {
00062                 *x = *b;
00063                 return res | AR_STAT_UNDEFINED;
00064         }
00065 
00066         /* Test for infinities */
00067         if (b->expo > AR_IEEE32_MAX_EXPO)
00068                 if (a->expo == 0 && !IS_IEEE32_NZ_COEFF(a)){
00069                         /* zero * inf = quiet NaN */
00070                         QNaNIEEE32 (x);
00071                         return res | AR_STAT_UNDEFINED;
00072                 } else {
00073                         /* anything * infinity = infinity */
00074                         if (i = a->sign ^ b->sign)
00075                                 res |= AR_STAT_NEGATIVE;
00076                         *x = *b;
00077                         x->sign = i;
00078                         return res | AR_STAT_OVERFLOW;
00079                 }
00080         if (a->expo > AR_IEEE32_MAX_EXPO)
00081                 if (b->expo == 0 && !IS_IEEE32_NZ_COEFF(b)){
00082                         /* infinity * zero = quiet NaN */
00083                         QNaNIEEE32 (x);
00084                         return res | AR_STAT_UNDEFINED;
00085                 } else {
00086                         /* infinity * anything = infinity */
00087                         if (i = a->sign ^ b->sign)
00088                                 res |= AR_STAT_NEGATIVE;
00089                         *x = *a;
00090                         x->sign = i;
00091                         return res | AR_STAT_OVERFLOW;
00092                 }
00093 
00094         /* Test for denorms (they have zero exponents) to determine the
00095          * values of the implicit normalization bits; make them explicit.
00096          */
00097         if (ar_state_register.ar_denorms_trap &&
00098                 ((!a->expo && IS_IEEE32_NZ_COEFF(a)) ||
00099                  (!b->expo && IS_IEEE32_NZ_COEFF(b)))) {
00100                 /* operand is a denorm and denorms cause a trap */
00101                 x->expo = AR_IEEE32_MAX_EXPO + 1;
00102                 return res | AR_STAT_UNDEFINED;
00103         }
00104         ZEROIEEE32 (y);
00105         y.coeff1 = !!a->expo;
00106         y2 = *a;
00107         z = *b;
00108         z_lbits = !!b->expo;
00109         x_expo = a->expo + b->expo + !a->expo + !b->expo - AR_IEEE32_EXPO_BIAS;
00110         if (x_expo <= 0)
00111                 x_expo--;
00112         i = a->sign ^ b->sign;
00113 
00114         /* Sum the pyramid */
00115         if (z.coeff1 & 1) {
00116                 x2 = *a;
00117                 *x = y;
00118         }
00119         else {
00120                 ZEROIEEE32 (*x);
00121                 ZEROIEEE32 (x2);
00122         }
00123         x->sign = i;
00124         x_lbits = z_lbits & y.coeff1;
00125         SHLEFTIEEE32_2 (y, y2);
00126         SHRIGHTIEEE32 (z);
00127         z.coeff0 |= z_lbits << (AR_IEEE32_C0_BITS - 1);
00128         for (i = 1; i < AR_IEEE32_COEFF_BITS + 1; i++) {
00129 
00130                 if (z.coeff1 & 1) {
00131                         carry = 0;
00132                         ADDIEEE32 (x2, carry, x2, y2);
00133                         ADDIEEE32 (*x, carry, *x, y);
00134                         x_lbits += carry;
00135                 }
00136 
00137                 SHLEFTIEEE32_2 (y, y2);
00138                 SHRIGHTIEEE32 (z);
00139         }
00140 
00141         /* Extract rounding bits */
00142         rbits = x2.coeff0 >> (AR_IEEE32_C0_BITS - AR_IEEE32_ROUND_BITS);
00143         if (x2.coeff0 & MASKR (AR_IEEE32_C0_BITS - AR_IEEE32_ROUND_BITS) |
00144                 x2.coeff1)
00145                 rbits |= 1; /* sticky bit */
00146 
00147         /* Normalize and round */
00148         return ar_i32norm (x_expo, x_lbits, rbits, x, roundmode);
00149 }
00150 
00151 
00152 
00153 int
00154 ar_ifmul64 (AR_IEEE_64 *x,
00155                         const AR_IEEE_64 *a,
00156                         const AR_IEEE_64 *b,
00157                         int roundmode)
00158 {
00159 
00160         int i, res = AR_STAT_OK;
00161         unsigned long x_lbits, z_lbits, rbits, carry;
00162         signed int x_expo;
00163         AR_IEEE_64 x2, y, y2, z;
00164 
00165         /* If either a or b is a NaN, it's the result. */
00166         if (IS_IEEE64_NaN(a)) {
00167                 *x = *a;
00168                 return res | AR_STAT_UNDEFINED;
00169         }
00170         if (IS_IEEE64_NaN(b)) {
00171                 *x = *b;
00172                 return res | AR_STAT_UNDEFINED;
00173         }
00174 
00175         /* Test for infinities */
00176         if (b->expo > AR_IEEE64_MAX_EXPO)
00177                 if (a->expo == 0 && !IS_IEEE64_NZ_COEFF(a)) {
00178                         /* zero * inf = quiet NaN */
00179                         QNaNIEEE64 (x);
00180                         return res | AR_STAT_UNDEFINED;
00181                 } else {
00182                         /* anything * infinity = infinity */
00183                         if (i = a->sign ^ b->sign)
00184                                 res |= AR_STAT_NEGATIVE;
00185                         *x = *b;
00186                         x->sign = i;
00187                         return res | AR_STAT_OVERFLOW;
00188                 }
00189         if (a->expo > AR_IEEE64_MAX_EXPO)
00190                 if (b->expo == 0 && !IS_IEEE64_NZ_COEFF(b)) {
00191                         /* infinity * zero = quiet NaN */
00192                         QNaNIEEE64 (x);
00193                         return res | AR_STAT_UNDEFINED;
00194                 } else {
00195                         /* infinity * anything = infinity */
00196                         if (i = a->sign ^ b->sign)
00197                                 res |= AR_STAT_NEGATIVE;
00198                         *x = *a;
00199                         x->sign = i;
00200                         return res | AR_STAT_OVERFLOW;
00201                 }
00202 
00203         /* Test for denorms (they have zero exponents) to determine the
00204          * values of the implicit normalization bits; make them explicit.
00205          */
00206         if (ar_state_register.ar_denorms_trap &&
00207                 ((!a->expo && IS_IEEE64_NZ_COEFF(a)) ||
00208                  (!b->expo && IS_IEEE64_NZ_COEFF(b)))) {
00209                 /* operand is a denorm and denorms cause a trap */
00210                 x->expo = AR_IEEE64_MAX_EXPO + 1;
00211                 return res | AR_STAT_UNDEFINED;
00212         }
00213         ZEROIEEE64 (y);
00214         y.coeff3 = !!a->expo;
00215         y2 = *a;
00216         z = *b;
00217         z_lbits = !!b->expo;
00218         x_expo = a->expo + b->expo + !a->expo + !b->expo - AR_IEEE64_EXPO_BIAS;
00219         if (x_expo <= 0)
00220                 x_expo--;
00221         i = a->sign ^ b->sign;
00222 
00223         /* Sum the pyramid */
00224         if (z.coeff3 & 1) {
00225                 x2 = *a;
00226                 *x = y;
00227         }
00228         else {
00229                 ZEROIEEE64 (*x);
00230                 ZEROIEEE64 (x2);
00231         }
00232         x->sign = i;
00233         x_lbits = z_lbits & y.coeff3;
00234         SHLEFTIEEE64_2 (y, y2);
00235         SHRIGHTIEEE64 (z);
00236         z.coeff0 |= z_lbits << (AR_IEEE64_C0_BITS - 1);
00237         for (i = 1; i < AR_IEEE64_COEFF_BITS + 1; i++) {
00238 
00239                 if (z.coeff3 & 1) {
00240                         carry = 0;
00241                         ADDIEEE64 (x2, carry, x2, y2);
00242                         ADDIEEE64 (*x, carry, *x, y);
00243                         x_lbits += carry;
00244                 }
00245 
00246                 SHLEFTIEEE64_2 (y, y2);
00247                 SHRIGHTIEEE64 (z);
00248         }
00249 
00250         /* Extract rounding bits */
00251         rbits = x2.coeff0 >> (AR_IEEE64_C0_BITS - AR_IEEE64_ROUND_BITS);
00252         if (x2.coeff0 & MASKR (AR_IEEE64_C0_BITS - AR_IEEE64_ROUND_BITS) |
00253                 x2.coeff1 | x2.coeff2 | x2.coeff3)
00254                 rbits |= 1; /* sticky bit */
00255 
00256         /* Normalize and round */
00257         return ar_i64norm (x_expo, x_lbits, rbits, x, roundmode);
00258 }
00259 
00260 #ifdef __mips
00261 
00262 int
00263 ar_ifmul128(AR_IEEE_128 *x,
00264                         const AR_IEEE_128 *a,
00265                         const AR_IEEE_128 *b,
00266                         int roundmode)
00267 {
00268    AR_TYPE ty = AR_Float_IEEE_NR_128;
00269    *(long double *)x = *(long double *)a * *(long double *)b;
00270    return AR_status(x, &ty);
00271 }
00272 
00273 #else
00274 
00275 int
00276 ar_ifmul128(AR_IEEE_128 *x,
00277                         const AR_IEEE_128 *a,
00278                         const AR_IEEE_128 *b,
00279                         int roundmode)
00280 {
00281 
00282         int i, res = AR_STAT_OK;
00283         unsigned long x_lbits, z_lbits, rbits, carry;
00284         signed int x_expo;
00285         AR_IEEE_128 x2, y, y2, z;
00286 
00287         /*
00288          * Use native arithmetic for MIPS.
00289          */
00290         if (HOST_IS_MIPS) {
00291                 AR_TYPE ty = AR_Float_IEEE_NR_128;
00292 
00293                 *(long double *)x = *(long double *)a * *(long double *)b;
00294                 return AR_status((AR_DATA *) x, &ty);
00295         }
00296 
00297         /* If either a or b is a NaN, it's the result. */
00298         if (IS_IEEE128_NaN(a)) {
00299                 *x = *a;
00300                 return res | AR_STAT_UNDEFINED;
00301         }
00302         if (IS_IEEE128_NaN(b)) {
00303                 *x = *b;
00304                 return res | AR_STAT_UNDEFINED;
00305         }
00306 
00307         /* Test for infinities */
00308         if (b->expo > AR_IEEE128_MAX_EXPO)
00309                 if (a->expo == 0 && !IS_IEEE128_NZ_COEFF(a)) {
00310                         /* zero * inf = quiet NaN */
00311                         QNaNIEEE128 (x);
00312                         return res | AR_STAT_UNDEFINED;
00313                 } else {
00314                         /* anything * infinity = infinity */
00315                         if (i = a->sign ^ b->sign)
00316                                 res |= AR_STAT_NEGATIVE;
00317                         *x = *b;
00318                         x->sign = i;
00319                         return res | AR_STAT_OVERFLOW;
00320                 }
00321         if (a->expo > AR_IEEE128_MAX_EXPO)
00322                 if (b->expo == 0 && !IS_IEEE128_NZ_COEFF(b)) {
00323                         /* infinity * zero = quietNaN */
00324                         QNaNIEEE128 (x);
00325                         return res | AR_STAT_UNDEFINED;
00326                 } else {
00327                         /* infinity * anything = infinity */
00328                         if (i = a->sign ^ b->sign)
00329                                 res |= AR_STAT_NEGATIVE;
00330                         *x = *a;
00331                         x->sign = i;
00332                         return res | AR_STAT_OVERFLOW;
00333                 }
00334 
00335         /* Test for denorms (they have zero exponents) to determine the
00336          * values of the implicit normalization bits; make them explicit.
00337          */
00338         if (ar_state_register.ar_denorms_trap &&
00339                 ((!a->expo && IS_IEEE128_NZ_COEFF(a)) ||
00340                  (!b->expo && IS_IEEE128_NZ_COEFF(b)))) {
00341                 /* operand is a denorm and denorms cause a trap */
00342                 x->expo = AR_IEEE128_MAX_EXPO + 1;
00343                 return res | AR_STAT_UNDEFINED;
00344         }
00345         ZEROIEEE128 (y);
00346         y.coeff6 = !!a->expo;
00347         y2 = *a;
00348         z = *b;
00349         z_lbits = !!b->expo;
00350         x_expo = a->expo + b->expo + !a->expo + !b->expo - AR_IEEE128_EXPO_BIAS;
00351         if (x_expo <= 0)
00352                 x_expo--;
00353         i = a->sign ^ b->sign;
00354 
00355         /* Sum the pyramid */
00356         if (z.coeff6 & 1) {
00357                 x2 = *a;
00358                 *x = y;
00359         }
00360         else {
00361                 ZEROIEEE128 (*x);
00362                 ZEROIEEE128 (x2);
00363         }
00364         x->sign = i;
00365         x_lbits = z_lbits & y.coeff6;
00366         SHLEFTIEEE128_2 (y, y2);
00367         SHRIGHTIEEE128 (z);
00368         z.coeff0 |= z_lbits << (AR_IEEE128_C0_BITS - 1);
00369         for (i = 1; i < AR_IEEE128_COEFF_BITS + 1; i++) {
00370 
00371                 if (z.coeff6 & 1) {
00372                         carry = 0;
00373                         ADDIEEE128 (x2, carry, x2, y2);
00374                         ADDIEEE128 (*x, carry, *x, y);
00375                         x_lbits += carry;
00376                 }
00377 
00378                 SHLEFTIEEE128_2 (y, y2);
00379                 SHRIGHTIEEE128 (z);
00380         }
00381 
00382         /* Extract rounding bits */
00383         rbits = x2.coeff0 >> (AR_IEEE128_C0_BITS - AR_IEEE128_ROUND_BITS);
00384         if (x2.coeff0 & MASKR (AR_IEEE128_C0_BITS - AR_IEEE128_ROUND_BITS) |
00385                 x2.coeff1 | x2.coeff2 | x2.coeff3 | x2.coeff3 | x2.coeff5 | x2.coeff6)
00386                 rbits |= 1; /* sticky bit */
00387 
00388         /* Normalize and round */
00389         return ar_i128norm (x_expo, x_lbits, rbits, x, roundmode);
00390 }
00391 #endif
00392 
00393 
00394 static char USMID [] = "\n%Z%%M%        %I%     %G% %U%\n";
00395 static char rcsid [] = "$Id: ieee_fmul.c,v 1.1.1.1 2002-05-22 20:06:19 dsystem Exp $";
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines