Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
cray_fmul.c
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2 of the GNU General Public License as
00007   published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if 
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU General Public License along
00021   with this program; if not, write the Free Software Foundation, Inc., 59
00022   Temple Place - Suite 330, Boston MA 02111-1307, USA.
00023 
00024   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00025   Mountain View, CA 94043, or:
00026 
00027   http://www.sgi.com
00028 
00029   For further information regarding this notice, see:
00030 
00031   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00032 
00033 */
00034 
00035 
00036 /*
00037  *      Portable Cray floating-point multiplication evaluator
00038  *
00039  *      Sources: hardware manuals, online diagnostics, Dick Nelson's
00040  *               emulation code, and the source for the double-precision
00041  *               library routines
00042  */
00043 
00044 #include "arith.internal.h"
00045 
00046 
/*
 * ar_cfmul64 - emulate the CRAY-1 64-bit floating-point multiply, x = a * b.
 *
 * x         - out: the product, in Cray single (64-bit) format
 * a, b      - in:  the operands (sign / expo / coeff0..coeff2 fields)
 * roundmode - AR_ROUNDED, AR_UNROUNDED, or AR_RECIPROCAL_ITERATION,
 *             selecting which CRAY-1 multiply variant to reproduce
 *
 * Returns an AR_STAT_* bit mask (OK / ZERO / NEGATIVE / OVERFLOW /
 * UNDERFLOW).  The product is formed by summing a shift-and-add
 * "pyramid" of partial products, exactly as the hardware truncates it,
 * rather than by an exact multiply.
 */
00047 int
00048 ar_cfmul64 (AR_CRAY_64 *x,
00049             const AR_CRAY_64 *a,
00050             const AR_CRAY_64 *b,
00051             int roundmode) {
00052 
        /* x_lbits: carry out the top of the coefficient (left bits);
         * x_rbits: 7 guard bits accumulated below the coefficient
         * (right bits).  y/z are working copies of the operands. */
00053         int i, res = AR_STAT_OK;
00054         long x_expo, a_expo = a->expo, b_expo = b->expo, test_expo;
00055         unsigned int x_lbits, y_rbits, zcoeff, carry, x_rbits = 0;
00056         AR_CRAY_64 y, z;
00057 
00058         x->sign = a->sign ^ b->sign;
00059         y = *a;
00060         z = *b;
00061 
        /* Two zero exponents produce a true zero; otherwise add the
         * biased exponents and bail out early on underflow.  The
         * "- 1" slack presumably matches the hardware's acceptance of
         * one-below-minimum inputs -- TODO confirm against the CRAY-1
         * hardware manual. */
00062         if (!a_expo && !b_expo)
00063                 x->sign = x_expo = 0;
00064         else {
00065                 x_expo = a_expo + b_expo - AR_CRAY_EXPO_BIAS;
00066                 if (a_expo < AR_CRAY_MIN_EXPO - 1 ||
00067                     b_expo < AR_CRAY_MIN_EXPO - 1 ||
00068                     x_expo < AR_CRAY_MIN_EXPO - 1) {
00069                         ZEROCRAY64 (*x);
00070                         return AR_STAT_UNDERFLOW | AR_STAT_ZERO;
00071                 }
00072         }
00073 
        /* Preload the accumulator with the mode's rounding bias
         * (octal constants, injected into the 7 guard bits).  For the
         * reciprocal-iteration variant the accumulator starts at
         * all-ones so the final NOTCRAY64 complement yields the
         * 2 - a*b iteration value -- NOTE(review): inferred from the
         * complement at the end; verify against the recip-iter docs. */
00074         switch (roundmode) {
00075         case AR_ROUNDED:                /* CRAY-1 rounded multiply */
00076                 x_lbits = 0;
00077                 x->coeff0 = x->coeff1 = x->coeff2 = 0;
00078                 x_rbits = 0151;
00079                 break;
00080         case AR_UNROUNDED:              /* CRAY-1 truncation compensation */
00081                 x_lbits = 0;
00082                 x->coeff0 = x->coeff1 = x->coeff2 = 0;
00083                 x_rbits = 0011;
00084                 break;
00085         case AR_RECIPROCAL_ITERATION:           /* CRAY-1 recip iter */
00086                 x_lbits = 1;
00087                 x->coeff0 = ~0;
00088                 x->coeff1 = ~0;
00089                 x->coeff2 = (0011 - 0320) >> 7;
00090                 x_rbits = (0011 - 0320) & MASKR (7);
00091                 break;
00092         }
00093 
00094         /* Compute and sum the pyramid */
        /* Fast path: when the coefficient is exactly three words of
         * AR_CRAY_C0_BITS each, walk each word of the multiplier
         * separately and stop as soon as its remaining bits are zero;
         * the post-loop shifts realign y by the bits skipped (i). */
00095 #if AR_CRAY_C0_BITS*3 == AR_CRAY64_COEFF_BITS
00096         y_rbits = y.coeff2<<7;
00097         i = AR_CRAY_C0_BITS;
00098         zcoeff = z.coeff0;
00099         while(zcoeff) {
                /* Multiplier bit set: add the (shifted) multiplicand,
                 * folding guard-bit carries into the main sum. */
00100                 if(zcoeff & 0x8000) {
00101                         x_rbits += (y_rbits & 0177);
00102                         carry = x_rbits >> 7;
00103                         x_rbits &= 0177;
00104                         ADDCRAY64 (*x, carry, *x, y);
00105                         x_lbits += carry;
00106                 }
00107                 SHRIGHTCRAY64 (y);
00108                 y_rbits >>= 1;
00109                 zcoeff = (zcoeff & 0x7fff) << 1;
00110                 i--;
00111         }
00112         y.coeff2 = (y.coeff2>>i) | (y.coeff1<<(AR_CRAY_C0_BITS-i));
00113         y.coeff1 = (y.coeff1>>i) | (y.coeff0<<(AR_CRAY_C0_BITS-i));
00114         y.coeff0 = 0;
00115         y_rbits = (y_rbits>>i) | (y.coeff2<<7);
00116         i = AR_CRAY_C1_BITS;
00117         zcoeff = z.coeff1;
00118         while(zcoeff) {
00119                 if(zcoeff & 0x8000) {
00120                         x_rbits += (y_rbits & 0177);
00121                         carry = x_rbits >> 7;
00122                         x_rbits &= 0177;
00123                         ADDCRAY64 (*x, carry, *x, y);
00124                         x_lbits += carry;
00125                 }
00126                 SHRIGHTCRAY64 (y);
00127                 y_rbits >>= 1;
00128                 zcoeff = (zcoeff & 0x7fff) << 1;
00129                 i--;
00130         }
00131         y.coeff2 = (y.coeff2>>i) | (y.coeff1<<(AR_CRAY_C1_BITS-i));
00132         y.coeff1 = 0;
00133         y_rbits = (y_rbits>>i) | (y.coeff2<<7);
00134         zcoeff = z.coeff2;
00135         while(zcoeff) {
00136                 if(zcoeff & 0x8000) {
00137                         x_rbits += (y_rbits & 0177);
00138                         carry = x_rbits >> 7;
00139                         x_rbits &= 0177;
00140                         ADDCRAY64 (*x, carry, *x, y);
00141                         x_lbits += carry;
00142                 }
00143                 y.coeff2 >>= 1;
00144                 y_rbits >>= 1;
00145                 zcoeff = (zcoeff & 0x7fff) << 1;
00146         }
00147 #else
        /* Generic path: one shift-and-add step per coefficient bit,
         * testing the multiplier's current top bit each iteration. */
00148         y_rbits = 0;
00149         for (i = 0; i < AR_CRAY64_COEFF_BITS; i++) {
00150                 /* Add scaled operand to sum, with truncation */
00151                 if (z.coeff0 >> (AR_CRAY_C0_BITS - 1)) {
00152                         x_rbits += y_rbits;
00153                         carry = x_rbits >> 7;
00154                         x_rbits &= MASKR (7);
00155                         ADDCRAY64 (*x, carry, *x, y);
00156                         x_lbits += carry;
00157                 }
00158                 y_rbits = (y_rbits >> 1) | ((y.coeff2 & 1) << 6);
00159                 SHRIGHTCRAY64 (y);
00160                 SHLEFTCRAY64 (z);
00161         }
00162 #endif
00163 
        /* Reciprocal iteration returns the one's complement of the
         * accumulated sum (including the carry-out bit). */
00164         if (roundmode == AR_RECIPROCAL_ITERATION) {
00165                 x_lbits ^= 1;
00166                 NOTCRAY64 (*x);
00167         }
00168 
00169         /* Normalize right if necessary */
        /* A carry past the top bit (or a zero-exponent operand) means
         * the coefficient must shift right one place; the exponent is
         * bumped only for normalized (nonzero-exponent) inputs. */
00170         test_expo = x_expo;
00171         if (x_lbits & 1 || !(a_expo | b_expo)) {
00172                 SHRIGHTCRAY64 (*x);
00173                 x->coeff0 |= x_lbits << (AR_CRAY_C0_BITS - 1);
00174                 if (a_expo | b_expo)
00175                         x_expo++;
00176         }
00177 
00178         /* Check for overflow with unadjusted result exponent */
00179         if (test_expo >= AR_CRAY_MAX_EXPO ||
00180             a_expo > AR_CRAY_MAX_EXPO ||
00181             b_expo > AR_CRAY_MAX_EXPO) {
00182                 x_expo = AR_CRAY_MAX_EXPO + 1;
00183                 res |= AR_STAT_OVERFLOW;
00184         }
00185 
00186         x->expo = x_expo;
00187 
        /* Derive the result status bits from the stored fields. */
00188         if (!(x->sign | x->expo | x->coeff0 | x->coeff1 | x->coeff2))
00189                 res |= AR_STAT_ZERO;
00190         if (x->sign)
00191                 res |= AR_STAT_NEGATIVE;
00192 
00193         return res;
00194 }
00195 
00196 
/*
 * ar_cfmul128 - emulate the Cray 128-bit (double precision) floating
 * multiply, x = a * b.
 *
 * x         - out: the product, in Cray double (128-bit) format
 * a, b      - in:  the operands (sign / expo / coeff0..coeff5 fields)
 * roundmode - rounding mode selector (only its value relative to the
 *             AR_* modes matters; this routine always rounds from the
 *             low-order product half)
 *
 * Returns an AR_STAT_* bit mask (OK / ZERO / NEGATIVE / OVERFLOW /
 * UNDERFLOW / INEXACT).  The full double-width product is accumulated
 * in the x (high) and x2 (low) halves of a shift-and-add pyramid; the
 * low half feeds rounding and the inexact flag.
 */
00197 int
00198 ar_cfmul128 (AR_CRAY_128 *x,
00199              const AR_CRAY_128 *a,
00200              const AR_CRAY_128 *b,
00201              int roundmode) {
00202 
00203         int i, res = AR_STAT_OK, rnd, inexact, a_expo = a->expo, b_expo = b->expo;
00204         long x_expo, test_expo;
00205         unsigned int x_lbits = 0, carry;
00206         AR_CRAY_128 x2, y, y2, z;
00207         AR_CRAY_64 spa, spb;
00208 
00209         /* Use the sign and exponent together in a 16-bit package */
        /* The XOR of the operand signs falls out of the sum of the two
         * sign bits placed above the exponent field; the true sign is
         * recovered at the end by shifting x_expo back down. */
00210         test_expo = x_expo = a_expo + b_expo - AR_CRAY_EXPO_BIAS;
00211         x_expo += (a->sign << AR_CRAY_EXPO_BITS) +
00212                   (b->sign << AR_CRAY_EXPO_BITS);
00213 
00214         /* Special-case zero arguments or zero SP products */
00215         if (!(a->coeff0 | a->coeff1 | a->coeff2 |
00216               a->coeff3 | a->coeff4 | a->coeff5) ||
00217             !(b->coeff0 | b->coeff1 | b->coeff2 |
00218               b->coeff3 | b->coeff4 | b->coeff5)) {
00219                 ZEROCRAY128 (*x);
00220                 return AR_STAT_ZERO;
00221         }
00222 
        /* y/z are working copies; x holds the high half of the running
         * product and x2 the low half, while y2 tracks the bits of y
         * shifted below the high half. */
00223         y = *a;
00224         z = *b;
00225         ZEROCRAY128 (x2);
00226         ZEROCRAY128 (y2);
00227         ZEROCRAY128 (*x);
00228 
00229         /* Compute and sum the pyramid */
00230         for (i = 0; i < AR_CRAY128_COEFF_BITS; i++) {
00231 
00232                 /* Add scaled operand to sum, with truncation */
00233                 if (z.coeff0 & (1 << (AR_CRAY_C0_BITS - 1))) {
                        /* Low half first so its carry ripples into the
                         * high half through the shared carry variable. */
00234                         carry = 0;
00235                         ADDCRAY128 (x2, carry, x2, y2);
00236                         ADDCRAY128 (*x, carry, *x, y);
00237                         x_lbits += carry;
00238                 }
00239 
00240                 SHRIGHTCRAY128 (y2);
00241                 y2.coeff0 |= y.coeff5 << (AR_CRAY_C0_BITS - 1);
00242                 SHRIGHTCRAY128 (y);
00243                 SHLEFTCRAY128 (z);
00244         }
00245 
00246         /* Rounding */
        /* Round to nearest on the top bit of the low half; any nonzero
         * low-half bit makes the result inexact. */
00247         inexact = !!(x2.coeff0 | x2.coeff1 | x2.coeff2 | x2.coeff3 |
00248                         x2.coeff4 | x2.coeff5);
00249         carry = rnd = x2.coeff0 >> (AR_CRAY_C0_BITS - 1);
00250         INCCRAY128 (*x, carry);
00251         x_lbits += carry;
00252 
00253         /* Normalize right if necessary */
        /* Carry past the top bit: shift right, restore the leading 1,
         * and re-round (rnd ^= 1 compensates for the bit just shifted
         * out -- NOTE(review): inferred from the code; confirm against
         * the hardware rounding rules). */
00254         if (x_lbits) {
00255                 rnd ^= 1;
00256                 INCCRAY128 (*x, rnd);
00257                 SHRIGHTCRAY128 (*x);
00258                 x->coeff0 |= 1 << (AR_CRAY_C0_BITS - 1);
00259                 x_expo++;
00260         }
00261 
00262         /* Check for overflow with unadjusted result exponent */
        /* Note: && binds tighter than ||, so each operand's range test
         * is qualified by the other operand being nonzero. */
00263         if (test_expo >= AR_CRAY_MAX_EXPO ||
00264             a_expo > AR_CRAY_MAX_EXPO && !!b_expo ||
00265             b_expo > AR_CRAY_MAX_EXPO && !!a_expo)
00266                 res |= AR_STAT_OVERFLOW;
00267 
00268         /* Check for underflow */
        /* The biased exponent's range bit (one below the sign package)
         * must be set for an in-range result; otherwise flush to zero. */
00269         if (!(((x_expo + AR_CRAY_MIN_EXPO) >> (AR_CRAY_EXPO_BITS - 1)) & 1)) {
00270                 ZEROCRAY128 (*x);
00271                 x_expo = 0;
00272                 res |= AR_STAT_UNDERFLOW;
00273         }
00274 
        /* Unpack the sign/exponent package into the result fields. */
00275         x->expo = x_expo;
00276         x->sign = x_expo >> AR_CRAY_EXPO_BITS;
00277 
00278         if (!(x->sign | x->expo | x->coeff0 | x->coeff1 | x->coeff2 |
00279               x->coeff3 | x->coeff4 | x->coeff5))
00280                 res |= AR_STAT_ZERO;
00281         if (x->sign)
00282                 res |= AR_STAT_NEGATIVE;
00283         if (inexact)
00284                 res |= AR_STAT_INEXACT;
00285         return res;
00286 }
00287 
00288 
/* SCCS (USMID) and RCS identification strings, embedded in the object
 * file so version tools (what/ident) can report the source revision. */
00289 static char USMID [] = "\n%Z%%M%        %I%     %G% %U%\n";
00290 static char rcsid [] = "$Id: cray_fmul.c,v 1.1.1.1 2002-05-22 20:06:18 dsystem Exp $";
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines