Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
cray_fmul.c
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2 of the GNU General Public License as
00007   published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if 
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU General Public License along
00021   with this program; if not, write the Free Software Foundation, Inc., 59
00022   Temple Place - Suite 330, Boston MA 02111-1307, USA.
00023 
00024   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00025   Mountain View, CA 94043, or:
00026 
00027   http://www.sgi.com
00028 
00029   For further information regarding this notice, see:
00030 
00031   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00032 
00033 */
00034 
00035 
00036 /*
00037  *      Portable Cray floating-point multiplication evaluator
00038  *
00039  *      Sources: hardware manuals, online diagnostics, Dick Nelson's
00040  *               emulation code, and the source for the double-precision
00041  *               library routines
00042  */
00043 
00044 #include "arith.internal.h"
00045 
00046 
/*
 * ar_cfmul64 - emulate the CRAY-1 64-bit floating-point multiply, x = a * b.
 *
 * x         - out: the product, in Cray single (64-bit) format
 * a, b      - in:  the operands (sign / expo / coeff0..coeff2 fields)
 * roundmode - AR_ROUNDED, AR_UNROUNDED, or AR_RECIPROCAL_ITERATION,
 *             selecting which CRAY-1 multiply variant to reproduce
 *
 * Returns an AR_STAT_* bit mask (OK / ZERO / NEGATIVE / OVERFLOW /
 * UNDERFLOW).  The product is formed by summing a shift-and-add
 * "pyramid" of partial products, exactly as the hardware truncates it,
 * rather than by an exact multiply.
 */
00047 int
00048 ar_cfmul64 (AR_CRAY_64 *x,
00049             const AR_CRAY_64 *a,
00050             const AR_CRAY_64 *b,
00051             int roundmode) {
00052 
        /* x_lbits: carry out the top of the coefficient (left bits);
         * x_rbits: 7 guard bits accumulated below the coefficient
         * (right bits).  y/z are working copies of the operands. */
00053         int i, res = AR_STAT_OK;
00054         long x_expo, a_expo = a->expo, b_expo = b->expo, test_expo;
00055         unsigned int x_lbits, y_rbits, zcoeff, carry, x_rbits = 0;
00056         AR_CRAY_64 y, z;
00057 
00058         x->sign = a->sign ^ b->sign;
00059         y = *a;
00060         z = *b;
00061 
        /* Two zero exponents produce a true zero; otherwise add the
         * biased exponents and bail out early on underflow.  The
         * "- 1" slack presumably matches the hardware's acceptance of
         * one-below-minimum inputs -- TODO confirm against the CRAY-1
         * hardware manual. */
00062         if (!a_expo && !b_expo)
00063                 x->sign = x_expo = 0;
00064         else {
00065                 x_expo = a_expo + b_expo - AR_CRAY_EXPO_BIAS;
00066                 if (a_expo < AR_CRAY_MIN_EXPO - 1 ||
00067                     b_expo < AR_CRAY_MIN_EXPO - 1 ||
00068                     x_expo < AR_CRAY_MIN_EXPO - 1) {
00069                         ZEROCRAY64 (*x);
00070                         return AR_STAT_UNDERFLOW | AR_STAT_ZERO;
00071                 }
00072         }
00073 
        /* Preload the accumulator with the mode's rounding bias
         * (octal constants, injected into the 7 guard bits).  For the
         * reciprocal-iteration variant the accumulator starts at
         * all-ones so the final NOTCRAY64 complement yields the
         * 2 - a*b iteration value -- NOTE(review): inferred from the
         * complement at the end; verify against the recip-iter docs. */
00074         switch (roundmode) {
00075         case AR_ROUNDED:                /* CRAY-1 rounded multiply */
00076                 x_lbits = 0;
00077                 x->coeff0 = x->coeff1 = x->coeff2 = 0;
00078                 x_rbits = 0151;
00079                 break;
00080         case AR_UNROUNDED:              /* CRAY-1 truncation compensation */
00081                 x_lbits = 0;
00082                 x->coeff0 = x->coeff1 = x->coeff2 = 0;
00083                 x_rbits = 0011;
00084                 break;
00085         case AR_RECIPROCAL_ITERATION:           /* CRAY-1 recip iter */
00086                 x_lbits = 1;
00087                 x->coeff0 = ~0;
00088                 x->coeff1 = ~0;
00089                 x->coeff2 = (0011 - 0320) >> 7;
00090                 x_rbits = (0011 - 0320) & MASKR (7);
00091                 break;
00092         }
00093 
00094         /* Compute and sum the pyramid */
        /* Fast path: when the coefficient is exactly three words of
         * AR_CRAY_C0_BITS each, walk each word of the multiplier
         * separately and stop as soon as its remaining bits are zero;
         * the post-loop shifts realign y by the bits skipped (i). */
00095 #if AR_CRAY_C0_BITS*3 == AR_CRAY64_COEFF_BITS
00096         y_rbits = y.coeff2<<7;
00097         i = AR_CRAY_C0_BITS;
00098         zcoeff = z.coeff0;
00099         while(zcoeff) {
                /* Multiplier bit set: add the (shifted) multiplicand,
                 * folding guard-bit carries into the main sum. */
00100                 if(zcoeff & 0x8000) {
00101                         x_rbits += (y_rbits & 0177);
00102                         carry = x_rbits >> 7;
00103                         x_rbits &= 0177;
00104                         ADDCRAY64 (*x, carry, *x, y);
00105                         x_lbits += carry;
00106                 }
00107                 SHRIGHTCRAY64 (y);
00108                 y_rbits >>= 1;
00109                 zcoeff = (zcoeff & 0x7fff) << 1;
00110                 i--;
00111         }
00112         y.coeff2 = (y.coeff2>>i) | (y.coeff1<<(AR_CRAY_C0_BITS-i));
00113         y.coeff1 = (y.coeff1>>i) | (y.coeff0<<(AR_CRAY_C0_BITS-i));
00114         y.coeff0 = 0;
00115         y_rbits = (y_rbits>>i) | (y.coeff2<<7);
00116         i = AR_CRAY_C1_BITS;
00117         zcoeff = z.coeff1;
00118         while(zcoeff) {
00119                 if(zcoeff & 0x8000) {
00120                         x_rbits += (y_rbits & 0177);
00121                         carry = x_rbits >> 7;
00122                         x_rbits &= 0177;
00123                         ADDCRAY64 (*x, carry, *x, y);
00124                         x_lbits += carry;
00125                 }
00126                 SHRIGHTCRAY64 (y);
00127                 y_rbits >>= 1;
00128                 zcoeff = (zcoeff & 0x7fff) << 1;
00129                 i--;
00130         }
00131         y.coeff2 = (y.coeff2>>i) | (y.coeff1<<(AR_CRAY_C1_BITS-i));
00132         y.coeff1 = 0;
00133         y_rbits = (y_rbits>>i) | (y.coeff2<<7);
00134         zcoeff = z.coeff2;
00135         while(zcoeff) {
00136                 if(zcoeff & 0x8000) {
00137                         x_rbits += (y_rbits & 0177);
00138                         carry = x_rbits >> 7;
00139                         x_rbits &= 0177;
00140                         ADDCRAY64 (*x, carry, *x, y);
00141                         x_lbits += carry;
00142                 }
00143                 y.coeff2 >>= 1;
00144                 y_rbits >>= 1;
00145                 zcoeff = (zcoeff & 0x7fff) << 1;
00146         }
00147 #else
        /* Generic path: one shift-and-add step per coefficient bit,
         * testing the multiplier's current top bit each iteration. */
00148         y_rbits = 0;
00149         for (i = 0; i < AR_CRAY64_COEFF_BITS; i++) {
00150                 /* Add scaled operand to sum, with truncation */
00151                 if (z.coeff0 >> (AR_CRAY_C0_BITS - 1)) {
00152                         x_rbits += y_rbits;
00153                         carry = x_rbits >> 7;
00154                         x_rbits &= MASKR (7);
00155                         ADDCRAY64 (*x, carry, *x, y);
00156                         x_lbits += carry;
00157                 }
00158                 y_rbits = (y_rbits >> 1) | ((y.coeff2 & 1) << 6);
00159                 SHRIGHTCRAY64 (y);
00160                 SHLEFTCRAY64 (z);
00161         }
00162 #endif
00163 
        /* Reciprocal iteration returns the one's complement of the
         * accumulated sum (including the carry-out bit). */
00164         if (roundmode == AR_RECIPROCAL_ITERATION) {
00165                 x_lbits ^= 1;
00166                 NOTCRAY64 (*x);
00167         }
00168 
00169         /* Normalize right if necessary */
        /* A carry past the top bit (or a zero-exponent operand) means
         * the coefficient must shift right one place; the exponent is
         * bumped only for normalized (nonzero-exponent) inputs. */
00170         test_expo = x_expo;
00171         if (x_lbits & 1 || !(a_expo | b_expo)) {
00172                 SHRIGHTCRAY64 (*x);
00173                 x->coeff0 |= x_lbits << (AR_CRAY_C0_BITS - 1);
00174                 if (a_expo | b_expo)
00175                         x_expo++;
00176         }
00177 
00178         /* Check for overflow with unadjusted result exponent */
00179         if (test_expo >= AR_CRAY_MAX_EXPO ||
00180             a_expo > AR_CRAY_MAX_EXPO ||
00181             b_expo > AR_CRAY_MAX_EXPO) {
00182                 x_expo = AR_CRAY_MAX_EXPO + 1;
00183                 res |= AR_STAT_OVERFLOW;
00184         }
00185 
00186         x->expo = x_expo;
00187 
        /* Derive the result status bits from the stored fields. */
00188         if (!(x->sign | x->expo | x->coeff0 | x->coeff1 | x->coeff2))
00189                 res |= AR_STAT_ZERO;
00190         if (x->sign)
00191                 res |= AR_STAT_NEGATIVE;
00192 
00193         return res;
00194 }
00195 
00196 
/*
 * ar_cfmul128 - emulate the Cray 128-bit (double precision) floating
 * multiply, x = a * b.
 *
 * x         - out: the product, in Cray double (128-bit) format
 * a, b      - in:  the operands (sign / expo / coeff0..coeff5 fields)
 * roundmode - rounding mode selector (only its value relative to the
 *             AR_* modes matters; this routine always rounds from the
 *             low-order product half)
 *
 * Returns an AR_STAT_* bit mask (OK / ZERO / NEGATIVE / OVERFLOW /
 * UNDERFLOW / INEXACT).  The full double-width product is accumulated
 * in the x (high) and x2 (low) halves of a shift-and-add pyramid; the
 * low half feeds rounding and the inexact flag.
 */
00197 int
00198 ar_cfmul128 (AR_CRAY_128 *x,
00199              const AR_CRAY_128 *a,
00200              const AR_CRAY_128 *b,
00201              int roundmode) {
00202 
00203         int i, res = AR_STAT_OK, rnd, inexact, a_expo = a->expo, b_expo = b->expo;
00204         long x_expo, test_expo;
00205         unsigned int x_lbits = 0, carry;
00206         AR_CRAY_128 x2, y, y2, z;
00207         AR_CRAY_64 spa, spb;
00208 
00209         /* Use the sign and exponent together in a 16-bit package */
        /* The XOR of the operand signs falls out of the sum of the two
         * sign bits placed above the exponent field; the true sign is
         * recovered at the end by shifting x_expo back down. */
00210         test_expo = x_expo = a_expo + b_expo - AR_CRAY_EXPO_BIAS;
00211         x_expo += (a->sign << AR_CRAY_EXPO_BITS) +
00212                   (b->sign << AR_CRAY_EXPO_BITS);
00213 
00214         /* Special-case zero arguments or zero SP products */
00215         if (!(a->coeff0 | a->coeff1 | a->coeff2 |
00216               a->coeff3 | a->coeff4 | a->coeff5) ||
00217             !(b->coeff0 | b->coeff1 | b->coeff2 |
00218               b->coeff3 | b->coeff4 | b->coeff5)) {
00219                 ZEROCRAY128 (*x);
00220                 return AR_STAT_ZERO;
00221         }
00222 
        /* y/z are working copies; x holds the high half of the running
         * product and x2 the low half, while y2 tracks the bits of y
         * shifted below the high half. */
00223         y = *a;
00224         z = *b;
00225         ZEROCRAY128 (x2);
00226         ZEROCRAY128 (y2);
00227         ZEROCRAY128 (*x);
00228 
00229         /* Compute and sum the pyramid */
00230         for (i = 0; i < AR_CRAY128_COEFF_BITS; i++) {
00231 
00232                 /* Add scaled operand to sum, with truncation */
00233                 if (z.coeff0 & (1 << (AR_CRAY_C0_BITS - 1))) {
                        /* Low half first so its carry ripples into the
                         * high half through the shared carry variable. */
00234                         carry = 0;
00235                         ADDCRAY128 (x2, carry, x2, y2);
00236                         ADDCRAY128 (*x, carry, *x, y);
00237                         x_lbits += carry;
00238                 }
00239 
00240                 SHRIGHTCRAY128 (y2);
00241                 y2.coeff0 |= y.coeff5 << (AR_CRAY_C0_BITS - 1);
00242                 SHRIGHTCRAY128 (y);
00243                 SHLEFTCRAY128 (z);
00244         }
00245 
00246         /* Rounding */
        /* Round to nearest on the top bit of the low half; any nonzero
         * low-half bit makes the result inexact. */
00247         inexact = !!(x2.coeff0 | x2.coeff1 | x2.coeff2 | x2.coeff3 |
00248                         x2.coeff4 | x2.coeff5);
00249         carry = rnd = x2.coeff0 >> (AR_CRAY_C0_BITS - 1);
00250         INCCRAY128 (*x, carry);
00251         x_lbits += carry;
00252 
00253         /* Normalize right if necessary */
        /* Carry past the top bit: shift right, restore the leading 1,
         * and re-round (rnd ^= 1 compensates for the bit just shifted
         * out -- NOTE(review): inferred from the code; confirm against
         * the hardware rounding rules). */
00254         if (x_lbits) {
00255                 rnd ^= 1;
00256                 INCCRAY128 (*x, rnd);
00257                 SHRIGHTCRAY128 (*x);
00258                 x->coeff0 |= 1 << (AR_CRAY_C0_BITS - 1);
00259                 x_expo++;
00260         }
00261 
00262         /* Check for overflow with unadjusted result exponent */
        /* Note: && binds tighter than ||, so each operand's range test
         * is qualified by the other operand being nonzero. */
00263         if (test_expo >= AR_CRAY_MAX_EXPO ||
00264             a_expo > AR_CRAY_MAX_EXPO && !!b_expo ||
00265             b_expo > AR_CRAY_MAX_EXPO && !!a_expo)
00266                 res |= AR_STAT_OVERFLOW;
00267 
00268         /* Check for underflow */
        /* The biased exponent's range bit (one below the sign package)
         * must be set for an in-range result; otherwise flush to zero. */
00269         if (!(((x_expo + AR_CRAY_MIN_EXPO) >> (AR_CRAY_EXPO_BITS - 1)) & 1)) {
00270                 ZEROCRAY128 (*x);
00271                 x_expo = 0;
00272                 res |= AR_STAT_UNDERFLOW;
00273         }
00274 
        /* Unpack the sign/exponent package into the result fields. */
00275         x->expo = x_expo;
00276         x->sign = x_expo >> AR_CRAY_EXPO_BITS;
00277 
00278         if (!(x->sign | x->expo | x->coeff0 | x->coeff1 | x->coeff2 |
00279               x->coeff3 | x->coeff4 | x->coeff5))
00280                 res |= AR_STAT_ZERO;
00281         if (x->sign)
00282                 res |= AR_STAT_NEGATIVE;
00283         if (inexact)
00284                 res |= AR_STAT_INEXACT;
00285         return res;
00286 }
00287 
00288 
/* SCCS (USMID) and RCS identification strings, embedded in the object
 * file so version tools (what/ident) can report the source revision. */
00289 static char USMID [] = "\n%Z%%M%        %I%     %G% %U%\n";
00290 static char rcsid [] = "$Id: cray_fmul.c,v 1.1.1.1 2002-05-22 20:06:18 dsystem Exp $";
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines