OpenAD/Open64Dox/c__q__mul_8c_source.html

00001 /*
00002
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2 of the GNU General Public License as
00007   published by the Free Software Foundation.
00008
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00012
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement
00015   or the like.  Any license provided herein, whether implied or
00016   otherwise, applies only to this software file.  Patent licenses, if
00017   any, provided herein do not apply to combinations of this program with
00018   other software, or any other product whatsoever.
00019
00020   You should have received a copy of the GNU General Public License along
00021   with this program; if not, write the Free Software Foundation, Inc., 59
00022   Temple Place - Suite 330, Boston MA 02111-1307, USA.
00023
00024   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00025   Mountain View, CA 94043, or:
00026
00027   http://www.sgi.com
00028
00029   For further information regarding this notice, see:
00030
00031   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00032
00033 */
00034
00035
00036 /* =======================================================================
00037  * =======================================================================
00038  *
00039  *
00040  * =======================================================================
00041  * =======================================================================
00042  */
00043
00044
00045 #include "defs.h"
00046 #include "quad.h"
00047
00048 /* note that this routine must be kept in sync with the corresponding libc
00049  * routine, q_mul.
00050  */
00051
00052 typedef union
00053 {
00054         struct
00055         {
00056                 UINT32 hi;
00057                 UINT32 lo;
00058         } word;
00059
00060         double  d;
00061 } du;
00062
00063
00064 /* const1 is 1.0 + 2^(53 - 53/2), i.e. 1.0 + 2^27 */
00065
00066 static  const du        const1 =
00067 {0x41a00000,    0x02000000};
00068
00069 static  const du        twop590 =
00070 {0x64d00000,    0x00000000};
00071
00072 static  const du        twopm590 =
00073 {0x1b100000,    0x00000000};
00074
00075 static const du         inf =
00076 {0x7ff00000,    0x00000000};
00077
00078 #define NO      0
00079 #define YES     1
00080
00081 extern  QUAD    c_q_mul(QUAD, QUAD, INT *);
00082
00083 #pragma weak c_q_mul = __c_q_mul
00084 #define c_q_mul __c_q_mul
00085
00086 double  fabs(double);
00087 #pragma intrinsic (fabs)
00088
00089 QUAD
00090 c_q_mul(QUAD x, QUAD y, INT *p_err )
00091 {
00092 INT32   ixhi, iyhi;
00093 INT32   xptxhi, xptyhi;
00094 INT32   xpthi, xptlo;
00095 INT32   scaleup, scaledown;
00096 INT64   iz, iw, iqulp;
00097 double  xhi, xlo, yhi, ylo;
00098 double  xfactor, yfactor;
00099 double  hx, tx, hy, ty;
00100 double  p, q;
00101 double  w, ww;
00102 double  qulp;
00103 QUAD    z, u;
00104 double  rem;
00105 double  a, v, vv;
00106
00107         /* Avoid underflows and overflows in forming the products
00108            xhi*const1.d, yhi*const1.d, xhi*yhi, and hx*hy by scaling if
00109            necessary.  x and y should also be scaled if tx*ty is
00110            a denormal.
00111         */
00112
00113         *p_err = 0;
00114
00115         xhi = x.hi; xlo = x.lo;
00116         yhi = y.hi; ylo = y.lo;
00117
00118 #ifdef QUAD_DEBUG
00119         printf("c_q_mul: xhi = %08x%08x\n", *(INT32 *)&xhi, *((INT32 *)&xhi + 1));
00120         printf("c_q_mul: xlo = %08x%08x\n", *(INT32 *)&xlo, *((INT32 *)&xlo + 1));
00121         printf("c_q_mul: yhi = %08x%08x\n", *(INT32 *)&yhi, *((INT32 *)&yhi + 1));
00122         printf("c_q_mul: ylo = %08x%08x\n", *(INT32 *)&ylo, *((INT32 *)&ylo + 1));
00123 #endif
00124
00125
00126         iyhi = *(INT32 *)&yhi;
00127         xptyhi = (iyhi >> 20);
00128         xptyhi &= 0x7ff;
00129
00130         ixhi = *(INT32 *)&xhi;
00131         xptxhi = (ixhi >> 20);
00132         xptxhi &= 0x7ff;
00133
00134         xpthi = xptxhi;
00135         xptlo = xptyhi;
00136
00137         if ( xptxhi < xptyhi )
00138         {
00139                 xptlo = xptxhi;
00140                 xpthi = xptyhi;
00141         }
00142
00143         if ( (0x21a < xptlo) && (xpthi < 0x5fd) )
00144         {
00145                 /* normal case */
00146
00147                 /* Form the exact product of xhi and yhi using
00148                  * Dekker's algorithm.
00149                  */
00150
00151                 p = xhi*const1.d;
00152
00153                 hx = (xhi - p) + p;
00154                 tx = xhi - hx;
00155
00156                 q = yhi*const1.d;
00157
00158                 hy = (yhi - q) + q;
00159                 ty = yhi - hy;
00160
00161                 u.hi = xhi*yhi;
00162                 u.lo = (((hx*hy - u.hi) + hx*ty) + hy*tx) + tx*ty;
00163
00164                 /* Add the remaining pieces using the Kahan summation formula. */
00165
00166                 a = xhi*ylo;
00167
00168                 ww = u.lo + a;
00169                 rem = u.lo - ww + a;
00170
00171                 v = u.hi + ww;
00172                 vv = u.hi - v + ww;
00173
00174                 a = xlo*yhi + rem;
00175                 u.lo = vv + a;
00176
00177                 rem = vv - u.lo + a;
00178
00179                 w = v + u.lo;
00180                 ww = v - w + u.lo;
00181
00182                 a = xlo*ylo + rem;
00183
00184                 vv = ww + a;
00185
00186                 z.hi = w + vv;
00187                 DBL2LL(z.hi, iz);
00188                 z.lo = w - z.hi + vv;
00189
00190                 /* if necessary, round z.lo so that the sum of z.hi and z.lo has at most
00191                    107 significant bits
00192                 */
00193
00194                 /* first, compute a quarter ulp of z */
00195
00196                 iw = (iz >> DMANTWIDTH);
00197                 iqulp = (iw & 0x7ff);
00198                 iqulp -= 54;
00199                 iqulp <<= DMANTWIDTH;
00200
00201                 if ( iqulp > 0 )
00202                 {
00203                         LL2DBL( iqulp, qulp );
00204                         iw <<= DMANTWIDTH;
00205
00206                         /* Note that the size of an ulp changes at a
00207                          * power of two.
00208                          */
00209
00210                         if ( iw == iz )
00211                                 goto fix;
00212
00213                         if ( fabs(z.lo) >= qulp )
00214                         {
00215                                 qulp = 0.0;
00216                         }
00217                         else if ( z.lo < 0.0 )
00218                                 qulp = -qulp;
00219
00220                         z.lo += qulp;
00221                         z.lo -= qulp;
00222                 }
00223
00224 #ifdef QUAD_DEBUG
00225         printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1));
00226         printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1));
00227 #endif
00228
00229                 return ( z );
00230         }
00231         else if ( (xptxhi == 0x7ff) || (xhi == 0.0) || (yhi == 0.0) )
00232         {
00233                 z.hi = xhi*yhi;
00234                 z.lo = 0.0;
00235
00236 #ifdef QUAD_DEBUG
00237         printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1));
00238         printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1));
00239 #endif
00240
00241                 return ( z );
00242         }
00243         else
00244         {
00245                 xfactor = 1.0;
00246                 yfactor = 1.0;
00247                 scaleup = scaledown = NO;
00248
00249                 if ( xptxhi <= 0x21a )
00250                 {
00251                         xhi *= twop590.d;
00252                         xlo *= twop590.d;
00253                         xfactor = twopm590.d;
00254                         scaleup = YES;
00255                 }
00256
00257                 if ( xptyhi <= 0x21a )
00258                 {
00259                         yhi *= twop590.d;
00260                         ylo *= twop590.d;
00261                         yfactor = twopm590.d;
00262                         scaleup = YES;
00263                 }
00264
00265                 if ( xptxhi >= 0x5fd )
00266                 {
00267                         xhi *= twopm590.d;
00268                         xlo *= twopm590.d;
00269                         xfactor = twop590.d;
00270                         scaledown = YES;
00271                 }
00272
00273                 if ( xptyhi >= 0x5fd )
00274                 {
00275                         yhi *= twopm590.d;
00276                         ylo *= twopm590.d;
00277                         yfactor = twop590.d;
00278                         scaledown = YES;
00279                 }
00280
00281                 if ( (scaleup == YES) && (scaledown == YES) )
00282                 {
00283                         xfactor = yfactor = 1.0;
00284                 }
00285
00286
00287                 /* Form the exact product of xhi and yhi using
00288                  * Dekker's algorithm.
00289                  */
00290
00291                 p = xhi*const1.d;
00292
00293                 hx = (xhi - p) + p;
00294                 tx = xhi - hx;
00295
00296                 q = yhi*const1.d;
00297
00298                 hy = (yhi - q) + q;
00299                 ty = yhi - hy;
00300
00301                 u.hi = xhi*yhi;
00302                 u.lo = (((hx*hy - u.hi) + hx*ty) + hy*tx) + tx*ty;
00303
00304                 /* Add the remaining pieces using the Kahan summation formula. */
00305
00306                 a = xhi*ylo;
00307
00308                 ww = u.lo + a;
00309                 rem = u.lo - ww + a;
00310
00311                 v = u.hi + ww;
00312                 vv = u.hi - v + ww;
00313
00314                 a = xlo*yhi + rem;
00315                 u.lo = vv + a;
00316
00317                 rem = vv - u.lo + a;
00318
00319                 w = v + u.lo;
00320                 ww = v - w + u.lo;
00321
00322                 a = xlo*ylo + rem;
00323
00324                 vv = ww + a;
00325
00326                 z.hi = w + vv;
00327                 z.lo = w - z.hi + vv;
00328
00329                 /* Rescale z.hi and z.lo before rounding */
00330
00331                 w = z.hi*xfactor;
00332                 w *= yfactor;
00333
00334                 if ( (w == 0.0) || (fabs(w) == inf.d) )
00335                 {
00336                         z.hi = w;
00337                         z.lo = 0.0;
00338
00339 #ifdef QUAD_DEBUG
00340         printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1));
00341         printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1));
00342 #endif
00343
00344                         return ( z );
00345                 }
00346
00347                 ww = z.lo*xfactor;
00348                 ww *= yfactor;
00349
00350                 z.hi = w + ww;
00351                 DBL2LL(z.hi, iz);
00352                 z.lo = w - z.hi + ww;
00353
00354 #ifdef QUAD_DEBUG
00355         printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1));
00356         printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1));
00357 #endif
00358
00359                 /* if necessary, round z.lo so that the sum of z.hi and z.lo has at most
00360                    107 significant bits
00361                 */
00362
00363                 /* first, compute a quarter ulp of z */
00364
00365                 iw = (iz >> DMANTWIDTH);
00366                 iqulp = (iw & 0x7ff);
00367                 iqulp -= 54;
00368                 iqulp <<= DMANTWIDTH;
00369
00370                 if ( iqulp > 0 )
00371                 {
00372                         LL2DBL( iqulp, qulp );
00373                         iw <<= DMANTWIDTH;
00374
00375                         /* Note that the size of an ulp changes at a
00376                          * power of two.
00377                          */
00378
00379                         if ( iw == iz )
00380                                 goto fix2;
00381
00382 back:
00383                         if ( fabs(z.lo) >= qulp )
00384                         {
00385                                 qulp = 0.0;
00386                         }
00387                         else if ( z.lo < 0.0 )
00388                                 qulp = -qulp;
00389
00390                         z.lo += qulp;
00391                         z.lo -= qulp;
00392                 }
00393
00394                 return ( z );
00395         }
00396
00397 fix:
00398         if ( ((z.hi > 0.0) && (z.lo < 0.0)) || ((z.hi < 0.0) && (z.lo > 0.0)) )
00399                 qulp *= 0.5;
00400
00401         if ( fabs(z.lo) >= qulp )
00402         {
00403                 qulp = 0.0;
00404         }
00405         else if ( z.lo < 0.0 )
00406                 qulp = -qulp;
00407
00408         z.lo += qulp;
00409         z.lo -= qulp;
00410
00411         return ( z );
00412
00413 fix2:
00414         if ( ((z.hi > 0.0) && (z.lo < 0.0)) || ((z.hi < 0.0) && (z.lo > 0.0)) )
00415                 qulp *= 0.5;
00416
00417         goto back;
00418 }
00419