Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
c_q_mul.c
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2 of the GNU General Public License as
00007   published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if 
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU General Public License along
00021   with this program; if not, write the Free Software Foundation, Inc., 59
00022   Temple Place - Suite 330, Boston MA 02111-1307, USA.
00023 
00024   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00025   Mountain View, CA 94043, or:
00026 
00027   http://www.sgi.com
00028 
00029   For further information regarding this notice, see:
00030 
00031   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00032 
00033 */
00034 
00035 
00036 /* =======================================================================
00037  * =======================================================================
00038  *
00039  *
00040  * =======================================================================
00041  * =======================================================================
00042  */
00043 
00044 
00045 #include "defs.h"
00046 #include "quad.h"
00047 
00048 /* note that this routine must be kept in sync with the corresponding libc
00049  * routine, q_mul.
00050  */
00051 
00052 typedef union
00053 {
00054         struct
00055         {
00056                 UINT32 hi;
00057                 UINT32 lo;
00058         } word;
00059 
00060         double  d;
00061 } du;
00062 
00063 
00064 /* const1 is 1.0 + 2^(53 - 53/2), i.e. 1.0 + 2^27 */
00065 
00066 static  const du        const1 =
00067 {0x41a00000,    0x02000000};
00068 
00069 static  const du        twop590 =
00070 {0x64d00000,    0x00000000};
00071 
00072 static  const du        twopm590 =
00073 {0x1b100000,    0x00000000};
00074 
00075 static const du         inf =
00076 {0x7ff00000,    0x00000000};
00077 
00078 #define NO      0
00079 #define YES     1
00080 
00081 extern  QUAD    c_q_mul(QUAD, QUAD, INT *);
00082 
00083 #pragma weak c_q_mul = __c_q_mul
00084 #define c_q_mul __c_q_mul
00085 
00086 double  fabs(double);
00087 #pragma intrinsic (fabs)
00088 
00089 QUAD
00090 c_q_mul(QUAD x, QUAD y, INT *p_err )
00091 {
00092 INT32   ixhi, iyhi;
00093 INT32   xptxhi, xptyhi;
00094 INT32   xpthi, xptlo;
00095 INT32   scaleup, scaledown;
00096 INT64   iz, iw, iqulp;
00097 double  xhi, xlo, yhi, ylo;
00098 double  xfactor, yfactor;
00099 double  hx, tx, hy, ty;
00100 double  p, q;
00101 double  w, ww;
00102 double  qulp;
00103 QUAD    z, u;
00104 double  rem;
00105 double  a, v, vv;
00106 
00107         /* Avoid underflows and overflows in forming the products
00108            xhi*const1.d, yhi*const1.d, xhi*yhi, and hx*hy by scaling if
00109            necessary.  x and y should also be scaled if tx*ty is
00110            a denormal.
00111         */
00112 
00113         *p_err = 0;
00114 
00115         xhi = x.hi; xlo = x.lo;
00116         yhi = y.hi; ylo = y.lo;
00117 
00118 #ifdef QUAD_DEBUG
00119         printf("c_q_mul: xhi = %08x%08x\n", *(INT32 *)&xhi, *((INT32 *)&xhi + 1));
00120         printf("c_q_mul: xlo = %08x%08x\n", *(INT32 *)&xlo, *((INT32 *)&xlo + 1));
00121         printf("c_q_mul: yhi = %08x%08x\n", *(INT32 *)&yhi, *((INT32 *)&yhi + 1));
00122         printf("c_q_mul: ylo = %08x%08x\n", *(INT32 *)&ylo, *((INT32 *)&ylo + 1));
00123 #endif
00124 
00125 
00126         iyhi = *(INT32 *)&yhi;
00127         xptyhi = (iyhi >> 20);
00128         xptyhi &= 0x7ff;
00129 
00130         ixhi = *(INT32 *)&xhi;
00131         xptxhi = (ixhi >> 20);
00132         xptxhi &= 0x7ff;
00133 
00134         xpthi = xptxhi;
00135         xptlo = xptyhi;
00136 
00137         if ( xptxhi < xptyhi )
00138         {
00139                 xptlo = xptxhi;
00140                 xpthi = xptyhi;
00141         }
00142 
00143         if ( (0x21a < xptlo) && (xpthi < 0x5fd) )
00144         {
00145                 /* normal case */
00146 
00147                 /* Form the exact product of xhi and yhi using
00148                  * Dekker's algorithm.
00149                  */
00150 
00151                 p = xhi*const1.d;
00152         
00153                 hx = (xhi - p) + p;
00154                 tx = xhi - hx;
00155         
00156                 q = yhi*const1.d;
00157         
00158                 hy = (yhi - q) + q;
00159                 ty = yhi - hy;
00160         
00161                 u.hi = xhi*yhi;
00162                 u.lo = (((hx*hy - u.hi) + hx*ty) + hy*tx) + tx*ty;
00163 
00164                 /* Add the remaining pieces using the Kahan summation formula. */
00165 
00166                 a = xhi*ylo;
00167 
00168                 ww = u.lo + a;
00169                 rem = u.lo - ww + a;
00170 
00171                 v = u.hi + ww;
00172                 vv = u.hi - v + ww;
00173 
00174                 a = xlo*yhi + rem;
00175                 u.lo = vv + a;
00176 
00177                 rem = vv - u.lo + a;
00178 
00179                 w = v + u.lo;
00180                 ww = v - w + u.lo;
00181 
00182                 a = xlo*ylo + rem;
00183 
00184                 vv = ww + a;
00185 
00186                 z.hi = w + vv;
00187                 DBL2LL(z.hi, iz);
00188                 z.lo = w - z.hi + vv;
00189 
00190                 /* if necessary, round z.lo so that the sum of z.hi and z.lo has at most
00191                    107 significant bits
00192                 */
00193 
00194                 /* first, compute a quarter ulp of z */
00195 
00196                 iw = (iz >> DMANTWIDTH);
00197                 iqulp = (iw & 0x7ff);
00198                 iqulp -= 54;
00199                 iqulp <<= DMANTWIDTH;
00200 
00201                 if ( iqulp > 0 )
00202                 {
00203                         LL2DBL( iqulp, qulp );
00204                         iw <<= DMANTWIDTH;
00205 
00206                         /* Note that the size of an ulp changes at a
00207                          * power of two.
00208                          */
00209 
00210                         if ( iw == iz )
00211                                 goto fix;
00212 
00213                         if ( fabs(z.lo) >= qulp )
00214                         {
00215                                 qulp = 0.0;
00216                         }
00217                         else if ( z.lo < 0.0 )
00218                                 qulp = -qulp;
00219 
00220                         z.lo += qulp;
00221                         z.lo -= qulp;
00222                 }
00223 
00224 #ifdef QUAD_DEBUG
00225         printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1));
00226         printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1));
00227 #endif
00228 
00229                 return ( z );
00230         }
00231         else if ( (xptxhi == 0x7ff) || (xhi == 0.0) || (yhi == 0.0) )
00232         {
00233                 z.hi = xhi*yhi;
00234                 z.lo = 0.0;
00235 
00236 #ifdef QUAD_DEBUG
00237         printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1));
00238         printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1));
00239 #endif
00240 
00241                 return ( z );
00242         }
00243         else
00244         {
00245                 xfactor = 1.0;
00246                 yfactor = 1.0;
00247                 scaleup = scaledown = NO;
00248 
00249                 if ( xptxhi <= 0x21a )
00250                 {
00251                         xhi *= twop590.d;
00252                         xlo *= twop590.d;
00253                         xfactor = twopm590.d;
00254                         scaleup = YES;
00255                 }
00256 
00257                 if ( xptyhi <= 0x21a )
00258                 {
00259                         yhi *= twop590.d;
00260                         ylo *= twop590.d;
00261                         yfactor = twopm590.d;
00262                         scaleup = YES;
00263                 }
00264 
00265                 if ( xptxhi >= 0x5fd )
00266                 {
00267                         xhi *= twopm590.d;
00268                         xlo *= twopm590.d;
00269                         xfactor = twop590.d;
00270                         scaledown = YES;
00271                 }
00272 
00273                 if ( xptyhi >= 0x5fd )
00274                 {
00275                         yhi *= twopm590.d;
00276                         ylo *= twopm590.d;
00277                         yfactor = twop590.d;
00278                         scaledown = YES;
00279                 }
00280 
00281                 if ( (scaleup == YES) && (scaledown == YES) )
00282                 {
00283                         xfactor = yfactor = 1.0;
00284                 }
00285 
00286 
00287                 /* Form the exact product of xhi and yhi using
00288                  * Dekker's algorithm.
00289                  */
00290 
00291                 p = xhi*const1.d;
00292         
00293                 hx = (xhi - p) + p;
00294                 tx = xhi - hx;
00295         
00296                 q = yhi*const1.d;
00297         
00298                 hy = (yhi - q) + q;
00299                 ty = yhi - hy;
00300         
00301                 u.hi = xhi*yhi;
00302                 u.lo = (((hx*hy - u.hi) + hx*ty) + hy*tx) + tx*ty;
00303 
00304                 /* Add the remaining pieces using the Kahan summation formula. */
00305 
00306                 a = xhi*ylo;
00307 
00308                 ww = u.lo + a;
00309                 rem = u.lo - ww + a;
00310 
00311                 v = u.hi + ww;
00312                 vv = u.hi - v + ww;
00313 
00314                 a = xlo*yhi + rem;
00315                 u.lo = vv + a;
00316 
00317                 rem = vv - u.lo + a;
00318 
00319                 w = v + u.lo;
00320                 ww = v - w + u.lo;
00321 
00322                 a = xlo*ylo + rem;
00323 
00324                 vv = ww + a;
00325 
00326                 z.hi = w + vv;
00327                 z.lo = w - z.hi + vv;
00328 
00329                 /* Rescale z.hi and z.lo before rounding */
00330 
00331                 w = z.hi*xfactor;
00332                 w *= yfactor;
00333 
00334                 if ( (w == 0.0) || (fabs(w) == inf.d) )
00335                 {
00336                         z.hi = w;
00337                         z.lo = 0.0;
00338 
00339 #ifdef QUAD_DEBUG
00340         printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1));
00341         printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1));
00342 #endif
00343 
00344                         return ( z );
00345                 }
00346 
00347                 ww = z.lo*xfactor;
00348                 ww *= yfactor;
00349 
00350                 z.hi = w + ww;
00351                 DBL2LL(z.hi, iz);
00352                 z.lo = w - z.hi + ww;
00353 
00354 #ifdef QUAD_DEBUG
00355         printf("c_q_mul: z.hi = %08x%08x\n", *(INT32 *)&z.hi, *((INT32 *)&z.hi + 1));
00356         printf("c_q_mul: z.lo = %08x%08x\n", *(INT32 *)&z.lo, *((INT32 *)&z.lo + 1));
00357 #endif
00358 
00359                 /* if necessary, round z.lo so that the sum of z.hi and z.lo has at most
00360                    107 significant bits
00361                 */
00362 
00363                 /* first, compute a quarter ulp of z */
00364 
00365                 iw = (iz >> DMANTWIDTH);
00366                 iqulp = (iw & 0x7ff);
00367                 iqulp -= 54;
00368                 iqulp <<= DMANTWIDTH;
00369 
00370                 if ( iqulp > 0 )
00371                 {
00372                         LL2DBL( iqulp, qulp );
00373                         iw <<= DMANTWIDTH;
00374 
00375                         /* Note that the size of an ulp changes at a
00376                          * power of two.
00377                          */
00378 
00379                         if ( iw == iz )
00380                                 goto fix2;
00381 
00382 back:
00383                         if ( fabs(z.lo) >= qulp )
00384                         {
00385                                 qulp = 0.0;
00386                         }
00387                         else if ( z.lo < 0.0 )
00388                                 qulp = -qulp;
00389 
00390                         z.lo += qulp;
00391                         z.lo -= qulp;
00392                 }
00393 
00394                 return ( z );
00395         }
00396 
00397 fix:
00398         if ( ((z.hi > 0.0) && (z.lo < 0.0)) || ((z.hi < 0.0) && (z.lo > 0.0)) )
00399                 qulp *= 0.5;
00400 
00401         if ( fabs(z.lo) >= qulp )
00402         {
00403                 qulp = 0.0;
00404         }
00405         else if ( z.lo < 0.0 )
00406                 qulp = -qulp;
00407 
00408         z.lo += qulp;
00409         z.lo -= qulp;
00410 
00411         return ( z );
00412 
00413 fix2:
00414         if ( ((z.hi > 0.0) && (z.lo < 0.0)) || ((z.hi < 0.0) && (z.lo > 0.0)) )
00415                 qulp *= 0.5;
00416 
00417         goto back;
00418 }
00419 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines