Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
c_q_sqrt.c
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2 of the GNU General Public License as
00007   published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if 
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU General Public License along
00021   with this program; if not, write the Free Software Foundation, Inc., 59
00022   Temple Place - Suite 330, Boston MA 02111-1307, USA.
00023 
00024   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00025   Mountain View, CA 94043, or:
00026 
00027   http://www.sgi.com
00028 
00029   For further information regarding this notice, see:
00030 
00031   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00032 
00033 */
00034 
00035 
00036 /* =======================================================================
00037  * =======================================================================
00038  *
00039  *
00040  * =======================================================================
00041  * =======================================================================
00042  */
00043 
00044 #include <math.h>
00045 #include "defs.h"
00046 #include "quad.h"
00047 
00048 /* quad square root */
00049 
00050 /* note that this routine must be kept in sync with the corresponding libm
00051  * routine, qsqrt.
00052  */
00053 
00054 typedef union
00055 {
00056         struct
00057         {
00058                 UINT32 hi;
00059                 UINT32 lo;
00060         } word;
00061 
00062         double  d;
00063 } du;
00064 
00065 
00066 #ifndef __MATH_H__
00067 double  sqrt(double);
00068 
00069 #if _MIPS_ISA != _MIPS_ISA_MIPS1
00070 #pragma intrinsic (sqrt)
00071 #endif
00072 #endif /* __MATH_H__ */
00073 
00074 static  double  __dtwofloor(double);
00075 
00076 /* const1 is 1.0 + 2^(53 - 53/2), i.e. 1.0 + 2^27 */
00077 
00078 static  const du        const1 =
00079 {0x41a00000,    0x02000000};
00080 
00081 static const du         twopm53 =
00082 {0x3ca00000,    0x00000000};
00083 
00084 static const du         twopm54 =
00085 {0x3c900000,    0x00000000};
00086 
00087 static const du         twopm6 =
00088 {0x3f900000,    0x00000000};
00089 
00090 static const du         twop3 =
00091 {0x40200000,    0x00000000};
00092 
00093 static const du         twop108 =
00094 {0x46b00000,    0x00000000};
00095 
00096 #pragma weak c_q_sqrt = __c_q_sqrt
00097 #define c_q_sqrt __c_q_sqrt
00098 
00099 QUAD
00100 c_q_sqrt(QUAD x, INT *p_err )
00101 {
00102 INT64   ix, xpt;
00103 INT32   n;
00104 double  xfactor;
00105 double  w;
00106 double  quarterulp;
00107 double  p;
00108 double  hc, tc;
00109 QUAD    c;
00110 QUAD    u, z;
00111 
00112         /* adapted from T. J. Dekker's sqrt2 subroutine */
00113 
00114         *p_err = 0;
00115 
00116         /* extract exponent of x for some quick screening */
00117 
00118         DBL2LL(x.hi, ix);
00119         xpt = (ix >> DMANTWIDTH);
00120         xpt &= 0x7ff;
00121 
00122         if ( ix < 0 )
00123         {
00124                 z.hi = sqrt(x.hi);
00125                 z.lo = 0.0;
00126 
00127                 return ( z );
00128         }
00129 
00130         /* Avoid underflows and overflows in forming the products
00131            c.hi*const1.d, c.hi*c.hi, and hc*hc by scaling if
00132            necessary.  x should also be scaled if tc*tc is
00133            a denormal.
00134         */
00135 
00136         if ( (0x06d < xpt) && (xpt < 0x7fb) )
00137         {
00138                 /* normal case */
00139 
00140                 c.hi = sqrt(x.hi);
00141 
00142                 /*u.ld = _prodl(c.hi, c.hi);*/
00143 
00144                 p = c.hi*const1.d;
00145         
00146                 hc = (c.hi - p) + p;
00147                 tc = c.hi - hc;
00148         
00149                 u.hi = c.hi*c.hi;
00150                 u.lo = ((((hc*hc - u.hi) + hc*tc) + hc*tc) + tc*tc);
00151 
00152                 c.lo = 0.5*(((x.hi - u.hi) - u.lo) + x.lo)/c.hi;
00153 
00154                 /* if necessary, round c.lo so that the sum of c.hi and c.lo
00155                    has at most 107 significant bits
00156                 */
00157 
00158                 /* Compute a quarter ulp of c.hi */
00159 
00160                 DBL2LL(c.hi, ix);
00161                 ix >>= DMANTWIDTH;
00162                 ix <<= DMANTWIDTH;
00163                 LL2DBL(ix, w); /* w = dtwofloor(c.hi) */
00164 
00165                 /* Note that the size of an ulp changes at a
00166                  * power of two.
00167                  */
00168 
00169                 if ( (c.hi == w) && (c.lo < 0.0) )
00170                         w *= 0.5;
00171 
00172                 /* round c.lo if it's less than 1/4 ulp of w */
00173 
00174                 quarterulp = twopm54.d*fabs(w);
00175 
00176                 if ( fabs(c.lo) < quarterulp )
00177                 {
00178                         if ( c.lo >= 0.0 )
00179                         {
00180                                 c.lo = (quarterulp + c.lo) - quarterulp;
00181                         }
00182                         else
00183                         {
00184                                 c.lo = quarterulp + (c.lo - quarterulp);
00185                         }
00186 
00187                 }
00188 
00189                 z.hi = c.hi + c.lo;
00190                 z.lo = (c.hi - z.hi) + c.lo;
00191 
00192                 return ( z );
00193         }
00194 
00195         if ( xpt < 0x7ff )
00196         {
00197                 if ( x.hi == 0.0 )
00198                 {
00199                         z.lo = 0.0;
00200                         z.hi = x.hi;
00201 
00202                         return ( z );
00203                 }
00204 
00205                 if ( xpt <= 0x06d )
00206                 {
00207                         x.hi *= twop108.d;
00208                         x.lo *= twop108.d;
00209                         xfactor = twopm54.d;
00210                 }
00211                 else
00212                 {
00213                         x.hi *= twopm6.d;
00214                         x.lo *= twopm6.d;
00215                         xfactor = twop3.d;
00216                 }
00217 
00218                 c.hi = sqrt(x.hi);
00219 
00220                 /*u.ld = _prodl(c.hi, c.hi);*/
00221         
00222                 p = c.hi*const1.d;
00223         
00224                 hc = (c.hi - p) + p;
00225                 tc = c.hi - hc;
00226         
00227                 u.hi = c.hi*c.hi;
00228                 u.lo = ((((hc*hc - u.hi) + hc*tc) + hc*tc) + tc*tc);
00229 
00230                 c.lo = 0.5*(((x.hi - u.hi) - u.lo) + x.lo)/c.hi;
00231 
00232                 /* if necessary, round c.lo so that the sum of c.hi and c.lo
00233                    has at most 107 significant bits
00234                 */
00235 
00236                 /* Compute a quarter ulp of c.hi */
00237 
00238                 DBL2LL(c.hi, ix);
00239                 ix >>= DMANTWIDTH;
00240                 ix <<= DMANTWIDTH;
00241                 LL2DBL(ix, w); /* w = dtwofloor(c.hi) */
00242 
00243                 /* Note that the size of an ulp changes at a
00244                  * power of two.
00245                  */
00246 
00247                 if ( (c.hi == w) && (c.lo < 0.0) )
00248                         w *= 0.5;
00249 
00250                 /* round c.lo if it's less than 1/4 ulp of w */
00251 
00252                 quarterulp = twopm54.d*fabs(w);
00253 
00254                 if ( fabs(c.lo) < quarterulp )
00255                 {
00256                         if ( c.lo >= 0.0 )
00257                         {
00258                                 c.lo = (quarterulp + c.lo) - quarterulp;
00259                         }
00260                         else
00261                         {
00262                                 c.lo = quarterulp + (c.lo - quarterulp);
00263                         }
00264 
00265                 }
00266 
00267                 c.hi *= xfactor;
00268                 c.lo *= xfactor;
00269 
00270                 z.hi = c.hi + c.lo;
00271                 z.lo = (c.hi - z.hi) + c.lo;
00272 
00273                 return ( z );
00274         }
00275 
00276         z.lo = 0.0;
00277         z.hi = sqrt(x.hi);
00278 
00279         return ( z );
00280 }
00281 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines