Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2 of the GNU General Public License as 00007 published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU General Public License along 00021 with this program; if not, write the Free Software Foundation, Inc., 59 00022 Temple Place - Suite 330, Boston MA 02111-1307, USA. 00023 00024 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00025 Mountain View, CA 94043, or: 00026 00027 http://www.sgi.com 00028 00029 For further information regarding this notice, see: 00030 00031 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00032 00033 */ 00034 00035 00036 /* ======================================================================= 00037 * ======================================================================= 00038 * 00039 * 00040 * ======================================================================= 00041 * ======================================================================= 00042 */ 00043 00044 #include <math.h> 00045 #include "defs.h" 00046 #include "quad.h" 00047 00048 /* quad square root */ 00049 00050 /* note that this routine must be kept in sync with the corresponding libm 00051 * routine, qsqrt. 00052 */ 00053 00054 typedef union 00055 { 00056 struct 00057 { 00058 UINT32 hi; 00059 UINT32 lo; 00060 } word; 00061 00062 double d; 00063 } du; 00064 00065 00066 #ifndef __MATH_H__ 00067 double sqrt(double); 00068 00069 #if _MIPS_ISA != _MIPS_ISA_MIPS1 00070 #pragma intrinsic (sqrt) 00071 #endif 00072 #endif /* __MATH_H__ */ 00073 00074 static double __dtwofloor(double); 00075 00076 /* const1 is 1.0 + 2^(53 - 53/2), i.e. 1.0 + 2^27 */ 00077 00078 static const du const1 = 00079 {0x41a00000, 0x02000000}; 00080 00081 static const du twopm53 = 00082 {0x3ca00000, 0x00000000}; 00083 00084 static const du twopm54 = 00085 {0x3c900000, 0x00000000}; 00086 00087 static const du twopm6 = 00088 {0x3f900000, 0x00000000}; 00089 00090 static const du twop3 = 00091 {0x40200000, 0x00000000}; 00092 00093 static const du twop108 = 00094 {0x46b00000, 0x00000000}; 00095 00096 #pragma weak c_q_sqrt = __c_q_sqrt 00097 #define c_q_sqrt __c_q_sqrt 00098 00099 QUAD 00100 c_q_sqrt(QUAD x, INT *p_err ) 00101 { 00102 INT64 ix, xpt; 00103 INT32 n; 00104 double xfactor; 00105 double w; 00106 double quarterulp; 00107 double p; 00108 double hc, tc; 00109 QUAD c; 00110 QUAD u, z; 00111 00112 /* adapted from T. J. Dekker's sqrt2 subroutine */ 00113 00114 *p_err = 0; 00115 00116 /* extract exponent of x for some quick screening */ 00117 00118 DBL2LL(x.hi, ix); 00119 xpt = (ix >> DMANTWIDTH); 00120 xpt &= 0x7ff; 00121 00122 if ( ix < 0 ) 00123 { 00124 z.hi = sqrt(x.hi); 00125 z.lo = 0.0; 00126 00127 return ( z ); 00128 } 00129 00130 /* Avoid underflows and overflows in forming the products 00131 c.hi*const1.d, c.hi*c.hi, and hc*hc by scaling if 00132 necessary. x should also be scaled if tc*tc is 00133 a denormal. 00134 */ 00135 00136 if ( (0x06d < xpt) && (xpt < 0x7fb) ) 00137 { 00138 /* normal case */ 00139 00140 c.hi = sqrt(x.hi); 00141 00142 /*u.ld = _prodl(c.hi, c.hi);*/ 00143 00144 p = c.hi*const1.d; 00145 00146 hc = (c.hi - p) + p; 00147 tc = c.hi - hc; 00148 00149 u.hi = c.hi*c.hi; 00150 u.lo = ((((hc*hc - u.hi) + hc*tc) + hc*tc) + tc*tc); 00151 00152 c.lo = 0.5*(((x.hi - u.hi) - u.lo) + x.lo)/c.hi; 00153 00154 /* if necessary, round c.lo so that the sum of c.hi and c.lo 00155 has at most 107 significant bits 00156 */ 00157 00158 /* Compute a quarter ulp of c.hi */ 00159 00160 DBL2LL(c.hi, ix); 00161 ix >>= DMANTWIDTH; 00162 ix <<= DMANTWIDTH; 00163 LL2DBL(ix, w); /* w = dtwofloor(c.hi) */ 00164 00165 /* Note that the size of an ulp changes at a 00166 * power of two. 00167 */ 00168 00169 if ( (c.hi == w) && (c.lo < 0.0) ) 00170 w *= 0.5; 00171 00172 /* round c.lo if it's less than 1/4 ulp of w */ 00173 00174 quarterulp = twopm54.d*fabs(w); 00175 00176 if ( fabs(c.lo) < quarterulp ) 00177 { 00178 if ( c.lo >= 0.0 ) 00179 { 00180 c.lo = (quarterulp + c.lo) - quarterulp; 00181 } 00182 else 00183 { 00184 c.lo = quarterulp + (c.lo - quarterulp); 00185 } 00186 00187 } 00188 00189 z.hi = c.hi + c.lo; 00190 z.lo = (c.hi - z.hi) + c.lo; 00191 00192 return ( z ); 00193 } 00194 00195 if ( xpt < 0x7ff ) 00196 { 00197 if ( x.hi == 0.0 ) 00198 { 00199 z.lo = 0.0; 00200 z.hi = x.hi; 00201 00202 return ( z ); 00203 } 00204 00205 if ( xpt <= 0x06d ) 00206 { 00207 x.hi *= twop108.d; 00208 x.lo *= twop108.d; 00209 xfactor = twopm54.d; 00210 } 00211 else 00212 { 00213 x.hi *= twopm6.d; 00214 x.lo *= twopm6.d; 00215 xfactor = twop3.d; 00216 } 00217 00218 c.hi = sqrt(x.hi); 00219 00220 /*u.ld = _prodl(c.hi, c.hi);*/ 00221 00222 p = c.hi*const1.d; 00223 00224 hc = (c.hi - p) + p; 00225 tc = c.hi - hc; 00226 00227 u.hi = c.hi*c.hi; 00228 u.lo = ((((hc*hc - u.hi) + hc*tc) + hc*tc) + tc*tc); 00229 00230 c.lo = 0.5*(((x.hi - u.hi) - u.lo) + x.lo)/c.hi; 00231 00232 /* if necessary, round c.lo so that the sum of c.hi and c.lo 00233 has at most 107 significant bits 00234 */ 00235 00236 /* Compute a quarter ulp of c.hi */ 00237 00238 DBL2LL(c.hi, ix); 00239 ix >>= DMANTWIDTH; 00240 ix <<= DMANTWIDTH; 00241 LL2DBL(ix, w); /* w = dtwofloor(c.hi) */ 00242 00243 /* Note that the size of an ulp changes at a 00244 * power of two. 00245 */ 00246 00247 if ( (c.hi == w) && (c.lo < 0.0) ) 00248 w *= 0.5; 00249 00250 /* round c.lo if it's less than 1/4 ulp of w */ 00251 00252 quarterulp = twopm54.d*fabs(w); 00253 00254 if ( fabs(c.lo) < quarterulp ) 00255 { 00256 if ( c.lo >= 0.0 ) 00257 { 00258 c.lo = (quarterulp + c.lo) - quarterulp; 00259 } 00260 else 00261 { 00262 c.lo = quarterulp + (c.lo - quarterulp); 00263 } 00264 00265 } 00266 00267 c.hi *= xfactor; 00268 c.lo *= xfactor; 00269 00270 z.hi = c.hi + c.lo; 00271 z.lo = (c.hi - z.hi) + c.lo; 00272 00273 return ( z ); 00274 } 00275 00276 z.lo = 0.0; 00277 z.hi = sqrt(x.hi); 00278 00279 return ( z ); 00280 } 00281