Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2 of the GNU General Public License as 00007 published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU General Public License along 00021 with this program; if not, write the Free Software Foundation, Inc., 59 00022 Temple Place - Suite 330, Boston MA 02111-1307, USA. 00023 00024 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00025 Mountain View, CA 94043, or: 00026 00027 http://www.sgi.com 00028 00029 For further information regarding this notice, see: 00030 00031 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00032 00033 */ 00034 00035 00036 #include "arith.internal.h" 00037 00038 static AR_IEEE_64 P0 = { 0, 0x400, 0x3, 0xc9b0, 0x5d7f, 0xa527 }; 00039 static AR_IEEE_64 P1 = { 1, 0x400, 0x9, 0x6989, 0x055f, 0x3e6c }; 00040 static AR_IEEE_64 P2 = { 0, 0x400, 0x4, 0xbaa5, 0x10fa, 0x5f82 }; 00041 static AR_IEEE_64 P3 = { 1, 0x3ff, 0x0, 0xeed8, 0x47a3, 0x911c }; 00042 static AR_IEEE_64 P4 = { 0, 0x3fc, 0x5, 0x8567, 0x3a3b, 0xeba9 }; 00043 00044 static AR_IEEE_64 HLF = { 0, 0x3fe, 0, 0, 0, 0 }; 00045 static AR_IEEE_64 ONE = { 0, 0x3ff, 0, 0, 0, 0 }; 00046 00047 int 00048 ar_isqrt64 (AR_IEEE_64 *a, 00049 const AR_IEEE_64 *x, 00050 int roundmode) { 00051 00052 int res = AR_STAT_OK; 00053 int expo; 00054 AR_TYPE int64 = AR_Int_64_S; 00055 AR_IEEE_64 t, u, y; 00056 00057 *a = *x; 00058 00059 /* Test for NaNs and Infs */ 00060 if (x->expo > AR_IEEE64_MAX_EXPO) { 00061 /* For NaNs and Infs, return same */ 00062 if (!IS_IEEE64_NZ_COEFF(x)) { 00063 /* infinity */ 00064 if (x->sign) 00065 res |= AR_STAT_NEGATIVE; 00066 } 00067 return res | AR_STAT_UNDEFINED; 00068 } 00069 00070 /* Test for zero */ 00071 if (x->expo == 0 && !IS_IEEE64_NZ_COEFF(x)) { 00072 if (x->sign) 00073 res |= AR_STAT_NEGATIVE; 00074 return res | AR_STAT_ZERO; 00075 } 00076 00077 /* Test for negative */ 00078 if (x->sign) { 00079 /* negatives yield quiet NaNs */ 00080 a->expo = AR_IEEE64_MAX_EXPO + 1; 00081 if (HOST_IS_MIPS) { 00082 a->coeff0 &= MASKR (AR_IEEE64_C0_BITS - 1); 00083 if (!IS_IEEE64_NZ_COEFF(a)) { 00084 a->coeff0 |= 1; 00085 } 00086 } 00087 else { 00088 a->coeff0 |= (1 << (AR_IEEE64_C0_BITS - 1)); 00089 } 00090 return res | AR_STAT_UNDEFINED; 00091 } 00092 00093 /* Test for denorms (they have zero exponents) */ 00094 if (ar_state_register.ar_denorms_trap && 00095 !x->expo && IS_IEEE64_NZ_COEFF(a)) { 00096 /* operand is a denorm and denorms cause a trap */ 00097 a->expo = AR_IEEE64_MAX_EXPO + 1; 00098 return res | AR_STAT_UNDEFINED; 00099 } 00100 00101 /* Minimax approx for 1/sqrt(x), on the interval u = (0.5,2.0), 00102 with maximum error = -2.11772E-03 */ 00103 00104 u = *x; 00105 00106 /* scale value into [0.5,2.0) */ 00107 expo = u.expo & 0x7fe; 00108 u.expo = (u.expo ^ expo) | 0x3fe; 00109 00110 /* Approximate 1/sqrt(u) = P0 + u*(P1 + u*(P2 + u*(P3 + u*P4))) */ 00111 ar_ifmul64(&y, &u, (AR_IEEE_64*)&P4, roundmode); 00112 ar_ifadd64(&y, (AR_IEEE_64*)&P3, &y, roundmode); 00113 ar_ifmul64(&y, &u, &y, roundmode); 00114 ar_ifadd64(&y, (AR_IEEE_64*)&P2, &y, roundmode); 00115 ar_ifmul64(&y, &u, &y, roundmode); 00116 ar_ifadd64(&y, (AR_IEEE_64*)&P1, &y, roundmode); 00117 ar_ifmul64(&y, &u, &y, roundmode); 00118 ar_ifadd64(&u, (AR_IEEE_64*)&P0, &y, roundmode); 00119 00120 /* scale by 2**(-N) */ 00121 u.expo -= ((expo - 0x3fe)>>1); 00122 u.sign = 0; 00123 00124 /* 2 Newton iterations: u += 0.5*u*(1.0 - u*(u*x)) */ 00125 ar_ifmul64(&y, &u, x, roundmode); 00126 ar_ifmul64(&y, &u, &y, roundmode); 00127 ar_ifsub64(&y, (AR_IEEE_64*)&ONE, &y, roundmode); 00128 ar_ifmul64(&y, &u, &y, roundmode); 00129 ar_ifmul64(&y, (AR_IEEE_64*)&HLF, &y, roundmode); 00130 ar_ifadd64(&u, &u, &y, roundmode); 00131 00132 ar_ifmul64(&y, &u, x, roundmode); 00133 ar_ifmul64(&y, &u, &y, roundmode); 00134 ar_ifsub64(&y, (AR_IEEE_64*)&ONE, &y, roundmode); 00135 ar_ifmul64(&y, &u, &y, roundmode); 00136 ar_ifmul64(&y, (AR_IEEE_64*)&HLF, &y, roundmode); 00137 ar_ifadd64(&u, &u, &y, roundmode); 00138 00139 /* 1 more Newton iteration: u = u*x + 0.5*u*x*(1.0 - u*(u*x)) */ 00140 ar_ifmul64(&y, &u, x, roundmode); 00141 ar_ifmul64(&u, &u, &y, roundmode); 00142 ar_ifsub64(&u, (AR_IEEE_64*)&ONE, &u, roundmode); 00143 ar_ifmul64(&u, &y, &u, roundmode); 00144 ar_ifmul64(&u, (AR_IEEE_64*)&HLF, &u, roundmode); 00145 ar_ifadd64(&u, &y, &u, roundmode); 00146 00147 /* use the Tuckerman test to get the last bit */ 00148 00149 ar_add_integer((ar_data*)&y, &int64, 00150 (ar_data*)&u, &int64, (ar_data*)&AR_const_one, &int64); 00151 ar_ifmul64(&t, &u, &y, AR_ROUND_ZERO); 00152 if(ar_ifcmp64(&t, x) & AR_STAT_NEGATIVE) 00153 u = y; 00154 00155 ar_subtract_integer((ar_data*)&y, &int64, 0, 00156 (ar_data*)&u, &int64, (ar_data*)&AR_const_one, &int64); 00157 ar_ifmul64(&t, &u, &y, AR_ROUND_ZERO); 00158 if(ar_ifcmp64(&t, x) & AR_STAT_NEGATIVE) 00159 *a = u; 00160 else 00161 *a = y; 00162 00163 return AR_STAT_OK; 00164 } 00165 00166 00167 static char USMID [] = "\n%Z%%M% %I% %G% %U%\n"; 00168 static char rcsid [] = "$Id: ieee_sqrt.c,v 1.1.1.1 2002-05-22 20:06:19 dsystem Exp $";