Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
ieee_sqrt.c
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2 of the GNU General Public License as
00007   published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if 
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU General Public License along
00021   with this program; if not, write the Free Software Foundation, Inc., 59
00022   Temple Place - Suite 330, Boston MA 02111-1307, USA.
00023 
00024   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00025   Mountain View, CA 94043, or:
00026 
00027   http://www.sgi.com
00028 
00029   For further information regarding this notice, see:
00030 
00031   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00032 
00033 */
00034 
00035 
00036 #include "arith.internal.h"
00037 
00038 static AR_IEEE_64 P0 = { 0, 0x400, 0x3, 0xc9b0, 0x5d7f, 0xa527 };
00039 static AR_IEEE_64 P1 = { 1, 0x400, 0x9, 0x6989, 0x055f, 0x3e6c };
00040 static AR_IEEE_64 P2 = { 0, 0x400, 0x4, 0xbaa5, 0x10fa, 0x5f82 };
00041 static AR_IEEE_64 P3 = { 1, 0x3ff, 0x0, 0xeed8, 0x47a3, 0x911c };
00042 static AR_IEEE_64 P4 = { 0, 0x3fc, 0x5, 0x8567, 0x3a3b, 0xeba9 };
00043 
00044 static AR_IEEE_64 HLF = { 0, 0x3fe, 0, 0, 0, 0 };
00045 static AR_IEEE_64 ONE = { 0, 0x3ff, 0, 0, 0, 0 };
00046 
00047 int
00048 ar_isqrt64 (AR_IEEE_64 *a,
00049                         const AR_IEEE_64 *x,
00050                         int roundmode) {
00051 
00052         int res = AR_STAT_OK;
00053         int expo;
00054         AR_TYPE int64  = AR_Int_64_S;
00055         AR_IEEE_64 t, u, y;
00056  
00057         *a = *x;
00058 
00059         /* Test for NaNs and Infs */
00060         if (x->expo > AR_IEEE64_MAX_EXPO) {
00061                 /* For NaNs and Infs, return same */
00062                 if (!IS_IEEE64_NZ_COEFF(x)) {
00063                         /* infinity */
00064                         if (x->sign)
00065                                 res |= AR_STAT_NEGATIVE;
00066                 }
00067                 return res | AR_STAT_UNDEFINED;
00068         }
00069 
00070         /* Test for zero */
00071         if (x->expo == 0 && !IS_IEEE64_NZ_COEFF(x)) {
00072                 if (x->sign)
00073                         res |= AR_STAT_NEGATIVE;
00074                 return res | AR_STAT_ZERO;
00075         }
00076 
00077         /* Test for negative */
00078         if (x->sign) {
00079                 /* negatives yield quiet NaNs */
00080                 a->expo = AR_IEEE64_MAX_EXPO + 1;
00081                 if (HOST_IS_MIPS) {
00082                         a->coeff0 &= MASKR (AR_IEEE64_C0_BITS - 1);
00083                         if (!IS_IEEE64_NZ_COEFF(a)) {
00084                                 a->coeff0 |= 1;
00085                         }
00086                 }
00087                 else {
00088                         a->coeff0 |= (1 << (AR_IEEE64_C0_BITS - 1));
00089                 }
00090                 return res | AR_STAT_UNDEFINED;
00091         }
00092 
00093         /* Test for denorms (they have zero exponents) */
00094         if (ar_state_register.ar_denorms_trap &&
00095                 !x->expo && IS_IEEE64_NZ_COEFF(a)) {
00096                 /* operand is a denorm and denorms cause a trap */
00097                 a->expo = AR_IEEE64_MAX_EXPO + 1;
00098                 return res | AR_STAT_UNDEFINED;
00099         }
00100 
00101         /*  Minimax approx for 1/sqrt(x), on the interval u = (0.5,2.0), 
00102             with maximum error = -2.11772E-03 */
00103 
00104         u = *x;
00105 
00106         /* scale value into [0.5,2.0) */
00107         expo = u.expo & 0x7fe;
00108         u.expo = (u.expo ^ expo) | 0x3fe;
00109 
00110         /* Approximate 1/sqrt(u) = P0 + u*(P1 + u*(P2 + u*(P3 + u*P4))) */
00111         ar_ifmul64(&y, &u, (AR_IEEE_64*)&P4, roundmode);
00112         ar_ifadd64(&y, (AR_IEEE_64*)&P3, &y, roundmode);
00113         ar_ifmul64(&y, &u,  &y, roundmode);
00114         ar_ifadd64(&y, (AR_IEEE_64*)&P2, &y, roundmode);
00115         ar_ifmul64(&y, &u,  &y, roundmode);
00116         ar_ifadd64(&y, (AR_IEEE_64*)&P1, &y, roundmode);
00117         ar_ifmul64(&y, &u,  &y, roundmode);
00118         ar_ifadd64(&u, (AR_IEEE_64*)&P0, &y, roundmode);
00119 
00120         /* scale by 2**(-N) */
00121         u.expo -= ((expo - 0x3fe)>>1);
00122         u.sign = 0;
00123 
00124         /* 2 Newton iterations:  u += 0.5*u*(1.0 - u*(u*x)) */
00125         ar_ifmul64(&y,   &u,  x, roundmode);
00126         ar_ifmul64(&y,   &u, &y, roundmode);
00127         ar_ifsub64(&y, (AR_IEEE_64*)&ONE, &y, roundmode);
00128         ar_ifmul64(&y,   &u, &y, roundmode);
00129         ar_ifmul64(&y, (AR_IEEE_64*)&HLF, &y, roundmode);
00130         ar_ifadd64(&u,   &u, &y, roundmode);
00131 
00132         ar_ifmul64(&y,   &u,  x, roundmode);
00133         ar_ifmul64(&y,   &u, &y, roundmode);
00134         ar_ifsub64(&y, (AR_IEEE_64*)&ONE, &y, roundmode);
00135         ar_ifmul64(&y,   &u, &y, roundmode);
00136         ar_ifmul64(&y, (AR_IEEE_64*)&HLF, &y, roundmode);
00137         ar_ifadd64(&u,   &u, &y, roundmode);
00138 
00139         /* 1 more Newton iteration:  u = u*x + 0.5*u*x*(1.0 - u*(u*x)) */
00140         ar_ifmul64(&y,   &u,  x, roundmode);
00141         ar_ifmul64(&u,   &u, &y, roundmode);
00142         ar_ifsub64(&u, (AR_IEEE_64*)&ONE, &u, roundmode);
00143         ar_ifmul64(&u,   &y, &u, roundmode);
00144         ar_ifmul64(&u, (AR_IEEE_64*)&HLF, &u, roundmode);
00145         ar_ifadd64(&u,   &y, &u, roundmode);
00146 
00147         /* use the Tuckerman test to get the last bit */
00148 
00149         ar_add_integer((ar_data*)&y, &int64,
00150                                    (ar_data*)&u, &int64, (ar_data*)&AR_const_one, &int64);
00151         ar_ifmul64(&t, &u, &y, AR_ROUND_ZERO);
00152         if(ar_ifcmp64(&t, x) & AR_STAT_NEGATIVE)
00153                 u = y;
00154 
00155         ar_subtract_integer((ar_data*)&y, &int64, 0,
00156                                    (ar_data*)&u, &int64, (ar_data*)&AR_const_one, &int64);
00157         ar_ifmul64(&t, &u, &y, AR_ROUND_ZERO);
00158         if(ar_ifcmp64(&t, x) & AR_STAT_NEGATIVE)
00159                 *a = u;
00160         else
00161                 *a = y;
00162 
00163         return AR_STAT_OK;
00164 }
00165 
00166 
00167 static char USMID [] = "\n%Z%%M%        %I%     %G% %U%\n";
00168 static char rcsid [] = "$Id: ieee_sqrt.c,v 1.1.1.1 2002-05-22 20:06:19 dsystem Exp $";
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines