Open64 (mfef90, whirl2f, and IR tools)
TAG: version-openad; SVN changeset: 916
|
00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2 of the GNU General Public License as 00007 published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU General Public License along 00021 with this program; if not, write the Free Software Foundation, Inc., 59 00022 Temple Place - Suite 330, Boston MA 02111-1307, USA. 00023 00024 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00025 Mountain View, CA 94043, or: 00026 00027 http://www.sgi.com 00028 00029 For further information regarding this notice, see: 00030 00031 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00032 00033 */ 00034 00035 00036 /* ==================================================================== 00037 * ==================================================================== 00038 * 00039 * 00040 * Description: 00041 * 00042 * Support routines for target-specific functionality. 00043 * 00044 * ==================================================================== 00045 * ==================================================================== 00046 */ 00047 00048 #include "defs.h" 00049 #include "errors.h" 00050 #include "util.h" 00051 #include "tracing.h" 00052 #include "topcode.h" 00053 #include "wn.h" 00054 #include "opcode.h" 00055 #include "config_targ.h" 00056 #include "targ_isa_lits.h" 00057 #include "betarget.h" 00058 #include "w2op.h" 00059 00060 BOOL Targ_Lower_Float_To_Unsigned = FALSE; 00061 BOOL Targ_Lower_Unsigned_To_Float = FALSE; 00062 00063 // largest signed offset possible in small-frame stack model 00064 INT Max_Small_Frame_Offset = 0x1fff; // 13 bits 00065 00066 00067 /* only return machine_ops, TOP_UNDEFINED if not an exact correspondence */ 00068 TOP 00069 OPCODE_To_TOP (OPCODE opcode) 00070 { 00071 OPERATOR opr = OPCODE_operator (opcode); 00072 TYPE_ID rtype = OPCODE_rtype (opcode); 00073 TYPE_ID desc = OPCODE_desc (opcode); 00074 00075 switch (opr) { 00076 00077 case OPR_FORWARD_BARRIER: 00078 return TOP_fwd_bar; 00079 00080 case OPR_BACKWARD_BARRIER: 00081 return TOP_bwd_bar; 00082 00083 case OPR_INTRINSIC_CALL: 00084 if (rtype == MTYPE_V) return TOP_intrncall; 00085 else return TOP_UNDEFINED; 00086 00087 case OPR_NEG: 00088 if (rtype == MTYPE_F4 00089 || rtype == MTYPE_F8) return TOP_fneg; 00090 else return TOP_UNDEFINED; 00091 00092 case OPR_ABS: 00093 if (rtype == MTYPE_F4 00094 || rtype == MTYPE_F8) return TOP_fabs; 00095 else return TOP_UNDEFINED; 00096 00097 case OPR_PAREN: 00098 if (rtype == MTYPE_F4) return TOP_nop; 00099 else if (rtype == MTYPE_F8) return TOP_nop; 00100 else return TOP_UNDEFINED; 00101 00102 case OPR_PARM: 00103 return TOP_nop; 00104 00105 case OPR_TRAP: 00106 return TOP_break; 00107 00108 default: 00109 return TOP_UNDEFINED; 00110 } 00111 } 00112 00113 /* pick the opcode corresponding to the TAS, which will either 00114 * be a float<->int move or a no-op. */ 00115 TOP 00116 TAS_To_TOP (WN *tas_wn) 00117 { 00118 TYPE_ID kid_mtype = WN_rtype(WN_kid0(tas_wn)); 00119 00120 switch (WN_opcode(tas_wn)) { 00121 case OPC_I8TAS: 00122 case OPC_U8TAS: 00123 return MTYPE_float(kid_mtype) ? TOP_getf_d : TOP_nop; 00124 case OPC_I4TAS: 00125 case OPC_U4TAS: 00126 return MTYPE_float(kid_mtype) ? TOP_getf_s : TOP_nop; 00127 case OPC_F8TAS: 00128 return MTYPE_float(kid_mtype) ? TOP_nop : TOP_setf_d; 00129 case OPC_F4TAS: 00130 return MTYPE_float(kid_mtype) ? TOP_nop : TOP_setf_s; 00131 default: 00132 return TOP_UNDEFINED; 00133 } 00134 } 00135 00136 /* return TRUE if the val is a power of 2 */ 00137 /* WARNING: these routines must be kept in sync with cg's exp_divrem */ 00138 #define IS_POWER_OF_2(val) ((val != 0) && ((val & (val-1)) == 0)) 00139 00140 static BOOL Is_Power_Of_2(INT64 val, TYPE_ID mtype) 00141 { 00142 if (MTYPE_is_signed(mtype) && val < 0) 00143 val= -val; 00144 00145 if (mtype == MTYPE_U4) 00146 val &= 0xffffffffull; 00147 00148 return IS_POWER_OF_2(val); 00149 } 00150 00151 /* return whether DIV will be translated into shifts */ 00152 extern BOOL 00153 Can_Do_Fast_Divide (TYPE_ID mtype, INT64 dividend) 00154 { 00155 if (Is_Power_Of_2(dividend, mtype)) { 00156 return TRUE; 00157 } 00158 return FALSE; 00159 } 00160 00161 /* return whether REM or MOD will be translated into shifts */ 00162 extern BOOL 00163 Can_Do_Fast_Remainder (TYPE_ID mtype, INT64 dividend) 00164 { 00165 if (Is_Power_Of_2(dividend, mtype)) { 00166 return TRUE; 00167 } 00168 return FALSE; 00169 } 00170 00171 00172 /* ==================================================================== 00173 * 00174 * Multiply_Limit 00175 * Divide_Limit 00176 * 00177 * When trying to convert a multiply or divide operation into a series 00178 * of shifts/adds/subtracts, there is some limit (cycles? ops?) at 00179 * which the conversion is not profitable. Return that limit. 00180 * The number of cycles should be the mult latency + mflo cycle. 00181 * TODO: Revisit these limits to check for I-cache effects, timing. 00182 * 00183 * ==================================================================== 00184 */ 00185 extern INT 00186 Multiply_Limit( BOOL is_64bit, INT64 val) 00187 { 00188 INT limit = 0; 00189 00190 if (is_64bit) { /* 64-bit multiply */ 00191 switch( Target ) { 00192 case TARGET_ITANIUM: limit = 14; break; 00193 default: limit = 14; break; 00194 } 00195 } else { /* 32-bit multiply */ 00196 switch( Target ) { 00197 case TARGET_ITANIUM: limit = 14; break; 00198 default: limit = 14; break; 00199 } 00200 } 00201 return limit; 00202 } 00203 00204 extern INT 00205 Divide_Limit ( BOOL is_64bit) 00206 { 00207 INT limit = 0; 00208 if (is_64bit) { /* 64-bit divide */ 00209 switch( Target ) { 00210 case TARGET_ITANIUM: limit = 50; break; 00211 default: limit = 50; break; 00212 } 00213 } else { /* 32-bit divide */ 00214 switch( Target ) { 00215 case TARGET_ITANIUM: limit = 50; break; 00216 default: limit = 50; break; 00217 } 00218 } 00219 return ( limit ); 00220 } 00221 00222 /* Count # instructions needed to do multiply with shifts and adds. 00223 * NOTE: this routine must stay in sync with cg's Expand_Multiply_Into_Shifts. 00224 * See that routine for an explanation of the algorithm. */ 00225 static INT 00226 Count_Multiply_Shifts (TARG_UINT constant) 00227 { 00228 switch (constant) { 00229 case 0: 00230 case 1: 00231 case 2: 00232 return 1; 00233 default: 00234 if ((constant % 2) == 1) { /* odd */ 00235 if ((constant & 2) != 0) 00236 return 1 + Count_Multiply_Shifts (constant+1); 00237 else 00238 return 1 + Count_Multiply_Shifts (constant-1); 00239 } 00240 else { /* even */ 00241 while ((constant % 2) == 0) { /* even */ 00242 constant = (TARG_UINT)constant >> 1; 00243 } 00244 if (constant == 1) 00245 return 1; 00246 else 00247 return 1 + Count_Multiply_Shifts (constant); 00248 } 00249 } 00250 } 00251 00252 /* return whether MPY will be translated into shifts and adds */ 00253 /* NOTE: this routine must stay in sync with cg's Expand_Multiply */ 00254 extern BOOL 00255 Can_Do_Fast_Multiply (TYPE_ID mtype, INT64 val) 00256 { 00257 INT limit; 00258 INT num = 0; 00259 00260 // Count the number of 1 bits 00261 INT num_ones=0; 00262 INT num_ones_neg=0; 00263 00264 if (val < 0) val = -val; 00265 00266 UINT64 uc=val; 00267 while (uc) {num_ones += (uc&1); uc >>= 1;} 00268 00269 uc = val; 00270 if (uc <= 63) return (TRUE); 00271 00272 if (num_ones == 1) return (TRUE); // Anything with this few ones is fair game 00273 while ((uc & 1) == 0) {uc >>= 1;} 00274 // Another special case, 2**N - 1 00275 if (uc == ((1<<num_ones)-1)) return (TRUE); 00276 // 00277 // The remaining piece we need to see how to multiply. 00278 // 00279 // Using shladd we can always multiply no worse than: 00280 // 00281 // <=6 bits - 4/3 (4 instructions, dependence height of 3) 00282 // 7,8 - 7/3 (do as 2 groups of 4 combine with shladd) 00283 // 9,10 - 8/4 (do as group of 6 and 4 combine with shladd) 00284 // 11-16 - 16/5 (do as two groups of 8, combine with pair of shladds) 00285 // 00286 if (OPT_Space) return FALSE; // These may eat up a bunch of instructions 00287 if (uc <= 65535) return TRUE; 00288 return FALSE; 00289 } 00290 00291 00292 /* 00293 */ 00294 INT Copy_Quantum_Ratio(void) 00295 { 00296 INT32 ratio; 00297 00298 // Lmt_DevWarn(1, ("Copy_Quantum_Ratio needs work")); 00299 // I don't think it still needs work 00300 switch(Target) { 00301 case TARGET_ITANIUM: ratio= 4; break; 00302 default: ratio= 4; break; 00303 } 00304 00305 return ratio; 00306 } 00307 00308 /* Does <val> fit in a signed word of length <bits>? 00309 */ 00310 inline BOOL Is_Signed_Bits(INT64 val, INT bits) 00311 { 00312 INT64 hibit = 1LL << (bits - 1); 00313 return val >= -hibit && val <= (hibit - 1); 00314 } 00315 00316 00317 /* Does <val> fit in a unsigned word of length <bits>? 00318 */ 00319 inline BOOL Is_Unsigned_Bits(UINT64 val, INT bits) 00320 { 00321 return val < (1ULL << bits); 00322 } 00323 00324 00325 /* Indicate if the specified operation can have an immediate operand. 00326 */ 00327 BOOL Can_Be_Immediate(OPERATOR opr, 00328 INT64 val, 00329 TYPE_ID dtype, 00330 INT whichkid, 00331 ST *stid_st) 00332 { 00333 // for IA64 targets, the value 0 is a dedicated register 00334 if (val == 0) return TRUE; 00335 00336 switch (opr) { 00337 case OPR_AGOTO: // leave a constant condition here alone 00338 case OPR_LOOP_INFO: // leave the constant trip-count alone 00339 return TRUE; 00340 00341 case OPR_EQ: 00342 case OPR_NE: 00343 case OPR_GE: 00344 case OPR_GT: 00345 case OPR_LE: 00346 case OPR_LT: 00347 case OPR_MAX: // treat as OPR_LT without regard to which kid 00348 case OPR_MIN: // treat as OPR_LT without regard to which kid 00349 // the range of immediates for ia64 is complicated, but basically 00350 // an 8-bit literal is sign-extended and then its value is 00351 // taken in the context of the data size and type. 00352 if (!MTYPE_is_size_double(dtype)) val = (INT32)val; 00353 00354 switch (opr) { 00355 case OPR_LE: 00356 case OPR_GT: 00357 whichkid = !whichkid; 00358 /*FALLTHROUGH*/ 00359 case OPR_LT: 00360 case OPR_GE: 00361 if (whichkid == 1) val = val - 1; 00362 break; 00363 } 00364 00365 return Is_Signed_Bits(val, 8); 00366 00367 case OPR_ASHR: 00368 case OPR_LSHR: 00369 case OPR_MLOAD: 00370 case OPR_SHL: 00371 // can handle any constant as long as it's the second kid 00372 return whichkid == 1; 00373 00374 case OPR_MSTORE: 00375 // can handle any constant as long as it's the third kid 00376 return whichkid == 2; 00377 00378 case OPR_BAND: 00379 case OPR_BIOR: 00380 case OPR_BXOR: 00381 // can the value fit in signed 8 bits? 00382 return Is_Signed_Bits(val, 8); 00383 00384 case OPR_SUB: 00385 // If the constant is kid #0 (left operand) then we can use 00386 // the subtract instruction which takes an immediate 8-bit first/left 00387 // operand. If the constant is kid #1 (right operand) then 00388 // we can negate the constant and use an add. 00389 if (whichkid == 0) return Is_Signed_Bits(val, 8); 00390 return Is_Signed_Bits(-val, 14); 00391 00392 case OPR_ADD: 00393 // can the value fit in signed 14 bits? 00394 return Is_Signed_Bits(val, 14); 00395 00396 case OPR_DIV: 00397 // can the second kid be handled with shifts? 00398 return whichkid == 1 && Can_Do_Fast_Divide(dtype, val); 00399 00400 case OPR_REM: 00401 case OPR_MOD: 00402 // can the second kid be handled with shifts? 00403 return whichkid == 1 && Can_Do_Fast_Remainder(dtype, val); 00404 00405 case OPR_DIVREM: 00406 // can the second kid be handled with shifts? 00407 return whichkid == 1 00408 && Can_Do_Fast_Remainder(dtype, val) 00409 && Can_Do_Fast_Divide(dtype, val); 00410 00411 case OPR_MPY: 00412 // can the value be handled with shifts? 00413 return Can_Do_Fast_Multiply(dtype, val); 00414 00415 case OPR_SELECT: 00416 // The select will be expanded as a pair of predicated moves. 00417 // Therefore the constant we can handle is determined by 'mov', 00418 // i.e. can value fit in signed 22 bits? 00419 return whichkid > 0 && Is_Signed_Bits(val, 22); 00420 00421 case OPR_CALL: 00422 case OPR_ICALL: 00423 case OPR_INTRINSIC_CALL: 00424 case OPR_PARM: 00425 // calls end up storing their constant parameters to dedicated 00426 // registers, which can be quicker if they're left in place as 00427 // ldimm's. 00428 return TRUE; 00429 00430 case OPR_STID: 00431 // is this in a store to a register, which usually 00432 // means in preparation for a call, or return value, so just 00433 // let us generate the stid/load-immediate in place if it fits 00434 return ST_class(stid_st) == CLASS_PREG; 00435 } 00436 00437 return FALSE; 00438 }