Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
betarget.cxx
Go to the documentation of this file.
00001 /*
00002 
00003   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005   This program is free software; you can redistribute it and/or modify it
00006   under the terms of version 2 of the GNU General Public License as
00007   published by the Free Software Foundation.
00008 
00009   This program is distributed in the hope that it would be useful, but
00010   WITHOUT ANY WARRANTY; without even the implied warranty of
00011   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013   Further, this software is distributed without any warranty that it is
00014   free of the rightful claim of any third person regarding infringement 
00015   or the like.  Any license provided herein, whether implied or 
00016   otherwise, applies only to this software file.  Patent licenses, if 
00017   any, provided herein do not apply to combinations of this program with 
00018   other software, or any other product whatsoever.  
00019 
00020   You should have received a copy of the GNU General Public License along
00021   with this program; if not, write the Free Software Foundation, Inc., 59
00022   Temple Place - Suite 330, Boston MA 02111-1307, USA.
00023 
00024   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00025   Mountain View, CA 94043, or:
00026 
00027   http://www.sgi.com
00028 
00029   For further information regarding this notice, see:
00030 
00031   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00032 
00033 */
00034 
00035 
00036 /* ====================================================================
00037  * ====================================================================
00038  *
00039  *
00040  * Description:
00041  *
00042  * Support routines for target-specific functionality.
00043  *
00044  * ====================================================================
00045  * ====================================================================
00046  */
00047 
00048 #include "defs.h"
00049 #include "errors.h"
00050 #include "util.h"
00051 #include "tracing.h"
00052 #include "topcode.h"
00053 #include "wn.h"
00054 #include "opcode.h"
00055 #include "config_targ.h"
00056 #include "targ_isa_lits.h"
00057 #include "betarget.h"
00058 #include "w2op.h"
00059 
00060 BOOL Targ_Lower_Float_To_Unsigned = FALSE;  // off by default; NOTE(review): presumably selects lowering of float->unsigned conversions -- consumers are not in this file, confirm
00061 BOOL Targ_Lower_Unsigned_To_Float = FALSE;  // off by default; NOTE(review): presumably selects lowering of unsigned->float conversions -- consumers are not in this file, confirm
00062 
00063 // largest signed offset possible in small-frame stack model
00064 INT Max_Small_Frame_Offset = 0x1fff;    // 0x1fff == 8191 == 2^13 - 1, i.e. 13 magnitude bits
00065 
00066 
00067 /* only return machine_ops, TOP_UNDEFINED if not an exact correspondence */
00068 TOP
00069 OPCODE_To_TOP (OPCODE opcode)
00070 {
00071   OPERATOR opr   = OPCODE_operator (opcode);
00072   TYPE_ID  rtype = OPCODE_rtype (opcode);
00073   TYPE_ID  desc  = OPCODE_desc  (opcode);
00074 
00075   switch (opr) {
00076 
00077     case OPR_FORWARD_BARRIER:
00078       return TOP_fwd_bar;
00079 
00080     case OPR_BACKWARD_BARRIER:
00081       return TOP_bwd_bar;
00082 
00083     case OPR_INTRINSIC_CALL:
00084       if (rtype == MTYPE_V) return TOP_intrncall;
00085       else                  return TOP_UNDEFINED;
00086 
00087     case OPR_NEG:
00088            if (rtype == MTYPE_F4 
00089             || rtype == MTYPE_F8) return TOP_fneg;
00090       else                        return TOP_UNDEFINED;
00091 
00092     case OPR_ABS:
00093            if (rtype == MTYPE_F4 
00094             || rtype == MTYPE_F8) return TOP_fabs;
00095       else                        return TOP_UNDEFINED;
00096 
00097     case OPR_PAREN:
00098            if (rtype == MTYPE_F4) return TOP_nop;
00099       else if (rtype == MTYPE_F8) return TOP_nop;
00100       else                        return TOP_UNDEFINED;
00101 
00102     case OPR_PARM:
00103       return TOP_nop;
00104 
00105     case OPR_TRAP:
00106       return TOP_break;
00107 
00108     default:
00109       return TOP_UNDEFINED;
00110   }
00111 }
00112 
00113 /* pick the opcode corresponding to the TAS, which will either
00114  * be a float<->int move or a no-op. */
00115 TOP
00116 TAS_To_TOP (WN *tas_wn)
00117 {
00118   TYPE_ID kid_mtype = WN_rtype(WN_kid0(tas_wn));
00119 
00120   switch (WN_opcode(tas_wn)) {
00121     case OPC_I8TAS:
00122     case OPC_U8TAS:
00123       return MTYPE_float(kid_mtype) ? TOP_getf_d : TOP_nop;
00124     case OPC_I4TAS:
00125     case OPC_U4TAS:
00126       return MTYPE_float(kid_mtype) ? TOP_getf_s : TOP_nop;
00127     case OPC_F8TAS:
00128       return MTYPE_float(kid_mtype) ? TOP_nop : TOP_setf_d;
00129     case OPC_F4TAS:
00130       return MTYPE_float(kid_mtype) ? TOP_nop : TOP_setf_s;
00131     default:
00132       return TOP_UNDEFINED;
00133   }
00134 }
00135 
00136 /* return TRUE if the val is a power of 2 */
00137 /* WARNING:  these routines must be kept in sync with cg's exp_divrem */
00138 #define IS_POWER_OF_2(val)      ((val != 0) && ((val & (val-1)) == 0))
00139 
00140 static BOOL Is_Power_Of_2(INT64 val, TYPE_ID mtype)
00141 {
00142   if (MTYPE_is_signed(mtype) && val < 0)
00143     val=        -val;
00144 
00145   if (mtype == MTYPE_U4)
00146     val &= 0xffffffffull;
00147 
00148   return IS_POWER_OF_2(val);
00149 }
00150 
00151 /* return whether DIV will be translated into shifts */
00152 extern BOOL
00153 Can_Do_Fast_Divide (TYPE_ID mtype, INT64 dividend)
00154 {
00155         if (Is_Power_Of_2(dividend, mtype)) {
00156                 return TRUE;
00157         }
00158         return FALSE;
00159 }
00160 
00161 /* return whether REM or MOD will be translated into shifts */
00162 extern BOOL
00163 Can_Do_Fast_Remainder (TYPE_ID mtype, INT64 dividend)
00164 {
00165         if (Is_Power_Of_2(dividend, mtype)) {
00166                 return TRUE;
00167         }
00168         return FALSE;
00169 }
00170 
00171 
00172 /* ====================================================================
00173  *
00174  * Multiply_Limit
00175  * Divide_Limit
00176  *
00177  * When trying to convert a multiply or divide operation into a series
00178  * of shifts/adds/subtracts, there is some limit (cycles? ops?) at
00179  * which the conversion is not profitable.  Return that limit.
00180  * The number of cycles should be the mult latency + mflo cycle.
00181  * TODO: Revisit these limits to check for I-cache effects, timing.
00182  *
00183  * ====================================================================
00184  */
00185 extern INT
00186 Multiply_Limit( BOOL is_64bit, INT64 val)
00187 {
00188   INT limit = 0;
00189 
00190   if (is_64bit) { /* 64-bit multiply */
00191     switch( Target ) {
00192     case TARGET_ITANIUM:        limit = 14; break;
00193     default:            limit = 14; break;
00194     }
00195   } else { /* 32-bit multiply */
00196     switch( Target ) {
00197     case TARGET_ITANIUM:        limit = 14; break;
00198     default:            limit = 14; break;
00199     }
00200   }
00201   return limit;
00202 }
00203 
00204 extern INT
00205 Divide_Limit ( BOOL is_64bit)
00206 {
00207   INT limit = 0;
00208   if (is_64bit) { /* 64-bit divide */
00209     switch( Target ) {
00210     case TARGET_ITANIUM:        limit = 50; break;
00211     default:            limit = 50; break;
00212     }
00213   } else { /* 32-bit divide */
00214     switch( Target ) {
00215     case TARGET_ITANIUM:        limit = 50; break;
00216     default:            limit = 50; break;
00217     }
00218   }
00219   return ( limit );
00220 }
00221 
00222 /* Count # instructions needed to do multiply with shifts and adds.
00223  * NOTE:  this routine must stay in sync with cg's Expand_Multiply_Into_Shifts.
00224  * See that routine for an explanation of the algorithm. */
00225 static INT
00226 Count_Multiply_Shifts (TARG_UINT constant)
00227 {
00228   switch (constant) {
00229   case 0:
00230   case 1:
00231   case 2:
00232         return 1;
00233   default:
00234     if ((constant % 2) == 1) {          /* odd */
00235         if ((constant & 2) != 0)
00236                 return 1 + Count_Multiply_Shifts (constant+1);
00237         else
00238                 return 1 + Count_Multiply_Shifts (constant-1);
00239     }
00240     else {                              /* even */
00241         while ((constant % 2) == 0) {   /* even */
00242                 constant = (TARG_UINT)constant >> 1;
00243         }
00244         if (constant == 1)
00245                 return 1;
00246         else
00247                 return 1 + Count_Multiply_Shifts (constant);
00248     }
00249   }
00250 }
00251 
00252 /* return whether MPY will be translated into shifts and adds */
00253 /* NOTE:  this routine must stay in sync with cg's Expand_Multiply */
00254 extern BOOL
00255 Can_Do_Fast_Multiply (TYPE_ID mtype, INT64 val)
00256 {
00257   INT limit;
00258   INT num = 0;
00259   
00260   // Count the number of 1 bits
00261   INT num_ones=0;
00262   INT num_ones_neg=0;
00263   
00264   if (val < 0) val = -val;
00265 
00266   UINT64 uc=val;
00267   while (uc) {num_ones += (uc&1); uc >>= 1;}
00268   
00269   uc = val;
00270   if (uc <= 63) return (TRUE);
00271 
00272   if (num_ones == 1) return (TRUE); // Anything with this few ones is fair game
00273   while ((uc & 1) == 0) {uc >>= 1;}
00274   // Another special case, 2**N - 1
00275   if (uc == ((1<<num_ones)-1)) return (TRUE);
00276   //
00277   // The remaining piece we need to see how to multiply. 
00278   // 
00279   // Using shladd we can always multiply no worse than:
00280   //
00281   // <=6 bits - 4/3  (4 instructions, dependence height of 3)
00282   // 7,8      - 7/3  (do as 2 groups of 4 combine with shladd)
00283   // 9,10     - 8/4  (do as group of 6 and 4 combine with shladd)
00284   // 11-16    - 16/5 (do as two groups of 8, combine with pair of shladds)
00285   //
00286   if (OPT_Space) return FALSE;  // These may eat up a bunch of instructions
00287   if (uc <= 65535) return TRUE;
00288   return FALSE;
00289 }
00290 
00291 
00292 /*
00293  */
00294 INT Copy_Quantum_Ratio(void)
00295 {
00296   INT32  ratio;
00297 
00298   //  Lmt_DevWarn(1, ("Copy_Quantum_Ratio needs work"));
00299   // I don't think it still needs work
00300   switch(Target) {
00301   case TARGET_ITANIUM:  ratio=  4; break;
00302   default:              ratio=  4; break;
00303   }
00304 
00305   return ratio;    
00306 }
00307 
00308 /* Does <val> fit in a signed word of length <bits>?
00309  */
00310 inline BOOL Is_Signed_Bits(INT64 val, INT bits)
00311 {
00312   INT64 hibit = 1LL << (bits - 1);
00313   return val >= -hibit && val <= (hibit - 1);
00314 }
00315 
00316 
00317 /* Does <val> fit in a unsigned word of length <bits>?
00318  */
00319 inline BOOL Is_Unsigned_Bits(UINT64 val, INT bits)
00320 {
00321   return val < (1ULL << bits);
00322 }
00323 
00324 
00325 /* Indicate if the specified operation can have an immediate operand.
00326  */
00327 BOOL Can_Be_Immediate(OPERATOR opr,
00328                       INT64 val,
00329                       TYPE_ID dtype,
00330                       INT whichkid,
00331                       ST *stid_st)
00332 {
00333   // for IA64 targets, the value 0 is a dedicated register
00334   if (val == 0) return TRUE;
00335 
00336   switch (opr) {
00337   case OPR_AGOTO:       // leave a constant condition here alone
00338   case OPR_LOOP_INFO:   // leave the constant trip-count alone
00339     return TRUE;
00340     
00341   case OPR_EQ:
00342   case OPR_NE:
00343   case OPR_GE:
00344   case OPR_GT:
00345   case OPR_LE:
00346   case OPR_LT:
00347   case OPR_MAX:         // treat as OPR_LT without regard to which kid
00348   case OPR_MIN:         // treat as OPR_LT without regard to which kid
00349     // the range of immediates for ia64 is complicated, but basically
00350     // an 8-bit literal is sign-extended and then its value is
00351     // taken in the context of the data size and type.
00352     if (!MTYPE_is_size_double(dtype)) val = (INT32)val;
00353 
00354     switch (opr) {
00355     case OPR_LE:
00356     case OPR_GT:
00357       whichkid = !whichkid;
00358       /*FALLTHROUGH*/
00359     case OPR_LT:
00360     case OPR_GE:
00361       if (whichkid == 1) val = val - 1;
00362       break;
00363     }
00364 
00365     return Is_Signed_Bits(val, 8);
00366 
00367   case OPR_ASHR:
00368   case OPR_LSHR:
00369   case OPR_MLOAD:
00370   case OPR_SHL:
00371     // can handle any constant as long as it's the second kid
00372     return whichkid == 1;
00373 
00374   case OPR_MSTORE:
00375     // can handle any constant as long as it's the third kid
00376     return whichkid == 2;
00377 
00378   case OPR_BAND:
00379   case OPR_BIOR:
00380   case OPR_BXOR:
00381     // can the value fit in signed 8 bits?
00382     return Is_Signed_Bits(val, 8);
00383 
00384   case OPR_SUB:
00385     // If the constant is kid #0 (left operand) then we can use
00386     // the subtract instruction which takes an immediate 8-bit first/left
00387     // operand. If the constant is kid #1 (right operand) then
00388     // we can negate the constant and use an add.
00389     if (whichkid == 0) return Is_Signed_Bits(val, 8);
00390     return Is_Signed_Bits(-val, 14);
00391 
00392   case OPR_ADD:
00393     // can the value fit in signed 14 bits?
00394     return Is_Signed_Bits(val, 14);
00395 
00396   case OPR_DIV:
00397     // can the second kid be handled with shifts?
00398     return whichkid == 1 && Can_Do_Fast_Divide(dtype, val);
00399 
00400   case OPR_REM:
00401   case OPR_MOD:
00402     // can the second kid be handled with shifts?
00403     return whichkid == 1 && Can_Do_Fast_Remainder(dtype, val);
00404 
00405   case OPR_DIVREM:
00406     // can the second kid be handled with shifts?
00407     return    whichkid == 1 
00408            && Can_Do_Fast_Remainder(dtype, val)
00409            && Can_Do_Fast_Divide(dtype, val);
00410 
00411   case OPR_MPY:
00412     // can the value be handled with shifts?
00413     return Can_Do_Fast_Multiply(dtype, val);
00414     
00415   case OPR_SELECT:
00416     // The select will be expanded as a pair of predicated moves.
00417     // Therefore the constant we can handle is determined by 'mov',
00418     // i.e. can value fit in signed 22 bits?
00419     return whichkid > 0 && Is_Signed_Bits(val, 22);
00420 
00421   case OPR_CALL:
00422   case OPR_ICALL:
00423   case OPR_INTRINSIC_CALL:
00424   case OPR_PARM:
00425     // calls end up storing their constant parameters to dedicated
00426     // registers, which can be quicker if they're left in place as
00427     // ldimm's.
00428     return TRUE;
00429     
00430   case OPR_STID:
00431     // is this in a store to a register, which usually
00432     // means in preparation for a call, or return value, so just 
00433     // let us generate the stid/load-immediate in place if it fits
00434     return ST_class(stid_st) == CLASS_PREG;
00435   }
00436 
00437   return FALSE;
00438 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines