Open64 (mfef90, whirl2f, and IR tools)  TAG: version-openad; SVN changeset: 916
strtab.cxx
Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00004 
00005 This program is free software; you can redistribute it and/or modify it
00006 under the terms of version 2 of the GNU General Public License as
00007 published by the Free Software Foundation.
00008 
00009 This program is distributed in the hope that it would be useful, but
00010 WITHOUT ANY WARRANTY; without even the implied warranty of
00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013 Further, this software is distributed without any warranty that it is
00014 free of the rightful claim of any third person regarding infringement 
00015 or the like.  Any license provided herein, whether implied or 
00016 otherwise, applies only to this software file.  Patent licenses, if 
00017 any, provided herein do not apply to combinations of this program with 
00018 other software, or any other product whatsoever.  
00019 
00020 You should have received a copy of the GNU General Public License along
00021 with this program; if not, write the Free Software Foundation, Inc., 59
00022 Temple Place - Suite 330, Boston MA 02111-1307, USA.
00023 
00024 Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00025 Mountain View, CA 94043, or:
00026 
00027 http://www.sgi.com
00028 
00029 For further information regarding this notice, see:
00030 
00031 http://oss.sgi.com/projects/GenInfo/NoticeExplan
00032 
00033 */
00034 
00035 
00036 
00037 #ifdef USE_PCH
00038 #include "common_com_pch.h"
00039 #endif /* USE_PCH */
00040 #pragma hdrstop
00041 
00042 #include <alloca.h>
00043 #include <string.h> /* for memmove() */
00044 
00045 #include <map>
00046 #include "HashTable.h"
00047 using namespace stlCompatibility;
00048 
00049 
00050 #ifdef _SGI_SGI
00051 #include <utility>    // for pair< > needed by sgi CC 
00052 #endif
00053 
00054 using namespace std;
00055 
00056 #include "defs.h"
00057 #include "errors.h"
00058 #include "cxx_memory.h"                 // for CXX_NEW
00059 
00060 #include "strtab.h"
00061 
00062 // The string table is implmeneted as a single character buffer, re-allocated
00063 // if necessary.  Byte 0 of this buffer is alwasy NULL.  Each string is
00064 // copied to the buffer once.  Duplicates are not entered.  A simple hash
00065 // table is used to for quick lookup.  The hash table is a fixed size
00066 // array of STR_BUCKET, each of which a dynamic array of <idx,size> pair.
00067 // The size field is truncated to 8 bits.  It is used for 
00068 // quick comparision to minimize the need to call strcmp.
00069 
00070 // we support two types of strings: C-style null-terminated string and
00071 // character array with a specified size (may contain null character within
00072 // the array.
00073 
00074 typedef UINT32 STR_INDEX;
00075 
00076 #ifdef linux
00077 
00078 #define STR_INDEX_size(xxx) (xxx & 0xff)
00079 #define STR_INDEX_index(xxx) (xxx >> 8)
00080 
00081 #else
00082 
00083 // given a STR_INDEX (which is a 32-bit UINT), the following two functions
00084 // extract the offset of the string in string table (first 24 bits), and the 
00085 // size of the string (not real size, but a truncated_to_8_bits size). 
00086 
00087 static inline mUINT8
00088 STR_INDEX_size (STR_INDEX idx)          { return idx & 0xff; }
00089 static inline mUINT32
00090 STR_INDEX_index (STR_INDEX idx)         { return idx >> 8; }
00091 
00092 #endif /* linux */
00093 
00094 // given the offset of a string in string table, and the string size, 
00095 // this function makes a 32-bit hybrid index (basically the reverse
00096 // of the above two functions).
00097 
00098 static inline STR_INDEX
00099 make_STR_INDEX (UINT32 size, UINT32 idx)
00100 {
00101   // if string length larger than 0xff, just use 0xff
00102   if (size > 0xff)
00103     size = 0xff;
00104   return (STR_INDEX) ((idx << 8) | size);
00105 }
00106 
00107 struct NULL_TERMINATED_STRING
00108 {
00109   static char *get_str (char *buffer) {
00110     return buffer;
00111   }
00112 
00113   static const char *get_str (const char *buffer) {
00114     return buffer;
00115   }
00116 
00117   static UINT32 get_length (const char *buffer) {
00118     return strlen (buffer);
00119   }
00120 
00121   static UINT32 get_buffer_length (UINT32 length) {
00122     return length + 1;          // length + null character
00123   }
00124 
00125   static void copy (const char *str, UINT32 length, char *buffer) {
00126     memcpy (buffer, str, length+1);
00127   }
00128 };
00129 
00130 
00131 union UNALIGN_INT32
00132 {
00133   char ch[sizeof(UINT32)];
00134   UINT32 n;
00135 
00136   UNALIGN_INT32 (UINT32 x) : n (x) {}
00137 
00138   UNALIGN_INT32 (const char *s) {
00139     for (INT32 i = 0; i < sizeof(UINT32); ++i)
00140       ch[i] = *s++;
00141   }
00142 };
00143 
00144 
00145 struct CHARACTER_ARRAY
00146 {
00147   //----------------------------------------------------------------------
00148   // IMPORTANT NOTE:
00149   //
00150   // the character array data structure keeps a length field as its first
00151   // byte. this length should be interpreted as an unsigned number.
00152   //
00153   // some compilers (e.g. Sun's CC) sign extend "char" types so an "unsigned
00154   // char" type is needed to ensure that the length is not interpreted 
00155   // as a negative number. We defined the UC macro to cast the signed "char"
00156   // type to an unsigned char so that it is properly interpreted as a 
00157   // 8-bit unsigned number.
00158   //
00159   // Fengmei Zhao and John Mellor-Crummey, Rice University
00160   //----------------------------------------------------------------------
00161 #define UC(x) ((unsigned char)(x))
00162 
00163   // If the character array is less then 0xff bytes, we store the size in
00164   // The First Byte Followed By The Array.  Otherwise, The First Byte Is Set
00165   // To 0xff And The Next 4 Bytes Hold The Size Of The Array As An Unsigned
00166   // Integer, And The Array Follows.
00167 
00168   static const char *get_str (const char *buffer) {
00169     if (UC(*buffer) != 0xff)
00170       return buffer + 1;
00171     else 
00172       return buffer + sizeof(UINT32) + 1;
00173   }
00174 
00175   static char *get_str (char *buffer) {
00176     if (UC(*buffer) != 0xff)
00177       return buffer + 1;
00178     else 
00179       return buffer + sizeof(UINT32) + 1;
00180   }
00181 
00182   static UINT32 get_length (const char *buffer) {
00183     if (UC(*buffer) != 0xff)
00184 
00185 
00186       return UC(*buffer);
00187     else {
00188       UNALIGN_INT32 unalign (buffer + 1);
00189       return unalign.n;
00190     }
00191   }
00192 
00193   static UINT32 get_buffer_length (UINT32 length) {
00194     return length < 0xff ? length + 1 : length + 1 + sizeof(UINT32);
00195   }
00196 
00197   static void copy (const char *str, UINT32 length, char *buffer_) {
00198     unsigned char *buffer = (unsigned char *) buffer_;
00199     if (length < 0xff) {
00200       *buffer++ = length;
00201     } else {
00202       *buffer++ = 0xff;
00203       UNALIGN_INT32 unalign (length);
00204       for (INT32 i = 0; i < sizeof(UINT32); ++i)
00205         *buffer++ = unalign.ch[i];
00206     }
00207     memcpy (buffer, str, length);
00208   }
00209 
00210 }; // CHARACTER_ARRAY
00211 
00212 
00213 template <class STR>
00214 struct STR_TAB
00215 {
00216   char *buffer;       // main buffer storing all strings in string table
00217   STR_IDX last_idx;   // end of current used storage
00218   UINT32 buffer_size;  
00219 
00220   // the following 4 function objects are required for setting up the
00221   // hash table declared after them.
00222 
00223   // hash_key is the "key" of hashtable. STR_INDEX is the "data" of hashtable. 
00224   // they two compose pair<key, STR_INDEX>, which can serve as the "value type"
00225   // in map (created by me).
00226 
00227   // hash key, which is a pair of string pointer and string length
00228   /*
00229   struct hash_key {
00230     const char *str;
00231     mUINT32 size;
00232 
00233     hash_key () {}
00234         
00235     hash_key (const char *s, UINT32 l) : str (s), size (l) {}
00236         
00237     hash_key (const hash_key& p) : str (p.str), size (p.size) {}
00238   };
00239   */
00240   struct StringHashKey {
00241     mUINT32 idx;
00242     mUINT32 size;
00243 
00244     StringHashKey() {}
00245 
00246     StringHashKey(mUINT32 _idx, mUINT32 _size) : 
00247       idx(_idx), size(_size) {}
00248 
00249     StringHashKey(const StringHashKey& shk) : 
00250       idx(shk.idx), size(shk.size) {}
00251 
00252     StringHashKey& operator=(const StringHashKey& shk) { 
00253       if (this != &shk) {
00254         idx = shk.idx;
00255         size = shk.size;
00256       }
00257       return (*this);
00258     }
00259   };
00260 
00261 
00262   // class-helper to determine the actual pointer to the string idx memory
00263   class ExtractStringBufferFromStringTable {
00264     const STR_TAB<STR>& stringTable;
00265 
00266   public:
00267     ExtractStringBufferFromStringTable (const STR_TAB<STR>& _stringTable) : 
00268       stringTable(_stringTable) {}
00269     
00270     char * operator() (mUINT32 idx) const {
00271       char *str = stringTable.buffer + idx;
00272       return STR::get_str(str);
00273     }
00274   };
00275 
00276   ExtractStringBufferFromStringTable extractStringBufferFromStringTable;
00277 
00278   class HashStringHashKey {
00279   public:
00280     STR_TAB<STR> *thisEnclosing;
00281 
00282     HashStringHashKey() : thisEnclosing(NULL) {}
00283 
00284     HashStringHashKey(STR_TAB<STR> & _thisEnclosing) :
00285       thisEnclosing(&_thisEnclosing) {}
00286 
00287     UINT32 operator() (const StringHashKey& key) const {
00288       char * str = thisEnclosing->extractStringBufferFromStringTable(key.idx);
00289 
00290       UINT32 h = 0;
00291       const UINT32 length = key.size;
00292       for (UINT32 i = 0; i < length; ++i)
00293         h = 5 * h + str[i];
00294       return h;
00295     }
00296   };
00297 
00298   class EqStringHashKey {
00299   public:
00300     STR_TAB<STR> *thisEnclosing;
00301 
00302     EqStringHashKey() :
00303       thisEnclosing(NULL) {}
00304 
00305     EqStringHashKey(STR_TAB<STR> & _thisEnclosing) :
00306       thisEnclosing(&_thisEnclosing) {}
00307 
00308     bool operator() (const StringHashKey& key1, 
00309                      const StringHashKey& key2) const {
00310       if (key1.size != key2.size) return false;
00311       char * str1 = thisEnclosing->extractStringBufferFromStringTable(key1.idx);
00312       char * str2 = thisEnclosing->extractStringBufferFromStringTable(key2.idx);
00313       bool result = (str1[0] == str2[0]) &&
00314                     (memcmp(str1, str2, key1.size) == 0);
00315       return result;
00316     }
00317   };
00318 
00319   typedef HashTable<StringHashKey, STR_INDEX, HashStringHashKey, EqStringHashKey> HASHTABLE;
00320   HASHTABLE hash_table;
00321 
00322   STR_TAB(UINT bucket_size) : 
00323     hash_table(),
00324     extractStringBufferFromStringTable(*this), 
00325     buffer (NULL), last_idx (0), buffer_size (0) { 
00326     hash_table.hashClass().thisEnclosing = this;
00327     hash_table.eqClass().thisEnclosing = this;
00328   };
00329   // operations
00330     
00331   void reserve (UINT32 size) {
00332     if (size > buffer_size - last_idx) {
00333       // realloc
00334       while (size > buffer_size - last_idx) {
00335         if (buffer_size < 0x100000)
00336           buffer_size *= 2;
00337         else
00338           buffer_size += 0x80000;
00339       }
00340       buffer = (char *) MEM_POOL_Realloc (Malloc_Mem_Pool, buffer, 0,
00341                                           buffer_size); 
00342     }
00343   }
00344     
00345   void init_hash ();
00346   
00347   UINT32 insert (const char *str, UINT32 size);
00348   
00349   UINT32 insert (const char *str) {
00350     return insert (str, strlen (str));
00351   }
00352     
00353   void copy_str (const char *str, UINT32 size);
00354 
00355   void dump(FILE *fout) {
00356     
00357   }
00358 }; // STR_TAB
00359 
00360 
00361 
00362 
00363 // when inserting a new string/size into the string table
00364 //   first  - copy the string into stringtable's buffer
00365 //   second - calculate its STR_INDEX according to its offset and size
00366 //   third  - try to insert its STR_INDEX/hash_key into hash_table
00367 //   fourth - if a same hash_key already exists, un-insert
00368 template <class STR>
00369 UINT32 STR_TAB<STR>::insert (const char *str, UINT32 size)
00370 {
00371   // insert the new string into the String Buffer,
00372   // not into auxiliary String Hash table
00373   UINT32 oldLastIndex = last_idx;
00374   copy_str (str, size);  //copy str into buffer at offset 'index'
00375   STR_INDEX new_index = make_STR_INDEX (size, oldLastIndex);
00376 
00377   // try to insert the new string into the String Hash Table
00378   typename HASHTABLE::ValueBoolPair insertStatus = 
00379     hash_table.insert(typename HASHTABLE::KeyValuePair(StringHashKey(oldLastIndex, size), new_index));
00380 
00381   if (insertStatus.second == false) {
00382     // the new string was already in the StringTable => 
00383     // uninsert from the String Buffer
00384     last_idx = oldLastIndex;
00385     return STR_INDEX_index(insertStatus.first);
00386   } else {
00387     // this is the first occurence of the new string
00388     assert(STR_INDEX_index(insertStatus.first) == oldLastIndex);
00389     return oldLastIndex;
00390   }
00391 } // STR_TAB<STR>::insert
00392 
00393 
00394 template <class STR>
00395 void STR_TAB<STR>::copy_str (const char *str, UINT32 size) {
00396   UINT32 buffer_size = STR::get_buffer_length (size);
00397   reserve (buffer_size);
00398   STR::copy (str, size, buffer + last_idx);
00399   last_idx += buffer_size;
00400 } // STR_TAB::copy_str
00401 
00402 
00403 template <class STR>
00404 void STR_TAB<STR>::init_hash () {
00405   UINT32 idx = 1;                       // first entry always null
00406   while (idx < last_idx) {
00407     UINT32 length = STR::get_length(buffer + idx);
00408 
00409     STR_INDEX new_index = make_STR_INDEX(length, idx);
00410     hash_table.insert(typename HASHTABLE::KeyValuePair(StringHashKey(idx, length), new_index));
00411     idx += STR::get_buffer_length (length);
00412   }
00413 } // STR_TAB<STR>::init_hash
00414 
00415 
00416 template <class STRTAB>
00417 static inline void initialize_strtab (STRTAB& strtab, UINT32 size) {
00418   strtab.buffer_size = size;
00419   strtab.buffer = (char *) MEM_POOL_Alloc (Malloc_Mem_Pool, size);
00420   strtab.buffer[0] = 0;
00421   strtab.last_idx = 1;
00422 }
00423 
00424 
00425 template <class STRTAB>
00426 static inline void initialize_strtab (STRTAB& strtab, const char *buf, UINT32 size) {
00427   strtab.hash_table.clear();
00428   strtab.buffer_size = size;
00429   strtab.buffer = (char *) MEM_POOL_Alloc (Malloc_Mem_Pool, size);
00430   memmove (strtab.buffer, buf, size);
00431   strtab.last_idx = size;
00432   strtab.init_hash ();
00433 } // Initialize_Strtab
00434     
00435 
00436   // Global String table
00437 static STR_TAB<NULL_TERMINATED_STRING> Strtab (1000);
00438 
00439 STRING_TABLE Str_Table;
00440 
00441 UINT32
00442 STR_Table_Size () {
00443   return Strtab.last_idx;
00444 }
00445 
00446 // init an empty table; use by producer (e.g., front end)
00447 void
00448 Initialize_Strtab (UINT32 size) {
00449   initialize_strtab (Strtab, size);
00450 } 
00451 
00452 // initialized the string table with the strtab from an input file
00453 void Initialize_Strtab (const char *buf, UINT32 size) {
00454   initialize_strtab (Strtab, buf, size);
00455 } // Initialize_Strtab
00456 
00457 
00458 STR_IDX Save_Str (const char *str) {
00459   if (str == NULL) return 0;
00460   return Strtab.insert (str);
00461 } // Save_Str
00462 
00463 
00464 STR_IDX Save_Str2 (const char *s1, const char *s2) {
00465   UINT len = strlen (s1) + strlen(s2) + 1;
00466   char *new_str = (char *) alloca (len);
00467   strcpy (new_str, s1);
00468   strcat (new_str, s2);
00469   return Save_Str (new_str);
00470 } // Save_Str2
00471 
00472 STR_IDX Save_Str2i (const char *s1, const char *s2, UINT i) {
00473   UINT len = strlen (s1) + strlen(s2) + 17;
00474   char *new_str = (char *) alloca (len);
00475   sprintf(new_str, "%s%s%d", s1, s2, i);
00476   return Save_Str (new_str);
00477 } // Save_Str2
00478 
00479 char * Index_To_Str (STR_IDX idx) {
00480   Is_True (idx < Strtab.last_idx, ("Invalid STR_IDX"));
00481   return NULL_TERMINATED_STRING::get_str (Strtab.buffer + idx);
00482 }
00483 
00484 // character array table
00485 const UINT32 TCON_STRTAB_HASH_SIZE = 512;
00486 static STR_TAB<CHARACTER_ARRAY> TCON_strtab (TCON_STRTAB_HASH_SIZE);
00487 
00488 UINT32
00489 TCON_strtab_size () {
00490   return TCON_strtab.last_idx;
00491 }
00492 
00493 char * TCON_strtab_buffer () {
00494   return TCON_strtab.buffer;
00495 }
00496 
00497 // init an empty table; use by producer
00498 void Initialize_TCON_strtab (UINT32 size) {
00499   initialize_strtab (TCON_strtab, size);
00500 }
00501 
00502 // init the TCON strtab from an input file
00503 void Initialize_TCON_strtab (const char *buf, UINT32 size) {
00504   initialize_strtab (TCON_strtab, buf, size);
00505 }
00506 
00507 // add string of length "len"; string might not be null-terminated
00508 UINT32 Save_StrN (const char *s1, UINT32 len) {
00509   if (len == 0) return 0;
00510   return TCON_strtab.insert(s1, len);
00511   //  return TCON_strtab.insert (s1, len);
00512 } // Save_StrN
00513 
00514 char * Index_to_char_array (UINT32 idx) {
00515   Is_True (idx < TCON_strtab.last_idx, ("Invalid TCON str index"));
00516   return CHARACTER_ARRAY::get_str (TCON_strtab.buffer + idx);
00517 }
00518 
00519 #ifdef MONGOOSE_BE
00520 
00521 template <class STR, class MAP>
00522 void merge_strtab (STR_TAB<STR>& strtab, const char *buf, UINT32 size, MAP& map) {
00523   map[0] = 0;
00524   UINT32 idx = 1;
00525   while (idx < size) {
00526     const char *str = STR::get_str (buf + idx);
00527     UINT32 size = STR::get_length (buf + idx);
00528     UINT32 new_idx = strtab.insert (str, size);
00529     map[idx] = new_idx;
00530     idx += STR::get_buffer_length (size);
00531   }
00532 } // merge_strtab
00533 
00534 
00535 // merge in a string table buffer from an input file, return a map from
00536 // the old STR_IDX to the new (merged) STR_IDX
00537 
00538 void
00539 Merge_Strtab (const char *buf, UINT32 size, STR_IDX_MAP& map) {
00540   merge_strtab (Strtab, buf, size, map);
00541 }
00542 
00543 // merge in a string table buffer from an input file, return a map from
00544 // the old STR_IDX to the new (merged) STR_IDX
00545 void Merge_TCON_Strtab (const char *buf, UINT32 size, STR_IDX_MAP& map) {
00546   merge_strtab (TCON_strtab, buf, size, map);
00547 }
00548 
00549 #endif // MONGOOSE_BE
00550 
00551   
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines