moab
FileTokenizer.cpp
Go to the documentation of this file.
00001 
#include "FileTokenizer.hpp"
#include "moab/ReadUtilIface.hpp"
#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <string>
00022 
00023 namespace moab {
00024 
00025 using namespace std;
00026 
00027 FileTokenizer::FileTokenizer( FILE* file_ptr, ReadUtilIface* rif_ptr )
00028   : filePtr( file_ptr ),
00029     readUtilPtr( rif_ptr ),
00030     nextToken( buffer ),
00031     bufferEnd( buffer ),
00032     lineNumber( 1 ),
00033     lastChar( '\0' )
00034   {}
00035   
00036 FileTokenizer::~FileTokenizer() 
00037   { fclose( filePtr ); }
00038 
00039 bool FileTokenizer::eof() const
00040   { return nextToken == bufferEnd && feof(filePtr); }
00041 
00042 const char* FileTokenizer::get_string( )
00043 {
00044     // If the whitepsace character marking the end of the
00045     // last token was a newline, increment the line count.
00046   if (lastChar == '\n')
00047     ++lineNumber;
00048   
00049     // Loop until either found the start of a token to return or have
00050     // reached the end of the file.
00051   for (;;)
00052   {
00053       // If the buffer is empty, read more.
00054     if (nextToken == bufferEnd)
00055     {
00056       size_t count = fread( buffer, 1, sizeof(buffer) - 1, filePtr );
00057       if (!count)
00058       {
00059         if (feof(filePtr))
00060           readUtilPtr->report_error( "File truncated at line %d\n", line_number() );
00061         else
00062           readUtilPtr->report_error( "I/O Error\n" );
00063         return NULL;
00064       }
00065       
00066       nextToken = buffer;
00067       bufferEnd = buffer + count;
00068     }
00069     
00070       // If the current character is not a space, we've found a token.
00071     if (!isspace(*nextToken))
00072       break;
00073       
00074       // If the current space character is a newline,
00075       // increment the line number count.
00076     if (*nextToken == '\n')
00077       ++lineNumber;
00078     ++nextToken;
00079   }
00080   
00081     // Store the start of the token in "result" and
00082     // advance "nextToken" to one past the end of the
00083     // token.
00084   char* result = nextToken;
00085   while (nextToken != bufferEnd && !isspace(*nextToken))
00086     ++nextToken;
00087   
00088     // If we have reached the end of the buffer without finding
00089     // a whitespace character terminating the token, we need to
00090     // read more from the file.  Only try once.  If the token is
00091     // too large to fit in the buffer, give up.
00092   if (nextToken == bufferEnd)
00093   {
00094       // Shift the (possibly) partial token to the start of the buffer.
00095     size_t remaining = bufferEnd - result;
00096     memmove( buffer, result, remaining );
00097     result = buffer;
00098     nextToken =  result + remaining;
00099     
00100       // Fill the remainder of the buffer after the token.
00101     size_t count = fread( nextToken, 1, sizeof(buffer) - remaining - 1, filePtr );
00102     if (!count && !feof(filePtr))
00103     {
00104       readUtilPtr->report_error( "I/O Error\n" );
00105       return NULL;
00106     }
00107     bufferEnd = nextToken + count;
00108     
00109       // Continue to advance nextToken until we find the space
00110       // terminating the token.
00111     while (nextToken != bufferEnd && !isspace(*nextToken))
00112       ++nextToken;
00113   
00114     if (nextToken == bufferEnd) // EOF
00115     {
00116       *bufferEnd = '\0';
00117       ++bufferEnd;
00118     }
00119   }
00120   
00121     // Save terminating whitespace character (or NULL char if EOF).
00122   lastChar = *nextToken;
00123     // Put null in buffer to mark end of current token.
00124   *nextToken = '\0';
00125     // Advance nextToken to the next character to search next time.
00126   ++nextToken;
00127   return result;
00128 }
00129 
00130 bool FileTokenizer::get_double_internal( double& result )
00131 {
00132     // Get a token
00133   const char *token_end, *token = get_string( );
00134   if (!token)
00135     return false;
00136   
00137     // Check for hex value -- on some platforms (e.g. Linux), strtod
00138     // will accept hex values, on others (e.g. Sun) it wil not.  Force
00139     // failure on hex numbers for consistancy.
00140   if (token[0] && token[1] && token[0] == '0' && toupper(token[1]) == 'X')
00141   {
00142     readUtilPtr->report_error(
00143       "Syntax error at line %d: expected number, got \"%s\"",
00144       line_number(), token );
00145     return false;
00146   }
00147   
00148   
00149     // Parse token as double
00150   result = strtod( token, (char**)&token_end );
00151 
00152     // If the one past the last char read by strtod is
00153     // not the NULL character terminating the string,
00154     // then parse failed.
00155   if (*token_end)
00156   {
00157     readUtilPtr->report_error(
00158       "Syntax error at line %d: expected number, got \"%s\"",
00159       line_number(), token );
00160     return false;
00161   }
00162   
00163   return true;
00164 }
00165 
00166 bool FileTokenizer::get_float_internal( float& result )
00167 {
00168   double d;
00169   if (!get_double_internal( d ))
00170     return false;
00171   
00172   result = (float)d;
00173   return true;
00174 }
00175 
00176 bool FileTokenizer::get_long_int_internal( long& result )
00177 {
00178     // Get a token
00179   const char *token_end, *token = get_string( );
00180   if (!token)
00181     return false;
00182   
00183     // Parse token as long
00184   result = strtol( token, (char**)&token_end, 0 );
00185 
00186     // If the one past the last char read by strtol is
00187     // not the NULL character terminating the string,
00188     // then parse failed.
00189   if (*token_end)
00190   {
00191     readUtilPtr->report_error(
00192       "Syntax error at line %d: expected integer, got \"%s\"",
00193       line_number(), token );
00194     return false;
00195   }
00196 
00197   return true;
00198 }
00199 
00200 bool FileTokenizer::get_byte_internal( unsigned char& result )
00201 {
00202   long i;
00203   if (!get_long_int_internal( i ))
00204     return false;
00205   
00206   result = (unsigned char)i;
00207   if (i != (long)result)
00208   {
00209     readUtilPtr->report_error( "Numberic overflow at line %d.", line_number() );
00210     return false;
00211   }
00212   
00213   return true;
00214 }
00215 
00216 bool FileTokenizer::get_short_int_internal( short& result )
00217 {
00218   long i;
00219   if (!get_long_int_internal( i ))
00220     return false;
00221   
00222   result = (short)i;
00223   if (i != (long)result)
00224   {
00225     readUtilPtr->report_error( "Numberic overflow at line %d.", line_number() );
00226     return false;
00227   }
00228   
00229   return true;
00230 }
00231 
00232 bool FileTokenizer::get_integer_internal( int& result )
00233 {
00234   long i;
00235   if (!get_long_int_internal( i ))
00236     return false;
00237   
00238   result = (int)i;
00239   if (i != (long)result)
00240   {
00241     readUtilPtr->report_error( "Numberic overflow at line %d.", line_number() );
00242     return false;
00243   }
00244   
00245   return true;
00246 }
00247 
00248 bool FileTokenizer::get_boolean_internal( bool& result )
00249 {
00250     // Get a token
00251   const char *token = get_string( );
00252   if (!token)
00253     return false;
00254   
00255   if (token[1] || (token[0] != '0' && token[0] != '1'))
00256   {
00257     readUtilPtr->report_error( 
00258       "Syntax error at line %d: expected 0 or 1, got \"%s\"",
00259       line_number(), token );
00260     return false;
00261   }
00262 
00263   result = token[0] == '1';
00264   return true;
00265 }
00266 
00267 bool FileTokenizer::get_floats( size_t count, float* array )
00268 {
00269   for (size_t i = 0; i < count; ++i)
00270   {
00271     if (!get_float_internal( *array ))
00272       return false;
00273     ++array;
00274   }
00275   return true;
00276 }
00277 
00278 bool FileTokenizer::get_doubles( size_t count, double* array )
00279 {
00280   for (size_t i = 0; i < count; ++i)
00281   {
00282     if (!get_double_internal( *array ))
00283       return false;
00284     ++array;
00285   }
00286   return true;
00287 }
00288 
00289 bool FileTokenizer::get_bytes( size_t count, unsigned char* array )
00290 {
00291   for (size_t i = 0; i < count; ++i)
00292   {
00293     if (!get_byte_internal( *array ))
00294       return false;
00295     ++array;
00296   }
00297   return true;
00298 }
00299 
00300 bool FileTokenizer::get_short_ints( size_t count, short* array )
00301 {
00302   for (size_t i = 0; i < count; ++i)
00303   {
00304     if (!get_short_int_internal( *array ))
00305       return false;
00306     ++array;
00307   }
00308   return true;
00309 }
00310 
00311 
00312 bool FileTokenizer::get_integers( size_t count, int* array )
00313 {
00314   for (size_t i = 0; i < count; ++i)
00315   {
00316     if (!get_integer_internal( *array ))
00317       return false;
00318     ++array;
00319   }
00320   return true;
00321 }
00322 
00323 bool FileTokenizer::get_long_ints( size_t count, long* array )
00324 {
00325   for (size_t i = 0; i < count; ++i)
00326   {
00327     if (!get_long_int_internal( *array ))
00328       return false;
00329     ++array;
00330   }
00331   return true;
00332 }
00333 
00334 bool FileTokenizer::get_booleans( size_t count, bool* array )
00335 {
00336   for (size_t i = 0; i < count; ++i)
00337   {
00338     if (!get_boolean_internal( *array ))
00339       return false;
00340     ++array;
00341   }
00342   return true;
00343 }
00344 
00345 void FileTokenizer::unget_token()
00346 {
00347   if (nextToken - buffer < 2)
00348     return;
00349   
00350   --nextToken;
00351   *nextToken = lastChar;
00352   --nextToken;
00353   while (nextToken > buffer && *nextToken)
00354     --nextToken;
00355     
00356   if (!*nextToken)
00357     ++nextToken;
00358     
00359   lastChar = '\0';
00360 }
00361 
00362 bool FileTokenizer::match_token( const char* str, bool print_error )
00363 {
00364     // Get a token
00365   const char *token = get_string( );
00366   if (!token)
00367     return false;
00368 
00369     // Check if it matches
00370   if (0 == strcmp( token, str ))
00371     return true;
00372   
00373     // Construct error message
00374   if (print_error)
00375     readUtilPtr->report_error( "Syntax error at line %d: expected \"%s\", got \"%s\"",
00376                                 line_number(), str, token );
00377   return false;
00378 }  // namespace Mesquite
00379 
00380 
00381 int FileTokenizer::match_token( const char* const* list, bool print_error )
00382 {
00383     // Get a token
00384   const char *token = get_string( );
00385   if (!token)
00386     return false;
00387 
00388     // Check if it matches any input string
00389   const char* const* ptr;
00390   for (ptr = list; *ptr; ++ptr)
00391     if (0 == strcmp( token, *ptr ))
00392       return ptr - list + 1;
00393   
00394   if (!print_error)
00395     return false;
00396   
00397     // No match, constuct error message
00398   std::string message( "Parsing error at line " );
00399   char lineno[16];
00400   sprintf( lineno, "%d", line_number() );
00401   message += lineno;
00402   message += ": expected one of {";
00403   for (ptr = list; *ptr; ++ptr)
00404   {
00405     message += " ";
00406     message += *ptr;
00407   }
00408   message += " } got \"";
00409   message += token;
00410   message += "\"";
00411   readUtilPtr->report_error( message );
00412   return false;
00413 }
00414 
00415 bool FileTokenizer::get_newline( )
00416 {
00417   if (lastChar == '\n')
00418   {
00419     lastChar = ' ';
00420     ++lineNumber;
00421     return true;
00422   }
00423   
00424     // Loop until either we a) find a newline, b) find a non-whitespace
00425     // character or c) reach the end of the file.
00426   for (;;)
00427   {
00428       // If the buffer is empty, read more.
00429     if (nextToken == bufferEnd)
00430     {
00431       size_t count = fread( buffer, 1, sizeof(buffer), filePtr );
00432       if (!count)
00433       {
00434         if (eof())
00435           readUtilPtr->report_error( "File truncated at line %d.", line_number() );
00436         else
00437           readUtilPtr->report_error( "I/O Error" );
00438         break;
00439       }
00440       
00441       nextToken = buffer;
00442       bufferEnd = buffer + count;
00443     }
00444     
00445       // If the current character is not a space, the we've failed.
00446     if (!isspace(*nextToken))
00447     {
00448       readUtilPtr->report_error( "Expected newline at line %d.", line_number() );
00449       break;
00450     }
00451       
00452       // If the current space character is a newline,
00453       // increment the line number count.
00454     if (*nextToken == '\n')
00455     {
00456       ++lineNumber;
00457       ++nextToken;
00458       lastChar = ' ';
00459       return true;
00460     }
00461     ++nextToken;
00462   }
00463   
00464   return false;
00465 }
00466 
00467 bool FileTokenizer::get_binary( size_t size, void* mem )
00468 {
00469     // if data in buffer
00470   if (nextToken != bufferEnd) {
00471       // if requested size is less than buffer contents,
00472       // just pass back part of the buffer
00473     if (bufferEnd - nextToken <= (int)size) {
00474       memcpy( mem, nextToken, size );
00475       nextToken += size;
00476       return true;
00477     }
00478     
00479       // copy buffer contents into memory and clear buffer
00480     memcpy( mem, nextToken, bufferEnd - nextToken );
00481     size -= bufferEnd - nextToken;
00482     mem = reinterpret_cast<char*>(mem) + (bufferEnd - nextToken);
00483     nextToken = bufferEnd;
00484   }
00485   
00486     // read any additional data from file
00487   return size == fread( mem, 1, size, filePtr );
00488 }
00489 
00490 
00491 } // namespace moab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines