moab
|
00001 00016 #include "FileTokenizer.hpp" 00017 #include "moab/ReadUtilIface.hpp" 00018 #include <cstring> 00019 #include <cctype> 00020 #include <string> 00021 #include <cstdlib> 00022 00023 namespace moab { 00024 00025 using namespace std; 00026 00027 FileTokenizer::FileTokenizer( FILE* file_ptr, ReadUtilIface* rif_ptr ) 00028 : filePtr( file_ptr ), 00029 readUtilPtr( rif_ptr ), 00030 nextToken( buffer ), 00031 bufferEnd( buffer ), 00032 lineNumber( 1 ), 00033 lastChar( '\0' ) 00034 {} 00035 00036 FileTokenizer::~FileTokenizer() 00037 { fclose( filePtr ); } 00038 00039 bool FileTokenizer::eof() const 00040 { return nextToken == bufferEnd && feof(filePtr); } 00041 00042 const char* FileTokenizer::get_string( ) 00043 { 00044 // If the whitepsace character marking the end of the 00045 // last token was a newline, increment the line count. 00046 if (lastChar == '\n') 00047 ++lineNumber; 00048 00049 // Loop until either found the start of a token to return or have 00050 // reached the end of the file. 00051 for (;;) 00052 { 00053 // If the buffer is empty, read more. 00054 if (nextToken == bufferEnd) 00055 { 00056 size_t count = fread( buffer, 1, sizeof(buffer) - 1, filePtr ); 00057 if (!count) 00058 { 00059 if (feof(filePtr)) 00060 readUtilPtr->report_error( "File truncated at line %d\n", line_number() ); 00061 else 00062 readUtilPtr->report_error( "I/O Error\n" ); 00063 return NULL; 00064 } 00065 00066 nextToken = buffer; 00067 bufferEnd = buffer + count; 00068 } 00069 00070 // If the current character is not a space, we've found a token. 00071 if (!isspace(*nextToken)) 00072 break; 00073 00074 // If the current space character is a newline, 00075 // increment the line number count. 00076 if (*nextToken == '\n') 00077 ++lineNumber; 00078 ++nextToken; 00079 } 00080 00081 // Store the start of the token in "result" and 00082 // advance "nextToken" to one past the end of the 00083 // token. 00084 char* result = nextToken; 00085 while (nextToken != bufferEnd && !isspace(*nextToken)) 00086 ++nextToken; 00087 00088 // If we have reached the end of the buffer without finding 00089 // a whitespace character terminating the token, we need to 00090 // read more from the file. Only try once. If the token is 00091 // too large to fit in the buffer, give up. 00092 if (nextToken == bufferEnd) 00093 { 00094 // Shift the (possibly) partial token to the start of the buffer. 00095 size_t remaining = bufferEnd - result; 00096 memmove( buffer, result, remaining ); 00097 result = buffer; 00098 nextToken = result + remaining; 00099 00100 // Fill the remainder of the buffer after the token. 00101 size_t count = fread( nextToken, 1, sizeof(buffer) - remaining - 1, filePtr ); 00102 if (!count && !feof(filePtr)) 00103 { 00104 readUtilPtr->report_error( "I/O Error\n" ); 00105 return NULL; 00106 } 00107 bufferEnd = nextToken + count; 00108 00109 // Continue to advance nextToken until we find the space 00110 // terminating the token. 00111 while (nextToken != bufferEnd && !isspace(*nextToken)) 00112 ++nextToken; 00113 00114 if (nextToken == bufferEnd) // EOF 00115 { 00116 *bufferEnd = '\0'; 00117 ++bufferEnd; 00118 } 00119 } 00120 00121 // Save terminating whitespace character (or NULL char if EOF). 00122 lastChar = *nextToken; 00123 // Put null in buffer to mark end of current token. 00124 *nextToken = '\0'; 00125 // Advance nextToken to the next character to search next time. 00126 ++nextToken; 00127 return result; 00128 } 00129 00130 bool FileTokenizer::get_double_internal( double& result ) 00131 { 00132 // Get a token 00133 const char *token_end, *token = get_string( ); 00134 if (!token) 00135 return false; 00136 00137 // Check for hex value -- on some platforms (e.g. Linux), strtod 00138 // will accept hex values, on others (e.g. Sun) it wil not. Force 00139 // failure on hex numbers for consistancy. 00140 if (token[0] && token[1] && token[0] == '0' && toupper(token[1]) == 'X') 00141 { 00142 readUtilPtr->report_error( 00143 "Syntax error at line %d: expected number, got \"%s\"", 00144 line_number(), token ); 00145 return false; 00146 } 00147 00148 00149 // Parse token as double 00150 result = strtod( token, (char**)&token_end ); 00151 00152 // If the one past the last char read by strtod is 00153 // not the NULL character terminating the string, 00154 // then parse failed. 00155 if (*token_end) 00156 { 00157 readUtilPtr->report_error( 00158 "Syntax error at line %d: expected number, got \"%s\"", 00159 line_number(), token ); 00160 return false; 00161 } 00162 00163 return true; 00164 } 00165 00166 bool FileTokenizer::get_float_internal( float& result ) 00167 { 00168 double d; 00169 if (!get_double_internal( d )) 00170 return false; 00171 00172 result = (float)d; 00173 return true; 00174 } 00175 00176 bool FileTokenizer::get_long_int_internal( long& result ) 00177 { 00178 // Get a token 00179 const char *token_end, *token = get_string( ); 00180 if (!token) 00181 return false; 00182 00183 // Parse token as long 00184 result = strtol( token, (char**)&token_end, 0 ); 00185 00186 // If the one past the last char read by strtol is 00187 // not the NULL character terminating the string, 00188 // then parse failed. 00189 if (*token_end) 00190 { 00191 readUtilPtr->report_error( 00192 "Syntax error at line %d: expected integer, got \"%s\"", 00193 line_number(), token ); 00194 return false; 00195 } 00196 00197 return true; 00198 } 00199 00200 bool FileTokenizer::get_byte_internal( unsigned char& result ) 00201 { 00202 long i; 00203 if (!get_long_int_internal( i )) 00204 return false; 00205 00206 result = (unsigned char)i; 00207 if (i != (long)result) 00208 { 00209 readUtilPtr->report_error( "Numberic overflow at line %d.", line_number() ); 00210 return false; 00211 } 00212 00213 return true; 00214 } 00215 00216 bool FileTokenizer::get_short_int_internal( short& result ) 00217 { 00218 long i; 00219 if (!get_long_int_internal( i )) 00220 return false; 00221 00222 result = (short)i; 00223 if (i != (long)result) 00224 { 00225 readUtilPtr->report_error( "Numberic overflow at line %d.", line_number() ); 00226 return false; 00227 } 00228 00229 return true; 00230 } 00231 00232 bool FileTokenizer::get_integer_internal( int& result ) 00233 { 00234 long i; 00235 if (!get_long_int_internal( i )) 00236 return false; 00237 00238 result = (int)i; 00239 if (i != (long)result) 00240 { 00241 readUtilPtr->report_error( "Numberic overflow at line %d.", line_number() ); 00242 return false; 00243 } 00244 00245 return true; 00246 } 00247 00248 bool FileTokenizer::get_boolean_internal( bool& result ) 00249 { 00250 // Get a token 00251 const char *token = get_string( ); 00252 if (!token) 00253 return false; 00254 00255 if (token[1] || (token[0] != '0' && token[0] != '1')) 00256 { 00257 readUtilPtr->report_error( 00258 "Syntax error at line %d: expected 0 or 1, got \"%s\"", 00259 line_number(), token ); 00260 return false; 00261 } 00262 00263 result = token[0] == '1'; 00264 return true; 00265 } 00266 00267 bool FileTokenizer::get_floats( size_t count, float* array ) 00268 { 00269 for (size_t i = 0; i < count; ++i) 00270 { 00271 if (!get_float_internal( *array )) 00272 return false; 00273 ++array; 00274 } 00275 return true; 00276 } 00277 00278 bool FileTokenizer::get_doubles( size_t count, double* array ) 00279 { 00280 for (size_t i = 0; i < count; ++i) 00281 { 00282 if (!get_double_internal( *array )) 00283 return false; 00284 ++array; 00285 } 00286 return true; 00287 } 00288 00289 bool FileTokenizer::get_bytes( size_t count, unsigned char* array ) 00290 { 00291 for (size_t i = 0; i < count; ++i) 00292 { 00293 if (!get_byte_internal( *array )) 00294 return false; 00295 ++array; 00296 } 00297 return true; 00298 } 00299 00300 bool FileTokenizer::get_short_ints( size_t count, short* array ) 00301 { 00302 for (size_t i = 0; i < count; ++i) 00303 { 00304 if (!get_short_int_internal( *array )) 00305 return false; 00306 ++array; 00307 } 00308 return true; 00309 } 00310 00311 00312 bool FileTokenizer::get_integers( size_t count, int* array ) 00313 { 00314 for (size_t i = 0; i < count; ++i) 00315 { 00316 if (!get_integer_internal( *array )) 00317 return false; 00318 ++array; 00319 } 00320 return true; 00321 } 00322 00323 bool FileTokenizer::get_long_ints( size_t count, long* array ) 00324 { 00325 for (size_t i = 0; i < count; ++i) 00326 { 00327 if (!get_long_int_internal( *array )) 00328 return false; 00329 ++array; 00330 } 00331 return true; 00332 } 00333 00334 bool FileTokenizer::get_booleans( size_t count, bool* array ) 00335 { 00336 for (size_t i = 0; i < count; ++i) 00337 { 00338 if (!get_boolean_internal( *array )) 00339 return false; 00340 ++array; 00341 } 00342 return true; 00343 } 00344 00345 void FileTokenizer::unget_token() 00346 { 00347 if (nextToken - buffer < 2) 00348 return; 00349 00350 --nextToken; 00351 *nextToken = lastChar; 00352 --nextToken; 00353 while (nextToken > buffer && *nextToken) 00354 --nextToken; 00355 00356 if (!*nextToken) 00357 ++nextToken; 00358 00359 lastChar = '\0'; 00360 } 00361 00362 bool FileTokenizer::match_token( const char* str, bool print_error ) 00363 { 00364 // Get a token 00365 const char *token = get_string( ); 00366 if (!token) 00367 return false; 00368 00369 // Check if it matches 00370 if (0 == strcmp( token, str )) 00371 return true; 00372 00373 // Construct error message 00374 if (print_error) 00375 readUtilPtr->report_error( "Syntax error at line %d: expected \"%s\", got \"%s\"", 00376 line_number(), str, token ); 00377 return false; 00378 } // namespace Mesquite 00379 00380 00381 int FileTokenizer::match_token( const char* const* list, bool print_error ) 00382 { 00383 // Get a token 00384 const char *token = get_string( ); 00385 if (!token) 00386 return false; 00387 00388 // Check if it matches any input string 00389 const char* const* ptr; 00390 for (ptr = list; *ptr; ++ptr) 00391 if (0 == strcmp( token, *ptr )) 00392 return ptr - list + 1; 00393 00394 if (!print_error) 00395 return false; 00396 00397 // No match, constuct error message 00398 std::string message( "Parsing error at line " ); 00399 char lineno[16]; 00400 sprintf( lineno, "%d", line_number() ); 00401 message += lineno; 00402 message += ": expected one of {"; 00403 for (ptr = list; *ptr; ++ptr) 00404 { 00405 message += " "; 00406 message += *ptr; 00407 } 00408 message += " } got \""; 00409 message += token; 00410 message += "\""; 00411 readUtilPtr->report_error( message ); 00412 return false; 00413 } 00414 00415 bool FileTokenizer::get_newline( ) 00416 { 00417 if (lastChar == '\n') 00418 { 00419 lastChar = ' '; 00420 ++lineNumber; 00421 return true; 00422 } 00423 00424 // Loop until either we a) find a newline, b) find a non-whitespace 00425 // character or c) reach the end of the file. 00426 for (;;) 00427 { 00428 // If the buffer is empty, read more. 00429 if (nextToken == bufferEnd) 00430 { 00431 size_t count = fread( buffer, 1, sizeof(buffer), filePtr ); 00432 if (!count) 00433 { 00434 if (eof()) 00435 readUtilPtr->report_error( "File truncated at line %d.", line_number() ); 00436 else 00437 readUtilPtr->report_error( "I/O Error" ); 00438 break; 00439 } 00440 00441 nextToken = buffer; 00442 bufferEnd = buffer + count; 00443 } 00444 00445 // If the current character is not a space, the we've failed. 00446 if (!isspace(*nextToken)) 00447 { 00448 readUtilPtr->report_error( "Expected newline at line %d.", line_number() ); 00449 break; 00450 } 00451 00452 // If the current space character is a newline, 00453 // increment the line number count. 00454 if (*nextToken == '\n') 00455 { 00456 ++lineNumber; 00457 ++nextToken; 00458 lastChar = ' '; 00459 return true; 00460 } 00461 ++nextToken; 00462 } 00463 00464 return false; 00465 } 00466 00467 bool FileTokenizer::get_binary( size_t size, void* mem ) 00468 { 00469 // if data in buffer 00470 if (nextToken != bufferEnd) { 00471 // if requested size is less than buffer contents, 00472 // just pass back part of the buffer 00473 if (bufferEnd - nextToken <= (int)size) { 00474 memcpy( mem, nextToken, size ); 00475 nextToken += size; 00476 return true; 00477 } 00478 00479 // copy buffer contents into memory and clear buffer 00480 memcpy( mem, nextToken, bufferEnd - nextToken ); 00481 size -= bufferEnd - nextToken; 00482 mem = reinterpret_cast<char*>(mem) + (bufferEnd - nextToken); 00483 nextToken = bufferEnd; 00484 } 00485 00486 // read any additional data from file 00487 return size == fread( mem, 1, size, filePtr ); 00488 } 00489 00490 00491 } // namespace moab