ReadHDF5VarLen.cpp

#include "ReadHDF5VarLen.hpp"
#include "ReadHDF5Dataset.hpp"
#include "H5Tpublic.h"
#include <cassert>

namespace moab {

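// Return whether file_id is the next entry in the list of set IDs whose
// contents are stored in the file as ranges of IDs rather than as explicit
// lists, advancing the iterator past the entry when it matches.  Callers
// must test file IDs in monotonically increasing order (see the assert).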
bool ReadHDF5VarLen::is_ranged( EntityHandle file_id,
                                Range::const_iterator& ranged_iter,
                                Range::const_iterator range_end )
{
  if (ranged_iter == range_end)
    return false;

  assert( file_id <= *ranged_iter );
  if (*ranged_iter != file_id)
    return false;

  ++ranged_iter;
  return true;
}

ErrorCode ReadHDF5VarLen::read_data( ReadHDF5Dataset& data_set,
                                     const Range& offsets,
                                     EntityHandle start_offset,
                                     hid_t data_type,
                                     const Range& file_ids,
                                     const std::vector<unsigned>& vals_per_ent,
                                     const Range& ranged_file_ids )
{
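    // Strategy: read the variable-length data table in buffer-sized chunks.
    // For every entity whose complete value list is contained in the current
    // chunk, pass the values directly to store_data().  If a chunk ends
    // partway through an entity's values, accumulate the fragment in
    // 'partial' and finish that entity when later chunks arrive.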
  ErrorCode rval;
  const size_t value_size = H5Tget_size( data_type );
  const size_t buffer_size = bufferSize / value_size;
  unsigned char* const data_buffer = reinterpret_cast<unsigned char*>(dataBuffer);
  std::vector<unsigned char> partial; // for when we read only part of the contents of a set/entity
  Range::const_iterator fileid_iter = file_ids.begin();
  Range::const_iterator ranged_iter = ranged_file_ids.begin();
  std::vector<unsigned>::const_iterator count_iter = vals_per_ent.begin();
  size_t count, offset;
  bool ranged;
  int nn = 0;

  assert( file_ids.size() == vals_per_ent.size() );

  try {
    data_set.set_file_ids( offsets, start_offset, buffer_size, data_type );
  }
  catch (ReadHDF5Dataset::Exception) {
    return MB_FAILURE;
  }

  dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

  while (!data_set.done()) {
    dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
    try {
      data_set.read( data_buffer, count );
    }
    catch (ReadHDF5Dataset::Exception) {
      return MB_FAILURE;
    }

    assert( 0 == count || fileid_iter != file_ids.end() );

      // Handle the special case where we read some, but not all,
      // of the data for an entity during the previous iteration.
    offset = 0;
    if (!partial.empty()) { // didn't read all of previous entity
      assert( fileid_iter != file_ids.end() );
      assert( 0 == (partial.size() % value_size) );
      size_t num_prev = partial.size() / value_size;
      offset = *count_iter - num_prev;
      if (offset > count) { // still don't have all
        partial.insert( partial.end(), data_buffer, data_buffer + count*value_size );
        continue;
      }

      partial.insert( partial.end(), data_buffer, data_buffer + offset*value_size );

      ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
      assert( partial.size() == *count_iter * value_size );
      rval = store_data( *fileid_iter, &partial[0], *count_iter, ranged );
      if (MB_SUCCESS != rval)
        return rval;

      ++count_iter;
      ++fileid_iter;
      partial.clear();
    }

      // Process contents for all entities for which we
      // have read the complete list.
    while (count_iter != vals_per_ent.end() && offset + *count_iter <= count) {
      assert( fileid_iter != file_ids.end() );
      ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
      rval = store_data( *fileid_iter, data_buffer + offset*value_size, *count_iter, ranged );
      if (MB_SUCCESS != rval)
        return rval;

      offset += *count_iter;
      ++count_iter;
      ++fileid_iter;
    }

      // If we did not read all of the final entity,
      // store what we did read to be processed in the
      // next iteration.
    if (offset < count) {
      assert( partial.empty() );
      partial.insert( partial.end(),
                      data_buffer + offset*value_size,
                      data_buffer + count*value_size );
    }
  }
  // NOTE: If the last set is empty, we will not process it here.
  // assert(fileid_iter == file_ids.end());
#ifndef NDEBUG
  for (; fileid_iter != file_ids.end(); ++fileid_iter)
    assert( 0 == *count_iter++ );
#endif
  return MB_SUCCESS;
}
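// Disabled multi-column variant of read_offsets(), retained for reference: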
/*
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set,
                                        const Range& file_ids,
                                        EntityHandle start_file_id,
                                        unsigned num_columns,
                                        const unsigned indices[],
                                        EntityHandle nudge,
                                        Range offsets_out[],
                                        std::vector<unsigned> counts_out[],
                                        Range* ranged_file_ids = 0 )
{
  const int local_index = 1;

    // sanity check
  const unsigned max_cols = ranged_file_ids ? data_set.columns() - 1 : data_set.columns();
  for (unsigned i = 0; i < num_columns; ++i) {
    assert(indices[i] < max_cols);
    if (indices[i] >= max_cols)
      return MB_FAILURE;
  }

    // Use hints to make sure insertion into ranges is O(1)
  std::vector<Range::iterator> hints;
  if (ranged_file_ids) {
    hints.resize( num_columns + 1 );
    hints.back() = ranged_file_ids->begin();
  }
  else {
    hints.resize( num_columns );
  }
  for (unsigned i = 0; i < num_columns; ++i) {
    offsets_out[i].clear();
    counts_out[i].clear();
    counts_out[i].reserve( file_ids.size() );
    hints[i] = offsets_out[i].begin();
  }

    // If we only need one column from a multi-column data set,
    // then read only that column.
  if (num_columns == 1 && data_set.columns() > 1 && !ranged_file_ids) {
    data_set.set_column( indices[0] );
    indices = &local_index;
  }
  else if (ranged_file_ids && data_set.columns() > 1 && 0 == num_columns) {
    data_set.set_column( data_set.columns() - 1 );
  }
    // NOTE: do not move this above the previous block.
    //       The previous block changes the results of data_set.columns()!
  const size_t table_columns = data_set.columns();

    // Calculate which rows we need to read from the offsets table
  Range rows;
  Range::iterator hint = rows.begin();
  Range::const_pair_iterator pair = file_ids.const_pair_begin();
    // Special case if reading the first entity in the dataset, because
    // there is no previous end value.
  if (pair != file_ids.const_pair_end() && pair->first == start_file_id) {
    hint = rows.insert( nudge, pair->second - start_file_id + nudge );
    ++pair;
  }
  while (pair != file_ids.const_pair_end()) {
    hint = rows.insert( hint,
                        pair->first + nudge - 1 - start_file_id,
                        pair->second + nudge - start_file_id );
    ++pair;
  }

    // set up read of offsets dataset
  hsize_t buffer_size = bufferSize / (sizeof(hssize_t) * data_set.columns());
  hssize_t* buffer = reinterpret_cast<hssize_t*>(dataBuffer);
  data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
  std::vector<hssize_t> prev_end;
    // If we're reading the first row of the table, then the
    // previous end is implicitly -1.
  if (!file_ids.empty() && file_ids.front() == start_file_id)
    prev_end.resize( num_columns, -1 );

    // read offset table
  size_t count, offset;
  Range::const_iterator fiter = file_ids.begin();
  while (!data_set.done()) {
    try {
      data_set.read( buffer, count );
    }
    catch (ReadHDF5Dataset::Exception) {
      return MB_FAILURE;
    }
    if (!count) // might have been a NULL read for collective I/O
      continue;

      // If the previous end values were read in the previous iteration,
      // then they're stored in prev_end.
    offset = 0;
    if (!prev_end.empty()) {
      for (unsigned i = 0; i < num_columns; ++i) {
        counts_out[i].push_back( buffer[indices[i]] - prev_end[i] );
        hints[i] = offsets_out[i].insert( hints[i],
                                          prev_end[i] + 1 + nudge,
                                          buffer[indices[i]] + nudge );
      }
      if (ranged_file_ids && (buffer[table_columns-1] & mhdf_SET_RANGE_BIT))
        hints.back() = ranged_file_ids->insert( hints.back(), *fiter );
      ++fiter;
      offset = 1;
      prev_end.clear();
    }

    while (offset < count) {
      assert(fiter != file_ids.end());
        // Whenever we get to a gap between blocks we need to
        // advance one step because we read an extra end id
        // preceding each block.
      if (fiter == fiter.start_of_block()) {
        if (offset == count-1)
          break;
        ++offset;
      }

      for (unsigned i = 0; i < num_columns; ++i) {
        size_t s = buffer[(offset-1)*table_columns+indices[i]] + 1;
        size_t e = buffer[ offset   *table_columns+indices[i]];
        counts_out[i].push_back( e - s + 1 );
        hints[i] = offsets_out[i].insert( hints[i], s + nudge, e + nudge );
      }
      if (ranged_file_ids && (buffer[offset*table_columns+table_columns-1] & mhdf_SET_RANGE_BIT))
        hints.back() = ranged_file_ids->insert( hints.back(), *fiter );

      ++fiter;
      ++offset;
    }

      // If we did not end on the boundary between two blocks,
      // then we need to save the end indices for the final entry
      // for use in the next iteration.  Similarly, if we ended
      // with extra values that were read with the express intention
      // of getting the previous end values for a block, we need to
      // save them.  This case only arises if we hit the break in
      // the above loop.
    if (fiter != fiter.start_of_block() || offset < count) {
      assert(prev_end.empty());
      if (offset == count) {
        --offset;
        assert(fiter != fiter.start_of_block());
      }
      else {
        assert(offset+1 == count);
        assert(fiter == fiter.start_of_block());
      }
      for (unsigned i = 0; i < num_columns; ++i)
        prev_end.push_back( buffer[offset*table_columns+indices[i]] );
    }
  }
  assert(prev_end.empty());
  assert(fiter == file_ids.end());

  return MB_SUCCESS;
}
*/
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set,
                                        const Range& file_ids,
                                        EntityHandle start_file_id,
                                        EntityHandle nudge,
                                        Range& offsets_out,
                                        std::vector<unsigned>& counts_out )
{
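    // For each file ID, we need the entity's end offset from the offsets
    // table and the end offset of the preceding entity: counts_out gets
    // (end - prev_end) values for the entity, and offsets_out gets the
    // corresponding inclusive row range [prev_end+1, end] of the data table.
    // Every row index is shifted by 'nudge' (here and when selecting rows
    // below), presumably so that row zero never maps to the reserved zero
    // handle when stored in a Range.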

    // Use hints to make sure insertion into ranges is O(1)
  offsets_out.clear();
  counts_out.clear();
  counts_out.reserve( file_ids.size() );
  Range::iterator hint;

    // Calculate which rows we need to read from the offsets table
  Range rows;
  hint = rows.begin();
  Range::const_pair_iterator pair = file_ids.const_pair_begin();
    // Special case if reading the first entity in the dataset, because
    // there is no previous end value.
  if (pair != file_ids.const_pair_end() && pair->first == start_file_id) {
    hint = rows.insert( nudge, pair->second - start_file_id + nudge );
    ++pair;
  }
  while (pair != file_ids.const_pair_end()) {
    hint = rows.insert( hint,
                        pair->first  - start_file_id + nudge - 1,
                        pair->second - start_file_id + nudge );
    ++pair;
  }

    // set up read of offsets dataset
  hsize_t buffer_size = bufferSize / sizeof(hssize_t);
  hssize_t* buffer = reinterpret_cast<hssize_t*>(dataBuffer);
  data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
  hssize_t prev_end;
  bool have_prev_end = false;
    // If we're reading the first row of the table, then the
    // previous end is implicitly -1.
  if (!file_ids.empty() && file_ids.front() == start_file_id) {
    prev_end = -1;
    have_prev_end = true;
  }

  dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

    // read offset table
  size_t count, offset;
  Range::const_iterator fiter = file_ids.begin();
  hint = offsets_out.begin();
  int nn = 0;
  while (!data_set.done()) {
    dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
    try {
      data_set.read( buffer, count );
    }
    catch (ReadHDF5Dataset::Exception) {
      return MB_FAILURE;
    }
    if (!count) // might have been a NULL read for collective I/O
      continue;

      // If the previous end value was read in the previous iteration,
      // then it is stored in prev_end.
    offset = 0;
    if (have_prev_end) {
      counts_out.push_back( buffer[0] - prev_end );
      hint = offsets_out.insert( hint,
                                 prev_end + 1 + nudge,
                                 buffer[0] + nudge );
      ++fiter;
      offset = 1;
      have_prev_end = false;
    }

    while (offset < count) {
      assert(fiter != file_ids.end());
        // Whenever we get to a gap between blocks we need to
        // advance one step because we read an extra end id
        // preceding each block.
      if (fiter == fiter.start_of_block()) {
        if (offset == count-1)
          break;
        ++offset;
      }

      size_t s = buffer[offset-1] + 1;
      size_t e = buffer[offset];
      counts_out.push_back( e - s + 1 );
      hint = offsets_out.insert( hint, s + nudge, e + nudge );

      ++fiter;
      ++offset;
    }

      // If we did not end on the boundary between two blocks,
      // then we need to save the end index for the final entry
      // for use in the next iteration.  Similarly, if we ended
      // with an extra value that was read with the express intention
      // of getting the previous end value for a block, we need to
      // save it.  This case only arises if we hit the break in
      // the above loop.
    if (fiter != fiter.start_of_block() || offset < count) {
      assert(!have_prev_end);
      if (offset == count) {
        --offset;
        assert(fiter != fiter.start_of_block());
      }
      else {
        assert(offset+1 == count);
        assert(fiter == fiter.start_of_block());
      }
      have_prev_end = true;
      prev_end = buffer[offset];
    }
  }
  assert(!have_prev_end);
  assert(fiter == file_ids.end());

  return MB_SUCCESS;
}

} // namespace moab
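
// Illustrative usage sketch (not part of MOAB): a hypothetical subclass
// showing how read_offsets() and read_data() are meant to cooperate.  The
// class name, read_all(), and the store_data() body are assumptions for
// illustration only; the store_data() signature and the base-class
// constructor arguments are inferred from the calls and members used above.
/*
#include "ReadHDF5VarLen.hpp"
#include "ReadHDF5Dataset.hpp"

namespace moab {

class ExampleVarLenReader : public ReadHDF5VarLen {
public:
  ExampleVarLenReader( DebugOutput& dbg, void* buffer, size_t buffer_size )
    : ReadHDF5VarLen( dbg, buffer, buffer_size ) {}

  ErrorCode read_all( ReadHDF5Dataset& offset_table, // per-entity end offsets
                      ReadHDF5Dataset& value_table,  // variable-length values
                      const Range& file_ids,
                      EntityHandle start_file_id,
                      hid_t value_type )
  {
    const EntityHandle nudge = 1; // keep row zero out of the handle Range
    Range rows;
    std::vector<unsigned> counts;
      // Pass 1: convert consecutive end offsets into per-entity value
      // counts and the set of rows to read from the value table.
    ErrorCode rval = read_offsets( offset_table, file_ids, start_file_id,
                                   nudge, rows, counts );
    if (MB_SUCCESS != rval)
      return rval;
      // Pass 2: stream the value table; store_data() fires once per entity.
    return read_data( value_table, rows, nudge, value_type,
                      file_ids, counts, Range() ); // no ranged sets
  }

protected:
  ErrorCode store_data( EntityHandle file_id, void* data,
                        long num_values, bool ranged )
  {
    // Application-specific: e.g., attach the num_values values read for
    // file_id to the corresponding entity.
    return MB_SUCCESS;
  }
};

} // namespace moab
*/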