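// Implementation of ReadHDF5VarLen: shared logic for reading
// variable-length data (e.g. set contents or variable-length tags)
// stored as an offsets table paired with a values table.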
#include "ReadHDF5VarLen.hpp"
#include "ReadHDF5Dataset.hpp"
#include "H5Tpublic.h"
#include <assert.h>

namespace moab {

bool ReadHDF5VarLen::is_ranged( EntityHandle file_id,
                                Range::const_iterator& ranged_iter,
                                Range::const_iterator range_end )
{
  if (ranged_iter == range_end)
    return false;

  assert( file_id <= *ranged_iter );
  if (*ranged_iter != file_id)
    return false;

  ++ranged_iter;
  return true;
}
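// Read the values table in buffer-sized chunks, passing each entity's
// complete value list to store_data(). file_ids and vals_per_ent run in
// parallel, ordered by file ID. An entity whose values straddle a chunk
// boundary is accumulated in 'partial' and flushed once the remainder
// arrives in the next chunk.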
ErrorCode ReadHDF5VarLen::read_data( ReadHDF5Dataset& data_set,
                                     const Range& offsets,
                                     EntityHandle start_offset,
                                     hid_t data_type,
                                     const Range& file_ids,
                                     const std::vector<unsigned>& vals_per_ent,
                                     const Range& ranged_file_ids )
{
  ErrorCode rval;
  const size_t value_size = H5Tget_size( data_type );
  const size_t buffer_size = bufferSize / value_size;
  unsigned char* const data_buffer = reinterpret_cast<unsigned char*>(dataBuffer);
  std::vector<unsigned char> partial; // for when we read only part of the contents of a set/entity
  Range::const_iterator fileid_iter = file_ids.begin();
  Range::const_iterator ranged_iter = ranged_file_ids.begin();
  std::vector<unsigned>::const_iterator count_iter = vals_per_ent.begin();
  size_t count, offset;
  bool ranged;
  int nn = 0;

  assert( file_ids.size() == vals_per_ent.size() );

  try {
    data_set.set_file_ids( offsets, start_offset, buffer_size, data_type );
  }
  catch (ReadHDF5Dataset::Exception) {
    return MB_FAILURE;
  }

  dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

  while (!data_set.done()) {
    dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
    try {
      data_set.read( data_buffer, count );
    }
    catch (ReadHDF5Dataset::Exception) {
      return MB_FAILURE;
    }

    assert( 0 == count || fileid_iter != file_ids.end() );

    // Handle 'special' case where we read some, but not all
    // of the data for an entity during the last iteration.
    offset = 0;
    if (!partial.empty()) { // didn't read all of previous entity
      assert( fileid_iter != file_ids.end() );
      assert( 0 == (partial.size() % value_size) );
      size_t num_prev = partial.size() / value_size;
      offset = *count_iter - num_prev;
      if (offset > count) { // still don't have all
        partial.insert( partial.end(), data_buffer, data_buffer + count*value_size );
        continue;
      }

      partial.insert( partial.end(), data_buffer, data_buffer + offset*value_size );

      ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
      assert( partial.size() == *count_iter * value_size );
      rval = store_data( *fileid_iter, &partial[0], *count_iter, ranged );
      if (MB_SUCCESS != rval)
        return rval;

      ++count_iter;
      ++fileid_iter;
      partial.clear();
    }

    // Process contents for all entities for which we
    // have read the complete list
    while (count_iter != vals_per_ent.end() && offset + *count_iter <= count) {
      assert( fileid_iter != file_ids.end() );
      ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
      rval = store_data( *fileid_iter, data_buffer + offset*value_size, *count_iter, ranged );
      if (MB_SUCCESS != rval)
        return rval;

      offset += *count_iter;
      ++count_iter;
      ++fileid_iter;
    }

    // If we did not read all of the final entity,
    // store what we did read to be processed in the
    // next iteration
    if (offset < count) {
      assert( partial.empty() );
      partial.insert( partial.end(),
                      data_buffer + offset*value_size,
                      data_buffer + count*value_size );
    }
  }
  // NOTE: If the last set is empty, we will not process it here
  // assert(fileid_iter == file_ids.end());
#ifndef NDEBUG
  for (; fileid_iter != file_ids.end(); ++fileid_iter)
    assert( 0 == *count_iter++ );
#endif
  return MB_SUCCESS;
}
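// NOTE: The multi-column variant of read_offsets() below has been
// disabled (commented out); the single-column version following it is
// the one in use.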
/*
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set,
                                        const Range& file_ids,
                                        EntityHandle start_file_id,
                                        unsigned num_columns,
                                        const unsigned indices[],
                                        EntityHandle nudge,
                                        Range offsets_out[],
                                        std::vector<unsigned> counts_out[],
                                        Range* ranged_file_ids = 0 )
{
  const unsigned local_index = 1;

  // sanity check
  const unsigned max_cols = ranged_file_ids ? data_set.columns() - 1 : data_set.columns();
  for (unsigned i = 0; i < num_columns; ++i) {
    assert(indices[i] < max_cols);
    if (indices[i] >= max_cols)
      return MB_FAILURE;
  }

  // Use hints to make sure insertion into ranges is O(1)
  std::vector<Range::iterator> hints;
  if (ranged_file_ids) {
    hints.resize( num_columns + 1 );
    hints.back() = ranged_file_ids->begin();
  }
  else {
    hints.resize( num_columns );
  }
  for (unsigned i = 0; i < num_columns; ++i) {
    offsets_out[i].clear();
    counts_out[i].clear();
    counts_out[i].reserve( file_ids.size() );
    hints[i] = offsets_out[i].begin();
  }

  // If we only need one column from a multi-column data set,
  // then read only that column.
  if (num_columns == 1 && data_set.columns() > 1 && !ranged_file_ids) {
    data_set.set_column( indices[0] );
    indices = &local_index;
  }
  else if (ranged_file_ids && data_set.columns() > 1 && 0 == num_columns) {
    data_set.set_column( data_set.columns() - 1 );
  }
  // NOTE: do not move this above the previous block.
  // The previous block changes the results of data_set.columns()!
  const size_t table_columns = data_set.columns();

  // Calculate which rows we need to read from the offsets table
  Range rows;
  Range::iterator hint = rows.begin();
  Range::const_pair_iterator pair = file_ids.const_pair_begin();
  // special case if reading first entity in dataset, because
  // there is no previous end value.
  if (pair != file_ids.const_pair_end() && pair->first == start_file_id) {
    hint = rows.insert( nudge, pair->second - start_file_id + nudge );
    ++pair;
  }
  while (pair != file_ids.const_pair_end()) {
    hint = rows.insert( hint,
                        pair->first  + nudge - 1 - start_file_id,
                        pair->second + nudge     - start_file_id );
    ++pair;
  }

  // set up read of offsets dataset
  hsize_t buffer_size = bufferSize / (sizeof(hssize_t) * data_set.columns());
  hssize_t* buffer = reinterpret_cast<hssize_t*>(dataBuffer);
  data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
  std::vector<hssize_t> prev_end;
  // If we're reading the first row of the table, then the
  // previous end is implicitly -1.
  if (!file_ids.empty() && file_ids.front() == start_file_id)
    prev_end.resize( num_columns, -1 );

  // read offset table
  size_t count, offset;
  Range::const_iterator fiter = file_ids.begin();
  while (!data_set.done()) {
    try {
      data_set.read( buffer, count );
    }
    catch (ReadHDF5Dataset::Exception) {
      return MB_FAILURE;
    }
    if (!count) // might have been NULL read for collective IO
      continue;

    // If the previous end values were read in the previous iteration,
    // then they're stored in prev_end.
    offset = 0;
    if (!prev_end.empty()) {
      for (unsigned i = 0; i < num_columns; ++i) {
        counts_out[i].push_back( buffer[indices[i]] - prev_end[i] );
        hints[i] = offsets_out[i].insert( hints[i],
                                          prev_end[i] + 1 + nudge,
                                          buffer[indices[i]] + nudge );
      }
      if (ranged_file_ids && (buffer[table_columns-1] & mhdf_SET_RANGE_BIT))
        hints.back() = ranged_file_ids->insert( hints.back(), *fiter );
      ++fiter;
      offset = 1;
      prev_end.clear();
    }

    while (offset < count) {
      assert(fiter != file_ids.end());
      // whenever we get to a gap between blocks we need to
      // advance one step because we read an extra end id
      // preceding each block
      if (fiter == fiter.start_of_block()) {
        if (offset == count-1)
          break;
        ++offset;
      }

      for (unsigned i = 0; i < num_columns; ++i) {
        size_t s = buffer[(offset-1)*table_columns + indices[i]] + 1;
        size_t e = buffer[ offset   *table_columns + indices[i]];
        counts_out[i].push_back( e - s + 1 );
        hints[i] = offsets_out[i].insert( hints[i], s, e );
      }
      if (ranged_file_ids && (buffer[offset*table_columns + table_columns - 1] & mhdf_SET_RANGE_BIT))
        hints.back() = ranged_file_ids->insert( hints.back(), *fiter );

      ++fiter;
      ++offset;
    }

    // If we did not end on the boundary between two blocks,
    // then we need to save the end indices for the final entry
    // for use in the next iteration. Similarly, if we ended
    // with extra values that were read with the express intention
    // of getting the previous end values for a block, we need to
    // save them. This case only arises if we hit the break in
    // the above loop.
    if (fiter != fiter.start_of_block() || offset < count) {
      assert(prev_end.empty());
      if (offset == count) {
        --offset;
        assert(fiter != fiter.start_of_block());
      }
      else {
        assert(offset+1 == count);
        assert(fiter == fiter.start_of_block());
      }
      for (unsigned i = 0; i < num_columns; ++i)
        prev_end.push_back( buffer[offset*table_columns + indices[i]] );
    }
  }
  assert(prev_end.empty());
  assert(fiter == file_ids.end());

  return MB_SUCCESS;
}
*/
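// Read the offsets table for the entities in file_ids, producing the
// number of values per entity (counts_out) and the rows of the values
// table to read (offsets_out). An entity's values occupy rows
// (previous end, end] of the values table, and the first row of the
// table has an implicit previous end of -1. The caller-supplied nudge
// is added to every row number stored in offsets_out.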
ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set,
                                        const Range& file_ids,
                                        EntityHandle start_file_id,
                                        EntityHandle nudge,
                                        Range& offsets_out,
                                        std::vector<unsigned>& counts_out )
{
  // Use hints to make sure insertion into ranges is O(1)
  offsets_out.clear();
  counts_out.clear();
  counts_out.reserve( file_ids.size() );
  Range::iterator hint;

  // Calculate which rows we need to read from the offsets table
  Range rows;
  hint = rows.begin();
  Range::const_pair_iterator pair = file_ids.const_pair_begin();
  // special case if reading first entity in dataset, because
  // there is no previous end value.
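  // When the first block starts at the dataset's first entity there is
  // no preceding row at all, so that block starts at row 'nudge'; every
  // other block also includes the row just before it, which holds the
  // previous entity's end offset.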
  if (pair != file_ids.const_pair_end() && pair->first == start_file_id) {
    hint = rows.insert( nudge, pair->second - start_file_id + nudge );
    ++pair;
  }
  while (pair != file_ids.const_pair_end()) {
    hint = rows.insert( hint,
                        pair->first  - start_file_id + nudge - 1,
                        pair->second - start_file_id + nudge );
    ++pair;
  }

  // set up read of offsets dataset
  hsize_t buffer_size = bufferSize / sizeof(hssize_t);
  hssize_t* buffer = reinterpret_cast<hssize_t*>(dataBuffer);
  data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
  hssize_t prev_end;
  bool have_prev_end = false;
  // If we're reading the first row of the table, then the
  // previous end is implicitly -1.
  if (!file_ids.empty() && file_ids.front() == start_file_id) {
    prev_end = -1;
    have_prev_end = true;
  }

  dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );

  // read offset table
  size_t count, offset;
  Range::const_iterator fiter = file_ids.begin();
  hint = offsets_out.begin();
  int nn = 0;
  while (!data_set.done()) {
    dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
    try {
      data_set.read( buffer, count );
    }
    catch (ReadHDF5Dataset::Exception) {
      return MB_FAILURE;
    }
    if (!count) // might have been NULL read for collective IO
      continue;

    // If the previous end value was read in the previous iteration,
    // then it is stored in prev_end.
    offset = 0;
    if (have_prev_end) {
      counts_out.push_back( buffer[0] - prev_end );
      hint = offsets_out.insert( hint,
                                 prev_end + 1 + nudge,
                                 buffer[0] + nudge );
      ++fiter;
      offset = 1;
      have_prev_end = false;
    }

    while (offset < count) {
      assert(fiter != file_ids.end());
      // whenever we get to a gap between blocks we need to
      // advance one step because we read an extra end id
      // preceding each block
      if (fiter == fiter.start_of_block()) {
        if (offset == count-1)
          break;
        ++offset;
      }

      size_t s = buffer[offset-1] + 1;
      size_t e = buffer[offset];
      counts_out.push_back( e - s + 1 );
      hint = offsets_out.insert( hint, s + nudge, e + nudge );

      ++fiter;
      ++offset;
    }

    // If we did not end on the boundary between two blocks,
    // then we need to save the end index for the final entry
    // for use in the next iteration. Similarly, if we ended
    // with an extra value that was read with the express intention
    // of getting the previous end value for a block, we need to
    // save it. This case only arises if we hit the break in
    // the above loop.
    if (fiter != fiter.start_of_block() || offset < count) {
      assert(!have_prev_end);
      if (offset == count) {
        --offset;
        assert(fiter != fiter.start_of_block());
      }
      else {
        assert(offset+1 == count);
        assert(fiter == fiter.start_of_block());
      }
      have_prev_end = true;
      prev_end = buffer[offset];
    }
  }
  assert(!have_prev_end);
  assert(fiter == file_ids.end());

  return MB_SUCCESS;
}

} // namespace moab
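/* Usage sketch (illustrative only, not part of this file): ReadHDF5VarLen
   is an abstract helper; a concrete reader implements store_data() and then
   pairs read_offsets() with read_data(). The subclass name, driver method,
   constructor arguments (assumed from the members used above: dbgOut,
   dataBuffer, bufferSize), the store_data() signature, and the
   H5T_NATIVE_INT value type are all assumptions for illustration.

   class MyVarLenReader : public ReadHDF5VarLen {
   public:
     MyVarLenReader( DebugOutput& dbg, void* buffer, size_t buffer_size )
       : ReadHDF5VarLen( dbg, buffer, buffer_size ) {}

     // Drive the two-phase read: offsets first, then values.
     ErrorCode read( ReadHDF5Dataset& offset_table,
                     ReadHDF5Dataset& value_table,
                     const Range& file_ids,
                     EntityHandle start_file_id )
     {
       const EntityHandle nudge = 1; // keep zero out of the offset Range
       Range offsets;
       std::vector<unsigned> counts;
       ErrorCode rval = read_offsets( offset_table, file_ids,
                                      start_file_id, nudge,
                                      offsets, counts );
       if (MB_SUCCESS != rval)
         return rval;
       // The same nudge must be passed as the start offset when reading
       // the values, since 'offsets' contains nudged row numbers.
       return read_data( value_table, offsets, nudge, H5T_NATIVE_INT,
                         file_ids, counts, Range() );
     }

   protected:
     // Receives each entity's complete value list, in file-ID order.
     ErrorCode store_data( EntityHandle file_id, void* data,
                           long num_values, bool ranged )
     {
       // ... convert and store the num_values values for file_id ...
       return MB_SUCCESS;
     }
   };
*/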