moab
|
Utility used for reading portions of an HDF5 dataset. More...
#include <ReadHDF5Dataset.hpp>
Classes | |
class | Exception |
Public Types | |
typedef int | Comm |
Public Member Functions | |
ReadHDF5Dataset (const char *debug_desc, hid_t data_set_handle, bool parallel, const Comm *communicator=0, bool close_data_set_on_destruct=true) | |
Setup to read entire table. | |
ReadHDF5Dataset (const char *debug_desc, bool parallel, const Comm *communicator=0) | |
void | init (hid_t data_set_handle, bool close_data_set_on_destruct=true) |
bool | will_close_data_set () const |
void | close_data_set_on_destruct (bool val) |
~ReadHDF5Dataset () | |
void | set_file_ids (const Range &file_ids, EntityHandle start_id, hsize_t row_cout, hid_t data_type) |
Change file ids to read from. | |
void | set_all_file_ids (hsize_t row_count, hid_t data_type) |
Read all values in dataset (undo set_file_ids) | |
bool | done () const |
Return false if more data to read, true otherwise. | |
void | read (void *buffer, size_t &rows_read) |
Read rows of table. | |
Range::const_iterator | next_file_id () const |
Return position in Range of file IDs at which next read will start. | |
void | null_read () |
Do null read operation. | |
unsigned | columns () const |
void | set_column (unsigned c) |
unsigned long | get_read_count () const |
const char * | get_debug_desc () const |
Static Public Member Functions | |
static void | set_hyperslab_selection_limit (size_t val) |
static void | default_hyperslab_selection_limit () |
static void | append_hyperslabs () |
static void | or_hyperslabs () |
Private Member Functions | |
Range::const_iterator | next_end (Range::const_iterator iter) |
Private Attributes | |
Range | internalRange |
used when reading entire dataset | |
bool | closeDataSet |
close dataset in destructor | |
hsize_t | dataSetOffset [64] |
hsize_t | dataSetCount [64] |
hid_t | dataSet |
Handle for HDF5 data set. | |
hid_t | dataSpace |
Data space for data set. | |
hid_t | dataType |
Data type client code wants for data. | |
hid_t | fileType |
Data type as stored in data set. | |
hid_t | ioProp |
Used to specify collective IO. | |
int | dataSpaceRank |
Rank of data set. | |
hsize_t | rowsInTable |
Total number of rows in dataset. | |
bool | doConversion |
True if dataType != fileType. | |
bool | nativeParallel |
If true then reading different data on different procs. | |
hsize_t | readCount |
Number of actual reads to do. | |
hsize_t | bufferSize |
Size of buffer passed to read, in number of rows | |
const Comm * | mpiComm |
Range::const_iterator | currOffset |
Range::const_iterator | rangeEnd |
EntityHandle | startID |
std::string | mpeDesc |
Static Private Attributes | |
static bool | haveMPEEvents = false |
static std::pair< int, int > | mpeReadEvent |
static std::pair< int, int > | mpeReduceEvent |
static size_t | hyperslabSelectionLimit = DEFAULT_HYPERSLAB_SELECTION_LIMIT |
static H5S_seloper_t | hyperslabSelectOp = H5S_SELECT_OR |
Utility used for reading portions of an HDF5 dataset.
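Implement iterative read of table where:
- the subset of rows to be read can be specified using a Range of file IDs (see set_file_ids),
- each read fills as much as possible of the passed buffer, and
- each read call reads the next set of rows of the data set, in an iterator-like fashion (see next_file_id and done).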
NOTE: This class also implements an RAII pattern for the data set handle: It will close the data set in its destructor unless close_data_set_on_destruct is false (as passed to the constructor or to init, or as set later via close_data_set_on_destruct(bool)).
NOTE: This class will always do collective IO for parallel reads.
Definition at line 36 of file ReadHDF5Dataset.hpp.
typedef int moab::ReadHDF5Dataset::Comm |
Definition at line 43 of file ReadHDF5Dataset.hpp.
moab::ReadHDF5Dataset::ReadHDF5Dataset | ( | const char * | debug_desc, |
hid_t | data_set_handle, | ||
bool | parallel, | ||
const Comm * | communicator = 0 , |
||
bool | close_data_set_on_destruct = true |
||
) |
Setup to read entire table.
data_set_handle | The HDF5 DataSet to read. |
parallel | Doing true partial-read parallel read (as opposed to read and delete where collective IO is done for everything because all procs read the same stuff.) |
communicator | If parallel is true and io_prop is H5FD_MPIO_COLLECTIVE, then this must be a pointer to the MPI_Communicator value. |
close_data_set_on_destruct | Call H5Dclose on passed data_set_handle in destructor. |
If parallel is true and io_prop is H5FD_MPIO_COLLECTIVE, then not only must communicator be non-null, but this call must be made collectively!
Class instance will not be usable until one of either set_file_ids or set_all_file_ids is called.
Definition at line 95 of file ReadHDF5Dataset.cpp.
: closeDataSet(close_data_set), dataSet( data_set_handle ), dataSpace(-1), dataType( -1 ), fileType(-1), ioProp(H5P_DEFAULT), dataSpaceRank(0), rowsInTable(0), doConversion(false), nativeParallel(parallel), readCount(0), bufferSize(0), mpiComm(communicator), mpeDesc( debug_desc ) { if (!haveMPEEvents) { haveMPEEvents = true; mpeReadEvent = allocate_mpe_state( "ReadHDF5Dataset::read", "yellow" ); mpeReduceEvent = allocate_mpe_state( "ReadHDF5Dataset::all_reduce", "yellow" ); } init( data_set_handle, close_data_set ); #ifndef HDF5_PARALLEL if (nativeParallel) throw Exception(__LINE__); #else if (nativeParallel && !mpiComm) throw Exception(__LINE__); if (mpiComm) { ioProp = H5Pcreate(H5P_DATASET_XFER); H5Pset_dxpl_mpio(ioProp, H5FD_MPIO_COLLECTIVE); } #endif }
moab::ReadHDF5Dataset::ReadHDF5Dataset | ( | const char * | debug_desc, |
bool | parallel, | ||
const Comm * | communicator = 0 |
||
) |
Definition at line 57 of file ReadHDF5Dataset.cpp.
: closeDataSet(false), dataSet( -1 ), dataSpace( -1 ), dataType( -1 ), fileType(-1), ioProp(H5P_DEFAULT), dataSpaceRank(0), rowsInTable(0), doConversion(false), nativeParallel(parallel), readCount(0), bufferSize(0), mpiComm(communicator), mpeDesc( debug_desc ) { if (!haveMPEEvents) { haveMPEEvents = true; mpeReadEvent = allocate_mpe_state( "ReadHDF5Dataset::read", "yellow" ); mpeReduceEvent = allocate_mpe_state( "ReadHDF5Dataset::all_reduce", "yellow" ); } #ifndef HDF5_PARALLEL if (nativeParallel) throw Exception(__LINE__); #else if (nativeParallel && !mpiComm) throw Exception(__LINE__); if (mpiComm) { ioProp = H5Pcreate(H5P_DATASET_XFER); H5Pset_dxpl_mpio(ioProp, H5FD_MPIO_COLLECTIVE); } #endif }
static void moab::ReadHDF5Dataset::append_hyperslabs | ( | ) | [inline, static] |
Use non-standard 'APPEND' operation for hyperslab selection
Definition at line 146 of file ReadHDF5Dataset.hpp.
{ hyperslabSelectOp = H5S_SELECT_APPEND; }
void moab::ReadHDF5Dataset::close_data_set_on_destruct | ( | bool | val | ) | [inline] |
Definition at line 78 of file ReadHDF5Dataset.hpp.
{ closeDataSet = val; }
unsigned moab::ReadHDF5Dataset::columns | ( | ) | const |
Definition at line 161 of file ReadHDF5Dataset.cpp.
{ if (dataSpaceRank == 1) return 1; else if (dataSpaceRank == 2) return dataSetCount[1]; throw Exception(__LINE__); }
void moab::ReadHDF5Dataset::default_hyperslab_selection_limit | ( | ) | [static] |
Definition at line 32 of file ReadHDF5Dataset.cpp.
bool moab::ReadHDF5Dataset::done | ( | ) | const [inline] |
Return false if more data to read, true otherwise.
Test if the iterative read has reached the end.
Definition at line 110 of file ReadHDF5Dataset.hpp.
{ return (currOffset == rangeEnd) && (readCount == 0); }
const char* moab::ReadHDF5Dataset::get_debug_desc | ( | ) | const [inline] |
Definition at line 139 of file ReadHDF5Dataset.hpp.
{ return mpeDesc.c_str(); }
unsigned long moab::ReadHDF5Dataset::get_read_count | ( | ) | const [inline] |
Definition at line 138 of file ReadHDF5Dataset.hpp.
{ return readCount; }
void moab::ReadHDF5Dataset::init | ( | hid_t | data_set_handle, |
bool | close_data_set_on_destruct = true |
||
) |
Definition at line 137 of file ReadHDF5Dataset.cpp.
{ closeDataSet = close_data_set; dataSet = data_set_handle; fileType = H5Dget_type( data_set_handle ); if (fileType < 0) throw Exception(__LINE__); dataSpace = H5Dget_space( dataSet ); if (dataSpace < 0) throw Exception(__LINE__); dataSpaceRank = H5Sget_simple_extent_dims( dataSpace, dataSetCount, dataSetOffset ); if (dataSpaceRank < 0) throw Exception(__LINE__); rowsInTable = dataSetCount[0]; for (int i = 0; i < dataSpaceRank; ++i) dataSetOffset[i] = 0; currOffset = rangeEnd = internalRange.end(); }
Range::const_iterator moab::ReadHDF5Dataset::next_end | ( | Range::const_iterator | iter | ) | [private] |
Definition at line 179 of file ReadHDF5Dataset.cpp.
{ size_t slabs_remaining = hyperslabSelectionLimit; size_t avail = bufferSize; while (iter != rangeEnd && slabs_remaining) { size_t count = *(iter.end_of_block()) - *iter + 1; if (count >= avail) { iter += avail; break; } avail -= count; iter += count; --slabs_remaining; } return iter; }
Range::const_iterator moab::ReadHDF5Dataset::next_file_id | ( | ) | const [inline] |
Return position in Range of file IDs at which next read will start.
Definition at line 123 of file ReadHDF5Dataset.hpp.
{ return currOffset; }
void moab::ReadHDF5Dataset::null_read | ( | ) |
Do null read operation.
Do a read call requesting no data. This functionality is provided so as to allow collective IO when not all processes need to make the same number of read calls. To prevent deadlock in this case, processes that have finished their necessary read calls can call this function so that all processes are calling the read method collectively.
Definition at line 328 of file ReadHDF5Dataset.cpp.
{ herr_t err; err = H5Sselect_none( dataSpace ); if (err < 0) throw Exception(__LINE__); //#if HDF5_16API hsize_t one = 1; hid_t mem_id = H5Screate_simple( 1, &one, NULL ); if (mem_id < 0) throw Exception(__LINE__); err = H5Sselect_none( mem_id ); if (err < 0) { H5Sclose(mem_id); throw Exception(__LINE__); } //#else // hid_t mem_id = H5Screate(H5S_NULL); // if (mem_id < 0) // throw Exception(__LINE__); //#endif err = H5Dread( dataSet, fileType, mem_id, dataSpace, ioProp, 0 ); H5Sclose( mem_id ); if (err < 0) throw Exception(__LINE__); }
static void moab::ReadHDF5Dataset::or_hyperslabs | ( | ) | [inline, static] |
Revert to default select behavior for standard HDF5 library
Definition at line 148 of file ReadHDF5Dataset.hpp.
{ hyperslabSelectOp = H5S_SELECT_OR; }
void moab::ReadHDF5Dataset::read | ( | void * | buffer, |
size_t & | rows_read | ||
) |
Read rows of table.
Read up to the buffer size in rows (the row count passed to set_file_ids or set_all_file_ids) from the data set.
buffer | Memory in which to store values read from data set |
rows_read | The actual number of rows read from the table. Will never exceed the buffer size in rows given to set_file_ids or set_all_file_ids. |
Definition at line 270 of file ReadHDF5Dataset.cpp.
{ herr_t err; rows_read = 0; MPE_Log_event(mpeReadEvent.first, (int)readCount, mpeDesc.c_str()); if (currOffset != rangeEnd) { // Build H5S hyperslab selection describing the portions of the // data set to read H5S_seloper_t sop = H5S_SELECT_SET; Range::iterator new_end = next_end( currOffset ); while (currOffset != new_end) { size_t count = *(currOffset.end_of_block()) - *currOffset + 1; if (new_end != rangeEnd && *currOffset + count > *new_end) { count = *new_end - *currOffset; } rows_read += count; dataSetOffset[0] = *currOffset - startID; dataSetCount[0] = count; err = H5Sselect_hyperslab( dataSpace, sop, dataSetOffset, NULL, dataSetCount, 0 ); if (err < 0) throw Exception(__LINE__); sop = hyperslabSelectOp; // subsequent calls to select_hyperslab append currOffset += count; } // Create a data space describing the memory in which to read the data dataSetCount[0] = rows_read; hid_t mem_id = H5Screate_simple( dataSpaceRank, dataSetCount, NULL ); if (mem_id < 0) throw Exception(__LINE__); // Do the actual read err = H5Dread( dataSet, fileType, mem_id, dataSpace, ioProp, buffer ); H5Sclose( mem_id ); if (err < 0) throw Exception(__LINE__); if (readCount) --readCount; if (doConversion) { err = H5Tconvert( fileType, dataType, rows_read*columns(), buffer, 0, H5P_DEFAULT); if (err < 0) throw Exception(__LINE__); } } else if (readCount) { null_read(); --readCount; } MPE_Log_event(mpeReadEvent.second, (int)readCount, mpeDesc.c_str()); }
void moab::ReadHDF5Dataset::set_all_file_ids | ( | hsize_t | row_count, |
hid_t | data_type | ||
) |
Read all values in dataset (undo set_file_ids)
row_count | Read buffer size in number of table rows. |
data_type | The data type of the buffer into which table values are to be read. |
Definition at line 250 of file ReadHDF5Dataset.cpp.
{ internalRange.clear(); internalRange.insert( (EntityHandle)1, (EntityHandle)(rowsInTable) ); set_file_ids( internalRange, 1, row_count, data_type ); }
void moab::ReadHDF5Dataset::set_column | ( | unsigned | c | ) |
Definition at line 171 of file ReadHDF5Dataset.cpp.
{ if (dataSpaceRank != 2 || column >= dataSetCount[1]) throw Exception(__LINE__); dataSetCount[1] = 1; dataSetOffset[1] = column; }
void moab::ReadHDF5Dataset::set_file_ids | ( | const Range & | file_ids, |
EntityHandle | start_id, | ||
hsize_t | row_cout, | ||
hid_t | data_type | ||
) |
Change file ids to read from.
file_ids | List of rows to read from dataset |
start_id | Rows of the dataset are enumerated beginning with this value. Thus the offset of the first row to be read from the dataset will be *file_ids.begin() - start_id. |
row_count | Read buffer size in number of table rows. |
data_type | The data type of the buffer into which table values are to be read. |
Definition at line 198 of file ReadHDF5Dataset.cpp.
{ startID = start_id; currOffset = file_ids.begin(); rangeEnd = file_ids.end(); readCount = 0; bufferSize = row_count; // if a) user specified buffer size and b) we're doing a true // parallel partial read and c) we're doing collective I/O, then // we need to know the maximum number of reads that will be done. #ifdef HDF5_PARALLEL if (nativeParallel) { Range::const_iterator iter = currOffset; while (iter != rangeEnd) { ++readCount; iter = next_end( iter ); } MPE_Log_event(mpeReduceEvent.first, (int)readCount, mpeDesc.c_str()); unsigned long recv = readCount, send = readCount; MPI_Allreduce( &send, &recv, 1, MPI_UNSIGNED_LONG, MPI_MAX, *mpiComm ); readCount = recv; MPE_Log_event(mpeReduceEvent.second, (int)readCount, mpeDesc.c_str()); } #endif dataType = data_type; htri_t equal = H5Tequal( fileType, dataType ); if (equal < 0) throw Exception(__LINE__); doConversion = !equal; // We always read in the format of the file to avoid stupind HDF5 // library behavior when reading in parallel. We call H5Tconvert // ourselves to do the data conversion. If the type we're reading // from the file is larger than the type we want in memory, then // we need to reduce num_rows so that we can read the larger type // from the file into the passed buffer mean to accomodate num_rows // of values of the smaller in-memory type. if (doConversion) { size_t mem_size, file_size; mem_size = H5Tget_size( dataType ); file_size = H5Tget_size( fileType ); if (file_size > mem_size) bufferSize = bufferSize * mem_size / file_size; } }
static void moab::ReadHDF5Dataset::set_hyperslab_selection_limit | ( | size_t | val | ) | [inline, static] |
Definition at line 141 of file ReadHDF5Dataset.hpp.
{ hyperslabSelectionLimit = val; }
bool moab::ReadHDF5Dataset::will_close_data_set | ( | ) | const [inline] |
Definition at line 77 of file ReadHDF5Dataset.hpp.
{ return closeDataSet; }
hsize_t moab::ReadHDF5Dataset::bufferSize [private] |
Size of buffer passed to read, in number of rows
Definition at line 168 of file ReadHDF5Dataset.hpp.
bool moab::ReadHDF5Dataset::closeDataSet [private] |
close dataset in destructor
Definition at line 155 of file ReadHDF5Dataset.hpp.
Range::const_iterator moab::ReadHDF5Dataset::currOffset [private] |
Definition at line 171 of file ReadHDF5Dataset.hpp.
hid_t moab::ReadHDF5Dataset::dataSet [private] |
Handle for HDF5 data set.
Definition at line 157 of file ReadHDF5Dataset.hpp.
hsize_t moab::ReadHDF5Dataset::dataSetCount[64] [private] |
Definition at line 156 of file ReadHDF5Dataset.hpp.
hsize_t moab::ReadHDF5Dataset::dataSetOffset[64] [private] |
Definition at line 156 of file ReadHDF5Dataset.hpp.
hid_t moab::ReadHDF5Dataset::dataSpace [private] |
Data space for data set.
Definition at line 158 of file ReadHDF5Dataset.hpp.
int moab::ReadHDF5Dataset::dataSpaceRank [private] |
Rank of data set.
Definition at line 162 of file ReadHDF5Dataset.hpp.
hid_t moab::ReadHDF5Dataset::dataType [private] |
Data type client code wants for data.
Definition at line 159 of file ReadHDF5Dataset.hpp.
bool moab::ReadHDF5Dataset::doConversion [private] |
True if dataType != fileType.
Definition at line 164 of file ReadHDF5Dataset.hpp.
hid_t moab::ReadHDF5Dataset::fileType [private] |
Data type as stored in data set.
Definition at line 160 of file ReadHDF5Dataset.hpp.
bool moab::ReadHDF5Dataset::haveMPEEvents = false [static, private] |
Definition at line 174 of file ReadHDF5Dataset.hpp.
size_t moab::ReadHDF5Dataset::hyperslabSelectionLimit = DEFAULT_HYPERSLAB_SELECTION_LIMIT [static, private] |
Definition at line 179 of file ReadHDF5Dataset.hpp.
H5S_seloper_t moab::ReadHDF5Dataset::hyperslabSelectOp = H5S_SELECT_OR [static, private] |
Definition at line 180 of file ReadHDF5Dataset.hpp.
Range moab::ReadHDF5Dataset::internalRange [private] |
used when reading entire dataset
Definition at line 153 of file ReadHDF5Dataset.hpp.
hid_t moab::ReadHDF5Dataset::ioProp [private] |
Used to specify collective IO.
Definition at line 161 of file ReadHDF5Dataset.hpp.
std::string moab::ReadHDF5Dataset::mpeDesc [private] |
Definition at line 177 of file ReadHDF5Dataset.hpp.
std::pair< int, int > moab::ReadHDF5Dataset::mpeReadEvent [static, private] |
Definition at line 175 of file ReadHDF5Dataset.hpp.
std::pair< int, int > moab::ReadHDF5Dataset::mpeReduceEvent [static, private] |
Definition at line 176 of file ReadHDF5Dataset.hpp.
const Comm* moab::ReadHDF5Dataset::mpiComm [private] |
Definition at line 169 of file ReadHDF5Dataset.hpp.
bool moab::ReadHDF5Dataset::nativeParallel [private] |
If true then reading different data on different procs.
Definition at line 165 of file ReadHDF5Dataset.hpp.
Range::const_iterator moab::ReadHDF5Dataset::rangeEnd [private] |
Definition at line 171 of file ReadHDF5Dataset.hpp.
hsize_t moab::ReadHDF5Dataset::readCount [private] |
Number of actual reads to do.
Definition at line 167 of file ReadHDF5Dataset.hpp.
hsize_t moab::ReadHDF5Dataset::rowsInTable [private] |
Total number of rows in dataset.
Definition at line 163 of file ReadHDF5Dataset.hpp.
EntityHandle moab::ReadHDF5Dataset::startID [private] |
Definition at line 172 of file ReadHDF5Dataset.hpp.