Source code for darshan.tests.test_cffi_misc

# miscellaneous tests for the CFFI backend
# that are not specific to any particular
# mod

import re

import pytest
import numpy as np
from numpy.testing import assert_array_equal, assert_allclose
import darshan
import darshan.backend.cffi_backend as backend
from darshan.backend.cffi_backend import ffi, libdutil, _structdefs
from darshan.log_utils import get_log_path

def test_get_lib_version():
    # check for a reasonable version string
    # returned by get_lib_version()
    actual_version = backend.get_lib_version()
    # must be a string
    assert isinstance(actual_version, str)
    # two periods in semantic version num
    assert actual_version.count('.') == 2
    # stricter regular expression match on
    # the semantic version number
    prog = re.compile(r"^\d+\.\d+\.\d+(-.+)?$")
    match = prog.fullmatch(actual_version)
    assert match is not None
    assert match.group(0) == actual_version
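
# Editorial sketch (not part of the original test suite): a minimal,
# hedged illustration of what the semver pattern above does and does
# not accept. The example strings are hypothetical, not taken from a
# real darshan-util build.
def _demo_semver_pattern():
    prog = re.compile(r"^\d+\.\d+\.\d+(-.+)?$")
    assert prog.fullmatch("3.4.4") is not None       # plain X.Y.Z matches
    assert prog.fullmatch("3.4.4-pre1") is not None  # pre-release suffix allowed
    assert prog.fullmatch("3.4") is None             # only two components; rejected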

@pytest.mark.parametrize(
    "log_path",
    [
        # this log is the only one that returns a dataframe
        # with 'int' file hashes
        # NOTE: this case fails even before the fix
        # because the fix enforces the data type uint64
        "sample.darshan",
        # the following 2 logs return dataframes with
        # 'float' file hashes
        "sample-goodost.darshan",
        "sample-dxt-simple.darshan",
    ],
)
def test_file_hash_type(log_path):
    # regression test for issue #438
    # see: https://github.com/darshan-hpc/darshan/issues/438
    # check that a single record generated by `log_get_generic_record`
    # has the correct data type for the file hash/id
    log_path = get_log_path(log_path)
    log = backend.log_open(log_path)
    rec = backend.log_get_generic_record(log=log, mod_name="POSIX", dtype="pandas")
    # verify the records returned have the correct
    # data type for the ids/hashes
    assert rec["counters"]["id"].dtype == np.uint64
    assert rec["fcounters"]["id"].dtype == np.uint64

    # additionally check that the dataframes
    # generated are of the correct types
    with darshan.DarshanReport(log_path, read_all=True) as report:
        report.mod_read_all_records("POSIX", dtype="pandas")
        rec_counters = report.records["POSIX"][0]["counters"]
        rec_fcounters = report.records["POSIX"][0]["fcounters"]

    # verify the records returned have the correct
    # data type for the ids/hashes
    assert rec_counters["id"].dtype == np.uint64
    assert rec_fcounters["id"].dtype == np.uint64
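
# Editorial note: a hedged sketch of why the uint64 dtype matters for
# issue #438. Darshan record ids are unsigned 64-bit hashes; values
# above 2**53 cannot be represented exactly by float64, so a DataFrame
# that stores ids as floats can silently corrupt them. The id value
# here is the one sample.darshan produces in the test below.
def _demo_id_precision_loss():
    big_id = 6301063301082038805
    # round-tripping through float64 changes the value: it is odd,
    # but float64 spacing near 2**62 is 1024
    assert int(np.float64(big_id)) != big_id
    # uint64 preserves it exactly
    assert int(np.uint64(big_id)) == big_id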
[docs]@pytest.mark.parametrize("dtype", ["numpy", "dict", "pandas"]) def test_log_get_generic_record(dtype): # regression test for issue #440 # see: https://github.com/darshan-hpc/darshan/issues/440 # collect the expected counter/fcounter column names expected_counter_names = backend.counter_names("POSIX") expected_fcounter_names = backend.fcounter_names("POSIX") # assign the expected counter/fcounter values expected_counter_vals = np.array( [ 2049, -1, -1, 0, 16402, 16404, 0, 0, 0, 0, -1, -1, 0, 0, 0, 2199023259968, 0, 2199023261831, 0, 0, 0, 16384, 0, 0, 8, 16401, 1048576, 0, 134217728, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 14, 0, 0, 0, 0, 0, 0, 16384, 0, 274743689216, 274743691264, 0, 0, 10240, 4096, 0, 0, 134217728, 272, 544, 328, 16384, 8, 2, 2, 597, 1073741824, 1312, 1073741824, ] ) expected_fcounter_vals = np.array( [ 3.9191410541534424, 0.0, 3.940063953399658, 3.927093982696533, 3.936579942703247, 0.0, 115.0781660079956, 115.77035808563232, 0.0, 100397.60042190552, 11.300841808319092, 0.0, 17.940945863723755, 20.436099529266357, 85.47495031356812, 0.0, 0.0, ] ) # generate a record from sample log log = backend.log_open(get_log_path("sample.darshan")) rec = backend.log_get_generic_record(log=log, mod_name="POSIX", dtype=dtype) # each record should have the following keys record_keys = ["id", "rank", "counters", "fcounters"] assert list(rec.keys()) == record_keys # check the file hash/id assert rec["id"] == 6301063301082038805 # check the rank assert rec["rank"] == -1 if dtype == "numpy": # check the length of the returned arrays are correct assert rec["counters"].size == 69 assert rec["fcounters"].size == 17 # collect the actual counter/fcounter values actual_counter_vals = rec["counters"] actual_fcounter_vals = rec["fcounters"] elif dtype == "dict": # check the length of the returned dictionaries are correct assert len(rec["counters"]) == 69 assert len(rec["fcounters"]) == 17 # collect the actual counter/fcounter key names actual_counter_names = list(rec["counters"].keys()) actual_fcounter_names = list(rec["fcounters"].keys()) # collect the actual counter/fcounter values actual_counter_vals = np.array(list(rec["counters"].values())) actual_fcounter_vals = np.array(list(rec["fcounters"].values())) elif dtype == "pandas": # make sure the added column keys are in the dataframes for key in ["id", "rank"]: assert key in rec["counters"].columns assert key in rec["fcounters"].columns # double check the id/rank values assert rec["counters"]["id"].values == 6301063301082038805 assert rec["counters"]["rank"].values == -1 # make sure the dataframes are the expected shapes # the shapes are 2 larger than the arrays since the id/rank # columns are added to the dataframes assert rec["counters"].shape == (1, 71) assert rec["fcounters"].shape == (1, 19) # collect the actual counter/fcounter key names # don't include the id/rank columns actual_counter_names = list(rec["counters"].columns)[2:] actual_fcounter_names = list(rec["fcounters"].columns)[2:] # collect the actual counter/fcounter values # don't include the id/rank columns actual_counter_vals = rec["counters"].values[0][2:] actual_fcounter_vals = rec["fcounters"].values[0][2:] # check the actual counter/fcounter values agree # with the expected counter/fcounter values assert_array_equal(actual_counter_vals, expected_counter_vals) assert_allclose(actual_fcounter_vals, expected_fcounter_vals) if dtype != "numpy": # make sure the returned key/column names agree assert actual_counter_names == expected_counter_names assert actual_fcounter_names == 
expected_fcounter_names
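
# Editorial summary (descriptive only) of the three layouts verified
# above for a single POSIX record from sample.darshan:
#
#   dtype="numpy"  -> rec["counters"] is a length-69 array and
#                     rec["fcounters"] a length-17 array
#   dtype="dict"   -> counter names map to scalar values
#   dtype="pandas" -> one-row DataFrames with "id" and "rank"
#                     prepended as extra columns (69 + 2, 17 + 2)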
[docs]@pytest.mark.parametrize("log_name", [ "imbalanced-io.darshan", "e3sm_io_heatmap_only.darshan", ]) @pytest.mark.parametrize("module, index", [ ("POSIX", 0), ("POSIX", 3), ("POSIX", 5), ("MPI-IO", 0), ("MPI-IO", 2), # less records available for STDIO testing # with these logs ("STDIO", 0), ]) def test_df_to_rec(log_name, index, module): # test for packing a dataframe into a C-style record # this is perhaps nothing more than a "round-trip" test log_path = get_log_path(log_name) with darshan.DarshanReport(log_path, read_all=True) as report: report.mod_read_all_records(module, dtype="pandas") rec_dict = report.records[module][0] # id and rank are not formally included in the reconsituted # (f)counters "buffer" so truncate a bit on comparison expected_fcounters = rec_dict["fcounters"].iloc[index, 2:] expected_counters = rec_dict["counters"].iloc[index, 2:].astype(np.int64) expected_id = rec_dict["counters"].iloc[index, 0].astype(np.uint64) expected_rank = rec_dict["counters"].iloc[index, 1] # retrive the "re-packed"/actual record data: rbuf = backend._df_to_rec(rec_dict, module, index) rec_buf = ffi.from_buffer(_structdefs[module].replace("**", "*"), rbuf) actual_fcounters = np.frombuffer(ffi.buffer(rec_buf[0].fcounters)) actual_counters = np.frombuffer(ffi.buffer(rec_buf[0].counters), dtype=np.int64) actual_id = rec_buf[0].base_rec.id actual_rank = rec_buf[0].base_rec.rank assert_allclose(actual_fcounters, expected_fcounters) assert_allclose(actual_counters, expected_counters) assert actual_id == expected_id assert actual_rank == expected_rank
[docs]@pytest.mark.parametrize("python_filter, expected_counts", [ # whether to do an initial filtering # of the DataFrame in Python before # packing it back into C records (True, [18, 12, 2, 1]), (False, [1026, 12, 2, 1]) # see gh-867 ]) def test_reverse_record_array(python_filter, expected_counts): # pack pandas DataFrame objects back into # a contiguous buffer of several records # and then use the darshan-util C lib accumulator # on that record array, and compare the results # with those discussed in gh-867 from Perl report log_path = get_log_path("imbalanced-io.darshan") with darshan.DarshanReport(log_path, read_all=True) as report: nprocs = report.metadata['job']['nprocs'] modules = report.modules report.mod_read_all_records("POSIX", dtype="pandas") rec_dict = report.records["POSIX"][0] counters_df = rec_dict["counters"] fcounters_df = rec_dict["fcounters"] if python_filter: # gh-867 and the perl report filtered files that were # only stat'd rather than opened, so demo the same filtering # here at Python layer, then feed back to C accum stuff fcounters_df = fcounters_df[counters_df["POSIX_OPENS"] > 0] counters_df = counters_df[counters_df["POSIX_OPENS"] > 0] rec_dict["counters"] = counters_df rec_dict["fcounters"] = fcounters_df num_recs = rec_dict["fcounters"].shape[0] record_array = backend._df_to_rec(rec_dict, "POSIX") # need to deal with the low-level C stuff to set up # accumulator infrastructure to receive the repacked # records darshan_accumulator = ffi.new("darshan_accumulator *") r = libdutil.darshan_accumulator_create(modules["POSIX"]['idx'], nprocs, darshan_accumulator) assert r == 0 r_i = libdutil.darshan_accumulator_inject(darshan_accumulator[0], record_array, num_recs) assert r_i == 0 derived_metrics = ffi.new("struct darshan_derived_metrics *") summation_record = ffi.new(_structdefs["POSIX"].replace("**", "*")) r = libdutil.darshan_accumulator_emit(darshan_accumulator[0], derived_metrics, summation_record) assert r == 0 r = libdutil.darshan_accumulator_destroy(darshan_accumulator[0]) assert r == 0 # the indices into category_counters are pretty opaque.. we should just # move everything to Python "eventually"... (also to avoid all the junk above after filtering..) # 0 = total # 1 = RO # 2 = WO # 3 = R/W actual_total_files = derived_metrics.category_counters[0].count actual_ro_files = derived_metrics.category_counters[1].count actual_wo_files = derived_metrics.category_counters[2].count actual_rw_files = derived_metrics.category_counters[3].count assert_array_equal([actual_total_files, actual_ro_files, actual_wo_files, actual_rw_files], expected_counts)