# Source code for darshan.tests.test_plot_common_access_table

import pytest

import pandas as pd
from pandas.testing import assert_frame_equal

import darshan
from darshan.experimental.plots import plot_common_access_table
from darshan.log_utils import get_log_path


@pytest.mark.parametrize("filename, mod, expected_df",
    [
        (
            "ior_hdf5_example.darshan",
            "POSIX",
            # values from the old report (Perl) code
            pd.DataFrame([[262144, 32], [512, 9], [544, 5], [328, 3]]),
        ),
        (
            "ior_hdf5_example.darshan",
            "MPI-IO",
            # values from the old report (Perl) code
            pd.DataFrame([[262144, 32], [512, 9], [544, 5], [328, 3]]),
        ),
        (
            "ior_hdf5_example.darshan",
            "H5D",
            pd.DataFrame([[262144, 24]]),
        ),
        (
            "nonmpi_dxt_anonymized.darshan",
            "POSIX",
            # values from the old report (Perl) code
            pd.DataFrame([[1024, 7692], [32, 276], [100, 269], [92, 265]]),
        ),
        (
            "imbalanced-io.darshan",
            "POSIX",
            # values from the old report (Perl) code
            pd.DataFrame([[1048576, 100968], [8162, 14664], [512, 1003], [328, 545]]),
        ),
        (
            "imbalanced-io.darshan",
            "MPI-IO",
            # values from the old report (Perl) code
            pd.DataFrame([[241664, 71400], [294912, 18806], [483328, 4862], [512, 1003]]),
        ),
        (
            # "ground truth" log where 10 ranks wrote 1 byte each
            "hdf5_diagonal_write_1_byte_dxt.darshan",
            "H5D",
            pd.DataFrame([[1, 10]]),
        ),
        (
            # "ground truth" log where 10 ranks wrote `10 * (1 + rank)` bytes
            # each (e.g. 10 bytes for rank 0, 20 bytes for rank 1, etc.)
            "hdf5_diagonal_write_bytes_range_dxt.darshan",
            "H5D",
            pd.DataFrame(
                [[10, 1], [20, 1], [30, 1], [40, 1], [50, 1],
                 [60, 1], [70, 1], [80, 1], [90, 1], [100, 1]],
            ),
        ),
        (
            # "ground truth" log where 10 ranks wrote 1 byte each and
            # 5 of the ranks called the `flush` operation. Results
            # should be the same as `hdf5_diagonal_write_1_byte_dxt.darshan`
            "hdf5_diagonal_write_half_flush_dxt.darshan",
            "H5D",
            pd.DataFrame([[1, 10]]),
        ),
        (
            # "ground truth" log where 5 ranks wrote 1 byte each
            "hdf5_diagonal_write_half_ranks_dxt.darshan",
            "H5D",
            pd.DataFrame([[1, 5]]),
        ),
    ],
)
def test_common_access_table(filename, mod, expected_df):
    """Compare the common access table of a log/module with known values.

    Parameters
    ----------
    filename : str
        Name of the darshan log, resolved to a path via ``get_log_path``.
    mod : str
        Module name passed through to ``plot_common_access_table``.
    expected_df : pandas.DataFrame
        Two-column frame of expected values; it is labelled
        "Access Size" / "Count" here before the comparison.
    """
    log_path = get_log_path(filename=filename)
    # work on a copy so the frame owned by the parametrize list above
    # is not modified in place by the relabelling
    expected_df = expected_df.copy()
    expected_df.columns = ["Access Size", "Count"]
    # ask for exactly as many rows as the expected dataframe holds
    n_rows = expected_df.shape[0]
    with darshan.DarshanReport(log_path) as report:
        actual_df = plot_common_access_table.plot_common_access_table(
            report=report, mod=mod, n_rows=n_rows,
        ).df
    assert_frame_equal(actual_df, expected_df)


@pytest.mark.parametrize("func, input_df, expected_df",
    [
        # based on `ior_hdf5_example.darshan` `H5D` module data
        (
            plot_common_access_table.remove_nonzero_rows,
            pd.DataFrame([[262144, 8], [262144, 8], [262144, 8], [0, 0], [0, 0]]),
            pd.DataFrame([[262144, 8], [262144, 8], [262144, 8]]),
        ),
        # check that single zeros in either column remain
        (
            plot_common_access_table.remove_nonzero_rows,
            pd.DataFrame([[262144, 8], [262144, 8], [262144, 8], [1, 0], [0, 1]]),
            pd.DataFrame([[262144, 8], [262144, 8], [262144, 8], [1, 0], [0, 1]]),
        ),
        # based on `ior_hdf5_example.darshan` `H5D` module data
        (
            plot_common_access_table.combine_access_sizes,
            pd.DataFrame([[262144, 8], [262144, 8], [262144, 8]]),
            pd.DataFrame([[262144, 24]]),
        ),
        # synthetic case with multiple identical access sizes
        (
            plot_common_access_table.combine_access_sizes,
            pd.DataFrame([[10, 1], [10, 2], [20, 1], [20, 2], [20, 3]]),
            pd.DataFrame([[10, 3], [20, 6]]),
        ),
        # based on `ior_hdf5_example.darshan` `POSIX` module data
        (
            plot_common_access_table.get_most_common_access_sizes,
            pd.DataFrame([[544, 5], [512, 9], [262144, 32], [328, 3]]),
            pd.DataFrame([[262144, 32], [512, 9], [544, 5], [328, 3]]),
        ),
        # synthetic case with > 4 access sizes
        (
            plot_common_access_table.get_most_common_access_sizes,
            pd.DataFrame([[1, 1], [2, 10], [3, 4], [4, 9], [5, 5], [6, 2], [7, 3]]),
            pd.DataFrame([[2, 10], [4, 9], [5, 5], [3, 4]]),
        ),
        # case where there are < 4 access sizes, based on
        # `ior_hdf5_example.darshan` `H5D` module data
        (
            plot_common_access_table.get_most_common_access_sizes,
            pd.DataFrame([[262144, 24]]),
            pd.DataFrame([[262144, 24]]),
        ),
    ]
)
def test_misc_funcs(func, input_df, expected_df):
    """Test helper functions that make slight modifications to dataframes.

    Parameters
    ----------
    func : callable
        Helper under test; it is called as ``func(df=input_df)``.
    input_df : pandas.DataFrame
        Two-column frame handed to ``func`` after being labelled
        "Access Size" / "Count".
    expected_df : pandas.DataFrame
        Two-column frame the result must equal, labelled the same way.
    """
    column_names = ["Access Size", "Count"]
    # relabel copies so the frames owned by the parametrize list above
    # are not modified in place
    input_df = input_df.copy()
    input_df.columns = column_names
    expected_df = expected_df.copy()
    expected_df.columns = column_names
    actual_df = func(df=input_df)
    assert_frame_equal(actual_df, expected_df)


@pytest.mark.parametrize("input_df, col_name, expected_df",
    [
        # based on `ior_hdf5_example.darshan` `POSIX` module data
        (
            pd.DataFrame(
                data=[[262144, 512, 544, 328]],
                columns=[
                    "POSIX_ACCESS1_ACCESS", "POSIX_ACCESS2_ACCESS",
                    "POSIX_ACCESS3_ACCESS", "POSIX_ACCESS4_ACCESS",
                ],
            ),
            "Access Size",
            pd.DataFrame(
                data=[[262144], [512], [544], [328]],
                columns=["Access Size"],
            ),
        ),
        # based on `ior_hdf5_example.darshan` `POSIX` module data
        (
            pd.DataFrame(
                data=[[32, 9, 5, 3]],
                columns=[
                    "POSIX_ACCESS1_COUNT", "POSIX_ACCESS2_COUNT",
                    "POSIX_ACCESS3_COUNT", "POSIX_ACCESS4_COUNT",
                ],
            ),
            "Count",
            pd.DataFrame(
                data=[[32], [9], [5], [3]],
                columns=["Count"],
            ),
        ),
        # synthetic case to test multiple rows and columns
        (
            pd.DataFrame(
                data=[[1, 4, 7, 10], [2, 5, 8, 11], [3, 6, 9, 12]],
                columns=["col1", "col2", "col3", "col4"],
            ),
            "TEST",
            pd.DataFrame(
                data=[[i] for i in range(1, 13)],
                columns=["TEST"],
            ),
        ),
    ]
)
def test_collapse_access_cols(input_df, col_name, expected_df):
    """Check ``collapse_access_cols`` against hand-written expected frames.

    In every case above the expected frame has a single column named
    ``col_name`` that lists the input values one input column after
    another (all of the first column, then all of the second, ...).

    Parameters
    ----------
    input_df : pandas.DataFrame
        Multi-column frame passed as ``df``.
    col_name : str
        Name passed as ``col_name``; also the expected output column label.
    expected_df : pandas.DataFrame
        Frame the result must equal.
    """
    actual_df = plot_common_access_table.collapse_access_cols(
        df=input_df, col_name=col_name,
    )
    assert_frame_equal(actual_df, expected_df)


@pytest.mark.parametrize("mod_df, mod, expected_df",
    [
        (
            pd.DataFrame(
                data=[
                    [17, 178, 356, 890, 0, 35, 15, 1, 0],
                    [17, 192, 13266432, 13284480, 96, 7688, 3, 2, 2],
                    [17, 128, 8192, 16384, 0, 50, 15, 1, 0],
                ],
                columns=[
                    "POSIX_OPENS",
                    "POSIX_ACCESS1_ACCESS", "POSIX_ACCESS2_ACCESS",
                    "POSIX_ACCESS3_ACCESS", "POSIX_ACCESS4_ACCESS",
                    "POSIX_ACCESS1_COUNT", "POSIX_ACCESS2_COUNT",
                    "POSIX_ACCESS3_COUNT", "POSIX_ACCESS4_COUNT",
                ],
            ),
            "POSIX",
            pd.DataFrame(
                data=[
                    [178, 35],
                    [192, 7688],
                    [128, 50],
                    [356, 15],
                    [13266432, 3],
                    [8192, 15],
                    [890, 1],
                    [13284480, 2],
                    [16384, 1],
                    [0, 0],
                    [96, 2],
                    [0, 0],
                ],
                columns=["Access Size", "Count"],
            ),
        ),
    ]
)
def test_get_access_count_df(mod_df, mod, expected_df):
    """Check ``get_access_count_df`` against a hand-written expected frame.

    The expected frame pairs each ``*_ACCESSn_ACCESS`` value with the
    matching ``*_ACCESSn_COUNT`` value under the "Access Size" and
    "Count" columns; the ``POSIX_OPENS`` column of the input does not
    appear in it.

    Parameters
    ----------
    mod_df : pandas.DataFrame
        Frame of module counters passed as ``mod_df``.
    mod : str
        Module name passed as ``mod``.
    expected_df : pandas.DataFrame
        Frame the result must equal.
    """
    actual_df = plot_common_access_table.get_access_count_df(
        mod_df=mod_df, mod=mod,
    )
    assert_frame_equal(actual_df, expected_df)