Source code for darshan.tests.test_plot_dxt_heatmap

import pytest
import numpy as np
from numpy.testing import assert_array_equal, assert_allclose
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from packaging import version

import darshan
from darshan.experimental.plots import heatmap_handling, plot_dxt_heatmap
from darshan.log_utils import get_log_path


@pytest.fixture(scope="function")
def jointgrid():
    """Provide a fresh ``sns.JointGrid`` and close its figure on teardown.

    The grid is built with ``sns.jointplot(kind="hist", bins=(4, 100))``.

    Yields
    ------
    jgrid : sns.JointGrid
        The joint grid object the requesting test operates on.
    """
    jgrid = sns.jointplot(kind="hist", bins=(4, 100))
    yield jgrid
    # The tests call ``plt.close()`` themselves, but that call is skipped if
    # a test errors out first. Closing here guarantees the figure never
    # outlives the test; closing an already-closed figure is a no-op.
    plt.close(jgrid.ax_joint.figure)
def _aggregate_data_and_runtime(filepath):
    """Return ``(agg_df, runtime)`` for a test case.

    For a log file name the aggregate ``DXT_POSIX`` read/write data and the
    job run time are read from the log. For ``None`` a small synthetic
    dataframe is built instead, paired with a run time of 2.
    """
    if filepath is not None:
        log_path = get_log_path(filepath)
        with darshan.DarshanReport(log_path) as report:
            agg_df = heatmap_handling.get_aggregate_data(
                report=report, mod="DXT_POSIX", ops=["read", "write"]
            )
            return agg_df, report.metadata["job"]["run_time"]

    # don't have any data sets with a max time between 1 and 10, so
    # create a synthetic one here
    synthetic_rows = [
        [4, 1.03378843, 1.03387713, 0],
        [4000, 1.04216653, 1.04231459, 0],
    ]
    column_names = ["length", "start_time", "end_time", "rank"]
    return pd.DataFrame(data=synthetic_rows, columns=column_names), 2


@pytest.mark.parametrize(
    "filepath, n_xlabels, expected_xticks, expected_xticklabels",
    [
        # --- ior_hdf5_example.darshan ---
        ("ior_hdf5_example.darshan", 2, np.linspace(0.0, 348.956244, 2), [0.0, 1.0]),
        (
            "ior_hdf5_example.darshan",
            4,
            np.linspace(0.0, 348.956244, 4),
            np.around(np.linspace(0, 1.0, 4), decimals=2),
        ),
        (
            "ior_hdf5_example.darshan",
            6,
            np.linspace(0.0, 348.956244, 6),
            np.linspace(0, 1.0, 6),
        ),
        (
            "ior_hdf5_example.darshan",
            10,
            np.linspace(0.0, 348.956244, 10),
            np.around(np.linspace(0, 1.0, 10), decimals=2),
        ),
        # --- dxt.darshan ---
        ("dxt.darshan", 2, np.linspace(0.0, 100.091252, 2), [0, 1469]),
        (
            "dxt.darshan",
            4,
            np.linspace(0.0, 100.091252, 4),
            [0, 489, 979, 1469],
        ),
        (
            "dxt.darshan",
            6,
            np.linspace(0.0, 100.091252, 6),
            [0, 293, 587, 881, 1175, 1469],
        ),
        (
            "dxt.darshan",
            10,
            np.linspace(0.0, 100.091252, 10),
            [0, 163, 326, 489, 652, 816, 979, 1142, 1305, 1469],
        ),
        # --- sample-dxt-simple.darshan ---
        ("sample-dxt-simple.darshan", 2, np.linspace(0.0, 959.403244, 2), [0.0, 1.0]),
        (
            "sample-dxt-simple.darshan",
            4,
            np.linspace(0.0, 959.403244, 4),
            np.around(np.linspace(0, 1.0, 4), decimals=2),
        ),
        (
            "sample-dxt-simple.darshan",
            6,
            np.linspace(0.0, 959.403244, 6),
            np.linspace(0, 1.0, 6),
        ),
        (
            "sample-dxt-simple.darshan",
            10,
            np.linspace(0.0, 959.403244, 10),
            np.around(np.linspace(0, 1.0, 10), decimals=2),
        ),
        # --- synthetic data set (no log file) ---
        (None, 2, [0.0, 191.880649], [0.0, 2.0]),
    ],
)
def test_set_x_axis_ticks_and_labels(
    filepath,
    n_xlabels,
    expected_xticks,
    expected_xticklabels,
    jointgrid,
):
    """Check the x-axis tick positions and tick labels of the heatmap.

    ``plot_dxt_heatmap.set_x_axis_ticks_and_labels()`` is applied to the
    ``jointgrid`` fixture and the resulting ticks/labels are compared with
    the parametrized expectations.
    """
    agg_df, runtime = _aggregate_data_and_runtime(filepath)
    tmax_dxt = float(agg_df["end_time"].max())

    # 100 x-axis bins, matching the 100-bin dimension of the fixture
    xbins = 100
    # draw a heatmap on the joint axis to simulate a normal use case
    dummy_heatmap = pd.DataFrame(np.ones((xbins, 4)))
    sns.heatmap(dummy_heatmap, ax=jointgrid.ax_joint)

    # scale the bin count by runtime / max DXT time and use it as the
    # new upper x limit
    bin_max = xbins * (runtime / tmax_dxt)
    joint_ax = jointgrid.ax_joint
    joint_ax.set_xlim(0.0, bin_max)

    # ticks and labels are set from the runtime
    plot_dxt_heatmap.set_x_axis_ticks_and_labels(
        jointgrid=jointgrid,
        n_xlabels=n_xlabels,
        tmax=runtime,
        bin_max=bin_max,
    )

    # read back what was actually placed on the axis
    actual_xticks = joint_ax.get_xticks()
    label_texts = [label.get_text() for label in joint_ax.get_xticklabels()]
    actual_xticklabels = np.asarray(label_texts, dtype=float)

    # close the figure before asserting so it is released either way
    plt.close()

    assert_allclose(actual_xticks, expected_xticks)
    assert_allclose(actual_xticklabels, expected_xticklabels, atol=1e-14, rtol=1e-17)
@pytest.mark.parametrize(
    "n_ylabels, expected_yticks",
    [
        # try the minimum number of tick marks
        (2, [0, 1]),
        # request more y-axis tick marks than are available
        (10, [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]),
        (4, [0, 0.4, 0.6, 1]),
        (6, [0, 0.2, 0.4, 0.6, 0.8, 1]),
    ],
)
def test_get_y_axis_ticks(n_ylabels, expected_yticks, jointgrid):
    """Exercise edge cases of ``plot_dxt_heatmap.get_y_axis_ticks()``.

    The tick locations computed for the fixture's joint axis must match
    the parametrized expectation for each requested label count.
    """
    tick_locations = plot_dxt_heatmap.get_y_axis_ticks(
        ax=jointgrid.ax_joint,
        n_ylabels=n_ylabels,
    )

    # release the figure before asserting
    plt.close()

    assert_allclose(tick_locations, expected_yticks, atol=1e-14, rtol=1e-17)
@pytest.mark.parametrize(
    "n_ylabels, unique_ranks, expected_yticklabels",
    [
        (2, np.arange(10), ["0", "9"]),
        # request more y-axis tick labels than are available
        (10, np.arange(4), ["0", "1", "2", "3"]),
        (4, np.arange(10), ["0", "3", "6", "9"]),
        (6, np.arange(10), ["0", "2", "4", "5", "7", "9"]),
        (8, np.arange(10), ["0", "1", "3", "4", "5", "6", "8", "9"]),
        (4, np.arange(100), ["0", "32", "64", "96"]),
        (8, np.arange(100), ["0", "12", "28", "40", "56", "68", "84", "96"]),
        (6, np.arange(1000), ["0", "192", "384", "608", "800", "992"]),
    ],
)
def test_get_y_axis_tick_labels(
    n_ylabels,
    unique_ranks,
    expected_yticklabels,
):
    """Check specific cases of ``plot_dxt_heatmap.get_y_axis_tick_labels()``.

    A heatmap with one row per unique rank is drawn on a new joint grid,
    and the labels returned for it are compared with the expected strings.
    """
    # the x-axis bin count is arbitrary; there is one y bin per unique rank
    n_xbins = 50
    n_ybins = unique_ranks.size

    # joint grid sized by those bins, with an all-ones heatmap as the
    # main 'joint' plot
    grid = sns.jointplot(kind="hist", bins=[n_xbins, n_ybins])
    sns.heatmap(np.ones((n_ybins, n_xbins)), ax=grid.ax_joint)

    actual_yticklabels = plot_dxt_heatmap.get_y_axis_tick_labels(
        ax=grid.ax_joint,
        n_ylabels=n_ylabels,
    )

    # release the figure before asserting
    plt.close()

    assert_array_equal(actual_yticklabels, expected_yticklabels)
@pytest.mark.parametrize(
    "filepath, n_ylabels, expected_yticks, expected_yticklabels",
    [
        # requesting fewer y-axis labels than are available returns
        # exactly the requested number
        ("ior_hdf5_example.darshan", 2, [0.5, 3.5], [0.0, 3.0]),
        (
            "ior_hdf5_example.darshan",
            4,
            [0.5, 1.5, 2.5, 3.5],
            [0.0, 1.0, 2.0, 3.0],
        ),
        # requesting more y-axis labels than are available returns
        # the maximum number available
        (
            "ior_hdf5_example.darshan",
            8,
            [0.5, 1.5, 2.5, 3.5],
            [0.0, 1.0, 2.0, 3.0],
        ),
        ("dxt.darshan", 2, [0.5], [0]),
        # asking for more labels than are available gives the same
        # single-label output
        ("dxt.darshan", 4, [0.5], [0]),
        ("sample-dxt-simple.darshan", 2, [0.5, 15.5], [0.0, 15.0]),
        # four labels requested, four labels expected
        (
            "sample-dxt-simple.darshan",
            4,
            [0.5, 5.5, 10.5, 15.5],
            [0.0, 5.0, 10.0, 15.0],
        ),
    ],
)
def test_set_y_axis_ticks_and_labels(
    filepath,
    n_ylabels,
    expected_yticks,
    expected_yticklabels,
):
    """Check the y-axis tick positions and tick labels of the heatmap.

    A heatmap built from the log's ``DXT_POSIX`` read/write data is drawn on
    a joint grid, ``plot_dxt_heatmap.set_y_axis_ticks_and_labels()`` is
    applied, and the resulting ticks/labels are compared with expectations.
    """
    # x-axis bins are arbitrary
    xbins = 100

    # load the report, build the aggregate dataframe and read the
    # process count while the report is open
    log_path = get_log_path(filepath)
    with darshan.DarshanReport(log_path) as report:
        agg_df = heatmap_handling.get_aggregate_data(
            report=report, mod="DXT_POSIX", ops=["read", "write"]
        )
        nprocs = report.metadata["job"]["nprocs"]

    # heatmap data, drawn on a joint grid with matching bin dimensions
    heatmap_df = heatmap_handling.get_heatmap_df(
        agg_df=agg_df, xbins=xbins, nprocs=nprocs
    )
    grid = sns.jointplot(kind="hist", bins=[xbins, nprocs])
    sns.heatmap(heatmap_df, ax=grid.ax_joint)

    # set the y-axis ticks and tick labels
    plot_dxt_heatmap.set_y_axis_ticks_and_labels(
        jointgrid=grid, n_ylabels=n_ylabels
    )

    # read back the y-axis ticks and labels actually placed on the axis
    actual_yticks = grid.ax_joint.get_yticks()
    label_texts = [label.get_text() for label in grid.ax_joint.get_yticklabels()]
    actual_yticklabels = np.asarray(label_texts, dtype=float)

    # release the figure before asserting
    plt.close()

    assert_allclose(actual_yticks, expected_yticks, atol=1e-14, rtol=1e-17)
    assert_allclose(actual_yticklabels, expected_yticklabels, atol=1e-14, rtol=1e-17)
@pytest.mark.parametrize(
    "filepath",
    [
        "ior_hdf5_example.darshan",
        "dxt.darshan",
        "sample-dxt-simple.darshan",
    ],
)
def test_remove_marginal_graph_ticks_and_labels(filepath):
    """Regression test: marginal bar graphs carry no tick labels or frames.

    The heatmap axis must stay on, while both marginal axes must be turned
    off and report empty x/y tick label lists.
    """
    log_path = get_log_path(filepath)
    with darshan.DarshanReport(log_path) as report:
        jgrid = plot_dxt_heatmap.plot_heatmap(
            report=report, mod="DXT_POSIX", ops=["read", "write"], xbins=100
        )
        marginal_axes = (jgrid.ax_marg_x, jgrid.ax_marg_y)

        # the heatmap axis is on ...
        assert jgrid.ax_joint.axison
        # ... and both marginal axes are off
        for marginal_ax in marginal_axes:
            assert not marginal_ax.axison

        # neither marginal axis has any tick labels
        for marginal_ax in marginal_axes:
            assert marginal_ax.get_xticklabels() == []
            assert marginal_ax.get_yticklabels() == []

        # close the figure object
        plt.close()
@pytest.mark.parametrize(
    "filepath",
    [
        "ior_hdf5_example.darshan",
        "dxt.darshan",
        "sample-dxt-simple.darshan",
    ],
)
def test_adjust_for_colorbar(filepath):
    """Regression test for ``plot_dxt_heatmap.adjust_for_colorbar()``.

    The axes positions of the heatmap, both marginal bar graphs and the
    colorbar are compared against stored values. The expected horizontal
    extents differ for ``dxt.darshan``, which has only 1 rank and therefore
    no horizontal bar graph to make room for.
    """
    filepath = get_log_path(filepath)
    # `dxt.darshan` is the single-rank case
    single_rank = "dxt.darshan" in filepath
    # the expected right edge of the colorbar depends on the matplotlib version
    old_mpl = version.parse(matplotlib.__version__) < version.parse("3.5.0")

    # vertical extent shared by the heatmap, horizontal bar graph and colorbar
    expected_y0 = 0.15000000000000002
    expected_y1 = 0.774390243902439

    if single_rank:
        # no room is needed for the colorbar outside the horizontal bar graph
        joint_x1 = 0.7824516129032258
        horiz_x0, horiz_x1 = 0.7877419354838711, 0.92
        # the colorbar does not have to sit as close to the figure edge
        cbar_x0 = 0.82
        cbar_x1 = 0.8416135084427767 if old_mpl else 1.72
    else:
        joint_x1 = 0.7158709677419354
        horiz_x0, horiz_x1 = 0.7206451612903225, 0.84
        cbar_x0 = 0.85
        cbar_x1 = 0.8716135084427767 if old_mpl else 1.75

    with darshan.DarshanReport(filepath) as report:
        jgrid = plot_dxt_heatmap.plot_heatmap(report=report)
        try:
            # heatmap position
            hmap_positions = jgrid.ax_joint.get_position()
            assert hmap_positions.x0 == 0.1
            assert hmap_positions.y0 == expected_y0
            assert hmap_positions.y1 == expected_y1
            assert hmap_positions.x1 == joint_x1

            # vertical bar graph position (shares the heatmap's x extent)
            vert_bar_positions = jgrid.ax_marg_x.get_position()
            assert vert_bar_positions.x0 == 0.1
            assert vert_bar_positions.y0 == 0.7780487804878049
            assert vert_bar_positions.y1 == 0.9
            assert vert_bar_positions.x1 == joint_x1

            # horizontal bar graph position
            horiz_bar_positions = jgrid.ax_marg_y.get_position()
            assert horiz_bar_positions.y0 == expected_y0
            assert horiz_bar_positions.y1 == expected_y1
            assert horiz_bar_positions.x0 == horiz_x0
            assert horiz_bar_positions.x1 == horiz_x1

            # colorbar position (the last axes of the figure)
            cbar_positions = jgrid.fig.axes[-1].get_position()
            assert cbar_positions.y0 == expected_y0
            assert cbar_positions.y1 == expected_y1
            assert cbar_positions.x0 == cbar_x0
            assert cbar_positions.x1 == cbar_x1
        finally:
            # close the figure even when an assertion fails, so it does
            # not leak into later tests
            plt.close()
@pytest.mark.parametrize(
    "filepath",
    [
        "ior_hdf5_example.darshan",
        "dxt.darshan",
        "sample-dxt-simple.darshan",
    ],
)
@pytest.mark.parametrize("mod", ["DXT_POSIX", "DXT_MPIIO", "POSIX"])
@pytest.mark.parametrize("ops", [["read", "write"], ["read"], ["write"]])
def test_plot_heatmap(filepath, mod, ops):
    """Test the primary plotting function, ``plot_dxt_heatmap.plot_heatmap()``.

    Depending on the log file, module and operations, the call is expected
    either to raise a specific error or to return a joint grid whose
    margins, spines, data presence and axis labels are then verified.
    """
    filepath = get_log_path(filepath)
    with darshan.DarshanReport(filepath) as report:
        if mod == "POSIX":
            # the "POSIX" module is expected to be rejected
            with pytest.raises(NotImplementedError, match="Only DXT and HEATMAP modules are supported."):
                plot_dxt_heatmap.plot_heatmap(report=report, mod=mod)
        elif "dxt.darshan" in filepath and mod == "DXT_MPIIO":
            # requesting "DXT_MPIIO" from `dxt.darshan` must raise the
            # appropriate "not found" error
            with pytest.raises(ValueError, match="DXT_MPIIO not found in"):
                plot_dxt_heatmap.plot_heatmap(
                    report=report, mod=mod, ops=ops, xbins=100
                )
        elif "sample-dxt-simple.darshan" in filepath and ops == ["read"]:
            # this log file is known to not have any read data, so
            # make sure we raise a ValueError here
            expected_msg = (
                "No data available for selected module\\(s\\) and operation\\(s\\)."
            )
            with pytest.raises(ValueError, match=expected_msg):
                plot_dxt_heatmap.plot_heatmap(
                    report=report, mod=mod, ops=ops, xbins=100
                )
        else:
            jgrid = plot_dxt_heatmap.plot_heatmap(
                report=report, mod=mod, ops=ops, xbins=100
            )
            try:
                # verify the margins for all plots
                assert jgrid.ax_joint.margins() == (0.05, 0.05)
                assert jgrid.ax_marg_x.margins() == (0.05, 0.05)
                assert jgrid.ax_marg_y.margins() == (0.05, 0.05)

                # ensure the heatmap spines are all visible
                for spine in jgrid.ax_joint.spines.values():
                    assert spine.get_visible()

                # the vertical bar graph and the heatmap always contain data
                assert jgrid.ax_marg_x.has_data()
                assert jgrid.ax_joint.has_data()

                if "dxt.darshan" in filepath:
                    # verify the horizontal bar graph does not contain data
                    # since there is only 1 rank for this case
                    assert not jgrid.ax_marg_y.has_data()
                else:
                    # verify the horizontal bar graph contains data for
                    # multirank cases
                    assert jgrid.ax_marg_y.has_data()

                # check that the axis labels are as expected
                assert jgrid.ax_joint.get_xlabel() == "Time (s)"
                assert jgrid.ax_joint.get_ylabel() == "Rank"
            finally:
                # close the figure even when an assertion fails, so it
                # does not leak into later tests
                plt.close()