latham.bib

@article{latham:rma-ops,
  author = {Latham, Robert and Ross, Robert and Thakur, Rajeev},
  title = {{Implementing MPI-IO Atomic Mode and Shared File Pointers Using MPI
  One-Sided Communication}},
  journal = {International Journal of High Performance Computing Applications},
  volume = {21},
  number = {2},
  pages = {132--143},
  doi = {10.1177/1094342007077859},
  year = {2007},
  abstract = {The ROMIO implementation of the MPI-IO standard provides a
  portable infrastructure for use on top of a variety of underlying storage
  targets. These targets vary widely in their capabilities, and in some cases
  additional effort is needed within ROMIO to support all MPI-IO semantics. Two
  aspects of the interface that can be problematic to implement are MPI-IO
  atomic mode and the shared file pointer access routines. Atomic mode requires
  enforcing strict consistency semantics, and shared file pointer routines
  require communication and coordination in order to atomically update a shared
  resource. For some file systems, native locks may be used to implement these
  features, but not all file systems have lock support. In this work, we
  describe algorithms for implementing efficient mutex locks using MPI-1 and
  the one-sided capabilities from MPI-2. We then show how these algorithms may
  be used to implement both MPI-IO atomic mode and shared file pointer
  methods for ROMIO without requiring any features from the underlying file
  system. We show that these algorithms can outperform traditional file system
  lock approaches. Because of the portable nature of these algorithms, they are
  likely useful in a variety of situations where distributed locking or
  coordination is needed in the MPI-2 environment.},
  url = {http://hpc.sagepub.com/cgi/content/abstract/21/2/132},
  pdf = {papers/latham_rmaops.pdf},
  eprint = {http://hpc.sagepub.com/cgi/reprint/21/2/132.pdf}
}
@inproceedings{yu:bgl-io,
  author = {Hao Yu and R. K. Sahoo and C. Howson and George Almasi and
  J. G. Castanos and M. Gupta and Jose E. Moreira and J. J. Parker and
  T. E. Engelsiepen and Robert Ross and Rajeev Thakur and Robert Latham
  and W. D. Gropp},
  title = {High Performance File {I/O} for the {Blue Gene/L} Supercomputer},
  booktitle = {Proceedings of the 12th International Symposium on High-Performance Computer Architecture (HPCA-12)},
  month = {February},
  year = {2006},
  url = {http://www.mcs.anl.gov/~thakur/papers/bgl-io.pdf},
  pages = {187--196},
  doi = {10.1109/HPCA.2006.1598125},
  issn = {1530-0897},
  keywords = {Blue Gene/L supercomputer; General Parallel File System; MPI; data-intensive application; functional partitioning design; hierarchical partitioning; high performance file I/O; parallel HDF5; parallel I/O benchmark; parallel NetCDF; parallel file I/O architecture; application program interfaces; benchmark testing; file organisation; message passing; parallel architectures; parallel machines}
}
@inproceedings{ross:atomic,
  author = {Robert Ross and Robert Latham and William Gropp and Rajeev Thakur and Brian Toonen},
  title = {Implementing {MPI-IO} Atomic Mode Without File System Support},
  booktitle = {Proceedings of CCGrid 2005},
  month = {May},
  year = {2005},
  pdf = {papers/ross_atomic-mpiio.pdf},
  note = {Superseded by IJHPCA paper}
}
@inproceedings{thakur:byte-range-locks,
  author = {Rajeev Thakur and Robert Ross and Robert Latham},
  title = {Implementing Byte-Range Locks Using {MPI} One-Sided Communication},
  series = {Lecture Notes in Computer Science},
  booktitle = {12th European PVM/MPI Users' Group Meeting; September 18--21, 2005; Sorrento, Italy},
  year = {2005},
  month = {September},
  publisher = {Springer-Verlag Heidelberg},
  url = {http://www.mcs.anl.gov/~thakur/papers/byte-range.pdf}
}
@inproceedings{latham:grequest-extensions,
  author = {Robert Latham and William Gropp and Robert Ross and Rajeev Thakur},
  title = {{Extending the MPI-2 Generalized Request Interface}},
  series = {Lecture Notes in Computer Science},
  booktitle = {14th European PVM/MPI Users' Group Meeting},
  note = {(EuroPVM/MPI 2007)},
  year = {2007},
  month = {October},
  pages = {223-232},
  doi = {10.1007/978-3-540-75416-9_33},
  url = {http://www.springerlink.com/content/y332095819261422},
  publisher = {Springer-Verlag Heidelberg},
  pdf = {papers/latham_grequest-enhance.pdf},
  abstract = {The MPI-2 standard added a new feature to MPI called generalized
	      requests. Generalized requests allow users to add new nonblocking
	      operations to MPI while still using many pieces of MPI
	      infrastructure such as request objects and the progress
	      notification routines (MPI_Test, MPI_Wait). The generalized request design as it stands, however, has deficiencies regarding
	      typical use cases. These deficiencies are particularly evident in
	      environments that do not support threads or signals, such as the
	      leading petascale systems (IBM Blue Gene/L, Cray XT3 and XT4).
	      This paper examines these shortcomings, proposes extensions to
	      the interface to overcome them, and presents implementation
	      results.}
}
@inproceedings{latham:mpi-services,
  author = {Robert Latham and Robert Ross and Rajeev Thakur},
  title = {Can {MPI} Be Used for Persistent Parallel Services?},
  series = {Lecture Notes in Computer Science},
  booktitle = {13th European PVM/MPI Users' Group Meeting; September 17--20, 2006; Bonn, Germany},
  editor = {Mohr, B. and Worringen, J. and Dongarra, J.},
  year = {2006},
  month = {September},
  volume = {4192},
  pages = {275-284},
  publisher = {Springer-Verlag Heidelberg},
  pdf = {papers/latham_mpi-servers.pdf},
  url = {http://www.springerlink.com/content/u5768x256v818u1p/}
}
@inproceedings{latham:sharedfp,
  author = {Robert Latham and Robert Ross and Rajeev Thakur},
  title = {Implementing {MPI-IO} Shared File Pointers without File System Support},
  series = {Lecture Notes in Computer Science},
  booktitle = {12th European PVM/MPI Users' Group Meeting; September 18--21, 2005; Sorrento, Italy},
  year = {2005},
  month = {September},
  note = {Selected as one of five Best Papers. Superseded by IJHPCA paper.},
  publisher = {Springer-Verlag Heidelberg},
  pdf = {papers/latham_sharedfp.pdf},
  url = {http://springerlink.com/content/4h4yne4v91qpjrhk/},
  abstract = {The ROMIO implementation of the MPI-IO standard provides a
  portable infrastructure for use on top of any number of different underlying
  storage targets. These targets vary widely in their capabilities, and in some
  cases additional effort is needed within ROMIO to support all MPI-IO
  semantics. The MPI-2 standard defines a class of file access routines that
  use a shared file pointer. These routines require communication internal to
  the MPI-IO implementation in order to allow processes to atomically update
  this shared value. We discuss a technique that leverages MPI-2 one-sided
  operations and can be used to implement this concept without requiring any
  features from the underlying file system. We then demonstrate through a
  simulation that our algorithm adds reasonable overhead for independent
  accesses and very small overhead for collective accesses.}
}
@inproceedings{latham:mpi-io-scalability,
  author = {Rob Latham and Rob Ross and Rajeev Thakur},
  title = {The Impact of File Systems on {MPI-IO} Scalability},
  series = {Lecture Notes in Computer Science},
  booktitle = {11th European Parallel Virtual Machine and Message Passing
  Interface Users' Group Meeting; September 19--22, 2004; Budapest, Hungary},
  editor = {Kranzlm{\"u}ller, D. and Kacsuk, P. and Dongarra, J.},
  year = {2004},
  month = {September},
  volume = {3241},
  pages = {87--96},
  institution = {Argonne National Laboratory, Argonne, IL 60439, USA},
  publisher = {Springer-Verlag Heidelberg},
  url = {http://www.springerlink.com/link.asp?id=m31px2lt90296b62},
  pdf = {papers/latham:scalable_ops.pdf},
  keywords = {scalability analysis, MPI-IO, pario-bib},
  abstract = {As the number of nodes in cluster systems continues to grow,
  leveraging scalable algorithms in all aspects of such systems becomes key to
  maintaining performance. While scalable algorithms have been applied
  successfully in some areas of parallel I/O, many operations are still
  performed in an uncoordinated manner. In this work we consider, in three file
  system scenarios, the possibilities for applying scalable algorithms to the
  many operations that make up the MPI-IO interface. From this evaluation we
  extract a set of file system characteristics that aid in developing scalable
  MPI-IO implementations.}
}
@inproceedings{li:PnetCDF,
  author = {Jianwei Li and Wei-keng Liao and Alok Choudhary and Robert Ross and
  Rajeev Thakur and William Gropp and Rob Latham and Andrew Siegel and Brad
  Gallagher and Michael Zingale},
  title = {Parallel {netCDF}: A High-Performance Scientific {I/O} Interface},
  booktitle = {Proceedings of SC2003: High Performance Networking and
  Computing},
  series = {SC '03},
  year = {2003},
  month = {November},
  address = {Phoenix, AZ},
  publisher = {IEEE Computer Society Press},
  isbn = {1-58113-695-1},
  pages = {39--},
  doi = {10.1145/1048935.1050189},
  url = {http://www.sc-conference.org/sc2003/paperpdfs/pap258.pdf},
  keywords = {parallel I/O interface, netCDF, MPI-IO, pario-bib},
  abstract = {Dataset storage, exchange, and access play a critical role in
  scientific applications. For such purposes netCDF serves as a portable,
  efficient file format and programming interface, which is popular in numerous
  scientific application domains. However, the original interface does not
  provide an efficient mechanism for parallel data storage and access. \par In
  this work, we present a new parallel interface for writing and reading netCDF
  datasets. This interface is derived with minimal changes from the serial
  netCDF interface but defines semantics for parallel access and is tailored
  for high performance. The underlying parallel I/O is achieved through MPI-IO,
  allowing for substantial performance gains through the use of collective I/O
  optimizations. We compare the implementation strategies and performance with
  HDF5. Our tests indicate programming convenience and significant I/O
  performance improvement with this parallel netCDF (PnetCDF) interface.},
  comment = {published on the web only}
}
@inproceedings{peterka:vol-rend-bgp,
  author = {Tom Peterka and Hongfeng Yu and Robert Ross and Kwan-Liu Ma and Rob Latham},
  title = {{End-to-End Study of Parallel Volume Rendering on the IBM Blue Gene/P}},
  booktitle = {International Conference on Parallel Processing (ICPP 09), Vienna, Austria},
  year = {2009},
  month = {September},
  abstract = {In addition to their crucial role as simulation engines, modern
  supercomputers can be harnessed for scientific visualization. Their massive
  parallelism, high-performance storage, and low-latency high-bandwidth
  interconnects can mitigate the expanding size and complexity of scientific
  datasets and prepare for in situ visualization of these data. In prior
  research, we tested parallel volume rendering on the IBM Blue Gene/P (BG/P)
  at Argonne National Laboratory, work that this paper extends to the
  largest-scale visualization system to date. We measure performance of disk
  I/O, rendering, and compositing on larger data and images and evaluate
  bottleneck locations with respect to the volume rendering algorithm,
  BG/P-specific architecture, and parallel file system. The results, with core
  counts to 32K, data sizes to $4480^3$ elements, and image sizes to $4096^2$ pixels,
  affirm that a distributed-memory high-performance computing architecture such
  as BG/P is a scalable platform for large visualization problems. To allay
  compositing bottlenecks at large system scale, we limit the number of
  compositing cores when many small messages are exchanged. This approach
  extends the performance and scalability of direct-send compositing. After
  improving compositing, I/O is the main bottleneck, so we study I/O
  performance in detail, including collective reading of multivariate netCDF
  files directly through the visualization. To put the algorithm's bottlenecks
  into context, we compare the I/O and compositing performance to benchmarks of
  similar access patterns.},
  pdf = {papers/peterka_volume_rendering_on_bgp.pdf}
}
@inproceedings{ross:datatype_lib,
  author = {Robert Ross and Robert Latham and William Gropp
        and Ewing Lusk and Rajeev Thakur},
  title = {Processing {MPI} Datatypes Outside {MPI}},
  booktitle = {Proceedings of the 16th European PVM/MPI Users' Group Meeting on Recent Advances in Parallel Virtual Machine and Message Passing Interface (EuroPVM/MPI)},
  series = {Lecture Notes in Computer Science},
  year = {2009},
  month = {September},
  isbn = {978-3-642-03769-6},
  location = {Espoo, Finland},
  pages = {42--53},
  numpages = {12},
  url = {http://dx.doi.org/10.1007/978-3-642-03770-2_11},
  doi = {10.1007/978-3-642-03770-2_11},
  publisher = {Springer-Verlag},
  address = {Berlin, Heidelberg},
  abstract = { The MPI datatype functionality provides a powerful tool for
      describing structured memory and file regions in parallel applications,
      enabling noncontiguous data to be operated on by MPI communication and
      I/O routines. However, no facilities are provided by the MPI standard
      to allow users to efficiently manipulate MPI datatypes in their own
      codes.

      We present MPITypes, an open source, portable library that enables the
      construction of efficient MPI datatype processing routines outside the
      MPI implementation. MPITypes enables programmers who are not MPI
      implementors to create efficient datatype processing routines. We show
      the use of MPITypes in three examples: copying data between user buffers
      and a “pack” buffer, encoding of data in a portable format, and
      transpacking. Our experimental evaluation shows that the implementation
      achieves rates comparable to existing MPI implementations.},
  pdf = {papers/ross_datatype_lib.pdf}
}
@inproceedings{slang:leadership_io,
  author = {Samuel Lang and Philip Carns and Robert Latham and Robert Ross and Kevin Harms and William Allcock},
  title = {{I/O} Performance Challenges at Leadership Scale},
  year = {2009},
  month = {November},
  booktitle = {Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis},
  series = {SC '09},
  isbn = {978-1-60558-744-8},
  location = {Portland, Oregon},
  pages = {40:1--40:12},
  articleno = {40},
  numpages = {12},
  url = {http://doi.acm.org/10.1145/1654059.1654100},
  doi = {10.1145/1654059.1654100},
  publisher = {ACM},
  address = {New York, NY, USA}
}
@inproceedings{blas:bg-writeback,
  author = {Javier Garc{\'i}a Blas and Florin Isaila and Jes{\'u}s Carretero and Robert Latham and Robert Ross},
  title = {Multiple-level {MPI} file write-back and prefetching for {Blue Gene} systems},
  booktitle = {Proceedings of the 16th European PVM/MPI Users' Group Meeting (EuroPVM/MPI 2009)},
  month = {September},
  year = {2009}
}
@inproceedings{kendall:terrascale,
  author = {Wes Kendall and M. Glatter and J. Huang and Tom Peterka and Rob Latham and Robert B. Ross},
  title = { Terascale Data Organization for Discovering Multivariate
	Climatic Trends},
  booktitle = {Proceedings of SC2009: High Performance Networking and Computing},
  year = {2009},
  month = {November}
}
@inproceedings{gao:pnetcdf-subfiling,
  author = {Kui Gao and Wei-keng Liao and Arifa Nisar and Alok Choudhary and Robert Ross and Robert Latham},
  title = {{Using Subfiling to Improve Programming Flexibility and Performance of Parallel Shared-file I/O}},
  booktitle = {Proceedings of the International Conference on Parallel
    Processing, Vienna, Austria},
  month = {September},
  year = {2009}
}
@inproceedings{gao:multivar-io,
  author = {Kui Gao and Wei-keng Liao and Alok Choudhary and Robert Ross and Robert Latham},
  title = {{Combining I/O Operations for Multiple Array Variables in Parallel NetCDF}},
  booktitle = {Proceedings of the Workshop on Interfaces and Architectures for Scientific Data Storage, held in conjunction with the IEEE Cluster Conference, New Orleans, Louisiana},
  month = {September},
  year = {2009}
}
@inproceedings{carns:darshan,
  author = {Philip Carns and Robert Latham and Robert Ross and Kamil Iskra and Samuel Lang and Katherine Riley},
  title = {{24/7 Characterization of Petascale I/O Workloads}},
  booktitle = {Proceedings of the 2009 Workshop on Interfaces and Architectures for Scientific Data Storage},
  month = {September},
  year = {2009},
  abstract = {Developing and tuning computational science applications
       to run on extreme scale systems are increasingly complicated
       processes.  Challenges such as managing memory access and tuning
       message-passing behavior are made easier by tools designed
       specifically to aid in these processes. Tools that can help users
       better understand the behavior of their application with respect
       to I/O have not yet reached the level of utility necessary to
       play a central role in application development and tuning. This
       deficiency in the tool set means that we have a poor
       understanding of how specific applications interact with storage.
       Worse, the community has little knowledge of what sorts of access
       patterns are common in today's applications, leading to confusion
       in the storage research community as to the pressing needs of the
       computational science community.  This paper describes the
       Darshan I/O characterization tool.  Darshan is designed to
       capture an accurate picture of application I/O behavior,
       including properties such as patterns of access within files,
       with the minimum possible overhead.  This characterization can
       shed important light on the I/O behavior of applications at
       extreme scale.  Darshan also can enable researchers to gain
       greater insight into the overall patterns of access exhibited by
       such applications, helping the storage community to understand
       how to best serve current computational science applications and
       better predict the needs of future applications.  In this work we
       demonstrate Darshan's ability to characterize the I/O behavior of
       four scientific applications and show that it induces negligible
       overhead for I/O intensive jobs with as many as 65,536 processes.}
}
@article{choudhary:scalable-analytics,
  author = {Alok Choudhary and Wei-keng Liao and Kui Gao and Arifa Nisar and Robert Ross and Rajeev Thakur and Robert Latham},
  title = {{Scalable I/O and Analytics}},
  journal = {Journal of Physics: Conference Series},
  volume = {180},
  pages = {012048},
  year = {2009},
  month = {August},
  note = {Proceedings of SciDAC conference, 14-18 June 2009, San Diego, California, USA}
}
@inproceedings{ali:scalable-iofwd,
  author = {Nawab Ali and Philip Carns and Kamil Iskra and Dries Kimpe and Samuel Lang and Robert Latham and Robert Ross and Lee Ward and P. Sadayappan},
  title = {{Scalable I/O Forwarding Framework for High-Performance Computing Systems}},
  booktitle = {Proceedings of the IEEE Conference on Cluster Computing, New Orleans, LA},
  month = {September},
  year = {2009}
}
@inproceedings{lang:aattrs,
  author = {Sam Lang and Robert Latham and Dries Kimpe and Robert Ross},
  title = {{Interfaces for Coordinated Access in the File System}},
  booktitle = {Proceedings of the 2009 Workshop on Interfaces and Architectures for Scientific Data Storage},
  year = {2009},
  month = {September},
  abstract = {Distributed applications routinely use the file system for
	coordination of access and often rely on POSIX consistency semantics or
	file system lock support for coordination.  In this paper we discuss
	the types of coordination many distributed applications perform and the
	coordination model they are restricted to using with locks.  We
	introduce an alternative coordination model in the file system that
	uses extended attribute support in the file system to provide atomic
	operations on serialization variables.  We demonstrate the usefulness
	of this approach for a number of coordination patterns common to
	distributed applications.}
}
@inproceedings{isaila:latency-hiding,
  author = {Isaila, Florin and Blas, Javier Garcia and Carretero, Jesus and Latham, Robert and Lang, Samuel and Ross, Robert},
  title = {{Latency Hiding File I/O for Blue Gene Systems}},
  booktitle = {CCGRID '09: Proceedings of the 2009 9th IEEE/ACM International Symposium on Cluster Computing and the Grid},
  year = {2009},
  isbn = {978-0-7695-3622-4},
  pages = {212--219},
  doi = {10.1109/CCGRID.2009.49},
  publisher = {IEEE Computer Society},
  address = {Washington, DC, USA}
}
@article{isaila:multi-level-data-staging-bgp,
  author = {Isaila, Florin and Garcia Blas, Javier and Carretero, Jesus and Latham, Robert and Ross, Robert},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  title = {Design and Evaluation of Multiple-Level Data Staging for Blue Gene Systems},
  year = {2011},
  month = {June},
  volume = {22},
  number = {6},
  pages = {946--959},
  abstract = {Parallel applications currently suffer from a significant imbalance between computational power and available I/O bandwidth. Additionally, the hierarchical organization of current Petascale systems contributes to an increase of the I/O subsystem latency. In these hierarchies, file access involves pipelining data through several networks with incremental latencies and higher probability of congestion. Future Exascale systems are likely to share this trait. This paper presents a scalable parallel I/O software system designed to transparently hide the latency of file system accesses to applications on these platforms. Our solution takes advantage of the hierarchy of networks involved in file accesses, to maximize the degree of overlap between computation, file I/O-related communication, and file system access. We describe and evaluate a two-level hierarchy for Blue Gene systems consisting of client-side and I/O node-side caching. Our file cache management modules coordinate the data staging between application and storage through the Blue Gene networks. The experimental results demonstrate that our architecture achieves significant performance improvements through a high degree of overlap between computation, communication, and file I/O.},
  doi = {10.1109/TPDS.2010.127},
  pdf = {papers/TPDSSI-2009-12-0654.pdf},
  issn = {1045-9219}
}
@inproceedings{Wozniak:reliable_mpi_data_structures,
  author = {Wozniak, Justin M. and Jacobs, Bryan and Latham, Robert and Lang, Sam and Son, Seung Woo and Ross, Robert},
  title = {Implementing Reliable Data Structures for MPI Services in High Component Count Systems},
  booktitle = {Proceedings of the 16th European PVM/MPI Users' Group Meeting on Recent Advances in Parallel Virtual Machine and Message Passing Interface},
  year = {2009},
  isbn = {978-3-642-03769-6},
  location = {Espoo, Finland},
  pages = {321--322},
  numpages = {2},
  url = {http://dx.doi.org/10.1007/978-3-642-03770-2_39},
  doi = {10.1007/978-3-642-03770-2_39},
  acmid = {1612260},
  publisher = {Springer-Verlag},
  address = {Berlin, Heidelberg}
}
@article{latham:flash_io_study,
  author = {Rob Latham and Chris Daley and Wei-keng Liao and Kui Gao and Rob Ross and Anshu Dubey and Alok Choudhary},
  title = {A Case Study for Scientific {I/O}: Improving the {FLASH} Astrophysics Code},
  journal = {Computational Science \& Discovery},
  volume = {5},
  number = {1},
  pages = {015001},
  url = {http://stacks.iop.org/1749-4699/5/i=1/a=015001},
  year = {2012},
  abstract = {The FLASH code is a computational science tool for simulating and studying thermonuclear reactions. The program periodically outputs large checkpoint files (to resume a calculation from a particular point in time) and smaller plot files (for visualization and analysis). Initial experiments on BlueGene/P spent excessive time in input/output (I/O), making it difficult to do actual science. Our investigation of time spent in I/O revealed several locations in the I/O software stack where we could make improvements. Fixing data corruption in the MPI-IO library allowed us to use collective I/O, yielding an order of magnitude improvement. Restructuring the data layout provided a more efficient I/O access pattern and yielded another doubling of performance, but broke format assumptions made by other tools in the application workflow. Using new nonblocking APIs in the Parallel-NetCDF library allowed us to keep high performance and maintain backward compatibility. The I/O research community has studied a host of optimizations and strategies. Sometimes the challenge for applications is knowing how to apply these new techniques to production codes. In this case study, we offer a demonstration of how computational scientists, with a detailed understanding of their application, and the I/O community, with a wide array of approaches from which to choose, can magnify each other's efforts and achieve tremendous application productivity gains.}
}
@inproceedings{carns:darshan_study,
  author = {Philip Carns and Kevin Harms and William Allcock and Charles Bacon and Samuel Lang and Robert Latham and Robert Ross},
  title = {Understanding and Improving Computational Science Storage Access through Continuous Characterization},
  booktitle = {Proceedings of the IEEE Conference on Mass Storage Systems and Technologies (MSST 2011)},
  isbn = {978-1-4577-0427-7},
  month = {May},
  year = {2011},
  pages = {1--14},
  doi = {10.1109/MSST.2011.5937212},
  publisher = {IEEE Computer Society},
  address = {Los Alamitos, CA, USA}
}
@inproceedings{son:reliable_mpi_io,
  author = {Seung Woo Son and Samuel Lang and Robert Latham and Robert Ross and Rajeev Thakur},
  title = {{Reliable MPI-IO through Layout-Aware Replication}},
  booktitle = {Proc. of the 7th IEEE International Workshop on Storage Network Architecture and Parallel I/O (SNAPI 2011)},
  month = {May},
  year = {2011}
}
@article{carns:darshan_study_journal,
  title = {{Understanding and Improving Computational Science Storage Access through Continuous Characterization}},
  author = {Philip Carns and Kevin Harms and William Allcock and Charles Bacon and Samuel Lang and Robert Latham and Robert Ross},
  journal = {ACM Transactions on Storage},
  year = {2011},
  issn = {1553-3077},
  volume = {7},
  number = {3},
  issue_date = {October 2011},
  publisher = {ACM},
  address = {New York, NY, USA}
}
@inproceedings{Lakshminarasimhan:isabella,
  author = {Lakshminarasimhan, Sriram and Shah, Neil and Ethier, Stephane and
Klasky, Scott and Latham, Rob and Ross, Rob and Samatova, Nagiza},
  affiliation = {North Carolina State University, Raleigh, NC 27695, USA},
  title = {Compressing the Incompressible with ISABELA: In-situ Reduction of
Spatio-temporal Data},
  booktitle = {Euro-Par 2011 Parallel Processing},
  series = {Lecture Notes in Computer Science},
  editor = {Jeannot, Emmanuel and Namyst, Raymond and Roman, Jean},
  publisher = {Springer Berlin / Heidelberg},
  isbn = {978-3-642-23399-9},
  pages = {366-379},
  volume = {6852},
  url = {http://dx.doi.org/10.1007/978-3-642-23400-2_34},
  doi = {10.1007/978-3-642-23400-2_34},
  abstract = {Modern large-scale scientific simulations running on HPC systems
generate data in the order of terabytes during a single run. To lessen the I/O
load during a simulation run, scientists are forced to capture data
infrequently, thereby making data collection an inherently lossy process. Yet,
lossless compression techniques are hardly suitable for scientific data due to
its inherently random nature; for the applications used here, they offer less
than 10\% compression rate. They also impose significant overhead during
decompression, making them unsuitable for data analysis and visualization that
require repeated data access. To address this problem, we propose an effective
method for In-situ Sort-And-B-spline Error-bounded Lossy Abatement (ISABELA)
of scientific data that is widely regarded as effectively incompressible. With
ISABELA, we apply a preconditioner to seemingly random and noisy data along
spatial resolution to achieve an accurate fitting model that guarantees a ≥
0.99 correlation with the original data. We further take advantage of temporal
patterns in scientific data to compress data by ≈ 85\%, while introducing only a
negligible overhead on simulations in terms of runtime. ISABELA significantly
outperforms existing lossy compression methods, such as Wavelet compression.
Moreover, besides being a communication-free and scalable compression
technique, ISABELA is an inherently local decompression method, namely it does
not decode the entire data, making it attractive for random access.},
  month = {September},
  year = {2011}
}
@article{kendall:bil_cga,
  author = {Kendall, Wesley and Huang, Jian and Peterka, Tom and Latham, Robert and
 Ross, Robert},
  journal = {IEEE Computer Graphics and Applications},
  title = {Toward a General I/O Layer for Parallel-Visualization Applications},
  year = {2011},
  month = {November-December},
  volume = {31},
  number = {6},
  pages = {6--10},
  abstract = {For large-scale visualization applications, the visualization
 community urgently needs general solutions for efficient parallel I/O. These
 parallel visualization solutions should center around design patterns and the
 related data-partitioning strategies, not file formats. From this respect,
 it's feasible to greatly alleviate I/O burdens without reinventing the wheel.
 For example, BIL (Block I/O Layer), which implements such a pattern, has
 greatly accelerated I/O performance for large-scale parallel particle tracing,
 a pervasive but challenging use case.},
  doi = {10.1109/MCG.2011.102},
  issn = {0272-1716}
}

This file was generated by bibtex2html 1.96.