Add write_hdf to SolutionArray objects

* The commit implements saving of data extracted from SolutionArrays to HDF containers using pandas infrastructure.
* Two methods are introduced: `write_hdf` and `to_pandas`.
* Both methods only work if the pandas module can be imported; an exception is raised only if the method is called without a working pandas installation.
Cantera · Aug 7, 2019 · d4f4eeb · d4f4eeb
1 parent bbdc790
commit d4f4eeb
Showing 1 changed file with 73 additions and 8 deletions.
diff --git a/interfaces/cython/cantera/composite.py b/interfaces/cython/cantera/composite.py
@@ -1,10 +1,11 @@
 # This file is part of Cantera. See License.txt in the top-level directory or
-# at http://www.cantera.org/license.txt for license and copyright information.
+# at https://cantera.org/license.txt for license and copyright information.
 
 from ._cantera import *
 import numpy as np
 import csv as _csv
 
+
 class Solution(ThermoPhase, Kinetics, Transport):
     """
     A class for chemically-reacting solutions. Instances can be created to
@@ -294,7 +295,7 @@ class SolutionArray:
 
     SolutionArray can represent both 1D and multi-dimensional arrays of states,
     with shapes described in the same way as Numpy arrays. All of the states
-    can be set in a single call.
+    can be set in a single call::
 
         >>> gas = ct.Solution('gri30.cti')
         >>> states = ct.SolutionArray(gas, (6, 10))
@@ -347,7 +348,7 @@ class SolutionArray:
         ...     # do something with mu[i,j]
 
     Information about a subset of species may also be accessed, using
-    parentheses to specify the species:
+    parentheses to specify the species::
 
         >>> states('CH4').Y # -> mass fraction of CH4 in each state
         >>> states('H2','O2').partial_molar_cp # -> cp for H2 and O2
@@ -361,10 +362,20 @@ class SolutionArray:
         >>> s.reaction_equation(10)
         'CH4 + O <=> CH3 + OH'
 
-    Data represnted by a SolutionArray can be extracted and saved to a CSV file
-    using the `write_csv` method:
+    Data represented by a SolutionArray can be extracted and saved to a CSV file
+    using the `write_csv` method::
+
+        >>> states.write_csv('somefile.csv', cols=('T', 'P', 'X', 'net_rates_of_progress'))
+
+    As an alternative, data extracted from SolutionArray objects can be saved
+    to a Pandas compatible HDF container file using the `write_hdf` method::
 
-        >>> states.write_csv('somefile.csv', cols=('T','P','X','net_rates_of_progress'))
+        >>> states.write_hdf('somefile.h5', cols=('T', 'P', 'X'), key='some_key')
+
+    In this case, the (optional) key argument allows for saving and accessing
+    multiple solutions in a single container file. Note that `write_hdf` requires
+    working installations of Pandas and PyTables, i.e. use pip or conda to install
+    pandas and tables to enable this optional output method.
 
     :param phase: The `Solution` object used to compute the thermodynamic,
         kinetic, and transport properties
@@ -565,7 +576,7 @@ def equilibrate(self, *args, **kwargs):
             self._phase.equilibrate(*args, **kwargs)
             self._states[index][:] = self._phase.state
 
-    def collect_data(self, cols=('extra','T','density','Y'), threshold=0,
+    def collect_data(self, cols=('extra', 'T', 'density', 'Y'), threshold=0,
                      species='Y'):
         """
         Returns the data specified by *cols* in a single 2D Numpy array, along
@@ -637,7 +648,7 @@ def collect_data(self, cols=('extra','T','density','Y'), threshold=0,
 
         return np.hstack(data), labels
 
-    def write_csv(self, filename, cols=('extra','T','density','Y'),
+    def write_csv(self, filename, cols=('extra', 'T', 'density', 'Y'),
                   *args, **kwargs):
         """
         Write a CSV file named *filename* containing the data specified by
@@ -653,6 +664,60 @@ def write_csv(self, filename, cols=('extra','T','density','Y'),
             for row in data:
                 writer.writerow(row)
 
+    def to_pandas(self, cols=('extra', 'T', 'density', 'Y'),
+                  *args, **kwargs):
+        """
+        Returns the data specified by *cols* in a single Pandas DataFrame.
+
+        Additional arguments are passed on to `collect_data`. This method works
+        only with 1D SolutionArray objects and requires a working Pandas
+        installation. Use pip or conda to install pandas to enable this method.
+        """
+
+        # local import avoids explicit dependence of cantera on Pandas
+        import pandas as pd
+
+        data, labels = self.collect_data(cols, *args, **kwargs)
+        return pd.DataFrame(data=data, columns=labels)
+
+    def write_hdf(self, filename, cols=('extra', 'T', 'density', 'Y'),
+                  key='df', mode=None, append=None, complevel=None,
+                  *args, **kwargs):
+        """
+        Write data specified by *cols* to a HDF container file named *filename*.
+        Note that it is possible to write multiple data entries to a single HDF
+        container file, where *key* is used to differentiate data.
+
+        Internally, every HDF data entry is a `pandas.DataFrame` generated by
+        the `to_pandas` method.
+
+        :param filename: name of the HDF container file; typical file extensions
+            are `.hdf`, `.hdf5` or `.h5`.
+        :param cols: A list of any properties of the solution being exported.
+        :param key: Identifier for the group in the container file.
+        :param mode: Mode to open the file {None,'a','w','r+'}.
+        :param append: Use a less efficient structure that makes HDF entries
+            appendable, or append to an existing appendable entry {None,True,False}.
+        :param complevel: Specifies a compression level for data {None,0-9}.
+            A value of 0 disables compression.
+
+        Arguments *key*, *mode*, *append*, and *complevel* are mapped to
+        parameters for `pandas.DataFrame.to_hdf`; the choice `None` for *mode*,
+        *append*, and *complevel* results in default values set by Pandas.
+
+        Additional arguments (i.e. *args* and *kwargs*) are passed on to
+        `collect_data` via `to_pandas`; see `collect_data` for further
+        information. This method works only with 1D SolutionArray objects
+        and requires working installations of Pandas and PyTables. Use
+        pip or conda to install pandas and tables to enable this method.
+        """
+
+        # create Pandas DataFrame and write to file
+        df = self.to_pandas(cols, *args, **kwargs)
+        pd_kwargs = {'mode': mode, 'append': append, 'complevel': complevel}
+        pd_kwargs = {k: v for k, v in pd_kwargs.items() if v is not None}
+        df.to_hdf(filename, key, **pd_kwargs)
+
 
 def _make_functions():
     # this is wrapped in a function to avoid polluting the module namespace