Add write_hdf to SolutionArray objects
 * The commit implements saving of data extracted from SolutionArrays
 to HDF containers using pandas infrastructure.
 * Two methods are introduced: `write_hdf` and `to_pandas`.
 * Both methods require that the pandas module can be imported; the
 import is optional, so an exception is raised only when one of the
 methods is called without a working pandas installation.
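
A minimal usage sketch of the new methods (illustrative only: the mechanism,
states, and file/key names below are placeholders, and a working
pandas/PyTables installation is assumed):

    import cantera as ct

    gas = ct.Solution('gri30.cti')
    states = ct.SolutionArray(gas, 10)
    states.TPX = 1000., ct.one_atm, 'CH4:1, O2:2, N2:7.52'

    # extract the selected columns as a pandas DataFrame
    df = states.to_pandas(cols=('T', 'P', 'X'))

    # write the same data to an HDF container file under a user-chosen key
    states.write_hdf('states.h5', cols=('T', 'P', 'X'), key='example')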
Ingmar Schoegl committed Aug 6, 2019
1 parent bbdc790 commit dec7fa2
Showing 1 changed file with 68 additions and 5 deletions.
73 changes: 68 additions & 5 deletions interfaces/cython/cantera/composite.py
@@ -1,10 +1,18 @@
# This file is part of Cantera. See License.txt in the top-level directory or
# at http://www.cantera.org/license.txt for license and copyright information.
# at https://cantera.org/license.txt for license and copyright information.

from ._cantera import *
import numpy as np
import csv as _csv

# avoid explicit dependence of cantera on pandas
try:
import pandas as pd
_has_pandas = True
except ImportError:
_has_pandas = False


class Solution(ThermoPhase, Kinetics, Transport):
"""
A class for chemically-reacting solutions. Instances can be created to
@@ -361,11 +369,20 @@ class SolutionArray:
>>> s.reaction_equation(10)
'CH4 + O <=> CH3 + OH'
Data represnted by a SolutionArray can be extracted and saved to a CSV file
Data represented by a SolutionArray can be extracted and saved to a CSV file
using the `write_csv` method:
>>> states.write_csv('somefile.csv', cols=('T','P','X','net_rates_of_progress'))
As an alternative, data extracted from SolutionArray objects can be saved
to a pandas compatible HDF container file using the `write_hdf` method:
>>> states.write_hdf('somefile.h5', cols=('T','P','X'), key='some_key')
In this case, the (optional) key argument allows for saving and accessing
multiple solutions in a single container file. Note that `write_hdf` requires
a working pandas installation.
:param phase: The `Solution` object used to compute the thermodynamic,
kinetic, and transport properties
:param shape: A tuple or integer indicating the dimensions of the
@@ -637,22 +654,68 @@ def collect_data(self, cols=('extra','T','density','Y'), threshold=0,

return np.hstack(data), labels

def write_csv(self, filename, cols=('extra','T','density','Y'),
def write_csv(self, file_name, cols=('extra','T','density','Y'),
*args, **kwargs):
"""
Write a CSV file named *filename* containing the data specified by
Write a CSV file named *file_name* containing the data specified by
*cols*. The first row of the CSV file will contain column labels.
Additional arguments are passed on to `collect_data`. This method works
only with 1D SolutionArray objects.
"""
data, labels = self.collect_data(cols, *args, **kwargs)
with open(filename, 'w') as outfile:
with open(file_name, 'w') as outfile:
writer = _csv.writer(outfile)
writer.writerow(labels)
for row in data:
writer.writerow(row)

def to_pandas(self, cols=('extra','T','density','Y'),
*args, **kwargs):
"""
Returns the data specified by *cols* in a single pandas DataFrame.
Additional arguments are passed on to `collect_data`. This method works
only with 1D SolutionArray objects and requires a working pandas
installation.
"""

if not _has_pandas:
raise RuntimeError('Method `to_pandas` requires pandas installation.')

data, labels = self.collect_data(cols, *args, **kwargs)
return pd.DataFrame(data=data, columns=labels)

def write_hdf(self, file_name, cols=('extra','T','density','Y'),
key='df', mode='a', append=False, complevel=5, *args, **kwargs):
"""
Write data specified by *cols* to an HDF container file named *file_name*,
where *key* is used to label a group entry. Note that it is possible to
write multiple data sets to a single HDF container file, where unique
keys specify individual entries.
Internally, the HDF data entry is a `pandas.DataFrame` generated via
the `to_pandas` method.
:param file_name: name of the HDF container file; typical file extensions
are `.hdf`, `.hdf5` or `.h5`.
:param cols: A list of any properties of the solution being exported.
:param key: Identifier for the group in the container file.
:param mode: Mode to open the file {'a', 'w', 'r+'}.
:param append: Use a less efficient structure that makes HDF entries
appendable, or append to an existing appendable HDF entry.
:param complevel: Compression level for the data {0-9}; a value of 0
disables compression.
Parameters *key*, *mode*, *append*, and *complevel* are passed to
`pandas.DataFrame.to_hdf`. Additional arguments are passed on to
`collect_data`. This method works only with 1D SolutionArray objects
and requires a working pandas/PyTables installation.
"""

# create pandas DataFrame and write to file
df = self.to_pandas(cols, *args, **kwargs)
df.to_hdf(file_name, key, mode=mode, append=append, complevel=complevel)

def _make_functions():
# this is wrapped in a function to avoid polluting the module namespace
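For completeness, data written by `write_hdf` can be read back directly with
pandas (a sketch, not part of this commit; 'somefile.h5' and 'some_key' mirror
the docstring example above, and pandas with PyTables is assumed):

    import pandas as pd

    # each group entry written by SolutionArray.write_hdf is a plain DataFrame
    df = pd.read_hdf('somefile.h5', key='some_key')
    print(df.columns)  # column labels generated by collect_data
    print(df['T'])     # e.g. the temperature column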
