Cantera · speth · Oct 3, 2019 · Aug 11, 2019 · Aug 9, 2019 · Aug 13, 2019
diff --git a/interfaces/cython/cantera/composite.py b/interfaces/cython/cantera/composite.py
@@ -5,6 +5,12 @@
 import numpy as np
 import csv as _csv
 
+# avoid explicit dependence of cantera on pandas
+try:
+    import pandas as _pandas
+except ImportError as err:
+    _pandas = err
+
 
 class Solution(ThermoPhase, Kinetics, Transport):
     """
@@ -293,7 +299,7 @@ class SolutionArray:
     states using the same `Solution` object and computing properties for that
     array of states.
 
-    SolutionArray can represent both 1D and multi-dimensional arrays of states,
+    `SolutionArray` can represent both 1D and multi-dimensional arrays of states,
     with shapes described in the same way as Numpy arrays. All of the states
     can be set in a single call::
 
@@ -305,7 +311,7 @@ class SolutionArray:
         >>> states.TPX = T, P, X
 
     Similar to Numpy arrays, input with fewer non-singleton dimensions than the
-    SolutionArray is 'broadcast' to generate input of the appropriate shape. In
+    `SolutionArray` is 'broadcast' to generate input of the appropriate shape. In
     the above example, the single value for the mole fraction input is applied
     to each input, while each row has a constant temperature and each column has
     a constant pressure.
@@ -331,7 +337,7 @@ class SolutionArray:
         >>> states.equilibrate('HP')
         >>> states.T # -> adiabatic flame temperature at various equivalence ratios
 
-    SolutionArray objects can also be 'sliced' like Numpy arrays, which can be
+    `SolutionArray` objects can also be 'sliced' like Numpy arrays, which can be
     used both for accessing and setting properties::
 
         >>> states = ct.SolutionArray(gas, (6, 10))
@@ -341,7 +347,7 @@ class SolutionArray:
     If many slices or elements of a property are going to be accessed (i.e.
     within a loop), it is generally more efficient to compute the property array
     once and access this directly, rather than repeatedly slicing the
-    SolutionArray object, e.g.::
+    `SolutionArray` object, e.g.::
 
         >>> mu = states.viscosity
         >>> for i,j in np.ndindex(mu.shape):
@@ -362,25 +368,38 @@ class SolutionArray:
         >>> s.reaction_equation(10)
         'CH4 + O <=> CH3 + OH'
 
-    Data represented by a SolutionArray can be extracted and saved to a CSV file
+    Data represented by a `SolutionArray` can be extracted and saved to a CSV file
     using the `write_csv` method::
 
         >>> states.write_csv('somefile.csv', cols=('T', 'P', 'X', 'net_rates_of_progress'))
 
-    As an alternative, data extracted from SolutionArray objects can be saved
-    to a pandas compatible HDF container file using the `write_hdf` method::
+    As long as stored columns specify a valid thermodynamic state, the contents of
+    a `SolutionArray` can be restored using the `read_csv` method::
+
+        >>> states = ct.SolutionArray(gas)
+        >>> states.read_csv('somefile.csv')
+
+    As an alternative to comma-separated export and import, data extracted from
+    `SolutionArray` objects can also be saved to a pandas-compatible HDF
+    container file using the `write_hdf` method::
 
         >>> states.write_hdf('somefile.h5', cols=('T', 'P', 'X'), key='some_key')
 
-    In this case, the (optional) key argument allows for saving and accessing
-    multiple solutions in a single container file. Note that `write_hdf` requires
-    working installations of pandas and PyTables. These packages can be installed
-    using pip (`pandas` and `tables`) or conda (`pandas` and `pytables`).
+    and restored from such a file using the `read_hdf` method::
+
+        >>> states = ct.SolutionArray(gas)
+        >>> states.read_hdf('somefile.h5', key='some_key')
+
+    For HDF export and import, the (optional) argument *key* allows for saving
+    and accessing multiple solutions in a single container file. Note that
+    `write_hdf` and `read_hdf` require working installations of pandas and
+    PyTables. These packages can be installed using pip (`pandas` and `tables`)
+    or conda (`pandas` and `pytables`).
 
     :param phase: The `Solution` object used to compute the thermodynamic,
         kinetic, and transport properties
     :param shape: A tuple or integer indicating the dimensions of the
-        SolutionArray. If the shape is 1D, the array may be extended using the
+        `SolutionArray`. If the shape is 1D, the array may be extended using the
         `append` method. Otherwise, the shape is fixed.
     :param states: The initial array of states. Used internally to provide
         slicing support.
@@ -576,6 +595,124 @@ def equilibrate(self, *args, **kwargs):
             self._phase.equilibrate(*args, **kwargs)
             self._states[index][:] = self._phase.state
 
+    def restore_data(self, data, labels):
+        """
+        Restores a `SolutionArray` based on *data* specified in a single
+        2D Numpy array and a list of corresponding column *labels*. Thus,
+        this method can restore data exported by `collect_data`.
+
+        :param data: a 2D Numpy array holding data to be restored.
+        :param labels: a list of labels corresponding to `SolutionArray` entries.
+        :raises TypeError: if *data* is not a 2D Numpy array.
+        :raises ValueError: if dimensions, species or state labels are invalid.
+
+        The receiving `SolutionArray` either has to be empty or should have
+        matching dimensions. Essential state properties and extra entries
+        are detected automatically whereas stored information of calculated
+        properties is omitted. If the receiving `SolutionArray` has extra
+        entries already specified, only those will be imported; if *labels*
+        does not contain those entries, an error is raised.
+        """
+
+        # check arguments
+        if not isinstance(data, np.ndarray) or data.ndim != 2:
+            raise TypeError("restore_data only works for 2D ndarrays")
+        elif len(labels) != data.shape[1]:
+            raise ValueError("inconsistent data and label dimensions "
+                             "({} vs. {})".format(len(labels), data.shape[1]))
+        rows = data.shape[0]
+        if self._shape != (0,) and self._shape != (rows,):
+            raise ValueError(
+                "incompatible dimensions ({} vs. {}): the receiving "
+                "SolutionArray object either needs to be empty "
+                "or have a length that matches data rows "
+                "to be restored".format(self._shape[0], rows))
+
+        # get full state information (may differ depending on ThermoPhase type)
+        full_states = self._phase._full_states.values()
+        if isinstance(self._phase, PureFluid):
+            # ensure that potentially non-unique state definitions are checked last
+            last = ['TP', 'TX', 'PX']
+            full_states = [fs for fs in full_states
+                           if fs not in last] + ['TPX'] + last
+
+        # determine whether complete concentration is available (mass or mole)
+        # assumes that `X` or `Y` is always in last place
+        mode = ''
+        for prefix in ['X_', 'Y_']:
+            spc = ['{}{}'.format(prefix, s) for s in self.species_names]
+            # solution species names also found in labels
+            valid_species = {s[2:]: labels.index(s) for s in spc
+                             if s in labels}
+            # labels that start with prefix (indicating concentration)
+            all_species = [l for l in labels if l.startswith(prefix)]
+            if valid_species:
+                # save species mode and remaining full_state candidates
+                mode = prefix[0]
+                full_states = [v[:-1] for v in full_states if mode in v]
+                break
+        if len(valid_species) != len(all_species):
+            unknown = [l for l in all_species if l[2:] not in valid_species]
+            raise ValueError('incompatible species information for '
+                             '{}'.format(unknown))
+        if mode == '':
+            # concentration specifier ('X' or 'Y') is not used
+            full_states = {v[:2] for v in full_states}
+
+        # determine suitable thermo properties for reconstruction
+        basis = 'mass' if self.basis == 'mass' else 'mole'
+        prop = {'T': ('T',), 'P': ('P',),
+                'D': ('density', 'density_{}'.format(basis)),
+                'U': ('u', 'int_energy_{}'.format(basis)),
+                'V': ('v', 'volume_{}'.format(basis)),
+                'H': ('h', 'enthalpy_{}'.format(basis)),
+                'S': ('s', 'entropy_{}'.format(basis))}
+        for fs in full_states:
+            # identify property specifiers
+            state = [{c: labels.index(p) for p in prop[c] if p in labels}
+                     for c in fs]
+            if all(state):
+                # all property identifiers match
+                mode = fs + mode
+                break
+        if len(mode) < 2:
+            # no candidate matched (mode is '' or only the 'X'/'Y' specifier)
+            raise ValueError(
+                "invalid/incomplete state information (detected "
+                "partial information as mode='{}')".format(mode))
+
+        # assemble and restore state information
+        state_data = tuple(data[:, st[key]] for st, key in zip(state, mode))
+        if valid_species:
+            state_data += (np.zeros((rows, self.n_species)),)
+            for i, s in enumerate(self.species_names):
+                if s in valid_species:
+                    state_data[-1][:, i] = data[:, valid_species[s]]
+
+        # labels may include calculated properties that must not be restored
+        # NOTE(review): substring test also drops extras containing such a name
+        calculated = self._scalar + self._n_species + self._n_reactions
+        exclude = [l for l in labels if any(v in l for v in calculated)]
+        extra = {l: list(data[:, i]) for i, l in enumerate(labels)
+                 if l not in exclude}
+        if len(self._extra_lists):
+            extra_lists = {k: extra[k] for k in self._extra_arrays}
+        else:
+            extra_lists = extra
+
+        # ensure that SolutionArray accommodates dimensions
+        if self._shape == (0,):
+            self._states = [self._phase.state] * rows
+            self._indices = self._output_dummy = range(rows)
+            self._shape = (rows,)
+
+        # restore data
+        for i in self._indices:
+            setattr(self._phase, mode, [st[i, ...] for st in state_data])
+            self._states[i] = self._phase.state
+        self._extra_lists = extra_lists
+        self._extra_arrays = {l: np.array(v) for l, v in extra_lists.items()}
+
     def set_equivalence_ratio(self, phi, *args, **kwargs):
         """
         See `ThermoPhase.set_equivalence_ratio`
@@ -604,8 +741,8 @@ def collect_data(self, cols=('extra', 'T', 'density', 'Y'), threshold=0,
             are specified, then either the mass or mole fraction of that species
             will be taken, depending on the value of *species*. *cols* may also
             include any arrays which were specified as 'extra' variables when
-            defining the SolutionArray object. The special value 'extra' can be
-            used to include all 'extra' variables.
+            defining the `SolutionArray` object. The special value 'extra' can
+            be used to include all 'extra' variables.
         :param threshold: Relative tolerance for including a particular column.
             The tolerance is applied by comparing the maximum absolute value for
             a particular column to the maximum absolute value in all columns for
@@ -672,7 +809,7 @@ def write_csv(self, filename, cols=('extra', 'T', 'density', 'Y'),
         *cols*. The first row of the CSV file will contain column labels.
 
         Additional arguments are passed on to `collect_data`. This method works
-        only with 1D SolutionArray objects.
+        only with 1D `SolutionArray` objects.
         """
         data, labels = self.collect_data(cols, *args, **kwargs)
         with open(filename, 'w') as outfile:
@@ -681,21 +818,47 @@ def write_csv(self, filename, cols=('extra', 'T', 'density', 'Y'),
             for row in data:
                 writer.writerow(row)
 
+    def read_csv(self, filename):
+        """
+        Read a CSV file named *filename* and restore data to the `SolutionArray`
+        using `restore_data`. This method allows for recreation of data
+        previously exported by `write_csv`.
+        """
+        # single rows/columns are parsed as 1D (or 0D): restore dimensions
+        labels = np.atleast_1d(
+            np.genfromtxt(filename, max_rows=1, delimiter=',', dtype=str))
+        data = np.genfromtxt(filename, skip_header=1, delimiter=',')
+        self.restore_data(data.reshape(-1, labels.size), list(labels))
+
     def to_pandas(self, cols=('extra', 'T', 'density', 'Y'),
                   *args, **kwargs):
         """
         Returns the data specified by *cols* in a single pandas DataFrame.
 
         Additional arguments are passed on to `collect_data`. This method works
-        only with 1D SolutionArray objects and requires a working pandas
+        only with 1D `SolutionArray` objects and requires a working pandas
         installation. Use pip or conda to install `pandas` to enable this method.
         """
 
-        # local import avoids explicit dependence of cantera on pandas
-        import pandas as pd
+        if isinstance(_pandas, ImportError):
+            raise _pandas  # re-raise the ImportError stored at import time
 
         data, labels = self.collect_data(cols, *args, **kwargs)
-        return pd.DataFrame(data=data, columns=labels)
+        return _pandas.DataFrame(data=data, columns=labels)
+
+    def from_pandas(self, df):
+        """
+        Restores `SolutionArray` data from a pandas DataFrame *df*.
+
+        This method is intended for loading data that were previously
+        exported by `to_pandas`: column names of *df* serve as labels and
+        its values as data for `restore_data`. A working pandas installation
+        is required; the package 'pandas' can be installed using pip or conda.
+        """
+
+        # column names double as the labels expected by `restore_data`
+        values = df.to_numpy(dtype=float)
+        self.restore_data(values, list(df.columns))
 
     def write_hdf(self, filename, cols=('extra', 'T', 'density', 'Y'),
                   key='df', mode=None, append=None, complevel=None,
@@ -724,7 +887,7 @@ def write_hdf(self, filename, cols=('extra', 'T', 'density', 'Y'),
 
         Additional arguments (i.e. *args* and *kwargs*) are passed on to
         `collect_data` via `to_pandas`; see `collect_data` for further
-        information. This method works only with 1D SolutionArray objects
+        information. This method works only with 1D `SolutionArray` objects
         and requires working installations of pandas and PyTables. These
         packages can be installed using pip (`pandas` and `tables`) or conda
         (`pandas` and `pytables`).
@@ -736,6 +899,24 @@ def write_hdf(self, filename, cols=('extra', 'T', 'density', 'Y'),
         pd_kwargs = {k: v for k, v in pd_kwargs.items() if v is not None}
         df.to_hdf(filename, key, **pd_kwargs)
 
+    def read_hdf(self, filename, key=None):
+        """
+        Load the dataset stored under *key* in the HDF container file
+        *filename* and restore it to this `SolutionArray` object, i.e.
+        recreate data that were previously exported by `write_hdf`.
+
+        Data are passed to `restore_data` via `from_pandas`. If *key* is not
+        given, no key is forwarded to pandas. Working installations of pandas
+        and PyTables are required; both can be installed using pip (`pandas`
+        and `tables`) or conda (`pandas` and `pytables`).
+        """
+        # `_pandas` holds the ImportError if the guarded import failed
+        if isinstance(_pandas, ImportError):
+            raise _pandas
+        # only forward *key* if one was given
+        frame = _pandas.read_hdf(filename, **({'key': key} if key else {}))
+        self.from_pandas(frame)
+
 
 def _make_functions():
     # this is wrapped in a function to avoid polluting the module namespace

diff --git a/interfaces/cython/cantera/test/test_purefluid.py b/interfaces/cython/cantera/test/test_purefluid.py
@@ -149,11 +149,18 @@ def test_fd_properties_twophase(self):
 
     def test_TPX(self):
         self.water.TX = 400, 0.8
-        T,P,X = self.water.TPX
+        T, P, X = self.water.TPX
         self.assertNear(T, 400)
         self.assertNear(X, 0.8)
-        with self.assertRaises(AttributeError):
-            self.water.TPX = 500, 101325, 0.3
+        # round trip: the state that was just read back must be accepted
+        self.water.TPX = T, P, X
+        self.assertNear(self.water.TPX[2], 0.8)  # re-read; local X is stale
+        with self.assertRaisesRegex(ValueError, 'invalid thermodynamic'):
+            self.water.TPX = T, .999*P, X
+        with self.assertRaisesRegex(ValueError, 'invalid thermodynamic'):
+            self.water.TPX = T, 1.001*P, X
+        with self.assertRaisesRegex(ValueError, 'numeric value is required'):
+            self.water.TPX = T, P, 'spam'
 
 
 # To minimize errors when transcribing tabulated data, the input units here are: