Merge pull request #367 from jianyangli/add_dataclass_merge_202209

Enable combining `DataClass` objects
NASA-Planetary-Science · Jun 9, 2023 · 7647830 · 7647830
2 parents a6619c7 + 74d8edc
commit 7647830
Show file tree

Hide file tree

Showing 4 changed files with 295 additions and 35 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -22,6 +22,9 @@ sbpy.data
 - Added ``DataClass.__contains__`` to enable `in` operator for ``DataClass``
   objects. [#357]
 
+- Added ``DataClass.add_row``, ``DataClass.vstack``
+  methods. [#367]
+
 
 sbpy.photometry
 ^^^^^^^^^^^^^^^

diff --git a/docs/sbpy/data/dataclass.rst b/docs/sbpy/data/dataclass.rst
@@ -314,8 +314,8 @@ object, you can use `~sbpy.data.DataClass.field_names`:
     ['ra', 'dec', 't']
 
 You can also use the `in` operator to check if a field is contained in
-a `~sbpy.data.DataClass` object.  Alternative field names can also be
-used for the `in` test:
+a `~sbpy.data.DataClass` object.  Alternative field names can be used
+for the `in` test:
 
     >>> 'ra' in obs
     True
@@ -411,21 +411,17 @@ directly addressing them:
     >>> obs['ra']
     <Quantity [10.323423, 10.333453, 10.343452] deg>
 
-More complex data table modifications are possible by directly
-accessing the underlying `~astropy.table.QTable` object as shown below.
-
-`~sbpy.data.DataClass` provides a direct interface to the table
-modification functions provided by `~astropy.table.Table`:
-`~astropy.table.Table.add_row`, `~astropy.table.Table.add_column`,
-`~astropy.table.Table.add_columns`, etc. For instance, it is trivial to add
-additional rows and columns to these objects.
+The basic functionalities to modify the data table are implemented in
+`~sbpy.data.DataClass`, including adding rows and columns and stacking a
+DataClass with another DataClass object or an `~astropy.table.Table`
+object.
 
 Let's assume you want to add some more observations to your ``obs``
 object:
 
 .. doctest-requires:: astropy>=5
 
-    >>> obs.table.add_row([10.255460 * u.deg, -12.39460 * u.deg, 2451523.94653 * u.d])
+    >>> obs.add_row([10.255460 * u.deg, -12.39460 * u.deg, 2451523.94653 * u.d])
     >>> obs
     <QTable length=4>
         ra       dec          t      
@@ -442,13 +438,12 @@ or if you want to add a column to your object:
 
 .. doctest-requires:: astropy>=5
 
-    >>> from astropy.table import Column
-    >>> obs.table.add_column(Column(['V', 'V', 'R', 'i'], name='filter'))
+    >>> obs.apply(['V', 'V', 'R', 'i'], name='filter')
     >>> obs
     <QTable length=4>
         ra       dec          t       filter
        deg       deg                        
-     float64   float64       Time      str1 
+     float64   float64       Time     str32
     --------- --------- ------------- ------
     10.323423 -12.42123  2451523.6234      V
     10.333453 -12.41562  2451523.7345      V
@@ -464,7 +459,7 @@ The same result can be achieved using the following syntax:
     <QTable length=4>
         ra       dec          t       filter filter2
        deg       deg                                
-     float64   float64       Time      str1    str1 
+     float64   float64       Time     str32    str1
     --------- --------- ------------- ------ -------
     10.323423 -12.42123  2451523.6234      V       V
     10.333453 -12.41562  2451523.7345      V       V
@@ -477,16 +472,43 @@ Similarly, existing columns can be modified using:
 
     >>> obs['filter'] = ['g', 'i', 'R', 'V']
 
-Note how the `~astropy.table.Table.add_column` and
-`~astropy.table.Table.add_row` functions are called from
-``obs.table``. `~sbpy.data.DataClass.table` is a property that exposes
-the underlying `~astropy.table.QTable` object so that the user can
-directly interact with it. Please refer to the `~astropy.table.Table`
-reference and
-[documentation](https://docs.astropy.org/en/stable/table/index.html)
-for more information on how to modify `~astropy.table.QTable` objects.
+If you want to stack two observations into a single object:
 
+.. doctest-requires:: astropy>=5
 
+    >>> ra = [20.223423, 20.233453, 20.243452] * u.deg
+    >>> dec = [12.42123, 12.41562, 12.40435] * u.deg
+    >>> phase = [10.1, 12.3, 15.6] * u.deg
+    >>> epoch = Time(2451623.5 + array([0.1234, 0.2345, 0.3525]), format='jd')
+    >>> obs2 = Obs.from_columns([ra, dec, epoch, phase],
+    ...     names=['ra', 'dec', 't', 'phase'])
+    >>>
+    >>> obs.vstack(obs2)
+    >>> obs
+    <QTable length=7>
+        ra       dec          t       filter filter2  phase
+       deg       deg                                   deg
+     float64   float64       Time      str1    str1  float64
+    --------- --------- ------------- ------ ------- -------
+    10.323423 -12.42123  2451523.6234      g       V     ———
+    10.333453 -12.41562  2451523.7345      i       V     ———
+    10.343452 -12.40435  2451523.8525      R       R     ———
+     10.25546  -12.3946 2451523.94653      V       i     ———
+    20.223423  12.42123  2451623.6234     --      --    10.1
+    20.233453  12.41562  2451623.7345     --      --    12.3
+    20.243452  12.40435  2451623.8525     --      --    15.6
+
+Note that the data table to be stacked doesn't have to have the same
+columns as the original data table.  A keyword `join_type` is used to
+decide how to process the different sets of columns.  See
+`~astropy.table.vstack` for more detail.
+
+Because the underlying `~astropy.table.QTable` can be exposed by the
+`~sbpy.data.DataClass.table` property, it is possible to modify the data
+table by directly accessing the underlying `~astropy.table.QTable` object.
+However, this is not generally advised.  You should use the mechanisms provided
+by `~sbpy.data.DataClass` to manipulate the data table as much as possible
+to maintain the integrity of the data table.
 
 Additional Data Container Concepts
 ==================================

diff --git a/sbpy/data/core.py b/sbpy/data/core.py
@@ -7,9 +7,10 @@
 created on June 22, 2017
 """
 
+from collections.abc import Mapping
 from copy import deepcopy
 from numpy import ndarray, array, hstack, iterable
-from astropy.table import QTable, Column
+from astropy.table import QTable, Table, Column, Row, vstack
 from astropy.time import Time
 from astropy.coordinates import Angle
 import astropy.units as u
@@ -661,32 +662,35 @@ def __contains__(self, value):
         else:
             return False
 
-    def _translate_columns(self, target_colnames):
+    def _translate_columns(self, target_colnames, ignore_missing=False):
         """Translate target_colnames to the corresponding column names
         present in this object's table. Returns a list of actual column
         names present in this object that corresponds to target_colnames
-        (order is preserved). Raises KeyError if not all columns are
-        present or one or more columns could not be translated.
+        (order is preserved). If `ignore_missing == False` (default),
+        raises a `KeyError` if a match cannot be found for an input column
+        name (neither in this object nor defined in `Conf.fieldnames`).
+        If `ignore_missing == True`, then the problematic column name will
+        be silently carried over and returned unchanged.
         """
 
         if not isinstance(target_colnames, (list, ndarray, tuple)):
             target_colnames = [target_colnames]
 
         translated_colnames = deepcopy(target_colnames)
         for idx, colname in enumerate(target_colnames):
-            # colname is already a column name in self.table
-            if colname in self.field_names:
-                continue
-            # colname is an alternative column name
-            else:
+            if colname not in self.field_names:
+                # colname not already in self.table
                 for alt in Conf.fieldnames[
                             Conf.fieldname_idx.get(colname, slice(0))]:
+                    # defined in `Conf.fieldnames`
                     if alt in self.field_names:
                         translated_colnames[idx] = alt
                         break
                 else:
-                    raise KeyError('field "{:s}" not available.'.format(
-                        colname))
+                    # undefined colname
+                    if not ignore_missing:
+                        raise KeyError('field "{:s}" not available.'.format(
+                            colname))
 
         return translated_colnames
 
@@ -934,3 +938,116 @@ def verify_fields(self, field=None):
                 ):
                     raise FieldError('Field {} does not have units of {}'
                                      .format(test_field, str(dim.unit)))
+
+    def add_row(self, vals, names=None, units=None):
+        """Add a new row to the end of DataClass.
+
+        This is similar to `astropy.table.Table.add_row`, but allows for
+        a set of different columns in the new row from the original DataClass
+        object.  It also allows for aliases of column names.
+
+        Parameters
+        ----------
+        vals : `~astropy.table.Row`, tuple, list, dict
+            Row to be added
+        names : iterable of strings, optional
+            The names of columns if not implicitly specified in ``vals``.
+            Takes precedence over the column names in ``vals`` if any.
+        units : str or list-like, optional
+            Unit labels (as provided by `~astropy.units.Unit`) in which
+            the data provided in ``vals`` will be stored in the underlying
+            table. If None, the units as provided by ``vals``
+            are used. If the units provided in ``units`` differ from those
+            used in ``vals``, ``vals`` will be transformed to the units
+            provided in ``units``. Must have the same length as ``names``
+            and the values in ``vals``. Default: None
+
+        Notes
+        -----
+        If a time is included in ``vals``, it can either be an explicit
+        `~astropy.time.Time` object, or a number, `~astropy.units.Quantity`
+        object, or string that can be inferred to be a time by the existing
+        column of the same name or by its position in the sequence.  In
+        this case, the type of time values must be valid to initialize
+        an `~astropy.time.Time` object with format='jd' or 'isot', and
+        the time scale defaults to the scale of the corresponding
+        existing time column.
+
+        Examples
+        --------
+        >>> import astropy.units as u
+        >>> from sbpy.data import DataClass
+        >>>
+        >>> data = DataClass.from_dict(
+        ...         {'rh': [1, 2, 3] * u.au, 'delta': [1, 2, 3] * u.au})
+        >>> row = {'rh': 4 * u.au, 'delta': 4 * u.au, 'phase': 15 * u.deg}
+        >>> data.add_row(row)
+        """
+        if isinstance(vals, Row):
+            vals = DataClass.from_table(vals)
+        else:
+            if isinstance(vals, Mapping):
+                keys_list = list(vals.keys())
+                vals_list = [vals[k] for k in keys_list]
+                vals = vals_list
+                if names is None:
+                    names = keys_list
+            else:
+                # assume it's an iterable that can be taken as columns
+                if names is None:
+                    # if names of columns are not specified, default to the
+                    # existing names and orders
+                    names = self.field_names
+            # check if any astropy Time columns
+            for i, k in enumerate(names):
+                if k in self and isinstance(self[k], Time):
+                    vals[i] = Time(vals[i], scale=self[k].scale,
+                                   format='isot' if isinstance(vals[i], str)
+                                   else 'jd')
+            vals = DataClass.from_rows(vals, names, units=units)
+        self.vstack(vals)
+
+    def vstack(self, data, **kwargs):
+        """Stack another DataClass object to the end of DataClass
+
+        Similar to `~astropy.table.vstack`, the DataClass object
+        to be stacked doesn't have to have the same set of columns as
+        the existing object.  The `join_type` keyword parameter will be
+        used to decide how to process the different sets of columns.
+
+        Joining will be in-place.
+
+        Parameters
+        ----------
+        data : `~sbpy.data.DataClass`, dict, `~astropy.table.Table`
+            Object to be joined with the current object
+        kwargs : dict
+            Keyword parameters accepted by `~astropy.table.vstack`.
+
+        Examples
+        --------
+        >>> import astropy.units as u
+        >>> from sbpy.data import DataClass
+        >>>
+        >>> data1 = DataClass.from_dict(
+        ...         {'rh': [1, 2, 3] * u.au, 'delta': [1, 2, 3] * u.au})
+        >>> data2 = DataClass.from_dict(
+        ...         {'rh': [4, 5] * u.au, 'phase': [15, 15] * u.deg})
+        >>> data1.vstack(data2)
+        """
+        # check and process input data
+        if isinstance(data, dict):
+            data = DataClass.from_dict(data)
+        elif isinstance(data, Table):
+            data = DataClass.from_table(data)
+        if not isinstance(data, DataClass):
+            raise ValueError('DataClass, dict, or astorpy.table.Table are '
+                             'expected, but {} is received.'.
+                             format(type(data)))
+
+        # adjust input column names for aliases
+        alt = self._translate_columns(data.field_names, ignore_missing=True)
+        data.table.rename_columns(data.field_names, alt)
+
+        # join with the input table
+        self.table = vstack([self.table, data.table], **kwargs)