diff --git a/CHANGES.rst b/CHANGES.rst index aa84975d..6ad9a60a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -22,6 +22,9 @@ sbpy.data - Added ``DataClass.__contains__`` to enable `in` operator for ``DataClass`` objects. [#357] +- Added ``DataClass.add_row``, ``DataClass.vstack`` + methods. [#367] + sbpy.photometry ^^^^^^^^^^^^^^^ diff --git a/docs/sbpy/data/dataclass.rst b/docs/sbpy/data/dataclass.rst index 78b8000c..193c4a4c 100644 --- a/docs/sbpy/data/dataclass.rst +++ b/docs/sbpy/data/dataclass.rst @@ -314,8 +314,8 @@ object, you can use `~sbpy.data.DataClass.field_names`: ['ra', 'dec', 't'] You can also use the `in` operator to check if a field is contained in -a `~sbpy.data.DataClass` object. Alternative field names can also be -used for the `in` test: +a `~sbpy.data.DataClass` object. Alternative field names can be used +for the `in` test: >>> 'ra' in obs True @@ -411,21 +411,17 @@ directly addressing them: >>> obs['ra'] -More complex data table modifications are possible by directly -accessing the underlying `~astropy.table.QTable` object as shown below. - -`~sbpy.data.DataClass` provides a direct interface to the table -modification functions provided by `~astropy.table.Table`: -`~astropy.table.Table.add_row`, `~astropy.table.Table.add_column`, -`~astropy.table.Table.add_columns`, etc. For instance, it is trivial to add -additional rows and columns to these objects. +The basic functionality to modify the data table is implemented in +`~sbpy.data.DataClass`, including adding rows and columns and stacking a +DataClass with another DataClass object or an `~astropy.table.Table` +object. Let's assume you want to add some more observations to your ``obs`` object: .. doctest-requires:: astropy>=5 - >>> obs.table.add_row([10.255460 * u.deg, -12.39460 * u.deg, 2451523.94653 * u.d]) + >>> obs.add_row([10.255460 * u.deg, -12.39460 * u.deg, 2451523.94653 * u.d]) >>> obs ra dec t @@ -442,13 +438,12 @@ or if you want to add a column to your object: ..
doctest-requires:: astropy>=5 - >>> from astropy.table import Column - >>> obs.table.add_column(Column(['V', 'V', 'R', 'i'], name='filter')) + >>> obs.apply(['V', 'V', 'R', 'i'], name='filter') >>> obs ra dec t filter deg deg - float64 float64 Time str1 + float64 float64 Time str32 --------- --------- ------------- ------ 10.323423 -12.42123 2451523.6234 V 10.333453 -12.41562 2451523.7345 V @@ -464,7 +459,7 @@ The same result can be achieved using the following syntax: ra dec t filter filter2 deg deg - float64 float64 Time str1 str1 + float64 float64 Time str32 str1 --------- --------- ------------- ------ ------- 10.323423 -12.42123 2451523.6234 V V 10.333453 -12.41562 2451523.7345 V V @@ -477,16 +472,43 @@ Similarly, existing columns can be modified using: >>> obs['filter'] = ['g', 'i', 'R', 'V'] -Note how the `~astropy.table.Table.add_column` and -`~astropy.table.Table.add_row` functions are called from -``obs.table``. `~sbpy.data.DataClass.table` is a property that exposes -the underlying `~astropy.table.QTable` object so that the user can -directly interact with it. Please refer to the `~astropy.table.Table` -reference and -[documentation](https://docs.astropy.org/en/stable/table/index.html) -for more information on how to modify `~astropy.table.QTable` objects. +If you want to stack two observations into a single object: +.. doctest-requires:: astropy>=5 + >>> ra = [20.223423, 20.233453, 20.243452] * u.deg + >>> dec = [12.42123, 12.41562, 12.40435] * u.deg + >>> phase = [10.1, 12.3, 15.6] * u.deg + >>> epoch = Time(2451623.5 + array([0.1234, 0.2345, 0.3525]), format='jd') + >>> obs2 = Obs.from_columns([ra, dec, epoch, phase], + ... 
names=['ra', 'dec', 't', 'phase']) + >>> + >>> obs.vstack(obs2) + >>> obs + + ra dec t filter filter2 phase + deg deg deg + float64 float64 Time str1 str1 float64 + --------- --------- ------------- ------ ------- ------- + 10.323423 -12.42123 2451523.6234 g V ——— + 10.333453 -12.41562 2451523.7345 i V ——— + 10.343452 -12.40435 2451523.8525 R R ——— + 10.25546 -12.3946 2451523.94653 V i ——— + 20.223423 12.42123 2451623.6234 -- -- 10.1 + 20.233453 12.41562 2451623.7345 -- -- 12.3 + 20.243452 12.40435 2451623.8525 -- -- 15.6 + +Note that the data table to be stacked doesn't have to have the same +columns as the original data table. A keyword `join_type` is used to +decide how to process the different sets of columns. See +`~astropy.table.vstack` for more details. + +Because the underlying `~astropy.table.QTable` can be exposed by the +`~sbpy.data.DataClass.table` property, it is possible to modify the data +table by directly accessing the underlying `~astropy.table.QTable` object. +However, this is not generally advised. You should use the mechanisms provided +by `~sbpy.data.DataClass` to manipulate the data table as much as possible +to maintain the integrity of the data table.
Additional Data Container Concepts ================================== diff --git a/sbpy/data/core.py b/sbpy/data/core.py index 307841ba..62e8a9f1 100644 --- a/sbpy/data/core.py +++ b/sbpy/data/core.py @@ -7,9 +7,10 @@ created on June 22, 2017 """ +from collections.abc import Mapping from copy import deepcopy from numpy import ndarray, array, hstack, iterable -from astropy.table import QTable, Column +from astropy.table import QTable, Table, Column, Row, vstack from astropy.time import Time from astropy.coordinates import Angle import astropy.units as u @@ -661,12 +662,15 @@ def __contains__(self, value): else: return False - def _translate_columns(self, target_colnames): + def _translate_columns(self, target_colnames, ignore_missing=False): """Translate target_colnames to the corresponding column names present in this object's table. Returns a list of actual column names present in this object that corresponds to target_colnames - (order is preserved). Raises KeyError if not all columns are - present or one or more columns could not be translated. + (order is preserved). If `ignore_missing == False` (default), + raises a `KeyError` if a match cannot be found for an input column + name (neither in this object nor defined in `Conf.fieldnames`). + If `ignore_missing == True`, then the problematic column name will + be silently carried over and returned.
""" if not isinstance(target_colnames, (list, ndarray, tuple)): @@ -674,19 +678,19 @@ def _translate_columns(self, target_colnames): translated_colnames = deepcopy(target_colnames) for idx, colname in enumerate(target_colnames): - # colname is already a column name in self.table - if colname in self.field_names: - continue - # colname is an alternative column name - else: + if colname not in self.field_names: + # colname not already in self.table for alt in Conf.fieldnames[ Conf.fieldname_idx.get(colname, slice(0))]: + # defined in `Conf.fieldnames` if alt in self.field_names: translated_colnames[idx] = alt break else: - raise KeyError('field "{:s}" not available.'.format( - colname)) + # undefined colname + if not ignore_missing: + raise KeyError('field "{:s}" not available.'.format( + colname)) return translated_colnames @@ -934,3 +938,116 @@ def verify_fields(self, field=None): ): raise FieldError('Field {} does not have units of {}' .format(test_field, str(dim.unit))) + + def add_row(self, vals, names=None, units=None): + """Add a new row to the end of DataClass. + + This is similar to `astropy.table.Table.add_row`, but allows for + a set of different columns in the new row from the original DataClass + object. It also allows for aliases of column names. + + Parameters + ---------- + vals : `~astropy.table.Row`, tuple, list, dict + Row to be added + names : iterable of strings, optional + The names of columns if not implicitly specified in ``vals``. + Takes precedence over the column names in ``vals`` if any. + units : str or list-like, optional + Unit labels (as provided by `~astropy.units.Unit`) in which + the data provided in ``rows`` will be stored in the underlying + table. If None, the units as provided by ``rows`` + are used. If the units provided in ``units`` differ from those + used in ``rows``, ``rows`` will be transformed to the units + provided in ``units``. Must have the same length as ``names`` + and the individual data rows in ``rows``. 
Default: None + + Notes + ----- + If a time is included in ``vals``, it can either be an explicit + `~astropy.time.Time` object, or a number, `~astropy.units.Quantity` + object, or string that can be inferred to be a time by the existing + column of the same name or by its position in the sequence. In + this case, the type of time values must be valid to initialize + an `~astropy.time.Time` object with format='jd' or 'isot', and + the scale of time is default to the scale of the corresponding + existing column of time. + + Examples + -------- + >>> import astropy.units as u + >>> from sbpy.data import DataClass + >>> + >>> data = DataClass.from_dict( + ... {'rh': [1, 2, 3] * u.au, 'delta': [1, 2, 3] * u.au}) + >>> row = {'rh': 4 * u.au, 'delta': 4 * u.au, 'phase': 15 * u.deg} + >>> data.add_row(row) + """ + if isinstance(vals, Row): + vals = DataClass.from_table(vals) + else: + if isinstance(vals, Mapping): + keys_list = list(vals.keys()) + vals_list = [vals[k] for k in keys_list] + vals = vals_list + if names is None: + names = keys_list + else: + # assume it's an iterable that can be taken as columns + if names is None: + # if names of columns are not specified, default to the + # existing names and orders + names = self.field_names + # check if any astropy Time columns + for i, k in enumerate(names): + if k in self and isinstance(self[k], Time): + vals[i] = Time(vals[i], scale=self[k].scale, + format='isot' if isinstance(vals[i], str) + else 'jd') + vals = DataClass.from_rows(vals, names, units=units) + self.vstack(vals) + + def vstack(self, data, **kwargs): + """Stack another DataClass object to the end of DataClass + + Similar to `~astropy.table.Table.vstack`, the DataClass object + to be stacked doesn't have to have the same set of columns as + the existing object. The `join_type` keyword parameter will be + used to decide how to process the different sets of columns. + + Joining will be in-place. 
+ + Parameters + ---------- + data : `~sbpy.data.DataClass`, dict, `~astropy.table.Table` + Object to be joined with the current object + kwargs : dict + Keyword parameters accepted by `~astropy.table.Table.vstack`. + + Examples + -------- + >>> import astropy.units as u + >>> from sbpy.data import DataClass + >>> + >>> data1 = DataClass.from_dict( + ... {'rh': [1, 2, 3] * u.au, 'delta': [1, 2, 3] * u.au}) + >>> data2 = DataClass.from_dict( + ... {'rh': [4, 5] * u.au, 'phase': [15, 15] * u.deg}) + >>> data1.vstack(data2) + """ + # check and process input data + if isinstance(data, dict): + data = DataClass.from_dict(data) + elif isinstance(data, Table): + data = DataClass.from_table(data) + if not isinstance(data, DataClass): + raise ValueError('DataClass, dict, or astorpy.table.Table are ' + 'expected, but {} is received.'. + format(type(data))) + + # adjust input column names for alises + alt = self._translate_columns(data.field_names, ignore_missing=True) + data.table.rename_columns(data.field_names, alt) + + # join with the input table + self.table = vstack([self.table, data.table], **kwargs) diff --git a/sbpy/data/tests/test_dataclass.py b/sbpy/data/tests/test_dataclass.py index 28f86229..cf43b41b 100644 --- a/sbpy/data/tests/test_dataclass.py +++ b/sbpy/data/tests/test_dataclass.py @@ -6,7 +6,7 @@ from copy import deepcopy import astropy.units as u from astropy.coordinates import Angle -from astropy.table import QTable, Column +from astropy.table import QTable, Column, Row from astropy.time import Time from ..core import DataClass, Conf, DataClassError, FieldError @@ -452,6 +452,9 @@ def test_translate_columns_and_contains(monkeypatch): tab._translate_columns(['x']) # undefined column name tab._translate_columns(['dd']) # defined column name but not in table + trans = tab._translate_columns(['x', 'dd'], ignore_missing=True) + assert trans == ['x', 'dd'] + assert 'aa' in tab assert 'bb' in tab assert 'zz' in tab @@ -602,3 +605,118 @@ def test_apply(): with 
def test_add_row():
    """Test DataClass.add_row with Row, dict and sequence inputs."""
    tab = DataClass.from_columns([Time([2451223, 2451224, 2451226],
                                       format='jd'),
                                  [120.1, 121.3, 124.9]*u.deg,
                                  [12.4, 12.2, 10.8]*u.deg],
                                 names=('JD', 'RA', 'DEC'))
    # add astropy Row
    r = tab.table[0]
    assert isinstance(r, Row)
    tab.add_row(r)
    assert len(tab) == 4
    assert tab.table[-1] == r

    # add a dict
    r = {'JD': 2451228 * u.d, 'RA': 130 * u.deg, 'DEC': 8 * u.deg}
    tab.add_row(r)
    assert len(tab) == 5
    assert all(tab[-1]['JD'] == Time(r['JD'], format='jd'))
    for k in ['RA', 'DEC']:
        assert u.isclose(tab[-1][k], r[k])

    # add an iterable that matches the existing columns
    r = [2451130 * u.d, 135 * u.deg, 6 * u.deg]
    tab.add_row(r)
    assert len(tab) == 6
    assert all(tab[-1]['JD'] == Time(r[0], format='jd'))
    for i, k in enumerate(tab.field_names[1:]):
        assert u.isclose(tab[-1][k], r[i+1])

    # add an iterable with specified column names
    r = [2451132 * u.d, 140 * u.deg, 3 * u.au]
    n = ['JD', 'RA', 'rh']  # with a new column and missing an existing column
    tab.add_row(r, n)
    assert len(tab) == 7
    assert set(tab.field_names) == {'JD', 'RA', 'DEC', 'rh'}
    assert all(tab[-1]['JD'] == Time(r[0], format='jd'))
    for i, k in enumerate(n[1:]):
        assert u.isclose(tab[-1][k], r[i+1])

    # time represented by a string
    r = ['1998-11-18', 120 * u.deg, 3 * u.au]
    n = ['JD', 'RA', 'rh']
    tab.add_row(r, n)
    assert len(tab) == 8
    assert all(tab[-1]['JD'] == r[0])

    # specify different names from the Mapping object: the values are
    # matched to the given names by position, so the 'DEC' value ends up
    # in the new 'phase' column
    r = {'JD': 2451228 * u.d, 'RA': 130 * u.deg, 'DEC': 8 * u.deg}
    n = ['JD', 'RA', 'phase']
    tab.add_row(r, n)
    assert len(tab) == 9
    assert set(tab.field_names) == {'JD', 'RA', 'DEC', 'rh', 'phase'}
    assert u.isclose(tab[-1]['phase'], r['DEC'])


def test_vstack():
    """Test DataClass.vstack with DataClass, Table and dict inputs."""
    tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d,
                                  [120.1, 121.3, 124.9]*u.deg,
                                  [12.4, 12.2, 10.8]*u.deg],
                                 names=('JD', 'RA', 'DEC'))

    # join a DataClass, same columns
    assert isinstance(tab, DataClass)
    tab.vstack(tab)
    assert len(tab) == 6
    assert set(tab.field_names) == {'JD', 'RA', 'DEC'}
    assert all(tab.table[:3] == tab.table[-3:])

    # join a Table
    delta_tab = tab.table
    assert isinstance(delta_tab, QTable)
    tab.vstack(delta_tab)
    assert len(tab) == 12
    assert set(tab.field_names) == {'JD', 'RA', 'DEC'}
    assert all(tab.table[:6] == tab.table[-6:])

    # join a dict
    delta_tab = dict(tab.table)
    assert isinstance(delta_tab, dict)
    tab.vstack(delta_tab)
    assert len(tab) == 24
    assert set(tab.field_names) == {'JD', 'RA', 'DEC'}
    # the table doubled from 12 to 24 rows, so compare the two halves
    assert all(tab.table[:12] == tab.table[-12:])

    # join an unrecognized object
    with pytest.raises(ValueError):
        tab.vstack([1, 2, 3])

    # join a table with different sets of columns
    tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d,
                                  [120.1, 121.3, 124.9]*u.deg,
                                  [12.4, 12.2, 10.8]*u.deg],
                                 names=('JD', 'RA', 'DEC'))
    subtab = QTable([[1, 2, 3] * u.au,
                     [1, 2, 3] * u.au,
                     [20, 30, 40] * u.deg],
                    names=('r', 'delta', 'DEC'))
    field0 = tab.field_names
    tab.vstack(subtab)
    assert len(tab) == 6
    assert set(field0).union(set(subtab.colnames)) == set(tab.field_names)

    # join a table that has a column using alternative names
    subtab = QTable([[4, 5] * u.au,
                     [10, 20] * u.deg],
                    names=('rh', 'phase'))
    field0 = tab.field_names
    tab.vstack(subtab)
    assert len(tab) == 8
    assert 'rh' not in tab.table.colnames
    assert set(field0).union({'phase'}) == set(tab.field_names)