From 448d595ef32f709e62b0f8b26a779185b7bbdfef Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 20 Sep 2022 10:08:50 -0400 Subject: [PATCH 01/16] add `add_row` and `join` method definitions --- sbpy/data/core.py | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/sbpy/data/core.py b/sbpy/data/core.py index 307841ba..0dae099a 100644 --- a/sbpy/data/core.py +++ b/sbpy/data/core.py @@ -934,3 +934,59 @@ def verify_fields(self, field=None): ): raise FieldError('Field {} does not have units of {}' .format(test_field, str(dim.unit))) + + def add_row(self, vals, names=None): + """Add a new row to the end of DataClass. + + This is similar to `astropy.table.Table.add_row`, but allows for + a set of different columns in the new row from the original DataClass + object. It also allows for aliases of column names. + + Parameters + ---------- + vals : tuple, list, dict or None + Use the specified values in the new row + mask : tuple, list, dict or None + Use the specified mask values in the new row + names : iterable of strings + The names of columns if not implicitly specified in `vals`. + Ignored if the column names are specified in `vals`. + + Examples + -------- + >>> import astropy.units as u + >>> from sbpy.data import DataClass + >>> + >>> data = DataClass.from_dict( + ... {'rh': [1, 2, 3] * u.au, 'delta': [1, 2, 3] * u.au}) + >>> row = {'rh': 4 * u.au, 'delta': 4 * u.au, 'phase': 15 * u.deg} + >>> data.add_row(row) + """ + pass + + def join(self, data): + """Join another DataClass object to the end of DataClass + + The DataClass object doesn't need to have the same set of columns + as the existing object. The original dataclass will be expanded + with new columns, and the cells with no values will be masked in + both the existing dataclass and the newly joined rows. 
+ + Parameters + ---------- + data : `sbpy.data.DataClass` + Object to be joined with the current object + + Examples + -------- + >>> import astropy.units as u + >>> from sbpy.data import DataClass + >>> + >>> data1 = DataClass.from_dict( + ... {'rh': [1, 2, 3] * u.au, 'delta': [1, 2, 3] * u.au}) + >>> data2 = DataClass.from_dict( + {'rh': [4, 5] * u.au, 'phase': 15 * u.deg} + >>> data1.join(data2) + """ + pass + From 40be33b1963e57453ee2075a08661fdb4ebb8489 Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 20 Sep 2022 12:00:34 -0400 Subject: [PATCH 02/16] update `DataClass._translate_columns` Add `ignore_missing` keyword parameter --- sbpy/data/core.py | 23 +++++++++++++---------- sbpy/data/tests/test_dataclass.py | 3 +++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/sbpy/data/core.py b/sbpy/data/core.py index 0dae099a..dd1f06ee 100644 --- a/sbpy/data/core.py +++ b/sbpy/data/core.py @@ -661,12 +661,15 @@ def __contains__(self, value): else: return False - def _translate_columns(self, target_colnames): + def _translate_columns(self, target_colnames, ignore_missing=False): """Translate target_colnames to the corresponding column names present in this object's table. Returns a list of actual column names present in this object that corresponds to target_colnames - (order is preserved). Raises KeyError if not all columns are - present or one or more columns could not be translated. + (order is preserved). If `ignore_missing == False` (default), + raises a `KeyError` if a match cannot be found for an input column + name (neither in this object nor defined in `Conf.fieldnames`). + If `ignore_missing == True`, then the problemtic column name will + be silently carried ouver and returned. 
""" if not isinstance(target_colnames, (list, ndarray, tuple)): @@ -674,19 +677,19 @@ def _translate_columns(self, target_colnames): translated_colnames = deepcopy(target_colnames) for idx, colname in enumerate(target_colnames): - # colname is already a column name in self.table - if colname in self.field_names: - continue - # colname is an alternative column name - else: + if colname not in self.field_names: + # colname not already in self.table for alt in Conf.fieldnames[ Conf.fieldname_idx.get(colname, slice(0))]: + # defined in `Conf.fieldnames` if alt in self.field_names: translated_colnames[idx] = alt break else: - raise KeyError('field "{:s}" not available.'.format( - colname)) + # undefined colname + if not ignore_missing: + raise KeyError('field "{:s}" not available.'.format( + colname)) return translated_colnames diff --git a/sbpy/data/tests/test_dataclass.py b/sbpy/data/tests/test_dataclass.py index 28f86229..e77431b7 100644 --- a/sbpy/data/tests/test_dataclass.py +++ b/sbpy/data/tests/test_dataclass.py @@ -452,6 +452,9 @@ def test_translate_columns_and_contains(monkeypatch): tab._translate_columns(['x']) # undefined column name tab._translate_columns(['dd']) # defined column name but not in table + trans = tab._translate_columns(['x', 'dd'], ignore_missing=True) + assert trans == ['x', 'dd'] + assert 'aa' in tab assert 'bb' in tab assert 'zz' in tab From 661c828d1e9d524f89f9aae198299c6403595b8c Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 20 Sep 2022 14:49:22 -0400 Subject: [PATCH 03/16] implement `DataClass.join` --- sbpy/data/core.py | 25 ++++++++++--- sbpy/data/tests/test_dataclass.py | 59 +++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/sbpy/data/core.py b/sbpy/data/core.py index dd1f06ee..8c2c377c 100644 --- a/sbpy/data/core.py +++ b/sbpy/data/core.py @@ -9,7 +9,7 @@ from copy import deepcopy from numpy import ndarray, array, hstack, iterable -from astropy.table import QTable, Column +from 
astropy.table import QTable, Table, Column, vstack from astropy.time import Time from astropy.coordinates import Angle import astropy.units as u @@ -975,9 +975,11 @@ def join(self, data): with new columns, and the cells with no values will be masked in both the existing dataclass and the newly joined rows. + Joining will be in-place. + Parameters ---------- - data : `sbpy.data.DataClass` + data : `sbpy.data.DataClass`, dict, `astropy.table.Table` Object to be joined with the current object Examples @@ -988,8 +990,21 @@ def join(self, data): >>> data1 = DataClass.from_dict( ... {'rh': [1, 2, 3] * u.au, 'delta': [1, 2, 3] * u.au}) >>> data2 = DataClass.from_dict( - {'rh': [4, 5] * u.au, 'phase': 15 * u.deg} + ... {'rh': [4, 5] * u.au, 'phase': [15, 15] * u.deg}) >>> data1.join(data2) """ - pass - + # check and process input data + if isinstance(data, dict): + data = DataClass.from_dict(data) + elif isinstance(data, Table): + data = DataClass.from_table(data) + if not isinstance(data, DataClass): + raise ValueError('DataClass, dict, or astorpy.table.Table are ' + 'expected, but {} is received.'.format(type(data))) + + # adjust input column names for alises + alt = self._translate_columns(data.field_names, ignore_missing=True) + data.table.rename_columns(data.field_names, alt) + + # join with the input table + self.table = vstack([self.table, data.table], join_type='outer') diff --git a/sbpy/data/tests/test_dataclass.py b/sbpy/data/tests/test_dataclass.py index e77431b7..088160e2 100644 --- a/sbpy/data/tests/test_dataclass.py +++ b/sbpy/data/tests/test_dataclass.py @@ -605,3 +605,62 @@ def test_apply(): with pytest.raises(DataClassError): tab.apply([12.1, 12.5, 12.6, 99]*u.mag, name='V') # wrong size + + +def test_join(): + """test DataClass.join""" + tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d, + [120.1, 121.3, 124.9]*u.deg, + [12.4, 12.2, 10.8]*u.deg], + names=('JD', 'RA', 'DEC')) + + # join a DataClass, same columns + assert isinstance(tab, 
DataClass) + tab.join(tab) + assert len(tab) == 6 + assert set(tab.field_names) == {'JD', 'RA', 'DEC'} + assert all(tab.table[:3] == tab.table[-3:]) + + # join a Table + delta_tab = tab.table + assert isinstance(delta_tab, QTable) + tab.join(delta_tab) + assert len(tab) == 12 + assert set(tab.field_names) == {'JD', 'RA', 'DEC'} + assert all(tab.table[:6] == tab.table[-6:]) + + # join a dict + delta_tab = dict(tab.table) + assert isinstance(delta_tab, dict) + tab.join(dict(delta_tab)) + assert len(tab) == 24 + assert set(tab.field_names) == {'JD', 'RA', 'DEC'} + assert all(tab.table[:6] == tab.table[-6:]) + + # join an unrecoganized object + with pytest.raises(ValueError): + tab.join([1, 2, 3]) + + # join a table with different sets of columns + tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d, + [120.1, 121.3, 124.9]*u.deg, + [12.4, 12.2, 10.8]*u.deg], + names=('JD', 'RA', 'DEC')) + subtab = QTable([[1, 2, 3] * u.au, + [1, 2, 3] * u.au, + [20, 30, 40] * u.deg], + names=('r', 'delta', 'DEC')) + field0 = tab.field_names + tab.join(subtab) + assert len(tab) == 6 + assert set(field0).union(set(subtab.colnames)) == set(tab.field_names) + + # join a table that has a column using alternative names + subtab = QTable([[4, 5] * u.au, + [10, 20] * u.deg], + names=('rh', 'phase')) + field0 = tab.field_names + tab.join(subtab) + assert len(tab) == 8 + assert 'rh' not in tab.table.colnames + assert set(field0).union({'phase'}) == set(tab.field_names) From ed3e95abfead6161917cfce920e9b71fe764d6e3 Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 20 Sep 2022 19:34:14 -0400 Subject: [PATCH 04/16] implement `DataClass.add_row` --- sbpy/data/core.py | 30 ++++++++++++++++-------- sbpy/data/tests/test_dataclass.py | 38 ++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/sbpy/data/core.py b/sbpy/data/core.py index 8c2c377c..7a15cb5c 100644 --- a/sbpy/data/core.py +++ b/sbpy/data/core.py @@ -7,9 +7,10 @@ created on June 22, 
2017 """ +from collections.abc import Mapping from copy import deepcopy from numpy import ndarray, array, hstack, iterable -from astropy.table import QTable, Table, Column, vstack +from astropy.table import QTable, Table, Column, Row, vstack from astropy.time import Time from astropy.coordinates import Angle import astropy.units as u @@ -947,13 +948,11 @@ def add_row(self, vals, names=None): Parameters ---------- - vals : tuple, list, dict or None - Use the specified values in the new row - mask : tuple, list, dict or None - Use the specified mask values in the new row + vals : `~astropy.table.Row`, tuple, list, dict + Row to be added names : iterable of strings - The names of columns if not implicitly specified in `vals`. - Ignored if the column names are specified in `vals`. + The names of columns if not implicitly specified in ``vals``. + Ignored if the column names are specified in ``vals``. Examples -------- @@ -965,7 +964,20 @@ def add_row(self, vals, names=None): >>> row = {'rh': 4 * u.au, 'delta': 4 * u.au, 'phase': 15 * u.deg} >>> data.add_row(row) """ - pass + if isinstance(vals, Row): + vals = DataClass.from_table(vals) + elif isinstance(vals, Mapping): + keys_list = list(vals.keys()) + vals_list = [vals[k] for k in keys_list] + vals = DataClass.from_rows(vals_list, keys_list) + else: + # assume it's an iterable that can be taken as columns + if names is None: + # if names of columns are not specified, default to the + # existing names and orders + names = self.field_names + vals = DataClass.from_rows(vals, names) + self.join(vals) def join(self, data): """Join another DataClass object to the end of DataClass @@ -979,7 +991,7 @@ def join(self, data): Parameters ---------- - data : `sbpy.data.DataClass`, dict, `astropy.table.Table` + data : `~sbpy.data.DataClass`, dict, `~astropy.table.Table` Object to be joined with the current object Examples diff --git a/sbpy/data/tests/test_dataclass.py b/sbpy/data/tests/test_dataclass.py index 088160e2..1f2c3014 
100644 --- a/sbpy/data/tests/test_dataclass.py +++ b/sbpy/data/tests/test_dataclass.py @@ -6,7 +6,7 @@ from copy import deepcopy import astropy.units as u from astropy.coordinates import Angle -from astropy.table import QTable, Column +from astropy.table import QTable, Column, Row from astropy.time import Time from ..core import DataClass, Conf, DataClassError, FieldError @@ -607,6 +607,42 @@ def test_apply(): tab.apply([12.1, 12.5, 12.6, 99]*u.mag, name='V') # wrong size +def test_add_row(): + """test DataClass.add_row""" + tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d, + [120.1, 121.3, 124.9]*u.deg, + [12.4, 12.2, 10.8]*u.deg], + names=('JD', 'RA', 'DEC')) + # add astropy Row + r = tab.table[0] + assert isinstance(r, Row) + tab.add_row(r) + assert len(tab) == 4 + assert tab.table[-1] == r + + # add a dict + r = {'JD': 2451228 * u.d, 'RA': 130 * u.deg, 'DEC': 8 * u.deg} + tab.add_row(r) + assert len(tab) == 5 + for k in r.keys(): + assert u.isclose(tab[-1][k], r[k]) + + # add an iterable that matches the existing columns + r = [2451130 * u.d, 135 * u.deg, 6 * u.deg] + tab.add_row(r) + assert len(tab) == 6 + for i, k in enumerate(tab.field_names): + assert u.isclose(tab[-1][k], r[i]) + + # add an iterable with specified column names + r = [2451132 * u.d, 140 * u.deg, 3 * u.au] + n = ['JD', 'RA', 'rh'] # adding a new column and missing an existing column + tab.add_row(r, n) + assert len(tab) == 7 + for i, k in enumerate(n): + assert u.isclose(tab[-1][k], r[i]) + + def test_join(): """test DataClass.join""" tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d, From 171e73cb62dbd399cb53e337929a3aaed57ac02c Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 20 Sep 2022 22:43:13 -0400 Subject: [PATCH 05/16] improve `DataClass.add_row` - Add ``units`` keyword argument - Make sure ``names`` argument takes precedence - Better support for `Time` objects --- sbpy/data/core.py | 53 +++++++++++++++++++++++-------- sbpy/data/tests/test_dataclass.py 
| 32 +++++++++++++++---- 2 files changed, 66 insertions(+), 19 deletions(-) diff --git a/sbpy/data/core.py b/sbpy/data/core.py index 7a15cb5c..e8accc02 100644 --- a/sbpy/data/core.py +++ b/sbpy/data/core.py @@ -939,7 +939,7 @@ def verify_fields(self, field=None): raise FieldError('Field {} does not have units of {}' .format(test_field, str(dim.unit))) - def add_row(self, vals, names=None): + def add_row(self, vals, names=None, units=None): """Add a new row to the end of DataClass. This is similar to `astropy.table.Table.add_row`, but allows for @@ -950,9 +950,28 @@ def add_row(self, vals, names=None): ---------- vals : `~astropy.table.Row`, tuple, list, dict Row to be added - names : iterable of strings + names : iterable of strings, optional The names of columns if not implicitly specified in ``vals``. - Ignored if the column names are specified in ``vals``. + Takes precedence over the column names in ``vals`` if any. + units : str or list-like, optional + Unit labels (as provided by `~astropy.units.Unit`) in which + the data provided in ``rows`` will be stored in the underlying + table. If None, the units as provided by ``rows`` + are used. If the units provided in ``units`` differ from those + used in ``rows``, ``rows`` will be transformed to the units + provided in ``units``. Must have the same length as ``names`` + and the individual data rows in ``rows``. Default: None + + Notes + ----- + If a time is included in ``vals``, it can either be an explicit + `~astropy.time.Time` object, or a number, `~astropy.units.Quantity` + object, or string that can be inferred to be a time by the existing + column of the same name or by its position in the sequence. In + this case, the type of time values must be valid to initialize + an `~astropy.time.Time` object with format='jd' or 'isot', and + the scale of time is default to the scale of the corresponding + existing column of time. 
Examples -------- @@ -966,17 +985,25 @@ def add_row(self, vals, names=None): """ if isinstance(vals, Row): vals = DataClass.from_table(vals) - elif isinstance(vals, Mapping): - keys_list = list(vals.keys()) - vals_list = [vals[k] for k in keys_list] - vals = DataClass.from_rows(vals_list, keys_list) else: - # assume it's an iterable that can be taken as columns - if names is None: - # if names of columns are not specified, default to the - # existing names and orders - names = self.field_names - vals = DataClass.from_rows(vals, names) + if isinstance(vals, Mapping): + keys_list = list(vals.keys()) + vals_list = [vals[k] for k in keys_list] + vals = vals_list + if names is None: + names = keys_list + else: + # assume it's an iterable that can be taken as columns + if names is None: + # if names of columns are not specified, default to the + # existing names and orders + names = self.field_names + # check if any astropy Time columns + for i, k in enumerate(names): + if k in self and isinstance(self[k], Time): + vals[i] = Time(vals[i], scale=self[k].scale, + format='isot' if isinstance(vals[i], str) else 'jd') + vals = DataClass.from_rows(vals, names, units=units) self.join(vals) def join(self, data): diff --git a/sbpy/data/tests/test_dataclass.py b/sbpy/data/tests/test_dataclass.py index 1f2c3014..5914fe3f 100644 --- a/sbpy/data/tests/test_dataclass.py +++ b/sbpy/data/tests/test_dataclass.py @@ -609,7 +609,8 @@ def test_apply(): def test_add_row(): """test DataClass.add_row""" - tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d, + tab = DataClass.from_columns([Time([2451223, 2451224, 2451226], + format='jd'), [120.1, 121.3, 124.9]*u.deg, [12.4, 12.2, 10.8]*u.deg], names=('JD', 'RA', 'DEC')) @@ -624,23 +625,42 @@ def test_add_row(): r = {'JD': 2451228 * u.d, 'RA': 130 * u.deg, 'DEC': 8 * u.deg} tab.add_row(r) assert len(tab) == 5 - for k in r.keys(): + assert all(tab[-1]['JD'] == Time(r['JD'], format='jd')) + for k in ['RA', 'DEC']: assert 
u.isclose(tab[-1][k], r[k]) # add an iterable that matches the existing columns r = [2451130 * u.d, 135 * u.deg, 6 * u.deg] tab.add_row(r) assert len(tab) == 6 - for i, k in enumerate(tab.field_names): - assert u.isclose(tab[-1][k], r[i]) + assert all(tab[-1]['JD'] == Time(r[0], format='jd')) + for i, k in enumerate(tab.field_names[1:]): + assert u.isclose(tab[-1][k], r[i+1]) # add an iterable with specified column names r = [2451132 * u.d, 140 * u.deg, 3 * u.au] n = ['JD', 'RA', 'rh'] # adding a new column and missing an existing column tab.add_row(r, n) assert len(tab) == 7 - for i, k in enumerate(n): - assert u.isclose(tab[-1][k], r[i]) + assert set(tab.field_names) == {'JD', 'RA', 'DEC', 'rh'} + assert all(tab[-1]['JD'] == Time(r[0], format='jd')) + for i, k in enumerate(n[1:]): + assert u.isclose(tab[-1][k], r[i+1]) + + # time represented by a string + r = ['1998-11-18', 120 * u.deg, 3 * u.au] + n = ['JD', 'RA', 'rh'] + tab.add_row(r, n) + assert len(tab) == 8 + assert all(tab[-1]['JD'] == r[0]) + + # specify different names from the Mapping object + r = {'JD': 2451228 * u.d, 'RA': 130 * u.deg, 'DEC': 8 * u.deg} + n = ['JD', 'RA', 'phase'] + tab.add_row(r, n) + assert len(tab) == 9 + assert set(tab.field_names) == {'JD', 'RA', 'DEC', 'rh', 'phase'} + assert u.isclose(tab[-1]['phase'], r['DEC']) def test_join(): From 24f6f9cc816bb7b8c444d8aff20681ac3307f712 Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 20 Sep 2022 22:52:04 -0400 Subject: [PATCH 06/16] add `DataClass.add_column` --- sbpy/data/core.py | 21 +++++++++++++++++++++ sbpy/data/tests/test_dataclass.py | 22 ++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/sbpy/data/core.py b/sbpy/data/core.py index e8accc02..341859d6 100644 --- a/sbpy/data/core.py +++ b/sbpy/data/core.py @@ -1006,6 +1006,27 @@ def add_row(self, vals, names=None, units=None): vals = DataClass.from_rows(vals, names, units=units) self.join(vals) + def add_column(self, col, name=None, unit=None): + """Add a new 
column + + col : `~astropy.table.Column` object, or sequence + The column to be added. Must have the same length as the + existing table. + name : array-like str, optional + Specify the name of added column. If not ``None``, then + takes precedence over ``col.name``. + unit : array-like `~astropy.units` objects or str, optional + Unit to be applied to the new column. If not ``None``, then + take precedence over ``col.unit``. + """ + if name is None: + name = getattr(col, 'name', '') + if unit is None: + unit = getattr(col, 'unit', None) + if name in self: + raise DataClassError('Column {} already exists.'.format(name)) + self.apply(col, name=name, unit=unit) + def join(self, data): """Join another DataClass object to the end of DataClass diff --git a/sbpy/data/tests/test_dataclass.py b/sbpy/data/tests/test_dataclass.py index 5914fe3f..3df2a915 100644 --- a/sbpy/data/tests/test_dataclass.py +++ b/sbpy/data/tests/test_dataclass.py @@ -663,6 +663,28 @@ def test_add_row(): assert u.isclose(tab[-1]['phase'], r['DEC']) +def test_add_column(): + """test DataClass.add_column""" + tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d, + [120.1, 121.3, 124.9]*u.deg, + [12.4, 12.2, 10.8]*u.deg], + names=('JD', 'RA', 'DEC')) + filt = ['V', 'V', 'R'] + # add astropy Column + tab.add_column(Column(filt, name='filter')) + assert set(tab.field_names) == {'JD', 'RA', 'DEC', 'filter'} + assert all(tab['filter'] == filt) + + # add a sequence + tab.add_column(filt, name='filter1') + assert set(tab.field_names) == {'JD', 'RA', 'DEC', 'filter', 'filter1'} + assert all(tab['filter1'] == filt) + + # duplicated column + with pytest.raises(DataClassError): + tab.add_column(filt, name='filter') + + def test_join(): """test DataClass.join""" tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d, From be3ff83804ad7a8ea72c867035ccf6c59aa56ce1 Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 20 Sep 2022 23:11:39 -0400 Subject: [PATCH 07/16] update documents - dataclass.rst 
--- docs/sbpy/data/dataclass.rst | 63 ++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/docs/sbpy/data/dataclass.rst b/docs/sbpy/data/dataclass.rst index 78b8000c..5bdb6edc 100644 --- a/docs/sbpy/data/dataclass.rst +++ b/docs/sbpy/data/dataclass.rst @@ -411,21 +411,17 @@ directly addressing them: >>> obs['ra'] -More complex data table modifications are possible by directly -accessing the underlying `~astropy.table.QTable` object as shown below. - -`~sbpy.data.DataClass` provides a direct interface to the table -modification functions provided by `~astropy.table.Table`: -`~astropy.table.Table.add_row`, `~astropy.table.Table.add_column`, -`~astropy.table.Table.add_columns`, etc. For instance, it is trivial to add -additional rows and columns to these objects. +The basic functionalities to modify the data table are implemented in +`~sbpy.data.DataClass`, including adding rows and columns and join a +DataClass with another DataClass object or an `~astropy.table.Table` +object. Let's assume you want to add some more observations to your ``obs`` object: .. doctest-requires:: astropy>=5 - >>> obs.table.add_row([10.255460 * u.deg, -12.39460 * u.deg, 2451523.94653 * u.d]) + >>> obs.add_row([10.255460 * u.deg, -12.39460 * u.deg, 2451523.94653 * u.d]) >>> obs ra dec t @@ -442,13 +438,12 @@ or if you want to add a column to your object: .. 
doctest-requires:: astropy>=5 - >>> from astropy.table import Column - >>> obs.table.add_column(Column(['V', 'V', 'R', 'i'], name='filter')) + >>> obs.add_column(['V', 'V', 'R', 'i'], name='filter') >>> obs ra dec t filter deg deg - float64 float64 Time str1 + float64 float64 Time str32 --------- --------- ------------- ------ 10.323423 -12.42123 2451523.6234 V 10.333453 -12.41562 2451523.7345 V @@ -464,7 +459,7 @@ The same result can be achieved using the following syntax: ra dec t filter filter2 deg deg - float64 float64 Time str1 str1 + float64 float64 Time str32 str1 --------- --------- ------------- ------ ------- 10.323423 -12.42123 2451523.6234 V V 10.333453 -12.41562 2451523.7345 V V @@ -477,16 +472,42 @@ Similarly, existing columns can be modified using: >>> obs['filter'] = ['g', 'i', 'R', 'V'] -Note how the `~astropy.table.Table.add_column` and -`~astropy.table.Table.add_row` functions are called from -``obs.table``. `~sbpy.data.DataClass.table` is a property that exposes -the underlying `~astropy.table.QTable` object so that the user can -directly interact with it. Please refer to the `~astropy.table.Table` -reference and -[documentation](https://docs.astropy.org/en/stable/table/index.html) -for more information on how to modify `~astropy.table.QTable` objects. +If you want to join two observations into a single object: +.. doctest-requires:: astropy>=5 + >>> ra = [20.223423, 20.233453, 20.243452] * u.deg + >>> dec = [12.42123, 12.41562, 12.40435] * u.deg + >>> phase = [10.1, 12.3, 15.6] * u.deg + >>> epoch = Time(2451623.5 + array([0.1234, 0.2345, 0.3525]), format='jd') + >>> obs2 = Obs.from_columns([ra, dec, epoch, phase], + ... 
names=['ra', 'dec', 't', 'phase']) + >>> + >>> obs.join(obs2) + >>> obs + + ra dec t filter filter2 phase + deg deg deg + float64 float64 Time str1 str1 float64 + --------- --------- ------------- ------ ------- ------- + 10.323423 -12.42123 2451523.6234 g V ——— + 10.333453 -12.41562 2451523.7345 i V ——— + 10.343452 -12.40435 2451523.8525 R R ——— + 10.25546 -12.3946 2451523.94653 V i ——— + 20.223423 12.42123 2451623.6234 -- -- 10.1 + 20.233453 12.41562 2451623.7345 -- -- 12.3 + 20.243452 12.40435 2451623.8525 -- -- 15.6 + +Note that the data table to be joined doesn't have to have the same +columns as the original data table. The empty field will be automatically +masked out. + +Because the underlying `~astropy.table.QTable` can be exposed by the +`~sbpy.data.DataClass.table` property, it is possible to modify the data +table by directly accessing the underlying `~astropy.table.QTable` object. +However, this is not generally advised. You should use the mechanisms provided +by `~sbpy.data.DataClass` to manipulate the data table as much as possible +to maintain the integrity of the data table. 
Additional Data Container Concepts ================================== From 0d98fe9976d527d5489c625a7604ad6f6f65adfa Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 20 Sep 2022 23:19:40 -0400 Subject: [PATCH 08/16] PEP8 fixes --- sbpy/data/core.py | 6 ++++-- sbpy/data/tests/test_dataclass.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sbpy/data/core.py b/sbpy/data/core.py index 341859d6..46c4d7db 100644 --- a/sbpy/data/core.py +++ b/sbpy/data/core.py @@ -1002,7 +1002,8 @@ def add_row(self, vals, names=None, units=None): for i, k in enumerate(names): if k in self and isinstance(self[k], Time): vals[i] = Time(vals[i], scale=self[k].scale, - format='isot' if isinstance(vals[i], str) else 'jd') + format='isot' if isinstance(vals[i], str) + else 'jd') vals = DataClass.from_rows(vals, names, units=units) self.join(vals) @@ -1060,7 +1061,8 @@ def join(self, data): data = DataClass.from_table(data) if not isinstance(data, DataClass): raise ValueError('DataClass, dict, or astorpy.table.Table are ' - 'expected, but {} is received.'.format(type(data))) + 'expected, but {} is received.'. 
+ format(type(data))) # adjust input column names for alises alt = self._translate_columns(data.field_names, ignore_missing=True) diff --git a/sbpy/data/tests/test_dataclass.py b/sbpy/data/tests/test_dataclass.py index 3df2a915..02d6c47a 100644 --- a/sbpy/data/tests/test_dataclass.py +++ b/sbpy/data/tests/test_dataclass.py @@ -639,7 +639,7 @@ def test_add_row(): # add an iterable with specified column names r = [2451132 * u.d, 140 * u.deg, 3 * u.au] - n = ['JD', 'RA', 'rh'] # adding a new column and missing an existing column + n = ['JD', 'RA', 'rh'] # with a new column and missing an existing column tab.add_row(r, n) assert len(tab) == 7 assert set(tab.field_names) == {'JD', 'RA', 'DEC', 'rh'} From 2db8a227ad1f0730c63e6d49c241bf9a4dcec505 Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 20 Sep 2022 23:20:02 -0400 Subject: [PATCH 09/16] update changelog --- CHANGES.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index d7646ced..e72c93c0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -22,6 +22,9 @@ sbpy.data - Added ``DataClass.__contains__`` to enable `in` operator for ``DataClass`` objects. [#357] +- Added ``DataClass.add_row``, ``DataClass.add_column``, ``DataClass.join`` + methods. [#xxx] + Bug Fixes --------- From 1127bfca63dc846227646ca88ac1271bcc86cb5a Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 20 Sep 2022 23:30:56 -0400 Subject: [PATCH 10/16] update changelog --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index e72c93c0..9b0a5664 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -23,7 +23,7 @@ sbpy.data objects. [#357] - Added ``DataClass.add_row``, ``DataClass.add_column``, ``DataClass.join`` - methods. [#xxx] + methods. 
[#367] Bug Fixes From 5d00a5838c0479247e59683de4128f83eab33c96 Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 6 Jun 2023 09:49:19 -0400 Subject: [PATCH 11/16] remove `.add_column` --- sbpy/data/core.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/sbpy/data/core.py b/sbpy/data/core.py index 46c4d7db..dce40286 100644 --- a/sbpy/data/core.py +++ b/sbpy/data/core.py @@ -670,7 +670,7 @@ def _translate_columns(self, target_colnames, ignore_missing=False): raises a `KeyError` if a match cannot be found for an input column name (neither in this object nor defined in `Conf.fieldnames`). If `ignore_missing == True`, then the problemtic column name will - be silently carried ouver and returned. + be silently carried over and returned. """ if not isinstance(target_colnames, (list, ndarray, tuple)): @@ -1007,27 +1007,6 @@ def add_row(self, vals, names=None, units=None): vals = DataClass.from_rows(vals, names, units=units) self.join(vals) - def add_column(self, col, name=None, unit=None): - """Add a new column - - col : `~astropy.table.Column` object, or sequence - The column to be added. Must have the same length as the - existing table. - name : array-like str, optional - Specify the name of added column. If not ``None``, then - takes precedence over ``col.name``. - unit : array-like `~astropy.units` objects or str, optional - Unit to be applied to the new column. If not ``None``, then - take precedence over ``col.unit``. 
- """ - if name is None: - name = getattr(col, 'name', '') - if unit is None: - unit = getattr(col, 'unit', None) - if name in self: - raise DataClassError('Column {} already exists.'.format(name)) - self.apply(col, name=name, unit=unit) - def join(self, data): """Join another DataClass object to the end of DataClass From 0038b4ed1bf2f0f81154c780383031db7392cae9 Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 6 Jun 2023 10:04:40 -0400 Subject: [PATCH 12/16] rename `.join` to `.vstack` pass through `astropy.table.Table.vstack` keywords --- sbpy/data/core.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/sbpy/data/core.py b/sbpy/data/core.py index dce40286..62e8a9f1 100644 --- a/sbpy/data/core.py +++ b/sbpy/data/core.py @@ -1005,15 +1005,15 @@ def add_row(self, vals, names=None, units=None): format='isot' if isinstance(vals[i], str) else 'jd') vals = DataClass.from_rows(vals, names, units=units) - self.join(vals) + self.vstack(vals) - def join(self, data): - """Join another DataClass object to the end of DataClass + def vstack(self, data, **kwargs): + """Stack another DataClass object to the end of DataClass - The DataClass object doesn't need to have the same set of columns - as the existing object. The original dataclass will be expanded - with new columns, and the cells with no values will be masked in - both the existing dataclass and the newly joined rows. + Similar to `~astropy.table.Table.vstack`, the DataClass object + to be stacked doesn't have to have the same set of columns as + the existing object. The `join_type` keyword parameter will be + used to decide how to process the different sets of columns. Joining will be in-place. @@ -1021,6 +1021,8 @@ def join(self, data): ---------- data : `~sbpy.data.DataClass`, dict, `~astropy.table.Table` Object to be joined with the current object + kwargs : dict + Keyword parameters accepted by `~astropy.table.Table.vstack`. 
Examples -------- @@ -1031,7 +1033,7 @@ def join(self, data): ... {'rh': [1, 2, 3] * u.au, 'delta': [1, 2, 3] * u.au}) >>> data2 = DataClass.from_dict( ... {'rh': [4, 5] * u.au, 'phase': [15, 15] * u.deg}) - >>> data1.join(data2) + >>> data1.vstack(data2) """ # check and process input data if isinstance(data, dict): @@ -1048,4 +1050,4 @@ def join(self, data): data.table.rename_columns(data.field_names, alt) # join with the input table - self.table = vstack([self.table, data.table], join_type='outer') + self.table = vstack([self.table, data.table], **kwargs) From a2d64590e20ba1ea8d114ac26ce5f8240a9d1bcd Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 6 Jun 2023 10:04:50 -0400 Subject: [PATCH 13/16] update tests --- sbpy/data/tests/test_dataclass.py | 38 +++++++------------------------ 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/sbpy/data/tests/test_dataclass.py b/sbpy/data/tests/test_dataclass.py index 02d6c47a..cf43b41b 100644 --- a/sbpy/data/tests/test_dataclass.py +++ b/sbpy/data/tests/test_dataclass.py @@ -663,30 +663,8 @@ def test_add_row(): assert u.isclose(tab[-1]['phase'], r['DEC']) -def test_add_column(): - """test DataClass.add_column""" - tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d, - [120.1, 121.3, 124.9]*u.deg, - [12.4, 12.2, 10.8]*u.deg], - names=('JD', 'RA', 'DEC')) - filt = ['V', 'V', 'R'] - # add astropy Column - tab.add_column(Column(filt, name='filter')) - assert set(tab.field_names) == {'JD', 'RA', 'DEC', 'filter'} - assert all(tab['filter'] == filt) - - # add a sequence - tab.add_column(filt, name='filter1') - assert set(tab.field_names) == {'JD', 'RA', 'DEC', 'filter', 'filter1'} - assert all(tab['filter1'] == filt) - - # duplicated column - with pytest.raises(DataClassError): - tab.add_column(filt, name='filter') - - -def test_join(): - """test DataClass.join""" +def test_vstack(): + """test DataClass.vstack""" tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d, [120.1, 121.3, 124.9]*u.deg, 
[12.4, 12.2, 10.8]*u.deg], @@ -694,7 +672,7 @@ def test_join(): # join a DataClass, same columns assert isinstance(tab, DataClass) - tab.join(tab) + tab.vstack(tab) assert len(tab) == 6 assert set(tab.field_names) == {'JD', 'RA', 'DEC'} assert all(tab.table[:3] == tab.table[-3:]) @@ -702,7 +680,7 @@ def test_join(): # join a Table delta_tab = tab.table assert isinstance(delta_tab, QTable) - tab.join(delta_tab) + tab.vstack(delta_tab) assert len(tab) == 12 assert set(tab.field_names) == {'JD', 'RA', 'DEC'} assert all(tab.table[:6] == tab.table[-6:]) @@ -710,14 +688,14 @@ def test_join(): # join a dict delta_tab = dict(tab.table) assert isinstance(delta_tab, dict) - tab.join(dict(delta_tab)) + tab.vstack(dict(delta_tab)) assert len(tab) == 24 assert set(tab.field_names) == {'JD', 'RA', 'DEC'} assert all(tab.table[:6] == tab.table[-6:]) # join an unrecoganized object with pytest.raises(ValueError): - tab.join([1, 2, 3]) + tab.vstack([1, 2, 3]) # join a table with different sets of columns tab = DataClass.from_columns([[2451223, 2451224, 2451226]*u.d, @@ -729,7 +707,7 @@ def test_join(): [20, 30, 40] * u.deg], names=('r', 'delta', 'DEC')) field0 = tab.field_names - tab.join(subtab) + tab.vstack(subtab) assert len(tab) == 6 assert set(field0).union(set(subtab.colnames)) == set(tab.field_names) @@ -738,7 +716,7 @@ def test_join(): [10, 20] * u.deg], names=('rh', 'phase')) field0 = tab.field_names - tab.join(subtab) + tab.vstack(subtab) assert len(tab) == 8 assert 'rh' not in tab.table.colnames assert set(field0).union({'phase'}) == set(tab.field_names) From 69e93dc4cd43fe4c52d01aa95acb29c31af2df3b Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Tue, 6 Jun 2023 10:21:51 -0400 Subject: [PATCH 14/16] update documents --- docs/sbpy/data/dataclass.rst | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/sbpy/data/dataclass.rst b/docs/sbpy/data/dataclass.rst index 5bdb6edc..9a354e45 100644 --- a/docs/sbpy/data/dataclass.rst +++ 
b/docs/sbpy/data/dataclass.rst @@ -314,8 +314,8 @@ object, you can use `~sbpy.data.DataClass.field_names`: ['ra', 'dec', 't'] You can also use the `in` operator to check if a field is contained in -a `~sbpy.data.DataClass` object. Alternative field names can also be -used for the `in` test: +a `~sbpy.data.DataClass` object. Alternative field names can be used +for the `in` test: >>> 'ra' in obs True @@ -412,7 +412,7 @@ directly addressing them: The basic functionalities to modify the data table are implemented in -`~sbpy.data.DataClass`, including adding rows and columns and join a +`~sbpy.data.DataClass`, including adding rows and columns and stacking a DataClass with another DataClass object or an `~astropy.table.Table` object. @@ -438,7 +438,7 @@ or if you want to add a column to your object: .. doctest-requires:: astropy>=5 - >>> obs.add_column(['V', 'V', 'R', 'i'], name='filter') + >>> obs.apply(['V', 'V', 'R', 'i'], name='filter') >>> obs ra dec t filter @@ -472,7 +472,7 @@ Similarly, existing columns can be modified using: >>> obs['filter'] = ['g', 'i', 'R', 'V'] -If you want to join two observations into a single object: +If you want to stack two observations into a single object: .. doctest-requires:: astropy>=5 @@ -483,7 +483,7 @@ If you want to join two observations into a single object: >>> obs2 = Obs.from_columns([ra, dec, epoch, phase], ... names=['ra', 'dec', 't', 'phase']) >>> - >>> obs.join(obs2) + >>> obs.stack(obs2) >>> obs ra dec t filter filter2 phase @@ -498,9 +498,10 @@ If you want to join two observations into a single object: 20.233453 12.41562 2451623.7345 -- -- 12.3 20.243452 12.40435 2451623.8525 -- -- 15.6 -Note that the data table to be joined doesn't have to have the same -columns as the original data table. The empty field will be automatically -masked out. +Note that the data table to be stacked doesn't have to have the same +columns as the original data table. 
A keyword `join_type` is used to +decide how to process the different sets of columns. See +`~astropy.table.vstack` for more detail. Because the underlying `~astropy.table.QTable` can be exposed by the `~sbpy.data.DataClass.table` property, it is possible to modify the data From 1efb19bf637ffeabc62da90f8bb1f2e035733a76 Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Thu, 8 Jun 2023 15:04:32 -0400 Subject: [PATCH 15/16] fix typo in documents --- docs/sbpy/data/dataclass.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sbpy/data/dataclass.rst b/docs/sbpy/data/dataclass.rst index 9a354e45..193c4a4c 100644 --- a/docs/sbpy/data/dataclass.rst +++ b/docs/sbpy/data/dataclass.rst @@ -483,7 +483,7 @@ If you want to stack two observations into a single object: >>> obs2 = Obs.from_columns([ra, dec, epoch, phase], ... names=['ra', 'dec', 't', 'phase']) >>> - >>> obs.stack(obs2) + >>> obs.vstack(obs2) >>> obs ra dec t filter filter2 phase From 74d8edcf117f8a1924690542d739a974a00a2b29 Mon Sep 17 00:00:00 2001 From: Jian-Yang Li Date: Fri, 9 Jun 2023 11:53:55 -0400 Subject: [PATCH 16/16] fix changelog --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 9b0a5664..2edcdf6a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -22,7 +22,7 @@ sbpy.data - Added ``DataClass.__contains__`` to enable `in` operator for ``DataClass`` objects. [#357] -- Added ``DataClass.add_row``, ``DataClass.add_column``, ``DataClass.join`` +- Added ``DataClass.add_row``, ``DataClass.vstack`` methods. [#367]