From 1fa9dcedf6560933462eb740e7242c890c5e13f6 Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Fri, 4 Dec 2020 16:37:27 +0530 Subject: [PATCH 1/6] Made scalars overwrite & added option to control overwriting --- tardis/io/util.py | 55 +++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/tardis/io/util.py b/tardis/io/util.py index 1b42d7eac1d..13903337675 100644 --- a/tardis/io/util.py +++ b/tardis/io/util.py @@ -36,12 +36,12 @@ def get_internal_data_path(fname): def quantity_from_str(text): """ Convert a string to `astropy.units.Quantity` - + Parameters ---------- text : The string to convert to `astropy.units.Quantity` - + Returns ------- `astropy.units.Quantity` @@ -76,7 +76,7 @@ def match(self, text): ---------- text : A string to be passed to `target_type` for conversion. - + Returns ------- `True` if `text` can be converted to `target_type`. @@ -196,7 +196,9 @@ def __new__(cls, *args, **kwargs): return instance @staticmethod - def to_hdf_util(path_or_buf, path, elements, complevel=9, complib="blosc"): + def to_hdf_util( + path_or_buf, path, elements, overwrite, complevel=9, complib="blosc" + ): """ A function to uniformly store TARDIS data to an HDF file. @@ -220,21 +222,28 @@ def to_hdf_util(path_or_buf, path, elements, complevel=9, complib="blosc"): Returns ------- """ - we_opened = False + buf_opened = False - try: + try: # when path_or_buf is a str, the HDFStore should get created buf = pd.HDFStore(path_or_buf, complevel=complevel, complib=complib) - except TypeError as e: # Already a HDFStore + except TypeError as e: if e.message == "Expected bytes, got HDFStore": + # when path_or_buf is an HDFStore buffer instead buf = path_or_buf else: raise e - else: # path_or_buf was a string and we opened the HDFStore - we_opened = True + else: + if os.path.exists(path_or_buf) and not overwrite: + buf.close() + raise FileExistsError( + "The specified HDF file already exists. If you still want " + "to overwrite it, set option overwrite=True" + ) + buf_opened = True if not buf.is_open: buf.open() - we_opened = True + buf_opened = True scalars = {} for key, value in elements.items(): @@ -253,25 +262,17 @@ def to_hdf_util(path_or_buf, path, elements, complevel=9, complib="blosc"): pd.DataFrame(value).to_hdf(buf, os.path.join(path, key)) else: pd.DataFrame(value).to_hdf(buf, os.path.join(path, key)) - else: + else: # value is a TARDIS object like model, runner or plasma try: - value.to_hdf(buf, path, name=key) + value.to_hdf(buf, path, name=key, overwrite=overwrite) except AttributeError: data = pd.DataFrame([value]) data.to_hdf(buf, os.path.join(path, key)) if scalars: - scalars_series = pd.Series(scalars) + pd.Series(scalars).to_hdf(buf, os.path.join(path, "scalars")) - # Unfortunately, with to_hdf we cannot append, so merge beforehand - scalars_path = os.path.join(path, "scalars") - try: - scalars_series = buf[scalars_path].append(scalars_series) - except KeyError: # no scalars in HDFStore - pass - scalars_series.to_hdf(buf, os.path.join(path, "scalars")) - - if we_opened: + if buf_opened: buf.close() def get_properties(self): @@ -291,7 +292,7 @@ def convert_to_snake_case(s): s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", s) return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower() - def to_hdf(self, file_path, path="", name=None): + def to_hdf(self, file_path, path="", name=None, overwrite=False): """ Parameters ---------- @@ -313,7 +314,7 @@ def to_hdf(self, file_path, path="", name=None): data = self.get_properties() buff_path = os.path.join(path, name) - self.to_hdf_util(file_path, buff_path, data) + self.to_hdf_util(file_path, buff_path, data, overwrite) class PlasmaWriterMixin(HDFWriterMixin): @@ -338,7 +339,9 @@ def get_properties(self): data.pop("nlte_data") return data - def to_hdf(self, file_path, path="", name=None, collection=None): + def to_hdf( + self, file_path, path="", name=None, collection=None, overwrite=False + ): """ Parameters ---------- @@ -361,7 +364,7 @@ def to_hdf(self, file_path, path="", name=None, collection=None): ------- """ self.collection = collection - super(PlasmaWriterMixin, self).to_hdf(file_path, path, name) + super(PlasmaWriterMixin, self).to_hdf(file_path, path, name, overwrite) def download_from_url(url, dst): From a7f605015e184ed5b94dcb2f8fb3191986685bc3 Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Fri, 4 Dec 2020 16:56:42 +0530 Subject: [PATCH 2/6] Added overwrite option in docstrings and fixed them --- tardis/io/util.py | 69 +++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/tardis/io/util.py b/tardis/io/util.py index 13903337675..0ba2ce8f74b 100644 --- a/tardis/io/util.py +++ b/tardis/io/util.py @@ -199,9 +199,7 @@ def __new__(cls, *args, **kwargs): def to_hdf_util( path_or_buf, path, elements, overwrite, complevel=9, complib="blosc" ): - """ - A function to uniformly store TARDIS data - to an HDF file. + """A function to uniformly store TARDIS data to an HDF file. Scalars will be stored in a Series under path/scalars 1D arrays will be stored under path/property_name as distinct Series @@ -211,16 +209,20 @@ def to_hdf_util( Parameters ---------- - path_or_buf : - Path or buffer to the HDF store + path_or_buf : str or pandas.io.pytables.HDFStore + Path or buffer to the HDF file path : str - Path inside the HDF store to store the `elements` + Path inside the HDF file to store the `elements` elements : dict A dict of property names and their values to be stored. + overwrite: bool + If the HDF file path already exists, whether overwrite it or not - Returns - ------- + Notes + ----- + `overwrite` option doesn't have any effect when `path_or_buf` is an + HDFStore because it depends on user that in which they've opened it. """ buf_opened = False @@ -232,7 +234,7 @@ def to_hdf_util( buf = path_or_buf else: raise e - else: + else: # path_or_buf was a str if os.path.exists(path_or_buf) and not overwrite: buf.close() raise FileExistsError( @@ -292,19 +294,18 @@ def convert_to_snake_case(s): s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", s) return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower() - def to_hdf(self, file_path, path="", name=None, overwrite=False): + def to_hdf(self, file_path_or_buf, path="", name=None, overwrite=False): """ Parameters ---------- - file_path : str - Path or buffer to the HDF store + file_path_or_buf : str or pandas.io.pytables.HDFStore + Path or buffer to the HDF file path : str - Path inside the HDF store to store the `elements` + Path inside the HDF file to store the `elements` name : str - Group inside the HDF store to which the `elements` need to be saved - - Returns - ------- + Group inside the HDF file to which the `elements` need to be saved + overwrite: bool + If the HDF file path already exists, whether overwrite it or not """ if name is None: try: @@ -314,7 +315,7 @@ def to_hdf(self, file_path, path="", name=None, overwrite=False): data = self.get_properties() buff_path = os.path.join(path, name) - self.to_hdf_util(file_path, buff_path, data, overwrite) + self.to_hdf_util(file_path_or_buf, buff_path, data, overwrite) class PlasmaWriterMixin(HDFWriterMixin): @@ -340,31 +341,35 @@ def get_properties(self): return data def to_hdf( - self, file_path, path="", name=None, collection=None, overwrite=False + self, + file_path_or_buf, + path="", + name=None, + collection=None, + overwrite=False, ): """ Parameters ---------- - file_path : str - Path or buffer to the HDF store + file_path_or_buf : str or pandas.io.pytables.HDFStore + Path or buffer to the HDF file path : str - Path inside the HDF store to store the `elements` + Path inside the HDF file to store the `elements` name : str - Group inside the HDF store to which the `elements` need to be saved + Group inside the HDF file to which the `elements` need to be saved collection : `None` or a `PlasmaPropertyCollection` of which members are the property types which will be stored. If `None` then - all types of properties will be stored. - - This acts like a filter, for example if a value of - `property_collections.basic_inputs` is given, only - those input parameters will be stored to the HDF store. - - Returns - ------- + all types of properties will be stored. This acts like a filter, + for example if a value of `property_collections.basic_inputs` is + given, only those input parameters will be stored to the HDF file. + overwrite: bool + If the HDF file path already exists, whether overwrite it or not """ self.collection = collection - super(PlasmaWriterMixin, self).to_hdf(file_path, path, name, overwrite) + super(PlasmaWriterMixin, self).to_hdf( + file_path_or_buf, path, name, overwrite + ) def download_from_url(url, dst): From aa37e1ecf1de324bde525228dc013d326ec8b318 Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Fri, 4 Dec 2020 18:35:27 +0530 Subject: [PATCH 3/6] Added overwrite=True option in tests using hdfs --- tardis/io/tests/test_HDFWriter.py | 14 +++++++------- tardis/io/tests/test_config_reader.py | 2 +- tardis/model/tests/test_base.py | 2 +- tardis/model/tests/test_density.py | 2 +- tardis/montecarlo/tests/test_base.py | 4 +++- tardis/montecarlo/tests/test_spectrum.py | 2 +- tardis/plasma/tests/test_hdf_plasma.py | 3 ++- tardis/widgets/tests/test_shell_info.py | 2 +- 8 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tardis/io/tests/test_HDFWriter.py b/tardis/io/tests/test_HDFWriter.py index eafd59228de..c46448d185c 100644 --- a/tardis/io/tests/test_HDFWriter.py +++ b/tardis/io/tests/test_HDFWriter.py @@ -37,7 +37,7 @@ def __init__(self, property): def test_simple_write(tmpdir, attr): fname = str(tmpdir.mkdir("data").join("test.hdf")) actual = MockHDF(attr) - actual.to_hdf(fname, path="test") + actual.to_hdf(fname, path="test", overwrite=True) expected = pd.read_hdf(fname, key="/test/mock_hdf/scalars")["property"] assert actual.property == expected @@ -59,7 +59,7 @@ def test_simple_write(tmpdir, attr): def test_complex_obj_write(tmpdir, attr): fname = str(tmpdir.mkdir("data").join("test.hdf")) actual = MockHDF(attr) - actual.to_hdf(fname, path="test") + actual.to_hdf(fname, path="test", overwrite=True) expected = pd.read_hdf(fname, key="/test/mock_hdf/property").values assert_array_almost_equal(actual.property, expected) @@ -76,7 +76,7 @@ def test_complex_obj_write(tmpdir, attr): def test_MultiIndex_write(tmpdir): fname = str(tmpdir.mkdir("data").join("test.hdf")) actual = MockHDF(mock_multiIndex) - actual.to_hdf(fname, path="test") + actual.to_hdf(fname, path="test", overwrite=True) expected = pd.read_hdf(fname, key="/test/mock_hdf/property") expected = pd.MultiIndex.from_tuples(expected.unstack().values) pdt.assert_almost_equal(actual.property, expected) @@ -92,7 +92,7 @@ def test_quantity_objects_write(tmpdir, attr): fname = str(tmpdir.mkdir("data").join("test.hdf")) attr_quantity = u.Quantity(attr, "g/cm**3") actual = MockHDF(attr_quantity) - actual.to_hdf(fname, path="test") + actual.to_hdf(fname, path="test", overwrite=True) expected = pd.read_hdf(fname, key="/test/mock_hdf/property") assert_array_almost_equal(actual.property.cgs.value, expected) @@ -105,7 +105,7 @@ def test_scalar_quantity_objects_write(tmpdir, attr): fname = str(tmpdir.mkdir("data").join("test.hdf")) attr_quantity = u.Quantity(attr, "g/cm**3") actual = MockHDF(attr_quantity) - actual.to_hdf(fname, path="test") + actual.to_hdf(fname, path="test", overwrite=True) expected = pd.read_hdf(fname, key="/test/mock_hdf/scalars/")["property"] assert_array_almost_equal(actual.property.cgs.value, expected) @@ -113,7 +113,7 @@ def test_scalar_quantity_objects_write(tmpdir, attr): def test_none_write(tmpdir): fname = str(tmpdir.mkdir("data").join("test.hdf")) actual = MockHDF(None) - actual.to_hdf(fname, path="test") + actual.to_hdf(fname, path="test", overwrite=True) expected = pd.read_hdf(fname, key="/test/mock_hdf/scalars/")["property"] if expected == "none": expected = None @@ -138,7 +138,7 @@ def test_objects_write(tmpdir, attr): nested_object = MockHDF(np.array([4.0e14, 2, 2e14, 27.5])) attr_quantity = u.Quantity(attr, "g/cm**3") actual = MockClass(attr_quantity, nested_object) - actual.to_hdf(fname, path="test") + actual.to_hdf(fname, path="test", overwrite=True) expected_property = pd.read_hdf(fname, key="/test/mock_class/property") assert_array_almost_equal(actual.property.cgs.value, expected_property) nested_property = pd.read_hdf( diff --git a/tardis/io/tests/test_config_reader.py b/tardis/io/tests/test_config_reader.py index 2345329cd69..6b8b7dbf503 100644 --- a/tardis/io/tests/test_config_reader.py +++ b/tardis/io/tests/test_config_reader.py @@ -64,7 +64,7 @@ def test_config_hdf(hdf_file_path, tardis_config_verysimple): expected = Configuration.from_config_dict( tardis_config_verysimple, validate=True, config_dirname="test" ) - expected.to_hdf(hdf_file_path) + expected.to_hdf(hdf_file_path, overwrite=True) actual = pd.read_hdf(hdf_file_path, key="/simulation/config") expected = expected.get_properties()["config"] assert actual[0] == expected[0] diff --git a/tardis/model/tests/test_base.py b/tardis/model/tests/test_base.py index faf72e5dae2..1f44b007b5c 100644 --- a/tardis/model/tests/test_base.py +++ b/tardis/model/tests/test_base.py @@ -240,7 +240,7 @@ def test_model_decay(simple_isotope_abundance): @pytest.fixture(scope="module", autouse=True) def to_hdf_buffer(hdf_file_path, simulation_verysimple): - simulation_verysimple.model.to_hdf(hdf_file_path) + simulation_verysimple.model.to_hdf(hdf_file_path, overwrite=True) model_scalar_attrs = ['t_inner'] diff --git a/tardis/model/tests/test_density.py b/tardis/model/tests/test_density.py index 0f176c12236..96153cbf69f 100644 --- a/tardis/model/tests/test_density.py +++ b/tardis/model/tests/test_density.py @@ -9,7 +9,7 @@ @pytest.fixture(scope="module", autouse=True) def to_hdf_buffer(hdf_file_path,simulation_verysimple): - simulation_verysimple.model.homologous_density.to_hdf(hdf_file_path) + simulation_verysimple.model.homologous_density.to_hdf(hdf_file_path, overwrite=True) def test_hdf_density_0(hdf_file_path, simulation_verysimple): actual = simulation_verysimple.model.homologous_density.density_0 diff --git a/tardis/montecarlo/tests/test_base.py b/tardis/montecarlo/tests/test_base.py index 283eb45bb6c..7dda40ef237 100644 --- a/tardis/montecarlo/tests/test_base.py +++ b/tardis/montecarlo/tests/test_base.py @@ -12,7 +12,9 @@ @pytest.fixture(scope="module", autouse=True) def to_hdf_buffer(hdf_file_path, simulation_verysimple): - simulation_verysimple.runner.to_hdf(hdf_file_path, name="runner") + simulation_verysimple.runner.to_hdf( + hdf_file_path, name="runner", overwrite=True + ) runner_properties = [ diff --git a/tardis/montecarlo/tests/test_spectrum.py b/tardis/montecarlo/tests/test_spectrum.py index a16ea419666..ff37fb4d572 100644 --- a/tardis/montecarlo/tests/test_spectrum.py +++ b/tardis/montecarlo/tests/test_spectrum.py @@ -153,7 +153,7 @@ def compare_spectra(actual, desired): @pytest.fixture(autouse=True) def to_hdf_buffer(hdf_file_path, spectrum): - spectrum.to_hdf(hdf_file_path, name="spectrum") + spectrum.to_hdf(hdf_file_path, name="spectrum", overwrite=True) @pytest.mark.parametrize("attr", TARDISSpectrum.hdf_properties) diff --git a/tardis/plasma/tests/test_hdf_plasma.py b/tardis/plasma/tests/test_hdf_plasma.py index 6d97ef659cb..028ab4f40d1 100644 --- a/tardis/plasma/tests/test_hdf_plasma.py +++ b/tardis/plasma/tests/test_hdf_plasma.py @@ -12,7 +12,7 @@ @pytest.fixture(scope="module", autouse=True) def to_hdf_buffer(hdf_file_path, simulation_verysimple): - simulation_verysimple.plasma.to_hdf(hdf_file_path) + simulation_verysimple.plasma.to_hdf(hdf_file_path, overwrite=True) plasma_properties_list = [ @@ -104,6 +104,7 @@ def to_hdf_collection_buffer(hdf_file_path, simulation_verysimple): hdf_file_path, name="collection", collection=property_collections.basic_inputs, + overwrite=True, ) diff --git a/tardis/widgets/tests/test_shell_info.py b/tardis/widgets/tests/test_shell_info.py index e739d693c97..b34cd5e53b2 100644 --- a/tardis/widgets/tests/test_shell_info.py +++ b/tardis/widgets/tests/test_shell_info.py @@ -29,7 +29,7 @@ def simulation_shell_info(simulation_verysimple): @pytest.fixture(scope="class") def hdf_shell_info(hdf_file_path, simulation_verysimple): - simulation_verysimple.to_hdf(hdf_file_path) # save sim at hdf_file_path + simulation_verysimple.to_hdf(hdf_file_path, overwrite=True) # save sim at hdf_file_path return HDFShellInfo(hdf_file_path) From 080552bce1d704276f848a51e475cef74db379b6 Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Fri, 4 Dec 2020 20:37:02 +0530 Subject: [PATCH 4/6] Fixed a typo in docstring --- tardis/io/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tardis/io/util.py b/tardis/io/util.py index 0ba2ce8f74b..a0c8b5eb6a7 100644 --- a/tardis/io/util.py +++ b/tardis/io/util.py @@ -222,7 +222,8 @@ def to_hdf_util( Notes ----- `overwrite` option doesn't have any effect when `path_or_buf` is an - HDFStore because it depends on user that in which they've opened it. + HDFStore because it depends on user that in which mode they've opened + it. """ buf_opened = False From 722392fd20c2819340ec7a2a2d351a022e852d42 Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Fri, 4 Dec 2020 23:34:26 +0530 Subject: [PATCH 5/6] Improved docstrings --- tardis/io/util.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tardis/io/util.py b/tardis/io/util.py index a0c8b5eb6a7..46f4bcfdad3 100644 --- a/tardis/io/util.py +++ b/tardis/io/util.py @@ -217,13 +217,13 @@ def to_hdf_util( A dict of property names and their values to be stored. overwrite: bool - If the HDF file path already exists, whether overwrite it or not + If the HDF file path already exists, whether to overwrite it or not Notes ----- `overwrite` option doesn't have any effect when `path_or_buf` is an - HDFStore because it depends on user that in which mode they've opened - it. + HDFStore because the user decides on the mode in which they have + opened the HDFStore ('r', 'w' or 'a'). """ buf_opened = False @@ -306,7 +306,7 @@ def to_hdf(self, file_path_or_buf, path="", name=None, overwrite=False): name : str Group inside the HDF file to which the `elements` need to be saved overwrite: bool - If the HDF file path already exists, whether overwrite it or not + If the HDF file path already exists, whether to overwrite it or not """ if name is None: try: @@ -365,7 +365,7 @@ def to_hdf( for example if a value of `property_collections.basic_inputs` is given, only those input parameters will be stored to the HDF file. overwrite: bool - If the HDF file path already exists, whether overwrite it or not + If the HDF file path already exists, whether to overwrite it or not """ self.collection = collection super(PlasmaWriterMixin, self).to_hdf( From 077cfe718d8dfecb2919b0aec0702c750af573ad Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Fri, 4 Dec 2020 23:37:07 +0530 Subject: [PATCH 6/6] Removed buf_opened flag since buf.is_open has that info --- tardis/io/util.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tardis/io/util.py b/tardis/io/util.py index 46f4bcfdad3..e0c60e7d369 100644 --- a/tardis/io/util.py +++ b/tardis/io/util.py @@ -225,8 +225,6 @@ def to_hdf_util( HDFStore because the user decides on the mode in which they have opened the HDFStore ('r', 'w' or 'a'). """ - buf_opened = False - try: # when path_or_buf is a str, the HDFStore should get created buf = pd.HDFStore(path_or_buf, complevel=complevel, complib=complib) except TypeError as e: @@ -242,11 +240,9 @@ def to_hdf_util( "The specified HDF file already exists. If you still want " "to overwrite it, set option overwrite=True" ) - buf_opened = True if not buf.is_open: buf.open() - buf_opened = True scalars = {} for key, value in elements.items(): @@ -275,7 +271,7 @@ def to_hdf_util( if scalars: pd.Series(scalars).to_hdf(buf, os.path.join(path, "scalars")) - if buf_opened: + if buf.is_open: buf.close() def get_properties(self):