Merge pull request #2797 from esdc-esac-esa-int/ESA_ehst-download_imp…

…rovements Esa ehst download improvements
astropy · Aug 8, 2023 · 3f16e62 · 3f16e62
2 parents cef75bc + f1a833d
commit 3f16e62
Show file tree

Hide file tree

Showing 5 changed files with 361 additions and 132 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -31,6 +31,8 @@ esa.hubble
 
 - Update to TAP url to query data and download files, aligned with the new eHST Science Archive. [#2567][#2597]
 - Status and maintenance messages from eHST TAP when the module is instantiated. get_status_messages method to retrieve them. [#2597]
+- New methods to download single files ``download_file`` and download FITS associated to an observation ``download_fits_files``. [#2797]
+- New function to retrieve all the files associated to an observation. [#2797]
 
 solarsystem.neodys
 ^^^^^^^^^^^^^^^^^^
@@ -64,6 +66,7 @@ esa.hubble
   a lot faster. [#2524]
 - Method query_hst_tap has been deprecated and is replaced with query_tap, with the same arguments. [#2597]
 - Product types in download_product method have been modified to: PRODUCT, SCIENCE_PRODUCT or POSTCARD. [#2597]
+- Added ``proposal`` keyword argument to several methods now allows to filter by Proposal ID. [#2797]
 
 alma
 ^^^^

diff --git a/astroquery/esa/hubble/core.py b/astroquery/esa/hubble/core.py
@@ -8,6 +8,7 @@
 European Space Agency (ESA)
 
 """
+import os
 from urllib.parse import urlencode
 
 from astropy import units
@@ -27,14 +28,29 @@
 __all__ = ['ESAHubble', 'ESAHubbleClass']
 
 
+def _check_rename_to_gz(filename):
+    rename = False
+    if os.path.exists(filename):
+        with open(filename, 'rb') as test_f:
+            if test_f.read(2) == b'\x1f\x8b' and not filename.endswith('.fits.gz'):
+                rename = True
+
+    if rename:
+        output = os.path.splitext(filename)[0] + '.fits.gz'
+        os.rename(filename, output)
+        return output
+    else:
+        return filename
+
+
 class ESAHubbleClass(BaseQuery):
     """
     Class to init ESA Hubble Module and communicate with eHST TAP
     """
     TIMEOUT = conf.TIMEOUT
     calibration_levels = {"AUXILIARY": 0, "RAW": 1, "CALIBRATED": 2,
                           "PRODUCT": 3}
-    product_types = ["SCIENCE", "PREVIEW", "THUMBNAIL" or "AUXILIARY"]
+    product_types = ["SCIENCE", "PREVIEW", "THUMBNAIL", "AUXILIARY"]
     copying_string = "Copying file to {0}..."
 
     def __init__(self, *, tap_handler=None, show_messages=True):
@@ -93,7 +109,7 @@ def download_product(self, observation_id, *, calibration_level=None,
                   "RETRIEVAL_TYPE": "OBSERVATION"}
 
         if filename is None:
-            filename = observation_id + ".tar"
+            filename = observation_id
 
         if calibration_level:
             params["CALIBRATIONLEVEL"] = calibration_level
@@ -107,7 +123,7 @@ def download_product(self, observation_id, *, calibration_level=None,
         filename = self._get_product_filename(product_type, filename)
         self._tap.load_data(params_dict=params, output_file=filename, verbose=verbose)
 
-        return filename
+        return _check_rename_to_gz(filename=filename)
 
     def __set_product_type(self, product_type):
         if product_type:
@@ -216,18 +232,12 @@ def __validate_product_type(self, product_type):
             raise ValueError("This product_type is not allowed")
 
     def _get_product_filename(self, product_type, filename):
-        if (product_type == "PRODUCT"):
-            return filename
-        elif (product_type == "SCIENCE"):
-            log.info("This is a SCIENCE_PRODUCT, the filename will be "
-                     f"renamed to {filename}.fits.gz")
-            return f"{filename}.fits.gz"
-        elif (product_type == "THUMBNAIL" or product_type == "PREVIEW"):
-            log.info("This is a POSTCARD, the filename will be "
+        if (product_type == "THUMBNAIL" or product_type == "PREVIEW"):
+            log.info("This is an image, the filename will be "
                      f"renamed to {filename}.jpg")
             return f"{filename}.jpg"
-
-        return filename
+        else:
+            return f"{filename}.zip"
 
     def get_artifact(self, artifact_id, *, filename=None, verbose=False):
         """
@@ -236,7 +246,7 @@ def get_artifact(self, artifact_id, *, filename=None, verbose=False):
         Parameters
         ----------
         artifact_id : string
-            id of the artifact to be downloaded, mandatory
+            filename to be downloaded, mandatory
             The identifier of the physical product (file) we want to retrieve.
         filename : string
             file name to be used to store the artifact, optional, default None
@@ -250,13 +260,83 @@ def get_artifact(self, artifact_id, *, filename=None, verbose=False):
         None. It downloads the artifact indicated
         """
 
-        params = {"RETRIEVAL_TYPE": "PRODUCT", "ARTIFACTID": artifact_id, "TAPCLIENT": "ASTROQUERY"}
+        return self.download_file(file=artifact_id, filename=filename, verbose=verbose)
+
+    def get_associated_files(self, observation_id, *, verbose=False):
+        """
+        Retrieves all the files associated to an observation
+
+        Parameters
+        ----------
+        observation_id : string
+            id of the observation to be downloaded, mandatory
+            The identifier of the observation we want to retrieve, regardless
+            of whether it is simple or composite.
+        verbose : bool
+            optional, default 'False'
+            flag to display information about the process
+
+        Returns
+        -------
+        None. The file is associated
+        """
+        query = (f"select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
+                 f"pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
+                 f"join ehst.plane p on p.plane_id = art.plane_id where "
+                 f"art.observation_id = '{observation_id}'")
+        return self.query_tap(query=query)
+
+    def download_fits_files(self, observation_id, *, verbose=False):
+        """
+        Retrieves all the FITS files associated to an observation
+
+        Parameters
+        ----------
+        observation_id : string
+            id of the observation to be downloaded, mandatory
+            The identifier of the observation we want to retrieve, regardless
+            of whether it is simple or composite.
+        verbose : bool
+            optional, default 'False'
+            flag to display information about the process
+
+        Returns
+        -------
+        None. The file is associated
+        """
+        results = self.get_associated_files(observation_id=observation_id, verbose=verbose)
+        for file in [i['filename'] for i in results if i['filename'].endswith('.fits')]:
+            if verbose:
+                print(f"Downloading {file} ...")
+            self.download_file(file=file, filename=file, verbose=verbose)
+
+    def download_file(self, file, *, filename=None, verbose=False):
+        """
+        Download a file from eHST based on its filename.
+
+        Parameters
+        ----------
+        file : string
+            file name of the artifact to be downloaded
+
+        filename : string
+            file name to be used to store the file, optional, default None
+        verbose : bool
+            optional, default 'False'
+            flag to display information about the process
+
+        Returns
+        -------
+        None. The file is associated
+        """
+
+        params = {"RETRIEVAL_TYPE": "PRODUCT", "ARTIFACTID": file, "TAPCLIENT": "ASTROQUERY"}
         if filename is None:
-            filename = artifact_id
+            filename = file
 
         self._tap.load_data(params_dict=params, output_file=filename, verbose=verbose)
 
-        return filename
+        return _check_rename_to_gz(filename=filename)
 
     def get_postcard(self, observation_id, *, calibration_level="RAW",
                      resolution=256, filename=None, verbose=False):
@@ -391,6 +471,7 @@ def cone_search_criteria(self, radius, *, target=None,
                              obs_collection=None,
                              instrument_name=None,
                              filters=None,
+                             proposal=None,
                              async_job=True,
                              filename=None,
                              output_format='votable',
@@ -428,6 +509,8 @@ def cone_search_criteria(self, radius, *, target=None,
             Name(s) of the instrument(s) used to generate the dataset
         filters : list of str, optional
             Name(s) of the filter(s) used to generate the dataset
+        proposal : int, optional
+            Proposal ID associated to the observations
         async_job : bool, optional, default 'False'
             executes the query (job) in asynchronous/synchronous mode (default
             synchronous)
@@ -460,6 +543,7 @@ def cone_search_criteria(self, radius, *, target=None,
                                          obs_collection=obs_collection,
                                          instrument_name=instrument_name,
                                          filters=filters,
+                                         proposal=proposal,
                                          async_job=True,
                                          get_query=True)
         if crit_query.endswith(")"):
@@ -619,7 +703,7 @@ def query_hst_tap(self, query, *, async_job=False, output_file=None,
     def query_criteria(self, *, calibration_level=None,
                        data_product_type=None, intent=None,
                        obs_collection=None, instrument_name=None,
-                       filters=None, async_job=True, output_file=None,
+                       filters=None, proposal=None, async_job=True, output_file=None,
                        output_format="votable", verbose=False,
                        get_query=False):
         """
@@ -639,13 +723,15 @@ def query_criteria(self, *, calibration_level=None,
         intent : str, optional
             The intent of the original observer in acquiring this observation.
             SCIENCE or CALIBRATION
-        collection : list of str, optional
+        obs_collection : list of str, optional
             List of collections that are available in eHST catalogue.
-            HLA, HST
+            HLA, HST, HAP
         instrument_name : list of str, optional
             Name(s) of the instrument(s) used to generate the dataset
         filters : list of str, optional
             Name(s) of the filter(s) used to generate the dataset
+        proposal : int, optional
+            Proposal ID associated to the observations
         async_job : bool, optional, default 'True'
             executes the query (job) in asynchronous/synchronous mode (default
             synchronous)
@@ -680,6 +766,11 @@ def query_criteria(self, *, calibration_level=None,
                 parameters.append("intent LIKE '%{}%'".format(intent.lower()))
             else:
                 raise ValueError("intent must be a string")
+        if proposal is not None:
+            if isinstance(proposal, int):
+                parameters.append("proposal_id = '{}'".format(proposal))
+            else:
+                raise ValueError("Proposal ID must be an integer")
         if self.__check_list_strings(obs_collection):
             parameters.append("(collection LIKE '%{}%')".format(
                 "%' OR collection LIKE '%".join(obs_collection)
@@ -767,7 +858,7 @@ def get_status_messages(self):
             if response.status == 200:
                 for line in response:
                     string_message = line.decode("utf-8")
-                    print(string_message[string_message.index('=')+1:])
+                    print(string_message[string_message.index('=') + 1:])
         except OSError:
             print("Status messages could not be retrieved")
 
@@ -810,10 +901,8 @@ def get_columns(self, table_name, *, only_names=True, verbose=False):
             return columns
 
     def _getCoordInput(self, value):
-        if not (isinstance(value, str)
-                or isinstance(value, SkyCoord)):
-            raise ValueError("Coordinates"
-                             + " must be either a string or astropy.coordinates")
+        if not (isinstance(value, str) or isinstance(value, SkyCoord)):
+            raise ValueError("Coordinates must be either a string or astropy.coordinates")
         if isinstance(value, str):
             return SkyCoord(value)
         else:

diff --git a/astroquery/esa/hubble/tests/test_esa_hubble.py b/astroquery/esa/hubble/tests/test_esa_hubble.py
@@ -11,6 +11,7 @@
 
 import os
 import shutil
+import gzip
 from pathlib import Path
 from unittest.mock import MagicMock
 from unittest.mock import patch
@@ -22,6 +23,7 @@
 from requests.models import Response
 
 from astroquery.esa.hubble import ESAHubbleClass
+from astroquery.esa.hubble.core import _check_rename_to_gz
 from astroquery.esa.hubble.tests.dummy_tap_handler import DummyHubbleTapHandler
 from astropy.utils.exceptions import AstropyDeprecationWarning
 
@@ -76,8 +78,10 @@ def pformat():
 
 class TestESAHubble:
 
-    def get_dummy_tap_handler(self):
-        parameterst = {'query': "select top 10 * from hsc_v2.hubble_sc2",
+    def get_dummy_tap_handler(self, query=None):
+        if query is None:
+            query = "select top 10 * from hsc_v2.hubble_sc2"
+        parameterst = {'query': query,
                        'output_file': "test2.vot",
                        'output_format': "votable",
                        'verbose': False}
@@ -228,6 +232,58 @@ def test_get_artifact(self, tmp_path):
         path = Path(tmp_path, "w0ji0v01t_c2f.fits.gz")
         ehst.get_artifact(artifact_id=path)
 
+    def test_download_file(self, tmp_path):
+        ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
+        file = 'w0ji0v01t_c2f.fits'
+        path = Path(tmp_path, file + '.gz')
+        ehst.download_file(file=path, filename=path)
+
+    def test_get_associated_files(self):
+        observation_id = 'test'
+        query = (f"select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
+                 f"pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
+                 f"join ehst.plane p on p.plane_id = art.plane_id where "
+                 f"art.observation_id = '{observation_id}'")
+        parameters = {'query': query,
+                      'output_file': 'test2.vot',
+                      'output_format': "votable",
+                      'verbose': False}
+        ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(query=query), show_messages=False)
+        ehst.get_associated_files(observation_id=observation_id)
+        self.get_dummy_tap_handler(query=query).check_call("launch_job", parameters)
+
+    @patch.object(ESAHubbleClass, 'get_associated_files')
+    def test_download_fits(self, mock_associated_files):
+        observation_id = 'test'
+        query = (f"select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
+                 f"pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
+                 f"join ehst.plane p on p.plane_id = art.plane_id where "
+                 f"art.observation_id = '{observation_id}'")
+        parameters = {'query': query,
+                      'output_file': 'test2.vot',
+                      'output_format': "votable",
+                      'verbose': False}
+        mock_associated_files.return_value = [{'filename': 'test.fits'}]
+        ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(query=query), show_messages=False)
+        ehst.download_fits_files(observation_id=observation_id)
+        self.get_dummy_tap_handler(query=query).check_call("launch_job", parameters)
+
+    def test_is_not_gz(self, tmp_path):
+        target_file = data_path('cone_search.vot')
+        ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
+        assert _check_rename_to_gz(target_file) == target_file
+
+    def test_is_gz(self, tmp_path):
+        ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
+        # test_file = data_path('m31.vot.test')
+        temp_file = 'testgz'
+        target_file = os.path.join(tmp_path, temp_file)
+        with gzip.open(target_file, 'wb') as f:
+            f.write(b'')
+        # with open(test_file, 'rb') as f_in, gzip.open(target_file, 'wb') as f_out:
+        #     f_out.writelines(f_in)
+        assert _check_rename_to_gz(target_file) == target_file + '.fits.gz'
+
     def test_get_columns(self):
         parameters = {'table_name': "table",
                       'only_names': True,
@@ -238,6 +294,32 @@ def test_get_columns(self):
         ehst.get_columns(table_name="table", only_names=True, verbose=True)
         dummyTapHandler.check_call("get_columns", parameters)
 
+    def test_query_criteria_proposal(self):
+        parameters1 = {'proposal': 12345,
+                       'async_job': False,
+                       'output_file': "output_test_query_by_criteria.vot.gz",
+                       'output_format': "votable",
+                       'verbose': True,
+                       'get_query': True}
+        ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
+        test_query = ehst.query_criteria(proposal=parameters1['proposal'],
+                                         async_job=parameters1['async_job'],
+                                         output_file=parameters1['output_file'],
+                                         output_format=parameters1['output_format'],
+                                         verbose=parameters1['verbose'],
+                                         get_query=parameters1['get_query'])
+        parameters2 = {'query': test_query,
+                       'output_file': "output_test_query_by_criteria.vot.gz",
+                       'output_format': "votable",
+                       'verbose': False}
+        parameters3 = {'query': "select * from ehst.archive where("
+                                "proposal_id = '12345')",
+                       'output_file': "output_test_query_by_criteria.vot.gz",
+                       'output_format': "votable",
+                       'verbose': False}
+        dummy_tap_handler = DummyHubbleTapHandler("launch_job", parameters2)
+        dummy_tap_handler.check_call("launch_job", parameters3)
+
     def test_query_criteria(self):
         parameters1 = {'calibration_level': "PRODUCT",
                        'data_product_type': "image",