Skip to content

Commit

Permalink
Merge pull request #2797 from esdc-esac-esa-int/ESA_ehst-download_imp…
Browse files Browse the repository at this point in the history
…rovements

Esa ehst download improvements
  • Loading branch information
bsipocz authored Aug 8, 2023
2 parents cef75bc + f1a833d commit 3f16e62
Show file tree
Hide file tree
Showing 5 changed files with 361 additions and 132 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ esa.hubble

- Update to TAP url to query data and download files, aligned with the new eHST Science Archive. [#2567][#2597]
- Status and maintenance messages from eHST TAP when the module is instantiated. get_status_messages method to retrieve them. [#2597]
- New methods to download single files ``download_file`` and download FITS associated to an observation ``download_fits_files``. [#2797]
- New function to retrieve all the files associated to an observation. [#2797]

solarsystem.neodys
^^^^^^^^^^^^^^^^^^
Expand Down Expand Up @@ -64,6 +66,7 @@ esa.hubble
a lot faster. [#2524]
- Method query_hst_tap has been deprecated and is replaced with query_tap, with the same arguments. [#2597]
- Product types in download_product method have been modified to: PRODUCT, SCIENCE_PRODUCT or POSTCARD. [#2597]
- Added ``proposal`` keyword argument to several methods now allows to filter by Proposal ID. [#2797]

alma
^^^^
Expand Down
139 changes: 114 additions & 25 deletions astroquery/esa/hubble/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
European Space Agency (ESA)
"""
import os
from urllib.parse import urlencode

from astropy import units
Expand All @@ -27,14 +28,29 @@
__all__ = ['ESAHubble', 'ESAHubbleClass']


def _check_rename_to_gz(filename):
rename = False
if os.path.exists(filename):
with open(filename, 'rb') as test_f:
if test_f.read(2) == b'\x1f\x8b' and not filename.endswith('.fits.gz'):
rename = True

if rename:
output = os.path.splitext(filename)[0] + '.fits.gz'
os.rename(filename, output)
return output
else:
return filename


class ESAHubbleClass(BaseQuery):
"""
Class to init ESA Hubble Module and communicate with eHST TAP
"""
TIMEOUT = conf.TIMEOUT
calibration_levels = {"AUXILIARY": 0, "RAW": 1, "CALIBRATED": 2,
"PRODUCT": 3}
product_types = ["SCIENCE", "PREVIEW", "THUMBNAIL" or "AUXILIARY"]
product_types = ["SCIENCE", "PREVIEW", "THUMBNAIL", "AUXILIARY"]
copying_string = "Copying file to {0}..."

def __init__(self, *, tap_handler=None, show_messages=True):
Expand Down Expand Up @@ -93,7 +109,7 @@ def download_product(self, observation_id, *, calibration_level=None,
"RETRIEVAL_TYPE": "OBSERVATION"}

if filename is None:
filename = observation_id + ".tar"
filename = observation_id

if calibration_level:
params["CALIBRATIONLEVEL"] = calibration_level
Expand All @@ -107,7 +123,7 @@ def download_product(self, observation_id, *, calibration_level=None,
filename = self._get_product_filename(product_type, filename)
self._tap.load_data(params_dict=params, output_file=filename, verbose=verbose)

return filename
return _check_rename_to_gz(filename=filename)

def __set_product_type(self, product_type):
if product_type:
Expand Down Expand Up @@ -216,18 +232,12 @@ def __validate_product_type(self, product_type):
raise ValueError("This product_type is not allowed")

def _get_product_filename(self, product_type, filename):
if (product_type == "PRODUCT"):
return filename
elif (product_type == "SCIENCE"):
log.info("This is a SCIENCE_PRODUCT, the filename will be "
f"renamed to {filename}.fits.gz")
return f"{filename}.fits.gz"
elif (product_type == "THUMBNAIL" or product_type == "PREVIEW"):
log.info("This is a POSTCARD, the filename will be "
if (product_type == "THUMBNAIL" or product_type == "PREVIEW"):
log.info("This is an image, the filename will be "
f"renamed to {filename}.jpg")
return f"{filename}.jpg"

return filename
else:
return f"{filename}.zip"

def get_artifact(self, artifact_id, *, filename=None, verbose=False):
"""
Expand All @@ -236,7 +246,7 @@ def get_artifact(self, artifact_id, *, filename=None, verbose=False):
Parameters
----------
artifact_id : string
id of the artifact to be downloaded, mandatory
filename to be downloaded, mandatory
The identifier of the physical product (file) we want to retrieve.
filename : string
file name to be used to store the artifact, optional, default None
Expand All @@ -250,13 +260,83 @@ def get_artifact(self, artifact_id, *, filename=None, verbose=False):
None. It downloads the artifact indicated
"""

params = {"RETRIEVAL_TYPE": "PRODUCT", "ARTIFACTID": artifact_id, "TAPCLIENT": "ASTROQUERY"}
return self.download_file(file=artifact_id, filename=filename, verbose=verbose)

def get_associated_files(self, observation_id, *, verbose=False):
"""
Retrieves all the files associated to an observation
Parameters
----------
observation_id : string
id of the observation to be downloaded, mandatory
The identifier of the observation we want to retrieve, regardless
of whether it is simple or composite.
verbose : bool
optional, default 'False'
flag to display information about the process
Returns
-------
None. The file is associated
"""
query = (f"select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
f"pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
f"join ehst.plane p on p.plane_id = art.plane_id where "
f"art.observation_id = '{observation_id}'")
return self.query_tap(query=query)

def download_fits_files(self, observation_id, *, verbose=False):
"""
Retrieves all the FITS files associated to an observation
Parameters
----------
observation_id : string
id of the observation to be downloaded, mandatory
The identifier of the observation we want to retrieve, regardless
of whether it is simple or composite.
verbose : bool
optional, default 'False'
flag to display information about the process
Returns
-------
None. The file is associated
"""
results = self.get_associated_files(observation_id=observation_id, verbose=verbose)
for file in [i['filename'] for i in results if i['filename'].endswith('.fits')]:
if verbose:
print(f"Downloading {file} ...")
self.download_file(file=file, filename=file, verbose=verbose)

def download_file(self, file, *, filename=None, verbose=False):
"""
Download a file from eHST based on its filename.
Parameters
----------
file : string
file name of the artifact to be downloaded
filename : string
file name to be used to store the file, optional, default None
verbose : bool
optional, default 'False'
flag to display information about the process
Returns
-------
None. The file is associated
"""

params = {"RETRIEVAL_TYPE": "PRODUCT", "ARTIFACTID": file, "TAPCLIENT": "ASTROQUERY"}
if filename is None:
filename = artifact_id
filename = file

self._tap.load_data(params_dict=params, output_file=filename, verbose=verbose)

return filename
return _check_rename_to_gz(filename=filename)

def get_postcard(self, observation_id, *, calibration_level="RAW",
resolution=256, filename=None, verbose=False):
Expand Down Expand Up @@ -391,6 +471,7 @@ def cone_search_criteria(self, radius, *, target=None,
obs_collection=None,
instrument_name=None,
filters=None,
proposal=None,
async_job=True,
filename=None,
output_format='votable',
Expand Down Expand Up @@ -428,6 +509,8 @@ def cone_search_criteria(self, radius, *, target=None,
Name(s) of the instrument(s) used to generate the dataset
filters : list of str, optional
Name(s) of the filter(s) used to generate the dataset
proposal : int, optional
Proposal ID associated to the observations
async_job : bool, optional, default 'False'
executes the query (job) in asynchronous/synchronous mode (default
synchronous)
Expand Down Expand Up @@ -460,6 +543,7 @@ def cone_search_criteria(self, radius, *, target=None,
obs_collection=obs_collection,
instrument_name=instrument_name,
filters=filters,
proposal=proposal,
async_job=True,
get_query=True)
if crit_query.endswith(")"):
Expand Down Expand Up @@ -619,7 +703,7 @@ def query_hst_tap(self, query, *, async_job=False, output_file=None,
def query_criteria(self, *, calibration_level=None,
data_product_type=None, intent=None,
obs_collection=None, instrument_name=None,
filters=None, async_job=True, output_file=None,
filters=None, proposal=None, async_job=True, output_file=None,
output_format="votable", verbose=False,
get_query=False):
"""
Expand All @@ -639,13 +723,15 @@ def query_criteria(self, *, calibration_level=None,
intent : str, optional
The intent of the original observer in acquiring this observation.
SCIENCE or CALIBRATION
collection : list of str, optional
obs_collection : list of str, optional
List of collections that are available in eHST catalogue.
HLA, HST
HLA, HST, HAP
instrument_name : list of str, optional
Name(s) of the instrument(s) used to generate the dataset
filters : list of str, optional
Name(s) of the filter(s) used to generate the dataset
proposal : int, optional
Proposal ID associated to the observations
async_job : bool, optional, default 'True'
executes the query (job) in asynchronous/synchronous mode (default
synchronous)
Expand Down Expand Up @@ -680,6 +766,11 @@ def query_criteria(self, *, calibration_level=None,
parameters.append("intent LIKE '%{}%'".format(intent.lower()))
else:
raise ValueError("intent must be a string")
if proposal is not None:
if isinstance(proposal, int):
parameters.append("proposal_id = '{}'".format(proposal))
else:
raise ValueError("Proposal ID must be an integer")
if self.__check_list_strings(obs_collection):
parameters.append("(collection LIKE '%{}%')".format(
"%' OR collection LIKE '%".join(obs_collection)
Expand Down Expand Up @@ -767,7 +858,7 @@ def get_status_messages(self):
if response.status == 200:
for line in response:
string_message = line.decode("utf-8")
print(string_message[string_message.index('=')+1:])
print(string_message[string_message.index('=') + 1:])
except OSError:
print("Status messages could not be retrieved")

Expand Down Expand Up @@ -810,10 +901,8 @@ def get_columns(self, table_name, *, only_names=True, verbose=False):
return columns

def _getCoordInput(self, value):
if not (isinstance(value, str)
or isinstance(value, SkyCoord)):
raise ValueError("Coordinates"
+ " must be either a string or astropy.coordinates")
if not (isinstance(value, str) or isinstance(value, SkyCoord)):
raise ValueError("Coordinates must be either a string or astropy.coordinates")
if isinstance(value, str):
return SkyCoord(value)
else:
Expand Down
86 changes: 84 additions & 2 deletions astroquery/esa/hubble/tests/test_esa_hubble.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import os
import shutil
import gzip
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch
Expand All @@ -22,6 +23,7 @@
from requests.models import Response

from astroquery.esa.hubble import ESAHubbleClass
from astroquery.esa.hubble.core import _check_rename_to_gz
from astroquery.esa.hubble.tests.dummy_tap_handler import DummyHubbleTapHandler
from astropy.utils.exceptions import AstropyDeprecationWarning

Expand Down Expand Up @@ -76,8 +78,10 @@ def pformat():

class TestESAHubble:

def get_dummy_tap_handler(self):
parameterst = {'query': "select top 10 * from hsc_v2.hubble_sc2",
def get_dummy_tap_handler(self, query=None):
if query is None:
query = "select top 10 * from hsc_v2.hubble_sc2"
parameterst = {'query': query,
'output_file': "test2.vot",
'output_format': "votable",
'verbose': False}
Expand Down Expand Up @@ -228,6 +232,58 @@ def test_get_artifact(self, tmp_path):
path = Path(tmp_path, "w0ji0v01t_c2f.fits.gz")
ehst.get_artifact(artifact_id=path)

def test_download_file(self, tmp_path):
ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
file = 'w0ji0v01t_c2f.fits'
path = Path(tmp_path, file + '.gz')
ehst.download_file(file=path, filename=path)

def test_get_associated_files(self):
observation_id = 'test'
query = (f"select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
f"pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
f"join ehst.plane p on p.plane_id = art.plane_id where "
f"art.observation_id = '{observation_id}'")
parameters = {'query': query,
'output_file': 'test2.vot',
'output_format': "votable",
'verbose': False}
ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(query=query), show_messages=False)
ehst.get_associated_files(observation_id=observation_id)
self.get_dummy_tap_handler(query=query).check_call("launch_job", parameters)

@patch.object(ESAHubbleClass, 'get_associated_files')
def test_download_fits(self, mock_associated_files):
observation_id = 'test'
query = (f"select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
f"pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
f"join ehst.plane p on p.plane_id = art.plane_id where "
f"art.observation_id = '{observation_id}'")
parameters = {'query': query,
'output_file': 'test2.vot',
'output_format': "votable",
'verbose': False}
mock_associated_files.return_value = [{'filename': 'test.fits'}]
ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(query=query), show_messages=False)
ehst.download_fits_files(observation_id=observation_id)
self.get_dummy_tap_handler(query=query).check_call("launch_job", parameters)

def test_is_not_gz(self, tmp_path):
target_file = data_path('cone_search.vot')
ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
assert _check_rename_to_gz(target_file) == target_file

def test_is_gz(self, tmp_path):
ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
# test_file = data_path('m31.vot.test')
temp_file = 'testgz'
target_file = os.path.join(tmp_path, temp_file)
with gzip.open(target_file, 'wb') as f:
f.write(b'')
# with open(test_file, 'rb') as f_in, gzip.open(target_file, 'wb') as f_out:
# f_out.writelines(f_in)
assert _check_rename_to_gz(target_file) == target_file + '.fits.gz'

def test_get_columns(self):
parameters = {'table_name': "table",
'only_names': True,
Expand All @@ -238,6 +294,32 @@ def test_get_columns(self):
ehst.get_columns(table_name="table", only_names=True, verbose=True)
dummyTapHandler.check_call("get_columns", parameters)

def test_query_criteria_proposal(self):
parameters1 = {'proposal': 12345,
'async_job': False,
'output_file': "output_test_query_by_criteria.vot.gz",
'output_format': "votable",
'verbose': True,
'get_query': True}
ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
test_query = ehst.query_criteria(proposal=parameters1['proposal'],
async_job=parameters1['async_job'],
output_file=parameters1['output_file'],
output_format=parameters1['output_format'],
verbose=parameters1['verbose'],
get_query=parameters1['get_query'])
parameters2 = {'query': test_query,
'output_file': "output_test_query_by_criteria.vot.gz",
'output_format': "votable",
'verbose': False}
parameters3 = {'query': "select * from ehst.archive where("
"proposal_id = '12345')",
'output_file': "output_test_query_by_criteria.vot.gz",
'output_format': "votable",
'verbose': False}
dummy_tap_handler = DummyHubbleTapHandler("launch_job", parameters2)
dummy_tap_handler.check_call("launch_job", parameters3)

def test_query_criteria(self):
parameters1 = {'calibration_level': "PRODUCT",
'data_product_type': "image",
Expand Down
Loading

0 comments on commit 3f16e62

Please sign in to comment.