Skip to content

Commit

Permalink
Merge pull request #599 from msdemlei/fix-bug-543
Browse files Browse the repository at this point in the history
Fix bug #543
  • Loading branch information
andamian authored and bsipocz committed Oct 14, 2024
1 parent 550f9b1 commit 6d29b46
Show file tree
Hide file tree
Showing 5 changed files with 286 additions and 51 deletions.
17 changes: 15 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ Bug Fixes
- path separators are no longer taken over from image titles to file
system paths. [#557]

- Registry Spatial constraint now supports Astropy Quantities for the radius argument [#594]

- Added `'sia1'` as servicetype for registry searches. [#583]

- Adding ``session`` kwarg to allow to pass a session along when turning
Expand All @@ -24,6 +22,11 @@ Bug Fixes
- Tables returned by RegistryResource.get_tables() now have a utype
attribute [#576]

- Registry Spatial constraint now supports Astropy Quantities for the radius argument [#594]

- iter_metadata() no longer crashes on tables with a datalink RESOURCE
and without obscore attributes [#599]


Deprecations and Removals
-------------------------
Expand All @@ -32,6 +35,16 @@ Deprecations and Removals
``pyvo.test()`` functionality. [#606]


1.5.3 (unreleased)
==================

- Added `'sia1'` as servicetype for registry searches. [#583]

- Adding ``session`` kwarg to allow to pass a session along when turning
an Interface into a service via ``Interface.to_service``. [#590]
>>>>>>> 4e86bf3 (Merge pull request #599 from msdemlei/fix-bug-543)


1.5.2 (2024-05-22)
==================

Expand Down
160 changes: 113 additions & 47 deletions pyvo/dal/adhoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def _get_accessurl_from_params(params):

class AdhocServiceResultsMixin:
"""
Mixin for adhoc:service functionallity for results classes.
Mixin for adhoc:service functionality for results classes.
"""

def __init__(self, votable, url=None, session=None):
Expand Down Expand Up @@ -169,6 +169,113 @@ class DatalinkResultsMixin(AdhocServiceResultsMixin):
"""
Mixin for datalink functionallity for results classes.
"""
def _iter_datalinks_from_dlblock(self, datalink_service):
"""yields datalinks from the current rows using a datalink
service RESOURCE.
"""
remaining_ids = [] # remaining IDs to processed
current_batch = None # retrieved but not returned yet
current_ids = [] # retrieved but not returned
processed_ids = [] # retrived and returned IDs
batch_size = None # size of the batch

for row in self:
if not current_ids:
if batch_size is None:
# first call.
self.query = DatalinkQuery.from_resource(
[_ for _ in self],
self._datalink,
session=self._session,
original_row=row)
remaining_ids = self.query['ID']
if not remaining_ids:
# we are done
return
if batch_size:
# subsequent calls are limitted to batch size
self.query['ID'] = remaining_ids[:batch_size]
current_batch = self.query.execute(post=True)
current_ids = list(OrderedDict.fromkeys(
[_ for _ in current_batch.to_table()['ID']]))
if not current_ids:
raise DALServiceError(
'Could not retrieve datalinks for: {}'.format(
', '.join([_ for _ in remaining_ids])))
batch_size = len(current_ids)
id1 = current_ids.pop(0)
processed_ids.append(id1)
remaining_ids.remove(id1)
yield current_batch.clone_byid(
id1,
original_row=row)

@staticmethod
def _guess_access_format(row):
"""returns a guess for the format of what __guess_access_url will
return.
This tries a few heuristics based on how obscore or SIA records might
be marked up. If will return None if row does not look as if
it contained an access format. Note that the heuristics are
tried in sequence; for now, we do not define the sequence of
heuristics.
"""
if hasattr(row, "access_format"):
return row.access_format

if "access_format" in row:
return row["access_format"]

access_format = row.getbyutype("obscore:access.format"
) or row.getbyutype("ssa:Access.Format")
if access_format:
return access_format

access_format = row.getbyucd("meta.code.mime"
) or row.getbyucd("VOX:Image_Format")
if access_format:
return access_format

@staticmethod
def _guess_access_url(row):
"""returns a guess for a URI to a data product in row.
This tries a few heuristics based on how obscore or SIA records might
be marked up. If will return None if row does not look as if
it contained a product access url. Note that the heuristics are
tried in sequence; for now, we do not define the sequence of
heuristics.
"""
if hasattr(row, "access_url"):
return row.access_url

if "access_url" in row:
return row["access_url"]

access_url = row.getbyutype("obscore:access.reference"
) or row.getbyutype("ssa:Access.Reference")
if access_url:
return access_url

access_url = row.getbyucd("meta.ref.url"
) or row.getbyucd("VOX:Image_AccessReference")
if access_url:
return access_url

def _iter_datalinks_from_product_rows(self):
"""yield datalinks from self's rows if they describe datalink-valued
products.
"""
for row in self:
# TODO: we should be more careful about whitespace, case
# and perhaps more parameters in the following comparison
if self._guess_access_format(row) == DATALINK_MIME_TYPE:
access_url = self._guess_access_url(row)
if access_url is not None:
yield DatalinkResults.from_result_url(
access_url,
original_row=row)

def iter_datalinks(self):
"""
Expand All @@ -186,53 +293,12 @@ def iter_datalinks(self):
self._datalink = self.get_adhocservice_by_ivoid(DATALINK_IVOID)
except DALServiceError:
self._datalink = None
remaining_ids = [] # remaining IDs to processed
current_batch = None # retrieved but not returned yet
current_ids = [] # retrieved but not returned
processed_ids = [] # retrived and returned IDs
batch_size = None # size of the batch

for row in self:
if self._datalink:
if not current_ids:
if batch_size is None:
# first call.
self.query = DatalinkQuery.from_resource(
[_ for _ in self],
self._datalink,
session=self._session,
original_row=row)
remaining_ids = self.query['ID']
if not remaining_ids:
# we are done
return
if batch_size:
# subsequent calls are limitted to batch size
self.query['ID'] = remaining_ids[:batch_size]
current_batch = self.query.execute(post=True)
current_ids = list(OrderedDict.fromkeys(
[_ for _ in current_batch.to_table()['ID']]))
if not current_ids:
raise DALServiceError(
'Could not retrieve datalinks for: {}'.format(
', '.join([_ for _ in remaining_ids])))
batch_size = len(current_ids)
id1 = current_ids.pop(0)
processed_ids.append(id1)
remaining_ids.remove(id1)
yield current_batch.clone_byid(
id1,
original_row=row)
elif getattr(row, 'access_format', None) == DATALINK_MIME_TYPE:
yield DatalinkResults.from_result_url(
row.getdataurl(),
original_row=row)
else:
# Fall back to row-specific handling
try:
yield row.getdatalink()
except AttributeError:
yield None
if self._datalink is None:
yield from self._iter_datalinks_from_product_rows()

else:
yield from self._iter_datalinks_from_dlblock(self._datalink)


class DatalinkRecordMixin:
Expand Down
3 changes: 2 additions & 1 deletion pyvo/dal/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,10 +516,11 @@ def fieldname_with_utype(self, utype):
return the field name that has a given UType value or None if the UType
is not found.
"""
utype = utype.lower()
try:
iterchain = (
self.getdesc(fieldname) for fieldname in self.fieldnames)
iterchain = (field for field in iterchain if field.utype == utype)
iterchain = (field for field in iterchain if (field.utype or "").lower() == utype)
return next(iterchain).name
except StopIteration:
return None
Expand Down
111 changes: 110 additions & 1 deletion pyvo/dal/tests/test_datalink.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

import pyvo as vo
from pyvo.dal.adhoc import DatalinkResults
from pyvo.utils import vocabularies
from pyvo.dal.sia2 import SIA2Results
from pyvo.dal.tap import TAPResults
from pyvo.utils import testing, vocabularies

from astropy.utils.data import get_pkg_data_contents, get_pkg_data_filename

Expand Down Expand Up @@ -39,6 +41,17 @@ def callback(request, context):
yield matcher


@pytest.fixture()
def datalink_product(mocker):
def callback(request, context):
return get_pkg_data_contents('data/datalink/datalink.xml')

with mocker.register_uri(
'GET', 'http://example.com/datalink.xml', content=callback
) as matcher:
yield matcher


@pytest.fixture()
def obscore_datalink(mocker):
def callback(request, context):
Expand Down Expand Up @@ -198,3 +211,99 @@ def test_all_mixed(self):
assert res[1].endswith("comb_avg.0001.fits.fz?preview=True")
assert res[2].endswith("http://dc.zah.uni-heidelberg.de/wider.dat")
assert res[3].endswith("when-will-it-be-back")


@pytest.mark.filterwarnings("ignore::astropy.io.votable.exceptions.E02")
@pytest.mark.usefixtures('datalink_product', 'datalink_vocabulary')
class TestIterDatalinksProducts:
"""Tests for producing datalinks from tables containing links to
datalink documents.
"""
def test_no_access_format(self):
res = testing.create_dalresults([
{"name": "access_url", "datatype": "char", "arraysize": "*",
"utype": "obscore:access.reference"}],
[("http://foo.bar/baz.jpeg",)],
resultsClass=TAPResults)
assert list(res.iter_datalinks()) == []

def test_obscore_utype(self):
res = testing.create_dalresults([
{"name": "data_product", "datatype": "char", "arraysize": "*",
"utype": "obscore:access.reference"},
{"name": "content_type", "datatype": "char", "arraysize": "*",
"utype": "obscore:access.format"},],
[("http://example.com/datalink.xml",
"application/x-votable+xml;content=datalink")],
resultsClass=TAPResults)
links = list(res.iter_datalinks())
assert len(links) == 1
assert (next(links[0].bysemantics("#this"))["access_url"]
== "http://dc.zah.uni-heidelberg.de/getproduct/flashheros/data/ca90/f0011.mt")

def test_sia2_record(self):
res = testing.create_dalresults([
{"name": "access_url", "datatype": "char", "arraysize": "*",
"utype": "obscore:access.reference"},
{"name": "access_format", "datatype": "char", "arraysize": "*",
"utype": "obscore:access.format"},],
[("http://example.com/datalink.xml",
"application/x-votable+xml;content=datalink")],
resultsClass=SIA2Results)
links = list(res.iter_datalinks())
assert len(links) == 1
assert (next(links[0].bysemantics("#this"))["access_url"]
== "http://dc.zah.uni-heidelberg.de/getproduct/flashheros/data/ca90/f0011.mt")

def test_sia1_record(self):
res = testing.create_dalresults([
{"name": "product", "datatype": "char", "arraysize": "*",
"ucd": "VOX:Image_AccessReference"},
{"name": "mime", "datatype": "char", "arraysize": "*",
"ucd": "VOX:Image_Format"},],
[("http://example.com/datalink.xml",
"application/x-votable+xml;content=datalink")],
resultsClass=TAPResults)
links = list(res.iter_datalinks())
assert len(links) == 1
assert (next(links[0].bysemantics("#this"))["access_url"]
== "http://dc.zah.uni-heidelberg.de/getproduct/flashheros/data/ca90/f0011.mt")

def test_ssap_record(self):
res = testing.create_dalresults([
{"name": "product", "datatype": "char", "arraysize": "*",
"utype": "ssa:access.reference"},
{"name": "mime", "datatype": "char", "arraysize": "*",
"utype": "ssa:access.format"},],
[("http://example.com/datalink.xml",
"application/x-votable+xml;content=datalink")],
resultsClass=TAPResults)
links = list(res.iter_datalinks())
assert len(links) == 1
assert (next(links[0].bysemantics("#this"))["access_url"]
== "http://dc.zah.uni-heidelberg.de/getproduct/flashheros/data/ca90/f0011.mt")

def test_generic_record(self):
# The meta.code.mime and meta.ref.url UCDs are perhaps too
# generic. To ensure a somewhat predictable behaviour,
# we at least make sure we pick the first of possibly multiple
# pairs (not that this would preclude arbitrary amounts of
# chaos).
res = testing.create_dalresults([
{"name": "access_url", "datatype": "char", "arraysize": "*",
"ucd": "meta.ref.url"},
{"name": "access_format", "datatype": "char", "arraysize": "*",
"utype": "meta.code.mime"},
{"name": "alt_access_url", "datatype": "char", "arraysize": "*",
"ucd": "meta.ref.url"},
{"name": "alt_access_format", "datatype": "char", "arraysize": "*",
"utype": "meta.code.mime"},],
[("http://example.com/datalink.xml",
"application/x-votable+xml;content=datalink",
"http://example.com/bad-pick.xml",
"application/x-votable+xml;content=datalink",)],
resultsClass=TAPResults)
links = list(res.iter_datalinks())
assert len(links) == 1
assert (next(links[0].bysemantics("#this"))["access_url"]
== "http://dc.zah.uni-heidelberg.de/getproduct/flashheros/data/ca90/f0011.mt")
Loading

0 comments on commit 6d29b46

Please sign in to comment.