Skip to content

Commit

Permalink
Move concatenation of prefixes out of plugin (#3579)
Browse files: browse the repository at this point in the history
  • Loading branch information
nadove-ucsc committed Sep 11, 2024
1 parent ba1dcde commit 0ca7312
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 28 deletions.
15 changes: 7 additions & 8 deletions src/azul/indexer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def parse(cls, prefix: str) -> Self:
Prefix(common='8f538f53', partition=1))
>>> list(Prefix.parse('8f538f53/0').partition_prefixes())
['']
['8f538f53']
>>> Prefix.parse('aa/bb')
Traceback (most recent call last):
Expand Down Expand Up @@ -147,17 +147,16 @@ def partition_prefixes(self) -> Iterator[str]:
>>> list(Prefix.parse('/0').partition_prefixes())
['']
>>> list(Prefix.parse('/1').partition_prefixes())
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
>>> list(Prefix.parse('a/1').partition_prefixes())
['a0', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9', 'aa', 'ab', 'ac', 'ad', 'ae', 'af']
>>> len(list(Prefix.parse('/2').partition_prefixes()))
256
"""
partition_prefixes = map(''.join, product(self.digits,
repeat=self.partition))
for partition_prefix in partition_prefixes:
validate_uuid_prefix(self.common + partition_prefix)
yield partition_prefix
for partition_prefix_digits in product(self.digits, repeat=self.partition):
complete_prefix = ''.join((self.common, *partition_prefix_digits))
validate_uuid_prefix(complete_prefix)
yield complete_prefix

@property
def num_partitions(self) -> int:
Expand Down
5 changes: 2 additions & 3 deletions src/azul/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,9 +607,8 @@ def list_bundles(self,
:param source: a reference to the repository source that contains the
bundles to list
:param prefix: appended to the common prefix of the provided source's
spec to produce a string that should be no more than
eight lower-case hexadecimal characters
:param prefix: a string that should be no more than eight lower-case
hexadecimal characters
"""

raise NotImplementedError
Expand Down
1 change: 0 additions & 1 deletion src/azul/plugins/repository/canned/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,6 @@ def list_bundles(self,
prefix: str
) -> list[CannedBundleFQID]:
self._assert_source(source)
prefix = source.spec.prefix.common + prefix
validate_uuid_prefix(prefix)
log.info('Listing bundles with prefix %r in source %r.', prefix, source)
bundle_fqids = []
Expand Down
10 changes: 3 additions & 7 deletions src/azul/plugins/repository/tdr_anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
)
from azul.uuids import (
change_version,
validate_uuid_prefix,
)

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -189,20 +188,17 @@ def _list_bundles(self,
prefix: str
) -> list[TDRAnvilBundleFQID]:
spec = source.spec
common_prefix = spec.prefix.common
complete_prefix = common_prefix + prefix
validate_uuid_prefix(complete_prefix)
primary = BundleEntityType.primary.value
supplementary = BundleEntityType.supplementary.value
duos = BundleEntityType.duos.value
rows = list(self._run_sql(f'''
SELECT datarepo_row_id, {primary!r} AS entity_type
FROM {backtick(self._full_table_name(spec, primary))}
WHERE STARTS_WITH(datarepo_row_id, '{complete_prefix}')
WHERE STARTS_WITH(datarepo_row_id, '{prefix}')
UNION ALL
SELECT datarepo_row_id, {supplementary!r} AS entity_type
FROM {backtick(self._full_table_name(spec, supplementary))} AS supp
WHERE supp.is_supplementary AND STARTS_WITH(datarepo_row_id, '{complete_prefix}')
WHERE supp.is_supplementary AND STARTS_WITH(datarepo_row_id, '{prefix}')
''' + (
''
if config.duos_service_url is None else
Expand Down Expand Up @@ -230,7 +226,7 @@ def _list_bundles(self,
duos_count += 1
# Ensure that one partition will always contain the DUOS bundle
# regardless of the choice of common prefix
if not bundle_uuid[len(common_prefix):].startswith(prefix):
if not bundle_uuid.startswith(prefix):
continue
bundles.append(TDRAnvilBundleFQID(
source=source,
Expand Down
7 changes: 1 addition & 6 deletions src/azul/plugins/repository/tdr_hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@
JSONs,
is_optional,
)
from azul.uuids import (
validate_uuid_prefix,
)

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -299,12 +296,10 @@ def _list_bundles(self,
source: TDRSourceRef,
prefix: str
) -> list[TDRBundleFQID]:
source_prefix = source.spec.prefix.common
validate_uuid_prefix(source_prefix + prefix)
current_bundles = self._query_unique_sorted(f'''
SELECT links_id, version
FROM {backtick(self._full_table_name(source.spec, 'links'))}
WHERE STARTS_WITH(links_id, '{source_prefix + prefix}')
WHERE STARTS_WITH(links_id, '{prefix}')
''', group_by='links_id')
return [
TDRBundleFQID(source=source,
Expand Down
5 changes: 2 additions & 3 deletions test/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,17 +301,16 @@ def _list_partition_bundles(self,
prefix = source.spec.prefix
partition_prefixes = list(prefix.partition_prefixes())
partition_prefix = self.random.choice(partition_prefixes)
effective_prefix = prefix.common + partition_prefix
fqids = self.azul_client.list_bundles(catalog, source, partition_prefix)
num_bundles = len(fqids)
partition = f'Partition {effective_prefix!r} of source {source.spec}'
partition = f'Partition {partition_prefix!r} of source {source.spec}'
if not config.deployment.is_sandbox_or_personal:
# For sources that use partitioning, 512 is the desired partition
# size. In practice, we observe the reindex succeeding with sizes
# >700 without the partition size becoming a limiting factor. From
# this we project 1024 as a reasonable upper bound to enforce.
upper = 1024
if effective_prefix:
if partition_prefix:
lower = 512 // 16
if len(fqids) < lower:
# If bundle UUIDs were uniformly distributed by prefix, we
Expand Down

0 comments on commit 0ca7312

Please sign in to comment.