Skip to content

Commit

Permalink
fixup! Add TDR parquet export script
Browse files Browse the repository at this point in the history
  • Loading branch information
nadove-ucsc committed Jul 31, 2024
1 parent c67ee3b commit 19b70af
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
6 changes: 3 additions & 3 deletions scripts/download_tdr_parquet.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Export parquet files from TDR and download them to local storage.
Export Parquet files from TDR and download them to local storage.
"""
from argparse import (
ArgumentParser,
Expand Down Expand Up @@ -51,7 +51,7 @@ def tdr(self) -> TDRClient:
def get_download_urls(self) -> dict[str, list[furl]]:
urls = self.tdr.export_parquet_urls(self.snapshot_id)
reject(urls is None,
'No parquet access information is available for snapshot %r', self.snapshot_id)
'No Parquet access information is available for snapshot %r', self.snapshot_id)
return urls

def get_data(self, parquet_urls: list[furl]) -> Iterator[bytes]:
Expand All @@ -75,7 +75,7 @@ def download_table(self,
with open(output_path, 'wb') as f:
f.write(data)
reject(data is None,
'No parquet files found for snapshot %r. Tried URLs: %r',
'No Parquet files found for snapshot %r. Tried URLs: %r',
self.snapshot_id, download_urls)


Expand Down
6 changes: 3 additions & 3 deletions src/azul/terra.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,14 +657,14 @@ def export_parquet_urls(self,
snapshot_id: str
) -> Optional[dict[str, list[mutable_furl]]]:
"""
Obtain URLs of parquet files for the data tables of the specified
Obtain URLs of Parquet files for the data tables of the specified
snapshot. This is a time-consuming operation that usually takes on the
order of 1 minute to complete.
:param snapshot_id: The UUID of the snapshot.
:return: A mapping of table names to lists of parquet file download
URLs, or `None` if if no parquet downloads are available for
:return: A mapping of table names to lists of Parquet file download
URLs, or `None` if no Parquet downloads are available for
the specified snapshot. The URLs are typically expiring signed
URLs pointing to a cloud storage service such as GCS or Azure.
"""
Expand Down

0 comments on commit 19b70af

Please sign in to comment.