Skip to content

Commit

Permalink
Merge pull request #269 from datalad/translator-work
Browse files Browse the repository at this point in the history
ENH+BUG: Improving translators and catalog generation.
  • Loading branch information
jsheunis authored Apr 1, 2023
2 parents 9f6e568 + d7ac163 commit 573af3c
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 10 deletions.
1 change: 0 additions & 1 deletion datalad_catalog/translators/bids_dataset_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ class BIDSTranslator:
def __init__(self, metadata_record):
    """Keep the metadata record and the parts of it the getters read.

    ``metadata_record`` must provide the key "extracted_metadata";
    a missing key raises ``KeyError``.
    """
    self.metadata_record = metadata_record
    self.extracted_metadata = self.metadata_record["extracted_metadata"]
    # The extracted metadata is not guaranteed to carry an "@graph" entry;
    # fall back to an empty list instead of failing construction.
    self.graph = self.extracted_metadata.get("@graph", [])

def get_name(self):
    """Return the "title" entry of the extracted metadata, or "" if absent."""
    metadata = self.extracted_metadata
    return metadata.get("title", "")
Expand Down
36 changes: 30 additions & 6 deletions datalad_catalog/translators/metalad_core_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class CoreTranslator:
def __init__(self, metadata_record):
    """Keep the metadata record and the parts of it the getters read.

    ``metadata_record`` must provide the key "extracted_metadata";
    a missing key raises ``KeyError``.
    """
    self.metadata_record = metadata_record
    self.extracted_metadata = self.metadata_record["extracted_metadata"]
    # Records without an "@graph" entry get an empty graph. A strict
    # ["@graph"] lookup ahead of this line would raise KeyError and make
    # the fallback unreachable, so only the tolerant lookup is kept.
    self.graph = self.extracted_metadata.get("@graph", [])

def get_name(self):
"""Return an empty string as name
Expand All @@ -110,7 +110,7 @@ def get_name(self):
"""
return ""

def get_url(self):
def get_dataset_url(self):
program = (
'.[]? | select(.["@type"] == "Dataset") | '
'[.distribution[]? | select(has("url")) | .url]'
Expand Down Expand Up @@ -149,15 +149,39 @@ def get_metadata_source(self):
result = jq.first(program, self.metadata_record)
return result if len(result) > 0 else None

def get_file_url(self):
    """Return the first result of the jq query for the distribution URL.

    The query ``.distribution? | .url?`` is run against the extracted
    metadata.
    """
    return jq.first(".distribution? | .url?", self.extracted_metadata)

def get_file_path(self):
    """Return the "path" entry of the metadata record, or None if absent."""
    record = self.metadata_record
    return record.get("path")

def get_contentbytesize(self):
    """Return the "contentbytesize" entry of the extracted metadata.

    None is returned when the entry is absent.
    """
    metadata = self.extracted_metadata
    return metadata.get("contentbytesize")

def translate(self):
    """Build the translated record for the wrapped metadata record.

    The fields "type", "dataset_id", "dataset_version" and
    "metadata_sources" are set for every record. Records of type
    "dataset" additionally get "name", "url", "authors" and
    "subdatasets"; records of type "file" get "path", "url" and
    "contentbytesize".

    Returns a dict from which every entry whose value is None has been
    removed. Raises ``KeyError`` if the metadata record lacks "type",
    "dataset_id" or "dataset_version".
    """
    # Fields shared by all record types. Dataset-only getters are not
    # called here, so file records do not pick up dataset fields.
    translated_record = {
        "type": self.metadata_record["type"],
        "dataset_id": self.metadata_record["dataset_id"],
        "dataset_version": self.metadata_record["dataset_version"],
        "metadata_sources": self.get_metadata_source(),
    }
    record_type = translated_record["type"]
    if record_type == "dataset":
        translated_record.update(
            {
                "name": self.get_name(),
                "url": self.get_dataset_url(),
                "authors": self.get_authors(),
                "subdatasets": self.get_subdatasets(),
            }
        )
    elif record_type == "file":
        translated_record.update(
            {
                "path": self.get_file_path(),
                "url": self.get_file_url(),
                "contentbytesize": self.get_contentbytesize(),
            }
        )

    # Getters signal "no value" with None; leave such keys out entirely.
    return {k: v for k, v in translated_record.items() if v is not None}
23 changes: 20 additions & 3 deletions datalad_catalog/webcatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,10 @@ def create(self, force=False):
# Check logo path, if added to config
if (
self.catalog_config.get(cnst.LOGO_PATH) is not None
and not Path(self.catalog_config[cnst.LOGO_PATH]).exists()
and not self.get_logo_path().exists()
# and not Path(self.catalog_config[cnst.LOGO_PATH]).exists()
):
msg = f"Error in config: the specified logo does not exist at path: {self.catalog_config[cnst.LOGO_PATH]}"
msg = f"Error in config: the specified logo does not exist at path: {self.get_logo_path()}"
raise FileNotFoundError(msg)

# Get package-related paths/content
Expand Down Expand Up @@ -237,6 +238,21 @@ def get_config_source(
# where dataset id and version are available.
return None

def get_logo_path(self):
    """Return the configured logo location as a ``Path``, or None.

    Returns None when the catalog config has no logo path. An absolute
    configured path is returned as is. A relative one is interpreted
    relative to the parent directory of the config file in which it was
    specified (``self.catalog_config_path``).
    """
    configured = self.catalog_config.get(cnst.LOGO_PATH)
    if configured is None:
        return None
    # Always hand back a Path: callers use Path-only members such as
    # ``.exists()`` and ``.name`` on the result, which fails on the raw
    # config value.
    logo_path = Path(configured)
    if logo_path.is_absolute():
        return logo_path
    return self.catalog_config_path.parent / logo_path

def get_config(self, config_level: str = "catalog"):
""""""
# Read metadata from file
Expand All @@ -256,7 +272,8 @@ def write_config(self, force=False):
and self.catalog_config[cnst.LOGO_PATH]
!= "artwork/catalog_logo.svg"
):
existing_path = Path(self.catalog_config[cnst.LOGO_PATH])
# existing_path = Path(self.catalog_config[cnst.LOGO_PATH])
existing_path = self.get_logo_path()
existing_name = existing_path.name
new_path = Path(self.location) / "artwork" / existing_name
copy_overwrite_path(
Expand Down

0 comments on commit 573af3c

Please sign in to comment.