Skip to content

Commit

Permalink
Merge pull request #1538 from Sage-Bionetworks/schematic-1-existing-eTag
Browse files Browse the repository at this point in the history
[SCHEMATIC-1] BugFix: manifest submission when data previously annotated without manifest upload
  • Loading branch information
thomasyu888 authored Nov 13, 2024
2 parents 4f82822 + cc1fb27 commit f6d015b
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 1 deletion.
8 changes: 7 additions & 1 deletion schematic/store/synapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2831,7 +2831,7 @@ def getDatasetAnnotations(
try:
logger.info("Trying batch mode for retrieving Synapse annotations")
table = self.getDatasetAnnotationsBatch(datasetId, dataset_file_ids)
except (SynapseAuthenticationError, SynapseHTTPError):
except (SynapseAuthenticationError, SynapseHTTPError, ValueError):
logger.info(
f"Unable to create a temporary file view bound to {datasetId}. "
"Defaulting to slower iterative retrieval of annotations."
Expand Down Expand Up @@ -3514,6 +3514,12 @@ def _fix_default_columns(self):
# Rename ROW_ETAG column to eTag and place at end of data frame
if "ROW_ETAG" in self.table:
row_etags = self.table.pop("ROW_ETAG")

# An eTag column may already be present if users annotated data without submitting a manifest.
# We are only concerned with the new values, not the existing ones.
if "eTag" in self.table:
del self.table["eTag"]

self.table.insert(len(self.table.columns), "eTag", row_etags)

return self.table
Expand Down
66 changes: 66 additions & 0 deletions tests/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import pandas as pd
import pytest
from numpy import nan
from pandas.testing import assert_frame_equal
from synapseclient import EntityViewSchema, Folder
from synapseclient.core.exceptions import SynapseHTTPError
Expand Down Expand Up @@ -991,6 +992,71 @@ def test_tidy_table(self, dataset_fileview_table_tidy):
assert isinstance(year_value, str)
assert year_value == "1980"

def test_tidy_table_no_manifest_uploaded(self, synapse_store):
    """
    Check that a fileview table can be tidied for a dataset whose files carry
    annotations even though no manifest was ever uploaded.

    Scenario covered: a user validates a manifest with schematic, annotates the
    files with a non-schematic tool (e.g. the R client), and afterwards asks
    schematic to generate a manifest for that dataset.
    """
    # GIVEN a dataset whose files are annotated (eTag annotation included)
    # but for which no manifest has been uploaded
    dataset_id = "syn64019998"

    # AND the metadata we expect to find on the files of that dataset;
    # each column is keyed by positional row number before being re-indexed
    component_values = [nan] + ["BulkRNA-seqAssay"] * 4
    file_format_values = [nan] + ["BAM"] * 4
    genome_build_values = [nan] + ["GRCh37"] * 4
    entity_ids = [
        "syn64019999",
        "syn64020000",
        "syn64020001",
        "syn64020002",
        "syn64020003",
    ]
    expected_columns = {
        "Component": dict(enumerate(component_values)),
        "FileFormat": dict(enumerate(file_format_values)),
        "GenomeBuild": dict(enumerate(genome_build_values)),
        "entityId": dict(enumerate(entity_ids)),
    }
    expected_metadata = pd.DataFrame(expected_columns)
    expected_metadata = expected_metadata.set_index("entityId", drop=False)

    # WHEN a DatasetFileView is created for the dataset
    dataset_fileview = DatasetFileView(dataset_id, synapse_store.syn)

    # AND the fileview is queried with tidying switched off
    table = dataset_fileview.query(tidy=False, force=True)

    # THEN we get back a non-empty data frame
    assert isinstance(table, pd.DataFrame)
    assert not table.empty

    # AND that frame already carries an eTag column, which is pulled out here
    # and kept for the comparison further down
    assert "eTag" in table.columns
    original_etag_column = table.pop("eTag")

    # AND tidying the table must not raise
    with does_not_raise():
        table = dataset_fileview.tidy_table()

    # AND every eTag in the tidied table differs from the original one
    tidied_etags = table.pop("eTag")
    new_etag_column = tidied_etags.reset_index(drop=True)
    etag_changed = new_etag_column != original_etag_column
    assert etag_changed.all()

    # AND what remains of the table matches the expected metadata
    assert_frame_equal(table, expected_metadata)


@pytest.mark.table_operations
class TestTableOperations:
Expand Down

0 comments on commit f6d015b

Please sign in to comment.