Skip to content

Commit

Permalink
Skip data_manager_json files that aren't bundles
Browse files Browse the repository at this point in the history
The file_size hack (filtering on `Dataset.total_size != Dataset.file_size`)
turns out not to work: people report related bugs that they should not have
been able to run into without generating bundles.
It's probably still true that bundles should produce a distinct
datatype.
  • Loading branch information
mvdbeek committed Feb 19, 2025
1 parent 467fb58 commit b6823fc
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 18 deletions.
1 change: 1 addition & 0 deletions lib/galaxy/datatypes/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ class DataManagerJson(Json):
MetadataElement(
name="data_tables", default=None, desc="Data tables represented by this dataset", readonly=True, visible=True
)
MetadataElement(name="is_bundle", default=False, desc="Dataset represents bundle", readonly=True, visible=True)

def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd):
super().set_meta(dataset=dataset, overwrite=overwrite, **kwd)
Expand Down
20 changes: 3 additions & 17 deletions lib/galaxy/managers/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,13 @@
)
from sqlalchemy import (
and_,
Cast,
ColumnElement,
desc,
false,
func,
or_,
select,
true,
)
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import (
aliased,
joinedload,
Expand Down Expand Up @@ -72,6 +69,7 @@
StoredWorkflow,
StoredWorkflowTagAssociation,
StoredWorkflowUserShareAssociation,
to_json,
User,
Workflow,
WorkflowInvocation,
Expand Down Expand Up @@ -2067,26 +2065,14 @@ def get_workflow_by_trs_id_and_version(
) -> Optional[model.StoredWorkflow]:
sa_session = self.app.model.session

def to_json(column, keys: List[str]):
    """Build a SQL expression that pulls a nested value out of a JSON-encoded column.

    The dialect is read from ``sa_session`` in the enclosing scope, which
    must be bound to an engine.

    :param column: column (or SQL expression) holding the JSON document.
    :param keys: keys to descend through, outermost first.
    :returns: on PostgreSQL, the ``astext`` form of the JSONB element reached
        by indexing with each key in turn; on any other dialect, ``column``
        wrapped in one ``json_extract(..., "$.<key>")`` call per key.
    """
    assert sa_session.bind
    if sa_session.bind.dialect.name != "postgresql":
        # Generic path: apply json_extract once per key, innermost call first.
        extracted = column
        for key in keys:
            extracted = func.json_extract(extracted, f"$.{key}")
        return extracted

    # PostgreSQL path: decode the stored value as UTF-8 text, cast it to JSONB
    # and walk down the requested keys with the subscript operator.
    json_expr: Union[ColumnElement[Any], Cast[Any]] = func.cast(
        func.convert_from(column, "UTF8"),
        JSONB,
    )
    for key in keys:
        json_expr = json_expr[key]
    return json_expr.astext

stmnt = (
select(model.StoredWorkflow)
.join(model.Workflow, model.Workflow.id == model.StoredWorkflow.latest_workflow_id)
.filter(
and_(
model.StoredWorkflow.deleted == false(),
to_json(model.Workflow.source_metadata, ["trs_tool_id"]) == trs_id,
to_json(model.Workflow.source_metadata, ["trs_version_id"]) == trs_version,
to_json(sa_session, model.Workflow.source_metadata, ["trs_tool_id"]) == trs_id,
to_json(sa_session, model.Workflow.source_metadata, ["trs_version_id"]) == trs_version,
)
)
)
Expand Down
18 changes: 17 additions & 1 deletion lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,10 @@
bindparam,
Boolean,
case,
Cast,
Column,
column,
ColumnElement,
DateTime,
delete,
desc,
Expand Down Expand Up @@ -100,6 +102,7 @@
update,
VARCHAR,
)
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.exc import (
CompileError,
OperationalError,
Expand Down Expand Up @@ -318,6 +321,19 @@ def get_uuid(uuid: Optional[Union[UUID, str]] = None) -> UUID:
return UUID(str(uuid))


def to_json(sa_session, column, keys: List[str]):
    """Build a SQL expression that pulls a nested value out of a JSON-encoded column.

    :param sa_session: session whose bound engine decides which SQL dialect
        to target; it must be bound.
    :param column: column (or SQL expression) holding the JSON document.
    :param keys: keys to descend through, outermost first.
    :returns: on PostgreSQL, the ``astext`` form of the JSONB element reached
        by indexing with each key in turn; on any other dialect, ``column``
        wrapped in one ``json_extract(..., "$.<key>")`` call per key.

    NOTE(review): the PostgreSQL branch always yields text, while
    ``json_extract`` on other backends may yield native SQL values (SQLite
    documents integer 1/0 for JSON true/false). Callers comparing the result
    against a string such as ``"true"`` should confirm this on every
    supported database.
    """
    assert sa_session.bind
    if sa_session.bind.dialect.name != "postgresql":
        # Generic path: apply json_extract once per key, innermost call first.
        extracted = column
        for key in keys:
            extracted = func.json_extract(extracted, f"$.{key}")
        return extracted

    # PostgreSQL path: decode the stored value as UTF-8 text, cast it to JSONB
    # and walk down the requested keys with the subscript operator.
    json_expr: Union[ColumnElement[Any], Cast[Any]] = func.cast(
        func.convert_from(column, "UTF8"),
        JSONB,
    )
    for key in keys:
        json_expr = json_expr[key]
    return json_expr.astext


class Base(DeclarativeBase, _HasTable):
__abstract__ = True
metadata = MetaData(naming_convention=NAMING_CONVENTION)
Expand Down Expand Up @@ -904,8 +920,8 @@ def get_user_data_tables(self, data_table: str):
Dataset.state == "ok",
# excludes data manager runs that actually populated tables.
# maybe track this formally by creating a different datatype for bundles ?
Dataset.total_size != Dataset.file_size,
HistoryDatasetAssociation._metadata.contains(data_table),
to_json(session, HistoryDatasetAssociation._metadata, ["is_bundle"]) == "true",
)
.order_by(HistoryDatasetAssociation.id)
)
Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3261,6 +3261,7 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job, final_job
create=True,
preserve_symlinks=True,
)
hda.metadata.is_bundle = True

else:
raise Exception("Unknown data manager mode encountered type...")
Expand Down

0 comments on commit b6823fc

Please sign in to comment.