Skip to content

Commit

Permalink
Merge pull request #1317 from Sage-Bionetworks/develop
Browse files Browse the repository at this point in the history
Schematic `v23.11.1`
  • Loading branch information
andrewelamb authored Nov 3, 2023
2 parents 6eb54b5 + 07d90e7 commit eea1276
Show file tree
Hide file tree
Showing 23 changed files with 2,602 additions and 2,564 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/api_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
test:
runs-on: ubuntu-latest
env:
POETRY_VERSION: 1.2.0
POETRY_VERSION: 1.3.0
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -85,4 +85,4 @@ jobs:
if: ${{ false == inputs.perform_benchmarking }}
run: >
source .venv/bin/activate;
pytest -m "schematic_api and not submission and not rule_benchmark"
pytest -m "schematic_api and not submission and not rule_benchmark"
4 changes: 2 additions & 2 deletions .github/workflows/pdoc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
build:
runs-on: ubuntu-latest
env:
POETRY_VERSION: 1.2.0
POETRY_VERSION: 1.3.0
strategy:
matrix:
python-version: ["3.9", "3.10"]
Expand Down Expand Up @@ -93,4 +93,4 @@ jobs:
url: ${{ steps.deployment.outputs.page_url }}
steps:
- id: deployment
uses: actions/deploy-pages@v1
uses: actions/deploy-pages@v1
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
pypi_release:
runs-on: ubuntu-latest
env:
POETRY_VERSION: 1.2.0
POETRY_VERSION: 1.3.0
if: github.event_name == 'push' && contains(github.ref, 'refs/tags')
steps:
#----------------------------------------------
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
test:
runs-on: ubuntu-latest
env:
POETRY_VERSION: 1.2.0
POETRY_VERSION: 1.3.0
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -127,7 +127,7 @@ jobs:
if: ${{ contains(github.event.head_commit.message, 'runcombos') }}
run: >
source .venv/bin/activate;
pytest --cov-report=term --cov-report=html:htmlcov --cov=schematic/
pytest --durations=0 --cov-report=term --cov-report=html:htmlcov --cov=schematic/
-m "not (google_credentials_needed or schematic_api or table_operations)"
- name: Run tests
Expand All @@ -137,7 +137,7 @@ jobs:
if: ${{ false == contains(github.event.head_commit.message, 'runcombos') }}
run: >
source .venv/bin/activate;
pytest --cov-report=term --cov-report=html:htmlcov --cov=schematic/
pytest --durations=0 --cov-report=term --cov-report=html:htmlcov --cov=schematic/
-m "not (google_credentials_needed or rule_combos or schematic_api or table_operations)"
- name: Upload pytest test results
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ Please note we have a [code of conduct](CODE_OF_CONDUCT.md), please follow it in
```
git clone https://github.com/Sage-Bionetworks/schematic.git
```
2. Install `poetry` (version 1.2 or later) using either the [official installer](https://python-poetry.org/docs/#installing-with-the-official-installer) or [pipx](https://python-poetry.org/docs/#installing-with-pipx). If you have an older installation of Poetry, we recommend uninstalling it first.
2. Install `poetry` (version 1.3.0 or later) using either the [official installer](https://python-poetry.org/docs/#installing-with-the-official-installer) or [pipx](https://python-poetry.org/docs/#installing-with-pipx). If you have an older installation of Poetry, we recommend uninstalling it first.

3. Start the virtual environment by doing:
```
Expand Down
4,473 changes: 2,192 additions & 2,281 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "schematicpy"
version = "22.3.2"
version = "23.11.1"
description = "Package for biomedical data model and metadata ingress management"
authors = [
"Milen Nikolov <milen.nikolov@sagebase.org>",
Expand Down Expand Up @@ -54,7 +54,7 @@ pygsheets = "^2.0.4"
PyYAML = "^6.0.0"
rdflib = "^6.0.0"
setuptools = "^66.0.0"
synapseclient = "^2.7.0"
synapseclient = "^3.1.1"
tenacity = "^8.0.1"
toml = "^0.10.2"
Flask = "^2.0.0"
Expand All @@ -70,7 +70,7 @@ Flask-Cors = "^3.0.10"
pdoc = "^12.2.0"
dateparser = "^1.1.4"
pandarallel = "^1.6.4"
schematic-db = {version = "^0.0.29", extras = ["synapse"]}
schematic-db = {version = "0.0.dev33", extras = ["synapse"]}
pyopenssl = "^23.0.0"
typing-extensions = "<4.6.0"

Expand Down
21 changes: 16 additions & 5 deletions schematic/manifest/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,11 @@ def __init__(
self.creds = services_creds["creds"]

# schema root
self.root = root
if root:
self.root = root
# Raise an error if no DataType has been provided
else:
raise ValueError("No DataType has been provided.")

# alphabetize valid values
self.alphabetize = alphabetize_valid_values
Expand All @@ -79,12 +83,19 @@ def __init__(

# additional metadata to add to manifest
self.additional_metadata = additional_metadata

# Check if the class is in the schema
root_in_schema = self.sg.se.is_class_in_schema(self.root)

# If the class could not be found, give a notification
if not root_in_schema:
exception_message = f"The DataType entered ({self.root}) could not be found in the data model schema. " + \
"Please confirm that the datatype is in the data model and that the spelling matches the class label in the .jsonld file."
raise LookupError(exception_message)

# Determine whether current data type is file-based
is_file_based = False
if self.root:
is_file_based = "Filename" in self.sg.get_node_dependencies(self.root)
self.is_file_based = is_file_based
self.is_file_based = "Filename" in self.sg.get_node_dependencies(self.root)


def _attribute_to_letter(self, attribute, manifest_fields):
"""Map attribute to column letter in a google sheet"""
Expand Down
15 changes: 12 additions & 3 deletions schematic/models/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def get_component_requirements(

# TODO: abstract validation in its own module
def validateModelManifest(
self, manifestPath: str, rootNode: str, restrict_rules: bool = False, jsonSchema: str = None, project_scope: List = None,
self, manifestPath: str, rootNode: str, restrict_rules: bool = False, jsonSchema: str = None, project_scope: List = None, access_token: str = None,
) -> List[str]:
"""Check if provided annotations manifest dataframe satisfies all model requirements.
Expand Down Expand Up @@ -251,7 +251,16 @@ def validateModelManifest(

return errors, warnings

errors, warnings, manifest = validate_all(self, errors, warnings, manifest, manifestPath, self.sg, jsonSchema, restrict_rules, project_scope)
errors, warnings, manifest = validate_all(self,
errors=errors,
warnings=warnings,
manifest=manifest,
manifestPath=manifestPath,
sg=self.sg,
jsonSchema=jsonSchema,
restrict_rules=restrict_rules,
project_scope=project_scope,
access_token=access_token)
return errors, warnings

def populateModelManifest(self, title, manifestPath: str, rootNode: str, return_excel = False) -> str:
Expand Down Expand Up @@ -328,7 +337,7 @@ def submit_metadata_manifest(

# automatic JSON schema generation and validation with that JSON schema
val_errors, val_warnings = self.validateModelManifest(
manifestPath=manifest_path, rootNode=validate_component, restrict_rules=restrict_rules, project_scope=project_scope,
manifestPath=manifest_path, rootNode=validate_component, restrict_rules=restrict_rules, project_scope=project_scope, access_token=access_token
)

# if there are no errors in validation process
Expand Down
14 changes: 10 additions & 4 deletions schematic/models/validate_attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
rule_in_rule_list,
)

from synapseclient.core.exceptions import SynapseNoCredentialsError

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -564,13 +565,18 @@ class ValidateAttribute(object):
- Add string length validator
"""

def get_target_manifests(target_component, project_scope: List):
def get_target_manifests(target_component, project_scope: List, access_token: str = None):
t_manifest_search = perf_counter()
target_manifest_IDs=[]
target_dataset_IDs=[]

#login
synStore = SynapseStorage(project_scope=project_scope)
try:
synStore = SynapseStorage(access_token=access_token, project_scope=project_scope)
except SynapseNoCredentialsError as e:
raise ValueError(
"No Synapse credentials were provided. Credentials must be provided to utilize cross-manfiest validation functionality."
) from e

#Get list of all projects user has access to
projects = synStore.getStorageProjects(project_scope=project_scope)
Expand Down Expand Up @@ -893,7 +899,7 @@ def url_validation(self, val_rule: str, manifest_col: str, sg: SchemaGenerator,)
return errors, warnings

def cross_validation(
self, val_rule: str, manifest_col: pd.core.series.Series, project_scope: List, sg: SchemaGenerator,
self, val_rule: str, manifest_col: pd.core.series.Series, project_scope: List, sg: SchemaGenerator, access_token: str,
) -> List[List[str]]:
"""
Purpose:
Expand Down Expand Up @@ -921,7 +927,7 @@ def cross_validation(


#Get IDs of manifests with target component
synStore, target_manifest_IDs, target_dataset_IDs = ValidateAttribute.get_target_manifests(target_component,project_scope)
synStore, target_manifest_IDs, target_dataset_IDs = ValidateAttribute.get_target_manifests(target_component, project_scope, access_token)

t_cross_manifest = perf_counter()
#Read each manifest
Expand Down
8 changes: 4 additions & 4 deletions schematic/models/validate_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def get_multiple_types_error(
return ["NA", error_col, error_message, error_val]

def validate_manifest_rules(
self, manifest: pd.core.frame.DataFrame, sg: SchemaGenerator, restrict_rules: bool, project_scope: List,
self, manifest: pd.core.frame.DataFrame, sg: SchemaGenerator, restrict_rules: bool, project_scope: List, access_token: Optional[str] = None,
) -> (pd.core.frame.DataFrame, List[List[str]]):
"""
Purpose:
Expand Down Expand Up @@ -208,7 +208,7 @@ def validate_manifest_rules(
manifest[col] = manifest_col
elif validation_type.lower().startswith("match"):
vr_errors, vr_warnings = validation_method(
self, rule, manifest[col], project_scope, sg,
self, rule, manifest[col], project_scope, sg, access_token
)
else:
vr_errors, vr_warnings = validation_method(
Expand Down Expand Up @@ -256,9 +256,9 @@ def validate_manifest_values(self, manifest, jsonSchema, sg
return errors, warnings


def validate_all(self, errors, warnings, manifest, manifestPath, sg, jsonSchema, restrict_rules, project_scope: List):
def validate_all(self, errors, warnings, manifest, manifestPath, sg, jsonSchema, restrict_rules, project_scope: List, access_token: str):
vm = ValidateManifest(errors, manifest, manifestPath, sg, jsonSchema)
manifest, vmr_errors, vmr_warnings = vm.validate_manifest_rules(manifest, sg, restrict_rules, project_scope)
manifest, vmr_errors, vmr_warnings = vm.validate_manifest_rules(manifest, sg, restrict_rules, project_scope, access_token)
if vmr_errors:
errors.extend(vmr_errors)
if vmr_warnings:
Expand Down
28 changes: 22 additions & 6 deletions schematic/schemas/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,19 @@ def is_node_required(self, node_display_name: str) -> bool:

return node_required

def get_node_display_name(self, node_label: str, mm_graph: nx.MultiDiGraph) -> str:
    """Get display name associated with a given node label, return id if no display name.

    Args:
        node_label, str: Node to retrieve display name for
        mm_graph, nx.MultiDiGraph: graph whose ``nodes`` mapping holds the
            attributes ("displayName", "id") of ``node_label``
    Returns:
        node_display_name: display name of the node, or its id if it does not
            have a display name.
    Raises:
        KeyError: if ``node_label`` is not in the graph, or the node has
            neither a "displayName" nor an "id" attribute.
    """
    node_attributes = mm_graph.nodes[node_label]
    if "displayName" in node_attributes:
        return node_attributes["displayName"]

    # Keep the segment after the first ':' of the id (presumably ids have a
    # "<prefix>:<label>" form -- confirm against the data model). An id
    # without any ':' is returned unchanged instead of raising IndexError.
    id_parts = node_attributes["id"].split(":")
    return id_parts[1] if len(id_parts) > 1 else id_parts[0]

def get_nodes_display_names(
self, node_list: List[str], mm_graph: nx.MultiDiGraph
) -> List[str]:
Expand All @@ -349,11 +362,10 @@ def get_nodes_display_names(
node_list: List of nodes whose display names we need to retrieve.
Returns:
List of display names.
List of display names, return id if no display name
"""
node_list_display_names = [
mm_graph.nodes[node]["displayName"] for node in node_list
]

node_list_display_names = [self.get_node_display_name(node, mm_graph) for node in node_list]

return node_list_display_names

Expand Down Expand Up @@ -430,8 +442,12 @@ def is_required(self, node_name: str, mm_graph: nx.MultiDiGraph) -> bool:
Boolean value indicating if the node is required or not.
True: yes, it is required.
False: no, it is not required.
Return False, if no required key
"""
return mm_graph.nodes[node_name]["required"]
if "required" in mm_graph.nodes[node_name]:
return mm_graph.nodes[node_name]["required"]
else:
return False

def get_json_schema_requirements(self, source_node: str, schema_name: str) -> Dict:
"""Consolidated method that aims to gather dependencies and value constraints across terms / nodes in a schema.org schema and store them in a jsonschema /JSON Schema schema.
Expand Down Expand Up @@ -505,7 +521,7 @@ def get_json_schema_requirements(self, source_node: str, schema_name: str) -> Di
)

# get process node display name
node_display_name = mm_graph.nodes[process_node]["displayName"]
node_display_name = self.get_node_display_name(node_label=process_node, mm_graph=mm_graph)

# updating map between node and node's valid values
for n in node_range_d:
Expand Down
Loading

0 comments on commit eea1276

Please sign in to comment.