Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CM Differ Context #475

Merged
merged 1 commit into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion ted_sws/core/model/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
""" """
import abc
from datetime import datetime
from pathlib import Path
from typing import List, Optional

from ted_sws.core.model import PropertyBaseModel
Expand Down
2 changes: 1 addition & 1 deletion ted_sws/core/model/validation_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Optional, List

from ted_sws.core.model import PropertyBaseModel
from ted_sws.core.model.manifestation import SPARQLQueryResult, ValidationManifestation, Manifestation, SPARQLQuery
from ted_sws.core.model.manifestation import ValidationManifestation
from ted_sws.core.model.notice import Notice
from ted_sws.core.model.validation_report_data import ReportPackageNoticeData, ReportNoticeData

Expand Down
1 change: 0 additions & 1 deletion ted_sws/core/model/validation_report_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from pathlib import Path
from typing import Optional

from ted_sws.core.model import PropertyBaseModel
Expand Down
2 changes: 1 addition & 1 deletion ted_sws/core/service/batch_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ def chunks(iterable: Iterable, chunk_size: int):
"""
iterator = iter(iterable)
for first in iterator:
yield chain([first], islice(iterator, chunk_size - 1))
yield chain([first], islice(iterator, chunk_size - 1))
1 change: 0 additions & 1 deletion ted_sws/data_manager/adapters/notice_repository.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import json
import json
import logging
import pathlib
from datetime import datetime
Expand Down
2 changes: 1 addition & 1 deletion ted_sws/data_manager/adapters/sparql_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import pandas as pd
import rdflib
from SPARQLWrapper import SPARQLWrapper, CSV, JSON, RDF, POST
from SPARQLWrapper import SPARQLWrapper, CSV, JSON, POST

from ted_sws import config

Expand Down
2 changes: 1 addition & 1 deletion ted_sws/data_manager/services/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
MONGO_DB_AGGREGATES_DATABASE_DEFAULT_NAME = "aggregates_db"
MONGO_DB_AGGREGATES_DATABASE_DEFAULT_NAME = "aggregates_db"
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import pathlib
from pathlib import Path
from typing import List, Dict
Expand Down
1 change: 0 additions & 1 deletion ted_sws/data_sampler/services/notice_xml_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def index_notice_xslt(notice: Notice, xslt_transformer=None) -> Notice:
This function selects the unique XPaths from the XMLManifestation of a notice and indexes the notice with these unique XPaths.
:param notice:
:param xslt_transformer:
:param unique:
:return:
"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import pandas as pd

from ted_sws.core.model.transform import ConceptualMapping, ConceptualMappingXPATH, ConceptualMappingMetadata, \
ConceptualMappingResource, ConceptualMappingMetadataConstraints, ConceptualMappingRule, ConceptualMappingRMLModule, \
ConceptualMappingRemark, ConceptualMappingControlList
ConceptualMappingResource, ConceptualMappingMetadataConstraints, ConceptualMappingRule, \
ConceptualMappingRMLModule, ConceptualMappingRemark, ConceptualMappingControlList
from ted_sws.mapping_suite_processor import CONCEPTUAL_MAPPINGS_METADATA_SHEET_NAME, \
CONCEPTUAL_MAPPINGS_RULES_SHEET_NAME, RULES_FIELD_XPATH, RULES_SF_FIELD_ID, RULES_SF_FIELD_NAME, \
CONCEPTUAL_MAPPINGS_RESOURCES_SHEET_NAME, CONCEPTUAL_MAPPINGS_RML_MODULES_SHEET_NAME, RULES_E_FORM_BT_ID, \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ class MappingSuitePackageDownloaderABC(abc.ABC):
def download(self, output_mapping_suite_package_path: pathlib.Path):
"""
This method downloads a mapping_suite_package and loads it at the output_mapping_suite_package_path provided.
:param mapping_suite_package_name:
:param output_mapping_suite_package_path:
:return:
"""
Expand All @@ -39,7 +38,6 @@ def __init__(self, github_repository_url: str, branch_or_tag_name: str):
def download(self, output_mapping_suite_package_path: pathlib.Path) -> str:
"""
This method downloads a mapping_suite_package and loads it at the output_mapping_suite_package_path provided.
:param mapping_suite_package_name:
:param output_mapping_suite_package_path:
:return:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

{% set ms_acronyms_title = ms1_acronym + ("-" + ms2_acronym if ms2_acronym else "") %}

{% set CONTEXT_KEY = "__CONTEXT__" %}

<!DOCTYPE html>
<html lang="en">
<head>
Expand Down Expand Up @@ -57,6 +59,16 @@
.tab {
font-weight: bold;
}
small.context {
font-weight: normal;
font-size: 12px;
}
hr {
margin: 7px 0;
height: 3PX;
background: #ccc;
border: 0;
}
</style>
</head>
<body>
Expand Down Expand Up @@ -141,6 +153,13 @@
<tr>
<th>
{% if is_field0_int %}Item {% endif %}{{ fields_labels[field0] or field0 }}
{% if CONTEXT_KEY in field0_value %}
<hr>
<small class="context">
<b>{{ mapping_suite2 }}:</b><br>
{{ field0_value[CONTEXT_KEY] | join(' - ') }}
</small>
{% endif %}
</th>
<td>
{% if "old_value" in field0_value %}
Expand All @@ -160,7 +179,7 @@
{% if field0_value and field0_value is mapping %}
<table class="dataTable heading">
<tbody>
{% for field1 in field0_value %}
{% for field1 in field0_value if field1 != CONTEXT_KEY %}
{% set field1_value = field0_value[field1] %}
{% if field1|int(-1) != -1 %} {% set is_field1_int = True %} {% endif %}
<tr>
Expand Down
103 changes: 84 additions & 19 deletions ted_sws/mapping_suite_processor/services/conceptual_mapping_differ.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from pydantic.utils import deep_update

from ted_sws import config
from ted_sws.core.model.transform import ConceptualMapping
from ted_sws.core.model.transform import ConceptualMapping, ConceptualMappingRule, ConceptualMappingRemark, \
ConceptualMappingResource, ConceptualMappingRMLModule
from ted_sws.core.model.transform import ConceptualMappingDiff, ConceptualMappingDiffMetadata, ConceptualMappingDiffData
from ted_sws.data_manager.adapters.mapping_suite_repository import MS_TRANSFORM_FOLDER_NAME, \
MS_CONCEPTUAL_MAPPING_FILE_NAME
Expand All @@ -22,39 +23,55 @@
MS_TRANSFORM_FOLDER_NAME + "/" + MS_CONCEPTUAL_MAPPING_FILE_NAME

DEFAULT_REPORT_FILE_NAME = "cm_diff"
DIFF_VALUE_CONTEXT_KEY = "__CONTEXT__"

DIFF_METADATA_TAB = "metadata"
DIFF_RULES_TAB = "rules"
DIFF_MAPPING_REMARKS_TAB = "mapping_remarks"
DIFF_RESOURCES_TAB = "resources"
DIFF_RML_MODULES_TAB = "rml_modules"
DIFF_CL1_ROLES_TAB = "cl1_roles"
DIFF_CL2_ORGANISATIONS_TAB = "cl2_organisations"


class ConceptualMappingDiffDataTransformer:
data: dict
tabs: dict = {
"metadata": {},
"rules": {},
"mapping_remarks": {},
"resources": {},
"rml_modules": {},
"cl1_roles": {},
"cl2_organisations": {}
DIFF_METADATA_TAB: {},
DIFF_RULES_TAB: {},
DIFF_MAPPING_REMARKS_TAB: {},
DIFF_RESOURCES_TAB: {},
DIFF_RML_MODULES_TAB: {},
DIFF_CL1_ROLES_TAB: {},
DIFF_CL2_ORGANISATIONS_TAB: {}
}
labels: dict
mapping1: ConceptualMapping
mapping2: ConceptualMapping
context_mapping: ConceptualMapping

item_key_flattenizer: str = "|"

def __init__(self, data):
def __init__(self, data, mapping1: ConceptualMapping, mapping2: ConceptualMapping):
self.data = data
self.mapping1 = mapping1
self.mapping2 = mapping2
self.context_mapping = self.mapping2
self.init_labels()
self.init_tabs()
self.process_tabs_data()

@classmethod
def init_labels(cls):
cls.labels = {
"tabs": {
"metadata": "Metadata",
"rules": "Rules",
"mapping_remarks": "Remarks",
"resources": "Resources",
"rml_modules": "RML Modules",
"cl1_roles": "CL1 Roles",
"cl2_organisations": "CL2 Organisations"
DIFF_METADATA_TAB: "Metadata",
DIFF_RULES_TAB: "Rules",
DIFF_MAPPING_REMARKS_TAB: "Remarks",
DIFF_RESOURCES_TAB: "Resources",
DIFF_RML_MODULES_TAB: "RML Modules",
DIFF_CL1_ROLES_TAB: "CL1 Roles",
DIFF_CL2_ORGANISATIONS_TAB: "CL2 Organisations"
},
"actions": {
"set_item_added": "Set Added",
Expand Down Expand Up @@ -107,6 +124,52 @@ def init_tabs(self):
self.tabs[tab][action] = {}
self.tabs[tab][action] = deep_update(self.tabs[tab][action], action_items[tab])

def process_tabs_data(self):
    """
    Runs the per-tab post-processing steps, in a fixed order:
    rules, mapping remarks, resources and RML modules.
    :return:
    """
    tab_processors = (
        self.process_rules_tab,
        self.process_mapping_remarks_tab,
        self.process_resources_tab,
        self.process_rml_modules_tab,
    )
    for process_tab in tab_processors:
        process_tab()

def process_rules_tab(self):
    """
    Adds context information to the changed rows of the rules tab.
    For every diff row whose index is below the number of rules in self.context_mapping, the
    standard_form_field_id and standard_form_field_name of the rule at that index are stored
    in the row under DIFF_VALUE_CONTEXT_KEY.
    :return:
    """
    context_rules = self.context_mapping.rules
    context_rules_count = len(context_rules)
    for changed_rows in self.tabs[DIFF_RULES_TAB].values():
        for row_key, row_diff in changed_rows.items():
            position = int(row_key)
            if position >= context_rules_count:
                # the context mapping has no rule at this position
                continue
            rule: ConceptualMappingRule = context_rules[position]
            row_diff[DIFF_VALUE_CONTEXT_KEY] = [rule.standard_form_field_id, rule.standard_form_field_name]

def process_mapping_remarks_tab(self):
    """
    Adds context information to the changed rows of the mapping remarks tab.
    For every diff row whose index is below the number of mapping remarks in self.context_mapping, the
    standard_form_field_id and standard_form_field_name of the remark at that index are stored
    in the row under DIFF_VALUE_CONTEXT_KEY.
    :return:
    """
    context_remarks = self.context_mapping.mapping_remarks
    context_remarks_count = len(context_remarks)
    for changed_rows in self.tabs[DIFF_MAPPING_REMARKS_TAB].values():
        for row_key, row_diff in changed_rows.items():
            position = int(row_key)
            if position >= context_remarks_count:
                # the context mapping has no remark at this position
                continue
            remark: ConceptualMappingRemark = context_remarks[position]
            row_diff[DIFF_VALUE_CONTEXT_KEY] = [remark.standard_form_field_id, remark.standard_form_field_name]

def process_resources_tab(self):
    """
    Adds context information to the changed rows of the resources tab.
    For every diff row whose index is below the number of resources in self.context_mapping, the
    file_name of the resource at that index is stored (as a one-element list) in the row
    under DIFF_VALUE_CONTEXT_KEY.
    :return:
    """
    context_resources = self.context_mapping.resources
    context_resources_count = len(context_resources)
    for changed_rows in self.tabs[DIFF_RESOURCES_TAB].values():
        for row_key, row_diff in changed_rows.items():
            position = int(row_key)
            if position >= context_resources_count:
                # the context mapping has no resource at this position
                continue
            resource: ConceptualMappingResource = context_resources[position]
            row_diff[DIFF_VALUE_CONTEXT_KEY] = [resource.file_name]

def process_rml_modules_tab(self):
    """
    Adds context information to the changed rows of the RML modules tab.
    For every diff row whose index is below the number of RML modules in self.context_mapping, the
    file_name of the RML module at that index is stored (as a one-element list) in the row
    under DIFF_VALUE_CONTEXT_KEY.
    :return:
    """
    context_rml_modules = self.context_mapping.rml_modules
    context_rml_modules_count = len(context_rml_modules)
    for changed_rows in self.tabs[DIFF_RML_MODULES_TAB].values():
        for row_key, row_diff in changed_rows.items():
            position = int(row_key)
            if position >= context_rml_modules_count:
                # the context mapping has no RML module at this position
                continue
            rml_module: ConceptualMappingRMLModule = context_rml_modules[position]
            row_diff[DIFF_VALUE_CONTEXT_KEY] = [rml_module.file_name]

@classmethod
def normalize_item_key(cls, k):
    """
    Flattens a "root[...][...]" style path into its segments joined by cls.item_key_flattenizer.
    Example (with "|" as flattenizer): "root['rules'][0]['xpath']" becomes "rules|0|xpath".
    :param k: path string that contains "root["
    :return: the flattened key
    """
    unquoted = k.replace("'", "")
    # keep only what follows the first "root[" ...
    after_root = unquoted.split("root[", 1)[1]
    # ... and drop everything from the last "]" onwards
    inner = after_root.rsplit("]", 1)[0]
    segments = inner.split("][")
    return cls.item_key_flattenizer.join(segments)
Expand Down Expand Up @@ -148,7 +211,7 @@ def mapping_suite_diff_conceptual_mappings(mappings: List[ConceptualMapping]) ->

diff.data = transform_conceptual_mappings_diff_data(ConceptualMappingDiffData(
original=DeepDiff(mapping1, mapping2, ignore_order=False)
))
), mapping1=mappings[0], mapping2=mappings[1])
return diff.dict()


Expand Down Expand Up @@ -222,8 +285,10 @@ def mapping_suite_diff_repo_conceptual_mappings(branch_or_tag_name: List[str], m
return mapping_suite_diff_files_conceptual_mappings([filepath1, filepath2])


def transform_conceptual_mappings_diff_data(diff_data: ConceptualMappingDiffData):
diff_transformer = ConceptualMappingDiffDataTransformer(data=diff_data.original)
def transform_conceptual_mappings_diff_data(diff_data: ConceptualMappingDiffData, mapping1: ConceptualMapping,
mapping2: ConceptualMapping):
diff_transformer = ConceptualMappingDiffDataTransformer(data=diff_data.original, mapping1=mapping1,
mapping2=mapping2)
diff_data.transformed = {
"labels": diff_transformer.labels,
"tabs": diff_transformer.tabs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ def mapping_suite_processor_inject_integration_sparql_queries(
output_sparql_queries_folder_path: pathlib.Path
):
"""
This function reads the SPARQL files from conceptual_mappings_file_path Rules sheet, and then, based on this list,
This function reads the SPARQL files from conceptual_mappings_file_path Rules sheet, and then,
based on this list,
the resources in sparql_queries_folder_path will be copied to output_sparql_queries_folder_path.
:param conceptual_mappings_file_path:
:param sparql_queries_folder_path:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,19 @@ def generate_metadata(raw_metadata: dict) -> dict:
:return:
"""

def get_list_from_raw_metadata(raw_metadata: dict, field_key: str) -> list:
def get_list_from_raw_metadata(field_key: str) -> list:
data = raw_metadata[field_key][0]
if pd.notna(data):
return [x.strip() for x in str(data).split(',')]
else:
return []

constraints = {
E_FORMS_SUBTYPE_KEY: [int(float(x)) for x in get_list_from_raw_metadata(raw_metadata, E_FORMS_SUBTYPE_FIELD)],
START_DATE_KEY: get_list_from_raw_metadata(raw_metadata, START_DATE_FIELD),
END_DATE_KEY: get_list_from_raw_metadata(raw_metadata, END_DATE_FIELD),
MIN_XSD_VERSION_KEY: get_list_from_raw_metadata(raw_metadata, MIN_XSD_VERSION_FIELD),
MAX_XSD_VERSION_KEY: get_list_from_raw_metadata(raw_metadata, MAX_XSD_VERSION_FIELD)}
E_FORMS_SUBTYPE_KEY: [int(float(x)) for x in get_list_from_raw_metadata(E_FORMS_SUBTYPE_FIELD)],
START_DATE_KEY: get_list_from_raw_metadata(START_DATE_FIELD),
END_DATE_KEY: get_list_from_raw_metadata(END_DATE_FIELD),
MIN_XSD_VERSION_KEY: get_list_from_raw_metadata(MIN_XSD_VERSION_FIELD),
MAX_XSD_VERSION_KEY: get_list_from_raw_metadata(MAX_XSD_VERSION_FIELD)}

metadata = {TITLE_KEY: raw_metadata[TITLE_FIELD][0], IDENTIFIER_KEY: raw_metadata[IDENTIFIER_FIELD][0],
CREATED_KEY: datetime.now().isoformat(), VERSION_KEY: raw_metadata[VERSION_FIELD][0],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ def mapping_suite_processor_generate_sparql_queries(conceptual_mappings_file_pat
rq_name: str = DEFAULT_RQ_NAME,
prefixes_definitions=None):
"""
This function reads data from conceptual_mappings.xlsx and generates SPARQL validation queries in provided package.
This function reads data from conceptual_mappings.xlsx and generates SPARQL validation queries in
provided package.
:param prefixes_definitions:
:param conceptual_mappings_file_path:
:param output_sparql_queries_folder_path:
Expand Down
1 change: 0 additions & 1 deletion ted_sws/notice_fetcher/adapters/ted_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import requests

from ted_sws import config
from ted_sws.event_manager.services.log import log_error
from ted_sws.notice_fetcher.adapters.ted_api_abc import TedAPIAdapterABC, RequestAPI

DEFAULT_TED_API_QUERY_RESULT_SIZE = {"pageSize": 100,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@

from ted_sws.core.model.metadata import NormalisedMetadata, LanguageTaggedString, NormalisedMetadataView
from ted_sws.core.model.notice import Notice
from ted_sws.event_manager.services.log import log_error
from ted_sws.notice_metadata_processor.services.metadata_constraints import filter_df_by_variables
from ted_sws.data_manager.adapters.notice_repository import NoticeRepositoryABC
from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata
from ted_sws.notice_metadata_processor.services.metadata_constraints import filter_df_by_variables
from ted_sws.notice_metadata_processor.services.xml_manifestation_metadata_extractor import \
XMLManifestationMetadataExtractor
from ted_sws.resources.mapping_files_registry import MappingFilesRegistry
Expand Down Expand Up @@ -151,7 +150,6 @@ def get_map_value(cls, mapping: Dict, value: str) -> str:
def normalise_legal_basis_value(cls, value: str) -> str:
"""
Transforms and returns Legal Basis value
:param mapping:
:param value:
:return:
"""
Expand Down
2 changes: 0 additions & 2 deletions ted_sws/notice_packager/services/notice_packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,6 @@ def get_rdf_content_from_notice(notice: Notice) -> bytes:

def add_rdf_content(self):
"""

:param rdf_content:
:return:
"""
if self.rdf_content is not None:
Expand Down
6 changes: 3 additions & 3 deletions ted_sws/notice_transformer/services/notice_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
from ted_sws.core.model.manifestation import RDFManifestation, XMLManifestation
from ted_sws.core.model.notice import Notice, NoticeStatus
from ted_sws.core.model.transform import MappingSuite, FileResource
from ted_sws.core.model.validation_report import ReportNotice
from ted_sws.core.model.validation_report_data import ReportNoticeData
from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryInFileSystem
from ted_sws.data_manager.adapters.repository_abc import NoticeRepositoryABC, MappingSuiteRepositoryABC
from ted_sws.data_manager.services.mapping_suite_resource_manager import file_resource_output_path, \
mapping_suite_skipped_notice, mapping_suite_notice_path_by_group_depth
mapping_suite_skipped_notice
from ted_sws.event_manager.adapters.event_logger import EventLogger, EventMessageLogSettings
from ted_sws.event_manager.model.event_message import NoticeEventMessage
from ted_sws.event_manager.services.logger_from_context import get_env_logger
from ted_sws.notice_transformer.adapters.notice_transformer import NoticeTransformer
from ted_sws.notice_transformer.adapters.rml_mapper import RMLMapperABC
from ted_sws.notice_transformer.services import DEFAULT_TRANSFORMATION_FILE_EXTENSION
from ted_sws.core.model.validation_report import ReportNotice
from ted_sws.core.model.validation_report_data import ReportNoticeData

DATA_SOURCE_PACKAGE = "data"

Expand Down
Loading