Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ted 720 #278

Merged
merged 4 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions ted_sws/event_manager/services/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,24 @@ def log_cli_brief_notice_info(message: str, name: str = None):
event_message=NoticeEventMessage(message=message),
settings=EventMessageLogSettings(briefly=True)
)


def log_cli_brief_notice_error(message: str, name: str = None):
    """
    Emit an error-level notice event message through the CLI logger, with brief output settings.
    :param message: text wrapped into a NoticeEventMessage
    :param name: forwarded to get_cli_logger to select the logger
    :return:
    """
    cli_logger = get_cli_logger(name=name)
    notice_event_message = NoticeEventMessage(message=message)
    # briefly=True is passed via the log settings (presumably a shortened record format — confirm)
    brief_settings = EventMessageLogSettings(briefly=True)
    cli_logger.error(event_message=notice_event_message, settings=brief_settings)


def log_cli_brief_info(message: str, name: str = None):
    """
    Emit an info-level generic event message through the CLI logger, with brief output settings.
    :param message: text wrapped into an EventMessage
    :param name: forwarded to get_cli_logger to select the logger
    :return:
    """
    cli_logger = get_cli_logger(name=name)
    event_message = EventMessage(message=message)
    # briefly=True is passed via the log settings (presumably a shortened record format — confirm)
    brief_settings = EventMessageLogSettings(briefly=True)
    cli_logger.info(event_message=event_message, settings=brief_settings)


def log_cli_brief_error(message: str, name: str = None):
    """
    Emit an error-level generic event message through the CLI logger, with brief output settings.
    :param message: text wrapped into an EventMessage
    :param name: forwarded to get_cli_logger to select the logger
    :return:
    """
    cli_logger = get_cli_logger(name=name)
    event_message = EventMessage(message=message)
    # briefly=True is passed via the log settings (presumably a shortened record format — confirm)
    brief_settings = EventMessageLogSettings(briefly=True)
    cli_logger.error(event_message=event_message, settings=brief_settings)
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pandas as pd

from ted_sws.event_manager.services.log import log_cli_brief_error
from ted_sws.mapping_suite_processor import CONCEPTUAL_MAPPINGS_METADATA_SHEET_NAME, \
CONCEPTUAL_MAPPINGS_RULES_SHEET_NAME, RULES_FIELD_XPATH, RULES_E_FORM_BT_NAME, RULES_SF_FIELD_ID, RULES_E_FORM_BT_ID
from ted_sws.notice_validator import BASE_XPATH_FIELD
Expand All @@ -17,6 +18,7 @@

SPARQL_PREFIX_PATTERN = re.compile('(?:\\s+|^)(\\w+)?:')
SPARQL_PREFIX_LINE = 'PREFIX {prefix}: <{value}>'
SPARQL_LOGGER_NAME = "SPARQL"


def get_sparql_prefixes(sparql_q: str) -> set:
Expand All @@ -31,13 +33,16 @@ def concat_field_xpath(base_xpath: str, field_xpath: str, separator: str = ", ")
return separator.join([base_xpath + xpath for xpath in field_xpath.splitlines()])


def sparql_validation_generator(data: pd.DataFrame, base_xpath: str) -> Iterator[str]:
def sparql_validation_generator(data: pd.DataFrame, base_xpath: str,
prefixes_definitions) -> Iterator[str]:
"""
This function generates SPARQL queries based on data in the dataframe.
:param prefixes_definitions:
:param data:
:param base_xpath:
:return:
"""

for index, row in data.iterrows():
sf_field_id = row[RULES_SF_FIELD_ID]
sf_field_name = row[RULES_SF_FIELD_NAME]
Expand All @@ -46,11 +51,22 @@ def sparql_validation_generator(data: pd.DataFrame, base_xpath: str) -> Iterator
field_xpath = row[RULES_FIELD_XPATH]
class_path = row[RULES_CLASS_PATH]
property_path = row[RULES_PROPERTY_PATH]
prefixes = [SPARQL_PREFIX_LINE.format(
prefix=prefix, value=PREFIXES_DEFINITIONS.get(prefix)
) for prefix in get_sparql_prefixes(property_path)]
yield f"#title: {sf_field_id} - {sf_field_name}\n" \
f"#description: “{sf_field_id} - {sf_field_name}” in SF corresponds to “{e_form_bt_id} " \

sparql_title = f"{sf_field_id} - {sf_field_name}"

prefixes = []
for prefix in get_sparql_prefixes(property_path):
if prefix in prefixes_definitions:
prefix_value = prefixes_definitions.get(prefix)
else:
# the prefix value is set to "^" on purpose, to generate a syntactically incorrect SPARQL query
prefix_value = "^"
log_cli_brief_error(f"'{sf_field_id}': PREFIX '{prefix}' is not defined.", name=SPARQL_LOGGER_NAME)

prefixes.append(SPARQL_PREFIX_LINE.format(prefix=prefix, value=prefix_value))

yield f"#title: {sparql_title}\n" \
f"#description: “{sparql_title}” in SF corresponds to “{e_form_bt_id} " \
f"{e_form_bt_name}” in eForms. The corresponding XML element is " \
f"{concat_field_xpath(base_xpath, field_xpath)}. " \
f"The expected ontology instances are epo: {class_path} .\n" \
Expand All @@ -61,14 +77,19 @@ def sparql_validation_generator(data: pd.DataFrame, base_xpath: str) -> Iterator

def mapping_suite_processor_generate_sparql_queries(conceptual_mappings_file_path: pathlib.Path,
output_sparql_queries_folder_path: pathlib.Path,
rq_name: str = DEFAULT_RQ_NAME):
rq_name: str = DEFAULT_RQ_NAME,
prefixes_definitions=None):
"""
This function reads data from conceptual_mappings.xlsx and generates SPARQL validation queries in provided package.
:param prefixes_definitions:
:param conceptual_mappings_file_path:
:param output_sparql_queries_folder_path:
:param rq_name:
:return:
"""
if prefixes_definitions is None:
prefixes_definitions = PREFIXES_DEFINITIONS

with open(conceptual_mappings_file_path, 'rb') as excel_file:
conceptual_mappings_rules_df = pd.read_excel(excel_file, sheet_name=CONCEPTUAL_MAPPINGS_RULES_SHEET_NAME)
conceptual_mappings_rules_df.columns = conceptual_mappings_rules_df.iloc[0]
Expand All @@ -78,7 +99,7 @@ def mapping_suite_processor_generate_sparql_queries(conceptual_mappings_file_pat
metadata_df = pd.read_excel(excel_file, sheet_name=CONCEPTUAL_MAPPINGS_METADATA_SHEET_NAME)
metadata = metadata_df.set_index('Field').T.to_dict('list')
base_xpath = metadata[BASE_XPATH_FIELD][0]
sparql_queries = sparql_validation_generator(conceptual_mappings_rules_df, base_xpath)
sparql_queries = sparql_validation_generator(conceptual_mappings_rules_df, base_xpath, prefixes_definitions)
output_sparql_queries_folder_path.mkdir(parents=True, exist_ok=True)
for index, sparql_query in enumerate(sparql_queries):
output_file_path = output_sparql_queries_folder_path / f"{rq_name}{index}.rq"
Expand Down
2 changes: 1 addition & 1 deletion ted_sws/resources/prefixes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
PREFIXES_FILE = "prefixes.json"

PREFIXES: dict = json.loads((PREFIXES_PATH / PREFIXES_FILE).read_text())
PREFIXES_DEFINITIONS: dict = PREFIXES[PREFIXES_DEFINITIONS_KEY]
PREFIXES_DEFINITIONS: dict = PREFIXES[PREFIXES_DEFINITIONS_KEY]
15 changes: 14 additions & 1 deletion tests/unit/event_manager/services/test_log.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from ted_sws.event_manager.services.log import log_debug, log_error, log_info, log_warning, log_technical_debug, \
log_technical_error, log_technical_info, log_technical_warning, log_notice_warning, log_notice_error, \
log_notice_debug, log_notice_info, log_mapping_suite_debug, log_mapping_suite_info, log_mapping_suite_error, \
log_mapping_suite_warning, log_cli_brief_notice_info
log_mapping_suite_warning, log_cli_brief_notice_info, log_cli_brief_notice_error, log_cli_brief_info, \
log_cli_brief_error


def test_log_info():
Expand Down Expand Up @@ -70,3 +71,15 @@ def test_log_mapping_suite_warning():

def test_log_cli_brief_notice_info():
    """Smoke test: log_cli_brief_notice_info must accept a plain message without raising."""
    brief_message = "TEST_BRIEF_NOTICE_INFO_MESSAGE"
    log_cli_brief_notice_info(brief_message)


def test_log_cli_brief_notice_error():
    """Smoke test: log_cli_brief_notice_error must accept a plain message without raising."""
    brief_message = "TEST_BRIEF_NOTICE_ERROR_MESSAGE"
    log_cli_brief_notice_error(brief_message)


def test_log_cli_brief_info():
    """Smoke test: log_cli_brief_info must accept a plain message without raising."""
    brief_message = "TEST_BRIEF_INFO_MESSAGE"
    log_cli_brief_info(brief_message)


def test_log_cli_brief_error():
    """Smoke test: log_cli_brief_error must accept a plain message without raising."""
    brief_message = "TEST_BRIEF_ERROR_MESSAGE"
    log_cli_brief_error(brief_message)
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import shutil
import tempfile
from pathlib import Path

from ted_sws.mapping_suite_processor.entrypoints.cli.cmd_sparql_generator import DEFAULT_OUTPUT_SPARQL_QUERIES_FOLDER
from ted_sws.mapping_suite_processor.services.conceptual_mapping_generate_sparql_queries import \
mapping_suite_processor_generate_sparql_queries
from ted_sws.mapping_suite_processor.entrypoints.cli import CONCEPTUAL_MAPPINGS_FILE_TEMPLATE


def test_mapping_suite_processor_generate_sparql_queries(caplog, fake_mapping_suite_id, file_system_repository_path):
    """
    Generate SPARQL queries twice from a temporary copy of the repository: first with the default
    prefix definitions (no ERROR expected in the captured log), then with a custom prefix mapping
    containing only "test" (an "is not defined" ERROR is expected in the captured log).
    """
    with tempfile.TemporaryDirectory() as temp_folder:
        temp_mapping_suite_path = Path(temp_folder)
        # Work on a copy so generated queries never touch the source repository folder
        shutil.copytree(file_system_repository_path, temp_mapping_suite_path,
                        dirs_exist_ok=True)

        # Both path templates are filled with the same pair of placeholders
        template_values = {
            "mappings_path": temp_mapping_suite_path,
            "mapping_suite_id": fake_mapping_suite_id,
        }
        conceptual_mappings_file_path = Path(CONCEPTUAL_MAPPINGS_FILE_TEMPLATE.format(**template_values))
        output_sparql_queries_folder_path = Path(DEFAULT_OUTPUT_SPARQL_QUERIES_FOLDER.format(**template_values))

        # First run: rely on the generator's default prefix definitions
        mapping_suite_processor_generate_sparql_queries(
            conceptual_mappings_file_path=conceptual_mappings_file_path,
            output_sparql_queries_folder_path=output_sparql_queries_folder_path
        )
        assert output_sparql_queries_folder_path.is_dir()
        assert any(output_sparql_queries_folder_path.iterdir())
        assert "ERROR" not in caplog.text

        # Second run: supply a prefix mapping that only knows "test"
        custom_prefixes = {
            "test": "https://test"
        }
        mapping_suite_processor_generate_sparql_queries(
            conceptual_mappings_file_path=conceptual_mappings_file_path,
            output_sparql_queries_folder_path=output_sparql_queries_folder_path,
            prefixes_definitions=custom_prefixes
        )
        assert output_sparql_queries_folder_path.is_dir()
        assert any(output_sparql_queries_folder_path.iterdir())
        assert "ERROR" in caplog.text
        assert "is not defined" in caplog.text