From e330cc4b46bdbea72e18e4294c1d6ac5fd803e35 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Fri, 19 Aug 2022 16:29:18 +0300 Subject: [PATCH 01/65] WIP --- tests/features/__init__.py | 19 -- tests/features/conftest.py | 52 +--- tests/features/notice_fetcher/__init__.py | 8 - tests/features/notice_fetcher/conftest.py | 37 ++- .../test_notice_fetcher.feature | 61 ++++- .../notice_fetcher/test_notice_fetcher.py | 230 ++++++++++++++---- .../notice_metadata_processor/__init__.py | 0 .../test_notice_eligiblity.feature | 6 + tests/features/notice_transformer/__init__.py | 8 - .../test_notice_transformer.feature | 14 ++ .../test_notice_transformer.py | 0 .../test_notice_transformer_bla_b.py | 3 + tests/features/notice_validator/__init__.py | 0 .../test_notice_validator.feature | 6 + tests/old_features/__init__.py | 19 ++ tests/old_features/conftest.py | 60 +++++ .../metadata_normaliser/__init__.py | 0 .../metadata_normaliser/conftest.py | 0 .../metadata_normaliser.feature | 0 .../notice_extractor.feature | 0 .../test_metadata_normaliser.py | 0 .../test_notice_extractor.py | 0 .../model/__init__.py | 0 .../model/conftest.py | 0 .../model/test_creating_notice.feature | 0 .../model/test_creating_notice.py | 0 .../model/test_notice_operations.feature | 0 .../model/test_notice_operations.py | 0 .../notice_eligibility_checker/__init__.py | 0 .../notice_eligibility_rml_mapping.feature | 0 .../test_notice_eligibility.py | 0 ...otice_eligibility_validation_tests.feature | 0 .../test_notice_ineligibility.py | 0 .../notice_fetcher}/__init__.py | 0 tests/old_features/notice_fetcher/conftest.py | 18 ++ .../test_fetching_types.feature | 0 .../notice_fetcher/test_fetching_types.py | 0 .../test_notice_fetcher.feature | 22 ++ .../notice_fetcher/test_notice_fetcher.py | 79 ++++++ .../test_search_queries.feature | 0 .../notice_fetcher/test_search_queries.py | 0 .../notice_normaliser}/__init__.py | 0 .../notice_normalizer.feature | 0 .../notice_packager}/__init__.py | 0 .../notice_packager/mets_metadata.feature | 0 .../notice_packager/notice_packager.feature | 0 .../notice_packager/validate_package.feature | 0 .../notice_transformer/__init__.py | 8 + .../notice_transformer.feature | 0 .../transformation_validator/__init__.py | 8 + .../validate_transformer.feature | 0 51 files changed, 508 insertions(+), 150 deletions(-) create mode 100644 tests/features/notice_metadata_processor/__init__.py create mode 100644 tests/features/notice_metadata_processor/test_notice_eligiblity.feature create mode 100644 tests/features/notice_transformer/test_notice_transformer.feature create mode 100644 tests/features/notice_transformer/test_notice_transformer.py create mode 100644 tests/features/notice_transformer/test_notice_transformer_bla_b.py create mode 100644 tests/features/notice_validator/__init__.py create mode 100644 tests/features/notice_validator/test_notice_validator.feature create mode 100644 tests/old_features/__init__.py create mode 100644 tests/old_features/conftest.py rename tests/{features => old_features}/metadata_normaliser/__init__.py (100%) rename tests/{features => old_features}/metadata_normaliser/conftest.py (100%) rename tests/{features => old_features}/metadata_normaliser/metadata_normaliser.feature (100%) rename tests/{features => old_features}/metadata_normaliser/notice_extractor.feature (100%) rename tests/{features => old_features}/metadata_normaliser/test_metadata_normaliser.py (100%) rename tests/{features => old_features}/metadata_normaliser/test_notice_extractor.py (100%) rename tests/{features => old_features}/model/__init__.py (100%) rename tests/{features => old_features}/model/conftest.py (100%) rename tests/{features => old_features}/model/test_creating_notice.feature (100%) rename tests/{features => old_features}/model/test_creating_notice.py (100%) rename tests/{features => old_features}/model/test_notice_operations.feature (100%) rename tests/{features => old_features}/model/test_notice_operations.py (100%) rename tests/{features => old_features}/notice_eligibility_checker/__init__.py (100%) rename tests/{features => old_features}/notice_eligibility_checker/notice_eligibility_rml_mapping.feature (100%) rename tests/{features => old_features}/notice_eligibility_checker/test_notice_eligibility.py (100%) rename tests/{features => old_features}/notice_eligibility_checker/test_notice_eligibility_validation_tests.feature (100%) rename tests/{features => old_features}/notice_eligibility_checker/test_notice_ineligibility.py (100%) rename tests/{features/notice_normaliser => old_features/notice_fetcher}/__init__.py (100%) create mode 100644 tests/old_features/notice_fetcher/conftest.py rename tests/{features => old_features}/notice_fetcher/test_fetching_types.feature (100%) rename tests/{features => old_features}/notice_fetcher/test_fetching_types.py (100%) create mode 100644 tests/old_features/notice_fetcher/test_notice_fetcher.feature create mode 100644 tests/old_features/notice_fetcher/test_notice_fetcher.py rename tests/{features => old_features}/notice_fetcher/test_search_queries.feature (100%) rename tests/{features => old_features}/notice_fetcher/test_search_queries.py (100%) rename tests/{features/notice_packager => old_features/notice_normaliser}/__init__.py (100%) rename tests/{features => old_features}/notice_normaliser/notice_normalizer.feature (100%) rename tests/{features/transformation_validator => old_features/notice_packager}/__init__.py (100%) rename tests/{features => old_features}/notice_packager/mets_metadata.feature (100%) rename tests/{features => old_features}/notice_packager/notice_packager.feature (100%) rename tests/{features => old_features}/notice_packager/validate_package.feature (100%) create mode 100644 tests/old_features/notice_transformer/__init__.py rename tests/{features => old_features}/notice_transformer/notice_transformer.feature (100%) create mode 100644 tests/old_features/transformation_validator/__init__.py rename tests/{features => old_features}/transformation_validator/validate_transformer.feature (100%) diff --git a/tests/features/__init__.py b/tests/features/__init__.py index 54e38cdef..e69de29bb 100644 --- a/tests/features/__init__.py +++ b/tests/features/__init__.py @@ -1,19 +0,0 @@ -#!/usr/bin/python3 - -# __init__.py -# Date: 03/02/2022 -# Author: Eugeniu Costetchi -# Email: costezki.eugen@gmail.com - -""" """ - - -def str2bool(value: str) -> bool: - """ - Parse a string value and cast it into its boolean value - :param value: - :return: - """ - if value in ["y", "yes", "t", "true", "on", "1"]: return True - if value in ["n", "no", "f", "false", "off", "0"]: return False - raise ValueError("boolean value unrecognised") diff --git a/tests/features/conftest.py b/tests/features/conftest.py index 4242ec3ae..a825f44b4 100644 --- a/tests/features/conftest.py +++ b/tests/features/conftest.py @@ -1,60 +1,28 @@ +import mongomock +import pymongo import pytest -from pymongo import MongoClient from ted_sws import config -from ted_sws.core.model.metadata import XMLMetadata -from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryInFileSystem from ted_sws.data_manager.adapters.notice_repository import NoticeRepository -from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI -from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher -from tests import TEST_DATA_PATH - NOTICE_STORAGE_FEATURES_TEST_DB = "features_test_db_for_notice" @pytest.fixture -def api_end_point(): - return config.TED_API_URL - - -@pytest.fixture -def notice_storage(): - url = config.MONGO_DB_AUTH_URL - mongodb_client = MongoClient(url) - mongodb_client.drop_database(NOTICE_STORAGE_FEATURES_TEST_DB) - return NoticeRepository(mongodb_client=mongodb_client, database_name=NOTICE_STORAGE_FEATURES_TEST_DB) +def mongodb_end_point(): + return config.MONGO_DB_AUTH_URL @pytest.fixture -def f03_notice_2020(notice_storage, api_end_point): - notice_search_query = {"q": "ND=[408313-2020]"} - NoticeFetcher(notice_repository=notice_storage, - ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), - ted_api_url=api_end_point)).fetch_notices_by_query( - query=notice_search_query) - notice = notice_storage.get(reference="408313-2020") - notice.set_xml_metadata(xml_metadata=XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"])) - return notice +def mongodb_client(mongodb_end_point): + return pymongo.MongoClient(mongodb_end_point) @pytest.fixture -def f18_notice_2022(notice_storage, api_end_point): - notice_search_query = {"q": "ND=[067623-2022]"} - NoticeFetcher(notice_repository=notice_storage, - ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), - ted_api_url=api_end_point)).fetch_notices_by_query( - query=notice_search_query) - notice = notice_storage.get(reference="067623-2022") - notice.set_xml_metadata(xml_metadata=XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"])) - return notice - - -@pytest.fixture -def file_system_repository_path(): - return TEST_DATA_PATH / "notice_transformer" / "test_repository" +def ted_api_end_point(): + return config.TED_API_URL @pytest.fixture -def mapping_suite_repository_in_file_system(file_system_repository_path): - return MappingSuiteRepositoryInFileSystem(repository_path=file_system_repository_path) +def notice_repository(mongodb_client): + return NoticeRepository(mongodb_client=mongodb_client, database_name=NOTICE_STORAGE_FEATURES_TEST_DB) diff --git a/tests/features/notice_fetcher/__init__.py b/tests/features/notice_fetcher/__init__.py index 00c67d91a..e69de29bb 100644 --- a/tests/features/notice_fetcher/__init__.py +++ b/tests/features/notice_fetcher/__init__.py @@ -1,8 +0,0 @@ -#!/usr/bin/python3 - -# __init__.py -# Date: 03/02/2022 -# Author: Eugeniu Costetchi -# Email: costezki.eugen@gmail.com - -""" """ \ No newline at end of file diff --git a/tests/features/notice_fetcher/conftest.py b/tests/features/notice_fetcher/conftest.py index d80a02eb8..aba1394bd 100644 --- a/tests/features/notice_fetcher/conftest.py +++ b/tests/features/notice_fetcher/conftest.py @@ -1,18 +1,43 @@ +from datetime import date + import pytest +from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI +from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher + + +@pytest.fixture +def fetch_notice_id(): + return "No notice ID" + + +@pytest.fixture +def fetch_start_date(): + return date(2020, 2, 3) + + +@pytest.fixture +def fetch_date(fetch_start_date): + return fetch_start_date + @pytest.fixture -def notice_identifier(): - return "067623-2022" +def fetch_end_date(): + return date(2020, 2, 3) @pytest.fixture -def notice_search_query(): - return {"q": "ND=[067623-2022]"} +def fetch_wildcard_date(): + return "20200203*" @pytest.fixture -def notice_incorrect_search_query(): - return {"q": "ND=067623-20224856"} +def fetch_query(fetch_wildcard_date): + return {"q": f"PD=[{fetch_wildcard_date}]"} +@pytest.fixture +def notice_fetcher(notice_repository, ted_api_end_point): + return NoticeFetcher(notice_repository=notice_repository, + ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), + ted_api_url=ted_api_end_point)) diff --git a/tests/features/notice_fetcher/test_notice_fetcher.feature b/tests/features/notice_fetcher/test_notice_fetcher.feature index d491c34e4..d8f43bf59 100644 --- a/tests/features/notice_fetcher/test_notice_fetcher.feature +++ b/tests/features/notice_fetcher/test_notice_fetcher.feature @@ -1,21 +1,56 @@ -# Created by dude at 25/01/2022 +# Created by Stefan at 16.08.2022 Feature: Notice fetcher The system is able to fetch selected TED-XML notices together with their metadata - Scenario: Fetch a TED notice - Given a TED REST API download endpoint - And correct download API parameters - When call to the API is made - Then a notice and notice metadata is received from the API - And the notice and notice metadata are stored + Scenario: Fetch notices, from TED, for a date + Given a date + And knowing the TED API endpoint + And knowing database endpoint + When notice fetching by date wildcard is executed + Then a list of fetched notice_ids is returned + And a daily notice-batch (supra-notice) is created containing the fetched notice_ids list + And the database contains notices with these Ids, including the XML manifestation, and the metadata - Scenario: Fail to fetch a TED notice - Given a TED REST API download endpoint - And incorrect download API parameters - When the call to the API is made - And no notice or metadata is returned - Then an error message is received indicating the problem + Scenario: Fetch a notice by id, from Ted + Given a notice_id + And knowing the TED API endpoint + And knowing database endpoint + When notice fetching by id is executed + Then fetched notice is available in database + And fetched notice have raw status + And fetched notice have xml_manifestation + And fetched notice have original_metadata + Scenario: Fetch notices by query, from Ted + Given a query + And knowing the TED API endpoint + And knowing database endpoint + When notices fetching by query is executed + Then a list of fetched notice_ids is returned + And foreach returned notice_id exist in database a notice with RAW status + And foreach returned notice_id exist in database a notice with xml_manifestation + And foreach returned notice_id exist in database a notice with original_metadata + + Scenario: Fetch notices by date range, from Ted + Given a start_date + And a end_date + And knowing the TED API endpoint + And knowing database endpoint + When notices fetching by date range is executed + Then a list of fetched notice_ids is returned + And foreach returned notice_id exist in database a notice with RAW status + And foreach returned notice_id exist in database a notice with xml_manifestation + And foreach returned notice_id exist in database a notice with original_metadata + + Scenario: Fetch notices by date wild card, from Ted + Given a wildcard_date + And knowing the TED API endpoint + And knowing database endpoint + When notices fetching by date wild card is executed + Then a list of fetched notice_ids is returned + And foreach returned notice_id exist in database a notice with RAW status + And foreach returned notice_id exist in database a notice with xml_manifestation + And foreach returned notice_id exist in database a notice with original_metadata diff --git a/tests/features/notice_fetcher/test_notice_fetcher.py b/tests/features/notice_fetcher/test_notice_fetcher.py index 5e5e42dd1..d1bbb6226 100644 --- a/tests/features/notice_fetcher/test_notice_fetcher.py +++ b/tests/features/notice_fetcher/test_notice_fetcher.py @@ -1,79 +1,201 @@ -import pytest -from _pytest._code import ExceptionInfo -from pytest_bdd import scenario, given, when, then +"""Notice fetcher feature tests.""" -from ted_sws.core.model.notice import Notice +from pytest_bdd import ( + given, + scenario, + then, + when, +) + +from datetime import date, datetime + +from ted_sws.core.model.notice import Notice, NoticeStatus from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher -@scenario('test_notice_fetcher.feature', 'Fetch a TED notice') -def test_fetch_a_ted_notice(): - """Fetch a TED notice""" +@scenario('test_notice_fetcher.feature', 'Fetch a notice by id, from Ted') +def test_fetch_a_notice_by_id_from_ted(): + """Fetch a notice by id, from Ted.""" + + +@scenario('test_notice_fetcher.feature', 'Fetch notices by date range, from Ted') +def test_fetch_notices_by_date_range_from_ted(): + """Fetch notices by date range, from Ted.""" + + +@scenario('test_notice_fetcher.feature', 'Fetch notices by date wild card, from Ted') +def test_fetch_notices_by_date_wild_card_from_ted(): + """Fetch notices by date wild card, from Ted.""" + + +@scenario('test_notice_fetcher.feature', 'Fetch notices by query, from Ted') +def test_fetch_notices_by_query_from_ted(): + """Fetch notices by query, from Ted.""" + + +@scenario('test_notice_fetcher.feature', 'Fetch notices, from TED, for a date') +def test_fetch_notices_from_ted_for_a_date(): + """Fetch notices, from TED, for a date.""" + + +@given('a date') +def a_date(fetch_date): + """a date.""" + assert type(fetch_date) == date + + +@given('a end_date') +def a_end_date(fetch_end_date): + """a end_date.""" + assert type(fetch_end_date) == date + + +@given('a notice_id') +def a_notice_id(notice_id): + """a notice_id.""" + assert type(notice_id) == str + + +@given('a query') +def a_query(fetch_query): + """a query.""" + assert type(fetch_query) == dict + + +@given('a start_date') +def a_start_date(fetch_start_date): + """a start_date.""" + assert type(fetch_start_date) == date + + +@given('a wildcard_date') +def a_wildcard_date(fetch_wildcard_date): + """a wildcard_date.""" + assert type(fetch_wildcard_date) == str + date_filter = datetime.strptime(fetch_wildcard_date, "%Y%m%d*").date() + assert type(date_filter) == date + + +@given('knowing database endpoint') +def knowing_database_endpoint(mongodb_end_point): + """knowing database endpoint.""" + assert mongodb_end_point is not None + assert type(mongodb_end_point) == str + + +@given('knowing the TED API endpoint') +def knowing_the_ted_api_endpoint(ted_api_end_point): + """knowing the TED API endpoint.""" + assert ted_api_end_point is not None + assert type(ted_api_end_point) == str + + +@when('notice fetching by date wildcard is executed', target_fixture="fetched_notice_ids") +def notice_fetching_by_date_wildcard_is_executed(notice_fetcher, fetch_wildcard_date): + """notice fetching by date wildcard is executed.""" + notice_ids = notice_fetcher.fetch_notices_by_date_wild_card(wildcard_date=fetch_wildcard_date) + return notice_ids + + +@when('notice fetching by id is executed') +def notice_fetching_by_id_is_executed(notice_fetcher, fetch_notice_id, notice_repository): + """notice fetching by id is executed.""" + notice_fetcher.fetch_notice_by_id(document_id=fetch_notice_id) + + +@when('notices fetching by date range is executed', target_fixture="fetched_notice_ids") +def notices_fetching_by_date_range_is_executed(notice_fetcher, fetch_start_date, fetch_end_date): + """notices fetching by date range is executed.""" + notice_ids = notice_fetcher.fetch_notices_by_date_range(start_date=fetch_start_date, end_date=fetch_end_date) + return notice_ids + + +@when('notices fetching by date wild card is executed', target_fixture="fetched_notice_ids") +def notices_fetching_by_date_wild_card_is_executed(notice_fetcher, fetch_wildcard_date): + """notices fetching by date wild card is executed.""" + notice_ids = notice_fetcher.fetch_notices_by_date_wild_card(wildcard_date=fetch_wildcard_date) + return notice_ids + + +@when('notices fetching by query is executed', target_fixture="fetched_notice_ids") +def notices_fetching_by_query_is_executed(notice_fetcher, fetch_query): + """notices fetching by query is executed.""" + notice_ids = notice_fetcher.fetch_notices_by_query(query=fetch_query) + return notice_ids -@given("a TED REST API download endpoint", target_fixture="api_url") -def step_impl(api_end_point): - return api_end_point +@then('a daily notice-batch (supra-notice) is created containing the fetched notice_ids list') +def a_daily_noticebatch_supranotice_is_created_containing_the_fetched_notice_ids_list(): + """a daily notice-batch (supra-notice) is created containing the fetched notice_ids list.""" + raise NotImplementedError -@given("correct download API parameters") -def step_impl(notice_search_query): - return notice_search_query +@then('a list of fetched notice_ids is returned') +def a_list_of_fetched_notice_ids_is_returned(fetched_notice_ids): + """a list of fetched notice_ids is returned.""" + assert fetched_notice_ids is not None + assert type(fetched_notice_ids) == list + for notice_id in fetched_notice_ids: + assert type(notice_id) == str -@when("call to the API is made", target_fixture="notice_storage") -def step_impl(notice_search_query, api_end_point, notice_storage): - NoticeFetcher(notice_repository=notice_storage, - ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), ted_api_url=api_end_point)).fetch_notices_by_query( - query=notice_search_query) - return notice_storage +@then('fetched notice have original_metadata') +def fetched_notice_have_original_metadata(fetched_notice: Notice): + """fetched notice have original_metadata.""" + assert fetched_notice.original_metadata -@then("a notice and notice metadata is received from the API", target_fixture="notice_storage") -def step_impl(notice_storage): - notices = list(notice_storage.list()) - assert isinstance(notices, list) - assert len(notices) > 0 - notice = notices[0] - assert isinstance(notice, Notice) - assert notice.xml_manifestation - assert notice.original_metadata - return notice_storage +@then('fetched notice have raw status') +def fetched_notice_have_raw_status(fetched_notice: Notice): + """fetched notice have raw status.""" + assert fetched_notice.status == NoticeStatus.RAW -@then("the notice and notice metadata are stored") -def step_impl(notice_storage, notice_identifier): - assert notice_storage.get(reference=notice_identifier) - assert notice_storage.get(reference=notice_identifier).original_metadata - assert notice_storage.get(reference=notice_identifier).xml_manifestation +@then('fetched notice have xml_manifestation') +def fetched_notice_have_xml_manifestation(fetched_notice: Notice): + """fetched notice have xml_manifestation.""" + assert fetched_notice.xml_manifestation + assert fetched_notice.xml_manifestation.object_data -@scenario('test_notice_fetcher.feature', 'Fail to fetch a TED notice') -def test_fail_to_fetch_a_ted_notice(): - """Fail to fetch a TED notice""" +@then('fetched notice is available in database', target_fixture="fetched_notice") +def fetched_notice_is_available_in_database(fetch_notice_id, notice_repository): + """fetched notice is available in database.""" + result_notice = notice_repository.get(reference=fetch_notice_id) + assert result_notice + return result_notice -@given("incorrect download API parameters") -def step_impl(notice_incorrect_search_query): - return notice_incorrect_search_query +@then('foreach returned notice_id exist in database a notice with RAW status') +def foreach_returned_notice_id_exist_in_database_a_notice_with_raw_status(fetched_notice_ids, notice_repository): + """foreach returned notice_id exist in database a notice with RAW status.""" + for notice_id in fetched_notice_ids: + notice = notice_repository.get(reference=notice_id) + assert notice + assert notice.status == NoticeStatus.RAW -@when("the call to the API is made", target_fixture="api_call_message") -def step_impl(notice_incorrect_search_query, api_end_point, notice_storage): - with pytest.raises(Exception) as e: - NoticeFetcher(notice_repository=notice_storage, - ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), ted_api_url=api_end_point)).fetch_notices_by_query( - query=notice_incorrect_search_query) - return e +@then('foreach returned notice_id exist in database a notice with original_metadata') +def foreach_returned_notice_id_exist_in_database_a_notice_with_original_metadata(fetched_notice_ids, notice_repository): + """foreach returned notice_id exist in database a notice with original_metadata.""" + for notice_id in fetched_notice_ids: + notice = notice_repository.get(reference=notice_id) + assert notice + assert notice.original_metadata -@when("no notice or metadata is returned") -def step_impl(notice_storage, notice_identifier): - assert notice_storage.get(notice_identifier) is None +@then('foreach returned notice_id exist in database a notice with xml_manifestation') +def foreach_returned_notice_id_exist_in_database_a_notice_with_xml_manifestation(fetched_notice_ids, notice_repository): + """foreach returned notice_id exist in database a notice with xml_manifestation.""" + for notice_id in fetched_notice_ids: + notice = notice_repository.get(reference=notice_id) + assert notice + assert notice.xml_manifestation + assert notice.xml_manifestation.object_data -@then("an error message is received indicating the problem") -def step_impl(api_call_message): - assert isinstance(api_call_message, ExceptionInfo) - assert str(api_call_message.value) == "The API call failed with: " +@then('the database contains notices with these Ids, including the XML manifestation, and the metadata') +def the_database_contains_notices_with_these_ids_including_the_xml_manifestation_and_the_metadata(): + """the database contains notices with these Ids, including the XML manifestation, and the metadata.""" + raise NotImplementedError diff --git a/tests/features/notice_metadata_processor/__init__.py b/tests/features/notice_metadata_processor/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/features/notice_metadata_processor/test_notice_eligiblity.feature b/tests/features/notice_metadata_processor/test_notice_eligiblity.feature new file mode 100644 index 000000000..e3076e406 --- /dev/null +++ b/tests/features/notice_metadata_processor/test_notice_eligiblity.feature @@ -0,0 +1,6 @@ +# Created by Stefan at 18.08.2022 +Feature: # Enter feature name here + # Enter feature description here + + Scenario: # Enter scenario name here + # Enter steps here \ No newline at end of file diff --git a/tests/features/notice_transformer/__init__.py b/tests/features/notice_transformer/__init__.py index 00c67d91a..e69de29bb 100644 --- a/tests/features/notice_transformer/__init__.py +++ b/tests/features/notice_transformer/__init__.py @@ -1,8 +0,0 @@ -#!/usr/bin/python3 - -# __init__.py -# Date: 03/02/2022 -# Author: Eugeniu Costetchi -# Email: costezki.eugen@gmail.com - -""" """ \ No newline at end of file diff --git a/tests/features/notice_transformer/test_notice_transformer.feature b/tests/features/notice_transformer/test_notice_transformer.feature new file mode 100644 index 000000000..866356134 --- /dev/null +++ b/tests/features/notice_transformer/test_notice_transformer.feature @@ -0,0 +1,14 @@ +# Created by Stefan at 16.08.2022 +Feature: Notice transformer + The system is able to transform a notice from XML format in RDF format + + Scenario: Transform a TED notice + Given a notice Id available in the database with form number F03 + And Stefan has a banana + And a mapping suite for F03 available in the database + And the notice status is ELIGIBLE_FOR_TRANSFORMATION + And knowing database endpoint + When the notice transformation is executed + Then RDF notice manifestation is available in the database + And the notice status is TRANSFORMED + diff --git a/tests/features/notice_transformer/test_notice_transformer.py b/tests/features/notice_transformer/test_notice_transformer.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/features/notice_transformer/test_notice_transformer_bla_b.py b/tests/features/notice_transformer/test_notice_transformer_bla_b.py new file mode 100644 index 000000000..1188f40fb --- /dev/null +++ b/tests/features/notice_transformer/test_notice_transformer_bla_b.py @@ -0,0 +1,3 @@ +from pytest_bdd import scenario, given, when, then + + diff --git a/tests/features/notice_validator/__init__.py b/tests/features/notice_validator/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/features/notice_validator/test_notice_validator.feature b/tests/features/notice_validator/test_notice_validator.feature new file mode 100644 index 000000000..a6a71cde7 --- /dev/null +++ b/tests/features/notice_validator/test_notice_validator.feature @@ -0,0 +1,6 @@ +# Created by Stefan at 16.08.2022 +Feature: # Enter feature name here + # Enter feature description here + + Scenario: # Enter scenario name here + # Enter steps here \ No newline at end of file diff --git a/tests/old_features/__init__.py b/tests/old_features/__init__.py new file mode 100644 index 000000000..54e38cdef --- /dev/null +++ b/tests/old_features/__init__.py @@ -0,0 +1,19 @@ +#!/usr/bin/python3 + +# __init__.py +# Date: 03/02/2022 +# Author: Eugeniu Costetchi +# Email: costezki.eugen@gmail.com + +""" """ + + +def str2bool(value: str) -> bool: + """ + Parse a string value and cast it into its boolean value + :param value: + :return: + """ + if value in ["y", "yes", "t", "true", "on", "1"]: return True + if value in ["n", "no", "f", "false", "off", "0"]: return False + raise ValueError("boolean value unrecognised") diff --git a/tests/old_features/conftest.py b/tests/old_features/conftest.py new file mode 100644 index 000000000..4242ec3ae --- /dev/null +++ b/tests/old_features/conftest.py @@ -0,0 +1,60 @@ +import pytest +from pymongo import MongoClient + +from ted_sws import config +from ted_sws.core.model.metadata import XMLMetadata +from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryInFileSystem +from ted_sws.data_manager.adapters.notice_repository import NoticeRepository + +from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI +from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher +from tests import TEST_DATA_PATH + +NOTICE_STORAGE_FEATURES_TEST_DB = "features_test_db_for_notice" + + +@pytest.fixture +def api_end_point(): + return config.TED_API_URL + + +@pytest.fixture +def notice_storage(): + url = config.MONGO_DB_AUTH_URL + mongodb_client = MongoClient(url) + mongodb_client.drop_database(NOTICE_STORAGE_FEATURES_TEST_DB) + return NoticeRepository(mongodb_client=mongodb_client, database_name=NOTICE_STORAGE_FEATURES_TEST_DB) + + +@pytest.fixture +def f03_notice_2020(notice_storage, api_end_point): + notice_search_query = {"q": "ND=[408313-2020]"} + NoticeFetcher(notice_repository=notice_storage, + ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), + ted_api_url=api_end_point)).fetch_notices_by_query( + query=notice_search_query) + notice = notice_storage.get(reference="408313-2020") + notice.set_xml_metadata(xml_metadata=XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"])) + return notice + + +@pytest.fixture +def f18_notice_2022(notice_storage, api_end_point): + notice_search_query = {"q": "ND=[067623-2022]"} + NoticeFetcher(notice_repository=notice_storage, + ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), + ted_api_url=api_end_point)).fetch_notices_by_query( + query=notice_search_query) + notice = notice_storage.get(reference="067623-2022") + notice.set_xml_metadata(xml_metadata=XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"])) + return notice + + +@pytest.fixture +def file_system_repository_path(): + return TEST_DATA_PATH / "notice_transformer" / "test_repository" + + +@pytest.fixture +def mapping_suite_repository_in_file_system(file_system_repository_path): + return MappingSuiteRepositoryInFileSystem(repository_path=file_system_repository_path) diff --git a/tests/features/metadata_normaliser/__init__.py b/tests/old_features/metadata_normaliser/__init__.py similarity index 100% rename from tests/features/metadata_normaliser/__init__.py rename to tests/old_features/metadata_normaliser/__init__.py diff --git a/tests/features/metadata_normaliser/conftest.py b/tests/old_features/metadata_normaliser/conftest.py similarity index 100% rename from tests/features/metadata_normaliser/conftest.py rename to tests/old_features/metadata_normaliser/conftest.py diff --git a/tests/features/metadata_normaliser/metadata_normaliser.feature b/tests/old_features/metadata_normaliser/metadata_normaliser.feature similarity index 100% rename from tests/features/metadata_normaliser/metadata_normaliser.feature rename to tests/old_features/metadata_normaliser/metadata_normaliser.feature diff --git a/tests/features/metadata_normaliser/notice_extractor.feature b/tests/old_features/metadata_normaliser/notice_extractor.feature similarity index 100% rename from tests/features/metadata_normaliser/notice_extractor.feature rename to tests/old_features/metadata_normaliser/notice_extractor.feature diff --git a/tests/features/metadata_normaliser/test_metadata_normaliser.py b/tests/old_features/metadata_normaliser/test_metadata_normaliser.py similarity index 100% rename from tests/features/metadata_normaliser/test_metadata_normaliser.py rename to tests/old_features/metadata_normaliser/test_metadata_normaliser.py diff --git a/tests/features/metadata_normaliser/test_notice_extractor.py b/tests/old_features/metadata_normaliser/test_notice_extractor.py similarity index 100% rename from tests/features/metadata_normaliser/test_notice_extractor.py rename to tests/old_features/metadata_normaliser/test_notice_extractor.py diff --git a/tests/features/model/__init__.py b/tests/old_features/model/__init__.py similarity index 100% rename from tests/features/model/__init__.py rename to tests/old_features/model/__init__.py diff --git a/tests/features/model/conftest.py b/tests/old_features/model/conftest.py similarity index 100% rename from tests/features/model/conftest.py rename to tests/old_features/model/conftest.py diff --git a/tests/features/model/test_creating_notice.feature b/tests/old_features/model/test_creating_notice.feature similarity index 100% rename from tests/features/model/test_creating_notice.feature rename to tests/old_features/model/test_creating_notice.feature diff --git a/tests/features/model/test_creating_notice.py b/tests/old_features/model/test_creating_notice.py similarity index 100% rename from tests/features/model/test_creating_notice.py rename to tests/old_features/model/test_creating_notice.py diff --git a/tests/features/model/test_notice_operations.feature b/tests/old_features/model/test_notice_operations.feature similarity index 100% rename from tests/features/model/test_notice_operations.feature rename to tests/old_features/model/test_notice_operations.feature diff --git a/tests/features/model/test_notice_operations.py b/tests/old_features/model/test_notice_operations.py similarity index 100% rename from tests/features/model/test_notice_operations.py rename to tests/old_features/model/test_notice_operations.py diff --git a/tests/features/notice_eligibility_checker/__init__.py b/tests/old_features/notice_eligibility_checker/__init__.py similarity index 100% rename from tests/features/notice_eligibility_checker/__init__.py rename to tests/old_features/notice_eligibility_checker/__init__.py diff --git a/tests/features/notice_eligibility_checker/notice_eligibility_rml_mapping.feature b/tests/old_features/notice_eligibility_checker/notice_eligibility_rml_mapping.feature similarity index 100% rename from tests/features/notice_eligibility_checker/notice_eligibility_rml_mapping.feature rename to tests/old_features/notice_eligibility_checker/notice_eligibility_rml_mapping.feature diff --git a/tests/features/notice_eligibility_checker/test_notice_eligibility.py b/tests/old_features/notice_eligibility_checker/test_notice_eligibility.py similarity index 100% rename from tests/features/notice_eligibility_checker/test_notice_eligibility.py rename to tests/old_features/notice_eligibility_checker/test_notice_eligibility.py diff --git a/tests/features/notice_eligibility_checker/test_notice_eligibility_validation_tests.feature b/tests/old_features/notice_eligibility_checker/test_notice_eligibility_validation_tests.feature similarity index 100% rename from tests/features/notice_eligibility_checker/test_notice_eligibility_validation_tests.feature rename to tests/old_features/notice_eligibility_checker/test_notice_eligibility_validation_tests.feature diff --git a/tests/features/notice_eligibility_checker/test_notice_ineligibility.py b/tests/old_features/notice_eligibility_checker/test_notice_ineligibility.py similarity index 100% rename from tests/features/notice_eligibility_checker/test_notice_ineligibility.py rename to tests/old_features/notice_eligibility_checker/test_notice_ineligibility.py diff --git a/tests/features/notice_normaliser/__init__.py b/tests/old_features/notice_fetcher/__init__.py similarity index 100% rename from tests/features/notice_normaliser/__init__.py rename to tests/old_features/notice_fetcher/__init__.py diff --git a/tests/old_features/notice_fetcher/conftest.py b/tests/old_features/notice_fetcher/conftest.py new file mode 100644 index 000000000..d80a02eb8 --- /dev/null +++ b/tests/old_features/notice_fetcher/conftest.py @@ -0,0 +1,18 @@ +import pytest + + +@pytest.fixture +def notice_identifier(): + return "067623-2022" + + +@pytest.fixture +def notice_search_query(): + return {"q": "ND=[067623-2022]"} + + +@pytest.fixture +def notice_incorrect_search_query(): + return {"q": "ND=067623-20224856"} + + diff --git a/tests/features/notice_fetcher/test_fetching_types.feature b/tests/old_features/notice_fetcher/test_fetching_types.feature similarity index 100% rename from tests/features/notice_fetcher/test_fetching_types.feature rename to tests/old_features/notice_fetcher/test_fetching_types.feature diff --git a/tests/features/notice_fetcher/test_fetching_types.py b/tests/old_features/notice_fetcher/test_fetching_types.py similarity index 100% rename from tests/features/notice_fetcher/test_fetching_types.py rename to tests/old_features/notice_fetcher/test_fetching_types.py diff --git a/tests/old_features/notice_fetcher/test_notice_fetcher.feature b/tests/old_features/notice_fetcher/test_notice_fetcher.feature new file mode 100644 index 000000000..d491c34e4 --- /dev/null +++ b/tests/old_features/notice_fetcher/test_notice_fetcher.feature @@ -0,0 +1,22 @@ +# Created by dude at 25/01/2022 +Feature: Notice fetcher + The system is able to fetch selected TED-XML notices together with their metadata + + Scenario: Fetch a TED notice + Given a TED REST API download endpoint + And correct download API parameters + When call to the API is made + Then a notice and notice metadata is received from the API + And the notice and notice metadata are stored + + Scenario: Fail to fetch a TED notice + Given a TED REST API download endpoint + And incorrect download API parameters + When the call to the API is made + And no notice or metadata is returned + Then an error message is received indicating the problem + + + + + diff --git a/tests/old_features/notice_fetcher/test_notice_fetcher.py b/tests/old_features/notice_fetcher/test_notice_fetcher.py new file mode 100644 index 000000000..5e5e42dd1 --- /dev/null +++ b/tests/old_features/notice_fetcher/test_notice_fetcher.py @@ -0,0 +1,79 @@ +import pytest +from _pytest._code import ExceptionInfo +from pytest_bdd import scenario, given, when, then + +from ted_sws.core.model.notice import Notice +from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI +from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher + + +@scenario('test_notice_fetcher.feature', 'Fetch a TED notice') +def test_fetch_a_ted_notice(): + """Fetch a TED notice""" + + +@given("a TED REST API download endpoint", target_fixture="api_url") +def step_impl(api_end_point): + return api_end_point + + +@given("correct download API parameters") +def step_impl(notice_search_query): + return notice_search_query + + +@when("call to the API is made", target_fixture="notice_storage") +def step_impl(notice_search_query, api_end_point, notice_storage): + NoticeFetcher(notice_repository=notice_storage, + ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), ted_api_url=api_end_point)).fetch_notices_by_query( + query=notice_search_query) + return notice_storage + + +@then("a notice and notice metadata is received from the API", target_fixture="notice_storage") +def step_impl(notice_storage): + notices = list(notice_storage.list()) + assert isinstance(notices, list) + assert len(notices) > 0 + notice = notices[0] + assert isinstance(notice, Notice) + assert notice.xml_manifestation + assert notice.original_metadata + return notice_storage + + +@then("the notice and notice metadata are stored") +def step_impl(notice_storage, notice_identifier): + assert notice_storage.get(reference=notice_identifier) + assert notice_storage.get(reference=notice_identifier).original_metadata + assert notice_storage.get(reference=notice_identifier).xml_manifestation + + +@scenario('test_notice_fetcher.feature', 'Fail to fetch a TED notice') +def test_fail_to_fetch_a_ted_notice(): + """Fail to fetch a TED notice""" + + +@given("incorrect download API parameters") +def step_impl(notice_incorrect_search_query): + return notice_incorrect_search_query + + +@when("the call to the API is made", target_fixture="api_call_message") +def step_impl(notice_incorrect_search_query, api_end_point, notice_storage): + with pytest.raises(Exception) as e: + NoticeFetcher(notice_repository=notice_storage, + ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), ted_api_url=api_end_point)).fetch_notices_by_query( + query=notice_incorrect_search_query) + return e + + +@when("no notice or metadata is returned") +def step_impl(notice_storage, notice_identifier): + assert notice_storage.get(notice_identifier) is None + + +@then("an error message is received indicating the problem") +def step_impl(api_call_message): + assert isinstance(api_call_message, ExceptionInfo) + assert str(api_call_message.value) == "The API call failed with: " diff --git a/tests/features/notice_fetcher/test_search_queries.feature b/tests/old_features/notice_fetcher/test_search_queries.feature similarity index 100% rename from tests/features/notice_fetcher/test_search_queries.feature rename to tests/old_features/notice_fetcher/test_search_queries.feature diff --git a/tests/features/notice_fetcher/test_search_queries.py b/tests/old_features/notice_fetcher/test_search_queries.py similarity index 100% rename from tests/features/notice_fetcher/test_search_queries.py rename to tests/old_features/notice_fetcher/test_search_queries.py diff --git a/tests/features/notice_packager/__init__.py b/tests/old_features/notice_normaliser/__init__.py similarity index 100% rename from tests/features/notice_packager/__init__.py rename to tests/old_features/notice_normaliser/__init__.py diff --git a/tests/features/notice_normaliser/notice_normalizer.feature b/tests/old_features/notice_normaliser/notice_normalizer.feature similarity index 100% rename from tests/features/notice_normaliser/notice_normalizer.feature rename to tests/old_features/notice_normaliser/notice_normalizer.feature diff --git a/tests/features/transformation_validator/__init__.py b/tests/old_features/notice_packager/__init__.py similarity index 100% rename from tests/features/transformation_validator/__init__.py rename to tests/old_features/notice_packager/__init__.py diff --git a/tests/features/notice_packager/mets_metadata.feature b/tests/old_features/notice_packager/mets_metadata.feature similarity index 100% rename from tests/features/notice_packager/mets_metadata.feature rename to tests/old_features/notice_packager/mets_metadata.feature diff --git a/tests/features/notice_packager/notice_packager.feature b/tests/old_features/notice_packager/notice_packager.feature similarity index 100% rename from tests/features/notice_packager/notice_packager.feature rename to tests/old_features/notice_packager/notice_packager.feature diff --git a/tests/features/notice_packager/validate_package.feature b/tests/old_features/notice_packager/validate_package.feature similarity index 100% rename from tests/features/notice_packager/validate_package.feature rename to tests/old_features/notice_packager/validate_package.feature diff --git a/tests/old_features/notice_transformer/__init__.py b/tests/old_features/notice_transformer/__init__.py new file mode 100644 index 000000000..00c67d91a --- /dev/null +++ b/tests/old_features/notice_transformer/__init__.py @@ -0,0 +1,8 @@ +#!/usr/bin/python3 + +# __init__.py +# Date: 03/02/2022 +# Author: Eugeniu Costetchi +# Email: costezki.eugen@gmail.com + +""" """ \ No newline at end of file diff --git a/tests/features/notice_transformer/notice_transformer.feature b/tests/old_features/notice_transformer/notice_transformer.feature similarity index 100% rename from tests/features/notice_transformer/notice_transformer.feature rename to tests/old_features/notice_transformer/notice_transformer.feature diff --git a/tests/old_features/transformation_validator/__init__.py b/tests/old_features/transformation_validator/__init__.py new file mode 100644 index 000000000..00c67d91a --- /dev/null +++ b/tests/old_features/transformation_validator/__init__.py @@ -0,0 +1,8 @@ +#!/usr/bin/python3 + +# __init__.py +# Date: 03/02/2022 +# Author: Eugeniu Costetchi +# Email: costezki.eugen@gmail.com + +""" """ \ No newline at end of file diff --git a/tests/features/transformation_validator/validate_transformer.feature b/tests/old_features/transformation_validator/validate_transformer.feature similarity index 100% rename from tests/features/transformation_validator/validate_transformer.feature rename to tests/old_features/transformation_validator/validate_transformer.feature From 0ff790223e9a9e2fc4061b770515a6a3744d4f37 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:31:56 +0300 Subject: [PATCH 02/65] Update notice_transformer.py --- ted_sws/notice_transformer/services/notice_transformer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ted_sws/notice_transformer/services/notice_transformer.py b/ted_sws/notice_transformer/services/notice_transformer.py index 3eec26d3d..0decfb754 100644 --- a/ted_sws/notice_transformer/services/notice_transformer.py +++ b/ted_sws/notice_transformer/services/notice_transformer.py @@ -1,4 +1,3 @@ -import abc import tempfile from pathlib import Path From 8cd118d840dcf1864fd4d95369a6f832313b0625 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:00 +0300 Subject: [PATCH 03/65] Update conftest.py --- tests/features/conftest.py | 46 +++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/tests/features/conftest.py b/tests/features/conftest.py index a825f44b4..49067cd37 100644 --- a/tests/features/conftest.py +++ b/tests/features/conftest.py @@ -1,8 +1,11 @@ -import mongomock import pymongo import pytest from ted_sws import config +from ted_sws.core.model.manifestation import METSManifestation, RDFManifestation, SHACLTestSuiteValidationReport, \ + SPARQLTestSuiteValidationReport +from ted_sws.core.model.metadata import NormalisedMetadata +from ted_sws.core.model.notice import NoticeStatus, Notice from ted_sws.data_manager.adapters.notice_repository import NoticeRepository NOTICE_STORAGE_FEATURES_TEST_DB = "features_test_db_for_notice" @@ -26,3 +29,44 @@ def ted_api_end_point(): @pytest.fixture def notice_repository(mongodb_client): return NoticeRepository(mongodb_client=mongodb_client, database_name=NOTICE_STORAGE_FEATURES_TEST_DB) + + +@pytest.fixture +def notice_id(notice_2020): + return notice_2020.ted_id + + +@pytest.fixture +def fetched_notice_data(notice_2020): + ted_id = notice_2020.ted_id + original_metadata = notice_2020.original_metadata + xml_manifestation = notice_2020.xml_manifestation + return ted_id, original_metadata, xml_manifestation + + +@pytest.fixture(scope="function") +def publicly_available_notice(fetched_notice_data, normalised_metadata_dict) -> Notice: + ted_id, original_metadata, xml_manifestation = fetched_notice_data + sparql_validation = SPARQLTestSuiteValidationReport(object_data="This is validation report!", + test_suite_identifier="sparql_test_id", + mapping_suite_identifier="mapping_suite_id", + validation_results=[]) + shacl_validation = SHACLTestSuiteValidationReport(object_data="This is validation report!", + test_suite_identifier="shacl_test_id", + mapping_suite_identifier="mapping_suite_id", + validation_results=[]) + notice = Notice(ted_id=ted_id, original_metadata=original_metadata, + xml_manifestation=xml_manifestation) + notice._rdf_manifestation = RDFManifestation(object_data="RDF manifestation content", + shacl_validations=[shacl_validation], + sparql_validations=[sparql_validation] + ) + notice._distilled_rdf_manifestation = RDFManifestation(object_data="RDF manifestation content", + shacl_validations=[shacl_validation], + sparql_validations=[sparql_validation] + ) + notice._mets_manifestation = METSManifestation(object_data="METS manifestation content") + notice._normalised_metadata = NormalisedMetadata(**normalised_metadata_dict) + notice._preprocessed_xml_manifestation = xml_manifestation + notice._status = NoticeStatus.PUBLICLY_AVAILABLE + return notice From fe07d4983673bd2f9669f122cb72a9a37da5ff8b Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:03 +0300 Subject: [PATCH 04/65] Update conftest.py --- tests/features/notice_fetcher/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/features/notice_fetcher/conftest.py b/tests/features/notice_fetcher/conftest.py index aba1394bd..76eac5f70 100644 --- a/tests/features/notice_fetcher/conftest.py +++ b/tests/features/notice_fetcher/conftest.py @@ -8,7 +8,7 @@ @pytest.fixture def fetch_notice_id(): - return "No notice ID" + return "408313-2020" @pytest.fixture From 4ae7c99dbc3bb925e32444192b21bcc60239ab46 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:10 +0300 Subject: [PATCH 05/65] Update test_notice_fetcher.feature --- .../features/notice_fetcher/test_notice_fetcher.feature | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/features/notice_fetcher/test_notice_fetcher.feature b/tests/features/notice_fetcher/test_notice_fetcher.feature index d8f43bf59..9d1bba2e8 100644 --- a/tests/features/notice_fetcher/test_notice_fetcher.feature +++ b/tests/features/notice_fetcher/test_notice_fetcher.feature @@ -2,15 +2,6 @@ Feature: Notice fetcher The system is able to fetch selected TED-XML notices together with their metadata - Scenario: Fetch notices, from TED, for a date - Given a date - And knowing the TED API endpoint - And knowing database endpoint - When notice fetching by date wildcard is executed - Then a list of fetched notice_ids is returned - And a daily notice-batch (supra-notice) is created containing the fetched notice_ids list - And the database contains notices with these Ids, including the XML manifestation, and the metadata - Scenario: Fetch a notice by id, from Ted Given a notice_id And knowing the TED API endpoint From d38625394e4f4c444ab330e3e90453b6a0034ec3 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:13 +0300 Subject: [PATCH 06/65] Update test_notice_fetcher.py --- .../notice_fetcher/test_notice_fetcher.py | 23 ++----------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/tests/features/notice_fetcher/test_notice_fetcher.py b/tests/features/notice_fetcher/test_notice_fetcher.py index d1bbb6226..9224237a2 100644 --- a/tests/features/notice_fetcher/test_notice_fetcher.py +++ b/tests/features/notice_fetcher/test_notice_fetcher.py @@ -1,5 +1,7 @@ """Notice fetcher feature tests.""" +from datetime import date, datetime + from pytest_bdd import ( given, scenario, @@ -7,11 +9,7 @@ when, ) -from datetime import date, datetime - from ted_sws.core.model.notice import Notice, NoticeStatus -from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI -from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher @scenario('test_notice_fetcher.feature', 'Fetch a notice by id, from Ted') @@ -34,11 +32,6 @@ def test_fetch_notices_by_query_from_ted(): """Fetch notices by query, from Ted.""" -@scenario('test_notice_fetcher.feature', 'Fetch notices, from TED, for a date') -def test_fetch_notices_from_ted_for_a_date(): - """Fetch notices, from TED, for a date.""" - - @given('a date') def a_date(fetch_date): """a date.""" @@ -125,12 +118,6 @@ def notices_fetching_by_query_is_executed(notice_fetcher, fetch_query): return notice_ids -@then('a daily notice-batch (supra-notice) is created containing the fetched notice_ids list') -def a_daily_noticebatch_supranotice_is_created_containing_the_fetched_notice_ids_list(): - """a daily notice-batch (supra-notice) is created containing the fetched notice_ids list.""" - raise NotImplementedError - - @then('a list of fetched notice_ids is returned') def a_list_of_fetched_notice_ids_is_returned(fetched_notice_ids): """a list of fetched notice_ids is returned.""" @@ -193,9 +180,3 @@ def foreach_returned_notice_id_exist_in_database_a_notice_with_xml_manifestation assert notice assert notice.xml_manifestation assert notice.xml_manifestation.object_data - - -@then('the database contains notices with these Ids, including the XML manifestation, and the metadata') -def the_database_contains_notices_with_these_ids_including_the_xml_manifestation_and_the_metadata(): - """the database contains notices with these Ids, including the XML manifestation, and the metadata.""" - raise NotImplementedError From fdf0cb0ba194eed30c615c41df9b404e7ba37203 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:16 +0300 Subject: [PATCH 07/65] Create __init__.py --- tests/features/notice_packager/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/features/notice_packager/__init__.py diff --git a/tests/features/notice_packager/__init__.py b/tests/features/notice_packager/__init__.py new file mode 100644 index 000000000..e69de29bb From 02ee7bb334b84afb82915da328b4f59c36044061 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:18 +0300 Subject: [PATCH 08/65] Create conftest.py --- tests/features/notice_packager/conftest.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tests/features/notice_packager/conftest.py diff --git a/tests/features/notice_packager/conftest.py b/tests/features/notice_packager/conftest.py new file mode 100644 index 000000000..82dc81f1f --- /dev/null +++ b/tests/features/notice_packager/conftest.py @@ -0,0 +1,10 @@ +import pytest + +from ted_sws.core.model.notice import NoticeStatus, Notice + + +@pytest.fixture(scope="function") +def package_eligible_notice(publicly_available_notice) -> Notice: + notice = publicly_available_notice + notice.update_status_to(NoticeStatus.ELIGIBLE_FOR_PACKAGING) + return notice From 7c4762555448e698777af1bb6144b66620077bae Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:22 +0300 Subject: [PATCH 09/65] Create test_notice_packager.feature --- .../notice_packager/test_notice_packager.feature | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tests/features/notice_packager/test_notice_packager.feature diff --git a/tests/features/notice_packager/test_notice_packager.feature b/tests/features/notice_packager/test_notice_packager.feature new file mode 100644 index 000000000..94002da58 --- /dev/null +++ b/tests/features/notice_packager/test_notice_packager.feature @@ -0,0 +1,10 @@ +# Created by Stefan at 18.08.2022 +Feature: Notice packager + The system is able to pack TED-RDF notices in METS packages that are described with the metadata fetched from TED. + + Scenario: Package a TED notice in a METS package + Given a notice + And the notice status is ELIGIBLE_FOR_PACKAGING + When the notice packaging is executed + Then the notice have METS manifestation + And the notice status is PACKAGED From 73068b09c669b59f184e729a1af4991a822f559e Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:24 +0300 Subject: [PATCH 10/65] Create test_notice_packager.py --- .../notice_packager/test_notice_packager.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 tests/features/notice_packager/test_notice_packager.py diff --git a/tests/features/notice_packager/test_notice_packager.py b/tests/features/notice_packager/test_notice_packager.py new file mode 100644 index 000000000..edc3b8428 --- /dev/null +++ b/tests/features/notice_packager/test_notice_packager.py @@ -0,0 +1,53 @@ +"""Notice packager feature tests.""" + +from pytest_bdd import ( + given, + scenario, + then, + when, +) + +from ted_sws.core.model.manifestation import METSManifestation +from ted_sws.core.model.notice import Notice, NoticeStatus +from ted_sws.notice_packager.services.notice_packager import create_notice_package + + +@scenario('test_notice_packager.feature', 'Package a TED notice in a METS package') +def test_package_a_ted_notice_in_a_mets_package(): + """Package a TED notice in a METS package.""" + + +@given('a notice') +def a_notice(package_eligible_notice): + """a notice.""" + assert package_eligible_notice + assert isinstance(package_eligible_notice, Notice) + + +@given('the notice status is ELIGIBLE_FOR_PACKAGING') +def the_notice_status_is_eligible_for_packaging(package_eligible_notice): + """the notice status is ELIGIBLE_FOR_PACKAGING.""" + assert package_eligible_notice.status == NoticeStatus.ELIGIBLE_FOR_PACKAGING + + +@when('the notice packaging is executed', target_fixture="packaged_notice") +def the_notice_packaging_is_executed(package_eligible_notice): + """the notice packaging is executed.""" + rdf_content = package_eligible_notice.distilled_rdf_manifestation.object_data.encode("utf-8") + mets_manifestation_content = create_notice_package(in_data=package_eligible_notice, rdf_content=rdf_content) + package_eligible_notice.set_mets_manifestation( + mets_manifestation=METSManifestation(object_data=mets_manifestation_content)) + return package_eligible_notice + + +@then('the notice have METS manifestation') +def the_notice_have_mets_manifestation(packaged_notice: Notice): + """the notice have METS manifestation.""" + assert packaged_notice.mets_manifestation + assert packaged_notice.mets_manifestation.object_data + + +@then('the notice status is PACKAGED') +def the_notice_status_is_packaged(packaged_notice: Notice): + """the notice status is PACKAGED.""" + assert packaged_notice.status == NoticeStatus.PACKAGED From d79c78b812baeedc3f391dee882fb8324ae78c8b Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:27 +0300 Subject: [PATCH 11/65] Create __init__.py --- tests/features/notice_publisher/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/features/notice_publisher/__init__.py diff --git a/tests/features/notice_publisher/__init__.py b/tests/features/notice_publisher/__init__.py new file mode 100644 index 000000000..e69de29bb From 29f95dcc04dfde4fb78a583448642fc0d44db2ce Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:30 +0300 Subject: [PATCH 12/65] Create conftest.py --- tests/features/notice_publisher/conftest.py | 45 +++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tests/features/notice_publisher/conftest.py diff --git a/tests/features/notice_publisher/conftest.py b/tests/features/notice_publisher/conftest.py new file mode 100644 index 000000000..f9af13b68 --- /dev/null +++ b/tests/features/notice_publisher/conftest.py @@ -0,0 +1,45 @@ +import base64 + +import mongomock +import pymongo +import pytest + +from ted_sws import config +from ted_sws.core.model.manifestation import METSManifestation +from ted_sws.core.model.notice import NoticeStatus, Notice +from ted_sws.data_manager.adapters.notice_repository import NoticeRepository + + +@pytest.fixture +def sftp_remote_folder_path(): + return config.SFTP_PATH + + +@pytest.fixture(scope="function") +@mongomock.patch(servers=(('server.example.com', 27017),)) +def mongodb_client(): + mongo_client = pymongo.MongoClient('server.example.com') + for database_name in mongo_client.list_database_names(): + mongo_client.drop_database(database_name) + return mongo_client + + +@pytest.fixture(scope="function") +def publish_eligible_notice(publicly_available_notice) -> Notice: + notice = publicly_available_notice + notice.update_status_to(NoticeStatus.ELIGIBLE_FOR_PUBLISHING) + notice._mets_manifestation = METSManifestation( + object_data=base64.b64encode("METS manifestation content".encode("utf-8"))) + return notice + + +@pytest.fixture(scope="function") +def notice_repository(mongodb_client, publish_eligible_notice): + notice_repository = NoticeRepository(mongodb_client=mongodb_client) + notice_repository.add(notice=publish_eligible_notice) + return notice_repository + + +@pytest.fixture +def sftp_endpoint(): + return config.SFTP_HOST From b53ab7a8efc3e78e18a2e81113f78580abf02d69 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:35 +0300 Subject: [PATCH 13/65] Create test_notice_publisher.feature --- .../test_notice_publisher.feature | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/features/notice_publisher/test_notice_publisher.feature diff --git a/tests/features/notice_publisher/test_notice_publisher.feature b/tests/features/notice_publisher/test_notice_publisher.feature new file mode 100644 index 000000000..b26da6550 --- /dev/null +++ b/tests/features/notice_publisher/test_notice_publisher.feature @@ -0,0 +1,21 @@ +# Created by Stefan at 19.08.2022 +Feature: Notice publisher + The system is able to publish notice METS manifestation to SFTP endpoint. + + + Scenario: Publish notice + Given a notice + And knowing the SFTP endpoint + And the notice is eligible for publishing + When the notice publication is executed + Then the METS package available in a shared SFTP drive + And the notice status is PUBLISHED + + Scenario: Publish notice by id + Given a notice id + And a notice repository + And knowing the SFTP endpoint + And the notice is eligible for publishing + When the notice publication by id is executed + Then the METS package available in a shared SFTP drive + And the notice status is PUBLISHED \ No newline at end of file From 0961362c477a10c2dac7a7f816a4d41b06ecad20 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:39 +0300 Subject: [PATCH 14/65] Create test_notice_publisher.py --- .../notice_publisher/test_notice_publisher.py | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 tests/features/notice_publisher/test_notice_publisher.py diff --git a/tests/features/notice_publisher/test_notice_publisher.py b/tests/features/notice_publisher/test_notice_publisher.py new file mode 100644 index 000000000..f479c262f --- /dev/null +++ b/tests/features/notice_publisher/test_notice_publisher.py @@ -0,0 +1,91 @@ +"""Notice publisher feature tests.""" + +from pytest_bdd import ( + given, + scenario, + then, + when, +) + +from ted_sws.core.model.notice import Notice, NoticeStatus +from ted_sws.data_manager.adapters.repository_abc import NoticeRepositoryABC +from ted_sws.notice_publisher.adapters.sftp_notice_publisher import SFTPPublisher +from ted_sws.notice_publisher.adapters.sftp_publisher_abc import SFTPPublisherABC +from ted_sws.notice_publisher.services.notice_publisher import publish_notice, publish_notice_by_id + + +@scenario('test_notice_publisher.feature', 'Publish notice') +def test_publish_notice(): + """Publish notice.""" + + +@scenario('test_notice_publisher.feature', 'Publish notice by id') +def test_publish_notice_by_id(): + """Publish notice by id.""" + + +@given('a notice') +def a_notice(publish_eligible_notice): + """a notice.""" + assert publish_eligible_notice + assert isinstance(publish_eligible_notice, Notice) + + +@given('a notice id', target_fixture="publish_notice_id") +def a_notice_id(publish_eligible_notice): + """a notice id.""" + assert publish_eligible_notice + assert isinstance(publish_eligible_notice, Notice) + assert publish_eligible_notice.ted_id + assert type(publish_eligible_notice.ted_id) == str + return publish_eligible_notice.ted_id + + +@given('a notice repository') +def a_notice_repository(notice_repository): + """a notice repository.""" + assert notice_repository + assert isinstance(notice_repository, NoticeRepositoryABC) + + +@given('knowing the SFTP endpoint') +def knowing_the_sftp_endpoint(sftp_endpoint): + """knowing the SFTP endpoint.""" + assert sftp_endpoint + assert type(sftp_endpoint) == str + + +@given('the notice is eligible for publishing') +def the_notice_is_eligible_for_publishing(publish_eligible_notice): + """the notice is eligible for publishing.""" + assert publish_eligible_notice.status == NoticeStatus.ELIGIBLE_FOR_PUBLISHING + + +@when('the notice publication is executed', target_fixture="published_notice") +def the_notice_publication_is_executed(publish_eligible_notice): + """the notice publication is executed.""" + publish_notice(notice=publish_eligible_notice) + return publish_eligible_notice + + +@when('the notice publication by id is executed', target_fixture="published_notice") +def the_notice_publication_by_id_is_executed(publish_notice_id, notice_repository): + """the notice publication by id is executed.""" + publish_notice_by_id(notice_id=publish_notice_id, notice_repository=notice_repository) + return notice_repository.get(reference=publish_notice_id) + + +@then('the METS package available in a shared SFTP drive') +def the_mets_package_available_in_a_shared_sftp_drive(published_notice: Notice, sftp_remote_folder_path): + """the METS package available in a shared SFTP drive.""" + publisher: SFTPPublisher = SFTPPublisher() + remote_notice_path = f"{sftp_remote_folder_path}/{published_notice.ted_id}.zip" + publisher.connect() + assert publisher.connection.exists(remotepath=remote_notice_path) + publisher.disconnect() + + +@then('the notice status is PUBLISHED') +def the_notice_status_is_published(published_notice: Notice): + """the notice status is PUBLISHED.""" + assert published_notice.status == NoticeStatus.PUBLISHED From 1c6c24c974478fc155280125427dd59fa1562d73 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:42 +0300 Subject: [PATCH 15/65] Create conftest.py --- tests/features/notice_transformer/conftest.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/features/notice_transformer/conftest.py diff --git a/tests/features/notice_transformer/conftest.py b/tests/features/notice_transformer/conftest.py new file mode 100644 index 000000000..db02e3c78 --- /dev/null +++ b/tests/features/notice_transformer/conftest.py @@ -0,0 +1,69 @@ +from pathlib import Path + +import mongomock +import pymongo +import pytest + +from ted_sws import config +from ted_sws.core.model.manifestation import XMLManifestation, RDFManifestation, METSManifestation, \ + SPARQLTestSuiteValidationReport, SHACLTestSuiteValidationReport +from ted_sws.core.model.metadata import TEDMetadata, NormalisedMetadata +from ted_sws.core.model.notice import Notice, NoticeStatus +from ted_sws.core.model.transform import MappingSuite +from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryMongoDB, \ + MappingSuiteRepositoryInFileSystem +from ted_sws.data_manager.adapters.notice_repository import NoticeRepository +from ted_sws.notice_transformer.adapters.rml_mapper import RMLMapper, SerializationFormat +from tests import TEST_DATA_PATH +from tests.fakes.fake_rml_mapper import FakeRMLMapper + + +@pytest.fixture +def fake_repository_path() -> Path: + return TEST_DATA_PATH / "notice_transformer" / "test_repository" + + +@pytest.fixture +def mapping_suite_id() -> str: + return "test_package" + + +@pytest.fixture +def mapping_suite_repository(fake_repository_path): + return MappingSuiteRepositoryInFileSystem(repository_path=fake_repository_path) + + +@pytest.fixture +def mapping_suite(mapping_suite_repository, mapping_suite_id) -> MappingSuite: + return mapping_suite_repository.get(reference=mapping_suite_id) + + +@pytest.fixture(scope="function") +@mongomock.patch(servers=(('server.example.com', 27017),)) +def mongodb_client(): + mongo_client = pymongo.MongoClient('server.example.com') + for database_name in mongo_client.list_database_names(): + mongo_client.drop_database(database_name) + return mongo_client + + +@pytest.fixture(scope="function") +def notice_repository(mongodb_client, transformation_eligible_notice): + notice_repository = NoticeRepository(mongodb_client=mongodb_client) + notice_repository.add(notice=transformation_eligible_notice) + return notice_repository + + +@pytest.fixture +def rml_mapper(): + rml_mapper = FakeRMLMapper() + rml_mapper.set_serialization_format(SerializationFormat.TURTLE) + return rml_mapper + + +@pytest.fixture(scope="function") +def transformation_eligible_notice(publicly_available_notice) -> Notice: + notice = publicly_available_notice + notice.update_status_to(NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION) + notice.update_status_to(NoticeStatus.PREPROCESSED_FOR_TRANSFORMATION) + return notice From 1fc9cb58e5f4bb2b6759a8924b573c844bd0c21b Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:46 +0300 Subject: [PATCH 16/65] Update test_notice_transformer.feature --- .../test_notice_transformer.feature | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/features/notice_transformer/test_notice_transformer.feature b/tests/features/notice_transformer/test_notice_transformer.feature index 866356134..d2a98c273 100644 --- a/tests/features/notice_transformer/test_notice_transformer.feature +++ b/tests/features/notice_transformer/test_notice_transformer.feature @@ -3,12 +3,24 @@ Feature: Notice transformer The system is able to transform a notice from XML format in RDF format Scenario: Transform a TED notice - Given a notice Id available in the database with form number F03 - And Stefan has a banana - And a mapping suite for F03 available in the database - And the notice status is ELIGIBLE_FOR_TRANSFORMATION - And knowing database endpoint + Given a notice + And a mapping suite package + And a rml mapper + And given notice is eligible for transformation + And given mapping suite is eligible for notice transformation When the notice transformation is executed - Then RDF notice manifestation is available in the database + Then the notice have RDF manifestation And the notice status is TRANSFORMED + Scenario: Transform a TED notice by id + Given a notice id + And a mapping suite package id + And a rml mapper + And a notice repository + And a mapping suite repository + And given notice is eligible for transformation + And given mapping suite is eligible for notice transformation + When the notice transformation by id is executed + Then the RDF notice manifestation is available in the database + And the notice have RDF manifestation + And the notice status is TRANSFORMED \ No newline at end of file From c69344175652e0e2bc26cbb3072a6db27c58430b Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:32:49 +0300 Subject: [PATCH 17/65] Update test_notice_transformer.py --- .../test_notice_transformer.py | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/tests/features/notice_transformer/test_notice_transformer.py b/tests/features/notice_transformer/test_notice_transformer.py index e69de29bb..e4caa3d93 100644 --- a/tests/features/notice_transformer/test_notice_transformer.py +++ b/tests/features/notice_transformer/test_notice_transformer.py @@ -0,0 +1,129 @@ +"""Notice transformer feature tests.""" + +from pytest_bdd import ( + given, + scenario, + then, + when, +) + +from ted_sws.core.model.notice import NoticeStatus, Notice +from ted_sws.core.model.transform import MappingSuite +from ted_sws.data_manager.adapters.repository_abc import NoticeRepositoryABC, MappingSuiteRepositoryABC +from ted_sws.notice_transformer.adapters.rml_mapper import RMLMapperABC +from ted_sws.notice_transformer.services.notice_transformer import transform_notice, transform_notice_by_id + + +@scenario('test_notice_transformer.feature', 'Transform a TED notice') +def test_transform_a_ted_notice(): + """Transform a TED notice.""" + + +@scenario('test_notice_transformer.feature', 'Transform a TED notice by id') +def test_transform_a_ted_notice_by_id(): + """Transform a TED notice by id.""" + + +@given('a mapping suite package') +def a_mapping_suite_package(mapping_suite): + """a mapping suite package.""" + assert mapping_suite + assert isinstance(mapping_suite, MappingSuite) + + +@given('a mapping suite package id') +def a_mapping_suite_package_id(mapping_suite_id): + """a mapping suite package id.""" + assert mapping_suite_id + assert type(mapping_suite_id) == str + + +@given('a mapping suite repository') +def a_mapping_suite_repository(mapping_suite_repository): + """a mapping suite repository.""" + assert mapping_suite_repository + assert isinstance(mapping_suite_repository, MappingSuiteRepositoryABC) + + +@given('a notice', target_fixture="eligible_for_transformation_notice") +def a_notice(transformation_eligible_notice): + """a notice.""" + assert transformation_eligible_notice + assert isinstance(transformation_eligible_notice, Notice) + return transformation_eligible_notice + + +@given('a notice id', target_fixture="eligible_for_transformation_notice") +def a_notice_id(notice_id, transformation_eligible_notice, notice_repository): + """a notice id.""" + assert notice_id + assert type(notice_id) == str + notice_repository.add(notice=transformation_eligible_notice) + return transformation_eligible_notice + + +@given('a notice repository') +def a_notice_repository(notice_repository): + """a notice repository.""" + assert notice_repository + assert isinstance(notice_repository, NoticeRepositoryABC) + + +@given('a rml mapper') +def a_rml_mapper(rml_mapper): + """a rml mapper.""" + assert rml_mapper + assert isinstance(rml_mapper, RMLMapperABC) + + +@given('given mapping suite is eligible for notice transformation') +def given_mapping_suite_is_eligible_for_notice_transformation(): + """given mapping suite is eligible for notice transformation.""" + + +@given('given notice is eligible for transformation') +def given_notice_is_eligible_for_transformation(eligible_for_transformation_notice): + """given notice is eligible for transformation.""" + assert eligible_for_transformation_notice + assert eligible_for_transformation_notice.status == NoticeStatus.PREPROCESSED_FOR_TRANSFORMATION + + +@when('the notice transformation is executed', target_fixture="transformed_notice") +def the_notice_transformation_is_executed(eligible_for_transformation_notice, mapping_suite, rml_mapper): + """the notice transformation is executed.""" + transformed_notice = transform_notice(notice=eligible_for_transformation_notice, mapping_suite=mapping_suite, + rml_mapper=rml_mapper) + return transformed_notice + + +@when('the notice transformation by id is executed', target_fixture="notice_repository_with_transformed_notice") +def the_notice_transformation_by_id_is_executed(notice_id, mapping_suite_id, notice_repository, + mapping_suite_repository, rml_mapper): + """the notice transformation is executed.""" + transform_notice_by_id(notice_id=notice_id, mapping_suite_id=mapping_suite_id, notice_repository=notice_repository, + mapping_suite_repository=mapping_suite_repository, rml_mapper=rml_mapper) + return notice_repository + + +@then('the RDF notice manifestation is available in the database', target_fixture="transformed_notice") +def the_rdf_notice_manifestation_is_available_in_the_database(notice_id: str, + notice_repository_with_transformed_notice: NoticeRepositoryABC): + """the RDF notice manifestation is available in the database.""" + notice = notice_repository_with_transformed_notice.get(reference=notice_id) + assert notice + assert notice.rdf_manifestation + assert notice.rdf_manifestation.object_data + return notice + + +@then('the notice have RDF manifestation') +def the_notice_have_rdf_manifestation(transformed_notice: Notice): + """the notice have RDF manifestation.""" + assert transformed_notice.rdf_manifestation + assert transformed_notice.rdf_manifestation.object_data + + +@then('the notice status is TRANSFORMED') +def the_notice_status_is_transformed(transformed_notice: Notice): + """the notice status is TRANSFORMED.""" + assert transformed_notice.status == NoticeStatus.TRANSFORMED From 69871a878242390996630b22b09c929c51c54449 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Sun, 21 Aug 2022 22:33:17 +0300 Subject: [PATCH 18/65] Delete test_notice_transformer_tmp.py --- .../notice_transformer/test_notice_transformer_bla_b.py | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 tests/features/notice_transformer/test_notice_transformer_bla_b.py diff --git a/tests/features/notice_transformer/test_notice_transformer_bla_b.py b/tests/features/notice_transformer/test_notice_transformer_bla_b.py deleted file mode 100644 index 1188f40fb..000000000 --- a/tests/features/notice_transformer/test_notice_transformer_bla_b.py +++ /dev/null @@ -1,3 +0,0 @@ -from pytest_bdd import scenario, given, when, then - - From 78b59b0d17b7d2033df1a4af1692f180f99facd8 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:54:21 +0300 Subject: [PATCH 19/65] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0313f84a7..ff5ab175e 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ test-unit: test-features: @ echo -e "$(BUILD_PRINT)Gherkin Features Testing ...$(END_BUILD_PRINT)" -# @ tox -e features + @ tox -e features test-e2e: @ echo -e "$(BUILD_PRINT)End to End Testing ...$(END_BUILD_PRINT)" From 63033e8dcb7bb3defa65130027fcd3e296b40be7 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:54:26 +0300 Subject: [PATCH 20/65] Update conftest.py --- tests/features/conftest.py | 39 ++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/tests/features/conftest.py b/tests/features/conftest.py index 49067cd37..db591f080 100644 --- a/tests/features/conftest.py +++ b/tests/features/conftest.py @@ -1,12 +1,15 @@ +import mongomock import pymongo import pytest from ted_sws import config from ted_sws.core.model.manifestation import METSManifestation, RDFManifestation, SHACLTestSuiteValidationReport, \ SPARQLTestSuiteValidationReport -from ted_sws.core.model.metadata import NormalisedMetadata +from ted_sws.core.model.metadata import NormalisedMetadata, XMLMetadata from ted_sws.core.model.notice import NoticeStatus, Notice from ted_sws.data_manager.adapters.notice_repository import NoticeRepository +from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI +from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher NOTICE_STORAGE_FEATURES_TEST_DB = "features_test_db_for_notice" @@ -16,9 +19,13 @@ def mongodb_end_point(): return config.MONGO_DB_AUTH_URL -@pytest.fixture -def mongodb_client(mongodb_end_point): - return pymongo.MongoClient(mongodb_end_point) +@pytest.fixture(scope="function") +@mongomock.patch(servers=(('server.example.com', 27017),)) +def mongodb_client(): + mongo_client = pymongo.MongoClient('server.example.com') + for database_name in mongo_client.list_database_names(): + mongo_client.drop_database(database_name) + return mongo_client @pytest.fixture @@ -31,6 +38,30 @@ def notice_repository(mongodb_client): return NoticeRepository(mongodb_client=mongodb_client, database_name=NOTICE_STORAGE_FEATURES_TEST_DB) +@pytest.fixture +def f03_notice_2020(notice_repository, ted_api_end_point): + notice_search_query = {"q": "ND=[408313-2020]"} + NoticeFetcher(notice_repository=notice_repository, + ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), + ted_api_url=ted_api_end_point)).fetch_notices_by_query( + query=notice_search_query) + notice = notice_repository.get(reference="408313-2020") + notice.set_xml_metadata(xml_metadata=XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"])) + return notice + + +@pytest.fixture +def f18_notice_2022(notice_repository, ted_api_end_point): + notice_search_query = {"q": "ND=[067623-2022]"} + NoticeFetcher(notice_repository=notice_repository, + ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), + ted_api_url=ted_api_end_point)).fetch_notices_by_query( + query=notice_search_query) + notice = notice_repository.get(reference="067623-2022") + notice.set_xml_metadata(xml_metadata=XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"])) + return notice + + @pytest.fixture def notice_id(notice_2020): return notice_2020.ted_id From 1c89a21182f7e6197b58cae806afc35165e36de8 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:54:28 +0300 Subject: [PATCH 21/65] Create __init__.py --- tests/features/model/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/features/model/__init__.py diff --git a/tests/features/model/__init__.py b/tests/features/model/__init__.py new file mode 100644 index 000000000..e69de29bb From 71a505114154299963683d58b796c2ff1f0ab959 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:54:58 +0300 Subject: [PATCH 22/65] Create conftest.py --- tests/features/model/conftest.py | 60 ++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/features/model/conftest.py diff --git a/tests/features/model/conftest.py b/tests/features/model/conftest.py new file mode 100644 index 000000000..f9d40ba28 --- /dev/null +++ b/tests/features/model/conftest.py @@ -0,0 +1,60 @@ +import pytest + +from ted_sws.core.model.manifestation import XMLManifestation, RDFManifestation, METSManifestation, \ + SPARQLTestSuiteValidationReport, SHACLTestSuiteValidationReport +from ted_sws.core.model.metadata import TEDMetadata, NormalisedMetadata +from ted_sws.core.model.notice import Notice, NoticeStatus + + +@pytest.fixture +def fetched_notice_data(): + ted_id = "ted_id1" + original_metadata = TEDMetadata(**{"AA": "Value here"}) + xml_manifestation = XMLManifestation(object_data="XML manifestation content") + return ted_id, original_metadata, xml_manifestation + + +@pytest.fixture(scope="function") +def publicly_available_notice(fetched_notice_data, normalised_metadata_dict) -> Notice: + ted_id, original_metadata, xml_manifestation = fetched_notice_data + sparql_validation = SPARQLTestSuiteValidationReport(object_data="This is validation report!", + test_suite_identifier="sparql_test_id", + mapping_suite_identifier="mapping_suite_id", + validation_results=[]) + shacl_validation = SHACLTestSuiteValidationReport(object_data="This is validation report!", + test_suite_identifier="shacl_test_id", + mapping_suite_identifier="mapping_suite_id", + validation_results=[]) + notice = Notice(ted_id=ted_id, original_metadata=original_metadata, + xml_manifestation=xml_manifestation) + notice._rdf_manifestation = RDFManifestation(object_data="RDF manifestation content", + shacl_validations=[shacl_validation], + sparql_validations=[sparql_validation] + ) + notice._distilled_rdf_manifestation = RDFManifestation(object_data="RDF manifestation content", + shacl_validations=[shacl_validation], + sparql_validations=[sparql_validation] + ) + notice._mets_manifestation = METSManifestation(object_data="METS manifestation content") + notice._normalised_metadata = NormalisedMetadata(**normalised_metadata_dict) + notice._preprocessed_xml_manifestation = xml_manifestation + notice._status = NoticeStatus.PUBLICLY_AVAILABLE + return notice + + +@pytest.fixture(scope="function") +def raw_notice(fetched_notice_data) -> Notice: + ted_id, original_metadata, xml_manifestation = fetched_notice_data + notice = Notice(ted_id=ted_id, xml_manifestation=xml_manifestation, original_metadata=original_metadata) + return notice + + +@pytest.fixture(scope="function") +def transformation_eligible_notice(indexed_notice, normalised_metadata_dict) -> Notice: + indexed_notice.set_normalised_metadata(normalised_metadata=NormalisedMetadata(**normalised_metadata_dict)) + indexed_notice.update_status_to(NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION) + indexed_notice.update_status_to(NoticeStatus.PREPROCESSED_FOR_TRANSFORMATION) + return indexed_notice + + + From 4afb27f4c5d71e2279144dd5e0178b7eacf0a8fd Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:07 +0300 Subject: [PATCH 23/65] Create test_creating_notice.feature --- .../features/model/test_creating_notice.feature | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/features/model/test_creating_notice.feature diff --git a/tests/features/model/test_creating_notice.feature b/tests/features/model/test_creating_notice.feature new file mode 100644 index 000000000..184d473d4 --- /dev/null +++ b/tests/features/model/test_creating_notice.feature @@ -0,0 +1,16 @@ +# Date: 29/01/2022 +# Author: Eugeniu Costetchi +# Email: costezki.eugen@gmail.com + +Feature: Notice creation + + A notice is created so that it can be used in the system. + + Scenario: Create a bare minimum notice + Given a TED identifier ted_identifier + And original TED notice metadata notice_metadata + And the XML content of the notice xml_content + When a notice is instantiated + Then a new notice object is available + And notice_metadata, xml_content, source_url and status RAW are accessible + From be2df0fe82b515ba1e4455f3abfcefdedc3e6cd0 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:11 +0300 Subject: [PATCH 24/65] Create test_creating_notice.py --- tests/features/model/test_creating_notice.py | 44 ++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tests/features/model/test_creating_notice.py diff --git a/tests/features/model/test_creating_notice.py b/tests/features/model/test_creating_notice.py new file mode 100644 index 000000000..958636c02 --- /dev/null +++ b/tests/features/model/test_creating_notice.py @@ -0,0 +1,44 @@ +from pytest_bdd import scenario, given, when, then + +from ted_sws.core.model.manifestation import XMLManifestation +from ted_sws.core.model.metadata import TEDMetadata +from ted_sws.core.model.notice import Notice + + +@scenario("test_creating_notice.feature", "Create a bare minimum notice") +def test_create_a_bare_minimum_notice(): + pass + + +@given("a TED identifier ted_identifier", target_fixture="ted_identifier") +def step_impl(): + return "ted_identifier_1234" + + +@given("original TED notice metadata notice_metadata", target_fixture="ted_metadata") +def step_impl(): + return TEDMetadata(**{"AA": "clever value"}) + + +@given("the XML content of the notice xml_content", target_fixture="notice_content") +def step_impl(): + return XMLManifestation(object_data="XML content") + + +@when("a notice is instantiated", target_fixture="new_notice") +def step_impl(ted_identifier, ted_metadata, notice_content): + return Notice(ted_id=ted_identifier, original_metadata=ted_metadata, + xml_manifestation=notice_content) + + +@then("a new notice object is available") +def step_impl(new_notice): + assert new_notice is not None + + +@then("notice_metadata, xml_content, source_url and status RAW are accessible") +def step_impl(new_notice, ted_identifier, ted_metadata, notice_content): + assert new_notice.original_metadata == ted_metadata + assert new_notice.xml_manifestation == notice_content + assert new_notice.ted_id == ted_identifier + From 4bc78242446b76c5c9a5ccae10da849c44651ef3 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:16 +0300 Subject: [PATCH 25/65] Create test_notice_operations.feature --- .../model/test_notice_operations.feature | 181 ++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 tests/features/model/test_notice_operations.feature diff --git a/tests/features/model/test_notice_operations.feature b/tests/features/model/test_notice_operations.feature new file mode 100644 index 000000000..4fa14e12f --- /dev/null +++ b/tests/features/model/test_notice_operations.feature @@ -0,0 +1,181 @@ +# Date: 29/01/2022 +# Author: Eugeniu Costetchi +# Email: costezki.eugen@gmail.com + +Feature: Notice state and content in the lifecycle process + + A notice accounts for its state and status in the process lifecycle + so that affordances are known and constraints are met. + + Scenario: add normalised metadata + Given a notice + And normalised metadata + When normalised metadata is added + Then the notice object contains the normalised metadata + And the notice status is NORMALISED_METADATA + + Scenario: overwrite normalised metadata + Given a notice eligible for transformation + And normalised metadata + And the notice already contains normalised metadata + When normalised metadata is overwritten + Then the notice object contains the new normalised metadata + And the notice status is NORMALISED_METADATA + And normalised notice contains no RDF manifestation + And notice not contains RDF validation + And notice contains no METS manifestation + + Scenario: add RDF manifestation + Given a notice eligible for transformation + And RDF manifestation + When RDF manifestation is added + Then the notice object contains the RDF manifestation + And the notice status is TRANSFORMED + + Scenario: overwrite RDF manifestation + Given a notice eligible for transformation + And RDF manifestation + And the notice already contains an RDF manifestation + When the RDF manifestation is overwritten + Then the notice object contains the new RDF manifestation + And the notice status is TRANSFORMED + And notice contains no RDF validation + And notice contains no METS manifestation + + Scenario: add validation report for a transformation + Given a notice eligible for transformation + And RDF validation report + And the notice contains an RDF manifestation + When RDF validation report is added + Then the notice object contains the RDF validation report + And the notice status is VALIDATED + And notice contains no METS manifestation + + Scenario: cannot add a validation report when there is no transformation + Given a notice + And RDF validation report + And the notice does not contains an RDF manifestation + When RDF validation report is added an exception is raised + + + Scenario: add METS manifestation + Given a packaging eligible notice + And METS manifestation + When METS manifestation is added + Then the notice object contains the METS manifestation + And the notice status is PACKAGED + + Scenario: overwrite METS manifestation + Given a packaging eligible notice + And METS manifestation + And the notice already contains an METS manifestation + When METS manifestation is added + Then the notice object contains the new METS manifestation + And the notice status is PACKAGED + + Scenario Outline: set notice eligibility for transformation before transformation + Given a notice + And eligibility check result is + And the notice status is lower than TRANSFORMED + When eligibility for transformation is set + Then notice status is + + Examples: + | eligibility | notice_status | + | true | ELIGIBLE_FOR_TRANSFORMATION | + | false | INELIGIBLE_FOR_TRANSFORMATION | + + Scenario Outline: set notice eligibility for transformation after transformation + Given a notice + And eligibility check result is + And the notice status is equal or greater than TRANSFORMED + When eligibility for transformation is set + Then notice status is + + Examples: + | eligibility | notice_status | + | false | INELIGIBLE_FOR_TRANSFORMATION | + + Scenario Outline: set notice eligibility for packaging before packaging + Given a notice + And eligibility check result is + And the notice is validated + When eligibility for packaging is set + Then notice status is + + Examples: + | eligibility | notice_status | + | true | ELIGIBLE_FOR_PACKAGING | + | false | INELIGIBLE_FOR_PACKAGING | + + Scenario Outline: set notice eligibility for packaging after packaging + Given a notice + And eligibility check result is + And the notice is published + When eligibility for packaging is set + Then notice status is + + Examples: + | eligibility | notice_status | + | false | INELIGIBLE_FOR_PACKAGING | + + Scenario Outline: set notice eligibility for publishing after packaging + Given a notice + And eligibility check result is + And notice contains a METS package + When the package validity is set + Then notice status is + + Examples: + | eligibility | notice_status | + | true | ELIGIBLE_FOR_PUBLISHING | + | false | INELIGIBLE_FOR_PUBLISHING | + + Scenario Outline: set notice eligibility for publishing after publishing + Given a notice + And eligibility check result is + And the notice is published + When the package validity is set + Then notice status is + + Examples: + | eligibility | notice_status | + | false | INELIGIBLE_FOR_PUBLISHING | + + + Scenario Outline: mark notice as published if eligible + Given a notice + And eligibility check result is + And the notice is packaged + When the package validity is set + And the notice is marked as published + Then notice status is + Examples: + | eligibility | notice_status | + | true | PUBLISHED | + + + Scenario Outline: mark notice as published when ineligible + Given a notice + And eligibility check result is + And the notice is packaged + When the package validity is set + Then the notice cannot be marked as published + + Examples: + | eligibility | + | false | + + + Scenario Outline: set notice public availability after publishing + Given a notice + And availability check result is + And the notice is published + When public availability is set + Then notice status is + + Examples: + | availability | notice_status | + | true | PUBLICLY_AVAILABLE | + | false | PUBLICLY_UNAVAILABLE | + From fb4ec383b8070a7ea631bab90091fff9fe3a4430 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:19 +0300 Subject: [PATCH 26/65] Create test_notice_operations.py --- .../features/model/test_notice_operations.py | 375 ++++++++++++++++++ 1 file changed, 375 insertions(+) create mode 100644 tests/features/model/test_notice_operations.py diff --git a/tests/features/model/test_notice_operations.py b/tests/features/model/test_notice_operations.py new file mode 100644 index 000000000..8e0b59d4e --- /dev/null +++ b/tests/features/model/test_notice_operations.py @@ -0,0 +1,375 @@ +import pytest +from pytest_bdd import scenario, given, when, then, parsers + +from ted_sws.core.model.manifestation import RDFManifestation, RDFValidationManifestation, METSManifestation +from ted_sws.core.model.metadata import NormalisedMetadata +from ted_sws.core.model.notice import NoticeStatus + + +def str2bool(value: str) -> bool: + """ + Parse a string value and cast it into its boolean value + :param value: + :return: + """ + if value in ["y", "yes", "t", "true", "on", "1"]: return True + if value in ["n", "no", "f", "false", "off", "0"]: return False + raise ValueError("boolean value unrecognised") + + +@scenario("test_notice_operations.feature", "add normalised metadata") +def test_add_normalised_metadata(): + pass + + +@scenario("test_notice_operations.feature", "overwrite normalised metadata") +def test_overwrite_normalised_metadata(): + pass + + +@scenario("test_notice_operations.feature", "add RDF manifestation") +def test_add_rdf_manifestation(): + pass + + +@scenario("test_notice_operations.feature", "overwrite RDF manifestation") +def test_overwrite_rdf_manifestation(): + pass + + +@scenario("test_notice_operations.feature", "add validation report for a transformation") +def test_add_validation_report_for_a_transformation(): + pass + + +@scenario("test_notice_operations.feature", "cannot add a validation report when there is no transformation") +def test_cannot_add_a_validation_report_when_there_is_no_transformation(): + pass + + +@scenario("test_notice_operations.feature", "add METS manifestation") +def test_add_mets_manifestation(): + pass + + +@scenario("test_notice_operations.feature", "overwrite METS manifestation") +def test_overwrite_mets_manifestation(): + pass + + +@scenario("test_notice_operations.feature", "set notice eligibility for transformation before transformation") +def test_set_notice_eligibility_for_transformation_before_transformation(): + pass + + +@scenario("test_notice_operations.feature", "set notice eligibility for transformation after transformation") +def test_set_notice_eligibility_for_transformation_after_transformation(): + pass + + +@scenario("test_notice_operations.feature", "set notice eligibility for packaging before packaging") +def test_set_notice_eligibility_for_packaging_when_validated(): + pass + + +@scenario("test_notice_operations.feature", "set notice eligibility for packaging after packaging") +def test_set_notice_eligibility_for_packaging_when_not_validated(): + pass + + +@scenario("test_notice_operations.feature", "set notice eligibility for publishing after packaging") +def test_set_mets_package_validity_when_package_is_available(): + pass + + +@scenario("test_notice_operations.feature", "set notice eligibility for publishing after publishing") +def test_set_mets_package_validity_when_package_is_missing(): + pass + + +@scenario("test_notice_operations.feature", "mark notice as published if eligible") +def test_mark_notice_as_published_if_eligible(): + pass + + +@scenario("test_notice_operations.feature", "set notice public availability after publishing") +def test_set_notice_public_availability_after_publishing(): + pass + + +# -------------------------------- +# Step implementations +# -------------------------------- + +@given("a notice", target_fixture="a_notice") +def step_impl(publicly_available_notice): + return publicly_available_notice + + +@given("normalised metadata", target_fixture="normalised_metadata") +def step_impl(normalised_metadata_dict): + return NormalisedMetadata(**normalised_metadata_dict) + + +@when("normalised metadata is added") +def step_impl(indexed_notice, normalised_metadata): + indexed_notice.set_normalised_metadata(normalised_metadata=normalised_metadata) + + +@then("the notice object contains the normalised metadata") +def step_impl(indexed_notice): + assert indexed_notice.normalised_metadata is not None + + +@then("the notice status is NORMALISED_METADATA") +def step_impl(indexed_notice): + assert indexed_notice.status is NoticeStatus.NORMALISED_METADATA + + +@given("the notice already contains normalised metadata") +def step_impl(indexed_notice, normalised_metadata_dict): + indexed_notice.set_normalised_metadata(NormalisedMetadata(**normalised_metadata_dict)) + assert indexed_notice.normalised_metadata is not None + + +@when("normalised metadata is overwritten", target_fixture="old_normalised_metadata") +def step_impl(indexed_notice, normalised_metadata): + assert indexed_notice.normalised_metadata is not None + old = indexed_notice.normalised_metadata + normalised_metadata.notice_publication_number = "something else" + indexed_notice.set_normalised_metadata(normalised_metadata=normalised_metadata) + return old + + +@then("the notice object contains the new normalised metadata") +def step_impl(indexed_notice, normalised_metadata, old_normalised_metadata): + assert indexed_notice.normalised_metadata == normalised_metadata + assert indexed_notice.normalised_metadata != old_normalised_metadata + + +@then("normalised notice contains no RDF manifestation") +def step_impl(indexed_notice): + assert indexed_notice.rdf_manifestation is None + + +@then("notice contains no RDF validation") +def step_impl(transformation_eligible_notice): + assert transformation_eligible_notice + print( + f"transformation_eligible_notice.get_rdf_validation() = {transformation_eligible_notice.get_rdf_validation()}") + assert transformation_eligible_notice.get_rdf_validation() == [] + + +@then("notice not contains RDF validation") +def step_impl(transformation_eligible_notice): + assert transformation_eligible_notice + print( + f"transformation_eligible_notice.get_rdf_validation() = {transformation_eligible_notice.get_rdf_validation()}") + assert transformation_eligible_notice.get_rdf_validation() is None + + +@then("notice contains no METS manifestation") +def step_impl(transformation_eligible_notice): + assert transformation_eligible_notice.mets_manifestation is None + + +@given("RDF manifestation", target_fixture="rdf_manifestation") +def step_impl(): + return RDFManifestation(object_data="featured object data of the RDF manifestation") + + +@when("RDF manifestation is added") +def step_impl(transformation_eligible_notice, rdf_manifestation): + transformation_eligible_notice.set_rdf_manifestation(rdf_manifestation) + + +@then("the notice object contains the RDF manifestation") +def step_impl(transformation_eligible_notice, rdf_manifestation): + assert transformation_eligible_notice.rdf_manifestation == rdf_manifestation + + +@then("the notice status is TRANSFORMED") +def step_impl(transformation_eligible_notice): + assert transformation_eligible_notice.status is NoticeStatus.TRANSFORMED + + +@given("the notice already contains an RDF manifestation") +def step_impl(transformation_eligible_notice, rdf_manifestation): + transformation_eligible_notice.set_rdf_manifestation( + rdf_manifestation=RDFManifestation(object_data="data some data")) + + +@when("the RDF manifestation is overwritten", target_fixture="old_rdf_manifestation") +def step_impl(transformation_eligible_notice, rdf_manifestation): + old_manifestation = transformation_eligible_notice.rdf_manifestation + transformation_eligible_notice.set_rdf_manifestation(rdf_manifestation) + return old_manifestation + + +@then("the notice object contains the new RDF manifestation") +def step_impl(transformation_eligible_notice, old_rdf_manifestation, rdf_manifestation): + assert transformation_eligible_notice.rdf_manifestation != old_rdf_manifestation + assert transformation_eligible_notice.rdf_manifestation == rdf_manifestation + + +@given("RDF validation report", target_fixture="rdf_validation") +def step_impl(): + return RDFValidationManifestation(object_data="this is another validation report", + test_suite_identifier="test_suite_id", + mapping_suite_identifier="mapping_suite_id" + ) + + +@given("the notice contains an RDF manifestation") +def step_impl(transformation_eligible_notice): + transformation_eligible_notice.set_rdf_manifestation( + rdf_manifestation=RDFManifestation(object_data="data some data")) + + +@when("RDF validation report is added an exception is raised") +def step_impl(transformation_eligible_notice, rdf_validation): + with pytest.raises(Exception): + transformation_eligible_notice.set_rdf_validation(rdf_validation) + + +@then("the notice object contains the RDF validation report") +def step_impl(transformation_eligible_notice): + assert transformation_eligible_notice.get_rdf_validation() is not None + + +@then("the notice status is VALIDATED") +def step_impl(transformation_eligible_notice): + # TODO: Change feature text and tests, once the SPARQL test suite was refactor + # assert transformation_eligible_notice.status is NoticeStatus.VALIDATED + assert transformation_eligible_notice.status is NoticeStatus.DISTILLED + transformation_eligible_notice.update_status_to(new_status=NoticeStatus.VALIDATED) + + +@given("the notice does not contains an RDF manifestation") +def step_impl(transformation_eligible_notice): + transformation_eligible_notice._rdf_manifestation = None + + +@given("METS manifestation", target_fixture="mets_manifestation") +def step_impl(): + return METSManifestation(object_data="THE METS manifestation") + + +@when("METS manifestation is added") +def step_impl(packaging_eligible_notice, mets_manifestation): + packaging_eligible_notice.set_mets_manifestation(mets_manifestation=mets_manifestation) + + +@then("the notice object contains the METS manifestation") +def step_impl(packaging_eligible_notice, mets_manifestation): + assert packaging_eligible_notice.mets_manifestation == mets_manifestation + + +@then("the notice status is PACKAGED") +def step_impl(packaging_eligible_notice): + assert packaging_eligible_notice.status is NoticeStatus.PACKAGED + + +@given("the notice already contains an METS manifestation", target_fixture="packaging_eligible_notice") +def step_impl(publicly_available_notice): + publicly_available_notice.update_status_to(NoticeStatus.PACKAGED) + return publicly_available_notice + + +@then("the notice object contains the new METS manifestation") +def step_impl(packaging_eligible_notice, mets_manifestation): + assert packaging_eligible_notice.mets_manifestation == mets_manifestation + + +@given(parsers.parse("eligibility check result is {eligibility}"), + target_fixture="eligibility") +def step_impl(eligibility): + return str2bool(eligibility) + + +@given("the notice status is lower than TRANSFORMED") +def step_impl(a_notice): + a_notice.update_status_to(NoticeStatus.NORMALISED_METADATA) + + +@when("eligibility for transformation is set") +def step_impl(a_notice, eligibility): + a_notice.set_is_eligible_for_transformation(eligibility) + + +@then(parsers.parse("notice status is {notice_status}")) +def step_impl(a_notice, notice_status): + assert a_notice.status == NoticeStatus[notice_status] + + +@given("the notice status is equal or greater than TRANSFORMED") +def step_impl(a_notice): + a_notice.update_status_to(NoticeStatus.VALIDATED) + + +@when("eligibility for packaging is set") +def step_impl(a_notice, eligibility): + a_notice.set_is_eligible_for_packaging(eligibility) + + +@given("notice contains a METS package") +def step_impl(a_notice): + a_notice.update_status_to(NoticeStatus.PACKAGED) + + +@when("the package validity is set") +def step_impl(a_notice, eligibility): + a_notice.set_is_eligible_for_publishing(eligibility) + + +@when("the notice is marked as published") +def step_impl(a_notice): + a_notice.mark_as_published() + + +@when("public availability is set") +def step_impl(a_notice, availability): + a_notice.set_is_publicly_available(availability) + + +@given("a notice eligible for transformation", ) +def step_impl(transformation_eligible_notice): + assert transformation_eligible_notice.status is NoticeStatus.PREPROCESSED_FOR_TRANSFORMATION + + +@when("RDF validation report is added") +def step_impl(transformation_eligible_notice, rdf_validation): + transformation_eligible_notice.update_status_to(NoticeStatus.DISTILLED) + transformation_eligible_notice.set_rdf_validation(rdf_validation) + + +@given("a packaging eligible notice", target_fixture="packaging_eligible_notice") +def step_impl(publicly_available_notice): + publicly_available_notice.update_status_to(NoticeStatus.ELIGIBLE_FOR_PACKAGING) + return publicly_available_notice + + +@given("the notice is validated") +def step_impl(a_notice): + a_notice.update_status_to(NoticeStatus.VALIDATED) + + +@given("the notice is published") +def step_impl(a_notice): + a_notice.update_status_to(NoticeStatus.PUBLISHED) + + +@given("the notice is packaged") +def step_impl(a_notice): + a_notice.update_status_to(NoticeStatus.PACKAGED) + + +@then("the notice cannot be marked as published") +def step_impl(a_notice): + with pytest.raises(Exception): + a_notice.mark_as_published() + + +@given(parsers.parse("availability check result is {availability}"), target_fixture="availability") +def step_impl(availability): + return str2bool(availability) From 22012c57b943a2fa0de853fb05d5fb14cc7d790d Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:24 +0300 Subject: [PATCH 27/65] Update conftest.py --- tests/features/notice_fetcher/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/features/notice_fetcher/conftest.py b/tests/features/notice_fetcher/conftest.py index 76eac5f70..b7487db29 100644 --- a/tests/features/notice_fetcher/conftest.py +++ b/tests/features/notice_fetcher/conftest.py @@ -4,11 +4,12 @@ from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher +from tests.fakes.fake_ted_api import FakeTedApiAdapter @pytest.fixture def fetch_notice_id(): - return "408313-2020" + return "067623-2022" @pytest.fixture @@ -39,5 +40,4 @@ def fetch_query(fetch_wildcard_date): @pytest.fixture def notice_fetcher(notice_repository, ted_api_end_point): return NoticeFetcher(notice_repository=notice_repository, - ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), - ted_api_url=ted_api_end_point)) + ted_api_adapter=FakeTedApiAdapter()) From cbf5fa4a4584b8116c04391a3984b0af80698c3f Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:27 +0300 Subject: [PATCH 28/65] Create conftest.py --- .../notice_metadata_processor/conftest.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 tests/features/notice_metadata_processor/conftest.py diff --git a/tests/features/notice_metadata_processor/conftest.py b/tests/features/notice_metadata_processor/conftest.py new file mode 100644 index 000000000..adbba8ab9 --- /dev/null +++ b/tests/features/notice_metadata_processor/conftest.py @@ -0,0 +1,47 @@ +import pytest + +from ted_sws import config +from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryInFileSystem, \ + MappingSuiteRepositoryMongoDB +from ted_sws.notice_metadata_processor.services.metadata_normalizer import MetadataNormaliser +from tests import TEST_DATA_PATH +from tests.fakes.fake_repository import FakeNoticeRepository + + +@pytest.fixture +def notice_identifier(): + return "067623-2022" + + +@pytest.fixture +def api_end_point(): + return config.TED_API_URL + + +@pytest.fixture +def fake_notice_storage(): + return FakeNoticeRepository() + + +@pytest.fixture +def notice_eligibility_repository_path(): + return TEST_DATA_PATH / "notice_transformer" / "test_repository" + + +@pytest.fixture +def normalised_notice(notice_2020): + notice = notice_2020.copy() + MetadataNormaliser(notice=notice).normalise_metadata() + return notice + + +@pytest.fixture +def mapping_suite_repository_with_mapping_suite(notice_eligibility_repository_path): + mapping_suite_repository = MappingSuiteRepositoryInFileSystem(repository_path=notice_eligibility_repository_path) + return mapping_suite_repository + + +@pytest.fixture +def clean_mapping_suite_repository(mongodb_client): + mapping_suite_repository = MappingSuiteRepositoryMongoDB(mongodb_client=mongodb_client) + return mapping_suite_repository From adf89bb2b3aa9aa657e531da26c44ac45fc4e241 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:31 +0300 Subject: [PATCH 29/65] Create metadata_normaliser.feature --- .../metadata_normaliser.feature | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/features/notice_metadata_processor/metadata_normaliser.feature diff --git a/tests/features/notice_metadata_processor/metadata_normaliser.feature b/tests/features/notice_metadata_processor/metadata_normaliser.feature new file mode 100644 index 000000000..41efad9b4 --- /dev/null +++ b/tests/features/notice_metadata_processor/metadata_normaliser.feature @@ -0,0 +1,31 @@ +# Created by dude at 25/01/2022 +Feature: Notice metadata normalizer + A fetched notice metadata should be normalized + + Scenario Outline: Normalizing a notice metadata + Given a notice + When the normalize process is executed + Then a normalized notice is available + And the notice status is NORMALISED_METADATA + And normalised metadata is available + + Examples: + | metadata | possibly | + | title | True | + | long_title | True | + | notice_publication_number | True | + | publication_date | True | + | ojs_issue_number | True | + | ojs_type | True | + | city_of_buyer | True | + | name_of_buyer | True | + | original_language | True | + | country_of_buyer | True | + | eu_institution | True | + | document_sent_date | True | + | deadline_for_submission | False | + | notice_type | True | + | form_type | True | + | place_of_performance | True | + | legal_basis_directive | True | + From 83866379400e6f667caae55f27a64d7f70d41ff3 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:36 +0300 Subject: [PATCH 30/65] Create notice_extractor.feature --- .../notice_extractor.feature | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 tests/features/notice_metadata_processor/notice_extractor.feature diff --git a/tests/features/notice_metadata_processor/notice_extractor.feature b/tests/features/notice_metadata_processor/notice_extractor.feature new file mode 100644 index 000000000..8e2504eb3 --- /dev/null +++ b/tests/features/notice_metadata_processor/notice_extractor.feature @@ -0,0 +1,39 @@ +# Created by dude at 24/02/2022 +Feature: Notice extractor + The system is extracting metadata from the xml manifestation + + Scenario Outline: Extracting metadata + Given an XML manifestation + When the extracting process is executed + Then extracted is possibly available + + Examples: + | metadata | + | title | + | notice_publication_number | + | publication_date | + | ojs_issue_number | + | ojs_type | + | city_of_buyer | + | name_of_buyer | + | original_language | + | country_of_buyer | + | type_of_buyer | + | eu_institution | + | document_sent_date | + | deadline_for_submission | + | type_of_contract | + | type_of_procedure | +# | extracted_notice_type | +# | form_number | + | regulation | + | type_of_bid | + | award_criteria | + | common_procurement | + | place_of_performance | + | internet_address | + | legal_basis_directive | +# | xml_schema | +# | xml_schema_version | + + From e96e200df616d05146f23726d668f6cf06cc82dd Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:39 +0300 Subject: [PATCH 31/65] Create test_metadata_normaliser.py --- .../test_metadata_normaliser.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 tests/features/notice_metadata_processor/test_metadata_normaliser.py diff --git a/tests/features/notice_metadata_processor/test_metadata_normaliser.py b/tests/features/notice_metadata_processor/test_metadata_normaliser.py new file mode 100644 index 000000000..5a41a239d --- /dev/null +++ b/tests/features/notice_metadata_processor/test_metadata_normaliser.py @@ -0,0 +1,36 @@ +from pytest_bdd import scenario, given, when, then, parsers + +from ted_sws.core.model.notice import NoticeStatus +from ted_sws.notice_metadata_processor.services.metadata_normalizer import MetadataNormaliser + + +@scenario('metadata_normaliser.feature', 'Normalizing a notice metadata') +def test_extract_metadata(): + """normalising metadata""" + + +@given("a notice", target_fixture="notice") +def step_impl(f03_notice_2020): + return f03_notice_2020 + + +@when("the normalize process is executed") +def step_impl(notice): + MetadataNormaliser(notice=notice).normalise_metadata() + + +@then(parsers.parse("a normalized notice {metadata} is {possibly} available")) +def step_impl(notice, metadata, possibly): + metadata_value = notice.normalised_metadata.dict()[metadata] + is_value_there = "True" if metadata_value else "False" + assert is_value_there == possibly + + +@then("the notice status is NORMALISED_METADATA") +def step_impl(notice): + assert notice.status is NoticeStatus.NORMALISED_METADATA + + +@then("normalised metadata is available") +def step_impl(notice): + assert notice.normalised_metadata From ad40b65dc9a72c46e4e89019d692da867ed0fb47 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:42 +0300 Subject: [PATCH 32/65] Create test_notice_eligibility.feature --- .../test_notice_eligibility.feature | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 tests/features/notice_metadata_processor/test_notice_eligibility.feature diff --git a/tests/features/notice_metadata_processor/test_notice_eligibility.feature b/tests/features/notice_metadata_processor/test_notice_eligibility.feature new file mode 100644 index 000000000..ffe962541 --- /dev/null +++ b/tests/features/notice_metadata_processor/test_notice_eligibility.feature @@ -0,0 +1,22 @@ +# Created by Stefan at 18.08.2022 +Feature: Notice metadata processor + The system is able to process TED notice metadata and check eligibility with mapping rules. + + Scenario: Notice eligibility checking negative + Given a notice + And the notice is with form number F03 + And the notice status is NORMALISED + And a mapping suite repository + And a mapping suite for F03 is not available in mapping suite repository + When the notice eligibility checking is executed + Then the notice status is INELIGIBLE_FOR_TRANSFORMATION + + + Scenario: Notice eligibility checking positive + Given a notice + And the notice is with form number F03 + And the notice status is NORMALISED + And a mapping suite repository + And a mapping suite for F03 is available in mapping suite repository + When the notice eligibility checking is executed + Then the notice status is ELIGIBLE_FOR_TRANSFORMATION From 0a0670e811bee7558b772346c94b9542fe20519c Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:44 +0300 Subject: [PATCH 33/65] Create test_notice_eligibility.py --- .../test_notice_eligibility.py | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 tests/features/notice_metadata_processor/test_notice_eligibility.py diff --git a/tests/features/notice_metadata_processor/test_notice_eligibility.py b/tests/features/notice_metadata_processor/test_notice_eligibility.py new file mode 100644 index 000000000..a616d7aa8 --- /dev/null +++ b/tests/features/notice_metadata_processor/test_notice_eligibility.py @@ -0,0 +1,83 @@ +"""Notice metadata processor feature tests.""" + +from pytest_bdd import ( + given, + scenario, + then, + when, +) + +from ted_sws.core.model.notice import Notice, NoticeStatus +from ted_sws.data_manager.adapters.repository_abc import MappingSuiteRepositoryABC +from ted_sws.notice_metadata_processor.services.notice_eligibility import notice_eligibility_checker + + +@scenario('test_notice_eligibility.feature', 'Notice eligibility checking negative') +def test_notice_eligibility_checking_negative(): + """Notice eligibility checking negative.""" + + +@scenario('test_notice_eligibility.feature', 'Notice eligibility checking positive') +def test_notice_eligibility_checking_positive(): + """Notice eligibility checking positive.""" + + +@given('a mapping suite for F03 is available in mapping suite repository', target_fixture="mapping_suite_repository") +def a_mapping_suite_for_f03_is_available_in_mapping_suite_repository(clean_mapping_suite_repository, + mapping_suite_repository_with_mapping_suite): + """a mapping suite for F03 is available in mapping suite repository.""" + for mapping_suite in mapping_suite_repository_with_mapping_suite.list(): + clean_mapping_suite_repository.add(mapping_suite=mapping_suite) + return clean_mapping_suite_repository + + +@given('a mapping suite for F03 is not available in mapping suite repository', + target_fixture="mapping_suite_repository") +def a_mapping_suite_for_f03_is_not_available_in_mapping_suite_repository(clean_mapping_suite_repository): + """a mapping suite for F03 is not available in mapping suite repository.""" + return clean_mapping_suite_repository + + +@given('a mapping suite repository') +def a_mapping_suite_repository(clean_mapping_suite_repository): + """a mapping suite repository.""" + assert clean_mapping_suite_repository + assert isinstance(clean_mapping_suite_repository, MappingSuiteRepositoryABC) + + +@given('a notice') +def a_notice(normalised_notice): + """a notice.""" + assert normalised_notice + assert isinstance(normalised_notice, Notice) + + +@given('the notice is with form number F03') +def the_notice_is_with_form_number_f03(normalised_notice): + """the notice is with form number F03.""" + assert normalised_notice.normalised_metadata.form_number == "F03" + + +@given('the notice status is NORMALISED') +def the_notice_status_is_normalised(normalised_notice): + """the notice status is NORMALISED.""" + assert normalised_notice.status == NoticeStatus.NORMALISED_METADATA + + +@when('the notice eligibility checking is executed', target_fixture="checked_notice") +def the_notice_eligibility_checking_is_executed(normalised_notice, mapping_suite_repository): + """the notice eligibility checking is executed.""" + notice_eligibility_checker(notice=normalised_notice, mapping_suite_repository=mapping_suite_repository) + return normalised_notice + + +@then('the notice status is ELIGIBLE_FOR_TRANSFORMATION') +def the_notice_status_is_eligible_for_transformation(checked_notice: Notice): + """the notice status is ELIGIBLE_FOR_TRANSFORMATION.""" + assert checked_notice.status == NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION + + +@then('the notice status is INELIGIBLE_FOR_TRANSFORMATION') +def the_notice_status_is_ineligible_for_transformation(checked_notice: Notice): + """the notice status is INELIGIBLE_FOR_TRANSFORMATION.""" + assert checked_notice.status == NoticeStatus.INELIGIBLE_FOR_TRANSFORMATION From be7a2d2a1845cad019d5fd83f3616f9c1b71bf88 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:47 +0300 Subject: [PATCH 34/65] Delete test_notice_eligiblity.feature --- .../test_notice_eligiblity.feature | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 tests/features/notice_metadata_processor/test_notice_eligiblity.feature diff --git a/tests/features/notice_metadata_processor/test_notice_eligiblity.feature b/tests/features/notice_metadata_processor/test_notice_eligiblity.feature deleted file mode 100644 index e3076e406..000000000 --- a/tests/features/notice_metadata_processor/test_notice_eligiblity.feature +++ /dev/null @@ -1,6 +0,0 @@ -# Created by Stefan at 18.08.2022 -Feature: # Enter feature name here - # Enter feature description here - - Scenario: # Enter scenario name here - # Enter steps here \ No newline at end of file From fd6b4616679d4470f788be33cad9e0e9f0f44256 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:51 +0300 Subject: [PATCH 35/65] Create test_notice_extractor.py --- .../test_notice_extractor.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/features/notice_metadata_processor/test_notice_extractor.py diff --git a/tests/features/notice_metadata_processor/test_notice_extractor.py b/tests/features/notice_metadata_processor/test_notice_extractor.py new file mode 100644 index 000000000..8d8d34156 --- /dev/null +++ b/tests/features/notice_metadata_processor/test_notice_extractor.py @@ -0,0 +1,33 @@ +from pytest_bdd import scenario, given, when, then, parsers + +from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata +from ted_sws.notice_metadata_processor.services.xml_manifestation_metadata_extractor import XMLManifestationMetadataExtractor +from ted_sws.notice_fetcher.adapters.ted_api import TedRequestAPI, TedAPIAdapter +from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher + + +@scenario('notice_extractor.feature', 'Extracting metadata') +def test_extract_metadata(): + """Extracting metadata""" + + +@given("an XML manifestation", target_fixture="xml_manifestation") +def step_impl(notice_identifier, api_end_point, fake_notice_storage): + NoticeFetcher(notice_repository=fake_notice_storage, + ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(), + ted_api_url=api_end_point)).fetch_notice_by_id( + document_id=notice_identifier) + return fake_notice_storage.get(reference=notice_identifier).xml_manifestation + + +@when("the extracting process is executed", target_fixture="extracted_metadata") +def step_impl(xml_manifestation): + return XMLManifestationMetadataExtractor(xml_manifestation=xml_manifestation).to_metadata() + + +@then(parsers.parse("extracted {metadata} is possibly available")) +def step_impl(extracted_metadata, notice_identifier, metadata): + assert isinstance(extracted_metadata, ExtractedMetadata) + assert extracted_metadata.dict().keys() == ExtractedMetadata.__fields__.keys() + assert notice_identifier == extracted_metadata.dict()["notice_publication_number"] + assert metadata in extracted_metadata.dict() From a050074ecd7e95dbb795f96bfd726d06646902fe Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:54 +0300 Subject: [PATCH 36/65] Create conftest.py --- tests/features/notice_validator/conftest.py | 239 ++++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 tests/features/notice_validator/conftest.py diff --git a/tests/features/notice_validator/conftest.py b/tests/features/notice_validator/conftest.py new file mode 100644 index 000000000..f1193de77 --- /dev/null +++ b/tests/features/notice_validator/conftest.py @@ -0,0 +1,239 @@ +import pytest + +from ted_sws.core.model.manifestation import RDFManifestation +from ted_sws.core.model.notice import NoticeStatus +from ted_sws.core.model.transform import FileResource, SPARQLTestSuite, MetadataConstraints, TransformationRuleSet, \ + SHACLTestSuite, TransformationTestData, MappingSuite +from tests import TEST_DATA_PATH + + +@pytest.fixture +def query_content(): + return """# title: Official name +# description: this is a description +PREFIX epo: +ASK +WHERE +{ + ?this epo:playedBy / epo:hasDefaultContactPoint / epo:hasFax ?organisationContactPointFax . +} + """ + + +@pytest.fixture +def query_content_without_description(): + return """ + + # title : Official name + + PREFIX epo: + ASK + WHERE + { + ?this epo:playedBy / epo:hasDefaultContactPoint / epo:hasFax ?organisationContactPointFax . + } + """ + + +@pytest.fixture +def query_content_with_xpath(): + return """ + # title: Official name + #xpath: //some/xpath/goes/here + # description: this is a description + + PREFIX epo: + ASK + WHERE + { + ?this epo:playedBy / epo:hasDefaultContactPoint / epo:hasFax ?organisationContactPointFax . + } + """ + + +@pytest.fixture +def rdf_file_content(): + path = TEST_DATA_PATH / "example.ttl" + return path.read_text() + + +@pytest.fixture +def shacl_file_content(): + path = TEST_DATA_PATH / "ePO_shacl_shapes.xml" + return path.read_text() + + +@pytest.fixture +def shacl_file_two_content(): + path = TEST_DATA_PATH / "ePO_shacl_shapes_two.xml" + return path.read_text() + + +@pytest.fixture +def list_of_shacl_files(shacl_file_content, shacl_file_two_content): + return [ + FileResource(file_name="shacl_file_one.xml", file_content=shacl_file_content), + FileResource(file_name="shacl_file_two.xml", file_content=shacl_file_two_content) + ] + + +@pytest.fixture +def shacl_file_one(shacl_file_content): + return FileResource(file_name="shacl_file_one.xml", file_content=shacl_file_content) + + +@pytest.fixture +def shacl_file_two(shacl_file_two_content): + return FileResource(file_name="shacl_file_two.xml", file_content=shacl_file_two_content) + + +@pytest.fixture +def shacl_file_with_error(): + return FileResource(file_name="shacl_file_with_error", file_content="something fishy") + + +@pytest.fixture +def validator_query(): + return """ +prefix dash: +prefix sh: +prefix message: + +SELECT ?focusNode ?message ?resultPath ?resultSeverity ?sourceConstraintComponent ?sourceShape ?value +WHERE { + ?vr a sh:ValidationResult . + ?vr sh:focusNode ?focusNode . + OPTIONAL { + ?vr sh:message ?message . + } + OPTIONAL { + ?vr sh:resultPath ?resultPath . + } + OPTIONAL { + ?vr sh:resultSeverity ?resultSeverity . + } + OPTIONAL { + ?vr sh:sourceConstraintComponent ?sourceConstraintComponent . + } + OPTIONAL { + ?vr sh:sourceShape ?sourceShape . + } + OPTIONAL { + ?vr sh:value ?value . + } +} +ORDER BY ?focusNode ?resultSeverity ?sourceConstraintComponent + """ + + +@pytest.fixture +def sparql_file_one(): + query = """# title: Title One +# description: this is a description +PREFIX epo: +ASK +WHERE +{ + ?this epo:playedBy / epo:hasDefaultContactPoint / epo:hasFax ?organisationContactPointFax . +} + """ + return FileResource(file_name="good_file", file_content=query) + + +@pytest.fixture +def sparql_file_two(): + query = """# title: Title Two +# description: this is a description +PREFIX epo: +ASK +WHERE +{ + ?this epo:IsRoleOf / epo:hasName ?value . +} + """ + return FileResource(file_name="better_file", file_content=query) + + +@pytest.fixture +def invalid_sparql_file(): + query = """# title: Title Two +# description: this is a description +ASK +WHERE +{ + ?this hasName ?value . +} + """ + return FileResource(file_name="some_file", file_content=query) + + +@pytest.fixture +def sparql_test_suite(sparql_file_one, sparql_file_two): + return SPARQLTestSuite(identifier="sparql_test_package", sparql_tests=[sparql_file_one, sparql_file_two]) + + +@pytest.fixture +def shacl_test_suite(shacl_file_one, shacl_file_two): + return SHACLTestSuite(identifier="shacl_test_package", shacl_tests=[shacl_file_one, shacl_file_two]) + + +@pytest.fixture +def bad_shacl_test_suite(shacl_file_one, shacl_file_with_error): + return SHACLTestSuite(identifier="bad_shacl_test_package", shacl_tests=[shacl_file_one, shacl_file_with_error]) + + +@pytest.fixture +def sparql_test_suite_with_invalid_query(invalid_sparql_file): + return SPARQLTestSuite(identifier="sparql_test_package", sparql_tests=[invalid_sparql_file]) + + +@pytest.fixture +def mapping_suite(sparql_test_suite, shacl_test_suite): + metadata_constrains = MetadataConstraints(constraints=dict()) + file_name = "fake_title.txt" + empty_file_resource = FileResource(file_name=file_name, file_content="no content here", original_name=file_name) + transformation_rule_set = TransformationRuleSet(resources=[empty_file_resource], + rml_mapping_rules=[empty_file_resource] + ) + shacl_test_suite = shacl_test_suite + sparql_test_suite = sparql_test_suite + transformation_test_data = TransformationTestData(test_data=[empty_file_resource]) + return MappingSuite(metadata_constraints=metadata_constrains, + transformation_rule_set=transformation_rule_set, + shacl_test_suites=[shacl_test_suite], + sparql_test_suites=[sparql_test_suite], + transformation_test_data=transformation_test_data + ) + + +@pytest.fixture +def path_to_file_system_repository(): + return TEST_DATA_PATH / "notice_transformer" / "test_repository" + + +@pytest.fixture +def notice_with_distilled_status(notice_2020, rdf_file_content): + notice_2020.update_status_to(new_status=NoticeStatus.NORMALISED_METADATA) + notice_2020.update_status_to(new_status=NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION) + notice_2020.update_status_to(new_status=NoticeStatus.PREPROCESSED_FOR_TRANSFORMATION) + notice_2020.set_rdf_manifestation(rdf_manifestation=RDFManifestation(object_data=rdf_file_content)) + notice_2020.set_distilled_rdf_manifestation( + distilled_rdf_manifestation=RDFManifestation(object_data=rdf_file_content)) + notice_2020.update_status_to(new_status=NoticeStatus.DISTILLED) + + return notice_2020 + + +@pytest.fixture +def fake_repository_path(): + return TEST_DATA_PATH / "notice_validator" / "test_repository" + + +@pytest.fixture +def invalid_mapping_suite_id() -> str: + return "test_invalid_package" + + +@pytest.fixture +def cellar_sparql_endpoint(): + return "https://publications.europa.eu/webapi/rdf/sparql" From 3befd5c00430b182a11caedeafd1c80a59d6897e Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:55:57 +0300 Subject: [PATCH 37/65] Update test_notice_validator.feature --- .../test_notice_validator.feature | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/features/notice_validator/test_notice_validator.feature b/tests/features/notice_validator/test_notice_validator.feature index a6a71cde7..495d88254 100644 --- a/tests/features/notice_validator/test_notice_validator.feature +++ b/tests/features/notice_validator/test_notice_validator.feature @@ -1,6 +1,20 @@ # Created by Stefan at 16.08.2022 -Feature: # Enter feature name here - # Enter feature description here +Feature: Notice Validator + The system is able to validate the notice xml and rdf manifestation. + + Scenario: SHACL validation + Given a notice + And a mapping suite package + And at least one SHACL test suite is available + And the notice status is DISTILLED + When the notice shacl validation is executed + Then the notice have SHACL validation reports for each RDF manifestation + + Scenario: SPARQL validation + Given a notice + And a mapping suite package + And at least one SPARQL test suite is available + And the notice status is DISTILLED + When the notice sparql validation is executed + Then the notice have SPARQL validation reports for each RDF manifestation - Scenario: # Enter scenario name here - # Enter steps here \ No newline at end of file From d4cb1b814965a09a548678c9e66bfa5045a11ace Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:56:00 +0300 Subject: [PATCH 38/65] Create test_notice_validator.py --- .../notice_validator/test_notice_validator.py | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 tests/features/notice_validator/test_notice_validator.py diff --git a/tests/features/notice_validator/test_notice_validator.py b/tests/features/notice_validator/test_notice_validator.py new file mode 100644 index 000000000..05a193e3b --- /dev/null +++ b/tests/features/notice_validator/test_notice_validator.py @@ -0,0 +1,111 @@ +"""Notice Validator feature tests.""" + +from pytest_bdd import ( + given, + scenario, + then, + when, +) + +from ted_sws.core.model.manifestation import SHACLTestSuiteValidationReport, \ + SPARQLTestSuiteValidationReport +from ted_sws.core.model.notice import Notice, NoticeStatus +from ted_sws.core.model.transform import MappingSuite +from ted_sws.notice_validator.services.shacl_test_suite_runner import validate_notice_with_shacl_suite +from ted_sws.notice_validator.services.sparql_test_suite_runner import validate_notice_with_sparql_suite + + +@scenario('test_notice_validator.feature', 'SHACL validation') +def test_shacl_validation(): + """SHACL validation.""" + + +@scenario('test_notice_validator.feature', 'SPARQL validation') +def test_sparql_validation(): + """SPARQL validation.""" + + +@given('a mapping suite package') +def a_mapping_suite_package(mapping_suite): + """a mapping suite package.""" + assert mapping_suite + assert isinstance(mapping_suite, MappingSuite) + + +@given('a notice') +def a_notice(notice_with_distilled_status): + """a notice.""" + assert notice_with_distilled_status + assert isinstance(notice_with_distilled_status, Notice) + + +@given('at least one SHACL test suite is available') +def at_least_one_shacl_test_suite_is_available(mapping_suite): + """at least one SHACL test suite is available.""" + assert mapping_suite.shacl_test_suites + assert len(mapping_suite.shacl_test_suites) + + +@given('at least one SPARQL test suite is available') +def at_least_one_sparql_test_suite_is_available(mapping_suite): + """at least one SPARQL test suite is available.""" + assert mapping_suite.sparql_test_suites + assert len(mapping_suite.sparql_test_suites) + + +@given('the notice status is DISTILLED') +def the_notice_status_is_distilled(notice_with_distilled_status): + """the notice status is DISTILLED.""" + assert notice_with_distilled_status.status == NoticeStatus.DISTILLED + + +@when('the notice shacl validation is executed', target_fixture="shacl_validated_notice") +def the_notice_shacl_validation_is_executed(notice_with_distilled_status, mapping_suite): + """the notice shacl validation is executed.""" + validate_notice_with_shacl_suite(notice=notice_with_distilled_status, mapping_suite_package=mapping_suite) + return notice_with_distilled_status + + +@when('the notice sparql validation is executed', target_fixture="sparql_validated_notice") +def the_notice_sparql_validation_is_executed(notice_with_distilled_status, mapping_suite): + """the notice sparql validation is executed.""" + validate_notice_with_sparql_suite(notice=notice_with_distilled_status, mapping_suite_package=mapping_suite) + return notice_with_distilled_status + + +@then('the notice have SHACL validation reports for each RDF manifestation') +def the_notice_have_shacl_validation_reports_for_each_rdf_manifestation(shacl_validated_notice): + """the notice have SHACL validation reports for each RDF manifestation.""" + notice = shacl_validated_notice + rdf_validation = notice.get_rdf_validation() + distilled_rdf_validation = notice.get_distilled_rdf_validation() + assert notice.status == NoticeStatus.DISTILLED + assert isinstance(rdf_validation, list) + assert len(rdf_validation) == 1 + assert isinstance(rdf_validation[0], SHACLTestSuiteValidationReport) + assert rdf_validation[0].object_data + assert rdf_validation[0].validation_results + assert isinstance(distilled_rdf_validation, list) + assert len(distilled_rdf_validation) == 1 + assert isinstance(distilled_rdf_validation[0], SHACLTestSuiteValidationReport) + assert distilled_rdf_validation[0].object_data + assert distilled_rdf_validation[0].validation_results + + +@then('the notice have SPARQL validation reports for each RDF manifestation') +def the_notice_have_sparql_validation_reports_for_each_rdf_manifestation(sparql_validated_notice): + """the notice have SPARQL validation reports for each RDF manifestation.""" + notice = sparql_validated_notice + rdf_validation = notice.get_rdf_validation() + distilled_rdf_validation = notice.get_distilled_rdf_validation() + assert notice.status == NoticeStatus.DISTILLED + assert isinstance(rdf_validation, list) + assert len(rdf_validation) == 1 + assert isinstance(rdf_validation[0], SPARQLTestSuiteValidationReport) + assert rdf_validation[0].object_data + assert rdf_validation[0].validation_results + assert isinstance(distilled_rdf_validation, list) + assert len(distilled_rdf_validation) == 1 + assert isinstance(distilled_rdf_validation[0], SPARQLTestSuiteValidationReport) + assert distilled_rdf_validation[0].object_data + assert distilled_rdf_validation[0].validation_results From 0e0f7e222003c9b733005615a38f5ede3504c15c Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:56:04 +0300 Subject: [PATCH 39/65] Update __init__.py --- tests/old_features/__init__.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tests/old_features/__init__.py b/tests/old_features/__init__.py index 54e38cdef..64b28bdf5 100644 --- a/tests/old_features/__init__.py +++ b/tests/old_features/__init__.py @@ -8,12 +8,4 @@ """ """ -def str2bool(value: str) -> bool: - """ - Parse a string value and cast it into its boolean value - :param value: - :return: - """ - if value in ["y", "yes", "t", "true", "on", "1"]: return True - if value in ["n", "no", "f", "false", "off", "0"]: return False - raise ValueError("boolean value unrecognised") + From 32f174e019d4ce3a7dfc5cf64e06d2a37d4e614b Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:56:07 +0300 Subject: [PATCH 40/65] Update conftest.py --- tests/old_features/model/conftest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/old_features/model/conftest.py b/tests/old_features/model/conftest.py index d7629ce38..3d744070f 100644 --- a/tests/old_features/model/conftest.py +++ b/tests/old_features/model/conftest.py @@ -64,3 +64,6 @@ def transformation_eligible_notice(indexed_notice, normalised_metadata_dict) -> indexed_notice.update_status_to(NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION) indexed_notice.update_status_to(NoticeStatus.PREPROCESSED_FOR_TRANSFORMATION) return indexed_notice + + + From ebca9deeaccb0f6cebd54fc51b0a2d7ac16c5fd3 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 00:56:11 +0300 Subject: [PATCH 41/65] Update test_notice_operations.py --- tests/old_features/model/test_notice_operations.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/old_features/model/test_notice_operations.py b/tests/old_features/model/test_notice_operations.py index 0f73b76d0..8e0b59d4e 100644 --- a/tests/old_features/model/test_notice_operations.py +++ b/tests/old_features/model/test_notice_operations.py @@ -4,7 +4,17 @@ from ted_sws.core.model.manifestation import RDFManifestation, RDFValidationManifestation, METSManifestation from ted_sws.core.model.metadata import NormalisedMetadata from ted_sws.core.model.notice import NoticeStatus -from tests.features import str2bool + + +def str2bool(value: str) -> bool: + """ + Parse a string value and cast it into its boolean value + :param value: + :return: + """ + if value in ["y", "yes", "t", "true", "on", "1"]: return True + if value in ["n", "no", "f", "false", "off", "0"]: return False + raise ValueError("boolean value unrecognised") @scenario("test_notice_operations.feature", "add normalised metadata") From cf8db6067de76a873cccb1e5123b4da13bb090f3 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 19:44:05 +0300 Subject: [PATCH 42/65] Update supra_notice.py --- ted_sws/core/model/supra_notice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ted_sws/core/model/supra_notice.py b/ted_sws/core/model/supra_notice.py index de8ab3d40..94e69570d 100644 --- a/ted_sws/core/model/supra_notice.py +++ b/ted_sws/core/model/supra_notice.py @@ -45,7 +45,7 @@ class DailySupraNotice(SupraNotice): """ This is an aggregate over the notices published in TED in a specific day. """ - notice_publication_date: date + notice_fetched_date: date validation_report: Optional[SupraNoticeValidationReport] From b465103e2ef8550d893dd9a0202491565c695056 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 19:44:08 +0300 Subject: [PATCH 43/65] Update supra_notice_repository.py --- ted_sws/data_manager/adapters/supra_notice_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ted_sws/data_manager/adapters/supra_notice_repository.py b/ted_sws/data_manager/adapters/supra_notice_repository.py index a243d1ba1..a458783ea 100644 --- a/ted_sws/data_manager/adapters/supra_notice_repository.py +++ b/ted_sws/data_manager/adapters/supra_notice_repository.py @@ -7,7 +7,7 @@ from ted_sws.core.model.supra_notice import DailySupraNotice from ted_sws.data_manager.adapters.repository_abc import DailySupraNoticeRepositoryABC -DAILY_SUPRA_NOTICE_ID = "notice_publication_date" +DAILY_SUPRA_NOTICE_ID = "notice_fetched_date" class DailySupraNoticeRepository(DailySupraNoticeRepositoryABC): From 69eaae8f16cd5eecadd656be732ee0e899e57398 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 19:44:11 +0300 Subject: [PATCH 44/65] Update test_supra_notice_repository.py --- tests/unit/data_manager/test_supra_notice_repository.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/data_manager/test_supra_notice_repository.py b/tests/unit/data_manager/test_supra_notice_repository.py index 199af798a..984bfcc68 100644 --- a/tests/unit/data_manager/test_supra_notice_repository.py +++ b/tests/unit/data_manager/test_supra_notice_repository.py @@ -6,17 +6,17 @@ def test_daily_supra_notice_repository(mongodb_client, daily_supra_notice): daily_supra_notice_repository = DailySupraNoticeRepository(mongodb_client=mongodb_client) daily_supra_notice_repository.add(daily_supra_notice=daily_supra_notice) - result_supra_notice = daily_supra_notice_repository.get(reference=daily_supra_notice.notice_publication_date) + result_supra_notice = daily_supra_notice_repository.get(reference=daily_supra_notice.notice_fetched_date) assert result_supra_notice assert len(result_supra_notice.notice_ids) == 3 - assert result_supra_notice.notice_publication_date == daily_supra_notice.notice_publication_date + assert result_supra_notice.notice_fetched_date == daily_supra_notice.notice_fetched_date assert result_supra_notice.created_at == daily_supra_notice.created_at assert result_supra_notice.notice_ids == daily_supra_notice.notice_ids daily_supra_notice_repository.update(daily_supra_notice=daily_supra_notice) - result_supra_notice = daily_supra_notice_repository.get(reference=daily_supra_notice.notice_publication_date) + result_supra_notice = daily_supra_notice_repository.get(reference=daily_supra_notice.notice_fetched_date) assert result_supra_notice assert len(result_supra_notice.notice_ids) == 3 - assert result_supra_notice.notice_publication_date == daily_supra_notice.notice_publication_date + assert result_supra_notice.notice_fetched_date == daily_supra_notice.notice_fetched_date assert result_supra_notice.created_at == daily_supra_notice.created_at assert result_supra_notice.notice_ids == daily_supra_notice.notice_ids result = list(daily_supra_notice_repository.list()) From da4638b94f70cf65453036a7a2eb160bc332594e Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 19:44:14 +0300 Subject: [PATCH 45/65] Update test_daily_supra_notice_manager.py --- .../test_daily_supra_notice_manager.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/supra_notice_manager/test_daily_supra_notice_manager.py b/tests/unit/supra_notice_manager/test_daily_supra_notice_manager.py index d63fb756f..781182b94 100644 --- a/tests/unit/supra_notice_manager/test_daily_supra_notice_manager.py +++ b/tests/unit/supra_notice_manager/test_daily_supra_notice_manager.py @@ -6,17 +6,17 @@ def test_daily_supra_notice_manager(mongodb_client, daily_supra_notice_repository): notice_ids = ["1", "2", "3"] - notice_publication_date = date(2020, 1, 1) + notice_fetched_date = date(2020, 1, 1) create_and_store_in_mongo_db_daily_supra_notice(notice_ids=notice_ids, mongodb_client=mongodb_client, - notice_publication_date=notice_publication_date + notice_publication_date=notice_fetched_date ) for result in daily_supra_notice_repository.list(): assert result - result = daily_supra_notice_repository.get(reference=notice_publication_date) + result = daily_supra_notice_repository.get(reference=notice_fetched_date) assert result - assert result.notice_publication_date == notice_publication_date + assert result.notice_fetched_date == notice_fetched_date notice_ids.append("4") result.notice_ids = notice_ids daily_supra_notice_repository.update(daily_supra_notice=result) - result = daily_supra_notice_repository.get(reference=notice_publication_date) + result = daily_supra_notice_repository.get(reference=notice_fetched_date) assert result.notice_ids == notice_ids From bdd5509792419796dbb28b651f70cdc1e3bad57e Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 19:46:27 +0300 Subject: [PATCH 46/65] Update selector_daily_fetch_orchestrator.py --- dags/selector_daily_fetch_orchestrator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dags/selector_daily_fetch_orchestrator.py b/dags/selector_daily_fetch_orchestrator.py index 7f17c5f96..065dcd782 100644 --- a/dags/selector_daily_fetch_orchestrator.py +++ b/dags/selector_daily_fetch_orchestrator.py @@ -48,7 +48,7 @@ def fetch_notice_from_ted(): request_api=TedRequestAPI())).fetch_notices_by_date_wild_card( wildcard_date=current_datetime_wildcard) create_and_store_in_mongo_db_daily_supra_notice(notice_ids=notice_ids, mongodb_client=mongodb_client, - notice_publication_date=notice_publication_date) + notice_fetched_date=notice_publication_date) return notice_ids @task From b0e316dd1d31fe774525cf69cbe2a42677038827 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 19:46:29 +0300 Subject: [PATCH 47/65] Update daily_supra_notice_manager.py --- .../services/daily_supra_notice_manager.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ted_sws/supra_notice_manager/services/daily_supra_notice_manager.py b/ted_sws/supra_notice_manager/services/daily_supra_notice_manager.py index d55668ba4..2cee7a7d0 100644 --- a/ted_sws/supra_notice_manager/services/daily_supra_notice_manager.py +++ b/ted_sws/supra_notice_manager/services/daily_supra_notice_manager.py @@ -7,15 +7,15 @@ def create_and_store_in_mongo_db_daily_supra_notice(notice_ids: List[str], mongodb_client: MongoClient, - notice_publication_date: date = date.today()): + notice_fetched_date: date = date.today()): """ This function creates and stores a DailySupraNotice in MongoDB. :param notice_ids: :param mongodb_client: - :param notice_publication_date: + :param notice_fetched_date: :return: """ daily_supra_notice_repository = DailySupraNoticeRepository(mongodb_client=mongodb_client) - daily_supra_notice = DailySupraNotice(notice_publication_date=notice_publication_date, + daily_supra_notice = DailySupraNotice(notice_fetched_date=notice_fetched_date, notice_ids=notice_ids) daily_supra_notice_repository.add(daily_supra_notice=daily_supra_notice) From 141bf75fa0407d2c0b38ff3c29ebbd35952c388a Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 19:46:32 +0300 Subject: [PATCH 48/65] Update test_daily_supra_notice_manager.py --- .../supra_notice_manager/test_daily_supra_notice_manager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/supra_notice_manager/test_daily_supra_notice_manager.py b/tests/unit/supra_notice_manager/test_daily_supra_notice_manager.py index 781182b94..423325737 100644 --- a/tests/unit/supra_notice_manager/test_daily_supra_notice_manager.py +++ b/tests/unit/supra_notice_manager/test_daily_supra_notice_manager.py @@ -8,8 +8,7 @@ def test_daily_supra_notice_manager(mongodb_client, daily_supra_notice_repositor notice_ids = ["1", "2", "3"] notice_fetched_date = date(2020, 1, 1) create_and_store_in_mongo_db_daily_supra_notice(notice_ids=notice_ids, mongodb_client=mongodb_client, - notice_publication_date=notice_fetched_date - ) + notice_fetched_date=notice_fetched_date) for result in daily_supra_notice_repository.list(): assert result result = daily_supra_notice_repository.get(reference=notice_fetched_date) From 4006f5d8d7eb17b6085102d1ae22da4df42eff90 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 23:08:39 +0300 Subject: [PATCH 49/65] Create docker-compose.yaml --- infra/airflow-cluster/docker-compose.yaml | 350 ++++++++++++++++++++++ 1 file changed, 350 insertions(+) create mode 100644 infra/airflow-cluster/docker-compose.yaml diff --git a/infra/airflow-cluster/docker-compose.yaml b/infra/airflow-cluster/docker-compose.yaml new file mode 100644 index 000000000..bd71714f3 --- /dev/null +++ b/infra/airflow-cluster/docker-compose.yaml @@ -0,0 +1,350 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL. +# +# WARNING: This configuration is for local development. Do not use it in a production deployment. +# +# This configuration supports basic configuration using environment variables or an .env file +# The following variables are supported: +# +# AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow. +# Default: apache/airflow:|version| +# AIRFLOW_UID - User ID in Airflow containers +# Default: 50000 +# Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode +# +# _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account (if requested). +# Default: airflow +# _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account (if requested). +# Default: airflow +# _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers. +# Default: '' +# +# Feel free to modify this file to suit your needs. +--- +version: '3' +x-airflow-common: + &airflow-common + # In order to add custom dependencies or upgrade provider packages you can use your extended image. + # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml + # and uncomment the "build" line below, Then run `docker-compose build` to build the images. + #image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.2.2-python3.8} + #build: . + image: meaningfy/airflow:latest + env_file: + - ../../.env + environment: + &airflow-common-env + AIRFLOW__CORE__PARALLELISM: 64 + AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 32 + AIRFLOW__SCHEDULER__PARSING_PROCESSES: 4 + AIRFLOW__CELERY__WORKER_CONCURRENCY: 8 + AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@ted-data.eu/airflow + AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@ted-data.eu/airflow + AIRFLOW__CELERY__BROKER_URL: redis://:@ted-data.eu:6379/0 + #AIRFLOW__CORE__FERNET_KEY: ${FERNET_KEY} + #AIRFLOW__WEBSERVER__SECRET_KEY: ${SECRET_KEY} + IS_PRIME_ENV: 'true' + AIRFLOW__CORE__EXECUTOR: CeleryExecutor + #AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow + #AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow + #AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0 + AIRFLOW__CORE__FERNET_KEY: '' + AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' + AIRFLOW__CORE__ENABLE_XCOM_PICKLING: "true" + AIRFLOW__CORE__LOAD_EXAMPLES: 'false' + AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth' + _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} + VAULT_TOKEN: ${VAULT_TOKEN} + VAULT_ADDR: ${VAULT_ADDR} + ENVIRONMENT: ${ENVIRONMENT} + PYTHONPATH: /opt/airflow/ + AIRFLOW_HOME: /opt/airflow + RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar + XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar + DAG_LOGGER_CONFIG_HANDLERS: ${DAG_LOGGER_CONFIG_HANDLERS} + extra_hosts: + - "hermes-worker:${HERMES_IP_ADDRESS}" + - "srv-worker:${SRV_IP_ADDRESS}" + volumes: +# - ./config/airflow.cfg:/opt/airflow/airflow.cfg + - ${AIRFLOW_INFRA_FOLDER}/.env:/opt/airflow/.env + - ${AIRFLOW_INFRA_FOLDER}/dags:/opt/airflow/dags + - ${AIRFLOW_INFRA_FOLDER}/logs:/opt/airflow/logs + - ${AIRFLOW_INFRA_FOLDER}/plugins:/opt/airflow/plugins + - ${AIRFLOW_INFRA_FOLDER}/ted_sws:/opt/airflow/ted_sws + - ${AIRFLOW_INFRA_FOLDER}/tests:/opt/airflow/tests + user: "${AIRFLOW_UID:-50000}:0" + command: bash -c "export PYTHONPATH='/opt/airflow/'" + depends_on: + &airflow-common-depends-on + redis: + condition: service_healthy + postgres: + condition: service_healthy + +services: + postgres: + image: postgres:13 + container_name: postgres-airflow-${ENVIRONMENT} + environment: + POSTGRES_USER: airflow + POSTGRES_PASSWORD: airflow + POSTGRES_DB: airflow + ports: + - "5432:5432" + volumes: + - postgres-db-volume:/var/lib/postgresql/data + healthcheck: + test: ["CMD", "pg_isready", "-U", "airflow"] + interval: 5s + retries: 5 + restart: unless-stopped + networks: + - airflow + + redis: + image: redis:latest + container_name: redis-airflow-${ENVIRONMENT} + expose: + - 6379:6379 + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 30s + retries: 50 + restart: unless-stopped + networks: + - airflow + + airflow-webserver: + <<: *airflow-common + container_name: airflow-webserver-${ENVIRONMENT} + command: webserver + restart: unless-stopped + networks: + - airflow + - proxy-net + labels: + #### Labels define the behavior and rules of the traefik proxy for this container #### + - "traefik.enable=true" # <== Enable traefik to proxy this container + - "traefik.http.routers.${ENVIRONMENT}-airflow.rule=Host(`airflow.${SUBDOMAIN}${DOMAIN}`)" # <== Your Domain Name goes here for the http rule + - "traefik.http.routers.${ENVIRONMENT}-airflow.entrypoints=web" # <== Defining the entrypoint for http, **ref: line 30 + + - "traefik.http.routers.airflow.middlewares=redirect@file" # <== This is a middleware to redirect to https + - "traefik.http.routers.${ENVIRONMENT}-airflow-secured.rule=Host(`airflow.${SUBDOMAIN}${DOMAIN}`)" # <== Your Domain Name for the https rule + - "traefik.http.routers.${ENVIRONMENT}-airflow-secured.entrypoints=web-secured" # <== Defining entrypoint for https, **ref: line 31 + - "traefik.http.routers.${ENVIRONMENT}-airflow-secured.tls.certresolver=mytlschallenge" # <== Defining certsresolvers for https + - "traefik.http.services.${ENVIRONMENT}-airflow-secured.loadbalancer.server.port=8080" + + - "traefik.http.routers.${ENVIRONMENT}-airflow.middlewares=admin-auth" + - "traefik.http.middlewares.admin-auth.basicauth.users=admin:$$apr1$$O4NQPpRP$$P5LlBzvwUi3UuuRU9KuxY." + + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + + + + airflow-scheduler: + <<: *airflow-common + container_name: airflow-scheduler-${ENVIRONMENT} + command: scheduler + healthcheck: + test: ["CMD-SHELL", 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"'] + interval: 10s + timeout: 10s + retries: 5 + restart: unless-stopped + networks: + - airflow + - common-ext + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + airflow-worker: + <<: *airflow-common + container_name: airflow-worker-${ENVIRONMENT} + command: celery worker + hostname: ${WORKER_HOSTNAME} + ports: + - "8793:8793" + healthcheck: + test: ["CMD-SHELL",'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"'] + interval: 10s + timeout: 10s + retries: 5 + environment: + <<: *airflow-common-env + # Required to handle warm shutdown of the celery workers properly + # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation + DUMB_INIT_SETSID: "0" + restart: unless-stopped + networks: + - airflow + - common-ext + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + airflow-triggerer: + <<: *airflow-common + container_name: airflow-triggerer-${ENVIRONMENT} + command: triggerer + healthcheck: + test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"'] + interval: 10s + timeout: 10s + retries: 5 + restart: unless-stopped + networks: + - airflow + - common-ext + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + airflow-init: + <<: *airflow-common + entrypoint: /bin/bash + # yamllint disable rule:line-length + command: + - -c + - | + function ver() { + printf "%04d%04d%04d%04d" $${1//./ } + } + airflow_version=$$(gosu airflow airflow version) + airflow_version_comparable=$$(ver $${airflow_version}) + min_airflow_version=2.2.0 + min_airflow_version_comparable=$$(ver $${min_airflow_version}) + if (( airflow_version_comparable < min_airflow_version_comparable )); then + echo + echo -e "\033[1;31mERROR!!!: Too old Airflow version $${airflow_version}!\e[0m" + echo "The minimum Airflow version supported: $${min_airflow_version}. Only use this or higher!" + echo + exit 1 + fi + if [[ -z "${AIRFLOW_UID}" ]]; then + echo + echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m" + echo "If you are on Linux, you SHOULD follow the instructions below to set " + echo "AIRFLOW_UID environment variable, otherwise files will be owned by root." + echo "For other operating systems you can get rid of the warning with manually created .env file:" + echo " See: https://airflow.apache.org/docs/apache-airflow/stable/start/docker.html#setting-the-right-airflow-user" + echo + fi + one_meg=1048576 + mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg)) + cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat) + disk_available=$$(df / | tail -1 | awk '{print $$4}') + warning_resources="false" + if (( mem_available < 4000 )) ; then + echo + echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m" + echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))" + echo + warning_resources="true" + fi + if (( cpus_available < 2 )); then + echo + echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m" + echo "At least 2 CPUs recommended. You have $${cpus_available}" + echo + warning_resources="true" + fi + if (( disk_available < one_meg * 10 )); then + echo + echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m" + echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))" + echo + warning_resources="true" + fi + if [[ $${warning_resources} == "true" ]]; then + echo + echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m" + echo "Please follow the instructions to increase amount of resources available:" + echo " https://airflow.apache.org/docs/apache-airflow/stable/start/docker.html#before-you-begin" + echo + fi + exec /entrypoint airflow version + # yamllint enable rule:line-length + environment: + <<: *airflow-common-env + _AIRFLOW_DB_UPGRADE: 'true' + _AIRFLOW_WWW_USER_CREATE: 'true' + _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} + _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} + user: "0:0" + volumes: + - .:/sources + networks: + - airflow + + airflow-cli: + <<: *airflow-common + profiles: + - debug + environment: + <<: *airflow-common-env + CONNECTION_CHECK_MAX_COUNT: "0" + # Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252 + command: + - bash + - -c + - airflow + networks: + - airflow + + flower: + <<: *airflow-common + container_name: airflow-flower-${ENVIRONMENT} + command: celery flower + restart: unless-stopped + networks: + - airflow + - proxy-net + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + +volumes: + postgres-db-volume: + name: airflow-${ENVIRONMENT} + + +networks: + airflow: + internal: true + name: airflow-${ENVIRONMENT} + networks: + common-ext: + external: + name: common-ext-${ENVIRONMENT} + proxy-net: + external: + name: proxy-net From 9f930b66ad9ea0892c055b366af25b59242ca2c3 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 23:08:42 +0300 Subject: [PATCH 50/65] Create Dockerfile --- infra/airflow-cluster/Dockerfile | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 infra/airflow-cluster/Dockerfile diff --git a/infra/airflow-cluster/Dockerfile b/infra/airflow-cluster/Dockerfile new file mode 100644 index 000000000..e12913c66 --- /dev/null +++ b/infra/airflow-cluster/Dockerfile @@ -0,0 +1,25 @@ +FROM apache/airflow:2.2.5-python3.8 + +# quick sudo +USER root +RUN apt-get update && apt-get install make && apt-get install build-essential git -y && apt-get install -y wget && apt-get install -y unzip +RUN apt install -y default-jre + +# back to normal user +USER airflow + +# requirements.txt shall be made availble from the **ted-sws** GitHub repository +COPY requirements.txt /opt/airflow + +# working in the /opt/airflow +WORKDIR /opt/airflow +RUN mkdir -p ./.rmlmapper +RUN wget -c https://api.bitbucket.org/2.0/repositories/Dragos0000/rml-mapper/src/master/rmlmapper.jar -P ./.rmlmapper + + +RUN wget -c https://kumisystems.dl.sourceforge.net/project/saxon/Saxon-HE/10/Java/SaxonHE10-6J.zip -P .saxon/ +RUN cd .saxon && unzip SaxonHE10-6J.zip && rm -rf SaxonHE10-6J.zip + + +RUN pip install --upgrade pip +RUN pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.2.5/constraints-no-providers-3.8.txt" From 056ba12db3fb33ef64325f5ccc24ec1b45f1735f Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 23:08:45 +0300 Subject: [PATCH 51/65] Update Makefile --- Makefile | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Makefile b/Makefile index ff5ab175e..654a0754a 100644 --- a/Makefile +++ b/Makefile @@ -91,11 +91,41 @@ create-env-airflow: @ chmod 777 ${AIRFLOW_INFRA_FOLDER}/logs ${AIRFLOW_INFRA_FOLDER}/plugins ${AIRFLOW_INFRA_FOLDER}/.env @ cp requirements.txt ./infra/airflow/ +create-env-airflow-cluster: + @ echo -e "$(BUILD_PRINT) Create Airflow env $(END_BUILD_PRINT)" + @ echo -e "$(BUILD_PRINT) ${AIRFLOW_INFRA_FOLDER} ${ENVIRONMENT} $(END_BUILD_PRINT)" + @ mkdir -p ${AIRFLOW_INFRA_FOLDER}/logs ${AIRFLOW_INFRA_FOLDER}/plugins ${AIRFLOW_INFRA_FOLDER}/.env + @ ln -s -f -n ${PROJECT_PATH}/dags ${AIRFLOW_INFRA_FOLDER}/dags + @ ln -s -f -n ${PROJECT_PATH}/ted_sws ${AIRFLOW_INFRA_FOLDER}/ted_sws + @ chmod 777 ${AIRFLOW_INFRA_FOLDER}/logs ${AIRFLOW_INFRA_FOLDER}/plugins ${AIRFLOW_INFRA_FOLDER}/.env + @ cp requirements.txt ./infra/airflow-cluster/ + build-airflow: guard-ENVIRONMENT create-env-airflow build-externals @ echo -e "$(BUILD_PRINT) Build Airflow services $(END_BUILD_PRINT)" @ docker build -t meaningfy/airflow ./infra/airflow/ @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate +build-airflow-cluster: guard-ENVIRONMENT create-env-airflow-cluster build-externals + @ echo -e "$(BUILD_PRINT) Build Airflow services $(END_BUILD_PRINT)" + @ docker build -t meaningfy/airflow ./infra/airflow-cluster/ + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up airflow-init + +start-airflow-cluster: build-externals + @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-webserver airflow-scheduler airflow-triggerer flower + +start-airflow-cluster-worker: build-externals + @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-worker + +stop-airflow-cluster: + @ echo -e "$(BUILD_PRINT)Stopping Airflow Cluster $(END_BUILD_PRINT)" + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-webserver airflow-scheduler airflow-triggerer flower + +stop-airflow-cluster-worker: + @ echo -e "$(BUILD_PRINT)Stopping Airflow Cluster Worker $(END_BUILD_PRINT)" + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-worker + start-airflow: build-externals @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow/docker-compose.yaml --env-file ${ENV_FILE} up -d From 8e75232593953565a7b60b3ceebf825f908442bc Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 23:12:04 +0300 Subject: [PATCH 52/65] Update docker-compose.yaml --- infra/airflow-cluster/docker-compose.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/infra/airflow-cluster/docker-compose.yaml b/infra/airflow-cluster/docker-compose.yaml index bd71714f3..121e398b5 100644 --- a/infra/airflow-cluster/docker-compose.yaml +++ b/infra/airflow-cluster/docker-compose.yaml @@ -58,13 +58,9 @@ x-airflow-common: AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@ted-data.eu/airflow AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@ted-data.eu/airflow AIRFLOW__CELERY__BROKER_URL: redis://:@ted-data.eu:6379/0 - #AIRFLOW__CORE__FERNET_KEY: ${FERNET_KEY} #AIRFLOW__WEBSERVER__SECRET_KEY: ${SECRET_KEY} IS_PRIME_ENV: 'true' AIRFLOW__CORE__EXECUTOR: CeleryExecutor - #AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow - #AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow - #AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0 AIRFLOW__CORE__FERNET_KEY: '' AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' AIRFLOW__CORE__ENABLE_XCOM_PICKLING: "true" From 296ebf7a2dc11b57df212388e89114ee570d55f1 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 23:23:03 +0300 Subject: [PATCH 53/65] Update conftest.py --- tests/unit/data_manager/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/data_manager/conftest.py b/tests/unit/data_manager/conftest.py index 05dcc5151..f37c6bf06 100644 --- a/tests/unit/data_manager/conftest.py +++ b/tests/unit/data_manager/conftest.py @@ -46,4 +46,4 @@ def fake_mapping_suite(): @pytest.fixture def daily_supra_notice(): - return DailySupraNotice(notice_ids=["1", "2", "3"], notice_publication_date=date.today()) + return DailySupraNotice(notice_ids=["1", "2", "3"], notice_fetched_date=date.today()) From f5f7b4d9d9be14b7f6ad5140e8c3c1a9183cf7e3 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 23:39:56 +0300 Subject: [PATCH 54/65] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f915567f1..b05e00be7 100644 --- a/.gitignore +++ b/.gitignore @@ -121,3 +121,4 @@ tests/test_data/notice_transformer/mapping_suite_processor_repository/test_packa tests/test_data/notice_transformer/mapping_suite_processor_repository/test_package_fake/transformation/resources/languages.json tests/test_data/notice_transformer/mapping_suite_processor_repository/test_package_fake/transformation/resources/main_activity.json tests/test_data/notice_transformer/mapping_suite_processor_repository/test_package_fake/transformation/resources/nuts.json +infra/airflow-cluster/requirements.txt From 4d053227a8523d266939ac9dacb4a9a27cc090ac Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 23:40:06 +0300 Subject: [PATCH 55/65] Update docker-compose.yaml --- infra/airflow-cluster/docker-compose.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/infra/airflow-cluster/docker-compose.yaml b/infra/airflow-cluster/docker-compose.yaml index 121e398b5..563e2a6cc 100644 --- a/infra/airflow-cluster/docker-compose.yaml +++ b/infra/airflow-cluster/docker-compose.yaml @@ -118,8 +118,10 @@ services: redis: image: redis:latest container_name: redis-airflow-${ENVIRONMENT} + ports: + - "6379:6379" expose: - - 6379:6379 + - 6379 healthcheck: test: ["CMD", "redis-cli", "ping"] interval: 5s From accb6b378bfb235082d253867f5e0d5250b840cb Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 23:48:15 +0300 Subject: [PATCH 56/65] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 654a0754a..1dbc5fd38 100644 --- a/Makefile +++ b/Makefile @@ -108,7 +108,7 @@ build-airflow: guard-ENVIRONMENT create-env-airflow build-externals build-airflow-cluster: guard-ENVIRONMENT create-env-airflow-cluster build-externals @ echo -e "$(BUILD_PRINT) Build Airflow services $(END_BUILD_PRINT)" @ docker build -t meaningfy/airflow ./infra/airflow-cluster/ - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up airflow-init + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d airflow-init start-airflow-cluster: build-externals @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" From f1b7f0f9cbf172f4855dc24ab1b175d0895bc0b2 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Mon, 22 Aug 2022 23:58:14 +0300 Subject: [PATCH 57/65] Update Makefile --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 1dbc5fd38..38b95b42d 100644 --- a/Makefile +++ b/Makefile @@ -112,19 +112,19 @@ build-airflow-cluster: guard-ENVIRONMENT create-env-airflow-cluster build-extern start-airflow-cluster: build-externals @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-webserver airflow-scheduler airflow-triggerer flower + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-webserver-${ENVIRONMENT} airflow-scheduler-${ENVIRONMENT} airflow-triggerer-${ENVIRONMENT} airflow-flower-${ENVIRONMENT} start-airflow-cluster-worker: build-externals @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-worker + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-worker-${ENVIRONMENT} stop-airflow-cluster: @ echo -e "$(BUILD_PRINT)Stopping Airflow Cluster $(END_BUILD_PRINT)" - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-webserver airflow-scheduler airflow-triggerer flower + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-webserver-${ENVIRONMENT} airflow-scheduler-${ENVIRONMENT} airflow-triggerer-${ENVIRONMENT} airflow-flower-${ENVIRONMENT} stop-airflow-cluster-worker: @ echo -e "$(BUILD_PRINT)Stopping Airflow Cluster Worker $(END_BUILD_PRINT)" - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-worker + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-worker-${ENVIRONMENT} start-airflow: build-externals @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" From fa4cc6a2c57c2c01986b5c4858f2bd78ad1a05d3 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Tue, 23 Aug 2022 00:01:50 +0300 Subject: [PATCH 58/65] Update Makefile --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 38b95b42d..473643e98 100644 --- a/Makefile +++ b/Makefile @@ -112,19 +112,19 @@ build-airflow-cluster: guard-ENVIRONMENT create-env-airflow-cluster build-extern start-airflow-cluster: build-externals @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-webserver-${ENVIRONMENT} airflow-scheduler-${ENVIRONMENT} airflow-triggerer-${ENVIRONMENT} airflow-flower-${ENVIRONMENT} + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-webserver airflow-scheduler airflow-triggerer airflow-flower start-airflow-cluster-worker: build-externals @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-worker-${ENVIRONMENT} + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-worker stop-airflow-cluster: @ echo -e "$(BUILD_PRINT)Stopping Airflow Cluster $(END_BUILD_PRINT)" - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-webserver-${ENVIRONMENT} airflow-scheduler-${ENVIRONMENT} airflow-triggerer-${ENVIRONMENT} airflow-flower-${ENVIRONMENT} + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-webserver airflow-scheduler airflow-triggerer airflow-flower stop-airflow-cluster-worker: @ echo -e "$(BUILD_PRINT)Stopping Airflow Cluster Worker $(END_BUILD_PRINT)" - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-worker-${ENVIRONMENT} + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-worker start-airflow: build-externals @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" From b2b7d028328b150572b92fd1343ea6b11947bf52 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Tue, 23 Aug 2022 00:03:18 +0300 Subject: [PATCH 59/65] Update Makefile --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 473643e98..1dbc5fd38 100644 --- a/Makefile +++ b/Makefile @@ -112,7 +112,7 @@ build-airflow-cluster: guard-ENVIRONMENT create-env-airflow-cluster build-extern start-airflow-cluster: build-externals @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-webserver airflow-scheduler airflow-triggerer airflow-flower + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-webserver airflow-scheduler airflow-triggerer flower start-airflow-cluster-worker: build-externals @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" @@ -120,7 +120,7 @@ start-airflow-cluster-worker: build-externals stop-airflow-cluster: @ echo -e "$(BUILD_PRINT)Stopping Airflow Cluster $(END_BUILD_PRINT)" - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-webserver airflow-scheduler airflow-triggerer airflow-flower + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-webserver airflow-scheduler airflow-triggerer flower stop-airflow-cluster-worker: @ echo -e "$(BUILD_PRINT)Stopping Airflow Cluster Worker $(END_BUILD_PRINT)" From 3ea33999a198381ea92df4eb16806b67886299ec Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Tue, 23 Aug 2022 10:37:45 +0300 Subject: [PATCH 60/65] fix docker compose network visibility --- Makefile | 2 +- infra/airflow-cluster/docker-compose.yaml | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 1dbc5fd38..fd40ca7c9 100644 --- a/Makefile +++ b/Makefile @@ -116,7 +116,7 @@ start-airflow-cluster: build-externals start-airflow-cluster-worker: build-externals @ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)" - @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-worker + @ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d airflow-worker stop-airflow-cluster: @ echo -e "$(BUILD_PRINT)Stopping Airflow Cluster $(END_BUILD_PRINT)" diff --git a/infra/airflow-cluster/docker-compose.yaml b/infra/airflow-cluster/docker-compose.yaml index 563e2a6cc..e9fd850ed 100644 --- a/infra/airflow-cluster/docker-compose.yaml +++ b/infra/airflow-cluster/docker-compose.yaml @@ -58,10 +58,10 @@ x-airflow-common: AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@ted-data.eu/airflow AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@ted-data.eu/airflow AIRFLOW__CELERY__BROKER_URL: redis://:@ted-data.eu:6379/0 - #AIRFLOW__WEBSERVER__SECRET_KEY: ${SECRET_KEY} + AIRFLOW__WEBSERVER__SECRET_KEY: "zqOVjqVrMstjDbKEPpYiSA==" IS_PRIME_ENV: 'true' AIRFLOW__CORE__EXECUTOR: CeleryExecutor - AIRFLOW__CORE__FERNET_KEY: '' + AIRFLOW__CORE__FERNET_KEY: 'M9NmXPlfIszmYCVjp3nJNQEocpmNQtKQdG-Kxdvfgm8=' AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' AIRFLOW__CORE__ENABLE_XCOM_PICKLING: "true" AIRFLOW__CORE__LOAD_EXAMPLES: 'false' @@ -114,6 +114,7 @@ services: restart: unless-stopped networks: - airflow + - common-ext redis: image: redis:latest @@ -130,6 +131,7 @@ services: restart: unless-stopped networks: - airflow + - common-ext airflow-webserver: <<: *airflow-common @@ -315,6 +317,7 @@ services: - airflow networks: - airflow + - common-ext flower: <<: *airflow-common @@ -324,6 +327,7 @@ services: networks: - airflow - proxy-net + - common-ext depends_on: <<: *airflow-common-depends-on airflow-init: From fd7fc8709e979c74f2115f64bd16a8298f96aa54 Mon Sep 17 00:00:00 2001 From: Mihai Racovit Date: Tue, 13 Sep 2022 13:15:49 +0200 Subject: [PATCH 61/65] made notice check-up availability in celler --- ...check_availability_of _notice_in_celler.py | 25 +++++++++++++++++++ .../services/notice_eligibility.py | 2 +- tests/e2e/data_manager/conftest.py | 12 ++++++++- ..._check_availability_of_notice_in_celler.py | 7 ++++++ 4 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 ted_sws/data_manager/services/check_availability_of _notice_in_celler.py create mode 100644 tests/e2e/data_manager/test_check_availability_of_notice_in_celler.py diff --git a/ted_sws/data_manager/services/check_availability_of _notice_in_celler.py b/ted_sws/data_manager/services/check_availability_of _notice_in_celler.py new file mode 100644 index 000000000..dfeca59f0 --- /dev/null +++ b/ted_sws/data_manager/services/check_availability_of _notice_in_celler.py @@ -0,0 +1,25 @@ + +from ted_sws.data_manager.adapters.repository_abc import NoticeRepositoryABC +from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLTripleStoreEndpoint + +NOTICE_URI = 'http://publications.europa.eu/resource/celler/396207_2018' +NOTICE_ID = '982988' + + +def check_availability_of_notice_in_celler(notice_id, cellar_sparql_endpoint, + notice_uri): + notice_id = NOTICE_ID + notice_uri = NOTICE_URI + + query = f""" + ASK + {{ + VALUES ?{notice_id} {notice_uri} + ?{notice_uri} ?predicate [] . + }} + """ + execute_query = SPARQLTripleStoreEndpoint(endpoint_url=cellar_sparql_endpoint).with_query(sparql_query=query) + if execute_query is True: + print('the notice status is publicly available') + else: + print('the notice status is publicly unavailable') diff --git a/ted_sws/notice_metadata_processor/services/notice_eligibility.py b/ted_sws/notice_metadata_processor/services/notice_eligibility.py index e1e484907..27529b95a 100644 --- a/ted_sws/notice_metadata_processor/services/notice_eligibility.py +++ b/ted_sws/notice_metadata_processor/services/notice_eligibility.py @@ -1,7 +1,7 @@ import datetime from typing import Tuple -import semantic_version +# import semantic_version from ted_sws.core.model.metadata import NormalisedMetadata from ted_sws.core.model.notice import Notice diff --git a/tests/e2e/data_manager/conftest.py b/tests/e2e/data_manager/conftest.py index 5c541c165..3b59d45d8 100644 --- a/tests/e2e/data_manager/conftest.py +++ b/tests/e2e/data_manager/conftest.py @@ -237,4 +237,14 @@ def invalid_mapping_suite_id() -> str: @pytest.fixture def cellar_sparql_endpoint(): - return "https://publications.europa.eu/webapi/rdf/sparql" \ No newline at end of file + return "https://publications.europa.eu/webapi/rdf/sparql" + + +@pytest.fixture +def notice_uri(): + return 'https://op.europa.eu/en/publication-detail/-/publication/1ff5ff3c-e203-4c6a-a255-30a41c1c6b4b' + + +@pytest.fixture +def notice_id_from_celler(): + return "1ff5ff3c-e203-4c6a-a255-30a41c1c6b4b" \ No newline at end of file diff --git a/tests/e2e/data_manager/test_check_availability_of_notice_in_celler.py b/tests/e2e/data_manager/test_check_availability_of_notice_in_celler.py new file mode 100644 index 000000000..1893b4647 --- /dev/null +++ b/tests/e2e/data_manager/test_check_availability_of_notice_in_celler.py @@ -0,0 +1,7 @@ +from ted_sws.data_manager.services import \ + load_notice_into_triple_store, DEFAULT_NOTICE_REPOSITORY_NAME + +def test_validate_mets_package_publication(notice_id_from_celler, notice_uri, cellar_sparql_endpoint): + + check_availability_of_notice_in_celler(notice_id=notice_id_from_celler, notice_uri=notice_uri, cellar_sparql_endpoint=cellar_sparql_endpoint) + print(a) From 1b93f60fd56aa5e9106e7aefaaddd416c4e700c1 Mon Sep 17 00:00:00 2001 From: Mihai Racovit Date: Tue, 13 Sep 2022 19:22:10 +0200 Subject: [PATCH 62/65] made notice check-up availability in celler --- ...celler.py => check_availability_of_notice_in_celler.py} | 5 ++--- tests/e2e/data_manager/conftest.py | 4 ++-- .../test_ check_availability_of_notice_in_celler.py | 7 +++++++ .../test_check_availability_of_notice_in_celler.py | 7 ------- 4 files changed, 11 insertions(+), 12 deletions(-) rename ted_sws/data_manager/services/{check_availability_of _notice_in_celler.py => check_availability_of_notice_in_celler.py} (86%) create mode 100644 tests/e2e/data_manager/test_ check_availability_of_notice_in_celler.py delete mode 100644 tests/e2e/data_manager/test_check_availability_of_notice_in_celler.py diff --git a/ted_sws/data_manager/services/check_availability_of _notice_in_celler.py b/ted_sws/data_manager/services/check_availability_of_notice_in_celler.py similarity index 86% rename from ted_sws/data_manager/services/check_availability_of _notice_in_celler.py rename to ted_sws/data_manager/services/check_availability_of_notice_in_celler.py index dfeca59f0..6c849da4d 100644 --- a/ted_sws/data_manager/services/check_availability_of _notice_in_celler.py +++ b/ted_sws/data_manager/services/check_availability_of_notice_in_celler.py @@ -1,5 +1,3 @@ - -from ted_sws.data_manager.adapters.repository_abc import NoticeRepositoryABC from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLTripleStoreEndpoint NOTICE_URI = 'http://publications.europa.eu/resource/celler/396207_2018' @@ -15,7 +13,7 @@ def check_availability_of_notice_in_celler(notice_id, cellar_sparql_endpoint, ASK {{ VALUES ?{notice_id} {notice_uri} - ?{notice_uri} ?predicate [] . + ?{notice_id} ?predicate [] . }} """ execute_query = SPARQLTripleStoreEndpoint(endpoint_url=cellar_sparql_endpoint).with_query(sparql_query=query) @@ -23,3 +21,4 @@ def check_availability_of_notice_in_celler(notice_id, cellar_sparql_endpoint, print('the notice status is publicly available') else: print('the notice status is publicly unavailable') + diff --git a/tests/e2e/data_manager/conftest.py b/tests/e2e/data_manager/conftest.py index 3b59d45d8..93fdb8048 100644 --- a/tests/e2e/data_manager/conftest.py +++ b/tests/e2e/data_manager/conftest.py @@ -6,6 +6,7 @@ SHACLTestSuite, TransformationTestData, MappingSuite from tests import TEST_DATA_PATH + @pytest.fixture def query_content(): return """# title: Official name @@ -213,7 +214,6 @@ def path_to_file_system_repository(): @pytest.fixture def notice_with_distilled_status(notice_2020, rdf_file_content): - notice_2020.update_status_to(new_status=NoticeStatus.NORMALISED_METADATA) notice_2020.update_status_to(new_status=NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION) notice_2020.update_status_to(new_status=NoticeStatus.PREPROCESSED_FOR_TRANSFORMATION) @@ -247,4 +247,4 @@ def notice_uri(): @pytest.fixture def notice_id_from_celler(): - return "1ff5ff3c-e203-4c6a-a255-30a41c1c6b4b" \ No newline at end of file + return "1ff5ff3c-e203-4c6a-a255-30a41c1c6b4b" diff --git a/tests/e2e/data_manager/test_ check_availability_of_notice_in_celler.py b/tests/e2e/data_manager/test_ check_availability_of_notice_in_celler.py new file mode 100644 index 000000000..36693b86d --- /dev/null +++ b/tests/e2e/data_manager/test_ check_availability_of_notice_in_celler.py @@ -0,0 +1,7 @@ +from ted_sws.data_manager.services.check_availability_of_notice_in_celler import check_availability_of_notice_in_celler + + +def test_check_availability_of_notice_in_celler(notice_id_from_celler, notice_uri, cellar_sparql_endpoint): + check_availability_of_notice_in_celler(notice_id=notice_id_from_celler, notice_uri=notice_uri, cellar_sparql_endpoint=cellar_sparql_endpoint) + + diff --git a/tests/e2e/data_manager/test_check_availability_of_notice_in_celler.py b/tests/e2e/data_manager/test_check_availability_of_notice_in_celler.py deleted file mode 100644 index 1893b4647..000000000 --- a/tests/e2e/data_manager/test_check_availability_of_notice_in_celler.py +++ /dev/null @@ -1,7 +0,0 @@ -from ted_sws.data_manager.services import \ - load_notice_into_triple_store, DEFAULT_NOTICE_REPOSITORY_NAME - -def test_validate_mets_package_publication(notice_id_from_celler, notice_uri, cellar_sparql_endpoint): - - check_availability_of_notice_in_celler(notice_id=notice_id_from_celler, notice_uri=notice_uri, cellar_sparql_endpoint=cellar_sparql_endpoint) - print(a) From 8e02bb253d1463cf1a5ed5162ea30332fa040992 Mon Sep 17 00:00:00 2001 From: Kolea Plesco Date: Thu, 15 Sep 2022 22:55:20 +0300 Subject: [PATCH 63/65] Added METS package publication validation --- .../data_manager/adapters/sparql_endpoint.py | 15 ++++++------ .../check_availability_of_notice_in_celler.py | 24 ------------------- .../check_availability_of_notice_in_cellar.py | 13 ++++++++++ tests/e2e/data_manager/conftest.py | 10 -------- ... check_availability_of_notice_in_celler.py | 7 ------ tests/e2e/notice_validator/conftest.py | 10 ++++++++ ... check_availability_of_notice_in_cellar.py | 7 ++++++ 7 files changed, 38 insertions(+), 48 deletions(-) delete mode 100644 ted_sws/data_manager/services/check_availability_of_notice_in_celler.py create mode 100644 ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py delete mode 100644 tests/e2e/data_manager/test_ check_availability_of_notice_in_celler.py create mode 100644 tests/e2e/notice_validator/test_ check_availability_of_notice_in_cellar.py diff --git a/ted_sws/data_manager/adapters/sparql_endpoint.py b/ted_sws/data_manager/adapters/sparql_endpoint.py index 88b43c1d8..580901929 100644 --- a/ted_sws/data_manager/adapters/sparql_endpoint.py +++ b/ted_sws/data_manager/adapters/sparql_endpoint.py @@ -35,13 +35,14 @@ class SPARQLClientPool(object): connection_pool = {} @staticmethod - def create_or_reuse_connection(endpoint_url: str): + def create_or_reuse_connection(endpoint_url: str, use_env_credentials: bool = True): if endpoint_url not in SPARQLClientPool.connection_pool: sparql_wrapper = SPARQLWrapper(endpoint_url) - sparql_wrapper.setCredentials( - user=config.AGRAPH_SUPER_USER, - passwd=config.AGRAPH_SUPER_PASSWORD - ) + if use_env_credentials: + sparql_wrapper.setCredentials( + user=config.AGRAPH_SUPER_USER, + passwd=config.AGRAPH_SUPER_PASSWORD + ) SPARQLClientPool.connection_pool[endpoint_url] = sparql_wrapper return SPARQLClientPool.connection_pool[endpoint_url] @@ -117,8 +118,8 @@ def add_data_to_repository(self, file_content, repository_name, mime_type): class SPARQLTripleStoreEndpoint(TripleStoreEndpointABC): - def __init__(self, endpoint_url: str): - self.endpoint = SPARQLClientPool.create_or_reuse_connection(endpoint_url) + def __init__(self, endpoint_url: str, use_env_credentials: bool = True): + self.endpoint = SPARQLClientPool.create_or_reuse_connection(endpoint_url, use_env_credentials) def _set_sparql_query(self, sparql_query: str): """ diff --git a/ted_sws/data_manager/services/check_availability_of_notice_in_celler.py b/ted_sws/data_manager/services/check_availability_of_notice_in_celler.py deleted file mode 100644 index 6c849da4d..000000000 --- a/ted_sws/data_manager/services/check_availability_of_notice_in_celler.py +++ /dev/null @@ -1,24 +0,0 @@ -from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLTripleStoreEndpoint - -NOTICE_URI = 'http://publications.europa.eu/resource/celler/396207_2018' -NOTICE_ID = '982988' - - -def check_availability_of_notice_in_celler(notice_id, cellar_sparql_endpoint, - notice_uri): - notice_id = NOTICE_ID - notice_uri = NOTICE_URI - - query = f""" - ASK - {{ - VALUES ?{notice_id} {notice_uri} - ?{notice_id} ?predicate [] . - }} - """ - execute_query = SPARQLTripleStoreEndpoint(endpoint_url=cellar_sparql_endpoint).with_query(sparql_query=query) - if execute_query is True: - print('the notice status is publicly available') - else: - print('the notice status is publicly unavailable') - diff --git a/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py b/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py new file mode 100644 index 000000000..ae55f0a34 --- /dev/null +++ b/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py @@ -0,0 +1,13 @@ +from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLTripleStoreEndpoint + +WEBAPI_SPARQL_URL = "https://publications.europa.eu/webapi/rdf/sparql" +CELLAR_NOTICE_AVAILABILITY_QUERY = "ASK {{ VALUES ?instance {{<{notice_uri}>}} ?instance ?predicate [] . }}" +WEBAPI_SPARQL_RUN_FORMAT = "application/sparql-results+json" + + +def check_availability_of_notice_in_cellar(notice_uri: str) -> bool: + query = CELLAR_NOTICE_AVAILABILITY_QUERY.format(notice_uri=notice_uri) + result = SPARQLTripleStoreEndpoint( + endpoint_url=WEBAPI_SPARQL_URL, + use_env_credentials=False).with_query(sparql_query=query).fetch_tree() + return result['boolean'] diff --git a/tests/e2e/data_manager/conftest.py b/tests/e2e/data_manager/conftest.py index 93fdb8048..0407591f1 100644 --- a/tests/e2e/data_manager/conftest.py +++ b/tests/e2e/data_manager/conftest.py @@ -238,13 +238,3 @@ def invalid_mapping_suite_id() -> str: @pytest.fixture def cellar_sparql_endpoint(): return "https://publications.europa.eu/webapi/rdf/sparql" - - -@pytest.fixture -def notice_uri(): - return 'https://op.europa.eu/en/publication-detail/-/publication/1ff5ff3c-e203-4c6a-a255-30a41c1c6b4b' - - -@pytest.fixture -def notice_id_from_celler(): - return "1ff5ff3c-e203-4c6a-a255-30a41c1c6b4b" diff --git a/tests/e2e/data_manager/test_ check_availability_of_notice_in_celler.py b/tests/e2e/data_manager/test_ check_availability_of_notice_in_celler.py deleted file mode 100644 index 36693b86d..000000000 --- a/tests/e2e/data_manager/test_ check_availability_of_notice_in_celler.py +++ /dev/null @@ -1,7 +0,0 @@ -from ted_sws.data_manager.services.check_availability_of_notice_in_celler import check_availability_of_notice_in_celler - - -def test_check_availability_of_notice_in_celler(notice_id_from_celler, notice_uri, cellar_sparql_endpoint): - check_availability_of_notice_in_celler(notice_id=notice_id_from_celler, notice_uri=notice_uri, cellar_sparql_endpoint=cellar_sparql_endpoint) - - diff --git a/tests/e2e/notice_validator/conftest.py b/tests/e2e/notice_validator/conftest.py index 6fd55f2ce..4c38a5237 100644 --- a/tests/e2e/notice_validator/conftest.py +++ b/tests/e2e/notice_validator/conftest.py @@ -41,3 +41,13 @@ def fake_notice_F03_content(fake_repository_path, fake_mapping_suite_F03_id): def fake_notice_F03(fake_notice_F03_content, fake_notice_id): xml_manifestation = XMLManifestation(object_data=fake_notice_F03_content) return Notice(ted_id=fake_notice_id, xml_manifestation=xml_manifestation) + + +@pytest.fixture +def valid_cellar_uri(): + return 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' + + +@pytest.fixture +def invalid_cellar_uri(): + return 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type-invalid' diff --git a/tests/e2e/notice_validator/test_ check_availability_of_notice_in_cellar.py b/tests/e2e/notice_validator/test_ check_availability_of_notice_in_cellar.py new file mode 100644 index 000000000..0041b7ab6 --- /dev/null +++ b/tests/e2e/notice_validator/test_ check_availability_of_notice_in_cellar.py @@ -0,0 +1,7 @@ +from ted_sws.notice_validator.services.check_availability_of_notice_in_cellar import \ + check_availability_of_notice_in_cellar + + +def test_check_availability_of_notice_in_cellar(valid_cellar_uri, invalid_cellar_uri): + assert check_availability_of_notice_in_cellar(notice_uri=valid_cellar_uri) + assert not check_availability_of_notice_in_cellar(notice_uri=invalid_cellar_uri) From 5ff158c987af323a5260775a680a0a1108e9a507 Mon Sep 17 00:00:00 2001 From: Kolea Plesco Date: Thu, 15 Sep 2022 23:04:04 +0300 Subject: [PATCH 64/65] Added METS package publication validation --- .../services/check_availability_of_notice_in_cellar.py | 4 ++-- tests/e2e/conftest.py | 5 +++++ tests/e2e/data_manager/conftest.py | 4 ---- tests/e2e/notice_metadata_processor/conftest.py | 5 ----- .../test_ check_availability_of_notice_in_cellar.py | 8 +++++--- 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py b/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py index ae55f0a34..75ea75648 100644 --- a/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py +++ b/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py @@ -5,9 +5,9 @@ WEBAPI_SPARQL_RUN_FORMAT = "application/sparql-results+json" -def check_availability_of_notice_in_cellar(notice_uri: str) -> bool: +def check_availability_of_notice_in_cellar(notice_uri: str, endpoint_url: str = WEBAPI_SPARQL_URL) -> bool: query = CELLAR_NOTICE_AVAILABILITY_QUERY.format(notice_uri=notice_uri) result = SPARQLTripleStoreEndpoint( - endpoint_url=WEBAPI_SPARQL_URL, + endpoint_url=endpoint_url, use_env_credentials=False).with_query(sparql_query=query).fetch_tree() return result['boolean'] diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index dfe1d6aae..931ece3e8 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -39,3 +39,8 @@ def fake_mapping_suite_id() -> str: @pytest.fixture def fuseki_triple_store(): return FusekiAdapter(host=config.FUSEKI_ADMIN_HOST, user=config.FUSEKI_ADMIN_USER, password=config.FUSEKI_ADMIN_PASSWORD) + + +@pytest.fixture +def cellar_sparql_endpoint(): + return "https://publications.europa.eu/webapi/rdf/sparql" diff --git a/tests/e2e/data_manager/conftest.py b/tests/e2e/data_manager/conftest.py index 0407591f1..946410dc8 100644 --- a/tests/e2e/data_manager/conftest.py +++ b/tests/e2e/data_manager/conftest.py @@ -234,7 +234,3 @@ def fake_repository_path(): def invalid_mapping_suite_id() -> str: return "test_invalid_package" - -@pytest.fixture -def cellar_sparql_endpoint(): - return "https://publications.europa.eu/webapi/rdf/sparql" diff --git a/tests/e2e/notice_metadata_processor/conftest.py b/tests/e2e/notice_metadata_processor/conftest.py index 5c541c165..5c19fd48a 100644 --- a/tests/e2e/notice_metadata_processor/conftest.py +++ b/tests/e2e/notice_metadata_processor/conftest.py @@ -233,8 +233,3 @@ def fake_repository_path(): @pytest.fixture def invalid_mapping_suite_id() -> str: return "test_invalid_package" - - -@pytest.fixture -def cellar_sparql_endpoint(): - return "https://publications.europa.eu/webapi/rdf/sparql" \ No newline at end of file diff --git a/tests/e2e/notice_validator/test_ check_availability_of_notice_in_cellar.py b/tests/e2e/notice_validator/test_ check_availability_of_notice_in_cellar.py index 0041b7ab6..19a6822de 100644 --- a/tests/e2e/notice_validator/test_ check_availability_of_notice_in_cellar.py +++ b/tests/e2e/notice_validator/test_ check_availability_of_notice_in_cellar.py @@ -2,6 +2,8 @@ check_availability_of_notice_in_cellar -def test_check_availability_of_notice_in_cellar(valid_cellar_uri, invalid_cellar_uri): - assert check_availability_of_notice_in_cellar(notice_uri=valid_cellar_uri) - assert not check_availability_of_notice_in_cellar(notice_uri=invalid_cellar_uri) +def test_check_availability_of_notice_in_cellar(cellar_sparql_endpoint, valid_cellar_uri, invalid_cellar_uri): + assert check_availability_of_notice_in_cellar(notice_uri=valid_cellar_uri, + endpoint_url=cellar_sparql_endpoint) + assert not check_availability_of_notice_in_cellar(notice_uri=invalid_cellar_uri, + endpoint_url=cellar_sparql_endpoint) From 3a7c96eb0243ebcfbf72d3c4411b29b904946057 Mon Sep 17 00:00:00 2001 From: Kolea Plesco Date: Thu, 15 Sep 2022 23:08:55 +0300 Subject: [PATCH 65/65] Added METS package publication validation --- .../notice_metadata_processor/services/notice_eligibility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ted_sws/notice_metadata_processor/services/notice_eligibility.py b/ted_sws/notice_metadata_processor/services/notice_eligibility.py index 27529b95a..e1e484907 100644 --- a/ted_sws/notice_metadata_processor/services/notice_eligibility.py +++ b/ted_sws/notice_metadata_processor/services/notice_eligibility.py @@ -1,7 +1,7 @@ import datetime from typing import Tuple -# import semantic_version +import semantic_version from ted_sws.core.model.metadata import NormalisedMetadata from ted_sws.core.model.notice import Notice