diff --git a/ted_sws/metadata_normaliser/model/metadata.py b/ted_sws/metadata_normaliser/model/metadata.py index 884980aaf..c31e25942 100644 --- a/ted_sws/metadata_normaliser/model/metadata.py +++ b/ted_sws/metadata_normaliser/model/metadata.py @@ -1,4 +1,3 @@ -import datetime from typing import List from ted_sws.core.model.metadata import Metadata, CompositeTitle, LanguageTaggedString, EncodedValue diff --git a/ted_sws/metadata_normaliser/services/metadata_normalizer.py b/ted_sws/metadata_normaliser/services/metadata_normalizer.py index b2d294a2f..439ddff6f 100644 --- a/ted_sws/metadata_normaliser/services/metadata_normalizer.py +++ b/ted_sws/metadata_normaliser/services/metadata_normalizer.py @@ -1,17 +1,16 @@ import abc -import datetime +from datetime import datetime +from typing import Dict, Tuple, List import pandas as pd -from ted_sws.core.service.metadata_constraints import filter_df_by_variables -from ted_sws.data_manager.adapters.notice_repository import NoticeRepositoryABC from ted_sws.core.model.metadata import NormalisedMetadata, LanguageTaggedString from ted_sws.core.model.notice import Notice +from ted_sws.core.service.metadata_constraints import filter_df_by_variables +from ted_sws.data_manager.adapters.notice_repository import NoticeRepositoryABC from ted_sws.metadata_normaliser.model.metadata import ExtractedMetadata -from ted_sws.metadata_normaliser.services.xml_manifestation_metadata_extractor import XMLManifestationMetadataExtractor from ted_sws.metadata_normaliser.resources.mapping_files_registry import MappingFilesRegistry - -from typing import Dict, Tuple +from ted_sws.metadata_normaliser.services.xml_manifestation_metadata_extractor import XMLManifestationMetadataExtractor JOIN_SEP = " :: " MERGING_COLUMN = "eforms_subtype" @@ -140,6 +139,15 @@ def get_form_type_and_notice_type(cls, ef_map: pd.DataFrame, sf_map: pd.DataFram notice_type = filtered_df["eform_notice_type"].values[0] return form_type, notice_type + def get_map_list_value_by_code(self, mapping: Dict, listing: List): + return [self.get_map_value(mapping=mapping, value=element.code) if element else None for element in listing] + + @classmethod + def iso_date_format(cls, _date: str, with_none=False): + if _date or not with_none: + return datetime.strptime(_date, '%Y%m%d').isoformat() + return None + def to_metadata(self) -> NormalisedMetadata: """ Generate the normalised metadata @@ -155,13 +163,15 @@ def to_metadata(self) -> NormalisedMetadata: nuts_map = mapping_registry.nuts standard_forms_map = mapping_registry.sf_notice_df eforms_map = mapping_registry.ef_notice_df - form_type, notice_type = self.get_form_type_and_notice_type(sf_map=standard_forms_map, ef_map=eforms_map, - extracted_notice_type=self.extracted_metadata.extracted_notice_type, - form_number=self.normalise_form_number( - self.extracted_metadata.extracted_form_number), - legal_basis=self.normalise_legal_basis_value( - self.extracted_metadata.legal_basis_directive), - document_type_code=self.extracted_metadata.extracted_document_type.code) + form_type, notice_type = self.get_form_type_and_notice_type( + sf_map=standard_forms_map, ef_map=eforms_map, + extracted_notice_type=self.extracted_metadata.extracted_notice_type, + form_number=self.normalise_form_number( + self.extracted_metadata.extracted_form_number), + legal_basis=self.normalise_legal_basis_value( + self.extracted_metadata.legal_basis_directive), + document_type_code=self.extracted_metadata.extracted_document_type.code + ) extracted_metadata = self.extracted_metadata @@ -177,9 +187,7 @@ def to_metadata(self) -> NormalisedMetadata: language=title.title.language) for title in extracted_metadata.title ], "notice_publication_number": extracted_metadata.notice_publication_number, - "publication_date": datetime.datetime.strptime( - extracted_metadata.publication_date, '%Y%m%d' - ).isoformat(), + "publication_date": self.iso_date_format(extracted_metadata.publication_date), "ojs_issue_number": extracted_metadata.ojs_issue_number, "ojs_type": extracted_metadata.ojs_type if extracted_metadata.ojs_type else "S", "city_of_buyer": [city_of_buyer for city_of_buyer in extracted_metadata.city_of_buyer], @@ -187,20 +195,18 @@ def to_metadata(self) -> NormalisedMetadata: "original_language": self.get_map_value(mapping=languages_map, value=extracted_metadata.original_language), "country_of_buyer": self.get_map_value(mapping=countries_map, value=extracted_metadata.country_of_buyer), "eu_institution": False if extracted_metadata.eu_institution == '-' else True, - "document_sent_date": datetime.datetime.strptime( - extracted_metadata.document_sent_date, '%Y%m%d' - ).isoformat() if extracted_metadata.document_sent_date is not None else None, - "deadline_for_submission": datetime.datetime.strptime( - extracted_metadata.deadline_for_submission, '%Y%m%d' - ).isoformat() if extracted_metadata.deadline_for_submission is not None else None, + "document_sent_date": self.iso_date_format(extracted_metadata.document_sent_date, True), + "deadline_for_submission": self.iso_date_format(extracted_metadata.deadline_for_submission, True), "notice_type": self.get_map_value(mapping=notice_type_map, value=notice_type), "form_type": self.get_map_value(mapping=form_type_map, value=form_type), - "place_of_performance": [self.get_map_value(mapping=nuts_map, value=place_of_performance.code) if place_of_performance else None for - place_of_performance - in extracted_metadata.place_of_performance ], + "place_of_performance": self.get_map_list_value_by_code( + mapping=nuts_map, + listing=extracted_metadata.place_of_performance + ), "legal_basis_directive": self.get_map_value(mapping=legal_basis_map, value=self.normalise_legal_basis_value( - extracted_metadata.legal_basis_directive)), + extracted_metadata.legal_basis_directive + )), "form_number": self.normalise_form_number(value=extracted_metadata.extracted_form_number) } diff --git a/tests/conftest.py b/tests/conftest.py index 855720ef5..f6ec3c00a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,7 +9,6 @@ from ted_sws.core.model.notice import Notice from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher -from ted_sws.notice_packager.model.metadata import NoticeMetadata from tests import TEST_DATA_PATH from tests.fakes.fake_repository import FakeNoticeRepository from tests.fakes.fake_ted_api import FakeRequestAPI @@ -84,6 +83,7 @@ def notice_2018(): return Notice(ted_id=ted_id, xml_manifestation=xml_manifestation, original_metadata=original_metadata) + @pytest.fixture def notice_2020(): notice_data = read_notice("408313-2020.json") @@ -96,6 +96,7 @@ def notice_2020(): return Notice(ted_id=ted_id, xml_manifestation=xml_manifestation, original_metadata=original_metadata) + @pytest.fixture def normalised_metadata_dict(): data = { diff --git a/tests/unit/metadata_normaliser/test_metadata_normaliser.py b/tests/unit/metadata_normaliser/test_metadata_normaliser.py index 164fd8507..3aae94b49 100644 --- a/tests/unit/metadata_normaliser/test_metadata_normaliser.py +++ b/tests/unit/metadata_normaliser/test_metadata_normaliser.py @@ -1,10 +1,10 @@ import pytest from ted_sws.core.model.notice import NoticeStatus +from ted_sws.core.service.metadata_constraints import filter_df_by_variables from ted_sws.metadata_normaliser.resources.mapping_files_registry import MappingFilesRegistry from ted_sws.metadata_normaliser.services.metadata_normalizer import normalise_notice, normalise_notice_by_id, \ MetadataNormaliser, ExtractedMetadataNormaliser -from ted_sws.core.service.metadata_constraints import filter_df_by_variables from ted_sws.metadata_normaliser.services.xml_manifestation_metadata_extractor import XMLManifestationMetadataExtractor