Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ted 1219 packages #447

Merged
merged 5 commits into from
Feb 20, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ted_sws/notice_packager/adapters/template_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from jinja2 import Environment, PackageLoader

from ted_sws.notice_packager.model.metadata import PackagerMetadata, validate_notice_action_type
from ted_sws.notice_packager.model.metadata import PackagerMetadata, validate_mets_type

TEMPLATES = Environment(loader=PackageLoader("ted_sws.notice_packager.resources", "templates"))

Expand All @@ -34,8 +34,8 @@ def tmd_rdf_generator(cls, data: PackagerMetadata = None) -> str:

@classmethod
def mets2action_mets_xml_generator(cls, data: PackagerMetadata = None) -> str:
action = data.notice.action.type
validate_notice_action_type(action)
action = data.mets.type
validate_mets_type(action)

template = 'mets2action_mets_xml.jinja2'
return cls.__generate_template(template, data)
75 changes: 55 additions & 20 deletions ted_sws/notice_packager/model/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,25 @@

from ted_sws.core.model.metadata import Metadata

WORK_AGENT = "PUBL"
METS_PROFILE = "http://publications.europa.eu/resource/mets/op-sip-profile_002"
METS_TYPE_CREATE = "create"
METS_TYPE_UPDATE = "update"
METS_TYPE_DELETE = "delete"
METS_ACCEPTED_TYPES = [METS_TYPE_CREATE, METS_TYPE_UPDATE, METS_TYPE_DELETE]
METS_DMD_MDTYPE = "OTHER"
METS_DMD_OTHERMDTYPE = "INSTANCE"
METS_DMD_HREF = "{work_identifier}_{revision}.mets.xml.dmd.rdf"
METS_DMD_ID = "dmd_{work_identifier}_{revision}_{dmd_idx}"
METS_TMD_ID = "tmd_{work_identifier}_{revision}_{tmd_idx}"
METS_TMD_HREF = "{work_identifier}_{revision}.tmd.rdf"
METS_TMD_MDTYPE = "OTHER"
METS_TMD_OTHERMDTYPE = "INSTANCE"
METS_FILE_ID = "file_{work_identifier}_{revision}_{file_idx}"
METS_NOTICE_FILE_HREF = "{work_identifier}_{revision}.notice.rdf"
METS_NOTICE_FILE_MIMETYPE = "application/rdf+xml"
METS_NOTICE_FILE_CHECKSUM_TYPE = "SHA-256"

WORK_AGENT = "EURUN"
PUBLICATION_FREQUENCY = "OTHER"
CONCEPT_TYPE_DATASET = "TEST_DATA"
DATASET_KEYWORD = [
Expand All @@ -35,38 +53,52 @@
LANGUAGE = LANGUAGES[0]
USES_LANGUAGE = "MUL"

ACTION_CREATE = "create"
ACTION_UPDATE = "update"
ACCEPTED_ACTIONS = [ACTION_CREATE, ACTION_UPDATE]

REVISION = "0"


def validate_notice_action_type(v):
if v not in ACCEPTED_ACTIONS:
raise ValueError('No such action: %s' % v)
def validate_mets_type(v):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is v ? victory ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It’s a short name for “value” when used in a very small context (like k => v: key => value).

if v not in METS_ACCEPTED_TYPES:
raise ValueError('No such METS type: %s' % v)


class NoticeActionMetadata(Metadata):
class NoticeMetadata(Metadata):
"""
Notice action metadata
General notice metadata
"""
type: str = ACTION_CREATE
date: str = datetime.datetime.now().isoformat()

@validator('type')
def validate_notice_action_type(cls, v):
validate_notice_action_type(v)
return v
id: Optional[str]
public_number_document: Optional[str]
public_number_edition: Optional[str]


class NoticeMetadata(Metadata):
class MetsMetadata(Metadata):
"""
General notice metadata
"""
id: Optional[str] = None
languages: List[str] = LANGUAGES
action: NoticeActionMetadata = NoticeActionMetadata()
revision: str = REVISION

type: str = METS_TYPE_CREATE
profile: str = METS_PROFILE
createdate: str = datetime.datetime.now().isoformat()
document_id: Optional[str]
dmd_id: Optional[str]
dmd_mdtype: str = METS_DMD_MDTYPE
dmd_othermdtype: str = METS_DMD_OTHERMDTYPE
dmd_href: Optional[str]
tmd_id: Optional[str]
tmd_href: Optional[str]
tmd_mdtype: str = METS_TMD_MDTYPE
tmd_othermdtype: str = METS_TMD_OTHERMDTYPE
file_id: Optional[str]
notice_file_href: Optional[str]
notice_file_mimetype: Optional[str] = METS_NOTICE_FILE_MIMETYPE
notice_file_checksum: Optional[str]
notice_file_checksum_type: Optional[str] = METS_NOTICE_FILE_CHECKSUM_TYPE

@validator('type')
def validate_notice_action_type(cls, v):
validate_mets_type(v)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is v ? vicar?

return v


class WorkMetadata(Metadata):
Expand Down Expand Up @@ -95,11 +127,13 @@ class WorkMetadata(Metadata):


class ExpressionMetadata(Metadata):
    """
    Expression-level metadata used by the notice packager templates.
    """
    # Identifier of the expression; explicit None default keeps the model
    # constructible with no arguments, matching the `title` field below.
    identifier: Optional[str] = None
    # Titles keyed by language code (templates look up `expression.title[lang]`).
    title: Optional[Dict[str, str]] = None
    # Language code rendered into the expression's "uses language" resource URI.
    uses_language: str = USES_LANGUAGE


class ManifestationMetadata(Metadata):
identifier: Optional[str]
type: str = MANIFESTATION_TYPE
date_publication: str = datetime.datetime.now().strftime('%Y-%m-%d')
distribution_has_status_distribution_status: str = DISTRIBUTION_STATUS
Expand All @@ -108,6 +142,7 @@ class ManifestationMetadata(Metadata):

class PackagerMetadata(Metadata):
notice: NoticeMetadata = NoticeMetadata()
mets: MetsMetadata = MetsMetadata()
work: WorkMetadata = WorkMetadata()
expression: ExpressionMetadata = ExpressionMetadata()
manifestation: ManifestationMetadata = ManifestationMetadata()
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,38 @@
<mets xmlns="http://www.loc.gov/METS/"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/METS/ cellar-mets.xsd"
TYPE="{{ notice.action.type }}"
PROFILE="http://publications.europa.eu/resource/mets/op-sip-profile_002">
<metsHdr {{ notice.action.type|upper }}DATE="{{ notice.action.date }}">
<metsDocumentID>{{ work.identifier }}_{{ notice.action.type }}</metsDocumentID>
xsi:schemaLocation="http://www.loc.gov/METS/cellar-mets.xsd"
TYPE="{{ mets.type }}"
PROFILE="{{ mets.profile }}">
<metsHdr CREATEDATE="{{ mets.createdate }}">
<metsDocumentID>{{ mets.document_id }}</metsDocumentID>
</metsHdr>
<dmdSec ID="dmdSec01">
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="INSTANCE" xlink:href="{{ notice.id }}-0.mets.xml.dmd.rdf"/>
<dmdSec ID="{{ mets.dmd_id }}">
<mdRef MDTYPE="{{ mets.dmd_mdtype }}" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="{{ mets.dmd_othermdtype }}" xlink:href="{{ mets.dmd_href }}"/>
</dmdSec>
<amdSec>
<techMD ID="techMDID001">
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="INSTANCE" xlink:href="techMDID001.tmd.rdf"/>
<techMD ID="{{ mets.tmd_id }}">
<mdRef MDTYPE="{{ mets.tmd_mdtype }}" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="{{ mets.tmd_othermdtype }}" xlink:href="{{ mets.tmd_href }}"/>
</techMD>
</amdSec>
<fileSec>
<fileGrp>
<file ID="file-001" MIMETYPE="application/rdf+xml" CHECKSUM="f9cdda52af5e532068547f0c91fcf186840bd088 " CHECKSUMTYPE="SHA-1">
<FLocat LOCTYPE="URL" xlink:href="{{ notice.id }}.rdf"/>
<file ID="{{ mets.file_id }}" MIMETYPE="{{ mets.notice_file_mimetype }}" CHECKSUM="{{ mets.notice_file_checksum }}" CHECKSUMTYPE="{{ mets.notice_file_checksum_type }}">
<FLocat LOCTYPE="URL" xlink:href="{{ mets.notice_file_href }}"/>
</file>
</fileGrp>
</fileSec>
<structMap ID="structMap01">
<div TYPE="work" CONTENTIDS="dataset:{{ notice.id }}" DMDID="dmdSec01" ID="w-01">
<div TYPE="expression" CONTENTIDS="expression:{{ notice.id }}" DMDID="dmdSec01" ID="e-01">
<div TYPE="manifestation" CONTENTIDS="distribution:{{ notice.id }}/{{ notice.id }}_rdf" DMDID="dmdSec01" ADMID="techMDID001" ID="m-001">
<fptr CONTENTIDS="distribution:{{ notice.id }}/{{ notice.id }}.rdf" FILEID="file-001"/>
<structMap ID="struct_map_{{ work.identifier }}_001">
<div TYPE="work" CONTENTIDS="ted:{{ work.identifier }}" DMDID="{{ mets.dmd_id }}" ID="w_{{ work.identifier }}_001">
<div TYPE="expression" CONTENTIDS="ted:{{ expression.identifier }}" DMDID="{{ mets.dmd_id }}" ID="e_{{ work.identifier }}_001">
<div TYPE="manifestation" CONTENTIDS="ted:{{ manifestation.identifier }}" DMDID="{{ mets.dmd_id }}" ADMID="{{ mets.tmd_id }}" ID="m_{{ work.identifier }}_001">
<fptr CONTENTIDS="distribution:{{ notice.id }}/{{ mets.notice_file_href }}" FILEID="{{ mets.file_id }}"/>
</div>
</div>
</div>
</structMap>
<behaviorSec>
<behavior BTYPE="sparql-load" STRUCTID="m-001">
<behavior BTYPE="sparql-load" STRUCTID="m_{{ work.identifier }}_001">
<mechanism LOCTYPE="URL" LABEL="Sparql-load" xlink:href="cellar-mets:sparql-load-behavior?model={{ work.uri | urlencode | replace("/", "%2F") }}"/>
</behavior>
</behaviorSec>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,17 @@

<cdm:work rdf:about="&resource;ted/{{ work.identifier }}">
<rdf:type rdf:resource="http://publications.europa.eu/ontology/cdm#work"/>
<rdf:type rdf:resource="http://publications.europa.eu/ontology/cdm#procurement_public"/>
{# <rdf:type rdf:resource="http://publications.europa.eu/ontology/cdm#procurement_public"/> #}
<cdm:work_id_document rdf:datatype="http://www.w3.org/2001/XMLSchema#string">ted:{{ work.identifier }}</cdm:work_id_document>
<cdm:work_has_resource-type rdf:resource="http://publications.europa.eu/resource/authority/resource-type/PROCUREMENT_NOTICE"/>
<cdm:do_not_index rdf:datatype="http://www.w3.org/2001/XMLSchema#boolean">{{ work.do_not_index }}</cdm:do_not_index>
<cdm:work_date_document rdf:datatype="http://www.w3.org/2001/XMLSchema#date">{{ work.date_document }}</cdm:work_date_document>
<cdm:work_created_by_agent rdf:resource="&cellar-authority;corporate-body/{{ work.created_by_agent }}"/>
{% for lang in notice.languages %}
<cdm:procurement_public_number_edition rdf:datatype="http://www.w3.org/2001/XMLSchema#positiveInteger">{{ notice.public_number_edition }}</cdm:procurement_public_number_edition>
{% for lang in mets.languages %}
<cdm:work_title xml:lang="{{ lang }}">{{ work.title[lang] }}</cdm:work_title>
{% endfor %}
<cdm:procurement_public_number_document_in_official-journal rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ notice.public_number_document }}</cdm:procurement_public_number_document_in_official-journal>
<cdm:datetime_transmission rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">{{ work.datetime_transmission }}</cdm:datetime_transmission>
{# <cdm:procurement_public_issued_by_country>{{ work.procurement_public_issued_by_country }}</cdm:procurement_public_issued_by_country>
{% for uri in work.procurement_public_url_etendering %}
Expand All @@ -36,16 +39,16 @@
<cdm:work_dataset_has_frequency_publication_frequency rdf:resource="http://publications.europa.eu/resource/authority/frequency/{{ work.dataset_has_frequency_publication_frequency }}"/> #}
</cdm:work>

<cdm:expression rdf:about="&resource;ted/{{ work.identifier }}.MUL">
<cdm:expression rdf:about="&resource;ted/{{ expression.identifier }}">
<cdm:expression_belongs_to_work rdf:resource="&resource;ted/{{ work.identifier }}"/>
{% for lang in notice.languages %}
{% for lang in mets.languages %}
<cdm:expression_title xml:lang="{{ lang }}">{{ expression.title[lang] }}</cdm:expression_title>
{% endfor %}
<cdm:expression_uses_language rdf:resource="&cellar-authority;language/{{ expression.uses_language }}"/>
</cdm:expression>

<cdm:manifestation_distribution rdf:about="&resource;ted/{{ work.identifier }}.MUL.rdf">
<cdm:manifestation_manifests_expression rdf:resource="&resource;ted/{{ work.identifier }}.MUL"/>
<cdm:manifestation_distribution rdf:about="&resource;ted/{{ manifestation.identifier }}">
<cdm:manifestation_manifests_expression rdf:resource="&resource;ted/{{ expression.identifier }}"/>
<cdm:manifestation_type rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ manifestation.type }}</cdm:manifestation_type>
<cdm:manifestation_date_publication rdf:datatype="http://www.w3.org/2001/XMLSchema#date">{{ manifestation.date_publication }}</cdm:manifestation_date_publication>
{# <cdm:manifestation_distribution_has_status_distribution_status rdf:resource="http://publications.europa.eu/resource/authority/dataset-status/{{ manifestation.distribution_has_status_distribution_status }}"/>
Expand Down
6 changes: 3 additions & 3 deletions ted_sws/notice_packager/resources/templates/tmd_rdf.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
%cellarEntities;
]>
<rdf:RDF xmlns:tdm="http://publications.europa.eu/ontology/tdm#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<tdm:stream rdf:about="&resource;distribution/{{ notice.id }}/{{ notice.id }}.rdf">
<tdm:stream_name rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ notice.id }}.rdf</tdm:stream_name>
<tdm:stream rdf:about="&resource;distribution/{{ notice.id }}/{{ mets.notice_file_href }}">
<tdm:stream_name rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ mets.notice_file_href }}</tdm:stream_name>
<tdm:stream_format>
<tdm:format>
<tdm:format_designated_by_format_designation>
Expand All @@ -16,7 +16,7 @@
</tdm:format_designated_by_format_designation>
</tdm:format>
</tdm:stream_format>
<tdm:stream_label rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ work.title[notice.languages[0]] }}</tdm:stream_label>
<tdm:stream_label rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ work.title[mets.languages[0]] }}</tdm:stream_label>
<tdm:stream_order rdf:datatype="http://www.w3.org/2001/XMLSchema#positiveInteger">1</tdm:stream_order>
</tdm:stream>
</rdf:RDF>
60 changes: 44 additions & 16 deletions ted_sws/notice_packager/services/metadata_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
import datetime

from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata
from ted_sws.notice_packager.model.metadata import PackagerMetadata, ACTION_CREATE, LANGUAGE, REVISION, BASE_WORK, \
BASE_TITLE
from ted_sws.notice_packager.model.metadata import PackagerMetadata, METS_TYPE_CREATE, LANGUAGE, REVISION, BASE_WORK, \
BASE_TITLE, METS_DMD_HREF, METS_DMD_ID, METS_TMD_ID, METS_TMD_HREF, METS_FILE_ID, METS_NOTICE_FILE_HREF

# This is used in pipeline
NORMALIZED_SEPARATOR = '_'
Expand All @@ -31,9 +31,12 @@ class MetadataTransformer:
def __init__(self, notice_metadata: ExtractedMetadata):
self.notice_metadata = notice_metadata

def template_metadata(self, action: str = ACTION_CREATE) -> PackagerMetadata:
def template_metadata(self, action: str = METS_TYPE_CREATE) -> PackagerMetadata:
metadata = self.from_notice_metadata(self.notice_metadata)
metadata.notice.action.type = action

# here the custom and composed metadata properties are set
metadata.mets.type = action
metadata.mets.document_id = f"{metadata.work.identifier}_{action}"
return metadata

@classmethod
Expand All @@ -46,16 +49,6 @@ def normalize_value(cls, value: str) -> str:
"""
return value.replace(DENORMALIZED_SEPARATOR, NORMALIZED_SEPARATOR)

@classmethod
def denormalize_value(cls, value: str) -> str:
"""
The pipeline's separator is replaced with initial (TED API)'s one.
This is used when notice goes out to API
:param value:
:return:
"""
return value.replace(NORMALIZED_SEPARATOR, DENORMALIZED_SEPARATOR)

@classmethod
def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMetadata:
_date = datetime.datetime.now()
Expand All @@ -65,6 +58,9 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMet

# NOTICE
metadata.notice.id = cls.normalize_value(notice_metadata.notice_publication_number)
metadata.notice.public_number_document = publication_notice_number(metadata.notice.id)
metadata.notice.public_number_edition = publication_notice_year(
notice_metadata) + notice_metadata.ojs_issue_number.zfill(3)

# WORK
publication_date = datetime.datetime.strptime(notice_metadata.publication_date, '%Y%m%d').strftime('%Y-%m-%d')
Expand All @@ -81,10 +77,42 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMet
metadata.work.procurement_public_url_etendering = notice_metadata.uri_list

# EXPRESSION
metadata.expression.title = {LANGUAGE: BASE_TITLE + " " + metadata.notice.id}
metadata.expression.identifier = f"{metadata.work.identifier}.MUL"
metadata.expression.title = {LANGUAGE: BASE_TITLE + " " + metadata.work.identifier}

# MANIFESTATION
metadata.manifestation.identifier = f"{metadata.expression.identifier}.rdf"
metadata.manifestation.date_publication = publication_date

# METS
metadata.mets.dmd_href = METS_DMD_HREF.format(
work_identifier=metadata.work.identifier,
revision=metadata.mets.revision
)
metadata.mets.dmd_id = METS_DMD_ID.format(
work_identifier=metadata.work.identifier,
revision=metadata.mets.revision,
dmd_idx="001"
)
metadata.mets.tmd_id = METS_TMD_ID.format(
work_identifier=metadata.work.identifier,
revision=metadata.mets.revision,
tmd_idx="001"
)
metadata.mets.tmd_href = METS_TMD_HREF.format(
work_identifier=metadata.work.identifier,
revision=metadata.mets.revision
)
metadata.mets.file_id = METS_FILE_ID.format(
work_identifier=metadata.work.identifier,
revision=metadata.mets.revision,
file_idx="001"
)
metadata.mets.notice_file_href = METS_NOTICE_FILE_HREF.format(
work_identifier=metadata.work.identifier,
revision=metadata.mets.revision
)

return metadata


Expand All @@ -103,4 +131,4 @@ def publication_notice_uri(notice_id, notice_metadata):
def publication_work_identifier(notice_id, notice_metadata):
year = publication_notice_year(notice_metadata)
number = publication_notice_number(notice_id)
return f"{year}_{notice_metadata.ojs_type}_{notice_metadata.ojs_issue_number}_{number}"
return f"{year}_{notice_metadata.ojs_type}_{notice_metadata.ojs_issue_number.zfill(3)}_{number}"
Loading