Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update entity_deduplication.py #340

Merged
merged 1 commit into from
Nov 3, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions ted_sws/master_data_registry/services/entity_deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,13 @@ def register_new_cets_in_mdr(new_canonical_entities: Dict[rdflib.URIRef, rdflib.

def inject_similarity_links_in_notices(notices: List[Notice],
cet_rdf_fragments_dict: Dict[rdflib.URIRef, rdflib.Graph],
alignment_graph: rdflib.Graph):
alignment_graph: rdflib.Graph, inject_reflexive_links: bool = False):
"""
This function injects similarity links into the distilled RDF manifestation of each Notice.
:param notices:
:param cet_rdf_fragments_dict:
:param alignment_graph:
:param inject_reflexive_links:
:return:
"""
notices_dict = {notice.ted_id: notice for notice in notices}
Expand All @@ -152,8 +153,11 @@ def inject_similarity_links_in_notices(notices: List[Notice],
notice_id = str(next(cet_rdf_fragment.triples(triple=(root_uri, RDF_FRAGMENT_FROM_NOTICE_PROPERTY, None)))[2])
notice = notices_dict[notice_id]
inject_links = rdflib.Graph()
for triple in alignment_graph.triples(triple=(root_uri, OWL.sameAs, None)):
inject_links.add(triple)
if inject_reflexive_links:
for triple in alignment_graph.triples(triple=(root_uri, OWL.sameAs, None)):
inject_links.add(triple)
else:
inject_links.add((root_uri, OWL.sameAs, root_uri))
notice.distilled_rdf_manifestation.object_data = '\n'.join([notice.distilled_rdf_manifestation.object_data,
str(inject_links.serialize(format="nt"))])

Expand Down Expand Up @@ -211,3 +215,5 @@ def deduplicate_entities_by_cet_uri(notices: List[Notice], cet_uri: str,

inject_similarity_links_in_notices(notices=notices, cet_rdf_fragments_dict=non_canonical_cet_fragments_dict,
alignment_graph=cet_alignment_links)
inject_similarity_links_in_notices(notices=notices, cet_rdf_fragments_dict=new_canonical_cet_fragments_dict,
alignment_graph=cet_alignment_links, inject_reflexive_links=True)