Skip to content

Commit

Permalink
re-introduce provenance query by content id; related to #199
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorrit Poelen committed Dec 27, 2023
1 parent 1e530af commit 0b95792
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
Expand All @@ -25,8 +26,10 @@ public class ProvUtil {
/**
 * Matches a complete request string of the form {@code urn:uuid:<uuid>}; the
 * shape of the uuid part is defined by {@link UUIDUtil#UUID_PATTERN_PART}.
 */
public static final Pattern URN_UUID_REQUEST_PATTERN
= Pattern.compile("^urn:uuid:" + UUIDUtil.UUID_PATTERN_PART + "$");

/** Query type reported for requests matching {@link #URN_UUID_REQUEST_PATTERN}. */
public static final String QUERY_TYPE_UUID = "uuid";

/** Query type reported for {@code hash://...} content identifier requests. */
public static final String QUERY_TYPE_CONTENT_ID = "contentId";

/** Query type reported for {@code http://} / {@code https://} requests. */
public static final String QUERY_TYPE_URL = "url";

/** Query type reported for DOI-like requests (starting with {@code 10.}). */
public static final String QUERY_TYPE_DOI = "doi";

/**
 * All query types recognized when classifying a requested id; any other
 * value is filtered out by the classification and treated as unknown.
 */
public static final List<String> QUERIES_SUPPORTED = Arrays.asList(QUERY_TYPE_DOI, QUERY_TYPE_UUID, QUERY_TYPE_URL, QUERY_TYPE_CONTENT_ID);

public static Map<String, String> findMostRecentContentId(IRI iri, String paramName, String sparqlEndpoint) throws IOException, URISyntaxException {
String response = findProvenance(iri, paramName, sparqlEndpoint);
Expand Down Expand Up @@ -75,7 +78,8 @@ static String queryTypeForRequestedId(String requestURI) {
.map(req -> URN_UUID_REQUEST_PATTERN.matcher(req).matches() ? QUERY_TYPE_UUID : req)
.map(req -> Pattern.compile("^(10[.])([^/]+)/(.*)$").matcher(req).matches() ? QUERY_TYPE_DOI : req)
.map(req -> Pattern.compile("^http[s]{0,1}://[^ ]+").matcher(req).matches() ? QUERY_TYPE_URL : req)
.filter(type -> Arrays.asList(QUERY_TYPE_DOI, QUERY_TYPE_UUID, QUERY_TYPE_URL).contains(type))
.map(req -> Pattern.compile("^hash://[a-zA-Z0-9]+/[a-f0-9]+$").matcher(req).matches() ? QUERY_TYPE_CONTENT_ID : req)
.filter(QUERIES_SUPPORTED::contains)
.findFirst()
.orElse("unknown");
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#+ endpoint: https://lod.globalbioticinteractions.org/query
#+ defaults:
#+ - contentId: hash://sha256/1e2b7436fce1848f41698e5a9c193f311abaf0ee051bec1a2e48b5106d29524d

# Provenance lookup by content id.
#
# Given a content id (the only query parameter, substituted as an IRI), find
# the activities that generated that content, keep the most recently seen
# ones, and join them with the dataset (uuid / doi) that lists the activity's
# archive url as a member in "application/dwca" format.

SELECT DISTINCT
  ?uuid
  ?doi
  ?contentId
  ?archiveUrl
  ?seenAt
  ?provenanceId
  ?activity
WHERE {
  {
    # activities that generated the requested content, newest first
    SELECT DISTINCT
      ?contentId
      ?archiveUrl
      ?seenAt
      ?provenanceId
      ?activity
    WHERE {
      # echo the requested content id back as a result column
      BIND(?_contentId_iri as ?contentId)
      # fixed identifier reported as the provenance of every result row
      BIND(<hash://sha256/5b7fa37bf8b64e7c935c4ff3389e36f8dd162f0705410dd719fd089e1ea253cd> as ?provenanceId)
      {
        # only variables bound by the graph pattern below are projected
        SELECT ?seenAt ?archiveUrl ?activity
        WHERE {
          graph ?activity {
            ?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
            ?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
            ?_contentId_iri <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
          }
        }
      }
    } ORDER BY DESC(?seenAt) LIMIT 2
  }
  {
    # dataset description: uuid and doi associated with the archive url
    graph ?g1 {
      ?uuid <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
      ?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi .
      ?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
    }
  }
}
12 changes: 0 additions & 12 deletions preston-serve/src/main/resources/bio/guoda/preston/server/doi.rq
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,6 @@ WHERE {
}
}
}
UNION
{
SELECT ?contentId ?seenAt ?archiveUrl ?uuid ?activity
WHERE {
?uuid <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?_doi_iri .
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
}
}

# exclude blank nodes that represent unavailable or non-content
FILTER(!CONTAINS(str(?contentId), ".well-known/genid" ) )
Expand Down
12 changes: 0 additions & 12 deletions preston-serve/src/main/resources/bio/guoda/preston/server/url.rq
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,6 @@ WHERE {
}

}
}
UNION
{
SELECT ?contentId ?seenAt ?uuid ?activity
WHERE {
?uuid <http://www.w3.org/ns/prov#hadMember> ?_url_iri .
OPTIONAL { ?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi . }
?_url_iri <http://purl.org/dc/elements/1.1/format> "application/dwca" .
?activity <http://www.w3.org/ns/prov#used> ?_url_iri .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
}
}
# exclude blank nodes that represent unavailable or non-content
FILTER(!CONTAINS(str(?contentId), ".well-known/genid" ) )
Expand Down
12 changes: 0 additions & 12 deletions preston-serve/src/main/resources/bio/guoda/preston/server/uuid.rq
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,6 @@ WHERE {
}
}
}
UNION
{
SELECT ?contentId ?seenAt ?archiveUrl ?doi ?activity
WHERE {
?_uuid_iri <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
OPTIONAL { ?_uuid_iri <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi . }
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
}
}
# exclude blank nodes that represent unavailable or non-content
FILTER(!CONTAINS(str(?contentId), ".well-known/genid" ) )

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
import java.util.Map;

import static bio.guoda.preston.server.RedirectingServlet.ACTIVITY;
import static bio.guoda.preston.server.RedirectingServlet.ARCHIVE_URL;
import static bio.guoda.preston.server.RedirectingServlet.CONTENT_ID;
import static bio.guoda.preston.server.RedirectingServlet.DOI;
import static bio.guoda.preston.server.RedirectingServlet.PROVENANCE_ID;
import static bio.guoda.preston.server.RedirectingServlet.SEEN_AT;
import static bio.guoda.preston.server.RedirectingServlet.UUID;
import static org.hamcrest.MatcherAssert.assertThat;

public class RedirectingServletIT {
Expand Down Expand Up @@ -38,6 +43,22 @@ public void dealiasUUIDGBIF() throws IOException, URISyntaxException {

}

/**
 * Resolves provenance for a known GBIF-hosted content id against the
 * configured SPARQL endpoint and verifies every field of the returned record.
 */
@Test
public void dealiasContentIdGBIF() throws IOException, URISyntaxException {
    final String requestedContentId
            = "hash://sha256/1e2b7436fce1848f41698e5a9c193f311abaf0ee051bec1a2e48b5106d29524d";

    final Map<String, String> provenance = ProvUtil.findMostRecentContentId(
            RefNodeFactory.toIRI(requestedContentId),
            ProvUtil.QUERY_TYPE_CONTENT_ID,
            sparqlEndpoint
    );

    // the requested content id is echoed back unchanged
    assertThat(provenance.get(CONTENT_ID), Is.is(requestedContentId));
    assertThat(provenance.get(ACTIVITY), Is.is("urn:uuid:77f3faf7-acd2-4f14-9c0e-4e04ef5b63c7"));
    assertThat(provenance.get(PROVENANCE_ID), Is.is("hash://sha256/5b7fa37bf8b64e7c935c4ff3389e36f8dd162f0705410dd719fd089e1ea253cd"));
    assertThat(provenance.get(SEEN_AT), Is.is("2023-12-02T16:05:25.261Z"));
    assertThat(provenance.get(ARCHIVE_URL), Is.is("https://hosted-datasets.gbif.org/eBird/2022-eBird-dwca-1.0.zip"));
    assertThat(provenance.get(UUID), Is.is("urn:uuid:4fa7b334-ce0d-4e88-aaae-2e0c138d049e"));
    assertThat(provenance.get(DOI), Is.is("https://doi.org/10.15468/aomfnb"));
}

@Test
public void dealiasUUIDiDigBio() throws IOException, URISyntaxException {
Map<String, String> contentId = ProvUtil.findMostRecentContentId(
Expand Down

0 comments on commit 0b95792

Please sign in to comment.