Skip to content

Commit

Permalink
enable redirection of iDigBio recordset uuids also; related to #199
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorrit Poelen committed Dec 22, 2023
1 parent a10a3ba commit 1c2d3b3
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 159 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package bio.guoda.preston.server;

import bio.guoda.preston.HashType;
import bio.guoda.preston.MimeTypes;
import bio.guoda.preston.RefNodeFactory;
import bio.guoda.preston.ResourcesHTTP;
Expand Down Expand Up @@ -43,8 +42,8 @@ public class RedirectingServlet extends HttpServlet {
public static final String DOI = "doi";
public static final String CONTENT_ID = "contentId";
public static final String ACTIVITY = "activity";

public static final String QUERY_TYPE_UUID = "uuid";
public static final String QUERY_TYPE_HASH = "hash";
public static final String QUERY_TYPE_URL = "url";
public static final String QUERY_TYPE_DOI = "doi";

Expand Down Expand Up @@ -130,9 +129,6 @@ private String parseRequestedIdOrThrow(String requestURI) throws ServletExceptio
static String queryTypeForRequestedId(String requestURI) {
return Stream.of(requestURI)
.map(req -> URN_UUID_REQUEST_PATTERN.matcher(req).matches() ? QUERY_TYPE_UUID : req)
.map(req -> HashType.sha1.getIRIPattern().matcher(req).matches() ? QUERY_TYPE_HASH : req)
.map(req -> HashType.md5.getIRIPattern().matcher(req).matches() ? QUERY_TYPE_HASH : req)
.map(req -> HashType.sha256.getIRIPattern().matcher(req).matches() ? QUERY_TYPE_HASH : req)
.map(req -> Pattern.compile("^(10[.])([^/]+)/(.*)$").matcher(req).matches() ? QUERY_TYPE_DOI : req)
.map(req -> Pattern.compile("^http[s]{0,1}://[^ ]+").matcher(req).matches() ? QUERY_TYPE_URL : req)
.findFirst()
Expand Down
56 changes: 27 additions & 29 deletions preston-serve/src/main/resources/bio/guoda/preston/server/doi.rq
Original file line number Diff line number Diff line change
Expand Up @@ -10,39 +10,37 @@ SELECT DISTINCT
?seenAt
?provenanceId
?activity
WHERE
{
BIND(?_doi_iri as ?doi)
BIND(<hash://sha256/5b7fa37bf8b64e7c935c4ff3389e36f8dd162f0705410dd719fd089e1ea253cd> as ?provenanceId)

{
WHERE {
BIND(?_doi_iri as ?doi)
BIND(<hash://sha256/5b7fa37bf8b64e7c935c4ff3389e36f8dd162f0705410dd719fd089e1ea253cd> as ?provenanceId)
{
SELECT ?contentId ?seenAt ?archiveUrl ?uuid ?activity
WHERE {
graph ?g1 {
?uuid <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?_doi_iri .
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
WHERE {
graph ?g1 {
?uuid <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?_doi_iri .
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
}
graph ?activity {
?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
}
}
}
graph ?activity {
?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
UNION
{
SELECT ?contentId ?seenAt ?archiveUrl ?uuid ?activity
WHERE {
?uuid <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?_doi_iri .
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
}
}

}
}
UNION
{
SELECT ?contentId ?seenAt ?archiveUrl ?uuid ?activity
WHERE {
?uuid <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?_doi_iri .
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
}
}
# exclude blank nodes that represent unavailable or non-content
FILTER(!CONTAINS(str(?contentId), ".well-known/genid" ) )

Expand Down
49 changes: 0 additions & 49 deletions preston-serve/src/main/resources/bio/guoda/preston/server/hash.rq

This file was deleted.

10 changes: 5 additions & 5 deletions preston-serve/src/main/resources/bio/guoda/preston/server/url.rq
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ WHERE
WHERE {
graph ?g1 {
?uuid <http://www.w3.org/ns/prov#hadMember> ?_url_iri .
?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi .
OPTIONAL { ?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi . }
?_url_iri <http://purl.org/dc/elements/1.1/format> "application/dwca" .
}
graph ?activity {
Expand All @@ -36,11 +36,11 @@ UNION
SELECT ?contentId ?seenAt ?uuid ?activity
WHERE {
?uuid <http://www.w3.org/ns/prov#hadMember> ?_url_iri .
?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi .
OPTIONAL { ?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi . }
?_url_iri <http://purl.org/dc/elements/1.1/format> "application/dwca" .
?activity <http://www.w3.org/ns/prov#used> ?_url_iri .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
?activity <http://www.w3.org/ns/prov#used> ?_url_iri .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
}
}
# exclude blank nodes that represent unavailable or non-content
Expand Down
52 changes: 25 additions & 27 deletions preston-serve/src/main/resources/bio/guoda/preston/server/uuid.rq
Original file line number Diff line number Diff line change
Expand Up @@ -10,38 +10,36 @@ SELECT DISTINCT
?seenAt
?provenanceId
?activity
WHERE
{
WHERE {
BIND(?_uuid_iri as ?uuid)
BIND(<hash://sha256/5b7fa37bf8b64e7c935c4ff3389e36f8dd162f0705410dd719fd089e1ea253cd> as ?provenanceId)
{
{
SELECT ?contentId ?seenAt ?archiveUrl ?doi ?activity
WHERE {
graph ?g1 {
?_uuid_iri <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
?_uuid_iri <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi .
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
}
graph ?activity {
?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
WHERE {
graph ?g1 {
?_uuid_iri <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
OPTIONAL { ?_uuid_iri <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi . }
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
}
graph ?activity {
?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
}
}

}
}
UNION
{
}
UNION
{
SELECT ?contentId ?seenAt ?archiveUrl ?doi ?activity
WHERE {
?_uuid_iri <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
?_uuid_iri <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi .
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
}
}
WHERE {
?_uuid_iri <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
OPTIONAL { ?_uuid_iri <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi . }
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
?activity <http://www.w3.org/ns/prov#generatedAtTime> ?seenAt .
?contentId <http://www.w3.org/ns/prov#qualifiedGeneration> ?activity .
}
}
# exclude blank nodes that represent unavailable or non-content
FILTER(!CONTAINS(str(?contentId), ".well-known/genid" ) )

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package bio.guoda.preston.server;

import bio.guoda.preston.RefNodeFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.hamcrest.core.Is;
import org.junit.Test;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Map;

import static bio.guoda.preston.server.RedirectingServlet.ACTIVITY;
import static bio.guoda.preston.server.RedirectingServlet.CONTENT_ID;
import static org.hamcrest.MatcherAssert.assertThat;

/**
 * Exercises {@link RedirectingServlet#findMostRecentContentId} with one identifier of
 * each supported query type (DOI, UUID, URL) and checks the content id in the result.
 *
 * <p>Every lookup is given {@link #SPARQL_ENDPOINT} as its endpoint argument.
 * NOTE(review): presumably a SPARQL service holding the expected provenance has to be
 * listening at that address for these tests to pass — confirm against the build setup.
 */
public class RedirectingServletIT {

    /** Endpoint handed to every lookup; a fixed value, so kept as a constant rather than per-instance state. */
    private static final String SPARQL_ENDPOINT = "http://localhost:7878/query";

    @Test
    public void dealiasDOI() throws IOException, URISyntaxException {
        Map<String, String> contentId = findMostRecentContentId(
                "https://doi.org/10.15468/aomfnb",
                RedirectingServlet.QUERY_TYPE_DOI);

        assertThat(contentId.get(CONTENT_ID), Is.is("hash://sha256/1e2b7436fce1848f41698e5a9c193f311abaf0ee051bec1a2e48b5106d29524d"));
        assertThat(contentId.get(ACTIVITY), Is.is("urn:uuid:77f3faf7-acd2-4f14-9c0e-4e04ef5b63c7"));
    }

    @Test
    public void dealiasUUIDGBIF() throws IOException, URISyntaxException {
        Map<String, String> contentId = findMostRecentContentId(
                "urn:uuid:4fa7b334-ce0d-4e88-aaae-2e0c138d049e",
                RedirectingServlet.QUERY_TYPE_UUID);

        assertThat(contentId.get(CONTENT_ID), Is.is("hash://sha256/1e2b7436fce1848f41698e5a9c193f311abaf0ee051bec1a2e48b5106d29524d"));
    }

    @Test
    public void dealiasUUIDiDigBio() throws IOException, URISyntaxException {
        Map<String, String> contentId = findMostRecentContentId(
                "urn:uuid:65007e62-740c-4302-ba20-260fe68da291",
                RedirectingServlet.QUERY_TYPE_UUID);

        assertThat(contentId.get(CONTENT_ID), Is.is("hash://sha256/f5d8f67c1eca34cbba1abac12f353585c78bb053bc8ce7ee7e7a78846e1bfc4a"));
    }

    @Test
    public void dealiasURL() throws URISyntaxException, IOException {
        Map<String, String> contentId = findMostRecentContentId(
                "https://hosted-datasets.gbif.org/eBird/2022-eBird-dwca-1.0.zip",
                RedirectingServlet.QUERY_TYPE_URL);

        assertThat(contentId.get(CONTENT_ID), Is.is("hash://sha256/1e2b7436fce1848f41698e5a9c193f311abaf0ee051bec1a2e48b5106d29524d"));
    }

    /**
     * Converts {@code requestedId} to an IRI and looks it up through
     * {@link RedirectingServlet#findMostRecentContentId} using {@link #SPARQL_ENDPOINT}.
     *
     * @param requestedId identifier to look up, in string form
     * @param queryType   one of the {@code RedirectingServlet.QUERY_TYPE_*} constants
     * @return the map returned by the servlet lookup, read by the tests via
     *         {@code CONTENT_ID} and {@code ACTIVITY}
     */
    private static Map<String, String> findMostRecentContentId(String requestedId, String queryType)
            throws IOException, URISyntaxException {
        return RedirectingServlet.findMostRecentContentId(
                RefNodeFactory.toIRI(requestedId),
                queryType,
                SPARQL_ENDPOINT);
    }

}
Original file line number Diff line number Diff line change
@@ -1,61 +1,18 @@
package bio.guoda.preston.server;

import bio.guoda.preston.RefNodeFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.hamcrest.core.Is;
import org.junit.Test;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Map;

import static bio.guoda.preston.server.RedirectingServlet.ACTIVITY;
import static bio.guoda.preston.server.RedirectingServlet.CONTENT_ID;
import static org.hamcrest.MatcherAssert.assertThat;

public class RedirectingServletTest {


@Test
public void dealiasDOI() throws IOException, URISyntaxException {
Map<String, String> contentId = RedirectingServlet.findMostRecentContentId(
RefNodeFactory.toIRI("https://doi.org/10.15468/aomfnb"),
RedirectingServlet.QUERY_TYPE_DOI, "https://lod.globalbioticinteractions.org/query"
);
assertThat(contentId.get(CONTENT_ID), Is.is("hash://sha256/1e2b7436fce1848f41698e5a9c193f311abaf0ee051bec1a2e48b5106d29524d"));
assertThat(contentId.get(ACTIVITY), Is.is("urn:uuid:77f3faf7-acd2-4f14-9c0e-4e04ef5b63c7"));
}

@Test
public void dealiasHash() throws IOException, URISyntaxException {
Map<String, String> contentId = RedirectingServlet.findMostRecentContentId(
RefNodeFactory.toIRI("hash://sha256/1e2b7436fce1848f41698e5a9c193f311abaf0ee051bec1a2e48b5106d29524d"),
RedirectingServlet.QUERY_TYPE_HASH,
"https://lod.globalbioticinteractions.org/query");
assertThat(contentId.get(CONTENT_ID), Is.is("hash://sha256/1e2b7436fce1848f41698e5a9c193f311abaf0ee051bec1a2e48b5106d29524d"));

}

@Test
public void dealiasUUID() throws IOException, URISyntaxException {
Map<String, String> contentId = RedirectingServlet.findMostRecentContentId(
RefNodeFactory.toIRI("urn:uuid:4fa7b334-ce0d-4e88-aaae-2e0c138d049e"),
RedirectingServlet.QUERY_TYPE_UUID,
"https://lod.globalbioticinteractions.org/query");
assertThat(contentId.get(CONTENT_ID), Is.is("hash://sha256/1e2b7436fce1848f41698e5a9c193f311abaf0ee051bec1a2e48b5106d29524d"));

}

@Test
public void dealiasURL() throws URISyntaxException, IOException {
Map<String, String> contentId = RedirectingServlet.findMostRecentContentId(
RefNodeFactory.toIRI("https://hosted-datasets.gbif.org/eBird/2022-eBird-dwca-1.0.zip"),
RedirectingServlet.QUERY_TYPE_URL,
"https://lod.globalbioticinteractions.org/query");
assertThat(contentId.get(CONTENT_ID), Is.is("hash://sha256/1e2b7436fce1848f41698e5a9c193f311abaf0ee051bec1a2e48b5106d29524d"));
}

@Test
public void parseResult() throws IOException {
JsonNode response = new ObjectMapper().readTree(getClass().getResourceAsStream("url-response.json"));
Expand Down Expand Up @@ -84,7 +41,7 @@ public void queryTypeUUID() {
@Test
public void queryTypeHash() {
String s = RedirectingServlet.queryTypeForRequestedId("hash://sha256/1e2b7436fce1848f41698e5a9c193f311abaf0ee051bec1a2e48b5106d29524d");
assertThat(s, Is.is(RedirectingServlet.QUERY_TYPE_HASH));
assertThat(s, Is.is("unknown"));
}

@Test
Expand Down

0 comments on commit 1c2d3b3

Please sign in to comment.