Skip to content

Commit

Permalink
add index search support for EML files in addition to DwC Archives; r…
Browse files Browse the repository at this point in the history
…elated to #199
  • Loading branch information
Jorrit Poelen committed Dec 29, 2023
1 parent a4cac2f commit 7247545
Show file tree
Hide file tree
Showing 11 changed files with 146 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public class KeyTo1LevelZenodoBucket implements KeyToPath {

private final KeyToPath proxied;

private final AtomicReference<URI> lastZenodoBucket = new AtomicReference<URI>();
private final AtomicReference<URI> lastZenodoBucket = new AtomicReference<>();

public KeyTo1LevelZenodoBucket(KeyToPath keyToPath) {
this.proxied = keyToPath;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.TreeMap;

public class BadgeServlet extends RedirectingServlet {

Expand All @@ -29,21 +30,36 @@ protected void handleRequest(HttpServletResponse response,
String sparqlEndpoint,
String queryType,
IRI requestedIdIRI,
int responseHttpStatus) throws IOException, ServletException {
int responseHttpStatus,
String resourceType) throws IOException, ServletException {
try {
Map<String, String> provInfo = ProvUtil.findMostRecentContentId(
requestedIdIRI,
queryType,
sparqlEndpoint);
Map<String, String> provInfo = ProvUtil
.findMostRecentContentId(
requestedIdIRI,
queryType,
sparqlEndpoint,
resourceType
);
final TreeMap<String, String> labelMap = new TreeMap<String, String>() {{
put(MimeTypes.MIME_TYPE_DWCA, "DwC Archive");
put(MimeTypes.MIME_TYPE_EML, "EML File");
}};
String typeLabel = labelMap.getOrDefault(resourceType, "DwC Archive");
if (isOfKnownOrigin(provInfo)) {
URI uri = populateResponseHeader(response, getResolverEndpoint(), provInfo);
renderTemplate(response, uri, "origin-known.svg");
renderTemplate(
response,
uri,
"origin-known.svg",
typeLabel
);
log("found origin of [" + requestedIdIRI.getIRIString() + "]");
} else {
renderTemplate(
response,
URI.create(getResolverEndpoint() + requestedIdIRI.getIRIString()),
"origin-unknown.svg"
"origin-unknown.svg",
typeLabel
);
response.setStatus(HttpServletResponse.SC_NOT_FOUND);
log("did not find origin of [" + requestedIdIRI.getIRIString() + "]");
Expand All @@ -65,11 +81,13 @@ protected String getPrefix() {
}


private void renderTemplate(HttpServletResponse response, URI uri, String templateResource) throws IOException {
private void renderTemplate(HttpServletResponse response, URI uri, String templateResource, String typeLabel) throws IOException {
String badgeTemplate = IOUtils.toString(getClass().getResourceAsStream(templateResource), StandardCharsets.UTF_8);
String badge = StringUtils.replace(badgeTemplate, "{{REDIRECT_URL}}", uri.toString());
String badge = StringUtils
.replace(badgeTemplate, "{{REDIRECT_URL}}", uri.toString())
.replace("{{TYPE}}", typeLabel);

try(InputStream inputStream = IOUtils.toInputStream(badge, StandardCharsets.UTF_8)) {
try (InputStream inputStream = IOUtils.toInputStream(badge, StandardCharsets.UTF_8)) {
IOUtils.copy(inputStream, response.getOutputStream());
response.setHeader(HttpHeaders.CONTENT_TYPE, "image/svg+xml");
response.setStatus(HttpServletResponse.SC_OK);
Expand Down
16 changes: 11 additions & 5 deletions preston-serve/src/main/java/bio/guoda/preston/server/ProvUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,23 @@ public class ProvUtil {
public static final String QUERY_TYPE_DOI = "doi";
public static final List<String> QUERIES_SUPPORTED = Arrays.asList(QUERY_TYPE_DOI, QUERY_TYPE_UUID, QUERY_TYPE_URL, QUERY_TYPE_CONTENT_ID);

public static Map<String, String> findMostRecentContentId(IRI iri, String paramName, String sparqlEndpoint) throws IOException, URISyntaxException {
String response = findProvenance(iri, paramName, sparqlEndpoint);
/**
 * Looks up provenance for the given identifier and returns the extracted provenance fields.
 *
 * <p>Delegates to {@code findProvenance} to obtain the raw response, then hands that
 * response to {@code extractProvenanceInfo}.
 *
 * @param iri            identifier to look up
 * @param paramName      query type; selects the {@code <paramName>.rq} query template
 * @param sparqlEndpoint SPARQL endpoint, passed through to {@code findProvenance}
 * @param resourceType   content type substituted for the {@code ?_type} placeholder of the query template
 * @return provenance fields as produced by {@code extractProvenanceInfo}
 * @throws IOException        propagated from {@code findProvenance}, e.g. when the query template cannot be found
 * @throws URISyntaxException declared by {@code findProvenance}
 */
public static Map<String, String> findMostRecentContentId(IRI iri, String paramName, String sparqlEndpoint, String resourceType) throws IOException, URISyntaxException {
    return extractProvenanceInfo(
            findProvenance(iri, paramName, sparqlEndpoint, resourceType)
    );
}

protected static String findProvenance(IRI iri, String paramName, String sparqlEndpoint) throws IOException, URISyntaxException {
InputStream resourceAsStream = RedirectingServlet.class.getResourceAsStream(paramName + ".rq");
protected static String findProvenance(IRI iri, String paramName, String sparqlEndpoint, String resourceType) throws IOException, URISyntaxException {
String queryTemplateName = paramName + ".rq";
InputStream resourceAsStream = RedirectingServlet.class.getResourceAsStream(queryTemplateName);

if (resourceAsStream == null) {
throw new IOException("failed to location query template [" + queryTemplateName + "]");
}
String queryTemplate = IOUtils.toString(resourceAsStream, StandardCharsets.UTF_8);

String queryString = StringUtils.replace(queryTemplate, "?_" + paramName + "_iri", iri.toString());
String queryString = StringUtils
.replace(queryTemplate, "?_" + paramName + "_iri", iri.toString())
.replace("?_type", "\"" + resourceType + "\"");

URI query = new URI("https", "example.org", "/query", "query=" + queryString, null);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.rdf.api.IRI;
import org.apache.http.HttpHeaders;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
Expand All @@ -14,6 +16,7 @@
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
Expand All @@ -27,6 +30,7 @@ public class RedirectingServlet extends HttpServlet {
public static final String SEEN_AT = "seenAt";
public static final String PROVENANCE_ID = "provenanceId";
public static final String ARCHIVE_URL = "archiveUrl";
public static final String CONTENT_TYPE = "contentType";
public static final String UUID = "uuid";
public static final String DOI = "doi";
public static final String CONTENT_ID = "contentId";
Expand Down Expand Up @@ -68,11 +72,21 @@ protected void doGet(
sparqlEndpoint,
queryType,
requestedIdIRI,
redirectOnGetRequest(request));
redirectOnGetRequest(request),
getResourceType(request.getRequestURI()));

}
}

/**
 * Determines the requested resource (content) type from the {@code type} request parameter.
 *
 * <p>Accepts either a bare query string ({@code type=application/eml}) or a URI that
 * carries one after a {@code ?} ({@code /doi/10.123/abc?type=application/eml}).
 * The first {@code type} parameter with a non-blank value wins.
 *
 * <p>NOTE(review): the visible caller passes {@code request.getRequestURI()}; per the
 * servlet API that value does not include the query string, so the caller most likely
 * needs {@code request.getQueryString()} for a {@code type} parameter to ever be seen
 * here. Confirm against the servlet container in use.
 *
 * @param requestURI request URI or query string to inspect; may be {@code null}
 * @return the value of the first non-blank {@code type} parameter, or
 *         {@link MimeTypes#MIME_TYPE_DWCA} when none is present
 */
private String getResourceType(String requestURI) {
    // URLEncodedUtils.parse(String, Charset) expects a bare query string. If a path
    // prefix is left in place it is fused onto the first parameter name
    // ("/doi/x?type" instead of "type") and the lookup below can never match.
    int queryStart = StringUtils.indexOf(requestURI, '?');
    int firstEquals = StringUtils.indexOf(requestURI, '=');
    // only treat '?' as the path/query delimiter when it precedes the first '=',
    // so that a bare query string whose value contains '?' is left untouched
    boolean hasPathPrefix = queryStart >= 0 && (firstEquals < 0 || queryStart < firstEquals);
    String query = hasPathPrefix ? requestURI.substring(queryStart + 1) : requestURI;

    return URLEncodedUtils
            .parse(query, StandardCharsets.UTF_8)
            .stream()
            .filter(p -> StringUtils.equals(p.getName(), "type"))
            .map(NameValuePair::getValue)
            // a valueless or empty "type" parameter is treated as absent
            .filter(StringUtils::isNotBlank)
            .findFirst()
            .orElse(MimeTypes.MIME_TYPE_DWCA);
}

protected String getPrefix() {
return "/";
}
Expand All @@ -92,12 +106,13 @@ protected void handleRequest(HttpServletResponse response,
String sparqlEndpoint,
String queryType,
IRI requestedIdIRI,
int responseHttpStatus) throws IOException, ServletException {
int responseHttpStatus,
String resourceType) throws IOException, ServletException {
try {
Map<String, String> provInfo = ProvUtil.findMostRecentContentId(
requestedIdIRI,
queryType,
sparqlEndpoint);
sparqlEndpoint, resourceType);
if (isOfKnownOrigin(provInfo)) {
populateResponseHeader(response, resolverEndpoint, provInfo);
response.setStatus(responseHttpStatus);
Expand All @@ -120,7 +135,7 @@ protected URI populateResponseHeader(HttpServletResponse response, String resolv
String contentId = provInfo.get(CONTENT_ID);
URI uri = getResolverURI(resolverEndpoint, contentId);
response.setHeader(HttpHeaders.LOCATION, uri.toString());
response.setHeader(HttpHeaders.CONTENT_TYPE, MimeTypes.MIME_TYPE_DWCA);
response.setHeader(HttpHeaders.CONTENT_TYPE, provInfo.get(CONTENT_TYPE));
response.setHeader(HttpHeaders.ETAG, contentId);
response.setHeader(HttpHeaders.CONTENT_LOCATION, provInfo.get(ARCHIVE_URL));
List<String> influencedBy = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ SELECT DISTINCT
?uuid
?doi
?contentId
?contentType
?archiveUrl
?seenAt
?provenanceId
Expand All @@ -20,6 +21,7 @@ SELECT DISTINCT
?activity
WHERE {
BIND(?_contentId_iri as ?contentId)
BIND(?_type as ?contentType)
BIND(<hash://sha256/5b7fa37bf8b64e7c935c4ff3389e36f8dd162f0705410dd719fd089e1ea253cd> as ?provenanceId)
{
SELECT ?uuid ?seenAt ?archiveUrl ?doi ?activity
Expand All @@ -37,7 +39,7 @@ SELECT DISTINCT
graph ?g1 {
?uuid <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?doi .
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
?archiveUrl <http://purl.org/dc/elements/1.1/format> ?_type .
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,22 @@ SELECT DISTINCT
?doi
?uuid
?contentId
?archiveUrl
?contentType
?archiveUrl
?seenAt
?provenanceId
?activity
WHERE {
BIND(?_doi_iri as ?doi)
BIND(?_type as ?contentType)
BIND(<hash://sha256/5b7fa37bf8b64e7c935c4ff3389e36f8dd162f0705410dd719fd089e1ea253cd> as ?provenanceId)
{
SELECT ?contentId ?seenAt ?archiveUrl ?uuid ?activity
WHERE {
graph ?g1 {
?uuid <http://www.w3.org/ns/prov#hadMember> ?archiveUrl .
?uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#seeAlso> ?_doi_iri .
?archiveUrl <http://purl.org/dc/elements/1.1/format> "application/dwca" .
?archiveUrl <http://purl.org/dc/elements/1.1/format> ?_type .
}
graph ?activity {
?activity <http://www.w3.org/ns/prov#used> ?archiveUrl .
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 7247545

Please sign in to comment.