Skip to content

Commit

Permalink
use provenance logs to find head if no anchor is provided
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorrit Poelen committed Aug 22, 2024
1 parent 087fa29 commit a19951a
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ private static void findHead(Persisting persisting, AtomicReference<IRI> head, P
}
}

/**
 * Looks up the head via {@code findHead(persisting)} and fails loudly when none is found.
 *
 * NOTE(review): this span is a diff rendering whose +/- markers were stripped, so two
 * signature lines and two return lines appear back to back. Presumably the
 * {@code AtomicReference<IRI>} variant is the pre-commit version and the {@code IRI}
 * variant is the post-commit one (the result is assigned to an {@code IRI} in
 * BlobStoreUtil) -- confirm against the repository before relying on this.
 *
 * @param persisting passed through unchanged to {@code findHead}
 * @return the head that was found (wrapped or unwrapped, depending on the variant)
 * @throws RuntimeException if the reference returned by {@code findHead} holds {@code null}
 */
public static AtomicReference<IRI> findHeadOrThrow(Persisting persisting) {
public static IRI findHeadOrThrow(Persisting persisting) {
AtomicReference<IRI> head = findHead(persisting);

// a null value in the reference is treated as "no provenance logs found"
if (head.get() == null) {
throw new RuntimeException("Cannot find most recent version: no provenance logs found.");
}

// first return pairs with the AtomicReference signature, second with the IRI signature
return head;
return head.get();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;

public class BlobStoreUtil {
private static final Logger LOG = LoggerFactory.getLogger(BlobStoreUtil.class);
Expand Down Expand Up @@ -71,8 +72,9 @@ private static Map<String, String> buildIndexedBlobStore(Persisting persisting)
File tmpDir = persisting.getTmpDir();
IRI provenanceAnchor = persisting.getProvenanceAnchor();
if (CmdWithProvenance.PROVENANCE_ANCHOR_DEFAULT.equals(provenanceAnchor)) {
throw new IllegalArgumentException("--anchor provenance anchor not set; please set provenance anchor");
provenanceAnchor = AnchorUtil.findHeadOrThrow(persisting);
}

// indexing
DBMaker maker = newTmpFileDB(tmpDir);
Map<String, String> treeMap = maker
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,34 @@ public void indexedBlobStore() throws IOException, URISyntaxException {
assertThat(IOUtils.toString(inputStream, StandardCharsets.UTF_8), Is.is("foo"));
}

/**
 * Verifies that an indexed blob store can be created and queried while the provenance
 * anchor is left at {@code CmdWithProvenance.PROVENANCE_ANCHOR_DEFAULT}: the content
 * retrieved for {@code https://example.org} is expected to be {@code "foo"}.
 *
 * @throws IOException        if reading the retrieved content fails
 * @throws URISyntaxException if the test data directory cannot be resolved
 */
@Test
public void indexedBlobStoreWithoutProvenanceAnchor() throws IOException, URISyntaxException {
    File dataDir = getDataDir();

    Persisting persisting = getPersisting(dataDir);
    persisting.setProvenanceArchor(CmdWithProvenance.PROVENANCE_ANCHOR_DEFAULT);
    BlobStoreReadOnly blobStoreIndexed = BlobStoreUtil.createIndexedBlobStoreFor(getBlobStore(), persisting);

    // try-with-resources: release the stream even when the assertion fails
    try (InputStream inputStream = blobStoreIndexed.get(RefNodeFactory.toIRI("https://example.org"))) {
        assertThat(IOUtils.toString(inputStream, StandardCharsets.UTF_8), Is.is("foo"));
    }
}

/**
 * Expects index creation to fail with a {@link RuntimeException} carrying a specific
 * message when the provenance anchor is left at its default and the
 * {@code index-data-no-provenance} fixture is used as data directory.
 *
 * @throws IOException        declared for the helpers used during setup
 * @throws URISyntaxException if the test data directory cannot be resolved
 */
@Test(expected = RuntimeException.class)
public void indexedBlobStoreWithoutProvenanceAnchorNoProvenanceIndex() throws IOException, URISyntaxException {
    Persisting persisting = getPersisting(
            getDataDir("index-data-no-provenance/d3/b0/d3b07384d113edec49eaa6238ad5ff00"));
    persisting.setProvenanceArchor(CmdWithProvenance.PROVENANCE_ANCHOR_DEFAULT);

    try {
        BlobStoreUtil.createIndexedBlobStoreFor(getBlobStore(), persisting);
    } catch (RuntimeException failure) {
        // check the message first, then rethrow so the expected-exception rule is satisfied
        String actualMessage = failure.getMessage();
        assertThat(actualMessage, Is.is("Cannot find most recent version: no provenance logs found."));
        throw failure;
    }
}

@Test
public void resolvingBlobStoreWithProvenanceAnchor() throws IOException, URISyntaxException {
File dataDir = getDataDir();
Expand Down Expand Up @@ -87,7 +115,12 @@ private Persisting getPersisting(File dataDir) throws IOException {


/**
 * Resolves the default test data directory by delegating to
 * {@link #getDataDir(String)} with the default fixture resource path.
 *
 * @return the directory computed by {@link #getDataDir(String)} for the default resource
 * @throws URISyntaxException if the resource URL cannot be converted to a URI
 */
private File getDataDir() throws URISyntaxException {
    // the resource lookup itself happens in getDataDir(String); no need to repeat it here
    return getDataDir("index-data/27/f5/27f552c25bc733d05a5cc67e9ba63850");
}

/**
 * Locates the given resource relative to this test class and returns the directory
 * three levels above it.
 *
 * @param s resource path, resolved via {@code getClass().getResource(s)}
 * @return the great-grandparent directory of the resolved resource file
 * @throws URISyntaxException if the resource URL cannot be converted to a URI
 * @throws AssertionError     if the resource cannot be found
 */
private File getDataDir(String s) throws URISyntaxException {
    URL resource = getClass().getResource(s);
    if (resource == null) {
        // Fail with an Error rather than a NullPointerException: an NPE is a
        // RuntimeException and could accidentally satisfy tests that expect one.
        throw new AssertionError("test resource not found: [" + s + "]");
    }
    File root = new File(resource.toURI());
    return root.getParentFile().getParentFile().getParentFile();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
foo
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<https://preston.guoda.bio> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#SoftwareAgent> <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<https://preston.guoda.bio> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Agent> <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<https://preston.guoda.bio> <http://purl.org/dc/terms/description> "Preston is a software program that finds, archives and provides access to biodiversity datasets."@en <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Activity> <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> <http://purl.org/dc/terms/description> "A crawl event that discovers biodiversity archives."@en <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> <http://www.w3.org/ns/prov#startedAtTime> "2024-08-22T13:19:17.894Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> <http://www.w3.org/ns/prov#wasStartedBy> <https://preston.guoda.bio> <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<https://doi.org/10.5281/zenodo.1410543> <http://www.w3.org/ns/prov#usedBy> <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<https://doi.org/10.5281/zenodo.1410543> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/dc/dcmitype/Software> <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<https://doi.org/10.5281/zenodo.1410543> <http://purl.org/dc/terms/bibliographicCitation> "Jorrit Poelen, Icaro Alzuru, & Michael Elliott. 2018-2024. Preston: a biodiversity dataset tracker (Version 0.8.7-SNAPSHOT) [Software]. Zenodo. https://doi.org/10.5281/zenodo.1410543"@en <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<urn:uuid:0659a54f-b713-4f86-a917-5be166a14110> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Entity> <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<urn:uuid:0659a54f-b713-4f86-a917-5be166a14110> <http://purl.org/dc/terms/description> "A biodiversity dataset graph archive."@en <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> .
<hash://md5/d3b07384d113edec49eaa6238ad5ff00> <http://www.w3.org/ns/prov#wasGeneratedBy> <urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> <urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> .
<hash://md5/d3b07384d113edec49eaa6238ad5ff00> <http://www.w3.org/ns/prov#qualifiedGeneration> <urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> <urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> .
<urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> <http://www.w3.org/ns/prov#generatedAtTime> "2024-08-22T13:19:17.988Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> <urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> .
<urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Generation> <urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> .
<urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> <http://www.w3.org/ns/prov#wasInformedBy> <urn:uuid:4d4df7fa-e890-458c-ae52-c58926bd5b67> <urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> .
<urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> <http://www.w3.org/ns/prov#used> <urn:uuid:c5b68b8b-ad93-4de7-85e3-4f325b24712c> <urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> .
<urn:uuid:c5b68b8b-ad93-4de7-85e3-4f325b24712c> <http://purl.org/pav/hasVersion> <hash://md5/d3b07384d113edec49eaa6238ad5ff00> <urn:uuid:8bcd4aeb-af38-4840-acf3-a829668c0757> .
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<https://preston.guoda.bio> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#SoftwareAgent> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<https://preston.guoda.bio> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Agent> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<https://preston.guoda.bio> <http://purl.org/dc/terms/description> "Preston is a software program that finds, archives and provides access to biodiversity datasets."@en <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Activity> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> <http://purl.org/dc/terms/description> "An activity that assigns an alias to a content hash"@en <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> <http://www.w3.org/ns/prov#startedAtTime> "2024-08-22T13:19:49.902Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> <http://www.w3.org/ns/prov#wasStartedBy> <https://preston.guoda.bio> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<https://doi.org/10.5281/zenodo.1410543> <http://www.w3.org/ns/prov#usedBy> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<https://doi.org/10.5281/zenodo.1410543> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/dc/dcmitype/Software> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<https://doi.org/10.5281/zenodo.1410543> <http://purl.org/dc/terms/bibliographicCitation> "Jorrit Poelen, Icaro Alzuru, & Michael Elliott. 2018-2024. Preston: a biodiversity dataset tracker (Version 0.8.7-SNAPSHOT) [Software]. Zenodo. https://doi.org/10.5281/zenodo.1410543"@en <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<urn:uuid:0659a54f-b713-4f86-a917-5be166a14110> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/prov#Entity> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<urn:uuid:0659a54f-b713-4f86-a917-5be166a14110> <http://purl.org/dc/terms/description> "A biodiversity dataset graph archive."@en <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<hash://md5/d8ae3c306edbac7a0d598ad0bc711a9a> <http://www.w3.org/ns/prov#usedBy> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .
<https://example.org> <http://purl.org/pav/hasVersion> <hash://md5/d3b07384d113edec49eaa6238ad5ff00> <urn:uuid:cfcbb9c0-5adf-48a1-84b6-18ed348d89f0> .

0 comments on commit a19951a

Please sign in to comment.