-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add test for indexed blob store; related to #199
- Loading branch information
Jorrit Poelen
committed
Aug 22, 2024
1 parent
ca22e7c
commit c54a767
Showing
22 changed files
with
211 additions
and
147 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
132 changes: 132 additions & 0 deletions
132
preston-cmd/src/main/java/bio/guoda/preston/cmd/BlobStoreUtil.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
package bio.guoda.preston.cmd; | ||
|
||
import bio.guoda.preston.RefNodeConstants; | ||
import bio.guoda.preston.RefNodeFactory; | ||
import bio.guoda.preston.process.EmittingStreamFactory; | ||
import bio.guoda.preston.process.EmittingStreamOfAnyQuad; | ||
import bio.guoda.preston.process.ParsingEmitter; | ||
import bio.guoda.preston.process.ProcessorState; | ||
import bio.guoda.preston.process.StatementEmitter; | ||
import bio.guoda.preston.process.StatementsListener; | ||
import bio.guoda.preston.process.StatementsListenerAdapter; | ||
import bio.guoda.preston.store.AliasDereferencer; | ||
import bio.guoda.preston.store.BlobStoreReadOnly; | ||
import bio.guoda.preston.store.ContentHashDereferencer; | ||
import bio.guoda.preston.store.Dereferencer; | ||
import bio.guoda.preston.store.HashKeyUtil; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.apache.commons.lang3.time.StopWatch; | ||
import org.apache.commons.rdf.api.IRI; | ||
import org.apache.commons.rdf.api.Quad; | ||
import org.mapdb.DBMaker; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.File; | ||
import java.io.IOError; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.Map; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.concurrent.atomic.AtomicLong; | ||
|
||
public class BlobStoreUtil { | ||
private static final Logger LOG = LoggerFactory.getLogger(BlobStoreUtil.class); | ||
|
||
public static BlobStoreReadOnly createIndexedBlobStoreFor(BlobStoreReadOnly blobStoreReadOnly, Persisting persisting) { | ||
Map<String, String> treeMap = buildIndexedBlobStore(persisting); | ||
|
||
return new BlobStoreReadOnly() { | ||
|
||
@Override | ||
public InputStream get(IRI uri) throws IOException { | ||
IRI iriForLookup = null; | ||
if (HashKeyUtil.isValidHashKey(uri)) { | ||
iriForLookup = uri; | ||
} else { | ||
String indexedVersion = treeMap.get(uri.getIRIString()); | ||
iriForLookup = StringUtils.isBlank(indexedVersion) ? uri : RefNodeFactory.toIRI(indexedVersion); | ||
} | ||
|
||
if (iriForLookup == null) { | ||
throw new IOException("failed to find content associated to [" + uri + "] in index."); | ||
} | ||
|
||
return blobStoreReadOnly.get(iriForLookup); | ||
} | ||
}; | ||
} | ||
|
||
public static BlobStoreReadOnly createResolvingBlobStoreFor(Dereferencer<InputStream> blobStore, Persisting persisting) { | ||
return new AliasDereferencer( | ||
new ContentHashDereferencer(blobStore), | ||
persisting, | ||
persisting.getProvenanceTracer() | ||
); | ||
} | ||
|
||
|
||
private static Map<String, String> buildIndexedBlobStore(Persisting persisting) { | ||
|
||
File tmpDir = persisting.getTmpDir(); | ||
IRI provenanceAnchor = persisting.getProvenanceAnchor(); | ||
if (CmdWithProvenance.PROVENANCE_ANCHOR_DEFAULT.equals(provenanceAnchor)) { | ||
throw new IllegalArgumentException("--anchor provenance anchor not set; please set provenance anchor"); | ||
} | ||
// indexing | ||
DBMaker maker = newTmpFileDB(tmpDir); | ||
Map<String, String> treeMap = maker | ||
.deleteFilesAfterClose() | ||
.closeOnJvmShutdown() | ||
.transactionDisable() | ||
.make() | ||
.createTreeMap("zotero-stream") | ||
.make(); | ||
|
||
|
||
StopWatch stopWatch = new StopWatch(); | ||
stopWatch.start(); | ||
AtomicLong index = new AtomicLong(0); | ||
LOG.info("version index for [" + provenanceAnchor + "] building..."); | ||
|
||
StatementsListener listener = new StatementsListenerAdapter() { | ||
@Override | ||
public void on(Quad statement) { | ||
if (RefNodeConstants.HAS_VERSION.equals(statement.getPredicate()) | ||
&& !RefNodeFactory.isBlankOrSkolemizedBlank(statement.getObject())) { | ||
if (statement.getSubject() instanceof IRI && statement.getObject() instanceof IRI) { | ||
IRI version = (IRI) statement.getObject(); | ||
if (HashKeyUtil.isValidHashKey(version)) { | ||
index.incrementAndGet(); | ||
String uri = ((IRI) statement.getSubject()).getIRIString(); | ||
String indexedVersion = version.getIRIString(); | ||
treeMap.putIfAbsent(uri, indexedVersion); | ||
} | ||
} | ||
} | ||
|
||
} | ||
}; | ||
ReplayUtil.replay(listener, persisting, new EmittingStreamFactory() { | ||
@Override | ||
public ParsingEmitter createEmitter(StatementEmitter emitter, ProcessorState context) { | ||
return new EmittingStreamOfAnyQuad(emitter, context); | ||
} | ||
}); | ||
stopWatch.stop(); | ||
LOG.info("version index for [" + provenanceAnchor + "] with [" + index.get() + "] versions built in [" + stopWatch.getTime(TimeUnit.SECONDS) + "] s"); | ||
|
||
return treeMap; | ||
} | ||
|
||
private static DBMaker newTmpFileDB(File tmpDir) { | ||
try { | ||
File db = File.createTempFile("mapdb-temp", "db", tmpDir); | ||
return DBMaker.newFileDB(db); | ||
} catch (IOException e) { | ||
throw new IOError(new IOException("failed to create tmpFile in [" + tmpDir.getAbsolutePath() + "]", e)); | ||
} | ||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
56 changes: 56 additions & 0 deletions
56
preston-cmd/src/test/java/bio/guoda/preston/cmd/BlobStoreUtilTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
package bio.guoda.preston.cmd; | ||
|
||
import bio.guoda.preston.HashType; | ||
import bio.guoda.preston.Hasher; | ||
import bio.guoda.preston.RefNodeFactory; | ||
import bio.guoda.preston.store.BlobStoreReadOnly; | ||
import org.apache.commons.io.IOUtils; | ||
import org.apache.commons.rdf.api.IRI; | ||
import org.hamcrest.core.Is; | ||
import org.junit.Rule; | ||
import org.junit.Test; | ||
import org.junit.rules.TemporaryFolder; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
import java.nio.charset.StandardCharsets; | ||
|
||
import static org.hamcrest.MatcherAssert.assertThat; | ||
|
||
public class BlobStoreUtilTest { | ||
|
||
@Rule | ||
public TemporaryFolder folder = new TemporaryFolder(); | ||
|
||
@Test | ||
public void blobStore() throws IOException, URISyntaxException { | ||
URL resource = getClass().getResource("index/data/27/f5/27f552c25bc733d05a5cc67e9ba63850"); | ||
File root = new File(resource.toURI()); | ||
File dataDir = root.getParentFile().getParentFile().getParentFile(); | ||
|
||
Persisting persisting = new Persisting(); | ||
persisting.setHashType(HashType.md5); | ||
|
||
persisting.setLocalDataDir(dataDir.getAbsolutePath()); | ||
persisting.setLocalTmpDir(folder.newFolder("tmp").getAbsolutePath()); | ||
persisting.setProvenanceArchor(RefNodeFactory.toIRI("hash://md5/ec998a9c63a64ac7bfef04c91ee84f16")); | ||
BlobStoreReadOnly blobStoreIndexed = BlobStoreUtil.createIndexedBlobStoreFor(new BlobStoreReadOnly() { | ||
@Override | ||
public InputStream get(IRI uri) throws IOException { | ||
IRI iri = Hasher.calcHashIRI("foo\n", HashType.md5); | ||
if (!iri.equals(uri)) { | ||
throw new IOException("kaboom!"); | ||
} | ||
return IOUtils.toInputStream("foo", StandardCharsets.UTF_8); | ||
} | ||
}, persisting); | ||
|
||
InputStream inputStream = blobStoreIndexed.get(RefNodeFactory.toIRI("https://example.org")); | ||
|
||
assertThat(IOUtils.toString(inputStream, StandardCharsets.UTF_8), Is.is("foo")); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.