Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Write payload manifest checksums to tag manifests #15

Merged
merged 14 commits into from
Apr 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 26 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,32 @@
language: java
sudo: false
jdk: openjdk8
jobs:
include:
- os: linux
jdk: openjdk8
language: java
- os: windows
language: shell

env:
- MAVEN_VERSION=3.6.3 JAVA_VERSION=8.242.8.1

before_install:
- "echo $JAVA_OPTS"
- if [ "$TRAVIS_OS_NAME" = "windows" ]; then
echo "Installing java ${JAVA_VERSION}";
choco install openjdk8 --version ${JAVA_VERSION};
echo "Installing maven ${MAVEN_VERSION}";
choco install maven --version ${MAVEN_VERSION};
export JAVA_HOME="/c/Program Files/OpenJDK/openjdk-8u242-b08";
export MAVEN_HOME="/c/ProgramData/chocolatey/lib/maven/apache-maven-${MAVEN_VERSION}/bin";
export PATH="${PATH}:${JAVA_HOME}/bin:${MAVEN_HOME}";
fi
- "echo ${PATH}"
- "mvn -N io.takari:maven:0.7.7:wrapper -Dmaven=${MAVEN_VERSION}"


script:
- mvn install -B -V
- mvn javadoc:jar
- mvn javadoc:test-aggregate
- ./mvnw install -B -V
- ./mvnw javadoc:jar
- ./mvnw javadoc:test-aggregate

after_success:
- bash <(curl -s https://codecov.io/bash)
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then bash <(curl -s https://codecov.io/bash); fi
103 changes: 53 additions & 50 deletions src/main/java/org/duraspace/bagit/BagWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,12 @@
*/
package org.duraspace.bagit;

import static org.duraspace.bagit.BagItDigest.MD5;
import static org.duraspace.bagit.BagItDigest.SHA1;
import static org.duraspace.bagit.BagItDigest.SHA256;
import static org.duraspace.bagit.BagItDigest.SHA512;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.util.HashMap;
import java.util.Map;
Expand All @@ -38,6 +31,12 @@ public class BagWriter {
private Map<BagItDigest, Map<File, String>> tagFileRegistry;
private Map<String, Map<String, String>> tagRegistry;

/**
 * The {@link DigestOutputStream}s wrapping the file currently being written, keyed by the
 * {@link BagItDigest} each one computes. Entries are added when an output stream is opened for a
 * file, read back to obtain the {@link MessageDigest} of each algorithm once the file's content
 * has been written, and then cleared before the next file is written.
 */
private Map<BagItDigest, DigestOutputStream> activeStreams;

/**
* Version of the BagIt specification implemented
*/
Expand All @@ -64,6 +63,8 @@ public BagWriter(final File bagDir, final Set<BagItDigest> algorithms) {
bagitValues.put("BagIt-Version", BAGIT_VERSION);
bagitValues.put("Tag-File-Character-Encoding", "UTF-8");
tagRegistry.put("bagit.txt", bagitValues);

activeStreams = new HashMap<>();
}

/**
Expand Down Expand Up @@ -109,15 +110,15 @@ public Map<String, String> getTags(final String key) {
* @throws IOException when an I/O error occurs
*/
public void write() throws IOException {
writeManifests("manifest", payloadRegistry);
writeManifests("manifest", payloadRegistry, true);
for (String tagFile : tagRegistry.keySet()) {
writeTagFile(tagFile);
}
writeManifests("tagmanifest", tagFileRegistry);
writeManifests("tagmanifest", tagFileRegistry, false);
}

private void writeManifests(final String prefix, final Map<BagItDigest, Map<File, String>> registry)
throws IOException {
private void writeManifests(final String prefix, final Map<BagItDigest, Map<File, String>> registry,
final boolean registerToTags) throws IOException {
final String delimiter = " ";
final char backslash = '\\';
final char bagitSeparator = '/';
Expand All @@ -127,12 +128,21 @@ private void writeManifests(final String prefix, final Map<BagItDigest, Map<File
final Map<File, String> filemap = registry.get(algorithm);
if (filemap != null) {
final File f = new File(bagDir, prefix + "-" + algorithm.bagitName() + ".txt");
try (PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(f)))) {
try (OutputStream out = streamFor(f.toPath())) {
for (final File payload : filemap.keySet()) {
// replace all occurrences of backslashes, which are not allowed per the bagit spec
final String relative = bag.relativize(payload.toPath()).toString()
.replace(backslash, bagitSeparator);
out.println(filemap.get(payload) + delimiter + relative);
final String line = filemap.get(payload) + delimiter + relative;
out.write(line.getBytes());
out.write("\n".getBytes());

if (registerToTags) {
for (Map.Entry<BagItDigest, DigestOutputStream> entry : activeStreams.entrySet()) {
addTagChecksum(entry.getKey(), f, entry.getValue().getMessageDigest());
}
}
activeStreams.clear();
}
}
}
Expand All @@ -144,48 +154,41 @@ private void writeTagFile(final String key) throws IOException {
if (values != null) {
final File f = new File(bagDir, key);

MessageDigest md5 = null;
MessageDigest sha1 = null;
MessageDigest sha256 = null;
MessageDigest sha512 = null;
if (algorithms.contains(MD5)) {
md5 = MD5.messageDigest();
}
if (algorithms.contains(SHA1)) {
sha1 = SHA1.messageDigest();
}
if (algorithms.contains(SHA256)) {
sha256 = SHA256.messageDigest();
}
if (algorithms.contains(SHA512)) {
sha512 = SHA512.messageDigest();
}

try (OutputStream out = new FileOutputStream(f)) {
try (OutputStream out = streamFor(f.toPath())) {
for (final String field : values.keySet()) {
final byte[] bytes = (field + ": " + values.get(field) + "\n").getBytes();
out.write(bytes);

if (md5 != null) {
md5.update(bytes);
}
if (sha1 != null) {
sha1.update(bytes);
}
if (sha256 != null) {
sha256.update(bytes);
}
if (sha512 != null) {
sha512.update(bytes);
}
}
}

addTagChecksum(MD5, f, md5);
addTagChecksum(SHA1, f, sha1);
addTagChecksum(SHA256, f, sha256);
addTagChecksum(SHA512, f, sha512);
for (Map.Entry<BagItDigest, DigestOutputStream> entry : activeStreams.entrySet()) {
addTagChecksum(entry.getKey(), f, entry.getValue().getMessageDigest());
}
}

activeStreams.clear();
}

/**
 * Create an {@link OutputStream} for a given {@link Path} which can be used to write data to the file.
 * The returned stream is a chain of {@link DigestOutputStream}s layered over the file's own stream, one
 * per {@link BagItDigest} in this class's registered {@code algorithms}, so that every byte written is
 * fed to each digest while it is being written to disk. Each {@link DigestOutputStream} is stored in
 * {@code activeStreams} under its algorithm so that the resulting checksum can be retrieved later on.
 * <p>
 * Closing the returned stream closes the whole chain, including the underlying file stream. If the chain
 * cannot be built, the file stream opened here is closed before the failure is propagated, so no file
 * handle is leaked.
 *
 * @param file the {@link Path} to create an {@link OutputStream} for
 * @return the outermost {@link OutputStream} of the digest chain
 * @throws IOException if there is an error creating the {@link OutputStream}
 */
private OutputStream streamFor(final Path file) throws IOException {
    final OutputStream fileStream = Files.newOutputStream(file);
    try {
        OutputStream lastStream = fileStream;
        for (final BagItDigest algorithm : algorithms) {
            final DigestOutputStream dos = new DigestOutputStream(lastStream, algorithm.messageDigest());
            activeStreams.put(algorithm, dos);
            lastStream = dos;
        }

        return lastStream;
    } catch (final RuntimeException e) {
        // The caller never receives a stream to close in this case, so release the file handle here.
        try {
            fileStream.close();
        } catch (final IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}

private void addTagChecksum(final BagItDigest algorithm, final File f, final MessageDigest digest) {
Expand Down
28 changes: 28 additions & 0 deletions src/test/java/org/duraspace/bagit/BagWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.time.LocalDate;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Stream;

import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.exceptions.CorruptChecksumException;
Expand Down Expand Up @@ -142,6 +144,17 @@ public void write() throws IOException {
final List<String> extraLines = Files.readAllLines(extra);
assertThat(extraLines).contains("test-key: test-value");

// Assert that tagmanifest-{sha1,sha256,sha512}.txt contain the manifest checksums
final String manifestRegex = sha1.bagitName() + "|" + sha256.bagitName() + "|" + sha512.bagitName();
for (Path tagmanifest : Sets.newHashSet(sha1Tagmanifest, sha256Tagmanifest, sha512Tagmanifest)) {
try (Stream<String> lines = Files.lines(tagmanifest)) {
assertThat(lines)
.filteredOn(line -> line.contains("manifest"))
.hasSize(3)
.allSatisfy(entry -> assertThat(entry).containsPattern(manifestRegex));
}
}

// Finally, pass BagProfile validation and BagIt validation
final BagReader reader = new BagReader();
final BagVerifier verifier = new BagVerifier();
Expand All @@ -158,4 +171,19 @@ public void write() throws IOException {
fail("Unable to verify bag:\n" + e.getMessage());
}
}

/**
 * Registering checksums for an algorithm the writer was not constructed with is expected to be
 * rejected with a {@link RuntimeException}.
 *
 * @throws IOException if the bag directory cannot be created
 */
@Test(expected = RuntimeException.class)
public void testAddInvalidAlgorithm() throws IOException {
// The message digests to use: sha1 is given to the writer, sha256 is deliberately left out
final BagItDigest sha1 = BagItDigest.SHA1;
final BagItDigest sha256 = BagItDigest.SHA256;

// Create a writer with a single manifest algorithm (sha1 only)
Files.createDirectories(bag);
final BagWriter writer = new BagWriter(bag.toFile(), Sets.newHashSet(sha1));

// we don't need to pass any files, just the errant BagItDigest
writer.registerChecksums(sha256, Collections.emptyMap());
}

}