Skip to content

Commit

Permalink
BagProfile Improvements (#26)
Browse files Browse the repository at this point in the history
* Add additional validate method for tag files
* Simplify init of Other-Info values
* Use lower-hyphen formatting when storing field names for tag files
* Replace sections set with metadataFields keyset
* Add some generic methods for validating from a BagProfile
* checkRequiredTagsExist - checks that all tag files in a profile exist
* validateTag - validates given fields for a tag file for a profile
* Additional coverage for BagProfile#metadataFields
* Create method for validating all BagIt tag files
* Add a non-BagConfig way of validating conformance to a BagProfile. This
requires the user to pass the configuration of the tag files in as a
Map<String, Map<String, String>> which is the same data structure which the
BagConfig uses.

Resolves #18
  • Loading branch information
mikejritter authored Apr 28, 2020
1 parent 68b5158 commit 1cb7e1c
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 38 deletions.
120 changes: 84 additions & 36 deletions src/main/java/org/duraspace/bagit/BagProfile.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import static org.duraspace.bagit.BagProfileConstants.ACCEPT_SERIALIZATION;
import static org.duraspace.bagit.BagProfileConstants.ALLOW_FETCH_TXT;
import static org.duraspace.bagit.BagProfileConstants.BAGIT_PROFILE_INFO;
import static org.duraspace.bagit.BagProfileConstants.BAGIT_TAG_SUFFIX;
import static org.duraspace.bagit.BagProfileConstants.BAG_INFO;
import static org.duraspace.bagit.BagProfileConstants.MANIFESTS_ALLOWED;
import static org.duraspace.bagit.BagProfileConstants.MANIFESTS_REQUIRED;
Expand All @@ -33,14 +34,15 @@
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.slf4j.Logger;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Manifest;
import org.slf4j.Logger;

/**
* A BagProfile contains the entire contents of a BagIt profile specified through the profile's json.
Expand Down Expand Up @@ -139,7 +141,6 @@ public String getIdentifier() {
private Set<String> payloadDigestAlgorithms;
private Set<String> tagDigestAlgorithms;

private Set<String> sections = new HashSet<>();
private Map<String, Map<String, ProfileFieldRule>> metadataFields = new HashMap<>();
private Map<String, String> profileMetadata = new HashMap<>();

Expand Down Expand Up @@ -189,8 +190,7 @@ private void load(final InputStream in) throws IOException {
payloadDigestAlgorithms = arrayValues(json, MANIFESTS_REQUIRED);
tagDigestAlgorithms = arrayValues(json, TAG_MANIFESTS_REQUIRED);

metadataFields.put(BAG_INFO, metadataFields(json, BAG_INFO));
sections.add(BAG_INFO);
metadataFields.put(BAG_INFO.toLowerCase(), metadataFields(json.get(BAG_INFO)));

if (json.get(OTHER_INFO) != null) {
loadOtherTags(json);
Expand All @@ -207,18 +207,17 @@ private void loadProfileInfo(final JsonNode json) {
private void loadOtherTags(final JsonNode json) {
final JsonNode arrayTags = json.get(OTHER_INFO);
if (arrayTags != null && arrayTags.isArray()) {
arrayTags.forEach(tag -> tag.fieldNames().forEachRemaining(sections::add));
final Iterator<JsonNode> arrayEntries = arrayTags.elements();
while (arrayEntries.hasNext()) {
final JsonNode entries = arrayEntries.next();
final Iterator<String> tagNames = entries.fieldNames();
while (tagNames.hasNext()) {
final String tagName = tagNames.next();
metadataFields.put(tagName, metadataFields(entries, tagName));
final Iterator<Map.Entry<String, JsonNode>> fields = entries.fields();
while (fields.hasNext()) {
final Map.Entry<String, JsonNode> entry = fields.next();
final String tagName = entry.getKey().toLowerCase();
metadataFields.put(tagName, metadataFields(entry.getValue()));
}
}
}
logger.debug("tagFiles is {}", sections);
logger.debug("metadataFields is {}", metadataFields);
}

Expand All @@ -240,26 +239,24 @@ private static Set<String> arrayValues(final JsonNode json, final String key) {
* Loads required tags and allowed values
*
* @param json json to parse
* @param key key in json to load tags from
* @return map of tags => set of allowed values
*/
private static Map<String, ProfileFieldRule> metadataFields(final JsonNode json, final String key) {
final JsonNode fields = json.get(key);

if (fields == null) {
private static Map<String, ProfileFieldRule> metadataFields(final JsonNode json) {
if (json == null) {
return Collections.emptyMap();
}

final Map<String, ProfileFieldRule> results = new HashMap<>();
for (final Iterator<String> it = fields.fieldNames(); it.hasNext(); ) {
// why not use the entry to iterate?
for (final Iterator<String> it = json.fieldNames(); it.hasNext(); ) {
// fields to pass to the ProfileFieldRule constructor
boolean required = false;
boolean repeatable = true;
boolean recommended = false;
String description = "No description";

final String name = it.next();
final JsonNode field = fields.get(name);
final JsonNode field = json.get(name);

// read each of the fields for the ProfileFieldRule:
// required, repeated, recommended, description, and values
Expand Down Expand Up @@ -419,7 +416,7 @@ public Map<String, ProfileFieldRule> getMetadataFields() {
* @return map of tag = set of acceptable values, or null if tagFile doesn't exist
*/
public Map<String, ProfileFieldRule> getMetadataFields(final String tagFile) {
return metadataFields.get(tagFile);
return metadataFields.get(tagFile.toLowerCase());
}

/**
Expand All @@ -428,7 +425,7 @@ public Map<String, ProfileFieldRule> getMetadataFields(final String tagFile) {
* @return set of section names
*/
public Set<String> getSectionNames() {
return sections;
return metadataFields.keySet();
}

/**
Expand All @@ -446,23 +443,74 @@ public Map<String, String> getProfileMetadata() {
* @param config the BagConfig
*/
public void validateConfig(final BagConfig config) {
for (final String section : sections) {
final String tagFile = section.toLowerCase() + ".txt";
if (config.hasTagFile(tagFile)) {
try {
ProfileValidationUtil.validate(section, getMetadataFields(section),
config.getFieldsForTagFile(tagFile));
checkRequiredTagsExist(config.getTagFiles());
for (final String section : config.getTagFiles()) {
validateTag(section, config.getFieldsForTagFile(section));
}
}

ProfileValidationUtil.validateTagIsAllowed(Paths.get(tagFile), tagFilesAllowed);
} catch (ProfileValidationException e) {
throw new RuntimeException(e.getMessage(), e);
}
} else {
throw new RuntimeException(String.format("Error missing section %s from bag config", section));
/**
 * Validate tag file configuration supplied as a plain map rather than a {@link BagConfig}.
 *
 * Each key of the map is a BagIt tag filename and each value holds the key-value pairs for that file, e.g. the
 * filename "bag-info.txt" could map to the pairs "Source-Organization: DuraSpace" and
 * "Organization-Address: The Cloud".
 *
 * The key set is first checked for every tag file the profile expects, then each entry is validated in turn.
 *
 * @param config the Map containing the configuration of BagIt tag files, keyed by tag filename
 * @throws RuntimeException if an expected tag file is missing from the map or an entry fails validation
 */
public void validateTagFiles(final Map<String, Map<String, String>> config) {
    checkRequiredTagsExist(config.keySet());
    for (final Map.Entry<String, Map<String, String>> tagFile : config.entrySet()) {
        validateTag(tagFile.getKey(), tagFile.getValue());
    }
}

/**
 * Ensure that every section held by the profile has a matching tag file among the supplied names.
 *
 * The filename expected for a section is the section name followed by the BagIt tag suffix.
 *
 * @param tags the name of each tag file to check
 * @throws RuntimeException naming the first expected tag file which is not present in {@code tags}
 */
private void checkRequiredTagsExist(final Set<String> tags) {
    // the pipeline is lazy, so evaluation stops at the first expected file which is absent
    metadataFields.keySet().stream()
        .map(section -> section + BAGIT_TAG_SUFFIX)
        .filter(expected -> !tags.contains(expected))
        .findFirst()
        .ifPresent(expected -> {
            throw new RuntimeException("Missing configuration for required tag file " + expected);
        });
}

/**
 * Validate the key-value pairs of a single tag file against the rules the profile holds for it.
 *
 * Tag files for which the profile holds no section are skipped without any validation.
 *
 * @param filename the name of the tag file to validate
 * @param fields the key-value pairs of the tag file to validate
 * @throws RuntimeException wrapping the {@link ProfileValidationException} raised when validation fails
 */
private void validateTag(final String filename, final Map<String, String> fields) {
    // sections are keyed without the tag file extension, so normalize the filename first
    final String section = getSection(filename);
    logger.debug("Checking validation for {}", section);
    if (!metadataFields.containsKey(section)) {
        return;
    }

    final Map<String, ProfileFieldRule> rules = getMetadataFields(section);
    try {
        ProfileValidationUtil.validate(section, rules, fields);
        ProfileValidationUtil.validateTagIsAllowed(Paths.get(filename), tagFilesAllowed);
    } catch (ProfileValidationException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
}

/**
 * Matches the BagIt tag suffix only at the very end of the input. The suffix is quoted so that every character
 * of it is treated literally, and the pattern is compiled once rather than on each call.
 */
private static final Pattern TAG_SUFFIX_AT_END = Pattern.compile(Pattern.quote(BAGIT_TAG_SUFFIX) + "\\z");

/**
 * Normalize a filename to be what we expect is held in the metadataFields key set: lower case and without the
 * trailing tag suffix.
 *
 * Only a suffix at the end of the filename is removed; a filename which merely contains the suffix somewhere
 * else (e.g. "bag-info.txt.bak") or does not end with it is returned lower-cased but otherwise unchanged.
 *
 * @param filename the filename to normalize
 * @return the lower-cased filename without a trailing tag suffix, so that it can be used with the metadataFields
 */
private String getSection(final String filename) {
    // lower-case first so the match against the suffix is effectively case-insensitive
    final Matcher matcher = TAG_SUFFIX_AT_END.matcher(filename.toLowerCase());
    return matcher.replaceFirst("");
}

/**
* Validate a given {@link Bag} against the current profile
Expand All @@ -488,7 +536,7 @@ public void validateBag(final Bag bag) {

// check payload manifest algorithms
errors.append(ProfileValidationUtil.validateManifest(foundPayloadManifests, payloadDigestAlgorithms,
allowedPayloadAlgorithms, payloadIdentifier));
allowedPayloadAlgorithms, payloadIdentifier));

// check tag manifest rules files allowed
// the reporting can be redundant if no tag manifests are found, so only check the allowed algorithms and
Expand All @@ -497,7 +545,7 @@ public void validateBag(final Bag bag) {
errors.append("No tag manifest found!\n");
} else {
errors.append(ProfileValidationUtil.validateManifest(foundTagManifests, tagDigestAlgorithms,
allowedTagAlgorithms, tagIdentifier));
allowedTagAlgorithms, tagIdentifier));

// grab the first tag manifest and use that to check all registered tag files
final Manifest manifest = foundTagManifests.iterator().next();
Expand All @@ -522,8 +570,8 @@ public void validateBag(final Bag bag) {
}

// check *-info required fields
for (String section : sections) {
final String tagFile = section.toLowerCase() + ".txt";
for (String section : metadataFields.keySet()) {
final String tagFile = section.toLowerCase() + BAGIT_TAG_SUFFIX;
final Path resolved = root.resolve(tagFile);
try {
ProfileValidationUtil.validate(section, metadataFields.get(section), resolved);
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/duraspace/bagit/BagProfileConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ public abstract class BagProfileConstants {
public static final String BAGIT_PROFILE_VERSION = "BagIt-Profile-Version";
public static final String BAGIT_PROFILE_IDENTIFIER = "BagIt-Profile-Identifier";

// misc
public static final String BAGIT_TAG_SUFFIX = ".txt";
public static final String BAGIT_MD5 = "md5";
public static final String BAGIT_SHA1 = "sha1";
public static final String BAGIT_SHA_256 = "sha256";
Expand Down
25 changes: 23 additions & 2 deletions src/test/java/org/duraspace/bagit/BagProfileTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/
package org.duraspace.bagit;

import static java.util.function.Function.identity;
import static org.assertj.core.api.Assertions.assertThat;
import static org.duraspace.bagit.BagConfig.ACCESS_KEY;
import static org.duraspace.bagit.BagConfig.BAGGING_DATE_KEY;
Expand All @@ -28,6 +29,7 @@
import static org.duraspace.bagit.BagProfileConstants.TAG_MANIFESTS_REQUIRED;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
Expand All @@ -43,6 +45,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.FetchItem;
Expand Down Expand Up @@ -131,6 +134,11 @@ public void testBasicProfileFromFile() throws Exception {
assertTrue(profile.getMetadataFields().get(PAYLOAD_OXUM_KEY).isRequired());
assertFalse(profile.getMetadataFields().get(CONTACT_EMAIL_KEY).isRequired());

assertTrue(profile.getMetadataFields().get(BAGIT_PROFILE_IDENTIFIER).isRepeatable());
assertFalse(profile.getMetadataFields().get(BAGIT_PROFILE_IDENTIFIER).isRequired());
assertFalse(profile.getMetadataFields().get(BAGIT_PROFILE_IDENTIFIER).isRecommended());
assertEquals(profile.getMetadataFields().get(BAGIT_PROFILE_IDENTIFIER).getDescription(), "No description");

assertTrue(profile.getSectionNames().stream().allMatch(t -> t.equalsIgnoreCase(BAG_INFO)));

assertFalse(profile.isAllowFetch());
Expand All @@ -144,14 +152,23 @@ public void testBasicProfileFromFile() throws Exception {
assertTrue(profile.getAllowedPayloadAlgorithms().isEmpty());
}

@Test
public void testLoadsEmptyMap() throws URISyntaxException, IOException {
    // this profile json declares no Bag-Info section at all
    final BagProfile profile =
        new BagProfile(Files.newInputStream(resolveResourcePath("profiles/profileNoBagInfo.json")));

    // the section must still resolve to a map, just one without any field rules
    final Map<String, ProfileFieldRule> rules = profile.getMetadataFields(BAG_INFO);
    assertNotNull("Expected a map for a missing Bag-Info section", rules);
    assertTrue("Expected no field rules for a missing Bag-Info section", rules.isEmpty());
}

@Test
public void testExtendedProfile() throws Exception {
final String aptrustInfo = "APTrust-Info";
final BagProfile profile = new BagProfile(Files.newInputStream(resolveResourcePath(extraTagsPath)));

assertTrue(profile.getSectionNames().stream().anyMatch(t -> t.equalsIgnoreCase(BAG_INFO)));
assertTrue(profile.getSectionNames().stream().anyMatch(t -> t.equals(aptrustInfo)));
assertTrue(profile.getSectionNames().stream().noneMatch(t -> t.equals("Wrong-Tags")));
assertTrue(profile.getSectionNames().stream().anyMatch(t -> t.equalsIgnoreCase(aptrustInfo)));
assertTrue(profile.getSectionNames().stream().noneMatch(t -> t.equalsIgnoreCase("Wrong-Tags")));
assertTrue(profile.getMetadataFields(aptrustInfo).containsKey(TITLE_KEY));
assertTrue(profile.getMetadataFields(aptrustInfo).containsKey(ACCESS_KEY));
assertTrue(profile.getMetadataFields(aptrustInfo).get(ACCESS_KEY).getValues().contains("Consortia"));
Expand All @@ -163,8 +180,12 @@ public void testExtendedProfile() throws Exception {
@Test
public void testGoodConfig() throws Exception {
    final BagProfile profile = new BagProfile(Files.newInputStream(resolveResourcePath(extraTagsPath)));
    final BagConfig config = new BagConfig(resolveResourcePath(bagitConfig).toFile());

    // the same configuration expressed as tag filename => fields, for the non-BagConfig entry point
    final Map<String, Map<String, String>> tagFiles = config.getTagFiles().stream()
        .collect(Collectors.toMap(identity(), config::getFieldsForTagFile));

    // both entry points are expected to accept the configuration without throwing
    profile.validateConfig(config);
    profile.validateTagFiles(tagFiles);
}

@Test(expected = RuntimeException.class)
Expand Down
4 changes: 4 additions & 0 deletions src/test/resources/profiles/profile.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@
},
"Payload-Oxum":{
"required":true
},
"BagIt-Profile-Identifier": {
"recommended": false,
"description": ""
}
},
"Manifests-Required":[
Expand Down
30 changes: 30 additions & 0 deletions src/test/resources/profiles/profileNoBagInfo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"BagIt-Profile-Info":{
"BagIt-Profile-Identifier":"http://fedora.info/profile.json",
"BagIt-Profile-Version":"1.3.0",
"Source-Organization":"Duraspace",
"Contact-Name":"Fedo Radmin",
"Contact-Email":"fedo@example.org",
"External-Description":"Test Bag Profile for Fedora.",
"Version":"0.2"
},
"Manifests-Required":[
"md5", "sha1", "sha256", "sha512"
],
"Manifests-Allowed": [],
"Allow-Fetch.txt":false,
"Serialization":"optional",
"Accept-Serialization": [
"application/tar"
],
"Tag-Manifests-Required":[
"sha1", "sha256", "sha512"
],
"Tag-Manifests-Allowed": [],
"Tag-Files-Required": [],
"Tag-Files-Allowed": ["*"],
"Accept-BagIt-Version":[
"0.97",
"1.0"
]
}

0 comments on commit 1cb7e1c

Please sign in to comment.