Skip to content

Commit

Permalink
Merge branch 'develop' into 7000-mpconfig-infra IQSS#7000
Browse files Browse the repository at this point in the history
  • Loading branch information
pdurbin committed Sep 6, 2022
2 parents 22940ff + a8af4fd commit 233bbda
Show file tree
Hide file tree
Showing 10 changed files with 53 additions and 31 deletions.
7 changes: 7 additions & 0 deletions doc/release-notes/8868-fix-json-import.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Under "bug fixes":

Small bugs have been fixed in the dataset export in the JSON and DDI formats, eliminating the export of "undefined" as a metadata language in the former, and a duplicate keyword tag in the latter.

Run ReExportAll to update the exports, following the directions in the [Admin Guide](http://guides.dataverse.org/en/5.12/admin/metadataexport.html#batch-exports-through-the-api).
3 changes: 3 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ public void setMetadataLanguage(String ml) {
}
}

/**
 * Whether a real metadata language has been specified: the value must be
 * non-null and must not be the "undefined" sentinel code.
 */
public static boolean isMetadataLanguageSet(String mdLang) {
    // Constant-first equals keeps the expression null-safe by construction.
    return (mdLang != null) && !UNDEFINED_METADATA_LANGUAGE_CODE.equals(mdLang);
}


/* Dataverse collections can be configured to allow use of Curation labels and have this inheritable value to decide which set of labels to use.
Expand Down
13 changes: 9 additions & 4 deletions src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.EjbUtil;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.MarkupChecker;
import edu.harvard.iq.dataverse.util.SystemConfig;
import edu.harvard.iq.dataverse.util.bagit.OREMap;
import edu.harvard.iq.dataverse.util.json.JSONLDUtil;
Expand Down Expand Up @@ -3323,17 +3324,20 @@ public Response getDatasetVersionArchivalStatus(@PathParam("id") String datasetI
@Consumes(MediaType.APPLICATION_JSON)
@Path("/{id}/{version}/archivalStatus")
public Response setDatasetVersionArchivalStatus(@PathParam("id") String datasetId,
@PathParam("version") String versionNumber, JsonObject update, @Context UriInfo uriInfo,
@PathParam("version") String versionNumber, String newStatus, @Context UriInfo uriInfo,
@Context HttpHeaders headers) {

logger.fine(JsonUtil.prettyPrint(update));
logger.fine(newStatus);
try {
AuthenticatedUser au = findAuthenticatedUserOrDie();

if (!au.isSuperuser()) {
return error(Response.Status.FORBIDDEN, "Superusers only.");
}


//Verify we have valid json after removing any HTML tags (the status gets displayed in the UI, so we want plain text).
JsonObject update= JsonUtil.getJsonObject(MarkupChecker.stripAllTags(newStatus));

if (update.containsKey(DatasetVersion.ARCHIVAL_STATUS) && update.containsKey(DatasetVersion.ARCHIVAL_STATUS_MESSAGE)) {
String status = update.getString(DatasetVersion.ARCHIVAL_STATUS);
if (status.equals(DatasetVersion.ARCHIVAL_STATUS_PENDING) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)
Expand Down Expand Up @@ -3364,8 +3368,9 @@ public Response setDatasetVersionArchivalStatus(@PathParam("id") String datasetI
}
} catch (WrappedResponse wr) {
return wr.getResponse();
} catch (JsonException| IllegalStateException ex) {
return error(Status.BAD_REQUEST, "Unable to parse provided JSON");
}

return error(Status.BAD_REQUEST, "Unacceptable status format");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
public class DublinCoreExporter implements Exporter {



@Override
public String getProviderName() {
return "oai_dc";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) thr
xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION);
writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION);
if(isMetadataLanguageSet(datasetDto.getMetadataLanguage())) {
if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) {
writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage());
}
createStdyDscr(xmlw, datasetDto);
Expand All @@ -151,7 +151,7 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, DatasetVersion v
xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION);
writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION);
if(isMetadataLanguageSet(datasetDto.getMetadataLanguage())) {
if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) {
writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage());
}
createStdyDscr(xmlw, datasetDto);
Expand All @@ -161,14 +161,6 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, DatasetVersion v
xmlw.writeEndElement(); // codeBook
xmlw.flush();
}


/**
 * Whether a usable metadata language was supplied: non-null and not the
 * "undefined" sentinel defined on DvObjectContainer.
 *
 * @param mdLang the metadata language code to check (may be null)
 * @return true only when a concrete language code is present
 */
private static boolean isMetadataLanguageSet(String mdLang) {
    // Return the condition directly instead of the
    // if-return-true-else-return-false anti-pattern.
    return mdLang != null && !mdLang.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE);
}

/**
* @todo This is just a stub, copied from DDIExportServiceBean. It should
Expand Down Expand Up @@ -944,7 +936,7 @@ private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio
}
if (!distributorName.isEmpty()) {
xmlw.writeStartElement("distrbtr");
if(isMetadataLanguageSet(lang)) {
if(DvObjectContainer.isMetadataLanguageSet(lang)) {
writeAttribute(xmlw, "xml:lang", lang);
}
if (!distributorAffiliation.isEmpty()) {
Expand Down Expand Up @@ -1064,7 +1056,7 @@ private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO
if(!descriptionDate.isEmpty()){
writeAttribute(xmlw,"date",descriptionDate);
}
if(isMetadataLanguageSet(lang)) {
if(DvObjectContainer.isMetadataLanguageSet(lang)) {
writeAttribute(xmlw, "xml:lang", lang);
}
xmlw.writeCharacters(descriptionText);
Expand Down Expand Up @@ -1538,7 +1530,7 @@ private static void writeFullElement (XMLStreamWriter xmlw, String name, String
//For the simplest Elements we can
if (!StringUtilisEmpty(value)) {
xmlw.writeStartElement(name);
if(isMetadataLanguageSet(lang)) {
if(DvObjectContainer.isMetadataLanguageSet(lang)) {
writeAttribute(xmlw, "xml:lang", lang);
}
xmlw.writeCharacters(value);
Expand Down
14 changes: 7 additions & 7 deletions src/main/java/edu/harvard/iq/dataverse/util/MarkupChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import org.apache.commons.text.StringEscapeUtils;
import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;
import org.jsoup.safety.Safelist;
import org.jsoup.parser.Parser;

/**
Expand All @@ -20,8 +20,8 @@ public class MarkupChecker {


/**
* Wrapper around Jsoup clean method with the basic White list
* http://jsoup.org/cookbook/cleaning-html/whitelist-sanitizer
* Wrapper around Jsoup clean method with the basic Safe list
* http://jsoup.org/cookbook/cleaning-html/safelist-sanitizer
* @param unsafe
* @return
*/
Expand All @@ -33,18 +33,18 @@ public static String sanitizeBasicHTML(String unsafe) {
// basic includes: a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li, ol, p, pre, q, small, span, strike, strong, sub, sup, u, ul
//Whitelist wl = Whitelist.basic().addTags("img", "h1", "h2", "h3", "kbd", "hr", "s", "del");

Whitelist wl = Whitelist.basicWithImages().addTags("h1", "h2", "h3", "kbd", "hr", "s", "del", "map", "area").addAttributes("img", "usemap")
Safelist sl = Safelist.basicWithImages().addTags("h1", "h2", "h3", "kbd", "hr", "s", "del", "map", "area").addAttributes("img", "usemap")
.addAttributes("map", "name").addAttributes("area", "shape", "coords", "href", "title", "alt")
.addEnforcedAttribute("a", "target", "_blank");

return Jsoup.clean(unsafe, wl);
return Jsoup.clean(unsafe, sl);

}

/**
* Strip all HTMl tags
*
* http://jsoup.org/apidocs/org/jsoup/safety/Whitelist.html#none%28%29
* http://jsoup.org/apidocs/org/jsoup/safety/Safelist.html#none
*
* @param unsafe
* @return
Expand All @@ -55,7 +55,7 @@ public static String stripAllTags(String unsafe) {
return null;
}

return Parser.unescapeEntities(Jsoup.clean(unsafe, Whitelist.none()), true);
return Parser.unescapeEntities(Jsoup.clean(unsafe, Safelist.none()), true);

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except

aggBuilder.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(dataset.getOwner()));
String mdl = dataset.getMetadataLanguage();
if(!mdl.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) {
if (DvObjectContainer.isMetadataLanguageSet(mdl)) {
aggBuilder.add(JsonLDTerm.schemaOrg("inLanguage").getLabel(), mdl);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,16 +314,19 @@ public static JsonObjectBuilder json(BuiltinUser user) {
}

public static JsonObjectBuilder json(Dataset ds) {
return jsonObjectBuilder()
JsonObjectBuilder bld = jsonObjectBuilder()
.add("id", ds.getId())
.add("identifier", ds.getIdentifier())
.add("persistentUrl", ds.getPersistentURL())
.add("protocol", ds.getProtocol())
.add("authority", ds.getAuthority())
.add("publisher", BrandingUtil.getInstallationBrandName())
.add("publicationDate", ds.getPublicationDateFormattedYYYYMMDD())
.add("storageIdentifier", ds.getStorageIdentifier())
.add("metadataLanguage", ds.getMetadataLanguage());
.add("storageIdentifier", ds.getStorageIdentifier());
if (DvObjectContainer.isMetadataLanguageSet(ds.getMetadataLanguage())) {
bld.add("metadataLanguage", ds.getMetadataLanguage());
}
return bld;
}

public static JsonObjectBuilder json(DatasetVersion dsv) {
Expand Down
8 changes: 7 additions & 1 deletion src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ public void testDataFileAPIPermissions() {
}

@Test
public void testImportDDI() throws IOException {
public void testImportDDI() throws IOException, InterruptedException {

Response createUser = UtilIT.createRandomUser();
String username = UtilIT.getUsernameFromResponse(createUser);
Expand Down Expand Up @@ -534,6 +534,12 @@ public void testImportDDI() throws IOException {
Response destroyDatasetResponsePidRel = UtilIT.destroyDataset(datasetIdIntPidRel, apiToken);
assertEquals(200, destroyDatasetResponsePidRel.getStatusCode());

// This last dataset we have just imported, let's give it a sec. to finish indexing (?)
// or whatever it is that may still be happening. (Have been seeing intermittent 500 from the next
// destroyDataset() line lately)

Thread.sleep(1000L);

Integer datasetIdIntRelease = JsonPath.from(importDDIRelease.body().asString()).getInt("data.id");
Response destroyDatasetResponseRelease = UtilIT.destroyDataset(datasetIdIntRelease, apiToken);
assertEquals(200, destroyDatasetResponseRelease.getStatusCode());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,13 @@ public void testOaiFunctionality() throws InterruptedException {
logger.info("identifier: " + identifier);

// Let's try and create an OAI set with the dataset we have just
// created and published:
// - however, publish command is executed asynchronously, i.e. it may
// still be running after we received the OK from the publish API.
// So let's give it a couple of extra seconds to finish, to make sure
// the dataset is published, exported and indexed - because the OAI
// set create API requires all of the above.
Thread.sleep(3000L);
String setName = identifier;
String setQuery = "dsPersistentId:" + identifier;
String apiPath = String.format("/api/harvest/server/oaisets/%s", setName);
Expand Down

0 comments on commit 233bbda

Please sign in to comment.