Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added whitespace trimming to uploaded custom metadata TSV files #10696

Merged
merged 3 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/release-notes/10688_whitespace_trimming.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
### Added whitespace trimming to uploaded custom metadata TSV files

When loading custom metadata blocks using the `api/admin/datasetfield/load` API, whitespace can be introduced into field names.
This change trims whitespace at the beginning and end of all values read into the API before persisting them.

For more information, see #10688.
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public Response getByName(@PathParam("name") String name) {
String solrFieldSearchable = dsf.getSolrField().getNameSearchable();
String solrFieldFacetable = dsf.getSolrField().getNameFacetable();
String metadataBlock = dsf.getMetadataBlock().getName();
String uri=dsf.getUri();
String uri = dsf.getUri();
boolean hasParent = dsf.isHasParent();
boolean allowsMultiples = dsf.isAllowMultiples();
boolean isRequired = dsf.isRequired();
Expand Down Expand Up @@ -243,7 +243,9 @@ public Response loadDatasetFields(File file) {
br = new BufferedReader(new FileReader("/" + file));
while ((line = br.readLine()) != null) {
lineNumber++;
values = line.split(splitBy);
values = Arrays.stream(line.split(splitBy))
.map(String::trim)
.toArray(String[]::new);
if (values[0].startsWith("#")) { // Header row
switch (values[0]) {
case "#metadataBlock":
Expand Down Expand Up @@ -326,17 +328,17 @@ public Response loadDatasetFields(File file) {
*/
public String getGeneralErrorMessage(HeaderType header, int lineNumber, String message) {
List<String> arguments = new ArrayList<>();
arguments.add(header.name());
arguments.add(header != null ? header.name() : "unknown");
arguments.add(String.valueOf(lineNumber));
arguments.add(message);
return BundleUtil.getStringFromBundle("api.admin.datasetfield.load.GeneralErrorMessage", arguments);
}

/**
* Turn ArrayIndexOutOfBoundsException into an informative error message
* @param lineNumber
* @param header
* @param e
* @param lineNumber
* @param wrongIndex
* @return
*/
public String getArrayIndexOutOfBoundMessage(HeaderType header,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,61 @@
package edu.harvard.iq.dataverse.api;

import edu.harvard.iq.dataverse.ControlledVocabularyValueServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DataverseServiceBean;
import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
import jakarta.json.Json;
import jakarta.json.JsonObject;
import jakarta.json.JsonReader;
import jakarta.ws.rs.core.Response;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import java.io.File;
import java.io.StringReader;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;

@ExtendWith(MockitoExtension.class)
public class DatasetFieldServiceApiTest {

@Mock
private ActionLogServiceBean actionLogSvc;

@Mock
private MetadataBlockServiceBean metadataBlockService;

@Mock
private DataverseServiceBean dataverseService;

@Mock
private DatasetFieldServiceBean datasetFieldService;

@Mock
private ControlledVocabularyValueServiceBean controlledVocabularyValueService;

private DatasetFieldServiceApi api;

@BeforeEach
public void setup(){
api = new DatasetFieldServiceApi();
api.actionLogSvc = actionLogSvc;
api.metadataBlockService = metadataBlockService;
api.dataverseService = dataverseService;
api.datasetFieldService = datasetFieldService;
api.controlledVocabularyValueService = controlledVocabularyValueService;
}

@Test
public void testArrayIndexOutOfBoundMessageBundle() {
List<String> arguments = new ArrayList<>();
Expand Down Expand Up @@ -59,4 +105,41 @@ public void testGetGeneralErrorMessage() {
message
);
}

@Test
public void testGetGeneralErrorMessageEmptyHeader() {
DatasetFieldServiceApi api = new DatasetFieldServiceApi();
String message = api.getGeneralErrorMessage(null, 5, "some error");
assertEquals(
"Error parsing metadata block in unknown part, line #5: some error",
message
);
}

@Test
public void testLoadDatasetFieldsWhitespaceTrimming() {

Path resourceDirectory = Paths.get("src/test/resources/tsv/whitespace-test.tsv");
File testfile = new File(resourceDirectory.toFile().getAbsolutePath());
JsonReader jsonReader;
try (Response response = api.loadDatasetFields(testfile)) {
assertEquals(200, response.getStatus());
jsonReader = Json.createReader(new StringReader(response.getEntity().toString()));
}
JsonObject jsonObject = jsonReader.readObject();

final List<String> metadataNames = jsonObject.getJsonObject("data").getJsonArray("added")
.getValuesAs(e -> e.asJsonObject().getString("name"));
assertThat(metadataNames).contains("whitespaceDemo")
.contains("whitespaceDemoOne")
.contains("whitespaceDemoTwo")
.contains("whitespaceDemoThree")
.contains("CV1")
.contains("CV2")
.contains("CV3");
assertThat(metadataNames).doesNotContain(" whitespaceDemo")
.doesNotContain("whitespaceDemoOne ")
.doesNotContain("CV1 ")
.doesNotContain(" CV2");
}
}
10 changes: 10 additions & 0 deletions src/test/resources/tsv/whitespace-test.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#metadataBlock name dataverseAlias displayName
whitespaceDemo Whitespace Demo
#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id
whitespaceDemoOne One Trailing Space text 0 TRUE TRUE TRUE FALSE TRUE FALSE whitespaceDemo
whitespaceDemoTwo Two Leading Space text 1 TRUE TRUE TRUE FALSE TRUE FALSE whitespaceDemo
whitespaceDemoThree Three CV with errors text 2 TRUE TRUE TRUE FALSE TRUE FALSE whitespaceDemo
#controlledVocabulary DatasetField Value identifier displayOrder
whitespaceDemoThree CV1 0
whitespaceDemoThree CV2 1
whitespaceDemoThree CV3 2
Loading