Skip to content

Commit

Permalink
detect year pattern in the publication year field to avoid extra characters like commas. TaxoDros/TaxoDros.github.io#41
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorrit Poelen committed Jul 12, 2024
1 parent 00da2b9 commit 6969e74
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import org.apache.commons.lang3.StringUtils;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

public class ZenodoMetaUtil {
Expand Down Expand Up @@ -116,10 +118,19 @@ public static void setCreators(ObjectNode objectNode, List<String> creatorList)
}

public static void setPublicationDate(ObjectNode objectNode, String publicationYear) {
if (publicationYear.startsWith("2")) {
Pattern compile = Pattern.compile(".*(?<year>[12][0-9]{3}).*");
Matcher matcher = compile.matcher(publicationYear);

if (!matcher.matches()) {
throw new IllegalArgumentException("cannot parse publication year [" + publicationYear + "]");
}

String year = matcher.group("year");
if (StringUtils.startsWith(year, "2")) {
setRestricted(objectNode);
}
setValue(objectNode, PUBLICATION_DATE, publicationYear);

setValue(objectNode, PUBLICATION_DATE, year);
}

private static void setRestricted(ObjectNode objectNode) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,25 @@ public void streamTaxoDrosToLineJsonWithDOI() throws IOException {
assertThat(taxonNode.get("doi").textValue(), is("10.7868/S0016675814060150"));
}

/**
 * Checks that the single record read from {@code DROS5.TEXT.year.txt} carries its DOI and
 * reports the publication date as the bare year "2016".
 */
@Test
public void streamTaxoDrosToLineJsonYear() throws IOException {
    final String[] records = getResource("DROS5.TEXT.year.txt");
    assertThat(records.length, is(1));

    final JsonNode metadata = unwrapMetadata(records[0]);

    // DOI must be present and unchanged.
    assertThat(metadata.has("doi"), is(true));
    assertThat(metadata.get("doi").textValue(), is("10.11646/zootaxa.4161.2.4"));

    // Publication date must be the year only.
    assertThat(metadata.get("publication_date").textValue(), is("2016"));
}

@Test
public void streamTaxoDrosToLineJsonAuthorsWithAmpersand() throws IOException {
String[] jsonObjects = getResource("DROS5.TEXT.authors.ampersand.txt");
assertThat(jsonObjects.length, is(1));

JsonNode taxonNode = unwrapMetadata(jsonObjects[0]);


assertThat(taxonNode.has("doi"), is(true));
assertThat(taxonNode.get("doi").textValue(), is("10.1016/j.tpb.2006.05.001"));

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.TEXT;huang & chen, 2016.A Huang, J. & Chen, H.,.J 2016,.S The genus Leucophenga (Diptera, Drosophilidae), part VI:the argentata species group from the East Asia, withmorphological and molecular evidence..Z Zootaxa, 4161(2):207-227..K ocr++ / 5.ix.2016 / DOI:10.11646/zootaxa.4161.2.4.P Huang & Chen, 2016.pdf
Expand Down

0 comments on commit 6969e74

Please sign in to comment.