diff --git a/preston-taxodros/src/main/java/bio/guoda/preston/cmd/ZenodoMetaUtil.java b/preston-taxodros/src/main/java/bio/guoda/preston/cmd/ZenodoMetaUtil.java index 790f016f..7750ce42 100644 --- a/preston-taxodros/src/main/java/bio/guoda/preston/cmd/ZenodoMetaUtil.java +++ b/preston-taxodros/src/main/java/bio/guoda/preston/cmd/ZenodoMetaUtil.java @@ -8,6 +8,8 @@ import org.apache.commons.lang3.StringUtils; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Stream; public class ZenodoMetaUtil { @@ -116,10 +118,19 @@ public static void setCreators(ObjectNode objectNode, List creatorList) } public static void setPublicationDate(ObjectNode objectNode, String publicationYear) { - if (publicationYear.startsWith("2")) { + Pattern compile = Pattern.compile(".*(?<year>[12][0-9]{3}).*"); + Matcher matcher = compile.matcher(publicationYear); + + if (!matcher.matches()) { + throw new IllegalArgumentException("cannot parse publication year [" + publicationYear + "]"); + } + + String year = matcher.group("year"); + if (StringUtils.startsWith(year, "2")) { setRestricted(objectNode); } - setValue(objectNode, PUBLICATION_DATE, publicationYear); + + setValue(objectNode, PUBLICATION_DATE, year); } private static void setRestricted(ObjectNode objectNode) { diff --git a/preston-taxodros/src/test/java/bio/guoda/preston/cmd/TaxoDrosFileExtractorTest.java b/preston-taxodros/src/test/java/bio/guoda/preston/cmd/TaxoDrosFileExtractorTest.java index c982745e..c3db9cb8 100644 --- a/preston-taxodros/src/test/java/bio/guoda/preston/cmd/TaxoDrosFileExtractorTest.java +++ b/preston-taxodros/src/test/java/bio/guoda/preston/cmd/TaxoDrosFileExtractorTest.java @@ -59,6 +59,18 @@ public void streamTaxoDrosToLineJsonWithDOI() throws IOException { assertThat(taxonNode.get("doi").textValue(), is("10.7868/S0016675814060150")); } + @Test + public void streamTaxoDrosToLineJsonYear() throws IOException { + String[] jsonObjects = 
getResource("DROS5.TEXT.year.txt"); + assertThat(jsonObjects.length, is(1)); + + JsonNode taxonNode = unwrapMetadata(jsonObjects[0]); + + assertThat(taxonNode.has("doi"), is(true)); + assertThat(taxonNode.get("doi").textValue(), is("10.11646/zootaxa.4161.2.4")); + assertThat(taxonNode.get("publication_date").textValue(), is("2016")); + } + @Test public void streamTaxoDrosToLineJsonAuthorsWithAmpersand() throws IOException { String[] jsonObjects = getResource("DROS5.TEXT.authors.ampersand.txt"); @@ -66,7 +78,6 @@ public void streamTaxoDrosToLineJsonAuthorsWithAmpersand() throws IOException { JsonNode taxonNode = unwrapMetadata(jsonObjects[0]); - assertThat(taxonNode.has("doi"), is(true)); assertThat(taxonNode.get("doi").textValue(), is("10.1016/j.tpb.2006.05.001")); diff --git a/preston-taxodros/src/test/resources/bio/guoda/preston/cmd/DROS5.TEXT.year.txt b/preston-taxodros/src/test/resources/bio/guoda/preston/cmd/DROS5.TEXT.year.txt new file mode 100644 index 00000000..2132c063 --- /dev/null +++ b/preston-taxodros/src/test/resources/bio/guoda/preston/cmd/DROS5.TEXT.year.txt @@ -0,0 +1 @@ +.TEXT; huang & chen, 2016 .A Huang, J. & Chen, H., .J 2016, .S The genus Leucophenga (Diptera, Drosophilidae), part VI: the argentata species group from the East Asia, with morphological and molecular evidence. .Z Zootaxa, 4161(2):207-227. .K ocr++ / 5.ix.2016 / DOI:10.11646/zootaxa.4161.2.4 .P Huang & Chen, 2016.pdf \ No newline at end of file