diff --git a/src/main/java/io/deephaven/csv/tokenization/Tokenizer.java b/src/main/java/io/deephaven/csv/tokenization/Tokenizer.java index 3acd908..5d4661e 100644 --- a/src/main/java/io/deephaven/csv/tokenization/Tokenizer.java +++ b/src/main/java/io/deephaven/csv/tokenization/Tokenizer.java @@ -83,6 +83,7 @@ public boolean tryParseBMPChar(final ByteSlice bs, final MutableInt result) { // Last code point U+007F value = first & 0x7F; result.setValue(value); + // Succeed only if the string ended here, i.e. the input was exactly this one character. return o == end; } if ((first & 0xE0) == 0xC0) { @@ -117,7 +118,8 @@ public boolean tryParseBMPChar(final ByteSlice bs, final MutableInt result) { } result.setValue(value); - return true; + // Succeed only if the string ended here, i.e. the input was exactly this one character. + return o == end; } private static int byteToInt(byte b) { diff --git a/src/test/java/io/deephaven/csv/CsvReaderTest.java b/src/test/java/io/deephaven/csv/CsvReaderTest.java index aae2382..3dd2ffa 100644 --- a/src/test/java/io/deephaven/csv/CsvReaderTest.java +++ b/src/test/java/io/deephaven/csv/CsvReaderTest.java @@ -110,6 +110,27 @@ public void bug52() throws CsvReaderException { CsvReader.read(specs, inputStream, sf); } + /** + * Reported in Deephaven Core Issue #2898. + * Bug filed in Deephaven CSV Issue #70: a lone multi-character CJK cell must be read back as a String. + */ + @Test + public void bug70() throws CsvReaderException { + final String input = "Coin,Change,Remark\r\n" + + "USDT,-49.00787612,\r\n" + + "USDT,-152.686844,穿仓保证金补偿\r\n" + + "USDT,-59.92650232,\r\n" + + "USDT,-102.3862566,\r\n"; + + final ColumnSet expected = + ColumnSet.of( + Column.ofRefs("Coin", "USDT", "USDT", "USDT", "USDT"), + Column.ofValues("Change", -49.00787612, -152.686844, -59.92650232, -102.3862566), + Column.ofRefs("Remark", null, "穿仓保证金补偿", null, null) + ); + invokeTest(defaultCsvBuilder().parsers(Parsers.DEFAULT).build(), input, expected); + } + @Test public void validates() { final String lengthyMessage = "CsvSpecs failed validation for the following reasons: "