From e8697118d40240b2e4e0eb2f5e54e47ac5899e44 Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Wed, 1 Dec 2021 18:10:49 -0500 Subject: [PATCH] New approach for CSV reading --- Integrations/python/deephaven/csv.py | 4 +- .../engine/table/impl/InMemoryTable.java | 6 + .../main/java/io/deephaven/csv/CsvSpecs.java | 361 ++++-- .../main/java/io/deephaven/csv/CsvTools.java | 8 +- .../java/io/deephaven/csv/InferenceSpecs.java | 349 +----- .../main/java/io/deephaven/csv/Parser.java | 376 ------ .../deephaven/csv/containers/ByteSlice.java | 61 + .../deephaven/csv/containers/CharSlice.java | 56 + .../csv/containers/GrowableCharBuffer.java | 43 + .../densestorage/DenseStorageConstants.java | 7 + .../csv/densestorage/DenseStorageReader.java | 79 ++ .../csv/densestorage/DenseStorageWriter.java | 122 ++ .../deephaven/csv/densestorage/QueueNode.java | 17 + .../csv/densestorage/QueueReader.java | 211 ++++ .../csv/densestorage/QueueWriter.java | 203 +++ .../deephaven/csv/parsers/BooleanParser.java | 51 + .../io/deephaven/csv/parsers/ByteParser.java | 61 + .../io/deephaven/csv/parsers/CharParser.java | 54 + .../csv/parsers/DateTimeAsLongParser.java | 56 + .../deephaven/csv/parsers/DoubleParser.java | 58 + .../io/deephaven/csv/parsers/FloatParser.java | 70 ++ .../io/deephaven/csv/parsers/IntParser.java | 67 + .../deephaven/csv/parsers/IteratorHolder.java | 88 ++ .../io/deephaven/csv/parsers/LongParser.java | 62 + .../io/deephaven/csv/parsers/ParserBase.java | 75 ++ .../io/deephaven/csv/parsers/Parsers.java | 47 + .../io/deephaven/csv/parsers/ShortParser.java | 62 + .../deephaven/csv/parsers/StringParser.java | 45 + .../csv/parsers/TimestampMicrosParser.java | 9 + .../csv/parsers/TimestampMillisParser.java | 9 + .../csv/parsers/TimestampNanosParser.java | 9 + .../csv/parsers/TimestampParser.java | 12 + .../csv/parsers/TimestampParserBase.java | 82 ++ .../csv/parsers/context/ParseContext.java | 46 + .../context/SentinelConfiguration.java | 67 + .../io/deephaven/csv/reading/CellGrabber.java | 230 ++++ .../io/deephaven/csv/reading/CsvReader.java | 302 +++++ .../reading/ParseDenseStorageToColumn.java | 119 ++ .../reading/ParseInputFileToDenseStorage.java | 81 ++ .../java/io/deephaven/csv/sinks/Sink.java | 5 + .../io/deephaven/csv/sinks/SinkFactory.java | 14 + .../csv/tokenization/RangeTests.java | 48 + .../deephaven/csv/tokenization/Tokenizer.java | 522 ++++++++ .../tokenization/external/DoubleParser.java | 612 ++++++++++ .../tokenization/external/FastDoubleMath.java | 1083 +++++++++++++++++ .../java/io/deephaven/csv/util/Renderer.java | 24 + .../test/java/io/deephaven/csv/CsvTest.java | 1029 +++++++++------- .../java/io/deephaven/csv/InferenceTest.java | 163 --- .../test/resources/io/deephaven/csv/bools.csv | 8 - .../test/resources/io/deephaven/csv/byte.csv | 4 - .../test/resources/io/deephaven/csv/chars.csv | 8 - .../resources/io/deephaven/csv/doubles.csv | 8 - .../resources/io/deephaven/csv/floats.csv | 8 - .../test/resources/io/deephaven/csv/int.csv | 4 - .../csv/language-example-headerless.csv | 8 - .../io/deephaven/csv/language-example.csv | 9 - .../io/deephaven/csv/language-example.tsv | 9 - .../test/resources/io/deephaven/csv/long.csv | 4 - .../test/resources/io/deephaven/csv/short.csv | 4 - .../io/deephaven/csv/strings-pound.csv | 4 - .../resources/io/deephaven/csv/strings.csv | 4 - .../io/deephaven/csv/timestamp-legacy.csv | 4 - .../io/deephaven/csv/timestamp-micros.csv | 4 - .../io/deephaven/csv/timestamp-millis.csv | 4 - .../io/deephaven/csv/timestamp-mixed.csv | 4 - 
.../io/deephaven/csv/timestamp-nanos.csv | 4 - .../io/deephaven/csv/timestamp-seconds.csv | 4 - .../resources/io/deephaven/csv/timestamp.csv | 4 - .../csv/whitespace-inside-and-outside.csv | 4 - .../io/deephaven/csv/whitespace-inside.csv | 4 - .../io/deephaven/csv/whitespace-no-quotes.csv | 4 - .../io/deephaven/csv/whitespace-outside.csv | 4 - 72 files changed, 5757 insertions(+), 1534 deletions(-) delete mode 100644 extensions/csv/src/main/java/io/deephaven/csv/Parser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/containers/ByteSlice.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/containers/CharSlice.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/containers/GrowableCharBuffer.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageConstants.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageReader.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageWriter.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueNode.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueReader.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueWriter.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/BooleanParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/ByteParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/CharParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/DateTimeAsLongParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/DoubleParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/FloatParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/IntParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/IteratorHolder.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/LongParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/ParserBase.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/Parsers.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/ShortParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/StringParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampMicrosParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampMillisParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampNanosParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampParserBase.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/context/ParseContext.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/parsers/context/SentinelConfiguration.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/reading/CellGrabber.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/reading/CsvReader.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/reading/ParseDenseStorageToColumn.java create mode 100644 
extensions/csv/src/main/java/io/deephaven/csv/reading/ParseInputFileToDenseStorage.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/sinks/Sink.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/sinks/SinkFactory.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/tokenization/RangeTests.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/tokenization/Tokenizer.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/tokenization/external/DoubleParser.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/tokenization/external/FastDoubleMath.java create mode 100644 extensions/csv/src/main/java/io/deephaven/csv/util/Renderer.java delete mode 100644 extensions/csv/src/test/java/io/deephaven/csv/InferenceTest.java delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/bools.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/byte.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/chars.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/doubles.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/floats.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/int.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/language-example-headerless.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/language-example.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/language-example.tsv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/long.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/short.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/strings-pound.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/strings.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/timestamp-legacy.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/timestamp-micros.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/timestamp-millis.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/timestamp-mixed.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/timestamp-nanos.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/timestamp-seconds.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/timestamp.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/whitespace-inside-and-outside.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/whitespace-inside.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/whitespace-no-quotes.csv delete mode 100644 extensions/csv/src/test/resources/io/deephaven/csv/whitespace-outside.csv diff --git a/Integrations/python/deephaven/csv.py b/Integrations/python/deephaven/csv.py index 6fb308883c3..153d8b52bd8 100644 --- a/Integrations/python/deephaven/csv.py +++ b/Integrations/python/deephaven/csv.py @@ -116,7 +116,7 @@ def read(path: str, Args: path (str): a file path or a URL string header (Dict[str, DataType]): a dict to define the table columns with key being the name, value being the data type - inference (csv.Inference): an Enum value specifying the rules for data type inference, default is INFERENCE_STANDARD_TIMES + inference (csv.Inference): an Enum value specifying the rules for data type 
inference, default is INFERENCE_STANDARD headless (bool): indicates if the CSV data is headless, default is False delimiter (str): the delimiter used by the CSV, default is the comma quote (str): the quote character for the CSV, default is double quote @@ -133,7 +133,7 @@ def read(path: str, """ if inference is None: - inference = INFERENCE_STANDARD_TIMES + inference = INFERENCE_STANDARD csv_specs_builder = _JCsvSpecs.builder() diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/InMemoryTable.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/InMemoryTable.java index 008ebd9bef8..33279570043 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/InMemoryTable.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/InMemoryTable.java @@ -39,6 +39,12 @@ public static InMemoryTable from(NewTable table) { columns); } + // TODO(kosak): this may not be what we want. + public static InMemoryTable from(TableDefinition definition, TrackingRowSet rowSet, + Map> columns) { + return new InMemoryTable(definition, rowSet, columns); + } + public InMemoryTable(String[] columnNames, Object[] arrayValues) { super(RowSetFactory.flat(Array.getLength(arrayValues[0])).toTracking(), createColumnsMap(columnNames, arrayValues)); diff --git a/extensions/csv/src/main/java/io/deephaven/csv/CsvSpecs.java b/extensions/csv/src/main/java/io/deephaven/csv/CsvSpecs.java index bb21dbf24b9..7d779809225 100644 --- a/extensions/csv/src/main/java/io/deephaven/csv/CsvSpecs.java +++ b/extensions/csv/src/main/java/io/deephaven/csv/CsvSpecs.java @@ -1,15 +1,27 @@ package io.deephaven.csv; import io.deephaven.annotations.BuildableStyle; -import io.deephaven.api.util.NameValidator; -import io.deephaven.qst.array.Array; -import io.deephaven.qst.array.ArrayBuilder; +import io.deephaven.chunk.*; +import io.deephaven.chunk.attributes.Values; +import io.deephaven.csv.parsers.ParserBase; +import io.deephaven.csv.parsers.Parsers; +import io.deephaven.csv.reading.CsvReader; +import io.deephaven.csv.sinks.Sink; +import io.deephaven.csv.sinks.SinkFactory; +import io.deephaven.engine.rowset.RowSequence; +import io.deephaven.engine.rowset.RowSequenceFactory; +import io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.rowset.TrackingRowSet; +import io.deephaven.engine.table.*; +import io.deephaven.engine.table.impl.InMemoryTable; +import io.deephaven.engine.table.impl.sources.*; +import io.deephaven.qst.column.header.ColumnHeader; import io.deephaven.qst.table.NewTable; import io.deephaven.qst.table.TableHeader; -import io.deephaven.qst.type.Type; -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.csv.CSVRecord; +import io.deephaven.qst.type.*; +import io.deephaven.time.DateTime; +import io.deephaven.util.BooleanUtils; +import io.deephaven.util.QueryConstants; import org.immutables.value.Value.Default; import org.immutables.value.Value.Immutable; @@ -19,16 +31,7 @@ import java.io.Reader; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.IntStream; +import java.util.*; /** * A specification object for parsing a CSV, or CSV-like, structure into a {@link NewTable}. 
@@ -38,10 +41,9 @@ public abstract class CsvSpecs { public interface Builder { - Builder header(TableHeader header); - Builder putParsers(String columnName, Parser parser); + Builder putParsers(String columnName, ParserBase parser); Builder inference(InferenceSpecs inferenceSpecs); @@ -57,6 +59,8 @@ public interface Builder { Builder charset(Charset charset); + Builder async(boolean async); + CsvSpecs build(); } @@ -148,19 +152,19 @@ public static CsvSpecs fromLegacyFormat(String format) { * * @return the parsers */ - public abstract Map> parsers(); + public abstract Map> parsers(); /** * The inference specifications. * *
     * <p>
- * By default, is {@link InferenceSpecs#standardTimes()}. + * By default, is {@link InferenceSpecs#standard()}. * * @return the inference specifications */ @Default public InferenceSpecs inference() { - return InferenceSpecs.standardTimes(); + return InferenceSpecs.standard(); } /** @@ -247,12 +251,13 @@ public Charset charset() { return StandardCharsets.UTF_8; } - private CSVFormat format() { - return CSVFormat.DEFAULT - .withIgnoreSurroundingSpaces(ignoreSurroundingSpaces()) - .withDelimiter(delimiter()) - .withQuote(quote()) - .withTrim(trim()); + /** + * Should the CSVReader run asynchronously for better performance. + * @return the async flag + */ + @Default + public boolean async() { + return true; } /** @@ -266,7 +271,7 @@ private CSVFormat format() { * @return the new table * @throws IOException if an I/O exception occurs */ - public final NewTable parse(InputStream stream) throws IOException { + public final Table parse(InputStream stream) throws IOException { return parse(new InputStreamReader(stream, charset())); } @@ -281,130 +286,248 @@ public final NewTable parse(InputStream stream) throws IOException { * @return the new table * @throws IOException if an I/O exception occurs */ - public final NewTable parse(Reader reader) throws IOException { - try ( - final CSVParser csvParser = format().parse(reader)) { - final List records = csvParser.getRecords(); - if (hasHeaderRow() && records.isEmpty()) { - throw new IllegalStateException("Expected header row, none found"); - } - final List dataRecords = hasHeaderRow() ? records.subList(1, records.size()) : records; - if (!header().isPresent() && dataRecords.isEmpty()) { - throw new IllegalStateException("Unable to infer types with no TableHeader and no data"); - } - final int numColumns = records.get(0).size(); - if (numColumns == 0) { - throw new IllegalStateException("Unable to parse an empty CSV"); + public final Table parse(Reader reader) throws IOException { + final CsvReader csvReader = configureCsvReader(reader); + final CsvReader.Result result = csvReader.read(); + + final String[] columnNames = result.columnNames(); + final Sink[] sinks = result.columns(); + final Map> columns = new LinkedHashMap<>(); + long maxSize = 0; + for (int ii = 0; ii < columnNames.length; ++ii) { + final String columnName = columnNames[ii]; + final MySinkBase sink = (MySinkBase) sinks[ii]; + maxSize = Math.max(maxSize, sink.resultSize()); + columns.put(columnName, sink.result()); + } + final TableDefinition tableDef = TableDefinition.inferFrom(columns); + final TrackingRowSet rowSet = RowSetFactory.flat(maxSize).toTracking(); + return InMemoryTable.from(tableDef, rowSet, columns); + } + + private CsvReader configureCsvReader(Reader reader) { + final MySinkFactory sinkFactory = new MySinkFactory(); + final CsvReader csvReader = new CsvReader(reader, sinkFactory); + + csvReader.setAsync(async()); + csvReader.setIgnoreSurroundingSpaces(ignoreSurroundingSpaces()); + csvReader.setTrim(trim()); + csvReader.setHasHeaders(hasHeaderRow()); + csvReader.setquoteChar(quote()); + csvReader.setFieldDelimiter(delimiter()); + csvReader.setInference(inference().parsers()); + csvReader.setParsers(parsers()); + csvReader.setNullParser(inference().nullParser()); + + if (header().isPresent()) { + final List headers = new ArrayList<>(); + final Map> pmap = new HashMap<>(); + for (ColumnHeader ch : header().get()) { + headers.add(ch.name()); + pmap.put(ch.name(), typeToParser(ch.componentType())); } - final Iterable columnNames; - if (header().isPresent()) { - columnNames 
= header().get().columnNames(); - } else if (hasHeaderRow()) { - columnNames = legalizeColumnNames(records.get(0)); + csvReader.setHeaders(headers); + csvReader.setParsers(pmap); + } + + csvReader.setNullBooleanAsByteValue(BooleanUtils.NULL_BOOLEAN_AS_BYTE); + csvReader.setNullByteValue(QueryConstants.NULL_BYTE); + csvReader.setNullShortValue(QueryConstants.NULL_SHORT); + csvReader.setNullIntValue(QueryConstants.NULL_INT); + csvReader.setNullLongValue(QueryConstants.NULL_LONG); + csvReader.setNullFloatValue(QueryConstants.NULL_FLOAT); + csvReader.setNullDoubleValue(QueryConstants.NULL_DOUBLE); + csvReader.setNullCharValue(QueryConstants.NULL_CHAR); + csvReader.setNullStringValue(null); + csvReader.setNullDateTimeAsLongValue(QueryConstants.NULL_LONG); + return csvReader; + } + + private static abstract class MySinkBase implements Sink { + private final ArrayBackedColumnSource result; + private final WritableColumnSource reinterpreted; + private final ChunkWrapInvoker chunkWrapInvoker; + private long resultSize = 0; + + public MySinkBase(ArrayBackedColumnSource result, Class interpClass, + ChunkWrapInvoker chunkWrapInvoker) { + this.result = result; + if (interpClass != null) { + reinterpreted = (WritableColumnSource) result.reinterpret(interpClass); } else { - columnNames = IntStream - .range(0, numColumns) - .mapToObj(i -> String.format("Column%d", i + 1)) - .collect(Collectors.toList()); + reinterpreted = result; } + this.chunkWrapInvoker = chunkWrapInvoker; + } - final NewTable.Builder table = NewTable.builder(); - int columnIndex = 0; - int size = -1; - for (String columnName : columnNames) { - final Parser parser = parser(columnName, columnIndex, dataRecords); - final Array array = buildArray(getColumn(columnIndex, dataRecords), parser, dataRecords.size()); - if (size == -1) { - size = array.size(); - } - table.putColumns(columnName, array); - ++columnIndex; + @Override + public final void write(TARRAY src, int srcOffset, long destOffset, int size) { + if (size == 0) { + return; } - return table.size(size).build(); + final long requiredCapacity = destOffset + size; + reinterpreted.ensureCapacity(requiredCapacity); + resultSize = Math.max(resultSize, requiredCapacity); + try (final ChunkSink.FillFromContext context = reinterpreted.makeFillFromContext(size); + final RowSequence range = RowSequenceFactory.forRange(destOffset, destOffset + size - 1)) { + Chunk chunk = chunkWrapInvoker.apply(src, srcOffset, size); + reinterpreted.fillFromChunk(context, chunk, range); + } + } + + private interface ChunkWrapInvoker { + Chunk apply(TARRAY data, int offset, int capacity); + } + + public ArrayBackedColumnSource result() { + return result; + } + + public long resultSize() { + return resultSize; } } - private static List legalizeColumnNames(CSVRecord record) { - final Set taken = new HashSet<>(record.size()); - final List out = new ArrayList<>(record.size()); - for (String name : record) { - out.add(NameValidator.legalizeColumnName(name, (s) -> s.replaceAll("[- ]", "_"), taken)); + private static final class MyCharSink extends MySinkBase { + public MyCharSink() { + super(new CharacterArraySource(), null, CharChunk::chunkWrap); } - return out; } - private static Type type(TableHeader header, String columnName) { - final Type type = header.getHeader(columnName); - if (type != null) { - return type; + private static final class MyBooleanAsByteSink extends MySinkBase { + public MyBooleanAsByteSink() { + super(new BooleanArraySource(), byte.class, ByteChunk::chunkWrap); } - throw new 
IllegalArgumentException(String.format( - "When specifying a header, all columns must be accounted for. Missing type for column name '%s'", - columnName)); } - private Parser parser(String columnName, int columnIndex, List dataRecords) { - final Type type = header().map(header -> type(header, columnName)).orElse(null); + private static final class MyByteSink extends MySinkBase { + public MyByteSink() { + super(new ByteArraySource(), null, ByteChunk::chunkWrap); + } + } - // 1. An explicit parser if set - final Parser explicit = parsers().get(columnName); - if (explicit != null) { - if (type != null && !type.equals(explicit.type())) { - throw new IllegalArgumentException("Explicit parser type and column header type do not match"); - } - return explicit; + private static final class MyShortSink extends MySinkBase { + public MyShortSink() { + super(new ShortArraySource(), null, ShortChunk::chunkWrap); + } + } + + private static final class MyIntSink extends MySinkBase { + public MyIntSink() { + super(new IntegerArraySource(), null, IntChunk::chunkWrap); } + } + + private static final class MyLongSink extends MySinkBase { + public MyLongSink() { + super(new LongArraySource(), null, LongChunk::chunkWrap); + } + } - final InferenceSpecs inference; - if (type != null) { - // 2. Guided inference - inference = inference().limitToType(type); - } else { - // 3. Original inference - inference = inference(); + private static final class MyFloatSink extends MySinkBase { + public MyFloatSink() { + super(new FloatArraySource(), null, FloatChunk::chunkWrap); } + } - final Optional> p = inference.infer(getColumn(columnIndex, dataRecords)); - if (!p.isPresent()) { - throw new IllegalStateException( - String.format("Unable to infer type for column '%s'", columnName)); + private static final class MyDoubleSink extends MySinkBase { + public MyDoubleSink() { + super(new DoubleArraySource(), null, DoubleChunk::chunkWrap); } - return p.get(); } - private static Array buildArray(Iterator it, Parser parser, int size) { - final ArrayBuilder builder = Array.builder(parser.type(), size); - while (it.hasNext()) { - final T item = parser.parse(it.next()); - builder.add(item); + private static final class MyStringSink extends MySinkBase { + public MyStringSink() { + super(new ObjectArraySource<>(String.class), null, ObjectChunk::chunkWrap); } - return builder.build(); } - private static Iterator getColumn(int index, Iterable records) { - return new CsvColumnIterator(index, records.iterator()); + private static final class MyDateTimeAsLongSink extends MySinkBase { + public MyDateTimeAsLongSink() { + super(new DateTimeArraySource(), long.class, LongChunk::chunkWrap); + } } - private static class CsvColumnIterator implements Iterator { - private final int index; - private final Iterator it; + private static class MySinkFactory implements SinkFactory { + @Override + public Sink makeBooleanAsByteSink() { + return new MyBooleanAsByteSink(); + } + + @Override + public Sink makeByteSink() { + return new MyByteSink(); + } + + @Override + public Sink makeShortSink() { + return new MyShortSink(); + } + + @Override + public Sink makeIntSink() { + return new MyIntSink(); + } + + @Override + public Sink makeLongSink() { + return new MyLongSink(); + } + + @Override + public Sink makeFloatSink() { + return new MyFloatSink(); + } - public CsvColumnIterator(int index, Iterator it) { - this.index = index; - this.it = Objects.requireNonNull(it); + @Override + public Sink makeDoubleSink() { + return new MyDoubleSink(); } @Override - public 
boolean hasNext() { - return it.hasNext(); + public Sink makeCharSink() { + return new MyCharSink(); } @Override - public String next() { - CSVRecord next = it.next(); - String stringValue = next.get(index); - // treating empty string as null - return stringValue.isEmpty() ? null : stringValue; + public Sink makeStringSink() { + return new MyStringSink(); + } + + @Override + public Sink makeDateTimeAsLongSink() { + return new MyDateTimeAsLongSink(); + } + } + + private static ParserBase typeToParser(Type type) { + if (type == BooleanType.instance()) { + return Parsers.BOOLEAN; + } + if (type == ByteType.instance()) { + return Parsers.BYTE; + } + if (type == ShortType.instance()) { + return Parsers.SHORT; + } + if (type == IntType.instance()) { + return Parsers.INT; + } + if (type == LongType.instance()) { + return Parsers.LONG; + } + if (type == FloatType.instance()) { + return Parsers.FLOAT; + } + if (type == DoubleType.instance()) { + return Parsers.DOUBLE; + } + if (type == CharType.instance()) { + return Parsers.CHAR; + } + if (type == StringType.instance()) { + return Parsers.STRING; } + throw new RuntimeException("Can't find Parser for " + type); } } diff --git a/extensions/csv/src/main/java/io/deephaven/csv/CsvTools.java b/extensions/csv/src/main/java/io/deephaven/csv/CsvTools.java index 0047267a157..ef33ce0d7f9 100644 --- a/extensions/csv/src/main/java/io/deephaven/csv/CsvTools.java +++ b/extensions/csv/src/main/java/io/deephaven/csv/CsvTools.java @@ -149,7 +149,7 @@ public static Table readCsv(String path, CsvSpecs specs) throws IOException { */ @ScriptApi public static Table readCsv(InputStream stream, CsvSpecs specs) throws IOException { - return InMemoryTable.from(specs.parse(stream)); + return specs.parse(stream); } /** @@ -162,7 +162,7 @@ public static Table readCsv(InputStream stream, CsvSpecs specs) throws IOExcepti */ @ScriptApi public static Table readCsv(URL url, CsvSpecs specs) throws IOException { - return InMemoryTable.from(specs.parse(url.openStream())); + return specs.parse(url.openStream()); } /** @@ -180,7 +180,7 @@ public static Table readCsv(URL url, CsvSpecs specs) throws IOException { */ @ScriptApi public static Table readCsv(Path path, CsvSpecs specs) throws IOException { - return InMemoryTable.from(specs.parse(PathUtil.open(path))); + return specs.parse(PathUtil.open(path)); } /** @@ -264,7 +264,7 @@ public static Table readCsv(InputStream is, final String format) throws IOExcept @ScriptApi @Deprecated public static Table readCsv(InputStream is, final char separator) throws IOException { - return InMemoryTable.from(CsvSpecs.builder().delimiter(separator).build().parse(is)); + return CsvSpecs.builder().delimiter(separator).build().parse(is); } private static boolean isStandardFile(URL url) { diff --git a/extensions/csv/src/main/java/io/deephaven/csv/InferenceSpecs.java b/extensions/csv/src/main/java/io/deephaven/csv/InferenceSpecs.java index 127f6bad617..77643659e4d 100644 --- a/extensions/csv/src/main/java/io/deephaven/csv/InferenceSpecs.java +++ b/extensions/csv/src/main/java/io/deephaven/csv/InferenceSpecs.java @@ -1,6 +1,8 @@ package io.deephaven.csv; import io.deephaven.annotations.BuildableStyle; +import io.deephaven.csv.parsers.ParserBase; +import io.deephaven.csv.parsers.Parsers; import io.deephaven.qst.type.Type; import org.immutables.value.Value.Check; import org.immutables.value.Value.Default; @@ -8,10 +10,7 @@ import org.jetbrains.annotations.Nullable; import java.time.Instant; -import java.util.ArrayList; -import java.util.Iterator; -import 
java.util.List; -import java.util.Optional; +import java.util.*; /** * Inference specifications contains the configuration and logic for inferring an acceptable parser from string values. @@ -34,173 +33,89 @@ public static Builder builder() { /** * The string-only inference. * - *

-     * <p>
-     * Contains the following parsers:
-     *
-     * <ul>
-     * <li>{@link Parser#STRING}</li>
-     * <li>{@link Parser#INSTANT}</li>
-     * <li>{@link Parser#SHORT}</li>
-     * <li>{@link Parser#INT}</li>
-     * <li>{@link Parser#LONG}</li>
-     * <li>{@link Parser#DOUBLE}</li>
-     * <li>{@link Parser#BOOL}</li>
-     * <li>{@link Parser#CHAR}</li>
-     * <li>{@link Parser#BYTE}</li>
-     * <li>{@link Parser#FLOAT}</li>
-     * </ul>
-     *
-     * Uses the default {@link #onNullParser()}.
-     *
-     * <p>
-     * Note: the non-string parsers are only relevant when the appropriate {@link #limitToType(Type)} is invoked.
-     *
      * @return the string-only inference
      */
     public static InferenceSpecs strings() {
-        return builder().addParsers(
-                Parser.STRING,
-                Parser.INSTANT,
-                Parser.SHORT,
-                Parser.INT,
-                Parser.LONG,
-                Parser.DOUBLE,
-                Parser.BOOL,
-                Parser.CHAR,
-                Parser.BYTE,
-                Parser.FLOAT)
+        return builder()
+                .addParsers(Parsers.STRING)
                 .build();
     }

     /**
      * The "minimal" inference.
-     *
-     * <p>
-     * Contains the following parsers:
-     *
-     * <ul>
-     * <li>{@link Parser#INSTANT}</li>
-     * <li>{@link Parser#LONG}</li>
-     * <li>{@link Parser#DOUBLE}</li>
-     * <li>{@link Parser#BOOL}</li>
-     * <li>{@link Parser#STRING}</li>
-     * <li>{@link Parser#BYTE}</li>
-     * <li>{@link Parser#SHORT}</li>
-     * <li>{@link Parser#INT}</li>
-     * <li>{@link Parser#FLOAT}</li>
-     * <li>{@link Parser#CHAR}</li>
-     * </ul>
-     *
-     * Uses the default {@link #onNullParser()}.
-     *
-     * <p>
-     * Note: the byte, short, int, float, and char parsers are only relevant when the appropriate
-     * {@link #limitToType(Type)} is invoked.
-     *
-     * @return the minimal inference
      */
     public static InferenceSpecs minimal() {
         return builder().addParsers(
-                Parser.INSTANT,
-                Parser.LONG,
-                Parser.DOUBLE,
-                Parser.BOOL,
-                Parser.STRING,
-                Parser.BYTE,
-                Parser.SHORT,
-                Parser.INT,
-                Parser.FLOAT,
-                Parser.CHAR)
+                Parsers.DBDATETIME,
+                Parsers.LONG,
+                Parsers.DOUBLE,
+                Parsers.BOOLEAN,
+                Parsers.STRING)
                 .build();
     }
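
When none of the canned sets fit, the InferenceSpecs builder shown at the bottom of this file can assemble a custom candidate set. A sketch, assuming the Parsers constants added by this patch behave as named:

    import io.deephaven.csv.InferenceSpecs;
    import io.deephaven.csv.parsers.Parsers;

    class CustomInference {
        // Candidate parsers for type inference; a column whose cells are all
        // empty falls back to the type of nullParser() (String here).
        static final InferenceSpecs NUMERIC_OR_STRING = InferenceSpecs.builder()
                .addParsers(Parsers.LONG, Parsers.DOUBLE, Parsers.STRING)
                .nullParser(Parsers.STRING)
                .build();
    }
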

     /**
      * The "standard" inference, does not parse floats or bytes.
-     *
-     * <p>
-     * Contains the following parsers:
-     *
-     * <ul>
-     * <li>{@link Parser#INSTANT}</li>
-     * <li>{@link Parser#SHORT}</li>
-     * <li>{@link Parser#INT}</li>
-     * <li>{@link Parser#LONG}</li>
-     * <li>{@link Parser#DOUBLE}</li>
-     * <li>{@link Parser#BOOL}</li>
-     * <li>{@link Parser#CHAR}</li>
-     * <li>{@link Parser#STRING}</li>
-     * <li>{@link Parser#BYTE}</li>
-     * <li>{@link Parser#FLOAT}</li>
-     * </ul>
-     *
-     * Uses the default {@link #onNullParser()}.
-     *
-     * <p>
-     * Note: the byte and float parsers are only relevant when the appropriate {@link #limitToType(Type)} is invoked.
-     *
-     * @return the standard inference
      */
     public static InferenceSpecs standard() {
         return builder().addParsers(
-                Parser.INSTANT,
-                Parser.SHORT,
-                Parser.INT,
-                Parser.LONG,
-                Parser.DOUBLE,
-                Parser.BOOL,
-                Parser.CHAR,
-                Parser.STRING,
-                Parser.BYTE,
-                Parser.FLOAT)
+                Parsers.DBDATETIME,
+                Parsers.SHORT,
+                Parsers.INT,
+                Parsers.LONG,
+                Parsers.DOUBLE,
+                Parsers.BOOLEAN,
+                Parsers.CHAR,
+                Parsers.STRING)
                 .build();
     }
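
Inference can also be bypassed per column via CsvSpecs.Builder.putParsers from earlier in this patch. A sketch under those assumptions; the column name "Price" is hypothetical:

    import io.deephaven.csv.CsvSpecs;
    import io.deephaven.csv.InferenceSpecs;
    import io.deephaven.csv.parsers.Parsers;

    class PerColumnOverride {
        // Pin one column to DOUBLE while the remaining columns go through
        // standard inference.
        static final CsvSpecs SPECS = CsvSpecs.builder()
                .inference(InferenceSpecs.standard())
                .putParsers("Price", Parsers.DOUBLE)
                .build();
    }
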

     /**
      * The standard parsers with additional {@link java.time.Instant}-based parsing.
      *
-     * <p>
-     * Contains the following parsers:
-     *
-     * <ul>
-     * <li>{@link Parser#INSTANT}</li>
-     * <li>{@link Parser#INSTANT_LEGACY}</li>
-     * <li>{@link Parser#epochAny21stCentury(Parser)}, with {@link Parser#LONG}</li>
-     * <li>{@link Parser#SHORT}</li>
-     * <li>{@link Parser#INT}</li>
-     * <li>{@link Parser#LONG}</li>
-     * <li>{@link Parser#DOUBLE}</li>
-     * <li>{@link Parser#BOOL}</li>
-     * <li>{@link Parser#CHAR}</li>
-     * <li>{@link Parser#STRING}</li>
-     * <li>{@link Parser#BYTE}</li>
-     * <li>{@link Parser#FLOAT}</li>
-     * </ul>
-     *
-     * Uses the default {@link #onNullParser()}.
-     *
-     * <p>
-     * Note: the byte and float parsers are only relevant when the appropriate {@link #limitToType(Type)} is invoked.
-     *
      * @return the standard times inference
      */
     public static InferenceSpecs standardTimes() {
-        final List<Parser<Instant>> parsers = Parser.epochAny21stCentury(Parser.LONG);
         return builder().addParsers(
-                Parser.INSTANT,
-                Parser.INSTANT_LEGACY,
-                parsers.get(0),
-                parsers.get(1),
-                parsers.get(2),
-                parsers.get(3),
-                Parser.SHORT,
-                Parser.INT,
-                Parser.LONG,
-                Parser.DOUBLE,
-                Parser.BOOL,
-                Parser.CHAR,
-                Parser.STRING,
-                Parser.BYTE,
-                Parser.FLOAT)
+                Parsers.TIMESTAMP,
+                Parsers.FLOAT,
+                Parsers.DOUBLE,
+                Parsers.BOOLEAN,
+                Parsers.CHAR,
+                Parsers.STRING)
                 .build();
     }
+
+    public static InferenceSpecs milliTimes() {
+        return builder().addParsers(
+                Parsers.TIMESTAMP_MILLIS,
+                Parsers.FLOAT,
+                Parsers.DOUBLE,
+                Parsers.BOOLEAN,
+                Parsers.CHAR,
+                Parsers.STRING)
+                .build();
+    }
+
+    public static InferenceSpecs microTimes() {
+        return builder().addParsers(
+                Parsers.TIMESTAMP_MICROS,
+                Parsers.FLOAT,
+                Parsers.DOUBLE,
+                Parsers.BOOLEAN,
+                Parsers.CHAR,
+                Parsers.STRING)
+                .build();
+    }
+
+    public static InferenceSpecs nanoTimes() {
+        return builder().addParsers(
+                Parsers.TIMESTAMP_NANOS,
+                Parsers.FLOAT,
+                Parsers.DOUBLE,
+                Parsers.BOOLEAN,
+                Parsers.CHAR,
+                Parsers.STRING)
+                .build();
+    }
+
@@ -209,165 +124,31 @@ public static InferenceSpecs standardTimes() {
      *
      * @return the parsers
      */
-    public abstract List<Parser<?>> parsers();
+    public abstract Set<ParserBase<?>> parsers();
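
The three new unit-specific variants matter when a file stores timestamps as raw epoch longs. A sketch for epoch-millisecond data, assuming the parse entry point shown above (the class and path are hypothetical):

    import io.deephaven.csv.CsvSpecs;
    import io.deephaven.csv.InferenceSpecs;
    import io.deephaven.engine.table.Table;

    import java.io.FileInputStream;
    import java.io.IOException;

    class EpochMillisExample {
        // Columns of epoch-millisecond longs infer as timestamps rather
        // than as plain longs.
        static Table read(String path) throws IOException {
            return CsvSpecs.builder()
                    .inference(InferenceSpecs.milliTimes())
                    .build()
                    .parse(new FileInputStream(path));
        }
    }
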

     /**
      * The parser to return when all values are null. May be {@code null}.
      *
      * <p>
-     * By default, returns a {@link Parser#STRING}.
+     * By default, returns a {@link Parsers#STRING}.
      *
      * @return the on-null values parser
      */
     @Default
     @Nullable
-    public Parser<?> onNullParser() {
-        return Parser.STRING;
-    }
-
-    /**
-     * Filters out all parsers that do not have {@code type}.
-     *
-     * <p>
-     * {@link #onNullParser()} will be set to the first parser that matches {@code type}.
-     *
-     * @param type the type to limit to
-     * @return the new inference based on type
-     */
-    public InferenceSpecs limitToType(Type type) {
-        Parser<?> first = null;
-        final Builder builder = builder();
-        for (Parser<?> parser : parsers()) {
-            if (type.equals(parser.type())) {
-                builder.addParsers(parser);
-                if (first == null) {
-                    first = parser;
-                }
-            }
-        }
-        return builder.onNullParser(first).build();
-    }
-
-    /**
-     * Finds the best parser by checking and eliminating parsers based on {@link Parser#canParse(String)}. The returned
-     * parser will be the lowest indexed parser remaining based on the order specified in {@link #parsers()}.
-     *
-     * <p>
- * When all {@code values} are null, the returned value will be an optional that wraps {@link #onNullParser()}. - * - * @param values the values to be inferred - * @return the best parser, if any - */ - public Optional> infer(Iterator values) { - final List> candidates = collect(); - final List> hasParsed = new ArrayList<>(); - boolean allNull = true; - while (values.hasNext() && !candidates.isEmpty()) { - final String item = values.next(); - if (item != null) { - allNull = false; - if (candidates.size() <= 1) { - break; - } - hasParsed.clear(); - final Iterator> it = candidates.iterator(); - NEXT_PARSER: while (it.hasNext()) { - final Parser parser = it.next(); - for (Parser alreadyParsed : hasParsed) { - // If a more specific parser has already run, we know we don't need to check this parser. - // For example, if SHORT has already successfully parsed, we don't need to check INT. - // isSuperset(INT, SHORT) == true - if (isSuperset(parser, alreadyParsed)) { - // Note: we *don't* have to add parser to hasParsed, since superset properties are - // transitive - continue NEXT_PARSER; - } - } - if (parser.canParse(item)) { - hasParsed.add(parser); - } else { - it.remove(); - } - } - } - } - if (allNull) { - return Optional.ofNullable(onNullParser()); - } - return candidates.stream().findFirst(); - } - - @Check - final void checkNonEmpty() { - if (parsers().isEmpty()) { - throw new IllegalArgumentException("Must provide at least one parser for inference"); - } - } - - private List> collect() { - final List> collected = new ArrayList<>(); - for (Parser candidate : parsers()) { - // If anything we've already collected is a superset of the candidate, discard the candidate. - // For example, if INT is already collected, we don't need to even consider SHORT. - boolean useCandidate = true; - for (Parser actual : collected) { - if (isSuperset(actual, candidate)) { - useCandidate = false; - break; - } - } - if (useCandidate) { - collected.add(candidate); - } - } - return collected; - } - - /** - * {@code first} is a superset of {@code second} if {@code first} will parse all the values that {@code second} will - * parse. - */ - private static boolean isSuperset(Parser first, Parser second) { - if (first == Parser.STRING) { - return true; - } - if (first == Parser.DOUBLE) { - return second == Parser.FLOAT - || second == Parser.LONG - || second == Parser.INT - || second == Parser.SHORT - || second == Parser.BYTE; - } - if (first == Parser.FLOAT) { - // Note: *superset* here means will parse all the same (or more) inputs. - // Floats *can* parse everything that Double can parse. - return second == Parser.DOUBLE - || second == Parser.LONG - || second == Parser.INT - || second == Parser.SHORT - || second == Parser.BYTE; - } - if (first == Parser.LONG) { - return second == Parser.INT || second == Parser.SHORT || second == Parser.BYTE; - } - if (first == Parser.INT) { - return second == Parser.SHORT || second == Parser.BYTE; - } - if (first == Parser.SHORT) { - return second == Parser.BYTE; - } - return false; + public ParserBase nullParser() { + return Parsers.STRING; } public interface Builder { - Builder onNullParser(Parser parser); + Builder nullParser(ParserBase parser); - Builder addParsers(Parser item); + Builder addParsers(ParserBase item); - Builder addParsers(Parser... items); + Builder addParsers(ParserBase... 
items); - Builder addAllParsers(Iterable> items); + Builder addAllParsers(Iterable> items); InferenceSpecs build(); } diff --git a/extensions/csv/src/main/java/io/deephaven/csv/Parser.java b/extensions/csv/src/main/java/io/deephaven/csv/Parser.java deleted file mode 100644 index 03cdabd38a5..00000000000 --- a/extensions/csv/src/main/java/io/deephaven/csv/Parser.java +++ /dev/null @@ -1,376 +0,0 @@ -package io.deephaven.csv; - -import io.deephaven.time.DateTimeUtils; -import io.deephaven.qst.type.Type; - -import java.time.Duration; -import java.time.Instant; -import java.time.LocalDate; -import java.time.ZoneOffset; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; -import java.util.function.Function; - -/** - * A parser is responsible for parsing strings into parsed types. - * - * @param the parsed type - */ -public class Parser { - - /** - * A parser exception. - */ - public static class ParserException extends IllegalArgumentException { - private final String value; - - public ParserException(String value, String message) { - super(message); - this.value = value; - } - - public ParserException(String value, Throwable cause) { - super(cause); - this.value = value; - } - - public String value() { - return value; - } - } - - /** - * A parser that maps the case-insensitive string "true" to {@code true}, and "false" to {@code false}. - */ - public static final Parser BOOL = new Parser<>(Type.booleanType(), Parser::parseBool); - - /** - * A parser that delegates to {@link Byte#parseByte(String)}. - */ - public static final Parser BYTE = new Parser<>(Type.byteType(), Byte::parseByte); - - /** - * A parses that returns the first character of the string if there is exactly one character in the string. - */ - public static final Parser CHAR = new Parser<>(Type.charType(), Parser::parseChar); - - /** - * A parser that delegates to {@link Short#parseShort(String)}. - */ - public static final Parser SHORT = new Parser<>(Type.shortType(), Short::parseShort); - - /** - * A parser that delegates to {@link Integer#parseInt(String)}. - */ - public static final Parser INT = new Parser<>(Type.intType(), Integer::parseInt); - - /** - * A parser that delegates to {@link Long#parseLong(String)}. - */ - public static final Parser LONG = new Parser<>(Type.longType(), Long::parseLong); - - /** - * A parser that delegates non-trimmable strings to {@link Float#parseFloat(String)}. - * - *

-     * <p>
-     * Note: if the string is trimmable, the parsing fails. This is to remain consistent with the parsing of integral
-     * values.
-     */
-    public static final Parser<Float> FLOAT = new Parser<>(Type.floatType(), Parser::parseFloat);
-
-    /**
-     * A parser that delegates non-trimmable strings to {@link Double#parseDouble(String)}.
-     *
-     * <p>
- * Note: if the string is trimmable, the parsing fails. This is to remain consistent with the parsing of integral - * values. - */ - public static final Parser DOUBLE = new Parser<>(Type.doubleType(), Parser::parseDouble); - - /** - * A parser that delegates to {@link Instant#parse(CharSequence)}. - */ - public static final Parser INSTANT = new Parser<>(Type.instantType(), Instant::parse); - - /** - * A parser that delegates to {@link DateTimeUtils#convertDateTime(String)}. - */ - public static final Parser INSTANT_LEGACY = new Parser<>(Type.instantType(), Parser::parseAsDateFormat); - - /** - * A naive parser, which returns the same string value it was passed in. - */ - public static final Parser STRING = new Parser<>(Type.stringType(), Function.identity()); - - /** - * A parser that will parse long values as epoch seconds. - * - * @param longParser the long parser - * @param min the minimum instant to infer, may be null - * @param max the maximum instant to infer, may be null - * @return the epoch second parser - * - * @see #epochAnyParser(Parser, Instant, Instant) - */ - public static Parser epochSecondParser(Parser longParser, Instant min, Instant max) { - if (min != null && max != null && min.isAfter(max)) { - throw new IllegalArgumentException(String.format("min is greater that max: %s > %s", min, max)); - } - return new Parser<>(Type.instantType(), s -> parseAsEpochSeconds(longParser, min, max, s)); - } - - /** - * A parser that will parse long values as epoch milliseconds. - * - * @param longParser the long parser - * @param min the minimum instant to infer, may be null - * @param max the maximum instant to infer, may be null - * @return the epoch milli parser - * - * @see #epochAnyParser(Parser, Instant, Instant) - */ - public static Parser epochMilliParser(Parser longParser, Instant min, Instant max) { - if (min != null && max != null && min.isAfter(max)) { - throw new IllegalArgumentException(String.format("min is greater that max: %s > %s", min, max)); - } - return new Parser<>(Type.instantType(), s -> parseAsEpochMillis(longParser, min, max, s)); - } - - /** - * A parser that will parse long values as epoch microseconds. - * - * @param longParser the long parser - * @param min the minimum instant to infer, may be null - * @param max the maximum instant to infer, may be null - * @return the epoch micro parser - * - * @see #epochAnyParser(Parser, Instant, Instant) - */ - public static Parser epochMicroParser(Parser longParser, Instant min, Instant max) { - if (min != null && max != null && min.isAfter(max)) { - throw new IllegalArgumentException(String.format("min is greater that max: %s > %s", min, max)); - } - return new Parser<>(Type.instantType(), s -> parseAsEpochMicros(longParser, min, max, s)); - } - - /** - * A parser that will parse long values as epoch nanoseconds. 
- * - * @param longParser the long parser - * @param min the minimum instant to infer, may be null - * @param max the maximum instant to infer, may be null - * @return the epoch nano parser - * - * @see #epochAnyParser(Parser, Instant, Instant) - */ - public static Parser epochNanoParser(Parser longParser, Instant min, Instant max) { - if (min != null && max != null && min.isAfter(max)) { - throw new IllegalArgumentException(String.format("min is greater that max: %s > %s", min, max)); - } - return new Parser<>(Type.instantType(), s -> parseAsEpochNanos(longParser, min, max, s)); - } - - /** - * Returns four parsers that will parse long values as epoch seconds, milliseconds, epoch microseconds, and epoch - * nanoseconds based on non-overlapping min/max ranges. - * - *
-     * <p>
- * Note: the duration between the epoch and the max must be less than 1000 times the duration between the epoch and - * the min. - * - * @param longParser the long parser - * @param min the minimum instant to infer - * @param max the maximum instant to infer - * @return the epoch milli and micro parsers - * - * @see #epochSecondParser(Parser, Instant, Instant) - * @see #epochMilliParser(Parser, Instant, Instant) - * @see #epochMicroParser(Parser, Instant, Instant) - * @see #epochNanoParser(Parser, Instant, Instant) - * @see #epochAny21stCentury(Parser) - */ - public static List> epochAnyParser(Parser longParser, Instant min, Instant max) { - if (min.isAfter(max)) { - throw new IllegalArgumentException(String.format("min is greater that max: %s > %s", min, max)); - } - if (Duration.between(Instant.EPOCH, max) - .compareTo(Duration.between(Instant.EPOCH, min).multipliedBy(1000)) >= 0) { - throw new IllegalArgumentException("Unable to do proper inference on instants, has overlapping range"); - } - return Arrays.asList( - epochSecondParser(longParser, min, max), - epochMilliParser(longParser, min, max), - epochMicroParser(longParser, min, max), - epochNanoParser(longParser, min, max)); - } - - /** - * Returns four parser that will parse long values as epoch seconds, epoch milliseconds, epoch microseconds, and - * epoch nanoseconds from the 21st century. - * - * @param longParser the long parser - * @return the 21st century epoch second, milli, micro, and nanoseconds parsers - * @see #epochAnyParser(Parser, Instant, Instant) - */ - public static List> epochAny21stCentury(Parser longParser) { - final Instant min = LocalDate.ofYearDay(2000, 1).atStartOfDay().toInstant(ZoneOffset.UTC); - final Instant max = LocalDate.ofYearDay(2100, 1).atStartOfDay().toInstant(ZoneOffset.UTC).minusNanos(1); - return epochAnyParser(longParser, min, max); - } - - private final Type type; - private final Function function; - - /** - * Creates a parser. The {@code function} is passed non-null strings, and expected to return the parsed value, or - * throw an appropriate {@link RuntimeException}. - * - * @param type the type - * @param function the function - */ - public Parser(Type type, Function function) { - this.type = Objects.requireNonNull(type); - this.function = Objects.requireNonNull(function); - } - - public Type type() { - return type; - } - - /** - * Parses {@code value} when non-null, otherwise returns null. - * - *
-     * <p>
- * This method catches {@link RuntimeException} from {@code function} and converts them to {@link ParserException}. - * - * @param value the string to parse - * @return the parsed value, or null - * @throws ParserException if {@code value} can't be parsed - */ - public T parse(String value) { - if (value == null) { - return null; - } - try { - return function.apply(value); - } catch (RuntimeException t) { - if (t instanceof ParserException) { - throw t; - } - throw new ParserException(value, t); - } - } - - /** - * Checks if {@code this} parser can parse {@code value}. - * - *
-     * <p>
- * {@code null} values are always return true. - * - * @param value the value - * @return true if the value can be parsed. - */ - public boolean canParse(String value) { - if (value == null) { - return true; - } - try { - function.apply(value); - } catch (RuntimeException t) { - return false; - } - return true; - } - - private static boolean parseBool(String value) { - if (value.equalsIgnoreCase("true")) { - return true; - } - if (value.equalsIgnoreCase("false")) { - return false; - } - throw new ParserException(value, "Value is not a boolean"); - } - - private static char parseChar(String value) { - if (value.length() != 1) { - throw new ParserException(value, "Value is not a char"); - } - return value.charAt(0); - } - - private static float parseFloat(String value) { - if (isTrimmable(value)) { - throw new ParserException(value, "Not parsing floats that are trimmable"); - } - return Float.parseFloat(value); - } - - private static double parseDouble(String value) { - if (isTrimmable(value)) { - throw new ParserException(value, "Not parsing doubles that are trimmable"); - } - return Double.parseDouble(value); - } - - private static boolean isTrimmable(String value) { - return !value.isEmpty() && (value.charAt(0) <= ' ' || value.charAt(value.length() - 1) <= ' '); - } - - private static Instant parseAsDateFormat(String value) { - return DateTimeUtils.convertDateTime(value).getInstant(); - } - - private static Instant parseAsEpochSeconds(Parser longParser, Instant min, Instant max, String value) { - final long epochSecond = longParser.parse(value); - final Instant instant = Instant.ofEpochSecond(epochSecond); - if (min != null && instant.isBefore(min)) { - throw new ParserException(value, "Long seconds is less than min instant"); - } - if (max != null && instant.isAfter(max)) { - throw new ParserException(value, "Long seconds is greater than max instant"); - } - return instant; - } - - private static Instant parseAsEpochMillis(Parser longParser, Instant min, Instant max, String value) { - final long epochMilli = longParser.parse(value); - final Instant instant = Instant.ofEpochMilli(epochMilli); - if (min != null && instant.isBefore(min)) { - throw new ParserException(value, "Long millis is less than min instant"); - } - if (max != null && instant.isAfter(max)) { - throw new ParserException(value, "Long millis is greater than max instant"); - } - return instant; - } - - private static Instant parseAsEpochMicros(Parser longParser, Instant min, Instant max, String value) { - final long epochMicro = longParser.parse(value); - final long epochSecond = Math.floorDiv(epochMicro, 1_000_000); - final int nanoAdj = (int) Math.floorMod(epochMicro, 1_000_000) * 1_000; - final Instant instant = Instant.ofEpochSecond(epochSecond, nanoAdj); - if (min != null && instant.isBefore(min)) { - throw new ParserException(value, "Long micros is less than min instant"); - } - if (max != null && instant.isAfter(max)) { - throw new ParserException(value, "Long micros is greater than max instant"); - } - return instant; - } - - private static Instant parseAsEpochNanos(Parser longParser, Instant min, Instant max, String value) { - final long epochNano = longParser.parse(value); - final long epochSecond = Math.floorDiv(epochNano, 1_000_000_000); - final int nanoAdj = (int) Math.floorMod(epochNano, 1_000_000_000); - final Instant instant = Instant.ofEpochSecond(epochSecond, nanoAdj); - if (min != null && instant.isBefore(min)) { - throw new ParserException(value, "Long nanos is less than min instant"); - } - if (max != 
null && instant.isAfter(max)) { - throw new ParserException(value, "Long nanos is greater than max instant"); - } - return instant; - } -} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/containers/ByteSlice.java b/extensions/csv/src/main/java/io/deephaven/csv/containers/ByteSlice.java new file mode 100644 index 00000000000..6100c8563d7 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/containers/ByteSlice.java @@ -0,0 +1,61 @@ +package io.deephaven.csv.containers; + +public final class ByteSlice { + private byte[] data; + private int begin; + private int end; + + public ByteSlice() { + } + + public ByteSlice(byte[] data, int begin, int end) { + reset(data, begin, end); + } + + public void reset(byte[] data, int begin, int end) { + this.data = data; + this.begin = begin; + this.end = end; + } + + public void setBegin(int begin) { + this.begin = begin; + } + + public void setEnd(int end) { + this.end = end; + } + + public byte front() { + return data[begin]; + } + + public byte back() { + return data[end - 1]; + } + + public void copyTo(byte[] dest, int destOffset) { + for (int cur = begin; cur != end; ++cur) { + dest[destOffset++] = data[cur]; + } + } + + public void copyTo(char[] dest, int destOffset) { + for (int cur = begin; cur != end; ++cur) { + dest[destOffset++] = (char)data[cur]; + } + } + + public byte[] data() { return data; } + public int begin() { return begin; } + public int end() { return end; } + public int size() { + return end - begin; + } + + @Override + public String toString() { + final int size = end - begin; + return size == 0 ? "" : new String(data, begin, end - begin); + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/containers/CharSlice.java b/extensions/csv/src/main/java/io/deephaven/csv/containers/CharSlice.java new file mode 100644 index 00000000000..f7065457833 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/containers/CharSlice.java @@ -0,0 +1,56 @@ +package io.deephaven.csv.containers; + +public final class CharSlice { + private char[] data; + private int begin; + private int end; + + public CharSlice() {} + + public CharSlice(char[] data, int begin, int end) { + reset(data, begin, end); + } + + public void reset(char[] data, int begin, int end) { + this.data = data; + this.begin = begin; + this.end = end; + } + + public char front() { + return data[begin]; + } + + public char back() { + return data[end - 1]; + } + + public void copyTo(byte[] dest, int destOffset) { + for (int cur = begin; cur != end; ++cur) { + dest[destOffset++] = (byte)data[cur]; + } + } + + public void copyTo(char[] dest, int destOffset) { + for (int cur = begin; cur != end; ++cur) { + dest[destOffset++] = data[cur]; + } + } + + public void setEnd(int end) { + this.end = end; + } + + public char[] data() { return data; } + public int begin() { + return begin; + } + public int end() { return end; } + public int size() { return end - begin; } + + @Override + public String toString() { + final int size = end - begin; + return size == 0 ? 
"" : new String(data, begin, end - begin); + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/containers/GrowableCharBuffer.java b/extensions/csv/src/main/java/io/deephaven/csv/containers/GrowableCharBuffer.java new file mode 100644 index 00000000000..1530e444606 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/containers/GrowableCharBuffer.java @@ -0,0 +1,43 @@ +package io.deephaven.csv.containers; + +/** + * This is like TCharArrayList except that you can get at the underlying data buffer. + */ +public final class GrowableCharBuffer { + private static final int INITIAL_BUFFER_SIZE = 16384; + + private char[] data = new char[INITIAL_BUFFER_SIZE]; + private int size = 0; + + public void append(char[] src, int srcOffset, int srcSize) { + ensure(srcSize); + System.arraycopy(src, srcOffset, data, size, srcSize); + size += srcSize; + } + + private void ensure(int srcSize) { + final int sizeNeeded = Math.addExact(size, srcSize); + if (sizeNeeded <= data.length) { + return; + } + + // Ensuring that we always at least double the buffer, but we may not always + // follow powers of two + final int newSize = Math.max(sizeNeeded, Math.multiplyExact(size, 2)); + final char[] newData = new char[newSize]; + System.arraycopy(data, 0, newData, 0, size); + data = newData; + } + + public void clear() { + size = 0; + } + + public char[] data() { + return data; + } + + public int size() { + return size; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageConstants.java b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageConstants.java new file mode 100644 index 00000000000..5265cab0b39 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageConstants.java @@ -0,0 +1,7 @@ +package io.deephaven.csv.densestorage; + +public class DenseStorageConstants { + public static final int LARGE_THRESHOLD = 1024; + public static final int LARGE_BYTE_SENTINEL = Integer.MAX_VALUE; + public static final int LARGE_STRING_SENTINEL = Integer.MIN_VALUE; +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageReader.java b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageReader.java new file mode 100644 index 00000000000..1cfdfc6ae84 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageReader.java @@ -0,0 +1,79 @@ + +package io.deephaven.csv.densestorage; + +import io.deephaven.csv.containers.ByteSlice; +import io.deephaven.csv.containers.CharSlice; + +/** + * Companion to the DenseStorageWriter. + */ +public final class DenseStorageReader { + /** + * Byte sequences < DENSE_THRESHOLD are compactly stored here + */ + private final QueueReader.ByteReader byteReader; + /** + * Char sequences < DENSE_THRESHOLD are compactly stored here + */ + private final QueueReader.CharReader charReader; + /** + * Byte sequences >= DENSE_THRESHOLD are stored here + */ + private final QueueReader.ByteArrayReader largeByteArrayReader; + /** + * Char sequences >= DENSE_THRESHOLD are stored here + */ + private final QueueReader.CharArrayReader largeCharArrayReader; + /** + * Control bytes (lengths, negated lengths, or sentinels). See DenseStorageWriter. 
+ */ + private final QueueReader.IntReader controlReader; + /** + * For the "out" parameter of controlReader.tryGetInt() + */ + private final int[] intHolder = new int[1]; + + public DenseStorageReader(QueueReader.IntReader controlReader, + QueueReader.ByteReader byteReader, + QueueReader.CharReader charReader, + QueueReader.ByteArrayReader largeByteArrayReader, + QueueReader.CharArrayReader largeCharArrayReader) { + this.controlReader = controlReader; + this.byteReader = byteReader; + this.charReader = charReader; + this.largeByteArrayReader = largeByteArrayReader; + this.largeCharArrayReader = largeCharArrayReader; + } + + public boolean tryGetNextSlice(ByteSlice bs, CharSlice cs, boolean[] nextIsBytes) { + if (!controlReader.tryGetInt(intHolder)) { + return false; + } + final int control = intHolder[0]; + if (control == DenseStorageConstants.LARGE_BYTE_SENTINEL) { + mustSucceed(largeByteArrayReader.tryGetBytes(bs), "largeByteArrayReader"); + nextIsBytes[0] = true; + return true; + } + if (control == DenseStorageConstants.LARGE_STRING_SENTINEL) { + mustSucceed(largeCharArrayReader.tryGetChars(cs), "largeCharArrayReader"); + nextIsBytes[0] = false; + return true; + } + if (control >= 0) { + mustSucceed(byteReader.tryGetBytes(control, bs), "byteReader"); + nextIsBytes[0] = true; + return true; + } + mustSucceed(charReader.tryGetChars(-control, cs), "charReader"); + nextIsBytes[0] = false; + return true; + } + + private static void mustSucceed(boolean success, String what) { + if (success) { + return; + } + throw new RuntimeException("Data unexpectedly exhausted: " + what); + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageWriter.java b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageWriter.java new file mode 100644 index 00000000000..4fdfc09e9ab --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/DenseStorageWriter.java @@ -0,0 +1,122 @@ +package io.deephaven.csv.densestorage; + +import io.deephaven.csv.containers.CharSlice; + +/** + * The point of this object is to store data with a small fraction of overhead. "Large" objects (byte or char sequences + * with length >= a threshold) are stored directly. "Small" objects (byte or char sequences with a smaller length) are + * compacted into byte and char pools. 
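+ *
+ * An illustrative walkthrough (values chosen for exposition, assuming the default LARGE_THRESHOLD of 1024):
+ * appending the cells "12", then "x" repeated 2000 times, then "" writes the control words
+ * [2, LARGE_BYTE_SENTINEL, 0]. The two bytes '1' and '2' land in the small byte pool, the 2000-byte blob is
+ * stored as its own array, and the empty cell occupies no payload storage at all.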
+ */
+public final class DenseStorageWriter {
+    /**
+     * The ints in this control queue indicate where the next item is stored:
+     *   Integer.MAX_VALUE: largeByteArrayWriter
+     *   Integer.MIN_VALUE: largeCharArrayWriter
+     *   == 0: no bytes or characters, so they're not stored anywhere
+     *   otherwise < 0: charWriter (the number of chars is the negative of this value)
+     *   otherwise >= 0: byteWriter (the number of bytes is equal to this value)
+     */
+    private final QueueWriter.IntWriter controlWriter;
+    /**
+     * Byte sequences < LARGE_THRESHOLD are compactly stored here
+     */
+    private final QueueWriter.ByteWriter byteWriter;
+    /**
+     * Char sequences < LARGE_THRESHOLD are compactly stored here
+     */
+    private final QueueWriter.CharWriter charWriter;
+    /**
+     * Byte sequences >= LARGE_THRESHOLD are stored here
+     */
+    private final QueueWriter.ByteArrayWriter largeByteArrayWriter;
+    /**
+     * Char sequences >= LARGE_THRESHOLD are stored here
+     */
+    private final QueueWriter.CharArrayWriter largeCharArrayWriter;
+
+    public DenseStorageWriter() {
+        final int blobSize = DenseStorageConstants.LARGE_THRESHOLD * 1024;
+        this.controlWriter = new QueueWriter.IntWriter(100_000); // biggish
+        this.byteWriter = new QueueWriter.ByteWriter(blobSize);
+        this.charWriter = new QueueWriter.CharWriter(blobSize);
+        this.largeByteArrayWriter = new QueueWriter.ByteArrayWriter(100_000);
+        this.largeCharArrayWriter = new QueueWriter.CharArrayWriter(100_000);
+    }
+
+    public DenseStorageReader newReader() {
+        return new DenseStorageReader(
+                controlWriter.newReader(),
+                byteWriter.newReader(),
+                charWriter.newReader(),
+                largeByteArrayWriter.newReader(),
+                largeCharArrayWriter.newReader());
+    }
+
+    public void append(CharSlice cs) {
+        boolean fctrl = false;
+        boolean fbyte = false;
+        boolean fchar = false;
+        boolean flbyte = false;
+        boolean flchar = false;
+        final int size = cs.size();
+        final boolean isByteRepresentable = calcByteRepresentable(cs);
+        if (size >= DenseStorageConstants.LARGE_THRESHOLD) {
+            if (isByteRepresentable) {
+                final byte[] data = new byte[size];
+                cs.copyTo(data, 0);
+                flbyte = largeByteArrayWriter.addByteArray(data);
+                fctrl = controlWriter.addInt(DenseStorageConstants.LARGE_BYTE_SENTINEL);
+            } else {
+                final char[] data = new char[size];
+                cs.copyTo(data, 0);
+                flchar = largeCharArrayWriter.addCharArray(data);
+                fctrl = controlWriter.addInt(DenseStorageConstants.LARGE_STRING_SENTINEL);
+            }
+        } else {
+            // size < DenseStorageConstants.LARGE_THRESHOLD
+            if (isByteRepresentable) {
+                fbyte = byteWriter.addBytesFromCharSlice(cs);
+                fctrl = controlWriter.addInt(size);
+            } else {
+                fchar = charWriter.addChars(cs);
+                fctrl = controlWriter.addInt(-size);
+            }
+        }
+        // If any writer flushed, then flush them all. But try to be nice and avoid double-flushing. This is annoying.
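+        // (Why flush all: the reader consumes these queues in lockstep, driven by one control word at a
+        // time. When any one queue publishes a block, the others need to publish whatever they have
+        // buffered too; otherwise the reader could stall on a control word whose payload has not yet
+        // been made visible.)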
+        if (!fctrl && !fbyte && !fchar && !flbyte && !flchar) {
+            return;
+        }
+        if (!fctrl) {
+            controlWriter.flush();
+        }
+        if (!fbyte) {
+            byteWriter.flush();
+        }
+        if (!fchar) {
+            charWriter.flush();
+        }
+        if (!flbyte) {
+            largeByteArrayWriter.flush();
+        }
+        if (!flchar) {
+            largeCharArrayWriter.flush();
+        }
+    }
+
+    public void finish() {
+        controlWriter.finish();
+        byteWriter.finish();
+        charWriter.finish();
+        largeByteArrayWriter.finish();
+        largeCharArrayWriter.finish();
+    }
+
+    private static boolean calcByteRepresentable(CharSlice cs) {
+        for (int cur = cs.begin(); cur != cs.end(); ++cur) {
+            final int ch = cs.data()[cur];
+            if (ch > 0xff) {
+                return false;
+            }
+        }
+        return true;
+    }
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueNode.java b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueNode.java
new file mode 100644
index 00000000000..b231cb7ca07
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueNode.java
@@ -0,0 +1,17 @@
+package io.deephaven.csv.densestorage;
+
+public final class QueueNode<TARRAY> {
+    public final TARRAY data;
+    public final int begin;
+    public final int end;
+    public final boolean isLast;
+    public QueueNode<TARRAY> next;
+
+    public QueueNode(TARRAY data, int begin, int end, boolean isLast) {
+        this.data = data;
+        this.begin = begin;
+        this.end = end;
+        this.isLast = isLast;
+        this.next = null;
+    }
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueReader.java b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueReader.java
new file mode 100644
index 00000000000..d40c1941eb6
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueReader.java
@@ -0,0 +1,211 @@
+package io.deephaven.csv.densestorage;
+
+import io.deephaven.csv.containers.ByteSlice;
+import io.deephaven.csv.containers.CharSlice;
+
+public class QueueReader<TARRAY> {
+    private final Object sync;
+    private QueueNode<TARRAY> node;
+    protected TARRAY genericBlock;
+    protected int current;
+    protected int end;
+
+    protected QueueReader(Object sync, QueueNode<TARRAY> node) {
+        this.sync = sync;
+        this.node = node;
+        this.genericBlock = null;
+        this.current = 0;
+        this.end = 0;
+    }
+
+    /**
+     * This method exists as a helper method for a subclass' tryGetXXX method. A typical implementation is in
+     * CharReader:
+     *
+     *   if (current + size > end) {
+     *       if (!tryRefill(size)) {
+     *           return false;
+     *       }
+     *       typedBlock = genericBlock;
+     *   }
+     *
+     * The "if" in the caller is actually checking for two cases in a single comparison. One is a normal "buffer
+     * empty, needs to be refilled" case. The other is a bad "something went terribly wrong" case.
+     *
+     * Case 1: The buffer is empty. Then current == end, and therefore current + size > end (given size > 0, which
+     * it always is). Then we would refill the buffer and proceed.
+     *
+     * Case 2: The buffer isn't empty, but size goes beyond the end of the block. Then current < end but
+     * current + size > end. The caller detects this (with a single if) and then we look for this case in the
+     * first line of our method.
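+     *
+     * Case 2 should be impossible if the writer is behaving: blocks allocated by QueueWriter.flushAndAllocate
+     * are always at least as large as the item being written, so a small item never straddles two blocks.
+     * Hence tryRefill treats a straddle as a programming error rather than trying to stitch blocks together.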
+     */
+    protected boolean tryRefill(int size) {
+        if (current != end) {
+            throw new RuntimeException("Programming error: slice straddled block");
+        }
+        while (current == end) {
+            if (node.isLast) {
+                // Hygiene
+                node = null;
+                genericBlock = null;
+                current = 0;
+                end = 0;
+                return false;
+            }
+            synchronized (sync) {
+                while (node.next == null) {
+                    catchyWait(sync);
+                }
+                node = node.next;
+                genericBlock = node.data;
+                current = node.begin;
+                end = node.end;
+            }
+        }
+        if (end - current < size) {
+            throw new RuntimeException(String.format("Got short block: expected %d, got %d",
+                    size, end - current));
+        }
+        return true;
+    }
+
+    /**
+     * Call Object.wait() but suppress the need to deal with checked InterruptedExceptions.
+     */
+    private static void catchyWait(Object o) {
+        try {
+            o.wait();
+        } catch (InterruptedException ie) {
+            throw new RuntimeException("Thread interrupted - can't happen");
+        }
+    }
+
+    public static final class CharReader extends QueueReader<char[]> {
+        private char[] typedBlock;
+
+        public CharReader(Object sync, QueueNode<char[]> head) {
+            super(sync, head);
+        }
+
+        /**
+         * Tries to get the next CharSlice from the reader.
+         *
+         * @param cs The result, modified in place.
+         * @return true if the next CharSlice was successfully read; false if the end of input was reached.
+         */
+        public boolean tryGetChars(int size, CharSlice cs) {
+            if (current + size > end) {
+                if (!tryRefill(size)) {
+                    return false;
+                }
+                typedBlock = genericBlock;
+            }
+            cs.reset(typedBlock, current, current + size);
+            current += size;
+            return true;
+        }
+    }
+
+    public static final class ByteReader extends QueueReader<byte[]> {
+        private byte[] typedBlock;
+
+        public ByteReader(Object sync, QueueNode<byte[]> head) {
+            super(sync, head);
+        }
+
+        /**
+         * Tries to get the next ByteSlice from the reader.
+         *
+         * @param bs The result, modified in place.
+         * @return true if the next ByteSlice was successfully read; false if the end of input was reached.
+         */
+        public boolean tryGetBytes(int size, ByteSlice bs) {
+            if (current + size > end) {
+                if (!tryRefill(size)) {
+                    return false;
+                }
+                typedBlock = genericBlock;
+            }
+            bs.reset(typedBlock, current, current + size);
+            current += size;
+            return true;
+        }
+    }
+
+    public static final class IntReader extends QueueReader<int[]> {
+        private int[] typedBlock;
+
+        public IntReader(Object sync, QueueNode<int[]> head) {
+            super(sync, head);
+        }
+
+        /**
+         * Tries to get the next integer from the reader.
+         *
+         * @param result An int array used as an out parameter. If this method returns true, result[0] will hold
+         *        the value that was read.
+         * @return true if the next value was successfully read; false if the end of input was reached.
+         */
+        public boolean tryGetInt(int[] result) {
+            if (current == end) {
+                if (!tryRefill(1)) {
+                    return false;
+                }
+                typedBlock = genericBlock;
+            }
+            result[0] = typedBlock[current++];
+            return true;
+        }
+    }
+
+    public static final class ByteArrayReader extends QueueReader<byte[][]> {
+        private byte[][] typedBlock;
+
+        public ByteArrayReader(Object sync, QueueNode<byte[][]> head) {
+            super(sync, head);
+        }
+
+        /**
+         * Tries to get the next ByteSlice from the reader.
+         *
+         * @param bs The result, modified in place.
+         * @return true if the next ByteSlice was successfully read; false if the end of input was reached.
+         */
+        public boolean tryGetBytes(ByteSlice bs) {
+            if (current == end) {
+                if (!tryRefill(1)) {
+                    return false;
+                }
+                typedBlock = genericBlock;
+            }
+            final byte[] data = typedBlock[current++];
+            bs.reset(data, 0, data.length);
+            return true;
+        }
+    }
+
+    public static final class CharArrayReader extends QueueReader<char[][]> {
+        private char[][] typedBlock;
+
+        public CharArrayReader(Object sync, QueueNode<char[][]> head) {
+            super(sync, head);
+        }
+
+        /**
+         * Tries to get the next CharSlice from the reader.
+         *
+         * @param cs The result, modified in place.
+         * @return true if the next CharSlice was successfully read; false if the end of input was reached.
+         */
+        public boolean tryGetChars(CharSlice cs) {
+            if (current == end) {
+                if (!tryRefill(1)) {
+                    return false;
+                }
+                typedBlock = genericBlock;
+            }
+            final char[] data = typedBlock[current++];
+            cs.reset(data, 0, data.length);
+            return true;
+        }
+    }
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueWriter.java b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueWriter.java
new file mode 100644
index 00000000000..8b86c668c89
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/densestorage/QueueWriter.java
@@ -0,0 +1,203 @@
+package io.deephaven.csv.densestorage;
+
+import io.deephaven.csv.containers.CharSlice;
+
+import java.util.function.BiFunction;
+import java.util.function.IntFunction;
+
+public class QueueWriter<TARRAY, TREADER extends QueueReader<TARRAY>> {
+    private final Object sync;
+    private QueueNode<TARRAY> tail;
+    protected final int blobSize;
+    private final IntFunction<TARRAY> arrayFactory;
+    private final BiFunction<Object, QueueNode<TARRAY>, TREADER> readerFactory;
+    private boolean allowReaderCreation;
+    private TARRAY genericBlock;
+    protected int begin;
+    protected int current;
+    protected int end;
+
+    protected QueueWriter(int blobSize,
+            IntFunction<TARRAY> arrayFactory,
+            BiFunction<Object, QueueNode<TARRAY>, TREADER> readerFactory) {
+        this.sync = new Object();
+        // Placeholder object at head of linked list
+        this.tail = new QueueNode<>(null, 0, 0, false);
+        this.blobSize = blobSize;
+        this.arrayFactory = arrayFactory;
+        this.readerFactory = readerFactory;
+        this.allowReaderCreation = true;
+        this.genericBlock = null;
+        this.begin = 0;
+        this.current = 0;
+        this.end = 0;
+    }
+
+    public void finish() {
+        flush(true);
+        genericBlock = null; // hygiene
+        begin = 0;
+        current = 0;
+        end = 0;
+    }
+
+    public TREADER newReader() {
+        if (!allowReaderCreation) {
+            throw new RuntimeException("Must allocate readers before writing any data");
+        }
+        return readerFactory.apply(sync, tail);
+    }
+
+    /**
+     * This supports an "early flush" for callers like DenseStorageWriter who want to flush all their queues
+     * from time to time.
+     */
+    public void flush() {
+        flush(false);
+    }
+
+    /**
+     * Flush can be called at any time... when the block is empty (and hence there is nothing to flush), when
+     * there is some data, or when the block is full.
+     *
+     * @param isLast Whether this is the last node in the linked list.
+     */
+    private void flush(boolean isLast) {
+        // Sometimes our users ask us to flush even if there is nothing to flush.
+        // We need to flush "isLast" blocks (whether or not they contain data) and
+        // we need to flush blocks containing data. We don't need to flush empty blocks.
+        if (!isLast && (begin == end)) {
+            // No need to flush.
+            return;
+        }
+
+        // No more creating readers after the first flush.
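+        // (The reason for this restriction: a reader starts iterating from the node that was the tail at
+        // the moment the reader was created. Once data has been flushed, the tail has moved past that
+        // data, so a reader created now would silently miss everything already published.)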
+        allowReaderCreation = false;
+
+        final QueueNode<TARRAY> newBlob = new QueueNode<>(genericBlock, begin, current, isLast);
+        begin = current;
+        synchronized (sync) {
+            tail.next = newBlob;
+            tail = newBlob;
+            sync.notifyAll();
+        }
+    }
+
+    protected final TARRAY flushAndAllocate(int additional) {
+        flush(false);
+        final int capacity = Math.max(blobSize, additional);
+        genericBlock = arrayFactory.apply(capacity);
+        begin = 0;
+        current = 0;
+        end = capacity;
+        return genericBlock;
+    }
+
+    public static final class CharWriter extends QueueWriter<char[], QueueReader.CharReader> {
+        private char[] block = null;
+
+        public CharWriter(int blobSize) {
+            super(blobSize, char[]::new, QueueReader.CharReader::new);
+        }
+
+        /**
+         * @return true iff the add caused a flush to happen, false otherwise.
+         */
+        public boolean addChars(CharSlice cs) {
+            final int sliceSize = cs.size();
+            final boolean flushHappened = current + sliceSize > end;
+            if (flushHappened) {
+                block = flushAndAllocate(sliceSize);
+            }
+            cs.copyTo(block, current);
+            current += sliceSize;
+            return flushHappened;
+        }
+    }
+
+    public static final class ByteWriter extends QueueWriter<byte[], QueueReader.ByteReader> {
+        private byte[] block = null;
+
+        public ByteWriter(int blobSize) {
+            super(blobSize, byte[]::new, QueueReader.ByteReader::new);
+        }
+
+        /**
+         * Add bytes from a CharSlice to the queue. The conversion from char to byte is provided automatically
+         * as a convenience. The caller needs to ensure that the characters in the CharSlice are within the
+         * range of a byte.
+         *
+         * @return true iff the add caused a flush to happen, false otherwise.
+         */
+        public boolean addBytesFromCharSlice(CharSlice cs) {
+            final int sliceSize = cs.size();
+            final boolean flushHappened = current + sliceSize > end;
+            if (flushHappened) {
+                block = flushAndAllocate(sliceSize);
+            }
+            cs.copyTo(block, current);
+            current += sliceSize;
+            return flushHappened;
+        }
+    }
+
+    public static final class IntWriter extends QueueWriter<int[], QueueReader.IntReader> {
+        private int[] block = null;
+
+        public IntWriter(int blobSize) {
+            super(blobSize, int[]::new, QueueReader.IntReader::new);
+        }
+
+        /**
+         * @return true iff the add caused a flush to happen, false otherwise.
+         */
+        public boolean addInt(int value) {
+            final boolean flushHappened = current == end;
+            if (flushHappened) {
+                block = flushAndAllocate(1);
+            }
+            block[current++] = value;
+            return flushHappened;
+        }
+    }
+
+    public static final class ByteArrayWriter extends QueueWriter<byte[][], QueueReader.ByteArrayReader> {
+        private byte[][] block = null;
+
+        public ByteArrayWriter(int blobSize) {
+            super(blobSize, byte[][]::new, QueueReader.ByteArrayReader::new);
+        }
+
+        /**
+         * @return true iff the add caused a flush to happen, false otherwise.
+         */
+        public boolean addByteArray(byte[] value) {
+            final boolean flushHappened = current == end;
+            if (flushHappened) {
+                block = flushAndAllocate(1);
+            }
+            block[current++] = value;
+            return flushHappened;
+        }
+    }
+
+    public static final class CharArrayWriter extends QueueWriter<char[][], QueueReader.CharArrayReader> {
+        private char[][] block = null;
+
+        public CharArrayWriter(int blobSize) {
+            super(blobSize, char[][]::new, QueueReader.CharArrayReader::new);
+        }
+
+        /**
+         * @return true iff the add caused a flush to happen, false otherwise.
+ */ + public boolean addCharArray(char[] value) { + final boolean flushHappened = current == end; + if (flushHappened) { + block = flushAndAllocate(1); + } + block[current++] = value; + return flushHappened; + } + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/BooleanParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/BooleanParser.java new file mode 100644 index 00000000000..3a47b4506ad --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/BooleanParser.java @@ -0,0 +1,51 @@ +package io.deephaven.csv.parsers; + +import io.deephaven.csv.sinks.Sink; +import io.deephaven.csv.tokenization.Tokenizer; +import io.deephaven.csv.parsers.context.ParseContext; + +public final class BooleanParser extends ParserBase { + public static BooleanParser INSTANCE = new BooleanParser(); + + private BooleanParser() {} + + @Override + public Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt) { + return twoPhaseDriver(ctx, ih, ihAlt, BooleanParser::tryParseHelper, + ctx.sinkFactory::makeBooleanAsByteSink); + } + + private static boolean tryParseHelper(ParseContext ctx, IteratorHolder ih, + Sink sink, long current, long end) { + final byte[] chunk = new byte[DEST_BLOCK_SIZE]; + final boolean[] boolHolder = new boolean[1]; + final Tokenizer t = ctx.tokenizer; + final Byte nullValue = ctx.sentinelConfiguration.nullBooleanAsByteValue; + + int chunkIndex = 0; + do { + if (chunkIndex == chunk.length) { + sink.write(chunk, 0, current, chunkIndex); + current += chunkIndex; + chunkIndex = 0; + } + if (current + chunkIndex == end) { + break; + } + if (ih.isNullCell()) { + chunk[chunkIndex++] = assertHasNullValue(nullValue); + continue; + } + if (!ih.hasBytes()) { + return false; + } + if (!t.tryParseBoolean(ih.bs(), boolHolder)) { + return false; + } + ctx.isNullOrWidthOneSoFar = false; + chunk[chunkIndex++] = boolHolder[0] ? 
(byte)1 : (byte)0; + } while (ih.tryMoveNext()); + sink.write(chunk, 0, current, chunkIndex); + return true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/ByteParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/ByteParser.java new file mode 100644 index 00000000000..d00c8dccfc8 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/ByteParser.java @@ -0,0 +1,61 @@ +package io.deephaven.csv.parsers; + +import io.deephaven.csv.sinks.Sink; +import io.deephaven.csv.tokenization.RangeTests; +import io.deephaven.csv.tokenization.Tokenizer; +import io.deephaven.csv.parsers.context.ParseContext; + +public final class ByteParser extends ParserBase { + public static final ByteParser INSTANCE = new ByteParser(); + + private ByteParser() {} + + @Override + public Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt) { + return twoPhaseDriver(ctx, ih, ihAlt, ByteParser::tryParseHelper, ctx.sinkFactory::makeByteSink); + } + + private static boolean tryParseHelper(ParseContext ctx, IteratorHolder ih, + Sink sink, long current, long end) { + final byte[] chunk = new byte[DEST_BLOCK_SIZE]; + final long[] longHolder = new long[1]; + final Tokenizer t = ctx.tokenizer; + final Byte nullValue = ctx.sentinelConfiguration.nullByteValue; + + int chunkIndex = 0; + do { + if (chunkIndex == chunk.length) { + sink.write(chunk, 0, current, chunkIndex); + current += chunkIndex; + chunkIndex = 0; + } + if (current + chunkIndex == end) { + break; + } + if (ih.isNullCell()) { + chunk[chunkIndex++] = assertHasNullValue(nullValue); + continue; + } + if (!ih.hasBytes()) { + return false; + } + if (!t.tryParseLong(ih.bs(), longHolder)) { + return false; + } + final long value = longHolder[0]; + if (!RangeTests.isInRangeForByte(value)) { + return false; + } + if (nullValue != null && value == nullValue) { + // If a sentinel null value is defined, it cannot be present in the input. 
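+                // Returning false makes the inference machinery abandon this parser and retry the column
+                // with the next parser in the precedence list, whose wider type can represent this value
+                // as an ordinary (non-sentinel) value.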
+ return false; + } + if (ih.bs().size() != 1) { + ctx.isNullOrWidthOneSoFar = false; + } + chunk[chunkIndex++] = (byte) value; + } while (ih.tryMoveNext()); + sink.write(chunk, 0, current, chunkIndex); + return true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/CharParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/CharParser.java new file mode 100644 index 00000000000..d73b5bad24b --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/CharParser.java @@ -0,0 +1,54 @@ +package io.deephaven.csv.parsers; + +import io.deephaven.csv.sinks.Sink; +import io.deephaven.csv.parsers.context.ParseContext; + +public final class CharParser extends ParserBase { + public static final CharParser INSTANCE = new CharParser(); + + private CharParser() { + } + + @Override + public Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt) { + return twoPhaseDriver(ctx, ih, ihAlt, CharParser::tryParseHelper, + ctx.sinkFactory::makeCharSink); + } + + private static boolean tryParseHelper(ParseContext ctx, IteratorHolder ih, + Sink sink, + long current, long end) { + if (!ctx.isNullOrWidthOneSoFar) { + return false; + } + final char[] chunk = new char[DEST_BLOCK_SIZE]; + final Character nullValue = ctx.sentinelConfiguration.nullCharValue; + + int chunkIndex = 0; + do { + if (chunkIndex == chunk.length) { + sink.write(chunk, 0, current, chunkIndex); + current += chunkIndex; + chunkIndex = 0; + } + if (current + chunkIndex == end) { + break; + } + if (ih.isNullCell()) { + chunk[chunkIndex++] = assertHasNullValue(nullValue); + continue; + } + if (ih.sliceSize() > 1) { + return false; + } + final char value = ih.hasBytes() ? (char) ih.bs().front() : ih.cs().front(); + if (nullValue != null && value == nullValue) { + // If a sentinel null value is defined, it cannot be present in the input. 
+ return false; + } + chunk[chunkIndex++] = value; + } while (ih.tryMoveNext()); + sink.write(chunk, 0, current, chunkIndex); + return true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/DateTimeAsLongParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/DateTimeAsLongParser.java new file mode 100644 index 00000000000..24bc8a798fc --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/DateTimeAsLongParser.java @@ -0,0 +1,56 @@ +package io.deephaven.csv.parsers; + +import io.deephaven.csv.sinks.Sink; +import io.deephaven.csv.tokenization.Tokenizer; +import io.deephaven.csv.parsers.context.ParseContext; + +public final class DateTimeAsLongParser extends ParserBase { + public static final DateTimeAsLongParser INSTANCE = new DateTimeAsLongParser(); + + private DateTimeAsLongParser() {} + + @Override + public Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt) { + return twoPhaseDriver(ctx, ih, ihAlt, DateTimeAsLongParser::tryParseHelper, + ctx.sinkFactory::makeDateTimeAsLongSink); + } + + private static boolean tryParseHelper(ParseContext ctx, IteratorHolder ih, + Sink sink, long current, long end) { + final long[] chunk = new long[DEST_BLOCK_SIZE]; + final long[] dateTimeAsLongHolder = new long[1]; + final Tokenizer t = ctx.tokenizer; + final Long nullValue = ctx.sentinelConfiguration.nullDateTimeAsLongValue; + + int chunkIndex = 0; + do { + if (chunkIndex == chunk.length) { + sink.write(chunk, 0, current, chunkIndex); + current += chunkIndex; + chunkIndex = 0; + } + if (current + chunkIndex == end) { + break; + } + if (ih.isNullCell()) { + chunk[chunkIndex++] = assertHasNullValue(nullValue); + continue; + } + if (!ih.hasBytes()) { + return false; + } + if (!t.tryParseDBDateTime(ih.bs(), dateTimeAsLongHolder)) { + return false; + } + final long value = dateTimeAsLongHolder[0]; + if (nullValue != null && value == nullValue) { + // If a sentinel null value is defined, it cannot be present in the input. 
+ return false; + } + ctx.isNullOrWidthOneSoFar = false; // No valid DBDateTime is 1 character wide + chunk[chunkIndex++] = value; + } while (ih.tryMoveNext()); + sink.write(chunk, 0, current, chunkIndex); + return true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/DoubleParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/DoubleParser.java new file mode 100644 index 00000000000..151175b2669 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/DoubleParser.java @@ -0,0 +1,58 @@ +package io.deephaven.csv.parsers; + +import io.deephaven.csv.sinks.Sink; +import io.deephaven.csv.tokenization.Tokenizer; +import io.deephaven.csv.parsers.context.ParseContext; + +public final class DoubleParser extends ParserBase { + public static final DoubleParser INSTANCE = new DoubleParser(); + + private DoubleParser() {} + + @Override + public Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt) { + return twoPhaseDriver(ctx, ih, ihAlt, DoubleParser::tryParseHelper, ctx.sinkFactory::makeDoubleSink); + } + + private static boolean tryParseHelper(ParseContext ctx, IteratorHolder ih, Sink sink, + long current, long end) { + final double[] chunk = new double[DEST_BLOCK_SIZE]; + final double[] doubleHolder = new double[1]; + final Tokenizer t = ctx.tokenizer; + final Double nullValue = ctx.sentinelConfiguration.nullDoubleValue; + + int chunkIndex = 0; + do { + if (chunkIndex == chunk.length) { + sink.write(chunk, 0, current, chunkIndex); + current += chunkIndex; + chunkIndex = 0; + } + if (current + chunkIndex == end) { + break; + } + if (ih.isNullCell()) { + chunk[chunkIndex++] = assertHasNullValue(nullValue); + continue; + } + if (!ih.hasBytes()) { + return false; + } + if (!t.tryParseDouble(ih.bs(), doubleHolder)) { + return false; + } + final double value = doubleHolder[0]; + if (nullValue != null && value == nullValue) { + // If a sentinel null value is defined, it cannot be present in the input. + return false; + } + if (ih.bs().size() > 1) { + // Not an error, but needed in case we eventually fall back to char. 
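+                // (The char inference rule: a column can only become char if every cell seen was null or
+                // exactly one character wide, so any wider cell clears this flag for good.)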
+ ctx.isNullOrWidthOneSoFar = false; + } + chunk[chunkIndex++] = value; + } while (ih.tryMoveNext()); + sink.write(chunk, 0, current, chunkIndex); + return true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/FloatParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/FloatParser.java new file mode 100644 index 00000000000..54d8efc857a --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/FloatParser.java @@ -0,0 +1,70 @@ +package io.deephaven.csv.parsers; + +import io.deephaven.csv.sinks.Sink; +import io.deephaven.csv.tokenization.Tokenizer; +import io.deephaven.csv.parsers.context.ParseContext; +import io.deephaven.csv.tokenization.RangeTests; + +public final class FloatParser extends ParserBase { + public static final FloatParser INSTANCE = new FloatParser(); + + private FloatParser() {} + + @Override + public Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt) { + return twoPhaseDriver(ctx, ih, ihAlt, FloatParser::tryParseFloatsHelper, ctx.sinkFactory::makeFloatSink); + } + + private static boolean tryParseFloatsHelper(ParseContext ctx, IteratorHolder ih, + Sink sink, long current, long end) { + if (!ctx.hasFewerThan8SigFigsSoFar) { + return false; + } + final float[] chunk = new float[DEST_BLOCK_SIZE]; + final double[] doubleHolder = new double[1]; + final Tokenizer t = ctx.tokenizer; + final Float nullValue = ctx.sentinelConfiguration.nullFloatValue; + + int chunkIndex = 0; + do { + if (chunkIndex == chunk.length) { + sink.write(chunk, 0, current, chunkIndex); + current += chunkIndex; + chunkIndex = 0; + } + if (current + chunkIndex == end) { + break; + } + if (ih.isNullCell()) { + chunk[chunkIndex++] = assertHasNullValue(nullValue); + continue; + } + if (!ih.hasBytes()) { + return false; + } + if (!t.tryParseDouble(ih.bs(), doubleHolder)) { + return false; + } + final double value = doubleHolder[0]; + if (!RangeTests.isInRangeForFloat(value)) { + return false; + } + if (nullValue != null && value == nullValue) { + // If a sentinel null value is defined, it cannot be present in the input. + return false; + } + if (ctx.doubleParserIsAvailable && !t.hasFewerThan8SignificantFigures(ih.bs())) { + // Lots of significant figures, so fall back to double parsing (but only + // if a double parse is available). + return false; + } + if (ih.bs().size() > 1) { + // Not an error, but needed in case we eventually fall back to char. 
+ ctx.isNullOrWidthOneSoFar = false; + } + chunk[chunkIndex++] = (float)value; + } while (ih.tryMoveNext()); + sink.write(chunk, 0, current, chunkIndex); + return true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/IntParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/IntParser.java new file mode 100644 index 00000000000..5903f8efdbf --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/IntParser.java @@ -0,0 +1,67 @@ +package io.deephaven.csv.parsers; + +import io.deephaven.csv.parsers.context.ParseContext; +import io.deephaven.csv.sinks.Sink; +import io.deephaven.csv.tokenization.RangeTests; +import io.deephaven.csv.tokenization.Tokenizer; + +public final class IntParser extends ParserBase { + public static final IntParser INSTANCE = new IntParser(); + + private IntParser() {} + + @Override + public Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt) { + return twoPhaseDriver(ctx, ih, ihAlt, IntParser::tryParseHelper, + ctx.sinkFactory::makeIntSink); + } + + private static boolean tryParseHelper(ParseContext ctx, IteratorHolder ih, Sink sink, + long current, long end) { + final int[] chunk = new int[DEST_BLOCK_SIZE]; + final long[] longHolder = new long[1]; + final Tokenizer t = ctx.tokenizer; + final Integer nullValue = ctx.sentinelConfiguration.nullIntValue; + + int chunkIndex = 0; + do { + if (chunkIndex == chunk.length) { + sink.write(chunk, 0, current, chunkIndex); + current += chunkIndex; + chunkIndex = 0; + } + if (current + chunkIndex == end) { + break; + } + if (ih.isNullCell()) { + chunk[chunkIndex++] = assertHasNullValue(nullValue); + continue; + } + if (!ih.hasBytes()) { + return false; + } + if (!t.tryParseLong(ih.bs(), longHolder)) { + return false; + } + final long value = longHolder[0]; + if (!RangeTests.isInRangeForInt(value)) { + return false; + } + if (nullValue != null && value == nullValue) { + // If a sentinel null value is defined, it cannot be present in the input. + return false; + } + if (value < -9_999_999 || value > 9_999_999) { + // Not an error, but needed in case we eventually fall back to float. + ctx.hasFewerThan8SigFigsSoFar = false; + } + if (ih.bs().size() > 1) { + // Not an error, but needed in case we eventually fall back to char. + ctx.isNullOrWidthOneSoFar = false; + } + chunk[chunkIndex++] = (int) value; + } while (ih.tryMoveNext()); + sink.write(chunk, 0, current, chunkIndex); + return true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/IteratorHolder.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/IteratorHolder.java new file mode 100644 index 00000000000..74f84bfd541 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/IteratorHolder.java @@ -0,0 +1,88 @@ +package io.deephaven.csv.parsers; + +import io.deephaven.csv.densestorage.DenseStorageReader; +import io.deephaven.csv.containers.ByteSlice; +import io.deephaven.csv.containers.CharSlice; + +public final class IteratorHolder { + private final DenseStorageReader dsr; + /** + * Storage for our reusable byte slice. Data inside it is valid after a call to tryMoveNext() returns true, where + * hasBytes has been set to true. + */ + private final ByteSlice bs = new ByteSlice(); + /** + * Storage for our reusable char slice. Data inside it is valid after a call to tryMoveNext() returns true, where + * hasBytes has been set to false. + */ + private final CharSlice cs = new CharSlice(); + /** + * Storage for reusable out parameters. 
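+     * Using one-element arrays as out parameters lets tryMoveNext avoid allocating a fresh result object
+     * (or boxing a boolean) for every cell in the input.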
+     */
+    private final boolean[] booleanHolder = new boolean[1];
+    /**
+     * Number of successful calls so far to tryMoveNext (i.e. those that returned true).
+     */
+    private long numConsumed = 0;
+    /**
+     * Valid only after a call to tryMoveNext() returns true. Contains true if 'bs' is valid, false if 'cs'
+     * is valid.
+     */
+    private boolean hasBytes = false;
+    /**
+     * Valid anytime after the first call to tryMoveNext(), but not before.
+     */
+    private boolean isExhausted = false;
+
+    public IteratorHolder(DenseStorageReader dsr) {
+        this.dsr = dsr;
+    }
+
+    public boolean tryMoveNext() {
+        if (!dsr.tryGetNextSlice(bs, cs, booleanHolder)) {
+            isExhausted = true;
+            return false;
+        }
+        hasBytes = booleanHolder[0];
+        ++numConsumed;
+        return true;
+    }
+
+    public void mustMoveNext() {
+        if (tryMoveNext()) {
+            return;
+        }
+        throw new RuntimeException("Iteration ended unexpectedly.");
+    }
+
+    public String sliceToString() {
+        return hasBytes ? bs.toString() : cs.toString();
+    }
+
+    // TODO(kosak): move this somewhere logical
+    public boolean isNullCell() {
+        return sliceSize() == 0;
+    }
+
+    public int sliceSize() {
+        return hasBytes ? bs.size() : cs.size();
+    }
+
+    public boolean hasBytes() {
+        return hasBytes;
+    }
+
+    public ByteSlice bs() {
+        return bs;
+    }
+
+    public CharSlice cs() {
+        return cs;
+    }
+
+    public long numConsumed() {
+        return numConsumed;
+    }
+
+    public boolean isExhausted() { return isExhausted; }
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/LongParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/LongParser.java
new file mode 100644
index 00000000000..cca2b2d03c6
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/LongParser.java
@@ -0,0 +1,62 @@
+package io.deephaven.csv.parsers;
+
+import io.deephaven.csv.sinks.Sink;
+import io.deephaven.csv.tokenization.Tokenizer;
+import io.deephaven.csv.parsers.context.ParseContext;
+
+public final class LongParser extends ParserBase {
+    public static final LongParser INSTANCE = new LongParser();
+
+    private LongParser() {
+    }
+
+    @Override
+    public Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt) {
+        return twoPhaseDriver(ctx, ih, ihAlt, LongParser::tryParseHelper, ctx.sinkFactory::makeLongSink);
+    }
+
+    private static boolean tryParseHelper(ParseContext ctx, IteratorHolder ih,
+            Sink sink, long current, long end) {
+        final long[] chunk = new long[DEST_BLOCK_SIZE];
+        final long[] longHolder = new long[1];
+        final Tokenizer t = ctx.tokenizer;
+        final Long nullValue = ctx.sentinelConfiguration.nullLongValue;
+
+        int chunkIndex = 0;
+        do {
+            if (chunkIndex == chunk.length) {
+                sink.write(chunk, 0, current, chunkIndex);
+                current += chunkIndex;
+                chunkIndex = 0;
+            }
+            if (current + chunkIndex == end) {
+                break;
+            }
+            if (ih.isNullCell()) {
+                chunk[chunkIndex++] = assertHasNullValue(nullValue);
+                continue;
+            }
+            if (!ih.hasBytes()) {
+                return false;
+            }
+            if (!t.tryParseLong(ih.bs(), longHolder)) {
+                return false;
+            }
+            final long value = longHolder[0];
+            if (nullValue != null && value == nullValue) {
+                // If a sentinel null value is defined, it cannot be present in the input.
+                return false;
+            }
+            if (value < -9_999_999 || value > 9_999_999) {
+                // Not an error, but needed in case we eventually fall back to float.
+                ctx.hasFewerThan8SigFigsSoFar = false;
+            }
+            if (ih.bs().size() > 1) {
+                // Not an error, but needed in case we eventually fall back to char.
+ ctx.isNullOrWidthOneSoFar = false; + } + chunk[chunkIndex++] = value; + } while (ih.tryMoveNext()); + sink.write(chunk, 0, current, chunkIndex); + return true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/ParserBase.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/ParserBase.java new file mode 100644 index 00000000000..f7926e0a402 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/ParserBase.java @@ -0,0 +1,75 @@ +package io.deephaven.csv.parsers; + +import io.deephaven.csv.parsers.context.ParseContext; +import io.deephaven.csv.sinks.Sink; + +import java.util.function.Supplier; + +public abstract class ParserBase { + protected static final int DEST_BLOCK_SIZE = 65536; + + public abstract Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt); + + protected Sink twoPhaseDriver(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt, + ParseHelperInvoker parseHelperInvoker, Supplier> sinkFactory) { + if (ih.isExhausted()) { + // Column contains all nulls (or is empty) + final Sink sink = sinkFactory.get(); + if (ihAlt.tryMoveNext()) { + // Column is not empty, but contains all nulls + if (!parseHelperInvoker.apply(ctx, ihAlt, sink, 0, ih.numConsumed())) { + // TODO(kosak): better error here + throw new RuntimeException("Parse failed when scanning all-null column"); + } + } + return sink; + } + + // Lazy because the parser may fail right away and we don't want to needlessly create the target + // data structure. + final Lazy lazySink = new Lazy<>(sinkFactory); + + final long startRow = ih.numConsumed() - 1; + if (!parseHelperInvoker.apply(ctx, ih, lazySink, startRow, Long.MAX_VALUE)) { + return null; + } + + ihAlt.mustMoveNext(); + if (!parseHelperInvoker.apply(ctx, ihAlt, lazySink, 0, startRow)) { + // TODO(kosak): better error here + throw new RuntimeException("Parse failed on rescan"); + } + return lazySink.inner; + } + + protected static T assertHasNullValue(T boxedValue) { + if (boxedValue != null) { + return boxedValue; + } + // TODO(kosak): better exception. 
+ throw new RuntimeException("Encountered a null cell but no null value was configured"); + } + + protected interface ParseHelperInvoker { + boolean apply(ParseContext ctx, IteratorHolder ih, Sink workingSink, + long current, long end); + } + + private static final class Lazy implements Sink { + private final Supplier> sinkFactory; + private Sink inner; + + public Lazy(Supplier> sinkFactory) { + this.sinkFactory = sinkFactory; + inner = null; + } + + @Override + public void write(TARRAY src, int srcOffset, long destOffset, int size) { + if (inner == null) { + inner = sinkFactory.get(); + } + inner.write(src, srcOffset, destOffset, size); + } + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/Parsers.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/Parsers.java new file mode 100644 index 00000000000..f5d9ef0ea93 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/Parsers.java @@ -0,0 +1,47 @@ +package io.deephaven.csv.parsers; + +import java.util.List; + +public class Parsers { + public static final BooleanParser BOOLEAN = BooleanParser.INSTANCE; + public static final ByteParser BYTE = ByteParser.INSTANCE; + public static final ShortParser SHORT = ShortParser.INSTANCE; + public static final IntParser INT = IntParser.INSTANCE; + public static final LongParser LONG = LongParser.INSTANCE; + public static final FloatParser FLOAT = FloatParser.INSTANCE; + public static final DoubleParser DOUBLE = DoubleParser.INSTANCE; + public static final DateTimeAsLongParser DBDATETIME = DateTimeAsLongParser.INSTANCE; + public static final CharParser CHAR = CharParser.INSTANCE; + public static final StringParser STRING = StringParser.INSTANCE; + public static final TimestampParser TIMESTAMP = TimestampParser.INSTANCE; + public static final TimestampMillisParser TIMESTAMP_MILLIS = TimestampMillisParser.INSTANCE; + public static final TimestampMicrosParser TIMESTAMP_MICROS = TimestampMicrosParser.INSTANCE; + public static final TimestampNanosParser TIMESTAMP_NANOS = TimestampNanosParser.INSTANCE; + + public static final List> PRECEDENCE = List.of( + BOOLEAN, + BYTE, + SHORT, + INT, + LONG, + TIMESTAMP, + TIMESTAMP_MILLIS, + TIMESTAMP_MICROS, + TIMESTAMP_NANOS, + FLOAT, + DOUBLE, + DBDATETIME, + CHAR, + STRING); + + public static final List> DEFAULT = List.of( + BOOLEAN, + SHORT, + INT, + LONG, + FLOAT, + DOUBLE, + DBDATETIME, + CHAR, + STRING); +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/ShortParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/ShortParser.java new file mode 100644 index 00000000000..c98f7a8527b --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/ShortParser.java @@ -0,0 +1,62 @@ +package io.deephaven.csv.parsers; + +import io.deephaven.csv.sinks.Sink; +import io.deephaven.csv.tokenization.RangeTests; +import io.deephaven.csv.tokenization.Tokenizer; +import io.deephaven.csv.parsers.context.ParseContext; + +public final class ShortParser extends ParserBase { + public static final ShortParser INSTANCE = new ShortParser(); + + private ShortParser() { + } + + @Override + public Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt) { + return twoPhaseDriver(ctx, ih, ihAlt, ShortParser::tryParseHelper, ctx.sinkFactory::makeShortSink); + } + + private static boolean tryParseHelper( + ParseContext ctx, IteratorHolder ih, Sink sink, long current, long end) { + final short[] chunk = new short[DEST_BLOCK_SIZE]; + final long[] longHolder = new long[1]; + final Tokenizer 
t = ctx.tokenizer; + final Short nullValue = ctx.sentinelConfiguration.nullShortValue; + + int chunkIndex = 0; + do { + if (chunkIndex == chunk.length) { + sink.write(chunk, 0, current, chunkIndex); + current += chunkIndex; + chunkIndex = 0; + } + if (current + chunkIndex == end) { + break; + } + if (ih.isNullCell()) { + chunk[chunkIndex++] = assertHasNullValue(nullValue); + continue; + } + if (!ih.hasBytes()) { + return false; + } + if (!t.tryParseLong(ih.bs(), longHolder)) { + return false; + } + final long value = longHolder[0]; + if (!RangeTests.isInRangeForShort(value)) { + return false; + } + if (nullValue != null && value == nullValue) { + // If a sentinel null value is defined, it cannot be present in the input. + return false; + } + if (ih.bs().size() > 1) { + ctx.isNullOrWidthOneSoFar = false; + } + chunk[chunkIndex++] = (short) value; + } while (ih.tryMoveNext()); + sink.write(chunk, 0, current, chunkIndex); + return true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/StringParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/StringParser.java new file mode 100644 index 00000000000..213ac00af18 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/StringParser.java @@ -0,0 +1,45 @@ +package io.deephaven.csv.parsers; + +import io.deephaven.csv.sinks.Sink; +import io.deephaven.csv.parsers.context.ParseContext; + +public final class StringParser extends ParserBase { + public static final StringParser INSTANCE = new StringParser(); + + private StringParser() {} + + @Override + public Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt) { + return twoPhaseDriver(ctx, ih, ihAlt, StringParser::tryParseStringsHelper, ctx.sinkFactory::makeStringSink); + } + + private static boolean tryParseStringsHelper(ParseContext ctx, IteratorHolder ih, + Sink sink, long current, long end) { + final String[] chunk = new String[DEST_BLOCK_SIZE]; + final String[] nullValue = ctx.sentinelConfiguration.nullStringValue; + + int chunkIndex = 0; + do { + if (chunkIndex == chunk.length) { + sink.write(chunk, 0, current, chunkIndex); + current += chunkIndex; + chunkIndex = 0; + } + if (current + chunkIndex == end) { + break; + } + if (ih.isNullCell()) { + chunk[chunkIndex++] = assertHasNullValue(nullValue)[0]; + continue; + } + final String value = ih.sliceToString(); + if (nullValue != null && value.equals(nullValue[0])) { + // If a sentinel null value is defined, it cannot be present in the input. 
+ return false; + } + chunk[chunkIndex++] = value; + } while (ih.tryMoveNext()); + sink.write(chunk, 0, current, chunkIndex); + return true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampMicrosParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampMicrosParser.java new file mode 100644 index 00000000000..b7032bddc4f --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampMicrosParser.java @@ -0,0 +1,9 @@ +package io.deephaven.csv.parsers; + +public class TimestampMicrosParser extends TimestampParserBase { + public static final TimestampMicrosParser INSTANCE = new TimestampMicrosParser(); + + private TimestampMicrosParser() { + super(MICROSECOND_SCALE); + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampMillisParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampMillisParser.java new file mode 100644 index 00000000000..347aec4a0fe --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampMillisParser.java @@ -0,0 +1,9 @@ +package io.deephaven.csv.parsers; + +public class TimestampMillisParser extends TimestampParserBase { + public static final TimestampMillisParser INSTANCE = new TimestampMillisParser(); + + private TimestampMillisParser() { + super(MILLISECOND_SCALE); + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampNanosParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampNanosParser.java new file mode 100644 index 00000000000..ec2c8f97665 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampNanosParser.java @@ -0,0 +1,9 @@ +package io.deephaven.csv.parsers; + +public class TimestampNanosParser extends TimestampParserBase { + public static final TimestampNanosParser INSTANCE = new TimestampNanosParser(); + + private TimestampNanosParser() { + super(NANOSECOND_SCALE); + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampParser.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampParser.java new file mode 100644 index 00000000000..25ffdf6021a --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampParser.java @@ -0,0 +1,12 @@ +package io.deephaven.csv.parsers; + +/** + * Seconds since Unix epoch. 
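+ * For example, under this parser the cell {@code 1638316800} is read as 2021-12-01T00:00:00Z: the long is
+ * parsed as seconds and stored as {@code 1638316800 * SECOND_SCALE} nanoseconds.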
+ */
+public final class TimestampParser extends TimestampParserBase {
+    public static final TimestampParser INSTANCE = new TimestampParser();

+    private TimestampParser() {
+        super(SECOND_SCALE);
+    }
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampParserBase.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampParserBase.java
new file mode 100644
index 00000000000..258c0ec9031
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/TimestampParserBase.java
@@ -0,0 +1,82 @@
+package io.deephaven.csv.parsers;
+
+import io.deephaven.csv.sinks.Sink;
+import io.deephaven.csv.tokenization.Tokenizer;
+import io.deephaven.csv.parsers.context.ParseContext;
+
+public abstract class TimestampParserBase extends ParserBase {
+    protected static final long SECOND_SCALE = 1_000_000_000;
+    protected static final long MILLISECOND_SCALE = 1_000_000;
+    protected static final long MICROSECOND_SCALE = 1_000;
+    protected static final long NANOSECOND_SCALE = 1;
+
+    private final long scale;
+    private final long minValue;
+    private final long maxValue;
+
+    /**
+     * @param scale The number of nanoseconds per input unit: 1_000_000_000 for seconds, 1_000_000 for millis,
+     *        1_000 for micros, 1 for nanos.
+     */
+    protected TimestampParserBase(long scale) {
+        this.scale = scale;
+        minValue = Long.MIN_VALUE / scale;
+        maxValue = Long.MAX_VALUE / scale;
+    }
+
+    @Override
+    public final Sink tryParse(ParseContext ctx, IteratorHolder ih, IteratorHolder ihAlt) {
+        return twoPhaseDriver(ctx, ih, ihAlt, this::tryParseHelper, ctx.sinkFactory::makeDateTimeAsLongSink);
+    }
+
+    private boolean tryParseHelper(ParseContext ctx, IteratorHolder ih,
+            Sink sink, long current, long end) {
+        final long[] chunk = new long[DEST_BLOCK_SIZE];
+        final long[] longHolder = new long[1];
+        final Tokenizer t = ctx.tokenizer;
+        final Long nullValue = ctx.sentinelConfiguration.nullDateTimeAsLongValue;
+
+        int chunkIndex = 0;
+        do {
+            if (chunkIndex == chunk.length) {
+                sink.write(chunk, 0, current, chunkIndex);
+                current += chunkIndex;
+                chunkIndex = 0;
+            }
+            if (current + chunkIndex == end) {
+                break;
+            }
+            if (ih.isNullCell()) {
+                chunk[chunkIndex++] = assertHasNullValue(nullValue);
+                continue;
+            }
+            if (!ih.hasBytes()) {
+                return false;
+            }
+            if (!t.tryParseLong(ih.bs(), longHolder)) {
+                return false;
+            }
+            final long value = longHolder[0];
+            if (value < minValue || value > maxValue) {
+                return false;
+            }
+            if (nullValue != null && value == nullValue) {
+                // If a sentinel null value is defined, it cannot be present in the input.
+                return false;
+            }
+            // TODO(kosak): this is a cheap test but is not really fair, as there are
+            // certainly *some* large longs (e.g. powers of two) that are easily
+            // representable as float. It may be good enough however, and may more closely
+            // represent the input's "intent".
+            if (value < -9_999_999 || value > 9_999_999) {
+                // Not an error, but needed in case we eventually fall back to float.
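+                // (For instance, 16_777_216 == 2^24 has eight digits yet is exactly representable
+                // as a float.)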
+ ctx.hasFewerThan8SigFigsSoFar = false; + } + if (ih.bs().size() > 1) { + ctx.isNullOrWidthOneSoFar = false; + } + chunk[chunkIndex++] = value * scale; + } while (ih.tryMoveNext()); + sink.write(chunk, 0, current, chunkIndex); + return true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/context/ParseContext.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/context/ParseContext.java new file mode 100644 index 00000000000..f2f8d242bed --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/context/ParseContext.java @@ -0,0 +1,46 @@ +package io.deephaven.csv.parsers.context; + +import io.deephaven.csv.sinks.SinkFactory; +import io.deephaven.csv.tokenization.Tokenizer; + +public final class ParseContext { + /** + * The Tokenizer is responsible for parsing entities like ints, doubles, supported DateTime formats, etc. + */ + public final Tokenizer tokenizer; + /** + * Caller-specified interface for making all the various Sink<TARRAY> types. + */ + public final SinkFactory sinkFactory; + /** + * The settings for the various null sentinels. + */ + public final SentinelConfiguration sentinelConfiguration; + /** + * This is a bit of a hack which allows the FloatParser to decide what to do when it encounters a value with more + * significant digits than would fit in a float. If it knows a double parser is available, the float parser will + * fail (and let the double parser handle the value). Otherwise (if a double parser is not available), it will just + * accept the number and truncate. + */ + public final boolean doubleParserIsAvailable; + /** + * Whether all the cells seen so far have fewer than 8 significant figures. This is used when inferring float vs + * long. + */ + public boolean hasFewerThan8SigFigsSoFar; + /** + * Whether all the cells seen so far are the "null" indicator (usually the empty string), or are 1 character in + * length. This is used when inferring char vs String. + */ + public boolean isNullOrWidthOneSoFar; + + public ParseContext(Tokenizer tokenizer, SinkFactory sinkFactory, SentinelConfiguration sentinelConfiguration, + boolean doubleParserIsAvailable) { + this.tokenizer = tokenizer; + this.sinkFactory = sinkFactory; + this.sentinelConfiguration = sentinelConfiguration; + this.doubleParserIsAvailable = doubleParserIsAvailable; + hasFewerThan8SigFigsSoFar = true; + isNullOrWidthOneSoFar = true; + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/parsers/context/SentinelConfiguration.java b/extensions/csv/src/main/java/io/deephaven/csv/parsers/context/SentinelConfiguration.java new file mode 100644 index 00000000000..34166b208c7 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/parsers/context/SentinelConfiguration.java @@ -0,0 +1,67 @@ +package io.deephaven.csv.parsers.context; + +/** + * This class holds the configuration for the optional sentinel values for each type. + * This is useful for systems where there is a special distinguished value taken from the + * range of values meant to be interpreted as null. For example, in the Deephaven system, + * Long.MIN_VALUE is not a valid long value in the system; rather it is taken to mean "null". + * + * When the reader encounters a null CSV cell (typically, one that is empty or has been configured + * by the user to be the special "null cell" string), the library will consult the values + * here to see if there is a sentinel value for that type that means null. 
If there is no + * such sentinel value configured, then it is an error to have a null CSV cell. Conversely, + * if there is a sentinel value configured here, then the system will refuse to read + * a value of that type as a normal value of that type. + * + * This rule interacts with type inference: say you have configured nullIntValue to be + * some value (say, Integer.MIN_VALUE). If the system is reading a column of ints, and + * you have this nullIntValue in the input, then the system will conclude that it cannot + * parse the column as int and it will try the next configured parser (typically, the + * parser for long). Each parser is configured this way (to punt to the next). If type + * inferencing has led you to the "last" numeric parser, namely double, and you encounter + * the nullDoubleValue, then the system would interpret your column as strings. + */ +public class SentinelConfiguration { + /** + * null means "not configured". + */ + public Byte nullBooleanAsByteValue; + /** + * null means "not configured". + */ + public Byte nullByteValue; + /** + * null means "not configured". + */ + public Short nullShortValue; + /** + * null means "not configured". + */ + public Integer nullIntValue; + /** + * null means "not configured". + */ + public Long nullLongValue; + /** + * null means "not configured". + */ + public Float nullFloatValue; + /** + * null means "not configured". + */ + public Double nullDoubleValue; + /** + * null means "not configured". + */ + public Character nullCharValue; + /** + * String is configured differently from all the others. null means "not configured". + * To configure a value, make a string array of length 1 with the desired null value as its + * first element. + */ + public String[] nullStringValue; + /** + * null means "not configured". + */ + public Long nullDateTimeAsLongValue; +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/reading/CellGrabber.java b/extensions/csv/src/main/java/io/deephaven/csv/reading/CellGrabber.java new file mode 100644 index 00000000000..f284ed299e7 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/reading/CellGrabber.java @@ -0,0 +1,230 @@ +package io.deephaven.csv.reading; + +import io.deephaven.csv.containers.GrowableCharBuffer; +import io.deephaven.csv.tokenization.RangeTests; +import io.deephaven.csv.containers.CharSlice; + +import java.io.IOException; +import java.io.Reader; + +public final class CellGrabber { + private static final int BUFFER_SIZE = 65536; + private final char quoteChar; + private final char fieldDelimiter; + /** + * Whether to trim leading and trailing blanks from non-quoted values. + */ + private boolean ignoreSurroundingSpaces = false; + /** + * Whether to trim leading and trailing blanks from inside quoted values. 
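+     * For example: with trim set, a quoted cell written as "  hello  " yields hello, while
+     * ignoreSurroundingSpaces governs the blanks around an unquoted cell before it is captured.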
+ */ + private final boolean trim; + private final Reader reader; + private final char[] buffer; + private int size; + private int offset; + private int startOffset; + private final GrowableCharBuffer spillBuffer; + private boolean lastInRow; + + public CellGrabber(Reader reader, char quoteChar, char fieldDelimiter, boolean ignoreSurroundingSpaces, + boolean trim) { + this.reader = reader; + this.quoteChar = quoteChar; + this.fieldDelimiter = fieldDelimiter; + this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; + this.trim = trim; + this.buffer = new char[BUFFER_SIZE]; + this.size = 0; + this.offset = 0; + this.spillBuffer = new GrowableCharBuffer(); + } + + public boolean grabNext(CharSlice dest, boolean[] lastInRow) throws IOException { + spillBuffer.clear(); + startOffset = offset; + this.lastInRow = false; + + if (ignoreSurroundingSpaces) { + skipWhitespace(); + } + if (!tryEnsureMore()) { + return false; + } + + // Is first char the quote char? + if (buffer[offset] == quoteChar) { + ++offset; + startOffset = offset; + processQuotedMode(dest); + if (trim) { + trimWhitespace(dest); + } + } else { + startOffset = offset; + processUnquotedMode(dest); + } + lastInRow[0] = this.lastInRow; + return true; + } + + private void processQuotedMode(CharSlice dest) throws IOException { + OUTER: while (true) { + while (offset != size) { + final char ch = buffer[offset++]; + if (ch != quoteChar) { + // Ordinary character. Note: in quoted mode we will gladly eat field and line separators. + continue; + } + // This character is a quote char. It could be the end of the cell, or it could be an escaped + // quote char (e.g. ""). The way to tell is to peek ahead at the next character. + if (!tryEnsureMore()) { + // There is no next char (we are at end of input), so let's call this end of cell. + break OUTER; + } + final char peek = buffer[offset]; + if (peek != quoteChar) { + // There is a next char, but it's not a quotation mark. So this + // quotation mark must be the end of the quoted string. + break OUTER; + } + // There is a next character, and it *is* a quotation mark. So this is a quoted quote + // "", to be interpreted as ". So we'll spill this string (up to the first quotation mark), + // skip the second quotation mark, and keep going. + spillRange(); + // Skip the second quotation mark. + ++offset; + startOffset = offset; + } + if (!tryEnsureMore()) { + // TODO(kosak): not an IOException + throw new IOException("Cell did not have closing quote char"); + } + } + // We got out of the quoted string. Remember where you are and then consume the + // rest of the characters. + finishField(dest); + + // The easiest way to make all the above logic run smoothly is to let the final quotation mark + // (which will unconditionally be there) and subsequent whitespace (if any) into the field. + // Then we can simply trim it back out now. 
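+        // For example, for the input ["abc"  ,] the field text at this point is [abc"  ],
+        // the closing quote plus trailing blanks. The loop below trims the blanks, and the
+        // quote itself is dropped after the sanity check.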
+ while (dest.begin() != dest.end() && RangeTests.isWhitespace(dest.back())) { + dest.setEnd(dest.end() - 1); + } + if (dest.begin() == dest.end() || dest.back() != quoteChar) { + throw new RuntimeException("Logic error: final non-whitespace in field is not quotation mark"); + } + dest.setEnd(dest.end() - 1); + } + + private void processUnquotedMode(CharSlice dest) throws IOException { + finishField(dest); + } + + private void skipWhitespace() throws IOException { + do { + while (offset != size) { + final char ch = buffer[offset]; + if (!RangeTests.isWhitespace(ch)) { + return; + } + ++offset; + } + } while (tryEnsureMore()); + } + + private void finishField(CharSlice dest) throws IOException { + while (true) { + while (offset != size) { + final char ch = buffer[offset]; + if (ch == fieldDelimiter) { + finish(dest); + ++offset; + return; + } + if (ch == '\n') { + lastInRow = true; + finish(dest); + ++offset; + return; + } + if (ch == '\r') { + lastInRow = true; + finish(dest); + ++offset; + if (tryEnsureMore()) { + // might be \r\n + if (buffer[offset] == '\n') { + ++offset; + } + } + return; + } + ++offset; + } + if (!tryEnsureMore()) { + finish(dest); + return; + } + } + } + + /** + * @return true if there are more characters. + */ + private boolean tryEnsureMore() throws IOException { + if (offset != size) { + return true; + } + spillRange(); + refillBuffer(); + return size != 0; + } + + private void spillRange() { + spillBuffer.append(buffer, startOffset, offset - startOffset); + startOffset = offset; + } + + private void refillBuffer() throws IOException { + offset = 0; + startOffset = 0; + while (true) { + final int charsRead = reader.read(buffer); + if (charsRead < 0) { + size = 0; + return; + } + if (charsRead > 0) { + size = charsRead; + return; + } + // I don't think zero-length reads happen, but if they do, I'll just keep trying. + } + } + + private void finish(CharSlice dest) { + if (spillBuffer.size() == 0) { + // Never spilled. Still in same buffer. + dest.reset(buffer, startOffset, offset); + return; + } + // Otherwise, append residual to spillBuffer and return a reference + // to spillBuffer. 
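+        // (Spilling happens when a cell straddles a buffer refill, and when escaped quote
+        // pairs force the earlier segments of a cell out of the main buffer.)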
+        spillRange();
+        dest.reset(spillBuffer.data(), 0, spillBuffer.size());
+    }
+
+    private static void trimWhitespace(CharSlice cs) {
+        int begin = cs.begin();
+        int end = cs.end();
+        final char[] data = cs.data();
+        while (begin != end && RangeTests.isWhitespace(data[begin])) {
+            ++begin;
+        }
+        while (begin != end && RangeTests.isWhitespace(data[end - 1])) {
+            --end;
+        }
+        cs.reset(data, begin, end);
+    }
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/reading/CsvReader.java b/extensions/csv/src/main/java/io/deephaven/csv/reading/CsvReader.java
new file mode 100644
index 00000000000..c4dbb2c3055
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/reading/CsvReader.java
@@ -0,0 +1,302 @@
+package io.deephaven.csv.reading;
+
+import io.deephaven.csv.densestorage.DenseStorageReader;
+import io.deephaven.csv.densestorage.DenseStorageWriter;
+import io.deephaven.csv.parsers.ParserBase;
+import io.deephaven.csv.sinks.Sink;
+import io.deephaven.csv.parsers.context.SentinelConfiguration;
+import io.deephaven.csv.parsers.Parsers;
+import io.deephaven.csv.sinks.SinkFactory;
+import io.deephaven.csv.containers.CharSlice;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.*;
+import java.util.concurrent.*;
+
+public class CsvReader {
+    private final Reader reader;
+    private final SinkFactory sinkFactory;
+    /**
+     * Whether to trim leading and trailing blanks from non-quoted values.
+     */
+    private boolean ignoreSurroundingSpaces = false;
+    /**
+     * Whether to trim leading and trailing blanks from inside quoted values.
+     */
+    private boolean trim = false;
+    /**
+     * Whether the incoming data has column headers.
+     */
+    private boolean hasHeaders = true;
+    /**
+     * The quote character (used when you want field or line delimiters to be interpreted as literal text).
+     */
+    private char quoteChar = '"';
+    /**
+     * The field delimiter (the character that separates one column from the next).
+     */
+    private char fieldDelimiter = ',';
+    /**
+     * Whether to run asynchronously. In particular, the operation of reading the raw file, breaking it into columns,
+     * and storing that column text in memory can run in parallel with parsing the data for a given column, and all the
+     * column parsers can themselves run in parallel.
+     */
+    private boolean async = false;
+    /**
+     * The user-defined set of parsers that participate in type inference. Defaults to Parsers.DEFAULT.
+     */
+    private Set<ParserBase<?>> inference = new HashSet<>(Parsers.DEFAULT);
+    /**
+     * Client-specified headers that can be used to override the existing headers in the input (if hasHeaders is true),
+     * or to provide absent headers (if hasHeaders is false).
+     */
+    private List<String> clientSpecifiedHeaders = new ArrayList<>();
+    /**
+     * Used to force a specific parser for a specific column.
+     */
+    private Map<String, ParserBase<?>> parsers = new HashMap<>();
+    /**
+     * The parser to use when a column consists entirely of null cells.
+     */
+    private ParserBase<?> nullParser;
+    /**
+     * The SentinelConfiguration holding the various null sentinels.
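+     * Populated by the setNull*Value setters below.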
+ */ + private final SentinelConfiguration nullContext = new SentinelConfiguration(); + + public CsvReader(Reader reader, SinkFactory sinkFactory) { + this.reader = reader; + this.sinkFactory = sinkFactory; + } + + public Result read() throws IOException { + final CellGrabber grabber = new CellGrabber(reader, quoteChar, fieldDelimiter, ignoreSurroundingSpaces, trim); + String[] headersToUse = null; + if (hasHeaders) { + headersToUse = tryReadOneRow(grabber); + if (headersToUse == null) { + throw new RuntimeException("Can't proceed because hasHeaders is set but input file is empty"); + } + } + + // If not, maybe override with client-specified headers + if (clientSpecifiedHeaders.size() != 0) { + headersToUse = clientSpecifiedHeaders.toArray(new String[0]); + } + + // If not, maybe generate synthetic column headers (works only if the file is non-empty) + String[] firstDataRow = null; + if (headersToUse == null) { + firstDataRow = tryReadOneRow(grabber); + if (firstDataRow == null) { + throw new RuntimeException( + "Can't proceed because input file is empty and client has not specified headers"); + } + headersToUse = new String[firstDataRow.length]; + for (int ii = 0; ii < headersToUse.length; ++ii) { + headersToUse[ii] = "Column" + (ii + 1); + } + } + + final int numCols = headersToUse.length; + + // Create a DenseStorageWriter and two readers for each column. + final DenseStorageWriter[] dsws = new DenseStorageWriter[numCols]; + final DenseStorageReader[] dsr0s = new DenseStorageReader[numCols]; + final DenseStorageReader[] dsr1s = new DenseStorageReader[numCols]; + for (int ii = 0; ii < numCols; ++ii) { + final DenseStorageWriter dsw = new DenseStorageWriter(); + dsws[ii] = dsw; + dsr0s[ii] = dsw.newReader(); + dsr1s[ii] = dsw.newReader(); + } + + // run in separate thread if async + // note: may need to have the writer in a special thread pool, because we don't want to ever starve the + // writer. + final String[] firstDataRowFinal = firstDataRow; + final ExecutorService exec = async ? 
Executors.newFixedThreadPool(4) : Executors.newSingleThreadExecutor();
+        final Future<Long> numRowsFuture = exec.submit(
+                () -> ParseInputFileToDenseStorage.doit(firstDataRowFinal, grabber, dsws));
+
+        final ArrayList<Future<Sink<?>>> sinkFutures = new ArrayList<>();
+
+        for (int ii = 0; ii < numCols; ++ii) {
+            final Set<ParserBase<?>> inferenceToUse = calcInferenceToUse(headersToUse[ii]);
+
+            final int iiCopy = ii;
+            final Future<Sink<?>> fcb = exec.submit(
+                    () -> ParseDenseStorageToColumn.doit(dsr0s[iiCopy], dsr1s[iiCopy],
+                            inferenceToUse, nullParser, nullContext, sinkFactory));
+            sinkFutures.add(fcb);
+        }
+
+        final long numRows;
+        Sink<?>[] sinks = new Sink[numCols];
+        try {
+            numRows = numRowsFuture.get();
+            for (int ii = 0; ii < numCols; ++ii) {
+                sinks[ii] = sinkFutures.get(ii).get();
+            }
+        } catch (Exception ee) {
+            // TODO(kosak): not an IOException
+            throw new IOException("Caught exception while parsing columns", ee);
+        }
+
+        return new Result(numRows, headersToUse, sinks);
+    }
+
+    private Set<ParserBase<?>> calcInferenceToUse(String columnName) {
+        ParserBase<?> specifiedParser = parsers.get(columnName);
+        if (specifiedParser == null) {
+            return inference;
+        }
+        Set<ParserBase<?>> result = new HashSet<>();
+        result.add(specifiedParser);
+        return result;
+    }
+
+    private static String[] tryReadOneRow(CellGrabber grabber) throws IOException {
+        final List<String> headers = new ArrayList<>();
+
+        // Grab the header
+        final CharSlice slice = new CharSlice();
+        final boolean[] lastInRow = new boolean[1]; // Mutable boolean
+        do {
+            if (!grabber.grabNext(slice, lastInRow)) {
+                return null;
+            }
+            headers.add(slice.toString());
+        } while (!lastInRow[0]);
+        return headers.toArray(new String[0]);
+    }
+
+    public CsvReader setIgnoreSurroundingSpaces(boolean value) {
+        ignoreSurroundingSpaces = value;
+        return this;
+    }
+
+    public CsvReader setTrim(boolean value) {
+        trim = value;
+        return this;
+    }
+
+    public CsvReader setHasHeaders(boolean value) {
+        hasHeaders = value;
+        return this;
+    }
+
+    public CsvReader setFieldDelimiter(char value) {
+        fieldDelimiter = value;
+        return this;
+    }
+
+    public CsvReader setQuoteChar(char value) {
+        quoteChar = value;
+        return this;
+    }
+
+    public CsvReader setAsync(boolean value) {
+        this.async = value;
+        return this;
+    }
+
+    public CsvReader setInference(Collection<ParserBase<?>> parsers) {
+        inference = new HashSet<>(parsers);
+        return this;
+    }
+
+    public CsvReader setHeaders(Collection<String> headers) {
+        clientSpecifiedHeaders = new ArrayList<>(headers);
+        return this;
+    }
+
+    public CsvReader setParsers(Map<String, ParserBase<?>> parsers) {
+        this.parsers = new HashMap<>(parsers);
+        return this;
+    }
+
+    public CsvReader setNullParser(ParserBase<?> nullParser) {
+        this.nullParser = nullParser;
+        return this;
+    }
+
+    public CsvReader setNullBooleanAsByteValue(byte nullValue) {
+        nullContext.nullBooleanAsByteValue = nullValue;
+        return this;
+    }
+
+    public CsvReader setNullByteValue(byte nullValue) {
+        nullContext.nullByteValue = nullValue;
+        return this;
+    }
+
+    public CsvReader setNullShortValue(short nullValue) {
+        nullContext.nullShortValue = nullValue;
+        return this;
+    }
+
+    public CsvReader setNullIntValue(int nullValue) {
+        nullContext.nullIntValue = nullValue;
+        return this;
+    }
+
+    public CsvReader setNullLongValue(long nullValue) {
+        nullContext.nullLongValue = nullValue;
+        return this;
+    }
+
+    public CsvReader setNullFloatValue(float nullValue) {
+        nullContext.nullFloatValue = nullValue;
+        return this;
+    }
+
+    public CsvReader setNullDoubleValue(double nullValue) {
+        nullContext.nullDoubleValue = nullValue;
+        return this;
+    }
+
+    public CsvReader setNullCharValue(char
nullValue) { + nullContext.nullCharValue = nullValue; + return this; + } + + public CsvReader setNullStringValue(String nullValue) { + nullContext.nullStringValue = new String[]{nullValue}; + return this; + } + + public CsvReader setNullDateTimeAsLongValue(long nullValue) { + nullContext.nullDateTimeAsLongValue = nullValue; + return this; + } + + public static final class Result { + private final long numRows; + private final String[] columnNames; + private final Sink[] columns; + + public Result(long numRows, String[] columnNames, Sink[] columns) { + this.numRows = numRows; + this.columnNames = columnNames; + this.columns = columns; + } + + public long numRows() { + return numRows; + } + + public String[] columnNames() { + return columnNames; + } + + public Sink[] columns() { + return columns; + } + + public int numCols() { + return columns.length; + } + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/reading/ParseDenseStorageToColumn.java b/extensions/csv/src/main/java/io/deephaven/csv/reading/ParseDenseStorageToColumn.java new file mode 100644 index 00000000000..5d462bfc6bc --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/reading/ParseDenseStorageToColumn.java @@ -0,0 +1,119 @@ +package io.deephaven.csv.reading; + +import io.deephaven.csv.parsers.IteratorHolder; +import io.deephaven.csv.parsers.ParserBase; +import io.deephaven.csv.parsers.Parsers; +import io.deephaven.csv.parsers.context.SentinelConfiguration; +import io.deephaven.csv.densestorage.DenseStorageReader; +import io.deephaven.csv.sinks.Sink; +import io.deephaven.csv.sinks.SinkFactory; +import io.deephaven.csv.tokenization.Tokenizer; +import io.deephaven.csv.util.Renderer; +import io.deephaven.csv.parsers.context.ParseContext; + +import java.util.*; +import java.util.stream.Collectors; + +public final class ParseDenseStorageToColumn { + public static Sink doit(DenseStorageReader dsr, DenseStorageReader dsrAlt, + Set> parsers, ParserBase nullParser, + SentinelConfiguration nullContext, SinkFactory sinkFactory) { + final ParseDenseStorageToColumn pdsc = new ParseDenseStorageToColumn(dsr, dsrAlt, parsers, nullParser, + nullContext, sinkFactory); + return pdsc.run(); + } + + /** + * A reader for the input. + */ + private final DenseStorageReader dsr; + /** + * A second reader for the same input (used to perform the second pass over the data). + */ + private final DenseStorageReader dsrAlt; + /** + * The set of parsers to try. + */ + private final Set> parsers; + /** + * The Parser to use if the column contains all nulls. + */ + private final ParserBase nullParser; + /** + * Configuration for null sentinel values. + */ + private final SentinelConfiguration sentinelConfiguration; + /** + * Factory that makes all of the Sinks of various types, used to consume the data + * we produce. + */ + private final SinkFactory sinkFactory; + + private ParseDenseStorageToColumn( + DenseStorageReader dsr, DenseStorageReader dsrAlt, Set> parsers, + ParserBase nullParser, SentinelConfiguration sentinelConfiguration, SinkFactory sinkFactory) { + this.dsr = dsr; + this.dsrAlt = dsrAlt; + this.parsers = parsers; + this.nullParser = nullParser; + this.sentinelConfiguration = sentinelConfiguration; + this.sinkFactory = sinkFactory; + } + + private Sink run() { + List> parsersToTry; + if (parsers == null) { + parsersToTry = Parsers.DEFAULT; + } else { + // Put the user-specified parsers in precedence order. 
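+            // For example, if the caller supplied {INT, DOUBLE, STRING}, the filtered list
+            // comes back in Parsers.PRECEDENCE order, so the narrower numeric parsers are
+            // attempted before the wider ones, with String last.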
+            parsersToTry = Parsers.PRECEDENCE.stream().filter(parsers::contains).collect(Collectors.toList());
+        }
+
+        // This is a hack that lets the float parser know whether there is another parser
+        // available (namely, the double parser) that can handle more significant digits
+        // than it can.
+        final boolean doubleParserIsAvailable = parsersToTry.contains(Parsers.DOUBLE);
+
+        // Skip null cells. Nulls are supported but they cannot help us with type inference.
+        final IteratorHolder ih = new IteratorHolder(dsr);
+        boolean columnIsAllNulls = true;
+        while (ih.tryMoveNext()) {
+            if (!isNullCell(ih)) {
+                columnIsAllNulls = false;
+                break;
+            }
+        }
+
+        final Tokenizer tokenizer = new Tokenizer();
+        final ParseContext ctx = new ParseContext(tokenizer, sinkFactory, sentinelConfiguration,
+                doubleParserIsAvailable);
+
+        if (columnIsAllNulls && parsersToTry.size() != 1) {
+            if (nullParser == null) {
+                throw new RuntimeException(
+                        "Column contains all null cells: can't infer type of column, and nullParser is not set.");
+            }
+            parsersToTry = List.of(nullParser);
+        }
+
+        if (parsersToTry.size() == 0) {
+            throw new RuntimeException("No parsers available to try.");
+        }
+
+        final IteratorHolder ihAlt = new IteratorHolder(dsrAlt);
+        for (ParserBase<?> parser : parsersToTry) {
+            final Sink<?> result = parser.tryParse(ctx, ih, ihAlt);
+            if (result != null) {
+                return result;
+            }
+        }
+
+        throw new RuntimeException(String.format("Tried %d parsers, none succeeded. Parsers were: %s",
+                parsersToTry.size(), Renderer.renderList(parsersToTry, ", ", p -> p.getClass().getName())));
+    }
+
+    // TODO(kosak): make the concept of "null cell" configurable.
+    private static boolean isNullCell(IteratorHolder ih) {
+        return ih.sliceSize() == 0;
+    }
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/reading/ParseInputFileToDenseStorage.java b/extensions/csv/src/main/java/io/deephaven/csv/reading/ParseInputFileToDenseStorage.java
new file mode 100644
index 00000000000..82b6770b487
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/reading/ParseInputFileToDenseStorage.java
@@ -0,0 +1,81 @@
+package io.deephaven.csv.reading;
+
+import io.deephaven.csv.densestorage.DenseStorageWriter;
+import io.deephaven.csv.containers.CharSlice;
+
+import java.io.IOException;
+
+public class ParseInputFileToDenseStorage {
+    public static long doit(String[] optionalFirstDataRow, CellGrabber cb, DenseStorageWriter[] dsws)
+            throws IOException {
+        final CharSlice slice = new CharSlice();
+        final int numCols = dsws.length;
+
+        // Zero-based row number.
+        long rowNum = 0;
+        // There is a case (namely when the file has no headers and the client hasn't specified
+        // them either) where the CsvReader was forced to read the first row of data from the file
+        // in order to determine the number of columns. If this happened, optionalFirstDataRow will
+        // be non-null and we can process it as data here. Then the rest of the processing can
+        // proceed as normal.
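+        // For example, a headerless two-column file whose first line is "1,foo" arrives here
+        // with synthesized headers Column1 and Column2, and with "1" and "foo" as the first
+        // data row.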
+        if (optionalFirstDataRow != null) {
+            if (optionalFirstDataRow.length != numCols) {
+                throw new RuntimeException(String.format("Expected %d columns but optionalFirstDataRow had %d",
+                        numCols, optionalFirstDataRow.length));
+            }
+            for (int ii = 0; ii < optionalFirstDataRow.length; ++ii) {
+                final char[] temp = optionalFirstDataRow[ii].toCharArray();
+                slice.reset(temp, 0, temp.length);
+                dsws[ii].append(slice);
+            }
+            ++rowNum;
+        }
+
+        // Grab the remaining lines and store them.
+        // The outer while is the "row" iteration.
+        final boolean[] lastInRow = new boolean[1]; // Mutable boolean
+        OUTER: while (true) {
+            // Zero-based column number.
+            int colNum = 0;
+
+            try {
+                // The inner while is the "column" iteration
+                while (true) {
+                    if (!cb.grabNext(slice, lastInRow)) {
+                        if (colNum == 0) {
+                            break OUTER;
+                        }
+                        // TODO(kosak): not actually an IO exception
+                        throw new IOException(
+                                String.format("Short last row: expected %d columns, got %d columns", numCols, colNum));
+                    }
+                    final DenseStorageWriter dsw = dsws[colNum];
+                    dsw.append(slice);
+                    ++colNum;
+                    if (colNum == numCols) {
+                        if (!lastInRow[0]) {
+                            // TODO(kosak): not actually an IO exception
+                            throw new IOException(
+                                    String.format("Row %d has too many columns (expected %d)", rowNum, numCols));
+                        }
+                        break;
+                    }
+                    if (lastInRow[0]) {
+                        // TODO(kosak): not actually an IO exception
+                        throw new IOException(String.format("Row %d has too few columns (expected %d)", rowNum, numCols));
+                    }
+                }
+            } catch (Exception e) {
+                final String message = String.format("While processing row %d, column %d:",
+                        rowNum + 1, colNum + 1);
+                throw new RuntimeException(message, e);
+            }
+            ++rowNum;
+        }
+        for (DenseStorageWriter dsw : dsws) {
+            dsw.finish();
+        }
+
+        return rowNum;
+    }
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/sinks/Sink.java b/extensions/csv/src/main/java/io/deephaven/csv/sinks/Sink.java
new file mode 100644
index 00000000000..2e7e65c4f45
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/sinks/Sink.java
@@ -0,0 +1,5 @@
+package io.deephaven.csv.sinks;
+
+public interface Sink<TARRAY> {
+    void write(TARRAY src, int srcOffset, long destOffset, int size);
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/sinks/SinkFactory.java b/extensions/csv/src/main/java/io/deephaven/csv/sinks/SinkFactory.java
new file mode 100644
index 00000000000..a4b06e58507
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/sinks/SinkFactory.java
@@ -0,0 +1,14 @@
+package io.deephaven.csv.sinks;
+
+public interface SinkFactory {
+    Sink<byte[]> makeBooleanAsByteSink();
+    Sink<byte[]> makeByteSink();
+    Sink<short[]> makeShortSink();
+    Sink<int[]> makeIntSink();
+    Sink<long[]> makeLongSink();
+    Sink<float[]> makeFloatSink();
+    Sink<double[]> makeDoubleSink();
+    Sink<char[]> makeCharSink();
+    Sink<String[]> makeStringSink();
+    Sink<long[]> makeDateTimeAsLongSink();
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/tokenization/RangeTests.java b/extensions/csv/src/main/java/io/deephaven/csv/tokenization/RangeTests.java
new file mode 100644
index 00000000000..04b06c10846
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/tokenization/RangeTests.java
@@ -0,0 +1,48 @@
+package io.deephaven.csv.tokenization;
+
+import io.deephaven.util.QueryConstants;
+
+/**
+ * Simple range tests that may be faster than the corresponding Java utilities because they
+ * are ASCII-specific.
+ */
+public class RangeTests {
+    public static char toUpper(char ch) {
+        return isLower(ch) ?
(char) (ch - 'a' + 'A') : ch;
+    }
+
+    public static boolean isUpper(char ch) {
+        return ch >= 'A' && ch <= 'Z';
+    }
+
+    public static boolean isLower(char ch) {
+        return ch >= 'a' && ch <= 'z';
+    }
+
+    public static boolean isDigit(char ch) {
+        return ch >= '0' && ch <= '9';
+    }
+
+    public static boolean isWhitespace(char ch) {
+        // TODO(kosak)
+        return ch == ' ';
+    }
+
+    public static boolean isInRangeForByte(long value) {
+        return value >= QueryConstants.MIN_BYTE && value <= QueryConstants.MAX_BYTE;
+    }
+
+    public static boolean isInRangeForShort(long value) {
+        return value >= QueryConstants.MIN_SHORT && value <= QueryConstants.MAX_SHORT;
+    }
+
+    public static boolean isInRangeForInt(long value) {
+        return value >= QueryConstants.MIN_INT && value <= QueryConstants.MAX_INT;
+    }
+
+    public static boolean isInRangeForFloat(double value) {
+        return Double.isNaN(value) ||
+                Double.isInfinite(value) ||
+                (value >= QueryConstants.MIN_FINITE_FLOAT && value <= QueryConstants.MAX_FINITE_FLOAT);
+    }
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/tokenization/Tokenizer.java b/extensions/csv/src/main/java/io/deephaven/csv/tokenization/Tokenizer.java
new file mode 100644
index 00000000000..372c8787284
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/tokenization/Tokenizer.java
@@ -0,0 +1,522 @@
+package io.deephaven.csv.tokenization;
+
+import gnu.trove.map.hash.TIntObjectHashMap;
+import io.deephaven.csv.tokenization.external.DoubleParser;
+import io.deephaven.csv.containers.ByteSlice;
+import io.deephaven.time.TimeZone;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
+
+public class Tokenizer {
+    /**
+     * Temporary variable owned by tryParseDBDateTime.
+     */
+    private final long[] dbDateTimeTemp0 = new long[1];
+    /**
+     * Temporary variable owned by tryParseDBDateTime.
+     */
+    private final long[] dbDateTimeTemp1 = new long[1];
+    /**
+     * Temporary variable owned by tryParseDBDateTime.
+     */
+    private final long[] dbDateTimeTemp2 = new long[1];
+    /**
+     * Temporary variable owned by tryParseDBDateTime.
+     */
+    private final DateTimeZone[] dbDateTimeTempZone = new DateTimeZone[1];
+
+    private final TimeZoneParser timeZoneParser = new TimeZoneParser();
+
+    /**
+     * ByteSlice is *NOT* modified, regardless of success or failure.
+     */
+    public boolean tryParseBoolean(ByteSlice bs, boolean[] result) {
+        final byte[] d = bs.data();
+        final int o = bs.begin();
+        final int bSize = bs.end() - o;
+        if (bSize == 4) {
+            result[0] = true; // Optimistic
+            // sad
+            return (d[o] == 't' || d[o] == 'T') &&
+                    (d[o + 1] == 'r' || d[o + 1] == 'R') &&
+                    (d[o + 2] == 'u' || d[o + 2] == 'U') &&
+                    (d[o + 3] == 'e' || d[o + 3] == 'E');
+        }
+
+        if (bSize == 5) {
+            result[0] = false; // Optimistic
+            // mega sad
+            return (d[o] == 'f' || d[o] == 'F') &&
+                    (d[o + 1] == 'a' || d[o + 1] == 'A') &&
+                    (d[o + 2] == 'l' || d[o + 2] == 'L') &&
+                    (d[o + 3] == 's' || d[o + 3] == 'S') &&
+                    (d[o + 4] == 'e' || d[o + 4] == 'E');
+        }
+
+        return false;
+    }
+
+    /**
+     * ByteSlice is *NOT* modified, regardless of success or failure.
+     */
+    public boolean tryParseLong(final ByteSlice bs, long[] result) {
+        final int savedBegin = bs.begin();
+        Mutating.trim(bs);
+        final boolean success = Mutating.tryParseLong(bs, result);
+        bs.setBegin(savedBegin);
+        return success;
+    }
+
+    /**
+     * ByteSlice is *NOT* modified, regardless of success or failure.
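+     * For example, {@code 3.25} parses successfully and deposits the value in {@code result[0]},
+     * while {@code 3.25x} fails because of the trailing character.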
+ */ + public boolean tryParseDouble(ByteSlice bs, double[] result) { + return DoubleParser.tryParseDouble(bs, result); + } + + public boolean hasFewerThan8SignificantFigures(ByteSlice bs) { + final byte[] d = bs.data(); + int current = bs.begin(); + final int end = bs.end(); + + // Find first digit + while (true) { + if (current == end) { + return true; + } + final char ch = (char) d[current++]; + if (RangeTests.isDigit(ch)) { + break; + } + } + + // Find last digit. Intervening period is ok + final int firstDigitPos = current; + int decimalPointAdjustment = 0; + while (current != end) { + final char ch = (char) d[current++]; + if (ch == '.') { + decimalPointAdjustment = 1; + continue; + } + if (!RangeTests.isDigit(ch)) { + break; + } + } + + final int numDigits = current - firstDigitPos - decimalPointAdjustment; + return numDigits < 8; + } + + /** + * ByteSlice is *NOT* modified, regardless of sucess or failure. + */ + public boolean tryParseDBDateTime(ByteSlice bs, long[] result) { + final int savedBegin = bs.begin(); + final boolean success = Mutating.tryParseDBDateTime(bs, timeZoneParser, dbDateTimeTemp0, + dbDateTimeTemp1, dbDateTimeTemp2, dbDateTimeTempZone, result); + bs.setBegin(savedBegin); + return success; + } + + /** + * The methods in this class obey the following invariants: On success, they update their incoming ByteSlice (to + * point to the end of the sequence) On failure, they leave it unchanged. + */ + private static final class Mutating { + public static void trim(ByteSlice bs) { + while (bs.begin() != bs.end() && RangeTests.isWhitespace((char) bs.front())) { + bs.setBegin(bs.begin() + 1); + } + while (bs.begin() != bs.end() && RangeTests.isWhitespace((char) bs.back())) { + bs.setEnd(bs.end() - 1); + } + } + + private static boolean tryEatChar(ByteSlice bs, char ch) { + if (bs.begin() == bs.end() || bs.front() != ch) { + return false; + } + bs.setBegin(bs.begin() + 1); + return true; + } + + public static boolean tryParseLong(final ByteSlice bs, long[] result) { + final int savedBegin = bs.begin(); + if (bs.begin() == bs.end()) { + return false; + } + final char front = (char) bs.front(); + boolean negative = false; + if (front == '+') { + bs.setBegin(bs.begin() + 1); + } else if (front == '-') { + negative = true; + bs.setBegin(bs.begin() + 1); + } + if (!tryParseWholeNumber(bs, 1, 999, negative, result)) { + bs.setBegin(savedBegin); + return false; + } + // Successful iff completely consumed. + return bs.begin() == bs.end(); + } + + // Allowable formats in DH style (tz stands for some supported timezone like NY) + // 2021-11-07T09:00:00 tz + // 2021-11-07T09:00:00.1 tz + // 2021-11-07T09:00:00.12 tz + // ... + // 2021-11-07T09:00:00.123456789 tz + // + // hyphens and colons are optional (all in or all out) + // The 'T' can also be a space. + // + // Instead of " tz" we also admit Z (no space) meaning UTC + // + // Allowable formats in UTC offset style (can be + or -) + // The offset can be hh or hh:mm or hhmm + // 2021-11-07T09:00:00+01 + // 2021-11-07T09:00:00.1-02:30 + // 2021-11-07T09:00:00.12+0300 + // ... + // 2021-11-07T09:00:00.123456789+01:30 + // + // Putting it all together: First we have the required fields + // yyyy + // - (optional, but if absent then no later hyphens or colons) + // mm + // - (optional, see above) + // dd + // T + // hh + // : (optional, see above) + // MM + // : (optional, see above) + // SS + // . 
(optional fraction, must be followed by 1-9 decimal digits) (can also be comma apparently) + // Z or space or + or - : + // Z means UTC + // space means there is a Deephaven time zone indicator following, which must be one of the + // enumeration values in the class DBTimeZone + // + or - means an offset follows, which itself is + // hh + // : (optional) + // mm (optional) + private static boolean tryParseDBDateTime( + ByteSlice bs, TimeZoneParser tzp, + long[] temp0, long[] temp1, long[] temp2, DateTimeZone[] tempZone, + long[] result) { + final int savedBegin = bs.begin(); + if (!tryParseYyyymmdd(bs, temp0, temp1, temp2)) { + return false; + } + final int year = (int) temp0[0]; + final int month = (int) temp1[0]; + final int day = (int) temp2[0]; + + // Require 'T' (or, per RFC 3339, allow ' ') + if (!tryEatChar(bs, 'T') && !tryEatChar(bs, ' ')) { + bs.setBegin(savedBegin); + return false; + } + + // Reusing result for temporary storage! + if (!tryParseHHmmssNanos(bs, temp0, temp1, temp2, result)) { + bs.setBegin(savedBegin); + return false; + } + final int hour = (int) temp0[0]; + final int minute = (int) temp1[0]; + final int second = (int) temp2[0]; + final int nanos = (int) result[0]; + + long tzAdjustmentMillis = 0; + if (tryParseIsoTimeZone(bs, tempZone, temp0)) { + tzAdjustmentMillis = temp0[0]; + } else if (!tzp.tryParseDeephavenTimeZoneIndicator(bs, tempZone)) { + bs.setBegin(savedBegin); + return false; + } + + if (bs.begin() != bs.end()) { + // Residual characters! + bs.setBegin(savedBegin); + return false; + } + + final DateTimeZone timeZoneToUse = tempZone[0]; + final DateTime dt = new DateTime(year, month, day, hour, minute, second, timeZoneToUse); + + final long dtMillis = dt.getMillis() + tzAdjustmentMillis; + result[0] = dtMillis * 1_000_000 + nanos; + return true; + } + + private static boolean tryParseYyyymmdd(ByteSlice bs, long[] yyyy, long[] mm, long[] dd) { + final int savedBegin = bs.begin(); + if (!tryParseWholeNumber(bs, 4, 4, false, yyyy)) { + return false; + } + + boolean hasPunctuation = Mutating.tryEatChar(bs, '-'); + + if (!tryParseWholeNumber(bs, 2, 2, false, mm)) { + bs.setBegin(savedBegin); + return false; + } + + if (hasPunctuation && !tryEatChar(bs, '-')) { + bs.setBegin(savedBegin); + return false; + } + return tryParseWholeNumber(bs, 2, 2, false, dd); + } + + private static boolean tryParseHHmmssNanos(ByteSlice bs, long[] hours, long[] minutes, + long[] seconds, long[] nanos) { + final int savedBegin = bs.begin(); + // Hour + if (!tryParseWholeNumber(bs, 2, 2, false, hours)) { + return false; + } + // Set defaults for minutes, seconds, nanos, in case we exit early. + minutes[0] = 0; + seconds[0] = 0; + nanos[0] = 0; + + // minutes, seconds, and nanos are optional + + // There may be a colon after hours + boolean punctuationRequired = tryEatChar(bs, ':'); + + // Try minutes + if (!tryParseWholeNumber(bs, 2, 2, false, minutes)) { + // Next thing is not a number. If we previously ingested a colon, not having a next number is an error. + // But if we did not ingest a colon, not having a number is ok. + // If we return false we are obligated to reset the slice. + minutes[0] = 0; // Sub-parse failed, but we still might return success. So this needs to be correct. 
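+                // For example, "09" alone parses as bare hours (a success), whereas "09:" fails
+                // because the colon promised minutes that never arrived.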
+                final boolean success = !punctuationRequired;
+                if (!success) {
+                    bs.setBegin(savedBegin);
+                }
+                return success;
+            }
+
+            // If a colon is required but not present, then the parse is done (this is not an error)
+            if (punctuationRequired && !tryEatChar(bs, ':')) {
+                return true;
+            }
+
+            // Try seconds
+            if (!tryParseWholeNumber(bs, 2, 2, false, seconds)) {
+                // Next thing is apparently not a number. If we previously ingested a colon, not having a next
+                // number is an error. But if we did not ingest a colon, not having a number is ok.
+                seconds[0] = 0; // Sub-parse failed, but we still might return success. So this needs to be correct.
+                final boolean success = !punctuationRequired;
+                if (!success) {
+                    bs.setBegin(savedBegin);
+                }
+                return success;
+            }
+
+            if (!tryEatChar(bs, '.') && !tryEatChar(bs, ',')) {
+                // Period (or comma!) introduces the fraction. If not present, then stop the parse here (with a
+                // success indication).
+                return true;
+            }
+
+            // Try nanoseconds
+            final int beginBeforeNs = bs.begin();
+            if (!tryParseWholeNumber(bs, 1, 9, false, nanos)) {
+                // If you couldn't get a number, that's a parse fail.
+                bs.setBegin(savedBegin);
+                return false;
+            }
+
+            // Pad to the right with zeroes (that is, in "blah.12", the .12 is 120,000,000 nanos).
+            final int length = bs.begin() - beginBeforeNs;
+            for (int ii = length; ii < 9; ++ii) {
+                nanos[0] = 10 * nanos[0];
+            }
+            return true;
+        }
+
+        private static boolean tryParseWholeNumber(ByteSlice bs, int minSize, int maxSize, boolean negate,
+                long[] result) {
+            final byte[] data = bs.data();
+            final int begin = bs.begin();
+            final int end = bs.end();
+            final int size = bs.size();
+            if (size < minSize) {
+                return false;
+            }
+            final int endToUse = Math.min(end, begin + maxSize);
+            long res = 0;
+            long prevRes = 0;
+            int current = begin;
+            // We build the number using negative values, because the negative range is slightly longer and
+            // this helps us when we happen to parse Long.MIN_VALUE.
+            for (; current < endToUse; ++current) {
+                final char ch = (char) data[current];
+                if (!RangeTests.isDigit(ch)) {
+                    break;
+                }
+                res = res * 10 - (ch - '0');
+                if (res > prevRes) {
+                    // Overflow
+                    return false;
+                }
+                prevRes = res;
+            }
+            if (current == begin) {
+                return false;
+            }
+            // Caller wanted a positive number, but we operate in a negative number system
+            if (!negate) {
+                if (res == Long.MIN_VALUE) {
+                    // Can't represent
+                    return false;
+                }
+                res = -res;
+            }
+            result[0] = res;
+            bs.setBegin(current);
+            return true;
+        }
+
+        private static boolean tryParseIsoTimeZone(ByteSlice bs, DateTimeZone[] timeZone, long[] millisOffset) {
+            if (bs.size() == 0) {
+                return false;
+            }
+
+            final int savedBegin = bs.begin();
+            final char front = (char) bs.front();
+            if (front == 'Z') {
+                timeZone[0] = DateTimeZone.UTC;
+                millisOffset[0] = 0;
+                bs.setBegin(bs.begin() + 1);
+                return true;
+            }
+
+            // Try an offset like +02 or +03:30 or -0400
+            if (front != '+' && front != '-') {
+                return false;
+            }
+            final boolean negative = front == '-';
+            bs.setBegin(bs.begin() + 1);
+
+            // Reuse millisOffset as a temp variable for hours.
+            if (!tryParseWholeNumber(bs, 2, 2, false, millisOffset)) {
+                bs.setBegin(savedBegin);
+                return false;
+            }
+            final long hours = millisOffset[0];
+
+            // Optional colon
+            tryEatChar(bs, ':');
+
+            long minutes = 0;
+            if (bs.size() != 0) {
+                // Reuse millisOffset as a temp variable for minutes.
+                if (!tryParseWholeNumber(bs, 2, 2, false, millisOffset)) {
+                    bs.setBegin(savedBegin);
+                    return false;
+                }
+                minutes = millisOffset[0];
+            }
+            timeZone[0] = DateTimeZone.UTC;
+
+            // If
someone says yyyy-MM-DDThh:mm:ss-05 + // The "-05" means this is meant to be interpreted as UTC-5. + // If I parse yyyy-MM-DDThh:mm:ss in UTC (without any offset), it will be 5 hours later than + // what the user intended. So in other words, I need to negate the -05 + final long moff = ((hours * 60) + minutes) * 60 * 1_000; + millisOffset[0] = negative ? moff : -moff; + return true; + } + } + + private static final class TimeZoneParser { + // It's possible this could be static if this hashmap has threadsafe concurrent reads (not sure). + private final TIntObjectHashMap timeZoneMap; + + private int lastTzKey = -1; + private DateTimeZone lastTimeZone = null; + + public TimeZoneParser() { + this.timeZoneMap = new TIntObjectHashMap<>(); + for (TimeZone zone : TimeZone.values()) { + final String zname = zone.name(); + if (!zname.startsWith("TZ_")) { + throw new RuntimeException("Unexpected enum in DBTimeZone: " + zname); + } + final String zSuffix = zname.substring(3); + final int zlen = zSuffix.length(); + if (zlen > 3) { + throw new RuntimeException("Unexpectedly-long enum in DBTimeZone: " + zname); + } + final byte[] data = new byte[zlen]; + for (int ii = 0; ii < zlen; ++ii) { + final char ch = zSuffix.charAt(ii); + if (!RangeTests.isUpper(ch)) { + throw new RuntimeException("Unexpected character in DBTimeZone name: " + zname); + } + data[ii] = (byte) ch; + } + final ByteSlice bs = new ByteSlice(data, 0, zlen); + final int tzKey = tryParseTzKey(bs); + if (tzKey < 0) { + throw new RuntimeException("Can't parse DBTimeZone as key: " + zname); + } + timeZoneMap.put(tzKey, zone.getTimeZone()); + } + } + + private static int tryParseTzKey(ByteSlice bs) { + final int size = bs.end() - bs.begin(); + if (size == 0 || size > 3) { + return -1; + } + int res = 0; + for (int current = bs.begin(); current != bs.end(); ++current) { + final char ch = RangeTests.toUpper((char) bs.data()[current]); + if (!RangeTests.isUpper(ch)) { + // If it's some nonalphabetic character + return -1; + } + res = res * 26 + (ch - 'A'); + } + bs.setBegin(bs.end()); + return res; + } + + public boolean tryParseDeephavenTimeZoneIndicator(ByteSlice bs, DateTimeZone[] result) { + int savedBegin = bs.begin(); + + if (!Mutating.tryEatChar(bs, ' ')) { + return false; + } + + final int tzKey = tryParseTzKey(bs); + if (tzKey < 0) { + return false; + } + if (tzKey == lastTzKey) { + result[0] = lastTimeZone; + return true; + } + final DateTimeZone res = timeZoneMap.get(tzKey); + if (res == null) { + bs.setBegin(savedBegin); + return false; + } + lastTzKey = tzKey; + lastTimeZone = res; + result[0] = res; + return true; + } + } +} diff --git a/extensions/csv/src/main/java/io/deephaven/csv/tokenization/external/DoubleParser.java b/extensions/csv/src/main/java/io/deephaven/csv/tokenization/external/DoubleParser.java new file mode 100644 index 00000000000..5480c349744 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/tokenization/external/DoubleParser.java @@ -0,0 +1,612 @@ +package io.deephaven.csv.tokenization.external; + +/* + * @(#)FastDoubleParser.java + * Copyright © 2021. Werner Randelshofer, Switzerland. MIT License. + */ + +/* + * Modifications by kosak: change package name, and use booleans to indicate errors + * rather than throwing exceptions, change method name to tryParseDouble, and accept + * our ByteSlice type rather than a byte[] array. 
+ */ + +import io.deephaven.csv.containers.ByteSlice; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; + +/** + * This is a C++ to Java port of Daniel Lemire's fast_double_parser. + *

+ * The code has been changed, so that it parses the same syntax as
+ * {@link Double#parseDouble(String)}.
+ * <p>
+ * References:
+ * <dl>
+ * <dt>Daniel Lemire, fast_double_parser, 4x faster than strtod.
+ * Apache License 2.0 or Boost Software License.</dt>
+ * <dd>github.com</dd>
+ *
+ * <dt>Daniel Lemire, fast_float number parsing library: 4x faster than strtod.
+ * Apache License 2.0.</dt>
+ * <dd>github.com</dd>
+ *
+ * <dt>Daniel Lemire, Number Parsing at a Gigabyte per Second,
+ * Software: Practice and Experience 51 (8), 2021.
+ * arXiv.2101.11408v3 [cs.DS] 24 Feb 2021</dt>
+ * <dd>arxiv.org</dd>
+ * </dl>
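+ * <p>
+ * A minimal usage sketch (assuming {@code slice} is a ByteSlice over text such as {@code 12.5}):
+ * <pre>
+ * final double[] result = new double[1];
+ * if (DoubleParser.tryParseDouble(slice, result)) {
+ *     // result[0] now holds 12.5
+ * }
+ * </pre>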
+ */
+public class DoubleParser {
+    private final static long MINIMAL_NINETEEN_DIGIT_INTEGER = 1000_00000_00000_00000L;
+    private final static int MINIMAL_EIGHT_DIGIT_INTEGER = 10_000_000;
+    /**
+     * Special value in {@link #CHAR_TO_HEX_MAP} for
+     * the decimal point character.
+     */
+    private static final byte DECIMAL_POINT_CLASS = -4;
+    /**
+     * Special value in {@link #CHAR_TO_HEX_MAP} for
+     * characters that are neither a hex digit nor
+     * a decimal point character.
+     */
+    private static final byte OTHER_CLASS = -1;
+    /**
+     * A table of 128 entries, or of entries up to and including
+     * character 'p', would suffice.
+     * <p>
+     * However, for some reason performance is best
+     * if this table has exactly 256 entries.
+     */
+    private static final byte[] CHAR_TO_HEX_MAP = new byte[256];
+    private final static VarHandle readLongFromByteArray =
+            MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN);
+
+    static {
+        for (char ch = 0; ch < CHAR_TO_HEX_MAP.length; ch++) {
+            CHAR_TO_HEX_MAP[ch] = OTHER_CLASS;
+        }
+        for (char ch = '0'; ch <= '9'; ch++) {
+            CHAR_TO_HEX_MAP[ch] = (byte) (ch - '0');
+        }
+        for (char ch = 'A'; ch <= 'F'; ch++) {
+            CHAR_TO_HEX_MAP[ch] = (byte) (ch - 'A' + 10);
+        }
+        for (char ch = 'a'; ch <= 'f'; ch++) {
+            CHAR_TO_HEX_MAP[ch] = (byte) (ch - 'a' + 10);
+        }
+        CHAR_TO_HEX_MAP['.'] = DECIMAL_POINT_CLASS;
+    }
+
+    /**
+     * Prevents instantiation.
+     */
+    private DoubleParser() {}
+
+    private static boolean isDigit(byte c) {
+        return (byte) '0' <= c && c <= (byte) '9';
+    }
+
+    /**
+     * Convenience method for calling {@link #tryParseDouble(byte[], int, int, double[])}.
+     *
+     * @param bs the string to be parsed, a byte array with characters
+     *           in ISO-8859-1, ASCII or UTF-8 encoding
+     * @param result receives the parsed double value on success
+     * @return true if the string was successfully parsed, false otherwise
+     */
+    public static boolean tryParseDouble(ByteSlice bs, double[] result) {
+        return tryParseDouble(bs.data(), bs.begin(), bs.size(), result);
+    }
+
+    /**
+     * Parses the double value represented by the
+     * argument string {@code str} into {@code result[0]}.
+     * <p>

+     * This method can be used as a drop in for method
+     * {@link Double#valueOf(String)}. (Assuming that the API of this method
+     * has not changed since Java SE 16.)
+     * <p>
+     * Leading and trailing whitespace characters in {@code str} are ignored.
+     * Whitespace is removed as if by the {@link String#trim()} method;
+     * that is, characters in the range [U+0000,U+0020].
+     * <p>
+     * The rest of {@code str} should constitute a FloatValue as described by the
+     * lexical syntax rules shown below:
+     * <blockquote>
+     * <dl>
+     * <dt>FloatValue:</dt>
+     * <dd>[Sign] {@code NaN}</dd>
+     * <dd>[Sign] {@code Infinity}</dd>
+     * <dd>[Sign] DecimalFloatingPointLiteral</dd>
+     * <dd>[Sign] HexFloatingPointLiteral</dd>
+     * <dd>SignedInteger</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>HexFloatingPointLiteral:</dt>
+     * <dd>HexSignificand BinaryExponent</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>HexSignificand:</dt>
+     * <dd>HexNumeral</dd>
+     * <dd>HexNumeral {@code .}</dd>
+     * <dd>{@code 0x} [HexDigits] {@code .} HexDigits</dd>
+     * <dd>{@code 0X} [HexDigits] {@code .} HexDigits</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>BinaryExponent:</dt>
+     * <dd>BinaryExponentIndicator SignedInteger</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>BinaryExponentIndicator:</dt>
+     * <dd>{@code p}</dd>
+     * <dd>{@code P}</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>DecimalFloatingPointLiteral:</dt>
+     * <dd>Digits {@code .} [Digits] [ExponentPart]</dd>
+     * <dd>{@code .} Digits [ExponentPart]</dd>
+     * <dd>Digits ExponentPart</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>ExponentPart:</dt>
+     * <dd>ExponentIndicator SignedInteger</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>ExponentIndicator:</dt>
+     * <dd>(one of)</dd>
+     * <dd>e E</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>SignedInteger:</dt>
+     * <dd>[Sign] Digits</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>Sign:</dt>
+     * <dd>(one of)</dd>
+     * <dd>+ -</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>Digits:</dt>
+     * <dd>Digit {Digit}</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>HexNumeral:</dt>
+     * <dd>{@code 0} {@code x} HexDigits</dd>
+     * <dd>{@code 0} {@code X} HexDigits</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>HexDigits:</dt>
+     * <dd>HexDigit {HexDigit}</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>HexDigit:</dt>
+     * <dd>(one of)</dd>
+     * <dd>{@code 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F}</dd>
+     * </dl>
+     * </blockquote>
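+     * <p>
+     * For example, {@code 3.14}, {@code -1.2e-3}, {@code NaN}, and {@code 0x1.8p1} all match
+     * this grammar.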
+ * + * @param str the string to be parsed, a byte array with characters + * in ISO-8859-1, ASCII or UTF-8 encoding + * @param off The index of the first byte to parse + * @param len The number of bytes to parse + * @return the parsed double value + * @throws NumberFormatException if the string can not be parsed + */ + public static boolean tryParseDouble(byte[] str, int off, int len, double[] result) { + final int endIndex = len + off; + + // Skip leading whitespace + // ------------------- + int index = skipWhitespace(str, off, endIndex); + if (index == endIndex) { + throw new NumberFormatException("empty String"); + } + byte ch = str[index]; + + // Parse optional sign + // ------------------- + final boolean isNegative = ch == '-'; + if (isNegative || ch == '+') { + ch = ++index < endIndex ? str[index] : 0; + if (ch == 0) { + return false; + } + } + + // Parse NaN or Infinity + // --------------------- + if (ch == 'N') { + return tryParseNaN(str, index, endIndex, off, result); + } else if (ch == 'I') { + return tryParseInfinity(str, index, endIndex, isNegative, off, result); + } + + // Parse optional leading zero + // --------------------------- + final boolean hasLeadingZero = ch == '0'; + if (hasLeadingZero) { + ch = ++index < endIndex ? str[index] : 0; + if (ch == 'x' || ch == 'X') { + return tryParseRestOfHexFloatingPointLiteral(str, index + 1, off, endIndex, isNegative, result); + } + } + + return tryParseRestOfDecimalFloatLiteral(str, index, off, endIndex, isNegative, hasLeadingZero, result); + } + + /** + * Tries to parse eight digits from a byte array provided in a long. + * + * @param value an array of 8 bytes in a long + * @return the parsed digits or -1 on failure + */ + private static int tryToParseEightDigits(long value) { + long val = value - 0x3030303030303030L; + long l = ((value + 0x4646464646464646L) | val) & + 0x8080808080808080L; + if (l != 0L) { + return -1; + } + + + long mask = 0x000000FF000000FFL; + long mul1 = 0x000F424000000064L; // 100 + (1000000ULL << 32) + long mul2 = 0x0000271000000001L; // 1 + (10000ULL << 32) + val = (val * 10) + (val >>> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >>> 16) & mask) * mul2)) >>> 32; + return (int) (val); + } + + private static boolean tryParseInfinity(byte[] str, int index, int endIndex, boolean negative, int off, double[] result) { + if (index + 7 < endIndex + // && str.charAt(index) == 'I' + && str[index + 1] == (byte) 'n' + && str[index + 2] == (byte) 'f' + && str[index + 3] == (byte) 'i' + && str[index + 4] == (byte) 'n' + && str[index + 5] == (byte) 'i' + && str[index + 6] == (byte) 't' + && str[index + 7] == (byte) 'y' + ) { + index = skipWhitespace(str, index + 8, endIndex); + if (index < endIndex) { + return false; + } + result[0] = negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; + return true; + } else { + return false; + } + } + + private static boolean tryParseNaN(byte[] str, int index, int endIndex, int off, double[] result) { + if (index + 2 < endIndex + // && str.charAt(index) == 'N' + && str[index + 1] == (byte) 'a' + && str[index + 2] == (byte) 'N') { + + index = skipWhitespace(str, index + 3, endIndex); + if (index < endIndex) { + return false; + } + + result[0] = Double.NaN; + return true; + } else { + return false; + } + } + + /** + * Parses the following rules + * (more rules are defined in {@link #parseDouble}): + *
+     * <blockquote>
+     * <dl>
+     * <dt>RestOfDecimalFloatingPointLiteral:</dt>
+     * <dd>[Digits] {@code .} [Digits] [ExponentPart]</dd>
+     * <dd>{@code .} Digits [ExponentPart]</dd>
+     * <dd>[Digits] ExponentPart</dd>
+     * </dl>
+     * </blockquote>
+ * + * @param str the input string + * @param index index to the first character of RestOfHexFloatingPointLiteral + * @param endIndex the end index of the string + * @param isNegative if the resulting number is negative + * @param hasLeadingZero if the digit '0' has been consumed + * @return a double representation + */ + private static boolean tryParseRestOfDecimalFloatLiteral(byte[] str, int index, int startIndex, int endIndex, boolean isNegative, boolean hasLeadingZero, double[] result) { + // Parse digits + // ------------ + // Note: a multiplication by a constant is cheaper than an + // arbitrary integer multiplication. + long digits = 0;// digits is treated as an unsigned long + int exponent = 0; + final int indexOfFirstDigit = index; + int virtualIndexOfPoint = -1; + final int digitCount; + byte ch = 0; + for (; index < endIndex; index++) { + ch = str[index]; + if (isDigit(ch)) { + // This might overflow, we deal with it later. + digits = 10 * digits + ch - '0'; + } else if (ch == '.') { + if (virtualIndexOfPoint != -1) { + return false; + } + virtualIndexOfPoint = index; + while (index < endIndex - 9) { + long val = (long) readLongFromByteArray.get(str, index + 1); + int parsed = tryToParseEightDigits(val); + if (parsed >= 0) { + // This might overflow, we deal with it later. + digits = digits * 100_000_000L + parsed; + index += 8; + } else { + break; + } + } + } else { + break; + } + } + final int indexAfterDigits = index; + if (virtualIndexOfPoint == -1) { + digitCount = indexAfterDigits - indexOfFirstDigit; + virtualIndexOfPoint = indexAfterDigits; + } else { + digitCount = indexAfterDigits - indexOfFirstDigit - 1; + exponent = virtualIndexOfPoint - index + 1; + } + + // Parse exponent number + // --------------------- + long exp_number = 0; + final boolean hasExponent = (ch == 'e') || (ch == 'E'); + if (hasExponent) { + ch = ++index < endIndex ? str[index] : 0; + boolean neg_exp = ch == '-'; + if (neg_exp || ch == '+') { + ch = ++index < endIndex ? str[index] : 0; + } + if (!isDigit(ch)) { + return false; + } + do { + // Guard against overflow of exp_number + if (exp_number < MINIMAL_EIGHT_DIGIT_INTEGER) { + exp_number = 10 * exp_number + ch - '0'; + } + ch = ++index < endIndex ? 
str[index] : 0; + } while (isDigit(ch)); + if (neg_exp) { + exp_number = -exp_number; + } + exponent += exp_number; + } + + // Skip trailing whitespace + // ------------------------ + index = skipWhitespace(str, index, endIndex); + if (index < endIndex + || !hasLeadingZero && digitCount == 0 && str[virtualIndexOfPoint] != '.') { + return false; + } + + // Re-parse digits in case of a potential overflow + // ----------------------------------------------- + final boolean isDigitsTruncated; + int skipCountInTruncatedDigits = 0;//counts +1 if we skipped over the decimal point + if (digitCount > 19) { + digits = 0; + for (index = indexOfFirstDigit; index < indexAfterDigits; index++) { + ch = str[index]; + if (ch == '.') { + skipCountInTruncatedDigits++; + } else { + if (Long.compareUnsigned(digits, MINIMAL_NINETEEN_DIGIT_INTEGER) < 0) { + digits = 10 * digits + ch - '0'; + } else { + break; + } + } + } + isDigitsTruncated = index < indexAfterDigits; + } else { + isDigitsTruncated = false; + } + + result[0] = FastDoubleMath.decFloatLiteralToDouble(index, isNegative, digits, exponent, virtualIndexOfPoint, exp_number, isDigitsTruncated, skipCountInTruncatedDigits); + if (!Double.isNaN(result[0])) { + return true; + } + return tryParseRestOfDecimalFloatLiteralTheHardWay(str, startIndex, endIndex - startIndex, result); + } + + /** + * Parses the following rules + * (more rules are defined in {@link #parseDouble}): + *
+     * <blockquote>
+     * <dl>
+     * <dt>RestOfDecimalFloatingPointLiteral:</dt>
+     * <dd>[Digits] {@code .} [Digits] [ExponentPart]</dd>
+     * <dd>{@code .} Digits [ExponentPart]</dd>
+     * <dd>[Digits] ExponentPart</dd>
+     * </dl>
+     * </blockquote>
+     *
+ * @param str the input string + */ + private static boolean tryParseRestOfDecimalFloatLiteralTheHardWay(byte[] str, int off, int len, double[] result) { + try { + result[0] = Double.parseDouble(new String(str, off, len, StandardCharsets.ISO_8859_1)); + return true; + } catch (NumberFormatException nfe) { + return false; + } + } + + /** + * Parses the following rules + * (more rules are defined in {@link #parseDouble}): + *
+     * <blockquote>
+     * <dl>
+     * <dt>RestOfHexFloatingPointLiteral:</dt>
+     * <dd>RestOfHexSignificand BinaryExponent</dd>
+     * </dl>
+     *
+     * <dl>
+     * <dt>RestOfHexSignificand:</dt>
+     * <dd>HexDigits</dd>
+     * <dd>HexDigits {@code .}</dd>
+     * <dd>[HexDigits] {@code .} HexDigits</dd>
+     * </dl>
+     * </blockquote>
+ * + * @param str the input string + * @param index index to the first character of RestOfHexFloatingPointLiteral + * @param startIndex the start index of the string + * @param endIndex the end index of the string + * @param isNegative if the resulting number is negative + * @return a double representation + */ + private static boolean tryParseRestOfHexFloatingPointLiteral( + byte[] str, int index, int startIndex, int endIndex, boolean isNegative, double[] result) { + if (index >= endIndex) { + return false; + } + + // Parse digits + // ------------ + long digits = 0;// digits is treated as an unsigned long + int exponent = 0; + final int indexOfFirstDigit = index; + int virtualIndexOfPoint = -1; + final int digitCount; + byte ch = 0; + for (; index < endIndex; index++) { + ch = str[index]; + // Table look up is faster than a sequence of if-else-branches. + int hexValue = ch < 0 ? OTHER_CLASS : CHAR_TO_HEX_MAP[ch]; + if (hexValue >= 0) { + digits = (digits << 4) | hexValue;// This might overflow, we deal with it later. + } else if (hexValue == DECIMAL_POINT_CLASS) { + if (virtualIndexOfPoint != -1) { + return false; + } + virtualIndexOfPoint = index; + } else { + break; + } + } + final int indexAfterDigits = index; + if (virtualIndexOfPoint == -1) { + digitCount = indexAfterDigits - indexOfFirstDigit; + virtualIndexOfPoint = indexAfterDigits; + } else { + digitCount = indexAfterDigits - indexOfFirstDigit - 1; + exponent = Math.min(virtualIndexOfPoint - index + 1, MINIMAL_EIGHT_DIGIT_INTEGER) * 4; + } + + // Parse exponent number + // --------------------- + long exp_number = 0; + final boolean hasExponent = (ch == 'p') || (ch == 'P'); + if (hasExponent) { + ch = ++index < endIndex ? str[index] : 0; + boolean neg_exp = ch == '-'; + if (neg_exp || ch == '+') { + ch = ++index < endIndex ? str[index] : 0; + } + if (!isDigit(ch)) { + return false; + } + do { + // Guard against overflow of exp_number + if (exp_number < MINIMAL_EIGHT_DIGIT_INTEGER) { + exp_number = 10 * exp_number + ch - '0'; + } + ch = ++index < endIndex ? str[index] : 0; + } while (isDigit(ch)); + if (neg_exp) { + exp_number = -exp_number; + } + exponent += exp_number; + } + + // Skip trailing whitespace + // ------------------------ + index = skipWhitespace(str, index, endIndex); + if (index < endIndex + || digitCount == 0 && str[virtualIndexOfPoint] != '.' + || !hasExponent) { + return false; + } + + // Re-parse digits in case of a potential overflow + // ----------------------------------------------- + final boolean isDigitsTruncated; + int skipCountInTruncatedDigits = 0;//counts +1 if we skipped over the decimal point + if (digitCount > 16) { + digits = 0; + for (index = indexOfFirstDigit; index < indexAfterDigits; index++) { + ch = str[index]; + // Table look up is faster than a sequence of if-else-branches. + int hexValue = ch < 0 ? 
OTHER_CLASS : CHAR_TO_HEX_MAP[ch]; + if (hexValue >= 0) { + if (Long.compareUnsigned(digits, MINIMAL_NINETEEN_DIGIT_INTEGER) < 0) { + digits = (digits << 4) | hexValue; + } else { + break; + } + } else { + skipCountInTruncatedDigits++; + } + } + isDigitsTruncated = (index < indexAfterDigits); + } else { + isDigitsTruncated = false; + } + + result[0] = FastDoubleMath.hexFloatLiteralToDouble(index, isNegative, digits, exponent, virtualIndexOfPoint, exp_number, isDigitsTruncated, skipCountInTruncatedDigits); + if (!Double.isNaN(result[0])) { + return true; + } + return tryParseRestOfDecimalFloatLiteralTheHardWay(str, startIndex, endIndex - startIndex, result); + } + + private static int skipWhitespace(byte[] str, int startIndex, int endIndex) { + int index = startIndex; + for (; index < endIndex; index++) { + if ((str[index] & 0xff) > 0x20) { + break; + } + } + return index; + } + +} \ No newline at end of file diff --git a/extensions/csv/src/main/java/io/deephaven/csv/tokenization/external/FastDoubleMath.java b/extensions/csv/src/main/java/io/deephaven/csv/tokenization/external/FastDoubleMath.java new file mode 100644 index 00000000000..71a15af7d24 --- /dev/null +++ b/extensions/csv/src/main/java/io/deephaven/csv/tokenization/external/FastDoubleMath.java @@ -0,0 +1,1083 @@ +package io.deephaven.csv.tokenization.external; + +/* +* @(#)FastDoubleMath.java +* Copyright © 2021. Werner Randelshofer, Switzerland. MIT License. +*/ + +/* + * Modifications by kosak: change package name. + */ + +import java.util.Objects; + +/** + * This class provides the mathematical functions needed by {@link FastDoubleParserFromByteArray}. + *
+ * <p>
+ * This is a C++ to Java port of Daniel Lemire's fast_double_parser.
+ * <p>
+ * The code contains enhancements from Daniel Lemire's fast_float_parser,
+ * so that it can parse double Strings with very long sequences of numbers.
+ * <p>
+ * References:
+ * <dl>
+ * <dt>Daniel Lemire, fast_double_parser, 4x faster than strtod.
+ * Apache License 2.0 or Boost Software License.</dt>
+ * <dd>github.com</dd>
+ *
+ * <dt>Daniel Lemire, fast_float number parsing library: 4x faster than strtod.
+ * Apache License 2.0.</dt>
+ * <dd>github.com</dd>
+ *
+ * <dt>Daniel Lemire, Number Parsing at a Gigabyte per Second,
+ * Software: Practice and Experience 51 (8), 2021.
+ * arXiv.2101.11408v3 [cs.DS] 24 Feb 2021</dt>
+ * <dd>arxiv.org</dd>
+ * </dl>
+ */ +class FastDoubleMath { + /** + * The smallest non-zero float (binary64) is 2^−1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that {@literal w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076}. + *
+     * <p>
+     * However, we have that
+     * {@literal (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074}.
+     * Thus it is possible for a number of the form w * 10^-342 where
+     * w is a 64-bit value to be a non-zero floating-point number.
+     * <p>
+     * ********
+     * <p>
+     * If we are solely interested in the *normal* numbers then the
+     * smallest value is 2^-1022. We can generate a value larger
+     * than 2^-1022 with expressions of the form w * 10^-326.
+     * Thus we need to pick FASTFLOAT_SMALLEST_POWER >= -326.
+     * <p>
+     * ********
+     * <p>
+     * Any number of the form w * 10^309 where w >= 1 is going to be
+     * infinite in binary64, so we never need to worry about powers
+     * of 5 greater than 308.
+     */
+    private final static int FASTFLOAT_DEC_SMALLEST_POWER = -325;
+    private final static int FASTFLOAT_DEC_LARGEST_POWER = 308;
+    private final static int FASTFLOAT_HEX_SMALLEST_POWER = Double.MIN_EXPONENT;
+    private final static int FASTFLOAT_HEX_LARGEST_POWER = Double.MAX_EXPONENT;
+    /**
+     * Precomputed powers of ten from 10^0 to 10^22. These
+     * can be represented exactly using the double type.
+     */
+    private static final double[] powerOfTen = {
+            1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11,
+            1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
+    /**
+     * When mapping numbers from decimal to binary,
+     * we go from w * 10^q to m * 2^p, but we have
+     * 10^q = 5^q * 2^q, so effectively
+     * we are trying to match
+     * w * 2^q * 5^q to m * 2^p. Thus the powers of two
+     * are not a concern since they can be represented
+     * exactly using the binary notation; only the powers of five
+     * affect the binary significand.
+     * <p>
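+     * For example, {@literal w * 10^3 = w * 5^3 * 2^3 = (125 * w) * 2^3}: the
+     * factor 2^3 merely shifts the binary exponent, while the factor 5^3 = 125
+     * is what actually perturbs the bits of the 64-bit significand.
+     * <p>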
+ * The mantissas of powers of ten from -308 to 308, extended out to sixty four + * bits. The array contains the powers of ten approximated + * as a 64-bit mantissa. It goes from 10^FASTFLOAT_SMALLEST_POWER to + * 10^FASTFLOAT_LARGEST_POWER (inclusively). + * The mantissa is truncated, and + * never rounded up. Uses about 5KB. + */ + private static final long[] MANTISSA_64 = { + 0xa5ced43b7e3e9188L, 0xcf42894a5dce35eaL, + 0x818995ce7aa0e1b2L, 0xa1ebfb4219491a1fL, + 0xca66fa129f9b60a6L, 0xfd00b897478238d0L, + 0x9e20735e8cb16382L, 0xc5a890362fddbc62L, + 0xf712b443bbd52b7bL, 0x9a6bb0aa55653b2dL, + 0xc1069cd4eabe89f8L, 0xf148440a256e2c76L, + 0x96cd2a865764dbcaL, 0xbc807527ed3e12bcL, + 0xeba09271e88d976bL, 0x93445b8731587ea3L, + 0xb8157268fdae9e4cL, 0xe61acf033d1a45dfL, + 0x8fd0c16206306babL, 0xb3c4f1ba87bc8696L, + 0xe0b62e2929aba83cL, 0x8c71dcd9ba0b4925L, + 0xaf8e5410288e1b6fL, 0xdb71e91432b1a24aL, + 0x892731ac9faf056eL, 0xab70fe17c79ac6caL, + 0xd64d3d9db981787dL, 0x85f0468293f0eb4eL, + 0xa76c582338ed2621L, 0xd1476e2c07286faaL, + 0x82cca4db847945caL, 0xa37fce126597973cL, + 0xcc5fc196fefd7d0cL, 0xff77b1fcbebcdc4fL, + 0x9faacf3df73609b1L, 0xc795830d75038c1dL, + 0xf97ae3d0d2446f25L, 0x9becce62836ac577L, + 0xc2e801fb244576d5L, 0xf3a20279ed56d48aL, + 0x9845418c345644d6L, 0xbe5691ef416bd60cL, + 0xedec366b11c6cb8fL, 0x94b3a202eb1c3f39L, + 0xb9e08a83a5e34f07L, 0xe858ad248f5c22c9L, + 0x91376c36d99995beL, 0xb58547448ffffb2dL, + 0xe2e69915b3fff9f9L, 0x8dd01fad907ffc3bL, + 0xb1442798f49ffb4aL, 0xdd95317f31c7fa1dL, + 0x8a7d3eef7f1cfc52L, 0xad1c8eab5ee43b66L, + 0xd863b256369d4a40L, 0x873e4f75e2224e68L, + 0xa90de3535aaae202L, 0xd3515c2831559a83L, + 0x8412d9991ed58091L, 0xa5178fff668ae0b6L, + 0xce5d73ff402d98e3L, 0x80fa687f881c7f8eL, + 0xa139029f6a239f72L, 0xc987434744ac874eL, + 0xfbe9141915d7a922L, 0x9d71ac8fada6c9b5L, + 0xc4ce17b399107c22L, 0xf6019da07f549b2bL, + 0x99c102844f94e0fbL, 0xc0314325637a1939L, + 0xf03d93eebc589f88L, 0x96267c7535b763b5L, + 0xbbb01b9283253ca2L, 0xea9c227723ee8bcbL, + 0x92a1958a7675175fL, 0xb749faed14125d36L, + 0xe51c79a85916f484L, 0x8f31cc0937ae58d2L, + 0xb2fe3f0b8599ef07L, 0xdfbdcece67006ac9L, + 0x8bd6a141006042bdL, 0xaecc49914078536dL, + 0xda7f5bf590966848L, 0x888f99797a5e012dL, + 0xaab37fd7d8f58178L, 0xd5605fcdcf32e1d6L, + 0x855c3be0a17fcd26L, 0xa6b34ad8c9dfc06fL, + 0xd0601d8efc57b08bL, 0x823c12795db6ce57L, + 0xa2cb1717b52481edL, 0xcb7ddcdda26da268L, + 0xfe5d54150b090b02L, 0x9efa548d26e5a6e1L, + 0xc6b8e9b0709f109aL, 0xf867241c8cc6d4c0L, + 0x9b407691d7fc44f8L, 0xc21094364dfb5636L, + 0xf294b943e17a2bc4L, 0x979cf3ca6cec5b5aL, + 0xbd8430bd08277231L, 0xece53cec4a314ebdL, + 0x940f4613ae5ed136L, 0xb913179899f68584L, + 0xe757dd7ec07426e5L, 0x9096ea6f3848984fL, + 0xb4bca50b065abe63L, 0xe1ebce4dc7f16dfbL, + 0x8d3360f09cf6e4bdL, 0xb080392cc4349decL, + 0xdca04777f541c567L, 0x89e42caaf9491b60L, + 0xac5d37d5b79b6239L, 0xd77485cb25823ac7L, + 0x86a8d39ef77164bcL, 0xa8530886b54dbdebL, + 0xd267caa862a12d66L, 0x8380dea93da4bc60L, + 0xa46116538d0deb78L, 0xcd795be870516656L, + 0x806bd9714632dff6L, 0xa086cfcd97bf97f3L, + 0xc8a883c0fdaf7df0L, 0xfad2a4b13d1b5d6cL, + 0x9cc3a6eec6311a63L, 0xc3f490aa77bd60fcL, + 0xf4f1b4d515acb93bL, 0x991711052d8bf3c5L, + 0xbf5cd54678eef0b6L, 0xef340a98172aace4L, + 0x9580869f0e7aac0eL, 0xbae0a846d2195712L, + 0xe998d258869facd7L, 0x91ff83775423cc06L, + 0xb67f6455292cbf08L, 0xe41f3d6a7377eecaL, + 0x8e938662882af53eL, 0xb23867fb2a35b28dL, + 0xdec681f9f4c31f31L, 0x8b3c113c38f9f37eL, + 0xae0b158b4738705eL, 0xd98ddaee19068c76L, + 0x87f8a8d4cfa417c9L, 0xa9f6d30a038d1dbcL, + 
0xd47487cc8470652bL, 0x84c8d4dfd2c63f3bL, + 0xa5fb0a17c777cf09L, 0xcf79cc9db955c2ccL, + 0x81ac1fe293d599bfL, 0xa21727db38cb002fL, + 0xca9cf1d206fdc03bL, 0xfd442e4688bd304aL, + 0x9e4a9cec15763e2eL, 0xc5dd44271ad3cdbaL, + 0xf7549530e188c128L, 0x9a94dd3e8cf578b9L, + 0xc13a148e3032d6e7L, 0xf18899b1bc3f8ca1L, + 0x96f5600f15a7b7e5L, 0xbcb2b812db11a5deL, + 0xebdf661791d60f56L, 0x936b9fcebb25c995L, + 0xb84687c269ef3bfbL, 0xe65829b3046b0afaL, + 0x8ff71a0fe2c2e6dcL, 0xb3f4e093db73a093L, + 0xe0f218b8d25088b8L, 0x8c974f7383725573L, + 0xafbd2350644eeacfL, 0xdbac6c247d62a583L, + 0x894bc396ce5da772L, 0xab9eb47c81f5114fL, + 0xd686619ba27255a2L, 0x8613fd0145877585L, + 0xa798fc4196e952e7L, 0xd17f3b51fca3a7a0L, + 0x82ef85133de648c4L, 0xa3ab66580d5fdaf5L, + 0xcc963fee10b7d1b3L, 0xffbbcfe994e5c61fL, + 0x9fd561f1fd0f9bd3L, 0xc7caba6e7c5382c8L, + 0xf9bd690a1b68637bL, 0x9c1661a651213e2dL, + 0xc31bfa0fe5698db8L, 0xf3e2f893dec3f126L, + 0x986ddb5c6b3a76b7L, 0xbe89523386091465L, + 0xee2ba6c0678b597fL, 0x94db483840b717efL, + 0xba121a4650e4ddebL, 0xe896a0d7e51e1566L, + 0x915e2486ef32cd60L, 0xb5b5ada8aaff80b8L, + 0xe3231912d5bf60e6L, 0x8df5efabc5979c8fL, + 0xb1736b96b6fd83b3L, 0xddd0467c64bce4a0L, + 0x8aa22c0dbef60ee4L, 0xad4ab7112eb3929dL, + 0xd89d64d57a607744L, 0x87625f056c7c4a8bL, + 0xa93af6c6c79b5d2dL, 0xd389b47879823479L, + 0x843610cb4bf160cbL, 0xa54394fe1eedb8feL, + 0xce947a3da6a9273eL, 0x811ccc668829b887L, + 0xa163ff802a3426a8L, 0xc9bcff6034c13052L, + 0xfc2c3f3841f17c67L, 0x9d9ba7832936edc0L, + 0xc5029163f384a931L, 0xf64335bcf065d37dL, + 0x99ea0196163fa42eL, 0xc06481fb9bcf8d39L, + 0xf07da27a82c37088L, 0x964e858c91ba2655L, + 0xbbe226efb628afeaL, 0xeadab0aba3b2dbe5L, + 0x92c8ae6b464fc96fL, 0xb77ada0617e3bbcbL, + 0xe55990879ddcaabdL, 0x8f57fa54c2a9eab6L, + 0xb32df8e9f3546564L, 0xdff9772470297ebdL, + 0x8bfbea76c619ef36L, 0xaefae51477a06b03L, + 0xdab99e59958885c4L, 0x88b402f7fd75539bL, + 0xaae103b5fcd2a881L, 0xd59944a37c0752a2L, + 0x857fcae62d8493a5L, 0xa6dfbd9fb8e5b88eL, + 0xd097ad07a71f26b2L, 0x825ecc24c873782fL, + 0xa2f67f2dfa90563bL, 0xcbb41ef979346bcaL, + 0xfea126b7d78186bcL, 0x9f24b832e6b0f436L, + 0xc6ede63fa05d3143L, 0xf8a95fcf88747d94L, + 0x9b69dbe1b548ce7cL, 0xc24452da229b021bL, + 0xf2d56790ab41c2a2L, 0x97c560ba6b0919a5L, + 0xbdb6b8e905cb600fL, 0xed246723473e3813L, + 0x9436c0760c86e30bL, 0xb94470938fa89bceL, + 0xe7958cb87392c2c2L, 0x90bd77f3483bb9b9L, + 0xb4ecd5f01a4aa828L, 0xe2280b6c20dd5232L, + 0x8d590723948a535fL, 0xb0af48ec79ace837L, + 0xdcdb1b2798182244L, 0x8a08f0f8bf0f156bL, + 0xac8b2d36eed2dac5L, 0xd7adf884aa879177L, + 0x86ccbb52ea94baeaL, 0xa87fea27a539e9a5L, + 0xd29fe4b18e88640eL, 0x83a3eeeef9153e89L, + 0xa48ceaaab75a8e2bL, 0xcdb02555653131b6L, + 0x808e17555f3ebf11L, 0xa0b19d2ab70e6ed6L, + 0xc8de047564d20a8bL, 0xfb158592be068d2eL, + 0x9ced737bb6c4183dL, 0xc428d05aa4751e4cL, + 0xf53304714d9265dfL, 0x993fe2c6d07b7fabL, + 0xbf8fdb78849a5f96L, 0xef73d256a5c0f77cL, + 0x95a8637627989aadL, 0xbb127c53b17ec159L, + 0xe9d71b689dde71afL, 0x9226712162ab070dL, + 0xb6b00d69bb55c8d1L, 0xe45c10c42a2b3b05L, + 0x8eb98a7a9a5b04e3L, 0xb267ed1940f1c61cL, + 0xdf01e85f912e37a3L, 0x8b61313bbabce2c6L, + 0xae397d8aa96c1b77L, 0xd9c7dced53c72255L, + 0x881cea14545c7575L, 0xaa242499697392d2L, + 0xd4ad2dbfc3d07787L, 0x84ec3c97da624ab4L, + 0xa6274bbdd0fadd61L, 0xcfb11ead453994baL, + 0x81ceb32c4b43fcf4L, 0xa2425ff75e14fc31L, + 0xcad2f7f5359a3b3eL, 0xfd87b5f28300ca0dL, + 0x9e74d1b791e07e48L, 0xc612062576589ddaL, + 0xf79687aed3eec551L, 0x9abe14cd44753b52L, + 0xc16d9a0095928a27L, 0xf1c90080baf72cb1L, + 0x971da05074da7beeL, 
0xbce5086492111aeaL, + 0xec1e4a7db69561a5L, 0x9392ee8e921d5d07L, + 0xb877aa3236a4b449L, 0xe69594bec44de15bL, + 0x901d7cf73ab0acd9L, 0xb424dc35095cd80fL, + 0xe12e13424bb40e13L, 0x8cbccc096f5088cbL, + 0xafebff0bcb24aafeL, 0xdbe6fecebdedd5beL, + 0x89705f4136b4a597L, 0xabcc77118461cefcL, + 0xd6bf94d5e57a42bcL, 0x8637bd05af6c69b5L, + 0xa7c5ac471b478423L, 0xd1b71758e219652bL, + 0x83126e978d4fdf3bL, 0xa3d70a3d70a3d70aL, + 0xccccccccccccccccL, 0x8000000000000000L, + 0xa000000000000000L, 0xc800000000000000L, + 0xfa00000000000000L, 0x9c40000000000000L, + 0xc350000000000000L, 0xf424000000000000L, + 0x9896800000000000L, 0xbebc200000000000L, + 0xee6b280000000000L, 0x9502f90000000000L, + 0xba43b74000000000L, 0xe8d4a51000000000L, + 0x9184e72a00000000L, 0xb5e620f480000000L, + 0xe35fa931a0000000L, 0x8e1bc9bf04000000L, + 0xb1a2bc2ec5000000L, 0xde0b6b3a76400000L, + 0x8ac7230489e80000L, 0xad78ebc5ac620000L, + 0xd8d726b7177a8000L, 0x878678326eac9000L, + 0xa968163f0a57b400L, 0xd3c21bcecceda100L, + 0x84595161401484a0L, 0xa56fa5b99019a5c8L, + 0xcecb8f27f4200f3aL, 0x813f3978f8940984L, + 0xa18f07d736b90be5L, 0xc9f2c9cd04674edeL, + 0xfc6f7c4045812296L, 0x9dc5ada82b70b59dL, + 0xc5371912364ce305L, 0xf684df56c3e01bc6L, + 0x9a130b963a6c115cL, 0xc097ce7bc90715b3L, + 0xf0bdc21abb48db20L, 0x96769950b50d88f4L, + 0xbc143fa4e250eb31L, 0xeb194f8e1ae525fdL, + 0x92efd1b8d0cf37beL, 0xb7abc627050305adL, + 0xe596b7b0c643c719L, 0x8f7e32ce7bea5c6fL, + 0xb35dbf821ae4f38bL, 0xe0352f62a19e306eL, + 0x8c213d9da502de45L, 0xaf298d050e4395d6L, + 0xdaf3f04651d47b4cL, 0x88d8762bf324cd0fL, + 0xab0e93b6efee0053L, 0xd5d238a4abe98068L, + 0x85a36366eb71f041L, 0xa70c3c40a64e6c51L, + 0xd0cf4b50cfe20765L, 0x82818f1281ed449fL, + 0xa321f2d7226895c7L, 0xcbea6f8ceb02bb39L, + 0xfee50b7025c36a08L, 0x9f4f2726179a2245L, + 0xc722f0ef9d80aad6L, 0xf8ebad2b84e0d58bL, + 0x9b934c3b330c8577L, 0xc2781f49ffcfa6d5L, + 0xf316271c7fc3908aL, 0x97edd871cfda3a56L, + 0xbde94e8e43d0c8ecL, 0xed63a231d4c4fb27L, + 0x945e455f24fb1cf8L, 0xb975d6b6ee39e436L, + 0xe7d34c64a9c85d44L, 0x90e40fbeea1d3a4aL, + 0xb51d13aea4a488ddL, 0xe264589a4dcdab14L, + 0x8d7eb76070a08aecL, 0xb0de65388cc8ada8L, + 0xdd15fe86affad912L, 0x8a2dbf142dfcc7abL, + 0xacb92ed9397bf996L, 0xd7e77a8f87daf7fbL, + 0x86f0ac99b4e8dafdL, 0xa8acd7c0222311bcL, + 0xd2d80db02aabd62bL, 0x83c7088e1aab65dbL, + 0xa4b8cab1a1563f52L, 0xcde6fd5e09abcf26L, + 0x80b05e5ac60b6178L, 0xa0dc75f1778e39d6L, + 0xc913936dd571c84cL, 0xfb5878494ace3a5fL, + 0x9d174b2dcec0e47bL, 0xc45d1df942711d9aL, + 0xf5746577930d6500L, 0x9968bf6abbe85f20L, + 0xbfc2ef456ae276e8L, 0xefb3ab16c59b14a2L, + 0x95d04aee3b80ece5L, 0xbb445da9ca61281fL, + 0xea1575143cf97226L, 0x924d692ca61be758L, + 0xb6e0c377cfa2e12eL, 0xe498f455c38b997aL, + 0x8edf98b59a373fecL, 0xb2977ee300c50fe7L, + 0xdf3d5e9bc0f653e1L, 0x8b865b215899f46cL, + 0xae67f1e9aec07187L, 0xda01ee641a708de9L, + 0x884134fe908658b2L, 0xaa51823e34a7eedeL, + 0xd4e5e2cdc1d1ea96L, 0x850fadc09923329eL, + 0xa6539930bf6bff45L, 0xcfe87f7cef46ff16L, + 0x81f14fae158c5f6eL, 0xa26da3999aef7749L, + 0xcb090c8001ab551cL, 0xfdcb4fa002162a63L, + 0x9e9f11c4014dda7eL, 0xc646d63501a1511dL, + 0xf7d88bc24209a565L, 0x9ae757596946075fL, + 0xc1a12d2fc3978937L, 0xf209787bb47d6b84L, + 0x9745eb4d50ce6332L, 0xbd176620a501fbffL, + 0xec5d3fa8ce427affL, 0x93ba47c980e98cdfL, + 0xb8a8d9bbe123f017L, 0xe6d3102ad96cec1dL, + 0x9043ea1ac7e41392L, 0xb454e4a179dd1877L, + 0xe16a1dc9d8545e94L, 0x8ce2529e2734bb1dL, + 0xb01ae745b101e9e4L, 0xdc21a1171d42645dL, + 0x899504ae72497ebaL, 0xabfa45da0edbde69L, + 0xd6f8d7509292d603L, 0x865b86925b9bc5c2L, + 
0xa7f26836f282b732L, 0xd1ef0244af2364ffL, + 0x8335616aed761f1fL, 0xa402b9c5a8d3a6e7L, + 0xcd036837130890a1L, 0x802221226be55a64L, + 0xa02aa96b06deb0fdL, 0xc83553c5c8965d3dL, + 0xfa42a8b73abbf48cL, 0x9c69a97284b578d7L, + 0xc38413cf25e2d70dL, 0xf46518c2ef5b8cd1L, + 0x98bf2f79d5993802L, 0xbeeefb584aff8603L, + 0xeeaaba2e5dbf6784L, 0x952ab45cfa97a0b2L, + 0xba756174393d88dfL, 0xe912b9d1478ceb17L, + 0x91abb422ccb812eeL, 0xb616a12b7fe617aaL, + 0xe39c49765fdf9d94L, 0x8e41ade9fbebc27dL, + 0xb1d219647ae6b31cL, 0xde469fbd99a05fe3L, + 0x8aec23d680043beeL, 0xada72ccc20054ae9L, + 0xd910f7ff28069da4L, 0x87aa9aff79042286L, + 0xa99541bf57452b28L, 0xd3fa922f2d1675f2L, + 0x847c9b5d7c2e09b7L, 0xa59bc234db398c25L, + 0xcf02b2c21207ef2eL, 0x8161afb94b44f57dL, + 0xa1ba1ba79e1632dcL, 0xca28a291859bbf93L, + 0xfcb2cb35e702af78L, 0x9defbf01b061adabL, + 0xc56baec21c7a1916L, 0xf6c69a72a3989f5bL, + 0x9a3c2087a63f6399L, 0xc0cb28a98fcf3c7fL, + 0xf0fdf2d3f3c30b9fL, 0x969eb7c47859e743L, + 0xbc4665b596706114L, 0xeb57ff22fc0c7959L, + 0x9316ff75dd87cbd8L, 0xb7dcbf5354e9beceL, + 0xe5d3ef282a242e81L, 0x8fa475791a569d10L, + 0xb38d92d760ec4455L, 0xe070f78d3927556aL, + 0x8c469ab843b89562L, 0xaf58416654a6babbL, + 0xdb2e51bfe9d0696aL, 0x88fcf317f22241e2L, + 0xab3c2fddeeaad25aL, 0xd60b3bd56a5586f1L, + 0x85c7056562757456L, 0xa738c6bebb12d16cL, + 0xd106f86e69d785c7L, 0x82a45b450226b39cL, + 0xa34d721642b06084L, 0xcc20ce9bd35c78a5L, + 0xff290242c83396ceL, 0x9f79a169bd203e41L, + 0xc75809c42c684dd1L, 0xf92e0c3537826145L, + 0x9bbcc7a142b17ccbL, 0xc2abf989935ddbfeL, + 0xf356f7ebf83552feL, 0x98165af37b2153deL, + 0xbe1bf1b059e9a8d6L, 0xeda2ee1c7064130cL, + 0x9485d4d1c63e8be7L, 0xb9a74a0637ce2ee1L, + 0xe8111c87c5c1ba99L, 0x910ab1d4db9914a0L, + 0xb54d5e4a127f59c8L, 0xe2a0b5dc971f303aL, + 0x8da471a9de737e24L, 0xb10d8e1456105dadL, + 0xdd50f1996b947518L, 0x8a5296ffe33cc92fL, + 0xace73cbfdc0bfb7bL, 0xd8210befd30efa5aL, + 0x8714a775e3e95c78L, 0xa8d9d1535ce3b396L, + 0xd31045a8341ca07cL, 0x83ea2b892091e44dL, + 0xa4e4b66b68b65d60L, 0xce1de40642e3f4b9L, + 0x80d2ae83e9ce78f3L, 0xa1075a24e4421730L, + 0xc94930ae1d529cfcL, 0xfb9b7cd9a4a7443cL, + 0x9d412e0806e88aa5L, 0xc491798a08a2ad4eL, + 0xf5b5d7ec8acb58a2L, 0x9991a6f3d6bf1765L, + 0xbff610b0cc6edd3fL, 0xeff394dcff8a948eL, + 0x95f83d0a1fb69cd9L, 0xbb764c4ca7a4440fL, + 0xea53df5fd18d5513L, 0x92746b9be2f8552cL, + 0xb7118682dbb66a77L, 0xe4d5e82392a40515L, + 0x8f05b1163ba6832dL, 0xb2c71d5bca9023f8L, + 0xdf78e4b2bd342cf6L, 0x8bab8eefb6409c1aL, + 0xae9672aba3d0c320L, 0xda3c0f568cc4f3e8L, + 0x8865899617fb1871L, 0xaa7eebfb9df9de8dL, + 0xd51ea6fa85785631L, 0x8533285c936b35deL, + 0xa67ff273b8460356L, 0xd01fef10a657842cL, + 0x8213f56a67f6b29bL, 0xa298f2c501f45f42L, + 0xcb3f2f7642717713L, 0xfe0efb53d30dd4d7L, + 0x9ec95d1463e8a506L, 0xc67bb4597ce2ce48L, + 0xf81aa16fdc1b81daL, 0x9b10a4e5e9913128L, + 0xc1d4ce1f63f57d72L, 0xf24a01a73cf2dccfL, + 0x976e41088617ca01L, 0xbd49d14aa79dbc82L, + 0xec9c459d51852ba2L, 0x93e1ab8252f33b45L, + 0xb8da1662e7b00a17L, 0xe7109bfba19c0c9dL, + 0x906a617d450187e2L, 0xb484f9dc9641e9daL, + 0xe1a63853bbd26451L, 0x8d07e33455637eb2L, + 0xb049dc016abc5e5fL, 0xdc5c5301c56b75f7L, + 0x89b9b3e11b6329baL, 0xac2820d9623bf429L, + 0xd732290fbacaf133L, 0x867f59a9d4bed6c0L, + 0xa81f301449ee8c70L, 0xd226fc195c6a2f8cL, + 0x83585d8fd9c25db7L, 0xa42e74f3d032f525L, + 0xcd3a1230c43fb26fL, 0x80444b5e7aa7cf85L, + 0xa0555e361951c366L, 0xc86ab5c39fa63440L, + 0xfa856334878fc150L, 0x9c935e00d4b9d8d2L, + 0xc3b8358109e84f07L, 0xf4a642e14c6262c8L, + 0x98e7e9cccfbd7dbdL, 0xbf21e44003acdd2cL, + 0xeeea5d5004981478L, 
0x95527a5202df0ccbL, + 0xbaa718e68396cffdL, 0xe950df20247c83fdL, + 0x91d28b7416cdd27eL, 0xb6472e511c81471dL, + 0xe3d8f9e563a198e5L, 0x8e679c2f5e44ff8fL}; + /** + * A complement to mantissa_64 + * complete to a 128-bit mantissa. + * Uses about 5KB but is rarely accessed. + */ + private final static long[] MANTISSA_128 = { + 0x419ea3bd35385e2dL, 0x52064cac828675b9L, + 0x7343efebd1940993L, 0x1014ebe6c5f90bf8L, + 0xd41a26e077774ef6L, 0x8920b098955522b4L, + 0x55b46e5f5d5535b0L, 0xeb2189f734aa831dL, + 0xa5e9ec7501d523e4L, 0x47b233c92125366eL, + 0x999ec0bb696e840aL, 0xc00670ea43ca250dL, + 0x380406926a5e5728L, 0xc605083704f5ecf2L, + 0xf7864a44c633682eL, 0x7ab3ee6afbe0211dL, + 0x5960ea05bad82964L, 0x6fb92487298e33bdL, + 0xa5d3b6d479f8e056L, 0x8f48a4899877186cL, + 0x331acdabfe94de87L, 0x9ff0c08b7f1d0b14L, + 0x7ecf0ae5ee44dd9L, 0xc9e82cd9f69d6150L, + 0xbe311c083a225cd2L, 0x6dbd630a48aaf406L, + 0x92cbbccdad5b108L, 0x25bbf56008c58ea5L, + 0xaf2af2b80af6f24eL, 0x1af5af660db4aee1L, + 0x50d98d9fc890ed4dL, 0xe50ff107bab528a0L, + 0x1e53ed49a96272c8L, 0x25e8e89c13bb0f7aL, + 0x77b191618c54e9acL, 0xd59df5b9ef6a2417L, + 0x4b0573286b44ad1dL, 0x4ee367f9430aec32L, + 0x229c41f793cda73fL, 0x6b43527578c1110fL, + 0x830a13896b78aaa9L, 0x23cc986bc656d553L, + 0x2cbfbe86b7ec8aa8L, 0x7bf7d71432f3d6a9L, + 0xdaf5ccd93fb0cc53L, 0xd1b3400f8f9cff68L, + 0x23100809b9c21fa1L, 0xabd40a0c2832a78aL, + 0x16c90c8f323f516cL, 0xae3da7d97f6792e3L, + 0x99cd11cfdf41779cL, 0x40405643d711d583L, + 0x482835ea666b2572L, 0xda3243650005eecfL, + 0x90bed43e40076a82L, 0x5a7744a6e804a291L, + 0x711515d0a205cb36L, 0xd5a5b44ca873e03L, + 0xe858790afe9486c2L, 0x626e974dbe39a872L, + 0xfb0a3d212dc8128fL, 0x7ce66634bc9d0b99L, + 0x1c1fffc1ebc44e80L, 0xa327ffb266b56220L, + 0x4bf1ff9f0062baa8L, 0x6f773fc3603db4a9L, + 0xcb550fb4384d21d3L, 0x7e2a53a146606a48L, + 0x2eda7444cbfc426dL, 0xfa911155fefb5308L, + 0x793555ab7eba27caL, 0x4bc1558b2f3458deL, + 0x9eb1aaedfb016f16L, 0x465e15a979c1cadcL, + 0xbfacd89ec191ec9L, 0xcef980ec671f667bL, + 0x82b7e12780e7401aL, 0xd1b2ecb8b0908810L, + 0x861fa7e6dcb4aa15L, 0x67a791e093e1d49aL, + 0xe0c8bb2c5c6d24e0L, 0x58fae9f773886e18L, + 0xaf39a475506a899eL, 0x6d8406c952429603L, + 0xc8e5087ba6d33b83L, 0xfb1e4a9a90880a64L, + 0x5cf2eea09a55067fL, 0xf42faa48c0ea481eL, + 0xf13b94daf124da26L, 0x76c53d08d6b70858L, + 0x54768c4b0c64ca6eL, 0xa9942f5dcf7dfd09L, + 0xd3f93b35435d7c4cL, 0xc47bc5014a1a6dafL, + 0x359ab6419ca1091bL, 0xc30163d203c94b62L, + 0x79e0de63425dcf1dL, 0x985915fc12f542e4L, + 0x3e6f5b7b17b2939dL, 0xa705992ceecf9c42L, + 0x50c6ff782a838353L, 0xa4f8bf5635246428L, + 0x871b7795e136be99L, 0x28e2557b59846e3fL, + 0x331aeada2fe589cfL, 0x3ff0d2c85def7621L, + 0xfed077a756b53a9L, 0xd3e8495912c62894L, + 0x64712dd7abbbd95cL, 0xbd8d794d96aacfb3L, + 0xecf0d7a0fc5583a0L, 0xf41686c49db57244L, + 0x311c2875c522ced5L, 0x7d633293366b828bL, + 0xae5dff9c02033197L, 0xd9f57f830283fdfcL, + 0xd072df63c324fd7bL, 0x4247cb9e59f71e6dL, + 0x52d9be85f074e608L, 0x67902e276c921f8bL, + 0xba1cd8a3db53b6L, 0x80e8a40eccd228a4L, + 0x6122cd128006b2cdL, 0x796b805720085f81L, + 0xcbe3303674053bb0L, 0xbedbfc4411068a9cL, + 0xee92fb5515482d44L, 0x751bdd152d4d1c4aL, + 0xd262d45a78a0635dL, 0x86fb897116c87c34L, + 0xd45d35e6ae3d4da0L, 0x8974836059cca109L, + 0x2bd1a438703fc94bL, 0x7b6306a34627ddcfL, + 0x1a3bc84c17b1d542L, 0x20caba5f1d9e4a93L, + 0x547eb47b7282ee9cL, 0xe99e619a4f23aa43L, + 0x6405fa00e2ec94d4L, 0xde83bc408dd3dd04L, + 0x9624ab50b148d445L, 0x3badd624dd9b0957L, + 0xe54ca5d70a80e5d6L, 0x5e9fcf4ccd211f4cL, + 0x7647c3200069671fL, 0x29ecd9f40041e073L, + 0xf468107100525890L, 
0x7182148d4066eeb4L, + 0xc6f14cd848405530L, 0xb8ada00e5a506a7cL, + 0xa6d90811f0e4851cL, 0x908f4a166d1da663L, + 0x9a598e4e043287feL, 0x40eff1e1853f29fdL, + 0xd12bee59e68ef47cL, 0x82bb74f8301958ceL, + 0xe36a52363c1faf01L, 0xdc44e6c3cb279ac1L, + 0x29ab103a5ef8c0b9L, 0x7415d448f6b6f0e7L, + 0x111b495b3464ad21L, 0xcab10dd900beec34L, + 0x3d5d514f40eea742L, 0xcb4a5a3112a5112L, + 0x47f0e785eaba72abL, 0x59ed216765690f56L, + 0x306869c13ec3532cL, 0x1e414218c73a13fbL, + 0xe5d1929ef90898faL, 0xdf45f746b74abf39L, + 0x6b8bba8c328eb783L, 0x66ea92f3f326564L, + 0xc80a537b0efefebdL, 0xbd06742ce95f5f36L, + 0x2c48113823b73704L, 0xf75a15862ca504c5L, + 0x9a984d73dbe722fbL, 0xc13e60d0d2e0ebbaL, + 0x318df905079926a8L, 0xfdf17746497f7052L, + 0xfeb6ea8bedefa633L, 0xfe64a52ee96b8fc0L, + 0x3dfdce7aa3c673b0L, 0x6bea10ca65c084eL, + 0x486e494fcff30a62L, 0x5a89dba3c3efccfaL, + 0xf89629465a75e01cL, 0xf6bbb397f1135823L, + 0x746aa07ded582e2cL, 0xa8c2a44eb4571cdcL, + 0x92f34d62616ce413L, 0x77b020baf9c81d17L, + 0xace1474dc1d122eL, 0xd819992132456baL, + 0x10e1fff697ed6c69L, 0xca8d3ffa1ef463c1L, + 0xbd308ff8a6b17cb2L, 0xac7cb3f6d05ddbdeL, + 0x6bcdf07a423aa96bL, 0x86c16c98d2c953c6L, + 0xe871c7bf077ba8b7L, 0x11471cd764ad4972L, + 0xd598e40d3dd89bcfL, 0x4aff1d108d4ec2c3L, + 0xcedf722a585139baL, 0xc2974eb4ee658828L, + 0x733d226229feea32L, 0x806357d5a3f525fL, + 0xca07c2dcb0cf26f7L, 0xfc89b393dd02f0b5L, + 0xbbac2078d443ace2L, 0xd54b944b84aa4c0dL, + 0xa9e795e65d4df11L, 0x4d4617b5ff4a16d5L, + 0x504bced1bf8e4e45L, 0xe45ec2862f71e1d6L, + 0x5d767327bb4e5a4cL, 0x3a6a07f8d510f86fL, + 0x890489f70a55368bL, 0x2b45ac74ccea842eL, + 0x3b0b8bc90012929dL, 0x9ce6ebb40173744L, + 0xcc420a6a101d0515L, 0x9fa946824a12232dL, + 0x47939822dc96abf9L, 0x59787e2b93bc56f7L, + 0x57eb4edb3c55b65aL, 0xede622920b6b23f1L, + 0xe95fab368e45ecedL, 0x11dbcb0218ebb414L, + 0xd652bdc29f26a119L, 0x4be76d3346f0495fL, + 0x6f70a4400c562ddbL, 0xcb4ccd500f6bb952L, + 0x7e2000a41346a7a7L, 0x8ed400668c0c28c8L, + 0x728900802f0f32faL, 0x4f2b40a03ad2ffb9L, + 0xe2f610c84987bfa8L, 0xdd9ca7d2df4d7c9L, + 0x91503d1c79720dbbL, 0x75a44c6397ce912aL, + 0xc986afbe3ee11abaL, 0xfbe85badce996168L, + 0xfae27299423fb9c3L, 0xdccd879fc967d41aL, + 0x5400e987bbc1c920L, 0x290123e9aab23b68L, + 0xf9a0b6720aaf6521L, 0xf808e40e8d5b3e69L, + 0xb60b1d1230b20e04L, 0xb1c6f22b5e6f48c2L, + 0x1e38aeb6360b1af3L, 0x25c6da63c38de1b0L, + 0x579c487e5a38ad0eL, 0x2d835a9df0c6d851L, + 0xf8e431456cf88e65L, 0x1b8e9ecb641b58ffL, + 0xe272467e3d222f3fL, 0x5b0ed81dcc6abb0fL, + 0x98e947129fc2b4e9L, 0x3f2398d747b36224L, + 0x8eec7f0d19a03aadL, 0x1953cf68300424acL, + 0x5fa8c3423c052dd7L, 0x3792f412cb06794dL, + 0xe2bbd88bbee40bd0L, 0x5b6aceaeae9d0ec4L, + 0xf245825a5a445275L, 0xeed6e2f0f0d56712L, + 0x55464dd69685606bL, 0xaa97e14c3c26b886L, + 0xd53dd99f4b3066a8L, 0xe546a8038efe4029L, + 0xde98520472bdd033L, 0x963e66858f6d4440L, + 0xdde7001379a44aa8L, 0x5560c018580d5d52L, + 0xaab8f01e6e10b4a6L, 0xcab3961304ca70e8L, + 0x3d607b97c5fd0d22L, 0x8cb89a7db77c506aL, + 0x77f3608e92adb242L, 0x55f038b237591ed3L, + 0x6b6c46dec52f6688L, 0x2323ac4b3b3da015L, + 0xabec975e0a0d081aL, 0x96e7bd358c904a21L, + 0x7e50d64177da2e54L, 0xdde50bd1d5d0b9e9L, + 0x955e4ec64b44e864L, 0xbd5af13bef0b113eL, + 0xecb1ad8aeacdd58eL, 0x67de18eda5814af2L, + 0x80eacf948770ced7L, 0xa1258379a94d028dL, + 0x96ee45813a04330L, 0x8bca9d6e188853fcL, + 0x775ea264cf55347dL, 0x95364afe032a819dL, + 0x3a83ddbd83f52204L, 0xc4926a9672793542L, + 0x75b7053c0f178293L, 0x5324c68b12dd6338L, + 0xd3f6fc16ebca5e03L, 0x88f4bb1ca6bcf584L, + 0x2b31e9e3d06c32e5L, 0x3aff322e62439fcfL, + 0x9befeb9fad487c2L, 
0x4c2ebe687989a9b3L, + 0xf9d37014bf60a10L, 0x538484c19ef38c94L, + 0x2865a5f206b06fb9L, 0xf93f87b7442e45d3L, + 0xf78f69a51539d748L, 0xb573440e5a884d1bL, + 0x31680a88f8953030L, 0xfdc20d2b36ba7c3dL, + 0x3d32907604691b4cL, 0xa63f9a49c2c1b10fL, + 0xfcf80dc33721d53L, 0xd3c36113404ea4a8L, + 0x645a1cac083126e9L, 0x3d70a3d70a3d70a3L, + 0xccccccccccccccccL, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x0L, + 0x0L, 0x4000000000000000L, + 0x5000000000000000L, 0xa400000000000000L, + 0x4d00000000000000L, 0xf020000000000000L, + 0x6c28000000000000L, 0xc732000000000000L, + 0x3c7f400000000000L, 0x4b9f100000000000L, + 0x1e86d40000000000L, 0x1314448000000000L, + 0x17d955a000000000L, 0x5dcfab0800000000L, + 0x5aa1cae500000000L, 0xf14a3d9e40000000L, + 0x6d9ccd05d0000000L, 0xe4820023a2000000L, + 0xdda2802c8a800000L, 0xd50b2037ad200000L, + 0x4526f422cc340000L, 0x9670b12b7f410000L, + 0x3c0cdd765f114000L, 0xa5880a69fb6ac800L, + 0x8eea0d047a457a00L, 0x72a4904598d6d880L, + 0x47a6da2b7f864750L, 0x999090b65f67d924L, + 0xfff4b4e3f741cf6dL, 0xbff8f10e7a8921a4L, + 0xaff72d52192b6a0dL, 0x9bf4f8a69f764490L, + 0x2f236d04753d5b4L, 0x1d762422c946590L, + 0x424d3ad2b7b97ef5L, 0xd2e0898765a7deb2L, + 0x63cc55f49f88eb2fL, 0x3cbf6b71c76b25fbL, + 0x8bef464e3945ef7aL, 0x97758bf0e3cbb5acL, + 0x3d52eeed1cbea317L, 0x4ca7aaa863ee4bddL, + 0x8fe8caa93e74ef6aL, 0xb3e2fd538e122b44L, + 0x60dbbca87196b616L, 0xbc8955e946fe31cdL, + 0x6babab6398bdbe41L, 0xc696963c7eed2dd1L, + 0xfc1e1de5cf543ca2L, 0x3b25a55f43294bcbL, + 0x49ef0eb713f39ebeL, 0x6e3569326c784337L, + 0x49c2c37f07965404L, 0xdc33745ec97be906L, + 0x69a028bb3ded71a3L, 0xc40832ea0d68ce0cL, + 0xf50a3fa490c30190L, 0x792667c6da79e0faL, + 0x577001b891185938L, 0xed4c0226b55e6f86L, + 0x544f8158315b05b4L, 0x696361ae3db1c721L, + 0x3bc3a19cd1e38e9L, 0x4ab48a04065c723L, + 0x62eb0d64283f9c76L, 0x3ba5d0bd324f8394L, + 0xca8f44ec7ee36479L, 0x7e998b13cf4e1ecbL, + 0x9e3fedd8c321a67eL, 0xc5cfe94ef3ea101eL, + 0xbba1f1d158724a12L, 0x2a8a6e45ae8edc97L, + 0xf52d09d71a3293bdL, 0x593c2626705f9c56L, + 0x6f8b2fb00c77836cL, 0xb6dfb9c0f956447L, + 0x4724bd4189bd5eacL, 0x58edec91ec2cb657L, + 0x2f2967b66737e3edL, 0xbd79e0d20082ee74L, + 0xecd8590680a3aa11L, 0xe80e6f4820cc9495L, + 0x3109058d147fdcddL, 0xbd4b46f0599fd415L, + 0x6c9e18ac7007c91aL, 0x3e2cf6bc604ddb0L, + 0x84db8346b786151cL, 0xe612641865679a63L, + 0x4fcb7e8f3f60c07eL, 0xe3be5e330f38f09dL, + 0x5cadf5bfd3072cc5L, 0x73d9732fc7c8f7f6L, + 0x2867e7fddcdd9afaL, 0xb281e1fd541501b8L, + 0x1f225a7ca91a4226L, 0x3375788de9b06958L, + 0x52d6b1641c83aeL, 0xc0678c5dbd23a49aL, + 0xf840b7ba963646e0L, 0xb650e5a93bc3d898L, + 0xa3e51f138ab4cebeL, 0xc66f336c36b10137L, + 0xb80b0047445d4184L, 0xa60dc059157491e5L, + 0x87c89837ad68db2fL, 0x29babe4598c311fbL, + 0xf4296dd6fef3d67aL, 0x1899e4a65f58660cL, + 0x5ec05dcff72e7f8fL, 0x76707543f4fa1f73L, + 0x6a06494a791c53a8L, 0x487db9d17636892L, + 0x45a9d2845d3c42b6L, 0xb8a2392ba45a9b2L, + 0x8e6cac7768d7141eL, 0x3207d795430cd926L, + 0x7f44e6bd49e807b8L, 0x5f16206c9c6209a6L, + 0x36dba887c37a8c0fL, 0xc2494954da2c9789L, + 0xf2db9baa10b7bd6cL, 0x6f92829494e5acc7L, + 0xcb772339ba1f17f9L, 0xff2a760414536efbL, + 0xfef5138519684abaL, 0x7eb258665fc25d69L, + 0xef2f773ffbd97a61L, 0xaafb550ffacfd8faL, + 0x95ba2a53f983cf38L, 0xdd945a747bf26183L, + 0x94f971119aeef9e4L, 0x7a37cd5601aab85dL, + 0xac62e055c10ab33aL, 0x577b986b314d6009L, + 0xed5a7e85fda0b80bL, 0x14588f13be847307L, + 0x596eb2d8ae258fc8L, 0x6fca5f8ed9aef3bbL, + 
0x25de7bb9480d5854L, 0xaf561aa79a10ae6aL, + 0x1b2ba1518094da04L, 0x90fb44d2f05d0842L, + 0x353a1607ac744a53L, 0x42889b8997915ce8L, + 0x69956135febada11L, 0x43fab9837e699095L, + 0x94f967e45e03f4bbL, 0x1d1be0eebac278f5L, + 0x6462d92a69731732L, 0x7d7b8f7503cfdcfeL, + 0x5cda735244c3d43eL, 0x3a0888136afa64a7L, + 0x88aaa1845b8fdd0L, 0x8aad549e57273d45L, + 0x36ac54e2f678864bL, 0x84576a1bb416a7ddL, + 0x656d44a2a11c51d5L, 0x9f644ae5a4b1b325L, + 0x873d5d9f0dde1feeL, 0xa90cb506d155a7eaL, + 0x9a7f12442d588f2L, 0xc11ed6d538aeb2fL, + 0x8f1668c8a86da5faL, 0xf96e017d694487bcL, + 0x37c981dcc395a9acL, 0x85bbe253f47b1417L, + 0x93956d7478ccec8eL, 0x387ac8d1970027b2L, + 0x6997b05fcc0319eL, 0x441fece3bdf81f03L, + 0xd527e81cad7626c3L, 0x8a71e223d8d3b074L, + 0xf6872d5667844e49L, 0xb428f8ac016561dbL, + 0xe13336d701beba52L, 0xecc0024661173473L, + 0x27f002d7f95d0190L, 0x31ec038df7b441f4L, + 0x7e67047175a15271L, 0xf0062c6e984d386L, + 0x52c07b78a3e60868L, 0xa7709a56ccdf8a82L, + 0x88a66076400bb691L, 0x6acff893d00ea435L, + 0x583f6b8c4124d43L, 0xc3727a337a8b704aL, + 0x744f18c0592e4c5cL, 0x1162def06f79df73L, + 0x8addcb5645ac2ba8L, 0x6d953e2bd7173692L, + 0xc8fa8db6ccdd0437L, 0x1d9c9892400a22a2L, + 0x2503beb6d00cab4bL, 0x2e44ae64840fd61dL, + 0x5ceaecfed289e5d2L, 0x7425a83e872c5f47L, + 0xd12f124e28f77719L, 0x82bd6b70d99aaa6fL, + 0x636cc64d1001550bL, 0x3c47f7e05401aa4eL, + 0x65acfaec34810a71L, 0x7f1839a741a14d0dL, + 0x1ede48111209a050L, 0x934aed0aab460432L, + 0xf81da84d5617853fL, 0x36251260ab9d668eL, + 0xc1d72b7c6b426019L, 0xb24cf65b8612f81fL, + 0xdee033f26797b627L, 0x169840ef017da3b1L, + 0x8e1f289560ee864eL, 0xf1a6f2bab92a27e2L, + 0xae10af696774b1dbL, 0xacca6da1e0a8ef29L, + 0x17fd090a58d32af3L, 0xddfc4b4cef07f5b0L, + 0x4abdaf101564f98eL, 0x9d6d1ad41abe37f1L, + 0x84c86189216dc5edL, 0x32fd3cf5b4e49bb4L, + 0x3fbc8c33221dc2a1L, 0xfabaf3feaa5334aL, + 0x29cb4d87f2a7400eL, 0x743e20e9ef511012L, + 0x914da9246b255416L, 0x1ad089b6c2f7548eL, + 0xa184ac2473b529b1L, 0xc9e5d72d90a2741eL, + 0x7e2fa67c7a658892L, 0xddbb901b98feeab7L, + 0x552a74227f3ea565L, 0xd53a88958f87275fL, + 0x8a892abaf368f137L, 0x2d2b7569b0432d85L, + 0x9c3b29620e29fc73L, 0x8349f3ba91b47b8fL, + 0x241c70a936219a73L, 0xed238cd383aa0110L, + 0xf4363804324a40aaL, 0xb143c6053edcd0d5L, + 0xdd94b7868e94050aL, 0xca7cf2b4191c8326L, + 0xfd1c2f611f63a3f0L, 0xbc633b39673c8cecL, + 0xd5be0503e085d813L, 0x4b2d8644d8a74e18L, + 0xddf8e7d60ed1219eL, 0xcabb90e5c942b503L, + 0x3d6a751f3b936243L, 0xcc512670a783ad4L, + 0x27fb2b80668b24c5L, 0xb1f9f660802dedf6L, + 0x5e7873f8a0396973L, 0xdb0b487b6423e1e8L, + 0x91ce1a9a3d2cda62L, 0x7641a140cc7810fbL, + 0xa9e904c87fcb0a9dL, 0x546345fa9fbdcd44L, + 0xa97c177947ad4095L, 0x49ed8eabcccc485dL, + 0x5c68f256bfff5a74L, 0x73832eec6fff3111L, + 0xc831fd53c5ff7eabL, 0xba3e7ca8b77f5e55L, + 0x28ce1bd2e55f35ebL, 0x7980d163cf5b81b3L, + 0xd7e105bcc332621fL, 0x8dd9472bf3fefaa7L, + 0xb14f98f6f0feb951L, 0x6ed1bf9a569f33d3L, + 0xa862f80ec4700c8L, 0xcd27bb612758c0faL, + 0x8038d51cb897789cL, 0xe0470a63e6bd56c3L, + 0x1858ccfce06cac74L, 0xf37801e0c43ebc8L, + 0xd30560258f54e6baL, 0x47c6b82ef32a2069L, + 0x4cdc331d57fa5441L, 0xe0133fe4adf8e952L, + 0x58180fddd97723a6L, 0x570f09eaa7ea7648L}; + + /** + * Prevents instantiation. + */ + private FastDoubleMath() { + + } + + static double decFloatLiteralToDouble(int index, boolean isNegative, long digits, int exponent, int virtualIndexOfPoint, long exp_number, boolean isDigitsTruncated, int skipCountInTruncatedDigits) { + if (digits == 0) { + return isNegative ? 
-0.0 : 0.0; + } + final double outDouble; + if (isDigitsTruncated) { + final long exponentOfTruncatedDigits = virtualIndexOfPoint - index + skipCountInTruncatedDigits + exp_number; + + // We have too many digits. We may have to round up. + // To know whether rounding up is needed, we may have to examine up to 768 digits. + + // There are cases, in which rounding has no effect. + if (FASTFLOAT_DEC_SMALLEST_POWER <= exponentOfTruncatedDigits + && exponentOfTruncatedDigits <= FASTFLOAT_DEC_LARGEST_POWER) { + double withoutRounding = tryDecToDoubleWithFastAlgorithm(isNegative, digits, (int) exponentOfTruncatedDigits); + double roundedUp = tryDecToDoubleWithFastAlgorithm(isNegative, digits + 1, (int) exponentOfTruncatedDigits); + if (!Double.isNaN(withoutRounding) && Objects.equals(roundedUp, withoutRounding)) { + return withoutRounding; + } + } + + // We have to take a slow path. + //return Double.parseDouble(str.toString()); + outDouble = Double.NaN; + + } else if (FASTFLOAT_DEC_SMALLEST_POWER <= exponent && exponent <= FASTFLOAT_DEC_LARGEST_POWER) { + outDouble = tryDecToDoubleWithFastAlgorithm(isNegative, digits, exponent); + } else { + outDouble = Double.NaN; + } + return outDouble; + } + + /** + * Computes {@code uint128 product = (uint64)x * (uint64)y}. + *
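+     * The implementation below splits x and y into 32-bit halves and assembles
+     * the 128-bit result from the four partial products p11, p10, p01 and p00,
+     * schoolbook-style in base 2^32.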
+     * <p>
+     * References:
+     * <dl>
+     * <dt>Getting the high part of 64 bit integer multiplication</dt>
+     * <dd>stackoverflow</dd>
+     * </dl>
+ * + * @param x uint64 factor x + * @param y uint64 factor y + * @return uint128 product of x and y + */ + private static Value128 fullMultiplication(long x, long y) { + long x0 = x & 0xffffffffL, x1 = x >>> 32; + long y0 = y & 0xffffffffL, y1 = y >>> 32; + long p11 = x1 * y1, p01 = x0 * y1; + long p10 = x1 * y0, p00 = x0 * y0; + + // 64-bit product + two 32-bit values + long middle = p10 + (p00 >>> 32) + (p01 & 0xffffffffL); + return new Value128( + // 64-bit product + two 32-bit values + p11 + (middle >>> 32) + (p01 >>> 32), + // Add LOW PART and lower half of MIDDLE PART + (middle << 32) | (p00 & 0xffffffffL)); + } + + static double hexFloatLiteralToDouble(int index, boolean isNegative, long digits, long exponent, int virtualIndexOfPoint, long exp_number, boolean isDigitsTruncated, int skipCountInTruncatedDigits) { + if (digits == 0) { + return isNegative ? -0.0 : 0.0; + } + final double outDouble; + if (isDigitsTruncated) { + final long truncatedExponent = (virtualIndexOfPoint - index + skipCountInTruncatedDigits) * 4L + + exp_number; + + // We have too many digits. We may have to round up. + // To know whether rounding up is needed, we may have to examine up to 768 digits. + + // There are cases, in which rounding has no effect. + if (FASTFLOAT_HEX_SMALLEST_POWER <= truncatedExponent && truncatedExponent <= FASTFLOAT_HEX_LARGEST_POWER) { + double withoutRounding = tryHexToDoubleWithFastAlgorithm(isNegative, digits, (int) truncatedExponent); + double roundedUp = tryHexToDoubleWithFastAlgorithm(isNegative, digits + 1, (int) truncatedExponent); + if (!Double.isNaN(withoutRounding) && Objects.equals(roundedUp, withoutRounding)) { + return withoutRounding; + } + } + + // We have to take a slow path. + outDouble = Double.NaN; + + } else if (FASTFLOAT_HEX_SMALLEST_POWER <= exponent && exponent <= FASTFLOAT_HEX_LARGEST_POWER) { + outDouble = tryHexToDoubleWithFastAlgorithm(isNegative, digits, (int) exponent); + } else { + outDouble = Double.NaN; + } + return outDouble; + } + + /** + * Attempts to compute {@literal digits * 10^(power)} exactly; + * and if "negative" is true, negate the result. + *
+     * <p>
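+     * As a sketch of the happy case: {@literal digits = 3, power = 2} satisfies
+     * the Clinger fast-path conditions below and yields exactly
+     * {@literal 3.0 * 100.0 = 300.0}, because both operands and their product
+     * are exactly representable in binary64.
+     * <p>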
+ * This function will only work in some cases, when it does not work it + * returns null. This should work *most of the time* (like 99% of the time). + * We assume that power is in the [FASTFLOAT_SMALLEST_POWER, + * FASTFLOAT_LARGEST_POWER] interval: the caller is responsible for this check. + * + * @param isNegative whether the number is negative + * @param digits uint64 the digits of the number + * @param power int32 the exponent of the number + * @return the computed double on success, {@link Double#NaN} on failure + */ + static double tryDecToDoubleWithFastAlgorithm(boolean isNegative, long digits, int power) { + if (digits == 0 || power < -380 - 19) { + return isNegative ? -0.0 : 0.0; + } + if (power > 380) { + return isNegative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; + } + + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 + if (-22 <= power && power <= 22 && Long.compareUnsigned(digits, 0x1fffffffffffffL) <= 0) { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + double d = (double) digits; + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / powerOfTen[-power]; + } else { + d = d * powerOfTen[power]; + } + return (isNegative) ? -d : d; + } + + + // The fast path has now failed, so we are falling back on the slower path. + + // We are going to need to do some 64-bit arithmetic to get a more precise product. + // We use a table lookup approach. + // It is safe because + // power >= FASTFLOAT_SMALLEST_POWER + // and power <= FASTFLOAT_LARGEST_POWER + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + long factor_mantissa = MANTISSA_64[power - FASTFLOAT_DEC_SMALLEST_POWER]; + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + long exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + // We want the most significant bit of digits to be 1. Shift if needed. + int lz = Long.numberOfLeadingZeros(digits); + digits <<= lz; + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
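+        // (Illustration: an input like 5e30 reaches this point, since |power| > 22
+        // rules out the fast path above; the value is then approximated from the
+        // truncated 64-bit mantissa of 10^30 by a 64x64->128-bit multiplication.)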
+ Value128 product = fullMultiplication(digits, factor_mantissa); + long lower = product.low; + long upper = product.high; + // We know that upper has at most one leading zero because + // both i and factor_mantissa have a leading one. This means + // that the result is at least as large as ((1<<63)*(1<<63))/(1<<64). + + // As long as the first 9 bits of "upper" are not "1", then we + // know that we have an exact computed value for the leading + // 55 bits because any imprecision would play out as a +1, in + // the worst case. + // Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + // We expect this next branch to be rarely taken (say 1% of the time). + // When (upper & 0x1FF) == 0x1FF, it can be common for + // lower + i < lower to be true (proba. much higher than 1%). + if ((upper & 0x1FF) == 0x1FF && Long.compareUnsigned(lower + digits, lower) < 0) { + long factor_mantissa_low = + MANTISSA_128[power - FASTFLOAT_DEC_SMALLEST_POWER]; + // next, we compute the 64-bit x 128-bit multiplication, getting a 192-bit + // result (three 64-bit values) + product = fullMultiplication(digits, factor_mantissa_low); + long product_low = product.low; + long product_middle2 = product.high; + long product_middle1 = lower; + long product_high = upper; + long product_middle = product_middle1 + product_middle2; + if (Long.compareUnsigned(product_middle, product_middle1) < 0) { + product_high++; // overflow carry + } + + + // we want to check whether mantissa *i + i would affect our result + // This does happen, e.g. with 7.3177701707893310e+15 + if (((product_middle + 1 == 0) && ((product_high & 0x1ff) == 0x1ff) && + (product_low + Long.compareUnsigned(digits, product_low) < 0))) { // let us be prudent and bail out. + return Double.NaN; + } + upper = product_high; + //lower = product_middle; + } + + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + long upperbit = upper >>> 63; + long mantissa = upper >>> (upperbit + 9); + lz += (int) (1 ^ upperbit); + // Here we have mantissa < (1<<54). + + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + if (((upper & 0x1ff) == 0x1ff) + || ((upper & 0x1ff) == 0) && (mantissa & 3) == 1) { + // if mantissa & 1 == 1 we might need to round up. + // + // Scenarios: + // 1. We are not in the middle. Then we should round up. + // + // 2. We are right in the middle. Whether we round up depends + // on the last significant bit: if it is "one" then we round + // up (round to even) otherwise, we do not. + // + // So if the last significant bit is 1, we can safely round up. + // Hence we only need to bail out if (mantissa & 3) == 1. + // Otherwise we may need more accuracy or analysis to determine whether + // we are exactly between two floating-point numbers. + // It can be triggered with 1e23. + // Note: because the factor_mantissa and factor_mantissa_low are + // almost always rounded down (except for small positive powers), + // almost always should round up. 
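+            // Returning NaN signals failure to the caller, which is expected to
+            // fall back to a slower but fully accurate parsing path.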
+ return Double.NaN; + } + + mantissa += 1; + mantissa >>>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1L << 53)) { + // This will happen when parsing values such as 7.2057594037927933e+16 + mantissa = (1L << 52); + lz--; // undo previous addition + } + + mantissa &= ~(1L << 52); + long real_exponent = exponent - lz; + // we have to check that real_exponent is in range, otherwise we bail out + if ((real_exponent < 1) || (real_exponent > 2046)) { + return Double.NaN; + } + + long bits = mantissa | real_exponent << 52 + | (isNegative ? 1L << 63 : 0L); + return Double.longBitsToDouble(bits); + } + + /** + * Attempts to compute {@literal digits * 2^(power)} exactly; + * and if "negative" is true, negate the result. + *
+     * <p>
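+     * For example, {@literal digits = 3, power = 5} is computed exactly as
+     * {@literal 3 * 2^5 = 96.0} via {@code Math.scalb} in the fast path below.
+     * <p>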
+     * This function will only work in some cases; when it does not work it
+     * returns {@link Double#NaN}.
+     *
+     * @param isNegative whether the number is negative
+     * @param digits uint64 the digits of the number
+     * @param power int32 the exponent of the number
+     * @return the computed double on success, {@link Double#NaN} on failure
+     */
+    static double tryHexToDoubleWithFastAlgorithm(boolean isNegative, long digits, int power) {
+        if (digits == 0 || power < Double.MIN_EXPONENT - 54) {
+            return isNegative ? -0.0 : 0.0;
+        }
+        if (power > Double.MAX_EXPONENT) {
+            return isNegative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
+        }
+
+        // we start with a fast path
+        // We try to mimic the fast path described by Clinger WD for decimal
+        // float number literals. How to read floating point numbers accurately.
+        // ACM SIGPLAN Notices. 1990
+        if (Long.compareUnsigned(digits, 0x1fffffffffffffL) <= 0) {
+            // convert the integer into a double. This is lossless since
+            // 0 <= i <= 2^53 - 1.
+            double d = (double) digits;
+            //
+            // The general idea is as follows.
+            // If 0 <= s < 2^53 then
+            // 1) Both s and p can be represented exactly as 64-bit floating-point
+            // values (binary64).
+            // 2) Because s and p can be represented exactly as floating-point values,
+            // then s * p will produce correctly rounded values.
+            //
+            d = d * Math.scalb(1d, power);
+            if (isNegative) {
+                d = -d;
+            }
+            return d;
+        }
+
+        // The fast path has failed
+        return Double.NaN;
+    }
+
+    private static class Value128 {
+
+        final long high, low;
+
+        private Value128(long high, long low) {
+            this.high = high;
+            this.low = low;
+        }
+    }
+}
diff --git a/extensions/csv/src/main/java/io/deephaven/csv/util/Renderer.java b/extensions/csv/src/main/java/io/deephaven/csv/util/Renderer.java
new file mode 100644
index 00000000000..1be7a147d20
--- /dev/null
+++ b/extensions/csv/src/main/java/io/deephaven/csv/util/Renderer.java
@@ -0,0 +1,24 @@
+package io.deephaven.csv.util;
+
+import java.util.function.Function;
+
+public class Renderer {
+    public static <T> String renderList(Iterable<T> items) {
+        return renderList(items, ", ", Object::toString);
+    }
+
+    public static <T> String renderList(Iterable<T> items, String separator) {
+        return renderList(items, separator, Object::toString);
+    }
+
+    public static <T> String renderList(Iterable<T> items, final String separator, Function<T, String> renderer) {
+        String separatorToUse = "";
+        final StringBuilder result = new StringBuilder();
+        for (T item : items) {
+            result.append(separatorToUse);
+            result.append(renderer.apply(item));
+            separatorToUse = separator;
+        }
+        return result.toString();
+    }
+}
diff --git a/extensions/csv/src/test/java/io/deephaven/csv/CsvTest.java b/extensions/csv/src/test/java/io/deephaven/csv/CsvTest.java
index e8738950279..3f79239d65a 100644
--- a/extensions/csv/src/test/java/io/deephaven/csv/CsvTest.java
+++ b/extensions/csv/src/test/java/io/deephaven/csv/CsvTest.java
@@ -1,529 +1,638 @@
 package io.deephaven.csv;
+import io.deephaven.csv.parsers.Parsers;
+import io.deephaven.engine.table.Table;
+import io.deephaven.engine.table.impl.util.ColumnHolder;
+import io.deephaven.engine.util.TableTools;
 import io.deephaven.qst.column.header.ColumnHeader;
-import io.deephaven.qst.column.header.ColumnHeaders3;
-import io.deephaven.qst.table.NewTable;
 import io.deephaven.qst.table.TableHeader;
+import io.deephaven.time.DateTime;
 import org.assertj.core.api.Assertions;
 import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-import
java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.nio.charset.StandardCharsets; -import java.time.Instant; + +import java.io.*; import java.time.LocalDateTime; import java.time.ZoneOffset; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; -@RunWith(Parameterized.class) public class CsvTest { + private static final DateTime DATETIME_A = + DateTime.of(LocalDateTime.of(2021, 9, 27, 19, 0, 0).toInstant(ZoneOffset.UTC)); + private static final DateTime DATETIME_B = + DateTime.of(LocalDateTime.of(2021, 9, 27, 20, 0, 0).toInstant(ZoneOffset.UTC)); + + @Test + public void timestamp() { + final String input = "" + + "Timestamp\n" + + "2021-09-27T19:00:00Z\n" + + "\n" + + "2021-09-27T20:00:00Z\n"; + + final Table expected = TableTools.newTable( + TableTools.col("Timestamp", DATETIME_A, null, DATETIME_B) + ); + + invokeTest(input, CsvSpecs.csv(), expected); + } + + @Test + public void timestampSeconds() { + final String input = "" + + "Timestamp\n" + + "1632769200\n" + + "\n" + + "1632772800\n"; + + final Table expected = TableTools.newTable( + TableTools.col("Timestamp", DATETIME_A, null, DATETIME_B) + ); + + invokeTest(input, + CsvSpecs.builder().inference(InferenceSpecs.standardTimes()).build(), + expected); + } - private static final Instant TIMESTAMP_A = LocalDateTime.of(2021, 9, 27, 19, 0, 0).toInstant(ZoneOffset.UTC); - private static final Instant TIMESTAMP_B = LocalDateTime.of(2021, 9, 27, 20, 0, 0).toInstant(ZoneOffset.UTC); - - @Parameters(name = "{0}") - public static Iterable parameters() { - return () -> tests().stream().map(CsvTest::parameterize).iterator(); - } - - public static List tests() { - return Arrays.asList( - timestamp(), - timestampSeconds(), - timestampMillis(), - timestampMicros(), - timestampNanos(), - timestampMixed(), - timestampLegacy(), - bools(), - chars(), - byteIsShort(), - byteViaHeader(), - byteViaInference(), - shortRange(), - intRange(), - longRange(), - longAsStringsViaInference(), - longAsStringsViaParser(), - longAsStringsViaHeader(), - longOverrideStringInference(), - doubleRange(), - floatIsDouble(), - floatViaHeader(), - floatViaInference(), - floatFromDouble(), - strings(), - stringsPound(), - languageExample(), - languageExampleTsv(), - languageExampleHeaderless(), - languageExampleHeaderlessExplicit(), - whitespaceNoQuotes(), - whitespaceNoQuotesLiteral(), - whitespaceOutside(), - whitespaceInsideDefault(), - whitespaceInsideTrim(), - whitespaceInsideAndOutsideDefault(), - whitespaceInsideAndOutsideTrim()); - } - - public static CsvTest timestamp() { - final NewTable expected = ColumnHeader.ofInstant("Timestamp") - .row(TIMESTAMP_A) - .row(null) - .row(TIMESTAMP_B) - .newTable(); - return new CsvTest("timestamp", "timestamp.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest timestampSeconds() { - final NewTable expected = ColumnHeader.ofInstant("Timestamp") - .row(TIMESTAMP_A) - .row(null) - .row(TIMESTAMP_B) - .newTable(); - return new CsvTest("timestampSeconds", "timestamp-seconds.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest timestampMillis() { - final NewTable expected = ColumnHeader.ofInstant("Timestamp") - .row(TIMESTAMP_A) - .row(null) - .row(TIMESTAMP_B) - .newTable(); - return new CsvTest("timestampMillis", "timestamp-millis.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest timestampMicros() { - final NewTable expected = ColumnHeader.ofInstant("Timestamp") - .row(TIMESTAMP_A) - .row(null) - 
.row(TIMESTAMP_B) - .newTable(); - return new CsvTest("timestampMicros", "timestamp-micros.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest timestampNanos() { - final NewTable expected = ColumnHeader.ofInstant("Timestamp") - .row(TIMESTAMP_A) - .row(null) - .row(TIMESTAMP_B) - .newTable(); - return new CsvTest("timestampNanos", "timestamp-nanos.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest timestampMixed() { - // Can't infer milli and micros in a single column - will parse as a long, not an Instant. - final NewTable expected = ColumnHeader.ofLong("Timestamp") - .row(TIMESTAMP_A.toEpochMilli()) - .row(null) - .row(TIMESTAMP_B.toEpochMilli() * 1000L) - .newTable(); - return new CsvTest("timestampMixed", "timestamp-mixed.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest timestampLegacy() { - final NewTable expected = ColumnHeader.ofInstant("Timestamp") - .row(TIMESTAMP_A) - .row(null) - .row(TIMESTAMP_B) - .newTable(); - return new CsvTest("timestampLegacy", "timestamp-legacy.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest bools() { - final NewTable expected = ColumnHeader.ofBoolean("Bool") - .row(true) - .row(null) - .row(false) - .row(true) - .row(false) - .row(true) - .row(false) - .newTable(); - return new CsvTest("bools", "bools.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest chars() { - final NewTable expected = ColumnHeader.ofChar("Char") - .row('A') - .row(null) - .row('B') - .row('C') - .row('1') - .row('2') - .row('3') - .newTable(); - return new CsvTest("chars", "chars.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest byteViaHeader() { - final NewTable expected = ColumnHeader.ofByte("Byte") - .row((byte) (Byte.MIN_VALUE + 1)) - .row(null) - .row(Byte.MAX_VALUE) - .newTable(); - return new CsvTest("byteViaHeader", "byte.csv", CsvSpecs.builder().header(expected.header()).build(), expected); - } - - public static CsvTest byteViaInference() { - final NewTable expected = ColumnHeader.ofByte("Byte") - .row((byte) (Byte.MIN_VALUE + 1)) - .row(null) - .row(Byte.MAX_VALUE) - .newTable(); - return new CsvTest("byteViaInference", "byte.csv", - CsvSpecs.builder().inference(InferenceSpecs.builder().addParsers(Parser.BYTE).build()).build(), + @Test + public void timestampMillis() { + final String input = "" + + "Timestamp\n" + + "1632769200000\n" + + "\n" + + "1632772800000\n"; + + final Table expected = TableTools.newTable( + TableTools.col("Timestamp", DATETIME_A, null, DATETIME_B) + ); + + invokeTest(input, + CsvSpecs.builder().inference(InferenceSpecs.milliTimes()).build(), expected); } - public static CsvTest byteIsShort() { + @Test + public void timestampMicros() { + final String input = "" + + "Timestamp\n" + + "1632769200000000\n" + + "\n" + + "1632772800000000\n"; + + final Table expected = TableTools.newTable( + TableTools.col("Timestamp", DATETIME_A, null, DATETIME_B) + ); + + invokeTest(input, + CsvSpecs.builder().inference(InferenceSpecs.microTimes()).build(), + expected); + } + + @Test + public void timestampNanos() { + final String input = "" + + "Timestamp\n" + + "1632769200000000000\n" + + "\n" + + "1632772800000000000\n"; + + final Table expected = TableTools.newTable( + TableTools.col("Timestamp", DATETIME_A, null, DATETIME_B) + ); + + invokeTest(input, + CsvSpecs.builder().inference(InferenceSpecs.nanoTimes()).build(), + expected); + } + + @Test + public void timestampLegacy() { + final String input = "" + + "Timestamp\n" + + "2021-09-27T19:00:00 UTC\n" + + "\n" + + "2021-09-27T20:00:00 
UTC\n"; + + final Table expected = TableTools.newTable( + TableTools.col("Timestamp", DATETIME_A, null, DATETIME_B) + ); + + invokeTest(input, CsvSpecs.csv(), expected); + } + + @Test + public void bools() { + final String input = "" + + "Bool\n" + + "true\n" + + "\n" + + "false\n" + + "True\n" + + "False\n" + + "TrUe\n" + + "FALSE\n"; + + final Table expected = TableTools.newTable( + TableTools.col("Bool", true, null, false, true, false, true, false) + ); + + invokeTest(input, CsvSpecs.csv(), expected); + } + + @Test + public void chars() { + final String input = "" + + "Char\n" + + "A\n" + + "\n" + + "B\n" + + "C\n" + + "1\n" + + "2\n" + + "3\n"; + + final Table expected = TableTools.newTable( + TableTools.col("Char", 'A', null, 'B', 'C', '1', '2', '3') + ); + + invokeTest(input, CsvSpecs.csv(), expected); + } + + private static final String BYTE_INPUT = "" + + "Byte\n" + + "-127\n" + + "\n" + + "127\n"; + + @Test + public void byteViaHeader() { + final Table expected = TableTools.newTable( + TableTools.col("Byte", (byte) (Byte.MIN_VALUE + 1), null, Byte.MAX_VALUE) + ); + final TableHeader expectedHeader = ColumnHeader.ofByte("Byte").tableHeader(); + invokeTest(BYTE_INPUT, CsvSpecs.builder().header(expectedHeader).build(), expected); + } + + @Test + public void byteViaInference() { + final Table expected = TableTools.newTable( + TableTools.col("Byte", (byte) (Byte.MIN_VALUE + 1), null, Byte.MAX_VALUE) + ); + + invokeTest(BYTE_INPUT, + CsvSpecs.builder().inference(InferenceSpecs.builder() + .addParsers(Parsers.BYTE) + .build()).build(), + expected); + } + + @Test + public void byteIsShort() { // By default, byte will be parsed as short - final NewTable expected = ColumnHeader.ofShort("Byte") - .row((short) (Byte.MIN_VALUE + 1)) - .row(null) - .row((short) Byte.MAX_VALUE) - .newTable(); - return new CsvTest("byteIsShort", "byte.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest shortRange() { - final NewTable expected = ColumnHeader.ofShort("Short") - .row((short) (Short.MIN_VALUE + 1)) - .row(null) - .row(Short.MAX_VALUE) - .newTable(); - return new CsvTest("shortRange", "short.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest intRange() { - final NewTable expected = ColumnHeader.ofInt("Int") - .row(Integer.MIN_VALUE + 1) - .row(null) - .row(Integer.MAX_VALUE) - .newTable(); - return new CsvTest("intRange", "int.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest longRange() { - final NewTable expected = ColumnHeader.ofLong("Long") - .row(Long.MIN_VALUE + 1) - .row(null) - .row(Long.MAX_VALUE) - .newTable(); - return new CsvTest("longRange", "long.csv", CsvSpecs.csv(), expected); - } - - public static CsvTest longAsStringsViaInference() { - final NewTable expected = ColumnHeader.ofString("Long") - .row("-9223372036854775807") - .row(null) - .row("9223372036854775807") - .newTable(); - return new CsvTest("longAsStringsViaInference", "long.csv", + final Table expected = TableTools.newTable( + TableTools.col("Byte", (short) (Byte.MIN_VALUE + 1), null, (short)Byte.MAX_VALUE) + ); + + invokeTest(BYTE_INPUT, CsvSpecs.csv(), expected); + } + + @Test + public void shortRange() { + final String input = "" + + "Short\n" + + "-32767\n" + + "\n" + + "32767\n"; + + final Table expected = TableTools.newTable( + TableTools.col("Short", (short) (Short.MIN_VALUE + 1), null, Short.MAX_VALUE) + ); + + invokeTest(input, CsvSpecs.csv(), expected); + } + + @Test + public void intRange() { + final String input = "" + + "Int\n" + + "-2147483647\n" + + "\n" + + "2147483647\n"; 
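+        // Note: the range starts at Integer.MIN_VALUE + 1 rather than MIN_VALUE,
+        // since Deephaven reserves Integer.MIN_VALUE as the null sentinel
+        // (QueryConstants.NULL_INT), so it cannot appear as a data value.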
+ + final Table expected = TableTools.newTable( + TableTools.col("Int", Integer.MIN_VALUE + 1, null, Integer.MAX_VALUE) + ); + + invokeTest(input, CsvSpecs.csv(), expected); + } + + private static final String LONG_INPUT = "" + + "Long\n" + + "-9223372036854775807\n" + + "\n" + + "9223372036854775807\n"; + + @Test + public void longRange() { + final Table expected = TableTools.newTable( + TableTools.col("Long", Long.MIN_VALUE + 1, null, Long.MAX_VALUE) + ); + invokeTest(LONG_INPUT, CsvSpecs.csv(), expected); + } + + @Test + public void longAsStringsViaInference() { + final Table expected = TableTools.newTable( + TableTools.col("Long", "-9223372036854775807", null, "9223372036854775807") + ); + invokeTest(LONG_INPUT, CsvSpecs.builder().inference(InferenceSpecs.strings()).build(), expected); } - public static CsvTest longAsStringsViaParser() { - final NewTable expected = ColumnHeader.ofString("Long") - .row("-9223372036854775807") - .row(null) - .row("9223372036854775807") - .newTable(); - return new CsvTest("longAsStringsViaParser", "long.csv", - CsvSpecs.builder().putParsers("Long", Parser.STRING).build(), expected); + @Test + public void longAsStringsViaParser() { + final Table expected = TableTools.newTable( + TableTools.col("Long", "-9223372036854775807", null, "9223372036854775807") + ); + invokeTest(LONG_INPUT, + CsvSpecs.builder() + .putParsers("Long", Parsers.STRING) + .build(), + expected); } - public static CsvTest longAsStringsViaHeader() { - final NewTable expected = ColumnHeader.ofString("Long") - .row("-9223372036854775807") - .row(null) - .row("9223372036854775807") - .newTable(); - return new CsvTest("longAsStringsViaHeader", "long.csv", CsvSpecs.builder().header(expected.header()).build(), + @Test + public void longAsStringsViaHeader() { + final Table expected = TableTools.newTable( + TableTools.col("Long", "-9223372036854775807", null, "9223372036854775807") + ); + final TableHeader expectedHeader = ColumnHeader.ofString("Long").tableHeader(); + invokeTest(LONG_INPUT, CsvSpecs.builder().header(expectedHeader).build(), expected); } - public static CsvTest longOverrideStringInference() { - final NewTable expected = ColumnHeader.ofLong("Long") - .row(Long.MIN_VALUE + 1) - .row(null) - .row(Long.MAX_VALUE) - .newTable(); + @Test + public void longOverrideStringInference() { + final Table expected = TableTools.newTable( + TableTools.col("Long", Long.MIN_VALUE + 1, null, Long.MAX_VALUE) + ); final InferenceSpecs stringOnlyInference = InferenceSpecs.strings(); - final TableHeader tableHeader = ColumnHeader.ofLong("Long").tableHeader(); - final CsvSpecs csvSpecs = CsvSpecs.builder().inference(stringOnlyInference).header(tableHeader).build(); - return new CsvTest("longOverrideStringInference", "long.csv", csvSpecs, expected); + final TableHeader expectedHeader = ColumnHeader.ofLong("Long").tableHeader(); + final CsvSpecs csvSpecs = CsvSpecs.builder().inference(stringOnlyInference).header(expectedHeader).build(); + invokeTest(LONG_INPUT, csvSpecs, expected); } - public static CsvTest floatIsDouble() { + private static final String FLOAT_INPUT = "" + + "Float\n" + + "Infinity\n" + + "\n" + + "-Infinity\n" + + "NaN\n" + + "3.4028234e+38\n" + + "1.17549435E-38\n" + + "1.4e-45\n"; + + @Test + public void floatIsDouble() { // By defaults, floats are parsed as double - final NewTable expected = ColumnHeader.ofDouble("Float") - .row((double) Float.POSITIVE_INFINITY) - .row(null) - .row((double) Float.NEGATIVE_INFINITY) - .row((double) Float.NaN).row(3.4028235e+38d) - .row(1.17549435E-38d) 
-                .row(1.4e-45d)
-                .newTable();
-        return new CsvTest("floatIsDouble", "floats.csv", CsvSpecs.csv(), expected);
-    }
-
-    public static CsvTest floatViaHeader() {
-        final NewTable expected = ColumnHeader.ofFloat("Float")
-                .row(Float.POSITIVE_INFINITY)
-                .row(null)
-                .row(Float.NEGATIVE_INFINITY)
-                .row(Float.NaN)
-                .row(Float.MAX_VALUE)
-                .row(Float.MIN_NORMAL)
-                .row(Float.MIN_VALUE)
-                .newTable();
-        return new CsvTest("floatViaHeader", "floats.csv", CsvSpecs.builder().header(expected.header()).build(),
-                expected);
+        final Table expected = TableTools.newTable(
+                TableTools.col("Float",
+                        (double) Float.POSITIVE_INFINITY,
+                        null,
+                        (double) Float.NEGATIVE_INFINITY,
+                        (double) Float.NaN,
+                        3.4028234e+38d,
+                        1.17549435E-38d,
+                        1.4e-45d)
+        );
+        invokeTest(FLOAT_INPUT, CsvSpecs.csv(), expected);
     }
-    public static CsvTest floatViaInference() {
-        final NewTable expected = ColumnHeader.ofFloat("Float")
-                .row(Float.POSITIVE_INFINITY)
-                .row(null)
-                .row(Float.NEGATIVE_INFINITY)
-                .row(Float.NaN)
-                .row(Float.MAX_VALUE)
-                .row(Float.MIN_NORMAL)
-                .row(Float.MIN_VALUE)
-                .newTable();
-        return new CsvTest("floatViaInference", "floats.csv",
-                CsvSpecs.builder().inference(InferenceSpecs.builder().addParsers(Parser.FLOAT).build()).build(),
+    @Test
+    public void floatViaHeader() {
+        final Table expected = TableTools.newTable(
+                TableTools.col("Float",
+                        Float.POSITIVE_INFINITY,
+                        null,
+                        Float.NEGATIVE_INFINITY,
+                        Float.NaN,
+                        Float.MAX_VALUE,
+                        Float.MIN_NORMAL,
+                        Float.MIN_VALUE)
+        );
+        final TableHeader expectedHeader = ColumnHeader.ofFloat("Float").tableHeader();
+        invokeTest(FLOAT_INPUT, CsvSpecs.builder().header(expectedHeader).build(),
                 expected);
     }
-    public static CsvTest floatFromDouble() {
-        final NewTable expected = ColumnHeader.ofFloat("Double")
-                .row((float) Double.POSITIVE_INFINITY)
-                .row(null)
-                .row((float) Double.NEGATIVE_INFINITY)
-                .row((float) Double.NaN)
-                .row((float) Double.MAX_VALUE)
-                .row((float) Double.MIN_NORMAL)
-                .row((float) Double.MIN_VALUE)
-                .newTable();
-        return new CsvTest("floatFromDouble", "doubles.csv", CsvSpecs.builder().header(expected.header()).build(),
+    @Test
+    public void floatViaInference() {
+        final Table expected = TableTools.newTable(
+                TableTools.col("Float",
+                        Float.POSITIVE_INFINITY,
+                        null,
+                        Float.NEGATIVE_INFINITY,
+                        Float.NaN,
+                        Float.MAX_VALUE,
+                        Float.MIN_NORMAL,
+                        Float.MIN_VALUE)
+        );
+        invokeTest(FLOAT_INPUT,
+                CsvSpecs.builder()
+                        .inference(InferenceSpecs.builder()
+                                .addParsers(Parsers.FLOAT).build())
+                        .build(),
                 expected);
     }
-    public static CsvTest doubleRange() {
-        final NewTable expected = ColumnHeader.ofDouble("Double")
-                .row(Double.POSITIVE_INFINITY)
-                .row(null)
-                .row(Double.NEGATIVE_INFINITY)
-                .row(Double.NaN)
-                .row(Double.MAX_VALUE)
-                .row(Double.MIN_NORMAL)
-                .row(Double.MIN_VALUE)
-                .newTable();
-        return new CsvTest("doubleRange", "doubles.csv", CsvSpecs.csv(), expected);
-    }
+    // public static CsvTest floatFromDouble() {
+    //     final NewTable expected = ColumnHeader.ofFloat("Double")
+    //             .row((float) Double.POSITIVE_INFINITY)
+    //             .row(null)
+    //             .row((float) Double.NEGATIVE_INFINITY)
+    //             .row((float) Double.NaN)
+    //             .row((float) Double.MAX_VALUE)
+    //             .row((float) Double.MIN_NORMAL)
+    //             .row((float) Double.MIN_VALUE)
+    //             .newTable();
+    //     return new CsvTest("floatFromDouble", "doubles.csv", CsvSpecs.builder().header(expected.header()).build(),
+    //             expected);
+    // }
-    public static CsvTest strings() {
-        final NewTable expected = ColumnHeader.ofString("String")
-                .row("Hello, world")
-                .row(null)
-                .row("Goodbye.")
-                .newTable();
-        return new CsvTest("strings", "strings.csv", CsvSpecs.csv(), expected);
+    @Test
+    public void doubleRange() {
+        final String input = "" +
+                "Double\n" +
+                "Infinity\n" +
+                "\n" +
+                "-Infinity\n" +
+                "NaN\n" +
+                "1.7976931348623157e+308\n" +
+                "2.2250738585072014E-308\n" +
+                "4.9e-324\n";
+        final Table expected = TableTools.newTable(
+                TableTools.col("Double",
+                        Double.POSITIVE_INFINITY,
+                        null,
+                        Double.NEGATIVE_INFINITY,
+                        Double.NaN,
+                        Double.MAX_VALUE,
+                        Double.MIN_NORMAL,
+                        Double.MIN_VALUE)
+        );
+        invokeTest(input, CsvSpecs.csv(), expected);
     }
-    public static CsvTest stringsPound() {
-        final NewTable expected = ColumnHeader.ofString("String")
-                .row("Hello, world")
-                .row(null)
-                .row("Goodbye.")
-                .newTable();
-        return new CsvTest("stringsPound", "strings-pound.csv", CsvSpecs.builder().quote('#').build(), expected);
+    @Test
+    public void strings() {
+        final String input = "" +
+                "String\n" +
+                "\"Hello, world\"\n" +
+                "\n" +
+                "Goodbye.\n";
+
+        final Table expected = TableTools.newTable(
+                TableTools.col("String",
+                        "Hello, world",
+                        null,
+                        "Goodbye.")
+        );
+
+        invokeTest(input, CsvSpecs.csv(), expected);
     }
-    public static CsvTest languageExample() {
-        return new CsvTest("languageExample", "language-example.csv", CsvSpecs.csv(), languageCreatorTypeTable());
+    @Test
+    public void stringsPound() {
+        final String input = "" +
+                "String\n" +
+                "#Hello, world#\n" +
+                "\n" +
+                "Goodbye.\n";
+        final Table expected = TableTools.newTable(
+                TableTools.col("String",
+                        "Hello, world",
+                        null,
+                        "Goodbye.")
+        );
+        invokeTest(input, CsvSpecs.builder().quote('#').build(), expected);
+    }
+
+    private static final String LANGUAGE_EXAMPLE_HEADERLESS_INPUT = "" +
+            "C,Dennis Ritchie,Compiled\n" +
+            "C++,Bjarne Stroustrup,Compiled\n" +
+            "Fortran,John Backus,Compiled\n" +
+            "Java,James Gosling,Both\n" +
+            "JavaScript,Brendan Eich,Interpreted\n" +
+            "MATLAB,Cleve Moler,Interpreted\n" +
+            "Pascal,Niklaus Wirth,Compiled\n" +
+            "Python,Guido van Rossum,Interpreted\n";
+
+    private static final String LANGUAGE_EXAMPLE_INPUT = "" +
+            "Language,Creator,Type\n" +
+            LANGUAGE_EXAMPLE_HEADERLESS_INPUT;
+
+    private static final String LANGUAGE_EXAMPLE_TSV = "" +
+            "Language\tCreator\tType\n" +
+            "C\tDennis Ritchie\tCompiled\n" +
+            "C++\tBjarne Stroustrup\tCompiled\n" +
+            "Fortran\tJohn Backus\tCompiled\n" +
+            "Java\tJames Gosling\tBoth\n" +
+            "JavaScript\tBrendan Eich\tInterpreted\n" +
+            "MATLAB\tCleve Moler\tInterpreted\n" +
+            "Pascal\tNiklaus Wirth\tCompiled\n" +
+            "Python\tGuido van Rossum\tInterpreted\n";
+
+    @Test
+    public void languageExample() {
+        invokeTest(LANGUAGE_EXAMPLE_INPUT, CsvSpecs.csv(), languageCreatorTypeTable());
     }
-    public static CsvTest languageExampleTsv() {
-        return new CsvTest("languageExampleTsv", "language-example.tsv", CsvSpecs.tsv(), languageCreatorTypeTable());
+    @Test
+    public void languageExampleTsv() {
+        invokeTest(LANGUAGE_EXAMPLE_TSV, CsvSpecs.tsv(), languageCreatorTypeTable());
     }
-    public static CsvTest languageExampleHeaderless() {
-        return new CsvTest("languageExampleHeaderless", "language-example-headerless.csv", CsvSpecs.headerless(),
+    @Test
+    public void languageExampleHeaderless() {
+        invokeTest(LANGUAGE_EXAMPLE_HEADERLESS_INPUT, CsvSpecs.headerless(),
                 languageCreatorTypeTableHeaderless());
     }
-    public static CsvTest languageExampleHeaderlessExplicit() {
-        final NewTable expected = languageCreatorTypeTable();
-        final CsvSpecs specs = CsvSpecs.headerless(expected.header());
-        return new CsvTest("languageExampleHeaderlessExplicit",
-                "language-example-headerless.csv", specs, expected);
-    }
+    @Test
+    public void languageExampleHeaderlessExplicit() {
+        final Table expected = languageCreatorTypeTable();
+        final TableHeader expectedHeader = TableHeader.builder()
+                .addHeaders(
+                        ColumnHeader.ofString("Language"),
+                        ColumnHeader.ofString("Creator"),
+                        ColumnHeader.ofString("Type")
+                ).build();
+        final CsvSpecs specs = CsvSpecs.headerless(expectedHeader);
+        invokeTest(LANGUAGE_EXAMPLE_HEADERLESS_INPUT, specs, expected);
+    }
+
+    private static final String WHITESPACE_NO_QUOTES = "" +
+            "Sym,Type,Price,SecurityId\n" +
+            "GOOG, Dividend, 0.25, 200\n" +
+            "T, Dividend, 0.15, 300\n" +
+            " Z, Dividend, 0.18, 500\n";
-    public static CsvTest whitespaceNoQuotes() {
-        final NewTable expected = ColumnHeader.of(
-                ColumnHeader.ofString("Sym"),
-                ColumnHeader.ofString("Type"),
-                ColumnHeader.ofDouble("Price"),
-                ColumnHeader.ofShort("SecurityId"))
-                .row("GOOG", "Dividend", 0.25, (short) 200)
-                .row("T", "Dividend", 0.15, (short) 300)
-                .row("Z", "Dividend", 0.18, (short) 500)
-                .newTable();
+    @Test
+    public void whitespaceNoQuotes() {
+        final Table expected = TableTools.newTable(
+                TableTools.col("Sym", "GOOG", "T", "Z"),
+                TableTools.col("Type", "Dividend", "Dividend", "Dividend"),
+                TableTools.col("Price", 0.25, 0.15, 0.18),
+                TableTools.col("SecurityId", (short) 200, (short) 300, (short) 500)
+        );
         final CsvSpecs specs = CsvSpecs.csv();
-        return new CsvTest("whitespaceNoQuotes", "whitespace-no-quotes.csv", specs, expected);
-    }
-
-    public static CsvTest whitespaceNoQuotesLiteral() {
-        final NewTable expected = ColumnHeader.of(
-                ColumnHeader.ofString("Sym"),
-                ColumnHeader.ofString("Type"),
-                ColumnHeader.ofString("Price"),
-                ColumnHeader.ofString("SecurityId"))
-                .row("GOOG", " Dividend", " 0.25", " 200")
-                .row("T", " Dividend", " 0.15", " 300")
-                .row(" Z", " Dividend", " 0.18", " 500")
-                .newTable();
+        invokeTest(WHITESPACE_NO_QUOTES, specs, expected);
+    }
+
+    @Test
+    public void whitespaceNoQuotesLiteral() {
+        final Table expected = TableTools.newTable(
+                TableTools.col("Sym", "GOOG", "T", " Z"),
+                TableTools.col("Type", " Dividend", " Dividend", " Dividend"),
+                TableTools.col("Price", 0.25, 0.15, 0.18),
+                TableTools.col("SecurityId", (short) 200, (short) 300, (short) 500)
+        );
         final CsvSpecs specs = CsvSpecs.builder().ignoreSurroundingSpaces(false).build();
-        return new CsvTest("whitespaceNoQuotesLiteral", "whitespace-no-quotes.csv", specs, expected);
-    }
-
-    public static CsvTest whitespaceOutside() {
-        final NewTable expected = ColumnHeader.of(
-                ColumnHeader.ofString("Sym"),
-                ColumnHeader.ofString("Type"),
-                ColumnHeader.ofDouble("Price"),
-                ColumnHeader.ofShort("SecurityId"))
-                .row("GOOG", "Dividend", 0.25, (short) 200)
-                .row("T", "Dividend", 0.15, (short) 300)
-                .row("Z", "Dividend", 0.18, (short) 500)
-                .newTable();
-        final CsvSpecs specs = CsvSpecs.csv();
-        return new CsvTest("whitespaceOutside", "whitespace-outside.csv", specs, expected);
-    }
-
-    public static CsvTest whitespaceInsideDefault() {
-        final NewTable expected = ColumnHeader.of(
-                ColumnHeader.ofString("Sym"),
-                ColumnHeader.ofString("Type"),
-                ColumnHeader.ofString("Price"),
-                ColumnHeader.ofString("SecurityId"))
-                .row("GOOG", " Dividend", " 0.25", " 200")
-                .row("T", " Dividend", " 0.15", " 300")
-                .row(" Z", " Dividend", " 0.18", " 500")
-                .newTable();
+        invokeTest(WHITESPACE_NO_QUOTES, specs, expected);
+    }
+
+    @Test
+    public void whitespaceOutside() {
+        final String input = ("" +
+                "Sym,Type,Price,SecurityId\n" +
+                "`GOOG`, `Dividend`, `0.25`, `200`\n" +
+                "`T`, `Dividend`, `0.15`, `300`\n" +
+                " `Z`, `Dividend`, `0.18`, `500`\n")
+                .replace('`', '"'); // backticks for readability
+
+        final Table expected = TableTools.newTable(
+                TableTools.col("Sym", "GOOG", "T", "Z"),
+                TableTools.col("Type", "Dividend", "Dividend", "Dividend"),
+                TableTools.col("Price", 0.25, 0.15, 0.18),
+                TableTools.col("SecurityId", (short) 200, (short) 300, (short) 500)
+        );
         final CsvSpecs specs = CsvSpecs.csv();
-        return new CsvTest("whitespaceInsideDefault", "whitespace-inside.csv", specs, expected);
-    }
-
-    public static CsvTest whitespaceInsideTrim() {
-        final NewTable expected = ColumnHeader.of(
-                ColumnHeader.ofString("Sym"),
-                ColumnHeader.ofString("Type"),
-                ColumnHeader.ofDouble("Price"),
-                ColumnHeader.ofShort("SecurityId"))
-                .row("GOOG", "Dividend", 0.25, (short) 200)
-                .row("T", "Dividend", 0.15, (short) 300)
-                .row("Z", "Dividend", 0.18, (short) 500)
-                .newTable();
-        final CsvSpecs specs = CsvSpecs.builder().trim(true).build();
-        return new CsvTest("whitespaceInsideTrim", "whitespace-inside.csv", specs, expected);
-    }
-
-    public static CsvTest whitespaceInsideAndOutsideDefault() {
-        final NewTable expected = ColumnHeader.of(
-                ColumnHeader.ofString("Sym"),
-                ColumnHeader.ofString("Type"),
-                ColumnHeader.ofString("Price"),
-                ColumnHeader.ofString("SecurityId"))
-                .row("GOOG", " Dividend", " 0.25", " 200")
-                .row("T", " Dividend", " 0.15", " 300")
-                .row(" Z", " Dividend", " 0.18", " 500")
-                .newTable();
+        invokeTest(input, specs, expected);
+    }
+
+    private static final String WHITESPACE_INSIDE = ("" +
+            "Sym,Type,Price,SecurityId\n" +
+            "`GOOG`,` Dividend`,` 0.25`,` 200`\n" +
+            "`T`,` Dividend`,` 0.15`,` 300`\n" +
+            "` Z`,` Dividend`,` 0.18`,` 500`\n")
+            .replace('`', '"'); // backticks for readability
+
+    @Test
+    public void whitespaceInsideDefault() {
+        final Table expected = TableTools.newTable(
+                TableTools.col("Sym", "GOOG", "T", " Z"),
+                TableTools.col("Type", " Dividend", " Dividend", " Dividend"),
+                TableTools.col("Price", 0.25, 0.15, 0.18),
+                TableTools.col("SecurityId", (short) 200, (short) 300, (short) 500)
+        );
         final CsvSpecs specs = CsvSpecs.csv();
-        return new CsvTest("whitespaceInsideAndOutsideDefault", "whitespace-inside-and-outside.csv", specs, expected);
-    }
-
-    public static CsvTest whitespaceInsideAndOutsideTrim() {
-        final NewTable expected = ColumnHeader.of(
-                ColumnHeader.ofString("Sym"),
-                ColumnHeader.ofString("Type"),
-                ColumnHeader.ofDouble("Price"),
-                ColumnHeader.ofShort("SecurityId"))
-                .row("GOOG", "Dividend", 0.25, (short) 200)
-                .row("T", "Dividend", 0.15, (short) 300)
-                .row("Z", "Dividend", 0.18, (short) 500)
-                .newTable();
-        final CsvSpecs specs = CsvSpecs.builder().trim(true).build();
-        return new CsvTest("whitespaceInsideAndOutsideTrim", "whitespace-inside-and-outside.csv", specs, expected);
+        invokeTest(WHITESPACE_INSIDE, specs, expected);
     }
-    private static NewTable languageCreatorTypeTable() {
-        return populateLanguageExample(ColumnHeader.ofString("Language")
-                .header(ColumnHeader.ofString("Creator"))
-                .header(ColumnHeader.ofString("Type")));
+    @Test
+    public void whitespaceInsideTrim() {
+        final Table expected = TableTools.newTable(
+                TableTools.col("Sym", "GOOG", "T", "Z"),
+                TableTools.col("Type", "Dividend", "Dividend", "Dividend"),
+                TableTools.col("Price", 0.25, 0.15, 0.18),
+                TableTools.col("SecurityId", (short) 200, (short) 300, (short) 500)
+        );
+        final CsvSpecs specs = CsvSpecs.builder().trim(true).build();
+        invokeTest(WHITESPACE_INSIDE, specs, expected);
    }
-    private static NewTable languageCreatorTypeTableHeaderless() {
-        return populateLanguageExample(ColumnHeader.ofString("Column1")
-                .header(ColumnHeader.ofString("Column2"))
-                .header(ColumnHeader.ofString("Column3")));
+    private static final String WHITESPACE_INSIDE_AND_OUTSIDE = ("" +
+            "Sym,Type,Price,SecurityId\n" +
+            "=GOOG=, = Dividend=, = 0.25=, = 200=\n" +
+            "=T=, = Dividend=, = 0.15=, = 300=\n" +
+            "= Z=, = Dividend=, = 0.18=, = 500=\n")
+            .replace('=', '"'); // using = instead of quotes for readability
+
+    @Test
+    public void whitespaceInsideAndOutsideDefault() {
+        final Table expected = TableTools.newTable(
+                TableTools.col("Sym", "GOOG", "T", " Z"),
+                TableTools.col("Type", " Dividend", " Dividend", " Dividend"),
+                TableTools.col("Price", 0.25, 0.15, 0.18),
+                TableTools.col("SecurityId", (short) 200, (short) 300, (short) 500)
+        );
+        final CsvSpecs specs = CsvSpecs.csv();
+        invokeTest(WHITESPACE_INSIDE_AND_OUTSIDE, specs, expected);
     }
-    private static NewTable populateLanguageExample(ColumnHeaders3 header) {
-        return header
-                .row("C", "Dennis Ritchie", "Compiled")
-                .row("C++", "Bjarne Stroustrup", "Compiled")
-                .row("Fortran", "John Backus", "Compiled")
-                .row("Java", "James Gosling", "Both")
-                .row("JavaScript", "Brendan Eich", "Interpreted")
-                .row("MATLAB", "Cleve Moler", "Interpreted")
-                .row("Pascal", "Niklas Wirth", "Compiled")
-                .row("Python", "Guido van Rossum", "Interpreted")
-                .newTable();
+    @Test
+    public void whitespaceInsideAndOutsideTrim() {
+        final Table expected = TableTools.newTable(
+                TableTools.col("Sym", "GOOG", "T", "Z"),
+                TableTools.col("Type", "Dividend", "Dividend", "Dividend"),
+                TableTools.col("Price", 0.25, 0.15, 0.18),
+                TableTools.col("SecurityId", (short) 200, (short) 300, (short) 500)
+        );
+        final CsvSpecs specs = CsvSpecs.builder().trim(true).build();
+        invokeTest(WHITESPACE_INSIDE_AND_OUTSIDE, specs, expected);
+    }
+
+    @Test
+    public void allNulls() {
+        final String input = "" +
+                "Long\n" +
+                "\n" +
+                "\n" +
+                "\n" +
+                "\n" +
+                "\n";
+        final Table expected = TableTools.newTable(
+                new ColumnHolder<>("Long", Long.class, null, false, null, null, null, null, null)
+        );
+        invokeTest(input,
+                CsvSpecs.builder().putParsers("Long", Parsers.LONG).build(),
+                expected);
     }
-    private final String name;
-    private final String resourceName;
-    private final CsvSpecs specs;
-    private final NewTable expected;
-    public CsvTest(String name, String resourceName, CsvSpecs specs, NewTable expected) {
-        this.name = Objects.requireNonNull(name);
-        this.resourceName = Objects.requireNonNull(resourceName);
-        this.specs = Objects.requireNonNull(specs);
-        this.expected = expected;
+    private static Table languageCreatorTypeTable() {
+        return populateLanguageExample("Language", "Creator", "Type");
     }
-    Object[] parameterize() {
-        return new Object[] {name, resourceName, specs, expected};
+    private static Table languageCreatorTypeTableHeaderless() {
+        return populateLanguageExample("Column1", "Column2", "Column3");
     }
-    @Test
-    public void parseCsv() throws IOException {
-        final InputStream in = CsvTest.class.getResourceAsStream(resourceName);
-        if (in == null) {
-            throw new IllegalArgumentException("Unable to find resource " + resourceName);
-        }
-        final NewTable actual;
-        try (final Reader reader = new InputStreamReader(in, StandardCharsets.UTF_8)) {
-            actual = specs.parse(reader);
-        } catch (Parser.ParserException e) {
-            if (expected == null) {
-                // expected!
-                return;
+    private static Table populateLanguageExample(String col1, String col2, String col3) {
+        return TableTools.newTable(
+                TableTools.col(col1, "C", "C++", "Fortran", "Java",
+                        "JavaScript", "MATLAB", "Pascal", "Python"),
+                TableTools.col(col2, "Dennis Ritchie", "Bjarne Stroustrup", "John Backus", "James Gosling",
+                        "Brendan Eich", "Cleve Moler", "Niklaus Wirth", "Guido van Rossum"),
+                TableTools.col(col3, "Compiled", "Compiled", "Compiled", "Both",
+                        "Interpreted", "Interpreted", "Compiled", "Interpreted")
+        );
+    }
+
+    private static void invokeTest(String input, CsvSpecs specs, Table expected) {
+        try {
+            final Table actual;
+            try (final Reader reader = new StringReader(input)) {
+                actual = specs.parse(reader);
+            } catch (Exception e) {
+                if (expected == null) {
+                    // expected!
+                    return;
+                }
+                throw e;
             }
-            throw e;
+            final String differences = TableTools.diff(actual, expected, 25);
+            Assertions.assertThat(differences).isEmpty();
+        } catch (Exception e) {
+            throw new RuntimeException(e);
         }
-        Assertions.assertThat(actual).isEqualTo(expected);
     }
 }
diff --git a/extensions/csv/src/test/java/io/deephaven/csv/InferenceTest.java b/extensions/csv/src/test/java/io/deephaven/csv/InferenceTest.java
deleted file mode 100644
index 030ff9f4c13..00000000000
--- a/extensions/csv/src/test/java/io/deephaven/csv/InferenceTest.java
+++ /dev/null
@@ -1,163 +0,0 @@
-package io.deephaven.csv;
-
-import org.junit.Test;
-
-import java.util.Arrays;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-public class InferenceTest {
-
-    @Test
-    public void singleParserNoBackupNoItems() {
-        noInfer(InferenceSpecs.builder().addParsers(Parser.INT).onNullParser(null).build());
-    }
-
-    @Test
-    public void singleParserBackupNoItems() {
-        infer(InferenceSpecs.builder().addParsers(Parser.INT).onNullParser(Parser.DOUBLE).build(), Parser.DOUBLE);
-    }
-
-    @Test
-    public void singleParserNoBackupNullItems() {
-        noInfer(InferenceSpecs.builder().addParsers(Parser.INT).onNullParser(null).build(), null, null);
-    }
-
-    @Test
-    public void singleParserBackupNullItems() {
-        infer(InferenceSpecs.builder().addParsers(Parser.INT).onNullParser(Parser.DOUBLE).build(), Parser.DOUBLE, null,
-                null);
-    }
-
-    @Test
-    public void noItems() {
-        // all null defaults to string type
-        infer(Parser.STRING);
-
-        // if we use a custom inference specs with null parser, will not infer
-        noInfer(InferenceSpecs.builder().addParsers(Parser.STRING).onNullParser(null).build());
-    }
-
-    @Test
-    public void allNull() {
-        // all null defaults to string type
-        infer(Parser.STRING, null, null, null, null);
-
-        // if we use a custom inference specs with null parser, will not infer
-        noInfer(InferenceSpecs.builder().addParsers(Parser.STRING).onNullParser(null).build(), null, null, null, null);
-    }
-
-    @Test
-    public void mixedType() {
-        infer(Parser.STRING, "1.0", "1", null, "true", "False");
-    }
-
-    @Test
-    public void stringType() {
-        infer(Parser.STRING, "this", "should", null, "be", "a", "string");
-    }
-
-    @Test
-    public void boolType() {
-        infer(Parser.BOOL, "true", null, "True", "false", "False");
-    }
-
-    @Test
-    public void notQuiteBool() {
-        infer(Parser.STRING, "true", null, "1", "false", "False");
-        infer(Parser.STRING, "true", null, "yes", "false", "False");
-        infer(Parser.STRING, "true", null, "", "false", "False");
-    }
-
-    @Test
-    public void charType() {
-        infer(Parser.CHAR, "a", null, "b", "c", "c");
-    }
-
-    @Test
-    public void notQuiteChar() {
-        infer(Parser.STRING, "a", null, "", "c", "c");
-        infer(Parser.STRING, "a", null, "bb", "c", "c");
-    }
-
-    @Test
-    public void shortType() {
-        infer(Parser.SHORT, "1", "2", null, "-1", String.valueOf(Short.MAX_VALUE));
-    }
-
-    @Test
-    public void notQuiteShort() {
-        infer(Parser.INT, "1", "2", null, "-1", String.valueOf(Short.MAX_VALUE + 1));
-        infer(Parser.STRING, "1", "2", null, "-1", "");
-    }
-
-    @Test
-    public void intType() {
-        infer(Parser.INT, "1", "2", null, "-1", String.valueOf(Integer.MAX_VALUE));
-    }
-
-    @Test
-    public void notQuiteInt() {
-        infer(Parser.LONG, "1", "2", null, "-1", String.valueOf(Integer.MAX_VALUE + 1L));
-        infer(Parser.STRING, "1", "2", null, "-1", "");
-    }
-
-    @Test
-    public void longType() {
-        infer(Parser.LONG, "1", "2", null, "-1", String.valueOf(Long.MAX_VALUE));
-    }
-
-    @Test
-    public void notQuiteLong() {
-        // one more than Long.MAX_VALUE
-        infer(Parser.DOUBLE, "1", "2", null, "-1", "9223372036854775808");
-        infer(Parser.STRING, "1", "2", null, "-1", "");
-    }
-
-    @Test
-    public void doubleType() {
-        infer(Parser.DOUBLE, "1", "1.1", null, "-1", String.valueOf(Double.MIN_VALUE));
-        infer(Parser.DOUBLE, "1", "1.1", null, "-1", String.valueOf(Double.MAX_VALUE));
-        infer(Parser.DOUBLE, "1", "1.1", null, "-1", String.valueOf(Double.NEGATIVE_INFINITY));
-        infer(Parser.DOUBLE, "1", "1.1", null, "-1", String.valueOf(Double.POSITIVE_INFINITY));
-        infer(Parser.DOUBLE, "1", "1.1", null, "-1", String.valueOf(Double.NaN));
-        infer(Parser.DOUBLE, "1", "1.1", null, "-1", String.valueOf(Double.MIN_NORMAL));
-    }
-
-    @Test
-    public void notQuiteDouble() {
-        infer(Parser.STRING, "1", "1.1", null, "-1", "");
-    }
-
-    @Test
-    public void instantType() {
-        infer(Parser.INSTANT, "2019-08-25T11:34:56.000Z", null, "2021-01-01T09:00:00Z");
-    }
-
-    @Test
-    public void notQuiteInstant() {
-        infer(Parser.STRING, "2019-08-25T11:34:56.000Z", null, "");
-    }
-
-    @Test
-    public void shortCircuitEvenIfEventuallyIncorrect() {
-        final InferenceSpecs shortOrInt = InferenceSpecs.builder().addParsers(Parser.SHORT, Parser.INT).build();
-        infer(shortOrInt, Parser.INT, "1", "2", Integer.toString(Short.MAX_VALUE + 1), "This is not an int");
-    }
-
-    private static void infer(Parser expected, String... values) {
-        infer(InferenceSpecs.standard(), expected, values);
-    }
-
-    private static void noInfer(String... values) {
-        noInfer(InferenceSpecs.standard(), values);
-    }
-
-    private static void infer(InferenceSpecs specs, Parser expected, String... values) {
-        assertThat(specs.infer(Arrays.asList(values).iterator())).contains(expected);
-    }
-
-    private static void noInfer(InferenceSpecs specs, String... values) {
-        assertThat(specs.infer(Arrays.asList(values).iterator())).isEmpty();
-    }
-}
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/bools.csv b/extensions/csv/src/test/resources/io/deephaven/csv/bools.csv
deleted file mode 100644
index ba641167774..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/bools.csv
+++ /dev/null
@@ -1,8 +0,0 @@
-Bool
-true
-,
-false
-True
-False
-TrUe
-FALSE
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/byte.csv b/extensions/csv/src/test/resources/io/deephaven/csv/byte.csv
deleted file mode 100644
index 7cbc6ff227a..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/byte.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Byte
--127
-,
-127
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/chars.csv b/extensions/csv/src/test/resources/io/deephaven/csv/chars.csv
deleted file mode 100644
index c7e8f615077..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/chars.csv
+++ /dev/null
@@ -1,8 +0,0 @@
-Char
-A
-,
-B
-C
-1
-2
-3
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/doubles.csv b/extensions/csv/src/test/resources/io/deephaven/csv/doubles.csv
deleted file mode 100644
index 744ff32abd4..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/doubles.csv
+++ /dev/null
@@ -1,8 +0,0 @@
-Double
-Infinity
-,
--Infinity
-NaN
-1.7976931348623157e+308
-2.2250738585072014E-308
-4.9e-324
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/floats.csv b/extensions/csv/src/test/resources/io/deephaven/csv/floats.csv
deleted file mode 100644
index b94fa3f3c27..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/floats.csv
+++ /dev/null
@@ -1,8 +0,0 @@
-Float
-Infinity
-,
--Infinity
-NaN
-3.4028235e+38f
-1.17549435E-38f
-1.4e-45f
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/int.csv b/extensions/csv/src/test/resources/io/deephaven/csv/int.csv
deleted file mode 100644
index e5dc41bb7e0..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/int.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Int
--2147483647
-,
-2147483647
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/language-example-headerless.csv b/extensions/csv/src/test/resources/io/deephaven/csv/language-example-headerless.csv
deleted file mode 100644
index d1cd45b8b4d..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/language-example-headerless.csv
+++ /dev/null
@@ -1,8 +0,0 @@
-C,Dennis Ritchie,Compiled
-C++,Bjarne Stroustrup,Compiled
-Fortran,John Backus,Compiled
-Java,James Gosling,Both
-JavaScript,Brendan Eich,Interpreted
-MATLAB,Cleve Moler,Interpreted
-Pascal,Niklas Wirth,Compiled
-Python,Guido van Rossum,Interpreted
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/language-example.csv b/extensions/csv/src/test/resources/io/deephaven/csv/language-example.csv
deleted file mode 100644
index ffeeb71d53a..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/language-example.csv
+++ /dev/null
@@ -1,9 +0,0 @@
-Language,Creator,Type
-C,Dennis Ritchie,Compiled
-C++,Bjarne Stroustrup,Compiled
-Fortran,John Backus,Compiled
-Java,James Gosling,Both
-JavaScript,Brendan Eich,Interpreted
-MATLAB,Cleve Moler,Interpreted
-Pascal,Niklas Wirth,Compiled
-Python,Guido van Rossum,Interpreted
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/language-example.tsv b/extensions/csv/src/test/resources/io/deephaven/csv/language-example.tsv
deleted file mode 100644
index 4219cecbec4..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/language-example.tsv
+++ /dev/null
@@ -1,9 +0,0 @@
-Language	Creator	Type
-C	Dennis Ritchie	Compiled
-C++	Bjarne Stroustrup	Compiled
-Fortran	John Backus	Compiled
-Java	James Gosling	Both
-JavaScript	Brendan Eich	Interpreted
-MATLAB	Cleve Moler	Interpreted
-Pascal	Niklas Wirth	Compiled
-Python	Guido van Rossum	Interpreted
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/long.csv b/extensions/csv/src/test/resources/io/deephaven/csv/long.csv
deleted file mode 100644
index ff0a0cb279b..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/long.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Long
--9223372036854775807
-,
-9223372036854775807
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/short.csv b/extensions/csv/src/test/resources/io/deephaven/csv/short.csv
deleted file mode 100644
index 83c29f32203..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/short.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Short
--32767
-,
-32767
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/strings-pound.csv b/extensions/csv/src/test/resources/io/deephaven/csv/strings-pound.csv
deleted file mode 100644
index b413d43b0b7..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/strings-pound.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-String
-#Hello, world#
-,
-Goodbye.
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/strings.csv b/extensions/csv/src/test/resources/io/deephaven/csv/strings.csv
deleted file mode 100644
index 61f95587a10..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/strings.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-String
-"Hello, world"
-,
-Goodbye.
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-legacy.csv b/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-legacy.csv
deleted file mode 100644
index 182814c1d68..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-legacy.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Timestamp
-2021-09-27T19:00:00 UTC
-,
-2021-09-27T20:00:00 UTC
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-micros.csv b/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-micros.csv
deleted file mode 100644
index bc73d5759f5..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-micros.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Timestamp
-1632769200000000
-,
-1632772800000000
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-millis.csv b/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-millis.csv
deleted file mode 100644
index 44d4fdd5ea4..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-millis.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Timestamp
-1632769200000
-,
-1632772800000
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-mixed.csv b/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-mixed.csv
deleted file mode 100644
index 4ba0ce1ad94..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-mixed.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Timestamp
-1632769200000
-,
-1632772800000000
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-nanos.csv b/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-nanos.csv
deleted file mode 100644
index 8db502fa0de..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-nanos.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Timestamp
-1632769200000000000
-,
-1632772800000000000
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-seconds.csv b/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-seconds.csv
deleted file mode 100644
index bdb1935814f..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp-seconds.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Timestamp
-1632769200
-,
-1632772800
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp.csv b/extensions/csv/src/test/resources/io/deephaven/csv/timestamp.csv
deleted file mode 100644
index 406bd36c6dc..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/timestamp.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Timestamp
-2021-09-27T19:00:00Z
-,
-2021-09-27T20:00:00Z
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-inside-and-outside.csv b/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-inside-and-outside.csv
deleted file mode 100644
index 7a029354d59..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-inside-and-outside.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sym,Type,Price,SecurityId
-"GOOG", " Dividend", " 0.25", " 200"
-"T", " Dividend", " 0.15", " 300"
-" Z", " Dividend", " 0.18", " 500"
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-inside.csv b/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-inside.csv
deleted file mode 100644
index 4745ea745b1..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-inside.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sym,Type,Price,SecurityId
-"GOOG"," Dividend"," 0.25"," 200"
-"T"," Dividend"," 0.15"," 300"
-" Z"," Dividend"," 0.18"," 500"
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-no-quotes.csv b/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-no-quotes.csv
deleted file mode 100644
index 6e3c34c7fd3..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-no-quotes.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sym,Type,Price,SecurityId
-GOOG, Dividend, 0.25, 200
-T, Dividend, 0.15, 300
- Z, Dividend, 0.18, 500
\ No newline at end of file
diff --git a/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-outside.csv b/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-outside.csv
deleted file mode 100644
index 8bff3eac84e..00000000000
--- a/extensions/csv/src/test/resources/io/deephaven/csv/whitespace-outside.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sym,Type,Price,SecurityId
-"GOOG", "Dividend", "0.25", "200"
-"T", "Dividend", "0.15", "300"
- "Z", "Dividend", "0.18", "500"
\ No newline at end of file