Skip to content

Commit

Permalink
New approach for CSV reading
Browse files Browse the repository at this point in the history
  • Loading branch information
kosak committed Jan 5, 2022
1 parent 7c31c52 commit 52ee70c
Show file tree
Hide file tree
Showing 86 changed files with 10,381 additions and 1,697 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package io.deephaven.treetable;

import io.deephaven.csv.CsvTools;
import io.deephaven.csv.util.CsvReaderException;
import io.deephaven.engine.table.Table;
import io.deephaven.engine.table.lang.QueryLibrary;
import io.deephaven.engine.util.TableTools;
Expand All @@ -9,7 +10,6 @@
import io.deephaven.engine.table.impl.QueryTableTestBase;
import org.junit.Test;

import java.io.IOException;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
Expand All @@ -18,7 +18,7 @@
import static io.deephaven.treetable.TreeTableConstants.ROOT_TABLE_KEY;

public class SnapshotStateTest extends QueryTableTestBase {
private static Table getRawNyMunis() throws IOException {
private static Table getRawNyMunis() throws CsvReaderException {
QueryLibrary.importStatic(TreeSnapshotQueryTest.StaticHolder.class);

final BaseTable base =
Expand All @@ -33,12 +33,12 @@ private static Table makeNyMunisTreeTableFrom(Table t) {
return t.treeTable("Path", "Direct");
}

private static Table makeNyMunisTreeTable() throws IOException {
private static Table makeNyMunisTreeTable() throws CsvReaderException {
return makeNyMunisTreeTableFrom(getRawNyMunis());
}

@Test
public void testBounds() throws IOException {
public void testBounds() throws CsvReaderException {
final HierarchicalTable treeTable = (HierarchicalTable) makeNyMunisTreeTable();
final Map<Object, TableDetails> details = new HashMap<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import io.deephaven.base.Pair;
import io.deephaven.csv.CsvTools;
import io.deephaven.csv.util.CsvReaderException;
import io.deephaven.datastructures.util.SmartKey;
import io.deephaven.engine.table.ColumnDefinition;
import io.deephaven.engine.table.Table;
Expand All @@ -21,7 +22,6 @@
import gnu.trove.map.hash.TIntObjectHashMap;
import gnu.trove.map.hash.TObjectIntHashMap;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
Expand Down Expand Up @@ -183,7 +183,7 @@ public static List<String> removeEmpty(String... components) {
}
}

private static Table getRawNyMunis() throws IOException {
private static Table getRawNyMunis() throws CsvReaderException {
QueryLibrary.importStatic(StaticHolder.class);

final BaseTable base =
Expand All @@ -202,7 +202,7 @@ private static Table getRawNyMunis() throws IOException {
.lastBy("Path");
}

private static Table makeNyMunisTreeTable() throws IOException {
private static Table makeNyMunisTreeTable() throws CsvReaderException {
return makeNyMunisTreeTableFrom(getRawNyMunis());
}

Expand All @@ -218,7 +218,7 @@ private static List<String> munisKey(String... path) {

// region Actual Tests

public void testTsq() throws IOException {
public void testTsq() throws CsvReaderException {
final Table t = makeNyMunisTreeTable();
final TTState state = new TTState(t);
final BitSet allColumns = new BitSet(t.getColumns().length);
Expand Down Expand Up @@ -456,7 +456,7 @@ public void testTsq() throws IOException {
assertFalse(state.expansionMap.containsKey(mayfieldKey));
}

public void testSortandFilter() throws IOException {
public void testSortandFilter() throws CsvReaderException {
final Table t = makeNyMunisTreeTable();
final TTState state = new TTState(t);
final BitSet allColumns = new BitSet(t.getColumns().length);
Expand Down
9 changes: 3 additions & 6 deletions Integrations/python/deephaven/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
_JCsvSpecs = None
_JInferenceSpecs = None
_JTableHeader = None
_JCharset = None
_JCsvTools = None


Expand Down Expand Up @@ -55,7 +54,7 @@ def _defineSymbols():
if not jpy.has_jvm():
raise SystemError("No java functionality can be used until the JVM has been initialized through the jpy module")

global _JCsvHelpers, _JCsvSpecs, _JInferenceSpecs, _JTableHeader, _JCharset, _JCsvTools, \
global _JCsvHelpers, _JCsvSpecs, _JInferenceSpecs, _JTableHeader, _JCsvTools, \
INFERENCE_STRINGS, INFERENCE_MINIMAL, INFERENCE_STANDARD, INFERENCE_STANDARD_TIMES

if _JCsvHelpers is None:
Expand All @@ -64,7 +63,6 @@ def _defineSymbols():
_JCsvSpecs = jpy.get_type("io.deephaven.csv.CsvSpecs")
_JInferenceSpecs = jpy.get_type("io.deephaven.csv.InferenceSpecs")
_JTableHeader = jpy.get_type("io.deephaven.qst.table.TableHeader")
_JCharset = jpy.get_type("java.nio.charset.Charset")
_JCsvTools = jpy.get_type("io.deephaven.csv.CsvTools")

INFERENCE_STRINGS = _JInferenceSpecs.strings()
Expand Down Expand Up @@ -116,7 +114,7 @@ def read(path: str,
Args:
path (str): a file path or a URL string
header (Dict[str, DataType]): a dict to define the table columns with key being the name, value being the data type
inference (csv.Inference): an Enum value specifying the rules for data type inference, default is INFERENCE_STANDARD_TIMES
inference (csv.Inference): an Enum value specifying the rules for data type inference, default is INFERENCE_STANDARD
headless (bool): indicates if the CSV data is headless, default is False
delimiter (str): the delimiter used by the CSV, default is the comma
quote (str): the quote character for the CSV, default is double quote
Expand All @@ -133,7 +131,7 @@ def read(path: str,
"""

if inference is None:
inference = INFERENCE_STANDARD_TIMES
inference = INFERENCE_STANDARD

csv_specs_builder = _JCsvSpecs.builder()

Expand All @@ -148,7 +146,6 @@ def read(path: str,
.quote(ord(quote))
.ignoreSurroundingSpaces(ignore_surrounding_spaces)
.trim(trim)
.charset(_JCharset.forName(charset))
.build())

return _JCsvHelpers.readCsv(path, csv_specs)
Expand Down
2 changes: 1 addition & 1 deletion Integrations/python/test/data/test_csv.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ String,Long,Float
a ,9223372036854775807,
b,,-Infinity
c,-9223372036854775807,NaN
"d 'c " ,9999999,3.4028235e+38f
"d 'c " ,9999999,3.4028234e+38f
null,-0,1.17549435E-38f
"null",0,1.4e-45f
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ public static InMemoryTable from(NewTable table) {
columns);
}

/**
 * Static factory that builds an {@link InMemoryTable} by forwarding its arguments, unchanged, to the
 * three-argument {@code InMemoryTable} constructor.
 *
 * @param definition the table definition passed to the constructor
 * @param rowSet the tracking row set passed to the constructor
 * @param columns the column sources passed to the constructor, keyed by string (presumably the column
 *        name; confirm against the constructor)
 * @return the newly constructed {@code InMemoryTable}
 */
public static InMemoryTable from(TableDefinition definition, TrackingRowSet rowSet,
Map<String, ? extends ColumnSource<?>> columns) {
return new InMemoryTable(definition, rowSet, columns);
}

public InMemoryTable(String[] columnNames, Object[] arrayValues) {
super(RowSetFactory.flat(Array.getLength(arrayValues[0])).toTracking(),
createColumnsMap(columnNames, arrayValues));
Expand Down
Loading

0 comments on commit 52ee70c

Please sign in to comment.