Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New approach to CSV reading #1629

Merged
merged 1 commit into from
Jan 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package io.deephaven.treetable;

import io.deephaven.csv.CsvTools;
import io.deephaven.csv.util.CsvReaderException;
import io.deephaven.engine.table.Table;
import io.deephaven.engine.table.lang.QueryLibrary;
import io.deephaven.engine.util.TableTools;
Expand All @@ -9,7 +10,6 @@
import io.deephaven.engine.table.impl.QueryTableTestBase;
import org.junit.Test;

import java.io.IOException;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
Expand All @@ -18,7 +18,7 @@
import static io.deephaven.treetable.TreeTableConstants.ROOT_TABLE_KEY;

public class SnapshotStateTest extends QueryTableTestBase {
private static Table getRawNyMunis() throws IOException {
private static Table getRawNyMunis() throws CsvReaderException {
QueryLibrary.importStatic(TreeSnapshotQueryTest.StaticHolder.class);

final BaseTable base =
Expand All @@ -33,12 +33,12 @@ private static Table makeNyMunisTreeTableFrom(Table t) {
return t.treeTable("Path", "Direct");
}

private static Table makeNyMunisTreeTable() throws IOException {
private static Table makeNyMunisTreeTable() throws CsvReaderException {
return makeNyMunisTreeTableFrom(getRawNyMunis());
}

@Test
public void testBounds() throws IOException {
public void testBounds() throws CsvReaderException {
final HierarchicalTable treeTable = (HierarchicalTable) makeNyMunisTreeTable();
final Map<Object, TableDetails> details = new HashMap<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import io.deephaven.base.Pair;
import io.deephaven.csv.CsvTools;
import io.deephaven.csv.util.CsvReaderException;
import io.deephaven.datastructures.util.SmartKey;
import io.deephaven.engine.table.ColumnDefinition;
import io.deephaven.engine.table.Table;
Expand All @@ -21,7 +22,6 @@
import gnu.trove.map.hash.TIntObjectHashMap;
import gnu.trove.map.hash.TObjectIntHashMap;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
Expand Down Expand Up @@ -183,7 +183,7 @@ public static List<String> removeEmpty(String... components) {
}
}

private static Table getRawNyMunis() throws IOException {
private static Table getRawNyMunis() throws CsvReaderException {
QueryLibrary.importStatic(StaticHolder.class);

final BaseTable base =
Expand All @@ -202,7 +202,7 @@ private static Table getRawNyMunis() throws IOException {
.lastBy("Path");
}

private static Table makeNyMunisTreeTable() throws IOException {
private static Table makeNyMunisTreeTable() throws CsvReaderException {
return makeNyMunisTreeTableFrom(getRawNyMunis());
}

Expand All @@ -218,7 +218,7 @@ private static List<String> munisKey(String... path) {

// region Actual Tests

public void testTsq() throws IOException {
public void testTsq() throws CsvReaderException {
final Table t = makeNyMunisTreeTable();
final TTState state = new TTState(t);
final BitSet allColumns = new BitSet(t.getColumns().length);
Expand Down Expand Up @@ -456,7 +456,7 @@ public void testTsq() throws IOException {
assertFalse(state.expansionMap.containsKey(mayfieldKey));
}

public void testSortandFilter() throws IOException {
public void testSortandFilter() throws CsvReaderException {
final Table t = makeNyMunisTreeTable();
final TTState state = new TTState(t);
final BitSet allColumns = new BitSet(t.getColumns().length);
Expand Down
9 changes: 3 additions & 6 deletions Integrations/python/deephaven/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
_JCsvSpecs = None
_JInferenceSpecs = None
_JTableHeader = None
_JCharset = None
_JCsvTools = None


Expand Down Expand Up @@ -55,7 +54,7 @@ def _defineSymbols():
if not jpy.has_jvm():
raise SystemError("No java functionality can be used until the JVM has been initialized through the jpy module")

global _JCsvHelpers, _JCsvSpecs, _JInferenceSpecs, _JTableHeader, _JCharset, _JCsvTools, \
global _JCsvHelpers, _JCsvSpecs, _JInferenceSpecs, _JTableHeader, _JCsvTools, \
INFERENCE_STRINGS, INFERENCE_MINIMAL, INFERENCE_STANDARD, INFERENCE_STANDARD_TIMES

if _JCsvHelpers is None:
Expand All @@ -64,7 +63,6 @@ def _defineSymbols():
_JCsvSpecs = jpy.get_type("io.deephaven.csv.CsvSpecs")
_JInferenceSpecs = jpy.get_type("io.deephaven.csv.InferenceSpecs")
_JTableHeader = jpy.get_type("io.deephaven.qst.table.TableHeader")
_JCharset = jpy.get_type("java.nio.charset.Charset")
_JCsvTools = jpy.get_type("io.deephaven.csv.CsvTools")

INFERENCE_STRINGS = _JInferenceSpecs.strings()
Expand Down Expand Up @@ -116,7 +114,7 @@ def read(path: str,
Args:
path (str): a file path or a URL string
header (Dict[str, DataType]): a dict to define the table columns with key being the name, value being the data type
inference (csv.Inference): an Enum value specifying the rules for data type inference, default is INFERENCE_STANDARD_TIMES
inference (csv.Inference): an Enum value specifying the rules for data type inference, default is INFERENCE_STANDARD
headless (bool): indicates if the CSV data is headless, default is False
delimiter (str): the delimiter used by the CSV, default is the comma
quote (str): the quote character for the CSV, default is double quote
Expand All @@ -133,7 +131,7 @@ def read(path: str,
"""

if inference is None:
inference = INFERENCE_STANDARD_TIMES
inference = INFERENCE_STANDARD

csv_specs_builder = _JCsvSpecs.builder()

Expand All @@ -148,7 +146,6 @@ def read(path: str,
.quote(ord(quote))
.ignoreSurroundingSpaces(ignore_surrounding_spaces)
.trim(trim)
.charset(_JCharset.forName(charset))
.build())

return _JCsvHelpers.readCsv(path, csv_specs)
Expand Down
6 changes: 3 additions & 3 deletions Integrations/python/test/data/test_csv.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ String,Long,Float
a ,9223372036854775807,
b,,-Infinity
c,-9223372036854775807,NaN
"d 'c " ,9999999,3.4028235e+38f
null,-0,1.17549435E-38f
"null",0,1.4e-45f
"d 'c " ,9999999,3.4028234e+38
null,-0,1.17549435E-38
"null",0,1.4e-45
7 changes: 0 additions & 7 deletions Integrations/python/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,6 @@ def test_read_error_col_type(self):
with self.assertRaises(Exception) as cm:
t = read_csv('test/data/test_csv.csv', header=table_header)

def test_read_error_charset(self):
col_names = ["Strings", "Longs", "Floats"]
col_types = [Types.string, Types.float_, Types.int64]
table_header = {k: v for k, v in zip(col_names, col_types)}
with self.assertRaises(Exception) as cm:
t = read_csv('test/data/test_csv.csv', header=table_header, charset='abc')

def test_read_error_quote(self):
col_names = ["Strings", "Longs", "Floats"]
col_types = [Types.string, Types.int64, Types.float_]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ public static InMemoryTable from(NewTable table) {
columns);
}

/**
 * Static factory that creates an {@link InMemoryTable} by passing the supplied definition, row set, and column
 * sources straight through to the three-argument constructor.
 *
 * @param definition the table definition handed to the constructor
 * @param rowSet the tracking row set handed to the constructor
 * @param columns column sources keyed by string (presumably the column name; confirm against the constructor)
 * @return a newly constructed {@link InMemoryTable}
 */
public static InMemoryTable from(TableDefinition definition, TrackingRowSet rowSet,
Map<String, ? extends ColumnSource<?>> columns) {
return new InMemoryTable(definition, rowSet, columns);
}

public InMemoryTable(String[] columnNames, Object[] arrayValues) {
super(RowSetFactory.flat(Array.getLength(arrayValues[0])).toTracking(),
createColumnsMap(columnNames, arrayValues));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,9 @@ public void testTypesAndNameLegalization() throws IOException {
TestCase.assertEquals(3, results.get(1).getPartitionKeys().size());
TestCase.assertEquals(3, results.get(2).getPartitionKeys().size());

TestCase.assertEquals(Short.valueOf((short) 1), results.get(0).getPartitionValue("A"));
TestCase.assertEquals(Short.valueOf((short) 1), results.get(1).getPartitionValue("A"));
TestCase.assertEquals(Short.valueOf((short) 2), results.get(2).getPartitionValue("A"));
TestCase.assertEquals(Integer.valueOf(1), results.get(0).getPartitionValue("A"));
TestCase.assertEquals(Integer.valueOf(1), results.get(1).getPartitionValue("A"));
TestCase.assertEquals(Integer.valueOf(2), results.get(2).getPartitionValue("A"));

TestCase.assertEquals(7.0, results.get(0).getPartitionValue("B1"));
TestCase.assertEquals(100.0, results.get(1).getPartitionValue("B1"));
Expand Down
Loading