Skip to content

Commit

Permalink
Updating the API to support an optional 'includeMetadata' field.
Browse files Browse the repository at this point in the history
  • Loading branch information
Mike Pigott committed Dec 5, 2018
1 parent 5af1b5b commit 523387f
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@
*/
public class JdbcToArrow {

public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME";
public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME";
public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME";
public static final String SQL_TYPE_KEY = "SQL_TYPE";

/**
* For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
* This method uses the default Calendar instance with default TimeZone and Locale as returned by the JVM.
Expand All @@ -89,7 +94,8 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B
Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");

return sqlToArrow(connection, query, allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
return sqlToArrow(connection, query, allocator,
Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT), false);
}

/**
Expand All @@ -110,13 +116,42 @@ public static VectorSchemaRoot sqlToArrow(
String query,
BaseAllocator allocator,
Calendar calendar) throws SQLException, IOException {

Preconditions.checkNotNull(connection, "JDBC connection object can not be null");
Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

return sqlToArrow(connection, query, allocator, calendar, false);
}

/**
* For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
*
* @param connection Database connection to be used. This method will not close the passed connection object.
* Since the caller has passed the connection object it's the responsibility of the caller
* to close or return the connection to the pool.
* @param query The DB Query to fetch the data.
* @param allocator Memory allocator
* @param calendar Calendar object to use to handle Date, Time and Timestamp datasets.
* @param includeMetadata Whether to include column information in the schema field metadata.
* @return Arrow Data Objects {@link VectorSchemaRoot}
* @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
* ResultSet and Statement objects.
*/
public static VectorSchemaRoot sqlToArrow(
Connection connection,
String query,
BaseAllocator allocator,
Calendar calendar,
boolean includeMetadata) throws SQLException, IOException {
Preconditions.checkNotNull(connection, "JDBC connection object can not be null");
Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

try (Statement stmt = connection.createStatement()) {
return sqlToArrow(stmt.executeQuery(query), allocator, calendar);
return sqlToArrow(stmt.executeQuery(query), allocator, calendar, includeMetadata);
}
}

Expand Down Expand Up @@ -163,7 +198,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

RootAllocator rootAllocator = new RootAllocator(Integer.MAX_VALUE);
VectorSchemaRoot root = sqlToArrow(resultSet, rootAllocator, calendar);
VectorSchemaRoot root = sqlToArrow(resultSet, rootAllocator, calendar, false);

return root;
}
Expand All @@ -177,14 +212,40 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar
* @return Arrow Data Objects {@link VectorSchemaRoot}
* @throws SQLException on error
*/
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator allocator, Calendar calendar)
public static VectorSchemaRoot sqlToArrow(
ResultSet resultSet,
BaseAllocator allocator,
Calendar calendar)
throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

return sqlToArrow(resultSet, allocator, calendar, false);
}

/**
* For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
*
* @param resultSet ResultSet to use to fetch the data from underlying database
* @param allocator Memory allocator to use.
* @param calendar Calendar instance to use for Date, Time and Timestamp datasets.
* @param metadata Whether to include column information in the schema field metadata.
* @return Arrow Data Objects {@link VectorSchemaRoot}
* @throws SQLException on error
*/
public static VectorSchemaRoot sqlToArrow(
ResultSet resultSet,
BaseAllocator allocator,
Calendar calendar,
boolean includeMetadata)
throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

VectorSchemaRoot root = VectorSchemaRoot.create(
JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), calendar), allocator);
JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), calendar, includeMetadata), allocator);
JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, calendar);
return root;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@
import java.sql.Types;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.BigIntVector;
Expand Down Expand Up @@ -124,7 +126,8 @@ public class JdbcToArrowUtils {
* @return {@link Schema}
* @throws SQLException on error
*/
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException {
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar, boolean includeMetadata)
throws SQLException {

Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
Expand All @@ -135,35 +138,47 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
final String columnName = rsmd.getColumnName(i);
final FieldType fieldType;

final Map<String, String> metadata;
if (includeMetadata) {
metadata = new HashMap<String, String>();
metadata.put(JdbcToArrow.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i));
metadata.put(JdbcToArrow.SQL_TABLE_NAME_KEY, rsmd.getTableName(i));
metadata.put(JdbcToArrow.SQL_COLUMN_NAME_KEY, columnName);
metadata.put(JdbcToArrow.SQL_TYPE_KEY, rsmd.getColumnTypeName(i));

} else {
metadata = null;
}

switch (rsmd.getColumnType(i)) {
case Types.BOOLEAN:
case Types.BIT:
fieldType = FieldType.nullable(new ArrowType.Bool());
fieldType = new FieldType(true, new ArrowType.Bool(), null, metadata);
break;
case Types.TINYINT:
fieldType = FieldType.nullable(new ArrowType.Int(8, true));
fieldType = new FieldType(true, new ArrowType.Int(8, true), null, metadata);
break;
case Types.SMALLINT:
fieldType = FieldType.nullable(new ArrowType.Int(16, true));
fieldType = new FieldType(true, new ArrowType.Int(16, true), null, metadata);
break;
case Types.INTEGER:
fieldType = FieldType.nullable(new ArrowType.Int(32, true));
fieldType = new FieldType(true, new ArrowType.Int(32, true), null, metadata);
break;
case Types.BIGINT:
fieldType = FieldType.nullable(new ArrowType.Int(64, true));
fieldType = new FieldType(true, new ArrowType.Int(64, true), null, metadata);
break;
case Types.NUMERIC:
case Types.DECIMAL:
int precision = rsmd.getPrecision(i);
int scale = rsmd.getScale(i);
fieldType = FieldType.nullable(new ArrowType.Decimal(precision, scale));
fieldType = new FieldType(true, new ArrowType.Decimal(precision, scale), null, metadata);
break;
case Types.REAL:
case Types.FLOAT:
fieldType = FieldType.nullable(new ArrowType.FloatingPoint(SINGLE));
fieldType = new FieldType(true, new ArrowType.FloatingPoint(SINGLE), null, metadata);
break;
case Types.DOUBLE:
fieldType = FieldType.nullable(new ArrowType.FloatingPoint(DOUBLE));
fieldType = new FieldType(true, new ArrowType.FloatingPoint(DOUBLE), null, metadata);
break;
case Types.CHAR:
case Types.NCHAR:
Expand All @@ -172,24 +187,23 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
case Types.LONGVARCHAR:
case Types.LONGNVARCHAR:
case Types.CLOB:
fieldType = FieldType.nullable(new ArrowType.Utf8());
fieldType = new FieldType(true, new ArrowType.Utf8(), null, metadata);
break;
case Types.DATE:
fieldType = FieldType.nullable(new ArrowType.Date(DateUnit.MILLISECOND));
fieldType = new FieldType(true, new ArrowType.Date(DateUnit.MILLISECOND), null, metadata);
break;
case Types.TIME:
fieldType = FieldType.nullable(new ArrowType.Time(TimeUnit.MILLISECOND, 32));
fieldType = new FieldType(true, new ArrowType.Time(TimeUnit.MILLISECOND, 32), null, metadata);
break;
case Types.TIMESTAMP:
fieldType = FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar.getTimeZone().getID()));
fieldType = new FieldType(true, new ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar.getTimeZone().getID()),
null, metadata);
break;
case Types.BINARY:
case Types.VARBINARY:
case Types.LONGVARBINARY:
fieldType = FieldType.nullable(new ArrowType.Binary());
break;
case Types.BLOB:
fieldType = FieldType.nullable(new ArrowType.Binary());
fieldType = new FieldType(true, new ArrowType.Binary(), null, metadata);
break;

case Types.ARRAY:
Expand Down

0 comments on commit 523387f

Please sign in to comment.