Skip to content

Commit

Permalink
[Python] add tests for writing and reading float16 with Parquet to Py…
Browse files Browse the repository at this point in the history
…Arrow
  • Loading branch information
anjakefala committed Aug 2, 2023
1 parent 8d225a4 commit 622a08e
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 2 deletions.
2 changes: 1 addition & 1 deletion python/pyarrow/tests/interchange/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ def test_pandas_roundtrip_datetime(unit):

@pytest.mark.pandas
@pytest.mark.parametrize(
"np_float", [np.float32, np.float64]
"np_float", [np.float32, np.float64] # float16 operations not yet supported
)
def test_pandas_to_pyarrow_with_missing(np_float):
if Version(pd.__version__) < Version("1.5.0"):
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/tests/parquet/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def _test_dataframe(size=10000, seed=0):
'int16': _random_integers(size, np.int16),
'int32': _random_integers(size, np.int32),
'int64': _random_integers(size, np.int64),
'float16': np.arange(size, dtype=np.float16),
'float32': np.random.randn(size).astype(np.float32),
'float64': np.arange(size, dtype=np.float64),
'bool': np.random.randn(size) > 0,
Expand Down Expand Up @@ -169,6 +170,7 @@ def alltypes_sample(size=10000, seed=0, categorical=False):
'int16': np.arange(size, dtype=np.int16),
'int32': np.arange(size, dtype=np.int32),
'int64': np.arange(size, dtype=np.int64),
'float16': np.arange(size, dtype=np.float16),
'float32': np.arange(size, dtype=np.float32),
'float64': np.arange(size, dtype=np.float64),
'bool': np.random.randn(size) > 0,
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/tests/parquet/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def test_set_dictionary_pagesize_limit(use_legacy_dataset):

@pytest.mark.pandas
@parametrize_legacy_dataset
def test_chunked_table_write(use_legacy_dataset):
def test_chunked_table_write(use_legacy_dataset): #
# ARROW-232
tables = []
batch = pa.RecordBatch.from_pandas(alltypes_sample(size=10))
Expand Down
5 changes: 5 additions & 0 deletions python/pyarrow/tests/parquet/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ def test_parquet_metadata_lifetime(tempdir):
([-1, 2, 2, None, 4], pa.int16(), 'INT32', -1, 4, 1, 4, 0),
([-1, 2, 2, None, 4], pa.int32(), 'INT32', -1, 4, 1, 4, 0),
([-1, 2, 2, None, 4], pa.int64(), 'INT64', -1, 4, 1, 4, 0),
#(
# [-1.1, 2.2, 2.3, None, 4.4], pa.float16(),
# 'HALFFLOAT', -1.1, 4.4, 1, 4, 0
#),
# The float16 case above is left commented out because float16 operations
# are not yet implemented.
(
[-1.1, 2.2, 2.3, None, 4.4], pa.float32(),
'FLOAT', -1.1, 4.4, 1, 4, 0
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/tests/parquet/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ def test_pandas_parquet_pyfile_roundtrip(tempdir, use_legacy_dataset):
size = 5
df = pd.DataFrame({
'int64': np.arange(size, dtype=np.int64),
'float16': np.arange(size, dtype=np.float16),
'float32': np.arange(size, dtype=np.float32),
'float64': np.arange(size, dtype=np.float64),
'bool': np.random.randn(size) > 0,
Expand Down Expand Up @@ -279,6 +280,7 @@ def test_pandas_parquet_configuration_options(tempdir, use_legacy_dataset):
'int16': np.arange(size, dtype=np.int16),
'int32': np.arange(size, dtype=np.int32),
'int64': np.arange(size, dtype=np.int64),
'float16': np.arange(size, dtype=np.float16),
'float32': np.arange(size, dtype=np.float32),
'float64': np.arange(size, dtype=np.float64),
'bool': np.random.randn(size) > 0
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1447,6 +1447,7 @@ def test_cast_integers_unsafe():

def test_floating_point_truncate_safe():
safe_cases = [
# No float16 case is listed here because float16 does not support casts yet.
(np.array([1.0, 2.0, 3.0], dtype='float32'), 'float32',
np.array([1, 2, 3], dtype='i4'), pa.int32()),
(np.array([1.0, 2.0, 3.0], dtype='float64'), 'float64',
Expand Down

0 comments on commit 622a08e

Please sign in to comment.