Skip to content

Commit

Permalink
deprecate timestamps_to_ms in .from_pandas()
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Sep 7, 2017
1 parent 6e5f7be commit 382592f
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 48 deletions.
2 changes: 2 additions & 0 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ cdef class Array:
compatibility with other functionality like Parquet I/O which
only supports milliseconds.
.. deprecated:: 0.7.0
memory_pool: MemoryPool, optional
Specific memory pool to use to allocate the resulting Arrow array.
Expand Down
3 changes: 3 additions & 0 deletions python/pyarrow/table.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,9 @@ cdef class Table:
Convert datetime columns to ms resolution. This is needed for
compatibility with other functionality like Parquet I/O which
only supports milliseconds.
.. deprecated:: 0.7.0
schema : pyarrow.Schema, optional
The expected schema of the Arrow Table. This can be used to
indicate the type of columns if we cannot infer it automatically.
Expand Down
62 changes: 14 additions & 48 deletions python/pyarrow/tests/test_convert_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ def tearDown(self):
pass

def _check_pandas_roundtrip(self, df, expected=None, nthreads=1,
timestamps_to_ms=False, expected_schema=None,
expected_schema=None,
check_dtype=True, schema=None,
check_index=False):
table = pa.Table.from_pandas(df, timestamps_to_ms=timestamps_to_ms,
table = pa.Table.from_pandas(df,
schema=schema, preserve_index=check_index)
result = table.to_pandas(nthreads=nthreads)
if expected_schema:
Expand All @@ -92,9 +92,8 @@ def _check_series_roundtrip(self, s, type_=None):
tm.assert_series_equal(s, result)

def _check_array_roundtrip(self, values, expected=None, mask=None,
timestamps_to_ms=False, type=None):
arr = pa.Array.from_pandas(values, timestamps_to_ms=timestamps_to_ms,
mask=mask, type=type)
type=None):
arr = pa.Array.from_pandas(values, mask=mask, type=type)
result = arr.to_pandas()

values_nulls = pd.isnull(values)
Expand Down Expand Up @@ -332,21 +331,6 @@ def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
pa.Table.from_pandas(df, schema=schema)

def test_timestamps_notimezone_no_nulls(self):
df = pd.DataFrame({
'datetime64': np.array([
'2007-07-13T01:23:34.123',
'2006-01-13T12:34:56.432',
'2010-08-13T05:46:57.437'],
dtype='datetime64[ms]')
})
field = pa.field('datetime64', pa.timestamp('ms'))
schema = pa.schema([field])
self._check_pandas_roundtrip(
df,
timestamps_to_ms=True,
expected_schema=schema,
)

df = pd.DataFrame({
'datetime64': np.array([
'2007-07-13T01:23:34.123456789',
Expand All @@ -357,36 +341,24 @@ def test_timestamps_notimezone_no_nulls(self):
field = pa.field('datetime64', pa.timestamp('ns'))
schema = pa.schema([field])
self._check_pandas_roundtrip(
df, expected_schema=schema,
df,
expected_schema=schema,
)

def test_timestamps_to_ms_explicit_schema(self):
    """Passing timestamps_to_ms with an explicit type must warn but still work.

    Regression test for ARROW-1328: ``timestamps_to_ms`` is deprecated as of
    0.7.0, so the call is expected to emit a ``FutureWarning`` while still
    coercing the nanosecond pandas values down to millisecond resolution.
    """
    # ARROW-1328
    df = pd.DataFrame({'datetime': [datetime(2017, 1, 1)]})
    pa_type = pa.from_numpy_dtype(df['datetime'].dtype)

    # The stale un-guarded duplicate of this call (a leftover from the diff)
    # is removed: only the warning-checked invocation should remain, otherwise
    # the deprecation warning fires outside the assertion context.
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        arr = pa.Array.from_pandas(df['datetime'], type=pa_type,
                                   timestamps_to_ms=True)

    # Round-trip check: values come back truncated to ms resolution.
    tm.assert_almost_equal(df['datetime'].values.astype('M8[ms]'),
                           arr.to_pandas())

def test_timestamps_notimezone_nulls(self):
df = pd.DataFrame({
'datetime64': np.array([
'2007-07-13T01:23:34.123',
None,
'2010-08-13T05:46:57.437'],
dtype='datetime64[ms]')
})
field = pa.field('datetime64', pa.timestamp('ms'))
schema = pa.schema([field])
self._check_pandas_roundtrip(
df,
timestamps_to_ms=True,
expected_schema=schema,
)

df = pd.DataFrame({
'datetime64': np.array([
'2007-07-13T01:23:34.123456789',
Expand All @@ -397,7 +369,8 @@ def test_timestamps_notimezone_nulls(self):
field = pa.field('datetime64', pa.timestamp('ns'))
schema = pa.schema([field])
self._check_pandas_roundtrip(
df, expected_schema=schema,
df,
expected_schema=schema,
)

def test_timestamps_with_timezone(self):
Expand All @@ -410,7 +383,7 @@ def test_timestamps_with_timezone(self):
})
df['datetime64'] = (df['datetime64'].dt.tz_localize('US/Eastern')
.to_frame())
self._check_pandas_roundtrip(df, timestamps_to_ms=True)
self._check_pandas_roundtrip(df)

self._check_series_roundtrip(df['datetime64'])

Expand All @@ -425,15 +398,8 @@ def test_timestamps_with_timezone(self):
})
df['datetime64'] = (df['datetime64'].dt.tz_localize('US/Eastern')
.to_frame())
self._check_pandas_roundtrip(df)

def test_timestamp_with_tz_to_pandas_type(self):
from pyarrow.compat import DatetimeTZDtype

tz = 'America/Los_Angeles'
t = pa.timestamp('ns', tz=tz)

assert t.to_pandas_dtype() == DatetimeTZDtype('ns', tz=tz)
self._check_pandas_roundtrip(df)

def test_date_infer(self):
df = pd.DataFrame({
Expand Down

0 comments on commit 382592f

Please sign in to comment.