Skip to content

Commit

Permalink
BUG: Fix index type casting in read_json with orient='table' and floa…
Browse files Browse the repository at this point in the history
…t index (#25433) (#25434)
  • Loading branch information
albertvillanova authored and jreback committed Feb 28, 2019
1 parent f04342a commit e9de5f3
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 19 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ I/O

- Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`)
- Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`)
- Bug in :func:`read_json` for ``orient='table'`` with a float index, where the index dtype was inferred by default; this is not applicable because the index dtype is already defined in the JSON schema (:issue:`25433`)
- Bug in :func:`read_json` for ``orient='table'`` with column names that are strings of floats, where the column names were converted to ``Timestamp``; this is not applicable because column names are already defined in the JSON schema (:issue:`25435`)
-
-
-
Expand Down
28 changes: 20 additions & 8 deletions pandas/io/json/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii,


def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None,
convert_axes=True, convert_dates=True, keep_default_dates=True,
convert_axes=None, convert_dates=True, keep_default_dates=True,
numpy=False, precise_float=False, date_unit=None, encoding=None,
lines=False, chunksize=None, compression='infer'):
"""
Expand Down Expand Up @@ -277,18 +277,25 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None,
'table' as an allowed value for the ``orient`` argument
typ : type of object to recover (series or frame), default 'frame'
dtype : boolean or dict, default True
dtype : boolean or dict, default None
If True, infer dtypes; if a dict of column to dtype, then use those;
if False, then don't infer dtypes at all, applies only to the data.
Not applicable with ``orient='table'``.
For all ``orient`` values except ``'table'``, default is True.
.. versionchanged:: 0.25
.. versionchanged:: 0.25.0
Not applicable with ``orient='table'``.
Not applicable for ``orient='table'``.
convert_axes : boolean, default True
convert_axes : boolean, default None
Try to convert the axes to the proper dtypes.
For all ``orient`` values except ``'table'``, default is True.
.. versionchanged:: 0.25.0
Not applicable for ``orient='table'``.
convert_dates : boolean, default True
List of columns to parse for dates; if True, then try to parse
datelike columns (default is True); a column label is datelike if
Expand Down Expand Up @@ -417,8 +424,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None,

if orient == 'table' and dtype:
raise ValueError("cannot pass both dtype and orient='table'")
if orient == 'table' and convert_axes:
raise ValueError("cannot pass both convert_axes and orient='table'")

dtype = orient != 'table' if dtype is None else dtype
if dtype is None and orient != 'table':
dtype = True
if convert_axes is None and orient != 'table':
convert_axes = True

compression = _infer_compression(path_or_buf, compression)
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
Expand Down Expand Up @@ -692,7 +704,7 @@ def _try_convert_data(self, name, data, use_dtypes=True,

# don't try to coerce, unless a force conversion
if use_dtypes:
if self.dtype is False:
if not self.dtype:
return data, False
elif self.dtype is True:
pass
Expand Down
11 changes: 2 additions & 9 deletions pandas/tests/io/json/test_json_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,17 +564,10 @@ def test_multiindex(self, index_names):
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(df, result)

@pytest.mark.parametrize("strict_check", [
pytest.param(True, marks=pytest.mark.xfail),
False
])
def test_empty_frame_roundtrip(self, strict_check):
def test_empty_frame_roundtrip(self):
# GH 21287
df = pd.DataFrame([], columns=['a', 'b', 'c'])
expected = df.copy()
out = df.to_json(orient='table')
result = pd.read_json(out, orient='table')
# TODO: When DF coercion issue (#21345) is resolved tighten type checks
tm.assert_frame_equal(expected, result,
check_dtype=strict_check,
check_index_type=strict_check)
tm.assert_frame_equal(expected, result)
23 changes: 21 additions & 2 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def _check_orient(df, orient, dtype=None, numpy=False,
else:
unser = unser.sort_index()

if dtype is False:
if not dtype:
check_dtype = False

if not convert_axes and df.index.dtype.type == np.datetime64:
Expand Down Expand Up @@ -1202,6 +1202,16 @@ def test_data_frame_size_after_to_json(self):

assert size_before == size_after

@pytest.mark.parametrize('index', [None, [1, 2], [1., 2.], ['a', 'b'],
['1', '2'], ['1.', '2.']])
@pytest.mark.parametrize('columns', [['a', 'b'], ['1', '2'], ['1.', '2.']])
def test_from_json_to_json_table_index_and_columns(self, index, columns):
# GH25433 GH25435
expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns)
dfjson = expected.to_json(orient='table')
result = pd.read_json(dfjson, orient='table')
assert_frame_equal(result, expected)

def test_from_json_to_json_table_dtypes(self):
# GH21345
expected = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']})
Expand All @@ -1214,9 +1224,18 @@ def test_read_json_table_dtype_raises(self, dtype):
# GH21345
df = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']})
dfjson = df.to_json(orient='table')
with pytest.raises(ValueError):
msg = "cannot pass both dtype and orient='table'"
with pytest.raises(ValueError, match=msg):
pd.read_json(dfjson, orient='table', dtype=dtype)

def test_read_json_table_convert_axes_raises(self):
# GH25433 GH25435
df = DataFrame([[1, 2], [3, 4]], index=[1., 2.], columns=['1.', '2.'])
dfjson = df.to_json(orient='table')
msg = "cannot pass both convert_axes and orient='table'"
with pytest.raises(ValueError, match=msg):
pd.read_json(dfjson, orient='table', convert_axes=True)

@pytest.mark.parametrize('data, expected', [
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
{'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
Expand Down

0 comments on commit e9de5f3

Please sign in to comment.