Skip to content

Commit

Permalink
Add fix for pandas-dev#59242
Browse files Browse the repository at this point in the history
  • Loading branch information
kastkeepitjumpinlikekangaroos committed Oct 25, 2024
1 parent 68d9dca commit 2244869
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 2 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ MultiIndex

I/O
^^^
-
- Bug in :func:`read_sql` raising ``UnicodeDecodeError`` when a column containing bytes that could not be decoded into a string was read with ``dtype_backend="pyarrow"`` (:issue:`59242`)
-

Period
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,17 @@ def convert(arr):
if dtype_backend != "numpy" and arr.dtype == np.dtype("O"):
new_dtype = StringDtype()
arr_cls = new_dtype.construct_array_type()
arr = arr_cls._from_sequence(arr, dtype=new_dtype)
try:
# Addressing (#59242)
# Byte data that could not be decoded into
# a string would throw a UnicodeDecodeError exception

# Try and greedily convert to string
# Will fail if the object is bytes
arr = arr_cls._from_sequence(arr, dtype=new_dtype)
except UnicodeDecodeError:
pass

elif dtype_backend != "numpy" and isinstance(arr, np.ndarray):
if arr.dtype.kind in "iufb":
arr = pd_array(arr, copy=False)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -4352,3 +4352,17 @@ def test_xsqlite_if_exists(sqlite_buildin):
(5, "E"),
]
drop_table(table_name, sqlite_buildin)


def test_bytes_column(sqlite_buildin):
    """
    Check that a blob column is returned as raw bytes by ``read_sql``.

    Regression test for (#59242): bytes in a column that could not be
    converted to a string would raise a UnicodeDecodeError when
    ``dtype_backend='pyarrow'`` was requested.
    """
    # The sixteen bytes produced by the blob literal in the query below.
    expected = b"\x01#Eg\x89\xab\xcd\xef\x01#Eg\x89\xab\xcd\xef"
    sql = """
    select cast(x'0123456789abcdef0123456789abcdef' as blob) a
    """
    result = pd.read_sql(sql, sqlite_buildin, dtype_backend="pyarrow")
    first_value = result["a"].values[0]
    assert first_value == expected

0 comments on commit 2244869

Please sign in to comment.