diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b8f142700b830..4032a7d22d4a2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -126,6 +126,7 @@ Other Enhancements - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) - Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`. - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) +- ``read_*`` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). .. _whatsnew_0210.api_breaking: diff --git a/pandas/io/common.py b/pandas/io/common.py index cbfc33dbebb81..69a7e69ea724b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -272,13 +272,15 @@ def _infer_compression(filepath_or_buffer, compression): if compression is None: return None - # Cannot infer compression of a buffer. Hence assume no compression. - is_path = isinstance(filepath_or_buffer, compat.string_types) - if compression == 'infer' and not is_path: - return None - - # Infer compression from the filename/URL extension + # Infer compression if compression == 'infer': + # Convert all path types (e.g. 
pathlib.Path) to strings + filepath_or_buffer = _stringify_path(filepath_or_buffer) + if not isinstance(filepath_or_buffer, compat.string_types): + # Cannot infer compression of a buffer, assume no compression + return None + + # Infer compression from the filename/URL extension for compression, extension in _compression_to_extension.items(): if filepath_or_buffer.endswith(extension): return compression diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9c76d3126890c..05a04f268f72b 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -208,11 +208,11 @@ <http://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_ for more information on ``iterator`` and ``chunksize``. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' - For on-the-fly decompression of on-disk data. If 'infer', then use gzip, - bz2, zip or xz if filepath_or_buffer is a string ending in '.gz', '.bz2', - '.zip', or 'xz', respectively, and no decompression otherwise. If using - 'zip', the ZIP file must contain only one data file to be read in. - Set to None for no decompression. + For on-the-fly decompression of on-disk data. If 'infer' and + `filepath_or_buffer` is path-like, then detect compression from the + following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no + decompression). If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. .. versionadded:: 0.18.1 support for 'zip' and 'xz' compression. diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 6f345092c514d..143b76575e36b 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -62,8 +62,8 @@ def read_pickle(path, compression='infer'): File path compression : {'infer', 'gzip', 'bz2', 'xz', 'zip', None}, default 'infer' For on-the-fly decompression of on-disk data. If 'infer', then use - gzip, bz2, xz or zip if path is a string ending in '.gz', '.bz2', 'xz', - or 'zip' respectively, and no decompression otherwise. 
+ gzip, bz2, xz or zip if path ends in '.gz', '.bz2', '.xz', + or '.zip' respectively, and no decompression otherwise. Set to None for no decompression. .. versionadded:: 0.20.0 diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index b527e3c5dc254..30904593fedc4 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -14,16 +14,6 @@ from pandas import read_csv, concat -try: - from pathlib import Path -except ImportError: - pass - -try: - from py.path import local as LocalPath -except ImportError: - pass - class CustomFSPath(object): """For testing fspath on unknown objects""" @@ -34,6 +24,21 @@ def __fspath__(self): return self.path +# Functions that consume a string path and return a string or path-like object +path_types = [str, CustomFSPath] + +try: + from pathlib import Path + path_types.append(Path) +except ImportError: + pass + +try: + from py.path import local as LocalPath + path_types.append(LocalPath) +except ImportError: + pass + HERE = os.path.dirname(__file__) @@ -83,6 +88,19 @@ def test_stringify_path_fspath(self): result = common._stringify_path(p) assert result == 'foo/bar.csv' + @pytest.mark.parametrize('extension,expected', [ + ('', None), + ('.gz', 'gzip'), + ('.bz2', 'bz2'), + ('.zip', 'zip'), + ('.xz', 'xz'), + ]) + @pytest.mark.parametrize('path_type', path_types) + def test_infer_compression_from_path(self, extension, expected, path_type): + path = path_type('foo/bar.csv' + extension) + compression = common._infer_compression(path, compression='infer') + assert compression == expected + def test_get_filepath_or_buffer_with_path(self): filename = '~/sometest' filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename)