diff --git a/doc/source/io.rst b/doc/source/io.rst index 7ea476514e88d..69377fad15270 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -225,9 +225,9 @@ NA and Missing Data Handling na_values : scalar, str, list-like, or dict, default ``None`` Additional strings to recognize as NA/NaN. If dict passed, specific per-column - NA values. By default the following values are interpreted as NaN: - ``'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a', 'NA', - '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''``. + NA values. See :ref:`na values const <io.navaluesconst>` below + for a list of the values interpreted as NaN by default. + keep_default_na : boolean, default ``True`` If na_values are specified and keep_default_na is ``False`` the default NaN values are overridden, otherwise they're appended to. @@ -1030,10 +1030,11 @@ the corresponding equivalent values will also imply a missing value (in this cas ``[5.0,5]`` are recognized as ``NaN``. To completely override the default values that are recognized as missing, specify ``keep_default_na=False``. -The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A','N/A', 'NA', -'#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan']``. Although a 0-length string -``''`` is not included in the default ``NaN`` values list, it is still treated -as a missing value. + +.. _io.navaluesconst: + +The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', +'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``. .. 
code-block:: python diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 2549c8545908d..7375a2197c6b7 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -50,6 +50,8 @@ from pandas.core.algorithms import take_1d from pandas.core.dtypes.concat import union_categoricals from pandas import Index +import pandas.io.common as com + import time import os @@ -273,13 +275,6 @@ cdef extern from "parser/io.h": DEFAULT_CHUNKSIZE = 256 * 1024 -# common NA values -# no longer excluding inf representations -# '1.#INF','-1.#INF', '1.#INF000000', -_NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN', - b'#N/A N/A', b'n/a', b'NA', b'#NA', b'NULL', b'null', b'NaN', - b'nan', b''] - cdef class TextReader: """ @@ -1380,6 +1375,12 @@ cdef asbytes(object o): return str(o) +# common NA values +# no longer excluding inf representations +# '1.#INF','-1.#INF', '1.#INF000000', +_NA_VALUES = _ensure_encoded(list(com._NA_VALUES)) + + def _is_file_like(obj): if PY3: import io diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 170f9d428c9cc..c6d1cc79b82d7 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -8,7 +8,7 @@ import numpy as np from numpy import nan -import pandas.io.parsers as parsers +import pandas.io.common as com import pandas.util.testing as tm from pandas import DataFrame, Index, MultiIndex @@ -72,7 +72,7 @@ def test_default_na_values(self): _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', '']) - assert _NA_VALUES == parsers._NA_VALUES + assert _NA_VALUES == com._NA_VALUES nv = len(_NA_VALUES) def f(i, v):