diff --git a/pandas/core/base.py b/pandas/core/base.py
index 8f21e3125a27e..0836f13437e91 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -23,6 +23,8 @@
 _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='',
                             unique='IndexOpsMixin',
                             duplicated='IndexOpsMixin')
+import platform
+IS_PYPY = platform.python_implementation() == 'PyPy'
 
 
 class StringMixin(object):
     """implements string methods so long as object defines a `__unicode__`
@@ -1061,7 +1063,7 @@ def memory_usage(self, deep=False):
         Notes
         -----
         Memory usage does not include memory consumed by elements that
-        are not components of the array if deep=False
+        are not components of the array if deep=False or if used on PyPy
 
         See Also
         --------
@@ -1071,9 +1073,8 @@ def memory_usage(self, deep=False):
             return self.values.memory_usage(deep=deep)
 
         v = self.values.nbytes
-        if deep and is_object_dtype(self):
+        if deep and is_object_dtype(self) and not IS_PYPY:
             v += lib.memory_usage_of_objects(self.values)
-
         return v
 
     def factorize(self, sort=False, na_sentinel=-1):
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 420788f9008cd..c08079ee75cff 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -467,7 +467,8 @@ def _nbytes(self, deep=False):
         """
         level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels))
         label_nbytes = sum((i.nbytes for i in self.labels))
-        names_nbytes = sum((getsizeof(i) for i in self.names))
+        objsize = 24  # for implementations with no meaningful getsizeof (PyPy)
+        names_nbytes = sum((getsizeof(i, objsize) for i in self.names))
         result = level_nbytes + label_nbytes + names_nbytes
 
         # include our engine hashtable
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index 5071b50bbebdf..671d581b33583 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -194,8 +194,11 @@ def _format_data(self):
 
     @cache_readonly
     def nbytes(self):
-        """ return the number of bytes in the underlying data """
-        return sum([getsizeof(getattr(self, v)) for v in
+        """ return the number of bytes in the underlying data
+        On implementations where this is problematic (PyPy)
+        assume 24 bytes for each value
+        """
+        return sum([getsizeof(getattr(self, v), 24) for v in
                     ['_start', '_stop', '_step']])
 
     def memory_usage(self, deep=False):
diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
index c317ad542659a..e37478e4d3411 100644
--- a/pandas/tests/frame/test_repr_info.py
+++ b/pandas/tests/frame/test_repr_info.py
@@ -332,13 +332,15 @@ def test_info_memory_usage(self):
         res = buf.getvalue().splitlines()
         assert re.match(r"memory usage: [^+]+$", res[-1])
 
-        assert (df_with_object_index.memory_usage(
-            index=True, deep=True).sum() > df_with_object_index.memory_usage(
-                index=True).sum())
+        if not tm.IS_PYPY:
+            assert (df_with_object_index.memory_usage(
+                index=True,
+                deep=True).sum() > df_with_object_index.memory_usage(
+                    index=True).sum())
 
-        df_object = pd.DataFrame({'a': ['a']})
-        assert (df_object.memory_usage(deep=True).sum() >
-                df_object.memory_usage().sum())
+            df_object = pd.DataFrame({'a': ['a']})
+            assert (df_object.memory_usage(deep=True).sum() >
+                    df_object.memory_usage().sum())
 
         # Test a DataFrame with duplicate columns
         dtypes = ['int64', 'int64', 'int64', 'float64']
@@ -377,10 +379,11 @@ def test_info_memory_usage(self):
         df.memory_usage(index=True)
         df.index.values.nbytes
 
-        # sys.getsizeof will call the .memory_usage with
-        # deep=True, and add on some GC overhead
-        diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
-        assert abs(diff) < 100
+        if not tm.IS_PYPY:
+            # sys.getsizeof will call the .memory_usage with
+            # deep=True, and add on some GC overhead
+            diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
+            assert abs(diff) < 100
 
     def test_info_memory_usage_qualified(self):
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index 9af4a9edeb8b1..e7f359b9cab73 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -144,6 +144,7 @@ def f():
 
         pytest.raises(TypeError, f)
 
+    @pytest.mark.skipif(tm.IS_PYPY, reason="not relevant for PyPy")
     def test_memory_usage(self):
         # Delegate does not implement memory_usage.
         # Check that we fall back to in-built `__sizeof__`
@@ -941,6 +942,7 @@ def test_fillna(self):
             # check shallow_copied
             assert o is not result
 
+    @pytest.mark.skipif(tm.IS_PYPY, reason="not relevant for PyPy")
     def test_memory_usage(self):
         for o in self.objs:
             res = o.memory_usage()
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index eecdd672095b0..2476d10e8ef3e 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -1448,10 +1448,11 @@ def test_memory_usage(self):
         cat = pd.Categorical(['foo', 'foo', 'bar'])
         assert cat.memory_usage(deep=True) > cat.nbytes
 
-        # sys.getsizeof will call the .memory_usage with
-        # deep=True, and add on some GC overhead
-        diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
-        assert abs(diff) < 100
+        if not tm.IS_PYPY:
+            # sys.getsizeof will call the .memory_usage with
+            # deep=True, and add on some GC overhead
+            diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
+            assert abs(diff) < 100
 
     def test_searchsorted(self):
         # https://github.com/pandas-dev/pandas/issues/8420
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index d6ba9561340cc..197fd494c1ece 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -56,6 +56,8 @@
 K = 4
 _RAISE_NETWORK_ERROR_DEFAULT = False
 
+import platform
+IS_PYPY = platform.python_implementation() == 'PyPy'
 
 # set testing_mode
 _testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning)
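
Illustrative note (not part of the patch): the changes above lean on two behaviours, `platform.python_implementation()` identifying the interpreter, and `sys.getsizeof(obj, default)` falling back to the supplied default when the interpreter cannot report an object's size, which is the situation this patch assumes on PyPy and why a rough 24-byte estimate is passed in. The minimal sketch below shows that pattern in isolation; the names `approx_sizeof` and `OBJ_SIZE_FALLBACK` are invented for the example and do not appear in pandas.

    # Standalone sketch of the getsizeof fallback the patch depends on.
    import platform
    import sys

    IS_PYPY = platform.python_implementation() == 'PyPy'
    OBJ_SIZE_FALLBACK = 24  # rough per-object estimate, mirroring the patch


    def approx_sizeof(obj):
        # CPython reports the real size; an interpreter that cannot
        # compute one (PyPy) returns the second argument instead of
        # raising TypeError.
        return sys.getsizeof(obj, OBJ_SIZE_FALLBACK)


    if __name__ == '__main__':
        for obj in ('_start', None, 123):
            kind = 'estimated' if IS_PYPY else 'exact'
            print(obj, approx_sizeof(obj), 'bytes', '({})'.format(kind))

Under this scheme, deep/introspective memory figures become estimates rather than exact byte counts on PyPy, which is why the tests above gate the `sys.getsizeof`-based assertions behind `tm.IS_PYPY` instead of tightening them.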