diff --git a/pandas/core/common.py b/pandas/core/common.py index 77536fb391f93..492025426f59a 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -26,15 +26,12 @@ class PandasError(Exception): pass - -class SettingWithCopyError(ValueError): +class SettingImmutableError(ValueError): pass - -class SettingWithCopyWarning(Warning): +class SettingWithCopyError(ValueError): pass - class AmbiguousIndexError(PandasError, KeyError): pass diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 03eaa45582bef..889c41f64055a 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -321,13 +321,12 @@ def use_inf_as_null_cb(key): # user warnings chained_assignment = """ : string - Raise an exception, warn, or no action if trying to use chained assignment, - The default is warn + this option has been deprecated and has no effect """ -with cf.config_prefix('mode'): - cf.register_option('chained_assignment', 'warn', chained_assignment, - validator=is_one_of_factory([None, 'warn', 'raise'])) +cf.register_option('mode.chained_assignment', 'warn', chained_assignment, + validator=is_one_of_factory([None, 'warn', 'raise'])) +cf.deprecate_option('mode.chained_assignment', chained_assignment) # Set up the io.excel specific configuration. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index acf5e69bf05e3..8ea0321feae1b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1825,7 +1825,7 @@ def _ixs(self, i, axis=0): copy = isinstance(new_values,np.ndarray) and new_values.base is None result = Series(new_values, index=self.columns, name=self.index[i], dtype=new_values.dtype) - result._set_is_copy(self, copy=copy) + result._set_parent(self, copy=copy) return result # icol @@ -1957,7 +1957,7 @@ def _getitem_multilevel(self, key): if isinstance(result, Series): result = self._constructor_sliced(result, index=self.index, name=key) - result._set_is_copy(self) + result._set_parent(self) return result else: return self._get_item_cache(key) @@ -2229,7 +2229,7 @@ def __setitem__(self, key, value): self._set_item(key, value) def _setitem_slice(self, key, value): - self._check_setitem_copy() + self._check_copy_on_write() self.ix._setitem_with_indexer(key, value) def _setitem_array(self, key, value): @@ -2240,7 +2240,7 @@ def _setitem_array(self, key, value): (len(key), len(self.index))) key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] - self._check_setitem_copy() + self._check_copy_on_write() self.ix._setitem_with_indexer(indexer, value) else: if isinstance(value, DataFrame): @@ -2250,7 +2250,7 @@ def _setitem_array(self, key, value): self[k1] = value[k2] else: indexer = self.ix._convert_to_indexer(key, axis=1) - self._check_setitem_copy() + self._check_copy_on_write() self.ix._setitem_with_indexer((slice(None), indexer), value) def _setitem_frame(self, key, value): @@ -2260,7 +2260,7 @@ def _setitem_frame(self, key, value): raise TypeError('Must pass DataFrame with boolean values only') self._check_inplace_setting(value) - self._check_setitem_copy() + self._check_copy_on_write() self.where(-key, value, inplace=True) def _ensure_valid_index(self, value): @@ -2293,14 +2293,20 @@ def _set_item(self, key, value): """ self._ensure_valid_index(value) - value = self._sanitize_column(key, value) - NDFrame._set_item(self, key, value) - - # check if we are modifying a copy - # try to set first as we want an invalid - # value exeption to occur first - if len(self): - self._check_setitem_copy() + svalue = self._sanitize_column(key, value) + try: + NDFrame._set_item(self, key, svalue) + except com.SettingWithCopyError: + + # if we have a multi-index (which potentially has dropped levels) + # need to raise + for p in self._parent: + if isinstance(getattr(p(),'columns',None), MultiIndex): + raise + + # we have a chained assignment + # assign back to the original + self._parent[0]().loc[self.index,key] = value def insert(self, loc, column, value, allow_duplicates=False): """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9c170286006f2..37d07e5d6611b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3,6 +3,7 @@ import operator import weakref import gc +import inspect import numpy as np import pandas.lib as lib @@ -21,8 +22,7 @@ from pandas.core.common import (isnull, notnull, is_list_like, _values_from_object, _maybe_promote, _maybe_box_datetimelike, ABCSeries, - SettingWithCopyError, SettingWithCopyWarning, - AbstractMethodError) + AbstractMethodError, SettingWithCopyError) import pandas.core.nanops as nanops from pandas.util.decorators import Appender, Substitution, deprecate_kwarg from pandas.core import config @@ -78,14 +78,15 @@ class NDFrame(PandasObject): axes : list copy : boolean, default False """ - _internal_names = ['_data', '_cacher', '_item_cache', '_cache', - 'is_copy', '_subtyp', '_index', + _internal_names = ['_data', '_cacher', '_item_cache', '_cache', '_parent', + '_subtyp', '_index', '_parent_copy_on_write', '_default_kind', '_default_fill_value', '_metadata', '__array_struct__', '__array_interface__'] _internal_names_set = set(_internal_names) _accessors = frozenset([]) _metadata = [] - is_copy = None + _parent_copy_on_write = True + _parent = [] def __init__(self, data, axes=None, copy=False, dtype=None, fastpath=False): @@ -100,10 +101,22 @@ def __init__(self, data, axes=None, copy=False, dtype=None, for i, ax in enumerate(axes): data = data.reindex_axis(ax, axis=i) - object.__setattr__(self, 'is_copy', None) + object.__setattr__(self, '_parent', []) object.__setattr__(self, '_data', data) object.__setattr__(self, '_item_cache', {}) + def _get_is_copy(self): + warnings.warn("is_copy is deprecated will be removed in a future release", + FutureWarning) + return None + + def _set_is_copy(self, v): + warnings.warn("is_copy is deprecated will be removed in a future release", + FutureWarning) + pass + + is_copy = property(fget=_get_is_copy, fset=_set_is_copy) + def _validate_dtype(self, dtype): """ validate the passed dtype """ @@ -1091,7 +1104,7 @@ def _get_item_cache(self, item): res._set_as_cached(item, self) # for a chain - res.is_copy = self.is_copy + res._set_parent(self) return res def _set_as_cached(self, item, cacher): @@ -1143,7 +1156,7 @@ def _is_view(self): """ boolean : return if I am a view of another array """ return self._data.is_view - def _maybe_update_cacher(self, clear=False, verify_is_copy=True): + def _maybe_update_cacher(self, clear=False, verify_parent=True): """ see if we need to update our parent cacher @@ -1153,8 +1166,8 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True): ---------- clear : boolean, default False clear the item cache - verify_is_copy : boolean, default True - provide is_copy checks + verify_parent : boolean, default True + provide parent checks """ @@ -1172,8 +1185,8 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True): except: pass - if verify_is_copy: - self._check_setitem_copy(stacklevel=5, t='referant') + if verify_parent: + self._check_copy_on_write() if clear: self._clear_item_cache() @@ -1192,125 +1205,94 @@ def _slice(self, slobj, axis=0, kind=None): """ axis = self._get_block_manager_axis(axis) - result = self._constructor(self._data.get_slice(slobj, axis=axis)) - result = result.__finalize__(self) - - # this could be a view - # but only in a single-dtyped view slicable case - is_copy = axis!=0 or result._is_view - result._set_is_copy(self, copy=is_copy) - return result + data = self._data.get_slice(slobj, axis=axis) + return self._constructor(data)._set_parent(self).__finalize__(self) def _set_item(self, key, value): + + self._check_copy_on_write() self._data.set(key, value) self._clear_item_cache() - def _set_is_copy(self, ref=None, copy=True): - if not copy: - self.is_copy = None - else: - if ref is not None: - self.is_copy = weakref.ref(ref) - else: - self.is_copy = None - - def _check_is_chained_assignment_possible(self): - """ - check if we are a view, have a cacher, and are of mixed type - if so, then force a setitem_copy check - - should be called just near setting a value + def _set_parent(self, ref=None, copy=True): + if ref is not None: + self._parent.extend(ref._parent) + self._parent.append(weakref.ref(ref)) + return self - will return a boolean if it we are a view and are cached, but a single-dtype - meaning that the cacher should be updated following setting - """ - if self._is_view and self._is_cached: - ref = self._get_cacher() - if ref is not None and ref._is_mixed_type: - self._check_setitem_copy(stacklevel=4, t='referant', force=True) - return True - elif self.is_copy: - self._check_setitem_copy(stacklevel=4, t='referant') - return False + def _check_copy_on_write(self): - def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): - """ + # we could have a copy-on-write scenario + if self._parent and self._parent_copy_on_write: - Parameters - ---------- - stacklevel : integer, default 4 - the level to show of the stack when the error is output - t : string, the type of setting error - force : boolean, default False - if True, then force showing an error + # we have an exception + if isinstance(self._parent, Exception): + raise self._parent - validate if we are doing a settitem on a chained copy. + def get_names_for_obj(__really_unused_name__342424__): + """Returns all named references for self""" - If you call this function, be sure to set the stacklevel such that the - user will see the error *at the level of setting* + removals = set(["__really_unused_name__342424__", "__really_unused_name__xxxxx__", "self"]) + refs = gc.get_referrers(__really_unused_name__342424__) - It is technically possible to figure out that we are setting on - a copy even WITH a multi-dtyped pandas object. In other words, some blocks - may be views while other are not. Currently _is_view will ALWAYS return False - for multi-blocks to avoid having to handle this case. + names = [] + for ref in refs: + if inspect.isframe(ref): + for name, __really_unused_name__xxxxx__ in compat.iteritems(ref.f_locals): + if __really_unused_name__xxxxx__ is __really_unused_name__342424__: + names.append(name) + elif isinstance(ref, dict): + for name, __really_unused_name__xxxxx__ in compat.iteritems(ref): + if __really_unused_name__xxxxx__ is __really_unused_name__342424__: + names.append(name) - df = DataFrame(np.arange(0,9), columns=['count']) - df['group'] = 'b' + for name, __really_unused_name__xxxxx__ in compat.iteritems(globals()): + if __really_unused_name__xxxxx__ is __really_unused_name__342424__: + names.append(name) - # this technically need not raise SettingWithCopy if both are view (which is not - # generally guaranteed but is usually True - # however, this is in general not a good practice and we recommend using .loc - df.iloc[0:5]['group'] = 'a' + return set(names) - removals - """ + # collect garbage + # if we don't have references, then we have a reassignment case + # e.g. df = df.ix[....]; since the reference is gone + # we can just copy and be done - if force or self.is_copy: + # otherwise we have chained indexing, raise and error + def error(): + raise SettingWithCopyError("chained indexing detected, you can fix this ......") - value = config.get_option('mode.chained_assignment') - if value is None: - return + gc.collect(2) + if len(self._parent) > 1: + error() - # see if the copy is not actually refererd; if so, then disolve - # the copy weakref - try: - gc.collect(2) - if not gc.get_referents(self.is_copy()): - self.is_copy = None - return - except: - pass + p = self._parent[0]() + if p is not None: + names = get_names_for_obj(self) + if not len(names): + error() - # we might be a false positive - try: - if self.is_copy().shape == self.shape: - self.is_copy = None - return - except: - pass + # provide copy-on-write + self._data = self._data.copy() + self._parent = [] - # a custom message - if isinstance(self.is_copy, string_types): - t = self.is_copy + def _check_is_chained_assignment_possible(self): + """ + check if we are a view, have a cacher, and are of mixed type + if so, then force a copy_on_write check - elif t == 'referant': - t = ("\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame\n\n" - "See the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy") + should be called just near setting a value - else: - t = ("\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame.\n" - "Try using .loc[row_indexer,col_indexer] = value instead\n\n" - "See the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy") - - if value == 'raise': - raise SettingWithCopyError(t) - elif value == 'warn': - warnings.warn(t, SettingWithCopyWarning, stacklevel=stacklevel) + will return a boolean if it we are a view and are cached, but a single-dtype + meaning that the cacher should be updated following setting + """ + if self._is_cached: + ref = self._get_cacher() + if ref is not None: + self._check_copy_on_write() + return True + elif self._parent: + self._check_copy_on_write() + return False def __delitem__(self, key): """ @@ -1371,7 +1353,7 @@ def take(self, indices, axis=0, convert=True, is_copy=True): # maybe set copy if we didn't actually change the index if is_copy: if not result._get_axis(axis).equals(self._get_axis(axis)): - result._set_is_copy(self) + result._set_parent(self) return result @@ -1514,9 +1496,7 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True): result = self.iloc[loc] result.index = new_index - # this could be a view - # but only in a single-dtyped view slicable case - result._set_is_copy(self, copy=not result._is_view) + result._set_parent(self) return result _xs = xs @@ -1639,14 +1619,14 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): else: return result - def _update_inplace(self, result, verify_is_copy=True): + def _update_inplace(self, result, verify_parent=True): """ replace self internals with result. Parameters ---------- - verify_is_copy : boolean, default True - provide is_copy checks + verify_parent : boolean, default True + provide parent checks """ # NOTE: This does *not* call __finalize__ and that's an explicit @@ -1655,7 +1635,7 @@ def _update_inplace(self, result, verify_is_copy=True): self._reset_cache() self._clear_item_cache() self._data = getattr(result,'_data',result) - self._maybe_update_cacher(verify_is_copy=verify_is_copy) + self._maybe_update_cacher(verify_parent=verify_parent) def add_prefix(self, prefix): """ @@ -3376,11 +3356,11 @@ def resample(self, rule, how=None, axis=0, fill_method=None, For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for '5min' frequency, base could range from 0 through 4. Defaults to 0 - + Examples -------- - + Start by creating a series with 9 one minute timestamps. >>> index = pd.date_range('1/1/2000', periods=9, freq='T') @@ -3409,11 +3389,11 @@ def resample(self, rule, how=None, axis=0, fill_method=None, Downsample the series into 3 minute bins as above, but label each bin using the right edge instead of the left. Please note that the value in the bucket used as the label is not included in the bucket, - which it labels. For example, in the original series the + which it labels. For example, in the original series the bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed - value in the resampled bucket with the label``2000-01-01 00:03:00`` + value in the resampled bucket with the label``2000-01-01 00:03:00`` does not include 3 (if it did, the summed value would be 6, not 3). - To include this value close the right side of the bin interval as + To include this value close the right side of the bin interval as illustrated in the example below this one. >>> series.resample('3T', how='sum', label='right') @@ -3424,7 +3404,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, Downsample the series into 3 minute bins as above, but close the right side of the bin interval. - + >>> series.resample('3T', how='sum', label='right', closed='right') 2000-01-01 00:00:00 0 2000-01-01 00:03:00 6 @@ -3453,7 +3433,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, 2000-01-01 00:02:00 2 Freq: 30S, dtype: int64 - Upsample the series into 30 second bins and fill the + Upsample the series into 30 second bins and fill the ``NaN`` values using the ``bfill`` method. >>> series.resample('30S', fill_method='bfill')[0:5] @@ -3468,7 +3448,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, >>> def custom_resampler(array_like): ... return np.sum(array_like)+5 - + >>> series.resample('3T', how=custom_resampler) 2000-01-01 00:00:00 8 2000-01-01 00:03:00 17 diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 534117b8e9249..61c2c9b4e1474 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -688,13 +688,14 @@ def apply(self, func, *args, **kwargs): def f(g): return func(g, *args, **kwargs) - # ignore SettingWithCopy here in case the user mutates - with option_context('mode.chained_assignment',None): - return self._python_apply_general(f) + return self._python_apply_general(f) + def _python_apply_general(self, f): + keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis) + self._selected_obj._parent_copy_on_write = True return self._wrap_applied_output(keys, values, not_indexed_same=mutated) @@ -3591,6 +3592,15 @@ def sort_idx(self): # Counting sort indexer return _get_group_index_sorter(self.labels, self.ngroups) + + def _set_cow(self, data): + # we may mutate, so don't allow cow + try: + data._parent_copy_on_write=False + except AttributeError: + pass + return data + def __iter__(self): sdata = self._get_sorted_data() @@ -3612,7 +3622,7 @@ def _get_sorted_data(self): return self.data.take(self.sort_idx, axis=self.axis, convert=False) def _chop(self, sdata, slice_obj): - return sdata.iloc[slice_obj] + return self._set_cow(sdata.iloc[slice_obj]) def apply(self, f): raise AbstractMethodError(self) @@ -3625,7 +3635,7 @@ class ArraySplitter(DataSplitter): class SeriesSplitter(DataSplitter): def _chop(self, sdata, slice_obj): - return sdata._get_values(slice_obj).to_dense() + return self._set_cow(sdata._get_values(slice_obj).to_dense()) class FrameSplitter(DataSplitter): @@ -3648,9 +3658,9 @@ def fast_apply(self, f, names): def _chop(self, sdata, slice_obj): if self.axis == 0: - return sdata.iloc[slice_obj] + return self._set_cow(sdata.iloc[slice_obj]) else: - return sdata._slice(slice_obj, axis=1) # ix[:, slice_obj] + return self._set_cow(sdata._slice(slice_obj, axis=1)) # ix[:, slice_obj] class NDFrameSplitter(DataSplitter): @@ -3671,7 +3681,7 @@ def _get_sorted_data(self): return sorted_data def _chop(self, sdata, slice_obj): - return self.factory(sdata.get_slice(slice_obj, axis=self.axis)) + return self._set_cow(self.factory(sdata.get_slice(slice_obj, axis=self.axis))) def get_splitter(data, *args, **kwargs): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b8ee831cdc12c..a641f5126454b 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -273,7 +273,7 @@ def _setitem_with_indexer(self, indexer, value): labels = index.insert(len(index),key) self.obj._data = self.obj.reindex_axis(labels, i)._data self.obj._maybe_update_cacher(clear=True) - self.obj.is_copy=None + self.obj._parent=[] nindexer.append(labels.get_loc(key)) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 8e3dd3836855c..aeb218b56e3e6 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -191,7 +191,7 @@ def f(self, other): # this makes sure that we are aligned like the input # we are updating inplace so we want to ignore is_copy self._update_inplace(result.reindex_like(self,copy=False)._data, - verify_is_copy=False) + verify_parent=False) return self return f diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 1293b4034b84e..b676aad7599c5 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -812,8 +812,7 @@ def xs(self, key, axis=1, copy=None): axis_number = self._get_axis_number(axis) new_data = self._data.xs(key, axis=axis_number, copy=False) result = self._construct_return_type(new_data) - copy = new_data.is_mixed_type - result._set_is_copy(self, copy=copy) + result._set_parent(self) return result _xs = xs diff --git a/pandas/core/series.py b/pandas/core/series.py index 2890730956c75..030b0c117efc4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -21,7 +21,8 @@ _possibly_convert_platform, _try_sort, is_int64_dtype, ABCSparseArray, _maybe_match_name, - _coerce_to_dtype, SettingWithCopyError, + _coerce_to_dtype, + SettingImmutableError, SettingWithCopyError, _maybe_box_datetimelike, ABCDataFrame, _dict_compat) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, @@ -527,7 +528,7 @@ def __getitem__(self, key): if not self.index.is_unique: result = self._constructor(result, index=[key]*len(result) - ,dtype=self.dtype).__finalize__(self) + ,dtype=self.dtype)._set_parent(self).__finalize__(self) return result except InvalidIndexError: @@ -620,12 +621,12 @@ def _get_values_tuple(self, key): # If key is contained, would have returned by now indexer, new_index = self.index.get_loc_level(key) return self._constructor(self.values[indexer], - index=new_index).__finalize__(self) + index=new_index)._set_parent(self).__finalize__(self) def _get_values(self, indexer): try: return self._constructor(self._data.get_slice(indexer), - fastpath=True).__finalize__(self) + fastpath=True)._set_parent(self).__finalize__(self) except Exception: return self.values[indexer] @@ -635,7 +636,7 @@ def setitem(key, value): try: self._set_with_engine(key, value) return - except (SettingWithCopyError): + except (SettingImmutableError): raise except (KeyError, ValueError): values = self.values @@ -683,7 +684,19 @@ def setitem(key, value): self._set_with(key, value) # do the setitem - cacher_needs_updating = self._check_is_chained_assignment_possible() + try: + cacher_needs_updating = self._check_is_chained_assignment_possible() + except (SettingWithCopyError): + + # we have a chained assignment + # assign back to the original + obj = self._parent[0]() + if isinstance(obj, Series): + obj.loc[key] = value + else: + obj.loc[self.name,key] = value + return + setitem(key, value) if cacher_needs_updating: self._maybe_update_cacher() diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx index eb736e4569009..5e09c87a629fb 100644 --- a/pandas/src/reduce.pyx +++ b/pandas/src/reduce.pyx @@ -488,6 +488,7 @@ def apply_frame_axis0(object frame, object f, object names, # Need to infer if our low-level mucking is going to cause a segfault if n > 0: chunk = frame.iloc[starts[0]:ends[0]] + chunk._parent_copy_on_write = False shape_before = chunk.shape try: result = f(chunk) @@ -508,6 +509,7 @@ def apply_frame_axis0(object frame, object f, object names, item_cache.clear() # ugh object.__setattr__(slider.dummy, 'name', names[i]) + object.__setattr__(slider.dummy, '_parent_copy_on_write', False) piece = f(slider.dummy) # I'm paying the price for index-sharing, ugh diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 9bdb7f08fe7cf..eb2d53f0db6d3 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -552,12 +552,10 @@ def test_setitem(self): self.frame['col8'] = 'foo' assert((self.frame['col8'] == 'foo').all()) - # this is partially a view (e.g. some blocks are view) - # so raise/warn + # this is copy-on-write smaller = self.frame[:2] - def f(): - smaller['col10'] = ['1', '2'] - self.assertRaises(com.SettingWithCopyError, f) + smaller['col10'] = ['1', '2'] + self.assertEqual(smaller['col10'].dtype, np.object_) self.assertTrue((smaller['col10'] == ['1', '2']).all()) @@ -999,13 +997,11 @@ def test_fancy_getitem_slice_mixed(self): sliced = self.mixed_frame.ix[:, -3:] self.assertEqual(sliced['D'].dtype, np.float64) - # get view with single block - # setting it triggers setting with copy + # this is copy-on-write sliced = self.frame.ix[:, -3:] - def f(): - sliced['C'] = 4. - self.assertRaises(com.SettingWithCopyError, f) - self.assertTrue((self.frame['C'] == 4).all()) + sliced['C'] = 4. + self.assertFalse((self.frame['C'] == 4).all()) + self.assertTrue((sliced['C'] == 4).all()) def test_fancy_setitem_int_labels(self): # integer index defers to label-based indexing @@ -1798,14 +1794,10 @@ def test_irow(self): expected = df.ix[8:14] assert_frame_equal(result, expected) - # verify slice is view - # setting it makes it raise/warn - def f(): - result[2] = 0. - self.assertRaises(com.SettingWithCopyError, f) - exp_col = df[2].copy() - exp_col[4:8] = 0. - assert_series_equal(df[2], exp_col) + # copy-on-write for a slice + result[2] = 0. + self.assertFalse((df[2] == 0).all()) + self.assertTrue((result[2] == 0).all()) # list of integers result = df.iloc[[1, 2, 4, 6]] @@ -1833,12 +1825,10 @@ def test_icol(self): expected = df.ix[:, 8:14] assert_frame_equal(result, expected) - # verify slice is view - # and that we are setting a copy - def f(): - result[8] = 0. - self.assertRaises(com.SettingWithCopyError, f) - self.assertTrue((df[8] == 0).all()) + # we have a slice, but copy-on-write + result[8] = 0. + self.assertFalse((df[8] == 0).all()) + self.assertTrue((result[8] == 0).all()) # list of integers result = df.iloc[:, [1, 2, 4, 6]] @@ -14489,16 +14479,15 @@ def test_idxmax(self): def test_stale_cached_series_bug_473(self): # this is chained, but ok - with option_context('chained_assignment',None): - Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'), - columns=('e', 'f', 'g', 'h')) - repr(Y) - Y['e'] = Y['e'].astype('object') - Y['g']['c'] = np.NaN - repr(Y) - result = Y.sum() - exp = Y['g'].sum() - self.assertTrue(isnull(Y['g']['c'])) + Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'), + columns=('e', 'f', 'g', 'h')) + repr(Y) + Y['e'] = Y['e'].astype('object') + Y['g']['c'] = np.NaN + repr(Y) + result = Y.sum() + exp = Y['g'].sum() + self.assertTrue(isnull(Y['g']['c'])) def test_index_namedtuple(self): from collections import namedtuple diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 41703b3b5a3b7..cd770bf449ec7 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2744,10 +2744,9 @@ def f(group): self.assertEqual(result['d'].dtype, np.float64) # this is by definition a mutating operation! - with option_context('mode.chained_assignment',None): - for key, group in grouped: - res = f(group) - assert_frame_equal(res, result.ix[key]) + for key, group in grouped: + res = f(group) + assert_frame_equal(res, result.ix[key]) def test_groupby_wrong_multi_labels(self): from pandas import read_csv diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 30a5716831087..62caa45d6bd82 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -3534,10 +3534,10 @@ def test_set_value_keeps_names(self): columns=['one', 'two', 'three', 'four'], index=idx) df = df.sortlevel() - self.assertIsNone(df.is_copy) + self.assertIsNone(df._parent) self.assertEqual(df.index.names, ('Name', 'Number')) df = df.set_value(('grethe', '4'), 'one', 99.34) - self.assertIsNone(df.is_copy) + self.assertIsNone(df._parent) self.assertEqual(df.index.names, ('Name', 'Number')) def test_names(self): diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 6a9d4096ad4b3..cecb137b23df4 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -2851,12 +2851,10 @@ def test_ix_assign_column_mixed(self): assert_frame_equal(df,expected) # ok, but chained assignments are dangerous - # if we turn off chained assignement it will work - with option_context('chained_assignment',None): - df = pd.DataFrame({'a': lrange(4) }) - df['b'] = np.nan - df['b'].ix[[1,3]] = [100,-100] - assert_frame_equal(df,expected) + df = pd.DataFrame({'a': lrange(4) }) + df['b'] = np.nan + df['b'].ix[[1,3]] = [100,-100] + assert_frame_equal(df,expected) def test_ix_get_set_consistency(self): @@ -3672,25 +3670,22 @@ def test_cache_updating(self): def test_slice_consolidate_invalidate_item_cache(self): - # this is chained assignment, but will 'work' - with option_context('chained_assignment',None): - - # #3970 - df = DataFrame({ "aa":lrange(5), "bb":[2.2]*5}) + # #3970 + df = DataFrame({ "aa":lrange(5), "bb":[2.2]*5}) - # Creates a second float block - df["cc"] = 0.0 + # Creates a second float block + df["cc"] = 0.0 - # caches a reference to the 'bb' series - df["bb"] + # caches a reference to the 'bb' series + df["bb"] - # repr machinery triggers consolidation - repr(df) + # repr machinery triggers consolidation + repr(df) - # Assignment to wrong series - df['bb'].iloc[0] = 0.17 - df._clear_item_cache() - self.assertAlmostEqual(df['bb'][0], 0.17) + # Assignment to wrong series + df['bb'].iloc[0] = 0.17 + df._clear_item_cache() + self.assertAlmostEqual(df['bb'][0], 0.17) def test_setitem_cache_updating(self): # GH 5424 @@ -3776,65 +3771,108 @@ def test_setitem_chained_setfault(self): result = df.head() assert_frame_equal(result, expected) - def test_detect_chained_assignment(self): + def test_chain_assignment_yields_copy_on_write(self): + + # 10954 + df = DataFrame({'col1':[1,2], 'col2':[3,4]}) + intermediate = df.loc[1:1,] + + # refrence created + self.assertFalse(len(df._parent)) + self.assertTrue(len(intermediate._parent) == 1) + intermediate['col1'] = -99 + + # reference is broken + self.assertFalse(len(df._parent)) + self.assertFalse(len(intermediate._parent)) + + # local assignment + expected = DataFrame([[-99,4]],index=[1],columns=['col1','col2']) + assert_frame_equal(intermediate, expected) + + # unchanged + expected = DataFrame({'col1':[1,2], 'col2':[3,4]}) + assert_frame_equal(df, expected) + + # chained assignment + # but one that we can deal with + df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]}) + df.loc[1:1,]['col1'] = -99 + expected = DataFrame({'col1':[1,-99], 'col2':[3,4]}) + assert_frame_equal(df, expected) + + df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]}) + df.loc[1]['col1'] = -99 + assert_frame_equal(df, expected) + + # changed via a scalar accessor + df = DataFrame({'col1':[1,2], 'col2':[3,4]}) + expected = df.copy() + + s2 = df.loc[0] + s2.iloc[0] = -99 + assert_frame_equal(df, expected) + expected = Series([-99,3],index=['col1','col2'],name=0) + assert_series_equal(s2, expected) + + # change dtype + df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]}) + expected = DataFrame({'col1':[1, 2], 'col2':[3,'foo']}) + df.loc[1]['col2'] = 'foo' + assert_frame_equal(df, expected) - pd.set_option('chained_assignment','raise') + def test_detect_chained_assignment(self): # work with the chain expected = DataFrame([[-5,1],[-6,3]],columns=list('AB')) df = DataFrame(np.arange(4).reshape(2,2),columns=list('AB'),dtype='int64') - self.assertIsNone(df.is_copy) + self.assertFalse(len(df._parent)) df['A'][0] = -5 df['A'][1] = -6 assert_frame_equal(df, expected) # test with the chaining df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) - self.assertIsNone(df.is_copy) - def f(): - df['A'][0] = -5 - self.assertRaises(com.SettingWithCopyError, f) - def f(): - df['A'][1] = np.nan - self.assertRaises(com.SettingWithCopyError, f) - self.assertIsNone(df['A'].is_copy) + self.assertFalse(len(df._parent)) + df['A'][0] = -5 + df['A'][1] = np.nan + self.assertFalse(len(df['A']._parent)) # using a copy (the chain), fails df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) - def f(): - df.loc[0]['A'] = -5 - self.assertRaises(com.SettingWithCopyError, f) + df.loc[0]['A'] = -5 + self.assertEqual(df.loc[0,'A'], -5) # doc example df = DataFrame({'a' : ['one', 'one', 'two', 'three', 'two', 'one', 'six'], 'c' : Series(range(7),dtype='int64') }) - self.assertIsNone(df.is_copy) + self.assertFalse(len(df._parent)) expected = DataFrame({'a' : ['one', 'one', 'two', 'three', 'two', 'one', 'six'], 'c' : [42,42,2,3,4,42,6]}) - def f(): - indexer = df.a.str.startswith('o') - df[indexer]['c'] = 42 - self.assertRaises(com.SettingWithCopyError, f) + indexer = df.a.str.startswith('o') + df[indexer]['c'] = 42 + assert_frame_equal(df, expected) expected = DataFrame({'A':[111,'bbb','ccc'],'B':[1,2,3]}) df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) - def f(): - df['A'][0] = 111 - self.assertRaises(com.SettingWithCopyError, f) - def f(): - df.loc[0]['A'] = 111 - self.assertRaises(com.SettingWithCopyError, f) + df['A'][0] = 111 + assert_frame_equal(df, expected) + df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) + df.loc[0]['A'] = 111 + assert_frame_equal(df,expected) + + df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) df.loc[0,'A'] = 111 assert_frame_equal(df,expected) # make sure that is_copy is picked up reconstruction # GH5475 df = DataFrame({"A": [1,2]}) - self.assertIsNone(df.is_copy) + self.assertFalse(len(df._parent)) with tm.ensure_clean('__tmp__pickle') as path: df.to_pickle(path) df2 = pd.read_pickle(path) @@ -3858,34 +3896,34 @@ def random_text(nobs=100): # always a copy x = df.iloc[[0,1,2]] - self.assertIsNotNone(x.is_copy) + self.assertIsNotNone(x._parent) x = df.iloc[[0,1,2,4]] - self.assertIsNotNone(x.is_copy) + self.assertIsNotNone(x._parent) # explicity copy indexer = df.letters.apply(lambda x : len(x) > 10) df = df.ix[indexer].copy() - self.assertIsNone(df.is_copy) + self.assertFalse(len(df._parent)) df['letters'] = df['letters'].apply(str.lower) # implicity take df = random_text(100000) indexer = df.letters.apply(lambda x : len(x) > 10) df = df.ix[indexer] - self.assertIsNotNone(df.is_copy) + self.assertIsNotNone(df._parent) df['letters'] = df['letters'].apply(str.lower) # implicity take 2 df = random_text(100000) indexer = df.letters.apply(lambda x : len(x) > 10) df = df.ix[indexer] - self.assertIsNotNone(df.is_copy) + self.assertIsNotNone(df._parent) df.loc[:,'letters'] = df['letters'].apply(str.lower) # should be ok even though it's a copy! - self.assertIsNone(df.is_copy) + self.assertFalse(len(df._parent)) df['letters'] = df['letters'].apply(str.lower) - self.assertIsNone(df.is_copy) + self.assertFalse(len(df._parent)) df = random_text(100000) indexer = df.letters.apply(lambda x : len(x) > 10) @@ -3893,7 +3931,7 @@ def random_text(nobs=100): # an identical take, so no copy df = DataFrame({'a' : [1]}).dropna() - self.assertIsNone(df.is_copy) + self.assertFalse(len(df._parent)) df['a'] += 1 # inplace ops @@ -3928,23 +3966,22 @@ def f(): # from SO: http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc df = DataFrame(np.arange(0,9), columns=['count']) df['group'] = 'b' - def f(): - df.iloc[0:5]['group'] = 'a' - self.assertRaises(com.SettingWithCopyError, f) + df.iloc[0:5]['group'] = 'a' # mixed type setting # same dtype & changing dtype df = DataFrame(dict(A=date_range('20130101',periods=5),B=np.random.randn(5),C=np.arange(5,dtype='int64'),D=list('abcde'))) + df.ix[2]['D'] = 'foo' - def f(): - df.ix[2]['D'] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) - def f(): - df.ix[2]['C'] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) - def f(): - df['C'][2] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) + df = DataFrame(dict(A=date_range('20130101',periods=5),B=np.random.randn(5),C=np.arange(5,dtype='int64'),D=list('abcde'))) + df.ix[2]['C'] = 'foo' + + df = DataFrame(dict(A=date_range('20130101',periods=5),B=np.random.randn(5),C=np.arange(5,dtype='int64'),D=list('abcde'))) + df['C'][2] = 'foo' + + df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) + df.loc[0]['A'] = 111 + self.assertEqual(df.loc[0,'A'],111) def test_setting_with_copy_bug(self): @@ -3964,14 +4001,6 @@ def f(): # this should not raise df2['y'] = ['g', 'h', 'i'] - def test_detect_chained_assignment_warnings(self): - - # warnings - with option_context('chained_assignment','warn'): - df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) - with tm.assert_produces_warning(expected_warning=com.SettingWithCopyWarning): - df.loc[0]['A'] = 111 - def test_float64index_slicing_bug(self): # GH 5557, related to slicing a float index ser = {256: 2321.0, 1: 78.0, 2: 2716.0, 3: 0.0, 4: 369.0, 5: 0.0, 6: 269.0, 7: 0.0, 8: 0.0, 9: 0.0, 10: 3536.0, 11: 0.0, 12: 24.0, 13: 0.0, 14: 931.0, 15: 0.0, 16: 101.0, 17: 78.0, 18: 9643.0, 19: 0.0, 20: 0.0, 21: 0.0, 22: 63761.0, 23: 0.0, 24: 446.0, 25: 0.0, 26: 34773.0, 27: 0.0, 28: 729.0, 29: 78.0, 30: 0.0, 31: 0.0, 32: 3374.0, 33: 0.0, 34: 1391.0, 35: 0.0, 36: 361.0, 37: 0.0, 38: 61808.0, 39: 0.0, 40: 0.0, 41: 0.0, 42: 6677.0, 43: 0.0, 44: 802.0, 45: 0.0, 46: 2691.0, 47: 0.0, 48: 3582.0, 49: 0.0, 50: 734.0, 51: 0.0, 52: 627.0, 53: 70.0, 54: 2584.0, 55: 0.0, 56: 324.0, 57: 0.0, 58: 605.0, 59: 0.0, 60: 0.0, 61: 0.0, 62: 3989.0, 63: 10.0, 64: 42.0, 65: 0.0, 66: 904.0, 67: 0.0, 68: 88.0, 69: 70.0, 70: 8172.0, 71: 0.0, 72: 0.0, 73: 0.0, 74: 64902.0, 75: 0.0, 76: 347.0, 77: 0.0, 78: 36605.0, 79: 0.0, 80: 379.0, 81: 70.0, 82: 0.0, 83: 0.0, 84: 3001.0, 85: 0.0, 86: 1630.0, 87: 7.0, 88: 364.0, 89: 0.0, 90: 67404.0, 91: 9.0, 92: 0.0, 93: 0.0, 94: 7685.0, 95: 0.0, 96: 1017.0, 97: 0.0, 98: 2831.0, 99: 0.0, 100: 2963.0, 101: 0.0, 102: 854.0, 103: 0.0, 104: 0.0, 105: 0.0, 106: 0.0, 107: 0.0, 108: 0.0, 109: 0.0, 110: 0.0, 111: 0.0, 112: 0.0, 113: 0.0, 114: 0.0, 115: 0.0, 116: 0.0, 117: 0.0, 118: 0.0, 119: 0.0, 120: 0.0, 121: 0.0, 122: 0.0, 123: 0.0, 124: 0.0, 125: 0.0, 126: 67744.0, 127: 22.0, 128: 264.0, 129: 0.0, 260: 197.0, 268: 0.0, 265: 0.0, 269: 0.0, 261: 0.0, 266: 1198.0, 267: 0.0, 262: 2629.0, 258: 775.0, 257: 0.0, 263: 0.0, 259: 0.0, 264: 163.0, 250: 10326.0, 251: 0.0, 252: 1228.0, 253: 0.0, 254: 2769.0, 255: 0.0} diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 1bce047f3bf96..0f9f7c94f4ff1 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -539,11 +539,9 @@ def test_xs_level(self): # this is a copy in 0.14 result = self.frame.xs('two', level='second') - # setting this will give a SettingWithCopyError - # as we are trying to write a view - def f(x): - x[:] = 10 - self.assertRaises(com.SettingWithCopyError, f, result) + # this is copy-on-write + result[:] = 10 + self.assertTrue((result.values == 10).all()) def test_xs_level_multiple(self): from pandas import read_table @@ -562,11 +560,9 @@ def test_xs_level_multiple(self): # this is a copy in 0.14 result = df.xs(('a', 4), level=['one', 'four']) - # setting this will give a SettingWithCopyError - # as we are trying to write a view - def f(x): - x[:] = 10 - self.assertRaises(com.SettingWithCopyError, f, result) + # copy-on-write + result[:] = 10 + self.assertTrue((result.values == 10).all()) # GH2107 dates = lrange(20111201, 20111205) @@ -1412,7 +1408,7 @@ def test_frame_getitem_view(self): df['foo', 'four'] = 'foo' df = df.sortlevel(0, axis=1) - # this will work, but will raise/warn as its chained assignment + # chained assignment def f(): df['foo']['one'] = 2 return df diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 9cdc769dd7d74..72954f8dcb3ba 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -556,7 +556,7 @@ def test_xs(self): # mixed-type yields a copy self.panel['strings'] = 'foo' result = self.panel.xs('D', axis=2) - self.assertIsNotNone(result.is_copy) + self.assertIsNotNone(result._parent) def test_getitem_fancy_labels(self): p = self.panel diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 289f7f134aa27..88e1277edd89b 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -514,7 +514,7 @@ def test_xs(self): # mixed-type self.panel4d['strings'] = 'foo' result = self.panel4d.xs('D', axis=3) - self.assertIsNotNone(result.is_copy) + self.assertIsNotNone(result._parent) def test_getitem_fancy_labels(self): panel4d = self.panel4d diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 86eafdf7ca2c8..61fbb72d967cc 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -157,7 +157,7 @@ def compare(s, name): result = s.dt.to_pytimedelta() self.assertIsInstance(result,np.ndarray) self.assertTrue(result.dtype == object) - + result = s.dt.total_seconds() self.assertIsInstance(result,pd.Series) self.assertTrue(result.dtype == 'float64') @@ -204,11 +204,10 @@ def get_dir(s): with tm.assertRaisesRegexp(ValueError, "modifications"): s.dt.hour = 5 - # trying to set a copy - with pd.option_context('chained_assignment','raise'): - def f(): - s.dt.hour[0] = 5 - self.assertRaises(com.SettingWithCopyError, f) + # trying to set an immutable + def f(): + s.dt.hour[0] = 5 + self.assertRaises(com.SettingImmutableError, f) def test_strftime(self): # GH 10086 @@ -1236,9 +1235,9 @@ def test_iget(self): expected = s.ix[2:4] assert_series_equal(result, expected) - # test slice is a view + # this is copy-on-write result[:] = 0 - self.assertTrue((s[1:3] == 0).all()) + self.assertTrue((s[1:3] != 0).all()) # list of integers result = s.iloc[[0, 2, 3, 4, 5]] @@ -1474,10 +1473,10 @@ def test_slice(self): self.assertTrue(tm.equalContents(numSliceEnd, np.array(self.series)[-10:])) - # test return view + # copy-on-write sl = self.series[10:20] sl[:] = 0 - self.assertTrue((self.series[10:20] == 0).all()) + self.assertTrue((self.series[10:20] != 0).all()) def test_slice_can_reorder_not_uniquely_indexed(self): s = Series(1, index=['a', 'a', 'b', 'b', 'c']) @@ -4447,7 +4446,6 @@ def test_underlying_data_conversion(self): # GH 3970 # these are chained assignments as well - pd.set_option('chained_assignment',None) df = DataFrame({ "aa":range(5), "bb":[2.2]*5}) df["cc"] = 0.0 ck = [True]*len(df) @@ -4455,7 +4453,6 @@ def test_underlying_data_conversion(self): df_tmp = df.iloc[ck] df["bb"].iloc[0] = .15 self.assertEqual(df['bb'].iloc[0], 0.15) - pd.set_option('chained_assignment','raise') # GH 3217 df = DataFrame(dict(a = [1,3], b = [np.nan, 2])) diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 9a282bec2e9e4..2692e38389106 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -9,7 +9,7 @@ from pandas import tslib from pandas.core.common import (_NS_DTYPE, _TD_DTYPE, is_period_arraylike, is_datetime_arraylike, is_integer_dtype, is_list_like, - get_dtype_kinds) + get_dtype_kinds, SettingImmutableError) def is_datetimelike(data): """ return a boolean if we can be successfully converted to a datetimelike """ @@ -76,15 +76,16 @@ def _delegate_property_get(self, name): # return the result as a Series, which is by definition a copy result = Series(result, index=self.index) - # setting this object will show a SettingWithCopyWarning/Error - result.is_copy = ("modifications to a property of a datetimelike object are not " - "supported and are discarded. Change values on the original.") + # setting this object will show a ValueError if accessed + result._parent = SettingImmutableError("modifications to a property of a datetimelike object are not " + "supported and are discarded. Change values on the original.") + return result def _delegate_property_set(self, name, value, *args, **kwargs): - raise ValueError("modifications to a property of a datetimelike object are not " - "supported. Change values on the original.") + raise SettingImmutableError("modifications to a property of a datetimelike object are not " + "supported. Change values on the original.") def _delegate_method(self, name, *args, **kwargs): from pandas import Series @@ -97,9 +98,9 @@ def _delegate_method(self, name, *args, **kwargs): result = Series(result, index=self.index) - # setting this object will show a SettingWithCopyWarning/Error - result.is_copy = ("modifications to a method of a datetimelike object are not " - "supported and are discarded. Change values on the original.") + # setting this object will show a SettingImmutableError + result._parent = SettingImmutableError("modifications to a method of a datetimelike object are not " + "supported and are discarded. Change values on the original.") return result diff --git a/pandas/util/testing.py b/pandas/util/testing.py index aaa83da036c2f..f1fe9d74af1f3 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -64,7 +64,7 @@ class TestCase(unittest.TestCase): @classmethod def setUpClass(cls): - pd.set_option('chained_assignment', 'raise') + pass @classmethod def tearDownClass(cls):