From eb3973af209e1287b486a5398d76097563652053 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Wed, 11 Mar 2020 14:43:55 -0700 Subject: [PATCH] Fixing the issue with exclude_path and hash calculations when dictionaries were inside iterables. https://github.com/seperman/deepdiff/issues/174 --- README.md | 1 + deepdiff/deephash.py | 13 ++++++++----- deepdiff/diff.py | 4 +++- docs/index.rst | 1 + tests/test_diff_text.py | 11 +++++++++++ tests/test_hash.py | 21 +++++++++++++++++++-- 6 files changed, 43 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 9df65b9a..ad06a2a4 100644 --- a/README.md +++ b/README.md @@ -417,6 +417,7 @@ And then running # ChangeLog +- v4-3-1: Fixing the issue with exclude_path and hash calculations when dictionaries were inside iterables. https://github.com/seperman/deepdiff/issues/174 - v4-3-0: adding exclude_obj_callback - v4-2-0: .json property is finally removed. Fix for Py3.10. Dropping support for EOL Python 3.4. Ignoring private keys when calculating hashes. For example __init__ is not a part of hash calculation anymore. Fix for #166 Problem with comparing lists, with an boolean as element. - v4-0-9: Fixing the bug for hashing custom unhashable objects diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index 4cb72d18..3241af27 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -70,6 +70,7 @@ def __init__(self, ignore_string_case=False, exclude_obj_callback=None, number_to_string_func=None, + parent="root", **kwargs): if kwargs: raise ValueError( @@ -78,7 +79,7 @@ def __init__(self, "exclude_paths, exclude_regex_paths, hasher, ignore_repetition, " "number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, " "ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case " - "number_to_string_func") % ', '.join(kwargs.keys())) + "number_to_string_func, parent") % ', '.join(kwargs.keys())) self.obj = obj exclude_types = set() if exclude_types is None else set(exclude_types) self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance @@ -109,7 +110,7 @@ def __init__(self, self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group self.number_to_string = number_to_string_func or number_to_string - self._hash(obj, parent="root", parents_ids=frozenset({get_id(obj)})) + self._hash(obj, parent=parent, parents_ids=frozenset({get_id(obj)})) if self[UNPROCESSED]: logger.warning("Can not hash the following items: {}.".format(self[UNPROCESSED])) @@ -202,7 +203,6 @@ def _skip_this(self, obj, parent): skip = True elif self.exclude_obj_callback and self.exclude_obj_callback(obj, parent): skip = True - return skip def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribute=False, original_type=None): @@ -218,6 +218,8 @@ def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribut key_in_report = key_text % (parent, key_formatted) key_hash = self._hash(key, parent=key_in_report, parents_ids=parents_ids) + if not key_hash: + continue item_id = get_id(item) if (parents_ids and item_id in parents_ids) or self._skip_this(item, parent=key_in_report): continue @@ -244,7 +246,8 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): result = defaultdict(int) for i, item in enumerate(obj): - if self._skip_this(item, parent="{}[{}]".format(parent, i)): + new_parent = "{}[{}]".format(parent, i) + if self._skip_this(item, parent=new_parent): continue item_id = get_id(item) @@ -252,7 +255,7 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): continue parents_ids_added = add_to_frozen_set(parents_ids, item_id) - hashed = self._hash(item, parent=parent, parents_ids=parents_ids_added) + hashed = self._hash(item, parent=new_parent, parents_ids=parents_ids_added) # counting repetitions result[hashed] += 1 diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 6ba6e058..60bcd214 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -469,6 +469,7 @@ def __create_hashtable(self, t, level): hashes = {} for (i, item) in enumerate(t): try: + parent = "{}[{}]".format(level.path(), i) hashes_all = DeepHash(item, hashes=self.hashes, exclude_types=self.exclude_types, @@ -485,8 +486,9 @@ def __create_hashtable(self, t, level): ignore_string_case=self.ignore_string_case, number_to_string_func=self.number_to_string, exclude_obj_callback=self.exclude_obj_callback, + parent=parent, + apply_hash=False, ) - # import pytest; pytest.set_trace() key = item if item is True: key = BoolObj.TRUE diff --git a/docs/index.rst b/docs/index.rst index f0b29e00..a9e038a3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -281,6 +281,7 @@ Indices and tables Changelog ========= +- v4-3-1: Fixing the issue with exclude_path and hash calculations when dictionaries were inside iterables. https://github.com/seperman/deepdiff/issues/174 - v4-3-0: adding exclude_obj_callback - v4-2-0: .json property is finally removed. Fix for Py3.10. Dropping support for EOL Python 3.4. Ignoring private keys when calculating hashes. For example __init__ is not a part of hash calculation anymore. Fix for #166 Problem with comparing lists, with an boolean as element. - v4-1-0: .json property is finally removed. diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index f4f638c4..75c63c80 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1609,6 +1609,17 @@ def test_skip_path4(self): assert 'dictionary_item_added' in ddiff, {} assert 'dictionary_item_removed' not in ddiff, {} + def test_skip_exclude_path5(self): + exclude_paths = ["root[0]['e']", "root[1]['e']"] + + t1 = [{'a': 1, 'b': 'randomString', 'e': "1111"}] + t2 = [{'a': 1, 'b': 'randomString', 'e': "2222"}] + + ddiff = DeepDiff(t1, t2, exclude_paths=exclude_paths, + ignore_order=True, report_repetition=False) + result = {} + assert result == ddiff + def test_skip_custom_object_path(self): t1 = CustomClass(1) t2 = CustomClass(2) diff --git a/tests/test_hash.py b/tests/test_hash.py index 0fdbd509..781d9fb5 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -510,6 +510,23 @@ def test_skip_path(self): assert 2 in t1_hash assert t1_hash[2] == t2_hash[2] + def test_skip_path2(self): + + obj10 = {'a': 1, 'b': 'f', 'e': "1111", 'foo': {'bar': 'baz'}} + obj11 = {'c': 1, 'd': 'f', 'e': 'Cool'} + + obj20 = {'a': 1, 'b': 'f', 'e': 'Cool', 'foo': {'bar': 'baz2'}} + obj21 = {'c': 1, 'd': 'f', 'e': "2222"} + + t1 = [obj10, obj11] + t2 = [obj20, obj21] + + exclude_paths = ["root[0]['e']", "root[1]['e']", "root[0]['foo']['bar']"] + + t1_hash = DeepHashPrep(t1, exclude_paths=exclude_paths) + t2_hash = DeepHashPrep(t2, exclude_paths=exclude_paths) + assert t1_hash[t1] == t2_hash[t2] + def test_skip_regex_path(self): dic1 = {1: "a"} t1 = [dic1, 2] @@ -520,9 +537,9 @@ def test_skip_regex_path(self): assert 2 in t1_hash assert t1_hash[2] == t2_hash[2] - def test_skip_exclude_obj_callback(self): + def test_skip_hash_exclude_obj_callback(self): def exclude_obj_callback(obj, parent): - return True if parent == "root['x']" or obj == 2 else False + return True if parent == "root[0]['x']" or obj == 2 else False dic1 = {"x": 1, "y": 2, "z": 3} t1 = [dic1]