Skip to content

Commit

Permalink
pythongh-121795: Improve performance of set membership testing from set arguments (python#121796)
Browse files Browse the repository at this point in the history
  • Loading branch information
HarryLHW authored Jul 22, 2024
1 parent 9766819 commit 2408a8a
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 23 deletions.
10 changes: 10 additions & 0 deletions Lib/test/test_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,16 @@ def __le__(self, some_set):
myset >= myobj
self.assertTrue(myobj.le_called)

def test_set_membership(self):
myfrozenset = frozenset(range(3))
myset = {myfrozenset, "abc", 1}
self.assertIn(set(range(3)), myset)
self.assertNotIn(set(range(1)), myset)
myset.discard(set(range(3)))
self.assertEqual(myset, {"abc", 1})
self.assertRaises(KeyError, myset.remove, set(range(1)))
self.assertRaises(KeyError, myset.remove, set(range(3)))


class SetSubclass(set):
pass
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve performance of set membership testing, ``set.remove()`` and ``set.discard()`` when the argument is a set.
59 changes: 36 additions & 23 deletions Objects/setobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -709,18 +709,20 @@ _shuffle_bits(Py_uhash_t h)
large primes with "interesting bit patterns" and that passed tests
for good collision statistics on a variety of problematic datasets
including powersets and graph structures (such as David Eppstein's
graph recipes in Lib/test/test_set.py).
This hash algorithm can be used on either a frozenset or a set.
When it is used on a set, it computes the hash value of the equivalent
frozenset without creating a new frozenset object. */

static Py_hash_t
frozenset_hash(PyObject *self)
frozenset_hash_impl(PyObject *self)
{
assert(PyAnySet_Check(self));
PySetObject *so = (PySetObject *)self;
Py_uhash_t hash = 0;
setentry *entry;

if (so->hash != -1)
return so->hash;

/* Xor-in shuffled bits from every entry's hash field because xor is
commutative and a frozenset hash should be independent of order.
Expand Down Expand Up @@ -753,6 +755,20 @@ frozenset_hash(PyObject *self)
if (hash == (Py_uhash_t)-1)
hash = 590923713UL;

return (Py_hash_t)hash;
}

/* Return the hash of `self`, caching it in the object.

   A value other than -1 in so->hash is treated as an already computed
   hash and is returned directly.  Otherwise the hash is computed with
   frozenset_hash_impl(), stored in so->hash and returned. */
static Py_hash_t
frozenset_hash(PyObject *self)
{
    PySetObject *so = (PySetObject *)self;

    if (so->hash != -1) {
        return so->hash;
    }

    /* Keep the value in the signed type that frozenset_hash_impl() returns
       and that this function returns, so no signed/unsigned conversion
       happens on the way into the cache. */
    Py_hash_t hash = frozenset_hash_impl(self);
    so->hash = hash;
    return hash;
}
Expand Down Expand Up @@ -2137,19 +2153,18 @@ set_add_impl(PySetObject *so, PyObject *key)
/* Look up `key` in `so`.

   Returns the result of set_contains_key(); a negative value signals an
   error.  When that first lookup fails with a TypeError and `key` passes
   PySet_Check() (presumably because a mutable set is unhashable), the
   error is cleared and the lookup is retried using the hash that the
   equivalent frozenset would have.  That hash is computed in place by
   frozenset_hash_impl(), so no temporary frozenset is allocated.

   NOTE(review): the name suggests the caller already holds the lock on
   `so` -- confirm against the callers. */
static int
set_contains_lock_held(PySetObject *so, PyObject *key)
{
    int rv = set_contains_key(so, key);
    if (rv >= 0) {
        return rv;
    }

    /* Only a TypeError raised for a set key is recoverable. */
    if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) {
        return -1;
    }
    PyErr_Clear();

    /* `hash` is declared outside the critical-section macros so that it
       is still in scope for the lookup after the section ends. */
    Py_hash_t hash;
    Py_BEGIN_CRITICAL_SECTION(key);
    hash = frozenset_hash_impl(key);
    Py_END_CRITICAL_SECTION();

    return set_contains_entry(so, key, hash);
}
Expand Down Expand Up @@ -2203,19 +2218,18 @@ static PyObject *
set_remove_impl(PySetObject *so, PyObject *key)
/*[clinic end generated code: output=0b9134a2a2200363 input=893e1cb1df98227a]*/
{
PyObject *tmpkey;
int rv;

rv = set_discard_key(so, key);
if (rv < 0) {
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
return NULL;
PyErr_Clear();
tmpkey = make_new_set(&PyFrozenSet_Type, key);
if (tmpkey == NULL)
return NULL;
rv = set_discard_key(so, tmpkey);
Py_DECREF(tmpkey);
Py_hash_t hash;
Py_BEGIN_CRITICAL_SECTION(key);
hash = frozenset_hash_impl(key);
Py_END_CRITICAL_SECTION();
rv = set_discard_entry(so, key, hash);
if (rv < 0)
return NULL;
}
Expand Down Expand Up @@ -2244,19 +2258,18 @@ static PyObject *
set_discard_impl(PySetObject *so, PyObject *key)
/*[clinic end generated code: output=eec3b687bf32759e input=861cb7fb69b4def0]*/
{
PyObject *tmpkey;
int rv;

rv = set_discard_key(so, key);
if (rv < 0) {
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
return NULL;
PyErr_Clear();
tmpkey = make_new_set(&PyFrozenSet_Type, key);
if (tmpkey == NULL)
return NULL;
rv = set_discard_key(so, tmpkey);
Py_DECREF(tmpkey);
Py_hash_t hash;
Py_BEGIN_CRITICAL_SECTION(key);
hash = frozenset_hash_impl(key);
Py_END_CRITICAL_SECTION();
rv = set_discard_entry(so, key, hash);
if (rv < 0)
return NULL;
}
Expand Down

0 comments on commit 2408a8a

Please sign in to comment.