-
-
Notifications
You must be signed in to change notification settings - Fork 31.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-91432: Add more FOR_ITER specializations #94096
Conversation
In microbenchmarks, it seems adding these extra opcodes bumped some things around and made some cases slower (the specific cases are missing from this copy). But dict items, enumerate, and tuple did speed up, as expected.
Microbenchmark script:
from pyperf import Runner, perf_counter
from itertools import repeat
def for_range(loops, length):
    """Time a plain ``for`` loop over ``range(length)``.

    Args:
        loops: How many times the inner ``for`` loop is executed.
        length: Number of items in the range being iterated.

    Returns:
        The difference between the ``perf_counter()`` readings taken
        immediately before and after the loops.
    """
    # Both iterables are built before the first clock reading so that their
    # construction is not part of the measured interval.
    repetitions = repeat(None, loops)
    R = range(length)
    t0 = perf_counter()
    for _ in repetitions:
        for x in R:
            pass
    t1 = perf_counter()
    return t1 - t0
def for_list(loops, length):
    """Time a plain ``for`` loop over a list of ``length`` floats.

    Args:
        loops: How many times the inner ``for`` loop is executed.
        length: Number of items in the list being iterated.

    Returns:
        The difference between the ``perf_counter()`` readings taken
        immediately before and after the loops.
    """
    # The list is built before the first clock reading so that its
    # construction is not part of the measured interval.
    repetitions = repeat(None, loops)
    L = list(map(float, range(length)))
    t0 = perf_counter()
    for _ in repetitions:
        for x in L:
            pass
    t1 = perf_counter()
    return t1 - t0
def for_tuple(loops, length):
    """Time a plain ``for`` loop over a tuple of ``length`` floats.

    Args:
        loops: How many times the inner ``for`` loop is executed.
        length: Number of items in the tuple being iterated.

    Returns:
        The difference between the ``perf_counter()`` readings taken
        immediately before and after the loops.
    """
    # The tuple is built before the first clock reading so that its
    # construction is not part of the measured interval.
    repetitions = repeat(None, loops)
    T = tuple(map(float, range(length)))
    t0 = perf_counter()
    for _ in repetitions:
        for x in T:
            pass
    t1 = perf_counter()
    return t1 - t0
def for_dict(loops, length):
    """Time a ``for k, v in d.items()`` loop over a dict of ``length`` keys.

    Args:
        loops: How many times the inner ``for`` loop is executed.
        length: Number of entries in the dict being iterated.

    Returns:
        The difference between the ``perf_counter()`` readings taken
        immediately before and after the loops.
    """
    # The dict (float keys, all values None) is built before the first clock
    # reading so that its construction is not part of the measured interval.
    repetitions = repeat(None, loops)
    D = dict.fromkeys(map(float, range(length)))
    t0 = perf_counter()
    for _ in repetitions:
        # The two-target unpacking is deliberate: iterating items() and
        # unpacking each pair is the loop shape being measured, so the
        # unused names must stay. A fresh items() view is created on every
        # repetition, inside the timed region.
        for x, y in D.items():
            pass
    t1 = perf_counter()
    return t1 - t0
def for_enumerate(loops, length):
    """Time a ``for i, x in enumerate(...)`` loop over a list of ``length`` items.

    Args:
        loops: How many times the inner ``for`` loop is executed.
        length: Number of items in the list being enumerated.

    Returns:
        The difference between the ``perf_counter()`` readings taken
        immediately before and after the loops.
    """
    # The list is built before the first clock reading so that its
    # construction is not part of the measured interval.
    repetitions = repeat(None, loops)
    L = [None] * length
    t0 = perf_counter()
    for _ in repetitions:
        # A fresh enumerate object is created on every repetition, inside the
        # timed region; the index/value unpacking is part of what is measured.
        for i, x in enumerate(L):
            pass
    t1 = perf_counter()
    return t1 - t0
def for_map(loops, length):
    """Time a ``for`` loop over ``map(len, ...)`` applied to ``length`` items.

    Args:
        loops: How many times the inner ``for`` loop is executed.
        length: Number of items in the list fed to ``map``.

    Returns:
        The difference between the ``perf_counter()`` readings taken
        immediately before and after the loops.
    """
    # The input list (empty tuples) is built before the first clock reading
    # so that its construction is not part of the measured interval.
    repetitions = repeat(None, loops)
    L = [()] * length
    t0 = perf_counter()
    for _ in repetitions:
        # A fresh map object is created on every repetition, inside the timed
        # region, and each item costs one len() call.
        for x in map(len, L):
            pass
    t1 = perf_counter()
    return t1 - t0
def for_string(loops, length):
    """Time a plain ``for`` loop over a string of ``length`` characters.

    Args:
        loops: How many times the inner ``for`` loop is executed.
        length: Number of characters in the string being iterated.

    Returns:
        The difference between the ``perf_counter()`` readings taken
        immediately before and after the loops.
    """
    # The string is built before the first clock reading so that its
    # construction is not part of the measured interval.
    repetitions = repeat(None, loops)
    S = "a" * length
    t0 = perf_counter()
    for _ in repetitions:
        for x in S:
            pass
    t1 = perf_counter()
    return t1 - t0
def for_set(loops, length):
    """Time a plain ``for`` loop over a set of ``length`` distinct strings.

    Args:
        loops: How many times the inner ``for`` loop is executed.
        length: Number of elements in the set being iterated.

    Returns:
        The difference between the ``perf_counter()`` readings taken
        immediately before and after the loops.
    """
    # The set is built before the first clock reading so that its
    # construction is not part of the measured interval.
    repetitions = repeat(None, loops)
    S = {f"a{i}" for i in range(length)}
    t0 = perf_counter()
    for _ in repetitions:
        for x in S:
            pass
    t1 = perf_counter()
    return t1 - t0
# Register every benchmark function at several container sizes.
# Each function is passed `n` as its `length` argument; the benchmark name
# embeds the size with `_` as the thousands separator (e.g. "for_list 2_000").
bench = Runner().bench_time_func
for n in [20, 200, 2_000, 20_000]:
    # inner_loops=n declares that one outer repetition performs n item
    # iterations. NOTE(review): per the pyperf docs this should make results
    # comparable per item across sizes -- confirm against the pyperf version
    # in use.
    bench(f"for_range {n:_}", for_range, n, inner_loops=n)
    bench(f"for_list {n:_}", for_list, n, inner_loops=n)
    bench(f"for_tuple {n:_}", for_tuple, n, inner_loops=n)
    bench(f"for_dict {n:_}", for_dict, n, inner_loops=n)
    bench(f"for_enumerate {n:_}", for_enumerate, n, inner_loops=n)
    bench(f"for_map {n:_}", for_map, n, inner_loops=n)
    bench(f"for_string {n:_}", for_string, n, inner_loops=n)
    bench(f"for_set {n:_}", for_set, n, inner_loops=n)
|
Before adding any more specializations for builtin iterators, I'd like to try implementing faster-cpython/ideas#392 and add specialization for generators.
#91432
This does:
FOR_ITER(tuple)
FOR_ITER(dict_items) + UNPACK_SEQUENCE(2)
FOR_ITER(enumerate) + UNPACK_SEQUENCE(2) + STORE_FAST
PyLongObject
I'm not sure whether all of these are worth it, but I want to see how this moves stats and micro- and macro-benchmarks.