From cff9ba7a8da5efe64e90a98eaf779667d936de07 Mon Sep 17 00:00:00 2001 From: Matthew Rocklin Date: Sat, 24 May 2014 09:33:44 -0700 Subject: [PATCH] reduceby defaults to item in no ``init`` case --- toolz/itertoolz.py | 24 +++++++++++++++++++----- toolz/tests/test_itertoolz.py | 4 ++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 61477277..c815493f 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -449,7 +449,7 @@ def frequencies(seq): return dict(d) -def reduceby(key, binop, seq, init): +def reduceby(key, binop, seq, init=no_default): """ Perform a simultaneous groupby and reduction The computation: @@ -468,18 +468,28 @@ def reduceby(key, binop, seq, init): operate in much less space. This makes it suitable for larger datasets that do not fit comfortably in memory + Simple Examples + --------------- + >>> from operator import add, mul - >>> data = [1, 2, 3, 4, 5] >>> iseven = lambda x: x % 2 == 0 - >>> reduceby(iseven, add, data, 0) + + >>> data = [1, 2, 3, 4, 5] + + >>> reduceby(iseven, add, data) {False: 9, True: 6} - >>> reduceby(iseven, mul, data, 1) + + >>> reduceby(iseven, mul, data) {False: 15, True: 8} + Complex Example + --------------- + >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000}, ... {'name': 'fight crime', 'state': 'IL', 'cost': 100000}, ... {'name': 'help farmers', 'state': 'IL', 'cost': 2000000}, ... {'name': 'help farmers', 'state': 'CA', 'cost': 200000}] + >>> reduceby(lambda x: x['state'], # doctest: +SKIP ... lambda acc, x: acc + x['cost'], ... projects, 0) @@ -489,7 +499,11 @@ def reduceby(key, binop, seq, init): for item in seq: k = key(item) if k not in d: - d[k] = init + if init is no_default: + d[k] = item + continue + else: + d[k] = init d[k] = binop(d[k], item) return d diff --git a/toolz/tests/test_itertoolz.py b/toolz/tests/test_itertoolz.py index d4045e57..dc966458 100644 --- a/toolz/tests/test_itertoolz.py +++ b/toolz/tests/test_itertoolz.py @@ -208,6 +208,10 @@ def test_reduceby(): projects, 0) == {'CA': 1200000, 'IL': 2100000} +def test_reduce_by_init(): + assert reduceby(iseven, add, [1, 2, 3, 4]) == {True: 2 + 4, False: 1 + 3} + + def test_iterate(): assert list(itertools.islice(iterate(inc, 0), 0, 5)) == [0, 1, 2, 3, 4] assert list(take(4, iterate(double, 1))) == [1, 2, 4, 8]