From 623c4482c001657d3c2b3945712bc1d45525e7ce Mon Sep 17 00:00:00 2001
From: Erik Welch
Date: Sat, 10 May 2014 09:17:24 -0400
Subject: [PATCH 1/6] Faster groupby!

Issue #178 impressed upon me just how costly attribute resolution can
be. In this case, `groupby` was made faster by avoiding resolving the
attribute `list.append`.

This implementation is also more memory efficient than the current
version, which uses a `defaultdict` that gets cast to a `dict`. While
casting a defaultdict `d` to a dict via `dict(d)` is fast, it still
performs a *copy*.

Honorable mention goes to the following implementation:

```python
def groupby_alt(func, seq):
    d = collections.defaultdict(lambda: [].append)
    for item in seq:
        d[func(item)](item)
    rv = {}
    for k, v in iteritems(d):
        rv[k] = v.__self__
    return rv
```

This alternative implementation can at times be *very* impressive.
You should play with it!
---
 toolz/itertoolz.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py
index c187193f..073aa440 100644
--- a/toolz/itertoolz.py
+++ b/toolz/itertoolz.py
@@ -3,7 +3,8 @@
 import collections
 import operator
 from functools import partial
-from toolz.compatibility import map, filter, filterfalse, zip, zip_longest
+from toolz.compatibility import (map, filter, filterfalse, zip, zip_longest,
+                                 iteritems)
 
 
 __all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave',
@@ -66,12 +67,19 @@ def groupby(func, seq):
     {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}
 
     See Also:
-        ``countby``
+        countby
     """
-    d = collections.defaultdict(list)
+    d = {}
     for item in seq:
-        d[func(item)].append(item)
-    return dict(d)
+        key = func(item)
+        if key not in d:
+            d[key] = [item].append
+        else:
+            d[key](item)
+    # This is okay to do, because we are not adding or removing keys
+    for k, v in iteritems(d):
+        d[k] = v.__self__
+    return d
 
 
 def merge_sorted(*seqs, **kwargs):

From 76054c8fb448f066699229b8e51ea74a8b3469ea Mon Sep 17 00:00:00 2001
From: Erik Welch
Date: Sat, 10 May 2014 12:59:32 -0400
Subject: [PATCH 2/6] Add useful code comments to groupby including pythonic
 implementation

---
 toolz/itertoolz.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py
index 073aa440..3e11d831 100644
--- a/toolz/itertoolz.py
+++ b/toolz/itertoolz.py
@@ -69,16 +69,25 @@ def groupby(func, seq):
     See Also:
         countby
     """
+    # The commented code below shows what is probably the fastest
+    # "pythonic" implementation of `groupby`:
+    #
+    # d = collections.defaultdict(list)
+    # for item in seq:
+    #     d[func(item)].append(item)
+    # return dict(d)
+
     d = {}
     for item in seq:
         key = func(item)
         if key not in d:
-            d[key] = [item].append
+            d[key] = [item].append  # avoid method resolution overhead
         else:
-            d[key](item)
-    # This is okay to do, because we are not adding or removing keys
-    for k, v in iteritems(d):
-        d[k] = v.__self__
+            d[key](item)  # append item to list
+
+    for key, appendval in iteritems(d):
+        # `mylist.append.__self__` references original list instance `mylist`
+        d[key] = appendval.__self__
     return d

From 8365b4db655a879b41a650b39c1e2b0ffeeb8810 Mon Sep 17 00:00:00 2001
From: Erik Welch
Date: Sat, 10 May 2014 18:45:32 -0400
Subject: [PATCH 3/6] Small tweak to `groupby` to improve it for "real-world"
 data

Benchmarks have a tendency to use data that is pathologically perfect.
In such pathological cases, it was unclear whether the current or the
previous implementation would be preferred.
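To make "pathologically perfect" versus real-world input concrete, the
comparison can be sketched roughly as below. This is illustrative only,
not the exact benchmark used for this commit; it assumes `toolz.groupby`,
an identity key function, and timings that will vary by machine.

```python
# Rough benchmark sketch: ordered ("pathologically perfect") keys
# versus shuffled ("real-world") keys. Not the benchmark used for
# this commit; `toolz.groupby` and the identity key are assumptions.
import random
import timeit

from toolz import groupby

n, ngroups = 100000, 100
ordered = sorted(random.randrange(ngroups) for _ in range(n))
shuffled = list(ordered)
random.shuffle(shuffled)  # destroy the ordering of the keys

for name, seq in [('ordered', ordered), ('shuffled', shuffled)]:
    secs = timeit.timeit(lambda: groupby(lambda x: x, seq), number=10)
    print('%s: %.3f seconds' % (name, secs))
```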
However, when the input data gets shuffled, the implementation in this
commit is *clearly* superior. Therefore, I believe this is the best
implementation for *real* data.
---
 toolz/itertoolz.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py
index 3e11d831..171719ea 100644
--- a/toolz/itertoolz.py
+++ b/toolz/itertoolz.py
@@ -80,10 +80,10 @@ def groupby(func, seq):
     d = {}
     for item in seq:
         key = func(item)
-        if key not in d:
-            d[key] = [item].append  # avoid method resolution overhead
-        else:
+        if key in d:
             d[key](item)  # append item to list
+        else:
+            d[key] = [item].append  # avoid method resolution overhead
 
     for key, appendval in iteritems(d):
         # `mylist.append.__self__` references original list instance `mylist`

From 6d330339467126185af31b5c6d319580ef5ee8c0 Mon Sep 17 00:00:00 2001
From: Erik Welch
Date: Sun, 11 May 2014 23:10:00 -0400
Subject: [PATCH 4/6] Add a new implementation of `groupby` that uses two
 dicts.

The previous commit was wrong: it is actually slower than the commit
before it. There was a mistake in the code used for the benchmarks.

I believe the new implementation has optimal performance as groups get
larger. It is also fast when creating a new group. It is slowest when
each group has two or three items in it, but it is still fast enough
not to impact the general performance of the algorithm.

A note on size: using a second dict in the implementation doesn't add
much memory. Let us consider the size used by all of the
containers--dicts and lists--but not their contents. In the worst-case
scenario, in which both dicts have two items (note that `fastdict` only
has groups of length two or greater), having a second dict increases
memory usage by only about 25%.
---
 toolz/itertoolz.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py
index 171719ea..5d23c088 100644
--- a/toolz/itertoolz.py
+++ b/toolz/itertoolz.py
@@ -77,18 +77,22 @@ def groupby(func, seq):
     #     d[func(item)].append(item)
     # return dict(d)
 
-    d = {}
+    rv = {}
+    fastdict = {}
     for item in seq:
         key = func(item)
-        if key in d:
-            d[key](item)  # append item to list
+        if key in fastdict:
+            # Optimal asymptotic performance
+            fastdict[key](item)  # append item to list
+        elif key not in rv:
+            # Fast initialization of groups
+            rv[key] = [item]
         else:
-            d[key] = [item].append  # avoid method resolution overhead
-
-    for key, appendval in iteritems(d):
-        # `mylist.append.__self__` references original list instance `mylist`
-        d[key] = appendval.__self__
-    return d
+            # Add `list.append` to `fastdict` to avoid attribute resolution
+            # overhead. This improves asymptotic speed as groups get larger.
+            val = fastdict[key] = rv[key].append
+            val(item)
+    return rv

From d871c9431ce418ffd2cd9920d127f198b47de63a Mon Sep 17 00:00:00 2001
From: Erik Welch
Date: Mon, 12 May 2014 12:48:36 -0400
Subject: [PATCH 5/6] No need to import `compatibility.iteritems` in
 `itertoolz`

---
 toolz/itertoolz.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py
index 5d23c088..6bcf8810 100644
--- a/toolz/itertoolz.py
+++ b/toolz/itertoolz.py
@@ -3,8 +3,7 @@
 import collections
 import operator
 from functools import partial
-from toolz.compatibility import (map, filter, filterfalse, zip, zip_longest,
-                                 iteritems)
+from toolz.compatibility import map, filter, filterfalse, zip, zip_longest
 
 
 __all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave',

From 42dc568e4141c32c34c8c5a926f2b119928c3add Mon Sep 17 00:00:00 2001
From: Erik Welch
Date: Thu, 15 May 2014 12:53:53 -0400
Subject: [PATCH 6/6] Choose a version of `groupby` that is easier to
 understand.

This is `groupby_alt` from the original commit comment in this branch.
See discussion at #179.

This version performs very well as groups become larger. The previous
implementation performs well when 10% or more of the elements form
groups of length one.

We plan to have various implementations in the `benchmarkz` repository,
which will let contributors add benchmarks that they care about and
easily run them on all variants of `groupby`.
---
 toolz/itertoolz.py | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py
index 6bcf8810..f380213b 100644
--- a/toolz/itertoolz.py
+++ b/toolz/itertoolz.py
@@ -3,7 +3,8 @@
 import collections
 import operator
 from functools import partial
-from toolz.compatibility import map, filter, filterfalse, zip, zip_longest
+from toolz.compatibility import (map, filter, filterfalse, zip, zip_longest,
+                                 iteritems)
 
 
 __all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave',
@@ -68,29 +69,12 @@ def groupby(func, seq):
     See Also:
         countby
     """
-    # The commented code below shows what is probably the fastest
-    # "pythonic" implementation of `groupby`:
-    #
-    # d = collections.defaultdict(list)
-    # for item in seq:
-    #     d[func(item)].append(item)
-    # return dict(d)
-
-    rv = {}
-    fastdict = {}
+    d = collections.defaultdict(lambda: [].append)
     for item in seq:
-        key = func(item)
-        if key in fastdict:
-            # Optimal asymptotic performance
-            fastdict[key](item)  # append item to list
-        elif key not in rv:
-            # Fast initialization of groups
-            rv[key] = [item]
-        else:
-            # Add `list.append` to `fastdict` to avoid attribute resolution
-            # overhead. This improves asymptotic speed as groups get larger.
-            val = fastdict[key] = rv[key].append
-            val(item)
+        d[func(item)](item)
+    rv = {}
+    for k, v in iteritems(d):
+        rv[k] = v.__self__
     return rv
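Taken out of the diff context, the final implementation chosen in this
series boils down to the self-contained sketch below. It uses Python 3's
`dict.items` in place of the `compatibility.iteritems` shim from the
patches; everything else mirrors the code in PATCH 6/6.

```python
import collections


def groupby(func, seq):
    # Map each key to a *bound* `list.append` method, so the `.append`
    # attribute is resolved once per group instead of once per item.
    d = collections.defaultdict(lambda: [].append)
    for item in seq:
        d[func(item)](item)
    # A bound method's `__self__` is the object it is bound to, i.e. the
    # underlying list, so this recovers the grouped lists themselves.
    rv = {}
    for k, v in d.items():
        rv[k] = v.__self__
    return rv


print(groupby(len, ['cat', 'dog', 'bird', 'ox']))
# {3: ['cat', 'dog'], 4: ['bird'], 2: ['ox']}
```

The `__self__` unwrapping at the end is what lets the hot loop skip the
per-item attribute resolution that motivated this branch in the first
place.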