Skip to content

Commit

Permalink
hack namedtuple in __main__ module, make it picklable.
Browse files Browse the repository at this point in the history
Do not need import pyspark before using namedtuple
  • Loading branch information
davies committed Jul 29, 2014
1 parent 93b03b8 commit 21991e6
Showing 1 changed file with 22 additions and 7 deletions.
29 changes: 22 additions & 7 deletions python/pyspark/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ def dumps(self, obj):


# Hook namedtuple, make it picklable
# pyspark should be imported before 'from collections import namedtuple'

old_namedtuple = collections.namedtuple
__cls = {}
Expand All @@ -283,16 +282,19 @@ def _restore(name, fields, value):
__cls[k] = cls
return cls(*value)

def namedtuple(name, fields, verbose=False, rename=False):
""" Picklable namedtuple """
cls = old_namedtuple(name, fields, verbose, rename)

def hack_namedtuple(cls):
    """Patch a namedtuple class in place so its instances can be pickled.

    A ``__reduce__`` method is attached to ``cls``. It tells pickle to
    rebuild an instance by calling ``_restore`` with the class name, the
    field names and the instance's values as a plain tuple.

    :param cls: class exposing ``__name__`` and ``_fields``
    :return: the same ``cls`` object, now carrying ``__reduce__``
    """
    # Read the name and fields once; the closure below reuses them for
    # every instance that gets pickled.
    type_name, field_names = cls.__name__, cls._fields

    def __reduce__(self):
        return (_restore, (type_name, field_names, tuple(self)))

    cls.__reduce__ = __reduce__
    return cls

def namedtuple(name, fields, verbose=False, rename=False):
    # Build the class with the original factory, then patch it so that
    # its instances can be pickled.
    # NOTE(review): the stdlib factory names its first two parameters
    # `typename` and `field_names`; callers passing them by keyword would
    # not match this signature -- confirm whether that matters.
    return hack_namedtuple(old_namedtuple(name, fields, verbose, rename))

namedtuple.__doc__ = old_namedtuple.__doc__
collections.namedtuple = namedtuple


Expand All @@ -306,6 +308,19 @@ class PickleSerializer(FramedSerializer):
not be as fast as more specialized serializers.
"""

def _hack_namedtuple(self):
    """Make namedtuple classes defined in ``__main__`` picklable.

    Every object in the ``__main__`` module namespace whose type is
    exactly ``type``, whose direct base is ``tuple``, which has a
    ``_fields`` attribute and which does not define ``__reduce__``
    itself is patched in place with ``hack_namedtuple``.
    """
    # namedtuples created in other modules can be pickled normally; only
    # the ones living in __main__ need to be hacked.
    main_namespace = sys.modules["__main__"].__dict__
    for candidate in main_namespace.itervalues():
        if type(candidate) is not type:
            continue
        if candidate.__base__ is not tuple:
            continue
        if not hasattr(candidate, "_fields"):
            continue
        if "__reduce__" in candidate.__dict__:
            # Already carries its own __reduce__; leave it alone.
            continue
        hack_namedtuple(candidate)

def dump_stream(self, iterator, stream):
    """Patch ``__main__`` namedtuples, then write ``iterator`` to ``stream``.

    The actual writing is delegated to ``FramedSerializer.dump_stream``.
    """
    # Patch first, so classes defined in __main__ are picklable by the
    # time the delegated call runs.
    self._hack_namedtuple()
    FramedSerializer.dump_stream(self, iterator, stream)

def dumps(self, obj):
    """Return ``obj`` serialized by ``cPickle`` with pickle protocol 2."""
    pickle_protocol = 2
    return cPickle.dumps(obj, pickle_protocol)

Expand All @@ -331,7 +346,7 @@ class MarshalSerializer(FramedSerializer):
loads = marshal.loads


class AutoSerializer(FramedSerializer):
class AutoSerializer(PickleSerializer):
"""
Choose marshal or cPickle as serialization protocol automatically
"""
Expand Down

0 comments on commit 21991e6

Please sign in to comment.