From 21991e6c5b032700eaa51dea1edf026aa3d844b4 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Tue, 29 Jul 2014 13:20:21 -0700 Subject: [PATCH] hack namedtuple in __main__ module, make it picklable. Do not need to import pyspark before using namedtuple --- python/pyspark/serializers.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index 001944ecfb29d..e41026be104ee 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -270,7 +270,6 @@ def dumps(self, obj): # Hook namedtuple, make it picklable -# pyspark should be imported before 'from collections import namedtuple' old_namedtuple = collections.namedtuple __cls = {} @@ -283,16 +282,19 @@ def _restore(name, fields, value): __cls[k] = cls return cls(*value) -def namedtuple(name, fields, verbose=False, rename=False): - """ Pickable namedtuple """ - cls = old_namedtuple(name, fields, verbose, rename) - +def hack_namedtuple(cls): + name = cls.__name__ + fields = cls._fields def __reduce__(self): return (_restore, (name, fields, tuple(self))) - cls.__reduce__ = __reduce__ return cls +def namedtuple(name, fields, verbose=False, rename=False): + cls = old_namedtuple(name, fields, verbose, rename) + return hack_namedtuple(cls) + +namedtuple.__doc__ = old_namedtuple.__doc__ collections.namedtuple = namedtuple @@ -306,6 +308,19 @@ class PickleSerializer(FramedSerializer): not be as fast as more specialized serializers. 
""" + def _hack_namedtuple(self): + # namedtuples created in other modules can be pickled normally, + # so only hack the namedtuples defined in the __main__ module + for n, o in sys.modules["__main__"].__dict__.iteritems(): + if (type(o) is type and o.__base__ is tuple + and hasattr(o, "_fields") + and "__reduce__" not in o.__dict__): + hack_namedtuple(o) + + def dump_stream(self, iterator, stream): + self._hack_namedtuple() + FramedSerializer.dump_stream(self, iterator, stream) + def dumps(self, obj): return cPickle.dumps(obj, 2) @@ -331,7 +346,7 @@ class MarshalSerializer(FramedSerializer): loads = marshal.loads -class AutoSerializer(FramedSerializer): +class AutoSerializer(PickleSerializer): """ Choose marshal or cPickle as serialization protocol autumatically """