diff --git a/dill/_dill.py b/dill/_dill.py index c04d4581..33fabb74 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -8,17 +8,29 @@ """ dill: a utility for serialization of python objects +The primary functions in `dill` are :func:`dump` and +:func:`dumps` for serialization ("pickling") to a +file or to a string, respectively, and :func:`load` +and :func:`loads` for deserialization ("unpickling"), +similarly, from a file or from a string. Other notable +functions are :func:`~dill.dump_module` and +:func:`~dill.load_module`, which are used to save and +restore module objects, including an intepreter session. + Based on code written by Oren Tirosh and Armin Ronacher. Extended to a (near) full set of the builtin types (in types module), and coded to the pickle interface, by . Initial port to python3 by Jonathan Dobson, continued by mmckerns. -Test against "all" python types (Std. Lib. CH 1-15 @ 2.7) by mmckerns. -Test against CH16+ Std. Lib. ... TBD. +Tested against "all" python types (Std. Lib. CH 1-15 @ 2.7) by mmckerns. +Tested against CH16+ Std. Lib. ... TBD. """ + +from __future__ import annotations + __all__ = [ - 'Pickler','Unpickler', - 'check','copy','dump','dumps','load','loads','pickle','pickles','register', - 'DEFAULT_PROTOCOL','HIGHEST_PROTOCOL','CONTENTS_FMODE','FILE_FMODE','HANDLE_FMODE', + 'dump','dumps','load','loads','copy', + 'Pickler','Unpickler','register','pickle','pickles','check', + 'DEFAULT_PROTOCOL','HIGHEST_PROTOCOL','HANDLE_FMODE','CONTENTS_FMODE','FILE_FMODE', 'PickleError','PickleWarning','PicklingError','PicklingWarning','UnpicklingError', 'UnpicklingWarning', ] @@ -39,6 +51,7 @@ #XXX: get types from .objtypes ? import builtins as __builtin__ from pickle import _Pickler as StockPickler, Unpickler as StockUnpickler +from pickle import GLOBAL, POP from _thread import LockType from _thread import RLock as RLockType #from io import IOBase @@ -58,6 +71,7 @@ import marshal import gc # import zlib +import dataclasses from weakref import ReferenceType, ProxyType, CallableProxyType from collections import OrderedDict from functools import partial @@ -158,22 +172,19 @@ def get_file_type(*args, **kwargs): from socket import socket as SocketType #FIXME: additionally calls ForkingPickler.register several times from multiprocessing.reduction import _reduce_socket as reduce_socket -try: +try: #pragma: no cover IS_IPYTHON = __IPYTHON__ # is True - ExitType = None # IPython.core.autocall.ExitAutocall - singletontypes = ['exit', 'quit', 'get_ipython'] + ExitType = None # IPython.core.autocall.ExitAutocall + IPYTHON_SINGLETONS = ('exit', 'quit', 'get_ipython') except NameError: IS_IPYTHON = False try: ExitType = type(exit) # apparently 'exit' can be removed except NameError: ExitType = None - singletontypes = [] + IPYTHON_SINGLETONS = () import inspect -import dataclasses import typing -from pickle import GLOBAL - ### Shims for different versions of Python and dill class Sentinel(object): @@ -320,7 +331,14 @@ class UnpicklingWarning(PickleWarning, UnpicklingError): ### Extend the Picklers class Pickler(StockPickler): """python's Pickler extended to interpreter sessions""" - dispatch = MetaCatchingDict(StockPickler.dispatch.copy()) + dispatch: typing.Dict[type, typing.Callable[[Pickler, typing.Any], None]] \ + = MetaCatchingDict(StockPickler.dispatch.copy()) + """The dispatch table, a dictionary of serializing functions used + by Pickler to save objects of specific types. Use :func:`pickle` + or :func:`register` to associate types to custom functions. + + :meta hide-value: + """ _session = False from .settings import settings @@ -341,11 +359,12 @@ def __init__(self, file, *args, **kwds): self._file = file def save(self, obj, save_persistent_id=True): - # register if the object is a numpy ufunc - # thanks to Paul Kienzle for pointing out ufuncs didn't pickle + # numpy hack obj_type = type(obj) if NumpyArrayType and not (obj_type is type or obj_type in Pickler.dispatch): - if NumpyUfuncType and numpyufunc(obj_type): + # register if the object is a numpy ufunc + # thanks to Paul Kienzle for pointing out ufuncs didn't pickle + if numpyufunc(obj_type): @register(obj_type) def save_numpy_ufunc(pickler, obj): logger.trace(pickler, "Nu: %s", obj) @@ -359,7 +378,7 @@ def save_numpy_ufunc(pickler, obj): # def uload(name): return getattr(numpy, name) # copy_reg.pickle(NumpyUfuncType, udump, uload) # register if the object is a numpy dtype - if NumpyDType and numpydtype(obj_type): + if numpydtype(obj_type): @register(obj_type) def save_numpy_dtype(pickler, obj): logger.trace(pickler, "Dt: %s", obj) @@ -372,7 +391,7 @@ def save_numpy_dtype(pickler, obj): # def udump(f): return uload, (f.type,) # copy_reg.pickle(NumpyDTypeType, udump, uload) # register if the object is a subclassed numpy array instance - if NumpyArrayType and ndarraysubclassinstance(obj_type): + if ndarraysubclassinstance(obj_type): @register(obj_type) def save_numpy_array(pickler, obj): logger.trace(pickler, "Nu: (%s, %s)", obj.shape, obj.dtype) @@ -381,8 +400,9 @@ def save_numpy_array(pickler, obj): pickler.save_reduce(_create_array, (f,args,state,npdict), obj=obj) logger.trace(pickler, "# Nu") return - # end hack - if GENERATOR_FAIL and type(obj) == GeneratorType: + # end numpy hack + + if GENERATOR_FAIL and obj_type is GeneratorType: msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType raise PicklingError(msg) StockPickler.save(self, obj, save_persistent_id) @@ -392,7 +412,6 @@ def save_numpy_array(pickler, obj): def dump(self, obj): #NOTE: if settings change, need to update attributes logger.trace_setup(self) StockPickler.dump(self, obj) - dump.__doc__ = StockPickler.dump.__doc__ class Unpickler(StockUnpickler): @@ -436,12 +455,12 @@ def dispatch_table(): pickle_dispatch_copy = StockPickler.dispatch.copy() def pickle(t, func): - """expose dispatch table for user-created extensions""" + """expose :attr:`~Pickler.dispatch` table for user-created extensions""" Pickler.dispatch[t] = func return def register(t): - """register type to Pickler's dispatch table """ + """decorator to register types to Pickler's :attr:`~Pickler.dispatch` table""" def proxy(func): Pickler.dispatch[t] = func return func @@ -460,7 +479,7 @@ def use_diff(on=True): Reduces size of pickles by only including object which have changed. Decreases pickle size but increases CPU time needed. - Also helps avoid some unpicklable objects. + Also helps avoid some unpickleable objects. MUST be called at start of script, otherwise changes will not be recorded. """ global _use_diff, diff @@ -1088,7 +1107,7 @@ def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO else: pickler.save_reduce(*reduction) # pop None created by calling preprocessing step off stack - pickler.write(bytes('0', 'UTF-8')) + pickler.write(POP) #@register(CodeType) #def save_code(pickler, obj): @@ -1158,7 +1177,7 @@ def save_code(pickler, obj): return def _repr_dict(obj): - """make a short string representation of a dictionary""" + """Make a short string representation of a dictionary.""" return "<%s object at %#012x>" % (type(obj).__name__, id(obj)) @register(dict) @@ -1470,7 +1489,7 @@ def save_cell(pickler, obj): # The result of this function call will be None pickler.save_reduce(_shims._delattr, (obj, 'cell_contents')) # pop None created by calling _delattr off stack - pickler.write(bytes('0', 'UTF-8')) + pickler.write(POP) logger.trace(pickler, "# Ce3") return if is_dill(pickler, child=True): @@ -1614,7 +1633,7 @@ def save_module(pickler, obj): pass else: logger.trace(pickler, "M2: %s with diff", obj) - logger.trace(pickler, "Diff: %s", changed.keys()) + logger.info("Diff: %s", changed.keys()) pickler.save_reduce(_import_module, (obj.__name__,), obj=obj, state=changed) logger.trace(pickler, "# M2") @@ -1625,15 +1644,20 @@ def save_module(pickler, obj): logger.trace(pickler, "# M1") else: builtin_mod = _is_builtin_module(obj) - if obj.__name__ not in ("builtins", "dill", "dill._dill") and not builtin_mod or \ - is_dill(pickler, child=True) and obj is pickler._main: + is_session_main = is_dill(pickler, child=True) and obj is pickler._main + if (obj.__name__ not in ("builtins", "dill", "dill._dill") and not builtin_mod + or is_session_main): logger.trace(pickler, "M1: %s", obj) - _main_dict = obj.__dict__.copy() #XXX: better no copy? option to copy? - [_main_dict.pop(item, None) for item in singletontypes - + ["__builtins__", "__loader__"]] + # Hack for handling module-type objects in load_module(). mod_name = obj.__name__ if _is_imported_module(obj) else '__runtime__.%s' % obj.__name__ - pickler.save_reduce(_import_module, (mod_name,), obj=obj, - state=_main_dict) + # Second references are saved as __builtin__.__main__ in save_module_dict(). + main_dict = obj.__dict__.copy() + for item in ('__builtins__', '__loader__'): + main_dict.pop(item, None) + for item in IPYTHON_SINGLETONS: #pragma: no cover + if getattr(main_dict.get(item), '__module__', '').startswith('IPython'): + del main_dict[item] + pickler.save_reduce(_import_module, (mod_name,), obj=obj, state=main_dict) logger.trace(pickler, "# M1") elif obj.__name__ == "dill._dill": logger.trace(pickler, "M2: %s", obj) @@ -1643,7 +1667,6 @@ def save_module(pickler, obj): logger.trace(pickler, "M2: %s", obj) pickler.save_reduce(_import_module, (obj.__name__,), obj=obj) logger.trace(pickler, "# M2") - return return @register(TypeType) @@ -1669,7 +1692,7 @@ def save_type(pickler, obj, postproc_list=None): elif obj is type(None): logger.trace(pickler, "T7: %s", obj) #XXX: pickler.save_reduce(type, (None,), obj=obj) - pickler.write(bytes('c__builtin__\nNoneType\n', 'UTF-8')) + pickler.write(GLOBAL + b'__builtin__\nNoneType\n') logger.trace(pickler, "# T7") elif obj is NotImplementedType: logger.trace(pickler, "T7: %s", obj) @@ -1710,9 +1733,18 @@ def save_type(pickler, obj, postproc_list=None): else: logger.trace(pickler, "T4: %s", obj) if incorrectly_named: - warnings.warn('Cannot locate reference to %r.' % (obj,), PicklingWarning) + warnings.warn( + "Cannot locate reference to %r." % (obj,), + PicklingWarning, + stacklevel=3, + ) if obj_recursive: - warnings.warn('Cannot pickle %r: %s.%s has recursive self-references that trigger a RecursionError.' % (obj, obj.__module__, obj_name), PicklingWarning) + warnings.warn( + "Cannot pickle %r: %s.%s has recursive self-references that " + "trigger a RecursionError." % (obj, obj.__module__, obj_name), + PicklingWarning, + stacklevel=3, + ) #print (obj.__dict__) #print ("%s\n%s" % (type(obj), obj.__name__)) #print ("%s\n%s" % (obj.__bases__, obj.__dict__)) @@ -1848,7 +1880,7 @@ def save_function(pickler, obj): # Change the value of the cell pickler.save_reduce(*possible_postproc) # pop None created by calling preprocessing step off stack - pickler.write(bytes('0', 'UTF-8')) + pickler.write(POP) logger.trace(pickler, "# F1") else: diff --git a/dill/session.py b/dill/session.py index 4b2ad664..e9843a71 100644 --- a/dill/session.py +++ b/dill/session.py @@ -194,13 +194,29 @@ def dump_module( >>> [foo.sin(x) for x in foo.values] [0.8414709848078965, 0.9092974268256817, 0.1411200080598672] + - Use `refimported` to save imported objects by reference: + + >>> import dill + >>> from html.entities import html5 + >>> type(html5), len(html5) + (dict, 2231) + >>> import io + >>> buf = io.BytesIO() + >>> dill.dump_module(buf) # saves __main__, with html5 saved by value + >>> len(buf.getvalue()) # pickle size in bytes + 71665 + >>> buf = io.BytesIO() + >>> dill.dump_module(buf, refimported=True) # html5 saved by reference + >>> len(buf.getvalue()) + 438 + *Changed in version 0.3.6:* Function ``dump_session()`` was renamed to ``dump_module()``. Parameters ``main`` and ``byref`` were renamed to ``module`` and ``refimported``, respectively. Note: Currently, ``dill.settings['byref']`` and ``dill.settings['recurse']`` - don't apply to this function.` + don't apply to this function. """ for old_par, par in [('main', 'module'), ('byref', 'refimported')]: if old_par in kwds: diff --git a/docs/source/conf.py b/docs/source/conf.py index 03e152b1..b968fe29 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,15 +73,15 @@ 'special-members': True, 'show-inheritance': True, 'imported-members': True, - 'exclude-members': ( - '__dict__,' + 'exclude-members': ( #NOTE: this is a single string concatenation + '__dict__,' # may be verbose '__slots__,' '__weakref__,' '__module__,' '_abc_impl,' - '__init__,' - '__annotations__,' - '__dataclass_fields__,' + '__init__,' # redundant with class docstring by "autoclass_content=both" + '__annotations__,' # redundant with signature documentation + '__dataclass_fields__,' # redundant automatic attribute ) } autodoc_typehints = 'description' diff --git a/docs/source/dill.rst b/docs/source/dill.rst index 268eeea1..0a94a100 100644 --- a/docs/source/dill.rst +++ b/docs/source/dill.rst @@ -11,7 +11,7 @@ logger module ------------- .. automodule:: dill.logger -.. :exclude-members: + + :exclude-members: +trace objtypes module ---------------