Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DO NOT MERGE - WIP: Add ability to serialize/deserialize astroid.Module (and friends) #1194

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions astroid/_persistence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
"""Utilities for serializing and deserializing astroid Module (and its pals).

These shouldn't be called directly from client code,
instead call `astroid.nodes.Module.dump` and `.load`.

The serialized/deserialized data is JSON-compatible.
(Converting to JSON was chosen because it has standard library support, and is
more secure than pickling the types)

Astroid types can customize serialization and deserialization by defining
`__dump__` which should return a dict of data for serializing, and
`__load__` which should fully initialize the instance from the data.
Both methods are provided a helper function for dumping or loading
non-trivial data (when in doubt, use the helper).

Because astroid types have the possibility of being circularly referenced,
object instances aren't serialized directly as values of the parent's data.
Instead, each object is given an identifier and placed in a reference mapping,
with the object's reference being used as a value in the parent's data.
Additionally, dumping and loading an object is performed in two phases so we
don't infinitely recurse.
"""

import base64
import builtins
import enum
import functools

def _dump_obj_default(instance, dumper):
    """Fallback ``__dump__`` used when a type defines none of its own.

    Enums serialize as their ``value``; anything else serializes every
    attribute in its ``__dict__``, each run through *dumper*.
    """
    if isinstance(instance, enum.Enum):
        return {"value": instance.value}
    data = {}
    for attr_name, attr_value in instance.__dict__.items():
        data[attr_name] = dumper(attr_value)
    return data

def dump(obj, refmap, depth=0):
    """Dump *obj* to a JSON-compatible structure.

    :param obj: An astroid object or a plain builtin value.
    :param refmap: Mapping of ``id(obj)`` -> serialized data dict, shared
        across the whole dump so circular references terminate.
    :param depth: Current recursion depth (informational only).
    :returns: A JSON-serializable value.  Astroid objects are replaced by
        ``{".class": "Ref", ".value": "<id>"}`` markers pointing into
        *refmap*; builtin containers are dumped recursively in place.
    """
    # @TODO: Make types for the "special" dicts and serialize them specially

    if isinstance(obj, (int, str, float, bool, type(None))):
        return obj  # JSON serializable and unambiguous

    def dumper(value):
        return dump(value, refmap, depth + 1)

    if isinstance(obj, list):
        return [dumper(item) for item in obj]

    if isinstance(obj, (set, tuple)):
        return {
            ".class": obj.__class__.__name__,
            ".values": [dumper(item) for item in obj],
        }

    if isinstance(obj, dict):
        # Serializable, but ambiguous w.r.t. dumping an object
        # If this is ever false, we can serialize it as .items()
        assert all(isinstance(k, str) for k in obj)
        return {
            ".class": "dict",
            ".values": {k: dumper(v) for k, v in obj.items()},
        }

    # Identity checks rather than `obj in {..., NotImplemented}`: set
    # membership hashes the candidate, which raises TypeError for
    # unhashable objects and may invoke an arbitrary __eq__.
    if obj is Ellipsis or obj is NotImplemented:
        return {".class": obj.__class__.__name__}

    if isinstance(obj, bytes):
        return {
            ".class": "bytes",
            ".value": base64.b64encode(obj).decode("ascii"),
        }

    if isinstance(obj, complex):
        return {
            ".class": "complex",
            "imag": obj.imag,
            "real": obj.real,
        }

    if id(obj) not in refmap:
        # Everything left should be an astroid type.
        assert obj.__class__.__module__.startswith("astroid")
        # Phase 1: reserve the refmap slot first, so circular references
        # hit while dumping this object's fields terminate immediately.
        submodule = obj.__class__.__module__.split(".")[1]
        refmap[id(obj)] = {
            ".class": f"{submodule}.{obj.__class__.__name__}"
        }

        # Phase 2: actually populate the entry.
        data_dumper = getattr(obj, "__dump__", functools.partial(_dump_obj_default, obj))
        refmap[id(obj)].update(**data_dumper(dumper=dumper))

    # Stringify the id, since JSON objects must have str keys
    return {".class": "Ref", ".value": str(id(obj))}

def _load_obj_default(instance, data, loader):
    """Fallback ``__load__``: call ``__init__`` with every entry of *data*,
    each value run through *loader* first."""
    kwargs = {key: loader(value) for key, value in data.items()}
    return instance.__init__(**kwargs)

def _loadref(ref, refmap):
    """Resolve a "Ref" marker: return the live instance for *ref*,
    materializing it from its serialized data on first use.

    :param ref: Stringified object id produced by dump().
    :param refmap: Maps refs to either a serialized data dict (not yet
        loaded) or an already-reconstructed instance; entries are
        replaced in place as they are materialized.
    """
    import astroid  # local import: presumably avoids a circular import at module load — confirm

    instance_or_data = refmap[ref]
    if isinstance(instance_or_data, dict):
        data = instance_or_data
        # pop in case nodes want to just unpack the dict
        submodname, classname = data.pop(".class").split(".")
        submodule = getattr(astroid, submodname)
        cls = getattr(submodule, classname)

        if issubclass(cls, enum.Enum):
            # Enum uses __new__ to initialize :(
            refmap[ref] = cls.__new__(cls, **data)
        else:
            instance = cls.__new__(cls)
            # Phase 1: publish the bare instance first so circular
            # references hit during __load__ resolve to it.
            refmap[ref] = instance

            # Phase 2, populate any fields that are or contain astroid objects
            data_loader = getattr(instance, "__load__", functools.partial(_load_obj_default, instance))
            data_loader(data, loader=lambda x: load(x, refmap))

    return refmap[ref]

def load(data, refmap):
    """Reconstruct a value previously produced by :func:`dump`.

    :param data: The JSON-compatible structure to deserialize.
    :param refmap: Mapping of stringified ids -> serialized data (or the
        already-reconstructed instance), shared across the whole load.
    :returns: The deserialized builtin value or astroid object.
    :raises ValueError: If the tagged class name is not one we know how
        to rebuild.
    """
    def loader(value):
        return load(value, refmap)

    if isinstance(data, list):
        return [loader(item) for item in data]

    if not isinstance(data, dict):
        return data  # Just use the deserialized int or str or whatever

    if data[".class"] == "Ref":
        return _loadref(data[".value"], refmap)

    # NOTE: pop mutates the caller's dict, so the same structure cannot
    # be loaded twice.
    classname = data.pop(".class")

    # BUG FIX: these two singletons must be matched by name *before* the
    # builtins lookup — `builtins` exposes neither an "ellipsis" nor a
    # "NotImplementedType" attribute, so getattr raised AttributeError.
    if classname == "NotImplementedType":
        return NotImplemented
    if classname == "ellipsis":
        return ...

    cls = getattr(builtins, classname)

    if cls is dict:
        return {k: loader(v) for k, v in data[".values"].items()}

    if cls in {set, tuple}:
        return cls(map(loader, data[".values"]))

    if cls is complex:
        return complex(**data)

    if cls is bytes:
        # BUG FIX: dump() stores the payload under ".value" (singular);
        # reading ".values" raised KeyError for every bytes object.
        return base64.b64decode(data[".value"])

    # Explicit raise instead of `assert False` (asserts vanish under -O).
    raise ValueError(f"Unhandled case: {classname!r}")
12 changes: 12 additions & 0 deletions astroid/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ def __getattr__(self, name):
def infer(self, context=None):
yield self

# @TODO: Can remove — the default object dumper would produce the same data.
def __dump__(self, dumper):
    """Serialize this proxy; the only state worth dumping is the
    proxied object."""
    proxied_ref = dumper(self._proxied)
    return {"proxied": proxied_ref}

def _infer_stmts(stmts, context, frame=None):
"""Return an iterator on statements inferred by each statement in *stmts*."""
Expand Down Expand Up @@ -422,6 +427,7 @@ class BoundMethod(UnboundMethod):
# pylint: disable=unnecessary-lambda
special_attributes = lazy_descriptor(lambda: objectmodel.BoundMethodModel())

# @TODO deprecate "proxy" for "proxied?"
def __init__(self, proxy, bound):
    # NOTE(review): `proxy` is forwarded to UnboundMethod.__init__
    # (presumably stored as the proxied function — confirm against
    # UnboundMethod); `bound` is the object this method is bound to.
    UnboundMethod.__init__(self, proxy)
    self.bound = bound
Expand Down Expand Up @@ -544,6 +550,12 @@ def infer_call_result(self, caller, context=None):
def bool_value(self, context=None):
return True

def __dump__(self, dumper):
    """Serialize like the parent class, but rename the proxied entry to
    "proxy" (matching __init__'s parameter name) and add the bound
    object."""
    data = super().__dump__(dumper)
    data["proxy"] = data.pop("proxied")
    data["bound"] = dumper(self.bound)
    return data


class Generator(BaseInstance):
"""a special node representing a generator.
Expand Down
1 change: 0 additions & 1 deletion astroid/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ class Context(enum.Enum):
Store = 2
Del = 3


# TODO Remove in 3.0 in favor of Context
Load = Context.Load
Store = Context.Store
Expand Down
32 changes: 21 additions & 11 deletions astroid/nodes/node_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,8 @@ def __init__(
vararg: Optional[str] = None,
kwarg: Optional[str] = None,
parent: Optional[NodeNG] = None,
lineno: Optional[int] = None,
col_offset: Optional[int] = None,
) -> None:
"""
:param vararg: The name of the variable length arguments.
Expand All @@ -758,7 +760,7 @@ def __init__(

:param parent: The parent node in the syntax tree.
"""
super().__init__(parent=parent)
super().__init__(lineno=lineno, col_offset=col_offset, parent=parent)

self.vararg: Optional[str] = vararg # can be None
"""The name of the variable length arguments."""
Expand Down Expand Up @@ -1282,6 +1284,7 @@ def __init__(
lineno: Optional[int] = None,
col_offset: Optional[int] = None,
parent: Optional[NodeNG] = None,
simple: Optional[int] = None,
) -> None:
"""
:param lineno: The line that this node appears on in the source code.
Expand All @@ -1300,7 +1303,7 @@ def __init__(
self.value: Optional[NodeNG] = None # can be None
"""The value being assigned to the variables."""

self.simple: Optional[int] = None
self.simple: Optional[int] = simple
"""Whether :attr:`target` is a pure name or a complex statement."""

super().__init__(lineno=lineno, col_offset=col_offset, parent=parent)
Expand All @@ -1309,7 +1312,7 @@ def postinit(
self,
target: NodeNG,
annotation: NodeNG,
simple: int,
simple: int = None,
value: Optional[NodeNG] = None,
) -> None:
"""Do some setup after initialisation.
Expand All @@ -1326,7 +1329,7 @@ def postinit(
self.target = target
self.annotation = annotation
self.value = value
self.simple = simple
self.simple = simple or self.simple

def get_children(self):
yield self.target
Expand Down Expand Up @@ -1757,7 +1760,13 @@ class Comprehension(NodeNG):
optional_assign = True
"""Whether this node optionally assigns a variable."""

def __init__(self, parent: Optional[NodeNG] = None) -> None:
def __init__(
self,
parent: Optional[NodeNG] = None,
lineno: Optional[int] = None,
col_offset: Optional[int] = None,
is_async: Optional[bool] = None,
) -> None:
"""
:param parent: The parent node in the syntax tree.
"""
Expand All @@ -1770,18 +1779,18 @@ def __init__(self, parent: Optional[NodeNG] = None) -> None:
self.ifs: typing.List[NodeNG] = []
"""The contents of any if statements that filter the comprehension."""

self.is_async: Optional[bool] = None
self.is_async: Optional[bool] = is_async
"""Whether this is an asynchronous comprehension or not."""

super().__init__(parent=parent)
super().__init__(lineno=lineno, col_offset=col_offset, parent=parent)

# pylint: disable=redefined-builtin; same name as builtin ast module.
def postinit(
self,
target: Optional[NodeNG] = None,
iter: Optional[NodeNG] = None,
ifs: Optional[typing.List[NodeNG]] = None,
is_async: Optional[bool] = None,
is_async: Optional[bool] = None, # @TODO: Deprecate and remove
) -> None:
"""Do some setup after initialisation.

Expand Down Expand Up @@ -1844,7 +1853,7 @@ class Const(mixins.NoChildrenMixin, NodeNG, Instance):
<Const.bytes l.1 at 0x7f23b2e35a20>]
"""

_other_fields = ("value",)
_other_fields = ("value", "kind")

def __init__(
self,
Expand Down Expand Up @@ -2628,7 +2637,7 @@ class ImportFrom(mixins.NoChildrenMixin, mixins.ImportFromMixin, Statement):
<ImportFrom l.1 at 0x7f23b2e415c0>
"""

_other_fields = ("modname", "names", "level")
_other_fields = ("fromname", "names", "level")

def __init__(
self,
Expand All @@ -2653,7 +2662,8 @@ def __init__(

:param parent: The parent node in the syntax tree.
"""
self.modname: Optional[str] = fromname # can be None
self.fromname: Optional[str] = fromname # can be None
self.modname = self.fromname # For backwards
"""The module that is being imported from.

This is ``None`` for relative imports.
Expand Down
29 changes: 29 additions & 0 deletions astroid/nodes/node_ng.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,3 +724,32 @@ def op_precedence(self):
def op_left_associative(self):
# Everything is left associative except `**` and IfExp
return True

def __dump__(self, dumper):
    """Serialize this node: every declared field, plus whichever of the
    positional metadata (lineno/col_offset/parent) is actually set."""
    def grab(field_names):
        return {name: dumper(getattr(self, name)) for name in field_names}

    data = dict(
        **grab(self._other_fields),
        **grab(self._other_other_fields),
        **grab(self._astroid_fields),
    )
    # Not all __init__s take these, so only emit the ones that are set.
    for attr in ("lineno", "col_offset"):
        if getattr(self, attr, None) is not None:
            data[attr] = getattr(self, attr)
    if self.parent is not None:
        data["parent"] = dumper(self.parent)
    return data

def __load__(self, data, loader):
    """Re-initialize this node from *data* produced by __dump__.

    __init__ gets the positional metadata (when present) plus every
    _other_fields entry; the astroid-typed fields (_astroid_fields and
    _other_other_fields) go through postinit, so cross-references are
    resolved in the second phase.
    """
    # NOTE(review): astroid.nodes appears unused here — presumably
    # imported for its side effects; confirm before removing.
    import astroid.nodes
    self.__init__(
        **{key: loader(data[key]) for key in {"lineno", "col_offset", "parent"} if key in data},
        **{key: loader(data[key]) for key in self._other_fields},
    )

    postinit_fields = self._astroid_fields + self._other_other_fields

    if postinit_fields:
        self.postinit(
            **{key: loader(data[key]) for key in postinit_fields}
        )
Loading