-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-6055] [PySpark] fix incorrect __eq__ of DataType #4808
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
import warnings | ||
import json | ||
import re | ||
import weakref | ||
from array import array | ||
from operator import itemgetter | ||
|
||
|
@@ -42,8 +43,7 @@ def __hash__(self): | |
return hash(str(self)) | ||
|
||
def __eq__(self, other): | ||
return (isinstance(other, self.__class__) and | ||
self.__dict__ == other.__dict__) | ||
return isinstance(other, self.__class__) and self.jsonValue() == other.jsonValue() | ||
|
||
def __ne__(self, other): | ||
return not self.__eq__(other) | ||
|
@@ -786,8 +786,24 @@ def _merge_type(a, b): | |
return a | ||
|
||
|
||
def _need_converter(dataType): | ||
if isinstance(dataType, StructType): | ||
return True | ||
elif isinstance(dataType, ArrayType): | ||
return _need_converter(dataType.elementType) | ||
elif isinstance(dataType, MapType): | ||
return _need_converter(dataType.keyType) or _need_converter(dataType.valueType) | ||
elif isinstance(dataType, NullType): | ||
return True | ||
else: | ||
return False | ||
|
||
|
||
def _create_converter(dataType): | ||
"""Create an converter to drop the names of fields in obj """ | ||
if not _need_converter(dataType): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this particular call to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's useful for ArrayType and MapType, can it didnot can There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, right; I overlooked that. Makes sense. |
||
return lambda x: x | ||
|
||
if isinstance(dataType, ArrayType): | ||
conv = _create_converter(dataType.elementType) | ||
return lambda row: map(conv, row) | ||
|
@@ -806,13 +822,17 @@ def _create_converter(dataType): | |
# dataType must be StructType | ||
names = [f.name for f in dataType.fields] | ||
converters = [_create_converter(f.dataType) for f in dataType.fields] | ||
convert_fields = any(_need_converter(f.dataType) for f in dataType.fields) | ||
|
||
def convert_struct(obj): | ||
if obj is None: | ||
return | ||
|
||
if isinstance(obj, (tuple, list)): | ||
return tuple(conv(v) for v, conv in zip(obj, converters)) | ||
if convert_fields: | ||
return tuple(conv(v) for v, conv in zip(obj, converters)) | ||
else: | ||
return tuple(obj) | ||
|
||
if isinstance(obj, dict): | ||
d = obj | ||
|
@@ -821,7 +841,10 @@ def convert_struct(obj): | |
else: | ||
raise ValueError("Unexpected obj: %s" % obj) | ||
|
||
return tuple([conv(d.get(name)) for name, conv in zip(names, converters)]) | ||
if convert_fields: | ||
return tuple([conv(d.get(name)) for name, conv in zip(names, converters)]) | ||
else: | ||
return tuple([d.get(name) for name in names]) | ||
|
||
return convert_struct | ||
|
||
|
@@ -1037,8 +1060,7 @@ def _verify_type(obj, dataType): | |
for v, f in zip(obj, dataType.fields): | ||
_verify_type(v, f.dataType) | ||
|
||
|
||
_cached_cls = {} | ||
_cached_cls = weakref.WeakValueDictionary() | ||
|
||
|
||
def _restore_object(dataType, obj): | ||
|
@@ -1233,8 +1255,7 @@ def __new__(self, *args, **kwargs): | |
elif kwargs: | ||
# create row objects | ||
names = sorted(kwargs.keys()) | ||
values = tuple(kwargs[n] for n in names) | ||
row = tuple.__new__(self, values) | ||
row = tuple.__new__(self, [kwargs[n] for n in names]) | ||
row.__FIELDS__ = names | ||
return row | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are duplicated, also in types.py.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, good catch.