diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 3afb88c4351fd..bb96828f5fb67 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -1,3 +1,4 @@ +# -*- encoding: utf-8 -*- # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with @@ -739,6 +740,17 @@ def test_timestamp_microsecond(self): tst = TimestampType() self.assertEqual(tst.toInternal(datetime.datetime.max) % 1000000, 999999) + # regression test for SPARK-23299 + def test_row_without_column_name(self): + row = Row("Alice", 11) + self.assertEqual(repr(row), "<Row('Alice', 11)>") + + # test __repr__ with unicode values + if sys.version_info.major >= 3: + self.assertEqual(repr(Row("数", "量")), "<Row('数', '量')>") + else: + self.assertEqual(repr(Row(u"数", u"量")), r"<Row(u'\u6570', u'\u91cf')>") + def test_empty_row(self): row = Row() self.assertEqual(len(row), 0) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 72c437a499a91..f9b12f15117db 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -1435,13 +1435,24 @@ class Row(tuple): >>> Person = Row("name", "age") >>> Person - <Row(name, age)> + <Row('name', 'age')> >>> 'name' in Person True >>> 'wrong_key' in Person False >>> Person("Alice", 11) Row(name='Alice', age=11) + + This form can also be used to create rows as tuple values, i.e. with unnamed + fields. Beware that such Row objects have different equality semantics: + + >>> row1 = Row("Alice", 11) + >>> row2 = Row(name="Alice", age=11) + >>> row1 == row2 + False + >>> row3 = Row(a="Alice", b=11) + >>> row1 == row3 + True """ def __new__(self, *args, **kwargs): @@ -1549,7 +1560,7 @@ def __repr__(self): return "Row(%s)" % ", ".join("%s=%r" % (k, v) for k, v in zip(self.__fields__, tuple(self))) else: - return "<Row(%s)>" % ", ".join(self) + return "<Row(%s)>" % ", ".join("%r" % field for field in self) class DateConverter(object):