SPARK-45959: Added new tests; handled flattening of Project when done… #261
GitHub Actions / Report test results
failed
Dec 15, 2023 in 0s
19348 tests run, 823 skipped, 1 failed.
Annotations
Check failure on line 1 in python/pyspark/pandas/tests/test_frame_spark.py
github-actions / Report test results
python/pyspark/pandas/tests/test_frame_spark.py.test_hint
Column value_x#443L are ambiguous. It's probably because you joined several Datasets together, and some of these Datasets are the same. This column points to one of the Datasets but Spark is unable to figure out which one. Please alias the Datasets with different names via `Dataset.as` before joining them, and specify the column using qualified name, e.g. `df.as("a").join(df.as("b"), $"a.id" > $"b.id")`. You can also set spark.sql.analyzer.failAmbiguousSelfJoin to false to disable this check.
JVM stacktrace:
org.apache.spark.sql.AnalysisException: Column value_x#443L are ambiguous. It's probably because you joined several Datasets together, and some of these Datasets are the same. This column points to one of the Datasets but Spark is unable to figure out which one. Please alias the Datasets with different names via `Dataset.as` before joining them, and specify the column using qualified name, e.g. `df.as("a").join(df.as("b"), $"a.id" > $"b.id")`. You can also set spark.sql.analyzer.failAmbiguousSelfJoin to false to disable this check.
    at org.apache.spark.sql.errors.QueryCompilationErrors$.ambiguousAttributesInSelfJoinError(QueryCompilationErrors.scala:1986)
    at org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin$.apply(DetectAmbiguousSelfJoin.scala:161)
    at org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin$.apply(DetectAmbiguousSelfJoin.scala:45)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)
    at scala.collection.LinearSeqOps.foldLeft(LinearSeq.scala:183)
    at scala.collection.LinearSeqOps.foldLeft$(LinearSeq.scala:179)
    at scala.collection.immutable.List.foldLeft(List.scala:79)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)
    at scala.collection.immutable.List.foreach(List.scala:333)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:225)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:221)
    at org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:177)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:221)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:192)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:213)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:212)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:88)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:230)
    at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:557)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:230)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:918)
    at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:229)
    at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:88)
    at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:85)
    at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:69)
    at org.apache.spark.sql.Dataset$.$anonfun$ofRows$1(Dataset.scala:93)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:918)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:91)
    at org.apache.spark.sql.Dataset.withPlan(Dataset.scala:4474)
    at org.apache.spark.sql.Dataset.$anonfun$select$1(Dataset.scala:1581)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:83)
    at org.apache.spark.sql.package$.withOrigin(package.scala:110)
    at org.apache.spark.sql.Dataset.select(Dataset.scala:1557)
    at jdk.internal.reflect.GeneratedMethodAccessor21.invoke(Unknown Source)
    at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.base/java.lang.reflect.Method.invoke(Method.java:568)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
    at java.base/java.lang.Thread.run(Thread.java:840)
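
The workaround the analyzer suggests can be sketched in a few lines of PySpark. This is a minimal illustration, not code from the failing test; `spark` and `df` are placeholder names:

```python
# Minimal sketch of the alias workaround named in the AnalysisException.
# `spark` and `df` are illustrative, not taken from test_frame_spark.py.
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

spark = SparkSession.builder.getOrCreate()
df = spark.range(5)  # one column: id

# A self-join with bare column references is what DetectAmbiguousSelfJoin
# rejects, e.g. df.join(df, df["id"] > df["id"]).
# Aliasing each side lets the analyzer attribute every column reference
# to one side of the join:
joined = df.alias("a").join(df.alias("b"), col("a.id") > col("b.id"))
joined.show()
```

Alternatively, as the message notes, `spark.conf.set("spark.sql.analyzer.failAmbiguousSelfJoin", False)` disables the check entirely, though aliasing is the narrower fix.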
Raw output
Traceback (most recent call last):
  File "/__w/spark/spark/python/pyspark/pandas/tests/test_frame_spark.py", line 51, in test_hint
    psdf1.merge(psdf2.spark.hint(hint), left_index=True, right_index=True).sort_values(
  File "/__w/spark/spark/python/pyspark/pandas/frame.py", line 8694, in merge
    internal = InternalFrame(
  File "/__w/spark/spark/python/pyspark/pandas/internal.py", line 716, in __init__
    schema = spark_frame.select(index_spark_columns + data_spark_columns).schema
  File "/__w/spark/spark/python/pyspark/sql/dataframe.py", line 3721, in select
    jdf = self._jdf.select(self._jcols(*cols))
  File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1322, in __call__
    return_value = get_return_value(
  File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line 219, in deco
    raise converted from None
pyspark.errors.exceptions.captured.AnalysisException: Column value_x#443L are ambiguous. It's probably because you joined several Datasets together, and some of these Datasets are the same. This column points to one of the Datasets but Spark is unable to figure out which one. Please alias the Datasets with different names via `Dataset.as` before joining them, and specify the column using qualified name, e.g. `df.as("a").join(df.as("b"), $"a.id" > $"b.id")`. You can also set spark.sql.analyzer.failAmbiguousSelfJoin to false to disable this check.
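
For reference, the failing pattern can be approximated in pandas-on-Spark as below. This is a hedged sketch: the frame contents and the "broadcast" hint are illustrative stand-ins, not values from test_hint:

```python
# Illustrative sketch of the merge-with-hint pattern the test exercises.
# Data and hint name are placeholders, not copied from the test.
import pyspark.pandas as ps

psdf1 = ps.DataFrame({"key": ["foo", "bar"], "value": [1, 2]})
psdf2 = ps.DataFrame({"key": ["foo", "baz"], "value": [3, 4]})

# merge() suffixes the overlapping "value" columns as value_x / value_y;
# the AnalysisException above reports that value_x cannot be attributed
# to one side of the underlying join.
merged = psdf1.merge(
    psdf2.spark.hint("broadcast"), left_index=True, right_index=True
).sort_values("value_x")
```

The traceback shows the failure surfacing when InternalFrame re-selects the merged index and data columns, which is the point where the analyzer's ambiguous-self-join check fires.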