diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 6942ef7201703..7997090e710a9 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -51,6 +51,8 @@ license: |
 
   - In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case.
 
   - In Spark 3.1, refreshing a table will trigger an uncache operation for all other caches that reference the table, even if the table itself is not cached. In Spark 3.0 the operation will only be triggered if the table itself is cached.
+
+  - In Spark 3.1, creating or altering a view will capture runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.useCurrentConfigsForView` to `true`.
 
 ## Upgrading from Spark SQL 3.0 to 3.0.1
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 77c1dd9ebb7fa..dae496244c858 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1034,7 +1034,9 @@ class Analyzer(override val catalogManager: CatalogManager)
             s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to work " +
             "around this.")
         }
-        executeSameContext(child)
+        SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs)) {
+          executeSameContext(child)
+        }
       }
       view.copy(child = newChild)
     case p @ SubqueryAlias(_, view: View) =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 17ab6664df75c..5122ca7521d9a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -795,14 +795,19 @@ class SessionCatalog(
     if (metadata.tableType == CatalogTableType.VIEW) {
       val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text."))
-      logDebug(s"'$viewText' will be used for the view($table).")
+      val viewConfigs = metadata.viewSQLConfigs
+      val viewPlan = SQLConf.withExistingConf(View.effectiveSQLConf(viewConfigs)) {
+        parser.parsePlan(viewText)
+      }
+
+      logDebug(s"'$viewText' will be used for the view($table) with configs: $viewConfigs.")
       // The relation is a view, so we wrap the relation by:
       // 1. Add a [[View]] operator over the relation to keep track of the view desc;
       // 2. Wrap the logical plan in a [[SubqueryAlias]] which tracks the name of the view.
       val child = View(
         desc = metadata,
         output = metadata.schema.toAttributes,
-        child = parser.parsePlan(viewText))
+        child = viewPlan)
       SubqueryAlias(multiParts, child)
     } else {
       SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata, options))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index ee7216e93ebb5..621ad84f1f5ec 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -305,6 +305,22 @@ case class CatalogTable(
     }
   }
 
+  /**
+   * Return the SQL configs captured when the view was created. These configs are applied during
+   * the parsing and analysis phases of view resolution. The result should be empty if the
+   * CatalogTable is not a view or was created by an older version of Spark (before 3.1.0).
+   */
+  def viewSQLConfigs: Map[String, String] = {
+    try {
+      for ((key, value) <- properties if key.startsWith(CatalogTable.VIEW_SQL_CONFIG_PREFIX))
+        yield (key.substring(CatalogTable.VIEW_SQL_CONFIG_PREFIX.length), value)
+    } catch {
+      case e: Exception =>
+        throw new AnalysisException(
+          "Corrupted view SQL configs in catalog", cause = Some(e))
+    }
+  }
+
   /**
    * Return the output column names of the query that creates a view, the column names are used to
    * resolve a view, should be empty if the CatalogTable is not a View or created by older versions
@@ -411,6 +427,8 @@ object CatalogTable {
     props.toMap
   }
 
+  val VIEW_SQL_CONFIG_PREFIX = VIEW_PREFIX + "sqlConfig."
+
   val VIEW_QUERY_OUTPUT_PREFIX = VIEW_PREFIX + "query.out."
   val VIEW_QUERY_OUTPUT_NUM_COLUMNS = VIEW_QUERY_OUTPUT_PREFIX + "numCols"
   val VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX = VIEW_QUERY_OUTPUT_PREFIX + "col."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index c7108ea8ac74b..a524ed4ff73e9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -453,6 +453,22 @@ case class View(
   }
 }
 
+object View {
+  def effectiveSQLConf(configs: Map[String, String]): SQLConf = {
+    val activeConf = SQLConf.get
+    if (activeConf.useCurrentSQLConfigsForView) return activeConf
+
+    val sqlConf = new SQLConf()
+    for ((k, v) <- configs) {
+      sqlConf.settings.put(k, v)
+    }
+    // We should respect the current maxNestedViewDepth because view resolution is executed
+    // from top to bottom.
+    sqlConf.setConf(SQLConf.MAX_NESTED_VIEW_DEPTH, activeConf.maxNestedViewDepth)
+    sqlConf
+  }
+}
+
 /**
  * A container for holding named common table expressions (CTEs) and a query plan.
  * This operator will be removed during analysis and the relations will be substituted into child.
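Taken together, `CatalogTable.viewSQLConfigs` and `View.effectiveSQLConf` form the replay half of this change: the stored entries are overlaid on a fresh conf, except that the session's nested-view-depth limit always wins, so nested resolution still terminates at the currently configured depth. Below is a minimal, self-contained sketch of that merge logic; it is plain Scala that only models the behavior, and `Conf` and `MaxNestedViewDepthKey` are illustrative stand-ins, not Spark's internal `SQLConf` API.

```scala
import scala.collection.mutable

// A minimal model (NOT Spark's internal API) of how captured view configs are
// replayed: start from an empty conf, overlay the stored entries, then carry
// over the session's nested-view-depth limit so resolution still terminates
// at the currently configured depth.
object ViewConfReplaySketch {
  final case class Conf(settings: mutable.Map[String, String]) {
    def get(key: String, default: String): String = settings.getOrElse(key, default)
  }

  // Hypothetical stand-in for the literal behind SQLConf.MAX_NESTED_VIEW_DEPTH.key.
  val MaxNestedViewDepthKey = "spark.sql.view.maxNestedViewDepth"

  def effectiveConf(captured: Map[String, String], active: Conf): Conf = {
    val conf = Conf(mutable.Map.empty[String, String])
    captured.foreach { case (k, v) => conf.settings.put(k, v) }
    // Respect the current depth limit because view resolution runs top-down.
    conf.settings.put(MaxNestedViewDepthKey, active.get(MaxNestedViewDepthKey, "100"))
    conf
  }

  def main(args: Array[String]): Unit = {
    val active = Conf(mutable.Map(
      "spark.sql.ansi.enabled" -> "false",
      MaxNestedViewDepthKey -> "10"))
    val captured = Map("spark.sql.ansi.enabled" -> "true")
    val effective = effectiveConf(captured, active)
    assert(effective.get("spark.sql.ansi.enabled", "false") == "true") // captured value wins
    assert(effective.get(MaxNestedViewDepthKey, "100") == "10")        // session depth limit wins
  }
}
```

The same precedence appears in the diff above: every captured entry is applied first, and `MAX_NESTED_VIEW_DEPTH` is then overwritten from the active conf.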
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index add9a1d0f3aa6..b2c28ffa984a9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1481,6 +1481,15 @@ object SQLConf {
       "must be positive.")
     .createWithDefault(100)
 
+  val USE_CURRENT_SQL_CONFIGS_FOR_VIEW =
+    buildConf("spark.sql.legacy.useCurrentConfigsForView")
+      .internal()
+      .doc("When true, SQL configs of the current active SparkSession, instead of the captured " +
+        "ones, will be applied during the parsing and analysis phases of view resolution.")
+      .version("3.1.0")
+      .booleanConf
+      .createWithDefault(false)
+
   val STREAMING_FILE_COMMIT_PROTOCOL_CLASS =
     buildConf("spark.sql.streaming.commitProtocolClass")
       .version("2.1.0")
@@ -3415,6 +3424,8 @@ class SQLConf extends Serializable with Logging {
 
   def maxNestedViewDepth: Int = getConf(SQLConf.MAX_NESTED_VIEW_DEPTH)
 
+  def useCurrentSQLConfigsForView: Boolean = getConf(SQLConf.USE_CURRENT_SQL_CONFIGS_FOR_VIEW)
+
   def starSchemaDetection: Boolean = getConf(STARSCHEMA_DETECTION)
 
   def starSchemaFTRatio: Double = getConf(STARSCHEMA_FACT_TABLE_RATIO)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index 43bc50522f2a8..a02f863a360f8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeRef
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View}
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper
-import org.apache.spark.sql.internal.StaticSQLConf
+import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType}
 import org.apache.spark.sql.util.SchemaUtils
 
@@ -334,6 +334,18 @@ case class ShowViewsCommand(
 
 object ViewHelper {
 
+  private val configPrefixDenyList = Seq(
+    SQLConf.MAX_NESTED_VIEW_DEPTH.key,
+    "spark.sql.optimizer.",
+    "spark.sql.codegen.",
+    "spark.sql.execution.",
+    "spark.sql.shuffle.",
+    "spark.sql.adaptive.")
+
+  private def shouldCaptureConfig(key: String): Boolean = {
+    !configPrefixDenyList.exists(prefix => key.startsWith(prefix))
+  }
+
   import CatalogTable._
 
   /**
@@ -361,11 +373,37 @@ object ViewHelper {
     }
   }
 
+  /**
+   * Convert the view SQL configs to `properties`.
+   */
+  private def sqlConfigsToProps(conf: SQLConf): Map[String, String] = {
+    val modifiedConfs = conf.getAllConfs.filter { case (k, _) =>
+      conf.isModifiable(k) && shouldCaptureConfig(k)
+    }
+    val props = new mutable.HashMap[String, String]
+    for ((key, value) <- modifiedConfs) {
+      props.put(s"$VIEW_SQL_CONFIG_PREFIX$key", value)
+    }
+    props.toMap
+  }
+
+  /**
+   * Remove the view SQL configs from `properties`.
+   */
+  private def removeSQLConfigs(properties: Map[String, String]): Map[String, String] = {
+    // We can't use `filterKeys` here, as the map returned by `filterKeys` is not serializable,
+    // while `CatalogTable` should be serializable.
+    properties.filterNot { case (key, _) =>
+      key.startsWith(VIEW_SQL_CONFIG_PREFIX)
+    }
+  }
+
   /**
    * Generate the view properties in CatalogTable, including:
    * 1. view default database that is used to provide the default database name on view resolution.
    * 2. the output column names of the query that creates a view, this is used to map the output of
    *    the view child to the view output during view resolution.
+   * 3. the SQL configs captured when creating the view.
    *
    * @param properties the `properties` in CatalogTable.
    * @param session the spark session.
@@ -380,15 +418,18 @@ object ViewHelper {
     // for createViewCommand queryOutput may be different from fieldNames
     val queryOutput = analyzedPlan.schema.fieldNames
 
+    val conf = session.sessionState.conf
+
     // Generate the query column names, throw an AnalysisException if there exists duplicate column
     // names.
     SchemaUtils.checkColumnNameDuplication(
-      fieldNames, "in the view definition", session.sessionState.conf.resolver)
+      fieldNames, "in the view definition", conf.resolver)
 
-    // Generate the view default catalog and namespace.
+    // Generate the view default catalog and namespace, as well as captured SQL configs.
     val manager = session.sessionState.catalogManager
-    removeQueryColumnNames(properties) ++
+    removeSQLConfigs(removeQueryColumnNames(properties)) ++
       catalogAndNamespaceToProps(manager.currentCatalog.name, manager.currentNamespace) ++
+      sqlConfigsToProps(conf) ++
       generateQueryColumnNames(queryOutput)
   }
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
index ae1cb2f171704..2fab32fa4b4eb 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
@@ -257,7 +257,7 @@ View Text	SELECT * FROM base_table
 View Original Text	SELECT * FROM base_table
 View Catalog and Namespace	spark_catalog.temp_view_test
 View Query Output Columns	[a, id]
-Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
 
 
 -- !query
@@ -313,7 +313,7 @@ View Text	SELECT * FROM base_table
 View Original Text	SELECT * FROM base_table
 View Catalog and Namespace	spark_catalog.temp_view_test
 View Query Output Columns	[a, id]
-Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
 
 
 -- !query
@@ -359,7 +359,7 @@ View Original Text	SELECT t1.a AS t1_a, t2.a AS t2_a
 WHERE t1.id = t2.id
 View Catalog and Namespace	spark_catalog.temp_view_test
 View Query Output Columns	[t1_a, t2_a]
-Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
 
 
 -- !query
@@ -413,7 +413,7 @@ View Text	SELECT * FROM base_table WHERE id IN (SELECT id FROM base_t
 View Original Text	SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2)
 View Catalog and Namespace	spark_catalog.temp_view_test
 View Query Output Columns	[a, id]
-Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
 
 
 -- !query
@@ -443,7 +443,7 @@ View Text	SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_
 View Original Text	SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2
 View Catalog and Namespace	spark_catalog.temp_view_test
 View Query Output Columns	[id, a]
-Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
 
 
 -- !query
@@ -473,7 +473,7 @@ View Text	SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_t
 View Original Text	SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2)
 View Catalog and Namespace	spark_catalog.temp_view_test
 View Query Output Columns	[a, id]
-Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
 
 
 -- !query
@@ -503,7 +503,7 @@ View Text	SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM ba
 View Original Text	SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2)
 View Catalog and Namespace	spark_catalog.temp_view_test
 View Query Output Columns	[a, id]
-Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
 
 
 -- !query
@@ -533,7 +533,7 @@ View Text	SELECT * FROM base_table WHERE EXISTS (SELECT 1)
 View Original Text	SELECT * FROM base_table WHERE EXISTS (SELECT 1)
 View Catalog and Namespace	spark_catalog.temp_view_test
 View Query Output Columns	[a, id]
-Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test]
 
 
 -- !query
@@ -669,7 +669,7 @@ View Text	SELECT * FROM t1 CROSS JOIN t2
 View Original Text	SELECT * FROM t1 CROSS JOIN t2
 View Catalog and Namespace	spark_catalog.testviewschm2
 View Query Output Columns	[num, name, num2, value]
-Table Properties	[view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties	[view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2]
 
 
 -- !query
@@ -710,7 +710,7 @@ View Text	SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2
 View Original Text	SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2
 View Catalog and Namespace	spark_catalog.testviewschm2
 View Query Output Columns	[num, name, num2, value]
-Table Properties	[view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties	[view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2]
 
 
 -- !query
@@ -751,7 +751,7 @@ View Text	SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2
 View Original Text	SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2
 View Catalog and Namespace	spark_catalog.testviewschm2
 View Query Output Columns	[num, name, num2, value]
-Table Properties	[view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties	[view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2]
 
 
 -- !query
@@ -792,7 +792,7 @@ View Text	SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.va
 View Original Text	SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx'
 View Catalog and Namespace	spark_catalog.testviewschm2
 View Query Output Columns	[num, name, num2, value]
-Table Properties	[view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties	[view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2]
 
 
 -- !query
@@ -894,7 +894,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2)
 AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f)
 View Catalog and Namespace	spark_catalog.testviewschm2
 View Query Output Columns	[a, b]
-Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2]
 
 
 -- !query
@@ -933,7 +933,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f)
 AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j)
 View Catalog and Namespace	spark_catalog.testviewschm2
 View Query Output Columns	[a, b]
-Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties	[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2]
 
 
 -- !query
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
index d776198bc7470..0b19f706836be 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
@@ -21,7 +21,7 @@ import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
 import org.apache.spark.sql.catalyst.parser.ParseException
-import org.apache.spark.sql.internal.SQLConf.MAX_NESTED_VIEW_DEPTH
+import org.apache.spark.sql.internal.SQLConf._
 import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
 
 class SimpleSQLViewSuite extends SQLViewSuite with SharedSparkSession
@@ -762,4 +762,77 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
       }
     }
   }
+
+  test("SPARK-33141: view should be parsed and analyzed with configs set when creating") {
+    withTable("t") {
+      withView("v1", "v2", "v3", "v4", "v5") {
+        Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t")
+        sql("CREATE VIEW v1 (c1) AS SELECT C1 FROM t")
+        sql("CREATE VIEW v2 (c1) AS SELECT c1 FROM t ORDER BY 1 ASC, c1 DESC")
+        sql("CREATE VIEW v3 (c1, count) AS SELECT c1, count(c1) FROM t GROUP BY 1")
+        sql("CREATE VIEW v4 (a, count) AS SELECT c1 as a, count(c1) FROM t GROUP BY a")
+        sql("CREATE VIEW v5 (c1) AS SELECT 1/0")
+
+        withSQLConf(CASE_SENSITIVE.key -> "true") {
+          checkAnswer(sql("SELECT * FROM v1"), Seq(Row(2), Row(3), Row(1)))
+        }
+        withSQLConf(ORDER_BY_ORDINAL.key -> "false") {
+          checkAnswer(sql("SELECT * FROM v2"), Seq(Row(1), Row(2), Row(3)))
+        }
+        withSQLConf(GROUP_BY_ORDINAL.key -> "false") {
+          checkAnswer(sql("SELECT * FROM v3"),
+            Seq(Row(1, 1), Row(2, 1), Row(3, 1)))
+        }
+        withSQLConf(GROUP_BY_ALIASES.key -> "false") {
+          checkAnswer(sql("SELECT * FROM v4"),
+            Seq(Row(1, 1), Row(2, 1), Row(3, 1)))
+        }
+        withSQLConf(ANSI_ENABLED.key -> "true") {
+          checkAnswer(sql("SELECT * FROM v5"), Seq(Row(null)))
+        }
+
+        withSQLConf(USE_CURRENT_SQL_CONFIGS_FOR_VIEW.key -> "true") {
+          withSQLConf(CASE_SENSITIVE.key -> "true") {
+            val e = intercept[AnalysisException] {
+              sql("SELECT * FROM v1")
+            }.getMessage
+            assert(e.contains("cannot resolve '`C1`' given input columns: " +
+              "[spark_catalog.default.t.c1]"))
+          }
+          withSQLConf(ORDER_BY_ORDINAL.key -> "false") {
+            checkAnswer(sql("SELECT * FROM v2"), Seq(Row(3), Row(2), Row(1)))
+          }
+          withSQLConf(GROUP_BY_ORDINAL.key -> "false") {
+            val e = intercept[AnalysisException] {
+              sql("SELECT * FROM v3")
+            }.getMessage
+            assert(e.contains("expression 'spark_catalog.default.t.`c1`' is neither present " +
+              "in the group by, nor is it an aggregate function. Add to group by or wrap in " +
+              "first() (or first_value) if you don't care which value you get."))
+          }
+          withSQLConf(GROUP_BY_ALIASES.key -> "false") {
+            val e = intercept[AnalysisException] {
+              sql("SELECT * FROM v4")
+            }.getMessage
+            assert(e.contains("cannot resolve '`a`' given input columns: " +
+              "[spark_catalog.default.t.c1]"))
+          }
+          withSQLConf(ANSI_ENABLED.key -> "true") {
+            val e = intercept[ArithmeticException] {
+              sql("SELECT * FROM v5").collect()
+            }.getMessage
+            assert(e.contains("divide by zero"))
+          }
+        }
+
+        withSQLConf(ANSI_ENABLED.key -> "true") {
+          sql("ALTER VIEW v1 AS SELECT 1/0")
+        }
+        val e = intercept[ArithmeticException] {
+          sql("SELECT * FROM v1").collect()
+        }.getMessage
+        assert(e.contains("divide by zero"))
+      }
+    }
+  }
 }
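On the capture side, `ViewHelper.sqlConfigsToProps` stores only the modifiable confs that survive the `configPrefixDenyList` filter, namespaced under `view.sqlConfig.` so they round-trip through catalog table properties (as the golden-file diffs above show for `spark.sql.ansi.enabled`). A standalone sketch of that filtering under the same deny list follows; the depth-limit literal is an assumed stand-in for `MAX_NESTED_VIEW_DEPTH.key`, and this models the behavior rather than calling Spark's API.

```scala
// A standalone sketch (assumed names, not Spark's API) of the capture-side
// filtering in ViewHelper: modifiable session confs become view properties
// unless they match a denied prefix (planner/execution tuning knobs, which
// should follow the current session rather than the creation-time session).
object ViewConfCaptureSketch {
  private val configPrefixDenyList = Seq(
    "spark.sql.view.maxNestedViewDepth", // assumed stand-in for MAX_NESTED_VIEW_DEPTH.key
    "spark.sql.optimizer.",
    "spark.sql.codegen.",
    "spark.sql.execution.",
    "spark.sql.shuffle.",
    "spark.sql.adaptive.")

  private val viewSQLConfigPrefix = "view.sqlConfig."

  private def shouldCaptureConfig(key: String): Boolean =
    !configPrefixDenyList.exists(prefix => key.startsWith(prefix))

  // Mirrors the intent of sqlConfigsToProps: keep capturable keys and
  // namespace them so they round-trip through catalog table properties.
  def sqlConfigsToProps(modifiableConfs: Map[String, String]): Map[String, String] =
    modifiableConfs.collect {
      case (k, v) if shouldCaptureConfig(k) => s"$viewSQLConfigPrefix$k" -> v
    }

  def main(args: Array[String]): Unit = {
    val sessionConfs = Map(
      "spark.sql.ansi.enabled" -> "true",
      "spark.sql.shuffle.partitions" -> "5")
    val props = sqlConfigsToProps(sessionConfs)
    // Shuffle tuning is dropped; the semantic ANSI flag is captured.
    assert(props == Map("view.sqlConfig.spark.sql.ansi.enabled" -> "true"))
  }
}
```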