branch-3.0: [fix](hive) support query hive view created by spark #43552

Merged 1 commit on Nov 12, 2024
@@ -0,0 +1,6 @@
use default;
create view test_view1 as select * from sale_table;
create view test_view2 as select * from default.sale_table;
create view test_view3 as select * from sale_table where bill_code="bill_code1";
create view test_view4 as select parquet_zstd_all_types.t_int, parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar = parquet_all_types.t_varchar order by t_int limit 10;
create view unsupported_view as select bill_code from sale_table union all select t_varchar from multi_catalog.parquet_all_types order by bill_code limit 10;
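Of the views above, test_view1 through test_view4 are expected to be queryable through Doris: they cover an unqualified table reference, a db-qualified reference, a filtered select, and a cross-catalog join. unsupported_view deliberately combines UNION ALL with a trailing ORDER BY, a shape whose Hive-expanded SQL Doris cannot yet analyze; the regression test below asserts the resulting error.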
@@ -114,6 +114,13 @@ ls /mnt/scripts/create_preinstalled_scripts/*.hql | xargs -n 1 -P 10 -I {} bash
echo "Script: {} executed in $EXECUTION_TIME seconds"
'

# create view
START_TIME=$(date +%s)
hive -f /mnt/scripts/create_view_scripts/create_view.hql
END_TIME=$(date +%s)
EXECUTION_TIME=$((END_TIME - START_TIME))
echo "Script: create_view.hql executed in $EXECUTION_TIME seconds"

touch /mnt/SUCCESS

# Avoid container exit
@@ -253,6 +253,10 @@ private boolean supportedHiveTable() {
if (remoteTable.getSd() == null) {
throw new NotSupportedException("remote table's storage descriptor is null");
}
// If this is a Hive view, there is no need to check the file format.
if (remoteTable.isSetViewExpandedText() || remoteTable.isSetViewOriginalText()) {
return true;
}
String inputFileFormat = remoteTable.getSd().getInputFormat();
if (inputFileFormat == null) {
throw new NotSupportedException("remote table's storage input format is null");
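For context, a minimal sketch of the new check in isolation, assuming the thrift-generated HMS client API (remoteTable is an org.apache.hadoop.hive.metastore.api.Table; the helper name is hypothetical):

// Hypothetical helper mirroring the added check. Views created by Spark
// (or Hive) carry their SQL in viewOriginalText/viewExpandedText, while a
// view's storage descriptor may have a null or meaningless input format,
// so the file-format validation that follows must be skipped for views.
private static boolean isHiveView(org.apache.hadoop.hive.metastore.api.Table remoteTable) {
    return remoteTable.isSetViewExpandedText() || remoteTable.isSetViewOriginalText();
}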
@@ -420,8 +420,10 @@ private LogicalPlan getLogicalPlan(TableIf table, UnboundRelation unboundRelation)
if (Config.enable_query_hive_views && hmsTable.isView()) {
isView = true;
String hiveCatalog = hmsTable.getCatalog().getName();
String hiveDb = hmsTable.getDatabase().getFullName();
String ddlSql = hmsTable.getViewText();
Plan hiveViewPlan = parseAndAnalyzeHiveView(hmsTable, hiveCatalog, ddlSql, cascadesContext);
Plan hiveViewPlan = parseAndAnalyzeHiveView(
hmsTable, hiveCatalog, hiveDb, ddlSql, cascadesContext);
return new LogicalSubQueryAlias<>(qualifiedTableName, hiveViewPlan);
}
if (hmsTable.getDlaType() == DLAType.HUDI) {
@@ -474,15 +476,20 @@ private LogicalPlan getLogicalPlan(TableIf table, UnboundRelation unboundRelation)
}

private Plan parseAndAnalyzeHiveView(
HMSExternalTable table, String hiveCatalog, String ddlSql, CascadesContext cascadesContext) {
HMSExternalTable table, String hiveCatalog, String hiveDb, String ddlSql, CascadesContext cascadesContext) {
ConnectContext ctx = cascadesContext.getConnectContext();
String previousCatalog = ctx.getCurrentCatalog().getName();
String previousDb = ctx.getDatabase();
// Switch the catalog and db to the view's Hive catalog and db, so that the view SQL is parsed and analyzed in the Hive context.
ctx.changeDefaultCatalog(hiveCatalog);
Plan hiveViewPlan = parseAndAnalyzeView(table, ddlSql, cascadesContext);
ctx.changeDefaultCatalog(previousCatalog);
ctx.setDatabase(previousDb);
return hiveViewPlan;
ctx.setDatabase(hiveDb);
try {
return parseAndAnalyzeView(table, ddlSql, cascadesContext);
} finally {
// restore catalog and db in connect context
ctx.changeDefaultCatalog(previousCatalog);
ctx.setDatabase(previousDb);
}
}
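Two things change in this hunk beyond plumbing hiveDb through. First, the session database is now switched along with the catalog, so a view body that references an unqualified table (e.g. test_view1's select * from sale_table) resolves against the view's own database. Second, the restore now sits in a finally block, so a failed analysis can no longer leave the user's session pointing at the view's catalog and database.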

private Plan parseAndAnalyzeView(TableIf view, String ddlSql, CascadesContext parentContext) {
Expand Down
133 changes: 133 additions & 0 deletions regression-test/data/external_table_p0/hive/test_hive_view.out
@@ -0,0 +1,133 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !desc1 --
bill_code varchar(500) Yes true \N
dates varchar(500) Yes true \N
ord_year varchar(500) Yes true \N
ord_month varchar(500) Yes true \N
ord_quarter varchar(500) Yes true \N
on_time varchar(500) Yes true \N

-- !desc2 --
bill_code varchar(500) Yes true \N
dates varchar(500) Yes true \N
ord_year varchar(500) Yes true \N
ord_month varchar(500) Yes true \N
ord_quarter varchar(500) Yes true \N
on_time varchar(500) Yes true \N

-- !desc3 --
bill_code varchar(500) Yes true \N
dates varchar(500) Yes true \N
ord_year varchar(500) Yes true \N
ord_month varchar(500) Yes true \N
ord_quarter varchar(500) Yes true \N
on_time varchar(500) Yes true \N

-- !desc4 --
t_int int Yes true \N
t_varchar varchar(65535) Yes true \N

-- !sql1 --
bill_code dates ord_year ord_month ord_quarter on_time
bill_code1 dates2 ord_year3 ord_month4 ord_quarter5 on_time6

-- !sql2 --
dates
dates2

-- !sql3 --
1

-- !sql4 --
3 test
3 test
3 test
3 test
3 test
3 test
3 test
3 test
3 test
3 test

-- !sql5 --
3
3
3
3
3
3
3
3
3
3

-- !desc5 --
bill_code varchar(65535) Yes true \N

-- !desc1 --
bill_code varchar(500) Yes true \N
dates varchar(500) Yes true \N
ord_year varchar(500) Yes true \N
ord_month varchar(500) Yes true \N
ord_quarter varchar(500) Yes true \N
on_time varchar(500) Yes true \N

-- !desc2 --
bill_code varchar(500) Yes true \N
dates varchar(500) Yes true \N
ord_year varchar(500) Yes true \N
ord_month varchar(500) Yes true \N
ord_quarter varchar(500) Yes true \N
on_time varchar(500) Yes true \N

-- !desc3 --
bill_code varchar(500) Yes true \N
dates varchar(500) Yes true \N
ord_year varchar(500) Yes true \N
ord_month varchar(500) Yes true \N
ord_quarter varchar(500) Yes true \N
on_time varchar(500) Yes true \N

-- !desc4 --
t_int int Yes true \N
t_varchar varchar(65535) Yes true \N

-- !sql1 --
bill_code dates ord_year ord_month ord_quarter on_time
bill_code1 dates2 ord_year3 ord_month4 ord_quarter5 on_time6

-- !sql2 --
dates
dates2

-- !sql3 --
1

-- !sql4 --
3 test
3 test
3 test
3 test
3 test
3 test
3 test
3 test
3 test
3 test

-- !sql5 --
3
3
3
3
3
3
3
3
3
3

-- !desc5 --
bill_code varchar(65535) Yes true \N

@@ -0,0 +1,66 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("test_hive_view", "external_docker,hive,external_docker_hive,p0,external") {
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
logger.info("diable Hive test.")
return;
}

for (String hivePrefix : ["hive2", "hive3"]) {
setHivePrefix(hivePrefix)
String catalog_name = "test_${hivePrefix}_view"
String ex_db_name = "`default`"
String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort")

sql """drop catalog if exists ${catalog_name} """

sql """CREATE CATALOG ${catalog_name} PROPERTIES (
'type'='hms',
'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
'hadoop.username' = 'hive'
);"""

sql """use ${catalog_name}.${ex_db_name}"""
qt_desc1 """desc test_view1"""
qt_desc2 """desc test_view2"""
qt_desc3 """desc test_view3"""
qt_desc4 """desc test_view4"""
// create view test_view1 as select * from sale_table;
order_qt_sql1 """ select * from test_view1 """
// create view test_view2 as select * from default.sale_table;
order_qt_sql2 """ select dates from test_view2 """
// create view test_view3 as select * from sale_table where bill_code="bill_code1";
order_qt_sql3 """ select count(*) from test_view3 """
// create view test_view4 as select parquet_zstd_all_types.t_int, parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar = parquet_all_types.t_varchar order by t_int limit 10;
order_qt_sql4 """ select * from test_view4 """
order_qt_sql5 """ select test_view4.t_int from test_view4 join multi_catalog.parquet_all_types on test_view4.t_varchar = parquet_all_types.t_varchar order by test_view4.t_int limit 10; """

// check unsupported view
sql """set enable_fallback_to_original_planner=false;"""
test {
sql """select * from unsupported_view;"""
exception """Unknown column 'bill_code' in '_u2' in SORT clause"""
}
// pr #43530
qt_desc5 """desc unsupported_view;"""
}
}
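To reproduce this suite locally, enableHiveTest=true (plus the externalEnvIp and port settings read above) must be set in regression-conf.groovy; assuming the standard Doris regression runner, an invocation like sh run-regression-test.sh --run -s test_hive_view should pick it up.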
