diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out new file mode 100644 index 0000000000000..83fa42695d9db --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out @@ -0,0 +1,659 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +create temporary view data as select * from values + ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))), + ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223))) + as data(a, b, c) +-- !query analysis +CreateViewCommand `data`, select * from values + ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))), + ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223))) + as data(a, b, c), false, false, LocalTempView, true + +- Project [a#x, b#x, c#x] + +- SubqueryAlias data + +- LocalRelation [a#x, b#x, c#x] + + +-- !query +select * from data +-- !query analysis +Project [a#x, b#x, c#x] ++- SubqueryAlias data + +- View (`data`, [a#x,b#x,c#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as array) AS b#x, cast(c#x as array>) AS c#x] + +- Project [a#x, b#x, c#x] + +- SubqueryAlias data + +- LocalRelation [a#x, b#x, c#x] + + +-- !query +select a, b[0], b[0] + b[1] from data +-- !query analysis +Project [a#x, b#x[0] AS b[0]#x, (b#x[0] + b#x[1]) AS (b[0] + b[1])#x] ++- SubqueryAlias data + +- View (`data`, [a#x,b#x,c#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as array) AS b#x, cast(c#x as array>) AS c#x] + +- Project [a#x, b#x, c#x] + +- SubqueryAlias data + +- LocalRelation [a#x, b#x, c#x] + + +-- !query +select a, c[0][0] + c[0][0 + 1] from data +-- !query analysis +Project [a#x, (c#x[0][0] + c#x[0][(0 + 1)]) AS (c[0][0] + c[0][(0 + 1)])#x] ++- SubqueryAlias data + +- View (`data`, [a#x,b#x,c#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as array) AS b#x, cast(c#x as 
array>) AS c#x] + +- Project [a#x, b#x, c#x] + +- SubqueryAlias data + +- LocalRelation [a#x, b#x, c#x] + + +-- !query +create temporary view primitive_arrays as select * from values ( + array(true), + array(2Y, 1Y), + array(2S, 1S), + array(2, 1), + array(2L, 1L), + array(9223372036854775809, 9223372036854775808), + array(2.0D, 1.0D), + array(float(2.0), float(1.0)), + array(date '2016-03-14', date '2016-03-13'), + array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000') +) as primitive_arrays( + boolean_array, + tinyint_array, + smallint_array, + int_array, + bigint_array, + decimal_array, + double_array, + float_array, + date_array, + timestamp_array +) +-- !query analysis +CreateViewCommand `primitive_arrays`, select * from values ( + array(true), + array(2Y, 1Y), + array(2S, 1S), + array(2, 1), + array(2L, 1L), + array(9223372036854775809, 9223372036854775808), + array(2.0D, 1.0D), + array(float(2.0), float(1.0)), + array(date '2016-03-14', date '2016-03-13'), + array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000') +) as primitive_arrays( + boolean_array, + tinyint_array, + smallint_array, + int_array, + bigint_array, + decimal_array, + double_array, + float_array, + date_array, + timestamp_array +), false, false, LocalTempView, true + +- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + +- SubqueryAlias primitive_arrays + +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + + +-- !query +select * from primitive_arrays +-- !query analysis +Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] ++- SubqueryAlias primitive_arrays + +- View 
(`primitive_arrays`, [boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x]) + +- Project [cast(boolean_array#x as array) AS boolean_array#x, cast(tinyint_array#x as array) AS tinyint_array#x, cast(smallint_array#x as array) AS smallint_array#x, cast(int_array#x as array) AS int_array#x, cast(bigint_array#x as array) AS bigint_array#x, cast(decimal_array#x as array) AS decimal_array#x, cast(double_array#x as array) AS double_array#x, cast(float_array#x as array) AS float_array#x, cast(date_array#x as array) AS date_array#x, cast(timestamp_array#x as array) AS timestamp_array#x] + +- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + +- SubqueryAlias primitive_arrays + +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + + +-- !query +select + array_contains(boolean_array, true), array_contains(boolean_array, false), + array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y), + array_contains(smallint_array, 2S), array_contains(smallint_array, 0S), + array_contains(int_array, 2), array_contains(int_array, 0), + array_contains(bigint_array, 2L), array_contains(bigint_array, 0L), + array_contains(decimal_array, 9223372036854775809), array_contains(decimal_array, 1), + array_contains(double_array, 2.0D), array_contains(double_array, 0.0D), + array_contains(float_array, float(2.0)), array_contains(float_array, float(0.0)), + array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'), + array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000') +from primitive_arrays +-- !query analysis +Project 
[array_contains(boolean_array#x, true) AS array_contains(boolean_array, true)#x, array_contains(boolean_array#x, false) AS array_contains(boolean_array, false)#x, array_contains(tinyint_array#x, 2) AS array_contains(tinyint_array, 2)#x, array_contains(tinyint_array#x, 0) AS array_contains(tinyint_array, 0)#x, array_contains(smallint_array#x, 2) AS array_contains(smallint_array, 2)#x, array_contains(smallint_array#x, 0) AS array_contains(smallint_array, 0)#x, array_contains(int_array#x, 2) AS array_contains(int_array, 2)#x, array_contains(int_array#x, 0) AS array_contains(int_array, 0)#x, array_contains(bigint_array#x, 2) AS array_contains(bigint_array, 2)#x, array_contains(bigint_array#x, 0) AS array_contains(bigint_array, 0)#x, array_contains(decimal_array#x, 9223372036854775809) AS array_contains(decimal_array, 9223372036854775809)#x, array_contains(decimal_array#x, cast(1 as decimal(19,0))) AS array_contains(decimal_array, 1)#x, array_contains(double_array#x, 2.0) AS array_contains(double_array, 2.0)#x, array_contains(double_array#x, 0.0) AS array_contains(double_array, 0.0)#x, array_contains(float_array#x, cast(2.0 as float)) AS array_contains(float_array, 2.0)#x, array_contains(float_array#x, cast(0.0 as float)) AS array_contains(float_array, 0.0)#x, array_contains(date_array#x, 2016-03-14) AS array_contains(date_array, DATE '2016-03-14')#x, array_contains(date_array#x, 2016-01-01) AS array_contains(date_array, DATE '2016-01-01')#x, array_contains(timestamp_array#x, 2016-11-15 20:54:00) AS array_contains(timestamp_array, TIMESTAMP '2016-11-15 20:54:00')#x, array_contains(timestamp_array#x, 2016-01-01 20:54:00) AS array_contains(timestamp_array, TIMESTAMP '2016-01-01 20:54:00')#x] ++- SubqueryAlias primitive_arrays + +- View (`primitive_arrays`, [boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x]) + +- Project [cast(boolean_array#x as array) AS boolean_array#x, 
cast(tinyint_array#x as array) AS tinyint_array#x, cast(smallint_array#x as array) AS smallint_array#x, cast(int_array#x as array) AS int_array#x, cast(bigint_array#x as array) AS bigint_array#x, cast(decimal_array#x as array) AS decimal_array#x, cast(double_array#x as array) AS double_array#x, cast(float_array#x as array) AS float_array#x, cast(date_array#x as array) AS date_array#x, cast(timestamp_array#x as array) AS timestamp_array#x] + +- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + +- SubqueryAlias primitive_arrays + +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + + +-- !query +select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data +-- !query analysis +Project [array_contains(b#x, 11) AS array_contains(b, 11)#x, array_contains(c#x, array(111, 112, 113)) AS array_contains(c, array(111, 112, 113))#x] ++- SubqueryAlias data + +- View (`data`, [a#x,b#x,c#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as array) AS b#x, cast(c#x as array>) AS c#x] + +- Project [a#x, b#x, c#x] + +- SubqueryAlias data + +- LocalRelation [a#x, b#x, c#x] + + +-- !query +select + sort_array(boolean_array), + sort_array(tinyint_array), + sort_array(smallint_array), + sort_array(int_array), + sort_array(bigint_array), + sort_array(decimal_array), + sort_array(double_array), + sort_array(float_array), + sort_array(date_array), + sort_array(timestamp_array) +from primitive_arrays +-- !query analysis +Project [sort_array(boolean_array#x, true) AS sort_array(boolean_array, true)#x, sort_array(tinyint_array#x, true) AS sort_array(tinyint_array, true)#x, sort_array(smallint_array#x, true) AS sort_array(smallint_array, true)#x, sort_array(int_array#x, true) AS sort_array(int_array, true)#x, 
sort_array(bigint_array#x, true) AS sort_array(bigint_array, true)#x, sort_array(decimal_array#x, true) AS sort_array(decimal_array, true)#x, sort_array(double_array#x, true) AS sort_array(double_array, true)#x, sort_array(float_array#x, true) AS sort_array(float_array, true)#x, sort_array(date_array#x, true) AS sort_array(date_array, true)#x, sort_array(timestamp_array#x, true) AS sort_array(timestamp_array, true)#x] ++- SubqueryAlias primitive_arrays + +- View (`primitive_arrays`, [boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x]) + +- Project [cast(boolean_array#x as array) AS boolean_array#x, cast(tinyint_array#x as array) AS tinyint_array#x, cast(smallint_array#x as array) AS smallint_array#x, cast(int_array#x as array) AS int_array#x, cast(bigint_array#x as array) AS bigint_array#x, cast(decimal_array#x as array) AS decimal_array#x, cast(double_array#x as array) AS double_array#x, cast(float_array#x as array) AS float_array#x, cast(date_array#x as array) AS date_array#x, cast(timestamp_array#x as array) AS timestamp_array#x] + +- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + +- SubqueryAlias primitive_arrays + +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + + +-- !query +select sort_array(array('b', 'd'), '1') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"1\"", + "inputType" : "\"STRING\"", + "paramIndex" : "2", + "requiredType" : "\"BOOLEAN\"", + "sqlExpr" : "\"sort_array(array(b, d), 1)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : 
"", + "startIndex" : 8, + "stopIndex" : 39, + "fragment" : "sort_array(array('b', 'd'), '1')" + } ] +} + + +-- !query +select sort_array(array('b', 'd'), cast(NULL as boolean)) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"CAST(NULL AS BOOLEAN)\"", + "inputType" : "\"BOOLEAN\"", + "paramIndex" : "2", + "requiredType" : "\"BOOLEAN\"", + "sqlExpr" : "\"sort_array(array(b, d), CAST(NULL AS BOOLEAN))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 57, + "fragment" : "sort_array(array('b', 'd'), cast(NULL as boolean))" + } ] +} + + +-- !query +select + size(boolean_array), + size(tinyint_array), + size(smallint_array), + size(int_array), + size(bigint_array), + size(decimal_array), + size(double_array), + size(float_array), + size(date_array), + size(timestamp_array) +from primitive_arrays +-- !query analysis +Project [size(boolean_array#x, true) AS size(boolean_array)#x, size(tinyint_array#x, true) AS size(tinyint_array)#x, size(smallint_array#x, true) AS size(smallint_array)#x, size(int_array#x, true) AS size(int_array)#x, size(bigint_array#x, true) AS size(bigint_array)#x, size(decimal_array#x, true) AS size(decimal_array)#x, size(double_array#x, true) AS size(double_array)#x, size(float_array#x, true) AS size(float_array)#x, size(date_array#x, true) AS size(date_array)#x, size(timestamp_array#x, true) AS size(timestamp_array)#x] ++- SubqueryAlias primitive_arrays + +- View (`primitive_arrays`, [boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x]) + +- Project [cast(boolean_array#x as array) AS boolean_array#x, cast(tinyint_array#x as array) AS tinyint_array#x, cast(smallint_array#x as array) AS smallint_array#x, cast(int_array#x as array) AS int_array#x, 
cast(bigint_array#x as array) AS bigint_array#x, cast(decimal_array#x as array) AS decimal_array#x, cast(double_array#x as array) AS double_array#x, cast(float_array#x as array) AS float_array#x, cast(date_array#x as array) AS date_array#x, cast(timestamp_array#x as array) AS timestamp_array#x] + +- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + +- SubqueryAlias primitive_arrays + +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + + +-- !query +select element_at(array(1, 2, 3), 5) +-- !query analysis +Project [element_at(array(1, 2, 3), 5, None, false) AS element_at(array(1, 2, 3), 5)#x] ++- OneRowRelation + + +-- !query +select element_at(array(1, 2, 3), -5) +-- !query analysis +Project [element_at(array(1, 2, 3), -5, None, false) AS element_at(array(1, 2, 3), -5)#x] ++- OneRowRelation + + +-- !query +select element_at(array(1, 2, 3), 0) +-- !query analysis +Project [element_at(array(1, 2, 3), 0, None, false) AS element_at(array(1, 2, 3), 0)#x] ++- OneRowRelation + + +-- !query +select elt(4, '123', '456') +-- !query analysis +Project [elt(4, 123, 456, false) AS elt(4, 123, 456)#x] ++- OneRowRelation + + +-- !query +select elt(0, '123', '456') +-- !query analysis +Project [elt(0, 123, 456, false) AS elt(0, 123, 456)#x] ++- OneRowRelation + + +-- !query +select elt(-1, '123', '456') +-- !query analysis +Project [elt(-1, 123, 456, false) AS elt(-1, 123, 456)#x] ++- OneRowRelation + + +-- !query +select elt(null, '123', '456') +-- !query analysis +Project [elt(cast(null as int), 123, 456, false) AS elt(NULL, 123, 456)#x] ++- OneRowRelation + + +-- !query +select elt(null, '123', null) +-- !query analysis +Project [elt(cast(null as int), 123, cast(null as string), false) AS elt(NULL, 123, NULL)#x] ++- OneRowRelation 
+ + +-- !query +select elt(1, '123', null) +-- !query analysis +Project [elt(1, 123, cast(null as string), false) AS elt(1, 123, NULL)#x] ++- OneRowRelation + + +-- !query +select elt(2, '123', null) +-- !query analysis +Project [elt(2, 123, cast(null as string), false) AS elt(2, 123, NULL)#x] ++- OneRowRelation + + +-- !query +select array(1, 2, 3)[5] +-- !query analysis +Project [array(1, 2, 3)[5] AS array(1, 2, 3)[5]#x] ++- OneRowRelation + + +-- !query +select array(1, 2, 3)[-1] +-- !query analysis +Project [array(1, 2, 3)[-1] AS array(1, 2, 3)[-1]#x] ++- OneRowRelation + + +-- !query +select array_size(array()) +-- !query analysis +Project [array_size(array()) AS array_size(array())#x] ++- OneRowRelation + + +-- !query +select array_size(array(true)) +-- !query analysis +Project [array_size(array(true)) AS array_size(array(true))#x] ++- OneRowRelation + + +-- !query +select array_size(array(2, 1)) +-- !query analysis +Project [array_size(array(2, 1)) AS array_size(array(2, 1))#x] ++- OneRowRelation + + +-- !query +select array_size(NULL) +-- !query analysis +Project [array_size(cast(null as array)) AS array_size(NULL)#x] ++- OneRowRelation + + +-- !query +select array_size(map('a', 1, 'b', 2)) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"map(a, 1, b, 2)\"", + "inputType" : "\"MAP\"", + "paramIndex" : "1", + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"array_size(map(a, 1, b, 2))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 38, + "fragment" : "array_size(map('a', 1, 'b', 2))" + } ] +} + + +-- !query +select size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10))) +-- !query analysis +Project [size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2), true) AS size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x] ++- 
OneRowRelation + + +-- !query +select size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10))) +-- !query analysis +Project [size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2, 3), true) AS size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x] ++- OneRowRelation + + +-- !query +select size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10))) +-- !query analysis +Project [size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10), 0, 1, 2, 3), true) AS size(arrays_zip(array(1, 2, 3), array(4), NULL, array(7, 8, 9, 10)))#x] ++- OneRowRelation + + +-- !query +select isnotnull(arrays_zip(array(), array(4), array(7, 8, 9, 10))) +-- !query analysis +Project [isnotnull(arrays_zip(array(), array(4), array(7, 8, 9, 10), 0, 1, 2)) AS (arrays_zip(array(), array(4), array(7, 8, 9, 10)) IS NOT NULL)#x] ++- OneRowRelation + + +-- !query +select isnotnull(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10))) +-- !query analysis +Project [isnotnull(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2)) AS (arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)) IS NOT NULL)#x] ++- OneRowRelation + + +-- !query +select isnotnull(arrays_zip(array(1, 2, 3), NULL, array(4), array(7, 8, 9, 10))) +-- !query analysis +Project [isnotnull(arrays_zip(array(1, 2, 3), null, array(4), array(7, 8, 9, 10), 0, 1, 2, 3)) AS (arrays_zip(array(1, 2, 3), NULL, array(4), array(7, 8, 9, 10)) IS NOT NULL)#x] ++- OneRowRelation + + +-- !query +select get(array(1, 2, 3), 0) +-- !query analysis +Project [get(array(1, 2, 3), 0) AS get(array(1, 2, 3), 0)#x] ++- OneRowRelation + + +-- !query +select get(array(1, 2, 3), 3) +-- !query analysis +Project [get(array(1, 2, 3), 3) AS get(array(1, 2, 3), 3)#x] ++- OneRowRelation + + +-- !query +select get(array(1, 2, 3), null) +-- !query analysis +Project [get(array(1, 2, 3), null) AS get(array(1, 2, 3), NULL)#x] ++- OneRowRelation + + +-- !query +select get(array(1, 
2, 3), -1) +-- !query analysis +Project [get(array(1, 2, 3), -1) AS get(array(1, 2, 3), -1)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(1, 2, 3), 3, 4) +-- !query analysis +Project [array_insert(array(1, 2, 3), 3, 4) AS array_insert(array(1, 2, 3), 3, 4)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(2, 3, 4), 0, 1) +-- !query analysis +Project [array_insert(array(2, 3, 4), 0, 1) AS array_insert(array(2, 3, 4), 0, 1)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(2, 3, 4), 1, 1) +-- !query analysis +Project [array_insert(array(2, 3, 4), 1, 1) AS array_insert(array(2, 3, 4), 1, 1)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(1, 3, 4), -2, 2) +-- !query analysis +Project [array_insert(array(1, 3, 4), -2, 2) AS array_insert(array(1, 3, 4), -2, 2)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(1, 2, 3), 3, "4") +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.ARRAY_FUNCTION_DIFF_TYPES", + "sqlState" : "42K09", + "messageParameters" : { + "dataType" : "\"ARRAY\"", + "functionName" : "`array_insert`", + "leftType" : "\"ARRAY\"", + "rightType" : "\"STRING\"", + "sqlExpr" : "\"array_insert(array(1, 2, 3), 3, 4)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 43, + "fragment" : "array_insert(array(1, 2, 3), 3, \"4\")" + } ] +} + + +-- !query +select array_insert(cast(NULL as ARRAY), 1, 1) +-- !query analysis +Project [array_insert(cast(null as array), 1, 1) AS array_insert(NULL, 1, 1)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(1, 2, 3, NULL), cast(NULL as INT), 4) +-- !query analysis +Project [array_insert(array(1, 2, 3, cast(null as int)), cast(null as int), 4) AS array_insert(array(1, 2, 3, NULL), CAST(NULL AS INT), 4)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(1, 2, 3, NULL), 4, cast(NULL as INT)) +-- !query analysis +Project 
[array_insert(array(1, 2, 3, cast(null as int)), 4, cast(null as int)) AS array_insert(array(1, 2, 3, NULL), 4, CAST(NULL AS INT))#x] ++- OneRowRelation + + +-- !query +select array_insert(array(2, 3, NULL, 4), 5, 5) +-- !query analysis +Project [array_insert(array(2, 3, cast(null as int), 4), 5, 5) AS array_insert(array(2, 3, NULL, 4), 5, 5)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(2, 3, NULL, 4), -5, 1) +-- !query analysis +Project [array_insert(array(2, 3, cast(null as int), 4), -5, 1) AS array_insert(array(2, 3, NULL, 4), -5, 1)#x] ++- OneRowRelation + + +-- !query +select array_compact(id) from values (1) as t(id) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"id\"", + "inputType" : "\"INT\"", + "paramIndex" : "1", + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"array_compact(id)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 24, + "fragment" : "array_compact(id)" + } ] +} + + +-- !query +select array_compact(array("1", null, "2", null)) +-- !query analysis +Project [array_compact(array(1, cast(null as string), 2, cast(null as string))) AS array_compact(array(1, NULL, 2, NULL))#x] ++- OneRowRelation + + +-- !query +select array_compact(array("a", "b", "c")) +-- !query analysis +Project [array_compact(array(a, b, c)) AS array_compact(array(a, b, c))#x] ++- OneRowRelation + + +-- !query +select array_compact(array(1D, null, 2D, null)) +-- !query analysis +Project [array_compact(array(1.0, cast(null as double), 2.0, cast(null as double))) AS array_compact(array(1.0, NULL, 2.0, NULL))#x] ++- OneRowRelation + + +-- !query +select array_compact(array(array(1, 2, 3, null), null, array(4, null, 6))) +-- !query analysis +Project [array_compact(array(array(1, 2, 3, cast(null as int)), cast(null as array), array(4, cast(null as int), 6))) AS 
array_compact(array(array(1, 2, 3, NULL), NULL, array(4, NULL, 6)))#x] ++- OneRowRelation + + +-- !query +select array_compact(array(null)) +-- !query analysis +Project [array_compact(array(null)) AS array_compact(array(NULL))#x] ++- OneRowRelation + + +-- !query +select array_append(array(1, 2, 3), 4) +-- !query analysis +Project [array_append(array(1, 2, 3), 4) AS array_append(array(1, 2, 3), 4)#x] ++- OneRowRelation + + +-- !query +select array_append(array('a', 'b', 'c'), 'd') +-- !query analysis +Project [array_append(array(a, b, c), d) AS array_append(array(a, b, c), d)#x] ++- OneRowRelation + + +-- !query +select array_append(array(1, 2, 3, NULL), NULL) +-- !query analysis +Project [array_append(array(1, 2, 3, cast(null as int)), cast(null as int)) AS array_append(array(1, 2, 3, NULL), NULL)#x] ++- OneRowRelation + + +-- !query +select array_append(array('a', 'b', 'c', NULL), NULL) +-- !query analysis +Project [array_append(array(a, b, c, cast(null as string)), cast(null as string)) AS array_append(array(a, b, c, NULL), NULL)#x] ++- OneRowRelation + + +-- !query +select array_append(CAST(null AS ARRAY), 'a') +-- !query analysis +Project [array_append(cast(null as array), a) AS array_append(NULL, a)#x] ++- OneRowRelation + + +-- !query +select array_append(CAST(null AS ARRAY), CAST(null as String)) +-- !query analysis +Project [array_append(cast(null as array), cast(null as string)) AS array_append(NULL, CAST(NULL AS STRING))#x] ++- OneRowRelation + + +-- !query +select array_append(array(), 1) +-- !query analysis +Project [array_append(cast(array() as array), 1) AS array_append(array(), 1)#x] ++- OneRowRelation + + +-- !query +select array_append(CAST(array() AS ARRAY), CAST(NULL AS String)) +-- !query analysis +Project [array_append(cast(array() as array), cast(null as string)) AS array_append(array(), CAST(NULL AS STRING))#x] ++- OneRowRelation + + +-- !query +select array_append(array(CAST(NULL AS String)), CAST(NULL AS String)) +-- !query analysis 
+Project [array_append(array(cast(null as string)), cast(null as string)) AS array_append(array(CAST(NULL AS STRING)), CAST(NULL AS STRING))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out new file mode 100644 index 0000000000000..83fa42695d9db --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out @@ -0,0 +1,659 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +create temporary view data as select * from values + ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))), + ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223))) + as data(a, b, c) +-- !query analysis +CreateViewCommand `data`, select * from values + ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))), + ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223))) + as data(a, b, c), false, false, LocalTempView, true + +- Project [a#x, b#x, c#x] + +- SubqueryAlias data + +- LocalRelation [a#x, b#x, c#x] + + +-- !query +select * from data +-- !query analysis +Project [a#x, b#x, c#x] ++- SubqueryAlias data + +- View (`data`, [a#x,b#x,c#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as array) AS b#x, cast(c#x as array>) AS c#x] + +- Project [a#x, b#x, c#x] + +- SubqueryAlias data + +- LocalRelation [a#x, b#x, c#x] + + +-- !query +select a, b[0], b[0] + b[1] from data +-- !query analysis +Project [a#x, b#x[0] AS b[0]#x, (b#x[0] + b#x[1]) AS (b[0] + b[1])#x] ++- SubqueryAlias data + +- View (`data`, [a#x,b#x,c#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as array) AS b#x, cast(c#x as array>) AS c#x] + +- Project [a#x, b#x, c#x] + +- SubqueryAlias data + +- LocalRelation [a#x, b#x, c#x] + + +-- !query +select a, c[0][0] + c[0][0 + 1] from data +-- !query analysis +Project [a#x, (c#x[0][0] + c#x[0][(0 + 1)]) AS (c[0][0] + c[0][(0 + 
1)])#x] ++- SubqueryAlias data + +- View (`data`, [a#x,b#x,c#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as array) AS b#x, cast(c#x as array>) AS c#x] + +- Project [a#x, b#x, c#x] + +- SubqueryAlias data + +- LocalRelation [a#x, b#x, c#x] + + +-- !query +create temporary view primitive_arrays as select * from values ( + array(true), + array(2Y, 1Y), + array(2S, 1S), + array(2, 1), + array(2L, 1L), + array(9223372036854775809, 9223372036854775808), + array(2.0D, 1.0D), + array(float(2.0), float(1.0)), + array(date '2016-03-14', date '2016-03-13'), + array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000') +) as primitive_arrays( + boolean_array, + tinyint_array, + smallint_array, + int_array, + bigint_array, + decimal_array, + double_array, + float_array, + date_array, + timestamp_array +) +-- !query analysis +CreateViewCommand `primitive_arrays`, select * from values ( + array(true), + array(2Y, 1Y), + array(2S, 1S), + array(2, 1), + array(2L, 1L), + array(9223372036854775809, 9223372036854775808), + array(2.0D, 1.0D), + array(float(2.0), float(1.0)), + array(date '2016-03-14', date '2016-03-13'), + array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000') +) as primitive_arrays( + boolean_array, + tinyint_array, + smallint_array, + int_array, + bigint_array, + decimal_array, + double_array, + float_array, + date_array, + timestamp_array +), false, false, LocalTempView, true + +- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + +- SubqueryAlias primitive_arrays + +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + + +-- !query +select * from primitive_arrays +-- !query analysis +Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, 
bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] ++- SubqueryAlias primitive_arrays + +- View (`primitive_arrays`, [boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x]) + +- Project [cast(boolean_array#x as array) AS boolean_array#x, cast(tinyint_array#x as array) AS tinyint_array#x, cast(smallint_array#x as array) AS smallint_array#x, cast(int_array#x as array) AS int_array#x, cast(bigint_array#x as array) AS bigint_array#x, cast(decimal_array#x as array) AS decimal_array#x, cast(double_array#x as array) AS double_array#x, cast(float_array#x as array) AS float_array#x, cast(date_array#x as array) AS date_array#x, cast(timestamp_array#x as array) AS timestamp_array#x] + +- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + +- SubqueryAlias primitive_arrays + +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + + +-- !query +select + array_contains(boolean_array, true), array_contains(boolean_array, false), + array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y), + array_contains(smallint_array, 2S), array_contains(smallint_array, 0S), + array_contains(int_array, 2), array_contains(int_array, 0), + array_contains(bigint_array, 2L), array_contains(bigint_array, 0L), + array_contains(decimal_array, 9223372036854775809), array_contains(decimal_array, 1), + array_contains(double_array, 2.0D), array_contains(double_array, 0.0D), + array_contains(float_array, float(2.0)), array_contains(float_array, float(0.0)), + array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'), + array_contains(timestamp_array, timestamp '2016-11-15 
20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000') +from primitive_arrays +-- !query analysis +Project [array_contains(boolean_array#x, true) AS array_contains(boolean_array, true)#x, array_contains(boolean_array#x, false) AS array_contains(boolean_array, false)#x, array_contains(tinyint_array#x, 2) AS array_contains(tinyint_array, 2)#x, array_contains(tinyint_array#x, 0) AS array_contains(tinyint_array, 0)#x, array_contains(smallint_array#x, 2) AS array_contains(smallint_array, 2)#x, array_contains(smallint_array#x, 0) AS array_contains(smallint_array, 0)#x, array_contains(int_array#x, 2) AS array_contains(int_array, 2)#x, array_contains(int_array#x, 0) AS array_contains(int_array, 0)#x, array_contains(bigint_array#x, 2) AS array_contains(bigint_array, 2)#x, array_contains(bigint_array#x, 0) AS array_contains(bigint_array, 0)#x, array_contains(decimal_array#x, 9223372036854775809) AS array_contains(decimal_array, 9223372036854775809)#x, array_contains(decimal_array#x, cast(1 as decimal(19,0))) AS array_contains(decimal_array, 1)#x, array_contains(double_array#x, 2.0) AS array_contains(double_array, 2.0)#x, array_contains(double_array#x, 0.0) AS array_contains(double_array, 0.0)#x, array_contains(float_array#x, cast(2.0 as float)) AS array_contains(float_array, 2.0)#x, array_contains(float_array#x, cast(0.0 as float)) AS array_contains(float_array, 0.0)#x, array_contains(date_array#x, 2016-03-14) AS array_contains(date_array, DATE '2016-03-14')#x, array_contains(date_array#x, 2016-01-01) AS array_contains(date_array, DATE '2016-01-01')#x, array_contains(timestamp_array#x, 2016-11-15 20:54:00) AS array_contains(timestamp_array, TIMESTAMP '2016-11-15 20:54:00')#x, array_contains(timestamp_array#x, 2016-01-01 20:54:00) AS array_contains(timestamp_array, TIMESTAMP '2016-01-01 20:54:00')#x] ++- SubqueryAlias primitive_arrays + +- View (`primitive_arrays`, 
[boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x]) + +- Project [cast(boolean_array#x as array<boolean>) AS boolean_array#x, cast(tinyint_array#x as array<tinyint>) AS tinyint_array#x, cast(smallint_array#x as array<smallint>) AS smallint_array#x, cast(int_array#x as array<int>) AS int_array#x, cast(bigint_array#x as array<bigint>) AS bigint_array#x, cast(decimal_array#x as array<decimal(19,0)>) AS decimal_array#x, cast(double_array#x as array<double>) AS double_array#x, cast(float_array#x as array<float>) AS float_array#x, cast(date_array#x as array<date>) AS date_array#x, cast(timestamp_array#x as array<timestamp>) AS timestamp_array#x] + +- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + +- SubqueryAlias primitive_arrays + +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + + +-- !query +select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data +-- !query analysis +Project [array_contains(b#x, 11) AS array_contains(b, 11)#x, array_contains(c#x, array(111, 112, 113)) AS array_contains(c, array(111, 112, 113))#x] ++- SubqueryAlias data + +- View (`data`, [a#x,b#x,c#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as array<int>) AS b#x, cast(c#x as array<array<int>>) AS c#x] + +- Project [a#x, b#x, c#x] + +- SubqueryAlias data + +- LocalRelation [a#x, b#x, c#x] + + +-- !query +select + sort_array(boolean_array), + sort_array(tinyint_array), + sort_array(smallint_array), + sort_array(int_array), + sort_array(bigint_array), + sort_array(decimal_array), + sort_array(double_array), + sort_array(float_array), + sort_array(date_array), + sort_array(timestamp_array) +from primitive_arrays +-- !query analysis +Project [sort_array(boolean_array#x, true) AS sort_array(boolean_array, true)#x,
sort_array(tinyint_array#x, true) AS sort_array(tinyint_array, true)#x, sort_array(smallint_array#x, true) AS sort_array(smallint_array, true)#x, sort_array(int_array#x, true) AS sort_array(int_array, true)#x, sort_array(bigint_array#x, true) AS sort_array(bigint_array, true)#x, sort_array(decimal_array#x, true) AS sort_array(decimal_array, true)#x, sort_array(double_array#x, true) AS sort_array(double_array, true)#x, sort_array(float_array#x, true) AS sort_array(float_array, true)#x, sort_array(date_array#x, true) AS sort_array(date_array, true)#x, sort_array(timestamp_array#x, true) AS sort_array(timestamp_array, true)#x] ++- SubqueryAlias primitive_arrays + +- View (`primitive_arrays`, [boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x]) + +- Project [cast(boolean_array#x as array<boolean>) AS boolean_array#x, cast(tinyint_array#x as array<tinyint>) AS tinyint_array#x, cast(smallint_array#x as array<smallint>) AS smallint_array#x, cast(int_array#x as array<int>) AS int_array#x, cast(bigint_array#x as array<bigint>) AS bigint_array#x, cast(decimal_array#x as array<decimal(19,0)>) AS decimal_array#x, cast(double_array#x as array<double>) AS double_array#x, cast(float_array#x as array<float>) AS float_array#x, cast(date_array#x as array<date>) AS date_array#x, cast(timestamp_array#x as array<timestamp>) AS timestamp_array#x] + +- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + +- SubqueryAlias primitive_arrays + +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + + +-- !query +select sort_array(array('b', 'd'), '1') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" :
"\"1\"", + "inputType" : "\"STRING\"", + "paramIndex" : "2", + "requiredType" : "\"BOOLEAN\"", + "sqlExpr" : "\"sort_array(array(b, d), 1)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 39, + "fragment" : "sort_array(array('b', 'd'), '1')" + } ] +} + + +-- !query +select sort_array(array('b', 'd'), cast(NULL as boolean)) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"CAST(NULL AS BOOLEAN)\"", + "inputType" : "\"BOOLEAN\"", + "paramIndex" : "2", + "requiredType" : "\"BOOLEAN\"", + "sqlExpr" : "\"sort_array(array(b, d), CAST(NULL AS BOOLEAN))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 57, + "fragment" : "sort_array(array('b', 'd'), cast(NULL as boolean))" + } ] +} + + +-- !query +select + size(boolean_array), + size(tinyint_array), + size(smallint_array), + size(int_array), + size(bigint_array), + size(decimal_array), + size(double_array), + size(float_array), + size(date_array), + size(timestamp_array) +from primitive_arrays +-- !query analysis +Project [size(boolean_array#x, true) AS size(boolean_array)#x, size(tinyint_array#x, true) AS size(tinyint_array)#x, size(smallint_array#x, true) AS size(smallint_array)#x, size(int_array#x, true) AS size(int_array)#x, size(bigint_array#x, true) AS size(bigint_array)#x, size(decimal_array#x, true) AS size(decimal_array)#x, size(double_array#x, true) AS size(double_array)#x, size(float_array#x, true) AS size(float_array)#x, size(date_array#x, true) AS size(date_array)#x, size(timestamp_array#x, true) AS size(timestamp_array)#x] ++- SubqueryAlias primitive_arrays + +- View (`primitive_arrays`, [boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x]) + +- Project 
[cast(boolean_array#x as array<boolean>) AS boolean_array#x, cast(tinyint_array#x as array<tinyint>) AS tinyint_array#x, cast(smallint_array#x as array<smallint>) AS smallint_array#x, cast(int_array#x as array<int>) AS int_array#x, cast(bigint_array#x as array<bigint>) AS bigint_array#x, cast(decimal_array#x as array<decimal(19,0)>) AS decimal_array#x, cast(double_array#x as array<double>) AS double_array#x, cast(float_array#x as array<float>) AS float_array#x, cast(date_array#x as array<date>) AS date_array#x, cast(timestamp_array#x as array<timestamp>) AS timestamp_array#x] + +- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + +- SubqueryAlias primitive_arrays + +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] + + +-- !query +select element_at(array(1, 2, 3), 5) +-- !query analysis +Project [element_at(array(1, 2, 3), 5, None, false) AS element_at(array(1, 2, 3), 5)#x] ++- OneRowRelation + + +-- !query +select element_at(array(1, 2, 3), -5) +-- !query analysis +Project [element_at(array(1, 2, 3), -5, None, false) AS element_at(array(1, 2, 3), -5)#x] ++- OneRowRelation + + +-- !query +select element_at(array(1, 2, 3), 0) +-- !query analysis +Project [element_at(array(1, 2, 3), 0, None, false) AS element_at(array(1, 2, 3), 0)#x] ++- OneRowRelation + + +-- !query +select elt(4, '123', '456') +-- !query analysis +Project [elt(4, 123, 456, false) AS elt(4, 123, 456)#x] ++- OneRowRelation + + +-- !query +select elt(0, '123', '456') +-- !query analysis +Project [elt(0, 123, 456, false) AS elt(0, 123, 456)#x] ++- OneRowRelation + + +-- !query +select elt(-1, '123', '456') +-- !query analysis +Project [elt(-1, 123, 456, false) AS elt(-1, 123, 456)#x] ++- OneRowRelation + + +-- !query +select elt(null, '123', '456') +-- !query analysis +Project [elt(cast(null as int), 123, 456, false) AS elt(NULL, 123,
456)#x] ++- OneRowRelation + + +-- !query +select elt(null, '123', null) +-- !query analysis +Project [elt(cast(null as int), 123, cast(null as string), false) AS elt(NULL, 123, NULL)#x] ++- OneRowRelation + + +-- !query +select elt(1, '123', null) +-- !query analysis +Project [elt(1, 123, cast(null as string), false) AS elt(1, 123, NULL)#x] ++- OneRowRelation + + +-- !query +select elt(2, '123', null) +-- !query analysis +Project [elt(2, 123, cast(null as string), false) AS elt(2, 123, NULL)#x] ++- OneRowRelation + + +-- !query +select array(1, 2, 3)[5] +-- !query analysis +Project [array(1, 2, 3)[5] AS array(1, 2, 3)[5]#x] ++- OneRowRelation + + +-- !query +select array(1, 2, 3)[-1] +-- !query analysis +Project [array(1, 2, 3)[-1] AS array(1, 2, 3)[-1]#x] ++- OneRowRelation + + +-- !query +select array_size(array()) +-- !query analysis +Project [array_size(array()) AS array_size(array())#x] ++- OneRowRelation + + +-- !query +select array_size(array(true)) +-- !query analysis +Project [array_size(array(true)) AS array_size(array(true))#x] ++- OneRowRelation + + +-- !query +select array_size(array(2, 1)) +-- !query analysis +Project [array_size(array(2, 1)) AS array_size(array(2, 1))#x] ++- OneRowRelation + + +-- !query +select array_size(NULL) +-- !query analysis +Project [array_size(cast(null as array<void>)) AS array_size(NULL)#x] ++- OneRowRelation + + +-- !query +select array_size(map('a', 1, 'b', 2)) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"map(a, 1, b, 2)\"", + "inputType" : "\"MAP<STRING, INT>\"", + "paramIndex" : "1", + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"array_size(map(a, 1, b, 2))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 38, + "fragment" : "array_size(map('a', 1, 'b', 2))" + } ] +} + + +-- !query +select size(arrays_zip(array(1, 2, 3), array(4),
array(7, 8, 9, 10))) +-- !query analysis +Project [size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2), true) AS size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x] ++- OneRowRelation + + +-- !query +select size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10))) +-- !query analysis +Project [size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2, 3), true) AS size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x] ++- OneRowRelation + + +-- !query +select size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10))) +-- !query analysis +Project [size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10), 0, 1, 2, 3), true) AS size(arrays_zip(array(1, 2, 3), array(4), NULL, array(7, 8, 9, 10)))#x] ++- OneRowRelation + + +-- !query +select isnotnull(arrays_zip(array(), array(4), array(7, 8, 9, 10))) +-- !query analysis +Project [isnotnull(arrays_zip(array(), array(4), array(7, 8, 9, 10), 0, 1, 2)) AS (arrays_zip(array(), array(4), array(7, 8, 9, 10)) IS NOT NULL)#x] ++- OneRowRelation + + +-- !query +select isnotnull(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10))) +-- !query analysis +Project [isnotnull(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2)) AS (arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)) IS NOT NULL)#x] ++- OneRowRelation + + +-- !query +select isnotnull(arrays_zip(array(1, 2, 3), NULL, array(4), array(7, 8, 9, 10))) +-- !query analysis +Project [isnotnull(arrays_zip(array(1, 2, 3), null, array(4), array(7, 8, 9, 10), 0, 1, 2, 3)) AS (arrays_zip(array(1, 2, 3), NULL, array(4), array(7, 8, 9, 10)) IS NOT NULL)#x] ++- OneRowRelation + + +-- !query +select get(array(1, 2, 3), 0) +-- !query analysis +Project [get(array(1, 2, 3), 0) AS get(array(1, 2, 3), 0)#x] ++- OneRowRelation + + +-- !query +select get(array(1, 2, 3), 3) +-- !query analysis +Project [get(array(1, 2, 3), 3) AS get(array(1, 2, 3), 3)#x] ++- 
OneRowRelation + + +-- !query +select get(array(1, 2, 3), null) +-- !query analysis +Project [get(array(1, 2, 3), null) AS get(array(1, 2, 3), NULL)#x] ++- OneRowRelation + + +-- !query +select get(array(1, 2, 3), -1) +-- !query analysis +Project [get(array(1, 2, 3), -1) AS get(array(1, 2, 3), -1)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(1, 2, 3), 3, 4) +-- !query analysis +Project [array_insert(array(1, 2, 3), 3, 4) AS array_insert(array(1, 2, 3), 3, 4)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(2, 3, 4), 0, 1) +-- !query analysis +Project [array_insert(array(2, 3, 4), 0, 1) AS array_insert(array(2, 3, 4), 0, 1)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(2, 3, 4), 1, 1) +-- !query analysis +Project [array_insert(array(2, 3, 4), 1, 1) AS array_insert(array(2, 3, 4), 1, 1)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(1, 3, 4), -2, 2) +-- !query analysis +Project [array_insert(array(1, 3, 4), -2, 2) AS array_insert(array(1, 3, 4), -2, 2)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(1, 2, 3), 3, "4") +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.ARRAY_FUNCTION_DIFF_TYPES", + "sqlState" : "42K09", + "messageParameters" : { + "dataType" : "\"ARRAY\"", + "functionName" : "`array_insert`", + "leftType" : "\"ARRAY<INT>\"", + "rightType" : "\"STRING\"", + "sqlExpr" : "\"array_insert(array(1, 2, 3), 3, 4)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 43, + "fragment" : "array_insert(array(1, 2, 3), 3, \"4\")" + } ] +} + + +-- !query +select array_insert(cast(NULL as ARRAY<INT>), 1, 1) +-- !query analysis +Project [array_insert(cast(null as array<int>), 1, 1) AS array_insert(NULL, 1, 1)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(1, 2, 3, NULL), cast(NULL as INT), 4) +-- !query analysis +Project [array_insert(array(1, 2, 3, cast(null as int)), cast(null as
int), 4) AS array_insert(array(1, 2, 3, NULL), CAST(NULL AS INT), 4)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(1, 2, 3, NULL), 4, cast(NULL as INT)) +-- !query analysis +Project [array_insert(array(1, 2, 3, cast(null as int)), 4, cast(null as int)) AS array_insert(array(1, 2, 3, NULL), 4, CAST(NULL AS INT))#x] ++- OneRowRelation + + +-- !query +select array_insert(array(2, 3, NULL, 4), 5, 5) +-- !query analysis +Project [array_insert(array(2, 3, cast(null as int), 4), 5, 5) AS array_insert(array(2, 3, NULL, 4), 5, 5)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(2, 3, NULL, 4), -5, 1) +-- !query analysis +Project [array_insert(array(2, 3, cast(null as int), 4), -5, 1) AS array_insert(array(2, 3, NULL, 4), -5, 1)#x] ++- OneRowRelation + + +-- !query +select array_compact(id) from values (1) as t(id) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"id\"", + "inputType" : "\"INT\"", + "paramIndex" : "1", + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"array_compact(id)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 24, + "fragment" : "array_compact(id)" + } ] +} + + +-- !query +select array_compact(array("1", null, "2", null)) +-- !query analysis +Project [array_compact(array(1, cast(null as string), 2, cast(null as string))) AS array_compact(array(1, NULL, 2, NULL))#x] ++- OneRowRelation + + +-- !query +select array_compact(array("a", "b", "c")) +-- !query analysis +Project [array_compact(array(a, b, c)) AS array_compact(array(a, b, c))#x] ++- OneRowRelation + + +-- !query +select array_compact(array(1D, null, 2D, null)) +-- !query analysis +Project [array_compact(array(1.0, cast(null as double), 2.0, cast(null as double))) AS array_compact(array(1.0, NULL, 2.0, NULL))#x] ++- OneRowRelation + + +-- !query +select 
array_compact(array(array(1, 2, 3, null), null, array(4, null, 6))) +-- !query analysis +Project [array_compact(array(array(1, 2, 3, cast(null as int)), cast(null as array<int>), array(4, cast(null as int), 6))) AS array_compact(array(array(1, 2, 3, NULL), NULL, array(4, NULL, 6)))#x] ++- OneRowRelation + + +-- !query +select array_compact(array(null)) +-- !query analysis +Project [array_compact(array(null)) AS array_compact(array(NULL))#x] ++- OneRowRelation + + +-- !query +select array_append(array(1, 2, 3), 4) +-- !query analysis +Project [array_append(array(1, 2, 3), 4) AS array_append(array(1, 2, 3), 4)#x] ++- OneRowRelation + + +-- !query +select array_append(array('a', 'b', 'c'), 'd') +-- !query analysis +Project [array_append(array(a, b, c), d) AS array_append(array(a, b, c), d)#x] ++- OneRowRelation + + +-- !query +select array_append(array(1, 2, 3, NULL), NULL) +-- !query analysis +Project [array_append(array(1, 2, 3, cast(null as int)), cast(null as int)) AS array_append(array(1, 2, 3, NULL), NULL)#x] ++- OneRowRelation + + +-- !query +select array_append(array('a', 'b', 'c', NULL), NULL) +-- !query analysis +Project [array_append(array(a, b, c, cast(null as string)), cast(null as string)) AS array_append(array(a, b, c, NULL), NULL)#x] ++- OneRowRelation + + +-- !query +select array_append(CAST(null AS ARRAY<String>), 'a') +-- !query analysis +Project [array_append(cast(null as array<string>), a) AS array_append(NULL, a)#x] ++- OneRowRelation + + +-- !query +select array_append(CAST(null AS ARRAY<String>), CAST(null as String)) +-- !query analysis +Project [array_append(cast(null as array<string>), cast(null as string)) AS array_append(NULL, CAST(NULL AS STRING))#x] ++- OneRowRelation + + +-- !query +select array_append(array(), 1) +-- !query analysis +Project [array_append(cast(array() as array<int>), 1) AS array_append(array(), 1)#x] ++- OneRowRelation + + +-- !query +select array_append(CAST(array() AS ARRAY<String>), CAST(NULL AS String)) +-- !query analysis +Project [array_append(cast(array() as
array<string>), cast(null as string)) AS array_append(array(), CAST(NULL AS STRING))#x] ++- OneRowRelation + + +-- !query +select array_append(array(CAST(NULL AS String)), CAST(NULL AS String)) +-- !query analysis +Project [array_append(array(cast(null as string)), cast(null as string)) AS array_append(array(CAST(NULL AS STRING)), CAST(NULL AS STRING))#x] ++- OneRowRelation diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index 25f3983090625..10ef2a32accc9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -22,6 +22,7 @@ import scala.util.control.NonFatal import org.apache.spark.{SparkException, SparkThrowable} import org.apache.spark.ErrorMessageFormat.MINIMAL import org.apache.spark.SparkThrowableHelper.getMessage +import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.HiveResult.hiveResultString @@ -29,12 +30,14 @@ import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} import org.apache.spark.sql.types.StructType -trait SQLQueryTestHelper { +trait SQLQueryTestHelper extends Logging { private val notIncludedMsg = "[not included in comparison]" private val clsName = this.getClass.getCanonicalName protected val emptySchema = StructType(Seq.empty).catalogString + protected val validFileExtensions = ".sql" + protected def replaceNotIncludedMsg(line: String): String = { line.replaceAll("#\\d+", "#x") .replaceAll("plan_id=\\d+", "plan_id=x") @@ -50,8 +53,21 @@ trait SQLQueryTestHelper { } + /** + * Analyzes a query and returns the result as (schema of the output, normalized resolved plan + * tree string representation).
+ */ + protected def getNormalizedQueryAnalysisResult( + session: SparkSession, sql: String): (String, Seq[String]) = { + val df = session.sql(sql) + val schema = df.schema.catalogString + // Get the answer, but also get rid of the #1234 expression IDs that show up in analyzer plans. + (schema, Seq(replaceNotIncludedMsg(df.queryExecution.analyzed.toString))) + } + /** Executes a query and returns the result as (schema of the output, normalized output). */ - protected def getNormalizedResult(session: SparkSession, sql: String): (String, Seq[String]) = { + protected def getNormalizedQueryExecutionResult( + session: SparkSession, sql: String): (String, Seq[String]) = { // Returns true if the plan is supposed to be sorted. def isSorted(plan: LogicalPlan): Boolean = plan match { case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 2dabcf01be7ed..5b6d587cb8df6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -120,6 +120,15 @@ import org.apache.spark.util.Utils * * Therefore, UDF test cases should have single input and output files but executed by three * different types of UDFs. See 'udf/udf-inner-join.sql' as an example. + * + * This test suite also implements end-to-end test cases using golden files for the purposes of + * exercising the analysis of SQL queries. The output of each test case for this suite is the string + * representation of the logical plan returned as output from the analyzer, rather than the result + * data from executing the query end-to-end. + * + * Each case has a golden result file in "spark/sql/core/src/test/resources/sql-tests/analyzer-results". + * Only input filenames in the "analyzerTestCaseList" below are included for this type of testing. 
+ * In the future, we may expand the coverage to all of the input test files instead. */ // scalastyle:on line.size.limit @ExtendedSQLTest @@ -137,8 +146,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected val inputFilePath = new File(baseResourcePath, "inputs").getAbsolutePath protected val goldenFilePath = new File(baseResourcePath, "results").getAbsolutePath - - protected val validFileExtensions = ".sql" + protected val analyzerGoldenFilePath = + new File(baseResourcePath, "analyzer-results").getAbsolutePath protected override def sparkConf: SparkConf = super.sparkConf // Fewer shuffle partitions to speed up testing. @@ -161,18 +170,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper // Create all the test cases. listTestCases.foreach(createScalaTestCase) - /** A single SQL query's output. */ - protected case class QueryOutput(sql: String, schema: String, output: String) { - override def toString: String = { - // We are explicitly not using multi-line string due to stripMargin removing "|" in output. - s"-- !query\n" + - sql + "\n" + - s"-- !query schema\n" + - schema + "\n" + - s"-- !query output\n" + - output - } - } + /** List of test cases to perform analyzer tests for. */ + protected def analyzerTestCaseList = Seq("array.sql") /** A test case. */ protected trait TestCase { @@ -187,14 +186,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper */ protected trait PgSQLTest - /** - * traits that indicate ANSI-related tests with the ANSI mode enabled. - */ + /** Trait that indicates ANSI-related tests with the ANSI mode enabled. */ protected trait AnsiTest - /** - * traits that indicate the default timestamp type is TimestampNTZType. - */ + /** Trait that indicates an analyzer test that shows the analyzed plan string as output. */ + protected trait AnalyzerTest + + /** Trait that indicates the default timestamp type is TimestampNTZType. 
*/ protected trait TimestampNTZTest protected trait UDFTest { @@ -205,6 +203,14 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected case class RegularTestCase( name: String, inputFile: String, resultFile: String) extends TestCase + /** An ANSI-related test case. */ + protected case class AnsiTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with AnsiTest + + /** An analyzer test that shows the analyzed plan string as output. */ + protected case class AnalyzerTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with AnalyzerTest + /** A PostgreSQL test case. */ protected case class PgSQLTestCase( name: String, inputFile: String, resultFile: String) extends TestCase with PgSQLTest @@ -230,10 +236,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper resultFile: String, udf: TestUDF) extends TestCase with UDFTest with PgSQLTest - /** An ANSI-related test case. */ - protected case class AnsiTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with AnsiTest - /** An date time test case with default timestamp as TimestampNTZType */ protected case class TimestampNTZTestCase( name: String, inputFile: String, resultFile: String) extends TestCase with TimestampNTZTest @@ -266,13 +268,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper case _ => // Create a test case to run this case. test(testCase.name) { - runTest(testCase) + runSqlTestCase(testCase, listTestCases) } } } /** Run a test case. 
*/ - protected def runTest(testCase: TestCase): Unit = { + protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { def splitWithSemicolon(seq: Seq[String]) = { seq.mkString("\n").split("(?<=[^\\\\]);") } @@ -414,13 +416,25 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } // Run the SQL queries preparing them for comparison. - val outputs: Seq[QueryOutput] = queries.map { sql => - val (schema, output) = handleExceptions(getNormalizedResult(localSparkSession, sql)) - // We might need to do some query canonicalization in the future. - QueryOutput( - sql = sql, - schema = schema, - output = output.mkString("\n").replaceAll("\\s+$", "")) + val outputs: Seq[QueryTestOutput] = queries.map { sql => + testCase match { + case _: AnalyzerTest => + val (_, output) = + handleExceptions(getNormalizedQueryAnalysisResult(localSparkSession, sql)) + // We might need to do some query canonicalization in the future. + AnalyzerOutput( + sql = sql, + schema = None, + output = output.mkString("\n").replaceAll("\\s+$", "")) + case _ => + val (schema, output) = + handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql)) + // We might need to do some query canonicalization in the future. + ExecutionOutput( + sql = sql, + schema = Some(schema), + output = output.mkString("\n").replaceAll("\\s+$", "")) + } } if (regenerateGoldenFiles) { @@ -460,39 +474,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } withClue(clue) { - // Read back the golden file. - val expectedOutputs: Seq[QueryOutput] = { - val goldenOutput = fileToString(new File(testCase.resultFile)) - val segments = goldenOutput.split("-- !query.*\n") - - // each query has 3 segments, plus the header - assert(segments.size == outputs.size * 3 + 1, - s"Expected ${outputs.size * 3 + 1} blocks in result file but got ${segments.size}.
" + - s"Try regenerate the result files.") - Seq.tabulate(outputs.size) { i => - QueryOutput( - sql = segments(i * 3 + 1).trim, - schema = segments(i * 3 + 2).trim, - output = segments(i * 3 + 3).replaceAll("\\s+$", "") - ) - } - } - - // Compare results. - assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") { - outputs.size - } - - outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => - assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") { - output.sql - } - assertResult(expected.schema, - s"Schema did not match for query #$i\n${expected.sql}: $output") { - output.schema - } - assertResult(expected.output, s"Result did not match" + - s" for query #$i\n${expected.sql}") { output.output } + testCase match { + case _: AnalyzerTest => // Same trait-based dispatch as when the outputs were built. + readGoldenFileAndCompareResults(testCase.resultFile, outputs, AnalyzerOutput) + case _ => + readGoldenFileAndCompareResults(testCase.resultFile, outputs, ExecutionOutput) } } } @@ -500,10 +486,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + val analyzerResultFile = + file.getAbsolutePath.replace(inputFilePath, analyzerGoldenFilePath) + ".out" val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) + val analyzerTestCaseName = s"${testCaseName}_analyzer_test" - if (file.getAbsolutePath.startsWith( + val newTestCase = if (file.getAbsolutePath.startsWith( s"$inputFilePath${File.separator}udf${File.separator}postgreSQL")) { Seq(TestScalaUDF("udf"), TestPythonUDF("udf"), TestScalarPandasUDF("udf")).map { udf => UDFPgSQLTestCase( @@ -528,6 +517,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } else {
RegularTestCase(testCaseName, absPath, resultFile) :: Nil } + if (analyzerTestCaseList.contains(file.getName.toLowerCase(Locale.ROOT))) { + AnalyzerTestCase(analyzerTestCaseName, absPath, analyzerResultFile) +: newTestCase + } else { + newTestCase + } }.sortBy(_.name) } @@ -541,7 +535,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } /** Load built-in test tables into the SparkSession. */ - private def createTestTables(session: SparkSession): Unit = { + protected def createTestTables(session: SparkSession): Unit = { import session.implicits._ // Before creating test tables, deletes orphan directories in warehouse dir @@ -641,7 +635,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper .saveAsTable("tenk1") } - private def removeTestTables(session: SparkSession): Unit = { + protected def removeTestTables(session: SparkSession): Unit = { session.sql("DROP TABLE IF EXISTS testdata") session.sql("DROP TABLE IF EXISTS arraydata") session.sql("DROP TABLE IF EXISTS mapdata") @@ -678,4 +672,102 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper super.afterAll() } } + + /** + * Consumes contents from a single golden file and compares the expected results against the + * output of running a query. + * @param resultFile path of the golden file to read back. + * @param outputs actual per-query outputs, in the same order as the golden file. + * @param makeOutput builds an expected output from (sql, optional schema, output) segments. + */ + def readGoldenFileAndCompareResults( + resultFile: String, + outputs: Seq[QueryTestOutput], + makeOutput: (String, Option[String], String) => QueryTestOutput): Unit = { + // Read back the golden file. + val expectedOutputs: Seq[QueryTestOutput] = { + val goldenOutput = fileToString(new File(resultFile)) + val segments = goldenOutput.split("-- !query.*\n") + + val numSegments = outputs.map(_.numSegments).sum + 1 + assert(segments.size == numSegments, + s"Expected $numSegments blocks in result file but got " + + s"${segments.size}.
Try regenerate the result files.") + var curSegment = 0 + outputs.map { output => + val result = if (output.numSegments == 3) { + makeOutput( + segments(curSegment + 1).trim, // SQL + Some(segments(curSegment + 2).trim), // Schema + segments(curSegment + 3).replaceAll("\\s+$", "")) // Output + } else { + makeOutput( + segments(curSegment + 1).trim, // SQL + None, // Schema + segments(curSegment + 2).replaceAll("\\s+$", "")) // Output + } + curSegment += output.numSegments + result + } + } + + // Compare results. + assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") { + outputs.size + } + + outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => + assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") { + output.sql + } + assertResult(expected.schema, + s"Schema did not match for query #$i\n${expected.sql}: $output") { + output.schema + } + assertResult(expected.output, s"Result did not match" + + s" for query #$i\n${expected.sql}") { + output.output + } + } + } + + /** A single SQL query's output. */ + trait QueryTestOutput { + def sql: String + def schema: Option[String] + def output: String + def numSegments: Int + } + + /** A single SQL query's execution output. */ + case class ExecutionOutput( + sql: String, + schema: Option[String], + output: String) extends QueryTestOutput { + override def toString: String = { + // We are explicitly not using multi-line string due to stripMargin removing "|" in output. + s"-- !query\n" + + sql + "\n" + + s"-- !query schema\n" + + schema.getOrElse("") + "\n" + + s"-- !query output\n" + + output + } + override def numSegments: Int = 3 + } + + /** A single SQL query's analysis results. */ + case class AnalyzerOutput( + sql: String, + schema: Option[String], + output: String) extends QueryTestOutput { + override def toString: String = { + // We are explicitly not using multi-line string due to stripMargin removing "|" in output.
+ s"-- !query\n" + + sql + "\n" + + s"-- !query analysis\n" + + output + } + override def numSegments: Int = 2 + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala index a525d582b091b..93369299fda3b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala @@ -107,7 +107,7 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp val shouldSortResults = sortMergeJoinConf != conf // Sort for other joins withSQLConf(conf.toSeq: _*) { try { - val (schema, output) = handleExceptions(getNormalizedResult(spark, query)) + val (schema, output) = handleExceptions(getNormalizedQueryExecutionResult(spark, query)) val queryString = query.trim val outputString = output.mkString("\n").replaceAll("\\s+$", "") if (regenerateGoldenFiles) { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index b850ffccd4ee3..cf0184f6608da 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -26,6 +26,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.spark.SparkException +import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.util.fileToString @@ -68,7 +69,7 @@ import org.apache.spark.sql.types._ * 4. Support UDAF testing. 
*/ // scalastyle:on line.size.limit -class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServer { +class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServer with Logging { override def mode: ServerMode.Value = ServerMode.binary @@ -125,17 +126,17 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ } // Run the SQL queries preparing them for comparison. - val outputs: Seq[QueryOutput] = queries.map { sql => + val outputs: Seq[QueryTestOutput] = queries.map { sql => val (_, output) = handleExceptions(getNormalizedResult(statement, sql)) // We might need to do some query canonicalization in the future. - QueryOutput( + ExecutionOutput( sql = sql, - schema = "", + schema = Some(""), output = output.mkString("\n").replaceAll("\\s+$", "")) } // Read back the golden file. - val expectedOutputs: Seq[QueryOutput] = { + val expectedOutputs: Seq[QueryTestOutput] = { val goldenOutput = fileToString(new File(testCase.resultFile)) val segments = goldenOutput.split("-- !query.*\n") @@ -152,9 +153,9 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ } else { originalOut } - QueryOutput( + ExecutionOutput( sql = sql, - schema = "", + schema = Some(""), output = output.replaceAll("\\s+$", "") ) } @@ -235,7 +236,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ } else { // Create a test case to run this case. test(testCase.name) { - runTest(testCase) + runSqlTestCase(testCase, listTestCases) } } }