From 3569e768e657d4e28ee7520808ec910cdff2b099 Mon Sep 17 00:00:00 2001 From: Takuya Ueshin Date: Mon, 13 Jan 2025 11:17:05 -0800 Subject: [PATCH 01/15] [SPARK-50789][CONNECT] The inputs for typed aggregations should be analyzed ### What changes were proposed in this pull request? Fixes `SparkConnectPlanner` to analyze the inputs for typed aggregations. ### Why are the changes needed? The inputs for typed aggregations should be analyzed. For example: ```scala val ds = Seq("abc", "xyz", "hello").toDS().select("*").as[String] ds.groupByKey(_.length).reduceGroups(_ + _).show() ``` fails with: ``` org.apache.spark.SparkException: [INTERNAL_ERROR] Invalid call to toAttribute on unresolved object SQLSTATE: XX000 org.apache.spark.sql.catalyst.analysis.Star.toAttribute(unresolved.scala:439) org.apache.spark.sql.catalyst.plans.logical.Project.$anonfun$output$1(basicLogicalOperators.scala:74) scala.collection.immutable.List.map(List.scala:247) scala.collection.immutable.List.map(List.scala:79) org.apache.spark.sql.catalyst.plans.logical.Project.output(basicLogicalOperators.scala:74) org.apache.spark.sql.connect.planner.SparkConnectPlanner.transformExpressionWithTypedReduceExpression(SparkConnectPlanner.scala:2340) org.apache.spark.sql.connect.planner.SparkConnectPlanner.$anonfun$transformKeyValueGroupedAggregate$1(SparkConnectPlanner.scala:2244) scala.collection.immutable.List.map(List.scala:247) scala.collection.immutable.List.map(List.scala:79) org.apache.spark.sql.connect.planner.SparkConnectPlanner.transformKeyValueGroupedAggregate(SparkConnectPlanner.scala:2244) org.apache.spark.sql.connect.planner.SparkConnectPlanner.transformAggregate(SparkConnectPlanner.scala:2232) ... ``` ### Does this PR introduce _any_ user-facing change? The failure will not appear. ### How was this patch tested? Added the related tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49449 from ueshin/issues/SPARK-50789/typed_agg. 
Authored-by: Takuya Ueshin Signed-off-by: Takuya Ueshin --- .../KeyValueGroupedDatasetE2ETestSuite.scala | 8 ++++ .../sql/UserDefinedFunctionE2ETestSuite.scala | 22 +++++++++- .../connect/planner/SparkConnectPlanner.scala | 43 +++++++++++++------ 3 files changed, 59 insertions(+), 14 deletions(-) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala index 6fd664d905408..021b4fea26e2a 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala @@ -460,6 +460,14 @@ class KeyValueGroupedDatasetE2ETestSuite extends QueryTest with RemoteSparkSessi (5, "hello")) } + test("SPARK-50789: reduceGroups on unresolved plan") { + val ds = Seq("abc", "xyz", "hello").toDS().select("*").as[String] + checkDatasetUnorderly( + ds.groupByKey(_.length).reduceGroups(_ + _), + (3, "abcxyz"), + (5, "hello")) + } + test("groupby") { val ds = Seq(("a", 1, 10), ("a", 2, 20), ("b", 2, 1), ("b", 1, 2), ("c", 1, 1)) .toDF("key", "seq", "value") diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala index 8415444c10aac..19275326d6421 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala @@ -401,6 +401,13 @@ class UserDefinedFunctionE2ETestSuite extends QueryTest with RemoteSparkSession assert(ds.select(aggCol).head() == 135) // 45 + 90 } + test("SPARK-50789: UDAF custom Aggregator - toColumn on unresolved plan") { + val encoder 
= Encoders.product[UdafTestInput] + val aggCol = new CompleteUdafTestInputAggregator().toColumn + val ds = spark.range(10).withColumn("extra", col("id") * 2).select("*").as(encoder) + assert(ds.select(aggCol).head() == 135) // 45 + 90 + } + test("UDAF custom Aggregator - multiple extends - toColumn") { val encoder = Encoders.product[UdafTestInput] val aggCol = new CompleteGrandChildUdafTestInputAggregator().toColumn @@ -408,11 +415,24 @@ class UserDefinedFunctionE2ETestSuite extends QueryTest with RemoteSparkSession assert(ds.select(aggCol).head() == 540) // (45 + 90) * 4 } - test("UDAF custom aggregator - with rows - toColumn") { + test("SPARK-50789: UDAF custom Aggregator - multiple extends - toColumn on unresolved plan") { + val encoder = Encoders.product[UdafTestInput] + val aggCol = new CompleteGrandChildUdafTestInputAggregator().toColumn + val ds = spark.range(10).withColumn("extra", col("id") * 2).select("*").as(encoder) + assert(ds.select(aggCol).head() == 540) // (45 + 90) * 4 + } + + test("UDAF custom Aggregator - with rows - toColumn") { val ds = spark.range(10).withColumn("extra", col("id") * 2) assert(ds.select(RowAggregator.toColumn).head() == 405) assert(ds.agg(RowAggregator.toColumn).head().getLong(0) == 405) } + + test("SPARK-50789: UDAF custom Aggregator - with rows - toColumn on unresolved plan") { + val ds = spark.range(10).withColumn("extra", col("id") * 2).select("*") + assert(ds.select(RowAggregator.toColumn).head() == 405) + assert(ds.agg(RowAggregator.toColumn).head().getLong(0) == 405) + } } case class UdafTestInput(id: Long, extra: Long) diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index c0b4384af8b6d..6ab69aea12e5d 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ 
b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -845,9 +845,10 @@ class SparkConnectPlanner( kEncoder: ExpressionEncoder[_], vEncoder: ExpressionEncoder[_], analyzed: LogicalPlan, - dataAttributes: Seq[Attribute], + analyzedData: LogicalPlan, groupingAttributes: Seq[Attribute], sortOrder: Seq[SortOrder]) { + val dataAttributes: Seq[Attribute] = analyzedData.output val valueDeserializer: Expression = UnresolvedDeserializer(vEncoder.deserializer, dataAttributes) } @@ -900,7 +901,7 @@ class SparkConnectPlanner( dummyFunc.outEnc, dummyFunc.inEnc, qe.analyzed, - analyzed.output, + analyzed, aliasedGroupings, sortOrder) } @@ -924,7 +925,7 @@ class SparkConnectPlanner( kEnc, vEnc, withGroupingKeyAnalyzed, - analyzed.output, + analyzed, withGroupingKey.newColumns, sortOrder) } @@ -1489,11 +1490,19 @@ class SparkConnectPlanner( logical.OneRowRelation() } + val logicalPlan = + if (rel.getExpressionsList.asScala.toSeq.exists( + _.getExprTypeCase == proto.Expression.ExprTypeCase.TYPED_AGGREGATE_EXPRESSION)) { + session.sessionState.executePlan(baseRel).analyzed + } else { + baseRel + } + val projection = rel.getExpressionsList.asScala.toSeq - .map(transformExpression(_, Some(baseRel))) + .map(transformExpression(_, Some(logicalPlan))) .map(toNamedExpression) - logical.Project(projectList = projection, child = baseRel) + logical.Project(projectList = projection, child = logicalPlan) } /** @@ -2241,7 +2250,7 @@ class SparkConnectPlanner( val keyColumn = TypedAggUtils.aggKeyColumn(ds.kEncoder, ds.groupingAttributes) val namedColumns = rel.getAggregateExpressionsList.asScala.toSeq - .map(expr => transformExpressionWithTypedReduceExpression(expr, input)) + .map(expr => transformExpressionWithTypedReduceExpression(expr, ds.analyzedData)) .map(toNamedExpression) logical.Aggregate(ds.groupingAttributes, keyColumn +: namedColumns, ds.analyzed) } @@ -2252,9 +2261,17 @@ class SparkConnectPlanner( } val input = 
transformRelation(rel.getInput) + val logicalPlan = + if (rel.getAggregateExpressionsList.asScala.toSeq.exists( + _.getExprTypeCase == proto.Expression.ExprTypeCase.TYPED_AGGREGATE_EXPRESSION)) { + session.sessionState.executePlan(input).analyzed + } else { + input + } + val groupingExprs = rel.getGroupingExpressionsList.asScala.toSeq.map(transformExpression) val aggExprs = rel.getAggregateExpressionsList.asScala.toSeq - .map(expr => transformExpressionWithTypedReduceExpression(expr, input)) + .map(expr => transformExpressionWithTypedReduceExpression(expr, logicalPlan)) val aliasedAgg = (groupingExprs ++ aggExprs).map(toNamedExpression) rel.getGroupType match { @@ -2262,19 +2279,19 @@ class SparkConnectPlanner( logical.Aggregate( groupingExpressions = groupingExprs, aggregateExpressions = aliasedAgg, - child = input) + child = logicalPlan) case proto.Aggregate.GroupType.GROUP_TYPE_ROLLUP => logical.Aggregate( groupingExpressions = Seq(Rollup(groupingExprs.map(Seq(_)))), aggregateExpressions = aliasedAgg, - child = input) + child = logicalPlan) case proto.Aggregate.GroupType.GROUP_TYPE_CUBE => logical.Aggregate( groupingExpressions = Seq(Cube(groupingExprs.map(Seq(_)))), aggregateExpressions = aliasedAgg, - child = input) + child = logicalPlan) case proto.Aggregate.GroupType.GROUP_TYPE_PIVOT => if (!rel.hasPivot) { @@ -2286,7 +2303,7 @@ class SparkConnectPlanner( rel.getPivot.getValuesList.asScala.toSeq.map(transformLiteral) } else { RelationalGroupedDataset - .collectPivotValues(Dataset.ofRows(session, input), Column(pivotExpr)) + .collectPivotValues(Dataset.ofRows(session, logicalPlan), Column(pivotExpr)) .map(expressions.Literal.apply) } logical.Pivot( @@ -2294,7 +2311,7 @@ class SparkConnectPlanner( pivotColumn = pivotExpr, pivotValues = valueExprs, aggregates = aggExprs, - child = input) + child = logicalPlan) case proto.Aggregate.GroupType.GROUP_TYPE_GROUPING_SETS => val groupingSetsExprs = rel.getGroupingSetsList.asScala.toSeq.map { getGroupingSets => @@ 
-2306,7 +2323,7 @@ class SparkConnectPlanner( groupingSets = groupingSetsExprs, userGivenGroupByExprs = groupingExprs)), aggregateExpressions = aliasedAgg, - child = input) + child = logicalPlan) case other => throw InvalidPlanInput(s"Unknown Group Type $other") } From c1e5f53cbe0fd8b1358d909e5126530abf3ce004 Mon Sep 17 00:00:00 2001 From: Livia Zhu Date: Tue, 14 Jan 2025 08:26:17 +0800 Subject: [PATCH 02/15] [SPARK-50791][SQL] Fix NPE in State Store error handling ### What changes were proposed in this pull request? Directly calling `contains` on a `SparkException.getCondition` may result in a NullPointerException if `getCondition` returns `null`. This change wraps `getCondition` in an `Option` for safe handling. ### Why are the changes needed? Throwing an NPE is a bug. ### Does this PR introduce _any_ user-facing change? Yes. Previously, on SparkExceptions such as OOM that do not have a set condition, users would see a NullPointerException. Now they will correctly see a CANNOT_LOAD_STATE_STORE exception. ### How was this patch tested? Existing tests. ### Was this patch authored or co-authored using generative AI tooling? No Closes #49451 from liviazhu-db/liviazhu-db/statestore-npe. 
Authored-by: Livia Zhu Signed-off-by: yangjie01 --- .../streaming/state/HDFSBackedStateStoreProvider.scala | 3 ++- .../streaming/state/RocksDBStateStoreProvider.scala | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index ae06e82335b12..2deccb845fea2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -291,7 +291,8 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with newMap } catch { - case e: SparkException if e.getCondition.contains("CANNOT_LOAD_STATE_STORE") => + case e: SparkException + if Option(e.getCondition).exists(_.contains("CANNOT_LOAD_STATE_STORE")) => throw e case e: OutOfMemoryError => throw QueryExecutionErrors.notEnoughMemoryToLoadStore( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala index 60652367f3351..9fc48a60d7c6a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala @@ -439,7 +439,8 @@ private[sql] class RocksDBStateStoreProvider new RocksDBStateStore(version) } catch { - case e: SparkException if e.getCondition.contains("CANNOT_LOAD_STATE_STORE") => + case e: SparkException + if Option(e.getCondition).exists(_.contains("CANNOT_LOAD_STATE_STORE")) => throw e case e: OutOfMemoryError => throw QueryExecutionErrors.notEnoughMemoryToLoadStore( @@ -462,7 +463,8 @@ 
private[sql] class RocksDBStateStoreProvider new RocksDBStateStore(version) } catch { - case e: SparkException if e.getCondition.contains("CANNOT_LOAD_STATE_STORE") => + case e: SparkException + if Option(e.getCondition).exists(_.contains("CANNOT_LOAD_STATE_STORE")) => throw e case e: OutOfMemoryError => throw QueryExecutionErrors.notEnoughMemoryToLoadStore( From 313e824931fd9b407b650fb1a8c11157dc3fe676 Mon Sep 17 00:00:00 2001 From: Takuya Ueshin Date: Mon, 13 Jan 2025 16:36:34 -0800 Subject: [PATCH 03/15] [SPARK-50392][PYTHON][FOLLOWUP] Move `import`s into methods to fix `connect-only` builds ### What changes were proposed in this pull request? Move imports into methods to fix connect-only builds. ### Why are the changes needed? #49055 broke the connect-only builds: https://github.com/apache/spark/pull/49055#pullrequestreview-2545547927 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49472 from ueshin/issues/SPARK-50392/fup. 
Authored-by: Takuya Ueshin Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/table_arg.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/table_arg.py b/python/pyspark/sql/table_arg.py index d4b5e1653c7a1..cacfd24b2f1ba 100644 --- a/python/pyspark/sql/table_arg.py +++ b/python/pyspark/sql/table_arg.py @@ -17,7 +17,6 @@ from typing import TYPE_CHECKING -from pyspark.sql.classic.column import _to_java_column, _to_seq from pyspark.sql.tvf_argument import TableValuedFunctionArgument from pyspark.sql.utils import get_active_spark_context @@ -32,6 +31,8 @@ def __init__(self, j_table_arg: "JavaObject"): self._j_table_arg = j_table_arg def partitionBy(self, *cols: "ColumnOrName") -> "TableArg": + from pyspark.sql.classic.column import _to_java_column, _to_seq + sc = get_active_spark_context() if len(cols) == 1 and isinstance(cols[0], list): cols = cols[0] @@ -40,6 +41,8 @@ def partitionBy(self, *cols: "ColumnOrName") -> "TableArg": return TableArg(new_j_table_arg) def orderBy(self, *cols: "ColumnOrName") -> "TableArg": + from pyspark.sql.classic.column import _to_java_column, _to_seq + sc = get_active_spark_context() if len(cols) == 1 and isinstance(cols[0], list): cols = cols[0] From c5529d31513de1df4f596670d7aeea2455ae2199 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 13 Jan 2025 16:47:05 -0800 Subject: [PATCH 04/15] [SPARK-50773][CORE] Disable structured logging by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR restores the default Spark logging format to plain text instead of JSON. After this change, compared to Spark 3.x releases, users can optionally enable structured logging by setting the configuration `spark.log.structuredLogging.enabled` to `true` (default is `false`). For additional customization, users can copy `log4j2-json-layout.properties.template` to `conf/log4j2.properties` and adjust as needed. 
### Why are the changes needed? After discussions on the [dev mailing list](https://lists.apache.org/thread/4fnlnvhsqym72k53jw8cjhcdjbhpmm95), Spark developers decided to revert to the previous plain text logging format for two main reasons: * Readability: JSON logs are verbose and not easily human-readable. * Setup Requirements: Structured logging requires a logging pipeline to collect JSON logs from both drivers and executors. Enabling it by default doesn’t provide much benefit without this infrastructure in place. ### Does this PR introduce _any_ user-facing change? No, Spark 4.0 has not yet been released. ### How was this patch tested? Existing tests. Also manually tested enabling and disabling the configuration `spark.log.structuredLogging.enabled`, and verified the logging behavior with different log4j2 templates. ### Was this patch authored or co-authored using generative AI tooling? No Closes #49421 from gengliangwang/disableStructuredLogging. Authored-by: Gengliang Wang Signed-off-by: Dongjoon Hyun --- .../apache/spark/log4j2-defaults.properties | 4 ++-- ...operties => log4j2-json-layout.properties} | 4 ++-- .../org/apache/spark/internal/Logging.scala | 6 ++--- .../util/StructuredSparkLoggerSuite.java | 16 +++++++++++++ .../org/apache/spark/util/MDCSuite.scala | 12 +++++++++- .../spark/util/PatternLoggingSuite.scala | 5 +--- .../spark/util/StructuredLoggingSuite.scala | 16 ++++++++++++- ...=> log4j2-json-layout.properties.template} | 10 ++------ conf/log4j2.properties.template | 10 ++++++-- .../spark/internal/config/package.scala | 2 +- .../scala/org/apache/spark/util/Utils.scala | 2 +- docs/configuration.md | 23 ++++++++----------- docs/core-migration-guide.md | 4 ---- .../org/apache/spark/sql/LogQuerySuite.scala | 6 +++++ 14 files changed, 78 insertions(+), 42 deletions(-) rename common/utils/src/main/resources/org/apache/spark/{log4j2-pattern-layout-defaults.properties => log4j2-json-layout.properties} (94%) rename 
conf/{log4j2.properties.pattern-layout-template => log4j2-json-layout.properties.template} (80%) diff --git a/common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties b/common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties index 9be86b650d091..777c5f2b25915 100644 --- a/common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties +++ b/common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties @@ -22,8 +22,8 @@ rootLogger.appenderRef.stdout.ref = console appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR -appender.console.layout.type = JsonTemplateLayout -appender.console.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex # Settings to quiet third party logs that are too verbose logger.jetty.name = org.sparkproject.jetty diff --git a/common/utils/src/main/resources/org/apache/spark/log4j2-pattern-layout-defaults.properties b/common/utils/src/main/resources/org/apache/spark/log4j2-json-layout.properties similarity index 94% rename from common/utils/src/main/resources/org/apache/spark/log4j2-pattern-layout-defaults.properties rename to common/utils/src/main/resources/org/apache/spark/log4j2-json-layout.properties index 777c5f2b25915..9be86b650d091 100644 --- a/common/utils/src/main/resources/org/apache/spark/log4j2-pattern-layout-defaults.properties +++ b/common/utils/src/main/resources/org/apache/spark/log4j2-json-layout.properties @@ -22,8 +22,8 @@ rootLogger.appenderRef.stdout.ref = console appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR -appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex +appender.console.layout.type = JsonTemplateLayout +appender.console.layout.eventTemplateUri = 
classpath:org/apache/spark/SparkLayout.json # Settings to quiet third party logs that are too verbose logger.jetty.name = org.sparkproject.jetty diff --git a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala index 7471b764bd2b3..4b60cb20f0732 100644 --- a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala @@ -337,9 +337,9 @@ trait Logging { if (Logging.defaultSparkLog4jConfig || Logging.islog4j2DefaultConfigured()) { Logging.defaultSparkLog4jConfig = true val defaultLogProps = if (Logging.isStructuredLoggingEnabled) { - "org/apache/spark/log4j2-defaults.properties" + "org/apache/spark/log4j2-json-layout.properties" } else { - "org/apache/spark/log4j2-pattern-layout-defaults.properties" + "org/apache/spark/log4j2-defaults.properties" } Option(SparkClassUtils.getSparkClassLoader.getResource(defaultLogProps)) match { case Some(url) => @@ -398,7 +398,7 @@ private[spark] object Logging { @volatile private var initialized = false @volatile private var defaultRootLevel: Level = null @volatile private var defaultSparkLog4jConfig = false - @volatile private var structuredLoggingEnabled = true + @volatile private var structuredLoggingEnabled = false @volatile private[spark] var sparkShellThresholdLevel: Level = null @volatile private[spark] var setLogLevelPrinted: Boolean = false diff --git a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java index 6959fe11820ff..1fab167adfeb0 100644 --- a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java +++ b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java @@ -21,11 +21,27 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.logging.log4j.Level; +import 
org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; + +import org.apache.spark.internal.Logging$; import org.apache.spark.internal.SparkLogger; import org.apache.spark.internal.SparkLoggerFactory; public class StructuredSparkLoggerSuite extends SparkLoggerSuiteBase { + // Enable Structured Logging before running the tests + @BeforeAll + public static void setup() { + Logging$.MODULE$.enableStructuredLogging(); + } + + // Disable Structured Logging after running the tests + @AfterAll + public static void teardown() { + Logging$.MODULE$.disableStructuredLogging(); + } + private static final SparkLogger LOGGER = SparkLoggerFactory.getLogger(StructuredSparkLoggerSuite.class); diff --git a/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala index 7631c25662219..9615eb2263636 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.util import scala.jdk.CollectionConverters._ +import org.scalatest.BeforeAndAfterAll import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite import org.apache.spark.internal.{Logging, MDC} @@ -26,7 +27,16 @@ import org.apache.spark.internal.LogKeys.{EXIT_CODE, OFFSET, RANGE} class MDCSuite extends AnyFunSuite // scalastyle:ignore funsuite - with Logging { + with Logging + with BeforeAndAfterAll { + + override def beforeAll(): Unit = { + Logging.enableStructuredLogging() + } + + override def afterAll(): Unit = { + Logging.disableStructuredLogging() + } test("check MDC message") { val log = log"This is a log, exitcode ${MDC(EXIT_CODE, 10086)}" diff --git a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala index 2ba2b15c49f33..248136798b362 100644 --- 
a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala @@ -17,19 +17,16 @@ package org.apache.spark.util import org.apache.logging.log4j.Level -import org.scalatest.BeforeAndAfterAll import org.apache.spark.internal.Logging -class PatternLoggingSuite extends LoggingSuiteBase with BeforeAndAfterAll { +class PatternLoggingSuite extends LoggingSuiteBase { override def className: String = classOf[PatternLoggingSuite].getSimpleName override def logFilePath: String = "target/pattern.log" override def beforeAll(): Unit = Logging.disableStructuredLogging() - override def afterAll(): Unit = Logging.enableStructuredLogging() - override def expectedPatternForBasicMsg(level: Level): String = { s""".*$level $className: This is a log message\n""" } diff --git a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala index 48951c2084f17..0026b696f0695 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala @@ -23,14 +23,21 @@ import java.nio.file.Files import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import org.apache.logging.log4j.Level +import org.scalatest.BeforeAndAfterAll import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite import org.apache.spark.internal.{LogEntry, Logging, LogKey, LogKeys, MDC, MessageWithContext} trait LoggingSuiteBase extends AnyFunSuite // scalastyle:ignore funsuite + with BeforeAndAfterAll with Logging { + override def afterAll(): Unit = { + super.afterAll() + Logging.disableStructuredLogging() + } + def className: String def logFilePath: String @@ -202,7 +209,7 @@ trait LoggingSuiteBase } } - private val customLog = 
log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message.")}" + private lazy val customLog = log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message.")}" test("Logging with custom LogKey") { Seq( (Level.ERROR, () => logError(customLog)), @@ -265,6 +272,13 @@ class StructuredLoggingSuite extends LoggingSuiteBase { override def className: String = classOf[StructuredLoggingSuite].getSimpleName override def logFilePath: String = "target/structured.log" + override def beforeAll(): Unit = { + super.beforeAll() + Logging.enableStructuredLogging() + } + + override def afterAll(): Unit = super.afterAll() + private val jsonMapper = new ObjectMapper().registerModule(DefaultScalaModule) private def compactAndToRegexPattern(json: String): String = { jsonMapper.readTree(json).toString. diff --git a/conf/log4j2.properties.pattern-layout-template b/conf/log4j2-json-layout.properties.template similarity index 80% rename from conf/log4j2.properties.pattern-layout-template rename to conf/log4j2-json-layout.properties.template index 011fca58c9b2a..76499bb6691e7 100644 --- a/conf/log4j2.properties.pattern-layout-template +++ b/conf/log4j2-json-layout.properties.template @@ -19,17 +19,11 @@ rootLogger.level = info rootLogger.appenderRef.stdout.ref = console -# In the pattern layout configuration below, we specify an explicit `%ex` conversion -# pattern for logging Throwables. If this was omitted, then (by default) Log4J would -# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional -# class packaging information. That extra information can sometimes add a substantial -# performance overhead, so we disable it in our default logging config. -# For more information, see SPARK-39361. 
appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR -appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex +appender.console.layout.type = JsonTemplateLayout +appender.console.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json # Set the default spark-shell/spark-sql log level to WARN. When running the # spark-shell/spark-sql, the log level for these classes is used to overwrite diff --git a/conf/log4j2.properties.template b/conf/log4j2.properties.template index 76499bb6691e7..011fca58c9b2a 100644 --- a/conf/log4j2.properties.template +++ b/conf/log4j2.properties.template @@ -19,11 +19,17 @@ rootLogger.level = info rootLogger.appenderRef.stdout.ref = console +# In the pattern layout configuration below, we specify an explicit `%ex` conversion +# pattern for logging Throwables. If this was omitted, then (by default) Log4J would +# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional +# class packaging information. That extra information can sometimes add a substantial +# performance overhead, so we disable it in our default logging config. +# For more information, see SPARK-39361. appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR -appender.console.layout.type = JsonTemplateLayout -appender.console.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex # Set the default spark-shell/spark-sql log level to WARN. 
When running the # spark-shell/spark-sql, the log level for these classes is used to overwrite diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 6d51424f0baff..5dda7afc3ebcb 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -162,7 +162,7 @@ package object config { "PySpark shell.") .version("4.0.0") .booleanConf - .createWithDefault(true) + .createWithDefault(false) private[spark] val LEGACY_TASK_NAME_MDC_ENABLED = ConfigBuilder("spark.log.legacyTaskNameMdc.enabled") diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index e7b65bf1a4eff..536c6b4447aac 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2689,7 +2689,7 @@ private[spark] object Utils * loading SparkConf. */ def resetStructuredLogging(sparkConf: SparkConf): Unit = { - if (sparkConf.getBoolean(STRUCTURED_LOGGING_ENABLED.key, defaultValue = true)) { + if (sparkConf.get(STRUCTURED_LOGGING_ENABLED)) { Logging.enableStructuredLogging() } else { Logging.disableStructuredLogging() diff --git a/docs/configuration.md b/docs/configuration.md index f080a79580683..162165ffe68dd 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -3751,15 +3751,20 @@ Note: When running Spark on YARN in `cluster` mode, environment variables need t # Configuring Logging -Spark uses [log4j](http://logging.apache.org/log4j/) for logging. You can configure it by adding a -`log4j2.properties` file in the `conf` directory. One way to start is to copy the existing templates `log4j2.properties.template` or `log4j2.properties.pattern-layout-template` located there. +Spark uses [log4j](http://logging.apache.org/log4j/) for logging. 
You can configure it by adding a `log4j2.properties` file in the `conf` directory. To get started, copy one of the provided templates: `log4j2.properties.template` (for plain text logging) or `log4j2-json-layout.properties.template` (for structured logging). + +## Plain Text Logging +The default logging format is plain text, using Log4j's [Pattern Layout](https://logging.apache.org/log4j/2.x/manual/pattern-layout.html). + +MDC (Mapped Diagnostic Context) information is not included by default in plain text logs. To include it, update the `PatternLayout` configuration in the `log4j2.properties` file. For example, add `%X{task_name}` to include the task name in logs. Additionally, use `spark.sparkContext.setLocalProperty("key", "value")` to add custom data to the MDC. ## Structured Logging -Starting from version 4.0.0, `spark-submit` has adopted the [JSON Template Layout](https://logging.apache.org/log4j/2.x/manual/json-template-layout.html) for logging, which outputs logs in JSON format. This format facilitates querying logs using Spark SQL with the JSON data source. Additionally, the logs include all Mapped Diagnostic Context (MDC) information for search and debugging purposes. +Starting with version 4.0.0, `spark-submit` supports optional structured logging using the [JSON Template Layout](https://logging.apache.org/log4j/2.x/manual/json-template-layout.html). This format enables efficient querying of logs with Spark SQL using the JSON data source and includes all MDC information for improved searchability and debugging. -To configure the layout of structured logging, start with the `log4j2.properties.template` file. +To enable structured logging and include MDC information, set the configuration `spark.log.structuredLogging.enabled` to `true` (default is `false`). For additional customization, copy `log4j2-json-layout.properties.template` to `conf/log4j2.properties` and adjust as needed. 
-To query Spark logs using Spark SQL, you can use the following code snippets: +### Querying Structured Logs with Spark SQL +To query structured logs in JSON format, use the following code snippet: **Python:** ```python @@ -3775,14 +3780,6 @@ import org.apache.spark.util.LogUtils.SPARK_LOG_SCHEMA val logDf = spark.read.schema(SPARK_LOG_SCHEMA).json("path/to/logs") ``` **Note**: If you're using the interactive shell (pyspark shell or spark-shell), you can omit the import statement in the code because SPARK_LOG_SCHEMA is already available in the shell's context. -## Plain Text Logging -If you prefer plain text logging, you have two options: -- Disable structured JSON logging by setting the Spark configuration `spark.log.structuredLogging.enabled` to `false`. -- Use a custom log4j configuration file. Rename `conf/log4j2.properties.pattern-layout-template` to `conf/log4j2.properties`. This reverts to the default configuration prior to Spark 4.0, which utilizes [PatternLayout](https://logging.apache.org/log4j/2.x/manual/layouts.html#PatternLayout) for logging all messages in plain text. - -MDC information is not included by default when with plain text logging. In order to print it in the logs, you can update the patternLayout in the file. For example, you can add `%X{task_name}` to print the task name in the logs. -Moreover, you can use `spark.sparkContext.setLocalProperty(s"mdc.$name", "value")` to add user specific data into MDC. -The key in MDC will be the string of `mdc.$name`. # Overriding configuration directory diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 49737392312a7..9dcf4ad8a2984 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -44,10 +44,6 @@ license: | - Since Spark 4.0, Spark uses the external shuffle service for deleting shuffle blocks for deallocated executors when the shuffle is no longer needed. To restore the legacy behavior, you can set `spark.shuffle.service.removeShuffle` to `false`. 
-- Starting with Spark 4.0, the default logging format for `spark-submit` has changed from plain text to JSON lines to improve log analysis. If you prefer plain text logs, you have two options: - - Set the Spark configuration `spark.log.structuredLogging.enabled` to `false`. For example, you can use `JDK_JAVA_OPTIONS=-Dspark.log.structuredLogging.enabled=false`. - - Use a custom log4j configuration file, such as renaming the template file `conf/log4j2.properties.pattern-layout-template` to `conf/log4j2.properties`. - - Since Spark 4.0, the MDC (Mapped Diagnostic Context) key for Spark task names in Spark logs has been changed from `mdc.taskName` to `task_name`. To use the key `mdc.taskName`, you can set `spark.log.legacyTaskNameMdc.enabled` to `true`. - Since Spark 4.0, Spark performs speculative executions less aggressively with `spark.speculation.multiplier=3` and `spark.speculation.quantile=0.9`. To restore the legacy behavior, you can set `spark.speculation.multiplier=1.5` and `spark.speculation.quantile=0.75`. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/LogQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/LogQuerySuite.scala index 873337e7a4242..861b0bf0f3945 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/LogQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/LogQuerySuite.scala @@ -33,12 +33,18 @@ class LogQuerySuite extends QueryTest with SharedSparkSession with Logging { new File(pwd + "/target/LogQuerySuite.log") } + override def beforeAll(): Unit = { + super.beforeAll() + Logging.enableStructuredLogging() + } + override def afterAll(): Unit = { super.afterAll() // Clear the log file if (logFile.exists()) { logFile.delete() } + Logging.disableStructuredLogging() } private def createTempView(viewName: String): Unit = { From 0a8798f378ca14a32dad8c48ac085f668252d7b8 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 14 Jan 2025 10:00:10 +0900 Subject: [PATCH 05/15] [SPARK-48745][INFRA][PYTHON][TESTS][FOLLOWUP] use `conda-incubator/setup-miniconda` action ### What changes were proposed in this pull request? This PR follows the PR https://github.com/apache/spark/pull/49441, use `conda-incubator/setup-miniconda` instead of manual installation. ### Why are the changes needed? Reduce complexity. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually check. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49465 from panbingkun/SPARK-48745_FOLLOWUP. 
Authored-by: panbingkun Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index b5f7ba7383a0b..292971092b70d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -600,19 +600,16 @@ jobs: done - name: Install Conda for pip packaging test if: contains(matrix.modules, 'pyspark-errors') - run: | - curl -s -L "https://github.com/conda-forge/miniforge/releases/download/24.11.2-1/Miniforge3-Linux-x86_64.sh" > miniforge3.sh - bash miniforge3.sh -b -p $HOME/miniforge3 - rm miniforge3.sh + uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-version: latest # Run the tests. - name: Run tests env: ${{ fromJSON(inputs.envs) }} shell: 'script -q -e -c "bash {0}"' run: | if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then - export PATH=$PATH:$HOME/miniforge3/bin - env - which conda + export PATH=$CONDA/bin:$PATH export SKIP_PACKAGING=false echo "Python Packaging Tests Enabled!" fi From 22731393069a3f180a9e719e57a694347c0ce87b Mon Sep 17 00:00:00 2001 From: Takuya Ueshin Date: Mon, 13 Jan 2025 18:22:16 -0800 Subject: [PATCH 06/15] [SPARK-50601][SQL] Support withColumns / withColumnsRenamed in subqueries ### What changes were proposed in this pull request? Supports `withColumns` / `withColumnsRenamed` in subqueries. ### Why are the changes needed? When the query is used as a subquery by adding `col.outer()`, `withColumns` or `withColumnsRenamed` doesn't work because they need analyzed plans. ### Does this PR introduce _any_ user-facing change? Yes, those APIs are available in subqueries. ### How was this patch tested? Added the related tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49386 from ueshin/issues/SPARK-50601/with_columns. 
Lead-authored-by: Takuya Ueshin Co-authored-by: Takuya UESHIN Signed-off-by: Takuya Ueshin --- .../spark/sql/DataFrameSubquerySuite.scala | 57 ++++++-- .../sql/tests/connect/test_parity_subquery.py | 4 - python/pyspark/sql/tests/test_subquery.py | 39 +++++- .../sql/catalyst/analysis/unresolved.scala | 132 ++++++++++++++++-- .../connect/planner/SparkConnectPlanner.scala | 33 +++-- .../planner/SparkConnectPlannerSuite.scala | 33 ++--- .../scala/org/apache/spark/sql/Dataset.scala | 56 ++------ .../spark/sql/DataFrameSubquerySuite.scala | 48 ++++++- 8 files changed, 295 insertions(+), 107 deletions(-) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala index 4b36d36983a5d..1d2165b668f61 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql -import org.apache.spark.{SparkException, SparkRuntimeException} +import org.apache.spark.SparkRuntimeException import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.{QueryTest, RemoteSparkSession} @@ -665,15 +665,52 @@ class DataFrameSubquerySuite extends QueryTest with RemoteSparkSession { withView("t1") { val t1 = table1() - // TODO(SPARK-50601): Fix the SparkConnectPlanner to support this case - checkError( - intercept[SparkException] { - t1.withColumn("scalar", spark.range(1).select($"c1".outer() + $"c2".outer()).scalar()) - .collect() - }, - "INTERNAL_ERROR", - parameters = Map("message" -> "Found the unresolved operator: .*"), - matchPVals = true) + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .select($"c1".outer() + $"c2".outer()) + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + + checkAnswer( + t1.withColumn( + "scalar", + 
spark + .range(1) + .withColumn("c1", $"c1".outer()) + .select($"c1" + $"c2".outer()) + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .select($"c1".outer().as("c1")) + .withColumn("c2", $"c2".outer()) + .select($"c1" + $"c2") + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + } + } + + test("subquery in withColumnsRenamed") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .select($"c1".outer().as("c1"), $"c2".outer().as("c2")) + .withColumnsRenamed(Map("c1" -> "x", "c2" -> "y")) + .select($"x" + $"y") + .scalar()), + t1.select($"*", ($"c1".as("x") + $"c2".as("y")).as("scalar"))) } } diff --git a/python/pyspark/sql/tests/connect/test_parity_subquery.py b/python/pyspark/sql/tests/connect/test_parity_subquery.py index dae60a354d20a..f3225fcb7f2dd 100644 --- a/python/pyspark/sql/tests/connect/test_parity_subquery.py +++ b/python/pyspark/sql/tests/connect/test_parity_subquery.py @@ -45,10 +45,6 @@ def test_scalar_subquery_with_missing_outer_reference(self): def test_subquery_in_unpivot(self): self.check_subquery_in_unpivot(None, None) - @unittest.skip("SPARK-50601: Fix the SparkConnectPlanner to support this case") - def test_subquery_in_with_columns(self): - super().test_subquery_in_with_columns() - if __name__ == "__main__": from pyspark.sql.tests.connect.test_parity_subquery import * # noqa: F401 diff --git a/python/pyspark/sql/tests/test_subquery.py b/python/pyspark/sql/tests/test_subquery.py index 99a22d7c29664..7c63ddb69458e 100644 --- a/python/pyspark/sql/tests/test_subquery.py +++ b/python/pyspark/sql/tests/test_subquery.py @@ -939,7 +939,44 @@ def test_subquery_in_with_columns(self): .select(sf.col("c1").outer() + sf.col("c2").outer()) .scalar(), ), - t1.withColumn("scalar", sf.col("c1") + sf.col("c2")), + t1.select("*", (sf.col("c1") + sf.col("c2")).alias("scalar")), + ) + assertDataFrameEqual( + 
t1.withColumn( + "scalar", + self.spark.range(1) + .withColumn("c1", sf.col("c1").outer()) + .select(sf.col("c1") + sf.col("c2").outer()) + .scalar(), + ), + t1.select("*", (sf.col("c1") + sf.col("c2")).alias("scalar")), + ) + assertDataFrameEqual( + t1.withColumn( + "scalar", + self.spark.range(1) + .select(sf.col("c1").outer().alias("c1")) + .withColumn("c2", sf.col("c2").outer()) + .select(sf.col("c1") + sf.col("c2")) + .scalar(), + ), + t1.select("*", (sf.col("c1") + sf.col("c2")).alias("scalar")), + ) + + def test_subquery_in_with_columns_renamed(self): + with self.tempView("t1"): + t1 = self.table1() + + assertDataFrameEqual( + t1.withColumn( + "scalar", + self.spark.range(1) + .select(sf.col("c1").outer().alias("c1"), sf.col("c2").outer().alias("c2")) + .withColumnsRenamed({"c1": "x", "c2": "y"}) + .select(sf.col("x") + sf.col("y")) + .scalar(), + ), + t1.select("*", (sf.col("c1").alias("x") + sf.col("c2").alias("y")).alias("scalar")), ) def test_subquery_in_drop(self): diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index b47af90c651a6..fabe551d054ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId import org.apache.spark.sql.connector.catalog.TableWritePrivilege import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.types.{DataType, Metadata, StructType} -import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.sql.util.{CaseInsensitiveStringMap, SchemaUtils} import org.apache.spark.util.ArrayImplicits._ /** @@ -429,7 +429,7 @@ object UnresolvedFunction { * Represents all of the input attributes to a given relational operator, for 
example in * "SELECT * FROM ...". A [[Star]] gets automatically expanded during analysis. */ -abstract class Star extends LeafExpression with NamedExpression { +trait Star extends NamedExpression { override def name: String = throw new UnresolvedException("name") override def exprId: ExprId = throw new UnresolvedException("exprId") @@ -451,15 +451,20 @@ abstract class Star extends LeafExpression with NamedExpression { * This is also used to expand structs. For example: * "SELECT record.* from (SELECT struct(a,b,c) as record ...) * - * @param target an optional name that should be the target of the expansion. If omitted all - * targets' columns are produced. This can either be a table name or struct name. This - * is a list of identifiers that is the path of the expansion. - * - * This class provides the shared behavior between the classes for SELECT * ([[UnresolvedStar]]) - * and SELECT * EXCEPT ([[UnresolvedStarExceptOrReplace]]). [[UnresolvedStar]] is just a case class - * of this, while [[UnresolvedStarExceptOrReplace]] adds some additional logic to the expand method. + * This trait provides the shared behavior among the classes for SELECT * ([[UnresolvedStar]]) + * and SELECT * EXCEPT ([[UnresolvedStarExceptOrReplace]]), etc. [[UnresolvedStar]] is just a case + * class of this, while [[UnresolvedStarExceptOrReplace]] or other classes add some additional logic + * to the expand method. */ -abstract class UnresolvedStarBase(target: Option[Seq[String]]) extends Star with Unevaluable { +trait UnresolvedStarBase extends Star with Unevaluable { + + /** + * An optional name that should be the target of the expansion. If omitted all + * targets' columns are produced. This can either be a table name or struct name. This + * is a list of identifiers that is the path of the expansion. + */ + def target: Option[Seq[String]] + /** * Returns true if the nameParts is a subset of the last elements of qualifier of the attribute. 
* @@ -583,7 +588,7 @@ case class UnresolvedStarExceptOrReplace( target: Option[Seq[String]], excepts: Seq[Seq[String]], replacements: Option[Seq[NamedExpression]]) - extends UnresolvedStarBase(target) { + extends LeafExpression with UnresolvedStarBase { /** * We expand the * EXCEPT by the following three steps: @@ -712,6 +717,103 @@ case class UnresolvedStarExceptOrReplace( } } +/** + * Represents some of the input attributes to a given relational operator, for example in + * `df.withColumn`. + * + * @param colNames a list of column names that should be replaced or produced. + * + * @param exprs the corresponding expressions for `colNames`. + * + * @param explicitMetadata an optional list of explicit metadata to associate with the columns. + */ +case class UnresolvedStarWithColumns( + colNames: Seq[String], + exprs: Seq[Expression], + explicitMetadata: Option[Seq[Metadata]] = None) + extends UnresolvedStarBase { + + override def target: Option[Seq[String]] = None + override def children: Seq[Expression] = exprs + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): UnresolvedStarWithColumns = + copy(exprs = newChildren) + + override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = { + assert(colNames.size == exprs.size, + s"The size of column names: ${colNames.size} isn't equal to " + + s"the size of expressions: ${exprs.size}") + explicitMetadata.foreach { m => + assert(colNames.size == m.size, + s"The size of column names: ${colNames.size} isn't equal to " + + s"the size of metadata elements: ${m.size}") + } + + SchemaUtils.checkColumnNameDuplication(colNames, resolver) + + val expandedCols = super.expand(input, resolver) + + val columnSeq = explicitMetadata match { + case Some(ms) => colNames.zip(exprs).zip(ms.map(Some(_))) + case _ => colNames.zip(exprs).map((_, None)) + } + + val replacedAndExistingColumns = expandedCols.map { field => + columnSeq.find { case ((colName, _), _) => + 
resolver(field.name, colName) + } match { + case Some(((colName, expr), m)) => Alias(expr, colName)(explicitMetadata = m) + case _ => field + } + } + + val newColumns = columnSeq.filter { case ((colName, _), _) => + !expandedCols.exists(f => resolver(f.name, colName)) + }.map { + case ((colName, expr), m) => Alias(expr, colName)(explicitMetadata = m) + } + + replacedAndExistingColumns ++ newColumns + } +} + +/** + * Represents some of the input attributes to a given relational operator, for example in + * `df.withColumnRenamed`. + * + * @param existingNames a list of column names that should be replaced. + * If the column does not exist, it is ignored. + * + * @param newNames a list of new column names that should be used to replace the existing columns. + */ +case class UnresolvedStarWithColumnsRenames( + existingNames: Seq[String], + newNames: Seq[String]) + extends LeafExpression with UnresolvedStarBase { + + override def target: Option[Seq[String]] = None + + override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = { + assert(existingNames.size == newNames.size, + s"The size of existing column names: ${existingNames.size} isn't equal to " + + s"the size of new column names: ${newNames.size}") + + val expandedCols = super.expand(input, resolver) + + existingNames.zip(newNames).foldLeft(expandedCols) { + case (attrs, (existingName, newName)) => + attrs.map(attr => + if (resolver(attr.name, existingName)) { + Alias(attr, newName)() + } else { + attr + } + ) + } + } +} + /** * Represents all of the input attributes to a given relational operator, for example in * "SELECT * FROM ...". @@ -723,7 +825,8 @@ case class UnresolvedStarExceptOrReplace( * targets' columns are produced. This can either be a table name or struct name. This * is a list of identifiers that is the path of the expansion. 
*/ -case class UnresolvedStar(target: Option[Seq[String]]) extends UnresolvedStarBase(target) +case class UnresolvedStar(target: Option[Seq[String]]) + extends LeafExpression with UnresolvedStarBase /** * Represents all of the input attributes to a given relational operator, for example in @@ -733,7 +836,7 @@ case class UnresolvedStar(target: Option[Seq[String]]) extends UnresolvedStarBas * tables' columns are produced. */ case class UnresolvedRegex(regexPattern: String, table: Option[String], caseSensitive: Boolean) - extends Star with Unevaluable { + extends LeafExpression with Star with Unevaluable { override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = { val pattern = if (caseSensitive) regexPattern else s"(?i)$regexPattern" table match { @@ -791,7 +894,8 @@ case class MultiAlias(child: Expression, names: Seq[String]) * * @param expressions Expressions to expand. */ -case class ResolvedStar(expressions: Seq[NamedExpression]) extends Star with Unevaluable { +case class ResolvedStar(expressions: Seq[NamedExpression]) + extends LeafExpression with Star with Unevaluable { override def newInstance(): NamedExpression = throw new UnresolvedException("newInstance") override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = expressions override def toString: String = expressions.mkString("ResolvedStar(", ", ", ")") diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 6ab69aea12e5d..acbbeb49b267b 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -45,7 +45,7 @@ import org.apache.spark.internal.LogKeys.{DATAFRAME_ID, SESSION_ID} import org.apache.spark.resource.{ExecutorResourceRequest, 
ResourceProfile, TaskResourceProfile, TaskResourceRequest} import org.apache.spark.sql.{Column, Dataset, Encoders, ForeachWriter, Observation, RelationalGroupedDataset, Row, SparkSession} import org.apache.spark.sql.catalyst.{expressions, AliasIdentifier, FunctionIdentifier, QueryPlanningTracker} -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, GlobalTempView, LazyExpression, LocalTempView, MultiAlias, NameParameterizedQuery, PosParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedDataFrameStar, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedPlanId, UnresolvedRegex, UnresolvedRelation, UnresolvedStar, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction, UnresolvedTranspose} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, GlobalTempView, LazyExpression, LocalTempView, MultiAlias, NameParameterizedQuery, PosParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedDataFrameStar, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedPlanId, UnresolvedRegex, UnresolvedRelation, UnresolvedStar, UnresolvedStarWithColumns, UnresolvedStarWithColumnsRenames, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction, UnresolvedTranspose} import org.apache.spark.sql.catalyst.encoders.{encoderFor, AgnosticEncoder, ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.UnboundRowEncoder import org.apache.spark.sql.catalyst.expressions._ @@ -1065,25 +1065,21 @@ class SparkConnectPlanner( } private def transformWithColumnsRenamed(rel: proto.WithColumnsRenamed): LogicalPlan = { - if (rel.getRenamesCount > 0) { - val (colNames, newColNames) = rel.getRenamesList.asScala.toSeq.map { rename => + val (colNames, newColNames) = if (rel.getRenamesCount > 0) { + rel.getRenamesList.asScala.toSeq.map { rename => (rename.getColName, rename.getNewColName) }.unzip - Dataset - .ofRows(session, transformRelation(rel.getInput)) - 
.withColumnsRenamed(colNames, newColNames) - .logicalPlan } else { // for backward compatibility - Dataset - .ofRows(session, transformRelation(rel.getInput)) - .withColumnsRenamed(rel.getRenameColumnsMapMap) - .logicalPlan + rel.getRenameColumnsMapMap.asScala.toSeq.unzip } + Project( + Seq(UnresolvedStarWithColumnsRenames(existingNames = colNames, newNames = newColNames)), + transformRelation(rel.getInput)) } private def transformWithColumns(rel: proto.WithColumns): LogicalPlan = { - val (colNames, cols, metadata) = + val (colNames, exprs, metadata) = rel.getAliasesList.asScala.toSeq.map { alias => if (alias.getNameCount != 1) { throw InvalidPlanInput(s"""WithColumns require column name only contains one name part, @@ -1096,13 +1092,16 @@ class SparkConnectPlanner( Metadata.empty } - (alias.getName(0), Column(transformExpression(alias.getExpr)), metadata) + (alias.getName(0), transformExpression(alias.getExpr), metadata) }.unzip3 - Dataset - .ofRows(session, transformRelation(rel.getInput)) - .withColumns(colNames, cols, metadata) - .logicalPlan + Project( + Seq( + UnresolvedStarWithColumns( + colNames = colNames, + exprs = exprs, + explicitMetadata = Some(metadata))), + transformRelation(rel.getInput)) } private def transformWithWatermark(rel: proto.WithWatermark): LogicalPlan = { diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala index aaeb5d9fe509a..054a32179935d 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala @@ -504,26 +504,27 @@ class SparkConnectPlannerSuite extends SparkFunSuite with SparkConnectPlanTest { } test("Test duplicated names in WithColumns") { - intercept[AnalysisException] { - transform( - proto.Relation - 
.newBuilder() - .setWithColumns( - proto.WithColumns - .newBuilder() - .setInput(readRel) - .addAliases(proto.Expression.Alias + val logical = transform( + proto.Relation + .newBuilder() + .setWithColumns( + proto.WithColumns + .newBuilder() + .setInput(readRel) + .addAliases( + proto.Expression.Alias .newBuilder() .addName("test") .setExpr(proto.Expression.newBuilder .setLiteral(proto.Expression.Literal.newBuilder.setInteger(32)))) - .addAliases(proto.Expression.Alias - .newBuilder() - .addName("test") - .setExpr(proto.Expression.newBuilder - .setLiteral(proto.Expression.Literal.newBuilder.setInteger(32))))) - .build()) - } + .addAliases(proto.Expression.Alias + .newBuilder() + .addName("test") + .setExpr(proto.Expression.newBuilder + .setLiteral(proto.Expression.Literal.newBuilder.setInteger(32))))) + .build()) + + intercept[AnalysisException](Dataset.ofRows(spark, logical)) } test("Test multi nameparts for column names in WithColumns") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index e4e782a50e3d4..e41521cba533a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1275,29 +1275,14 @@ class Dataset[T] private[sql]( require(colNames.size == cols.size, s"The size of column names: ${colNames.size} isn't equal to " + s"the size of columns: ${cols.size}") - SchemaUtils.checkColumnNameDuplication( - colNames, - sparkSession.sessionState.conf.caseSensitiveAnalysis) - - val resolver = sparkSession.sessionState.analyzer.resolver - val output = queryExecution.analyzed.output - - val columnSeq = colNames.zip(cols) - - val replacedAndExistingColumns = output.map { field => - columnSeq.find { case (colName, _) => - resolver(field.name, colName) - } match { - case Some((colName: String, col: Column)) => col.as(colName) - case _ => Column(field) - } + withPlan { + Project( + Seq( + 
UnresolvedStarWithColumns( + colNames = colNames, + exprs = cols.map(_.expr))), + logicalPlan) } - - val newColumns = columnSeq.filter { case (colName, col) => - !output.exists(f => resolver(f.name, colName)) - }.map { case (colName, col) => col.as(colName) } - - select(replacedAndExistingColumns ++ newColumns : _*) } /** @inheritdoc */ @@ -1324,26 +1309,13 @@ class Dataset[T] private[sql]( require(colNames.size == newColNames.size, s"The size of existing column names: ${colNames.size} isn't equal to " + s"the size of new column names: ${newColNames.size}") - - val resolver = sparkSession.sessionState.analyzer.resolver - val output: Seq[NamedExpression] = queryExecution.analyzed.output - var shouldRename = false - - val projectList = colNames.zip(newColNames).foldLeft(output) { - case (attrs, (existingName, newName)) => - attrs.map(attr => - if (resolver(attr.name, existingName)) { - shouldRename = true - Alias(attr, newName)() - } else { - attr - } - ) - } - if (shouldRename) { - withPlan(Project(projectList, logicalPlan)) - } else { - toDF() + withPlan { + Project( + Seq( + UnresolvedStarWithColumnsRenames( + existingNames = colNames, + newNames = newColNames)), + logicalPlan) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala index fdfb909d9ba73..621d468454d40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala @@ -777,9 +777,51 @@ class DataFrameSubquerySuite extends QueryTest with SharedSparkSession { val t1 = table1() checkAnswer( - t1.withColumn("scalar", spark.range(1).select($"c1".outer() + $"c2".outer()).scalar()), - t1.withColumn("scalar", $"c1" + $"c2") - ) + t1.withColumn( + "scalar", + spark + .range(1) + .select($"c1".outer() + $"c2".outer()) + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + + checkAnswer( + 
t1.withColumn( + "scalar", + spark + .range(1) + .withColumn("c1", $"c1".outer()) + .select($"c1" + $"c2".outer()) + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .select($"c1".outer().as("c1")) + .withColumn("c2", $"c2".outer()) + .select($"c1" + $"c2") + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + } + } + + test("subquery in withColumnsRenamed") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .select($"c1".outer().as("c1"), $"c2".outer().as("c2")) + .withColumnsRenamed(Map("c1" -> "x", "c2" -> "y")) + .select($"x" + $"y") + .scalar()), + t1.select($"*", ($"c1".as("x") + $"c2".as("y")).as("scalar"))) } } From 8858a4c9628f349986e093a93737b2eaed7e8833 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 14 Jan 2025 11:23:22 +0900 Subject: [PATCH 07/15] [SPARK-47081][CONNECT][FOLLOW-UP] Respect `spark.connect.progress.reportInterval` over timeout ### What changes were proposed in this pull request? This PR is a followup that addresses https://github.com/apache/spark/pull/45150#discussion_r1913310090 ### Why are the changes needed? To respect `spark.connect.progress.reportInterval` ### Does this PR introduce _any_ user-facing change? Virtually no. In corner cases, the progress update might take longer than `spark.connect.progress.reportInterval`. ### How was this patch tested? Manually tested. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49474 from HyukjinKwon/SPARK-47081-followup3. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .../sql/connect/execution/ExecuteGrpcResponseSender.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala index 44b634af95ca9..72c2b0e3f1095 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala @@ -241,14 +241,13 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( // The state of interrupted, response and lastIndex are changed under executionObserver // monitor, and will notify upon state change. if (response.isEmpty) { + var timeout = Math.max(1, deadlineTimeMillis - System.currentTimeMillis()) // Wake up more frequently to send the progress updates. val progressTimeout = executeHolder.sessionHolder.session.sessionState.conf .getConf(CONNECT_PROGRESS_REPORT_INTERVAL) // If the progress feature is disabled, wait for the deadline. - val timeout = if (progressTimeout > 0) { - progressTimeout - } else { - Math.max(1, deadlineTimeMillis - System.currentTimeMillis()) + if (progressTimeout > 0L) { + timeout = Math.min(progressTimeout, timeout) } logTrace(s"Wait for response to become available with timeout=$timeout ms.") executionObserver.responseLock.wait(timeout) From e62697fc585ad5f06878d44f859a9f2fefe4d021 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 13 Jan 2025 18:51:23 -0800 Subject: [PATCH 08/15] [SPARK-50805][CORE] Move method `nameForAppAndAttempt` to `o.a.s.u.Utils` ### What changes were proposed in this pull request? Pure refactor, move method `nameForAppAndAttempt` from `EventLogFileWriter` to `o.a.s.u.Utils`. ### Why are the changes needed? 
The method could be reused in several other places, e.g. https://github.com/apache/spark/pull/49440 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GHA. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49476 from pan3793/SPARK-50805. Authored-by: Cheng Pan Signed-off-by: Dongjoon Hyun --- .../spark/deploy/history/EventLogFileWriters.scala | 7 +------ core/src/main/scala/org/apache/spark/util/Utils.scala | 9 +++++++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala index f3bb6d5af3358..990ab680f3aaf 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala @@ -187,12 +187,7 @@ object EventLogFileWriter { } def nameForAppAndAttempt(appId: String, appAttemptId: Option[String]): String = { - val base = Utils.sanitizeDirName(appId) - if (appAttemptId.isDefined) { - base + "_" + Utils.sanitizeDirName(appAttemptId.get) - } else { - base - } + Utils.nameForAppAndAttempt(appId, appAttemptId) } def codecName(log: Path): Option[String] = { diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 536c6b4447aac..1efe181a8c38a 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2954,6 +2954,15 @@ private[spark] object Utils str.replaceAll("[ :/]", "-").replaceAll("[.${}'\"]", "_").toLowerCase(Locale.ROOT) } + def nameForAppAndAttempt(appId: String, appAttemptId: Option[String]): String = { + val base = sanitizeDirName(appId) + if (appAttemptId.isDefined) { + base + "_" + sanitizeDirName(appAttemptId.get) + } else { + base + } + } + def isClientMode(conf: SparkConf): 
Boolean = { "client".equals(conf.get(SparkLauncher.DEPLOY_MODE, "client")) } From 0e51dba278000a7cc8010f1852ecb45923d56439 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 14 Jan 2025 11:05:02 +0800 Subject: [PATCH 09/15] [SPARK-50633][FOLLOWUP] Set `CODECOV_TOKEN` with environment variables ### What changes were proposed in this pull request? The pr aims to set `CODECOV_TOKEN` with `environment variables` for `codecov/codecov-action`. ### Why are the changes needed? Based on the suggestions of the `codecov/codecov-action` community, we will try setting it up in a different way to see if `codecov/codecov-action` can succeed. https://github.com/codecov/codecov-action/issues/1738#issuecomment-2588783885 image ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually check. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49477 from panbingkun/SPARK-50633_FOLLOWUP_1. Authored-by: panbingkun Signed-off-by: panbingkun --- .github/workflows/build_and_test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 292971092b70d..8040169fcb2c8 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -622,9 +622,10 @@ jobs: - name: Upload coverage to Codecov if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true' uses: codecov/codecov-action@v5 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: files: ./python/coverage.xml - token: ${{ secrets.CODECOV_TOKEN }} flags: unittests name: PySpark verbose: true From e945a9031d4eb4f9893f9b1a744646c2cfb214c0 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 13 Jan 2025 19:21:34 -0800 Subject: [PATCH 10/15] [SPARK-50783] Canonicalize JVM profiler results file name and layout on DFS ### What changes were proposed in this pull request? 
This PR canonicalizes the JVM profiler added in SPARK-46094 profiling result files on DFS to ``` dfsDir/{{APP_ID}}/profile-exec-{{EXECUTOR_ID}}.jfr ``` which majorly follows the event logs file name pattern and layout. ### Why are the changes needed? According to https://github.com/apache/spark/pull/44021#issuecomment-1863873954, we can integrate the profiling results with Spark UI (both live and history) in the future, so it's good to follow the event logs file name pattern and layout as much as possible. ### Does this PR introduce _any_ user-facing change? No, it's an unreleased feature. ### How was this patch tested? ``` $ bin/spark-submit run-example \ --master yarn \ --deploy-mode cluster \ --conf spark.plugins=org.apache.spark.executor.profiler.ExecutorProfilerPlugin \ --conf spark.executor.profiling.enabled=true \ --conf spark.executor.profiling.dfsDir=hdfs:///spark-profiling \ --conf spark.executor.profiling.fraction=1 \ SparkPi 100000 ``` ``` hadoopspark-dev1:~/spark$ hadoop fs -ls /spark-profiling/ Found 1 items drwxrwx--- - hadoop supergroup 0 2025-01-13 10:29 /spark-profiling/application_1736320707252_0023_1 ``` ``` hadoopspark-dev1:~/spark$ hadoop fs -ls /spark-profiling/application_1736320707252_0023_1 Found 48 items -rw-rw---- 3 hadoop supergroup 5255028 2025-01-13 10:29 /spark-profiling/application_1736320707252_0023_1/profile-exec-1.jfr -rw-rw---- 3 hadoop supergroup 3840775 2025-01-13 10:29 /spark-profiling/application_1736320707252_0023_1/profile-exec-10.jfr -rw-rw---- 3 hadoop supergroup 3889002 2025-01-13 10:29 /spark-profiling/application_1736320707252_0023_1/profile-exec-11.jfr -rw-rw---- 3 hadoop supergroup 3570697 2025-01-13 10:29 /spark-profiling/application_1736320707252_0023_1/profile-exec-12.jfr ... ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49440 from pan3793/SPARK-50783. 
Authored-by: Cheng Pan Signed-off-by: Dongjoon Hyun --- connector/profiler/README.md | 6 +- .../profiler/ExecutorJVMProfiler.scala | 77 +++++++++++-------- 2 files changed, 50 insertions(+), 33 deletions(-) diff --git a/connector/profiler/README.md b/connector/profiler/README.md index 1326fd55df097..4d97b15eb96ab 100644 --- a/connector/profiler/README.md +++ b/connector/profiler/README.md @@ -16,7 +16,7 @@ The profiler writes the jfr files to the executor's working directory in the exe Code profiling is currently only supported for * Linux (x64) -* Linux (arm 64) +* Linux (arm64) * Linux (musl, x64) * MacOS @@ -54,7 +54,7 @@ Then enable the profiling in the configuration. spark.executor.profiling.dfsDir (none) - An HDFS compatible path to which the profiler's output files are copied. The output files will be written as dfsDir/application_id/profile-appname-exec-executor_id.jfr
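+ An HDFS compatible path to which the profiler's output files are copied. The output files will be written as dfsDir/{{APP_ID}}/profile-exec-{{EXECUTOR_ID}}.jfr, where the application directory name also carries a _{{APP_ATTEMPT_ID}} suffix when the application has an attempt ID.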
If no dfsDir is specified then the files are not copied over. Users should ensure there is sufficient disk space available otherwise it may lead to corrupt jfr files. 4.0.0 @@ -72,7 +72,7 @@ Then enable the profiling in the configuration. event=wall,interval=10ms,alloc=2m,lock=10ms,chunktime=300s Options to pass to the profiler. Detailed options are documented in the comments here: - Profiler arguments. + Profiler arguments. Note that the options to start, stop, specify output format, and output file do not have to be specified. 4.0.0 diff --git a/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala b/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala index 20b6db5221fa9..94e5b46c65881 100644 --- a/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala +++ b/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala @@ -17,17 +17,17 @@ package org.apache.spark.executor.profiler import java.io.{BufferedInputStream, FileInputStream, InputStream, IOException} -import java.net.URI import java.util.concurrent.{ScheduledExecutorService, TimeUnit} import one.profiler.{AsyncProfiler, AsyncProfilerLoader} import org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path} +import org.apache.hadoop.fs.permission.FsPermission import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.internal.LogKeys.PATH -import org.apache.spark.util.ThreadUtils +import org.apache.spark.util.{ThreadUtils, Utils} /** @@ -38,15 +38,26 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, executorId: String) ex private var running = false private val enableProfiler = conf.get(EXECUTOR_PROFILING_ENABLED) private val profilerOptions = conf.get(EXECUTOR_PROFILING_OPTIONS) - private val profilerDfsDir = conf.get(EXECUTOR_PROFILING_DFS_DIR) + 
private val profilerDfsDirOpt = conf.get(EXECUTOR_PROFILING_DFS_DIR) private val profilerLocalDir = conf.get(EXECUTOR_PROFILING_LOCAL_DIR) private val writeInterval = conf.get(EXECUTOR_PROFILING_WRITE_INTERVAL) - private val startcmd = s"start,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val stopcmd = s"stop,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val dumpcmd = s"dump,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val resumecmd = s"resume,$profilerOptions,file=$profilerLocalDir/profile.jfr" + private val appId = try { + conf.getAppId + } catch { + case _: NoSuchElementException => "local-" + System.currentTimeMillis + } + private val appAttemptId = conf.getOption("spark.app.attempt.id") + private val baseName = Utils.nameForAppAndAttempt(appId, appAttemptId) + private val profileFile = s"profile-exec-$executorId.jfr" + + private val startcmd = s"start,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val stopcmd = s"stop,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val dumpcmd = s"dump,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val resumecmd = s"resume,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val PROFILER_FOLDER_PERMISSIONS = new FsPermission(Integer.parseInt("770", 8).toShort) + private val PROFILER_FILE_PERMISSIONS = new FsPermission(Integer.parseInt("660", 8).toShort) private val UPLOAD_SIZE = 8 * 1024 * 1024 // 8 MB private var outputStream: FSDataOutputStream = _ private var inputStream: InputStream = _ @@ -89,28 +100,34 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, executorId: String) ex } } + private def requireProfilerBaseDirAsDirectory(fs: FileSystem, profilerDfsDir: String): Unit = { + if (!fs.getFileStatus(new Path(profilerDfsDir)).isDirectory) { + throw new IllegalArgumentException( + s"Profiler DFS base directory $profilerDfsDir is not a directory.") + } + } + private def startWriting(): Unit = { - if 
(profilerDfsDir.isDefined) { - val applicationId = try { - conf.getAppId - } catch { - case _: NoSuchElementException => "local-" + System.currentTimeMillis + profilerDfsDirOpt.foreach { profilerDfsDir => + val profilerDirForApp = s"$profilerDfsDir/$baseName" + val profileOutputFile = s"$profilerDirForApp/$profileFile" + + val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + val fs = Utils.getHadoopFileSystem(profilerDfsDir, hadoopConf) + + requireProfilerBaseDirAsDirectory(fs, profilerDfsDir) + + val profilerDirForAppPath = new Path(profilerDirForApp) + if (!fs.exists(profilerDirForAppPath)) { + // SPARK-30860: use the class method to avoid the umask causing permission issues + FileSystem.mkdirs(fs, profilerDirForAppPath, PROFILER_FOLDER_PERMISSIONS) } - val config = SparkHadoopUtil.get.newConfiguration(conf) - val appName = conf.get("spark.app.name").replace(" ", "-") - val profilerOutputDirname = profilerDfsDir.get - - val profileOutputFile = - s"$profilerOutputDirname/$applicationId/profile-$appName-exec-$executorId.jfr" - val fs = FileSystem.get(new URI(profileOutputFile), config); - val filenamePath = new Path(profileOutputFile) - outputStream = fs.create(filenamePath) + + outputStream = FileSystem.create(fs, new Path(profileOutputFile), PROFILER_FILE_PERMISSIONS) try { - if (fs.exists(filenamePath)) { - fs.delete(filenamePath, true) - } logInfo(log"Copying executor profiling file to ${MDC(PATH, profileOutputFile)}") - inputStream = new BufferedInputStream(new FileInputStream(s"$profilerLocalDir/profile.jfr")) + inputStream = new BufferedInputStream( + new FileInputStream(s"$profilerLocalDir/$profileFile")) threadpool = ThreadUtils.newDaemonSingleThreadScheduledExecutor("profilerOutputThread") threadpool.scheduleWithFixedDelay( new Runnable() { @@ -158,14 +175,14 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, executorId: String) ex } catch { case e: IOException => logError("Exception occurred while writing some profiler output: ", e) 
case e @ (_: IllegalArgumentException | _: IllegalStateException) => - logError("Some profiler output not written." + - " Exception occurred in profiler native code: ", e) + logError("Some profiler output not written. " + + "Exception occurred in profiler native code: ", e) case e: Exception => logError("Some profiler output not written. Unexpected exception: ", e) } } private def finishWriting(): Unit = { - if (profilerDfsDir.isDefined && writing) { + if (profilerDfsDirOpt.isDefined && writing) { try { // shutdown background writer threadpool.shutdown() @@ -177,8 +194,8 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, executorId: String) ex } catch { case _: InterruptedException => Thread.currentThread().interrupt() case e: IOException => - logWarning("Some profiling output not written." + - "Exception occurred while completing profiler output", e) + logWarning("Some profiling output not written. " + + "Exception occurred while completing profiler output: ", e) } writing = false } From ebe39f6adc8bee841c79946a438d2a9ea191ac03 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Mon, 13 Jan 2025 19:23:45 -0800 Subject: [PATCH 11/15] [MINOR][DOCS] Fix the examples of createDataFrame ### What changes were proposed in this pull request? Fix the examples of createDataFrame `collect` -> `show` ### Why are the changes needed? existing examples generate different outputs ### Does this PR introduce _any_ user-facing change? doc only changes ### How was this patch tested? manually test in `bin/pyspark` ### Was this patch authored or co-authored using generative AI tooling? no Closes #49475 from zhengruifeng/py_doc_create_df. 
Authored-by: Ruifeng Zheng Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/session.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 5ab186b2957e7..f5bb269c23d6e 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -1467,7 +1467,9 @@ def createDataFrame( # type: ignore[misc] +-----+---+ |Alice| 1| +-----+---+ - >>> spark.createDataFrame(pandas.DataFrame([[1, 2]])).collect() # doctest: +SKIP + + >>> pdf = pandas.DataFrame([[1, 2]]) # doctest: +SKIP + >>> spark.createDataFrame(pdf).show() # doctest: +SKIP +---+---+ | 0| 1| +---+---+ @@ -1482,8 +1484,9 @@ def createDataFrame( # type: ignore[misc] +-----+---+ |Alice| 1| +-----+---+ + >>> table = pyarrow.table({'0': [1], '1': [2]}) # doctest: +SKIP - >>> spark.createDataFrame(table).collect() # doctest: +SKIP + >>> spark.createDataFrame(table).show() # doctest: +SKIP +---+---+ | 0| 1| +---+---+ From b89082b4190d541a2c0084319e5e4a006a332e79 Mon Sep 17 00:00:00 2001 From: Stefan Kandic Date: Tue, 14 Jan 2025 12:32:39 +0900 Subject: [PATCH 12/15] [SPARK-50774][SQL] Centralize collation names in one place ### What changes were proposed in this pull request? Made a new class to put most commonly used collation names and refactor `CollationSupportSuite` which had thousands of literals for the four most used collations (UTF8_BINARY, UTF8_LCASE, UNICODE and UNICODE_CI) ### Why are the changes needed? To be able to have this centralized and not have to create new string literals with "UTF8_BINARY" over and over again. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be covered with existing tests. ### Was this patch authored or co-authored using generative AI tooling? Closes #49425 from stefankandic/makeCollationNamesPublic. 
Authored-by: Stefan Kandic Signed-off-by: Hyukjin Kwon --- .../sql/catalyst/util/CollationFactory.java | 17 +- .../sql/catalyst/util/CollationNames.java | 25 + .../unsafe/types/CollationSupportSuite.java | 5925 +++++++++-------- .../apache/spark/sql/internal/SQLConf.scala | 4 +- 4 files changed, 2997 insertions(+), 2974 deletions(-) create mode 100644 common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationNames.java diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java index ce416293131a1..81448dc95a374 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java @@ -467,9 +467,6 @@ private enum CaseSensitivity { */ private static final int CASE_SENSITIVITY_MASK = 0b1; - private static final String UTF8_BINARY_COLLATION_NAME = "UTF8_BINARY"; - private static final String UTF8_LCASE_COLLATION_NAME = "UTF8_LCASE"; - private static final int UTF8_BINARY_COLLATION_ID = new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED, SpaceTrimming.NONE).collationId; private static final int UTF8_LCASE_COLLATION_ID = @@ -655,9 +652,9 @@ protected CollationMeta buildCollationMeta() { protected String normalizedCollationName() { StringBuilder builder = new StringBuilder(); if(caseSensitivity == CaseSensitivity.UNSPECIFIED){ - builder.append(UTF8_BINARY_COLLATION_NAME); + builder.append(CollationNames.UTF8_BINARY); } else{ - builder.append(UTF8_LCASE_COLLATION_NAME); + builder.append(CollationNames.UTF8_LCASE); } if (spaceTrimming != SpaceTrimming.NONE) { builder.append('_'); @@ -669,12 +666,12 @@ protected String normalizedCollationName() { static List listCollations() { CollationIdentifier UTF8_BINARY_COLLATION_IDENT = new CollationIdentifier( PROVIDER_SPARK, - UTF8_BINARY_COLLATION_NAME, + 
CollationNames.UTF8_BINARY, CollationSpecICU.ICU_VERSION ); CollationIdentifier UTF8_LCASE_COLLATION_IDENT = new CollationIdentifier( PROVIDER_SPARK, - UTF8_LCASE_COLLATION_NAME, + CollationNames.UTF8_LCASE, CollationSpecICU.ICU_VERSION ); return Arrays.asList(UTF8_BINARY_COLLATION_IDENT, UTF8_LCASE_COLLATION_IDENT); @@ -758,7 +755,7 @@ private enum AccentSensitivity { VersionInfo.ICU_VERSION.getMinor()); static { - ICULocaleMap.put("UNICODE", ULocale.ROOT); + ICULocaleMap.put(CollationNames.UNICODE, ULocale.ROOT); // ICU-implemented `ULocale`s which have corresponding `Collator` installed. ULocale[] locales = Collator.getAvailableULocales(); // Build locale names in format: language["_" optional script]["_" optional country code]. @@ -806,13 +803,13 @@ private enum AccentSensitivity { } private static final int UNICODE_COLLATION_ID = new CollationSpecICU( - "UNICODE", + CollationNames.UNICODE, CaseSensitivity.CS, AccentSensitivity.AS, SpaceTrimming.NONE).collationId; private static final int UNICODE_CI_COLLATION_ID = new CollationSpecICU( - "UNICODE", + CollationNames.UNICODE, CaseSensitivity.CI, AccentSensitivity.AS, SpaceTrimming.NONE).collationId; diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationNames.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationNames.java new file mode 100644 index 0000000000000..11e9e1a87e713 --- /dev/null +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationNames.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util; + +public class CollationNames { + public static final String UTF8_BINARY = "UTF8_BINARY"; + public static final String UTF8_LCASE = "UTF8_LCASE"; + public static final String UNICODE = "UNICODE"; + public static final String UNICODE_CI = "UNICODE_CI"; +} diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java index a696da8cf45b8..1db163c1c822d 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java @@ -26,6 +26,7 @@ import java.util.Map; import static org.junit.jupiter.api.Assertions.*; +import static org.apache.spark.sql.catalyst.util.CollationNames.*; // checkstyle.off: AvoidEscapedUnicodeCharacters public class CollationSupportSuite { @@ -37,7 +38,7 @@ public class CollationSupportSuite { * the specified collations (as often seen in some pass-through Spark expressions). */ private final String[] testSupportedCollations = - {"UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI"}; + {UTF8_BINARY, UTF8_LCASE, UNICODE, UNICODE_CI}; /** * Collation-aware UTF8String comparison and equality check. @@ -86,82 +87,82 @@ public void testCompare() throws SparkException { assertCompare("a", "ä", collationName, -1); } // Advanced tests. 
- assertCompare("äü", "bü", "UTF8_BINARY", 1); - assertCompare("bxx", "bü", "UTF8_BINARY", -1); - assertCompare("äü", "bü", "UTF8_LCASE", 1); - assertCompare("bxx", "bü", "UTF8_LCASE", -1); - assertCompare("äü", "bü", "UNICODE", -1); - assertCompare("bxx", "bü", "UNICODE", 1); - assertCompare("äü", "bü", "UNICODE_CI", -1); - assertCompare("bxx", "bü", "UNICODE_CI", 1); + assertCompare("äü", "bü", UTF8_BINARY, 1); + assertCompare("bxx", "bü", UTF8_BINARY, -1); + assertCompare("äü", "bü", UTF8_LCASE, 1); + assertCompare("bxx", "bü", UTF8_LCASE, -1); + assertCompare("äü", "bü", UNICODE, -1); + assertCompare("bxx", "bü", UNICODE, 1); + assertCompare("äü", "bü", UNICODE_CI, -1); + assertCompare("bxx", "bü", UNICODE_CI, 1); assertCompare("cČć", "ČćC", "SR_CI_AI", 0); // Case variation. - assertCompare("AbCd", "aBcD", "UTF8_BINARY", -1); - assertCompare("ABCD", "abcd", "UTF8_LCASE", 0); - assertCompare("AbcD", "aBCd", "UNICODE", 1); - assertCompare("abcd", "ABCD", "UNICODE_CI", 0); + assertCompare("AbCd", "aBcD", UTF8_BINARY, -1); + assertCompare("ABCD", "abcd", UTF8_LCASE, 0); + assertCompare("AbcD", "aBCd", UNICODE, 1); + assertCompare("abcd", "ABCD", UNICODE_CI, 0); // Accent variation. - assertCompare("aBćD", "ABĆD", "UTF8_BINARY", 1); - assertCompare("AbCδ", "ABCΔ", "UTF8_LCASE", 0); - assertCompare("äBCd", "ÄBCD", "UNICODE", -1); - assertCompare("Ab́cD", "AB́CD", "UNICODE_CI", 0); + assertCompare("aBćD", "ABĆD", UTF8_BINARY, 1); + assertCompare("AbCδ", "ABCΔ", UTF8_LCASE, 0); + assertCompare("äBCd", "ÄBCD", UNICODE, -1); + assertCompare("Ab́cD", "AB́CD", UNICODE_CI, 0); assertCompare("ÈÉÊË", "EeEe", "AF_CI_AI", 0); // One-to-many case mapping (e.g. Turkish dotted I). 
- assertCompare("i\u0307", "İ", "UTF8_BINARY", -1); - assertCompare("İ", "i\u0307", "UTF8_BINARY", 1); - assertCompare("i\u0307", "İ", "UTF8_LCASE", 0); - assertCompare("İ", "i\u0307", "UTF8_LCASE", 0); - assertCompare("i\u0307", "İ", "UNICODE", -1); - assertCompare("İ", "i\u0307", "UNICODE", 1); - assertCompare("i\u0307", "İ", "UNICODE_CI", 0); - assertCompare("İ", "i\u0307", "UNICODE_CI", 0); - assertCompare("i\u0307İ", "i\u0307İ", "UTF8_LCASE", 0); - assertCompare("i\u0307İ", "İi\u0307", "UTF8_LCASE", 0); - assertCompare("İi\u0307", "i\u0307İ", "UTF8_LCASE", 0); - assertCompare("İi\u0307", "İi\u0307", "UTF8_LCASE", 0); - assertCompare("i\u0307İ", "i\u0307İ", "UNICODE_CI", 0); - assertCompare("i\u0307İ", "İi\u0307", "UNICODE_CI", 0); - assertCompare("İi\u0307", "i\u0307İ", "UNICODE_CI", 0); - assertCompare("İi\u0307", "İi\u0307", "UNICODE_CI", 0); + assertCompare("i\u0307", "İ", UTF8_BINARY, -1); + assertCompare("İ", "i\u0307", UTF8_BINARY, 1); + assertCompare("i\u0307", "İ", UTF8_LCASE, 0); + assertCompare("İ", "i\u0307", UTF8_LCASE, 0); + assertCompare("i\u0307", "İ", UNICODE, -1); + assertCompare("İ", "i\u0307", UNICODE, 1); + assertCompare("i\u0307", "İ", UNICODE_CI, 0); + assertCompare("İ", "i\u0307", UNICODE_CI, 0); + assertCompare("i\u0307İ", "i\u0307İ", UTF8_LCASE, 0); + assertCompare("i\u0307İ", "İi\u0307", UTF8_LCASE, 0); + assertCompare("İi\u0307", "i\u0307İ", UTF8_LCASE, 0); + assertCompare("İi\u0307", "İi\u0307", UTF8_LCASE, 0); + assertCompare("i\u0307İ", "i\u0307İ", UNICODE_CI, 0); + assertCompare("i\u0307İ", "İi\u0307", UNICODE_CI, 0); + assertCompare("İi\u0307", "i\u0307İ", UNICODE_CI, 0); + assertCompare("İi\u0307", "İi\u0307", UNICODE_CI, 0); // Conditional case mapping (e.g. Greek sigmas). 
- assertCompare("ς", "σ", "UTF8_BINARY", -1); - assertCompare("ς", "Σ", "UTF8_BINARY", 1); - assertCompare("σ", "Σ", "UTF8_BINARY", 1); - assertCompare("ς", "σ", "UTF8_LCASE", 0); - assertCompare("ς", "Σ", "UTF8_LCASE", 0); - assertCompare("σ", "Σ", "UTF8_LCASE", 0); - assertCompare("ς", "σ", "UNICODE", 1); - assertCompare("ς", "Σ", "UNICODE", 1); - assertCompare("σ", "Σ", "UNICODE", -1); - assertCompare("ς", "σ", "UNICODE_CI", 0); - assertCompare("ς", "Σ", "UNICODE_CI", 0); - assertCompare("σ", "Σ", "UNICODE_CI", 0); + assertCompare("ς", "σ", UTF8_BINARY, -1); + assertCompare("ς", "Σ", UTF8_BINARY, 1); + assertCompare("σ", "Σ", UTF8_BINARY, 1); + assertCompare("ς", "σ", UTF8_LCASE, 0); + assertCompare("ς", "Σ", UTF8_LCASE, 0); + assertCompare("σ", "Σ", UTF8_LCASE, 0); + assertCompare("ς", "σ", UNICODE, 1); + assertCompare("ς", "Σ", UNICODE, 1); + assertCompare("σ", "Σ", UNICODE, -1); + assertCompare("ς", "σ", UNICODE_CI, 0); + assertCompare("ς", "Σ", UNICODE_CI, 0); + assertCompare("σ", "Σ", UNICODE_CI, 0); // Surrogate pairs. 
- assertCompare("a🙃b🙃c", "aaaaa", "UTF8_BINARY", 1); - assertCompare("a🙃b🙃c", "aaaaa", "UTF8_LCASE", 1); - assertCompare("a🙃b🙃c", "aaaaa", "UNICODE", -1); // != UTF8_BINARY - assertCompare("a🙃b🙃c", "aaaaa", "UNICODE_CI", -1); // != UTF8_LCASE - assertCompare("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", 0); - assertCompare("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", 0); - assertCompare("a🙃b🙃c", "a🙃b🙃c", "UNICODE", 0); - assertCompare("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", 0); - assertCompare("a🙃b🙃c", "a🙃b🙃d", "UTF8_BINARY", -1); - assertCompare("a🙃b🙃c", "a🙃b🙃d", "UTF8_LCASE", -1); - assertCompare("a🙃b🙃c", "a🙃b🙃d", "UNICODE", -1); - assertCompare("a🙃b🙃c", "a🙃b🙃d", "UNICODE_CI", -1); + assertCompare("a🙃b🙃c", "aaaaa", UTF8_BINARY, 1); + assertCompare("a🙃b🙃c", "aaaaa", UTF8_LCASE, 1); + assertCompare("a🙃b🙃c", "aaaaa", UNICODE, -1); // != UTF8_BINARY + assertCompare("a🙃b🙃c", "aaaaa", UNICODE_CI, -1); // != UTF8_LCASE + assertCompare("a🙃b🙃c", "a🙃b🙃c", UTF8_BINARY, 0); + assertCompare("a🙃b🙃c", "a🙃b🙃c", UTF8_LCASE, 0); + assertCompare("a🙃b🙃c", "a🙃b🙃c", UNICODE, 0); + assertCompare("a🙃b🙃c", "a🙃b🙃c", UNICODE_CI, 0); + assertCompare("a🙃b🙃c", "a🙃b🙃d", UTF8_BINARY, -1); + assertCompare("a🙃b🙃c", "a🙃b🙃d", UTF8_LCASE, -1); + assertCompare("a🙃b🙃c", "a🙃b🙃d", UNICODE, -1); + assertCompare("a🙃b🙃c", "a🙃b🙃d", UNICODE_CI, -1); // Maximum code point. int maxCodePoint = Character.MAX_CODE_POINT; String maxCodePointStr = new String(Character.toChars(maxCodePoint)); for (int i = 0; i < maxCodePoint && Character.isValidCodePoint(i); ++i) { - assertCompare(new String(Character.toChars(i)), maxCodePointStr, "UTF8_BINARY", -1); - assertCompare(new String(Character.toChars(i)), maxCodePointStr, "UTF8_LCASE", -1); + assertCompare(new String(Character.toChars(i)), maxCodePointStr, UTF8_BINARY, -1); + assertCompare(new String(Character.toChars(i)), maxCodePointStr, UTF8_LCASE, -1); } // Minimum code point. 
int minCodePoint = Character.MIN_CODE_POINT; String minCodePointStr = new String(Character.toChars(minCodePoint)); for (int i = minCodePoint + 1; i <= maxCodePoint && Character.isValidCodePoint(i); ++i) { - assertCompare(new String(Character.toChars(i)), minCodePointStr, "UTF8_BINARY", 1); - assertCompare(new String(Character.toChars(i)), minCodePointStr, "UTF8_LCASE", 1); + assertCompare(new String(Character.toChars(i)), minCodePointStr, UTF8_BINARY, 1); + assertCompare(new String(Character.toChars(i)), minCodePointStr, UTF8_LCASE, 1); } } @@ -302,201 +303,201 @@ public void testContains() throws SparkException { assertContains("Здраво", "Здраво", collationName, true); } // Advanced tests. - assertContains("abcde", "bcd", "UTF8_BINARY", true); - assertContains("abcde", "bde", "UTF8_BINARY", false); - assertContains("abcde", "fgh", "UTF8_BINARY", false); - assertContains("abcde", "abcde", "UNICODE", true); - assertContains("abcde", "aBcDe", "UNICODE", false); - assertContains("abcde", "fghij", "UNICODE", false); - assertContains("abcde", "C", "UTF8_LCASE", true); - assertContains("abcde", "AbCdE", "UTF8_LCASE", true); - assertContains("abcde", "X", "UTF8_LCASE", false); - assertContains("abcde", "c", "UNICODE_CI", true); - assertContains("abcde", "bCD", "UNICODE_CI", true); - assertContains("abcde", "123", "UNICODE_CI", false); - assertContains("ab世De", "b世D", "UTF8_BINARY", true); - assertContains("ab世De", "B世d", "UTF8_BINARY", false); - assertContains("äbćδe", "bćδ", "UTF8_BINARY", true); - assertContains("äbćδe", "BcΔ", "UTF8_BINARY", false); - assertContains("ab世De", "ab世De", "UNICODE", true); - assertContains("ab世De", "AB世dE", "UNICODE", false); - assertContains("äbćδe", "äbćδe", "UNICODE", true); - assertContains("äbćδe", "ÄBcΔÉ", "UNICODE", false); - assertContains("ab世De", "b世D", "UTF8_LCASE", true); - assertContains("ab世De", "B世d", "UTF8_LCASE", true); - assertContains("äbćδe", "bćδ", "UTF8_LCASE", true); - assertContains("äbćδe", "BcΔ", "UTF8_LCASE", 
false); - assertContains("ab世De", "ab世De", "UNICODE_CI", true); - assertContains("ab世De", "AB世dE", "UNICODE_CI", true); - assertContains("äbćδe", "ÄbćδE", "UNICODE_CI", true); - assertContains("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false); - assertContains("The Kelvin.", "Kelvin", "UTF8_LCASE", true); - assertContains("The Kelvin.", "Kelvin", "UTF8_LCASE", true); - assertContains("The KKelvin.", "KKelvin", "UTF8_LCASE", true); - assertContains("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); - assertContains("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); - assertContains("The KKelvin.", "KKelvin,", "UTF8_LCASE", false); + assertContains("abcde", "bcd", UTF8_BINARY, true); + assertContains("abcde", "bde", UTF8_BINARY, false); + assertContains("abcde", "fgh", UTF8_BINARY, false); + assertContains("abcde", "abcde", UNICODE, true); + assertContains("abcde", "aBcDe", UNICODE, false); + assertContains("abcde", "fghij", UNICODE, false); + assertContains("abcde", "C", UTF8_LCASE, true); + assertContains("abcde", "AbCdE", UTF8_LCASE, true); + assertContains("abcde", "X", UTF8_LCASE, false); + assertContains("abcde", "c", UNICODE_CI, true); + assertContains("abcde", "bCD", UNICODE_CI, true); + assertContains("abcde", "123", UNICODE_CI, false); + assertContains("ab世De", "b世D", UTF8_BINARY, true); + assertContains("ab世De", "B世d", UTF8_BINARY, false); + assertContains("äbćδe", "bćδ", UTF8_BINARY, true); + assertContains("äbćδe", "BcΔ", UTF8_BINARY, false); + assertContains("ab世De", "ab世De", UNICODE, true); + assertContains("ab世De", "AB世dE", UNICODE, false); + assertContains("äbćδe", "äbćδe", UNICODE, true); + assertContains("äbćδe", "ÄBcΔÉ", UNICODE, false); + assertContains("ab世De", "b世D", UTF8_LCASE, true); + assertContains("ab世De", "B世d", UTF8_LCASE, true); + assertContains("äbćδe", "bćδ", UTF8_LCASE, true); + assertContains("äbćδe", "BcΔ", UTF8_LCASE, false); + assertContains("ab世De", "ab世De", UNICODE_CI, true); + assertContains("ab世De", "AB世dE", UNICODE_CI, true); + 
assertContains("äbćδe", "ÄbćδE", UNICODE_CI, true); + assertContains("äbćδe", "ÄBcΔÉ", UNICODE_CI, false); + assertContains("The Kelvin.", "Kelvin", UTF8_LCASE, true); + assertContains("The Kelvin.", "Kelvin", UTF8_LCASE, true); + assertContains("The KKelvin.", "KKelvin", UTF8_LCASE, true); + assertContains("2 Kelvin.", "2 Kelvin", UTF8_LCASE, true); + assertContains("2 Kelvin.", "2 Kelvin", UTF8_LCASE, true); + assertContains("The KKelvin.", "KKelvin,", UTF8_LCASE, false); assertContains("abčćd", "ABCCD", "SR_CI_AI", true); // Case variation. - assertContains("aBcDe", "bcd", "UTF8_BINARY", false); - assertContains("aBcDe", "BcD", "UTF8_BINARY", true); - assertContains("aBcDe", "abcde", "UNICODE", false); - assertContains("aBcDe", "aBcDe", "UNICODE", true); - assertContains("aBcDe", "bcd", "UTF8_LCASE", true); - assertContains("aBcDe", "BCD", "UTF8_LCASE", true); - assertContains("aBcDe", "abcde", "UNICODE_CI", true); - assertContains("aBcDe", "AbCdE", "UNICODE_CI", true); + assertContains("aBcDe", "bcd", UTF8_BINARY, false); + assertContains("aBcDe", "BcD", UTF8_BINARY, true); + assertContains("aBcDe", "abcde", UNICODE, false); + assertContains("aBcDe", "aBcDe", UNICODE, true); + assertContains("aBcDe", "bcd", UTF8_LCASE, true); + assertContains("aBcDe", "BCD", UTF8_LCASE, true); + assertContains("aBcDe", "abcde", UNICODE_CI, true); + assertContains("aBcDe", "AbCdE", UNICODE_CI, true); // Accent variation. 
- assertContains("aBcDe", "bćd", "UTF8_BINARY", false); - assertContains("aBcDe", "BćD", "UTF8_BINARY", false); - assertContains("aBcDe", "abćde", "UNICODE", false); - assertContains("aBcDe", "aBćDe", "UNICODE", false); - assertContains("aBcDe", "bćd", "UTF8_LCASE", false); - assertContains("aBcDe", "BĆD", "UTF8_LCASE", false); - assertContains("aBcDe", "abćde", "UNICODE_CI", false); - assertContains("aBcDe", "AbĆdE", "UNICODE_CI", false); + assertContains("aBcDe", "bćd", UTF8_BINARY, false); + assertContains("aBcDe", "BćD", UTF8_BINARY, false); + assertContains("aBcDe", "abćde", UNICODE, false); + assertContains("aBcDe", "aBćDe", UNICODE, false); + assertContains("aBcDe", "bćd", UTF8_LCASE, false); + assertContains("aBcDe", "BĆD", UTF8_LCASE, false); + assertContains("aBcDe", "abćde", UNICODE_CI, false); + assertContains("aBcDe", "AbĆdE", UNICODE_CI, false); assertContains("abEEE", "Bèêë", "AF_CI_AI", true); // One-to-many case mapping (e.g. Turkish dotted I). - assertContains("i\u0307", "i", "UNICODE_CI", false); - assertContains("i\u0307", "\u0307", "UNICODE_CI", false); - assertContains("i\u0307", "İ", "UNICODE_CI", true); - assertContains("İ", "i", "UNICODE_CI", false); - assertContains("adi̇os", "io", "UNICODE_CI", false); - assertContains("adi̇os", "Io", "UNICODE_CI", false); - assertContains("adi̇os", "i\u0307o", "UNICODE_CI", true); - assertContains("adi̇os", "İo", "UNICODE_CI", true); - assertContains("adİos", "io", "UNICODE_CI", false); - assertContains("adİos", "Io", "UNICODE_CI", false); - assertContains("adİos", "i\u0307o", "UNICODE_CI", true); - assertContains("adİos", "İo", "UNICODE_CI", true); - assertContains("i\u0307", "i", "UTF8_LCASE", true); // != UNICODE_CI - assertContains("İ", "\u0307", "UTF8_LCASE", false); - assertContains("İ", "i", "UTF8_LCASE", false); - assertContains("i\u0307", "\u0307", "UTF8_LCASE", true); // != UNICODE_CI - assertContains("i\u0307", "İ", "UTF8_LCASE", true); - assertContains("İ", "i", "UTF8_LCASE", false); - 
assertContains("adi̇os", "io", "UTF8_LCASE", false); - assertContains("adi̇os", "Io", "UTF8_LCASE", false); - assertContains("adi̇os", "i\u0307o", "UTF8_LCASE", true); - assertContains("adi̇os", "İo", "UTF8_LCASE", true); - assertContains("adİos", "io", "UTF8_LCASE", false); - assertContains("adİos", "Io", "UTF8_LCASE", false); - assertContains("adİos", "i\u0307o", "UTF8_LCASE", true); - assertContains("adİos", "İo", "UTF8_LCASE", true); + assertContains("i\u0307", "i", UNICODE_CI, false); + assertContains("i\u0307", "\u0307", UNICODE_CI, false); + assertContains("i\u0307", "İ", UNICODE_CI, true); + assertContains("İ", "i", UNICODE_CI, false); + assertContains("adi̇os", "io", UNICODE_CI, false); + assertContains("adi̇os", "Io", UNICODE_CI, false); + assertContains("adi̇os", "i\u0307o", UNICODE_CI, true); + assertContains("adi̇os", "İo", UNICODE_CI, true); + assertContains("adİos", "io", UNICODE_CI, false); + assertContains("adİos", "Io", UNICODE_CI, false); + assertContains("adİos", "i\u0307o", UNICODE_CI, true); + assertContains("adİos", "İo", UNICODE_CI, true); + assertContains("i\u0307", "i", UTF8_LCASE, true); // != UNICODE_CI + assertContains("İ", "\u0307", UTF8_LCASE, false); + assertContains("İ", "i", UTF8_LCASE, false); + assertContains("i\u0307", "\u0307", UTF8_LCASE, true); // != UNICODE_CI + assertContains("i\u0307", "İ", UTF8_LCASE, true); + assertContains("İ", "i", UTF8_LCASE, false); + assertContains("adi̇os", "io", UTF8_LCASE, false); + assertContains("adi̇os", "Io", UTF8_LCASE, false); + assertContains("adi̇os", "i\u0307o", UTF8_LCASE, true); + assertContains("adi̇os", "İo", UTF8_LCASE, true); + assertContains("adİos", "io", UTF8_LCASE, false); + assertContains("adİos", "Io", UTF8_LCASE, false); + assertContains("adİos", "i\u0307o", UTF8_LCASE, true); + assertContains("adİos", "İo", UTF8_LCASE, true); // Conditional case mapping (e.g. Greek sigmas). 
- assertContains("σ", "σ", "UTF8_BINARY", true); - assertContains("σ", "ς", "UTF8_BINARY", false); - assertContains("σ", "Σ", "UTF8_BINARY", false); - assertContains("ς", "σ", "UTF8_BINARY", false); - assertContains("ς", "ς", "UTF8_BINARY", true); - assertContains("ς", "Σ", "UTF8_BINARY", false); - assertContains("Σ", "σ", "UTF8_BINARY", false); - assertContains("Σ", "ς", "UTF8_BINARY", false); - assertContains("Σ", "Σ", "UTF8_BINARY", true); - assertContains("σ", "σ", "UTF8_LCASE", true); - assertContains("σ", "ς", "UTF8_LCASE", true); - assertContains("σ", "Σ", "UTF8_LCASE", true); - assertContains("ς", "σ", "UTF8_LCASE", true); - assertContains("ς", "ς", "UTF8_LCASE", true); - assertContains("ς", "Σ", "UTF8_LCASE", true); - assertContains("Σ", "σ", "UTF8_LCASE", true); - assertContains("Σ", "ς", "UTF8_LCASE", true); - assertContains("Σ", "Σ", "UTF8_LCASE", true); - assertContains("σ", "σ", "UNICODE", true); - assertContains("σ", "ς", "UNICODE", false); - assertContains("σ", "Σ", "UNICODE", false); - assertContains("ς", "σ", "UNICODE", false); - assertContains("ς", "ς", "UNICODE", true); - assertContains("ς", "Σ", "UNICODE", false); - assertContains("Σ", "σ", "UNICODE", false); - assertContains("Σ", "ς", "UNICODE", false); - assertContains("Σ", "Σ", "UNICODE", true); - assertContains("σ", "σ", "UNICODE_CI", true); - assertContains("σ", "ς", "UNICODE_CI", true); - assertContains("σ", "Σ", "UNICODE_CI", true); - assertContains("ς", "σ", "UNICODE_CI", true); - assertContains("ς", "ς", "UNICODE_CI", true); - assertContains("ς", "Σ", "UNICODE_CI", true); - assertContains("Σ", "σ", "UNICODE_CI", true); - assertContains("Σ", "ς", "UNICODE_CI", true); - assertContains("Σ", "Σ", "UNICODE_CI", true); - assertContains("ΣΑΛΑΤΑ", "Σ", "UTF8_BINARY", true); - assertContains("ΣΑΛΑΤΑ", "σ", "UTF8_BINARY", false); - assertContains("ΣΑΛΑΤΑ", "ς", "UTF8_BINARY", false); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_BINARY", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_BINARY", 
false); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_BINARY", false); - assertContains("ΣΑΛΑΤΑ", "Σ", "UTF8_LCASE", true); - assertContains("ΣΑΛΑΤΑ", "σ", "UTF8_LCASE", true); - assertContains("ΣΑΛΑΤΑ", "ς", "UTF8_LCASE", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_LCASE", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_LCASE", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_LCASE", true); - assertContains("ΣΑΛΑΤΑ", "Σ", "UNICODE", true); - assertContains("ΣΑΛΑΤΑ", "σ", "UNICODE", false); - assertContains("ΣΑΛΑΤΑ", "ς", "UNICODE", false); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE", false); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE", false); - assertContains("ΣΑΛΑΤΑ", "Σ", "UNICODE_CI", true); - assertContains("ΣΑΛΑΤΑ", "σ", "UNICODE_CI", true); - assertContains("ΣΑΛΑΤΑ", "ς", "UNICODE_CI", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE_CI", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE_CI", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE_CI", true); + assertContains("σ", "σ", UTF8_BINARY, true); + assertContains("σ", "ς", UTF8_BINARY, false); + assertContains("σ", "Σ", UTF8_BINARY, false); + assertContains("ς", "σ", UTF8_BINARY, false); + assertContains("ς", "ς", UTF8_BINARY, true); + assertContains("ς", "Σ", UTF8_BINARY, false); + assertContains("Σ", "σ", UTF8_BINARY, false); + assertContains("Σ", "ς", UTF8_BINARY, false); + assertContains("Σ", "Σ", UTF8_BINARY, true); + assertContains("σ", "σ", UTF8_LCASE, true); + assertContains("σ", "ς", UTF8_LCASE, true); + assertContains("σ", "Σ", UTF8_LCASE, true); + assertContains("ς", "σ", UTF8_LCASE, true); + assertContains("ς", "ς", UTF8_LCASE, true); + assertContains("ς", "Σ", UTF8_LCASE, true); + assertContains("Σ", "σ", UTF8_LCASE, true); + assertContains("Σ", "ς", UTF8_LCASE, true); + assertContains("Σ", "Σ", UTF8_LCASE, true); + assertContains("σ", "σ", UNICODE, true); + assertContains("σ", "ς", UNICODE, false); + assertContains("σ", "Σ", 
UNICODE, false); + assertContains("ς", "σ", UNICODE, false); + assertContains("ς", "ς", UNICODE, true); + assertContains("ς", "Σ", UNICODE, false); + assertContains("Σ", "σ", UNICODE, false); + assertContains("Σ", "ς", UNICODE, false); + assertContains("Σ", "Σ", UNICODE, true); + assertContains("σ", "σ", UNICODE_CI, true); + assertContains("σ", "ς", UNICODE_CI, true); + assertContains("σ", "Σ", UNICODE_CI, true); + assertContains("ς", "σ", UNICODE_CI, true); + assertContains("ς", "ς", UNICODE_CI, true); + assertContains("ς", "Σ", UNICODE_CI, true); + assertContains("Σ", "σ", UNICODE_CI, true); + assertContains("Σ", "ς", UNICODE_CI, true); + assertContains("Σ", "Σ", UNICODE_CI, true); + assertContains("ΣΑΛΑΤΑ", "Σ", UTF8_BINARY, true); + assertContains("ΣΑΛΑΤΑ", "σ", UTF8_BINARY, false); + assertContains("ΣΑΛΑΤΑ", "ς", UTF8_BINARY, false); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_BINARY, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_BINARY, false); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_BINARY, false); + assertContains("ΣΑΛΑΤΑ", "Σ", UTF8_LCASE, true); + assertContains("ΣΑΛΑΤΑ", "σ", UTF8_LCASE, true); + assertContains("ΣΑΛΑΤΑ", "ς", UTF8_LCASE, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_LCASE, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_LCASE, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_LCASE, true); + assertContains("ΣΑΛΑΤΑ", "Σ", UNICODE, true); + assertContains("ΣΑΛΑΤΑ", "σ", UNICODE, false); + assertContains("ΣΑΛΑΤΑ", "ς", UNICODE, false); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE, false); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE, false); + assertContains("ΣΑΛΑΤΑ", "Σ", UNICODE_CI, true); + assertContains("ΣΑΛΑΤΑ", "σ", UNICODE_CI, true); + assertContains("ΣΑΛΑΤΑ", "ς", UNICODE_CI, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE_CI, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE_CI, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE_CI, true); // Surrogate pairs. 
- assertContains("a🙃b🙃c", "x", "UTF8_BINARY", false); - assertContains("a🙃b🙃c", "x", "UTF8_LCASE", false); - assertContains("a🙃b🙃c", "x", "UNICODE", false); - assertContains("a🙃b🙃c", "x", "UNICODE_CI", false); - assertContains("a🙃b🙃c", "b", "UTF8_BINARY", true); - assertContains("a🙃b🙃c", "b", "UTF8_LCASE", true); - assertContains("a🙃b🙃c", "b", "UNICODE", true); - assertContains("a🙃b🙃c", "b", "UNICODE_CI", true); - assertContains("a🙃b🙃c", "a🙃b", "UTF8_BINARY", true); - assertContains("a🙃b🙃c", "a🙃b", "UTF8_LCASE", true); - assertContains("a🙃b🙃c", "a🙃b", "UNICODE", true); - assertContains("a🙃b🙃c", "a🙃b", "UNICODE_CI", true); - assertContains("a🙃b🙃c", "b🙃c", "UTF8_BINARY", true); - assertContains("a🙃b🙃c", "b🙃c", "UTF8_LCASE", true); - assertContains("a🙃b🙃c", "b🙃c", "UNICODE", true); - assertContains("a🙃b🙃c", "b🙃c", "UNICODE_CI", true); - assertContains("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", true); - assertContains("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", true); - assertContains("a🙃b🙃c", "a🙃b🙃c", "UNICODE", true); - assertContains("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", true); - assertContains("😀😆😃😄", "😄😆", "UTF8_BINARY", false); - assertContains("😀😆😃😄", "😄😆", "UTF8_LCASE", false); - assertContains("😀😆😃😄", "😄😆", "UNICODE", false); - assertContains("😀😆😃😄", "😄😆", "UNICODE_CI", false); - assertContains("😀😆😃😄", "😆😃", "UTF8_BINARY", true); - assertContains("😀😆😃😄", "😆😃", "UTF8_LCASE", true); - assertContains("😀😆😃😄", "😆😃", "UNICODE", true); - assertContains("😀😆😃😄", "😆😃", "UNICODE_CI", true); - assertContains("😀😆😃😄", "😀😆", "UTF8_BINARY", true); - assertContains("😀😆😃😄", "😀😆", "UTF8_LCASE", true); - assertContains("😀😆😃😄", "😀😆", "UNICODE", true); - assertContains("😀😆😃😄", "😀😆", "UNICODE_CI", true); - assertContains("😀😆😃😄", "😃😄", "UTF8_BINARY", true); - assertContains("😀😆😃😄", "😃😄", "UTF8_LCASE", true); - assertContains("😀😆😃😄", "😃😄", "UNICODE", true); - assertContains("😀😆😃😄", "😃😄", "UNICODE_CI", true); - assertContains("😀😆😃😄", "😀😆😃😄", "UTF8_BINARY", true); - assertContains("😀😆😃😄", "😀😆😃😄", "UTF8_LCASE", 
true); - assertContains("😀😆😃😄", "😀😆😃😄", "UNICODE", true); - assertContains("😀😆😃😄", "😀😆😃😄", "UNICODE_CI", true); - assertContains("𐐅", "𐐅", "UTF8_BINARY", true); - assertContains("𐐅", "𐐅", "UTF8_LCASE", true); - assertContains("𐐅", "𐐅", "UNICODE", true); - assertContains("𐐅", "𐐅", "UNICODE_CI", true); - assertContains("𐐅", "𐐭", "UTF8_BINARY", false); - assertContains("𐐅", "𐐭", "UTF8_LCASE", true); - assertContains("𐐅", "𐐭", "UNICODE", false); - assertContains("𐐅", "𐐭", "UNICODE_CI", true); - assertContains("𝔸", "𝔸", "UTF8_BINARY", true); - assertContains("𝔸", "𝔸", "UTF8_LCASE", true); - assertContains("𝔸", "𝔸", "UNICODE", true); - assertContains("𝔸", "𝔸", "UNICODE_CI", true); + assertContains("a🙃b🙃c", "x", UTF8_BINARY, false); + assertContains("a🙃b🙃c", "x", UTF8_LCASE, false); + assertContains("a🙃b🙃c", "x", UNICODE, false); + assertContains("a🙃b🙃c", "x", UNICODE_CI, false); + assertContains("a🙃b🙃c", "b", UTF8_BINARY, true); + assertContains("a🙃b🙃c", "b", UTF8_LCASE, true); + assertContains("a🙃b🙃c", "b", UNICODE, true); + assertContains("a🙃b🙃c", "b", UNICODE_CI, true); + assertContains("a🙃b🙃c", "a🙃b", UTF8_BINARY, true); + assertContains("a🙃b🙃c", "a🙃b", UTF8_LCASE, true); + assertContains("a🙃b🙃c", "a🙃b", UNICODE, true); + assertContains("a🙃b🙃c", "a🙃b", UNICODE_CI, true); + assertContains("a🙃b🙃c", "b🙃c", UTF8_BINARY, true); + assertContains("a🙃b🙃c", "b🙃c", UTF8_LCASE, true); + assertContains("a🙃b🙃c", "b🙃c", UNICODE, true); + assertContains("a🙃b🙃c", "b🙃c", UNICODE_CI, true); + assertContains("a🙃b🙃c", "a🙃b🙃c", UTF8_BINARY, true); + assertContains("a🙃b🙃c", "a🙃b🙃c", UTF8_LCASE, true); + assertContains("a🙃b🙃c", "a🙃b🙃c", UNICODE, true); + assertContains("a🙃b🙃c", "a🙃b🙃c", UNICODE_CI, true); + assertContains("😀😆😃😄", "😄😆", UTF8_BINARY, false); + assertContains("😀😆😃😄", "😄😆", UTF8_LCASE, false); + assertContains("😀😆😃😄", "😄😆", UNICODE, false); + assertContains("😀😆😃😄", "😄😆", UNICODE_CI, false); + assertContains("😀😆😃😄", "😆😃", UTF8_BINARY, true); + assertContains("😀😆😃😄", "😆😃", 
UTF8_LCASE, true); + assertContains("😀😆😃😄", "😆😃", UNICODE, true); + assertContains("😀😆😃😄", "😆😃", UNICODE_CI, true); + assertContains("😀😆😃😄", "😀😆", UTF8_BINARY, true); + assertContains("😀😆😃😄", "😀😆", UTF8_LCASE, true); + assertContains("😀😆😃😄", "😀😆", UNICODE, true); + assertContains("😀😆😃😄", "😀😆", UNICODE_CI, true); + assertContains("😀😆😃😄", "😃😄", UTF8_BINARY, true); + assertContains("😀😆😃😄", "😃😄", UTF8_LCASE, true); + assertContains("😀😆😃😄", "😃😄", UNICODE, true); + assertContains("😀😆😃😄", "😃😄", UNICODE_CI, true); + assertContains("😀😆😃😄", "😀😆😃😄", UTF8_BINARY, true); + assertContains("😀😆😃😄", "😀😆😃😄", UTF8_LCASE, true); + assertContains("😀😆😃😄", "😀😆😃😄", UNICODE, true); + assertContains("😀😆😃😄", "😀😆😃😄", UNICODE_CI, true); + assertContains("𐐅", "𐐅", UTF8_BINARY, true); + assertContains("𐐅", "𐐅", UTF8_LCASE, true); + assertContains("𐐅", "𐐅", UNICODE, true); + assertContains("𐐅", "𐐅", UNICODE_CI, true); + assertContains("𐐅", "𐐭", UTF8_BINARY, false); + assertContains("𐐅", "𐐭", UTF8_LCASE, true); + assertContains("𐐅", "𐐭", UNICODE, false); + assertContains("𐐅", "𐐭", UNICODE_CI, true); + assertContains("𝔸", "𝔸", UTF8_BINARY, true); + assertContains("𝔸", "𝔸", UTF8_LCASE, true); + assertContains("𝔸", "𝔸", UNICODE, true); + assertContains("𝔸", "𝔸", UNICODE_CI, true); } /** @@ -549,211 +550,211 @@ public void testStartsWith() throws SparkException { assertStartsWith("Здраво", "Здраво", collationName, true); } // Advanced tests. 
- assertStartsWith("abcde", "abc", "UTF8_BINARY", true); - assertStartsWith("abcde", "abd", "UTF8_BINARY", false); - assertStartsWith("abcde", "fgh", "UTF8_BINARY", false); - assertStartsWith("abcde", "abcde", "UNICODE", true); - assertStartsWith("abcde", "aBcDe", "UNICODE", false); - assertStartsWith("abcde", "fghij", "UNICODE", false); - assertStartsWith("abcde", "A", "UTF8_LCASE", true); - assertStartsWith("abcde", "AbCdE", "UTF8_LCASE", true); - assertStartsWith("abcde", "X", "UTF8_LCASE", false); - assertStartsWith("abcde", "a", "UNICODE_CI", true); - assertStartsWith("abcde", "aBC", "UNICODE_CI", true); - assertStartsWith("abcde", "bcd", "UNICODE_CI", false); - assertStartsWith("abcde", "123", "UNICODE_CI", false); - assertStartsWith("ab世De", "ab世", "UTF8_BINARY", true); - assertStartsWith("ab世De", "aB世", "UTF8_BINARY", false); - assertStartsWith("äbćδe", "äbć", "UTF8_BINARY", true); - assertStartsWith("äbćδe", "äBc", "UTF8_BINARY", false); - assertStartsWith("ab世De", "ab世De", "UNICODE", true); - assertStartsWith("ab世De", "AB世dE", "UNICODE", false); - assertStartsWith("äbćδe", "äbćδe", "UNICODE", true); - assertStartsWith("äbćδe", "ÄBcΔÉ", "UNICODE", false); - assertStartsWith("ab世De", "ab世", "UTF8_LCASE", true); - assertStartsWith("ab世De", "aB世", "UTF8_LCASE", true); - assertStartsWith("äbćδe", "äbć", "UTF8_LCASE", true); - assertStartsWith("äbćδe", "äBc", "UTF8_LCASE", false); - assertStartsWith("ab世De", "ab世De", "UNICODE_CI", true); - assertStartsWith("ab世De", "AB世dE", "UNICODE_CI", true); - assertStartsWith("äbćδe", "ÄbćδE", "UNICODE_CI", true); - assertStartsWith("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false); - assertStartsWith("Kelvin.", "Kelvin", "UTF8_LCASE", true); - assertStartsWith("Kelvin.", "Kelvin", "UTF8_LCASE", true); - assertStartsWith("KKelvin.", "KKelvin", "UTF8_LCASE", true); - assertStartsWith("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); - assertStartsWith("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); - assertStartsWith("KKelvin.", "KKelvin,", 
"UTF8_LCASE", false); + assertStartsWith("abcde", "abc", UTF8_BINARY, true); + assertStartsWith("abcde", "abd", UTF8_BINARY, false); + assertStartsWith("abcde", "fgh", UTF8_BINARY, false); + assertStartsWith("abcde", "abcde", UNICODE, true); + assertStartsWith("abcde", "aBcDe", UNICODE, false); + assertStartsWith("abcde", "fghij", UNICODE, false); + assertStartsWith("abcde", "A", UTF8_LCASE, true); + assertStartsWith("abcde", "AbCdE", UTF8_LCASE, true); + assertStartsWith("abcde", "X", UTF8_LCASE, false); + assertStartsWith("abcde", "a", UNICODE_CI, true); + assertStartsWith("abcde", "aBC", UNICODE_CI, true); + assertStartsWith("abcde", "bcd", UNICODE_CI, false); + assertStartsWith("abcde", "123", UNICODE_CI, false); + assertStartsWith("ab世De", "ab世", UTF8_BINARY, true); + assertStartsWith("ab世De", "aB世", UTF8_BINARY, false); + assertStartsWith("äbćδe", "äbć", UTF8_BINARY, true); + assertStartsWith("äbćδe", "äBc", UTF8_BINARY, false); + assertStartsWith("ab世De", "ab世De", UNICODE, true); + assertStartsWith("ab世De", "AB世dE", UNICODE, false); + assertStartsWith("äbćδe", "äbćδe", UNICODE, true); + assertStartsWith("äbćδe", "ÄBcΔÉ", UNICODE, false); + assertStartsWith("ab世De", "ab世", UTF8_LCASE, true); + assertStartsWith("ab世De", "aB世", UTF8_LCASE, true); + assertStartsWith("äbćδe", "äbć", UTF8_LCASE, true); + assertStartsWith("äbćδe", "äBc", UTF8_LCASE, false); + assertStartsWith("ab世De", "ab世De", UNICODE_CI, true); + assertStartsWith("ab世De", "AB世dE", UNICODE_CI, true); + assertStartsWith("äbćδe", "ÄbćδE", UNICODE_CI, true); + assertStartsWith("äbćδe", "ÄBcΔÉ", UNICODE_CI, false); + assertStartsWith("Kelvin.", "Kelvin", UTF8_LCASE, true); + assertStartsWith("Kelvin.", "Kelvin", UTF8_LCASE, true); + assertStartsWith("KKelvin.", "KKelvin", UTF8_LCASE, true); + assertStartsWith("2 Kelvin.", "2 Kelvin", UTF8_LCASE, true); + assertStartsWith("2 Kelvin.", "2 Kelvin", UTF8_LCASE, true); + assertStartsWith("KKelvin.", "KKelvin,", UTF8_LCASE, false); assertStartsWith("Ћао", 
"Ца", "sr_Cyrl_CI_AI", false); assertStartsWith("Ћао", "ћа", "sr_Cyrl_CI_AI", true); assertStartsWith("Ćao", "Ca", "SR_CI", false); assertStartsWith("Ćao", "Ca", "SR_CI_AI", true); assertStartsWith("Ćao", "Ća", "SR", true); // Case variation. - assertStartsWith("aBcDe", "abc", "UTF8_BINARY", false); - assertStartsWith("aBcDe", "aBc", "UTF8_BINARY", true); - assertStartsWith("aBcDe", "abcde", "UNICODE", false); - assertStartsWith("aBcDe", "aBcDe", "UNICODE", true); - assertStartsWith("aBcDe", "abc", "UTF8_LCASE", true); - assertStartsWith("aBcDe", "ABC", "UTF8_LCASE", true); - assertStartsWith("aBcDe", "abcde", "UNICODE_CI", true); - assertStartsWith("aBcDe", "AbCdE", "UNICODE_CI", true); + assertStartsWith("aBcDe", "abc", UTF8_BINARY, false); + assertStartsWith("aBcDe", "aBc", UTF8_BINARY, true); + assertStartsWith("aBcDe", "abcde", UNICODE, false); + assertStartsWith("aBcDe", "aBcDe", UNICODE, true); + assertStartsWith("aBcDe", "abc", UTF8_LCASE, true); + assertStartsWith("aBcDe", "ABC", UTF8_LCASE, true); + assertStartsWith("aBcDe", "abcde", UNICODE_CI, true); + assertStartsWith("aBcDe", "AbCdE", UNICODE_CI, true); // Accent variation. 
- assertStartsWith("aBcDe", "abć", "UTF8_BINARY", false); - assertStartsWith("aBcDe", "aBć", "UTF8_BINARY", false); - assertStartsWith("aBcDe", "abćde", "UNICODE", false); - assertStartsWith("aBcDe", "aBćDe", "UNICODE", false); - assertStartsWith("aBcDe", "abć", "UTF8_LCASE", false); - assertStartsWith("aBcDe", "ABĆ", "UTF8_LCASE", false); - assertStartsWith("aBcDe", "abćde", "UNICODE_CI", false); - assertStartsWith("aBcDe", "AbĆdE", "UNICODE_CI", false); + assertStartsWith("aBcDe", "abć", UTF8_BINARY, false); + assertStartsWith("aBcDe", "aBć", UTF8_BINARY, false); + assertStartsWith("aBcDe", "abćde", UNICODE, false); + assertStartsWith("aBcDe", "aBćDe", UNICODE, false); + assertStartsWith("aBcDe", "abć", UTF8_LCASE, false); + assertStartsWith("aBcDe", "ABĆ", UTF8_LCASE, false); + assertStartsWith("aBcDe", "abćde", UNICODE_CI, false); + assertStartsWith("aBcDe", "AbĆdE", UNICODE_CI, false); // One-to-many case mapping (e.g. Turkish dotted I). - assertStartsWith("i\u0307", "i", "UNICODE_CI", false); - assertStartsWith("i\u0307", "İ", "UNICODE_CI", true); - assertStartsWith("İ", "i", "UNICODE_CI", false); - assertStartsWith("İİİ", "i̇i̇", "UNICODE_CI", true); - assertStartsWith("İİİ", "i̇i", "UNICODE_CI", false); - assertStartsWith("İi̇İ", "i̇İ", "UNICODE_CI", true); - assertStartsWith("i̇İi̇i̇", "İi̇İi", "UNICODE_CI", false); - assertStartsWith("i̇onic", "io", "UNICODE_CI", false); - assertStartsWith("i̇onic", "Io", "UNICODE_CI", false); - assertStartsWith("i̇onic", "i\u0307o", "UNICODE_CI", true); - assertStartsWith("i̇onic", "İo", "UNICODE_CI", true); - assertStartsWith("İonic", "io", "UNICODE_CI", false); - assertStartsWith("İonic", "Io", "UNICODE_CI", false); - assertStartsWith("İonic", "i\u0307o", "UNICODE_CI", true); - assertStartsWith("İonic", "İo", "UNICODE_CI", true); - assertStartsWith("i\u0307", "i", "UTF8_LCASE", true); // != UNICODE_CI - assertStartsWith("i\u0307", "İ", "UTF8_LCASE", true); - assertStartsWith("İ", "i", "UTF8_LCASE", false); - 
assertStartsWith("İİİ", "i̇i̇", "UTF8_LCASE", true); - assertStartsWith("İİİ", "i̇i", "UTF8_LCASE", false); - assertStartsWith("İi̇İ", "i̇İ", "UTF8_LCASE", true); - assertStartsWith("i̇İi̇i̇", "İi̇İi", "UTF8_LCASE", true); // != UNICODE_CI - assertStartsWith("i̇onic", "io", "UTF8_LCASE", false); - assertStartsWith("i̇onic", "Io", "UTF8_LCASE", false); - assertStartsWith("i̇onic", "i\u0307o", "UTF8_LCASE", true); - assertStartsWith("i̇onic", "İo", "UTF8_LCASE", true); - assertStartsWith("İonic", "io", "UTF8_LCASE", false); - assertStartsWith("İonic", "Io", "UTF8_LCASE", false); - assertStartsWith("İonic", "i\u0307o", "UTF8_LCASE", true); - assertStartsWith("İonic", "İo", "UTF8_LCASE", true); - assertStartsWith("oİ", "oİ", "UTF8_LCASE", true); - assertStartsWith("oİ", "oi̇", "UTF8_LCASE", true); + assertStartsWith("i\u0307", "i", UNICODE_CI, false); + assertStartsWith("i\u0307", "İ", UNICODE_CI, true); + assertStartsWith("İ", "i", UNICODE_CI, false); + assertStartsWith("İİİ", "i̇i̇", UNICODE_CI, true); + assertStartsWith("İİİ", "i̇i", UNICODE_CI, false); + assertStartsWith("İi̇İ", "i̇İ", UNICODE_CI, true); + assertStartsWith("i̇İi̇i̇", "İi̇İi", UNICODE_CI, false); + assertStartsWith("i̇onic", "io", UNICODE_CI, false); + assertStartsWith("i̇onic", "Io", UNICODE_CI, false); + assertStartsWith("i̇onic", "i\u0307o", UNICODE_CI, true); + assertStartsWith("i̇onic", "İo", UNICODE_CI, true); + assertStartsWith("İonic", "io", UNICODE_CI, false); + assertStartsWith("İonic", "Io", UNICODE_CI, false); + assertStartsWith("İonic", "i\u0307o", UNICODE_CI, true); + assertStartsWith("İonic", "İo", UNICODE_CI, true); + assertStartsWith("i\u0307", "i", UTF8_LCASE, true); // != UNICODE_CI + assertStartsWith("i\u0307", "İ", UTF8_LCASE, true); + assertStartsWith("İ", "i", UTF8_LCASE, false); + assertStartsWith("İİİ", "i̇i̇", UTF8_LCASE, true); + assertStartsWith("İİİ", "i̇i", UTF8_LCASE, false); + assertStartsWith("İi̇İ", "i̇İ", UTF8_LCASE, true); + assertStartsWith("i̇İi̇i̇", "İi̇İi", 
UTF8_LCASE, true); // != UNICODE_CI + assertStartsWith("i̇onic", "io", UTF8_LCASE, false); + assertStartsWith("i̇onic", "Io", UTF8_LCASE, false); + assertStartsWith("i̇onic", "i\u0307o", UTF8_LCASE, true); + assertStartsWith("i̇onic", "İo", UTF8_LCASE, true); + assertStartsWith("İonic", "io", UTF8_LCASE, false); + assertStartsWith("İonic", "Io", UTF8_LCASE, false); + assertStartsWith("İonic", "i\u0307o", UTF8_LCASE, true); + assertStartsWith("İonic", "İo", UTF8_LCASE, true); + assertStartsWith("oİ", "oİ", UTF8_LCASE, true); + assertStartsWith("oİ", "oi̇", UTF8_LCASE, true); // Conditional case mapping (e.g. Greek sigmas). - assertStartsWith("σ", "σ", "UTF8_BINARY", true); - assertStartsWith("σ", "ς", "UTF8_BINARY", false); - assertStartsWith("σ", "Σ", "UTF8_BINARY", false); - assertStartsWith("ς", "σ", "UTF8_BINARY", false); - assertStartsWith("ς", "ς", "UTF8_BINARY", true); - assertStartsWith("ς", "Σ", "UTF8_BINARY", false); - assertStartsWith("Σ", "σ", "UTF8_BINARY", false); - assertStartsWith("Σ", "ς", "UTF8_BINARY", false); - assertStartsWith("Σ", "Σ", "UTF8_BINARY", true); - assertStartsWith("σ", "σ", "UTF8_LCASE", true); - assertStartsWith("σ", "ς", "UTF8_LCASE", true); - assertStartsWith("σ", "Σ", "UTF8_LCASE", true); - assertStartsWith("ς", "σ", "UTF8_LCASE", true); - assertStartsWith("ς", "ς", "UTF8_LCASE", true); - assertStartsWith("ς", "Σ", "UTF8_LCASE", true); - assertStartsWith("Σ", "σ", "UTF8_LCASE", true); - assertStartsWith("Σ", "ς", "UTF8_LCASE", true); - assertStartsWith("Σ", "Σ", "UTF8_LCASE", true); - assertStartsWith("σ", "σ", "UNICODE", true); - assertStartsWith("σ", "ς", "UNICODE", false); - assertStartsWith("σ", "Σ", "UNICODE", false); - assertStartsWith("ς", "σ", "UNICODE", false); - assertStartsWith("ς", "ς", "UNICODE", true); - assertStartsWith("ς", "Σ", "UNICODE", false); - assertStartsWith("Σ", "σ", "UNICODE", false); - assertStartsWith("Σ", "ς", "UNICODE", false); - assertStartsWith("Σ", "Σ", "UNICODE", true); - assertStartsWith("σ", 
"σ", "UNICODE_CI", true); - assertStartsWith("σ", "ς", "UNICODE_CI", true); - assertStartsWith("σ", "Σ", "UNICODE_CI", true); - assertStartsWith("ς", "σ", "UNICODE_CI", true); - assertStartsWith("ς", "ς", "UNICODE_CI", true); - assertStartsWith("ς", "Σ", "UNICODE_CI", true); - assertStartsWith("Σ", "σ", "UNICODE_CI", true); - assertStartsWith("Σ", "ς", "UNICODE_CI", true); - assertStartsWith("Σ", "Σ", "UNICODE_CI", true); - assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_BINARY", true); - assertStartsWith("ΣΑΛΑΤΑ", "σ", "UTF8_BINARY", false); - assertStartsWith("ΣΑΛΑΤΑ", "ς", "UTF8_BINARY", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_BINARY", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_BINARY", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_BINARY", false); - assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_LCASE", true); - assertStartsWith("ΣΑΛΑΤΑ", "σ", "UTF8_LCASE", true); - assertStartsWith("ΣΑΛΑΤΑ", "ς", "UTF8_LCASE", true); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_LCASE", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_LCASE", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_LCASE", false); - assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE", true); - assertStartsWith("ΣΑΛΑΤΑ", "σ", "UNICODE", false); - assertStartsWith("ΣΑΛΑΤΑ", "ς", "UNICODE", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE", false); - assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE_CI", true); - assertStartsWith("ΣΑΛΑΤΑ", "σ", "UNICODE_CI", true); - assertStartsWith("ΣΑΛΑΤΑ", "ς", "UNICODE_CI", true); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE_CI", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE_CI", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE_CI", false); + assertStartsWith("σ", "σ", UTF8_BINARY, true); + assertStartsWith("σ", "ς", UTF8_BINARY, false); + assertStartsWith("σ", "Σ", UTF8_BINARY, false); + assertStartsWith("ς", "σ", UTF8_BINARY, false); + 
assertStartsWith("ς", "ς", UTF8_BINARY, true); + assertStartsWith("ς", "Σ", UTF8_BINARY, false); + assertStartsWith("Σ", "σ", UTF8_BINARY, false); + assertStartsWith("Σ", "ς", UTF8_BINARY, false); + assertStartsWith("Σ", "Σ", UTF8_BINARY, true); + assertStartsWith("σ", "σ", UTF8_LCASE, true); + assertStartsWith("σ", "ς", UTF8_LCASE, true); + assertStartsWith("σ", "Σ", UTF8_LCASE, true); + assertStartsWith("ς", "σ", UTF8_LCASE, true); + assertStartsWith("ς", "ς", UTF8_LCASE, true); + assertStartsWith("ς", "Σ", UTF8_LCASE, true); + assertStartsWith("Σ", "σ", UTF8_LCASE, true); + assertStartsWith("Σ", "ς", UTF8_LCASE, true); + assertStartsWith("Σ", "Σ", UTF8_LCASE, true); + assertStartsWith("σ", "σ", UNICODE, true); + assertStartsWith("σ", "ς", UNICODE, false); + assertStartsWith("σ", "Σ", UNICODE, false); + assertStartsWith("ς", "σ", UNICODE, false); + assertStartsWith("ς", "ς", UNICODE, true); + assertStartsWith("ς", "Σ", UNICODE, false); + assertStartsWith("Σ", "σ", UNICODE, false); + assertStartsWith("Σ", "ς", UNICODE, false); + assertStartsWith("Σ", "Σ", UNICODE, true); + assertStartsWith("σ", "σ", UNICODE_CI, true); + assertStartsWith("σ", "ς", UNICODE_CI, true); + assertStartsWith("σ", "Σ", UNICODE_CI, true); + assertStartsWith("ς", "σ", UNICODE_CI, true); + assertStartsWith("ς", "ς", UNICODE_CI, true); + assertStartsWith("ς", "Σ", UNICODE_CI, true); + assertStartsWith("Σ", "σ", UNICODE_CI, true); + assertStartsWith("Σ", "ς", UNICODE_CI, true); + assertStartsWith("Σ", "Σ", UNICODE_CI, true); + assertStartsWith("ΣΑΛΑΤΑ", "Σ", UTF8_BINARY, true); + assertStartsWith("ΣΑΛΑΤΑ", "σ", UTF8_BINARY, false); + assertStartsWith("ΣΑΛΑΤΑ", "ς", UTF8_BINARY, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_BINARY, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_BINARY, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_BINARY, false); + assertStartsWith("ΣΑΛΑΤΑ", "Σ", UTF8_LCASE, true); + assertStartsWith("ΣΑΛΑΤΑ", "σ", UTF8_LCASE, true); + assertStartsWith("ΣΑΛΑΤΑ", 
"ς", UTF8_LCASE, true); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_LCASE, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_LCASE, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_LCASE, false); + assertStartsWith("ΣΑΛΑΤΑ", "Σ", UNICODE, true); + assertStartsWith("ΣΑΛΑΤΑ", "σ", UNICODE, false); + assertStartsWith("ΣΑΛΑΤΑ", "ς", UNICODE, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE, false); + assertStartsWith("ΣΑΛΑΤΑ", "Σ", UNICODE_CI, true); + assertStartsWith("ΣΑΛΑΤΑ", "σ", UNICODE_CI, true); + assertStartsWith("ΣΑΛΑΤΑ", "ς", UNICODE_CI, true); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE_CI, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE_CI, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE_CI, false); // Surrogate pairs. - assertStartsWith("a🙃b🙃c", "x", "UTF8_BINARY", false); - assertStartsWith("a🙃b🙃c", "x", "UTF8_LCASE", false); - assertStartsWith("a🙃b🙃c", "x", "UNICODE", false); - assertStartsWith("a🙃b🙃c", "x", "UNICODE_CI", false); - assertStartsWith("a🙃b🙃c", "b", "UTF8_BINARY", false); - assertStartsWith("a🙃b🙃c", "b", "UTF8_LCASE", false); - assertStartsWith("a🙃b🙃c", "b", "UNICODE", false); - assertStartsWith("a🙃b🙃c", "b", "UNICODE_CI", false); - assertStartsWith("a🙃b🙃c", "a🙃b", "UTF8_BINARY", true); - assertStartsWith("a🙃b🙃c", "a🙃b", "UTF8_LCASE", true); - assertStartsWith("a🙃b🙃c", "a🙃b", "UNICODE", true); - assertStartsWith("a🙃b🙃c", "a🙃b", "UNICODE_CI", true); - assertStartsWith("a🙃b🙃c", "b🙃c", "UTF8_BINARY", false); - assertStartsWith("a🙃b🙃c", "b🙃c", "UTF8_LCASE", false); - assertStartsWith("a🙃b🙃c", "b🙃c", "UNICODE", false); - assertStartsWith("a🙃b🙃c", "b🙃c", "UNICODE_CI", false); - assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", true); - assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", true); - assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE", true); - assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", true); - 
assertStartsWith("😀😆😃😄", "😄😆", "UTF8_BINARY", false); - assertStartsWith("😀😆😃😄", "😄😆", "UTF8_LCASE", false); - assertStartsWith("😀😆😃😄", "😄😆", "UNICODE", false); - assertStartsWith("😀😆😃😄", "😄😆", "UNICODE_CI", false); - assertStartsWith("😀😆😃😄", "😆😃", "UTF8_BINARY", false); - assertStartsWith("😀😆😃😄", "😆😃", "UTF8_LCASE", false); - assertStartsWith("😀😆😃😄", "😆😃", "UNICODE", false); - assertStartsWith("😀😆😃😄", "😆😃", "UNICODE_CI", false); - assertStartsWith("😀😆😃😄", "😀😆", "UTF8_BINARY", true); - assertStartsWith("😀😆😃😄", "😀😆", "UTF8_LCASE", true); - assertStartsWith("😀😆😃😄", "😀😆", "UNICODE", true); - assertStartsWith("😀😆😃😄", "😀😆", "UNICODE_CI", true); - assertStartsWith("😀😆😃😄", "😃😄", "UTF8_BINARY", false); - assertStartsWith("😀😆😃😄", "😃😄", "UTF8_LCASE", false); - assertStartsWith("😀😆😃😄", "😃😄", "UNICODE", false); - assertStartsWith("😀😆😃😄", "😃😄", "UNICODE_CI", false); - assertStartsWith("😀😆😃😄", "😀😆😃😄", "UTF8_BINARY", true); - assertStartsWith("😀😆😃😄", "😀😆😃😄", "UTF8_LCASE", true); - assertStartsWith("😀😆😃😄", "😀😆😃😄", "UNICODE", true); - assertStartsWith("😀😆😃😄", "😀😆😃😄", "UNICODE_CI", true); - assertStartsWith("𐐅", "𐐅", "UTF8_BINARY", true); - assertStartsWith("𐐅", "𐐅", "UTF8_LCASE", true); - assertStartsWith("𐐅", "𐐅", "UNICODE", true); - assertStartsWith("𐐅", "𐐅", "UNICODE_CI", true); - assertStartsWith("𐐅", "𐐭", "UTF8_BINARY", false); - assertStartsWith("𐐅", "𐐭", "UTF8_LCASE", true); - assertStartsWith("𐐅", "𐐭", "UNICODE", false); - assertStartsWith("𐐅", "𐐭", "UNICODE_CI", true); - assertStartsWith("𝔸", "𝔸", "UTF8_BINARY", true); - assertStartsWith("𝔸", "𝔸", "UTF8_LCASE", true); - assertStartsWith("𝔸", "𝔸", "UNICODE", true); - assertStartsWith("𝔸", "𝔸", "UNICODE_CI", true); + assertStartsWith("a🙃b🙃c", "x", UTF8_BINARY, false); + assertStartsWith("a🙃b🙃c", "x", UTF8_LCASE, false); + assertStartsWith("a🙃b🙃c", "x", UNICODE, false); + assertStartsWith("a🙃b🙃c", "x", UNICODE_CI, false); + assertStartsWith("a🙃b🙃c", "b", UTF8_BINARY, false); + assertStartsWith("a🙃b🙃c", "b", UTF8_LCASE, 
false); + assertStartsWith("a🙃b🙃c", "b", UNICODE, false); + assertStartsWith("a🙃b🙃c", "b", UNICODE_CI, false); + assertStartsWith("a🙃b🙃c", "a🙃b", UTF8_BINARY, true); + assertStartsWith("a🙃b🙃c", "a🙃b", UTF8_LCASE, true); + assertStartsWith("a🙃b🙃c", "a🙃b", UNICODE, true); + assertStartsWith("a🙃b🙃c", "a🙃b", UNICODE_CI, true); + assertStartsWith("a🙃b🙃c", "b🙃c", UTF8_BINARY, false); + assertStartsWith("a🙃b🙃c", "b🙃c", UTF8_LCASE, false); + assertStartsWith("a🙃b🙃c", "b🙃c", UNICODE, false); + assertStartsWith("a🙃b🙃c", "b🙃c", UNICODE_CI, false); + assertStartsWith("a🙃b🙃c", "a🙃b🙃c", UTF8_BINARY, true); + assertStartsWith("a🙃b🙃c", "a🙃b🙃c", UTF8_LCASE, true); + assertStartsWith("a🙃b🙃c", "a🙃b🙃c", UNICODE, true); + assertStartsWith("a🙃b🙃c", "a🙃b🙃c", UNICODE_CI, true); + assertStartsWith("😀😆😃😄", "😄😆", UTF8_BINARY, false); + assertStartsWith("😀😆😃😄", "😄😆", UTF8_LCASE, false); + assertStartsWith("😀😆😃😄", "😄😆", UNICODE, false); + assertStartsWith("😀😆😃😄", "😄😆", UNICODE_CI, false); + assertStartsWith("😀😆😃😄", "😆😃", UTF8_BINARY, false); + assertStartsWith("😀😆😃😄", "😆😃", UTF8_LCASE, false); + assertStartsWith("😀😆😃😄", "😆😃", UNICODE, false); + assertStartsWith("😀😆😃😄", "😆😃", UNICODE_CI, false); + assertStartsWith("😀😆😃😄", "😀😆", UTF8_BINARY, true); + assertStartsWith("😀😆😃😄", "😀😆", UTF8_LCASE, true); + assertStartsWith("😀😆😃😄", "😀😆", UNICODE, true); + assertStartsWith("😀😆😃😄", "😀😆", UNICODE_CI, true); + assertStartsWith("😀😆😃😄", "😃😄", UTF8_BINARY, false); + assertStartsWith("😀😆😃😄", "😃😄", UTF8_LCASE, false); + assertStartsWith("😀😆😃😄", "😃😄", UNICODE, false); + assertStartsWith("😀😆😃😄", "😃😄", UNICODE_CI, false); + assertStartsWith("😀😆😃😄", "😀😆😃😄", UTF8_BINARY, true); + assertStartsWith("😀😆😃😄", "😀😆😃😄", UTF8_LCASE, true); + assertStartsWith("😀😆😃😄", "😀😆😃😄", UNICODE, true); + assertStartsWith("😀😆😃😄", "😀😆😃😄", UNICODE_CI, true); + assertStartsWith("𐐅", "𐐅", UTF8_BINARY, true); + assertStartsWith("𐐅", "𐐅", UTF8_LCASE, true); + assertStartsWith("𐐅", "𐐅", UNICODE, true); + assertStartsWith("𐐅", "𐐅", UNICODE_CI, 
true); + assertStartsWith("𐐅", "𐐭", UTF8_BINARY, false); + assertStartsWith("𐐅", "𐐭", UTF8_LCASE, true); + assertStartsWith("𐐅", "𐐭", UNICODE, false); + assertStartsWith("𐐅", "𐐭", UNICODE_CI, true); + assertStartsWith("𝔸", "𝔸", UTF8_BINARY, true); + assertStartsWith("𝔸", "𝔸", UTF8_LCASE, true); + assertStartsWith("𝔸", "𝔸", UNICODE, true); + assertStartsWith("𝔸", "𝔸", UNICODE_CI, true); } /** @@ -806,212 +807,212 @@ public void testEndsWith() throws SparkException { assertEndsWith("Здраво", "Здраво", collationName, true); } // Advanced tests. - assertEndsWith("abcde", "cde", "UTF8_BINARY", true); - assertEndsWith("abcde", "bde", "UTF8_BINARY", false); - assertEndsWith("abcde", "fgh", "UTF8_BINARY", false); - assertEndsWith("abcde", "abcde", "UNICODE", true); - assertEndsWith("abcde", "aBcDe", "UNICODE", false); - assertEndsWith("abcde", "fghij", "UNICODE", false); - assertEndsWith("abcde", "E", "UTF8_LCASE", true); - assertEndsWith("abcde", "AbCdE", "UTF8_LCASE", true); - assertEndsWith("abcde", "X", "UTF8_LCASE", false); - assertEndsWith("abcde", "e", "UNICODE_CI", true); - assertEndsWith("abcde", "CDe", "UNICODE_CI", true); - assertEndsWith("abcde", "bcd", "UNICODE_CI", false); - assertEndsWith("abcde", "123", "UNICODE_CI", false); - assertEndsWith("ab世De", "世De", "UTF8_BINARY", true); - assertEndsWith("ab世De", "世dE", "UTF8_BINARY", false); - assertEndsWith("äbćδe", "ćδe", "UTF8_BINARY", true); - assertEndsWith("äbćδe", "cΔé", "UTF8_BINARY", false); - assertEndsWith("ab世De", "ab世De", "UNICODE", true); - assertEndsWith("ab世De", "AB世dE", "UNICODE", false); - assertEndsWith("äbćδe", "äbćδe", "UNICODE", true); - assertEndsWith("äbćδe", "ÄBcΔÉ", "UNICODE", false); - assertEndsWith("ab世De", "世De", "UTF8_LCASE", true); - assertEndsWith("ab世De", "世dE", "UTF8_LCASE", true); - assertEndsWith("äbćδe", "ćδe", "UTF8_LCASE", true); - assertEndsWith("äbćδe", "cδE", "UTF8_LCASE", false); - assertEndsWith("ab世De", "ab世De", "UNICODE_CI", true); - assertEndsWith("ab世De", "AB世dE", 
"UNICODE_CI", true); - assertEndsWith("äbćδe", "ÄbćδE", "UNICODE_CI", true); - assertEndsWith("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false); - assertEndsWith("The Kelvin", "Kelvin", "UTF8_LCASE", true); - assertEndsWith("The Kelvin", "Kelvin", "UTF8_LCASE", true); - assertEndsWith("The KKelvin", "KKelvin", "UTF8_LCASE", true); - assertEndsWith("The 2 Kelvin", "2 Kelvin", "UTF8_LCASE", true); - assertEndsWith("The 2 Kelvin", "2 Kelvin", "UTF8_LCASE", true); - assertEndsWith("The KKelvin", "KKelvin,", "UTF8_LCASE", false); + assertEndsWith("abcde", "cde", UTF8_BINARY, true); + assertEndsWith("abcde", "bde", UTF8_BINARY, false); + assertEndsWith("abcde", "fgh", UTF8_BINARY, false); + assertEndsWith("abcde", "abcde", UNICODE, true); + assertEndsWith("abcde", "aBcDe", UNICODE, false); + assertEndsWith("abcde", "fghij", UNICODE, false); + assertEndsWith("abcde", "E", UTF8_LCASE, true); + assertEndsWith("abcde", "AbCdE", UTF8_LCASE, true); + assertEndsWith("abcde", "X", UTF8_LCASE, false); + assertEndsWith("abcde", "e", UNICODE_CI, true); + assertEndsWith("abcde", "CDe", UNICODE_CI, true); + assertEndsWith("abcde", "bcd", UNICODE_CI, false); + assertEndsWith("abcde", "123", UNICODE_CI, false); + assertEndsWith("ab世De", "世De", UTF8_BINARY, true); + assertEndsWith("ab世De", "世dE", UTF8_BINARY, false); + assertEndsWith("äbćδe", "ćδe", UTF8_BINARY, true); + assertEndsWith("äbćδe", "cΔé", UTF8_BINARY, false); + assertEndsWith("ab世De", "ab世De", UNICODE, true); + assertEndsWith("ab世De", "AB世dE", UNICODE, false); + assertEndsWith("äbćδe", "äbćδe", UNICODE, true); + assertEndsWith("äbćδe", "ÄBcΔÉ", UNICODE, false); + assertEndsWith("ab世De", "世De", UTF8_LCASE, true); + assertEndsWith("ab世De", "世dE", UTF8_LCASE, true); + assertEndsWith("äbćδe", "ćδe", UTF8_LCASE, true); + assertEndsWith("äbćδe", "cδE", UTF8_LCASE, false); + assertEndsWith("ab世De", "ab世De", UNICODE_CI, true); + assertEndsWith("ab世De", "AB世dE", UNICODE_CI, true); + assertEndsWith("äbćδe", "ÄbćδE", UNICODE_CI, true); + 
assertEndsWith("äbćδe", "ÄBcΔÉ", UNICODE_CI, false); + assertEndsWith("The Kelvin", "Kelvin", UTF8_LCASE, true); + assertEndsWith("The Kelvin", "Kelvin", UTF8_LCASE, true); + assertEndsWith("The KKelvin", "KKelvin", UTF8_LCASE, true); + assertEndsWith("The 2 Kelvin", "2 Kelvin", UTF8_LCASE, true); + assertEndsWith("The 2 Kelvin", "2 Kelvin", UTF8_LCASE, true); + assertEndsWith("The KKelvin", "KKelvin,", UTF8_LCASE, false); assertEndsWith("Ћевапчићи", "цици", "sr_Cyrl_CI_AI", false); assertEndsWith("Ћевапчићи", "чИЋи", "sr_Cyrl_CI_AI", true); assertEndsWith("Ćevapčići", "cici", "SR_CI", false); assertEndsWith("Ćevapčići", "cici", "SR_CI_AI", true); assertEndsWith("Ćevapčići", "čići", "SR", true); // Case variation. - assertEndsWith("aBcDe", "cde", "UTF8_BINARY", false); - assertEndsWith("aBcDe", "cDe", "UTF8_BINARY", true); - assertEndsWith("aBcDe", "abcde", "UNICODE", false); - assertEndsWith("aBcDe", "aBcDe", "UNICODE", true); - assertEndsWith("aBcDe", "cde", "UTF8_LCASE", true); - assertEndsWith("aBcDe", "CDE", "UTF8_LCASE", true); - assertEndsWith("aBcDe", "abcde", "UNICODE_CI", true); - assertEndsWith("aBcDe", "AbCdE", "UNICODE_CI", true); + assertEndsWith("aBcDe", "cde", UTF8_BINARY, false); + assertEndsWith("aBcDe", "cDe", UTF8_BINARY, true); + assertEndsWith("aBcDe", "abcde", UNICODE, false); + assertEndsWith("aBcDe", "aBcDe", UNICODE, true); + assertEndsWith("aBcDe", "cde", UTF8_LCASE, true); + assertEndsWith("aBcDe", "CDE", UTF8_LCASE, true); + assertEndsWith("aBcDe", "abcde", UNICODE_CI, true); + assertEndsWith("aBcDe", "AbCdE", UNICODE_CI, true); // Accent variation. 
- assertEndsWith("aBcDe", "ćde", "UTF8_BINARY", false); - assertEndsWith("aBcDe", "ćDe", "UTF8_BINARY", false); - assertEndsWith("aBcDe", "abćde", "UNICODE", false); - assertEndsWith("aBcDe", "aBćDe", "UNICODE", false); - assertEndsWith("aBcDe", "ćde", "UTF8_LCASE", false); - assertEndsWith("aBcDe", "ĆDE", "UTF8_LCASE", false); - assertEndsWith("aBcDe", "abćde", "UNICODE_CI", false); - assertEndsWith("aBcDe", "AbĆdE", "UNICODE_CI", false); + assertEndsWith("aBcDe", "ćde", UTF8_BINARY, false); + assertEndsWith("aBcDe", "ćDe", UTF8_BINARY, false); + assertEndsWith("aBcDe", "abćde", UNICODE, false); + assertEndsWith("aBcDe", "aBćDe", UNICODE, false); + assertEndsWith("aBcDe", "ćde", UTF8_LCASE, false); + assertEndsWith("aBcDe", "ĆDE", UTF8_LCASE, false); + assertEndsWith("aBcDe", "abćde", UNICODE_CI, false); + assertEndsWith("aBcDe", "AbĆdE", UNICODE_CI, false); // One-to-many case mapping (e.g. Turkish dotted I). - assertEndsWith("i\u0307", "\u0307", "UNICODE_CI", false); - assertEndsWith("i\u0307", "İ", "UNICODE_CI", true); - assertEndsWith("İ", "i", "UNICODE_CI", false); - assertEndsWith("İİİ", "i̇i̇", "UNICODE_CI", true); - assertEndsWith("İİİ", "ii̇", "UNICODE_CI", false); - assertEndsWith("İi̇İ", "İi̇", "UNICODE_CI", true); - assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", "UNICODE_CI", false); - assertEndsWith("the i\u0307o", "io", "UNICODE_CI", false); - assertEndsWith("the i\u0307o", "Io", "UNICODE_CI", false); - assertEndsWith("the i\u0307o", "i\u0307o", "UNICODE_CI", true); - assertEndsWith("the i\u0307o", "İo", "UNICODE_CI", true); - assertEndsWith("the İo", "io", "UNICODE_CI", false); - assertEndsWith("the İo", "Io", "UNICODE_CI", false); - assertEndsWith("the İo", "i\u0307o", "UNICODE_CI", true); - assertEndsWith("the İo", "İo", "UNICODE_CI", true); - assertEndsWith("i\u0307", "\u0307", "UTF8_LCASE", true); // != UNICODE_CI - assertEndsWith("i\u0307", "İ", "UTF8_LCASE", true); - assertEndsWith("İ", "\u0307", "UTF8_LCASE", false); - assertEndsWith("İİİ", "i̇i̇", 
"UTF8_LCASE", true); - assertEndsWith("İİİ", "ii̇", "UTF8_LCASE", false); - assertEndsWith("İi̇İ", "İi̇", "UTF8_LCASE", true); - assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", "UTF8_LCASE", true); // != UNICODE_CI - assertEndsWith("i̇İi̇i̇", "\u0307İİ", "UTF8_LCASE", false); - assertEndsWith("the i\u0307o", "io", "UTF8_LCASE", false); - assertEndsWith("the i\u0307o", "Io", "UTF8_LCASE", false); - assertEndsWith("the i\u0307o", "i\u0307o", "UTF8_LCASE", true); - assertEndsWith("the i\u0307o", "İo", "UTF8_LCASE", true); - assertEndsWith("the İo", "io", "UTF8_LCASE", false); - assertEndsWith("the İo", "Io", "UTF8_LCASE", false); - assertEndsWith("the İo", "i\u0307o", "UTF8_LCASE", true); - assertEndsWith("the İo", "İo", "UTF8_LCASE", true); - assertEndsWith("İo", "İo", "UTF8_LCASE", true); - assertEndsWith("İo", "i̇o", "UTF8_LCASE", true); + assertEndsWith("i\u0307", "\u0307", UNICODE_CI, false); + assertEndsWith("i\u0307", "İ", UNICODE_CI, true); + assertEndsWith("İ", "i", UNICODE_CI, false); + assertEndsWith("İİİ", "i̇i̇", UNICODE_CI, true); + assertEndsWith("İİİ", "ii̇", UNICODE_CI, false); + assertEndsWith("İi̇İ", "İi̇", UNICODE_CI, true); + assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", UNICODE_CI, false); + assertEndsWith("the i\u0307o", "io", UNICODE_CI, false); + assertEndsWith("the i\u0307o", "Io", UNICODE_CI, false); + assertEndsWith("the i\u0307o", "i\u0307o", UNICODE_CI, true); + assertEndsWith("the i\u0307o", "İo", UNICODE_CI, true); + assertEndsWith("the İo", "io", UNICODE_CI, false); + assertEndsWith("the İo", "Io", UNICODE_CI, false); + assertEndsWith("the İo", "i\u0307o", UNICODE_CI, true); + assertEndsWith("the İo", "İo", UNICODE_CI, true); + assertEndsWith("i\u0307", "\u0307", UTF8_LCASE, true); // != UNICODE_CI + assertEndsWith("i\u0307", "İ", UTF8_LCASE, true); + assertEndsWith("İ", "\u0307", UTF8_LCASE, false); + assertEndsWith("İİİ", "i̇i̇", UTF8_LCASE, true); + assertEndsWith("İİİ", "ii̇", UTF8_LCASE, false); + assertEndsWith("İi̇İ", "İi̇", UTF8_LCASE, 
true); + assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", UTF8_LCASE, true); // != UNICODE_CI + assertEndsWith("i̇İi̇i̇", "\u0307İİ", UTF8_LCASE, false); + assertEndsWith("the i\u0307o", "io", UTF8_LCASE, false); + assertEndsWith("the i\u0307o", "Io", UTF8_LCASE, false); + assertEndsWith("the i\u0307o", "i\u0307o", UTF8_LCASE, true); + assertEndsWith("the i\u0307o", "İo", UTF8_LCASE, true); + assertEndsWith("the İo", "io", UTF8_LCASE, false); + assertEndsWith("the İo", "Io", UTF8_LCASE, false); + assertEndsWith("the İo", "i\u0307o", UTF8_LCASE, true); + assertEndsWith("the İo", "İo", UTF8_LCASE, true); + assertEndsWith("İo", "İo", UTF8_LCASE, true); + assertEndsWith("İo", "i̇o", UTF8_LCASE, true); // Conditional case mapping (e.g. Greek sigmas). - assertEndsWith("σ", "σ", "UTF8_BINARY", true); - assertEndsWith("σ", "ς", "UTF8_BINARY", false); - assertEndsWith("σ", "Σ", "UTF8_BINARY", false); - assertEndsWith("ς", "σ", "UTF8_BINARY", false); - assertEndsWith("ς", "ς", "UTF8_BINARY", true); - assertEndsWith("ς", "Σ", "UTF8_BINARY", false); - assertEndsWith("Σ", "σ", "UTF8_BINARY", false); - assertEndsWith("Σ", "ς", "UTF8_BINARY", false); - assertEndsWith("Σ", "Σ", "UTF8_BINARY", true); - assertEndsWith("σ", "σ", "UTF8_LCASE", true); - assertEndsWith("σ", "ς", "UTF8_LCASE", true); - assertEndsWith("σ", "Σ", "UTF8_LCASE", true); - assertEndsWith("ς", "σ", "UTF8_LCASE", true); - assertEndsWith("ς", "ς", "UTF8_LCASE", true); - assertEndsWith("ς", "Σ", "UTF8_LCASE", true); - assertEndsWith("Σ", "σ", "UTF8_LCASE", true); - assertEndsWith("Σ", "ς", "UTF8_LCASE", true); - assertEndsWith("Σ", "Σ", "UTF8_LCASE", true); - assertEndsWith("σ", "σ", "UNICODE", true); - assertEndsWith("σ", "ς", "UNICODE", false); - assertEndsWith("σ", "Σ", "UNICODE", false); - assertEndsWith("ς", "σ", "UNICODE", false); - assertEndsWith("ς", "ς", "UNICODE", true); - assertEndsWith("ς", "Σ", "UNICODE", false); - assertEndsWith("Σ", "σ", "UNICODE", false); - assertEndsWith("Σ", "ς", "UNICODE", false); - 
assertEndsWith("Σ", "Σ", "UNICODE", true); - assertEndsWith("σ", "σ", "UNICODE_CI", true); - assertEndsWith("σ", "ς", "UNICODE_CI", true); - assertEndsWith("σ", "Σ", "UNICODE_CI", true); - assertEndsWith("ς", "σ", "UNICODE_CI", true); - assertEndsWith("ς", "ς", "UNICODE_CI", true); - assertEndsWith("ς", "Σ", "UNICODE_CI", true); - assertEndsWith("Σ", "σ", "UNICODE_CI", true); - assertEndsWith("Σ", "ς", "UNICODE_CI", true); - assertEndsWith("Σ", "Σ", "UNICODE_CI", true); - assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_BINARY", false); - assertEndsWith("ΣΑΛΑΤΑ", "σ", "UTF8_BINARY", false); - assertEndsWith("ΣΑΛΑΤΑ", "ς", "UTF8_BINARY", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_BINARY", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_BINARY", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_BINARY", false); - assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_LCASE", false); - assertEndsWith("ΣΑΛΑΤΑ", "σ", "UTF8_LCASE", false); - assertEndsWith("ΣΑΛΑΤΑ", "ς", "UTF8_LCASE", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_LCASE", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_LCASE", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_LCASE", true); - assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE", false); - assertEndsWith("ΣΑΛΑΤΑ", "σ", "UNICODE", false); - assertEndsWith("ΣΑΛΑΤΑ", "ς", "UNICODE", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE", false); - assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE_CI", false); - assertEndsWith("ΣΑΛΑΤΑ", "σ", "UNICODE_CI", false); - assertEndsWith("ΣΑΛΑΤΑ", "ς", "UNICODE_CI", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE_CI", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE_CI", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE_CI", true); + assertEndsWith("σ", "σ", UTF8_BINARY, true); + assertEndsWith("σ", "ς", UTF8_BINARY, false); + assertEndsWith("σ", "Σ", UTF8_BINARY, false); + assertEndsWith("ς", "σ", UTF8_BINARY, false); + 
assertEndsWith("ς", "ς", UTF8_BINARY, true); + assertEndsWith("ς", "Σ", UTF8_BINARY, false); + assertEndsWith("Σ", "σ", UTF8_BINARY, false); + assertEndsWith("Σ", "ς", UTF8_BINARY, false); + assertEndsWith("Σ", "Σ", UTF8_BINARY, true); + assertEndsWith("σ", "σ", UTF8_LCASE, true); + assertEndsWith("σ", "ς", UTF8_LCASE, true); + assertEndsWith("σ", "Σ", UTF8_LCASE, true); + assertEndsWith("ς", "σ", UTF8_LCASE, true); + assertEndsWith("ς", "ς", UTF8_LCASE, true); + assertEndsWith("ς", "Σ", UTF8_LCASE, true); + assertEndsWith("Σ", "σ", UTF8_LCASE, true); + assertEndsWith("Σ", "ς", UTF8_LCASE, true); + assertEndsWith("Σ", "Σ", UTF8_LCASE, true); + assertEndsWith("σ", "σ", UNICODE, true); + assertEndsWith("σ", "ς", UNICODE, false); + assertEndsWith("σ", "Σ", UNICODE, false); + assertEndsWith("ς", "σ", UNICODE, false); + assertEndsWith("ς", "ς", UNICODE, true); + assertEndsWith("ς", "Σ", UNICODE, false); + assertEndsWith("Σ", "σ", UNICODE, false); + assertEndsWith("Σ", "ς", UNICODE, false); + assertEndsWith("Σ", "Σ", UNICODE, true); + assertEndsWith("σ", "σ", UNICODE_CI, true); + assertEndsWith("σ", "ς", UNICODE_CI, true); + assertEndsWith("σ", "Σ", UNICODE_CI, true); + assertEndsWith("ς", "σ", UNICODE_CI, true); + assertEndsWith("ς", "ς", UNICODE_CI, true); + assertEndsWith("ς", "Σ", UNICODE_CI, true); + assertEndsWith("Σ", "σ", UNICODE_CI, true); + assertEndsWith("Σ", "ς", UNICODE_CI, true); + assertEndsWith("Σ", "Σ", UNICODE_CI, true); + assertEndsWith("ΣΑΛΑΤΑ", "Σ", UTF8_BINARY, false); + assertEndsWith("ΣΑΛΑΤΑ", "σ", UTF8_BINARY, false); + assertEndsWith("ΣΑΛΑΤΑ", "ς", UTF8_BINARY, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_BINARY, true); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_BINARY, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_BINARY, false); + assertEndsWith("ΣΑΛΑΤΑ", "Σ", UTF8_LCASE, false); + assertEndsWith("ΣΑΛΑΤΑ", "σ", UTF8_LCASE, false); + assertEndsWith("ΣΑΛΑΤΑ", "ς", UTF8_LCASE, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_LCASE, true); + 
assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_LCASE, true); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_LCASE, true); + assertEndsWith("ΣΑΛΑΤΑ", "Σ", UNICODE, false); + assertEndsWith("ΣΑΛΑΤΑ", "σ", UNICODE, false); + assertEndsWith("ΣΑΛΑΤΑ", "ς", UNICODE, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE, true); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE, false); + assertEndsWith("ΣΑΛΑΤΑ", "Σ", UNICODE_CI, false); + assertEndsWith("ΣΑΛΑΤΑ", "σ", UNICODE_CI, false); + assertEndsWith("ΣΑΛΑΤΑ", "ς", UNICODE_CI, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE_CI, true); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE_CI, true); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE_CI, true); // Surrogate pairs. - assertEndsWith("a🙃b🙃c", "x", "UTF8_BINARY", false); - assertEndsWith("a🙃b🙃c", "x", "UTF8_LCASE", false); - assertEndsWith("a🙃b🙃c", "x", "UNICODE", false); - assertEndsWith("a🙃b🙃c", "x", "UNICODE_CI", false); - assertEndsWith("a🙃b🙃c", "b", "UTF8_BINARY", false); - assertEndsWith("a🙃b🙃c", "b", "UTF8_LCASE", false); - assertEndsWith("a🙃b🙃c", "b", "UNICODE", false); - assertEndsWith("a🙃b🙃c", "b", "UNICODE_CI", false); - assertEndsWith("a🙃b🙃c", "a🙃b", "UTF8_BINARY", false); - assertEndsWith("a🙃b🙃c", "a🙃b", "UTF8_LCASE", false); - assertEndsWith("a🙃b🙃c", "a🙃b", "UNICODE", false); - assertEndsWith("a🙃b🙃c", "a🙃b", "UNICODE_CI", false); - assertEndsWith("a🙃b🙃c", "b🙃c", "UTF8_BINARY", true); - assertEndsWith("a🙃b🙃c", "b🙃c", "UTF8_LCASE", true); - assertEndsWith("a🙃b🙃c", "b🙃c", "UNICODE", true); - assertEndsWith("a🙃b🙃c", "b🙃c", "UNICODE_CI", true); - assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", true); - assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", true); - assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE", true); - assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", true); - assertEndsWith("😀😆😃😄", "😄😆", "UTF8_BINARY", false); - assertEndsWith("😀😆😃😄", "😄😆", "UTF8_LCASE", false); - assertEndsWith("😀😆😃😄", "😄😆", "UNICODE", false); - 
assertEndsWith("😀😆😃😄", "😄😆", "UNICODE_CI", false); - assertEndsWith("😀😆😃😄", "😆😃", "UTF8_BINARY", false); - assertEndsWith("😀😆😃😄", "😆😃", "UTF8_LCASE", false); - assertEndsWith("😀😆😃😄", "😆😃", "UNICODE", false); - assertEndsWith("😀😆😃😄", "😆😃", "UNICODE_CI", false); - assertEndsWith("😀😆😃😄", "😀😆", "UTF8_BINARY", false); - assertEndsWith("😀😆😃😄", "😀😆", "UTF8_LCASE", false); - assertEndsWith("😀😆😃😄", "😀😆", "UNICODE", false); - assertEndsWith("😀😆😃😄", "😀😆", "UNICODE_CI", false); - assertEndsWith("😀😆😃😄", "😃😄", "UTF8_BINARY", true); - assertEndsWith("😀😆😃😄", "😃😄", "UTF8_LCASE", true); - assertEndsWith("😀😆😃😄", "😃😄", "UNICODE", true); - assertEndsWith("😀😆😃😄", "😃😄", "UNICODE_CI", true); - assertEndsWith("😀😆😃😄", "😀😆😃😄", "UTF8_BINARY", true); - assertEndsWith("😀😆😃😄", "😀😆😃😄", "UTF8_LCASE", true); - assertEndsWith("😀😆😃😄", "😀😆😃😄", "UNICODE", true); - assertEndsWith("😀😆😃😄", "😀😆😃😄", "UNICODE_CI", true); - assertEndsWith("𐐅", "𐐅", "UTF8_BINARY", true); - assertEndsWith("𐐅", "𐐅", "UTF8_LCASE", true); - assertEndsWith("𐐅", "𐐅", "UNICODE", true); - assertEndsWith("𐐅", "𐐅", "UNICODE_CI", true); - assertEndsWith("𐐅", "𐐭", "UTF8_BINARY", false); - assertEndsWith("𐐅", "𐐭", "UTF8_LCASE", true); - assertEndsWith("𐐅", "𐐭", "UNICODE", false); - assertEndsWith("𐐅", "𐐭", "UNICODE_CI", true); - assertEndsWith("𝔸", "𝔸", "UTF8_BINARY", true); - assertEndsWith("𝔸", "𝔸", "UTF8_LCASE", true); - assertEndsWith("𝔸", "𝔸", "UNICODE", true); - assertEndsWith("𝔸", "𝔸", "UNICODE_CI", true); + assertEndsWith("a🙃b🙃c", "x", UTF8_BINARY, false); + assertEndsWith("a🙃b🙃c", "x", UTF8_LCASE, false); + assertEndsWith("a🙃b🙃c", "x", UNICODE, false); + assertEndsWith("a🙃b🙃c", "x", UNICODE_CI, false); + assertEndsWith("a🙃b🙃c", "b", UTF8_BINARY, false); + assertEndsWith("a🙃b🙃c", "b", UTF8_LCASE, false); + assertEndsWith("a🙃b🙃c", "b", UNICODE, false); + assertEndsWith("a🙃b🙃c", "b", UNICODE_CI, false); + assertEndsWith("a🙃b🙃c", "a🙃b", UTF8_BINARY, false); + assertEndsWith("a🙃b🙃c", "a🙃b", UTF8_LCASE, false); + assertEndsWith("a🙃b🙃c", 
"a🙃b", UNICODE, false); + assertEndsWith("a🙃b🙃c", "a🙃b", UNICODE_CI, false); + assertEndsWith("a🙃b🙃c", "b🙃c", UTF8_BINARY, true); + assertEndsWith("a🙃b🙃c", "b🙃c", UTF8_LCASE, true); + assertEndsWith("a🙃b🙃c", "b🙃c", UNICODE, true); + assertEndsWith("a🙃b🙃c", "b🙃c", UNICODE_CI, true); + assertEndsWith("a🙃b🙃c", "a🙃b🙃c", UTF8_BINARY, true); + assertEndsWith("a🙃b🙃c", "a🙃b🙃c", UTF8_LCASE, true); + assertEndsWith("a🙃b🙃c", "a🙃b🙃c", UNICODE, true); + assertEndsWith("a🙃b🙃c", "a🙃b🙃c", UNICODE_CI, true); + assertEndsWith("😀😆😃😄", "😄😆", UTF8_BINARY, false); + assertEndsWith("😀😆😃😄", "😄😆", UTF8_LCASE, false); + assertEndsWith("😀😆😃😄", "😄😆", UNICODE, false); + assertEndsWith("😀😆😃😄", "😄😆", UNICODE_CI, false); + assertEndsWith("😀😆😃😄", "😆😃", UTF8_BINARY, false); + assertEndsWith("😀😆😃😄", "😆😃", UTF8_LCASE, false); + assertEndsWith("😀😆😃😄", "😆😃", UNICODE, false); + assertEndsWith("😀😆😃😄", "😆😃", UNICODE_CI, false); + assertEndsWith("😀😆😃😄", "😀😆", UTF8_BINARY, false); + assertEndsWith("😀😆😃😄", "😀😆", UTF8_LCASE, false); + assertEndsWith("😀😆😃😄", "😀😆", UNICODE, false); + assertEndsWith("😀😆😃😄", "😀😆", UNICODE_CI, false); + assertEndsWith("😀😆😃😄", "😃😄", UTF8_BINARY, true); + assertEndsWith("😀😆😃😄", "😃😄", UTF8_LCASE, true); + assertEndsWith("😀😆😃😄", "😃😄", UNICODE, true); + assertEndsWith("😀😆😃😄", "😃😄", UNICODE_CI, true); + assertEndsWith("😀😆😃😄", "😀😆😃😄", UTF8_BINARY, true); + assertEndsWith("😀😆😃😄", "😀😆😃😄", UTF8_LCASE, true); + assertEndsWith("😀😆😃😄", "😀😆😃😄", UNICODE, true); + assertEndsWith("😀😆😃😄", "😀😆😃😄", UNICODE_CI, true); + assertEndsWith("𐐅", "𐐅", UTF8_BINARY, true); + assertEndsWith("𐐅", "𐐅", UTF8_LCASE, true); + assertEndsWith("𐐅", "𐐅", UNICODE, true); + assertEndsWith("𐐅", "𐐅", UNICODE_CI, true); + assertEndsWith("𐐅", "𐐭", UTF8_BINARY, false); + assertEndsWith("𐐅", "𐐭", UTF8_LCASE, true); + assertEndsWith("𐐅", "𐐭", UNICODE, false); + assertEndsWith("𐐅", "𐐭", UNICODE_CI, true); + assertEndsWith("𝔸", "𝔸", UTF8_BINARY, true); + assertEndsWith("𝔸", "𝔸", UTF8_LCASE, true); + assertEndsWith("𝔸", "𝔸", 
UNICODE, true); + assertEndsWith("𝔸", "𝔸", UNICODE_CI, true); } /** @@ -1057,158 +1058,158 @@ public void testStringSplitSQL() throws SparkException { var array_AOB = new UTF8String[] { UTF8String.fromString("A𐐅B") }; var array_AoB = new UTF8String[] { UTF8String.fromString("A𐐭B") }; // Empty strings. - assertStringSplitSQL("", "", "UTF8_BINARY", empty_match); - assertStringSplitSQL("abc", "", "UTF8_BINARY", array_abc); - assertStringSplitSQL("", "abc", "UTF8_BINARY", empty_match); - assertStringSplitSQL("", "", "UNICODE", empty_match); - assertStringSplitSQL("abc", "", "UNICODE", array_abc); - assertStringSplitSQL("", "abc", "UNICODE", empty_match); - assertStringSplitSQL("", "", "UTF8_LCASE", empty_match); - assertStringSplitSQL("abc", "", "UTF8_LCASE", array_abc); - assertStringSplitSQL("", "abc", "UTF8_LCASE", empty_match); - assertStringSplitSQL("", "", "UNICODE_CI", empty_match); - assertStringSplitSQL("abc", "", "UNICODE_CI", array_abc); - assertStringSplitSQL("", "abc", "UNICODE_CI", empty_match); + assertStringSplitSQL("", "", UTF8_BINARY, empty_match); + assertStringSplitSQL("abc", "", UTF8_BINARY, array_abc); + assertStringSplitSQL("", "abc", UTF8_BINARY, empty_match); + assertStringSplitSQL("", "", UNICODE, empty_match); + assertStringSplitSQL("abc", "", UNICODE, array_abc); + assertStringSplitSQL("", "abc", UNICODE, empty_match); + assertStringSplitSQL("", "", UTF8_LCASE, empty_match); + assertStringSplitSQL("abc", "", UTF8_LCASE, array_abc); + assertStringSplitSQL("", "abc", UTF8_LCASE, empty_match); + assertStringSplitSQL("", "", UNICODE_CI, empty_match); + assertStringSplitSQL("abc", "", UNICODE_CI, array_abc); + assertStringSplitSQL("", "abc", UNICODE_CI, empty_match); // Basic tests. 
- assertStringSplitSQL("1a2", "a", "UTF8_BINARY", array_1_2); - assertStringSplitSQL("1a2", "A", "UTF8_BINARY", array_1a2); - assertStringSplitSQL("1a2", "b", "UTF8_BINARY", array_1a2); - assertStringSplitSQL("1a2", "1a2", "UNICODE", full_match); - assertStringSplitSQL("1a2", "1A2", "UNICODE", array_1a2); - assertStringSplitSQL("1a2", "3b4", "UNICODE", array_1a2); - assertStringSplitSQL("1a2", "A", "UTF8_LCASE", array_1_2); - assertStringSplitSQL("1a2", "1A2", "UTF8_LCASE", full_match); - assertStringSplitSQL("1a2", "X", "UTF8_LCASE", array_1a2); - assertStringSplitSQL("1a2", "a", "UNICODE_CI", array_1_2); - assertStringSplitSQL("1a2", "A", "UNICODE_CI", array_1_2); - assertStringSplitSQL("1a2", "1A2", "UNICODE_CI", full_match); - assertStringSplitSQL("1a2", "123", "UNICODE_CI", array_1a2); + assertStringSplitSQL("1a2", "a", UTF8_BINARY, array_1_2); + assertStringSplitSQL("1a2", "A", UTF8_BINARY, array_1a2); + assertStringSplitSQL("1a2", "b", UTF8_BINARY, array_1a2); + assertStringSplitSQL("1a2", "1a2", UNICODE, full_match); + assertStringSplitSQL("1a2", "1A2", UNICODE, array_1a2); + assertStringSplitSQL("1a2", "3b4", UNICODE, array_1a2); + assertStringSplitSQL("1a2", "A", UTF8_LCASE, array_1_2); + assertStringSplitSQL("1a2", "1A2", UTF8_LCASE, full_match); + assertStringSplitSQL("1a2", "X", UTF8_LCASE, array_1a2); + assertStringSplitSQL("1a2", "a", UNICODE_CI, array_1_2); + assertStringSplitSQL("1a2", "A", UNICODE_CI, array_1_2); + assertStringSplitSQL("1a2", "1A2", UNICODE_CI, full_match); + assertStringSplitSQL("1a2", "123", UNICODE_CI, array_1a2); // Advanced tests. 
- assertStringSplitSQL("äb世De", "b世D", "UTF8_BINARY", array_a_e); - assertStringSplitSQL("äb世De", "B世d", "UTF8_BINARY", array_special); - assertStringSplitSQL("äbćδe", "bćδ", "UTF8_BINARY", array_a_e); - assertStringSplitSQL("äbćδe", "BcΔ", "UTF8_BINARY", array_abcde); - assertStringSplitSQL("äb世De", "äb世De", "UNICODE", full_match); - assertStringSplitSQL("äb世De", "äB世de", "UNICODE", array_special); - assertStringSplitSQL("äbćδe", "äbćδe", "UNICODE", full_match); - assertStringSplitSQL("äbćδe", "ÄBcΔÉ", "UNICODE", array_abcde); - assertStringSplitSQL("äb世De", "b世D", "UTF8_LCASE", array_a_e); - assertStringSplitSQL("äb世De", "B世d", "UTF8_LCASE", array_a_e); - assertStringSplitSQL("äbćδe", "bćδ", "UTF8_LCASE", array_a_e); - assertStringSplitSQL("äbćδe", "BcΔ", "UTF8_LCASE", array_abcde); - assertStringSplitSQL("äb世De", "ab世De", "UNICODE_CI", array_special); - assertStringSplitSQL("äb世De", "AB世dE", "UNICODE_CI", array_special); - assertStringSplitSQL("äbćδe", "ÄbćδE", "UNICODE_CI", full_match); - assertStringSplitSQL("äbćδe", "ÄBcΔÉ", "UNICODE_CI", array_abcde); + assertStringSplitSQL("äb世De", "b世D", UTF8_BINARY, array_a_e); + assertStringSplitSQL("äb世De", "B世d", UTF8_BINARY, array_special); + assertStringSplitSQL("äbćδe", "bćδ", UTF8_BINARY, array_a_e); + assertStringSplitSQL("äbćδe", "BcΔ", UTF8_BINARY, array_abcde); + assertStringSplitSQL("äb世De", "äb世De", UNICODE, full_match); + assertStringSplitSQL("äb世De", "äB世de", UNICODE, array_special); + assertStringSplitSQL("äbćδe", "äbćδe", UNICODE, full_match); + assertStringSplitSQL("äbćδe", "ÄBcΔÉ", UNICODE, array_abcde); + assertStringSplitSQL("äb世De", "b世D", UTF8_LCASE, array_a_e); + assertStringSplitSQL("äb世De", "B世d", UTF8_LCASE, array_a_e); + assertStringSplitSQL("äbćδe", "bćδ", UTF8_LCASE, array_a_e); + assertStringSplitSQL("äbćδe", "BcΔ", UTF8_LCASE, array_abcde); + assertStringSplitSQL("äb世De", "ab世De", UNICODE_CI, array_special); + assertStringSplitSQL("äb世De", "AB世dE", UNICODE_CI, array_special); + 
assertStringSplitSQL("äbćδe", "ÄbćδE", UNICODE_CI, full_match); + assertStringSplitSQL("äbćδe", "ÄBcΔÉ", UNICODE_CI, array_abcde); // Case variation. - assertStringSplitSQL("AaXbB", "x", "UTF8_BINARY", array_AaXbB); - assertStringSplitSQL("AaXbB", "X", "UTF8_BINARY", array_Aa_bB); - assertStringSplitSQL("AaXbB", "axb", "UNICODE", array_AaXbB); - assertStringSplitSQL("AaXbB", "aXb", "UNICODE", array_A_B); - assertStringSplitSQL("AaXbB", "axb", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("AaXbB", "AXB", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("AaXbB", "axb", "UNICODE_CI", array_A_B); - assertStringSplitSQL("AaXbB", "AxB", "UNICODE_CI", array_A_B); + assertStringSplitSQL("AaXbB", "x", UTF8_BINARY, array_AaXbB); + assertStringSplitSQL("AaXbB", "X", UTF8_BINARY, array_Aa_bB); + assertStringSplitSQL("AaXbB", "axb", UNICODE, array_AaXbB); + assertStringSplitSQL("AaXbB", "aXb", UNICODE, array_A_B); + assertStringSplitSQL("AaXbB", "axb", UTF8_LCASE, array_A_B); + assertStringSplitSQL("AaXbB", "AXB", UTF8_LCASE, array_A_B); + assertStringSplitSQL("AaXbB", "axb", UNICODE_CI, array_A_B); + assertStringSplitSQL("AaXbB", "AxB", UNICODE_CI, array_A_B); // Accent variation. 
- assertStringSplitSQL("aBcDe", "bćd", "UTF8_BINARY", array_aBcDe); - assertStringSplitSQL("aBcDe", "BćD", "UTF8_BINARY", array_aBcDe); - assertStringSplitSQL("aBcDe", "abćde", "UNICODE", array_aBcDe); - assertStringSplitSQL("aBcDe", "aBćDe", "UNICODE", array_aBcDe); - assertStringSplitSQL("aBcDe", "bćd", "UTF8_LCASE", array_aBcDe); - assertStringSplitSQL("aBcDe", "BĆD", "UTF8_LCASE", array_aBcDe); - assertStringSplitSQL("aBcDe", "abćde", "UNICODE_CI", array_aBcDe); - assertStringSplitSQL("aBcDe", "AbĆdE", "UNICODE_CI", array_aBcDe); + assertStringSplitSQL("aBcDe", "bćd", UTF8_BINARY, array_aBcDe); + assertStringSplitSQL("aBcDe", "BćD", UTF8_BINARY, array_aBcDe); + assertStringSplitSQL("aBcDe", "abćde", UNICODE, array_aBcDe); + assertStringSplitSQL("aBcDe", "aBćDe", UNICODE, array_aBcDe); + assertStringSplitSQL("aBcDe", "bćd", UTF8_LCASE, array_aBcDe); + assertStringSplitSQL("aBcDe", "BĆD", UTF8_LCASE, array_aBcDe); + assertStringSplitSQL("aBcDe", "abćde", UNICODE_CI, array_aBcDe); + assertStringSplitSQL("aBcDe", "AbĆdE", UNICODE_CI, array_aBcDe); // One-to-many case mapping (e.g. Turkish dotted I). 
- assertStringSplitSQL("İ", "i", "UTF8_BINARY", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "i", "UTF8_LCASE", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "i", "UNICODE", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "i", "UNICODE_CI", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "\u0307", "UTF8_BINARY", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "\u0307", "UTF8_LCASE", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "\u0307", "UNICODE", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "\u0307", "UNICODE_CI", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("i\u0307", "i", "UTF8_BINARY", array_dot); - assertStringSplitSQL("i\u0307", "i", "UTF8_LCASE", array_dot); - assertStringSplitSQL("i\u0307", "i", "UNICODE", array_Turkish_lowercase_dotted_i); - assertStringSplitSQL("i\u0307", "i", "UNICODE_CI", array_Turkish_lowercase_dotted_i); - assertStringSplitSQL("i\u0307", "\u0307", "UTF8_BINARY", array_i); - assertStringSplitSQL("i\u0307", "\u0307", "UTF8_LCASE", array_i); - assertStringSplitSQL("i\u0307", "\u0307", "UNICODE", array_Turkish_lowercase_dotted_i); - assertStringSplitSQL("i\u0307", "\u0307", "UNICODE_CI", array_Turkish_lowercase_dotted_i); - assertStringSplitSQL("AİB", "İ", "UTF8_BINARY", array_A_B); - assertStringSplitSQL("AİB", "İ", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("AİB", "İ", "UNICODE", array_A_B); - assertStringSplitSQL("AİB", "İ", "UNICODE_CI", array_A_B); - assertStringSplitSQL("AİB", "i\u0307", "UTF8_BINARY", array_AIB); - assertStringSplitSQL("AİB", "i\u0307", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("AİB", "i\u0307", "UNICODE", array_AIB); - assertStringSplitSQL("AİB", "i\u0307", "UNICODE_CI", array_A_B); - assertStringSplitSQL("Ai\u0307B", "İ", "UTF8_BINARY", array_AiB); - assertStringSplitSQL("Ai\u0307B", "İ", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("Ai\u0307B", "İ", "UNICODE", 
array_AiB); - assertStringSplitSQL("Ai\u0307B", "İ", "UNICODE_CI", array_A_B); - assertStringSplitSQL("Ai\u0307B", "i\u0307", "UTF8_BINARY", array_A_B); - assertStringSplitSQL("Ai\u0307B", "i\u0307", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("Ai\u0307B", "i\u0307", "UNICODE", array_A_B); - assertStringSplitSQL("Ai\u0307B", "i\u0307", "UNICODE_CI", array_A_B); + assertStringSplitSQL("İ", "i", UTF8_BINARY, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "i", UTF8_LCASE, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "i", UNICODE, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "i", UNICODE_CI, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "\u0307", UTF8_BINARY, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "\u0307", UTF8_LCASE, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "\u0307", UNICODE, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "\u0307", UNICODE_CI, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("i\u0307", "i", UTF8_BINARY, array_dot); + assertStringSplitSQL("i\u0307", "i", UTF8_LCASE, array_dot); + assertStringSplitSQL("i\u0307", "i", UNICODE, array_Turkish_lowercase_dotted_i); + assertStringSplitSQL("i\u0307", "i", UNICODE_CI, array_Turkish_lowercase_dotted_i); + assertStringSplitSQL("i\u0307", "\u0307", UTF8_BINARY, array_i); + assertStringSplitSQL("i\u0307", "\u0307", UTF8_LCASE, array_i); + assertStringSplitSQL("i\u0307", "\u0307", UNICODE, array_Turkish_lowercase_dotted_i); + assertStringSplitSQL("i\u0307", "\u0307", UNICODE_CI, array_Turkish_lowercase_dotted_i); + assertStringSplitSQL("AİB", "İ", UTF8_BINARY, array_A_B); + assertStringSplitSQL("AİB", "İ", UTF8_LCASE, array_A_B); + assertStringSplitSQL("AİB", "İ", UNICODE, array_A_B); + assertStringSplitSQL("AİB", "İ", UNICODE_CI, array_A_B); + assertStringSplitSQL("AİB", "i\u0307", UTF8_BINARY, array_AIB); + assertStringSplitSQL("AİB", "i\u0307", UTF8_LCASE, array_A_B); + 
assertStringSplitSQL("AİB", "i\u0307", UNICODE, array_AIB); + assertStringSplitSQL("AİB", "i\u0307", UNICODE_CI, array_A_B); + assertStringSplitSQL("Ai\u0307B", "İ", UTF8_BINARY, array_AiB); + assertStringSplitSQL("Ai\u0307B", "İ", UTF8_LCASE, array_A_B); + assertStringSplitSQL("Ai\u0307B", "İ", UNICODE, array_AiB); + assertStringSplitSQL("Ai\u0307B", "İ", UNICODE_CI, array_A_B); + assertStringSplitSQL("Ai\u0307B", "i\u0307", UTF8_BINARY, array_A_B); + assertStringSplitSQL("Ai\u0307B", "i\u0307", UTF8_LCASE, array_A_B); + assertStringSplitSQL("Ai\u0307B", "i\u0307", UNICODE, array_A_B); + assertStringSplitSQL("Ai\u0307B", "i\u0307", UNICODE_CI, array_A_B); // Conditional case mapping (e.g. Greek sigmas). - assertStringSplitSQL("σ", "σ", "UTF8_BINARY", full_match); - assertStringSplitSQL("σ", "σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("σ", "σ", "UNICODE", full_match); - assertStringSplitSQL("σ", "σ", "UNICODE_CI", full_match); - assertStringSplitSQL("σ", "ς", "UTF8_BINARY", array_small_nonfinal_sigma); - assertStringSplitSQL("σ", "ς", "UTF8_LCASE", full_match); - assertStringSplitSQL("σ", "ς", "UNICODE", array_small_nonfinal_sigma); - assertStringSplitSQL("σ", "ς", "UNICODE_CI", full_match); - assertStringSplitSQL("σ", "Σ", "UTF8_BINARY", array_small_nonfinal_sigma); - assertStringSplitSQL("σ", "Σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("σ", "Σ", "UNICODE", array_small_nonfinal_sigma); - assertStringSplitSQL("σ", "Σ", "UNICODE_CI", full_match); - assertStringSplitSQL("ς", "σ", "UTF8_BINARY", array_small_final_sigma); - assertStringSplitSQL("ς", "σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("ς", "σ", "UNICODE", array_small_final_sigma); - assertStringSplitSQL("ς", "σ", "UNICODE_CI", full_match); - assertStringSplitSQL("ς", "ς", "UTF8_BINARY", full_match); - assertStringSplitSQL("ς", "ς", "UTF8_LCASE", full_match); - assertStringSplitSQL("ς", "ς", "UNICODE", full_match); - assertStringSplitSQL("ς", "ς", "UNICODE_CI", full_match); - 
assertStringSplitSQL("ς", "Σ", "UTF8_BINARY", array_small_final_sigma); - assertStringSplitSQL("ς", "Σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("ς", "Σ", "UNICODE", array_small_final_sigma); - assertStringSplitSQL("ς", "Σ", "UNICODE_CI", full_match); - assertStringSplitSQL("Σ", "σ", "UTF8_BINARY", array_capital_sigma); - assertStringSplitSQL("Σ", "σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("Σ", "σ", "UNICODE", array_capital_sigma); - assertStringSplitSQL("Σ", "σ", "UNICODE_CI", full_match); - assertStringSplitSQL("Σ", "ς", "UTF8_BINARY", array_capital_sigma); - assertStringSplitSQL("Σ", "ς", "UTF8_LCASE", full_match); - assertStringSplitSQL("Σ", "ς", "UNICODE", array_capital_sigma); - assertStringSplitSQL("Σ", "ς", "UNICODE_CI", full_match); - assertStringSplitSQL("Σ", "Σ", "UTF8_BINARY", full_match); - assertStringSplitSQL("Σ", "Σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("Σ", "Σ", "UNICODE", full_match); - assertStringSplitSQL("Σ", "Σ", "UNICODE_CI", full_match); + assertStringSplitSQL("σ", "σ", UTF8_BINARY, full_match); + assertStringSplitSQL("σ", "σ", UTF8_LCASE, full_match); + assertStringSplitSQL("σ", "σ", UNICODE, full_match); + assertStringSplitSQL("σ", "σ", UNICODE_CI, full_match); + assertStringSplitSQL("σ", "ς", UTF8_BINARY, array_small_nonfinal_sigma); + assertStringSplitSQL("σ", "ς", UTF8_LCASE, full_match); + assertStringSplitSQL("σ", "ς", UNICODE, array_small_nonfinal_sigma); + assertStringSplitSQL("σ", "ς", UNICODE_CI, full_match); + assertStringSplitSQL("σ", "Σ", UTF8_BINARY, array_small_nonfinal_sigma); + assertStringSplitSQL("σ", "Σ", UTF8_LCASE, full_match); + assertStringSplitSQL("σ", "Σ", UNICODE, array_small_nonfinal_sigma); + assertStringSplitSQL("σ", "Σ", UNICODE_CI, full_match); + assertStringSplitSQL("ς", "σ", UTF8_BINARY, array_small_final_sigma); + assertStringSplitSQL("ς", "σ", UTF8_LCASE, full_match); + assertStringSplitSQL("ς", "σ", UNICODE, array_small_final_sigma); + assertStringSplitSQL("ς", "σ", 
UNICODE_CI, full_match); + assertStringSplitSQL("ς", "ς", UTF8_BINARY, full_match); + assertStringSplitSQL("ς", "ς", UTF8_LCASE, full_match); + assertStringSplitSQL("ς", "ς", UNICODE, full_match); + assertStringSplitSQL("ς", "ς", UNICODE_CI, full_match); + assertStringSplitSQL("ς", "Σ", UTF8_BINARY, array_small_final_sigma); + assertStringSplitSQL("ς", "Σ", UTF8_LCASE, full_match); + assertStringSplitSQL("ς", "Σ", UNICODE, array_small_final_sigma); + assertStringSplitSQL("ς", "Σ", UNICODE_CI, full_match); + assertStringSplitSQL("Σ", "σ", UTF8_BINARY, array_capital_sigma); + assertStringSplitSQL("Σ", "σ", UTF8_LCASE, full_match); + assertStringSplitSQL("Σ", "σ", UNICODE, array_capital_sigma); + assertStringSplitSQL("Σ", "σ", UNICODE_CI, full_match); + assertStringSplitSQL("Σ", "ς", UTF8_BINARY, array_capital_sigma); + assertStringSplitSQL("Σ", "ς", UTF8_LCASE, full_match); + assertStringSplitSQL("Σ", "ς", UNICODE, array_capital_sigma); + assertStringSplitSQL("Σ", "ς", UNICODE_CI, full_match); + assertStringSplitSQL("Σ", "Σ", UTF8_BINARY, full_match); + assertStringSplitSQL("Σ", "Σ", UTF8_LCASE, full_match); + assertStringSplitSQL("Σ", "Σ", UNICODE, full_match); + assertStringSplitSQL("Σ", "Σ", UNICODE_CI, full_match); // Surrogate pairs. 
- assertStringSplitSQL("a🙃b🙃c", "🙃", "UTF8_BINARY", array_a_b_c); - assertStringSplitSQL("a🙃b🙃c", "🙃", "UTF8_LCASE", array_a_b_c); - assertStringSplitSQL("a🙃b🙃c", "🙃", "UNICODE", array_a_b_c); - assertStringSplitSQL("a🙃b🙃c", "🙃", "UNICODE_CI", array_a_b_c); - assertStringSplitSQL("😀😆😃😄", "😆😃", "UTF8_BINARY", array_emojis); - assertStringSplitSQL("😀😆😃😄", "😆😃", "UTF8_LCASE", array_emojis); - assertStringSplitSQL("😀😆😃😄", "😆😃", "UNICODE", array_emojis); - assertStringSplitSQL("😀😆😃😄", "😆😃", "UNICODE_CI", array_emojis); - assertStringSplitSQL("A𐐅B", "𐐅", "UTF8_BINARY", array_A_B); - assertStringSplitSQL("A𐐅B", "𐐅", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("A𐐅B", "𐐅", "UNICODE", array_A_B); - assertStringSplitSQL("A𐐅B", "𐐅", "UNICODE_CI", array_A_B); - assertStringSplitSQL("A𐐅B", "𐐭", "UTF8_BINARY", array_AOB); - assertStringSplitSQL("A𐐅B", "𐐭", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("A𐐅B", "𐐭", "UNICODE", array_AOB); - assertStringSplitSQL("A𐐅B", "𐐭", "UNICODE_CI", array_A_B); - assertStringSplitSQL("A𐐭B", "𐐅", "UTF8_BINARY", array_AoB); - assertStringSplitSQL("A𐐭B", "𐐅", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("A𐐭B", "𐐅", "UNICODE", array_AoB); - assertStringSplitSQL("A𐐭B", "𐐅", "UNICODE_CI", array_A_B); + assertStringSplitSQL("a🙃b🙃c", "🙃", UTF8_BINARY, array_a_b_c); + assertStringSplitSQL("a🙃b🙃c", "🙃", UTF8_LCASE, array_a_b_c); + assertStringSplitSQL("a🙃b🙃c", "🙃", UNICODE, array_a_b_c); + assertStringSplitSQL("a🙃b🙃c", "🙃", UNICODE_CI, array_a_b_c); + assertStringSplitSQL("😀😆😃😄", "😆😃", UTF8_BINARY, array_emojis); + assertStringSplitSQL("😀😆😃😄", "😆😃", UTF8_LCASE, array_emojis); + assertStringSplitSQL("😀😆😃😄", "😆😃", UNICODE, array_emojis); + assertStringSplitSQL("😀😆😃😄", "😆😃", UNICODE_CI, array_emojis); + assertStringSplitSQL("A𐐅B", "𐐅", UTF8_BINARY, array_A_B); + assertStringSplitSQL("A𐐅B", "𐐅", UTF8_LCASE, array_A_B); + assertStringSplitSQL("A𐐅B", "𐐅", UNICODE, array_A_B); + assertStringSplitSQL("A𐐅B", "𐐅", UNICODE_CI, array_A_B); + 
assertStringSplitSQL("A𐐅B", "𐐭", UTF8_BINARY, array_AOB); + assertStringSplitSQL("A𐐅B", "𐐭", UTF8_LCASE, array_A_B); + assertStringSplitSQL("A𐐅B", "𐐭", UNICODE, array_AOB); + assertStringSplitSQL("A𐐅B", "𐐭", UNICODE_CI, array_A_B); + assertStringSplitSQL("A𐐭B", "𐐅", UTF8_BINARY, array_AoB); + assertStringSplitSQL("A𐐭B", "𐐅", UTF8_LCASE, array_A_B); + assertStringSplitSQL("A𐐭B", "𐐅", UNICODE, array_AoB); + assertStringSplitSQL("A𐐭B", "𐐅", UNICODE_CI, array_A_B); } /** @@ -1391,156 +1392,156 @@ public void testInitCap() throws SparkException { assertInitCap("θαλασσινος", collationName, "Θαλασσινος"); } // Advanced tests. - assertInitCap("aBćDe", "UTF8_BINARY", "Abćde"); - assertInitCap("aBćDe", "UTF8_LCASE", "Abćde"); - assertInitCap("aBćDe", "UNICODE", "Abćde"); - assertInitCap("aBćDe", "UNICODE_CI", "Abćde"); - assertInitCap("ab世De", "UTF8_BINARY", "Ab世de"); - assertInitCap("ab世De", "UTF8_LCASE", "Ab世De"); - assertInitCap("ab世De", "UNICODE", "Ab世De"); - assertInitCap("ab世De", "UNICODE_CI", "Ab世De"); - assertInitCap("äbćδe", "UTF8_BINARY", "Äbćδe"); - assertInitCap("äbćδe", "UTF8_LCASE", "Äbćδe"); - assertInitCap("äbćδe", "UNICODE", "Äbćδe"); - assertInitCap("äbćδe", "UNICODE_CI", "Äbćδe"); - assertInitCap("ÄBĆΔE", "UTF8_BINARY", "Äbćδe"); - assertInitCap("ÄBĆΔE", "UTF8_LCASE", "Äbćδe"); - assertInitCap("ÄBĆΔE", "UNICODE", "Äbćδe"); - assertInitCap("ÄBĆΔE", "UNICODE_CI", "Äbćδe"); + assertInitCap("aBćDe", UTF8_BINARY, "Abćde"); + assertInitCap("aBćDe", UTF8_LCASE, "Abćde"); + assertInitCap("aBćDe", UNICODE, "Abćde"); + assertInitCap("aBćDe", UNICODE_CI, "Abćde"); + assertInitCap("ab世De", UTF8_BINARY, "Ab世de"); + assertInitCap("ab世De", UTF8_LCASE, "Ab世De"); + assertInitCap("ab世De", UNICODE, "Ab世De"); + assertInitCap("ab世De", UNICODE_CI, "Ab世De"); + assertInitCap("äbćδe", UTF8_BINARY, "Äbćδe"); + assertInitCap("äbćδe", UTF8_LCASE, "Äbćδe"); + assertInitCap("äbćδe", UNICODE, "Äbćδe"); + assertInitCap("äbćδe", UNICODE_CI, "Äbćδe"); + assertInitCap("ÄBĆΔE", UTF8_BINARY, 
"Äbćδe"); + assertInitCap("ÄBĆΔE", UTF8_LCASE, "Äbćδe"); + assertInitCap("ÄBĆΔE", UNICODE, "Äbćδe"); + assertInitCap("ÄBĆΔE", UNICODE_CI, "Äbćδe"); assertInitCap("êéfgh", "AF_CI_AI", "Êéfgh"); assertInitCap("öoAÄ", "DE_CI_AI", "Öoaä"); // Case-variable character length - assertInitCap("İo", "UTF8_BINARY", "İo", "I\u0307o"); - assertInitCap("İo", "UTF8_LCASE", "İo"); - assertInitCap("İo", "UNICODE", "İo"); - assertInitCap("İo", "UNICODE_CI", "İo"); - assertInitCap("i\u0307o", "UTF8_BINARY", "I\u0307o"); - assertInitCap("i\u0307o", "UTF8_LCASE", "I\u0307o"); - assertInitCap("i\u0307o", "UNICODE", "I\u0307o"); - assertInitCap("i\u0307o", "UNICODE_CI", "I\u0307o"); + assertInitCap("İo", UTF8_BINARY, "İo", "I\u0307o"); + assertInitCap("İo", UTF8_LCASE, "İo"); + assertInitCap("İo", UNICODE, "İo"); + assertInitCap("İo", UNICODE_CI, "İo"); + assertInitCap("i\u0307o", UTF8_BINARY, "I\u0307o"); + assertInitCap("i\u0307o", UTF8_LCASE, "I\u0307o"); + assertInitCap("i\u0307o", UNICODE, "I\u0307o"); + assertInitCap("i\u0307o", UNICODE_CI, "I\u0307o"); // Different possible word boundaries - assertInitCap("aB 世 de", "UTF8_BINARY", "Ab 世 De"); - assertInitCap("aB 世 de", "UTF8_LCASE", "Ab 世 De"); - assertInitCap("aB 世 de", "UNICODE", "Ab 世 De"); - assertInitCap("aB 世 de", "UNICODE_CI", "Ab 世 De"); + assertInitCap("aB 世 de", UTF8_BINARY, "Ab 世 De"); + assertInitCap("aB 世 de", UTF8_LCASE, "Ab 世 De"); + assertInitCap("aB 世 de", UNICODE, "Ab 世 De"); + assertInitCap("aB 世 de", UNICODE_CI, "Ab 世 De"); // One-to-many case mapping (e.g. Turkish dotted I). 
- assertInitCap("İ", "UTF8_BINARY", "İ", "I\u0307"); - assertInitCap("İ", "UTF8_LCASE", "İ"); - assertInitCap("İ", "UNICODE", "İ"); - assertInitCap("İ", "UNICODE_CI", "İ"); - assertInitCap("I\u0307", "UTF8_BINARY","I\u0307"); - assertInitCap("I\u0307", "UTF8_LCASE","I\u0307"); - assertInitCap("I\u0307", "UNICODE","I\u0307"); - assertInitCap("I\u0307", "UNICODE_CI","I\u0307"); - assertInitCap("İonic", "UTF8_BINARY", "İonic", "I\u0307onic"); - assertInitCap("İonic", "UTF8_LCASE", "İonic"); - assertInitCap("İonic", "UNICODE", "İonic"); - assertInitCap("İonic", "UNICODE_CI", "İonic"); - assertInitCap("i\u0307onic", "UTF8_BINARY","I\u0307onic"); - assertInitCap("i\u0307onic", "UTF8_LCASE","I\u0307onic"); - assertInitCap("i\u0307onic", "UNICODE","I\u0307onic"); - assertInitCap("i\u0307onic", "UNICODE_CI","I\u0307onic"); - assertInitCap("FIDELİO", "UTF8_BINARY", "Fideli\u0307o"); - assertInitCap("FIDELİO", "UTF8_LCASE", "Fideli\u0307o"); - assertInitCap("FIDELİO", "UNICODE", "Fideli\u0307o"); - assertInitCap("FIDELİO", "UNICODE_CI", "Fideli\u0307o"); + assertInitCap("İ", UTF8_BINARY, "İ", "I\u0307"); + assertInitCap("İ", UTF8_LCASE, "İ"); + assertInitCap("İ", UNICODE, "İ"); + assertInitCap("İ", UNICODE_CI, "İ"); + assertInitCap("I\u0307", UTF8_BINARY,"I\u0307"); + assertInitCap("I\u0307", UTF8_LCASE,"I\u0307"); + assertInitCap("I\u0307", UNICODE,"I\u0307"); + assertInitCap("I\u0307", UNICODE_CI,"I\u0307"); + assertInitCap("İonic", UTF8_BINARY, "İonic", "I\u0307onic"); + assertInitCap("İonic", UTF8_LCASE, "İonic"); + assertInitCap("İonic", UNICODE, "İonic"); + assertInitCap("İonic", UNICODE_CI, "İonic"); + assertInitCap("i\u0307onic", UTF8_BINARY,"I\u0307onic"); + assertInitCap("i\u0307onic", UTF8_LCASE,"I\u0307onic"); + assertInitCap("i\u0307onic", UNICODE,"I\u0307onic"); + assertInitCap("i\u0307onic", UNICODE_CI,"I\u0307onic"); + assertInitCap("FIDELİO", UTF8_BINARY, "Fideli\u0307o"); + assertInitCap("FIDELİO", UTF8_LCASE, "Fideli\u0307o"); + assertInitCap("FIDELİO", 
UNICODE, "Fideli\u0307o"); + assertInitCap("FIDELİO", UNICODE_CI, "Fideli\u0307o"); // Surrogate pairs. - assertInitCap("a🙃B🙃c", "UTF8_BINARY", "A🙃b🙃c"); - assertInitCap("a🙃B🙃c", "UTF8_LCASE", "A🙃B🙃C"); - assertInitCap("a🙃B🙃c", "UNICODE", "A🙃B🙃C"); - assertInitCap("a🙃B🙃c", "UNICODE_CI", "A🙃B🙃C"); - assertInitCap("😄 😆", "UTF8_BINARY", "😄 😆"); - assertInitCap("😄 😆", "UTF8_LCASE", "😄 😆"); - assertInitCap("😄 😆", "UNICODE", "😄 😆"); - assertInitCap("😄 😆", "UNICODE_CI", "😄 😆"); - assertInitCap("😀😆😃😄", "UTF8_BINARY", "😀😆😃😄"); - assertInitCap("😀😆😃😄", "UTF8_LCASE", "😀😆😃😄"); - assertInitCap("😀😆😃😄", "UNICODE", "😀😆😃😄"); - assertInitCap("😀😆😃😄", "UNICODE_CI", "😀😆😃😄"); - assertInitCap("𝔸", "UTF8_BINARY", "𝔸"); - assertInitCap("𝔸", "UTF8_LCASE", "𝔸"); - assertInitCap("𝔸", "UNICODE", "𝔸"); - assertInitCap("𝔸", "UNICODE_CI", "𝔸"); - assertInitCap("𐐅", "UTF8_BINARY", "\uD801\uDC05", "𐐭"); - assertInitCap("𐐅", "UTF8_LCASE", "𐐅"); - assertInitCap("𐐅", "UNICODE", "𐐅"); - assertInitCap("𐐅", "UNICODE_CI", "𐐅"); - assertInitCap("𐐭", "UTF8_BINARY", "\uD801\uDC05", "𐐭"); - assertInitCap("𐐭", "UTF8_LCASE", "𐐅"); - assertInitCap("𐐭", "UNICODE", "𐐅"); - assertInitCap("𐐭", "UNICODE_CI", "𐐅"); - assertInitCap("𐐭𝔸", "UTF8_BINARY", "\uD801\uDC05\uD835\uDD38", "𐐭𝔸"); - assertInitCap("𐐭𝔸", "UTF8_LCASE", "𐐅𝔸"); - assertInitCap("𐐭𝔸", "UNICODE", "𐐅𝔸"); - assertInitCap("𐐭𝔸", "UNICODE_CI", "𐐅𝔸"); + assertInitCap("a🙃B🙃c", UTF8_BINARY, "A🙃b🙃c"); + assertInitCap("a🙃B🙃c", UTF8_LCASE, "A🙃B🙃C"); + assertInitCap("a🙃B🙃c", UNICODE, "A🙃B🙃C"); + assertInitCap("a🙃B🙃c", UNICODE_CI, "A🙃B🙃C"); + assertInitCap("😄 😆", UTF8_BINARY, "😄 😆"); + assertInitCap("😄 😆", UTF8_LCASE, "😄 😆"); + assertInitCap("😄 😆", UNICODE, "😄 😆"); + assertInitCap("😄 😆", UNICODE_CI, "😄 😆"); + assertInitCap("😀😆😃😄", UTF8_BINARY, "😀😆😃😄"); + assertInitCap("😀😆😃😄", UTF8_LCASE, "😀😆😃😄"); + assertInitCap("😀😆😃😄", UNICODE, "😀😆😃😄"); + assertInitCap("😀😆😃😄", UNICODE_CI, "😀😆😃😄"); + assertInitCap("𝔸", UTF8_BINARY, "𝔸"); + assertInitCap("𝔸", UTF8_LCASE, "𝔸"); + 
assertInitCap("𝔸", UNICODE, "𝔸"); + assertInitCap("𝔸", UNICODE_CI, "𝔸"); + assertInitCap("𐐅", UTF8_BINARY, "\uD801\uDC05", "𐐭"); + assertInitCap("𐐅", UTF8_LCASE, "𐐅"); + assertInitCap("𐐅", UNICODE, "𐐅"); + assertInitCap("𐐅", UNICODE_CI, "𐐅"); + assertInitCap("𐐭", UTF8_BINARY, "\uD801\uDC05", "𐐭"); + assertInitCap("𐐭", UTF8_LCASE, "𐐅"); + assertInitCap("𐐭", UNICODE, "𐐅"); + assertInitCap("𐐭", UNICODE_CI, "𐐅"); + assertInitCap("𐐭𝔸", UTF8_BINARY, "\uD801\uDC05\uD835\uDD38", "𐐭𝔸"); + assertInitCap("𐐭𝔸", UTF8_LCASE, "𐐅𝔸"); + assertInitCap("𐐭𝔸", UNICODE, "𐐅𝔸"); + assertInitCap("𐐭𝔸", UNICODE_CI, "𐐅𝔸"); // Ligatures. - assertInitCap("ß fi ffi ff st ῗ", "UTF8_BINARY", "Ss Fi Ffi Ff St Ϊ͂", "ß fi ffi ff st ῗ"); - assertInitCap("ß fi ffi ff st ῗ", "UTF8_LCASE", "Ss Fi Ffi Ff St \u0399\u0308\u0342"); - assertInitCap("ß fi ffi ff st ῗ", "UNICODE", "Ss Fi Ffi Ff St \u0399\u0308\u0342"); - assertInitCap("ß fi ffi ff st ῗ", "UNICODE", "Ss Fi Ffi Ff St \u0399\u0308\u0342"); - assertInitCap("œ ǽ", "UTF8_BINARY", "Œ Ǽ", "Œ Ǽ"); + assertInitCap("ß fi ffi ff st ῗ", UTF8_BINARY, "Ss Fi Ffi Ff St Ϊ͂", "ß fi ffi ff st ῗ"); + assertInitCap("ß fi ffi ff st ῗ", UTF8_LCASE, "Ss Fi Ffi Ff St \u0399\u0308\u0342"); + assertInitCap("ß fi ffi ff st ῗ", UNICODE, "Ss Fi Ffi Ff St \u0399\u0308\u0342"); + assertInitCap("ß fi ffi ff st ῗ", UNICODE, "Ss Fi Ffi Ff St \u0399\u0308\u0342"); + assertInitCap("œ ǽ", UTF8_BINARY, "Œ Ǽ", "Œ Ǽ"); // Different possible word boundaries. - assertInitCap("a b c", "UTF8_BINARY", "A B C"); - assertInitCap("a b c", "UNICODE", "A B C"); - assertInitCap("a b c", "UTF8_LCASE", "A B C"); - assertInitCap("a b c", "UNICODE_CI", "A B C"); - assertInitCap("a.b,c", "UTF8_BINARY", "A.b,c"); - assertInitCap("a.b,c", "UNICODE", "A.b,C"); - assertInitCap("a.b,c", "UTF8_LCASE", "A.b,C"); - assertInitCap("a.b,c", "UNICODE_CI", "A.b,C"); - assertInitCap("a. b-c", "UTF8_BINARY", "A. B-c"); - assertInitCap("a. b-c", "UNICODE", "A. B-C"); - assertInitCap("a. b-c", "UTF8_LCASE", "A. 
B-C"); - assertInitCap("a. b-c", "UNICODE_CI", "A. B-C"); - assertInitCap("a?b世c", "UTF8_BINARY", "A?b世c"); - assertInitCap("a?b世c", "UNICODE", "A?B世C"); - assertInitCap("a?b世c", "UTF8_LCASE", "A?B世C"); - assertInitCap("a?b世c", "UNICODE_CI", "A?B世C"); + assertInitCap("a b c", UTF8_BINARY, "A B C"); + assertInitCap("a b c", UNICODE, "A B C"); + assertInitCap("a b c", UTF8_LCASE, "A B C"); + assertInitCap("a b c", UNICODE_CI, "A B C"); + assertInitCap("a.b,c", UTF8_BINARY, "A.b,c"); + assertInitCap("a.b,c", UNICODE, "A.b,C"); + assertInitCap("a.b,c", UTF8_LCASE, "A.b,C"); + assertInitCap("a.b,c", UNICODE_CI, "A.b,C"); + assertInitCap("a. b-c", UTF8_BINARY, "A. B-c"); + assertInitCap("a. b-c", UNICODE, "A. B-C"); + assertInitCap("a. b-c", UTF8_LCASE, "A. B-C"); + assertInitCap("a. b-c", UNICODE_CI, "A. B-C"); + assertInitCap("a?b世c", UTF8_BINARY, "A?b世c"); + assertInitCap("a?b世c", UNICODE, "A?B世C"); + assertInitCap("a?b世c", UTF8_LCASE, "A?B世C"); + assertInitCap("a?b世c", UNICODE_CI, "A?B世C"); // Titlecase characters that are different from uppercase characters. 
- assertInitCap("dzDZDz", "UTF8_BINARY", "Dzdzdz"); - assertInitCap("dzDZDz", "UNICODE", "Dzdzdz"); - assertInitCap("dzDZDz", "UTF8_LCASE", "Dzdzdz"); - assertInitCap("dzDZDz", "UNICODE_CI", "Dzdzdz"); - assertInitCap("džaba Ljubav NJegova", "UTF8_BINARY", "Džaba Ljubav Njegova"); - assertInitCap("džaba Ljubav NJegova", "UNICODE", "Džaba Ljubav Njegova"); - assertInitCap("džaba Ljubav NJegova", "UTF8_LCASE", "Džaba Ljubav Njegova"); - assertInitCap("džaba Ljubav NJegova", "UNICODE_CI", "Džaba Ljubav Njegova"); - assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UTF8_BINARY", + assertInitCap("dzDZDz", UTF8_BINARY, "Dzdzdz"); + assertInitCap("dzDZDz", UNICODE, "Dzdzdz"); + assertInitCap("dzDZDz", UTF8_LCASE, "Dzdzdz"); + assertInitCap("dzDZDz", UNICODE_CI, "Dzdzdz"); + assertInitCap("džaba Ljubav NJegova", UTF8_BINARY, "Džaba Ljubav Njegova"); + assertInitCap("džaba Ljubav NJegova", UNICODE, "Džaba Ljubav Njegova"); + assertInitCap("džaba Ljubav NJegova", UTF8_LCASE, "Džaba Ljubav Njegova"); + assertInitCap("džaba Ljubav NJegova", UNICODE_CI, "Džaba Ljubav Njegova"); + assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", UTF8_BINARY, "Ss Fi Ffi Ff St Σημερινος Ασημενιος İota","ß fi ffi ff st Σημερινος Ασημενιος I\u0307ota"); - assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UTF8_LCASE", + assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", UTF8_LCASE, "Ss Fi Ffi Ff St Σημερινος Ασημενιος İota"); - assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UNICODE", + assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", UNICODE, "Ss Fi Ffi Ff St Σημερινος Ασημενιος İota"); - assertInitCap("ß fi ffi ff st ΣΗΜΕΡςΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UNICODE_CI", + assertInitCap("ß fi ffi ff st ΣΗΜΕΡςΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", UNICODE_CI, "Ss Fi Ffi Ff St Σημερςινος Ασημενιος İota"); // Characters that map to multiple characters when titlecased and lowercased. 
- assertInitCap("ß fi ffi ff st İOTA", "UTF8_BINARY", "Ss Fi Ffi Ff St İota", "ß fi ffi ff st İota"); - assertInitCap("ß fi ffi ff st OİOTA", "UTF8_BINARY", + assertInitCap("ß fi ffi ff st İOTA", UTF8_BINARY, "Ss Fi Ffi Ff St İota", "ß fi ffi ff st İota"); + assertInitCap("ß fi ffi ff st OİOTA", UTF8_BINARY, "Ss Fi Ffi Ff St Oi\u0307ota", "ß fi ffi ff st Oi̇ota"); // Lowercasing Greek letter sigma ('Σ') when case-ignorable character present. - assertInitCap("`Σ", "UTF8_BINARY", "`σ", "`σ"); - assertInitCap("1`Σ`` AΣ", "UTF8_BINARY", "1`σ`` Aς", "1`σ`` Aς"); - assertInitCap("a1`Σ``", "UTF8_BINARY", "A1`σ``", "A1`σ``"); - assertInitCap("a`Σ``", "UTF8_BINARY", "A`ς``", "A`σ``"); - assertInitCap("a`Σ``1", "UTF8_BINARY", "A`ς``1", "A`σ``1"); - assertInitCap("a`Σ``A", "UTF8_BINARY", "A`σ``a", "A`σ``a"); - assertInitCap("ΘΑ�Σ�ΟΣ�", "UTF8_BINARY", "Θα�σ�ος�", "Θα�σ�ος�"); - assertInitCap("ΘΑᵩΣ�ΟᵩΣᵩ�", "UTF8_BINARY", "Θαᵩς�οᵩςᵩ�", "Θαᵩς�οᵩςᵩ�"); - assertInitCap("ΘΑ�ᵩΣ�ΟᵩΣᵩ�", "UTF8_BINARY", "Θα�ᵩσ�οᵩςᵩ�", "Θα�ᵩσ�οᵩςᵩ�"); - assertInitCap("ΘΑ�ᵩΣᵩ�ΟᵩΣᵩ�", "UTF8_BINARY", "Θα�ᵩσᵩ�οᵩςᵩ�", "Θα�ᵩσᵩ�οᵩςᵩ�"); - assertInitCap("ΘΑ�Σ�Ο�Σ�", "UTF8_BINARY", "Θα�σ�ο�σ�", "Θα�σ�ο�σ�"); + assertInitCap("`Σ", UTF8_BINARY, "`σ", "`σ"); + assertInitCap("1`Σ`` AΣ", UTF8_BINARY, "1`σ`` Aς", "1`σ`` Aς"); + assertInitCap("a1`Σ``", UTF8_BINARY, "A1`σ``", "A1`σ``"); + assertInitCap("a`Σ``", UTF8_BINARY, "A`ς``", "A`σ``"); + assertInitCap("a`Σ``1", UTF8_BINARY, "A`ς``1", "A`σ``1"); + assertInitCap("a`Σ``A", UTF8_BINARY, "A`σ``a", "A`σ``a"); + assertInitCap("ΘΑ�Σ�ΟΣ�", UTF8_BINARY, "Θα�σ�ος�", "Θα�σ�ος�"); + assertInitCap("ΘΑᵩΣ�ΟᵩΣᵩ�", UTF8_BINARY, "Θαᵩς�οᵩςᵩ�", "Θαᵩς�οᵩςᵩ�"); + assertInitCap("ΘΑ�ᵩΣ�ΟᵩΣᵩ�", UTF8_BINARY, "Θα�ᵩσ�οᵩςᵩ�", "Θα�ᵩσ�οᵩςᵩ�"); + assertInitCap("ΘΑ�ᵩΣᵩ�ΟᵩΣᵩ�", UTF8_BINARY, "Θα�ᵩσᵩ�οᵩςᵩ�", "Θα�ᵩσᵩ�οᵩςᵩ�"); + assertInitCap("ΘΑ�Σ�Ο�Σ�", UTF8_BINARY, "Θα�σ�ο�σ�", "Θα�σ�ο�σ�"); // Disallowed bytes and invalid sequences. 
assertInitCap(UTF8String.fromBytes(new byte[] { (byte)0xC0, (byte)0xC1, (byte)0xF5}).toString(), - "UTF8_BINARY", "���", "���"); + UTF8_BINARY, "���", "���"); assertInitCap(UTF8String.fromBytes( new byte[]{(byte)0xC0, (byte)0xC1, (byte)0xF5, 0x20, 0x61, 0x41, (byte)0xC0}).toString(), - "UTF8_BINARY", + UTF8_BINARY, "��� Aa�", "��� Aa�"); assertInitCap(UTF8String.fromBytes(new byte[]{(byte)0xC2,(byte)0xC2}).toString(), - "UTF8_BINARY", "��", "��"); + UTF8_BINARY, "��", "��"); assertInitCap(UTF8String.fromBytes( new byte[]{0x61, 0x41, (byte)0xC2, (byte)0xC2, 0x41}).toString(), - "UTF8_BINARY", + UTF8_BINARY, "Aa��a", "Aa��a"); } @@ -1559,147 +1560,147 @@ private void assertStringInstr(String string, String substring, @Test public void testStringInstr() throws SparkException { // Empty strings. - assertStringInstr("", "", "UTF8_BINARY", 1); - assertStringInstr("", "", "UTF8_LCASE", 1); - assertStringInstr("", "", "UNICODE_CI", 1); - assertStringInstr("", "", "UNICODE", 1); - assertStringInstr("a", "", "UTF8_BINARY", 1); - assertStringInstr("a", "", "UTF8_LCASE", 1); - assertStringInstr("a", "", "UNICODE", 1); - assertStringInstr("a", "", "UNICODE_CI", 1); - assertStringInstr("", "x", "UTF8_BINARY", 0); - assertStringInstr("", "x", "UTF8_LCASE", 0); - assertStringInstr("", "x", "UNICODE", 0); - assertStringInstr("", "x", "UNICODE_CI", 0); + assertStringInstr("", "", UTF8_BINARY, 1); + assertStringInstr("", "", UTF8_LCASE, 1); + assertStringInstr("", "", UNICODE_CI, 1); + assertStringInstr("", "", UNICODE, 1); + assertStringInstr("a", "", UTF8_BINARY, 1); + assertStringInstr("a", "", UTF8_LCASE, 1); + assertStringInstr("a", "", UNICODE, 1); + assertStringInstr("a", "", UNICODE_CI, 1); + assertStringInstr("", "x", UTF8_BINARY, 0); + assertStringInstr("", "x", UTF8_LCASE, 0); + assertStringInstr("", "x", UNICODE, 0); + assertStringInstr("", "x", UNICODE_CI, 0); // Basic tests. 
- assertStringInstr("aaads", "aa", "UTF8_BINARY", 1); - assertStringInstr("aaads", "aa", "UTF8_LCASE", 1); - assertStringInstr("aaads", "aa", "UNICODE", 1); - assertStringInstr("aaads", "aa", "UNICODE_CI", 1); - assertStringInstr("aaads", "ds", "UTF8_BINARY", 4); - assertStringInstr("aaads", "ds", "UTF8_LCASE", 4); - assertStringInstr("aaads", "ds", "UNICODE", 4); - assertStringInstr("aaads", "ds", "UNICODE_CI", 4); - assertStringInstr("aaads", "Aa", "UTF8_BINARY", 0); - assertStringInstr("aaads", "Aa", "UTF8_LCASE", 1); - assertStringInstr("aaads", "Aa", "UNICODE", 0); - assertStringInstr("aaads", "Aa", "UNICODE_CI", 1); - assertStringInstr("aaaDs", "de", "UTF8_BINARY", 0); - assertStringInstr("aaaDs", "de", "UTF8_LCASE", 0); - assertStringInstr("aaaDs", "de", "UNICODE", 0); - assertStringInstr("aaaDs", "de", "UNICODE_CI", 0); - assertStringInstr("aaaDs", "ds", "UTF8_BINARY", 0); - assertStringInstr("aaaDs", "ds", "UTF8_LCASE", 4); - assertStringInstr("aaaDs", "ds", "UNICODE", 0); - assertStringInstr("aaaDs", "ds", "UNICODE_CI", 4); - assertStringInstr("aaadS", "Ds", "UTF8_BINARY", 0); - assertStringInstr("aaadS", "Ds", "UTF8_LCASE", 4); - assertStringInstr("aaadS", "Ds", "UNICODE", 0); - assertStringInstr("aaadS", "Ds", "UNICODE_CI", 4); + assertStringInstr("aaads", "aa", UTF8_BINARY, 1); + assertStringInstr("aaads", "aa", UTF8_LCASE, 1); + assertStringInstr("aaads", "aa", UNICODE, 1); + assertStringInstr("aaads", "aa", UNICODE_CI, 1); + assertStringInstr("aaads", "ds", UTF8_BINARY, 4); + assertStringInstr("aaads", "ds", UTF8_LCASE, 4); + assertStringInstr("aaads", "ds", UNICODE, 4); + assertStringInstr("aaads", "ds", UNICODE_CI, 4); + assertStringInstr("aaads", "Aa", UTF8_BINARY, 0); + assertStringInstr("aaads", "Aa", UTF8_LCASE, 1); + assertStringInstr("aaads", "Aa", UNICODE, 0); + assertStringInstr("aaads", "Aa", UNICODE_CI, 1); + assertStringInstr("aaaDs", "de", UTF8_BINARY, 0); + assertStringInstr("aaaDs", "de", UTF8_LCASE, 0); + assertStringInstr("aaaDs", 
"de", UNICODE, 0); + assertStringInstr("aaaDs", "de", UNICODE_CI, 0); + assertStringInstr("aaaDs", "ds", UTF8_BINARY, 0); + assertStringInstr("aaaDs", "ds", UTF8_LCASE, 4); + assertStringInstr("aaaDs", "ds", UNICODE, 0); + assertStringInstr("aaaDs", "ds", UNICODE_CI, 4); + assertStringInstr("aaadS", "Ds", UTF8_BINARY, 0); + assertStringInstr("aaadS", "Ds", UTF8_LCASE, 4); + assertStringInstr("aaadS", "Ds", UNICODE, 0); + assertStringInstr("aaadS", "Ds", UNICODE_CI, 4); assertStringInstr("aaaČŠčšcs", "cs", "SR", 8); assertStringInstr("aaaČŠčšcs", "cs", "SR_CI_AI", 4); // Advanced tests. - assertStringInstr("test大千世界X大千世界", "大千", "UTF8_BINARY", 5); - assertStringInstr("test大千世界X大千世界", "大千", "UTF8_LCASE", 5); - assertStringInstr("test大千世界X大千世界", "大千", "UNICODE", 5); - assertStringInstr("test大千世界X大千世界", "大千", "UNICODE_CI", 5); - assertStringInstr("test大千世界X大千世界", "界X", "UTF8_BINARY", 8); - assertStringInstr("test大千世界X大千世界", "界X", "UTF8_LCASE", 8); - assertStringInstr("test大千世界X大千世界", "界X", "UNICODE", 8); - assertStringInstr("test大千世界X大千世界", "界X", "UNICODE_CI", 8); - assertStringInstr("test大千世界X大千世界", "界x", "UTF8_BINARY", 0); - assertStringInstr("test大千世界X大千世界", "界x", "UTF8_LCASE", 8); - assertStringInstr("test大千世界X大千世界", "界x", "UNICODE", 0); - assertStringInstr("test大千世界X大千世界", "界x", "UNICODE_CI", 8); - assertStringInstr("test大千世界X大千世界", "界y", "UTF8_BINARY", 0); - assertStringInstr("test大千世界X大千世界", "界y", "UTF8_LCASE", 0); - assertStringInstr("test大千世界X大千世界", "界y", "UNICODE", 0); - assertStringInstr("test大千世界X大千世界", "界y", "UNICODE_CI", 0); + assertStringInstr("test大千世界X大千世界", "大千", UTF8_BINARY, 5); + assertStringInstr("test大千世界X大千世界", "大千", UTF8_LCASE, 5); + assertStringInstr("test大千世界X大千世界", "大千", UNICODE, 5); + assertStringInstr("test大千世界X大千世界", "大千", UNICODE_CI, 5); + assertStringInstr("test大千世界X大千世界", "界X", UTF8_BINARY, 8); + assertStringInstr("test大千世界X大千世界", "界X", UTF8_LCASE, 8); + assertStringInstr("test大千世界X大千世界", "界X", UNICODE, 8); + 
assertStringInstr("test大千世界X大千世界", "界X", UNICODE_CI, 8); + assertStringInstr("test大千世界X大千世界", "界x", UTF8_BINARY, 0); + assertStringInstr("test大千世界X大千世界", "界x", UTF8_LCASE, 8); + assertStringInstr("test大千世界X大千世界", "界x", UNICODE, 0); + assertStringInstr("test大千世界X大千世界", "界x", UNICODE_CI, 8); + assertStringInstr("test大千世界X大千世界", "界y", UTF8_BINARY, 0); + assertStringInstr("test大千世界X大千世界", "界y", UTF8_LCASE, 0); + assertStringInstr("test大千世界X大千世界", "界y", UNICODE, 0); + assertStringInstr("test大千世界X大千世界", "界y", UNICODE_CI, 0); // One-to-many case mapping (e.g. Turkish dotted I). - assertStringInstr("i\u0307", "i", "UNICODE_CI", 0); - assertStringInstr("i\u0307", "\u0307", "UNICODE_CI", 0); - assertStringInstr("i\u0307", "İ", "UNICODE_CI", 1); - assertStringInstr("İ", "i", "UNICODE_CI", 0); - assertStringInstr("İoi̇o12", "i\u0307o", "UNICODE_CI", 1); - assertStringInstr("i̇oİo12", "İo", "UNICODE_CI", 1); - assertStringInstr("abİoi̇o", "i\u0307o", "UNICODE_CI", 3); - assertStringInstr("abi̇oİo", "İo", "UNICODE_CI", 3); - assertStringInstr("ai̇oxXİo", "Xx", "UNICODE_CI", 5); - assertStringInstr("aİoi̇oxx", "XX", "UNICODE_CI", 7); - assertStringInstr("i\u0307", "i", "UTF8_LCASE", 1); // != UNICODE_CI - assertStringInstr("i\u0307", "\u0307", "UTF8_LCASE", 2); // != UNICODE_CI - assertStringInstr("i\u0307", "İ", "UTF8_LCASE", 1); - assertStringInstr("İ", "i", "UTF8_LCASE", 0); - assertStringInstr("İoi̇o12", "i\u0307o", "UTF8_LCASE", 1); - assertStringInstr("i̇oİo12", "İo", "UTF8_LCASE", 1); - assertStringInstr("abİoi̇o", "i\u0307o", "UTF8_LCASE", 3); - assertStringInstr("abi̇oİo", "İo", "UTF8_LCASE", 3); - assertStringInstr("abI\u0307oi̇o", "İo", "UTF8_LCASE", 3); - assertStringInstr("ai̇oxXİo", "Xx", "UTF8_LCASE", 5); - assertStringInstr("abİoi̇o", "\u0307o", "UTF8_LCASE", 6); - assertStringInstr("aİoi̇oxx", "XX", "UTF8_LCASE", 7); + assertStringInstr("i\u0307", "i", UNICODE_CI, 0); + assertStringInstr("i\u0307", "\u0307", UNICODE_CI, 0); + assertStringInstr("i\u0307", "İ", 
UNICODE_CI, 1); + assertStringInstr("İ", "i", UNICODE_CI, 0); + assertStringInstr("İoi̇o12", "i\u0307o", UNICODE_CI, 1); + assertStringInstr("i̇oİo12", "İo", UNICODE_CI, 1); + assertStringInstr("abİoi̇o", "i\u0307o", UNICODE_CI, 3); + assertStringInstr("abi̇oİo", "İo", UNICODE_CI, 3); + assertStringInstr("ai̇oxXİo", "Xx", UNICODE_CI, 5); + assertStringInstr("aİoi̇oxx", "XX", UNICODE_CI, 7); + assertStringInstr("i\u0307", "i", UTF8_LCASE, 1); // != UNICODE_CI + assertStringInstr("i\u0307", "\u0307", UTF8_LCASE, 2); // != UNICODE_CI + assertStringInstr("i\u0307", "İ", UTF8_LCASE, 1); + assertStringInstr("İ", "i", UTF8_LCASE, 0); + assertStringInstr("İoi̇o12", "i\u0307o", UTF8_LCASE, 1); + assertStringInstr("i̇oİo12", "İo", UTF8_LCASE, 1); + assertStringInstr("abİoi̇o", "i\u0307o", UTF8_LCASE, 3); + assertStringInstr("abi̇oİo", "İo", UTF8_LCASE, 3); + assertStringInstr("abI\u0307oi̇o", "İo", UTF8_LCASE, 3); + assertStringInstr("ai̇oxXİo", "Xx", UTF8_LCASE, 5); + assertStringInstr("abİoi̇o", "\u0307o", UTF8_LCASE, 6); + assertStringInstr("aİoi̇oxx", "XX", UTF8_LCASE, 7); // Conditional case mapping (e.g. Greek sigmas). 
- assertStringInstr("σ", "σ", "UTF8_BINARY", 1); - assertStringInstr("σ", "ς", "UTF8_BINARY", 0); - assertStringInstr("σ", "Σ", "UTF8_BINARY", 0); - assertStringInstr("ς", "σ", "UTF8_BINARY", 0); - assertStringInstr("ς", "ς", "UTF8_BINARY", 1); - assertStringInstr("ς", "Σ", "UTF8_BINARY", 0); - assertStringInstr("Σ", "σ", "UTF8_BINARY", 0); - assertStringInstr("Σ", "ς", "UTF8_BINARY", 0); - assertStringInstr("Σ", "Σ", "UTF8_BINARY", 1); - assertStringInstr("σ", "σ", "UTF8_LCASE", 1); - assertStringInstr("σ", "ς", "UTF8_LCASE", 1); - assertStringInstr("σ", "Σ", "UTF8_LCASE", 1); - assertStringInstr("ς", "σ", "UTF8_LCASE", 1); - assertStringInstr("ς", "ς", "UTF8_LCASE", 1); - assertStringInstr("ς", "Σ", "UTF8_LCASE", 1); - assertStringInstr("Σ", "σ", "UTF8_LCASE", 1); - assertStringInstr("Σ", "ς", "UTF8_LCASE", 1); - assertStringInstr("Σ", "Σ", "UTF8_LCASE", 1); - assertStringInstr("σ", "σ", "UNICODE", 1); - assertStringInstr("σ", "ς", "UNICODE", 0); - assertStringInstr("σ", "Σ", "UNICODE", 0); - assertStringInstr("ς", "σ", "UNICODE", 0); - assertStringInstr("ς", "ς", "UNICODE", 1); - assertStringInstr("ς", "Σ", "UNICODE", 0); - assertStringInstr("Σ", "σ", "UNICODE", 0); - assertStringInstr("Σ", "ς", "UNICODE", 0); - assertStringInstr("Σ", "Σ", "UNICODE", 1); - assertStringInstr("σ", "σ", "UNICODE_CI", 1); - assertStringInstr("σ", "ς", "UNICODE_CI", 1); - assertStringInstr("σ", "Σ", "UNICODE_CI", 1); - assertStringInstr("ς", "σ", "UNICODE_CI", 1); - assertStringInstr("ς", "ς", "UNICODE_CI", 1); - assertStringInstr("ς", "Σ", "UNICODE_CI", 1); - assertStringInstr("Σ", "σ", "UNICODE_CI", 1); - assertStringInstr("Σ", "ς", "UNICODE_CI", 1); - assertStringInstr("Σ", "Σ", "UNICODE_CI", 1); + assertStringInstr("σ", "σ", UTF8_BINARY, 1); + assertStringInstr("σ", "ς", UTF8_BINARY, 0); + assertStringInstr("σ", "Σ", UTF8_BINARY, 0); + assertStringInstr("ς", "σ", UTF8_BINARY, 0); + assertStringInstr("ς", "ς", UTF8_BINARY, 1); + assertStringInstr("ς", "Σ", UTF8_BINARY, 0); + 
assertStringInstr("Σ", "σ", UTF8_BINARY, 0); + assertStringInstr("Σ", "ς", UTF8_BINARY, 0); + assertStringInstr("Σ", "Σ", UTF8_BINARY, 1); + assertStringInstr("σ", "σ", UTF8_LCASE, 1); + assertStringInstr("σ", "ς", UTF8_LCASE, 1); + assertStringInstr("σ", "Σ", UTF8_LCASE, 1); + assertStringInstr("ς", "σ", UTF8_LCASE, 1); + assertStringInstr("ς", "ς", UTF8_LCASE, 1); + assertStringInstr("ς", "Σ", UTF8_LCASE, 1); + assertStringInstr("Σ", "σ", UTF8_LCASE, 1); + assertStringInstr("Σ", "ς", UTF8_LCASE, 1); + assertStringInstr("Σ", "Σ", UTF8_LCASE, 1); + assertStringInstr("σ", "σ", UNICODE, 1); + assertStringInstr("σ", "ς", UNICODE, 0); + assertStringInstr("σ", "Σ", UNICODE, 0); + assertStringInstr("ς", "σ", UNICODE, 0); + assertStringInstr("ς", "ς", UNICODE, 1); + assertStringInstr("ς", "Σ", UNICODE, 0); + assertStringInstr("Σ", "σ", UNICODE, 0); + assertStringInstr("Σ", "ς", UNICODE, 0); + assertStringInstr("Σ", "Σ", UNICODE, 1); + assertStringInstr("σ", "σ", UNICODE_CI, 1); + assertStringInstr("σ", "ς", UNICODE_CI, 1); + assertStringInstr("σ", "Σ", UNICODE_CI, 1); + assertStringInstr("ς", "σ", UNICODE_CI, 1); + assertStringInstr("ς", "ς", UNICODE_CI, 1); + assertStringInstr("ς", "Σ", UNICODE_CI, 1); + assertStringInstr("Σ", "σ", UNICODE_CI, 1); + assertStringInstr("Σ", "ς", UNICODE_CI, 1); + assertStringInstr("Σ", "Σ", UNICODE_CI, 1); // Surrogate pairs. 
- assertStringInstr("a🙃b", "a", "UTF8_BINARY", 1); - assertStringInstr("a🙃b", "a", "UTF8_LCASE", 1); - assertStringInstr("a🙃b", "a", "UNICODE", 1); - assertStringInstr("a🙃b", "a", "UNICODE_CI", 1); - assertStringInstr("a🙃b", "🙃", "UTF8_BINARY", 2); - assertStringInstr("a🙃b", "🙃", "UTF8_LCASE", 2); - assertStringInstr("a🙃b", "🙃", "UNICODE", 2); - assertStringInstr("a🙃b", "🙃", "UNICODE_CI", 2); - assertStringInstr("a🙃b", "b", "UTF8_BINARY", 3); - assertStringInstr("a🙃b", "b", "UTF8_LCASE", 3); - assertStringInstr("a🙃b", "b", "UNICODE", 3); - assertStringInstr("a🙃b", "b", "UNICODE_CI", 3); - assertStringInstr("a🙃🙃b", "🙃", "UTF8_BINARY", 2); - assertStringInstr("a🙃🙃b", "🙃", "UTF8_LCASE", 2); - assertStringInstr("a🙃🙃b", "🙃", "UNICODE", 2); - assertStringInstr("a🙃🙃b", "🙃", "UNICODE_CI", 2); - assertStringInstr("a🙃🙃b", "b", "UTF8_BINARY", 4); - assertStringInstr("a🙃🙃b", "b", "UTF8_LCASE", 4); - assertStringInstr("a🙃🙃b", "b", "UNICODE", 4); - assertStringInstr("a🙃🙃b", "b", "UNICODE_CI", 4); - assertStringInstr("a🙃x🙃b", "b", "UTF8_BINARY", 5); - assertStringInstr("a🙃x🙃b", "b", "UTF8_LCASE", 5); - assertStringInstr("a🙃x🙃b", "b", "UNICODE", 5); - assertStringInstr("a🙃x🙃b", "b", "UNICODE_CI", 5); + assertStringInstr("a🙃b", "a", UTF8_BINARY, 1); + assertStringInstr("a🙃b", "a", UTF8_LCASE, 1); + assertStringInstr("a🙃b", "a", UNICODE, 1); + assertStringInstr("a🙃b", "a", UNICODE_CI, 1); + assertStringInstr("a🙃b", "🙃", UTF8_BINARY, 2); + assertStringInstr("a🙃b", "🙃", UTF8_LCASE, 2); + assertStringInstr("a🙃b", "🙃", UNICODE, 2); + assertStringInstr("a🙃b", "🙃", UNICODE_CI, 2); + assertStringInstr("a🙃b", "b", UTF8_BINARY, 3); + assertStringInstr("a🙃b", "b", UTF8_LCASE, 3); + assertStringInstr("a🙃b", "b", UNICODE, 3); + assertStringInstr("a🙃b", "b", UNICODE_CI, 3); + assertStringInstr("a🙃🙃b", "🙃", UTF8_BINARY, 2); + assertStringInstr("a🙃🙃b", "🙃", UTF8_LCASE, 2); + assertStringInstr("a🙃🙃b", "🙃", UNICODE, 2); + assertStringInstr("a🙃🙃b", "🙃", UNICODE_CI, 2); + assertStringInstr("a🙃🙃b", 
"b", UTF8_BINARY, 4); + assertStringInstr("a🙃🙃b", "b", UTF8_LCASE, 4); + assertStringInstr("a🙃🙃b", "b", UNICODE, 4); + assertStringInstr("a🙃🙃b", "b", UNICODE_CI, 4); + assertStringInstr("a🙃x🙃b", "b", UTF8_BINARY, 5); + assertStringInstr("a🙃x🙃b", "b", UTF8_LCASE, 5); + assertStringInstr("a🙃x🙃b", "b", UNICODE, 5); + assertStringInstr("a🙃x🙃b", "b", UNICODE_CI, 5); } /** @@ -1717,256 +1718,256 @@ private void assertFindInSet(String word, UTF8String set, String collationName, @Test public void testFindInSet() throws SparkException { // Empty strings. - assertFindInSet("", UTF8String.fromString(""), "UTF8_BINARY", 1); - assertFindInSet("", UTF8String.fromString(""), "UTF8_LCASE", 1); - assertFindInSet("", UTF8String.fromString(""), "UNICODE", 1); - assertFindInSet("", UTF8String.fromString(""), "UNICODE_CI", 1); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); - assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UTF8_BINARY", 1); - assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UTF8_LCASE", 1); - assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UNICODE", 1); - assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UNICODE_CI", 1); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UTF8_BINARY", 6); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UTF8_LCASE", 6); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UNICODE", 6); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UNICODE_CI", 6); - assertFindInSet("", UTF8String.fromString("abc"), "UTF8_BINARY", 0); - assertFindInSet("", UTF8String.fromString("abc"), "UTF8_LCASE", 0); - assertFindInSet("", UTF8String.fromString("abc"), "UNICODE", 0); 
- assertFindInSet("", UTF8String.fromString("abc"), "UNICODE_CI", 0); + assertFindInSet("", UTF8String.fromString(""), UTF8_BINARY, 1); + assertFindInSet("", UTF8String.fromString(""), UTF8_LCASE, 1); + assertFindInSet("", UTF8String.fromString(""), UNICODE, 1); + assertFindInSet("", UTF8String.fromString(""), UNICODE_CI, 1); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 0); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 0); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), UTF8_BINARY, 1); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), UTF8_LCASE, 1); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), UNICODE, 1); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), UNICODE_CI, 1); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), UTF8_BINARY, 6); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), UTF8_LCASE, 6); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), UNICODE, 6); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), UNICODE_CI, 6); + assertFindInSet("", UTF8String.fromString("abc"), UTF8_BINARY, 0); + assertFindInSet("", UTF8String.fromString("abc"), UTF8_LCASE, 0); + assertFindInSet("", UTF8String.fromString("abc"), UNICODE, 0); + assertFindInSet("", UTF8String.fromString("abc"), UNICODE_CI, 0); // Basic tests. 
- assertFindInSet("xx", UTF8String.fromString("xx"), "UTF8_BINARY", 1); - assertFindInSet("xx", UTF8String.fromString("xx"), "UTF8_LCASE", 1); - assertFindInSet("xx", UTF8String.fromString("xx"), "UNICODE", 1); - assertFindInSet("xx", UTF8String.fromString("xx"), "UNICODE_CI", 1); - assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); - assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); - assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 1); - assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 1); - assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 1); - assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 1); - assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); - assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); - assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 5); - assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 5); - assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 5); - assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 5); - assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); - assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); - assertFindInSet("Ab", 
UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 3); - assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 3); - assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); - assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); - assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 4); - assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 4); + assertFindInSet("xx", UTF8String.fromString("xx"), UTF8_BINARY, 1); + assertFindInSet("xx", UTF8String.fromString("xx"), UTF8_LCASE, 1); + assertFindInSet("xx", UTF8String.fromString("xx"), UNICODE, 1); + assertFindInSet("xx", UTF8String.fromString("xx"), UNICODE_CI, 1); + assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 0); + assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 0); + assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 1); + assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 1); + assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 1); + assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 1); + assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("abcd", 
UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 0); + assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 0); + assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 5); + assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 5); + assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 5); + assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 5); + assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 0); + assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 0); + assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 3); + assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 3); + assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 0); + assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 0); + assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 4); + assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 4); // Advanced tests. 
- assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_BINARY", 5); - assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_LCASE", 5); - assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE", 5); - assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE_CI", 5); - assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_BINARY", 0); - assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_LCASE", 4); - assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE", 0); - assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE_CI", 4); - assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UTF8_BINARY", 0); - assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UTF8_LCASE", 5); - assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UNICODE", 0); - assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UNICODE_CI", 5); + assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UTF8_BINARY, 5); + assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UTF8_LCASE, 5); + assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UNICODE, 5); + assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UNICODE_CI, 5); + assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UTF8_BINARY, 0); + assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UTF8_LCASE, 4); + assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UNICODE, 0); + assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UNICODE_CI, 4); + assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), UTF8_BINARY, 0); + assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), UTF8_LCASE, 5); + assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), 
UNICODE, 0); + assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), UNICODE_CI, 5); // One-to-many case mapping (e.g. Turkish dotted I). - assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UTF8_BINARY", 0); - assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UTF8_LCASE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UNICODE", 0); - assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UNICODE_CI", 1); - assertFindInSet("i", UTF8String.fromString("İ"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("İ"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("İ"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("İ"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UTF8_BINARY", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UTF8_LCASE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UNICODE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UNICODE_CI", 1); - assertFindInSet("i", UTF8String.fromString("i\u0307"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UTF8_BINARY", 0); - assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UTF8_LCASE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UNICODE", 0); - assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UNICODE_CI", 1); - assertFindInSet("i", UTF8String.fromString("İ,"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("İ,"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("İ,"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("İ,"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), 
"UTF8_BINARY", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), "UTF8_LCASE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), "UNICODE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), "UNICODE_CI", 1); - assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UTF8_BINARY", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UTF8_LCASE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UNICODE", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UNICODE_CI", 2); - assertFindInSet("i", UTF8String.fromString("ab,İ"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UTF8_BINARY", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UTF8_LCASE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UNICODE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UNICODE_CI", 2); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UNICODE_CI", 0); - assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), "UTF8_BINARY", 0); - assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), "UTF8_LCASE", 2); - assertFindInSet("İ", 
UTF8String.fromString("ab,i\u0307"), "UNICODE", 0); - assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), "UNICODE_CI", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UTF8_BINARY", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UTF8_LCASE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UNICODE", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UNICODE_CI", 2); - assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UTF8_BINARY", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UTF8_LCASE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UNICODE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UNICODE_CI", 2); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UNICODE_CI", 0); - assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UTF8_BINARY", 0); - assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UTF8_LCASE", 2); - assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UNICODE", 0); - assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UNICODE_CI", 2); - assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UTF8_BINARY", 0); - assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UTF8_LCASE", 2); - assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UNICODE", 0); - 
assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UNICODE_CI", 2); + assertFindInSet("i\u0307", UTF8String.fromString("İ"), UTF8_BINARY, 0); + assertFindInSet("i\u0307", UTF8String.fromString("İ"), UTF8_LCASE, 1); + assertFindInSet("i\u0307", UTF8String.fromString("İ"), UNICODE, 0); + assertFindInSet("i\u0307", UTF8String.fromString("İ"), UNICODE_CI, 1); + assertFindInSet("i", UTF8String.fromString("İ"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("İ"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("İ"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("İ"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), UTF8_BINARY, 1); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), UTF8_LCASE, 1); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), UNICODE, 1); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), UNICODE_CI, 1); + assertFindInSet("i", UTF8String.fromString("i\u0307"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("İ,"), UTF8_BINARY, 0); + assertFindInSet("i\u0307", UTF8String.fromString("İ,"), UTF8_LCASE, 1); + assertFindInSet("i\u0307", UTF8String.fromString("İ,"), UNICODE, 0); + assertFindInSet("i\u0307", UTF8String.fromString("İ,"), UNICODE_CI, 1); + assertFindInSet("i", UTF8String.fromString("İ,"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("İ,"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("İ,"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("İ,"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), UTF8_BINARY, 1); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), UTF8_LCASE, 1); + assertFindInSet("i\u0307", 
UTF8String.fromString("i\u0307,"), UNICODE, 1); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), UNICODE_CI, 1); + assertFindInSet("i", UTF8String.fromString("i\u0307,"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307,"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307,"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307,"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), UTF8_BINARY, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), UTF8_LCASE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), UNICODE, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), UNICODE_CI, 2); + assertFindInSet("i", UTF8String.fromString("ab,İ"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), UTF8_BINARY, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), UTF8_LCASE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), UNICODE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), UNICODE_CI, 2); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), UNICODE_CI, 0); + assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), UTF8_BINARY, 0); + assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), UTF8_LCASE, 2); + assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), UNICODE, 0); + assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), UNICODE_CI, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), 
UTF8_BINARY, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), UTF8_LCASE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), UNICODE, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), UNICODE_CI, 2); + assertFindInSet("i", UTF8String.fromString("ab,İ,12"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ,12"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ,12"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ,12"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), UTF8_BINARY, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), UTF8_LCASE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), UNICODE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), UNICODE_CI, 2); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), UNICODE_CI, 0); + assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), UTF8_BINARY, 0); + assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), UTF8_LCASE, 2); + assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), UNICODE, 0); + assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), UNICODE_CI, 2); + assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), UTF8_BINARY, 0); + assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), UTF8_LCASE, 2); + assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), UNICODE, 0); + assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), UNICODE_CI, 2); // Conditional case mapping (e.g. Greek sigmas). 
- assertFindInSet("σ", UTF8String.fromString("σ"), "UTF8_BINARY", 1); - assertFindInSet("σ", UTF8String.fromString("ς"), "UTF8_BINARY", 0); - assertFindInSet("σ", UTF8String.fromString("Σ"), "UTF8_BINARY", 0); - assertFindInSet("ς", UTF8String.fromString("σ"), "UTF8_BINARY", 0); - assertFindInSet("ς", UTF8String.fromString("ς"), "UTF8_BINARY", 1); - assertFindInSet("ς", UTF8String.fromString("Σ"), "UTF8_BINARY", 0); - assertFindInSet("Σ", UTF8String.fromString("σ"), "UTF8_BINARY", 0); - assertFindInSet("Σ", UTF8String.fromString("ς"), "UTF8_BINARY", 0); - assertFindInSet("Σ", UTF8String.fromString("Σ"), "UTF8_BINARY", 1); - assertFindInSet("σ", UTF8String.fromString("σ"), "UTF8_LCASE", 1); - assertFindInSet("σ", UTF8String.fromString("ς"), "UTF8_LCASE", 1); - assertFindInSet("σ", UTF8String.fromString("Σ"), "UTF8_LCASE", 1); - assertFindInSet("ς", UTF8String.fromString("σ"), "UTF8_LCASE", 1); - assertFindInSet("ς", UTF8String.fromString("ς"), "UTF8_LCASE", 1); - assertFindInSet("ς", UTF8String.fromString("Σ"), "UTF8_LCASE", 1); - assertFindInSet("Σ", UTF8String.fromString("σ"), "UTF8_LCASE", 1); - assertFindInSet("Σ", UTF8String.fromString("ς"), "UTF8_LCASE", 1); - assertFindInSet("Σ", UTF8String.fromString("Σ"), "UTF8_LCASE", 1); - assertFindInSet("σ", UTF8String.fromString("σ"), "UNICODE", 1); - assertFindInSet("σ", UTF8String.fromString("ς"), "UNICODE", 0); - assertFindInSet("σ", UTF8String.fromString("Σ"), "UNICODE", 0); - assertFindInSet("ς", UTF8String.fromString("σ"), "UNICODE", 0); - assertFindInSet("ς", UTF8String.fromString("ς"), "UNICODE", 1); - assertFindInSet("ς", UTF8String.fromString("Σ"), "UNICODE", 0); - assertFindInSet("Σ", UTF8String.fromString("σ"), "UNICODE", 0); - assertFindInSet("Σ", UTF8String.fromString("ς"), "UNICODE", 0); - assertFindInSet("Σ", UTF8String.fromString("Σ"), "UNICODE", 1); - assertFindInSet("σ", UTF8String.fromString("σ"), "UNICODE_CI", 1); - assertFindInSet("σ", UTF8String.fromString("ς"), "UNICODE_CI", 1); - 
assertFindInSet("σ", UTF8String.fromString("Σ"), "UNICODE_CI", 1); - assertFindInSet("ς", UTF8String.fromString("σ"), "UNICODE_CI", 1); - assertFindInSet("ς", UTF8String.fromString("ς"), "UNICODE_CI", 1); - assertFindInSet("ς", UTF8String.fromString("Σ"), "UNICODE_CI", 1); - assertFindInSet("Σ", UTF8String.fromString("σ"), "UNICODE_CI", 1); - assertFindInSet("Σ", UTF8String.fromString("ς"), "UNICODE_CI", 1); - assertFindInSet("Σ", UTF8String.fromString("Σ"), "UNICODE_CI", 1); + assertFindInSet("σ", UTF8String.fromString("σ"), UTF8_BINARY, 1); + assertFindInSet("σ", UTF8String.fromString("ς"), UTF8_BINARY, 0); + assertFindInSet("σ", UTF8String.fromString("Σ"), UTF8_BINARY, 0); + assertFindInSet("ς", UTF8String.fromString("σ"), UTF8_BINARY, 0); + assertFindInSet("ς", UTF8String.fromString("ς"), UTF8_BINARY, 1); + assertFindInSet("ς", UTF8String.fromString("Σ"), UTF8_BINARY, 0); + assertFindInSet("Σ", UTF8String.fromString("σ"), UTF8_BINARY, 0); + assertFindInSet("Σ", UTF8String.fromString("ς"), UTF8_BINARY, 0); + assertFindInSet("Σ", UTF8String.fromString("Σ"), UTF8_BINARY, 1); + assertFindInSet("σ", UTF8String.fromString("σ"), UTF8_LCASE, 1); + assertFindInSet("σ", UTF8String.fromString("ς"), UTF8_LCASE, 1); + assertFindInSet("σ", UTF8String.fromString("Σ"), UTF8_LCASE, 1); + assertFindInSet("ς", UTF8String.fromString("σ"), UTF8_LCASE, 1); + assertFindInSet("ς", UTF8String.fromString("ς"), UTF8_LCASE, 1); + assertFindInSet("ς", UTF8String.fromString("Σ"), UTF8_LCASE, 1); + assertFindInSet("Σ", UTF8String.fromString("σ"), UTF8_LCASE, 1); + assertFindInSet("Σ", UTF8String.fromString("ς"), UTF8_LCASE, 1); + assertFindInSet("Σ", UTF8String.fromString("Σ"), UTF8_LCASE, 1); + assertFindInSet("σ", UTF8String.fromString("σ"), UNICODE, 1); + assertFindInSet("σ", UTF8String.fromString("ς"), UNICODE, 0); + assertFindInSet("σ", UTF8String.fromString("Σ"), UNICODE, 0); + assertFindInSet("ς", UTF8String.fromString("σ"), UNICODE, 0); + assertFindInSet("ς", 
UTF8String.fromString("ς"), UNICODE, 1); + assertFindInSet("ς", UTF8String.fromString("Σ"), UNICODE, 0); + assertFindInSet("Σ", UTF8String.fromString("σ"), UNICODE, 0); + assertFindInSet("Σ", UTF8String.fromString("ς"), UNICODE, 0); + assertFindInSet("Σ", UTF8String.fromString("Σ"), UNICODE, 1); + assertFindInSet("σ", UTF8String.fromString("σ"), UNICODE_CI, 1); + assertFindInSet("σ", UTF8String.fromString("ς"), UNICODE_CI, 1); + assertFindInSet("σ", UTF8String.fromString("Σ"), UNICODE_CI, 1); + assertFindInSet("ς", UTF8String.fromString("σ"), UNICODE_CI, 1); + assertFindInSet("ς", UTF8String.fromString("ς"), UNICODE_CI, 1); + assertFindInSet("ς", UTF8String.fromString("Σ"), UNICODE_CI, 1); + assertFindInSet("Σ", UTF8String.fromString("σ"), UNICODE_CI, 1); + assertFindInSet("Σ", UTF8String.fromString("ς"), UNICODE_CI, 1); + assertFindInSet("Σ", UTF8String.fromString("Σ"), UNICODE_CI, 1); // Surrogate pairs. - assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 0); - assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 0); - assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 0); - assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 0); - assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 1); - assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 1); - assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 1); - assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 1); - assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 2); - assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 2); - assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 2); - assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 2); - assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 3); - assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 3); - assertFindInSet("🙃c", 
UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 3); - assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 3); - assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_BINARY", 0); - assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_LCASE", 0); - assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE", 0); - assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE_CI", 0); - assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_BINARY", 1); - assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_LCASE", 1); - assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE", 1); - assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE_CI", 1); - assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UTF8_BINARY", 2); - assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UTF8_LCASE", 2); - assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UNICODE", 2); - assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UNICODE_CI", 2); - assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 0); - assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 0); - assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 0); - assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 0); - assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 1); - assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 1); - assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 1); - assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 1); - assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 0); - assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 1); - assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 0); - assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 1); - assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 3); - assertFindInSet("𝔸", 
UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 3); - assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 3); - assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 1); - assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 2); - assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 2); - assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 2); - assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 2); - assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 0); - assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 2); - assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 0); - assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 2); + assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), UTF8_BINARY, 0); + assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), UTF8_LCASE, 0); + assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), UNICODE, 0); + assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), UNICODE_CI, 0); + assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), UTF8_BINARY, 1); + assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), UTF8_LCASE, 1); + assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), UNICODE, 1); + assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), UNICODE_CI, 1); + assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), UTF8_BINARY, 2); + assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), UTF8_LCASE, 2); + assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), UNICODE, 2); + assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), UNICODE_CI, 2); + assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), UTF8_BINARY, 3); + assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), UTF8_LCASE, 3); + assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), UNICODE, 3); + assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), UNICODE_CI, 3); + assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), 
UTF8_BINARY, 0); + assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), UTF8_LCASE, 0); + assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), UNICODE, 0); + assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), UNICODE_CI, 0); + assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), UTF8_BINARY, 1); + assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), UTF8_LCASE, 1); + assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), UNICODE, 1); + assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), UNICODE_CI, 1); + assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), UTF8_BINARY, 2); + assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), UTF8_LCASE, 2); + assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), UNICODE, 2); + assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), UNICODE_CI, 2); + assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 0); + assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 0); + assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 0); + assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 0); + assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 1); + assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 1); + assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 1); + assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 1); + assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 0); + assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 1); + assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 0); + assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 1); + assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 3); + assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 3); + assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 3); + assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 1); + assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 
2); + assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 2); + assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 2); + assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 2); + assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 0); + assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 2); + assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 0); + assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 2); // Invalid UTF8 strings assertFindInSet("C", UTF8String.fromBytes( new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), - "UTF8_BINARY", 3); + UTF8_BINARY, 3); assertFindInSet("c", UTF8String.fromBytes( new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), - "UTF8_LCASE", 2); + UTF8_LCASE, 2); assertFindInSet("C", UTF8String.fromBytes( new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), - "UNICODE", 2); + UNICODE, 2); assertFindInSet("c", UTF8String.fromBytes( new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), - "UNICODE_CI", 2); + UNICODE_CI, 2); } /** @@ -1986,145 +1987,145 @@ private void assertStringReplace(String source, String search, String replace, @Test public void testStringReplace() throws SparkException { // Empty strings. 
- assertStringReplace("", "", "", "UTF8_BINARY", ""); - assertStringReplace("", "", "", "UTF8_LCASE", ""); - assertStringReplace("", "", "", "UNICODE", ""); - assertStringReplace("", "", "", "UNICODE_CI", ""); - assertStringReplace("abc", "", "", "UTF8_BINARY", "abc"); - assertStringReplace("abc", "", "", "UTF8_LCASE", "abc"); - assertStringReplace("abc", "", "", "UNICODE", "abc"); - assertStringReplace("abc", "", "", "UNICODE_CI", "abc"); - assertStringReplace("", "x", "", "UTF8_BINARY", ""); - assertStringReplace("", "x", "", "UTF8_LCASE", ""); - assertStringReplace("", "x", "", "UNICODE", ""); - assertStringReplace("", "x", "", "UNICODE_CI", ""); - assertStringReplace("", "", "x", "UTF8_BINARY", ""); - assertStringReplace("", "", "x", "UTF8_LCASE", ""); - assertStringReplace("", "", "x", "UNICODE", ""); - assertStringReplace("", "", "x", "UNICODE_CI", ""); - assertStringReplace("", "b", "x", "UTF8_BINARY", ""); - assertStringReplace("", "b", "x", "UTF8_LCASE", ""); - assertStringReplace("", "b", "x", "UNICODE", ""); - assertStringReplace("", "b", "x", "UNICODE_CI", ""); - assertStringReplace("abc", "b", "", "UTF8_BINARY", "ac"); - assertStringReplace("abc", "b", "", "UTF8_LCASE", "ac"); - assertStringReplace("abc", "b", "", "UNICODE", "ac"); - assertStringReplace("abc", "b", "", "UNICODE_CI", "ac"); - assertStringReplace("abc", "", "x", "UTF8_BINARY", "abc"); - assertStringReplace("abc", "", "x", "UTF8_LCASE", "abc"); - assertStringReplace("abc", "", "x", "UNICODE", "abc"); - assertStringReplace("abc", "", "x", "UNICODE_CI", "abc"); + assertStringReplace("", "", "", UTF8_BINARY, ""); + assertStringReplace("", "", "", UTF8_LCASE, ""); + assertStringReplace("", "", "", UNICODE, ""); + assertStringReplace("", "", "", UNICODE_CI, ""); + assertStringReplace("abc", "", "", UTF8_BINARY, "abc"); + assertStringReplace("abc", "", "", UTF8_LCASE, "abc"); + assertStringReplace("abc", "", "", UNICODE, "abc"); + assertStringReplace("abc", "", "", UNICODE_CI, "abc"); + 
assertStringReplace("", "x", "", UTF8_BINARY, ""); + assertStringReplace("", "x", "", UTF8_LCASE, ""); + assertStringReplace("", "x", "", UNICODE, ""); + assertStringReplace("", "x", "", UNICODE_CI, ""); + assertStringReplace("", "", "x", UTF8_BINARY, ""); + assertStringReplace("", "", "x", UTF8_LCASE, ""); + assertStringReplace("", "", "x", UNICODE, ""); + assertStringReplace("", "", "x", UNICODE_CI, ""); + assertStringReplace("", "b", "x", UTF8_BINARY, ""); + assertStringReplace("", "b", "x", UTF8_LCASE, ""); + assertStringReplace("", "b", "x", UNICODE, ""); + assertStringReplace("", "b", "x", UNICODE_CI, ""); + assertStringReplace("abc", "b", "", UTF8_BINARY, "ac"); + assertStringReplace("abc", "b", "", UTF8_LCASE, "ac"); + assertStringReplace("abc", "b", "", UNICODE, "ac"); + assertStringReplace("abc", "b", "", UNICODE_CI, "ac"); + assertStringReplace("abc", "", "x", UTF8_BINARY, "abc"); + assertStringReplace("abc", "", "x", UTF8_LCASE, "abc"); + assertStringReplace("abc", "", "x", UNICODE, "abc"); + assertStringReplace("abc", "", "x", UNICODE_CI, "abc"); // Basic tests. 
- assertStringReplace("replace", "pl", "", "UTF8_BINARY", "reace"); - assertStringReplace("replace", "pl", "", "UTF8_LCASE", "reace"); - assertStringReplace("replace", "pl", "", "UNICODE", "reace"); - assertStringReplace("replace", "pl", "", "UNICODE_CI", "reace"); - assertStringReplace("replace", "", "123", "UTF8_BINARY", "replace"); - assertStringReplace("replace", "", "123", "UTF8_LCASE", "replace"); - assertStringReplace("replace", "", "123", "UNICODE", "replace"); - assertStringReplace("replace", "", "123", "UNICODE_CI", "replace"); - assertStringReplace("abcabc", "b", "12", "UTF8_BINARY", "a12ca12c"); - assertStringReplace("abcabc", "b", "12", "UTF8_LCASE", "a12ca12c"); - assertStringReplace("abcabc", "b", "12", "UNICODE", "a12ca12c"); - assertStringReplace("abcabc", "b", "12", "UNICODE_CI", "a12ca12c"); - assertStringReplace("replace", "plx", "123", "UTF8_BINARY", "replace"); - assertStringReplace("replace", "plx", "123", "UTF8_LCASE", "replace"); - assertStringReplace("replace", "plx", "123", "UNICODE", "replace"); - assertStringReplace("replace", "plx", "123", "UNICODE_CI", "replace"); - assertStringReplace("Replace", "re", "", "UTF8_BINARY", "Replace"); - assertStringReplace("Replace", "re", "", "UTF8_LCASE", "place"); - assertStringReplace("Replace", "re", "", "UNICODE", "Replace"); - assertStringReplace("Replace", "re", "", "UNICODE_CI", "place"); - assertStringReplace("abcdabcd", "Bc", "", "UTF8_BINARY", "abcdabcd"); - assertStringReplace("abcdabcd", "Bc", "", "UTF8_LCASE", "adad"); - assertStringReplace("abcdabcd", "Bc", "", "UNICODE", "abcdabcd"); - assertStringReplace("abcdabcd", "Bc", "", "UNICODE_CI", "adad"); - assertStringReplace("AbcdabCd", "Bc", "", "UTF8_BINARY", "AbcdabCd"); - assertStringReplace("AbcdabCd", "Bc", "", "UTF8_LCASE", "Adad"); - assertStringReplace("AbcdabCd", "Bc", "", "UNICODE", "AbcdabCd"); - assertStringReplace("AbcdabCd", "Bc", "", "UNICODE_CI", "Adad"); + assertStringReplace("replace", "pl", "", UTF8_BINARY, "reace"); + 
assertStringReplace("replace", "pl", "", UTF8_LCASE, "reace"); + assertStringReplace("replace", "pl", "", UNICODE, "reace"); + assertStringReplace("replace", "pl", "", UNICODE_CI, "reace"); + assertStringReplace("replace", "", "123", UTF8_BINARY, "replace"); + assertStringReplace("replace", "", "123", UTF8_LCASE, "replace"); + assertStringReplace("replace", "", "123", UNICODE, "replace"); + assertStringReplace("replace", "", "123", UNICODE_CI, "replace"); + assertStringReplace("abcabc", "b", "12", UTF8_BINARY, "a12ca12c"); + assertStringReplace("abcabc", "b", "12", UTF8_LCASE, "a12ca12c"); + assertStringReplace("abcabc", "b", "12", UNICODE, "a12ca12c"); + assertStringReplace("abcabc", "b", "12", UNICODE_CI, "a12ca12c"); + assertStringReplace("replace", "plx", "123", UTF8_BINARY, "replace"); + assertStringReplace("replace", "plx", "123", UTF8_LCASE, "replace"); + assertStringReplace("replace", "plx", "123", UNICODE, "replace"); + assertStringReplace("replace", "plx", "123", UNICODE_CI, "replace"); + assertStringReplace("Replace", "re", "", UTF8_BINARY, "Replace"); + assertStringReplace("Replace", "re", "", UTF8_LCASE, "place"); + assertStringReplace("Replace", "re", "", UNICODE, "Replace"); + assertStringReplace("Replace", "re", "", UNICODE_CI, "place"); + assertStringReplace("abcdabcd", "Bc", "", UTF8_BINARY, "abcdabcd"); + assertStringReplace("abcdabcd", "Bc", "", UTF8_LCASE, "adad"); + assertStringReplace("abcdabcd", "Bc", "", UNICODE, "abcdabcd"); + assertStringReplace("abcdabcd", "Bc", "", UNICODE_CI, "adad"); + assertStringReplace("AbcdabCd", "Bc", "", UTF8_BINARY, "AbcdabCd"); + assertStringReplace("AbcdabCd", "Bc", "", UTF8_LCASE, "Adad"); + assertStringReplace("AbcdabCd", "Bc", "", UNICODE, "AbcdabCd"); + assertStringReplace("AbcdabCd", "Bc", "", UNICODE_CI, "Adad"); // Advanced tests. 
- assertStringReplace("abcdabcd", "bc", "", "UTF8_BINARY", "adad"); - assertStringReplace("r世eplace", "pl", "123", "UTF8_BINARY", "r世e123ace"); - assertStringReplace("世Replace", "re", "", "UTF8_BINARY", "世Replace"); - assertStringReplace("r世eplace", "pl", "xx", "UTF8_LCASE", "r世exxace"); - assertStringReplace("repl世ace", "PL", "AB", "UTF8_LCASE", "reAB世ace"); - assertStringReplace("re世place", "世", "x", "UTF8_LCASE", "rexplace"); - assertStringReplace("re世place", "plx", "123", "UNICODE", "re世place"); - assertStringReplace("replace世", "", "123", "UNICODE", "replace世"); - assertStringReplace("aBc世abc", "b", "12", "UNICODE", "aBc世a12c"); - assertStringReplace("aBc世abc", "b", "12", "UNICODE_CI", "a12c世a12c"); - assertStringReplace("a世Bcdabcd", "bC", "", "UNICODE_CI", "a世dad"); - assertStringReplace("repl世ace", "Pl", "", "UNICODE_CI", "re世ace"); + assertStringReplace("abcdabcd", "bc", "", UTF8_BINARY, "adad"); + assertStringReplace("r世eplace", "pl", "123", UTF8_BINARY, "r世e123ace"); + assertStringReplace("世Replace", "re", "", UTF8_BINARY, "世Replace"); + assertStringReplace("r世eplace", "pl", "xx", UTF8_LCASE, "r世exxace"); + assertStringReplace("repl世ace", "PL", "AB", UTF8_LCASE, "reAB世ace"); + assertStringReplace("re世place", "世", "x", UTF8_LCASE, "rexplace"); + assertStringReplace("re世place", "plx", "123", UNICODE, "re世place"); + assertStringReplace("replace世", "", "123", UNICODE, "replace世"); + assertStringReplace("aBc世abc", "b", "12", UNICODE, "aBc世a12c"); + assertStringReplace("aBc世abc", "b", "12", UNICODE_CI, "a12c世a12c"); + assertStringReplace("a世Bcdabcd", "bC", "", UNICODE_CI, "a世dad"); + assertStringReplace("repl世ace", "Pl", "", UNICODE_CI, "re世ace"); assertStringReplace("abcčšdabĆŠscd", "cs", "", "SR_CI_AI", "abcdabscd"); // One-to-many case mapping (e.g. Turkish dotted I). 
- assertStringReplace("abi̇12", "i", "X", "UNICODE_CI", "abi̇12"); - assertStringReplace("abi̇12", "\u0307", "X", "UNICODE_CI", "abi̇12"); - assertStringReplace("abi̇12", "İ", "X", "UNICODE_CI", "abX12"); - assertStringReplace("abİ12", "i", "X", "UNICODE_CI", "abİ12"); - assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", "UNICODE_CI", "xxxxxx"); - assertStringReplace("İi̇İi̇İi̇", "i", "x", "UNICODE_CI", "İi̇İi̇İi̇"); - assertStringReplace("abİo12i̇o", "i\u0307o", "xx", "UNICODE_CI", "abxx12xx"); - assertStringReplace("abi̇o12i̇o", "İo", "yy", "UNICODE_CI", "abyy12yy"); - assertStringReplace("abi̇12", "i", "X", "UTF8_LCASE", "abX\u030712"); // != UNICODE_CI - assertStringReplace("abi̇12", "\u0307", "X", "UTF8_LCASE", "abiX12"); // != UNICODE_CI - assertStringReplace("abi̇12", "İ", "X", "UTF8_LCASE", "abX12"); - assertStringReplace("abİ12", "i", "X", "UTF8_LCASE", "abİ12"); - assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", "UTF8_LCASE", "xxxxxx"); - assertStringReplace("İi̇İi̇İi̇", "i", "x", "UTF8_LCASE", + assertStringReplace("abi̇12", "i", "X", UNICODE_CI, "abi̇12"); + assertStringReplace("abi̇12", "\u0307", "X", UNICODE_CI, "abi̇12"); + assertStringReplace("abi̇12", "İ", "X", UNICODE_CI, "abX12"); + assertStringReplace("abİ12", "i", "X", UNICODE_CI, "abİ12"); + assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", UNICODE_CI, "xxxxxx"); + assertStringReplace("İi̇İi̇İi̇", "i", "x", UNICODE_CI, "İi̇İi̇İi̇"); + assertStringReplace("abİo12i̇o", "i\u0307o", "xx", UNICODE_CI, "abxx12xx"); + assertStringReplace("abi̇o12i̇o", "İo", "yy", UNICODE_CI, "abyy12yy"); + assertStringReplace("abi̇12", "i", "X", UTF8_LCASE, "abX\u030712"); // != UNICODE_CI + assertStringReplace("abi̇12", "\u0307", "X", UTF8_LCASE, "abiX12"); // != UNICODE_CI + assertStringReplace("abi̇12", "İ", "X", UTF8_LCASE, "abX12"); + assertStringReplace("abİ12", "i", "X", UTF8_LCASE, "abİ12"); + assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", UTF8_LCASE, "xxxxxx"); + assertStringReplace("İi̇İi̇İi̇", "i", "x", 
UTF8_LCASE, "İx\u0307İx\u0307İx\u0307"); // != UNICODE_CI - assertStringReplace("abİo12i̇o", "i\u0307o", "xx", "UTF8_LCASE", "abxx12xx"); - assertStringReplace("abi̇o12i̇o", "İo", "yy", "UTF8_LCASE", "abyy12yy"); + assertStringReplace("abİo12i̇o", "i\u0307o", "xx", UTF8_LCASE, "abxx12xx"); + assertStringReplace("abi̇o12i̇o", "İo", "yy", UTF8_LCASE, "abyy12yy"); // Conditional case mapping (e.g. Greek sigmas). - assertStringReplace("σ", "σ", "x", "UTF8_BINARY", "x"); - assertStringReplace("σ", "ς", "x", "UTF8_BINARY", "σ"); - assertStringReplace("σ", "Σ", "x", "UTF8_BINARY", "σ"); - assertStringReplace("ς", "σ", "x", "UTF8_BINARY", "ς"); - assertStringReplace("ς", "ς", "x", "UTF8_BINARY", "x"); - assertStringReplace("ς", "Σ", "x", "UTF8_BINARY", "ς"); - assertStringReplace("Σ", "σ", "x", "UTF8_BINARY", "Σ"); - assertStringReplace("Σ", "ς", "x", "UTF8_BINARY", "Σ"); - assertStringReplace("Σ", "Σ", "x", "UTF8_BINARY", "x"); - assertStringReplace("σ", "σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("σ", "ς", "x", "UTF8_LCASE", "x"); - assertStringReplace("σ", "Σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("ς", "σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("ς", "ς", "x", "UTF8_LCASE", "x"); - assertStringReplace("ς", "Σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("Σ", "σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("Σ", "ς", "x", "UTF8_LCASE", "x"); - assertStringReplace("Σ", "Σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("σ", "σ", "x", "UNICODE", "x"); - assertStringReplace("σ", "ς", "x", "UNICODE", "σ"); - assertStringReplace("σ", "Σ", "x", "UNICODE", "σ"); - assertStringReplace("ς", "σ", "x", "UNICODE", "ς"); - assertStringReplace("ς", "ς", "x", "UNICODE", "x"); - assertStringReplace("ς", "Σ", "x", "UNICODE", "ς"); - assertStringReplace("Σ", "σ", "x", "UNICODE", "Σ"); - assertStringReplace("Σ", "ς", "x", "UNICODE", "Σ"); - assertStringReplace("Σ", "Σ", "x", "UNICODE", "x"); - assertStringReplace("σ", "σ", "x", "UNICODE_CI", "x"); - 
assertStringReplace("σ", "ς", "x", "UNICODE_CI", "x"); - assertStringReplace("σ", "Σ", "x", "UNICODE_CI", "x"); - assertStringReplace("ς", "σ", "x", "UNICODE_CI", "x"); - assertStringReplace("ς", "ς", "x", "UNICODE_CI", "x"); - assertStringReplace("ς", "Σ", "x", "UNICODE_CI", "x"); - assertStringReplace("Σ", "σ", "x", "UNICODE_CI", "x"); - assertStringReplace("Σ", "ς", "x", "UNICODE_CI", "x"); - assertStringReplace("Σ", "Σ", "x", "UNICODE_CI", "x"); + assertStringReplace("σ", "σ", "x", UTF8_BINARY, "x"); + assertStringReplace("σ", "ς", "x", UTF8_BINARY, "σ"); + assertStringReplace("σ", "Σ", "x", UTF8_BINARY, "σ"); + assertStringReplace("ς", "σ", "x", UTF8_BINARY, "ς"); + assertStringReplace("ς", "ς", "x", UTF8_BINARY, "x"); + assertStringReplace("ς", "Σ", "x", UTF8_BINARY, "ς"); + assertStringReplace("Σ", "σ", "x", UTF8_BINARY, "Σ"); + assertStringReplace("Σ", "ς", "x", UTF8_BINARY, "Σ"); + assertStringReplace("Σ", "Σ", "x", UTF8_BINARY, "x"); + assertStringReplace("σ", "σ", "x", UTF8_LCASE, "x"); + assertStringReplace("σ", "ς", "x", UTF8_LCASE, "x"); + assertStringReplace("σ", "Σ", "x", UTF8_LCASE, "x"); + assertStringReplace("ς", "σ", "x", UTF8_LCASE, "x"); + assertStringReplace("ς", "ς", "x", UTF8_LCASE, "x"); + assertStringReplace("ς", "Σ", "x", UTF8_LCASE, "x"); + assertStringReplace("Σ", "σ", "x", UTF8_LCASE, "x"); + assertStringReplace("Σ", "ς", "x", UTF8_LCASE, "x"); + assertStringReplace("Σ", "Σ", "x", UTF8_LCASE, "x"); + assertStringReplace("σ", "σ", "x", UNICODE, "x"); + assertStringReplace("σ", "ς", "x", UNICODE, "σ"); + assertStringReplace("σ", "Σ", "x", UNICODE, "σ"); + assertStringReplace("ς", "σ", "x", UNICODE, "ς"); + assertStringReplace("ς", "ς", "x", UNICODE, "x"); + assertStringReplace("ς", "Σ", "x", UNICODE, "ς"); + assertStringReplace("Σ", "σ", "x", UNICODE, "Σ"); + assertStringReplace("Σ", "ς", "x", UNICODE, "Σ"); + assertStringReplace("Σ", "Σ", "x", UNICODE, "x"); + assertStringReplace("σ", "σ", "x", UNICODE_CI, "x"); + 
assertStringReplace("σ", "ς", "x", UNICODE_CI, "x"); + assertStringReplace("σ", "Σ", "x", UNICODE_CI, "x"); + assertStringReplace("ς", "σ", "x", UNICODE_CI, "x"); + assertStringReplace("ς", "ς", "x", UNICODE_CI, "x"); + assertStringReplace("ς", "Σ", "x", UNICODE_CI, "x"); + assertStringReplace("Σ", "σ", "x", UNICODE_CI, "x"); + assertStringReplace("Σ", "ς", "x", UNICODE_CI, "x"); + assertStringReplace("Σ", "Σ", "x", UNICODE_CI, "x"); // Surrogate pairs. - assertStringReplace("a🙃b", "a", "x", "UTF8_BINARY", "x🙃b"); - assertStringReplace("a🙃b", "b", "x", "UTF8_BINARY", "a🙃x"); - assertStringReplace("a🙃b", "🙃", "x", "UTF8_BINARY", "axb"); - assertStringReplace("a🙃b", "b", "c", "UTF8_LCASE", "a🙃c"); - assertStringReplace("a🙃b", "b", "x", "UTF8_LCASE", "a🙃x"); - assertStringReplace("a🙃b", "🙃", "x", "UTF8_LCASE", "axb"); - assertStringReplace("a🙃b", "b", "c", "UNICODE", "a🙃c"); - assertStringReplace("a🙃b", "b", "x", "UNICODE", "a🙃x"); - assertStringReplace("a🙃b", "🙃", "x", "UNICODE", "axb"); - assertStringReplace("a🙃b", "b", "c", "UNICODE_CI", "a🙃c"); - assertStringReplace("a🙃b", "b", "x", "UNICODE_CI", "a🙃x"); - assertStringReplace("a🙃b", "🙃", "x", "UNICODE_CI", "axb"); + assertStringReplace("a🙃b", "a", "x", UTF8_BINARY, "x🙃b"); + assertStringReplace("a🙃b", "b", "x", UTF8_BINARY, "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", UTF8_BINARY, "axb"); + assertStringReplace("a🙃b", "b", "c", UTF8_LCASE, "a🙃c"); + assertStringReplace("a🙃b", "b", "x", UTF8_LCASE, "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", UTF8_LCASE, "axb"); + assertStringReplace("a🙃b", "b", "c", UNICODE, "a🙃c"); + assertStringReplace("a🙃b", "b", "x", UNICODE, "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", UNICODE, "axb"); + assertStringReplace("a🙃b", "b", "c", UNICODE_CI, "a🙃c"); + assertStringReplace("a🙃b", "b", "x", UNICODE_CI, "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", UNICODE_CI, "axb"); } /** @@ -2145,293 +2146,293 @@ private void assertStringLocate(String substring, String string, int start, @Test 
public void testStringLocate() throws SparkException { // Empty strings. - assertStringLocate("", "", -1, "UTF8_BINARY", 1); - assertStringLocate("", "", -1, "UTF8_LCASE", 1); - assertStringLocate("", "", -1, "UNICODE", 1); - assertStringLocate("", "", -1, "UNICODE_CI", 1); - assertStringLocate("", "", 0, "UTF8_BINARY", 1); - assertStringLocate("", "", 0, "UTF8_LCASE", 1); - assertStringLocate("", "", 0, "UNICODE", 1); - assertStringLocate("", "", 0, "UNICODE_CI", 1); - assertStringLocate("", "", 1, "UTF8_BINARY", 1); - assertStringLocate("", "", 1, "UTF8_LCASE", 1); - assertStringLocate("", "", 1, "UNICODE", 1); - assertStringLocate("", "", 1, "UNICODE_CI", 1); - assertStringLocate("a", "", -1, "UTF8_BINARY", 0); - assertStringLocate("a", "", -1, "UTF8_LCASE", 0); - assertStringLocate("a", "", -1, "UNICODE", 0); - assertStringLocate("a", "", -1, "UNICODE_CI", 0); - assertStringLocate("a", "", 0, "UTF8_BINARY", 0); - assertStringLocate("a", "", 0, "UTF8_LCASE", 0); - assertStringLocate("a", "", 0, "UNICODE", 0); - assertStringLocate("a", "", 0, "UNICODE_CI", 0); - assertStringLocate("a", "", 1, "UTF8_BINARY", 0); - assertStringLocate("a", "", 1, "UTF8_LCASE", 0); - assertStringLocate("a", "", 1, "UNICODE", 0); - assertStringLocate("a", "", 1, "UNICODE_CI", 0); - assertStringLocate("", "x", -1, "UTF8_BINARY", 1); - assertStringLocate("", "x", -1, "UTF8_LCASE", 1); - assertStringLocate("", "x", -1, "UNICODE", 1); - assertStringLocate("", "x", -1, "UNICODE_CI", 1); - assertStringLocate("", "x", 0, "UTF8_BINARY", 1); - assertStringLocate("", "x", 0, "UTF8_LCASE", 1); - assertStringLocate("", "x", 0, "UNICODE", 1); - assertStringLocate("", "x", 0, "UNICODE_CI", 1); - assertStringLocate("", "x", 1, "UTF8_BINARY", 1); - assertStringLocate("", "x", 1, "UTF8_LCASE", 1); - assertStringLocate("", "x", 1, "UNICODE", 1); - assertStringLocate("", "x", 1, "UNICODE_CI", 1); + assertStringLocate("", "", -1, UTF8_BINARY, 1); + assertStringLocate("", "", -1, UTF8_LCASE, 1); + 
assertStringLocate("", "", -1, UNICODE, 1); + assertStringLocate("", "", -1, UNICODE_CI, 1); + assertStringLocate("", "", 0, UTF8_BINARY, 1); + assertStringLocate("", "", 0, UTF8_LCASE, 1); + assertStringLocate("", "", 0, UNICODE, 1); + assertStringLocate("", "", 0, UNICODE_CI, 1); + assertStringLocate("", "", 1, UTF8_BINARY, 1); + assertStringLocate("", "", 1, UTF8_LCASE, 1); + assertStringLocate("", "", 1, UNICODE, 1); + assertStringLocate("", "", 1, UNICODE_CI, 1); + assertStringLocate("a", "", -1, UTF8_BINARY, 0); + assertStringLocate("a", "", -1, UTF8_LCASE, 0); + assertStringLocate("a", "", -1, UNICODE, 0); + assertStringLocate("a", "", -1, UNICODE_CI, 0); + assertStringLocate("a", "", 0, UTF8_BINARY, 0); + assertStringLocate("a", "", 0, UTF8_LCASE, 0); + assertStringLocate("a", "", 0, UNICODE, 0); + assertStringLocate("a", "", 0, UNICODE_CI, 0); + assertStringLocate("a", "", 1, UTF8_BINARY, 0); + assertStringLocate("a", "", 1, UTF8_LCASE, 0); + assertStringLocate("a", "", 1, UNICODE, 0); + assertStringLocate("a", "", 1, UNICODE_CI, 0); + assertStringLocate("", "x", -1, UTF8_BINARY, 1); + assertStringLocate("", "x", -1, UTF8_LCASE, 1); + assertStringLocate("", "x", -1, UNICODE, 1); + assertStringLocate("", "x", -1, UNICODE_CI, 1); + assertStringLocate("", "x", 0, UTF8_BINARY, 1); + assertStringLocate("", "x", 0, UTF8_LCASE, 1); + assertStringLocate("", "x", 0, UNICODE, 1); + assertStringLocate("", "x", 0, UNICODE_CI, 1); + assertStringLocate("", "x", 1, UTF8_BINARY, 1); + assertStringLocate("", "x", 1, UTF8_LCASE, 1); + assertStringLocate("", "x", 1, UNICODE, 1); + assertStringLocate("", "x", 1, UNICODE_CI, 1); // Basic tests. 
- assertStringLocate("aa", "aaads", 1, "UTF8_BINARY", 1); - assertStringLocate("aa", "aaads", 1, "UTF8_LCASE", 1); - assertStringLocate("aa", "aaads", 1, "UNICODE", 1); - assertStringLocate("aa", "aaads", 1, "UNICODE_CI", 1); - assertStringLocate("aa", "aaads", 2, "UTF8_BINARY", 2); - assertStringLocate("aa", "aaads", 2, "UTF8_LCASE", 2); - assertStringLocate("aa", "aaads", 2, "UNICODE", 2); - assertStringLocate("aa", "aaads", 2, "UNICODE_CI", 2); - assertStringLocate("aa", "aaads", 3, "UTF8_BINARY", 0); - assertStringLocate("aa", "aaads", 3, "UTF8_LCASE", 0); - assertStringLocate("aa", "aaads", 3, "UNICODE", 0); - assertStringLocate("aa", "aaads", 3, "UNICODE_CI", 0); - assertStringLocate("Aa", "aaads", 1, "UTF8_BINARY", 0); - assertStringLocate("Aa", "aaads", 1, "UTF8_LCASE", 1); - assertStringLocate("Aa", "aaads", 1, "UNICODE", 0); - assertStringLocate("Aa", "aaads", 1, "UNICODE_CI", 1); - assertStringLocate("Aa", "aaads", 2, "UTF8_BINARY", 0); - assertStringLocate("Aa", "aaads", 2, "UTF8_LCASE", 2); - assertStringLocate("Aa", "aaads", 2, "UNICODE", 0); - assertStringLocate("Aa", "aaads", 2, "UNICODE_CI", 2); - assertStringLocate("Aa", "aaads", 3, "UTF8_BINARY", 0); - assertStringLocate("Aa", "aaads", 3, "UTF8_LCASE", 0); - assertStringLocate("Aa", "aaads", 3, "UNICODE", 0); - assertStringLocate("Aa", "aaads", 3, "UNICODE_CI", 0); - assertStringLocate("Aa", "aAads", 1, "UTF8_BINARY", 2); - assertStringLocate("Aa", "aAads", 1, "UTF8_LCASE", 1); - assertStringLocate("Aa", "aAads", 1, "UNICODE", 2); - assertStringLocate("Aa", "aAads", 1, "UNICODE_CI", 1); - assertStringLocate("AA", "aaads", 1, "UTF8_BINARY", 0); - assertStringLocate("AA", "aaads", 1, "UTF8_LCASE", 1); - assertStringLocate("AA", "aaads", 1, "UNICODE", 0); - assertStringLocate("AA", "aaads", 1, "UNICODE_CI", 1); - assertStringLocate("aa", "aAads", 2, "UTF8_BINARY", 0); - assertStringLocate("aa", "aAads", 2, "UTF8_LCASE", 2); - assertStringLocate("aa", "aAads", 2, "UNICODE", 0); - 
assertStringLocate("aa", "aAads", 2, "UNICODE_CI", 2); - assertStringLocate("aa", "aaAds", 3, "UTF8_BINARY", 0); - assertStringLocate("aa", "aaAds", 3, "UTF8_LCASE", 0); - assertStringLocate("aa", "aaAds", 3, "UNICODE", 0); - assertStringLocate("aa", "aaAds", 3, "UNICODE_CI", 0); - assertStringLocate("abC", "abcabc", 1, "UTF8_BINARY", 0); - assertStringLocate("abC", "abcabc", 1, "UTF8_LCASE", 1); - assertStringLocate("abC", "abcabc", 1, "UNICODE", 0); - assertStringLocate("abC", "abcabc", 1, "UNICODE_CI", 1); - assertStringLocate("abC", "abCabc", 2, "UTF8_BINARY", 0); - assertStringLocate("abC", "abCabc", 2, "UTF8_LCASE", 4); - assertStringLocate("abC", "abCabc", 2, "UNICODE", 0); - assertStringLocate("abC", "abCabc", 2, "UNICODE_CI", 4); - assertStringLocate("abc", "abcabc", 1, "UTF8_BINARY", 1); - assertStringLocate("abc", "abcabc", 1, "UTF8_LCASE", 1); - assertStringLocate("abc", "abcabc", 1, "UNICODE", 1); - assertStringLocate("abc", "abcabc", 1, "UNICODE_CI", 1); - assertStringLocate("abc", "abcabc", 2, "UTF8_BINARY", 4); - assertStringLocate("abc", "abcabc", 2, "UTF8_LCASE", 4); - assertStringLocate("abc", "abcabc", 2, "UNICODE", 4); - assertStringLocate("abc", "abcabc", 2, "UNICODE_CI", 4); - assertStringLocate("abc", "abcabc", 3, "UTF8_BINARY", 4); - assertStringLocate("abc", "abcabc", 3, "UTF8_LCASE", 4); - assertStringLocate("abc", "abcabc", 3, "UNICODE", 4); - assertStringLocate("abc", "abcabc", 3, "UNICODE_CI", 4); - assertStringLocate("abc", "abcabc", 4, "UTF8_BINARY", 4); - assertStringLocate("abc", "abcabc", 4, "UTF8_LCASE", 4); - assertStringLocate("abc", "abcabc", 4, "UNICODE", 4); - assertStringLocate("abc", "abcabc", 4, "UNICODE_CI", 4); - assertStringLocate("aa", "Aaads", 1, "UTF8_BINARY", 2); - assertStringLocate("aa", "Aaads", 1, "UTF8_LCASE", 1); - assertStringLocate("aa", "Aaads", 1, "UNICODE", 2); - assertStringLocate("aa", "Aaads", 1, "UNICODE_CI", 1); + assertStringLocate("aa", "aaads", 1, UTF8_BINARY, 1); + assertStringLocate("aa", 
"aaads", 1, UTF8_LCASE, 1); + assertStringLocate("aa", "aaads", 1, UNICODE, 1); + assertStringLocate("aa", "aaads", 1, UNICODE_CI, 1); + assertStringLocate("aa", "aaads", 2, UTF8_BINARY, 2); + assertStringLocate("aa", "aaads", 2, UTF8_LCASE, 2); + assertStringLocate("aa", "aaads", 2, UNICODE, 2); + assertStringLocate("aa", "aaads", 2, UNICODE_CI, 2); + assertStringLocate("aa", "aaads", 3, UTF8_BINARY, 0); + assertStringLocate("aa", "aaads", 3, UTF8_LCASE, 0); + assertStringLocate("aa", "aaads", 3, UNICODE, 0); + assertStringLocate("aa", "aaads", 3, UNICODE_CI, 0); + assertStringLocate("Aa", "aaads", 1, UTF8_BINARY, 0); + assertStringLocate("Aa", "aaads", 1, UTF8_LCASE, 1); + assertStringLocate("Aa", "aaads", 1, UNICODE, 0); + assertStringLocate("Aa", "aaads", 1, UNICODE_CI, 1); + assertStringLocate("Aa", "aaads", 2, UTF8_BINARY, 0); + assertStringLocate("Aa", "aaads", 2, UTF8_LCASE, 2); + assertStringLocate("Aa", "aaads", 2, UNICODE, 0); + assertStringLocate("Aa", "aaads", 2, UNICODE_CI, 2); + assertStringLocate("Aa", "aaads", 3, UTF8_BINARY, 0); + assertStringLocate("Aa", "aaads", 3, UTF8_LCASE, 0); + assertStringLocate("Aa", "aaads", 3, UNICODE, 0); + assertStringLocate("Aa", "aaads", 3, UNICODE_CI, 0); + assertStringLocate("Aa", "aAads", 1, UTF8_BINARY, 2); + assertStringLocate("Aa", "aAads", 1, UTF8_LCASE, 1); + assertStringLocate("Aa", "aAads", 1, UNICODE, 2); + assertStringLocate("Aa", "aAads", 1, UNICODE_CI, 1); + assertStringLocate("AA", "aaads", 1, UTF8_BINARY, 0); + assertStringLocate("AA", "aaads", 1, UTF8_LCASE, 1); + assertStringLocate("AA", "aaads", 1, UNICODE, 0); + assertStringLocate("AA", "aaads", 1, UNICODE_CI, 1); + assertStringLocate("aa", "aAads", 2, UTF8_BINARY, 0); + assertStringLocate("aa", "aAads", 2, UTF8_LCASE, 2); + assertStringLocate("aa", "aAads", 2, UNICODE, 0); + assertStringLocate("aa", "aAads", 2, UNICODE_CI, 2); + assertStringLocate("aa", "aaAds", 3, UTF8_BINARY, 0); + assertStringLocate("aa", "aaAds", 3, UTF8_LCASE, 0); + 
assertStringLocate("aa", "aaAds", 3, UNICODE, 0); + assertStringLocate("aa", "aaAds", 3, UNICODE_CI, 0); + assertStringLocate("abC", "abcabc", 1, UTF8_BINARY, 0); + assertStringLocate("abC", "abcabc", 1, UTF8_LCASE, 1); + assertStringLocate("abC", "abcabc", 1, UNICODE, 0); + assertStringLocate("abC", "abcabc", 1, UNICODE_CI, 1); + assertStringLocate("abC", "abCabc", 2, UTF8_BINARY, 0); + assertStringLocate("abC", "abCabc", 2, UTF8_LCASE, 4); + assertStringLocate("abC", "abCabc", 2, UNICODE, 0); + assertStringLocate("abC", "abCabc", 2, UNICODE_CI, 4); + assertStringLocate("abc", "abcabc", 1, UTF8_BINARY, 1); + assertStringLocate("abc", "abcabc", 1, UTF8_LCASE, 1); + assertStringLocate("abc", "abcabc", 1, UNICODE, 1); + assertStringLocate("abc", "abcabc", 1, UNICODE_CI, 1); + assertStringLocate("abc", "abcabc", 2, UTF8_BINARY, 4); + assertStringLocate("abc", "abcabc", 2, UTF8_LCASE, 4); + assertStringLocate("abc", "abcabc", 2, UNICODE, 4); + assertStringLocate("abc", "abcabc", 2, UNICODE_CI, 4); + assertStringLocate("abc", "abcabc", 3, UTF8_BINARY, 4); + assertStringLocate("abc", "abcabc", 3, UTF8_LCASE, 4); + assertStringLocate("abc", "abcabc", 3, UNICODE, 4); + assertStringLocate("abc", "abcabc", 3, UNICODE_CI, 4); + assertStringLocate("abc", "abcabc", 4, UTF8_BINARY, 4); + assertStringLocate("abc", "abcabc", 4, UTF8_LCASE, 4); + assertStringLocate("abc", "abcabc", 4, UNICODE, 4); + assertStringLocate("abc", "abcabc", 4, UNICODE_CI, 4); + assertStringLocate("aa", "Aaads", 1, UTF8_BINARY, 2); + assertStringLocate("aa", "Aaads", 1, UTF8_LCASE, 1); + assertStringLocate("aa", "Aaads", 1, UNICODE, 2); + assertStringLocate("aa", "Aaads", 1, UNICODE_CI, 1); assertStringLocate("ćČ", "CćČČćCČĆČcČcććČč", 3, "SR", 14); assertStringLocate("ćČ", "CćČČćCČĆČcČcććČč", 3, "SR_CI_AI", 3); // Advanced tests. 
- assertStringLocate("界x", "test大千世界X大千世界", 1, "UTF8_BINARY", 0); - assertStringLocate("界X", "test大千世界X大千世界", 1, "UTF8_BINARY", 8); - assertStringLocate("界", "test大千世界X大千世界", 13, "UTF8_BINARY", 13); - assertStringLocate("界x", "test大千世界X大千世界", 1, "UTF8_LCASE", 8); - assertStringLocate("界X", "test大千世界Xtest大千世界", 1, "UTF8_LCASE", 8); - assertStringLocate("界", "test大千世界X大千世界", 13, "UTF8_LCASE", 13); - assertStringLocate("大千", "test大千世界大千世界", 1, "UTF8_LCASE", 5); - assertStringLocate("大千", "test大千世界大千世界", 9, "UTF8_LCASE", 9); - assertStringLocate("大千", "大千世界大千世界", 1, "UTF8_LCASE", 1); - assertStringLocate("界x", "test大千世界X大千世界", 1, "UNICODE", 0); - assertStringLocate("界X", "test大千世界X大千世界", 1, "UNICODE", 8); - assertStringLocate("界", "test大千世界X大千世界", 13, "UNICODE", 13); - assertStringLocate("界x", "test大千世界X大千世界", 1, "UNICODE_CI", 8); - assertStringLocate("界", "test大千世界X大千世界", 13, "UNICODE_CI", 13); - assertStringLocate("大千", "test大千世界大千世界", 1, "UNICODE_CI", 5); - assertStringLocate("大千", "test大千世界大千世界", 9, "UNICODE_CI", 9); - assertStringLocate("大千", "大千世界大千世界", 1, "UNICODE_CI", 1); + assertStringLocate("界x", "test大千世界X大千世界", 1, UTF8_BINARY, 0); + assertStringLocate("界X", "test大千世界X大千世界", 1, UTF8_BINARY, 8); + assertStringLocate("界", "test大千世界X大千世界", 13, UTF8_BINARY, 13); + assertStringLocate("界x", "test大千世界X大千世界", 1, UTF8_LCASE, 8); + assertStringLocate("界X", "test大千世界Xtest大千世界", 1, UTF8_LCASE, 8); + assertStringLocate("界", "test大千世界X大千世界", 13, UTF8_LCASE, 13); + assertStringLocate("大千", "test大千世界大千世界", 1, UTF8_LCASE, 5); + assertStringLocate("大千", "test大千世界大千世界", 9, UTF8_LCASE, 9); + assertStringLocate("大千", "大千世界大千世界", 1, UTF8_LCASE, 1); + assertStringLocate("界x", "test大千世界X大千世界", 1, UNICODE, 0); + assertStringLocate("界X", "test大千世界X大千世界", 1, UNICODE, 8); + assertStringLocate("界", "test大千世界X大千世界", 13, UNICODE, 13); + assertStringLocate("界x", "test大千世界X大千世界", 1, UNICODE_CI, 8); + assertStringLocate("界", "test大千世界X大千世界", 13, UNICODE_CI, 13); + assertStringLocate("大千", 
"test大千世界大千世界", 1, UNICODE_CI, 5); + assertStringLocate("大千", "test大千世界大千世界", 9, UNICODE_CI, 9); + assertStringLocate("大千", "大千世界大千世界", 1, UNICODE_CI, 1); // One-to-many case mapping (e.g. Turkish dotted I). - assertStringLocate("\u0307", "i\u0307", 1, "UTF8_BINARY", 2); - assertStringLocate("\u0307", "İ", 1, "UTF8_LCASE", 0); // != UTF8_BINARY - assertStringLocate("i", "i\u0307", 1, "UNICODE_CI", 0); - assertStringLocate("\u0307", "i\u0307", 1, "UNICODE_CI", 0); - assertStringLocate("i\u0307", "i", 1, "UNICODE_CI", 0); - assertStringLocate("İ", "i\u0307", 1, "UNICODE_CI", 1); - assertStringLocate("İ", "i", 1, "UNICODE_CI", 0); - assertStringLocate("i", "i\u0307", 1, "UTF8_LCASE", 1); // != UNICODE_CI - assertStringLocate("\u0307", "i\u0307", 1, "UTF8_LCASE", 2); // != UNICODE_CI - assertStringLocate("i\u0307", "i", 1, "UTF8_LCASE", 0); - assertStringLocate("İ", "i\u0307", 1, "UTF8_LCASE", 1); - assertStringLocate("İ", "i", 1, "UTF8_LCASE", 0); - assertStringLocate("i\u0307o", "İo世界大千世界", 1, "UNICODE_CI", 1); - assertStringLocate("i\u0307o", "大千İo世界大千世界", 1, "UNICODE_CI", 3); - assertStringLocate("i\u0307o", "世界İo大千世界大千İo", 4, "UNICODE_CI", 11); - assertStringLocate("İo", "i̇o世界大千世界", 1, "UNICODE_CI", 1); - assertStringLocate("İo", "大千i̇o世界大千世界", 1, "UNICODE_CI", 3); - assertStringLocate("İo", "世界i̇o大千世界大千i̇o", 4, "UNICODE_CI", 12); + assertStringLocate("\u0307", "i\u0307", 1, UTF8_BINARY, 2); + assertStringLocate("\u0307", "İ", 1, UTF8_LCASE, 0); // != UTF8_BINARY + assertStringLocate("i", "i\u0307", 1, UNICODE_CI, 0); + assertStringLocate("\u0307", "i\u0307", 1, UNICODE_CI, 0); + assertStringLocate("i\u0307", "i", 1, UNICODE_CI, 0); + assertStringLocate("İ", "i\u0307", 1, UNICODE_CI, 1); + assertStringLocate("İ", "i", 1, UNICODE_CI, 0); + assertStringLocate("i", "i\u0307", 1, UTF8_LCASE, 1); // != UNICODE_CI + assertStringLocate("\u0307", "i\u0307", 1, UTF8_LCASE, 2); // != UNICODE_CI + assertStringLocate("i\u0307", "i", 1, UTF8_LCASE, 0); + 
assertStringLocate("İ", "i\u0307", 1, UTF8_LCASE, 1); + assertStringLocate("İ", "i", 1, UTF8_LCASE, 0); + assertStringLocate("i\u0307o", "İo世界大千世界", 1, UNICODE_CI, 1); + assertStringLocate("i\u0307o", "大千İo世界大千世界", 1, UNICODE_CI, 3); + assertStringLocate("i\u0307o", "世界İo大千世界大千İo", 4, UNICODE_CI, 11); + assertStringLocate("İo", "i̇o世界大千世界", 1, UNICODE_CI, 1); + assertStringLocate("İo", "大千i̇o世界大千世界", 1, UNICODE_CI, 3); + assertStringLocate("İo", "世界i̇o大千世界大千i̇o", 4, UNICODE_CI, 12); // Conditional case mapping (e.g. Greek sigmas). - assertStringLocate("σ", "σ", 1, "UTF8_BINARY", 1); - assertStringLocate("σ", "ς", 1, "UTF8_BINARY", 0); - assertStringLocate("σ", "Σ", 1, "UTF8_BINARY", 0); - assertStringLocate("ς", "σ", 1, "UTF8_BINARY", 0); - assertStringLocate("ς", "ς", 1, "UTF8_BINARY", 1); - assertStringLocate("ς", "Σ", 1, "UTF8_BINARY", 0); - assertStringLocate("Σ", "σ", 1, "UTF8_BINARY", 0); - assertStringLocate("Σ", "ς", 1, "UTF8_BINARY", 0); - assertStringLocate("Σ", "Σ", 1, "UTF8_BINARY", 1); - assertStringLocate("σ", "σ", 1, "UTF8_LCASE", 1); - assertStringLocate("σ", "ς", 1, "UTF8_LCASE", 1); - assertStringLocate("σ", "Σ", 1, "UTF8_LCASE", 1); - assertStringLocate("ς", "σ", 1, "UTF8_LCASE", 1); - assertStringLocate("ς", "ς", 1, "UTF8_LCASE", 1); - assertStringLocate("ς", "Σ", 1, "UTF8_LCASE", 1); - assertStringLocate("Σ", "σ", 1, "UTF8_LCASE", 1); - assertStringLocate("Σ", "ς", 1, "UTF8_LCASE", 1); - assertStringLocate("Σ", "Σ", 1, "UTF8_LCASE", 1); - assertStringLocate("σ", "σ", 1, "UNICODE", 1); - assertStringLocate("σ", "ς", 1, "UNICODE", 0); - assertStringLocate("σ", "Σ", 1, "UNICODE", 0); - assertStringLocate("ς", "σ", 1, "UNICODE", 0); - assertStringLocate("ς", "ς", 1, "UNICODE", 1); - assertStringLocate("ς", "Σ", 1, "UNICODE", 0); - assertStringLocate("Σ", "σ", 1, "UNICODE", 0); - assertStringLocate("Σ", "ς", 1, "UNICODE", 0); - assertStringLocate("Σ", "Σ", 1, "UNICODE", 1); - assertStringLocate("σ", "σ", 1, "UNICODE_CI", 1); - 
assertStringLocate("σ", "ς", 1, "UNICODE_CI", 1); - assertStringLocate("σ", "Σ", 1, "UNICODE_CI", 1); - assertStringLocate("ς", "σ", 1, "UNICODE_CI", 1); - assertStringLocate("ς", "ς", 1, "UNICODE_CI", 1); - assertStringLocate("ς", "Σ", 1, "UNICODE_CI", 1); - assertStringLocate("Σ", "σ", 1, "UNICODE_CI", 1); - assertStringLocate("Σ", "ς", 1, "UNICODE_CI", 1); - assertStringLocate("Σ", "Σ", 1, "UNICODE_CI", 1); + assertStringLocate("σ", "σ", 1, UTF8_BINARY, 1); + assertStringLocate("σ", "ς", 1, UTF8_BINARY, 0); + assertStringLocate("σ", "Σ", 1, UTF8_BINARY, 0); + assertStringLocate("ς", "σ", 1, UTF8_BINARY, 0); + assertStringLocate("ς", "ς", 1, UTF8_BINARY, 1); + assertStringLocate("ς", "Σ", 1, UTF8_BINARY, 0); + assertStringLocate("Σ", "σ", 1, UTF8_BINARY, 0); + assertStringLocate("Σ", "ς", 1, UTF8_BINARY, 0); + assertStringLocate("Σ", "Σ", 1, UTF8_BINARY, 1); + assertStringLocate("σ", "σ", 1, UTF8_LCASE, 1); + assertStringLocate("σ", "ς", 1, UTF8_LCASE, 1); + assertStringLocate("σ", "Σ", 1, UTF8_LCASE, 1); + assertStringLocate("ς", "σ", 1, UTF8_LCASE, 1); + assertStringLocate("ς", "ς", 1, UTF8_LCASE, 1); + assertStringLocate("ς", "Σ", 1, UTF8_LCASE, 1); + assertStringLocate("Σ", "σ", 1, UTF8_LCASE, 1); + assertStringLocate("Σ", "ς", 1, UTF8_LCASE, 1); + assertStringLocate("Σ", "Σ", 1, UTF8_LCASE, 1); + assertStringLocate("σ", "σ", 1, UNICODE, 1); + assertStringLocate("σ", "ς", 1, UNICODE, 0); + assertStringLocate("σ", "Σ", 1, UNICODE, 0); + assertStringLocate("ς", "σ", 1, UNICODE, 0); + assertStringLocate("ς", "ς", 1, UNICODE, 1); + assertStringLocate("ς", "Σ", 1, UNICODE, 0); + assertStringLocate("Σ", "σ", 1, UNICODE, 0); + assertStringLocate("Σ", "ς", 1, UNICODE, 0); + assertStringLocate("Σ", "Σ", 1, UNICODE, 1); + assertStringLocate("σ", "σ", 1, UNICODE_CI, 1); + assertStringLocate("σ", "ς", 1, UNICODE_CI, 1); + assertStringLocate("σ", "Σ", 1, UNICODE_CI, 1); + assertStringLocate("ς", "σ", 1, UNICODE_CI, 1); + assertStringLocate("ς", "ς", 1, UNICODE_CI, 1); + 
assertStringLocate("ς", "Σ", 1, UNICODE_CI, 1); + assertStringLocate("Σ", "σ", 1, UNICODE_CI, 1); + assertStringLocate("Σ", "ς", 1, UNICODE_CI, 1); + assertStringLocate("Σ", "Σ", 1, UNICODE_CI, 1); // Surrogate pairs. - assertStringLocate("a", "a🙃b", 1, "UTF8_BINARY", 1); - assertStringLocate("a", "a🙃b", 1, "UTF8_LCASE", 1); - assertStringLocate("a", "a🙃b", 1, "UNICODE", 1); - assertStringLocate("a", "a🙃b", 1, "UNICODE_CI", 1); - assertStringLocate("a", "a🙃b", 2, "UTF8_BINARY", 0); - assertStringLocate("a", "a🙃b", 2, "UTF8_LCASE", 0); - assertStringLocate("a", "a🙃b", 2, "UNICODE", 0); - assertStringLocate("a", "a🙃b", 2, "UNICODE_CI", 0); - assertStringLocate("a", "a🙃b", 3, "UTF8_BINARY", 0); - assertStringLocate("a", "a🙃b", 3, "UTF8_LCASE", 0); - assertStringLocate("a", "a🙃b", 3, "UNICODE", 0); - assertStringLocate("a", "a🙃b", 3, "UNICODE_CI", 0); - assertStringLocate("🙃", "a🙃b", 1, "UTF8_BINARY", 2); - assertStringLocate("🙃", "a🙃b", 1, "UTF8_LCASE", 2); - assertStringLocate("🙃", "a🙃b", 1, "UNICODE", 2); - assertStringLocate("🙃", "a🙃b", 1, "UNICODE_CI", 2); - assertStringLocate("🙃", "a🙃b", 2, "UTF8_BINARY", 2); - assertStringLocate("🙃", "a🙃b", 2, "UTF8_LCASE", 2); - assertStringLocate("🙃", "a🙃b", 2, "UNICODE", 2); - assertStringLocate("🙃", "a🙃b", 2, "UNICODE_CI", 2); - assertStringLocate("🙃", "a🙃b", 3, "UTF8_BINARY", 0); - assertStringLocate("🙃", "a🙃b", 3, "UTF8_LCASE", 0); - assertStringLocate("🙃", "a🙃b", 3, "UNICODE", 0); - assertStringLocate("🙃", "a🙃b", 3, "UNICODE_CI", 0); - assertStringLocate("b", "a🙃b", 1, "UTF8_BINARY", 3); - assertStringLocate("b", "a🙃b", 1, "UTF8_LCASE", 3); - assertStringLocate("b", "a🙃b", 1, "UNICODE", 3); - assertStringLocate("b", "a🙃b", 1, "UNICODE_CI", 3); - assertStringLocate("b", "a🙃b", 2, "UTF8_BINARY", 3); - assertStringLocate("b", "a🙃b", 2, "UTF8_LCASE", 3); - assertStringLocate("b", "a🙃b", 2, "UNICODE", 3); - assertStringLocate("b", "a🙃b", 2, "UNICODE_CI", 3); - assertStringLocate("b", "a🙃b", 3, "UTF8_BINARY", 3); - 
assertStringLocate("b", "a🙃b", 3, "UTF8_LCASE", 3); - assertStringLocate("b", "a🙃b", 3, "UNICODE", 3); - assertStringLocate("b", "a🙃b", 3, "UNICODE_CI", 3); - assertStringLocate("🙃", "a🙃🙃b", 1, "UTF8_BINARY", 2); - assertStringLocate("🙃", "a🙃🙃b", 1, "UTF8_LCASE", 2); - assertStringLocate("🙃", "a🙃🙃b", 1, "UNICODE", 2); - assertStringLocate("🙃", "a🙃🙃b", 1, "UNICODE_CI", 2); - assertStringLocate("🙃", "a🙃🙃b", 2, "UTF8_BINARY", 2); - assertStringLocate("🙃", "a🙃🙃b", 2, "UTF8_LCASE", 2); - assertStringLocate("🙃", "a🙃🙃b", 2, "UNICODE", 2); - assertStringLocate("🙃", "a🙃🙃b", 2, "UNICODE_CI", 2); - assertStringLocate("🙃", "a🙃🙃b", 3, "UTF8_BINARY", 3); - assertStringLocate("🙃", "a🙃🙃b", 3, "UTF8_LCASE", 3); - assertStringLocate("🙃", "a🙃🙃b", 3, "UNICODE", 3); - assertStringLocate("🙃", "a🙃🙃b", 3, "UNICODE_CI", 3); - assertStringLocate("🙃", "a🙃🙃b", 4, "UTF8_BINARY", 0); - assertStringLocate("🙃", "a🙃🙃b", 4, "UTF8_LCASE", 0); - assertStringLocate("🙃", "a🙃🙃b", 4, "UNICODE", 0); - assertStringLocate("🙃", "a🙃🙃b", 4, "UNICODE_CI", 0); - assertStringLocate("b", "a🙃🙃b", 1, "UTF8_BINARY", 4); - assertStringLocate("b", "a🙃🙃b", 1, "UTF8_LCASE", 4); - assertStringLocate("b", "a🙃🙃b", 1, "UNICODE", 4); - assertStringLocate("b", "a🙃🙃b", 1, "UNICODE_CI", 4); - assertStringLocate("b", "a🙃🙃b", 2, "UTF8_BINARY", 4); - assertStringLocate("b", "a🙃🙃b", 2, "UTF8_LCASE", 4); - assertStringLocate("b", "a🙃🙃b", 2, "UNICODE", 4); - assertStringLocate("b", "a🙃🙃b", 2, "UNICODE_CI", 4); - assertStringLocate("b", "a🙃🙃b", 3, "UTF8_BINARY", 4); - assertStringLocate("b", "a🙃🙃b", 3, "UTF8_LCASE", 4); - assertStringLocate("b", "a🙃🙃b", 3, "UNICODE", 4); - assertStringLocate("b", "a🙃🙃b", 3, "UNICODE_CI", 4); - assertStringLocate("b", "a🙃🙃b", 4, "UTF8_BINARY", 4); - assertStringLocate("b", "a🙃🙃b", 4, "UTF8_LCASE", 4); - assertStringLocate("b", "a🙃🙃b", 4, "UNICODE", 4); - assertStringLocate("b", "a🙃🙃b", 4, "UNICODE_CI", 4); - assertStringLocate("b", "a🙃x🙃b", 1, "UTF8_BINARY", 5); - assertStringLocate("b", "a🙃x🙃b", 1, 
"UTF8_LCASE", 5); - assertStringLocate("b", "a🙃x🙃b", 1, "UNICODE", 5); - assertStringLocate("b", "a🙃x🙃b", 1, "UNICODE_CI", 5); - assertStringLocate("b", "a🙃x🙃b", 2, "UTF8_BINARY", 5); - assertStringLocate("b", "a🙃x🙃b", 2, "UTF8_LCASE", 5); - assertStringLocate("b", "a🙃x🙃b", 2, "UNICODE", 5); - assertStringLocate("b", "a🙃x🙃b", 2, "UNICODE_CI", 5); - assertStringLocate("b", "a🙃x🙃b", 3, "UTF8_BINARY", 5); - assertStringLocate("b", "a🙃x🙃b", 3, "UTF8_LCASE", 5); - assertStringLocate("b", "a🙃x🙃b", 3, "UNICODE", 5); - assertStringLocate("b", "a🙃x🙃b", 3, "UNICODE_CI", 5); - assertStringLocate("b", "a🙃x🙃b", 4, "UTF8_BINARY", 5); - assertStringLocate("b", "a🙃x🙃b", 4, "UTF8_LCASE", 5); - assertStringLocate("b", "a🙃x🙃b", 4, "UNICODE", 5); - assertStringLocate("b", "a🙃x🙃b", 4, "UNICODE_CI", 5); + assertStringLocate("a", "a🙃b", 1, UTF8_BINARY, 1); + assertStringLocate("a", "a🙃b", 1, UTF8_LCASE, 1); + assertStringLocate("a", "a🙃b", 1, UNICODE, 1); + assertStringLocate("a", "a🙃b", 1, UNICODE_CI, 1); + assertStringLocate("a", "a🙃b", 2, UTF8_BINARY, 0); + assertStringLocate("a", "a🙃b", 2, UTF8_LCASE, 0); + assertStringLocate("a", "a🙃b", 2, UNICODE, 0); + assertStringLocate("a", "a🙃b", 2, UNICODE_CI, 0); + assertStringLocate("a", "a🙃b", 3, UTF8_BINARY, 0); + assertStringLocate("a", "a🙃b", 3, UTF8_LCASE, 0); + assertStringLocate("a", "a🙃b", 3, UNICODE, 0); + assertStringLocate("a", "a🙃b", 3, UNICODE_CI, 0); + assertStringLocate("🙃", "a🙃b", 1, UTF8_BINARY, 2); + assertStringLocate("🙃", "a🙃b", 1, UTF8_LCASE, 2); + assertStringLocate("🙃", "a🙃b", 1, UNICODE, 2); + assertStringLocate("🙃", "a🙃b", 1, UNICODE_CI, 2); + assertStringLocate("🙃", "a🙃b", 2, UTF8_BINARY, 2); + assertStringLocate("🙃", "a🙃b", 2, UTF8_LCASE, 2); + assertStringLocate("🙃", "a🙃b", 2, UNICODE, 2); + assertStringLocate("🙃", "a🙃b", 2, UNICODE_CI, 2); + assertStringLocate("🙃", "a🙃b", 3, UTF8_BINARY, 0); + assertStringLocate("🙃", "a🙃b", 3, UTF8_LCASE, 0); + assertStringLocate("🙃", "a🙃b", 3, UNICODE, 0); + 
assertStringLocate("🙃", "a🙃b", 3, UNICODE_CI, 0); + assertStringLocate("b", "a🙃b", 1, UTF8_BINARY, 3); + assertStringLocate("b", "a🙃b", 1, UTF8_LCASE, 3); + assertStringLocate("b", "a🙃b", 1, UNICODE, 3); + assertStringLocate("b", "a🙃b", 1, UNICODE_CI, 3); + assertStringLocate("b", "a🙃b", 2, UTF8_BINARY, 3); + assertStringLocate("b", "a🙃b", 2, UTF8_LCASE, 3); + assertStringLocate("b", "a🙃b", 2, UNICODE, 3); + assertStringLocate("b", "a🙃b", 2, UNICODE_CI, 3); + assertStringLocate("b", "a🙃b", 3, UTF8_BINARY, 3); + assertStringLocate("b", "a🙃b", 3, UTF8_LCASE, 3); + assertStringLocate("b", "a🙃b", 3, UNICODE, 3); + assertStringLocate("b", "a🙃b", 3, UNICODE_CI, 3); + assertStringLocate("🙃", "a🙃🙃b", 1, UTF8_BINARY, 2); + assertStringLocate("🙃", "a🙃🙃b", 1, UTF8_LCASE, 2); + assertStringLocate("🙃", "a🙃🙃b", 1, UNICODE, 2); + assertStringLocate("🙃", "a🙃🙃b", 1, UNICODE_CI, 2); + assertStringLocate("🙃", "a🙃🙃b", 2, UTF8_BINARY, 2); + assertStringLocate("🙃", "a🙃🙃b", 2, UTF8_LCASE, 2); + assertStringLocate("🙃", "a🙃🙃b", 2, UNICODE, 2); + assertStringLocate("🙃", "a🙃🙃b", 2, UNICODE_CI, 2); + assertStringLocate("🙃", "a🙃🙃b", 3, UTF8_BINARY, 3); + assertStringLocate("🙃", "a🙃🙃b", 3, UTF8_LCASE, 3); + assertStringLocate("🙃", "a🙃🙃b", 3, UNICODE, 3); + assertStringLocate("🙃", "a🙃🙃b", 3, UNICODE_CI, 3); + assertStringLocate("🙃", "a🙃🙃b", 4, UTF8_BINARY, 0); + assertStringLocate("🙃", "a🙃🙃b", 4, UTF8_LCASE, 0); + assertStringLocate("🙃", "a🙃🙃b", 4, UNICODE, 0); + assertStringLocate("🙃", "a🙃🙃b", 4, UNICODE_CI, 0); + assertStringLocate("b", "a🙃🙃b", 1, UTF8_BINARY, 4); + assertStringLocate("b", "a🙃🙃b", 1, UTF8_LCASE, 4); + assertStringLocate("b", "a🙃🙃b", 1, UNICODE, 4); + assertStringLocate("b", "a🙃🙃b", 1, UNICODE_CI, 4); + assertStringLocate("b", "a🙃🙃b", 2, UTF8_BINARY, 4); + assertStringLocate("b", "a🙃🙃b", 2, UTF8_LCASE, 4); + assertStringLocate("b", "a🙃🙃b", 2, UNICODE, 4); + assertStringLocate("b", "a🙃🙃b", 2, UNICODE_CI, 4); + assertStringLocate("b", "a🙃🙃b", 3, UTF8_BINARY, 4); + 
assertStringLocate("b", "a🙃🙃b", 3, UTF8_LCASE, 4); + assertStringLocate("b", "a🙃🙃b", 3, UNICODE, 4); + assertStringLocate("b", "a🙃🙃b", 3, UNICODE_CI, 4); + assertStringLocate("b", "a🙃🙃b", 4, UTF8_BINARY, 4); + assertStringLocate("b", "a🙃🙃b", 4, UTF8_LCASE, 4); + assertStringLocate("b", "a🙃🙃b", 4, UNICODE, 4); + assertStringLocate("b", "a🙃🙃b", 4, UNICODE_CI, 4); + assertStringLocate("b", "a🙃x🙃b", 1, UTF8_BINARY, 5); + assertStringLocate("b", "a🙃x🙃b", 1, UTF8_LCASE, 5); + assertStringLocate("b", "a🙃x🙃b", 1, UNICODE, 5); + assertStringLocate("b", "a🙃x🙃b", 1, UNICODE_CI, 5); + assertStringLocate("b", "a🙃x🙃b", 2, UTF8_BINARY, 5); + assertStringLocate("b", "a🙃x🙃b", 2, UTF8_LCASE, 5); + assertStringLocate("b", "a🙃x🙃b", 2, UNICODE, 5); + assertStringLocate("b", "a🙃x🙃b", 2, UNICODE_CI, 5); + assertStringLocate("b", "a🙃x🙃b", 3, UTF8_BINARY, 5); + assertStringLocate("b", "a🙃x🙃b", 3, UTF8_LCASE, 5); + assertStringLocate("b", "a🙃x🙃b", 3, UNICODE, 5); + assertStringLocate("b", "a🙃x🙃b", 3, UNICODE_CI, 5); + assertStringLocate("b", "a🙃x🙃b", 4, UTF8_BINARY, 5); + assertStringLocate("b", "a🙃x🙃b", 4, UTF8_LCASE, 5); + assertStringLocate("b", "a🙃x🙃b", 4, UNICODE, 5); + assertStringLocate("b", "a🙃x🙃b", 4, UNICODE_CI, 5); // Out of bounds test cases. 
- assertStringLocate("a", "asd", 4, "UTF8_BINARY", 0); - assertStringLocate("a", "asd", 4, "UTF8_LCASE", 0); - assertStringLocate("a", "asd", 4, "UNICODE", 0); - assertStringLocate("a", "asd", 4, "UNICODE_CI", 0); - assertStringLocate("a", "asd", 100, "UTF8_BINARY", 0); - assertStringLocate("a", "asd", 100, "UTF8_LCASE", 0); - assertStringLocate("a", "asd", 100, "UNICODE", 0); - assertStringLocate("a", "asd", 100, "UNICODE_CI", 0); - assertStringLocate("a", "🙃🙃", 4, "UTF8_BINARY", 0); - assertStringLocate("a", "🙃🙃", 4, "UTF8_LCASE", 0); - assertStringLocate("a", "🙃🙃", 4, "UNICODE", 0); - assertStringLocate("a", "🙃🙃", 4, "UNICODE_CI", 0); - assertStringLocate("", "asd", 100, "UTF8_BINARY", 1); - assertStringLocate("", "asd", 100, "UTF8_LCASE", 1); - assertStringLocate("", "asd", 100, "UNICODE", 1); - assertStringLocate("", "asd", 100, "UNICODE_CI", 1); - assertStringLocate("asd", "", 100, "UTF8_BINARY", 0); - assertStringLocate("asd", "", 100, "UTF8_LCASE", 0); - assertStringLocate("asd", "", 100, "UNICODE", 0); - assertStringLocate("asd", "", 100, "UNICODE_CI", 0); + assertStringLocate("a", "asd", 4, UTF8_BINARY, 0); + assertStringLocate("a", "asd", 4, UTF8_LCASE, 0); + assertStringLocate("a", "asd", 4, UNICODE, 0); + assertStringLocate("a", "asd", 4, UNICODE_CI, 0); + assertStringLocate("a", "asd", 100, UTF8_BINARY, 0); + assertStringLocate("a", "asd", 100, UTF8_LCASE, 0); + assertStringLocate("a", "asd", 100, UNICODE, 0); + assertStringLocate("a", "asd", 100, UNICODE_CI, 0); + assertStringLocate("a", "🙃🙃", 4, UTF8_BINARY, 0); + assertStringLocate("a", "🙃🙃", 4, UTF8_LCASE, 0); + assertStringLocate("a", "🙃🙃", 4, UNICODE, 0); + assertStringLocate("a", "🙃🙃", 4, UNICODE_CI, 0); + assertStringLocate("", "asd", 100, UTF8_BINARY, 1); + assertStringLocate("", "asd", 100, UTF8_LCASE, 1); + assertStringLocate("", "asd", 100, UNICODE, 1); + assertStringLocate("", "asd", 100, UNICODE_CI, 1); + assertStringLocate("asd", "", 100, UTF8_BINARY, 0); + assertStringLocate("asd", "", 
100, UTF8_LCASE, 0); + assertStringLocate("asd", "", 100, UNICODE, 0); + assertStringLocate("asd", "", 100, UNICODE_CI, 0); } /** @@ -2450,292 +2451,292 @@ private void assertSubstringIndex(String string, String delimiter, int count, @Test public void testSubstringIndex() throws SparkException { // Empty strings. - assertSubstringIndex("", "", 0, "UTF8_BINARY", ""); - assertSubstringIndex("", "", 0, "UTF8_LCASE", ""); - assertSubstringIndex("", "", 0, "UNICODE", ""); - assertSubstringIndex("", "", 0, "UNICODE_CI", ""); - assertSubstringIndex("", "", 1, "UTF8_BINARY", ""); - assertSubstringIndex("", "", 1, "UTF8_LCASE", ""); - assertSubstringIndex("", "", 1, "UNICODE", ""); - assertSubstringIndex("", "", 1, "UNICODE_CI", ""); - assertSubstringIndex("", "", -1, "UTF8_BINARY", ""); - assertSubstringIndex("", "", -1, "UTF8_LCASE", ""); - assertSubstringIndex("", "", -1, "UNICODE", ""); - assertSubstringIndex("", "", -1, "UNICODE_CI", ""); - assertSubstringIndex("", "x", 0, "UTF8_BINARY", ""); - assertSubstringIndex("", "x", 0, "UTF8_LCASE", ""); - assertSubstringIndex("", "x", 0, "UNICODE", ""); - assertSubstringIndex("", "x", 0, "UNICODE_CI", ""); - assertSubstringIndex("", "x", 1, "UTF8_BINARY", ""); - assertSubstringIndex("", "x", 1, "UTF8_LCASE", ""); - assertSubstringIndex("", "x", 1, "UNICODE", ""); - assertSubstringIndex("", "x", 1, "UNICODE_CI", ""); - assertSubstringIndex("", "x", -1, "UTF8_BINARY", ""); - assertSubstringIndex("", "x", -1, "UTF8_LCASE", ""); - assertSubstringIndex("", "x", -1, "UNICODE", ""); - assertSubstringIndex("", "x", -1, "UNICODE_CI", ""); - assertSubstringIndex("abc", "", 0, "UTF8_BINARY", ""); - assertSubstringIndex("abc", "", 0, "UTF8_LCASE", ""); - assertSubstringIndex("abc", "", 0, "UNICODE", ""); - assertSubstringIndex("abc", "", 0, "UNICODE_CI", ""); - assertSubstringIndex("abc", "", 1, "UTF8_BINARY", ""); - assertSubstringIndex("abc", "", 1, "UTF8_LCASE", ""); - assertSubstringIndex("abc", "", 1, "UNICODE", ""); - 
assertSubstringIndex("abc", "", 1, "UNICODE_CI", ""); - assertSubstringIndex("abc", "", -1, "UTF8_BINARY", ""); - assertSubstringIndex("abc", "", -1, "UTF8_LCASE", ""); - assertSubstringIndex("abc", "", -1, "UNICODE", ""); - assertSubstringIndex("abc", "", -1, "UNICODE_CI", ""); + assertSubstringIndex("", "", 0, UTF8_BINARY, ""); + assertSubstringIndex("", "", 0, UTF8_LCASE, ""); + assertSubstringIndex("", "", 0, UNICODE, ""); + assertSubstringIndex("", "", 0, UNICODE_CI, ""); + assertSubstringIndex("", "", 1, UTF8_BINARY, ""); + assertSubstringIndex("", "", 1, UTF8_LCASE, ""); + assertSubstringIndex("", "", 1, UNICODE, ""); + assertSubstringIndex("", "", 1, UNICODE_CI, ""); + assertSubstringIndex("", "", -1, UTF8_BINARY, ""); + assertSubstringIndex("", "", -1, UTF8_LCASE, ""); + assertSubstringIndex("", "", -1, UNICODE, ""); + assertSubstringIndex("", "", -1, UNICODE_CI, ""); + assertSubstringIndex("", "x", 0, UTF8_BINARY, ""); + assertSubstringIndex("", "x", 0, UTF8_LCASE, ""); + assertSubstringIndex("", "x", 0, UNICODE, ""); + assertSubstringIndex("", "x", 0, UNICODE_CI, ""); + assertSubstringIndex("", "x", 1, UTF8_BINARY, ""); + assertSubstringIndex("", "x", 1, UTF8_LCASE, ""); + assertSubstringIndex("", "x", 1, UNICODE, ""); + assertSubstringIndex("", "x", 1, UNICODE_CI, ""); + assertSubstringIndex("", "x", -1, UTF8_BINARY, ""); + assertSubstringIndex("", "x", -1, UTF8_LCASE, ""); + assertSubstringIndex("", "x", -1, UNICODE, ""); + assertSubstringIndex("", "x", -1, UNICODE_CI, ""); + assertSubstringIndex("abc", "", 0, UTF8_BINARY, ""); + assertSubstringIndex("abc", "", 0, UTF8_LCASE, ""); + assertSubstringIndex("abc", "", 0, UNICODE, ""); + assertSubstringIndex("abc", "", 0, UNICODE_CI, ""); + assertSubstringIndex("abc", "", 1, UTF8_BINARY, ""); + assertSubstringIndex("abc", "", 1, UTF8_LCASE, ""); + assertSubstringIndex("abc", "", 1, UNICODE, ""); + assertSubstringIndex("abc", "", 1, UNICODE_CI, ""); + assertSubstringIndex("abc", "", -1, UTF8_BINARY, ""); + 
assertSubstringIndex("abc", "", -1, UTF8_LCASE, ""); + assertSubstringIndex("abc", "", -1, UNICODE, ""); + assertSubstringIndex("abc", "", -1, UNICODE_CI, ""); // Basic tests. - assertSubstringIndex("axbxc", "a", 1, "UTF8_BINARY", ""); - assertSubstringIndex("axbxc", "a", 1, "UTF8_LCASE", ""); - assertSubstringIndex("axbxc", "a", 1, "UNICODE", ""); - assertSubstringIndex("axbxc", "a", 1, "UNICODE_CI", ""); - assertSubstringIndex("axbxc", "x", 1, "UTF8_BINARY", "a"); - assertSubstringIndex("axbxc", "x", 1, "UTF8_LCASE", "a"); - assertSubstringIndex("axbxc", "x", 1, "UNICODE", "a"); - assertSubstringIndex("axbxc", "x", 1, "UNICODE_CI", "a"); - assertSubstringIndex("axbxc", "b", 1, "UTF8_BINARY", "ax"); - assertSubstringIndex("axbxc", "b", 1, "UTF8_LCASE", "ax"); - assertSubstringIndex("axbxc", "b", 1, "UNICODE", "ax"); - assertSubstringIndex("axbxc", "b", 1, "UNICODE_CI", "ax"); - assertSubstringIndex("axbxc", "x", 2, "UTF8_BINARY", "axb"); - assertSubstringIndex("axbxc", "x", 2, "UTF8_LCASE", "axb"); - assertSubstringIndex("axbxc", "x", 2, "UNICODE", "axb"); - assertSubstringIndex("axbxc", "x", 2, "UNICODE_CI", "axb"); - assertSubstringIndex("axbxc", "c", 1, "UTF8_BINARY", "axbx"); - assertSubstringIndex("axbxc", "c", 1, "UTF8_LCASE", "axbx"); - assertSubstringIndex("axbxc", "c", 1, "UNICODE", "axbx"); - assertSubstringIndex("axbxc", "c", 1, "UNICODE_CI", "axbx"); - assertSubstringIndex("axbxc", "x", 3, "UTF8_BINARY", "axbxc"); - assertSubstringIndex("axbxc", "x", 3, "UTF8_LCASE", "axbxc"); - assertSubstringIndex("axbxc", "x", 3, "UNICODE", "axbxc"); - assertSubstringIndex("axbxc", "x", 3, "UNICODE_CI", "axbxc"); - assertSubstringIndex("axbxc", "d", 1, "UTF8_BINARY", "axbxc"); - assertSubstringIndex("axbxc", "d", 1, "UTF8_LCASE", "axbxc"); - assertSubstringIndex("axbxc", "d", 1, "UNICODE", "axbxc"); - assertSubstringIndex("axbxc", "d", 1, "UNICODE_CI", "axbxc"); - assertSubstringIndex("axbxc", "c", -1, "UTF8_BINARY", ""); - assertSubstringIndex("axbxc", "c", -1, 
"UTF8_LCASE", ""); - assertSubstringIndex("axbxc", "c", -1, "UNICODE", ""); - assertSubstringIndex("axbxc", "c", -1, "UNICODE_CI", ""); - assertSubstringIndex("axbxc", "x", -1, "UTF8_BINARY", "c"); - assertSubstringIndex("axbxc", "x", -1, "UTF8_LCASE", "c"); - assertSubstringIndex("axbxc", "x", -1, "UNICODE", "c"); - assertSubstringIndex("axbxc", "x", -1, "UNICODE_CI", "c"); - assertSubstringIndex("axbxc", "b", -1, "UTF8_BINARY", "xc"); - assertSubstringIndex("axbxc", "b", -1, "UTF8_LCASE", "xc"); - assertSubstringIndex("axbxc", "b", -1, "UNICODE", "xc"); - assertSubstringIndex("axbxc", "b", -1, "UNICODE_CI", "xc"); - assertSubstringIndex("axbxc", "x", -2, "UTF8_BINARY", "bxc"); - assertSubstringIndex("axbxc", "x", -2, "UTF8_LCASE", "bxc"); - assertSubstringIndex("axbxc", "x", -2, "UNICODE", "bxc"); - assertSubstringIndex("axbxc", "x", -2, "UNICODE_CI", "bxc"); - assertSubstringIndex("axbxc", "a", -1, "UTF8_BINARY", "xbxc"); - assertSubstringIndex("axbxc", "a", -1, "UTF8_LCASE", "xbxc"); - assertSubstringIndex("axbxc", "a", -1, "UNICODE", "xbxc"); - assertSubstringIndex("axbxc", "a", -1, "UNICODE_CI", "xbxc"); - assertSubstringIndex("axbxc", "x", -3, "UTF8_BINARY", "axbxc"); - assertSubstringIndex("axbxc", "x", -3, "UTF8_LCASE", "axbxc"); - assertSubstringIndex("axbxc", "x", -3, "UNICODE", "axbxc"); - assertSubstringIndex("axbxc", "x", -3, "UNICODE_CI", "axbxc"); - assertSubstringIndex("axbxc", "d", -1, "UTF8_BINARY", "axbxc"); - assertSubstringIndex("axbxc", "d", -1, "UTF8_LCASE", "axbxc"); - assertSubstringIndex("axbxc", "d", -1, "UNICODE", "axbxc"); - assertSubstringIndex("axbxc", "d", -1, "UNICODE_CI", "axbxc"); + assertSubstringIndex("axbxc", "a", 1, UTF8_BINARY, ""); + assertSubstringIndex("axbxc", "a", 1, UTF8_LCASE, ""); + assertSubstringIndex("axbxc", "a", 1, UNICODE, ""); + assertSubstringIndex("axbxc", "a", 1, UNICODE_CI, ""); + assertSubstringIndex("axbxc", "x", 1, UTF8_BINARY, "a"); + assertSubstringIndex("axbxc", "x", 1, UTF8_LCASE, "a"); + 
assertSubstringIndex("axbxc", "x", 1, UNICODE, "a"); + assertSubstringIndex("axbxc", "x", 1, UNICODE_CI, "a"); + assertSubstringIndex("axbxc", "b", 1, UTF8_BINARY, "ax"); + assertSubstringIndex("axbxc", "b", 1, UTF8_LCASE, "ax"); + assertSubstringIndex("axbxc", "b", 1, UNICODE, "ax"); + assertSubstringIndex("axbxc", "b", 1, UNICODE_CI, "ax"); + assertSubstringIndex("axbxc", "x", 2, UTF8_BINARY, "axb"); + assertSubstringIndex("axbxc", "x", 2, UTF8_LCASE, "axb"); + assertSubstringIndex("axbxc", "x", 2, UNICODE, "axb"); + assertSubstringIndex("axbxc", "x", 2, UNICODE_CI, "axb"); + assertSubstringIndex("axbxc", "c", 1, UTF8_BINARY, "axbx"); + assertSubstringIndex("axbxc", "c", 1, UTF8_LCASE, "axbx"); + assertSubstringIndex("axbxc", "c", 1, UNICODE, "axbx"); + assertSubstringIndex("axbxc", "c", 1, UNICODE_CI, "axbx"); + assertSubstringIndex("axbxc", "x", 3, UTF8_BINARY, "axbxc"); + assertSubstringIndex("axbxc", "x", 3, UTF8_LCASE, "axbxc"); + assertSubstringIndex("axbxc", "x", 3, UNICODE, "axbxc"); + assertSubstringIndex("axbxc", "x", 3, UNICODE_CI, "axbxc"); + assertSubstringIndex("axbxc", "d", 1, UTF8_BINARY, "axbxc"); + assertSubstringIndex("axbxc", "d", 1, UTF8_LCASE, "axbxc"); + assertSubstringIndex("axbxc", "d", 1, UNICODE, "axbxc"); + assertSubstringIndex("axbxc", "d", 1, UNICODE_CI, "axbxc"); + assertSubstringIndex("axbxc", "c", -1, UTF8_BINARY, ""); + assertSubstringIndex("axbxc", "c", -1, UTF8_LCASE, ""); + assertSubstringIndex("axbxc", "c", -1, UNICODE, ""); + assertSubstringIndex("axbxc", "c", -1, UNICODE_CI, ""); + assertSubstringIndex("axbxc", "x", -1, UTF8_BINARY, "c"); + assertSubstringIndex("axbxc", "x", -1, UTF8_LCASE, "c"); + assertSubstringIndex("axbxc", "x", -1, UNICODE, "c"); + assertSubstringIndex("axbxc", "x", -1, UNICODE_CI, "c"); + assertSubstringIndex("axbxc", "b", -1, UTF8_BINARY, "xc"); + assertSubstringIndex("axbxc", "b", -1, UTF8_LCASE, "xc"); + assertSubstringIndex("axbxc", "b", -1, UNICODE, "xc"); + assertSubstringIndex("axbxc", "b", -1, 
UNICODE_CI, "xc"); + assertSubstringIndex("axbxc", "x", -2, UTF8_BINARY, "bxc"); + assertSubstringIndex("axbxc", "x", -2, UTF8_LCASE, "bxc"); + assertSubstringIndex("axbxc", "x", -2, UNICODE, "bxc"); + assertSubstringIndex("axbxc", "x", -2, UNICODE_CI, "bxc"); + assertSubstringIndex("axbxc", "a", -1, UTF8_BINARY, "xbxc"); + assertSubstringIndex("axbxc", "a", -1, UTF8_LCASE, "xbxc"); + assertSubstringIndex("axbxc", "a", -1, UNICODE, "xbxc"); + assertSubstringIndex("axbxc", "a", -1, UNICODE_CI, "xbxc"); + assertSubstringIndex("axbxc", "x", -3, UTF8_BINARY, "axbxc"); + assertSubstringIndex("axbxc", "x", -3, UTF8_LCASE, "axbxc"); + assertSubstringIndex("axbxc", "x", -3, UNICODE, "axbxc"); + assertSubstringIndex("axbxc", "x", -3, UNICODE_CI, "axbxc"); + assertSubstringIndex("axbxc", "d", -1, UTF8_BINARY, "axbxc"); + assertSubstringIndex("axbxc", "d", -1, UTF8_LCASE, "axbxc"); + assertSubstringIndex("axbxc", "d", -1, UNICODE, "axbxc"); + assertSubstringIndex("axbxc", "d", -1, UNICODE_CI, "axbxc"); // Advanced tests. 
- assertSubstringIndex("wwwgapachegorg", "g", -3, "UTF8_BINARY", "apachegorg"); - assertSubstringIndex("www||apache||org", "||", 2, "UTF8_BINARY", "www||apache"); - assertSubstringIndex("aaaaaaaaaa", "aa", 2, "UTF8_BINARY", "a"); - assertSubstringIndex("AaAaAaAaAa", "aa", 2, "UTF8_LCASE", "A"); - assertSubstringIndex("www.apache.org", ".", 3, "UTF8_LCASE", "www.apache.org"); - assertSubstringIndex("wwwXapacheXorg", "x", 2, "UTF8_LCASE", "wwwXapache"); - assertSubstringIndex("wwwxapachexorg", "X", 1, "UTF8_LCASE", "www"); - assertSubstringIndex("www.apache.org", ".", 0, "UTF8_LCASE", ""); - assertSubstringIndex("www.apache.ORG", ".", -3, "UTF8_LCASE", "www.apache.ORG"); - assertSubstringIndex("wwwGapacheGorg", "g", 1, "UTF8_LCASE", "www"); - assertSubstringIndex("wwwGapacheGorg", "g", 3, "UTF8_LCASE", "wwwGapacheGor"); - assertSubstringIndex("gwwwGapacheGorg", "g", 3, "UTF8_LCASE", "gwwwGapache"); - assertSubstringIndex("wwwGapacheGorg", "g", -3, "UTF8_LCASE", "apacheGorg"); - assertSubstringIndex("wwwmapacheMorg", "M", -2, "UTF8_LCASE", "apacheMorg"); - assertSubstringIndex("www.apache.org", ".", -1, "UTF8_LCASE", "org"); - assertSubstringIndex("www.apache.org.", ".", -1, "UTF8_LCASE", ""); - assertSubstringIndex("", ".", -2, "UTF8_LCASE", ""); - assertSubstringIndex("test大千世界X大千世界", "x", -1, "UTF8_LCASE", "大千世界"); - assertSubstringIndex("test大千世界X大千世界", "X", 1, "UTF8_LCASE", "test大千世界"); - assertSubstringIndex("test大千世界大千世界", "千", 2, "UTF8_LCASE", "test大千世界大"); - assertSubstringIndex("www||APACHE||org", "||", 2, "UTF8_LCASE", "www||APACHE"); - assertSubstringIndex("www||APACHE||org", "||", -1, "UTF8_LCASE", "org"); - assertSubstringIndex("AaAaAaAaAa", "Aa", 2, "UNICODE", "Aa"); - assertSubstringIndex("wwwYapacheyorg", "y", 3, "UNICODE", "wwwYapacheyorg"); - assertSubstringIndex("www.apache.org", ".", 2, "UNICODE", "www.apache"); - assertSubstringIndex("wwwYapacheYorg", "Y", 1, "UNICODE", "www"); - assertSubstringIndex("wwwYapacheYorg", "y", 1, "UNICODE", 
"wwwYapacheYorg"); - assertSubstringIndex("wwwGapacheGorg", "g", 1, "UNICODE", "wwwGapacheGor"); - assertSubstringIndex("GwwwGapacheGorG", "G", 3, "UNICODE", "GwwwGapache"); - assertSubstringIndex("wwwGapacheGorG", "G", -3, "UNICODE", "apacheGorG"); - assertSubstringIndex("www.apache.org", ".", 0, "UNICODE", ""); - assertSubstringIndex("www.apache.org", ".", -3, "UNICODE", "www.apache.org"); - assertSubstringIndex("www.apache.org", ".", -2, "UNICODE", "apache.org"); - assertSubstringIndex("www.apache.org", ".", -1, "UNICODE", "org"); - assertSubstringIndex("", ".", -2, "UNICODE", ""); - assertSubstringIndex("test大千世界X大千世界", "X", -1, "UNICODE", "大千世界"); - assertSubstringIndex("test大千世界X大千世界", "X", 1, "UNICODE", "test大千世界"); - assertSubstringIndex("大x千世界大千世x界", "x", 1, "UNICODE", "大"); - assertSubstringIndex("大x千世界大千世x界", "x", -1, "UNICODE", "界"); - assertSubstringIndex("大x千世界大千世x界", "x", -2, "UNICODE", "千世界大千世x界"); - assertSubstringIndex("大千世界大千世界", "千", 2, "UNICODE", "大千世界大"); - assertSubstringIndex("www||apache||org", "||", 2, "UNICODE", "www||apache"); - assertSubstringIndex("AaAaAaAaAa", "aa", 2, "UNICODE_CI", "A"); - assertSubstringIndex("www.apache.org", ".", 3, "UNICODE_CI", "www.apache.org"); - assertSubstringIndex("wwwXapacheXorg", "x", 2, "UNICODE_CI", "wwwXapache"); - assertSubstringIndex("wwwxapacheXorg", "X", 1, "UNICODE_CI", "www"); - assertSubstringIndex("www.apache.org", ".", 0, "UNICODE_CI", ""); - assertSubstringIndex("wwwGapacheGorg", "G", 3, "UNICODE_CI", "wwwGapacheGor"); - assertSubstringIndex("gwwwGapacheGorg", "g", 3, "UNICODE_CI", "gwwwGapache"); - assertSubstringIndex("gwwwGapacheGorg", "g", -3, "UNICODE_CI", "apacheGorg"); - assertSubstringIndex("www.apache.ORG", ".", -3, "UNICODE_CI", "www.apache.ORG"); - assertSubstringIndex("wwwmapacheMorg", "M", -2, "UNICODE_CI", "apacheMorg"); - assertSubstringIndex("www.apache.org", ".", -1, "UNICODE_CI", "org"); - assertSubstringIndex("", ".", -2, "UNICODE_CI", ""); - 
assertSubstringIndex("test大千世界X大千世界", "X", -1, "UNICODE_CI", "大千世界"); - assertSubstringIndex("test大千世界X大千世界", "X", 1, "UNICODE_CI", "test大千世界"); - assertSubstringIndex("test大千世界大千世界", "千", 2, "UNICODE_CI", "test大千世界大"); - assertSubstringIndex("www||APACHE||org", "||", 2, "UNICODE_CI", "www||APACHE"); + assertSubstringIndex("wwwgapachegorg", "g", -3, UTF8_BINARY, "apachegorg"); + assertSubstringIndex("www||apache||org", "||", 2, UTF8_BINARY, "www||apache"); + assertSubstringIndex("aaaaaaaaaa", "aa", 2, UTF8_BINARY, "a"); + assertSubstringIndex("AaAaAaAaAa", "aa", 2, UTF8_LCASE, "A"); + assertSubstringIndex("www.apache.org", ".", 3, UTF8_LCASE, "www.apache.org"); + assertSubstringIndex("wwwXapacheXorg", "x", 2, UTF8_LCASE, "wwwXapache"); + assertSubstringIndex("wwwxapachexorg", "X", 1, UTF8_LCASE, "www"); + assertSubstringIndex("www.apache.org", ".", 0, UTF8_LCASE, ""); + assertSubstringIndex("www.apache.ORG", ".", -3, UTF8_LCASE, "www.apache.ORG"); + assertSubstringIndex("wwwGapacheGorg", "g", 1, UTF8_LCASE, "www"); + assertSubstringIndex("wwwGapacheGorg", "g", 3, UTF8_LCASE, "wwwGapacheGor"); + assertSubstringIndex("gwwwGapacheGorg", "g", 3, UTF8_LCASE, "gwwwGapache"); + assertSubstringIndex("wwwGapacheGorg", "g", -3, UTF8_LCASE, "apacheGorg"); + assertSubstringIndex("wwwmapacheMorg", "M", -2, UTF8_LCASE, "apacheMorg"); + assertSubstringIndex("www.apache.org", ".", -1, UTF8_LCASE, "org"); + assertSubstringIndex("www.apache.org.", ".", -1, UTF8_LCASE, ""); + assertSubstringIndex("", ".", -2, UTF8_LCASE, ""); + assertSubstringIndex("test大千世界X大千世界", "x", -1, UTF8_LCASE, "大千世界"); + assertSubstringIndex("test大千世界X大千世界", "X", 1, UTF8_LCASE, "test大千世界"); + assertSubstringIndex("test大千世界大千世界", "千", 2, UTF8_LCASE, "test大千世界大"); + assertSubstringIndex("www||APACHE||org", "||", 2, UTF8_LCASE, "www||APACHE"); + assertSubstringIndex("www||APACHE||org", "||", -1, UTF8_LCASE, "org"); + assertSubstringIndex("AaAaAaAaAa", "Aa", 2, UNICODE, "Aa"); + 
assertSubstringIndex("wwwYapacheyorg", "y", 3, UNICODE, "wwwYapacheyorg"); + assertSubstringIndex("www.apache.org", ".", 2, UNICODE, "www.apache"); + assertSubstringIndex("wwwYapacheYorg", "Y", 1, UNICODE, "www"); + assertSubstringIndex("wwwYapacheYorg", "y", 1, UNICODE, "wwwYapacheYorg"); + assertSubstringIndex("wwwGapacheGorg", "g", 1, UNICODE, "wwwGapacheGor"); + assertSubstringIndex("GwwwGapacheGorG", "G", 3, UNICODE, "GwwwGapache"); + assertSubstringIndex("wwwGapacheGorG", "G", -3, UNICODE, "apacheGorG"); + assertSubstringIndex("www.apache.org", ".", 0, UNICODE, ""); + assertSubstringIndex("www.apache.org", ".", -3, UNICODE, "www.apache.org"); + assertSubstringIndex("www.apache.org", ".", -2, UNICODE, "apache.org"); + assertSubstringIndex("www.apache.org", ".", -1, UNICODE, "org"); + assertSubstringIndex("", ".", -2, UNICODE, ""); + assertSubstringIndex("test大千世界X大千世界", "X", -1, UNICODE, "大千世界"); + assertSubstringIndex("test大千世界X大千世界", "X", 1, UNICODE, "test大千世界"); + assertSubstringIndex("大x千世界大千世x界", "x", 1, UNICODE, "大"); + assertSubstringIndex("大x千世界大千世x界", "x", -1, UNICODE, "界"); + assertSubstringIndex("大x千世界大千世x界", "x", -2, UNICODE, "千世界大千世x界"); + assertSubstringIndex("大千世界大千世界", "千", 2, UNICODE, "大千世界大"); + assertSubstringIndex("www||apache||org", "||", 2, UNICODE, "www||apache"); + assertSubstringIndex("AaAaAaAaAa", "aa", 2, UNICODE_CI, "A"); + assertSubstringIndex("www.apache.org", ".", 3, UNICODE_CI, "www.apache.org"); + assertSubstringIndex("wwwXapacheXorg", "x", 2, UNICODE_CI, "wwwXapache"); + assertSubstringIndex("wwwxapacheXorg", "X", 1, UNICODE_CI, "www"); + assertSubstringIndex("www.apache.org", ".", 0, UNICODE_CI, ""); + assertSubstringIndex("wwwGapacheGorg", "G", 3, UNICODE_CI, "wwwGapacheGor"); + assertSubstringIndex("gwwwGapacheGorg", "g", 3, UNICODE_CI, "gwwwGapache"); + assertSubstringIndex("gwwwGapacheGorg", "g", -3, UNICODE_CI, "apacheGorg"); + assertSubstringIndex("www.apache.ORG", ".", -3, UNICODE_CI, "www.apache.ORG"); + 
assertSubstringIndex("wwwmapacheMorg", "M", -2, UNICODE_CI, "apacheMorg"); + assertSubstringIndex("www.apache.org", ".", -1, UNICODE_CI, "org"); + assertSubstringIndex("", ".", -2, UNICODE_CI, ""); + assertSubstringIndex("test大千世界X大千世界", "X", -1, UNICODE_CI, "大千世界"); + assertSubstringIndex("test大千世界X大千世界", "X", 1, UNICODE_CI, "test大千世界"); + assertSubstringIndex("test大千世界大千世界", "千", 2, UNICODE_CI, "test大千世界大"); + assertSubstringIndex("www||APACHE||org", "||", 2, UNICODE_CI, "www||APACHE"); assertSubstringIndex("wwwèapacheËorg", "Ê", -3, "AF_CI_AI", "apacheËorg"); // One-to-many case mapping (e.g. Turkish dotted I). - assertSubstringIndex("abİo12", "i\u0307o", 1, "UNICODE_CI", "ab"); - assertSubstringIndex("abİo12", "i\u0307o", -1, "UNICODE_CI", "12"); - assertSubstringIndex("abi̇o12", "İo", 1, "UNICODE_CI", "ab"); - assertSubstringIndex("abi̇o12", "İo", -1, "UNICODE_CI", "12"); - assertSubstringIndex("ai̇bi̇o12", "İo", 1, "UNICODE_CI", "ai̇b"); - assertSubstringIndex("ai̇bi̇o12i̇o", "İo", 2, "UNICODE_CI", "ai̇bi̇o12"); - assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -1, "UNICODE_CI", ""); - assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -2, "UNICODE_CI", "12i̇o"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UNICODE_CI", "İo12İoi̇o"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, "UNICODE_CI", "İo12İoi̇o"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, "UNICODE_CI", "i̇o12i̇oİo"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, "UNICODE_CI", "i̇o12i̇oİo"); - assertSubstringIndex("abi̇12", "i", 1, "UNICODE_CI", "abi̇12"); - assertSubstringIndex("abi̇12", "\u0307", 1, "UNICODE_CI", "abi̇12"); - assertSubstringIndex("abi̇12", "İ", 1, "UNICODE_CI", "ab"); - assertSubstringIndex("abİ12", "i", 1, "UNICODE_CI", "abİ12"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UNICODE_CI", "İo12İoi̇o"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, "UNICODE_CI", "İo12İoi̇o"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, 
"UNICODE_CI", "i̇o12i̇oİo"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, "UNICODE_CI", "i̇o12i̇oİo"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, "UNICODE_CI", "ai̇bi̇oİo12"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", 3, "UNICODE_CI", "ai̇bi̇oİo12"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, "UNICODE_CI", "ai̇bİoi̇o12"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", 3, "UNICODE_CI", "ai̇bİoi̇o12"); - assertSubstringIndex("abi̇12", "i", 1, "UTF8_LCASE", "ab"); // != UNICODE_CI - assertSubstringIndex("abi̇12", "\u0307", 1, "UTF8_LCASE", "abi"); // != UNICODE_CI - assertSubstringIndex("abi̇12", "İ", 1, "UTF8_LCASE", "ab"); - assertSubstringIndex("abİ12", "i", 1, "UTF8_LCASE", "abİ12"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UTF8_LCASE", "İo12İoi̇o"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, "UTF8_LCASE", "İo12İoi̇o"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, "UTF8_LCASE", "i̇o12i̇oİo"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, "UTF8_LCASE", "i̇o12i̇oİo"); - assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, "UTF8_LCASE", "bİoi̇o12i̇o"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, "UTF8_LCASE", "ai̇bi̇oİo12"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", 3, "UTF8_LCASE", "ai̇bi̇oİo12"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, "UTF8_LCASE", "ai̇bİoi̇o12"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", 3, "UTF8_LCASE", "ai̇bİoi̇o12"); - assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, "UTF8_LCASE", "bİoi̇o12i̇o"); + assertSubstringIndex("abİo12", "i\u0307o", 1, UNICODE_CI, "ab"); + assertSubstringIndex("abİo12", "i\u0307o", -1, UNICODE_CI, "12"); + assertSubstringIndex("abi̇o12", "İo", 1, UNICODE_CI, "ab"); + assertSubstringIndex("abi̇o12", "İo", -1, UNICODE_CI, "12"); + assertSubstringIndex("ai̇bi̇o12", "İo", 1, UNICODE_CI, "ai̇b"); + assertSubstringIndex("ai̇bi̇o12i̇o", "İo", 2, UNICODE_CI, "ai̇bi̇o12"); + 
assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -1, UNICODE_CI, ""); + assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -2, UNICODE_CI, "12i̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, UNICODE_CI, "İo12İoi̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, UNICODE_CI, "İo12İoi̇o"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, UNICODE_CI, "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, UNICODE_CI, "i̇o12i̇oİo"); + assertSubstringIndex("abi̇12", "i", 1, UNICODE_CI, "abi̇12"); + assertSubstringIndex("abi̇12", "\u0307", 1, UNICODE_CI, "abi̇12"); + assertSubstringIndex("abi̇12", "İ", 1, UNICODE_CI, "ab"); + assertSubstringIndex("abİ12", "i", 1, UNICODE_CI, "abİ12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, UNICODE_CI, "İo12İoi̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, UNICODE_CI, "İo12İoi̇o"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, UNICODE_CI, "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, UNICODE_CI, "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, UNICODE_CI, "ai̇bi̇oİo12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", 3, UNICODE_CI, "ai̇bi̇oİo12"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, UNICODE_CI, "ai̇bİoi̇o12"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", 3, UNICODE_CI, "ai̇bİoi̇o12"); + assertSubstringIndex("abi̇12", "i", 1, UTF8_LCASE, "ab"); // != UNICODE_CI + assertSubstringIndex("abi̇12", "\u0307", 1, UTF8_LCASE, "abi"); // != UNICODE_CI + assertSubstringIndex("abi̇12", "İ", 1, UTF8_LCASE, "ab"); + assertSubstringIndex("abİ12", "i", 1, UTF8_LCASE, "abİ12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, UTF8_LCASE, "İo12İoi̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, UTF8_LCASE, "İo12İoi̇o"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, UTF8_LCASE, "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, UTF8_LCASE, "i̇o12i̇oİo"); + 
assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, UTF8_LCASE, "bİoi̇o12i̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, UTF8_LCASE, "ai̇bi̇oİo12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", 3, UTF8_LCASE, "ai̇bi̇oİo12"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, UTF8_LCASE, "ai̇bİoi̇o12"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", 3, UTF8_LCASE, "ai̇bİoi̇o12"); + assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, UTF8_LCASE, "bİoi̇o12i̇o"); // Conditional case mapping (e.g. Greek sigmas). - assertSubstringIndex("σ", "σ", 1, "UTF8_BINARY", ""); - assertSubstringIndex("σ", "ς", 1, "UTF8_BINARY", "σ"); - assertSubstringIndex("σ", "Σ", 1, "UTF8_BINARY", "σ"); - assertSubstringIndex("ς", "σ", 1, "UTF8_BINARY", "ς"); - assertSubstringIndex("ς", "ς", 1, "UTF8_BINARY", ""); - assertSubstringIndex("ς", "Σ", 1, "UTF8_BINARY", "ς"); - assertSubstringIndex("Σ", "σ", 1, "UTF8_BINARY", "Σ"); - assertSubstringIndex("Σ", "ς", 1, "UTF8_BINARY", "Σ"); - assertSubstringIndex("Σ", "Σ", 1, "UTF8_BINARY", ""); - assertSubstringIndex("σ", "σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("σ", "ς", 1, "UTF8_LCASE", ""); - assertSubstringIndex("σ", "Σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("ς", "σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("ς", "ς", 1, "UTF8_LCASE", ""); - assertSubstringIndex("ς", "Σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("Σ", "σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("Σ", "ς", 1, "UTF8_LCASE", ""); - assertSubstringIndex("Σ", "Σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("σ", "σ", 1, "UNICODE", ""); - assertSubstringIndex("σ", "ς", 1, "UNICODE", "σ"); - assertSubstringIndex("σ", "Σ", 1, "UNICODE", "σ"); - assertSubstringIndex("ς", "σ", 1, "UNICODE", "ς"); - assertSubstringIndex("ς", "ς", 1, "UNICODE", ""); - assertSubstringIndex("ς", "Σ", 1, "UNICODE", "ς"); - assertSubstringIndex("Σ", "σ", 1, "UNICODE", "Σ"); - assertSubstringIndex("Σ", "ς", 1, "UNICODE", "Σ"); - assertSubstringIndex("Σ", 
"Σ", 1, "UNICODE", ""); - assertSubstringIndex("σ", "σ", 1, "UNICODE_CI", ""); - assertSubstringIndex("σ", "ς", 1, "UNICODE_CI", ""); - assertSubstringIndex("σ", "Σ", 1, "UNICODE_CI", ""); - assertSubstringIndex("ς", "σ", 1, "UNICODE_CI", ""); - assertSubstringIndex("ς", "ς", 1, "UNICODE_CI", ""); - assertSubstringIndex("ς", "Σ", 1, "UNICODE_CI", ""); - assertSubstringIndex("Σ", "σ", 1, "UNICODE_CI", ""); - assertSubstringIndex("Σ", "ς", 1, "UNICODE_CI", ""); - assertSubstringIndex("Σ", "Σ", 1, "UNICODE_CI", ""); + assertSubstringIndex("σ", "σ", 1, UTF8_BINARY, ""); + assertSubstringIndex("σ", "ς", 1, UTF8_BINARY, "σ"); + assertSubstringIndex("σ", "Σ", 1, UTF8_BINARY, "σ"); + assertSubstringIndex("ς", "σ", 1, UTF8_BINARY, "ς"); + assertSubstringIndex("ς", "ς", 1, UTF8_BINARY, ""); + assertSubstringIndex("ς", "Σ", 1, UTF8_BINARY, "ς"); + assertSubstringIndex("Σ", "σ", 1, UTF8_BINARY, "Σ"); + assertSubstringIndex("Σ", "ς", 1, UTF8_BINARY, "Σ"); + assertSubstringIndex("Σ", "Σ", 1, UTF8_BINARY, ""); + assertSubstringIndex("σ", "σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("σ", "ς", 1, UTF8_LCASE, ""); + assertSubstringIndex("σ", "Σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("ς", "σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("ς", "ς", 1, UTF8_LCASE, ""); + assertSubstringIndex("ς", "Σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("Σ", "σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("Σ", "ς", 1, UTF8_LCASE, ""); + assertSubstringIndex("Σ", "Σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("σ", "σ", 1, UNICODE, ""); + assertSubstringIndex("σ", "ς", 1, UNICODE, "σ"); + assertSubstringIndex("σ", "Σ", 1, UNICODE, "σ"); + assertSubstringIndex("ς", "σ", 1, UNICODE, "ς"); + assertSubstringIndex("ς", "ς", 1, UNICODE, ""); + assertSubstringIndex("ς", "Σ", 1, UNICODE, "ς"); + assertSubstringIndex("Σ", "σ", 1, UNICODE, "Σ"); + assertSubstringIndex("Σ", "ς", 1, UNICODE, "Σ"); + assertSubstringIndex("Σ", "Σ", 1, UNICODE, ""); + assertSubstringIndex("σ", "σ", 1, UNICODE_CI, ""); + 
assertSubstringIndex("σ", "ς", 1, UNICODE_CI, ""); + assertSubstringIndex("σ", "Σ", 1, UNICODE_CI, ""); + assertSubstringIndex("ς", "σ", 1, UNICODE_CI, ""); + assertSubstringIndex("ς", "ς", 1, UNICODE_CI, ""); + assertSubstringIndex("ς", "Σ", 1, UNICODE_CI, ""); + assertSubstringIndex("Σ", "σ", 1, UNICODE_CI, ""); + assertSubstringIndex("Σ", "ς", 1, UNICODE_CI, ""); + assertSubstringIndex("Σ", "Σ", 1, UNICODE_CI, ""); // Surrogate pairs. - assertSubstringIndex("a🙃b🙃c", "a", 1, "UTF8_BINARY", ""); - assertSubstringIndex("a🙃b🙃c", "a", 1, "UTF8_LCASE", ""); - assertSubstringIndex("a🙃b🙃c", "a", 1, "UNICODE", ""); - assertSubstringIndex("a🙃b🙃c", "a", 1, "UNICODE_CI", ""); - assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UTF8_BINARY", "a"); - assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UTF8_LCASE", "a"); - assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UNICODE", "a"); - assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UNICODE_CI", "a"); - assertSubstringIndex("a🙃b🙃c", "b", 1, "UTF8_BINARY", "a🙃"); - assertSubstringIndex("a🙃b🙃c", "b", 1, "UTF8_LCASE", "a🙃"); - assertSubstringIndex("a🙃b🙃c", "b", 1, "UNICODE", "a🙃"); - assertSubstringIndex("a🙃b🙃c", "b", 1, "UNICODE_CI", "a🙃"); - assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UTF8_BINARY", "a🙃b"); - assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UTF8_LCASE", "a🙃b"); - assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UNICODE", "a🙃b"); - assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UNICODE_CI", "a🙃b"); - assertSubstringIndex("a🙃b🙃c", "c", 1, "UTF8_BINARY", "a🙃b🙃"); - assertSubstringIndex("a🙃b🙃c", "c", 1, "UTF8_LCASE", "a🙃b🙃"); - assertSubstringIndex("a🙃b🙃c", "c", 1, "UNICODE", "a🙃b🙃"); - assertSubstringIndex("a🙃b🙃c", "c", 1, "UNICODE_CI", "a🙃b🙃"); - assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UTF8_BINARY", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UTF8_LCASE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UNICODE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UNICODE_CI", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", 1, "UTF8_BINARY", "a🙃b🙃c"); - 
assertSubstringIndex("a🙃b🙃c", "d", 1, "UTF8_LCASE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", 1, "UNICODE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", 1, "UNICODE_CI", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "c", -1, "UTF8_BINARY", ""); - assertSubstringIndex("a🙃b🙃c", "c", -1, "UTF8_LCASE", ""); - assertSubstringIndex("a🙃b🙃c", "c", -1, "UNICODE", ""); - assertSubstringIndex("a🙃b🙃c", "c", -1, "UNICODE_CI", ""); - assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UTF8_BINARY", "c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UTF8_LCASE", "c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UNICODE", "c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UNICODE_CI", "c"); - assertSubstringIndex("a🙃b🙃c", "b", -1, "UTF8_BINARY", "🙃c"); - assertSubstringIndex("a🙃b🙃c", "b", -1, "UTF8_LCASE", "🙃c"); - assertSubstringIndex("a🙃b🙃c", "b", -1, "UNICODE", "🙃c"); - assertSubstringIndex("a🙃b🙃c", "b", -1, "UNICODE_CI", "🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UTF8_BINARY", "b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UTF8_LCASE", "b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UNICODE", "b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UNICODE_CI", "b🙃c"); - assertSubstringIndex("a🙃b🙃c", "a", -1, "UTF8_BINARY", "🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "a", -1, "UTF8_LCASE", "🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "a", -1, "UNICODE", "🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "a", -1, "UNICODE_CI", "🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UTF8_BINARY", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UTF8_LCASE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UNICODE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UNICODE_CI", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", -1, "UTF8_BINARY", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", -1, "UTF8_LCASE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", -1, "UNICODE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", -1, "UNICODE_CI", "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "a", 1, UTF8_BINARY, ""); + 
assertSubstringIndex("a🙃b🙃c", "a", 1, UTF8_LCASE, ""); + assertSubstringIndex("a🙃b🙃c", "a", 1, UNICODE, ""); + assertSubstringIndex("a🙃b🙃c", "a", 1, UNICODE_CI, ""); + assertSubstringIndex("a🙃b🙃c", "🙃", 1, UTF8_BINARY, "a"); + assertSubstringIndex("a🙃b🙃c", "🙃", 1, UTF8_LCASE, "a"); + assertSubstringIndex("a🙃b🙃c", "🙃", 1, UNICODE, "a"); + assertSubstringIndex("a🙃b🙃c", "🙃", 1, UNICODE_CI, "a"); + assertSubstringIndex("a🙃b🙃c", "b", 1, UTF8_BINARY, "a🙃"); + assertSubstringIndex("a🙃b🙃c", "b", 1, UTF8_LCASE, "a🙃"); + assertSubstringIndex("a🙃b🙃c", "b", 1, UNICODE, "a🙃"); + assertSubstringIndex("a🙃b🙃c", "b", 1, UNICODE_CI, "a🙃"); + assertSubstringIndex("a🙃b🙃c", "🙃", 2, UTF8_BINARY, "a🙃b"); + assertSubstringIndex("a🙃b🙃c", "🙃", 2, UTF8_LCASE, "a🙃b"); + assertSubstringIndex("a🙃b🙃c", "🙃", 2, UNICODE, "a🙃b"); + assertSubstringIndex("a🙃b🙃c", "🙃", 2, UNICODE_CI, "a🙃b"); + assertSubstringIndex("a🙃b🙃c", "c", 1, UTF8_BINARY, "a🙃b🙃"); + assertSubstringIndex("a🙃b🙃c", "c", 1, UTF8_LCASE, "a🙃b🙃"); + assertSubstringIndex("a🙃b🙃c", "c", 1, UNICODE, "a🙃b🙃"); + assertSubstringIndex("a🙃b🙃c", "c", 1, UNICODE_CI, "a🙃b🙃"); + assertSubstringIndex("a🙃b🙃c", "🙃", 3, UTF8_BINARY, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", 3, UTF8_LCASE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", 3, UNICODE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", 3, UNICODE_CI, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", 1, UTF8_BINARY, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", 1, UTF8_LCASE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", 1, UNICODE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", 1, UNICODE_CI, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "c", -1, UTF8_BINARY, ""); + assertSubstringIndex("a🙃b🙃c", "c", -1, UTF8_LCASE, ""); + assertSubstringIndex("a🙃b🙃c", "c", -1, UNICODE, ""); + assertSubstringIndex("a🙃b🙃c", "c", -1, UNICODE_CI, ""); + assertSubstringIndex("a🙃b🙃c", "🙃", -1, UTF8_BINARY, "c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -1, UTF8_LCASE, "c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -1, 
UNICODE, "c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -1, UNICODE_CI, "c"); + assertSubstringIndex("a🙃b🙃c", "b", -1, UTF8_BINARY, "🙃c"); + assertSubstringIndex("a🙃b🙃c", "b", -1, UTF8_LCASE, "🙃c"); + assertSubstringIndex("a🙃b🙃c", "b", -1, UNICODE, "🙃c"); + assertSubstringIndex("a🙃b🙃c", "b", -1, UNICODE_CI, "🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -2, UTF8_BINARY, "b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -2, UTF8_LCASE, "b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -2, UNICODE, "b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -2, UNICODE_CI, "b🙃c"); + assertSubstringIndex("a🙃b🙃c", "a", -1, UTF8_BINARY, "🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "a", -1, UTF8_LCASE, "🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "a", -1, UNICODE, "🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "a", -1, UNICODE_CI, "🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -3, UTF8_BINARY, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -3, UTF8_LCASE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -3, UNICODE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -3, UNICODE_CI, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", -1, UTF8_BINARY, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", -1, UTF8_LCASE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", -1, UNICODE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", -1, UNICODE_CI, "a🙃b🙃c"); } /** @@ -2776,279 +2777,279 @@ private void assertStringTrim(String collationName, String sourceString, String @Test public void testStringTrim() throws SparkException { // Basic tests. 
- assertStringTrim("UTF8_BINARY", "", "", ""); - assertStringTrim("UTF8_BINARY", "", "xyz", ""); - assertStringTrim("UTF8_BINARY", "asd", "", "asd"); - assertStringTrim("UTF8_BINARY", "asd", null, "asd"); - assertStringTrim("UTF8_BINARY", " asd ", null, "asd"); - assertStringTrim("UTF8_BINARY", " a世a ", null, "a世a"); - assertStringTrim("UTF8_BINARY", "asd", "x", "asd"); - assertStringTrim("UTF8_BINARY", "xxasdxx", "x", "asd"); - assertStringTrim("UTF8_BINARY", "xa世ax", "x", "a世a"); - assertStringTrim("UTF8_LCASE", "", "", ""); - assertStringTrim("UTF8_LCASE", "", "xyz", ""); - assertStringTrim("UTF8_LCASE", "asd", "", "asd"); - assertStringTrim("UTF8_LCASE", "asd", null, "asd"); - assertStringTrim("UTF8_LCASE", " asd ", null, "asd"); - assertStringTrim("UTF8_LCASE", " a世a ", null, "a世a"); - assertStringTrim("UTF8_LCASE", "asd", "x", "asd"); - assertStringTrim("UTF8_LCASE", "xxasdxx", "x", "asd"); - assertStringTrim("UTF8_LCASE", "xa世ax", "x", "a世a"); - assertStringTrim("UNICODE", "", "", ""); - assertStringTrim("UNICODE", "", "xyz", ""); - assertStringTrim("UNICODE", "asd", "", "asd"); - assertStringTrim("UNICODE", "asd", null, "asd"); - assertStringTrim("UNICODE", " asd ", null, "asd"); - assertStringTrim("UNICODE", " a世a ", null, "a世a"); - assertStringTrim("UNICODE", "asd", "x", "asd"); - assertStringTrim("UNICODE", "xxasdxx", "x", "asd"); - assertStringTrim("UNICODE", "xa世ax", "x", "a世a"); - assertStringTrim("UNICODE_CI", "", "", ""); - assertStringTrim("UNICODE_CI", "", "xyz", ""); - assertStringTrim("UNICODE_CI", "asd", "", "asd"); - assertStringTrim("UNICODE_CI", "asd", null, "asd"); - assertStringTrim("UNICODE_CI", " asd ", null, "asd"); - assertStringTrim("UNICODE_CI", " a世a ", null, "a世a"); - assertStringTrim("UNICODE_CI", "asd", "x", "asd"); - assertStringTrim("UNICODE_CI", "xxasdxx", "x", "asd"); - assertStringTrim("UNICODE_CI", "xa世ax", "x", "a世a"); + assertStringTrim(UTF8_BINARY, "", "", ""); + assertStringTrim(UTF8_BINARY, "", "xyz", ""); + 
assertStringTrim(UTF8_BINARY, "asd", "", "asd"); + assertStringTrim(UTF8_BINARY, "asd", null, "asd"); + assertStringTrim(UTF8_BINARY, " asd ", null, "asd"); + assertStringTrim(UTF8_BINARY, " a世a ", null, "a世a"); + assertStringTrim(UTF8_BINARY, "asd", "x", "asd"); + assertStringTrim(UTF8_BINARY, "xxasdxx", "x", "asd"); + assertStringTrim(UTF8_BINARY, "xa世ax", "x", "a世a"); + assertStringTrim(UTF8_LCASE, "", "", ""); + assertStringTrim(UTF8_LCASE, "", "xyz", ""); + assertStringTrim(UTF8_LCASE, "asd", "", "asd"); + assertStringTrim(UTF8_LCASE, "asd", null, "asd"); + assertStringTrim(UTF8_LCASE, " asd ", null, "asd"); + assertStringTrim(UTF8_LCASE, " a世a ", null, "a世a"); + assertStringTrim(UTF8_LCASE, "asd", "x", "asd"); + assertStringTrim(UTF8_LCASE, "xxasdxx", "x", "asd"); + assertStringTrim(UTF8_LCASE, "xa世ax", "x", "a世a"); + assertStringTrim(UNICODE, "", "", ""); + assertStringTrim(UNICODE, "", "xyz", ""); + assertStringTrim(UNICODE, "asd", "", "asd"); + assertStringTrim(UNICODE, "asd", null, "asd"); + assertStringTrim(UNICODE, " asd ", null, "asd"); + assertStringTrim(UNICODE, " a世a ", null, "a世a"); + assertStringTrim(UNICODE, "asd", "x", "asd"); + assertStringTrim(UNICODE, "xxasdxx", "x", "asd"); + assertStringTrim(UNICODE, "xa世ax", "x", "a世a"); + assertStringTrim(UNICODE_CI, "", "", ""); + assertStringTrim(UNICODE_CI, "", "xyz", ""); + assertStringTrim(UNICODE_CI, "asd", "", "asd"); + assertStringTrim(UNICODE_CI, "asd", null, "asd"); + assertStringTrim(UNICODE_CI, " asd ", null, "asd"); + assertStringTrim(UNICODE_CI, " a世a ", null, "a世a"); + assertStringTrim(UNICODE_CI, "asd", "x", "asd"); + assertStringTrim(UNICODE_CI, "xxasdxx", "x", "asd"); + assertStringTrim(UNICODE_CI, "xa世ax", "x", "a世a"); // Case variation. 
- assertStringTrim("UTF8_BINARY", "asd", "A", "asd"); - assertStringTrim("UTF8_BINARY", "ddsXXXaa", "asd", "XXX"); - assertStringTrim("UTF8_BINARY", "ASD", "a", "ASD"); - assertStringTrim("UTF8_LCASE", "asd", "A", "sd"); - assertStringTrim("UTF8_LCASE", "ASD", "a", "SD"); - assertStringTrim("UTF8_LCASE", "ddsXXXaa", "ASD", "XXX"); - assertStringTrim("UNICODE", "asd", "A", "asd"); - assertStringTrim("UNICODE", "ASD", "a", "ASD"); - assertStringTrim("UNICODE", "ddsXXXaa", "asd", "XXX"); - assertStringTrim("UNICODE_CI", "asd", "A", "sd"); - assertStringTrim("UNICODE_CI", "ASD", "a", "SD"); - assertStringTrim("UNICODE_CI", "ddsXXXaa", "ASD", "XXX"); + assertStringTrim(UTF8_BINARY, "asd", "A", "asd"); + assertStringTrim(UTF8_BINARY, "ddsXXXaa", "asd", "XXX"); + assertStringTrim(UTF8_BINARY, "ASD", "a", "ASD"); + assertStringTrim(UTF8_LCASE, "asd", "A", "sd"); + assertStringTrim(UTF8_LCASE, "ASD", "a", "SD"); + assertStringTrim(UTF8_LCASE, "ddsXXXaa", "ASD", "XXX"); + assertStringTrim(UNICODE, "asd", "A", "asd"); + assertStringTrim(UNICODE, "ASD", "a", "ASD"); + assertStringTrim(UNICODE, "ddsXXXaa", "asd", "XXX"); + assertStringTrim(UNICODE_CI, "asd", "A", "sd"); + assertStringTrim(UNICODE_CI, "ASD", "a", "SD"); + assertStringTrim(UNICODE_CI, "ddsXXXaa", "ASD", "XXX"); assertStringTrim("SR_CI_AI", "cSCšćČXXXsčšČŠsć", "čš", "XXX"); // One-to-many case mapping (e.g. Turkish dotted I).. 
- assertStringTrim("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrim("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrim("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "aaa"); - assertStringTrim("UTF8_LCASE", "ẞaaaẞ", "ß", "aaa"); - assertStringTrim("UTF8_LCASE", "ßaaaß", "ẞ", "aaa"); - assertStringTrim("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "aaa"); - assertStringTrim("UNICODE", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrim("UNICODE", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrim("UNICODE", "Ëaaaẞ", "Ëẞ", "aaa"); - assertStringTrim("UNICODE_CI", "ẞaaaẞ", "ß", "aaa"); - assertStringTrim("UNICODE_CI", "ßaaaß", "ẞ", "aaa"); - assertStringTrim("UNICODE_CI", "Ëaaaẞ", "Ëẞ", "aaa"); + assertStringTrim(UTF8_BINARY, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrim(UTF8_BINARY, "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrim(UTF8_BINARY, "Ëaaaẞ", "Ëẞ", "aaa"); + assertStringTrim(UTF8_LCASE, "ẞaaaẞ", "ß", "aaa"); + assertStringTrim(UTF8_LCASE, "ßaaaß", "ẞ", "aaa"); + assertStringTrim(UTF8_LCASE, "Ëaaaẞ", "Ëẞ", "aaa"); + assertStringTrim(UNICODE, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrim(UNICODE, "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrim(UNICODE, "Ëaaaẞ", "Ëẞ", "aaa"); + assertStringTrim(UNICODE_CI, "ẞaaaẞ", "ß", "aaa"); + assertStringTrim(UNICODE_CI, "ßaaaß", "ẞ", "aaa"); + assertStringTrim(UNICODE_CI, "Ëaaaẞ", "Ëẞ", "aaa"); // One-to-many case mapping (e.g. Turkish dotted I). 
- assertStringTrim("UTF8_BINARY", "i", "i", ""); - assertStringTrim("UTF8_BINARY", "iii", "I", "iii"); - assertStringTrim("UTF8_BINARY", "I", "iii", "I"); - assertStringTrim("UTF8_BINARY", "ixi", "i", "x"); - assertStringTrim("UTF8_BINARY", "i", "İ", "i"); - assertStringTrim("UTF8_BINARY", "i\u0307", "İ", "i\u0307"); - assertStringTrim("UTF8_BINARY", "ii\u0307", "İi", "\u0307"); - assertStringTrim("UTF8_BINARY", "iii\u0307", "İi", "\u0307"); - assertStringTrim("UTF8_BINARY", "iiii\u0307", "iİ", "\u0307"); - assertStringTrim("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "\u0307ii\u0307"); - assertStringTrim("UTF8_BINARY", "i\u0307", "i", "\u0307"); - assertStringTrim("UTF8_BINARY", "i\u0307", "\u0307", "i"); - assertStringTrim("UTF8_BINARY", "i\u0307", "i\u0307", ""); - assertStringTrim("UTF8_BINARY", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrim("UTF8_BINARY", "i\u0307\u0307", "i\u0307", ""); - assertStringTrim("UTF8_BINARY", "i\u0307i", "i\u0307", ""); - assertStringTrim("UTF8_BINARY", "i\u0307i", "İ", "i\u0307i"); - assertStringTrim("UTF8_BINARY", "i\u0307İ", "i\u0307", "İ"); - assertStringTrim("UTF8_BINARY", "i\u0307İ", "İ", "i\u0307"); - assertStringTrim("UTF8_BINARY", "İ", "İ", ""); - assertStringTrim("UTF8_BINARY", "IXi", "İ", "IXi"); - assertStringTrim("UTF8_BINARY", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrim("UTF8_BINARY", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrim("UTF8_BINARY", "i\u0307x", "ix\u0307İ", ""); - assertStringTrim("UTF8_BINARY", "İ", "i", "İ"); - assertStringTrim("UTF8_BINARY", "İ", "\u0307", "İ"); - assertStringTrim("UTF8_BINARY", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrim("UTF8_BINARY", "IXİ", "ix\u0307", "IXİ"); - assertStringTrim("UTF8_BINARY", "xi\u0307", "\u0307IX", "xi"); - assertStringTrim("UTF8_LCASE", "i", "i", ""); - assertStringTrim("UTF8_LCASE", "iii", "I", ""); - assertStringTrim("UTF8_LCASE", "I", "iii", ""); - assertStringTrim("UTF8_LCASE", "ixi", "i", "x"); - assertStringTrim("UTF8_LCASE", "i", "İ", "i"); - 
assertStringTrim("UTF8_LCASE", "i\u0307", "İ", ""); - assertStringTrim("UTF8_LCASE", "ii\u0307", "İi", ""); - assertStringTrim("UTF8_LCASE", "iii\u0307", "İi", ""); - assertStringTrim("UTF8_LCASE", "iiii\u0307", "iİ", ""); - assertStringTrim("UTF8_LCASE", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrim("UTF8_LCASE", "i\u0307", "i", "\u0307"); - assertStringTrim("UTF8_LCASE", "i\u0307", "\u0307", "i"); - assertStringTrim("UTF8_LCASE", "i\u0307", "i\u0307", ""); - assertStringTrim("UTF8_LCASE", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrim("UTF8_LCASE", "i\u0307\u0307", "i\u0307", ""); - assertStringTrim("UTF8_LCASE", "i\u0307i", "i\u0307", ""); - assertStringTrim("UTF8_LCASE", "i\u0307i", "İ", "i"); - assertStringTrim("UTF8_LCASE", "i\u0307İ", "i\u0307", "İ"); - assertStringTrim("UTF8_LCASE", "i\u0307İ", "İ", ""); - assertStringTrim("UTF8_LCASE", "İ", "İ", ""); - assertStringTrim("UTF8_LCASE", "IXi", "İ", "IXi"); - assertStringTrim("UTF8_LCASE", "ix\u0307", "Ixİ", "\u0307"); - assertStringTrim("UTF8_LCASE", "i\u0307x", "IXİ", ""); - assertStringTrim("UTF8_LCASE", "i\u0307x", "I\u0307xİ", ""); - assertStringTrim("UTF8_LCASE", "İ", "i", "İ"); - assertStringTrim("UTF8_LCASE", "İ", "\u0307", "İ"); - assertStringTrim("UTF8_LCASE", "Ixİ", "i\u0307", "xİ"); - assertStringTrim("UTF8_LCASE", "IXİ", "ix\u0307", "İ"); - assertStringTrim("UTF8_LCASE", "xi\u0307", "\u0307IX", ""); - assertStringTrim("UNICODE", "i", "i", ""); - assertStringTrim("UNICODE", "iii", "I", "iii"); - assertStringTrim("UNICODE", "I", "iii", "I"); - assertStringTrim("UNICODE", "ixi", "i", "x"); - assertStringTrim("UNICODE", "i", "İ", "i"); - assertStringTrim("UNICODE", "i\u0307", "İ", "i\u0307"); - assertStringTrim("UNICODE", "ii\u0307", "İi", "i\u0307"); - assertStringTrim("UNICODE", "iii\u0307", "İi", "i\u0307"); - assertStringTrim("UNICODE", "iiii\u0307", "iİ", "i\u0307"); - assertStringTrim("UNICODE", "ii\u0307ii\u0307", "iİ", "i\u0307ii\u0307"); - assertStringTrim("UNICODE", "i\u0307", "i", 
"i\u0307"); - assertStringTrim("UNICODE", "i\u0307", "\u0307", "i\u0307"); - assertStringTrim("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - assertStringTrim("UNICODE", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrim("UNICODE", "i\u0307i", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE", "i\u0307i", "İ", "i\u0307i"); - assertStringTrim("UNICODE", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrim("UNICODE", "i\u0307İ", "İ", "i\u0307"); - assertStringTrim("UNICODE", "İ", "İ", ""); - assertStringTrim("UNICODE", "IXi", "İ", "IXi"); - assertStringTrim("UNICODE", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrim("UNICODE", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrim("UNICODE", "i\u0307x", "ix\u0307İ", "i\u0307"); - assertStringTrim("UNICODE", "İ", "i", "İ"); - assertStringTrim("UNICODE", "İ", "\u0307", "İ"); - assertStringTrim("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrim("UNICODE", "IXİ", "ix\u0307", "IXİ"); - assertStringTrim("UNICODE", "xi\u0307", "\u0307IX", "xi\u0307"); - assertStringTrim("UNICODE_CI", "i", "i", ""); - assertStringTrim("UNICODE_CI", "iii", "I", ""); - assertStringTrim("UNICODE_CI", "I", "iii", ""); - assertStringTrim("UNICODE_CI", "ixi", "i", "x"); - assertStringTrim("UNICODE_CI", "i", "İ", "i"); - assertStringTrim("UNICODE_CI", "i\u0307", "İ", ""); - assertStringTrim("UNICODE_CI", "ii\u0307", "İi", ""); - assertStringTrim("UNICODE_CI", "iii\u0307", "İi", ""); - assertStringTrim("UNICODE_CI", "iiii\u0307", "iİ", ""); - assertStringTrim("UNICODE_CI", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrim("UNICODE_CI", "i\u0307", "i", "i\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307", "\u0307", "i\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - 
assertStringTrim("UNICODE_CI", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307i", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307i", "İ", "i"); - assertStringTrim("UNICODE_CI", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrim("UNICODE_CI", "i\u0307İ", "İ", ""); - assertStringTrim("UNICODE_CI", "İ", "İ", ""); - assertStringTrim("UNICODE_CI", "IXi", "İ", "IXi"); - assertStringTrim("UNICODE_CI", "ix\u0307", "Ixİ", "x\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307x", "IXİ", ""); - assertStringTrim("UNICODE_CI", "i\u0307x", "I\u0307xİ", ""); - assertStringTrim("UNICODE_CI", "İ", "i", "İ"); - assertStringTrim("UNICODE_CI", "İ", "\u0307", "İ"); - assertStringTrim("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE_CI", "Ixİ", "i\u0307", "xİ"); - assertStringTrim("UNICODE_CI", "IXİ", "ix\u0307", "İ"); - assertStringTrim("UNICODE_CI", "xi\u0307", "\u0307IX", "i\u0307"); + assertStringTrim(UTF8_BINARY, "i", "i", ""); + assertStringTrim(UTF8_BINARY, "iii", "I", "iii"); + assertStringTrim(UTF8_BINARY, "I", "iii", "I"); + assertStringTrim(UTF8_BINARY, "ixi", "i", "x"); + assertStringTrim(UTF8_BINARY, "i", "İ", "i"); + assertStringTrim(UTF8_BINARY, "i\u0307", "İ", "i\u0307"); + assertStringTrim(UTF8_BINARY, "ii\u0307", "İi", "\u0307"); + assertStringTrim(UTF8_BINARY, "iii\u0307", "İi", "\u0307"); + assertStringTrim(UTF8_BINARY, "iiii\u0307", "iİ", "\u0307"); + assertStringTrim(UTF8_BINARY, "ii\u0307ii\u0307", "iİ", "\u0307ii\u0307"); + assertStringTrim(UTF8_BINARY, "i\u0307", "i", "\u0307"); + assertStringTrim(UTF8_BINARY, "i\u0307", "\u0307", "i"); + assertStringTrim(UTF8_BINARY, "i\u0307", "i\u0307", ""); + assertStringTrim(UTF8_BINARY, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrim(UTF8_BINARY, "i\u0307\u0307", "i\u0307", ""); + assertStringTrim(UTF8_BINARY, "i\u0307i", "i\u0307", ""); + assertStringTrim(UTF8_BINARY, "i\u0307i", "İ", "i\u0307i"); + assertStringTrim(UTF8_BINARY, 
"i\u0307İ", "i\u0307", "İ"); + assertStringTrim(UTF8_BINARY, "i\u0307İ", "İ", "i\u0307"); + assertStringTrim(UTF8_BINARY, "İ", "İ", ""); + assertStringTrim(UTF8_BINARY, "IXi", "İ", "IXi"); + assertStringTrim(UTF8_BINARY, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrim(UTF8_BINARY, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrim(UTF8_BINARY, "i\u0307x", "ix\u0307İ", ""); + assertStringTrim(UTF8_BINARY, "İ", "i", "İ"); + assertStringTrim(UTF8_BINARY, "İ", "\u0307", "İ"); + assertStringTrim(UTF8_BINARY, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrim(UTF8_BINARY, "IXİ", "ix\u0307", "IXİ"); + assertStringTrim(UTF8_BINARY, "xi\u0307", "\u0307IX", "xi"); + assertStringTrim(UTF8_LCASE, "i", "i", ""); + assertStringTrim(UTF8_LCASE, "iii", "I", ""); + assertStringTrim(UTF8_LCASE, "I", "iii", ""); + assertStringTrim(UTF8_LCASE, "ixi", "i", "x"); + assertStringTrim(UTF8_LCASE, "i", "İ", "i"); + assertStringTrim(UTF8_LCASE, "i\u0307", "İ", ""); + assertStringTrim(UTF8_LCASE, "ii\u0307", "İi", ""); + assertStringTrim(UTF8_LCASE, "iii\u0307", "İi", ""); + assertStringTrim(UTF8_LCASE, "iiii\u0307", "iİ", ""); + assertStringTrim(UTF8_LCASE, "ii\u0307ii\u0307", "iİ", ""); + assertStringTrim(UTF8_LCASE, "i\u0307", "i", "\u0307"); + assertStringTrim(UTF8_LCASE, "i\u0307", "\u0307", "i"); + assertStringTrim(UTF8_LCASE, "i\u0307", "i\u0307", ""); + assertStringTrim(UTF8_LCASE, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrim(UTF8_LCASE, "i\u0307\u0307", "i\u0307", ""); + assertStringTrim(UTF8_LCASE, "i\u0307i", "i\u0307", ""); + assertStringTrim(UTF8_LCASE, "i\u0307i", "İ", "i"); + assertStringTrim(UTF8_LCASE, "i\u0307İ", "i\u0307", "İ"); + assertStringTrim(UTF8_LCASE, "i\u0307İ", "İ", ""); + assertStringTrim(UTF8_LCASE, "İ", "İ", ""); + assertStringTrim(UTF8_LCASE, "IXi", "İ", "IXi"); + assertStringTrim(UTF8_LCASE, "ix\u0307", "Ixİ", "\u0307"); + assertStringTrim(UTF8_LCASE, "i\u0307x", "IXİ", ""); + assertStringTrim(UTF8_LCASE, "i\u0307x", "I\u0307xİ", ""); + 
assertStringTrim(UTF8_LCASE, "İ", "i", "İ"); + assertStringTrim(UTF8_LCASE, "İ", "\u0307", "İ"); + assertStringTrim(UTF8_LCASE, "Ixİ", "i\u0307", "xİ"); + assertStringTrim(UTF8_LCASE, "IXİ", "ix\u0307", "İ"); + assertStringTrim(UTF8_LCASE, "xi\u0307", "\u0307IX", ""); + assertStringTrim(UNICODE, "i", "i", ""); + assertStringTrim(UNICODE, "iii", "I", "iii"); + assertStringTrim(UNICODE, "I", "iii", "I"); + assertStringTrim(UNICODE, "ixi", "i", "x"); + assertStringTrim(UNICODE, "i", "İ", "i"); + assertStringTrim(UNICODE, "i\u0307", "İ", "i\u0307"); + assertStringTrim(UNICODE, "ii\u0307", "İi", "i\u0307"); + assertStringTrim(UNICODE, "iii\u0307", "İi", "i\u0307"); + assertStringTrim(UNICODE, "iiii\u0307", "iİ", "i\u0307"); + assertStringTrim(UNICODE, "ii\u0307ii\u0307", "iİ", "i\u0307ii\u0307"); + assertStringTrim(UNICODE, "i\u0307", "i", "i\u0307"); + assertStringTrim(UNICODE, "i\u0307", "\u0307", "i\u0307"); + assertStringTrim(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); + assertStringTrim(UNICODE, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrim(UNICODE, "i\u0307i", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE, "i\u0307i", "İ", "i\u0307i"); + assertStringTrim(UNICODE, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrim(UNICODE, "i\u0307İ", "İ", "i\u0307"); + assertStringTrim(UNICODE, "İ", "İ", ""); + assertStringTrim(UNICODE, "IXi", "İ", "IXi"); + assertStringTrim(UNICODE, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrim(UNICODE, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrim(UNICODE, "i\u0307x", "ix\u0307İ", "i\u0307"); + assertStringTrim(UNICODE, "İ", "i", "İ"); + assertStringTrim(UNICODE, "İ", "\u0307", "İ"); + assertStringTrim(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrim(UNICODE, "IXİ", "ix\u0307", "IXİ"); + assertStringTrim(UNICODE, "xi\u0307", "\u0307IX", "xi\u0307"); + 
assertStringTrim(UNICODE_CI, "i", "i", ""); + assertStringTrim(UNICODE_CI, "iii", "I", ""); + assertStringTrim(UNICODE_CI, "I", "iii", ""); + assertStringTrim(UNICODE_CI, "ixi", "i", "x"); + assertStringTrim(UNICODE_CI, "i", "İ", "i"); + assertStringTrim(UNICODE_CI, "i\u0307", "İ", ""); + assertStringTrim(UNICODE_CI, "ii\u0307", "İi", ""); + assertStringTrim(UNICODE_CI, "iii\u0307", "İi", ""); + assertStringTrim(UNICODE_CI, "iiii\u0307", "iİ", ""); + assertStringTrim(UNICODE_CI, "ii\u0307ii\u0307", "iİ", ""); + assertStringTrim(UNICODE_CI, "i\u0307", "i", "i\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307", "\u0307", "i\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307i", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307i", "İ", "i"); + assertStringTrim(UNICODE_CI, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrim(UNICODE_CI, "i\u0307İ", "İ", ""); + assertStringTrim(UNICODE_CI, "İ", "İ", ""); + assertStringTrim(UNICODE_CI, "IXi", "İ", "IXi"); + assertStringTrim(UNICODE_CI, "ix\u0307", "Ixİ", "x\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307x", "IXİ", ""); + assertStringTrim(UNICODE_CI, "i\u0307x", "I\u0307xİ", ""); + assertStringTrim(UNICODE_CI, "İ", "i", "İ"); + assertStringTrim(UNICODE_CI, "İ", "\u0307", "İ"); + assertStringTrim(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE_CI, "Ixİ", "i\u0307", "xİ"); + assertStringTrim(UNICODE_CI, "IXİ", "ix\u0307", "İ"); + assertStringTrim(UNICODE_CI, "xi\u0307", "\u0307IX", "i\u0307"); // Conditional case mapping (e.g. Greek sigmas). 
- assertStringTrim("UTF8_BINARY", "ςxς", "σ", "ςxς"); - assertStringTrim("UTF8_BINARY", "ςxς", "ς", "x"); - assertStringTrim("UTF8_BINARY", "ςxς", "Σ", "ςxς"); - assertStringTrim("UTF8_BINARY", "σxσ", "σ", "x"); - assertStringTrim("UTF8_BINARY", "σxσ", "ς", "σxσ"); - assertStringTrim("UTF8_BINARY", "σxσ", "Σ", "σxσ"); - assertStringTrim("UTF8_BINARY", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrim("UTF8_BINARY", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrim("UTF8_BINARY", "ΣxΣ", "Σ", "x"); - assertStringTrim("UTF8_LCASE", "ςxς", "σ", "x"); - assertStringTrim("UTF8_LCASE", "ςxς", "ς", "x"); - assertStringTrim("UTF8_LCASE", "ςxς", "Σ", "x"); - assertStringTrim("UTF8_LCASE", "σxσ", "σ", "x"); - assertStringTrim("UTF8_LCASE", "σxσ", "ς", "x"); - assertStringTrim("UTF8_LCASE", "σxσ", "Σ", "x"); - assertStringTrim("UTF8_LCASE", "ΣxΣ", "σ", "x"); - assertStringTrim("UTF8_LCASE", "ΣxΣ", "ς", "x"); - assertStringTrim("UTF8_LCASE", "ΣxΣ", "Σ", "x"); - assertStringTrim("UNICODE", "ςxς", "σ", "ςxς"); - assertStringTrim("UNICODE", "ςxς", "ς", "x"); - assertStringTrim("UNICODE", "ςxς", "Σ", "ςxς"); - assertStringTrim("UNICODE", "σxσ", "σ", "x"); - assertStringTrim("UNICODE", "σxσ", "ς", "σxσ"); - assertStringTrim("UNICODE", "σxσ", "Σ", "σxσ"); - assertStringTrim("UNICODE", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrim("UNICODE", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrim("UNICODE", "ΣxΣ", "Σ", "x"); - assertStringTrim("UNICODE_CI", "ςxς", "σ", "x"); - assertStringTrim("UNICODE_CI", "ςxς", "ς", "x"); - assertStringTrim("UNICODE_CI", "ςxς", "Σ", "x"); - assertStringTrim("UNICODE_CI", "σxσ", "σ", "x"); - assertStringTrim("UNICODE_CI", "σxσ", "ς", "x"); - assertStringTrim("UNICODE_CI", "σxσ", "Σ", "x"); - assertStringTrim("UNICODE_CI", "ΣxΣ", "σ", "x"); - assertStringTrim("UNICODE_CI", "ΣxΣ", "ς", "x"); - assertStringTrim("UNICODE_CI", "ΣxΣ", "Σ", "x"); + assertStringTrim(UTF8_BINARY, "ςxς", "σ", "ςxς"); + assertStringTrim(UTF8_BINARY, "ςxς", "ς", "x"); + assertStringTrim(UTF8_BINARY, "ςxς", "Σ", "ςxς"); + 
assertStringTrim(UTF8_BINARY, "σxσ", "σ", "x"); + assertStringTrim(UTF8_BINARY, "σxσ", "ς", "σxσ"); + assertStringTrim(UTF8_BINARY, "σxσ", "Σ", "σxσ"); + assertStringTrim(UTF8_BINARY, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrim(UTF8_BINARY, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrim(UTF8_BINARY, "ΣxΣ", "Σ", "x"); + assertStringTrim(UTF8_LCASE, "ςxς", "σ", "x"); + assertStringTrim(UTF8_LCASE, "ςxς", "ς", "x"); + assertStringTrim(UTF8_LCASE, "ςxς", "Σ", "x"); + assertStringTrim(UTF8_LCASE, "σxσ", "σ", "x"); + assertStringTrim(UTF8_LCASE, "σxσ", "ς", "x"); + assertStringTrim(UTF8_LCASE, "σxσ", "Σ", "x"); + assertStringTrim(UTF8_LCASE, "ΣxΣ", "σ", "x"); + assertStringTrim(UTF8_LCASE, "ΣxΣ", "ς", "x"); + assertStringTrim(UTF8_LCASE, "ΣxΣ", "Σ", "x"); + assertStringTrim(UNICODE, "ςxς", "σ", "ςxς"); + assertStringTrim(UNICODE, "ςxς", "ς", "x"); + assertStringTrim(UNICODE, "ςxς", "Σ", "ςxς"); + assertStringTrim(UNICODE, "σxσ", "σ", "x"); + assertStringTrim(UNICODE, "σxσ", "ς", "σxσ"); + assertStringTrim(UNICODE, "σxσ", "Σ", "σxσ"); + assertStringTrim(UNICODE, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrim(UNICODE, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrim(UNICODE, "ΣxΣ", "Σ", "x"); + assertStringTrim(UNICODE_CI, "ςxς", "σ", "x"); + assertStringTrim(UNICODE_CI, "ςxς", "ς", "x"); + assertStringTrim(UNICODE_CI, "ςxς", "Σ", "x"); + assertStringTrim(UNICODE_CI, "σxσ", "σ", "x"); + assertStringTrim(UNICODE_CI, "σxσ", "ς", "x"); + assertStringTrim(UNICODE_CI, "σxσ", "Σ", "x"); + assertStringTrim(UNICODE_CI, "ΣxΣ", "σ", "x"); + assertStringTrim(UNICODE_CI, "ΣxΣ", "ς", "x"); + assertStringTrim(UNICODE_CI, "ΣxΣ", "Σ", "x"); // Unicode normalization. 
- assertStringTrim("UTF8_BINARY", "åβγδa\u030A", "å", "βγδa\u030A"); - assertStringTrim("UTF8_LCASE", "åβγδa\u030A", "Å", "βγδa\u030A"); - assertStringTrim("UNICODE", "åβγδa\u030A", "å", "βγδ"); - assertStringTrim("UNICODE_CI", "åβγδa\u030A", "Å", "βγδ"); + assertStringTrim(UTF8_BINARY, "åβγδa\u030A", "å", "βγδa\u030A"); + assertStringTrim(UTF8_LCASE, "åβγδa\u030A", "Å", "βγδa\u030A"); + assertStringTrim(UNICODE, "åβγδa\u030A", "å", "βγδ"); + assertStringTrim(UNICODE_CI, "åβγδa\u030A", "Å", "βγδ"); // Surrogate pairs. - assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrim("UNICODE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrim("UNICODE_CI", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "ac", "🙃b🙃"); - assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "ac", "🙃b🙃"); - assertStringTrim("UNICODE", "a🙃b🙃c", "ac", "🙃b🙃"); - assertStringTrim("UNICODE_CI", "a🙃b🙃c", "ac", "🙃b🙃"); - assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "a🙃c", "b"); - assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "a🙃c", "b"); - assertStringTrim("UNICODE", "a🙃b🙃c", "a🙃c", "b"); - assertStringTrim("UNICODE_CI", "a🙃b🙃c", "a🙃c", "b"); - assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "abc🙃", ""); - assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrim("UNICODE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrim("UNICODE_CI", "a🙃b🙃c", "abc🙃", ""); - assertStringTrim("UTF8_BINARY", "😀😆😃😄", "😀😄", "😆😃"); - assertStringTrim("UTF8_LCASE", "😀😆😃😄", "😀😄", "😆😃"); - assertStringTrim("UNICODE", "😀😆😃😄", "😀😄", "😆😃"); - assertStringTrim("UNICODE_CI", "😀😆😃😄", "😀😄", "😆😃"); - assertStringTrim("UTF8_BINARY", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrim("UTF8_LCASE", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrim("UNICODE", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrim("UNICODE_CI", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrim("UTF8_BINARY", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrim("UTF8_LCASE", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrim("UNICODE", "😀😆😃😄", "😀😆😃😄", ""); - 
assertStringTrim("UNICODE_CI", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrim("UTF8_BINARY", "𐐅", "𐐅", ""); - assertStringTrim("UTF8_LCASE", "𐐅", "𐐅", ""); - assertStringTrim("UNICODE", "𐐅", "𐐅", ""); - assertStringTrim("UNICODE_CI", "𐐅", "𐐅", ""); - assertStringTrim("UTF8_BINARY", "𐐅", "𐐭", "𐐅"); - assertStringTrim("UTF8_LCASE", "𐐅", "𐐭", ""); - assertStringTrim("UNICODE", "𐐅", "𐐭", "𐐅"); - assertStringTrim("UNICODE_CI", "𐐅", "𐐭", ""); - assertStringTrim("UTF8_BINARY", "𝔸", "𝔸", ""); - assertStringTrim("UTF8_LCASE", "𝔸", "𝔸", ""); - assertStringTrim("UNICODE", "𝔸", "𝔸", ""); - assertStringTrim("UNICODE_CI", "𝔸", "𝔸", ""); - assertStringTrim("UTF8_BINARY", "𝔸", "A", "𝔸"); - assertStringTrim("UTF8_LCASE", "𝔸", "A", "𝔸"); - assertStringTrim("UNICODE", "𝔸", "A", "𝔸"); - assertStringTrim("UNICODE_CI", "𝔸", "A", ""); - assertStringTrim("UTF8_BINARY", "𝔸", "a", "𝔸"); - assertStringTrim("UTF8_LCASE", "𝔸", "a", "𝔸"); - assertStringTrim("UNICODE", "𝔸", "a", "𝔸"); - assertStringTrim("UNICODE_CI", "𝔸", "a", ""); + assertStringTrim(UTF8_BINARY, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrim(UTF8_LCASE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrim(UNICODE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrim(UNICODE_CI, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrim(UTF8_BINARY, "a🙃b🙃c", "ac", "🙃b🙃"); + assertStringTrim(UTF8_LCASE, "a🙃b🙃c", "ac", "🙃b🙃"); + assertStringTrim(UNICODE, "a🙃b🙃c", "ac", "🙃b🙃"); + assertStringTrim(UNICODE_CI, "a🙃b🙃c", "ac", "🙃b🙃"); + assertStringTrim(UTF8_BINARY, "a🙃b🙃c", "a🙃c", "b"); + assertStringTrim(UTF8_LCASE, "a🙃b🙃c", "a🙃c", "b"); + assertStringTrim(UNICODE, "a🙃b🙃c", "a🙃c", "b"); + assertStringTrim(UNICODE_CI, "a🙃b🙃c", "a🙃c", "b"); + assertStringTrim(UTF8_BINARY, "a🙃b🙃c", "abc🙃", ""); + assertStringTrim(UTF8_LCASE, "a🙃b🙃c", "abc🙃", ""); + assertStringTrim(UNICODE, "a🙃b🙃c", "abc🙃", ""); + assertStringTrim(UNICODE_CI, "a🙃b🙃c", "abc🙃", ""); + assertStringTrim(UTF8_BINARY, "😀😆😃😄", "😀😄", "😆😃"); + assertStringTrim(UTF8_LCASE, "😀😆😃😄", "😀😄", "😆😃"); + assertStringTrim(UNICODE, 
"😀😆😃😄", "😀😄", "😆😃"); + assertStringTrim(UNICODE_CI, "😀😆😃😄", "😀😄", "😆😃"); + assertStringTrim(UTF8_BINARY, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrim(UTF8_LCASE, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrim(UNICODE, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrim(UNICODE_CI, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrim(UTF8_BINARY, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrim(UTF8_LCASE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrim(UNICODE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrim(UNICODE_CI, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrim(UTF8_BINARY, "𐐅", "𐐅", ""); + assertStringTrim(UTF8_LCASE, "𐐅", "𐐅", ""); + assertStringTrim(UNICODE, "𐐅", "𐐅", ""); + assertStringTrim(UNICODE_CI, "𐐅", "𐐅", ""); + assertStringTrim(UTF8_BINARY, "𐐅", "𐐭", "𐐅"); + assertStringTrim(UTF8_LCASE, "𐐅", "𐐭", ""); + assertStringTrim(UNICODE, "𐐅", "𐐭", "𐐅"); + assertStringTrim(UNICODE_CI, "𐐅", "𐐭", ""); + assertStringTrim(UTF8_BINARY, "𝔸", "𝔸", ""); + assertStringTrim(UTF8_LCASE, "𝔸", "𝔸", ""); + assertStringTrim(UNICODE, "𝔸", "𝔸", ""); + assertStringTrim(UNICODE_CI, "𝔸", "𝔸", ""); + assertStringTrim(UTF8_BINARY, "𝔸", "A", "𝔸"); + assertStringTrim(UTF8_LCASE, "𝔸", "A", "𝔸"); + assertStringTrim(UNICODE, "𝔸", "A", "𝔸"); + assertStringTrim(UNICODE_CI, "𝔸", "A", ""); + assertStringTrim(UTF8_BINARY, "𝔸", "a", "𝔸"); + assertStringTrim(UTF8_LCASE, "𝔸", "a", "𝔸"); + assertStringTrim(UNICODE, "𝔸", "a", "𝔸"); + assertStringTrim(UNICODE_CI, "𝔸", "a", ""); } /** @@ -3078,277 +3079,277 @@ private void assertStringTrimLeft(String collationName, String sourceString, Str @Test public void testStringTrimLeft() throws SparkException { // Basic tests - UTF8_BINARY. 
- assertStringTrimLeft("UTF8_BINARY", "", "", ""); - assertStringTrimLeft("UTF8_BINARY", "", "xyz", ""); - assertStringTrimLeft("UTF8_BINARY", "asd", "", "asd"); - assertStringTrimLeft("UTF8_BINARY", "asd", null, "asd"); - assertStringTrimLeft("UTF8_BINARY", " asd ", null, "asd "); - assertStringTrimLeft("UTF8_BINARY", " a世a ", null, "a世a "); - assertStringTrimLeft("UTF8_BINARY", "asd", "x", "asd"); - assertStringTrimLeft("UTF8_BINARY", "xxasdxx", "x", "asdxx"); - assertStringTrimLeft("UTF8_BINARY", "xa世ax", "x", "a世ax"); + assertStringTrimLeft(UTF8_BINARY, "", "", ""); + assertStringTrimLeft(UTF8_BINARY, "", "xyz", ""); + assertStringTrimLeft(UTF8_BINARY, "asd", "", "asd"); + assertStringTrimLeft(UTF8_BINARY, "asd", null, "asd"); + assertStringTrimLeft(UTF8_BINARY, " asd ", null, "asd "); + assertStringTrimLeft(UTF8_BINARY, " a世a ", null, "a世a "); + assertStringTrimLeft(UTF8_BINARY, "asd", "x", "asd"); + assertStringTrimLeft(UTF8_BINARY, "xxasdxx", "x", "asdxx"); + assertStringTrimLeft(UTF8_BINARY, "xa世ax", "x", "a世ax"); // Basic tests - UTF8_LCASE. 
- assertStringTrimLeft("UTF8_LCASE", "", "", ""); - assertStringTrimLeft("UTF8_LCASE", "", "xyz", ""); - assertStringTrimLeft("UTF8_LCASE", "asd", "", "asd"); - assertStringTrimLeft("UTF8_LCASE", "asd", null, "asd"); - assertStringTrimLeft("UTF8_LCASE", " asd ", null, "asd "); - assertStringTrimLeft("UTF8_LCASE", " a世a ", null, "a世a "); - assertStringTrimLeft("UTF8_LCASE", "asd", "x", "asd"); - assertStringTrimLeft("UTF8_LCASE", "xxasdxx", "x", "asdxx"); - assertStringTrimLeft("UTF8_LCASE", "xa世ax", "x", "a世ax"); + assertStringTrimLeft(UTF8_LCASE, "", "", ""); + assertStringTrimLeft(UTF8_LCASE, "", "xyz", ""); + assertStringTrimLeft(UTF8_LCASE, "asd", "", "asd"); + assertStringTrimLeft(UTF8_LCASE, "asd", null, "asd"); + assertStringTrimLeft(UTF8_LCASE, " asd ", null, "asd "); + assertStringTrimLeft(UTF8_LCASE, " a世a ", null, "a世a "); + assertStringTrimLeft(UTF8_LCASE, "asd", "x", "asd"); + assertStringTrimLeft(UTF8_LCASE, "xxasdxx", "x", "asdxx"); + assertStringTrimLeft(UTF8_LCASE, "xa世ax", "x", "a世ax"); // Basic tests - UNICODE. 
- assertStringTrimLeft("UNICODE", "", "", ""); - assertStringTrimLeft("UNICODE", "", "xyz", ""); - assertStringTrimLeft("UNICODE", "asd", "", "asd"); - assertStringTrimLeft("UNICODE", "asd", null, "asd"); - assertStringTrimLeft("UNICODE", " asd ", null, "asd "); - assertStringTrimLeft("UNICODE", " a世a ", null, "a世a "); - assertStringTrimLeft("UNICODE", "asd", "x", "asd"); - assertStringTrimLeft("UNICODE", "xxasdxx", "x", "asdxx"); - assertStringTrimLeft("UNICODE", "xa世ax", "x", "a世ax"); + assertStringTrimLeft(UNICODE, "", "", ""); + assertStringTrimLeft(UNICODE, "", "xyz", ""); + assertStringTrimLeft(UNICODE, "asd", "", "asd"); + assertStringTrimLeft(UNICODE, "asd", null, "asd"); + assertStringTrimLeft(UNICODE, " asd ", null, "asd "); + assertStringTrimLeft(UNICODE, " a世a ", null, "a世a "); + assertStringTrimLeft(UNICODE, "asd", "x", "asd"); + assertStringTrimLeft(UNICODE, "xxasdxx", "x", "asdxx"); + assertStringTrimLeft(UNICODE, "xa世ax", "x", "a世ax"); // Basic tests - UNICODE_CI. - assertStringTrimLeft("UNICODE_CI", "", "", ""); - assertStringTrimLeft("UNICODE_CI", "", "xyz", ""); - assertStringTrimLeft("UNICODE_CI", "asd", "", "asd"); - assertStringTrimLeft("UNICODE_CI", "asd", null, "asd"); - assertStringTrimLeft("UNICODE_CI", " asd ", null, "asd "); - assertStringTrimLeft("UNICODE_CI", " a世a ", null, "a世a "); - assertStringTrimLeft("UNICODE_CI", "asd", "x", "asd"); - assertStringTrimLeft("UNICODE_CI", "xxasdxx", "x", "asdxx"); - assertStringTrimLeft("UNICODE_CI", "xa世ax", "x", "a世ax"); + assertStringTrimLeft(UNICODE_CI, "", "", ""); + assertStringTrimLeft(UNICODE_CI, "", "xyz", ""); + assertStringTrimLeft(UNICODE_CI, "asd", "", "asd"); + assertStringTrimLeft(UNICODE_CI, "asd", null, "asd"); + assertStringTrimLeft(UNICODE_CI, " asd ", null, "asd "); + assertStringTrimLeft(UNICODE_CI, " a世a ", null, "a世a "); + assertStringTrimLeft(UNICODE_CI, "asd", "x", "asd"); + assertStringTrimLeft(UNICODE_CI, "xxasdxx", "x", "asdxx"); + assertStringTrimLeft(UNICODE_CI, 
"xa世ax", "x", "a世ax"); // Case variation. - assertStringTrimLeft("UTF8_BINARY", "ddsXXXaa", "asd", "XXXaa"); - assertStringTrimLeft("UTF8_LCASE", "ddsXXXaa", "aSd", "XXXaa"); - assertStringTrimLeft("UNICODE", "ddsXXXaa", "asd", "XXXaa"); - assertStringTrimLeft("UNICODE_CI", "ddsXXXaa", "aSd", "XXXaa"); + assertStringTrimLeft(UTF8_BINARY, "ddsXXXaa", "asd", "XXXaa"); + assertStringTrimLeft(UTF8_LCASE, "ddsXXXaa", "aSd", "XXXaa"); + assertStringTrimLeft(UNICODE, "ddsXXXaa", "asd", "XXXaa"); + assertStringTrimLeft(UNICODE_CI, "ddsXXXaa", "aSd", "XXXaa"); // One-to-many case mapping (e.g. Turkish dotted I).. - assertStringTrimLeft("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrimLeft("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrimLeft("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "aaaẞ"); - assertStringTrimLeft("UTF8_LCASE", "ẞaaaẞ", "ß", "aaaẞ"); - assertStringTrimLeft("UTF8_LCASE", "ßaaaß", "ẞ", "aaaß"); - assertStringTrimLeft("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "aaaẞ"); - assertStringTrimLeft("UNICODE", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrimLeft("UNICODE", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrimLeft("UNICODE", "Ëaaaẞ", "Ëẞ", "aaaẞ"); - assertStringTrimLeft("UNICODE_CI", "ẞaaaẞ", "ß", "aaaẞ"); - assertStringTrimLeft("UNICODE_CI", "ßaaaß", "ẞ", "aaaß"); - assertStringTrimLeft("UNICODE_CI", "Ëaaaẞ", "Ëẞ", "aaaẞ"); + assertStringTrimLeft(UTF8_BINARY, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrimLeft(UTF8_BINARY, "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrimLeft(UTF8_BINARY, "Ëaaaẞ", "Ëẞ", "aaaẞ"); + assertStringTrimLeft(UTF8_LCASE, "ẞaaaẞ", "ß", "aaaẞ"); + assertStringTrimLeft(UTF8_LCASE, "ßaaaß", "ẞ", "aaaß"); + assertStringTrimLeft(UTF8_LCASE, "Ëaaaẞ", "Ëẞ", "aaaẞ"); + assertStringTrimLeft(UNICODE, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrimLeft(UNICODE, "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrimLeft(UNICODE, "Ëaaaẞ", "Ëẞ", "aaaẞ"); + assertStringTrimLeft(UNICODE_CI, "ẞaaaẞ", "ß", "aaaẞ"); + assertStringTrimLeft(UNICODE_CI, "ßaaaß", "ẞ", "aaaß"); + 
assertStringTrimLeft(UNICODE_CI, "Ëaaaẞ", "Ëẞ", "aaaẞ"); // One-to-many case mapping (e.g. Turkish dotted I). - assertStringTrimLeft("UTF8_BINARY", "i", "i", ""); - assertStringTrimLeft("UTF8_BINARY", "iii", "I", "iii"); - assertStringTrimLeft("UTF8_BINARY", "I", "iii", "I"); - assertStringTrimLeft("UTF8_BINARY", "ixi", "i", "xi"); - assertStringTrimLeft("UTF8_BINARY", "i", "İ", "i"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307", "İ", "i\u0307"); - assertStringTrimLeft("UTF8_BINARY", "ii\u0307", "İi", "\u0307"); - assertStringTrimLeft("UTF8_BINARY", "iii\u0307", "İi", "\u0307"); - assertStringTrimLeft("UTF8_BINARY", "iiii\u0307", "iİ", "\u0307"); - assertStringTrimLeft("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "\u0307ii\u0307"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307", "i", "\u0307"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_BINARY", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_BINARY", "i\u0307\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_BINARY", "i\u0307i", "i\u0307", ""); - assertStringTrimLeft("UTF8_BINARY", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307İ", "i\u0307", "İ"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307İ", "İ", "i\u0307İ"); - assertStringTrimLeft("UTF8_BINARY", "İ", "İ", ""); - assertStringTrimLeft("UTF8_BINARY", "IXi", "İ", "IXi"); - assertStringTrimLeft("UTF8_BINARY", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307x", "ix\u0307İ", ""); - assertStringTrimLeft("UTF8_BINARY", "İ", "i", "İ"); - assertStringTrimLeft("UTF8_BINARY", "İ", "\u0307", "İ"); - assertStringTrimLeft("UTF8_BINARY", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimLeft("UTF8_BINARY", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimLeft("UTF8_BINARY", "xi\u0307", "\u0307IX", "xi\u0307"); - 
assertStringTrimLeft("UTF8_LCASE", "i", "i", ""); - assertStringTrimLeft("UTF8_LCASE", "iii", "I", ""); - assertStringTrimLeft("UTF8_LCASE", "I", "iii", ""); - assertStringTrimLeft("UTF8_LCASE", "ixi", "i", "xi"); - assertStringTrimLeft("UTF8_LCASE", "i", "İ", "i"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307", "İ", ""); - assertStringTrimLeft("UTF8_LCASE", "ii\u0307", "İi", ""); - assertStringTrimLeft("UTF8_LCASE", "iii\u0307", "İi", ""); - assertStringTrimLeft("UTF8_LCASE", "iiii\u0307", "iİ", ""); - assertStringTrimLeft("UTF8_LCASE", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307", "i", "\u0307"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307i", "i\u0307", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307i", "İ", "i"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307İ", "i\u0307", "İ"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307İ", "İ", ""); - assertStringTrimLeft("UTF8_LCASE", "İ", "İ", ""); - assertStringTrimLeft("UTF8_LCASE", "IXi", "İ", "IXi"); - assertStringTrimLeft("UTF8_LCASE", "ix\u0307", "Ixİ", "\u0307"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307x", "IXİ", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307x", "I\u0307xİ", ""); - assertStringTrimLeft("UTF8_LCASE", "İ", "i", "İ"); - assertStringTrimLeft("UTF8_LCASE", "İ", "\u0307", "İ"); - assertStringTrimLeft("UTF8_LCASE", "Ixİ", "i\u0307", "xİ"); - assertStringTrimLeft("UTF8_LCASE", "IXİ", "ix\u0307", "İ"); - assertStringTrimLeft("UTF8_LCASE", "xi\u0307", "\u0307IX", ""); - assertStringTrimLeft("UNICODE", "i", "i", ""); - assertStringTrimLeft("UNICODE", "iii", "I", "iii"); - assertStringTrimLeft("UNICODE", "I", "iii", "I"); - assertStringTrimLeft("UNICODE", "ixi", "i", "xi"); - 
assertStringTrimLeft("UNICODE", "i", "İ", "i"); - assertStringTrimLeft("UNICODE", "i\u0307", "İ", "i\u0307"); - assertStringTrimLeft("UNICODE", "ii\u0307", "İi", "i\u0307"); - assertStringTrimLeft("UNICODE", "iii\u0307", "İi", "i\u0307"); - assertStringTrimLeft("UNICODE", "iiii\u0307", "iİ", "i\u0307"); - assertStringTrimLeft("UNICODE", "ii\u0307ii\u0307", "iİ", "i\u0307ii\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307", "i", "i\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307i", "i\u0307", "i\u0307i"); - assertStringTrimLeft("UNICODE", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimLeft("UNICODE", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimLeft("UNICODE", "i\u0307İ", "İ", "i\u0307İ"); - assertStringTrimLeft("UNICODE", "İ", "İ", ""); - assertStringTrimLeft("UNICODE", "IXi", "İ", "IXi"); - assertStringTrimLeft("UNICODE", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrimLeft("UNICODE", "i\u0307x", "ix\u0307İ", "i\u0307x"); - assertStringTrimLeft("UNICODE", "İ", "i", "İ"); - assertStringTrimLeft("UNICODE", "İ", "\u0307", "İ"); - assertStringTrimLeft("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimLeft("UNICODE", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimLeft("UNICODE", "xi\u0307", "\u0307IX", "xi\u0307"); - assertStringTrimLeft("UNICODE_CI", "i", "i", ""); - assertStringTrimLeft("UNICODE_CI", "iii", "I", ""); - assertStringTrimLeft("UNICODE_CI", "I", "iii", ""); - assertStringTrimLeft("UNICODE_CI", "ixi", "i", "xi"); - assertStringTrimLeft("UNICODE_CI", "i", "İ", "i"); - assertStringTrimLeft("UNICODE_CI", 
"i\u0307", "İ", ""); - assertStringTrimLeft("UNICODE_CI", "ii\u0307", "İi", ""); - assertStringTrimLeft("UNICODE_CI", "iii\u0307", "İi", ""); - assertStringTrimLeft("UNICODE_CI", "iiii\u0307", "iİ", ""); - assertStringTrimLeft("UNICODE_CI", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrimLeft("UNICODE_CI", "i\u0307", "i", "i\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307i", "i\u0307", "i\u0307i"); - assertStringTrimLeft("UNICODE_CI", "i\u0307i", "İ", "i"); - assertStringTrimLeft("UNICODE_CI", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimLeft("UNICODE_CI", "i\u0307İ", "İ", ""); - assertStringTrimLeft("UNICODE_CI", "İ", "İ", ""); - assertStringTrimLeft("UNICODE_CI", "IXi", "İ", "IXi"); - assertStringTrimLeft("UNICODE_CI", "ix\u0307", "Ixİ", "x\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307x", "IXİ", ""); - assertStringTrimLeft("UNICODE_CI", "i\u0307x", "I\u0307xİ", ""); - assertStringTrimLeft("UNICODE_CI", "İ", "i", "İ"); - assertStringTrimLeft("UNICODE_CI", "İ", "\u0307", "İ"); - assertStringTrimLeft("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE_CI", "Ixİ", "i\u0307", "xİ"); - assertStringTrimLeft("UNICODE_CI", "IXİ", "ix\u0307", "İ"); - assertStringTrimLeft("UNICODE_CI", "xi\u0307", "\u0307IX", "i\u0307"); + assertStringTrimLeft(UTF8_BINARY, "i", "i", ""); + assertStringTrimLeft(UTF8_BINARY, "iii", "I", "iii"); + assertStringTrimLeft(UTF8_BINARY, "I", "iii", "I"); + assertStringTrimLeft(UTF8_BINARY, "ixi", "i", "xi"); + assertStringTrimLeft(UTF8_BINARY, "i", "İ", "i"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307", "İ", "i\u0307"); + assertStringTrimLeft(UTF8_BINARY, "ii\u0307", "İi", "\u0307"); + 
assertStringTrimLeft(UTF8_BINARY, "iii\u0307", "İi", "\u0307"); + assertStringTrimLeft(UTF8_BINARY, "iiii\u0307", "iİ", "\u0307"); + assertStringTrimLeft(UTF8_BINARY, "ii\u0307ii\u0307", "iİ", "\u0307ii\u0307"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307", "i", "\u0307"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_BINARY, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_BINARY, "i\u0307\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_BINARY, "i\u0307i", "i\u0307", ""); + assertStringTrimLeft(UTF8_BINARY, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307İ", "i\u0307", "İ"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307İ", "İ", "i\u0307İ"); + assertStringTrimLeft(UTF8_BINARY, "İ", "İ", ""); + assertStringTrimLeft(UTF8_BINARY, "IXi", "İ", "IXi"); + assertStringTrimLeft(UTF8_BINARY, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307x", "ix\u0307İ", ""); + assertStringTrimLeft(UTF8_BINARY, "İ", "i", "İ"); + assertStringTrimLeft(UTF8_BINARY, "İ", "\u0307", "İ"); + assertStringTrimLeft(UTF8_BINARY, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimLeft(UTF8_BINARY, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimLeft(UTF8_BINARY, "xi\u0307", "\u0307IX", "xi\u0307"); + assertStringTrimLeft(UTF8_LCASE, "i", "i", ""); + assertStringTrimLeft(UTF8_LCASE, "iii", "I", ""); + assertStringTrimLeft(UTF8_LCASE, "I", "iii", ""); + assertStringTrimLeft(UTF8_LCASE, "ixi", "i", "xi"); + assertStringTrimLeft(UTF8_LCASE, "i", "İ", "i"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307", "İ", ""); + assertStringTrimLeft(UTF8_LCASE, "ii\u0307", "İi", ""); + assertStringTrimLeft(UTF8_LCASE, "iii\u0307", "İi", ""); + assertStringTrimLeft(UTF8_LCASE, "iiii\u0307", "iİ", ""); + assertStringTrimLeft(UTF8_LCASE, "ii\u0307ii\u0307", "iİ", ""); + 
assertStringTrimLeft(UTF8_LCASE, "i\u0307", "i", "\u0307"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_LCASE, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_LCASE, "i\u0307\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_LCASE, "i\u0307i", "i\u0307", ""); + assertStringTrimLeft(UTF8_LCASE, "i\u0307i", "İ", "i"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307İ", "i\u0307", "İ"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307İ", "İ", ""); + assertStringTrimLeft(UTF8_LCASE, "İ", "İ", ""); + assertStringTrimLeft(UTF8_LCASE, "IXi", "İ", "IXi"); + assertStringTrimLeft(UTF8_LCASE, "ix\u0307", "Ixİ", "\u0307"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307x", "IXİ", ""); + assertStringTrimLeft(UTF8_LCASE, "i\u0307x", "I\u0307xİ", ""); + assertStringTrimLeft(UTF8_LCASE, "İ", "i", "İ"); + assertStringTrimLeft(UTF8_LCASE, "İ", "\u0307", "İ"); + assertStringTrimLeft(UTF8_LCASE, "Ixİ", "i\u0307", "xİ"); + assertStringTrimLeft(UTF8_LCASE, "IXİ", "ix\u0307", "İ"); + assertStringTrimLeft(UTF8_LCASE, "xi\u0307", "\u0307IX", ""); + assertStringTrimLeft(UNICODE, "i", "i", ""); + assertStringTrimLeft(UNICODE, "iii", "I", "iii"); + assertStringTrimLeft(UNICODE, "I", "iii", "I"); + assertStringTrimLeft(UNICODE, "ixi", "i", "xi"); + assertStringTrimLeft(UNICODE, "i", "İ", "i"); + assertStringTrimLeft(UNICODE, "i\u0307", "İ", "i\u0307"); + assertStringTrimLeft(UNICODE, "ii\u0307", "İi", "i\u0307"); + assertStringTrimLeft(UNICODE, "iii\u0307", "İi", "i\u0307"); + assertStringTrimLeft(UNICODE, "iiii\u0307", "iİ", "i\u0307"); + assertStringTrimLeft(UNICODE, "ii\u0307ii\u0307", "iİ", "i\u0307ii\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307", "i", "i\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); 
+ assertStringTrimLeft(UNICODE, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307i", "i\u0307", "i\u0307i"); + assertStringTrimLeft(UNICODE, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimLeft(UNICODE, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimLeft(UNICODE, "i\u0307İ", "İ", "i\u0307İ"); + assertStringTrimLeft(UNICODE, "İ", "İ", ""); + assertStringTrimLeft(UNICODE, "IXi", "İ", "IXi"); + assertStringTrimLeft(UNICODE, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrimLeft(UNICODE, "i\u0307x", "ix\u0307İ", "i\u0307x"); + assertStringTrimLeft(UNICODE, "İ", "i", "İ"); + assertStringTrimLeft(UNICODE, "İ", "\u0307", "İ"); + assertStringTrimLeft(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimLeft(UNICODE, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimLeft(UNICODE, "xi\u0307", "\u0307IX", "xi\u0307"); + assertStringTrimLeft(UNICODE_CI, "i", "i", ""); + assertStringTrimLeft(UNICODE_CI, "iii", "I", ""); + assertStringTrimLeft(UNICODE_CI, "I", "iii", ""); + assertStringTrimLeft(UNICODE_CI, "ixi", "i", "xi"); + assertStringTrimLeft(UNICODE_CI, "i", "İ", "i"); + assertStringTrimLeft(UNICODE_CI, "i\u0307", "İ", ""); + assertStringTrimLeft(UNICODE_CI, "ii\u0307", "İi", ""); + assertStringTrimLeft(UNICODE_CI, "iii\u0307", "İi", ""); + assertStringTrimLeft(UNICODE_CI, "iiii\u0307", "iİ", ""); + assertStringTrimLeft(UNICODE_CI, "ii\u0307ii\u0307", "iİ", ""); + assertStringTrimLeft(UNICODE_CI, "i\u0307", "i", "i\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307i", "i\u0307", "i\u0307i"); + 
assertStringTrimLeft(UNICODE_CI, "i\u0307i", "İ", "i"); + assertStringTrimLeft(UNICODE_CI, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimLeft(UNICODE_CI, "i\u0307İ", "İ", ""); + assertStringTrimLeft(UNICODE_CI, "İ", "İ", ""); + assertStringTrimLeft(UNICODE_CI, "IXi", "İ", "IXi"); + assertStringTrimLeft(UNICODE_CI, "ix\u0307", "Ixİ", "x\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307x", "IXİ", ""); + assertStringTrimLeft(UNICODE_CI, "i\u0307x", "I\u0307xİ", ""); + assertStringTrimLeft(UNICODE_CI, "İ", "i", "İ"); + assertStringTrimLeft(UNICODE_CI, "İ", "\u0307", "İ"); + assertStringTrimLeft(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE_CI, "Ixİ", "i\u0307", "xİ"); + assertStringTrimLeft(UNICODE_CI, "IXİ", "ix\u0307", "İ"); + assertStringTrimLeft(UNICODE_CI, "xi\u0307", "\u0307IX", "i\u0307"); // Conditional case mapping (e.g. Greek sigmas). - assertStringTrimLeft("UTF8_BINARY", "ςxς", "σ", "ςxς"); - assertStringTrimLeft("UTF8_BINARY", "ςxς", "ς", "xς"); - assertStringTrimLeft("UTF8_BINARY", "ςxς", "Σ", "ςxς"); - assertStringTrimLeft("UTF8_BINARY", "σxσ", "σ", "xσ"); - assertStringTrimLeft("UTF8_BINARY", "σxσ", "ς", "σxσ"); - assertStringTrimLeft("UTF8_BINARY", "σxσ", "Σ", "σxσ"); - assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "Σ", "xΣ"); - assertStringTrimLeft("UTF8_LCASE", "ςxς", "σ", "xς"); - assertStringTrimLeft("UTF8_LCASE", "ςxς", "ς", "xς"); - assertStringTrimLeft("UTF8_LCASE", "ςxς", "Σ", "xς"); - assertStringTrimLeft("UTF8_LCASE", "σxσ", "σ", "xσ"); - assertStringTrimLeft("UTF8_LCASE", "σxσ", "ς", "xσ"); - assertStringTrimLeft("UTF8_LCASE", "σxσ", "Σ", "xσ"); - assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "σ", "xΣ"); - assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "ς", "xΣ"); - assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "Σ", "xΣ"); - assertStringTrimLeft("UNICODE", "ςxς", "σ", "ςxς"); - 
assertStringTrimLeft("UNICODE", "ςxς", "ς", "xς"); - assertStringTrimLeft("UNICODE", "ςxς", "Σ", "ςxς"); - assertStringTrimLeft("UNICODE", "σxσ", "σ", "xσ"); - assertStringTrimLeft("UNICODE", "σxσ", "ς", "σxσ"); - assertStringTrimLeft("UNICODE", "σxσ", "Σ", "σxσ"); - assertStringTrimLeft("UNICODE", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrimLeft("UNICODE", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrimLeft("UNICODE", "ΣxΣ", "Σ", "xΣ"); - assertStringTrimLeft("UNICODE_CI", "ςxς", "σ", "xς"); - assertStringTrimLeft("UNICODE_CI", "ςxς", "ς", "xς"); - assertStringTrimLeft("UNICODE_CI", "ςxς", "Σ", "xς"); - assertStringTrimLeft("UNICODE_CI", "σxσ", "σ", "xσ"); - assertStringTrimLeft("UNICODE_CI", "σxσ", "ς", "xσ"); - assertStringTrimLeft("UNICODE_CI", "σxσ", "Σ", "xσ"); - assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "σ", "xΣ"); - assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "ς", "xΣ"); - assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "Σ", "xΣ"); + assertStringTrimLeft(UTF8_BINARY, "ςxς", "σ", "ςxς"); + assertStringTrimLeft(UTF8_BINARY, "ςxς", "ς", "xς"); + assertStringTrimLeft(UTF8_BINARY, "ςxς", "Σ", "ςxς"); + assertStringTrimLeft(UTF8_BINARY, "σxσ", "σ", "xσ"); + assertStringTrimLeft(UTF8_BINARY, "σxσ", "ς", "σxσ"); + assertStringTrimLeft(UTF8_BINARY, "σxσ", "Σ", "σxσ"); + assertStringTrimLeft(UTF8_BINARY, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrimLeft(UTF8_BINARY, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrimLeft(UTF8_BINARY, "ΣxΣ", "Σ", "xΣ"); + assertStringTrimLeft(UTF8_LCASE, "ςxς", "σ", "xς"); + assertStringTrimLeft(UTF8_LCASE, "ςxς", "ς", "xς"); + assertStringTrimLeft(UTF8_LCASE, "ςxς", "Σ", "xς"); + assertStringTrimLeft(UTF8_LCASE, "σxσ", "σ", "xσ"); + assertStringTrimLeft(UTF8_LCASE, "σxσ", "ς", "xσ"); + assertStringTrimLeft(UTF8_LCASE, "σxσ", "Σ", "xσ"); + assertStringTrimLeft(UTF8_LCASE, "ΣxΣ", "σ", "xΣ"); + assertStringTrimLeft(UTF8_LCASE, "ΣxΣ", "ς", "xΣ"); + assertStringTrimLeft(UTF8_LCASE, "ΣxΣ", "Σ", "xΣ"); + assertStringTrimLeft(UNICODE, "ςxς", "σ", "ςxς"); + 
assertStringTrimLeft(UNICODE, "ςxς", "ς", "xς"); + assertStringTrimLeft(UNICODE, "ςxς", "Σ", "ςxς"); + assertStringTrimLeft(UNICODE, "σxσ", "σ", "xσ"); + assertStringTrimLeft(UNICODE, "σxσ", "ς", "σxσ"); + assertStringTrimLeft(UNICODE, "σxσ", "Σ", "σxσ"); + assertStringTrimLeft(UNICODE, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrimLeft(UNICODE, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrimLeft(UNICODE, "ΣxΣ", "Σ", "xΣ"); + assertStringTrimLeft(UNICODE_CI, "ςxς", "σ", "xς"); + assertStringTrimLeft(UNICODE_CI, "ςxς", "ς", "xς"); + assertStringTrimLeft(UNICODE_CI, "ςxς", "Σ", "xς"); + assertStringTrimLeft(UNICODE_CI, "σxσ", "σ", "xσ"); + assertStringTrimLeft(UNICODE_CI, "σxσ", "ς", "xσ"); + assertStringTrimLeft(UNICODE_CI, "σxσ", "Σ", "xσ"); + assertStringTrimLeft(UNICODE_CI, "ΣxΣ", "σ", "xΣ"); + assertStringTrimLeft(UNICODE_CI, "ΣxΣ", "ς", "xΣ"); + assertStringTrimLeft(UNICODE_CI, "ΣxΣ", "Σ", "xΣ"); // Unicode normalization. - assertStringTrimLeft("UTF8_BINARY", "åβγδa\u030A", "å", "βγδa\u030A"); - assertStringTrimLeft("UTF8_LCASE", "åβγδa\u030A", "Å", "βγδa\u030A"); - assertStringTrimLeft("UNICODE", "åβγδa\u030A", "å", "βγδa\u030A"); - assertStringTrimLeft("UNICODE_CI", "åβγδa\u030A", "Å", "βγδa\u030A"); + assertStringTrimLeft(UTF8_BINARY, "åβγδa\u030A", "å", "βγδa\u030A"); + assertStringTrimLeft(UTF8_LCASE, "åβγδa\u030A", "Å", "βγδa\u030A"); + assertStringTrimLeft(UNICODE, "åβγδa\u030A", "å", "βγδa\u030A"); + assertStringTrimLeft(UNICODE_CI, "åβγδa\u030A", "Å", "βγδa\u030A"); // Surrogate pairs. 
- assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimLeft("UNICODE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "a", "🙃b🙃c"); - assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "a", "🙃b🙃c"); - assertStringTrimLeft("UNICODE", "a🙃b🙃c", "a", "🙃b🙃c"); - assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "a", "🙃b🙃c"); - assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "a🙃", "b🙃c"); - assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "a🙃", "b🙃c"); - assertStringTrimLeft("UNICODE", "a🙃b🙃c", "a🙃", "b🙃c"); - assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "a🙃", "b🙃c"); - assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "a🙃b", "c"); - assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "a🙃b", "c"); - assertStringTrimLeft("UNICODE", "a🙃b🙃c", "a🙃b", "c"); - assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "a🙃b", "c"); - assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimLeft("UNICODE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimLeft("UTF8_BINARY", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimLeft("UTF8_LCASE", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimLeft("UNICODE", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimLeft("UNICODE_CI", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimLeft("UTF8_BINARY", "😀😆😃😄", "😀😆", "😃😄"); - assertStringTrimLeft("UTF8_LCASE", "😀😆😃😄", "😀😆", "😃😄"); - assertStringTrimLeft("UNICODE", "😀😆😃😄", "😀😆", "😃😄"); - assertStringTrimLeft("UNICODE_CI", "😀😆😃😄", "😀😆", "😃😄"); - assertStringTrimLeft("UTF8_BINARY", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimLeft("UTF8_LCASE", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimLeft("UNICODE", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimLeft("UNICODE_CI", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimLeft("UTF8_BINARY", "𐐅", "𐐅", ""); - assertStringTrimLeft("UTF8_LCASE", "𐐅", "𐐅", ""); - 
assertStringTrimLeft("UNICODE", "𐐅", "𐐅", ""); - assertStringTrimLeft("UNICODE_CI", "𐐅", "𐐅", ""); - assertStringTrimLeft("UTF8_BINARY", "𐐅", "𐐭", "𐐅"); - assertStringTrimLeft("UTF8_LCASE", "𐐅", "𐐭", ""); - assertStringTrimLeft("UNICODE", "𐐅", "𐐭", "𐐅"); - assertStringTrimLeft("UNICODE_CI", "𐐅", "𐐭", ""); - assertStringTrimLeft("UTF8_BINARY", "𝔸", "𝔸", ""); - assertStringTrimLeft("UTF8_LCASE", "𝔸", "𝔸", ""); - assertStringTrimLeft("UNICODE", "𝔸", "𝔸", ""); - assertStringTrimLeft("UNICODE_CI", "𝔸", "𝔸", ""); - assertStringTrimLeft("UTF8_BINARY", "𝔸", "A", "𝔸"); - assertStringTrimLeft("UTF8_LCASE", "𝔸", "A", "𝔸"); - assertStringTrimLeft("UNICODE", "𝔸", "A", "𝔸"); - assertStringTrimLeft("UNICODE_CI", "𝔸", "A", ""); - assertStringTrimLeft("UTF8_BINARY", "𝔸", "a", "𝔸"); - assertStringTrimLeft("UTF8_LCASE", "𝔸", "a", "𝔸"); - assertStringTrimLeft("UNICODE", "𝔸", "a", "𝔸"); - assertStringTrimLeft("UNICODE_CI", "𝔸", "a", ""); + assertStringTrimLeft(UTF8_BINARY, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimLeft(UTF8_LCASE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimLeft(UNICODE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimLeft(UNICODE_CI, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimLeft(UTF8_BINARY, "a🙃b🙃c", "a", "🙃b🙃c"); + assertStringTrimLeft(UTF8_LCASE, "a🙃b🙃c", "a", "🙃b🙃c"); + assertStringTrimLeft(UNICODE, "a🙃b🙃c", "a", "🙃b🙃c"); + assertStringTrimLeft(UNICODE_CI, "a🙃b🙃c", "a", "🙃b🙃c"); + assertStringTrimLeft(UTF8_BINARY, "a🙃b🙃c", "a🙃", "b🙃c"); + assertStringTrimLeft(UTF8_LCASE, "a🙃b🙃c", "a🙃", "b🙃c"); + assertStringTrimLeft(UNICODE, "a🙃b🙃c", "a🙃", "b🙃c"); + assertStringTrimLeft(UNICODE_CI, "a🙃b🙃c", "a🙃", "b🙃c"); + assertStringTrimLeft(UTF8_BINARY, "a🙃b🙃c", "a🙃b", "c"); + assertStringTrimLeft(UTF8_LCASE, "a🙃b🙃c", "a🙃b", "c"); + assertStringTrimLeft(UNICODE, "a🙃b🙃c", "a🙃b", "c"); + assertStringTrimLeft(UNICODE_CI, "a🙃b🙃c", "a🙃b", "c"); + assertStringTrimLeft(UTF8_BINARY, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimLeft(UTF8_LCASE, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimLeft(UNICODE, 
"a🙃b🙃c", "abc🙃", ""); + assertStringTrimLeft(UNICODE_CI, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimLeft(UTF8_BINARY, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimLeft(UTF8_LCASE, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimLeft(UNICODE, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimLeft(UNICODE_CI, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimLeft(UTF8_BINARY, "😀😆😃😄", "😀😆", "😃😄"); + assertStringTrimLeft(UTF8_LCASE, "😀😆😃😄", "😀😆", "😃😄"); + assertStringTrimLeft(UNICODE, "😀😆😃😄", "😀😆", "😃😄"); + assertStringTrimLeft(UNICODE_CI, "😀😆😃😄", "😀😆", "😃😄"); + assertStringTrimLeft(UTF8_BINARY, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimLeft(UTF8_LCASE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimLeft(UNICODE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimLeft(UNICODE_CI, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimLeft(UTF8_BINARY, "𐐅", "𐐅", ""); + assertStringTrimLeft(UTF8_LCASE, "𐐅", "𐐅", ""); + assertStringTrimLeft(UNICODE, "𐐅", "𐐅", ""); + assertStringTrimLeft(UNICODE_CI, "𐐅", "𐐅", ""); + assertStringTrimLeft(UTF8_BINARY, "𐐅", "𐐭", "𐐅"); + assertStringTrimLeft(UTF8_LCASE, "𐐅", "𐐭", ""); + assertStringTrimLeft(UNICODE, "𐐅", "𐐭", "𐐅"); + assertStringTrimLeft(UNICODE_CI, "𐐅", "𐐭", ""); + assertStringTrimLeft(UTF8_BINARY, "𝔸", "𝔸", ""); + assertStringTrimLeft(UTF8_LCASE, "𝔸", "𝔸", ""); + assertStringTrimLeft(UNICODE, "𝔸", "𝔸", ""); + assertStringTrimLeft(UNICODE_CI, "𝔸", "𝔸", ""); + assertStringTrimLeft(UTF8_BINARY, "𝔸", "A", "𝔸"); + assertStringTrimLeft(UTF8_LCASE, "𝔸", "A", "𝔸"); + assertStringTrimLeft(UNICODE, "𝔸", "A", "𝔸"); + assertStringTrimLeft(UNICODE_CI, "𝔸", "A", ""); + assertStringTrimLeft(UTF8_BINARY, "𝔸", "a", "𝔸"); + assertStringTrimLeft(UTF8_LCASE, "𝔸", "a", "𝔸"); + assertStringTrimLeft(UNICODE, "𝔸", "a", "𝔸"); + assertStringTrimLeft(UNICODE_CI, "𝔸", "a", ""); } /** @@ -3378,274 +3379,274 @@ private void assertStringTrimRight(String collationName, String sourceString, St @Test public void testStringTrimRight() throws SparkException { // Basic tests. 
- assertStringTrimRight("UTF8_BINARY", "", "", ""); - assertStringTrimRight("UTF8_BINARY", "", "xyz", ""); - assertStringTrimRight("UTF8_BINARY", "asd", "", "asd"); - assertStringTrimRight("UTF8_BINARY", "asd", null, "asd"); - assertStringTrimRight("UTF8_BINARY", " asd ", null, " asd"); - assertStringTrimRight("UTF8_BINARY", " a世a ", null, " a世a"); - assertStringTrimRight("UTF8_BINARY", "asd", "x", "asd"); - assertStringTrimRight("UTF8_BINARY", "xxasdxx", "x", "xxasd"); - assertStringTrimRight("UTF8_BINARY", "xa世ax", "x", "xa世a"); - assertStringTrimRight("UTF8_LCASE", "", "", ""); - assertStringTrimRight("UTF8_LCASE", "", "xyz", ""); - assertStringTrimRight("UTF8_LCASE", "asd", "", "asd"); - assertStringTrimRight("UTF8_LCASE", "asd", null, "asd"); - assertStringTrimRight("UTF8_LCASE", " asd ", null, " asd"); - assertStringTrimRight("UTF8_LCASE", " a世a ", null, " a世a"); - assertStringTrimRight("UTF8_LCASE", "asd", "x", "asd"); - assertStringTrimRight("UTF8_LCASE", "xxasdxx", "x", "xxasd"); - assertStringTrimRight("UTF8_LCASE", "xa世ax", "x", "xa世a"); - assertStringTrimRight("UNICODE", "", "", ""); - assertStringTrimRight("UNICODE", "", "xyz", ""); - assertStringTrimRight("UNICODE", "asd", "", "asd"); - assertStringTrimRight("UNICODE", "asd", null, "asd"); - assertStringTrimRight("UNICODE", " asd ", null, " asd"); - assertStringTrimRight("UNICODE", " a世a ", null, " a世a"); - assertStringTrimRight("UNICODE", "asd", "x", "asd"); - assertStringTrimRight("UNICODE", "xxasdxx", "x", "xxasd"); - assertStringTrimRight("UNICODE", "xa世ax", "x", "xa世a"); - assertStringTrimRight("UNICODE_CI", "", "", ""); - assertStringTrimRight("UNICODE_CI", "", "xyz", ""); - assertStringTrimRight("UNICODE_CI", "asd", "", "asd"); - assertStringTrimRight("UNICODE_CI", "asd", null, "asd"); - assertStringTrimRight("UNICODE_CI", " asd ", null, " asd"); - assertStringTrimRight("UNICODE_CI", " a世a ", null, " a世a"); - assertStringTrimRight("UNICODE_CI", "asd", "x", "asd"); - 
assertStringTrimRight("UNICODE_CI", "xxasdxx", "x", "xxasd"); - assertStringTrimRight("UNICODE_CI", "xa世ax", "x", "xa世a"); + assertStringTrimRight(UTF8_BINARY, "", "", ""); + assertStringTrimRight(UTF8_BINARY, "", "xyz", ""); + assertStringTrimRight(UTF8_BINARY, "asd", "", "asd"); + assertStringTrimRight(UTF8_BINARY, "asd", null, "asd"); + assertStringTrimRight(UTF8_BINARY, " asd ", null, " asd"); + assertStringTrimRight(UTF8_BINARY, " a世a ", null, " a世a"); + assertStringTrimRight(UTF8_BINARY, "asd", "x", "asd"); + assertStringTrimRight(UTF8_BINARY, "xxasdxx", "x", "xxasd"); + assertStringTrimRight(UTF8_BINARY, "xa世ax", "x", "xa世a"); + assertStringTrimRight(UTF8_LCASE, "", "", ""); + assertStringTrimRight(UTF8_LCASE, "", "xyz", ""); + assertStringTrimRight(UTF8_LCASE, "asd", "", "asd"); + assertStringTrimRight(UTF8_LCASE, "asd", null, "asd"); + assertStringTrimRight(UTF8_LCASE, " asd ", null, " asd"); + assertStringTrimRight(UTF8_LCASE, " a世a ", null, " a世a"); + assertStringTrimRight(UTF8_LCASE, "asd", "x", "asd"); + assertStringTrimRight(UTF8_LCASE, "xxasdxx", "x", "xxasd"); + assertStringTrimRight(UTF8_LCASE, "xa世ax", "x", "xa世a"); + assertStringTrimRight(UNICODE, "", "", ""); + assertStringTrimRight(UNICODE, "", "xyz", ""); + assertStringTrimRight(UNICODE, "asd", "", "asd"); + assertStringTrimRight(UNICODE, "asd", null, "asd"); + assertStringTrimRight(UNICODE, " asd ", null, " asd"); + assertStringTrimRight(UNICODE, " a世a ", null, " a世a"); + assertStringTrimRight(UNICODE, "asd", "x", "asd"); + assertStringTrimRight(UNICODE, "xxasdxx", "x", "xxasd"); + assertStringTrimRight(UNICODE, "xa世ax", "x", "xa世a"); + assertStringTrimRight(UNICODE_CI, "", "", ""); + assertStringTrimRight(UNICODE_CI, "", "xyz", ""); + assertStringTrimRight(UNICODE_CI, "asd", "", "asd"); + assertStringTrimRight(UNICODE_CI, "asd", null, "asd"); + assertStringTrimRight(UNICODE_CI, " asd ", null, " asd"); + assertStringTrimRight(UNICODE_CI, " a世a ", null, " a世a"); + 
assertStringTrimRight(UNICODE_CI, "asd", "x", "asd"); + assertStringTrimRight(UNICODE_CI, "xxasdxx", "x", "xxasd"); + assertStringTrimRight(UNICODE_CI, "xa世ax", "x", "xa世a"); // Case variation. - assertStringTrimRight("UTF8_BINARY", "ddsXXXaa", "asd", "ddsXXX"); - assertStringTrimRight("UTF8_LCASE", "ddsXXXaa", "AsD", "ddsXXX"); - assertStringTrimRight("UNICODE", "ddsXXXaa", "asd", "ddsXXX"); - assertStringTrimRight("UNICODE_CI", "ddsXXXaa", "AsD", "ddsXXX"); + assertStringTrimRight(UTF8_BINARY, "ddsXXXaa", "asd", "ddsXXX"); + assertStringTrimRight(UTF8_LCASE, "ddsXXXaa", "AsD", "ddsXXX"); + assertStringTrimRight(UNICODE, "ddsXXXaa", "asd", "ddsXXX"); + assertStringTrimRight(UNICODE_CI, "ddsXXXaa", "AsD", "ddsXXX"); // One-to-many case mapping (e.g. Turkish dotted I).. - assertStringTrimRight("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrimRight("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrimRight("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "Ëaaa"); - assertStringTrimRight("UTF8_LCASE", "ẞaaaẞ", "ß", "ẞaaa"); - assertStringTrimRight("UTF8_LCASE", "ßaaaß", "ẞ", "ßaaa"); - assertStringTrimRight("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "Ëaaa"); - assertStringTrimRight("UNICODE", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrimRight("UNICODE", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrimRight("UNICODE", "Ëaaaẞ", "Ëẞ", "Ëaaa"); - assertStringTrimRight("UNICODE_CI", "ẞaaaẞ", "ß", "ẞaaa"); - assertStringTrimRight("UNICODE_CI", "ßaaaß", "ẞ", "ßaaa"); - assertStringTrimRight("UNICODE_CI", "Ëaaaẞ", "Ëẞ", "Ëaaa"); + assertStringTrimRight(UTF8_BINARY, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrimRight(UTF8_BINARY, "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrimRight(UTF8_BINARY, "Ëaaaẞ", "Ëẞ", "Ëaaa"); + assertStringTrimRight(UTF8_LCASE, "ẞaaaẞ", "ß", "ẞaaa"); + assertStringTrimRight(UTF8_LCASE, "ßaaaß", "ẞ", "ßaaa"); + assertStringTrimRight(UTF8_LCASE, "Ëaaaẞ", "Ëẞ", "Ëaaa"); + assertStringTrimRight(UNICODE, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrimRight(UNICODE, "ßaaaß", "ẞ", "ßaaaß"); + 
assertStringTrimRight(UNICODE, "Ëaaaẞ", "Ëẞ", "Ëaaa"); + assertStringTrimRight(UNICODE_CI, "ẞaaaẞ", "ß", "ẞaaa"); + assertStringTrimRight(UNICODE_CI, "ßaaaß", "ẞ", "ßaaa"); + assertStringTrimRight(UNICODE_CI, "Ëaaaẞ", "Ëẞ", "Ëaaa"); // One-to-many case mapping (e.g. Turkish dotted I). - assertStringTrimRight("UTF8_BINARY", "i", "i", ""); - assertStringTrimRight("UTF8_BINARY", "iii", "I", "iii"); - assertStringTrimRight("UTF8_BINARY", "I", "iii", "I"); - assertStringTrimRight("UTF8_BINARY", "ixi", "i", "ix"); - assertStringTrimRight("UTF8_BINARY", "i", "İ", "i"); - assertStringTrimRight("UTF8_BINARY", "i\u0307", "İ", "i\u0307"); - assertStringTrimRight("UTF8_BINARY", "ii\u0307", "İi", "ii\u0307"); - assertStringTrimRight("UTF8_BINARY", "iii\u0307", "İi", "iii\u0307"); - assertStringTrimRight("UTF8_BINARY", "iiii\u0307", "iİ", "iiii\u0307"); - assertStringTrimRight("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "ii\u0307ii\u0307"); - assertStringTrimRight("UTF8_BINARY", "i\u0307", "i", "i\u0307"); - assertStringTrimRight("UTF8_BINARY", "i\u0307", "\u0307", "i"); - assertStringTrimRight("UTF8_BINARY", "i\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_BINARY", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_BINARY", "i\u0307\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_BINARY", "i\u0307i", "i\u0307", ""); - assertStringTrimRight("UTF8_BINARY", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimRight("UTF8_BINARY", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimRight("UTF8_BINARY", "i\u0307İ", "İ", "i\u0307"); - assertStringTrimRight("UTF8_BINARY", "İ", "İ", ""); - assertStringTrimRight("UTF8_BINARY", "IXi", "İ", "IXi"); - assertStringTrimRight("UTF8_BINARY", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimRight("UTF8_BINARY", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrimRight("UTF8_BINARY", "i\u0307x", "ix\u0307İ", ""); - assertStringTrimRight("UTF8_BINARY", "İ", "i", "İ"); - assertStringTrimRight("UTF8_BINARY", "İ", "\u0307", "İ"); 
- assertStringTrimRight("UTF8_BINARY", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimRight("UTF8_BINARY", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimRight("UTF8_BINARY", "xi\u0307", "\u0307IX", "xi"); - assertStringTrimRight("UTF8_LCASE", "i", "i", ""); - assertStringTrimRight("UTF8_LCASE", "iii", "I", ""); - assertStringTrimRight("UTF8_LCASE", "I", "iii", ""); - assertStringTrimRight("UTF8_LCASE", "ixi", "i", "ix"); - assertStringTrimRight("UTF8_LCASE", "i", "İ", "i"); - assertStringTrimRight("UTF8_LCASE", "i\u0307", "İ", ""); - assertStringTrimRight("UTF8_LCASE", "ii\u0307", "İi", ""); - assertStringTrimRight("UTF8_LCASE", "iii\u0307", "İi", ""); - assertStringTrimRight("UTF8_LCASE", "iiii\u0307", "iİ", ""); - assertStringTrimRight("UTF8_LCASE", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307", "i", "i\u0307"); - assertStringTrimRight("UTF8_LCASE", "i\u0307", "\u0307", "i"); - assertStringTrimRight("UTF8_LCASE", "i\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307i", "i\u0307", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimRight("UTF8_LCASE", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimRight("UTF8_LCASE", "i\u0307İ", "İ", ""); - assertStringTrimRight("UTF8_LCASE", "İ", "İ", ""); - assertStringTrimRight("UTF8_LCASE", "IXi", "İ", "IXi"); - assertStringTrimRight("UTF8_LCASE", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimRight("UTF8_LCASE", "i\u0307x", "IXİ", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307x", "I\u0307xİ", ""); - assertStringTrimRight("UTF8_LCASE", "İ", "i", "İ"); - assertStringTrimRight("UTF8_LCASE", "İ", "\u0307", "İ"); - assertStringTrimRight("UTF8_LCASE", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimRight("UTF8_LCASE", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimRight("UTF8_LCASE", "xi\u0307", 
"\u0307IX", ""); - assertStringTrimRight("UNICODE", "i", "i", ""); - assertStringTrimRight("UNICODE", "iii", "I", "iii"); - assertStringTrimRight("UNICODE", "I", "iii", "I"); - assertStringTrimRight("UNICODE", "ixi", "i", "ix"); - assertStringTrimRight("UNICODE", "i", "İ", "i"); - assertStringTrimRight("UNICODE", "i\u0307", "İ", "i\u0307"); - assertStringTrimRight("UTF8_BINARY", "ii\u0307", "İi", "ii\u0307"); - assertStringTrimRight("UTF8_BINARY", "iii\u0307", "İi", "iii\u0307"); - assertStringTrimRight("UTF8_BINARY", "iiii\u0307", "iİ", "iiii\u0307"); - assertStringTrimRight("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "ii\u0307ii\u0307"); - assertStringTrimRight("UNICODE", "i\u0307", "i", "i\u0307"); - assertStringTrimRight("UNICODE", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimRight("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - assertStringTrimRight("UNICODE", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrimRight("UNICODE", "i\u0307i", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimRight("UNICODE", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimRight("UNICODE", "i\u0307İ", "İ", "i\u0307"); - assertStringTrimRight("UNICODE", "İ", "İ", ""); - assertStringTrimRight("UNICODE", "IXi", "İ", "IXi"); - assertStringTrimRight("UNICODE", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimRight("UNICODE", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrimRight("UNICODE", "i\u0307x", "ix\u0307İ", "i\u0307"); - assertStringTrimRight("UNICODE", "İ", "i", "İ"); - assertStringTrimRight("UNICODE", "İ", "\u0307", "İ"); - assertStringTrimRight("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimRight("UNICODE", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimRight("UNICODE", "xi\u0307", "\u0307IX", "xi\u0307"); - assertStringTrimRight("UNICODE_CI", 
"i", "i", ""); - assertStringTrimRight("UNICODE_CI", "iii", "I", ""); - assertStringTrimRight("UNICODE_CI", "I", "iii", ""); - assertStringTrimRight("UNICODE_CI", "ixi", "i", "ix"); - assertStringTrimRight("UNICODE_CI", "i", "İ", "i"); - assertStringTrimRight("UNICODE_CI", "i\u0307", "İ", ""); - assertStringTrimRight("UNICODE_CI", "ii\u0307", "İi", ""); - assertStringTrimRight("UNICODE_CI", "iii\u0307", "İi", ""); - assertStringTrimRight("UNICODE_CI", "iiii\u0307", "iİ", ""); - assertStringTrimRight("UNICODE_CI", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrimRight("UNICODE_CI", "i\u0307", "i", "i\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307i", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimRight("UNICODE_CI", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimRight("UNICODE_CI", "i\u0307İ", "İ", ""); - assertStringTrimRight("UNICODE_CI", "İ", "İ", ""); - assertStringTrimRight("UNICODE_CI", "IXi", "İ", "IXi"); - assertStringTrimRight("UNICODE_CI", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307x", "IXİ", ""); - assertStringTrimRight("UNICODE_CI", "i\u0307x", "I\u0307xİ", ""); - assertStringTrimRight("UNICODE_CI", "İ", "i", "İ"); - assertStringTrimRight("UNICODE_CI", "İ", "\u0307", "İ"); - assertStringTrimRight("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE_CI", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimRight("UNICODE_CI", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimRight("UNICODE_CI", "xi\u0307", "\u0307IX", "xi\u0307"); + assertStringTrimRight(UTF8_BINARY, "i", "i", ""); + assertStringTrimRight(UTF8_BINARY, "iii", 
"I", "iii"); + assertStringTrimRight(UTF8_BINARY, "I", "iii", "I"); + assertStringTrimRight(UTF8_BINARY, "ixi", "i", "ix"); + assertStringTrimRight(UTF8_BINARY, "i", "İ", "i"); + assertStringTrimRight(UTF8_BINARY, "i\u0307", "İ", "i\u0307"); + assertStringTrimRight(UTF8_BINARY, "ii\u0307", "İi", "ii\u0307"); + assertStringTrimRight(UTF8_BINARY, "iii\u0307", "İi", "iii\u0307"); + assertStringTrimRight(UTF8_BINARY, "iiii\u0307", "iİ", "iiii\u0307"); + assertStringTrimRight(UTF8_BINARY, "ii\u0307ii\u0307", "iİ", "ii\u0307ii\u0307"); + assertStringTrimRight(UTF8_BINARY, "i\u0307", "i", "i\u0307"); + assertStringTrimRight(UTF8_BINARY, "i\u0307", "\u0307", "i"); + assertStringTrimRight(UTF8_BINARY, "i\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_BINARY, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_BINARY, "i\u0307\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_BINARY, "i\u0307i", "i\u0307", ""); + assertStringTrimRight(UTF8_BINARY, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimRight(UTF8_BINARY, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimRight(UTF8_BINARY, "i\u0307İ", "İ", "i\u0307"); + assertStringTrimRight(UTF8_BINARY, "İ", "İ", ""); + assertStringTrimRight(UTF8_BINARY, "IXi", "İ", "IXi"); + assertStringTrimRight(UTF8_BINARY, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimRight(UTF8_BINARY, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrimRight(UTF8_BINARY, "i\u0307x", "ix\u0307İ", ""); + assertStringTrimRight(UTF8_BINARY, "İ", "i", "İ"); + assertStringTrimRight(UTF8_BINARY, "İ", "\u0307", "İ"); + assertStringTrimRight(UTF8_BINARY, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimRight(UTF8_BINARY, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimRight(UTF8_BINARY, "xi\u0307", "\u0307IX", "xi"); + assertStringTrimRight(UTF8_LCASE, "i", "i", ""); + assertStringTrimRight(UTF8_LCASE, "iii", "I", ""); + assertStringTrimRight(UTF8_LCASE, "I", "iii", ""); + assertStringTrimRight(UTF8_LCASE, "ixi", "i", "ix"); + 
assertStringTrimRight(UTF8_LCASE, "i", "İ", "i"); + assertStringTrimRight(UTF8_LCASE, "i\u0307", "İ", ""); + assertStringTrimRight(UTF8_LCASE, "ii\u0307", "İi", ""); + assertStringTrimRight(UTF8_LCASE, "iii\u0307", "İi", ""); + assertStringTrimRight(UTF8_LCASE, "iiii\u0307", "iİ", ""); + assertStringTrimRight(UTF8_LCASE, "ii\u0307ii\u0307", "iİ", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307", "i", "i\u0307"); + assertStringTrimRight(UTF8_LCASE, "i\u0307", "\u0307", "i"); + assertStringTrimRight(UTF8_LCASE, "i\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307i", "i\u0307", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimRight(UTF8_LCASE, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimRight(UTF8_LCASE, "i\u0307İ", "İ", ""); + assertStringTrimRight(UTF8_LCASE, "İ", "İ", ""); + assertStringTrimRight(UTF8_LCASE, "IXi", "İ", "IXi"); + assertStringTrimRight(UTF8_LCASE, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimRight(UTF8_LCASE, "i\u0307x", "IXİ", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307x", "I\u0307xİ", ""); + assertStringTrimRight(UTF8_LCASE, "İ", "i", "İ"); + assertStringTrimRight(UTF8_LCASE, "İ", "\u0307", "İ"); + assertStringTrimRight(UTF8_LCASE, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimRight(UTF8_LCASE, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimRight(UTF8_LCASE, "xi\u0307", "\u0307IX", ""); + assertStringTrimRight(UNICODE, "i", "i", ""); + assertStringTrimRight(UNICODE, "iii", "I", "iii"); + assertStringTrimRight(UNICODE, "I", "iii", "I"); + assertStringTrimRight(UNICODE, "ixi", "i", "ix"); + assertStringTrimRight(UNICODE, "i", "İ", "i"); + assertStringTrimRight(UNICODE, "i\u0307", "İ", "i\u0307"); + assertStringTrimRight(UTF8_BINARY, "ii\u0307", "İi", "ii\u0307"); + assertStringTrimRight(UTF8_BINARY, "iii\u0307", "İi", "iii\u0307"); + 
assertStringTrimRight(UTF8_BINARY, "iiii\u0307", "iİ", "iiii\u0307"); + assertStringTrimRight(UTF8_BINARY, "ii\u0307ii\u0307", "iİ", "ii\u0307ii\u0307"); + assertStringTrimRight(UNICODE, "i\u0307", "i", "i\u0307"); + assertStringTrimRight(UNICODE, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimRight(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); + assertStringTrimRight(UNICODE, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrimRight(UNICODE, "i\u0307i", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimRight(UNICODE, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimRight(UNICODE, "i\u0307İ", "İ", "i\u0307"); + assertStringTrimRight(UNICODE, "İ", "İ", ""); + assertStringTrimRight(UNICODE, "IXi", "İ", "IXi"); + assertStringTrimRight(UNICODE, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimRight(UNICODE, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrimRight(UNICODE, "i\u0307x", "ix\u0307İ", "i\u0307"); + assertStringTrimRight(UNICODE, "İ", "i", "İ"); + assertStringTrimRight(UNICODE, "İ", "\u0307", "İ"); + assertStringTrimRight(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimRight(UNICODE, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimRight(UNICODE, "xi\u0307", "\u0307IX", "xi\u0307"); + assertStringTrimRight(UNICODE_CI, "i", "i", ""); + assertStringTrimRight(UNICODE_CI, "iii", "I", ""); + assertStringTrimRight(UNICODE_CI, "I", "iii", ""); + assertStringTrimRight(UNICODE_CI, "ixi", "i", "ix"); + assertStringTrimRight(UNICODE_CI, "i", "İ", "i"); + assertStringTrimRight(UNICODE_CI, "i\u0307", "İ", ""); + assertStringTrimRight(UNICODE_CI, "ii\u0307", "İi", ""); + assertStringTrimRight(UNICODE_CI, "iii\u0307", "İi", ""); + assertStringTrimRight(UNICODE_CI, "iiii\u0307", "iİ", ""); + assertStringTrimRight(UNICODE_CI, "ii\u0307ii\u0307", "iİ", ""); + 
assertStringTrimRight(UNICODE_CI, "i\u0307", "i", "i\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307i", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimRight(UNICODE_CI, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimRight(UNICODE_CI, "i\u0307İ", "İ", ""); + assertStringTrimRight(UNICODE_CI, "İ", "İ", ""); + assertStringTrimRight(UNICODE_CI, "IXi", "İ", "IXi"); + assertStringTrimRight(UNICODE_CI, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307x", "IXİ", ""); + assertStringTrimRight(UNICODE_CI, "i\u0307x", "I\u0307xİ", ""); + assertStringTrimRight(UNICODE_CI, "İ", "i", "İ"); + assertStringTrimRight(UNICODE_CI, "İ", "\u0307", "İ"); + assertStringTrimRight(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE_CI, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimRight(UNICODE_CI, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimRight(UNICODE_CI, "xi\u0307", "\u0307IX", "xi\u0307"); // Conditional case mapping (e.g. Greek sigmas). 
- assertStringTrimRight("UTF8_BINARY", "ςxς", "σ", "ςxς"); - assertStringTrimRight("UTF8_BINARY", "ςxς", "ς", "ςx"); - assertStringTrimRight("UTF8_BINARY", "ςxς", "Σ", "ςxς"); - assertStringTrimRight("UTF8_BINARY", "σxσ", "σ", "σx"); - assertStringTrimRight("UTF8_BINARY", "σxσ", "ς", "σxσ"); - assertStringTrimRight("UTF8_BINARY", "σxσ", "Σ", "σxσ"); - assertStringTrimRight("UTF8_BINARY", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrimRight("UTF8_BINARY", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrimRight("UTF8_BINARY", "ΣxΣ", "Σ", "Σx"); - assertStringTrimRight("UTF8_LCASE", "ςxς", "σ", "ςx"); - assertStringTrimRight("UTF8_LCASE", "ςxς", "ς", "ςx"); - assertStringTrimRight("UTF8_LCASE", "ςxς", "Σ", "ςx"); - assertStringTrimRight("UTF8_LCASE", "σxσ", "σ", "σx"); - assertStringTrimRight("UTF8_LCASE", "σxσ", "ς", "σx"); - assertStringTrimRight("UTF8_LCASE", "σxσ", "Σ", "σx"); - assertStringTrimRight("UTF8_LCASE", "ΣxΣ", "σ", "Σx"); - assertStringTrimRight("UTF8_LCASE", "ΣxΣ", "ς", "Σx"); - assertStringTrimRight("UTF8_LCASE", "ΣxΣ", "Σ", "Σx"); - assertStringTrimRight("UNICODE", "ςxς", "σ", "ςxς"); - assertStringTrimRight("UNICODE", "ςxς", "ς", "ςx"); - assertStringTrimRight("UNICODE", "ςxς", "Σ", "ςxς"); - assertStringTrimRight("UNICODE", "σxσ", "σ", "σx"); - assertStringTrimRight("UNICODE", "σxσ", "ς", "σxσ"); - assertStringTrimRight("UNICODE", "σxσ", "Σ", "σxσ"); - assertStringTrimRight("UNICODE", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrimRight("UNICODE", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrimRight("UNICODE", "ΣxΣ", "Σ", "Σx"); - assertStringTrimRight("UNICODE_CI", "ςxς", "σ", "ςx"); - assertStringTrimRight("UNICODE_CI", "ςxς", "ς", "ςx"); - assertStringTrimRight("UNICODE_CI", "ςxς", "Σ", "ςx"); - assertStringTrimRight("UNICODE_CI", "σxσ", "σ", "σx"); - assertStringTrimRight("UNICODE_CI", "σxσ", "ς", "σx"); - assertStringTrimRight("UNICODE_CI", "σxσ", "Σ", "σx"); - assertStringTrimRight("UNICODE_CI", "ΣxΣ", "σ", "Σx"); - assertStringTrimRight("UNICODE_CI", "ΣxΣ", "ς", "Σx"); - 
assertStringTrimRight("UNICODE_CI", "ΣxΣ", "Σ", "Σx"); + assertStringTrimRight(UTF8_BINARY, "ςxς", "σ", "ςxς"); + assertStringTrimRight(UTF8_BINARY, "ςxς", "ς", "ςx"); + assertStringTrimRight(UTF8_BINARY, "ςxς", "Σ", "ςxς"); + assertStringTrimRight(UTF8_BINARY, "σxσ", "σ", "σx"); + assertStringTrimRight(UTF8_BINARY, "σxσ", "ς", "σxσ"); + assertStringTrimRight(UTF8_BINARY, "σxσ", "Σ", "σxσ"); + assertStringTrimRight(UTF8_BINARY, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrimRight(UTF8_BINARY, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrimRight(UTF8_BINARY, "ΣxΣ", "Σ", "Σx"); + assertStringTrimRight(UTF8_LCASE, "ςxς", "σ", "ςx"); + assertStringTrimRight(UTF8_LCASE, "ςxς", "ς", "ςx"); + assertStringTrimRight(UTF8_LCASE, "ςxς", "Σ", "ςx"); + assertStringTrimRight(UTF8_LCASE, "σxσ", "σ", "σx"); + assertStringTrimRight(UTF8_LCASE, "σxσ", "ς", "σx"); + assertStringTrimRight(UTF8_LCASE, "σxσ", "Σ", "σx"); + assertStringTrimRight(UTF8_LCASE, "ΣxΣ", "σ", "Σx"); + assertStringTrimRight(UTF8_LCASE, "ΣxΣ", "ς", "Σx"); + assertStringTrimRight(UTF8_LCASE, "ΣxΣ", "Σ", "Σx"); + assertStringTrimRight(UNICODE, "ςxς", "σ", "ςxς"); + assertStringTrimRight(UNICODE, "ςxς", "ς", "ςx"); + assertStringTrimRight(UNICODE, "ςxς", "Σ", "ςxς"); + assertStringTrimRight(UNICODE, "σxσ", "σ", "σx"); + assertStringTrimRight(UNICODE, "σxσ", "ς", "σxσ"); + assertStringTrimRight(UNICODE, "σxσ", "Σ", "σxσ"); + assertStringTrimRight(UNICODE, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrimRight(UNICODE, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrimRight(UNICODE, "ΣxΣ", "Σ", "Σx"); + assertStringTrimRight(UNICODE_CI, "ςxς", "σ", "ςx"); + assertStringTrimRight(UNICODE_CI, "ςxς", "ς", "ςx"); + assertStringTrimRight(UNICODE_CI, "ςxς", "Σ", "ςx"); + assertStringTrimRight(UNICODE_CI, "σxσ", "σ", "σx"); + assertStringTrimRight(UNICODE_CI, "σxσ", "ς", "σx"); + assertStringTrimRight(UNICODE_CI, "σxσ", "Σ", "σx"); + assertStringTrimRight(UNICODE_CI, "ΣxΣ", "σ", "Σx"); + assertStringTrimRight(UNICODE_CI, "ΣxΣ", "ς", "Σx"); + 
assertStringTrimRight(UNICODE_CI, "ΣxΣ", "Σ", "Σx"); // Unicode normalization. - assertStringTrimRight("UTF8_BINARY", "åβγδa\u030A", "å", "åβγδa\u030A"); - assertStringTrimRight("UTF8_LCASE", "åβγδa\u030A", "Å", "åβγδa\u030A"); - assertStringTrimRight("UNICODE", "åβγδa\u030A", "å", "åβγδ"); - assertStringTrimRight("UNICODE_CI", "åβγδa\u030A", "Å", "åβγδ"); + assertStringTrimRight(UTF8_BINARY, "åβγδa\u030A", "å", "åβγδa\u030A"); + assertStringTrimRight(UTF8_LCASE, "åβγδa\u030A", "Å", "åβγδa\u030A"); + assertStringTrimRight(UNICODE, "åβγδa\u030A", "å", "åβγδ"); + assertStringTrimRight(UNICODE_CI, "åβγδa\u030A", "Å", "åβγδ"); // Surrogate pairs. - assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimRight("UNICODE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "c", "a🙃b🙃"); - assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "c", "a🙃b🙃"); - assertStringTrimRight("UNICODE", "a🙃b🙃c", "c", "a🙃b🙃"); - assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "c", "a🙃b🙃"); - assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "c🙃", "a🙃b"); - assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "c🙃", "a🙃b"); - assertStringTrimRight("UNICODE", "a🙃b🙃c", "c🙃", "a🙃b"); - assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "c🙃", "a🙃b"); - assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "c🙃b", "a"); - assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "c🙃b", "a"); - assertStringTrimRight("UNICODE", "a🙃b🙃c", "c🙃b", "a"); - assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "c🙃b", "a"); - assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimRight("UNICODE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimRight("UTF8_BINARY", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimRight("UTF8_LCASE", "😀😆😃😄", "😆😃", "😀😆😃😄"); - 
assertStringTrimRight("UNICODE", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimRight("UNICODE_CI", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimRight("UTF8_BINARY", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrimRight("UTF8_LCASE", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrimRight("UNICODE", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrimRight("UNICODE_CI", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrimRight("UTF8_BINARY", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimRight("UTF8_LCASE", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimRight("UNICODE", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimRight("UNICODE_CI", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimRight("UTF8_BINARY", "𐐅", "𐐅", ""); - assertStringTrimRight("UTF8_LCASE", "𐐅", "𐐅", ""); - assertStringTrimRight("UNICODE", "𐐅", "𐐅", ""); - assertStringTrimRight("UNICODE_CI", "𐐅", "𐐅", ""); - assertStringTrimRight("UTF8_BINARY", "𐐅", "𐐭", "𐐅"); - assertStringTrimRight("UTF8_LCASE", "𐐅", "𐐭", ""); - assertStringTrimRight("UNICODE", "𐐅", "𐐭", "𐐅"); - assertStringTrimRight("UNICODE_CI", "𐐅", "𐐭", ""); - assertStringTrimRight("UTF8_BINARY", "𝔸", "𝔸", ""); - assertStringTrimRight("UTF8_LCASE", "𝔸", "𝔸", ""); - assertStringTrimRight("UNICODE", "𝔸", "𝔸", ""); - assertStringTrimRight("UNICODE_CI", "𝔸", "𝔸", ""); - assertStringTrimRight("UTF8_BINARY", "𝔸", "A", "𝔸"); - assertStringTrimRight("UTF8_LCASE", "𝔸", "A", "𝔸"); - assertStringTrimRight("UNICODE", "𝔸", "A", "𝔸"); - assertStringTrimRight("UNICODE_CI", "𝔸", "A", ""); - assertStringTrimRight("UTF8_BINARY", "𝔸", "a", "𝔸"); - assertStringTrimRight("UTF8_LCASE", "𝔸", "a", "𝔸"); - assertStringTrimRight("UNICODE", "𝔸", "a", "𝔸"); - assertStringTrimRight("UNICODE_CI", "𝔸", "a", ""); + assertStringTrimRight(UTF8_BINARY, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimRight(UTF8_LCASE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimRight(UNICODE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimRight(UNICODE_CI, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimRight(UTF8_BINARY, "a🙃b🙃c", "c", "a🙃b🙃"); + assertStringTrimRight(UTF8_LCASE, "a🙃b🙃c", "c", "a🙃b🙃"); 
+ assertStringTrimRight(UNICODE, "a🙃b🙃c", "c", "a🙃b🙃"); + assertStringTrimRight(UNICODE_CI, "a🙃b🙃c", "c", "a🙃b🙃"); + assertStringTrimRight(UTF8_BINARY, "a🙃b🙃c", "c🙃", "a🙃b"); + assertStringTrimRight(UTF8_LCASE, "a🙃b🙃c", "c🙃", "a🙃b"); + assertStringTrimRight(UNICODE, "a🙃b🙃c", "c🙃", "a🙃b"); + assertStringTrimRight(UNICODE_CI, "a🙃b🙃c", "c🙃", "a🙃b"); + assertStringTrimRight(UTF8_BINARY, "a🙃b🙃c", "c🙃b", "a"); + assertStringTrimRight(UTF8_LCASE, "a🙃b🙃c", "c🙃b", "a"); + assertStringTrimRight(UNICODE, "a🙃b🙃c", "c🙃b", "a"); + assertStringTrimRight(UNICODE_CI, "a🙃b🙃c", "c🙃b", "a"); + assertStringTrimRight(UTF8_BINARY, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimRight(UTF8_LCASE, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimRight(UNICODE, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimRight(UNICODE_CI, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimRight(UTF8_BINARY, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimRight(UTF8_LCASE, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimRight(UNICODE, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimRight(UNICODE_CI, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimRight(UTF8_BINARY, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrimRight(UTF8_LCASE, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrimRight(UNICODE, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrimRight(UNICODE_CI, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrimRight(UTF8_BINARY, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimRight(UTF8_LCASE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimRight(UNICODE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimRight(UNICODE_CI, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimRight(UTF8_BINARY, "𐐅", "𐐅", ""); + assertStringTrimRight(UTF8_LCASE, "𐐅", "𐐅", ""); + assertStringTrimRight(UNICODE, "𐐅", "𐐅", ""); + assertStringTrimRight(UNICODE_CI, "𐐅", "𐐅", ""); + assertStringTrimRight(UTF8_BINARY, "𐐅", "𐐭", "𐐅"); + assertStringTrimRight(UTF8_LCASE, "𐐅", "𐐭", ""); + assertStringTrimRight(UNICODE, "𐐅", "𐐭", "𐐅"); + assertStringTrimRight(UNICODE_CI, "𐐅", "𐐭", ""); + assertStringTrimRight(UTF8_BINARY, "𝔸", "𝔸", ""); + assertStringTrimRight(UTF8_LCASE, "𝔸", "𝔸", 
""); + assertStringTrimRight(UNICODE, "𝔸", "𝔸", ""); + assertStringTrimRight(UNICODE_CI, "𝔸", "𝔸", ""); + assertStringTrimRight(UTF8_BINARY, "𝔸", "A", "𝔸"); + assertStringTrimRight(UTF8_LCASE, "𝔸", "A", "𝔸"); + assertStringTrimRight(UNICODE, "𝔸", "A", "𝔸"); + assertStringTrimRight(UNICODE_CI, "𝔸", "A", ""); + assertStringTrimRight(UTF8_BINARY, "𝔸", "a", "𝔸"); + assertStringTrimRight(UTF8_LCASE, "𝔸", "a", "𝔸"); + assertStringTrimRight(UNICODE, "𝔸", "a", "𝔸"); + assertStringTrimRight(UNICODE_CI, "𝔸", "a", ""); } /** @@ -3664,211 +3665,211 @@ private void assertStringTranslate(String inputString, String matchingString, @Test public void testStringTranslate() throws SparkException { // Empty strings. - assertStringTranslate("", "", "", "UTF8_BINARY", ""); - assertStringTranslate("", "", "", "UTF8_LCASE", ""); - assertStringTranslate("", "", "", "UNICODE", ""); - assertStringTranslate("", "", "", "UNICODE_CI", ""); - assertStringTranslate("abc", "", "", "UTF8_BINARY", "abc"); - assertStringTranslate("abc", "", "", "UTF8_LCASE", "abc"); - assertStringTranslate("abc", "", "", "UNICODE", "abc"); - assertStringTranslate("abc", "", "", "UNICODE_CI", "abc"); - assertStringTranslate("", "b", "", "UTF8_BINARY", ""); - assertStringTranslate("", "b", "", "UTF8_LCASE", ""); - assertStringTranslate("", "b", "", "UNICODE", ""); - assertStringTranslate("", "b", "", "UNICODE_CI", ""); - assertStringTranslate("", "", "x", "UTF8_BINARY", ""); - assertStringTranslate("", "", "x", "UTF8_LCASE", ""); - assertStringTranslate("", "", "x", "UNICODE", ""); - assertStringTranslate("", "", "x", "UNICODE_CI", ""); - assertStringTranslate("abc", "b", "", "UTF8_BINARY", "ac"); - assertStringTranslate("abc", "b", "", "UTF8_LCASE", "ac"); - assertStringTranslate("abc", "b", "", "UNICODE", "ac"); - assertStringTranslate("abc", "b", "", "UNICODE_CI", "ac"); - assertStringTranslate("abc", "", "x", "UTF8_BINARY", "abc"); - assertStringTranslate("abc", "", "x", "UTF8_LCASE", "abc"); - 
assertStringTranslate("abc", "", "x", "UNICODE", "abc"); - assertStringTranslate("abc", "", "x", "UNICODE_CI", "abc"); - assertStringTranslate("", "b", "x", "UTF8_BINARY", ""); - assertStringTranslate("", "b", "x", "UTF8_LCASE", ""); - assertStringTranslate("", "b", "x", "UNICODE", ""); - assertStringTranslate("", "b", "x", "UNICODE_CI", ""); + assertStringTranslate("", "", "", UTF8_BINARY, ""); + assertStringTranslate("", "", "", UTF8_LCASE, ""); + assertStringTranslate("", "", "", UNICODE, ""); + assertStringTranslate("", "", "", UNICODE_CI, ""); + assertStringTranslate("abc", "", "", UTF8_BINARY, "abc"); + assertStringTranslate("abc", "", "", UTF8_LCASE, "abc"); + assertStringTranslate("abc", "", "", UNICODE, "abc"); + assertStringTranslate("abc", "", "", UNICODE_CI, "abc"); + assertStringTranslate("", "b", "", UTF8_BINARY, ""); + assertStringTranslate("", "b", "", UTF8_LCASE, ""); + assertStringTranslate("", "b", "", UNICODE, ""); + assertStringTranslate("", "b", "", UNICODE_CI, ""); + assertStringTranslate("", "", "x", UTF8_BINARY, ""); + assertStringTranslate("", "", "x", UTF8_LCASE, ""); + assertStringTranslate("", "", "x", UNICODE, ""); + assertStringTranslate("", "", "x", UNICODE_CI, ""); + assertStringTranslate("abc", "b", "", UTF8_BINARY, "ac"); + assertStringTranslate("abc", "b", "", UTF8_LCASE, "ac"); + assertStringTranslate("abc", "b", "", UNICODE, "ac"); + assertStringTranslate("abc", "b", "", UNICODE_CI, "ac"); + assertStringTranslate("abc", "", "x", UTF8_BINARY, "abc"); + assertStringTranslate("abc", "", "x", UTF8_LCASE, "abc"); + assertStringTranslate("abc", "", "x", UNICODE, "abc"); + assertStringTranslate("abc", "", "x", UNICODE_CI, "abc"); + assertStringTranslate("", "b", "x", UTF8_BINARY, ""); + assertStringTranslate("", "b", "x", UTF8_LCASE, ""); + assertStringTranslate("", "b", "x", UNICODE, ""); + assertStringTranslate("", "b", "x", UNICODE_CI, ""); // Basic tests. 
- assertStringTranslate("abc", "b", "x", "UTF8_BINARY", "axc"); - assertStringTranslate("abc", "b", "x", "UTF8_LCASE", "axc"); - assertStringTranslate("abc", "b", "x", "UNICODE", "axc"); - assertStringTranslate("abc", "b", "x", "UNICODE_CI", "axc"); - assertStringTranslate("Translate", "Rnlt", "12", "UTF8_BINARY", "Tra2sae"); - assertStringTranslate("Translate", "Rnlt", "12", "UTF8_LCASE", "1a2sae"); - assertStringTranslate("Translate", "Rnlt", "12", "UNICODE", "Tra2sae"); - assertStringTranslate("Translate", "Rnlt", "12", "UNICODE_CI", "1a2sae"); - assertStringTranslate("Translate", "Rn", "1234", "UTF8_BINARY", "Tra2slate"); - assertStringTranslate("Translate", "Rn", "1234", "UTF8_LCASE", "T1a2slate"); - assertStringTranslate("Translate", "Rn", "1234", "UNICODE", "Tra2slate"); - assertStringTranslate("Translate", "Rn", "1234", "UNICODE_CI", "T1a2slate"); - assertStringTranslate("Translate", "Rnlt", "1234", "UTF8_BINARY", "Tra2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "1234", "UTF8_LCASE", "41a2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "1234", "UNICODE", "Tra2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "1234", "UNICODE_CI", "41a2s3a4e"); - assertStringTranslate("TRanslate", "rnlt", "XxXx", "UTF8_BINARY", "TRaxsXaxe"); - assertStringTranslate("TRanslate", "rnlt", "XxXx", "UTF8_LCASE", "xXaxsXaxe"); - assertStringTranslate("TRanslate", "rnlt", "XxXx", "UNICODE", "TRaxsXaxe"); - assertStringTranslate("TRanslate", "rnlt", "XxXx", "UNICODE_CI", "xXaxsXaxe"); - assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UTF8_BINARY", "TxaxsXaxeX"); - assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UTF8_LCASE", "xxaxsXaxex"); - assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UNICODE", "TxaxsXaxeX"); - assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UNICODE_CI", "xxaxsXaxex"); - assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UTF8_BINARY", "TXaxsXaxex"); - assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", 
"UTF8_LCASE", "xXaxsXaxeX"); - assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UNICODE", "TXaxsXaxex"); - assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UNICODE_CI", "xXaxsXaxeX"); - assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UTF8_BINARY", "test大千世AX大千世A"); - assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UTF8_LCASE", "test大千世AB大千世A"); - assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UNICODE", "test大千世AX大千世A"); - assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UNICODE_CI", "test大千世AB大千世A"); - assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UTF8_BINARY", "大千世界test大千世界"); - assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UTF8_LCASE", "大千世界abca大千世界"); - assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UNICODE", "大千世界test大千世界"); - assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UNICODE_CI", "大千世界abca大千世界"); - assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UTF8_BINARY", "Oeso大千世界大千世界"); - assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UTF8_LCASE", "oeso大千世界大千世界"); - assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UNICODE", "Oeso大千世界大千世界"); - assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UNICODE_CI", "oeso大千世界大千世界"); - assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UTF8_BINARY", "大千世界大千世界oesO"); - assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UTF8_LCASE", "大千世界大千世界OesO"); - assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UNICODE", "大千世界大千世界oesO"); - assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UNICODE_CI", "大千世界大千世界OesO"); - assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UTF8_BINARY", "世世世界世世世界tesT"); - assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UTF8_LCASE", "世世世界世世世界tesT"); - assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UNICODE", "世世世界世世世界tesT"); - assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UNICODE_CI", "世世世界世世世界tesT"); - assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UTF8_BINARY", "Tr4234e"); - assertStringTranslate("Translate", 
"Rnlasdfjhgadt", "1234", "UTF8_LCASE", "14234e"); - assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UNICODE", "Tr4234e"); - assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UNICODE_CI", "14234e"); - assertStringTranslate("Translate", "Rnlt", "123495834634", "UTF8_BINARY", "Tra2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "123495834634", "UTF8_LCASE", "41a2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "123495834634", "UNICODE", "Tra2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "123495834634", "UNICODE_CI", "41a2s3a4e"); - assertStringTranslate("abcdef", "abcde", "123", "UTF8_BINARY", "123f"); - assertStringTranslate("abcdef", "abcde", "123", "UTF8_LCASE", "123f"); - assertStringTranslate("abcdef", "abcde", "123", "UNICODE", "123f"); - assertStringTranslate("abcdef", "abcde", "123", "UNICODE_CI", "123f"); + assertStringTranslate("abc", "b", "x", UTF8_BINARY, "axc"); + assertStringTranslate("abc", "b", "x", UTF8_LCASE, "axc"); + assertStringTranslate("abc", "b", "x", UNICODE, "axc"); + assertStringTranslate("abc", "b", "x", UNICODE_CI, "axc"); + assertStringTranslate("Translate", "Rnlt", "12", UTF8_BINARY, "Tra2sae"); + assertStringTranslate("Translate", "Rnlt", "12", UTF8_LCASE, "1a2sae"); + assertStringTranslate("Translate", "Rnlt", "12", UNICODE, "Tra2sae"); + assertStringTranslate("Translate", "Rnlt", "12", UNICODE_CI, "1a2sae"); + assertStringTranslate("Translate", "Rn", "1234", UTF8_BINARY, "Tra2slate"); + assertStringTranslate("Translate", "Rn", "1234", UTF8_LCASE, "T1a2slate"); + assertStringTranslate("Translate", "Rn", "1234", UNICODE, "Tra2slate"); + assertStringTranslate("Translate", "Rn", "1234", UNICODE_CI, "T1a2slate"); + assertStringTranslate("Translate", "Rnlt", "1234", UTF8_BINARY, "Tra2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "1234", UTF8_LCASE, "41a2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "1234", UNICODE, "Tra2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "1234", 
UNICODE_CI, "41a2s3a4e"); + assertStringTranslate("TRanslate", "rnlt", "XxXx", UTF8_BINARY, "TRaxsXaxe"); + assertStringTranslate("TRanslate", "rnlt", "XxXx", UTF8_LCASE, "xXaxsXaxe"); + assertStringTranslate("TRanslate", "rnlt", "XxXx", UNICODE, "TRaxsXaxe"); + assertStringTranslate("TRanslate", "rnlt", "XxXx", UNICODE_CI, "xXaxsXaxe"); + assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", UTF8_BINARY, "TxaxsXaxeX"); + assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", UTF8_LCASE, "xxaxsXaxex"); + assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", UNICODE, "TxaxsXaxeX"); + assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", UNICODE_CI, "xxaxsXaxex"); + assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", UTF8_BINARY, "TXaxsXaxex"); + assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", UTF8_LCASE, "xXaxsXaxeX"); + assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", UNICODE, "TXaxsXaxex"); + assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", UNICODE_CI, "xXaxsXaxeX"); + assertStringTranslate("test大千世界X大千世界", "界x", "AB", UTF8_BINARY, "test大千世AX大千世A"); + assertStringTranslate("test大千世界X大千世界", "界x", "AB", UTF8_LCASE, "test大千世AB大千世A"); + assertStringTranslate("test大千世界X大千世界", "界x", "AB", UNICODE, "test大千世AX大千世A"); + assertStringTranslate("test大千世界X大千世界", "界x", "AB", UNICODE_CI, "test大千世AB大千世A"); + assertStringTranslate("大千世界test大千世界", "TEST", "abcd", UTF8_BINARY, "大千世界test大千世界"); + assertStringTranslate("大千世界test大千世界", "TEST", "abcd", UTF8_LCASE, "大千世界abca大千世界"); + assertStringTranslate("大千世界test大千世界", "TEST", "abcd", UNICODE, "大千世界test大千世界"); + assertStringTranslate("大千世界test大千世界", "TEST", "abcd", UNICODE_CI, "大千世界abca大千世界"); + assertStringTranslate("Test大千世界大千世界", "tT", "oO", UTF8_BINARY, "Oeso大千世界大千世界"); + assertStringTranslate("Test大千世界大千世界", "tT", "oO", UTF8_LCASE, "oeso大千世界大千世界"); + assertStringTranslate("Test大千世界大千世界", "tT", "oO", UNICODE, "Oeso大千世界大千世界"); + assertStringTranslate("Test大千世界大千世界", "tT", "oO", UNICODE_CI, "oeso大千世界大千世界"); + 
assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", UTF8_BINARY, "大千世界大千世界oesO"); + assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", UTF8_LCASE, "大千世界大千世界OesO"); + assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", UNICODE, "大千世界大千世界oesO"); + assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", UNICODE_CI, "大千世界大千世界OesO"); + assertStringTranslate("大千世界大千世界tesT", "大千", "世世", UTF8_BINARY, "世世世界世世世界tesT"); + assertStringTranslate("大千世界大千世界tesT", "大千", "世世", UTF8_LCASE, "世世世界世世世界tesT"); + assertStringTranslate("大千世界大千世界tesT", "大千", "世世", UNICODE, "世世世界世世世界tesT"); + assertStringTranslate("大千世界大千世界tesT", "大千", "世世", UNICODE_CI, "世世世界世世世界tesT"); + assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", UTF8_BINARY, "Tr4234e"); + assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", UTF8_LCASE, "14234e"); + assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", UNICODE, "Tr4234e"); + assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", UNICODE_CI, "14234e"); + assertStringTranslate("Translate", "Rnlt", "123495834634", UTF8_BINARY, "Tra2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "123495834634", UTF8_LCASE, "41a2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "123495834634", UNICODE, "Tra2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "123495834634", UNICODE_CI, "41a2s3a4e"); + assertStringTranslate("abcdef", "abcde", "123", UTF8_BINARY, "123f"); + assertStringTranslate("abcdef", "abcde", "123", UTF8_LCASE, "123f"); + assertStringTranslate("abcdef", "abcde", "123", UNICODE, "123f"); + assertStringTranslate("abcdef", "abcde", "123", UNICODE_CI, "123f"); assertStringTranslate("abcdëÈêf", "ÊèË", "123", "AF_CI", "abcd321f"); // One-to-many case mapping (e.g. Turkish dotted I). 
- assertStringTranslate("İ", "i\u0307", "xy", "UTF8_BINARY", "İ"); - assertStringTranslate("İ", "i\u0307", "xy", "UTF8_LCASE", "İ"); - assertStringTranslate("İ", "i\u0307", "xy", "UNICODE", "İ"); - assertStringTranslate("İ", "i\u0307", "xy", "UNICODE_CI", "İ"); - assertStringTranslate("i\u0307", "İ", "xy", "UTF8_BINARY", "i\u0307"); - assertStringTranslate("i\u0307", "İ", "xy", "UTF8_LCASE", "x"); - assertStringTranslate("i\u0307", "İ", "xy", "UNICODE", "i\u0307"); - assertStringTranslate("i\u0307", "İ", "xy", "UNICODE_CI", "x"); - assertStringTranslate("i\u030A", "İ", "x", "UTF8_BINARY", "i\u030A"); - assertStringTranslate("i\u030A", "İ", "x", "UTF8_LCASE", "i\u030A"); - assertStringTranslate("i\u030A", "İ", "x", "UNICODE", "i\u030A"); - assertStringTranslate("i\u030A", "İ", "x", "UNICODE_CI", "i\u030A"); - assertStringTranslate("i\u030A", "İi", "xy", "UTF8_BINARY", "y\u030A"); - assertStringTranslate("i\u030A", "İi", "xy", "UTF8_LCASE", "y\u030A"); - assertStringTranslate("i\u030A", "İi", "xy", "UNICODE", "i\u030A"); - assertStringTranslate("i\u030A", "İi", "xy", "UNICODE_CI", "i\u030A"); - assertStringTranslate("İi\u0307", "İi\u0307", "123", "UTF8_BINARY", "123"); - assertStringTranslate("İi\u0307", "İi\u0307", "123", "UTF8_LCASE", "11"); - assertStringTranslate("İi\u0307", "İi\u0307", "123", "UNICODE", "1i\u0307"); - assertStringTranslate("İi\u0307", "İi\u0307", "123", "UNICODE_CI", "11"); - assertStringTranslate("İi\u0307", "İyz", "123", "UTF8_BINARY", "1i\u0307"); - assertStringTranslate("İi\u0307", "İyz", "123", "UTF8_LCASE", "11"); - assertStringTranslate("İi\u0307", "İyz", "123", "UNICODE", "1i\u0307"); - assertStringTranslate("İi\u0307", "İyz", "123", "UNICODE_CI", "11"); - assertStringTranslate("İi\u0307", "xi\u0307", "123", "UTF8_BINARY", "İ23"); - assertStringTranslate("İi\u0307", "xi\u0307", "123", "UTF8_LCASE", "İ23"); - assertStringTranslate("İi\u0307", "xi\u0307", "123", "UNICODE", "İi\u0307"); - assertStringTranslate("İi\u0307", "xi\u0307", "123", 
"UNICODE_CI", "İi\u0307"); - assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UTF8_BINARY", "12bc3"); - assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UTF8_LCASE", "12bc3"); - assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UNICODE", "3bc3"); - assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UNICODE_CI", "3bc3"); - assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UTF8_BINARY", "a2bcå"); - assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UTF8_LCASE", "12bc3"); - assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UNICODE", "a\u030Abcå"); - assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UNICODE_CI", "3bc3"); - assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", "UTF8_BINARY", "3\u030Aβφδ1\u0307"); - assertStringTranslate("A\u030Aβφδi\u0307", "Iİaå", "1234", "UTF8_LCASE", "3\u030Aβφδ2"); - assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", "UNICODE", "4βφδ2"); - assertStringTranslate("A\u030Aβφδi\u0307", "Iİaå", "1234", "UNICODE_CI", "4βφδ2"); + assertStringTranslate("İ", "i\u0307", "xy", UTF8_BINARY, "İ"); + assertStringTranslate("İ", "i\u0307", "xy", UTF8_LCASE, "İ"); + assertStringTranslate("İ", "i\u0307", "xy", UNICODE, "İ"); + assertStringTranslate("İ", "i\u0307", "xy", UNICODE_CI, "İ"); + assertStringTranslate("i\u0307", "İ", "xy", UTF8_BINARY, "i\u0307"); + assertStringTranslate("i\u0307", "İ", "xy", UTF8_LCASE, "x"); + assertStringTranslate("i\u0307", "İ", "xy", UNICODE, "i\u0307"); + assertStringTranslate("i\u0307", "İ", "xy", UNICODE_CI, "x"); + assertStringTranslate("i\u030A", "İ", "x", UTF8_BINARY, "i\u030A"); + assertStringTranslate("i\u030A", "İ", "x", UTF8_LCASE, "i\u030A"); + assertStringTranslate("i\u030A", "İ", "x", UNICODE, "i\u030A"); + assertStringTranslate("i\u030A", "İ", "x", UNICODE_CI, "i\u030A"); + assertStringTranslate("i\u030A", "İi", "xy", UTF8_BINARY, "y\u030A"); + assertStringTranslate("i\u030A", "İi", "xy", UTF8_LCASE, "y\u030A"); + 
assertStringTranslate("i\u030A", "İi", "xy", UNICODE, "i\u030A"); + assertStringTranslate("i\u030A", "İi", "xy", UNICODE_CI, "i\u030A"); + assertStringTranslate("İi\u0307", "İi\u0307", "123", UTF8_BINARY, "123"); + assertStringTranslate("İi\u0307", "İi\u0307", "123", UTF8_LCASE, "11"); + assertStringTranslate("İi\u0307", "İi\u0307", "123", UNICODE, "1i\u0307"); + assertStringTranslate("İi\u0307", "İi\u0307", "123", UNICODE_CI, "11"); + assertStringTranslate("İi\u0307", "İyz", "123", UTF8_BINARY, "1i\u0307"); + assertStringTranslate("İi\u0307", "İyz", "123", UTF8_LCASE, "11"); + assertStringTranslate("İi\u0307", "İyz", "123", UNICODE, "1i\u0307"); + assertStringTranslate("İi\u0307", "İyz", "123", UNICODE_CI, "11"); + assertStringTranslate("İi\u0307", "xi\u0307", "123", UTF8_BINARY, "İ23"); + assertStringTranslate("İi\u0307", "xi\u0307", "123", UTF8_LCASE, "İ23"); + assertStringTranslate("İi\u0307", "xi\u0307", "123", UNICODE, "İi\u0307"); + assertStringTranslate("İi\u0307", "xi\u0307", "123", UNICODE_CI, "İi\u0307"); + assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", UTF8_BINARY, "12bc3"); + assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", UTF8_LCASE, "12bc3"); + assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", UNICODE, "3bc3"); + assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", UNICODE_CI, "3bc3"); + assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", UTF8_BINARY, "a2bcå"); + assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", UTF8_LCASE, "12bc3"); + assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", UNICODE, "a\u030Abcå"); + assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", UNICODE_CI, "3bc3"); + assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", UTF8_BINARY, "3\u030Aβφδ1\u0307"); + assertStringTranslate("A\u030Aβφδi\u0307", "Iİaå", "1234", UTF8_LCASE, "3\u030Aβφδ2"); + assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", UNICODE, "4βφδ2"); + assertStringTranslate("A\u030Aβφδi\u0307", "Iİaå", "1234", UNICODE_CI, 
"4βφδ2"); // Conditional case mapping (e.g. Greek sigmas). - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UTF8_BINARY", "σΥσΤΗΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UTF8_LCASE", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UNICODE", "σΥσΤΗΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UNICODE_CI", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UTF8_LCASE", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UNICODE_CI", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UTF8_LCASE", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UNICODE_CI", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UNICODE_CI", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UTF8_BINARY", "ςΥςΤΗΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UNICODE", "ςΥςΤΗΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UNICODE_CI", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UNICODE", 
"ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UNICODE_CI", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("συστηματικος", "Συη", "σιι", "UTF8_BINARY", "σιστιματικος"); - assertStringTranslate("συστηματικος", "Συη", "σιι", "UTF8_LCASE", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "Συη", "σιι", "UNICODE", "σιστιματικος"); - assertStringTranslate("συστηματικος", "Συη", "σιι", "UNICODE_CI", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "συη", "σιι", "UTF8_BINARY", "σιστιματικος"); - assertStringTranslate("συστηματικος", "συη", "σιι", "UTF8_LCASE", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "συη", "σιι", "UNICODE", "σιστιματικος"); - assertStringTranslate("συστηματικος", "συη", "σιι", "UNICODE_CI", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "ςυη", "σιι", "UTF8_BINARY", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "ςυη", "σιι", "UTF8_LCASE", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "ςυη", "σιι", "UNICODE", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "ςυη", "σιι", "UNICODE_CI", "σιστιματικοσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", UTF8_BINARY, "σΥσΤΗΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", UTF8_LCASE, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", UNICODE, "σΥσΤΗΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", UNICODE_CI, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", UTF8_BINARY, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", UTF8_LCASE, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", UNICODE, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", UNICODE_CI, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", UTF8_BINARY, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", UTF8_LCASE, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", 
UNICODE, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", UNICODE_CI, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", UTF8_BINARY, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", UTF8_LCASE, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", UNICODE, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", UNICODE_CI, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", UTF8_BINARY, "ςΥςΤΗΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", UTF8_LCASE, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", UNICODE, "ςΥςΤΗΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", UNICODE_CI, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", UTF8_BINARY, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", UTF8_LCASE, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", UNICODE, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", UNICODE_CI, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("συστηματικος", "Συη", "σιι", UTF8_BINARY, "σιστιματικος"); + assertStringTranslate("συστηματικος", "Συη", "σιι", UTF8_LCASE, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "Συη", "σιι", UNICODE, "σιστιματικος"); + assertStringTranslate("συστηματικος", "Συη", "σιι", UNICODE_CI, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "συη", "σιι", UTF8_BINARY, "σιστιματικος"); + assertStringTranslate("συστηματικος", "συη", "σιι", UTF8_LCASE, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "συη", "σιι", UNICODE, "σιστιματικος"); + assertStringTranslate("συστηματικος", "συη", "σιι", UNICODE_CI, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "ςυη", "σιι", UTF8_BINARY, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "ςυη", "σιι", UTF8_LCASE, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "ςυη", "σιι", UNICODE, 
"σιστιματικοσ"); + assertStringTranslate("συστηματικος", "ςυη", "σιι", UNICODE_CI, "σιστιματικοσ"); // Surrogate pairs. - assertStringTranslate("a🙃b🙃c", "a", "x", "UTF8_BINARY", "x🙃b🙃c"); - assertStringTranslate("a🙃b🙃c", "a🙃", "xy", "UTF8_BINARY", "xybyc"); - assertStringTranslate("a🙃b🙃c", "a🙃b", "xyz", "UTF8_BINARY", "xyzyc"); - assertStringTranslate("a🙃b🙃c", "a🙃bc", "xyzw", "UTF8_BINARY", "xyzyw"); - assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UTF8_BINARY", "😀😂😃😅"); - assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UTF8_LCASE", "😀😂😃😅"); - assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UNICODE", "😀😂😃😅"); - assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UNICODE_CI", "😀😂😃😅"); - assertStringTranslate("𐐅", "𐐅", "x", "UTF8_BINARY", "x"); - assertStringTranslate("𐐅", "𐐅", "x", "UTF8_LCASE", "x"); - assertStringTranslate("𐐅", "𐐅", "x", "UNICODE", "x"); - assertStringTranslate("𐐅", "𐐅", "x", "UNICODE_CI", "x"); - assertStringTranslate("𐐅", "𐐭", "x", "UTF8_BINARY", "𐐅"); - assertStringTranslate("𐐅", "𐐭", "x", "UTF8_LCASE", "x"); - assertStringTranslate("𐐅", "𐐭", "x", "UNICODE", "𐐅"); - assertStringTranslate("𐐅", "𐐭", "x", "UNICODE_CI", "x"); - assertStringTranslate("A", "A", "𐐅", "UTF8_BINARY", "𐐅"); - assertStringTranslate("A", "A", "𐐅", "UTF8_LCASE", "𐐅"); - assertStringTranslate("A", "A", "𐐅", "UNICODE", "𐐅"); - assertStringTranslate("A", "A", "𐐅", "UNICODE_CI", "𐐅"); - assertStringTranslate("A", "a", "𐐅", "UTF8_BINARY", "A"); - assertStringTranslate("A", "a", "𐐅", "UTF8_LCASE", "𐐅"); - assertStringTranslate("A", "a", "𐐅", "UNICODE", "A"); - assertStringTranslate("A", "a", "𐐅", "UNICODE_CI", "𐐅"); - assertStringTranslate("a", "A", "𐐅", "UTF8_BINARY", "a"); - assertStringTranslate("a", "A", "𐐅", "UTF8_LCASE", "𐐅"); - assertStringTranslate("a", "A", "𐐅", "UNICODE", "a"); - assertStringTranslate("a", "A", "𐐅", "UNICODE_CI", "𐐅"); - assertStringTranslate("𝔸", "𝔸", "x", "UTF8_BINARY", "x"); - assertStringTranslate("𝔸", "𝔸", "x", "UTF8_LCASE", "x"); - assertStringTranslate("𝔸", "𝔸", "x", 
"UNICODE", "x"); - assertStringTranslate("𝔸", "𝔸", "x", "UNICODE_CI", "x"); - assertStringTranslate("𝔸", "𝕒", "x", "UTF8_BINARY", "𝔸"); - assertStringTranslate("𝔸", "𝕒", "x", "UTF8_LCASE", "𝔸"); - assertStringTranslate("𝔸", "𝕒", "x", "UNICODE", "𝔸"); - assertStringTranslate("𝔸", "𝕒", "x", "UNICODE_CI", "x"); + assertStringTranslate("a🙃b🙃c", "a", "x", UTF8_BINARY, "x🙃b🙃c"); + assertStringTranslate("a🙃b🙃c", "a🙃", "xy", UTF8_BINARY, "xybyc"); + assertStringTranslate("a🙃b🙃c", "a🙃b", "xyz", UTF8_BINARY, "xyzyc"); + assertStringTranslate("a🙃b🙃c", "a🙃bc", "xyzw", UTF8_BINARY, "xyzyw"); + assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", UTF8_BINARY, "😀😂😃😅"); + assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", UTF8_LCASE, "😀😂😃😅"); + assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", UNICODE, "😀😂😃😅"); + assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", UNICODE_CI, "😀😂😃😅"); + assertStringTranslate("𐐅", "𐐅", "x", UTF8_BINARY, "x"); + assertStringTranslate("𐐅", "𐐅", "x", UTF8_LCASE, "x"); + assertStringTranslate("𐐅", "𐐅", "x", UNICODE, "x"); + assertStringTranslate("𐐅", "𐐅", "x", UNICODE_CI, "x"); + assertStringTranslate("𐐅", "𐐭", "x", UTF8_BINARY, "𐐅"); + assertStringTranslate("𐐅", "𐐭", "x", UTF8_LCASE, "x"); + assertStringTranslate("𐐅", "𐐭", "x", UNICODE, "𐐅"); + assertStringTranslate("𐐅", "𐐭", "x", UNICODE_CI, "x"); + assertStringTranslate("A", "A", "𐐅", UTF8_BINARY, "𐐅"); + assertStringTranslate("A", "A", "𐐅", UTF8_LCASE, "𐐅"); + assertStringTranslate("A", "A", "𐐅", UNICODE, "𐐅"); + assertStringTranslate("A", "A", "𐐅", UNICODE_CI, "𐐅"); + assertStringTranslate("A", "a", "𐐅", UTF8_BINARY, "A"); + assertStringTranslate("A", "a", "𐐅", UTF8_LCASE, "𐐅"); + assertStringTranslate("A", "a", "𐐅", UNICODE, "A"); + assertStringTranslate("A", "a", "𐐅", UNICODE_CI, "𐐅"); + assertStringTranslate("a", "A", "𐐅", UTF8_BINARY, "a"); + assertStringTranslate("a", "A", "𐐅", UTF8_LCASE, "𐐅"); + assertStringTranslate("a", "A", "𐐅", UNICODE, "a"); + assertStringTranslate("a", "A", "𐐅", UNICODE_CI, "𐐅"); + 
assertStringTranslate("𝔸", "𝔸", "x", UTF8_BINARY, "x"); + assertStringTranslate("𝔸", "𝔸", "x", UTF8_LCASE, "x"); + assertStringTranslate("𝔸", "𝔸", "x", UNICODE, "x"); + assertStringTranslate("𝔸", "𝔸", "x", UNICODE_CI, "x"); + assertStringTranslate("𝔸", "𝕒", "x", UTF8_BINARY, "𝔸"); + assertStringTranslate("𝔸", "𝕒", "x", UTF8_LCASE, "𝔸"); + assertStringTranslate("𝔸", "𝕒", "x", UNICODE, "𝔸"); + assertStringTranslate("𝔸", "𝕒", "x", UNICODE_CI, "x"); } private Map buildDict(String matching, String replace) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 6077e55561e62..727d54b6bbd2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -43,7 +43,7 @@ import org.apache.spark.sql.catalyst.analysis.{HintErrorLogger, Resolver} import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.logical.HintErrorHandler -import org.apache.spark.sql.catalyst.util.{CollationFactory, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.{CollationFactory, CollationNames, DateTimeUtils} import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.types.{AtomicType, StringType, TimestampNTZType, TimestampType} @@ -5787,7 +5787,7 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def trimCollationEnabled: Boolean = getConf(TRIM_COLLATION_ENABLED) override def defaultStringType: StringType = { - if (getConf(DEFAULT_COLLATION).toUpperCase(Locale.ROOT) == "UTF8_BINARY") { + if (getConf(DEFAULT_COLLATION).toUpperCase(Locale.ROOT) == CollationNames.UTF8_BINARY) { StringType } else { 
StringType(getConf(DEFAULT_COLLATION)) From 94a31bd9c144da16f4fe5d42a709c1dc415e1277 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 14 Jan 2025 12:52:55 +0900 Subject: [PATCH 13/15] [SPARK-48809][PYTHON][DOCS] Reimplemented `spark version drop down` of the `PySpark doc site` and fix bug ### What changes were proposed in this pull request? This PR aims to use `pydata_sphinx_theme`'s `embedded` `version-switcher` to reimplement the `spark version drop down` of the `PySpark doc site` and fix a bug in `4.0.0-preview1`'s Python API docs. ### Why are the changes needed? - When I was reviewing `4.0.0-preview1`'s docs, I found that the `spark version drop down of the PySpark doc site` is no longer usable (when it is clicked, `the dropdown menu` is no longer displayed), as follows: https://spark.apache.org/docs/4.0.0-preview1/api/python/index.html image With the continuous updates of sphinx and the dependent components that generate the Python documentation, the `version-switcher` originally implemented by custom extensions is no longer usable, and we need to fix it - After image **It looks more in line with the current theme `CSS style` and looks more `beautiful`** - In addition, to go along with this fix, we need to update the content of the `spark-website/site/static/versions.json` file, as follows: image **I will complete it in another separate PR** The modified file `versions.json` will be compatible with both the implementation of `spark 3.5.1` and the `new implementation`. https://github.com/apache/spark/blob/310f8ea2456dad7cec0f22bfed05a679764c3d7e/python/docs/source/_templates/version-switcher.html#L63-L73 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually tested. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47214 from panbingkun/SPARK-48809. 
Lead-authored-by: panbingkun Co-authored-by: panbingkun Signed-off-by: Hyukjin Kwon --- python/docs/source/_static/css/pyspark.css | 13 ---- .../source/_templates/version-switcher.html | 77 ------------------- python/docs/source/conf.py | 18 ++--- 3 files changed, 9 insertions(+), 99 deletions(-) delete mode 100644 python/docs/source/_templates/version-switcher.html diff --git a/python/docs/source/_static/css/pyspark.css b/python/docs/source/_static/css/pyspark.css index 565eaea299359..6f47dd80e9503 100644 --- a/python/docs/source/_static/css/pyspark.css +++ b/python/docs/source/_static/css/pyspark.css @@ -91,16 +91,3 @@ u.bd-sidebar .nav>li>ul>.active:hover>a,.bd-sidebar .nav>li>ul>.active>a { .spec_table tr, td, th { border-top: none!important; } - -/* Styling to the version dropdown */ -#version-button { - padding-left: 0.2rem; - padding-right: 3.2rem; -} - -#version_switcher { - height: auto; - max-height: 300px; - width: 165px; - overflow-y: auto; -} diff --git a/python/docs/source/_templates/version-switcher.html b/python/docs/source/_templates/version-switcher.html deleted file mode 100644 index 16c443229f4be..0000000000000 --- a/python/docs/source/_templates/version-switcher.html +++ /dev/null @@ -1,77 +0,0 @@ - - - - - diff --git a/python/docs/source/conf.py b/python/docs/source/conf.py index 5640ba151176d..20c13cd768deb 100644 --- a/python/docs/source/conf.py +++ b/python/docs/source/conf.py @@ -188,19 +188,19 @@ # a list of builtin themes. html_theme = 'pydata_sphinx_theme' -html_context = { - # When releasing a new Spark version, please update the file - # "site/static/versions.json" under the code repository "spark-website" - # (item should be added in order), and also set the local environment - # variable "RELEASE_VERSION". 
- "switcher_json_url": "https://spark.apache.org/static/versions.json", - "switcher_template_url": "https://spark.apache.org/docs/{version}/api/python/index.html", -} - # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. html_theme_options = { + "check_switcher": False, + "switcher": { + # When releasing a new Spark version, please update the file + # "site/static/versions.json" under the code repository "spark-website" + # (item should be added in order), and also set the local environment + # variable "RELEASE_VERSION". + "json_url": "https://spark.apache.org/static/versions.json", + "version_match": release, + }, "header_links_before_dropdown": 6, "navbar_end": ["version-switcher", "theme-switcher", "navbar-icon-links"], "footer_start": ["spark_footer", "sphinx-version"], From 3c7f5e25b70ce8332c31bee50b704dc55d810bf1 Mon Sep 17 00:00:00 2001 From: Gene Pang Date: Tue, 14 Jan 2025 12:58:42 +0900 Subject: [PATCH 14/15] [SPARK-50790][PYTHON] Implement parse json in pyspark ### What changes were proposed in this pull request? Implement the parseJson functionality in PySpark, for parsing a json string to a VariantVal. ### Why are the changes needed? Currently, there is no way to create a VariantVal from python. It can only be created from Spark SQL. ### Does this PR introduce _any_ user-facing change? Added `VariantVal.parseJson`, which takes a json string, and returns a `VariantVal`. ### How was this patch tested? Added unittests. ### Was this patch authored or co-authored using generative AI tooling? no Closes #49450 from gene-db/py-parse-json. 
Authored-by: Gene Pang Signed-off-by: Hyukjin Kwon --- .../reference/pyspark.sql/variant_val.rst | 1 + python/pyspark/sql/tests/test_types.py | 11 + python/pyspark/sql/types.py | 9 + python/pyspark/sql/variant_utils.py | 327 +++++++++++++++++- 4 files changed, 346 insertions(+), 2 deletions(-) diff --git a/python/docs/source/reference/pyspark.sql/variant_val.rst b/python/docs/source/reference/pyspark.sql/variant_val.rst index 8630ae8aace14..883b4c8fdc3d5 100644 --- a/python/docs/source/reference/pyspark.sql/variant_val.rst +++ b/python/docs/source/reference/pyspark.sql/variant_val.rst @@ -26,3 +26,4 @@ VariantVal VariantVal.toPython VariantVal.toJson + VariantVal.parseJson diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 432ddd083c802..75c28ac0dec1d 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -2240,6 +2240,17 @@ def test_variant_type(self): PySparkValueError, lambda: str(VariantVal(bytes([32, 10, 1, 0, 0, 0]), metadata)) ) + # check parse_json + for key, json, obj in expected_values: + self.assertEqual(VariantVal.parseJson(json).toJson(), json) + self.assertEqual(VariantVal.parseJson(json).toPython(), obj) + + # compare the parse_json in Spark vs python. `json_str` contains all of `expected_values`. 
+ parse_json_spark_output = variants[0] + parse_json_python_output = VariantVal.parseJson(json_str) + self.assertEqual(parse_json_spark_output.value, parse_json_python_output.value) + self.assertEqual(parse_json_spark_output.metadata, parse_json_python_output.metadata) + def test_to_ddl(self): schema = StructType().add("a", NullType()).add("b", BooleanType()).add("c", BinaryType()) self.assertEqual(schema.toDDL(), "a VOID,b BOOLEAN,c BINARY") diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index f40a8bf62b290..b913e05e16d2a 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -1770,6 +1770,15 @@ def toJson(self, zone_id: str = "UTC") -> str: """ return VariantUtils.to_json(self.value, self.metadata, zone_id) + @classmethod + def parseJson(cls, json_str: str) -> "VariantVal": + """ + Parse a JSON string and build a VariantVal from it. + :return: VariantVal holding the (value, metadata) binaries returned by VariantUtils.parse_json + """ + (value, metadata) = VariantUtils.parse_json(json_str) + return VariantVal(value, metadata) + _atomic_types: List[Type[DataType]] = [ StringType, diff --git a/python/pyspark/sql/variant_utils.py b/python/pyspark/sql/variant_utils.py index 40cc69c1f0961..3025523064e1d 100644 --- a/python/pyspark/sql/variant_utils.py +++ b/python/pyspark/sql/variant_utils.py @@ -21,7 +21,7 @@ import json import struct from array import array -from typing import Any, Callable, Dict, List, Tuple +from typing import Any, Callable, Dict, List, NamedTuple, Tuple from pyspark.errors import PySparkValueError from zoneinfo import ZoneInfo @@ -108,8 +108,25 @@ class VariantUtils: # string size) + (size bytes of string content). LONG_STR = 16 + VERSION = 1 + # The lower 4 bits of the first metadata byte contain the version. 
+ VERSION_MASK = 0x0F + + U8_MAX = 0xFF + U16_MAX = 0xFFFF + U24_MAX = 0xFFFFFF + U24_SIZE = 3 U32_SIZE = 4 + I8_MAX = 0x7F + I8_MIN = -0x80 + I16_MAX = 0x7FFF + I16_MIN = -0x8000 + I32_MAX = 0x7FFFFFFF + I32_MIN = -0x80000000 + I64_MAX = 0x7FFFFFFFFFFFFFFF + I64_MIN = -0x8000000000000000 + EPOCH = datetime.datetime( year=1970, month=1, day=1, hour=0, minute=0, second=0, tzinfo=datetime.timezone.utc ) @@ -140,6 +157,15 @@ def to_python(cls, value: bytes, metadata: bytes) -> str: """ return cls._to_python(value, metadata, 0) + @classmethod + def parse_json(cls, json_str: str) -> Tuple[bytes, bytes]: + """ + Parses the JSON string and creates the Variant binary (value, metadata) + :return: tuple of 2 binary values (value, metadata) + """ + builder = VariantBuilder() + return builder.build(json_str) + @classmethod def _read_long(cls, data: bytes, pos: int, num_bytes: int, signed: bool) -> int: cls._check_index(pos, len(data)) @@ -468,7 +494,10 @@ def _handle_object( value, offset_start + offset_size * i, offset_size, signed=False ) value_pos = data_start + offset - key_value_pos_list.append((cls._get_metadata_key(metadata, id), value_pos)) + if metadata is not None: + key_value_pos_list.append((cls._get_metadata_key(metadata, id), value_pos)) + else: + key_value_pos_list.append(("", value_pos)) return func(key_value_pos_list) @classmethod @@ -496,3 +525,297 @@ def _handle_array(cls, value: bytes, pos: int, func: Callable[[List[int]], Any]) element_pos = data_start + offset value_pos_list.append(element_pos) return func(value_pos_list) + + +class FieldEntry(NamedTuple): + """ + Info about an object field + """ + + key: str + id: int + offset: int + + +class VariantBuilder: + """ + A utility class for building VariantVal. 
+ """ + + DEFAULT_SIZE_LIMIT = 16 * 1024 * 1024 + + def __init__(self, size_limit: int = DEFAULT_SIZE_LIMIT): + self.value = bytearray() + self.dictionary = dict[str, int]() + self.dictionary_keys = list[bytes]() + self.size_limit = size_limit + + def build(self, json_str: str) -> Tuple[bytes, bytes]: + parsed = json.loads(json_str, parse_float=self._handle_float) + self._process_parsed_json(parsed) + + num_keys = len(self.dictionary_keys) + dictionary_string_size = sum(len(key) for key in self.dictionary_keys) + + # Determine the number of bytes required per offset entry. + # The largest offset is the one-past-the-end value, which is total string size. It's very + # unlikely that the number of keys could be larger, but incorporate that into the + # calculation in case of pathological data. + max_size = max(dictionary_string_size, num_keys) + if max_size > self.size_limit: + raise PySparkValueError(errorClass="VARIANT_SIZE_LIMIT_EXCEEDED", messageParameters={}) + offset_size = self._get_integer_size(max_size) + + offset_start = 1 + offset_size + string_start = offset_start + (num_keys + 1) * offset_size + metadata_size = string_start + dictionary_string_size + if metadata_size > self.size_limit: + raise PySparkValueError(errorClass="VARIANT_SIZE_LIMIT_EXCEEDED", messageParameters={}) + + metadata = bytearray() + header_byte = VariantUtils.VERSION | ((offset_size - 1) << 6) + metadata.extend(header_byte.to_bytes(1, byteorder="little")) + metadata.extend(num_keys.to_bytes(offset_size, byteorder="little")) + # write offsets + current_offset = 0 + for key in self.dictionary_keys: + metadata.extend(current_offset.to_bytes(offset_size, byteorder="little")) + current_offset += len(key) + metadata.extend(current_offset.to_bytes(offset_size, byteorder="little")) + # write key data + for key in self.dictionary_keys: + metadata.extend(key) + return (bytes(self.value), bytes(metadata)) + + def _process_parsed_json(self, parsed: Any) -> None: + if type(parsed) is dict: + fields 
= list[FieldEntry]() + start = len(self.value) + for key, value in parsed.items(): + id = self._add_key(key) + fields.append(FieldEntry(key, id, len(self.value) - start)) + self._process_parsed_json(value) + self._finish_writing_object(start, fields) + elif type(parsed) is list: + offsets = [] + start = len(self.value) + for elem in parsed: + offsets.append(len(self.value) - start) + self._process_parsed_json(elem) + self._finish_writing_array(start, offsets) + elif type(parsed) is str: + self._append_string(parsed) + elif type(parsed) is int: + if not self._append_int(parsed): + self._process_parsed_json(self._handle_float(str(parsed))) + elif type(parsed) is float: + self._append_float(parsed) + elif type(parsed) is decimal.Decimal: + self._append_decimal(parsed) + elif type(parsed) is bool: + self._append_boolean(parsed) + elif parsed is None: + self._append_null() + else: + raise PySparkValueError(errorClass="MALFORMED_VARIANT", messageParameters={}) + + # Choose the smallest unsigned integer type that can store `value`. It must be within + # [0, U24_MAX]. 
+ def _get_integer_size(self, value: int) -> int: + if value <= VariantUtils.U8_MAX: + return 1 + if value <= VariantUtils.U16_MAX: + return 2 + return VariantUtils.U24_SIZE + + def _check_capacity(self, additional: int) -> None: + required = len(self.value) + additional + if required > self.size_limit: + raise PySparkValueError(errorClass="VARIANT_SIZE_LIMIT_EXCEEDED", messageParameters={}) + + def _primitive_header(self, type: int) -> bytes: + return bytes([(type << 2) | VariantUtils.PRIMITIVE]) + + def _short_string_header(self, size: int) -> bytes: + return bytes([size << 2 | VariantUtils.SHORT_STR]) + + def _array_header(self, large_size: bool, offset_size: int) -> bytes: + return bytes( + [ + ( + (large_size << (VariantUtils.BASIC_TYPE_BITS + 2)) + | ((offset_size - 1) << VariantUtils.BASIC_TYPE_BITS) + | VariantUtils.ARRAY + ) + ] + ) + + def _object_header(self, large_size: bool, id_size: int, offset_size: int) -> bytes: + return bytes( + [ + ( + (large_size << (VariantUtils.BASIC_TYPE_BITS + 4)) + | ((id_size - 1) << (VariantUtils.BASIC_TYPE_BITS + 2)) + | ((offset_size - 1) << VariantUtils.BASIC_TYPE_BITS) + | VariantUtils.OBJECT + ) + ] + ) + + # Add a key to the variant dictionary. If the key already exists, the dictionary is + # not modified. In either case, return the id of the key. + def _add_key(self, key: str) -> int: + if key in self.dictionary: + return self.dictionary[key] + id = len(self.dictionary_keys) + self.dictionary[key] = id + self.dictionary_keys.append(key.encode("utf-8")) + return id + + def _handle_float(self, num_str: str) -> Any: + # a float can be a decimal if it only contains digits, '-', or '.'. + if all([ch.isdecimal() or ch == "-" or ch == "." 
for ch in num_str]): + dec = decimal.Decimal(num_str) + precision = len(dec.as_tuple().digits) + scale = -int(dec.as_tuple().exponent) + + if ( + scale <= VariantUtils.MAX_DECIMAL16_PRECISION + and precision <= VariantUtils.MAX_DECIMAL16_PRECISION + ): + return dec + return float(num_str) + + def _append_boolean(self, b: bool) -> None: + self._check_capacity(1) + self.value.extend(self._primitive_header(VariantUtils.TRUE if b else VariantUtils.FALSE)) + + def _append_null(self) -> None: + self._check_capacity(1) + self.value.extend(self._primitive_header(VariantUtils.NULL)) + + def _append_string(self, s: str) -> None: + text = s.encode("utf-8") + long_str = len(text) > VariantUtils.MAX_SHORT_STR_SIZE + additional = (1 + VariantUtils.U32_SIZE) if long_str else 1 + self._check_capacity(additional + len(text)) + if long_str: + self.value.extend(self._primitive_header(VariantUtils.LONG_STR)) + self.value.extend(len(text).to_bytes(VariantUtils.U32_SIZE, byteorder="little")) + else: + self.value.extend(self._short_string_header(len(text))) + self.value.extend(text) + + def _append_int(self, i: int) -> bool: + self._check_capacity(1 + 8) + if i >= VariantUtils.I8_MIN and i <= VariantUtils.I8_MAX: + self.value.extend(self._primitive_header(VariantUtils.INT1)) + self.value.extend(i.to_bytes(1, byteorder="little", signed=True)) + elif i >= VariantUtils.I16_MIN and i <= VariantUtils.I16_MAX: + self.value.extend(self._primitive_header(VariantUtils.INT2)) + self.value.extend(i.to_bytes(2, byteorder="little", signed=True)) + elif i >= VariantUtils.I32_MIN and i <= VariantUtils.I32_MAX: + self.value.extend(self._primitive_header(VariantUtils.INT4)) + self.value.extend(i.to_bytes(4, byteorder="little", signed=True)) + elif i >= VariantUtils.I64_MIN and i <= VariantUtils.I64_MAX: + self.value.extend(self._primitive_header(VariantUtils.INT8)) + self.value.extend(i.to_bytes(8, byteorder="little", signed=True)) + else: + return False + return True + + # Append a decimal value to the 
variant builder. The caller should guarantee that its precision + # and scale fit into `MAX_DECIMAL16_PRECISION`. + def _append_decimal(self, d: decimal.Decimal) -> None: + self._check_capacity(2 + 16) + precision = len(d.as_tuple().digits) + scale = -int(d.as_tuple().exponent) + unscaled = int("".join(map(str, d.as_tuple().digits))) + unscaled = -unscaled if d < 0 else unscaled + if ( + scale <= VariantUtils.MAX_DECIMAL4_PRECISION + and precision <= VariantUtils.MAX_DECIMAL4_PRECISION + ): + self.value.extend(self._primitive_header(VariantUtils.DECIMAL4)) + self.value.extend(scale.to_bytes(1, byteorder="little")) + self.value.extend(unscaled.to_bytes(4, byteorder="little", signed=True)) + elif ( + scale <= VariantUtils.MAX_DECIMAL8_PRECISION + and precision <= VariantUtils.MAX_DECIMAL8_PRECISION + ): + self.value.extend(self._primitive_header(VariantUtils.DECIMAL8)) + self.value.extend(scale.to_bytes(1, byteorder="little")) + self.value.extend(unscaled.to_bytes(8, byteorder="little", signed=True)) + else: + assert ( + scale <= VariantUtils.MAX_DECIMAL16_PRECISION + and precision <= VariantUtils.MAX_DECIMAL16_PRECISION + ) + self.value.extend(self._primitive_header(VariantUtils.DECIMAL16)) + self.value.extend(scale.to_bytes(1, byteorder="little")) + self.value.extend(unscaled.to_bytes(16, byteorder="little", signed=True)) + + def _append_float(self, f: float) -> None: + self._check_capacity(1 + 8) + self.value.extend(self._primitive_header(VariantUtils.DOUBLE)) + self.value.extend(struct.pack("<d", f)) + + # Finish writing a variant array after all of its elements have already been written. + def _finish_writing_array(self, start: int, offsets: List[int]) -> None: + data_size = len(self.value) - start + num_offsets = len(offsets) + large_size = num_offsets > VariantUtils.U8_MAX + size_bytes = VariantUtils.U32_SIZE if large_size else 1 + offset_size = self._get_integer_size(data_size) + # The space for header byte, object size, and offset list.
+ header_size = 1 + size_bytes + (num_offsets + 1) * offset_size + self._check_capacity(header_size) + self.value.extend(bytearray(header_size)) + # Shift the just-written element data to make room for the header section. + self.value[start + header_size :] = bytes(self.value[start : start + data_size]) + # Write the header byte, num offsets + offset_start = start + 1 + size_bytes + self.value[start : start + 1] = self._array_header(large_size, offset_size) + self.value[start + 1 : offset_start] = num_offsets.to_bytes(size_bytes, byteorder="little") + # write offset list + offset_list = bytearray() + for offset in offsets: + offset_list.extend(offset.to_bytes(offset_size, byteorder="little")) + offset_list.extend(data_size.to_bytes(offset_size, byteorder="little")) + self.value[offset_start : offset_start + len(offset_list)] = offset_list + + # Finish writing a variant object after all of its fields have already been written. + def _finish_writing_object(self, start: int, fields: List[FieldEntry]) -> None: + num_fields = len(fields) + # object fields are from a python dictionary, so keys are already distinct + fields.sort(key=lambda f: f.key) + max_id = 0 + for field in fields: + max_id = max(max_id, field.id) + + data_size = len(self.value) - start + large_size = num_fields > VariantUtils.U8_MAX + size_bytes = VariantUtils.U32_SIZE if large_size else 1 + id_size = self._get_integer_size(max_id) + offset_size = self._get_integer_size(data_size) + # The space for header byte, object size, id list, and offset list. + header_size = 1 + size_bytes + num_fields * id_size + (num_fields + 1) * offset_size + self._check_capacity(header_size) + self.value.extend(bytearray(header_size)) + # Shift the just-written field data to make room for the object header section. 
+ self.value[start + header_size :] = self.value[start : start + data_size] + # Write the header byte, num fields, id list, offset list + self.value[start : start + 1] = self._object_header(large_size, id_size, offset_size) + self.value[start + 1 : start + 1 + size_bytes] = num_fields.to_bytes( + size_bytes, byteorder="little" + ) + id_start = start + 1 + size_bytes + offset_start = id_start + num_fields * id_size + id_list = bytearray() + offset_list = bytearray() + for field in fields: + id_list.extend(field.id.to_bytes(id_size, byteorder="little")) + offset_list.extend(field.offset.to_bytes(offset_size, byteorder="little")) + offset_list.extend(data_size.to_bytes(offset_size, byteorder="little")) + self.value[id_start : id_start + len(id_list)] = id_list + self.value[offset_start : offset_start + len(offset_list)] = offset_list From bba6839d87144a251464bda410540e9877cbba2b Mon Sep 17 00:00:00 2001 From: Allison Wang Date: Tue, 14 Jan 2025 14:58:58 +0800 Subject: [PATCH 15/15] [SPARK-50762][SQL] Add Analyzer rule for resolving SQL scalar UDFs ### What changes were proposed in this pull request? This PR adds a new Analyzer rule `ResolveSQLFunctions` to resolve scalar SQL UDFs by replacing a `SQLFunctionExpression` with an actual function body. It currently supports the following operators: Project, Filter, Join and Aggregate. For example: ``` CREATE FUNCTION area(width DOUBLE, height DOUBLE) RETURNS DOUBLE RETURN width * height; ``` and this query ``` SELECT area(a, b) FROM t; ``` will be resolved as ``` Project [area(width, height) AS area] +- Project [a, b, CAST(a AS DOUBLE) AS width, CAST(b AS DOUBLE) AS height] +- Relation [a, b] ``` ### Why are the changes needed? To support SQL UDFs. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New SQL query tests. More tests will be added once table function resolution is supported. ### Was this patch authored or co-authored using generative AI tooling? 
No Closes #49414 from allisonwang-db/spark-50762-resolve-scalar-udf. Authored-by: Allison Wang Signed-off-by: Wenchen Fan --- .../resources/error/error-conditions.json | 13 + .../catalyst/expressions/ExpressionInfo.java | 2 +- .../sql/catalyst/analysis/Analyzer.scala | 272 +++++++++ .../sql/catalyst/analysis/CheckAnalysis.scala | 2 + .../analysis/SQLFunctionExpression.scala | 53 +- .../sql/catalyst/catalog/SessionCatalog.scala | 103 +++- .../catalog/UserDefinedFunction.scala | 21 + .../optimizer/EliminateSQLFunctionNode.scala | 47 ++ .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../sql/catalyst/trees/TreePatterns.scala | 1 + .../analyzer-results/sql-udf.sql.out | 575 ++++++++++++++++++ .../resources/sql-tests/inputs/sql-udf.sql | 122 ++++ .../sql-tests/results/sql-udf.sql.out | 484 +++++++++++++++ .../sql/execution/SQLFunctionSuite.scala | 61 ++ .../sql/expressions/ExpressionInfoSuite.scala | 3 +- 15 files changed, 1753 insertions(+), 7 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSQLFunctionNode.scala create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 8b266e9d6ac11..5037b52475422 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3126,6 +3126,13 @@ ], "sqlState" : "42K08" }, + "INVALID_SQL_FUNCTION_PLAN_STRUCTURE" : { + "message" : [ + "Invalid SQL function plan structure", + "<plan>" + ], + "sqlState" : "XXKD0" + }, "INVALID_SQL_SYNTAX" : { "message" : [ "Invalid SQL syntax:" @@ -5757,6
+5764,12 @@ ], "sqlState" : "0A000" }, + "UNSUPPORTED_SQL_UDF_USAGE" : { + "message" : [ + "Using SQL function <functionName> in <nodeName> is not supported." + ], + "sqlState" : "0A000" + }, "UNSUPPORTED_STREAMING_OPERATOR_WITHOUT_WATERMARK" : { "message" : [ " output mode not supported for on streaming DataFrames/DataSets without watermark." diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java index 4200619d3c5f9..310d18ddb3486 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java @@ -51,7 +51,7 @@ public class ExpressionInfo { "window_funcs", "xml_funcs", "table_funcs", "url_funcs", "variant_funcs")); private static final Set<String> validSources = - new HashSet<>(Arrays.asList("built-in", "hive", "python_udf", "scala_udf", + new HashSet<>(Arrays.asList("built-in", "hive", "python_udf", "scala_udf", "sql_udf", "java_udf", "python_udtf", "internal")); public String getClassName() { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 9282e0554a2d4..92cfc4119dd0c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -374,6 +374,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor BindProcedures :: ResolveTableSpec :: ValidateAndStripPipeExpressions :: + ResolveSQLFunctions :: ResolveAliases :: ResolveSubquery :: ResolveSubqueryColumnAliases :: @@ -2364,6 +2365,277 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor } } + /** + * This rule resolves SQL function expressions.
It pulls out function inputs and places them + * in a separate [[Project]] node below the operator and replaces the SQL function with its + * actual function body. SQL function expressions in [[Aggregate]] are handled in a special + * way. Non-aggregated SQL functions in the aggregate expressions of an Aggregate need to be + * pulled out into a Project above the Aggregate before replacing the SQL function expressions + * with actual function bodies. For example: + * + * Before: + * Aggregate [c1] [foo(c1), foo(max(c2)), sum(foo(c2)) AS sum] + * +- Relation [c1, c2] + * + * After: + * Project [foo(c1), foo(max_c2), sum] + * +- Aggregate [c1] [c1, max(c2) AS max_c2, sum(foo(c2)) AS sum] + * +- Relation [c1, c2] + */ + object ResolveSQLFunctions extends Rule[LogicalPlan] { + + private def hasSQLFunctionExpression(exprs: Seq[Expression]): Boolean = { + exprs.exists(_.find(_.isInstanceOf[SQLFunctionExpression]).nonEmpty) + } + + /** + * Check if the function input contains aggregate expressions. + */ + private def checkFunctionInput(f: SQLFunctionExpression): Unit = { + if (f.inputs.exists(AggregateExpression.containsAggregate)) { + // The input of a SQL function should not contain aggregate functions after + // `extractAndRewrite`. If there are aggregate functions, it means they are + // nested in another aggregate function, which is not allowed. + // For example: SELECT sum(foo(sum(c1))) FROM t + // We have to throw the error here because otherwise the query plan after + // resolving the SQL function will not be valid. + throw new AnalysisException( + errorClass = "NESTED_AGGREGATE_FUNCTION", + messageParameters = Map.empty) + } + } + + /** + * Resolve a SQL function expression as a logical plan and check if it can be analyzed. + */ + private def resolve(f: SQLFunctionExpression): LogicalPlan = { + // Validate the SQL function input.
+ checkFunctionInput(f) + val plan = v1SessionCatalog.makeSQLFunctionPlan(f.name, f.function, f.inputs) + val resolved = SQLFunctionContext.withSQLFunction { + // Resolve the SQL function plan using its context. + val conf = new SQLConf() + f.function.getSQLConfigs.foreach { case (k, v) => conf.settings.put(k, v) } + SQLConf.withExistingConf(conf) { + executeSameContext(plan) + } + } + // Fail the analysis eagerly if a SQL function cannot be resolved using its input. + SimpleAnalyzer.checkAnalysis(resolved) + resolved + } + + /** + * Rewrite SQL function expressions into actual resolved function bodies and extract + * function inputs into the given project list. + */ + private def rewriteSQLFunctions[E <: Expression]( + expression: E, + projectList: ArrayBuffer[NamedExpression]): E = { + val newExpr = expression match { + case f: SQLFunctionExpression if !hasSQLFunctionExpression(f.inputs) && + // Make sure LateralColumnAliasReference in parameters is resolved and eliminated first. + // Otherwise, the projectList can contain the LateralColumnAliasReference, which will be + // pushed down to a Project without the 'referenced' alias by LCA present, leaving it + // unresolved. + !f.inputs.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) => + withPosition(f) { + val plan = resolve(f) + // Extract the function input project list from the SQL function plan and + // inline the SQL function expression. + plan match { + case Project(body :: Nil, Project(aliases, _: LocalRelation)) => + projectList ++= aliases + SQLScalarFunction(f.function, aliases.map(_.toAttribute), body) + case o => + throw new AnalysisException( + errorClass = "INVALID_SQL_FUNCTION_PLAN_STRUCTURE", + messageParameters = Map("plan" -> o.toString)) + } + } + case o => o.mapChildren(rewriteSQLFunctions(_, projectList)) + } + newExpr.asInstanceOf[E] + } + + /** + * Check if the given expression contains expressions that should be extracted, + * i.e. 
non-aggregated SQL functions with non-foldable inputs. + */ + private def shouldExtract(e: Expression): Boolean = e match { + // Return false if the expression is already an aggregate expression. + case _: AggregateExpression => false + case _: SQLFunctionExpression => true + case _: LeafExpression => false + case o => o.children.exists(shouldExtract) + } + + /** + * Extract aggregate expressions from the given expression and replace + * them with attribute references. + * Example: + * Before: foo(c1) + foo(max(c2)) + max(foo(c2)) + * After: foo(c1) + foo(max_c2) + max_foo_c2 + * Extracted expressions: [c1, max(c2) AS max_c2, max(foo(c2)) AS max_foo_c2] + */ + private def extractAndRewrite[T <: Expression]( + expression: T, + extractedExprs: ArrayBuffer[NamedExpression]): T = { + val newExpr = expression match { + case e if !shouldExtract(e) => + val exprToAdd: NamedExpression = e match { + case o: OuterReference => Alias(o, toPrettySQL(o.e))() + case ne: NamedExpression => ne + case o => Alias(o, toPrettySQL(o))() + } + extractedExprs += exprToAdd + exprToAdd.toAttribute + case f: SQLFunctionExpression => + val newInputs = f.inputs.map(extractAndRewrite(_, extractedExprs)) + f.copy(inputs = newInputs) + case o => o.mapChildren(extractAndRewrite(_, extractedExprs)) + } + newExpr.asInstanceOf[T] + } + + /** + * Replace all [[SQLFunctionExpression]]s in an expression with attribute references + * from the aliasMap. + */ + private def replaceSQLFunctionWithAttr[T <: Expression]( + expr: T, + aliasMap: mutable.HashMap[Expression, Alias]): T = { + expr.transform { + case f: SQLFunctionExpression if aliasMap.contains(f.canonicalized) => + aliasMap(f.canonicalized).toAttribute + }.asInstanceOf[T] + } + + private def rewrite(plan: LogicalPlan): LogicalPlan = plan match { + // Return if a sub-tree does not contain SQLFunctionExpression. 
+ case p: LogicalPlan if !p.containsPattern(SQL_FUNCTION_EXPRESSION) => p + + case f @ Filter(cond, a: Aggregate) + if !f.resolved || AggregateExpression.containsAggregate(cond) || + ResolveGroupingAnalytics.hasGroupingFunction(cond) || + cond.containsPattern(TEMP_RESOLVED_COLUMN) => + // If the filter's condition contains aggregate expressions or grouping expressions or temp + // resolved column, we cannot rewrite both the filter and the aggregate until they are + // resolved by ResolveAggregateFunctions or ResolveGroupingAnalytics, because rewriting SQL + // functions in aggregate can add an additional project on top of the aggregate + // which breaks the pattern matching in those rules. + f.copy(child = a.copy(child = rewrite(a.child))) + + case h @ UnresolvedHaving(_, a: Aggregate) => + // Similarly UnresolvedHaving should be resolved by ResolveAggregateFunctions first + // before rewriting aggregate. + h.copy(child = a.copy(child = rewrite(a.child))) + + case a: Aggregate if a.resolved && hasSQLFunctionExpression(a.expressions) => + val child = rewrite(a.child) + // Extract SQL functions in the grouping expressions and place them in a project list + // below the current aggregate. Also update their appearances in the aggregate expressions. + val bottomProjectList = ArrayBuffer.empty[NamedExpression] + val aliasMap = mutable.HashMap.empty[Expression, Alias] + val newGrouping = a.groupingExpressions.map { expr => + expr.transformDown { + case f: SQLFunctionExpression => + val alias = aliasMap.getOrElseUpdate(f.canonicalized, Alias(f, f.name)()) + bottomProjectList += alias + alias.toAttribute + } + } + val aggregateExpressions = a.aggregateExpressions.map( + replaceSQLFunctionWithAttr(_, aliasMap)) + + // Rewrite SQL functions in the aggregate expressions that are not wrapped in + // aggregate functions. They need to be extracted into a project list above the + // current aggregate. 
+ val aggExprs = ArrayBuffer.empty[NamedExpression] + val topProjectList = aggregateExpressions.map(extractAndRewrite(_, aggExprs)) + + // Rewrite SQL functions in the new aggregate expressions that are wrapped inside + // aggregate functions. + val newAggExprs = aggExprs.map(rewriteSQLFunctions(_, bottomProjectList)) + + val bottomProject = if (bottomProjectList.nonEmpty) { + Project(child.output ++ bottomProjectList, child) + } else { + child + } + val newAgg = if (newGrouping.nonEmpty || newAggExprs.nonEmpty) { + a.copy( + groupingExpressions = newGrouping, + aggregateExpressions = newAggExprs.toSeq, + child = bottomProject) + } else { + bottomProject + } + if (topProjectList.nonEmpty) Project(topProjectList, newAgg) else newAgg + + case p: Project if p.resolved && hasSQLFunctionExpression(p.expressions) => + val newChild = rewrite(p.child) + val projectList = ArrayBuffer.empty[NamedExpression] + val newPList = p.projectList.map(rewriteSQLFunctions(_, projectList)) + if (newPList != newChild.output) { + p.copy(newPList, Project(newChild.output ++ projectList, newChild)) + } else { + assert(projectList.isEmpty) + p.copy(child = newChild) + } + + case f: Filter if f.resolved && hasSQLFunctionExpression(f.expressions) => + val newChild = rewrite(f.child) + val projectList = ArrayBuffer.empty[NamedExpression] + val newCond = rewriteSQLFunctions(f.condition, projectList) + if (newCond != f.condition) { + Project(f.output, Filter(newCond, Project(newChild.output ++ projectList, newChild))) + } else { + assert(projectList.isEmpty) + f.copy(child = newChild) + } + + case j: Join if j.resolved && hasSQLFunctionExpression(j.expressions) => + val newLeft = rewrite(j.left) + val newRight = rewrite(j.right) + val projectList = ArrayBuffer.empty[NamedExpression] + val joinCond = j.condition.map(rewriteSQLFunctions(_, projectList)) + if (joinCond != j.condition) { + // Join condition cannot have non-deterministic expressions. 
We can safely + // replace the aliases with the original SQL function input expressions. + val aliasMap = projectList.collect { case a: Alias => a.toAttribute -> a.child }.toMap + val newJoinCond = joinCond.map(_.transform { + case a: Attribute => aliasMap.getOrElse(a, a) + }) + j.copy(left = newLeft, right = newRight, condition = newJoinCond) + } else { + assert(projectList.isEmpty) + j.copy(left = newLeft, right = newRight) + } + + case o: LogicalPlan if o.resolved && hasSQLFunctionExpression(o.expressions) => + o.transformExpressionsWithPruning(_.containsPattern(SQL_FUNCTION_EXPRESSION)) { + case f: SQLFunctionExpression => + f.failAnalysis( + errorClass = "UNSUPPORTED_SQL_UDF_USAGE", + messageParameters = Map( + "functionName" -> toSQLId(f.function.name.nameParts), + "nodeName" -> o.nodeName.toString)) + } + + case p: LogicalPlan => p.mapChildren(rewrite) + } + + def apply(plan: LogicalPlan): LogicalPlan = { + // Only rewrite SQL functions when they are not in nested function calls. + if (SQLFunctionContext.get.nestedSQLFunctionDepth > 0) { + plan + } else { + rewrite(plan) + } + } + } + /** * Turns projections that contain aggregate expressions into aggregations. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 46ca8e793218b..0a68524c31241 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -1106,6 +1106,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB @scala.annotation.tailrec def cleanQueryInScalarSubquery(p: LogicalPlan): LogicalPlan = p match { case s: SubqueryAlias => cleanQueryInScalarSubquery(s.child) + // Skip SQL function node added by the Analyzer + case s: SQLFunctionNode => cleanQueryInScalarSubquery(s.child) case p: Project => cleanQueryInScalarSubquery(p.child) case h: ResolvedHint => cleanQueryInScalarSubquery(h.child) case child => child diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SQLFunctionExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SQLFunctionExpression.scala index fb6935d64d4c4..37981f47287da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SQLFunctionExpression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SQLFunctionExpression.scala @@ -18,8 +18,8 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.catalog.SQLFunction -import org.apache.spark.sql.catalyst.expressions.{Expression, Unevaluable} -import org.apache.spark.sql.catalyst.trees.TreePattern.{SQL_FUNCTION_EXPRESSION, TreePattern} +import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression, Unevaluable} +import org.apache.spark.sql.catalyst.trees.TreePattern.{SQL_FUNCTION_EXPRESSION, SQL_SCALAR_FUNCTION, TreePattern} import org.apache.spark.sql.types.DataType /** @@ -39,3 +39,52 @@ case class SQLFunctionExpression( newChildren: 
IndexedSeq[Expression]): SQLFunctionExpression = copy(inputs = newChildren) final override val nodePatterns: Seq[TreePattern] = Seq(SQL_FUNCTION_EXPRESSION) } + +/** + * A wrapper node for a SQL scalar function expression. + */ +case class SQLScalarFunction(function: SQLFunction, inputs: Seq[Expression], child: Expression) + extends UnaryExpression with Unevaluable { + override def dataType: DataType = child.dataType + override def toString: String = s"${function.name}(${inputs.mkString(", ")})" + override def sql: String = s"${function.name}(${inputs.map(_.sql).mkString(", ")})" + override protected def withNewChildInternal(newChild: Expression): SQLScalarFunction = { + copy(child = newChild) + } + final override val nodePatterns: Seq[TreePattern] = Seq(SQL_SCALAR_FUNCTION) + // The `inputs` is for display only and does not matter in execution. + override lazy val canonicalized: Expression = copy(inputs = Nil, child = child.canonicalized) + override lazy val deterministic: Boolean = { + function.deterministic.getOrElse(true) && children.forall(_.deterministic) + } +} + +/** + * Provide a way to keep state during analysis for resolving nested SQL functions. + * + * @param nestedSQLFunctionDepth The nested depth in the SQL function resolution. A SQL function + * expression should only be expanded as a [[SQLScalarFunction]] if + * the nested depth is 0. 
+ */ +case class SQLFunctionContext(nestedSQLFunctionDepth: Int = 0) + +object SQLFunctionContext { + + private val value = new ThreadLocal[SQLFunctionContext]() { + override def initialValue: SQLFunctionContext = SQLFunctionContext() + } + + def get: SQLFunctionContext = value.get() + + def reset(): Unit = value.remove() + + private def set(context: SQLFunctionContext): Unit = value.set(context) + + def withSQLFunction[A](f: => A): A = { + val originContext = value.get() + val context = originContext.copy( + nestedSQLFunctionDepth = originContext.nestedSQLFunctionDepth + 1) + set(context) + try f finally { set(originContext) } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 3c6dfe5ac8445..b123952c5f086 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -38,9 +38,9 @@ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.analysis.TableFunctionRegistry.TableFunctionBuilder import org.apache.spark.sql.catalyst.catalog.SQLFunction.parseDefault -import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Cast, Expression, ExpressionInfo, NamedArgumentExpression, NamedExpression, UpCast} +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Cast, Expression, ExpressionInfo, NamedArgumentExpression, NamedExpression, ScalarSubquery, UpCast} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface} -import org.apache.spark.sql.catalyst.plans.logical.{FunctionSignature, InputParameter, LogicalPlan, NamedParametersSupport, Project, SubqueryAlias, View} +import 
org.apache.spark.sql.catalyst.plans.logical.{FunctionSignature, InputParameter, LocalRelation, LogicalPlan, NamedParametersSupport, Project, SubqueryAlias, View} import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils} import org.apache.spark.sql.connector.catalog.CatalogManager @@ -48,7 +48,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAM import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE -import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType} import org.apache.spark.sql.util.{CaseInsensitiveStringMap, PartitioningUtils} import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils @@ -1561,6 +1561,103 @@ class SessionCatalog( } } + /** + * Constructs a scalar SQL function logical plan. The logical plan will be used to + * construct the actual expression from the function inputs and body. + * + * The body of a scalar SQL function can either be an expression or a query that returns + * a single column.
+ * + * Example scalar SQL function with an expression: + * + * CREATE FUNCTION area(width DOUBLE, height DOUBLE) RETURNS DOUBLE + * RETURN width * height; + * + * Query: + * + * SELECT area(a, b) FROM t; + * + * SQL function plan: + * + * Project [CAST(width * height AS DOUBLE) AS area] + * +- Project [CAST(a AS DOUBLE) AS width, CAST(b AS DOUBLE) AS height] + * +- LocalRelation [a, b] + * + * Example scalar SQL function with a subquery: + * + * CREATE FUNCTION foo(x INT) RETURNS INT + * RETURN SELECT SUM(b) FROM t WHERE x = a; + * + * SELECT foo(a) FROM t; + * + * SQL function plan: + * + * Project [scalar-subquery AS foo] + * : +- Aggregate [] [sum(b)] + * : +- Filter [outer(x) = a] + * : +- Relation [a, b] + * +- Project [CAST(a AS INT) AS x] + * +- LocalRelation [a, b] + */ + def makeSQLFunctionPlan( + name: String, + function: SQLFunction, + input: Seq[Expression]): LogicalPlan = { + def metaForFuncInputAlias = { + new MetadataBuilder() + .putString("__funcInputAlias", "true") + .build() + } + assert(!function.isTableFunc) + val funcName = function.name.funcName + + // Use captured SQL configs when parsing a SQL function. + val conf = new SQLConf() + function.getSQLConfigs.foreach { case (k, v) => conf.settings.put(k, v) } + SQLConf.withExistingConf(conf) { + val inputParam = function.inputParam + val returnType = function.getScalarFuncReturnType + val (expression, query) = function.getExpressionAndQuery(parser, isTableFunc = false) + assert(expression.isDefined || query.isDefined) + + // Check function arguments + val paramSize = inputParam.map(_.size).getOrElse(0) + if (input.size > paramSize) { + throw QueryCompilationErrors.wrongNumArgsError( + name, paramSize.toString, input.size) + } + + val inputs = inputParam.map { param => + // Attributes referencing the input parameters inside the function can use the + // function name as a qualifier. 
E.G.: + // `create function foo(a int) returns int return foo.a` + val qualifier = Seq(funcName) + val paddedInput = input ++ + param.takeRight(paramSize - input.size).map { p => + val defaultExpr = p.getDefault() + if (defaultExpr.isDefined) { + Cast(parseDefault(defaultExpr.get, parser), p.dataType) + } else { + throw QueryCompilationErrors.wrongNumArgsError( + name, paramSize.toString, input.size) + } + } + + paddedInput.zip(param.fields).map { + case (expr, param) => + Alias(Cast(expr, param.dataType), param.name)( + qualifier = qualifier, + // mark the alias as function input + explicitMetadata = Some(metaForFuncInputAlias)) + } + }.getOrElse(Nil) + + val body = if (query.isDefined) ScalarSubquery(query.get) else expression.get + Project(Alias(Cast(body, returnType), funcName)() :: Nil, + Project(inputs, LocalRelation(inputs.flatMap(_.references)))) + } + } + /** * Constructs a [[TableFunctionBuilder]] based on the provided class that represents a function. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala index b00cae22cf9c0..a76ca7b15c278 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala @@ -45,6 +45,14 @@ trait UserDefinedFunction { */ def properties: Map[String, String] + /** + * Get SQL configs from the function properties. + * Use this to restore the SQL configs that should be used for this function. + */ + def getSQLConfigs: Map[String, String] = { + UserDefinedFunction.propertiesToSQLConfigs(properties) + } + /** * Owner of the function */ @@ -142,4 +150,17 @@ object UserDefinedFunction { * Verify if the function is a [[UserDefinedFunction]]. 
*/ def isUserDefinedFunction(className: String): Boolean = SQLFunction.isSQLFunction(className) + + /** + * Convert properties to SQL configs: keep keys with SQL_CONFIG_PREFIX and strip the prefix. + */ + def propertiesToSQLConfigs(properties: Map[String, String]): Map[String, String] = { + try { + for ((key, value) <- properties if key.startsWith(SQL_CONFIG_PREFIX)) + yield (key.substring(SQL_CONFIG_PREFIX.length), value) + } catch { + case e: Exception => throw SparkException.internalError( + "Corrupted user defined function SQL configs in catalog", cause = e) + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSQLFunctionNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSQLFunctionNode.scala new file mode 100644 index 0000000000000..d9da38b4c2af4 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSQLFunctionNode.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.analysis.{SQLFunctionExpression, SQLFunctionNode, SQLScalarFunction, SQLTableFunction} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule + +/** + * This rule removes [[SQLScalarFunction]] and [[SQLFunctionNode]] wrappers. They are kept + * until the end of the analysis stage because we want to see which part of an analyzed logical + * plan is generated from a SQL function and also perform ACL checks. + */ +object EliminateSQLFunctionNode extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { + // Include subqueries when eliminating SQL function expressions, otherwise we might miss + // expressions in subqueries which can be inlined by the rule `OptimizeOneRowRelationSubquery`. + plan.transformWithSubqueries { + case SQLFunctionNode(_, child) => child + case f: SQLTableFunction => + throw SparkException.internalError( + s"SQL table function plan should be rewritten during analysis: $f") + case p: LogicalPlan => p.transformExpressions { + case f: SQLScalarFunction => f.child + case f: SQLFunctionExpression => + throw SparkException.internalError( + s"SQL function expression should be rewritten during analysis: $f") + } + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 8ee2226947ec9..9d269f37e58b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -315,6 +315,7 @@ abstract class Optimizer(catalogManager: CatalogManager) EliminateSubqueryAliases, EliminatePipeOperators, EliminateView, + EliminateSQLFunctionNode, ReplaceExpressions, RewriteNonCorrelatedExists,
PullOutGroupingExpressions, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index b56085ecae8d6..9856a26346f6a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -93,6 +93,7 @@ object TreePattern extends Enumeration { val SESSION_WINDOW: Value = Value val SORT: Value = Value val SQL_FUNCTION_EXPRESSION: Value = Value + val SQL_SCALAR_FUNCTION: Value = Value val SQL_TABLE_FUNCTION: Value = Value val SUBQUERY_ALIAS: Value = Value val SUM: Value = Value diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out new file mode 100644 index 0000000000000..b3c10e929f297 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out @@ -0,0 +1,575 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE FUNCTION foo1a0() RETURNS INT RETURN 1 +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo1a0`" + } +} + + +-- !query +SELECT foo1a0() +-- !query analysis +Project [spark_catalog.default.foo1a0() AS spark_catalog.default.foo1a0()#x] ++- Project + +- OneRowRelation + + +-- !query +SELECT foo1a0(1) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "0", + "functionName" : "`spark_catalog`.`default`.`foo1a0`" + }, + "queryContext" : [ { 
+ "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 16, + "fragment" : "foo1a0(1)" + } ] +} + + +-- !query +CREATE FUNCTION foo1a1(a INT) RETURNS INT RETURN 1 +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo1a1`" + } +} + + +-- !query +SELECT foo1a1(1) +-- !query analysis +Project [spark_catalog.default.foo1a1(a#x) AS spark_catalog.default.foo1a1(1)#x] ++- Project [cast(1 as int) AS a#x] + +- OneRowRelation + + +-- !query +SELECT foo1a1(1, 2) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "2", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "1", + "functionName" : "`spark_catalog`.`default`.`foo1a1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 19, + "fragment" : "foo1a1(1, 2)" + } ] +} + + +-- !query +CREATE FUNCTION foo1a2(a INT, b INT, c INT, d INT) RETURNS INT RETURN 1 +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo1a2`" + } +} + + +-- !query +SELECT foo1a2(1, 2, 3, 4) +-- !query analysis +Project [spark_catalog.default.foo1a2(a#x, b#x, c#x, d#x) AS spark_catalog.default.foo1a2(1, 2, 3, 4)#x] ++- Project [cast(1 as int) AS a#x, cast(2 as int) AS b#x, cast(3 as int) AS c#x, cast(4 as int) AS d#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_1a(a INT) RETURNS INT RETURN a +-- !query analysis 
+org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_1a`" + } +} + + +-- !query +SELECT foo2_1a(5) +-- !query analysis +Project [spark_catalog.default.foo2_1a(a#x) AS spark_catalog.default.foo2_1a(5)#x] ++- Project [cast(5 as int) AS a#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_1b(a INT, b INT) RETURNS INT RETURN a + b +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_1b`" + } +} + + +-- !query +SELECT foo2_1b(5, 6) +-- !query analysis +Project [spark_catalog.default.foo2_1b(a#x, b#x) AS spark_catalog.default.foo2_1b(5, 6)#x] ++- Project [cast(5 as int) AS a#x, cast(6 as int) AS b#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_1c(a INT, b INT) RETURNS INT RETURN 10 * (a + b) + 100 * (a -b) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_1c`" + } +} + + +-- !query +SELECT foo2_1c(5, 6) +-- !query analysis +Project [spark_catalog.default.foo2_1c(a#x, b#x) AS spark_catalog.default.foo2_1c(5, 6)#x] ++- Project [cast(5 as int) AS a#x, cast(6 as int) AS b#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_1d(a INT, b INT) RETURNS INT RETURN ABS(a) - LENGTH(CAST(b AS VARCHAR(10))) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + 
"messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_1d`" + } +} + + +-- !query +SELECT foo2_1d(-5, 6) +-- !query analysis +Project [spark_catalog.default.foo2_1d(a#x, b#x) AS spark_catalog.default.foo2_1d(-5, 6)#x] ++- Project [cast(-5 as int) AS a#x, cast(6 as int) AS b#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_2a(a INT) RETURNS INT RETURN SELECT a +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_2a`" + } +} + + +-- !query +SELECT foo2_2a(5) +-- !query analysis +Project [spark_catalog.default.foo2_2a(a#x) AS spark_catalog.default.foo2_2a(5)#x] ++- Project [cast(5 as int) AS a#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_2b(a INT) RETURNS INT RETURN 1 + (SELECT a) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_2b`" + } +} + + +-- !query +SELECT foo2_2b(5) +-- !query analysis +Project [spark_catalog.default.foo2_2b(a#x) AS spark_catalog.default.foo2_2b(5)#x] +: +- Project [outer(a#x)] +: +- OneRowRelation ++- Project [cast(5 as int) AS a#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_2c(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT a)) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`a`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 21, + "stopIndex" : 21, + "fragment" : "a" 
+ } ] +} + + +-- !query +CREATE FUNCTION foo2_2d(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT (SELECT (SELECT a)))) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`a`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 37, + "stopIndex" : 37, + "fragment" : "a" + } ] +} + + +-- !query +CREATE FUNCTION foo2_2e(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) WHERE c1 = 2 +UNION ALL +SELECT a + 1 FROM (VALUES 1) AS V(c1) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_2e`" + } +} + + +-- !query +CREATE FUNCTION foo2_2f(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +EXCEPT +SELECT a + 1 FROM (VALUES 1) AS V(a) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_2f`" + } +} + + +-- !query +CREATE FUNCTION foo2_2g(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +INTERSECT +SELECT a FROM (VALUES 1) AS V(a) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_2g`" + } +} + + +-- !query +DROP TABLE IF EXISTS t1 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t1 + + +-- !query 
+DROP TABLE IF EXISTS t2 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t2 + + +-- !query +DROP TABLE IF EXISTS ts +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.ts + + +-- !query +DROP TABLE IF EXISTS tm +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.tm + + +-- !query +DROP TABLE IF EXISTS ta +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.ta + + +-- !query +DROP TABLE IF EXISTS V1 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.V1 + + +-- !query +DROP TABLE IF EXISTS V2 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.V2 + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`t1`, true, true, false + + +-- !query +DROP VIEW IF EXISTS t2 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`t2`, true, true, false + + +-- !query +DROP VIEW IF EXISTS ts +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`ts`, true, true, false + + +-- !query +DROP VIEW IF EXISTS tm +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`tm`, true, true, false + + +-- !query +DROP VIEW IF EXISTS ta +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`ta`, true, true, false + + +-- !query +DROP VIEW IF EXISTS V1 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`V1`, true, true, false + + +-- !query +DROP VIEW IF EXISTS V2 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`V2`, true, true, false + + +-- !query +CREATE FUNCTION foo2_3(a INT, b INT) RETURNS INT RETURN a + b +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + 
"sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_3`" + } +} + + +-- !query +CREATE VIEW V1(c1, c2) AS VALUES (1, 2), (3, 4), (5, 6) +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`V1`, [(c1,None), (c2,None)], VALUES (1, 2), (3, 4), (5, 6), false, false, PersistedView, COMPENSATION, true + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE VIEW V2(c1, c2) AS VALUES (-1, -2), (-3, -4), (-5, -6) +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`V2`, [(c1,None), (c2,None)], VALUES (-1, -2), (-3, -4), (-5, -6), false, false, PersistedView, COMPENSATION, true + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT foo2_3(c1, c2), foo2_3(c2, 1), foo2_3(c1, c2) - foo2_3(c2, c1 - 1) FROM V1 ORDER BY 1, 2, 3 +-- !query analysis +Sort [spark_catalog.default.foo2_3(c1, c2)#x ASC NULLS FIRST, spark_catalog.default.foo2_3(c2, 1)#x ASC NULLS FIRST, (spark_catalog.default.foo2_3(c1, c2) - spark_catalog.default.foo2_3(c2, (c1 - 1)))#x ASC NULLS FIRST], true ++- Project [spark_catalog.default.foo2_3(a#x, b#x) AS spark_catalog.default.foo2_3(c1, c2)#x, spark_catalog.default.foo2_3(a#x, b#x) AS spark_catalog.default.foo2_3(c2, 1)#x, (spark_catalog.default.foo2_3(a#x, b#x) - spark_catalog.default.foo2_3(a#x, b#x)) AS (spark_catalog.default.foo2_3(c1, c2) - spark_catalog.default.foo2_3(c2, (c1 - 1)))#x] + +- Project [c1#x, c2#x, cast(c1#x as int) AS a#x, cast(c2#x as int) AS b#x, cast(c2#x as int) AS a#x, cast(1 as int) AS b#x, cast(c1#x as int) AS a#x, cast(c2#x as int) AS b#x, cast(c2#x as int) AS a#x, cast((c1#x - 1) as int) AS b#x] + +- SubqueryAlias spark_catalog.default.v1 + +- View (`spark_catalog`.`default`.`v1`, [c1#x, c2#x]) + +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT * FROM V1 WHERE foo2_3(c1, 0) = c1 AND foo2_3(c1, c2) < 8 +-- !query analysis 
+Project [c1#x, c2#x] ++- Project [c1#x, c2#x] + +- Filter ((spark_catalog.default.foo2_3(a#x, b#x) = c1#x) AND (spark_catalog.default.foo2_3(a#x, b#x) < 8)) + +- Project [c1#x, c2#x, cast(c1#x as int) AS a#x, cast(0 as int) AS b#x, cast(c1#x as int) AS a#x, cast(c2#x as int) AS b#x] + +- SubqueryAlias spark_catalog.default.v1 + +- View (`spark_catalog`.`default`.`v1`, [c1#x, c2#x]) + +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT foo2_3(SUM(c1), SUM(c2)), SUM(c1) + SUM(c2), SUM(foo2_3(c1, c2) + foo2_3(c2, c1) - foo2_3(c2, c1)) +FROM V1 +-- !query analysis +Project [spark_catalog.default.foo2_3(a#x, b#x) AS spark_catalog.default.foo2_3(sum(c1), sum(c2))#x, (sum(c1) + sum(c2))#xL, sum(((spark_catalog.default.foo2_3(c1, c2) + spark_catalog.default.foo2_3(c2, c1)) - spark_catalog.default.foo2_3(c2, c1)))#xL] ++- Project [sum(c1)#xL, sum(c2)#xL, (sum(c1) + sum(c2))#xL, sum(((spark_catalog.default.foo2_3(c1, c2) + spark_catalog.default.foo2_3(c2, c1)) - spark_catalog.default.foo2_3(c2, c1)))#xL, cast(sum(c1)#xL as int) AS a#x, cast(sum(c2)#xL as int) AS b#x] + +- Aggregate [sum(c1#x) AS sum(c1)#xL, sum(c2#x) AS sum(c2)#xL, (sum(c1#x) + sum(c2#x)) AS (sum(c1) + sum(c2))#xL, sum(((spark_catalog.default.foo2_3(a#x, b#x) + spark_catalog.default.foo2_3(a#x, b#x)) - spark_catalog.default.foo2_3(a#x, b#x))) AS sum(((spark_catalog.default.foo2_3(c1, c2) + spark_catalog.default.foo2_3(c2, c1)) - spark_catalog.default.foo2_3(c2, c1)))#xL] + +- Project [c1#x, c2#x, cast(c1#x as int) AS a#x, cast(c2#x as int) AS b#x, cast(c2#x as int) AS a#x, cast(c1#x as int) AS b#x, cast(c2#x as int) AS a#x, cast(c1#x as int) AS b#x] + +- SubqueryAlias spark_catalog.default.v1 + +- View (`spark_catalog`.`default`.`v1`, [c1#x, c2#x]) + +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE FUNCTION foo2_4a(a ARRAY) RETURNS STRING RETURN +SELECT 
array_sort(a, (i, j) -> rank[i] - rank[j])[0] FROM (SELECT MAP('a', 1, 'b', 2) rank) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_4a`" + } +} + + +-- !query +SELECT foo2_4a(ARRAY('a', 'b')) +-- !query analysis +Project [spark_catalog.default.foo2_4a(a#x) AS spark_catalog.default.foo2_4a(array(a, b))#x] +: +- Project [array_sort(outer(a#x), lambdafunction((rank#x[lambda i#x] - rank#x[lambda j#x]), lambda i#x, lambda j#x, false), false)[0] AS array_sort(outer(foo2_4a.a), lambdafunction((rank[namedlambdavariable()] - rank[namedlambdavariable()]), namedlambdavariable(), namedlambdavariable()))[0]#x] +: +- SubqueryAlias __auto_generated_subquery_name +: +- Project [map(a, 1, b, 2) AS rank#x] +: +- OneRowRelation ++- Project [cast(array(a, b) as array) AS a#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_4b(m MAP, k STRING) RETURNS STRING RETURN +SELECT v || ' ' || v FROM (SELECT upper(m[k]) AS v) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_4b`" + } +} + + +-- !query +SELECT foo2_4b(map('a', 'hello', 'b', 'world'), 'a') +-- !query analysis +Project [spark_catalog.default.foo2_4b(m#x, k#x) AS spark_catalog.default.foo2_4b(map(a, hello, b, world), a)#x] +: +- Project [concat(concat(v#x, ), v#x) AS concat(concat(v, ), v)#x] +: +- SubqueryAlias __auto_generated_subquery_name +: +- Project [upper(outer(m#x)[outer(k#x)]) AS v#x] +: +- OneRowRelation ++- Project [cast(map(a, hello, b, world) as map) AS m#x, cast(a as string) AS k#x] + +- OneRowRelation + + +-- !query +DROP VIEW V2 
+-- !query analysis +DropTableCommand `spark_catalog`.`default`.`V2`, false, true, false + + +-- !query +DROP VIEW V1 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`V1`, false, true, false diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql new file mode 100644 index 0000000000000..34cb41d726766 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql @@ -0,0 +1,122 @@ +-- test cases for SQL User Defined Functions + +-- 1. CREATE FUNCTION +-- 1.1 Parameter +-- 1.1.a A scalar function with various numbers of parameters +-- Expect success +CREATE FUNCTION foo1a0() RETURNS INT RETURN 1; +-- Expect: 1 +SELECT foo1a0(); +-- Expect failure +SELECT foo1a0(1); + +CREATE FUNCTION foo1a1(a INT) RETURNS INT RETURN 1; +-- Expect: 1 +SELECT foo1a1(1); +-- Expect failure +SELECT foo1a1(1, 2); + +CREATE FUNCTION foo1a2(a INT, b INT, c INT, d INT) RETURNS INT RETURN 1; +-- Expect: 1 +SELECT foo1a2(1, 2, 3, 4); + +------------------------------- +-- 2.
Scalar SQL UDF +-- 2.1 deterministic simple expressions +CREATE FUNCTION foo2_1a(a INT) RETURNS INT RETURN a; +SELECT foo2_1a(5); + +CREATE FUNCTION foo2_1b(a INT, b INT) RETURNS INT RETURN a + b; +SELECT foo2_1b(5, 6); + +CREATE FUNCTION foo2_1c(a INT, b INT) RETURNS INT RETURN 10 * (a + b) + 100 * (a -b); +SELECT foo2_1c(5, 6); + +CREATE FUNCTION foo2_1d(a INT, b INT) RETURNS INT RETURN ABS(a) - LENGTH(CAST(b AS VARCHAR(10))); +SELECT foo2_1d(-5, 6); + +-- 2.2 deterministic complex expression with subqueries +-- 2.2.1 Nested Scalar subqueries +CREATE FUNCTION foo2_2a(a INT) RETURNS INT RETURN SELECT a; +SELECT foo2_2a(5); + +CREATE FUNCTION foo2_2b(a INT) RETURNS INT RETURN 1 + (SELECT a); +SELECT foo2_2b(5); + +-- Expect error: deep correlation is not yet supported +CREATE FUNCTION foo2_2c(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT a)); +-- SELECT foo2_2c(5); + +-- Expect error: deep correlation is not yet supported +CREATE FUNCTION foo2_2d(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT (SELECT (SELECT a)))); +-- SELECT foo2_2d(5); + +-- 2.2.2 Set operations +-- Expect error: correlated scalar subquery must be aggregated. +CREATE FUNCTION foo2_2e(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) WHERE c1 = 2 +UNION ALL +SELECT a + 1 FROM (VALUES 1) AS V(c1); +-- SELECT foo2_2e(5); + +-- Expect error: correlated scalar subquery must be aggregated. +CREATE FUNCTION foo2_2f(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +EXCEPT +SELECT a + 1 FROM (VALUES 1) AS V(a); +-- SELECT foo2_2f(5); + +-- Expect error: correlated scalar subquery must be aggregated. +CREATE FUNCTION foo2_2g(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +INTERSECT +SELECT a FROM (VALUES 1) AS V(a); +-- SELECT foo2_2g(5); + +-- Prepare by dropping views or tables if they already exist. 
+DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS ts; +DROP TABLE IF EXISTS tm; +DROP TABLE IF EXISTS ta; +DROP TABLE IF EXISTS V1; +DROP TABLE IF EXISTS V2; +DROP VIEW IF EXISTS t1; +DROP VIEW IF EXISTS t2; +DROP VIEW IF EXISTS ts; +DROP VIEW IF EXISTS tm; +DROP VIEW IF EXISTS ta; +DROP VIEW IF EXISTS V1; +DROP VIEW IF EXISTS V2; + +-- 2.3 Calling Scalar UDF from various places +CREATE FUNCTION foo2_3(a INT, b INT) RETURNS INT RETURN a + b; +CREATE VIEW V1(c1, c2) AS VALUES (1, 2), (3, 4), (5, 6); +CREATE VIEW V2(c1, c2) AS VALUES (-1, -2), (-3, -4), (-5, -6); + +-- 2.3.1 Multiple times in the select list +SELECT foo2_3(c1, c2), foo2_3(c2, 1), foo2_3(c1, c2) - foo2_3(c2, c1 - 1) FROM V1 ORDER BY 1, 2, 3; + +-- 2.3.2 In the WHERE clause +SELECT * FROM V1 WHERE foo2_3(c1, 0) = c1 AND foo2_3(c1, c2) < 8; + +-- 2.3.3 Different places around an aggregate +SELECT foo2_3(SUM(c1), SUM(c2)), SUM(c1) + SUM(c2), SUM(foo2_3(c1, c2) + foo2_3(c2, c1) - foo2_3(c2, c1)) +FROM V1; + +-- 2.4 Scalar UDF with complex one row relation subquery +-- 2.4.1 higher order functions +CREATE FUNCTION foo2_4a(a ARRAY) RETURNS STRING RETURN +SELECT array_sort(a, (i, j) -> rank[i] - rank[j])[0] FROM (SELECT MAP('a', 1, 'b', 2) rank); + +SELECT foo2_4a(ARRAY('a', 'b')); + +-- 2.4.2 built-in functions +CREATE FUNCTION foo2_4b(m MAP, k STRING) RETURNS STRING RETURN +SELECT v || ' ' || v FROM (SELECT upper(m[k]) AS v); + +SELECT foo2_4b(map('a', 'hello', 'b', 'world'), 'a'); + +-- Clean up +DROP VIEW V2; +DROP VIEW V1; diff --git a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out new file mode 100644 index 0000000000000..9f7af7c644871 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out @@ -0,0 +1,484 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE FUNCTION foo1a0() RETURNS INT RETURN 1 +-- !query schema +struct<> +-- !query output + + + +-- !query 
+SELECT foo1a0() +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT foo1a0(1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "0", + "functionName" : "`spark_catalog`.`default`.`foo1a0`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 16, + "fragment" : "foo1a0(1)" + } ] +} + + +-- !query +CREATE FUNCTION foo1a1(a INT) RETURNS INT RETURN 1 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo1a1(1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT foo1a1(1, 2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "2", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "1", + "functionName" : "`spark_catalog`.`default`.`foo1a1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 19, + "fragment" : "foo1a1(1, 2)" + } ] +} + + +-- !query +CREATE FUNCTION foo1a2(a INT, b INT, c INT, d INT) RETURNS INT RETURN 1 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo1a2(1, 2, 3, 4) +-- !query schema +struct +-- !query output +1 + + +-- !query +CREATE FUNCTION foo2_1a(a INT) RETURNS INT RETURN a +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_1a(5) +-- !query schema +struct +-- !query output +5 + + +-- !query +CREATE FUNCTION foo2_1b(a INT, b INT) RETURNS INT RETURN a + b +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_1b(5, 6) +-- !query schema +struct +-- !query output +11 + + +-- !query +CREATE FUNCTION foo2_1c(a INT, b 
INT) RETURNS INT RETURN 10 * (a + b) + 100 * (a -b) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_1c(5, 6) +-- !query schema +struct +-- !query output +10 + + +-- !query +CREATE FUNCTION foo2_1d(a INT, b INT) RETURNS INT RETURN ABS(a) - LENGTH(CAST(b AS VARCHAR(10))) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_1d(-5, 6) +-- !query schema +struct +-- !query output +4 + + +-- !query +CREATE FUNCTION foo2_2a(a INT) RETURNS INT RETURN SELECT a +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_2a(5) +-- !query schema +struct +-- !query output +5 + + +-- !query +CREATE FUNCTION foo2_2b(a INT) RETURNS INT RETURN 1 + (SELECT a) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_2b(5) +-- !query schema +struct +-- !query output +6 + + +-- !query +CREATE FUNCTION foo2_2c(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT a)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`a`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 21, + "stopIndex" : 21, + "fragment" : "a" + } ] +} + + +-- !query +CREATE FUNCTION foo2_2d(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT (SELECT (SELECT a)))) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`a`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 37, + "stopIndex" : 37, + "fragment" : "a" + } ] +} + + +-- !query +CREATE FUNCTION foo2_2e(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) WHERE c1 = 2 +UNION ALL +SELECT a + 1 FROM (VALUES 1) AS V(c1) +-- !query schema +struct<> +-- !query 
output + + + +-- !query +CREATE FUNCTION foo2_2f(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +EXCEPT +SELECT a + 1 FROM (VALUES 1) AS V(a) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION foo2_2g(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +INTERSECT +SELECT a FROM (VALUES 1) AS V(a) +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS ts +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS tm +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS ta +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS V1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS V2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS ts +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS tm +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS ta +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS V1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS V2 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION foo2_3(a INT, b INT) RETURNS INT RETURN a + b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW V1(c1, c2) AS VALUES (1, 2), (3, 4), (5, 6) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW V2(c1, c2) AS VALUES (-1, -2), (-3, -4), (-5, -6) +-- !query schema +struct<> +-- !query output + + + 
+-- !query +SELECT foo2_3(c1, c2), foo2_3(c2, 1), foo2_3(c1, c2) - foo2_3(c2, c1 - 1) FROM V1 ORDER BY 1, 2, 3 +-- !query schema +struct +-- !query output +3 3 1 +7 5 1 +11 7 1 + + +-- !query +SELECT * FROM V1 WHERE foo2_3(c1, 0) = c1 AND foo2_3(c1, c2) < 8 +-- !query schema +struct +-- !query output +1 2 +3 4 + + +-- !query +SELECT foo2_3(SUM(c1), SUM(c2)), SUM(c1) + SUM(c2), SUM(foo2_3(c1, c2) + foo2_3(c2, c1) - foo2_3(c2, c1)) +FROM V1 +-- !query schema +struct +-- !query output +21 21 21 + + +-- !query +CREATE FUNCTION foo2_4a(a ARRAY<STRING>) RETURNS STRING RETURN +SELECT array_sort(a, (i, j) -> rank[i] - rank[j])[0] FROM (SELECT MAP('a', 1, 'b', 2) rank) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_4a(ARRAY('a', 'b')) +-- !query schema +struct +-- !query output +a + + +-- !query +CREATE FUNCTION foo2_4b(m MAP<STRING, STRING>, k STRING) RETURNS STRING RETURN +SELECT v || ' ' || v FROM (SELECT upper(m[k]) AS v) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_4b(map('a', 'hello', 'b', 'world'), 'a') +-- !query schema +struct +-- !query output +HELLO HELLO + + +-- !query +DROP VIEW V2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW V1 +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala new file mode 100644 index 0000000000000..4da3b9ab1d06b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.test.SharedSparkSession + +/** + * Tests scalar SQL user-defined functions (UDFs) created via CREATE FUNCTION ... RETURN. + */ +class SQLFunctionSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + protected override def beforeAll(): Unit = { + super.beforeAll() + Seq((0, 1), (1, 2)).toDF("a", "b").createOrReplaceTempView("t") /* queried by both tests */ + } + + test("SQL scalar function") { + withUserDefinedFunction("area" -> false) { + sql( + """ + |CREATE FUNCTION area(width DOUBLE, height DOUBLE) + |RETURNS DOUBLE + |RETURN width * height + |""".stripMargin) + checkAnswer(sql("SELECT area(1, 2)"), Row(2)) + checkAnswer(sql("SELECT area(a, b) FROM t"), Seq(Row(0), Row(2))) /* 0 * 1 and 1 * 2 */ + } + } + + test("SQL scalar function with subquery in the function body") { + withUserDefinedFunction("foo" -> false) { + withTable("tbl") { + sql("CREATE TABLE tbl AS SELECT * FROM VALUES (1, 2), (1, 3), (2, 3) t(a, b)") + sql( + """ + |CREATE FUNCTION foo(x INT) RETURNS INT + |RETURN SELECT SUM(b) FROM tbl WHERE x = a; + |""".stripMargin) + checkAnswer(sql("SELECT foo(1)"), Row(5)) /* b = 2 and b = 3 where a = 1 */ + checkAnswer(sql("SELECT foo(a) FROM t"), Seq(Row(null), Row(5))) /* a = 0: no match */ + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index c00f00ceaa355..a7af22a0554e9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -79,7 +79,8 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { assert(info.getSource === "built-in") val validSources = Seq( - "built-in", "hive", "python_udf", "scala_udf", "java_udf", "python_udtf", "internal") + "built-in", "hive", "python_udf", "scala_udf", "java_udf", "python_udtf", "internal", + "sql_udf") validSources.foreach { source => val info = new ExpressionInfo( "testClass", null, "testName", null, "", "", "", "", "", "", source)