From 1433a2854af13e071b83cacb6a498c76eb40f401 Mon Sep 17 00:00:00 2001 From: shiyuhang0 <52435083+shiyuhang0@users.noreply.github.com> Date: Fri, 18 Mar 2022 17:06:31 +0800 Subject: [PATCH] replace v1 to v2 (#2264) --- README.md | 4 - .../com/pingcap/tispark/TiDBRelation.scala | 156 +----------------- .../com/pingcap/tispark/v2/TiDBTable.scala | 34 +++- .../tispark/v2/TiDBTableProvider.scala | 35 ++-- .../tispark/v2/sink/TiDBBatchWrite.scala | 34 ++++ .../tispark/v2/sink/TiDBDataWrite.scala | 48 ++++++ .../v2/sink/TiDBDataWriterFactory.scala | 35 ++++ .../tispark/v2/sink/TiDBWriterBuilder.scala | 41 +++++ .../org/apache/spark/sql/TiContext.scala | 55 ------ .../org/apache/spark/sql/TiExtensions.scala | 8 +- .../catalyst/expressions/TiExprUtils.scala | 1 - .../spark/sql/execution/CoprocessorRDD.scala | 2 +- .../datasource/BasicBatchWriteSuite.scala | 11 ++ .../tispark/datasource/BasicSQLSuite.scala | 9 +- .../TiAggregationProjectionV2.scala | 4 + 15 files changed, 236 insertions(+), 241 deletions(-) create mode 100644 core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBBatchWrite.scala create mode 100644 core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBDataWrite.scala create mode 100644 core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBDataWriterFactory.scala create mode 100644 core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBWriterBuilder.scala diff --git a/README.md b/README.md index 6e1504f304..39e7222631 100644 --- a/README.md +++ b/README.md @@ -130,10 +130,6 @@ spark.sql("use tidb_catalog.${database}") spark.sql("select count(*) from ${table}").show ``` -> **Note:** -> -> If you use TiSpark 2.0+, for spark-submit on Pyspark, `tidbMapDatabase` is still required and `TiExtension` is not supported yet. PingCAP is working on this issue. - ## Current Version ``` diff --git a/core/src/main/scala/com/pingcap/tispark/TiDBRelation.scala b/core/src/main/scala/com/pingcap/tispark/TiDBRelation.scala index 1086485335..a2ef7c5800 100644 --- a/core/src/main/scala/com/pingcap/tispark/TiDBRelation.scala +++ b/core/src/main/scala/com/pingcap/tispark/TiDBRelation.scala @@ -15,155 +15,11 @@ package com.pingcap.tispark -import com.pingcap.tikv.TiSession -import com.pingcap.tikv.exception.{TiBatchWriteException, TiClientInternalException} -import com.pingcap.tikv.key.Handle -import com.pingcap.tikv.meta.{TiDAGRequest, TiTableInfo, TiTimestamp} -import com.pingcap.tispark.utils.TiUtil -import com.pingcap.tispark.write.{TiDBOptions, TiDBWriter} -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} -import org.apache.spark.sql.execution._ -import org.apache.spark.sql.sources.{BaseRelation, InsertableRelation} -import org.apache.spark.sql.tispark.{TiHandleRDD, TiRowRDD} -import org.apache.spark.sql.types.{ArrayType, LongType, Metadata, ObjectType, StructType} -import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode, execution} +import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.sources.BaseRelation +import org.apache.spark.sql.types.StructType -import scala.collection.mutable.ListBuffer - -case class TiDBRelation( - session: TiSession, - tableRef: TiTableReference, - meta: MetaManager, - var ts: TiTimestamp = null, - options: Option[TiDBOptions] = None)(@transient val sqlContext: SQLContext) - extends BaseRelation - with InsertableRelation { - lazy val table: TiTableInfo = getTableOrThrow(tableRef.databaseName, tableRef.tableName) - - override lazy val schema: StructType = TiUtil.getSchemaFromTable(table) - lazy val isTiFlashReplicaAvailable: Boolean = 
{ - // Note: - // - INFORMATION_SCHEMA.TIFLASH_REPLICA is not present in TiKV or PD, - // it is calculated in TiDB and stored in memory. - // - In order to get those helpful information we have to read them from - // either TiKV or PD and keep them in memory as well. - // - // select * from INFORMATION_SCHEMA.TIFLASH_REPLICA where table_id = $id - // TABLE_SCHEMA, TABLE_NAME, TABLE_ID, REPLICA_COUNT, LOCATION_LABELS, AVAILABLE, PROGRESS - table.getTiflashReplicaInfo != null && table.getTiflashReplicaInfo.isAvailable - } - - def getTiFlashReplicaProgress: Double = { - import scala.collection.JavaConversions._ - val progress = table.getPartitionInfo.getDefs - .map(partitonDef => session.getPDClient.getTiFlashReplicaProgress(partitonDef.getId)) - .sum - progress / table.getPartitionInfo.getDefs.size() - } - - override def sizeInBytes: Long = tableRef.sizeInBytes - - def logicalPlanToRDD(dagRequest: TiDAGRequest, output: Seq[Attribute]): List[TiRowRDD] = { - import scala.collection.JavaConverters._ - val ids = dagRequest.getPrunedPhysicalIds.asScala - var tiRDDs = new ListBuffer[TiRowRDD] - val tiConf = session.getConf - tiConf.setPartitionPerSplit(TiUtil.getPartitionPerSplit(sqlContext)) - ids.foreach(id => { - tiRDDs += new TiRowRDD( - dagRequest.copyReqWithPhysicalId(id), - id, - TiUtil.getChunkBatchSize(sqlContext), - tiConf, - output, - tableRef, - session, - sqlContext.sparkSession) - }) - tiRDDs.toList - } - - def dagRequestToRegionTaskExec(dagRequest: TiDAGRequest, output: Seq[Attribute]): SparkPlan = { - import scala.collection.JavaConverters._ - val ids = dagRequest.getPrunedPhysicalIds.asScala - var tiHandleRDDs = new ListBuffer[TiHandleRDD]() - lazy val attributeRef = Seq( - AttributeReference("RegionId", LongType, nullable = false, Metadata.empty)(), - AttributeReference( - "Handles", - ArrayType(ObjectType(classOf[Handle]), containsNull = false), - nullable = false, - Metadata.empty)()) - - val tiConf = session.getConf - tiConf.setPartitionPerSplit(TiUtil.getPartitionPerSplit(sqlContext)) - ids.foreach(id => { - tiHandleRDDs += - new TiHandleRDD( - dagRequest, - id, - attributeRef, - tiConf, - tableRef, - session, - sqlContext.sparkSession) - }) - - // TODO: we may optimize by partitioning the result by region. - // https://github.com/pingcap/tispark/issues/1200 - val handlePlan = ColumnarCoprocessorRDD(attributeRef, tiHandleRDDs.toList, fetchHandle = true) - execution.ColumnarRegionTaskExec( - handlePlan, - output, - TiUtil.getChunkBatchSize(sqlContext), - dagRequest, - session.getConf, - session.getTimestamp, - session, - sqlContext.sparkSession) - } - - override def equals(obj: Any): Boolean = - obj match { - case other: TiDBRelation => - this.table.equals(other.table) - case _ => - false - } - - override def insert(data: DataFrame, overwrite: Boolean): Unit = - // default forbid sql interface - // cause tispark provide `replace` instead of `insert` semantic - if (session.getConf.isWriteAllowSparkSQL) { - val saveMode = if (overwrite) { - SaveMode.Overwrite - } else { - SaveMode.Append - } - TiDBWriter.write(data, sqlContext, saveMode, options.get) - } else { - throw new TiBatchWriteException( - "SparkSQL entry for tispark write is disabled. 
Set spark.tispark.write.allow_spark_sql to enable.")
-    }
-
-  override def toString: String = {
-    s"TiDBRelation($tableRef, $ts)"
-  }
-
-  private def getTableOrThrow(database: String, table: String): TiTableInfo =
-    meta.getTable(database, table).getOrElse {
-      val db = meta.getDatabaseFromCache(database)
-      if (db.isEmpty) {
-        throw new TiClientInternalException(
-          "Database not exist " + database + " valid databases are: " + meta.getDatabasesFromCache
-            .map(_.getName)
-            .mkString("[", ",", "]"))
-      } else {
-        throw new TiClientInternalException(
-          "Table not exist " + tableRef + " valid tables are: " + meta
-            .getTablesFromCache(db.get)
-            .map(_.getName)
-            .mkString("[", ",", "]"))
-      }
-    }
+// Kept only so that the v1 path still builds
+case class TiDBRelation(sqlContext: SQLContext) extends BaseRelation {
+  override def schema: StructType = ???
 }
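The replacement `TiDBRelation` is now only a compile-time placeholder for the v1 path: `???` is `scala.Predef.???`, so any legacy caller that still touches the stub fails fast instead of silently misbehaving. A minimal sketch of that behavior (the Spark session below is a placeholder):

```scala
import org.apache.spark.sql.SparkSession
import com.pingcap.tispark.TiDBRelation

// Illustration only: constructing the stub is fine, but using it is not.
val spark = SparkSession.builder().appName("stub-demo").master("local[*]").getOrCreate()
val rel = TiDBRelation(spark.sqlContext)
// rel.schema  // throws scala.NotImplementedError, because the body is `???`
```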
diff --git a/core/src/main/scala/com/pingcap/tispark/v2/TiDBTable.scala b/core/src/main/scala/com/pingcap/tispark/v2/TiDBTable.scala
index 9b6e05c218..d6fc97d45c 100644
--- a/core/src/main/scala/com/pingcap/tispark/v2/TiDBTable.scala
+++ b/core/src/main/scala/com/pingcap/tispark/v2/TiDBTable.scala
@@ -15,22 +15,24 @@
 package com.pingcap.tispark.v2
 
-import com.pingcap.tikv.{TiConfiguration, TiSession}
-import com.pingcap.tikv.exception.TiInternalException
+import com.pingcap.tikv.TiSession
+import com.pingcap.tikv.exception.TiBatchWriteException
 import com.pingcap.tikv.key.Handle
 import com.pingcap.tikv.meta.{TiDAGRequest, TiTableInfo, TiTimestamp}
 import com.pingcap.tispark.utils.TiUtil
 import com.pingcap.tispark.v2.TiDBTable.{getDagRequestToRegionTaskExec, getLogicalPlanToRDD}
+import com.pingcap.tispark.v2.sink.TiDBWriterBuilder
 import com.pingcap.tispark.write.TiDBOptions
 import com.pingcap.tispark.TiTableReference
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
-import org.apache.spark.sql.connector.catalog.{SupportsRead, TableCapability}
+import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, TableCapability}
 import org.apache.spark.sql.connector.read.ScanBuilder
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
 import org.apache.spark.sql.execution.{ColumnarCoprocessorRDD, SparkPlan}
 import org.apache.spark.sql.tispark.{TiHandleRDD, TiRowRDD}
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
-import org.apache.spark.sql.{SQLContext, SparkSession, TiExtensions, execution}
+import org.apache.spark.sql.{SQLContext, execution}
 
 import java.util
 import java.util.Collections
@@ -43,7 +45,8 @@ case class TiDBTable(
     table: TiTableInfo,
     var ts: TiTimestamp = null,
     options: Option[TiDBOptions] = None)(@transient val sqlContext: SQLContext)
-    extends SupportsRead {
+    extends SupportsRead
+    with SupportsWrite {
 
   implicit class IdentifierHelper(identifier: TiTableReference) {
     def quoted: String = {
@@ -91,6 +94,7 @@ case class TiDBTable(
   override def capabilities(): util.Set[TableCapability] = {
     val capabilities = new util.HashSet[TableCapability]
     capabilities.add(TableCapability.BATCH_READ)
+    capabilities.add(TableCapability.V1_BATCH_WRITE)
     capabilities
   }
 
@@ -103,6 +107,25 @@ case class TiDBTable(
   def logicalPlanToRDD(dagRequest: TiDAGRequest, output: Seq[Attribute]): List[TiRowRDD] = {
     getLogicalPlanToRDD(dagRequest, output, session, sqlContext, tableRef)
   }
+
+  override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
+    var scalaMap = info.options().asScala.toMap
+    // TODO https://github.com/pingcap/tispark/issues/2269 we need to move the TiDB dependencies that block insert SQL.
+    // If we can't support it before the release, insert SQL should throw an exception in Catalyst.
+    if (scalaMap.isEmpty) {
+      throw new TiBatchWriteException("tidbOption is necessary.")
+    }
+    // Support df.writeTo: add the database and table options needed for the write
+    if (!scalaMap.contains("database")) {
+      scalaMap += ("database" -> databaseName)
+    }
+    if (!scalaMap.contains("table")) {
+      scalaMap += ("table" -> tableName)
+    }
+    // Get TiDBOptions
+    val tiDBOptions = new TiDBOptions(scalaMap)
+    TiDBWriterBuilder(info, tiDBOptions, sqlContext)
+  }
 }
 
 object TiDBTable {
@@ -175,5 +198,4 @@ object TiDBTable {
     })
     tiRDDs.toList
   }
-
 }
diff --git a/core/src/main/scala/com/pingcap/tispark/v2/TiDBTableProvider.scala b/core/src/main/scala/com/pingcap/tispark/v2/TiDBTableProvider.scala
index 11bbf85eed..09de6f39d9 100644
--- a/core/src/main/scala/com/pingcap/tispark/v2/TiDBTableProvider.scala
+++ b/core/src/main/scala/com/pingcap/tispark/v2/TiDBTableProvider.scala
@@ -20,7 +20,7 @@ import com.pingcap.tispark.TiDBRelation
 import com.pingcap.tispark.write.{TiDBOptions, TiDBWriter}
 import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
 import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode, SparkSession, TiExtensions}
-import org.apache.spark.sql.connector.catalog.{Table, TableProvider}
+import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableProvider}
 import org.apache.spark.sql.connector.expressions.Transform
 import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, DataSourceRegister}
 import org.apache.spark.sql.types.StructType
@@ -62,7 +62,18 @@ class TiDBTableProvider
 
   override def shortName(): String = "tidb"
 
-  // TODO: replace v1 path in next pr
+  def extractIdentifier(options: CaseInsensitiveStringMap): Identifier = {
+    require(options.get("database") != null, "Option 'database' is required.")
+    require(options.get("table") != null, "Option 'table' is required.")
+    Identifier.of(Array(options.get("database")), options.get("table"))
+  }
+
+  def extractCatalog(options: CaseInsensitiveStringMap): String = {
+    "tidb_catalog"
+  }
+
+  // DF.write still goes through the v1 path for now,
+  // because the v2 path goes through Catalyst, which may block things such as data type conversion.
   override def createRelation(
       sqlContext: SQLContext,
       mode: SaveMode,
@@ -70,24 +81,6 @@ class TiDBTableProvider
       data: DataFrame): BaseRelation = {
     val options = new TiDBOptions(parameters)
     TiDBWriter.write(data, sqlContext, mode, options)
-    createRelation(sqlContext, parameters)
-  }
-
-  def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = {
-
-    val options = new TiDBOptions(parameters)
-    val sparkSession = sqlContext.sparkSession
-
-    TiExtensions.getTiContext(sparkSession) match {
-      case Some(tiContext) =>
-        val ts = tiContext.tiSession.getTimestamp
-        TiDBRelation(
-          tiContext.tiSession,
-          options.getTiTableRef(tiContext.tiConf),
-          tiContext.meta,
-          ts,
-          Some(options))(sqlContext)
-      case None => throw new TiBatchWriteException("TiExtensions is disable!")
-    }
+    TiDBRelation(sqlContext)
   }
 }
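With these two files the connector exposes two write entry points: `DataFrame.write` (v1, routed through `createRelation` above) and `DataFrame.writeTo` (v2, routed through `TiDBTable.newWriteBuilder`, which fills in `database`/`table` from the table identifier). A hedged usage sketch: the option keys follow TiSpark's documented `tidb.*` settings, and all addresses, credentials, and names below are placeholders.

```scala
// Placeholder connection settings; real jobs supply their own TiDB endpoint and credentials.
val tidbOptions = Map(
  "tidb.addr" -> "127.0.0.1",
  "tidb.port" -> "4000",
  "tidb.user" -> "root",
  "tidb.password" -> "")

// Given an existing DataFrame `df` whose schema matches the target table:

// v1 entry: goes through TiDBTableProvider.createRelation -> TiDBWriter.write.
df.write
  .format("tidb")
  .options(tidbOptions)
  .option("database", "test")
  .option("table", "target_table")
  .mode("append")
  .save()

// v2 entry: goes through TiDBTable.newWriteBuilder; "database" and "table"
// are derived from the identifier when absent from the options.
df.writeTo("tidb_catalog.test.target_table").options(tidbOptions).append()
```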
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.pingcap.tispark.v2.sink + +import com.pingcap.tispark.write.TiDBOptions +import org.apache.spark.sql.TiContext +import org.apache.spark.sql.connector.write._ + +/** + * Use V1WriteBuilder before turn to v2 + */ +case class TiDBBatchWrite(logicalInfo: LogicalWriteInfo, tiDBOptions: TiDBOptions)( + @transient val tiContext: TiContext) + extends BatchWrite { + override def createBatchWriterFactory(info: PhysicalWriteInfo): DataWriterFactory = + TiDBDataWriterFactory(logicalInfo.schema(), tiDBOptions, tiContext.tiConf) + + override def commit(messages: Array[WriterCommitMessage]): Unit = ??? + + override def abort(messages: Array[WriterCommitMessage]): Unit = ??? +} diff --git a/core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBDataWrite.scala b/core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBDataWrite.scala new file mode 100644 index 0000000000..f22d04bcf5 --- /dev/null +++ b/core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBDataWrite.scala @@ -0,0 +1,48 @@ +/* + * Copyright 2021 PingCAP, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.pingcap.tispark.v2.sink + +import com.pingcap.tikv.TiConfiguration +import com.pingcap.tispark.write.TiDBOptions +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.write.{DataWriter, WriterCommitMessage} +import org.apache.spark.sql.types.StructType + +/** + * Use V1WriteBuilder before turn to v2 + */ +case class TiDBDataWrite( + partitionId: Int, + taskId: Long, + schema: StructType, + tiDBOptions: TiDBOptions, + ticonf: TiConfiguration) + extends DataWriter[InternalRow] { + + override def write(record: InternalRow): Unit = { + val row = Row.fromSeq(record.toSeq(schema)) + ??? + } + + override def commit(): WriterCommitMessage = ??? + + override def abort(): Unit = {} + + override def close(): Unit = {} +} + +object WriteSucceeded extends WriterCommitMessage diff --git a/core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBDataWriterFactory.scala b/core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBDataWriterFactory.scala new file mode 100644 index 0000000000..e639335cc2 --- /dev/null +++ b/core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBDataWriterFactory.scala @@ -0,0 +1,35 @@ +/* + * Copyright 2021 PingCAP, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
diff --git a/core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBDataWriterFactory.scala b/core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBDataWriterFactory.scala
new file mode 100644
index 0000000000..e639335cc2
--- /dev/null
+++ b/core/src/main/scala/com/pingcap/tispark/v2/sink/TiDBDataWriterFactory.scala
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2021 PingCAP, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.pingcap.tispark.v2.sink
+
+import com.pingcap.tikv.TiConfiguration
+import com.pingcap.tispark.write.TiDBOptions
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.write.{DataWriter, DataWriterFactory}
+import org.apache.spark.sql.types.StructType
+
+/**
+ * Use V1WriteBuilder before turning to v2
+ */
+case class TiDBDataWriterFactory(
+    schema: StructType,
+    tiDBOptions: TiDBOptions,
+    ticonf: TiConfiguration)
+  extends DataWriterFactory {
+
+  override def createWriter(partitionId: Int, taskId: Long): DataWriter[InternalRow] =
+    TiDBDataWrite(partitionId, taskId, schema, tiDBOptions, ticonf)
+}
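The factory is built once on the driver, serialized to every executor, and asked for one writer per partition and task attempt; that is why it carries only serializable state (`StructType`, `TiDBOptions`, `TiConfiguration`) rather than a live `TiSession`. A small sketch under that assumption (`schema`, `tiDBOptions`, and `ticonf` stand for values already in scope):

```scala
import org.apache.spark.sql.connector.write.DataWriterFactory

// Driver side: create the factory; Spark serializes it out to executors.
val factory: DataWriterFactory = TiDBDataWriterFactory(schema, tiDBOptions, ticonf)

// Executor side: one writer per (partitionId, taskId); the ids here are illustrative.
val writer = factory.createWriter(0, 0L)
```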
+ */ + +package com.pingcap.tispark.v2.sink + +import com.pingcap.tispark.write.{TiDBOptions, TiDBWriter} +import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, V1WriteBuilder} +import org.apache.spark.sql.sources.InsertableRelation + +case class TiDBWriterBuilder( + info: LogicalWriteInfo, + tiDBOptions: TiDBOptions, + sqlContext: SQLContext) + extends V1WriteBuilder { + + // Use V1WriteBuilder before turn to v2 + override def buildForV1Write(): InsertableRelation = { (data: DataFrame, overwrite: Boolean) => + { + val saveMode = if (overwrite) { + SaveMode.Overwrite + } else { + SaveMode.Append + } + TiDBWriter.write(data, sqlContext, saveMode, tiDBOptions) + } + } + +} diff --git a/core/src/main/scala/org/apache/spark/sql/TiContext.scala b/core/src/main/scala/org/apache/spark/sql/TiContext.scala index 6e1a12689b..b98df6d704 100644 --- a/core/src/main/scala/org/apache/spark/sql/TiContext.scala +++ b/core/src/main/scala/org/apache/spark/sql/TiContext.scala @@ -71,61 +71,6 @@ class TiContext(val sparkSession: SparkSession) extends Serializable with Loggin val autoLoad: Boolean = conf.getBoolean(TiConfigConst.ENABLE_AUTO_LOAD_STATISTICS, defaultValue = true) - // tidbMapTable does not do any check any meta information - // it just register table for later use - @Deprecated - def tidbMapTable( - dbName: String, - tableName: String, - dbNameAsPrefix: Boolean = false): DataFrame = { - val df = getDataFrame(dbName, tableName) - val viewName = getViewName(dbName, tableName, dbNameAsPrefix) - df.createOrReplaceTempView(viewName) - logInfo("Registered table [" + tableName + "] as [" + viewName + "]") - df - } - - @Deprecated - def getDataFrame(dbName: String, tableName: String): DataFrame = { - val tiRelation = - TiDBRelation(tiSession, TiTableReference(dbName, tableName), meta)(sqlContext) - sqlContext.baseRelationToDataFrame(tiRelation) - } - - @Deprecated - def tidbMapDatabase(dbName: String, dbNameAsPrefix: Boolean): Unit = - tidbMapDatabase(dbName, dbNameAsPrefix, autoLoad) - - @Deprecated - def tidbMapDatabase( - dbName: String, - dbNameAsPrefix: Boolean = false, - autoLoadStatistics: Boolean = autoLoad): Unit = - for { - db <- meta.getDatabase(dbName) - table <- meta.getTables(db) - } { - var sizeInBytes = Long.MaxValue - val tableName = table.getName - if (autoLoadStatistics) { - StatisticsManager.loadStatisticsInfo(table) - } - sizeInBytes = StatisticsManager.estimateTableSize(table) - - if (!sqlContext.sparkSession.catalog.tableExists("`" + tableName + "`")) { - val rel: TiDBRelation = - TiDBRelation(tiSession, TiTableReference(dbName, tableName, sizeInBytes), meta)( - sqlContext) - - val viewName = getViewName(dbName, tableName, dbNameAsPrefix) - sqlContext.baseRelationToDataFrame(rel).createTempView(viewName) - logInfo("Registered table [" + tableName + "] as [" + viewName + "]") - } else { - logInfo( - "Duplicate table [" + tableName + "] exist in catalog, you might want to set dbNameAsPrefix = true") - } - } - // add backtick for table name in case it contains, e.g., a minus sign private def getViewName(dbName: String, tableName: String, dbNameAsPrefix: Boolean): String = "`" + (if (dbNameAsPrefix) dbName + "_" + tableName else tableName) + "`" diff --git a/core/src/main/scala/org/apache/spark/sql/TiExtensions.scala b/core/src/main/scala/org/apache/spark/sql/TiExtensions.scala index 455415c14e..485741331d 100644 --- a/core/src/main/scala/org/apache/spark/sql/TiExtensions.scala +++ 
diff --git a/core/src/main/scala/org/apache/spark/sql/TiExtensions.scala b/core/src/main/scala/org/apache/spark/sql/TiExtensions.scala
index 455415c14e..485741331d 100644
--- a/core/src/main/scala/org/apache/spark/sql/TiExtensions.scala
+++ b/core/src/main/scala/org/apache/spark/sql/TiExtensions.scala
@@ -58,6 +58,10 @@ object TiExtensions {
 
   def enabled(sparkSession: SparkSession): Boolean = getTiContext(sparkSession).isDefined
 
+  /**
+   * A catalog for TiDB is required now.
+   * @param sparkSession
+   */
   def validateCatalog(sparkSession: SparkSession): Unit = {
     sparkSession.sparkContext.conf
       .getAllWithPrefix("spark.sql.catalog.")
@@ -72,8 +76,8 @@ object TiExtensions {
   }
 
   /**
-   * use TiAuthorizationRule to judge if TiExtensions is enable.
-   * it needs to be changed if TiAuthorizationRule is deleted
+   * Use TiAuthorizationRule to judge whether TiExtensions is enabled.
+   * This needs to be changed when TiAuthorizationRule is no longer a must-have.
    * @param sparkSession
    * @return
    */
diff --git a/core/src/main/scala/org/apache/spark/sql/catalyst/expressions/TiExprUtils.scala b/core/src/main/scala/org/apache/spark/sql/catalyst/expressions/TiExprUtils.scala
index d033dfe5f3..382e491c99 100644
--- a/core/src/main/scala/org/apache/spark/sql/catalyst/expressions/TiExprUtils.scala
+++ b/core/src/main/scala/org/apache/spark/sql/catalyst/expressions/TiExprUtils.scala
@@ -25,7 +25,6 @@ import com.pingcap.tikv.expression.visitor.{
 import com.pingcap.tikv.expression.{AggregateFunction, ByItem, ColumnRef, ExpressionBlocklist}
 import com.pingcap.tikv.meta.{TiColumnInfo, TiDAGRequest, TiTableInfo}
 import com.pingcap.tikv.region.RegionStoreClient.RequestTypes
-import com.pingcap.tispark.TiDBRelation
 import com.pingcap.tispark.v2.TiDBTable
 import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.execution.TiConverter.fromSparkType
diff --git a/core/src/main/scala/org/apache/spark/sql/execution/CoprocessorRDD.scala b/core/src/main/scala/org/apache/spark/sql/execution/CoprocessorRDD.scala
index 41319c2cc6..8614bb44bc 100644
--- a/core/src/main/scala/org/apache/spark/sql/execution/CoprocessorRDD.scala
+++ b/core/src/main/scala/org/apache/spark/sql/execution/CoprocessorRDD.scala
@@ -101,7 +101,7 @@ case class ColumnarCoprocessorRDD(
  * RegionTaskExec will downgrade a index scan plan to table scan plan if handles retrieved from one
  * region exceed spark.tispark.plan.downgrade.index_threshold in your spark config.
  *
- * Refer to code in [[com.pingcap.tispark.TiDBRelation]] and [[ColumnarCoprocessorRDD]] for further details.
+ * Refer to code in [[com.pingcap.tispark.v2.TiDBTable]] and [[ColumnarCoprocessorRDD]] for further details.
  *
  */
 case class ColumnarRegionTaskExec(
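The downgrade behavior referenced in the comment above is controlled by a Spark conf key; a hedged tuning example (the value 10000 is shown only for illustration):

```scala
// Handle-count threshold above which an index scan is downgraded
// to a table scan for the affected region tasks.
spark.conf.set("spark.tispark.plan.downgrade.index_threshold", "10000")
```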
diff --git a/core/src/test/scala/com/pingcap/tispark/datasource/BasicBatchWriteSuite.scala b/core/src/test/scala/com/pingcap/tispark/datasource/BasicBatchWriteSuite.scala
index 50635590ad..ea7c528abc 100644
--- a/core/src/test/scala/com/pingcap/tispark/datasource/BasicBatchWriteSuite.scala
+++ b/core/src/test/scala/com/pingcap/tispark/datasource/BasicBatchWriteSuite.scala
@@ -74,4 +74,15 @@ class BasicBatchWriteSuite extends BaseBatchWriteWithoutDropTableTest("test_data
       caught.getMessage
         .equals("SaveMode: Overwrite is not supported. TiSpark only support SaveMode.Append."))
   }
+
+  // Experimental
+  test("Test Datasource api v2 write") {
+    jdbcUpdate(s"drop table if exists $dbtable")
+    jdbcUpdate(s"create table $dbtable(i int, s varchar(128))")
+    jdbcUpdate(s"insert into $dbtable values(null, 'Hello'), (2, 'TiDB')")
+    val data: RDD[Row] = sc.makeRDD(List(row3, row4))
+    val df = sqlContext.createDataFrame(data, schema)
+    df.writeTo(s"tidb_catalog.$database.$table").options(tidbOptions).append()
+    testTiDBSelect(Seq(row1, row2, row3, row4))
+  }
 }
diff --git a/core/src/test/scala/com/pingcap/tispark/datasource/BasicSQLSuite.scala b/core/src/test/scala/com/pingcap/tispark/datasource/BasicSQLSuite.scala
index f4dadb9c43..1627b31088 100644
--- a/core/src/test/scala/com/pingcap/tispark/datasource/BasicSQLSuite.scala
+++ b/core/src/test/scala/com/pingcap/tispark/datasource/BasicSQLSuite.scala
@@ -20,6 +20,14 @@ import org.apache.spark.sql.Row
 
 import scala.util.Random
 
+/**
+ * Ignore the tests in this suite, because we don't support them anymore.
+ * - for select: use tidb_catalog to read
+ * - for insert: use the write API
+ * Insert statements in tidb_catalog cannot be supported, for two reasons:
+ * 1. options can't be passed through SQL
+ * 2. insert statements go through Catalyst, which may conflict with the write logic (such as data type conversion)
+ */
 class BasicSQLSuite extends BaseBatchWriteWithoutDropTableTest("test_datasource_sql") {
   private val row1 = Row(null, "Hello")
   private val row2 = Row(2, "TiDB")
@@ -32,7 +40,6 @@ class BasicSQLSuite extends BaseBatchWriteWithoutDropTableTest("test_datasource_
     jdbcUpdate(s"insert into $dbtable values(null, 'Hello'), (2, 'TiDB')")
   }
 
-  // temporarily not supported
   ignore("Test Select") {
     testSelectSQL(Seq(row1, row2))
   }
diff --git a/spark-wrapper/spark-3.1/src/main/scala/org/apache/spark/sql/extensions/TiAggregationProjectionV2.scala b/spark-wrapper/spark-3.1/src/main/scala/org/apache/spark/sql/extensions/TiAggregationProjectionV2.scala
index c335b4bea5..a4a2f9046c 100644
--- a/spark-wrapper/spark-3.1/src/main/scala/org/apache/spark/sql/extensions/TiAggregationProjectionV2.scala
+++ b/spark-wrapper/spark-3.1/src/main/scala/org/apache/spark/sql/extensions/TiAggregationProjectionV2.scala
@@ -24,6 +24,10 @@ import org.apache.spark.sql.execution.datasources.v2.{
   DataSourceV2ScanRelation
 }
 
+/**
+ * Sharing a name with the object under spark-wrapper/spark-3.0 may lead to problems,
+ * even though the duplicated name passes the integration tests.
+ */
 object TiAggregationProjectionV2 {
   type ReturnType = (Seq[Expression], LogicalPlan, TiDBTable, Seq[NamedExpression])