From 75f9bb6d80d8ace67a5f6fb1c49681e110c1d353 Mon Sep 17 00:00:00 2001 From: osopardo1 Date: Thu, 28 Oct 2021 16:46:23 +0200 Subject: [PATCH 1/5] Initialize QueryRange to AllSpace and add test --- .../qbeast/spark/sql/files/OTreeIndex.scala | 7 ++---- .../QbeastDataSourceIntegrationTest.scala | 24 +++++++++++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/main/scala/io/qbeast/spark/sql/files/OTreeIndex.scala b/src/main/scala/io/qbeast/spark/sql/files/OTreeIndex.scala index 097d29043..507ad1c8c 100644 --- a/src/main/scala/io/qbeast/spark/sql/files/OTreeIndex.scala +++ b/src/main/scala/io/qbeast/spark/sql/files/OTreeIndex.scala @@ -4,7 +4,7 @@ package io.qbeast.spark.sql.files import io.qbeast.spark.index.{CubeId, Weight} -import io.qbeast.spark.model.{Point, QuerySpace, QuerySpaceFromTo, RangeValues} +import io.qbeast.spark.model.{AllSpace, QuerySpace, RangeValues} import io.qbeast.spark.sql.qbeast import io.qbeast.spark.sql.utils.State import io.qbeast.spark.sql.utils.TagUtils @@ -96,10 +96,7 @@ case class OTreeIndex(index: TahoeLogFileIndex) val revisionData = qbeastSnapshot.getRevisionData(revision.id) val dimensionCount = revision.dimensionCount - val originalFrom = Point(Vector.fill(dimensionCount)(Int.MinValue.doubleValue())) - val originalTo = Point(Vector.fill(dimensionCount)(Int.MaxValue.doubleValue())) - val querySpace = QuerySpaceFromTo(originalFrom, originalTo, revision) - + val querySpace = AllSpace(dimensionCount) val cubeWeights = revisionData.cubeWeights val replicatedSet = revisionData.replicatedSet val filesRevision = filesVector.filter(_.tags(TagUtils.revision) == revision.id.toString) diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala index 00ef02485..0519765a0 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala @@ -8,6 +8,7 @@ import io.qbeast.spark.index.CubeId import io.qbeast.spark.sql.files.OTreeIndex import io.qbeast.spark.table.QbeastTable import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.functions.{col, lit} import org.apache.spark.sql.{DataFrame, SparkSession} class QbeastDataSourceIntegrationTest extends QbeastIntegrationTestSpec { @@ -41,6 +42,29 @@ class QbeastDataSourceIntegrationTest extends QbeastIntegrationTestSpec { } } + it should "index correctly on bigger spaces" in withQbeastContextSparkAndTmpDir { + (spark, tmpDir) => + { + val data = loadTestData(spark) + .withColumn("user_id", lit(col("user_id") * Long.MaxValue)) + // WRITE SOME DATA + data.write + .mode("overwrite") + .format("qbeast") + .option("columnsToIndex", "user_id,product_id") + .save(tmpDir) + + val indexed = spark.read.format("qbeast").load(tmpDir) + + data.count() shouldBe indexed.count() + + assertLargeDatasetEquality(indexed, data, orderedComparison = false) + + data.columns.toSet shouldBe indexed.columns.toSet + + } + } + it should "index correctly on overwrite" in withQbeastContextSparkAndTmpDir { (spark, tmpDir) => { val data = loadTestData(spark) From 949082b70d99b6b95033aed23d522dfad4dd3ec1 Mon Sep 17 00:00:00 2001 From: Cesare Cugnasco Date: Tue, 7 Dec 2021 16:48:51 +0100 Subject: [PATCH 2/5] fixing typos --- CODE_OF_CONDUCT.md | 2 +- CONTRIBUTING.md | 2 +- core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala | 2 +- .../scala/io/qbeast/core/model/JSONSerializationTests.scala | 6 
+++--- .../scala/io/qbeast/core/transform/TransformerTest.scala | 6 +++--- src/main/scala/io/qbeast/spark/delta/CubeDataLoader.scala | 4 ++-- .../io/qbeast/spark/index/MaxWeightEstimationTest.scala | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index c9f130900..074e6ecf8 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,6 +1,6 @@ ## Our Pledge -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, religion, or sexual identity and orientation. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 29a3c0a9e..52e4716f4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,7 +5,7 @@ Welcome to the Qbeast community! Nice to meet you :) Either you want to know more about our guidelines or open a Pull Request, this is your page. We are pleased to help you through the different steps for contributing to our (your) project. ## Licensing of contributed material -All contributed code, docs, and otro materials are considered licensed under the same terms as the rest of the project. Check [LICENSE](./LICENCE) for more details. +All contributed code, docs, and other materials are considered licensed under the same terms as the rest of the project. Check [LICENSE](./LICENCE) for more details. ## Version control branching - Always make a new branch for your work, no matter how small diff --git a/core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala b/core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala index e70086ad5..daed46cc4 100644 --- a/core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala +++ b/core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala @@ -10,7 +10,7 @@ import org.scalatest.matchers.should.Matchers * Tests for CubeId. 
  */
 class CubeIdTest extends AnyFlatSpec with Matchers {
-  "CubeId" should "implement equals corretly" in {
+  "CubeId" should "implement equals correctly" in {
     val id1 = CubeId.root(2)
     val id2 = id1.firstChild
     val id3 = id1.firstChild
diff --git a/core/src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala b/core/src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala
index b77b351c2..49d8f3d5a 100644
--- a/core/src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala
+++ b/core/src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala
@@ -77,7 +77,7 @@ class JSONSerializationTests extends AnyFlatSpec with Matchers {
 
   }
 
-  "A revision" should "Serialize with all simple value a part from Transoform*" in {
+  "A revision" should "Serialize with all simple values apart from Transform*" in {
     val rev =
       Revision(
         12L,
@@ -96,7 +96,7 @@ class JSONSerializationTests extends AnyFlatSpec with Matchers {
     mapper.readValue[Revision](json, classOf[Revision]) shouldBe rev
   }
 
-  "A revision" should "Serialize with all Long Transoform*" in {
+  "A revision" should "Serialize with all Long Transform*" in {
     val rev =
       Revision(
         12L,
@@ -115,7 +115,7 @@ class JSONSerializationTests extends AnyFlatSpec with Matchers {
     mapper.readValue[Revision](json, classOf[Revision]) shouldBe rev
   }
 
-  "A revision" should "Serialize with all Hash Transoform*" in {
+  "A revision" should "Serialize with all Hash Transform*" in {
     val rev =
       Revision(
         12L,
diff --git a/core/src/test/scala/io/qbeast/core/transform/TransformerTest.scala b/core/src/test/scala/io/qbeast/core/transform/TransformerTest.scala
index d5553cb1e..efd596a8a 100644
--- a/core/src/test/scala/io/qbeast/core/transform/TransformerTest.scala
+++ b/core/src/test/scala/io/qbeast/core/transform/TransformerTest.scala
@@ -6,7 +6,7 @@ import org.scalatest.matchers.should.Matchers
 
 class TransformerTest extends AnyFlatSpec with Matchers {
 
-  behavior of "Tranformer"
+  behavior of "Transformer"
 
   it should "return correct column name" in {
     val columnName = "a"
@@ -45,8 +45,8 @@ class TransformerTest extends AnyFlatSpec with Matchers {
     val transformation = Map("a_min" -> 0, "a_max" -> 1)
     val currentTransformation = transformer.makeTransformation(transformation)
 
-    val newTranformation = Map("a_min" -> 3, "a_max" -> 8)
-    transformer.maybeUpdateTransformation(currentTransformation, newTranformation) shouldBe Some(
+    val newTransformation = Map("a_min" -> 3, "a_max" -> 8)
+    transformer.maybeUpdateTransformation(currentTransformation, newTransformation) shouldBe Some(
       LinearTransformation(0, 8, dataType))
 
     transformer.maybeUpdateTransformation(currentTransformation, transformation) shouldBe None
diff --git a/src/main/scala/io/qbeast/spark/delta/CubeDataLoader.scala b/src/main/scala/io/qbeast/spark/delta/CubeDataLoader.scala
index 0e6f5c5da..6471602e8 100644
--- a/src/main/scala/io/qbeast/spark/delta/CubeDataLoader.scala
+++ b/src/main/scala/io/qbeast/spark/delta/CubeDataLoader.scala
@@ -22,7 +22,7 @@ case class CubeDataLoader(tableID: QTableID) {
 
   /**
    * Loads the data from a set of cubes in a specific revision
-   * and adds column infomation
+   * and adds column information
    * @param cubeSet the set of cubes to load
    * @param revision the revision to load
    * @param columnName the column name to add
@@ -41,7 +41,7 @@ case class CubeDataLoader(tableID: QTableID) {
 
   /**
    * Loads the data from a single cube in a specific revision
-   * and adds column infomation
+   * and adds column information
    * @param cubeId the cube to load
    * @param revision the revision to load
    * @param 
columnName the column name to add diff --git a/src/test/scala/io/qbeast/spark/index/MaxWeightEstimationTest.scala b/src/test/scala/io/qbeast/spark/index/MaxWeightEstimationTest.scala index 2a67659b6..b5b212b09 100644 --- a/src/test/scala/io/qbeast/spark/index/MaxWeightEstimationTest.scala +++ b/src/test/scala/io/qbeast/spark/index/MaxWeightEstimationTest.scala @@ -29,7 +29,7 @@ class MaxWeightEstimationTest extends AnyFlatSpec with Matchers with QbeastInteg MaxWeightEstimation.finish(finalWeight) shouldBe finalWeight } - "MaxWeight zero" should "be minium positive value" in { + "MaxWeight zero" should "be minimum positive value" in { MaxWeightEstimation.zero shouldBe 0.0 } From 5e8b4498c12949669f6ab480a52e51f832eedbcb Mon Sep 17 00:00:00 2001 From: Cesare Cugnasco Date: Tue, 7 Dec 2021 17:25:19 +0100 Subject: [PATCH 3/5] using named parameters --- .../spark/index/DoublePassOTreeDataAnalyzerTest.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala b/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala index fd0b41380..2a2b9712c 100644 --- a/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala +++ b/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala @@ -199,7 +199,8 @@ class DoublePassOTreeDataAnalyzerTest extends QbeastIntegrationTestSpec { val weightedDataFrame = data.withColumn(weightColumnName, lit(scala.util.Random.nextInt())) val cubeNormalizedWeights = - weightedDataFrame.transform(estimatePartitionCubeWeights(revision, indexStatus, false)) + weightedDataFrame.transform( + estimatePartitionCubeWeights(revision, indexStatus, isReplication = false)) val partitions = weightedDataFrame.rdd.getNumPartitions @@ -225,7 +226,8 @@ class DoublePassOTreeDataAnalyzerTest extends QbeastIntegrationTestSpec { val weightedDataFrame = data.withColumn(weightColumnName, lit(scala.util.Random.nextInt())) val cubeNormalizedWeights = - weightedDataFrame.transform(estimatePartitionCubeWeights(revision, indexStatus, false)) + weightedDataFrame.transform( + estimatePartitionCubeWeights(revision, indexStatus, isReplication = false)) val cubeWeights = cubeNormalizedWeights.transform(estimateCubeWeights(revision)) cubeWeights.columns.length shouldBe 2 From 58639e24c97078ff90d0783261fd7aa66b6cce95 Mon Sep 17 00:00:00 2001 From: Cesare Cugnasco Date: Tue, 7 Dec 2021 17:26:00 +0100 Subject: [PATCH 4/5] reorganizing test code using matchers instead of assertion --- .../io/qbeast/docs/DocumentationTests.scala | 37 ++++++------ .../spark/delta/QbeastSnapshotTest.scala | 7 ++- .../index/CubeWeightsIntegrationTest.scala | 2 +- .../io/qbeast/spark/index/IndexTest.scala | 9 ++- .../qbeast/spark/index/IndexTestChecks.scala | 58 +++++++++---------- .../qbeast/spark/index/NewRevisionTest.scala | 2 +- .../index/SparkRevisionFactoryTest.scala | 6 +- .../spark/index/writer/BlockWriterTest.scala | 2 +- .../QbeastDataSourceIntegrationTest.scala | 15 +++-- 9 files changed, 70 insertions(+), 68 deletions(-) diff --git a/src/test/scala/io/qbeast/docs/DocumentationTests.scala b/src/test/scala/io/qbeast/docs/DocumentationTests.scala index 64361fb04..bc2de6382 100644 --- a/src/test/scala/io/qbeast/docs/DocumentationTests.scala +++ b/src/test/scala/io/qbeast/docs/DocumentationTests.scala @@ -3,6 +3,7 @@ package io.qbeast.docs import io.qbeast.spark.QbeastIntegrationTestSpec import org.apache.spark.sql.delta.DeltaLog import org.apache.spark.sql.functions.input_file_name 
+import org.scalatest.AppendedClues.convertToClueful class DocumentationTests extends QbeastIntegrationTestSpec { @@ -28,8 +29,10 @@ class DocumentationTests extends QbeastIntegrationTestSpec { .format("qbeast") .load(tmp_dir) - assert(qbeast_df.count() == csv_df.count(), "Readme count does not match the original") - assert(qbeast_df.schema == csv_df.schema, "Readme schema does not match the original") + qbeast_df.count() shouldBe csv_df + .count() withClue "Readme count does not match the original" + + qbeast_df.schema shouldBe csv_df.schema withClue "Readme schema does not match the original" } } @@ -49,10 +52,10 @@ class DocumentationTests extends QbeastIntegrationTestSpec { val qbeastDf = spark.read.format("qbeast").load(qbeastTablePath) - assert( - qbeastDf.count() == parquetDf.count(), - "Quickstart count does not match the original") - assert(qbeastDf.schema == parquetDf.schema, "Quickstart schema does not match the original") + qbeastDf.count() shouldBe parquetDf.count() withClue + "Quickstart count does not match the original" + qbeastDf.schema shouldBe parquetDf.schema withClue + "Quickstart schema does not match the original" } } @@ -83,29 +86,27 @@ class DocumentationTests extends QbeastIntegrationTestSpec { val df = spark.read.format("parquet").load(processed_parquet_dir) val qbeast_df = spark.read.format("qbeast").load(qbeast_table_path) - assert( - qbeast_df.count() == df.count(), - "Pushdown notebook count of indexed dataframe does not match the original") + qbeast_df.count() shouldBe df.count() withClue + "Pushdown notebook count of indexed dataframe does not match the original" // Table changes? val deltaLog = DeltaLog.forTable(spark, qbeast_table_path) val totalNumberOfFiles = deltaLog.snapshot.allFiles.count() - assert( - totalNumberOfFiles == 21, - "Total number of files in pushdown notebook changes to " + totalNumberOfFiles) + + totalNumberOfFiles shouldBe 21 withClue + "Total number of files in pushdown notebook changes to " + totalNumberOfFiles val query = qbeast_df.sample(0.1) val numberOfFilesQuery = query.select(input_file_name()).distinct().count() - assert( - numberOfFilesQuery == 1, - "Number of files read in pushdown notebook changes to " + numberOfFilesQuery) + numberOfFilesQuery shouldBe 1 withClue + "Number of files read in pushdown notebook changes to " + numberOfFilesQuery val file = query.select(input_file_name()).distinct().head().getString(0) val numberOfRowsRead = spark.read.format("parquet").load(file).count() - assert( - numberOfRowsRead == 302715, - "Number of rows read in pushdown notebook changes to " + numberOfRowsRead) + + numberOfRowsRead shouldBe 302715 withClue + "Number of rows read in pushdown notebook changes to " + numberOfRowsRead } } diff --git a/src/test/scala/io/qbeast/spark/delta/QbeastSnapshotTest.scala b/src/test/scala/io/qbeast/spark/delta/QbeastSnapshotTest.scala index e45abf905..09f8149c4 100644 --- a/src/test/scala/io/qbeast/spark/delta/QbeastSnapshotTest.scala +++ b/src/test/scala/io/qbeast/spark/delta/QbeastSnapshotTest.scala @@ -10,6 +10,7 @@ import io.qbeast.spark.utils.TagUtils import io.qbeast.spark.QbeastIntegrationTestSpec import org.apache.spark.sql.delta.DeltaLog import org.apache.spark.sql.{Dataset, SparkSession} +import org.scalatest.AppendedClues.convertToClueful class QbeastSnapshotTest extends QbeastIntegrationTestSpec { @@ -133,10 +134,10 @@ class QbeastSnapshotTest extends QbeastIntegrationTestSpec { .map(fileInfo) .map(a => a.tags(TagUtils.elementCount).toLong) .sum - assert( - size > cubeSize * 0.9, + + 
size should be > (cubeSize * 0.9).toLong withClue "assertion failed in cube " + cube + - " where size is " + size + " and weight is " + weight) + " where size is " + size + " and weight is " + weight } } } diff --git a/src/test/scala/io/qbeast/spark/index/CubeWeightsIntegrationTest.scala b/src/test/scala/io/qbeast/spark/index/CubeWeightsIntegrationTest.scala index 640dea423..ce1b6625b 100644 --- a/src/test/scala/io/qbeast/spark/index/CubeWeightsIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/index/CubeWeightsIntegrationTest.scala @@ -44,7 +44,7 @@ class CubeWeightsIntegrationTest extends QbeastIntegrationTestSpec { // commitLogWeightMap shouldBe weightMap commitLogWeightMap.keys.foreach(cubeId => { - assert(weightMap.contains(cubeId) || weightMap.contains(cubeId.parent.get)) + weightMap should contain key cubeId }) } diff --git a/src/test/scala/io/qbeast/spark/index/IndexTest.scala b/src/test/scala/io/qbeast/spark/index/IndexTest.scala index ff584e472..6db425c12 100644 --- a/src/test/scala/io/qbeast/spark/index/IndexTest.scala +++ b/src/test/scala/io/qbeast/spark/index/IndexTest.scala @@ -5,7 +5,6 @@ package io.qbeast.spark.index import io.qbeast.TestClasses.{Client3, Client4} import io.qbeast.core.model._ -import io.qbeast.spark.index.IndexTestChecks._ import io.qbeast.spark.index.QbeastColumns.cubeColumnName import io.qbeast.spark.utils.TagUtils import io.qbeast.spark.{QbeastIntegrationTestSpec, delta} @@ -17,7 +16,11 @@ import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession} import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers -class IndexTest extends AnyFlatSpec with Matchers with QbeastIntegrationTestSpec { +class IndexTest + extends AnyFlatSpec + with Matchers + with QbeastIntegrationTestSpec + with IndexTestChecks { // TEST CONFIGURATIONS private val options = Map("columnsToIndex" -> "age,val2", "cubeSize" -> "10000") @@ -95,7 +98,7 @@ class IndexTest extends AnyFlatSpec with Matchers with QbeastIntegrationTestSpec val (indexed, tc) = oTreeAlgorithm.index(df, IndexStatus(rev)) - checkCubesOnData(tc.indexChanges.cubeWeights, indexed, 2) + checkCubesOnData(tc.indexChanges.cubeWeights, indexed, dimensionCount = 2) } } } diff --git a/src/test/scala/io/qbeast/spark/index/IndexTestChecks.scala b/src/test/scala/io/qbeast/spark/index/IndexTestChecks.scala index f16a07213..da876908e 100644 --- a/src/test/scala/io/qbeast/spark/index/IndexTestChecks.scala +++ b/src/test/scala/io/qbeast/spark/index/IndexTestChecks.scala @@ -3,21 +3,22 @@ package io.qbeast.spark.index import io.qbeast.core.model.{CubeId, Revision, TableChanges, Weight} import io.qbeast.spark.index.QbeastColumns.cubeColumnName import org.apache.spark.sql.DataFrame +import org.scalatest.AppendedClues.convertToClueful +import org.scalatest.matchers.should.Matchers -object IndexTestChecks { +trait IndexTestChecks extends Matchers { def checkDFSize(indexed: DataFrame, original: DataFrame): Unit = { val indexedSize = indexed.count() val originalSize = original.count() - assert( - indexedSize == originalSize, - s"Indexed dataset has size ${indexedSize} and original has size $originalSize") + indexedSize shouldBe originalSize withClue + s"Indexed dataset has size $indexedSize and original has size $originalSize" } def checkCubeSize(tableChanges: TableChanges, revision: Revision, indexed: DataFrame): Unit = { val weightMap: Map[CubeId, Weight] = tableChanges.indexChanges.cubeWeights val desiredCubeSize = revision.desiredCubeSize - val minSize = desiredCubeSize * 0.9 + val 
minSize = (desiredCubeSize * 0.9).toLong
 
     val cubeSizes = indexed
       .groupBy(cubeColumnName)
@@ -33,9 +34,8 @@ object IndexTestChecks {
       if (weight != Weight.MaxValue) {
         // If the weight is not set to MaxValue,
         // then the size should be greater than the desiredCubeSize
-        assert(
-          size > minSize,
-          s"cube ${cubeId.string} appear as overflowed but has size $size")
+        (size should be > minSize) withClue
+          s"cube ${cubeId.string} appears as overflowed but has size $size"
 
         // And parent cube should be overflowed as well
         cubeId.parent match {
@@ -44,14 +44,14 @@ object IndexTestChecks {
           case Some(parent) if weightMap.contains(parent) && cubeSizes.contains(parent) =>
             val weightParent = weightMap(parent)
             val parentSize = cubeSizes(parent)
-            assert(
-              weightParent != Weight.MaxValue && size > minSize,
-              s"cube ${cubeId.string} is overflowed but parent ${parent.string} is not" +
-                s" It has weight ${weightParent} and size ${parentSize}")
+            weightParent should not be Weight.MaxValue
+
+            (size should be > minSize) withClue
+              s"cube $cubeId is overflowed but parent ${parent.string} is not." +
+              s" It has weight $weightParent and size $parentSize"
 
           case Some(parent) =>
-            assert(
-              false,
+            fail(
               s"Parent ${parent.string} of ${cubeId.string}" +
                 s" does not appear in weight map or data")
 
@@ -64,17 +64,14 @@ object IndexTestChecks {
 
   def checkCubes(weightMap: Map[CubeId, Weight]): Unit = {
 
-    def checkCubeParents(): Unit = weightMap.foreach { case (cube, _) =>
+    weightMap.foreach { case (cube, _) =>
       cube.parent match {
         case Some(parent) =>
-          assert(
-            weightMap.contains(parent),
-            s"parent ${parent.string} of ${cube.string} does not appear in the list of cubes")
+          (weightMap should contain key parent) withClue
+            s"parent ${parent.string} of ${cube.string} does not appear in the list of cubes"
         case None => // root cube
       }
     }
-
-    checkCubeParents()
   }
 
   def checkCubesOnData(
@@ -92,9 +89,9 @@ object IndexTestChecks {
     cubesOnData.foreach { cube =>
       cube.parent match {
         case Some(parent) =>
-          assert(
-            cubesOnData.contains(parent),
-            s"Parent ${parent.string} of ${cube.string} does not appear in the indexed data")
+          (cubesOnData should contain(parent)) withClue
+            s"Parent ${parent.string} of ${cube.string} does not appear in the indexed data"
+
         case None => // root cube
       }
     }
@@ -103,13 +100,15 @@ object IndexTestChecks {
     def checkDataWithWeightMap(): Unit = {
       cubesOnData.foreach { cube =>
         if (cube.isRoot) {
-          assert(weightMap.contains(cube), s"Cube root appears in data but not in weight map")
+
+          (weightMap should contain key cube) withClue
+            s"Cube root appears in data but not in weight map"
         } else {
           val parent = cube.parent.get
-          assert(
-            weightMap.contains(cube) || weightMap.contains(parent),
+
+          (weightMap should (contain key cube or contain key parent)) withClue
             s"Either weight map doesn't contain ${cube.string}" +
-            s" or doesn't contain it's parent ${parent.string}")
+            s" or doesn't contain its parent ${parent.string}"
         }
       }
     }
@@ -127,10 +126,9 @@ object IndexTestChecks {
     }
     // scalastyle:off
     childrenWeights.foreach { case (child, childWeight) =>
-      assert(
-        childWeight >= maxWeight,
+      childWeight should be >= maxWeight withClue
         s"MaxWeight of child ${child.string} is ${childWeight.fraction} " +
-          s"and maxWeight of parent ${cube.string} is ${maxWeight.fraction}")
+          s"and maxWeight of parent ${cube.string} is ${maxWeight.fraction}"
     }
   }
 }
diff --git a/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala b/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala
index 1e9a2f9e1..844aed9c4 100644
--- 
a/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala +++ b/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala @@ -60,7 +60,7 @@ class NewRevisionTest revisions .map(revision => qbeastSnapshot.loadIndexStatus(revision.revisionID).cubeNormalizedWeights) - allWM.foreach(wm => assert(wm.nonEmpty)) + allWM.foreach(wm => wm should not be empty) } it should diff --git a/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala b/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala index d06e8fe3e..85133d15f 100644 --- a/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala +++ b/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala @@ -28,8 +28,8 @@ class SparkRevisionFactoryTest extends QbeastIntegrationTestSpec { "column:LinearTransformer" match { case SpecExtractor(column, transformer) => - assert(column == "column") - assert(transformer == "LinearTransformer") + column shouldBe "column" + transformer shouldBe "LinearTransformer" case _ => fail("It did not recognize the type") } @@ -37,7 +37,7 @@ class SparkRevisionFactoryTest extends QbeastIntegrationTestSpec { case SpecExtractor(column, transformer) => fail("It shouldn't be here") case column => - assert(column == "column") + column shouldBe "column" } } diff --git a/src/test/scala/io/qbeast/spark/index/writer/BlockWriterTest.scala b/src/test/scala/io/qbeast/spark/index/writer/BlockWriterTest.scala index bd12212bc..cf148fc97 100644 --- a/src/test/scala/io/qbeast/spark/index/writer/BlockWriterTest.scala +++ b/src/test/scala/io/qbeast/spark/index/writer/BlockWriterTest.scala @@ -120,6 +120,6 @@ class BlockWriterTest extends AnyFlatSpec with Matchers with QbeastIntegrationTe CubeId(dimensionCount, row.getAs[Array[Byte]](0)).string } .toSet - assert(files.map(_.tags(TagUtils.cube)).forall(cube => cubes.contains(cube))) + files.map(_.tags(TagUtils.cube)).forall(cube => cubes.contains(cube)) shouldBe true } } diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala index ce9b26884..27482033f 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala @@ -162,13 +162,12 @@ class QbeastDataSourceIntegrationTest extends QbeastIntegrationTestSpec { val executionPlan = query.queryExecution.executedPlan.collectLeaves() - assert( - executionPlan.exists(p => - p - .asInstanceOf[FileSourceScanExec] - .relation - .location - .isInstanceOf[OTreeIndex])) + executionPlan.exists(p => + p + .asInstanceOf[FileSourceScanExec] + .relation + .location + .isInstanceOf[OTreeIndex]) shouldBe true val filesDeltaQuery = deltaQuery @@ -183,7 +182,7 @@ class QbeastDataSourceIntegrationTest extends QbeastIntegrationTestSpec { .collect() filesQbeastQuery.length shouldBe <=(filesDeltaQuery.length) - filesQbeastQuery.foreach(f => assert(filesDeltaQuery.contains(f))) + filesQbeastQuery.foreach(f => filesDeltaQuery should contain(f)) } } From be8ad6ddeb489ac30f30cb099610fed792b4a82f Mon Sep 17 00:00:00 2001 From: Cesare Cugnasco Date: Tue, 7 Dec 2021 18:19:20 +0100 Subject: [PATCH 5/5] adding toString to CubeId for easier testing --- core/src/main/scala/io/qbeast/core/model/CubeId.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/main/scala/io/qbeast/core/model/CubeId.scala b/core/src/main/scala/io/qbeast/core/model/CubeId.scala index 4983c2ad5..e1877d691 100644 --- 
a/core/src/main/scala/io/qbeast/core/model/CubeId.scala +++ b/core/src/main/scala/io/qbeast/core/model/CubeId.scala @@ -390,4 +390,6 @@ case class CubeId(dimensionCount: Int, depth: Int, bitMask: Array[Long]) extends (Point(from.toIndexedSeq), Point(to.toIndexedSeq)) } + override def toString: String = s"CubeId($dimensionCount, $depth, $string)" + }
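
A note on the clue style adopted in PATCH 4/5: ScalaTest's AppendedClues
syntax and the toString override from PATCH 5/5 compose as in the minimal,
self-contained sketch below. SimpleCube and ClueStyleSpec are illustrative
stand-ins invented for this example, not code from the patches.

import org.scalatest.AppendedClues.convertToClueful
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

// Stand-in for io.qbeast.core.model.CubeId: the real class overrides toString
// (PATCH 5/5) so that clues print a readable id instead of the default
// case-class rendering of its internal Array[Long] bit mask.
case class SimpleCube(dimensionCount: Int, depth: Int) {
  override def toString: String = s"SimpleCube($dimensionCount, $depth)"
}

class ClueStyleSpec extends AnyFlatSpec with Matchers {

  "a weight map" should "contain the sampled cube" in {
    val cube = SimpleCube(2, 1)
    val weightMap = Map(cube -> 0.5)

    // The parentheses make the grouping explicit; alphanumeric infix
    // operators are left-associative, so the unparenthesized forms in the
    // patches, such as `a shouldBe b withClue c`, also group as
    // `(a shouldBe b) withClue c`. The clue is appended to the matcher's
    // message only when the assertion fails.
    (weightMap should contain key cube) withClue
      s"cube $cube does not appear in the weight map"
  }
}

With the matcher alone, a failure reports only the mismatched values; the
appended clue adds the cube being checked, and the toString override keeps
that clue short instead of dumping the case class's array field.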