From 75f9bb6d80d8ace67a5f6fb1c49681e110c1d353 Mon Sep 17 00:00:00 2001 From: osopardo1 Date: Thu, 28 Oct 2021 16:46:23 +0200 Subject: [PATCH 1/5] Initialize QueryRange to AllSpace and add test --- .../qbeast/spark/sql/files/OTreeIndex.scala | 7 ++---- .../QbeastDataSourceIntegrationTest.scala | 24 +++++++++++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/main/scala/io/qbeast/spark/sql/files/OTreeIndex.scala b/src/main/scala/io/qbeast/spark/sql/files/OTreeIndex.scala index 097d29043..507ad1c8c 100644 --- a/src/main/scala/io/qbeast/spark/sql/files/OTreeIndex.scala +++ b/src/main/scala/io/qbeast/spark/sql/files/OTreeIndex.scala @@ -4,7 +4,7 @@ package io.qbeast.spark.sql.files import io.qbeast.spark.index.{CubeId, Weight} -import io.qbeast.spark.model.{Point, QuerySpace, QuerySpaceFromTo, RangeValues} +import io.qbeast.spark.model.{AllSpace, QuerySpace, RangeValues} import io.qbeast.spark.sql.qbeast import io.qbeast.spark.sql.utils.State import io.qbeast.spark.sql.utils.TagUtils @@ -96,10 +96,7 @@ case class OTreeIndex(index: TahoeLogFileIndex) val revisionData = qbeastSnapshot.getRevisionData(revision.id) val dimensionCount = revision.dimensionCount - val originalFrom = Point(Vector.fill(dimensionCount)(Int.MinValue.doubleValue())) - val originalTo = Point(Vector.fill(dimensionCount)(Int.MaxValue.doubleValue())) - val querySpace = QuerySpaceFromTo(originalFrom, originalTo, revision) - + val querySpace = AllSpace(dimensionCount) val cubeWeights = revisionData.cubeWeights val replicatedSet = revisionData.replicatedSet val filesRevision = filesVector.filter(_.tags(TagUtils.revision) == revision.id.toString) diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala index 00ef02485..0519765a0 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala @@ -8,6 +8,7 @@ import io.qbeast.spark.index.CubeId import io.qbeast.spark.sql.files.OTreeIndex import io.qbeast.spark.table.QbeastTable import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.functions.{col, lit} import org.apache.spark.sql.{DataFrame, SparkSession} class QbeastDataSourceIntegrationTest extends QbeastIntegrationTestSpec { @@ -41,6 +42,29 @@ class QbeastDataSourceIntegrationTest extends QbeastIntegrationTestSpec { } } + it should "index correctly on bigger spaces" in withQbeastContextSparkAndTmpDir { + (spark, tmpDir) => + { + val data = loadTestData(spark) + .withColumn("user_id", lit(col("user_id") * Long.MaxValue)) + // WRITE SOME DATA + data.write + .mode("overwrite") + .format("qbeast") + .option("columnsToIndex", "user_id,product_id") + .save(tmpDir) + + val indexed = spark.read.format("qbeast").load(tmpDir) + + data.count() shouldBe indexed.count() + + assertLargeDatasetEquality(indexed, data, orderedComparison = false) + + data.columns.toSet shouldBe indexed.columns.toSet + + } + } + it should "index correctly on overwrite" in withQbeastContextSparkAndTmpDir { (spark, tmpDir) => { val data = loadTestData(spark) From 949082b70d99b6b95033aed23d522dfad4dd3ec1 Mon Sep 17 00:00:00 2001 From: Cesare Cugnasco Date: Tue, 7 Dec 2021 16:48:51 +0100 Subject: [PATCH 2/5] fixing typos --- CODE_OF_CONDUCT.md | 2 +- CONTRIBUTING.md | 2 +- core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala | 2 +- .../scala/io/qbeast/core/model/JSONSerializationTests.scala | 6 
+++--- .../scala/io/qbeast/core/transform/TransformerTest.scala | 6 +++--- src/main/scala/io/qbeast/spark/delta/CubeDataLoader.scala | 4 ++-- .../io/qbeast/spark/index/MaxWeightEstimationTest.scala | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index c9f130900..074e6ecf8 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,6 +1,6 @@ ## Our Pledge -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, religion, or sexual identity and orientation. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 29a3c0a9e..52e4716f4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,7 +5,7 @@ Welcome to the Qbeast community! Nice to meet you :) Either you want to know more about our guidelines or open a Pull Request, this is your page. We are pleased to help you through the different steps for contributing to our (your) project. ## Licensing of contributed material -All contributed code, docs, and otro materials are considered licensed under the same terms as the rest of the project. Check [LICENSE](./LICENCE) for more details. +All contributed code, docs, and other materials are considered licensed under the same terms as the rest of the project. Check [LICENSE](./LICENCE) for more details. ## Version control branching - Always make a new branch for your work, no matter how small diff --git a/core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala b/core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala index e70086ad5..daed46cc4 100644 --- a/core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala +++ b/core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala @@ -10,7 +10,7 @@ import org.scalatest.matchers.should.Matchers * Tests for CubeId. 
  */
 class CubeIdTest extends AnyFlatSpec with Matchers {
-  "CubeId" should "implement equals corretly" in {
+  "CubeId" should "implement equals correctly" in {
     val id1 = CubeId.root(2)
     val id2 = id1.firstChild
     val id3 = id1.firstChild
diff --git a/core/src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala b/core/src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala
index b77b351c2..49d8f3d5a 100644
--- a/core/src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala
+++ b/core/src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala
@@ -77,7 +77,7 @@ class JSONSerializationTests extends AnyFlatSpec with Matchers {
 
   }
 
-  "A revision" should "Serialize with all simple value a part from Transoform*" in {
+  "A revision" should "Serialize with all simple values apart from Transform*" in {
     val rev =
       Revision(
         12L,
@@ -96,7 +96,7 @@ class JSONSerializationTests extends AnyFlatSpec with Matchers {
     mapper.readValue[Revision](json, classOf[Revision]) shouldBe rev
   }
 
-  "A revision" should "Serialize with all Long Transoform*" in {
+  "A revision" should "Serialize with all Long Transform*" in {
     val rev =
       Revision(
         12L,
@@ -115,7 +115,7 @@ class JSONSerializationTests extends AnyFlatSpec with Matchers {
     mapper.readValue[Revision](json, classOf[Revision]) shouldBe rev
   }
 
-  "A revision" should "Serialize with all Hash Transoform*" in {
+  "A revision" should "Serialize with all Hash Transform*" in {
     val rev =
       Revision(
         12L,
diff --git a/core/src/test/scala/io/qbeast/core/transform/TransformerTest.scala b/core/src/test/scala/io/qbeast/core/transform/TransformerTest.scala
index d5553cb1e..efd596a8a 100644
--- a/core/src/test/scala/io/qbeast/core/transform/TransformerTest.scala
+++ b/core/src/test/scala/io/qbeast/core/transform/TransformerTest.scala
@@ -6,7 +6,7 @@ import org.scalatest.matchers.should.Matchers
 
 class TransformerTest extends AnyFlatSpec with Matchers {
 
-  behavior of "Tranformer"
+  behavior of "Transformer"
 
   it should "return correct column name" in {
     val columnName = "a"
@@ -45,8 +45,8 @@ class TransformerTest extends AnyFlatSpec with Matchers {
     val transformation = Map("a_min" -> 0, "a_max" -> 1)
     val currentTransformation = transformer.makeTransformation(transformation)
 
-    val newTranformation = Map("a_min" -> 3, "a_max" -> 8)
-    transformer.maybeUpdateTransformation(currentTransformation, newTranformation) shouldBe Some(
+    val newTransformation = Map("a_min" -> 3, "a_max" -> 8)
+    transformer.maybeUpdateTransformation(currentTransformation, newTransformation) shouldBe Some(
       LinearTransformation(0, 8, dataType))
 
     transformer.maybeUpdateTransformation(currentTransformation, transformation) shouldBe None
diff --git a/src/main/scala/io/qbeast/spark/delta/CubeDataLoader.scala b/src/main/scala/io/qbeast/spark/delta/CubeDataLoader.scala
index 0e6f5c5da..6471602e8 100644
--- a/src/main/scala/io/qbeast/spark/delta/CubeDataLoader.scala
+++ b/src/main/scala/io/qbeast/spark/delta/CubeDataLoader.scala
@@ -22,7 +22,7 @@ case class CubeDataLoader(tableID: QTableID) {
 
   /**
    * Loads the data from a set of cubes in a specific revision
-   * and adds column infomation
+   * and adds column information
    * @param cubeSet the set of cubes to load
    * @param revision the revision to load
    * @param columnName the column name to add
@@ -41,7 +41,7 @@ case class CubeDataLoader(tableID: QTableID) {
 
   /**
    * Loads the data from a single cube in a specific revision
-   * and adds column infomation
+   * and adds column information
    * @param cubeId the cube to load
    * @param revision the revision to load
    * @param 
columnName the column name to add diff --git a/src/test/scala/io/qbeast/spark/index/MaxWeightEstimationTest.scala b/src/test/scala/io/qbeast/spark/index/MaxWeightEstimationTest.scala index 2a67659b6..b5b212b09 100644 --- a/src/test/scala/io/qbeast/spark/index/MaxWeightEstimationTest.scala +++ b/src/test/scala/io/qbeast/spark/index/MaxWeightEstimationTest.scala @@ -29,7 +29,7 @@ class MaxWeightEstimationTest extends AnyFlatSpec with Matchers with QbeastInteg MaxWeightEstimation.finish(finalWeight) shouldBe finalWeight } - "MaxWeight zero" should "be minium positive value" in { + "MaxWeight zero" should "be minimum positive value" in { MaxWeightEstimation.zero shouldBe 0.0 } From 5e8b4498c12949669f6ab480a52e51f832eedbcb Mon Sep 17 00:00:00 2001 From: Cesare Cugnasco Date: Tue, 7 Dec 2021 17:25:19 +0100 Subject: [PATCH 3/5] using named parameters --- .../spark/index/DoublePassOTreeDataAnalyzerTest.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala b/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala index fd0b41380..2a2b9712c 100644 --- a/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala +++ b/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala @@ -199,7 +199,8 @@ class DoublePassOTreeDataAnalyzerTest extends QbeastIntegrationTestSpec { val weightedDataFrame = data.withColumn(weightColumnName, lit(scala.util.Random.nextInt())) val cubeNormalizedWeights = - weightedDataFrame.transform(estimatePartitionCubeWeights(revision, indexStatus, false)) + weightedDataFrame.transform( + estimatePartitionCubeWeights(revision, indexStatus, isReplication = false)) val partitions = weightedDataFrame.rdd.getNumPartitions @@ -225,7 +226,8 @@ class DoublePassOTreeDataAnalyzerTest extends QbeastIntegrationTestSpec { val weightedDataFrame = data.withColumn(weightColumnName, lit(scala.util.Random.nextInt())) val cubeNormalizedWeights = - weightedDataFrame.transform(estimatePartitionCubeWeights(revision, indexStatus, false)) + weightedDataFrame.transform( + estimatePartitionCubeWeights(revision, indexStatus, isReplication = false)) val cubeWeights = cubeNormalizedWeights.transform(estimateCubeWeights(revision)) cubeWeights.columns.length shouldBe 2 From 58639e24c97078ff90d0783261fd7aa66b6cce95 Mon Sep 17 00:00:00 2001 From: Cesare Cugnasco Date: Tue, 7 Dec 2021 17:26:00 +0100 Subject: [PATCH 4/5] reorganizing test code using matchers instead of assertion --- .../io/qbeast/docs/DocumentationTests.scala | 37 ++++++------ .../spark/delta/QbeastSnapshotTest.scala | 7 ++- .../index/CubeWeightsIntegrationTest.scala | 2 +- .../io/qbeast/spark/index/IndexTest.scala | 9 ++- .../qbeast/spark/index/IndexTestChecks.scala | 58 +++++++++---------- .../qbeast/spark/index/NewRevisionTest.scala | 2 +- .../index/SparkRevisionFactoryTest.scala | 6 +- .../spark/index/writer/BlockWriterTest.scala | 2 +- .../QbeastDataSourceIntegrationTest.scala | 15 +++-- 9 files changed, 70 insertions(+), 68 deletions(-) diff --git a/src/test/scala/io/qbeast/docs/DocumentationTests.scala b/src/test/scala/io/qbeast/docs/DocumentationTests.scala index 64361fb04..bc2de6382 100644 --- a/src/test/scala/io/qbeast/docs/DocumentationTests.scala +++ b/src/test/scala/io/qbeast/docs/DocumentationTests.scala @@ -3,6 +3,7 @@ package io.qbeast.docs import io.qbeast.spark.QbeastIntegrationTestSpec import org.apache.spark.sql.delta.DeltaLog import org.apache.spark.sql.functions.input_file_name 
+import org.scalatest.AppendedClues.convertToClueful class DocumentationTests extends QbeastIntegrationTestSpec { @@ -28,8 +29,10 @@ class DocumentationTests extends QbeastIntegrationTestSpec { .format("qbeast") .load(tmp_dir) - assert(qbeast_df.count() == csv_df.count(), "Readme count does not match the original") - assert(qbeast_df.schema == csv_df.schema, "Readme schema does not match the original") + qbeast_df.count() shouldBe csv_df + .count() withClue "Readme count does not match the original" + + qbeast_df.schema shouldBe csv_df.schema withClue "Readme schema does not match the original" } } @@ -49,10 +52,10 @@ class DocumentationTests extends QbeastIntegrationTestSpec { val qbeastDf = spark.read.format("qbeast").load(qbeastTablePath) - assert( - qbeastDf.count() == parquetDf.count(), - "Quickstart count does not match the original") - assert(qbeastDf.schema == parquetDf.schema, "Quickstart schema does not match the original") + qbeastDf.count() shouldBe parquetDf.count() withClue + "Quickstart count does not match the original" + qbeastDf.schema shouldBe parquetDf.schema withClue + "Quickstart schema does not match the original" } } @@ -83,29 +86,27 @@ class DocumentationTests extends QbeastIntegrationTestSpec { val df = spark.read.format("parquet").load(processed_parquet_dir) val qbeast_df = spark.read.format("qbeast").load(qbeast_table_path) - assert( - qbeast_df.count() == df.count(), - "Pushdown notebook count of indexed dataframe does not match the original") + qbeast_df.count() shouldBe df.count() withClue + "Pushdown notebook count of indexed dataframe does not match the original" // Table changes? val deltaLog = DeltaLog.forTable(spark, qbeast_table_path) val totalNumberOfFiles = deltaLog.snapshot.allFiles.count() - assert( - totalNumberOfFiles == 21, - "Total number of files in pushdown notebook changes to " + totalNumberOfFiles) + + totalNumberOfFiles shouldBe 21 withClue + "Total number of files in pushdown notebook changes to " + totalNumberOfFiles val query = qbeast_df.sample(0.1) val numberOfFilesQuery = query.select(input_file_name()).distinct().count() - assert( - numberOfFilesQuery == 1, - "Number of files read in pushdown notebook changes to " + numberOfFilesQuery) + numberOfFilesQuery shouldBe 1 withClue + "Number of files read in pushdown notebook changes to " + numberOfFilesQuery val file = query.select(input_file_name()).distinct().head().getString(0) val numberOfRowsRead = spark.read.format("parquet").load(file).count() - assert( - numberOfRowsRead == 302715, - "Number of rows read in pushdown notebook changes to " + numberOfRowsRead) + + numberOfRowsRead shouldBe 302715 withClue + "Number of rows read in pushdown notebook changes to " + numberOfRowsRead } } diff --git a/src/test/scala/io/qbeast/spark/delta/QbeastSnapshotTest.scala b/src/test/scala/io/qbeast/spark/delta/QbeastSnapshotTest.scala index e45abf905..09f8149c4 100644 --- a/src/test/scala/io/qbeast/spark/delta/QbeastSnapshotTest.scala +++ b/src/test/scala/io/qbeast/spark/delta/QbeastSnapshotTest.scala @@ -10,6 +10,7 @@ import io.qbeast.spark.utils.TagUtils import io.qbeast.spark.QbeastIntegrationTestSpec import org.apache.spark.sql.delta.DeltaLog import org.apache.spark.sql.{Dataset, SparkSession} +import org.scalatest.AppendedClues.convertToClueful class QbeastSnapshotTest extends QbeastIntegrationTestSpec { @@ -133,10 +134,10 @@ class QbeastSnapshotTest extends QbeastIntegrationTestSpec { .map(fileInfo) .map(a => a.tags(TagUtils.elementCount).toLong) .sum - assert( - size > cubeSize * 0.9, + + 
size should be > (cubeSize * 0.9).toLong withClue "assertion failed in cube " + cube + - " where size is " + size + " and weight is " + weight) + " where size is " + size + " and weight is " + weight } } } diff --git a/src/test/scala/io/qbeast/spark/index/CubeWeightsIntegrationTest.scala b/src/test/scala/io/qbeast/spark/index/CubeWeightsIntegrationTest.scala index 640dea423..ce1b6625b 100644 --- a/src/test/scala/io/qbeast/spark/index/CubeWeightsIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/index/CubeWeightsIntegrationTest.scala @@ -44,7 +44,7 @@ class CubeWeightsIntegrationTest extends QbeastIntegrationTestSpec { // commitLogWeightMap shouldBe weightMap commitLogWeightMap.keys.foreach(cubeId => { - assert(weightMap.contains(cubeId) || weightMap.contains(cubeId.parent.get)) + weightMap should contain key cubeId }) } diff --git a/src/test/scala/io/qbeast/spark/index/IndexTest.scala b/src/test/scala/io/qbeast/spark/index/IndexTest.scala index ff584e472..6db425c12 100644 --- a/src/test/scala/io/qbeast/spark/index/IndexTest.scala +++ b/src/test/scala/io/qbeast/spark/index/IndexTest.scala @@ -5,7 +5,6 @@ package io.qbeast.spark.index import io.qbeast.TestClasses.{Client3, Client4} import io.qbeast.core.model._ -import io.qbeast.spark.index.IndexTestChecks._ import io.qbeast.spark.index.QbeastColumns.cubeColumnName import io.qbeast.spark.utils.TagUtils import io.qbeast.spark.{QbeastIntegrationTestSpec, delta} @@ -17,7 +16,11 @@ import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession} import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers -class IndexTest extends AnyFlatSpec with Matchers with QbeastIntegrationTestSpec { +class IndexTest + extends AnyFlatSpec + with Matchers + with QbeastIntegrationTestSpec + with IndexTestChecks { // TEST CONFIGURATIONS private val options = Map("columnsToIndex" -> "age,val2", "cubeSize" -> "10000") @@ -95,7 +98,7 @@ class IndexTest extends AnyFlatSpec with Matchers with QbeastIntegrationTestSpec val (indexed, tc) = oTreeAlgorithm.index(df, IndexStatus(rev)) - checkCubesOnData(tc.indexChanges.cubeWeights, indexed, 2) + checkCubesOnData(tc.indexChanges.cubeWeights, indexed, dimensionCount = 2) } } } diff --git a/src/test/scala/io/qbeast/spark/index/IndexTestChecks.scala b/src/test/scala/io/qbeast/spark/index/IndexTestChecks.scala index f16a07213..da876908e 100644 --- a/src/test/scala/io/qbeast/spark/index/IndexTestChecks.scala +++ b/src/test/scala/io/qbeast/spark/index/IndexTestChecks.scala @@ -3,21 +3,22 @@ package io.qbeast.spark.index import io.qbeast.core.model.{CubeId, Revision, TableChanges, Weight} import io.qbeast.spark.index.QbeastColumns.cubeColumnName import org.apache.spark.sql.DataFrame +import org.scalatest.AppendedClues.convertToClueful +import org.scalatest.matchers.should.Matchers -object IndexTestChecks { +trait IndexTestChecks extends Matchers { def checkDFSize(indexed: DataFrame, original: DataFrame): Unit = { val indexedSize = indexed.count() val originalSize = original.count() - assert( - indexedSize == originalSize, - s"Indexed dataset has size ${indexedSize} and original has size $originalSize") + indexedSize shouldBe originalSize withClue + s"Indexed dataset has size $indexedSize and original has size $originalSize" } def checkCubeSize(tableChanges: TableChanges, revision: Revision, indexed: DataFrame): Unit = { val weightMap: Map[CubeId, Weight] = tableChanges.indexChanges.cubeWeights val desiredCubeSize = revision.desiredCubeSize - val minSize = desiredCubeSize * 0.9 + val 
minSize = (desiredCubeSize * 0.9).toLong
 
     val cubeSizes = indexed
       .groupBy(cubeColumnName)
@@ -33,9 +34,8 @@ object IndexTestChecks {
       if (weight != Weight.MaxValue) {
         // If the weight is not set to MaxValue,
         // then the size should be greater than the desiredCubeSize
-        assert(
-          size > minSize,
-          s"cube ${cubeId.string} appear as overflowed but has size $size")
+        (size should be > minSize) withClue
+          s"cube ${cubeId.string} appears as overflowed but has size $size"
 
         // And parent cube should be overflowed as well
         cubeId.parent match {
@@ -44,14 +44,14 @@ object IndexTestChecks {
           case Some(parent) if weightMap.contains(parent) && cubeSizes.contains(parent) =>
             val weightParent = weightMap(parent)
             val parentSize = cubeSizes(parent)
-            assert(
-              weightParent != Weight.MaxValue && size > minSize,
-              s"cube ${cubeId.string} is overflowed but parent ${parent.string} is not" +
-                s" It has weight ${weightParent} and size ${parentSize}")
+            weightParent should not be Weight.MaxValue
+
+            (size should be > minSize) withClue
+              s"cube $cubeId is overflowed but parent ${parent.string} is not." +
+              s" It has weight $weightParent and size $parentSize"
 
           case Some(parent) =>
-            assert(
-              false,
+            fail(
               s"Parent ${parent.string} of ${cubeId.string}" +
                 s" does not appear in weight map or data")
 
@@ -64,17 +64,14 @@ object IndexTestChecks {
 
   def checkCubes(weightMap: Map[CubeId, Weight]): Unit = {
 
-    def checkCubeParents(): Unit = weightMap.foreach { case (cube, _) =>
+    weightMap.foreach { case (cube, _) =>
       cube.parent match {
         case Some(parent) =>
-          assert(
-            weightMap.contains(parent),
-            s"parent ${parent.string} of ${cube.string} does not appear in the list of cubes")
+          (weightMap should contain key parent) withClue
+            s"parent ${parent.string} of ${cube.string} does not appear in the list of cubes"
         case None => // root cube
       }
     }
-
-    checkCubeParents()
   }
 
   def checkCubesOnData(
@@ -92,9 +89,9 @@ object IndexTestChecks {
     cubesOnData.foreach { cube =>
       cube.parent match {
         case Some(parent) =>
-          assert(
-            cubesOnData.contains(parent),
-            s"Parent ${parent.string} of ${cube.string} does not appear in the indexed data")
+          (cubesOnData should contain(parent)) withClue
+            s"Parent ${parent.string} of ${cube.string} does not appear in the indexed data"
+
         case None => // root cube
       }
     }
@@ -103,13 +100,15 @@ object IndexTestChecks {
     def checkDataWithWeightMap(): Unit = {
       cubesOnData.foreach { cube =>
         if (cube.isRoot) {
-          assert(weightMap.contains(cube), s"Cube root appears in data but not in weight map")
+
+          (weightMap should contain key cube) withClue
+            s"Cube root appears in data but not in weight map"
         } else {
           val parent = cube.parent.get
-          assert(
-            weightMap.contains(cube) || weightMap.contains(parent),
+
+          (weightMap should (contain key cube or contain key parent)) withClue
             s"Either weight map doesn't contain ${cube.string}" +
-            s" or doesn't contain it's parent ${parent.string}")
+            s" or doesn't contain its parent ${parent.string}"
         }
       }
     }
@@ -127,10 +126,9 @@ object IndexTestChecks {
     }
     // scalastyle:off
     childrenWeights.foreach { case (child, childWeight) =>
-      assert(
-        childWeight >= maxWeight,
+      childWeight should be >= maxWeight withClue
         s"MaxWeight of child ${child.string} is ${childWeight.fraction} " +
-          s"and maxWeight of parent ${cube.string} is ${maxWeight.fraction}")
+          s"and maxWeight of parent ${cube.string} is ${maxWeight.fraction}"
     }
   }
 }
diff --git a/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala b/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala
index 1e9a2f9e1..844aed9c4 100644
--- 
a/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala +++ b/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala @@ -60,7 +60,7 @@ class NewRevisionTest revisions .map(revision => qbeastSnapshot.loadIndexStatus(revision.revisionID).cubeNormalizedWeights) - allWM.foreach(wm => assert(wm.nonEmpty)) + allWM.foreach(wm => wm should not be empty) } it should diff --git a/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala b/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala index d06e8fe3e..85133d15f 100644 --- a/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala +++ b/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala @@ -28,8 +28,8 @@ class SparkRevisionFactoryTest extends QbeastIntegrationTestSpec { "column:LinearTransformer" match { case SpecExtractor(column, transformer) => - assert(column == "column") - assert(transformer == "LinearTransformer") + column shouldBe "column" + transformer shouldBe "LinearTransformer" case _ => fail("It did not recognize the type") } @@ -37,7 +37,7 @@ class SparkRevisionFactoryTest extends QbeastIntegrationTestSpec { case SpecExtractor(column, transformer) => fail("It shouldn't be here") case column => - assert(column == "column") + column shouldBe "column" } } diff --git a/src/test/scala/io/qbeast/spark/index/writer/BlockWriterTest.scala b/src/test/scala/io/qbeast/spark/index/writer/BlockWriterTest.scala index bd12212bc..cf148fc97 100644 --- a/src/test/scala/io/qbeast/spark/index/writer/BlockWriterTest.scala +++ b/src/test/scala/io/qbeast/spark/index/writer/BlockWriterTest.scala @@ -120,6 +120,6 @@ class BlockWriterTest extends AnyFlatSpec with Matchers with QbeastIntegrationTe CubeId(dimensionCount, row.getAs[Array[Byte]](0)).string } .toSet - assert(files.map(_.tags(TagUtils.cube)).forall(cube => cubes.contains(cube))) + files.map(_.tags(TagUtils.cube)).forall(cube => cubes.contains(cube)) shouldBe true } } diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala index ce9b26884..27482033f 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastDataSourceIntegrationTest.scala @@ -162,13 +162,12 @@ class QbeastDataSourceIntegrationTest extends QbeastIntegrationTestSpec { val executionPlan = query.queryExecution.executedPlan.collectLeaves() - assert( - executionPlan.exists(p => - p - .asInstanceOf[FileSourceScanExec] - .relation - .location - .isInstanceOf[OTreeIndex])) + executionPlan.exists(p => + p + .asInstanceOf[FileSourceScanExec] + .relation + .location + .isInstanceOf[OTreeIndex]) shouldBe true val filesDeltaQuery = deltaQuery @@ -183,7 +182,7 @@ class QbeastDataSourceIntegrationTest extends QbeastIntegrationTestSpec { .collect() filesQbeastQuery.length shouldBe <=(filesDeltaQuery.length) - filesQbeastQuery.foreach(f => assert(filesDeltaQuery.contains(f))) + filesQbeastQuery.foreach(f => filesDeltaQuery should contain(f)) } } From be8ad6ddeb489ac30f30cb099610fed792b4a82f Mon Sep 17 00:00:00 2001 From: Cesare Cugnasco Date: Tue, 7 Dec 2021 18:19:20 +0100 Subject: [PATCH 5/5] adding toString to CubeId for easier testing --- core/src/main/scala/io/qbeast/core/model/CubeId.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/main/scala/io/qbeast/core/model/CubeId.scala b/core/src/main/scala/io/qbeast/core/model/CubeId.scala index 4983c2ad5..e1877d691 100644 --- 
a/core/src/main/scala/io/qbeast/core/model/CubeId.scala +++ b/core/src/main/scala/io/qbeast/core/model/CubeId.scala @@ -390,4 +390,6 @@ case class CubeId(dimensionCount: Int, depth: Int, bitMask: Array[Long]) extends (Point(from.toIndexedSeq), Point(to.toIndexedSeq)) } + override def toString: String = s"CubeId($dimensionCount, $depth, $string)" + }
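
A note on the clue style adopted in PATCH 4/5: ScalaTest's AppendedClues
syntax and the toString override from PATCH 5/5 compose as in the minimal,
self-contained sketch below. SimpleCube and ClueStyleSpec are illustrative
stand-ins invented for this example, not code from the patches.

import org.scalatest.AppendedClues.convertToClueful
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

// Stand-in for io.qbeast.core.model.CubeId: the real class overrides toString
// (PATCH 5/5) so that clues print a readable id instead of the default
// case-class rendering of its internal Array[Long] bit mask.
case class SimpleCube(dimensionCount: Int, depth: Int) {
  override def toString: String = s"SimpleCube($dimensionCount, $depth)"
}

class ClueStyleSpec extends AnyFlatSpec with Matchers {

  "a weight map" should "contain the sampled cube" in {
    val cube = SimpleCube(2, 1)
    val weightMap = Map(cube -> 0.5)

    // The parentheses make the grouping explicit; alphanumeric infix
    // operators are left-associative, so the unparenthesized forms in the
    // patches, such as `a shouldBe b withClue c`, also group as
    // `(a shouldBe b) withClue c`. The clue is appended to the matcher's
    // message only when the assertion fails.
    (weightMap should contain key cube) withClue
      s"cube $cube does not appear in the weight map"
  }
}

With the matcher alone, a failure reports only the mismatched values; the
appended clue adds the cube being checked, and the toString override keeps
that clue short instead of dumping the case class's array field.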