Initialize QueryRange to AllSpace #38

Merged
merged 6 commits into from
Dec 7, 2021
2 changes: 1 addition & 1 deletion CODE_OF_CONDUCT.md
@@ -1,6 +1,6 @@
## Our Pledge

In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, religion, or sexual identity and orientation.



2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -5,7 +5,7 @@ Welcome to the Qbeast community! Nice to meet you :)
Either you want to know more about our guidelines or open a Pull Request, this is your page. We are pleased to help you through the different steps for contributing to our (your) project.

## Licensing of contributed material
All contributed code, docs, and otro materials are considered licensed under the same terms as the rest of the project. Check [LICENSE](./LICENCE) for more details.
All contributed code, docs, and other materials are considered licensed under the same terms as the rest of the project. Check [LICENSE](./LICENCE) for more details.

## Version control branching
- Always make a new branch for your work, no matter how small
2 changes: 2 additions & 0 deletions core/src/main/scala/io/qbeast/core/model/CubeId.scala
@@ -390,4 +390,6 @@ case class CubeId(dimensionCount: Int, depth: Int, bitMask: Array[Long]) extends
(Point(from.toIndexedSeq), Point(to.toIndexedSeq))
}

override def toString: String = s"CubeId($dimensionCount, $depth, $string)"

}
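
With this override, CubeId prints its dimension count, depth, and path string instead of the default case-class rendering of the raw bit mask. A minimal sketch of the effect, assuming CubeId.root and firstChild behave as in the tests below; the exact payload of the string field depends on CubeId's internal encoding:

import io.qbeast.core.model.CubeId

val root = CubeId.root(2) // root cube of a 2-dimensional index
// Before this change, the default toString exposed the bitMask array;
// now the output reads like CubeId(2, 0, <path string>), which is far
// easier to scan in logs and test failures.
println(root)
println(root.firstChild)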
2 changes: 1 addition & 1 deletion core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala
@@ -10,7 +10,7 @@ import org.scalatest.matchers.should.Matchers
* Tests for CubeId.
*/
class CubeIdTest extends AnyFlatSpec with Matchers {
"CubeId" should "implement equals corretly" in {
"CubeId" should "implement equals correctly" in {
val id1 = CubeId.root(2)
val id2 = id1.firstChild
val id3 = id1.firstChild
@@ -77,7 +77,7 @@ class JSONSerializationTests extends AnyFlatSpec with Matchers {

}

"A revision" should "Serialize with all simple value a part from Transoform*" in {
"A revision" should "Serialize with all simple value a part from Transform*" in {
val rev =
Revision(
12L,
@@ -96,7 +96,7 @@
mapper.readValue[Revision](json, classOf[Revision]) shouldBe rev

}
"A revision" should "Serialize with all Long Transoform*" in {
"A revision" should "Serialize with all Long Transform*" in {
val rev =
Revision(
12L,
@@ -115,7 +115,7 @@
mapper.readValue[Revision](json, classOf[Revision]) shouldBe rev

}
"A revision" should "Serialize with all Hash Transoform*" in {
"A revision" should "Serialize with all Hash Transform*" in {
val rev =
Revision(
12L,
@@ -6,7 +6,7 @@ import org.scalatest.matchers.should.Matchers

class TransformerTest extends AnyFlatSpec with Matchers {

behavior of "Tranformer"
behavior of "Transformer"

it should "return correct column name" in {
val columnName = "a"
@@ -45,8 +45,8 @@ class TransformerTest extends AnyFlatSpec with Matchers {
val transformation = Map("a_min" -> 0, "a_max" -> 1)
val currentTransformation = transformer.makeTransformation(transformation)

val newTranformation = Map("a_min" -> 3, "a_max" -> 8)
transformer.maybeUpdateTransformation(currentTransformation, newTranformation) shouldBe Some(
val newTransformation = Map("a_min" -> 3, "a_max" -> 8)
transformer.maybeUpdateTransformation(currentTransformation, newTransformation) shouldBe Some(
LinearTransformation(0, 8, dataType))

transformer.maybeUpdateTransformation(currentTransformation, transformation) shouldBe None
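
The updated test pins down the contract of maybeUpdateTransformation: the transformation widens to cover incoming min/max statistics, and returns None when the current range already covers them. A self-contained sketch of that merge rule, as an illustration of the contract rather than the real implementation:

// Widen (curMin, curMax) to cover (newMin, newMax); None means no update needed.
def maybeUpdate(cur: (Int, Int), incoming: (Int, Int)): Option[(Int, Int)] = {
  val merged = (math.min(cur._1, incoming._1), math.max(cur._2, incoming._2))
  if (merged == cur) None else Some(merged)
}

assert(maybeUpdate((0, 1), (3, 8)).contains((0, 8))) // widened, as in the test above
assert(maybeUpdate((0, 8), (3, 8)).isEmpty)          // already covered: no update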
4 changes: 2 additions & 2 deletions src/main/scala/io/qbeast/spark/delta/CubeDataLoader.scala
@@ -22,7 +22,7 @@ case class CubeDataLoader(tableID: QTableID) {

/**
* Loads the data from a set of cubes in a specific revision
* and adds column infomation
* and adds column information
* @param cubeSet the set of cubes to load
* @param revision the revision to load
* @param columnName the column name to add
@@ -41,7 +41,7 @@

/**
* Loads the data from a single cube in a specific revision
* and adds column infomation
* and adds column information
* @param cubeId the cube to load
* @param revision the revision to load
* @param columnName the column name to add
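
Based on the scaladoc fixed above, a call site could look like the sketch below. The method name loadSetWithCubeColumn and the QTableID string constructor are assumptions inferred from the documented parameters, not confirmed signatures, so the sketch is left as comments:

// Hypothetical usage, assuming the method and constructor names noted above:
//
//   val loader = CubeDataLoader(QTableID("/tmp/qbeast-table"))
//   val df = loader.loadSetWithCubeColumn(cubeSet, revision, columnName = "cube")
//
// The returned DataFrame would carry the rows of the selected cubes plus a
// "cube" column recording which cube each row was loaded from.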
37 changes: 19 additions & 18 deletions src/test/scala/io/qbeast/docs/DocumentationTests.scala
@@ -3,6 +3,7 @@ package io.qbeast.docs
import io.qbeast.spark.QbeastIntegrationTestSpec
import org.apache.spark.sql.delta.DeltaLog
import org.apache.spark.sql.functions.input_file_name
import org.scalatest.AppendedClues.convertToClueful

class DocumentationTests extends QbeastIntegrationTestSpec {

@@ -28,8 +29,10 @@
.format("qbeast")
.load(tmp_dir)

assert(qbeast_df.count() == csv_df.count(), "Readme count does not match the original")
assert(qbeast_df.schema == csv_df.schema, "Readme schema does not match the original")
qbeast_df.count() shouldBe csv_df
.count() withClue "Readme count does not match the original"

qbeast_df.schema shouldBe csv_df.schema withClue "Readme schema does not match the original"

}
}
@@ -49,10 +52,10 @@

val qbeastDf = spark.read.format("qbeast").load(qbeastTablePath)

assert(
qbeastDf.count() == parquetDf.count(),
"Quickstart count does not match the original")
assert(qbeastDf.schema == parquetDf.schema, "Quickstart schema does not match the original")
qbeastDf.count() shouldBe parquetDf.count() withClue
"Quickstart count does not match the original"
qbeastDf.schema shouldBe parquetDf.schema withClue
"Quickstart schema does not match the original"
}
}

@@ -83,29 +86,27 @@
val df = spark.read.format("parquet").load(processed_parquet_dir)
val qbeast_df = spark.read.format("qbeast").load(qbeast_table_path)

assert(
qbeast_df.count() == df.count(),
"Pushdown notebook count of indexed dataframe does not match the original")
qbeast_df.count() shouldBe df.count() withClue
"Pushdown notebook count of indexed dataframe does not match the original"

// Table changes?

val deltaLog = DeltaLog.forTable(spark, qbeast_table_path)
val totalNumberOfFiles = deltaLog.snapshot.allFiles.count()
assert(
totalNumberOfFiles == 21,
"Total number of files in pushdown notebook changes to " + totalNumberOfFiles)

totalNumberOfFiles shouldBe 21 withClue
"Total number of files in pushdown notebook changes to " + totalNumberOfFiles

val query = qbeast_df.sample(0.1)
val numberOfFilesQuery = query.select(input_file_name()).distinct().count()
assert(
numberOfFilesQuery == 1,
"Number of files read in pushdown notebook changes to " + numberOfFilesQuery)
numberOfFilesQuery shouldBe 1 withClue
"Number of files read in pushdown notebook changes to " + numberOfFilesQuery

val file = query.select(input_file_name()).distinct().head().getString(0)
val numberOfRowsRead = spark.read.format("parquet").load(file).count()
assert(
numberOfRowsRead == 302715,
"Number of rows read in pushdown notebook changes to " + numberOfRowsRead)

numberOfRowsRead shouldBe 302715 withClue
"Number of rows read in pushdown notebook changes to " + numberOfRowsRead

}
}
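
The recurring edit in DocumentationTests.scala (and in the test files below) swaps bare assert(cond, msg) calls for ScalaTest matchers with AppendedClues, so a failure reports the matcher's own diff with the custom message appended rather than replaced. A minimal standalone sketch of the pattern:

import org.scalatest.AppendedClues.convertToClueful
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ClueExampleSpec extends AnyFlatSpec with Matchers {
  "withClue" should "append context to a matcher failure" in {
    val expected = 4L
    val actual = 2L + 2L
    // assert(actual == expected, "counts differ") reports only the message;
    // the matcher form reports both values and appends the clue on failure.
    actual shouldBe expected withClue "counts differ"
  }
}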
7 changes: 4 additions & 3 deletions src/test/scala/io/qbeast/spark/delta/QbeastSnapshotTest.scala
@@ -10,6 +10,7 @@ import io.qbeast.spark.utils.TagUtils
import io.qbeast.spark.QbeastIntegrationTestSpec
import org.apache.spark.sql.delta.DeltaLog
import org.apache.spark.sql.{Dataset, SparkSession}
import org.scalatest.AppendedClues.convertToClueful

class QbeastSnapshotTest extends QbeastIntegrationTestSpec {

@@ -133,10 +134,10 @@
.map(fileInfo)
.map(a => a.tags(TagUtils.elementCount).toLong)
.sum
assert(
size > cubeSize * 0.9,
"assertion failed in cube " + cube +
" where size is " + size + " and weight is " + weight)

size should be > (cubeSize * 0.9).toLong withClue
"assertion failed in cube " + cube +
" where size is " + size + " and weight is " + weight
}
}
}
@@ -44,7 +44,7 @@ class CubeWeightsIntegrationTest extends QbeastIntegrationTestSpec {

// commitLogWeightMap shouldBe weightMap
commitLogWeightMap.keys.foreach(cubeId => {
assert(weightMap.contains(cubeId) || weightMap.contains(cubeId.parent.get))
weightMap should contain key cubeId
})
}

@@ -199,7 +199,8 @@ class DoublePassOTreeDataAnalyzerTest extends QbeastIntegrationTestSpec {
val weightedDataFrame =
data.withColumn(weightColumnName, lit(scala.util.Random.nextInt()))
val cubeNormalizedWeights =
weightedDataFrame.transform(estimatePartitionCubeWeights(revision, indexStatus, false))
weightedDataFrame.transform(
estimatePartitionCubeWeights(revision, indexStatus, isReplication = false))

val partitions = weightedDataFrame.rdd.getNumPartitions

@@ -225,7 +226,8 @@
val weightedDataFrame =
data.withColumn(weightColumnName, lit(scala.util.Random.nextInt()))
val cubeNormalizedWeights =
weightedDataFrame.transform(estimatePartitionCubeWeights(revision, indexStatus, false))
weightedDataFrame.transform(
estimatePartitionCubeWeights(revision, indexStatus, isReplication = false))

val cubeWeights = cubeNormalizedWeights.transform(estimateCubeWeights(revision))
cubeWeights.columns.length shouldBe 2
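
Besides wrapping the call for line length, the change above names the boolean flag at the call site. A stand-in signature (not the real estimatePartitionCubeWeights) showing why named arguments help:

def estimateWeights(revisionId: Long, isReplication: Boolean): Unit = () // stand-in only

estimateWeights(1L, isReplication = false) // the flag's meaning is visible here
estimateWeights(1L, false)                 // compiles, but the reader must check the signature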
9 changes: 6 additions & 3 deletions src/test/scala/io/qbeast/spark/index/IndexTest.scala
@@ -5,7 +5,6 @@ package io.qbeast.spark.index

import io.qbeast.TestClasses.{Client3, Client4}
import io.qbeast.core.model._
import io.qbeast.spark.index.IndexTestChecks._
import io.qbeast.spark.index.QbeastColumns.cubeColumnName
import io.qbeast.spark.utils.TagUtils
import io.qbeast.spark.{QbeastIntegrationTestSpec, delta}
@@ -17,7 +16,11 @@ import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class IndexTest extends AnyFlatSpec with Matchers with QbeastIntegrationTestSpec {
class IndexTest
extends AnyFlatSpec
with Matchers
with QbeastIntegrationTestSpec
with IndexTestChecks {

// TEST CONFIGURATIONS
private val options = Map("columnsToIndex" -> "age,val2", "cubeSize" -> "10000")
@@ -95,7 +98,7 @@ class IndexTest extends AnyFlatSpec with Matchers with QbeastIntegrationTestSpec

val (indexed, tc) = oTreeAlgorithm.index(df, IndexStatus(rev))

checkCubesOnData(tc.indexChanges.cubeWeights, indexed, 2)
checkCubesOnData(tc.indexChanges.cubeWeights, indexed, dimensionCount = 2)
}
}
}
58 changes: 28 additions & 30 deletions src/test/scala/io/qbeast/spark/index/IndexTestChecks.scala
@@ -3,21 +3,22 @@ package io.qbeast.spark.index
import io.qbeast.core.model.{CubeId, Revision, TableChanges, Weight}
import io.qbeast.spark.index.QbeastColumns.cubeColumnName
import org.apache.spark.sql.DataFrame
import org.scalatest.AppendedClues.convertToClueful
import org.scalatest.matchers.should.Matchers

object IndexTestChecks {
trait IndexTestChecks extends Matchers {

def checkDFSize(indexed: DataFrame, original: DataFrame): Unit = {
val indexedSize = indexed.count()
val originalSize = original.count()
assert(
indexedSize == originalSize,
s"Indexed dataset has size ${indexedSize} and original has size $originalSize")
indexedSize shouldBe originalSize withClue
s"Indexed dataset has size $indexedSize and original has size $originalSize"
}

def checkCubeSize(tableChanges: TableChanges, revision: Revision, indexed: DataFrame): Unit = {
val weightMap: Map[CubeId, Weight] = tableChanges.indexChanges.cubeWeights
val desiredCubeSize = revision.desiredCubeSize
val minSize = desiredCubeSize * 0.9
val minSize = (desiredCubeSize * 0.9).toLong

val cubeSizes = indexed
.groupBy(cubeColumnName)
@@ -33,9 +34,8 @@
if (weight != Weight.MaxValue) {
// If the weight is not set to MaxValue,
// then the size should be greater than the desiredCubeSize
assert(
size > minSize,
s"cube ${cubeId.string} appear as overflowed but has size $size")
(size should be > minSize) withClue
s"cube ${cubeId.string} appear as overflowed but has size $size"

// And parent cube should be overflowed as well
cubeId.parent match {
@@ -44,14 +44,14 @@
case Some(parent) if weightMap.contains(parent) && cubeSizes.contains(parent) =>
val weightParent = weightMap(parent)
val parentSize = cubeSizes(parent)
assert(
weightParent != Weight.MaxValue && size > minSize,
s"cube ${cubeId.string} is overflowed but parent ${parent.string} is not" +
s" It has weight ${weightParent} and size ${parentSize}")
weightParent should not be Weight.MaxValue

(size should be > minSize) withClue
s"cube $cubeId is overflowed but parent ${parent.string} is not" +
s" It has weight $weightParent and size $parentSize"

case Some(parent) =>
assert(
false,
s"Parent ${parent.string} of ${cubeId.string}" +
s" does not appear in weight map or data")
fail(
s"Parent ${parent.string} of ${cubeId.string}" +
s" does not appear in weight map or data")

@@ -64,17 +64,14 @@

def checkCubes(weightMap: Map[CubeId, Weight]): Unit = {

def checkCubeParents(): Unit = weightMap.foreach { case (cube, _) =>
weightMap.foreach { case (cube, _) =>
cube.parent match {
case Some(parent) =>
assert(
weightMap.contains(parent),
s"parent ${parent.string} of ${cube.string} does not appear in the list of cubes")
(weightMap should contain key parent) withClue
s"parent ${parent.string} of ${cube.string} does not appear in the list of cubes"
case None => // root cube
}
}

checkCubeParents()
}

def checkCubesOnData(
@@ -92,9 +89,9 @@
cubesOnData.foreach { cube =>
cube.parent match {
case Some(parent) =>
assert(
cubesOnData.contains(parent),
s"Parent ${parent.string} of ${cube.string} does not appear in the indexed data")
(cubesOnData should contain(parent)) withClue
s"Parent ${parent.string} of ${cube.string} does not appear in the indexed data"

case None => // root cube
}
}
@@ -103,13 +100,15 @@
def checkDataWithWeightMap(): Unit = {
cubesOnData.foreach { cube =>
if (cube.isRoot) {
assert(weightMap.contains(cube), s"Cube root appears in data but not in weight map")

(weightMap should contain key cube) withClue
s"Cube root appears in data but not in weight map"
} else {
val parent = cube.parent.get
assert(
weightMap.contains(cube) || weightMap.contains(parent),

(weightMap should (contain key cube or contain key parent)) withClue
s"Either weight map doesn't contain ${cube.string}" +
s" or doesn't contain it's parent ${parent.string}")
s" or doesn't contain it's parent ${parent.string}"
}
}
}
@@ -127,10 +126,9 @@
}
// scalastyle:off
childrenWeights.foreach { case (child, childWeight) =>
assert(
childWeight >= maxWeight,
s"MaxWeight of child ${child.string} is ${childWeight.fraction} " +
s"and maxWeight of parent ${cube.string} is ${maxWeight.fraction}")
childWeight should be >= maxWeight withClue
s"MaxWeight of child ${child.string} is ${childWeight.fraction} " +
s"and maxWeight of parent ${cube.string} is ${maxWeight.fraction}"
}
}
}
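
The structural change in IndexTestChecks.scala converts the checks object into a trait extending Matchers, so each suite mixes the shared checks in (as IndexTest.scala now does) and the checks can use matcher syntax directly. A minimal sketch of the pattern:

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

// Shared assertions live in a trait that extends Matchers, so they use
// matcher syntax and report failures through the mixing suite.
trait SharedChecks extends Matchers {
  def checkNonEmpty(xs: Seq[Int]): Unit = xs should not be empty
}

class ExampleSpec extends AnyFlatSpec with SharedChecks {
  "a sequence" should "pass the shared check" in {
    checkNonEmpty(Seq(1, 2, 3))
  }
}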
@@ -29,7 +29,7 @@ class MaxWeightEstimationTest extends AnyFlatSpec with Matchers with QbeastInteg
MaxWeightEstimation.finish(finalWeight) shouldBe finalWeight
}

"MaxWeight zero" should "be minium positive value" in {
"MaxWeight zero" should "be minimum positive value" in {
MaxWeightEstimation.zero shouldBe 0.0
}

Expand Down