Merge pull request #193 from labmec/BlockMatrixContribution2

Block matrix contribution2
labmec · Aug 4, 2023 · 71ea026 · 71ea026
2 parents 9fd35a8 + d28eb0a
commit 71ea026
Show file tree

Hide file tree

Showing 5 changed files with 251 additions and 4 deletions.
diff --git a/Matrix/pzfmatrix.cpp b/Matrix/pzfmatrix.cpp
@@ -814,7 +814,6 @@ void TPZFMatrix<std::complex<double> >::MultAdd(const TPZFMatrix<std::complex<do
                     &alpha, this->fElem, this->Rows(), x.fElem, x.Rows(), &beta, z.fElem, z.Rows());
     }
 }
-
 #endif // USING_LAPACK
 
 /**
@@ -907,7 +906,115 @@ void TPZFMatrix<TVar>::MultAdd(const TPZFMatrix<TVar> &x,const TPZFMatrix<TVar>
             }
         }
     }
-
+}
+
+/**
+ * @brief Adds alpha * op(A) * op(B) to the block of this matrix whose first entry is (i,j).
+ *
+ * op(X) is X itself, or X transposed when the matching transpX flag is set.
+ * When USING_LAPACK is defined, double, float, std::complex<double> and
+ * std::complex<float> are forwarded to the corresponding cblas gemm routine;
+ * every other case goes through the explicit triple loop at the end.
+ * The dimension and bounds checks are only compiled when PZDEBUG is defined.
+ *
+ * @param i row of this matrix receiving the first row of the product
+ * @param j column of this matrix receiving the first column of the product
+ * @param A left factor
+ * @param transpA whether A enters the product transposed
+ * @param B right factor
+ * @param transpB whether B enters the product transposed
+ * @param alpha scalar multiplying the product
+ */
+template <class TVar>
+void TPZFMatrix<TVar>::AddContribution(int64_t i, int64_t j, const TPZFMatrix<TVar> & A, bool transpA, const TPZFMatrix<TVar>& B,
+                                       bool transpB, const TVar alpha)
+{
+    if constexpr (!std::is_floating_point_v<RTVar>)
+        Error( "AddContribution must be used with a floating point type variable TVar!\n");
+
+    // Shapes after the optional transpositions:
+    // op(A) is nrows x seconddimA and op(B) is seconddimB x ncols.
+    const int64_t nrows = transpA ? A.Cols() : A.Rows();
+    const int64_t seconddimA = transpA ? A.Rows() : A.Cols();
+    const int64_t ncols = transpB ? B.Rows() : B.Cols();
+    const int64_t seconddimB = transpB ? B.Cols() : B.Rows();
+
+#ifdef PZDEBUG
+    if (seconddimA != seconddimB)
+    {
+        std::stringstream out;
+        out << "AddContribution Matrices A and B have incompatible dimensions. ";
+        out << "Second dimension of A: " << seconddimA << ". Second dimension of B: " << seconddimB;
+        Error( out.str().c_str() );
+        return;
+    }
+    else if ((i < 0) || (j < 0) || (i+nrows > this->Rows()) || (j+ncols > this->Cols()))
+    {
+        std::stringstream out;
+        out << "AddContribution trying to add a submatrix out of the full matrix bounds. ";
+        out << "Submatrix bigger row: " << i+nrows << ". Full matrix size: " << this->Rows() << ". ";
+        out << "Submatrix bigger column: " << j+ncols << ". Full matrix size: " << this->Cols();
+        // Report the detailed message that was just assembled.
+        Error( out.str().c_str() );
+        return;
+    }
+#endif
+
+#ifdef USING_LAPACK
+    const CBLAS_TRANSPOSE transposeA = transpA ? CblasTrans : CblasNoTrans;
+    const CBLAS_TRANSPOSE transposeB = transpB ? CblasTrans : CblasNoTrans;
+    const lapack_int m = static_cast<lapack_int>(nrows);
+    const lapack_int n = static_cast<lapack_int>(ncols);
+    const lapack_int k = static_cast<lapack_int>(seconddimA);
+    // gemm writes into the (i,j) block in place; the leading dimension stays the full row count.
+    TVar* ptr = &(*this)(i,j);
+    // beta = 1 makes gemm accumulate into the existing entries instead of overwriting them.
+    const TVar beta = 1.0;
+    if constexpr(std::is_same_v<TVar,double>){
+        cblas_dgemm(CblasColMajor, transposeA, transposeB, m, n, k, alpha, A.fElem, A.Rows(), B.fElem, B.Rows(), beta, ptr, this->Rows());
+        return;
+    }
+    else if constexpr(std::is_same_v<TVar,float>){
+        cblas_sgemm(CblasColMajor, transposeA, transposeB, m, n, k, alpha, A.fElem, A.Rows(), B.fElem, B.Rows(), beta, ptr, this->Rows());
+        return;
+    }
+    else if constexpr(std::is_same_v<TVar,std::complex<double>>){
+        cblas_zgemm(CblasColMajor, transposeA, transposeB, m, n, k, &alpha, A.fElem, A.Rows(), B.fElem, B.Rows(), &beta, ptr, this->Rows());
+        return;
+    }
+    else if constexpr(std::is_same_v<TVar,std::complex<float>>){
+        cblas_cgemm(CblasColMajor, transposeA, transposeB, m, n, k, &alpha, A.fElem, A.Rows(), B.fElem, B.Rows(), &beta, ptr, this->Rows());
+        return;
+    }
+#endif //USING LAPACK
+
+    // Generic path: work on copies so alpha and the transpositions can be applied up front.
+    TPZFMatrix<TVar> tempA = A;
+    tempA *= alpha;
+    TPZFMatrix<TVar> tempB = B;
+    if (transpA)
+        tempA.Transpose();
+    if (transpB)
+        tempB.Transpose();
+
+    for (int64_t row = 0; row < nrows; row++)
+    {
+        for (int64_t col = 0; col < ncols; col++)
+        {
+            for (int64_t inner = 0; inner < seconddimA; inner++)
+            {
+                (*this)(i+row,j+col) += tempA(row,inner) * tempB(inner,col);
+            }
+        }
+    }
 }
 
 /********************************/

diff --git a/Matrix/pzfmatrix.h b/Matrix/pzfmatrix.h
@@ -217,6 +217,19 @@ class TPZFMatrix: public TPZMatrix<TVar> {
     static void MultAdd(const TVar *ptr, int64_t rows, int64_t cols, const TPZFMatrix<TVar> &x,const TPZFMatrix<TVar> &y, TPZFMatrix<TVar> &z,
                         const TVar alpha=1.,const TVar beta = 0.,const int opt = 0);
 
+    /**
+     * @brief It adds alpha*(A * B) to the block of (this) that starts at position (i,j), where A or B can be transposed.
+     * @param i Is the row of (this) where the first element of the matrices product should be added 
+     * @param j Is the column of (this) where the first element of the matrices product should be added 
+     * @param A Is A on the above operation
+     * @param transpA Indicates whether A is transposed before the product
+     * @param B Is B on the above operation
+     * @param transpB Indicates whether B is transposed before the product
+     * @param alpha Is alpha on the above operation
+     */
+    virtual void AddContribution(int64_t i, int64_t j, const TPZFMatrix<TVar> & A, bool transpA, const TPZFMatrix<TVar>& B, 
+						 		 bool transpB, const TVar alpha = 1.0) override;
+
     /**
      * @name Generic operator with TVar type
      * @{

diff --git a/Matrix/pzmatrix.cpp b/Matrix/pzmatrix.cpp
@@ -157,6 +157,13 @@ void TPZMatrix<TVar>::MultAdd(const TPZFMatrix<TVar> &x,const TPZFMatrix<TVar> &
 	}
 }
 
+/**
+ * @brief Default AddContribution for matrix types that do not override it.
+ *
+ * No arithmetic is performed: the call is only reported through Error().
+ */
+template <class TVar>
+void TPZMatrix<TVar>::AddContribution(int64_t i, int64_t j,
+                                      const TPZFMatrix<TVar> &A, bool transpA,
+                                      const TPZFMatrix<TVar> &B, bool transpB,
+                                      const TVar alpha)
+{
+    Error("Not implemented for this type of matrix\n");
+}
+
 template<class TVar>
 TVar TPZMatrix<TVar>::RowTimesVector(const int row, const TPZFMatrix<TVar> &v) const
 {

diff --git a/Matrix/pzmatrix.h b/Matrix/pzmatrix.h
@@ -215,7 +215,10 @@ class TPZMatrix: public TPZBaseMatrix
 	virtual void MultAdd(const TPZFMatrix<TVar> & x,const TPZFMatrix<TVar>& y, TPZFMatrix<TVar>& z,
 						 const TVar alpha=1., const TVar beta = 0., const int opt = 0) const;
 
-  virtual TVar RowTimesVector(const int row, const TPZFMatrix<TVar> &v) const;
+    virtual TVar RowTimesVector(const int row, const TPZFMatrix<TVar> &v) const;
+
+	/**
+	 * @brief Adds alpha*(A * B) to the block of (this) starting at position (i,j); A or B can be transposed.
+	 * @note The implementation in TPZMatrix only calls Error(); TPZFMatrix overrides this method.
+	 */
+	virtual void AddContribution(int64_t i, int64_t j, const TPZFMatrix<TVar> & A, bool transpA, const TPZFMatrix<TVar>& B, 
+						 		 bool transpB, const TVar alpha = 1.0);
 
 	/** @brief Computes res = rhs - this * x */
 	virtual void Residual(const TPZFMatrix<TVar>& x,const TPZFMatrix<TVar>& rhs, TPZFMatrix<TVar>& res ) ;

diff --git a/UnitTest_PZ/TestMatrix/TestMatrix.cpp b/UnitTest_PZ/TestMatrix/TestMatrix.cpp
@@ -169,6 +169,15 @@ void TestingTransposeWithAutoFill(int rows, int cols, SymProp sp);
  */
 template <class matx, class TVar>
 void TestingMultAdd(int dim, SymProp sp, DecomposeType dec);
+
+/**
+ * @brief Tests the AddContribution method of the matrix, which adds a block C += alpha * A*B starting at C(i,j), using AutoFill to build the matrices.
+ * @param nrows Number of rows of the matrix to be built.
+ * @param ncols Number of columns of the matrix to be built.
+ * @param ntype Scenario to run: 0 compares AddContribution with MultAdd, 1 multiplies matrices with incompatible dimensions, 2 adds a contribution out of the matrix bounds.
+ * @note Process for ntype 0: build a matrix C with random values, add a contribution C += A*B^T of the same size as C, and compare the result of AddContribution with the result of MultAdd.
+ */
+template <class TVar>
+void TestingAddContribution(int nrows, int ncols, int ntype);
 #ifdef PZ_USING_LAPACK
 
 /**
@@ -774,6 +783,25 @@ template<class TVar>
 #endif
       }
     }
+
+    // Runs every AddContribution scenario for the scalar type TVar.
+    // The last argument of TestingAddContribution selects the scenario.
+    template <class TVar>
+    void TestAddContribution()
+    {
+      // Scenario 0: the result must match the one obtained with MultAdd.
+      SECTION("TPZFMatrix-SQUARE-MULTADD")
+      {
+          TestingAddContribution<TVar>(10, 10, 0);
+      }
+      // The two error scenarios are only compiled in PZDEBUG builds.
+#ifdef PZDEBUG
+      // Scenario 1: 10x8 operands, both used without transposition.
+      SECTION("TPZFMatrix-INCOMPATIBLE-DIMENSIONS")
+      {
+          TestingAddContribution<TVar>(10, 8, 1);
+      }
+      // Scenario 2: a 4x4 product added at position (1,1) of a 4x4 matrix.
+      SECTION("TPZFMatrix-OUT-OF-BOUNDS")
+      {
+          TestingAddContribution<TVar>(4, 4, 2);
+      }
+#endif
+    }
 #ifdef PZ_USING_LAPACK
     template <class TVar> void GeneralisedEigenvaluesAutoFill() {
         for (int dim = 5; dim < 6; dim += 10) {
@@ -1073,6 +1101,22 @@ TEMPLATE_TEST_CASE("MultAdd (CPLX)","[matrix_tests]",
     testmatrix::TestMultAdd<TestType>();
 }
 
+// Runs the AddContribution scenarios once for each real scalar type listed below.
+TEMPLATE_TEST_CASE("AddContribution (REAL)","[matrix_tests]",
+                   float,
+                   double,
+                   long double
+                   ) {
+    testmatrix::TestAddContribution<TestType>();
+}
+
+// Runs the AddContribution scenarios once for each complex scalar type listed below.
+TEMPLATE_TEST_CASE("AddContribution (CPLX)","[matrix_tests]",
+                   std::complex<float>,
+                   std::complex<double>,
+                   std::complex<long double>
+                   ) {
+    testmatrix::TestAddContribution<TestType>();
+}
+
 #ifdef PZ_USING_LAPACK
 /*There is no long double lapack interface in our code*/
 TEMPLATE_TEST_CASE("Eigenvalues (REAL)","[matrix_tests]",
@@ -1714,6 +1758,79 @@ void TestingMultAdd(int dim, SymProp sp, DecomposeType dec) {
     REQUIRE(check);
 }
 
+/**
+ * @brief Runs one AddContribution scenario on matrices filled by AutoFill.
+ *
+ * C1, A and B all share the same nrows x ncols shape, since A and B are copies of C1.
+ * @param nrows Number of rows of the generated matrices.
+ * @param ncols Number of columns of the generated matrices.
+ * @param ntype Scenario: 0 compares C1 += A*B^T against MultAdd, 1 expects a throw
+ *              for incompatible dimensions, 2 expects a throw for an out-of-bounds block.
+ *              Any other value fails the test.
+ */
+template <class TVar>
+void TestingAddContribution(int nrows, int ncols, int ntype)
+{
+    TPZFMatrix<TVar> C1;
+    C1.AutoFill(nrows, ncols, SymProp::NonSym);
+    TPZFMatrix<TVar> A(C1);
+    TPZFMatrix<TVar> B(C1);
+
+    switch (ntype)
+    {
+      case 0: // Comparison between AddContribution and MultAdd
+      {
+          // Reference result: C2 = A*BT + y, where y holds the initial values of C1.
+          TPZFMatrix<TVar> C2;
+          TPZFMatrix<TVar> BT;
+          B.Transpose(&BT);
+          TPZFMatrix<TVar> y(C1);
+
+          C1.AddContribution(0, 0, A, false, B, true, 1.0);
+          A.MultAdd(BT, y, C2, 1.0, 1.0);
+
+          // The difference is divided by a type-dependent factor before the IsZero check.
+          constexpr RTVar tol = []()
+          {
+            if constexpr (std::is_same_v<RTVar, float>)
+              return (RTVar)100;
+            else if constexpr (std::is_same_v<RTVar, long double>)
+              return (RTVar)10;
+            else
+              return (RTVar)1;
+          }();
+
+          bool check = true;
+
+          for (int i = 0; i < nrows; i++)
+          {
+              for (int j = 0; j < ncols; j++)
+              {
+                  TVar diff = C1(i, j) - C2(i, j);
+                  if (!IsZero(diff / tol))
+                  {
+                      CAPTURE(nrows, ncols);
+                      CAPTURE(C1(i, j), C2(i, j));
+                      std::cout << "i " << i << " j " << j << " C1 " << C1(i, j) << " C2 " << C2(i, j) << std::endl;
+                      // Print the operands only once, on the first mismatch.
+                      if (check)
+                      {
+                        A.Print("A = ", std::cout, EMathematicaInput);
+                        B.Print("B = ", std::cout, EMathematicaInput);
+                        BT.Print("BT = ", std::cout, EMathematicaInput);
+                      }
+                      check = false;
+                  }
+              }
+          }
+
+          REQUIRE(check);
+          break;
+      }
+      case 1: // Multiplying matrices with incompatible dimensions
+      {
+          REQUIRE_THROWS(C1.AddContribution(0, 0, A, false, B, false, 1.0)); // this will fail for not square matrices, as A and B have the same sizes
+          break;
+      }
+      case 2: // Adding a contribution out of matrix bounds
+      {
+          REQUIRE_THROWS(C1.AddContribution(1, 1, A, false, B, false, 1.0)); // this will fail because we are adding a contribution out of C1 bounds
+          break;
+      }
+      default:
+      {
+          // An unknown scenario must not pass silently.
+          FAIL("Test type not implemented: " << ntype);
+          break;
+      }
+    }
+}
+
 #ifdef PZ_USING_LAPACK
 
 template <class matx, class TVar>
@@ -1873,7 +1990,7 @@ void TestingEigenDecompositionAutoFill(int dim, SymProp sp) {
 
   RTVar mult = 1.;
   if (sizeof(RTVar) == 4) {
-    mult *= 10.;
+    mult *= 12.; //This value is arbitrary
   }
   TPZFMatrix<CTVar> x(dim, 1, 0.);
   TPZFMatrix<CTVar> res(dim, 1, 0.);