diff --git a/Matrix/pzfmatrix.cpp b/Matrix/pzfmatrix.cpp index e6cd17079..c635197ae 100644 --- a/Matrix/pzfmatrix.cpp +++ b/Matrix/pzfmatrix.cpp @@ -814,7 +814,6 @@ void TPZFMatrix >::MultAdd(const TPZFMatrixfElem, this->Rows(), x.fElem, x.Rows(), &beta, z.fElem, z.Rows()); } } - #endif // USING_LAPACK /** @@ -907,7 +906,115 @@ void TPZFMatrix::MultAdd(const TPZFMatrix &x,const TPZFMatrix } } } - +} + +template +void TPZFMatrix::AddContribution(int64_t i, int64_t j, const TPZFMatrix & A, bool transpA, const TPZFMatrix& B, + bool transpB, const TVar alpha) +{ + if constexpr (!std::is_floating_point_v) + Error( "AddContribution must be used with a floating point type variable TVar!\n"); + +#ifdef USING_LAPACK + lapack_int nrows = A.Rows(); + lapack_int ncols = B.Cols(); + lapack_int seconddimA = A.Cols(); + lapack_int seconddimB = B.Rows(); + CBLAS_TRANSPOSE transposeA = CblasNoTrans; + CBLAS_TRANSPOSE transposeB = CblasNoTrans; + + if (transpA) + { + nrows = A.Cols(); + seconddimA = A.Rows(); + transposeA = CblasTrans; + } + if (transpB) + { + ncols = B.Rows(); + seconddimB = B.Cols(); + transposeB = CblasTrans; + } + +#else + int64_t nrows = A.Rows(); + int64_t ncols = B.Cols(); + int64_t seconddimA = A.Cols(); + int64_t seconddimB = B.Rows(); + + if (transpA) + { + nrows = A.Cols(); + seconddimA = A.Rows(); + } + if (transpB) + { + ncols = B.Rows(); + seconddimB = B.Cols(); + } +#endif + +#ifdef PZDEBUG + if (seconddimA != seconddimB) + { + std::stringstream out; + out << "AddContribution Matrices A and B have incompatible dimensions. "; + out << "Second dimension of A: " << seconddimA << ". Second dimension of B: " << seconddimB; + Error( out.str().c_str() ); + return; + } + else if ((i+nrows > this->Rows()) || (j+ncols > this->Cols())) + { + std::stringstream out; + out << "AddContribution trying to add a submatrix out of the full matrix bounds. "; + out << "Submatrix bigger row: " << i+nrows << ". Full matrix size: " << this->Rows(); + out << "Submatrix bigger column: " << j+ncols << ". Full matrix size: " << this->Cols(); + Error( "AddContribution trying to add a submatrix out of the original matrix bounds" ); + return; + } +#endif + +#ifdef USING_LAPACK + TVar* ptr = &(*this)(i,j); + const TVar beta = 1.0; + if constexpr(std::is_same_v){ + cblas_dgemm(CblasColMajor, transposeA, transposeB, nrows, ncols, seconddimA, alpha, A.fElem, A.Rows(), B.fElem, B.Rows(), beta, ptr, this->Rows()); + return; + } + else if constexpr(std::is_same_v){ + cblas_sgemm(CblasColMajor, transposeA, transposeB, nrows, ncols, seconddimA, alpha, A.fElem, A.Rows(), B.fElem, B.Rows(), beta, ptr, this->Rows()); + return; + } + else if constexpr(std::is_same_v>){ + cblas_zgemm(CblasColMajor, transposeA, transposeB, nrows, ncols, seconddimA, &alpha, A.fElem, A.Rows(), B.fElem, B.Rows(), &beta, ptr, this->Rows()); + return; + } + else if constexpr(std::is_same_v>){ + cblas_cgemm(CblasColMajor, transposeA, transposeB, nrows, ncols, seconddimA, &alpha, A.fElem, A.Rows(), B.fElem, B.Rows(), &beta, ptr, this->Rows()); + return; + } + +#endif //USING LAPACK + +TPZFMatrix tempA = A; +tempA *= alpha; +TPZFMatrix tempB = B; +if (transpA) + tempA.Transpose(); +if (transpB) + tempB.Transpose(); + +for (int64_t row = 0; row < nrows; row++) +{ + for (int64_t col = 0; col < ncols; col++) + { + for (int64_t k = 0; k < seconddimA; k++) + { + (*this)(i+row,j+col) += tempA(row,k) * tempB(k,col); + } + } +} + } /********************************/ diff --git a/Matrix/pzfmatrix.h b/Matrix/pzfmatrix.h index 7c6b0016b..8e23a80ca 100644 --- a/Matrix/pzfmatrix.h +++ b/Matrix/pzfmatrix.h @@ -217,6 +217,19 @@ class TPZFMatrix: public TPZMatrix { static void MultAdd(const TVar *ptr, int64_t rows, int64_t cols, const TPZFMatrix &x,const TPZFMatrix &y, TPZFMatrix &z, const TVar alpha=1.,const TVar beta = 0.,const int opt = 0); + /** + * @brief It computes this += alpha*(A * B), where A or B can be transposed. + * @param i Is the row of (this) where the first element of the matrices product should be added + * @param j Is the column of (this) where the first element of the matrices product should be added + * @param A Is A on the above operation + * @param transpA Indicates if A is Transpose or not + * @param B Is B on the above operation + * @param transpB Indicates if B is Transpose or not + * @param alpha Is alpha on the above operation + */ + virtual void AddContribution(int64_t i, int64_t j, const TPZFMatrix & A, bool transpA, const TPZFMatrix& B, + bool transpB, const TVar alpha = 1.0) override; + /** * @name Generic operator with TVar type * @{ diff --git a/Matrix/pzmatrix.cpp b/Matrix/pzmatrix.cpp index 92009db1b..2ade73245 100644 --- a/Matrix/pzmatrix.cpp +++ b/Matrix/pzmatrix.cpp @@ -157,6 +157,13 @@ void TPZMatrix::MultAdd(const TPZFMatrix &x,const TPZFMatrix & } } +template +void TPZMatrix::AddContribution(int64_t i, int64_t j, const TPZFMatrix & A, bool transpA, const TPZFMatrix& B, + bool transpB, const TVar alpha) +{ + Error( "Not implemented for this type of matrix\n" ); +} + template TVar TPZMatrix::RowTimesVector(const int row, const TPZFMatrix &v) const { diff --git a/Matrix/pzmatrix.h b/Matrix/pzmatrix.h index c0d920a71..241d52f86 100644 --- a/Matrix/pzmatrix.h +++ b/Matrix/pzmatrix.h @@ -215,7 +215,10 @@ class TPZMatrix: public TPZBaseMatrix virtual void MultAdd(const TPZFMatrix & x,const TPZFMatrix& y, TPZFMatrix& z, const TVar alpha=1., const TVar beta = 0., const int opt = 0) const; - virtual TVar RowTimesVector(const int row, const TPZFMatrix &v) const; + virtual TVar RowTimesVector(const int row, const TPZFMatrix &v) const; + + virtual void AddContribution(int64_t i, int64_t j, const TPZFMatrix & A, bool transpA, const TPZFMatrix& B, + bool transpB, const TVar alpha = 1.0); /** @brief Computes res = rhs - this * x */ virtual void Residual(const TPZFMatrix& x,const TPZFMatrix& rhs, TPZFMatrix& res ) ; diff --git a/UnitTest_PZ/TestMatrix/TestMatrix.cpp b/UnitTest_PZ/TestMatrix/TestMatrix.cpp index 6d81cd1c4..7c3804e10 100644 --- a/UnitTest_PZ/TestMatrix/TestMatrix.cpp +++ b/UnitTest_PZ/TestMatrix/TestMatrix.cpp @@ -169,6 +169,15 @@ void TestingTransposeWithAutoFill(int rows, int cols, SymProp sp); */ template void TestingMultAdd(int dim, SymProp sp, DecomposeType dec); + +/** + * @brief Tests the addContribution method of the matrix, that adds a block C += alpha * A*B starting at C(i,j), using AutoFill to build a square matrix of dimension dim (user defined) + * @param nrows Number of rows of the matrix to be build. + * @param ncols Number of columns of the matrix to be build. + * @note Process: build a matrix C with randomic values, adds a contribution C += A*B of the same size as C. Compare the results with AddContribution and MultAdd. + */ +template +void TestingAddContribution(int nrows, int ncols, int ntype); #ifdef PZ_USING_LAPACK /** @@ -774,6 +783,25 @@ template #endif } } + + template + void TestAddContribution() + { + SECTION("TPZFMatrix-SQUARE-MULTADD") + { + TestingAddContribution(10, 10, 0); + } +#ifdef PZDEBUG + SECTION("TPZFMatrix-INCOMPATIBLE-DIMENSIONS") + { + TestingAddContribution(10, 8, 1); + } + SECTION("TPZFMatrix-OUT-OF-BOUNDS") + { + TestingAddContribution(4, 4, 2); + } +#endif + } #ifdef PZ_USING_LAPACK template void GeneralisedEigenvaluesAutoFill() { for (int dim = 5; dim < 6; dim += 10) { @@ -1073,6 +1101,22 @@ TEMPLATE_TEST_CASE("MultAdd (CPLX)","[matrix_tests]", testmatrix::TestMultAdd(); } +TEMPLATE_TEST_CASE("AddContribution (REAL)","[matrix_tests]", + float, + double, + long double + ) { + testmatrix::TestAddContribution(); +} + +TEMPLATE_TEST_CASE("AddContribution (CPLX)","[matrix_tests]", + std::complex, + std::complex, + std::complex + ) { + testmatrix::TestAddContribution(); +} + #ifdef PZ_USING_LAPACK /*There is no long double lapack interface in our code*/ TEMPLATE_TEST_CASE("Eigenvalues (REAL)","[matrix_tests]", @@ -1714,6 +1758,79 @@ void TestingMultAdd(int dim, SymProp sp, DecomposeType dec) { REQUIRE(check); } +template +void TestingAddContribution(int nrows, int ncols, int ntype) +{ + TPZFMatrix C1; + C1.AutoFill(nrows, ncols, SymProp::NonSym); + TPZFMatrix C2; + TPZFMatrix A(C1); + TPZFMatrix B(C1); + TPZFMatrix BT; + B.Transpose(&BT); + TPZFMatrix y(C1); + + switch (ntype) + { + case 0: // Comparison between AddContribution and MultAdd + { + C1.AddContribution(0, 0, A, false, B, true, 1.0); + A.MultAdd(BT, y, C2, 1.0, 1.0); + + constexpr RTVar tol = []() + { + if constexpr (std::is_same_v) + return (RTVar)100; + else if constexpr (std::is_same_v) + return (RTVar)10; + else + return (RTVar)1; + }(); + + bool check = true; + + for (int i = 0; i < nrows; i++) + { + for (int j = 0; j < ncols; j++) + { + TVar diff = C1(i, j) - C2(i, j); + if (!IsZero(diff / tol)) + { + CAPTURE(nrows, ncols); + CAPTURE(C1(i, j), C2(i, j)); + std::cout << "i " << i << " j " << j << " C1 " << C1(i, j) << " C2 " << C2(i, j) << std::endl; + if (check) + { + A.Print("A = ", std::cout, EMathematicaInput); + B.Print("B = ", std::cout, EMathematicaInput); + BT.Print("BT = ", std::cout, EMathematicaInput); + } + check = false; + } + } + } + + REQUIRE(check); + break; + } + case 1: // Multiplying matrices with incompatible dimensions + { + REQUIRE_THROWS(C1.AddContribution(0, 0, A, false, B, false, 1.0)); // this will fail for not square matrices, as A and B have the same sizes + break; + } + case 2: // Adding a contribution out of matrix bounds + { + REQUIRE_THROWS(C1.AddContribution(1, 1, A, false, B, false, 1.0)); // this will fail because we are adding a contribution out of C1 bounds + break; + } + default: + { + std::cout << "Test type not implemented\n"; + break; + } + } +} + #ifdef PZ_USING_LAPACK template @@ -1873,7 +1990,7 @@ void TestingEigenDecompositionAutoFill(int dim, SymProp sp) { RTVar mult = 1.; if (sizeof(RTVar) == 4) { - mult *= 10.; + mult *= 12.; //This value is arbitrary } TPZFMatrix x(dim, 1, 0.); TPZFMatrix res(dim, 1, 0.);