Skip to content

Commit

Permalink
Intrepid2: Fences for CUDA_LAUNCH_BLOCKING=0
Browse files: browse the repository at this point in the history
  • Loading branch information
MicheldeMessieres committed Aug 15, 2020
1 parent 8f25a4d commit 0a7cf78
Show file tree
Hide file tree
Showing 13 changed files with 65 additions and 25 deletions.
6 changes: 5 additions & 1 deletion packages/intrepid2/unit-test/Cell/test_03.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,9 @@ namespace Intrepid2 {
}

rst::vecprod(faceNormal, tanX, tanY);


DeviceSpaceType().fence();

// Compare direct normal with d-component of the face/side normal by CellTools
for (ordinal_type d=0;d<cellDim;++d) {

Expand Down Expand Up @@ -323,6 +325,8 @@ namespace Intrepid2 {

rst::vecprod(faceNormal, tanX, tanY);

DeviceSpaceType().fence();

// Compare direct normal with d-component of the face/side normal by CellTools
for (ordinal_type d=0;d<cellDim;++d) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ namespace

auto subBasisOutputValues = getOutputView<OutputScalar>(fs, op, subBasis->getCardinality(), numPoints, spaceDim);
subBasis->getValues(subBasisOutputValues, inputPoints, op);

Kokkos::fence();
bool vectorValued = (outputValues.rank() == 3); // F,P,D -- if scalar-valued, F,P

for (int pointOrdinal=0; pointOrdinal<numPoints; pointOrdinal++)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,8 @@ namespace Intrepid2 {
// apply field signs
fst::applyLeftFieldSigns(mass_matrices, field_signs);
fst::applyRightFieldSigns(mass_matrices, field_signs);

DeviceSpaceType().fence();

/******************* STOP COMPUTATION ***********************/


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ namespace Intrepid2 {
computeRefVolume(const ordinal_type numPoints,
const cubWeightViewType cubWeights) {
typename cubWeightViewType::value_type r_val = 0.0;
Kokkos::fence();
for (auto i=0;i<numPoints;++i)
r_val += cubWeights(i);

Expand All @@ -83,6 +84,7 @@ namespace Intrepid2 {
const ordinal_type polydeg[3] = { xDeg, yDeg, zDeg };

const auto dim = p.extent(0);
Kokkos::fence();
for (size_type i=0;i<dim;++i)
r_val *= std::pow(p(i),polydeg[i]);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ int ConvergenceHex(const bool verbose) {
Basis_HGRAD_HEX_C1_FEM<DeviceSpaceType,ValueType,ValueType> hexaLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords);
hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -460,6 +461,7 @@ int ConvergenceHex(const bool verbose) {
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
if(numGradPoints>0)
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalGradPoints, Kokkos::subview(evaluationGradPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<hexa.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down Expand Up @@ -610,6 +612,7 @@ int ConvergenceHex(const bool verbose) {
Basis_HGRAD_HEX_C1_FEM<DeviceSpaceType,ValueType,ValueType> hexaLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords);
hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -663,6 +666,7 @@ int ConvergenceHex(const bool verbose) {
for(ordinal_type i=0; i<numElems; ++i) {
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalCurlPoints, Kokkos::subview(evaluationCurlPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<hexa.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down Expand Up @@ -829,6 +833,7 @@ int ConvergenceHex(const bool verbose) {
Basis_HGRAD_HEX_C1_FEM<DeviceSpaceType,ValueType,ValueType> hexaLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords);
hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -881,6 +886,7 @@ int ConvergenceHex(const bool verbose) {
for(ordinal_type i=0; i<numElems; ++i) {
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalDivPoints, Kokkos::subview(evaluationDivPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<hexa.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down Expand Up @@ -1043,6 +1049,7 @@ int ConvergenceHex(const bool verbose) {
Basis_HGRAD_HEX_C1_FEM<DeviceSpaceType,ValueType,ValueType> hexaLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(hexaLinearBasisValuesAtRefCoords, hexa.getNodeCount(), numRefCoords);
hexaLinearBasis.getValues(hexaLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -1085,6 +1092,7 @@ int ConvergenceHex(const bool verbose) {

for(ordinal_type i=0; i<numElems; ++i) {
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<hexa.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ int ConvergenceQuad(const bool verbose) {
Basis_HGRAD_QUAD_C1_FEM<DeviceSpaceType,ValueType,ValueType> quadLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(quadLinearBasisValuesAtRefCoords, quad.getNodeCount(), numRefCoords);
quadLinearBasis.getValues(quadLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -419,6 +420,7 @@ int ConvergenceQuad(const bool verbose) {
quadLinearBasis.getValues(quadLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
if(numGradPoints>0)
quadLinearBasis.getValues(quadLinearBasisValuesAtEvalGradPoints, Kokkos::subview(evaluationGradPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<quad.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down Expand Up @@ -569,6 +571,7 @@ int ConvergenceQuad(const bool verbose) {
Basis_HGRAD_QUAD_C1_FEM<DeviceSpaceType,ValueType,ValueType> quadLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(quadLinearBasisValuesAtRefCoords, quad.getNodeCount(), numRefCoords);
quadLinearBasis.getValues(quadLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -621,6 +624,7 @@ int ConvergenceQuad(const bool verbose) {
for(ordinal_type i=0; i<numElems; ++i) {
quadLinearBasis.getValues(quadLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
quadLinearBasis.getValues(quadLinearBasisValuesAtEvalCurlPoints, Kokkos::subview(evaluationCurlPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<quad.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down Expand Up @@ -777,6 +781,7 @@ int ConvergenceQuad(const bool verbose) {
Basis_HGRAD_QUAD_C1_FEM<DeviceSpaceType,ValueType,ValueType> quadLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(quadLinearBasisValuesAtRefCoords, quad.getNodeCount(), numRefCoords);
quadLinearBasis.getValues(quadLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -829,6 +834,7 @@ int ConvergenceQuad(const bool verbose) {
for(ordinal_type i=0; i<numElems; ++i) {
quadLinearBasis.getValues(quadLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
quadLinearBasis.getValues(quadLinearBasisValuesAtEvalDivPoints, Kokkos::subview(evaluationDivPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<quad.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down Expand Up @@ -989,6 +995,7 @@ int ConvergenceQuad(const bool verbose) {
Basis_HGRAD_QUAD_C1_FEM<DeviceSpaceType,ValueType,ValueType> quadLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(quadLinearBasisValuesAtRefCoords, quad.getNodeCount(), numRefCoords);
quadLinearBasis.getValues(quadLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -1031,6 +1038,7 @@ int ConvergenceQuad(const bool verbose) {

for(ordinal_type i=0; i<numElems; ++i) {
quadLinearBasis.getValues(quadLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<quad.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ int ConvergenceTet(const bool verbose) {
Basis_HGRAD_TET_C1_FEM<DeviceSpaceType,ValueType,ValueType> tetLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords);
tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -489,6 +490,7 @@ int ConvergenceTet(const bool verbose) {
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
if(numGradPoints>0)
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalGradPoints, Kokkos::subview(evaluationGradPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<tet.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down Expand Up @@ -638,6 +640,7 @@ int ConvergenceTet(const bool verbose) {
Basis_HGRAD_TET_C1_FEM<DeviceSpaceType,ValueType,ValueType> tetLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords);
tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -691,6 +694,7 @@ int ConvergenceTet(const bool verbose) {
for(ordinal_type i=0; i<numElems; ++i) {
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalCurlPoints, Kokkos::subview(evaluationCurlPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<tet.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down Expand Up @@ -854,6 +858,7 @@ int ConvergenceTet(const bool verbose) {
Basis_HGRAD_TET_C1_FEM<DeviceSpaceType,ValueType,ValueType> tetLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords);
tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -906,6 +911,7 @@ int ConvergenceTet(const bool verbose) {
for(ordinal_type i=0; i<numElems; ++i) {
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalDivPoints, Kokkos::subview(evaluationDivPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<tet.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down Expand Up @@ -1066,6 +1072,7 @@ int ConvergenceTet(const bool verbose) {
Basis_HGRAD_TET_C1_FEM<DeviceSpaceType,ValueType,ValueType> tetLinearBasis; //used for computing physical coordinates
DynRankView ConstructWithLabel(tetLinearBasisValuesAtRefCoords, tet.getNodeCount(), numRefCoords);
tetLinearBasis.getValues(tetLinearBasisValuesAtRefCoords, refPoints);
DeviceSpaceType().fence();
for(ordinal_type i=0; i<numElems; ++i)
for(ordinal_type d=0; d<dim; ++d)
for(ordinal_type j=0; j<numRefCoords; ++j)
Expand Down Expand Up @@ -1108,6 +1115,7 @@ int ConvergenceTet(const bool verbose) {

for(ordinal_type i=0; i<numElems; ++i) {
hexLinearBasis.getValues(hexLinearBasisValuesAtEvalPoints, Kokkos::subview(evaluationPoints,i,Kokkos::ALL(),Kokkos::ALL()));
DeviceSpaceType().fence();
for(ordinal_type d=0; d<dim; ++d) {
for(std::size_t k=0; k<tet.getNodeCount(); ++k) {
for(ordinal_type j=0; j<numPoints; ++j)
Expand Down
Loading

0 comments on commit 0a7cf78

Please sign in to comment.