Add CSV-style performance reporting to the contraction, permutation and reduction tests.

What this patch does:
  * library/src/data_types.{cpp,hpp}: adds opTypeToString, algoTypeToString, logLevelToString
    and workSizePrefToString. Each returns the enumerator's spelling, or a *_UNKNOWN string
    for any value that is not listed.
  * test/01_contraction, test/02_permutation, test/03_reduction: brackets the hiptensor call
    with a pair of HIP events, stores the derived figures in four new members
    (mElapsedTimeMs, mTotalGFlops, mMeasuredTFlopsPerSec, mTotalBytes), and prints them through
    the new printHeader() / printKernel() members. reportResults() gains an omitHeader flag.

Open review questions (not resolvable from the patch alone; please confirm):
  * Contraction validation hunk: the inserted compareEqualLaunchKernel calls use `tolerance`,
    but `double tolerance = ...` appears after the inserted block in the hunk's trailing
    context. Check the ordering against the full file.
  * The "TotalBytes" column is divided by (1e9 * elapsedMs), so it holds a rate rather than a
    byte count. Confirm the intended column label.
  * Permutation and reduction derive their FLOP count from the product of two byte sizes.
    Confirm the intended formula.
  * Every reportResults() call passes omitHeader = false, so the header row is written once
    per test case.
  * An elapsed time of 0 ms would turn the derived rates into inf/nan.
  * A few expressions look as if their template argument lists were lost before this patch was
    captured (for example `reinterpret_cast(&alphaValue)`); they are reproduced unchanged.

diff --git a/library/src/data_types.cpp b/library/src/data_types.cpp
index b17de958..9b69b9f1 100644
--- a/library/src/data_types.cpp
+++ b/library/src/data_types.cpp
@@ -350,6 +350,111 @@ namespace hiptensor
             return "HIP_TYPE_NONE";
         }
     }
+
+    std::string opTypeToString(hiptensorOperator_t opType)
+    {
+        if(opType == HIPTENSOR_OP_IDENTITY)
+        {
+            return "HIPTENSOR_OP_IDENTITY";
+        }
+        else if(opType == HIPTENSOR_OP_SQRT)
+        {
+            return "HIPTENSOR_OP_SQRT";
+        }
+        else if(opType == HIPTENSOR_OP_ADD)
+        {
+            return "HIPTENSOR_OP_ADD";
+        }
+        else if(opType == HIPTENSOR_OP_MUL)
+        {
+            return "HIPTENSOR_OP_MUL";
+        }
+        else if(opType == HIPTENSOR_OP_MAX)
+        {
+            return "HIPTENSOR_OP_MAX";
+        }
+        else if(opType == HIPTENSOR_OP_MIN)
+        {
+            return "HIPTENSOR_OP_MIN";
+        }
+        else
+        {
+            return "HIPTENSOR_OP_UNKNOWN";
+        }
+    }
+
+    std::string algoTypeToString(hiptensorAlgo_t algoType)
+    {
+        if(algoType == HIPTENSOR_ALGO_ACTOR_CRITIC)
+        {
+            return "HIPTENSOR_ALGO_ACTOR_CRITIC";
+        }
+        else if(algoType == HIPTENSOR_ALGO_DEFAULT)
+        {
+            return "HIPTENSOR_ALGO_DEFAULT";
+        }
+        else if(algoType == HIPTENSOR_ALGO_DEFAULT_PATIENT)
+        {
+            return "HIPTENSOR_ALGO_DEFAULT_PATIENT";
+        }
+        else
+        {
+            return "HIPTENSOR_ALGO_UNKNOWN";
+        }
+    }
+
+    std::string logLevelToString(hiptensorLogLevel_t logLevel)
+    {
+        if(logLevel == HIPTENSOR_LOG_LEVEL_OFF)
+        {
+            return "HIPTENSOR_LOG_LEVEL_OFF";
+        }
+        else if(logLevel == HIPTENSOR_LOG_LEVEL_ERROR)
+        {
+            return "HIPTENSOR_LOG_LEVEL_ERROR";
+        }
+        else if(logLevel == HIPTENSOR_LOG_LEVEL_PERF_TRACE)
+        {
+            return "HIPTENSOR_LOG_LEVEL_PERF_TRACE";
+        }
+        else if(logLevel == HIPTENSOR_LOG_LEVEL_PERF_HINT)
+        {
+            return "HIPTENSOR_LOG_LEVEL_PERF_HINT";
+        }
+        else if(logLevel == HIPTENSOR_LOG_LEVEL_HEURISTICS_TRACE)
+        {
+            return "HIPTENSOR_LOG_LEVEL_HEURISTICS_TRACE";
+        }
+        else if(logLevel == HIPTENSOR_LOG_LEVEL_API_TRACE)
+        {
+            return "HIPTENSOR_LOG_LEVEL_API_TRACE";
+        }
+        else
+        {
+            return "HIPTENSOR_LOG_LEVEL_UNKNOWN";
+        }
+    }
+
+    std::string workSizePrefToString(hiptensorWorksizePreference_t workSize)
+    {
+        if(workSize == HIPTENSOR_WORKSPACE_MIN)
+        {
+            return "HIPTENSOR_WORKSPACE_MIN";
+        }
+        else if(workSize == HIPTENSOR_WORKSPACE_RECOMMENDED)
+        {
+            return "HIPTENSOR_WORKSPACE_RECOMMENDED";
+        }
+        else if(workSize == HIPTENSOR_WORKSPACE_MAX)
+        {
+            return "HIPTENSOR_WORKSPACE_MAX";
+        }
+        else
+        {
+            return "HIPTENSOR_WORKSPACE_UNKNOWN";
+        }
+    }
+
 } // namespace hiptensor
 
 bool operator==(hipDataType hipType, hiptensorComputeType_t computeType)
diff --git a/library/src/include/data_types.hpp b/library/src/include/data_types.hpp
index 59e70f2a..452fda8f 100644
--- a/library/src/include/data_types.hpp
+++ b/library/src/include/data_types.hpp
@@ -110,6 +110,10 @@ namespace hiptensor
 
     std::string computeTypeToString(hiptensorComputeType_t computeType);
     std::string hipTypeToString(hipDataType hipType);
+    std::string opTypeToString(hiptensorOperator_t opType);
+    std::string algoTypeToString(hiptensorAlgo_t algoType);
+    std::string logLevelToString(hiptensorLogLevel_t logLevel);
+    std::string workSizePrefToString(hiptensorWorksizePreference_t workSize);
 } // namespace hiptensor
 
 bool operator==(hipDataType hipType, hiptensorComputeType_t computeType);
diff --git a/test/01_contraction/contraction_test.cpp b/test/01_contraction/contraction_test.cpp
index ad072120..025f4e21 100644
--- a/test/01_contraction/contraction_test.cpp
+++ b/test/01_contraction/contraction_test.cpp
@@ -78,6 +78,8 @@ namespace hiptensor
         mRunFlag = true;
         mValidationResult = false;
         mMaxRelativeError = 0.0;
+
+        mElapsedTimeMs = mTotalGFlops = mMeasuredTFlopsPerSec = mTotalBytes = 0.0;
     }
 
     ContractionResource* ContractionTest::getResource() const
@@ -85,6 +87,93 @@ namespace hiptensor
         return DataStorage::instance().get();
     }
 
+    std::ostream& ContractionTest::printHeader(std::ostream& stream /* = std::cout */) const
+    {
+        return stream << "TypeA, TypeB, TypeC, "
+                      << "TypeD, TypeCompute, "
+                      << "Algorithm, Operator, "
+                      << "WorkSizePreference, LogLevel, "
+                      << "Lengths, Strides, Modes, Alpha, "
+                      << "Beta, elapsedMs, "
+                      << "Problem Size(GFlops), "
+                      << "TFlops/s, "
+                      << "TotalBytes, "
+                      << "Result" << std::endl;
+    }
+
+    std::ostream& ContractionTest::printKernel(std::ostream& stream) const
+    {
+        auto param = Base::GetParam();
+        auto testType = std::get<0>(param);
+        auto algorithm = std::get<1>(param);
+        auto operatorType = std::get<2>(param);
+        auto workSizePref = std::get<3>(param);
+        auto logLevel = std::get<4>(param);
+        auto lengths = std::get<5>(param);
+        auto strides = std::get<6>(param);
+        auto modes = std::get<7>(param);
+        auto alpha = std::get<8>(param);
+        auto beta = std::get<9>(param);
+
+        stream << hipTypeToString(testType[0]) << ", " << hipTypeToString(testType[1]) << ", " << hipTypeToString(testType[2]) << ", "
+               << hipTypeToString(testType[3]) << ", " << computeTypeToString(convertToComputeType(testType[4])) << ", " << algoTypeToString(algorithm) << ", "
+               << opTypeToString(operatorType) << ", " << workSizePrefToString(workSizePref) << ", " << logLevelToString(logLevel) << ", [";
+
+        for(size_t i = 0; i < lengths.size(); i++) {
+            stream << "[" ;
+            for(size_t j = 0; j < lengths[i].size(); j++) {
+                stream << lengths[i][j] << ", ";
+            }
+            stream << "], ";
+        }
+        stream << "], [";
+
+        if(!strides.empty()) {
+            for(size_t i = 0; i < strides.size(); i++) {
+                stream << "[" ;
+                for(size_t j = 0; j < strides[i].size(); j++) {
+                    stream << strides[i][j] << ", ";
+                }
+                stream << "], ";
+            }
+        }
+        stream << "], [";
+
+        if(!modes.empty()) {
+            for(size_t i = 0; i < modes.size(); i++) {
+                stream << "[" ;
+                for(size_t j = 0; j < modes[i].size(); j++) {
+                    stream << modes[i][j] << ", ";
+                }
+                stream << "], ";
+            }
+        }
+        stream << "], " << alpha << ", " << beta << ", ";
+
+        if(!mRunFlag)
+        {
+            stream << "n/a"
+                   << ", "
+                   << "n/a"
+                   << ", "
+                   << "n/a"
+                   << ", "
+                   << "n/a"
+                   << ", "
+                   << "SKIPPED" << std::endl;
+        }
+        else
+        {
+
+            stream << mElapsedTimeMs << ", " << mTotalGFlops << ", " << mMeasuredTFlopsPerSec
+                   << ", " << mTotalBytes << ", "
+                   <<((bool)mValidationResult ? "PASSED" : "FAILED")
+                   << std::endl;
+        }
+
+        return stream;
+    }
+
     void ContractionTest::SetUp()
     {
         // reset API log buffer
@@ -413,20 +502,21 @@ namespace hiptensor
     void ContractionTest::reportResults(std::ostream& stream,
                                         hipDataType DDataType,
                                         hiptensorComputeType_t computeType,
+                                        bool omitHeader,
                                         bool omitSkipped,
                                         bool omitFailed,
                                         bool omitPassed) const
     {
+        if(!omitHeader)
+        {
+            printHeader(stream);
+        }
+
         // Conditionally print outputs
         if((mRunFlag || !omitSkipped) && (mValidationResult || !omitFailed)
            && (!mValidationResult || !omitPassed))
         {
-            if(mPrintTypes)
-            {
-                ContractionTest::sAPILogBuff
-                    << "TypeA/B/C/D: " << hipTypeToString(DDataType)
-                    << ", ComputeType: " << computeTypeToString(computeType) << std::endl;
-            }
+            printKernel(stream);
 
             stream << ContractionTest::sAPILogBuff.str();
 
@@ -658,6 +748,11 @@ namespace hiptensor
 
             auto resource = getResource();
 
+            hipEvent_t startEvent, stopEvent;
+            CHECK_HIP_ERROR(hipEventCreate(&startEvent));
+            CHECK_HIP_ERROR(hipEventCreate(&stopEvent));
+            CHECK_HIP_ERROR(hipEventRecord(startEvent));
+
             CHECK_HIPTENSOR_ERROR(hiptensorContraction(handle,
                                                        &plan,
                                                        (void*)&alphaBuf,
@@ -670,6 +765,54 @@ namespace hiptensor
                                                        worksize,
                                                        0 /* stream */));
 
+            CHECK_HIP_ERROR(hipEventRecord(stopEvent));
+            CHECK_HIP_ERROR(hipEventSynchronize(stopEvent));
+
+            auto timeMs = 0.0f;
+            CHECK_HIP_ERROR(hipEventElapsedTime(&timeMs, startEvent, stopEvent));
+
+            size_t totalLength = std::accumulate(d_ms_ns.mLengths.begin(),
+                                                 d_ms_ns.mLengths.end(),
+                                                 size_t(1),
+                                                 std::multiplies());
+
+            uint32_t hops = desc.mTensorMode[2].size() / 2;
+            auto iter = std::find(desc.mTensorMode[0].cbegin(), desc.mTensorMode[0].cend(), desc.mTensorMode[2][desc.mTensorMode[2].size() - 1]);
+            if(iter != desc.mTensorMode[0].cend())
+            {
+                auto offset = std::distance(desc.mTensorMode[0].cbegin(), iter);
+                totalLength *= std::accumulate(a_ms_ks.mLengths.begin() + offset,
+                                               a_ms_ks.mLengths.begin() + offset + hops,
+                                               size_t(1),
+                                               std::multiplies());
+            }
+
+            mElapsedTimeMs = float64_t(timeMs);
+            mTotalGFlops = 2.0 * totalLength * 1e-9; // FLOP count scaled to GFLOP
+            mMeasuredTFlopsPerSec = mTotalGFlops / mElapsedTimeMs; // GFLOP per ms == TFLOP per s
+
+            size_t sizeA = std::accumulate(a_ms_ks.mLengths.begin(),
+                                           a_ms_ks.mLengths.end(),
+                                           hipDataTypeSize(ADataType),
+                                           std::multiplies());
+
+            size_t sizeB = std::accumulate(b_ns_ks.mLengths.begin(),
+                                           b_ns_ks.mLengths.end(),
+                                           hipDataTypeSize(BDataType),
+                                           std::multiplies());
+
+            size_t sizeD = std::accumulate(d_ms_ns.mLengths.begin(),
+                                           d_ms_ns.mLengths.end(),
+                                           hipDataTypeSize(DDataType),
+                                           std::multiplies());
+
+            mTotalBytes = sizeA + sizeB + sizeD;
+            mTotalBytes += (betaBuf.mReal != 0.0) ? sizeD : 0;
+            mTotalBytes /= (1e9 * mElapsedTimeMs);
+
+            CHECK_HIP_ERROR(hipEventDestroy(startEvent));
+            CHECK_HIP_ERROR(hipEventDestroy(stopEvent));
+
             auto& testOptions = HiptensorOptions::instance();
 
             if(testOptions->performValidation())
@@ -699,12 +842,7 @@ namespace hiptensor
                                                       DDataType,
                                                       workspace));
 
-                size_t elementsCD = std::accumulate(d_ms_ns.mLengths.begin(),
-                                                    d_ms_ns.mLengths.end(),
-                                                    size_t{1},
-                                                    std::multiplies());
 
-                int sizeD = elementsCD * hipDataTypeSize(DDataType);
                 auto reference = resource->allocDevice(sizeD);
                 resource->copyData(reference, resource->hostD(), sizeD);
 
@@ -715,6 +853,47 @@ namespace hiptensor
                                                 size_t{1},
                                                 std::multiplies());
 
+
+                size_t elementsCD = sizeD / hipDataTypeSize(DDataType); // sizeD was accumulated with D's element size
+
+                if(DDataType == HIP_R_16F)
+                {
+                    std::tie(mValidationResult, mMaxRelativeError)
+                        = compareEqualLaunchKernel<_Float16>((_Float16*)resource->deviceD().get(),
+                                                             (_Float16*)reference.get(),
+                                                             elementsCD,
+                                                             computeType,
+                                                             tolerance);
+                }
+                else if(DDataType == HIP_R_16BF)
+                {
+                    std::tie(mValidationResult, mMaxRelativeError)
+                        = compareEqualLaunchKernel(
+                            (hip_bfloat16*)resource->deviceD().get(),
+                            (hip_bfloat16*)reference.get(),
+                            elementsCD,
+                            computeType,
+                            tolerance);
+                }
+                else if(DDataType == HIP_R_32F || DDataType == HIP_C_32F)
+                {
+                    std::tie(mValidationResult, mMaxRelativeError)
+                        = compareEqualLaunchKernel((float*)resource->deviceD().get(),
+                                                   (float*)reference.get(),
+                                                   elementsCD,
+                                                   computeType,
+                                                   tolerance);
+                }
+                else if(DDataType == HIP_R_64F || DDataType == HIP_C_64F)
+                {
+                    std::tie(mValidationResult, mMaxRelativeError)
+                        = compareEqualLaunchKernel((double*)resource->deviceD().get(),
+                                                   (double*)reference.get(),
+                                                   elementsCD,
+                                                   computeType,
+                                                   tolerance);
+                }
+
                 auto eps = getEpsilon(computeType == HIPTENSOR_COMPUTE_64F ? HIPTENSOR_COMPUTE_64F
                                                                            : HIPTENSOR_COMPUTE_32F);
                 double tolerance = 2 * nelems_k * eps;
@@ -780,6 +959,7 @@ namespace hiptensor
                 reportResults(std::cout,
                               DDataType,
                               computeType,
+                              false,
                               loggingOptions->omitSkipped(),
                               loggingOptions->omitFailed(),
                               loggingOptions->omitPassed());
@@ -790,6 +970,7 @@ namespace hiptensor
                 reportResults(loggingOptions->ostream().fstream(),
                               DDataType,
                               computeType,
+                              false,
                               loggingOptions->omitSkipped(),
                               loggingOptions->omitFailed(),
                               loggingOptions->omitPassed());
diff --git a/test/01_contraction/contraction_test.hpp b/test/01_contraction/contraction_test.hpp
index a192293a..d7e054b3 100644
--- a/test/01_contraction/contraction_test.hpp
+++ b/test/01_contraction/contraction_test.hpp
@@ -90,6 +90,9 @@ namespace hiptensor
 
         ContractionResource* getResource() const;
 
+        std::ostream& printHeader(std::ostream& stream) const;
+        std::ostream& printKernel(std::ostream& stream) const;
+
         void SetUp() final;
         void TearDown() final;
 
@@ -99,6 +102,7 @@ namespace hiptensor
         void reportResults(std::ostream& stream,
                            hipDataType DDataType,
                            hiptensorComputeType_t computeType,
+                           bool omitHeader,
                            bool omitSkipped,
                            bool omitFailed,
                            bool omitPassed) const;
@@ -124,6 +128,9 @@ namespace hiptensor
 
         // Output buffer
         static std::stringstream sAPILogBuff;
+
+        // Performance
+        float64_t mElapsedTimeMs, mTotalGFlops, mMeasuredTFlopsPerSec, mTotalBytes;
     };
 
 } // namespace hiptensor
diff --git a/test/02_permutation/permutation_test.cpp b/test/02_permutation/permutation_test.cpp
index e3a06b1b..18292de9 100644
--- a/test/02_permutation/permutation_test.cpp
+++ b/test/02_permutation/permutation_test.cpp
@@ -72,6 +72,69 @@ namespace hiptensor
         mRunFlag = true;
         mValidationResult = false;
         mMaxRelativeError = 0.0;
+
+        mElapsedTimeMs = mTotalGFlops = mMeasuredTFlopsPerSec = mTotalBytes = 0.0;
+    }
+
+    std::ostream& PermutationTest::printHeader(std::ostream& stream /* = std::cout */) const
+    {
+        return stream << "TypeIn, TypeCompute, "
+                      << "Operator0, Operator1, LogLevel, "
+                      << "Lengths, PermutedOrder, "
+                      << "Alpha, elapsedMs, "
+                      << "Problem Size(GFlops), "
+                      << "TFlops/s, "
+                      << "TotalBytes, "
+                      << "Result" << std::endl;
+    }
+
+    std::ostream& PermutationTest::printKernel(std::ostream& stream) const
+    {
+        auto param = Base::GetParam();
+        auto testType = std::get<0>(param);
+        auto logLevel = std::get<1>(param);
+        auto lengths = std::get<2>(param);
+        auto permutedDims = std::get<3>(param);
+        auto alpha = std::get<4>(param);
+        auto operators = std::get<5>(param);
+
+        stream << hipTypeToString(testType[0]) << ", " << computeTypeToString(convertToComputeType(testType[1])) << ", " << opTypeToString(operators[0]) << ", "
+               << opTypeToString(operators[1]) << ", " << logLevelToString(logLevel) << ", [";
+
+        for(size_t i = 0; i < lengths.size(); i++) {
+            stream << lengths[i] << ", ";
+        }
+        stream << "], [";
+
+        if(!permutedDims.empty()) {
+            for(size_t i = 0; i < permutedDims.size(); i++) {
+                stream << permutedDims[i] << ", ";
+            }
+        }
+        stream << "], " << alpha << ", ";
+
+        if(!mRunFlag)
+        {
+            stream << "n/a"
+                   << ", "
+                   << "n/a"
+                   << ", "
+                   << "n/a"
+                   << ", "
+                   << "n/a"
+                   << ", "
+                   << "SKIPPED" << std::endl;
+        }
+        else
+        {
+
+            stream << mElapsedTimeMs << ", " << mTotalGFlops << ", " << mMeasuredTFlopsPerSec
+                   << ", " << mTotalBytes << ", "
+                   <<((bool)mValidationResult ? "PASSED" : "FAILED")
+                   << std::endl;
+        }
+
+        return stream;
     }
 
     PermutationResource* PermutationTest::getResource() const
@@ -120,16 +183,24 @@ namespace hiptensor
 
     void PermutationTest::reportResults(std::ostream& stream,
                                         hipDataType dataType,
+                                        bool omitHeader,
                                         bool omitSkipped,
                                         bool omitFailed,
                                         bool omitPassed) const
     {
+        if(!omitHeader)
+        {
+            printHeader(stream);
+        }
+
         // Conditionally print outputs
         if((mRunFlag || !omitSkipped) && (mValidationResult || !omitFailed)
            && (!mValidationResult || !omitPassed))
         {
             stream << PermutationTest::sAPILogBuff.str();
 
+            printKernel(stream);
+
             if(mPrintElements)
             {
                 auto resource = getResource();
@@ -240,6 +311,12 @@ namespace hiptensor
             {
                 *(reinterpret_cast(&alphaValue)) = static_cast(alpha);
             }
+
+            hipEvent_t startEvent, stopEvent;
+            CHECK_HIP_ERROR(hipEventCreate(&startEvent));
+            CHECK_HIP_ERROR(hipEventCreate(&stopEvent));
+            CHECK_HIP_ERROR(hipEventRecord(startEvent));
+
             CHECK_HIPTENSOR_ERROR(hiptensorPermutation(handle,
                                                        &alphaValue,
                                                        resource->deviceA().get(),
@@ -250,6 +327,35 @@ namespace hiptensor
                                                        modeB.data(),
                                                        computeDataType,
                                                        0 /* stream */));
+
+            CHECK_HIP_ERROR(hipEventRecord(stopEvent));
+            CHECK_HIP_ERROR(hipEventSynchronize(stopEvent));
+
+            auto timeMs = 0.0f;
+            CHECK_HIP_ERROR(hipEventElapsedTime(&timeMs, startEvent, stopEvent));
+
+            size_t sizeA = std::accumulate(extentA.begin(),
+                                           extentA.end(),
+                                           hipDataTypeSize(abDataType),
+                                           std::multiplies());
+
+            size_t sizeB = std::accumulate(extentB.begin(),
+                                           extentB.end(),
+                                           hipDataTypeSize(abDataType),
+                                           std::multiplies());
+
+            mElapsedTimeMs = float64_t(timeMs);
+            mTotalGFlops = 2.0 * ((sizeA * sizeB) / hipDataTypeSize(abDataType)) * 1e-9; // FLOP count scaled to GFLOP
+            mMeasuredTFlopsPerSec = mTotalGFlops / mElapsedTimeMs; // GFLOP per ms == TFLOP per s
+
+            mTotalBytes = sizeA + sizeB;
+            mTotalBytes /= (1e9 * mElapsedTimeMs);
+
+            CHECK_HIP_ERROR(hipEventDestroy(startEvent));
+            CHECK_HIP_ERROR(hipEventDestroy(stopEvent));
+
+            resource->copyBToHost();
+
             auto& testOptions = HiptensorOptions::instance();
 
             if(testOptions->performValidation())
@@ -311,6 +417,7 @@ namespace hiptensor
             {
                 reportResults(std::cout,
                               abDataType,
+                              false,
                               loggingOptions->omitSkipped(),
                               loggingOptions->omitFailed(),
                               loggingOptions->omitPassed());
@@ -320,6 +427,7 @@ namespace hiptensor
             {
                 reportResults(loggingOptions->ostream().fstream(),
                               abDataType,
+                              false,
                               loggingOptions->omitSkipped(),
                               loggingOptions->omitFailed(),
                               loggingOptions->omitPassed());
diff --git a/test/02_permutation/permutation_test.hpp b/test/02_permutation/permutation_test.hpp
index f2a14b71..79505b57 100644
--- a/test/02_permutation/permutation_test.hpp
+++ b/test/02_permutation/permutation_test.hpp
@@ -71,6 +71,9 @@ namespace hiptensor
         bool checkSizes() const;
         void reset();
 
+        std::ostream& printHeader(std::ostream& stream) const;
+        std::ostream& printKernel(std::ostream& stream) const;
+
         PermutationResource* getResource() const;
 
         void SetUp() final;
@@ -81,6 +84,7 @@ namespace hiptensor
 
         void reportResults(std::ostream& stream,
                            hipDataType DDataType,
+                           bool omitHeader,
                            bool omitSkipped,
                            bool omitFailed,
                            bool omitPassed) const;
@@ -100,6 +104,9 @@ namespace hiptensor
 
         // Output buffer
         static std::stringstream sAPILogBuff;
+
+        // Performance
+        float64_t mElapsedTimeMs, mTotalGFlops, mMeasuredTFlopsPerSec, mTotalBytes;
     };
 
 } // namespace hiptensor
diff --git a/test/03_reduction/reduction_test.cpp b/test/03_reduction/reduction_test.cpp
index 33cf5087..618b8bed 100644
--- a/test/03_reduction/reduction_test.cpp
+++ b/test/03_reduction/reduction_test.cpp
@@ -101,6 +101,8 @@ namespace hiptensor
         mRunFlag = true;
         mValidationResult = false;
         mMaxRelativeError = 0.0;
+
+        mElapsedTimeMs = mTotalGFlops = mMeasuredTFlopsPerSec = mTotalBytes = 0.0;
     }
 
     ReductionResource* ReductionTest::getResource() const
@@ -108,6 +110,67 @@ namespace hiptensor
         return DataStorage::instance().get();
     }
 
+    std::ostream& ReductionTest::printHeader(std::ostream& stream /* = std::cout */) const
+    {
+        return stream << "TypeIn, TypeCompute, "
+                      << "Operator, LogLevel, "
+                      << "Lengths, ReOrder, "
+                      << "Alpha, Beta, elapsedMs, "
+                      << "Problem Size(GFlops), "
+                      << "TFlops/s, "
+                      << "TotalBytes, "
+                      << "Result" << std::endl;
+    }
+
+    std::ostream& ReductionTest::printKernel(std::ostream& stream) const
+    {
+        auto param = Base::GetParam();
+        auto testType = std::get<0>(param);
+        auto logLevel = std::get<1>(param);
+        auto lengths = std::get<2>(param);
+        auto outputDims = std::get<3>(param);
+        auto alpha = std::get<4>(param);
+        auto beta = std::get<5>(param);
+        auto op = std::get<6>(param);
+
+        stream << hipTypeToString(testType[0]) << ", " << computeTypeToString(convertToComputeType(testType[1])) << ", " << opTypeToString(op) << ", " << logLevelToString(logLevel) << ", [";
+
+        for(size_t i = 0; i < lengths.size(); i++) {
+            stream << lengths[i] << ", ";
+        }
+        stream << "], [";
+
+        if(!outputDims.empty()) {
+            for(size_t i = 0; i < outputDims.size(); i++) {
+                stream << outputDims[i] << ", ";
+            }
+        }
+        stream << "], " << alpha << ", " << beta << ", ";
+
+        if(!mRunFlag)
+        {
+            stream << "n/a"
+                   << ", "
+                   << "n/a"
+                   << ", "
+                   << "n/a"
+                   << ", "
+                   << "n/a"
+                   << ", "
+                   << "SKIPPED" << std::endl;
+        }
+        else
+        {
+
+            stream << mElapsedTimeMs << ", " << mTotalGFlops << ", " << mMeasuredTFlopsPerSec
+                   << ", " << mTotalBytes << ", "
+                   <<((bool)mValidationResult ? "PASSED" : "FAILED")
+                   << std::endl;
+        }
+
+        return stream;
+    }
+
     void ReductionTest::SetUp()
     {
         // reset API log buffer
@@ -161,16 +224,24 @@ namespace hiptensor
 
     void ReductionTest::reportResults(std::ostream& stream,
                                       hipDataType dataType,
+                                      bool omitHeader,
                                       bool omitSkipped,
                                       bool omitFailed,
                                       bool omitPassed) const
     {
+        if(!omitHeader)
+        {
+            printHeader(stream);
+        }
+
         // Conditionally print outputs
         if((mRunFlag || !omitSkipped) && (mValidationResult || !omitFailed)
            && (!mValidationResult || !omitPassed))
         {
             stream << ReductionTest::sAPILogBuff.str();
 
+            printKernel(stream);
+
             if(mPrintElements)
             {
                 auto resource = getResource();
@@ -341,6 +412,12 @@ namespace hiptensor
             double betaValue{};
             writeVal(&alphaValue, computeDataType, {computeDataType, alpha});
             writeVal(&betaValue, computeDataType, {computeDataType, beta});
+
+            hipEvent_t startEvent, stopEvent;
+            CHECK_HIP_ERROR(hipEventCreate(&startEvent));
+            CHECK_HIP_ERROR(hipEventCreate(&stopEvent));
+            CHECK_HIP_ERROR(hipEventRecord(startEvent));
+
             CHECK_HIPTENSOR_ERROR(hiptensorReduction(handle,
                                                      (const void*)&alphaValue,
                                                      resource->deviceA().get(),
@@ -359,9 +436,37 @@ namespace hiptensor
                                                      worksize,
                                                      0 /* stream */));
 
+            CHECK_HIP_ERROR(hipEventRecord(stopEvent));
+            CHECK_HIP_ERROR(hipEventSynchronize(stopEvent));
+
+            auto timeMs = 0.0f;
+            CHECK_HIP_ERROR(hipEventElapsedTime(&timeMs, startEvent, stopEvent));
+
+            size_t sizeA = std::accumulate(extentA.begin(),
+                                           extentA.end(),
+                                           hipDataTypeSize(acDataType),
+                                           std::multiplies());
+
+            size_t sizeCD = std::accumulate(extentC.begin(),
+                                            extentC.end(),
+                                            hipDataTypeSize(acDataType),
+                                            std::multiplies());
+
+            mElapsedTimeMs = float64_t(timeMs);
+            mTotalGFlops = 2.0 * ((sizeA * sizeCD) / hipDataTypeSize(acDataType)) * 1e-9; // FLOP count scaled to GFLOP
+            mMeasuredTFlopsPerSec = mTotalGFlops / mElapsedTimeMs; // GFLOP per ms == TFLOP per s
+
+            mTotalBytes = sizeA + sizeCD;
+            mTotalBytes += (betaValue != 0.0) ? sizeCD : 0;
+            mTotalBytes /= (1e9 * mElapsedTimeMs);
+
+            CHECK_HIP_ERROR(hipEventDestroy(startEvent));
+            CHECK_HIP_ERROR(hipEventDestroy(stopEvent));
+
             auto& testOptions = HiptensorOptions::instance();
 
             if(testOptions->performValidation())
+
             {
                 resource->copyOutputToHost();
 
diff --git a/test/03_reduction/reduction_test.hpp b/test/03_reduction/reduction_test.hpp
index 98d45961..23309ef7 100644
--- a/test/03_reduction/reduction_test.hpp
+++ b/test/03_reduction/reduction_test.hpp
@@ -74,6 +74,9 @@ namespace hiptensor
 
         ReductionResource* getResource() const;
 
+        std::ostream& printHeader(std::ostream& stream) const;
+        std::ostream& printKernel(std::ostream& stream) const;
+
         void SetUp() final;
         void TearDown() final;
 
@@ -82,6 +85,7 @@ namespace hiptensor
 
         void reportResults(std::ostream& stream,
                            hipDataType DDataType,
+                           bool omitHeader,
                            bool omitSkipped,
                            bool omitFailed,
                            bool omitPassed) const;
@@ -101,6 +105,9 @@ namespace hiptensor
 
         // Output buffer
         static std::stringstream sAPILogBuff;
+
+        // Performance
+        float64_t mElapsedTimeMs, mTotalGFlops, mMeasuredTFlopsPerSec, mTotalBytes;
     };
 
 } // namespace hiptensor