From 074c0009ea1988fae56e3a9d2b77a93c042fae3c Mon Sep 17 00:00:00 2001 From: Cong Ma Date: Tue, 31 Oct 2023 00:23:10 +0000 Subject: [PATCH 1/5] Add CPU implementation of elementwise permutation - Add CPU implementation of elementwise permutation - permutationByCpu - Add sample of elementwise permutation GPU implementation of elementwise permutation is not done yet. --- .../src/permutation/hiptensor_permutation.cpp | 14 +- .../permutation/permutation_cpu_reference.hpp | 50 +++++ .../permutation_cpu_reference_impl.hpp | 85 +++++++++ .../simple_bilinear_contraction_f32.cpp | 9 +- .../simple_scale_contraction_f32.cpp | 9 +- samples/02_permutation/CMakeLists.txt | 34 ++++ samples/02_permutation/permutation.cpp | 179 ++++++++++++++++++ samples/CMakeLists.txt | 9 +- samples/{01_contraction => }/common.hpp | 8 +- 9 files changed, 380 insertions(+), 17 deletions(-) create mode 100644 library/src/permutation/permutation_cpu_reference.hpp create mode 100644 library/src/permutation/permutation_cpu_reference_impl.hpp create mode 100644 samples/02_permutation/CMakeLists.txt create mode 100644 samples/02_permutation/permutation.cpp rename samples/{01_contraction => }/common.hpp (93%) diff --git a/library/src/permutation/hiptensor_permutation.cpp b/library/src/permutation/hiptensor_permutation.cpp index ed4db184..493616a0 100644 --- a/library/src/permutation/hiptensor_permutation.cpp +++ b/library/src/permutation/hiptensor_permutation.cpp @@ -25,6 +25,8 @@ *******************************************************************************/ #include +#include "permutation_cpu_reference.hpp" + hiptensorStatus_t hiptensorPermutation(const hiptensorHandle_t* handle, const void* alpha, const void* A, @@ -36,5 +38,15 @@ hiptensorStatus_t hiptensorPermutation(const hiptensorHandle_t* handle const hipDataType typeScalar, const hipStream_t stream) { - return HIPTENSOR_STATUS_SUCCESS; + assert(descA->mType == HIP_R_16F || descA->mType == HIP_R_32F); + assert(descA->mType == descB->mType); + if(descA->mType == HIP_R_16F) + { + return hiptensor::detail::permuteByCpu(alpha, static_cast(A), descA, modeA, static_cast<_Float16 *>(B), descB, modeB, typeScalar); + } + else if(descA->mType == HIP_R_32F) + { + return hiptensor::detail::permuteByCpu(alpha, static_cast(A), descA, modeA, static_cast(B), descB, modeB, typeScalar); + } + return HIPTENSOR_STATUS_NOT_SUPPORTED; } diff --git a/library/src/permutation/permutation_cpu_reference.hpp b/library/src/permutation/permutation_cpu_reference.hpp new file mode 100644 index 00000000..f7e335f2 --- /dev/null +++ b/library/src/permutation/permutation_cpu_reference.hpp @@ -0,0 +1,50 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *******************************************************************************/ + +#ifndef HIPTENSOR_PERMUTATION_CPU_REFERENCE_HPP +#define HIPTENSOR_PERMUTATION_CPU_REFERENCE_HPP + +#include + +#include +namespace hiptensor +{ + namespace detail + { + template + hiptensorStatus_t permuteByCpu(const void* alpha, + const DataType* A, + const hiptensorTensorDescriptor_t* descA, + const int32_t modeA[], + DataType* B, + const hiptensorTensorDescriptor_t* descB, + const int32_t modeB[], + const hipDataType typeScalar); + } +} + +#include "permutation_cpu_reference_impl.hpp" +#endif // HIPTENSOR_PERMUTATION_CPU_REFERENCE_HPP diff --git a/library/src/permutation/permutation_cpu_reference_impl.hpp b/library/src/permutation/permutation_cpu_reference_impl.hpp new file mode 100644 index 00000000..6cc34e6b --- /dev/null +++ b/library/src/permutation/permutation_cpu_reference_impl.hpp @@ -0,0 +1,85 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *******************************************************************************/ +#ifndef HIPTENSOR_PERMUTATION_CPU_REFERENCE_IMPL_HPP +#define HIPTENSOR_PERMUTATION_CPU_REFERENCE_IMPL_HPP +#include +#include +#include + +#include "permutation_cpu_reference.hpp" +#include "types.hpp" +#include "util.hpp" + +namespace hiptensor +{ + namespace detail + { + template + hiptensorStatus_t permuteByCpu(const void* alpha, + const DataType* A, + const hiptensorTensorDescriptor_t* descA, + const int32_t modeA[], + DataType* B, + const hiptensorTensorDescriptor_t* descB, + const int32_t modeB[], + const hipDataType typeScalar) + { + const auto modeSize = descA->mLengths.size(); + assert(modeSize <= 4); + + std::unordered_map bModeToIndex; + for(int32_t index = 0; index < modeSize; index++) + { + bModeToIndex[modeB[index]] = index; + } + + auto& aLens = descA->mLengths; + // auto bStrides = descB->mStrides; // TODO descB->mStrides contains incorrect strides + auto bStrides = std::vector(modeSize, 1); + for(int i = 1; i < modeSize; i++) + { + bStrides[i] = descB->mLengths[i - 1] * bStrides[i - 1]; + } + auto bIndices = std::vector(modeSize, 0); + auto elementCount = hiptensor::elementsFromLengths(aLens); + for(int elementIndex = 0; elementIndex < elementCount; elementIndex++) + { + auto index = elementIndex; + for(int modeIndex = 0; modeIndex < modeSize; modeIndex++) + { + bIndices[bModeToIndex[modeA[modeIndex]]] = index % aLens[modeIndex]; + index /= aLens[modeIndex]; + } + auto bOffset + = std::inner_product(bIndices.begin(), bIndices.end(), bStrides.begin(), 0); + B[bOffset] = A[elementIndex]; + } + + return HIPTENSOR_STATUS_SUCCESS; + } + } +} +#endif //HIPTENSOR_PERMUTATION_CPU_REFERENCE_IMPL_HPP diff --git a/samples/01_contraction/simple_bilinear_contraction_f32.cpp b/samples/01_contraction/simple_bilinear_contraction_f32.cpp index 4781e72b..5704a59d 100644 --- a/samples/01_contraction/simple_bilinear_contraction_f32.cpp +++ b/samples/01_contraction/simple_bilinear_contraction_f32.cpp @@ -150,9 +150,12 @@ int main(int argc, char* argv[]) size_t sizeB = sizeof(BDataType) * elementsB; size_t sizeC = sizeof(CDataType) * elementsC; - ADataType* A = (ADataType*)malloc(sizeA); - BDataType* B = (BDataType*)malloc(sizeB); - CDataType* C = (CDataType*)malloc(sizeC); + ADataType* A = nullptr; + BDataType* B = nullptr; + CDataType* C = nullptr; + CHECK_HIP_ERROR(hipHostMalloc((void**)&A, sizeA)); + CHECK_HIP_ERROR(hipHostMalloc((void**)&B, sizeB)); + CHECK_HIP_ERROR(hipHostMalloc((void**)&C, sizeC)); void *A_d, *B_d, *C_d; diff --git a/samples/01_contraction/simple_scale_contraction_f32.cpp b/samples/01_contraction/simple_scale_contraction_f32.cpp index e8c45e31..c76ec370 100644 --- a/samples/01_contraction/simple_scale_contraction_f32.cpp +++ b/samples/01_contraction/simple_scale_contraction_f32.cpp @@ -147,9 +147,12 @@ int main(int argc, char* argv[]) size_t sizeB = sizeof(BDataType) * elementsB; size_t sizeD = sizeof(DDataType) * elementsD; - ADataType* A = (ADataType*)malloc(sizeA); - BDataType* B = (BDataType*)malloc(sizeB); - DDataType* D = (DDataType*)malloc(sizeD); + ADataType* A = nullptr; + BDataType* B = nullptr; + DDataType* D = nullptr; + CHECK_HIP_ERROR(hipHostMalloc((void**)&A, sizeA)); + CHECK_HIP_ERROR(hipHostMalloc((void**)&B, sizeB)); + CHECK_HIP_ERROR(hipHostMalloc((void**)&D, sizeD)); void *A_d, *B_d, *D_d; diff --git a/samples/02_permutation/CMakeLists.txt b/samples/02_permutation/CMakeLists.txt new file mode 100644 index 00000000..68857b54 --- /dev/null +++ b/samples/02_permutation/CMakeLists.txt @@ -0,0 +1,34 @@ +############################################################################### + # + # MIT License + # + # Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to deal + # in the Software without restriction, including without limitation the rights + # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + # copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in + # all copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + # THE SOFTWARE. + # + ############################################################################### + +# Check whether building within hiptensor context +if( CMAKE_PROJECT_NAME STREQUAL "hiptensor" ) + add_hiptensor_sample(permutation permutation.cpp) +# If building hipTensor samples as a standalone Cmake project +else() + add_executable(permutation permutation.cpp) + target_link_libraries(permutation PRIVATE hiptensor::hiptensor) +endif() diff --git a/samples/02_permutation/permutation.cpp b/samples/02_permutation/permutation.cpp new file mode 100644 index 00000000..2c95d076 --- /dev/null +++ b/samples/02_permutation/permutation.cpp @@ -0,0 +1,179 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *******************************************************************************/ +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "common.hpp" + +int main() +{ + if(!isF32Supported()) + { + std::cout << "unsupported host device" << std::endl; + exit(EXIT_FAILURE); + } + + typedef float floatTypeA; + typedef float floatTypeC; + typedef float floatTypeCompute; + + hipDataType typeA = HIP_R_32F; + hipDataType typeC = HIP_R_32F; + hipDataType typeCompute = HIP_R_32F; + + /********************** + B_{w, h, c, n} = 1.0 * \textsl{IDENTITY}(A_{c, n, h, w}) + **********************/ + + std::vector modeA{'w', 'h', 'c', 'n'}; + std::vector modeC{'c', 'n', 'h', 'w'}; + int nmodeA = modeA.size(); + int nmodeC = modeC.size(); + + std::unordered_map extent; + extent['h'] = 2; + extent['w'] = 3; + extent['c'] = 4; + extent['n'] = 5; + + std::vector extentA; + for(auto mode : modeA) + extentA.push_back(extent[mode]); + std::vector extentC; + for(auto mode : modeC) + extentC.push_back(extent[mode]); + + /********************** + * Allocating data + **********************/ + + size_t elementsA = 1; + for(auto mode : modeA) + elementsA *= extent[mode]; + size_t elementsC = 1; + for(auto mode : modeC) + elementsC *= extent[mode]; + + size_t sizeA = sizeof(floatTypeA) * elementsA; + size_t sizeC = sizeof(floatTypeC) * elementsC; + + void *A_d, *C_d; + CHECK_HIP_ERROR(hipMalloc((void**)&A_d, sizeA)); + CHECK_HIP_ERROR(hipMalloc((void**)&C_d, sizeC)); + + floatTypeA *A, *C; + CHECK_HIP_ERROR(hipHostMalloc((void**)&A, sizeof(floatTypeA) * elementsA)); + CHECK_HIP_ERROR(hipHostMalloc((void**)&C, sizeof(floatTypeC) * elementsC)); + + for(size_t i = 0; i < elementsA; i++) + { + A[i] = (float)i; + } + + CHECK_HIP_ERROR(hipMemcpy(A_d, A, sizeA, hipMemcpyDefault)); + + hiptensorStatus_t err; + hiptensorHandle_t* handle; + CHECK_HIPTENSOR_ERROR(hiptensorCreate(&handle)); + + hiptensorTensorDescriptor_t descA; + CHECK_HIPTENSOR_ERROR(hiptensorInitTensorDescriptor( + handle, &descA, nmodeA, extentA.data(), NULL /* stride */, typeA, HIPTENSOR_OP_IDENTITY)); + + hiptensorTensorDescriptor_t descC; + CHECK_HIPTENSOR_ERROR(hiptensorInitTensorDescriptor( + handle, &descC, nmodeC, extentC.data(), NULL /* stride */, typeC, HIPTENSOR_OP_IDENTITY)); + + const floatTypeCompute one = 1.0f; + CHECK_HIPTENSOR_ERROR(hiptensorPermutation(handle, + &one, + A_d, + &descA, + modeA.data(), + C_d, + &descC, + modeC.data(), + typeCompute, + 0 /* stream */)); + +#if !NDEBUG + bool printElements = false; + bool storeElements = false; + + if(printElements || storeElements) + { + CHECK_HIP_ERROR(hipMemcpy(C, C_d, sizeC, hipMemcpyDefault)); + } + + if(printElements) + { + if(elementsA < MAX_ELEMENTS_PRINT_COUNT) + { + std::cout << "Tensor A elements:\n"; + hiptensorPrintArrayElements(std::cout, A, elementsA); + std::cout << std::endl; + } + + if(elementsC < MAX_ELEMENTS_PRINT_COUNT) + { + std::cout << "Tensor C elements:\n"; + hiptensorPrintArrayElements(std::cout, C, elementsC); + std::cout << std::endl; + } + } + + if(storeElements) + { + std::ofstream tensorA, tensorB, tensorC; + tensorA.open("tensor_A.txt"); + hiptensorPrintElementsToFile(tensorA, A, elementsA, ", "); + tensorA.close(); + + tensorC.open("tensor_C_scale_contraction_results.txt"); + hiptensorPrintElementsToFile(tensorC, C, elementsC, ", "); + tensorC.close(); + } + +#endif + + CHECK_HIPTENSOR_ERROR(hiptensorDestroy(handle)); + HIPTENSOR_FREE_HOST(A); + HIPTENSOR_FREE_HOST(C); + HIPTENSOR_FREE_DEVICE(A_d); + HIPTENSOR_FREE_DEVICE(C_d); + + std::cout << "Finished!" << std::endl; + return 0; +} diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 4914859b..ab06097b 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -40,6 +40,7 @@ if( CMAKE_PROJECT_NAME STREQUAL "hiptensor" ) target_link_libraries(${BINARY_NAME} PRIVATE hiptensor::hiptensor "-L${HIP_CLANG_ROOT}/lib" "-Wl,-rpath=$ORIGIN/../${CMAKE_INSTALL_LIBDIR}") target_include_directories(${BINARY_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} + ${PROJECT_SOURCE_DIR}/samples ${PROJECT_SOURCE_DIR}/library/include) # Build this sample under custom target @@ -51,10 +52,6 @@ if( CMAKE_PROJECT_NAME STREQUAL "hiptensor" ) COMPONENT samples ) endfunction() - - add_subdirectory(01_contraction) - - # If building hipTensor samples as a standalone Cmake project else() if(NOT CMAKE_CXX_COMPILER) @@ -81,6 +78,6 @@ else() find_package( hip REQUIRED ) message(STATUS "Build with HIP ${hip_VERSION}") - - add_subdirectory(01_contraction) endif() +add_subdirectory(01_contraction) +add_subdirectory(02_permutation) diff --git a/samples/01_contraction/common.hpp b/samples/common.hpp similarity index 93% rename from samples/01_contraction/common.hpp rename to samples/common.hpp index a7f75542..acb614c4 100644 --- a/samples/01_contraction/common.hpp +++ b/samples/common.hpp @@ -35,10 +35,10 @@ CHECK_HIP_ERROR(hipFree(ptr)); \ } -#define HIPTENSOR_FREE_HOST(ptr) \ - if(ptr != nullptr) \ - { \ - free(ptr); \ +#define HIPTENSOR_FREE_HOST(ptr) \ + if(ptr != nullptr) \ + { \ + CHECK_HIP_ERROR(hipHostFree(ptr)); \ } inline bool isF32Supported() From 60f4b900b82704da2c9043f12e672017f71f7ef1 Mon Sep 17 00:00:00 2001 From: Cong Ma Date: Wed, 1 Nov 2023 20:24:08 +0000 Subject: [PATCH 2/5] Move files used by all tests out of 01_contraction moved 01_contraction/device/common.hpp -> device/common.hpp renamed 01_contraction/common.hpp -> utils.hpp since there is already a file named common.hpp in test folder --- test/01_contraction/contraction_test.cpp | 2 +- test/01_contraction/contraction_test_params.hpp | 2 +- test/{01_contraction => }/device/common.hpp | 0 test/hiptensor_gtest_main.cpp | 2 +- test/{01_contraction/common.hpp => utils.hpp} | 6 +++--- 5 files changed, 6 insertions(+), 6 deletions(-) rename test/{01_contraction => }/device/common.hpp (100%) rename test/{01_contraction/common.hpp => utils.hpp} (98%) diff --git a/test/01_contraction/contraction_test.cpp b/test/01_contraction/contraction_test.cpp index ca67b8c1..790676cd 100644 --- a/test/01_contraction/contraction_test.cpp +++ b/test/01_contraction/contraction_test.cpp @@ -28,7 +28,7 @@ #include "../library/src/include/types.hpp" #include "llvm/hiptensor_options.hpp" -#include "common.hpp" +#include "../utils.hpp" #include "contraction_cpu_reference.hpp" #include "contraction_test.hpp" diff --git a/test/01_contraction/contraction_test_params.hpp b/test/01_contraction/contraction_test_params.hpp index 952219ac..30c99349 100644 --- a/test/01_contraction/contraction_test_params.hpp +++ b/test/01_contraction/contraction_test_params.hpp @@ -33,7 +33,7 @@ #include #include -#include "common.hpp" +#include "../utils.hpp" namespace hiptensor { diff --git a/test/01_contraction/device/common.hpp b/test/device/common.hpp similarity index 100% rename from test/01_contraction/device/common.hpp rename to test/device/common.hpp diff --git a/test/hiptensor_gtest_main.cpp b/test/hiptensor_gtest_main.cpp index 2fcaa275..2d8f1d45 100644 --- a/test/hiptensor_gtest_main.cpp +++ b/test/hiptensor_gtest_main.cpp @@ -23,7 +23,7 @@ * SOFTWARE. * *******************************************************************************/ -#include "01_contraction/common.hpp" +#include "utils.hpp" #include "llvm/hiptensor_options.hpp" #include diff --git a/test/01_contraction/common.hpp b/test/utils.hpp similarity index 98% rename from test/01_contraction/common.hpp rename to test/utils.hpp index c31de3e6..421c1a04 100644 --- a/test/01_contraction/common.hpp +++ b/test/utils.hpp @@ -24,8 +24,8 @@ * *******************************************************************************/ -#ifndef HIPTENSOR_TEST_CONTRACTION_COMMON_HPP -#define HIPTENSOR_TEST_CONTRACTION_COMMON_HPP +#ifndef HIPTENSOR_TEST_UTILS_HPP +#define HIPTENSOR_TEST_UTILS_HPP #include #include @@ -292,4 +292,4 @@ namespace std } } -#endif // HIPTENSOR_TEST_CONTRACTION_COMMON_HPP +#endif // HIPTENSOR_TEST_UTILS_HPP From bf1c7eed5ecfeb28f3f85dae62c1ed9d3dea7b69 Mon Sep 17 00:00:00 2001 From: Cong Ma Date: Thu, 2 Nov 2023 14:24:53 +0000 Subject: [PATCH 3/5] Implement permutation with CK - implemented `hiptensorPermutation` with CK - added unit tests for `hiptensorPermutation` --- .gitignore | 2 +- .../hiptensor/internal/hiptensor_utility.hpp | 5 + library/src/hiptensor.cpp | 3 +- .../src/permutation/hiptensor_permutation.cpp | 131 +++++++- .../src/permutation/permutation_ck_col.hpp | 48 +++ .../permutation/permutation_ck_col_impl.hpp | 123 +++++++ .../permutation/permutation_cpu_reference.hpp | 3 - .../permutation_cpu_reference_impl.hpp | 9 +- test/01_contraction/contraction_resource.hpp | 4 +- test/01_contraction/contraction_test.cpp | 6 +- .../contraction_test_params.hpp | 2 +- test/02_permutation/CMakeLists.txt | 35 ++ test/02_permutation/configs/test_params.yaml | 41 +++ .../permutation_column_major_test.cpp} | 32 +- test/02_permutation/permutation_resource.cpp | 161 +++++++++ test/02_permutation/permutation_resource.hpp | 104 ++++++ test/02_permutation/permutation_test.cpp | 306 ++++++++++++++++++ test/02_permutation/permutation_test.hpp | 105 ++++++ .../permutation_test_helpers.hpp | 68 ++++ .../permutation_test_params.hpp | 95 ++++++ test/CMakeLists.txt | 4 +- test/device/common.hpp | 2 +- test/llvm/CMakeLists.txt | 4 +- ...tion_config.cpp => yaml_parser_config.cpp} | 47 ++- test/utils.hpp | 45 ++- 25 files changed, 1337 insertions(+), 48 deletions(-) create mode 100644 library/src/permutation/permutation_ck_col.hpp create mode 100644 library/src/permutation/permutation_ck_col_impl.hpp create mode 100644 test/02_permutation/CMakeLists.txt create mode 100644 test/02_permutation/configs/test_params.yaml rename test/{singleton.hpp => 02_permutation/permutation_column_major_test.cpp} (74%) create mode 100644 test/02_permutation/permutation_resource.cpp create mode 100644 test/02_permutation/permutation_resource.hpp create mode 100644 test/02_permutation/permutation_test.cpp create mode 100644 test/02_permutation/permutation_test.hpp create mode 100644 test/02_permutation/permutation_test_helpers.hpp create mode 100644 test/02_permutation/permutation_test_params.hpp rename test/llvm/{yaml_parser_contraction_config.cpp => yaml_parser_config.cpp} (85%) diff --git a/.gitignore b/.gitignore index ae21cc0c..ad44a303 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,7 @@ hiptensor_version.hpp hiptensor-version.hpp # Generated source file -test/01_contraction/configs/*.hpp +test/*/configs/*.hpp # Precompiled Headers *.gch diff --git a/library/include/hiptensor/internal/hiptensor_utility.hpp b/library/include/hiptensor/internal/hiptensor_utility.hpp index f2df2dd2..5fd9e331 100644 --- a/library/include/hiptensor/internal/hiptensor_utility.hpp +++ b/library/include/hiptensor/internal/hiptensor_utility.hpp @@ -131,6 +131,11 @@ namespace std return os; } + static ostream& operator<<(ostream& os, const _Float16 value) + { + os << static_cast(value); + return os; + } } #endif // HIPTENSOR_UTILITY_INTERNAL_HPP diff --git a/library/src/hiptensor.cpp b/library/src/hiptensor.cpp index 6638f398..3016ef64 100644 --- a/library/src/hiptensor.cpp +++ b/library/src/hiptensor.cpp @@ -151,7 +151,8 @@ hiptensorStatus_t hiptensorInitTensorDescriptor(const hiptensorHandle_t* han return HIPTENSOR_STATUS_NOT_INITIALIZED; } - if((lens == nullptr) || ((dataType != HIP_R_32F) && (dataType != HIP_R_64F)) + if((lens == nullptr) + || ((dataType != HIP_R_16F) && (dataType != HIP_R_32F) && (dataType != HIP_R_64F)) || unaryOp != HIPTENSOR_OP_IDENTITY) { auto errorCode = HIPTENSOR_STATUS_INVALID_VALUE; diff --git a/library/src/permutation/hiptensor_permutation.cpp b/library/src/permutation/hiptensor_permutation.cpp index 493616a0..b3d60379 100644 --- a/library/src/permutation/hiptensor_permutation.cpp +++ b/library/src/permutation/hiptensor_permutation.cpp @@ -25,7 +25,8 @@ *******************************************************************************/ #include -#include "permutation_cpu_reference.hpp" +#include "logger.hpp" +#include "permutation_ck_col.hpp" hiptensorStatus_t hiptensorPermutation(const hiptensorHandle_t* handle, const void* alpha, @@ -38,15 +39,133 @@ hiptensorStatus_t hiptensorPermutation(const hiptensorHandle_t* handle const hipDataType typeScalar, const hipStream_t stream) { - assert(descA->mType == HIP_R_16F || descA->mType == HIP_R_32F); - assert(descA->mType == descB->mType); + using hiptensor::Logger; + auto& logger = Logger::instance(); + + // Log API access + char msg[2048]; + snprintf(msg, + sizeof(msg), + "handle=%p, alpha=%p, A=%p, descA=%p, modeA=%p, B=%p, descB=%p, modeB=%p, " + "typeScalar=0x%02X, stream=%p", + handle, + alpha, + A, + descA, + modeA, + B, + descB, + modeB, + (unsigned int)typeScalar, + stream); + + logger->logAPITrace("hiptensorPermutation", msg); + + if(!handle || !alpha || !A || !descA || !modeA || !B || !descB || !modeB) + { + auto errorCode = HIPTENSOR_STATUS_NOT_INITIALIZED; + auto printErrorMessage = [&logger, errorCode](const std::string& paramName) { + char msg[512]; + snprintf(msg, + sizeof(msg), + "Initialization Error : %s = nullptr (%s)", + paramName.c_str(), + hiptensorGetErrorString(errorCode)); + logger->logError("hiptensorPermutation", msg); + }; + if(!handle) + { + printErrorMessage("handle"); + } + if(!alpha) + { + printErrorMessage("alpha"); + } + if(!A) + { + printErrorMessage("A"); + } + if(!descA) + { + printErrorMessage("descA"); + } + if(!modeA) + { + printErrorMessage("modeA"); + } + if(!B) + { + printErrorMessage("B"); + } + if(!descB) + { + printErrorMessage("descB"); + } + if(!modeB) + { + printErrorMessage("modeB"); + } + return errorCode; + } + + if(descA->mType != HIP_R_16F && descA->mType != HIP_R_32F) + { + auto errorCode = HIPTENSOR_STATUS_NOT_SUPPORTED; + snprintf(msg, + sizeof(msg), + "Unsupported Data Type Error : The supported data types of A and B are HIP_R_16F " + "and HIP_R_32F (%s)", + hiptensorGetErrorString(errorCode)); + logger->logError("hiptensorPermutation", msg); + return errorCode; + } + + if(descA->mType != descB->mType) + { + auto errorCode = HIPTENSOR_STATUS_INVALID_VALUE; + snprintf(msg, + sizeof(msg), + "Mismatched Data Type Error : Data types of A and B are not the same. (%s)", + hiptensorGetErrorString(errorCode)); + logger->logError("hiptensorPermutation", msg); + return errorCode; + } + + if(typeScalar != HIP_R_16F && typeScalar != HIP_R_32F) + { + auto errorCode = HIPTENSOR_STATUS_NOT_SUPPORTED; + snprintf(msg, + sizeof(msg), + "Unsupported Data Type Error : The supported data types of alpha are HIP_R_16F " + "and HIP_R_32F (%s)", + hiptensorGetErrorString(errorCode)); + logger->logError("hiptensorPermutation", msg); + return errorCode; + } + if(descA->mType == HIP_R_16F) { - return hiptensor::detail::permuteByCpu(alpha, static_cast(A), descA, modeA, static_cast<_Float16 *>(B), descB, modeB, typeScalar); + return hiptensor::detail::permuteByCk(alpha, + static_cast(A), + descA, + modeA, + static_cast<_Float16*>(B), + descB, + modeB, + typeScalar, + stream); } else if(descA->mType == HIP_R_32F) { - return hiptensor::detail::permuteByCpu(alpha, static_cast(A), descA, modeA, static_cast(B), descB, modeB, typeScalar); + return hiptensor::detail::permuteByCk(alpha, + static_cast(A), + descA, + modeA, + static_cast(B), + descB, + modeB, + typeScalar, + stream); } - return HIPTENSOR_STATUS_NOT_SUPPORTED; + return HIPTENSOR_STATUS_NOT_SUPPORTED; } diff --git a/library/src/permutation/permutation_ck_col.hpp b/library/src/permutation/permutation_ck_col.hpp new file mode 100644 index 00000000..600658c6 --- /dev/null +++ b/library/src/permutation/permutation_ck_col.hpp @@ -0,0 +1,48 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *******************************************************************************/ +#ifndef HIPTENSOR_PERMUTATION_CK_COL_HPP +#define HIPTENSOR_PERMUTATION_CK_COL_HPP +#include + +namespace hiptensor +{ + namespace detail + { + template + hiptensorStatus_t permuteByCk(const void* alpha, + const DataType* A, + const hiptensorTensorDescriptor_t* descA, + const int32_t modeA[], + DataType* B, + const hiptensorTensorDescriptor_t* descB, + const int32_t modeB[], + const hipDataType typeScalar); + + } +} + +#include "permutation_ck_col_impl.hpp" +#endif // HIPTENSOR_PERMUTATION_CK_COL_HPP diff --git a/library/src/permutation/permutation_ck_col_impl.hpp b/library/src/permutation/permutation_ck_col_impl.hpp new file mode 100644 index 00000000..7f16bb8a --- /dev/null +++ b/library/src/permutation/permutation_ck_col_impl.hpp @@ -0,0 +1,123 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *******************************************************************************/ +#ifndef HIPTENSOR_PERMUTATION_CK_COL_IMPL_HPP +#define HIPTENSOR_PERMUTATION_CK_COL_IMPL_HPP +#include + +#include +#include +#include + +#include "types.hpp" + +namespace hiptensor +{ + namespace detail + { + template + hiptensorStatus_t permuteByCk(const void* alpha, + const DataType* A, + const hiptensorTensorDescriptor_t* descA, + const int32_t modeA[], + DataType* B, + const hiptensorTensorDescriptor_t* descB, + const int32_t modeB[], + const hipDataType typeScalar, + const hipStream_t stream) + { + using PassThrough = ck::tensor_operation::element_wise::PassThrough; + using UnaryOp = ck::tensor_operation::element_wise::PassThrough; + using Scale = ck::tensor_operation::element_wise::Scale; + using DeviceElementwisePermuteInstance + = ck::tensor_operation::device::DeviceElementwiseImpl< + ck::Tuple, // InDataTypeTuple + ck::Tuple, // OutDataTypeTuple + PassThrough, // ElementwiseOp + UnaryOp, // UnaryOp + Scale, // Scalar + 4, // NumDim + 1, // MPerThread + ck::Sequence<1>, // InScalarPerVectorSeq + ck::Sequence<1>>; // OutScalarPerVectorSeq + + const auto modeSize = descA->mLengths.size(); + assert(modeSize == 4); + + std::unordered_map + modeToLength; // for example {'n': 1, 'c': 2, 'w': 3, 'h':0} + + for(int32_t index = 0; index < modeSize; index++) + { + modeToLength[modeA[index]] = descA->mLengths[index]; + } + + std::unordered_map bModeToStrides; + int32_t stride = 1; + bModeToStrides[modeB[0]] = stride; + for(int32_t index = 1; index < modeSize; index++) + { + stride *= modeToLength[modeB[index - 1]]; + bModeToStrides[modeB[index]] = stride; + } + + float alphaValue = readVal(alpha, typeScalar); + std::array input = {A}; + std::array output = {B}; + std::array a_strides + = {1, + modeToLength[modeA[0]], + modeToLength[modeA[0]] * modeToLength[modeA[1]], + modeToLength[modeA[0]] * modeToLength[modeA[1]] * modeToLength[modeA[2]]}; + std::array b_strides = {bModeToStrides[modeA[0]], + bModeToStrides[modeA[1]], + bModeToStrides[modeA[2]], + bModeToStrides[modeA[3]]}; + std::array ab_lengths = {modeToLength[modeA[0]], + modeToLength[modeA[1]], + modeToLength[modeA[2]], + modeToLength[modeA[3]]}; + auto broadcastPermute = DeviceElementwisePermuteInstance{}; + auto argument = broadcastPermute.MakeArgumentPointer(ab_lengths, + {a_strides}, + {b_strides}, + input, + output, + PassThrough{}, + UnaryOp{}, + Scale{alphaValue}); + + if(!broadcastPermute.IsSupportedArgument(argument.get())) + { + return HIPTENSOR_STATUS_NOT_SUPPORTED; + }; + + auto broadcastPermute_invoker_ptr = broadcastPermute.MakeInvokerPointer(); + broadcastPermute_invoker_ptr->Run(argument.get(), StreamConfig{stream, false}); + return HIPTENSOR_STATUS_SUCCESS; + } + } +} +#endif // HIPTENSOR_PERMUTATION_CK_COL_IMPL_HPP diff --git a/library/src/permutation/permutation_cpu_reference.hpp b/library/src/permutation/permutation_cpu_reference.hpp index f7e335f2..0bcff3cf 100644 --- a/library/src/permutation/permutation_cpu_reference.hpp +++ b/library/src/permutation/permutation_cpu_reference.hpp @@ -26,9 +26,6 @@ #ifndef HIPTENSOR_PERMUTATION_CPU_REFERENCE_HPP #define HIPTENSOR_PERMUTATION_CPU_REFERENCE_HPP - -#include - #include namespace hiptensor { diff --git a/library/src/permutation/permutation_cpu_reference_impl.hpp b/library/src/permutation/permutation_cpu_reference_impl.hpp index 6cc34e6b..a04e4176 100644 --- a/library/src/permutation/permutation_cpu_reference_impl.hpp +++ b/library/src/permutation/permutation_cpu_reference_impl.hpp @@ -26,8 +26,8 @@ #ifndef HIPTENSOR_PERMUTATION_CPU_REFERENCE_IMPL_HPP #define HIPTENSOR_PERMUTATION_CPU_REFERENCE_IMPL_HPP #include -#include #include +#include #include "permutation_cpu_reference.hpp" #include "types.hpp" @@ -63,8 +63,9 @@ namespace hiptensor { bStrides[i] = descB->mLengths[i - 1] * bStrides[i - 1]; } - auto bIndices = std::vector(modeSize, 0); - auto elementCount = hiptensor::elementsFromLengths(aLens); + auto bIndices = std::vector(modeSize, 0); + auto elementCount = hiptensor::elementsFromLengths(aLens); + float alphaValue = readVal(alpha, typeScalar); for(int elementIndex = 0; elementIndex < elementCount; elementIndex++) { auto index = elementIndex; @@ -75,7 +76,7 @@ namespace hiptensor } auto bOffset = std::inner_product(bIndices.begin(), bIndices.end(), bStrides.begin(), 0); - B[bOffset] = A[elementIndex]; + B[bOffset] = static_cast(A[elementIndex] * alphaValue); } return HIPTENSOR_STATUS_SUCCESS; diff --git a/test/01_contraction/contraction_resource.hpp b/test/01_contraction/contraction_resource.hpp index 88f13e88..2fd3e62c 100644 --- a/test/01_contraction/contraction_resource.hpp +++ b/test/01_contraction/contraction_resource.hpp @@ -30,8 +30,8 @@ #include #include -#include "../hip_resource.hpp" -#include "../singleton.hpp" +#include "hip_resource.hpp" +#include "singleton.hpp" // ContractionResource class is intended to manage a shared pool of resources for // testing hiptensor contraction kernels on the GPU. diff --git a/test/01_contraction/contraction_test.cpp b/test/01_contraction/contraction_test.cpp index 790676cd..4ad97610 100644 --- a/test/01_contraction/contraction_test.cpp +++ b/test/01_contraction/contraction_test.cpp @@ -25,12 +25,12 @@ *******************************************************************************/ #include -#include "../library/src/include/types.hpp" +#include "types.hpp" #include "llvm/hiptensor_options.hpp" -#include "../utils.hpp" -#include "contraction_cpu_reference.hpp" +#include "contraction/contraction_cpu_reference.hpp" #include "contraction_test.hpp" +#include "utils.hpp" namespace hiptensor { diff --git a/test/01_contraction/contraction_test_params.hpp b/test/01_contraction/contraction_test_params.hpp index 30c99349..29c4aa1b 100644 --- a/test/01_contraction/contraction_test_params.hpp +++ b/test/01_contraction/contraction_test_params.hpp @@ -33,7 +33,7 @@ #include #include -#include "../utils.hpp" +#include "utils.hpp" namespace hiptensor { diff --git a/test/02_permutation/CMakeLists.txt b/test/02_permutation/CMakeLists.txt new file mode 100644 index 00000000..4334901c --- /dev/null +++ b/test/02_permutation/CMakeLists.txt @@ -0,0 +1,35 @@ +############################################################################### + # + # MIT License + # + # Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to deal + # in the Software without restriction, including without limitation the rights + # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + # copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in + # all copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + # THE SOFTWARE. + # + ############################################################################### +set(PermutationCommonSources ${HIPTENSOR_COMMON_TEST_SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/permutation_resource.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/permutation_test.cpp) + +# tests +set (PermutationTestSources ${PermutationCommonSources} + ${CMAKE_CURRENT_SOURCE_DIR}/permutation_column_major_test.cpp) +set (PermutationTestConfig ${CMAKE_CURRENT_SOURCE_DIR}/configs/test_params.yaml) +add_hiptensor_test(permutation_test ${PermutationTestConfig} ${PermutationTestSources}) + diff --git a/test/02_permutation/configs/test_params.yaml b/test/02_permutation/configs/test_params.yaml new file mode 100644 index 00000000..f6aa92e2 --- /dev/null +++ b/test/02_permutation/configs/test_params.yaml @@ -0,0 +1,41 @@ +--- +Log Level: [ HIPTENSOR_LOG_LEVEL_ERROR, HIPTENSOR_LOG_LEVEL_PERF_TRACE ] +Tensor Data Types: + - [ HIP_R_32F, HIP_R_32F] + - [ HIP_R_16F, HIP_R_16F] + - [ HIP_R_16F, HIP_R_32F] +Alphas: + - 0.0 + - 1.0 + - 2.3 +Lengths: + - [ 1, 1, 1, 1] + - [ 5, 2, 3, 4] + - [ 5, 2, 1, 1] + - [ 15, 12, 23, 11] +Permuted Dims: + - [0, 1, 2, 3] + - [0, 1, 3, 2] + - [0, 2, 1, 3] + - [0, 2, 3, 1] + - [0, 3, 1, 2] + - [0, 3, 2, 1] + - [1, 0, 2, 3] + - [1, 0, 3, 2] + - [1, 2, 0, 3] + - [1, 2, 3, 0] + - [1, 3, 0, 2] + - [1, 3, 2, 0] + - [2, 0, 1, 3] + - [2, 0, 3, 1] + - [2, 1, 0, 3] + - [2, 1, 3, 0] + - [2, 3, 0, 1] + - [2, 3, 1, 0] + - [3, 0, 1, 2] + - [3, 0, 2, 1] + - [3, 1, 0, 2] + - [3, 1, 2, 0] + - [3, 2, 0, 1] + - [3, 2, 1, 0] +... diff --git a/test/singleton.hpp b/test/02_permutation/permutation_column_major_test.cpp similarity index 74% rename from test/singleton.hpp rename to test/02_permutation/permutation_column_major_test.cpp index eedf37c1..9a4ace70 100644 --- a/test/singleton.hpp +++ b/test/02_permutation/permutation_column_major_test.cpp @@ -24,25 +24,25 @@ * *******************************************************************************/ -#ifndef HIPTENSOR_TEST_SINGLETON_HPP -#define HIPTENSOR_TEST_SINGLETON_HPP +#include +#include -#include +#include "permutation_test.hpp" +#include "permutation_test_helpers.hpp" -namespace hiptensor +class PermutationTest : public hiptensor::PermutationTest { +}; - template - class LazySingleton +TEST_P(PermutationTest, RunKernel) +{ + static bool ranWarmup = false; + if(!ranWarmup) { - public: - static inline std::unique_ptr const& instance() - { - static auto sInstance = std::make_unique(); - return sInstance; - } - }; - -} // namespace hiptensor + this->Warmup(); + ranWarmup = true; + } + this->RunKernel(); +} -#endif // HIPTENSOR_TEST_SINGLETON_HPP +INSTANTIATE_TEST_SUITE_P(PermutationTests, PermutationTest, load_config_helper()); diff --git a/test/02_permutation/permutation_resource.cpp b/test/02_permutation/permutation_resource.cpp new file mode 100644 index 00000000..4323e01d --- /dev/null +++ b/test/02_permutation/permutation_resource.cpp @@ -0,0 +1,161 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#ifndef HIPTENSOR_PERMUTATION_RESOURCE_IMPL_HPP +#define HIPTENSOR_PERMUTATION_RESOURCE_IMPL_HPP + +#include "permutation_resource.hpp" +#include "types.hpp" +#include "utils.hpp" + +namespace hiptensor +{ + + PermutationResource::PermutationResource() + : HipResource() + , mDeviceA(Base::allocDevice(0)) + , mDeviceB(Base::allocDevice(0)) + , mHostA(Base::allocHost(0)) + , mHostB(Base::allocHost(0)) + , mCurrentMatrixElement(0) + , mCurrentDataType(HIP_R_32F) + , mCurrentAllocByte(0) + { + } + + PermutationResource::PermutationResource(PermutationResource&& rhs) + : HipResource() + , mDeviceA(std::move(rhs.mDeviceA)) + , mDeviceB(std::move(rhs.mDeviceB)) + , mHostA(std::move(rhs.mHostA)) + , mHostB(std::move(rhs.mHostB)) + , mCurrentMatrixElement(rhs.mCurrentMatrixElement) + , mCurrentDataType(rhs.mCurrentDataType) + , mCurrentAllocByte(rhs.mCurrentAllocByte) + { + } + + void PermutationResource::setupStorage(ProblemDims const& dimSizes, hipDataType dataType) + { + auto requiredElementCount = getProduct(dimSizes); + auto requiredMemorySize = requiredElementCount * hipDataTypeSize(dataType); + + bool needFillData = false; + if(requiredMemorySize > mCurrentAllocByte) + { + Base::reallocDeviceHostPair(mDeviceA, mHostA, requiredMemorySize); + Base::reallocDeviceHostPair(mDeviceB, mHostB, requiredMemorySize); + Base::reallocDeviceHostPair(mDeviceReference, mHostReference, requiredMemorySize); + mCurrentAllocByte = requiredMemorySize; + needFillData = true; + } + else if(mCurrentDataType != dataType) + { + needFillData = true; + } + mCurrentMatrixElement = requiredElementCount; + mCurrentDataType = dataType; + if(needFillData) + { + fillRandToA(); + } + } + + void PermutationResource::reset() + { + Base::reallocDeviceHostPair(mDeviceA, mHostA, 0); + Base::reallocDeviceHostPair(mDeviceB, mHostB, 0); + Base::reallocDeviceHostPair(mDeviceReference, mHostReference, 0); + mCurrentMatrixElement = 0; + mCurrentDataType = HIP_R_32F; + mCurrentAllocByte = 0; + } + + void PermutationResource::fillRandToA() + { + if(mCurrentDataType == HIP_R_32F) + { + fillLaunchKernel((float*)deviceA().get(), mCurrentMatrixElement); + } + else + { + fillLaunchKernel<_Float16>((_Float16*)deviceA().get(), mCurrentMatrixElement); + } + Base::copyData(hostA(), deviceA(), getCurrentMatrixMemorySize()); + } + + void PermutationResource::copyBToHost() + { + Base::copyData(hostB(), deviceB(), getCurrentMatrixMemorySize()); + } + + void PermutationResource::copyReferenceToDevice() + { + Base::copyData(deviceReference(), hostReference(), getCurrentMatrixMemorySize()); + } + + size_t PermutationResource::getCurrentMatrixElement() const + { + return mCurrentMatrixElement; + } + + size_t PermutationResource::getCurrentMatrixMemorySize() const + { + return mCurrentMatrixElement * hipDataTypeSize(mCurrentDataType); + } + + auto PermutationResource::hostA() -> HostPtrT& + { + return mHostA; + } + + auto PermutationResource::hostB() -> HostPtrT& + { + return mHostB; + } + + auto PermutationResource::hostReference() -> HostPtrT& + { + return mHostReference; + } + + auto PermutationResource::deviceA() -> DevicePtrT& + { + return mDeviceA; + } + + auto PermutationResource::deviceB() -> DevicePtrT& + { + return mDeviceB; + } + + auto PermutationResource::deviceReference() -> DevicePtrT& + { + return mDeviceReference; + } +} // namespace hiptensor + +#endif // HIPTENSOR_PERMUTATION_RESOURCE_IMPL_HPP diff --git a/test/02_permutation/permutation_resource.hpp b/test/02_permutation/permutation_resource.hpp new file mode 100644 index 00000000..2e0d77ef --- /dev/null +++ b/test/02_permutation/permutation_resource.hpp @@ -0,0 +1,104 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#ifndef HIPTENSOR_PERMUTATION_RESOURCE_HPP +#define HIPTENSOR_PERMUTATION_RESOURCE_HPP + +#include +#include + +#include "hip_resource.hpp" +#include "singleton.hpp" + +// PermutationResource class is intended to manage a shared pool of resources for +// testing hiptensor contraction kernels on the GPU. +// +// It minimizes the memory handling overhead for launching thousands of GPU +// kernels by allowing re-use of existing memory allocations. Memory is only +// re-allocated as necessary to satisfy minimum size requirements. +// +// The interface indicates memory ownership by this class and shall only be +// used to access for read/write purposes. +// +// Currently uses HIP as the backend for device allocation. + +namespace hiptensor +{ + + struct PermutationResource : public HipResource, public LazySingleton + { + // For static initialization + friend std::unique_ptr std::make_unique(); + + using Base = HipResource; + + public: + using DevicePtrT = Base::DevicePtrT; + using HostPtrT = Base::HostPtrT; + + // N, C, W, H + using ProblemDims = std::vector; + + private: // No public instantiation except make_unique. + // No copy + PermutationResource(); + PermutationResource(const PermutationResource&) = delete; + PermutationResource& operator=(const PermutationResource&) = delete; + + public: + PermutationResource(PermutationResource&&); + virtual ~PermutationResource() = default; + + void setupStorage(ProblemDims const& dimSizes, hipDataType dataType); + void fillRandToA(); + void copyBToHost(); + void copyReferenceToDevice(); + + HostPtrT& hostA(); + HostPtrT& hostB(); + HostPtrT& hostReference(); + + DevicePtrT& deviceA(); + DevicePtrT& deviceB(); + DevicePtrT& deviceReference(); + + size_t getCurrentMatrixElement() const; + size_t getCurrentMatrixMemorySize() const; + void reset() final; + + protected: + DevicePtrT mDeviceA, mDeviceB, mDeviceReference; + HostPtrT mHostA, mHostB, mHostReference; + + size_t mCurrentMatrixElement; /**< Element count of A/B */ + hipDataType + mCurrentDataType; /**< Type size of element of A/B, only support HIP_R_16F, HIP_R_32F */ + size_t mCurrentAllocByte; /**< Allocated size of memory */ + }; + +} // namespace hiptensor + +#endif // HIPTENSOR_PERMUTATION_RESOURCE_HPP diff --git a/test/02_permutation/permutation_test.cpp b/test/02_permutation/permutation_test.cpp new file mode 100644 index 00000000..dbb52d6e --- /dev/null +++ b/test/02_permutation/permutation_test.cpp @@ -0,0 +1,306 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include + +#include "logger.hpp" +#include "permutation/permutation_cpu_reference.hpp" +#include "permutation_test.hpp" +#include "types.hpp" +#include "utils.hpp" +#include "llvm/hiptensor_options.hpp" + +namespace hiptensor +{ + /*static*/ std::stringstream PermutationTest::sAPILogBuff = std::stringstream(); + + static void logMessage(int32_t logLevel, const char* funcName /*=""*/, const char* msg /*=""*/) + { + PermutationTest::sAPILogBuff << msg; + } + + PermutationTest::PermutationTest() + : Base() + { + reset(); + + // Handle our own outputs + hiptensorLoggerOpenFile("/dev/null"); + hiptensorLoggerSetCallback(logMessage); + } + + // Kernel run checks. Virtual as different Permutation kernels have different requirements + // True = run test + // False = skip test + bool PermutationTest::checkDevice(hipDataType datatype) const + { + return (isF32Supported() && datatype == HIP_R_32F) + || (isF64Supported() && datatype == HIP_R_64F); + } + + bool PermutationTest::checkSizes() const + { + return true; + } + + void PermutationTest::reset() + { + handle = nullptr; + + mRepeats = 1u; + mRunFlag = true; + mValidationResult = false; + mMaxRelativeError = 0.0; + } + + PermutationResource* PermutationTest::getResource() const + { + return DataStorage::instance().get(); + } + + void PermutationTest::SetUp() + { + // reset API log buffer + sAPILogBuff.str(std::string()); + + auto param = Base::GetParam(); + auto testType = std::get<0>(param); + auto logLevel = std::get<1>(param); + auto lengths = std::get<2>(param); + auto permutedDims = std::get<3>(param); + auto alpha = std::get<4>(param); + + // 4D tensors only at the moment. + EXPECT_EQ(lengths.size(), 4); // Format {'n', 'c', 'w', 'h'} + EXPECT_EQ(permutedDims.size(), 4); // permutation of {0, 1, 2, 3} + + EXPECT_EQ(testType.size(), 2); // HIP_R_16F or HIP_R_32F + auto abDataType = testType[0]; + EXPECT_TRUE((abDataType == HIP_R_16F) || (abDataType == HIP_R_32F)); + + getResource()->setupStorage(lengths, abDataType); + + // set mPrintElements to true to print element + mPrintElements = false; + } + + void PermutationTest::reportResults(std::ostream& stream, + hipDataType dataType, + bool omitSkipped, + bool omitFailed, + bool omitPassed) const + { + // Conditionally print outputs + if((mRunFlag || !omitSkipped) && (mValidationResult || !omitFailed) + && (!mValidationResult || !omitPassed)) + { + stream << PermutationTest::sAPILogBuff.str(); + + if(mPrintElements) + { + auto resource = getResource(); + + size_t elementsA = resource->getCurrentMatrixElement(); + size_t elementsB = elementsA; + + if(dataType == HIP_R_32F) + { + stream << "Tensor A elements (" << elementsA << "):\n"; + hiptensorPrintArrayElements( + stream, (float*)resource->hostA().get(), elementsA); + stream << std::endl; + + stream << "Tensor B elements (" << elementsB << "):\n"; + hiptensorPrintArrayElements( + stream, (float*)resource->hostB().get(), elementsB); + stream << std::endl; + } + else + { + stream << "Tensor A elements (" << elementsA << "):\n"; + hiptensorPrintArrayElements<_Float16>( + stream, (_Float16*)resource->hostA().get(), elementsA); + stream << std::endl; + + stream << "Tensor B elements (" << elementsB << "):\n"; + hiptensorPrintArrayElements<_Float16>( + stream, (_Float16*)resource->hostB().get(), elementsB); + stream << std::endl; + } + } + } + } + + void PermutationTest::RunKernel() + { + auto param = Base::GetParam(); + auto testType = std::get<0>(param); + auto logLevel = std::get<1>(param); + auto lengths = std::get<2>(param); + auto permutedDims = std::get<3>(param); + auto alpha = std::get<4>(param); + + auto abDataType = testType[0]; + auto computeDataType = testType[1]; + + if(!mRunFlag) + { + GTEST_SKIP(); + } + auto resource = getResource(); + + if(mRunFlag) + { + /********************** + B_{w, h, c, n} = 1.0 * \textsl{IDENTITY}(A_{c, n, h, w}) + **********************/ + + std::vector modeA{'n', 'c', 'w', 'h'}; + std::vector modeB; + for(auto dim : permutedDims) + { + modeB.push_back(modeA[dim]); + } + + int nmodeA = modeA.size(); + int nmodeB = modeB.size(); + std::unordered_map extent; + for(auto [modeIt, i] = std::tuple{modeA.begin(), 0}; modeIt != modeA.end(); + ++modeIt, ++i) + { + extent[*modeIt] = lengths[i]; + } + + std::vector extentA; + for(auto mode : modeA) + extentA.push_back(extent[mode]); + std::vector extentB; + for(auto mode : modeB) + extentB.push_back(extent[mode]); + + hiptensorStatus_t err; + hiptensorHandle_t* handle; + CHECK_HIPTENSOR_ERROR(hiptensorCreate(&handle)); + + hiptensorTensorDescriptor_t descA; + CHECK_HIPTENSOR_ERROR(hiptensorInitTensorDescriptor(handle, + &descA, + nmodeA, + extentA.data(), + NULL /* stride */, + abDataType, + HIPTENSOR_OP_IDENTITY)); + + hiptensorTensorDescriptor_t descB; + CHECK_HIPTENSOR_ERROR(hiptensorInitTensorDescriptor(handle, + &descB, + nmodeB, + extentB.data(), + NULL /* stride */, + abDataType, + HIPTENSOR_OP_IDENTITY)); + + float alphaValue{}; + if(computeDataType == HIP_R_16F) + { + *(reinterpret_cast<_Float16*>(&alphaValue)) = static_cast<_Float16>(alpha); + } + else + { + *(reinterpret_cast(&alphaValue)) = static_cast(alpha); + } + CHECK_HIPTENSOR_ERROR(hiptensorPermutation(handle, + &alphaValue, + resource->deviceA().get(), + &descA, + modeA.data(), + resource->deviceB().get(), + &descB, + modeB.data(), + computeDataType, + 0 /* stream */)); + resource->copyBToHost(); + + if(abDataType == HIP_R_32F) + { + hiptensor::detail::permuteByCpu(&alphaValue, + (const float*)resource->hostA().get(), + &descA, + modeA.data(), + (float*)resource->hostReference().get(), + &descB, + modeB.data(), + computeDataType); + resource->copyReferenceToDevice(); + std::tie(mValidationResult, mMaxRelativeError) + = compareEqualLaunchKernel((float*)resource->deviceB().get(), + (float*)resource->deviceReference().get(), + resource->getCurrentMatrixElement()); + } + else if(abDataType == HIP_R_16F) + { + hiptensor::detail::permuteByCpu(&alphaValue, + (const _Float16*)resource->hostA().get(), + &descA, + modeA.data(), + (_Float16*)resource->hostReference().get(), + &descB, + modeB.data(), + computeDataType); + resource->copyReferenceToDevice(); + std::tie(mValidationResult, mMaxRelativeError) = compareEqualLaunchKernel<_Float16>( + (_Float16*)resource->deviceB().get(), + (_Float16*)resource->deviceReference().get(), + resource->getCurrentMatrixElement()); + } + } + + EXPECT_TRUE(mValidationResult) << "Max relative error: " << mMaxRelativeError; + + using Options = hiptensor::HiptensorOptions; + auto& loggingOptions = Options::instance(); + + if(!loggingOptions->omitCout()) + { + reportResults(std::cout, + abDataType, + loggingOptions->omitSkipped(), + loggingOptions->omitFailed(), + loggingOptions->omitPassed()); + } + + if(loggingOptions->ostream().isOpen()) + { + reportResults(loggingOptions->ostream().fstream(), + abDataType, + loggingOptions->omitSkipped(), + loggingOptions->omitFailed(), + loggingOptions->omitPassed()); + } + } + + void PermutationTest::TearDown() {} + +} // namespace hiptensor diff --git a/test/02_permutation/permutation_test.hpp b/test/02_permutation/permutation_test.hpp new file mode 100644 index 00000000..9009d8ea --- /dev/null +++ b/test/02_permutation/permutation_test.hpp @@ -0,0 +1,105 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#ifndef HIPTENSOR_PERMUTATION_TEST_HPP +#define HIPTENSOR_PERMUTATION_TEST_HPP + +#include + +#include + +#include "permutation_resource.hpp" +#include "permutation_test_params.hpp" + +#include + +namespace hiptensor +{ + static void logMessage(int32_t logLevel, const char* funcName = "", const char* msg = ""); + + using PermutationTestParams_t = std::tuple; + class PermutationTest : public ::testing::TestWithParam + { + protected: // Types + using Base = ::testing::TestWithParam; + + // Shared access to Permutation storage + using DataStorage = PermutationResource; + + friend void logMessage(int32_t, const char*, const char*); + + public: + PermutationTest(); + virtual ~PermutationTest() = default; + + protected: // Functions + PermutationTest(PermutationTest&&) = delete; + PermutationTest(PermutationTest const&) = delete; + PermutationTest& operator=(PermutationTest&) = delete; + PermutationTest& operator=(PermutationTest&&) = delete; + + bool checkDevice(hipDataType datatype) const; + bool checkSizes() const; + void reset(); + + PermutationResource* getResource() const; + + void SetUp() final; + void TearDown() final; + + void Warmup() {} + void RunKernel(); + + void reportResults(std::ostream& stream, + hipDataType DDataType, + bool omitSkipped, + bool omitFailed, + bool omitPassed) const; + + protected: + // Workspace items + hiptensorHandle_t* handle = nullptr; + + hiptensorTensorDescriptor_t a_ms_ks, b_ns_ks, c_ms_ns, d_ms_ns; + + // Execution flow control + uint32_t mRepeats; + bool mRunFlag = true; + bool mValidationResult = false; + bool mPrintElements = false; + double mMaxRelativeError; + + // Output buffer + static std::stringstream sAPILogBuff; + }; + +} // namespace hiptensor + +#endif // HIPTENSOR_PERMUTATION_TEST_HPP diff --git a/test/02_permutation/permutation_test_helpers.hpp b/test/02_permutation/permutation_test_helpers.hpp new file mode 100644 index 00000000..ac34a21a --- /dev/null +++ b/test/02_permutation/permutation_test_helpers.hpp @@ -0,0 +1,68 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#ifndef HIPTENSOR_PERMUTATION_TEST_HELPERS_HPP +#define HIPTENSOR_PERMUTATION_TEST_HELPERS_HPP + +#include + +#include "llvm/hiptensor_options.hpp" +#include "llvm/yaml_parser.hpp" + +#ifdef HIPTENSOR_TEST_YAML_INCLUDE +#include HIPTENSOR_TEST_YAML_INCLUDE +#define HIPTENSOR_TEST_YAML_BUNDLE 1 +#else +#define HIPTENSOR_TEST_YAML_BUNDLE 0 +#endif // HIPTENSOR_TEST_YAML_INCLUDE + +auto inline load_config_helper() +{ + hiptensor::PermutationTestParams testParams; + using Options = hiptensor::HiptensorOptions; + auto& testOptions = Options::instance(); + + if(testOptions->usingDefaultConfig() && HIPTENSOR_TEST_YAML_BUNDLE) + { + testParams = hiptensor::YamlConfigLoader::loadFromString( + HIPTENSOR_TEST_GET_YAML); + } + else + { + testParams = hiptensor::YamlConfigLoader::loadFromFile( + testOptions->inputFilename()); + } + + // testParams.printParams(); + + return ::testing::Combine(::testing::ValuesIn(testParams.dataTypes()), + ::testing::Values(testParams.logLevelMask()), + ::testing::ValuesIn(testParams.problemLengths()), + ::testing::ValuesIn(testParams.permutedDims()), + ::testing::ValuesIn(testParams.alphas())); +} + +#endif // HIPTENSOR_PERMUTATION_TEST_HELPERS_HPP diff --git a/test/02_permutation/permutation_test_params.hpp b/test/02_permutation/permutation_test_params.hpp new file mode 100644 index 00000000..f526ad47 --- /dev/null +++ b/test/02_permutation/permutation_test_params.hpp @@ -0,0 +1,95 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *******************************************************************************/ + +#ifndef HIPTENSOR_PERMUTATION_TEST_PARAMS_HPP +#define HIPTENSOR_PERMUTATION_TEST_PARAMS_HPP + +#include +#include + +#include +#include + +#include "utils.hpp" + +namespace hiptensor +{ + + struct PermutationTestParams + { + using TestTypesT = std::vector; + using LogLevelT = hiptensorLogLevel_t; + using LengthsT = std::vector; + using AlphaT = double; + using PermutedDimsT = std::vector; + + public: + std::vector& dataTypes() + { + return mDataTypes; + } + + LogLevelT& logLevelMask() + { + return mLogLevelMask; + } + + std::vector& problemLengths() + { + return mProblemLengths; + } + + std::vector& permutedDims() + { + return mPermutedDims; + } + + std::vector& alphas() + { + return mAlphas; + } + + void printParams() + { + std::cout << "DataTypes: " << mDataTypes << "\n" + << "LogLevelMask: " << mLogLevelMask << "\n" + << "ProblemLengths: " << mProblemLengths << "\n" + << "Alphas: " << mAlphas << "\n" + << "PermutedDims: " << mPermutedDims << "\n"; + } + + private: + //Data types of input and output tensors + std::vector mDataTypes; + LogLevelT mLogLevelMask; + std::vector mProblemLengths; + std::vector mAlphas; + std::vector mPermutedDims; + }; + +} // namespace hiptensor + +#endif // HIPTENSOR_PERMUTATION_TEST_PARAMS_HPP diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e9f791cd..06967a09 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -101,7 +101,8 @@ function(add_hiptensor_test BINARY_NAME YAML_CONFIG_FILE TEST_SOURCES) target_include_directories(${BINARY_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/library/include - ${PROJECT_SOURCE_DIR}/library/src/contraction + ${PROJECT_SOURCE_DIR}/library/src/include + ${PROJECT_SOURCE_DIR}/library/src ${PROJECT_SOURCE_DIR}/test ${hiptensor_test_YAML_DIR}) @@ -179,6 +180,7 @@ endfunction() add_subdirectory(00_unit) add_subdirectory(01_contraction) +add_subdirectory(02_permutation) rocm_install( FILES "${INSTALL_TEST_FILE}" diff --git a/test/device/common.hpp b/test/device/common.hpp index 8ad75dd4..f961abc1 100644 --- a/test/device/common.hpp +++ b/test/device/common.hpp @@ -66,7 +66,7 @@ __global__ static void // fill kernel for 'elementSize' elements template -__global__ void fillKernel(DataType* data, uint32_t elementSize) +__global__ void fillKernel(DataType* data, uint32_t elementSize, uint32_t seed) { uint32_t index = (blockIdx.x * blockDim.x + threadIdx.x); diff --git a/test/llvm/CMakeLists.txt b/test/llvm/CMakeLists.txt index 064746bb..6708ae2e 100644 --- a/test/llvm/CMakeLists.txt +++ b/test/llvm/CMakeLists.txt @@ -56,11 +56,13 @@ set(HIPTENSOR_LLVM_LIBS "-L${LLVM_LIBRARY_DIR}" # Includes set(HIPTENSOR_LLVM_INCLUDES ${LLVM_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/library/include + ${PROJECT_SOURCE_DIR}/library/src/include + ${PROJECT_SOURCE_DIR}/library/src ${PROJECT_SOURCE_DIR}/test ) # Sources for this static object -set(HIPTENSOR_LLVM_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/yaml_parser_contraction_config.cpp +set(HIPTENSOR_LLVM_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/yaml_parser_config.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hiptensor_options.cpp) # Create hiptensor_llvm object target diff --git a/test/llvm/yaml_parser_contraction_config.cpp b/test/llvm/yaml_parser_config.cpp similarity index 85% rename from test/llvm/yaml_parser_contraction_config.cpp rename to test/llvm/yaml_parser_config.cpp index 6982eeea..46f4c43e 100644 --- a/test/llvm/yaml_parser_contraction_config.cpp +++ b/test/llvm/yaml_parser_config.cpp @@ -31,6 +31,7 @@ #include #include "01_contraction/contraction_test_params.hpp" +#include "02_permutation/permutation_test_params.hpp" #include "yaml_parser_impl.hpp" // Fwd declare NoneType @@ -108,6 +109,7 @@ namespace llvm { static void enumeration(IO& io, hipDataType& value) { + io.enumCase(value, "HIP_R_16F", HIP_R_16F); io.enumCase(value, "HIP_R_32F", HIP_R_32F); io.enumCase(value, "HIP_R_64F", HIP_R_64F); io.enumCase(value, "NONE_TYPE", hiptensor::NONE_TYPE); @@ -209,7 +211,7 @@ namespace llvm }; /// - // Mapping of the test param elements for reading / writing. + // Mapping of the test param elements of ContractionTestParams for reading / writing. /// template <> struct MappingTraits @@ -269,12 +271,53 @@ namespace llvm } }; + /// + // Mapping of the test param elements of PermutationTestParams for reading / writing. + /// + template <> + struct MappingTraits + { + static void mapping(IO& io, hiptensor::PermutationTestParams& doc) + { + // Logging bitfield + io.mapRequired("Log Level", doc.logLevelMask()); + + // Sequences of combinatorial fields + io.mapRequired("Tensor Data Types", doc.dataTypes()); + io.mapRequired("Alphas", (std::vector&)(doc.alphas())); + io.mapRequired("Lengths", doc.problemLengths()); + io.mapRequired("Permuted Dims", doc.permutedDims()); + } + + // Additional validation for input / output of the config + static std::string validate(IO& io, hiptensor::PermutationTestParams& doc) + { + if(doc.problemLengths().size() == 0) + { + return "Error: Empty Lengths"; + } + + if(doc.alphas().size() == 0) + { + return "Error: Empty Alphas"; + } + + if(doc.permutedDims().size() == 0) + { + return "Error: Empty Permuted Dims"; + } + + return std::string{}; + } + }; + } // namespace yaml } // namespace llvm -// Instantiate the yaml loader for the ContractionTestParams +// Instantiate the yaml loader for the ContractionTestParams and PermutationTestParams namespace hiptensor { template struct YamlConfigLoader; + template struct YamlConfigLoader; } diff --git a/test/utils.hpp b/test/utils.hpp index 421c1a04..67418568 100644 --- a/test/utils.hpp +++ b/test/utils.hpp @@ -28,6 +28,7 @@ #define HIPTENSOR_TEST_UTILS_HPP #include +#include #include #include #include @@ -49,10 +50,10 @@ CHECK_HIP_ERROR(hipFree(ptr)); \ } -#define HIPTENSOR_FREE_HOST(ptr) \ - if(ptr != nullptr) \ - { \ - free(ptr); \ +#define HIPTENSOR_FREE_HOST(ptr) \ + if(ptr != nullptr) \ + { \ + CHECK_HIP_ERROR(hipHostFree(ptr)); \ } inline bool isF32Supported() @@ -97,13 +98,30 @@ static constexpr intT1 ceilDiv(const intT1 numerator, const intT2 divisor) return (numerator + divisor - 1) / divisor; } +template +auto getProduct(const Container& container, + typename Container::value_type init = typename Container::value_type{1}) +{ + return std::accumulate(std::begin(container), + std::end(container), + init, + std::multiplies{}); +} + // fill kernel for 'elementSize' elements template __host__ static inline void fillLaunchKernel(DataType* data, uint32_t elementSize) { auto blockDim = dim3(1024, 1, 1); auto gridDim = dim3(ceilDiv(elementSize, blockDim.x), 1, 1); - hipLaunchKernelGGL((fillKernel), gridDim, blockDim, 0, 0, data, elementSize); + hipLaunchKernelGGL((fillKernel), + gridDim, + blockDim, + 0, + 0, + data, + elementSize, + static_cast(std::time(nullptr))); } // fill kernel wrapper for 'elementSize' elements with a specific value @@ -191,6 +209,19 @@ std::pair compareEqual(DDataType const* deviceD, return std::make_pair(retval, max_relative_error); } +template +double getEpsilon() +{ + if(std::is_same_v) + { + return 0.0009765625; // numeric_limits<_Float16>::epsilon() => 0 + } + else + { + return std::numeric_limits::epsilon(); + } +}; + template std::pair compareEqualLaunchKernel(DDataType* deviceD, DDataType* hostD, @@ -257,7 +288,7 @@ std::pair compareEqualLaunchKernel(DDataType* deviceD, auto toDouble = [](DDataType const& val) { return static_cast(static_cast(val)); }; - auto eps = toDouble(std::numeric_limits::epsilon()); + auto eps = getEpsilon(); if(isNaN) { retval = false; @@ -276,6 +307,7 @@ namespace std template ostream& operator<<(ostream& os, const std::vector& vec) { + os << "[ "; for(auto i = 0; i < vec.size(); i++) { if(i < vec.size() - 1) @@ -287,6 +319,7 @@ namespace std os << vec[i]; } } + os << " ]"; return os; } From 082a9662a46135fb083ce2028a55d792a9852fdf Mon Sep 17 00:00:00 2001 From: Cong Ma Date: Wed, 15 Nov 2023 19:02:38 +0000 Subject: [PATCH 4/5] Support both row_major and col_major data layout Data layout of cuTenor is col_major by default. HipTensor contraction only supports row_major for now. HipTensor supports both col_major and row_major to make users be able to choose which layout they want to use. - Use CMake option `HIPTENSOR_DATA_LAYOUT_COL_MAJOR` to choose layout --- CMakeLists.txt | 8 ++ .../src/contraction/hiptensor_contraction.cpp | 2 +- .../src/permutation/hiptensor_permutation.cpp | 2 +- ...mutation_ck_col.hpp => permutation_ck.hpp} | 2 +- ...k_col_impl.hpp => permutation_ck_impl.hpp} | 88 +++++++++++++++---- .../permutation_cpu_reference_impl.hpp | 22 ++++- samples/02_permutation/permutation.cpp | 9 +- 7 files changed, 106 insertions(+), 27 deletions(-) rename library/src/permutation/{permutation_ck_col.hpp => permutation_ck.hpp} (98%) rename library/src/permutation/{permutation_ck_col_impl.hpp => permutation_ck_impl.hpp} (64%) diff --git a/CMakeLists.txt b/CMakeLists.txt index c88bd54f..6fd11fb8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,7 @@ set(CMAKE_CXX_EXTENSIONS OFF) if( CMAKE_PROJECT_NAME STREQUAL "hiptensor" ) option( HIPTENSOR_BUILD_TESTS "Build hiptensor tests" ON ) option( HIPTENSOR_BUILD_SAMPLES "Build hiptensor samples" ON ) + option( HIPTENSOR_DATA_LAYOUT_COL_MAJOR "Set hiptensor data layout to column major" ON ) endif() # Setup output paths @@ -93,6 +94,13 @@ else() endif() message( VERBOSE "AMDGPU_TARGETS=${AMDGPU_TARGETS}") +if(HIPTENSOR_DATA_LAYOUT_COL_MAJOR) + add_compile_definitions(HIPTENSOR_DATA_LAYOUT_COL_MAJOR=1) +else() + add_compile_definitions(HIPTENSOR_DATA_LAYOUT_COL_MAJOR=0) +endif() +message("-- HIPTENSOR_DATA_LAYOUT_COL_MAJOR=${HIPTENSOR_DATA_LAYOUT_COL_MAJOR}") + # Setup HIP find_package(hip REQUIRED ) message(STATUS "HIP version: ${hip_VERSION}") diff --git a/library/src/contraction/hiptensor_contraction.cpp b/library/src/contraction/hiptensor_contraction.cpp index 078689ea..09f5ddf6 100644 --- a/library/src/contraction/hiptensor_contraction.cpp +++ b/library/src/contraction/hiptensor_contraction.cpp @@ -87,7 +87,7 @@ hiptensorStatus_t hiptensorInitContractionDescriptor(const hiptensorHandle_t* auto& logger = Logger::instance(); // Log API access - char msg[1024]; + char msg[2048]; snprintf( msg, sizeof(msg), diff --git a/library/src/permutation/hiptensor_permutation.cpp b/library/src/permutation/hiptensor_permutation.cpp index b3d60379..2b092655 100644 --- a/library/src/permutation/hiptensor_permutation.cpp +++ b/library/src/permutation/hiptensor_permutation.cpp @@ -26,7 +26,7 @@ #include #include "logger.hpp" -#include "permutation_ck_col.hpp" +#include "permutation_ck.hpp" hiptensorStatus_t hiptensorPermutation(const hiptensorHandle_t* handle, const void* alpha, diff --git a/library/src/permutation/permutation_ck_col.hpp b/library/src/permutation/permutation_ck.hpp similarity index 98% rename from library/src/permutation/permutation_ck_col.hpp rename to library/src/permutation/permutation_ck.hpp index 600658c6..8fa4959c 100644 --- a/library/src/permutation/permutation_ck_col.hpp +++ b/library/src/permutation/permutation_ck.hpp @@ -44,5 +44,5 @@ namespace hiptensor } } -#include "permutation_ck_col_impl.hpp" +#include "permutation_ck_impl.hpp" #endif // HIPTENSOR_PERMUTATION_CK_COL_HPP diff --git a/library/src/permutation/permutation_ck_col_impl.hpp b/library/src/permutation/permutation_ck_impl.hpp similarity index 64% rename from library/src/permutation/permutation_ck_col_impl.hpp rename to library/src/permutation/permutation_ck_impl.hpp index 7f16bb8a..7878e908 100644 --- a/library/src/permutation/permutation_ck_col_impl.hpp +++ b/library/src/permutation/permutation_ck_impl.hpp @@ -26,11 +26,13 @@ #ifndef HIPTENSOR_PERMUTATION_CK_COL_IMPL_HPP #define HIPTENSOR_PERMUTATION_CK_COL_IMPL_HPP #include +#include #include #include #include +#include "performance.hpp" #include "types.hpp" namespace hiptensor @@ -74,35 +76,50 @@ namespace hiptensor modeToLength[modeA[index]] = descA->mLengths[index]; } + float alphaValue = readVal(alpha, typeScalar); + std::array input = {A}; + std::array output = {B}; std::unordered_map bModeToStrides; - int32_t stride = 1; - bModeToStrides[modeB[0]] = stride; +#if HIPTENSOR_DATA_LAYOUT_COL_MAJOR + std::array aStrides + = {1, + modeToLength[modeA[0]], + modeToLength[modeA[0]] * modeToLength[modeA[1]], + modeToLength[modeA[0]] * modeToLength[modeA[1]] * modeToLength[modeA[2]]}; + int32_t stride = 1; + bModeToStrides[modeB[0]] = stride; for(int32_t index = 1; index < modeSize; index++) { stride *= modeToLength[modeB[index - 1]]; bModeToStrides[modeB[index]] = stride; } - - float alphaValue = readVal(alpha, typeScalar); - std::array input = {A}; - std::array output = {B}; - std::array a_strides - = {1, - modeToLength[modeA[0]], - modeToLength[modeA[0]] * modeToLength[modeA[1]], - modeToLength[modeA[0]] * modeToLength[modeA[1]] * modeToLength[modeA[2]]}; - std::array b_strides = {bModeToStrides[modeA[0]], +#else // HIPTENSOR_DATA_LAYOUT_COL_MAJOR + std::array aStrides = { + modeToLength[modeA[1]] * modeToLength[modeA[2]] * modeToLength[modeA[3]], + modeToLength[modeA[2]] * modeToLength[modeA[3]], + modeToLength[modeA[3]], + 1, + }; + int32_t stride = 1; + bModeToStrides[modeB[modeSize - 1]] = stride; + for(int32_t index = modeSize - 2; index >= 0; index--) + { + stride *= modeToLength[modeB[index + 1]]; + bModeToStrides[modeB[index]] = stride; + } +#endif // HIPTENSOR_DATA_LAYOUT_COL_MAJOR + std::array bStrides = {bModeToStrides[modeA[0]], bModeToStrides[modeA[1]], bModeToStrides[modeA[2]], bModeToStrides[modeA[3]]}; - std::array ab_lengths = {modeToLength[modeA[0]], + std::array abLengths = {modeToLength[modeA[0]], modeToLength[modeA[1]], modeToLength[modeA[2]], modeToLength[modeA[3]]}; auto broadcastPermute = DeviceElementwisePermuteInstance{}; - auto argument = broadcastPermute.MakeArgumentPointer(ab_lengths, - {a_strides}, - {b_strides}, + auto argument = broadcastPermute.MakeArgumentPointer(abLengths, + {aStrides}, + {bStrides}, input, output, PassThrough{}, @@ -115,7 +132,44 @@ namespace hiptensor }; auto broadcastPermute_invoker_ptr = broadcastPermute.MakeInvokerPointer(); - broadcastPermute_invoker_ptr->Run(argument.get(), StreamConfig{stream, false}); + + // Perform contraction with timing if LOG_LEVEL_PERF_TRACE + using hiptensor::Logger; + auto& logger = Logger::instance(); + bool measurePermuteTime = logger->getLogMask() & HIPTENSOR_LOG_LEVEL_PERF_TRACE; + + auto permuteTime = broadcastPermute_invoker_ptr->Run( + argument.get(), StreamConfig{stream, measurePermuteTime}); + if(measurePermuteTime) + { + std::size_t problemSize + = std::accumulate(abLengths.begin(), abLengths.end(), 1, std::multiplies{}); + std::size_t flops = std::size_t(2) * problemSize; + + std::size_t bytes = 2 * sizeof(DataType) * problemSize; + float tflops = static_cast(flops) / 1.E9 / permuteTime; + float bandwidth = bytes / 1.E6 / permuteTime; + + hiptensor::PerfMetrics metrics = { + 0, // id, permute has only one solution, set id to 0 + "default solution", // name + permuteTime, // avg time + tflops, // tflops + bandwidth // BW + }; + + // log perf metrics (not name/id) + char msg[2048]; + snprintf(msg, + sizeof(msg), + "KernelId: %lu KernelName: %s, %0.3f ms, %0.3f TFlops, %0.3f GB/s", + metrics.mKernelUid, + metrics.mKernelName.c_str(), + metrics.mAvgTimeMs, + metrics.mTflops, + metrics.mBandwidth); + logger->logPerformanceTrace("hiptensorPermutation", msg); + } return HIPTENSOR_STATUS_SUCCESS; } } diff --git a/library/src/permutation/permutation_cpu_reference_impl.hpp b/library/src/permutation/permutation_cpu_reference_impl.hpp index a04e4176..d64147fe 100644 --- a/library/src/permutation/permutation_cpu_reference_impl.hpp +++ b/library/src/permutation/permutation_cpu_reference_impl.hpp @@ -56,19 +56,26 @@ namespace hiptensor bModeToIndex[modeB[index]] = index; } - auto& aLens = descA->mLengths; - // auto bStrides = descB->mStrides; // TODO descB->mStrides contains incorrect strides - auto bStrides = std::vector(modeSize, 1); + auto& aLens = descA->mLengths; + auto bStrides = std::vector(modeSize, 1); +#if HIPTENSOR_DATA_LAYOUT_COL_MAJOR for(int i = 1; i < modeSize; i++) { bStrides[i] = descB->mLengths[i - 1] * bStrides[i - 1]; } +#else // HIPTENSOR_DATA_LAYOUT_COL_MAJOR + for(int i = modeSize - 2; i >= 0; i--) + { + bStrides[i] = descB->mLengths[i + 1] * bStrides[i + 1]; + } +#endif // HIPTENSOR_DATA_LAYOUT_COL_MAJOR auto bIndices = std::vector(modeSize, 0); auto elementCount = hiptensor::elementsFromLengths(aLens); float alphaValue = readVal(alpha, typeScalar); for(int elementIndex = 0; elementIndex < elementCount; elementIndex++) { auto index = elementIndex; +#if HIPTENSOR_DATA_LAYOUT_COL_MAJOR for(int modeIndex = 0; modeIndex < modeSize; modeIndex++) { bIndices[bModeToIndex[modeA[modeIndex]]] = index % aLens[modeIndex]; @@ -76,6 +83,15 @@ namespace hiptensor } auto bOffset = std::inner_product(bIndices.begin(), bIndices.end(), bStrides.begin(), 0); +#else // HIPTENSOR_DATA_LAYOUT_COL_MAJOR + for(int modeIndex = modeSize - 1; modeIndex >= 0; modeIndex--) + { + bIndices[bModeToIndex[modeA[modeIndex]]] = index % aLens[modeIndex]; + index /= aLens[modeIndex]; + } + auto bOffset + = std::inner_product(bIndices.rbegin(), bIndices.rend(), bStrides.rbegin(), 0); +#endif // HIPTENSOR_DATA_LAYOUT_COL_MAJOR B[bOffset] = static_cast(A[elementIndex] * alphaValue); } diff --git a/samples/02_permutation/permutation.cpp b/samples/02_permutation/permutation.cpp index 2c95d076..ffae2d33 100644 --- a/samples/02_permutation/permutation.cpp +++ b/samples/02_permutation/permutation.cpp @@ -63,10 +63,10 @@ int main() int nmodeC = modeC.size(); std::unordered_map extent; - extent['h'] = 2; - extent['w'] = 3; - extent['c'] = 4; - extent['n'] = 5; + extent['h'] = 32; + extent['w'] = 33; + extent['c'] = 34; + extent['n'] = 35; std::vector extentA; for(auto mode : modeA) @@ -107,6 +107,7 @@ int main() hiptensorStatus_t err; hiptensorHandle_t* handle; CHECK_HIPTENSOR_ERROR(hiptensorCreate(&handle)); + CHECK_HIPTENSOR_ERROR(hiptensorLoggerSetMask(HIPTENSOR_LOG_LEVEL_PERF_TRACE)); hiptensorTensorDescriptor_t descA; CHECK_HIPTENSOR_ERROR(hiptensorInitTensorDescriptor( From 53d863aff10647a30e1a89632036d255f5a582bb Mon Sep 17 00:00:00 2001 From: Cong Ma Date: Wed, 15 Nov 2023 22:31:58 +0000 Subject: [PATCH 5/5] Add type related files new file: include/native_types.hpp new file: include/native_types_impl.hpp new file: include/type_traits.hpp new file: include/types_ext.hpp new file: include/xfloat32.hpp --- .../hiptensor/internal/hiptensor_utility.hpp | 5 - library/src/CMakeLists.txt | 2 +- .../contraction/contraction_meta_traits.hpp | 2 +- .../contraction_solution_params.hpp | 2 +- .../contraction_solution_params_impl.hpp | 2 +- .../contraction_solution_registry.hpp | 2 +- library/src/{types.cpp => data_types.cpp} | 2 +- library/src/hiptensor.cpp | 2 +- library/src/include/config.hpp | 104 +++++ library/src/include/data_types.hpp | 78 ++++ .../{types_impl.hpp => data_types_impl.hpp} | 8 +- library/src/include/native_types.hpp | 102 +++++ library/src/include/native_types_impl.hpp | 36 ++ library/src/include/type_traits.hpp | 390 ++++++++++++++++++ library/src/include/types.hpp | 60 +-- library/src/include/types_ext.hpp | 181 ++++++++ library/src/include/xfloat32.hpp | 334 +++++++++++++++ .../src/permutation/permutation_ck_impl.hpp | 2 +- .../permutation_cpu_reference_impl.hpp | 2 +- test/01_contraction/contraction_test.cpp | 2 +- test/02_permutation/permutation_resource.cpp | 2 +- test/02_permutation/permutation_test.cpp | 2 +- test/utils.hpp | 16 +- 23 files changed, 1248 insertions(+), 90 deletions(-) rename library/src/{types.cpp => data_types.cpp} (99%) create mode 100644 library/src/include/config.hpp create mode 100644 library/src/include/data_types.hpp rename library/src/include/{types_impl.hpp => data_types_impl.hpp} (97%) create mode 100644 library/src/include/native_types.hpp create mode 100644 library/src/include/native_types_impl.hpp create mode 100644 library/src/include/type_traits.hpp create mode 100644 library/src/include/types_ext.hpp create mode 100644 library/src/include/xfloat32.hpp diff --git a/library/include/hiptensor/internal/hiptensor_utility.hpp b/library/include/hiptensor/internal/hiptensor_utility.hpp index 5fd9e331..f2df2dd2 100644 --- a/library/include/hiptensor/internal/hiptensor_utility.hpp +++ b/library/include/hiptensor/internal/hiptensor_utility.hpp @@ -131,11 +131,6 @@ namespace std return os; } - static ostream& operator<<(ostream& os, const _Float16 value) - { - os << static_cast(value); - return os; - } } #endif // HIPTENSOR_UTILITY_INTERNAL_HPP diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt index 801d0761..286ed2e3 100644 --- a/library/src/CMakeLists.txt +++ b/library/src/CMakeLists.txt @@ -62,7 +62,7 @@ set(HIPTENSOR_CORE_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/hiptensor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/logger.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/types.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/data_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hip_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/handle.cpp ) diff --git a/library/src/contraction/contraction_meta_traits.hpp b/library/src/contraction/contraction_meta_traits.hpp index e018661b..4fa7acf7 100644 --- a/library/src/contraction/contraction_meta_traits.hpp +++ b/library/src/contraction/contraction_meta_traits.hpp @@ -34,8 +34,8 @@ #include // hiptensor includes +#include "data_types.hpp" #include "meta_traits.hpp" -#include "types.hpp" namespace hiptensor { diff --git a/library/src/contraction/contraction_solution_params.hpp b/library/src/contraction/contraction_solution_params.hpp index f55d3c85..ec9de45c 100644 --- a/library/src/contraction/contraction_solution_params.hpp +++ b/library/src/contraction/contraction_solution_params.hpp @@ -30,7 +30,7 @@ #include #include "contraction_types.hpp" -#include "types.hpp" +#include "data_types.hpp" namespace hiptensor { diff --git a/library/src/contraction/contraction_solution_params_impl.hpp b/library/src/contraction/contraction_solution_params_impl.hpp index 194fb0d5..bff33960 100644 --- a/library/src/contraction/contraction_solution_params_impl.hpp +++ b/library/src/contraction/contraction_solution_params_impl.hpp @@ -29,8 +29,8 @@ #include "contraction_meta_traits.hpp" #include "contraction_solution_params.hpp" +#include "data_types.hpp" #include "hash.hpp" -#include "types.hpp" namespace std { diff --git a/library/src/contraction/contraction_solution_registry.hpp b/library/src/contraction/contraction_solution_registry.hpp index 039e9e14..d1b80ec5 100644 --- a/library/src/contraction/contraction_solution_registry.hpp +++ b/library/src/contraction/contraction_solution_registry.hpp @@ -32,8 +32,8 @@ #include #include "contraction_types.hpp" +#include "data_types.hpp" #include "singleton.hpp" -#include "types.hpp" namespace hiptensor { diff --git a/library/src/types.cpp b/library/src/data_types.cpp similarity index 99% rename from library/src/types.cpp rename to library/src/data_types.cpp index 9cfb3290..b270973d 100644 --- a/library/src/types.cpp +++ b/library/src/data_types.cpp @@ -24,7 +24,7 @@ * *******************************************************************************/ -#include "types.hpp" +#include "data_types.hpp" namespace hiptensor { diff --git a/library/src/hiptensor.cpp b/library/src/hiptensor.cpp index 3016ef64..9740d2a8 100644 --- a/library/src/hiptensor.cpp +++ b/library/src/hiptensor.cpp @@ -27,9 +27,9 @@ #include +#include "data_types.hpp" #include "handle.hpp" #include "logger.hpp" -#include "types.hpp" #include "util.hpp" hiptensorStatus_t hiptensorCreate(hiptensorHandle_t** handle) diff --git a/library/src/include/config.hpp b/library/src/include/config.hpp new file mode 100644 index 00000000..3348968e --- /dev/null +++ b/library/src/include/config.hpp @@ -0,0 +1,104 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef HIPTENSOR_CONFIG_HPP +#define HIPTENSOR_CONFIG_HPP + +namespace hiptensor +{ + +/// +/// Architecture support +/// Guaranteed symbols: +/// HIPTENSOR_ARCH_GFX908 +/// HIPTENSOR_ARCH_GFX90a +/// HIPTENSOR_ARCH_GFX940 +/// HIPTENSOR_ARCH_GFX941 +/// HIPTENSOR_ARCH_GFX942 +#if defined(__gfx908__) +#define HIPTENSOR_ARCH_GFX908 __gfx908__ +#elif defined(__gfx90a__) +#define HIPTENSOR_ARCH_GFX90A __gfx90a__ +#elif defined(__gfx940__) +#define HIPTENSOR_ARCH_GFX940 __gfx940__ +#elif defined(__gfx941__) +#define HIPTENSOR_ARCH_GFX941 __gfx941__ +#elif defined(__gfx942__) +#define HIPTENSOR_ARCH_GFX942 __gfx942__ +#else +#define HIPTENSOR_ARCH_HOST 1 +#endif + +#if !defined(HIPTENSOR_ARCH_GFX908) +#define HIPTENSOR_ARCH_GFX908 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX90A) +#define HIPTENSOR_ARCH_GFX90A 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX940) +#define HIPTENSOR_ARCH_GFX940 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX941) +#define HIPTENSOR_ARCH_GFX941 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX942) +#define HIPTENSOR_ARCH_GFX942 0 +#endif +#if !defined(HIPTENSOR_ARCH_HOST) +#define HIPTENSOR_ARCH_HOST 0 +#endif + +#if defined(NDEBUG) +#define HIPTENSOR_UNSUPPORTED_IMPL(MSG) +#else +#define HIPTENSOR_UNSUPPORTED_IMPL(MSG) __attribute__((deprecated(MSG))) +#endif + +#if defined(HIP_NO_HALF) +#define HIPTENSOR_NO_HALF 1 +#else +#define HIPTENSOR_NO_HALF 0 +#endif // HIP_NO_HALF + +#if HIPTENSOR_NO_HALF || (!HIPTENSOR_NO_HALF && defined(__HIP_NO_HALF_CONVERSIONS__)) +#define HIPTENSOR_TESTS_NO_HALF 1 +#else +#define HIPTENSOR_TESTS_NO_HALF 0 +#endif // !HIPTENSOR_NO_HALF && defined(__HIP_NO_HALF_CONVERSIONS__) + +/// +/// Host and Device symbols +/// +#define HIPTENSOR_DEVICE __device__ + +#define HIPTENSOR_HOST __host__ + +#define HIPTENSOR_HOST_DEVICE HIPTENSOR_HOST HIPTENSOR_DEVICE + +#define HIPTENSOR_KERNEL __global__ + +} // namespace hiptensor + +#endif // HIPTENSOR_CONFIG_HPP diff --git a/library/src/include/data_types.hpp b/library/src/include/data_types.hpp new file mode 100644 index 00000000..42197650 --- /dev/null +++ b/library/src/include/data_types.hpp @@ -0,0 +1,78 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *******************************************************************************/ + +#ifndef HIPTENSOR_LIBRARY_DATA_TYPES_HPP +#define HIPTENSOR_LIBRARY_DATA_TYPES_HPP + +// clang-format off +// Include order needs to be preserved +#include +#include +#include +#include + +#include + +// clang-format on + +namespace hiptensor +{ + // Used to map to empty tensors + struct NoneType; + + static constexpr hipDataType NONE_TYPE = (hipDataType)31; + + // Map type to runtime HipDataType + template + struct HipDataType; + + template + static constexpr auto HipDataType_v = HipDataType::value; + + // Get data size in bytes from id + uint32_t hipDataTypeSize(hipDataType id); + + // Convert hipDataType to hiptensorComputeType_t + hiptensorComputeType_t convertToComputeType(hipDataType hipType); + + // Read a single value from void pointer, casted to T + template + T readVal(void const* value, hipDataType id); + + template + T readVal(void const* value, hiptensorComputeType_t id); + +} // namespace hiptensor + +bool operator==(hipDataType hipType, hiptensorComputeType_t computeType); +bool operator==(hiptensorComputeType_t computeType, hipDataType hipType); + +bool operator!=(hipDataType hipType, hiptensorComputeType_t computeType); +bool operator!=(hiptensorComputeType_t computeType, hipDataType hipType); + +#include "data_types_impl.hpp" + +#endif // HIPTENSOR_LIBRARY_DATA_TYPES_HPP diff --git a/library/src/include/types_impl.hpp b/library/src/include/data_types_impl.hpp similarity index 97% rename from library/src/include/types_impl.hpp rename to library/src/include/data_types_impl.hpp index 7a07d0b0..7df6d7d9 100644 --- a/library/src/include/types_impl.hpp +++ b/library/src/include/data_types_impl.hpp @@ -24,10 +24,10 @@ * *******************************************************************************/ -#ifndef HIPTENSOR_LIBRARY_TYPES_IMPL_HPP -#define HIPTENSOR_LIBRARY_TYPES_IMPL_HPP +#ifndef HIPTENSOR_LIBRARY_DATA_TYPES_IMPL_HPP +#define HIPTENSOR_LIBRARY_DATA_TYPES_IMPL_HPP -#include "types.hpp" +#include "data_types.hpp" namespace hiptensor { @@ -217,4 +217,4 @@ namespace hiptensor } // namespace hiptensor -#endif // HIPTENSOR_LIBRARY_TYPES_IMPL_HPP +#endif // HIPTENSOR_LIBRARY_DATA_TYPES_IMPL_HPP diff --git a/library/src/include/native_types.hpp b/library/src/include/native_types.hpp new file mode 100644 index 00000000..6c9dbee8 --- /dev/null +++ b/library/src/include/native_types.hpp @@ -0,0 +1,102 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef HIPTENSOR_NATIVE_TYPES_HPP +#define HIPTENSOR_NATIVE_TYPES_HPP + +#include +#include +#include +#include +#include +#include + +#include "xfloat32.hpp" + +namespace hiptensor +{ + + /** + * \defgroup DataTypes Data Type Metadata + * + * @brief Definition and metadata on supported data types of matrices. + * + * @{ + * + * Native Data Types: + * float64_t = f64 = double + * float = f32 + * _Float16 = f16 + * int8 + * uint8 + * int16 + * int32 + * uint32 + * + * + * Non-Native Data Types: + * h16 = __half + * bf16 = bfloat16 + * + */ + + // Native types + using float16_t = _Float16; + using float32_t = float; + using float64_t = double; + + using int8_t = ::int8_t; + using uint8_t = ::uint8_t; + using int16_t = ::int16_t; + using uint16_t = ::uint16_t; + using int32_t = ::int32_t; + using uint32_t = ::uint32_t; + using int64_t = ::int64_t; + using uint64_t = ::uint64_t; + using index_t = ::int32_t; + using index64_t = ::int64_t; + + // Non-native types + using bfloat16_t = hip_bfloat16; + +#if !HIPTENSOR_NO_HALF + using hfloat16_t = __half; +#endif // !HIPTENSOR_NO_HALF + + using xfloat32_t = hiptensor_xfloat32; + + // clang-format off + + +} // namespace hiptensor + +// Add in some extensions to basic type support. +// Some of these are required for vector implementations. +// #include "type_traits.hpp" +// #include "types_ext.hpp" + +#include "native_types_impl.hpp" + +#endif // HIPTENSOR_NATIVE_TYPES_HPP diff --git a/library/src/include/native_types_impl.hpp b/library/src/include/native_types_impl.hpp new file mode 100644 index 00000000..1b29b459 --- /dev/null +++ b/library/src/include/native_types_impl.hpp @@ -0,0 +1,36 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef HIPTENSOR_NATIVE_TYPES_IMPL_HPP +#define HIPTENSOR_NATIVE_TYPES_IMPL_HPP + +#include "native_types.hpp" + +namespace hiptensor +{ + +} // namespace hiptensor + +#endif // HIPTENSOR_NATIVE_TYPES_IMPL_HPP diff --git a/library/src/include/type_traits.hpp b/library/src/include/type_traits.hpp new file mode 100644 index 00000000..3867839d --- /dev/null +++ b/library/src/include/type_traits.hpp @@ -0,0 +1,390 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#ifndef HIPTENSOR_TYPE_TRAITS_HPP +#define HIPTENSOR_TYPE_TRAITS_HPP +#include "native_types.hpp" +#include + +namespace hiptensor +{ + namespace detail + { + struct Fp16Bits + { + union + { + uint16_t i16; + float16_t f16; +#if !HIPTENSOR_NO_HALF + hfloat16_t h16; +#endif // !HIPTENSOR_NO_HALF + bfloat16_t b16; + }; + constexpr Fp16Bits(uint16_t initVal) + : i16(initVal) + { + } +#define TEST_TEST 1 + constexpr Fp16Bits(float16_t initVal) + : f16(initVal) + { + } +#if !HIPTENSOR_NO_HALF + constexpr Fp16Bits(hfloat16_t initVal) + : h16(initVal) + { + } +#endif + constexpr Fp16Bits(bfloat16_t initVal) + : b16(initVal) + { + } + }; + + struct Fp32Bits + { + union + { + uint32_t i32; + float32_t f32; + xfloat32_t xf32; + }; + constexpr Fp32Bits(uint32_t initVal) + : i32(initVal) + { + } + constexpr Fp32Bits(float32_t initVal) + : f32(initVal) + { + } + constexpr Fp32Bits(xfloat32_t initVal) + : xf32(initVal) + { + } + }; + + } // namespace detail +} // namespace hiptensor + +namespace std +{ + /////////////////////////////////////////////////////////// + /////////// std::numeric_limits ////////////// + /////////////////////////////////////////////////////////// + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::epsilon() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x1400)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::infinity() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7C00)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::lowest() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0xFBFF)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::max() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7BFF)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::min() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x0400)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::quiet_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7FFF)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::signaling_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7DFF)); + return eps.f16; + } + + /////////////////////////////////////////////////////////// + /////////// std::numeric_limits ///////////// + /////////////////////////////////////////////////////////// +#if !HIPTENSOR_NO_HALF + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::epsilon() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x1400)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::infinity() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7C00)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::lowest() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0xFBFF)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::max() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7BFF)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::min() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x0400)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::quiet_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7FFF)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::signaling_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7DFF)); + return eps.h16; + } + +#endif // !HIPTENSOR_NO_HALF + + /////////////////////////////////////////////////////////// + /////////// std::numeric_limits ///////////// + /////////////////////////////////////////////////////////// + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::epsilon() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x3C00)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::infinity() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7F80)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::lowest() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0xFF7F)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::max() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7F7F)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::min() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x007F)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::quiet_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7FC0)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::signaling_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7FC0)); + return eps.b16; + } + + /////////////////////////////////////////////////////////// + /////////// std::numeric_limits ////////////// + /////////////////////////////////////////////////////////// + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::epsilon() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(FLT_EPSILON)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::infinity() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(HUGE_VALF)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::lowest() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(-FLT_MAX)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::max() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(FLT_MAX)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::min() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(FLT_MIN)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::quiet_NaN() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(0x7FF80000)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::signaling_NaN() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(0x7FF00000)); + return eps.xf32; + } + // @endcond + +} // namespace std + +namespace hiptensor +{ + template ::value, int> = 0> + constexpr auto maxExactInteger() -> decltype(std::numeric_limits::max()) + { + return std::numeric_limits::max(); + } + + template ::value + && std::numeric_limits::digits, + int> + = 0> + constexpr auto maxExactInteger() -> + typename std::conditional_t::value, int64_t, int32_t> + { + using RetT = + typename std::conditional_t::value, int64_t, int32_t>; + return ((RetT)1 << std::numeric_limits::digits); + } + + template ::value || +#endif // !HIPTENSOR_NO_HALF + std::is_same::value, + int> + = 0> + constexpr auto maxExactInteger() -> int32_t + { + // f16 mantissa is 10 bits + return ((int32_t)1 << 11); + } + + template ::value, int> = 0> + constexpr auto maxExactInteger() -> int32_t + { + // b16 mantissa is 7 bits + return ((int32_t)1 << 8); + } + + template ::value, int> = 0> + constexpr auto maxExactInteger() -> int32_t + { + // xf32 mantissa is 7 bits + return ((int32_t)1 << 8); + } +} // namespace hiptensor + +#endif // HIPTENSOR_TYPE_TRAITS_HPP diff --git a/library/src/include/types.hpp b/library/src/include/types.hpp index 8cd677f3..b59604be 100644 --- a/library/src/include/types.hpp +++ b/library/src/include/types.hpp @@ -19,60 +19,10 @@ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. * *******************************************************************************/ - -#ifndef HIPTENSOR_LIBRARY_TYPES_HPP -#define HIPTENSOR_LIBRARY_TYPES_HPP - -// clang-format off -// Include order needs to be preserved -#include -#include -#include -#include - -#include - -// clang-format on - -namespace hiptensor -{ - // Used to map to empty tensors - struct NoneType; - - static constexpr hipDataType NONE_TYPE = (hipDataType)31; - - // Map type to runtime HipDataType - template - struct HipDataType; - - template - static constexpr auto HipDataType_v = HipDataType::value; - - // Get data size in bytes from id - uint32_t hipDataTypeSize(hipDataType id); - - // Convert hipDataType to hiptensorComputeType_t - hiptensorComputeType_t convertToComputeType(hipDataType hipType); - - // Read a single value from void pointer, casted to T - template - T readVal(void const* value, hipDataType id); - - template - T readVal(void const* value, hiptensorComputeType_t id); - -} // namespace hiptensor - -bool operator==(hipDataType hipType, hiptensorComputeType_t computeType); -bool operator==(hiptensorComputeType_t computeType, hipDataType hipType); - -bool operator!=(hipDataType hipType, hiptensorComputeType_t computeType); -bool operator!=(hiptensorComputeType_t computeType, hipDataType hipType); - -#include "types_impl.hpp" - -#endif // HIPTENSOR_LIBRARY_TYPES_HPP +#include "native_types.hpp" +#include "type_traits.hpp" +#include "types_ext.hpp" diff --git a/library/src/include/types_ext.hpp b/library/src/include/types_ext.hpp new file mode 100644 index 00000000..2f2b09e1 --- /dev/null +++ b/library/src/include/types_ext.hpp @@ -0,0 +1,181 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef HIPTENSOR_TYPES_EXT_HPP +#define HIPTENSOR_TYPES_EXT_HPP + +#include +#include +#include +#include + +#include "type_traits.hpp" + +namespace hiptensor +{ + //////////////////////////////////////////////////////////////////////// + /////////// hiptensor::hfloat16_t host and device conversions ////////// + //////////////////////////////////////////////////////////////////////// + template , int> = 0> + __host__ __device__ inline Outgoing convert(const Incoming& value) + { +#if !HIPTENSOR_NO_HALF + if constexpr(std::is_same_v) + { + +#if defined(__HIP_NO_HALF_CONVERSIONS__) + detail::Fp16Bits fp16(static_cast(value)); + return fp16.h16; +#else + return static_cast(value); +#endif // defined(__HIP_NO_HALF_CONVERSIONS__) + } + else if constexpr(std::is_same_v) + { + +#if defined(__HIP_NO_HALF_CONVERSIONS__) + detail::Fp16Bits fp16(value); + return static_cast(fp16.f16); +#else + return static_cast(value); +#endif // defined(__HIP_NO_HALF_CONVERSIONS__) + } + else +#endif // !HIPTENSOR_NO_HALF + { + return static_cast(value); + } + } + + template , int> = 0> + __host__ __device__ inline Outgoing const& convert(const Incoming& value) + { + return value; + } + + //////////////////////////////////////////////////////////////////// + /////////// hiptensor::hfloat16_t host & device operators ////////// + /////////////////////////////////////////////////////////////////// + +#if defined(__HIP_NO_HALF_OPERATORS__) +// No operators defined for host or device +#define HIPTENSOR_HALF_OP_ATTR HIPTENSOR_HOST_DEVICE +#else +// No operators defined just for host +#define HIPTENSOR_HALF_OP_ATTR HIPTENSOR_HOST +#endif // defined(__HIP_NO_HALF_OPERATORS__) + +#if !HIPTENSOR_NO_HALF + + HIPTENSOR_HALF_OP_ATTR inline bool operator==(const hfloat16_t& x, const hfloat16_t& y) + { + auto absDiff = std::fabs(__half2float(x) - __half2float(y)); + auto absAdd = std::fabs(__half2float(x) + __half2float(y)); + return absDiff <= __half2float(std::numeric_limits::epsilon()) * absAdd * 2.0f + || absDiff < __half2float(std::numeric_limits::min()); + } + + HIPTENSOR_HALF_OP_ATTR inline bool operator!=(const hfloat16_t& x, const hfloat16_t& y) + { + return !(x == y); + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t operator-(const hfloat16_t& x) + { + detail::Fp16Bits fp16(x); + fp16.i16 ^= 0x8000; // Flip sign + return fp16.h16; + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t operator+(const hfloat16_t& x, const hfloat16_t& y) + { + return convert(convert(x) + convert(y)); + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t operator-(const hfloat16_t& x, const hfloat16_t& y) + { + return convert(convert(x) - convert(y)); + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t operator*(const hfloat16_t& x, const hfloat16_t& y) + { + return convert(convert(x) * convert(y)); + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t operator/(const hfloat16_t& x, const hfloat16_t& y) + { + return convert(convert(x) / convert(y)); + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t& operator+=(hfloat16_t& x, const hfloat16_t& y) + { + return x = x + y; + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t& operator-=(hfloat16_t& x, const hfloat16_t& y) + { + return x = x - y; + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t& operator*=(hfloat16_t& x, const hfloat16_t& y) + { + return x = x * y; + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t& operator/=(hfloat16_t& x, const hfloat16_t& y) + { + return x = x / y; + } + +#endif // !HIPTENSOR_NO_HALF +} // namespace hiptensor + +namespace std +{ + /////////////////////////////////////////////////////////// + ////////// std::ostream::operator<<(float16_t) ////////// + /////////////////////////////////////////////////////////// + + inline ostream& operator<<(ostream& stream, hiptensor::float16_t const& val) + { + return stream << static_cast(val); + } + + /////////////////////////////////////////////////////////// + ////////// std::ostream::operator<<(hfloat16_t) ///////// + /////////////////////////////////////////////////////////// +#if !HIPTENSOR_NO_HALF + inline ostream& operator<<(ostream& stream, hiptensor::hfloat16_t const& val) + { + return stream << __half2float(val); + } +#endif // !HIPTENSOR_NO_HALF +} // namespace std + +#endif // HIPTENSOR_TYPES_EXT_HPP diff --git a/library/src/include/xfloat32.hpp b/library/src/include/xfloat32.hpp new file mode 100644 index 00000000..6e9168cf --- /dev/null +++ b/library/src/include/xfloat32.hpp @@ -0,0 +1,334 @@ +/* ************************************************************************ + * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- + * ies of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- + * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- + * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ + +/*!\file + * \brief xfloat32.h provides struct for hiptensor_xfloat32 typedef + */ + +#ifndef HIPTENSOR_XFLOAT32_HPP +#define HIPTENSOR_XFLOAT32_HPP + +#if __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only +// include a minimal definition of hiptensor_xfloat32 + +#include +typedef struct +{ + float data; +} hiptensor_xfloat32; + +#else // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +#include +#include +#include +#include +#include +#include + +#include "config.hpp" + +struct hiptensor_xfloat32 +{ + float data; + + enum round_t + { + round_up + }; + + HIPTENSOR_HOST_DEVICE hiptensor_xfloat32() = default; + + // round upper 19 bits of IEEE float to convert to xfloat32 + explicit HIPTENSOR_HOST_DEVICE hiptensor_xfloat32(float f, round_t) + : data(float_to_xfloat32(f)) + { + } + + explicit HIPTENSOR_HOST_DEVICE hiptensor_xfloat32(float f) + : data(truncate_float_to_xfloat32(f)) + { + } + + // zero extend lower 13 bits of xfloat32 to convert to IEEE float + HIPTENSOR_HOST_DEVICE operator float() const + { + return data; + } + + explicit HIPTENSOR_HOST_DEVICE operator bool() const + { + union + { + float fp32; + uint32_t int32; + } u = {data}; + return u.int32 & 0x7fffe000; + } + + explicit HIPTENSOR_HOST_DEVICE operator uint32_t() const + { + return uint32_t(float(*this)); + } + + explicit HIPTENSOR_HOST_DEVICE operator long() const + { + return long(float(*this)); + } + + explicit HIPTENSOR_HOST_DEVICE operator double() const + { + return double(float(*this)); + } + +private: + static HIPTENSOR_HOST_DEVICE float float_to_xfloat32(float f) + { + union + { + float fp32; + uint32_t int32; + } u = {f}; + if(~u.int32 & 0x7f800000) + { + // When the exponent bits are not all 1s, then the value is zero, normal, + // or subnormal. We round the xfloat32 mantissa up by adding 0xFFF, plus + // 1 if the least significant bit of the xfloat32 mantissa is 1 (odd). + // This causes the xfloat32's mantissa to be incremented by 1 if the 13 + // least significant bits of the float mantissa are greater than 0x1000, + // or if they are equal to 0x1000 and the least significant bit of the + // xfloat32 mantissa is 1 (odd). This causes it to be rounded to even when + // the lower 13 bits are exactly 0x1000. If the xfloat32 mantissa already + // has the value 0x3ff, then incrementing it causes it to become 0x00 and + // the exponent is incremented by one, which is the next higher FP value + // to the unrounded xfloat32 value. When the xfloat32 value is subnormal + // with an exponent of 0x00 and a mantissa of 0x3FF, it may be rounded up + // to a normal value with an exponent of 0x01 and a mantissa of 0x00. + // When the xfloat32 value has an exponent of 0xFE and a mantissa of 0x3FF, + // incrementing it causes it to become an exponent of 0xFF and a mantissa + // of 0x00, which is Inf, the next higher value to the unrounded value. + + u.int32 += 0xfff + ((u.int32 >> 13) & 1); // Round to nearest, round to even + } + else if(u.int32 & 0x1fff) + { + // When all of the exponent bits are 1, the value is Inf or NaN. + // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero + // mantissa bit. Quiet NaN is indicated by the most significant mantissa + // bit being 1. Signaling NaN is indicated by the most significant + // mantissa bit being 0 but some other bit(s) being 1. If any of the + // lower 13 bits of the mantissa are 1, we set the least significant bit + // of the xfloat32 mantissa, in order to preserve signaling NaN in case + // the xfloat32's mantissa bits are all 0. + u.int32 |= 0x2000; // Preserve signaling NaN + } + + u.int32 &= 0xffffe000; + return u.fp32; + } + + // Truncate instead of rounding + static HIPTENSOR_HOST_DEVICE float truncate_float_to_xfloat32(float f) + { + union + { + float fp32; + uint32_t int32; + } u = {f}; + + u.int32 = u.int32 & 0xffffe000; + return u.fp32; + } +}; + +typedef struct +{ + float data; +} hiptensor_xfloat32_public; + +static_assert(std::is_standard_layout{}, + "hiptensor_xfloat32 is not a standard layout type, and thus is " + "incompatible with C."); + +static_assert(std::is_trivial{}, + "hiptensor_xfloat32 is not a trivial type, and thus is " + "incompatible with C."); + +static_assert(sizeof(hiptensor_xfloat32) == sizeof(hiptensor_xfloat32_public) + && offsetof(hiptensor_xfloat32, data) + == offsetof(hiptensor_xfloat32_public, data), + "internal hiptensor_xfloat32 does not match public hiptensor_xfloat32"); + +inline std::ostream& operator<<(std::ostream& os, const hiptensor_xfloat32& xf32) +{ + return os << float(xf32); +} + +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator+(hiptensor_xfloat32 a) +{ + return a; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator-(hiptensor_xfloat32 a) +{ + union + { + float fp32; + uint32_t int32; + } u = {a.data}; + u.int32 ^= 0x80000000; + return hiptensor_xfloat32(u.fp32); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator+(hiptensor_xfloat32 a, + hiptensor_xfloat32 b) +{ + return hiptensor_xfloat32(float(a) + float(b)); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator-(hiptensor_xfloat32 a, + hiptensor_xfloat32 b) +{ + return hiptensor_xfloat32(float(a) - float(b)); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator*(hiptensor_xfloat32 a, + hiptensor_xfloat32 b) +{ + return hiptensor_xfloat32(float(a) * float(b)); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator/(hiptensor_xfloat32 a, + hiptensor_xfloat32 b) +{ + return hiptensor_xfloat32(float(a) / float(b)); +} +inline HIPTENSOR_HOST_DEVICE bool operator<(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return float(a) < float(b); +} +inline HIPTENSOR_HOST_DEVICE bool operator==(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return float(a) == float(b); +} +inline HIPTENSOR_HOST_DEVICE bool operator>(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return b < a; +} +inline HIPTENSOR_HOST_DEVICE bool operator<=(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return !(a > b); +} +inline HIPTENSOR_HOST_DEVICE bool operator!=(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return !(a == b); +} +inline HIPTENSOR_HOST_DEVICE bool operator>=(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return !(a < b); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator+=(hiptensor_xfloat32& a, + hiptensor_xfloat32 b) +{ + return a = a + b; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator-=(hiptensor_xfloat32& a, + hiptensor_xfloat32 b) +{ + return a = a - b; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator*=(hiptensor_xfloat32& a, + hiptensor_xfloat32 b) +{ + return a = a * b; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator/=(hiptensor_xfloat32& a, + hiptensor_xfloat32 b) +{ + return a = a / b; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator++(hiptensor_xfloat32& a) +{ + return a += hiptensor_xfloat32(1.0f); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator--(hiptensor_xfloat32& a) +{ + return a -= hiptensor_xfloat32(1.0f); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator++(hiptensor_xfloat32& a, int) +{ + hiptensor_xfloat32 orig = a; + ++a; + return orig; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator--(hiptensor_xfloat32& a, int) +{ + hiptensor_xfloat32 orig = a; + --a; + return orig; +} + +namespace std +{ + constexpr HIPTENSOR_HOST_DEVICE bool isinf(hiptensor_xfloat32 a) + { + union + { + float fp32; + uint32_t int32; + } u = {a.data}; + return !(~u.int32 & 0x7f800000) && !(u.int32 & 0x7fe000); + } + constexpr HIPTENSOR_HOST_DEVICE bool isnan(hiptensor_xfloat32 a) + { + union + { + float fp32; + uint32_t int32; + } u = {a.data}; + return !(~u.int32 & 0x7f800000) && +(u.int32 & 0x7fe000); + } + constexpr HIPTENSOR_HOST_DEVICE bool iszero(hiptensor_xfloat32 a) + { + union + { + float fp32; + uint32_t int32; + } u = {a.data}; + return (u.fp32 == 0.0f); + } + + HIPTENSOR_HOST_DEVICE inline hiptensor_xfloat32 sin(hiptensor_xfloat32 a) + { + return hiptensor_xfloat32(sinf(float(a))); + } + HIPTENSOR_HOST_DEVICE inline hiptensor_xfloat32 cos(hiptensor_xfloat32 a) + { + return hiptensor_xfloat32(cosf(float(a))); + } + + HIPTENSOR_HOST_DEVICE constexpr hiptensor_xfloat32 real(const hiptensor_xfloat32& a) + { + return a; + } +} + +#endif // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +#endif // HIPTENSOR_XFLOAT32_HPP diff --git a/library/src/permutation/permutation_ck_impl.hpp b/library/src/permutation/permutation_ck_impl.hpp index 7878e908..0c73ccff 100644 --- a/library/src/permutation/permutation_ck_impl.hpp +++ b/library/src/permutation/permutation_ck_impl.hpp @@ -32,8 +32,8 @@ #include #include +#include "data_types.hpp" #include "performance.hpp" -#include "types.hpp" namespace hiptensor { diff --git a/library/src/permutation/permutation_cpu_reference_impl.hpp b/library/src/permutation/permutation_cpu_reference_impl.hpp index d64147fe..c1d4a3af 100644 --- a/library/src/permutation/permutation_cpu_reference_impl.hpp +++ b/library/src/permutation/permutation_cpu_reference_impl.hpp @@ -29,8 +29,8 @@ #include #include +#include "data_types.hpp" #include "permutation_cpu_reference.hpp" -#include "types.hpp" #include "util.hpp" namespace hiptensor diff --git a/test/01_contraction/contraction_test.cpp b/test/01_contraction/contraction_test.cpp index 4ad97610..5d745d12 100644 --- a/test/01_contraction/contraction_test.cpp +++ b/test/01_contraction/contraction_test.cpp @@ -25,7 +25,7 @@ *******************************************************************************/ #include -#include "types.hpp" +#include "data_types.hpp" #include "llvm/hiptensor_options.hpp" #include "contraction/contraction_cpu_reference.hpp" diff --git a/test/02_permutation/permutation_resource.cpp b/test/02_permutation/permutation_resource.cpp index 4323e01d..1f448ff8 100644 --- a/test/02_permutation/permutation_resource.cpp +++ b/test/02_permutation/permutation_resource.cpp @@ -28,7 +28,7 @@ #define HIPTENSOR_PERMUTATION_RESOURCE_IMPL_HPP #include "permutation_resource.hpp" -#include "types.hpp" +#include "data_types.hpp" #include "utils.hpp" namespace hiptensor diff --git a/test/02_permutation/permutation_test.cpp b/test/02_permutation/permutation_test.cpp index dbb52d6e..cfadf5c0 100644 --- a/test/02_permutation/permutation_test.cpp +++ b/test/02_permutation/permutation_test.cpp @@ -25,10 +25,10 @@ *******************************************************************************/ #include +#include "data_types.hpp" #include "logger.hpp" #include "permutation/permutation_cpu_reference.hpp" #include "permutation_test.hpp" -#include "types.hpp" #include "utils.hpp" #include "llvm/hiptensor_options.hpp" diff --git a/test/utils.hpp b/test/utils.hpp index 67418568..1f7ece44 100644 --- a/test/utils.hpp +++ b/test/utils.hpp @@ -43,6 +43,7 @@ #include #include "device/common.hpp" +#include "types.hpp" #define HIPTENSOR_FREE_DEVICE(ptr) \ if(ptr != nullptr) \ @@ -209,19 +210,6 @@ std::pair compareEqual(DDataType const* deviceD, return std::make_pair(retval, max_relative_error); } -template -double getEpsilon() -{ - if(std::is_same_v) - { - return 0.0009765625; // numeric_limits<_Float16>::epsilon() => 0 - } - else - { - return std::numeric_limits::epsilon(); - } -}; - template std::pair compareEqualLaunchKernel(DDataType* deviceD, DDataType* hostD, @@ -288,7 +276,7 @@ std::pair compareEqualLaunchKernel(DDataType* deviceD, auto toDouble = [](DDataType const& val) { return static_cast(static_cast(val)); }; - auto eps = getEpsilon(); + auto eps = toDouble(std::numeric_limits::epsilon()); if(isNaN) { retval = false;