Skip to content

Commit

Permalink
Implement permutation with CK
Browse files Browse the repository at this point in the history
- implemented `hiptensorPermutation` with CK
- added unit tests for `hiptensorPermutation`
  • Loading branch information
CongMa13 committed Nov 13, 2023
1 parent 60f4b90 commit bf1c7ee
Show file tree
Hide file tree
Showing 25 changed files with 1,337 additions and 48 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ hiptensor_version.hpp
hiptensor-version.hpp

# Generated source file
test/01_contraction/configs/*.hpp
test/*/configs/*.hpp

# Precompiled Headers
*.gch
Expand Down
5 changes: 5 additions & 0 deletions library/include/hiptensor/internal/hiptensor_utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@ namespace std

return os;
}
// Stream inserter for half-precision values: the value is widened to float
// and then written with the stream's regular float formatting.
static ostream& operator<<(ostream& os, const _Float16 value)
{
    const float widened = static_cast<float>(value);
    return os << widened;
}
}

#endif // HIPTENSOR_UTILITY_INTERNAL_HPP
3 changes: 2 additions & 1 deletion library/src/hiptensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@ hiptensorStatus_t hiptensorInitTensorDescriptor(const hiptensorHandle_t* han
return HIPTENSOR_STATUS_NOT_INITIALIZED;
}

if((lens == nullptr) || ((dataType != HIP_R_32F) && (dataType != HIP_R_64F))
if((lens == nullptr)
|| ((dataType != HIP_R_16F) && (dataType != HIP_R_32F) && (dataType != HIP_R_64F))
|| unaryOp != HIPTENSOR_OP_IDENTITY)
{
auto errorCode = HIPTENSOR_STATUS_INVALID_VALUE;
Expand Down
131 changes: 125 additions & 6 deletions library/src/permutation/hiptensor_permutation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
*******************************************************************************/
#include <hiptensor/hiptensor.hpp>

#include "permutation_cpu_reference.hpp"
#include "logger.hpp"
#include "permutation_ck_col.hpp"

hiptensorStatus_t hiptensorPermutation(const hiptensorHandle_t* handle,
const void* alpha,
Expand All @@ -38,15 +39,133 @@ hiptensorStatus_t hiptensorPermutation(const hiptensorHandle_t* handle
const hipDataType typeScalar,
const hipStream_t stream)
{
assert(descA->mType == HIP_R_16F || descA->mType == HIP_R_32F);
assert(descA->mType == descB->mType);
using hiptensor::Logger;
auto& logger = Logger::instance();

// Log API access
char msg[2048];
snprintf(msg,
sizeof(msg),
"handle=%p, alpha=%p, A=%p, descA=%p, modeA=%p, B=%p, descB=%p, modeB=%p, "
"typeScalar=0x%02X, stream=%p",
handle,
alpha,
A,
descA,
modeA,
B,
descB,
modeB,
(unsigned int)typeScalar,
stream);

logger->logAPITrace("hiptensorPermutation", msg);

if(!handle || !alpha || !A || !descA || !modeA || !B || !descB || !modeB)
{
auto errorCode = HIPTENSOR_STATUS_NOT_INITIALIZED;
auto printErrorMessage = [&logger, errorCode](const std::string& paramName) {
char msg[512];
snprintf(msg,
sizeof(msg),
"Initialization Error : %s = nullptr (%s)",
paramName.c_str(),
hiptensorGetErrorString(errorCode));
logger->logError("hiptensorPermutation", msg);
};
if(!handle)
{
printErrorMessage("handle");
}
if(!alpha)
{
printErrorMessage("alpha");
}
if(!A)
{
printErrorMessage("A");
}
if(!descA)
{
printErrorMessage("descA");
}
if(!modeA)
{
printErrorMessage("modeA");
}
if(!B)
{
printErrorMessage("B");
}
if(!descB)
{
printErrorMessage("descB");
}
if(!modeB)
{
printErrorMessage("modeB");
}
return errorCode;
}

if(descA->mType != HIP_R_16F && descA->mType != HIP_R_32F)
{
auto errorCode = HIPTENSOR_STATUS_NOT_SUPPORTED;
snprintf(msg,
sizeof(msg),
"Unsupported Data Type Error : The supported data types of A and B are HIP_R_16F "
"and HIP_R_32F (%s)",
hiptensorGetErrorString(errorCode));
logger->logError("hiptensorPermutation", msg);
return errorCode;
}

if(descA->mType != descB->mType)
{
auto errorCode = HIPTENSOR_STATUS_INVALID_VALUE;
snprintf(msg,
sizeof(msg),
"Mismatched Data Type Error : Data types of A and B are not the same. (%s)",
hiptensorGetErrorString(errorCode));
logger->logError("hiptensorPermutation", msg);
return errorCode;
}

if(typeScalar != HIP_R_16F && typeScalar != HIP_R_32F)
{
auto errorCode = HIPTENSOR_STATUS_NOT_SUPPORTED;
snprintf(msg,
sizeof(msg),
"Unsupported Data Type Error : The supported data types of alpha are HIP_R_16F "
"and HIP_R_32F (%s)",
hiptensorGetErrorString(errorCode));
logger->logError("hiptensorPermutation", msg);
return errorCode;
}

if(descA->mType == HIP_R_16F)
{
return hiptensor::detail::permuteByCpu(alpha, static_cast<const _Float16 *>(A), descA, modeA, static_cast<_Float16 *>(B), descB, modeB, typeScalar);
return hiptensor::detail::permuteByCk(alpha,
static_cast<const _Float16*>(A),
descA,
modeA,
static_cast<_Float16*>(B),
descB,
modeB,
typeScalar,
stream);
}
else if(descA->mType == HIP_R_32F)
{
return hiptensor::detail::permuteByCpu(alpha, static_cast<const float *>(A), descA, modeA, static_cast<float *>(B), descB, modeB, typeScalar);
return hiptensor::detail::permuteByCk(alpha,
static_cast<const float*>(A),
descA,
modeA,
static_cast<float*>(B),
descB,
modeB,
typeScalar,
stream);
}
return HIPTENSOR_STATUS_NOT_SUPPORTED;
return HIPTENSOR_STATUS_NOT_SUPPORTED;
}
48 changes: 48 additions & 0 deletions library/src/permutation/permutation_ck_col.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*******************************************************************************/
#ifndef HIPTENSOR_PERMUTATION_CK_COL_HPP
#define HIPTENSOR_PERMUTATION_CK_COL_HPP
#include <hiptensor/hiptensor.hpp>

namespace hiptensor
{
namespace detail
{
/**
 * @brief Permutes tensor A into tensor B through a Composable Kernel (CK)
 *        element-wise device operation, scaling every element by alpha.
 *
 * The definition lives in permutation_ck_col_impl.hpp, which is included at
 * the bottom of this header. The parameter list here must match that
 * definition exactly; otherwise this line declares a distinct overload that
 * is never defined.
 *
 * @tparam DataType   Element type of A and B.
 * @param alpha       Pointer to the scaling factor; interpreted per typeScalar.
 * @param A           Input tensor data.
 * @param descA       Descriptor of A (its lengths define the problem size).
 * @param modeA       Mode labels of A, one per dimension.
 * @param B           Output tensor data.
 * @param descB       Descriptor of B.
 * @param modeB       Mode labels of B, one per dimension.
 * @param typeScalar  Data type of the value alpha points to.
 * @param stream      HIP stream the kernel is launched on.
 * @return A hiptensorStatus_t describing the outcome of the launch.
 */
template <typename DataType>
hiptensorStatus_t permuteByCk(const void*                        alpha,
                              const DataType*                    A,
                              const hiptensorTensorDescriptor_t* descA,
                              const int32_t                      modeA[],
                              DataType*                          B,
                              const hiptensorTensorDescriptor_t* descB,
                              const int32_t                      modeB[],
                              const hipDataType                  typeScalar,
                              const hipStream_t                  stream);

}
}

#include "permutation_ck_col_impl.hpp"
#endif // HIPTENSOR_PERMUTATION_CK_COL_HPP
123 changes: 123 additions & 0 deletions library/src/permutation/permutation_ck_col_impl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*******************************************************************************/
#ifndef HIPTENSOR_PERMUTATION_CK_COL_IMPL_HPP
#define HIPTENSOR_PERMUTATION_CK_COL_IMPL_HPP
#include <array>
#include <cstdlib>
#include <unordered_map>

#include <ck/ck.hpp>
#include <ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp>
#include <ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp>

#include "types.hpp"

namespace hiptensor
{
namespace detail
{
/**
 * @brief Permutes a rank-4 tensor A into B with a Composable Kernel (CK)
 *        element-wise device operation, scaling every element by alpha.
 *
 * A is addressed as densely packed with its first mode varying fastest
 * (strides 1, l0, l0*l1, l0*l1*l2). B is addressed the same way in the mode
 * order given by modeB, using the lengths taken from descA. descB is accepted
 * for interface symmetry but is not consulted here.
 *
 * @tparam DataType   Element type of A and B.
 * @param alpha       Pointer to the scaling factor; read as float via readVal.
 * @param A           Input tensor data (presumably device memory - confirm with caller).
 * @param descA       Descriptor of A; must describe exactly four dimensions.
 * @param modeA       Four distinct mode labels, one per dimension of A.
 * @param B           Output tensor data (presumably device memory - confirm with caller).
 * @param descB       Descriptor of B (unused by this implementation).
 * @param modeB       Mode labels of B; must be a permutation of modeA.
 * @param typeScalar  Data type of the value alpha points to.
 * @param stream      HIP stream passed to the CK invoker.
 * @return HIPTENSOR_STATUS_SUCCESS after the kernel has been submitted,
 *         HIPTENSOR_STATUS_NOT_SUPPORTED if A is not rank 4 or CK rejects the
 *         argument, HIPTENSOR_STATUS_INVALID_VALUE if the mode labels are
 *         repeated or modeB is not a permutation of modeA.
 */
template <typename DataType>
hiptensorStatus_t permuteByCk(const void*                        alpha,
                              const DataType*                    A,
                              const hiptensorTensorDescriptor_t* descA,
                              const int32_t                      modeA[],
                              DataType*                          B,
                              const hiptensorTensorDescriptor_t* descB,
                              const int32_t                      modeB[],
                              const hipDataType                  typeScalar,
                              const hipStream_t                  stream)
{
    using PassThrough = ck::tensor_operation::element_wise::PassThrough;
    using UnaryOp     = ck::tensor_operation::element_wise::PassThrough;
    using Scale       = ck::tensor_operation::element_wise::Scale;

    // The CK instance below is instantiated for exactly this many dimensions.
    constexpr ck::index_t kNumDim = 4;

    using DeviceElementwisePermuteInstance
        = ck::tensor_operation::device::DeviceElementwiseImpl<
            ck::Tuple<DataType>, // InDataTypeTuple
            ck::Tuple<DataType>, // OutDataTypeTuple
            PassThrough, // ElementwiseOp
            UnaryOp, // UnaryOp
            Scale, // Scalar
            kNumDim, // NumDim
            1, // MPerThread
            ck::Sequence<1>, // InScalarPerVectorSeq
            ck::Sequence<1>>; // OutScalarPerVectorSeq

    // Runtime check instead of assert(): with NDEBUG an unexpected rank would
    // otherwise lead to out-of-bounds reads of modeA / mLengths below.
    if(descA->mLengths.size() != static_cast<std::size_t>(kNumDim))
    {
        return HIPTENSOR_STATUS_NOT_SUPPORTED;
    }

    // Length of every mode of A, keyed by mode label,
    // for example {'n': 1, 'c': 2, 'w': 3, 'h':0}
    std::unordered_map<int32_t, ck::index_t> modeToLength;
    for(ck::index_t dim = 0; dim < kNumDim; ++dim)
    {
        modeToLength[modeA[dim]] = static_cast<ck::index_t>(descA->mLengths[dim]);
    }
    if(modeToLength.size() != static_cast<std::size_t>(kNumDim))
    {
        // A mode label occurs more than once in modeA.
        return HIPTENSOR_STATUS_INVALID_VALUE;
    }

    // Packed strides of B: modeB[0] varies fastest, every following mode
    // strides over the product of the lengths of the modes before it.
    std::unordered_map<int32_t, ck::index_t> bModeToStrides;
    ck::index_t                              bStride = 1;
    for(ck::index_t dim = 0; dim < kNumDim; ++dim)
    {
        const auto lengthIt = modeToLength.find(modeB[dim]);
        if(lengthIt == modeToLength.end()
           || !bModeToStrides.emplace(modeB[dim], bStride).second)
        {
            // modeB names a mode unknown to A, or repeats a mode.
            return HIPTENSOR_STATUS_INVALID_VALUE;
        }
        bStride *= lengthIt->second;
    }

    // CK iterates in A's dimension order, so lengths and both stride sets are
    // expressed per dimension of A.
    std::array<ck::index_t, kNumDim> ab_lengths{};
    std::array<ck::index_t, kNumDim> a_strides{};
    std::array<ck::index_t, kNumDim> b_strides{};
    ck::index_t                      aStride = 1;
    for(ck::index_t dim = 0; dim < kNumDim; ++dim)
    {
        ab_lengths[dim] = modeToLength.at(modeA[dim]);
        a_strides[dim]  = aStride;
        b_strides[dim]  = bModeToStrides.at(modeA[dim]);
        aStride *= ab_lengths[dim];
    }

    const float                alphaValue = readVal<float>(alpha, typeScalar);
    std::array<const void*, 1> input      = {A};
    std::array<void*, 1>       output     = {B};

    auto broadcastPermute = DeviceElementwisePermuteInstance{};
    auto argument         = broadcastPermute.MakeArgumentPointer(ab_lengths,
                                                         {a_strides},
                                                         {b_strides},
                                                         input,
                                                         output,
                                                         PassThrough{},
                                                         UnaryOp{},
                                                         Scale{alphaValue});

    if(!broadcastPermute.IsSupportedArgument(argument.get()))
    {
        return HIPTENSOR_STATUS_NOT_SUPPORTED;
    }

    auto broadcastPermute_invoker_ptr = broadcastPermute.MakeInvokerPointer();
    // Second StreamConfig field is false: kernel timing is not requested.
    broadcastPermute_invoker_ptr->Run(argument.get(), StreamConfig{stream, false});
    return HIPTENSOR_STATUS_SUCCESS;
}
}
}
#endif // HIPTENSOR_PERMUTATION_CK_COL_IMPL_HPP
3 changes: 0 additions & 3 deletions library/src/permutation/permutation_cpu_reference.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@

#ifndef HIPTENSOR_PERMUTATION_CPU_REFERENCE_HPP
#define HIPTENSOR_PERMUTATION_CPU_REFERENCE_HPP

#include <hip/library_types.h>

#include <hiptensor/hiptensor.hpp>
namespace hiptensor
{
Expand Down
9 changes: 5 additions & 4 deletions library/src/permutation/permutation_cpu_reference_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
#ifndef HIPTENSOR_PERMUTATION_CPU_REFERENCE_IMPL_HPP
#define HIPTENSOR_PERMUTATION_CPU_REFERENCE_IMPL_HPP
#include <hiptensor/hiptensor.hpp>
#include <vector>
#include <unordered_map>
#include <vector>

#include "permutation_cpu_reference.hpp"
#include "types.hpp"
Expand Down Expand Up @@ -63,8 +63,9 @@ namespace hiptensor
{
bStrides[i] = descB->mLengths[i - 1] * bStrides[i - 1];
}
auto bIndices = std::vector<int32_t>(modeSize, 0);
auto elementCount = hiptensor::elementsFromLengths(aLens);
auto bIndices = std::vector<int32_t>(modeSize, 0);
auto elementCount = hiptensor::elementsFromLengths(aLens);
float alphaValue = readVal<float>(alpha, typeScalar);
for(int elementIndex = 0; elementIndex < elementCount; elementIndex++)
{
auto index = elementIndex;
Expand All @@ -75,7 +76,7 @@ namespace hiptensor
}
auto bOffset
= std::inner_product(bIndices.begin(), bIndices.end(), bStrides.begin(), 0);
B[bOffset] = A[elementIndex];
B[bOffset] = static_cast<DataType>(A[elementIndex] * alphaValue);
}

return HIPTENSOR_STATUS_SUCCESS;
Expand Down
Loading

0 comments on commit bf1c7ee

Please sign in to comment.