From d14242f7154159ee7d2d2a1d828931f3f24bec9f Mon Sep 17 00:00:00 2001 From: atamazov Date: Thu, 5 Oct 2023 01:17:02 +0300 Subject: [PATCH 1/8] rocblas-remove-int8x4-uses(01) Removed support of INT8x4 type from the library (except miopen.h, some tests and driver) --- include/miopen/miopen.h | 11 ++-- src/check_numerics.cpp | 2 +- src/convolution.cpp | 2 +- src/gemm_v2.cpp | 52 ++++++------------- src/hip/batched_transpose_sol.cpp | 6 ++- src/include/miopen/datatype.hpp | 12 ++--- src/include/miopen/tensor.hpp | 4 +- src/include/miopen/visit_float.hpp | 2 +- src/kernels/MIOpenIm2d2Col.cl | 6 --- src/kernels/MIOpenIm3d2Col.cl | 6 --- .../MIOpenSubTensorOpWithScalarKernel.cl | 6 +-- .../MIOpenSubTensorOpWithSubTensorKernel.cl | 6 +-- .../MIOpenSubTensorOpWithTransformKernel.cl | 6 +-- src/kernels/MIOpenUtilKernels4.cl | 6 --- src/ocl/convolutionocl.cpp | 8 +-- src/ocl/tensorocl.cpp | 42 ++++++--------- src/ocl/utilocl.cpp | 33 +++--------- src/pooling_api.cpp | 2 +- src/reducetensor.cpp | 7 ++- src/reducetensor_api.cpp | 2 +- src/solver/batchnorm/forward_inference_ck.cpp | 4 +- .../conv_ck_igemm_fwd_bias_activ_fused.cpp | 8 +-- src/solver/conv_direct_naive_conv.cpp | 2 +- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 8 +-- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 8 +-- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 8 +-- ...conv_hip_implicit_gemm_bwd_data_xdlops.cpp | 8 +-- .../conv_hip_implicit_gemm_fwd_xdlops.cpp | 8 +-- ...v_hip_implicit_gemm_grouped_fwd_xdlops.cpp | 8 +-- src/solver/gemm.cpp | 50 ++++++------------ src/solver/mlir_common.cpp | 2 +- src/tensor.cpp | 2 +- test/conv_common.hpp | 33 +++++------- test/driver.hpp | 4 +- test/gpu_reference_kernel.cpp | 2 - test/gtest/conv_embed_db.cpp | 4 +- test/gtest/conv_hip_igemm_xdlops.cpp | 4 +- test/gtest/conv_igemm_dynamic.cpp | 4 +- test/gtest/conv_igemm_mlir.cpp | 4 +- test/gtest/conv_igemm_mlir_xdlops.cpp | 4 +- test/gtest/conv_trans.cpp | 4 +- test/gtest/db_sync.cpp | 2 +- test/tensor_holder.hpp | 7 +-- test/tensor_transform.cpp | 2 +- 44 files changed, 150 insertions(+), 261 deletions(-) diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index a59181acf3..7a85ec5875 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -346,12 +346,11 @@ MIOPEN_DECLARE_OBJECT(miopenReduceTensorDescriptor); */ typedef enum { - miopenHalf = 0, /*!< 16-bit floating point (Fully supported) */ - miopenFloat = 1, /*!< 32-bit floating point (Fully supported) */ - miopenInt32 = 2, /*!< 32-bit int point (Partially supported) */ - miopenInt8 = 3, /*!< 8-bit int point (Partially supported) */ - miopenInt8x4 = - 4, /*!< Pack of four 8-bit int points in NCHW_VECT_C format (Partially supported) */ + miopenHalf = 0, /*!< 16-bit floating point (Fully supported) */ + miopenFloat = 1, /*!< 32-bit floating point (Fully supported) */ + miopenInt32 = 2, /*!< 32-bit int point (Partially supported) */ + miopenInt8 = 3, /*!< 8-bit int point (Partially supported) */ + miopenInt8x4 = 4, /*!< Pack of four Int8 in NCHW_VECT_C format (Support discontinued) */ miopenBFloat16 = 5, /*!< 16-bit binary floating point (8-bit exponent, 7-bit fraction) (Partially supported) */ miopenDouble = 6, /*!< 64-bit floating point (Partially supported) */ diff --git a/src/check_numerics.cpp b/src/check_numerics.cpp index b0882995e9..50cb0af4af 100644 --- a/src/check_numerics.cpp +++ b/src/check_numerics.cpp @@ -64,7 +64,7 @@ std::string GetKernelName(miopenDataType_t data_type) case miopenBFloat8: return {"check_numerics_bf8"}; case miopenInt32: case miopenInt8: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenDouble: default: return {""}; } diff --git a/src/convolution.cpp b/src/convolution.cpp index 403ff777cd..ac7c28fdc4 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -358,7 +358,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& std::vector out_strides; tensor_layout_to_strides( out_lens, default_layout, yLayout, xDesc.GetVectorLength(), out_strides); - return {(xDesc.GetType() == miopenInt8 || xDesc.GetType() == miopenInt8x4 + return {(xDesc.GetType() == miopenInt8 ? (yType) : xDesc.GetType()), // TODO: This function overrides the output type with // essentially the input which is incorrect. diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index 804587aac0..35add906a1 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ -62,10 +62,6 @@ /// "disabled expansion of recursive macro" injected by rocblas headers. #define AVOID_ROCBLAS_WRAPPERS_204 (MIOPEN_ROCBLAS_VERSION_FLAT >= 2004000) -/// Maintain API compatibility with various rocBLAS version -#define USE_GEMM_FLAGS_PACK_INT8X4 \ - ((MIOPEN_ROCBLAS_VERSION_FLAT >= 2038000) && (MIOPEN_ROCBLAS_VERSION_FLAT < 4000000)) - /// Maintain API compatibility for versions not supporting FP16 alternate implementations #define USE_GEMM_FLAGS_FP16_ALT_IMPL (MIOPEN_ROCBLAS_VERSION_FLAT >= 2043000) /// Some 2.42 versions have rocblas_gemm_flags_fp16_alt_impl, but @@ -109,7 +105,7 @@ static inline rocblas_datatype rocBlasComputeType(const miopen::GemmDescriptor& { // Complex compute types are only supported in newer version of the API assert(desc.dataType == desc.a_cast_type && desc.dataType == desc.b_cast_type); - if(desc.dataType == miopenInt8 || desc.dataType == miopenInt8x4) + if(desc.dataType == miopenInt8) return rocblas_datatype::rocblas_datatype_i32_r; else return rocblas_datatype::rocblas_datatype_f32_r; @@ -439,7 +435,6 @@ miopenStatus_t CallGemm(const Handle& handle, switch(gemm_desc.dataType) { - case miopenInt8x4: case miopenInt8: { assert(gemm_desc.k % 4 == 0); @@ -471,12 +466,7 @@ miopenStatus_t CallGemm(const Handle& handle, rocBlasComputeType(gemm_desc), // rocblas_datatype::rocblas_datatype_i32_r, rocblas_gemm_algo::rocblas_gemm_algo_standard, 0, -#if USE_GEMM_FLAGS_PACK_INT8X4 - rocblas_gemm_flags_pack_int8x4 -#else - 0 -#endif - ); + 0); } break; case miopenInt32: break; @@ -620,9 +610,9 @@ miopenStatus_t CallGemm(const Handle& handle, }; break; + case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, - "miopenDouble data type not supported by MIOpenGEMM."); + MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); }; break; } @@ -692,7 +682,6 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, switch(gemm_desc.dataType) { - case miopenInt8x4: case miopenInt8: { assert(gemm_desc.k % 4 == 0); @@ -728,12 +717,7 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, rocblas_datatype::rocblas_datatype_i32_r, rocblas_gemm_algo::rocblas_gemm_algo_standard, 0, -#if USE_GEMM_FLAGS_PACK_INT8X4 - rocblas_gemm_flags_pack_int8x4 -#else - 0 -#endif - ); + 0); } break; case miopenInt32: break; @@ -892,10 +876,10 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, break; } + case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, - "miopenDouble data type not supported by MIOpenGEMM."); - } + MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); + }; break; } @@ -967,7 +951,6 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, switch(gemm_desc.dataType) { - case miopenInt8x4: case miopenInt8: { assert(gemm_desc.k % 4 == 0); @@ -1001,12 +984,7 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, rocBlasComputeType(gemm_desc), // rocblas_datatype::rocblas_datatype_i32_r, rocblas_gemm_algo::rocblas_gemm_algo_standard, 0, -#if USE_GEMM_FLAGS_PACK_INT8X4 - rocblas_gemm_flags_pack_int8x4 -#else - 0 -#endif - ); + 0); } } break; @@ -1162,10 +1140,10 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, break; } + case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, - "miopenDouble data type not supported by MIOpenGEMM."); - } + MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); + }; break; } @@ -1195,7 +1173,7 @@ GemmDescriptor CreateGemmDescriptorConvFwd(const TensorDescriptor& wDesc, { #ifndef NDEBUG assert(wDesc.GetType() == xDesc.GetType()); - if(wDesc.GetType() != miopenInt8 && wDesc.GetType() != miopenInt8x4) + if(wDesc.GetType() != miopenInt8) assert(wDesc.GetType() == yDesc.GetType()); #endif @@ -1350,7 +1328,7 @@ GemmDescriptor CreateGemmDescriptorConvCNHWFwd(const TensorDescriptor& wDesc, { #ifndef NDEBUG assert(wDesc.GetType() == xDesc.GetType()); - if(wDesc.GetType() != miopenInt8 && wDesc.GetType() != miopenInt8x4) + if(wDesc.GetType() != miopenInt8) assert(wDesc.GetType() == yDesc.GetType()); #endif @@ -1454,7 +1432,7 @@ GemmDescriptor CreateGemmStridedBatchedDescriptorConv1x1Fwd(const TensorDescript { #ifndef NDEBUG assert(wDesc.GetType() == xDesc.GetType()); - if(wDesc.GetType() != miopenInt8 && wDesc.GetType() != miopenInt8x4) + if(wDesc.GetType() != miopenInt8) assert(wDesc.GetType() == yDesc.GetType()); #else (void)yDesc; diff --git a/src/hip/batched_transpose_sol.cpp b/src/hip/batched_transpose_sol.cpp index 4ffcf34b77..01349775ca 100644 --- a/src/hip/batched_transpose_sol.cpp +++ b/src/hip/batched_transpose_sol.cpp @@ -322,7 +322,11 @@ BatchedTransposeSolution::BatchedTransposeSolution(const ExecutionContext& ctx, uint32_t width_) : data_type(data_type_), batch(batch_), height(height_), width(width_) { - if(data_type == miopenInt8x4 || data_type == miopenDouble) + if(!(data_type == miopenHalf // + || data_type == miopenFloat // + || data_type == miopenInt32 // + || data_type == miopenInt8 // + || data_type == miopenBFloat16)) MIOPEN_THROW("These data type are not supported"); num_cu = ctx.GetStream().GetMaxComputeUnits(); std::size_t data_size = miopen::GetTypeSize(data_type); diff --git a/src/include/miopen/datatype.hpp b/src/include/miopen/datatype.hpp index 485bdb3d67..29057e3de9 100644 --- a/src/include/miopen/datatype.hpp +++ b/src/include/miopen/datatype.hpp @@ -53,7 +53,10 @@ inline std::string GetDataType(miopenDataType_t type) type_str = "bfloat16"; } break; - case miopenInt8x4: + case miopenInt8x4: { + type_str = "UNSUPPORTED_TYPE"; + } + break; case miopenInt8: { type_str = "int8_t"; } @@ -137,7 +140,6 @@ inline KernelBuildParameters GetDataTypeKBP(miopenDataType_t type) int use_fp16x8 = 0; int use_fp32 = 0; int use_int8 = 0; - int use_int8x4 = 0; int use_int32 = 0; int use_bfp16 = 0; int use_fp64 = 0; @@ -150,15 +152,14 @@ inline KernelBuildParameters GetDataTypeKBP(miopenDataType_t type) case miopenHalf: use_fp16 = 1; break; case miopenFloat: use_fp32 = 1; break; case miopenInt8: use_int8 = 1; break; - case miopenInt8x4: use_int8x4 = 1; break; case miopenBFloat16: use_bfp16 = 1; break; case miopenInt32: use_int32 = 1; break; case miopenDouble: use_fp64 = 1; break; case miopenFloat8: use_fp8 = 1; break; case miopenBFloat8: use_bfp8 = 1; break; + case miopenInt8x4: // fallthrough default: - MIOPEN_THROW( - "Only float, half, bfloat16, int8, int8x4, float8, bfloat8 data type is supported."); + MIOPEN_THROW("Only float, half, bfloat16, int8, float8, bfloat8 data types are supported."); break; } @@ -168,7 +169,6 @@ inline KernelBuildParameters GetDataTypeKBP(miopenDataType_t type) {"MIOPEN_USE_FP16x8", use_fp16x8}, {"MIOPEN_USE_FP32", use_fp32}, {"MIOPEN_USE_INT8", use_int8}, - {"MIOPEN_USE_INT8x4", use_int8x4}, {"MIOPEN_USE_BFP16", use_bfp16}, {"MIOPEN_USE_INT32", use_int32}, {"MIOPEN_USE_RNE_BFLOAT16", use_rne_bfloat16}, diff --git a/src/include/miopen/tensor.hpp b/src/include/miopen/tensor.hpp index b8d72df67c..a133ab4adc 100644 --- a/src/include/miopen/tensor.hpp +++ b/src/include/miopen/tensor.hpp @@ -101,13 +101,13 @@ inline std::size_t GetTypeSize(miopenDataType_t d) case miopenFloat: return 4; case miopenHalf: case miopenBFloat16: return 2; - case miopenInt8x4: + case miopenInt8x4: break; case miopenInt8: case miopenFloat8: case miopenBFloat8: return 1; case miopenDouble: return 8; } - MIOPEN_THROW("Unknown data type"); + MIOPEN_THROW("Unknown or unsupported data type"); } template diff --git a/src/include/miopen/visit_float.hpp b/src/include/miopen/visit_float.hpp index d26afba9dd..35e1ae6ae7 100644 --- a/src/include/miopen/visit_float.hpp +++ b/src/include/miopen/visit_float.hpp @@ -79,7 +79,6 @@ void visit_float(miopenDataType_t t, F f) } case miopenFloat8: case miopenBFloat8: - case miopenInt8x4: case miopenInt8: { f(as_float{}); break; @@ -92,6 +91,7 @@ void visit_float(miopenDataType_t t, F f) f(as_float{}); break; } + case miopenInt8x4: MIOPEN_THROW("miopenInt8x4: Support discontinued."); } } diff --git a/src/kernels/MIOpenIm2d2Col.cl b/src/kernels/MIOpenIm2d2Col.cl index 7b1522db6f..852ccff955 100644 --- a/src/kernels/MIOpenIm2d2Col.cl +++ b/src/kernels/MIOpenIm2d2Col.cl @@ -40,10 +40,6 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - #ifndef MIOPEN_USE_INT32 #define MIOPEN_USE_INT32 0 #endif @@ -58,8 +54,6 @@ #if MIOPEN_USE_INT8 || MIOPEN_USE_FP8 || MIOPEN_USE_BFP8 typedef char data_t; -#elif MIOPEN_USE_INT8x4 -typedef uint data_t; #elif MIOPEN_USE_INT32 typedef int data_t; #elif(MIOPEN_USE_FP16 || MIOPEN_USE_BFP16) diff --git a/src/kernels/MIOpenIm3d2Col.cl b/src/kernels/MIOpenIm3d2Col.cl index 7ccb8363b7..5ee437a068 100644 --- a/src/kernels/MIOpenIm3d2Col.cl +++ b/src/kernels/MIOpenIm3d2Col.cl @@ -40,18 +40,12 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - #ifndef MIOPEN_USE_INT32 #define MIOPEN_USE_INT32 0 #endif #if MIOPEN_USE_INT8 typedef char data_t; -#elif MIOPEN_USE_INT8x4 -typedef uint data_t; #elif MIOPEN_USE_INT32 typedef int data_t; #elif(MIOPEN_USE_FP16 || MIOPEN_USE_BFP16) diff --git a/src/kernels/MIOpenSubTensorOpWithScalarKernel.cl b/src/kernels/MIOpenSubTensorOpWithScalarKernel.cl index 87fd82a7fd..dc8c96fc60 100644 --- a/src/kernels/MIOpenSubTensorOpWithScalarKernel.cl +++ b/src/kernels/MIOpenSubTensorOpWithScalarKernel.cl @@ -46,17 +46,13 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - #ifndef MIOPEN_USE_INT32 #define MIOPEN_USE_INT32 0 #endif #include "float_types.h" -#if MIOPEN_USE_INT8 == 1 || MIOPEN_USE_INT8x4 == 1 +#if MIOPEN_USE_INT8 == 1 #define _FLOAT char #endif diff --git a/src/kernels/MIOpenSubTensorOpWithSubTensorKernel.cl b/src/kernels/MIOpenSubTensorOpWithSubTensorKernel.cl index d0d1762a10..1de4843712 100644 --- a/src/kernels/MIOpenSubTensorOpWithSubTensorKernel.cl +++ b/src/kernels/MIOpenSubTensorOpWithSubTensorKernel.cl @@ -46,11 +46,7 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - -#if MIOPEN_USE_INT8 == 1 || MIOPEN_USE_INT8x4 == 1 +#if MIOPEN_USE_INT8 == 1 #define _FLOAT char #ifndef FLT_MAX #define MAX_VAL 127 /* max value */ diff --git a/src/kernels/MIOpenSubTensorOpWithTransformKernel.cl b/src/kernels/MIOpenSubTensorOpWithTransformKernel.cl index bc17f2ecf4..bd06f89626 100644 --- a/src/kernels/MIOpenSubTensorOpWithTransformKernel.cl +++ b/src/kernels/MIOpenSubTensorOpWithTransformKernel.cl @@ -43,11 +43,7 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - -#if MIOPEN_USE_INT8 == 1 || MIOPEN_USE_INT8x4 == 1 +#if MIOPEN_USE_INT8 == 1 #define _FLOAT char #ifndef FLT_MAX #define MAX_VAL 127 /* max value */ diff --git a/src/kernels/MIOpenUtilKernels4.cl b/src/kernels/MIOpenUtilKernels4.cl index 861e563012..09e6b73cff 100644 --- a/src/kernels/MIOpenUtilKernels4.cl +++ b/src/kernels/MIOpenUtilKernels4.cl @@ -40,10 +40,6 @@ #define MIOPEN_USE_INT8 0 #endif -#ifndef MIOPEN_USE_INT8x4 -#define MIOPEN_USE_INT8x4 0 -#endif - #ifndef MIOPEN_USE_INT32 #define MIOPEN_USE_INT32 0 #endif @@ -58,8 +54,6 @@ #if MIOPEN_USE_INT8 || MIOPEN_USE_FP8 || MIOPEN_USE_BFP8 typedef char data_t; -#elif MIOPEN_USE_INT8x4 -typedef uint data_t; #elif MIOPEN_USE_INT32 typedef int data_t; #elif(MIOPEN_USE_FP16 || MIOPEN_USE_BFP16) diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 8c042e3e7b..94b083577d 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -296,8 +296,7 @@ void ValidateConvTensors(const ConvTensors& tensors) tensors.xDesc.GetSize() != tensors.wDesc.GetSize(); const auto trivial_tensor_types_not_matched = - tensors.xDesc.GetType() != tensors.yDesc.GetType() && - tensors.xDesc.GetType() != miopenInt8 && tensors.xDesc.GetType() != miopenInt8x4; + tensors.xDesc.GetType() != tensors.yDesc.GetType() && tensors.xDesc.GetType() != miopenInt8; // if(xDesc.GetLengths()[1] != wDesc.GetLengths()[1]) { // MIOPEN_THROW(miopenStatusBadParm); @@ -426,11 +425,6 @@ void ConvolutionDescriptor::ConvolutionForward(Handle& handle, ValidateConvTensors(tensors); ValidateAlphaBeta(alpha, beta); - if(algo != miopenConvolutionFwdAlgoGEMM && xDesc.GetType() == miopenInt8x4) - { - MIOPEN_THROW(miopenStatusBadParm); - } - ConvForwardCheckNumerics(handle, tensors, [&]() { ValidateGroupCount(xDesc, wDesc, *this); diff --git a/src/ocl/tensorocl.cpp b/src/ocl/tensorocl.cpp index 9c7bff6992..1e1f8b1df9 100644 --- a/src/ocl/tensorocl.cpp +++ b/src/ocl/tensorocl.cpp @@ -1597,10 +1597,13 @@ void ScaleTensor(const Handle& handle, assert(yDim_flat > 0 && yDim_flat <= 5); const miopenDataType_t dataType = yDesc_flat.GetType(); - if(dataType == miopenInt8 || dataType == miopenInt8x4 || dataType == miopenBFloat16) + + if(!(dataType == miopenHalf // + || dataType == miopenFloat // + || dataType == miopenInt32 // + || dataType == miopenDouble)) { - MIOPEN_THROW(miopenStatusBadParm, - "Tensor scale operation is not supported for int8, int8x4, and bfloat16."); + MIOPEN_THROW(miopenStatusBadParm, "ScaleTensor: unsupported data type."); } std::string kernel_name = "SubTensorOpWithScalar" + std::to_string(yDim_flat) + "d"; @@ -1941,8 +1944,7 @@ std::string GetCastTensorBuildOptionFromType(const std::string& buildOption, mio case miopenDouble: // TODO MIOPEN_THROW(miopenStatusBadParm, "miopenDouble data type not supported in cast tensor."); - case miopenInt8x4: - MIOPEN_THROW(miopenStatusBadParm, "miopenInt8x4 data type not supported in cast tensor."); + case miopenInt8x4: // fallthrough default: MIOPEN_THROW(miopenStatusBadParm, "Invalid data type in cast tensor desc."); } } @@ -2237,24 +2239,6 @@ void TransformTensor(const Handle& handle, } } } - else if(xDesc.GetType() == miopenInt8 && yDesc.GetType() == miopenInt8x4 && x_len.size() >= 3) - { - if(x_len[1] <= (y_len[1] - 4) || y_len[1] % 4 != 0) - { - MIOPEN_THROW("Invalid y channel size"); - } - - transpose_NCHW2Vec(handle, x_len, x, y, 4, false, true, alpha, beta); - } - else if(xDesc.GetType() == miopenInt8x4 && yDesc.GetType() == miopenInt8 && x_len.size() >= 3) - { - if(y_len[1] <= (x_len[1] - 4) || x_len[1] % 4 != 0) - { - MIOPEN_THROW("Invalid x channel size"); - } - - transpose_NCHW2Vec(handle, y_len, x, y, 4, false, false, alpha, beta); - } else { auto x_y_len = boost::combine(x_len, y_len); @@ -2294,12 +2278,20 @@ void TransformTensor(const Handle& handle, const miopenDataType_t dataTypex = xDesc_flat.GetType(); const miopenDataType_t dataTypey = yDesc_flat.GetType(); - if(dataTypex == miopenInt8 || dataTypex == miopenInt8x4) + if(!(dataTypex == miopenHalf // + || dataTypex == miopenFloat // + || dataTypex == miopenInt32 // + || dataTypex == miopenBFloat16 // + || dataTypex == miopenDouble)) { MIOPEN_THROW("Tensor x is a unsupported data type"); } - if(dataTypey == miopenInt8 || dataTypey == miopenInt8x4) + if(!(dataTypey == miopenHalf // + || dataTypey == miopenFloat // + || dataTypey == miopenInt32 // + || dataTypey == miopenBFloat16 // + || dataTypey == miopenDouble)) { MIOPEN_THROW("Tensor y is a unsupported data type"); } diff --git a/src/ocl/utilocl.cpp b/src/ocl/utilocl.cpp index 0a88efb2f1..d536e819e6 100644 --- a/src/ocl/utilocl.cpp +++ b/src/ocl/utilocl.cpp @@ -80,8 +80,8 @@ float Im2d2ColGPU(const Handle& handle, int data_size_bound = c * in_h * in_w; - int data_size_bound_pack = type == miopenInt8x4 ? data_size_bound * 4 : data_size_bound; - int im_offset_pack = type == miopenInt8x4 ? im_offset / 4 : im_offset; + int data_size_bound_pack = data_size_bound; + int im_offset_pack = im_offset; if(!kernels.empty()) { @@ -105,7 +105,7 @@ float Im2d2ColGPU(const Handle& handle, } else { - const int c_pack = type == miopenInt8x4 ? c / 4 : c; + const int c_pack = c; std::string params; int num_ch_per_wg; @@ -331,9 +331,8 @@ float Im3d2ColGPU(const Handle& handle, auto&& kernels = handle.GetKernels("miopenIm3d2Col", network_config); - // int8x4 vectorize-c format - int im_offset_pack = type == miopenInt8x4 ? im_offset / 4 : im_offset; - int im_c_pack = type == miopenInt8x4 ? im_c / 4 : im_c; + int im_offset_pack = im_offset; + int im_c_pack = im_c; if(!kernels.empty()) { @@ -772,13 +771,6 @@ float transpose_NCHW2CNHW(const Handle& handle, std::string params = GetDataTypeKernelParams(type); - if(type == miopenInt8x4) - { - c /= 4; - in_offset /= 4; - out_offset /= 4; - } - if(h_stride == 1 && w_stride == 1 && type == miopenFloat) { kernel_name += "_V1"; @@ -910,13 +902,6 @@ float transpose_CNHW2NCHW(const Handle& handle, std::string params = GetDataTypeKernelParams(type); - if(type == miopenInt8x4) - { - c /= 4; - in_offset /= 4; - out_offset /= 4; - } - if(h_stride == 1 && w_stride == 1 && type == miopenFloat) { kernel_name += "_V1"; @@ -1170,14 +1155,8 @@ float transpose_packed_MN2NM(const Handle& handle, auto&& kernels = handle.GetKernels(kernel_name, network_config); std::string params = GetDataTypeKernelParams(type); - if(type == miopenInt8x4) - { - m /= 4; - in_offset /= 4; - out_offset /= 4; - } - if(!(type == miopenInt8x4 || type == miopenInt8)) + if(type != miopenInt8) { MIOPEN_THROW("transpose_packed_MN2NM only meant for int8 variants."); } diff --git a/src/pooling_api.cpp b/src/pooling_api.cpp index ef526804cf..bf318f7b78 100644 --- a/src/pooling_api.cpp +++ b/src/pooling_api.cpp @@ -50,7 +50,7 @@ inline void Pooling_logging_cmd(const miopenPoolingDescriptor_t poolDesc, case miopenFloat: ss << "pool"; break; case miopenInt32: case miopenInt8: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index 6cf29c2f64..9cabbc03f6 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -211,11 +211,10 @@ inline int GetDataTypeSize(miopenDataType_t t) case miopenFloat8: case miopenBFloat8: case miopenInt8: return (1); - case miopenInt8x4: return (4); + case miopenInt8x4: return (4); // Support discontinued. case miopenBFloat16: return (2); case miopenInt32: return (4); - default: - MIOPEN_THROW("Only float, half, double, bfloat16, int8, int8x4 data type is supported."); + default: MIOPEN_THROW("Only float, half, double, bfloat16, int8 data types are supported."); }; }; @@ -269,7 +268,7 @@ inline int GetDataTypeId(miopenDataType_t t) case miopenBFloat16: return (static_cast('B')); case miopenDouble: return (static_cast('D')); case miopenInt8: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenInt32: return (static_cast('O')); diff --git a/src/reducetensor_api.cpp b/src/reducetensor_api.cpp index 685b2fb430..10b1ff73bf 100644 --- a/src/reducetensor_api.cpp +++ b/src/reducetensor_api.cpp @@ -43,7 +43,7 @@ static void LogCmdRedux(const miopen::ReduceTensorDescriptor reduceTensorDesc, ss << "reducefp16"; else if(aDesc.GetType() == miopenBFloat16) ss << "reducebfp16"; - else if(aDesc.GetType() == miopenInt8 || aDesc.GetType() == miopenInt8x4) + else if(aDesc.GetType() == miopenInt8) ss << "reduceint8"; else if(aDesc.GetType() == miopenDouble) ss << "reducefp64"; diff --git a/src/solver/batchnorm/forward_inference_ck.cpp b/src/solver/batchnorm/forward_inference_ck.cpp index 5a7918cc64..ff17432a62 100644 --- a/src/solver/batchnorm/forward_inference_ck.cpp +++ b/src/solver/batchnorm/forward_inference_ck.cpp @@ -200,7 +200,7 @@ bool BnCKFwdInference::IsApplicable(const ExecutionContext& context, return (CheckCKApplicability(bn_problem) != -1); case miopenInt32: case miopenInt8: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: default: MIOPEN_THROW("Unsupported datatype"); @@ -244,7 +244,7 @@ BnCKFwdInference::GetSolution(const ExecutionContext& context, break; case miopenInt8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: default: MIOPEN_THROW("Unsupported datatype"); diff --git a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp index 9520a79a90..e2df6f8097 100644 --- a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp +++ b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp @@ -287,7 +287,7 @@ void PerformanceConfigConvCKIgemmFwdBiasActivFused::HeuristicInit( case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -342,7 +342,7 @@ bool PerformanceConfigConvCKIgemmFwdBiasActivFused::IsValid( case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -435,7 +435,7 @@ bool ConvCKIgemmFwdBiasActivFused::IsApplicable(const FusionContext& ctx, case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -469,7 +469,7 @@ ConvSolution ConvCKIgemmFwdBiasActivFused::GetSolution( case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); diff --git a/src/solver/conv_direct_naive_conv.cpp b/src/solver/conv_direct_naive_conv.cpp index 4df8df5874..6d89af7eaf 100644 --- a/src/solver/conv_direct_naive_conv.cpp +++ b/src/solver/conv_direct_naive_conv.cpp @@ -48,7 +48,7 @@ bool ConvDirectNaiveConvIsAssemblyKernel(const ExecutionContext& ctx, { const auto device_name = ctx.GetStream().GetDeviceName(); return (device_name == "gfx906" || device_name == "gfx908") && ctx.rmv.IsV3() && - problem.IsLayoutDefault() && (!problem.IsInt8()); + problem.IsLayoutDefault() && (problem.IsFp16() || problem.IsFp32() || problem.IsBfp16()); } // Check tensor data type respectively diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 2602c54320..94983c7f0e 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -210,7 +210,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -253,7 +253,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -324,7 +324,7 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -352,7 +352,7 @@ ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index f0623c642d..a21c9ba300 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -210,7 +210,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -253,7 +253,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -322,7 +322,7 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -350,7 +350,7 @@ ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 6fce8a80b8..5c92f57142 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -206,7 +206,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -249,7 +249,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -320,7 +320,7 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -348,7 +348,7 @@ ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::WrWInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp index 57c63267d2..5aa17e75bf 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp @@ -172,7 +172,7 @@ void PerformanceConfigHipImplicitGemmBwdXdlops::HeuristicInit( case miopenBFloat8: case miopenInt8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -214,7 +214,7 @@ bool PerformanceConfigHipImplicitGemmBwdXdlops::IsValid( case miopenBFloat8: case miopenInt8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -294,7 +294,7 @@ bool ConvHipImplicitGemmBwdXdlops::IsApplicable( case miopenBFloat8: case miopenInt8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -318,7 +318,7 @@ ConvSolution ConvHipImplicitGemmBwdXdlops::GetSolution( problem, config.kernel_id); case miopenInt8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp index 73907eb788..7a9c322bc7 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp @@ -172,7 +172,7 @@ void PerformanceConfigHipImplicitGemmFwdXdlops::HeuristicInit( case miopenFloat8: case miopenBFloat8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -215,7 +215,7 @@ bool PerformanceConfigHipImplicitGemmFwdXdlops::IsValid( case miopenFloat8: case miopenBFloat8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -294,7 +294,7 @@ bool ConvHipImplicitGemmFwdXdlops::IsApplicable( case miopenFloat8: case miopenBFloat8: case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -320,7 +320,7 @@ ConvSolution ConvHipImplicitGemmFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp index c2d0b83141..0a8b6eeb03 100644 --- a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp @@ -198,7 +198,7 @@ void PerformanceConfigHipImplicitGemmGroupFwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -240,7 +240,7 @@ bool PerformanceConfigHipImplicitGemmGroupFwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -311,7 +311,7 @@ bool ConvHipImplicitGemmGroupFwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -339,7 +339,7 @@ ConvSolution ConvHipImplicitGemmGroupFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/gemm.cpp b/src/solver/gemm.cpp index 7ad238e7fa..6ae4f5dde7 100644 --- a/src/solver/gemm.cpp +++ b/src/solver/gemm.cpp @@ -87,13 +87,15 @@ bool GemmFwdBase::IsApplicable(const ExecutionContext& ctx, const auto& xDesc = problem.GetIn(); const auto& wDesc = problem.GetWeights(); const auto& yDesc = problem.GetOut(); - if(xDesc.GetType() == miopenInt8x4 || xDesc.GetType() == miopenInt8) - { - // rocBlas needs the output to be int32 always - if(yDesc.GetType() != miopenFloat && yDesc.GetType() != miopenInt32 && - yDesc.GetType() != miopenInt8x4) - return false; - } + + // rocBlas needs the output to be 32-bit always + if(xDesc.GetType() == miopenInt8x4) + return false; + if(xDesc.GetType() == miopenInt8 // + && (yDesc.GetType() != miopenFloat // + && yDesc.GetType() != miopenInt32)) + return false; + const auto rblas_fp8_supported = miopen::StartsWith(ctx.GetStream().GetDeviceName(), "gfx94"); if(problem.IsTensorsCasted()) { @@ -185,8 +187,7 @@ float GemmFwdBase::GetWti(const ExecutionContext&, const conv::ProblemDescriptio n_transpose_packed_MN2NM = 1; n_gemm_strided_batched = conv.group_count; n_transpose_CNHW2NCHW = 1; - if((wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) && - yDesc.GetType() != miopenInt32) + if(wDesc.GetType() == miopenInt8 && yDesc.GetType() != miopenInt32) n_CastTensor = 1; } // 1x1_stride=1 with GEMM and zero workspace @@ -205,8 +206,7 @@ float GemmFwdBase::GetWti(const ExecutionContext&, const conv::ProblemDescriptio n_gemm_strided_batched = conv.group_count; n_gemm_runs = in_n; } - if((wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) && - yDesc.GetType() != miopenInt32) + if(wDesc.GetType() == miopenInt8 && yDesc.GetType() != miopenInt32) n_CastTensor = 1; } else // not 1x1 @@ -216,8 +216,7 @@ float GemmFwdBase::GetWti(const ExecutionContext&, const conv::ProblemDescriptio n_transpose_packed_MN2NM = in_n; n_gemm_strided_batched = conv.group_count; n_gemm_runs = in_n; - if((wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) && - yDesc.GetType() != miopenInt32) + if(wDesc.GetType() == miopenInt8 && yDesc.GetType() != miopenInt32) n_CastTensor = 1; } @@ -410,7 +409,7 @@ ConvSolution GemmFwd1x1_0_2::GetSolution(const ExecutionContext& context, x_t_size *= 2; } - if(wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) + if(wDesc.GetType() == miopenInt8) { const auto xts = GetTypeSize(xDesc.GetType()); if(xts > 0) @@ -494,8 +493,7 @@ ConvSolution GemmFwd1x1_0_2::GetSolution(const ExecutionContext& context, if(handle.IsProfilingEnabled()) time_gemm += handle.GetKernelTime(); - if((wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) && - yDesc.GetType() != miopenInt32) + if(wDesc.GetType() == miopenInt8 && yDesc.GetType() != miopenInt32) { TensorDescriptor ygemmDesc(miopenInt32, yDesc.GetLengths(), yDesc.GetStrides()); @@ -782,7 +780,6 @@ ConvSolution GemmFwd1x1_0_1::GetSolution(const ExecutionContext& context, auto solution = ConvSolution{miopenStatusSuccess}; const auto group_count = conv.group_count; - const auto lowp_quant = conv.lowp_quant; if(group_count > 1) { @@ -881,14 +878,6 @@ ConvSolution GemmFwd1x1_0_1::GetSolution(const ExecutionContext& context, } } - if(wDesc.GetType() == miopenInt8x4 && yDesc.GetType() != miopenInt32) - { - TensorDescriptor ygemmDesc(miopenInt32, yDesc.GetLengths(), yDesc.GetStrides()); - CastTensor(handle, &lowp_quant, ygemmDesc, y, yDesc, y, 0, 0); - if(handle.IsProfilingEnabled()) - time_gemm += handle.GetKernelTime(); - } - if(handle.IsProfilingEnabled()) { handle.ResetKernelTime(); @@ -967,14 +956,6 @@ ConvSolution GemmFwd1x1_0_1::GetSolution(const ExecutionContext& context, if(handle.IsProfilingEnabled()) time += handle.GetKernelTime(); - if(wDesc.GetType() == miopenInt8x4 && yDesc.GetType() != miopenInt32) - { - TensorDescriptor ygemmDesc(miopenInt32, yDesc.GetLengths(), yDesc.GetStrides()); - CastTensor(handle, &lowp_quant, ygemmDesc, y, yDesc, y, 0, 0); - if(handle.IsProfilingEnabled()) - time += handle.GetKernelTime(); - } - if(handle.IsProfilingEnabled()) { handle.ResetKernelTime(); @@ -1268,8 +1249,7 @@ ConvSolution GemmFwdRest::GetSolution(const ExecutionContext& context, } } - if((wDesc.GetType() == miopenInt8 || wDesc.GetType() == miopenInt8x4) && - yDesc.GetType() != miopenInt32) + if(wDesc.GetType() == miopenInt8 && yDesc.GetType() != miopenInt32) { TensorDescriptor ygemmDesc(miopenInt32, yDesc.GetLengths(), yDesc.GetStrides()); diff --git a/src/solver/mlir_common.cpp b/src/solver/mlir_common.cpp index 4101db86f9..4e41b8944a 100644 --- a/src/solver/mlir_common.cpp +++ b/src/solver/mlir_common.cpp @@ -57,7 +57,7 @@ static const char* DTypeName(miopenDataType_t ty) case miopenBFloat16: return "bf16"; case miopenInt32: return "i32"; case miopenInt8: return "i8"; - case miopenInt8x4: return "i8x4"; + case miopenInt8x4: return "i8x4"; // Support discontinued. case miopenFloat8: return "fp8"; case miopenBFloat8: return "bfp8"; } diff --git a/src/tensor.cpp b/src/tensor.cpp index ca4f1afc7a..df0d7c2819 100644 --- a/src/tensor.cpp +++ b/src/tensor.cpp @@ -50,7 +50,7 @@ bool IsDataTypeSupported(miopenDataType_t t) case miopenFloat8: case miopenBFloat8: case miopenInt8: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: return true; } diff --git a/test/conv_common.hpp b/test/conv_common.hpp index 99f964a0f6..e3fa9766e6 100644 --- a/test/conv_common.hpp +++ b/test/conv_common.hpp @@ -217,12 +217,7 @@ tensor get_output_tensor(const miopen::ConvolutionDescriptor& filter, ? input.desc.GetLayout(miopen::tensor_layout_get_default(input.desc.GetSize())) : out_layout; return tensor{filter.GetForwardOutputTensorWithLayout( - input.desc, - weights.desc, - yLayout, - weights.desc.GetType() == miopenInt8x4 - ? (std::is_same{} ? miopenInt32 : miopenFloat) - : miopen_type{})}; + input.desc, weights.desc, yLayout, miopen_type{})}; } enum class ConvApi @@ -572,8 +567,7 @@ struct verify_forward_conv : conv_base auto rout = ref_conv_fwd(input, weights, out, filter); if(filter.mode != miopenTranspose) { - bool is_int8 = - weights.desc.GetType() == miopenInt8 || weights.desc.GetType() == miopenInt8x4; + bool is_int8 = weights.desc.GetType() == miopenInt8; bool is_vect_c = weights.desc.GetVectorLength() > 1; rout.par_for_each([&](auto... is) { if(is_int8 && !is_vect_c) @@ -774,7 +768,7 @@ struct verify_forward_conv : conv_base break; case ConvApi::Find_1_0: case ConvApi::Find_2_0: - if(weights.desc.GetType() == miopenInt8 || weights.desc.GetType() == miopenInt8x4) + if(weights.desc.GetType() == miopenInt8) { bool is_transform = (input.desc.GetLengths()[1] % 4 != 0 || is_vect); @@ -786,10 +780,8 @@ struct verify_forward_conv : conv_base in_len[1] = ((in_len[1] + 3) / 4) * 4; wei_len[1] = ((wei_len[1] + 3) / 4) * 4; - miopen::TensorDescriptor input_vpad_desc(is_vect ? miopenInt8x4 : miopenInt8, - in_len); - miopen::TensorDescriptor weight_vpad_desc(is_vect ? miopenInt8x4 : miopenInt8, - wei_len); + miopen::TensorDescriptor input_vpad_desc(miopenInt8, in_len); + miopen::TensorDescriptor weight_vpad_desc(miopenInt8, wei_len); auto input_vpad = tensor{in_len}; auto weights_vpad = tensor{wei_len}; @@ -1738,8 +1730,8 @@ struct verify_forward_conv_int8 : conv_base in_len[1] = ((in_len[1] + 3) / 4) * 4; wei_len[1] = ((wei_len[1] + 3) / 4) * 4; - miopen::TensorDescriptor input_vpad_desc(is_vect ? miopenInt8x4 : miopenInt8, in_len); - miopen::TensorDescriptor weight_vpad_desc(is_vect ? miopenInt8x4 : miopenInt8, wei_len); + miopen::TensorDescriptor input_vpad_desc(miopenInt8, in_len); + miopen::TensorDescriptor weight_vpad_desc(miopenInt8, wei_len); auto input_vpad = tensor{in_len}; auto weights_vpad = tensor{wei_len}; @@ -2046,7 +2038,7 @@ struct conv_driver : test_driver filter.spatialDim = get_spatial_dim(); else filter.spatialDim = filter_dims.size(); - bool is_int8 = (input.desc.GetType() == miopenInt8 || input.desc.GetType() == miopenInt8x4); + bool is_int8 = (input.desc.GetType() == miopenInt8); filter.mode = cmode_lookup[miopen::ToUpper(conv_mode)]; filter.paddingMode = pmode_lookup[miopen::ToUpper(pad_mode)]; @@ -2360,8 +2352,7 @@ struct conv_driver : test_driver bool skip_backward_weights = is_int8; #if TEST_DIRECT_SUPPORTED_CONFIG_ONLY - if(input.desc.GetType() == miopenInt8 || input.desc.GetType() == miopenInt8x4 || - input.desc.GetType() == miopenBFloat16) + if(input.desc.GetType() == miopenInt8 || input.desc.GetType() == miopenBFloat16) { show_command(); std::cout << "Direct path doesn't support Int8 or BFloat16 type." << std::endl; @@ -2405,7 +2396,8 @@ struct conv_driver : test_driver size_t total_mem; if(is_int8) { - // TODO: Tout here was float which should have been int32 + /// \todo Properly construct the `output` tensor descriptor + /// and get rid of this special "int8" stuff. auto output_int8 = get_output_tensor(filter, input, weights, out_layout); const auto problem = ConvProblemDescription{input.desc, @@ -2422,6 +2414,9 @@ struct conv_driver : test_driver } else { + /// \todo Take into account `skip_forward`, `skip_backward_data`, + /// `skip_backward_weights` and use this path to compute `total_mem` for int8 + /// variations. const auto fwd_problem = miopen::conv::ProblemDescription{ input.desc, weights.desc, diff --git a/test/driver.hpp b/test/driver.hpp index 0a8e2d3080..fd83dd1fc5 100644 --- a/test/driver.hpp +++ b/test/driver.hpp @@ -274,7 +274,7 @@ struct test_driver { case miopenHalf: ss << "--half "; break; case miopenBFloat16: ss << "--bfloat16 "; break; - case miopenInt8x4: + case miopenInt8x4: ss << "--UNSUPPORED_TYPE "; break; case miopenInt8: ss << "--int8 "; break; case miopenInt32: ss << "--int32 "; break; case miopenFloat: ss << "--float "; break; @@ -303,7 +303,7 @@ struct test_driver { case miopenHalf: ret.emplace_back("--half"); break; case miopenBFloat16: ret.emplace_back("--bf16"); break; - case miopenInt8x4: + case miopenInt8x4: ret.emplace_back("--UNSUPPORTED_TYPE"); break; case miopenInt8: ret.emplace_back("--int8"); break; case miopenInt32: ret.emplace_back("--int32"); break; case miopenFloat: ret.emplace_back("--float"); break; diff --git a/test/gpu_reference_kernel.cpp b/test/gpu_reference_kernel.cpp index c3b26a80a9..db62684bec 100644 --- a/test/gpu_reference_kernel.cpp +++ b/test/gpu_reference_kernel.cpp @@ -302,8 +302,6 @@ static std::string miopen_type_to_string(miopenDataType_t type) return "int32"; if(type == miopenInt8) return "int8"; - if(type == miopenInt8x4) - return "int8x4"; if(type == miopenBFloat16) return "bf16"; return "n/a"; diff --git a/test/gtest/conv_embed_db.cpp b/test/gtest/conv_embed_db.cpp index b69fde1b5e..4672bb0404 100644 --- a/test/gtest/conv_embed_db.cpp +++ b/test/gtest/conv_embed_db.cpp @@ -73,12 +73,12 @@ void Run2dDriver(miopenDataType_t prec) case miopenHalf: params = ConfigWithHalf::GetParam(); break; case miopenInt8: params = ConfigWithInt8::GetParam(); break; case miopenBFloat16: params = ConfigWithBFloat16::GetParam(); break; - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenFloat8: case miopenBFloat8: case miopenDouble: - FAIL() << "miopenInt8x4, miopenInt32, miopenFloat8, miopenBFloat8, miopenDouble data type " + FAIL() << "miopenInt32, miopenFloat8, miopenBFloat8, miopenDouble data type " "not supported by conv_embed_db test"; default: params = ConfigWithFloat::GetParam(); diff --git a/test/gtest/conv_hip_igemm_xdlops.cpp b/test/gtest/conv_hip_igemm_xdlops.cpp index 508624c847..3407446557 100644 --- a/test/gtest/conv_hip_igemm_xdlops.cpp +++ b/test/gtest/conv_hip_igemm_xdlops.cpp @@ -65,10 +65,10 @@ void Run2dDriver(miopenDataType_t prec) case miopenHalf: case miopenBFloat16: case miopenFloat: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: - FAIL() << "miopenHalf, miopenBFloat16, miopenFloat, miopenInt8x4, miopenInt32, " + FAIL() << "miopenHalf, miopenBFloat16, miopenFloat, miopenInt32, " "miopenDouble data " "type not supported by " "test_conv_hip_igemm_xdlops test"; diff --git a/test/gtest/conv_igemm_dynamic.cpp b/test/gtest/conv_igemm_dynamic.cpp index 25a4e179c5..59341ac8a0 100644 --- a/test/gtest/conv_igemm_dynamic.cpp +++ b/test/gtest/conv_igemm_dynamic.cpp @@ -68,12 +68,12 @@ void Run2dDriver(miopenDataType_t prec) case miopenHalf: case miopenInt8: case miopenBFloat16: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: case miopenFloat8: case miopenBFloat8: - FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt8x4, miopenInt32, " + FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt32, " "miopenDouble, miopenFloat8, miopenBFloat8 " "data type not supported by conv_igemm_dynamic test"; diff --git a/test/gtest/conv_igemm_mlir.cpp b/test/gtest/conv_igemm_mlir.cpp index d5fc0d426c..29d240645c 100644 --- a/test/gtest/conv_igemm_mlir.cpp +++ b/test/gtest/conv_igemm_mlir.cpp @@ -81,13 +81,13 @@ void Run2dDriver(miopenDataType_t prec) case miopenInt8: params = ConfigWithInt8::GetParam(); break; case miopenFloat: params = ConfigWithFloat::GetParam(); break; case miopenBFloat16: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenFloat8: case miopenBFloat8: case miopenDouble: MIOPEN_THROW(miopenStatusBadParm, - "miopenBFloat16, miopenInt8x4, miopenInt32, miopenFloat8, miopenBFloat8, " + "miopenBFloat16, miopenInt32, miopenFloat8, miopenBFloat8, " "miopenDouble data type not supported by conv_igemm_mlir test"); default: params = ConfigWithFloat::GetParam(); diff --git a/test/gtest/conv_igemm_mlir_xdlops.cpp b/test/gtest/conv_igemm_mlir_xdlops.cpp index 19913093c0..e2c5a3ed8f 100644 --- a/test/gtest/conv_igemm_mlir_xdlops.cpp +++ b/test/gtest/conv_igemm_mlir_xdlops.cpp @@ -53,13 +53,13 @@ void Run2dDriver(miopenDataType_t prec) case miopenInt8: params = ConfigWithInt8::GetParam(); break; case miopenBFloat16: case miopenFloat: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: case miopenFloat8: case miopenBFloat8: MIOPEN_THROW(miopenStatusBadParm, - "miopenBFloat16, miopenFloat, miopenInt8x4, miopenInt32, miopenDouble data " + "miopenBFloat16, miopenFloat, miopenInt32, miopenDouble data " "type not supported by " "conv_igemm_mlir_xdlops test"); diff --git a/test/gtest/conv_trans.cpp b/test/gtest/conv_trans.cpp index 20015336a0..185a459947 100644 --- a/test/gtest/conv_trans.cpp +++ b/test/gtest/conv_trans.cpp @@ -55,10 +55,10 @@ void Run2dDriver(miopenDataType_t prec) case miopenBFloat8: case miopenInt8: case miopenBFloat16: - case miopenInt8x4: + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: - FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt8x4, miopenInt32, miopenDouble " + FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt32, miopenDouble " "data type not supported by " "conv_trans test"; diff --git a/test/gtest/db_sync.cpp b/test/gtest/db_sync.cpp index d7e59468a2..13b647ceac 100644 --- a/test/gtest/db_sync.cpp +++ b/test/gtest/db_sync.cpp @@ -73,7 +73,7 @@ miopenDataType_t GetDataTypeFromString(const std::string& data_type) else if(data_type == "INT8") return miopenInt8; else if(data_type == "INT8x4") - return miopenInt8x4; + return miopenInt8x4; // Support discontinued. Maintain compatibility with old databases. else if(data_type == "INT32") return miopenInt32; else if(data_type == "BF16") diff --git a/test/tensor_holder.hpp b/test/tensor_holder.hpp index 0b05a1e5e8..3fda3b5cf7 100644 --- a/test/tensor_holder.hpp +++ b/test/tensor_holder.hpp @@ -195,9 +195,10 @@ struct tensor tensor(miopen::TensorDescriptor rhs) : desc(std::move(rhs)) { - assert(desc.GetType() == miopen_type{} || - ((miopen_type{} == miopenInt8 || miopen_type{} == miopenInt8x4) && - (desc.GetType() == miopenFloat || desc.GetType() == miopenInt32))); + assert(desc.GetType() == miopen_type{} // + || (miopen_type{} == miopenInt8 // + && (desc.GetType() == miopenFloat // + || desc.GetType() == miopenInt32))); data.resize(desc.GetElementSpace()); } diff --git a/test/tensor_transform.cpp b/test/tensor_transform.cpp index a0585853a0..e87420bb7a 100644 --- a/test/tensor_transform.cpp +++ b/test/tensor_transform.cpp @@ -422,7 +422,7 @@ struct tensor_transform_driver : test_driver } // Test tensor scale addition - if(miopen_type{} == miopenInt8 || miopen_type{} == miopenInt8x4) + if(miopen_type{} == miopenInt8) return; super_src = tensor{superLens_src}.generate(tensor_elem_gen_integer{max_value}); From 6737d66a66206f1ad90ab9da5871ada2eeb02875 Mon Sep 17 00:00:00 2001 From: atamazov Date: Fri, 6 Oct 2023 21:36:24 +0300 Subject: [PATCH 2/8] rocblas-remove-int8x4-uses(02) Removed the rest of INT8x4 support (except the driver) --- driver/conv_driver.hpp | 4 ++++ include/miopen/miopen.h | 1 - src/check_numerics.cpp | 1 - src/driver_arguments.cpp | 4 +--- src/gemm_v2.cpp | 16 ++++++++-------- src/hip/general_tensor_reorder_sol.cpp | 2 -- src/include/miopen/conv/problem_description.hpp | 1 - src/include/miopen/datatype.hpp | 5 ----- src/include/miopen/tensor.hpp | 1 - src/include/miopen/visit_float.hpp | 1 - src/ocl/tensorocl.cpp | 6 ------ src/pooling_api.cpp | 1 - src/reducetensor.cpp | 3 --- src/solver/batchnorm/forward_inference_ck.cpp | 2 -- .../conv_ck_igemm_fwd_bias_activ_fused.cpp | 4 ---- ...v_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 4 ---- ...v_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 4 ---- ...v_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 4 ---- .../conv_hip_implicit_gemm_bwd_data_xdlops.cpp | 4 ---- src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp | 4 ---- ...conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp | 4 ---- src/solver/gemm.cpp | 2 -- src/solver/mlir_common.cpp | 1 - src/tensor.cpp | 1 - test/driver.hpp | 2 -- test/gtest/conv_embed_db.cpp | 1 - test/gtest/conv_hip_igemm_xdlops.cpp | 1 - test/gtest/conv_igemm_dynamic.cpp | 1 - test/gtest/conv_igemm_mlir.cpp | 1 - test/gtest/conv_igemm_mlir_xdlops.cpp | 1 - test/gtest/conv_trans.cpp | 1 - test/gtest/db_sync.cpp | 2 -- 32 files changed, 13 insertions(+), 77 deletions(-) diff --git a/driver/conv_driver.hpp b/driver/conv_driver.hpp index 5f67b83588..3c28859776 100644 --- a/driver/conv_driver.hpp +++ b/driver/conv_driver.hpp @@ -83,6 +83,10 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_PAD_BUFFERS_2M) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_USE_GPU_REFERENCE) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_SUBNORM_PERCENTAGE) +// Support in the library discontinued, but left in the driver +// for reference in the future. +#define miopenInt8x4 (static_cast(4)) + #if MIOPEN_BACKEND_OPENCL #define STATUS_SUCCESS CL_SUCCESS typedef cl_int status_t; diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 7a85ec5875..fd978c6200 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -350,7 +350,6 @@ typedef enum miopenFloat = 1, /*!< 32-bit floating point (Fully supported) */ miopenInt32 = 2, /*!< 32-bit int point (Partially supported) */ miopenInt8 = 3, /*!< 8-bit int point (Partially supported) */ - miopenInt8x4 = 4, /*!< Pack of four Int8 in NCHW_VECT_C format (Support discontinued) */ miopenBFloat16 = 5, /*!< 16-bit binary floating point (8-bit exponent, 7-bit fraction) (Partially supported) */ miopenDouble = 6, /*!< 64-bit floating point (Partially supported) */ diff --git a/src/check_numerics.cpp b/src/check_numerics.cpp index 50cb0af4af..12210069df 100644 --- a/src/check_numerics.cpp +++ b/src/check_numerics.cpp @@ -64,7 +64,6 @@ std::string GetKernelName(miopenDataType_t data_type) case miopenBFloat8: return {"check_numerics_bf8"}; case miopenInt32: case miopenInt8: - case miopenInt8x4: // Support discontinued. case miopenDouble: default: return {""}; } diff --git a/src/driver_arguments.cpp b/src/driver_arguments.cpp index 53aa6b41ab..d9e82d270a 100644 --- a/src/driver_arguments.cpp +++ b/src/driver_arguments.cpp @@ -50,7 +50,7 @@ void ConvDataType(std::stringstream& ss, const miopen::TensorDescriptor& desc) { ss << "convbfp16"; } - else if(desc.GetType() == miopenInt8 || desc.GetType() == miopenInt8x4) + else if(desc.GetType() == miopenInt8) { ss << "convint8"; } @@ -190,8 +190,6 @@ std::string ConvArgsForMIOpenDriver(const miopen::TensorDescriptor& xDesc, ss << " -g " << convDesc.group_count; if(print_for_conv_driver) ss << " -F " << std::to_string(static_cast(conv_dir)) << " -t 1"; // clang-format on - if(xDesc.GetType() == miopenInt8x4) - ss << " -Z 1"; if(immediate_mode_solver_id.has_value()) { ss << " -S " << *immediate_mode_solver_id; diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index 35add906a1..a44daf962d 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ -610,9 +610,9 @@ miopenStatus_t CallGemm(const Handle& handle, }; break; - case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); + MIOPEN_THROW(miopenStatusBadParm, + "miopenDouble data type not supported by MIOpenGEMM."); }; break; } @@ -876,10 +876,10 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, break; } - case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); - }; + MIOPEN_THROW(miopenStatusBadParm, + "miopenDouble data type not supported by MIOpenGEMM."); + } break; } @@ -1140,10 +1140,10 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, break; } - case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); - }; + MIOPEN_THROW(miopenStatusBadParm, + "miopenDouble data type not supported by MIOpenGEMM."); + } break; } diff --git a/src/hip/general_tensor_reorder_sol.cpp b/src/hip/general_tensor_reorder_sol.cpp index f78aa358f8..ae85048f63 100644 --- a/src/hip/general_tensor_reorder_sol.cpp +++ b/src/hip/general_tensor_reorder_sol.cpp @@ -137,8 +137,6 @@ GenericReorderSolutionImpl::GenericReorderSolutionImpl(miopenDataType_t data_typ order_2(order_2_), order_3(order_3_) { - if(data_type == miopenInt8x4) - MIOPEN_THROW("These data type are not supported"); std::size_t data_size = miopen::GetTypeSize(data_type); kernel_param_heuristic = tensor_reorder::HeuristicGet(data_size, dim_0, dim_1, dim_2, dim_3); } diff --git a/src/include/miopen/conv/problem_description.hpp b/src/include/miopen/conv/problem_description.hpp index 438e1d306a..12d6f08811 100644 --- a/src/include/miopen/conv/problem_description.hpp +++ b/src/include/miopen/conv/problem_description.hpp @@ -51,7 +51,6 @@ inline std::string GetDataTypeName(miopenDataType_t data_type) case miopenFloat: return "FP32"; case miopenHalf: return "FP16"; case miopenInt8: return "INT8"; - case miopenInt8x4: return "INT8x4"; case miopenInt32: return "INT32"; case miopenBFloat16: return "BF16"; case miopenDouble: return "FP64"; diff --git a/src/include/miopen/datatype.hpp b/src/include/miopen/datatype.hpp index 29057e3de9..2260cb5b4f 100644 --- a/src/include/miopen/datatype.hpp +++ b/src/include/miopen/datatype.hpp @@ -53,10 +53,6 @@ inline std::string GetDataType(miopenDataType_t type) type_str = "bfloat16"; } break; - case miopenInt8x4: { - type_str = "UNSUPPORTED_TYPE"; - } - break; case miopenInt8: { type_str = "int8_t"; } @@ -157,7 +153,6 @@ inline KernelBuildParameters GetDataTypeKBP(miopenDataType_t type) case miopenDouble: use_fp64 = 1; break; case miopenFloat8: use_fp8 = 1; break; case miopenBFloat8: use_bfp8 = 1; break; - case miopenInt8x4: // fallthrough default: MIOPEN_THROW("Only float, half, bfloat16, int8, float8, bfloat8 data types are supported."); break; diff --git a/src/include/miopen/tensor.hpp b/src/include/miopen/tensor.hpp index a133ab4adc..1bf2bcbaae 100644 --- a/src/include/miopen/tensor.hpp +++ b/src/include/miopen/tensor.hpp @@ -101,7 +101,6 @@ inline std::size_t GetTypeSize(miopenDataType_t d) case miopenFloat: return 4; case miopenHalf: case miopenBFloat16: return 2; - case miopenInt8x4: break; case miopenInt8: case miopenFloat8: case miopenBFloat8: return 1; diff --git a/src/include/miopen/visit_float.hpp b/src/include/miopen/visit_float.hpp index 35e1ae6ae7..ae84b7f0a9 100644 --- a/src/include/miopen/visit_float.hpp +++ b/src/include/miopen/visit_float.hpp @@ -91,7 +91,6 @@ void visit_float(miopenDataType_t t, F f) f(as_float{}); break; } - case miopenInt8x4: MIOPEN_THROW("miopenInt8x4: Support discontinued."); } } diff --git a/src/ocl/tensorocl.cpp b/src/ocl/tensorocl.cpp index 1e1f8b1df9..6329d33f69 100644 --- a/src/ocl/tensorocl.cpp +++ b/src/ocl/tensorocl.cpp @@ -1944,7 +1944,6 @@ std::string GetCastTensorBuildOptionFromType(const std::string& buildOption, mio case miopenDouble: // TODO MIOPEN_THROW(miopenStatusBadParm, "miopenDouble data type not supported in cast tensor."); - case miopenInt8x4: // fallthrough default: MIOPEN_THROW(miopenStatusBadParm, "Invalid data type in cast tensor desc."); } } @@ -1968,11 +1967,6 @@ void CastTensor(const Handle& handle, MIOPEN_THROW(miopenStatusBadParm, "Tensor dimension lengths do not match."); } - if(srcDesc.GetType() == miopenInt8x4 || dstDesc.GetType() == miopenInt8x4) - { - MIOPEN_THROW(miopenStatusBadParm, "Tensor cast operation is not supported for int8x4."); - } - auto flat_descriptors = GetConsistentFlattenedTensorDescriptors(srcDesc, dstDesc); const TensorDescriptor& srcDesc_flat = std::get<0>(flat_descriptors); const TensorDescriptor& dstDesc_flat = std::get<1>(flat_descriptors); diff --git a/src/pooling_api.cpp b/src/pooling_api.cpp index bf318f7b78..a2c792f8bb 100644 --- a/src/pooling_api.cpp +++ b/src/pooling_api.cpp @@ -50,7 +50,6 @@ inline void Pooling_logging_cmd(const miopenPoolingDescriptor_t poolDesc, case miopenFloat: ss << "pool"; break; case miopenInt32: case miopenInt8: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index 9cabbc03f6..6ca1067fb8 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -211,7 +211,6 @@ inline int GetDataTypeSize(miopenDataType_t t) case miopenFloat8: case miopenBFloat8: case miopenInt8: return (1); - case miopenInt8x4: return (4); // Support discontinued. case miopenBFloat16: return (2); case miopenInt32: return (4); default: MIOPEN_THROW("Only float, half, double, bfloat16, int8 data types are supported."); @@ -268,7 +267,6 @@ inline int GetDataTypeId(miopenDataType_t t) case miopenBFloat16: return (static_cast('B')); case miopenDouble: return (static_cast('D')); case miopenInt8: - case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenInt32: return (static_cast('O')); @@ -308,7 +306,6 @@ static ck::DataTypeEnum_t mapDataTypeId(miopenDataType_t t) case miopenBFloat16: return DataTypeEnum_t::BFloat16; case miopenDouble: return DataTypeEnum_t::Double; case miopenInt8: return DataTypeEnum_t::Int8; - case miopenInt8x4: return DataTypeEnum_t::Int8x4; case miopenInt32: return DataTypeEnum_t::Int32; case miopenFloat8: case miopenBFloat8: diff --git a/src/solver/batchnorm/forward_inference_ck.cpp b/src/solver/batchnorm/forward_inference_ck.cpp index ff17432a62..d1d67ddea6 100644 --- a/src/solver/batchnorm/forward_inference_ck.cpp +++ b/src/solver/batchnorm/forward_inference_ck.cpp @@ -200,7 +200,6 @@ bool BnCKFwdInference::IsApplicable(const ExecutionContext& context, return (CheckCKApplicability(bn_problem) != -1); case miopenInt32: case miopenInt8: - case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: default: MIOPEN_THROW("Unsupported datatype"); @@ -244,7 +243,6 @@ BnCKFwdInference::GetSolution(const ExecutionContext& context, break; case miopenInt8: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: default: MIOPEN_THROW("Unsupported datatype"); diff --git a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp index e2df6f8097..d9c0410d98 100644 --- a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp +++ b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp @@ -287,7 +287,6 @@ void PerformanceConfigConvCKIgemmFwdBiasActivFused::HeuristicInit( case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -342,7 +341,6 @@ bool PerformanceConfigConvCKIgemmFwdBiasActivFused::IsValid( case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -435,7 +433,6 @@ bool ConvCKIgemmFwdBiasActivFused::IsApplicable(const FusionContext& ctx, case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -469,7 +466,6 @@ ConvSolution ConvCKIgemmFwdBiasActivFused::GetSolution( case miopenInt8: case miopenFloat: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 94983c7f0e..cd95c85cf6 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -210,7 +210,6 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -253,7 +252,6 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -324,7 +322,6 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -352,7 +349,6 @@ ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index a21c9ba300..4a5ecc8a3f 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -210,7 +210,6 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -253,7 +252,6 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -322,7 +320,6 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -350,7 +347,6 @@ ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 5c92f57142..f09fca29fd 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -206,7 +206,6 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -249,7 +248,6 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -320,7 +318,6 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -348,7 +345,6 @@ ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::WrWInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp index 5aa17e75bf..d55ded78de 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp @@ -172,7 +172,6 @@ void PerformanceConfigHipImplicitGemmBwdXdlops::HeuristicInit( case miopenBFloat8: case miopenInt8: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -214,7 +213,6 @@ bool PerformanceConfigHipImplicitGemmBwdXdlops::IsValid( case miopenBFloat8: case miopenInt8: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -294,7 +292,6 @@ bool ConvHipImplicitGemmBwdXdlops::IsApplicable( case miopenBFloat8: case miopenInt8: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -318,7 +315,6 @@ ConvSolution ConvHipImplicitGemmBwdXdlops::GetSolution( problem, config.kernel_id); case miopenInt8: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp index 7a9c322bc7..697ab33f70 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp @@ -172,7 +172,6 @@ void PerformanceConfigHipImplicitGemmFwdXdlops::HeuristicInit( case miopenFloat8: case miopenBFloat8: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -215,7 +214,6 @@ bool PerformanceConfigHipImplicitGemmFwdXdlops::IsValid( case miopenFloat8: case miopenBFloat8: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -294,7 +292,6 @@ bool ConvHipImplicitGemmFwdXdlops::IsApplicable( case miopenFloat8: case miopenBFloat8: case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -320,7 +317,6 @@ ConvSolution ConvHipImplicitGemmFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp index 0a8b6eeb03..0c5a91b627 100644 --- a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp @@ -198,7 +198,6 @@ void PerformanceConfigHipImplicitGemmGroupFwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -240,7 +239,6 @@ bool PerformanceConfigHipImplicitGemmGroupFwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -311,7 +309,6 @@ bool ConvHipImplicitGemmGroupFwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -339,7 +336,6 @@ ConvSolution ConvHipImplicitGemmGroupFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/gemm.cpp b/src/solver/gemm.cpp index 6ae4f5dde7..2cfbd257dc 100644 --- a/src/solver/gemm.cpp +++ b/src/solver/gemm.cpp @@ -89,8 +89,6 @@ bool GemmFwdBase::IsApplicable(const ExecutionContext& ctx, const auto& yDesc = problem.GetOut(); // rocBlas needs the output to be 32-bit always - if(xDesc.GetType() == miopenInt8x4) - return false; if(xDesc.GetType() == miopenInt8 // && (yDesc.GetType() != miopenFloat // && yDesc.GetType() != miopenInt32)) diff --git a/src/solver/mlir_common.cpp b/src/solver/mlir_common.cpp index 4e41b8944a..e54da1ac89 100644 --- a/src/solver/mlir_common.cpp +++ b/src/solver/mlir_common.cpp @@ -57,7 +57,6 @@ static const char* DTypeName(miopenDataType_t ty) case miopenBFloat16: return "bf16"; case miopenInt32: return "i32"; case miopenInt8: return "i8"; - case miopenInt8x4: return "i8x4"; // Support discontinued. case miopenFloat8: return "fp8"; case miopenBFloat8: return "bfp8"; } diff --git a/src/tensor.cpp b/src/tensor.cpp index df0d7c2819..ba1b2ba106 100644 --- a/src/tensor.cpp +++ b/src/tensor.cpp @@ -50,7 +50,6 @@ bool IsDataTypeSupported(miopenDataType_t t) case miopenFloat8: case miopenBFloat8: case miopenInt8: - case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: return true; } diff --git a/test/driver.hpp b/test/driver.hpp index fd83dd1fc5..bcbfe7d676 100644 --- a/test/driver.hpp +++ b/test/driver.hpp @@ -274,7 +274,6 @@ struct test_driver { case miopenHalf: ss << "--half "; break; case miopenBFloat16: ss << "--bfloat16 "; break; - case miopenInt8x4: ss << "--UNSUPPORED_TYPE "; break; case miopenInt8: ss << "--int8 "; break; case miopenInt32: ss << "--int32 "; break; case miopenFloat: ss << "--float "; break; @@ -303,7 +302,6 @@ struct test_driver { case miopenHalf: ret.emplace_back("--half"); break; case miopenBFloat16: ret.emplace_back("--bf16"); break; - case miopenInt8x4: ret.emplace_back("--UNSUPPORTED_TYPE"); break; case miopenInt8: ret.emplace_back("--int8"); break; case miopenInt32: ret.emplace_back("--int32"); break; case miopenFloat: ret.emplace_back("--float"); break; diff --git a/test/gtest/conv_embed_db.cpp b/test/gtest/conv_embed_db.cpp index 4672bb0404..36d1e28123 100644 --- a/test/gtest/conv_embed_db.cpp +++ b/test/gtest/conv_embed_db.cpp @@ -73,7 +73,6 @@ void Run2dDriver(miopenDataType_t prec) case miopenHalf: params = ConfigWithHalf::GetParam(); break; case miopenInt8: params = ConfigWithInt8::GetParam(); break; case miopenBFloat16: params = ConfigWithBFloat16::GetParam(); break; - case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenFloat8: case miopenBFloat8: diff --git a/test/gtest/conv_hip_igemm_xdlops.cpp b/test/gtest/conv_hip_igemm_xdlops.cpp index 3407446557..14e1738a35 100644 --- a/test/gtest/conv_hip_igemm_xdlops.cpp +++ b/test/gtest/conv_hip_igemm_xdlops.cpp @@ -65,7 +65,6 @@ void Run2dDriver(miopenDataType_t prec) case miopenHalf: case miopenBFloat16: case miopenFloat: - case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: FAIL() << "miopenHalf, miopenBFloat16, miopenFloat, miopenInt32, " diff --git a/test/gtest/conv_igemm_dynamic.cpp b/test/gtest/conv_igemm_dynamic.cpp index 59341ac8a0..6e2863eb8f 100644 --- a/test/gtest/conv_igemm_dynamic.cpp +++ b/test/gtest/conv_igemm_dynamic.cpp @@ -68,7 +68,6 @@ void Run2dDriver(miopenDataType_t prec) case miopenHalf: case miopenInt8: case miopenBFloat16: - case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: case miopenFloat8: diff --git a/test/gtest/conv_igemm_mlir.cpp b/test/gtest/conv_igemm_mlir.cpp index 29d240645c..c99d0a03c4 100644 --- a/test/gtest/conv_igemm_mlir.cpp +++ b/test/gtest/conv_igemm_mlir.cpp @@ -81,7 +81,6 @@ void Run2dDriver(miopenDataType_t prec) case miopenInt8: params = ConfigWithInt8::GetParam(); break; case miopenFloat: params = ConfigWithFloat::GetParam(); break; case miopenBFloat16: - case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenFloat8: case miopenBFloat8: diff --git a/test/gtest/conv_igemm_mlir_xdlops.cpp b/test/gtest/conv_igemm_mlir_xdlops.cpp index e2c5a3ed8f..ea14e3b68e 100644 --- a/test/gtest/conv_igemm_mlir_xdlops.cpp +++ b/test/gtest/conv_igemm_mlir_xdlops.cpp @@ -53,7 +53,6 @@ void Run2dDriver(miopenDataType_t prec) case miopenInt8: params = ConfigWithInt8::GetParam(); break; case miopenBFloat16: case miopenFloat: - case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: case miopenFloat8: diff --git a/test/gtest/conv_trans.cpp b/test/gtest/conv_trans.cpp index 185a459947..a5266d9f9c 100644 --- a/test/gtest/conv_trans.cpp +++ b/test/gtest/conv_trans.cpp @@ -55,7 +55,6 @@ void Run2dDriver(miopenDataType_t prec) case miopenBFloat8: case miopenInt8: case miopenBFloat16: - case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt32, miopenDouble " diff --git a/test/gtest/db_sync.cpp b/test/gtest/db_sync.cpp index 13b647ceac..13f0297724 100644 --- a/test/gtest/db_sync.cpp +++ b/test/gtest/db_sync.cpp @@ -72,8 +72,6 @@ miopenDataType_t GetDataTypeFromString(const std::string& data_type) return miopenHalf; else if(data_type == "INT8") return miopenInt8; - else if(data_type == "INT8x4") - return miopenInt8x4; // Support discontinued. Maintain compatibility with old databases. else if(data_type == "INT32") return miopenInt32; else if(data_type == "BF16") From b6908bcf27490ea16637135bcbec092c75076aae Mon Sep 17 00:00:00 2001 From: atamazov Date: Fri, 6 Oct 2023 21:44:39 +0300 Subject: [PATCH 3/8] rocblas-remove-int8x4-uses(04) Fix merge error --- src/include/miopen/hip_f8_impl.hpp | 362 +--------------- src/include/miopen/hip_float8.hpp | 652 +---------------------------- 2 files changed, 2 insertions(+), 1012 deletions(-) diff --git a/src/include/miopen/hip_f8_impl.hpp b/src/include/miopen/hip_f8_impl.hpp index c7a62f9f72..22052778a0 120000 --- a/src/include/miopen/hip_f8_impl.hpp +++ b/src/include/miopen/hip_f8_impl.hpp @@ -1,361 +1 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (c) 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - *******************************************************************************/ -// #include -// #include -namespace miopen_hip_f8_impl { - -#ifndef __HIP_PLATFORM_HCC__ -using hip_bfloat16 = bfloat16; -using half = half_float::half; -#endif - -template -MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8_no_range_reduce(T _x, - bool stoch = false, - uint32_t rng = 0) -{ - static_assert(we == 5, "we==5"); - static_assert(sizeof(T) == 2, "no_range_reduce only works for float16"); - - uint32_t x = *(reinterpret_cast(&_x)); - - uint32_t head, mantissa, exponent; - uint32_t sign; - - const int mfmt = 10; - head = x & 0xFC00; - mantissa = x & 0x3FF; - exponent = (head >> 10) & 0x1F; - sign = head >> 15; - uint32_t signed_inf = (sign << 7) + (((1 << we) - 1) << wm); - - if((x & 0x7FFF) == 0x7C00) - return signed_inf; - if((x & 0x7C00) == 0x7C00) - return signed_inf + 1; - if(x == 0) - return 0; - if(x == 0x8000) - return 0x80; - - // uint32_t nextbit = 1<<(mfmt-wm-1); - uint32_t drop_mask = (1 << (mfmt - wm)) - 1; - - // const int max_exp = (1<= (2 << mfmt)) - { - mantissa >>= 1; - exponent++; - } - else if(mantissa >= (1 << mfmt) && exponent == 0) - { - exponent++; - } - mantissa >>= (mfmt - wm); - mantissa &= (1 << wm) - 1; - if(exponent == 31) - return (sign << 7) | 0x7B; - return (sign << 7) | (exponent << wm) | mantissa; -} - -template -MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8(T _x, bool stoch, uint32_t rng) -{ - constexpr bool is_half = std::is_same::value; - constexpr bool is_float = std::is_same::value; - static_assert(wm + we == 7, "wm+we==7"); - static_assert(is_half || is_float, "Only half and float can be cast to f8"); - - if(sizeof(T) == 2 && we == 5 && !negative_zero_nan) - return cast_to_f8_no_range_reduce<2, 5, half>(static_cast(_x), stoch, rng); - - const int mfmt = (sizeof(T) == 4) ? 23 : 10; - uint32_t x; - if(sizeof(T) == 4) - x = *(reinterpret_cast(&_x)); // cppcheck-suppress invalidPointerCast - else - x = *(reinterpret_cast(&_x)); // cppcheck-suppress invalidPointerCast - - uint32_t head, mantissa; - int exponent, bias; - uint32_t sign; - - if(sizeof(T) == 4) - { - head = x & 0xFF800000; - mantissa = x & 0x7FFFFF; - exponent = (head >> 23) & 0xFF; - sign = head >> 31; - bias = 127; - } - else - { - head = x & 0xFC00; - mantissa = x & 0x3FF; - exponent = (head >> 10) & 0x1F; - sign = head >> 15; - bias = 15; - } - - uint32_t signed_inf = (sign << 7) + (((1 << we) - 1) << wm); - - if(negative_zero_nan) - { - if(sizeof(T) == 4) - { - if((x & 0x7F800000) == 0x7F800000) - return 0x80; - } - else - { - // if(__hisinf(x) || __hisnan(x)) - if((x & 0x7C00) == 0x7C00) - return 0x80; - } - } - else - { - if(sizeof(T) == 4) - { - if((x & 0x7F800000) == 0x7F800000) - return signed_inf + (mantissa != 0 ? 1 : 0); - } - else - { - if((x & 0x7C00) == 0x7C00) - return signed_inf + (mantissa != 0 ? 1 : 0); - } - } - if(x == 0) - return 0; - // First need to check if it is normal or denorm as there is a difference of implict 1 - // Then need to adjust the exponent to align with the F8 exponent, in the meanwhile, shift - // The mantissa. Then for stochastic rounding, add rng to mantissa and truncate. And for - // RNE, no need to add rng. Then probably need to check whether there is carry and adjust - // exponent and mantissa again - - // For IEEE bias mode, the bias is 2^(k-1) -1 where k is the width of exponent bits - const int f8_bias = (1 << (we - 1)) - 1 + (negative_zero_nan ? 1 : 0); - const int f8_denormal_act_exponent = 1 - f8_bias; // actual exponent of f8 denormal - // act_exponent is the actual exponent of fp32/fp16 (after subtracting bias) - // f8_exponent is the converted f8 exponent with bias encoding - // exponent_diff is the diff between fp32/fp16 exponent and f8 exponent, - // the difference needs to be adjusted and mantissa shifted - int act_exponent, f8_exponent, exponent_diff; - - if(exponent == 0) - { // fp32/fp16 is in denormal. - /* fp32 denormal is below 2^-127 so it is usually not a concern here, we mostly concern fp16 - here. In this case, f8 is usually in denormal. But there could be exceptions. fp16 - denormal has exponent bias 15 while bf8 with NANOO has exponent bias 16. It means that - there are some numbers in fp16 denormal but they are bf8 (NANOO) normals - smallest bf8 - (NANOO) normal is 2^-15. fp16 numbers where exponent==0 (actual exponent -14) and highest - bit of mantissa is 1 are bf8 (NANOO) normal. In this case, the fp16 mantissa should be - shift left by 1 */ - act_exponent = exponent - bias + 1; - exponent_diff = f8_denormal_act_exponent - - act_exponent; // actual exponent is exponent-bias+1 as it is denormal - } - else - { // fp32/fp16 is normal with implicit 1 - act_exponent = exponent - bias; - if(act_exponent <= f8_denormal_act_exponent) - { - /* This is the case where fp32/fp16 is normal but it is in f8 denormal range. - For example fp8 nanoo mode, denormal exponent is -7, but if the fp32/fp16 - actual exponent is -7, it is actually larger due to the implict 1, - Therefore it needs to be adjust to -6 and mantissa shift right by 1. - So for fp32/fp16, exponent -8 is the cut point to convert to fp8 nanoo */ - exponent_diff = f8_denormal_act_exponent - act_exponent; - } - else - { // both fp32/fp16 and f8 are in normal range - exponent_diff = - 0; // exponent_diff=0 does not mean there is no difference for this case, - // act_exponent could be larger. Just that it does not need shift mantissa - } - mantissa += (1 << mfmt); // Add the implicit 1 into mantissa - } - const long tmp = (mfmt - wm + exponent_diff); - if(tmp == 33) - printf("Gotcha"); - - bool midpoint = (mantissa & ((static_cast(1) << (mfmt - wm + exponent_diff)) - 1)) == - (static_cast(1) << (mfmt - wm + exponent_diff - 1)); - /* This part is a bit tricky. The judgment of whether it is a tie needs to be done before we - shift right as shift right could rip off some residual part and make something not midpoint - look like midpoint. For example, the fp16 number 0x1002 (0 00100 0000000010), it is larger - than midpoint, but after shift right by 4 bits, it would look like midpoint. - */ - - if(exponent_diff > 0) - mantissa >>= exponent_diff; - else if(exponent_diff == -1) - mantissa <<= -exponent_diff; - bool implicit_one = mantissa & (1 << mfmt); - // if there is no implict 1, it means the f8 is denormal and need to adjust to denorm exponent - f8_exponent = - (act_exponent + exponent_diff) /*actual f8 exponent*/ + f8_bias - (implicit_one ? 0 : 1); - - // Now we have the exponent and mantissa adjusted - uint32_t drop_mask = (1 << (mfmt - wm)) - 1; - bool odd = - mantissa & (1 << (mfmt - wm)); // if the least significant bit that is not truncated is 1 - mantissa += (stoch ? rng : (midpoint ? (odd ? mantissa : mantissa - 1) : mantissa)) & drop_mask; - - // Now we deal with overflow - if(f8_exponent == 0) - { - if((1 << mfmt) & mantissa) - { - f8_exponent = 1; // denormal overflow to become normal, promote exponent - } - } - else - { - if((1 << (mfmt + 1)) & mantissa) - { - mantissa >>= 1; - f8_exponent++; - } - } - - mantissa >>= (mfmt - wm); - - // above range: quantize to maximum possible float of the same sign - const int max_exp = (1 << we) - (negative_zero_nan ? 1 : 2); - if(f8_exponent > max_exp) - { - if(clip) - { - mantissa = (1 << wm) - 1; - f8_exponent = max_exp; - } - else - { - return signed_inf; - } - } - - if(f8_exponent == 0 && mantissa == 0) - return negative_zero_nan ? 0 : (sign << 7); - mantissa &= (1 << wm) - 1; - return (sign << 7) | (f8_exponent << wm) | mantissa; -} - -template -MIOPEN_HIP_HOST_DEVICE T cast_from_f8(uint8_t x) -{ - constexpr bool is_half = std::is_same::value; - constexpr bool is_float = std::is_same::value; - static_assert(is_half || is_float, "only half and float are supported"); - - constexpr int weo = is_half ? 5 : 8; - constexpr int wmo = is_half ? 10 : (is_float ? 23 : 7); - - T fInf, fNegInf, fNaN, fNeg0; - if(is_half) - { - const uint16_t ihInf = 0x7C00; - const uint16_t ihNegInf = 0xFC00; - const uint16_t ihNaN = 0x7C01; - const uint16_t ihNeg0 = 0x8000; - fInf = *(reinterpret_cast(&ihInf)); - fNegInf = *(reinterpret_cast(&ihNegInf)); - fNaN = *(reinterpret_cast(&ihNaN)); - fNeg0 = *(reinterpret_cast(&ihNeg0)); - } - else if(is_float) - { - const uint32_t ifInf = 0x7F800000; - const uint32_t ifNegInf = 0xFF800000; - const uint32_t ifNaN = 0x7F800001; - const uint32_t ifNeg0 = 0x80000000; - fInf = *(reinterpret_cast(&ifInf)); // cppcheck-suppress invalidPointerCast - fNegInf = - *(reinterpret_cast(&ifNegInf)); // cppcheck-suppress invalidPointerCast - fNaN = *(reinterpret_cast(&ifNaN)); // cppcheck-suppress invalidPointerCast - fNeg0 = *(reinterpret_cast(&ifNeg0)); // cppcheck-suppress invalidPointerCast - } - - if(x == 0) - return static_cast(0); - - uint32_t sign = x >> 7; - uint32_t mantissa = x & ((1 << wm) - 1); - int exponent = (x & 0x7F) >> wm; - if(negative_zero_nan) - { - if(x == 0x80) - return fNaN; - } - else - { - if(x == 0x80) - return fNeg0; - if(exponent == ((1 << we) - 1)) - return (mantissa == 0) ? (sign ? fNegInf : fInf) : fNaN; - } - typename std::conditional::type retval; - if(we == 5 && is_half && !negative_zero_nan) - { - retval = x << 8; - return *(reinterpret_cast(&retval)); - } - - const int exp_low_cutoff = (1 << (weo - 1)) - (1 << (we - 1)) + 1 - (negative_zero_nan ? 1 : 0); - - // subnormal input - if(exponent == 0) - { - // guaranteed mantissa!=0 since cases 0x0 and 0x80 are handled above - int sh = 1 + __builtin_clz(mantissa) - (32 - wm); - mantissa <<= sh; - exponent += 1 - sh; - mantissa &= ((1 << wm) - 1); - } - exponent += exp_low_cutoff - 1; - mantissa <<= wmo - wm; - - // subnormal output (occurs when T=half, we=5, negative_zero_nan=true) - if(exponent <= 0) - { - mantissa |= 1 << wmo; - mantissa >>= 1 - exponent; - exponent = 0; - } - - if(sizeof(T) == 2) - retval = (sign << 15) | (exponent << 10) | mantissa; - else - retval = (sign << 31) | (exponent << 23) | mantissa; - return *(reinterpret_cast(&retval)); -} - -} // namespace miopen_hip_f8_impl +../../kernels/hip_f8_impl.hpp \ No newline at end of file diff --git a/src/include/miopen/hip_float8.hpp b/src/include/miopen/hip_float8.hpp index dd57c9ca5b..5e16a70c91 120000 --- a/src/include/miopen/hip_float8.hpp +++ b/src/include/miopen/hip_float8.hpp @@ -1,651 +1 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (c) 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - *******************************************************************************/ -#pragma once -#ifndef MIOPEN_ENABLE_F8_DEVICE_CODE -#define MIOPEN_ENABLE_F8_DEVICE_CODE 0 -#endif - -// FP8 header version 0.4, 2021/05/11 -#if defined __HIP_PLATFORM_HCC__ && MIOPEN_ENABLE_F8_DEVICE_CODE -// MIOpen by default does not have device code in the regular compilation paths, -// therefore, when this file is used from the host side, compilation takes much -// longer. By guarding the __device__ directive we can control that such compilation -// only happens for kernels which include this file. -#define MIOPEN_HIP_HOST_DEVICE __host__ __device__ -#else -#define MIOPEN_HIP_HOST_DEVICE -#endif - -#define USE_SIMPLER_HIP_F8x8 0 - -#ifndef MIOPEN_FP8_CLIPPING -#define MIOPEN_FP8_CLIPPING 1 -#endif - -#ifndef MIOPEN_FP8_IEEE_EXPONENT_BIAS -#define MIOPEN_FP8_IEEE_EXPONENT_BIAS 1 -#endif - -namespace miopen_hip_f8_impl { - -template -MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8(T _x, bool stoch = false, uint32_t rng = 0); - -template -MIOPEN_HIP_HOST_DEVICE T cast_from_f8(uint8_t x); - -} // namespace miopen_hip_f8_impl - -#include "hip_f8_impl.hpp" - -namespace miopen_f8 { -enum class hip_f8_type -{ - bf8 = 0, // 1:5:2 - fp8 = 1 // 1:4:3 -}; - -enum class hip_f8_rounding_mode -{ - standard, - stochastic -}; - -inline MIOPEN_HIP_HOST_DEVICE bool get_hip_f8_bias_mode() -{ -#if MIOPEN_FP8_IEEE_EXPONENT_BIAS - return false; -#else - return true; -#endif -} - -template -struct hip_f8 -{ - uint8_t data; - - // default constructor - MIOPEN_HIP_HOST_DEVICE hip_f8() = default; - - MIOPEN_HIP_HOST_DEVICE hip_f8(hip_f8 const&) = default; - - // constructor from bits - explicit MIOPEN_HIP_HOST_DEVICE hip_f8(uint8_t v) { data = v; } - - // constructor from in - explicit MIOPEN_HIP_HOST_DEVICE hip_f8(int v) : hip_f8(static_cast(v)) {} - - explicit MIOPEN_HIP_HOST_DEVICE hip_f8(double v) : hip_f8(static_cast(v)) {} - - // constructor from float - explicit MIOPEN_HIP_HOST_DEVICE - hip_f8(float v, - miopen_f8::hip_f8_rounding_mode rm = miopen_f8::hip_f8_rounding_mode::standard, - uint32_t rng = 0) - { - if(T == hip_f8_type::bf8) - { - if(get_hip_f8_bias_mode()) - { - data = miopen_hip_f8_impl::cast_to_f8<2, - 5, - float, - true /*negative_zero_nan*/, - MIOPEN_FP8_CLIPPING /*clip*/>( - v, (rm == miopen_f8::hip_f8_rounding_mode::stochastic), rng); - } - else - { - data = miopen_hip_f8_impl::cast_to_f8<2, - 5, - float, - false /*negative_zero_nan*/, - MIOPEN_FP8_CLIPPING /*clip*/>( - v, (rm == miopen_f8::hip_f8_rounding_mode::stochastic), rng); - } - } - else /* fp8*/ - { - if(get_hip_f8_bias_mode()) - { - data = miopen_hip_f8_impl::cast_to_f8<3, - 4, - float, - true /*negative_zero_nan*/, - MIOPEN_FP8_CLIPPING /*clip*/>( - v, (rm == miopen_f8::hip_f8_rounding_mode::stochastic), rng); - } - else - { - data = miopen_hip_f8_impl::cast_to_f8<3, - 4, - float, - false /*negative_zero_nan*/, - MIOPEN_FP8_CLIPPING /*clip*/>( - v, (rm == miopen_f8::hip_f8_rounding_mode::stochastic), rng); - } - } - } - - // constructor from half - explicit MIOPEN_HIP_HOST_DEVICE - hip_f8(half v, - miopen_f8::hip_f8_rounding_mode rm = miopen_f8::hip_f8_rounding_mode::standard, - uint32_t rng = 0) - { - if(T == hip_f8_type::bf8) - { - if(get_hip_f8_bias_mode()) - { - data = miopen_hip_f8_impl::cast_to_f8<2, - 5, - half, - true /*negative_zero_nan*/, - MIOPEN_FP8_CLIPPING /*clip*/>( - v, (rm == miopen_f8::hip_f8_rounding_mode::stochastic), rng); - } - else - { - data = miopen_hip_f8_impl::cast_to_f8<2, - 5, - half, - false /*negative_zero_nan*/, - MIOPEN_FP8_CLIPPING /*clip*/>( - v, (rm == miopen_f8::hip_f8_rounding_mode::stochastic), rng); - } - } - else /* fp8*/ - { - if(get_hip_f8_bias_mode()) - { - data = miopen_hip_f8_impl::cast_to_f8<3, - 4, - half, - true /*negative_zero_nan*/, - MIOPEN_FP8_CLIPPING /*clip*/>( - v, (rm == miopen_f8::hip_f8_rounding_mode::stochastic), rng); - } - else - { - data = miopen_hip_f8_impl::cast_to_f8<3, - 4, - half, - false /*negative_zero_nan*/, - MIOPEN_FP8_CLIPPING /*clip*/>( - v, (rm == miopen_f8::hip_f8_rounding_mode::stochastic), rng); - } - } - } - template - explicit MIOPEN_HIP_HOST_DEVICE - hip_f8(hip_f8 v, - miopen_f8::hip_f8_rounding_mode rm = miopen_f8::hip_f8_rounding_mode::standard, - uint32_t rng = 0) - { - if(T == U) - { - data = v.data; - } - else - { - const auto tmp = static_cast(v); - const auto tmp2 = hip_f8(tmp, rm, rng); - data = tmp2.data; - } - } - - explicit MIOPEN_HIP_HOST_DEVICE hip_f8(hip_f8 v, hip_f8_rounding_mode, uint32_t) - { - this->data = v.data; - } - - // constructor from hip_bfloat16 - explicit MIOPEN_HIP_HOST_DEVICE - hip_f8(hip_bfloat16 v, - hip_f8_rounding_mode r = miopen_f8::hip_f8_rounding_mode::standard, - uint32_t rng = 0); - - MIOPEN_HIP_HOST_DEVICE - hip_f8& operator*=(const hip_f8& rhs) - { - const auto tmp = static_cast(*this) * static_cast(rhs); - *this = static_cast(tmp); - return *this; - } - - MIOPEN_HIP_HOST_DEVICE - hip_f8& operator+=(const hip_f8& rhs) - { - const auto tmp = static_cast(*this) + static_cast(rhs); - *this = static_cast(tmp); - return *this; - } - - MIOPEN_HIP_HOST_DEVICE - hip_f8& operator-=(const hip_f8& rhs) - { - const auto tmp = static_cast(*this) - static_cast(rhs); - *this = static_cast(tmp); - return *this; - } - - inline MIOPEN_HIP_HOST_DEVICE hip_f8& operator=(const hip_f8& rhs) - { - if(&rhs != this) - this->data = rhs.data; - return *this; - } - - inline MIOPEN_HIP_HOST_DEVICE bool operator==(const hip_f8& rhs) const - { - if((rhs.is_zero() && this->is_zero()) || - (fabs(rhs - *this) < std::numeric_limits>::epsilon())) - return true; - else if(rhs.is_nan() || rhs.is_inf() || this->is_nan() || this->is_inf()) - return false; - - return false; - } - - inline MIOPEN_HIP_HOST_DEVICE bool operator<(const hip_f8& rhs) const - { - const auto we = static_cast(*this); - const auto them = static_cast(rhs); - return we < them; - } - - inline MIOPEN_HIP_HOST_DEVICE bool operator>(const hip_f8& rhs) const - { - const auto we = static_cast(*this); - const auto them = static_cast(rhs); - return we > them; - } -#if 0 - /*explicit*/ inline MIOPEN_HIP_HOST_DEVICE operator double() - { - // float tmp = static_cast(*this); - // return tmp; - } - - /*explicit*/ inline MIOPEN_HIP_HOST_DEVICE operator double() const - { - // float tmp = static_cast(*this); - // return tmp; - } -#endif - // convert to float - /*explicit*/ inline MIOPEN_HIP_HOST_DEVICE operator float() const - { - if(T == hip_f8_type::bf8) - { - if(get_hip_f8_bias_mode()) - { - return miopen_hip_f8_impl::cast_from_f8<2, 5, float, true /*negative_zero_nan*/>( - data); - } - else - { - return miopen_hip_f8_impl::cast_from_f8<2, 5, float, false /*negative_zero_nan*/>( - data); - } - } - else /* fp8*/ - { - if(get_hip_f8_bias_mode()) - { - return miopen_hip_f8_impl::cast_from_f8<3, 4, float, true /*negative_zero_nan*/>( - data); - } - else - { - return miopen_hip_f8_impl::cast_from_f8<3, 4, float, false /*negative_zero_nan*/>( - data); - } - } - } - - // convert to half - /*explicit*/ inline MIOPEN_HIP_HOST_DEVICE operator half() const - { - if(T == hip_f8_type::bf8) - { - if(get_hip_f8_bias_mode()) - { - return miopen_hip_f8_impl::cast_from_f8<2, 5, half, true /*negative_zero_nan*/>( - data); - } - else - { - return miopen_hip_f8_impl::cast_from_f8<2, 5, half, false /*negative_zero_nan*/>( - data); - } - } - else /* fp8*/ - { - if(get_hip_f8_bias_mode()) - { - return miopen_hip_f8_impl::cast_from_f8<3, 4, half, true /*negative_zero_nan*/>( - data); - } - else - { - return miopen_hip_f8_impl::cast_from_f8<3, 4, half, false /*negative_zero_nan*/>( - data); - } - } - } - - // convert to hip_bfloat16 - /*explicit*/ inline MIOPEN_HIP_HOST_DEVICE operator hip_bfloat16() const; - - // check for zero - inline MIOPEN_HIP_HOST_DEVICE bool is_zero() const - { - if(get_hip_f8_bias_mode()) - { - return data == 0x00; - } - else - { - return (data == 0x00) || (data == 0x80); - } - } - - // check for nan - inline MIOPEN_HIP_HOST_DEVICE bool is_nan() const - { - if(get_hip_f8_bias_mode()) - { - return data == 0x80; - } - else - { - if(T == hip_f8_type::bf8) - { - return (data == 0x7d) || (data == 0x7e) || (data == 0x7f) || (data == 0xfd) || - (data == 0xfe) || (data == 0xff); - } - else - { - return (data == 0x79) || (data == 0x7a) || (data == 0x7b) || (data == 0x7c) || - (data == 0x7d) || (data == 0x7e) || (data == 0x7f) || (data == 0xf9) || - (data == 0xfa) || (data == 0xfb) || (data == 0xfc) || (data == 0xfd) || - (data == 0xfe) || (data == 0xff); - } - } - } - - // check for inf - inline MIOPEN_HIP_HOST_DEVICE bool is_inf() const - { - if(get_hip_f8_bias_mode()) - { - return data == 0x80; - } - else - { - if(T == hip_f8_type::bf8) - { - return (data == 0x7c) || (data == 0xfc); - } - else - { - return (data == 0x78) || (data == 0xf8); - } - } - } -}; // end of class hip_f8 - -template -inline MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 operator*(miopen_f8::hip_f8 lhs, - const miopen_f8::hip_f8& rhs) -{ - lhs *= rhs; - return lhs; -} - -template -inline MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 operator+(miopen_f8::hip_f8 lhs, - const miopen_f8::hip_f8& rhs) -{ - lhs += rhs; - return lhs; -} - -template -inline MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 operator-(miopen_f8::hip_f8 lhs, - const miopen_f8::hip_f8& rhs) -{ - lhs -= rhs; - return lhs; -} - -template -inline MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 operator-(U lhs, const miopen_f8::hip_f8& rhs) -{ - const auto tmp = static_cast(rhs); - return static_cast>(lhs - tmp); -} - -template -inline MIOPEN_HIP_HOST_DEVICE bool operator<(const miopen_f8::hip_f8& lhs, - const miopen_f8::hip_f8& rhs) -{ - return static_cast(lhs) < static_cast(rhs); -} - -template -inline MIOPEN_HIP_HOST_DEVICE bool operator>(const miopen_f8::hip_f8& lhs, - const miopen_f8::hip_f8& rhs) -{ - return static_cast(lhs) > static_cast(rhs); -} - -template -inline MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 fabs(miopen_f8::hip_f8 v) -{ - v.data = v.data & 0x7f; - return v; -} -template -MIOPEN_HIP_HOST_DEVICE T F8_Max() -{ - union - { - uint8_t bits; - T value; - } x; - - x.bits = 0x7F; - return x.value; -} -} // namespace miopen_f8 - -// define numeric limits for the new data type -namespace std { -inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT -{ - return x.is_inf(); -} - -inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT -{ - return x.is_inf(); -} - -template <> -class numeric_limits> -{ -public: - static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 epsilon() - { - return static_cast>(float(0.0625)); - } - - static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 quiet_NaN() - { - return static_cast>( - static_cast(miopen_f8::get_hip_f8_bias_mode() ? 0X80 : 0x79)); - } - - static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 max() - { - return miopen_f8::F8_Max>(); - } - - static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 min() - { - return static_cast>(-1.0f) * - miopen_f8::F8_Max>(); - } -}; - -template <> -class numeric_limits> -{ -public: - static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 epsilon() - { - return static_cast>(float(0.125)); - } - - static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 quiet_NaN() - { - return static_cast>( - static_cast(miopen_f8::get_hip_f8_bias_mode() ? 0X80 : 0x7d)); - } - - static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 max() - { - return static_cast>( - miopen_f8::F8_Max>()); - } - static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 min() - { - return static_cast>(-1.0f) * - miopen_f8::F8_Max>(); - } -}; - -} // namespace std - -template -struct hip_f8x4 -{ - // define some convenience types - using float32x2 = float __attribute__((ext_vector_type(2))); - using float32x4 = float __attribute__((ext_vector_type(4))); - - using halfx2 = _Float16 __attribute__((ext_vector_type(2))); - using halfx4 = _Float16 __attribute__((ext_vector_type(4))); - - using hip_bfloat16x2 = uint16_t __attribute__((ext_vector_type(2))); - using hip_bfloat16x4 = uint16_t __attribute__((ext_vector_type(4))); - - uint32_t data; - - // default constructor - MIOPEN_HIP_HOST_DEVICE hip_f8x4() = default; - - // constructor from bits - MIOPEN_HIP_HOST_DEVICE hip_f8x4(uint32_t v); - - // constructor from float - MIOPEN_HIP_HOST_DEVICE - hip_f8x4(float v0, - float v1 = 0, - float v2 = 0, - float v3 = 0, - miopen_f8::hip_f8_rounding_mode rm = miopen_f8::hip_f8_rounding_mode::standard, - uint32_t rng = 0); - MIOPEN_HIP_HOST_DEVICE - hip_f8x4(float32x2 v, - miopen_f8::hip_f8_rounding_mode rm = miopen_f8::hip_f8_rounding_mode::standard, - uint32_t rng = 0); - MIOPEN_HIP_HOST_DEVICE - hip_f8x4(float32x4 v, - miopen_f8::hip_f8_rounding_mode rm = miopen_f8::hip_f8_rounding_mode::standard, - uint32_t rng = 0); - - // constructor from half - MIOPEN_HIP_HOST_DEVICE - hip_f8x4(half v0, - half v1 = {}, - half v2 = {}, - half v3 = {}, - miopen_f8::hip_f8_rounding_mode rm = miopen_f8::hip_f8_rounding_mode::standard, - uint32_t rng = 0); - MIOPEN_HIP_HOST_DEVICE - hip_f8x4(halfx2 v, - miopen_f8::hip_f8_rounding_mode rm = miopen_f8::hip_f8_rounding_mode::standard, - uint32_t rng = 0); - MIOPEN_HIP_HOST_DEVICE - hip_f8x4(halfx4 v, - miopen_f8::hip_f8_rounding_mode rm = miopen_f8::hip_f8_rounding_mode::standard, - uint32_t rng = 0); - - // convert to float32x4 - inline MIOPEN_HIP_HOST_DEVICE operator float32x4() const; - - // convert to halfx4 - inline MIOPEN_HIP_HOST_DEVICE operator halfx4() const; -}; - -template -struct hip_f8x8 -{ - // define some convenience types - using f8x8 = hip_f8x4 __attribute__((ext_vector_type(2))); - - f8x8 data; - - // default constructor - MIOPEN_HIP_HOST_DEVICE hip_f8x8() = default; - - // do we need to define other constructors or any conversion routines here? -}; - -// If we do not end up needing either any constructors or conversion routines for the above type, -// then we can simplify the above type to the following -#if USE_SIMPLER_HIP_F8x8 -template -using hip_f8x8 = hip_f8x4 __attribute__((ext_vector_type(2))); -#endif - -using hip_float32x4 = float __attribute__((ext_vector_type(4))); -using hip_float32x16 = float __attribute__((ext_vector_type(16))); - -// these are device-specific and we don't expect them to exist unless we're compiling with hip-clang -// for MI300. -template -__device__ hip_float32x4 mfma_f32_16x16x32(hip_f8x8 a, hip_f8x8 b, hip_float32x4 c); - -template -__device__ hip_float32x16 mfma_f32_32x32x16(hip_f8x8 a, hip_f8x8 b, hip_float32x16 c); - -using float8 = miopen_f8::hip_f8; -using bfloat8 = miopen_f8::hip_f8; +../../kernels/hip_float8.hpp \ No newline at end of file From 7efc89cae9250aeceba4fba534996d7d4fd3f709 Mon Sep 17 00:00:00 2001 From: atamazov Date: Sat, 7 Oct 2023 01:02:08 +0300 Subject: [PATCH 4/8] rocblas-remove-int8x4-uses(05) Doc update + review comments --- docs/datatypes.md | 3 +-- include/miopen/miopen.h | 9 +++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/datatypes.md b/docs/datatypes.md index 1a46e5fd62..f5d8f28931 100644 --- a/docs/datatypes.md +++ b/docs/datatypes.md @@ -10,7 +10,7 @@ typedef enum { miopenFloat = 1, miopenInt32 = 2, miopenInt8 = 3, - miopenInt8x4 = 4, + /* Value 4 is reserved. */ miopenBFloat16 = 5, } miopenDataType_t; ``` @@ -22,7 +22,6 @@ Type descriptions: * `miopenFloat` - 32-bit floating point * `miopenInt32` - 32-bit integer, used primarily for `int8` convolution outputs * `miopenInt8` - 8-bit integer, currently only supported by `int8` convolution forward path, tensor set, tensor copy, tensor cast, tensor transform, tensor transpose, and im2col. - * `miopenInt8x4` - 8-bit 4 element vector type used primarily with `int8` convolutions forward path. * `miopenBFloat16` - brain float fp-16 (8-bit exponent, 7-bit fraction), currently only supported by convolutions, tensor set, and tensor copy. diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index fd978c6200..7121ada6bf 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -346,10 +346,11 @@ MIOPEN_DECLARE_OBJECT(miopenReduceTensorDescriptor); */ typedef enum { - miopenHalf = 0, /*!< 16-bit floating point (Fully supported) */ - miopenFloat = 1, /*!< 32-bit floating point (Fully supported) */ - miopenInt32 = 2, /*!< 32-bit int point (Partially supported) */ - miopenInt8 = 3, /*!< 8-bit int point (Partially supported) */ + miopenHalf = 0, /*!< 16-bit floating point (Fully supported) */ + miopenFloat = 1, /*!< 32-bit floating point (Fully supported) */ + miopenInt32 = 2, /*!< 32-bit int point (Partially supported) */ + miopenInt8 = 3, /*!< 8-bit int point (Partially supported) */ + // miopenInt8x4 = 4, /*!< Pack of 4x Int8 in NCHW_VECT_C format (Support discontinued) */ miopenBFloat16 = 5, /*!< 16-bit binary floating point (8-bit exponent, 7-bit fraction) (Partially supported) */ miopenDouble = 6, /*!< 64-bit floating point (Partially supported) */ From e984c54ba848cdf6db72e615f3287258211d4d8a Mon Sep 17 00:00:00 2001 From: atamazov Date: Sat, 7 Oct 2023 01:38:14 +0300 Subject: [PATCH 5/8] rocblas-remove-int8x4-uses(07) Fix build errors after merge of #2385 --- src/solver/batchnorm/backward_ck.cpp | 2 -- src/solver/batchnorm/forward_training_ck.cpp | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/solver/batchnorm/backward_ck.cpp b/src/solver/batchnorm/backward_ck.cpp index fba8724990..35fb8adbad 100644 --- a/src/solver/batchnorm/backward_ck.cpp +++ b/src/solver/batchnorm/backward_ck.cpp @@ -182,7 +182,6 @@ bool BnCKBwdBackward::IsApplicable(const ExecutionContext& ctx, return CheckCKApplicability(bn_problem); case miopenInt32: case miopenInt8: - case miopenInt8x4: case miopenBFloat8: case miopenFloat8: default: MIOPEN_THROW("Unsupported datatype"); @@ -236,7 +235,6 @@ ConvSolution BnCKBwdBackward::GetSolution( return MakeAnyInvokerFactory(bn_problem); case miopenInt8: case miopenInt32: - case miopenInt8x4: case miopenBFloat8: case miopenFloat8: default: diff --git a/src/solver/batchnorm/forward_training_ck.cpp b/src/solver/batchnorm/forward_training_ck.cpp index a65cec14a9..17191a4c9e 100644 --- a/src/solver/batchnorm/forward_training_ck.cpp +++ b/src/solver/batchnorm/forward_training_ck.cpp @@ -174,7 +174,6 @@ bool BnCKFwdTraining::IsApplicable(const ExecutionContext& context, case miopenBFloat16: return CheckCKApplicability(bn_problem); case miopenInt32: case miopenInt8: - case miopenInt8x4: case miopenBFloat8: case miopenFloat8: default: MIOPEN_THROW("BnCKFwdTraining operation does not supprot this data type"); @@ -224,7 +223,6 @@ ConvSolution BnCKFwdTraining::GetSolution( case miopenBFloat16: return MakeAnyInvokerFactory(bn_problem); case miopenInt8: case miopenInt32: - case miopenInt8x4: case miopenBFloat8: case miopenFloat8: default: From 27f146a5c971ca13bd3d611f5f99f487ad8896e7 Mon Sep 17 00:00:00 2001 From: atamazov Date: Tue, 10 Oct 2023 00:23:56 +0300 Subject: [PATCH 6/8] rocblas-remove-int8x4-uses(08) Revert "rocblas-remove-int8x4-uses(02) Removed the rest of INT8x4 support (except the driver)" This reverts commit 6737d66a66206f1ad90ab9da5871ada2eeb02875. # RESOLVED Conflicts: # include/miopen/miopen.h --- driver/conv_driver.hpp | 4 ---- include/miopen/miopen.h | 2 +- src/check_numerics.cpp | 1 + src/driver_arguments.cpp | 4 +++- src/gemm_v2.cpp | 16 ++++++++-------- src/hip/general_tensor_reorder_sol.cpp | 2 ++ src/include/miopen/conv/problem_description.hpp | 1 + src/include/miopen/datatype.hpp | 5 +++++ src/include/miopen/tensor.hpp | 1 + src/include/miopen/visit_float.hpp | 1 + src/ocl/tensorocl.cpp | 6 ++++++ src/pooling_api.cpp | 1 + src/reducetensor.cpp | 3 +++ src/solver/batchnorm/forward_inference_ck.cpp | 2 ++ .../conv_ck_igemm_fwd_bias_activ_fused.cpp | 4 ++++ ...v_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 4 ++++ ...v_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 4 ++++ ...v_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 4 ++++ .../conv_hip_implicit_gemm_bwd_data_xdlops.cpp | 4 ++++ src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp | 4 ++++ ...conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp | 4 ++++ src/solver/gemm.cpp | 2 ++ src/solver/mlir_common.cpp | 1 + src/tensor.cpp | 1 + test/driver.hpp | 2 ++ test/gtest/conv_embed_db.cpp | 1 + test/gtest/conv_hip_igemm_xdlops.cpp | 1 + test/gtest/conv_igemm_dynamic.cpp | 1 + test/gtest/conv_igemm_mlir.cpp | 1 + test/gtest/conv_igemm_mlir_xdlops.cpp | 1 + test/gtest/conv_trans.cpp | 1 + test/gtest/db_sync.cpp | 2 ++ 32 files changed, 77 insertions(+), 14 deletions(-) diff --git a/driver/conv_driver.hpp b/driver/conv_driver.hpp index 3c28859776..5f67b83588 100644 --- a/driver/conv_driver.hpp +++ b/driver/conv_driver.hpp @@ -83,10 +83,6 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_PAD_BUFFERS_2M) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_USE_GPU_REFERENCE) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_SUBNORM_PERCENTAGE) -// Support in the library discontinued, but left in the driver -// for reference in the future. -#define miopenInt8x4 (static_cast(4)) - #if MIOPEN_BACKEND_OPENCL #define STATUS_SUCCESS CL_SUCCESS typedef cl_int status_t; diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 7121ada6bf..600484e392 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -350,7 +350,7 @@ typedef enum miopenFloat = 1, /*!< 32-bit floating point (Fully supported) */ miopenInt32 = 2, /*!< 32-bit int point (Partially supported) */ miopenInt8 = 3, /*!< 8-bit int point (Partially supported) */ - // miopenInt8x4 = 4, /*!< Pack of 4x Int8 in NCHW_VECT_C format (Support discontinued) */ + miopenInt8x4 = 4, /*!< Pack of four Int8 in NCHW_VECT_C format (Support discontinued) */ miopenBFloat16 = 5, /*!< 16-bit binary floating point (8-bit exponent, 7-bit fraction) (Partially supported) */ miopenDouble = 6, /*!< 64-bit floating point (Partially supported) */ diff --git a/src/check_numerics.cpp b/src/check_numerics.cpp index 12210069df..50cb0af4af 100644 --- a/src/check_numerics.cpp +++ b/src/check_numerics.cpp @@ -64,6 +64,7 @@ std::string GetKernelName(miopenDataType_t data_type) case miopenBFloat8: return {"check_numerics_bf8"}; case miopenInt32: case miopenInt8: + case miopenInt8x4: // Support discontinued. case miopenDouble: default: return {""}; } diff --git a/src/driver_arguments.cpp b/src/driver_arguments.cpp index d9e82d270a..53aa6b41ab 100644 --- a/src/driver_arguments.cpp +++ b/src/driver_arguments.cpp @@ -50,7 +50,7 @@ void ConvDataType(std::stringstream& ss, const miopen::TensorDescriptor& desc) { ss << "convbfp16"; } - else if(desc.GetType() == miopenInt8) + else if(desc.GetType() == miopenInt8 || desc.GetType() == miopenInt8x4) { ss << "convint8"; } @@ -190,6 +190,8 @@ std::string ConvArgsForMIOpenDriver(const miopen::TensorDescriptor& xDesc, ss << " -g " << convDesc.group_count; if(print_for_conv_driver) ss << " -F " << std::to_string(static_cast(conv_dir)) << " -t 1"; // clang-format on + if(xDesc.GetType() == miopenInt8x4) + ss << " -Z 1"; if(immediate_mode_solver_id.has_value()) { ss << " -S " << *immediate_mode_solver_id; diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index c4c6057687..bd2f916092 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ -611,9 +611,9 @@ miopenStatus_t CallGemm(const Handle& handle, }; break; + case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, - "miopenDouble data type not supported by MIOpenGEMM."); + MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); }; break; } @@ -877,10 +877,10 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, break; } + case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, - "miopenDouble data type not supported by MIOpenGEMM."); - } + MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); + }; break; } @@ -1141,10 +1141,10 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, break; } + case miopenInt8x4: case miopenDouble: { - MIOPEN_THROW(miopenStatusBadParm, - "miopenDouble data type not supported by MIOpenGEMM."); - } + MIOPEN_THROW(miopenStatusBadParm, "Unknown or unsupported data type."); + }; break; } diff --git a/src/hip/general_tensor_reorder_sol.cpp b/src/hip/general_tensor_reorder_sol.cpp index ae85048f63..f78aa358f8 100644 --- a/src/hip/general_tensor_reorder_sol.cpp +++ b/src/hip/general_tensor_reorder_sol.cpp @@ -137,6 +137,8 @@ GenericReorderSolutionImpl::GenericReorderSolutionImpl(miopenDataType_t data_typ order_2(order_2_), order_3(order_3_) { + if(data_type == miopenInt8x4) + MIOPEN_THROW("These data type are not supported"); std::size_t data_size = miopen::GetTypeSize(data_type); kernel_param_heuristic = tensor_reorder::HeuristicGet(data_size, dim_0, dim_1, dim_2, dim_3); } diff --git a/src/include/miopen/conv/problem_description.hpp b/src/include/miopen/conv/problem_description.hpp index 12d6f08811..438e1d306a 100644 --- a/src/include/miopen/conv/problem_description.hpp +++ b/src/include/miopen/conv/problem_description.hpp @@ -51,6 +51,7 @@ inline std::string GetDataTypeName(miopenDataType_t data_type) case miopenFloat: return "FP32"; case miopenHalf: return "FP16"; case miopenInt8: return "INT8"; + case miopenInt8x4: return "INT8x4"; case miopenInt32: return "INT32"; case miopenBFloat16: return "BF16"; case miopenDouble: return "FP64"; diff --git a/src/include/miopen/datatype.hpp b/src/include/miopen/datatype.hpp index 2260cb5b4f..29057e3de9 100644 --- a/src/include/miopen/datatype.hpp +++ b/src/include/miopen/datatype.hpp @@ -53,6 +53,10 @@ inline std::string GetDataType(miopenDataType_t type) type_str = "bfloat16"; } break; + case miopenInt8x4: { + type_str = "UNSUPPORTED_TYPE"; + } + break; case miopenInt8: { type_str = "int8_t"; } @@ -153,6 +157,7 @@ inline KernelBuildParameters GetDataTypeKBP(miopenDataType_t type) case miopenDouble: use_fp64 = 1; break; case miopenFloat8: use_fp8 = 1; break; case miopenBFloat8: use_bfp8 = 1; break; + case miopenInt8x4: // fallthrough default: MIOPEN_THROW("Only float, half, bfloat16, int8, float8, bfloat8 data types are supported."); break; diff --git a/src/include/miopen/tensor.hpp b/src/include/miopen/tensor.hpp index 1bf2bcbaae..a133ab4adc 100644 --- a/src/include/miopen/tensor.hpp +++ b/src/include/miopen/tensor.hpp @@ -101,6 +101,7 @@ inline std::size_t GetTypeSize(miopenDataType_t d) case miopenFloat: return 4; case miopenHalf: case miopenBFloat16: return 2; + case miopenInt8x4: break; case miopenInt8: case miopenFloat8: case miopenBFloat8: return 1; diff --git a/src/include/miopen/visit_float.hpp b/src/include/miopen/visit_float.hpp index ae84b7f0a9..35e1ae6ae7 100644 --- a/src/include/miopen/visit_float.hpp +++ b/src/include/miopen/visit_float.hpp @@ -91,6 +91,7 @@ void visit_float(miopenDataType_t t, F f) f(as_float{}); break; } + case miopenInt8x4: MIOPEN_THROW("miopenInt8x4: Support discontinued."); } } diff --git a/src/ocl/tensorocl.cpp b/src/ocl/tensorocl.cpp index 6329d33f69..1e1f8b1df9 100644 --- a/src/ocl/tensorocl.cpp +++ b/src/ocl/tensorocl.cpp @@ -1944,6 +1944,7 @@ std::string GetCastTensorBuildOptionFromType(const std::string& buildOption, mio case miopenDouble: // TODO MIOPEN_THROW(miopenStatusBadParm, "miopenDouble data type not supported in cast tensor."); + case miopenInt8x4: // fallthrough default: MIOPEN_THROW(miopenStatusBadParm, "Invalid data type in cast tensor desc."); } } @@ -1967,6 +1968,11 @@ void CastTensor(const Handle& handle, MIOPEN_THROW(miopenStatusBadParm, "Tensor dimension lengths do not match."); } + if(srcDesc.GetType() == miopenInt8x4 || dstDesc.GetType() == miopenInt8x4) + { + MIOPEN_THROW(miopenStatusBadParm, "Tensor cast operation is not supported for int8x4."); + } + auto flat_descriptors = GetConsistentFlattenedTensorDescriptors(srcDesc, dstDesc); const TensorDescriptor& srcDesc_flat = std::get<0>(flat_descriptors); const TensorDescriptor& dstDesc_flat = std::get<1>(flat_descriptors); diff --git a/src/pooling_api.cpp b/src/pooling_api.cpp index a2c792f8bb..bf318f7b78 100644 --- a/src/pooling_api.cpp +++ b/src/pooling_api.cpp @@ -50,6 +50,7 @@ inline void Pooling_logging_cmd(const miopenPoolingDescriptor_t poolDesc, case miopenFloat: ss << "pool"; break; case miopenInt32: case miopenInt8: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index 6ca1067fb8..9cabbc03f6 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -211,6 +211,7 @@ inline int GetDataTypeSize(miopenDataType_t t) case miopenFloat8: case miopenBFloat8: case miopenInt8: return (1); + case miopenInt8x4: return (4); // Support discontinued. case miopenBFloat16: return (2); case miopenInt32: return (4); default: MIOPEN_THROW("Only float, half, double, bfloat16, int8 data types are supported."); @@ -267,6 +268,7 @@ inline int GetDataTypeId(miopenDataType_t t) case miopenBFloat16: return (static_cast('B')); case miopenDouble: return (static_cast('D')); case miopenInt8: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenInt32: return (static_cast('O')); @@ -306,6 +308,7 @@ static ck::DataTypeEnum_t mapDataTypeId(miopenDataType_t t) case miopenBFloat16: return DataTypeEnum_t::BFloat16; case miopenDouble: return DataTypeEnum_t::Double; case miopenInt8: return DataTypeEnum_t::Int8; + case miopenInt8x4: return DataTypeEnum_t::Int8x4; case miopenInt32: return DataTypeEnum_t::Int32; case miopenFloat8: case miopenBFloat8: diff --git a/src/solver/batchnorm/forward_inference_ck.cpp b/src/solver/batchnorm/forward_inference_ck.cpp index d1d67ddea6..ff17432a62 100644 --- a/src/solver/batchnorm/forward_inference_ck.cpp +++ b/src/solver/batchnorm/forward_inference_ck.cpp @@ -200,6 +200,7 @@ bool BnCKFwdInference::IsApplicable(const ExecutionContext& context, return (CheckCKApplicability(bn_problem) != -1); case miopenInt32: case miopenInt8: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: default: MIOPEN_THROW("Unsupported datatype"); @@ -243,6 +244,7 @@ BnCKFwdInference::GetSolution(const ExecutionContext& context, break; case miopenInt8: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: default: MIOPEN_THROW("Unsupported datatype"); diff --git a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp index d9c0410d98..e2df6f8097 100644 --- a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp +++ b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp @@ -287,6 +287,7 @@ void PerformanceConfigConvCKIgemmFwdBiasActivFused::HeuristicInit( case miopenInt8: case miopenFloat: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -341,6 +342,7 @@ bool PerformanceConfigConvCKIgemmFwdBiasActivFused::IsValid( case miopenInt8: case miopenFloat: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -433,6 +435,7 @@ bool ConvCKIgemmFwdBiasActivFused::IsApplicable(const FusionContext& ctx, case miopenInt8: case miopenFloat: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); @@ -466,6 +469,7 @@ ConvSolution ConvCKIgemmFwdBiasActivFused::GetSolution( case miopenInt8: case miopenFloat: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: default: MIOPEN_THROW("Unsupported datatype"); diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index cd95c85cf6..94983c7f0e 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -210,6 +210,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -252,6 +253,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -322,6 +324,7 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -349,6 +352,7 @@ ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index 4a5ecc8a3f..a21c9ba300 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -210,6 +210,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -252,6 +253,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -320,6 +322,7 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenFloat8: case miopenBFloat8: case miopenBFloat16: @@ -347,6 +350,7 @@ ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index d472c4181c..6225410599 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -206,6 +206,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -248,6 +249,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -318,6 +320,7 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -345,6 +348,7 @@ ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::WrWInvokeParams>( problem, config.kernel_id); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp index d55ded78de..5aa17e75bf 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp @@ -172,6 +172,7 @@ void PerformanceConfigHipImplicitGemmBwdXdlops::HeuristicInit( case miopenBFloat8: case miopenInt8: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -213,6 +214,7 @@ bool PerformanceConfigHipImplicitGemmBwdXdlops::IsValid( case miopenBFloat8: case miopenInt8: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -292,6 +294,7 @@ bool ConvHipImplicitGemmBwdXdlops::IsApplicable( case miopenBFloat8: case miopenInt8: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -315,6 +318,7 @@ ConvSolution ConvHipImplicitGemmBwdXdlops::GetSolution( problem, config.kernel_id); case miopenInt8: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp index 697ab33f70..7a9c322bc7 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp @@ -172,6 +172,7 @@ void PerformanceConfigHipImplicitGemmFwdXdlops::HeuristicInit( case miopenFloat8: case miopenBFloat8: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -214,6 +215,7 @@ bool PerformanceConfigHipImplicitGemmFwdXdlops::IsValid( case miopenFloat8: case miopenBFloat8: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -292,6 +294,7 @@ bool ConvHipImplicitGemmFwdXdlops::IsApplicable( case miopenFloat8: case miopenBFloat8: case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: break; } @@ -317,6 +320,7 @@ ConvSolution ConvHipImplicitGemmFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp index 0c5a91b627..0a8b6eeb03 100644 --- a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp @@ -198,6 +198,7 @@ void PerformanceConfigHipImplicitGemmGroupFwdXdlops::HeuristicInit( case miopenFloat: Init(problem); break; case miopenInt8: Init(problem); break; case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -239,6 +240,7 @@ bool PerformanceConfigHipImplicitGemmGroupFwdXdlops::IsValid( case miopenFloat: return CheckIsSupportCKArgs(problem); case miopenInt8: return CheckIsSupportCKArgs(problem); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -309,6 +311,7 @@ bool ConvHipImplicitGemmGroupFwdXdlops::IsApplicable( case miopenFloat: return CheckCKApplicability(problem); case miopenInt8: return CheckCKApplicability(problem); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenFloat8: case miopenBFloat8: @@ -336,6 +339,7 @@ ConvSolution ConvHipImplicitGemmGroupFwdXdlops::GetSolution( return InitInvokerFactory, CKArgs, conv::DataInvokeParams>( problem, config.kernel_id); case miopenInt32: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: case miopenFloat8: diff --git a/src/solver/gemm.cpp b/src/solver/gemm.cpp index 2cfbd257dc..6ae4f5dde7 100644 --- a/src/solver/gemm.cpp +++ b/src/solver/gemm.cpp @@ -89,6 +89,8 @@ bool GemmFwdBase::IsApplicable(const ExecutionContext& ctx, const auto& yDesc = problem.GetOut(); // rocBlas needs the output to be 32-bit always + if(xDesc.GetType() == miopenInt8x4) + return false; if(xDesc.GetType() == miopenInt8 // && (yDesc.GetType() != miopenFloat // && yDesc.GetType() != miopenInt32)) diff --git a/src/solver/mlir_common.cpp b/src/solver/mlir_common.cpp index e54da1ac89..4e41b8944a 100644 --- a/src/solver/mlir_common.cpp +++ b/src/solver/mlir_common.cpp @@ -57,6 +57,7 @@ static const char* DTypeName(miopenDataType_t ty) case miopenBFloat16: return "bf16"; case miopenInt32: return "i32"; case miopenInt8: return "i8"; + case miopenInt8x4: return "i8x4"; // Support discontinued. case miopenFloat8: return "fp8"; case miopenBFloat8: return "bfp8"; } diff --git a/src/tensor.cpp b/src/tensor.cpp index ba1b2ba106..df0d7c2819 100644 --- a/src/tensor.cpp +++ b/src/tensor.cpp @@ -50,6 +50,7 @@ bool IsDataTypeSupported(miopenDataType_t t) case miopenFloat8: case miopenBFloat8: case miopenInt8: + case miopenInt8x4: // Support discontinued. case miopenBFloat16: case miopenDouble: return true; } diff --git a/test/driver.hpp b/test/driver.hpp index bcbfe7d676..fd83dd1fc5 100644 --- a/test/driver.hpp +++ b/test/driver.hpp @@ -274,6 +274,7 @@ struct test_driver { case miopenHalf: ss << "--half "; break; case miopenBFloat16: ss << "--bfloat16 "; break; + case miopenInt8x4: ss << "--UNSUPPORED_TYPE "; break; case miopenInt8: ss << "--int8 "; break; case miopenInt32: ss << "--int32 "; break; case miopenFloat: ss << "--float "; break; @@ -302,6 +303,7 @@ struct test_driver { case miopenHalf: ret.emplace_back("--half"); break; case miopenBFloat16: ret.emplace_back("--bf16"); break; + case miopenInt8x4: ret.emplace_back("--UNSUPPORTED_TYPE"); break; case miopenInt8: ret.emplace_back("--int8"); break; case miopenInt32: ret.emplace_back("--int32"); break; case miopenFloat: ret.emplace_back("--float"); break; diff --git a/test/gtest/conv_embed_db.cpp b/test/gtest/conv_embed_db.cpp index 36d1e28123..4672bb0404 100644 --- a/test/gtest/conv_embed_db.cpp +++ b/test/gtest/conv_embed_db.cpp @@ -73,6 +73,7 @@ void Run2dDriver(miopenDataType_t prec) case miopenHalf: params = ConfigWithHalf::GetParam(); break; case miopenInt8: params = ConfigWithInt8::GetParam(); break; case miopenBFloat16: params = ConfigWithBFloat16::GetParam(); break; + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenFloat8: case miopenBFloat8: diff --git a/test/gtest/conv_hip_igemm_xdlops.cpp b/test/gtest/conv_hip_igemm_xdlops.cpp index 14e1738a35..3407446557 100644 --- a/test/gtest/conv_hip_igemm_xdlops.cpp +++ b/test/gtest/conv_hip_igemm_xdlops.cpp @@ -65,6 +65,7 @@ void Run2dDriver(miopenDataType_t prec) case miopenHalf: case miopenBFloat16: case miopenFloat: + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: FAIL() << "miopenHalf, miopenBFloat16, miopenFloat, miopenInt32, " diff --git a/test/gtest/conv_igemm_dynamic.cpp b/test/gtest/conv_igemm_dynamic.cpp index 6e2863eb8f..59341ac8a0 100644 --- a/test/gtest/conv_igemm_dynamic.cpp +++ b/test/gtest/conv_igemm_dynamic.cpp @@ -68,6 +68,7 @@ void Run2dDriver(miopenDataType_t prec) case miopenHalf: case miopenInt8: case miopenBFloat16: + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: case miopenFloat8: diff --git a/test/gtest/conv_igemm_mlir.cpp b/test/gtest/conv_igemm_mlir.cpp index c99d0a03c4..29d240645c 100644 --- a/test/gtest/conv_igemm_mlir.cpp +++ b/test/gtest/conv_igemm_mlir.cpp @@ -81,6 +81,7 @@ void Run2dDriver(miopenDataType_t prec) case miopenInt8: params = ConfigWithInt8::GetParam(); break; case miopenFloat: params = ConfigWithFloat::GetParam(); break; case miopenBFloat16: + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenFloat8: case miopenBFloat8: diff --git a/test/gtest/conv_igemm_mlir_xdlops.cpp b/test/gtest/conv_igemm_mlir_xdlops.cpp index ea14e3b68e..e2c5a3ed8f 100644 --- a/test/gtest/conv_igemm_mlir_xdlops.cpp +++ b/test/gtest/conv_igemm_mlir_xdlops.cpp @@ -53,6 +53,7 @@ void Run2dDriver(miopenDataType_t prec) case miopenInt8: params = ConfigWithInt8::GetParam(); break; case miopenBFloat16: case miopenFloat: + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: case miopenFloat8: diff --git a/test/gtest/conv_trans.cpp b/test/gtest/conv_trans.cpp index a5266d9f9c..185a459947 100644 --- a/test/gtest/conv_trans.cpp +++ b/test/gtest/conv_trans.cpp @@ -55,6 +55,7 @@ void Run2dDriver(miopenDataType_t prec) case miopenBFloat8: case miopenInt8: case miopenBFloat16: + case miopenInt8x4: // Support discontinued. case miopenInt32: case miopenDouble: FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt32, miopenDouble " diff --git a/test/gtest/db_sync.cpp b/test/gtest/db_sync.cpp index 13f0297724..13b647ceac 100644 --- a/test/gtest/db_sync.cpp +++ b/test/gtest/db_sync.cpp @@ -72,6 +72,8 @@ miopenDataType_t GetDataTypeFromString(const std::string& data_type) return miopenHalf; else if(data_type == "INT8") return miopenInt8; + else if(data_type == "INT8x4") + return miopenInt8x4; // Support discontinued. Maintain compatibility with old databases. else if(data_type == "INT32") return miopenInt32; else if(data_type == "BF16") From 483009288b0dff3f851f5409dd16166331bcefd1 Mon Sep 17 00:00:00 2001 From: atamazov Date: Tue, 10 Oct 2023 01:22:17 +0300 Subject: [PATCH 7/8] rocblas-remove-int8x4-uses(10) tidy fixes --- src/solver/batchnorm/backward_ck.cpp | 2 ++ src/solver/batchnorm/forward_training_ck.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/solver/batchnorm/backward_ck.cpp b/src/solver/batchnorm/backward_ck.cpp index 35fb8adbad..fba8724990 100644 --- a/src/solver/batchnorm/backward_ck.cpp +++ b/src/solver/batchnorm/backward_ck.cpp @@ -182,6 +182,7 @@ bool BnCKBwdBackward::IsApplicable(const ExecutionContext& ctx, return CheckCKApplicability(bn_problem); case miopenInt32: case miopenInt8: + case miopenInt8x4: case miopenBFloat8: case miopenFloat8: default: MIOPEN_THROW("Unsupported datatype"); @@ -235,6 +236,7 @@ ConvSolution BnCKBwdBackward::GetSolution( return MakeAnyInvokerFactory(bn_problem); case miopenInt8: case miopenInt32: + case miopenInt8x4: case miopenBFloat8: case miopenFloat8: default: diff --git a/src/solver/batchnorm/forward_training_ck.cpp b/src/solver/batchnorm/forward_training_ck.cpp index 17191a4c9e..a65cec14a9 100644 --- a/src/solver/batchnorm/forward_training_ck.cpp +++ b/src/solver/batchnorm/forward_training_ck.cpp @@ -174,6 +174,7 @@ bool BnCKFwdTraining::IsApplicable(const ExecutionContext& context, case miopenBFloat16: return CheckCKApplicability(bn_problem); case miopenInt32: case miopenInt8: + case miopenInt8x4: case miopenBFloat8: case miopenFloat8: default: MIOPEN_THROW("BnCKFwdTraining operation does not supprot this data type"); @@ -223,6 +224,7 @@ ConvSolution BnCKFwdTraining::GetSolution( case miopenBFloat16: return MakeAnyInvokerFactory(bn_problem); case miopenInt8: case miopenInt32: + case miopenInt8x4: case miopenBFloat8: case miopenFloat8: default: From 676da096edec3cc26e6ac8f8c3e49cd48280e9b1 Mon Sep 17 00:00:00 2001 From: atamazov Date: Tue, 10 Oct 2023 15:28:18 +0300 Subject: [PATCH 8/8] rocblas-remove-int8x4-uses(11) format --- include/miopen/miopen.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 600484e392..7a85ec5875 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -346,10 +346,10 @@ MIOPEN_DECLARE_OBJECT(miopenReduceTensorDescriptor); */ typedef enum { - miopenHalf = 0, /*!< 16-bit floating point (Fully supported) */ - miopenFloat = 1, /*!< 32-bit floating point (Fully supported) */ - miopenInt32 = 2, /*!< 32-bit int point (Partially supported) */ - miopenInt8 = 3, /*!< 8-bit int point (Partially supported) */ + miopenHalf = 0, /*!< 16-bit floating point (Fully supported) */ + miopenFloat = 1, /*!< 32-bit floating point (Fully supported) */ + miopenInt32 = 2, /*!< 32-bit int point (Partially supported) */ + miopenInt8 = 3, /*!< 8-bit int point (Partially supported) */ miopenInt8x4 = 4, /*!< Pack of four Int8 in NCHW_VECT_C format (Support discontinued) */ miopenBFloat16 = 5, /*!< 16-bit binary floating point (8-bit exponent, 7-bit fraction) (Partially supported) */