From d666d727d6a3625399e3ea1ea6f222656b063536 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Mon, 17 May 2021 07:11:59 +0000 Subject: [PATCH 01/20] Add miopenDouble data type ID --- include/miopen/miopen.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 7741162457..9c44779362 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -334,6 +334,7 @@ typedef enum { 4, /*!< Pack of four 8-bit int points in NCHW_VECT_C format (Partially supported) */ miopenBFloat16 = 5, /*!< 16-bit binary floating point (8-bit exponent, 7-bit fraction) (Partially supported) */ + miopenDouble = 6, /*!< 64-bit floating point (Partially supported) */ } miopenDataType_t; /*! @ingroup pooling From 031d47c4dcec07530fe659619c55fa6679c8cf8c Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Mon, 17 May 2021 07:14:59 +0000 Subject: [PATCH 02/20] Update the miopenReduceTensor() host C++ interface implementation to support Reduction on Double --- src/reducetensor.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index c268069bfd..6721a493aa 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -224,12 +224,13 @@ inline int GetDataTypeSize(miopenDataType_t t) { case miopenHalf: return (2); case miopenFloat: return (4); + case miopenDouble: return (8); case miopenInt8: return (1); case miopenInt8x4: return (4); case miopenBFloat16: return (2); case miopenInt32: return (4); default: - MIOPEN_THROW("Only float, half, bfloat16, int8, int8x4 data type is supported."); + MIOPEN_THROW("Only float, half, double, bfloat16, int8, int8x4 data type is supported."); break; }; }; @@ -241,6 +242,7 @@ inline int GetDataTypeId(miopenDataType_t t) case miopenHalf: return (static_cast('H')); case miopenFloat: return (static_cast('F')); case miopenBFloat16: return (static_cast('B')); + case miopenDouble: return (static_cast('D')); case miopenInt8: case miopenInt8x4: case miopenInt32: return (static_cast('O')); @@ -568,6 +570,13 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, #if WORKAROUND_MIOPEN_ISSUE_557 if(StartsWith(handle.GetDeviceName(), "gfx10")) param += " -DCK_USE_AMD_BUFFER_ADDRESSING=0 "; + else if(srcDataType == miopenDouble) + // TODO: support from composable kernel utility for using AMD Buffer Addressing for double + param += " -DCK_USE_AMD_BUFFER_ADDRESSING=0 "; +#else + if(srcDataType == miopenDouble) + // TODO: support from composable kernel utility for using AMD Buffer Addressing for double + param += " -DCK_USE_AMD_BUFFER_ADDRESSING=0 "; #endif std::string program_name = "gridwise_generic_reduction.cpp"; @@ -590,8 +599,12 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, const std::vector vgd_1 = { static_cast(gridSize * blockSize), size_t{1}, size_t{1}}; - float alphaVal = *reinterpret_cast(alpha); - float betaVal = *reinterpret_cast(beta); + float alphaVal = (srcDataType == miopenDouble) + ? static_cast(*reinterpret_cast(alpha)) + : *reinterpret_cast(alpha); + float betaVal = (srcDataType == miopenDouble) + ? static_cast(*reinterpret_cast(beta)) + : *reinterpret_cast(beta); handle.AddKernel(algo_name, network_config, program_name, kernel_name1, vld_1, vgd_1, param)( alphaVal, A, betaVal, C, ws_buf1_global, ws_buf2_bytes_offset, indices); From 0a1fbff1eb2e9d0e43135e0344d94aa21efbe946 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Mon, 17 May 2021 07:17:36 +0000 Subject: [PATCH 03/20] Tiny update in reduction kernel layer to use built-in shuffle for Double --- .../include/kernel_algorithm/reduction_functions.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/kernels/composable_kernel/include/kernel_algorithm/reduction_functions.hpp b/src/kernels/composable_kernel/include/kernel_algorithm/reduction_functions.hpp index af4a5ea356..4ed6bb6fd2 100644 --- a/src/kernels/composable_kernel/include/kernel_algorithm/reduction_functions.hpp +++ b/src/kernels/composable_kernel/include/kernel_algorithm/reduction_functions.hpp @@ -167,7 +167,8 @@ struct WarpReduce { using compType = typename opReduce::dataType; using binop = detail::binop_with_nan_check; - constexpr static bool have_builtin_shuffle = std::is_same::value; + constexpr static bool have_builtin_shuffle = + std::is_same::value || std::is_same::value; // This interface does not accumulate on indices __device__ static void Reduce(const DataType* p_thread_buffer, compType& accuData) From fbc28c4e35b85cdab83d2a45062d869c3a1c25f4 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Mon, 17 May 2021 07:36:27 +0000 Subject: [PATCH 04/20] Update in reduce_driver to support Reduction on Double --- driver/driver.hpp | 4 +-- driver/main.cpp | 4 +++ driver/miopen_Reduction.hpp | 5 ++-- driver/reduce_driver.hpp | 56 +++++++++++++++++++++++++++---------- 4 files changed, 50 insertions(+), 19 deletions(-) diff --git a/driver/driver.hpp b/driver/driver.hpp index 35ebcc7a80..793b9bf013 100644 --- a/driver/driver.hpp +++ b/driver/driver.hpp @@ -130,7 +130,7 @@ void PadBufferSize(size_t& sz, int datatype_sz) printf( "Supported Base Arguments: conv[fp16|int8|bfp16], CBAInfer[fp16], pool[fp16], lrn[fp16], " "activ[fp16], softmax[fp16], bnorm[fp16], rnn[fp16], gemm, ctc, dropout[fp16], " - "tensorop[fp16], reduce[fp16]\n"); + "tensorop[fp16], reduce[fp16,fp64]\n"); exit(0); } @@ -150,7 +150,7 @@ std::string ParseBaseArg(int argc, char* argv[]) arg != "softmax" && arg != "softmaxfp16" && arg != "bnorm" && arg != "bnormfp16" && arg != "rnn" && arg != "rnnfp16" && arg != "gemm" /*&& arg != "gemmfp16"*/ && arg != "ctc" && arg != "dropout" && arg != "dropoutfp16" && arg != "tensorop" && arg != "tensoropfp16" && - arg != "reduce" && arg != "reducefp16" && arg != "--version") + arg != "reduce" && arg != "reducefp16" && arg != "reducefp64" && arg != "--version") { printf("Invalid Base Input Argument\n"); Usage(); diff --git a/driver/main.cpp b/driver/main.cpp index 5d876d0f6d..7859c210e5 100644 --- a/driver/main.cpp +++ b/driver/main.cpp @@ -174,6 +174,10 @@ int main(int argc, char* argv[]) { drv = new ReduceDriver(); } + else if(base_arg == "reducefp64") + { + drv = new ReduceDriver(); + } else { printf("Incorrect BaseArg\n"); diff --git a/driver/miopen_Reduction.hpp b/driver/miopen_Reduction.hpp index 65b84cae10..8fbf9859c8 100644 --- a/driver/miopen_Reduction.hpp +++ b/driver/miopen_Reduction.hpp @@ -90,8 +90,9 @@ class miopenReductionHost RunImpl(alpha, in_data, beta, out_data, indices); else RunImpl(alpha, in_data, beta, out_data, indices); - }; - + } + else if(compTypeVal == miopenDouble) + RunImpl(alpha, in_data, beta, out_data, indices); return; }; diff --git a/driver/reduce_driver.hpp b/driver/reduce_driver.hpp index 55ad168fb5..d28ae8fec7 100644 --- a/driver/reduce_driver.hpp +++ b/driver/reduce_driver.hpp @@ -38,11 +38,12 @@ #include #include #include -#include #include #include #include #include +#include +#include #include "random.hpp" #include "miopen_Reduction.hpp" @@ -58,7 +59,10 @@ class ReduceDriver : public Driver miopenCreateReduceTensorDescriptor(&reduceDesc); - data_type = (sizeof(Tgpu) == 4) ? miopenFloat : miopenHalf; + if(std::is_same::value) + data_type = miopenDouble; + else + data_type = (sizeof(Tgpu) == 4) ? miopenFloat : miopenHalf; } int AddCmdLineArgs() override; @@ -289,6 +293,9 @@ int ReduceDriver::SetReduceTensorDescriptorFromCmdLineArgs() (reduceOp == MIOPEN_REDUCE_TENSOR_MIN || reduceOp == MIOPEN_REDUCE_TENSOR_MAX || reduceOp == MIOPEN_REDUCE_TENSOR_AMAX); + if(std::is_same::value) + compType = miopenDouble; + return (miopenSetReduceTensorDescriptor( reduceDesc, reduceOp, compType, nanOpt, indicesOpt, indicesType)); } @@ -373,16 +380,32 @@ int ReduceDriver::RunForwardGPU() bool output_accumulate = !(reduce::float_equal_one(alpha) && reduce::float_equal_zero(beta)); + double alphaData, betaData; + + void* alphaPara = reinterpret_cast(&alphaData); + void* betaPara = reinterpret_cast(&betaData); + + if(std::is_same::value) + { + *reinterpret_cast(alphaPara) = static_cast(alpha); + *reinterpret_cast(betaPara) = static_cast(beta); + } + else + { + *reinterpret_cast(alphaPara) = alpha; + *reinterpret_cast(betaPara) = beta; + }; + miopenReduceTensor(GetHandle(), reduceDesc, this->need_indices ? indices_dev->GetMem() : nullptr, // indices this->need_indices ? indices_sizeInBytes : 0, // indices size in bytes ws_sizeInBytes > 0 ? ws_dev->GetMem() : nullptr, // workspace ws_sizeInBytes, // workspace size in bytes - &alpha, + const_cast(alphaPara), inputTensor, in_dev->GetMem(), - &beta, + const_cast(betaPara), outputTensor, out_dev->GetMem()); @@ -404,10 +427,10 @@ int ReduceDriver::RunForwardGPU() this->need_indices ? indices_sizeInBytes : 0, // indices size in bytes ws_sizeInBytes > 0 ? ws_dev->GetMem() : nullptr, // workspace ws_sizeInBytes, // workspace size in bytes - &alpha, + const_cast(alphaPara), inputTensor, in_dev->GetMem(), - &beta, + const_cast(betaPara), outputTensor, out_dev->GetMem()); } @@ -455,10 +478,8 @@ int ReduceDriver::VerifyForward() this->dimsInvariant, this->dimsToReduce); - auto alpha = - reduce::convert_type(static_cast(this->inflags.GetValueDouble("alpha"))); - auto beta = - reduce::convert_type(static_cast(this->inflags.GetValueDouble("beta"))); + auto alpha = static_cast(this->inflags.GetValueDouble("alpha")); + auto beta = static_cast(this->inflags.GetValueDouble("beta")); auto reduceOp = static_cast(inflags.GetValueInt("ReduceOp")); @@ -470,14 +491,19 @@ int ReduceDriver::VerifyForward() hostReduction.Run(alpha, in.data(), beta, outhost.data(), outhost_indices.data()); - auto error = miopen::rms_range(outhost, out); - const double tolerance = - std::is_same::value || reduceOp == MIOPEN_REDUCE_TENSOR_NORM2 ? 2e-3 - : 1.5e-4; + auto error = miopen::rms_range(outhost, out); + double tolerance = 1.5e-4; + + if(std::is_same::value) + tolerance *= 4.0; + + if(std::is_same::value && reduceOp == MIOPEN_REDUCE_TENSOR_NORM2) + tolerance *= 12.0; if(error > tolerance) { - std::cout << "ReduceTensor() Failed: " << error << "\n"; + std::cout << "ReduceTensor() Failed with error = " << error + << " , tolerance = " << tolerance << "\n"; } else { From 5bd4bc4a7e081dc358050e5a069038eb486676ca Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Mon, 17 May 2021 07:40:00 +0000 Subject: [PATCH 05/20] Update in reduce_test to support Reduction on Double --- test/driver.hpp | 17 ++++-- test/reduce_test.cpp | 114 +++++++++++++++++++++++++++++++++-------- test/tensor_holder.hpp | 5 ++ 3 files changed, 112 insertions(+), 24 deletions(-) diff --git a/test/driver.hpp b/test/driver.hpp index ce99ef2417..0730f22c2e 100644 --- a/test/driver.hpp +++ b/test/driver.hpp @@ -282,6 +282,7 @@ struct test_driver case miopenInt8: ss << "--int8 "; break; case miopenInt32: ss << "--int32 "; break; case miopenFloat: ss << "--float "; break; + case miopenDouble: ss << "--double "; break; } for(auto&& arg : this->arguments) { @@ -308,6 +309,7 @@ struct test_driver case miopenInt8: ret.emplace_back("--int8"); break; case miopenInt32: ret.emplace_back("--int32"); break; case miopenFloat: ret.emplace_back("--float"); break; + case miopenDouble: ret.emplace_back("--double"); break; } for(auto&& arg : this->arguments) @@ -1008,7 +1010,7 @@ void set_driver_datatype(Driver& d, } else if(arg_map.count("--double") > 0) { - throw std::runtime_error("Double is not supported"); + d.type = miopenDouble; } else { @@ -1113,7 +1115,11 @@ void test_drive_impl_2(std::string program_name, std::vector as) return; } - set_driver_datatype(d, arg_map); + if(program_name.find("reduce") != std::string::npos) + set_driver_datatype(d, arg_map); + else + throw std::runtime_error("Double is not supported"); + std::vector> configs = build_configs(d, arg_map, keywords); size_t config_count = configs.size(); double running_average = 0; @@ -1169,7 +1175,10 @@ void test_drive_impl_1(std::string program_name, std::vector as) } else if(arg_map.count("--double") > 0) { - throw std::runtime_error("Double is not supported"); + if(program_name.find("reduce") == std::string::npos) + throw std::runtime_error("Double is not supported"); + else + d.type = miopenDouble; } else { @@ -1310,7 +1319,7 @@ void test_drive(int argc, const char* argv[]) } if(arg == "--double") { - // test_drive_impl>(argv[0], std::move(as)); + test_drive_impl>(argv[0], std::move(as)); break; } } diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index fdab312652..4e5085959c 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include "cpu_reduce_util.hpp" @@ -107,7 +108,9 @@ struct verify_reduce_with_indices results = cpuImpl(); else results = cpuImpl(); - }; + } + else if(compTypeVal == miopenDouble) + results = cpuImpl(); if(toVerifyData) { @@ -344,6 +347,22 @@ struct verify_reduce_with_indices std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T); std::size_t indices_sizeInBytes = indices.desc.GetElementSize() * sizeof(int); + double alphaData, betaData; + + void* alphaPara = reinterpret_cast(&alphaData); + void* betaPara = reinterpret_cast(&betaData); + + if(std::is_same::value) + { + *reinterpret_cast(alphaPara) = static_cast(alpha); + *reinterpret_cast(betaPara) = static_cast(beta); + } + else + { + *reinterpret_cast(alphaPara) = alpha; + *reinterpret_cast(betaPara) = beta; + }; + if(ws_sizeInBytes > 0) { auto workspace_dev = handle.Write(workspace.data); @@ -353,10 +372,10 @@ struct verify_reduce_with_indices indices_sizeInBytes, workspace_dev.get(), ws_sizeInBytes, - static_cast(&alpha), + const_cast(alphaPara), input.desc, input_dev.get(), - static_cast(&beta), + const_cast(betaPara), output.desc, output_dev.get()); } @@ -367,10 +386,10 @@ struct verify_reduce_with_indices indices_sizeInBytes, nullptr, 0, - static_cast(&alpha), + const_cast(alphaPara), input.desc, input_dev.get(), - static_cast(&beta), + const_cast(betaPara), output.desc, output_dev.get()); }; @@ -423,26 +442,38 @@ struct verify_reduce_no_indices nanOpt = reduce.reduceTensorNanOpt_; } - tensor cpu() + tensor cpu() { + using reduce::convert_type; + + tensor result; + if(compTypeVal == miopenFloat) { if(std::is_same::value) - return (cpuImpl()); + result = cpuImpl(); else - return (cpuImpl()); + result = cpuImpl(); } else if(compTypeVal == miopenHalf) { if(std::is_same::value) - return (cpuImpl()); + result = cpuImpl(); else if(std::is_same::value) - return (cpuImpl()); + result = cpuImpl(); else - return (cpuImpl()); - }; + result = cpuImpl(); + } + else if(compTypeVal == miopenDouble) + result = cpuImpl(); - return (tensor{}); + const auto dimLengths = output.desc.GetLengths(); + auto result_dataFloat = make_tensor(dimLengths); + + for(size_t i = 0; i < result.data.size(); i++) + result_dataFloat.data[i] = convert_type(result.data[i]); + + return (result_dataFloat); }; template @@ -593,7 +624,22 @@ struct verify_reduce_no_indices return (res); } - tensor gpu() const + tensor gpu() const + { + using reduce::convert_type; + + auto result = gpuImpl(); + + const auto dimLengths = output.desc.GetLengths(); + auto result_dataFloat = make_tensor(dimLengths); + + for(size_t i = 0; i < result.data.size(); i++) + result_dataFloat.data[i] = convert_type(result.data[i]); + + return (result_dataFloat); + }; + + tensor gpuImpl() const { auto&& handle = get_handle(); auto input_dev = handle.Write(input.data); @@ -604,6 +650,22 @@ struct verify_reduce_no_indices std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T); + double alphaData, betaData; + + void* alphaPara = reinterpret_cast(&alphaData); + void* betaPara = reinterpret_cast(&betaData); + + if(std::is_same::value) + { + *reinterpret_cast(alphaPara) = static_cast(alpha); + *reinterpret_cast(betaPara) = static_cast(beta); + } + else + { + *reinterpret_cast(alphaPara) = alpha; + *reinterpret_cast(betaPara) = beta; + }; + if(ws_sizeInBytes > 0) { auto workspace_dev = handle.Write(workspace.data); @@ -613,10 +675,10 @@ struct verify_reduce_no_indices 0, workspace_dev.get(), ws_sizeInBytes, - static_cast(&alpha), + static_cast(alphaPara), input.desc, input_dev.get(), - static_cast(&beta), + static_cast(betaPara), output.desc, output_dev.get()); } @@ -627,10 +689,10 @@ struct verify_reduce_no_indices 0, nullptr, 0, - static_cast(&alpha), + static_cast(alphaPara), input.desc, input_dev.get(), - static_cast(&beta), + static_cast(betaPara), output.desc, output_dev.get()); }; @@ -704,6 +766,9 @@ struct reduce_driver : test_driver { using reduce::convert_type; + if(std::is_same::value) + compTypeVal = static_cast(miopenDouble); + if(std::is_same::value) { if(reduceOp == MIOPEN_REDUCE_TENSOR_MIN || reduceOp == MIOPEN_REDUCE_TENSOR_MAX || @@ -801,7 +866,7 @@ struct reduce_driver : test_driver }; if(reduceOp == MIOPEN_REDUCE_TENSOR_MUL) - this->tolerance = 80 * 500; + this->tolerance = 80 * 300; else if(reduceOp == MIOPEN_REDUCE_TENSOR_NORM1 || reduceOp == MIOPEN_REDUCE_TENSOR_NORM2) { if(toReduceDims.size() == 4) @@ -810,6 +875,9 @@ struct reduce_driver : test_driver this->tolerance = 80 * 10; }; + if(std::is_same::value) + this->tolerance *= this->tolerance * 10.0; + auto inputTensor = (reduceOp == MIOPEN_REDUCE_TENSOR_MUL) ? tensor{this->inLengths}.generate(gen_value_2) : (need_indices || reduceOp == MIOPEN_REDUCE_TENSOR_NORM1 || @@ -858,13 +926,19 @@ int main(int argc, const char* argv[]) { std::vector as(argv + 1, argv + argc); - bool test_half = false; + bool test_half = false; + bool test_double = false; test_half = std::any_of( as.begin(), as.end(), [](const std::string& elem) { return (elem == "--half"); }); + test_double = std::any_of( + as.begin(), as.end(), [](const std::string& elem) { return (elem == "--double"); }); + if(test_half) test_drive>(argc, argv); + else if(test_double) + test_drive>(argc, argv); else test_drive>(argc, argv); }; diff --git a/test/tensor_holder.hpp b/test/tensor_holder.hpp index 1b3ccc728c..12158ec7d7 100644 --- a/test/tensor_holder.hpp +++ b/test/tensor_holder.hpp @@ -85,6 +85,11 @@ struct miopen_type : std::integral_constant +struct miopen_type : std::integral_constant +{ +}; + template <> struct miopen_type : std::integral_constant { From e18aff85cdd793b887064f48d2ce8cf96a4788fa Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Mon, 17 May 2021 12:55:06 +0000 Subject: [PATCH 06/20] Update to remove compiler warnings caused by the adding of miopenDouble --- src/gemm_v2.cpp | 23 +++++++++++++++++++ .../miopen/conv/problem_description.hpp | 1 + src/include/miopen/datatype.hpp | 5 ++++ src/include/miopen/tensor.hpp | 1 + src/include/miopen/visit_float.hpp | 5 ++++ src/ocl/tensorocl.cpp | 3 +++ src/tensor.cpp | 1 + 7 files changed, 39 insertions(+) diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index 31f69e10e2..9dd547f44e 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ -297,6 +297,8 @@ miopenStatus_t CallGemmMIOpenTensile(const Handle& handle, ptrA = Data_t(reinterpret_cast(A) + a_offset); ptrB = Data_t(reinterpret_cast(B) + b_offset); ptrC = Data_t(reinterpret_cast(C) + c_offset); + case miopenDouble: + MIOPEN_THROW(miopenStatusBadParm, "miopenDouble data type not supported by MIOpenGEMM."); } if(gemm_desc.dataType == miopenInt8 || gemm_desc.dataType == miopenInt8x4) { @@ -570,6 +572,13 @@ miopenStatus_t CallGemm(const Handle& handle, 0); } break; + + case miopenDouble: + { + MIOPEN_THROW(miopenStatusBadParm, + "miopenDouble data type not supported by MIOpenGEMM."); + }; + break; } if(handle.IsProfilingEnabled()) @@ -887,6 +896,13 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, 0); } break; + + case miopenDouble: + { + MIOPEN_THROW(miopenStatusBadParm, + "miopenDouble data type not supported by MIOpenGEMM."); + } + break; } if(handle.IsProfilingEnabled()) @@ -1123,6 +1139,13 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, } } break; + + case miopenDouble: + { + MIOPEN_THROW(miopenStatusBadParm, + "miopenDouble data type not supported by MIOpenGEMM."); + } + break; } if(handle.IsProfilingEnabled()) diff --git a/src/include/miopen/conv/problem_description.hpp b/src/include/miopen/conv/problem_description.hpp index 2a7120a5e5..169de1197b 100644 --- a/src/include/miopen/conv/problem_description.hpp +++ b/src/include/miopen/conv/problem_description.hpp @@ -49,6 +49,7 @@ inline std::string GetDataTypeName(miopenDataType_t data_type) case miopenInt8x4: return "INT8x4"; case miopenInt32: return "INT32"; case miopenBFloat16: return "BF16"; + case miopenDouble: return "FP64"; } return "Unknown(" + std::to_string(data_type) + ")"; diff --git a/src/include/miopen/datatype.hpp b/src/include/miopen/datatype.hpp index 9d17116cf7..9c33fb5a6e 100644 --- a/src/include/miopen/datatype.hpp +++ b/src/include/miopen/datatype.hpp @@ -53,6 +53,9 @@ inline std::string GetDataType(miopenDataType_t type) case miopenInt32: { type_str = "int"; } break; + case miopenDouble: { type_str = "double"; + } + break; } return type_str; } @@ -104,6 +107,7 @@ inline std::string GetDataTypeKernelParams(miopenDataType_t type) int use_int8x4 = 0; int use_int32 = 0; int use_bfp16 = 0; + int use_fp64 = 0; const int use_rne_bfloat16 = MIOPEN_USE_RNE_BFLOAT16; switch(type) @@ -114,6 +118,7 @@ inline std::string GetDataTypeKernelParams(miopenDataType_t type) case miopenInt8x4: use_int8x4 = 1; break; case miopenBFloat16: use_bfp16 = 1; break; case miopenInt32: use_int32 = 1; break; + case miopenDouble: use_fp64 = 1; break; default: MIOPEN_THROW("Only float, half, bfloat16, int8, int8x4 data type is supported."); break; diff --git a/src/include/miopen/tensor.hpp b/src/include/miopen/tensor.hpp index 57a5a53502..db06490d77 100644 --- a/src/include/miopen/tensor.hpp +++ b/src/include/miopen/tensor.hpp @@ -99,6 +99,7 @@ inline std::size_t GetTypeSize(miopenDataType_t d) case miopenBFloat16: return 2; case miopenInt8x4: case miopenInt8: return 1; + case miopenDouble: return 8; } MIOPEN_THROW("Unknown data type"); } diff --git a/src/include/miopen/visit_float.hpp b/src/include/miopen/visit_float.hpp index 96b398eb01..ec1e394cc2 100644 --- a/src/include/miopen/visit_float.hpp +++ b/src/include/miopen/visit_float.hpp @@ -87,6 +87,11 @@ void visit_float(miopenDataType_t t, F f) f(as_float{}); break; } + case miopenDouble: + { + f(as_float{}); + break; + } } } diff --git a/src/ocl/tensorocl.cpp b/src/ocl/tensorocl.cpp index 997d64c896..b9727d3adc 100644 --- a/src/ocl/tensorocl.cpp +++ b/src/ocl/tensorocl.cpp @@ -1924,6 +1924,9 @@ std::string GetCastTensorBuildOptionFromType(const std::string& buildOption, mio case miopenHalf: return option += "2"; case miopenFloat: return option += "3"; case miopenBFloat16: return option += "4"; + case miopenDouble: + // TODO + MIOPEN_THROW(miopenStatusBadParm, "miopenDouble data type not supported in cast tensor."); case miopenInt8x4: MIOPEN_THROW(miopenStatusBadParm, "miopenInt8x4 data type not supported in cast tensor."); default: MIOPEN_THROW(miopenStatusBadParm, "Invalid data type in cast tensor desc."); diff --git a/src/tensor.cpp b/src/tensor.cpp index c8446d551e..50118dfc16 100644 --- a/src/tensor.cpp +++ b/src/tensor.cpp @@ -141,6 +141,7 @@ std::size_t TensorDescriptor::GetNumBytes() const case miopenHalf: typesize = 2; break; case miopenInt32: case miopenFloat: typesize = 4; break; + case miopenDouble: typesize = 8; break; } return typesize * this->GetElementSpace(); } From 6f849d9ae8aad2715f7bcf54a358c81eb92f7c75 Mon Sep 17 00:00:00 2001 From: Artem Tamazov Date: Tue, 18 May 2021 02:25:32 +0300 Subject: [PATCH 07/20] Formatting --- src/tensor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor.cpp b/src/tensor.cpp index 50118dfc16..b3f745233e 100644 --- a/src/tensor.cpp +++ b/src/tensor.cpp @@ -140,7 +140,7 @@ std::size_t TensorDescriptor::GetNumBytes() const case miopenBFloat16: case miopenHalf: typesize = 2; break; case miopenInt32: - case miopenFloat: typesize = 4; break; + case miopenFloat: typesize = 4; break; case miopenDouble: typesize = 8; break; } return typesize * this->GetElementSpace(); From 30d5faa4f7f41e5d6187be9d430b03ddcba2b5d2 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Wed, 19 May 2021 06:36:15 +0000 Subject: [PATCH 08/20] Update to eliminate tidy-checking warnings --- driver/reduce_driver.hpp | 31 +++++++---------- src/include/miopen/datatype.hpp | 1 + src/reducetensor.cpp | 10 ++++-- test/reduce_test.cpp | 60 +++++++++++++-------------------- 4 files changed, 44 insertions(+), 58 deletions(-) diff --git a/driver/reduce_driver.hpp b/driver/reduce_driver.hpp index d28ae8fec7..180c7a3f95 100644 --- a/driver/reduce_driver.hpp +++ b/driver/reduce_driver.hpp @@ -380,21 +380,14 @@ int ReduceDriver::RunForwardGPU() bool output_accumulate = !(reduce::float_equal_one(alpha) && reduce::float_equal_zero(beta)); - double alphaData, betaData; - - void* alphaPara = reinterpret_cast(&alphaData); - void* betaPara = reinterpret_cast(&betaData); - - if(std::is_same::value) - { - *reinterpret_cast(alphaPara) = static_cast(alpha); - *reinterpret_cast(betaPara) = static_cast(beta); - } - else - { - *reinterpret_cast(alphaPara) = alpha; - *reinterpret_cast(betaPara) = beta; - }; + const double alpha64 = alpha; + const double beta64 = beta; + const void* const alphaPtr = std::is_same::value + ? reinterpret_cast(&alpha64) + : reinterpret_cast(&alpha); + const void* const betaPtr = std::is_same::value + ? reinterpret_cast(&beta64) + : reinterpret_cast(&beta); miopenReduceTensor(GetHandle(), reduceDesc, @@ -402,10 +395,10 @@ int ReduceDriver::RunForwardGPU() this->need_indices ? indices_sizeInBytes : 0, // indices size in bytes ws_sizeInBytes > 0 ? ws_dev->GetMem() : nullptr, // workspace ws_sizeInBytes, // workspace size in bytes - const_cast(alphaPara), + alphaPtr, inputTensor, in_dev->GetMem(), - const_cast(betaPara), + betaPtr, outputTensor, out_dev->GetMem()); @@ -427,10 +420,10 @@ int ReduceDriver::RunForwardGPU() this->need_indices ? indices_sizeInBytes : 0, // indices size in bytes ws_sizeInBytes > 0 ? ws_dev->GetMem() : nullptr, // workspace ws_sizeInBytes, // workspace size in bytes - const_cast(alphaPara), + alphaPtr, inputTensor, in_dev->GetMem(), - const_cast(betaPara), + betaPtr, outputTensor, out_dev->GetMem()); } diff --git a/src/include/miopen/datatype.hpp b/src/include/miopen/datatype.hpp index 9c33fb5a6e..7ff4babb26 100644 --- a/src/include/miopen/datatype.hpp +++ b/src/include/miopen/datatype.hpp @@ -132,6 +132,7 @@ inline std::string GetDataTypeKernelParams(miopenDataType_t type) ss << " -DMIOPEN_USE_BFP16=" << use_bfp16; ss << " -DMIOPEN_USE_INT32=" << use_int32; ss << " -DMIOPEN_USE_RNE_BFLOAT16=" << use_rne_bfloat16; + ss << " -DMIOPEN_USE_FP64=" << use_fp64; return ss.str(); } diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index 6721a493aa..54394bca75 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -570,9 +570,13 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, #if WORKAROUND_MIOPEN_ISSUE_557 if(StartsWith(handle.GetDeviceName(), "gfx10")) param += " -DCK_USE_AMD_BUFFER_ADDRESSING=0 "; - else if(srcDataType == miopenDouble) - // TODO: support from composable kernel utility for using AMD Buffer Addressing for double - param += " -DCK_USE_AMD_BUFFER_ADDRESSING=0 "; + else + { + if(srcDataType == miopenDouble) + // TODO: support from composable kernel utility for using AMD Buffer Addressing for + // double + param += " -DCK_USE_AMD_BUFFER_ADDRESSING=0 "; + }; #else if(srcDataType == miopenDouble) // TODO: support from composable kernel utility for using AMD Buffer Addressing for double diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index 4e5085959c..6caf855675 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -347,21 +347,15 @@ struct verify_reduce_with_indices std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T); std::size_t indices_sizeInBytes = indices.desc.GetElementSize() * sizeof(int); - double alphaData, betaData; + const double alpha64 = alpha; + const double beta64 = beta; - void* alphaPara = reinterpret_cast(&alphaData); - void* betaPara = reinterpret_cast(&betaData); - - if(std::is_same::value) - { - *reinterpret_cast(alphaPara) = static_cast(alpha); - *reinterpret_cast(betaPara) = static_cast(beta); - } - else - { - *reinterpret_cast(alphaPara) = alpha; - *reinterpret_cast(betaPara) = beta; - }; + const void* const alphaPtr = (std::is_same::value) + ? reinterpret_cast(&alpha64) + : reinterpret_cast(&alpha); + const void* const betaPtr = (std::is_same::value) + ? reinterpret_cast(&beta64) + : reinterpret_cast(&beta); if(ws_sizeInBytes > 0) { @@ -372,10 +366,10 @@ struct verify_reduce_with_indices indices_sizeInBytes, workspace_dev.get(), ws_sizeInBytes, - const_cast(alphaPara), + alphaPtr, input.desc, input_dev.get(), - const_cast(betaPara), + betaPtr, output.desc, output_dev.get()); } @@ -386,10 +380,10 @@ struct verify_reduce_with_indices indices_sizeInBytes, nullptr, 0, - const_cast(alphaPara), + alphaPtr, input.desc, input_dev.get(), - const_cast(betaPara), + betaPtr, output.desc, output_dev.get()); }; @@ -650,21 +644,15 @@ struct verify_reduce_no_indices std::size_t ws_sizeInBytes = workspace.desc.GetElementSize() * sizeof(T); - double alphaData, betaData; + const double alpha64 = alpha; + const double beta64 = beta; - void* alphaPara = reinterpret_cast(&alphaData); - void* betaPara = reinterpret_cast(&betaData); - - if(std::is_same::value) - { - *reinterpret_cast(alphaPara) = static_cast(alpha); - *reinterpret_cast(betaPara) = static_cast(beta); - } - else - { - *reinterpret_cast(alphaPara) = alpha; - *reinterpret_cast(betaPara) = beta; - }; + const void* const alphaPtr = (std::is_same::value) + ? reinterpret_cast(&alpha64) + : reinterpret_cast(&alpha); + const void* const betaPtr = (std::is_same::value) + ? reinterpret_cast(&beta64) + : reinterpret_cast(&beta); if(ws_sizeInBytes > 0) { @@ -675,10 +663,10 @@ struct verify_reduce_no_indices 0, workspace_dev.get(), ws_sizeInBytes, - static_cast(alphaPara), + alphaPtr, input.desc, input_dev.get(), - static_cast(betaPara), + betaPtr, output.desc, output_dev.get()); } @@ -689,10 +677,10 @@ struct verify_reduce_no_indices 0, nullptr, 0, - static_cast(alphaPara), + alphaPtr, input.desc, input_dev.get(), - static_cast(betaPara), + betaPtr, output.desc, output_dev.get()); }; From ecad57f3b82347b26eecc4ef51021df90507c0ca Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Wed, 19 May 2021 10:05:44 +0000 Subject: [PATCH 09/20] Update to elminate tidy-checking warnings ... --- driver/reduce_driver.hpp | 8 ++++---- src/solver/conv_asm_1x1u.cpp | 3 +++ test/reduce_test.cpp | 16 ++++++++-------- test/tensor_vec.cpp | 6 ++++++ 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/driver/reduce_driver.hpp b/driver/reduce_driver.hpp index 180c7a3f95..9409392111 100644 --- a/driver/reduce_driver.hpp +++ b/driver/reduce_driver.hpp @@ -383,11 +383,11 @@ int ReduceDriver::RunForwardGPU() const double alpha64 = alpha; const double beta64 = beta; const void* const alphaPtr = std::is_same::value - ? reinterpret_cast(&alpha64) - : reinterpret_cast(&alpha); + ? static_cast(&alpha64) + : static_cast(&alpha); const void* const betaPtr = std::is_same::value - ? reinterpret_cast(&beta64) - : reinterpret_cast(&beta); + ? static_cast(&beta64) + : static_cast(&beta); miopenReduceTensor(GetHandle(), reduceDesc, diff --git a/src/solver/conv_asm_1x1u.cpp b/src/solver/conv_asm_1x1u.cpp index cf20612181..33f053f350 100644 --- a/src/solver/conv_asm_1x1u.cpp +++ b/src/solver/conv_asm_1x1u.cpp @@ -308,6 +308,9 @@ bool PerformanceConfigConvAsm1x1U::IsValid(const ConvolutionContext& config) con void PerformanceConfigConvAsm1x1U::HeuristicInit(const ConvolutionContext& config) { + if(config.in_data_type == miopenDouble) + MIOPEN_THROW("Double data type is not supported by ConvAsm1x1U"); + const auto elements_in_dword = 4 / GetTypeSize(config.in_data_type); read_size = 4; k_mult = 16; diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index 6caf855675..7d8ddc680c 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -351,11 +351,11 @@ struct verify_reduce_with_indices const double beta64 = beta; const void* const alphaPtr = (std::is_same::value) - ? reinterpret_cast(&alpha64) - : reinterpret_cast(&alpha); + ? static_cast(&alpha64) + : static_cast(&alpha); const void* const betaPtr = (std::is_same::value) - ? reinterpret_cast(&beta64) - : reinterpret_cast(&beta); + ? static_cast(&beta64) + : static_cast(&beta); if(ws_sizeInBytes > 0) { @@ -648,11 +648,11 @@ struct verify_reduce_no_indices const double beta64 = beta; const void* const alphaPtr = (std::is_same::value) - ? reinterpret_cast(&alpha64) - : reinterpret_cast(&alpha); + ? static_cast(&alpha64) + : static_cast(&alpha); const void* const betaPtr = (std::is_same::value) - ? reinterpret_cast(&beta64) - : reinterpret_cast(&beta); + ? static_cast(&beta64) + : static_cast(&beta); if(ws_sizeInBytes > 0) { diff --git a/test/tensor_vec.cpp b/test/tensor_vec.cpp index 39bd37a87c..e9ae254b08 100644 --- a/test/tensor_vec.cpp +++ b/test/tensor_vec.cpp @@ -319,6 +319,12 @@ struct tensor_vec_driver : test_driver return; } + if(std::is_same::value) + { + std::cout << "VEC2 transpose does not support double type" << std::endl; + return; + } + if(!(miopen::float_equal(static_cast(alpha), 1.0) && miopen::float_equal(static_cast(beta), 0.0))) return; From ac3de38dac8289f6be806b83e99977139ecc5bfc Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Wed, 19 May 2021 10:10:12 +0000 Subject: [PATCH 10/20] Tiny Fix in reduce_driver.cpp --- driver/reduce_driver.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/driver/reduce_driver.hpp b/driver/reduce_driver.hpp index 9409392111..47e9c12e61 100644 --- a/driver/reduce_driver.hpp +++ b/driver/reduce_driver.hpp @@ -374,8 +374,8 @@ int ReduceDriver::RunForwardGPU() if(this->need_indices) { - alpha = reduce::convert_type(1.0f); - beta = reduce::convert_type(0.0f); + alpha = 1.0f; + beta = 0.0f; }; bool output_accumulate = !(reduce::float_equal_one(alpha) && reduce::float_equal_zero(beta)); From 4721ccdbd5240ec3ae0ebed95c0ba8f85bf475b8 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Thu, 20 May 2021 12:15:56 +0000 Subject: [PATCH 11/20] Tiny fix in gemm_v2.cpp --- src/gemm_v2.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index 9dd547f44e..cfcbd0680c 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ -297,6 +297,7 @@ miopenStatus_t CallGemmMIOpenTensile(const Handle& handle, ptrA = Data_t(reinterpret_cast(A) + a_offset); ptrB = Data_t(reinterpret_cast(B) + b_offset); ptrC = Data_t(reinterpret_cast(C) + c_offset); + break; case miopenDouble: MIOPEN_THROW(miopenStatusBadParm, "miopenDouble data type not supported by MIOpenGEMM."); } From 7e3f738e2d1eb10bd16437090c2b794dd5024601 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Fri, 21 May 2021 06:56:43 +0000 Subject: [PATCH 12/20] Add workaround for rocm 3.7 in reduce_test.cpp --- test/reduce_test.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index 7d8ddc680c..768fb7aa7d 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -46,6 +46,10 @@ #define WORKAROUND_GPU_MEM_ACCESS_FAULT \ (HIP_PACKAGE_VERSION_MAJOR == 3 && HIP_PACKAGE_VERSION_MINOR == 7) +/// Not reproducible with ROCm 4.1 and 4.2. +#define WORKAROUND_GPU_NUMERIC_ERROR \ + (HIP_PACKAGE_VERSION_MAJOR == 3 && HIP_PACKAGE_VERSION_MINOR == 7) + template struct verify_reduce_with_indices { @@ -782,6 +786,20 @@ struct reduce_driver : test_driver } } #endif + +#if WORKAROUND_GPU_NUMERIC_ERROR + if(std::is_same::value) + { + if(inLengths == std::vector{64, 3, 280, 81} && + toReduceDims == std::vector{0, 1, 2, 3} && (reduceOp == 3 || reduceOp == 4) && + indicesOpt == 1) + { + std::cout << "Workaround: Skipping the test." << std::endl; + return; + }; + } +#endif + miopen::ReduceTensorDescriptor reduceDesc( static_cast(reduceOp), static_cast(compTypeVal), From 83c097faca88e0d63a472aff5e1a0b797f82cdef Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Thu, 27 May 2021 15:44:04 +0000 Subject: [PATCH 13/20] Fix repeated scaling tolerance for half tests in reduce_test.cpp --- test/reduce_test.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index 768fb7aa7d..df9016022a 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -871,6 +871,9 @@ struct reduce_driver : test_driver return rand_upper * sign_value * rand_ratio; }; + // default tolerance (refer to driver.hpp) + this->tolerance = 80; + if(reduceOp == MIOPEN_REDUCE_TENSOR_MUL) this->tolerance = 80 * 300; else if(reduceOp == MIOPEN_REDUCE_TENSOR_NORM1 || reduceOp == MIOPEN_REDUCE_TENSOR_NORM2) From e0e2cd7a15e99671205b08871a7b5bf3113ca956 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Fri, 28 May 2021 14:31:10 +0000 Subject: [PATCH 14/20] Fix to use unique network-config for two calls --- src/reducetensor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index 47f737534e..b3aba09049 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -622,6 +622,8 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, std::string param2 = param + " -DCK_PARAM_GRIDSIZE=" + std::to_string(gridSize_2) + " "; + std::string network_config2 = network_config + "_C2"; + // compile option and network config for the second-time call const std::vector vld_2 = {static_cast(blockSize), size_t{1}, size_t{1}}; const std::vector vgd_2 = { @@ -631,7 +633,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, std::string kernel_name2 = "gridwise_generic_reduce_2"; handle.AddKernel( - algo_name, network_config, program_name, kernel_name2, vld_2, vgd_2, param2)( + algo_name, network_config2, program_name, kernel_name2, vld_2, vgd_2, param2)( alphaVal, A, betaVal, C, ws_buf1_global, ws_buf2_bytes_offset, indices); }; }; From 4da22f43f5dee0c68ee2663f2bfa3bb139f28190 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Wed, 2 Jun 2021 04:05:55 +0000 Subject: [PATCH 15/20] Avoid the using of -DMIOPEN_USE_FP64= when fp64 is not used --- src/include/miopen/datatype.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/include/miopen/datatype.hpp b/src/include/miopen/datatype.hpp index 7ff4babb26..8c05ecd325 100644 --- a/src/include/miopen/datatype.hpp +++ b/src/include/miopen/datatype.hpp @@ -132,7 +132,8 @@ inline std::string GetDataTypeKernelParams(miopenDataType_t type) ss << " -DMIOPEN_USE_BFP16=" << use_bfp16; ss << " -DMIOPEN_USE_INT32=" << use_int32; ss << " -DMIOPEN_USE_RNE_BFLOAT16=" << use_rne_bfloat16; - ss << " -DMIOPEN_USE_FP64=" << use_fp64; + if(use_fp64) + ss << " -DMIOPEN_USE_FP64=" << use_fp64; return ss.str(); } From bd1610bc043cf1482880be18798de7f29faca973 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Wed, 2 Jun 2021 04:07:36 +0000 Subject: [PATCH 16/20] Avoid unnecessary runtime warning for using double in test/driver.hpp --- test/driver.hpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/test/driver.hpp b/test/driver.hpp index 0730f22c2e..11e7a020f1 100644 --- a/test/driver.hpp +++ b/test/driver.hpp @@ -1115,10 +1115,7 @@ void test_drive_impl_2(std::string program_name, std::vector as) return; } - if(program_name.find("reduce") != std::string::npos) - set_driver_datatype(d, arg_map); - else - throw std::runtime_error("Double is not supported"); + set_driver_datatype(d, arg_map); std::vector> configs = build_configs(d, arg_map, keywords); size_t config_count = configs.size(); @@ -1175,10 +1172,7 @@ void test_drive_impl_1(std::string program_name, std::vector as) } else if(arg_map.count("--double") > 0) { - if(program_name.find("reduce") == std::string::npos) - throw std::runtime_error("Double is not supported"); - else - d.type = miopenDouble; + d.type = miopenDouble; } else { From 7d61d2a6257d53bce832fe9d913cf230bfde1900 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Wed, 2 Jun 2021 06:44:22 +0000 Subject: [PATCH 17/20] Fix for tidy --- src/include/miopen/datatype.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/miopen/datatype.hpp b/src/include/miopen/datatype.hpp index 8c05ecd325..05e4042f58 100644 --- a/src/include/miopen/datatype.hpp +++ b/src/include/miopen/datatype.hpp @@ -132,7 +132,7 @@ inline std::string GetDataTypeKernelParams(miopenDataType_t type) ss << " -DMIOPEN_USE_BFP16=" << use_bfp16; ss << " -DMIOPEN_USE_INT32=" << use_int32; ss << " -DMIOPEN_USE_RNE_BFLOAT16=" << use_rne_bfloat16; - if(use_fp64) + if(use_fp64 != 0) ss << " -DMIOPEN_USE_FP64=" << use_fp64; return ss.str(); } From eae5f7f1e5241fe4f01e84b65a2201479fee0bb2 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Fri, 4 Jun 2021 17:11:51 +0800 Subject: [PATCH 18/20] Remove un-needed setting in test/CMakeLists.txt --- test/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8cd36dd470..396f1d214d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -217,7 +217,6 @@ set( LONG_TESTS pooling3d.cpp soft_max.cpp lrn_test.cpp - reduce_test.cpp ) foreach(TEST ${TESTS}) From 40f3ee74db56920e944270c645e39bf5b0052353 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Fri, 4 Jun 2021 17:32:04 +0800 Subject: [PATCH 19/20] Add custom_test for testing reduce double --- test/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 396f1d214d..17906baf34 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1052,3 +1052,7 @@ if(MIOPEN_TEST_CONV) COMMAND $ --verbose --input 1 48 7 7 --weights 1 48 5 5 --pads_strides_dilations 0 0 4 4 1 1 ) endif() + +if(MIOPEN_TEST_FLOAT) + add_custom_test(test_reduce_double SKIP_UNLESS_ALL FLOAT_ENABLED COMMAND $ --double --all) +endif() From b3626a599f2731cb55aa3e0e434006b40b1424f6 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Fri, 4 Jun 2021 22:59:23 +0800 Subject: [PATCH 20/20] Update to the test_reduce_double custom_test --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 17906baf34..3d25c23954 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1054,5 +1054,5 @@ if(MIOPEN_TEST_CONV) endif() if(MIOPEN_TEST_FLOAT) - add_custom_test(test_reduce_double SKIP_UNLESS_ALL FLOAT_ENABLED COMMAND $ --double --all) + add_custom_test(test_reduce_double SKIP_UNLESS_ALL GFX908_ENABLED COMMAND $ --double --all --verbose) endif()