Generic Tensor Reduction for Double (SWDEV-284915) #934
Conversation
…support Reduction on Double
@qianfengz Please fix the tidy issues. Do not hesitate to ask for help if needed. Thanks.
I am trying to create a Docker image to be used locally for running "make analyze", but it seems that from my location, libboost-1.72 cannot be accessed by the scripts used in MIOpen/Dockerfile. Is there any existing Docker image that can be pulled to run "make analyze"?
> libboost-1.72

@qianfengz Could you check whether the following URL is accessible from your location?
https://boostorg.jfrog.io/artifactory/main/release/1.72.0/source/boost_1_72_0.tar.gz
`make analyze` consists of two steps. For the former, you need to install cppcheck (see ./dev-requirements.txt). Then remake the build directory from scratch and run the analysis again.
@qianfengz You can also install cppcheck within the container and then re-run the analysis.
I can manually download boost 1.72 and install it for building MIOpen. But for "make analyze", the issue seems to be more complicated, since in the Dockerfile the failure of the boost download also breaks other downloads/installations.
Thanks, Artem.
One strange issue is that on an older ROCm, some cases failed to get the correct maximum value on my MI100/MI25, while they are able to pass on a newer ROCm. Watching the dumped results, it can be seen that in the failing cases the second-largest value rather than the largest one is picked by the GPU kernel; for example, the second-largest value is 0.999997 while the largest value is 0.999999 or 1.0, depending on the run. I suspect there is a compiler issue that causes the comparison expression to be evaluated incorrectly.
So the new feature revealed an issue found only in a previous version of ROCm? I think all CI nodes have already been updated to at least 4.1, and I doubt they will patch issues for an earlier version of the compiler, especially if it is already "fixed" in later versions. :)
No, our CI nodes are running 3.7. @shurale-nkn is working on the Dockerfile upgrade to 4.2.
Can you please check with the ROCm 4.2 release? Please see Confluence for where to pull it from.
@qianfengz If everything works fine with 4.2, then we can disable the failing test cases (workaround) for 3.7 and forget about this problem.
Please wait some time; I need to download 4.2 from home, since I cannot access repo.radeon.com from the office.
You can pull a docker image of 4.2 from
Almost good!
LGTM!
As I can see, a new data type has been added here, which means we need to add tests for reduction with the double type. But since we don't have a testing stage for double on Jenkins, we need to add a custom test with reduce_test --all and attach it to the full fp32 stages on the target GPUs.
Theoretically, we should add MIOPEN_TEST_DOUBLE stages, but yes, it seems too expensive to add two stages only to test reduction. Let's add custom reduction tests for double to the FULL FP32 stages.
@qianfengz Please merge from develop and add the tests. Do not hesitate to ask me if you have any questions or need assistance. Thanks.
So my understanding is that, since there is no stage/parallel sub-stage for
test/CMakeLists.txt (outdated)

```cmake
if(MIOPEN_TEST_FLOAT)
    add_custom_test(test_reduce_double SKIP_UNLESS_ALL FLOAT_ENABLED COMMAND $<TARGET_FILE:test_reduce_test> --double --all)
endif()
```
Custom tests are enabled for FLOAT by default, so let's remove FLOAT_ENABLED. gfx908 is disabled by default, so let's enable it explicitly:

```cmake
if(MIOPEN_TEST_FLOAT)
    add_custom_test(test_reduce_double SKIP_UNLESS_ALL GFX908_ENABLED
        COMMAND $<TARGET_FILE:test_reduce_test> --double --all --verbose
    )
endif()
```
Correct!
LGTM!
```diff
@@ -1053,3 +1052,7 @@ if(MIOPEN_TEST_CONV)
     COMMAND $<TARGET_FILE:test_conv2d> --verbose --input 1 48 7 7 --weights 1 48 5 5 --pads_strides_dilations 0 0 4 4 1 1
 )
 endif()
+
+if(MIOPEN_TEST_FLOAT)
```
[Notice] Well, this `if` is not actually required (but there is no harm). Ok.
Post-merge review 1 (part of work on https://ontrack-internal.amd.com/browse/SWDEV-293780)
```diff
@@ -605,6 +622,8 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle,

     std::string param2 = param + " -DCK_PARAM_GRIDSIZE=" + std::to_string(gridSize_2) + " ";

+    std::string network_config2 = network_config + "_C2";
```
@qianfengz Why is this change (appending "_C2") necessary?
The second call and the first call use different kernels even though they solve the same problem, so they should have different keys for mapping in the kernel cache.
One other thing I want to mention is that the kernel cache is actually not queried here, since Handle::AddKernel() only queries the Program cache, and we do not use Handle::HasKernel() + Handle::GetKernel() here.
> The second time and the first time use different kernels even though they are for solving the same problem, so they should have different key for mapping in the kernel cache.

I do not understand, sorry.
?
```diff
@@ -308,6 +308,9 @@ bool PerformanceConfigConvAsm1x1U::IsValid(const ConvolutionContext& config) con

 void PerformanceConfigConvAsm1x1U::HeuristicInit(const ConvolutionContext& config)
 {
+    if(config.in_data_type == miopenDouble)
```
It seems like this change is not needed. IsApplicable() of this solver should refuse configs with double.
?
* Add miopenDouble data type ID
* Update the miopenReduceTensor() host C++ interface implementation to support Reduction on Double
* Tiny update in reduction kernel layer to use built-in shuffle for Double
* Update in reduce_driver to support Reduction on Double
* Update in reduce_test to support Reduction on Double
* Update to remove compiler warnings caused by the adding of miopenDouble
* Add workaround for rocm 3.7 in reduce_test.cpp
* Add custom_test for testing reduce double
Adds `miopenDouble` to the public API.

Adds support for miopenReduceTensor() to be used on tensors of double-type data. All 8 reduction operations that are supported with fp32/fp16 tensors can be used with double tensors. The alpha/beta scaling factors follow the same specification as cudnnReduceTensor(): for fp32/fp16, alpha/beta should be float values passed by the application; for double, alpha/beta should be double values passed by the application.
This is expected to resolve https://ontrack-internal.amd.com/browse/SWDEV-284915.
The commits have gone through the following tests:
Regular MIOpen tests:
Manual testing: