// Patch to src/include/miopen/solver/implicitgemm_ck_util.hpp, three hunks:
//
//  1. ZeroOutBuffer (hunk context: class TransposeInstance) gains a
//     `const Handle& handle` parameter and replaces
//     hipMemset(buf_handle.get(), 0, tensor_sz) with
//     hipMemsetAsync(buf_handle.get(), 0, tensor_sz, handle.GetStream()),
//     so the zero-fill is issued on the stream returned by the handle.
//  2. InitInvokerFactoryNCHW: the ZeroOutBuffer call now passes `handle`.
//  3. InitInvokerFactoryWrwNCHW: the ZeroOutBuffer call now passes `handle`.
//     In both call sites the call is still made only when
//     output_tr_inst.GetConvOperandTag() == internal::ConvOperandTag::Weights.
//
// NOTE(review): the status returned by hipMemsetAsync is checked only through
// assert(), which is compiled out when NDEBUG is defined -- confirm that a
// failed memset may be ignored in release builds.
// NOTE(review): presumably the kernels that consume this buffer are launched on
// the same stream as handle.GetStream(), so stream ordering is what guarantees
// the zero-fill completes first -- verify against the callers.
diff --git a/src/include/miopen/solver/implicitgemm_ck_util.hpp b/src/include/miopen/solver/implicitgemm_ck_util.hpp index 6967313720..5a0e9304b6 100644 --- a/src/include/miopen/solver/implicitgemm_ck_util.hpp +++ b/src/include/miopen/solver/implicitgemm_ck_util.hpp @@ -376,9 +376,10 @@ class TransposeInstance Run(handle, kernels, out_ptr, buf_handle.get()); } - void ZeroOutBuffer() + void ZeroOutBuffer(const Handle& handle) { - [[maybe_unused]] auto status = hipMemset(buf_handle.get(), 0, tensor_sz); + [[maybe_unused]] auto status = + hipMemsetAsync(buf_handle.get(), 0, tensor_sz, handle.GetStream()); assert(status == hipSuccess); } @@ -702,7 +703,7 @@ ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx, /// \todo: Will need SetTensor() to properly zero out non-packed tensors if(output_tr_inst.GetConvOperandTag() == internal::ConvOperandTag::Weights) { - output_tr_inst.ZeroOutBuffer(); + output_tr_inst.ZeroOutBuffer(handle); } std::array tr_ptrs = { @@ -845,7 +846,7 @@ ConvSolution InitInvokerFactoryWrwNCHW(const ExecutionContext& ctx, /// \todo: Will need SetTensor() to properly zero out non-packed tensors if(output_tr_inst.GetConvOperandTag() == internal::ConvOperandTag::Weights) { - output_tr_inst.ZeroOutBuffer(); + output_tr_inst.ZeroOutBuffer(handle); } std::array tr_ptrs = {