Integrate CK's layer norm into MIOpen solver #2481

Merged: 36 commits merged into develop from integrate_CK_layernorm on Nov 15, 2023.

Commits (the diff below shows changes from 18 of the 36 commits):
3608d58
apply issue #2403
seungmanhan Oct 12, 2023
7962087
apply clang format
seungmanhan Oct 12, 2023
dc87589
Init solver structure for layernorm HIP kernel
seungmanhan Oct 12, 2023
b7a2494
apply clang format
seungmanhan Oct 13, 2023
5bea80e
Fix build error
seungmanhan Oct 13, 2023
46ea166
Redesign Solver/Solution/Invoker architecture
seungmanhan Oct 13, 2023
1f1292c
integrate CK layernorm
seungmanhan Oct 18, 2023
a69cab1
add 2D test, fix CK call
seungmanhan Oct 23, 2023
506c4ba
clang format
seungmanhan Oct 23, 2023
41fb4b1
fix driver and gtest, remove layernorm 4d
seungmanhan Oct 24, 2023
18ce140
Merge remote-tracking branch 'origin' into integrate_CK_layernorm
seungmanhan Oct 24, 2023
b6daff2
fix build error
seungmanhan Oct 25, 2023
9f27fa4
remove duplicate check
seungmanhan Oct 25, 2023
4733ad4
check unused parameter
seungmanhan Oct 25, 2023
29f14b2
add check normalized dim
seungmanhan Oct 25, 2023
7abff75
Merge branch 'develop' into integrate_CK_layernorm
seungmanhan Oct 25, 2023
414e5ec
add override
seungmanhan Oct 26, 2023
1369a9a
Merge branch 'integrate_CK_layernorm' of https://github.com/ROCmSoftw…
seungmanhan Oct 26, 2023
6f93b03
update CK kernel call host API latest version
seungmanhan Oct 27, 2023
27173d2
normalization to norm
seungmanhan Oct 27, 2023
13ddac8
static code analysis, add driver fail error, sort cmake list
seungmanhan Nov 1, 2023
b080240
fix logic error
seungmanhan Nov 1, 2023
749ee5a
add local memory check
seungmanhan Nov 1, 2023
8be5a7b
add MIOPEN_BETA_API, change Env vars
seungmanhan Nov 2, 2023
682927d
fix build error
seungmanhan Nov 2, 2023
d3e1e8c
remove MIOPEN_BACKEND_HIP
seungmanhan Nov 4, 2023
bea14c9
remove unnecessary network config
seungmanhan Nov 7, 2023
610b26f
Apply the latest layernorm ck format
seungmanhan Nov 8, 2023
0e781f1
init 4d layernorm CK
seungmanhan Nov 10, 2023
70b060b
clang format
seungmanhan Nov 10, 2023
208f3a5
fix driver, 4d layernorm ck call
seungmanhan Nov 10, 2023
8b801c8
clang format
seungmanhan Nov 10, 2023
d28a36a
add CK test
seungmanhan Nov 10, 2023
0298f0d
Merge remote-tracking branch 'origin/develop' into integrate_CK_layer…
seungmanhan Nov 13, 2023
201bb53
remove MIOPEN_BETA_API, organize problem description, change mean, rs…
seungmanhan Nov 13, 2023
9a5b754
init layernorm doc
seungmanhan Nov 13, 2023
13 changes: 3 additions & 10 deletions driver/driver.hpp
@@ -150,11 +150,7 @@ inline void PadBufferSize(size_t& sz, int datatype_sz)
printf("Supported Base Arguments: conv[fp16|int8|bfp16|fp8|bfp8], CBAInfer[fp16], "
"pool[fp16], lrn[fp16], "
"activ[fp16], softmax[fp16], bnorm[fp16], rnn[fp16], gemm, ctc, dropout[fp16], "
"tensorop[fp16], reduce[fp16,fp64]"
#ifdef MIOPEN_BETA_API
", layernorm[bf16, fp16, fp32]"
#endif
"\n");
"tensorop[fp16], reduce[fp16,fp64], layernorm[bfp16, fp16]\n");
exit(0); // NOLINT (concurrency-mt-unsafe)
}

@@ -175,11 +171,8 @@ inline std::string ParseBaseArg(int argc, char* argv[])
arg != "bnormfp16" && arg != "rnn" && arg != "rnnfp16" && arg != "rnn_seq" &&
arg != "rnn_seqfp16" && arg != "gemm" /*&& arg != "gemmfp16"*/ && arg != "ctc" &&
arg != "dropout" && arg != "dropoutfp16" && arg != "tensorop" && arg != "tensoropfp16" &&
arg != "reduce" && arg != "reducefp16" && arg != "reducefp64" &&
#ifdef MIOPEN_BETA_API
arg != "layernorm" && arg != "layernormfp16" && arg != "layernormbfp16" &&
#endif
arg != "--version")
arg != "reduce" && arg != "reducefp16" && arg != "reducefp64" && arg != "layernorm" &&
arg != "layernormfp16" && arg != "layernormbfp16" && arg != "--version")
{
printf("FAILED: Invalid Base Input Argument\n");
Usage();
55 changes: 32 additions & 23 deletions driver/layernorm_driver.hpp
@@ -24,7 +24,6 @@
*
*******************************************************************************/
#include <miopen/miopen.h>
#ifdef MIOPEN_BETA_API
#ifndef GUARD_MIOPEN_LAYERNORM_DRIVER_HPP
#define GUARD_MIOPEN_LAYERNORM_DRIVER_HPP

@@ -164,7 +163,7 @@ int LayerNormDriver<Tgpu, Tref>::GetandSetData()
eps = static_cast<double>(inflags.GetValueDouble("eps"));
mode = miopenLayerNormMode_t(inflags.GetValueInt("mode"));

return (0);
return 0;
}

template <typename Tgpu, typename Tref>
@@ -200,24 +199,31 @@ std::vector<int> LayerNormDriver<Tgpu, Tref>::GetInputTensorLengthsFromCmdLine()
int in_h = inflags.GetValueInt("in_h");
int in_d = inflags.GetValueInt("in_d");

if(in_h != 0)
if((in_n != 0) && (in_c != 0) && (in_d != 0) && (in_h != 0) && (in_w != 0))
{
if(in_d != 0)
{
dim_size = 5;
return std::vector<int>({in_n, in_c, in_d, in_h, in_w});
}
else
{
dim_size = 4;
return std::vector<int>({in_n, in_c, in_h, in_w});
}
dim_size = 5;
return std::vector<int>({in_n, in_c, in_d, in_h, in_w});
}
else
else if((in_n != 0) && (in_c != 0) && (in_h != 0) && (in_w != 0))
{
dim_size = 4;
return std::vector<int>({in_n, in_c, in_h, in_w});
}
else if((in_n != 0) && (in_c != 0) && (in_w != 0))
{
dim_size = 3;
return std::vector<int>({in_n, in_c, in_w});
}
else if((in_n != 0) && (in_w != 0))
{
dim_size = 2;
return std::vector<int>({in_n, in_w});
}
else
{
std::cout << "Error Input Tensor Lengths\n" << std::endl;
return std::vector<int>({0});
}
}

template <typename Tgpu, typename Tref>
@@ -230,7 +236,6 @@ int LayerNormDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
size_t mean_sz = GetTensorSize(meanDesc);
size_t rstd_sz = GetTensorSize(rstdDesc);

// MIOPEN_BACKEND_HIP
uint32_t ctx = 0;

in_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, in_sz, sizeof(Tgpu)));
@@ -250,7 +255,6 @@ int LayerNormDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
meanhost = std::vector<Tref>(mean_sz, static_cast<Tref>(0));
rstdhost = std::vector<Tref>(rstd_sz, static_cast<Tref>(0));

// MIOPEN_BACKEND_HIP
int status;

for(int i = 0; i < in_sz; i++)
@@ -261,22 +265,28 @@ int LayerNormDriver<Tgpu, Tref>::AllocateBuffersAndCopy()

for(int i = 0; i < weight_sz; i++)
{
weight[i] = prng::gen_A_to_B<Tgpu>(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0));
if(mode == MIOPEN_ELEMENTWISE_AFFINE)
weight[i] = static_cast<Tgpu>(1);
else
weight[i] = prng::gen_A_to_B<Tgpu>(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0));
}
status = weight_dev->ToGPU(q, weight.data());
status |= weight_dev->ToGPU(q, weight.data());

for(int i = 0; i < bias_sz; i++)
{
bias[i] = prng::gen_A_to_B<Tgpu>(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0));
if(mode == MIOPEN_ELEMENTWISE_AFFINE)
bias[i] = static_cast<Tgpu>(0);
else
bias[i] = prng::gen_A_to_B<Tgpu>(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0));
}
status = bias_dev->ToGPU(q, bias.data());
status |= bias_dev->ToGPU(q, bias.data());

status |= out_dev->ToGPU(q, out.data());
status |= mean_dev->ToGPU(q, mean.data());
status |= rstd_dev->ToGPU(q, rstd.data());

if(status != CL_SUCCESS)
printf("Error copying data to GPU\n");
if(status != 0)
std::cout << "Error copying data to GPU\n" << std::endl;

return miopenStatusSuccess;
}
@@ -426,4 +436,3 @@ int LayerNormDriver<Tgpu, Tref>::VerifyBackward()
}

#endif // GUARD_MIOPEN_SOFTMAX_DRIVER_HPP
#endif
4 changes: 0 additions & 4 deletions driver/main.cpp
@@ -43,9 +43,7 @@
#include "reduce_driver.hpp"
#include <miopen/config.h>
#include <miopen/stringutils.hpp>
#ifdef MIOPEN_BETA_API
#include "layernorm_driver.hpp"
#endif

int main(int argc, char* argv[])
{
@@ -199,7 +197,6 @@ int main(int argc, char* argv[])
{
drv = new ReduceDriver<double, double>();
}
#ifdef MIOPEN_BETA_API
else if(base_arg == "layernorm")
{
drv = new LayerNormDriver<float, float>();
@@ -212,7 +209,6 @@ int main(int argc, char* argv[])
{
drv = new LayerNormDriver<bfloat16, float>();
}
#endif
else
{
printf("Incorrect BaseArg\n");
6 changes: 2 additions & 4 deletions driver/mloLayerNormHost.hpp
@@ -23,7 +23,6 @@
* SOFTWARE.
*
*******************************************************************************/
#ifdef MIOPEN_BETA_API
#ifndef MLO_LAYERNORMHOST_H_
#define MLO_LAYERNORMHOST_H_

@@ -79,13 +78,12 @@ int32_t mloLayerNormForwardRunHost(miopenTensorDescriptor_t inputDesc,

for(i = 0; i < inner_size; i++)
{
Tcheck pweight = mode ? 1 : static_cast<Tcheck>(weight[i]);
Tcheck pbias = mode ? 0 : static_cast<Tcheck>(bias[i]);
Tcheck pweight = mode ? static_cast<Tcheck>(weight[i]) : 1;
Tcheck pbias = mode ? static_cast<Tcheck>(bias[i]) : 0;
outputhost[o * inner_size + i] =
(static_cast<Tcheck>(input[o * inner_size + i]) - pmean) * prstd * pweight + pbias;
}
}
return ret;
}
#endif
#endif
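
Note: the change above fixes an inverted ternary, so the host reference now applies weight and bias only when the mode calls for them. For reference, here is a minimal standalone sketch of the computation mloLayerNormForwardRunHost performs. It is an illustration, not the PR's code: it assumes MIOPEN_ELEMENTWISE_AFFINE is the zero-valued mode (as the driver initialization above implies) and computes the variance as E[x^2] - E[x]^2, while the real template is generic over Tgpu/Tcheck.

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// mode == 0 (assumed MIOPEN_ELEMENTWISE_AFFINE): weight/bias act as 1/0,
// matching the corrected ternaries; any other mode applies them per element.
void LayerNormForwardHost(const std::vector<float>& input,
                          const std::vector<float>& weight,
                          const std::vector<float>& bias,
                          std::vector<float>& output,
                          std::vector<float>& mean,
                          std::vector<float>& rstd,
                          std::size_t outer_size,
                          std::size_t inner_size,
                          float eps,
                          int32_t mode)
{
    for(std::size_t o = 0; o < outer_size; o++)
    {
        // Mean and variance over the normalized (inner) dimensions.
        double sum = 0.0, sqsum = 0.0;
        for(std::size_t i = 0; i < inner_size; i++)
        {
            double v = input[o * inner_size + i];
            sum += v;
            sqsum += v * v;
        }
        double pmean = sum / inner_size;
        double pvar  = sqsum / inner_size - pmean * pmean;
        double prstd = 1.0 / std::sqrt(pvar + eps);
        mean[o] = static_cast<float>(pmean);
        rstd[o] = static_cast<float>(prstd);

        // y = (x - mean) * rstd * weight + bias
        for(std::size_t i = 0; i < inner_size; i++)
        {
            double pweight = mode ? static_cast<double>(weight[i]) : 1.0;
            double pbias   = mode ? static_cast<double>(bias[i]) : 0.0;
            output[o * inner_size + i] = static_cast<float>(
                (input[o * inner_size + i] - pmean) * prstd * pweight + pbias);
        }
    }
}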
3 changes: 3 additions & 0 deletions src/CMakeLists.txt
@@ -131,6 +131,7 @@ set( MIOpen_Source
logger.cpp
layernorm_api.cpp
lrn_api.cpp
normalization/problem_description.cpp
op_args.cpp
operator.cpp
performance_config.cpp
@@ -166,6 +167,8 @@ set( MIOpen_Source
solver/batchnorm/forward_spatial_multiple.cpp
solver/batchnorm/forward_spatial_single.cpp
solver/batchnorm/forward_training_ck.cpp
solver/normalization/forward_layernorm.cpp
solver/normalization/forward_layernorm2d_ck.cpp
solver/conv_asm_1x1u.cpp
solver/conv_asm_1x1u_bias_activ_fused.cpp
solver/conv_asm_1x1u_stride2.cpp
5 changes: 5 additions & 0 deletions src/include/miopen/kernel_build_params.hpp
@@ -142,6 +142,11 @@ struct GcnAsm
{
static std::string Generate(const std::vector<KernelBuildParameter>& options);
};

struct HIP
{
static std::string Generate(const std::vector<KernelBuildParameter>& options);
};
} // namespace kbp

} // namespace miopen
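
The new kbp::HIP entry gives HIP-compiled kernels the same compile-option generator that kbp::OpenCL and kbp::GcnAsm already provide. A minimal sketch of a call site follows, assuming the KernelBuildParameters/GenerateFor pattern used with the existing generators; the define names are placeholders, not taken from this PR.

// Hypothetical fragment from a solver's GetSolution(); only the
// GenerateFor(kbp::HIP{}) call exercises the new struct.
auto build_params = KernelBuildParameters{
    {"MIOPEN_USE_FP16", static_cast<int>(dtype == miopenHalf)},
    {"MIOPEN_USE_FP32", static_cast<int>(dtype == miopenFloat)},
};
kernel.comp_options = build_params.GenerateFor(kbp::HIP{});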
4 changes: 1 addition & 3 deletions src/include/miopen/layernorm.hpp
@@ -24,7 +24,6 @@
*
*******************************************************************************/
#include <miopen/miopen.h>
#ifdef MIOPEN_BETA_API
#ifndef MIOPEN_LAYERNORM_HPP_
#define MIOPEN_LAYERNORM_HPP_

@@ -35,7 +34,7 @@ namespace miopen {
struct Handle;
struct TensorDescriptor;

miopenStatus_t LayerNormForward(const Handle& handle,
miopenStatus_t LayerNormForward(Handle& handle,
const TensorDescriptor& xDesc,
ConstData_t x,
const TensorDescriptor& weightDesc,
@@ -54,4 +53,3 @@ miopenStatus_t LayerNormForward(const Handle& handle,

} // namespace miopen
#endif // _MIOPEN_LAYERNORM_HPP_
#endif
57 changes: 57 additions & 0 deletions src/include/miopen/normalization/invoke_params.hpp
@@ -0,0 +1,57 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/

#pragma once

#include <miopen/invoke_params.hpp>
#include <miopen/tensor.hpp>

namespace miopen {
namespace normalization {

struct InvokeParams : public miopen::InvokeParams
{
InvokeParams() = default;

const TensorDescriptor* xDesc = nullptr;

ConstData_t x = nullptr;
ConstData_t weight = nullptr;
ConstData_t bias = nullptr;
Data_t y = nullptr;
Data_t mean = nullptr;
Data_t rstd = nullptr;
float epsilon = 0;
int32_t normalized_dim = 0;
miopenLayerNormMode_t mode = MIOPEN_ELEMENTWISE_AFFINE;

std::size_t GetWorkspaceSize() const { return 0; }
Data_t GetWorkspace() const { return nullptr; }
};

} // namespace normalization

} // namespace miopen
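
These fields are what a normalization solver's invoker unpacks at dispatch time. Below is a hedged sketch of the consuming side, following the invoker-factory pattern MIOpen solvers commonly use; the kernel argument order is illustrative, not this PR's.

// Sketch of an invoker factory returned by a solver's GetSolution().
// AnyInvokeParams::CastTo and Handle::Run are existing MIOpen APIs; the
// argument list passed to the kernel is assumed for illustration.
return [](const std::vector<Kernel>& kernels) {
    return [=](const Handle& handle, const AnyInvokeParams& raw_params) {
        decltype(auto) kernel = handle.Run(kernels.front());
        decltype(auto) params = raw_params.CastTo<miopen::normalization::InvokeParams>();
        kernel(params.x,
               params.weight,
               params.bias,
               params.y,
               params.mean,
               params.rstd,
               params.epsilon,
               params.normalized_dim);
    };
};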