AI Based Parameter Prediction Model for conv_hip_igemm_group_fwd_xdlops Solver #2523

Merged Dec 8, 2023 (23 commits; changes shown from 8 commits)

Commits
bbbfde6: implemented tuning heuristic for ck_igemm_group_fwd (Dmantri98, Nov 10, 2023)
52ebde1: update testing (Dmantri98, Nov 11, 2023)
c4269ee: cleanup testing (Dmantri98, Nov 11, 2023)
690304e: clang-format and final changes (Dmantri98, Nov 14, 2023)
65154f8: Merge develop (Dmantri98, Nov 15, 2023)
e6e82d6: hip tidy fixes (Dmantri98, Nov 15, 2023)
bb71c21: take out redundant void (Dmantri98, Nov 16, 2023)
ffbc428: fix cppcheck error (Dmantri98, Nov 16, 2023)
6dd08ee: replace temp vector with erase (Dmantri98, Nov 17, 2023)
69c2351: add guards and takeout json_fwd.hpp (Dmantri98, Nov 21, 2023)
275980d: Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/MIO… (Dmantri98, Nov 21, 2023)
a358946: takeout architecture check (Dmantri98, Nov 21, 2023)
d5d41ef: Merge branch 'develop' into dmantri/conv_hip_igemm_grp_fwd_xdlops_heu… (junliume, Nov 23, 2023)
9ccba71: Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/MIO… (Dmantri98, Nov 28, 2023)
9dadd17: cmake variable check (Dmantri98, Nov 28, 2023)
ec59604: simplfy input_tensor shaping (Dmantri98, Nov 30, 2023)
acf1d93: merge develop (Dmantri98, Nov 30, 2023)
c593b63: Merge branch 'develop' into dmantri/conv_hip_igemm_grp_fwd_xdlops_heu… (Dmantri98, Dec 4, 2023)
5cc829d: env variable and parameter template changes (Dmantri98, Dec 5, 2023)
f5094ae: Merge branch 'develop' into dmantri/conv_hip_igemm_grp_fwd_xdlops_heu… (Dmantri98, Dec 5, 2023)
77caaa8: clang format (Dmantri98, Dec 6, 2023)
f62951b: add back const (Dmantri98, Dec 6, 2023)
abd1738: Merge branch 'develop' into dmantri/conv_hip_igemm_grp_fwd_xdlops_heu… (Dmantri98, Dec 7, 2023)
29 changes: 20 additions & 9 deletions src/conv/heuristics/ai_heuristics.cpp
@@ -436,7 +436,8 @@ Metadata::Metadata(const std::string& arch, const std::string& solver)
const nlohmann::json metadata =
common::LoadJSON(GetSystemDbPath() + "/" + arch + "_" + solver + "_metadata.ktn.model");
num_tuning_params = metadata["num_tuning_params"].get<std::size_t>();
tuning_decodings = metadata["decodings"]["tunings"].get<std::unordered_map<std::string, int>>();
tuning_decodings =
metadata["decodings"]["tunings"].get<std::unordered_map<std::string, std::string>>();
}

class Model
@@ -450,9 +451,14 @@ class Model
{
}
virtual ~Model() = default;
fdeep::tensors Encode(const std::vector<float>& features, std::size_t dim) const
fdeep::tensors Encode(const std::vector<float>& features, std::size_t dim, bool transform) const
{
fdeep::tensor input_tensor = fdeep::tensor(fdeep::tensor_shape(dim, dim), features);
if(transform)
{
fdeep::tensor input_tensor = fdeep::tensor(fdeep::tensor_shape(dim, dim), features);
return encoder.predict({input_tensor});
}
fdeep::tensor input_tensor = fdeep::tensor(fdeep::tensor_shape(dim, 1), features);
return encoder.predict({input_tensor});
}
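Note: the new transform flag selects the encoder's input shape, a dim x dim matrix when the features are a flattened square (ConvAsm1x1U, where dim = sqrt(features.size())), or a dim x 1 column vector of raw features. A minimal self-contained sketch of that dispatch, assuming only the frugally-deep (fdeep) API already used above:

    // Minimal sketch of the shape dispatch, assuming frugally-deep's API.
    #include <fdeep/fdeep.hpp>
    #include <vector>

    fdeep::tensor
    MakeEncoderInput(const std::vector<float>& features, std::size_t dim, bool transform)
    {
        if(transform) // features are a flattened dim x dim matrix
            return fdeep::tensor(fdeep::tensor_shape(dim, dim), features);
        return fdeep::tensor(fdeep::tensor_shape(dim, 1), features); // raw column vector
    }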
fdeep::tensors Decode(const float prev_token, const fdeep::tensors& context) const
@@ -509,11 +515,16 @@ std::shared_ptr<Model> GetModel(const std::string& arch, const std::string& solv
bool ModelSetParams(const std::string& arch,
const std::string& solver,
const std::vector<float>& features,
std::function<bool(int, int)> validator)
bool transform_features,
std::function<bool(std::size_t, std::string)> validator)
{
auto model = GetModel(arch, solver);
int dim = std::sqrt(features.size());
fdeep::tensors context = model->Encode(features, dim);
auto model = GetModel(arch, solver);
int dim = 0;
if(transform_features)
dim = std::sqrt(features.size());
else
dim = features.size();
fdeep::tensors context = model->Encode(features, dim, transform_features);
Comment on lines +515 to +520

[Collaborator] Can we make dim a part of the Model object? Since dim is known a priori, maybe we can just add it to the metadata for the model.

[Contributor Author, Dmantri98] This would be fine, but I don't see the point: we have to hard-code the feature vector anyway, so if we know the size of the feature vector we can just extract dim from it.
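A rough sketch of what the suggestion would look like (hypothetical input_dim field; not adopted in this PR):

    // Hypothetical sketch of the reviewer's suggestion: store the encoder input
    // dimension in the model metadata instead of deriving it at the call site.
    #include <cstddef>
    #include <string>
    #include <unordered_map>

    struct Metadata
    {
        std::size_t num_tuning_params;
        std::size_t input_dim; // hypothetical field, read from the .ktn.model JSON
        std::unordered_map<std::string, std::string> tuning_decodings;
    };

    // ModelSetParams could then drop the transform_features/sqrt branching:
    //   fdeep::tensors context = model->Encode(features, model->metadata.input_dim, ...);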

float decoder_input = 0.0;
for(std::size_t i = 0; i < model->metadata.num_tuning_params; ++i)
{
@@ -529,9 +540,9 @@ bool ModelSetParams(const std::string& arch,
{
int token = pq.top().second;
// convert index to token value
int value = model->metadata.tuning_decodings[std::to_string(token)];
std::string value = model->metadata.tuning_decodings[std::to_string(token)];
pq.pop();
if(value < 0)
if(value == "-1")
return false;
if(validator(i, value))
{
5 changes: 3 additions & 2 deletions src/include/miopen/conv/heuristics/ai_heuristics.hpp
@@ -81,14 +81,15 @@ namespace tuning {
struct Metadata
{
std::size_t num_tuning_params;
std::unordered_map<std::string, int> tuning_decodings;
std::unordered_map<std::string, std::string> tuning_decodings;
Metadata(const std::string& arch, const std::string& solver);
};

bool ModelSetParams(const std::string& arch,
const std::string& solver,
const std::vector<float>& features,
std::function<bool(int, int)> validator);
bool transform_features,
std::function<bool(std::size_t, std::string)> validator);
} // namespace tuning
#endif // MIOPEN_ENABLE_AI_KERNEL_TUNING
} // namespace ai
23 changes: 16 additions & 7 deletions src/include/miopen/solver.hpp
@@ -370,12 +370,8 @@ struct PerformanceConfigConvAsm1x1U : PerfConfigBase<PerformanceConfigConvAsm1x1

void StaticHeuristic(const miopen::conv::ProblemDescription& problem);
void HeuristicInit(const ExecutionContext&, const miopen::conv::ProblemDescription&);
#if MIOPEN_ENABLE_AI_KERNEL_TUNING
void RunParmeterPredictionModel(const ExecutionContext&,
const miopen::conv::ProblemDescription&,
bool& valid);
bool ModelApplyToken(int index, int value, const miopen::conv::ProblemDescription&);
#endif
bool IsModelApplicable(const ExecutionContext& ctx,
const miopen::conv::ProblemDescription& problem) const;
bool IsValidValue() const { return IsValidValueImpl(8); }
bool SetNextValue(const miopen::conv::ProblemDescription&);
bool IsValid(const ExecutionContext&, const miopen::conv::ProblemDescription& problem) const
@@ -399,6 +395,10 @@ struct PerformanceConfigConvAsm1x1U : PerfConfigBase<PerformanceConfigConvAsm1x1
{
return IsValidValueImpl(sequence_length);
}
void RunParmeterPredictionModel(const ExecutionContext&,
const miopen::conv::ProblemDescription&,
bool& valid);
bool ModelApplyToken(int index, std::string value, const miopen::conv::ProblemDescription&);
#endif
bool IsValidImpl(const miopen::conv::ProblemDescription& problem, int sequence_length) const;
bool IsValidValueImpl(int sequence_length) const;
@@ -4488,7 +4488,7 @@ struct PerformanceConfigHipImplicitGemmGroupFwdXdlops
: PerformanceConfigHipImplicitGemmGroupFwdXdlops(0, "")
{
}
void HeuristicInit(const miopen::conv::ProblemDescription&);
void HeuristicInit(const ExecutionContext&, const miopen::conv::ProblemDescription&);
bool SetNextValue(const miopen::conv::ProblemDescription&);
bool IsValidValue() const;
bool IsValid(const ExecutionContext&, const miopen::conv::ProblemDescription& problem) const
@@ -4497,12 +4497,21 @@
}
bool IsValid(const miopen::conv::ProblemDescription&) const;
bool operator==(const PerformanceConfigHipImplicitGemmGroupFwdXdlops& other) const;
bool IsModelApplicable(const ExecutionContext& ctx,
const miopen::conv::ProblemDescription& problem) const;

private:
std::vector<int> heuristic_indexes;
std::vector<std::vector<std::string>> heuristic_kernels;
template <typename DataType>
void Init(const miopen::conv::ProblemDescription&);
template <typename DataType>
bool CheckIsSupportCKArgs(const miopen::conv::ProblemDescription&) const;
template <typename DataType>
void RunParameterPredictionModel(const ExecutionContext& ctx,
const miopen::conv::ProblemDescription& problem);
void InitHeuristicKernelIDs();
bool ModelApplyToken(int idx, std::string value);
};

struct ConvHipImplicitGemmGroupFwdXdlops final
76 changes: 38 additions & 38 deletions src/kernels/gfx908_ConvAsm1x1U_metadata.ktn.model
@@ -2,44 +2,44 @@
"num_tuning_params": 8,
"decodings": {
"tunings": {
"0": -1,
"1": 4,
"2": 2,
"3": 1,
"4": 3,
"5": 16,
"6": 8,
"7": 1,
"8": 4,
"9": 32,
"10": 4,
"11": 1,
"12": 2,
"13": 5,
"14": 7,
"15": 3,
"16": 6,
"17": 8,
"18": 64,
"19": 16,
"20": 32,
"21": 4,
"22": 1,
"23": 1,
"24": 3,
"25": 2,
"26": 4,
"27": 2,
"28": 4,
"29": 1,
"30": 2,
"31": 1,
"32": 4,
"33": 2,
"34": 4,
"35": 8,
"36": 1,
"37": -1
"0": "-1",
"1": "4",
"2": "2",
"3": "1",
"4": "3",
"5": "16",
"6": "8",
"7": "1",
"8": "4",
"9": "32",
"10": "4",
"11": "1",
"12": "2",
"13": "5",
"14": "7",
"15": "3",
"16": "6",
"17": "8",
"18": "64",
"19": "16",
"20": "32",
"21": "4",
"22": "1",
"23": "1",
"24": "3",
"25": "2",
"26": "4",
"27": "2",
"28": "4",
"29": "1",
"30": "2",
"31": "1",
"32": "4",
"33": "2",
"34": "4",
"35": "8",
"36": "1",
"37": "-1"
}
}
}
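The decode loop in ai_heuristics.cpp looks up each predicted token by its stringified index in this table; indices 0 and 37 map to the sentinel "-1", which aborts the prediction. A minimal sketch of the lookup, assuming only the map loaded above (DecodeToken is an illustrative name, not a function in the PR):

    // Sketch: how a predicted token index is decoded to a tuning-parameter value.
    #include <string>
    #include <unordered_map>

    bool DecodeToken(const std::unordered_map<std::string, std::string>& tuning_decodings,
                     int token,
                     std::string& value)
    {
        value = tuning_decodings.at(std::to_string(token)); // e.g. token 5 -> "16"
        return value != "-1"; // "-1" is the sentinel for "no valid prediction"
    }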

Large diffs are not rendered by default. (two model files)

@@ -0,0 +1,36 @@
{
"num_tuning_params": 9,
"decodings": {
"tunings": {
"0": "-1",
"1": "64",
"2": "256",
"3": "128",
"4": "64",
"5": "128",
"6": "32",
"7": "256",
"8": "32",
"9": "128",
"10": "64",
"11": "256",
"12": "32",
"13": "16",
"14": "Default",
"15": "OddC",
"16": "2",
"17": "1",
"18": "4",
"19": "1",
"20": "2",
"21": "4",
"22": "8",
"23": "1",
"24": "4",
"25": "8",
"26": "1",
"27": "4",
"28": "-1"
}
}
}
36 changes: 18 additions & 18 deletions src/solver/conv_asm_1x1u.cpp
@@ -367,37 +367,42 @@ bool PerformanceConfigConvAsm1x1U::IsValidImpl(const ProblemDescription& problem
}
return true;
}
#if MIOPEN_ENABLE_AI_KERNEL_TUNING

bool PerformanceConfigConvAsm1x1U::ModelApplyToken(int index,
int value,
std::string value,
const ProblemDescription& problem)
{
int val = stoi(value);
switch(index)
{
case 0: read_size = value; break;
case 1: k_mult = value; break;
case 2: chunks_per_wave = value; break;
case 3: chunk_size = value; break;
case 4: n_mult = value; break;
case 5: c_mult = value; break;
case 6: waves_c_in_group = value; break;
case 7: waves_k_in_group = value; break;
case 0: read_size = val; break;
case 1: k_mult = val; break;
case 2: chunks_per_wave = val; break;
case 3: chunk_size = val; break;
case 4: n_mult = val; break;
case 5: c_mult = val; break;
case 6: waves_c_in_group = val; break;
case 7: waves_k_in_group = val; break;
default: return false;
}
// this function may leave PerformanceConfigConvAsm1x1U in a partially valid or invalid state
return this->IsPartiallyValid(problem, index + 1);
}
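The switch converts the decoded string back to an int with stoi because every ConvAsm1x1U parameter is numeric; the string-valued interface exists so that solvers with non-numeric tokens (for example the "Default" and "OddC" entries in the new metadata file above) can share the same ModelSetParams plumbing. A hypothetical non-numeric case would apply the string directly:

    // Hypothetical sketch only; 'gemm_spec' is an invented member name:
    // case 14: gemm_spec = value; break; // value is "Default" or "OddC"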

static bool IsModelApplicable(const ExecutionContext& ctx, const ProblemDescription& problem)
bool PerformanceConfigConvAsm1x1U::IsModelApplicable(const ExecutionContext& ctx,
const ProblemDescription& problem) const
{
if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR{}))
#if MIOPEN_ENABLE_AI_KERNEL_TUNING
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR{}))
return false;
if(ctx.GetStream().GetDeviceName() != "gfx908")
return false;
if(problem.GetKernelStrideH() != 1)
return false;
return true;
#else
return false;
#endif
[Collaborator] @JehandadKhan do you think KernelTuningNet trained for one GPU might also work well for the others (like TunaNet does)? I don't think this would be the case here, because the mapping from kernel parameters to kernel runtime is a very unstable one, but I was wrong about TunaNet, too :)

}

static std::vector<float> TransformFeatures(const ProblemDescription& problem, std::size_t n)
Expand Down Expand Up @@ -426,15 +431,14 @@ void PerformanceConfigConvAsm1x1U::RunParmeterPredictionModel(const ExecutionCon
static const std::string& arch = ctx.GetStream().GetDeviceName();
static const std::string solver = "ConvAsm1x1U";
std::vector<float> features = TransformFeatures(problem, n);
if(ai::tuning::ModelSetParams(arch, solver, features, [&](int idx, int value) {
if(ai::tuning::ModelSetParams(arch, solver, features, true, [&](int idx, std::string value) {
return this->ModelApplyToken(idx, value, problem);
}))
{
MIOPEN_LOG_I("Params set by AI: " << ToString());
valid = true;
}
}
#endif

void PerformanceConfigConvAsm1x1U::StaticHeuristic(const ProblemDescription& problem)
{
@@ -488,17 +492,13 @@ void PerformanceConfigConvAsm1x1U::HeuristicInit(const ExecutionContext& ctx,
if(problem.GetInDataType() == miopenDouble)
MIOPEN_THROW("Double data type is not supported by ConvAsm1x1U");

#if MIOPEN_ENABLE_AI_KERNEL_TUNING
if(IsModelApplicable(ctx, problem))
{
bool valid = false;
RunParmeterPredictionModel(ctx, problem, valid);
if(valid)
return;
}
#else
std::ignore = ctx;
averinevg marked this conversation as resolved.
Show resolved Hide resolved
#endif
StaticHeuristic(problem);
MIOPEN_LOG_I(ToString());
}