[Fix] Model uploader's Jenkins trigger parameter fix #403

Closed
wants to merge 3 commits
2 changes: 1 addition & 1 deletion .github/workflows/model_uploader.yml
@@ -77,12 +77,12 @@ jobs:
       id: init_folders
       run: |
         model_id=${{ github.event.inputs.model_id }}
-        echo "model_folder=ml-models/${{github.event.inputs.model_source}}/${model_id}" >> $GITHUB_OUTPUT
         if [[ -n "${{ github.event.inputs.upload_prefix }}" ]]; then
           model_prefix="ml-models/${{ github.event.inputs.model_source }}/${{ github.event.inputs.upload_prefix }}"
         else
           model_prefix="ml-models/${{ github.event.inputs.model_source }}/${model_id%%/*}"
         fi
+        echo "model_folder=$model_prefix/${model_id##*/}" >> $GITHUB_OUTPUT
         echo "model_prefix_folder=$model_prefix" >> $GITHUB_OUTPUT
- name: Initiate workflow_info
id: init_workflow_info
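The workflow step above derives the S3-style destination folders from `model_id` with shell parameter expansion (`${model_id%%/*}` keeps everything before the first `/`, `${model_id##*/}` keeps everything after the last `/`). A minimal Python sketch of the same logic, using a hypothetical `derive_folders` helper and an example model ID (neither appears in the workflow itself):

```python
def derive_folders(model_id: str, model_source: str, upload_prefix: str = "") -> dict:
    """Mirror the workflow's folder derivation for an ID like
    'sentence-transformers/all-MiniLM-L6-v2' (illustrative example)."""
    if upload_prefix:
        # An explicit upload_prefix overrides the namespace taken from the model ID
        model_prefix = f"ml-models/{model_source}/{upload_prefix}"
    else:
        # ${model_id%%/*}: the segment before the first '/'
        model_prefix = f"ml-models/{model_source}/{model_id.split('/', 1)[0]}"
    # ${model_id##*/}: the segment after the last '/'
    model_name = model_id.rsplit("/", 1)[-1]
    return {
        "model_folder": f"{model_prefix}/{model_name}",
        "model_prefix_folder": model_prefix,
    }
```

The fix in this hunk moves the `model_folder` output so it is built from `model_prefix` (which honors `upload_prefix`) rather than directly from the raw `model_id`.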
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -43,6 +43,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Update model upload history - opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill (v.1.0.0)(TORCH_SCRIPT) by @dhrubo-os ([#400](https://github.com/opensearch-project/opensearch-py-ml/pull/400))

### Fixed
+- Fix the wrong input parameter for model_uploader's base_download_path in Jenkins trigger ([#403](https://github.com/opensearch-project/opensearch-py-ml/pull/403))
- Enable make_model_config_json to add model description to model config file by @thanawan-atc in ([#203](https://github.com/opensearch-project/opensearch-py-ml/pull/203))
- Correct demo_ml_commons_integration.ipynb by @thanawan-atc in ([#208](https://github.com/opensearch-project/opensearch-py-ml/pull/208))
- Handle the case when the model max length is undefined in tokenizer by @thanawan-atc in ([#219](https://github.com/opensearch-project/opensearch-py-ml/pull/219))
29 changes: 22 additions & 7 deletions utils/model_uploader/update_models_upload_history_md.py
@@ -85,25 +85,40 @@ def create_model_json_obj(
return model_obj


-def sort_models(models: List[Dict]) -> List[Dict]:
+def sort_and_deduplicate_models(models: List[Dict]) -> List[Dict]:
     """
-    Sort models
+    Sort and deduplicate models

     :param models: List of model dictionary objects to be sorted
     :type models: list[dict]
     :return: Sorted list of model dictionary objects
     :rtype: list[dict]
     """
-    models = sorted(
-        models,
+
+    # Remove duplicates
+    unique_models = {}
+    for model in models:
+        key = (model["Model Version"], model["Model ID"], model["Model Format"])
+        if (
+            key not in unique_models
+            or model["Upload Time"] > unique_models[key]["Upload Time"]
+        ):
+            unique_models[key] = model
+
+    # Convert the unique_models dictionary back to a list
+    unique_models_list = list(unique_models.values())
+
+    # Sort the deduplicated list
+    sorted_models = sorted(
+        unique_models_list,
         key=lambda d: (
             d["Upload Time"],
             d["Model Version"],
             d["Model ID"],
             d["Model Format"],
         ),
     )
-    return models
+    return sorted_models
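A quick usage sketch of the new deduplication behavior, with a self-contained copy of the function and made-up model entries (sample data, not from the repository; string comparison of the timestamps works because they are in a lexicographically sortable format):

```python
from typing import Dict, List


def sort_and_deduplicate_models(models: List[Dict]) -> List[Dict]:
    # Keep only the most recently uploaded entry per (version, id, format) key
    unique_models = {}
    for model in models:
        key = (model["Model Version"], model["Model ID"], model["Model Format"])
        if (
            key not in unique_models
            or model["Upload Time"] > unique_models[key]["Upload Time"]
        ):
            unique_models[key] = model
    # Sort the surviving entries by upload time, then version, id, and format
    return sorted(
        unique_models.values(),
        key=lambda d: (
            d["Upload Time"], d["Model Version"], d["Model ID"], d["Model Format"],
        ),
    )


models = [
    {"Model Version": "1.0.1", "Model ID": "m1", "Model Format": "TORCH_SCRIPT",
     "Upload Time": "2023-08-01 10:00:00"},
    {"Model Version": "1.0.1", "Model ID": "m1", "Model Format": "TORCH_SCRIPT",
     "Upload Time": "2023-08-02 10:00:00"},  # same key, newer upload wins
    {"Model Version": "1.0.0", "Model ID": "m2", "Model Format": "ONNX",
     "Upload Time": "2023-07-15 09:30:00"},
]
deduped = sort_and_deduplicate_models(models)
# Two entries remain; the m1 row keeps the later upload time
```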


def update_model_json_file(
@@ -172,7 +187,7 @@ def update_model_json_file(
models.append(model_obj)

models = [dict(t) for t in {tuple(m.items()) for m in models}]
-    models = sort_models(models)
+    models = sort_and_deduplicate_models(models)
with open(MODEL_JSON_FILEPATH, "w") as f:
json.dump(models, f, indent=4)
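The set-of-tuples comprehension at this call site is an exact-duplicate filter: it only collapses rows whose fields are all identical, while `sort_and_deduplicate_models` then handles near-duplicates that differ only in upload time. A small standalone sketch of the idiom (sample data for illustration):

```python
models = [
    {"Model ID": "m1", "Upload Time": "2023-08-01 10:00:00"},
    {"Model ID": "m1", "Upload Time": "2023-08-01 10:00:00"},  # exact duplicate
    {"Model ID": "m1", "Upload Time": "2023-08-02 10:00:00"},  # differs in one field
]
# Dicts are unhashable, so convert each to a tuple of items, dedupe in a set,
# then rebuild the dicts (set order is arbitrary, so sort afterwards if needed)
deduped = [dict(t) for t in {tuple(m.items()) for m in models}]
# The exact duplicate is dropped; both distinct upload times survive
```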

@@ -188,7 +203,7 @@ def update_md_file():
if os.path.exists(MODEL_JSON_FILEPATH):
with open(MODEL_JSON_FILEPATH, "r") as f:
models = json.load(f)
-        models = sort_models(models)
+        models = sort_and_deduplicate_models(models)
table_data = KEYS[:]
for m in models:
for k in KEYS: