Commit

add nvidia gpu tag along with resources-gpu
bdattoma committed Oct 17, 2024
1 parent 55b7506 commit 6890690
Showing 12 changed files with 29 additions and 29 deletions.
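The diff below appends the new NVIDIA-GPUs tag wherever the generic Resources-GPU resource tag already appears, so GPU-dependent cases can be selected by accelerator type as well as by resource requirement. As a rough illustration of how these tags could then drive test selection, here is a minimal sketch using Robot Framework's Python entry point; the suite path and output directories are placeholders, and the repository's own launcher may invoke the tests differently.

```python
# Illustrative sketch only: filter the suites by the tags touched in this
# commit. The suite root and output directories are assumptions, not taken
# from the repository's own run scripts.
from robot import run

# Run only the cases that need an NVIDIA GPU node.
run(
    "ods_ci/tests/Tests",            # hypothetical suite root
    include=["NVIDIA-GPUs"],
    outputdir="results/nvidia-gpu",
)

# Skip every GPU-dependent case on a CPU-only cluster
# (multiple exclude values are OR-ed together).
run(
    "ods_ci/tests/Tests",
    exclude=["Resources-GPU", "NVIDIA-GPUs"],
    outputdir="results/cpu-only",
)
```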
@@ -54,7 +54,7 @@ Verify Notebook Controller Deployment
Verify GPU Operator Deployment # robocop: disable
[Documentation] Verifies Nvidia GPU Operator is correctly installed
[Tags] Sanity Tier1
... Resources-GPU # Not actually needed, but we first need to enable operator install by default
... Resources-GPU NVIDIA-GPUs # Not actually needed, but we first need to enable operator install by default
... ODS-1157
# Before GPU Node is added to the cluster
@@ -85,7 +85,7 @@ Verify Notebook Tolerations Are Applied To Workbenches
Verify User Can Add GPUs To Workbench
[Documentation] Verifies user can add GPUs to an already started workbench
[Tags] Tier1 Sanity
... ODS-2013 Resources-GPU
... ODS-2013 Resources-GPU NVIDIA-GPUs
Launch Data Science Project Main Page
Open Data Science Project Details Page project_title=${PRJ_TITLE}
Create Workbench workbench_title=${WORKBENCH_TITLE_GPU} workbench_description=${EMPTY}
@@ -109,7 +109,7 @@ Verify User Can Add GPUs To Workbench
Verify User Can Remove GPUs From Workbench
[Documentation] Verifies user can remove GPUs from an already started workbench
[Tags] Tier1 Sanity
... ODS-2014 Resources-GPU
... ODS-2014 Resources-GPU NVIDIA-GPUs
Launch Data Science Project Main Page
Open Data Science Project Details Page project_title=${PRJ_TITLE}
Create Workbench workbench_title=${WORKBENCH_TITLE_GPU} workbench_description=${EMPTY}
@@ -24,7 +24,7 @@ ${RUNTIME_NAME}= Model Serving GPU Test
*** Test Cases ***
Verify GPU Model Deployment Via UI
[Documentation] Test the deployment of an openvino_ir model on a model server with GPUs attached
[Tags] Sanity Tier1 Resources-GPU
[Tags] Sanity Tier1 Resources-GPU NVIDIA-GPUs
... ODS-2214
Clean All Models Of Current User
Open Data Science Projects Home Page
@@ -57,7 +57,7 @@ Verify GPU Model Deployment Via UI

Test Inference Load On GPU
[Documentation] Test the inference load on the GPU after sending random requests to the endpoint
[Tags] Sanity Tier1 Resources-GPU
[Tags] Sanity Tier1 Resources-GPU NVIDIA-GPUs
... ODS-2213
${url}= Get Model Route Via UI ${MODEL_NAME}
Send Random Inference Request endpoint=${url} no_requests=100
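For context on the Test Inference Load On GPU case above: the Send Random Inference Request keyword fires a burst of requests at the model route. A hypothetical sketch of that kind of load generator follows, assuming a KServe V2 REST endpoint and a single FP32 input tensor; the endpoint path, model name, and payload shape are illustrative and not taken from the keyword's actual implementation.

```python
# Hypothetical load generator, not the ods-ci keyword: send N randomly filled
# requests to a model endpoint and fail if any of them is rejected.
import random
import requests

def send_random_inference_requests(endpoint: str, no_requests: int = 100) -> None:
    url = f"{endpoint}/v2/models/test-model/infer"   # assumed KServe V2 REST path
    for _ in range(no_requests):
        payload = {
            "inputs": [{
                "name": "input",                     # assumed input tensor name
                "shape": [1, 10],                    # assumed input shape
                "datatype": "FP32",
                "data": [random.random() for _ in range(10)],
            }]
        }
        response = requests.post(url, json=payload, timeout=30, verify=False)
        response.raise_for_status()
```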
@@ -100,7 +100,7 @@ Verify Multiple Projects With Same Model (OVMS on Kserve)

Verify GPU Model Deployment Via UI (OVMS on Kserve)
[Documentation] Test the deployment of an openvino_ir model on a model server with GPUs attached
[Tags] Sanity Tier1 Resources-GPU
[Tags] Sanity Tier1 Resources-GPU NVIDIA-GPUs
... ODS-2630 ODS-2631 ProductBug RHOAIENG-3355
${requests}= Create Dictionary nvidia.com/gpu=1
${limits}= Create Dictionary nvidia.com/gpu=1
@@ -343,7 +343,7 @@ Verify User Can Set Requests And Limits For A Model
Verify Model Can Be Served And Query On A GPU Node
[Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node
... using Kserve and Caikit+TGIS runtime
[Tags] Sanity Tier1 ODS-2381 Resources-GPU
[Tags] Sanity Tier1 ODS-2381 Resources-GPU NVIDIA-GPUs
[Setup] Set Project And Runtime namespace=singlemodel-gpu
${test_namespace}= Set Variable singlemodel-gpu
${model_name}= Set Variable flan-t5-small-caikit
@@ -145,7 +145,7 @@ Verify User Can Set Requests And Limits For A Model Using The UI # robocop: dis
Verify Model Can Be Served And Query On A GPU Node Using The UI # robocop: disable
[Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node
... using Kserve and Caikit+TGIS runtime
[Tags] Sanity Tier1 ODS-2523 Resources-GPU
[Tags] Sanity Tier1 ODS-2523 Resources-GPU NVIDIA-GPUs
[Setup] Set Up Project namespace=singlemodel-gpu
${test_namespace}= Set Variable singlemodel-gpu
${model_name}= Set Variable flan-t5-small-caikit
@@ -55,7 +55,7 @@ Verify Non Admin Can Serve And Query A Model Using The UI # robocop: disable
Verify Model Can Be Served And Query On A GPU Node Using The UI # robocop: disable
[Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node
... using Single-model platform and TGIS Standalone runtime.
[Tags] Sanity Tier1 ODS-2612 Resources-GPU
[Tags] Sanity Tier1 ODS-2612 Resources-GPU NVIDIA-GPUs
[Setup] Run git clone https://github.com/IBM/text-generation-inference/
${test_namespace}= Set Variable ${TEST_NS}
${isvc__name}= Set Variable flan-t5-small-hf-gpu
@@ -380,7 +380,7 @@ Verify User Can Set Requests And Limits For A Model
Verify Model Can Be Served And Query On A GPU Node
[Documentation] Basic tests for preparing, deploying and querying a LLM model on GPU node
... using Kserve and Caikit+TGIS runtime
[Tags] Tier1 ODS-2381 Resources-GPU
[Tags] Tier1 ODS-2381 Resources-GPU NVIDIA-GPUs
[Setup] Set Project And Runtime runtime=${TGIS_RUNTIME_NAME} namespace=singlemodel-gpu
${test_namespace}= Set Variable singlemodel-gpu
${model_name}= Set Variable flan-t5-small-caikit
14 changes: 7 additions & 7 deletions ods_ci/tests/Tests/500__jupyterhub/minimal-cuda-test.robot
@@ -21,43 +21,43 @@ Verify CUDA Image Can Be Spawned With GPU
[Documentation] Spawns CUDA image with 1 GPU and verifies that the GPU is
... not available for other users.
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1141 ODS-346 ODS-1359
Pass Execution Passing tests, as suite setup ensures that image can be spawned

Verify CUDA Image Includes Expected CUDA Version
[Documentation] Checks CUDA version
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1142
Verify Installed CUDA Version ${EXPECTED_CUDA_VERSION}

Verify PyTorch Library Can See GPUs In Minimal CUDA
[Documentation] Installs PyTorch and verifies it can see the GPU
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1144
Verify Pytorch Can See GPU install=True

Verify Tensorflow Library Can See GPUs In Minimal CUDA
[Documentation] Installs Tensorflow and verifies it can see the GPU
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1143
Verify Tensorflow Can See GPU install=True

Verify Cuda Image Has NVCC Installed
[Documentation] Verifies NVCC Version in Minimal CUDA Image
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-483
${nvcc_version} = Run Cell And Get Output input=!nvcc --version
Should Not Contain ${nvcc_version} /usr/bin/sh: nvcc: command not found

Verify Previous CUDA Notebook Image With GPU
[Documentation] Runs a workload after spawning the N-1 CUDA Notebook
[Tags] Tier2 LiveTesting
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-2128
[Setup] N-1 CUDA Setup
Spawn Notebook With Arguments image=${NOTEBOOK_IMAGE} size=Small gpus=1 version=previous
@@ -89,7 +89,7 @@ Verify CUDA Image Suite Setup
# This will fail in case there are two nodes with the same number of GPUs
# Since the overall available number won't change even after 1 GPU is assigned
# However I can't think of a better way to execute this check, under the assumption that
# the Resources-GPU tag will always ensure there is 1 node with 1 GPU on the cluster.
# the Resources-GPU and NVIDIA-GPUs tags will always ensure there is 1 node with 1 GPU on the cluster.
${maxNo} = Find Max Number Of GPUs In One Node
${maxSpawner} = Fetch Max Number Of GPUs In Spawner Page
# Need to continue execution even on failure or the whole suite will be failed
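The suite-setup comment above notes that comparing the largest per-node GPU count against the maximum shown in the spawner drop-down can misfire when two nodes expose the same number of GPUs, since assigning one GPU does not change the per-node maximum. A minimal sketch of that per-node maximum check, assuming the Find Max Number Of GPUs In One Node keyword reads allocatable resources through the Kubernetes API; the real ods-ci keyword may gather this differently.

```python
# Minimal sketch, not the ods-ci implementation: largest number of allocatable
# nvidia.com/gpu devices on any single node in the cluster.
from kubernetes import client, config

def find_max_gpus_in_one_node() -> int:
    config.load_kube_config()                        # assumes a local kubeconfig
    nodes = client.CoreV1Api().list_node().items
    return max(
        int((node.status.allocatable or {}).get("nvidia.com/gpu", "0"))
        for node in nodes
    )

# The spawner page is then expected to offer exactly this many GPUs, even while
# one GPU is already assigned to a notebook running on a different node.
```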
10 changes: 5 additions & 5 deletions ods_ci/tests/Tests/500__jupyterhub/minimal-pytorch-test.robot
@@ -51,7 +51,7 @@ Verify Tensorboard Is Accessible
Verify PyTorch Image Can Be Spawned With GPU
[Documentation] Spawns PyTorch image with 1 GPU
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1145
Clean Up Server
Stop JupyterLab Notebook Server
@@ -62,28 +62,28 @@ Verify PyTorch Image Can Be Spawned With GPU
Verify PyTorch Image Includes Expected CUDA Version
[Documentation] Checks CUDA version
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1146
Verify Installed CUDA Version ${EXPECTED_CUDA_VERSION}

Verify PyTorch Library Can See GPUs In PyTorch Image
[Documentation] Verifies PyTorch can see the GPU
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1147
Verify Pytorch Can See GPU

Verify PyTorch Image GPU Workload
[Documentation] Runs a workload on GPUs in PyTorch image
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1148
Run Repo And Clean https://github.com/lugi0/notebook-benchmarks notebook-benchmarks/pytorch/fgsm_tutorial.ipynb

Verify Previous PyTorch Notebook Image With GPU
[Documentation] Runs a workload after spawning the N-1 PyTorch Notebook
[Tags] Tier2 LiveTesting
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-2129
[Setup] N-1 PyTorch Setup
Spawn Notebook With Arguments image=${NOTEBOOK_IMAGE} size=Small gpus=1 version=previous
10 changes: 5 additions & 5 deletions ods_ci/tests/Tests/500__jupyterhub/minimal-tensorflow-test.robot
@@ -52,36 +52,36 @@ Verify Tensorboard Is Accessible
Verify Tensorflow Image Can Be Spawned With GPU
[Documentation] Spawns Tensorflow image with 1 GPU
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1151
Close Previous Server
Spawn Notebook With Arguments image=${NOTEBOOK_IMAGE} size=Small gpus=1

Verify Tensorflow Image Includes Expected CUDA Version
[Documentation] Checks CUDA version
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1152
Verify Installed CUDA Version ${EXPECTED_CUDA_VERSION}

Verify Tensorflow Library Can See GPUs In Tensorflow Image
[Documentation] Verifies Tensorflow can see the GPU
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1153
Verify Tensorflow Can See GPU

Verify Tensorflow Image GPU Workload
[Documentation] Runs a workload on GPUs in Tensorflow image
[Tags] Sanity Tier1
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-1154
Run Repo And Clean https://github.com/lugi0/notebook-benchmarks notebook-benchmarks/tensorflow/GPU-no-warnings.ipynb

Verify Previous Tensorflow Notebook Image With GPU
[Documentation] Runs a workload after spawning the N-1 Tensorflow Notebook
[Tags] Tier2 LiveTesting
... Resources-GPU
... Resources-GPU NVIDIA-GPUs
... ODS-2130
[Setup] N-1 Tensorflow Setup
Spawn Notebook With Arguments image=${NOTEBOOK_IMAGE} size=Small gpus=1 version=previous
4 changes: 2 additions & 2 deletions ods_ci/tests/Tests/500__jupyterhub/multiple-gpus.robot
@@ -22,7 +22,7 @@ Verify Number Of Available GPUs Is Correct
[Documentation] Verifies that the number of available GPUs in the
... Spawner dropdown is correct; i.e., it should show the maximum
... Number of GPUs available in a single node.
[Tags] Tier1 Sanity Resources-2GPUS
[Tags] Tier1 Sanity Resources-2GPUS NVIDIA-GPUs
... ODS-1256
${maxNo} = Find Max Number Of GPUs In One Node
${maxSpawner} = Fetch Max Number Of GPUs In Spawner Page
@@ -31,7 +31,7 @@ Verify Number Of Available GPUs Is Correct
Verify Two Servers Can Be Spawned
[Documentation] Spawns two servers requesting 1 gpu each, and checks
... that both can schedule and are scheduled on different nodes.
[Tags] Tier1 Sanity Resources-2GPUS
[Tags] Tier1 Sanity Resources-2GPUS NVIDIA-GPUs
... ODS-1257
Spawn Notebook With Arguments image=${NOTEBOOK_IMAGE} size=Small gpus=1
${serial_first} = Get GPU Serial Number