diff --git a/amdsmi_cli/amdsmi_parser.py b/amdsmi_cli/amdsmi_parser.py index be80ece1..52b43128 100644 --- a/amdsmi_cli/amdsmi_parser.py +++ b/amdsmi_cli/amdsmi_parser.py @@ -1014,7 +1014,7 @@ def _add_set_value_parser(self, subparsers, func): set_power_cap_help = "Set power capacity limit" set_soc_pstate_help = "Set the GPU soc pstate policy using policy id\n" set_xgmi_plpd_help = "Set the GPU XGMI per-link power down policy using policy id\n" - set_process_isolation_help = "Enable or disable the GPU process isolation: 0 for disable and 1 for enable.\n" + set_process_isolation_help = "Enable or disable the GPU process isolation on a per partition basis: 0 for disable and 1 for enable.\n" # Help text for CPU set options set_cpu_pwr_limit_help = "Set power limit for the given socket. Input parameter is power limit value." @@ -1104,7 +1104,7 @@ def _add_reset_parser(self, subparsers, func): reset_compute_help = "Reset compute partitions on the specified GPU" reset_memory_help = "Reset memory partitions on the specified GPU" reset_power_cap_help = "Reset power capacity limit to max capable" - reset_gpu_clean_local_data_help = "Clean up local data in LDS/GPRs" + reset_gpu_clean_local_data_help = "Clean up local data in LDS/GPRs on a per partition basis" # Create reset subparser reset_parser = subparsers.add_parser('reset', help=reset_help, description=reset_subcommand_help) diff --git a/rocm_smi/include/rocm_smi/rocm_smi.h b/rocm_smi/include/rocm_smi/rocm_smi.h index acbd6782..0b299eb6 100755 --- a/rocm_smi/include/rocm_smi/rocm_smi.h +++ b/rocm_smi/include/rocm_smi/rocm_smi.h @@ -3461,12 +3461,9 @@ rsmi_status_t rsmi_dev_process_isolation_set(uint32_t dv_ind, * * @param[in] dv_ind a device index * - * @param[in] sclean the clean flag. Only 1 will take effect and other number - * are reserved for future usage. - * * @return ::RSMI_STATUS_SUCCESS is returned upon successful call, non-zero on fail */ -rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind, uint32_t sclean); +rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind); /** @} */ // end of PerfCont diff --git a/rocm_smi/src/rocm_smi.cc b/rocm_smi/src/rocm_smi.cc index fd4ace7f..0c3e20e6 100755 --- a/rocm_smi/src/rocm_smi.cc +++ b/rocm_smi/src/rocm_smi.cc @@ -2122,8 +2122,7 @@ rsmi_status_t rsmi_dev_process_isolation_set(uint32_t dv_ind, CATCH } -rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind, - uint32_t sclean) { +rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind) { rsmi_status_t ret; TRY @@ -2134,7 +2133,10 @@ rsmi_status_t rsmi_dev_gpu_run_cleaner_shader(uint32_t dv_ind, DEVICE_MUTEX GET_DEV_FROM_INDX - std::string value = std::to_string(sclean); + // To reset you need to provide the partition id + // echo "0" | sudo tee  /sys/class/drm/cardX/device/run_cleaner_shader + int partition_id = dev->get_partition_id(); + std::string value = std::to_string(partition_id); int ret = dev->writeDevInfo(amd::smi::kDevShaderClean , value); return amd::smi::ErrnoToRsmiStatus(ret); diff --git a/src/amd_smi/amd_smi.cc b/src/amd_smi/amd_smi.cc index 0ae21147..21bae135 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -1462,8 +1462,7 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process amdsmi_status_t amdsmi_clean_gpu_local_data(amdsmi_processor_handle processor_handle) { AMDSMI_CHECK_INIT(); - return rsmi_wrapper(rsmi_dev_gpu_run_cleaner_shader, processor_handle, - 1); + return rsmi_wrapper(rsmi_dev_gpu_run_cleaner_shader, processor_handle); } amdsmi_status_t