Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xrt-smi rearchitecture Patch 4 #8714

Merged
merged 17 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/runtime_src/core/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ add_library(core_common_library_objects OBJECT
sysinfo.cpp
xclbin_parser.cpp
xclbin_swemu.cpp
smi.cpp
)

target_include_directories(core_common_library_objects
Expand Down
242 changes: 242 additions & 0 deletions src/runtime_src/core/common/smi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.

#define XRT_CORE_COMMON_SOURCE

// Local - Include Files
#include "smi.h"

// 3rd Party Library - Include Files
#include <boost/property_tree/json_parser.hpp>
#include <boost/property_tree/ptree.hpp>

#include <string>
#include <vector>

namespace xrt_core::smi {

using boost::property_tree::ptree;

// Serialize this option into a property tree node.
//
// The scalar fields are always written, in the order: name, description,
// type, alias, default_value, value_type. A "description_array" child is
// added only when the option carries sub-entries; each sub-entry is stored
// as an unnamed child holding its own name, description and type.
ptree
option::
to_ptree() const
{
  ptree node;
  node.put("name", name);
  node.put("description", description);
  node.put("type", type);
  node.put("alias", alias);
  node.put("default_value", default_value);
  node.put("value_type", value_type);

  // Nothing more to emit for options without sub-entries.
  if (description_array.empty())
    return node;

  ptree entries;
  for (const auto& entry : description_array) {
    ptree entry_node;
    entry_node.put("name", entry.name);
    entry_node.put("description", entry.description);
    entry_node.put("type", entry.type);
    // Children are pushed with an empty key, i.e. as unnamed list elements.
    entries.push_back(ptree::value_type("", entry_node));
  }
  node.add_child("description_array", entries);
  return node;
}

// Default table of tests advertised for the "validate" sub-command.
//
// Each entry is {test name, help description, type}, where type is either
// "common" or "hidden". The table is built once and returned by reference.
// The function is virtual so derived classes can supply their own table.
const tuple_vector&
smi_base::
get_validate_test_desc() const
{
  static const tuple_vector validate_test_desc = {
    {"aie-reconfig-overhead", "Run end-to-end array reconfiguration overhead through shim DMA", "hidden"},
    {"all", "All applicable validate tests will be executed (default)", "common"},
    {"cmd-chain-latency", "Run end-to-end latency test using command chaining", "common"},
    {"cmd-chain-throughput", "Run end-to-end throughput test using command chaining", "common"},
    {"df-bw", "Run bandwidth test on data fabric", "common"},
    {"gemm", "Measure the TOPS value of GEMM operations", "common"},
    {"latency", "Run end-to-end latency test", "common"},
    {"quick", "Run a subset of four tests: \n1. latency \n2. throughput \n3. cmd-chain-latency \n4. cmd-chain-throughput", "common"},
    {"spatial-sharing-overhead", "Run Spatial Sharing Overhead Test", "hidden"},
    {"tct-all-col", "Measure average TCT processing time for all columns", "common"},
    {"tct-one-col", "Measure average TCT processing time for one column", "common"},
    {"temporal-sharing-overhead", "Run Temporal Sharing Overhead Test", "hidden"},
    {"throughput", "Run end-to-end throughput test", "common"},
    {"aux-connection", "Check if auxiliary power is connected", "common"},
    {"dma", "Run dma test", "common"},
    // Fixed: this entry was spelled "thostmem-bw", which does not name the
    // host-memory bandwidth test.
    {"hostmem-bw", "Run 'bandwidth kernel' when host memory is enabled", "common"},
    {"m2m", "Run M2M test", "common"},
    {"mem-bw", "Run 'bandwidth kernel' and check the throughput", "common"},
    {"p2p", "Run P2P test", "common"},
    {"pcie-link", "Check if PCIE link is active", "common"},
    {"sc-version", "Check if SC firmware is up-to-date", "common"},
    {"verify", "Run 'Hello World' kernel test", "common"}
  };
  return validate_test_desc;
}

// Default table of reports advertised for the "examine" sub-command.
//
// Each entry is {report name, help description, type}; every entry in this
// default table has type "common". The table is built once and returned by
// reference. The function is virtual so derived classes can supply their
// own table.
const tuple_vector&
smi_base::
get_examine_report_desc() const
{
  static const tuple_vector examine_report_desc = {
    {"aie-partitions", "AIE partition information", "common"},
    {"host", "Host information", "common"},
    {"platform", "Platforms flashed on the device", "common"},
    {"telemetry", "Telemetry data for the device", "common"},
    {"aie", "AIE metadata in xclbin", "common"},
    {"aiemem", "AIE memory tile information", "common"},
    {"aieshim", "AIE shim tile status", "common"},
    {"debug-ip-status", "Status of Debug IPs present in xclbin loaded on device", "common"},
    {"dynamic-regions", "Information about the xclbin and the compute units", "common"},
    {"electrical", "Electrical and power sensors present on the device", "common"},
    // Fixed spelling of "Asynchronous" in the user-facing help text.
    {"error", "Asynchronous Error present on the device", "common"},
    {"firewall", "Firewall status", "common"},
    {"mailbox", "Mailbox metrics of the device", "common"},
    {"mechanical", "Mechanical sensors on and surrounding the device", "common"},
    {"memory", "Memory information present on the device", "common"},
    {"pcie-info", "Pcie information of the device", "common"},
    {"qspi-status", "QSPI write protection status", "common"},
    {"thermal", "Thermal sensors present on the device", "common"}
  };
  return examine_report_desc;
}

std::vector<basic_option>
smi_base::
construct_run_option_description() const
{
std::vector<basic_option> run_option_descriptions;
for (const auto& [name, description, type] : get_validate_test_desc()) {
run_option_descriptions.push_back({name, description, type});
}
return run_option_descriptions;
}

std::vector<basic_option>
smi_base::
construct_report_option_description() const
{
std::vector<basic_option> report_option_descriptions;
for (const auto& [name, description, type] : get_examine_report_desc()) {
report_option_descriptions.push_back({name, description, type});
}
return report_option_descriptions;
}

// Build the property tree describing the "validate" sub-command: its name,
// description and type, followed by an "options" child that lists every
// supported option as an unnamed element, in declaration order.
ptree
smi_base::
construct_validate_subcommand() const
{
  // Constructor argument order: name, alias, description, type,
  // default value, value type [, description array].
  const std::vector<option> validate_options = {
    {"device", "d", "The Bus:Device.Function (e.g., 0000:d8:00.0) device of interest", "common", "", "string"},
    {"format", "f", "Report output format. Valid values are:\n"
                    "\tJSON - Latest JSON schema\n"
                    "\tJSON-2020.2 - JSON 2020.2 schema", "common", "JSON", "string"},
    {"output", "o", "Direct the output to the given file", "common", "", "string"},
    {"help", "h", "Help to use this sub-command", "common", "", "none"},
    {"run", "r", "Run a subset of the test suite. Valid options are:\n", "common", "", "array", construct_run_option_description()},
    {"path", "p", "Path to the directory containing validate xclbins", "hidden", "", "string"},
    {"param", "", "Extended parameter for a given test. Format: <test-name>:<key>:<value>", "hidden", "", "string"},
    {"pmode", "", "Specify which power mode to run the benchmarks in. Note: Some tests might be unavailable for some modes", "hidden", "", "string"}
  };

  ptree option_list;
  for (const auto& opt : validate_options)
    option_list.push_back(ptree::value_type("", opt.to_ptree()));

  ptree validate;
  validate.put("name", "validate");
  validate.put("description", "Validates the given device by executing the platform's validate executable.");
  validate.put("type", "common");
  validate.add_child("options", option_list);
  return validate;
}

// Build the property tree describing the "examine" sub-command: its name,
// type and description, followed by an "options" child that lists every
// supported option as an unnamed element, in declaration order.
ptree
smi_base::
construct_examine_subcommand() const
{
  // Constructor argument order: name, alias, description, type,
  // default value, value type [, description array].
  const std::vector<option> examine_options = {
    {"device", "d", "The Bus:Device.Function (e.g., 0000:d8:00.0) device of interest", "common", "", "string"},
    {"format", "f", "Report output format. Valid values are:\n"
                    "\tJSON - Latest JSON schema\n"
                    "\tJSON-2020.2 - JSON 2020.2 schema", "common", "", "string"},
    {"output", "o", "Direct the output to the given file", "common", "", "string"},
    {"help", "h", "Help to use this sub-command", "common", "", "none"},
    {"report", "r", "The type of report to be produced. Reports currently available are:\n", "common", "", "array", construct_report_option_description()},
    {"element", "e", "Filters individual elements(s) from the report. Format: '/<key>/<key>/...'", "hidden", "", "array"}
  };

  ptree option_list;
  for (const auto& opt : examine_options)
    option_list.push_back(ptree::value_type("", opt.to_ptree()));

  ptree examine;
  examine.put("name", "examine");
  examine.put("type", "common");
  examine.put("description", "This command will 'examine' the state of the system/device and will generate a report of interest in a text or JSON format.");
  examine.add_child("options", option_list);
  return examine;
}

// Build the property tree describing the "configure" sub-command: its name,
// type and description, followed by an "options" child that lists every
// supported option as an unnamed element, in declaration order.
ptree
smi_base::
construct_configure_subcommand() const
{
  // Constructor argument order: name, alias, description, type,
  // default value, value type.
  const std::vector<option> configure_options = {
    {"device", "d", "The Bus:Device.Function (e.g., 0000:d8:00.0) device of interest", "common", "", "string"},
    {"help", "h", "Help to use this sub-command", "common", "", "none"},
    {"daemon", "", "Update the device daemon configuration", "common", "", "none"},
    {"purge", "", "Remove the daemon configuration file", "hidden", "", "string"},
    {"host", "", "IP or hostname for device peer", "common", "", "string"},
    {"security", "", "Update the security level for the device", "hidden", "", "string"},
    {"clk_throttle", "", "Enable/disable the device clock throttling", "hidden", "", "string"},
    {"ct_threshold_power_override", "", "Update the power threshold in watts", "hidden", "", "string"},
    {"ct_threshold_temp_override", "", "Update the temperature threshold in celsius", "hidden", "", "string"},
    {"ct_reset", "", "Reset all throttling options", "hidden", "", "string"},
    {"showx", "", "Display the device configuration settings", "hidden", "", "string"}
  };

  ptree option_list;
  for (const auto& opt : configure_options)
    option_list.push_back(ptree::value_type("", opt.to_ptree()));

  ptree configure;
  configure.put("name", "configure");
  configure.put("type", "common");
  configure.put("description", "Device and host configuration");
  configure.add_child("options", option_list);
  return configure;
}

std::string
smi_base::
get_smi_config() const
{
ptree config;
ptree subcommands;

subcommands.push_back(std::make_pair("", construct_validate_subcommand()));
subcommands.push_back(std::make_pair("", construct_examine_subcommand()));
subcommands.push_back(std::make_pair("", construct_configure_subcommand()));

config.add_child("subcommands", subcommands);

std::ostringstream oss;
boost::property_tree::write_json(oss, config, true); // Pretty print with true
return oss.str();
}

std::string
get_smi_config()
{
xrt_core::smi::smi_base instance;

return instance.get_smi_config();
}
} // namespace xrt_core::smi
86 changes: 86 additions & 0 deletions src/runtime_src/core/common/smi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.

#pragma once
// Local include files
#include "config.h"

// 3rd Party Library - Include Files
#include <boost/property_tree/ptree.hpp>

#include <string>
#include <tuple>
#include <vector>

namespace xrt_core::smi {

using tuple_vector = std::vector<std::tuple<std::string, std::string, std::string>>;

// Minimal descriptor shared by full options and by the entries of an
// option's description array (e.g. individual validate tests or examine
// reports).
struct basic_option {
// Identifier of the option or entry.
std::string name;
// Human-readable help text.
std::string description;
// Category string; the tables in smi.cpp use "common" and "hidden".
std::string type;
};

// Full description of a sub-command option. Extends basic_option with an
// alias, a default value, a value type and an optional list of sub-entries
// (description_array) describing the values the option accepts.
struct option : public basic_option {
  std::string alias;
  std::string default_value;
  std::string value_type;
  std::vector<basic_option> description_array;

  // All arguments are sink parameters: they are taken by (non-const) value
  // and moved into the members. The previous signature declared them const
  // (and the vector as a const reference), which turned every std::move
  // below into a copy.
  //
  // Note the argument order: alias comes second, before description.
  option(std::string name,
         std::string alias,
         std::string description,
         std::string type,
         std::string default_value,
         std::string value_type,
         std::vector<basic_option> description_array = {})
    : basic_option{std::move(name), std::move(description), std::move(type)},
      alias(std::move(alias)),
      default_value(std::move(default_value)),
      value_type(std::move(value_type)),
      description_array(std::move(description_array)) {}

  // Serializes this option, including its description array when non-empty,
  // into a property tree node.
  boost::property_tree::ptree to_ptree() const;
};

// Base class for xrt-smi configuration providers.
//
// Each shim's smi class derives from this class and adds its custom
// functionality. Currently only the validate tests and examine reports
// differ between shims, which is why the two table accessors are virtual;
// going forward, each shim can define its own xrt-smi behavior as required.
// This also gives us the flexibility to add device specific xrt-smi behavior.
class smi_base {
protected:

  // Table of {name, description, type} for the validate tests.
  XRT_CORE_COMMON_EXPORT
  virtual const tuple_vector&
  get_validate_test_desc() const;

  // Table of {name, description, type} for the examine reports.
  XRT_CORE_COMMON_EXPORT
  virtual const tuple_vector&
  get_examine_report_desc() const;

  // Converts the validate test table into basic_option entries.
  std::vector<basic_option>
  construct_run_option_description() const;

  // Converts the examine report table into basic_option entries.
  std::vector<basic_option>
  construct_report_option_description() const;

  // Builders for the per-sub-command property trees.
  boost::property_tree::ptree
  construct_validate_subcommand() const;

  boost::property_tree::ptree
  construct_examine_subcommand() const;

  boost::property_tree::ptree
  construct_configure_subcommand() const;

public:
  // The class is a polymorphic base intended for derivation; a virtual
  // destructor makes destruction through a base pointer well-defined.
  virtual ~smi_base() = default;

  // Returns the full configuration (all sub-commands) as JSON text.
  XRT_CORE_COMMON_EXPORT
  std::string get_smi_config() const;
};

// Returns the xrt-smi configuration as JSON text, as produced by a
// default-constructed smi_base.
XRT_CORE_COMMON_EXPORT
std::string get_smi_config();

} // namespace xrt_core::smi
26 changes: 26 additions & 0 deletions src/runtime_src/core/edge/user/device_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "xrt.h"
#include "zynq_dev.h"
#include "aie_sys_parser.h"
#include "smi.h"

#include "core/common/debug_ip.h"
#include "core/common/query_requests.h"
Expand Down Expand Up @@ -678,6 +679,30 @@ struct am_counter
}
};

// Query handler for key_type::xrt_smi_config.
//
// The request argument carries an xrt_core::query::xrt_smi_config::type;
// only options_config is handled, for which the edge shim's smi
// configuration string is returned (wrapped in std::any). Any other key or
// request type results in no_such_key.
struct xrt_smi_config
{
  using result_type = std::any;

  static result_type
  get(const xrt_core::device* device, key_type key, const std::any& reqType)
  {
    if (key != key_type::xrt_smi_config)
      throw xrt_core::query::no_such_key(key, "Not implemented");

    using config_type = xrt_core::query::xrt_smi_config::type;
    if (std::any_cast<config_type>(reqType) != config_type::options_config)
      throw xrt_core::query::no_such_key(key, "Not implemented");

    // Keep the result in a std::string so the returned std::any holds a
    // std::string.
    std::string config = shim_edge::smi::get_smi_config();
    return config;
  }
};

struct asm_counter
{
using result_type = query::asm_counter::result_type;
Expand Down Expand Up @@ -1058,6 +1083,7 @@ initialize_query_table()
emplace_func4_request<query::aim_counter, aim_counter>();
emplace_func4_request<query::am_counter, am_counter>();
emplace_func4_request<query::asm_counter, asm_counter>();
emplace_func4_request<query::xrt_smi_config, xrt_smi_config>();
emplace_func4_request<query::lapc_status, lapc_status>();
emplace_func4_request<query::spc_status, spc_status>();
emplace_func4_request<query::accel_deadlock_status, accel_deadlock_status>();
Expand Down
Loading
Loading