Skip to content

Commit

Permalink
CR-1158005 backport (#7551)
Browse files Browse the repository at this point in the history
* CR-1158005 Report if a device is in a bad state (#7507)

* CR-1158005 Add status field to hardware context. Add device status query to legacy query path

Signed-off-by: Daniel Benusovich <dbenusov@xilinx.com>

* CR-1158005 Fix compilation error

Signed-off-by: Daniel Benusovich <dbenusov@xilinx.com>

* CR-1158005 Refactor what is displayed for context status. Refactor variable names

Signed-off-by: Daniel Benusovich <dbenusov@xilinx.com>

* CR-1158005 Add comments for device status

Signed-off-by: Daniel Benusovich <dbenusov@xilinx.com>

* CR-1158005 Remove context status from hw context. Add device status to json output. Update display for dynamic regions report

Signed-off-by: Daniel Benusovich <dbenusov@xilinx.com>

---------

Signed-off-by: Daniel Benusovich <dbenusov@xilinx.com>
(cherry picked from commit 3554c14)

* CR-1158005 Fix compilation issues

Signed-off-by: Daniel Benusovich <dbenusov@xilinx.com>

---------

Signed-off-by: Daniel Benusovich <dbenusov@xilinx.com>
  • Loading branch information
dbenusov authored May 19, 2023
1 parent 4698c28 commit 73a310b
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 2 deletions.
29 changes: 29 additions & 0 deletions src/runtime_src/core/common/query_requests.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ enum class key_type
ip_layout_raw,
clock_freq_topology_raw,
dma_stream,
device_status,
kds_cu_info,
kds_mode,
kds_cu_stat,
Expand Down Expand Up @@ -725,6 +726,34 @@ struct kds_mode : request
get(const device*) const = 0;
};

/**
 * Extract the status of the device
 * This states whether or not a device is stuck due to an xclbin issue
 *
 * The raw query result is an unsigned 32-bit code; use parse_status()
 * to convert it into the string shown to users.
 */
struct device_status : request
{
  using result_type = uint32_t;
  static const key_type key = key_type::device_status;

  virtual boost::any
  get(const device*) const = 0;

  /**
   * parse_status() - Convert a raw device status code into its display string
   *
   * @status: Raw status code
   * Return: "HEALTHY" for 0, "HANG" for 1, "UNKNOWN" for 2
   *
   * Throws xrt_core::system_error (EINVAL) for any other code.
   */
  static std::string
  parse_status(const result_type status)
  {
    switch (status) {
      case 0:
        return "HEALTHY";
      case 1:
        return "HANG";
      case 2:
        return "UNKNOWN";
      default:
        // The code must be converted with std::to_string. Adding an integer
        // directly to a string literal performs pointer arithmetic on the
        // literal instead of appending the number to the message.
        throw xrt_core::system_error(EINVAL, "Invalid device status: " + std::to_string(status));
    }
  }
};

struct kds_cu_stat : request
{
struct data {
Expand Down
1 change: 1 addition & 0 deletions src/runtime_src/core/pcie/linux/device_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ initialize_query_table()
emplace_sysfs_get<query::mig_ecc_ce_ffa> ("mig", "ecc_ce_ffa");
emplace_sysfs_get<query::mig_ecc_ue_ffa> ("mig", "ecc_ue_ffa");
emplace_sysfs_get<query::flash_bar_offset> ("flash", "bar_off");
emplace_sysfs_get<query::device_status> ("", "device_bad_state");
emplace_sysfs_get<query::is_mfg> ("", "mfg");
emplace_sysfs_get<query::mfg_ver> ("", "mfg_ver");
emplace_sysfs_get<query::is_recovery> ("", "recovery");
Expand Down
8 changes: 7 additions & 1 deletion src/runtime_src/core/tools/common/ReportDynamicRegion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ ReportDynamicRegion::getPropertyTree20202( const xrt_core::device * _pDevice,
}

void
ReportDynamicRegion::writeReport( const xrt_core::device* /*_pDevice*/,
ReportDynamicRegion::writeReport( const xrt_core::device* _pDevice,
const boost::property_tree::ptree& _pt,
const std::vector<std::string>& /*_elementsFilter*/,
std::ostream & _output) const
Expand All @@ -292,6 +292,12 @@ ReportDynamicRegion::writeReport( const xrt_core::device* /*_pDevice*/,
if(pt_dfx.empty())
return;

auto device_status = 2;
try {
device_status = xrt_core::device_query<xrt_core::query::device_status>(_pDevice);
} catch (...) {}
_output << boost::format(" Device Status: %s\n") % xrt_core::query::device_status::parse_status(device_status);

for(auto& k_dfx : pt_dfx) {
const boost::property_tree::ptree& dfx = k_dfx.second;
_output << "Xclbin UUID" << std::endl;
Expand Down
11 changes: 10 additions & 1 deletion src/runtime_src/core/tools/common/XBHelpMenus.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -697,10 +697,19 @@ XBUtilities::produce_reports( xrt_core::device_collection devices,
ptDevice.put("interface_type", "pcie");
ptDevice.put("device_id", xrt_core::query::pcie_bdf::to_string(bdf));

auto device_status = 2;
try {
device_status = xrt_core::device_query<xrt_core::query::device_status>(device);
} catch (...) {}
ptDevice.put("device_status", xrt_core::query::device_status::parse_status(device_status));

bool is_mfg = false;
try {
is_mfg = xrt_core::device_query<xrt_core::query::is_mfg>(device);
} catch (...) {}
}
catch (...) {
is_mfg = false;
}

//if factory mode
std::string platform = "<not defined>";
Expand Down

0 comments on commit 73a310b

Please sign in to comment.