From 3999b41b981b18d3b2f907a316cd32aef456a6a7 Mon Sep 17 00:00:00 2001 From: Andrii Mandiuk Date: Mon, 3 Oct 2022 10:30:10 +0300 Subject: [PATCH] Notification NOS running SAI and SDK, about HW/FW failures Signed-off-by: Andrii Mandiuk --- health_event.md | 88 +++++++++++++++++++++++++++++++++++++ inc/saiswitch.h | 113 ++++++++++++++++++++++++++++++++++++++++++++++++ inc/saitypes.h | 13 ++++++ meta/style.pm | 1 + 4 files changed, 215 insertions(+) create mode 100644 health_event.md diff --git a/health_event.md b/health_event.md new file mode 100644 index 000000000..e0846a5b3 --- /dev/null +++ b/health_event.md @@ -0,0 +1,88 @@ +# Switch health events + +A health event is a way for the SAI adapter to inform the NOS about HW/SW health issues. When an error occurs on the switch, SAI should inform the NOS about the status. To provide flexibility across different event types, SAI generates a health event with several parameters that describe that event. To inform the NOS, the sai_switch_asic_sdk_health_event_notification_fn callback is invoked. It takes 6 parameters: + + 1. _In_ sai_object_id_t switch_id - Switch object identifier + 2. _In_ sai_switch_asic_sdk_health_severity_t severity - severity of an issue (fatal, warning, notice) + 3. _In_ sai_timespec_t timestamp - time when issue occurred + 4. _In_ sai_switch_asic_sdk_health_category_t category - category of health issue + 5. _In_ sai_switch_health_data_t data - specific data for that event + 6. _In_ const sai_u8_list_t description - JSON-encoded description string with information delivered from SDK event/trap + + Example of a possible description: + { + "switch_id": "0x00000000000000AB", + "severity": "2", + "timestamp": { + "tv_sec": "22429", + "tv_nsec": "3428724" + }, + "category": "3", + "data": { + "data_type": "0" + }, + "additional_data": "Some additional information" + } + + +These fields provide the ability to add as much information as needed about the event.
+ +For health severity and health category there are two respective enums: sai_switch_asic_sdk_health_severity_t and sai_switch_asic_sdk_health_category_t. + +The data parameter is a struct that carries the type of the data and, once a type with a payload is defined, the data itself (see "Extending the health data"). New types of data can be added if needed. + +## Event registration +The NOS provides an event callback to the SAI adapter through the SAI_SWITCH_ATTR_SWITCH_ASIC_SDK_HEALTH_EVENT_NOTIFY attribute. + +The NOS can choose which categories to register for, per severity. +For example, to register for the SW and FW categories of warning events, the NOS sets +SAI_SWITCH_ATTR_REG_WARNING_SWITCH_ASIC_SDK_HEALTH_CATEGORY +as s32list.count = 2, s32list.list = {SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_SW, SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW} +The default is an empty category list for each severity. + +## Extending the health data +In order to add new information in the data field, several steps should be completed: + +1) Data type should be added to enum sai_health_data_type_t + +2) For all types that have additional data, a struct should be created. The first time a type with actual struct data is created, a union should be created as well. Each struct should be added to that union. + +For example, if we want to add a SER item (https://github.com/opencomputeproject/SAI/pull/1307), the flow will be: + +``` +typedef enum _sai_health_data_type_t +{ + /** General health data type */ + SAI_HEALTH_DATA_TYPE_GENERAL, + + /** SER health data type */ + SAI_HEALTH_DATA_TYPE_SER + +} sai_health_data_type_t; + +typedef struct _sai_ser_health_data_t +{ + ...
+ /* SER specific fields */ + +} sai_ser_health_data_t; + +/** +* @extraparam sai_health_data_type_t data_type +*/ +typedef union _sai_health_data_t +{ + /** @validonly data_type == SAI_HEALTH_DATA_TYPE_SER */ + sai_ser_health_data_t ser; +} sai_health_data_t; + +typedef struct _sai_health_t +{ + /** Type of health data */ + sai_health_data_type_t data_type; + + /** @passparam data_type */ + sai_health_data_t data; +} sai_health_t; +``` + diff --git a/inc/saiswitch.h b/inc/saiswitch.h index 78a80be14..662054073 100644 --- a/inc/saiswitch.h +++ b/inc/saiswitch.h @@ -2873,6 +2873,50 @@ typedef enum _sai_switch_attr_t */ SAI_SWITCH_ATTR_HOSTIF_OPER_STATUS_UPDATE_MODE, + /** + * @brief Health notification callback function passed to the adapter. + * + * Use sai_switch_asic_sdk_health_event_notification_fn as notification function. + * + * @type sai_pointer_t sai_switch_asic_sdk_health_event_notification_fn + * @flags CREATE_AND_SET + * @default NULL + */ + SAI_SWITCH_ATTR_SWITCH_ASIC_SDK_HEALTH_EVENT_NOTIFY, + + /** + * @brief Registration for health fatal categories. + * + * For specifying categories of causes for severity fatal events + * + * @type sai_s32_list_t sai_switch_asic_sdk_health_category_t + * @flags CREATE_AND_SET + * @default empty + */ + SAI_SWITCH_ATTR_REG_FATAL_SWITCH_ASIC_SDK_HEALTH_CATEGORY, + + /** + * @brief Registration for health warning categories. + * + * For specifying categories of causes for severity warning events + * + * @type sai_s32_list_t sai_switch_asic_sdk_health_category_t + * @flags CREATE_AND_SET + * @default empty + */ + SAI_SWITCH_ATTR_REG_WARNING_SWITCH_ASIC_SDK_HEALTH_CATEGORY, + + /** + * @brief Registration for health notice categories. 
+ * * For specifying categories of causes for severity notice events + * + * @type sai_s32_list_t sai_switch_asic_sdk_health_category_t + * @flags CREATE_AND_SET + * @default empty + */ + SAI_SWITCH_ATTR_REG_NOTICE_SWITCH_ASIC_SDK_HEALTH_CATEGORY, + /** * @brief End of attributes */ @@ -2886,6 +2930,41 @@ typedef enum _sai_switch_attr_t } sai_switch_attr_t; +/** + * @brief Switch health event severity + */ +typedef enum _sai_switch_asic_sdk_health_severity_t +{ + /** Switch event severity fatal */ + SAI_SWITCH_ASIC_SDK_HEALTH_SEVERITY_FATAL, + + /** Switch event severity warning */ + SAI_SWITCH_ASIC_SDK_HEALTH_SEVERITY_WARNING, + + /** Switch event severity notice */ + SAI_SWITCH_ASIC_SDK_HEALTH_SEVERITY_NOTICE + +} sai_switch_asic_sdk_health_severity_t; + +/** + * @brief Switch health categories + */ +typedef enum _sai_switch_asic_sdk_health_category_t +{ + /** Switch health software category */ + SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_SW, + + /** Switch health firmware category */ + SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW, + + /** Switch health CPU hardware category */ + SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_CPU_HW, + + /** Switch health ASIC hardware category */ + SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_ASIC_HW + +} sai_switch_asic_sdk_health_category_t; + /** * @brief Switch counter IDs in sai_get_switch_stats() call * @@ -3085,6 +3164,40 @@ typedef enum _sai_switch_stat_t */ #define SAI_KEY_HW_PORT_PROFILE_ID_CONFIG_FILE "SAI_HW_PORT_PROFILE_ID_CONFIG_FILE" +/** + * @brief Switch health event callback + * + * @objects switch_id SAI_OBJECT_TYPE_SWITCH + * + * @param[in] switch_id Switch Id + * @param[in] severity Health event severity + * @param[in] timestamp Time and date of receiving the SDK Health event + * @param[in] category Category of cause + * @param[in] data Health event data, tagged by its data type + * @param[in] description JSON-encoded description string with information delivered from SDK event/trap + * Example of a possible description: + * { + * "switch_id": 
"0x00000000000000AB", + * "severity": "2", + * "timestamp": { + * "tv_sec": "22429", + * "tv_nsec": "3428724" + * }, + * "category": "3", + * "data": { + * "data_type": "0" + * }, + * "additional_data": "Some additional information" + * } + */ +typedef void (*sai_switch_asic_sdk_health_event_notification_fn)( + _In_ sai_object_id_t switch_id, + _In_ sai_switch_asic_sdk_health_severity_t severity, + _In_ sai_timespec_t timestamp, + _In_ sai_switch_asic_sdk_health_category_t category, + _In_ sai_switch_health_data_t data, + _In_ const sai_u8_list_t description); + /** * @brief Switch shutdown request callback. * diff --git a/inc/saitypes.h b/inc/saitypes.h index 3a8ee79b6..dd863abff 100644 --- a/inc/saitypes.h +++ b/inc/saitypes.h @@ -1619,6 +1619,19 @@ typedef enum _sai_object_stage_t } sai_object_stage_t; +typedef enum _sai_health_data_type_t +{ + /** General health data type */ + SAI_HEALTH_DATA_TYPE_GENERAL +} sai_health_data_type_t; + +typedef struct _sai_switch_health_data_t +{ + /** Type of switch health data */ + sai_health_data_type_t data_type; + +} sai_switch_health_data_t; + /** * @} */ diff --git a/meta/style.pm b/meta/style.pm index c97752b4f..21173bbdc 100644 --- a/meta/style.pm +++ b/meta/style.pm @@ -700,6 +700,7 @@ sub CheckInOutParams return if $line =~ /_Inout_ \w+ \*\w+/ and $const eq ""; # non const types with pointer should be Inout return if $line =~ /_Out_ \w+ \*\w+/ and $const eq ""; # non const types with pointer should be Out return if $line =~ /_In_ const \w+ \*\*?\w+/; # const types with pointer should be In + return if $line =~ /_In_ const \w+ \w+/; # const types without pointer return if $line =~ /_Out_ const char \*\*\w+/; return if $line =~ /_Out_ void \*\*\w+/;