Skip to content

Commit

Permalink
Add dynamic switch of ras events support.
Browse files Browse the repository at this point in the history
Rasdaemon does not provide a way to disable individual events through
configuration. If a user wants to disable a specific event (e.g.
block_rq_complete), they have to recompile rasdaemon, which is inconvenient.

This patch adds a dynamic switch for RAS event support. You can list the
events you want to disable in /etc/sysconfig/rasdaemon. For example,
`DISABLE="ras:mc_event,block:block_rq_complete"`. After restarting
rasdaemon, these two events will be disabled without recompilation.
  • Loading branch information
caixiaomeng 00662745 authored and root committed Dec 1, 2023
1 parent cfabd93 commit feae2a4
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 17 deletions.
55 changes: 38 additions & 17 deletions ras-events.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
#define ENDIAN KBUFFER_ENDIAN_BIG
#endif

/*
 * List of events to keep disabled. Defined in rasdaemon.c, where main()
 * loads it from the DISABLE environment variable; NULL when unset.
 */
extern char* choices_disable;

static int get_debugfs_dir(char *tracing_dir, size_t len)
{
FILE *fp;
Expand Down Expand Up @@ -150,6 +152,18 @@ static int get_tracing_dir(struct ras_events *ras)
return 0;
}

/*
 * is_disabled_event - tell whether "group:event" was disabled by configuration.
 * @group: trace event group name, e.g. "ras"
 * @event: trace event name inside @group, e.g. "mc_event"
 *
 * choices_disable holds a comma-separated list of "group:event" entries.
 * Each entry is compared as a whole rather than searched as a substring, so
 * an entry such as "block:block_rq_complete" does not also disable an event
 * whose full name merely appears inside it (e.g. "block:block_rq").
 * Spaces and tabs around an entry are ignored.
 *
 * Returns 1 when the event is listed, 0 otherwise (including when the list
 * is unset or empty, or when @group or @event is NULL).
 */
int is_disabled_event(char *group, char *event) {
	const char *entry;
	size_t group_len, event_len;

	if (!group || !event || !choices_disable)
		return 0;

	group_len = strlen(group);
	event_len = strlen(event);

	entry = choices_disable;
	while (*entry != '\0') {
		/* Length of the current entry, up to the next ',' or the end. */
		size_t span = strcspn(entry, ",");
		const char *name = entry;
		size_t name_len = span;

		/* Strip blanks around the entry so "a:b, c:d" is accepted. */
		while (name_len > 0 && (*name == ' ' || *name == '\t')) {
			name++;
			name_len--;
		}
		while (name_len > 0 &&
		       (name[name_len - 1] == ' ' || name[name_len - 1] == '\t'))
			name_len--;

		/* Match "<group>:<event>" exactly, without building a copy. */
		if (name_len == group_len + 1 + event_len &&
		    strncmp(name, group, group_len) == 0 &&
		    name[group_len] == ':' &&
		    strncmp(name + group_len + 1, event, event_len) == 0)
			return 1;

		entry += span;
		if (*entry == ',')
			entry++;
	}

	return 0;
}

/*
* Tracing enable/disable code
*/
Expand All @@ -158,6 +172,7 @@ static int __toggle_ras_mc_event(struct ras_events *ras,
{
int fd, rc;
char fname[MAX_PATH + 1];
enable = is_disabled_event(group, event) ? 0 : 1;

snprintf(fname, sizeof(fname), "%s%s:%s\n",
enable ? "" : "!",
Expand Down Expand Up @@ -839,6 +854,12 @@ static int add_event_handler(struct ras_events *ras, struct tep_handle *pevent,

ras->filters[id] = filter;

if (is_disabled_event(group, event)) {
log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
group, event);
return -EINVAL;
}

/* Enable RAS events */
rc = __toggle_ras_mc_event(ras, group, event, 1);
free(page);
Expand Down Expand Up @@ -906,7 +927,7 @@ int handle_ras_events(int record_events)
ras_mc_event_handler, NULL, MC_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "mc_event");

Expand All @@ -915,7 +936,7 @@ int handle_ras_events(int record_events)
ras_aer_event_handler, NULL, AER_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "aer_event");
#endif
Expand All @@ -925,7 +946,7 @@ int handle_ras_events(int record_events)
ras_non_standard_event_handler, NULL, NON_STANDARD_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "non_standard_event");
#endif
Expand All @@ -935,7 +956,7 @@ int handle_ras_events(int record_events)
ras_arm_event_handler, NULL, ARM_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "arm_event");
#endif
Expand Down Expand Up @@ -969,7 +990,7 @@ int handle_ras_events(int record_events)
/* tell kernel we are listening, so don't printk to console */
(void)open("/sys/kernel/debug/ras/daemon_active", 0);
num_events++;
} else
} else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "extlog_mem_event");
#endif
Expand All @@ -986,7 +1007,7 @@ int handle_ras_events(int record_events)
ras_devlink_event_handler, filter_str, DEVLINK_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"devlink", "devlink_health_report");
#endif
Expand All @@ -998,7 +1019,7 @@ int handle_ras_events(int record_events)
NULL, DISKERROR_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"block", "block_rq_error");
#else
Expand All @@ -1009,7 +1030,7 @@ int handle_ras_events(int record_events)
NULL, DISKERROR_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"block", "block_rq_complete");
}
Expand All @@ -1021,7 +1042,7 @@ int handle_ras_events(int record_events)
ras_memory_failure_event_handler, NULL, MF_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "memory_failure_event");
#endif
Expand All @@ -1031,63 +1052,63 @@ int handle_ras_events(int record_events)
ras_cxl_poison_event_handler, NULL, CXL_POISON_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_poison");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_aer_uncorrectable_error",
ras_cxl_aer_ue_event_handler, NULL, CXL_AER_UE_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_aer_uncorrectable_error");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_aer_correctable_error",
ras_cxl_aer_ce_event_handler, NULL, CXL_AER_CE_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_aer_correctable_error");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_overflow",
ras_cxl_overflow_event_handler, NULL, CXL_OVERFLOW_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_overflow");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_generic_event",
ras_cxl_generic_event_handler, NULL, CXL_GENERIC_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_generic_event");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_general_media",
ras_cxl_general_media_event_handler, NULL, CXL_GENERAL_MEDIA_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_general_media");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_dram",
ras_cxl_dram_event_handler, NULL, CXL_DRAM_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_dram");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_memory_module",
ras_cxl_memory_module_event_handler, NULL, CXL_MEMORY_MODULE_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "memory_module");
#endif
Expand Down
3 changes: 3 additions & 0 deletions rasdaemon.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#define TOOL_NAME "rasdaemon"
#define TOOL_DESCRIPTION "RAS daemon to log the RAS events."
#define ARGS_DOC "<options>"
/* Name of the environment variable that lists the events to keep disabled */
#define DISABLE "DISABLE"
/* Value of that variable, read with getenv() in main(); NULL when unset */
char *choices_disable = NULL;

const char *argp_program_version = TOOL_NAME " " VERSION;
const char *argp_program_bug_address = "Mauro Carvalho Chehab <mchehab@kernel.org>";
Expand Down Expand Up @@ -127,6 +129,7 @@ int main(int argc, char *argv[])
{
struct arguments args;
int idx = -1;
choices_disable = getenv(DISABLE);

#ifdef HAVE_MCE
const struct argp_option offline_options[] = {
Expand Down

0 comments on commit feae2a4

Please sign in to comment.