From c568ba89985d6337c46e91124ecb37f12ff2b75e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 25 May 2024 10:30:47 +0200 Subject: [PATCH] [new] inspect_virus: use parallel collector to run clamav on all CPUs On a 8-thread CPU (Intel Tigerlake laptop), in test run a-la rpminspect -c /usr/share/rpminspect/fedora.yaml -p rawhide -Dv -k -t VERIFY -a x86_64,noarch,src -T virus kernel-6.8.10-200.fc39 this cuts "virus inspection" run time from 19 to 8 minutes. Signed-off-by: Denys Vlasenko --- lib/inspect_virus.c | 173 +++++++++++++++++++++++++++++++++++++------- src/rpminspect.c | 10 +++ 2 files changed, 156 insertions(+), 27 deletions(-) diff --git a/lib/inspect_virus.c b/lib/inspect_virus.c index ecddd98f..40b67096 100644 --- a/lib/inspect_virus.c +++ b/lib/inspect_virus.c @@ -11,20 +11,38 @@ #include #include #include "rpminspect.h" +#include "parallel.h" static struct cl_engine *engine = NULL; #ifndef CL_SCAN_STDOPT struct cl_scan_options clamav_opts; #endif -static struct result_params params; +static parallel_t *col = NULL; -static bool virus_driver(struct rpminspect *ri, rpmfile_entry_t *file) +/* these variables have different values in each child */ +static unsigned child_no = 0; +static unsigned file_no = (unsigned)-1; +static int virus_countdown = 4000; +static int write_fd; + +static bool virus_driver(struct rpminspect *ri __attribute__((unused)), rpmfile_entry_t *file) { - bool result = true; int r = 0; const char *virus = NULL; + /* cyclically count from 0 to NCHILDREN-1 */ + file_no++; + + if (file_no == col->max_pids) { + file_no = 0; + } + + /* handle only every Nth file */ + if (file_no != child_no) { + return true; + } + /* only check regular files */ if (!S_ISREG(file->st.st_mode)) { return true; @@ -36,32 +54,37 @@ static bool virus_driver(struct rpminspect *ri, rpmfile_entry_t *file) #else r = cl_scanfile(file->fullpath, &virus, NULL, engine, CL_SCAN_STDOPT); #endif +#if 0 /* debug: uncomment for error injection - test that virus detection indeed works */ + if (child_no == 0 && file_no == 0 && virus_countdown == 4000) { + virus = "b0g0virus"; + r = CL_VIRUS; + } +#endif + + if (r != CL_CLEAN && r != CL_VIRUS) { + /* unexpected failure, bail out */ + errx(EXIT_FAILURE, "*** cl_scanfile(%s): %s", file->localpath, cl_strerror(r)); + } if (r == CL_VIRUS) { - params.severity = get_secrule_result_severity(ri, file, SECRULE_VIRUS); - - if (params.severity != RESULT_NULL && params.severity != RESULT_SKIP) { - if (params.severity == RESULT_INFO) { - params.waiverauth = NOT_WAIVABLE; - params.verb = VERB_OK; - } else { - params.waiverauth = WAIVABLE_BY_SECURITY; - params.verb = VERB_FAILED; - result = false; - } + if (!virus || !virus[0]) { + /* "nameless" virus? probably clamav bug, and our code would break on such: bail out */ + errx(EXIT_FAILURE, "*** cl_scanfile(%s): virus with no name???", file->localpath); + } - params.arch = get_rpm_header_arch(file->rpm_header); - params.file = file->localpath; - params.remedy = get_remedy(REMEDY_VIRUS); - xasprintf(¶ms.msg, _("Virus detected in %s in the %s package on %s: %s"), file->localpath, headerGetString(file->rpm_header, RPMTAG_NAME), params.arch, virus); - add_result(ri, ¶ms); - free(params.msg); + /* Cap the number of reported infections. + * Receiving buffer has sanity limit, and we'd abort if it is exceeded. + * If we see thousands of "infected" files, we probably aren't + * interested in every one of them anyway. + */ + if (virus_countdown != 0) { + virus_countdown--; + full_write(write_fd, virus, strlen(virus) + 1); + full_write(write_fd, &file, sizeof(file)); } - } else if (r != CL_CLEAN) { - warnx("*** cl_scanfile(%s): %s", file->localpath, cl_strerror(r)); } - return result; + return true; } bool inspect_virus(struct rpminspect *ri) @@ -72,8 +95,8 @@ bool inspect_virus(struct rpminspect *ri) struct dirent *de = NULL; char *cvdpath = NULL; struct cl_cvd *cvd = NULL; - bool result = false; int r = 0; + struct result_params params; unsigned int loaded_signatures; /* unused, exists to make cl_load() happy */ /* initialize clamav */ @@ -189,11 +212,107 @@ bool inspect_virus(struct rpminspect *ri) params.msg = NULL; free(params.details); params.details = NULL; - - /* run the virus check on each file */ params.header = NAME_VIRUS; params.noun = _("virus or malware in ${FILE} on ${ARCH}"); - result = foreach_peer_file(ri, NAME_VIRUS, virus_driver); + + /* fork $NCPUS children */ + fflush(NULL); + col = new_parallel(0); /* 0: will have one child per CPU */ + + for (child_no = 0; child_no < col->max_pids; child_no++) { + pid_t pid; + int pipefd[2]; + + if (pipe(pipefd)) { + err(EXIT_FAILURE, "pipe"); /* fatal */ + } + + int rnd = rand(); + pid = fork(); + + if (pid < 0) { + err(EXIT_FAILURE, "fork"); /* fatal */ + } + + if (pid == 0) { + /* child */ + close(pipefd[0]); + write_fd = pipefd[1]; + + /* "If you’re using libclamav with a forking daemon you + * should call srand() inside a forked child before making + * any calls to the libclamav functions" - clamav docs + */ + srand(child_no ^ rnd); + + /* run the virus check on each Nth file, then exit */ + foreach_peer_file(ri, NAME_VIRUS, virus_driver); + _exit(0); + } + + /* parent */ + /* insert the child into collector */ + close(pipefd[1]); + insert_new_pid_and_fd(col, pid, pipefd[0]); + } /* forking N children */ + + /* Let all children run, collecting their outputs. + * When any one of them finish, process its output. + * Repeat until all of them exit. + */ + bool result = true; + parallel_slot_t *slot; + while ((slot = collect_one(col)) != NULL) { + int r = slot->exit_status; + + if (!WIFEXITED(r)) { + errx(EXIT_FAILURE, "cl_scanfile() killed by signal %u", WTERMSIG(r)); + } + + if (WEXITSTATUS(r) != 0) { + errx(EXIT_FAILURE, "cl_scanfile() exited with %u", WEXITSTATUS(r)); + } + + char *output = slot->output; + + if (output) { + /* consume all pairs of ("virusname",rpmfile_entry_t pointer) in output */ + while (output[0]) { + rpmfile_entry_t *file; + const char *virus = output; + + output += strlen(virus) + 1; + memcpy(&file, output, sizeof(file)); /* copy unaligned bytes */ + output += sizeof(file); + + params.severity = get_secrule_result_severity(ri, file, SECRULE_VIRUS); + + if (params.severity != RESULT_NULL && params.severity != RESULT_SKIP) { + if (params.severity == RESULT_INFO) { + params.waiverauth = NOT_WAIVABLE; + params.verb = VERB_OK; + } else { + params.waiverauth = WAIVABLE_BY_SECURITY; + params.verb = VERB_FAILED; + result = false; + } + + params.arch = get_rpm_header_arch(file->rpm_header); + params.file = file->localpath; + params.remedy = get_remedy(REMEDY_VIRUS); + xasprintf(¶ms.msg, _("Virus detected in %s in the %s package on %s: %s"), file->localpath, headerGetString(file->rpm_header, RPMTAG_NAME), params.arch, virus); + add_result(ri, ¶ms); + free(params.msg); + } + } /* while (there is unprocessed output from this child) */ + + free(slot->output); + slot->output = NULL; /* avoid double-free in delete_parallel() */ + } + } /* while (waiting for a child to finish) */ + + delete_parallel(col, /*signal:*/ 0); + col = NULL; /* hope the result is always this */ if (result) { diff --git a/src/rpminspect.c b/src/rpminspect.c index 2c9ad678..57dcb13e 100644 --- a/src/rpminspect.c +++ b/src/rpminspect.c @@ -390,6 +390,16 @@ int main(int argc, char **argv) /* Be friendly to "rpminspect ... 2>&1 | tee" use case */ setlinebuf(stdout); + /* clamav uses rand() to generate random temporary file names, + * call srand() to avoid name collisions. + * (In fact, source of clamav I examined isn't that bad, it calls + * srand itself once: "srand(curtime_microsec + clock() + rand())" + * but just to be safe, make sure _this_ call to rand() ^^^^^^^^ + * isn't deterministic). + * Mixing in PID ensures that even two simultaneously + * started rpminspect instances get differing seeds */ + srand(time(NULL) ^ getpid()); + /* SIGABRT handler since we use abort() in some failure cases */ abrt.sa_handler = sigabrt_handler; sigemptyset(&abrt.sa_mask);