Skip to content

Commit

Permalink
[new] inspect_virus: use parallel collector to run clamav on all CPUs
Browse files Browse the repository at this point in the history
On a 8-thread CPU (Intel Tigerlake laptop), in test run a-la
rpminspect -c /usr/share/rpminspect/fedora.yaml -p rawhide
    -Dv -k -t VERIFY -a x86_64,noarch,src -T virus kernel-6.8.10-200.fc39
this cuts "virus inspection" run time from 19 to 8 minutes.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
  • Loading branch information
dvlasenk authored and dcantrell committed May 31, 2024
1 parent b89d8ca commit c568ba8
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 27 deletions.
173 changes: 146 additions & 27 deletions lib/inspect_virus.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,38 @@
#include <sys/types.h>
#include <clamav.h>
#include "rpminspect.h"
#include "parallel.h"

static struct cl_engine *engine = NULL;
#ifndef CL_SCAN_STDOPT
struct cl_scan_options clamav_opts;
#endif

static struct result_params params;
static parallel_t *col = NULL;

static bool virus_driver(struct rpminspect *ri, rpmfile_entry_t *file)
/* these variables have different values in each child */
static unsigned child_no = 0;
static unsigned file_no = (unsigned)-1;
static int virus_countdown = 4000;
static int write_fd;

static bool virus_driver(struct rpminspect *ri __attribute__((unused)), rpmfile_entry_t *file)
{
bool result = true;
int r = 0;
const char *virus = NULL;

/* cyclically count from 0 to NCHILDREN-1 */
file_no++;

if (file_no == col->max_pids) {
file_no = 0;
}

/* handle only every Nth file */
if (file_no != child_no) {
return true;
}

/* only check regular files */
if (!S_ISREG(file->st.st_mode)) {
return true;
Expand All @@ -36,32 +54,37 @@ static bool virus_driver(struct rpminspect *ri, rpmfile_entry_t *file)
#else
r = cl_scanfile(file->fullpath, &virus, NULL, engine, CL_SCAN_STDOPT);
#endif
#if 0 /* debug: uncomment for error injection - test that virus detection indeed works */
if (child_no == 0 && file_no == 0 && virus_countdown == 4000) {
virus = "b0g0virus";
r = CL_VIRUS;
}
#endif

if (r != CL_CLEAN && r != CL_VIRUS) {
/* unexpected failure, bail out */
errx(EXIT_FAILURE, "*** cl_scanfile(%s): %s", file->localpath, cl_strerror(r));
}

if (r == CL_VIRUS) {
params.severity = get_secrule_result_severity(ri, file, SECRULE_VIRUS);

if (params.severity != RESULT_NULL && params.severity != RESULT_SKIP) {
if (params.severity == RESULT_INFO) {
params.waiverauth = NOT_WAIVABLE;
params.verb = VERB_OK;
} else {
params.waiverauth = WAIVABLE_BY_SECURITY;
params.verb = VERB_FAILED;
result = false;
}
if (!virus || !virus[0]) {
/* "nameless" virus? probably clamav bug, and our code would break on such: bail out */
errx(EXIT_FAILURE, "*** cl_scanfile(%s): virus with no name???", file->localpath);
}

params.arch = get_rpm_header_arch(file->rpm_header);
params.file = file->localpath;
params.remedy = get_remedy(REMEDY_VIRUS);
xasprintf(&params.msg, _("Virus detected in %s in the %s package on %s: %s"), file->localpath, headerGetString(file->rpm_header, RPMTAG_NAME), params.arch, virus);
add_result(ri, &params);
free(params.msg);
/* Cap the number of reported infections.
* Receiving buffer has sanity limit, and we'd abort if it is exceeded.
* If we see thousands of "infected" files, we probably aren't
* interested in every one of them anyway.
*/
if (virus_countdown != 0) {
virus_countdown--;
full_write(write_fd, virus, strlen(virus) + 1);
full_write(write_fd, &file, sizeof(file));
}
} else if (r != CL_CLEAN) {
warnx("*** cl_scanfile(%s): %s", file->localpath, cl_strerror(r));
}

return result;
return true;
}

bool inspect_virus(struct rpminspect *ri)
Expand All @@ -72,8 +95,8 @@ bool inspect_virus(struct rpminspect *ri)
struct dirent *de = NULL;
char *cvdpath = NULL;
struct cl_cvd *cvd = NULL;
bool result = false;
int r = 0;
struct result_params params;
unsigned int loaded_signatures; /* unused, exists to make cl_load() happy */

/* initialize clamav */
Expand Down Expand Up @@ -189,11 +212,107 @@ bool inspect_virus(struct rpminspect *ri)
params.msg = NULL;
free(params.details);
params.details = NULL;

/* run the virus check on each file */
params.header = NAME_VIRUS;
params.noun = _("virus or malware in ${FILE} on ${ARCH}");
result = foreach_peer_file(ri, NAME_VIRUS, virus_driver);

/* fork $NCPUS children */
fflush(NULL);
col = new_parallel(0); /* 0: will have one child per CPU */

for (child_no = 0; child_no < col->max_pids; child_no++) {
pid_t pid;
int pipefd[2];

if (pipe(pipefd)) {
err(EXIT_FAILURE, "pipe"); /* fatal */
}

int rnd = rand();
pid = fork();

if (pid < 0) {
err(EXIT_FAILURE, "fork"); /* fatal */
}

if (pid == 0) {
/* child */
close(pipefd[0]);
write_fd = pipefd[1];

/* "If you’re using libclamav with a forking daemon you
* should call srand() inside a forked child before making
* any calls to the libclamav functions" - clamav docs
*/
srand(child_no ^ rnd);

/* run the virus check on each Nth file, then exit */
foreach_peer_file(ri, NAME_VIRUS, virus_driver);
_exit(0);
}

/* parent */
/* insert the child into collector */
close(pipefd[1]);
insert_new_pid_and_fd(col, pid, pipefd[0]);
} /* forking N children */

/* Let all children run, collecting their outputs.
* When any one of them finish, process its output.
* Repeat until all of them exit.
*/
bool result = true;
parallel_slot_t *slot;
while ((slot = collect_one(col)) != NULL) {
int r = slot->exit_status;

if (!WIFEXITED(r)) {
errx(EXIT_FAILURE, "cl_scanfile() killed by signal %u", WTERMSIG(r));
}

if (WEXITSTATUS(r) != 0) {
errx(EXIT_FAILURE, "cl_scanfile() exited with %u", WEXITSTATUS(r));
}

char *output = slot->output;

if (output) {
/* consume all pairs of ("virusname",rpmfile_entry_t pointer) in output */
while (output[0]) {
rpmfile_entry_t *file;
const char *virus = output;

output += strlen(virus) + 1;
memcpy(&file, output, sizeof(file)); /* copy unaligned bytes */
output += sizeof(file);

params.severity = get_secrule_result_severity(ri, file, SECRULE_VIRUS);

if (params.severity != RESULT_NULL && params.severity != RESULT_SKIP) {
if (params.severity == RESULT_INFO) {
params.waiverauth = NOT_WAIVABLE;
params.verb = VERB_OK;
} else {
params.waiverauth = WAIVABLE_BY_SECURITY;
params.verb = VERB_FAILED;
result = false;
}

params.arch = get_rpm_header_arch(file->rpm_header);
params.file = file->localpath;
params.remedy = get_remedy(REMEDY_VIRUS);
xasprintf(&params.msg, _("Virus detected in %s in the %s package on %s: %s"), file->localpath, headerGetString(file->rpm_header, RPMTAG_NAME), params.arch, virus);
add_result(ri, &params);
free(params.msg);
}
} /* while (there is unprocessed output from this child) */

free(slot->output);
slot->output = NULL; /* avoid double-free in delete_parallel() */
}
} /* while (waiting for a child to finish) */

delete_parallel(col, /*signal:*/ 0);
col = NULL;

/* hope the result is always this */
if (result) {
Expand Down
10 changes: 10 additions & 0 deletions src/rpminspect.c
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,16 @@ int main(int argc, char **argv)
/* Be friendly to "rpminspect ... 2>&1 | tee" use case */
setlinebuf(stdout);

/* clamav uses rand() to generate random temporary file names,
* call srand() to avoid name collisions.
* (In fact, source of clamav I examined isn't that bad, it calls
* srand itself once: "srand(curtime_microsec + clock() + rand())"
* but just to be safe, make sure _this_ call to rand() ^^^^^^^^
* isn't deterministic).
* Mixing in PID ensures that even two simultaneously
* started rpminspect instances get differing seeds */
srand(time(NULL) ^ getpid());

/* SIGABRT handler since we use abort() in some failure cases */
abrt.sa_handler = sigabrt_handler;
sigemptyset(&abrt.sa_mask);
Expand Down

0 comments on commit c568ba8

Please sign in to comment.