diff --git a/doc/userguide/capture-hardware/dpdk.rst b/doc/userguide/capture-hardware/dpdk.rst index 91ae1c876ca9..1b9ecae763a4 100644 --- a/doc/userguide/capture-hardware/dpdk.rst +++ b/doc/userguide/capture-hardware/dpdk.rst @@ -15,6 +15,57 @@ learn more about the basic setup for DPDK. The following sections contain examples of how to set up DPDK and Suricata for more obscure use-cases. +Hugepage analysis +----------------- + +Suricata can analyse utilized hugepages on the system. This can be particularly +beneficial when there's a potential overallocation of hugepages. +The hugepage analysis is designed to examine the hugepages in use and +provide recommendations on an adequate number of hugepages. This then ensures +Suricata operates optimally while leaving sufficient memory for other +applications on the system. The analysis works by comparing snapshots of the +hugepages before and after Suricata is initialized. After the initialization, +no more hugepages are allocated by Suricata. +The hugepage analysis can be seen in the Perf log level and is printed out +during the Suricata start. It is only printed when Suricata detects some +discrepancies in the system related to hugepage allocation. + +It's recommended to perform this analysis from a "clean" state - +that is a state when all your hugepages are free. It is especially recommended +when no other hugepage-dependent applications are running on your system. +This can be checked in one of two ways: + +.. code-block:: + + # global check + cat /proc/meminfo + + HugePages_Total: 1024 + HugePages_Free: 1024 + + # per-numa check depends on NUMA node ID, hugepage size, + # and nr_hugepages/free_hugepages - e.g.: + cat /sys/devices/system/node/node0/hugepages/hugepages-2048kB/free_hugepages + +After the termination of Suricata and other hugepage-related applications, +if the count of free hugepages is not equal to the total number of hugepages, +it indicates some hugepages were not freed completely. 
+This can be fixed by removing DPDK-related files from the hugepage-mounted +directory (filesystem). +It's important to exercise caution while removing hugepages, especially when +other hugepage-dependent applications are in operation, as this action will +disrupt their memory functionality. +Removing the DPDK files from the hugepage directory can often be done as: + +.. code-block:: bash + + sudo rm -rf /dev/hugepages/rtemap_* + + # To check where hugepages are mounted: + dpdk-hugepages.py -s + # or + mount | grep huge + Bond interface -------------- diff --git a/src/Makefile.am b/src/Makefile.am index 21e1dfe5fbeb..592e0ed28ccd 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -552,6 +552,7 @@ noinst_HEADERS = \ util-hash-string.h \ util-host-info.h \ util-host-os-info.h \ + util-hugepages.h \ util-hyperscan.h \ util-ioctl.h \ util-ip.h \ @@ -1152,6 +1153,7 @@ libsuricata_c_a_SOURCES = \ util-hash-string.c \ util-host-info.c \ util-host-os-info.c \ + util-hugepages.c \ util-hyperscan.c \ util-ioctl.c \ util-ip.c \ diff --git a/src/suricata.c b/src/suricata.c index ffa970ae7297..1974f8a177f8 100644 --- a/src/suricata.c +++ b/src/suricata.c @@ -127,6 +127,7 @@ #include "util-ebpf.h" #include "util-exception-policy.h" #include "util-host-os-info.h" +#include "util-hugepages.h" #include "util-ioctl.h" #include "util-landlock.h" #include "util-luajit.h" @@ -2968,6 +2969,7 @@ int SuricataMain(int argc, char **argv) goto out; } + SystemHugepageSnapshot *prerun_snap = SystemHugepageSnapshotCreate(); SCSetStartTime(&suricata); RunModeDispatch(suricata.run_mode, suricata.runmode_custom_mode, suricata.capture_plugin_name, suricata.capture_plugin_args); @@ -3026,7 +3028,11 @@ int SuricataMain(int argc, char **argv) PostRunStartedDetectSetup(&suricata); - DPDKEvaluateHugepages(); + SystemHugepageSnapshot *postrun_snap = SystemHugepageSnapshotCreate(); + if (run_mode == RUNMODE_DPDK) // only DPDK uses hpages at the moment + SystemHugepageEvaluateHugepages(prerun_snap, 
postrun_snap); + SystemHugepageSnapshotDestroy(prerun_snap); + SystemHugepageSnapshotDestroy(postrun_snap); SCPledge(); SuricataMainLoop(&suricata); diff --git a/src/util-dpdk.c b/src/util-dpdk.c index 089aa45674ae..13329a81d13a 100644 --- a/src/util-dpdk.c +++ b/src/util-dpdk.c @@ -66,106 +66,7 @@ void DPDKFreeDevice(LiveDevice *ldev) #endif } -static FILE *HugepagesMeminfoOpen(void) -{ - FILE *fp = fopen("/proc/meminfo", "r"); - if (fp == NULL) { - SCLogInfo("Can't analyze hugepage usage: failed to open /proc/meminfo"); - } - return fp; -} - -static void HugepagesMeminfoClose(FILE *fp) -{ - if (fp) { - fclose(fp); - } -} - -/** - * Parsing values of meminfo - * - * \param fp Opened file pointer for reading of file /proc/meminfo at beginning - * \param keyword Entry to look for e.g. "HugePages_Free:" - * \return n Value of the entry - * \return -1 On error - * - */ -static int32_t MemInfoParseValue(FILE *fp, const char *keyword) -{ - char path[256], value_str[64]; - int32_t value = -1; - - while (fscanf(fp, "%255s", path) != EOF) { - if (strcmp(path, keyword) == 0) { - if (fscanf(fp, "%63s", value_str) == EOF) { - SCLogDebug("%s: not followed by any number", keyword); - break; - } - - if (StringParseInt32(&value, 10, 23, value_str) < 0) { - SCLogDebug("Failed to convert %s from /proc/meminfo", keyword); - value = -1; - } - break; - } - } - return value; -} - -static void MemInfoEvaluateHugepages(FILE *fp) -{ - int32_t free_hugepages = MemInfoParseValue(fp, "HugePages_Free:"); - if (free_hugepages < 0) { - SCLogInfo("HugePages_Free information not found in /proc/meminfo"); - return; - } - - rewind(fp); - - int32_t total_hugepages = MemInfoParseValue(fp, "HugePages_Total:"); - if (total_hugepages < 0) { - SCLogInfo("HugePages_Total information not found in /proc/meminfo"); - return; - } else if (total_hugepages == 0) { - SCLogInfo("HugePages_Total equals to zero"); - return; - } - - float free_hugepages_ratio = (float)free_hugepages / (float)total_hugepages; - if 
(free_hugepages_ratio > 0.5) { - SCLogInfo("%" PRIu32 " of %" PRIu32 - " of hugepages are free - number of hugepages can be lowered to e.g. %.0lf", - free_hugepages, total_hugepages, ceil((total_hugepages - free_hugepages) * 1.15)); - } -} - -static void MemInfoWith(void (*callback)(FILE *)) -{ - FILE *fp = HugepagesMeminfoOpen(); - if (fp) { - callback(fp); - HugepagesMeminfoClose(fp); - } -} - -void DPDKEvaluateHugepages(void) -{ - if (run_mode != RUNMODE_DPDK) - return; - -#ifdef HAVE_DPDK - if (rte_eal_has_hugepages() == 0) { // hugepages disabled - SCLogPerf("Hugepages not enabled - enabling hugepages can improve performance"); - return; - } -#endif - - MemInfoWith(MemInfoEvaluateHugepages); -} - #ifdef HAVE_DPDK - /** * Retrieves name of the port from port id * Not thread-safe diff --git a/src/util-dpdk.h b/src/util-dpdk.h index a94f46225217..1fb3532f5d4d 100644 --- a/src/util-dpdk.h +++ b/src/util-dpdk.h @@ -121,7 +121,6 @@ void DPDKCleanupEAL(void); void DPDKCloseDevice(LiveDevice *ldev); void DPDKFreeDevice(LiveDevice *ldev); -void DPDKEvaluateHugepages(void); #ifdef HAVE_DPDK const char *DPDKGetPortNameByPortID(uint16_t pid); diff --git a/src/util-hugepages.c b/src/util-hugepages.c new file mode 100644 index 000000000000..94f714d01832 --- /dev/null +++ b/src/util-hugepages.c @@ -0,0 +1,360 @@ +/* Copyright (C) 2023 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Lukas Sismis + */ + +#include "suricata.h" +#include "util-debug.h" +#include "util-hugepages.h" + +static uint16_t SystemHugepageSizesCntPerNodeGet(uint16_t node_index); +static uint16_t SystemNodeCountGet(void); +static void SystemHugepagePerNodeGetHugepageSizes( + uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes); +static HugepageInfo *SystemHugepageHugepageInfoCreate(uint16_t hp_size_cnt); +static int16_t SystemHugepagePerNodeGetHugepageInfo(uint16_t node_index, NodeInfo *node); +static void SystemHugepageHugepageInfoDestroy(HugepageInfo *h); +static void SystemHugepageNodeInfoDestroy(NodeInfo *n); +static void SystemHugepageNodeInfoDump(NodeInfo *n); +static void SystemHugepageSnapshotDump(SystemHugepageSnapshot *s); + +static bool SystemHugepageSupported(void) +{ +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + return true; +#else + return false; +#endif /* !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun */ +} + +// block of all hugepage-specific internal functions +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + +static uint16_t SystemNodeCountGetLinux(void) +{ + char dir_path[] = "/sys/devices/system/node/"; + DIR *dir = opendir(dir_path); + if (dir == NULL) { + SCLogError("unable to open %s", dir_path); + return 0; + } + + uint16_t count = 0; + struct dirent *entry; + while ((entry = readdir(dir)) != NULL) { + char d_name[] = "node"; + if (entry->d_type == DT_DIR && strncmp(entry->d_name, d_name, strlen(d_name)) == 0) + count++; + } + closedir(dir); + return count; +} + +static uint16_t SystemHugepageSizesCntPerNodeGetLinux(uint16_t node_index) +{ + char 
dir_path[256]; + snprintf(dir_path, sizeof(dir_path), "/sys/devices/system/node/node%d/hugepages/", node_index); + DIR *dir = opendir(dir_path); + if (dir == NULL) { + SCLogError("unable to open %s", dir_path); + return 0; + } + + uint16_t count = 0; + struct dirent *entry; + while ((entry = readdir(dir)) != NULL) { + char d_name[] = "hugepages-"; + if (entry->d_type == DT_DIR && strncmp(entry->d_name, d_name, strlen(d_name)) == 0) + count++; + } + closedir(dir); + return count; +} + +static void SystemHugepagePerNodeGetHugepageSizesLinux( + uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes) +{ + char dir_path[256]; + snprintf(dir_path, sizeof(dir_path), "/sys/devices/system/node/node%d/hugepages/", node_index); + DIR *dir = opendir(dir_path); + if (dir == NULL) { + SCLogError("unable to open %s", dir_path); + return; + } + uint16_t index = 0; + struct dirent *entry; + while ((entry = readdir(dir)) != NULL) { + if (entry->d_type == DT_DIR && strncmp(entry->d_name, "hugepages-", 10) == 0) { + sscanf(entry->d_name, "hugepages-%ukB", &(hp_sizes[index])); + index++; + } + } + closedir(dir); +} + +// arrays `hugepages` and `hp_sizes` are expected to have the same size +static int16_t SystemHugepagePerNodeGetHugepageInfoLinux( + HugepageInfo *hugepages, uint32_t *hp_sizes, uint16_t hp_sizes_cnt, uint16_t node_index) +{ + for (int16_t i = 0; i < hp_sizes_cnt; i++) { + hugepages[i].size_kb = hp_sizes[i]; + char path[256]; + snprintf(path, sizeof(path), + "/sys/devices/system/node/node%hd/hugepages/hugepages-%ukB/nr_hugepages", + node_index, hp_sizes[i]); + FILE *f = fopen(path, "r"); + if (!f) { + SCLogError("unable to open %s", path); + fclose(f); + return -1; + } + if (fscanf(f, "%hd", &hugepages[i].allocated) != 1) { + SCLogError("failed to read the total number of allocated hugepages (%ukB) on node %hd", + hp_sizes[i], node_index); + fclose(f); + return -1; + } + fclose(f); + + snprintf(path, sizeof(path), + 
"/sys/devices/system/node/node%hd/hugepages/hugepages-%ukB/free_hugepages", + node_index, hp_sizes[i]); + f = fopen(path, "r"); + if (!f) { + SCLogError("unable to open %s", path); + fclose(f); + return -1; + } + if (fscanf(f, "%hd", &hugepages[i].free) != 1) { + SCLogError("failed to read the total number of free hugepages (%ukB) on node %hd", + hp_sizes[i], node_index); + fclose(f); + return -1; + } + fclose(f); + } + + return 0; +} + +#endif /* !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun */ + +static int16_t SystemHugepagePerNodeGetHugepageInfo(uint16_t node_index, NodeInfo *node) +{ + uint16_t hp_sizes_cnt = SystemHugepageSizesCntPerNodeGet(node_index); + if (hp_sizes_cnt == 0) { + SCLogError("hugepages not found for node %d", node_index); + return -1; + } + uint32_t *hp_sizes = SCCalloc(hp_sizes_cnt, sizeof(*hp_sizes)); + if (hp_sizes == NULL) { + FatalError("failed to allocate memory for hugepage info"); + } + SystemHugepagePerNodeGetHugepageSizes(node_index, hp_sizes_cnt, hp_sizes); + + node->hugepages = SystemHugepageHugepageInfoCreate(hp_sizes_cnt); + node->num_hugepage_sizes = hp_sizes_cnt; + + int16_t ret = 0; +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + ret = SystemHugepagePerNodeGetHugepageInfoLinux( + node->hugepages, hp_sizes, node->num_hugepage_sizes, node_index); +#endif /* !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun */ + + SCFree(hp_sizes); + return ret; +} + +/** + * \brief The function attempts to detect number of NUMA nodes on the system + * \returns 0 if detection is unsuccessful, otherwise number of detected nodes + */ +static uint16_t SystemNodeCountGet(void) +{ +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + return SystemNodeCountGetLinux(); +#endif /* !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun */ + return 0; +} + +/** + * \brief The function 
attempts to detect number of unique hugepage sizes + * \returns 0 if detection is unsuccessful, otherwise number of hugepage sizes + */ +static uint16_t SystemHugepageSizesCntPerNodeGet(uint16_t node_index) +{ +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + return SystemHugepageSizesCntPerNodeGetLinux(node_index); +#endif /* !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun */ + return 0; +} + +static void SystemHugepagePerNodeGetHugepageSizes( + uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes) +{ +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + return SystemHugepagePerNodeGetHugepageSizesLinux(node_index, hp_sizes_cnt, hp_sizes); +#endif /* !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun */ +} + +static HugepageInfo *SystemHugepageHugepageInfoCreate(uint16_t hp_size_cnt) +{ + HugepageInfo *h = SCCalloc(hp_size_cnt, sizeof(*h)); + if (h == NULL) { + FatalError("failed to allocate hugepage info array"); + } + return h; +} + +static void SystemHugepageHugepageInfoDestroy(HugepageInfo *h) +{ + if (h != NULL) + SCFree(h); +} + +static void SystemHugepageNodeInfoDestroy(NodeInfo *n) +{ + if (n == NULL) + return; + + SystemHugepageHugepageInfoDestroy(n->hugepages); +} + +static void SystemHugepageNodeInfoDump(NodeInfo *n) +{ + if (n == NULL) + return; + + for (uint16_t i = 0; i < n->num_hugepage_sizes; i++) { + SCLogDebug("Hugepage size - %dkB - allocated: %d free: %d", n->hugepages[i].size_kb, + n->hugepages[i].allocated, n->hugepages[i].free); + } +} + +static void SystemHugepageSnapshotDump(SystemHugepageSnapshot *s) +{ + if (s == NULL) + return; + + for (uint16_t i = 0; i < s->num_nodes; i++) { + SCLogDebug("NUMA Node %d", i); + SystemHugepageNodeInfoDump(&(s->nodes[i])); + } +} + +void SystemHugepageSnapshotDestroy(SystemHugepageSnapshot *s) +{ + if (s == NULL) + return; + + for (uint16_t i = 0; i < 
s->num_nodes; i++) { + SystemHugepageNodeInfoDestroy(&(s->nodes[i])); + } + SCFree(s->nodes); + SCFree(s); +} + +SystemHugepageSnapshot *SystemHugepageSnapshotCreate(void) +{ + if (!SystemHugepageSupported()) + return NULL; + + uint16_t node_cnt = SystemNodeCountGet(); + if (node_cnt == 0) { + SCLogError("failed to obtain number of NUMA nodes in the system"); + return NULL; + } + NodeInfo *nodes = SCCalloc(node_cnt, sizeof(*nodes)); + if (nodes == NULL) { + FatalError("failed to allocate memory for NUMA node info"); + return NULL; + } + + SystemHugepageSnapshot *s = SCCalloc(1, sizeof(*s)); + if (s == NULL) { + SCFree(nodes); + FatalError("failed to allocate memory for NUMA node snapshot"); + } + s->num_nodes = node_cnt; + s->nodes = nodes; + + for (uint16_t i = 0; i < s->num_nodes; i++) { + int16_t ret = SystemHugepagePerNodeGetHugepageInfo(i, &s->nodes[i]); + if (ret != 0) { + SystemHugepageSnapshotDestroy(s); + return NULL; + } + } + + return s; +} + +void SystemHugepageEvaluateHugepages(SystemHugepageSnapshot *pre_s, SystemHugepageSnapshot *post_s) +{ + if (!SystemHugepageSupported() || pre_s == NULL || post_s == NULL) + return; + + SCLogDebug("Hugepages before initialization"); + SystemHugepageSnapshotDump(pre_s); + + SCLogDebug("Hugepages after initialization"); + SystemHugepageSnapshotDump(post_s); + + if (pre_s->num_nodes != post_s->num_nodes) + FatalError("Number of NUMA nodes changed during hugepage evaluation"); + + for (int32_t i = 0; i < post_s->num_nodes; i++) { + if (pre_s->nodes[i].num_hugepage_sizes != post_s->nodes[i].num_hugepage_sizes) + FatalError("Number of NUMA node hugepage sizes changed during hugepage evaluation"); + + for (int32_t j = 0; j < post_s->nodes->num_hugepage_sizes; j++) { + HugepageInfo *prerun_hp = &pre_s->nodes[i].hugepages[j]; + HugepageInfo *postrun_hp = &post_s->nodes[i].hugepages[j]; + + if (prerun_hp->free == 0) { + continue; // this HP size on this node has no HPs allocated + } else if (prerun_hp->free < 
postrun_hp->free) { + SCLogWarning( + "Hugepage usage decreased while it should only increase/stay the same"); + } else if (prerun_hp->free > 0 && prerun_hp->free == postrun_hp->free) { + SCLogPerf("Hugepages on NUMA node %u are unused and can be deallocated", i); + } else { // assumes this is an active NUMA node because at least some hugepages were + // used + // speculative hint only for 2048kB pages as e.g. 1 GB pages can leave a lot of room + // for additional allocations + if (postrun_hp->size_kb == 2048 && postrun_hp->free == 0) { + SCLogPerf("all %ukB hugepages used on NUMA node %d - consider increasing to " + "prevent memory allocation from other NUMA nodes", + postrun_hp->size_kb, i); + } + + float free_hugepages_ratio = (float)postrun_hp->free / (float)prerun_hp->free; + if (free_hugepages_ratio > 0.5) { + int32_t used_hps = prerun_hp->free - postrun_hp->free; + SCLogPerf("Hugepages on NUMA node %u can be set to %.0lf (only using %u/%u " + "%ukB hugepages)", + i, ceil((prerun_hp->free - postrun_hp->free) * 1.15), used_hps, + prerun_hp->free, postrun_hp->size_kb); + } + } + } + } +} diff --git a/src/util-hugepages.h b/src/util-hugepages.h new file mode 100644 index 000000000000..0480d2f09e86 --- /dev/null +++ b/src/util-hugepages.h @@ -0,0 +1,51 @@ +/* Copyright (C) 2023 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
/**
 * \file
 *
 * \author Lukas Sismis
 *
 * Data model and entry points for hugepage usage snapshots.
 */

#ifndef UTIL_HUGEPAGES_H
#define UTIL_HUGEPAGES_H

/** Counters of a single hugepage size */
typedef struct HugepageInfo_ {
    uint32_t size_kb;   /**< hugepage size in kB */
    uint16_t allocated; /**< number of allocated hugepages of this size */
    uint16_t free;      /**< number of free hugepages of this size */
} HugepageInfo;

/**
 * One NUMA node of the system and the hugepages allocated on it.
 * E.g. NUMA node 0 can offer 2 hugepage sizes - 2 MB and 1 GB - and each of
 * them gets its own record with the number of allocated/free hugepages.
 */
typedef struct NodeInfo_ {
    uint16_t num_hugepage_sizes; /**< number of elements in hugepages */
    HugepageInfo *hugepages;     /**< one record per hugepage size */
} NodeInfo;

/** Hugepage state of all NUMA nodes captured at one point in time */
typedef struct SystemHugepageSnapshot_ {
    uint16_t num_nodes; /**< number of elements in nodes */
    NodeInfo *nodes;    /**< one record per NUMA node */
} SystemHugepageSnapshot;

/** Creates a snapshot; the result is released with SystemHugepageSnapshotDestroy() */
SystemHugepageSnapshot *SystemHugepageSnapshotCreate(void);

/** Releases a snapshot obtained from SystemHugepageSnapshotCreate() */
void SystemHugepageSnapshotDestroy(SystemHugepageSnapshot *s);

/** Compares a snapshot taken before initialization with one taken after it */
void SystemHugepageEvaluateHugepages(SystemHugepageSnapshot *pre_s, SystemHugepageSnapshot *post_s);

#endif /* UTIL_HUGEPAGES_H */