diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c index 17212efcde60b5..a86fccd57e2d0c 100644 --- a/kernel/bpf/token.c +++ b/kernel/bpf/token.c @@ -152,6 +152,15 @@ int bpf_token_create(union bpf_attr *attr) goto out_path; } + mnt_opts = path.dentry->d_sb->s_fs_info; + if (mnt_opts->delegate_cmds == 0 && + mnt_opts->delegate_maps == 0 && + mnt_opts->delegate_progs == 0 && + mnt_opts->delegate_attachs == 0) { + err = -ENOENT; /* no BPF token delegation is set up */ + goto out_path; + } + mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode); if (IS_ERR(inode)) { @@ -181,7 +190,6 @@ int bpf_token_create(union bpf_attr *attr) /* remember bpffs owning userns for future ns_capable() checks */ token->userns = get_user_ns(userns); - mnt_opts = path.dentry->d_sb->s_fs_info; token->allowed_cmds = mnt_opts->delegate_cmds; token->allowed_maps = mnt_opts->delegate_maps; token->allowed_progs = mnt_opts->delegate_progs; diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 2d0c282c8588b5..b6619199a7066a 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o + usdt.o zip.o elf.o features.o diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index f4e1da3c6d5f9f..0ad8e532b3cfc4 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") */ -int probe_memcg_account(void) +int probe_memcg_account(int token_fd) { const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct bpf_insn insns[] = { @@ -120,6 +120,7 @@ int probe_memcg_account(void) 
attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); + attr.prog_token_fd = token_fd; prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { @@ -146,7 +147,7 @@ int bump_rlimit_memlock(void) struct rlimit rlim; /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) + if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT)) return 0; memlock_bumped = true; @@ -181,7 +182,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (map_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -265,7 +266,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.kern_version = OPTS_GET(opts, kern_version, 0); attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); - if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (prog_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ee95fd379d4d8a..63033c3343205f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1317,7 +1317,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd) { LIBBPF_OPTS(bpf_btf_load_opts, opts); __u32 buf_sz = 0, raw_size; @@ -1367,6 +1369,7 @@ int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 lo opts.log_level = log_level; } + opts.token_fd = 
token_fd; btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { /* time to turn on verbose mode and try again */ @@ -1394,7 +1397,7 @@ int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 lo int btf__load_into_kernel(struct btf *btf) { - return btf_load_into_kernel(btf, NULL, 0, 0); + return btf_load_into_kernel(btf, NULL, 0, 0, 0); } int btf__fd(const struct btf *btf) diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index b02faec748a51b..c92e02394159e1 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -11,8 +11,6 @@ #include "libbpf_internal.h" #include "str_error.h" -#define STRERR_BUFSIZE 128 - /* A SHT_GNU_versym section holds 16-bit words. This bit is set if * the symbol is hidden and can only be seen when referenced using an * explicit version number. This is a GNU extension. diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c new file mode 100644 index 00000000000000..ce98a334be21ee --- /dev/null +++ b/tools/lib/bpf/features.c @@ -0,0 +1,478 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ +#include +#include +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" +#include "str_error.h" + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +static int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + +static int probe_kern_prog_name(int token_fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + attr.prog_token_fd = token_fd; + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); + + /* make sure loading with name works */ + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); + return probe_fd(ret); +} + +static int probe_kern_global_data(int token_fd) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); + int ret, map, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + insns[0].imm = map; + + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); + close(map); + return probe_fd(ret); +} + +static int probe_kern_btf(int token_fd) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_func(int token_fd) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_func_global(int token_fd) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_datasec(int token_fd) +{ + static const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 
4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_float(int token_fd) +{ + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_decl_tag(int token_fd) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_type_tag(int token_fd) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_array_mmap(int token_fd) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_MMAPABLE, + .token_fd = token_fd, + ); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); +} + +static int probe_kern_exp_attach_type(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .token_fd = token_fd, + ); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a 
BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_kern_probe_read_kernel(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_prog_bind_map(int token_fd) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + +static int probe_module_btf(int token_fd) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. 
+ */ + err = bpf_btf_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + +static int probe_perf_link(int token_fd) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); + int prog_fd, link_fd, err; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), &opts); + if (prog_fd < 0) + return -errno; + + /* use invalid perf_event FD to get EBADF, if link is supported; + * otherwise EINVAL should be returned + */ + link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_uprobe_multi_link(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + .token_fd = token_fd, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. 
*/ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_kern_bpf_cookie(int token_fd) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(ret); +} + +static int probe_kern_btf_enum64(int token_fd) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +typedef int (*feature_probe_fn)(int /* token_fd */); + +static struct kern_feature_cache feature_cache; + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + 
[FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, + [FEAT_PERF_LINK] = { + "BPF perf link support", probe_perf_link, + }, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) +{ + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; + + if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { + ret = feat->probe(cache->token_fd); + if (ret > 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } + } + + return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ac54ebc0629f18..4b5ff9508e18fd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -59,6 +59,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" + #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format 
string. It may break @@ -693,6 +695,10 @@ struct bpf_object { struct usdt_manager *usdt_man; + struct kern_feature_cache *feat_cache; + char *token_path; + int token_fd; + char path[]; }; @@ -2192,7 +2198,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) int err; if (!path) - path = "/sys/fs/bpf"; + path = BPF_FS_DEFAULT_PATH; err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); if (err) @@ -3279,7 +3285,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0); + obj->log_level ? 1 : 0, obj->token_fd); } if (sanitize) { if (!err) { @@ -4602,6 +4608,63 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) return 0; } +static int bpf_object_prepare_token(struct bpf_object *obj) +{ + const char *bpffs_path; + int bpffs_fd = -1, token_fd, err; + bool mandatory; + enum libbpf_print_level level; + + /* token is already set up */ + if (obj->token_fd > 0) + return 0; + /* token is explicitly prevented */ + if (obj->token_fd < 0) { + pr_debug("object '%s': token is prevented, skipping...\n", obj->name); + /* reset to zero to avoid extra checks during map_create and prog_load steps */ + obj->token_fd = 0; + return 0; + } + + mandatory = obj->token_path != NULL; + level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; + + bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; + bpffs_fd = open(bpffs_path, O_DIRECTORY | O_RDONLY); /* access mode belongs in flags; open()'s 3rd arg is a creation mode, ignored without O_CREAT */ + if (bpffs_fd < 0) { + err = -errno; + __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", + obj->name, err, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? 
err : 0; + } + + token_fd = bpf_token_create(bpffs_fd, 0); + close(bpffs_fd); + if (token_fd < 0) { + if (!mandatory && token_fd == -ENOENT) { + pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", + obj->name, bpffs_path); + return 0; + } + __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", + obj->name, token_fd, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? token_fd : 0; + } + + obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); + if (!obj->feat_cache) { + close(token_fd); + return -ENOMEM; + } + + obj->token_fd = token_fd; + obj->feat_cache->token_fd = token_fd; + + return 0; +} + static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -4611,6 +4674,7 @@ bpf_object__probe_loading(struct bpf_object *obj) BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = obj->token_fd); if (obj->gen_loader) return 0; @@ -4620,9 +4684,9 @@ bpf_object__probe_loading(struct bpf_object *obj) pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4637,462 +4701,18 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(void) -{ - const size_t attr_sz = offsetofend(union bpf_attr, prog_name); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - 
}; - union bpf_attr attr; - int ret; - - memset(&attr, 0, attr_sz); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.license = ptr_to_u64("GPL"); - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = (__u32)ARRAY_SIZE(insns); - libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); - - /* make sure loading with name works */ - ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); - return probe_fd(ret); -} - -static int probe_kern_global_data(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func_global(void) -{ - static 
const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_datasec(void) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_float(void) -{ - static const char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_decl_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_type_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; 
- - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_array_mmap(void) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(fd); -} - -static int probe_prog_bind_map(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. - */ - err = bpf_btf_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_uprobe_multi_link(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, load_opts, - .expected_attach_type = BPF_TRACE_UPROBE_MULTI, - ); - LIBBPF_OPTS(bpf_link_create_opts, link_opts); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, 
err; - unsigned long offset = 0; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", - insns, ARRAY_SIZE(insns), &load_opts); - if (prog_fd < 0) - return -errno; - - /* Creating uprobe in '/' binary should fail with -EBADF. */ - link_opts.uprobe_multi.path = "/"; - link_opts.uprobe_multi.offsets = &offset; - link_opts.uprobe_multi.cnt = 1; - - link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_kern_bpf_cookie(void) -{ - struct bpf_insn insns[] = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), - BPF_EXIT_INSN(), - }; - int ret, insn_cnt = ARRAY_SIZE(insns); - - ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(ret); -} - -static int probe_kern_btf_enum64(void) -{ - static const char strs[] = "\0enum64"; - __u32 types[] = { - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_syscall_wrapper(void); - -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -typedef int (*feature_probe_fn)(void); - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; - enum kern_feature_result res; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map 
mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, - [FEAT_BPF_COOKIE] = { - "BPF cookie support", probe_kern_bpf_cookie, - }, - [FEAT_BTF_ENUM64] = { - "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, - }, - [FEAT_SYSCALL_WRAPPER] = { - "Kernel using syscall wrapper", probe_kern_syscall_wrapper, - }, - [FEAT_UPROBE_MULTI_LINK] = { - "BPF multi-uprobe link support", probe_uprobe_multi_link, - }, -}; - bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - if (obj && obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. 
*/ return true; - if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { - ret = feat->probe(); - if (ret > 0) { - WRITE_ONCE(feat->res, FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(feat->res, FEAT_MISSING); - } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(feat->res, FEAT_MISSING); - } - } + if (obj && obj->token_fd) + return feat_supported(obj->feat_cache, feat_id); - return READ_ONCE(feat->res) == FEAT_SUPPORTED; + return feat_supported(NULL, feat_id); } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) @@ -5211,6 +4831,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; + create_attr.token_fd = obj->token_fd; if (bpf_map__is_struct_ops(map)) create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -7046,6 +6667,7 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.token_fd = obj->token_fd; /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(obj); @@ -7507,10 +7129,10 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path; + const char *obj_name, *kconfig, *btf_tmp_path, *token_path; struct bpf_object *obj; char tmp_name[64]; - int err; + int err, token_fd; char *log_buf; size_t log_size; __u32 log_level; @@ -7544,6 +7166,28 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); + token_path = OPTS_GET(opts, bpf_token_path, 
NULL); + token_fd = OPTS_GET(opts, bpf_token_fd, -1); + /* non-empty token path can't be combined with invalid token FD */ + if (token_path && token_path[0] != '\0' && token_fd < 0) + return ERR_PTR(-EINVAL); + /* empty token path can't be combined with valid token FD */ + if (token_path && token_path[0] == '\0' && token_fd > 0) + return ERR_PTR(-EINVAL); + /* if user didn't specify bpf_token_path/bpf_token_fd explicitly, + * check if LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as + * bpf_token_path option + */ + if (token_fd == 0 && !token_path) + token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); + /* empty token_path is equivalent to invalid token_fd */ + if (token_path && token_path[0] == '\0') { + token_path = NULL; + token_fd = -1; + } + if (token_path && strlen(token_path) >= PATH_MAX) + return ERR_PTR(-ENAMETOOLONG); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7552,6 +7196,19 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; + obj->token_fd = token_fd <= 0 ? token_fd : dup_good_fd(token_fd); + if (token_fd > 0 && obj->token_fd < 0) { + err = -errno; + goto out; + } + if (token_path) { + obj->token_path = strdup(token_path); + if (!obj->token_path) { + err = -ENOMEM; + goto out; + } + } + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -8062,7 +7719,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch if (obj->gen_loader) bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); - err = bpf_object__probe_loading(obj); + err = bpf_object_prepare_token(obj); + err = err ? : bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? 
: bpf_object__sanitize_and_load_btf(obj); @@ -8599,6 +8257,11 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); + zfree(&obj->feat_cache); + zfree(&obj->token_path); + if (obj->token_fd > 0) + close(obj->token_fd); + free(obj); } @@ -10612,7 +10275,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -static int probe_kern_syscall_wrapper(void) +int probe_kern_syscall_wrapper(int token_fd) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6cd9c501624f50..916904bd2a7ad1 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -177,10 +177,45 @@ struct bpf_object_open_opts { * logs through its print callback. */ __u32 kernel_log_level; + /* FD of a BPF token instantiated by user through bpf_token_create() + * API. BPF object will keep dup()'ed FD internally, so passed token + * FD can be closed after BPF object/skeleton open step. + * + * Setting bpf_token_fd to negative value disables libbpf's automatic + * attempt to create BPF token from default BPF FS mount point + * (/sys/fs/bpf), in case this default behavior is undesirable. + * + * If bpf_token_path and bpf_token_fd are not specified, libbpf will + * consult LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will + * be taken as a value of bpf_token_path option and will force libbpf + * to either create BPF token from provided custom BPF FS path, or + * will disable implicit BPF token creation, if envvar value is an + * empty string. + * + * bpf_token_path and bpf_token_fd are mutually exclusive and only one + * of those options should be set. Either of them overrides + * LIBBPF_BPF_TOKEN_PATH envvar. + */ + int bpf_token_fd; + /* Path to BPF FS mount point to derive BPF token from. + * + * Created BPF token will be used for all bpf() syscall operations + * that accept BPF token (e.g., map creation, BTF and program loads, + * etc) automatically within instantiated BPF object. 
+ * + * Setting bpf_token_path option to empty string disables libbpf's + * automatic attempt to create BPF token from default BPF FS mount + * point (/sys/fs/bpf), in case this default behavior is undesirable. + * + * bpf_token_path and bpf_token_fd are mutually exclusive and only one + * of those options should be set. Either of them overrides + * LIBBPF_BPF_TOKEN_PATH envvar. + */ + const char *bpf_token_path; size_t :0; }; -#define bpf_object_open_opts__last_field kernel_log_level +#define bpf_object_open_opts__last_field bpf_token_path /** * @brief **bpf_object__open()** creates a bpf_object by opening diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b5d334754e5dc8..4cda32298c49c4 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -360,15 +360,32 @@ enum kern_feature_id { __FEAT_CNT, }; -int probe_memcg_account(void); +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; + +struct kern_feature_cache { + enum kern_feature_result res[__FEAT_CNT]; + int token_fd; +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); + +int probe_kern_syscall_wrapper(int token_fd); +int probe_memcg_account(int token_fd); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); + const char *str_sec, size_t str_len, + int token_fd); +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type 
attach_type, @@ -532,6 +549,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. + * Original FD is not closed or altered in any other way. + * Preserves original FD value, if it's invalid (negative). + */ +static inline int dup_good_fd(int fd) +{ + if (fd < 0) + return fd; + return fcntl(fd, F_DUPFD_CLOEXEC, 3); +} + /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 * Takes ownership of the fd passed in, and closes it if calling * fcntl(fd, F_DUPFD_CLOEXEC, 3). @@ -543,7 +571,7 @@ static inline int ensure_good_fd(int fd) if (fd < 0) return fd; if (fd < 3) { - fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + fd = dup_good_fd(fd); saved_errno = errno; close(old_fd); errno = saved_errno; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 9c4db90b92b6b5..8e743700663916 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) } int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len) + const char *str_sec, size_t str_len, + int token_fd) { struct btf_header hdr = { .magic = BTF_MAGIC, @@ -229,6 +230,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, .str_off = types_len, .str_len = str_len, }; + LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd); int btf_fd, btf_len; __u8 *raw_btf; @@ -241,7 +243,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); + btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); free(raw_btf); return btf_fd; @@ -271,7 +273,7 @@ static int load_local_storage_btf(void) }; return libbpf__load_raw_btf((char 
*)types, sizeof(types), - strs, sizeof(strs)); + strs, sizeof(strs), 0); } static int probe_map_create(enum bpf_map_type map_type) diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index a139334d57b603..626d7ffb03d60d 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -2,5 +2,8 @@ #ifndef __LIBBPF_STR_ERROR_H #define __LIBBPF_STR_ERROR_H +#define STRERR_BUFSIZE 128 + char *libbpf_strerror_r(int err, char *dst, int len); + #endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index dc03790c6272be..548aeb91ab0dc5 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -12,8 +12,12 @@ #include #include #include +#include #include #include +#include "priv_map.skel.h" +#include "priv_prog.skel.h" +#include "dummy_st_ops_success.skel.h" static inline int sys_mount(const char *dev_name, const char *dir_name, const char *type, unsigned long flags, @@ -42,6 +46,13 @@ static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) return syscall(__NR_fsmount, fs_fd, flags, ms_flags); } +static inline int sys_move_mount(int from_dfd, const char *from_path, + int to_dfd, const char *to_path, + unsigned flags) +{ + return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags); +} + static int drop_priv_caps(__u64 *old_caps) { return cap_disable_effective((1ULL << CAP_BPF) | @@ -643,6 +654,281 @@ static int userns_prog_load(int mnt_fd) return err; } +static int userns_obj_priv_map(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_map *skel; + int err, token_fd; + + skel = priv_map__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_map__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path 
= buf; + skel = priv_map__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = priv_map__load(skel); + priv_map__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + /* create token and pass it through bpf_token_fd */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "create_token")) + return -EINVAL; + + opts.bpf_token_path = NULL; + opts.bpf_token_fd = token_fd; + skel = priv_map__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_fd_open")) + return -EINVAL; + + /* we can close our token FD, bpf_object owns dup()'ed FD now */ + close(token_fd); + + err = priv_map__load(skel); + priv_map__destroy(skel); + if (!ASSERT_OK(err, "obj_token_fd_load")) + return -EINVAL; + + return 0; +} + +static int userns_obj_priv_prog(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_prog *skel; + int err; + + skel = priv_prog__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_prog__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = priv_prog__load(skel); + priv_prog__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + return 0; +} + +/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, + * which should cause struct_ops application to fail, as BTF won't be uploaded + * into the kernel, even if STRUCT_OPS programs themselves are allowed + */ +static int validate_struct_ops_load(int mnt_fd, bool expect_success) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct dummy_st_ops_success *skel; + int err; + + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = 
dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (expect_success) { + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + } else /* expect failure */ { + if (!ASSERT_ERR(err, "obj_token_path_load")) + return -EINVAL; + } + + return 0; +} + +static int userns_obj_priv_btf_fail(int mnt_fd) +{ + return validate_struct_ops_load(mnt_fd, false /* should fail */); +} + +static int userns_obj_priv_btf_success(int mnt_fd) +{ + return validate_struct_ops_load(mnt_fd, true /* should succeed */); +} + +#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" +#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" + +static int userns_obj_priv_implicit_token(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF + * token automatically and implicitly + */ + err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + return -EINVAL; + + /* disable implicit BPF token creation by setting + * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail + */ + err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + return -EINVAL; + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) { + unsetenv(TOKEN_ENVVAR); + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + unsetenv(TOKEN_ENVVAR); + + /* now the same struct_ops skeleton should succeed thanks to libbpf + * creating BPF token from 
/sys/fs/bpf mount point + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + return -EINVAL; + + dummy_st_ops_success__destroy(skel); + + /* now disable implicit token through empty bpf_token_path, should fail */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + return -EINVAL; + + /* now disable implicit token through negative bpf_token_fd, should fail */ + opts.bpf_token_path = NULL; + opts.bpf_token_fd = -1; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) + return -EINVAL; + + return 0; +} + +static int userns_obj_priv_implicit_token_envvar(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over custom location, so libbpf can't create + * BPF token implicitly, unless pointed to it through + * LIBBPF_BPF_TOKEN_PATH envvar + */ + rmdir(TOKEN_BPFFS_CUSTOM); + if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) + goto err_out; + err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + goto err_out; + + /* even though we have BPF FS with delegation, it's not at default + * /sys/fs/bpf location, so we still fail to load until envvar is set up 
+ */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) { + dummy_st_ops_success__destroy(skel); + goto err_out; + } + + err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + goto err_out; + + /* now the same struct_ops skeleton should succeed thanks to libbpf + * creating BPF token from custom mount point + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + goto err_out; + + dummy_st_ops_success__destroy(skel); + + /* now disable implicit token through empty bpf_token_path, envvar + * will be ignored, should fail + */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + goto err_out; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + goto err_out; + + /* now disable implicit token through negative bpf_token_fd, envvar + * will be ignored, should fail + */ + opts.bpf_token_path = NULL; + opts.bpf_token_fd = -1; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) + goto err_out; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) + goto err_out; + + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return 0; +err_out: + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return -EINVAL; +} + +#define bit(n) (1ULL << (n)) + void test_token(void) { if (test__start_subtest("map_token")) { @@ -669,4 +955,65 @@ void test_token(void) subtest_userns(&opts, userns_prog_load); } + if (test__start_subtest("obj_priv_map")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_MAP_CREATE), + .maps = bit(BPF_MAP_TYPE_QUEUE), + }; + + subtest_userns(&opts, userns_obj_priv_map); + } + if 
(test__start_subtest("obj_priv_prog")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_PROG_LOAD), + .progs = bit(BPF_PROG_TYPE_KPROBE), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_prog); + } + if (test__start_subtest("obj_priv_btf_fail")) { + struct bpffs_opts opts = { + /* disallow BTF loading */ + .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_fail); + } + if (test__start_subtest("obj_priv_btf_success")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_success); + } + if (test__start_subtest("obj_priv_implicit_token")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token); + } + if (test__start_subtest("obj_priv_implicit_token_envvar")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); + } } diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c new file mode 100644 index 00000000000000..9085be50f03bf4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_map.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_QUEUE); + __uint(max_entries, 1); + __type(value, __u32); +} priv_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c new file mode 100644 index 00000000000000..3c7b2b618c8a9b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_prog.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe") +int kprobe_prog(void *ctx) +{ + return 1; +}