diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e67cd45a85be47..24213a99cc79db 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -373,10 +373,25 @@ struct bpf_verifier_state { struct bpf_active_lock active_lock; bool speculative; bool active_rcu_lock; + /* If this state was ever pointed-to by other state's loop_entry field + * this flag would be set to true. Used to avoid freeing such states + * while they are still in use. + */ + bool used_as_loop_entry; /* first and last insn idx of this verifier state */ u32 first_insn_idx; u32 last_insn_idx; + /* If this state is a part of states loop this field points to some + * parent of this state such that: + * - it is also a member of the same states loop; + * - DFS states traversal starting from initial state visits loop_entry + * state before this state. + * Used to compute topmost loop entry for state loops. + * State loops might appear because of open coded iterators logic. + * See get_loop_entry() for more information. + */ + struct bpf_verifier_state *loop_entry; /* jmp history recorded from first to last. * backtracking is using it to go from last to first. * For most states jmp_history_cnt is [0-3]. @@ -384,6 +399,7 @@ struct bpf_verifier_state { */ struct bpf_idx_pair *jmp_history; u32 jmp_history_cnt; + u32 dfs_depth; }; #define bpf_get_spilled_reg(slot, frame, mask) \ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e9bc5d4a25a176..98f9d0f35931f7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1802,6 +1802,8 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->parent = src->parent; dst_state->first_insn_idx = src->first_insn_idx; dst_state->last_insn_idx = src->last_insn_idx; + dst_state->dfs_depth = src->dfs_depth; + dst_state->used_as_loop_entry = src->used_as_loop_entry; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -1817,11 +1819,203 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, return 0; } +static u32 state_htab_size(struct bpf_verifier_env *env) +{ + return env->prog->len; +} + +static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx) +{ + struct bpf_verifier_state *cur = env->cur_state; + struct bpf_func_state *state = cur->frame[cur->curframe]; + + return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; +} + +static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b) +{ + int fr; + + if (a->curframe != b->curframe) + return false; + + for (fr = a->curframe; fr >= 0; fr--) + if (a->frame[fr]->callsite != b->frame[fr]->callsite) + return false; + + return true; +} + +/* Open coded iterators allow back-edges in the state graph in order to + * check unbounded loops that iterators. + * + * In is_state_visited() it is necessary to know if explored states are + * part of some loops in order to decide whether non-exact states + * comparison could be used: + * - non-exact states comparison establishes sub-state relation and uses + * read and precision marks to do so, these marks are propagated from + * children states and thus are not guaranteed to be final in a loop; + * - exact states comparison just checks if current and explored states + * are identical (and thus form a back-edge). + * + * Paper "A New Algorithm for Identifying Loops in Decompilation" + * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient + * algorithm for loop structure detection and gives an overview of + * relevant terminology. It also has helpful illustrations. + * + * [1] https://api.semanticscholar.org/CorpusID:15784067 + * + * We use a similar algorithm but because loop nested structure is + * irrelevant for verifier ours is significantly simpler and resembles + * strongly connected components algorithm from Sedgewick's textbook. + * + * Define topmost loop entry as a first node of the loop traversed in a + * depth first search starting from initial state. The goal of the loop + * tracking algorithm is to associate topmost loop entries with states + * derived from these entries. + * + * For each step in the DFS states traversal algorithm needs to identify + * the following situations: + * + * initial initial initial + * | | | + * V V V + * ... ... .---------> hdr + * | | | | + * V V | V + * cur .-> succ | .------... + * | | | | | | + * V | V | V V + * succ '-- cur | ... ... + * | | | + * | V V + * | succ <- cur + * | | + * | V + * | ... + * | | + * '----' + * + * (A) successor state of cur (B) successor state of cur or it's entry + * not yet traversed are in current DFS path, thus cur and succ + * are members of the same outermost loop + * + * initial initial + * | | + * V V + * ... ... + * | | + * V V + * .------... .------... + * | | | | + * V V V V + * .-> hdr ... ... ... + * | | | | | + * | V V V V + * | succ <- cur succ <- cur + * | | | + * | V V + * | ... ... + * | | | + * '----' exit + * + * (C) successor state of cur is a part of some loop but this loop + * does not include cur or successor state is not in a loop at all. + * + * Algorithm could be described as the following python code: + * + * traversed = set() # Set of traversed nodes + * entries = {} # Mapping from node to loop entry + * depths = {} # Depth level assigned to graph node + * path = set() # Current DFS path + * + * # Find outermost loop entry known for n + * def get_loop_entry(n): + * h = entries.get(n, None) + * while h in entries and entries[h] != h: + * h = entries[h] + * return h + * + * # Update n's loop entry if h's outermost entry comes + * # before n's outermost entry in current DFS path. + * def update_loop_entry(n, h): + * n1 = get_loop_entry(n) or n + * h1 = get_loop_entry(h) or h + * if h1 in path and depths[h1] <= depths[n1]: + * entries[n] = h1 + * + * def dfs(n, depth): + * traversed.add(n) + * path.add(n) + * depths[n] = depth + * for succ in G.successors(n): + * if succ not in traversed: + * # Case A: explore succ and update cur's loop entry + * # only if succ's entry is in current DFS path. + * dfs(succ, depth + 1) + * h = get_loop_entry(succ) + * update_loop_entry(n, h) + * else: + * # Case B or C depending on `h1 in path` check in update_loop_entry(). + * update_loop_entry(n, succ) + * path.remove(n) + * + * To adapt this algorithm for use with verifier: + * - use st->branch == 0 as a signal that DFS of succ had been finished + * and cur's loop entry has to be updated (case A), handle this in + * update_branch_counts(); + * - use st->branch > 0 as a signal that st is in the current DFS path; + * - handle cases B and C in is_state_visited(); + * - update topmost loop entry for intermediate states in get_loop_entry(). + */ +static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st) +{ + struct bpf_verifier_state *topmost = st->loop_entry, *old; + + while (topmost && topmost->loop_entry && topmost != topmost->loop_entry) + topmost = topmost->loop_entry; + /* Update loop entries for intermediate states to avoid this + * traversal in future get_loop_entry() calls. + */ + while (st && st->loop_entry != topmost) { + old = st->loop_entry; + st->loop_entry = topmost; + st = old; + } + return topmost; +} + +static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr) +{ + struct bpf_verifier_state *cur1, *hdr1; + + cur1 = get_loop_entry(cur) ?: cur; + hdr1 = get_loop_entry(hdr) ?: hdr; + /* The head1->branches check decides between cases B and C in + * comment for get_loop_entry(). If hdr1->branches == 0 then + * head's topmost loop entry is not in current DFS path, + * hence 'cur' and 'hdr' are not in the same loop and there is + * no need to update cur->loop_entry. + */ + if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) { + cur->loop_entry = hdr; + hdr->used_as_loop_entry = true; + } +} + static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) { while (st) { u32 br = --st->branches; + /* br == 0 signals that DFS exploration for 'st' is finished, + * thus it is necessary to update parent's loop entry if it + * turned out that st is a part of some loop. + * This is a part of 'case A' in get_loop_entry() comment. + */ + if (br == 0 && st->parent && st->loop_entry) + update_loop_entry(st->parent, st->loop_entry); + /* WARN_ON(br > 1) technically makes sense here, * but see comment in push_stack(), hence: */ @@ -7696,6 +7890,81 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id return 0; } +/* Look for a previous loop entry at insn_idx: nearest parent state + * stopped at insn_idx with callsites matching those in cur->frame. + */ +static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env, + struct bpf_verifier_state *cur, + int insn_idx) +{ + struct bpf_verifier_state_list *sl; + struct bpf_verifier_state *st; + + /* Explored states are pushed in stack order, most recent states come first */ + sl = *explored_state(env, insn_idx); + for (; sl; sl = sl->next) { + /* If st->branches != 0 state is a part of current DFS verification path, + * hence cur & st for a loop. + */ + st = &sl->state; + if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) && + st->dfs_depth < cur->dfs_depth) + return st; + } + + return NULL; +} + +static void reset_idmap_scratch(struct bpf_verifier_env *env); +static bool regs_exact(const struct bpf_reg_state *rold, + const struct bpf_reg_state *rcur, + struct bpf_idmap *idmap); + +static void maybe_widen_reg(struct bpf_verifier_env *env, + struct bpf_reg_state *rold, struct bpf_reg_state *rcur, + struct bpf_idmap *idmap) +{ + if (rold->type != SCALAR_VALUE) + return; + if (rold->type != rcur->type) + return; + if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap)) + return; + __mark_reg_unknown(env, rcur); +} + +static int widen_imprecise_scalars(struct bpf_verifier_env *env, + struct bpf_verifier_state *old, + struct bpf_verifier_state *cur) +{ + struct bpf_func_state *fold, *fcur; + int i, fr; + + reset_idmap_scratch(env); + for (fr = old->curframe; fr >= 0; fr--) { + fold = old->frame[fr]; + fcur = cur->frame[fr]; + + for (i = 0; i < MAX_BPF_REG; i++) + maybe_widen_reg(env, + &fold->regs[i], + &fcur->regs[i], + &env->idmap_scratch); + + for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) { + if (!is_spilled_reg(&fold->stack[i]) || + !is_spilled_reg(&fcur->stack[i])) + continue; + + maybe_widen_reg(env, + &fold->stack[i].spilled_ptr, + &fcur->stack[i].spilled_ptr, + &env->idmap_scratch); + } + } + return 0; +} + /* process_iter_next_call() is called when verifier gets to iterator's next * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer * to it as just "iter_next()" in comments below. @@ -7737,25 +8006,47 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id * is some statically known limit on number of iterations (e.g., if there is * an explicit `if n > 100 then break;` statement somewhere in the loop). * - * One very subtle but very important aspect is that we *always* simulate NULL - * condition first (as the current state) before we simulate non-NULL case. - * This has to do with intricacies of scalar precision tracking. By simulating - * "exit condition" of iter_next() returning NULL first, we make sure all the - * relevant precision marks *that will be set **after** we exit iterator loop* - * are propagated backwards to common parent state of NULL and non-NULL - * branches. Thanks to that, state equivalence checks done later in forked - * state, when reaching iter_next() for ACTIVE iterator, can assume that - * precision marks are finalized and won't change. Because simulating another - * ACTIVE iterator iteration won't change them (because given same input - * states we'll end up with exactly same output states which we are currently - * comparing; and verification after the loop already propagated back what - * needs to be **additionally** tracked as precise). It's subtle, grok - * precision tracking for more intuitive understanding. + * Iteration convergence logic in is_state_visited() relies on exact + * states comparison, which ignores read and precision marks. + * This is necessary because read and precision marks are not finalized + * while in the loop. Exact comparison might preclude convergence for + * simple programs like below: + * + * i = 0; + * while(iter_next(&it)) + * i++; + * + * At each iteration step i++ would produce a new distinct state and + * eventually instruction processing limit would be reached. + * + * To avoid such behavior speculatively forget (widen) range for + * imprecise scalar registers, if those registers were not precise at the + * end of the previous iteration and do not match exactly. + * + * This is a conservative heuristic that allows to verify wide range of programs, + * however it precludes verification of programs that conjure an + * imprecise value on the first loop iteration and use it as precise on a second. + * For example, the following safe program would fail to verify: + * + * struct bpf_num_iter it; + * int arr[10]; + * int i = 0, a = 0; + * bpf_iter_num_new(&it, 0, 10); + * while (bpf_iter_num_next(&it)) { + * if (a == 0) { + * a = 1; + * i = 7; // Because i changed verifier would forget + * // it's range on second loop entry. + * } else { + * arr[i] = 42; // This would fail to verify. + * } + * } + * bpf_iter_num_destroy(&it); */ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, struct bpf_kfunc_call_arg_meta *meta) { - struct bpf_verifier_state *cur_st = env->cur_state, *queued_st; + struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr; struct bpf_reg_state *cur_iter, *queued_iter; int iter_frameno = meta->iter.frameno; @@ -7773,6 +8064,19 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, } if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) { + /* Because iter_next() call is a checkpoint is_state_visitied() + * should guarantee parent state with same call sites and insn_idx. + */ + if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx || + !same_callsites(cur_st->parent, cur_st)) { + verbose(env, "bug: bad parent state for iter next call"); + return -EFAULT; + } + /* Note cur_st->parent in the call below, it is necessary to skip + * checkpoint created for cur_st by is_state_visited() + * right at this instruction. + */ + prev_st = find_prev_entry(env, cur_st->parent, insn_idx); /* branch out active iter state */ queued_st = push_stack(env, insn_idx + 1, insn_idx, false); if (!queued_st) @@ -7781,6 +8085,8 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; queued_iter->iter.depth++; + if (prev_st) + widen_imprecise_scalars(env, prev_st, queued_st); queued_fr = queued_st->frame[queued_st->curframe]; mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]); @@ -15020,21 +15326,6 @@ enum { BRANCH = 2, }; -static u32 state_htab_size(struct bpf_verifier_env *env) -{ - return env->prog->len; -} - -static struct bpf_verifier_state_list **explored_state( - struct bpf_verifier_env *env, - int idx) -{ - struct bpf_verifier_state *cur = env->cur_state; - struct bpf_func_state *state = cur->frame[cur->curframe]; - - return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; -} - static void mark_prune_point(struct bpf_verifier_env *env, int idx) { env->insn_aux_data[idx].prune_point = true; @@ -15911,18 +16202,14 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, struct bpf_verifier_state *cur) { struct bpf_verifier_state_list *sl; - int i; sl = *explored_state(env, insn); while (sl) { if (sl->state.branches) goto next; if (sl->state.insn_idx != insn || - sl->state.curframe != cur->curframe) + !same_callsites(&sl->state, cur)) goto next; - for (i = 0; i <= cur->curframe; i++) - if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) - goto next; clean_verifier_state(env, &sl->state); next: sl = sl->next; @@ -15940,8 +16227,11 @@ static bool regs_exact(const struct bpf_reg_state *rold, /* Returns true if (rold safe implies rcur safe) */ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, - struct bpf_reg_state *rcur, struct bpf_idmap *idmap) + struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact) { + if (exact) + return regs_exact(rold, rcur, idmap); + if (!(rold->live & REG_LIVE_READ)) /* explored state didn't use this */ return true; @@ -16058,7 +16348,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, } static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, - struct bpf_func_state *cur, struct bpf_idmap *idmap) + struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact) { int i, spi; @@ -16071,7 +16361,12 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, spi = i / BPF_REG_SIZE; - if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { + if (exact && + old->stack[spi].slot_type[i % BPF_REG_SIZE] != + cur->stack[spi].slot_type[i % BPF_REG_SIZE]) + return false; + + if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) { i += BPF_REG_SIZE - 1; /* explored state didn't use this */ continue; @@ -16121,7 +16416,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, * return false to continue verification of this path */ if (!regsafe(env, &old->stack[spi].spilled_ptr, - &cur->stack[spi].spilled_ptr, idmap)) + &cur->stack[spi].spilled_ptr, idmap, exact)) return false; break; case STACK_DYNPTR: @@ -16203,16 +16498,16 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, * the current state will reach 'bpf_exit' instruction safely */ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, - struct bpf_func_state *cur) + struct bpf_func_state *cur, bool exact) { int i; for (i = 0; i < MAX_BPF_REG; i++) if (!regsafe(env, &old->regs[i], &cur->regs[i], - &env->idmap_scratch)) + &env->idmap_scratch, exact)) return false; - if (!stacksafe(env, old, cur, &env->idmap_scratch)) + if (!stacksafe(env, old, cur, &env->idmap_scratch, exact)) return false; if (!refsafe(old, cur, &env->idmap_scratch)) @@ -16221,17 +16516,23 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat return true; } +static void reset_idmap_scratch(struct bpf_verifier_env *env) +{ + env->idmap_scratch.tmp_id_gen = env->id_gen; + memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map)); +} + static bool states_equal(struct bpf_verifier_env *env, struct bpf_verifier_state *old, - struct bpf_verifier_state *cur) + struct bpf_verifier_state *cur, + bool exact) { int i; if (old->curframe != cur->curframe) return false; - env->idmap_scratch.tmp_id_gen = env->id_gen; - memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map)); + reset_idmap_scratch(env); /* Verification state from speculative execution simulation * must never prune a non-speculative execution one. @@ -16261,7 +16562,7 @@ static bool states_equal(struct bpf_verifier_env *env, for (i = 0; i <= old->curframe; i++) { if (old->frame[i]->callsite != cur->frame[i]->callsite) return false; - if (!func_states_equal(env, old->frame[i], cur->frame[i])) + if (!func_states_equal(env, old->frame[i], cur->frame[i], exact)) return false; } return true; @@ -16515,10 +16816,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl, **pprev; - struct bpf_verifier_state *cur = env->cur_state, *new; - int i, j, err, states_cnt = 0; + struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry; + int i, j, n, err, states_cnt = 0; bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx); bool add_new_state = force_new_state; + bool force_exact; /* bpf progs typically have pruning point every 4 instructions * http://vger.kernel.org/bpfconf2019.html#session-1 @@ -16571,9 +16873,33 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * It's safe to assume that iterator loop will finish, taking into * account iter_next() contract of eventually returning * sticky NULL result. + * + * Note, that states have to be compared exactly in this case because + * read and precision marks might not be finalized inside the loop. + * E.g. as in the program below: + * + * 1. r7 = -16 + * 2. r6 = bpf_get_prandom_u32() + * 3. while (bpf_iter_num_next(&fp[-8])) { + * 4. if (r6 != 42) { + * 5. r7 = -32 + * 6. r6 = bpf_get_prandom_u32() + * 7. continue + * 8. } + * 9. r0 = r10 + * 10. r0 += r7 + * 11. r8 = *(u64 *)(r0 + 0) + * 12. r6 = bpf_get_prandom_u32() + * 13. } + * + * Here verifier would first visit path 1-3, create a checkpoint at 3 + * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does + * not have read or precision mark for r7 yet, thus inexact states + * comparison would discard current state with r7=-32 + * => unsafe memory access at 11 would not be caught. */ if (is_iter_next_insn(env, insn_idx)) { - if (states_equal(env, &sl->state, cur)) { + if (states_equal(env, &sl->state, cur, true)) { struct bpf_func_state *cur_frame; struct bpf_reg_state *iter_state, *iter_reg; int spi; @@ -16589,17 +16915,23 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) */ spi = __get_spi(iter_reg->off + iter_reg->var_off.value); iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr; - if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) + if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) { + update_loop_entry(cur, &sl->state); goto hit; + } } goto skip_inf_loop_check; } /* attempt to detect infinite loop to avoid unnecessary doomed work */ if (states_maybe_looping(&sl->state, cur) && - states_equal(env, &sl->state, cur) && + states_equal(env, &sl->state, cur, false) && !iter_active_depths_differ(&sl->state, cur)) { verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); + verbose(env, "cur state:"); + print_verifier_state(env, cur->frame[cur->curframe], true); + verbose(env, "old state:"); + print_verifier_state(env, sl->state.frame[cur->curframe], true); return -EINVAL; } /* if the verifier is processing a loop, avoid adding new state @@ -16621,7 +16953,36 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) add_new_state = false; goto miss; } - if (states_equal(env, &sl->state, cur)) { + /* If sl->state is a part of a loop and this loop's entry is a part of + * current verification path then states have to be compared exactly. + * 'force_exact' is needed to catch the following case: + * + * initial Here state 'succ' was processed first, + * | it was eventually tracked to produce a + * V state identical to 'hdr'. + * .---------> hdr All branches from 'succ' had been explored + * | | and thus 'succ' has its .branches == 0. + * | V + * | .------... Suppose states 'cur' and 'succ' correspond + * | | | to the same instruction + callsites. + * | V V In such case it is necessary to check + * | ... ... if 'succ' and 'cur' are states_equal(). + * | | | If 'succ' and 'cur' are a part of the + * | V V same loop exact flag has to be set. + * | succ <- cur To check if that is the case, verify + * | | if loop entry of 'succ' is in current + * | V DFS path. + * | ... + * | | + * '----' + * + * Additional details are in the comment before get_loop_entry(). + */ + loop_entry = get_loop_entry(&sl->state); + force_exact = loop_entry && loop_entry->branches > 0; + if (states_equal(env, &sl->state, cur, force_exact)) { + if (force_exact) + update_loop_entry(cur, loop_entry); hit: sl->hit_cnt++; /* reached equivalent register/stack state, @@ -16660,13 +17021,18 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * to keep checking from state equivalence point of view. * Higher numbers increase max_states_per_insn and verification time, * but do not meaningfully decrease insn_processed. + * 'n' controls how many times state could miss before eviction. + * Use bigger 'n' for checkpoints because evicting checkpoint states + * too early would hinder iterator convergence. */ - if (sl->miss_cnt > sl->hit_cnt * 3 + 3) { + n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3; + if (sl->miss_cnt > sl->hit_cnt * n + n) { /* the state is unlikely to be useful. Remove it to * speed up verification */ *pprev = sl->next; - if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { + if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE && + !sl->state.used_as_loop_entry) { u32 br = sl->state.branches; WARN_ONCE(br, @@ -16735,6 +17101,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) cur->parent = new; cur->first_insn_idx = insn_idx; + cur->dfs_depth = new->dfs_depth + 1; clear_jmp_history(cur); new_sl->next = *explored_state(env, insn_idx); *explored_state(env, insn_idx) = new_sl; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 6b9b3c56f009a3..c20c4e38b71c5b 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -14,6 +14,13 @@ int my_pid; int arr[256]; int small_arr[16] SEC(".data.small_arr"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10); + __type(key, int); + __type(value, int); +} amap SEC(".maps"); + #ifdef REAL_TEST #define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0 #else @@ -716,4 +723,692 @@ int iter_pass_iter_ptr_to_subprog(const void *ctx) return 0; } +SEC("?raw_tp") +__failure +__msg("R1 type=scalar expected=fp") +__naked int delayed_read_mark(void) +{ + /* This is equivalent to C program below. + * The call to bpf_iter_num_next() is reachable with r7 values &fp[-16] and 0xdead. + * State with r7=&fp[-16] is visited first and follows r6 != 42 ... continue branch. + * At this point iterator next() call is reached with r7 that has no read mark. + * Loop body with r7=0xdead would only be visited if verifier would decide to continue + * with second loop iteration. Absence of read mark on r7 might affect state + * equivalent logic used for iterator convergence tracking. + * + * r7 = &fp[-16] + * fp[-16] = 0 + * r6 = bpf_get_prandom_u32() + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * r6++ + * if (r6 != 42) { + * r7 = 0xdead + * continue; + * } + * bpf_probe_read_user(r7, 8, 0xdeadbeef); // this is not safe + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "r7 = r10;" + "r7 += -16;" + "r0 = 0;" + "*(u64 *)(r7 + 0) = r0;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 2f;" + "r6 += 1;" + "if r6 != 42 goto 3f;" + "r7 = 0xdead;" + "goto 1b;" + "3:" + "r1 = r7;" + "r2 = 8;" + "r3 = 0xdeadbeef;" + "call %[bpf_probe_read_user];" + "goto 1b;" + "2:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_probe_read_user) + : __clobber_all + ); +} + +SEC("?raw_tp") +__failure +__msg("math between fp pointer and register with unbounded") +__naked int delayed_precision_mark(void) +{ + /* This is equivalent to C program below. + * The test is similar to delayed_iter_mark but verifies that incomplete + * precision don't fool verifier. + * The call to bpf_iter_num_next() is reachable with r7 values -16 and -32. + * State with r7=-16 is visited first and follows r6 != 42 ... continue branch. + * At this point iterator next() call is reached with r7 that has no read + * and precision marks. + * Loop body with r7=-32 would only be visited if verifier would decide to continue + * with second loop iteration. Absence of precision mark on r7 might affect state + * equivalent logic used for iterator convergence tracking. + * + * r8 = 0 + * fp[-16] = 0 + * r7 = -16 + * r6 = bpf_get_prandom_u32() + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * if (r6 != 42) { + * r7 = -32 + * r6 = bpf_get_prandom_u32() + * continue; + * } + * r0 = r10 + * r0 += r7 + * r8 = *(u64 *)(r0 + 0) // this is not safe + * r6 = bpf_get_prandom_u32() + * } + * bpf_iter_num_destroy(&fp[-8]) + * return r8 + */ + asm volatile ( + "r8 = 0;" + "*(u64 *)(r10 - 16) = r8;" + "r7 = -16;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + "r1 = r10;" + "r1 += -8;\n" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 2f;" + "if r6 != 42 goto 3f;" + "r7 = -32;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "goto 1b;\n" + "3:" + "r0 = r10;" + "r0 += r7;" + "r8 = *(u64 *)(r0 + 0);" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "goto 1b;\n" + "2:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = r8;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_probe_read_user) + : __clobber_all + ); +} + +SEC("?raw_tp") +__failure +__msg("math between fp pointer and register with unbounded") +__flag(BPF_F_TEST_STATE_FREQ) +__naked int loop_state_deps1(void) +{ + /* This is equivalent to C program below. + * + * The case turns out to be tricky in a sense that: + * - states with c=-25 are explored only on a second iteration + * of the outer loop; + * - states with read+precise mark on c are explored only on + * second iteration of the inner loop and in a state which + * is pushed to states stack first. + * + * Depending on the details of iterator convergence logic + * verifier might stop states traversal too early and miss + * unsafe c=-25 memory access. + * + * j = iter_new(); // fp[-16] + * a = 0; // r6 + * b = 0; // r7 + * c = -24; // r8 + * while (iter_next(j)) { + * i = iter_new(); // fp[-8] + * a = 0; // r6 + * b = 0; // r7 + * while (iter_next(i)) { + * if (a == 1) { + * a = 0; + * b = 1; + * } else if (a == 0) { + * a = 1; + * if (random() == 42) + * continue; + * if (b == 1) { + * *(r10 + c) = 7; // this is not safe + * iter_destroy(i); + * iter_destroy(j); + * return; + * } + * } + * } + * iter_destroy(i); + * a = 0; + * b = 0; + * c = -25; + * } + * iter_destroy(j); + * return; + */ + asm volatile ( + "r1 = r10;" + "r1 += -16;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "r8 = -24;" + "j_loop_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto j_loop_end_%=;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "i_loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto i_loop_end_%=;" + "check_one_r6_%=:" + "if r6 != 1 goto check_zero_r6_%=;" + "r6 = 0;" + "r7 = 1;" + "goto i_loop_%=;" + "check_zero_r6_%=:" + "if r6 != 0 goto i_loop_%=;" + "r6 = 1;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto check_one_r7_%=;" + "goto i_loop_%=;" + "check_one_r7_%=:" + "if r7 != 1 goto i_loop_%=;" + "r0 = r10;" + "r0 += r8;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "i_loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r6 = 0;" + "r7 = 0;" + "r8 = -25;" + "goto j_loop_%=;" + "j_loop_end_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("?raw_tp") +__failure +__msg("math between fp pointer and register with unbounded") +__flag(BPF_F_TEST_STATE_FREQ) +__naked int loop_state_deps2(void) +{ + /* This is equivalent to C program below. + * + * The case turns out to be tricky in a sense that: + * - states with read+precise mark on c are explored only on a second + * iteration of the first inner loop and in a state which is pushed to + * states stack first. + * - states with c=-25 are explored only on a second iteration of the + * second inner loop and in a state which is pushed to states stack + * first. + * + * Depending on the details of iterator convergence logic + * verifier might stop states traversal too early and miss + * unsafe c=-25 memory access. + * + * j = iter_new(); // fp[-16] + * a = 0; // r6 + * b = 0; // r7 + * c = -24; // r8 + * while (iter_next(j)) { + * i = iter_new(); // fp[-8] + * a = 0; // r6 + * b = 0; // r7 + * while (iter_next(i)) { + * if (a == 1) { + * a = 0; + * b = 1; + * } else if (a == 0) { + * a = 1; + * if (random() == 42) + * continue; + * if (b == 1) { + * *(r10 + c) = 7; // this is not safe + * iter_destroy(i); + * iter_destroy(j); + * return; + * } + * } + * } + * iter_destroy(i); + * i = iter_new(); // fp[-8] + * a = 0; // r6 + * b = 0; // r7 + * while (iter_next(i)) { + * if (a == 1) { + * a = 0; + * b = 1; + * } else if (a == 0) { + * a = 1; + * if (random() == 42) + * continue; + * if (b == 1) { + * a = 0; + * c = -25; + * } + * } + * } + * iter_destroy(i); + * } + * iter_destroy(j); + * return; + */ + asm volatile ( + "r1 = r10;" + "r1 += -16;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "r8 = -24;" + "j_loop_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto j_loop_end_%=;" + + /* first inner loop */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "i_loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto i_loop_end_%=;" + "check_one_r6_%=:" + "if r6 != 1 goto check_zero_r6_%=;" + "r6 = 0;" + "r7 = 1;" + "goto i_loop_%=;" + "check_zero_r6_%=:" + "if r6 != 0 goto i_loop_%=;" + "r6 = 1;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto check_one_r7_%=;" + "goto i_loop_%=;" + "check_one_r7_%=:" + "if r7 != 1 goto i_loop_%=;" + "r0 = r10;" + "r0 += r8;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "i_loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + + /* second inner loop */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "i2_loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto i2_loop_end_%=;" + "check2_one_r6_%=:" + "if r6 != 1 goto check2_zero_r6_%=;" + "r6 = 0;" + "r7 = 1;" + "goto i2_loop_%=;" + "check2_zero_r6_%=:" + "if r6 != 0 goto i2_loop_%=;" + "r6 = 1;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto check2_one_r7_%=;" + "goto i2_loop_%=;" + "check2_one_r7_%=:" + "if r7 != 1 goto i2_loop_%=;" + "r6 = 0;" + "r8 = -25;" + "goto i2_loop_%=;" + "i2_loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + + "r6 = 0;" + "r7 = 0;" + "goto j_loop_%=;" + "j_loop_end_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("?raw_tp") +__success +__naked int triple_continue(void) +{ + /* This is equivalent to C program below. + * High branching factor of the loop body turned out to be + * problematic for one of the iterator convergence tracking + * algorithms explored. + * + * r6 = bpf_get_prandom_u32() + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * if (bpf_get_prandom_u32() != 42) + * continue; + * if (bpf_get_prandom_u32() != 42) + * continue; + * if (bpf_get_prandom_u32() != 42) + * continue; + * r0 += 0; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto loop_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto loop_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto loop_%=;" + "r0 += 0;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("?raw_tp") +__success +__naked int widen_spill(void) +{ + /* This is equivalent to C program below. + * The counter is stored in fp[-16], if this counter is not widened + * verifier states representing loop iterations would never converge. + * + * fp[-16] = 0 + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * r0 = fp[-16]; + * r0 += 1; + * fp[-16] = r0; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "r0 = *(u64 *)(r10 - 16);" + "r0 += 1;" + "*(u64 *)(r10 - 16) = r0;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("raw_tp") +__success +__naked int checkpoint_states_deletion(void) +{ + /* This is equivalent to C program below. + * + * int *a, *b, *c, *d, *e, *f; + * int i, sum = 0; + * bpf_for(i, 0, 10) { + * a = bpf_map_lookup_elem(&amap, &i); + * b = bpf_map_lookup_elem(&amap, &i); + * c = bpf_map_lookup_elem(&amap, &i); + * d = bpf_map_lookup_elem(&amap, &i); + * e = bpf_map_lookup_elem(&amap, &i); + * f = bpf_map_lookup_elem(&amap, &i); + * if (a) sum += 1; + * if (b) sum += 1; + * if (c) sum += 1; + * if (d) sum += 1; + * if (e) sum += 1; + * if (f) sum += 1; + * } + * return 0; + * + * The body of the loop spawns multiple simulation paths + * with different combination of NULL/non-NULL information for a/b/c/d/e/f. + * Each combination is unique from states_equal() point of view. + * Explored states checkpoint is created after each iterator next call. + * Iterator convergence logic expects that eventually current state + * would get equal to one of the explored states and thus loop + * exploration would be finished (at-least for a specific path). + * Verifier evicts explored states with high miss to hit ratio + * to to avoid comparing current state with too many explored + * states per instruction. + * This test is designed to "stress test" eviction policy defined using formula: + * + * sl->miss_cnt > sl->hit_cnt * N + N // if true sl->state is evicted + * + * Currently N is set to 64, which allows for 6 variables in this test. + */ + asm volatile ( + "r6 = 0;" /* a */ + "r7 = 0;" /* b */ + "r8 = 0;" /* c */ + "*(u64 *)(r10 - 24) = r6;" /* d */ + "*(u64 *)(r10 - 32) = r6;" /* e */ + "*(u64 *)(r10 - 40) = r6;" /* f */ + "r9 = 0;" /* sum */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + + "*(u64 *)(r10 - 16) = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "r6 = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "r7 = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "r8 = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "*(u64 *)(r10 - 24) = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "*(u64 *)(r10 - 32) = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "*(u64 *)(r10 - 40) = r0;" + + "if r6 == 0 goto +1;" + "r9 += 1;" + "if r7 == 0 goto +1;" + "r9 += 1;" + "if r8 == 0 goto +1;" + "r9 += 1;" + "r0 = *(u64 *)(r10 - 24);" + "if r0 == 0 goto +1;" + "r9 += 1;" + "r0 = *(u64 *)(r10 - 32);" + "if r0 == 0 goto +1;" + "r9 += 1;" + "r0 = *(u64 *)(r10 - 40);" + "if r0 == 0 goto +1;" + "r9 += 1;" + + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_map_lookup_elem), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm_addr(amap) + : __clobber_all + ); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c index 44edecfdfaee9e..e085a51d153ee8 100644 --- a/tools/testing/selftests/bpf/progs/iters_task_vma.c +++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c @@ -30,6 +30,7 @@ int iter_task_vma_for_each(const void *ctx) bpf_for_each(task_vma, vma, task, 0) { if (seen >= 1000) break; + barrier_var(seen); vm_ranges[seen].vm_start = vma->vm_start; vm_ranges[seen].vm_end = vma->vm_end;