diff --git a/assertion/function/preprocess/cfg.go b/assertion/function/preprocess/cfg.go index 28c28cdc..daf25a1f 100644 --- a/assertion/function/preprocess/cfg.go +++ b/assertion/function/preprocess/cfg.go @@ -55,7 +55,14 @@ func (p *Preprocessor) CFG(graph *cfg.CFG, funcDecl *ast.FuncDecl) *cfg.CFG { failureBlock := &cfg.Block{Index: int32(len(graph.Blocks))} graph.Blocks = append(graph.Blocks, failureBlock) - // Perform the (series of) CFG transformations. + // Perform a series of CFG transformations here (for hooks and canonicalization). The order of + // these transformations matters due to canonicalization. Some transformations may expect the + // CFG to be in canonical form, and some transformations may change the CFG structure in a way + // that it needs to be re-canonicalized. + + // split blocks do not require the CFG to be in canonical form, and it may modify the CFG + // structure in a way that it needs to be re-canonicalized. Here, we cleverly bundles the two + // operations together such that we only need to run canonicalization once. for _, block := range graph.Blocks { if block.Live { p.splitBlockOnTrustedFuncs(graph, block, failureBlock) @@ -63,7 +70,15 @@ func (p *Preprocessor) CFG(graph *cfg.CFG, funcDecl *ast.FuncDecl) *cfg.CFG { } for _, block := range graph.Blocks { if block.Live { - p.restructureConditional(graph, block) + p.canonicalizeConditional(graph, block) + } + } + // Replacing conditionals in the CFG requires the CFG to be in canonical form (such that it + // does not have to handle "trustedFunc() && trustedFunc()"), and it will canonicalize the + // modified block by itself. + for _, block := range graph.Blocks { + if block.Live { + p.replaceConditional(graph, block) } } @@ -119,6 +134,10 @@ func copyGraph(graph *cfg.CFG) *cfg.CFG { return newGraph } +// splitBlockOnTrustedFuncs splits the CFG block into two parts upon seeing a trusted function +// from the hook framework (e.g., "require.Nil(t, arg)" to "if arg == nil { }". +// This does not expect the CFG to be in canonical form, and it may change the CFG structure in a +// way that it needs to be re-canonicalized. func (p *Preprocessor) splitBlockOnTrustedFuncs(graph *cfg.CFG, thisBlock, failureBlock *cfg.Block) { for i, node := range thisBlock.Nodes { expr, ok := node.(*ast.ExprStmt) @@ -153,7 +172,39 @@ func (p *Preprocessor) splitBlockOnTrustedFuncs(graph *cfg.CFG, thisBlock, failu } } -func (p *Preprocessor) restructureConditional(graph *cfg.CFG, thisBlock *cfg.Block) { +// replaceConditional calls the hook functions and replaces the conditional expressions in the CFG +// with the returned equivalent expression for analysis. +// +// This function expects the CFG to be in canonical form to fully function (otherwise it may miss +// cases like "trustedFunc() && trustedFunc()"). +// +// It also calls canonicalizeConditional to canonicalize the transformed block such that the CFG +// is still canonical. +func (p *Preprocessor) replaceConditional(graph *cfg.CFG, block *cfg.Block) { + // We only replace conditionals on branching blocks. + if len(block.Nodes) == 0 || len(block.Succs) != 2 { + return + } + call, ok := block.Nodes[len(block.Nodes)-1].(*ast.CallExpr) + if !ok { + return + } + replaced := hook.ReplaceConditional(p.pass, call) + if replaced == nil { + return + } + + block.Nodes[len(block.Nodes)-1] = replaced + // The returned expression may be a binary expression, so we need to canonicalize the CFG again + // after such replacement. + p.canonicalizeConditional(graph, block) +} + +// canonicalizeConditional canonicalizes the conditional CFG structures to make it easier to reason +// about control flows later. For example, it rewrites +// `if !cond {T} {F}` to `if cond {F} {T}` (swap successors), and rewrites +// `if cond1 && cond2 {T} {F}` to `if cond1 {if cond2 {T} else {F}}{F}` (nesting). +func (p *Preprocessor) canonicalizeConditional(graph *cfg.CFG, thisBlock *cfg.Block) { // We only restructure non-empty branching blocks. if len(thisBlock.Nodes) == 0 || len(thisBlock.Succs) != 2 { return @@ -172,23 +223,18 @@ func (p *Preprocessor) restructureConditional(graph *cfg.CFG, thisBlock *cfg.Blo if !ok { return } - if call, ok := cond.(*ast.CallExpr); ok { - if replaced := hook.ReplaceConditional(p.pass, call); replaced != nil { - replaceCond(replaced) - } - } switch cond := cond.(type) { case *ast.ParenExpr: // if a parenexpr, strip and restart - this is done with recursion to account for ((((x)))) case replaceCond(cond.X) - p.restructureConditional(graph, thisBlock) // recur within parens + p.canonicalizeConditional(graph, thisBlock) // recur within parens case *ast.UnaryExpr: if cond.Op == token.NOT { // swap successors - i.e. swap true and false branches swapTrueFalseBranches() replaceCond(cond.X) - p.restructureConditional(graph, thisBlock) // recur within NOT + p.canonicalizeConditional(graph, thisBlock) // recur within NOT } case *ast.BinaryExpr: // Logical AND and Logical OR actually require the exact same short circuiting behavior @@ -209,8 +255,8 @@ func (p *Preprocessor) restructureConditional(graph *cfg.CFG, thisBlock *cfg.Blo replaceFalseBranch(newBlock) } graph.Blocks = append(graph.Blocks, newBlock) - p.restructureConditional(graph, thisBlock) - p.restructureConditional(graph, newBlock) + p.canonicalizeConditional(graph, thisBlock) + p.canonicalizeConditional(graph, newBlock) } // Standardize binary expressions to be of the form `expr OP literal` by swapping `x` and `y`, if `x` is a literal. @@ -272,8 +318,8 @@ func (p *Preprocessor) restructureConditional(graph *cfg.CFG, thisBlock *cfg.Blo Op: token.NOT, X: x, } - replaceCond(newCond) // replaces `ok != true` with `!ok` - p.restructureConditional(graph, thisBlock) // recur to swap true and false branches for the unary expr `!ok` + replaceCond(newCond) // replaces `ok != true` with `!ok` + p.canonicalizeConditional(graph, thisBlock) // recur to swap true and false branches for the unary expr `!ok` } case token.EQL: @@ -287,8 +333,8 @@ func (p *Preprocessor) restructureConditional(graph *cfg.CFG, thisBlock *cfg.Blo Op: token.NOT, X: x, } - replaceCond(newCond) // replaces `ok == false` with `!ok` - p.restructureConditional(graph, thisBlock) // recur to swap true and false branches for the unary expr `!ok` + replaceCond(newCond) // replaces `ok == false` with `!ok` + p.canonicalizeConditional(graph, thisBlock) // recur to swap true and false branches for the unary expr `!ok` } } } diff --git a/hook/replace_conditional.go b/hook/replace_conditional.go index f0379735..ca711089 100644 --- a/hook/replace_conditional.go +++ b/hook/replace_conditional.go @@ -17,16 +17,26 @@ import ( func ReplaceConditional(pass *analysis.Pass, call *ast.CallExpr) ast.Expr { for sig, act := range _replaceConditionals { if sig.match(pass, call) { - return act(call, pass) + return act(pass, call) } } return nil } -type replaceConditionalAction func(call *ast.CallExpr, p *analysis.Pass) ast.Expr +type replaceConditionalAction func(pass *analysis.Pass, call *ast.CallExpr) ast.Expr -// _errorAsAction replaces a call to `errors.As(err, &target)` with the expression `target != nil`. -var _errorAsAction replaceConditionalAction = func(call *ast.CallExpr, p *analysis.Pass) ast.Expr { +// _errorAsAction replaces a call to `errors.As(err, &target)` with an equivalent expression +// `errors.As(err, &target) && target != nil`. Keeping the `errors.As(err, &target)` is important +// since `err` may contain complex expressions that may have nilness issues. +// +// Note that technically `target` can still be nil even if `errors.As(err, &target)` is true. For +// example, if err is a typed nil (e.g., `var err *exec.ExitError`), then `errors.As` would +// actually find a match, but `target` would be set to the typed nil value, resulting in a `nil` +// target. However, in practice this should rarely happen such that even the official documentation +// assumes the target is non-nil after such check [1]. So here we make this assumption as well. +// +// [1] https://pkg.go.dev/errors#As +var _errorAsAction replaceConditionalAction = func(pass *analysis.Pass, call *ast.CallExpr) ast.Expr { if len(call.Args) != 2 { return nil } @@ -37,7 +47,12 @@ var _errorAsAction replaceConditionalAction = func(call *ast.CallExpr, p *analys if unaryExpr.Op != token.AND { return nil } - return newNilBinaryExpr(unaryExpr.X, token.NEQ) + return &ast.BinaryExpr{ + X: call, + Op: token.LAND, + OpPos: call.Pos(), + Y: newNilBinaryExpr(unaryExpr.X, token.NEQ), + } } var _replaceConditionals = map[trustedFuncSig]replaceConditionalAction{ diff --git a/testdata/src/go.uber.org/testing/trustedfuncs.go b/testdata/src/go.uber.org/testing/trustedfuncs.go index f30547a0..1880f006 100644 --- a/testdata/src/go.uber.org/testing/trustedfuncs.go +++ b/testdata/src/go.uber.org/testing/trustedfuncs.go @@ -958,10 +958,42 @@ func testEmpty(t *testing.T, i int, a []int, mp map[int]*int) interface{} { return 0 } -func errorsAs(err error) { - var exitErr *exec.ExitError - if errors.As(err, &exitErr) { - exitErr.Exited() +// nilable(err) +func errorsAs(err error, num string, dummy bool) { + switch num { + case "simple": + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + print(*exitErr) + } + print(*exitErr) //want "unassigned variable `exitErr` dereferenced" + case "not in if block": + var exitErr *exec.ExitError + // Not checking the result of `errors.As` would not guard the variable. + errors.As(err, &exitErr) + print(*exitErr) //want "unassigned variable `exitErr` dereferenced" + case "two errors connected by AND": + var exitErr, anotherErr *exec.ExitError + if errors.As(err, &exitErr) && errors.As(err, &anotherErr) { + print(*exitErr) + print(*anotherErr) + } + case "errors.As with other conditionals connected by AND": + var exitErr *exec.ExitError + if errors.As(err, &exitErr) && dummy { + print(*exitErr) + } + case "errors.As with other conditionals connected by OR": + var exitErr *exec.ExitError + if errors.As(err, &exitErr) || dummy { + print(*exitErr) //want "unassigned variable `exitErr` dereferenced" + } + case "two errors connected by OR": + var exitErr, anotherErr *exec.ExitError + if errors.As(err, &exitErr) || errors.As(err, &anotherErr) { + // We do not know the nilability of either. + print(*exitErr) //want "unassigned variable `exitErr` dereferenced" + print(*anotherErr) //want "unassigned variable `anotherErr` dereferenced" + } } - print(exitErr.Exited()) }