diff --git a/lib/bap/bap.mli b/lib/bap/bap.mli index 48eb8c3e7..22f5588a9 100644 --- a/lib/bap/bap.mli +++ b/lib/bap/bap.mli @@ -1959,7 +1959,7 @@ module Std : sig (** Bil is an instance of Domain. A flat domain with the empty Bil program being the empty element. - *) + *) val domain : stmt list Knowledge.domain (** Instance of the persistence class *) @@ -6574,9 +6574,19 @@ module Std : sig This is a graph where all information is distilled to term identifiers and relations between them, that are also labeled with term identifiers. *) - module Tid : Graph with type node = tid - and type Node.label = tid - and type Edge.label = tid + module Tid : sig + type node = tid + + (** [start] is a pseudo node that is used as the entry node of a graph. *) + val start : node + + (** [exit] is a pseudo node that is used as the exit node of a graph. *) + val exit : node + include Graph with type node := tid + and type Node.label = tid + and type Edge.label = tid + end + end (** Disassembled program. @@ -7294,12 +7304,24 @@ module Std : sig dominators tree is used. *) val free_vars : t -> Var.Set.t - (** [to_graph sub] builds a graph of subroutine [sub]. Graph nodes - are block term identifiers, and edges are labeled with term - identifiers of the jmp terms, that corresponds to the edge. - This representation is useful, if you need to compute some - graph relation on a subroutine, that will be later used to - perform its incremental transformation. *) + (** [to_graph sub] builds a graph of subroutine [sub]. + + Graph nodes are block term identifiers and edges are labeled + with term identifiers of the jmp terms that correspond to + the given edge. + + @since 2.1 the returned graph contains two pseudo-nodes + [Graphs.Tid.start] and [Graphs.Tid.exit] so that all nodes + that have in-degree [0] or that start a strongly connected + component are connected to the [start] node (the same for + [exit] but on the reversed graph). 
+ + Edges from [start] to other nodes are labeled with the + [Graphs.Tid.start] tid. + + Edges from nodes to the [exit] node are labeled with the + [Graphs.Tid.exit] tid. + *) val to_graph : t -> Graphs.Tid.t (** [to_cfg sub] builds a graph representation of a subroutine @@ -7312,6 +7334,21 @@ module Std : sig operation is just a projection, i.e., it has O(0) complexity. *) val of_cfg : Graphs.Ir.t -> t + + (** [compute_liveness sub] computes a set of live variables for each block. + + For a block [b] and solution [s = compute_liveness sub], + [Solution.get s (Term.tid b)] is a set of variables that are + live at the _exit_ from this block. + + A set of variables that are live (free) in the + whole subroutine is the set of variables that are live at the + [Graphs.Tid.start] node. + + @since 2.1 + *) + val compute_liveness : t -> (tid, Var.Set.t) Solution.t + (** other names for the given subroutine.*) val aliases : string list tag @@ -7675,25 +7712,25 @@ module Std : sig (** [reify ()] reifies inputs into a jump term. - Calls and interrupt subroutines invocations are represented - with two edges: the normal edge (denoted [dst]) is the - intra-procedural edge which connects the callsite with the - fall-through destination (if such exists) and an alternative - destination (denoted with [alt]) which represents an - inter-procedural destination between the callsite and the - call destination. + Calls and interrupt subroutines invocations are represented + with two edges: the normal edge (denoted [dst]) is the + intra-procedural edge which connects the callsite with the + fall-through destination (if such exists) and an alternative + destination (denoted with [alt]) which represents an + inter-procedural destination between the callsite and the + call destination. - @param cnd is a core theory term that denotes the - guard condition of a conditional jump. + @param cnd is a core theory term that denotes the + guard condition of a conditional jump. 
- @param alt is the alternative control flow destination. + @param alt is the alternative control flow destination. - @param dst is the direct control flow destination + @param dst is the direct control flow destination - @tid is the jump identifier, if not specified a fresh - new identifier is created. + @tid is the jump identifier, if not specified a fresh + new identifier is created. - *) + *) val reify : ?tid:tid -> ?cnd:Theory.Bool.t Theory.value -> ?alt:dst -> ?dst:dst -> unit -> t @@ -8233,7 +8270,7 @@ module Std : sig (** A factory of rooters. Useful to register custom rooters *) module Factory : Source.Factory.S with type t = t - [@@deprecated "[since 2019-05] use [provide]"] + [@@deprecated "[since 2019-05] use [provide]"] end @@ -8264,7 +8301,7 @@ module Std : sig val provide : t -> unit module Factory : Source.Factory.S with type t = t - [@@deprecated "[since 2019-05] use [provide]"] + [@@deprecated "[since 2019-05] use [provide]"] end diff --git a/lib/bap_sema/bap_sema.ml b/lib/bap_sema/bap_sema.ml index 1245a46bc..3f7168c5d 100644 --- a/lib/bap_sema/bap_sema.ml +++ b/lib/bap_sema/bap_sema.ml @@ -34,6 +34,7 @@ module Std = struct let ssa = Ssa.sub let is_ssa = Ssa.is_transformed let free_vars = FV.free_vars_of_sub + let compute_liveness = FV.compute_liveness end module Taint = Bap_sema_taint diff --git a/lib/bap_sema/bap_sema_free_vars.ml b/lib/bap_sema/bap_sema_free_vars.ml index f4b27fcc9..49b7b4bbd 100644 --- a/lib/bap_sema/bap_sema_free_vars.ml +++ b/lib/bap_sema/bap_sema_free_vars.ml @@ -4,7 +4,7 @@ open Graphlib.Std open Bap_ir module Ssa = Bap_sema_ssa -module G = Bap_ir_graph +module G = Bap_tid_graph let (++) = Set.union and (--) = Set.diff let blk = G.Node.label @@ -20,93 +20,38 @@ let defined_by_blk b = | `Def def -> Set.add kill @@ Ir_def.lhs def | `Jmp _ -> kill) -let free_vars_of_dom_tree dom root = - let rec bfs (vars,kill) root = - let cs = Tree.children dom root in - let vars = vars ++ Seq.fold cs ~init:Var.Set.empty ~f:(fun vars c -> 
- Ir_blk.free_vars (blk c) -- kill ++ vars) in - let kill = kill ++ defined_by_blk (blk root) in - Seq.fold cs ~init:(vars,kill) ~f:bfs in - fst @@ bfs (Var.Set.empty,Var.Set.empty) root - -let dom_free_vars sub = - match Term.first blk_t sub with - | None -> Var.Set.empty - | Some entry -> - let entry = G.Node.create entry in - let cfg = G.of_sub sub in - let dom = Graphlib.dominators (module G) cfg entry in - free_vars_of_dom_tree dom entry +type blk_transfer = { + defs : Var.Set.t; + uses : Var.Set.t; +} + +let blk_defs blk = + Term.enum def_t blk |> + Seq.fold ~init:Var.Set.empty ~f:(fun defs def -> + Set.add defs (Ir_def.lhs def)) + +let block_transitions sub = + Term.enum blk_t sub |> + Seq.fold ~init:Tid.Map.empty ~f:(fun fs blk -> + Map.add_exn fs (Term.tid blk) { + defs = blk_defs blk; + uses = Ir_blk.free_vars blk; + }) + +let compute_liveness sub = + let g = G.create sub in + let init = Solution.create Tid.Map.empty Var.Set.empty in + let tran = block_transitions sub in + Graphlib.fixpoint (module G) ~init ~start:G.exit ~rev:true g + ~merge:Set.union + ~equal:Var.Set.equal + ~f:(fun n vars -> + if Tid.equal n G.exit || Tid.equal n G.start then vars + else + let {defs; uses} = Map.find_exn tran n in + vars -- defs ++ uses) let free_vars_of_sub sub = if Ssa.is_transformed sub then ssa_free_vars sub - else dom_free_vars sub - -let has_sub_exp x = Exp.exists (object - inherit [unit] Exp.finder - method! enter_exp exp search = - if Exp.equal exp x then search.return (Some ()) - else search - end) - -let substitute_exp x y = Exp.map (object - inherit Exp.mapper - method! 
map_exp exp = - if Exp.equal exp x then y else x - end) - -let substitute blk x y = - Ir_blk.elts blk |> Seq.fold ~init:(false,blk) - ~f:(fun (finished,blk) elt -> - if finished then finished,blk else match elt with - | `Phi phi -> - let exps = Ir_phi.values phi |> Seq.map ~f:snd in - if Seq.exists exps ~f:(has_sub_exp x) then - let phi = Ir_phi.map_exp phi ~f:(substitute_exp x y) in - true,Term.update phi_t blk phi - else false,blk - | `Def def -> - if has_sub_exp x (Ir_def.rhs def) then - let def = Ir_def.map_exp def ~f:(substitute_exp x y) in - true, Term.update def_t blk def - else false,blk - | `Jmp jmp -> - if Seq.exists (Ir_jmp.exps jmp) ~f:(has_sub_exp x) then - let jmp = Ir_jmp.map_exp jmp ~f:(substitute_exp x y) in - true, Term.update jmp_t blk jmp - else false,blk) - -exception Finished of sub term - -let finish sub blk = - Exn.raise_without_backtrace (Finished (Term.update blk_t sub blk)) - -let dom_bind_arg dom root sub (var,exp) = - let x = Bil.var var in - let rec bfs root = - let subst root = - let blk = Term.find_exn blk_t sub root in - let finished,blk = substitute blk x exp in - if finished then finish sub blk in - subst root; - Seq.iter (Tree.children dom root) ~f:subst; - Seq.iter (Tree.children dom root) ~f:bfs in - try bfs root; sub with Finished sub -> sub - -let dom_bind_args sub entry args = - let module G = Bap_tid_graph in - let entry = Term.tid entry in - let cfg = G.create sub in - let dom = Graphlib.dominators (module G) cfg entry in - Seq.fold args ~init:sub ~f:(dom_bind_arg dom entry) - -(* we do not provide algorithm that will take advantage of SSA form, - since the latter will work only for variables, but the DOM tree - algorithm will work correctly for any kind of expression. 
*) -let bind_args sub = match Term.first blk_t sub with - | None -> sub - | Some entry -> - Term.enum arg_t sub |> - Seq.map ~f:(fun arg -> Ir_arg.(lhs arg, rhs arg)) |> - dom_bind_args sub entry + else Solution.get (compute_liveness sub) G.start diff --git a/lib/bap_sema/bap_sema_free_vars.mli b/lib/bap_sema/bap_sema_free_vars.mli index 050d8f0b9..05ab2f3ba 100644 --- a/lib/bap_sema/bap_sema_free_vars.mli +++ b/lib/bap_sema/bap_sema_free_vars.mli @@ -1,5 +1,6 @@ open Bap_types.Std +open Graphlib.Std open Bap_ir +val compute_liveness : sub term -> (tid, Var.Set.t) Solution.t val free_vars_of_sub : sub term -> Var.Set.t -val bind_args : sub term -> sub term diff --git a/lib/bap_sema/bap_sema_ssa.ml b/lib/bap_sema/bap_sema_ssa.ml index 7baa9d626..7eeff4494 100644 --- a/lib/bap_sema/bap_sema_ssa.ml +++ b/lib/bap_sema/bap_sema_ssa.ml @@ -26,7 +26,6 @@ type state = { cfg : Cfg.t; dom : tid tree; frontier : tid frontier; - entry : tid; vars : Var.Set.t; } @@ -55,16 +54,18 @@ let iterated_frontier f blks = fixpoint Tid.Set.empty let blk_of_tid sub tid = match Term.find blk_t sub tid with - | Some blk -> blk + | Some blk -> Some blk | None -> - failwithf - "Internal error. Broken invariant in subroutine %s: \ - A term %a is missing" (Ir_sub.name sub) Tid.pps tid - () + if Tid.equal Cfg.start tid || Tid.equal Cfg.exit tid + then None + else failwithf + "Internal error. Broken invariant in subroutine %s: \ + A term %a is missing" (Ir_sub.name sub) Tid.pps tid + () let succs cfg sub tid = - Cfg.Node.succs tid cfg |> Seq.map ~f:(blk_of_tid sub) + Cfg.Node.succs tid cfg |> Seq.filter_map ~f:(blk_of_tid sub) (** [collect_vars] traverses through subroutine [sub] and collects variables, that are live across multiple blocks. 
*) @@ -92,11 +93,6 @@ let substitute vars = (object | Some (d :: _) -> d end)#map_exp -let blk sub tid = - match Term.find blk_t sub tid with - | Some blk -> blk - | _ -> failwithf "failed to get block %a" Tid.str tid () - (** [rename t] performs a renaming of variables in a subroutine [t.sub]. An algorithm is described in section 19.7 of [[2]] and 9.12 of [[3]] (but there is a small error in the latter). The only @@ -130,8 +126,7 @@ let rename t = Ir_def.with_rhs (Ir_def.with_lhs def lhs) rhs) in let rename_jmps blk = Term.map jmp_t blk ~f:(Ir_jmp.map_exp ~f:(substitute vars)) in - let update_phis src dst = - let tid = Term.tid src in + let update_phis tid dst = Term.map phi_t dst ~f:(fun phi -> Ir_phi.values phi |> Seq.fold ~init:phi ~f:(fun phi rhs -> match rhs with @@ -147,21 +142,21 @@ let rename t = Term.enum def_t blk |> Seq.iter ~f:(fun def -> pop (Ir_def.lhs def)) in - let rec rename_block sub blk' = - let tid = Term.tid blk' in - let blk' = blk sub tid in - let blk = blk' |> rename_phis |> rename_defs |> rename_jmps in - let sub = Term.update blk_t sub blk in + let rec rename_block sub' tid = + let sub = match blk_of_tid sub' tid with + | None -> sub' + | Some blk -> + blk |> rename_phis |> rename_defs |> rename_jmps |> + Term.update blk_t sub' in let sub = succs t.cfg sub tid |> Seq.fold ~init:sub ~f:(fun sub dst -> - Term.update blk_t sub (update_phis blk dst)) in + Term.update blk_t sub (update_phis tid dst)) in let children = Cfg.nodes t.cfg |> - Seq.filter ~f:(Tree.is_child_of ~parent:tid t.dom) |> - Seq.map ~f:(blk_of_tid sub) in + Seq.filter ~f:(Tree.is_child_of ~parent:tid t.dom) in let sub = Seq.fold children ~init:sub ~f:rename_block in - pop_defs blk'; + Option.iter (blk_of_tid sub' tid) ~f:pop_defs; sub in - rename_block t.sub (blk_of_tid t.sub t.entry) + rename_block t.sub Cfg.start let has_phi_for_var blk x = Term.enum phi_t blk |> Seq.exists ~f:(fun phi -> @@ -181,37 +176,25 @@ let insert_phi_node ins blk x = let insert_phi_nodes t : sub 
term = Set.fold t.vars ~init:t.sub ~f:(fun sub x -> let bs = blocks_that_define_var x sub in - iterated_frontier t.frontier (t.entry :: bs) |> + iterated_frontier t.frontier (Cfg.start :: bs) |> Set.fold ~init:sub ~f:(fun sub tid -> - let blk = blk_of_tid sub tid in - let ins = Cfg.Node.preds tid t.cfg |> - Seq.map ~f:(blk_of_tid sub) in - Term.update blk_t sub (insert_phi_node ins blk x))) + match blk_of_tid sub tid with + | None -> sub + | Some blk -> + let ins = Cfg.Node.preds tid t.cfg |> + Seq.filter_map ~f:(blk_of_tid sub) in + Term.update blk_t sub (insert_phi_node ins blk x))) let is_transformed sub = Term.has_attr sub ssa_form (** transforms subroutine into a semi-pruned SSA form. *) let sub sub = - match Term.first blk_t sub with - | Some entry when not (is_transformed sub) -> - let entry = Term.tid entry in + if not (is_transformed sub) then let cfg = Cfg.create sub in - let cfg,sub,entry = - if Cfg.Node.degree ~dir:`In entry cfg = 0 - then cfg,sub,entry - else - let blk = Ir_blk.create () in - let jmp = Ir_jmp.create_goto (Direct entry) in - let blk = Term.append jmp_t blk jmp in - let sub = Term.prepend blk_t sub blk in - let entry' = Cfg.Node.create (Term.tid blk) in - let edge = Cfg.Edge.create entry' entry (Term.tid jmp) in - let cfg = Cfg.Edge.insert edge cfg in - cfg,sub,entry' in let vars = collect_vars sub in - let dom = Graphlib.dominators (module Cfg) cfg entry in + let dom = Graphlib.dominators (module Cfg) cfg Cfg.start in let frontier = Graphlib.dom_frontier (module Cfg) cfg dom in - let t = {entry; dom; frontier; cfg; sub; vars} in + let t = {dom; frontier; cfg; sub; vars} in let sub = rename {t with sub = insert_phi_nodes t} in Term.set_attr sub ssa_form () - | _ -> sub + else sub diff --git a/lib/bap_types/bap_ir.ml b/lib/bap_types/bap_ir.ml index 42809966d..e36e5881c 100644 --- a/lib/bap_types/bap_ir.ml +++ b/lib/bap_types/bap_ir.ml @@ -831,7 +831,7 @@ module Ir_phi = struct (String.concat ~sep:", " @@ List.map ~f:(fun (id,exp) -> let 
exp = Rhs.exp exp in - Format.asprintf "[%a, %%%a]" Bap_exp.pp exp Tid.pp id) + Format.asprintf "[%a, %a]" Bap_exp.pp exp Tid.pp id) (Map.to_alist map)) let pp_self_slots ds ppf {Phi.var; map} = diff --git a/lib/bap_types/bap_tid_graph.ml b/lib/bap_types/bap_tid_graph.ml index 58bd83765..8473dd3e7 100644 --- a/lib/bap_types/bap_tid_graph.ml +++ b/lib/bap_types/bap_tid_graph.ml @@ -2,20 +2,49 @@ open Core_kernel open Regular.Std open Graphlib.Std open Bap_ir -open Bap_ir_graph + module G = Graphlib.Make(Tid)(Tid) -let create sub = +let of_sub sub = Term.enum blk_t sub |> Seq.fold ~init:G.empty ~f:(fun g src -> let sid = Term.tid src in let g = G.Node.insert sid g in Term.enum jmp_t src |> Seq.fold ~init:g ~f:(fun g jmp -> - match succ_tid_of_jmp jmp with + match Bap_ir_graph.succ_tid_of_jmp jmp with | None -> g | Some did -> let jid = Term.tid jmp in let edge = G.Edge.create sid did jid in G.Edge.insert edge g)) +let start = Tid.for_name "%start-pseudo-node%" +let exit = Tid.for_name "%exit-pseudo-node%" + +let connect_with_exit n = + if Tid.equal n exit then ident + else G.Edge.insert (G.Edge.create n exit exit) + +let connect_with_start n = + if Tid.equal n start then ident + else + G.Edge.insert @@ + G.Edge.create start n start + +let create sub = + let g = of_sub sub in + G.nodes g |> Seq.fold ~init:g ~f:(fun g n -> + if G.Node.degree ~dir:`Out n g = 0 + then connect_with_exit n g else + if G.Node.degree ~dir:`In n g = 0 + then connect_with_start n g + else g) |> fun g -> + Graphlib.depth_first_search (module G) g + ~init:g ~start + ~start_tree:connect_with_start + |> fun g -> + Graphlib.depth_first_search (module G) g + ~rev:true ~init:g ~start:exit + ~start_tree:connect_with_exit + include G diff --git a/lib/bap_types/bap_tid_graph.mli b/lib/bap_types/bap_tid_graph.mli index 1e951c71b..61cde1116 100644 --- a/lib/bap_types/bap_tid_graph.mli +++ b/lib/bap_types/bap_tid_graph.mli @@ -5,5 +5,6 @@ include Graph with type node = tid and type Node.label = tid and 
type Edge.label = tid - +val start : node +val exit : node val create : sub term -> t