From fbffee7a3d44256b4e567ec97ab2b4168fe3d956 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Tue, 6 Aug 2024 17:29:03 -0400 Subject: [PATCH 01/14] Added initial code for underconstrained execution. Registers are currently underconstrained correctly, however memory is not. Added cmp_simple_cond_underconstrained. --- .idea/cozy.iml | 2 +- .idea/misc.xml | 2 +- cozy/analysis.py | 23 +++- cozy/concretization_strategies.py | 9 ++ cozy/project.py | 4 +- cozy/session.py | 134 +++++++++++++++---- examples/cmp_simple_cond_underconstrained.py | 19 +++ 7 files changed, 157 insertions(+), 36 deletions(-) create mode 100644 cozy/concretization_strategies.py create mode 100644 examples/cmp_simple_cond_underconstrained.py diff --git a/.idea/cozy.iml b/.idea/cozy.iml index 3cc2dd7..eec49a9 100644 --- a/.idea/cozy.iml +++ b/.idea/cozy.iml @@ -4,7 +4,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index d12e803..89f47df 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,5 +3,5 @@ - + \ No newline at end of file diff --git a/cozy/analysis.py b/cozy/analysis.py index 9789113..5a308f9 100644 --- a/cozy/analysis.py +++ b/cozy/analysis.py @@ -231,7 +231,8 @@ def difference(self, compute_reg_diff=True, compute_side_effect_diff=True, use_unsat_core=True, - simplify=False) -> DiffResult | None: + simplify=False, + extra_constraints=[]) -> DiffResult | None: """ Compares two states to find differences in memory. This function will return None if the two states have\ non-intersecting inputs. Otherwise, it will return a dict of addresses and a dict of registers which are\ @@ -271,6 +272,9 @@ def difference(self, joint_solver.add(sl.solver.constraints) joint_solver.add(sr.solver.constraints) + for constraint in extra_constraints: + joint_solver.add(constraint) + try: is_sat = joint_solver.satisfiable() except claripy.ClaripyZ3Error as err: @@ -574,6 +578,17 @@ def __init__(self, pre_patched: RunResult, post_patched: RunResult, ignore_addrs total_num_pairs = len(states_pre_patched) * len(states_post_patched) count = 0 + extra_constraints = [] + + # If we use underconstrained symbolic execution, the initial register states for the two executions + # will not be the same. So let's go ahead and construct constraints that inform the SMT solver that the + # registers are the same at the start. + if pre_patched.initial_registers is not None and post_patched.initial_registers is not None: + for reg_name in pre_patched.initial_registers.keys(): + value_left = pre_patched.initial_registers[reg_name] + value_right = post_patched.initial_registers[reg_name] + extra_constraints.append(value_left == value_right) + for (i, state_pre) in enumerate(states_pre_patched): for (j, state_post) in enumerate(states_post_patched): count += 1 @@ -589,12 +604,14 @@ def __init__(self, pre_patched: RunResult, post_patched: RunResult, ignore_addrs is_deadended_comparison = isinstance(state_pre, DeadendedState) and isinstance(state_post, DeadendedState) diff = memoized_diff.difference( - state_pre.state, state_post.state, pair_ignore_addrs, + state_pre.state, state_post.state, + pair_ignore_addrs, compute_mem_diff=compare_memory if is_deadended_comparison else False, compute_reg_diff=compare_registers if is_deadended_comparison else False, compute_side_effect_diff=compare_side_effects if is_deadended_comparison else False, use_unsat_core=use_unsat_core, - simplify=simplify + simplify=simplify, + extra_constraints=extra_constraints ) if diff is not None: diff --git a/cozy/concretization_strategies.py b/cozy/concretization_strategies.py new file mode 100644 index 0000000..8ecb778 --- /dev/null +++ b/cozy/concretization_strategies.py @@ -0,0 +1,9 @@ +import angr + +class SimConcretizationStrategyNorepeatsRangeMin(angr.concretization_strategies.SimConcretizationStrategyNorepeatsRange): + def _any(self, *args, **kwargs): + # The default SimConcretizationStrategyNorepeatsRange class chooses a concrete address using the _any + # method. By using this class, we redirect the _any call to _min instead. This ensures that the concretized + # address is low in memory, guaranteeing sufficient space for member contents of the object that may + # be located at the symbolic address. + return self._min(*args, **kwargs) \ No newline at end of file diff --git a/cozy/project.py b/cozy/project.py index d6b6766..14bfb8b 100644 --- a/cozy/project.py +++ b/cozy/project.py @@ -70,7 +70,7 @@ def add_prototype(self, fun: str | int, fun_prototype: str) -> None: """ self.fun_prototypes[fun] = fun_prototype - def session(self, start_fun: str | int | None=None) -> Session: + def session(self, start_fun: str | int | None=None, underconstrained_execution: bool=False) -> Session: """ Returns a new session derived from this project. @@ -78,7 +78,7 @@ def session(self, start_fun: str | int | None=None) -> Session: :return: The fresh session. :rtype: Session """ - return Session(self, start_fun=start_fun) + return Session(self, start_fun=start_fun, underconstrained_execution=underconstrained_execution) @property def cfg(self): diff --git a/cozy/session.py b/cozy/session.py index 439f013..4ddce74 100644 --- a/cozy/session.py +++ b/cozy/session.py @@ -1,4 +1,5 @@ import sys +import uuid from collections.abc import Callable import portion as P @@ -8,10 +9,15 @@ from angr.sim_manager import ErrorRecord, SimulationManager from . import log, side_effect +from .concretization_strategies import SimConcretizationStrategyNorepeatsRangeMin from .directive import Directive, Assume, Assert, VirtualPrint, ErrorDirective, AssertType, Breakpoint, Postcondition from .terminal_state import AssertFailedState, ErrorState, DeadendedState, PostconditionFailedState, SpinningState import cozy +# This size is used to malloc nonaliasing memory contents in underconstrained symbolic execution +UNDERCONSTRAINED_CHUNK_SIZE = 20 * 1_000_000 # 20 megabytes +UNDERCONSTRAINED_GRANULARITY = 5_000 # The amount of space each concretized memory write is granted + class RunResult: """ This class is used for storing the results of running a session. @@ -30,13 +36,15 @@ class RunResult: def __init__(self, deadended: list[DeadendedState], errored: list[ErrorState], asserts_failed: list[AssertFailedState], assume_warnings: list[tuple[Assume, SimState]], postconditions_failed: list[PostconditionFailedState], - spinning: list[SpinningState]): + spinning: list[SpinningState], + initial_registers: dict[str, claripy.ast.Bits] | None): self.deadended = deadended self.errored = errored self.asserts_failed = asserts_failed self.assume_warnings = assume_warnings self.postconditions_failed = postconditions_failed self.spinning = spinning + self.initial_registers = initial_registers @property def assertion_triggered(self) -> bool: @@ -551,6 +559,23 @@ def explore(self, simgr): if self.cache_intermediate_info: _save_states(simgr.active) +# Pass a state.arch.registers to this function and receive a set of top level registers in return. This can be used +# for enumerating registers without having to worry about subregisters +""" +def top_level_regs(reg_info: dict[str, tuple[int, int]]) -> set[str]: + results = P.IntervalDict() + for (name, (index, size)) in reg_info.items(): + interval = P.closedopen(index, index+size) + current_val = results.get(index) + if current_val is None: + results[interval] = (name, interval) + else: + (_, current_interval) = current_val + if len(interval) > len(current_interval): + results[interval] = (name, interval) + return {name for (name, _) in results.values()} +""" + class Session: """ A session is a particular run of a project, consisting of attached directives (asserts/assumes).\ @@ -565,27 +590,41 @@ class Session: :ivar list[Directive] directives: The directives added to this session. :ivar bool has_run: True if the :py:meth:`cozy.project.Session.run` method has been called, otherwise False. """ - def __init__(self, proj, start_fun: str | int | None=None): + def __init__(self, proj, start_fun: str | int | None=None, underconstrained_execution: bool=False): """ Constructs a session derived from a project. The :py:meth:`cozy.project.Project.session` is the preferred method for creating a session, not this constructor. """ self.proj = proj self.start_fun = start_fun - - state_options = { + self.underconstrained_execution = underconstrained_execution + + # Unfortunately adding options after constructing a base state does not work. So we have to define + # all the options up front in the __init__ constructor rather than _run + if underconstrained_execution: + add_options = { + angr.options.UNDER_CONSTRAINED_SYMEXEC, + angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS, + angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY + } + else: + add_options = { angr.options.ZERO_FILL_UNCONSTRAINED_MEMORY, angr.options.ZERO_FILL_UNCONSTRAINED_REGISTERS, - angr.options.SYMBOLIC_WRITE_ADDRESSES, - angr.options.TRACK_MEMORY_ACTIONS, - angr.options.TRACK_REGISTER_ACTIONS, + angr.options.SYMBOLIC_WRITE_ADDRESSES } + add_options.add(angr.options.TRACK_MEMORY_ACTIONS) + add_options.add(angr.options.TRACK_REGISTER_ACTIONS) + # Create the initial state if start_fun is None: - self.state = self.proj.angr_proj.factory.entry_state(add_options=state_options) + self.state = self.proj.angr_proj.factory.entry_state(add_options=add_options) else: - self.state = self.proj.angr_proj.factory.blank_state(add_options=state_options) + self.state = self.proj.angr_proj.factory.blank_state(add_options=add_options) + + if underconstrained_execution: + self._initialize_regs(self.state) self.state.inspect.b('mem_write', when=angr.BP_AFTER, action=_on_mem_write) self.state.globals['mem_writes'] = P.IntervalDict() @@ -597,6 +636,17 @@ def __init__(self, proj, start_fun: str | int | None=None): self.directives = [] self.has_run = False + def _initialize_regs(self, state: angr.SimState): + # Make sure any unconstrained registers are initialized by poking them with a getattr request + top_level_regs = set(state.arch.register_names.values()) + # Initialize top level registers first. This ensures that a register like rax is initialized before eax + for reg_name in top_level_regs: + getattr(state.regs, reg_name) + # Initialize any remaining registers that weren't covered by the first loop. On platforms like x64 this + # includes flag registers. + for reg_name in dir(state.regs): + getattr(state.regs, reg_name) + def store_fs(self, filename: str, simfile: angr.SimFile) -> None: """ Stores a file in a virtual filesystem available during execution. This method simply forwards the arguments\ @@ -683,7 +733,7 @@ def start_fun_addr(self): else: return None - def _call(self, args: list[claripy.ast.bits], cache_intermediate_info: bool=True, + def _call(self, args: list[claripy.ast.bits] | None, cache_intermediate_info: bool=True, ret_addr: int | None=None) -> SimulationManager: if self.has_run: raise RuntimeError("This session has already been run once. Make a new session to run again.") @@ -704,26 +754,44 @@ def _call(self, args: list[claripy.ast.bits], cache_intermediate_info: bool=True kwargs = dict() if ret_addr is None else {"ret_addr": ret_addr} - state = self.proj.angr_proj.factory.call_state(fun_addr, *args, base_state=self.state, - prototype=fun_prototype, - add_options={angr.options.ZERO_FILL_UNCONSTRAINED_MEMORY, - angr.options.ZERO_FILL_UNCONSTRAINED_REGISTERS, - angr.options.SYMBOLIC_WRITE_ADDRESSES}, - **kwargs) + if args is None: + args = [] + fun_prototype = None + + state: SimState = self.proj.angr_proj.factory.call_state( + fun_addr, *args, base_state=self.state, prototype=fun_prototype, **kwargs + ) + else: + state: SimState = self.state + + if self.underconstrained_execution: + single = angr.concretization_strategies.SimConcretizationStrategySingle() + big_memory_chunk = self.malloc(UNDERCONSTRAINED_CHUNK_SIZE, name="underconstrained_memory") + # Any time we want to concretize an unconstrained value, return a fresh chunk of memory. The maximum size + # of a concretized memory address should be UNCONSTRAINED_GRANULARITY. + nonaliasing = SimConcretizationStrategyNorepeatsRangeMin(uuid.uuid4(), min=big_memory_chunk, granularity=UNDERCONSTRAINED_GRANULARITY) + + read_strategies = state.memory.read_strategies + read_strategies.clear() + read_strategies.append(single) + read_strategies.append(nonaliasing) + + write_strategies = state.memory.write_strategies + write_strategies.clear() + write_strategies.append(single) + write_strategies.append(nonaliasing) else: - state = self.state + # This concretization strategy is necessary for nullable symbolic pointers. The default + # concretization strategies attempts to concretize to integers within a small (128) byte + # range. If that fails, it just selects the maximum possible concrete value. This strategy + # will ensure that we generate up to 128 possible concrete values. + concrete_strategy = angr.concretization_strategies.SimConcretizationStrategyUnlimitedRange(128) + state.memory.read_strategies.insert(0, concrete_strategy) + state.memory.write_strategies.insert(0, concrete_strategy) if cache_intermediate_info: _save_states([state]) - # This concretization strategy is necessary for nullable symbolic pointers. The default - # concretization strategies attempts to concretize to integers within a small (128) byte - # range. If that fails, it just selects the maximum possible concrete value. This strategy - # will ensure that we generate up to 128 possible concrete values. - concrete_strategy = angr.concretization_strategies.SimConcretizationStrategyUnlimitedRange(128) - state.memory.read_strategies.insert(0, concrete_strategy) - state.memory.write_strategies.insert(0, concrete_strategy) - return self.proj.angr_proj.factory.simulation_manager(state) def _session_exploration(self, cache_intermediate_info: bool=True) -> _SessionExploration: @@ -744,16 +812,21 @@ def _run_result(self, simgr: SimulationManager, sess_exploration: _SessionExplor else: spinning = [] - return RunResult(deadended, errored, asserts_failed, sess_exploration.assume_warnings, postconditions_failed, spinning) + if self.underconstrained_execution: + initial_registers = {reg_name: getattr(self.state.regs, reg_name) for reg_name in dir(self.state.regs)} + else: + initial_registers = None + + return RunResult(deadended, errored, asserts_failed, sess_exploration.assume_warnings, postconditions_failed, spinning, initial_registers) - def run(self, args: list[claripy.ast.bits], cache_intermediate_info: bool=True, ret_addr: int | None=None, + def run(self, args: list[claripy.ast.bits] | None=None, cache_intermediate_info: bool=True, ret_addr: int | None=None, loop_bound: int | None=None) -> RunResult: """ Runs a session to completion, either starting from the start_fun used to create the session, or from the\ program start. Note that currently a session may be run only once. If run is called multiple times, a\ RuntimeError will be thrown. - :param list[claripy.ast.bits] args: The arguments to pass to the function. angr will utilize the function's\ + :param list[claripy.ast.bits] | None args: The arguments to pass to the function. angr will utilize the function's\ type signature to figure out the calling convention to use with the arguments. :param bool cache_intermediate_info: If this flag is True, then information about intermediate states will be cached. This is required for dumping the execution graph which is used in visualization. @@ -764,7 +837,10 @@ def run(self, args: list[claripy.ast.bits], cache_intermediate_info: bool=True, :rtype: RunResult """ - simgr = self._call(list(args), cache_intermediate_info=cache_intermediate_info, ret_addr=ret_addr) + if args is None and not self.underconstrained_execution: + raise ValueError("The args passed to run can only be None if underconstrained_execution has been enabled.") + + simgr = self._call(args, cache_intermediate_info=cache_intermediate_info, ret_addr=ret_addr) if isinstance(loop_bound, int): simgr.use_technique(angr.exploration_techniques.LocalLoopSeer(bound=loop_bound)) diff --git a/examples/cmp_simple_cond_underconstrained.py b/examples/cmp_simple_cond_underconstrained.py new file mode 100644 index 0000000..97a0962 --- /dev/null +++ b/examples/cmp_simple_cond_underconstrained.py @@ -0,0 +1,19 @@ +# coding: utf-8 +from cozy.project import Project +from cozy.analysis import Comparison +import cozy.execution_graph as execution_graph + +import claripy + +proj_pre = Project('test_programs/simple_branch/simpleBranch-pre') +proj_post = Project('test_programs/simple_branch/simpleBranch-post') + +sess_pre = proj_pre.session("my_fun", underconstrained_execution=True) +sess_post = proj_post.session("my_fun", underconstrained_execution=True) + +rslt_pre = sess_pre.run() +rslt_post = sess_post.run() + +comparison = Comparison(rslt_pre, rslt_post) + +execution_graph.visualize_comparison(proj_pre, proj_post, rslt_pre, rslt_post, comparison, args=[], num_examples=2, open_browser=True, include_actions=True) From dcf163ee0a65b62d91c36b232c85f0c3143983d8 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Thu, 8 Aug 2024 16:51:27 -0400 Subject: [PATCH 02/14] More work on underconstrained execution. Added class that allows for unconstrained shared backing. --- cozy/concretization_strategies.py | 9 ---- cozy/session.py | 8 ++-- cozy/underconstrained.py | 48 +++++++++++++++++++ .../cmp_buff_overflow_underconstrained.py | 18 +++++++ 4 files changed, 71 insertions(+), 12 deletions(-) delete mode 100644 cozy/concretization_strategies.py create mode 100644 cozy/underconstrained.py create mode 100644 examples/cmp_buff_overflow_underconstrained.py diff --git a/cozy/concretization_strategies.py b/cozy/concretization_strategies.py deleted file mode 100644 index 8ecb778..0000000 --- a/cozy/concretization_strategies.py +++ /dev/null @@ -1,9 +0,0 @@ -import angr - -class SimConcretizationStrategyNorepeatsRangeMin(angr.concretization_strategies.SimConcretizationStrategyNorepeatsRange): - def _any(self, *args, **kwargs): - # The default SimConcretizationStrategyNorepeatsRange class chooses a concrete address using the _any - # method. By using this class, we redirect the _any call to _min instead. This ensures that the concretized - # address is low in memory, guaranteeing sufficient space for member contents of the object that may - # be located at the symbolic address. - return self._min(*args, **kwargs) \ No newline at end of file diff --git a/cozy/session.py b/cozy/session.py index 4ddce74..16884d2 100644 --- a/cozy/session.py +++ b/cozy/session.py @@ -9,7 +9,7 @@ from angr.sim_manager import ErrorRecord, SimulationManager from . import log, side_effect -from .concretization_strategies import SimConcretizationStrategyNorepeatsRangeMin +from .underconstrained import SimConcretizationStrategyNorepeatsRangeMin from .directive import Directive, Assume, Assert, VirtualPrint, ErrorDirective, AssertType, Breakpoint, Postcondition from .terminal_state import AssertFailedState, ErrorState, DeadendedState, PostconditionFailedState, SpinningState import cozy @@ -607,21 +607,23 @@ def __init__(self, proj, start_fun: str | int | None=None, underconstrained_exec angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS, angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY } + plugin_preset = "underconstrained" else: add_options = { angr.options.ZERO_FILL_UNCONSTRAINED_MEMORY, angr.options.ZERO_FILL_UNCONSTRAINED_REGISTERS, angr.options.SYMBOLIC_WRITE_ADDRESSES } + plugin_preset = "default" add_options.add(angr.options.TRACK_MEMORY_ACTIONS) add_options.add(angr.options.TRACK_REGISTER_ACTIONS) # Create the initial state if start_fun is None: - self.state = self.proj.angr_proj.factory.entry_state(add_options=add_options) + self.state = self.proj.angr_proj.factory.entry_state(add_options=add_options, plugin_preset=plugin_preset) else: - self.state = self.proj.angr_proj.factory.blank_state(add_options=add_options) + self.state = self.proj.angr_proj.factory.blank_state(add_options=add_options, plugin_preset=plugin_preset) if underconstrained_execution: self._initialize_regs(self.state) diff --git a/cozy/underconstrained.py b/cozy/underconstrained.py new file mode 100644 index 0000000..e9b4899 --- /dev/null +++ b/cozy/underconstrained.py @@ -0,0 +1,48 @@ +import angr +from angr.storage.memory_mixins import DefaultMemory + + +class SimConcretizationStrategyNorepeatsRangeMin(angr.concretization_strategies.SimConcretizationStrategyNorepeatsRange): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.extra_constraints = [] + self.single = angr.concretization_strategies.SimConcretizationStrategySingle() + + def _concretize(self, memory, addr, **kwargs): + single_value = self.single._concretize(memory, addr, extra_constraints=self.extra_constraints) + if single_value is not None: + return single_value + else: + results = super()._concretize(memory, addr, extra_constraints=self.extra_constraints) + self.extra_constraints.append(results[0] == addr) + return results + + def _any(self, *args, **kwargs): + # The default SimConcretizationStrategyNorepeatsRange class chooses a concrete address using the _any + # method. By using this class, we redirect the _any call to _min instead. This ensures that the concretized + # address is low in memory, guaranteeing sufficient space for member contents of the object that may + # be located at the symbolic address. + return self._min(*args, **kwargs) + +class DefaultMemoryUnderconstrained(DefaultMemory): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.default_backer = DefaultMemory(*args, **kwargs) + + def set_default_backer(self, backer: DefaultMemory): + self.default_backer = backer + + def copy(self, *args, **kwargs): + o = super().copy(*args, **kwargs) + o.default_backer = self.default_backer + return o + + def _default_value(self, addr, size, **kwargs): + if angr.sim_options.UNDER_CONSTRAINED_SYMEXEC in self.state.options and type(addr) is int: + self.default_backer.state = self.state + return self.default_backer.load(addr, size) + return super()._default_value(addr, size, **kwargs) + +underconstrained_preset = angr.SimState._presets['default'].copy() +underconstrained_preset.add_default_plugin("sym_memory", DefaultMemoryUnderconstrained) +angr.SimState.register_preset("underconstrained", underconstrained_preset) \ No newline at end of file diff --git a/examples/cmp_buff_overflow_underconstrained.py b/examples/cmp_buff_overflow_underconstrained.py new file mode 100644 index 0000000..add8ba5 --- /dev/null +++ b/examples/cmp_buff_overflow_underconstrained.py @@ -0,0 +1,18 @@ +import claripy +import cozy.analysis as analysis +import cozy.claripy_ext as claripy_ext +from cozy.directive import Assume +from cozy.project import Project +from cozy.constants import * +import cozy.primitives as primitives + +pre_proj = Project('test_programs/buff_overflow/buff_overflow') +pre_sess = pre_proj.session('patch_fun', underconstrained_execution=True) +pre_patched = pre_sess.run() + +post_proj = Project('test_programs/buff_overflow/buff_overflow_patched') +post_sess = post_proj.session('patch_fun', underconstrained_execution=True) +post_patched = post_sess.run() + +comparison_results = analysis.Comparison(pre_patched, post_patched) +print(comparison_results.report([])) \ No newline at end of file From 021d10f1007183c69ff3c82d7b2f9aeb96769a51 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Fri, 9 Aug 2024 13:13:05 -0400 Subject: [PATCH 03/14] Added support for underconstrained_initial_state to Session constructor. Underconstrained memory backer between sessions can now set. Added hueristic to underconstrained concretization strategy for arrays. Underconstrained concretization strategy can now evaluate to multiple values for cases where an array access is being made. --- cozy/analysis.py | 9 -- cozy/claripy_ext.py | 8 ++ cozy/project.py | 18 +++- cozy/session.py | 99 ++++++++++--------- cozy/underconstrained.py | 59 +++++++++-- .../cmp_buff_overflow_underconstrained.py | 9 +- 6 files changed, 128 insertions(+), 74 deletions(-) diff --git a/cozy/analysis.py b/cozy/analysis.py index 5a308f9..27755ed 100644 --- a/cozy/analysis.py +++ b/cozy/analysis.py @@ -580,15 +580,6 @@ def __init__(self, pre_patched: RunResult, post_patched: RunResult, ignore_addrs extra_constraints = [] - # If we use underconstrained symbolic execution, the initial register states for the two executions - # will not be the same. So let's go ahead and construct constraints that inform the SMT solver that the - # registers are the same at the start. - if pre_patched.initial_registers is not None and post_patched.initial_registers is not None: - for reg_name in pre_patched.initial_registers.keys(): - value_left = pre_patched.initial_registers[reg_name] - value_right = post_patched.initial_registers[reg_name] - extra_constraints.append(value_left == value_right) - for (i, state_pre) in enumerate(states_pre_patched): for (j, state_post) in enumerate(states_post_patched): count += 1 diff --git a/cozy/claripy_ext.py b/cozy/claripy_ext.py index 40fec6b..4b0e7f2 100644 --- a/cozy/claripy_ext.py +++ b/cozy/claripy_ext.py @@ -39,6 +39,14 @@ def simplify_kb(expr: claripy.ast.bits, kb: claripy.ast.Bool) -> claripy.ast.bit def get_symbol_name(sym): return sym.args[0] +def sub_asts(expr): + yield expr + for child in expr.children_asts(): + yield child + +def sym_variables(expr): + return {child for child in sub_asts(expr) if child.depth == 1 and child.symbolic} + def model(constraints, extra_symbols: set[typing.Union[claripy.BVS, claripy.FPS]] | frozenset[typing.Union[claripy.BVS, claripy.FPS]]=frozenset(), n=1, diff --git a/cozy/project.py b/cozy/project.py index 14bfb8b..ea024f6 100644 --- a/cozy/project.py +++ b/cozy/project.py @@ -6,7 +6,7 @@ from angr import SimProcedure from cle import Backend -from cozy.session import Session +from cozy.session import Session, UnderconstrainedMachineState class Project: @@ -70,15 +70,25 @@ def add_prototype(self, fun: str | int, fun_prototype: str) -> None: """ self.fun_prototypes[fun] = fun_prototype - def session(self, start_fun: str | int | None=None, underconstrained_execution: bool=False) -> Session: + def session(self, start_fun: str | int | None=None, underconstrained_execution: bool=False, + underconstrained_initial_state: UnderconstrainedMachineState | None=None) -> Session: """ Returns a new session derived from this project. - :param str | int | None start_fun: The name or address of the function which this session will start with. If None is specified, then the program will start at the entry point (main function). + :param str | int | None start_fun: The name or address of the function which this session will start with.\ + If None is specified, then the program will start at the entry point (main function). + :param bool underconstrained_execution: Set to True to enable underconstrained symbolic execution. With this\ + option, all initial registers will be initialized with unconstrained fresh symbolic variables, and memory\ + contents that are not initialized will likewise return fresh unconstrained symbolic variables. + :param UnderconstrainedMachineState underconstrained_initial_state: If this value is not None, then\ + the inferred memory layout from a previous unconstrained symbolic execution should be used. You can retrieve\ + this value from a previous :py:class:`~cozy.session.RunResult` via the\ + :py:class:`cozy.session.RunResult.underconstrained_machine_state` field. :return: The fresh session. :rtype: Session """ - return Session(self, start_fun=start_fun, underconstrained_execution=underconstrained_execution) + return Session(self, start_fun=start_fun, underconstrained_execution=underconstrained_execution, + underconstrained_initial_state=underconstrained_initial_state) @property def cfg(self): diff --git a/cozy/session.py b/cozy/session.py index 16884d2..8585d88 100644 --- a/cozy/session.py +++ b/cozy/session.py @@ -9,7 +9,7 @@ from angr.sim_manager import ErrorRecord, SimulationManager from . import log, side_effect -from .underconstrained import SimConcretizationStrategyNorepeatsRangeMin +from .underconstrained import SimConcretizationStrategyUnderconstrained from .directive import Directive, Assume, Assert, VirtualPrint, ErrorDirective, AssertType, Breakpoint, Postcondition from .terminal_state import AssertFailedState, ErrorState, DeadendedState, PostconditionFailedState, SpinningState import cozy @@ -18,6 +18,14 @@ UNDERCONSTRAINED_CHUNK_SIZE = 20 * 1_000_000 # 20 megabytes UNDERCONSTRAINED_GRANULARITY = 5_000 # The amount of space each concretized memory write is granted +class UnderconstrainedMachineState: + def __init__(self, initial_registers: dict[str, claripy.ast.Bits], + default_backer: angr.storage.DefaultMemory, + concretization_strategy: SimConcretizationStrategyUnderconstrained): + self.initial_registers = initial_registers + self.default_backer = default_backer + self.concretization_strategy = concretization_strategy + class RunResult: """ This class is used for storing the results of running a session. @@ -32,19 +40,21 @@ class RunResult: :ivar list[PostconditionFailedState] postconditions_failed: States where the function returned, and the assertion\ as part of the postcondition could be falsified. :ivar list[SpinningState] spinning: States that were stashed due to a loop bound being breached. + :ivar UnderconstrainedMachineState | None underconstrained_machine_state: The inferred memory layout of the input\ + machine state. This field is not None only if underconstrained_execution is enabled for the session. """ def __init__(self, deadended: list[DeadendedState], errored: list[ErrorState], asserts_failed: list[AssertFailedState], assume_warnings: list[tuple[Assume, SimState]], postconditions_failed: list[PostconditionFailedState], spinning: list[SpinningState], - initial_registers: dict[str, claripy.ast.Bits] | None): + underconstrained_machine_state: UnderconstrainedMachineState | None): self.deadended = deadended self.errored = errored self.asserts_failed = asserts_failed self.assume_warnings = assume_warnings self.postconditions_failed = postconditions_failed self.spinning = spinning - self.initial_registers = initial_registers + self.underconstrained_machine_state = underconstrained_machine_state @property def assertion_triggered(self) -> bool: @@ -559,23 +569,6 @@ def explore(self, simgr): if self.cache_intermediate_info: _save_states(simgr.active) -# Pass a state.arch.registers to this function and receive a set of top level registers in return. This can be used -# for enumerating registers without having to worry about subregisters -""" -def top_level_regs(reg_info: dict[str, tuple[int, int]]) -> set[str]: - results = P.IntervalDict() - for (name, (index, size)) in reg_info.items(): - interval = P.closedopen(index, index+size) - current_val = results.get(index) - if current_val is None: - results[interval] = (name, interval) - else: - (_, current_interval) = current_val - if len(interval) > len(current_interval): - results[interval] = (name, interval) - return {name for (name, _) in results.values()} -""" - class Session: """ A session is a particular run of a project, consisting of attached directives (asserts/assumes).\ @@ -590,7 +583,8 @@ class Session: :ivar list[Directive] directives: The directives added to this session. :ivar bool has_run: True if the :py:meth:`cozy.project.Session.run` method has been called, otherwise False. """ - def __init__(self, proj, start_fun: str | int | None=None, underconstrained_execution: bool=False): + def __init__(self, proj, start_fun: str | int | None=None, underconstrained_execution: bool=False, + underconstrained_initial_state: UnderconstrainedMachineState | None=None): """ Constructs a session derived from a project. The :py:meth:`cozy.project.Project.session` is the preferred method for creating a session, not this constructor. """ @@ -625,8 +619,32 @@ def __init__(self, proj, start_fun: str | int | None=None, underconstrained_exec else: self.state = self.proj.angr_proj.factory.blank_state(add_options=add_options, plugin_preset=plugin_preset) + if underconstrained_initial_state is not None: + self._apply_underconstrained_machine_state(self.state, underconstrained_initial_state) + else: + if underconstrained_execution: + self._initialize_regs(self.state) + + # Initialize concretization strategies if underconstrained_execution: - self._initialize_regs(self.state) + if underconstrained_initial_state is not None: + concrete_strategy = underconstrained_initial_state.concretization_strategy + else: + big_memory_chunk = self.malloc(UNDERCONSTRAINED_CHUNK_SIZE, name="underconstrained_memory") + # Any time we want to concretize an unconstrained value, return a fresh chunk of memory. The maximum size + # of a concretized memory address should be UNCONSTRAINED_GRANULARITY. + concrete_strategy = SimConcretizationStrategyUnderconstrained(uuid.uuid4(), min=big_memory_chunk, granularity=UNDERCONSTRAINED_GRANULARITY) + self.underconstrained_concrete_strategy = concrete_strategy + else: + # This concretization strategy is necessary for nullable symbolic pointers. The default + # concretization strategies attempts to concretize to integers within a small (128) byte + # range. If that fails, it just selects the maximum possible concrete value. This strategy + # will ensure that we generate up to 128 possible concrete values. + concrete_strategy = angr.concretization_strategies.SimConcretizationStrategyUnlimitedRange(128) + self.underconstrained_concrete_strategy = None + + self.state.memory.read_strategies.insert(0, concrete_strategy) + self.state.memory.write_strategies.insert(0, concrete_strategy) self.state.inspect.b('mem_write', when=angr.BP_AFTER, action=_on_mem_write) self.state.globals['mem_writes'] = P.IntervalDict() @@ -649,6 +667,11 @@ def _initialize_regs(self, state: angr.SimState): for reg_name in dir(state.regs): getattr(state.regs, reg_name) + def _apply_underconstrained_machine_state(self, state: SimState, machine_state: UnderconstrainedMachineState): + for (name, value) in machine_state.initial_registers.items(): + setattr(state.regs, name, value) + state.memory.set_default_backer(machine_state.default_backer) + def store_fs(self, filename: str, simfile: angr.SimFile) -> None: """ Stores a file in a virtual filesystem available during execution. This method simply forwards the arguments\ @@ -766,31 +789,6 @@ def _call(self, args: list[claripy.ast.bits] | None, cache_intermediate_info: bo else: state: SimState = self.state - if self.underconstrained_execution: - single = angr.concretization_strategies.SimConcretizationStrategySingle() - big_memory_chunk = self.malloc(UNDERCONSTRAINED_CHUNK_SIZE, name="underconstrained_memory") - # Any time we want to concretize an unconstrained value, return a fresh chunk of memory. The maximum size - # of a concretized memory address should be UNCONSTRAINED_GRANULARITY. - nonaliasing = SimConcretizationStrategyNorepeatsRangeMin(uuid.uuid4(), min=big_memory_chunk, granularity=UNDERCONSTRAINED_GRANULARITY) - - read_strategies = state.memory.read_strategies - read_strategies.clear() - read_strategies.append(single) - read_strategies.append(nonaliasing) - - write_strategies = state.memory.write_strategies - write_strategies.clear() - write_strategies.append(single) - write_strategies.append(nonaliasing) - else: - # This concretization strategy is necessary for nullable symbolic pointers. The default - # concretization strategies attempts to concretize to integers within a small (128) byte - # range. If that fails, it just selects the maximum possible concrete value. This strategy - # will ensure that we generate up to 128 possible concrete values. - concrete_strategy = angr.concretization_strategies.SimConcretizationStrategyUnlimitedRange(128) - state.memory.read_strategies.insert(0, concrete_strategy) - state.memory.write_strategies.insert(0, concrete_strategy) - if cache_intermediate_info: _save_states([state]) @@ -816,10 +814,13 @@ def _run_result(self, simgr: SimulationManager, sess_exploration: _SessionExplor if self.underconstrained_execution: initial_registers = {reg_name: getattr(self.state.regs, reg_name) for reg_name in dir(self.state.regs)} + default_backer = self.state.memory.get_default_backer() + underconstrained_machine_state = UnderconstrainedMachineState(initial_registers, default_backer, self.underconstrained_concrete_strategy) else: - initial_registers = None + underconstrained_machine_state = None - return RunResult(deadended, errored, asserts_failed, sess_exploration.assume_warnings, postconditions_failed, spinning, initial_registers) + return RunResult(deadended, errored, asserts_failed, sess_exploration.assume_warnings, postconditions_failed, + spinning, underconstrained_machine_state) def run(self, args: list[claripy.ast.bits] | None=None, cache_intermediate_info: bool=True, ret_addr: int | None=None, loop_bound: int | None=None) -> RunResult: diff --git a/cozy/underconstrained.py b/cozy/underconstrained.py index e9b4899..306ff0e 100644 --- a/cozy/underconstrained.py +++ b/cozy/underconstrained.py @@ -1,21 +1,56 @@ import angr from angr.storage.memory_mixins import DefaultMemory +import cozy.log +from cozy import claripy_ext -class SimConcretizationStrategyNorepeatsRangeMin(angr.concretization_strategies.SimConcretizationStrategyNorepeatsRange): + +class SimConcretizationStrategyUnderconstrained(angr.concretization_strategies.SimConcretizationStrategyNorepeatsRange): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.extra_constraints = [] self.single = angr.concretization_strategies.SimConcretizationStrategySingle() + self.multiple = angr.concretization_strategies.SimConcretizationStrategyUnlimitedRange(16) + self.extra_constraints_symbols = set() + + def constrain_addr(self, memory, addr): + if addr not in self.extra_constraints_symbols: + results = super()._concretize(memory, addr, extra_constraints=self.extra_constraints) + self.extra_constraints.append(addr == results[0]) + self.extra_constraints_symbols.add(addr) def _concretize(self, memory, addr, **kwargs): single_value = self.single._concretize(memory, addr, extra_constraints=self.extra_constraints) if single_value is not None: return single_value else: - results = super()._concretize(memory, addr, extra_constraints=self.extra_constraints) - self.extra_constraints.append(results[0] == addr) - return results + children = claripy_ext.sym_variables(addr) + + if len(children) == 1: + # If there is only one child, constrain that child to be some concrete address + # This is the most common case for accessing a field inside a struct + self.constrain_addr(memory, list(children)[0]) + # We have now constrained the symbolic variable to be a concrete variable + # If we evaluate addr now, the compound expression should now evaluate to a single value + return self.single._concretize(memory, addr, extra_constraints=self.extra_constraints) + else: + # If there are multiple children, we are likely accessing an array. In this case we should + # genuinely concretize to multiple values since the index into the array can be multiple values. + # Now let's use a heuristic to try to figure out what is the address of the array start + # and what is the variable that corresponds to the index. For anything but uint8_t/char arrays, + # the array variable will be the child of an add operation, and the index will be multiplied or bit + # shifted. For example with an int array, the addr expression will be ARRAY + SignExt(32, INDEX << 2) + array_candidates = set(children) + for expr in claripy_ext.sub_asts(addr): + if expr.op != '__add__' and expr.op != '__sub__': + for child in expr.args: + if child in array_candidates: + array_candidates.remove(child) + if len(array_candidates) == 1: + self.constrain_addr(memory, list(array_candidates)[0]) + else: + cozy.log.warning("Unable to find array base when concretizing symbolic address") + return self.multiple._concretize(memory, addr, extra_constraints=self.extra_constraints) def _any(self, *args, **kwargs): # The default SimConcretizationStrategyNorepeatsRange class chooses a concrete address using the _any @@ -24,13 +59,21 @@ def _any(self, *args, **kwargs): # be located at the symbolic address. return self._min(*args, **kwargs) +class Box: + def __init__(self, value): + self.value = value + class DefaultMemoryUnderconstrained(DefaultMemory): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.default_backer = DefaultMemory(*args, **kwargs) + # Make a box so that if we change the backer in any copies, all copies get the update + self.default_backer = Box(DefaultMemory(*args, **kwargs)) + + def get_default_backer(self): + return self.default_backer.value def set_default_backer(self, backer: DefaultMemory): - self.default_backer = backer + self.default_backer.value = backer def copy(self, *args, **kwargs): o = super().copy(*args, **kwargs) @@ -39,8 +82,8 @@ def copy(self, *args, **kwargs): def _default_value(self, addr, size, **kwargs): if angr.sim_options.UNDER_CONSTRAINED_SYMEXEC in self.state.options and type(addr) is int: - self.default_backer.state = self.state - return self.default_backer.load(addr, size) + self.default_backer.value.state = self.state + return self.default_backer.value.load(addr, size) return super()._default_value(addr, size, **kwargs) underconstrained_preset = angr.SimState._presets['default'].copy() diff --git a/examples/cmp_buff_overflow_underconstrained.py b/examples/cmp_buff_overflow_underconstrained.py index add8ba5..75accb1 100644 --- a/examples/cmp_buff_overflow_underconstrained.py +++ b/examples/cmp_buff_overflow_underconstrained.py @@ -8,11 +8,12 @@ pre_proj = Project('test_programs/buff_overflow/buff_overflow') pre_sess = pre_proj.session('patch_fun', underconstrained_execution=True) -pre_patched = pre_sess.run() +pre_patched_results = pre_sess.run() post_proj = Project('test_programs/buff_overflow/buff_overflow_patched') -post_sess = post_proj.session('patch_fun', underconstrained_execution=True) -post_patched = post_sess.run() +post_sess = post_proj.session('patch_fun', underconstrained_execution=True, + underconstrained_initial_state=pre_patched_results.underconstrained_machine_state) +post_patched_results = post_sess.run() -comparison_results = analysis.Comparison(pre_patched, post_patched) +comparison_results = analysis.Comparison(pre_patched_results, post_patched_results) print(comparison_results.report([])) \ No newline at end of file From f46423be1d3d9a0ee24150be22f50691e922788a Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Fri, 9 Aug 2024 16:24:54 -0400 Subject: [PATCH 04/14] You can now get args from UnderconstrainedMachineState. Useful when generating concretizations for reports. --- cozy/analysis.py | 2 +- cozy/session.py | 23 +++++++++++++--- cozy/underconstrained.py | 27 ++++++++++++++++++- .../cmp_buff_overflow_underconstrained.py | 2 +- 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/cozy/analysis.py b/cozy/analysis.py index 27755ed..aec51b9 100644 --- a/cozy/analysis.py +++ b/cozy/analysis.py @@ -485,7 +485,7 @@ def concrete_examples(self, args: any, num_examples=3) -> list[CompatiblePairInp Concretizes the arguments used to put the program in these states by jointly using the constraints attached to\ the compatible states. - :param any args: The input arguments to concretize. This argument may be a Python datastructure, the\ + :param any args: The input arguments to concretize. This argument may be a Python datastructure, the \ concretizer will make a deep copy with claripy symbolic variables replaced with concrete values. :param int num_examples: The maximum number of concrete examples to generate for this particular pair. :return: A list of concrete inputs that satisfy both constraints attached to the states. diff --git a/cozy/session.py b/cozy/session.py index 8585d88..afd56ae 100644 --- a/cozy/session.py +++ b/cozy/session.py @@ -8,7 +8,7 @@ from angr import SimStateError, SimState from angr.sim_manager import ErrorRecord, SimulationManager -from . import log, side_effect +from . import log, side_effect, claripy_ext from .underconstrained import SimConcretizationStrategyUnderconstrained from .directive import Directive, Assume, Assert, VirtualPrint, ErrorDirective, AssertType, Breakpoint, Postcondition from .terminal_state import AssertFailedState, ErrorState, DeadendedState, PostconditionFailedState, SpinningState @@ -21,10 +21,22 @@ class UnderconstrainedMachineState: def __init__(self, initial_registers: dict[str, claripy.ast.Bits], default_backer: angr.storage.DefaultMemory, - concretization_strategy: SimConcretizationStrategyUnderconstrained): + concretization_strategy: SimConcretizationStrategyUnderconstrained, + underconstrained_memory_symbols): self.initial_registers = initial_registers self.default_backer = default_backer self.concretization_strategy = concretization_strategy + self.memory_symbols = underconstrained_memory_symbols + + @property + def args(self): + return { + 'initial_regs': self.initial_registers, + 'memory_symbols': { + claripy_ext.get_symbol_name(sym): sym for sym in self.memory_symbols + } + } + class RunResult: """ @@ -671,6 +683,7 @@ def _apply_underconstrained_machine_state(self, state: SimState, machine_state: for (name, value) in machine_state.initial_registers.items(): setattr(state.regs, name, value) state.memory.set_default_backer(machine_state.default_backer) + state.memory.set_symbols(machine_state.memory_symbols) def store_fs(self, filename: str, simfile: angr.SimFile) -> None: """ @@ -815,7 +828,11 @@ def _run_result(self, simgr: SimulationManager, sess_exploration: _SessionExplor if self.underconstrained_execution: initial_registers = {reg_name: getattr(self.state.regs, reg_name) for reg_name in dir(self.state.regs)} default_backer = self.state.memory.get_default_backer() - underconstrained_machine_state = UnderconstrainedMachineState(initial_registers, default_backer, self.underconstrained_concrete_strategy) + underconstrained_memory_symbols = self.state.memory.get_symbols() + underconstrained_machine_state = UnderconstrainedMachineState( + initial_registers, default_backer, self.underconstrained_concrete_strategy, + underconstrained_memory_symbols + ) else: underconstrained_machine_state = None diff --git a/cozy/underconstrained.py b/cozy/underconstrained.py index 306ff0e..b509053 100644 --- a/cozy/underconstrained.py +++ b/cozy/underconstrained.py @@ -1,4 +1,5 @@ import angr +import claripy from angr.storage.memory_mixins import DefaultMemory import cozy.log @@ -9,6 +10,7 @@ class SimConcretizationStrategyUnderconstrained(angr.concretization_strategies.S def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.extra_constraints = [] + self.multi_extra_constraints = [] self.single = angr.concretization_strategies.SimConcretizationStrategySingle() self.multiple = angr.concretization_strategies.SimConcretizationStrategyUnlimitedRange(16) self.extra_constraints_symbols = set() @@ -19,6 +21,18 @@ def constrain_addr(self, memory, addr): self.extra_constraints.append(addr == results[0]) self.extra_constraints_symbols.add(addr) + # TODO: See if we can improve this function to ensure that when we call the multiple concretization strategy that + # TODO: we receive consistent results. This implementation isn't quite right as it makes it easy to end up + # TODO: in a situtation where self.multi_extra_constraints becomes too constrained. Might have to do something + # TODO: with compatibilities, which seems like a bad idea. + def multi_concretize(self, memory, addr): + results = self.multiple._concretize(memory, addr, extra_constraints=(self.extra_constraints + self.multi_extra_constraints)) + if len(results) > 0: + self.multi_extra_constraints.append(claripy.Or(*[addr == r for r in results])) + return results + else: + return self.multiple._concretize(memory, addr, extra_constraints=self.extra_constraints) + def _concretize(self, memory, addr, **kwargs): single_value = self.single._concretize(memory, addr, extra_constraints=self.extra_constraints) if single_value is not None: @@ -51,6 +65,7 @@ def _concretize(self, memory, addr, **kwargs): else: cozy.log.warning("Unable to find array base when concretizing symbolic address") return self.multiple._concretize(memory, addr, extra_constraints=self.extra_constraints) + #return self.multi_concretize(memory, addr) def _any(self, *args, **kwargs): # The default SimConcretizationStrategyNorepeatsRange class chooses a concrete address using the _any @@ -68,6 +83,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Make a box so that if we change the backer in any copies, all copies get the update self.default_backer = Box(DefaultMemory(*args, **kwargs)) + self.symbols = [] def get_default_backer(self): return self.default_backer.value @@ -75,15 +91,24 @@ def get_default_backer(self): def set_default_backer(self, backer: DefaultMemory): self.default_backer.value = backer + def set_symbols(self, symbols): + self.symbols = symbols + + def get_symbols(self): + return self.symbols + def copy(self, *args, **kwargs): o = super().copy(*args, **kwargs) o.default_backer = self.default_backer + o.symbols = self.symbols return o def _default_value(self, addr, size, **kwargs): if angr.sim_options.UNDER_CONSTRAINED_SYMEXEC in self.state.options and type(addr) is int: self.default_backer.value.state = self.state - return self.default_backer.value.load(addr, size) + sym = self.default_backer.value.load(addr, size) + self.symbols.append(sym) + return sym return super()._default_value(addr, size, **kwargs) underconstrained_preset = angr.SimState._presets['default'].copy() diff --git a/examples/cmp_buff_overflow_underconstrained.py b/examples/cmp_buff_overflow_underconstrained.py index 75accb1..d45f567 100644 --- a/examples/cmp_buff_overflow_underconstrained.py +++ b/examples/cmp_buff_overflow_underconstrained.py @@ -16,4 +16,4 @@ post_patched_results = post_sess.run() comparison_results = analysis.Comparison(pre_patched_results, post_patched_results) -print(comparison_results.report([])) \ No newline at end of file +print(comparison_results.report(post_patched_results.underconstrained_machine_state.args)) \ No newline at end of file From 7a72cff97662993650022a73da52f9475db8743f Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Wed, 14 Aug 2024 12:20:16 -0400 Subject: [PATCH 05/14] Added retrowrite harness code for paper evaluation. Fixed issue with incorrect variable name in primitives.py --- cozy/primitives.py | 4 +- examples/paper_eval/retrowrite.py | 126 ++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 examples/paper_eval/retrowrite.py diff --git a/cozy/primitives.py b/cozy/primitives.py index f4fbd32..bf3ae0c 100644 --- a/cozy/primitives.py +++ b/cozy/primitives.py @@ -18,8 +18,8 @@ def sym_ptr(arch: Type[Arch], name: str | None=None) -> claripy.BVS: """ global _malloc_name_ctr if name is None: - name = "symbolic_ptr_{}".format(_name_ctr) - _name_ctr += 1 + name = "symbolic_ptr_{}".format(_malloc_name_ctr) + _malloc_name_ctr += 1 return claripy.BVS(name, arch.bits) def sym_ptr_constraints(symbolic_ptr: claripy.ast.bits, concrete_addr: int, can_be_null: bool=True) -> claripy.ast.bool: diff --git a/examples/paper_eval/retrowrite.py b/examples/paper_eval/retrowrite.py new file mode 100644 index 0000000..987b41f --- /dev/null +++ b/examples/paper_eval/retrowrite.py @@ -0,0 +1,126 @@ +import angr +import claripy + +import cozy +from cozy.project import Project + +proj_prepatch = Project('test_programs/paper_eval/retrowrite/base64-gcc') +proj_postpatch = Project('test_programs/paper_eval/retrowrite/base64-retrowrite') + +class lava_set(angr.SimProcedure): + def run(self, bug_num, val): + pass + +class lava_get(angr.SimProcedure): + def run(self, bug_num): + return 0x0 + +class __afl_maybe_log(angr.SimProcedure): + def run(self): + return 0x0 + +def afl_hook(proj: Project): + proj.hook_symbol('lava_set', lava_set) + proj.hook_symbol('lava_get', lava_get) + proj.hook_symbol('__afl_maybe_log', __afl_maybe_log) + +afl_hook(proj_prepatch) +afl_hook(proj_postpatch) + +def base64_decode_alloc_ctx(): + BUFF_SIZE = 4 + ctx_ptr = claripy.BVS('ctx_ptr', 64) + + in_buff_contents = [] + for i in range(BUFF_SIZE): + char_sym = claripy.BVS('in_buff_char_{}'.format(i), 8) + in_buff_contents.append(char_sym) + + def run(proj: Project): + proj.add_prototype('base64_decode_alloc_ctx', 'int base64_decode_ctx(void *, char *, unsigned long, char *, unsigned long *)') + + sess = proj.session('base64_decode_alloc_ctx') + + ctx = sess.malloc(8) + + in_buff = sess.malloc(BUFF_SIZE, name='in') + out_ptr_buff = sess.malloc(8, name='out') + out_len_ptr = sess.malloc(8, name='outlen') + + for (i, char_sym) in enumerate(in_buff_contents): + sess.store(in_buff + i, char_sym, endness=proj.arch.memory_endness) + + afl_area_addr = sess.malloc(100) + + try: + afl_area_ptr = proj.find_symbol_addr('__afl_area_ptr') + sess.store(afl_area_ptr, claripy.BVV(afl_area_addr, 64), endness=proj.arch.memory_endness) + except RuntimeError: + pass + + sess.add_constraints(cozy.primitives.sym_ptr_constraints(ctx_ptr, ctx, can_be_null=True)) + return sess.run([ctx_ptr, in_buff, BUFF_SIZE, out_ptr_buff, out_len_ptr]) + + return (run, {'in_buff_contents': in_buff_contents, 'ctx_ptr': ctx_ptr}) + +def base64_decode_ctx(): + BUFF_SIZE = 5 + ctx_ptr = claripy.BVS('ctx_ptr', 64) + + in_buff_contents = [] + for i in range(BUFF_SIZE - 1): + char_sym = claripy.BVS('in_buff_char_{}'.format(i), 8) + in_buff_contents.append(char_sym) + + in_len = claripy.BVS('inlen', 64) + out_len = claripy.BVS('outlen', 64) + + def run(proj: Project): + proj.add_prototype('base64_decode_ctx', 'int base64_decode_ctx(void *, char *, unsigned long, char *, unsigned long *)') + sess = proj.session('base64_decode_ctx') + + ctx = sess.malloc(8) + + in_buff = sess.malloc(BUFF_SIZE, name='in') + out_buff = sess.malloc(BUFF_SIZE, name='out') + out_len_ptr = sess.malloc(BUFF_SIZE, name='outlen') + + print("malloced addresses: ", hex(ctx), hex(in_buff), hex(out_buff), hex(out_len_ptr)) + + sess.store(out_len_ptr, out_len, endness=proj.arch.memory_endness) + + for (i, char_sym) in enumerate(in_buff_contents): + sess.store(in_buff + i, char_sym, endness=proj.arch.memory_endness) + + try: + afl_area_ptr = proj.find_symbol_addr('__afl_area_ptr') + afl_area_addr = sess.malloc(100) + sess.store(afl_area_ptr, claripy.BVV(afl_area_addr, 64), endness=proj.arch.memory_endness) + except RuntimeError: + pass + + sess.add_constraints(cozy.primitives.sym_ptr_constraints(ctx_ptr, ctx, can_be_null=True)) + sess.add_constraints(in_len.SGE(0), out_len.SGE(0)) + sess.add_constraints(in_len.SLE(BUFF_SIZE), out_len.SLE(BUFF_SIZE)) + + return sess.run([0, in_buff, in_len, out_buff, out_len_ptr]) + + return (run, {'in_buff_contents': in_buff_contents, 'in_len': in_len, 'out_len': out_len}) + + +(base64_decode_ctx_run, base64_decode_ctx_args) = base64_decode_alloc_ctx() + +pre_results = base64_decode_ctx_run(proj_prepatch) +post_results = base64_decode_ctx_run(proj_postpatch) + +comparison = cozy.analysis.Comparison(pre_results, post_results) + +cozy.execution_graph.visualize_comparison(proj_prepatch, proj_postpatch, pre_results, post_results, comparison, + args=base64_decode_ctx_args, num_examples=2, open_browser=True, include_actions=True) + +exit(0) + +proj_postpatch.add_prototype('__afl_maybe_log', 'unsigned char __afl_maybe_log()') + +sess = proj_postpatch.session('__afl_maybe_log') +sess.run([]) \ No newline at end of file From 3aa130aaff4764ea60219d009436679efc67131a Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Wed, 14 Aug 2024 12:48:54 -0400 Subject: [PATCH 06/14] Finished harnesses for first 3 base64_ functions --- examples/paper_eval/retrowrite.py | 82 +++++++++++++++---------------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/examples/paper_eval/retrowrite.py b/examples/paper_eval/retrowrite.py index 987b41f..ddbf184 100644 --- a/examples/paper_eval/retrowrite.py +++ b/examples/paper_eval/retrowrite.py @@ -2,6 +2,7 @@ import claripy import cozy +from cozy.analysis import Comparison from cozy.project import Project proj_prepatch = Project('test_programs/paper_eval/retrowrite/base64-gcc') @@ -27,6 +28,10 @@ def afl_hook(proj: Project): afl_hook(proj_prepatch) afl_hook(proj_postpatch) +########### +# base64_ +########### + def base64_decode_alloc_ctx(): BUFF_SIZE = 4 ctx_ptr = claripy.BVS('ctx_ptr', 64) @@ -50,77 +55,70 @@ def run(proj: Project): for (i, char_sym) in enumerate(in_buff_contents): sess.store(in_buff + i, char_sym, endness=proj.arch.memory_endness) - afl_area_addr = sess.malloc(100) - - try: - afl_area_ptr = proj.find_symbol_addr('__afl_area_ptr') - sess.store(afl_area_ptr, claripy.BVV(afl_area_addr, 64), endness=proj.arch.memory_endness) - except RuntimeError: - pass - sess.add_constraints(cozy.primitives.sym_ptr_constraints(ctx_ptr, ctx, can_be_null=True)) return sess.run([ctx_ptr, in_buff, BUFF_SIZE, out_ptr_buff, out_len_ptr]) return (run, {'in_buff_contents': in_buff_contents, 'ctx_ptr': ctx_ptr}) def base64_decode_ctx(): - BUFF_SIZE = 5 + IN_LEN = 4 + OUT_LEN = (IN_LEN // 4) * 3 + 3 ctx_ptr = claripy.BVS('ctx_ptr', 64) in_buff_contents = [] - for i in range(BUFF_SIZE - 1): + for i in range(IN_LEN): char_sym = claripy.BVS('in_buff_char_{}'.format(i), 8) in_buff_contents.append(char_sym) - in_len = claripy.BVS('inlen', 64) - out_len = claripy.BVS('outlen', 64) - def run(proj: Project): proj.add_prototype('base64_decode_ctx', 'int base64_decode_ctx(void *, char *, unsigned long, char *, unsigned long *)') sess = proj.session('base64_decode_ctx') ctx = sess.malloc(8) - in_buff = sess.malloc(BUFF_SIZE, name='in') - out_buff = sess.malloc(BUFF_SIZE, name='out') - out_len_ptr = sess.malloc(BUFF_SIZE, name='outlen') - - print("malloced addresses: ", hex(ctx), hex(in_buff), hex(out_buff), hex(out_len_ptr)) - - sess.store(out_len_ptr, out_len, endness=proj.arch.memory_endness) - + in_buff = sess.malloc(IN_LEN, name='in') for (i, char_sym) in enumerate(in_buff_contents): sess.store(in_buff + i, char_sym, endness=proj.arch.memory_endness) - try: - afl_area_ptr = proj.find_symbol_addr('__afl_area_ptr') - afl_area_addr = sess.malloc(100) - sess.store(afl_area_ptr, claripy.BVV(afl_area_addr, 64), endness=proj.arch.memory_endness) - except RuntimeError: - pass + out_buff = sess.malloc(OUT_LEN, name='out') - sess.add_constraints(cozy.primitives.sym_ptr_constraints(ctx_ptr, ctx, can_be_null=True)) - sess.add_constraints(in_len.SGE(0), out_len.SGE(0)) - sess.add_constraints(in_len.SLE(BUFF_SIZE), out_len.SLE(BUFF_SIZE)) + out_len_ptr = sess.malloc(8, name='outlenptr') + sess.store(out_len_ptr, claripy.BVV(OUT_LEN, 64), endness=proj.arch.memory_endness) - return sess.run([0, in_buff, in_len, out_buff, out_len_ptr]) + sess.add_constraints(cozy.primitives.sym_ptr_constraints(ctx_ptr, ctx, can_be_null=True)) + return sess.run([ctx_ptr, in_buff, IN_LEN, out_buff, out_len_ptr]) - return (run, {'in_buff_contents': in_buff_contents, 'in_len': in_len, 'out_len': out_len}) + return (run, {'in_buff_contents': in_buff_contents, 'ctx_ptr': ctx_ptr}) +def base64_decode_ctx_init(): + ctx_contents = claripy.BVS('ctx_contents', 64) -(base64_decode_ctx_run, base64_decode_ctx_args) = base64_decode_alloc_ctx() + def run(proj: Project): + proj.add_prototype('base64_decode_ctx_init', 'void base64_decode_ctx_init(void *)') + sess = proj.session('base64_decode_ctx_init') + ctx = sess.malloc(8) -pre_results = base64_decode_ctx_run(proj_prepatch) -post_results = base64_decode_ctx_run(proj_postpatch) + sess.store(ctx, ctx_contents, endness=proj.arch.memory_endness) -comparison = cozy.analysis.Comparison(pre_results, post_results) + return sess.run([ctx]) -cozy.execution_graph.visualize_comparison(proj_prepatch, proj_postpatch, pre_results, post_results, comparison, - args=base64_decode_ctx_args, num_examples=2, open_browser=True, include_actions=True) + return (run, {'ctx_contents': ctx_contents}) -exit(0) +def verify_equivalence(comp: Comparison): + for pair in comp.pairs.values(): + assert(len(pair.mem_diff) == 0) + assert(len(pair.reg_diff) == 0) -proj_postpatch.add_prototype('__afl_maybe_log', 'unsigned char __afl_maybe_log()') +def run_and_verify(f, visualize=False): + (run, args) = f() + pre_results = run(proj_prepatch) + post_results = run(proj_postpatch) + comparison = cozy.analysis.Comparison(pre_results, post_results) + verify_equivalence(comparison) + if visualize: + cozy.execution_graph.visualize_comparison(proj_prepatch, proj_postpatch, pre_results, post_results, comparison, + args=args, num_examples=2, open_browser=True, include_actions=True) -sess = proj_postpatch.session('__afl_maybe_log') -sess.run([]) \ No newline at end of file +run_and_verify(base64_decode_alloc_ctx) +run_and_verify(base64_decode_ctx) +run_and_verify(base64_decode_ctx_init) \ No newline at end of file From 2dbb6179f5f064473b3628a3d7f5a83f19358b3a Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Wed, 14 Aug 2024 13:31:09 -0400 Subject: [PATCH 07/14] Completed harnesses for all base64_ functions. Added filtering to only check callee saved registers --- examples/paper_eval/retrowrite.py | 83 ++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/examples/paper_eval/retrowrite.py b/examples/paper_eval/retrowrite.py index ddbf184..b8db6ab 100644 --- a/examples/paper_eval/retrowrite.py +++ b/examples/paper_eval/retrowrite.py @@ -104,16 +104,95 @@ def run(proj: Project): return (run, {'ctx_contents': ctx_contents}) +def base64_encode_alloc(): + in_len = claripy.BVS('in_len', 64) + MAX_IN_LEN = 16 + + in_buff_contents = [] + for i in range(MAX_IN_LEN): + char_sym = claripy.BVS('in_buff_char_{}'.format(i), 8) + in_buff_contents.append(char_sym) + + def run(proj: Project): + proj.add_prototype('base64_encode_alloc', 'unsigned long base64_encode_alloc(char *in, unsigned long, char **)') + sess = proj.session('base64_encode_alloc') + + in_buff = sess.malloc(MAX_IN_LEN, name='in') + sess.add_constraints(in_len.SGE(0), in_len.SLE(MAX_IN_LEN)) + for (i, char_sym) in enumerate(in_buff_contents): + sess.store(in_buff + i, char_sym, endness=proj.arch.memory_endness) + + out_ptr = sess.malloc(8) + + return sess.run([in_buff, in_len, out_ptr]) + + return (run, {'in_buff_contents': in_buff_contents, 'in_len': in_len}) + +def base64_encode(): + in_len = claripy.BVS('in_len', 64) + def calc_out_len(in_len): + return ((in_len + 2) / 3) * 4 + 1 + out_len = calc_out_len(in_len) + + MAX_IN_LEN = 16 + MAX_OUT_LEN = calc_out_len(MAX_IN_LEN) + + in_buff_contents = [] + for i in range(MAX_IN_LEN): + char_sym = claripy.BVS('in_buff_char_{}'.format(i), 8) + in_buff_contents.append(char_sym) + + def run(proj: Project): + proj.add_prototype('base64_encode', 'void base64_encode(char *in, unsigned long, char *, unsigned long)') + sess = proj.session('base64_encode') + + sess.add_constraints(out_len >= in_len) + + in_buff = sess.malloc(MAX_IN_LEN, name='in') + sess.add_constraints(in_len.SGE(0), in_len.SLE(MAX_IN_LEN)) + for (i, char_sym) in enumerate(in_buff_contents): + sess.store(in_buff + i, char_sym, endness=proj.arch.memory_endness) + + out_buff = sess.malloc(MAX_OUT_LEN) + + return sess.run([in_buff, in_len, out_buff, out_len]) + + return (run, {'in_buff_contents': in_buff_contents, 'in_len': in_len}) + def verify_equivalence(comp: Comparison): for pair in comp.pairs.values(): assert(len(pair.mem_diff) == 0) assert(len(pair.reg_diff) == 0) +callee_saved = frozenset( + [ + 'rbx', 'ebx', 'bx', 'bl', + 'rsp', 'esp', 'sp', 'spl', + 'rbp', 'ebp', 'bp', 'bpl', + 'r12', 'r12d', 'r12w', 'r12b', + 'r13', 'r13d', 'r13w', 'r13b', + 'r14', 'r14d', 'r14w', 'r14b', + 'r15', 'r15d', 'r15w', 'r15b' + ] +) + +def apply_callee_saved(comp: Comparison): + # Ignore all registers that are not callee saved + for pair in comp.pairs.values(): + to_remove = [] + for reg_name in pair.reg_diff.keys(): + if reg_name not in callee_saved: + to_remove.append(reg_name) + for reg_name in to_remove: + del pair.reg_diff[reg_name] + + def run_and_verify(f, visualize=False): (run, args) = f() pre_results = run(proj_prepatch) post_results = run(proj_postpatch) comparison = cozy.analysis.Comparison(pre_results, post_results) + apply_callee_saved(comparison) verify_equivalence(comparison) if visualize: cozy.execution_graph.visualize_comparison(proj_prepatch, proj_postpatch, pre_results, post_results, comparison, @@ -121,4 +200,6 @@ def run_and_verify(f, visualize=False): run_and_verify(base64_decode_alloc_ctx) run_and_verify(base64_decode_ctx) -run_and_verify(base64_decode_ctx_init) \ No newline at end of file +run_and_verify(base64_decode_ctx_init) +run_and_verify(base64_encode_alloc) +run_and_verify(base64_encode) \ No newline at end of file From d34668b6f459010f029586aee35f51fbb43e02ce Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Thu, 15 Aug 2024 16:33:17 -0400 Subject: [PATCH 08/14] Fixed issue with model generation when there are no constraints in claripy_ext module --- cozy/claripy_ext.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cozy/claripy_ext.py b/cozy/claripy_ext.py index 4b0e7f2..8891a2c 100644 --- a/cozy/claripy_ext.py +++ b/cozy/claripy_ext.py @@ -103,7 +103,11 @@ def value(sym, v): else: ret[leaf] = zero(leaf) else: - raise ValueError("Failed to generate a model for a satisfiable solution") + if len(solver.constraints) == 0: + for extra in extra_symbols: + ret[extra] = zero(extra) + else: + raise ValueError("Failed to generate a model for a satisfiable solution") # Set any symbols not in the model to 0 for extra in extra_symbols: From 27e840cf854bb927a728bb0f5aa9f3e7be2284fb Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Thu, 15 Aug 2024 16:33:48 -0400 Subject: [PATCH 09/14] Finished clo functions in retrowrite demo --- examples/paper_eval/retrowrite.py | 149 +++++++++++++++++++++++++++++- 1 file changed, 146 insertions(+), 3 deletions(-) diff --git a/examples/paper_eval/retrowrite.py b/examples/paper_eval/retrowrite.py index b8db6ab..6c4344d 100644 --- a/examples/paper_eval/retrowrite.py +++ b/examples/paper_eval/retrowrite.py @@ -28,6 +28,9 @@ def afl_hook(proj: Project): afl_hook(proj_prepatch) afl_hook(proj_postpatch) +# These are global variables that must be equal after execution. Tuples are (pre_patch_name, post_patch_name, sym_size) +global_vars_eq = [('file_name', 'file_name_5d99e8', 8), ('ignore_EPIPE', 'ignore_EPIPE_5d99e0', 1)] + ########### # base64_ ########### @@ -159,6 +162,99 @@ def run(proj: Project): return (run, {'in_buff_contents': in_buff_contents, 'in_len': in_len}) +########### +# clo +########### + +def clone_quoting_options(): + o_contents = claripy.BVS('o_contents', 0x38) + o = cozy.primitives.sym_ptr(proj_prepatch.arch, 'o') + + def run(proj: Project): + proj.add_prototype('clone_quoting_options', 'void *clone_quoting_options(void *)') + sess = proj.session('clone_quoting_options') + + o_addr = sess.malloc(0x38) + sess.store(o_addr, o_contents, endness=proj.arch.memory_endness) + + sess.add_constraints(cozy.primitives.sym_ptr_constraints(o, o_addr, can_be_null=True)) + + return sess.run([o]) + + return (run, {'o': o, 'o_contents': o_contents}) + +#def clone_quoting_options(proj: Project, prev_underconstrained_state=None): +# sess = proj.session('clone_quoting_options', underconstrained_execution=True, underconstrained_initial_state=prev_underconstrained_state) +# return sess.run() + +def close_stdout(): + def run(proj: Project): + proj.add_prototype('close_stdout', 'void close_stdout()') + sess = proj.session('close_stdout') + return sess.run([]) + return (run, dict()) + +def close_stdout_set_file_name(): + file = cozy.primitives.sym_ptr(proj_prepatch.arch, 'file') + + def run(proj: Project): + proj.add_prototype('close_stdout_set_file_name', 'void close_stdout_set_file_name(char *)') + sess = proj.session('close_stdout_set_file_name') + return sess.run([file]) + + return (run, {'file': file}) + +def close_stdout_set_ignore_EPIPE(): + ignore = claripy.BVS('ignore', 8) + + def run(proj: Project): + proj.add_prototype('close_stdout_set_ignore_EPIPE', 'void close_stdout_set_ignore_EPIPE(unsigned char)') + sess = proj.session('close_stdout_set_ignore_EPIPE') + return sess.run([ignore]) + + return (run, {'ignore': ignore}) + +""" +def file_symbols(): + return { + '_flags': claripy.BVS('_flags', 0x4 * 8), + '_IO_read_ptr': claripy.BVS('_IO_read_ptr', 64), + '_IO_read_end': claripy.BVS('_IO_read_end', 64), + '_IO_read_base': claripy.BVS('_IO_read_base', 64), + '_IO_write_base': claripy.BVS('_IO_write_base', 64), + '_IO_write_ptr': claripy.BVS('_IO_write_ptr', 64), + '_IO_write_end': claripy.BVS('_IO_write_end', 64), + '_IO_buf_base': claripy.BVS('_IO_buf_base', 64), + '_IO_buf_end': claripy.BVS('_IO_buf_end', 64), + '_IO_save_base': claripy.BVS('_IO_save_base', 64), + '_IO_backup_base': claripy.BVS('_IO_backup_base', 64), + '_IO_save_end': claripy.BVS('_IO_save_end', 64), + '_markers': claripy.BVS('_markers', 64), + '_chain': claripy.BVS('_chain', 64), + '_fileno': claripy.BVS('_fileno', 0x4 * 8), + '_flags2': claripy.BVS('_flags2', 0x4 * 8), + '_old_offset': claripy.BVS('_old_offset', 64), + '_cur_column': claripy.BVS('_cur_column', 0x2 * 8), + '_vtable_offset': None + } + +def close_stream(): + file_struct_contents = claripy.BVS('file_struct_contents', 0xd8) + + def run(proj: Project): + proj.add_prototype('close_stream', 'int close_stream(void *)') + sess = proj.session('close_stream') + file_ptr = sess.malloc(0xd8, 'file_struct_contents') + sess.store(file_ptr, file_struct_contents) + return sess.run([file_ptr]) + + return (run, {'file_struct_contents': file_struct_contents}) +""" + +def close_stream_underconstrained(proj: Project, prev_underconstrained_state=None): + sess = proj.session('close_stream', underconstrained_execution=True, underconstrained_initial_state=prev_underconstrained_state) + return sess.run([]) + def verify_equivalence(comp: Comparison): for pair in comp.pairs.values(): assert(len(pair.mem_diff) == 0) @@ -186,15 +282,55 @@ def apply_callee_saved(comp: Comparison): for reg_name in to_remove: del pair.reg_diff[reg_name] - -def run_and_verify(f, visualize=False): +def global_var_eq_condition(pair: cozy.analysis.CompatiblePair): + ret = None + for (sym_name_left, sym_name_right, size) in global_vars_eq: + pre = pair.state_left.state.memory.load(proj_prepatch.find_symbol_addr(sym_name_left), size) + post = pair.state_right.state.memory.load(proj_postpatch.find_symbol_addr(sym_name_right), size) + if ret is None: + ret = (pre == post) + else: + ret = ret & (pre == post) + return ret + +def apply_global_var_eq(comp: Comparison): + # Foreach global variable + for (sym_name_left, sym_name_right, size) in global_vars_eq: + # Get the prepatch and postpatch addrs + addr_pre = proj_prepatch.find_symbol_addr(sym_name_left) + addr_post = proj_postpatch.find_symbol_addr(sym_name_right) + # Remove these addrs from the mem_diff of all the pairs + for pair in comp.pairs.values(): + to_remove = [] + for rng in pair.mem_diff: + if rng.start == addr_pre or rng.start == addr_post: + to_remove.append(rng) + for rng in to_remove: + del pair.mem_diff[rng] + +def run_and_verify(f, visualize=False, verification_condition=None): (run, args) = f() pre_results = run(proj_prepatch) post_results = run(proj_postpatch) comparison = cozy.analysis.Comparison(pre_results, post_results) apply_callee_saved(comparison) + assert(len(comparison.verify(global_var_eq_condition)) == 0) + apply_global_var_eq(comparison) + if verification_condition is not None: + assert(len(comparison.verify(verification_condition)) == 0) + verify_equivalence(comparison) + if visualize: + cozy.execution_graph.visualize_comparison(proj_prepatch, proj_postpatch, pre_results, post_results, comparison, + args=args, num_examples=2, open_browser=True, include_actions=True) + +def run_and_verify_underconstrained(run, visualize=False): + pre_results = run(proj_prepatch) + post_results = run(proj_postpatch, prev_underconstrained_state=pre_results.underconstrained_machine_state) + comparison = cozy.analysis.Comparison(pre_results, post_results) + apply_callee_saved(comparison) verify_equivalence(comparison) if visualize: + args = post_results.underconstrained_machine_state.args cozy.execution_graph.visualize_comparison(proj_prepatch, proj_postpatch, pre_results, post_results, comparison, args=args, num_examples=2, open_browser=True, include_actions=True) @@ -202,4 +338,11 @@ def run_and_verify(f, visualize=False): run_and_verify(base64_decode_ctx) run_and_verify(base64_decode_ctx_init) run_and_verify(base64_encode_alloc) -run_and_verify(base64_encode) \ No newline at end of file +run_and_verify(base64_encode) + +run_and_verify(clone_quoting_options) +run_and_verify(close_stdout) +run_and_verify(close_stdout_set_file_name) +run_and_verify(close_stdout_set_ignore_EPIPE) +run_and_verify(close_stream) +run_and_verify_underconstrained(close_stream_underconstrained) \ No newline at end of file From 7565d15622140111413a1075831638693f72019e Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Thu, 15 Aug 2024 16:35:04 -0400 Subject: [PATCH 10/14] Minor fixes for retrowrite --- examples/paper_eval/retrowrite.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/paper_eval/retrowrite.py b/examples/paper_eval/retrowrite.py index 6c4344d..91d1c4d 100644 --- a/examples/paper_eval/retrowrite.py +++ b/examples/paper_eval/retrowrite.py @@ -328,6 +328,8 @@ def run_and_verify_underconstrained(run, visualize=False): post_results = run(proj_postpatch, prev_underconstrained_state=pre_results.underconstrained_machine_state) comparison = cozy.analysis.Comparison(pre_results, post_results) apply_callee_saved(comparison) + assert(len(comparison.verify(global_var_eq_condition)) == 0) + apply_global_var_eq(comparison) verify_equivalence(comparison) if visualize: args = post_results.underconstrained_machine_state.args @@ -344,5 +346,4 @@ def run_and_verify_underconstrained(run, visualize=False): run_and_verify(close_stdout) run_and_verify(close_stdout_set_file_name) run_and_verify(close_stdout_set_ignore_EPIPE) -run_and_verify(close_stream) run_and_verify_underconstrained(close_stream_underconstrained) \ No newline at end of file From bce082eb2a0872e29caf4c4557fea7b2c1720855 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Fri, 16 Aug 2024 16:51:54 -0400 Subject: [PATCH 11/14] Enabled execution of __afl_maybe_log in retrowrite example. Added filtering of return register based on function return type. For instance if function return type is void, rax, eax, ax, al will not be checked for differences. If function return type is int32_t, rax will not be checked for difference, but eax, ax, al will be. Added decode_4 test case. --- cozy/project.py | 7 + examples/paper_eval/retrowrite.py | 231 +++++++++++++++++++++++------- 2 files changed, 183 insertions(+), 55 deletions(-) diff --git a/cozy/project.py b/cozy/project.py index ea024f6..e242596 100644 --- a/cozy/project.py +++ b/cozy/project.py @@ -44,6 +44,13 @@ def object_ranges(self, obj_filter: Callable[[Backend], bool] | None=None) -> li obj_filter = lambda x: True return [range(obj.min_addr, obj.max_addr + 1) for obj in self.angr_proj.loader.all_objects if obj_filter(obj)] + def try_find_symbol_addr(self, sym_name: str) -> int | None: + sym = self.angr_proj.loader.find_symbol(sym_name) + if sym is not None: + return sym.rebased_addr + else: + return None + def find_symbol_addr(self, sym_name: str) -> int: """ Finds the rebased addressed of a symbol. Functions are the most common symbol type. diff --git a/examples/paper_eval/retrowrite.py b/examples/paper_eval/retrowrite.py index 91d1c4d..46dc23f 100644 --- a/examples/paper_eval/retrowrite.py +++ b/examples/paper_eval/retrowrite.py @@ -4,6 +4,7 @@ import cozy from cozy.analysis import Comparison from cozy.project import Project +from cozy.session import Session proj_prepatch = Project('test_programs/paper_eval/retrowrite/base64-gcc') proj_postpatch = Project('test_programs/paper_eval/retrowrite/base64-retrowrite') @@ -16,20 +17,31 @@ class lava_get(angr.SimProcedure): def run(self, bug_num): return 0x0 -class __afl_maybe_log(angr.SimProcedure): - def run(self): - return 0x0 +# The __afl_maybe_log function inserted by retrowrite requires __afl_arena_ptr to be set to some value +# if it is not set, the program will fork in preparation for fuzzing. Here we simulate the fact that +# AFL is already initialized, skipping the forking process. +afl_arena_range = range(0xde00000000, 0xdf00000000) + +class fread_unlocked(angr.SimProcedure): + def run(self, ptr, size, n, stream): + raise NotImplementedError() def afl_hook(proj: Project): proj.hook_symbol('lava_set', lava_set) proj.hook_symbol('lava_get', lava_get) - proj.hook_symbol('__afl_maybe_log', __afl_maybe_log) afl_hook(proj_prepatch) afl_hook(proj_postpatch) +def setup_afl(proj: Project, sess: Session): + try: + addr = proj.find_symbol_addr("__afl_area_ptr") + sess.store(addr, claripy.BVV(afl_arena_range.start, 64), endness=proj.arch.memory_endness) + except RuntimeError: + pass + # These are global variables that must be equal after execution. Tuples are (pre_patch_name, post_patch_name, sym_size) -global_vars_eq = [('file_name', 'file_name_5d99e8', 8), ('ignore_EPIPE', 'ignore_EPIPE_5d99e0', 1)] +global_vars_eq = [('file_name', 'file_name_5d99e8', 8), ('ignore_EPIPE', 'ignore_EPIPE_5d99e0', 1), (None, '__afl_prev_loc', 8), (None, '__afl_area_ptr', 8)] ########### # base64_ @@ -46,8 +58,8 @@ def base64_decode_alloc_ctx(): def run(proj: Project): proj.add_prototype('base64_decode_alloc_ctx', 'int base64_decode_ctx(void *, char *, unsigned long, char *, unsigned long *)') - sess = proj.session('base64_decode_alloc_ctx') + setup_afl(proj, sess) ctx = sess.malloc(8) @@ -77,6 +89,8 @@ def run(proj: Project): proj.add_prototype('base64_decode_ctx', 'int base64_decode_ctx(void *, char *, unsigned long, char *, unsigned long *)') sess = proj.session('base64_decode_ctx') + setup_afl(proj, sess) + ctx = sess.malloc(8) in_buff = sess.malloc(IN_LEN, name='in') @@ -99,6 +113,7 @@ def base64_decode_ctx_init(): def run(proj: Project): proj.add_prototype('base64_decode_ctx_init', 'void base64_decode_ctx_init(void *)') sess = proj.session('base64_decode_ctx_init') + setup_afl(proj, sess) ctx = sess.malloc(8) sess.store(ctx, ctx_contents, endness=proj.arch.memory_endness) @@ -119,6 +134,7 @@ def base64_encode_alloc(): def run(proj: Project): proj.add_prototype('base64_encode_alloc', 'unsigned long base64_encode_alloc(char *in, unsigned long, char **)') sess = proj.session('base64_encode_alloc') + setup_afl(proj, sess) in_buff = sess.malloc(MAX_IN_LEN, name='in') sess.add_constraints(in_len.SGE(0), in_len.SLE(MAX_IN_LEN)) @@ -148,6 +164,7 @@ def calc_out_len(in_len): def run(proj: Project): proj.add_prototype('base64_encode', 'void base64_encode(char *in, unsigned long, char *, unsigned long)') sess = proj.session('base64_encode') + setup_afl(proj, sess) sess.add_constraints(out_len >= in_len) @@ -173,6 +190,7 @@ def clone_quoting_options(): def run(proj: Project): proj.add_prototype('clone_quoting_options', 'void *clone_quoting_options(void *)') sess = proj.session('clone_quoting_options') + setup_afl(proj, sess) o_addr = sess.malloc(0x38) sess.store(o_addr, o_contents, endness=proj.arch.memory_endness) @@ -191,6 +209,7 @@ def close_stdout(): def run(proj: Project): proj.add_prototype('close_stdout', 'void close_stdout()') sess = proj.session('close_stdout') + setup_afl(proj, sess) return sess.run([]) return (run, dict()) @@ -200,6 +219,7 @@ def close_stdout_set_file_name(): def run(proj: Project): proj.add_prototype('close_stdout_set_file_name', 'void close_stdout_set_file_name(char *)') sess = proj.session('close_stdout_set_file_name') + setup_afl(proj, sess) return sess.run([file]) return (run, {'file': file}) @@ -210,6 +230,7 @@ def close_stdout_set_ignore_EPIPE(): def run(proj: Project): proj.add_prototype('close_stdout_set_ignore_EPIPE', 'void close_stdout_set_ignore_EPIPE(unsigned char)') sess = proj.session('close_stdout_set_ignore_EPIPE') + setup_afl(proj, sess) return sess.run([ignore]) return (run, {'ignore': ignore}) @@ -253,8 +274,52 @@ def run(proj: Project): def close_stream_underconstrained(proj: Project, prev_underconstrained_state=None): sess = proj.session('close_stream', underconstrained_execution=True, underconstrained_initial_state=prev_underconstrained_state) + setup_afl(proj, sess) return sess.run([]) +########### +# d +########### + +def decode_4(): + MAX_IN_LEN = 4 + MAX_OUT_LEFT = 8 + + in_buff_contents = [] + for i in range(MAX_IN_LEN): + char_sym = claripy.BVS('in_buff_char_{}'.format(i), 8) + in_buff_contents.append(char_sym) + + inlen = claripy.BVS('inlen', 64) + outleft_sym = claripy.BVS('outleft_sym', 64) + + def run(proj: Project): + proj.add_prototype('decode_4', 'unsigned char decode_4(char *, unsigned long, char **, unsigned long *)') + sess = proj.session('decode_4') + setup_afl(proj, sess) + in_buff = sess.malloc(MAX_IN_LEN) + + for (i, char_sym) in enumerate(in_buff_contents): + sess.store(in_buff + i, char_sym, endness=proj.arch.memory_endness) + + out_buff = sess.malloc(MAX_OUT_LEFT) + out_buff_ptr = sess.malloc(8) + sess.store(out_buff_ptr, claripy.BVV(out_buff, 64), endness=proj.arch.memory_endness) + + outleft = sess.malloc(8) + sess.store(outleft, outleft_sym, endness=proj.arch.memory_endness) + + return sess.run([in_buff, inlen, out_buff_ptr, outleft]) + + return (run, {'in_buff_contents': in_buff_contents, 'inlen': inlen, 'outleft': outleft_sym}) + +def deregister_tm_clones(): + def run(proj: Project): + proj.add_prototype('deregister_tm_clones', 'void deregister_tm_clones()') + sess = proj.session('deregister_tm_clones') + return sess.run([]) + return (run, dict()) + def verify_equivalence(comp: Comparison): for pair in comp.pairs.values(): assert(len(pair.mem_diff) == 0) @@ -272,12 +337,12 @@ def verify_equivalence(comp: Comparison): ] ) -def apply_callee_saved(comp: Comparison): +def ignore_registers(comp: Comparison, regs: frozenset[str]): # Ignore all registers that are not callee saved for pair in comp.pairs.values(): to_remove = [] for reg_name in pair.reg_diff.keys(): - if reg_name not in callee_saved: + if reg_name not in regs: to_remove.append(reg_name) for reg_name in to_remove: del pair.reg_diff[reg_name] @@ -285,20 +350,27 @@ def apply_callee_saved(comp: Comparison): def global_var_eq_condition(pair: cozy.analysis.CompatiblePair): ret = None for (sym_name_left, sym_name_right, size) in global_vars_eq: - pre = pair.state_left.state.memory.load(proj_prepatch.find_symbol_addr(sym_name_left), size) - post = pair.state_right.state.memory.load(proj_postpatch.find_symbol_addr(sym_name_right), size) - if ret is None: - ret = (pre == post) - else: - ret = ret & (pre == post) + if sym_name_left is not None and sym_name_right is not None: + pre = pair.state_left.state.memory.load(proj_prepatch.find_symbol_addr(sym_name_left), size) + post = pair.state_right.state.memory.load(proj_postpatch.find_symbol_addr(sym_name_right), size) + if ret is None: + ret = (pre == post) + else: + ret = ret & (pre == post) return ret def apply_global_var_eq(comp: Comparison): # Foreach global variable for (sym_name_left, sym_name_right, size) in global_vars_eq: # Get the prepatch and postpatch addrs - addr_pre = proj_prepatch.find_symbol_addr(sym_name_left) - addr_post = proj_postpatch.find_symbol_addr(sym_name_right) + if sym_name_left is not None: + addr_pre = proj_prepatch.find_symbol_addr(sym_name_left) + else: + addr_pre = None + if sym_name_right is not None: + addr_post = proj_postpatch.find_symbol_addr(sym_name_right) + else: + addr_post = None # Remove these addrs from the mem_diff of all the pairs for pair in comp.pairs.values(): to_remove = [] @@ -308,42 +380,91 @@ def apply_global_var_eq(comp: Comparison): for rng in to_remove: del pair.mem_diff[rng] -def run_and_verify(f, visualize=False, verification_condition=None): - (run, args) = f() - pre_results = run(proj_prepatch) - post_results = run(proj_postpatch) - comparison = cozy.analysis.Comparison(pre_results, post_results) - apply_callee_saved(comparison) - assert(len(comparison.verify(global_var_eq_condition)) == 0) - apply_global_var_eq(comparison) - if verification_condition is not None: - assert(len(comparison.verify(verification_condition)) == 0) - verify_equivalence(comparison) - if visualize: - cozy.execution_graph.visualize_comparison(proj_prepatch, proj_postpatch, pre_results, post_results, comparison, - args=args, num_examples=2, open_browser=True, include_actions=True) - -def run_and_verify_underconstrained(run, visualize=False): - pre_results = run(proj_prepatch) - post_results = run(proj_postpatch, prev_underconstrained_state=pre_results.underconstrained_machine_state) - comparison = cozy.analysis.Comparison(pre_results, post_results) - apply_callee_saved(comparison) - assert(len(comparison.verify(global_var_eq_condition)) == 0) - apply_global_var_eq(comparison) - verify_equivalence(comparison) - if visualize: - args = post_results.underconstrained_machine_state.args - cozy.execution_graph.visualize_comparison(proj_prepatch, proj_postpatch, pre_results, post_results, comparison, - args=args, num_examples=2, open_browser=True, include_actions=True) - -run_and_verify(base64_decode_alloc_ctx) -run_and_verify(base64_decode_ctx) -run_and_verify(base64_decode_ctx_init) -run_and_verify(base64_encode_alloc) -run_and_verify(base64_encode) - -run_and_verify(clone_quoting_options) -run_and_verify(close_stdout) -run_and_verify(close_stdout_set_file_name) -run_and_verify(close_stdout_set_ignore_EPIPE) -run_and_verify_underconstrained(close_stream_underconstrained) \ No newline at end of file +def apply_afl_arena_filter(comp: Comparison): + for pair in comp.pairs.values(): + to_remove = [] + for rng in pair.mem_diff: + if rng.start in afl_arena_range: + to_remove.append(rng) + for rng in to_remove: + del pair.mem_diff[rng] + +failures = [] + +def ignore_return(comparison: Comparison, return_size: int): + if return_size == 32: + # uint32_t return + ignore_registers(comparison, frozenset(['rax'])) + if return_size == 16: + # uint16_t return + ignore_registers(comparison, frozenset(['rax', 'eax'])) + if return_size == 8: + # uint8_t return + ignore_registers(comparison, frozenset(['rax', 'eax', 'ax'])) + if return_size == 0: + # void return + ignore_registers(comparison, frozenset(['rax', 'eax', 'ax', 'al'])) + +def run_and_verify(name: str, f, return_size=64, visualize=False, verification_condition=None): + try: + (run, args) = f() + pre_results = run(proj_prepatch) + post_results = run(proj_postpatch) + comparison = cozy.analysis.Comparison(pre_results, post_results) + ignore_registers(comparison, callee_saved) + ignore_return(comparison, return_size) + assert(len(comparison.verify(global_var_eq_condition)) == 0) + apply_global_var_eq(comparison) + apply_afl_arena_filter(comparison) + if verification_condition is not None: + assert(len(comparison.verify(verification_condition)) == 0) + if visualize: + cozy.execution_graph.visualize_comparison(proj_prepatch, proj_postpatch, pre_results, post_results, comparison, + args=args, num_examples=2, open_browser=True, include_actions=True) + else: + verify_equivalence(comparison) + except AssertionError: + print("FAILED: " + name) + failures.append(name) + return + print("PASSED: " + name) + +def run_and_verify_underconstrained(name: str, run, return_size=64, visualize=False): + try: + pre_results = run(proj_prepatch) + post_results = run(proj_postpatch, prev_underconstrained_state=pre_results.underconstrained_machine_state) + comparison = cozy.analysis.Comparison(pre_results, post_results) + ignore_registers(comparison, callee_saved) + ignore_return(comparison, return_size) + assert(len(comparison.verify(global_var_eq_condition)) == 0) + apply_global_var_eq(comparison) + apply_afl_arena_filter(comparison) + if visualize: + args = post_results.underconstrained_machine_state.args + cozy.execution_graph.visualize_comparison(proj_prepatch, proj_postpatch, pre_results, post_results, comparison, + args=args, num_examples=2, open_browser=True, include_actions=False) + else: + verify_equivalence(comparison) + except AssertionError: + print("FAILED: " + name) + failures.append(name) + return + print("PASSED: " + name) + + +run_and_verify("base64_decode_alloc_ctx", base64_decode_alloc_ctx, return_size=8) +run_and_verify("base64_decode_ctx", base64_decode_ctx, return_size=8) +run_and_verify("base64_decode_ctx_init", base64_decode_ctx_init, return_size=0) +run_and_verify("base64_encode_alloc", base64_encode_alloc, return_size=64) +run_and_verify("base64_encode", base64_encode, return_size=0) + +run_and_verify("clone_quoting_options", clone_quoting_options, return_size=64) +run_and_verify("close_stdout", close_stdout, return_size=0) +run_and_verify("close_stdout_set_file_name", close_stdout_set_file_name, return_size=0) +run_and_verify("close_stdout_set_ignore_EPIPE", close_stdout_set_ignore_EPIPE, return_size=0) +run_and_verify_underconstrained("close_stream", close_stream_underconstrained, return_size=32) + +run_and_verify("decode_4", decode_4, return_size=8) +run_and_verify("deregister_tm_clones", deregister_tm_clones, return_size=0) + +print("Failed tests: ", failures) \ No newline at end of file From 805faa80d13fcef1ca34dddef4e423bc84f36bbc Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Fri, 16 Aug 2024 17:20:33 -0400 Subject: [PATCH 12/14] Added a couple more harnesses for retrowrite --- examples/paper_eval/retrowrite.py | 60 ++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/examples/paper_eval/retrowrite.py b/examples/paper_eval/retrowrite.py index 46dc23f..3a7f557 100644 --- a/examples/paper_eval/retrowrite.py +++ b/examples/paper_eval/retrowrite.py @@ -320,6 +320,59 @@ def run(proj: Project): return sess.run([]) return (run, dict()) +def do_decode(): + raise NotImplementedError() + +def do_encode(): + raise NotImplementedError() + +########### +# e +########### + +def emit_bug_reporting_address(): + raise NotImplementedError + +########### +# f +########### + +def fadvise_underconstrained(proj: Project, prev_underconstrained_state=None): + sess = proj.session('fadvise', underconstrained_execution=True, + underconstrained_initial_state=prev_underconstrained_state) + setup_afl(proj, sess) + return sess.run() + +########### +# get_ +########### + +def get_quoting_style_underconstrained(proj: Project, prev_underconstrained_state=None): + sess = proj.session('get_quoting_style', underconstrained_execution=True, + underconstrained_initial_state=prev_underconstrained_state) + setup_afl(proj, sess) + o_buff = sess.malloc(0x38) + o = cozy.primitives.sym_ptr(proj.arch, 'o') + sess.add_constraints(cozy.primitives.sym_ptr_constraints(o, o_buff)) + return sess.run([o]) + +def gettext_quote(): + raise NotImplementedError() + +########### +# i +########### + +def isbase64_underconstrained(proj: Project, prev_underconstrained_state=None): + sess = proj.session('isbase64', underconstrained_execution=True, + underconstrained_initial_state=prev_underconstrained_state) + setup_afl(proj, sess) + return sess.run() + +######################## +# END FUNCTION HARNESSES +######################## + def verify_equivalence(comp: Comparison): for pair in comp.pairs.values(): assert(len(pair.mem_diff) == 0) @@ -451,7 +504,6 @@ def run_and_verify_underconstrained(name: str, run, return_size=64, visualize=Fa return print("PASSED: " + name) - run_and_verify("base64_decode_alloc_ctx", base64_decode_alloc_ctx, return_size=8) run_and_verify("base64_decode_ctx", base64_decode_ctx, return_size=8) run_and_verify("base64_decode_ctx_init", base64_decode_ctx_init, return_size=0) @@ -467,4 +519,10 @@ def run_and_verify_underconstrained(name: str, run, return_size=64, visualize=Fa run_and_verify("decode_4", decode_4, return_size=8) run_and_verify("deregister_tm_clones", deregister_tm_clones, return_size=0) +run_and_verify_underconstrained('fadvise', fadvise_underconstrained, return_size=0) + +run_and_verify_underconstrained('get_quoting_style', get_quoting_style_underconstrained, return_size=32) + +run_and_verify_underconstrained('isbase64', isbase64_underconstrained, return_size=8) + print("Failed tests: ", failures) \ No newline at end of file From 7a36cc491b8c134b98a99d9396b2c4e156d190e2 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Wed, 11 Sep 2024 11:42:05 -0400 Subject: [PATCH 13/14] Added timing information and harness for paper evaluation --- examples/paper_eval/retrowrite.py | 155 +++++++++++++++++++++++------- paper_eval.sh | 16 +++ 2 files changed, 136 insertions(+), 35 deletions(-) create mode 100755 paper_eval.sh diff --git a/examples/paper_eval/retrowrite.py b/examples/paper_eval/retrowrite.py index 3a7f557..5adca8f 100644 --- a/examples/paper_eval/retrowrite.py +++ b/examples/paper_eval/retrowrite.py @@ -1,10 +1,13 @@ +import sys + import angr import claripy import cozy from cozy.analysis import Comparison from cozy.project import Project -from cozy.session import Session +from cozy.session import Session, RunResult +import time proj_prepatch = Project('test_programs/paper_eval/retrowrite/base64-gcc') proj_postpatch = Project('test_programs/paper_eval/retrowrite/base64-retrowrite') @@ -458,12 +461,34 @@ def ignore_return(comparison: Comparison, return_size: int): # void return ignore_registers(comparison, frozenset(['rax', 'eax', 'ax', 'al'])) +def write_results(name, pre_time, post_time, comp_time, num_pre_states, num_post_states, passed): + with open("eval_results.csv", "a") as f: + f.write(f"\n{name},{pre_time},{post_time},{comp_time},{num_pre_states},{num_post_states},{passed}") + +def num_terminal_states(result: RunResult): + return len(result.deadended) + len(result.errored) + len(result.spinning) + len(result.postconditions_failed) + def run_and_verify(name: str, f, return_size=64, visualize=False, verification_condition=None): + (run, args) = f() + + pre_time_start = time.time() + pre_results: RunResult = run(proj_prepatch) + pre_time_stop = time.time() + + post_time_start = time.time() + post_results: RunResult = run(proj_postpatch) + post_time_stop = time.time() + + comp_time_start = time.time() + comparison = cozy.analysis.Comparison(pre_results, post_results) + comp_time_stop = time.time() + + num_pre_states = num_terminal_states(pre_results) + num_post_states = num_terminal_states(post_results) + + passed = True + try: - (run, args) = f() - pre_results = run(proj_prepatch) - post_results = run(proj_postpatch) - comparison = cozy.analysis.Comparison(pre_results, post_results) ignore_registers(comparison, callee_saved) ignore_return(comparison, return_size) assert(len(comparison.verify(global_var_eq_condition)) == 0) @@ -477,16 +502,36 @@ def run_and_verify(name: str, f, return_size=64, visualize=False, verification_c else: verify_equivalence(comparison) except AssertionError: - print("FAILED: " + name) + passed = False failures.append(name) - return - print("PASSED: " + name) + + if passed: + print("PASSED: " + name) + else: + print("FAILED: " + name) + + write_results(name, pre_time_stop - pre_time_start, post_time_stop - post_time_start, comp_time_stop - comp_time_start, + num_pre_states, num_post_states, passed) def run_and_verify_underconstrained(name: str, run, return_size=64, visualize=False): + pre_time_start = time.time() + pre_results = run(proj_prepatch) + pre_time_stop = time.time() + + post_time_start = time.time() + post_results = run(proj_postpatch, prev_underconstrained_state=pre_results.underconstrained_machine_state) + post_time_stop = time.time() + + comp_time_start = time.time() + comparison = cozy.analysis.Comparison(pre_results, post_results) + comp_time_stop = time.time() + + num_pre_states = num_terminal_states(pre_results) + num_post_states = num_terminal_states(post_results) + + passed = True + try: - pre_results = run(proj_prepatch) - post_results = run(proj_postpatch, prev_underconstrained_state=pre_results.underconstrained_machine_state) - comparison = cozy.analysis.Comparison(pre_results, post_results) ignore_registers(comparison, callee_saved) ignore_return(comparison, return_size) assert(len(comparison.verify(global_var_eq_condition)) == 0) @@ -499,30 +544,70 @@ def run_and_verify_underconstrained(name: str, run, return_size=64, visualize=Fa else: verify_equivalence(comparison) except AssertionError: - print("FAILED: " + name) + passed = False failures.append(name) - return - print("PASSED: " + name) -run_and_verify("base64_decode_alloc_ctx", base64_decode_alloc_ctx, return_size=8) -run_and_verify("base64_decode_ctx", base64_decode_ctx, return_size=8) -run_and_verify("base64_decode_ctx_init", base64_decode_ctx_init, return_size=0) -run_and_verify("base64_encode_alloc", base64_encode_alloc, return_size=64) -run_and_verify("base64_encode", base64_encode, return_size=0) - -run_and_verify("clone_quoting_options", clone_quoting_options, return_size=64) -run_and_verify("close_stdout", close_stdout, return_size=0) -run_and_verify("close_stdout_set_file_name", close_stdout_set_file_name, return_size=0) -run_and_verify("close_stdout_set_ignore_EPIPE", close_stdout_set_ignore_EPIPE, return_size=0) -run_and_verify_underconstrained("close_stream", close_stream_underconstrained, return_size=32) - -run_and_verify("decode_4", decode_4, return_size=8) -run_and_verify("deregister_tm_clones", deregister_tm_clones, return_size=0) - -run_and_verify_underconstrained('fadvise', fadvise_underconstrained, return_size=0) - -run_and_verify_underconstrained('get_quoting_style', get_quoting_style_underconstrained, return_size=32) - -run_and_verify_underconstrained('isbase64', isbase64_underconstrained, return_size=8) + if passed: + print("PASSED: " + name) + else: + print("FAILED: " + name) -print("Failed tests: ", failures) \ No newline at end of file + write_results(name, pre_time_stop - pre_time_start, post_time_stop - post_time_start, + comp_time_stop - comp_time_start, + num_pre_states, num_post_states, passed) + +def do_test(function_name: str): + match function_name: + case "base64_decode_alloc_ctx": + run_and_verify("base64_decode_alloc_ctx", base64_decode_alloc_ctx, return_size=8) + case "base64_decode_ctx": + run_and_verify("base64_decode_ctx", base64_decode_ctx, return_size=8) + case "base64_decode_ctx_init": + run_and_verify("base64_decode_ctx_init", base64_decode_ctx_init, return_size=0) + case "base64_encode_alloc": + run_and_verify("base64_encode_alloc", base64_encode_alloc, return_size=64) + case "base64_encode": + run_and_verify("base64_encode", base64_encode, return_size=0) + case "clone_quoting_options": + run_and_verify("clone_quoting_options", clone_quoting_options, return_size=64) + case "close_stdout": + run_and_verify("close_stdout", close_stdout, return_size=0) + case "close_stdout_set_file_name": + run_and_verify("close_stdout_set_file_name", close_stdout_set_file_name, return_size=0) + case "close_stdout_set_ignore_EPIPE": + run_and_verify("close_stdout_set_ignore_EPIPE", close_stdout_set_ignore_EPIPE, return_size=0) + case "close_stream": + run_and_verify_underconstrained("close_stream", close_stream_underconstrained, return_size=32) + case "decode_4": + run_and_verify("decode_4", decode_4, return_size=8) + case "deregister_tm_clones": + run_and_verify("deregister_tm_clones", deregister_tm_clones, return_size=0) + case "fadvise": + run_and_verify_underconstrained('fadvise', fadvise_underconstrained, return_size=0) + case "get_quoting_style": + run_and_verify_underconstrained('get_quoting_style', get_quoting_style_underconstrained, return_size=32) + case "isbase64": + run_and_verify_underconstrained('isbase64', isbase64_underconstrained, return_size=8) + +if __name__ == "__main__": + if len(sys.argv) >= 2: + function_name = sys.argv[1] + do_test(function_name) + else: + do_test("base64_decode_alloc_ctx") + do_test("base64_decode_ctx") + do_test("base64_decode_ctx_init") + do_test("base64_encode_alloc") + do_test("base64_encode") + do_test("clone_quoting_options") + do_test("close_stdout") + do_test("close_stdout_set_file_name") + do_test("close_stdout_set_ignore_EPIPE") + do_test("close_stream") + do_test("decode_4") + do_test("deregister_tm_clones") + do_test("fadvise") + do_test("get_quoting_style") + do_test("isbase64") + + print("Failed tests: ", failures) \ No newline at end of file diff --git a/paper_eval.sh b/paper_eval.sh new file mode 100755 index 0000000..5233de5 --- /dev/null +++ b/paper_eval.sh @@ -0,0 +1,16 @@ +echo "name,pre_time(s),post_time(s),comp_time(s),num_pre_states,num_post_states,passed" > "eval_results.csv" +python3 examples/paper_eval/retrowrite.py base64_decode_alloc_ctx +python3 examples/paper_eval/retrowrite.py base64_decode_ctx +python3 examples/paper_eval/retrowrite.py base64_decode_ctx_init +python3 examples/paper_eval/retrowrite.py base64_encode_alloc +python3 examples/paper_eval/retrowrite.py base64_encode +python3 examples/paper_eval/retrowrite.py clone_quoting_options +python3 examples/paper_eval/retrowrite.py close_stdout +python3 examples/paper_eval/retrowrite.py close_stdout_set_file_name +python3 examples/paper_eval/retrowrite.py close_stdout_set_ignore_EPIPE +python3 examples/paper_eval/retrowrite.py close_stream +python3 examples/paper_eval/retrowrite.py decode_4 +python3 examples/paper_eval/retrowrite.py deregister_tm_clones +python3 examples/paper_eval/retrowrite.py fadvise +python3 examples/paper_eval/retrowrite.py get_quoting_style +python3 examples/paper_eval/retrowrite.py isbase64 \ No newline at end of file From 26a3cbb7ab3333932b71a1daab924047d070fff8 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Mon, 23 Sep 2024 11:29:59 -0400 Subject: [PATCH 14/14] Added doc on underconstrained execution --- cozy/underconstrained.py | 15 ++++++++-- docs/source/index.rst | 1 + docs/source/underconstrained.rst | 47 ++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 docs/source/underconstrained.rst diff --git a/cozy/underconstrained.py b/cozy/underconstrained.py index b509053..2586ecc 100644 --- a/cozy/underconstrained.py +++ b/cozy/underconstrained.py @@ -7,6 +7,13 @@ class SimConcretizationStrategyUnderconstrained(angr.concretization_strategies.SimConcretizationStrategyNorepeatsRange): + """ + This class extends SimConcretizationStrategyNorepeatsRange, making it suitable for use with underconstrained + execution. The primary use case of this class is to provide concretization strategies for when memory contents + are underconstrained/symbolic. The main problem occurs when reading/writing symbolic addresses, which means + those addresses must be concretized. The strategy we employ is to allocate a fresh chunk of memory for + fresh symbols, giving them sufficient scratch space to store their members. + """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.extra_constraints = [] @@ -79,6 +86,10 @@ def __init__(self, value): self.value = value class DefaultMemoryUnderconstrained(DefaultMemory): + """ + The primary goal of this class is to provide a wrapper for memory use in underconstrained symbolic execution. + Here we record all the fresh symbols that are created whenever underconstrained memory is read. + """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Make a box so that if we change the backer in any copies, all copies get the update @@ -91,10 +102,10 @@ def get_default_backer(self): def set_default_backer(self, backer: DefaultMemory): self.default_backer.value = backer - def set_symbols(self, symbols): + def set_symbols(self, symbols: list[claripy.BVS]): self.symbols = symbols - def get_symbols(self): + def get_symbols(self) -> list[claripy.BVS]: return self.symbols def copy(self, *args, **kwargs): diff --git a/docs/source/index.rst b/docs/source/index.rst index c12b2b4..379bd47 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -15,6 +15,7 @@ Welcome to cozy's documentation! concolic hooks sideeffects + underconstrained diff --git a/docs/source/underconstrained.rst b/docs/source/underconstrained.rst new file mode 100644 index 0000000..282bc34 --- /dev/null +++ b/docs/source/underconstrained.rst @@ -0,0 +1,47 @@ +Underconstrained Execution +================================= + +.. warning:: + Underconstrained symbolic execution is currently an experimental feature and may + not be as stable as typical symbolic execution. + +Underconstrained symbolic execution is a symbol creation and management strategy +that enables execution that requires a minimal amount of set up. In ordinary cozy +symbolic execution, harnesses must be written to set up the input for the programs +being tested. These harnesses can be tedious to write, and require some reverse +engineering effort to write. Underconstrained execution drastically cuts down +on harness creation time. + +In underconstrained execution, the initial registers and memory are entirely symbolic. +This means that setting up the arguments and related data structures is not required. +For simple cases this is fine, however in some scenarios constraints on inputs +are required to ensure that program execution is finite. For these scenarios +underconstrained execution is not suitable. + +Let's take a look at some examples of how to use underconstrained symbolic execution:: + + sess_prepatched = proj_prepatched.session('my_fun', underconstrained_execution=True) + results_prepatched = sess_prepatched.run() + sess_postpatched = proj_postpatched.session('my_fun', underconstrained_execution=True, + underconstrained_initial_state=results_prepatched.underconstrained_machine_state) + results_postpatched = sess_postpatched.run() + comparison = cozy.analysis.Comparison(results_prepatched, results_postpatched) + +Here is a summary of the new features that are used for underconstrained execution: + +#. We must enable underconstrained execution by setting `underconstrained_execution` to `True` when creating a :py:meth:`~cozy.project.Project.session`. +#. We do not pass any arguments to the :py:meth:`~cozy.session.Session.run` method. The underconstrained initial registers and memory allows the execution to evaluate all possible input values. +#. When we create the second session, we must pass the machine state from the first session by passing a :py:class:`~cozy.session.UnderconstrainedMachineState` object. This object is easily obtained by getting it from the `underconstrained_machine_state` property from the :py:class:`~cozy.session.RunResult` of the previous execution. The :py:class:`~cozy.session.UnderconstrainedMachineState` object is used to ensure that the initial symbolic registers and memory layout is the same between the two executions. + +Underconstrained execution employs concretization strategies to concretize symbolic pointers +and symbolic arrays. The strategy employed is based off of the angr +:py:class:`angr.concretization_strategies.SimConcretizationStrategyNorepeatsRange` strategy. +In this strategy, pointers that are symbolic are concretized to fresh areas of memory. This +strategy makes the assumption that pointers shouldn't be aliasing and can in fact point to +different data structures. Pointers for arrays likewise will be concretized to a contiguous +fresh chunk of memory. + +This introduces an initial difficulty to diff analysis. Since these fresh chunks of memory are +allocated as needed (on demand), memory layout between two runs needs to be consistent for the comparison +process. This is why an `underconstrained_initial_state` must be passed to the second session's +run. \ No newline at end of file