diff --git a/src/meta_basic_block.cc b/src/meta_basic_block.cc
index f2e8092..319f6b2 100644
--- a/src/meta_basic_block.cc
+++ b/src/meta_basic_block.cc
@@ -12,6 +12,9 @@ static bool IsJumpInstruction(const triton::arch::Instruction& instr);
 static void MergeLinkedBasicBlocks(const BasicBlockEdge& edge,
                                    MetaBasicBlock& root_bb,
                                    MetaBasicBlock& target_bb);
+static triton::arch::BasicBlock RemoveNopLikeInstructions(
+    const triton::Context& triton_context,
+    const triton::arch::BasicBlock& triton_bb, bool padding = false);
 
 // Transform a given "Binary Ninja" basic block into one or several
 // `MetaBasicBlock`s that can be simplified with Triton
@@ -222,6 +225,125 @@ static bool IsJumpInstruction(const triton::arch::Instruction& instr) {
   }
 }
 
+// Function inspired from Triton's DSE utility.
+// This function looks for instructions that behave like NOP instructions,
+// removes them from the given basic block and returns a new basic block as a
+// result.
+//
+// Every instruction is executed on its own, in a fresh context where all
+// registers are symbolic and only the program counter is concrete. An
+// instruction is considered NOP-like when the only symbolic expressions it
+// produces are the fall-through update of the program counter and assignments
+// of a register to itself.
+//
+// When `padding` is true, the bytes of the removed instructions are replaced
+// with NOP instructions so that the kept instructions stay at their original
+// addresses.
+static triton::arch::BasicBlock RemoveNopLikeInstructions(
+    const triton::Context& triton, const triton::arch::BasicBlock& triton_bb,
+    bool padding) {
+  // Work on a copy: processing an instruction attaches its symbolic
+  // expressions to it
+  triton::arch::BasicBlock in = triton_bb;
+  triton::arch::BasicBlock out;
+
+  const auto nop_instr = [&]() {
+    triton::Context tmp_ctx(triton.getArchitecture());
+    return tmp_ctx.getNopInstruction();
+  }();
+
+  triton::arch::Architecture arch;
+  arch.setArchitecture(triton.getArchitecture());
+  const auto& pc_reg = arch.getProgramCounter();
+
+  // Address of the first byte that isn't covered by `out` yet
+  auto last_addr = in.getFirstAddress();
+  // Address located right after the last instruction we went through
+  auto end_addr = last_addr;
+
+  // Append NOP instructions to `out` until `target_addr` is reached
+  const auto pad_until = [&](decltype(last_addr) target_addr) {
+    while (target_addr > last_addr) {
+      auto nop = nop_instr;
+      nop.setAddress(last_addr);
+      out.add(nop);
+      last_addr += nop.getSize();
+    }
+  };
+
+  for (auto& instr : in.getInstructions()) {
+    end_addr = instr.getNextAddress();
+
+    triton::Context tmp_ctx(triton.getArchitecture());
+    // Symbolize all registers
+    for (const auto& id_and_reg : tmp_ctx.getAllRegisters()) {
+      tmp_ctx.symbolizeRegister(id_and_reg.second);
+    }
+    // Concretize the program counter
+    const auto instruction_addr = instr.getAddress();
+    tmp_ctx.setConcreteRegisterValue(pc_reg, instruction_addr);
+
+    // Execute instruction symbolically
+    tmp_ctx.processing(instr);
+    const auto post_instruction_addr =
+        tmp_ctx.getConcreteRegisterValue(pc_reg);
+    // True when the instruction doesn't "jump around": the PC now points
+    // right after it
+    const bool falls_through =
+        post_instruction_addr > instruction_addr &&
+        post_instruction_addr - instruction_addr == instr.getSize();
+
+    // Look for a symbolic expression generated by the instruction that
+    // modifies the CPU or memory state meaningfully
+    bool has_side_effects = false;
+    for (const auto& expr : instr.symbolicExpressions) {
+      // Ignore the PC being assigned the address of the instruction located
+      // right after the one we executed
+      if (falls_through &&
+          expr->getOriginRegister().getId() == pc_reg.getId()) {
+        continue;
+      }
+
+      // Ignore same-register assignments
+      if (expr->isRegister() &&
+          expr->getAst()->getType() == triton::ast::REFERENCE_NODE) {
+        auto* reference_node = static_cast<triton::ast::ReferenceNode*>(
+            expr->getAst().get());
+        const auto& rhs_origin_reg =
+            reference_node->getSymbolicExpression()->getOriginRegister();
+        if (expr->getOriginRegister().getId() == rhs_origin_reg.getId()) {
+          continue;
+        }
+      }
+
+      has_side_effects = true;
+      break;
+    }
+
+    if (!has_side_effects) {
+      // Instruction has no side effects, get rid of it. With padding enabled,
+      // the hole it leaves is filled later on, as `last_addr` isn't updated.
+      continue;
+    }
+
+    // Instruction has side effects, keep it in the basic block
+    if (padding) {
+      // Replace the instructions removed right before this one with a nop
+      // padding of the appropriate size
+      pad_until(instr.getAddress());
+    }
+    out.add(instr);
+    last_addr = instr.getNextAddress();
+  }
+
+  if (padding) {
+    // Pad the instructions removed at the end of the block
+    pad_until(end_addr);
+  }
+
+  return out;
+}
+
 // Simplify the given `MetaBasicBlock`s with Triton's dead store elimination
 // pass
 std::vector<MetaBasicBlock> SimplifyMetaBasicBlocks(
@@ -242,6 +364,8 @@ std::vector<MetaBasicBlock> SimplifyMetaBasicBlocks(
         // Simplify basic blocks and disassemble the result
         try {
           auto simplified_triton_bb = triton.simplify(meta_bb.triton_bb());
+          simplified_triton_bb =
+              RemoveNopLikeInstructions(triton, simplified_triton_bb);
           triton.disassembly(simplified_triton_bb, meta_bb.GetStart());
           meta_bb.set_triton_bb(simplified_triton_bb);
           return std::move(meta_bb);