From 7220001c128ab96819ab0ec7b514d5bf222ea231 Mon Sep 17 00:00:00 2001 From: Peter Goodman Date: Sat, 9 Oct 2021 20:12:25 -0400 Subject: [PATCH] Add some extra apis to remill arch for instruction minimum size and alignment (#549) --- include/remill/Arch/Arch.h | 12 +++++++++++- lib/Arch/AArch32/Arch.cpp | 11 ++++++++++- lib/Arch/AArch32/Arch.h | 20 +++++++++++--------- lib/Arch/AArch64/Arch.cpp | 31 +++++++++++++++++++++---------- lib/Arch/SPARC32/Arch.cpp | 12 ++++++++++-- lib/Arch/SPARC64/Arch.cpp | 12 ++++++++++-- lib/Arch/X86/Arch.cpp | 28 +++++++++++++++++++--------- 7 files changed, 92 insertions(+), 34 deletions(-) diff --git a/include/remill/Arch/Arch.h b/include/remill/Arch/Arch.h index f9b23904a..4e5616970 100644 --- a/include/remill/Arch/Arch.h +++ b/include/remill/Arch/Arch.h @@ -208,8 +208,18 @@ class Arch { return this->DecodeInstruction(address, instr_bytes, inst); } + // Minimum alignment of an instruction for this particular architecture. + virtual uint64_t MinInstructionAlign(void) const = 0; + + // Minimum number of bytes in an instruction for this particular architecture. + virtual uint64_t MinInstructionSize(void) const = 0; + // Maximum number of bytes in an instruction for this particular architecture. - virtual uint64_t MaxInstructionSize(void) const = 0; + // + // `permit_fuse_idioms` is `true` if Remill is allowed to decode multiple + // instructions at a time and look for instruction fusing idioms that are + // common to this architecture. + virtual uint64_t MaxInstructionSize(bool permit_fuse_idioms=true) const = 0; // Default calling convention for this architecture. 
virtual llvm::CallingConv::ID DefaultCallingConv(void) const = 0; diff --git a/lib/Arch/AArch32/Arch.cpp b/lib/Arch/AArch32/Arch.cpp index 853859d24..42c12ad9f 100644 --- a/lib/Arch/AArch32/Arch.cpp +++ b/lib/Arch/AArch32/Arch.cpp @@ -51,8 +51,17 @@ AArch32Arch::AArch32Arch(llvm::LLVMContext *context_, OSName os_name_, AArch32Arch::~AArch32Arch(void) {} +// TODO(pag): Eventually handle Thumb2 and unaligned addresses. +uint64_t AArch32Arch::MinInstructionAlign(void) const { + return 4; +} + +uint64_t AArch32Arch::MinInstructionSize(void) const { + return 4; +} + // Maximum number of bytes in an instruction for this particular architecture. -uint64_t AArch32Arch::MaxInstructionSize(void) const { +uint64_t AArch32Arch::MaxInstructionSize(bool) const { return 4; } diff --git a/lib/Arch/AArch32/Arch.h b/lib/Arch/AArch32/Arch.h index 291c94577..a7434fa41 100644 --- a/lib/Arch/AArch32/Arch.h +++ b/lib/Arch/AArch32/Arch.h @@ -27,27 +27,29 @@ class AArch32Arch final : public Arch { virtual ~AArch32Arch(void); // Returns the name of the stack pointer register. - std::string_view StackPointerRegisterName(void) const override; + std::string_view StackPointerRegisterName(void) const final; // Returns the name of the program counter register. - std::string_view ProgramCounterRegisterName(void) const override; + std::string_view ProgramCounterRegisterName(void) const final; // Decode an instuction. bool DecodeInstruction(uint64_t address, std::string_view inst_bytes, - Instruction &inst) const override; + Instruction &inst) const final; - // Maximum number of bytes in an instruction. - uint64_t MaxInstructionSize(void) const override; + // Align/Minimum/Maximum number of bytes in an instruction. 
+ uint64_t MinInstructionAlign(void) const final; + uint64_t MinInstructionSize(void) const final; + uint64_t MaxInstructionSize(bool permit_fuse_idioms) const final; - llvm::Triple Triple(void) const override; - llvm::DataLayout DataLayout(void) const override; + llvm::Triple Triple(void) const final; + llvm::DataLayout DataLayout(void) const final; // Default calling convention for this architecture. - llvm::CallingConv::ID DefaultCallingConv(void) const override; + llvm::CallingConv::ID DefaultCallingConv(void) const final; // Populate the `__remill_basic_block` function with variables. void PopulateBasicBlockFunction(llvm::Module *module, - llvm::Function *bb_func) const override; + llvm::Function *bb_func) const final; private: AArch32Arch(void) = delete; diff --git a/lib/Arch/AArch64/Arch.cpp b/lib/Arch/AArch64/Arch.cpp index 08dcb1174..198f0b61a 100644 --- a/lib/Arch/AArch64/Arch.cpp +++ b/lib/Arch/AArch64/Arch.cpp @@ -113,27 +113,29 @@ class AArch64Arch final : public Arch { virtual ~AArch64Arch(void); // Returns the name of the stack pointer register. - std::string_view StackPointerRegisterName(void) const override; + std::string_view StackPointerRegisterName(void) const final; // Returns the name of the program counter register. - std::string_view ProgramCounterRegisterName(void) const override; + std::string_view ProgramCounterRegisterName(void) const final; // Decode an instruction. bool DecodeInstruction(uint64_t address, std::string_view instr_bytes, - Instruction &inst) const override; + Instruction &inst) const final; - // Maximum number of bytes in an instruction. - uint64_t MaxInstructionSize(void) const override; + // Align/Minimum/Maximum number of bytes in an instruction. 
+ uint64_t MinInstructionAlign(void) const final; + uint64_t MinInstructionSize(void) const final; + uint64_t MaxInstructionSize(bool permit_fuse_idioms) const final; - llvm::Triple Triple(void) const override; - llvm::DataLayout DataLayout(void) const override; + llvm::Triple Triple(void) const final; + llvm::DataLayout DataLayout(void) const final; // Default calling convention for this architecture. - llvm::CallingConv::ID DefaultCallingConv(void) const override; + llvm::CallingConv::ID DefaultCallingConv(void) const final; // Populate the `__remill_basic_block` function with variables. void PopulateBasicBlockFunction(llvm::Module *module, - llvm::Function *bb_func) const override; + llvm::Function *bb_func) const final; private: AArch64Arch(void) = delete; @@ -473,8 +475,17 @@ void AArch64Arch::PopulateBasicBlockFunction(llvm::Module *module, (void) this->RegisterByName(kPCVariableName)->AddressOf(state_ptr_arg, ir); } +// TODO(pag): Eventually handle unaligned addresses. A64 has no Thumb2; instructions are always 4 bytes. +uint64_t AArch64Arch::MinInstructionAlign(void) const { + return 4; +} + +uint64_t AArch64Arch::MinInstructionSize(void) const { + return 4; +} + // Maximum number of bytes in an instruction for this particular architecture. -uint64_t AArch64Arch::MaxInstructionSize(void) const { +uint64_t AArch64Arch::MaxInstructionSize(bool) const { return 4; } diff --git a/lib/Arch/SPARC32/Arch.cpp b/lib/Arch/SPARC32/Arch.cpp index 1d6b6c8d2..cac3cfb46 100644 --- a/lib/Arch/SPARC32/Arch.cpp +++ b/lib/Arch/SPARC32/Arch.cpp @@ -148,9 +148,17 @@ class SPARC32Arch final : public Arch { return kPCRegName; } + uint64_t MinInstructionAlign(void) const final { + return 4; + } + + uint64_t MinInstructionSize(void) const final { + return 4; + } + // Maximum number of bytes in an instruction. - uint64_t MaxInstructionSize(void) const final { - return 8; // To handle `SET` idioms. + uint64_t MaxInstructionSize(bool permit_fuse_idioms) const final { + return permit_fuse_idioms ? 
8 : 4; // To handle `SET` idioms. } // Default calling convention for this architecture. diff --git a/lib/Arch/SPARC64/Arch.cpp b/lib/Arch/SPARC64/Arch.cpp index 0919da751..ba33449f9 100644 --- a/lib/Arch/SPARC64/Arch.cpp +++ b/lib/Arch/SPARC64/Arch.cpp @@ -56,9 +56,17 @@ class SPARC64Arch final : public Arch { return kPCRegName; } + uint64_t MinInstructionAlign(void) const final { + return 4; + } + + uint64_t MinInstructionSize(void) const final { + return 4; + } + // Maximum number of bytes in an instruction. - uint64_t MaxInstructionSize(void) const final { - return 8; // To handle `SET` idioms. + uint64_t MaxInstructionSize(bool permit_fuse_idioms) const final { + return permit_fuse_idioms ? 8 : 4; // To handle `SET` idioms. } // Default calling convention for this architecture. diff --git a/lib/Arch/X86/Arch.cpp b/lib/Arch/X86/Arch.cpp index 0997fe378..346645820 100644 --- a/lib/Arch/X86/Arch.cpp +++ b/lib/Arch/X86/Arch.cpp @@ -793,27 +793,29 @@ class X86Arch final : public Arch { virtual ~X86Arch(void); // Returns the name of the stack pointer register. - std::string_view StackPointerRegisterName(void) const override; + std::string_view StackPointerRegisterName(void) const final; // Returns the name of the program counter register. - std::string_view ProgramCounterRegisterName(void) const override; + std::string_view ProgramCounterRegisterName(void) const final; // Decode an instruction. bool DecodeInstruction(uint64_t address, std::string_view inst_bytes, - Instruction &inst) const override; + Instruction &inst) const final; // Maximum number of bytes in an instruction. 
- uint64_t MaxInstructionSize(void) const override; + uint64_t MinInstructionAlign(void) const final; + uint64_t MinInstructionSize(void) const final; + uint64_t MaxInstructionSize(bool permit_fuse_idioms) const final; - llvm::Triple Triple(void) const override; - llvm::DataLayout DataLayout(void) const override; + llvm::Triple Triple(void) const final; + llvm::DataLayout DataLayout(void) const final; // Default calling convention for this architecture. - llvm::CallingConv::ID DefaultCallingConv(void) const override; + llvm::CallingConv::ID DefaultCallingConv(void) const final; // Populate the `__remill_basic_block` function with variables. void PopulateBasicBlockFunction(llvm::Module *module, - llvm::Function *bb_func) const override; + llvm::Function *bb_func) const final; private: X86Arch(void) = delete; @@ -833,8 +835,16 @@ X86Arch::X86Arch(llvm::LLVMContext *context_, OSName os_name_, X86Arch::~X86Arch(void) {} +uint64_t X86Arch::MinInstructionAlign(void) const { + return 1; +} + +uint64_t X86Arch::MinInstructionSize(void) const { + return 1; +} + // Maximum number of bytes in an instruction for this particular architecture. -uint64_t X86Arch::MaxInstructionSize(void) const { +uint64_t X86Arch::MaxInstructionSize(bool) const { return 15; }