From 9d215e5d143a2705e01be1fed5dd26b5cb332a10 Mon Sep 17 00:00:00 2001 From: Ivan Gotovchits Date: Wed, 9 Feb 2022 09:57:57 -0500 Subject: [PATCH] reimplements clz using the branchless/loopless algorithm (#1423) Inspired by Benjamin Mourad (@bmourad01) implementation in #1416. It is not only efficient but also helps with bap-veri-fication, as the latter do not support while loops (which is used in the previous implementation). --- lib/arm/arm_lifter.ml | 12 ----- lib/arm/arm_types.ml | 1 - plugins/arm/semantics/arm.lisp | 9 ++++ plugins/primus_lisp/semantics/bits.lisp | 58 +++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 13 deletions(-) create mode 100644 plugins/arm/semantics/arm.lisp diff --git a/lib/arm/arm_lifter.ml b/lib/arm/arm_lifter.ml index c1cf8cc45..dc1886689 100644 --- a/lib/arm/arm_lifter.ml +++ b/lib/arm/arm_lifter.ml @@ -335,18 +335,6 @@ let lift_bits mem ops (insn : bits_insn ) = extract 7 0 s ^ extract 15 8 s) in exec [assn (Env.of_reg dest) rev] cond - | `CLZ, [|`Reg dest; src; cond; _|] -> - let shift = tmp ~name:"shift" reg32_t in - let accum = tmp ~name:"accum" reg32_t in - Bil.(exec [ - shift := exp_of_op src; - accum := int32 32; - while_ (var shift <> int32 0) [ - shift := var shift lsr int32 1; - accum := var accum - int32 1; - ]; - Env.of_reg dest := var accum; - ]) cond | insn,ops -> fail [%here] "ops %s doesn't match bits insn %s" (string_of_ops ops) (Arm_insn.to_string (insn :> insn)) diff --git a/lib/arm/arm_types.ml b/lib/arm/arm_types.ml index b73e283ec..b3bf12649 100644 --- a/lib/arm/arm_types.ml +++ b/lib/arm/arm_types.ml @@ -159,7 +159,6 @@ type bits_insn = [ | `UXTH | `REV | `REV16 - | `CLZ ] [@@deriving bin_io, compare, sexp, enumerate] type mult_insn = [ diff --git a/plugins/arm/semantics/arm.lisp b/plugins/arm/semantics/arm.lisp new file mode 100644 index 000000000..b14e62c49 --- /dev/null +++ b/plugins/arm/semantics/arm.lisp @@ -0,0 +1,9 @@ +(declare (context (target arm))) +(defpackage llvm-armv7 (:use arm)) +(in-package arm) + +(require bits) + +(defun CLZ (rd rn pre _) + (when (condition-holds pre) + (set$ rd (clz32 rn)))) diff --git a/plugins/primus_lisp/semantics/bits.lisp b/plugins/primus_lisp/semantics/bits.lisp index fae27f2e9..8538d3664 100644 --- a/plugins/primus_lisp/semantics/bits.lisp +++ b/plugins/primus_lisp/semantics/bits.lisp @@ -21,3 +21,61 @@ in two's complement overflow." (logor (logand (msb rn) (msb rm) (lnot (msb rd))) (logand (lnot (msb rn)) (lnot (msb rm)) (msb rd)))) + +(defmacro popcount/helper (x sh m1 m2 m4 h01) + (prog + (set x (- x (logand (rshift x 1) m1))) + (set x (+ (logand x m2) (logand (rshift x 2) m2))) + (set x (logand (+ x (rshift x 4)) m4)) + (rshift (* x h01) sh))) + +(defmacro popcount16 (x) + (popcount/helper x 8 + 0x5555 + 0x3333 + 0x0f0f + 0x0101)) + +(defmacro popcount32 (x) + (popcount/helper x 24 + 0x55555555 + 0x33333333 + 0x0f0f0f0f + 0x01010101)) + +(defmacro popcount64 (x) + (popcount/helper x 56 + 0x5555555555555555 + 0x3333333333333333 + 0x0f0f0f0f0f0f0f0f + 0x0101010101010101)) + +(defun clz16 (r) + (let ((x r)) + (set x (logor x (rshift x 1))) + (set x (logor x (rshift x 2))) + (set x (logor x (rshift x 4))) + (set x (logor x (rshift x 8))) + (set x (lnot x)) + (popcount16 x))) + +(defun clz32 (x) + (let ((x x)) + (set x (logor x (rshift x 1))) + (set x (logor x (rshift x 2))) + (set x (logor x (rshift x 4))) + (set x (logor x (rshift x 8))) + (set x (logor x (rshift x 16))) + (set x (lnot x)) + (popcount32 x))) + +(defun clz64 (x) + (let ((x x)) + (set x (logor x (rshift x 1))) + (set x (logor x (rshift x 2))) + (set x (logor x (rshift x 4))) + (set x (logor x (rshift x 8))) + (set x (logor x (rshift x 16))) + (set x (logor x (rshift x 32))) + (set x (lnot x)) + (popcount64 x)))