Skip to content

Commit

Permalink
reimplements clz using the branchless/loopless algorithm (#1423)
Browse files Browse the repository at this point in the history
Inspired by Benjamin Mourad (@bmourad01) implementation in #1416.

It is not only efficient but also helps with bap-veri-fication, as the
latter do not support while loops (which is used in the previous
implementation).
  • Loading branch information
ivg authored Feb 9, 2022
1 parent 594c9cc commit 9d215e5
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 13 deletions.
12 changes: 0 additions & 12 deletions lib/arm/arm_lifter.ml
Original file line number Diff line number Diff line change
Expand Up @@ -335,18 +335,6 @@ let lift_bits mem ops (insn : bits_insn ) =
extract 7 0 s ^
extract 15 8 s) in
exec [assn (Env.of_reg dest) rev] cond
| `CLZ, [|`Reg dest; src; cond; _|] ->
let shift = tmp ~name:"shift" reg32_t in
let accum = tmp ~name:"accum" reg32_t in
Bil.(exec [
shift := exp_of_op src;
accum := int32 32;
while_ (var shift <> int32 0) [
shift := var shift lsr int32 1;
accum := var accum - int32 1;
];
Env.of_reg dest := var accum;
]) cond
| insn,ops ->
fail [%here] "ops %s doesn't match bits insn %s"
(string_of_ops ops) (Arm_insn.to_string (insn :> insn))
Expand Down
1 change: 0 additions & 1 deletion lib/arm/arm_types.ml
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,6 @@ type bits_insn = [
| `UXTH
| `REV
| `REV16
| `CLZ
] [@@deriving bin_io, compare, sexp, enumerate]

type mult_insn = [
Expand Down
9 changes: 9 additions & 0 deletions plugins/arm/semantics/arm.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
(declare (context (target arm)))
(defpackage llvm-armv7 (:use arm))
(in-package arm)

(require bits)

(defun CLZ (rd rn pre _)
(when (condition-holds pre)
(set$ rd (clz32 rn))))
58 changes: 58 additions & 0 deletions plugins/primus_lisp/semantics/bits.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,61 @@
in two's complement overflow."
(logor (logand (msb rn) (msb rm) (lnot (msb rd)))
(logand (lnot (msb rn)) (lnot (msb rm)) (msb rd))))

(defmacro popcount/helper (x sh m1 m2 m4 h01)
(prog
(set x (- x (logand (rshift x 1) m1)))
(set x (+ (logand x m2) (logand (rshift x 2) m2)))
(set x (logand (+ x (rshift x 4)) m4))
(rshift (* x h01) sh)))

(defmacro popcount16 (x)
(popcount/helper x 8
0x5555
0x3333
0x0f0f
0x0101))

(defmacro popcount32 (x)
(popcount/helper x 24
0x55555555
0x33333333
0x0f0f0f0f
0x01010101))

(defmacro popcount64 (x)
(popcount/helper x 56
0x5555555555555555
0x3333333333333333
0x0f0f0f0f0f0f0f0f
0x0101010101010101))

(defun clz16 (r)
(let ((x r))
(set x (logor x (rshift x 1)))
(set x (logor x (rshift x 2)))
(set x (logor x (rshift x 4)))
(set x (logor x (rshift x 8)))
(set x (lnot x))
(popcount16 x)))

(defun clz32 (x)
(let ((x x))
(set x (logor x (rshift x 1)))
(set x (logor x (rshift x 2)))
(set x (logor x (rshift x 4)))
(set x (logor x (rshift x 8)))
(set x (logor x (rshift x 16)))
(set x (lnot x))
(popcount32 x)))

(defun clz64 (x)
(let ((x x))
(set x (logor x (rshift x 1)))
(set x (logor x (rshift x 2)))
(set x (logor x (rshift x 4)))
(set x (logor x (rshift x 8)))
(set x (logor x (rshift x 16)))
(set x (logor x (rshift x 32)))
(set x (lnot x))
(popcount64 x)))

0 comments on commit 9d215e5

Please sign in to comment.