Skip to content

Commit

Permalink
wazevo(arm64): support for atomic cas (#2031)
Browse files Browse the repository at this point in the history
Signed-off-by: Anuraag Agrawal <anuraaga@gmail.com>
  • Loading branch information
anuraaga authored Feb 11, 2024
1 parent 06dc518 commit 2560f84
Show file tree
Hide file tree
Showing 8 changed files with 307 additions and 4 deletions.
35 changes: 31 additions & 4 deletions internal/engine/wazevo/backend/isa/arm64/instr.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ var defKinds = [numInstructionKinds]defKind{
movFromFPSR: defKindRD,
emitSourceOffsetInfo: defKindNone,
atomicRmw: defKindRD,
atomicCas: defKindNone,
}

// Defs returns the list of regalloc.VReg that are defined by the instruction.
Expand Down Expand Up @@ -210,7 +211,11 @@ const (
useKindAMode
useKindRNAMode
useKindCond
useKindVecRRRRewrite
// useKindRDRewrite indicates an instruction where RD is used both as a source and a destination.
// A temporary register for RD must be allocated explicitly: the source is copied into this
// register before the instruction, and afterwards the value is copied from this register to
// the instruction's return register.
useKindRDRewrite
)

var useKinds = [numInstructionKinds]useKind{
Expand Down Expand Up @@ -280,7 +285,7 @@ var useKinds = [numInstructionKinds]useKind{
vecTbl: useKindRNRM,
vecTbl2: useKindRNRN1RM,
vecRRR: useKindRNRM,
vecRRRRewrite: useKindVecRRRRewrite,
vecRRRRewrite: useKindRDRewrite,
vecPermute: useKindRNRM,
fpuToInt: useKindRN,
intToFpu: useKindRN,
Expand All @@ -289,6 +294,7 @@ var useKinds = [numInstructionKinds]useKind{
adr: useKindNone,
emitSourceOffsetInfo: useKindNone,
atomicRmw: useKindRNRM,
atomicCas: useKindRDRewrite,
}

// Uses returns the list of regalloc.VReg that are used by the instruction.
Expand Down Expand Up @@ -357,7 +363,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
for i := byte(0); i < argFloatRealRegs; i++ {
*regs = append(*regs, regInfo.RealRegToVReg[floatParamResultRegs[i]])
}
case useKindVecRRRRewrite:
case useKindRDRewrite:
*regs = append(*regs, i.rn.reg())
*regs = append(*regs, i.rm.reg())
*regs = append(*regs, i.rd.reg())
Expand All @@ -384,7 +390,7 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) {
i.rm = i.rm.assignReg(reg)
}
}
case useKindVecRRRRewrite:
case useKindRDRewrite:
if index == 0 {
if rn := i.rn.reg(); rn.Valid() {
i.rn = i.rn.assignReg(reg)
Expand Down Expand Up @@ -1458,6 +1464,18 @@ func (i *instruction) String() (str string) {
m = m + "b"
}
str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64))
case atomicCas:
m := "casal"
size := byte(32)
switch i.u2 {
case 8:
size = 64
case 2:
m = m + "h"
case 1:
m = m + "b"
}
str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))
case udf:
str = "udf"
case emitSourceOffsetInfo:
Expand All @@ -1483,6 +1501,12 @@ func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt operand, size uint6
i.u2 = size
}

// asAtomicCas turns this instruction into an atomicCas with the given access size in bytes.
//
// rn is kept in i.rn, rs in i.rd and rt in i.rm. In the lowering, rn is the address,
// rs carries the comparison value (and receives the loaded value), and rt is the replacement.
func (i *instruction) asAtomicCas(rn, rs, rt operand, size uint64) {
	i.kind, i.u2 = atomicCas, size
	i.rn = rn
	i.rd = rs
	i.rm = rt
}

// TODO: delete unnecessary things.
const (
// nop0 represents a no-op of zero size.
Expand Down Expand Up @@ -1651,6 +1675,9 @@ const (
exitSequence
// atomicRmw represents an atomic read-modify-write operation with two register sources and a register destination.
atomicRmw
// atomicCas represents an atomic compare-and-swap operation with three register sources. The value loaded from
// memory is written to the source register that contained the comparison value.
atomicCas
// UDF is the undefined instruction. For debugging only.
udf

Expand Down
27 changes: 27 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,13 @@ func (i *instruction) encode(m *machine) {
regNumberInEncoding[i.rn.realReg()],
uint32(i.u2),
))
case atomicCas:
c.Emit4Bytes(encodeAtomicCas(
regNumberInEncoding[i.rd.realReg()],
regNumberInEncoding[i.rm.realReg()],
regNumberInEncoding[i.rn.realReg()],
uint32(i.u2),
))
default:
panic(i.String())
}
Expand Down Expand Up @@ -2257,3 +2264,23 @@ func encodeAtomicRmw(op atomicRmwOp, rs, rt, rn uint32, size uint32) uint32 {

return _31to21<<21 | rs<<16 | _15to10<<10 | rn<<5 | rt
}

// encodeAtomicCas returns the 32-bit machine encoding of a casal-family compare-and-swap.
//
// rs, rt and rn are register numbers placed at bits 20-16, 4-0 and 9-5 respectively.
// size is the access width in bytes (1, 2, 4 or 8) and selects the two size bits at 31-30;
// any other value leaves those bits zero, i.e. it encodes the same as size 1.
func encodeAtomicCas(rs, rt, rn uint32, size uint32) uint32 {
	const (
		// fixedHigh is the operand-independent part of bits 31-21 (size bits excluded).
		fixedHigh uint32 = 0b00001000_111 << 21
		// fixedMid is bits 15-10, which are all ones for this instruction.
		fixedMid uint32 = 0b111111 << 10
	)

	var sizeBits uint32
	switch size {
	case 1:
		sizeBits = 0b00
	case 2:
		sizeBits = 0b01
	case 4:
		sizeBits = 0b10
	case 8:
		sizeBits = 0b11
	}

	return sizeBits<<30 | fixedHigh | rs<<16 | fixedMid | rn<<5 | rt
}
21 changes: 21 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1718,6 +1718,27 @@ func TestInstruction_encode(t *testing.T) {
{want: "0280e138", setup: func(i *instruction) {
i.asAtomicRmw(atomicRmwOpSwp, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1)
}},
{want: "02fce188", setup: func(i *instruction) {
i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4)
}},
{want: "02fce148", setup: func(i *instruction) {
i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2)
}},
{want: "02fce108", setup: func(i *instruction) {
i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1)
}},
{want: "02fce1c8", setup: func(i *instruction) {
i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 8)
}},
{want: "02fce188", setup: func(i *instruction) {
i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4)
}},
{want: "02fce148", setup: func(i *instruction) {
i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2)
}},
{want: "02fce108", setup: func(i *instruction) {
i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1)
}},
{want: "4201231e4201631e4201239e4201639e4201221e4201621e4201229e4201629e", setup: func(i *instruction) {
i.asNop0()
cur := i
Expand Down
42 changes: 42 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/lower_instr.go
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
case ssa.OpcodeAtomicRmw:
m.lowerAtomicRmw(instr)

case ssa.OpcodeAtomicCas:
m.lowerAtomicCas(instr)

default:
panic("TODO: lowering " + op.String())
}
Expand Down Expand Up @@ -2042,6 +2045,45 @@ func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp operand, si
m.insert(rmw)
}

// lowerAtomicCas lowers an SSA atomic compare-and-swap instruction into three machine
// instructions: a move of the comparison value into a fresh temporary, the atomicCas itself
// operating on that temporary, and a move of the temporary into the SSA return value's register.
//
// The SSA arguments are, in order, the address, the expected (comparison) value and the
// replacement value; AtomicCasData supplies the access size.
func (m *machine) lowerAtomicCas(si *ssa.Instruction) {
addr, exp, repl := si.Arg3()
size := si.AtomicCasData()

addrDef, expDef, replDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(exp), m.compiler.ValueDefinition(repl)
// rn: address, rt: replacement value, rs: comparison value.
rn := m.getOperand_NR(addrDef, extModeNone)
rt := m.getOperand_NR(replDef, extModeNone)
rs := m.getOperand_NR(expDef, extModeNone)
// tmp has the same type as the result and stands in for rs in the CAS instruction.
tmp := operandNR(m.compiler.AllocateVReg(si.Return().Type()))

// The moves below are 64-bit or 32-bit depending on the width of the result type.
_64 := si.Return().Type().Bits() == 64
// The register holding the comparison value is overwritten by CAS, so copy rs into the
// temporary before the instruction in case the original value is used somewhere else.
mov := m.allocateInstr()
if _64 {
mov.asMove64(tmp.nr(), rs.nr())
} else {
mov.asMove32(tmp.nr(), rs.nr())
}
m.insert(mov)

// tmp is passed in the rs position, so the CAS reads the comparison value from it.
m.lowerAtomicCasImpl(rn, tmp, rt, size)

// Copy the temporary into the register assigned to the SSA return value.
mov2 := m.allocateInstr()
rd := m.compiler.VRegOf(si.Return())
if _64 {
mov2.asMove64(rd, tmp.nr())
} else {
mov2.asMove32(rd, tmp.nr())
}
m.insert(mov2)
}

// lowerAtomicCasImpl emits a single atomicCas instruction built from the given operands
// and access size, without any surrounding moves.
func (m *machine) lowerAtomicCasImpl(rn, rs, rt operand, size uint64) {
	instr := m.allocateInstr()
	instr.asAtomicCas(rn, rs, rt, size)
	m.insert(instr)
}

// copyToTmp copies the given regalloc.VReg to a temporary register. This is called before cbr to avoid the regalloc issue
// e.g. reload happening in the middle of the exit sequence which is not the path the normal path executes
func (m *machine) copyToTmp(v regalloc.VReg) regalloc.VReg {
Expand Down
86 changes: 86 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1385,3 +1385,89 @@ swpalb w3?, w4?, x2?
})
}
}

// TestMachine_lowerAtomicCas checks the instruction emitted by lowerAtomicCasImpl for each
// combination of operand type (32/64-bit) and access size, by comparing the formatted
// instructions of the current block against the expected text.
func TestMachine_lowerAtomicCas(t *testing.T) {
tests := []struct {
name string
// _64bit selects ssa.TypeI64 (true) or ssa.TypeI32 (false) for the rs and rt vregs.
_64bit bool
// size is the access size passed to lowerAtomicCasImpl.
size uint64
// exp is the expected formatted output, wrapped in leading and trailing newlines.
exp string
}{
{
name: "cas 32",
size: 4,
exp: `
casal w2?, w3?, x1?
`,
},
{
name: "cas 32_16u",
size: 2,
exp: `
casalh w2?, w3?, x1?
`,
},
{
name: "cas 32_8u",
size: 1,
exp: `
casalb w2?, w3?, x1?
`,
},
{
name: "cas 64",
size: 8,
_64bit: true,
exp: `
casal x2?, x3?, x1?
`,
},
{
name: "cas 64_32u",
size: 4,
_64bit: true,
exp: `
casal w2?, w3?, x1?
`,
},
{
name: "cas 64_16u",
size: 2,
_64bit: true,
exp: `
casalh w2?, w3?, x1?
`,
},
{
name: "cas 64_8u",
size: 1,
_64bit: true,
exp: `
casalb w2?, w3?, x1?
`,
},
}

for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
_, _, m := newSetupWithMockContext()
var typ ssa.Type
if tc._64bit {
typ = ssa.TypeI64
} else {
typ = ssa.TypeI32
}
// The address register is always 64-bit; rs and rt use the type under test.
rn := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
rs := operandNR(m.compiler.AllocateVReg(typ))
rt := operandNR(m.compiler.AllocateVReg(typ))

// The expected strings hard-code these vreg IDs (x1?, w2?/x2?, w3?/x3?), so pin them here.
require.Equal(t, 1, int(rn.reg().ID()))
require.Equal(t, 2, int(rs.reg().ID()))
require.Equal(t, 3, int(rt.reg().ID()))

m.lowerAtomicCasImpl(rn, rs, rt, tc.size)
require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
})
}
}
51 changes: 51 additions & 0 deletions internal/engine/wazevo/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,57 @@ func TestE2E(t *testing.T) {
},
},
},
{
name: "atomic_cas",
m: testcases.AtomicCas.Module,
features: api.CoreFeaturesV2 | experimental.CoreFeaturesThreads,
skipAMD64: true,
calls: []callCase{
// no store
{
params: []uint64{1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2},
expResults: []uint64{0, 0, 0, 0, 0, 0, 0},
},
// store
{
params: []uint64{0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2},
expResults: []uint64{0, 0, 0, 0, 0, 0, 0},
},
// store
{
params: []uint64{2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3},
expResults: []uint64{2, 2, 2, 2, 2, 2, 2},
},
// no store
{
params: []uint64{2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4},
expResults: []uint64{3, 3, 3, 3, 3, 3, 3},
},
},
},
{
// Checks if load works when comparison value is zero. It wouldn't if
// the zero register gets used.
name: "atomic_cas_const0",
m: testcases.AtomicCasConst0.Module,
features: api.CoreFeaturesV2 | experimental.CoreFeaturesThreads,
skipAMD64: true,
setupMemory: func(mem api.Memory) {
mem.WriteUint32Le(0, 1)
mem.WriteUint32Le(8, 2)
mem.WriteUint32Le(16, 3)
mem.WriteUint64Le(24, 4)
mem.WriteUint64Le(32, 5)
mem.WriteUint64Le(40, 6)
mem.WriteUint64Le(48, 7)
},
calls: []callCase{
{
params: []uint64{8, 9, 10, 11, 12, 13, 14},
expResults: []uint64{1, 2, 3, 4, 5, 6, 7},
},
},
},
{
name: "float_le",
m: testcases.FloatLe.Module,
Expand Down
5 changes: 5 additions & 0 deletions internal/engine/wazevo/ssa/instructions.go
Original file line number Diff line number Diff line change
Expand Up @@ -2026,6 +2026,11 @@ func (i *Instruction) AtomicRmwData() (op AtomicRmwOp, size uint64) {
return AtomicRmwOp(i.u1), i.u2
}

// AtomicCasData returns the data of this atomic compare-and-swap instruction, which is
// the access size held in u1.
func (i *Instruction) AtomicCasData() (size uint64) {
	size = i.u1
	return size
}

// ReturnVals returns the return values of OpcodeReturn.
func (i *Instruction) ReturnVals() []Value {
return i.vs
Expand Down
Loading

0 comments on commit 2560f84

Please sign in to comment.