From 5b817098af4f9b6adf1c6ed3e8efc6095f5b24c5 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Sun, 11 Feb 2024 15:31:19 -0800 Subject: [PATCH 1/2] wazevo: aligns globals at 16 byte Signed-off-by: Takeshi Yoneda --- .../engine/wazevo/backend/backend_test.go | 48 ++++++++--------- internal/engine/wazevo/engine.go | 2 +- .../engine/wazevo/frontend/frontend_test.go | 52 +++++++++---------- internal/engine/wazevo/hostmodule.go | 2 +- internal/engine/wazevo/module_engine.go | 10 ++++ internal/engine/wazevo/module_engine_test.go | 9 ++++ .../engine/wazevo/wazevoapi/offsetdata.go | 16 +++++- .../wazevo/wazevoapi/offsetdata_test.go | 52 ++++++++++--------- 8 files changed, 113 insertions(+), 78 deletions(-) diff --git a/internal/engine/wazevo/backend/backend_test.go b/internal/engine/wazevo/backend/backend_test.go index 56b21a8b1f..6c45fe9242 100644 --- a/internal/engine/wazevo/backend/backend_test.go +++ b/internal/engine/wazevo/backend/backend_test.go @@ -1941,18 +1941,18 @@ L2: L1 (SSA Block: blk0): mov x128?, x0 mov x129?, x1 - ldr w130?, [x129?, #0x8] - ldr x131?, [x129?, #0x18] - ldr s132?, [x129?, #0x28] - ldr d133?, [x129?, #0x38] + ldr w130?, [x129?, #0x10] + ldr x131?, [x129?, #0x20] + ldr s132?, [x129?, #0x30] + ldr d133?, [x129?, #0x40] str x129?, [x128?, #0x8] mov x0, x128? mov x1, x129? bl f1 - ldr w134?, [x129?, #0x8] - ldr x135?, [x129?, #0x18] - ldr s136?, [x129?, #0x28] - ldr d137?, [x129?, #0x38] + ldr w134?, [x129?, #0x10] + ldr x135?, [x129?, #0x20] + ldr s136?, [x129?, #0x30] + ldr d137?, [x129?, #0x40] mov v3.8b, v137?.8b mov v2.8b, v136?.8b mov x3, x135? @@ -1970,21 +1970,21 @@ L1 (SSA Block: blk0): orr x27, xzr, #0x20 str x27, [sp, #-0x10]! str x1, [sp, #0x10] - ldr w8, [x1, #0x8] + ldr w8, [x1, #0x10] str w8, [sp, #0x2c] - ldr x9, [x1, #0x18] + ldr x9, [x1, #0x20] str x9, [sp, #0x24] - ldr s8, [x1, #0x28] + ldr s8, [x1, #0x30] str s8, [sp, #0x20] - ldr d9, [x1, #0x38] + ldr d9, [x1, #0x40] str d9, [sp, #0x18] str x1, [x0, #0x8] bl f1 ldr x8, [sp, #0x10] - ldr w9, [x8, #0x8] - ldr x10, [x8, #0x18] - ldr s8, [x8, #0x28] - ldr d9, [x8, #0x38] + ldr w9, [x8, #0x10] + ldr x10, [x8, #0x20] + ldr s8, [x8, #0x30] + ldr d9, [x8, #0x40] mov v3.8b, v9.8b mov v2.8b, v8.8b mov x3, x10 @@ -2011,13 +2011,13 @@ L1 (SSA Block: blk0): L1 (SSA Block: blk0): mov x129?, x1 orr w137?, wzr, #0x1 - str w137?, [x129?, #0x8] + str w137?, [x129?, #0x10] orr x136?, xzr, #0x2 - str x136?, [x129?, #0x18] + str x136?, [x129?, #0x20] ldr s135?, #8; b 8; data.f32 3.000000 - str s135?, [x129?, #0x28] + str s135?, [x129?, #0x30] ldr d134?, #8; b 16; data.f64 4.000000 - str d134?, [x129?, #0x38] + str d134?, [x129?, #0x40] ret `, afterFinalizeARM64: ` @@ -2025,13 +2025,13 @@ L1 (SSA Block: blk0): stp x30, xzr, [sp, #-0x10]! str xzr, [sp, #-0x10]! orr w8, wzr, #0x1 - str w8, [x1, #0x8] + str w8, [x1, #0x10] orr x8, xzr, #0x2 - str x8, [x1, #0x18] + str x8, [x1, #0x20] ldr s8, #8; b 8; data.f32 3.000000 - str s8, [x1, #0x28] + str s8, [x1, #0x30] ldr d8, #8; b 16; data.f64 4.000000 - str d8, [x1, #0x38] + str d8, [x1, #0x40] add sp, sp, #0x10 ldr x30, [sp], #0x10 ret diff --git a/internal/engine/wazevo/engine.go b/internal/engine/wazevo/engine.go index 0fd3b70fd9..131a47bc54 100644 --- a/internal/engine/wazevo/engine.go +++ b/internal/engine/wazevo/engine.go @@ -569,7 +569,7 @@ func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm. me.opaquePtr = &me.opaque[0] } else { if size := compiled.offsets.TotalSize; size != 0 { - opaque := make([]byte, size) + opaque := newAlignedOpaque(size) me.opaque = opaque me.opaquePtr = &opaque[0] } diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go index 0a48c61d83..fa4dcb35c5 100644 --- a/internal/engine/wazevo/frontend/frontend_test.go +++ b/internal/engine/wazevo/frontend/frontend_test.go @@ -1183,11 +1183,11 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) m: testcases.GlobalsGet.Module, exp: ` blk0: (exec_ctx:i64, module_ctx:i64) - v2:i32 = Load module_ctx, 0x8 - v3:i64 = Load module_ctx, 0x18 - v4:f32 = Load module_ctx, 0x28 - v5:f64 = Load module_ctx, 0x38 - v6:v128 = Load module_ctx, 0x48 + v2:i32 = Load module_ctx, 0x10 + v3:i64 = Load module_ctx, 0x20 + v4:f32 = Load module_ctx, 0x30 + v5:f64 = Load module_ctx, 0x40 + v6:v128 = Load module_ctx, 0x50 Jump blk_ret, v2, v3, v4, v5, v6 `, }, @@ -1197,15 +1197,15 @@ blk0: (exec_ctx:i64, module_ctx:i64) exp: ` blk0: (exec_ctx:i64, module_ctx:i64) v2:i32 = Iconst_32 0x1 - Store v2, module_ctx, 0x8 + Store v2, module_ctx, 0x10 v3:i64 = Iconst_64 0x2 - Store v3, module_ctx, 0x18 + Store v3, module_ctx, 0x20 v4:f32 = F32const 3.000000 - Store v4, module_ctx, 0x28 + Store v4, module_ctx, 0x30 v5:f64 = F64const 4.000000 - Store v5, module_ctx, 0x38 + Store v5, module_ctx, 0x40 v6:v128 = Vconst 000000000000000a 0000000000000014 - Store v6, module_ctx, 0x48 + Store v6, module_ctx, 0x50 Jump blk_ret, v2, v3, v4, v5, v6 `, }, @@ -1217,16 +1217,16 @@ signatures: sig1: i64i64_v blk0: (exec_ctx:i64, module_ctx:i64) - v2:i32 = Load module_ctx, 0x8 - v3:i64 = Load module_ctx, 0x18 - v4:f32 = Load module_ctx, 0x28 - v5:f64 = Load module_ctx, 0x38 + v2:i32 = Load module_ctx, 0x10 + v3:i64 = Load module_ctx, 0x20 + v4:f32 = Load module_ctx, 0x30 + v5:f64 = Load module_ctx, 0x40 Store module_ctx, exec_ctx, 0x8 Call f1:sig1, exec_ctx, module_ctx - v6:i32 = Load module_ctx, 0x8 - v7:i64 = Load module_ctx, 0x18 - v8:f32 = Load module_ctx, 0x28 - v9:f64 = Load module_ctx, 0x38 + v6:i32 = Load module_ctx, 0x10 + v7:i64 = Load module_ctx, 0x20 + v8:f32 = Load module_ctx, 0x30 + v9:f64 = Load module_ctx, 0x40 Jump blk_ret, v2, v3, v4, v5, v6, v7, v8, v9 `, expAfterOpt: ` @@ -1234,16 +1234,16 @@ signatures: sig1: i64i64_v blk0: (exec_ctx:i64, module_ctx:i64) - v2:i32 = Load module_ctx, 0x8 - v3:i64 = Load module_ctx, 0x18 - v4:f32 = Load module_ctx, 0x28 - v5:f64 = Load module_ctx, 0x38 + v2:i32 = Load module_ctx, 0x10 + v3:i64 = Load module_ctx, 0x20 + v4:f32 = Load module_ctx, 0x30 + v5:f64 = Load module_ctx, 0x40 Store module_ctx, exec_ctx, 0x8 Call f1:sig1, exec_ctx, module_ctx - v6:i32 = Load module_ctx, 0x8 - v7:i64 = Load module_ctx, 0x18 - v8:f32 = Load module_ctx, 0x28 - v9:f64 = Load module_ctx, 0x38 + v6:i32 = Load module_ctx, 0x10 + v7:i64 = Load module_ctx, 0x20 + v8:f32 = Load module_ctx, 0x30 + v9:f64 = Load module_ctx, 0x40 Jump blk_ret, v2, v3, v4, v5, v6, v7, v8, v9 `, }, diff --git a/internal/engine/wazevo/hostmodule.go b/internal/engine/wazevo/hostmodule.go index 3ce9850f44..c01b4fe148 100644 --- a/internal/engine/wazevo/hostmodule.go +++ b/internal/engine/wazevo/hostmodule.go @@ -11,7 +11,7 @@ import ( func buildHostModuleOpaque(m *wasm.Module, listeners []experimental.FunctionListener) moduleContextOpaque { size := len(m.CodeSection)*16 + 32 - ret := make(moduleContextOpaque, size) + ret := newAlignedOpaque(size) binary.LittleEndian.PutUint64(ret[0:], uint64(uintptr(unsafe.Pointer(m)))) diff --git a/internal/engine/wazevo/module_engine.go b/internal/engine/wazevo/module_engine.go index 62a8b4336f..17d28c13fc 100644 --- a/internal/engine/wazevo/module_engine.go +++ b/internal/engine/wazevo/module_engine.go @@ -74,6 +74,16 @@ type ( moduleContextOpaque []byte ) +func newAlignedOpaque(size int) moduleContextOpaque { + // Check if the size is a multiple of 16. + if size%16 != 0 { + panic("size must be a multiple of 16") + } + buf := make([][2]uint64, size/16) + slice := unsafe.Slice(&buf[0][0], size) + return *(*moduleContextOpaque)(unsafe.Pointer(&slice)) +} + func putLocalMemory(opaque []byte, offset wazevoapi.Offset, mem *wasm.MemoryInstance) { s := uint64(len(mem.Buffer)) var b uint64 diff --git a/internal/engine/wazevo/module_engine_test.go b/internal/engine/wazevo/module_engine_test.go index e9e2936fe4..45234e19c0 100644 --- a/internal/engine/wazevo/module_engine_test.go +++ b/internal/engine/wazevo/module_engine_test.go @@ -331,3 +331,12 @@ func Test_getTypeIDOf(t *testing.T) { require.Equal(t, wasm.FunctionTypeID(222), getTypeIDOf(2, m)) require.Equal(t, wasm.FunctionTypeID(111), getTypeIDOf(3, m)) } + +func Test_newAlignedOpaque(t *testing.T) { + for i := 0; i < 100; i++ { + s := 16 * (i + 10) + buf := newAlignedOpaque(s) + require.Equal(t, s, len(buf)) + require.Equal(t, 0, int(uintptr(unsafe.Pointer(&buf[0]))&15)) + } +} diff --git a/internal/engine/wazevo/wazevoapi/offsetdata.go b/internal/engine/wazevo/wazevoapi/offsetdata.go index 56a89517d0..fe6161b049 100644 --- a/internal/engine/wazevo/wazevoapi/offsetdata.go +++ b/internal/engine/wazevo/wazevoapi/offsetdata.go @@ -142,6 +142,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext } if m.ImportMemoryCount > 0 { + offset = align8(offset) // *wasm.MemoryInstance + imported memory's owner (moduleContextOpaque) const importedMemorySizeInOpaqueModuleContext = 16 ret.ImportedMemoryBegin = offset @@ -152,6 +153,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext } if m.ImportFunctionCount > 0 { + offset = align8(offset) ret.ImportedFunctionsBegin = offset // Each function is stored wazevo.functionInstance. size := int(m.ImportFunctionCount) * FunctionInstanceSize @@ -161,6 +163,8 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext } if globals := int(m.ImportGlobalCount) + len(m.GlobalSection); globals > 0 { + // Align to 16 bytes for globals, as f32/f64/v128 might be loaded via SIMD instructions. + offset = align16(offset) ret.GlobalsBegin = offset // Pointers to *wasm.GlobalInstance. offset += Offset(globals) * 16 @@ -169,6 +173,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext } if tables := len(m.TableSection) + int(m.ImportTableCount); tables > 0 { + offset = align8(offset) ret.TypeIDs1stElement = offset offset += 8 // First element of TypeIDs. @@ -181,6 +186,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext } if withListener { + offset = align8(offset) ret.BeforeListenerTrampolines1stElement = offset offset += 8 // First element of BeforeListenerTrampolines. @@ -197,6 +203,14 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext ret.ElementInstances1stElement = offset offset += 8 // First element of ElementInstances. - ret.TotalSize = int(offset) + ret.TotalSize = int(align16(offset)) return ret } + +func align16(o Offset) Offset { + return (o + 15) &^ 15 +} + +func align8(o Offset) Offset { + return (o + 7) &^ 7 +} diff --git a/internal/engine/wazevo/wazevoapi/offsetdata_test.go b/internal/engine/wazevo/wazevoapi/offsetdata_test.go index 9c45f7d5c0..09cf6b7120 100644 --- a/internal/engine/wazevo/wazevoapi/offsetdata_test.go +++ b/internal/engine/wazevo/wazevoapi/offsetdata_test.go @@ -28,7 +28,7 @@ func TestNewModuleContextOffsetData(t *testing.T) { AfterListenerTrampolines1stElement: -1, DataInstances1stElement: 8, ElementInstances1stElement: 16, - TotalSize: 24, + TotalSize: 32, // 16 byte alignment. }, }, { @@ -45,7 +45,7 @@ func TestNewModuleContextOffsetData(t *testing.T) { AfterListenerTrampolines1stElement: -1, DataInstances1stElement: 24, ElementInstances1stElement: 32, - TotalSize: 40, + TotalSize: 48, // 16 byte alignment. }, }, { @@ -62,7 +62,7 @@ func TestNewModuleContextOffsetData(t *testing.T) { AfterListenerTrampolines1stElement: -1, DataInstances1stElement: 24, ElementInstances1stElement: 32, - TotalSize: 40, + TotalSize: 48, // 16 byte alignment. }, }, { @@ -79,7 +79,7 @@ func TestNewModuleContextOffsetData(t *testing.T) { AfterListenerTrampolines1stElement: -1, DataInstances1stElement: 10*FunctionInstanceSize + 8, ElementInstances1stElement: 10*FunctionInstanceSize + 16, - TotalSize: 10*FunctionInstanceSize + 24, + TotalSize: int(align16(Offset(10*FunctionInstanceSize + 24))), }, }, { @@ -96,7 +96,7 @@ func TestNewModuleContextOffsetData(t *testing.T) { AfterListenerTrampolines1stElement: -1, DataInstances1stElement: 10*FunctionInstanceSize + 24, ElementInstances1stElement: 10*FunctionInstanceSize + 32, - TotalSize: 10*FunctionInstanceSize + 40, + TotalSize: int(align16(Offset(10*FunctionInstanceSize + 40))), }, }, { @@ -110,17 +110,18 @@ func TestNewModuleContextOffsetData(t *testing.T) { GlobalSection: make([]wasm.Global, 20), }, exp: ModuleContextOffsetData{ - LocalMemoryBegin: 8, - ImportedMemoryBegin: -1, - ImportedFunctionsBegin: 24, - GlobalsBegin: 24 + 10*FunctionInstanceSize, - TypeIDs1stElement: 24 + 10*FunctionInstanceSize + 16*30, - TablesBegin: 24 + 10*FunctionInstanceSize + 16*30 + 8, + LocalMemoryBegin: 8, + ImportedMemoryBegin: -1, + ImportedFunctionsBegin: 24, + // Align to 16 bytes for globals. + GlobalsBegin: 32 + 10*FunctionInstanceSize, + TypeIDs1stElement: 32 + 10*FunctionInstanceSize + 16*30, + TablesBegin: 32 + 10*FunctionInstanceSize + 16*30 + 8, BeforeListenerTrampolines1stElement: -1, AfterListenerTrampolines1stElement: -1, - DataInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15, - ElementInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8, - TotalSize: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16, + DataInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15, + ElementInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8, + TotalSize: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16, }, }, { @@ -135,17 +136,18 @@ func TestNewModuleContextOffsetData(t *testing.T) { }, withListener: true, exp: ModuleContextOffsetData{ - LocalMemoryBegin: 8, - ImportedMemoryBegin: -1, - ImportedFunctionsBegin: 24, - GlobalsBegin: 24 + 10*FunctionInstanceSize, - TypeIDs1stElement: 24 + 10*FunctionInstanceSize + 16*30, - TablesBegin: 24 + 10*FunctionInstanceSize + 16*30 + 8, - BeforeListenerTrampolines1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15, - AfterListenerTrampolines1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8, - DataInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16, - ElementInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 24, - TotalSize: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 32, + LocalMemoryBegin: 8, + ImportedMemoryBegin: -1, + ImportedFunctionsBegin: 24, + // Align to 16 bytes for globals. + GlobalsBegin: 32 + 10*FunctionInstanceSize, + TypeIDs1stElement: 32 + 10*FunctionInstanceSize + 16*30, + TablesBegin: 32 + 10*FunctionInstanceSize + 16*30 + 8, + BeforeListenerTrampolines1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15, + AfterListenerTrampolines1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8, + DataInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16, + ElementInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 24, + TotalSize: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 32, }, }, } { From 0b793b459e623d8aadfa519ee470855a5194fc84 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Sun, 11 Feb 2024 15:40:40 -0800 Subject: [PATCH 2/2] fix Signed-off-by: Takeshi Yoneda --- internal/engine/wazevo/module_engine.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/engine/wazevo/module_engine.go b/internal/engine/wazevo/module_engine.go index 17d28c13fc..f95e3e684e 100644 --- a/internal/engine/wazevo/module_engine.go +++ b/internal/engine/wazevo/module_engine.go @@ -79,9 +79,10 @@ func newAlignedOpaque(size int) moduleContextOpaque { if size%16 != 0 { panic("size must be a multiple of 16") } - buf := make([][2]uint64, size/16) + type _16 [16]byte + buf := make([]_16, size/16) slice := unsafe.Slice(&buf[0][0], size) - return *(*moduleContextOpaque)(unsafe.Pointer(&slice)) + return *(*moduleContextOpaque)(&slice) } func putLocalMemory(opaque []byte, offset wazevoapi.Offset, mem *wasm.MemoryInstance) {