From 309e2ae582b6ad1ac6a415c6129f631eae24fe10 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 19 Dec 2019 14:03:14 -0800 Subject: [PATCH 1/4] Support 32 byte alignment of code on xarch Update jit and runtime to allow jit to ask for code to be 32 byte aligned. Request 32 byte alignment for Tier1 methods on x86/x64. Add minimal crossgen support; one can imagine requesting or choosing 32 byte alignment for crossgenned code, but that is left as future work. This should provide some measure of performance stability, in particular for microbenchmarks or other code where performance depends crucially on a few branches. It may or may not improve performance. If/when there are regressions we can contemplate updating the jit to add intra-method padding to address alignment sensitive code layout (e.g. dotnet/coreclr#11607). This will require a jit GUID update in addition to the changes here. --- src/coreclr/src/inc/corjit.h | 2 ++ src/coreclr/src/jit/emit.cpp | 9 +++++++++ .../tools/Common/JitInterface/CorInfoTypes.cs | 2 ++ src/coreclr/src/vm/codeman.cpp | 6 +++++- src/coreclr/src/vm/jitinterface.cpp | 19 ++++++++++++++++--- src/coreclr/src/zap/zapinfo.cpp | 12 ++++++++++-- 6 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/coreclr/src/inc/corjit.h b/src/coreclr/src/inc/corjit.h index 2e0a42c95d41a3..190d911f4d54fb 100644 --- a/src/coreclr/src/inc/corjit.h +++ b/src/coreclr/src/inc/corjit.h @@ -179,6 +179,8 @@ enum CorJitAllocMemFlag CORJIT_ALLOCMEM_DEFAULT_CODE_ALIGN = 0x00000000, // The code will be use the normal alignment CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN = 0x00000001, // The code will be 16-byte aligned CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN = 0x00000002, // The read-only data will be 16-byte aligned + CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN = 0x00000004, // The code will be 32-byte aligned + CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN = 0x00000008, // The read-only data will be 32-byte aligned }; inline CorJitAllocMemFlag operator |(CorJitAllocMemFlag a, CorJitAllocMemFlag b) diff --git a/src/coreclr/src/jit/emit.cpp b/src/coreclr/src/jit/emit.cpp index d621d5a060481a..623ff25fc84e2b 100644 --- a/src/coreclr/src/jit/emit.cpp +++ b/src/coreclr/src/jit/emit.cpp @@ -4615,6 +4615,15 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #endif +#ifdef _TARGET_XARCH_ + // For x64/x86, align Tier1 methods to 32 byte boundaries + // + if (emitComp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1)) + { + allocMemFlag = CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN; + } +#endif + if (emitConsDsc.align16) { allocMemFlag = static_cast(allocMemFlag | CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN); diff --git a/src/coreclr/src/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/src/tools/Common/JitInterface/CorInfoTypes.cs index 9f211d80a50fd4..e249197fd3480a 100644 --- a/src/coreclr/src/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/src/tools/Common/JitInterface/CorInfoTypes.cs @@ -764,6 +764,8 @@ public enum CorJitAllocMemFlag CORJIT_ALLOCMEM_DEFAULT_CODE_ALIGN = 0x00000000, // The code will be use the normal alignment CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN = 0x00000001, // The code will be 16-byte aligned CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN = 0x00000002, // The read-only data will be 16-byte aligned + CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN = 0x00000004, // The code will be 32-byte aligned + CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN = 0x00000008, // The read-only data will be 32-byte aligned } public enum CorJitFuncKind diff --git a/src/coreclr/src/vm/codeman.cpp b/src/coreclr/src/vm/codeman.cpp index c0a111beb5a778..e7c08e0255adc5 100644 --- a/src/coreclr/src/vm/codeman.cpp +++ b/src/coreclr/src/vm/codeman.cpp @@ -2551,7 +2551,11 @@ CodeHeader* EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t re unsigned alignment = CODE_SIZE_ALIGN; - if ((flag & CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN) != 0) + if ((flag & CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN) != 0) + { + alignment = max(alignment, 32); + } + else if ((flag & CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN) != 0) { alignment = max(alignment, 16); } diff --git a/src/coreclr/src/vm/jitinterface.cpp b/src/coreclr/src/vm/jitinterface.cpp index dab5c726373333..8fd7319af20449 100644 --- a/src/coreclr/src/vm/jitinterface.cpp +++ b/src/coreclr/src/vm/jitinterface.cpp @@ -12065,7 +12065,11 @@ void CEEJitInfo::allocMem ( S_SIZE_T totalSize = S_SIZE_T(codeSize); size_t roDataAlignment = sizeof(void*); - if ((flag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN)!= 0) + if ((flag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN)!= 0) + { + roDataAlignment = 32; + } + else if ((flag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN)!= 0) { roDataAlignment = 16; } @@ -12075,9 +12079,18 @@ void CEEJitInfo::allocMem ( } if (roDataSize > 0) { - size_t codeAlignment = ((flag & CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN)!= 0) - ? 16 : sizeof(void*); + size_t codeAlignment = sizeof(void*); + + if ((flag & CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN) != 0) + { + codeAlignment = 32; + } + else if ((flag & CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN) != 0) + { + codeAlignment = 16; + } totalSize.AlignUp(codeAlignment); + if (roDataAlignment > codeAlignment) { // Add padding to align read-only data. totalSize += (roDataAlignment - codeAlignment); diff --git a/src/coreclr/src/zap/zapinfo.cpp b/src/coreclr/src/zap/zapinfo.cpp index e5ffb56d1d1650..187fd6c9764818 100644 --- a/src/coreclr/src/zap/zapinfo.cpp +++ b/src/coreclr/src/zap/zapinfo.cpp @@ -1085,7 +1085,11 @@ void ZapInfo::allocMem( UINT align = DEFAULT_CODE_ALIGN; - if ((flag & CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN) && !IsReadyToRunCompilation()) align = max(align, 16); + if (!IsReadyToRunCompilation()) + { + if (flag & CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN) align = max(align, 32); + else if (flag & CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN) align = max(align, 16); + } m_pCode = ZapCodeBlob::NewAlignedBlob(m_pImage, NULL, hotCodeSize, align); *hotCodeBlock = m_pCode->GetData(); @@ -1104,7 +1108,11 @@ void ZapInfo::allocMem( if (roDataSize > 0) { - if (flag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) + if (flag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) + { + align = 32; + } + else if (flag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) { align = 16; } From 3cc708c654e292c1b34ad581378381ec2c5318e4 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 28 Jan 2020 13:34:55 -0800 Subject: [PATCH 2/4] restrict to larger methods with loops; don't update zapper --- src/coreclr/src/jit/emit.cpp | 5 +++-- src/coreclr/src/zap/zapinfo.cpp | 12 ++---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/coreclr/src/jit/emit.cpp b/src/coreclr/src/jit/emit.cpp index 623ff25fc84e2b..a346ba7e25b513 100644 --- a/src/coreclr/src/jit/emit.cpp +++ b/src/coreclr/src/jit/emit.cpp @@ -4616,9 +4616,10 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #endif #ifdef _TARGET_XARCH_ - // For x64/x86, align Tier1 methods to 32 byte boundaries + // For x64/x86, align Tier1 methods to 32 byte boundaries if + // they are larger than 16 bytes and contain a loop. // - if (emitComp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1)) + if (emitComp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1) && (emitTotalHotCodeSize > 16) && emitComp->fgHasLoops) { allocMemFlag = CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN; } diff --git a/src/coreclr/src/zap/zapinfo.cpp b/src/coreclr/src/zap/zapinfo.cpp index 187fd6c9764818..e5ffb56d1d1650 100644 --- a/src/coreclr/src/zap/zapinfo.cpp +++ b/src/coreclr/src/zap/zapinfo.cpp @@ -1085,11 +1085,7 @@ void ZapInfo::allocMem( UINT align = DEFAULT_CODE_ALIGN; - if (!IsReadyToRunCompilation()) - { - if (flag & CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN) align = max(align, 32); - else if (flag & CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN) align = max(align, 16); - } + if ((flag & CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN) && !IsReadyToRunCompilation()) align = max(align, 16); m_pCode = ZapCodeBlob::NewAlignedBlob(m_pImage, NULL, hotCodeSize, align); *hotCodeBlock = m_pCode->GetData(); @@ -1108,11 +1104,7 @@ void ZapInfo::allocMem( if (roDataSize > 0) { - if (flag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) - { - align = 32; - } - else if (flag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) + if (flag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) { align = 16; } From 452e1040350c161daf28a30d67398dbe7b1d479a Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 29 Jan 2020 08:55:53 -0800 Subject: [PATCH 3/4] new jit GUID --- src/coreclr/src/inc/corinfo.h | 10 +++++----- .../src/tools/crossgen2/jitinterface/jitwrapper.cpp | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/coreclr/src/inc/corinfo.h b/src/coreclr/src/inc/corinfo.h index 932afb1d49032d..ca7cdabf45d4fd 100644 --- a/src/coreclr/src/inc/corinfo.h +++ b/src/coreclr/src/inc/corinfo.h @@ -217,11 +217,11 @@ TODO: Talk about initializing strutures before use #endif #endif -SELECTANY const GUID JITEEVersionIdentifier = { /* 13028353-152c-4886-b05b-fa76ee8169cf */ - 0x13028353, - 0x152c, - 0x4886, - {0xb0, 0x5b, 0xfa, 0x76, 0xee, 0x81, 0x69, 0xcf} +SELECTANY const GUID JITEEVersionIdentifier = { /* 96fc0c0a-9f77-450d-9663-ee33ae0fcae8 */ + 0x96fc0c0a, + 0x9f77, + 0x450d, + {0x96, 0x63, 0xee, 0x33, 0xae, 0x0f, 0xca, 0xe8} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/src/tools/crossgen2/jitinterface/jitwrapper.cpp b/src/coreclr/src/tools/crossgen2/jitinterface/jitwrapper.cpp index ce6aae9ba83d90..44ef2b69d49c09 100644 --- a/src/coreclr/src/tools/crossgen2/jitinterface/jitwrapper.cpp +++ b/src/coreclr/src/tools/crossgen2/jitinterface/jitwrapper.cpp @@ -27,11 +27,11 @@ class CORJIT_FLAGS uint64_t corJitFlags; }; -static const GUID JITEEVersionIdentifier = { /* 13028353-152c-4886-b05b-fa76ee8169cf */ - 0x13028353, - 0x152c, - 0x4886, - {0xb0, 0x5b, 0xfa, 0x76, 0xee, 0x81, 0x69, 0xcf} +static const GUID JITEEVersionIdentifier = { /* 96fc0c0a-9f77-450d-9663-ee33ae0fcae8 */ + 0x96fc0c0a, + 0x9f77, + 0x450d, + {0x96, 0x63, 0xee, 0x33, 0xae, 0x0f, 0xca, 0xe8} }; class Jit From daf588a41571edb0a7a0248c43a7f27e1b039388 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 14 Feb 2020 12:11:03 -0800 Subject: [PATCH 4/4] fix target ifdef name --- src/coreclr/src/jit/emit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/emit.cpp b/src/coreclr/src/jit/emit.cpp index a346ba7e25b513..9288f6c2c7392d 100644 --- a/src/coreclr/src/jit/emit.cpp +++ b/src/coreclr/src/jit/emit.cpp @@ -4615,7 +4615,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #endif -#ifdef _TARGET_XARCH_ +#ifdef TARGET_XARCH // For x64/x86, align Tier1 methods to 32 byte boundaries if // they are larger than 16 bytes and contain a loop. //