Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NativeAOT/Windows/Arm64: Add TLS inline support #104282

Merged
merged 13 commits into from
Jul 3, 2024
22 changes: 20 additions & 2 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2235,15 +2235,25 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size,
{
// reg cannot be a FP register
assert(!genIsValidFloatReg(reg));

emitAttr origAttr = size;
if (!compiler->opts.compReloc)
{
size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
}

if (EA_IS_RELOC(size))
{
// This emits a pair of adrp/add (two instructions) with fix-ups.
GetEmitter()->emitIns_R_AI(INS_adrp, size, reg, imm DEBUGARG(targetHandle) DEBUGARG(gtFlags));
if (!EA_IS_CNS_SEC_RELOC(origAttr))
{
// This emits a pair of adrp/add (two instructions) with fix-ups.
GetEmitter()->emitIns_R_AI(INS_adrp, size, reg, imm DEBUGARG(targetHandle) DEBUGARG(gtFlags));
}
else if (compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI))
{
// This emits pair of `add` instructions for TLS reloc
GetEmitter()->emitIns_Add_Add_Tls_Reloc(size, reg, imm DEBUGARG(gtFlags));
}
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
}
else if (imm == 0)
{
Expand Down Expand Up @@ -2357,6 +2367,14 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
attr = EA_SET_FLG(attr, EA_BYREF_FLG);
}

if (compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI))
{
if (con->IsIconHandle(GTF_ICON_SECREL_OFFSET))
{
attr = EA_SET_FLG(attr, EA_CNS_SEC_RELOC);
}
}

instGen_Set_Reg_To_Imm(attr, targetReg, cnsVal,
INS_FLAGS_DONT_CARE DEBUGARG(con->gtTargetHandle) DEBUGARG(con->gtFlags));
regSet.verifyRegUsed(targetReg);
Expand Down
142 changes: 138 additions & 4 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3743,6 +3743,83 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts o
appendToCurIG(id);
}

//-----------------------------------------------------------------------------
// emitIns_Add_Add_Tls_Reloc: Emits the pair of "add" instructions used for TLS
//    access on windows for NativeAOT. Both instructions are created with a
//    zero immediate, carry the TLS handle in their address field and are
//    marked with idSetTlsGD():
//      add reg, reg, #0, LSL #0xC
//      add reg, reg, #0
//
// Arguments:
//    attr    - Instruction attributes (asserted to have EA_CNS_SEC_RELOC set)
//    reg     - Register used as both destination and source of each "add"
//    imm     - The handle of TLS variable
//    gtFlags - DEBUG only gtFlags.
//
void emitter::emitIns_Add_Add_Tls_Reloc(emitAttr  attr,
                                        regNumber reg,
                                        ssize_t imm DEBUGARG(GenTreeFlags gtFlags /* = GTF_EMPTY */))
{
    assert(emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI));
    assert(TargetOS::IsWindows);

    const emitAttr sizeOnlyAttr = EA_SIZE(attr);

    assert(isValidGeneralDatasize(sizeOnlyAttr));
    assert(EA_IS_CNS_SEC_RELOC(attr));

    // Builds one "add reg, reg, #0" descriptor and appends it to the current
    // instruction group. 'descAttr' is the attribute the descriptor is
    // allocated with; 'shiftedBy12' selects the "LSL #0xC" form.
    auto emitTlsAdd = [&](emitAttr descAttr, bool shiftedBy12) {
        instrDesc* tlsAdd = emitNewInstrCns(descAttr, 0);

        tlsAdd->idIns(INS_add);
        tlsAdd->idInsFmt(IF_DI_2A);
        if (shiftedBy12)
        {
            tlsAdd->idInsOpt(INS_OPTS_LSL12);
        }
        tlsAdd->idAddr()->iiaAddr = (BYTE*)imm;

        tlsAdd->idReg1(reg);
        tlsAdd->idReg2(reg);

        // Since this is relocation, set to 8 byte size.
        tlsAdd->idOpSize(EA_8BYTE);
        tlsAdd->idSetTlsGD();

#ifdef DEBUG
        tlsAdd->idDebugOnlyInfo()->idMemCookie = imm;
        tlsAdd->idDebugOnlyInfo()->idFlags     = gtFlags;
#endif

        dispIns(tlsAdd);
        appendToCurIG(tlsAdd);
    };

    // add reg, reg, #0, LSL 0xC
    // Allocated with the full `attr`, so the relocation flags are kept.
    emitTlsAdd(attr, /* shiftedBy12 */ true);

    // add reg, reg, #0
    // Allocated with the flag-stripped size instead of `attr`, so this
    // instruction is not set as having "constant relocation" (i.e.
    // EA_CNS_RELOC_FLG is not set). That is what differentiates it from
    // the first add.
    emitTlsAdd(sizeOnlyAttr, /* shiftedBy12 */ false);
}

/*****************************************************************************
*
* Add an instruction referencing a register and a constant.
Expand Down Expand Up @@ -11252,14 +11329,71 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
dst += emitOutput_Instr(dst, code);

if (id->idIsReloc())
if (id->idIsReloc() && !emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI))
{
assert(sz == sizeof(instrDesc));
assert(id->idAddr()->iiaAddr != nullptr);
emitRecordRelocation(odst, id->idAddr()->iiaAddr,
id->idIsTlsGD() ? IMAGE_REL_AARCH64_TLSDESC_ADD_LO12
: IMAGE_REL_ARM64_PAGEOFFSET_12A);
emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEOFFSET_12A);
}
else
{
if (id->idIsTlsGD())
{
if (TargetOS::IsWindows)
{
if (id->idIsReloc())
{
// This is first "add" of "add/add" pair
emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_SECREL_HIGH12A);
}
else
{
// This is second "add" of "add/add" pair
emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_SECREL_LOW12L);
}
}
else
{
// For unix/arm64 it is the "add" of "adrp/add" pair
emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_AARCH64_TLSDESC_ADD_LO12);
}
}
else if (id->idIsReloc())
{
assert(sz == sizeof(instrDesc));
assert(id->idAddr()->iiaAddr != nullptr);
emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEOFFSET_12A);
}
}

// if (id->idIsReloc())
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
//{
// assert(sz == sizeof(instrDesc));
// assert(id->idAddr()->iiaAddr != nullptr);

// if (id->idIsTlsGD())
// {
// assert(emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI));

// if (TargetOS::IsWindows)
// {
// emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_SECREL_HIGH12A);
// }
// else
// {
// emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_AARCH64_TLSDESC_ADD_LO12);
// }
// }
// else
// {
// emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEOFFSET_12A);
// }
//}
// else if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && id->idIsTlsGD())
//{
// assert(TargetOS::IsWindows);
// emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_SECREL_LOW12L);
//}
break;

case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1455,6 +1455,7 @@ void emitIns_R_I(instruction ins,
insOpts opt = INS_OPTS_NONE,
insScalableOpts sopt = INS_SCALABLE_OPTS_NONE DEBUGARG(size_t targetHandle = 0)
DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
void emitIns_Add_Add_Tls_Reloc(emitAttr attr, regNumber reg, ssize_t imm DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));

void emitInsSve_R_I(instruction ins,
emitAttr attr,
Expand Down
5 changes: 1 addition & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3779,14 +3779,11 @@ bool GenTreeOp::IsValidLongMul()
GenTree* op1 = gtGetOp1();
GenTree* op2 = gtGetOp2();

if (!TypeIs(TYP_LONG))
if (!TypeIs(TYP_LONG) || !op1->TypeIs(TYP_LONG) || op2->TypeIs(TYP_LONG))
{
return false;
}

assert(op1->TypeIs(TYP_LONG));
assert(op2->TypeIs(TYP_LONG));

kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
if (!(op1->OperIs(GT_CAST) && genActualTypeIsInt(op1->AsCast()->CastOp())))
{
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ public enum RelocType
// Windows x64
IMAGE_REL_SECREL = 0x104,

// Windows arm64 TLS access
// The native value of IMAGE_REL_ARM64_SECREL_HIGH12A and the value of IMAGE_REL_BASED_DIR64 are the same.
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
// Hence, IMAGE_REL_BASED_ARM64_SECREL_HIGH12A is assigned a different value.
// IMAGE_REL_BASED_ARM64_SECREL_LOW12L is assigned 0x1B, just for consistency.
IMAGE_REL_BASED_ARM64_SECREL_HIGH12A = 0x1A, // ADD high 12-bit offset for tls
IMAGE_REL_BASED_ARM64_SECREL_LOW12L = 0x1B, // ADD low 12-bit offset for tls

// Linux x64
// GD model
IMAGE_REL_TLSGD = 0x105,
Expand Down Expand Up @@ -288,6 +295,28 @@ private static unsafe void PutArm64Rel12(uint* pCode, int imm12)
Debug.Assert(GetArm64Rel12(pCode) == imm12);
}

//*****************************************************************************
//  Deposit the 12-bit offset 'imm12' into the immediate field (bits 21-10) of
//  an add instruction.
//  Same as PutArm64Rel12(), except the assert here checks that the instruction
//  is the 64-bit "add" form with "LSL #12" encoded (bit 22 set).
//*****************************************************************************
private static unsafe void PutArm64TlsRel12(uint* pCode, int imm12)
{
    // Bits 31-22: opcode, including the shift bit.
    const uint OpcodeMask = 0xFFC00000;
    // Expected opcode bits: 1001 0001 01..., i.e. add (immediate) with LSL #12.
    const uint AddLsl12Opcode = 0x91400000;
    // Bits 9-0: the Rn and Rd register fields.
    const uint RegisterFieldsMask = 0x000003FF;

    // Verify that we got a valid offset
    Debug.Assert(FitsInRel12(imm12));

    uint original = *pCode;
    Debug.Assert((original & OpcodeMask) == AddLsl12Opcode);

    // Keep bits 31-22 and 9-0, then place the offset into bits 21-10.
    uint patched = (original & (OpcodeMask | RegisterFieldsMask)) | (uint)(imm12 << 10);

    *pCode = patched; // write the assembled instruction

    Debug.Assert(GetArm64Rel12(pCode) == imm12);
}

private static unsafe int GetArm64Rel28(uint* pCode)
{
uint branchInstr = *pCode;
Expand Down Expand Up @@ -505,8 +534,12 @@ public static unsafe void WriteValue(RelocType relocType, void* location, long v
case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21:
PutArm64Rel21((uint*)location, (int)value);
break;
case RelocType.IMAGE_REL_BASED_ARM64_SECREL_HIGH12A:
PutArm64TlsRel12((uint*)location, (int)value);
break;
case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A:
case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADD_LO12:
case RelocType.IMAGE_REL_BASED_ARM64_SECREL_LOW12L:
PutArm64Rel12((uint*)location, (int)value);
break;
case RelocType.IMAGE_REL_BASED_LOONGARCH64_PC:
Expand Down Expand Up @@ -562,6 +595,8 @@ public static unsafe long ReadValue(RelocType relocType, void* location)
case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21:
return GetArm64Rel21((uint*)location);
case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A:
case RelocType.IMAGE_REL_BASED_ARM64_SECREL_HIGH12A:
case RelocType.IMAGE_REL_BASED_ARM64_SECREL_LOW12L:
return GetArm64Rel12((uint*)location);
case RelocType.IMAGE_REL_AARCH64_TLSDESC_LD64_LO12:
case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADD_LO12:
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3902,6 +3902,8 @@ private static RelocType GetRelocType(TargetArchitecture targetArchitecture, ush
const ushort IMAGE_REL_ARM64_BRANCH26 = 3;
const ushort IMAGE_REL_ARM64_PAGEBASE_REL21 = 4;
const ushort IMAGE_REL_ARM64_PAGEOFFSET_12A = 6;
const ushort IMAGE_REL_ARM64_SECREL_HIGH12A = 0xA;
const ushort IMAGE_REL_ARM64_SECREL_LOW12L = 0xB;
Copy link
Member

@VSadov VSadov Jul 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: some constants here match values from RelocType and some match the actual platform-specific values (for the ease of disassembling?).

If that is the case, then maybe always use platform-specific values?
I.E. IMAGE_REL_ARM64_TLSDESC_CALL would match R_AARCH64_TLSDESC_CALL, which is 0x239

Or the other way - just use RelocType values for all?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does not need to be addressed in this change since the inconsistency is already there.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, probably will not include in this PR.

const ushort IMAGE_REL_ARM64_TLSDESC_ADR_PAGE21 = 0x107;
const ushort IMAGE_REL_ARM64_TLSDESC_LD64_LO12 = 0x108;
const ushort IMAGE_REL_ARM64_TLSDESC_ADD_LO12 = 0x109;
Expand All @@ -3924,6 +3926,10 @@ private static RelocType GetRelocType(TargetArchitecture targetArchitecture, ush
return RelocType.IMAGE_REL_AARCH64_TLSDESC_LD64_LO12;
case IMAGE_REL_ARM64_TLSDESC_CALL:
return RelocType.IMAGE_REL_AARCH64_TLSDESC_CALL;
case IMAGE_REL_ARM64_SECREL_HIGH12A:
return RelocType.IMAGE_REL_BASED_ARM64_SECREL_HIGH12A;
case IMAGE_REL_ARM64_SECREL_LOW12L:
return RelocType.IMAGE_REL_BASED_ARM64_SECREL_LOW12L;
default:
Debug.Fail("Invalid RelocType: " + fRelocType);
return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,8 @@ private protected override void EmitRelocations(int sectionIndex, List<SymbolicR
IMAGE_REL_BASED_ARM64_BRANCH26 => IMAGE_REL_ARM64_BRANCH26,
IMAGE_REL_BASED_ARM64_PAGEBASE_REL21 => IMAGE_REL_ARM64_PAGEBASE_REL21,
IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A => IMAGE_REL_ARM64_PAGEOFFSET_12A,
IMAGE_REL_BASED_ARM64_SECREL_HIGH12A => IMAGE_REL_ARM64_SECREL_HIGH12A,
IMAGE_REL_BASED_ARM64_SECREL_LOW12L => IMAGE_REL_ARM64_SECREL_LOW12L,
IMAGE_REL_SECREL => IMAGE_REL_ARM64_SECREL,
IMAGE_REL_SECTION => IMAGE_REL_ARM64_SECTION,
_ => throw new NotSupportedException($"Unsupported relocation: {relocation.Type}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2165,7 +2165,7 @@ private void getFieldInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_MET
else if (field.IsThreadStatic)
{
var target = MethodBeingCompiled.Context.Target;
if ((target.IsWindows && target.Architecture is TargetArchitecture.X64) ||
if ((target.IsWindows && target.Architecture is TargetArchitecture.X64 or TargetArchitecture.ARM64) ||
((target.OperatingSystem == TargetOS.Linux) &&
(target.Architecture is TargetArchitecture.X64 or TargetArchitecture.ARM64)))
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/tools/aot/ILCompiler/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ void RunScanner()
// If we have a scanner, we can inline threadstatics storage using the information we collected at scanning time.
if (!Get(_command.NoInlineTls) &&
((targetOS == TargetOS.Linux && targetArchitecture is TargetArchitecture.X64 or TargetArchitecture.ARM64) ||
(targetOS == TargetOS.Windows && targetArchitecture is TargetArchitecture.X64)))
(targetOS == TargetOS.Windows && targetArchitecture is TargetArchitecture.X64 or TargetArchitecture.ARM64)))
{
builder.UseInlinedThreadStatics(scanResults.GetInlinedThreadStatics());
}
Expand Down
Loading