Skip to content

Commit

Permalink
Fix genPutArgStkFieldList for SIMD12 (#88920)
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorBo authored Jul 15, 2023
1 parent 2a7ba8a commit b898a54
Show file tree
Hide file tree
Showing 11 changed files with 169 additions and 52 deletions.
5 changes: 1 addition & 4 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5313,14 +5313,11 @@ void CodeGen::genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode)
{
// Simply use mov if we move a SIMD12 reg to another SIMD12 reg
assert(GetEmitter()->isVectorRegister(tgtReg));

inst_Mov(treeNode->TypeGet(), tgtReg, dataReg, /* canSkip */ true);
}
else
{
// Need an additional integer register to extract upper 4 bytes from data.
regNumber tmpReg = treeNode->GetSingleTempReg();
GetEmitter()->emitStoreSimd12ToLclOffset(varNum, offs, dataReg, tmpReg);
GetEmitter()->emitStoreSimd12ToLclOffset(varNum, offs, dataReg, treeNode);
}
genUpdateLifeStore(treeNode, tgtReg, varDsc);
}
Expand Down
5 changes: 2 additions & 3 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -815,10 +815,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
assert(compMacOsArm64Abi() || treeNode->GetStackByteSize() % TARGET_POINTER_SIZE == 0);

#ifdef TARGET_ARM64
if (compMacOsArm64Abi() && (treeNode->GetStackByteSize() == 12))
if (treeNode->GetStackByteSize() == 12)
{
regNumber tmpReg = treeNode->GetSingleTempReg();
GetEmitter()->emitStoreSimd12ToLclOffset(varNumOut, argOffsetOut, srcReg, tmpReg);
GetEmitter()->emitStoreSimd12ToLclOffset(varNumOut, argOffsetOut, srcReg, treeNode);
argOffsetOut += 12;
}
else
Expand Down
9 changes: 3 additions & 6 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1868,13 +1868,10 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg
// Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
// argument area.

#if defined(FEATURE_SIMD) && defined(TARGET_ARM64)
// storing of TYP_SIMD12 (i.e. Vector3) argument.
if (compMacOsArm64Abi() && (type == TYP_SIMD12))
#if defined(FEATURE_SIMD)
if (type == TYP_SIMD12)
{
// Need an additional integer register to extract upper 4 bytes from data.
regNumber tmpReg = nextArgNode->GetSingleTempReg();
GetEmitter()->emitStoreSimd12ToLclOffset(outArgVarNum, thisFieldOffset, reg, tmpReg);
GetEmitter()->emitStoreSimd12ToLclOffset(outArgVarNum, thisFieldOffset, reg, nextArgNode);
}
else
#endif // FEATURE_SIMD
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -2714,6 +2714,10 @@ class emitter
void emitMarkStackLvl(unsigned stackLevel);
#endif

#if defined(FEATURE_SIMD)
void emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider);
#endif // FEATURE_SIMD

int emitNextRandomNop();

//
Expand Down
28 changes: 28 additions & 0 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16767,4 +16767,32 @@ bool emitter::IsOptimizableLdrToMov(

return true;
}

#if defined(FEATURE_SIMD)
//-----------------------------------------------------------------------------------
// emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset.
//
// Arguments:
// varNum - the variable on the stack to use as a base;
// offset - the offset from the varNum;
// dataReg - the src reg with SIMD12 value;
// tmpRegProvider - a tree to grab a tmp reg from if needed.
//
void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider)
{
assert(varNum != BAD_VAR_NUM);
assert(isVectorRegister(dataReg));

// store lower 8 bytes
emitIns_S_R(INS_str, EA_8BYTE, dataReg, varNum, offset);

// Extract upper 4-bytes from data
regNumber tmpReg = tmpRegProvider->GetSingleTempReg();
emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2);

// 4-byte write
emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8);
}
#endif // FEATURE_SIMD

#endif // defined(TARGET_ARM64)
26 changes: 0 additions & 26 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1003,30 +1003,4 @@ inline bool emitIsLoadConstant(instrDesc* jmp)
(jmp->idInsFmt() == IF_LARGELDC));
}

#if defined(FEATURE_SIMD)
//-----------------------------------------------------------------------------------
// emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset.
//
// Arguments:
// varNum - the variable on the stack to use as a base;
// offset - the offset from the varNum;
// dataReg - the src reg with SIMD12 value;
// tmpReg - a tmp reg to use for the write, can be general or float.
//
void emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, regNumber tmpReg)
{
assert(varNum != BAD_VAR_NUM);
assert(isVectorRegister(dataReg));

// store lower 8 bytes
emitIns_S_R(INS_str, EA_8BYTE, dataReg, varNum, offset);

// Extract upper 4-bytes from data
emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2);

// 4-byte write
emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8);
}
#endif // FEATURE_SIMD

#endif // TARGET_ARM64
37 changes: 37 additions & 0 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5648,6 +5648,43 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
emitAdjustStackDepthPushPop(ins);
}

#if defined(FEATURE_SIMD)
//-----------------------------------------------------------------------------------
// emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset.
//
// Arguments:
// varNum - the variable on the stack to use as a base;
// offset - the offset from the varNum;
// dataReg - the src reg with SIMD12 value;
// tmpRegProvider - a tree to grab a tmp reg from if needed.
//
void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider)
{
assert(varNum != BAD_VAR_NUM);
assert(isFloatReg(dataReg));

// Store lower 8 bytes
emitIns_S_R(INS_movsd_simd, EA_8BYTE, dataReg, varNum, offset);

if (emitComp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
// Extract and store upper 4 bytes
emitIns_S_R_I(INS_extractps, EA_16BYTE, varNum, offset + 8, dataReg, 2);
}
else
{
regNumber tmpReg = tmpRegProvider->GetSingleTempReg();
assert(isFloatReg(tmpReg));

// Extract upper 4 bytes from data
emitIns_R_R(INS_movhlps, EA_16BYTE, tmpReg, dataReg);

// Store upper 4 bytes
emitIns_S_R(INS_movss, EA_4BYTE, tmpReg, varNum, offset + 8);
}
}
#endif // FEATURE_SIMD

/*****************************************************************************
*
* Add an instruction referencing a register and a constant.
Expand Down
12 changes: 4 additions & 8 deletions src/coreclr/jit/lsraarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,15 +425,11 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
srcCount++;

#if defined(FEATURE_SIMD)
if (compMacOsArm64Abi())
if (use.GetType() == TYP_SIMD12)
{
if (use.GetType() == TYP_SIMD12)
{
// Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
// To assemble the vector properly we would need an additional int register.
// The other platforms can write it as 16-byte using 1 write.
buildInternalIntRegisterDefForNode(use.GetNode());
}
// Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
// To assemble the vector properly we would need an additional int register.
buildInternalIntRegisterDefForNode(use.GetNode());
}
#endif // FEATURE_SIMD
}
Expand Down
20 changes: 15 additions & 5 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1651,12 +1651,22 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
#endif // TARGET_X86

#if defined(FEATURE_SIMD)
// Note that we need to check the field type, not the type of the node. This is because the
// field type will be TYP_SIMD12 whereas the node type might be TYP_SIMD16 for lclVar, where
// we "round up" to 16.
if ((fieldType == TYP_SIMD12) && (simdTemp == nullptr))
if (fieldType == TYP_SIMD12)
{
simdTemp = buildInternalFloatRegisterDefForNode(putArgStk);
// Note that we need to check the field type, not the type of the node. This is because the
// field type will be TYP_SIMD12 whereas the node type might be TYP_SIMD16 for lclVar, where
// we "round up" to 16.
if (simdTemp == nullptr)
{
simdTemp = buildInternalFloatRegisterDefForNode(putArgStk);
}

if (!compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
// To store SIMD12 without SSE4.1 (extractps) we will need
// a temp xmm reg to do the shuffle.
buildInternalFloatRegisterDefForNode(use.GetNode());
}
}
#endif // defined(FEATURE_SIMD)

Expand Down
67 changes: 67 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_79118/Runtime_79118.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Numerics;
using System.Runtime.CompilerServices;
using Xunit;

public readonly struct Ray
{
public readonly Vector3 Origin;
public readonly Vector3 Direction;

public Ray(Vector3 origin, Vector3 direction)
{
Origin = origin;
Direction = direction;
}
}

public class RayTracer
{
private static IEnumerable<object> hittables;

static RayTracer()
{
var list = new List<object>();
list.Add(new object());
hittables = list;
}

[Fact]
public static int TestEntryPoint()
{
Ray r = GetRay();
Consume(r.Direction);
traceRay(r);
return 100;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static void Consume(object o)
{
var _ = o.ToString();
}

private static Ray GetRay()
{
return new Ray(Vector3.Zero, -Vector3.UnitY);
}

private static Vector3 traceRay(Ray r)
{
foreach (object h in hittables)
{
TestHit(r);
return Vector3.One;
}
return Vector3.Zero;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static void TestHit(Ray ray)
{
return;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>

0 comments on commit b898a54

Please sign in to comment.