Skip to content

Commit

Permalink
JIT: Build and consume FMA node operands in standard order
Browse files Browse the repository at this point in the history
The building and consumption of these operands can happen in op1, op2,
op3 order regardless of whether the codegen uses the registers in a
different order.

Fix dotnet#102773
  • Loading branch information
jakobbotsch committed Jun 5, 2024
1 parent f5a7326 commit b0b1dcd
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 90 deletions.
22 changes: 2 additions & 20 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3071,6 +3071,8 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)

regNumber targetReg = node->GetRegNum();

genConsumeMultiOpOperands(node);

regNumber op1NodeReg = op1->GetRegNum();
regNumber op2NodeReg = op2->GetRegNum();
regNumber op3NodeReg = op3->GetRegNum();
Expand All @@ -3084,11 +3086,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
// Intrinsics with CopyUpperBits semantics cannot have op1 be contained
assert(!copiesUpperBits || !op1->isContained());

// We need to keep this in sync with lsraxarch.cpp
// Ideally we'd actually swap the operands in lsra and simplify codegen
// but its a bit more complicated to do so for many operands as well
// as being complicated to tell codegen how to pick the right instruction

instruction ins = INS_invalid;

if (op1->isContained() || op1->isUsedFromSpillTemp())
Expand Down Expand Up @@ -3159,21 +3156,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
}
}

#ifdef DEBUG
// Use nums are assigned in LIR order but this node is special and doesn't
// actually use operands. Fix up the use nums here to avoid asserts.
unsigned useNum1 = op1->gtUseNum;
unsigned useNum2 = op2->gtUseNum;
unsigned useNum3 = op3->gtUseNum;
emitOp1->gtUseNum = useNum1;
emitOp2->gtUseNum = useNum2;
emitOp3->gtUseNum = useNum3;
#endif

genConsumeRegs(emitOp1);
genConsumeRegs(emitOp2);
genConsumeRegs(emitOp3);

assert(ins != INS_invalid);
genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, emitOp1->GetRegNum(), emitOp2->GetRegNum(), emitOp3, instOptions);
genProduceReg(node);
Expand Down
83 changes: 13 additions & 70 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2467,88 +2467,31 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
}
unsigned resultOpNum = intrinsicTree->GetResultOpNumForRmwIntrinsic(user, op1, op2, op3);

unsigned containedOpNum = 0;

// containedOpNum remains 0 when no operand is contained or regOptional
if (op1->isContained() || op1->IsRegOptional())
{
containedOpNum = 1;
}
else if (op2->isContained() || op2->IsRegOptional())
{
containedOpNum = 2;
}
else if (op3->isContained() || op3->IsRegOptional())
// Intrinsics with CopyUpperBits semantics must have op1 as target
if (copiesUpperBits)
{
containedOpNum = 3;
resultOpNum = 1;
}

GenTree* emitOp1 = op1;
GenTree* emitOp2 = op2;
GenTree* emitOp3 = op3;

// Intrinsics with CopyUpperBits semantics must have op1 as target
assert(containedOpNum != 1 || !copiesUpperBits);

// We need to keep this in sync with hwintrinsiccodegenxarch.cpp
// Ideally we'd actually swap the operands here and simplify codegen
// but its a bit more complicated to do so for many operands as well
// as being complicated to tell codegen how to pick the right instruction
GenTree* ops[] = {op1, op2, op3};
GenTree* resultOp = ops[resultOpNum - 1];

if (containedOpNum == 1)
{
// https://github.com/dotnet/runtime/issues/62215
// resultOpNum might change between lowering and lsra, comment out assertion for now.
// assert(containedOpNum != resultOpNum);
// resultOpNum is 3 or 0: op3/? = ([op1] * op2) + op3
std::swap(emitOp1, emitOp3);

if (resultOpNum == 2)
{
// op2 = ([op1] * op2) + op3
std::swap(emitOp1, emitOp2);
}
}
else if (containedOpNum == 3)
for (GenTree* op : ops)
{
// assert(containedOpNum != resultOpNum);
if (resultOpNum == 2 && !copiesUpperBits)
if (op == resultOp)
{
// op2 = (op1 * op2) + [op3]
std::swap(emitOp1, emitOp2);
tgtPrefUse = BuildUse(op);
srcCount++;
}
// else: op1/? = (op1 * op2) + [op3]
}
else if (containedOpNum == 2)
{
// assert(containedOpNum != resultOpNum);

// op1/? = (op1 * [op2]) + op3
std::swap(emitOp2, emitOp3);
if (resultOpNum == 3 && !copiesUpperBits)
else if (op->isContained() || op->IsRegOptional())
{
// op3 = (op1 * [op2]) + op3
std::swap(emitOp1, emitOp2);
srcCount += op->isContained() ? BuildOperandUses(op) : BuildDelayFreeUses(op, resultOp);
}
}
else
{
// containedOpNum == 0
// no extra work when resultOpNum is 0 or 1
if (resultOpNum == 2)
{
std::swap(emitOp1, emitOp2);
}
else if (resultOpNum == 3)
else
{
std::swap(emitOp1, emitOp3);
srcCount += BuildDelayFreeUses(op, resultOp);
}
}
tgtPrefUse = BuildUse(emitOp1);

srcCount += 1;
srcCount += BuildDelayFreeUses(emitOp2, emitOp1);
srcCount += emitOp3->isContained() ? BuildOperandUses(emitOp3) : BuildDelayFreeUses(emitOp3, emitOp1);

if (intrinsicTree->OperIsEmbRoundingEnabled() && !intrinsicTree->Op(4)->IsCnsIntOrI())
{
Expand Down

0 comments on commit b0b1dcd

Please sign in to comment.