From 2453f16807b85b279efc26d17d6f20de87801c09 Mon Sep 17 00:00:00 2001 From: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com> Date: Wed, 30 Mar 2022 07:48:39 +0300 Subject: [PATCH] Do not generate `TYP_STRUCT` `LCL_FLD` nodes (#66251) * Do not create TYP_STRUCT LCL_FLD * Fix regressions This is also a correctness fix. Missing adding zero-offset field sequences is fatal. * More fixes for regressions Types of location nodes do not matter, the underlying locations do. * Add a comment --- src/coreclr/jit/gentree.cpp | 29 +++++++++++------ src/coreclr/jit/gentree.h | 2 ++ src/coreclr/jit/morph.cpp | 63 ++++++++++++++++++++++++++++--------- 3 files changed, 70 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 7c4c5fe1a9d3f..0b3d94c4f6d3b 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -7042,16 +7042,9 @@ void Compiler::gtBlockOpInit(GenTree* result, GenTree* dst, GenTree* srcOrFillVa GenTree* Compiler::gtNewBlkOpNode(GenTree* dst, GenTree* srcOrFillVal, bool isVolatile, bool isCopyBlock) { assert(dst->OperIsBlk() || dst->OperIsLocal()); - if (isCopyBlock) - { - if (srcOrFillVal->OperIsIndir() && (srcOrFillVal->gtGetOp1()->gtOper == GT_ADDR)) - { - srcOrFillVal = srcOrFillVal->gtGetOp1()->gtGetOp1(); - } - } - else + + if (!isCopyBlock) // InitBlk { - // InitBlk assert(varTypeIsIntegral(srcOrFillVal)); if (varTypeIsStruct(dst)) { @@ -15757,8 +15750,15 @@ bool GenTree::IsLocalAddrExpr(Compiler* comp, { assert(!comp->compRationalIRForm); GenTree* addrArg = AsOp()->gtOp1; + if (addrArg->IsLocal()) // Note that this covers "GT_LCL_FLD." { + FieldSeqNode* zeroOffsetFieldSeq = nullptr; + if (comp->GetZeroOffsetFieldMap()->Lookup(this, &zeroOffsetFieldSeq)) + { + *pFldSeq = comp->GetFieldSeqStore()->Append(zeroOffsetFieldSeq, *pFldSeq); + } + *pLclVarTree = addrArg->AsLclVarCommon(); if (addrArg->OperGet() == GT_LCL_FLD) { @@ -22340,6 +22340,17 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const } } +//------------------------------------------------------------------------ +// GetFieldSeq: Get the field sequence for this local node. +// +// Return Value: +// The sequence of the node for local fields, empty ("nullptr") otherwise. +// +FieldSeqNode* GenTreeLclVarCommon::GetFieldSeq() const +{ + return OperIsLocalField() ? AsLclFld()->GetFieldSeq() : nullptr; +} + #if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) //------------------------------------------------------------------------ // GetResultOpNumForFMA: check if the result is written into one of the operands. diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 15d648d467cff..f6a97dbaa9516 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -3378,6 +3378,8 @@ struct GenTreeLclVarCommon : public GenTreeUnOp return (GetSsaNum() != SsaConfig::RESERVED_SSA_NUM); } + FieldSeqNode* GetFieldSeq() const; + #if DEBUGGABLE_GENTREE GenTreeLclVarCommon() : GenTreeUnOp() { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 8121191b95977..94e93fd622755 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12520,7 +12520,8 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) ival1 = 0; // Don't remove a volatile GT_IND, even if the address points to a local variable. - if ((tree->gtFlags & GTF_IND_VOLATILE) == 0) + // For TYP_STRUCT INDs, we do not know their size, and so will not morph as well. + if (!tree->AsIndir()->IsVolatile() && !tree->TypeIs(TYP_STRUCT)) { /* Try to Fold *(&X) into X */ if (op1->gtOper == GT_ADDR) @@ -12533,13 +12534,9 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) temp = op1->AsOp()->gtOp1; // X - // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that - // they are the *same* struct type. In fact, they almost certainly aren't. If the - // address has an associated field sequence, that identifies this case; go through - // the "lcl_fld" path rather than this one. - FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below. - if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq)) + if (typ == temp->TypeGet()) { + assert(typ != TYP_STRUCT); foldAndReturnTemp = true; } else if (temp->OperIsLocal()) @@ -12715,14 +12712,9 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) // out-of-bounds w.r.t. the local). if ((temp != nullptr) && !foldAndReturnTemp) { - assert(temp->OperIsLocal()); + assert(temp->OperIsLocalRead()); - const unsigned lclNum = temp->AsLclVarCommon()->GetLclNum(); - LclVarDsc* const varDsc = lvaGetDesc(lclNum); - - const var_types tempTyp = temp->TypeGet(); - const bool useExactSize = varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK); - const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp); + unsigned lclNum = temp->AsLclVarCommon()->GetLclNum(); // Make sure we do not enregister this lclVar. lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LocalField)); @@ -12730,7 +12722,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) // If the size of the load is greater than the size of the lclVar, we cannot fold this access into // a lclFld: the access represented by an lclFld node must begin at or after the start of the // lclVar and must not extend beyond the end of the lclVar. - if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize)) + if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= lvaLclExactSize(lclNum))) { GenTreeLclFld* lclFld; @@ -13824,6 +13816,47 @@ GenTree* Compiler::fgOptimizeAddition(GenTreeOp* add) return op1; } + // Reduce local addresses: ADD(ADDR(LCL_VAR), OFFSET) => ADDR(LCL_FLD OFFSET). + // TODO-ADDR: do ADD(LCL_FLD/VAR_ADDR, OFFSET) => LCL_FLD_ADDR instead. + // + if (opts.OptimizationEnabled() && fgGlobalMorph && op1->OperIs(GT_ADDR) && op2->IsCnsIntOrI() && + op1->AsUnOp()->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + GenTreeUnOp* addrNode = op1->AsUnOp(); + GenTreeLclVarCommon* lclNode = addrNode->gtGetOp1()->AsLclVarCommon(); + GenTreeIntCon* offsetNode = op2->AsIntCon(); + if (FitsIn(offsetNode->IconValue())) + { + unsigned offset = lclNode->GetLclOffs() + static_cast(offsetNode->IconValue()); + + // Note: the emitter does not expect out-of-bounds access for LCL_FLD_ADDR. + if (FitsIn(offset) && (offset < lvaLclExactSize(lclNode->GetLclNum()))) + { + // Compose the field sequence: [LCL, ADDR, OFFSET]. + FieldSeqNode* fieldSeq = lclNode->GetFieldSeq(); + FieldSeqNode* zeroOffsetFieldSeq = nullptr; + if (GetZeroOffsetFieldMap()->Lookup(addrNode, &zeroOffsetFieldSeq)) + { + fieldSeq = GetFieldSeqStore()->Append(fieldSeq, zeroOffsetFieldSeq); + GetZeroOffsetFieldMap()->Remove(addrNode); + } + fieldSeq = GetFieldSeqStore()->Append(fieldSeq, offsetNode->gtFieldSeq); + + // Types of location nodes under ADDRs do not matter. We arbitrarily choose TYP_UBYTE. + lclNode->ChangeType(TYP_UBYTE); + lclNode->SetOper(GT_LCL_FLD); + lclNode->AsLclFld()->SetLclOffs(offset); + lclNode->AsLclFld()->SetFieldSeq(fieldSeq); + lvaSetVarDoNotEnregister(lclNode->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); + + DEBUG_DESTROY_NODE(offsetNode); + DEBUG_DESTROY_NODE(add); + + return addrNode; + } + } + } + // Note that these transformations are legal for floating-point ADDs as well. if (opts.OptimizationEnabled()) {