From d06d9c92ca6601eb4170015d3fb2182ecac54388 Mon Sep 17 00:00:00 2001
From: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com>
Date: Fri, 28 Jan 2022 22:48:30 +0300
Subject: [PATCH] Commutative morph optimizations (#64122)

* Optimize order of morphing

"fgMorphCommutative" exposes information to the later transforms that
makes them more precise.

For example, "MUL(MUL(X, CONST1), CONST2)" is now morphed properly into
a single "mul" instead of one "mul" plus a shift and lea in the case
where "CONST2" was a viable candidate for "mulshift".

Another example is how "fgMorphCommutative" can end up with an
"ADD(X, 0)" that was only being discarded in lowering.

* Fold (x + 0) => x for LONGs on 32 bit

* Enable one opt outside global morph

No reason not to. Just a few diffs.

* Disable mulshift on ARM/ARM64

No LEAs - no point.
---
 src/coreclr/jit/compiler.h   |  6 ----
 src/coreclr/jit/compiler.hpp | 16 ---------
 src/coreclr/jit/morph.cpp    | 66 +++++++++++++++++++++---------------
 3 files changed, 39 insertions(+), 49 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 3f423e8e38f1f..967d27eac4703 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -7122,12 +7122,6 @@ class Compiler

     bool optNarrowTree(GenTree* tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit);

-    /**************************************************************************
-     *                       Optimization conditions
-     *************************************************************************/
-
-    bool optAvoidIntMult(void);
-
 protected:
     // The following is the upper limit on how many expressions we'll keep track
     // of for the CSE analysis.
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index fa6863bef0d2d..f03ecd43f7702 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -3578,22 +3578,6 @@ inline bool Compiler::LoopDsc::lpArrLenLimit(Compiler* comp, ArrIndex* index) co
     return false;
 }

-/*
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XX                                                                           XX
-XX                       Optimization activation rules                       XX
-XX                                                                           XX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-*/
-
-// should we try to replace integer multiplication with lea/add/shift sequences?
-inline bool Compiler::optAvoidIntMult(void)
-{
-    return (compCodeOpt() != SMALL_CODE);
-}
-
 /*
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index e7a3db4e80afd..f07fe3ca9ed02 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -13435,28 +13435,6 @@ GenTree* Compiler::fgOptimizeCommutativeArithmetic(GenTreeOp* tree)
         std::swap(tree->gtOp1, tree->gtOp2);
     }

-    if (!optValnumCSE_phase)
-    {
-        GenTree* optimizedTree = nullptr;
-        if (tree->OperIs(GT_ADD))
-        {
-            optimizedTree = fgOptimizeAddition(tree);
-        }
-        else if (tree->OperIs(GT_MUL))
-        {
-            optimizedTree = fgOptimizeMultiply(tree);
-        }
-        else if (tree->OperIs(GT_AND))
-        {
-            optimizedTree = fgOptimizeBitwiseAnd(tree);
-        }
-
-        if (optimizedTree != nullptr)
-        {
-            return optimizedTree;
-        }
-    }
-
     if (fgOperIsBitwiseRotationRoot(tree->OperGet()))
     {
         GenTree* rotationTree = fgRecognizeAndMorphBitwiseRotation(tree);
@@ -13477,7 +13455,36 @@ GenTree* Compiler::fgOptimizeCommutativeArithmetic(GenTreeOp* tree)

     if (varTypeIsIntegralOrI(tree))
     {
+        genTreeOps oldTreeOper   = tree->OperGet();
         GenTreeOp* optimizedTree = fgMorphCommutative(tree->AsOp());
+        if (optimizedTree != nullptr)
+        {
+            if (!optimizedTree->OperIs(oldTreeOper))
+            {
+                // "optimizedTree" could end up being a COMMA.
+                return optimizedTree;
+            }
+
+            tree = optimizedTree;
+        }
+    }
+
+    if (!optValnumCSE_phase)
+    {
+        GenTree* optimizedTree = nullptr;
+        if (tree->OperIs(GT_ADD))
+        {
+            optimizedTree = fgOptimizeAddition(tree);
+        }
+        else if (tree->OperIs(GT_MUL))
+        {
+            optimizedTree = fgOptimizeMultiply(tree);
+        }
+        else if (tree->OperIs(GT_AND))
+        {
+            optimizedTree = fgOptimizeBitwiseAnd(tree);
+        }
+
         if (optimizedTree != nullptr)
         {
             return optimizedTree;
@@ -13529,8 +13536,7 @@ GenTree* Compiler::fgOptimizeAddition(GenTreeOp* add)

     // Fold (x + 0) - given it won't change the tree type to TYP_REF.
     // TODO-Bug: this code will lose the GC-ness of a tree like "native int + byref(0)".
-    if (op2->IsCnsIntOrI() && (op2->AsIntCon()->IconValue() == 0) &&
-        ((add->TypeGet() == op1->TypeGet()) || !op1->TypeIs(TYP_REF)))
+    if (op2->IsIntegralConst(0) && ((add->TypeGet() == op1->TypeGet()) || !op1->TypeIs(TYP_REF)))
     {
         if (op2->IsCnsIntOrI() && (op2->AsIntCon()->gtFieldSeq != nullptr) &&
             (op2->AsIntCon()->gtFieldSeq != FieldSeqStore::NotAField()))
@@ -13544,9 +13550,8 @@ GenTree* Compiler::fgOptimizeAddition(GenTreeOp* add)
         return op1;
     }

-    // TODO-CQ: this transform preserves VNs and can be enabled outside global morph.
     // Note that these transformations are legal for floating-point ADDs as well.
-    if (opts.OptimizationEnabled() && fgGlobalMorph)
+    if (opts.OptimizationEnabled())
     {
         // - a + b = > b - a
         // ADD((NEG(a), b) => SUB(b, a)
@@ -13655,6 +13660,13 @@ GenTree* Compiler::fgOptimizeMultiply(GenTreeOp* mul)
         return mul;
     }

+#ifdef TARGET_XARCH
+    // Should we try to replace integer multiplication with lea/add/shift sequences?
+    bool mulShiftOpt = compCodeOpt() != SMALL_CODE;
+#else // !TARGET_XARCH
+    bool mulShiftOpt = false;
+#endif // !TARGET_XARCH
+
     size_t abs_mult      = (mult >= 0) ? mult : -mult;
     size_t lowestBit     = genFindLowestBit(abs_mult);
     bool   changeToShift = false;
@@ -13697,7 +13709,7 @@ GenTree* Compiler::fgOptimizeMultiply(GenTreeOp* mul)
             op2->AsIntConCommon()->SetIconValue(genLog2(abs_mult));
             changeToShift = true;
         }
-        else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
+        else if (mulShiftOpt && (lowestBit > 1) && jitIsScaleIndexMul(lowestBit))
        {
             int     shift  = genLog2(lowestBit);
             ssize_t factor = abs_mult >> shift;
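
To make the first commit's "MUL(MUL(X, CONST1), CONST2)" example concrete, below is a minimal standalone C++ sketch of the effect of the reordering, not the JIT's actual code: "mulFolded" and "mulShiftExpanded" are hypothetical names, and the instruction mapping in the comments assumes x64 with CONST1 = 3 and CONST2 = 12 (lowest set bit 4, a valid LEA scale, so a "mulshift" candidate).

// Sketch only: contrasts the tree shapes before and after running
// fgMorphCommutative ahead of fgOptimizeMultiply.
#include <cstdint>
#include <cstdio>

// Post-patch shape: the nested constants fold first, leaving one multiply.
static int64_t mulFolded(int64_t x)
{
    return x * 36; // MUL(MUL(x, 3), 12) => MUL(x, 36): a single "mul"
}

// Pre-patch shape: the inner multiply stays, and the outer multiply by 12
// is expanded by "mulshift" into a multiply-by-3 (an lea on x64) followed
// by a left shift by log2(4) = 2.
static int64_t mulShiftExpanded(int64_t x)
{
    int64_t inner = x * 3;     // the inner "mul" by CONST1
    int64_t lea   = inner * 3; // lea: inner + inner*2
    return lea << 2;           // shl by 2
}

int main()
{
    // Both shapes compute x * 36 (here 5 * 36 = 180); the difference is
    // only in how many instructions the old morph ordering emitted.
    printf("%lld %lld\n", (long long)mulFolded(5), (long long)mulShiftExpanded(5));
    return 0;
}

The point of the reordering is visible in the combined constant: once fgMorphCommutative folds 3 * 12 into 36, fgOptimizeMultiply evaluates 36 as the whole factor rather than seeing 12 in isolation and taking the shift-and-lea path.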