From d06d9c92ca6601eb4170015d3fb2182ecac54388 Mon Sep 17 00:00:00 2001
From: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com>
Date: Fri, 28 Jan 2022 22:48:30 +0300
Subject: [PATCH] Commutative morph optimizations (#64122)

* Optimize order of morphing

"fgMorphCommutative" exposes information to the later transforms that
makes them more precise.

For example, "MUL(MUL(X, CONST1), CONST2)" is now morphed properly into
a single "mul" instead of one "mul" plus a shift and lea in the case
where "CONST2" was a viable candidate for "mulshift".

Another example is how "fgMorphCommutative" can end up with an
"ADD(X, 0)" that was only being discarded in lowering.

* Fold (x + 0) => x for LONGs on 32 bit

* Enable one opt outside global morph

No reason not to. Just a few diffs.

* Disable mulshift on ARM/ARM64

No LEAs - no point.
---
 src/coreclr/jit/compiler.h   |  6 ----
 src/coreclr/jit/compiler.hpp | 16 ---------
 src/coreclr/jit/morph.cpp    | 66 +++++++++++++++++++++---------------
 3 files changed, 39 insertions(+), 49 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 3f423e8e38f1f..967d27eac4703 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -7122,12 +7122,6 @@ class Compiler

     bool optNarrowTree(GenTree* tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit);

-    /**************************************************************************
-     *                       Optimization conditions
-     *************************************************************************/
-
-    bool optAvoidIntMult(void);
-
 protected:
     // The following is the upper limit on how many expressions we'll keep track
     // of for the CSE analysis.
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index fa6863bef0d2d..f03ecd43f7702 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -3578,22 +3578,6 @@ inline bool Compiler::LoopDsc::lpArrLenLimit(Compiler* comp, ArrIndex* index) co
     return false;
 }

-/*
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XX                                                                           XX
-XX                       Optimization activation rules                       XX
-XX                                                                           XX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-*/
-
-// should we try to replace integer multiplication with lea/add/shift sequences?
-inline bool Compiler::optAvoidIntMult(void)
-{
-    return (compCodeOpt() != SMALL_CODE);
-}
-
 /*
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index e7a3db4e80afd..f07fe3ca9ed02 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -13435,28 +13435,6 @@ GenTree* Compiler::fgOptimizeCommutativeArithmetic(GenTreeOp* tree)
         std::swap(tree->gtOp1, tree->gtOp2);
     }

-    if (!optValnumCSE_phase)
-    {
-        GenTree* optimizedTree = nullptr;
-        if (tree->OperIs(GT_ADD))
-        {
-            optimizedTree = fgOptimizeAddition(tree);
-        }
-        else if (tree->OperIs(GT_MUL))
-        {
-            optimizedTree = fgOptimizeMultiply(tree);
-        }
-        else if (tree->OperIs(GT_AND))
-        {
-            optimizedTree = fgOptimizeBitwiseAnd(tree);
-        }
-
-        if (optimizedTree != nullptr)
-        {
-            return optimizedTree;
-        }
-    }
-
     if (fgOperIsBitwiseRotationRoot(tree->OperGet()))
     {
         GenTree* rotationTree = fgRecognizeAndMorphBitwiseRotation(tree);
@@ -13477,7 +13455,36 @@ GenTree* Compiler::fgOptimizeCommutativeArithmetic(GenTreeOp* tree)

     if (varTypeIsIntegralOrI(tree))
     {
+        genTreeOps oldTreeOper   = tree->OperGet();
         GenTreeOp* optimizedTree = fgMorphCommutative(tree->AsOp());
+        if (optimizedTree != nullptr)
+        {
+            if (!optimizedTree->OperIs(oldTreeOper))
+            {
+                // "optimizedTree" could end up being a COMMA.
+                return optimizedTree;
+            }
+
+            tree = optimizedTree;
+        }
+    }
+
+    if (!optValnumCSE_phase)
+    {
+        GenTree* optimizedTree = nullptr;
+        if (tree->OperIs(GT_ADD))
+        {
+            optimizedTree = fgOptimizeAddition(tree);
+        }
+        else if (tree->OperIs(GT_MUL))
+        {
+            optimizedTree = fgOptimizeMultiply(tree);
+        }
+        else if (tree->OperIs(GT_AND))
+        {
+            optimizedTree = fgOptimizeBitwiseAnd(tree);
+        }
+
         if (optimizedTree != nullptr)
         {
             return optimizedTree;
@@ -13529,8 +13536,7 @@ GenTree* Compiler::fgOptimizeAddition(GenTreeOp* add)

     // Fold (x + 0) - given it won't change the tree type to TYP_REF.
     // TODO-Bug: this code will lose the GC-ness of a tree like "native int + byref(0)".
-    if (op2->IsCnsIntOrI() && (op2->AsIntCon()->IconValue() == 0) &&
-        ((add->TypeGet() == op1->TypeGet()) || !op1->TypeIs(TYP_REF)))
+    if (op2->IsIntegralConst(0) && ((add->TypeGet() == op1->TypeGet()) || !op1->TypeIs(TYP_REF)))
     {
         if (op2->IsCnsIntOrI() && (op2->AsIntCon()->gtFieldSeq != nullptr) &&
             (op2->AsIntCon()->gtFieldSeq != FieldSeqStore::NotAField()))
@@ -13544,9 +13550,8 @@ GenTree* Compiler::fgOptimizeAddition(GenTreeOp* add)
         return op1;
     }

-    // TODO-CQ: this transform preserves VNs and can be enabled outside global morph.
     // Note that these transformations are legal for floating-point ADDs as well.
-    if (opts.OptimizationEnabled() && fgGlobalMorph)
+    if (opts.OptimizationEnabled())
     {
         // - a + b = > b - a
         // ADD((NEG(a), b) => SUB(b, a)
@@ -13655,6 +13660,13 @@ GenTree* Compiler::fgOptimizeMultiply(GenTreeOp* mul)
         return mul;
     }

+#ifdef TARGET_XARCH
+    // Should we try to replace integer multiplication with lea/add/shift sequences?
+    bool mulShiftOpt = compCodeOpt() != SMALL_CODE;
+#else // !TARGET_XARCH
+    bool mulShiftOpt = false;
+#endif // !TARGET_XARCH
+
     size_t abs_mult      = (mult >= 0) ? mult : -mult;
     size_t lowestBit     = genFindLowestBit(abs_mult);
     bool   changeToShift = false;
@@ -13697,7 +13709,7 @@ GenTree* Compiler::fgOptimizeMultiply(GenTreeOp* mul)
             op2->AsIntConCommon()->SetIconValue(genLog2(abs_mult));
             changeToShift = true;
         }
-        else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
+        else if (mulShiftOpt && (lowestBit > 1) && jitIsScaleIndexMul(lowestBit))
        {
             int     shift  = genLog2(lowestBit);
             ssize_t factor = abs_mult >> shift;
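
To make the first commit's "MUL(MUL(X, CONST1), CONST2)" example concrete, below is a minimal standalone C++ sketch of the effect of the reordering, not the JIT's actual code: "mulFolded" and "mulShiftExpanded" are hypothetical names, and the instruction mapping in the comments assumes x64 with CONST1 = 3 and CONST2 = 12 (lowest set bit 4, a valid LEA scale, so a "mulshift" candidate).

// Sketch only: contrasts the tree shapes before and after running
// fgMorphCommutative ahead of fgOptimizeMultiply.
#include <cstdint>
#include <cstdio>

// Post-patch shape: the nested constants fold first, leaving one multiply.
static int64_t mulFolded(int64_t x)
{
    return x * 36; // MUL(MUL(x, 3), 12) => MUL(x, 36): a single "mul"
}

// Pre-patch shape: the inner multiply stays, and the outer multiply by 12
// is expanded by "mulshift" into a multiply-by-3 (an lea on x64) followed
// by a left shift by log2(4) = 2.
static int64_t mulShiftExpanded(int64_t x)
{
    int64_t inner = x * 3;     // the inner "mul" by CONST1
    int64_t lea   = inner * 3; // lea: inner + inner*2
    return lea << 2;           // shl by 2
}

int main()
{
    // Both shapes compute x * 36 (here 5 * 36 = 180); the difference is
    // only in how many instructions the old morph ordering emitted.
    printf("%lld %lld\n", (long long)mulFolded(5), (long long)mulShiftExpanded(5));
    return 0;
}

The point of the reordering is visible in the combined constant: once fgMorphCommutative folds 3 * 12 into 36, fgOptimizeMultiply evaluates 36 as the whole factor rather than seeing 12 in isolation and taking the shift-and-lea path.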