diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index cbe7fb95046a3d..854cda9b9ca90b 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2492,6 +2492,7 @@ class Compiler friend class CSE_HeuristicRandom; friend class CSE_HeuristicReplay; friend class CSE_HeuristicRL; + friend class CSE_HeuristicParameterized; friend class CSE_Heuristic; friend class CodeGenInterface; friend class CodeGen; diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index abc510d967a80d..d84c2f79b7dda0 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -377,6 +377,14 @@ CONFIG_INTEGER(JitConstCSE, W("JitConstCSE"), 0) #define CONST_CSE_ENABLE_ALL 3 #define CONST_CSE_ENABLE_ALL_NO_SHARING 4 +// If nonzero, use the greedy RL policy. +// +CONFIG_INTEGER(JitRLCSEGreedy, W("JitRLCSEGreedy"), 0) + +// If nonzero, dump out details of parameterized policy evaluation and +// gradient updates +CONFIG_INTEGER(JitRLCSEVerbose, W("JitRLCSEVerbose"), 0) + #if defined(DEBUG) // Allow fine-grained controls of CSEs done in a particular method // @@ -415,7 +423,7 @@ CONFIG_STRING(JitReplayCSE, W("JitReplayCSE")) CONFIG_STRING(JitReplayCSEReward, W("JitReplayCSEReward")) // When set, specifies the initial parameter string for -// a reinforcement-learning based CSE heuristic. +// the reinforcement-learning based CSE heuristic. // // Note you can also set JitReplayCSE and JitReplayCSEPerfScore // along with this, in which case we are asking for a policy @@ -426,16 +434,9 @@ CONFIG_STRING(JitRLCSE, W("JitRLCSE")) // use in learning. CONFIG_STRING(JitRLCSEAlpha, W("JitRLCSEAlpha")) -// If nonzero, dump out details of policy evaluation and -// gradient updates -CONFIG_INTEGER(JitRLCSEVerbose, W("JitRLCSEVerbose"), 0) - // If nonzero, dump candidate feature values CONFIG_INTEGER(JitRLCSECandidateFeatures, W("JitRLCSECandidateFeatures"), 0) -// If nonzero, use the greedy policy with current parameters. -CONFIG_INTEGER(JitRLCSEGreedy, W("JitRLCSEGreedy"), 0) - #endif /// diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index 8c888db5de7c6d..cf2f9486e98697 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -18,6 +18,22 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "optcse.h" +#ifdef DEBUG +#define RLDUMP(...) \ + { \ + if (m_verbose) \ + logf(__VA_ARGS__); \ + } +#define RLDUMPEXEC(x) \ + { \ + if (m_verbose) \ + x; \ + } +#else +#define RLDUMP(...) 
+#define RLDUMPEXEC(x) +#endif + /* static */ const size_t Compiler::s_optCSEhashSizeInitial = EXPSET_SZ * 2; const size_t Compiler::s_optCSEhashGrowthFactor = 2; @@ -2093,7 +2109,6 @@ void CSE_HeuristicCommon::DumpMetrics() CSE_HeuristicRandom::CSE_HeuristicRandom(Compiler* pCompiler) : CSE_HeuristicCommon(pCompiler) { m_cseRNG.Init(m_pCompiler->info.compMethodHash() ^ JitConfig.JitRandomCSE()); - Announce(); } //------------------------------------------------------------------------ @@ -2212,7 +2227,6 @@ void CSE_HeuristicRandom::ConsiderCandidates() // CSE_HeuristicReplay::CSE_HeuristicReplay(Compiler* pCompiler) : CSE_HeuristicCommon(pCompiler) { - Announce(); } //------------------------------------------------------------------------ @@ -2288,108 +2302,33 @@ void CSE_HeuristicReplay::ConsiderCandidates() } } +#endif // DEBUG + +// From PolicyGradient +// Greedy/Base: 35483 methods, 8669 better, 23752 same, 3061 worse, 1.0041 geomean + +double CSE_HeuristicParameterized::s_defaultParameters[CSE_HeuristicParameterized::numParameters] = + {0.2425, 0.2479, 0.1089, -0.2363, 0.2472, -0.0559, -0.8418, -0.0585, -0.2773, 0.0000, 0.0213, -0.4116, 0.0000, + -0.0922, 0.2593, -0.0315, -0.0745, 0.2607, 0.3475, -0.0590, -0.3177, -0.6883, -0.4998, -0.3220, -0.2268}; + //------------------------------------------------------------------------ -// CSE_HeuristicRL: construct RL CSE heuristic +// CSE_HeuristicParameterized: CSE heuristic using parameterized, linear profitability model // // Arguments; // pCompiler - compiler instance // -// Notes: -// This creates the RL CSE heuristic. It does CSEs based on a stochastic -// softmax policy, governed by a parameter vector. -// -// JitRLCSE specified the initial parameter values. -// JitRandomCSE can be used to supply salt for the RNG. -// JitReplayCSE can be used to supply a sequence to follow. -// JitReplayCSEReward can be used to supply the perf score for the sequence. -// -CSE_HeuristicRL::CSE_HeuristicRL(Compiler* pCompiler) - : CSE_HeuristicCommon(pCompiler), m_alpha(0.0), m_updateParameters(false), m_greedy(false), m_verbose(false) +CSE_HeuristicParameterized::CSE_HeuristicParameterized(Compiler* pCompiler) : CSE_HeuristicCommon(pCompiler) { - // Set up the random state - // - m_cseRNG.Init(m_pCompiler->info.compMethodHash() ^ JitConfig.JitRandomCSE()); - - // Parameters + // Default parameter values... // - ConfigDoubleArray initialParameters; - initialParameters.EnsureInit(JitConfig.JitRLCSE()); - const unsigned initialParamLength = initialParameters.GetLength(); - - for (unsigned i = 0; (i < initialParamLength) && (i < numParameters); i++) - { - m_parameters[i] = initialParameters.GetData()[i]; - } - - if (numParameters > initialParamLength) - { - JITDUMP("Too few parameters (expected %d), trailing will be zero\n", numParameters); - for (unsigned i = initialParamLength; i < numParameters; i++) - { - m_parameters[i] = 0; - } - } - else if (numParameters < initialParamLength) + for (unsigned i = 0; i < numParameters; i++) { - JITDUMP("Too many parameters (expected %d), trailing will be ignored\n", numParameters); + m_parameters[i] = s_defaultParameters[i]; } - // Policy sub-behavior: explore / update / greedy - // - // We may be given a prior sequence and perf score to use to - // update the parameters .... if so, we will replay same sequence of CSEs - // (like the replay policy) and update the parameters via the policy - // gradient algorithm. 
- // - // For updates: - // - // m_alpha controls the "step size" or learning rate; when we want to adjust - // the parameters we only partially move them towards the gradient indicated values. - // - // m_rewards describes the reward associated with each step. - // - // This "two-pass" technique (first run the current policy and, obtain the perf score - // and CSE sequence, then rerun with the same sequence and update the policy - // parameters) ensures all the policy model logic is within the - // JIT, so the preference computation and its gradient can be kept in sync. + // These get set during... // - if ((JitConfig.JitReplayCSE() != nullptr) && (JitConfig.JitReplayCSEReward() != nullptr)) - { - m_updateParameters = true; - - // Reward - // - ConfigDoubleArray rewards; - rewards.EnsureInit(JitConfig.JitReplayCSEReward()); - const unsigned rewardsLength = rewards.GetLength(); - - for (unsigned i = 0; (i < rewardsLength) && (i < maxSteps); i++) - { - m_rewards[i] = rewards.GetData()[i]; - } - - for (unsigned i = rewardsLength; i < maxSteps; i++) - { - m_rewards[i] = 0; - } - - // Alpha - // - if (JitConfig.JitRLCSEAlpha() != nullptr) - { - ConfigDoubleArray JitRLCSEAlphaArray; - JitRLCSEAlphaArray.EnsureInit(JitConfig.JitRLCSEAlpha()); - m_alpha = JitRLCSEAlphaArray.GetData()[0]; - } - else - { - m_alpha = 0.001; - } - } - else if (JitConfig.JitRLCSEGreedy() > 0) - { - m_greedy = true; - } + m_localWeights = nullptr; // Stopping "parameter" // @@ -2397,121 +2336,29 @@ CSE_HeuristicRL::CSE_HeuristicRL(Compiler* pCompiler) // Verbose // - if (m_pCompiler->verbose || (JitConfig.JitRLCSEVerbose() > 0)) - { - m_verbose = true; - } + m_verbose = (JitConfig.JitRLCSEVerbose() > 0); #ifdef DEBUG + m_verbose |= m_pCompiler->verbose; CompAllocator allocator = m_pCompiler->getAllocator(CMK_CSE); m_likelihoods = new (allocator) jitstd::vector(allocator); - m_baseLikelihoods = new (allocator) jitstd::vector(allocator); - m_features = new (allocator) jitstd::vector(allocator); #endif - Announce(); -} - -//------------------------------------------------------------------------ -// Name: name this jit heuristic -// -// Returns: -// descriptive name string -// -const char* CSE_HeuristicRL::Name() const -{ - if (m_updateParameters) - { - return "RL Policy Gradient Update"; - } - else if (m_greedy) - { - return "RL Policy Gradient Greedy"; - } - else - { - return "RL Policy Gradient Stochastic"; - } -} - -//------------------------------------------------------------------------ -// Announce: describe heuristic in jit dump -// -void CSE_HeuristicRL::Announce() -{ - JITDUMP("%s salt %d parameters ", Name(), JitConfig.JitRandomCSE()); - for (int i = 0; i < numParameters; i++) - { - JITDUMP("%s%f", (i == 0) ? "" : ",", m_parameters[i]); - } - JITDUMP("\n"); - - if (m_updateParameters) - { - JITDUMP("Operating in update mode with sequence %ls, rewards %ls, and alpha %f\n", JitConfig.JitReplayCSE(), - JitConfig.JitReplayCSEReward(), m_alpha); - } } //------------------------------------------------------------------------ -// DumpMetrics: dump post-CSE metrics +// ConsiderCandidates: examine candidates and perform CSEs. // -void CSE_HeuristicRL::DumpMetrics() +void CSE_HeuristicParameterized::ConsiderCandidates() { - CSE_HeuristicCommon::DumpMetrics(); - - if (m_updateParameters) - { - // For update, dump the new parameter values - // - printf(" updatedparams "); - for (int i = 0; i < numParameters; i++) - { - printf("%s%f", (i == 0) ? 
"" : ",", m_parameters[i]); - } - - if (JitConfig.JitRLCSECandidateFeatures() > 0) - { - bool first = true; - printf(", features "); - for (char* f : *m_features) - { - printf("%s%s", first ? "" : ",", f); - first = false; - } - } - } - else if (m_greedy) - { - // Show the parameters used. - // - printf(" params "); - for (int i = 0; i < numParameters; i++) - { - printf("%s%f", (i == 0) ? "" : ",", m_parameters[i]); - } - } - else - { - // For evaluation, dump likelihood of the choices made - // - printf(" likelihoods "); - bool first = true; - for (double d : *m_likelihoods) - { - printf("%s%.3f", first ? "" : ",", d); - first = false; - } + const int numCandidates = m_pCompiler->optCSECandidateCount; + sortTab = new (m_pCompiler, CMK_CSE) CSEdsc*[numCandidates]; + sortSiz = numCandidates * sizeof(*sortTab); + memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz); - // For evaluation, dump initial likelihood each choice - // - printf(" baseLikelihoods "); - first = true; - for (double d : *m_baseLikelihoods) - { - printf("%s%.3f", first ? "" : ",", d); - first = false; - } - } + // Capture distribution of enregisterable local var weights. + // + CaptureLocalWeights(); + GreedyPolicy(); } //------------------------------------------------------------------------ @@ -2524,7 +2371,7 @@ void CSE_HeuristicRL::DumpMetrics() // Returns: // true if this tree can be a CSE candidate // -bool CSE_HeuristicRL::ConsiderTree(GenTree* tree, bool isReturn) +bool CSE_HeuristicParameterized::ConsiderTree(GenTree* tree, bool isReturn) { return CanConsiderTree(tree, isReturn); } @@ -2537,7 +2384,7 @@ bool CSE_HeuristicRL::ConsiderTree(GenTree* tree, bool isReturn) // Used to estimate where the temp introduced by a CSE would rank compared // to other locals in the method, as they compete for registers. // -void CSE_HeuristicRL::CaptureLocalWeights() +void CSE_HeuristicParameterized::CaptureLocalWeights() { JITDUMP("Local weight table...\n"); CompAllocator allocator = m_pCompiler->getAllocator(CMK_SSA); @@ -2573,35 +2420,6 @@ void CSE_HeuristicRL::CaptureLocalWeights() } } -//------------------------------------------------------------------------ -// ConsiderCandidates: examine candidates and perform CSEs. -// -void CSE_HeuristicRL::ConsiderCandidates() -{ - const int numCandidates = m_pCompiler->optCSECandidateCount; - sortTab = new (m_pCompiler, CMK_CSE) CSEdsc*[numCandidates]; - sortSiz = numCandidates * sizeof(*sortTab); - memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz); - - // Capture distribution of enregisterable local var weights. - // - CaptureLocalWeights(); - - if (m_updateParameters) - { - UpdateParameters(); - return; - } - - if (m_greedy) - { - GreedyPolicy(); - return; - } - - SoftmaxPolicy(); -} - //------------------------------------------------------------------------ // GreedyPolicy: use a greedy policy // @@ -2609,12 +2427,9 @@ void CSE_HeuristicRL::ConsiderCandidates() // This always performs the most-preferred choice, using lower candidate number // as a tie-breaker. // -void CSE_HeuristicRL::GreedyPolicy() +void CSE_HeuristicParameterized::GreedyPolicy() { - if (m_verbose) - { - printf("RL using greedy policy\n"); - } + RLDUMP("RL using greedy policy\n"); // Number of choices is num candidates + 1, since // early stopping is also a choice. 
@@ -2627,11 +2442,13 @@ void CSE_HeuristicRL::GreedyPolicy() Choice& choice = ChooseGreedy(choices); CSEdsc* const dsc = choice.m_dsc; +#ifdef DEBUG if (dsc == nullptr) { m_likelihoods->push_back(choice.m_softmax); break; } +#endif // purge this CSE from sortTab so we won't choose it again // @@ -2655,100 +2472,24 @@ void CSE_HeuristicRL::GreedyPolicy() PerformCSE(&candidate); madeChanges = true; + +#ifdef DEBUG m_likelihoods->push_back(choice.m_softmax); +#endif } return; } //------------------------------------------------------------------------ -// SoftmaxPolicy: use a randomized softmax policy +// GetFeatures: extract features for this CSE +// +// Arguments: +// cse - cse descriptor +// features - array to fill in with feature values // // Notes: -// This converts preferences to likelihoods using softmax, and then -// randomly selects a candidate proportional to its likelihood. -// -void CSE_HeuristicRL::SoftmaxPolicy() -{ - if (m_verbose) - { - printf("RL using softmax policy\n"); - } - - // Number of choices is num candidates + 1, since - // early stopping is also a choice. - // - const int numCandidates = m_pCompiler->optCSECandidateCount; - ArrayStack choices(m_pCompiler->getAllocator(CMK_CSE), numCandidates + 1); - bool first = true; - - while (true) - { - Choice& choice = ChooseSoftmax(choices); - - if (first) - { - for (int i = 0; i < choices.Height(); i++) - { - Choice& option = choices.TopRef(i); - if (option.m_dsc == nullptr) - { - m_baseLikelihoods->push_back(0); - } - else - { - m_baseLikelihoods->push_back(option.m_dsc->csdIndex); - } - m_baseLikelihoods->push_back(option.m_softmax); - } - first = false; - } - - CSEdsc* const dsc = choice.m_dsc; - - if (dsc == nullptr) - { - m_likelihoods->push_back(choice.m_softmax); - break; - } - - // purge this CSE from sortTab so we won't choose it again - // - assert(sortTab[dsc->csdIndex - 1] == dsc); - sortTab[dsc->csdIndex - 1] = nullptr; - - // ChooseCSE should only choose viable options - // - assert(dsc->IsViable()); - - CSE_Candidate candidate(this, dsc); - - if (m_verbose) - { - printf("\nRL attempting " FMT_CSE "\n", candidate.CseIndex()); - } - - JITDUMP("CSE Expression : \n"); - JITDUMPEXEC(m_pCompiler->gtDispTree(candidate.Expr())); - JITDUMP("\n"); - - PerformCSE(&candidate); - madeChanges = true; - m_likelihoods->push_back(choice.m_softmax); - } - - return; -} - -//------------------------------------------------------------------------ -// GetFeatures: extract features for this CSE -// -// Arguments: -// cse - cse descriptor -// features - array to fill in with feature values -// -// Notes: -// Current set of features: +// Current set of features: // // 0. cse costEx // 1. cse use count weighted (log) @@ -2779,7 +2520,7 @@ void CSE_HeuristicRL::SoftmaxPolicy() // // 24. log (pressure estimate weight) // -void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features) +void CSE_HeuristicParameterized::GetFeatures(CSEdsc* cse, double* features) { for (int i = 0; i < numParameters; i++) { @@ -2911,7 +2652,7 @@ void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features) // All boolean features are scaled up by booleanScale so their // numeric range is similar to the non-boolean features // -void CSE_HeuristicRL::GetStoppingFeatures(double* features) +void CSE_HeuristicParameterized::GetStoppingFeatures(double* features) { // Estimate the (log) weight at which a new CSE would cause a spill // if m_registerPressure registers were initially available. 
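With the 25 features above in hand, Preference and StoppingPreference (next hunk) both reduce to a dot product of the feature vector with the parameter vector. A minimal sketch of that scoring step, using an illustrative free-function name rather than the class methods:

```cpp
#include <cstddef>

// Score one option: a linear model over numParameters features.
// 'features' is the vector produced by GetFeatures/GetStoppingFeatures,
// 'parameters' holds the learned (or default) weights.
static double linearPreference(const double* features, const double* parameters, size_t numParameters)
{
    double preference = 0.0;
    for (size_t i = 0; i < numParameters; i++)
    {
        preference += features[i] * parameters[i];
    }
    return preference;
}
```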
@@ -2949,137 +2690,589 @@ void CSE_HeuristicRL::GetStoppingFeatures(double* features) // todo: scan all vars, not just tracked? // - features[24] = deMinimusAdj + log(max(deMinimis, spillAtWeight)); + features[24] = deMinimusAdj + log(max(deMinimis, spillAtWeight)); +} + +//------------------------------------------------------------------------ +// Preference: determine a preference score for this CSE +// +// Arguments: +// cse - cse descriptor, or nullptr for the option to stop doing CSEs. +// +double CSE_HeuristicParameterized::Preference(CSEdsc* cse) +{ + double features[numParameters]; + GetFeatures(cse, features); + +#ifdef DEBUG + if (JitConfig.JitRLCSECandidateFeatures() > 0) + { + DumpFeatures(cse, features); + } +#endif + + double preference = 0; + for (int i = 0; i < numParameters; i++) + { + preference += features[i] * m_parameters[i]; + } + + return preference; +} + +//------------------------------------------------------------------------ +// StoppingPreference: determine a preference score for this stopping CSE +// +// Arguments: +// regAvail - number of registers threshold +// +double CSE_HeuristicParameterized::StoppingPreference() +{ + double features[numParameters]; + GetFeatures(nullptr, features); + +#ifdef DEBUG + if (JitConfig.JitRLCSECandidateFeatures() > 0) + { + DumpFeatures(nullptr, features); + } +#endif + + double preference = 0; + for (int i = 0; i < numParameters; i++) + { + preference += features[i] * m_parameters[i]; + } + + return preference; +} + +//------------------------------------------------------------------------ +// ChooseGreedy: examine candidates and choose the next CSE to perform +// via greedy policy +// +// Returns: +// Choice of CSE to perform +// +// Notes: +// Picks the most-preferred candidate. +// If there is a tie, picks stop, or the lowest cse index. +// +CSE_HeuristicParameterized::Choice& CSE_HeuristicParameterized::ChooseGreedy(ArrayStack& choices) +{ + choices.Reset(); + BuildChoices(choices); + + // Find the maximally preferred case. + // + int choiceNum = 0; + + for (int i = 1; i < choices.Height(); i++) + { + Choice& choice = choices.TopRef(i); + Choice& bestChoice = choices.TopRef(choiceNum); + + const double delta = choice.m_preference - bestChoice.m_preference; + + bool update = false; + + if (delta > 0) + { + update = true; + } + else if (delta == 0) + { + if (choice.m_dsc == nullptr) + { + update = true; + } + else if ((bestChoice.m_dsc != nullptr) && (choice.m_dsc->csdIndex < bestChoice.m_dsc->csdIndex)) + { + update = true; + } + } + + if (update) + { + choiceNum = i; + } + } + + RLDUMP("Greedy candidate evaluation\n"); + RLDUMPEXEC(DumpChoices(choices, choiceNum)); + + return choices.TopRef(choiceNum); +} + +//------------------------------------------------------------------------ +// BuildChoices: fill in the choices currently available +// +// choices - array of choices to be filled in +// +// Notes: +// Also computes the preference for each choice. +// +void CSE_HeuristicParameterized::BuildChoices(ArrayStack& choices) +{ + for (unsigned i = 0; i < m_pCompiler->optCSECandidateCount; i++) + { + CSEdsc* const dsc = sortTab[i]; + if ((dsc == nullptr) || !dsc->IsViable()) + { + // already did this cse, + // or the cse is not viable + continue; + } + + double preference = Preference(dsc); + choices.Emplace(dsc, preference); + } + + // Doing nothing is also an option. 
+ // + const double stoppingPreference = StoppingPreference(); + choices.Emplace(nullptr, stoppingPreference); +} + +#ifdef DEBUG + +//------------------------------------------------------------------------ +// Announce: describe heuristic in jit dump +// +void CSE_HeuristicParameterized::Announce() +{ + JITDUMP("%s parameters ", Name()); + for (int i = 0; i < numParameters; i++) + { + JITDUMP("%s%f", (i == 0) ? "" : ",", m_parameters[i]); + } + JITDUMP("\n"); +} + +//------------------------------------------------------------------------ +// DumpMetrics: dump post-CSE metrics +// +void CSE_HeuristicParameterized::DumpMetrics() +{ + CSE_HeuristicCommon::DumpMetrics(); + + // Show the parameters used. + // + printf(" params "); + for (int i = 0; i < numParameters; i++) + { + printf("%s%f", (i == 0) ? "" : ",", m_parameters[i]); + } +} + +//------------------------------------------------------------------------ +// DumpFeatures: dump feature values for a CSE candidate +// +// Arguments: +// dsc - cse descriptor +// features - feature vector for that candidate +// +// Notes: +// Dumps a comma separated row of data, prefixed by method index. +// +void CSE_HeuristicParameterized::DumpFeatures(CSEdsc* dsc, double* features) +{ + printf("features,%d," FMT_CSE, m_pCompiler->info.compMethodSuperPMIIndex, dsc == nullptr ? 0 : dsc->csdIndex); + for (int i = 0; i < numParameters; i++) + { + printf(",%f", features[i]); + } + printf("\n"); +} + +//------------------------------------------------------------------------ +// DumpChoices: dump out information on current choices +// +// Arguments: +// choices - array of choices +// highlight - highlight this choice +// +void CSE_HeuristicParameterized::DumpChoices(ArrayStack& choices, int highlight) +{ + for (int i = 0; i < choices.Height(); i++) + { + Choice& choice = choices.TopRef(i); + CSEdsc* const cse = choice.m_dsc; + const char* msg = i == highlight ? "=>" : " "; + if (cse != nullptr) + { + printf("%s%2d: " FMT_CSE " preference %10.7f likelihood %10.7f\n", msg, i, cse->csdIndex, + choice.m_preference, choice.m_softmax); + } + else + { + printf("%s%2d: QUIT preference %10.7f likelihood %10.7f\n", msg, i, choice.m_preference, + choice.m_softmax); + } + } +} + +//------------------------------------------------------------------------ +// DumpChoices: dump out information on current choices +// +// Arguments: +// choices - array of choices +// highlight - highlight this choice +// +void CSE_HeuristicParameterized::DumpChoices(ArrayStack& choices, CSEdsc* highlight) +{ + for (int i = 0; i < choices.Height(); i++) + { + Choice& choice = choices.TopRef(i); + CSEdsc* const cse = choice.m_dsc; + const char* msg = cse == highlight ? "=>" : " "; + if (cse != nullptr) + { + printf("%s%2d: " FMT_CSE " preference %10.7f likelihood %10.7f\n", msg, i, cse->csdIndex, + choice.m_preference, choice.m_softmax); + } + else + { + printf("%s%2d: QUIT preference %10.7f likelihood %10.7f\n", msg, i, choice.m_preference, + choice.m_softmax); + } + } +} + +#endif // DEBUG + +#ifdef DEBUG + +//------------------------------------------------------------------------ +// CSE_HeuristicRL: construct RL CSE heuristic +// +// Arguments; +// pCompiler - compiler instance +// +// Notes: +// This creates the RL CSE heuristic, selected when JitRLCSE is set. +// It has 3 modes of operation: +// +// (1) Stochastic (default) softmax policy, governed by a parameter vector. +// * JitRLCSE specifies the initial parameter values. 
+// Missing values default to zero, extra values are ignored. +// * JitRandomCSE can be used to supply salt for the RNG. +// (2) Update: replay a sequence with known rewards, and compute updated +// parameters based on stochastic gradient ascent +// * JitReplayCSE specifies the sequence +// * JitReplayCSEReward the rewards per step (actor-critic style) +// (3) Greedy: +// Enable via JitRLCSEGreedy=1. +// Uses parameters from JitRLCSE to drive a deterministic greedy policy +// +CSE_HeuristicRL::CSE_HeuristicRL(Compiler* pCompiler) + : CSE_HeuristicParameterized(pCompiler), m_alpha(0.0), m_updateParameters(false), m_greedy(false) +{ + // Set up the random state + // + m_cseRNG.Init(m_pCompiler->info.compMethodHash() ^ JitConfig.JitRandomCSE()); + + // Parameters + // + ConfigDoubleArray initialParameters; + initialParameters.EnsureInit(JitConfig.JitRLCSE()); + const unsigned initialParamLength = initialParameters.GetLength(); + + for (unsigned i = 0; (i < initialParamLength) && (i < numParameters); i++) + { + m_parameters[i] = initialParameters.GetData()[i]; + } + + if (numParameters > initialParamLength) + { + JITDUMP("Too few parameters (expected %d), trailing will be zero\n", numParameters); + for (unsigned i = initialParamLength; i < numParameters; i++) + { + m_parameters[i] = 0; + } + } + else if (numParameters < initialParamLength) + { + JITDUMP("Too many parameters (expected %d), trailing will be ignored\n", numParameters); + } + + // Policy sub-behavior: explore / update / greedy + // + // We may be given a prior sequence and perf score to use to + // update the parameters .... if so, we will replay same sequence of CSEs + // (like the replay policy) and update the parameters via the policy + // gradient algorithm. + // + // For updates: + // + // m_alpha controls the "step size" or learning rate; when we want to adjust + // the parameters we only partially move them towards the gradient indicated values. + // + // m_rewards describes the reward associated with each step. + // + // This "two-pass" technique (first run the current policy and, obtain the perf score + // and CSE sequence, then rerun with the same sequence and update the policy + // parameters) ensures all the policy model logic is within the + // JIT, so the preference computation and its gradient can be kept in sync. 
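For a linear softmax policy like this one, the textbook policy-gradient (REINFORCE) step moves each parameter toward the chosen option's feature value and away from the likelihood-weighted average over all options, scaled by the per-step reward and the learning rate alpha. The sketch below assumes UpdateParametersStep follows that standard form; the helper name and argument layout are illustrative, not the JIT's actual signature:

```cpp
#include <cstddef>
#include <vector>

// One policy-gradient (REINFORCE) step for a linear softmax policy:
//   parameters += alpha * reward * (features(chosen) - sum_b likelihood(b) * features(b))
// 'likelihoods' must already be the softmax of the current preferences, and each
// per-option feature vector is assumed to have parameters.size() entries.
static void policyGradientStep(std::vector<double>&                    parameters,
                               const std::vector<std::vector<double>>& features,    // per-option feature vectors
                               const std::vector<double>&              likelihoods, // softmax of preferences
                               size_t                                  chosen,      // option actually taken
                               double                                  reward,      // reward for this step
                               double                                  alpha)       // learning rate
{
    for (size_t p = 0; p < parameters.size(); p++)
    {
        double expected = 0.0;
        for (size_t b = 0; b < features.size(); b++)
        {
            expected += likelihoods[b] * features[b][p];
        }
        parameters[p] += alpha * reward * (features[chosen][p] - expected);
    }
}
```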
+ // + if ((JitConfig.JitReplayCSE() != nullptr) && (JitConfig.JitReplayCSEReward() != nullptr)) + { + m_updateParameters = true; + + // Reward + // + ConfigDoubleArray rewards; + rewards.EnsureInit(JitConfig.JitReplayCSEReward()); + const unsigned rewardsLength = rewards.GetLength(); + + for (unsigned i = 0; (i < rewardsLength) && (i < maxSteps); i++) + { + m_rewards[i] = rewards.GetData()[i]; + } + + for (unsigned i = rewardsLength; i < maxSteps; i++) + { + m_rewards[i] = 0; + } + + // Alpha + // + if (JitConfig.JitRLCSEAlpha() != nullptr) + { + ConfigDoubleArray JitRLCSEAlphaArray; + JitRLCSEAlphaArray.EnsureInit(JitConfig.JitRLCSEAlpha()); + m_alpha = JitRLCSEAlphaArray.GetData()[0]; + } + else + { + m_alpha = 0.001; + } + } + else if (JitConfig.JitRLCSEGreedy() > 0) + { + m_greedy = true; + } + + CompAllocator allocator = m_pCompiler->getAllocator(CMK_CSE); + m_baseLikelihoods = new (allocator) jitstd::vector(allocator); + m_features = new (allocator) jitstd::vector(allocator); +} + +//------------------------------------------------------------------------ +// Name: name this jit heuristic +// +// Returns: +// descriptive name string +// +const char* CSE_HeuristicRL::Name() const +{ + if (m_updateParameters) + { + return "RL Policy Gradient Update"; + } + else + { + return "RL Policy Gradient Stochastic"; + } +} + +//------------------------------------------------------------------------ +// Announce: describe heuristic in jit dump +// +void CSE_HeuristicRL::Announce() +{ + JITDUMP("%s salt %d parameters ", Name(), JitConfig.JitRandomCSE()); + for (int i = 0; i < numParameters; i++) + { + JITDUMP("%s%f", (i == 0) ? "" : ",", m_parameters[i]); + } + JITDUMP("\n"); + + if (m_updateParameters) + { + JITDUMP("Operating in update mode with sequence %ls, rewards %ls, and alpha %f\n", JitConfig.JitReplayCSE(), + JitConfig.JitReplayCSEReward(), m_alpha); + } +} + +//------------------------------------------------------------------------ +// DumpMetrics: dump post-CSE metrics +// +void CSE_HeuristicRL::DumpMetrics() +{ + CSE_HeuristicParameterized::DumpMetrics(); + + if (m_updateParameters) + { + // For update, dump the new parameter values + // + printf(" updatedparams "); + for (int i = 0; i < numParameters; i++) + { + printf("%s%f", (i == 0) ? "" : ",", m_parameters[i]); + } + + if (JitConfig.JitRLCSECandidateFeatures() > 0) + { + bool first = true; + printf(", features "); + for (char* f : *m_features) + { + printf("%s%s", first ? "" : ",", f); + first = false; + } + } + } + else if (m_greedy) + { + // handled by base class + } + else + { + // For evaluation, dump likelihood of the choices made + // + printf(" likelihoods "); + bool first = true; + for (double d : *m_likelihoods) + { + printf("%s%.3f", first ? "" : ",", d); + first = false; + } + + // For evaluation, dump initial likelihood each choice + // + printf(" baseLikelihoods "); + first = true; + for (double d : *m_baseLikelihoods) + { + printf("%s%.3f", first ? "" : ",", d); + first = false; + } + } } //------------------------------------------------------------------------ -// DumpFeatures: dump feature values for a CSE candidate +// ConsiderTree: check if this tree can be a CSE candidate // // Arguments: -// dsc - cse descriptor -// features - feature vector for that candidate +// tree - tree in question +// isReturn - true if tree is part of a return statement // -// Notes: -// Dumps a comma separated row of data, prefixed by method index. 
+// Returns: +// true if this tree can be a CSE candidate // -void CSE_HeuristicRL::DumpFeatures(CSEdsc* dsc, double* features) +bool CSE_HeuristicRL::ConsiderTree(GenTree* tree, bool isReturn) { - printf("features,%d," FMT_CSE, m_pCompiler->info.compMethodSuperPMIIndex, dsc == nullptr ? 0 : dsc->csdIndex); - for (int i = 0; i < numParameters; i++) - { - printf(",%f", features[i]); - } - printf("\n"); + return CanConsiderTree(tree, isReturn); } //------------------------------------------------------------------------ -// Preference: determine a preference score for this CSE -// -// Arguments: -// cse - cse descriptor, or nullptr for the option to stop doing CSEs. +// ConsiderCandidates: examine candidates and perform CSEs. // -double CSE_HeuristicRL::Preference(CSEdsc* cse) +void CSE_HeuristicRL::ConsiderCandidates() { - double features[numParameters]; - GetFeatures(cse, features); + const int numCandidates = m_pCompiler->optCSECandidateCount; + sortTab = new (m_pCompiler, CMK_CSE) CSEdsc*[numCandidates]; + sortSiz = numCandidates * sizeof(*sortTab); + memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz); - if (JitConfig.JitRLCSECandidateFeatures() > 0) - { - DumpFeatures(cse, features); - } + // Capture distribution of enregisterable local var weights. + // + CaptureLocalWeights(); - double preference = 0; - for (int i = 0; i < numParameters; i++) + if (m_updateParameters) { - preference += features[i] * m_parameters[i]; + UpdateParameters(); + return; } - - return preference; -} - -//------------------------------------------------------------------------ -// StoppingPreference: determine a preference score for this stopping CSE -// -// Arguments: -// regAvail - number of registers threshold -// -double CSE_HeuristicRL::StoppingPreference() -{ - double features[numParameters]; - GetFeatures(nullptr, features); - - if (JitConfig.JitRLCSECandidateFeatures() > 0) + else if (m_greedy) { - DumpFeatures(nullptr, features); + GreedyPolicy(); + return; } - - double preference = 0; - for (int i = 0; i < numParameters; i++) + else { - preference += features[i] * m_parameters[i]; + SoftmaxPolicy(); } - - return preference; } //------------------------------------------------------------------------ -// ChooseGreedy: examine candidates and choose the next CSE to perform -// via greedy policy -// -// Returns: -// Choice of CSE to perform +// SoftmaxPolicy: use a randomized softmax policy // // Notes: -// Picks the most-preferred candidate. -// If there is a tie, picks stop, or the lowest cse index. +// This converts preferences to likelihoods using softmax, and then +// randomly selects a candidate proportional to its likelihood. // -CSE_HeuristicRL::Choice& CSE_HeuristicRL::ChooseGreedy(ArrayStack& choices) +void CSE_HeuristicRL::SoftmaxPolicy() { - choices.Reset(); - BuildChoices(choices); + if (m_verbose) + { + printf("RL using softmax policy\n"); + } - // Find the maximally preferred case. + // Number of choices is num candidates + 1, since + // early stopping is also a choice. 
// - Choice& bestChoice = choices.TopRef(0); - int choiceNum = 0; + const int numCandidates = m_pCompiler->optCSECandidateCount; + ArrayStack choices(m_pCompiler->getAllocator(CMK_CSE), numCandidates + 1); + bool first = true; - for (int i = 1; i < choices.Height(); i++) + while (true) { - Choice& choice = choices.TopRef(i); - const double delta = choice.m_preference - bestChoice.m_preference; - - bool update = false; + Choice& choice = ChooseSoftmax(choices); - if (delta > 0) - { - update = true; - } - else if (delta == 0) + if (first) { - if (choice.m_dsc == nullptr) - { - update = true; - } - else if ((bestChoice.m_dsc != nullptr) && (choice.m_dsc->csdIndex < bestChoice.m_dsc->csdIndex)) + for (int i = 0; i < choices.Height(); i++) { - update = true; + Choice& option = choices.TopRef(i); + if (option.m_dsc == nullptr) + { + m_baseLikelihoods->push_back(0); + } + else + { + m_baseLikelihoods->push_back(option.m_dsc->csdIndex); + } + m_baseLikelihoods->push_back(option.m_softmax); } + first = false; } - if (update) + CSEdsc* const dsc = choice.m_dsc; + + if (dsc == nullptr) { - bestChoice = choice; - choiceNum = i; + m_likelihoods->push_back(choice.m_softmax); + break; } - } - if (m_verbose) - { - printf("Greedy candidate evaluation\n"); - DumpChoices(choices, choiceNum); + // purge this CSE from sortTab so we won't choose it again + // + assert(sortTab[dsc->csdIndex - 1] == dsc); + sortTab[dsc->csdIndex - 1] = nullptr; + + // ChooseCSE should only choose viable options + // + assert(dsc->IsViable()); + + CSE_Candidate candidate(this, dsc); + + if (m_verbose) + { + printf("\nRL attempting " FMT_CSE "\n", candidate.CseIndex()); + } + + JITDUMP("CSE Expression : \n"); + JITDUMPEXEC(m_pCompiler->gtDispTree(candidate.Expr())); + JITDUMP("\n"); + + PerformCSE(&candidate); + madeChanges = true; + m_likelihoods->push_back(choice.m_softmax); } - return bestChoice; + return; } //------------------------------------------------------------------------ @@ -3140,36 +3333,6 @@ CSE_HeuristicRL::Choice& CSE_HeuristicRL::ChooseSoftmax(ArrayStack& choi return choices.TopRef(choiceNum); } -//------------------------------------------------------------------------ -// BuildChoices: fill in the choices currently available -// -// choices - array of choices to be filled in -// -// Notes: -// Also computes the preference for each choice. -// -void CSE_HeuristicRL::BuildChoices(ArrayStack& choices) -{ - for (unsigned i = 0; i < m_pCompiler->optCSECandidateCount; i++) - { - CSEdsc* const dsc = sortTab[i]; - if ((dsc == nullptr) || !dsc->IsViable()) - { - // already did this cse, - // or the cse is not viable - continue; - } - - double preference = Preference(dsc); - choices.Emplace(dsc, preference); - } - - // Doing nothing is also an option. 
- // - const double stoppingPreference = StoppingPreference(); - choices.Emplace(nullptr, stoppingPreference); -} - //------------------------------------------------------------------------ // Softmax: fill in likelihoods for each choice vis softmax // @@ -3208,60 +3371,6 @@ void CSE_HeuristicRL::Softmax(ArrayStack& choices) } } -//------------------------------------------------------------------------ -// DumpChoices: dump out information on current choices -// -// Arguments: -// choices - array of choices -// highlight - highlight this choice -// -void CSE_HeuristicRL::DumpChoices(ArrayStack& choices, int highlight) -{ - for (int i = 0; i < choices.Height(); i++) - { - Choice& choice = choices.TopRef(i); - CSEdsc* const cse = choice.m_dsc; - const char* msg = i == highlight ? "=>" : " "; - if (cse != nullptr) - { - printf("%s%2d: " FMT_CSE " preference %10.7f likelihood %10.7f\n", msg, i, cse->csdIndex, - choice.m_preference, choice.m_softmax); - } - else - { - printf("%s%2d: QUIT preference %10.7f likelihood %10.7f\n", msg, i, choice.m_preference, - choice.m_softmax); - } - } -} - -//------------------------------------------------------------------------ -// DumpChoices: dump out information on current choices -// -// Arguments: -// choices - array of choices -// highlight - highlight this choice -// -void CSE_HeuristicRL::DumpChoices(ArrayStack& choices, CSEdsc* highlight) -{ - for (int i = 0; i < choices.Height(); i++) - { - Choice& choice = choices.TopRef(i); - CSEdsc* const cse = choice.m_dsc; - const char* msg = cse == highlight ? "=>" : " "; - if (cse != nullptr) - { - printf("%s%2d: " FMT_CSE " preference %10.7f likelihood %10.7f\n", msg, i, cse->csdIndex, - choice.m_preference, choice.m_softmax); - } - else - { - printf("%s%2d: QUIT preference %10.7f likelihood %10.7f\n", msg, i, choice.m_preference, - choice.m_softmax); - } - } -} - //------------------------------------------------------------------------ // UpdateParameters: Replay an existing CSE sequence with known reward, // and update the model parameters via the policy gradient. @@ -5027,12 +5136,10 @@ CSE_HeuristicCommon* Compiler::optGetCSEheuristic() if (JitConfig.JitRandomCSE() > 0) { - JITDUMP("Using Random CSE heuristic (JitRandomCSE)\n"); useRandomHeuristic = true; } else if (compStressCompile(Compiler::STRESS_MAKE_CSE, MAX_STRESS_WEIGHT)) { - JITDUMP("Using Random CSE heuristic (stress)\n"); useRandomHeuristic = true; } @@ -5056,12 +5163,24 @@ CSE_HeuristicCommon* Compiler::optGetCSEheuristic() #endif + // Parameterized (greedy) RL-based heuristic + // + if (optCSEheuristic == nullptr) + { + bool useGreedyHeuristic = (JitConfig.JitRLCSEGreedy() > 0); + + if (useGreedyHeuristic) + { + optCSEheuristic = new (this, CMK_CSE) CSE_HeuristicParameterized(this); + } + } + if (optCSEheuristic == nullptr) { - JITDUMP("Using standard CSE heuristic\n"); optCSEheuristic = new (this, CMK_CSE) CSE_Heuristic(this); } + INDEBUG(optCSEheuristic->Announce()); return optCSEheuristic; } @@ -5084,6 +5203,7 @@ PhaseStatus Compiler::optOptimizeValnumCSEs() // Determine which heuristic to use... 
// CSE_HeuristicCommon* const heuristic = optGetCSEheuristic(); + INDEBUG(heuristic->Announce()); optValnumCSE_phase = true; optCSEweight = -1.0f; diff --git a/src/coreclr/jit/optcse.h b/src/coreclr/jit/optcse.h index 86ff3b742d3041..3d1c7f0702ba96 100644 --- a/src/coreclr/jit/optcse.h +++ b/src/coreclr/jit/optcse.h @@ -142,14 +142,13 @@ class CSE_HeuristicReplay : public CSE_HeuristicCommon #endif }; -// Reinforcement Learning CSE heuristic -// -// Uses a "linear" feature model with -// softmax policy. -// -class CSE_HeuristicRL : public CSE_HeuristicCommon +#endif // DEBUG + +// Parameterized Policy + +class CSE_HeuristicParameterized : public CSE_HeuristicCommon { -private: +protected: struct Choice { Choice(CSEdsc* dsc, double preference) : m_dsc(dsc), m_preference(preference), m_softmax(0) @@ -167,32 +166,70 @@ class CSE_HeuristicRL : public CSE_HeuristicCommon maxSteps = 65, // MAX_CSE_CNT + 1 (for stopping) }; + static double s_defaultParameters[numParameters]; double m_parameters[numParameters]; - double m_alpha; - double m_rewards[maxSteps]; - CLRRandom m_cseRNG; - bool m_updateParameters; - bool m_greedy; - bool m_verbose; unsigned m_registerPressure; jitstd::vector* m_localWeights; + bool m_verbose; +public: + CSE_HeuristicParameterized(Compiler*); + void ConsiderCandidates(); + bool ConsiderTree(GenTree* tree, bool isReturn); void CaptureLocalWeights(); + void GreedyPolicy(); + void GetFeatures(CSEdsc* dsc, double* features); double Preference(CSEdsc* dsc); void GetStoppingFeatures(double* features); double StoppingPreference(); - void DumpFeatures(CSEdsc* dsc, double* features); - Choice& ChooseSoftmax(ArrayStack& choices); - Choice& ChooseGreedy(ArrayStack& choices); void BuildChoices(ArrayStack& choices); - void Softmax(ArrayStack& choices); + + Choice& ChooseGreedy(ArrayStack& choices); + + virtual const char* Name() const + { + return "Parameterized CSE Heuristic"; + } + +#ifdef DEBUG + void DumpFeatures(CSEdsc* dsc, double* features); void DumpChoices(ArrayStack& choices, int higlight = -1); void DumpChoices(ArrayStack& choices, CSEdsc* higlight); - void UpdateParameters(); - void GreedyPolicy(); + void DumpMetrics(); + void Announce(); + + // Likelihood of each choice made in the sequence + jitstd::vector* m_likelihoods; + // Likelihood of each action from starting state + jitstd::vector* m_baseLikelihoods; + // Features of each candidate + jitstd::vector* m_features; + +#endif +}; + +#ifdef DEBUG + +// Reinforcement Learning CSE heuristic +// +// Uses a "linear" feature model with +// softmax policy. +// +class CSE_HeuristicRL : public CSE_HeuristicParameterized +{ +private: + double m_alpha; + double m_rewards[maxSteps]; + CLRRandom m_cseRNG; + bool m_updateParameters; + bool m_greedy; + + Choice& ChooseSoftmax(ArrayStack& choices); + void Softmax(ArrayStack& choices); void SoftmaxPolicy(); void UpdateParametersStep(CSEdsc* dsc, ArrayStack& choices, double reward, double* delta); + void UpdateParameters(); Choice* FindChoice(CSEdsc* dsc, ArrayStack& choices); const char* Name() const; @@ -203,11 +240,6 @@ class CSE_HeuristicRL : public CSE_HeuristicCommon #ifdef DEBUG virtual void DumpMetrics(); virtual void Announce(); - // Likelihood of each choice made in the sequence - jitstd::vector* m_likelihoods; - // Likelihood of each action from starting state - jitstd::vector* m_baseLikelihoods; - jitstd::vector* m_features; #endif };
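For completeness, the remaining piece of the stochastic mode — Softmax converting preferences to likelihoods and ChooseSoftmax sampling an option in proportion to them — can be sketched outside the JIT as follows. Subtracting the maximum preference before exponentiating is a standard overflow guard and may or may not match the JIT's Softmax exactly; `std::mt19937` stands in for the JIT's CLRRandom:

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <random>
#include <vector>

// Convert preferences to softmax likelihoods, then sample an index with
// probability proportional to its likelihood. Assumes at least one option
// is present (the "stop" choice always is).
static size_t sampleSoftmax(const std::vector<double>& preferences, std::mt19937& rng)
{
    const double maxPref = *std::max_element(preferences.begin(), preferences.end());

    std::vector<double> likelihoods(preferences.size());
    double              sum = 0.0;
    for (size_t i = 0; i < preferences.size(); i++)
    {
        likelihoods[i] = std::exp(preferences[i] - maxPref);
        sum += likelihoods[i];
    }

    // Inverse-CDF sampling over the (unnormalized) likelihoods.
    std::uniform_real_distribution<double> dist(0.0, sum);
    double                                 r = dist(rng);
    for (size_t i = 0; i < likelihoods.size(); i++)
    {
        r -= likelihoods[i];
        if (r <= 0.0)
        {
            return i;
        }
    }
    return likelihoods.size() - 1;
}
```

In the greedy mode (CSE_HeuristicParameterized, or CSE_HeuristicRL with JitRLCSEGreedy=1) the sampling step disappears and the argmax selection sketched earlier is used instead.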