Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for training metrics in PipelineSweeperMacro + new graph variable outputs #152

Merged
merged 15 commits into from
May 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions src/Microsoft.ML.Data/EntryPoints/InputBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -832,21 +832,21 @@ public static class SweepableDiscreteParam
public static class PipelineSweeperSupportedMetrics
{
public new static string ToString() => "SupportedMetric";
public const string Auc = "Auc";
public const string Auc = "AUC";
public const string AccuracyMicro = "AccuracyMicro";
public const string AccuracyMacro = "AccuracyMacro";
public const string F1 = "F1";
public const string AuPrc = "AuPrc";
public const string AuPrc = "AUPRC";
public const string TopKAccuracy = "TopKAccuracy";
public const string L1 = "L1";
public const string L2 = "L2";
public const string Rms = "Rms";
public const string Rms = "RMS";
public const string LossFn = "LossFn";
public const string RSquared = "RSquared";
public const string LogLoss = "LogLoss";
public const string LogLossReduction = "LogLossReduction";
public const string Ndcg = "Ndcg";
public const string Dcg = "Dcg";
public const string Ndcg = "NDCG";
public const string Dcg = "DCG";
public const string PositivePrecision = "PositivePrecision";
public const string PositiveRecall = "PositiveRecall";
public const string NegativePrecision = "NegativePrecision";
Expand All @@ -858,9 +858,9 @@ public static class PipelineSweeperSupportedMetrics
public const string ThreshAtK = "ThreshAtK";
public const string ThreshAtP = "ThreshAtP";
public const string ThreshAtNumPos = "ThreshAtNumPos";
public const string Nmi = "Nmi";
public const string Nmi = "NMI";
public const string AvgMinScore = "AvgMinScore";
public const string Dbi = "Dbi";
public const string Dbi = "DBI";
}
}
}
20 changes: 12 additions & 8 deletions src/Microsoft.ML.PipelineInference/AutoInference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ private bool GetDataVariableName(IExceptionContext ectx, string nameOfData, JTok
return false;

string dataVar = firstNodeInputs.Value<String>(nameOfData);
ectx.Check(VariableBinding.IsValidVariableName(ectx, dataVar), $"Invalid variable name {dataVar}.");
if (!VariableBinding.IsValidVariableName(ectx, dataVar))
throw ectx.ExceptParam(nameof(nameOfData), $"Invalid variable name {dataVar}.");

variableName = dataVar.Substring(1);
return true;
Expand All @@ -172,12 +173,14 @@ private bool GetDataVariableName(IExceptionContext ectx, string nameOfData, JTok
public sealed class RunSummary
{
public double MetricValue { get; }
public double TrainingMetricValue { get; }
public int NumRowsInTraining { get; }
public long RunTimeMilliseconds { get; }

public RunSummary(double metricValue, int numRows, long runTimeMilliseconds)
public RunSummary(double metricValue, int numRows, long runTimeMilliseconds, double trainingMetricValue)
{
MetricValue = metricValue;
TrainingMetricValue = trainingMetricValue;
NumRowsInTraining = numRows;
RunTimeMilliseconds = runTimeMilliseconds;
}
Expand Down Expand Up @@ -303,7 +306,7 @@ private void MainLearningLoop(int batchSize, int numOfTrainingRows)
var stopwatch = new Stopwatch();
var probabilityUtils = new Sweeper.Algorithms.SweeperProbabilityUtils(_host);

while (!_terminator.ShouldTerminate(_history))
while (!_terminator.ShouldTerminate(_history))
{
// Get next set of candidates
var currentBatchSize = batchSize;
Expand Down Expand Up @@ -341,16 +344,17 @@ private void ProcessPipeline(Sweeper.Algorithms.SweeperProbabilityUtils utils, S

// Run pipeline, and time how long it takes
stopwatch.Restart();
double d = candidate.RunTrainTestExperiment(_trainData.Take(randomizedNumberOfRows),
_testData, Metric, TrainerKind);
candidate.RunTrainTestExperiment(_trainData.Take(randomizedNumberOfRows),
_testData, Metric, TrainerKind, out var testMetricVal, out var trainMetricVal);
stopwatch.Stop();

// Handle key collisions on sorted list
while (_sortedSampledElements.ContainsKey(d))
d += 1e-10;
while (_sortedSampledElements.ContainsKey(testMetricVal))
testMetricVal += 1e-10;

// Save performance score
candidate.PerformanceSummary = new RunSummary(d, randomizedNumberOfRows, stopwatch.ElapsedMilliseconds);
candidate.PerformanceSummary =
new RunSummary(testMetricVal, randomizedNumberOfRows, stopwatch.ElapsedMilliseconds, trainMetricVal);
_sortedSampledElements.Add(candidate.PerformanceSummary.MetricValue, candidate);
_history.Add(candidate);
}
Expand Down
33 changes: 24 additions & 9 deletions src/Microsoft.ML.PipelineInference/AutoMlUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,34 @@ namespace Microsoft.ML.Runtime.PipelineInference
{
public static class AutoMlUtils
{
public static AutoInference.RunSummary ExtractRunSummary(IHostEnvironment env, IDataView data, string metricColumnName)
public static double ExtractValueFromIDV(IHostEnvironment env, IDataView result, string columnName)
{
double metricValue = 0;
int numRows = 0;
var schema = data.Schema;
schema.TryGetColumnIndex(metricColumnName, out var metricCol);
Contracts.CheckValue(env, nameof(env));
env.CheckValue(result, nameof(result));
env.CheckNonEmpty(columnName, nameof(columnName));

using (var cursor = data.GetRowCursor(col => col == metricCol))
double outputValue = 0;
var schema = result.Schema;
if (!schema.TryGetColumnIndex(columnName, out var metricCol))
throw env.ExceptParam(nameof(columnName), $"Schema does not contain column: {columnName}");

using (var cursor = result.GetRowCursor(col => col == metricCol))
{
var getter = cursor.GetGetter<double>(metricCol);
cursor.MoveNext();
getter(ref metricValue);
bool moved = cursor.MoveNext();
env.Check(moved, "Expected an IDataView with a single row. Results dataset has no rows to extract.");
getter(ref outputValue);
env.Check(!cursor.MoveNext(), "Expected an IDataView with a single row. Results dataset has too many rows.");
}

return new AutoInference.RunSummary(metricValue, numRows, 0);
return outputValue;
}

/// <summary>
/// Builds a <see cref="AutoInference.RunSummary"/> from the metric column of an
/// evaluation result view and, optionally, a training-set result view.
/// </summary>
/// <param name="env">Host environment forwarded to <c>ExtractValueFromIDV</c>.</param>
/// <param name="result">View holding the test metric value.</param>
/// <param name="metricColumnName">Name of the column to read from each supplied view.</param>
/// <param name="trainResult">
/// Optional view holding the training metric value. When null, the summary's
/// training metric is set to <see cref="double.MinValue"/>.
/// </param>
/// <returns>
/// A summary carrying the test and training metric values; the row count and
/// run time are both passed as 0.
/// </returns>
public static AutoInference.RunSummary ExtractRunSummary(IHostEnvironment env, IDataView result, string metricColumnName, IDataView trainResult = null)
{
    // The test metric is always read first, then the optional training metric.
    double testScore = ExtractValueFromIDV(env, result, metricColumnName);

    // double.MinValue marks "no training results were supplied".
    double trainScore = double.MinValue;
    if (trainResult != null)
        trainScore = ExtractValueFromIDV(env, trainResult, metricColumnName);

    return new AutoInference.RunSummary(
        metricValue: testScore,
        numRows: 0,
        runTimeMilliseconds: 0,
        trainingMetricValue: trainScore);
}

public static CommonInputs.IEvaluatorInput CloneEvaluatorInstance(CommonInputs.IEvaluatorInput evalInput) =>
Expand Down Expand Up @@ -618,5 +631,7 @@ public static Tuple<string, string[]>[] ConvertToSweepArgumentStrings(TlcModule.
}
return results;
}

/// <summary>
/// Produces the variable name used for a pipeline's overall training metrics:
/// the fixed prefix "Var_Training_OM_" followed by <paramref name="id"/>
/// rendered with the "N" format (32 hex digits, no hyphens or braces).
/// </summary>
/// <param name="id">Identifier to embed in the variable name.</param>
/// <returns>The generated variable name.</returns>
public static string GenerateOverallTrainingMetricVarName(Guid id)
{
    string compactId = id.ToString("N");
    return "Var_Training_OM_" + compactId;
}
}
}
19 changes: 14 additions & 5 deletions src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,24 @@ public static Output ExtractSweepResult(IHostEnvironment env, ResultInput input)
var col1 = new KeyValuePair<string, ColumnType>("Graph", TextType.Instance);
var col2 = new KeyValuePair<string, ColumnType>("MetricValue", PrimitiveType.FromKind(DataKind.R8));
var col3 = new KeyValuePair<string, ColumnType>("PipelineId", TextType.Instance);
var col4 = new KeyValuePair<string, ColumnType>("TrainingMetricValue", PrimitiveType.FromKind(DataKind.R8));
var col5 = new KeyValuePair<string, ColumnType>("FirstInput", TextType.Instance);
var col6 = new KeyValuePair<string, ColumnType>("PredictorModel", TextType.Instance);

if (rows.Count == 0)
{
var host = env.Register("ExtractSweepResult");
outputView = new EmptyDataView(host, new SimpleSchema(host, col1, col2, col3));
outputView = new EmptyDataView(host, new SimpleSchema(host, col1, col2, col3, col4, col5, col6));
}
else
{
var builder = new ArrayDataViewBuilder(env);
builder.AddColumn(col1.Key, (PrimitiveType)col1.Value, rows.Select(r => new DvText(r.GraphJson)).ToArray());
builder.AddColumn(col2.Key, (PrimitiveType)col2.Value, rows.Select(r => r.MetricValue).ToArray());
builder.AddColumn(col3.Key, (PrimitiveType)col3.Value, rows.Select(r => new DvText(r.PipelineId)).ToArray());
builder.AddColumn(col4.Key, (PrimitiveType)col4.Value, rows.Select(r => r.TrainingMetricValue).ToArray());
builder.AddColumn(col5.Key, (PrimitiveType)col5.Value, rows.Select(r => new DvText(r.FirstInput)).ToArray());
builder.AddColumn(col6.Key, (PrimitiveType)col6.Value, rows.Select(r => new DvText(r.PredictorModel)).ToArray());
outputView = builder.GetDataView();
}
return new Output { Results = outputView, State = autoMlState };
Expand Down Expand Up @@ -132,11 +138,11 @@ public static CommonOutputs.MacroOutput<Output> PipelineSweep(
// Extract performance summaries and assign to previous candidate pipelines.
foreach (var pipeline in autoMlState.BatchCandidates)
{
if (node.Context.TryGetVariable(ExperimentUtils.GenerateOverallMetricVarName(pipeline.UniqueId),
out var v))
if (node.Context.TryGetVariable(ExperimentUtils.GenerateOverallMetricVarName(pipeline.UniqueId), out var v) &&
node.Context.TryGetVariable(AutoMlUtils.GenerateOverallTrainingMetricVarName(pipeline.UniqueId), out var v2))
{
pipeline.PerformanceSummary =
AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name);
AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name, (IDataView)v2.Value);
autoMlState.AddEvaluated(pipeline);
}
}
Expand Down Expand Up @@ -168,14 +174,17 @@ public static CommonOutputs.MacroOutput<Output> PipelineSweep(
{
// Add train test experiments to current graph for candidate pipeline
var subgraph = new Experiment(env);
var trainTestOutput = p.AddAsTrainTest(training, testing, autoMlState.TrainerKind, subgraph);
var trainTestOutput = p.AddAsTrainTest(training, testing, autoMlState.TrainerKind, subgraph, true);

// Change variable name to reference pipeline ID in output map, context and entrypoint output.
var uniqueName = ExperimentUtils.GenerateOverallMetricVarName(p.UniqueId);
var uniqueNameTraining = AutoMlUtils.GenerateOverallTrainingMetricVarName(p.UniqueId);
var sgNode = EntryPointNode.ValidateNodes(env, node.Context,
new JArray(subgraph.GetNodes().Last()), node.Catalog).Last();
sgNode.RenameOutputVariable(trainTestOutput.OverallMetrics.VarName, uniqueName, cascadeChanges: true);
sgNode.RenameOutputVariable(trainTestOutput.TrainingOverallMetrics.VarName, uniqueNameTraining, cascadeChanges: true);
trainTestOutput.OverallMetrics.VarName = uniqueName;
trainTestOutput.TrainingOverallMetrics.VarName = uniqueNameTraining;
expNodes.Add(sgNode);

// Store indicators, to pass to next iteration of macro.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
<ProjectReference Include="..\Microsoft.ML.StandardLearners\Microsoft.ML.StandardLearners.csproj" />
<ProjectReference Include="..\Microsoft.ML.Sweeper\Microsoft.ML.Sweeper.csproj" />
<ProjectReference Include="..\Microsoft.ML\Microsoft.ML.csproj" />
</ItemGroup>

</Project>
Loading