Skip to content

Commit

Permalink
API rev (dotnet#181)
Browse files Browse the repository at this point in the history
  • Loading branch information
daholste authored and Dmitry-A committed Aug 22, 2019
1 parent db7acd8 commit 7b46ccc
Show file tree
Hide file tree
Showing 14 changed files with 159 additions and 100 deletions.
17 changes: 12 additions & 5 deletions src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,13 @@ internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSetti
_settings = settings;
}

public IEnumerable<RunResult<BinaryClassificationMetrics>> Execute(IDataView trainData, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizers = null)
public IEnumerable<RunResult<BinaryClassificationMetrics>> Execute(IDataView trainData, string labelColumn = DefaultColumnNames.Label, IEstimator<ITransformer> preFeaturizers = null)
{
var columnInformation = new ColumnInformation() { LabelColumn = labelColumn };
return Execute(_context, trainData, columnInformation, null, preFeaturizers);
}

public IEnumerable<RunResult<BinaryClassificationMetrics>> Execute(IDataView trainData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizers = null)
{
return Execute(_context, trainData, columnInformation, null, preFeaturizers);
}
Expand All @@ -81,7 +87,7 @@ internal IEnumerable<RunResult<BinaryClassificationMetrics>> Execute(MLContext c
// run autofit & get all pipelines run in that process
var autoFitter = new AutoFitter<BinaryClassificationMetrics>(context, TaskKind.BinaryClassification, trainData, columnInfo,
validationData, preFeaturizers, new OptimizingMetricInfo(_settings.OptimizingMetric), _settings.ProgressCallback,
_settings, new BinaryDataScorer(_settings.OptimizingMetric),
_settings, new BinaryMetricsAgent(_settings.OptimizingMetric),
TrainerExtensionUtil.GetTrainerNames(_settings.WhitelistedTrainers));

return autoFitter.Fit();
Expand All @@ -90,10 +96,11 @@ internal IEnumerable<RunResult<BinaryClassificationMetrics>> Execute(MLContext c

public static class BinaryExperimentResultExtensions
{
public static RunResult<BinaryClassificationMetrics> Best(this IEnumerable<RunResult<BinaryClassificationMetrics>> results)
public static RunResult<BinaryClassificationMetrics> Best(this IEnumerable<RunResult<BinaryClassificationMetrics>> results, BinaryClassificationMetric metric = BinaryClassificationMetric.Accuracy)
{
double maxScore = results.Select(r => r.Metrics.Accuracy).Max();
return results.First(r => r.Metrics.Accuracy == maxScore);
var metricsAgent = new BinaryMetricsAgent(metric);
double maxScore = results.Select(r => metricsAgent.GetScore(r.Metrics)).Max();
return results.First(r => metricsAgent.GetScore(r.Metrics) == maxScore);
}
}
}
17 changes: 12 additions & 5 deletions src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,13 @@ internal MulticlassClassificationExperiment(MLContext context, MulticlassExperim
_settings = settings;
}

public IEnumerable<RunResult<MultiClassClassifierMetrics>> Execute(IDataView trainData, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizers = null)
public IEnumerable<RunResult<MultiClassClassifierMetrics>> Execute(IDataView trainData, string labelColumn = DefaultColumnNames.Label, IEstimator<ITransformer> preFeaturizers = null)
{
var columnInformation = new ColumnInformation() { LabelColumn = labelColumn };
return Execute(_context, trainData, columnInformation, null, preFeaturizers);
}

public IEnumerable<RunResult<MultiClassClassifierMetrics>> Execute(IDataView trainData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizers = null)
{
return Execute(_context, trainData, columnInformation, null, preFeaturizers);
}
Expand All @@ -79,7 +85,7 @@ internal IEnumerable<RunResult<MultiClassClassifierMetrics>> Execute(MLContext c
// run autofit & get all pipelines run in that process
var autoFitter = new AutoFitter<MultiClassClassifierMetrics>(context, TaskKind.MulticlassClassification, trainData,
columnInfo, validationData, preFeaturizers, new OptimizingMetricInfo(_settings.OptimizingMetric),
_settings.ProgressCallback, _settings, new MultiDataScorer(_settings.OptimizingMetric),
_settings.ProgressCallback, _settings, new MultiMetricsAgent(_settings.OptimizingMetric),
TrainerExtensionUtil.GetTrainerNames(_settings.WhitelistedTrainers));

return autoFitter.Fit();
Expand All @@ -88,10 +94,11 @@ internal IEnumerable<RunResult<MultiClassClassifierMetrics>> Execute(MLContext c

public static class MulticlassExperimentResultExtensions
{
public static RunResult<MultiClassClassifierMetrics> Best(this IEnumerable<RunResult<MultiClassClassifierMetrics>> results)
public static RunResult<MultiClassClassifierMetrics> Best(this IEnumerable<RunResult<MultiClassClassifierMetrics>> results, MulticlassClassificationMetric metric = MulticlassClassificationMetric.AccuracyMicro)
{
double maxScore = results.Select(r => r.Metrics.AccuracyMicro).Max();
return results.First(r => r.Metrics.AccuracyMicro == maxScore);
var metricsAgent = new MultiMetricsAgent(metric);
double maxScore = results.Select(r => metricsAgent.GetScore(r.Metrics)).Max();
return results.First(r => metricsAgent.GetScore(r.Metrics) == maxScore);
}
}
}
17 changes: 12 additions & 5 deletions src/Microsoft.ML.Auto/API/RegressionExperiment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,13 @@ internal RegressionExperiment(MLContext context, RegressionExperimentSettings se
_settings = settings;
}

public IEnumerable<RunResult<RegressionMetrics>> Execute(IDataView trainData, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizers = null)
public IEnumerable<RunResult<RegressionMetrics>> Execute(IDataView trainData, string labelColumn = DefaultColumnNames.Label, IEstimator<ITransformer> preFeaturizers = null)
{
var columnInformation = new ColumnInformation() { LabelColumn = labelColumn };
return Execute(_context, trainData, columnInformation, null, preFeaturizers);
}

public IEnumerable<RunResult<RegressionMetrics>> Execute(IDataView trainData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizers = null)
{
return Execute(_context, trainData, columnInformation, null, preFeaturizers);
}
Expand All @@ -76,7 +82,7 @@ internal IEnumerable<RunResult<RegressionMetrics>> Execute(MLContext context,
// run autofit & get all pipelines run in that process
var autoFitter = new AutoFitter<RegressionMetrics>(context, TaskKind.Regression, trainData, columnInfo,
validationData, preFeaturizers, new OptimizingMetricInfo(_settings.OptimizingMetric),
_settings.ProgressCallback, _settings, new RegressionDataScorer(_settings.OptimizingMetric),
_settings.ProgressCallback, _settings, new RegressionMetricsAgent(_settings.OptimizingMetric),
TrainerExtensionUtil.GetTrainerNames(_settings.WhitelistedTrainers));

return autoFitter.Fit();
Expand All @@ -85,10 +91,11 @@ internal IEnumerable<RunResult<RegressionMetrics>> Execute(MLContext context,

public static class RegressionExperimentResultExtensions
{
public static RunResult<RegressionMetrics> Best(this IEnumerable<RunResult<RegressionMetrics>> results)
public static RunResult<RegressionMetrics> Best(this IEnumerable<RunResult<RegressionMetrics>> results, RegressionMetric metric = RegressionMetric.RSquared)
{
double maxScore = results.Select(r => r.Metrics.RSquared).Max();
return results.First(r => r.Metrics.RSquared == maxScore);
var metricsAgent = new RegressionMetricsAgent(metric);
double maxScore = results.Select(r => metricsAgent.GetScore(r.Metrics)).Max();
return results.First(r => metricsAgent.GetScore(r.Metrics) == maxScore);
}
}
}
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Auto/AutoFitter/AutoFitter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ internal class AutoFitter<T> where T : class
private readonly IEstimator<ITransformer> _preFeaturizers;
private readonly IProgress<RunResult<T>> _progressCallback;
private readonly ExperimentSettings _experimentSettings;
private readonly IDataScorer<T> _dataScorer;
private readonly IMetricsAgent<T> _metricsAgent;
private readonly IEnumerable<TrainerName> _trainerWhitelist;

private IDataView _trainData;
Expand All @@ -38,7 +38,7 @@ public AutoFitter(MLContext context,
OptimizingMetricInfo metricInfo,
IProgress<RunResult<T>> progressCallback,
ExperimentSettings experimentSettings,
IDataScorer<T> dataScorer,
IMetricsAgent<T> metricsAgent,
IEnumerable<TrainerName> trainerWhitelist)
{
if (validationData == null)
Expand All @@ -56,7 +56,7 @@ public AutoFitter(MLContext context,
_preFeaturizers = preFeaturizers;
_progressCallback = progressCallback;
_experimentSettings = experimentSettings;
_dataScorer = dataScorer;
_metricsAgent = metricsAgent;
_trainerWhitelist = trainerWhitelist;
}

Expand Down Expand Up @@ -149,7 +149,7 @@ private SuggestedPipelineResult<T> ProcessPipeline(SuggestedPipeline pipeline)
var pipelineModel = pipeline.Fit(_trainData);
var scoredValidationData = pipelineModel.Transform(_validationData);
var metrics = GetEvaluatedMetrics(scoredValidationData);
var score = _dataScorer.GetScore(metrics);
var score = _metricsAgent.GetScore(metrics);
runResult = new SuggestedPipelineResult<T>(metrics, pipelineModel, pipeline, score, null);
}
catch(Exception ex)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@

namespace Microsoft.ML.Auto
{
internal class BinaryDataScorer : IDataScorer<BinaryClassificationMetrics>
internal class BinaryMetricsAgent : IMetricsAgent<BinaryClassificationMetrics>
{
private readonly BinaryClassificationMetric _metric;
private readonly BinaryClassificationMetric _optimizingMetric;

public BinaryDataScorer(BinaryClassificationMetric metric)
public BinaryMetricsAgent(BinaryClassificationMetric optimizingMetric)
{
this._metric = metric;
this._optimizingMetric = optimizingMetric;
}

public double GetScore(BinaryClassificationMetrics metrics)
{
switch(_metric)
switch(_optimizingMetric)
{
case BinaryClassificationMetric.Accuracy:
return metrics.Accuracy;
Expand All @@ -39,7 +39,7 @@ public double GetScore(BinaryClassificationMetrics metrics)
}

// never expected to reach here
throw new NotSupportedException($"{_metric} is not a supported sweep metric");
throw new NotSupportedException($"{_optimizingMetric} is not a supported sweep metric");
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

namespace Microsoft.ML.Auto
{
internal interface IDataScorer<T>
internal interface IMetricsAgent<T>
{
double GetScore(T metrics);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@

namespace Microsoft.ML.Auto
{
internal class MultiDataScorer : IDataScorer<MultiClassClassifierMetrics>
internal class MultiMetricsAgent : IMetricsAgent<MultiClassClassifierMetrics>
{
private readonly MulticlassClassificationMetric _metric;
private readonly MulticlassClassificationMetric _optimizingMetric;

public MultiDataScorer(MulticlassClassificationMetric metric)
public MultiMetricsAgent(MulticlassClassificationMetric optimizingMetric)
{
this._metric = metric;
this._optimizingMetric = optimizingMetric;
}

public double GetScore(MultiClassClassifierMetrics metrics)
{
switch (_metric)
switch (_optimizingMetric)
{
case MulticlassClassificationMetric.AccuracyMacro:
return metrics.AccuracyMacro;
Expand All @@ -33,7 +33,7 @@ public double GetScore(MultiClassClassifierMetrics metrics)
}

// never expected to reach here
throw new NotSupportedException($"{_metric} is not a supported sweep metric");
throw new NotSupportedException($"{_optimizingMetric} is not a supported sweep metric");
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@

namespace Microsoft.ML.Auto
{
internal class RegressionDataScorer : IDataScorer<RegressionMetrics>
internal class RegressionMetricsAgent : IMetricsAgent<RegressionMetrics>
{
private readonly RegressionMetric _metric;
private readonly RegressionMetric _optimizingMetric;

public RegressionDataScorer(RegressionMetric metric)
public RegressionMetricsAgent(RegressionMetric optimizingMetric)
{
this._metric = metric;
this._optimizingMetric = optimizingMetric;
}

public double GetScore(RegressionMetrics metrics)
{
switch(_metric)
switch(_optimizingMetric)
{
case RegressionMetric.L1:
return metrics.L1;
Expand All @@ -31,7 +31,7 @@ public double GetScore(RegressionMetrics metrics)
}

// never expected to reach here
throw new NotSupportedException($"{_metric} is not a supported sweep metric");
throw new NotSupportedException($"{_optimizingMetric} is not a supported sweep metric");
}
}
}
11 changes: 2 additions & 9 deletions src/Samples/AutoTrainBinaryClassification.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,8 @@ public static void Run()
// STEP 3: Auto featurize, auto train and auto hyperparameter tuning
Console.WriteLine($"Invoking BinaryClassification.AutoFit");
var autoFitResults = mlContext.AutoInference()
.CreateBinaryClassificationExperiment(new BinaryExperimentSettings()
{
MaxInferenceTimeInSeconds = 60,
OptimizingMetric = BinaryClassificationMetric.Auc
})
.Execute(trainDataView, new ColumnInformation()
{
LabelColumn = LabelColumnName
});
.CreateBinaryClassificationExperiment(60)
.Execute(trainDataView, LabelColumnName);

// STEP 4: Print metric from the best model
var best = autoFitResults.Best();
Expand Down
6 changes: 2 additions & 4 deletions src/Samples/AutoTrainRegression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,8 @@ public static void Run()
// STEP 3: Auto featurize, auto train and auto hyperparameter tuning
Console.WriteLine($"Invoking Regression.AutoFit");
var autoFitResults = mlContext.AutoInference()
.CreateRegressionExperiment(60)
.Execute(trainDataView, new ColumnInformation() {
LabelColumn = LabelColumnName
});
.CreateRegressionExperiment(60)
.Execute(trainDataView, LabelColumnName);

// STEP 4: Compare and print actual value vs predicted value for top 5 rows from validation data
var best = autoFitResults.Best();
Expand Down
5 changes: 1 addition & 4 deletions src/Samples/Cancellation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,7 @@ public static void Run()
MaxInferenceTimeInSeconds = 60,
CancellationToken = cts.Token
})
.Execute(trainDataView, new ColumnInformation()
{
LabelColumn = LabelColumnName
});
.Execute(trainDataView, LabelColumnName);

Console.WriteLine($"{autoFitResults.Count()} models were returned after {cancelAfterInSeconds} seconds");

Expand Down
47 changes: 47 additions & 0 deletions src/Samples/CustomizeTraining.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using Microsoft.Data.DataView;
using Microsoft.ML;
using Microsoft.ML.Auto;
using Microsoft.ML.Data;

namespace Samples
{
/// <summary>
/// Sample that runs a regression experiment on the taxi-fare training data
/// with an explicitly configured <see cref="RegressionExperimentSettings"/>
/// (time budget, optimizing metric, trainer whitelist and progress callback).
/// </summary>
static class CustomizeTraining
{
    // Relative locations of the sample data files.
    private static string BaseDatasetsLocation = @"../../../../src/Samples/Data";
    private static string TrainDataPath = BaseDatasetsLocation + "/taxi-fare-train.csv";
    private static string TestDataPath = BaseDatasetsLocation + "/taxi-fare-test.csv";
    // Not referenced anywhere else inside this class.
    private static string ModelPath = BaseDatasetsLocation + "/TaxiFareModel.zip";
    // Name of the column the experiment is asked to predict.
    private static string LabelColumnName = "fare_amount";

    /// <summary>
    /// Infers the columns of the training file, loads the train and test files,
    /// executes the customized regression experiment on the training data and
    /// then waits for a line of console input before returning.
    /// </summary>
    public static void Run()
    {
        var mlContext = new MLContext();

        // STEP 1: Infer columns
        var columnInference = mlContext
            .AutoInference()
            .InferColumns(TrainDataPath, LabelColumnName, ',');

        // STEP 2: Load data
        TextLoader loader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderArgs);
        IDataView trainingData = loader.Read(TrainDataPath);
        // The test view is read here but not used again in this method.
        IDataView testData = loader.Read(TestDataPath);

        // STEP 3: Autofit with a callback configured
        var autoInference = mlContext.AutoInference();
        var settings = new RegressionExperimentSettings()
        {
            MaxInferenceTimeInSeconds = 20,
            OptimizingMetric = RegressionMetric.L2,
            WhitelistedTrainers = new[] { RegressionTrainer.LightGbm },
            // Progress is declared elsewhere in the project; presumably it reports each run as it completes.
            ProgressCallback = new Progress()
        };
        var experiment = autoInference.CreateRegressionExperiment(settings);

        // The returned run results are intentionally not captured here.
        experiment.Execute(trainingData, LabelColumnName);

        Console.WriteLine("Press any key to continue..");
        Console.ReadLine();
    }
}
}
5 changes: 4 additions & 1 deletion src/Samples/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ public static void Main(string[] args)

AutoTrainMulticlassClassification.Run();
Console.Clear();


CustomizeTraining.Run();
Console.Clear();

Console.WriteLine("Done");
}
catch (Exception ex)
Expand Down
Loading

0 comments on commit 7b46ccc

Please sign in to comment.