Binary classification samples update (#3311)
artidoro authored Apr 16, 2019
1 parent 2e99197 commit 5538ccf
Showing 61 changed files with 2,760 additions and 702 deletions.
@@ -1,44 +1,110 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class AveragedPerceptron
{
public static void Example()
{
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
// Setting the seed to a fixed number in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);

// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);

// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

// Define the trainer.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();

// Train the model.
var model = pipeline.Fit(trainingData);

// Create testing data. Use different random seed to make it different from training data.
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));

// Run the model on test data set.
var transformedTestData = model.Transform(testData);

// Convert IDataView object to a list.
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();

// Print 5 predictions.
foreach (var p in predictions.Take(5))
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");

// Expected output:
// Label: True, Prediction: True
// Label: False, Prediction: False
// Label: True, Prediction: True
// Label: True, Prediction: False
// Label: False, Prediction: False

// Evaluate the overall metrics.
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData);
PrintMetrics(metrics);

// Expected output:
// Accuracy: 0.72
// AUC: 0.79
// F1 Score: 0.68
// Negative Precision: 0.71
// Negative Recall: 0.80
// Positive Precision: 0.74
// Positive Recall: 0.63
}

private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0)
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
for (int i = 0; i < count; i++)
{
var label = randomFloat() > 0.5f;
yield return new DataPoint
{
Label = label,
// Create random features that are correlated with the label.
// For data points with false label, the feature values are slightly increased by adding a constant.
Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray()
};
}
}

// Example with label and 50 feature values. A data set is a collection of such examples.
private class DataPoint
{
public bool Label { get; set; }
[VectorType(50)]
public float[] Features { get; set; }
}

// Class used to capture predictions.
private class Prediction
{
// Original label.
public bool Label { get; set; }
// Predicted label from the trainer.
public bool PredictedLabel { get; set; }
}

// Pretty-print BinaryClassificationMetrics objects.
private static void PrintMetrics(BinaryClassificationMetrics metrics)
{
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
}
}
}
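Note (not part of this commit): to score one data point at a time instead of transforming a whole IDataView, the trained model can be wrapped in a prediction engine. A minimal sketch, assuming the mlContext and model variables and the DataPoint/Prediction classes from the sample above:

// Sketch only (assumption): single-row scoring via the standard ML.NET PredictionEngine API.
var engine = mlContext.Model.CreatePredictionEngine<DataPoint, Prediction>(model);
var sample = GenerateRandomDataPoints(1, seed: 42).First();
var single = engine.Predict(sample);
Console.WriteLine($"Label: {sample.Label}, Prediction: {single.PredictedLabel}");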
@@ -0,0 +1,29 @@
<#@ include file="BinaryClassification.ttinclude"#>
<#+
string ClassName = "AveragedPerceptron";
string Trainer = "AveragedPerceptron";
string TrainerOptions = null;
bool IsCalibrated = false;
bool CacheData = false;

string LabelThreshold = "0.5f";
string DataSepValue = "0.1f";
string OptionsInclude = "";
string Comments= "";

string ExpectedOutputPerInstance = @"// Expected output:
// Label: True, Prediction: True
// Label: False, Prediction: False
// Label: True, Prediction: True
// Label: True, Prediction: False
// Label: False, Prediction: False";

string ExpectedOutput = @"// Expected output:
// Accuracy: 0.72
// AUC: 0.79
// F1 Score: 0.68
// Negative Precision: 0.71
// Negative Recall: 0.80
// Positive Precision: 0.74
// Positive Recall: 0.63";
#>
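The .ttinclude above only sets parameters; the shared BinaryClassification.ttinclude template (not among the files shown here) expands them into the C# sample. As a rough, hypothetical illustration of the kind of line such a template might emit for the trainer:

// Hypothetical T4 fragment (assumed; the real BinaryClassification.ttinclude is not shown in this diff):
var pipeline = mlContext.BinaryClassification.Trainers.<#= Trainer #>(<#= TrainerOptions == null ? "" : "options" #>);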
@@ -1,28 +1,29 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;

namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class AveragedPerceptronWithOptions
{
public static void Example()
{
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
// Setting the seed to a fixed number in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);

// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);

// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

// Define trainer options.
var options = new AveragedPerceptronTrainer.Options
{
LossFunction = new SmoothedHingeLoss(),
LearningRate = 0.1f,
LazyUpdate = false,
RecencyGain = 0.1f,
NumberOfIterations = 10
};

// Define the trainer.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options);

// Train the model.
var model = pipeline.Fit(trainingData);

// Create testing data. Use different random seed to make it different from training data.
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));

// Run the model on test data set.
var transformedTestData = model.Transform(testData);

// Convert IDataView object to a list.
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();

// Print 5 predictions.
foreach (var p in predictions.Take(5))
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");

// Expected output:
// Label: True, Prediction: True
// Label: False, Prediction: False
// Label: True, Prediction: True
// Label: True, Prediction: True
// Label: False, Prediction: False

// Evaluate the overall metrics.
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData);
PrintMetrics(metrics);

// Expected output:
// Accuracy: 0.89
// AUC: 0.96
// F1 Score: 0.88
// Negative Precision: 0.87
// Negative Recall: 0.92
// Positive Precision: 0.91
// Positive Recall: 0.85
}

private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0)
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
for (int i = 0; i < count; i++)
{
var label = randomFloat() > 0.5f;
yield return new DataPoint
{
Label = label,
// Create random features that are correlated with the label.
// For data points with false label, the feature values are slightly increased by adding a constant.
Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray()
};
}
}

// Example with label and 50 feature values. A data set is a collection of such examples.
private class DataPoint
{
public bool Label { get; set; }
[VectorType(50)]
public float[] Features { get; set; }
}

// Class used to capture predictions.
private class Prediction
{
// Original label.
public bool Label { get; set; }
// Predicted label from the trainer.
public bool PredictedLabel { get; set; }
}

// Pretty-print BinaryClassificationMetrics objects.
private static void PrintMetrics(BinaryClassificationMetrics metrics)
{
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
}
}
}
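A common follow-up step, not included in this commit, is persisting the trained model. A minimal sketch, assuming the mlContext, model, trainingData, and testData variables from the sample above:

// Sketch only (assumption): save the trained model to disk and load it back before scoring.
mlContext.Model.Save(model, trainingData.Schema, "averagedPerceptronWithOptions.zip");
var loadedModel = mlContext.Model.Load("averagedPerceptronWithOptions.zip", out var loadedSchema);
var reloadedPredictions = loadedModel.Transform(testData);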
@@ -0,0 +1,37 @@
<#@ include file="BinaryClassification.ttinclude"#>
<#+
string ClassName="AveragedPerceptronWithOptions";
string Trainer = "AveragedPerceptron";
bool IsCalibrated = false;

string LabelThreshold = "0.5f";
string DataSepValue = "0.1f";
string OptionsInclude = "using Microsoft.ML.Trainers;";
string Comments= "";
bool CacheData = false;

string TrainerOptions = @"AveragedPerceptronTrainer.Options
{
LossFunction = new SmoothedHingeLoss(),
LearningRate = 0.1f,
LazyUpdate = false,
RecencyGain = 0.1f,
NumberOfIterations = 10
}";

string ExpectedOutputPerInstance= @"// Expected output:
// Label: True, Prediction: True
// Label: False, Prediction: False
// Label: True, Prediction: True
// Label: True, Prediction: True
// Label: False, Prediction: False";

string ExpectedOutput = @"// Expected output:
// Accuracy: 0.89
// AUC: 0.96
// F1 Score: 0.88
// Negative Precision: 0.87
// Negative Recall: 0.92
// Positive Precision: 0.91
// Positive Recall: 0.85";
#>
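For reference, LabelThreshold and DataSepValue appear to correspond to the constants in the generated GenerateRandomDataPoints method; this mapping is an inference from the samples above, not something the template parameters state explicitly:

// From the generated samples: 0.5f is the label threshold, 0.1f is the offset separating the two classes.
var label = randomFloat() > 0.5f;
Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray()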