Skip to content

Commit

Permalink
linear svm
Browse files Browse the repository at this point in the history
  • Loading branch information
artidoro committed Apr 12, 2019
1 parent 273eba2 commit ace5d1b
Show file tree
Hide file tree
Showing 6 changed files with 300 additions and 27 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic.Trainers.BinaryClassification
{
/// <summary>
/// Sample that trains a binary classifier with the LinearSvm trainer using its default settings,
/// then scores and evaluates it on separately generated test data.
/// </summary>
public static class LinearSvm
{
    // Number of feature values carried by every synthetic example.
    private const int FeatureCount = 50;

    /// <summary>
    /// Runs the sample end to end: generate data, train, predict, and print metrics to the console.
    /// </summary>
    public static void Example()
    {
        // The MLContext is the starting point for ML.NET operations: it provides exception tracking and logging,
        // exposes the catalog of available operations, and acts as the source of randomness.
        // A fixed seed is used here so that the sample's output is deterministic.
        var context = new MLContext(seed: 0);

        // Generate the training examples and wrap them in an IDataView, the data type consumed by the ML.NET API.
        var trainSet = context.Data.LoadFromEnumerable(GenerateRandomDataPoints(1000));

        // Set up the trainer and fit it to the training data.
        var trainer = context.BinaryClassification.Trainers.LinearSvm();
        var trainedModel = trainer.Fit(trainSet);

        // Generate test examples with a different random seed so that they differ from the training examples.
        var testSet = context.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));

        // Score the test examples with the trained model.
        var scored = trainedModel.Transform(testSet);

        // Materialize the scored IDataView as a list of strongly typed rows.
        var results = context.Data
            .CreateEnumerable<Prediction>(scored, reuseRowObject: false)
            .ToList();

        // Print the first 5 predictions next to their true labels.
        foreach (var result in results.Take(5))
        {
            Console.WriteLine($"Label: {result.Label}, Prediction: {result.PredictedLabel}");
        }

        // Expected output:
        //   Label: True, Prediction: True
        //   Label: False, Prediction: False
        //   Label: True, Prediction: True
        //   Label: True, Prediction: False
        //   Label: False, Prediction: True

        // Compute the overall (non-calibrated) metrics on the scored test set and print them.
        var evaluation = context.BinaryClassification.EvaluateNonCalibrated(scored);
        Console.WriteLine($"Accuracy: {evaluation.Accuracy:F2}");
        Console.WriteLine($"AUC: {evaluation.AreaUnderRocCurve:F2}");
        Console.WriteLine($"F1 Score: {evaluation.F1Score:F2}");
        Console.WriteLine($"Negative Precision: {evaluation.NegativePrecision:F2}");
        Console.WriteLine($"Negative Recall: {evaluation.NegativeRecall:F2}");
        Console.WriteLine($"Positive Precision: {evaluation.PositivePrecision:F2}");
        Console.WriteLine($"Positive Recall: {evaluation.PositiveRecall:F2}");

        // Expected output:
        //   Accuracy: 0.53
        //   AUC: 0.56
        //   F1 Score: 0.49
        //   Negative Precision: 0.55
        //   Negative Recall: 0.58
        //   Positive Precision: 0.50
        //   Positive Recall: 0.47
    }

    /// <summary>
    /// Lazily produces <paramref name="count"/> synthetic examples from a pseudo-random generator
    /// seeded with <paramref name="seed"/>.
    /// </summary>
    /// <param name="count">Number of examples to yield.</param>
    /// <param name="seed">Seed for the pseudo-random generator; the same seed yields the same sequence.</param>
    /// <returns>A deferred sequence of labeled examples.</returns>
    private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
    {
        var rng = new Random(seed);
        for (int row = 0; row < count; row++)
        {
            // The label is drawn first; the feature draws for this example follow it.
            bool isPositive = (float)rng.NextDouble() > 0.5f;

            // Features are random values correlated with the label:
            // examples labeled false get every feature value nudged upward by a constant.
            var values = new float[FeatureCount];
            for (int k = 0; k < values.Length; k++)
            {
                float draw = (float)rng.NextDouble();
                values[k] = isPositive ? draw : draw + 0.03f;
            }

            yield return new DataPoint { Label = isPositive, Features = values };
        }
    }

    // A single example: a label plus a fixed-size vector of 50 feature values.
    // A data set is a collection of such examples.
    private class DataPoint
    {
        public bool Label { get; set; }
        [VectorType(FeatureCount)]
        public float[] Features { get; set; }
    }

    // Row type used to read the scored output back into objects.
    private class Prediction
    {
        // The label the example was generated with.
        public bool Label { get; set; }
        // The label predicted by the trained model.
        public bool PredictedLabel { get; set; }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<#@ include file="BinaryClassification.ttinclude"#>
<#+
// Settings for the LinearSvm sample. They are presumably read by BinaryClassification.ttinclude
// (included above, not visible here) to render the sample source — confirm against the include.

// Name of the sample class to generate.
string ClassName = "LinearSvm";
// Trainer identifier; presumably the method called on BinaryClassification.Trainers — confirm in the include.
string Trainer = "LinearSvm";
// No trainer options are supplied for this sample.
string TrainerOptions = null;
// NOTE(review): presumably selects the non-calibrated evaluation path in the generated code — confirm.
bool IsCalibrated = false;
// NOTE(review): presumably controls whether the generated sample caches its training data — confirm.
bool CacheData = false;

// Literal spliced into the generated code; presumably the constant added to the features of one class — confirm.
string DataSepValue = "0.03f";
// Extra using directives for the generated file; none for this sample.
string OptionsInclude = "";
// NOTE(review): presumably extra comment text for the generated sample; empty here — confirm.
string Comments = "";

// Comment text listing the expected per-instance predictions.
string ExpectedOutputPerInstance = @"// Expected output:
// Label: True, Prediction: True
// Label: False, Prediction: False
// Label: True, Prediction: True
// Label: True, Prediction: False
// Label: False, Prediction: True";

// Comment text listing the expected evaluation metrics.
string ExpectedOutput = @"// Expected output:
// Accuracy: 0.53
// AUC: 0.56
// F1 Score: 0.49
// Negative Precision: 0.55
// Negative Recall: 0.58
// Positive Precision: 0.50
// Positive Recall: 0.47";
#>
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;

namespace Samples.Dynamic.Trainers.BinaryClassification
{
/// <summary>
/// Sample that trains a binary classifier with the LinearSvm trainer configured through
/// <c>LinearSvmTrainer.Options</c>, then scores and evaluates it on separately generated test data.
/// </summary>
public static class LinearSvmWithOptions
{
    // Number of feature values carried by every synthetic example.
    private const int FeatureCount = 50;

    /// <summary>
    /// Runs the sample end to end: generate data, train with custom options, predict,
    /// and print metrics to the console.
    /// </summary>
    public static void Example()
    {
        // The MLContext is the starting point for ML.NET operations: it provides exception tracking and logging,
        // exposes the catalog of available operations, and acts as the source of randomness.
        // A fixed seed is used here so that the sample's output is deterministic.
        var context = new MLContext(seed: 0);

        // Generate the training examples and wrap them in an IDataView, the data type consumed by the ML.NET API.
        var trainSet = context.Data.LoadFromEnumerable(GenerateRandomDataPoints(1000));

        // Configure the trainer through an options object instead of relying on its defaults.
        var trainerOptions = new LinearSvmTrainer.Options
        {
            BatchSize = 10,
            PerformProjection = true,
            NumberOfIterations = 10
        };

        // Set up the trainer with those options and fit it to the training data.
        var trainer = context.BinaryClassification.Trainers.LinearSvm(trainerOptions);
        var trainedModel = trainer.Fit(trainSet);

        // Generate test examples with a different random seed so that they differ from the training examples.
        var testSet = context.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));

        // Score the test examples with the trained model.
        var scored = trainedModel.Transform(testSet);

        // Materialize the scored IDataView as a list of strongly typed rows.
        var results = context.Data
            .CreateEnumerable<Prediction>(scored, reuseRowObject: false)
            .ToList();

        // Print the first 5 predictions next to their true labels.
        foreach (var result in results.Take(5))
        {
            Console.WriteLine($"Label: {result.Label}, Prediction: {result.PredictedLabel}");
        }

        // Expected output:
        //   Label: True, Prediction: False
        //   Label: False, Prediction: False
        //   Label: True, Prediction: True
        //   Label: True, Prediction: True
        //   Label: False, Prediction: False

        // Compute the overall (non-calibrated) metrics on the scored test set and print them.
        var evaluation = context.BinaryClassification.EvaluateNonCalibrated(scored);
        Console.WriteLine($"Accuracy: {evaluation.Accuracy:F2}");
        Console.WriteLine($"AUC: {evaluation.AreaUnderRocCurve:F2}");
        Console.WriteLine($"F1 Score: {evaluation.F1Score:F2}");
        Console.WriteLine($"Negative Precision: {evaluation.NegativePrecision:F2}");
        Console.WriteLine($"Negative Recall: {evaluation.NegativeRecall:F2}");
        Console.WriteLine($"Positive Precision: {evaluation.PositivePrecision:F2}");
        Console.WriteLine($"Positive Recall: {evaluation.PositiveRecall:F2}");

        // Expected output:
        //   Accuracy: 0.58
        //   AUC: 0.67
        //   F1 Score: 0.28
        //   Negative Precision: 0.56
        //   Negative Recall: 0.94
        //   Positive Precision: 0.74
        //   Positive Recall: 0.18
    }

    /// <summary>
    /// Lazily produces <paramref name="count"/> synthetic examples from a pseudo-random generator
    /// seeded with <paramref name="seed"/>.
    /// </summary>
    /// <param name="count">Number of examples to yield.</param>
    /// <param name="seed">Seed for the pseudo-random generator; the same seed yields the same sequence.</param>
    /// <returns>A deferred sequence of labeled examples.</returns>
    private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
    {
        var rng = new Random(seed);
        for (int row = 0; row < count; row++)
        {
            // The label is drawn first; the feature draws for this example follow it.
            bool isPositive = (float)rng.NextDouble() > 0.5f;

            // Features are random values correlated with the label:
            // examples labeled false get every feature value nudged upward by a constant.
            var values = new float[FeatureCount];
            for (int k = 0; k < values.Length; k++)
            {
                float draw = (float)rng.NextDouble();
                values[k] = isPositive ? draw : draw + 0.03f;
            }

            yield return new DataPoint { Label = isPositive, Features = values };
        }
    }

    // A single example: a label plus a fixed-size vector of 50 feature values.
    // A data set is a collection of such examples.
    private class DataPoint
    {
        public bool Label { get; set; }
        [VectorType(FeatureCount)]
        public float[] Features { get; set; }
    }

    // Row type used to read the scored output back into objects.
    private class Prediction
    {
        // The label the example was generated with.
        public bool Label { get; set; }
        // The label predicted by the trained model.
        public bool PredictedLabel { get; set; }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<#@ include file="BinaryClassification.ttinclude"#>
<#+
// Settings for the LinearSvmWithOptions sample. They are presumably read by BinaryClassification.ttinclude
// (included above, not visible here) to render the sample source — confirm against the include.

// Name of the sample class to generate.
string ClassName = "LinearSvmWithOptions";
// Trainer identifier; presumably the method called on BinaryClassification.Trainers — confirm in the include.
string Trainer = "LinearSvm";
// NOTE(review): presumably selects the non-calibrated evaluation path in the generated code — confirm.
bool IsCalibrated = false;

// Literal spliced into the generated code; presumably the constant added to the features of one class — confirm.
string DataSepValue = "0.03f";
// Extra using directive for the generated file; the options type below is LinearSvmTrainer.Options.
string OptionsInclude = "using Microsoft.ML.Trainers;";
// NOTE(review): presumably extra comment text for the generated sample; empty here — confirm.
string Comments = "";
// NOTE(review): presumably controls whether the generated sample caches its training data — confirm.
bool CacheData = false;

// Source text of the options object initializer passed to the trainer.
string TrainerOptions = @"LinearSvmTrainer.Options
{
BatchSize = 10,
PerformProjection = true,
NumberOfIterations = 10
}";

// Comment text listing the expected per-instance predictions.
string ExpectedOutputPerInstance = @"// Expected output:
// Label: True, Prediction: False
// Label: False, Prediction: False
// Label: True, Prediction: True
// Label: True, Prediction: True
// Label: False, Prediction: False";

// Comment text listing the expected evaluation metrics.
string ExpectedOutput = @"// Expected output:
// Accuracy: 0.58
// AUC: 0.67
// F1 Score: 0.28
// Negative Precision: 0.56
// Negative Recall: 0.94
// Positive Precision: 0.74
// Positive Recall: 0.18";
#>
18 changes: 18 additions & 0 deletions docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>LightGbmWithOptions.cs</LastGenOutput>
</None>
<!-- T4 templates for the LinearSvm samples: each is processed by TextTemplatingFileGenerator, and LastGenOutput records the .cs file it produces. -->
<None Update="Dynamic\Trainers\BinaryClassification\LinearSvm.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>LinearSvm.cs</LastGenOutput>
</None>
<None Update="Dynamic\Trainers\BinaryClassification\LinearSvmWithOptions.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>LinearSvmWithOptions.cs</LastGenOutput>
</None>
<None Update="Dynamic\Trainers\BinaryClassification\SdcaLogisticRegression.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>SdcaLogisticRegression.cs</LastGenOutput>
Expand Down Expand Up @@ -229,6 +237,16 @@
<AutoGen>True</AutoGen>
<DependentUpon>LightGbmWithOptions.tt</DependentUpon>
</Compile>
<!-- Generated LinearSvm sample sources: flagged as auto-generated and tied to the .tt template each one depends on. -->
<Compile Update="Dynamic\Trainers\BinaryClassification\LinearSvm.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>LinearSvm.tt</DependentUpon>
</Compile>
<Compile Update="Dynamic\Trainers\BinaryClassification\LinearSvmWithOptions.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>LinearSvmWithOptions.tt</DependentUpon>
</Compile>
<Compile Update="Dynamic\Trainers\BinaryClassification\PriorTrainer.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
Expand Down
30 changes: 3 additions & 27 deletions docs/samples/Microsoft.ML.Samples/Program.cs
Original file line number Diff line number Diff line change
@@ -1,36 +1,12 @@
using System;
using Samples.Dynamic.Trainers.BinaryClassification;
using Samples.Dynamic;

namespace Microsoft.ML.Samples
{
internal static class Program
{
/// <summary>
/// Entry point of the samples runner. Runs each listed sample in order, printing a heading
/// with the sample's name before it runs.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Console.WriteLine("Prior");
    Prior.Example();
    Console.WriteLine("\nSdcaLogisticRegression");
    SdcaLogisticRegression.Example();
    Console.WriteLine("\nSdcaLogisticRegressionWithOptions");
    SdcaLogisticRegressionWithOptions.Example();
    Console.WriteLine("\nSdcaNonCalibrated");
    SdcaNonCalibrated.Example();
    Console.WriteLine("\nSdcaNonCalibratedWithOptions");
    SdcaNonCalibratedWithOptions.Example();
    Console.WriteLine("\nSgdCalibrated");
    SgdCalibrated.Example();
    Console.WriteLine("\nSgdCalibratedWithOptions");
    SgdCalibratedWithOptions.Example();
    Console.WriteLine("\nSgdNonCalibrated");
    SgdNonCalibrated.Example();
    Console.WriteLine("\nSgdNonCalibratedWithOptions");
    SgdNonCalibratedWithOptions.Example();
    Console.WriteLine("\nSymbolicSgdLogisticRegression");
    SymbolicSgdLogisticRegression.Example();
    Console.WriteLine("\nSymbolicSgdLogisticRegressionWithOptions");
    // Run the WithOptions sample named in the heading above; previously the plain
    // SymbolicSgdLogisticRegression sample was run a second time here.
    SymbolicSgdLogisticRegressionWithOptions.Example();

    // Wait for a line of console input before continuing.
    Console.ReadLine();
    CalculateFeatureContribution.Example();
}
}
}
}

0 comments on commit ace5d1b

Please sign in to comment.