diff --git a/.gitignore b/.gitignore
index d1c557c2ad..36b327cc99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -328,3 +328,5 @@ ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
+# Ignore external test datasets.
+/test/data/external/
diff --git a/build.proj b/build.proj
index 65aa05fb5b..bd3ccbd651 100644
--- a/build.proj
+++ b/build.proj
@@ -8,7 +8,7 @@
-
+
true
@@ -33,6 +33,7 @@
RestoreProjects;
BuildNative;
$(TraversalBuildDependsOn);
+ DownloadExternalTestFiles;
RunTests;
@@ -62,7 +63,18 @@
-
+
+
+
+
+
+
+
+
+
+
diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs
index b66d61ae69..7a35af8814 100644
--- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs
+++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs
@@ -269,10 +269,10 @@ public void TestCrossValidationBinaryMacro()
}
}
- [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
+ [Fact]
public void TestCrossValidationMacro()
{
- var dataPath = GetDataPath(@"housing.txt");
+ var dataPath = GetDataPath(@"external/winequality-white.csv"); // external data set, not checked in; presumably fetched by the DownloadExternalTestFiles build target - confirm
using (var env = new TlcEnvironment())
{
var subGraph = env.CreateExperiment();
@@ -295,7 +295,7 @@ public void TestCrossValidationMacro()
var modelCombineOutput = subGraph.Add(modelCombine);
var experiment = env.CreateExperiment();
- var importInput = new ML.Data.TextLoader();
+ var importInput = new ML.Data.TextLoader() { CustomSchema = "col=Label:R4:11 col=Features:R4:0-10 sep=; header+" };
var importOutput = experiment.Add(importInput);
var crossValidate = new ML.Models.CrossValidator
@@ -324,7 +324,7 @@ public void TestCrossValidationMacro()
Assert.True(b);
double val = 0;
getter(ref val);
- Assert.Equal(3.32, val, 1);
+ Assert.Equal(0.58, val, 1);
b = cursor.MoveNext();
Assert.False(b);
}
diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
index 4fe503b9b7..ca8f120c9e 100644
--- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
+++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
@@ -729,7 +729,7 @@ public void EntryPointTextToKeyToText()
}
private void RunTrainScoreEvaluate(string learner, string evaluator, string dataPath, string warningsPath, string overallMetricsPath,
- string instanceMetricsPath, string confusionMatrixPath = null)
+ string instanceMetricsPath, string confusionMatrixPath = null, string loader = null) // loader: optional 'CustomSchema' value for the Data.TextLoader node; null/whitespace leaves the node without one
{
string inputGraph = string.Format(@"
{{
@@ -738,6 +738,7 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data
'Name': 'Data.TextLoader',
'Inputs': {{
'InputFile': '$file'
+ {8}
}},
'Outputs': {{
'Data': '$AllData'
@@ -797,7 +798,8 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data
}}
}}", learner, evaluator, EscapePath(dataPath), EscapePath(warningsPath), EscapePath(overallMetricsPath), EscapePath(instanceMetricsPath),
confusionMatrixPath != null ? ", 'ConfusionMatrix': '$ConfusionMatrix'" : "",
- confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "");
+ confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "",
+ string.IsNullOrWhiteSpace(loader) ? "" : string.Format(",'CustomSchema': '{0}'", loader)); // fills placeholder {8}: adds a 'CustomSchema' input after 'InputFile' only when a loader schema was supplied
var jsonPath = DeleteOutputPath("graph.json");
File.WriteAllLines(jsonPath, new[] { inputGraph });
@@ -855,15 +857,16 @@ public void EntryPointEvaluateMultiClass()
Assert.Equal(3, CountRows(loader));
}
- [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
+ [Fact]
public void EntryPointEvaluateRegression()
{
- var dataPath = GetDataPath("housing.txt");
+ var dataPath = GetDataPath(@"external/winequality-white.csv"); // external data set, not checked in; presumably fetched by the DownloadExternalTestFiles build target - confirm
var warningsPath = DeleteOutputPath("warnings.idv");
var overallMetricsPath = DeleteOutputPath("overall.idv");
var instanceMetricsPath = DeleteOutputPath("instance.idv");
- RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator", dataPath, warningsPath, overallMetricsPath, instanceMetricsPath);
+ RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator",
+ dataPath, warningsPath, overallMetricsPath, instanceMetricsPath, loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+");
using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), warningsPath))
Assert.Equal(0, CountRows(loader));
@@ -872,7 +875,7 @@ public void EntryPointEvaluateRegression()
Assert.Equal(1, CountRows(loader));
using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), instanceMetricsPath))
- Assert.Equal(104, CountRows(loader));
+ Assert.Equal(975, CountRows(loader));
}
[Fact]
@@ -887,10 +890,10 @@ public void EntryPointSDCAMultiClass()
TestEntryPointRoutine("iris.txt", "Trainers.StochasticDualCoordinateAscentClassifier");
}
- [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
+ [Fact]
public void EntryPointSDCARegression()
{
- TestEntryPointRoutine("housing.txt", "Trainers.StochasticDualCoordinateAscentRegressor");
+ TestEntryPointRoutine(@"external/winequality-white.csv", "Trainers.StochasticDualCoordinateAscentRegressor", loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"); // schema: Label = column 11, Features = columns 0-10, ';' separator, header row
}
[Fact]
@@ -961,10 +964,10 @@ public void EntryPointHogwildSGD()
TestEntryPointRoutine("breast-cancer.txt", "Trainers.StochasticGradientDescentBinaryClassifier");
}
- [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
+ [Fact]
public void EntryPointPoissonRegression()
{
- TestEntryPointRoutine("housing.txt", "Trainers.PoissonRegressor");
+ TestEntryPointRoutine(@"external/winequality-white.csv", "Trainers.PoissonRegressor", loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"); // schema: Label = column 11, Features = columns 0-10, ';' separator, header row
}
[Fact]