diff --git a/.gitignore b/.gitignore index d1c557c2ad..36b327cc99 100644 --- a/.gitignore +++ b/.gitignore @@ -328,3 +328,5 @@ ASALocalRun/ # MSBuild Binary and Structured Log *.binlog +# Ignore external test datasets. +/test/data/external/ diff --git a/build.proj b/build.proj index 65aa05fb5b..bd3ccbd651 100644 --- a/build.proj +++ b/build.proj @@ -8,7 +8,7 @@ - + true @@ -33,6 +33,7 @@ RestoreProjects; BuildNative; $(TraversalBuildDependsOn); + DownloadExternalTestFiles; RunTests; @@ -62,7 +63,18 @@ - + + + + + + + + + + diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs index b66d61ae69..7a35af8814 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs @@ -269,10 +269,10 @@ public void TestCrossValidationBinaryMacro() } } - [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")] + [Fact()] public void TestCrossValidationMacro() { - var dataPath = GetDataPath(@"housing.txt"); + var dataPath = GetDataPath(@"external/winequality-white.csv"); using (var env = new TlcEnvironment()) { var subGraph = env.CreateExperiment(); @@ -295,7 +295,7 @@ public void TestCrossValidationMacro() var modelCombineOutput = subGraph.Add(modelCombine); var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(); + var importInput = new ML.Data.TextLoader() { CustomSchema = "col=Label:R4:11 col=Features:R4:0-10 sep=; header+" }; var importOutput = experiment.Add(importInput); var crossValidate = new ML.Models.CrossValidator @@ -324,7 +324,7 @@ public void TestCrossValidationMacro() Assert.True(b); double val = 0; getter(ref val); - Assert.Equal(3.32, val, 1); + Assert.Equal(0.58, val, 1); b = cursor.MoveNext(); Assert.False(b); } diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 4fe503b9b7..ca8f120c9e 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -729,7 +729,7 @@ public void EntryPointTextToKeyToText() } private void RunTrainScoreEvaluate(string learner, string evaluator, string dataPath, string warningsPath, string overallMetricsPath, - string instanceMetricsPath, string confusionMatrixPath = null) + string instanceMetricsPath, string confusionMatrixPath = null, string loader = null) { string inputGraph = string.Format(@" {{ @@ -738,6 +738,7 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data 'Name': 'Data.TextLoader', 'Inputs': {{ 'InputFile': '$file' + {8} }}, 'Outputs': {{ 'Data': '$AllData' @@ -797,7 +798,8 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data }} }}", learner, evaluator, EscapePath(dataPath), EscapePath(warningsPath), EscapePath(overallMetricsPath), EscapePath(instanceMetricsPath), confusionMatrixPath != null ? ", 'ConfusionMatrix': '$ConfusionMatrix'" : "", - confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : ""); + confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "", + string.IsNullOrWhiteSpace(loader) ? "" : string.Format(",'CustomSchema': '{0}'", loader)); var jsonPath = DeleteOutputPath("graph.json"); File.WriteAllLines(jsonPath, new[] { inputGraph }); @@ -855,15 +857,16 @@ public void EntryPointEvaluateMultiClass() Assert.Equal(3, CountRows(loader)); } - [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")] + [Fact()] public void EntryPointEvaluateRegression() { - var dataPath = GetDataPath("housing.txt"); + var dataPath = GetDataPath(@"external/winequality-white.csv"); var warningsPath = DeleteOutputPath("warnings.idv"); var overallMetricsPath = DeleteOutputPath("overall.idv"); var instanceMetricsPath = DeleteOutputPath("instance.idv"); - RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator", dataPath, warningsPath, overallMetricsPath, instanceMetricsPath); + RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator", + dataPath, warningsPath, overallMetricsPath, instanceMetricsPath, loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"); using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), warningsPath)) Assert.Equal(0, CountRows(loader)); @@ -872,7 +875,7 @@ public void EntryPointEvaluateRegression() Assert.Equal(1, CountRows(loader)); using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), instanceMetricsPath)) - Assert.Equal(104, CountRows(loader)); + Assert.Equal(975, CountRows(loader)); } [Fact] @@ -887,10 +890,10 @@ public void EntryPointSDCAMultiClass() TestEntryPointRoutine("iris.txt", "Trainers.StochasticDualCoordinateAscentClassifier"); } - [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")] + [Fact()] public void EntryPointSDCARegression() { - TestEntryPointRoutine("housing.txt", "Trainers.StochasticDualCoordinateAscentRegressor"); + TestEntryPointRoutine(@"external/winequality-white.csv", "Trainers.StochasticDualCoordinateAscentRegressor", loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"); } [Fact] @@ -961,10 +964,10 @@ public void EntryPointHogwildSGD() TestEntryPointRoutine("breast-cancer.txt", "Trainers.StochasticGradientDescentBinaryClassifier"); } - [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")] + [Fact()] public void EntryPointPoissonRegression() { - TestEntryPointRoutine("housing.txt", "Trainers.PoissonRegressor"); + TestEntryPointRoutine(@"external/winequality-white.csv", "Trainers.PoissonRegressor", loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"); } [Fact]