From 0e5feee85815a04e15326043b976fe48078697ce Mon Sep 17 00:00:00 2001 From: srsaggam <41802116+srsaggam@users.noreply.github.com> Date: Mon, 11 Feb 2019 14:26:17 -0800 Subject: [PATCH] Fix Codegen for columnConvert and ValueToKeyMapping transform and add individual transform tests (#95) * Added sequential grouping of columns * reverted the file * fix usings for type convert * added transforms tests * review comments --- src/mlnet.Test/CodeGenTests.cs | 170 ++++++++++++++++-- .../CodeGenerator/TransformGenerators.cs | 4 +- 2 files changed, 156 insertions(+), 18 deletions(-) diff --git a/src/mlnet.Test/CodeGenTests.cs b/src/mlnet.Test/CodeGenTests.cs index cb5877ef463..d8908c8e464 100644 --- a/src/mlnet.Test/CodeGenTests.cs +++ b/src/mlnet.Test/CodeGenTests.cs @@ -94,14 +94,14 @@ public void ClassLabelGenerationBasicTest() }; var result = (new TextLoader.Arguments() - { - Column = columns, - AllowQuoting = false, - AllowSparse = false, - Separators = new[] { ',' }, - HasHeader = true, - TrimWhitespace = true - }, purposes); + { + Column = columns, + AllowQuoting = false, + AllowSparse = false, + Separators = new[] { ',' }, + HasHeader = true, + TrimWhitespace = true + }, purposes); CodeGenerator codeGenerator = new CodeGenerator(null, result); var actual = codeGenerator.GenerateClassLabels(); @@ -128,14 +128,14 @@ public void ColumnGenerationTest() }; var result = (new TextLoader.Arguments() - { - Column = columns, - AllowQuoting = false, - AllowSparse = false, - Separators = new[] { ',' }, - HasHeader = true, - TrimWhitespace = true - }, purposes); + { + Column = columns, + AllowQuoting = false, + AllowSparse = false, + Separators = new[] { ',' }, + HasHeader = true, + TrimWhitespace = true + }, purposes); var context = new MLContext(); var elementProperties = new Dictionary(); @@ -170,5 +170,143 @@ public void TrainerComplexParameterTest() } + #region Transform Tests + [TestMethod] + public void OneHotEncodingTest() + { + var context = new MLContext(); + var elementProperties = new Dictionary();//categorical + PipelineNode node = new PipelineNode("OneHotEncoding", PipelineNodeType.Transform, new string[] { "categorical_column_1" }, new string[] { "categorical_column_1" }, elementProperties); + Pipeline pipeline = new Pipeline(new PipelineNode[] { node }); + CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null)); + var actual = codeGenerator.GenerateTransformsAndUsings(); + string expectedTransform = "Categorical.OneHotEncoding(new []{new OneHotEncodingEstimator.ColumnInfo(\"categorical_column_1\",\"categorical_column_1\")})"; + var expectedUsings = "using Microsoft.ML.Transforms.Categorical;\r\n"; + Assert.AreEqual(expectedTransform, actual[0].Item1); + Assert.AreEqual(expectedUsings, actual[0].Item2); + } + + [TestMethod] + public void NormalizingTest() + { + var context = new MLContext(); + var elementProperties = new Dictionary(); + PipelineNode node = new PipelineNode("Normalizing", PipelineNodeType.Transform, new string[] { "numeric_column_1" }, new string[] { "numeric_column_1_copy" }, elementProperties); + Pipeline pipeline = new Pipeline(new PipelineNode[] { node }); + CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null)); + var actual = codeGenerator.GenerateTransformsAndUsings(); + string expectedTransform = "Normalize(\"numeric_column_1_copy\",\"numeric_column_1\")"; + string expectedUsings = null; + Assert.AreEqual(expectedTransform, actual[0].Item1); + Assert.AreEqual(expectedUsings, actual[0].Item2); + } + + [TestMethod] + public void ColumnConcatenatingTest() + { + var context = new MLContext(); + var elementProperties = new Dictionary(); + PipelineNode node = new PipelineNode("ColumnConcatenating", PipelineNodeType.Transform, new string[] { "numeric_column_1", "numeric_column_2" }, new string[] { "Features" }, elementProperties); + Pipeline pipeline = new Pipeline(new PipelineNode[] { node }); + CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null)); + var actual = codeGenerator.GenerateTransformsAndUsings(); + string expectedTransform = "Concatenate(\"Features\",new []{\"numeric_column_1\",\"numeric_column_2\"})"; + string expectedUsings = null; + Assert.AreEqual(expectedTransform, actual[0].Item1); + Assert.AreEqual(expectedUsings, actual[0].Item2); + } + + [TestMethod] + public void ColumnCopyingTest() + { + var context = new MLContext(); + var elementProperties = new Dictionary();//nume to num feature 2 + PipelineNode node = new PipelineNode("ColumnCopying", PipelineNodeType.Transform, new string[] { "numeric_column_1" }, new string[] { "numeric_column_2" }, elementProperties); + Pipeline pipeline = new Pipeline(new PipelineNode[] { node }); + CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null)); + var actual = codeGenerator.GenerateTransformsAndUsings(); + string expectedTransform = "CopyColumns(\"numeric_column_2\",\"numeric_column_1\")"; + string expectedUsings = null; + Assert.AreEqual(expectedTransform, actual[0].Item1); + Assert.AreEqual(expectedUsings, actual[0].Item2); + } + + [TestMethod] + public void MissingValueIndicatingTest() + { + var context = new MLContext(); + var elementProperties = new Dictionary();//numeric feature + PipelineNode node = new PipelineNode("MissingValueIndicating", PipelineNodeType.Transform, new string[] { "numeric_column_1" }, new string[] { "numeric_column_1" }, elementProperties); + Pipeline pipeline = new Pipeline(new PipelineNode[] { node }); + CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null)); + var actual = codeGenerator.GenerateTransformsAndUsings(); + string expectedTransform = "IndicateMissingValues(new []{(\"numeric_column_1\",\"numeric_column_1\")})"; + string expectedUsings = null; + Assert.AreEqual(expectedTransform, actual[0].Item1); + Assert.AreEqual(expectedUsings, actual[0].Item2); + } + + [TestMethod] + public void OneHotHashEncodingTest() + { + var context = new MLContext(); + var elementProperties = new Dictionary(); + PipelineNode node = new PipelineNode("OneHotHashEncoding", PipelineNodeType.Transform, new string[] { "Categorical_column_1" }, new string[] { "Categorical_column_1" }, elementProperties); + Pipeline pipeline = new Pipeline(new PipelineNode[] { node }); + CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null)); + var actual = codeGenerator.GenerateTransformsAndUsings(); + string expectedTransform = "Categorical.OneHotHashEncoding(new []{new OneHotHashEncodingEstimator.ColumnInfo(\"Categorical_column_1\",\"Categorical_column_1\")})"; + var expectedUsings = "using Microsoft.ML.Transforms.Categorical;\r\n"; + Assert.AreEqual(expectedTransform, actual[0].Item1); + Assert.AreEqual(expectedUsings, actual[0].Item2); + } + + [TestMethod] + public void TextFeaturizingTest() + { + var context = new MLContext(); + var elementProperties = new Dictionary(); + PipelineNode node = new PipelineNode("TextFeaturizing", PipelineNodeType.Transform, new string[] { "Text_column_1" }, new string[] { "Text_column_1" }, elementProperties); + Pipeline pipeline = new Pipeline(new PipelineNode[] { node }); + CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null)); + var actual = codeGenerator.GenerateTransformsAndUsings(); + string expectedTransform = "Text.FeaturizeText(\"Text_column_1\",\"Text_column_1\")"; + string expectedUsings = null; + Assert.AreEqual(expectedTransform, actual[0].Item1); + Assert.AreEqual(expectedUsings, actual[0].Item2); + } + + [TestMethod] + public void TypeConvertingTest() + { + var context = new MLContext(); + var elementProperties = new Dictionary(); + PipelineNode node = new PipelineNode("TypeConverting", PipelineNodeType.Transform, new string[] { "I4_column_1" }, new string[] { "R4_column_1" }, elementProperties); + Pipeline pipeline = new Pipeline(new PipelineNode[] { node }); + CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null)); + var actual = codeGenerator.GenerateTransformsAndUsings(); + string expectedTransform = "Conversion.ConvertType(new []{new TypeConvertingTransformer.ColumnInfo(\"R4_column_1\",DataKind.R4,\"I4_column_1\")})"; + string expectedUsings = "using Microsoft.ML.Transforms.Conversions;\r\n"; + Assert.AreEqual(expectedTransform, actual[0].Item1); + Assert.AreEqual(expectedUsings, actual[0].Item2); + } + + [TestMethod] + public void ValueToKeyMappingTest() + { + var context = new MLContext(); + var elementProperties = new Dictionary(); + PipelineNode node = new PipelineNode("ValueToKeyMapping", PipelineNodeType.Transform, new string[] { "Label" }, new string[] { "Label" }, elementProperties); + Pipeline pipeline = new Pipeline(new PipelineNode[] { node }); + CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null)); + var actual = codeGenerator.GenerateTransformsAndUsings(); + string expectedTransform = "Conversion.MapValueToKey(\"Label\",\"Label\")"; + var expectedUsings = "using Microsoft.ML.Transforms.Conversions;\r\n"; + Assert.AreEqual(expectedTransform, actual[0].Item1); + Assert.AreEqual(expectedUsings, actual[0].Item2); + } + + #endregion + } } diff --git a/src/mlnet/CodeGenerator/TransformGenerators.cs b/src/mlnet/CodeGenerator/TransformGenerators.cs index 1239e5947d8..73faa97fc38 100644 --- a/src/mlnet/CodeGenerator/TransformGenerators.cs +++ b/src/mlnet/CodeGenerator/TransformGenerators.cs @@ -232,7 +232,7 @@ public TypeConverting(PipelineNode node) : base(node) internal override string MethodName => "Conversion.ConvertType"; - internal override string Usings => null; + internal override string Usings => "using Microsoft.ML.Transforms.Conversions;\r\n"; private string ArgumentsName = "TypeConvertingTransformer.ColumnInfo"; @@ -271,7 +271,7 @@ public ValueToKeyMapping(PipelineNode node) : base(node) internal override string MethodName => "Conversion.MapValueToKey"; - internal override string Usings => null; + internal override string Usings => "using Microsoft.ML.Transforms.Conversions;\r\n"; public override string GenerateTransformer() {