Skip to content

Commit

Permalink
infer purpose of hidden columns as 'ignore' (dotnet#142)
Browse files Browse the repository at this point in the history
  • Loading branch information
daholste authored and Dmitry-A committed Aug 22, 2019
1 parent 87b6766 commit b87d3af
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/Microsoft.ML.Auto/ColumnInference/PurposeInference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ public static PurposeInference.Column[] InferPurposes(MLContext context, IDataVi
{
intermediateCol = new IntermediateColumn(data, i, ColumnPurpose.Label);
}
else if (column.IsHidden)
{
intermediateCol = new IntermediateColumn(data, i, ColumnPurpose.Ignore);
}
else if(columnOverrides != null && columnOverrides.TryGetValue(column.Name, out var columnPurpose))
{
intermediateCol = new IntermediateColumn(data, i, columnPurpose);
Expand Down
39 changes: 39 additions & 0 deletions src/Test/PurposeInferenceTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
using System.Linq;
using Microsoft.Data.DataView;
using Microsoft.ML.Data;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Microsoft.ML.Auto.Test
{
    /// <summary>
    /// Unit tests for <see cref="PurposeInference"/>.
    /// </summary>
    [TestClass]
    public class PurposeInferenceTests
    {
        /// <summary>
        /// Verifies that a hidden column in the data view is inferred as
        /// <see cref="ColumnPurpose.Ignore"/>, while the visible column that
        /// shares its name still receives a regular feature purpose.
        /// </summary>
        [TestMethod]
        public void PurposeInferenceHiddenColumnsTest()
        {
            var context = new MLContext();

            // Build a basic (row-less) data view with a boolean label column
            // and a single-precision numeric features column.
            var schemaBuilder = new SchemaBuilder();
            schemaBuilder.AddColumn(DefaultColumnNames.Label, BoolType.Instance);
            schemaBuilder.AddColumn(DefaultColumnNames.Features, NumberType.R4);
            var schema = schemaBuilder.GetSchema();
            IDataView data = new EmptyDataView(context, schema);

            // Normalize the 'Features' column. This has the effect of creating 2 columns
            // named 'Features' in the data view, the first of which gets marked as 'Hidden'.
            var normalizer = context.Transforms.Normalize(DefaultColumnNames.Features);
            data = normalizer.Fit(data).Transform(data);

            // Infer purposes.
            var purposes = PurposeInference.InferPurposes(context, data, DefaultColumnNames.Label);

            // InferPurposes returns an array, so read its Length directly instead of
            // enumerating it through LINQ's Count() extension method.
            Assert.AreEqual(3, purposes.Length, "Expected one purpose per column, including the hidden one.");
            Assert.AreEqual(ColumnPurpose.Label, purposes[0].Purpose, "The label column should be inferred as Label.");
            // The first 'Features' column is the hidden one and must be ignored.
            Assert.AreEqual(ColumnPurpose.Ignore, purposes[1].Purpose, "The hidden 'Features' column should be inferred as Ignore.");
            // The second 'Features' column is the visible, normalized one.
            Assert.AreEqual(ColumnPurpose.NumericFeature, purposes[2].Purpose, "The visible 'Features' column should be inferred as NumericFeature.");
        }
    }
}

0 comments on commit b87d3af

Please sign in to comment.