Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix column purpose for PipelineSweeperMacro #461

Merged
18 commits merged into from Jul 13, 2018
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions src/Microsoft.ML.PipelineInference/AutoInference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ public sealed class AutoMlMlState : IMlState
private TransformInference.SuggestedTransform[] _availableTransforms;
private RecipeInference.SuggestedRecipe.SuggestedLearner[] _availableLearners;
private DependencyMap _dependencyMapping;
private RoleMappedData _dataRoles;
public IPipelineOptimizer AutoMlEngine { get; set; }
public PipelinePattern[] BatchCandidates { get; set; }
public SupportedMetric Metric { get; }
Expand Down Expand Up @@ -313,7 +314,7 @@ private void MainLearningLoop(int batchSize, int numOfTrainingRows)
var currentBatchSize = batchSize;
if (_terminator is IterationTerminator itr)
currentBatchSize = Math.Min(itr.RemainingIterations(_history), batchSize);
var candidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Values, currentBatchSize);
var candidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Values, currentBatchSize, _dataRoles);

// Break if no candidates returned, means no valid pipeline available.
if (candidates.Length == 0)
Expand Down Expand Up @@ -370,19 +371,21 @@ private TransformInference.SuggestedTransform[] InferAndFilter(IDataView data, T
TransformInference.SuggestedTransform[] existingTransforms = null)
{
// Infer transforms using experts
var levelTransforms = TransformInference.InferTransforms(_env, data, args);
var levelTransforms = TransformInference.InferTransforms(_env, data, args, _dataRoles);

// Retain only those transforms inferred which were also passed in.
if (existingTransforms != null)
return levelTransforms.Where(t => existingTransforms.Any(t2 => t2.Equals(t))).ToArray();
return levelTransforms;
}

/// <summary>
/// Selects the candidate learners, records the supplied data roles, and hands
/// both to ComputeSearchSpace together with the InferAndFilter callback.
/// </summary>
/// <param name="numTransformLevels">Passed through unchanged to ComputeSearchSpace.</param>
/// <param name="dataRoles">Stored in _dataRoles; optional, defaults to null.</param>
// Diff view: the first signature below appears to be the pre-change line and the
// second one its replacement (it adds the optional dataRoles parameter).
public void InferSearchSpace(int numTransformLevels)
public void InferSearchSpace(int numTransformLevels, RoleMappedData dataRoles = null)
{
// Start from every learner RecipeInference.AllowedLearners returns for this TrainerKind.
var learners = RecipeInference.AllowedLearners(_env, TrainerKind).ToArray();
// If specific learners were requested, keep only those whose LearnerName is in the list.
if (_requestedLearners != null && _requestedLearners.Length > 0)
learners = learners.Where(l => _requestedLearners.Contains(l.LearnerName)).ToArray();

// InferAndFilter reads _dataRoles, so it is assigned before the callback is handed over.
_dataRoles = dataRoles;
ComputeSearchSpace(numTransformLevels, learners, (b, c) => InferAndFilter(b, c));
}

Expand Down Expand Up @@ -536,7 +539,21 @@ public PipelinePattern[] GetNextCandidates(int numberOfCandidates)
var currentBatchSize = numberOfCandidates;
if (_terminator is IterationTerminator itr)
currentBatchSize = Math.Min(itr.RemainingIterations(_history), numberOfCandidates);
BatchCandidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Select(kvp => kvp.Value), currentBatchSize);
BatchCandidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Select(kvp => kvp.Value), currentBatchSize, _dataRoles);

using (var ch = _host.Start("Suggested Pipeline"))
{
foreach (var pipeline in BatchCandidates)
{
ch.Info($"AutoInference Pipeline Id : {pipeline.UniqueId}");
foreach (var transform in pipeline.Transforms)
{
ch.Info($"AutoInference Transform : {transform.Transform}");
}
ch.Info($"AutoInference Learner : {pipeline.Learner}");
}
}

return BatchCandidates;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.PipelineInference;

Expand Down Expand Up @@ -33,9 +34,10 @@ public DefaultsEngine(IHostEnvironment env, Arguments args)
_currentLearnerIndex = 0;
}

public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numCandidates)
public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numCandidates, RoleMappedData dataRoles)
{
var candidates = new List<PipelinePattern>();
DataRoles = dataRoles;

while (candidates.Count < numCandidates)
{
Expand All @@ -53,7 +55,8 @@ public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern>

do
{ // Make sure transforms set is valid. Repeat until passes verifier.
pipeline = new PipelinePattern(SampleTransforms(out var transformsBitMask), learner, "", Env);
pipeline = new PipelinePattern(SampleTransforms(out var transformsBitMask),
learner, "", Env);
valid = PipelineVerifier(pipeline, transformsBitMask);
count++;
} while (!valid && count <= 1000);
Expand All @@ -77,7 +80,7 @@ private TransformInference.SuggestedTransform[] SampleTransforms(out long transf

// Add final features concat transform.
sampledTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms));
DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms, DataRoles));

return sampledTransforms.ToArray();
}
Expand Down
20 changes: 12 additions & 8 deletions src/Microsoft.ML.PipelineInference/AutoMlEngines/RocketEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ private TransformInference.SuggestedTransform[] SampleTransforms(RecipeInference
// cause an error in verification, since it isn't included in the original
// dependency mapping (i.e., its level isn't in the dictionary).
sampledTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms));
DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms, DataRoles));
transformsBitMask = mask;

return sampledTransforms.ToArray();
Expand All @@ -202,9 +202,10 @@ private RecipeInference.SuggestedRecipe.SuggestedLearner[] GetTopLearners(IEnume
.Select(t=>AvailableLearners[t.Index]).ToArray();
}

public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numCandidates)
public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numCandidates, RoleMappedData dataRoles)
{
var prevCandidates = history.ToArray();
DataRoles = dataRoles;

switch (_currentStage)
{
Expand All @@ -220,7 +221,7 @@ public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern>
// number of candidates, using second stage logic.
UpdateLearners(GetTopLearners(prevCandidates));
_currentStage++;
return GetNextCandidates(prevCandidates, numCandidates);
return GetNextCandidates(prevCandidates, numCandidates, DataRoles);
}
else
return GetInitialPipelines(prevCandidates, remainingNum);
Expand Down Expand Up @@ -252,9 +253,11 @@ public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern>
}
}

/// <summary>
/// Requests candidates from one of the secondary engines: the entry keyed by
/// nameof(UniformRandomEngine) when _randomInit is true, otherwise the entry
/// keyed by nameof(DefaultsEngine).
/// </summary>
// Diff view: the expression-bodied form directly below appears to be the pre-change
// version; it calls GetNextCandidates with only history and numCandidates.
private PipelinePattern[] GetInitialPipelines(IEnumerable<PipelinePattern> history, int numCandidates) =>
_secondaryEngines[_randomInit ? nameof(UniformRandomEngine) : nameof(DefaultsEngine)]
.GetNextCandidates(history, numCandidates);
// Replacement form: same engine lookup, with DataRoles passed as a third argument.
private PipelinePattern[] GetInitialPipelines(IEnumerable<PipelinePattern> history, int numCandidates)
{
var engine = _secondaryEngines[_randomInit ? nameof(UniformRandomEngine) : nameof(DefaultsEngine)];
return engine.GetNextCandidates(history, numCandidates, DataRoles);
}

private PipelinePattern[] NextCandidates(PipelinePattern[] history, int numCandidates,
bool defaultHyperParams = false, bool uniformRandomTransforms = false)
Expand Down Expand Up @@ -294,8 +297,9 @@ private PipelinePattern[] NextCandidates(PipelinePattern[] history, int numCandi
do
{ // Make sure transforms set is valid and have not seen pipeline before.
// Repeat until passes or runs out of chances.
pipeline = new PipelinePattern(SampleTransforms(learner, history,
out var transformsBitMask, uniformRandomTransforms), learner, "", Env);
pipeline = new PipelinePattern(
SampleTransforms(learner, history, out var transformsBitMask, uniformRandomTransforms),
learner, "", Env);
hashKey = GetHashKey(transformsBitMask, learner);
valid = PipelineVerifier(pipeline, transformsBitMask) && !VisitedPipelines.Contains(hashKey);
count++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.PipelineInference;

Expand All @@ -30,8 +31,9 @@ public UniformRandomEngine(IHostEnvironment env)
: base(env, env.Register("UniformRandomEngine(AutoML)"))
{}

/// <summary>
/// Records the supplied data roles and returns the result of
/// GetRandomPipelines(numberOfCandidates). The history argument is not read here.
/// </summary>
/// <param name="history">Previously seen pipelines; unused by this override.</param>
/// <param name="numberOfCandidates">Forwarded to GetRandomPipelines.</param>
/// <param name="dataRoles">Stored in the DataRoles field before pipelines are generated.</param>
// Diff view: the first signature below appears to be the pre-change line and the
// second one its replacement (it adds the dataRoles parameter).
public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates)
public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates, RoleMappedData dataRoles)
{
// Set first: GetRandomPipelines passes DataRoles on to AutoMlUtils.GetFinalFeatureConcat.
DataRoles = dataRoles;
return GetRandomPipelines(numberOfCandidates);
}

Expand Down Expand Up @@ -66,7 +68,7 @@ private PipelinePattern[] GetRandomPipelines(int numOfPipelines)

// Always include features concat transform
selectedTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
DependencyMapping, selectedTransforms.ToArray(), AvailableTransforms));
DependencyMapping, selectedTransforms.ToArray(), AvailableTransforms, DataRoles));

// Compute hash key for checking if we've already seen this pipeline.
// However, if we keep missing, don't want to get stuck in infinite loop.
Expand Down
8 changes: 4 additions & 4 deletions src/Microsoft.ML.PipelineInference/AutoMlUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ public static long TransformsToBitmask(TransformInference.SuggestedTransform[] t
/// (In other words, if there would be nothing for that concatenate transform to do.)
/// </summary>
private static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHostEnvironment env,
IDataView dataSample, int[] excludedColumnIndices, int level, int atomicIdOffset)
IDataView dataSample, int[] excludedColumnIndices, int level, int atomicIdOffset, RoleMappedData dataRoles)
{
var finalArgs = new TransformInference.Arguments
{
Expand All @@ -266,7 +266,7 @@ private static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHo
ExcludedColumnIndices = excludedColumnIndices
};

var featuresConcatTransforms = TransformInference.InferConcatNumericFeatures(env, dataSample, finalArgs);
var featuresConcatTransforms = TransformInference.InferConcatNumericFeatures(env, dataSample, finalArgs, dataRoles);

for (int i = 0; i < featuresConcatTransforms.Length; i++)
{
Expand All @@ -282,7 +282,7 @@ private static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHo
/// </summary>
public static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHostEnvironment env, IDataView data,
AutoInference.DependencyMap dependencyMapping, TransformInference.SuggestedTransform[] selectedTransforms,
TransformInference.SuggestedTransform[] allTransforms)
TransformInference.SuggestedTransform[] allTransforms, RoleMappedData dataRoles)
{
int level = 1;
int atomicGroupLimit = 0;
Expand All @@ -292,7 +292,7 @@ public static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHos
atomicGroupLimit = allTransforms.Max(t => t.AtomicGroupId) + 1;
}
var excludedColumnIndices = GetExcludedColumnIndices(selectedTransforms, data, dependencyMapping);
return GetFinalFeatureConcat(env, data, excludedColumnIndices, level, atomicGroupLimit);
return GetFinalFeatureConcat(env, data, excludedColumnIndices, level, atomicGroupLimit, dataRoles);
}

public static IDataView ApplyTransformSet(IHostEnvironment env, IDataView data, TransformInference.SuggestedTransform[] transforms)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace Microsoft.ML.Runtime.PipelineInference
/// </summary>
public interface IPipelineOptimizer
{
PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates);
PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates, RoleMappedData dataRoles);

void SetSpace(TransformInference.SuggestedTransform[] availableTransforms,
RecipeInference.SuggestedRecipe.SuggestedLearner[] availableLearners,
Expand All @@ -44,6 +44,7 @@ public abstract class PipelineOptimizerBase : IPipelineOptimizer
protected IDataView OriginalData;
protected IDataView FullyTransformedData;
protected AutoInference.DependencyMap DependencyMapping;
protected RoleMappedData DataRoles;
protected readonly IHostEnvironment Env;
protected readonly IHost Host;
protected readonly Dictionary<long, bool> TransformsMaskValidity;
Expand All @@ -60,7 +61,7 @@ protected PipelineOptimizerBase(IHostEnvironment env, IHost host)
ProbUtils = new SweeperProbabilityUtils(host);
}

public abstract PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates);
public abstract PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates, RoleMappedData dataRoles);

public virtual void SetSpace(TransformInference.SuggestedTransform[] availableTransforms,
RecipeInference.SuggestedRecipe.SuggestedLearner[] availableLearners,
Expand Down
Loading