Skip to content

Commit

Permalink
Mem leak fix (dotnet#328)
Browse files Browse the repository at this point in the history
* Create test.txt

* Create test.txt

* changes needed for benchmarking

* forgot one file

* merge conflict fix

* fix build break

* back out my version of the fix for Label column issue and fix the original fix

* bogus file removal

* undo SuggestedPipeline change

* remove labelCol from pipeline suggester

* fix build break

* rename AutoML to Microsoft.ML.Auto everywhere and take a shot at publishing a NuGet package (will probably need tweaks once I try to use the pipeline)

* tweak queue in vsts-ci.yml

* there is still investigation to be done but this fix works and solves memory leak problems

* minor refactor
  • Loading branch information
Dmitry-A authored Apr 2, 2019
1 parent 93cf2d3 commit 6a752a5
Showing 1 changed file with 26 additions and 22 deletions.
48 changes: 26 additions & 22 deletions src/Microsoft.ML.Auto/ColumnInference/PurposeInference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,20 @@ private class IntermediateColumn
{
private readonly IDataView _data;
private readonly int _columnId;
private bool _isPurposeSuggested;
private ColumnPurpose _suggestedPurpose;
private readonly Lazy<DataViewType> _type;
private readonly Lazy<string> _columnName;
private object _cachedData;
private IReadOnlyList<ReadOnlyMemory<char>> _cachedData;

public bool IsPurposeSuggested { get { return _isPurposeSuggested; } }
public bool IsPurposeSuggested { get; private set; }

public ColumnPurpose SuggestedPurpose
{
get { return _suggestedPurpose; }
set
{
_suggestedPurpose = value;
_isPurposeSuggested = true;
IsPurposeSuggested = true;
}
}

Expand All @@ -83,26 +82,30 @@ public Column GetColumn()
return new Column(_columnId, _suggestedPurpose);
}

public T[] GetData<T>()
public IReadOnlyList<ReadOnlyMemory<char>> GetColumnData()
{
if (_cachedData is T[])
return _cachedData as T[];
if (_cachedData != null)
return _cachedData;

var results = new List<ReadOnlyMemory<char>>();

var results = new List<T>();
using (var cursor = _data.GetRowCursor(new[] { _data.Schema[_columnId] }))
{
var getter = cursor.GetGetter<T>(_columnId);
var getter = cursor.GetGetter<ReadOnlyMemory<char>>(_columnId);
while (cursor.MoveNext())
{
T value = default(T);
var value = default(ReadOnlyMemory<char>);
getter(ref value);
results.Add(value);

var copy = new ReadOnlyMemory<char>(value.ToArray());

results.Add(copy);
}
}

T[] resultArray;
_cachedData = resultArray = results.ToArray();
return resultArray;
_cachedData = results;

return results;
}
}

Expand All @@ -117,7 +120,8 @@ public void Apply(IntermediateColumn[] columns)
{
if (column.IsPurposeSuggested || !column.Type.IsText())
continue;
var data = column.GetData<ReadOnlyMemory<char>>();

var data = column.GetColumnData();

long sumLength = 0;
int sumSpaces = 0;
Expand All @@ -140,11 +144,11 @@ public void Apply(IntermediateColumn[] columns)
}
}

if (imagePathCount < data.Length - 1)
if (imagePathCount < data.Count - 1)
{
Double avgLength = 1.0 * sumLength / data.Length;
Double cardinalityRatio = 1.0 * seen.Count / data.Length;
Double avgSpaces = 1.0 * sumSpaces / data.Length;
Double avgLength = 1.0 * sumLength / data.Count;
Double cardinalityRatio = 1.0 * seen.Count / data.Count;
Double avgSpaces = 1.0 * sumSpaces / data.Count;
if (cardinalityRatio < 0.7)
column.SuggestedPurpose = ColumnPurpose.CategoricalFeature;
// (note: the columns.Count() == 1 condition below, in case a dataset has only
Expand Down Expand Up @@ -218,7 +222,7 @@ public void Apply(IntermediateColumn[] columns)
private static IEnumerable<IPurposeInferenceExpert> GetExperts()
{
// Each of the experts respects the decisions of all the experts above.

// Single-value text columns may be category, name, text or ignore.
yield return new Experts.TextClassification();
// Vector-value text columns are always treated as text.
Expand Down Expand Up @@ -248,15 +252,15 @@ public static PurposeInference.Column[] InferPurposes(MLContext context, IDataVi
var column = data.Schema[i];
IntermediateColumn intermediateCol;

if(column.IsHidden)
if (column.IsHidden)
{
intermediateCol = new IntermediateColumn(data, i, ColumnPurpose.Ignore);
allColumns.Add(intermediateCol);
continue;
}

var columnPurpose = columnInfo.GetColumnPurpose(column.Name);
if(columnPurpose == null)
if (columnPurpose == null)
{
intermediateCol = new IntermediateColumn(data, i);
columnsToInfer.Add(intermediateCol);
Expand Down

0 comments on commit 6a752a5

Please sign in to comment.