From 7ea51d8bea5236b29abb0c6972e5d63c88073f8b Mon Sep 17 00:00:00 2001 From: klausmh Date: Thu, 14 May 2020 20:57:30 -0700 Subject: [PATCH 01/11] Draft PR for SrCnn batch detection API interface (#1) * POC Batch transform * SrCnn batch interface * Removed comment * Handled some APIreview comments. * Handled other review comments. * Resolved review comments. Added sample. Co-authored-by: Yael Dekel --- .../DetectAnomalyBySrCnnBatchPrediction.cs | 56 ++--- .../DataView/BatchDataViewMapperBase.cs | 172 +++++++++++++++ .../ExtensionsCatalog.cs | 25 ++- .../SrCnnBatchAnomalyDetection.cs | 196 ++++++++++++++++++ .../TimeSeriesDirectApi.cs | 34 ++- 5 files changed, 439 insertions(+), 44 deletions(-) create mode 100644 src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs create mode 100644 src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs index 5ef334bc76..493673e2af 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using Microsoft.ML; using Microsoft.ML.Data; +using Microsoft.ML.TimeSeries; namespace Samples.Dynamic { @@ -29,69 +30,44 @@ public static void Example() // Convert data to IDataView. var dataView = ml.Data.LoadFromEnumerable(data); - // Setup the estimator arguments + + // Setup the detection arguments string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction); string inputColumnName = nameof(TimeSeriesData.Value); - // The transformed data. - var transformedData = ml.Transforms.DetectAnomalyBySrCnn( - outputColumnName, inputColumnName, 16, 5, 5, 3, 8, 0.35).Fit( - dataView).Transform(dataView); + // Do batch anomaly detection + var outputDataView = ml.Data.BatchDetectAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, batchSize: 512, sensitivity: 70, detectMode: SrCnnDetectMode.AnomalyAndMargin); // Getting the data of the newly created column as an IEnumerable of // SrCnnAnomalyDetection. var predictionColumn = ml.Data.CreateEnumerable( - transformedData, reuseRowObject: false); + outputDataView, reuseRowObject: false); Console.WriteLine($"{outputColumnName} column obtained post-" + $"transformation."); - Console.WriteLine("Data\tAlert\tScore\tMag"); + Console.WriteLine("Data\tAlert\tScore\tMag\tExpectedValue\tBoundaryUnit\tUpperBoundary\tLowerBoundary"); int k = 0; foreach (var prediction in predictionColumn) PrintPrediction(data[k++].Value, prediction); //Prediction column obtained post-transformation. - //Data Alert Score Mag - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.00 0.00 - //5 0 0.03 0.18 - //5 0 0.03 0.18 - //5 0 0.03 0.18 - //5 0 0.03 0.18 - //5 0 0.03 0.18 - //10 1 0.47 0.93 - //5 0 0.31 0.50 - //5 0 0.05 0.30 - //5 0 0.01 0.23 - //5 0 0.00 0.21 - //5 0 0.01 0.25 + //Data Alert Score Mag ExpectedValue BoundaryUnit UpperBoundary LowerBoundary + // TODO: update with actual output from SrCnn } - private static void PrintPrediction(float value, SrCnnAnomalyDetection + private static void PrintPrediction(double value, SrCnnAnomalyDetection prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction - .Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}\t{5:0.00}\t{6:0.00}", value, + prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], + prediction.Prediction[3], prediction.Prediction[4], prediction.Prediction[5]); private class TimeSeriesData { - public float Value; + public double Value; - public TimeSeriesData(float value) + public TimeSeriesData(double value) { Value = value; } @@ -99,7 +75,7 @@ public TimeSeriesData(float value) private class SrCnnAnomalyDetection { - [VectorType(3)] + [VectorType(6)] public double[] Prediction { get; set; } } } diff --git a/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs b/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs new file mode 100644 index 0000000000..e2d49abcbb --- /dev/null +++ b/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs @@ -0,0 +1,172 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML.Runtime; + +namespace Microsoft.ML.Data.DataView +{ + internal abstract class BatchDataViewMapperBase : IDataView + { + public bool CanShuffle => false; + + public DataViewSchema Schema => SchemaBindings.AsSchema; + + private readonly IDataView _source; + private readonly IHost _host; + + protected BatchDataViewMapperBase(IHostEnvironment env, string registrationName, IDataView input) + { + _host = env.Register(registrationName); + _source = input; + } + + public long? GetRowCount() => _source.GetRowCount(); + + public DataViewRowCursor GetRowCursor(IEnumerable columnsNeeded, Random rand = null) + { + _host.CheckValue(columnsNeeded, nameof(columnsNeeded)); + _host.CheckValueOrNull(rand); + + var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, SchemaBindings.AsSchema); + + // If we aren't selecting any of the output columns, don't construct our cursor. + // Note that because we cannot support random due to the inherently + // stratified nature, neither can we allow the base data to be shuffled, + // even if it supports shuffling. + if (!SchemaBindings.AnyNewColumnsActive(predicate)) + { + var activeInput = SchemaBindings.GetActiveInput(predicate); + var inputCursor = _source.GetRowCursor(_source.Schema.Where(c => activeInput[c.Index]), null); + return new BindingsWrappedRowCursor(_host, inputCursor, SchemaBindings); + } + var active = SchemaBindings.GetActive(predicate); + Contracts.Assert(active.Length == SchemaBindings.ColumnCount); + + // REVIEW: We can get a different input predicate for the input cursor and for the lookahead cursor. The lookahead + // cursor is only used for getting the values from the input column, so it only needs that column activated. The + // other cursor is used to get source columns, so it needs the rest of them activated. + var predInput = GetSchemaBindingDependencies(predicate); + var inputCols = _source.Schema.Where(c => predInput(c.Index)); + return new Cursor(this, _source.GetRowCursor(inputCols), _source.GetRowCursor(inputCols), active); + } + + public DataViewRowCursor[] GetRowCursorSet(IEnumerable columnsNeeded, int n, Random rand = null) + { + return new[] { GetRowCursor(columnsNeeded, rand) }; + } + + protected abstract ColumnBindingsBase SchemaBindings { get; } + protected abstract TBatch InitializeBatch(DataViewRowCursor input); + protected abstract void ProcessBatch(TBatch currentBatch); + protected abstract void ProcessExample(TBatch currentBatch, TInput currentInput); + protected abstract Func GetLastInBatchDelegate(DataViewRowCursor lookAheadCursor); + protected abstract Func GetIsNewBatchDelegate(DataViewRowCursor lookAheadCursor); + protected abstract ValueGetter GetLookAheadGetter(DataViewRowCursor lookAheadCursor); + protected abstract Delegate[] CreateGetters(DataViewRowCursor input, TBatch currentBatch, bool[] active); + protected abstract Func GetSchemaBindingDependencies(Func predicate); + + private sealed class Cursor : RootCursorBase + { + private readonly BatchDataViewMapperBase _parent; + private readonly DataViewRowCursor _lookAheadCursor; + private readonly DataViewRowCursor _input; + + private readonly bool[] _active; + private readonly Delegate[] _getters; + + private readonly TBatch _currentBatch; + private readonly Func _lastInBatchInLookAheadCursorDel; + private readonly Func _firstInBatchInInputCursorDel; + private readonly ValueGetter _inputGetterInLookAheadCursor; + private TInput _currentInput; + + public override long Batch => 0; + + public override DataViewSchema Schema => _parent.Schema; + + public Cursor(BatchDataViewMapperBase parent, DataViewRowCursor input, DataViewRowCursor lookAheadCursor, bool[] active) + : base(parent._host) + { + _parent = parent; + _input = input; + _lookAheadCursor = lookAheadCursor; + _active = active; + + _currentBatch = _parent.InitializeBatch(_input); + + _getters = _parent.CreateGetters(_input, _currentBatch, _active); + + _lastInBatchInLookAheadCursorDel = _parent.GetLastInBatchDelegate(_lookAheadCursor); + _firstInBatchInInputCursorDel = _parent.GetIsNewBatchDelegate(_input); + _inputGetterInLookAheadCursor = _parent.GetLookAheadGetter(_lookAheadCursor); + } + + public override ValueGetter GetGetter(DataViewSchema.Column column) + { + Contracts.CheckParam(IsColumnActive(column), nameof(column), "requested column is not active"); + + var col = _parent.SchemaBindings.MapColumnIndex(out bool isSrc, column.Index); + if (isSrc) + { + Contracts.AssertValue(_input); + return _input.GetGetter(_input.Schema[col]); + } + + Ch.AssertValue(_getters); + var getter = _getters[col]; + Ch.Assert(getter != null); + var fn = getter as ValueGetter; + if (fn == null) + throw Ch.Except("Invalid TValue in GetGetter: '{0}'", typeof(TValue)); + return fn; + } + + public override ValueGetter GetIdGetter() + { + return + (ref DataViewRowId val) => + { + Ch.Check(IsGood, "Cannot call ID getter in current state"); + val = new DataViewRowId((ulong)Position, 0); + }; + } + + public override bool IsColumnActive(DataViewSchema.Column column) + { + Ch.Check(column.Index < _parent.SchemaBindings.AsSchema.Count); + return _active[column.Index]; + } + + protected override bool MoveNextCore() + { + if (!_input.MoveNext()) + return false; + if (!_firstInBatchInInputCursorDel()) + return true; + + // If we are here, this means that _input.MoveNext() has gotten us to the beginning of the next batch, + // so now we need to look ahead at the entire next batch in the _lookAheadCursor. + // The _lookAheadCursor's position should be on the last row of the previous batch (or -1). + Ch.Assert(_lastInBatchInLookAheadCursorDel()); + + var good = _lookAheadCursor.MoveNext(); + // The two cursors should have the same number of elements, so if _input.MoveNext() returned true, + // then it must return true here too. + Ch.Assert(good); + + do + { + _inputGetterInLookAheadCursor(ref _currentInput); + _parent.ProcessExample(_currentBatch, _currentInput); + } while (!_lastInBatchInLookAheadCursorDel() && _lookAheadCursor.MoveNext()); + + _parent.ProcessBatch(_currentBatch); + return true; + } + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 2cc2aa3b0a..abb411f8e9 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -2,8 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; -using System.Reflection; using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.TimeSeries; @@ -150,6 +148,29 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog int windowSize = 64, int backAddWindowSize = 5, int lookaheadWindowSize = 5, int averageingWindowSize = 3, int judgementWindowSize = 21, double threshold = 0.3) => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); + /// + /// Create , which detects timeseries anomalies using SRCNN algorithm. + /// + /// The transform's catalog. + /// ... + /// Name of the column resulting from the transformation of . + /// The column data is a vector of . The vector contains 3 elements: alert (1 means anomaly while 0 means normal), raw score, and magnitude of spectual residual. + /// Name of column to transform. The column data must be . + /// The threshold to determine anomaly, score larger than the threshold is considered as anomaly. Should be in (0,1) + /// .Divide the input data into batches to fit SrCnn model. Must be -1 or a positive integer no less than 12. Default value is 1024. + /// The sensitivity of boundaries. Must be in the interval (0, 100). + /// The detect mode of the SrCnn model. + /// + /// + /// + /// + /// + public static IDataView BatchDetectAnomalyBySrCnn(this DataOperationsCatalog catalog, IDataView input, string outputColumnName, string inputColumnName, + double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyAndMargin) + => new SrCnnBatchAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode); + /// /// Create , which localizes root causes using decision tree algorithm. /// diff --git a/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs b/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs new file mode 100644 index 0000000000..e724a9990e --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs @@ -0,0 +1,196 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML.Data; +using Microsoft.ML.Data.DataView; +using Microsoft.ML.Numeric; +using Microsoft.ML.Runtime; + +namespace Microsoft.ML.TimeSeries +{ + /// + /// The detect modes of SrCnn models. + /// + public enum SrCnnDetectMode + { + /// + /// In this mode, output (IsAnomaly, RawScore, Mag). + /// + AnomalyOnly = 0, + + /// + /// In this mode, output (IsAnomaly, AnomalyScore, Mag, ExpectedValue, BoundaryUnit, UpperBoundary, LowerBoundary). + /// + AnomalyAndMargin = 1, + + /// + /// In this mode, output (IsAnomaly, RawScore, Mag, ExpectedValue). + /// + AnomalyAndExpectedValue = 2 + } + + // TODO: SrCnn + internal sealed class SrCnnBatchAnomalyDetector : BatchDataViewMapperBase + { + private const int MinBatchSize = 12; + private readonly int _batchSize; + private readonly string _inputColumnName; + private readonly SrCnnDetectMode _detectMode; + + private class Bindings : ColumnBindingsBase + { + private readonly VectorDataViewType _outputColumnType; + private readonly int _inputColumnIndex; + + public Bindings(DataViewSchema input, string inputColumnName, string outputColumnName, VectorDataViewType outputColumnType) + : base(input, true, outputColumnName) + { + _outputColumnType = outputColumnType; + _inputColumnIndex = Input[inputColumnName].Index; + } + + protected override DataViewType GetColumnTypeCore(int iinfo) + { + Contracts.Check(iinfo == 0); + return _outputColumnType; + } + + // Get a predicate for the input columns. + public Func GetDependencies(Func predicate) + { + Contracts.AssertValue(predicate); + + var active = new bool[Input.Count]; + for (int col = 0; col < ColumnCount; col++) + { + if (!predicate(col)) + continue; + + bool isSrc; + int index = MapColumnIndex(out isSrc, col); + if (isSrc) + active[index] = true; + else + active[_inputColumnIndex] = true; + } + + return col => 0 <= col && col < active.Length && active[col]; + } + } + + public SrCnnBatchAnomalyDetector(IHostEnvironment env, IDataView input, string inputColumnName, string outputColumnName, double threshold, int batchSize, double sensitivity, SrCnnDetectMode detectMode) + : base(env, "SrCnnBatchAnomalyDetector", input) + { + + Contracts.CheckParam(batchSize >= MinBatchSize, nameof(batchSize), "batch size is too small"); + _detectMode = detectMode; + int outputSize = 6; // TODO: determine based on detectMode + SchemaBindings = new Bindings(input.Schema, inputColumnName, outputColumnName, new VectorDataViewType(NumberDataViewType.Double, outputSize)); + _batchSize = batchSize; + _inputColumnName = inputColumnName; + } + + protected override ColumnBindingsBase SchemaBindings { get; } + + protected override Delegate[] CreateGetters(DataViewRowCursor input, Batch currentBatch, bool[] active) + { + if (!SchemaBindings.AnyNewColumnsActive(x => active[x])) + return new Delegate[1]; + return new[] { currentBatch.CreateGetter(input, _inputColumnName) }; + } + + protected override Batch InitializeBatch(DataViewRowCursor input) => new Batch(_batchSize, _detectMode); + + protected override Func GetIsNewBatchDelegate(DataViewRowCursor input) + { + return () => input.Position % _batchSize == 0; + } + + protected override Func GetLastInBatchDelegate(DataViewRowCursor input) + { + return () => (input.Position + 1) % _batchSize == 0; + } + + protected override ValueGetter GetLookAheadGetter(DataViewRowCursor input) + { + return input.GetGetter(input.Schema[_inputColumnName]); + } + + protected override Func GetSchemaBindingDependencies(Func predicate) + { + return (SchemaBindings as Bindings).GetDependencies(predicate); + } + + protected override void ProcessExample(Batch currentBatch, double currentInput) + { + currentBatch.AddValue(currentInput); + } + + protected override void ProcessBatch(Batch currentBatch) + { + currentBatch.Process(); + currentBatch.Reset(); + } + + public sealed class Batch + { + private List _previousBatch; + private List _batch; + private readonly SrCnnDetectMode _detectMode; + private double _cursor; + + public Batch(int batchSize, SrCnnDetectMode detectMode) + { + _detectMode = detectMode; + _previousBatch = new List(batchSize); + _batch = new List(batchSize); + } + + public void AddValue(double value) + { + _batch.Add(value); + } + + public int Count => _batch.Count; + + public void Process() + { + // TODO: replace with run of SrCnn + _cursor = _batch.Sum(); + } + + public void Reset() + { + var tempBatch = _previousBatch; + _previousBatch = _batch; + _batch = tempBatch; + _batch.Clear(); + } + + public ValueGetter> CreateGetter(DataViewRowCursor input, string inputCol) + { + ValueGetter srcGetter = input.GetGetter(input.Schema[inputCol]); + ValueGetter> getter = + (ref VBuffer dst) => + { + double src = default; + srcGetter(ref src); + // TODO: replace with SrCnn result + dst = new VBuffer(6, new[] { + src * _cursor, + (src + 1) * _cursor, + (src + 2) * _cursor, + (src + 3) * _cursor, + (src + 4) * _cursor, + (src + 5) * _cursor, + }); + }; + return getter; + } + } + } +} diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index f3f548fd7b..7811e72967 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -5,10 +5,9 @@ using System; using System.Collections.Generic; using System.IO; +using System.Linq; using Microsoft.ML.Data; using Microsoft.ML.TestFramework; -using Microsoft.ML.TestFramework.Attributes; -using Microsoft.ML.TestFrameworkCommon; using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; using Xunit; @@ -571,6 +570,37 @@ public void AnomalyDetectionWithSrCnn(bool loadDataFromFile) } } + [Fact] + public void TestSrCnnBatchAnomalyDetector() + { + // TODO: delete/replace with SrCnn tests + var ml = new MLContext(1); + var bldr = new ArrayDataViewBuilder(ml); + bldr.AddColumn("Input", NumberDataViewType.Double, new[] { 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 4.0, 6.0, 7.0, 1.0, }); + var input = bldr.GetDataView(); + var output = new SrCnnBatchAnomalyDetector( + ml, + input, + "Input", + "Output", + 0.3, + 12, + 99, + SrCnnDetectMode.AnomalyAndExpectedValue); + var batchTransformOutput = ml.Data.CreateEnumerable(output, reuseRowObject: false).ToList(); + + var inputs = batchTransformOutput.Select(e => e.Input); + var outputs = batchTransformOutput.Select(e => e.Output); + } + + private class BatchTransformOutput + { + public double Input { get; set; } + + [VectorType] + public double[] Output { get; set; } + } + [Fact] public void RootCauseLocalization() { From 1f8e1e09cbbedd950297df7cdbde8ca70ae436f5 Mon Sep 17 00:00:00 2001 From: Meng Ai Date: Fri, 15 May 2020 20:14:32 +0800 Subject: [PATCH 02/11] Implement SrCnn entire API by function --- .../DetectAnomalyBySrCnnBatchPrediction.cs | 42 +- .../ExtensionsCatalog.cs | 2 +- .../SrCnnBatchAnomalyDetection.cs | 619 +++++++++++++++++- 3 files changed, 614 insertions(+), 49 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs index 493673e2af..ca3818b6eb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs @@ -19,63 +19,53 @@ public static void Example() var data = new List(); for (int index = 0; index < 20; index++) { - data.Add(new TimeSeriesData(5)); + data.Add(new TimeSeriesData { Value = 5 }); } - data.Add(new TimeSeriesData(10)); + data.Add(new TimeSeriesData { Value = 10 }); for (int index = 0; index < 5; index++) { - data.Add(new TimeSeriesData(5)); + data.Add(new TimeSeriesData { Value = 5 }); } // Convert data to IDataView. var dataView = ml.Data.LoadFromEnumerable(data); - // Setup the detection arguments string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction); string inputColumnName = nameof(TimeSeriesData.Value); // Do batch anomaly detection - var outputDataView = ml.Data.BatchDetectAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, batchSize: 512, sensitivity: 70, detectMode: SrCnnDetectMode.AnomalyAndMargin); + var outputDataView = ml.Data.BatchDetectAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, + threshold:0.35, batchSize: 512, sensitivity: 90.0, detectMode: SrCnnDetectMode.AnomalyAndMargin); // Getting the data of the newly created column as an IEnumerable of // SrCnnAnomalyDetection. var predictionColumn = ml.Data.CreateEnumerable( outputDataView, reuseRowObject: false); - Console.WriteLine($"{outputColumnName} column obtained post-" + - $"transformation."); - - Console.WriteLine("Data\tAlert\tScore\tMag\tExpectedValue\tBoundaryUnit\tUpperBoundary\tLowerBoundary"); + Console.WriteLine("Index\tData\tAnomaly\tAnomalyScore\tMag\tExpectedValue\tBoundaryUnit\tUpperBoundary\tLowerBoundary"); int k = 0; foreach (var prediction in predictionColumn) - PrintPrediction(data[k++].Value, prediction); - - //Prediction column obtained post-transformation. - //Data Alert Score Mag ExpectedValue BoundaryUnit UpperBoundary LowerBoundary - // TODO: update with actual output from SrCnn + { + PrintPrediction(k, data[k].Value, prediction); + k++; + } } - private static void PrintPrediction(double value, SrCnnAnomalyDetection - prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}\t{5:0.00}\t{6:0.00}", value, - prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], - prediction.Prediction[3], prediction.Prediction[4], prediction.Prediction[5]); + private static void PrintPrediction(int idx, double value, SrCnnAnomalyDetection prediction) => + Console.WriteLine("{0}\t{1:0.00}\t{2}\t\t{3:0.00}\t{4:0.00}\t\t{5:0.00}\t\t{6:0.00}\t\t{7:0.00}\t\t{8:0.00}", + idx, value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], + prediction.Prediction[3], prediction.Prediction[4], prediction.Prediction[5], prediction.Prediction[6]); private class TimeSeriesData { - public double Value; - - public TimeSeriesData(double value) - { - Value = value; - } + public double Value { get; set; } } private class SrCnnAnomalyDetection { - [VectorType(6)] + [VectorType] public double[] Prediction { get; set; } } } diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index abb411f8e9..497d8aca90 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -168,7 +168,7 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog /// /// public static IDataView BatchDetectAnomalyBySrCnn(this DataOperationsCatalog catalog, IDataView input, string outputColumnName, string inputColumnName, - double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyAndMargin) + double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly) => new SrCnnBatchAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode); /// diff --git a/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs b/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs index e724a9990e..86b722c750 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs @@ -9,6 +9,7 @@ using Microsoft.ML.Data.DataView; using Microsoft.ML.Numeric; using Microsoft.ML.Runtime; +using Microsoft.ML.Transforms.TimeSeries; namespace Microsoft.ML.TimeSeries { @@ -37,9 +38,14 @@ public enum SrCnnDetectMode internal sealed class SrCnnBatchAnomalyDetector : BatchDataViewMapperBase { private const int MinBatchSize = 12; + private const int AnomalyOnlyOutputLength = 3; + private const int AnomalyAndExpectedValueOutputLength = 4; + private const int AnomalyAndMarginOutputLength = 7; + private readonly int _batchSize; private readonly string _inputColumnName; - private readonly SrCnnDetectMode _detectMode; + private readonly int _outputLength; + private readonly SrCnnEntireModeler _modler; private class Bindings : ColumnBindingsBase { @@ -85,13 +91,37 @@ public Func GetDependencies(Func predicate) public SrCnnBatchAnomalyDetector(IHostEnvironment env, IDataView input, string inputColumnName, string outputColumnName, double threshold, int batchSize, double sensitivity, SrCnnDetectMode detectMode) : base(env, "SrCnnBatchAnomalyDetector", input) { + Contracts.CheckValue(env, nameof(env)); - Contracts.CheckParam(batchSize >= MinBatchSize, nameof(batchSize), "batch size is too small"); - _detectMode = detectMode; - int outputSize = 6; // TODO: determine based on detectMode - SchemaBindings = new Bindings(input.Schema, inputColumnName, outputColumnName, new VectorDataViewType(NumberDataViewType.Double, outputSize)); - _batchSize = batchSize; + Contracts.CheckValue(inputColumnName, nameof(inputColumnName)); _inputColumnName = inputColumnName; + + env.CheckUserArg(batchSize == -1 || batchSize >= MinBatchSize, nameof(batchSize), "BatchSize must be -1 or no less than 12."); + _batchSize = batchSize; + + env.CheckUserArg(threshold >= 0 && threshold <= 1, nameof(threshold), "Must be in [0,1]."); + env.CheckUserArg(detectMode == SrCnnDetectMode.AnomalyOnly + || detectMode == SrCnnDetectMode.AnomalyAndExpectedValue + || detectMode == SrCnnDetectMode.AnomalyAndMargin, nameof(detectMode), "Invalid detectMode"); + + if (detectMode.Equals(SrCnnDetectMode.AnomalyOnly)) + { + _outputLength = AnomalyOnlyOutputLength; + _modler = new SrCnnEntireModeler(threshold, sensitivity, detectMode, _outputLength); + } + else if (detectMode.Equals(SrCnnDetectMode.AnomalyAndMargin)) + { + env.CheckUserArg(sensitivity >= 0 && sensitivity <= 100, nameof(sensitivity), "Must be in [0,100]."); + _outputLength = AnomalyAndMarginOutputLength; + _modler = new SrCnnEntireModeler(threshold, sensitivity, detectMode, _outputLength); + } + else if (detectMode.Equals(SrCnnDetectMode.AnomalyAndExpectedValue)) + { + _outputLength = AnomalyAndExpectedValueOutputLength; + _modler = new SrCnnEntireModeler(threshold, sensitivity, detectMode, _outputLength); + } + + SchemaBindings = new Bindings(input.Schema, inputColumnName, outputColumnName, new VectorDataViewType(NumberDataViewType.Double, _outputLength)); } protected override ColumnBindingsBase SchemaBindings { get; } @@ -103,7 +133,7 @@ protected override Delegate[] CreateGetters(DataViewRowCursor input, Batch curre return new[] { currentBatch.CreateGetter(input, _inputColumnName) }; } - protected override Batch InitializeBatch(DataViewRowCursor input) => new Batch(_batchSize, _detectMode); + protected override Batch InitializeBatch(DataViewRowCursor input) => new Batch(_batchSize, _outputLength, _modler); protected override Func GetIsNewBatchDelegate(DataViewRowCursor input) { @@ -140,14 +170,18 @@ public sealed class Batch { private List _previousBatch; private List _batch; - private readonly SrCnnDetectMode _detectMode; - private double _cursor; + private readonly int _batchSize; + private readonly int _outputLength; + private SrCnnEntireModeler _modler; + private double[][] _results; - public Batch(int batchSize, SrCnnDetectMode detectMode) + public Batch(int batchSize, int outputLength, SrCnnEntireModeler modeler) { - _detectMode = detectMode; + _batchSize = batchSize; + _outputLength = outputLength; _previousBatch = new List(batchSize); _batch = new List(batchSize); + _modler = modeler; } public void AddValue(double value) @@ -159,8 +193,19 @@ public void AddValue(double value) public void Process() { - // TODO: replace with run of SrCnn - _cursor = _batch.Sum(); + if (_batch.Count < MinBatchSize) + { + if (_previousBatch.Count + _batch.Count < MinBatchSize) + return; + var bLen = _previousBatch.Count - _batch.Count; + _previousBatch = _previousBatch.GetRange(_batch.Count, bLen); + _previousBatch.AddRange(_batch); + _results = _modler.Train(_previousBatch.ToArray()).Skip(bLen).ToArray(); + } + else + { + _results = _modler.Train(_batch.ToArray()); + } } public void Reset() @@ -179,18 +224,548 @@ public ValueGetter> CreateGetter(DataViewRowCursor input, string { double src = default; srcGetter(ref src); - // TODO: replace with SrCnn result - dst = new VBuffer(6, new[] { - src * _cursor, - (src + 1) * _cursor, - (src + 2) * _cursor, - (src + 3) * _cursor, - (src + 4) * _cursor, - (src + 5) * _cursor, - }); + var result = VBufferEditor.Create(ref dst, _outputLength); + for (int i = 0; i < _outputLength; ++i) + { + result.Values[i] = _results[input.Position % _batchSize][i]; + } + dst = result.Commit(); }; return getter; } } + + public sealed class SrCnnEntireModeler + { + private static readonly int _lookaheadWindowSize = 5; + private static readonly int _backAddWindowSize = 5; + private static readonly int _avergingWindowSize = 3; + private static readonly int _judgementWindowSize = 40; + private static readonly double _eps = 1e-8; + private static readonly double _deanomalyThreshold = 0.35; + + // A fixed lookup table which returns factor using sensitivity as index. + // Since Margin = BoundaryUnit * factor, this factor is calculated to make sure Margin == Boundary when sensitivity is 50, + // and increases/decreases exponentially as sensitivity increases/decreases. + // The factor array is generated by formula: + // f(x)=1, if x=50; + // f(x)=f(x+1)*(1.25+0.001*x), if 0<=x<50; + // f(x)=f(x+1)/(1.25+0.001*(x-50)), if 50 _eps) + { + magLogList[i] = Math.Log(magList[i]); + } + else + { + magLogList[i] = 0; + } + } + + // Step 4: Calculate spectral + double[] filteredLogList = AverageFilter(magLogList, _avergingWindowSize); + double[] spectralList = new double[length]; + for (int i = 0; i < length; ++i) + { + spectralList[i] = Math.Exp(magLogList[i] - filteredLogList[i]); + } + + // Step 5: IFFT transformation + double[] transRe = new double[length]; + double[] transIm = new double[length]; + for (int i = 0; i < length; ++i) + { + if (magLogList[i] != 0) + { + transRe[i] = fftRe[i] * spectralList[i] / magList[i]; + transIm[i] = fftIm[i] * spectralList[i] / magList[i]; + } + else + { + transRe[i] = 0; + transIm[i] = 0; + } + } + + double[] ifftRe = new double[length]; + double[] ifftIm = new double[length]; + FftUtils.ComputeBackwardFft(transRe, transIm, ifftRe, ifftIm, length); + + // Step 6: Calculate mag and ave_mag of IFFT + double[] ifftMagList = new double[length]; + for (int i = 0; i < length; ++i) + { + ifftMagList[i] = Math.Sqrt((Math.Pow(ifftRe[i], 2) + Math.Pow(ifftIm[i], 2))); + } + double[] filteredIfftMagList = AverageFilter(ifftMagList, Math.Min(ifftMagList.Length, _judgementWindowSize)); + + // Step 7: Calculate raw score and set result + for (int i = 0; i < results.GetLength(0); ++i) + { + var score = CalculateSocre(ifftMagList[i], filteredIfftMagList[i]); + score /= 10.0f; + score = Math.Min(score, 1); + score = Math.Max(score, 0); + + var detres = score > threshold ? 1 : 0; + + results[i][0] = detres; + results[i][1] = score; + results[i][2] = ifftMagList[i]; + } + } + + private static double[] BackAdd(double[] data) + { + double[] predictArray = new double[_lookaheadWindowSize + 1]; + int j = 0; + for (int i = data.Length - _lookaheadWindowSize - 2; i < data.Length - 1; ++i) + { + predictArray[j] = data[i]; + } + var predictedValue = PredictNext(predictArray); + double[] backAddArray = new double[data.Length + _backAddWindowSize]; + for (int i = 0; i < data.Length; ++i) + { + backAddArray[i] = data[i]; + } + for (int i = 0; i < _backAddWindowSize; ++i) + { + backAddArray[data.Length + i] = predictedValue; + } + return backAddArray; + } + + private static double PredictNext(double[] data) + { + var n = data.Length; + double slopeSum = 0.0f; + for (int i = 0; i < n - 1; ++i) + { + slopeSum += (data[n - 1] - data[i]) / (n - 1 - i); + } + return (data[1] + slopeSum); + } + + private static double[] AverageFilter(double[] data, int n) + { + double cumsum = 0.0f; + int length = data.Length; + double[] cumSumList = new double[length]; + double[] cumSumShift = new double[length]; + + for (int i = 0; i < length; ++i) + { + cumsum += cumSumList[i]; + cumSumList[i] = cumsum; + cumSumShift[i] = cumsum; + } + for (int i = n; i < length; ++i) + { + cumSumList[i] = (cumSumList[i] - cumSumShift[i - n]) / n; + } + for (int i = 1; i < n; ++i) + { + cumSumList[i] /= (i + 1); + } + return cumSumList; + } + + private static double CalculateSocre(double mag, double avgMag) + { + double safeDivisor = avgMag; + if (Math.Abs(safeDivisor) < _eps) + { + safeDivisor = _eps; + } + return (Math.Abs(mag - avgMag) / safeDivisor); + } + + private static void GetExpectedValue(double[] values, double[][] results) + { + //Step 8: Calculate Expected Value + var exps = CalculateExpectedValueByFft(GetDeanomalyData(values, GetAnomalyIndex(results.Select(x => x[1]).ToArray()))); + + for (int i = 0; i < results.Length; ++i) + { + results[i][3] = exps[i]; + } + } + + private static void GetMargin(double[] values, double[][] results, double sensitivity) + { + //Step 8: Calculate Expected Value + var exps = CalculateExpectedValueByFft(GetDeanomalyData(values, GetAnomalyIndex(results.Select(x => x[1]).ToArray()))); + + //Step 9: Calculate Boundary Unit + var units = CalculateBoundaryUnit(values, results.Select(x => x[0] > 0 ? true : false).ToArray()); + + //Step 10: Calculate UpperBound and LowerBound + var margins = units.Select(x => CalculateMargin(x, sensitivity)).ToList(); + + for (int i = 0; i < results.Length; ++i) + { + results[i][3] = exps[i]; + results[i][4] = units[i]; + results[i][5] = exps[i] + margins[i]; + results[i][6] = exps[i] - margins[i]; + //Step 11: Update Anomaly Score + results[i][1] = CalculateAnomalyScore(values[i], exps[i], units[i], results[i][0] > 0); + } + } + + private static int[] GetAnomalyIndex(double[] scores) + { + List anomalyIdxList = new List(); + for (int i = 0; i < scores.Length; ++i) + if (scores[i] > _deanomalyThreshold) + { + anomalyIdxList.Add(i); + } + + return anomalyIdxList.ToArray(); + } + + private static double[] GetDeanomalyData(double[] data, int[] anomalyIdxList) + { + double[] deAnomalyData = (double[])data.Clone(); + int minPointsToFit = 4; + foreach (var idx in anomalyIdxList) + { + int step = 1; + int start = Math.Max(idx - step, 0); + int end = Math.Min(data.Length - 1, idx + step); + + List> fitValues = new List>(); + for (int i = start; i <= end; ++i) + { + if (!anomalyIdxList.Contains(i)) + { + fitValues.Add(new Tuple(i, data[i])); + } + } + + while (fitValues.Count < minPointsToFit && (start > 0 || end < data.Length - 1)) + { + step += 2; + start = Math.Max(idx - step, 0); + end = Math.Min(data.Length - 1, idx + step); + fitValues.Clear(); + for (int i = start; i <= end; ++i) + { + if (!anomalyIdxList.Contains(i)) + { + fitValues.Add(new Tuple(i, data[i])); + } + } + } + + if (fitValues.Count > 1) + { + deAnomalyData[idx] = CalculateInterplate(fitValues, idx); + } + } + + return deAnomalyData; + } + + private static double CalculateInterplate(List> values, int idx) + { + var n = values.Count; + double sumX = values.Sum(item => item.Item1); + double sumY = values.Sum(item => item.Item2); + double sumXX = values.Sum(item => Math.Pow(item.Item1, 2)); + double sumXY = values.Sum(item => item.Item1 * item.Item2); + + var a = ((double)n * sumXY - sumX * sumY) / ((double)n * sumXX - sumX * sumX); + var b = (sumXX * sumY - sumX * sumXY) / ((double)n * sumXX - sumX * sumX); + + return a * (double)idx + b; + } + + private static double[] CalculateExpectedValueByFft(double[] data) + { + int length = data.Length; + double[] fftRe = new double[length]; + double[] fftIm = new double[length]; + FftUtils.ComputeForwardFft(data, Enumerable.Repeat((double)0.0f, length).ToArray(), fftRe, fftIm, length); + + for (int i = 0; i < length; ++i) + { + if (i > (double)length * 3 / 8 && i < (double)length * 5 / 8) + { + fftRe[i] = 0.0f; + fftIm[i] = 0.0f; + } + } + + double[] ifftRe = new double[length]; + double[] ifftIm = new double[length]; + FftUtils.ComputeBackwardFft(fftRe, fftIm, ifftRe, ifftIm, length); + + return ifftRe.Take(length).ToArray(); + } + + private static double[] CalculateBoundaryUnit(double[] data, bool[] isAnomalys) + { + int window = Math.Min(data.Length / 3, 512); + double trendFraction = 0.5; // mix trend and average of trend + double trendSum = 0; + int calculationSize = 0; + + double[] trends = MedianFilter(data, window, true); + for (int i = 0; i < trends.Length; ++i) + { + if (!isAnomalys[i]) + { + trendSum += Math.Abs(trends[i]); + ++calculationSize; + } + } + + double averageTrendPart = 0; + if (calculationSize > 0) + { + averageTrendPart = trendSum / calculationSize * (1 - trendFraction); + } + else + { + trendFraction = 1.0; + } + + double[] units = new double[trends.Length]; + for (int i = 0; i < units.Length; ++i) + { + units[i] = Math.Max(1, averageTrendPart + Math.Abs(trends[i]) * trendFraction); + if (double.IsInfinity(units[i])) + { + throw new ArithmeticException("Not finite unit value"); + } + } + + return units; + } + + private static double[] MedianFilter(double[] data, int window, bool needTwoEnd = false) + { + int wLen = window / 2 * 2 + 1; + int tLen = data.Length; + double[] val = (double[]) data.Clone(); + double[] ans = (double[])data.Clone(); + double[] curWindow = new double[wLen]; + if (tLen < wLen) + { + return ans; + } + + for (int i = 0; i < wLen; i++) + { + int index = i; + int addId = BisectRight(curWindow, 0, i, val[i]); + while (index > addId) + { + curWindow[index] = curWindow[index - 1]; + index -= 1; + } + curWindow[addId] = data[i]; + if (i >= wLen / 2 && needTwoEnd) + ans[i - wLen / 2] = SortedMedian(curWindow, 0, i + 1); + } + + ans[window / 2] = SortedMedian(curWindow, 0, wLen); + + for (int i = window / 2 + 1; i < tLen - window / 2; i++) + { + int deleteId = BisectRight(curWindow, 0, wLen, val[i - window / 2 - 1]) - 1; + int index = deleteId; + while (index < wLen - 1) + { + curWindow[index] = curWindow[index + 1]; + index += 1; + } + int addId = BisectRight(curWindow, 0, wLen - 1, val[i + window / 2]); + index = wLen - 1; + while (index > addId) + { + curWindow[index] = curWindow[index - 1]; + index -= 1; + } + curWindow[addId] = data[i + window / 2]; + ans[i] = SortedMedian(curWindow, 0, wLen); + } + + if (needTwoEnd) + { + for (int i = tLen - window / 2; i < tLen; i++) + { + int deleteId = BisectRight(curWindow, 0, wLen, data[i - window / 2 - 1]) - 1; + int index = deleteId; + while (index < wLen - 1) + { + curWindow[index] = curWindow[index + 1]; + index += 1; + } + wLen -= 1; + ans[i] = SortedMedian(curWindow, 0, wLen); + } + } + + return ans; + } + + private static int BisectRight(double[] arr, int begin, int end, double tar) + { + while (begin < end) + { + int mid = begin + (end - begin) / 2; + if (arr[mid] <= tar) + begin = mid + 1; + else + end = mid; + } + return begin; + } + + private static double SortedMedian(double[] sortedValues, int begin, int end) + { + int n = end - begin; + if (n % 2 == 1) + return sortedValues[begin + n / 2]; + else + { + int mid = begin + n / 2; + return (sortedValues[mid - 1] + sortedValues[mid]) / 2; + } + } + + private static double CalculateMargin(double unit, double sensitivity) + { + if (Math.Floor(sensitivity) == sensitivity) + { + return unit * _factors[(int)sensitivity]; + } + else + { + int lb = (int)sensitivity; + return (_factors[lb + 1] + (_factors[lb] - _factors[lb + 1]) * (1 - sensitivity + lb)) * unit; + } + } + + private static double CalculateAnomalyScore(double value, double exp, double unit, bool isAnomaly) + { + double anomalyScore = 0.0f; + + if (isAnomaly.Equals(false)) + { + return anomalyScore; + } + + double distance = Math.Abs(exp - value); + List margins = new List(); + for (int i = 100; i >= 0; --i) + { + margins.Add(CalculateMargin(unit, i)); + } + + int lb = 0; + int ub = 100; + while (lb < ub) + { + int mid = (lb + ub) / 2; + if (margins[mid] < distance) + { + lb = mid + 1; + } + else + { + ub = mid; + } + } + + if (Math.Abs(margins[lb] - distance) < _eps || lb == 0) + { + anomalyScore = lb; + } + else + { + double lowerMargin = margins[lb - 1]; + double upperMargin = margins[lb]; + anomalyScore = lb - 1 + (distance - lowerMargin) / (upperMargin - lowerMargin); + } + + return anomalyScore / 100.0f; + } + } } } From b8351adda496ed9e01fcc6bd0158b98e2a0653d6 Mon Sep 17 00:00:00 2001 From: Meng Ai Date: Fri, 15 May 2020 21:23:42 +0800 Subject: [PATCH 03/11] Fix bugs and add test --- .../DetectAnomalyBySrCnnBatchPrediction.cs | 27 +++++ .../SrCnnBatchAnomalyDetection.cs | 13 +- .../TimeSeriesDirectApi.cs | 113 ++++++++++++++---- 3 files changed, 124 insertions(+), 29 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs index ca3818b6eb..c3154bdc7e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs @@ -51,6 +51,33 @@ public static void Example() PrintPrediction(k, data[k].Value, prediction); k++; } + //Index Data Anomaly AnomalyScore Mag ExpectedValue BoundaryUnit UpperBoundary LowerBoundary + //0 5.00 0 0.00 0.21 5.00 5.00 5.01 4.99 + //1 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 + //2 5.00 0 0.00 0.03 5.00 5.00 5.01 4.99 + //3 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //4 5.00 0 0.00 0.03 5.00 5.00 5.01 4.99 + //5 5.00 0 0.00 0.06 5.00 5.00 5.01 4.99 + //6 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 + //7 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //8 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //9 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //10 5.00 0 0.00 0.00 5.00 5.00 5.01 4.99 + //11 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //12 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //13 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 + //14 5.00 0 0.00 0.07 5.00 5.00 5.01 4.99 + //15 5.00 0 0.00 0.08 5.00 5.00 5.01 4.99 + //16 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 + //17 5.00 0 0.00 0.05 5.00 5.00 5.01 4.99 + //18 5.00 0 0.00 0.12 5.00 5.00 5.01 4.99 + //19 5.00 0 0.00 0.17 5.00 5.00 5.01 4.99 + //20 10.00 1 0.50 0.80 5.00 5.00 5.01 4.99 + //21 5.00 0 0.00 0.16 5.00 5.00 5.01 4.99 + //22 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 + //23 5.00 0 0.00 0.05 5.00 5.00 5.01 4.99 + //24 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 + //25 5.00 0 0.00 0.19 5.00 5.00 5.01 4.99 } private static void PrintPrediction(int idx, double value, SrCnnAnomalyDetection prediction) => diff --git a/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs b/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs index 86b722c750..6ab8a53145 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs @@ -97,7 +97,14 @@ public SrCnnBatchAnomalyDetector(IHostEnvironment env, IDataView input, string i _inputColumnName = inputColumnName; env.CheckUserArg(batchSize == -1 || batchSize >= MinBatchSize, nameof(batchSize), "BatchSize must be -1 or no less than 12."); - _batchSize = batchSize; + if (batchSize == -1) + { + _batchSize = (int)input.GetRowCount(); + } + else + { + _batchSize = batchSize; + } env.CheckUserArg(threshold >= 0 && threshold <= 1, nameof(threshold), "Must be in [0,1]."); env.CheckUserArg(detectMode == SrCnnDetectMode.AnomalyOnly @@ -392,7 +399,7 @@ private static double[] BackAdd(double[] data) int j = 0; for (int i = data.Length - _lookaheadWindowSize - 2; i < data.Length - 1; ++i) { - predictArray[j] = data[i]; + predictArray[j++] = data[i]; } var predictedValue = PredictNext(predictArray); double[] backAddArray = new double[data.Length + _backAddWindowSize]; @@ -427,7 +434,7 @@ private static double[] AverageFilter(double[] data, int n) for (int i = 0; i < length; ++i) { - cumsum += cumSumList[i]; + cumsum += data[i]; cumSumList[i] = cumsum; cumSumShift[i] = cumsum; } diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 7811e72967..a3379961f1 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -86,9 +86,14 @@ public TimeSeriesData(float value) } } + private sealed class TimeSeriesDataDouble + { + public double Value { get; set; } + } + private sealed class SrCnnAnomalyDetection { - [VectorType(3)] + [VectorType] public double[] Prediction { get; set; } } @@ -570,35 +575,91 @@ public void AnomalyDetectionWithSrCnn(bool loadDataFromFile) } } - [Fact] - public void TestSrCnnBatchAnomalyDetector() + [Theory, CombinatorialData] + public void TestSrCnnBatchAnomalyDetector( + [CombinatorialValues(SrCnnDetectMode.AnomalyOnly, SrCnnDetectMode.AnomalyAndExpectedValue, SrCnnDetectMode.AnomalyAndMargin)]SrCnnDetectMode mode, + [CombinatorialValues(true, false)]bool loadDataFromFile) { - // TODO: delete/replace with SrCnn tests var ml = new MLContext(1); - var bldr = new ArrayDataViewBuilder(ml); - bldr.AddColumn("Input", NumberDataViewType.Double, new[] { 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 4.0, 6.0, 7.0, 1.0, }); - var input = bldr.GetDataView(); - var output = new SrCnnBatchAnomalyDetector( - ml, - input, - "Input", - "Output", - 0.3, - 12, - 99, - SrCnnDetectMode.AnomalyAndExpectedValue); - var batchTransformOutput = ml.Data.CreateEnumerable(output, reuseRowObject: false).ToList(); - - var inputs = batchTransformOutput.Select(e => e.Input); - var outputs = batchTransformOutput.Select(e => e.Output); - } + IDataView dataView; + if (loadDataFromFile) + { + var dataPath = GetDataPath(Path.Combine("Timeseries", "anomaly_detection.csv")); - private class BatchTransformOutput - { - public double Input { get; set; } + // Load data from file into the dataView + dataView = ml.Data.LoadFromTextFile(dataPath, new[] { + new TextLoader.Column("Value", DataKind.Double, 0), + }, hasHeader: true); + } + else + { + // Generate sample series data with an anomaly + var data = new List(); + for (int index = 0; index < 20; index++) + { + data.Add(new TimeSeriesDataDouble { Value = 5 } ); + } + data.Add(new TimeSeriesDataDouble { Value = 10 }); + for (int index = 0; index < 5; index++) + { + data.Add(new TimeSeriesDataDouble { Value = 5 }); + } - [VectorType] - public double[] Output { get; set; } + // Convert data to IDataView. + dataView = ml.Data.LoadFromEnumerable(data); + } + + // Setup the detection arguments + string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction); + string inputColumnName = nameof(TimeSeriesDataDouble.Value); + + // Do batch anomaly detection + var outputDataView = ml.Data.BatchDetectAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, + threshold: 0.35, batchSize: -1, sensitivity: 90.0, mode); + + // Getting the data of the newly created column as an IEnumerable of + // SrCnnAnomalyDetection. + var predictionColumn = ml.Data.CreateEnumerable( + outputDataView, reuseRowObject: false); + + int k = 0; + foreach (var prediction in predictionColumn) + { + switch (mode) + { + case SrCnnDetectMode.AnomalyOnly: + Assert.Equal(3, prediction.Prediction.Length); + if (k == 20) + Assert.Equal(1, prediction.Prediction[0]); + else + Assert.Equal(0, prediction.Prediction[0]); + break; + case SrCnnDetectMode.AnomalyAndExpectedValue: + Assert.Equal(4, prediction.Prediction.Length); + if (k == 20) + { + Assert.Equal(1, prediction.Prediction[0]); + Assert.Equal("5.00", prediction.Prediction[3].ToString("0.00")); + } + else + Assert.Equal(0, prediction.Prediction[0]); + break; + case SrCnnDetectMode.AnomalyAndMargin: + Assert.Equal(7, prediction.Prediction.Length); + if (k == 20) + { + Assert.Equal(1, prediction.Prediction[0]); + Assert.Equal("5.00", prediction.Prediction[3].ToString("0.00")); + Assert.Equal("5.00", prediction.Prediction[4].ToString("0.00")); + Assert.Equal("5.01", prediction.Prediction[5].ToString("0.00")); + Assert.Equal("4.99", prediction.Prediction[6].ToString("0.00")); + } + else + Assert.Equal(0, prediction.Prediction[0]); + break; + } + k += 1; + } } [Fact] From 0f22a33c6b3ac8c11970255ad7282f92cbe075a7 Mon Sep 17 00:00:00 2001 From: Meng Ai Date: Tue, 19 May 2020 16:46:34 +0800 Subject: [PATCH 04/11] Resolve comments --- .../DetectAnomalyBySrCnnBatchPrediction.cs | 2 +- .../DataView/BatchDataViewMapperBase.cs | 13 ++- .../ExtensionsCatalog.cs | 2 +- .../SrCnnBatchAnomalyDetection.cs | 103 ++++++++---------- .../TimeSeriesDirectApi.cs | 15 ++- 5 files changed, 59 insertions(+), 76 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs index c3154bdc7e..f38f6263f1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs @@ -35,7 +35,7 @@ public static void Example() string inputColumnName = nameof(TimeSeriesData.Value); // Do batch anomaly detection - var outputDataView = ml.Data.BatchDetectAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, + var outputDataView = ml.AnomalyDetection.BatchDetectAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, threshold:0.35, batchSize: 512, sensitivity: 90.0, detectMode: SrCnnDetectMode.AnomalyAndMargin); // Getting the data of the newly created column as an IEnumerable of diff --git a/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs b/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs index e2d49abcbb..80ef9c64d6 100644 --- a/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs +++ b/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs @@ -16,11 +16,12 @@ internal abstract class BatchDataViewMapperBase : IDataView public DataViewSchema Schema => SchemaBindings.AsSchema; private readonly IDataView _source; - private readonly IHost _host; + protected readonly IHost Host; protected BatchDataViewMapperBase(IHostEnvironment env, string registrationName, IDataView input) { - _host = env.Register(registrationName); + Contracts.CheckValue(env, nameof(env)); + Host = env.Register(registrationName); _source = input; } @@ -28,8 +29,8 @@ protected BatchDataViewMapperBase(IHostEnvironment env, string registrationName, public DataViewRowCursor GetRowCursor(IEnumerable columnsNeeded, Random rand = null) { - _host.CheckValue(columnsNeeded, nameof(columnsNeeded)); - _host.CheckValueOrNull(rand); + Host.CheckValue(columnsNeeded, nameof(columnsNeeded)); + Host.CheckValueOrNull(rand); var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, SchemaBindings.AsSchema); @@ -41,7 +42,7 @@ public DataViewRowCursor GetRowCursor(IEnumerable columns { var activeInput = SchemaBindings.GetActiveInput(predicate); var inputCursor = _source.GetRowCursor(_source.Schema.Where(c => activeInput[c.Index]), null); - return new BindingsWrappedRowCursor(_host, inputCursor, SchemaBindings); + return new BindingsWrappedRowCursor(Host, inputCursor, SchemaBindings); } var active = SchemaBindings.GetActive(predicate); Contracts.Assert(active.Length == SchemaBindings.ColumnCount); @@ -89,7 +90,7 @@ private sealed class Cursor : RootCursorBase public override DataViewSchema Schema => _parent.Schema; public Cursor(BatchDataViewMapperBase parent, DataViewRowCursor input, DataViewRowCursor lookAheadCursor, bool[] active) - : base(parent._host) + : base(parent.Host) { _parent = parent; _input = input; diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 497d8aca90..8efbc62d5b 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -167,7 +167,7 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog /// ]]> /// /// - public static IDataView BatchDetectAnomalyBySrCnn(this DataOperationsCatalog catalog, IDataView input, string outputColumnName, string inputColumnName, + public static IDataView BatchDetectAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName, double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly) => new SrCnnBatchAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode); diff --git a/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs b/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs index 6ab8a53145..f8ed3d1496 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs @@ -7,7 +7,6 @@ using System.Linq; using Microsoft.ML.Data; using Microsoft.ML.Data.DataView; -using Microsoft.ML.Numeric; using Microsoft.ML.Runtime; using Microsoft.ML.Transforms.TimeSeries; @@ -34,18 +33,16 @@ public enum SrCnnDetectMode AnomalyAndExpectedValue = 2 } - // TODO: SrCnn internal sealed class SrCnnBatchAnomalyDetector : BatchDataViewMapperBase { private const int MinBatchSize = 12; - private const int AnomalyOnlyOutputLength = 3; - private const int AnomalyAndExpectedValueOutputLength = 4; - private const int AnomalyAndMarginOutputLength = 7; + private static readonly int[] _outputLengthArray = {3, 7, 4}; private readonly int _batchSize; private readonly string _inputColumnName; private readonly int _outputLength; - private readonly SrCnnEntireModeler _modler; + private readonly SrCnnEntireModeler _modeler; + private readonly Bindings _bindings; private class Bindings : ColumnBindingsBase { @@ -89,49 +86,28 @@ public Func GetDependencies(Func predicate) } public SrCnnBatchAnomalyDetector(IHostEnvironment env, IDataView input, string inputColumnName, string outputColumnName, double threshold, int batchSize, double sensitivity, SrCnnDetectMode detectMode) - : base(env, "SrCnnBatchAnomalyDetector", input) + : base(env, nameof(SrCnnBatchAnomalyDetector), input) { - Contracts.CheckValue(env, nameof(env)); - - Contracts.CheckValue(inputColumnName, nameof(inputColumnName)); + Host.CheckValue(inputColumnName, nameof(inputColumnName)); _inputColumnName = inputColumnName; - env.CheckUserArg(batchSize == -1 || batchSize >= MinBatchSize, nameof(batchSize), "BatchSize must be -1 or no less than 12."); - if (batchSize == -1) - { - _batchSize = (int)input.GetRowCount(); - } - else - { - _batchSize = batchSize; - } + Host.CheckUserArg(batchSize == -1 || batchSize >= MinBatchSize, nameof(batchSize), "BatchSize must be -1 or no less than 12."); + _batchSize = batchSize; - env.CheckUserArg(threshold >= 0 && threshold <= 1, nameof(threshold), "Must be in [0,1]."); - env.CheckUserArg(detectMode == SrCnnDetectMode.AnomalyOnly + Host.CheckUserArg(threshold >= 0 && threshold <= 1, nameof(threshold), "Must be in [0,1]."); + Host.CheckUserArg(detectMode == SrCnnDetectMode.AnomalyOnly || detectMode == SrCnnDetectMode.AnomalyAndExpectedValue || detectMode == SrCnnDetectMode.AnomalyAndMargin, nameof(detectMode), "Invalid detectMode"); - if (detectMode.Equals(SrCnnDetectMode.AnomalyOnly)) - { - _outputLength = AnomalyOnlyOutputLength; - _modler = new SrCnnEntireModeler(threshold, sensitivity, detectMode, _outputLength); - } - else if (detectMode.Equals(SrCnnDetectMode.AnomalyAndMargin)) - { - env.CheckUserArg(sensitivity >= 0 && sensitivity <= 100, nameof(sensitivity), "Must be in [0,100]."); - _outputLength = AnomalyAndMarginOutputLength; - _modler = new SrCnnEntireModeler(threshold, sensitivity, detectMode, _outputLength); - } - else if (detectMode.Equals(SrCnnDetectMode.AnomalyAndExpectedValue)) - { - _outputLength = AnomalyAndExpectedValueOutputLength; - _modler = new SrCnnEntireModeler(threshold, sensitivity, detectMode, _outputLength); - } + Host.CheckUserArg(sensitivity >= 0 && sensitivity <= 100, nameof(sensitivity), "Must be in [0,100]."); + + _outputLength = _outputLengthArray[(int)detectMode]; + _modeler = new SrCnnEntireModeler(threshold, sensitivity, detectMode); - SchemaBindings = new Bindings(input.Schema, inputColumnName, outputColumnName, new VectorDataViewType(NumberDataViewType.Double, _outputLength)); + _bindings = new Bindings(input.Schema, inputColumnName, outputColumnName, new VectorDataViewType(NumberDataViewType.Double, _outputLength)); } - protected override ColumnBindingsBase SchemaBindings { get; } + protected override ColumnBindingsBase SchemaBindings => _bindings; protected override Delegate[] CreateGetters(DataViewRowCursor input, Batch currentBatch, bool[] active) { @@ -140,16 +116,16 @@ protected override Delegate[] CreateGetters(DataViewRowCursor input, Batch curre return new[] { currentBatch.CreateGetter(input, _inputColumnName) }; } - protected override Batch InitializeBatch(DataViewRowCursor input) => new Batch(_batchSize, _outputLength, _modler); + protected override Batch InitializeBatch(DataViewRowCursor input) => new Batch(_batchSize, _outputLength, _modeler); protected override Func GetIsNewBatchDelegate(DataViewRowCursor input) { - return () => input.Position % _batchSize == 0; + return () => _batchSize == -1 ? input.Position == 0 : input.Position % _batchSize == 0; } protected override Func GetLastInBatchDelegate(DataViewRowCursor input) { - return () => (input.Position + 1) % _batchSize == 0; + return () => _batchSize == -1 ? input.Position == -1 : (input.Position + 1) % _batchSize == 0; } protected override ValueGetter GetLookAheadGetter(DataViewRowCursor input) @@ -159,7 +135,7 @@ protected override ValueGetter GetLookAheadGetter(DataViewRowCursor inpu protected override Func GetSchemaBindingDependencies(Func predicate) { - return (SchemaBindings as Bindings).GetDependencies(predicate); + return _bindings.GetDependencies(predicate); } protected override void ProcessExample(Batch currentBatch, double currentInput) @@ -173,22 +149,30 @@ protected override void ProcessBatch(Batch currentBatch) currentBatch.Reset(); } - public sealed class Batch + internal sealed class Batch { private List _previousBatch; private List _batch; - private readonly int _batchSize; private readonly int _outputLength; - private SrCnnEntireModeler _modler; + private SrCnnEntireModeler _modeler; + private int _batchSize; private double[][] _results; public Batch(int batchSize, int outputLength, SrCnnEntireModeler modeler) { _batchSize = batchSize; _outputLength = outputLength; - _previousBatch = new List(batchSize); - _batch = new List(batchSize); - _modler = modeler; + if (batchSize == -1) + { + _previousBatch = new List(); + _batch = new List(); + } + else + { + _previousBatch = new List(batchSize); + _batch = new List(batchSize); + } + _modeler = modeler; } public void AddValue(double value) @@ -200,6 +184,7 @@ public void AddValue(double value) public void Process() { + _batchSize = _batch.Count; if (_batch.Count < MinBatchSize) { if (_previousBatch.Count + _batch.Count < MinBatchSize) @@ -207,11 +192,11 @@ public void Process() var bLen = _previousBatch.Count - _batch.Count; _previousBatch = _previousBatch.GetRange(_batch.Count, bLen); _previousBatch.AddRange(_batch); - _results = _modler.Train(_previousBatch.ToArray()).Skip(bLen).ToArray(); + _results = _modeler.Train(_previousBatch.ToArray()).Skip(bLen).ToArray(); } else { - _results = _modler.Train(_batch.ToArray()); + _results = _modeler.Train(_batch.ToArray()); } } @@ -242,7 +227,7 @@ public ValueGetter> CreateGetter(DataViewRowCursor input, string } } - public sealed class SrCnnEntireModeler + internal sealed class SrCnnEntireModeler { private static readonly int _lookaheadWindowSize = 5; private static readonly int _backAddWindowSize = 5; @@ -283,14 +268,12 @@ public sealed class SrCnnEntireModeler private readonly double _threshold; private readonly double _sensitivity; private readonly SrCnnDetectMode _detectMode; - private readonly int _outputLength; - public SrCnnEntireModeler(double threshold, double sensitivity, SrCnnDetectMode detectMode, int outputLength) + public SrCnnEntireModeler(double threshold, double sensitivity, SrCnnDetectMode detectMode) { _threshold = threshold; _sensitivity = sensitivity; _detectMode = detectMode; - _outputLength = outputLength; } public double[][] Train(double[] values) @@ -298,9 +281,9 @@ public double[][] Train(double[] values) double[][] results = new double[values.Length][]; for (int i = 0; i < results.Length; ++i) { - results[i] = new double[_outputLength]; + results[i] = new double[_outputLengthArray[(int)_detectMode]]; } - SpecturalResidual(values, results, _threshold); + SpectralResidual(values, results, _threshold); //Optional Steps if (_detectMode == SrCnnDetectMode.AnomalyAndMargin) { @@ -313,7 +296,7 @@ public double[][] Train(double[] values) return results; } - private static void SpecturalResidual(double[] values, double[][] results, double threshold) + private static void SpectralResidual(double[] values, double[][] results, double threshold) { // Step 1: Get backadd wave double[] backAddList = BackAdd(values); @@ -380,7 +363,7 @@ private static void SpecturalResidual(double[] values, double[][] results, doubl // Step 7: Calculate raw score and set result for (int i = 0; i < results.GetLength(0); ++i) { - var score = CalculateSocre(ifftMagList[i], filteredIfftMagList[i]); + var score = CalculateScore(ifftMagList[i], filteredIfftMagList[i]); score /= 10.0f; score = Math.Min(score, 1); score = Math.Max(score, 0); @@ -449,7 +432,7 @@ private static double[] AverageFilter(double[] data, int n) return cumSumList; } - private static double CalculateSocre(double mag, double avgMag) + private static double CalculateScore(double mag, double avgMag) { double safeDivisor = avgMag; if (Math.Abs(safeDivisor) < _eps) diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index a3379961f1..24cca86cab 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -5,7 +5,6 @@ using System; using System.Collections.Generic; using System.IO; -using System.Linq; using Microsoft.ML.Data; using Microsoft.ML.TestFramework; using Microsoft.ML.TimeSeries; @@ -584,7 +583,7 @@ public void TestSrCnnBatchAnomalyDetector( IDataView dataView; if (loadDataFromFile) { - var dataPath = GetDataPath(Path.Combine("Timeseries", "anomaly_detection.csv")); + var dataPath = GetDataPath("Timeseries", "anomaly_detection.csv"); // Load data from file into the dataView dataView = ml.Data.LoadFromTextFile(dataPath, new[] { @@ -614,7 +613,7 @@ public void TestSrCnnBatchAnomalyDetector( string inputColumnName = nameof(TimeSeriesDataDouble.Value); // Do batch anomaly detection - var outputDataView = ml.Data.BatchDetectAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, + var outputDataView = ml.AnomalyDetection.BatchDetectAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, threshold: 0.35, batchSize: -1, sensitivity: 90.0, mode); // Getting the data of the newly created column as an IEnumerable of @@ -639,7 +638,7 @@ public void TestSrCnnBatchAnomalyDetector( if (k == 20) { Assert.Equal(1, prediction.Prediction[0]); - Assert.Equal("5.00", prediction.Prediction[3].ToString("0.00")); + Assert.Equal(5.00, prediction.Prediction[3], 2); } else Assert.Equal(0, prediction.Prediction[0]); @@ -649,10 +648,10 @@ public void TestSrCnnBatchAnomalyDetector( if (k == 20) { Assert.Equal(1, prediction.Prediction[0]); - Assert.Equal("5.00", prediction.Prediction[3].ToString("0.00")); - Assert.Equal("5.00", prediction.Prediction[4].ToString("0.00")); - Assert.Equal("5.01", prediction.Prediction[5].ToString("0.00")); - Assert.Equal("4.99", prediction.Prediction[6].ToString("0.00")); + Assert.Equal(5.00, prediction.Prediction[3], 2); + Assert.Equal(5.00, prediction.Prediction[4], 2); + Assert.Equal(5.01, prediction.Prediction[5], 2); + Assert.Equal(4.99, prediction.Prediction[6], 2); } else Assert.Equal(0, prediction.Prediction[0]); From 9de4250e7403d71a6e649c31fbbbc7d31185f01a Mon Sep 17 00:00:00 2001 From: Meng Ai Date: Tue, 19 May 2020 20:26:49 +0800 Subject: [PATCH 05/11] Change names and add documentation --- .../DetectAnomalyBySrCnnBatchPrediction.cs | 103 ++++++++++-------- .../TimeSeries/DetectEntireAnomalyBySrCnn.cs | 99 +++++++++++++++++ .../ExtensionsCatalog.cs | 32 +++--- ...ction.cs => SrCnnEntireAnomalyDetector.cs} | 46 +++++++- .../TimeSeriesDirectApi.cs | 2 +- 5 files changed, 217 insertions(+), 65 deletions(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs rename src/Microsoft.ML.TimeSeries/{SrCnnBatchAnomalyDetection.cs => SrCnnEntireAnomalyDetector.cs} (88%) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs index f38f6263f1..8c43b8f3f0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs @@ -2,7 +2,6 @@ using System.Collections.Generic; using Microsoft.ML; using Microsoft.ML.Data; -using Microsoft.ML.TimeSeries; namespace Samples.Dynamic { @@ -19,81 +18,89 @@ public static void Example() var data = new List(); for (int index = 0; index < 20; index++) { - data.Add(new TimeSeriesData { Value = 5 }); + data.Add(new TimeSeriesData(5)); } - data.Add(new TimeSeriesData { Value = 10 }); + data.Add(new TimeSeriesData(10)); for (int index = 0; index < 5; index++) { - data.Add(new TimeSeriesData { Value = 5 }); + data.Add(new TimeSeriesData(5)); } // Convert data to IDataView. var dataView = ml.Data.LoadFromEnumerable(data); - // Setup the detection arguments + // Setup the estimator arguments string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction); string inputColumnName = nameof(TimeSeriesData.Value); - // Do batch anomaly detection - var outputDataView = ml.AnomalyDetection.BatchDetectAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, - threshold:0.35, batchSize: 512, sensitivity: 90.0, detectMode: SrCnnDetectMode.AnomalyAndMargin); + // The transformed data. + var transformedData = ml.Transforms.DetectAnomalyBySrCnn( + outputColumnName, inputColumnName, 16, 5, 5, 3, 8, 0.35).Fit( + dataView).Transform(dataView); // Getting the data of the newly created column as an IEnumerable of // SrCnnAnomalyDetection. var predictionColumn = ml.Data.CreateEnumerable( - outputDataView, reuseRowObject: false); + transformedData, reuseRowObject: false); - Console.WriteLine("Index\tData\tAnomaly\tAnomalyScore\tMag\tExpectedValue\tBoundaryUnit\tUpperBoundary\tLowerBoundary"); + Console.WriteLine($"{outputColumnName} column obtained post-" + + $"transformation."); + + Console.WriteLine("Data\tAlert\tScore\tMag"); int k = 0; foreach (var prediction in predictionColumn) - { - PrintPrediction(k, data[k].Value, prediction); - k++; - } - //Index Data Anomaly AnomalyScore Mag ExpectedValue BoundaryUnit UpperBoundary LowerBoundary - //0 5.00 0 0.00 0.21 5.00 5.00 5.01 4.99 - //1 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 - //2 5.00 0 0.00 0.03 5.00 5.00 5.01 4.99 - //3 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 - //4 5.00 0 0.00 0.03 5.00 5.00 5.01 4.99 - //5 5.00 0 0.00 0.06 5.00 5.00 5.01 4.99 - //6 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 - //7 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 - //8 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 - //9 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 - //10 5.00 0 0.00 0.00 5.00 5.00 5.01 4.99 - //11 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 - //12 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 - //13 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 - //14 5.00 0 0.00 0.07 5.00 5.00 5.01 4.99 - //15 5.00 0 0.00 0.08 5.00 5.00 5.01 4.99 - //16 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 - //17 5.00 0 0.00 0.05 5.00 5.00 5.01 4.99 - //18 5.00 0 0.00 0.12 5.00 5.00 5.01 4.99 - //19 5.00 0 0.00 0.17 5.00 5.00 5.01 4.99 - //20 10.00 1 0.50 0.80 5.00 5.00 5.01 4.99 - //21 5.00 0 0.00 0.16 5.00 5.00 5.01 4.99 - //22 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 - //23 5.00 0 0.00 0.05 5.00 5.00 5.01 4.99 - //24 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 - //25 5.00 0 0.00 0.19 5.00 5.00 5.01 4.99 + PrintPrediction(data[k++].Value, prediction); + + //Prediction column obtained post-transformation. + //Data Alert Score Mag + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.00 0.00 + //5 0 0.03 0.18 + //5 0 0.03 0.18 + //5 0 0.03 0.18 + //5 0 0.03 0.18 + //5 0 0.03 0.18 + //10 1 0.47 0.93 + //5 0 0.31 0.50 + //5 0 0.05 0.30 + //5 0 0.01 0.23 + //5 0 0.00 0.21 + //5 0 0.01 0.25 } - private static void PrintPrediction(int idx, double value, SrCnnAnomalyDetection prediction) => - Console.WriteLine("{0}\t{1:0.00}\t{2}\t\t{3:0.00}\t{4:0.00}\t\t{5:0.00}\t\t{6:0.00}\t\t{7:0.00}\t\t{8:0.00}", - idx, value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], - prediction.Prediction[3], prediction.Prediction[4], prediction.Prediction[5], prediction.Prediction[6]); + private static void PrintPrediction(float value, SrCnnAnomalyDetection + prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction + .Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); private class TimeSeriesData { - public double Value { get; set; } + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } } private class SrCnnAnomalyDetection { - [VectorType] + [VectorType(3)] public double[] Prediction { get; set; } } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs new file mode 100644 index 0000000000..d264dd69f4 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs @@ -0,0 +1,99 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Data; +using Microsoft.ML.TimeSeries; + +namespace Samples.Dynamic +{ + public static class DetectEntireAnomalyBySrCnn + { + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Generate sample series data with an anomaly + var data = new List(); + for (int index = 0; index < 20; index++) + { + data.Add(new TimeSeriesData { Value = 5 }); + } + data.Add(new TimeSeriesData { Value = 10 }); + for (int index = 0; index < 5; index++) + { + data.Add(new TimeSeriesData { Value = 5 }); + } + + // Convert data to IDataView. + var dataView = ml.Data.LoadFromEnumerable(data); + + // Setup the detection arguments + string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction); + string inputColumnName = nameof(TimeSeriesData.Value); + + // Do batch anomaly detection + var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, + threshold: 0.35, batchSize: 512, sensitivity: 90.0, detectMode: SrCnnDetectMode.AnomalyAndMargin); + + // Getting the data of the newly created column as an IEnumerable of + // SrCnnAnomalyDetection. + var predictionColumn = ml.Data.CreateEnumerable( + outputDataView, reuseRowObject: false); + + Console.WriteLine("Index\tData\tAnomaly\tAnomalyScore\tMag\tExpectedValue\tBoundaryUnit\tUpperBoundary\tLowerBoundary"); + + int k = 0; + foreach (var prediction in predictionColumn) + { + PrintPrediction(k, data[k].Value, prediction); + k++; + } + //Index Data Anomaly AnomalyScore Mag ExpectedValue BoundaryUnit UpperBoundary LowerBoundary + //0 5.00 0 0.00 0.21 5.00 5.00 5.01 4.99 + //1 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 + //2 5.00 0 0.00 0.03 5.00 5.00 5.01 4.99 + //3 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //4 5.00 0 0.00 0.03 5.00 5.00 5.01 4.99 + //5 5.00 0 0.00 0.06 5.00 5.00 5.01 4.99 + //6 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 + //7 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //8 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //9 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //10 5.00 0 0.00 0.00 5.00 5.00 5.01 4.99 + //11 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //12 5.00 0 0.00 0.01 5.00 5.00 5.01 4.99 + //13 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 + //14 5.00 0 0.00 0.07 5.00 5.00 5.01 4.99 + //15 5.00 0 0.00 0.08 5.00 5.00 5.01 4.99 + //16 5.00 0 0.00 0.02 5.00 5.00 5.01 4.99 + //17 5.00 0 0.00 0.05 5.00 5.00 5.01 4.99 + //18 5.00 0 0.00 0.12 5.00 5.00 5.01 4.99 + //19 5.00 0 0.00 0.17 5.00 5.00 5.01 4.99 + //20 10.00 1 0.50 0.80 5.00 5.00 5.01 4.99 + //21 5.00 0 0.00 0.16 5.00 5.00 5.01 4.99 + //22 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 + //23 5.00 0 0.00 0.05 5.00 5.00 5.01 4.99 + //24 5.00 0 0.00 0.11 5.00 5.00 5.01 4.99 + //25 5.00 0 0.00 0.19 5.00 5.00 5.01 4.99 + } + + private static void PrintPrediction(int idx, double value, SrCnnAnomalyDetection prediction) => + Console.WriteLine("{0}\t{1:0.00}\t{2}\t\t{3:0.00}\t{4:0.00}\t\t{5:0.00}\t\t{6:0.00}\t\t{7:0.00}\t\t{8:0.00}", + idx, value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], + prediction.Prediction[3], prediction.Prediction[4], prediction.Prediction[5], prediction.Prediction[6]); + + private class TimeSeriesData + { + public double Value { get; set; } + } + + private class SrCnnAnomalyDetection + { + [VectorType] + public double[] Prediction { get; set; } + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 8efbc62d5b..1ca2e95cf6 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -149,27 +149,33 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); /// - /// Create , which detects timeseries anomalies using SRCNN algorithm. + /// Create , which detects timeseries anomalies for entire input using SRCNN algorithm. /// - /// The transform's catalog. - /// ... - /// Name of the column resulting from the transformation of . - /// The column data is a vector of . The vector contains 3 elements: alert (1 means anomaly while 0 means normal), raw score, and magnitude of spectual residual. - /// Name of column to transform. The column data must be . - /// The threshold to determine anomaly, score larger than the threshold is considered as anomaly. Should be in (0,1) - /// .Divide the input data into batches to fit SrCnn model. Must be -1 or a positive integer no less than 12. Default value is 1024. - /// The sensitivity of boundaries. Must be in the interval (0, 100). - /// The detect mode of the SrCnn model. + /// The AnomalyDetectionCatalog. + /// Input DataView. + /// Name of the column resulting from data processing of . + /// The column data is a vector of . The length of this vector varies depending on . + /// Name of column to process. The column data must be . + /// The threshold to determine anomaly, score larger than the threshold is considered as anomaly. Must be in [0,1]. Default value is 0.3. + /// Divide the input data into batches to fit srcnn model. + /// When set to -1, use the whole input to fit model instead of batch by batch, when set to a positive integer, use this number as batch size. + /// Must be -1 or a positive integer no less than 12. Default value is 1024. + /// Sensitivity of boundaries, only useful when srCnnDetectMode is AnomalyAndMargin. Must be in [0,100]. Default value is 99. + /// An enum type of . + /// When set to AnomalyOnly, the output vector would be a 3-element Double vector of (IsAnomaly, RawScore, Mag). + /// When set to AnomalyAndExpectedValue, the output vector would be a 4-element Double vector of (IsAnomaly, RawScore, Mag, ExpectedValue). + /// When set to AnomalyAndMargin, the output vector would be a 7-element Double vector of (IsAnomaly, AnomalyScore, Mag, ExpectedValue, BoundaryUnit, UpperBoundary, LowerBoundary). + /// Default value is AnomalyOnly. /// /// /// /// /// - public static IDataView BatchDetectAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName, + public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName, double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly) - => new SrCnnBatchAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode); + => new SrCnnEntireAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode); /// /// Create , which localizes root causes using decision tree algorithm. diff --git a/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs similarity index 88% rename from src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs rename to src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index f8ed3d1496..52402b4291 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnBatchAnomalyDetection.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -33,7 +33,47 @@ public enum SrCnnDetectMode AnomalyAndExpectedValue = 2 } - internal sealed class SrCnnBatchAnomalyDetector : BatchDataViewMapperBase + /// + /// Detect timeseries anomalies for entire input using Spectral Residual(SR) algorithm. + /// + /// + /// \tau\\0,otherwise,\end{cases}$$ + /// where $x_i$ represents an arbitrary point in sequence $\mathbf{x}$; $S(x_i)$is the corresponding point in the saliency map; + /// and $\overline{S(x_i)}$ is the local average of the preceding points of $S(x_i)$. + /// + /// * [Link to the KDD 2019 paper](https://dl.acm.org/doi/10.1145/3292500.3330680) + /// ]]> + /// + /// + /// + internal sealed class SrCnnEntireAnomalyDetector : BatchDataViewMapperBase { private const int MinBatchSize = 12; @@ -85,8 +125,8 @@ public Func GetDependencies(Func predicate) } } - public SrCnnBatchAnomalyDetector(IHostEnvironment env, IDataView input, string inputColumnName, string outputColumnName, double threshold, int batchSize, double sensitivity, SrCnnDetectMode detectMode) - : base(env, nameof(SrCnnBatchAnomalyDetector), input) + public SrCnnEntireAnomalyDetector(IHostEnvironment env, IDataView input, string inputColumnName, string outputColumnName, double threshold, int batchSize, double sensitivity, SrCnnDetectMode detectMode) + : base(env, nameof(SrCnnEntireAnomalyDetector), input) { Host.CheckValue(inputColumnName, nameof(inputColumnName)); _inputColumnName = inputColumnName; diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 24cca86cab..f149fa6a94 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -613,7 +613,7 @@ public void TestSrCnnBatchAnomalyDetector( string inputColumnName = nameof(TimeSeriesDataDouble.Value); // Do batch anomaly detection - var outputDataView = ml.AnomalyDetection.BatchDetectAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, + var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, threshold: 0.35, batchSize: -1, sensitivity: 90.0, mode); // Getting the data of the newly created column as an IEnumerable of From 372dec2e92b29b54b73aa4d4ef7672abe1e75cfb Mon Sep 17 00:00:00 2001 From: Klaus Marius Hansen Date: Tue, 19 May 2020 12:26:24 -0700 Subject: [PATCH 06/11] Handling review comments --- .../DetectAnomalyBySrCnnBatchPrediction.cs | 2 +- .../SrCnnEntireAnomalyDetector.cs | 31 ++++++++++--------- .../TimeSeriesDirectApi.cs | 5 ++- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs index 8c43b8f3f0..5ef334bc76 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs @@ -103,4 +103,4 @@ private class SrCnnAnomalyDetection public double[] Prediction { get; set; } } } -} \ No newline at end of file +} diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 52402b4291..dccd35c06d 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -42,13 +42,13 @@ public enum SrCnnDetectMode /// [DetectEntireAnomalyBySrCnn](xref:Microsoft.ML.TimeSeriesCatalog.DetectEntireAnomalyBySrCnn(Microsoft.ML.AnomalyDetectionCatalog,Microsoft.ML.IDataView,System.String,System.String,System.Double,System.Int32,System.Double,SrCnnDetectMode)) /// /// ### Background - /// In Microsoft, we developed a time-series anomaly detection service which helps customers to monitor the time-series continuously + /// At Microsoft, we developed a time-series anomaly detection service which helps customers to monitor the time-series continuously /// and alert for potential incidents on time. To tackle the problem of time-series anomaly detection, /// we proposed a novel algorithm based on Spectral Residual (SR) and Convolutional Neural Network /// (CNN). The SR model is borrowed from visual saliency detection domain to time-series anomaly detection. /// And here we onboarded this SR algorithm firstly. /// - /// The Spectral Residual (SR) algorithm is unsupervised, which means training step is not needed while using SR. It consists of three major steps: + /// The Spectral Residual (SR) algorithm is unsupervised, which means a training step is not needed when using SR. It consists of three major steps: /// (1) Fourier Transform to get the log amplitude spectrum; /// (2) calculation of spectral residual; /// (3) Inverse Fourier Transform that transforms the sequence back to spatial domain. @@ -232,11 +232,11 @@ public void Process() var bLen = _previousBatch.Count - _batch.Count; _previousBatch = _previousBatch.GetRange(_batch.Count, bLen); _previousBatch.AddRange(_batch); - _results = _modeler.Train(_previousBatch.ToArray()).Skip(bLen).ToArray(); + _results = _modeler.Train(_previousBatch.ToArray(), ref _results).Skip(bLen).ToArray(); } else { - _results = _modeler.Train(_batch.ToArray()); + _results = _modeler.Train(_batch.ToArray(), ref _results); } } @@ -271,7 +271,7 @@ internal sealed class SrCnnEntireModeler { private static readonly int _lookaheadWindowSize = 5; private static readonly int _backAddWindowSize = 5; - private static readonly int _avergingWindowSize = 3; + private static readonly int _averagingWindowSize = 3; private static readonly int _judgementWindowSize = 40; private static readonly double _eps = 1e-8; private static readonly double _deanomalyThreshold = 0.35; @@ -316,12 +316,15 @@ public SrCnnEntireModeler(double threshold, double sensitivity, SrCnnDetectMode _detectMode = detectMode; } - public double[][] Train(double[] values) + public double[][] Train(double[] values, ref double[][] results) { - double[][] results = new double[values.Length][]; - for (int i = 0; i < results.Length; ++i) + if (results == null) { - results[i] = new double[_outputLengthArray[(int)_detectMode]]; + results = new double[values.Length][]; + for (int i = 0; i < results.Length; ++i) + { + results[i] = new double[_outputLengthArray[(int)_detectMode]]; + } } SpectralResidual(values, results, _threshold); //Optional Steps @@ -364,7 +367,7 @@ private static void SpectralResidual(double[] values, double[][] results, double } // Step 4: Calculate spectral - double[] filteredLogList = AverageFilter(magLogList, _avergingWindowSize); + double[] filteredLogList = AverageFilter(magLogList, _averagingWindowSize); double[] spectralList = new double[length]; for (int i = 0; i < length; ++i) { @@ -499,7 +502,7 @@ private static void GetMargin(double[] values, double[][] results, double sensit var exps = CalculateExpectedValueByFft(GetDeanomalyData(values, GetAnomalyIndex(results.Select(x => x[1]).ToArray()))); //Step 9: Calculate Boundary Unit - var units = CalculateBoundaryUnit(values, results.Select(x => x[0] > 0 ? true : false).ToArray()); + var units = CalculateBoundaryUnit(values, results.Select(x => x[0] > 0).ToArray()); //Step 10: Calculate UpperBound and LowerBound var margins = units.Select(x => CalculateMargin(x, sensitivity)).ToList(); @@ -563,14 +566,14 @@ private static double[] GetDeanomalyData(double[] data, int[] anomalyIdxList) if (fitValues.Count > 1) { - deAnomalyData[idx] = CalculateInterplate(fitValues, idx); + deAnomalyData[idx] = CalculateInterpolate(fitValues, idx); } } return deAnomalyData; } - private static double CalculateInterplate(List> values, int idx) + private static double CalculateInterpolate(List> values, int idx) { var n = values.Count; double sumX = values.Sum(item => item.Item1); @@ -604,7 +607,7 @@ private static double[] CalculateExpectedValueByFft(double[] data) double[] ifftIm = new double[length]; FftUtils.ComputeBackwardFft(fftRe, fftIm, ifftRe, ifftIm, length); - return ifftRe.Take(length).ToArray(); + return ifftRe; } private static double[] CalculateBoundaryUnit(double[] data, bool[] isAnomalys) diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index f149fa6a94..8bb7b0d618 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -87,6 +87,7 @@ public TimeSeriesData(float value) private sealed class TimeSeriesDataDouble { + [LoadColumn(0)] public double Value { get; set; } } @@ -586,9 +587,7 @@ public void TestSrCnnBatchAnomalyDetector( var dataPath = GetDataPath("Timeseries", "anomaly_detection.csv"); // Load data from file into the dataView - dataView = ml.Data.LoadFromTextFile(dataPath, new[] { - new TextLoader.Column("Value", DataKind.Double, 0), - }, hasHeader: true); + dataView = ml.Data.LoadFromTextFile(dataPath, hasHeader: true); } else { From 116a1fac78b32a3af1d5221b7d15f40ab987df84 Mon Sep 17 00:00:00 2001 From: Meng Ai Date: Wed, 20 May 2020 16:38:31 +0800 Subject: [PATCH 07/11] Resolve the array allocating issue --- .../DataView/BatchDataViewMapperBase.cs | 4 +- .../SrCnnEntireAnomalyDetector.cs | 234 +++++++++++------- .../TimeSeriesDirectApi.cs | 5 +- 3 files changed, 143 insertions(+), 100 deletions(-) diff --git a/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs b/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs index 80ef9c64d6..e1cde83032 100644 --- a/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs +++ b/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs @@ -61,7 +61,7 @@ public DataViewRowCursor[] GetRowCursorSet(IEnumerable co } protected abstract ColumnBindingsBase SchemaBindings { get; } - protected abstract TBatch InitializeBatch(DataViewRowCursor input); + protected abstract TBatch CreateBatch(DataViewRowCursor input); protected abstract void ProcessBatch(TBatch currentBatch); protected abstract void ProcessExample(TBatch currentBatch, TInput currentInput); protected abstract Func GetLastInBatchDelegate(DataViewRowCursor lookAheadCursor); @@ -97,7 +97,7 @@ public Cursor(BatchDataViewMapperBase parent, DataViewRowCursor _lookAheadCursor = lookAheadCursor; _active = active; - _currentBatch = _parent.InitializeBatch(_input); + _currentBatch = _parent.CreateBatch(_input); _getters = _parent.CreateGetters(_input, _currentBatch, _active); diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index dccd35c06d..3791c8d890 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -156,7 +156,7 @@ protected override Delegate[] CreateGetters(DataViewRowCursor input, Batch curre return new[] { currentBatch.CreateGetter(input, _inputColumnName) }; } - protected override Batch InitializeBatch(DataViewRowCursor input) => new Batch(_batchSize, _outputLength, _modeler); + protected override Batch CreateBatch(DataViewRowCursor input) => new Batch(_batchSize, _outputLength, _modeler); protected override Func GetIsNewBatchDelegate(DataViewRowCursor input) { @@ -232,11 +232,12 @@ public void Process() var bLen = _previousBatch.Count - _batch.Count; _previousBatch = _previousBatch.GetRange(_batch.Count, bLen); _previousBatch.AddRange(_batch); - _results = _modeler.Train(_previousBatch.ToArray(), ref _results).Skip(bLen).ToArray(); + _modeler.Train(_previousBatch.ToArray(), ref _results); + _results = _results.Skip(bLen).ToArray(); } else { - _results = _modeler.Train(_batch.ToArray(), ref _results); + _modeler.Train(_batch.ToArray(), ref _results); } } @@ -309,6 +310,27 @@ internal sealed class SrCnnEntireModeler private readonly double _sensitivity; private readonly SrCnnDetectMode _detectMode; + //used in all modes + private double[] _predictArray; + private double[] _backAddArray; + private double[] _fftRe; + private double[] _fftIm; + private double[] _magList; + private double[] _magLogList; + private double[] _spectralList; + private double[] _transRe; + private double[] _transIm; + private double[] _ifftRe; + private double[] _ifftIm; + private double[] _ifftMagList; + private double[] _cumSumList; + private double[] _cumSumShift; + //used in AnomalyAndMargin mode + private double[] _units; + private double[] _val; + private double[] _ans; + private double[] _curWindow; + public SrCnnEntireModeler(double threshold, double sensitivity, SrCnnDetectMode detectMode) { _threshold = threshold; @@ -316,7 +338,7 @@ public SrCnnEntireModeler(double threshold, double sensitivity, SrCnnDetectMode _detectMode = detectMode; } - public double[][] Train(double[] values, ref double[][] results) + public void Train(double[] values, ref double[][] results) { if (results == null) { @@ -326,6 +348,10 @@ public double[][] Train(double[] values, ref double[][] results) results[i] = new double[_outputLengthArray[(int)_detectMode]]; } } + else if (results.Length > values.Length) + { + Array.Resize(ref results, values.Length); + } SpectralResidual(values, results, _threshold); //Optional Steps if (_detectMode == SrCnnDetectMode.AnomalyAndMargin) @@ -336,77 +362,89 @@ public double[][] Train(double[] values, ref double[][] results) { GetExpectedValue(values, results); } - return results; } - private static void SpectralResidual(double[] values, double[][] results, double threshold) + private void AllocateDoubleArray(ref double[] arr, int length) + { + if (arr == null) + { + arr = new double[length]; + } + else if (arr.Length != length) + { + Array.Resize(ref arr, length); + } + } + + private void SpectralResidual(double[] values, double[][] results, double threshold) { // Step 1: Get backadd wave double[] backAddList = BackAdd(values); // Step 2: FFT transformation int length = backAddList.Length; - double[] fftRe = new double[length]; - double[] fftIm = new double[length]; - FftUtils.ComputeForwardFft(backAddList, Enumerable.Repeat((double)0.0f, length).ToArray(), fftRe, fftIm, length); + AllocateDoubleArray(ref _fftRe, length); + AllocateDoubleArray(ref _fftIm, length); + + FftUtils.ComputeForwardFft(backAddList, Enumerable.Repeat((double)0.0f, length).ToArray(), _fftRe, _fftIm, length); // Step 3: Calculate mags of FFT - double[] magList = new double[length]; - double[] magLogList = new double[length]; + AllocateDoubleArray(ref _magList, length); + AllocateDoubleArray(ref _magLogList, length); for (int i = 0; i < length; ++i) { - magList[i] = Math.Sqrt((Math.Pow(fftRe[i], 2) + Math.Pow(fftIm[i], 2))); - if (magList[i] > _eps) + _magList[i] = Math.Sqrt((Math.Pow(_fftRe[i], 2) + Math.Pow(_fftIm[i], 2))); + if (_magList[i] > _eps) { - magLogList[i] = Math.Log(magList[i]); + _magLogList[i] = Math.Log(_magList[i]); } else { - magLogList[i] = 0; + _magLogList[i] = 0; } } // Step 4: Calculate spectral - double[] filteredLogList = AverageFilter(magLogList, _averagingWindowSize); - double[] spectralList = new double[length]; + double[] filteredLogList = AverageFilter(_magLogList, _averagingWindowSize); + AllocateDoubleArray(ref _spectralList, length); for (int i = 0; i < length; ++i) { - spectralList[i] = Math.Exp(magLogList[i] - filteredLogList[i]); + _spectralList[i] = Math.Exp(_magLogList[i] - filteredLogList[i]); } // Step 5: IFFT transformation - double[] transRe = new double[length]; - double[] transIm = new double[length]; + AllocateDoubleArray(ref _transRe, length); + AllocateDoubleArray(ref _transIm, length); for (int i = 0; i < length; ++i) { - if (magLogList[i] != 0) + if (_magLogList[i] != 0) { - transRe[i] = fftRe[i] * spectralList[i] / magList[i]; - transIm[i] = fftIm[i] * spectralList[i] / magList[i]; + _transRe[i] = _fftRe[i] * _spectralList[i] / _magList[i]; + _transIm[i] = _fftIm[i] * _spectralList[i] / _magList[i]; } else { - transRe[i] = 0; - transIm[i] = 0; + _transRe[i] = 0; + _transIm[i] = 0; } } - double[] ifftRe = new double[length]; - double[] ifftIm = new double[length]; - FftUtils.ComputeBackwardFft(transRe, transIm, ifftRe, ifftIm, length); + AllocateDoubleArray(ref _ifftRe, length); + AllocateDoubleArray(ref _ifftIm, length); + FftUtils.ComputeBackwardFft(_transRe, _transIm, _ifftRe, _ifftIm, length); // Step 6: Calculate mag and ave_mag of IFFT - double[] ifftMagList = new double[length]; + AllocateDoubleArray(ref _ifftMagList, length); for (int i = 0; i < length; ++i) { - ifftMagList[i] = Math.Sqrt((Math.Pow(ifftRe[i], 2) + Math.Pow(ifftIm[i], 2))); + _ifftMagList[i] = Math.Sqrt((Math.Pow(_ifftRe[i], 2) + Math.Pow(_ifftIm[i], 2))); } - double[] filteredIfftMagList = AverageFilter(ifftMagList, Math.Min(ifftMagList.Length, _judgementWindowSize)); + double[] filteredIfftMagList = AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize)); // Step 7: Calculate raw score and set result for (int i = 0; i < results.GetLength(0); ++i) { - var score = CalculateScore(ifftMagList[i], filteredIfftMagList[i]); + var score = CalculateScore(_ifftMagList[i], filteredIfftMagList[i]); score /= 10.0f; score = Math.Min(score, 1); score = Math.Max(score, 0); @@ -415,32 +453,32 @@ private static void SpectralResidual(double[] values, double[][] results, double results[i][0] = detres; results[i][1] = score; - results[i][2] = ifftMagList[i]; + results[i][2] = _ifftMagList[i]; } } - private static double[] BackAdd(double[] data) + private double[] BackAdd(double[] data) { - double[] predictArray = new double[_lookaheadWindowSize + 1]; + AllocateDoubleArray(ref _predictArray, _lookaheadWindowSize + 1); int j = 0; for (int i = data.Length - _lookaheadWindowSize - 2; i < data.Length - 1; ++i) { - predictArray[j++] = data[i]; + _predictArray[j++] = data[i]; } - var predictedValue = PredictNext(predictArray); - double[] backAddArray = new double[data.Length + _backAddWindowSize]; + var predictedValue = PredictNext(_predictArray); + AllocateDoubleArray(ref _backAddArray, data.Length + _backAddWindowSize); for (int i = 0; i < data.Length; ++i) { - backAddArray[i] = data[i]; + _backAddArray[i] = data[i]; } for (int i = 0; i < _backAddWindowSize; ++i) { - backAddArray[data.Length + i] = predictedValue; + _backAddArray[data.Length + i] = predictedValue; } - return backAddArray; + return _backAddArray; } - private static double PredictNext(double[] data) + private double PredictNext(double[] data) { var n = data.Length; double slopeSum = 0.0f; @@ -451,31 +489,32 @@ private static double PredictNext(double[] data) return (data[1] + slopeSum); } - private static double[] AverageFilter(double[] data, int n) + private double[] AverageFilter(double[] data, int n) { double cumsum = 0.0f; int length = data.Length; - double[] cumSumList = new double[length]; - double[] cumSumShift = new double[length]; + + AllocateDoubleArray(ref _cumSumList, length); + AllocateDoubleArray(ref _cumSumShift, length); for (int i = 0; i < length; ++i) { cumsum += data[i]; - cumSumList[i] = cumsum; - cumSumShift[i] = cumsum; + _cumSumList[i] = cumsum; + _cumSumShift[i] = cumsum; } for (int i = n; i < length; ++i) { - cumSumList[i] = (cumSumList[i] - cumSumShift[i - n]) / n; + _cumSumList[i] = (_cumSumList[i] - _cumSumShift[i - n]) / n; } for (int i = 1; i < n; ++i) { - cumSumList[i] /= (i + 1); + _cumSumList[i] /= (i + 1); } - return cumSumList; + return _cumSumList; } - private static double CalculateScore(double mag, double avgMag) + private double CalculateScore(double mag, double avgMag) { double safeDivisor = avgMag; if (Math.Abs(safeDivisor) < _eps) @@ -485,7 +524,7 @@ private static double CalculateScore(double mag, double avgMag) return (Math.Abs(mag - avgMag) / safeDivisor); } - private static void GetExpectedValue(double[] values, double[][] results) + private void GetExpectedValue(double[] values, double[][] results) { //Step 8: Calculate Expected Value var exps = CalculateExpectedValueByFft(GetDeanomalyData(values, GetAnomalyIndex(results.Select(x => x[1]).ToArray()))); @@ -496,7 +535,7 @@ private static void GetExpectedValue(double[] values, double[][] results) } } - private static void GetMargin(double[] values, double[][] results, double sensitivity) + private void GetMargin(double[] values, double[][] results, double sensitivity) { //Step 8: Calculate Expected Value var exps = CalculateExpectedValueByFft(GetDeanomalyData(values, GetAnomalyIndex(results.Select(x => x[1]).ToArray()))); @@ -518,7 +557,7 @@ private static void GetMargin(double[] values, double[][] results, double sensit } } - private static int[] GetAnomalyIndex(double[] scores) + private int[] GetAnomalyIndex(double[] scores) { List anomalyIdxList = new List(); for (int i = 0; i < scores.Length; ++i) @@ -530,7 +569,7 @@ private static int[] GetAnomalyIndex(double[] scores) return anomalyIdxList.ToArray(); } - private static double[] GetDeanomalyData(double[] data, int[] anomalyIdxList) + private double[] GetDeanomalyData(double[] data, int[] anomalyIdxList) { double[] deAnomalyData = (double[])data.Clone(); int minPointsToFit = 4; @@ -573,7 +612,7 @@ private static double[] GetDeanomalyData(double[] data, int[] anomalyIdxList) return deAnomalyData; } - private static double CalculateInterpolate(List> values, int idx) + private double CalculateInterpolate(List> values, int idx) { var n = values.Count; double sumX = values.Sum(item => item.Item1); @@ -587,30 +626,30 @@ private static double CalculateInterpolate(List> values, int return a * (double)idx + b; } - private static double[] CalculateExpectedValueByFft(double[] data) + private double[] CalculateExpectedValueByFft(double[] data) { int length = data.Length; - double[] fftRe = new double[length]; - double[] fftIm = new double[length]; - FftUtils.ComputeForwardFft(data, Enumerable.Repeat((double)0.0f, length).ToArray(), fftRe, fftIm, length); + AllocateDoubleArray(ref _fftRe, length); + AllocateDoubleArray(ref _fftIm, length); + FftUtils.ComputeForwardFft(data, Enumerable.Repeat((double)0.0f, length).ToArray(), _fftRe, _fftIm, length); for (int i = 0; i < length; ++i) { if (i > (double)length * 3 / 8 && i < (double)length * 5 / 8) { - fftRe[i] = 0.0f; - fftIm[i] = 0.0f; + _fftRe[i] = 0.0f; + _fftIm[i] = 0.0f; } } - double[] ifftRe = new double[length]; - double[] ifftIm = new double[length]; - FftUtils.ComputeBackwardFft(fftRe, fftIm, ifftRe, ifftIm, length); + AllocateDoubleArray(ref _ifftRe, length); + AllocateDoubleArray(ref _ifftIm, length); + FftUtils.ComputeBackwardFft(_fftRe, _fftIm, _ifftRe, _ifftIm, length); - return ifftRe; + return _ifftRe; } - private static double[] CalculateBoundaryUnit(double[] data, bool[] isAnomalys) + private double[] CalculateBoundaryUnit(double[] data, bool[] isAnomalys) { int window = Math.Min(data.Length / 3, 512); double trendFraction = 0.5; // mix trend and average of trend @@ -637,87 +676,90 @@ private static double[] CalculateBoundaryUnit(double[] data, bool[] isAnomalys) trendFraction = 1.0; } - double[] units = new double[trends.Length]; - for (int i = 0; i < units.Length; ++i) + AllocateDoubleArray(ref _units, trends.Length); + for (int i = 0; i < _units.Length; ++i) { - units[i] = Math.Max(1, averageTrendPart + Math.Abs(trends[i]) * trendFraction); - if (double.IsInfinity(units[i])) + _units[i] = Math.Max(1, averageTrendPart + Math.Abs(trends[i]) * trendFraction); + if (double.IsInfinity(_units[i])) { throw new ArithmeticException("Not finite unit value"); } } - return units; + return _units; } - private static double[] MedianFilter(double[] data, int window, bool needTwoEnd = false) + private double[] MedianFilter(double[] data, int window, bool needTwoEnd = false) { int wLen = window / 2 * 2 + 1; int tLen = data.Length; - double[] val = (double[]) data.Clone(); - double[] ans = (double[])data.Clone(); - double[] curWindow = new double[wLen]; + AllocateDoubleArray(ref _val, tLen); + Array.Copy(data, _val, tLen); + AllocateDoubleArray(ref _ans, tLen); + Array.Copy(data, _ans, tLen); + AllocateDoubleArray(ref _curWindow, wLen); + if (tLen < wLen) { - return ans; + return _ans; } for (int i = 0; i < wLen; i++) { int index = i; - int addId = BisectRight(curWindow, 0, i, val[i]); + int addId = BisectRight(_curWindow, 0, i, _val[i]); while (index > addId) { - curWindow[index] = curWindow[index - 1]; + _curWindow[index] = _curWindow[index - 1]; index -= 1; } - curWindow[addId] = data[i]; + _curWindow[addId] = data[i]; if (i >= wLen / 2 && needTwoEnd) - ans[i - wLen / 2] = SortedMedian(curWindow, 0, i + 1); + _ans[i - wLen / 2] = SortedMedian(_curWindow, 0, i + 1); } - ans[window / 2] = SortedMedian(curWindow, 0, wLen); + _ans[window / 2] = SortedMedian(_curWindow, 0, wLen); for (int i = window / 2 + 1; i < tLen - window / 2; i++) { - int deleteId = BisectRight(curWindow, 0, wLen, val[i - window / 2 - 1]) - 1; + int deleteId = BisectRight(_curWindow, 0, wLen, _val[i - window / 2 - 1]) - 1; int index = deleteId; while (index < wLen - 1) { - curWindow[index] = curWindow[index + 1]; + _curWindow[index] = _curWindow[index + 1]; index += 1; } - int addId = BisectRight(curWindow, 0, wLen - 1, val[i + window / 2]); + int addId = BisectRight(_curWindow, 0, wLen - 1, _val[i + window / 2]); index = wLen - 1; while (index > addId) { - curWindow[index] = curWindow[index - 1]; + _curWindow[index] = _curWindow[index - 1]; index -= 1; } - curWindow[addId] = data[i + window / 2]; - ans[i] = SortedMedian(curWindow, 0, wLen); + _curWindow[addId] = data[i + window / 2]; + _ans[i] = SortedMedian(_curWindow, 0, wLen); } if (needTwoEnd) { for (int i = tLen - window / 2; i < tLen; i++) { - int deleteId = BisectRight(curWindow, 0, wLen, data[i - window / 2 - 1]) - 1; + int deleteId = BisectRight(_curWindow, 0, wLen, data[i - window / 2 - 1]) - 1; int index = deleteId; while (index < wLen - 1) { - curWindow[index] = curWindow[index + 1]; + _curWindow[index] = _curWindow[index + 1]; index += 1; } wLen -= 1; - ans[i] = SortedMedian(curWindow, 0, wLen); + _ans[i] = SortedMedian(_curWindow, 0, wLen); } } - return ans; + return _ans; } - private static int BisectRight(double[] arr, int begin, int end, double tar) + private int BisectRight(double[] arr, int begin, int end, double tar) { while (begin < end) { @@ -730,7 +772,7 @@ private static int BisectRight(double[] arr, int begin, int end, double tar) return begin; } - private static double SortedMedian(double[] sortedValues, int begin, int end) + private double SortedMedian(double[] sortedValues, int begin, int end) { int n = end - begin; if (n % 2 == 1) @@ -742,7 +784,7 @@ private static double SortedMedian(double[] sortedValues, int begin, int end) } } - private static double CalculateMargin(double unit, double sensitivity) + private double CalculateMargin(double unit, double sensitivity) { if (Math.Floor(sensitivity) == sensitivity) { @@ -755,7 +797,7 @@ private static double CalculateMargin(double unit, double sensitivity) } } - private static double CalculateAnomalyScore(double value, double exp, double unit, bool isAnomaly) + private double CalculateAnomalyScore(double value, double exp, double unit, bool isAnomaly) { double anomalyScore = 0.0f; diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 8bb7b0d618..205b36fbb2 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -578,7 +578,8 @@ public void AnomalyDetectionWithSrCnn(bool loadDataFromFile) [Theory, CombinatorialData] public void TestSrCnnBatchAnomalyDetector( [CombinatorialValues(SrCnnDetectMode.AnomalyOnly, SrCnnDetectMode.AnomalyAndExpectedValue, SrCnnDetectMode.AnomalyAndMargin)]SrCnnDetectMode mode, - [CombinatorialValues(true, false)]bool loadDataFromFile) + [CombinatorialValues(true, false)]bool loadDataFromFile, + [CombinatorialValues(-1, 24, 26, 512)]int batchSize) { var ml = new MLContext(1); IDataView dataView; @@ -613,7 +614,7 @@ public void TestSrCnnBatchAnomalyDetector( // Do batch anomaly detection var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, - threshold: 0.35, batchSize: -1, sensitivity: 90.0, mode); + threshold: 0.35, batchSize: batchSize, sensitivity: 90.0, mode); // Getting the data of the newly created column as an IEnumerable of // SrCnnAnomalyDetection. From ab99eea10917bed4d624c69fd77f3adb028f680c Mon Sep 17 00:00:00 2001 From: Meng Ai Date: Wed, 20 May 2020 21:05:21 +0800 Subject: [PATCH 08/11] Move modeler initializing to CreateBatch and other minor fix. --- .../SrCnnEntireAnomalyDetector.cs | 140 ++++++++++-------- 1 file changed, 76 insertions(+), 64 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 3791c8d890..49dacf0147 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -81,8 +81,10 @@ internal sealed class SrCnnEntireAnomalyDetector : BatchDataViewMapperBase= 0 && sensitivity <= 100, nameof(sensitivity), "Must be in [0,100]."); _outputLength = _outputLengthArray[(int)detectMode]; - _modeler = new SrCnnEntireModeler(threshold, sensitivity, detectMode); + _threshold = threshold; + _sensitivity = sensitivity; + _detectMode = detectMode; _bindings = new Bindings(input.Schema, inputColumnName, outputColumnName, new VectorDataViewType(NumberDataViewType.Double, _outputLength)); } @@ -156,7 +160,7 @@ protected override Delegate[] CreateGetters(DataViewRowCursor input, Batch curre return new[] { currentBatch.CreateGetter(input, _inputColumnName) }; } - protected override Batch CreateBatch(DataViewRowCursor input) => new Batch(_batchSize, _outputLength, _modeler); + protected override Batch CreateBatch(DataViewRowCursor input) => new Batch(_batchSize, _outputLength, _threshold, _sensitivity, _detectMode); protected override Func GetIsNewBatchDelegate(DataViewRowCursor input) { @@ -198,7 +202,7 @@ internal sealed class Batch private int _batchSize; private double[][] _results; - public Batch(int batchSize, int outputLength, SrCnnEntireModeler modeler) + public Batch(int batchSize, int outputLength, double threshold, double sensitivity, SrCnnDetectMode detectMode) { _batchSize = batchSize; _outputLength = outputLength; @@ -212,7 +216,7 @@ public Batch(int batchSize, int outputLength, SrCnnEntireModeler modeler) _previousBatch = new List(batchSize); _batch = new List(batchSize); } - _modeler = modeler; + _modeler = new SrCnnEntireModeler(threshold, sensitivity, detectMode); } public void AddValue(double value) @@ -258,10 +262,7 @@ public ValueGetter> CreateGetter(DataViewRowCursor input, string double src = default; srcGetter(ref src); var result = VBufferEditor.Create(ref dst, _outputLength); - for (int i = 0; i < _outputLength; ++i) - { - result.Values[i] = _results[input.Position % _batchSize][i]; - } + _results[input.Position % _batchSize].CopyTo(result.Values); dst = result.Commit(); }; return getter; @@ -311,7 +312,7 @@ internal sealed class SrCnnEntireModeler private readonly SrCnnDetectMode _detectMode; //used in all modes - private double[] _predictArray; + private readonly double[] _predictArray; private double[] _backAddArray; private double[] _fftRe; private double[] _fftIm; @@ -325,10 +326,13 @@ internal sealed class SrCnnEntireModeler private double[] _ifftMagList; private double[] _cumSumList; private double[] _cumSumShift; + private double[] _zeroArray; + //used in AnomalyAndExpectedValue and AnomalyAndMargin + private double[] _deAnomalyData; //used in AnomalyAndMargin mode private double[] _units; private double[] _val; - private double[] _ans; + private double[] _trends; private double[] _curWindow; public SrCnnEntireModeler(double threshold, double sensitivity, SrCnnDetectMode detectMode) @@ -336,6 +340,7 @@ public SrCnnEntireModeler(double threshold, double sensitivity, SrCnnDetectMode _threshold = threshold; _sensitivity = sensitivity; _detectMode = detectMode; + _predictArray = new double[_lookaheadWindowSize + 1]; } public void Train(double[] values, ref double[][] results) @@ -376,17 +381,34 @@ private void AllocateDoubleArray(ref double[] arr, int length) } } + private void AllocateZeroArray(ref double[] arr, int length) + { + if (arr == null) + { + arr = new double[length]; + } + else if (arr.Length != length) + { + Array.Resize(ref arr, length); + } + for (int i = 0; i < arr.Length; ++i) + { + arr[i] = 0.0; + } + } + private void SpectralResidual(double[] values, double[][] results, double threshold) { // Step 1: Get backadd wave - double[] backAddList = BackAdd(values); + BackAdd(values); // Step 2: FFT transformation - int length = backAddList.Length; + int length = _backAddArray.Length; AllocateDoubleArray(ref _fftRe, length); AllocateDoubleArray(ref _fftIm, length); - FftUtils.ComputeForwardFft(backAddList, Enumerable.Repeat((double)0.0f, length).ToArray(), _fftRe, _fftIm, length); + AllocateZeroArray(ref _zeroArray, length); + FftUtils.ComputeForwardFft(_backAddArray, _zeroArray, _fftRe, _fftIm, length); // Step 3: Calculate mags of FFT AllocateDoubleArray(ref _magList, length); @@ -405,11 +427,11 @@ private void SpectralResidual(double[] values, double[][] results, double thresh } // Step 4: Calculate spectral - double[] filteredLogList = AverageFilter(_magLogList, _averagingWindowSize); + AverageFilter(_magLogList, _averagingWindowSize); AllocateDoubleArray(ref _spectralList, length); for (int i = 0; i < length; ++i) { - _spectralList[i] = Math.Exp(_magLogList[i] - filteredLogList[i]); + _spectralList[i] = Math.Exp(_magLogList[i] - _cumSumList[i]); } // Step 5: IFFT transformation @@ -439,12 +461,12 @@ private void SpectralResidual(double[] values, double[][] results, double thresh { _ifftMagList[i] = Math.Sqrt((Math.Pow(_ifftRe[i], 2) + Math.Pow(_ifftIm[i], 2))); } - double[] filteredIfftMagList = AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize)); + AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize)); // Step 7: Calculate raw score and set result for (int i = 0; i < results.GetLength(0); ++i) { - var score = CalculateScore(_ifftMagList[i], filteredIfftMagList[i]); + var score = CalculateScore(_ifftMagList[i], _cumSumList[i]); score /= 10.0f; score = Math.Min(score, 1); score = Math.Max(score, 0); @@ -457,9 +479,8 @@ private void SpectralResidual(double[] values, double[][] results, double thresh } } - private double[] BackAdd(double[] data) + private void BackAdd(double[] data) { - AllocateDoubleArray(ref _predictArray, _lookaheadWindowSize + 1); int j = 0; for (int i = data.Length - _lookaheadWindowSize - 2; i < data.Length - 1; ++i) { @@ -475,7 +496,6 @@ private double[] BackAdd(double[] data) { _backAddArray[data.Length + i] = predictedValue; } - return _backAddArray; } private double PredictNext(double[] data) @@ -489,7 +509,7 @@ private double PredictNext(double[] data) return (data[1] + slopeSum); } - private double[] AverageFilter(double[] data, int n) + private void AverageFilter(double[] data, int n) { double cumsum = 0.0f; int length = data.Length; @@ -511,7 +531,6 @@ private double[] AverageFilter(double[] data, int n) { _cumSumList[i] /= (i + 1); } - return _cumSumList; } private double CalculateScore(double mag, double avgMag) @@ -527,33 +546,34 @@ private double CalculateScore(double mag, double avgMag) private void GetExpectedValue(double[] values, double[][] results) { //Step 8: Calculate Expected Value - var exps = CalculateExpectedValueByFft(GetDeanomalyData(values, GetAnomalyIndex(results.Select(x => x[1]).ToArray()))); + GetDeanomalyData(values, GetAnomalyIndex(results.Select(x => x[1]).ToArray())); + CalculateExpectedValueByFft(_deAnomalyData); for (int i = 0; i < results.Length; ++i) { - results[i][3] = exps[i]; + results[i][3] = _ifftRe[i]; } } private void GetMargin(double[] values, double[][] results, double sensitivity) { //Step 8: Calculate Expected Value - var exps = CalculateExpectedValueByFft(GetDeanomalyData(values, GetAnomalyIndex(results.Select(x => x[1]).ToArray()))); + GetDeanomalyData(values, GetAnomalyIndex(results.Select(x => x[1]).ToArray())); + CalculateExpectedValueByFft(_deAnomalyData); //Step 9: Calculate Boundary Unit - var units = CalculateBoundaryUnit(values, results.Select(x => x[0] > 0).ToArray()); - - //Step 10: Calculate UpperBound and LowerBound - var margins = units.Select(x => CalculateMargin(x, sensitivity)).ToList(); + CalculateBoundaryUnit(values, results.Select(x => x[0] > 0).ToArray()); for (int i = 0; i < results.Length; ++i) { - results[i][3] = exps[i]; - results[i][4] = units[i]; - results[i][5] = exps[i] + margins[i]; - results[i][6] = exps[i] - margins[i]; + //Step 10: Calculate UpperBound and LowerBound + var margin = CalculateMargin(_units[i], sensitivity); + results[i][3] = _ifftRe[i]; + results[i][4] = _units[i]; + results[i][5] = _ifftRe[i] + margin; + results[i][6] = _ifftRe[i] - margin; //Step 11: Update Anomaly Score - results[i][1] = CalculateAnomalyScore(values[i], exps[i], units[i], results[i][0] > 0); + results[i][1] = CalculateAnomalyScore(values[i], _ifftRe[i], _units[i], results[i][0] > 0); } } @@ -569,9 +589,10 @@ private int[] GetAnomalyIndex(double[] scores) return anomalyIdxList.ToArray(); } - private double[] GetDeanomalyData(double[] data, int[] anomalyIdxList) + private void GetDeanomalyData(double[] data, int[] anomalyIdxList) { - double[] deAnomalyData = (double[])data.Clone(); + AllocateDoubleArray(ref _deAnomalyData, data.Length); + Array.Copy(data, _deAnomalyData, data.Length); int minPointsToFit = 4; foreach (var idx in anomalyIdxList) { @@ -605,11 +626,9 @@ private double[] GetDeanomalyData(double[] data, int[] anomalyIdxList) if (fitValues.Count > 1) { - deAnomalyData[idx] = CalculateInterpolate(fitValues, idx); + _deAnomalyData[idx] = CalculateInterpolate(fitValues, idx); } } - - return deAnomalyData; } private double CalculateInterpolate(List> values, int idx) @@ -626,12 +645,13 @@ private double CalculateInterpolate(List> values, int idx) return a * (double)idx + b; } - private double[] CalculateExpectedValueByFft(double[] data) + private void CalculateExpectedValueByFft(double[] data) { int length = data.Length; AllocateDoubleArray(ref _fftRe, length); AllocateDoubleArray(ref _fftIm, length); - FftUtils.ComputeForwardFft(data, Enumerable.Repeat((double)0.0f, length).ToArray(), _fftRe, _fftIm, length); + AllocateZeroArray(ref _zeroArray, length); + FftUtils.ComputeForwardFft(data, _zeroArray, _fftRe, _fftIm, length); for (int i = 0; i < length; ++i) { @@ -645,23 +665,21 @@ private double[] CalculateExpectedValueByFft(double[] data) AllocateDoubleArray(ref _ifftRe, length); AllocateDoubleArray(ref _ifftIm, length); FftUtils.ComputeBackwardFft(_fftRe, _fftIm, _ifftRe, _ifftIm, length); - - return _ifftRe; } - private double[] CalculateBoundaryUnit(double[] data, bool[] isAnomalys) + private void CalculateBoundaryUnit(double[] data, bool[] isAnomalys) { int window = Math.Min(data.Length / 3, 512); double trendFraction = 0.5; // mix trend and average of trend double trendSum = 0; int calculationSize = 0; - double[] trends = MedianFilter(data, window, true); - for (int i = 0; i < trends.Length; ++i) + MedianFilter(data, window, true); + for (int i = 0; i < _trends.Length; ++i) { if (!isAnomalys[i]) { - trendSum += Math.Abs(trends[i]); + trendSum += Math.Abs(_trends[i]); ++calculationSize; } } @@ -676,33 +694,29 @@ private double[] CalculateBoundaryUnit(double[] data, bool[] isAnomalys) trendFraction = 1.0; } - AllocateDoubleArray(ref _units, trends.Length); + AllocateDoubleArray(ref _units, _trends.Length); for (int i = 0; i < _units.Length; ++i) { - _units[i] = Math.Max(1, averageTrendPart + Math.Abs(trends[i]) * trendFraction); + _units[i] = Math.Max(1, averageTrendPart + Math.Abs(_trends[i]) * trendFraction); if (double.IsInfinity(_units[i])) { throw new ArithmeticException("Not finite unit value"); } } - - return _units; } - private double[] MedianFilter(double[] data, int window, bool needTwoEnd = false) + private void MedianFilter(double[] data, int window, bool needTwoEnd = false) { int wLen = window / 2 * 2 + 1; int tLen = data.Length; AllocateDoubleArray(ref _val, tLen); Array.Copy(data, _val, tLen); - AllocateDoubleArray(ref _ans, tLen); - Array.Copy(data, _ans, tLen); + AllocateDoubleArray(ref _trends, tLen); + Array.Copy(data, _trends, tLen); AllocateDoubleArray(ref _curWindow, wLen); if (tLen < wLen) - { - return _ans; - } + return; for (int i = 0; i < wLen; i++) { @@ -715,10 +729,10 @@ private double[] MedianFilter(double[] data, int window, bool needTwoEnd = false } _curWindow[addId] = data[i]; if (i >= wLen / 2 && needTwoEnd) - _ans[i - wLen / 2] = SortedMedian(_curWindow, 0, i + 1); + _trends[i - wLen / 2] = SortedMedian(_curWindow, 0, i + 1); } - _ans[window / 2] = SortedMedian(_curWindow, 0, wLen); + _trends[window / 2] = SortedMedian(_curWindow, 0, wLen); for (int i = window / 2 + 1; i < tLen - window / 2; i++) { @@ -737,7 +751,7 @@ private double[] MedianFilter(double[] data, int window, bool needTwoEnd = false index -= 1; } _curWindow[addId] = data[i + window / 2]; - _ans[i] = SortedMedian(_curWindow, 0, wLen); + _trends[i] = SortedMedian(_curWindow, 0, wLen); } if (needTwoEnd) @@ -752,11 +766,9 @@ private double[] MedianFilter(double[] data, int window, bool needTwoEnd = false index += 1; } wLen -= 1; - _ans[i] = SortedMedian(_curWindow, 0, wLen); + _trends[i] = SortedMedian(_curWindow, 0, wLen); } } - - return _ans; } private int BisectRight(double[] arr, int begin, int end, double tar) From df08f45378ed96f42979c48a689dd5ed41cf47ae Mon Sep 17 00:00:00 2001 From: Meng Ai Date: Thu, 21 May 2020 21:22:19 +0800 Subject: [PATCH 09/11] Fix 3 remaining comments --- .../SrCnnEntireAnomalyDetector.cs | 37 ++++++------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 49dacf0147..b590659962 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; +using System.IO; using System.Linq; using Microsoft.ML.Data; using Microsoft.ML.Data.DataView; @@ -142,7 +143,6 @@ public SrCnnEntireAnomalyDetector(IHostEnvironment env, IDataView input, string || detectMode == SrCnnDetectMode.AnomalyAndMargin, nameof(detectMode), "Invalid detectMode"); Host.CheckUserArg(sensitivity >= 0 && sensitivity <= 100, nameof(sensitivity), "Must be in [0,100]."); - _outputLength = _outputLengthArray[(int)detectMode]; _threshold = threshold; _sensitivity = sensitivity; @@ -201,6 +201,7 @@ internal sealed class Batch private SrCnnEntireModeler _modeler; private int _batchSize; private double[][] _results; + private int _bLen = 0; public Batch(int batchSize, int outputLength, double threshold, double sensitivity, SrCnnDetectMode detectMode) { @@ -231,13 +232,14 @@ public void Process() _batchSize = _batch.Count; if (_batch.Count < MinBatchSize) { - if (_previousBatch.Count + _batch.Count < MinBatchSize) - return; - var bLen = _previousBatch.Count - _batch.Count; - _previousBatch = _previousBatch.GetRange(_batch.Count, bLen); + if (_previousBatch.Count == 0) + { + throw new InvalidOperationException("The input must contain no less than 12 points."); + } + _bLen = _previousBatch.Count - _batch.Count; + _previousBatch = _previousBatch.GetRange(_batch.Count, _bLen); _previousBatch.AddRange(_batch); _modeler.Train(_previousBatch.ToArray(), ref _results); - _results = _results.Skip(bLen).ToArray(); } else { @@ -251,6 +253,7 @@ public void Reset() _previousBatch = _batch; _batch = tempBatch; _batch.Clear(); + _bLen = 0; } public ValueGetter> CreateGetter(DataViewRowCursor input, string inputCol) @@ -262,7 +265,7 @@ public ValueGetter> CreateGetter(DataViewRowCursor input, string double src = default; srcGetter(ref src); var result = VBufferEditor.Create(ref dst, _outputLength); - _results[input.Position % _batchSize].CopyTo(result.Values); + _results[input.Position % _batchSize + _bLen].CopyTo(result.Values); dst = result.Commit(); }; return getter; @@ -381,22 +384,6 @@ private void AllocateDoubleArray(ref double[] arr, int length) } } - private void AllocateZeroArray(ref double[] arr, int length) - { - if (arr == null) - { - arr = new double[length]; - } - else if (arr.Length != length) - { - Array.Resize(ref arr, length); - } - for (int i = 0; i < arr.Length; ++i) - { - arr[i] = 0.0; - } - } - private void SpectralResidual(double[] values, double[][] results, double threshold) { // Step 1: Get backadd wave @@ -407,7 +394,7 @@ private void SpectralResidual(double[] values, double[][] results, double thresh AllocateDoubleArray(ref _fftRe, length); AllocateDoubleArray(ref _fftIm, length); - AllocateZeroArray(ref _zeroArray, length); + AllocateDoubleArray(ref _zeroArray, length); FftUtils.ComputeForwardFft(_backAddArray, _zeroArray, _fftRe, _fftIm, length); // Step 3: Calculate mags of FFT @@ -650,7 +637,7 @@ private void CalculateExpectedValueByFft(double[] data) int length = data.Length; AllocateDoubleArray(ref _fftRe, length); AllocateDoubleArray(ref _fftIm, length); - AllocateZeroArray(ref _zeroArray, length); + AllocateDoubleArray(ref _zeroArray, length); FftUtils.ComputeForwardFft(data, _zeroArray, _fftRe, _fftIm, length); for (int i = 0; i < length; ++i) From 81ae699f85361c5187d0f85e2ed7327f3168123b Mon Sep 17 00:00:00 2001 From: Klaus Marius Hansen Date: Thu, 21 May 2020 08:53:14 -0700 Subject: [PATCH 10/11] Fixed code analysis issue. --- src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index b590659962..30a2a6dcc5 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -201,7 +201,7 @@ internal sealed class Batch private SrCnnEntireModeler _modeler; private int _batchSize; private double[][] _results; - private int _bLen = 0; + private int _bLen; public Batch(int batchSize, int outputLength, double threshold, double sensitivity, SrCnnDetectMode detectMode) { From 3355c4a8b5b3c2bc8d3a251bd8989e7578d3a6b0 Mon Sep 17 00:00:00 2001 From: Klaus Marius Hansen Date: Thu, 21 May 2020 11:02:29 -0700 Subject: [PATCH 11/11] Fixed minor comments --- src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 30a2a6dcc5..e146ff1ebf 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -4,7 +4,6 @@ using System; using System.Collections.Generic; -using System.IO; using System.Linq; using Microsoft.ML.Data; using Microsoft.ML.Data.DataView; @@ -234,7 +233,7 @@ public void Process() { if (_previousBatch.Count == 0) { - throw new InvalidOperationException("The input must contain no less than 12 points."); + throw Contracts.Except("The input must contain no less than 12 points."); } _bLen = _previousBatch.Count - _batch.Count; _previousBatch = _previousBatch.GetRange(_batch.Count, _bLen);