diff --git a/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs b/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs index 2257334..5fd489f 100644 --- a/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs +++ b/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs @@ -31,20 +31,6 @@ public void Fastenshtein() } } - [Benchmark] - public void Fastenshtein2() - { - for (int i = 0; i < words.Length; i++) - { - var levenshtein = new global::Fastenshtein.Levenshtein(words[i]); - - for (int j = 0; j < words.Length; j++) - { - levenshtein.DistanceFrom2(words[j]); - } - } - } - [Benchmark] public void Fastenshtein3() { @@ -60,22 +46,7 @@ public void Fastenshtein3() } [Benchmark] - public void Fastenshtein4() - { - for (int i = 0; i < words.Length; i++) - { - var levenshtein = new global::Fastenshtein.Levenshtein(words[i]); - - for (int j = 0; j < words.Length; j++) - { - levenshtein.DistanceFrom4(words[j]); - } - } - } - - - [Benchmark] - public void Fastenshtein5() + public void Fastenshtein_Inc() { for (int i = 0; i < words.Length; i++) { @@ -83,7 +54,7 @@ public void Fastenshtein5() for (int j = 0; j < words.Length; j++) { - levenshtein.DistanceFrom5(words[j]); + levenshtein.DistanceFrom_Inc(words[j]); } } } diff --git a/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs b/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs index 95f1dac..6167a5b 100644 --- a/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs +++ b/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs @@ -15,13 +15,6 @@ public int Fastenshtein() return levenshtein.DistanceFrom("test"); } - [Benchmark] - [SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")] - public int Fastenshtein2() - { - var levenshtein = new global::Fastenshtein.Levenshtein("test"); - return levenshtein.DistanceFrom2("test"); - } [Benchmark] [SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")] public int Fastenshtein3() @@ -29,22 +22,16 @@ public int Fastenshtein3() var levenshtein = new global::Fastenshtein.Levenshtein("test"); return levenshtein.DistanceFrom3("test"); } - [Benchmark] - [SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")] - public int Fastenshtein4() - { - var levenshtein = new global::Fastenshtein.Levenshtein("test"); - return levenshtein.DistanceFrom4("test"); - } [Benchmark] [SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")] - public int Fastenshtein5() + public int Fastenshtein_Inc() { var levenshtein = new global::Fastenshtein.Levenshtein("test"); - return levenshtein.DistanceFrom5("test"); + return levenshtein.DistanceFrom_Inc("test"); } + ////[Benchmark(Baseline = true)] ////[SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")] ////public int Fastenshtein_1_0_0_8() diff --git a/src/Fastenshtein/AutoCompleteLevenshtein.cs b/src/Fastenshtein/AutoCompleteLevenshtein.cs index b9046d9..c8a749b 100644 --- a/src/Fastenshtein/AutoCompleteLevenshtein.cs +++ b/src/Fastenshtein/AutoCompleteLevenshtein.cs @@ -21,10 +21,12 @@ public static int Distance(string value1, string value2) int[] costs = new int[value1.Length]; + int previousCost = 0; + // Add indexing for insertion to first row - for (int i = 0; i < costs.Length;) + for (; previousCost < costs.Length;) { - costs[i] = ++i; + costs[previousCost] = ++previousCost; } int minSize = value1.Length < value2.Length ? value1.Length : value2.Length; @@ -33,7 +35,7 @@ public static int Distance(string value1, string value2) { // cost of the first index int cost = i; - int previousCost = i; + previousCost = i; // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker char value2Char = value2[i]; diff --git a/src/Fastenshtein/Levenshtein.cs b/src/Fastenshtein/Levenshtein.cs index 4446b49..505d3ef 100644 --- a/src/Fastenshtein/Levenshtein.cs +++ b/src/Fastenshtein/Levenshtein.cs @@ -102,141 +102,207 @@ public int DistanceFrom3(string value) return costs[costs.Length - 1]; } + /////// + /////// Compares a value to the stored value. + /////// Not thread safe. + /////// + /////// Difference. 0 complete match. + ////public int DistanceFrom2(string value) + ////{ + //// var costs = this.costs; + //// ref var refCosts = ref MemoryMarshal.GetArrayDataReference(this.costs); + + //// if (costs.Length == 0) + //// { + //// return value.Length; + //// } + + //// // Add indexing for insertion to first row + //// for (int i = 0; i < costs.Length;) + //// { + //// costs[i] = ++i; + //// } + + //// for (int i = 0; i < value.Length; i++) + //// { + //// // cost of the first index + //// int cost = i; + //// int previousCost = i; + + //// // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker + //// char value1Char = value[i]; + + //// for (int j = 0; j < this.storedValue.Length; j++) + //// { + //// int currentCost = cost; + + //// // assigning this here reduces the array reads we do, improvement of the old version + //// cost = Unsafe.Add(ref refCosts, j); + + //// if (value1Char != this.storedValue[j]) + //// { + //// if (previousCost < currentCost) + //// { + //// currentCost = previousCost; + //// } + + //// if (cost < currentCost) + //// { + //// currentCost = cost; + //// } + + //// ++currentCost; + //// } + + //// /* + //// * Improvement on the older versions. + //// * Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why? + //// */ + //// Unsafe.Add(ref refCosts, j) = currentCost; + //// previousCost = currentCost; + //// } + //// } + + //// return costs[costs.Length - 1]; + ////} + + /////// + /////// Compares a value to the stored value. + /////// Not thread safe. + /////// + /////// Difference. 0 complete match. + ////public int DistanceFrom4(string value) + ////{ + //// var costs = this.costs; + + //// if (costs.Length == 0) + //// { + //// return value.Length; + //// } + + //// // Add indexing for insertion to first row + //// ref var refCosts = ref MemoryMarshal.GetArrayDataReference(costs); + //// for (int i = 1; i <= costs.Length; i++) + //// { + //// refCosts = i; + //// refCosts = ref Unsafe.Add(ref refCosts, 1); + //// } + + //// for (int i = 0; i < value.Length; i++) + //// { + //// // cost of the first index + //// int cost = i; + //// int previousCost = i; + + //// // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker + //// char value1Char = value[i]; + + //// for (int j = 0; j < this.storedValue.Length; j++) + //// { + //// int currentCost = cost; + + //// // assigning this here reduces the array reads we do, improvement of the old version + //// cost = costs[j]; + + //// if (value1Char != this.storedValue[j]) + //// { + //// if (previousCost < currentCost) + //// { + //// currentCost = previousCost; + //// } + + //// if (cost < currentCost) + //// { + //// currentCost = cost; + //// } + + //// ++currentCost; + //// } + + //// /* + //// * Improvement on the older versions. + //// * Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why? + //// */ + //// costs[j] = currentCost; + //// previousCost = currentCost; + //// } + //// } + + //// return costs[costs.Length - 1]; + ////} + + /////// + /////// Compares a value to the stored value. + /////// Not thread safe. + /////// + /////// Difference. 0 complete match. + ////public int DistanceFrom5(string value) + ////{ + //// var costs = this.costs; + //// var storedValue = this.storedValue; + + //// if (costs.Length == 0 || costs.Length != storedValue.Length) + //// { + //// return value.Length; + //// } + + //// int previousCost = 0; + + //// // Add indexing for insertion to first row + //// for (; previousCost < costs.Length;) + //// { + //// costs[previousCost] = ++previousCost; + //// } + + //// for (int i = 0; i < value.Length; i++) + //// { + //// // cost of the first index + //// int cost = i; + //// previousCost = i; + + //// // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker + //// char value1Char = value[i]; + + //// for (int j = 0; j < storedValue.Length; j++) + //// { + //// int currentCost = cost; + + //// // assigning this here reduces the array reads we do, improvement of the old version + //// cost = costs[j]; + + //// if (value1Char != storedValue[j]) + //// { + //// if (previousCost < currentCost) + //// { + //// currentCost = previousCost; + //// } + + //// if (cost < currentCost) + //// { + //// currentCost = cost; + //// } + + //// ++currentCost; + //// } + + //// /* + //// * Improvement on the older versions. + //// * Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why? + //// */ + //// costs[j] = currentCost; + //// previousCost = currentCost; + //// } + //// } + + //// return previousCost; + ////} + /// /// /// Compares a value to the stored value. /// Not thread safe. /// /// Difference. 0 complete match. - public int DistanceFrom2(string value) - { - var costs = this.costs; - ref var refCosts = ref MemoryMarshal.GetArrayDataReference(this.costs); - - if (costs.Length == 0) - { - return value.Length; - } - - // Add indexing for insertion to first row - for (int i = 0; i < costs.Length;) - { - costs[i] = ++i; - } - - for (int i = 0; i < value.Length; i++) - { - // cost of the first index - int cost = i; - int previousCost = i; - - // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker - char value1Char = value[i]; - - for (int j = 0; j < this.storedValue.Length; j++) - { - int currentCost = cost; - - // assigning this here reduces the array reads we do, improvement of the old version - cost = Unsafe.Add(ref refCosts, j); - - if (value1Char != this.storedValue[j]) - { - if (previousCost < currentCost) - { - currentCost = previousCost; - } - - if (cost < currentCost) - { - currentCost = cost; - } - - ++currentCost; - } - - /* - * Improvement on the older versions. - * Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why? - */ - Unsafe.Add(ref refCosts, j) = currentCost; - previousCost = currentCost; - } - } - - return costs[costs.Length - 1]; - } - - /// - /// Compares a value to the stored value. - /// Not thread safe. - /// - /// Difference. 0 complete match. - public int DistanceFrom4(string value) - { - var costs = this.costs; - - if (costs.Length == 0) - { - return value.Length; - } - - // Add indexing for insertion to first row - ref var refCosts = ref MemoryMarshal.GetArrayDataReference(costs); - for (int i = 1; i <= costs.Length; i++) - { - refCosts = i; - refCosts = ref Unsafe.Add(ref refCosts, 1); - } - - for (int i = 0; i < value.Length; i++) - { - // cost of the first index - int cost = i; - int previousCost = i; - - // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker - char value1Char = value[i]; - - for (int j = 0; j < this.storedValue.Length; j++) - { - int currentCost = cost; - - // assigning this here reduces the array reads we do, improvement of the old version - cost = costs[j]; - - if (value1Char != this.storedValue[j]) - { - if (previousCost < currentCost) - { - currentCost = previousCost; - } - - if (cost < currentCost) - { - currentCost = cost; - } - - ++currentCost; - } - - /* - * Improvement on the older versions. - * Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why? - */ - costs[j] = currentCost; - previousCost = currentCost; - } - } - - return costs[costs.Length - 1]; - } - - /// - /// Compares a value to the stored value. - /// Not thread safe. - /// - /// Difference. 0 complete match. - public int DistanceFrom5(string value) + public int DistanceFrom_Inc(string value) { var costs = this.costs; var storedValue = this.storedValue; @@ -249,9 +315,9 @@ public int DistanceFrom5(string value) int previousCost = 0; // Add indexing for insertion to first row - for (; previousCost < costs.Length;) + for (; previousCost < costs.Length; previousCost++) { - costs[previousCost] = ++previousCost; + costs[previousCost] = previousCost + 1; } for (int i = 0; i < value.Length; i++) @@ -268,7 +334,7 @@ public int DistanceFrom5(string value) int currentCost = cost; // assigning this here reduces the array reads we do, improvement of the old version - cost = costs[j]; + cost = costs[j];// Unsafe.Add(ref refCosts, j); if (value1Char != storedValue[j]) { @@ -306,21 +372,19 @@ public int DistanceFrom(string value) { var costs = this.costs; var storedValue = this.storedValue; - ref var storedValueRef = ref MemoryMarshal.GetReference(storedValue.AsSpan()); - if (costs.Length == 0) + if (costs.Length == 0 || costs.Length != storedValue.Length) { return value.Length; } int previousCost = 0; - ref var refCosts = ref MemoryMarshal.GetArrayDataReference(costs); + + // Add indexing for insertion to first row for (; previousCost < costs.Length;) { - refCosts = ++previousCost; - refCosts = ref Unsafe.Add(ref refCosts, 1); + costs[previousCost] = ++previousCost; } - refCosts = ref MemoryMarshal.GetArrayDataReference(costs); for (int i = 0; i < value.Length; i++) { @@ -336,9 +400,9 @@ public int DistanceFrom(string value) int currentCost = cost; // assigning this here reduces the array reads we do, improvement of the old version - cost = Unsafe.Add(ref refCosts, j); + cost = costs[j];// Unsafe.Add(ref refCosts, j); - if (value1Char != Unsafe.Add(ref storedValueRef, j)) + if (value1Char != storedValue[j]) { if (previousCost < currentCost) { @@ -357,7 +421,7 @@ public int DistanceFrom(string value) * Improvement on the older versions. * Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why? */ - Unsafe.Add(ref refCosts, j) = currentCost; + costs[j] = currentCost; previousCost = currentCost; } } diff --git a/src/Fastenshtein/StaticLevenshtein.cs b/src/Fastenshtein/StaticLevenshtein.cs index deaf457..cde4389 100644 --- a/src/Fastenshtein/StaticLevenshtein.cs +++ b/src/Fastenshtein/StaticLevenshtein.cs @@ -20,17 +20,19 @@ public static int Distance(string value1, string value2) int[] costs = new int[value2.Length]; + int previousCost = 0; + // Add indexing for insertion to first row - for (int i = 0; i < costs.Length;) + for (; previousCost < costs.Length;) { - costs[i] = ++i; + costs[previousCost] = ++previousCost; } for (int i = 0; i < value1.Length; i++) { // cost of the first index int cost = i; - int previousCost = i; + previousCost = i; // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker char value1Char = value1[i]; @@ -66,7 +68,7 @@ public static int Distance(string value1, string value2) } } - return costs[costs.Length - 1]; + return previousCost; } } }