diff --git a/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs b/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs
index 2257334..5fd489f 100644
--- a/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs
+++ b/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs
@@ -31,20 +31,6 @@ public void Fastenshtein()
}
}
- [Benchmark]
- public void Fastenshtein2()
- {
- for (int i = 0; i < words.Length; i++)
- {
- var levenshtein = new global::Fastenshtein.Levenshtein(words[i]);
-
- for (int j = 0; j < words.Length; j++)
- {
- levenshtein.DistanceFrom2(words[j]);
- }
- }
- }
-
[Benchmark]
public void Fastenshtein3()
{
@@ -60,22 +46,7 @@ public void Fastenshtein3()
}
[Benchmark]
- public void Fastenshtein4()
- {
- for (int i = 0; i < words.Length; i++)
- {
- var levenshtein = new global::Fastenshtein.Levenshtein(words[i]);
-
- for (int j = 0; j < words.Length; j++)
- {
- levenshtein.DistanceFrom4(words[j]);
- }
- }
- }
-
-
- [Benchmark]
- public void Fastenshtein5()
+ public void Fastenshtein_Inc()
{
for (int i = 0; i < words.Length; i++)
{
@@ -83,7 +54,7 @@ public void Fastenshtein5()
for (int j = 0; j < words.Length; j++)
{
- levenshtein.DistanceFrom5(words[j]);
+ levenshtein.DistanceFrom_Inc(words[j]);
}
}
}
diff --git a/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs b/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs
index 95f1dac..6167a5b 100644
--- a/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs
+++ b/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs
@@ -15,13 +15,6 @@ public int Fastenshtein()
return levenshtein.DistanceFrom("test");
}
- [Benchmark]
- [SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")]
- public int Fastenshtein2()
- {
- var levenshtein = new global::Fastenshtein.Levenshtein("test");
- return levenshtein.DistanceFrom2("test");
- }
[Benchmark]
[SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")]
public int Fastenshtein3()
@@ -29,22 +22,16 @@ public int Fastenshtein3()
var levenshtein = new global::Fastenshtein.Levenshtein("test");
return levenshtein.DistanceFrom3("test");
}
- [Benchmark]
- [SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")]
- public int Fastenshtein4()
- {
- var levenshtein = new global::Fastenshtein.Levenshtein("test");
- return levenshtein.DistanceFrom4("test");
- }
[Benchmark]
[SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")]
- public int Fastenshtein5()
+ public int Fastenshtein_Inc()
{
var levenshtein = new global::Fastenshtein.Levenshtein("test");
- return levenshtein.DistanceFrom5("test");
+ return levenshtein.DistanceFrom_Inc("test");
}
+
////[Benchmark(Baseline = true)]
////[SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")]
////public int Fastenshtein_1_0_0_8()
diff --git a/src/Fastenshtein/AutoCompleteLevenshtein.cs b/src/Fastenshtein/AutoCompleteLevenshtein.cs
index b9046d9..c8a749b 100644
--- a/src/Fastenshtein/AutoCompleteLevenshtein.cs
+++ b/src/Fastenshtein/AutoCompleteLevenshtein.cs
@@ -21,10 +21,12 @@ public static int Distance(string value1, string value2)
int[] costs = new int[value1.Length];
+ int previousCost = 0;
+
// Add indexing for insertion to first row
- for (int i = 0; i < costs.Length;)
+ for (; previousCost < costs.Length;)
{
- costs[i] = ++i;
+ costs[previousCost] = ++previousCost;
}
int minSize = value1.Length < value2.Length ? value1.Length : value2.Length;
@@ -33,7 +35,7 @@ public static int Distance(string value1, string value2)
{
// cost of the first index
int cost = i;
- int previousCost = i;
+ previousCost = i;
// cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker
char value2Char = value2[i];
diff --git a/src/Fastenshtein/Levenshtein.cs b/src/Fastenshtein/Levenshtein.cs
index 4446b49..505d3ef 100644
--- a/src/Fastenshtein/Levenshtein.cs
+++ b/src/Fastenshtein/Levenshtein.cs
@@ -102,141 +102,207 @@ public int DistanceFrom3(string value)
return costs[costs.Length - 1];
}
+ ///////
+ /////// Compares a value to the stored value.
+ /////// Not thread safe.
+ ///////
+ /////// Difference. 0 complete match.
+ ////public int DistanceFrom2(string value)
+ ////{
+ //// var costs = this.costs;
+ //// ref var refCosts = ref MemoryMarshal.GetArrayDataReference(this.costs);
+
+ //// if (costs.Length == 0)
+ //// {
+ //// return value.Length;
+ //// }
+
+ //// // Add indexing for insertion to first row
+ //// for (int i = 0; i < costs.Length;)
+ //// {
+ //// costs[i] = ++i;
+ //// }
+
+ //// for (int i = 0; i < value.Length; i++)
+ //// {
+ //// // cost of the first index
+ //// int cost = i;
+ //// int previousCost = i;
+
+ //// // cache value for inner loop to avoid index lookup and bounds checking; profiling showed this is quicker
+ //// char value1Char = value[i];
+
+ //// for (int j = 0; j < this.storedValue.Length; j++)
+ //// {
+ //// int currentCost = cost;
+
+ //// // assigning this here reduces the array reads we do, improvement of the old version
+ //// cost = Unsafe.Add(ref refCosts, j);
+
+ //// if (value1Char != this.storedValue[j])
+ //// {
+ //// if (previousCost < currentCost)
+ //// {
+ //// currentCost = previousCost;
+ //// }
+
+ //// if (cost < currentCost)
+ //// {
+ //// currentCost = cost;
+ //// }
+
+ //// ++currentCost;
+ //// }
+
+ //// /*
+ //// * Improvement on the older versions.
+ //// * Swapping the variables here results in a performance improvement on modern Intel CPUs, but I have no idea why.
+ //// */
+ //// Unsafe.Add(ref refCosts, j) = currentCost;
+ //// previousCost = currentCost;
+ //// }
+ //// }
+
+ //// return costs[costs.Length - 1];
+ ////}
+
+ ///////
+ /////// Compares a value to the stored value.
+ /////// Not thread safe.
+ ///////
+ /////// Difference. 0 complete match.
+ ////public int DistanceFrom4(string value)
+ ////{
+ //// var costs = this.costs;
+
+ //// if (costs.Length == 0)
+ //// {
+ //// return value.Length;
+ //// }
+
+ //// // Add indexing for insertion to first row
+ //// ref var refCosts = ref MemoryMarshal.GetArrayDataReference(costs);
+ //// for (int i = 1; i <= costs.Length; i++)
+ //// {
+ //// refCosts = i;
+ //// refCosts = ref Unsafe.Add(ref refCosts, 1);
+ //// }
+
+ //// for (int i = 0; i < value.Length; i++)
+ //// {
+ //// // cost of the first index
+ //// int cost = i;
+ //// int previousCost = i;
+
+ //// // cache value for inner loop to avoid index lookup and bounds checking; profiling showed this is quicker
+ //// char value1Char = value[i];
+
+ //// for (int j = 0; j < this.storedValue.Length; j++)
+ //// {
+ //// int currentCost = cost;
+
+ //// // assigning this here reduces the array reads we do, improvement of the old version
+ //// cost = costs[j];
+
+ //// if (value1Char != this.storedValue[j])
+ //// {
+ //// if (previousCost < currentCost)
+ //// {
+ //// currentCost = previousCost;
+ //// }
+
+ //// if (cost < currentCost)
+ //// {
+ //// currentCost = cost;
+ //// }
+
+ //// ++currentCost;
+ //// }
+
+ //// /*
+ //// * Improvement on the older versions.
+ //// * Swapping the variables here results in a performance improvement on modern Intel CPUs, but I have no idea why.
+ //// */
+ //// costs[j] = currentCost;
+ //// previousCost = currentCost;
+ //// }
+ //// }
+
+ //// return costs[costs.Length - 1];
+ ////}
+
+ ///////
+ /////// Compares a value to the stored value.
+ /////// Not thread safe.
+ ///////
+ /////// Difference. 0 complete match.
+ ////public int DistanceFrom5(string value)
+ ////{
+ //// var costs = this.costs;
+ //// var storedValue = this.storedValue;
+
+ //// if (costs.Length == 0 || costs.Length != storedValue.Length)
+ //// {
+ //// return value.Length;
+ //// }
+
+ //// int previousCost = 0;
+
+ //// // Add indexing for insertion to first row
+ //// for (; previousCost < costs.Length;)
+ //// {
+ //// costs[previousCost] = ++previousCost;
+ //// }
+
+ //// for (int i = 0; i < value.Length; i++)
+ //// {
+ //// // cost of the first index
+ //// int cost = i;
+ //// previousCost = i;
+
+ //// // cache value for inner loop to avoid index lookup and bounds checking; profiling showed this is quicker
+ //// char value1Char = value[i];
+
+ //// for (int j = 0; j < storedValue.Length; j++)
+ //// {
+ //// int currentCost = cost;
+
+ //// // assigning this here reduces the array reads we do, improvement of the old version
+ //// cost = costs[j];
+
+ //// if (value1Char != storedValue[j])
+ //// {
+ //// if (previousCost < currentCost)
+ //// {
+ //// currentCost = previousCost;
+ //// }
+
+ //// if (cost < currentCost)
+ //// {
+ //// currentCost = cost;
+ //// }
+
+ //// ++currentCost;
+ //// }
+
+ //// /*
+ //// * Improvement on the older versions.
+ //// * Swapping the variables here results in a performance improvement on modern Intel CPUs, but I have no idea why.
+ //// */
+ //// costs[j] = currentCost;
+ //// previousCost = currentCost;
+ //// }
+ //// }
+
+ //// return previousCost;
+ ////}
+ ///
///
/// Compares a value to the stored value.
/// Not thread safe.
///
/// Difference. 0 complete match.
- public int DistanceFrom2(string value)
- {
- var costs = this.costs;
- ref var refCosts = ref MemoryMarshal.GetArrayDataReference(this.costs);
-
- if (costs.Length == 0)
- {
- return value.Length;
- }
-
- // Add indexing for insertion to first row
- for (int i = 0; i < costs.Length;)
- {
- costs[i] = ++i;
- }
-
- for (int i = 0; i < value.Length; i++)
- {
- // cost of the first index
- int cost = i;
- int previousCost = i;
-
- // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker
- char value1Char = value[i];
-
- for (int j = 0; j < this.storedValue.Length; j++)
- {
- int currentCost = cost;
-
- // assigning this here reduces the array reads we do, improvement of the old version
- cost = Unsafe.Add(ref refCosts, j);
-
- if (value1Char != this.storedValue[j])
- {
- if (previousCost < currentCost)
- {
- currentCost = previousCost;
- }
-
- if (cost < currentCost)
- {
- currentCost = cost;
- }
-
- ++currentCost;
- }
-
- /*
- * Improvement on the older versions.
- * Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why?
- */
- Unsafe.Add(ref refCosts, j) = currentCost;
- previousCost = currentCost;
- }
- }
-
- return costs[costs.Length - 1];
- }
-
- ///
- /// Compares a value to the stored value.
- /// Not thread safe.
- ///
- /// Difference. 0 complete match.
- public int DistanceFrom4(string value)
- {
- var costs = this.costs;
-
- if (costs.Length == 0)
- {
- return value.Length;
- }
-
- // Add indexing for insertion to first row
- ref var refCosts = ref MemoryMarshal.GetArrayDataReference(costs);
- for (int i = 1; i <= costs.Length; i++)
- {
- refCosts = i;
- refCosts = ref Unsafe.Add(ref refCosts, 1);
- }
-
- for (int i = 0; i < value.Length; i++)
- {
- // cost of the first index
- int cost = i;
- int previousCost = i;
-
- // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker
- char value1Char = value[i];
-
- for (int j = 0; j < this.storedValue.Length; j++)
- {
- int currentCost = cost;
-
- // assigning this here reduces the array reads we do, improvement of the old version
- cost = costs[j];
-
- if (value1Char != this.storedValue[j])
- {
- if (previousCost < currentCost)
- {
- currentCost = previousCost;
- }
-
- if (cost < currentCost)
- {
- currentCost = cost;
- }
-
- ++currentCost;
- }
-
- /*
- * Improvement on the older versions.
- * Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why?
- */
- costs[j] = currentCost;
- previousCost = currentCost;
- }
- }
-
- return costs[costs.Length - 1];
- }
-
- ///
- /// Compares a value to the stored value.
- /// Not thread safe.
- ///
- /// Difference. 0 complete match.
- public int DistanceFrom5(string value)
+ public int DistanceFrom_Inc(string value)
{
var costs = this.costs;
var storedValue = this.storedValue;
@@ -249,9 +315,9 @@ public int DistanceFrom5(string value)
int previousCost = 0;
// Add indexing for insertion to first row
- for (; previousCost < costs.Length;)
+ for (; previousCost < costs.Length; previousCost++)
{
- costs[previousCost] = ++previousCost;
+ costs[previousCost] = previousCost + 1;
}
for (int i = 0; i < value.Length; i++)
@@ -268,7 +334,7 @@ public int DistanceFrom5(string value)
int currentCost = cost;
// assigning this here reduces the array reads we do, improvement of the old version
- cost = costs[j];
+ cost = costs[j];// Unsafe.Add(ref refCosts, j);
if (value1Char != storedValue[j])
{
@@ -306,21 +372,19 @@ public int DistanceFrom(string value)
{
var costs = this.costs;
var storedValue = this.storedValue;
- ref var storedValueRef = ref MemoryMarshal.GetReference(storedValue.AsSpan());
- if (costs.Length == 0)
+ if (costs.Length == 0 || costs.Length != storedValue.Length)
{
return value.Length;
}
int previousCost = 0;
- ref var refCosts = ref MemoryMarshal.GetArrayDataReference(costs);
+
+ // Add indexing for insertion to first row
for (; previousCost < costs.Length;)
{
- refCosts = ++previousCost;
- refCosts = ref Unsafe.Add(ref refCosts, 1);
+ costs[previousCost] = ++previousCost;
}
- refCosts = ref MemoryMarshal.GetArrayDataReference(costs);
for (int i = 0; i < value.Length; i++)
{
@@ -336,9 +400,9 @@ public int DistanceFrom(string value)
int currentCost = cost;
// assigning this here reduces the array reads we do, improvement of the old version
- cost = Unsafe.Add(ref refCosts, j);
+ cost = costs[j]; // previously: Unsafe.Add(ref refCosts, j)
- if (value1Char != Unsafe.Add(ref storedValueRef, j))
+ if (value1Char != storedValue[j])
{
if (previousCost < currentCost)
{
@@ -357,7 +421,7 @@ public int DistanceFrom(string value)
* Improvement on the older versions.
* Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why?
*/
- Unsafe.Add(ref refCosts, j) = currentCost;
+ costs[j] = currentCost;
previousCost = currentCost;
}
}
diff --git a/src/Fastenshtein/StaticLevenshtein.cs b/src/Fastenshtein/StaticLevenshtein.cs
index deaf457..cde4389 100644
--- a/src/Fastenshtein/StaticLevenshtein.cs
+++ b/src/Fastenshtein/StaticLevenshtein.cs
@@ -20,17 +20,19 @@ public static int Distance(string value1, string value2)
int[] costs = new int[value2.Length];
+ int previousCost = 0;
+
// Add indexing for insertion to first row
- for (int i = 0; i < costs.Length;)
+ for (; previousCost < costs.Length;)
{
- costs[i] = ++i;
+ costs[previousCost] = ++previousCost;
}
for (int i = 0; i < value1.Length; i++)
{
// cost of the first index
int cost = i;
- int previousCost = i;
+ previousCost = i;
// cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker
char value1Char = value1[i];
@@ -66,7 +68,7 @@ public static int Distance(string value1, string value2)
}
}
- return costs[costs.Length - 1];
+ return previousCost;
}
}
}