From 9e564d230231396e890daef10373403bf3efd0c3 Mon Sep 17 00:00:00 2001 From: Dan H Date: Sat, 18 May 2024 21:33:56 +0100 Subject: [PATCH] Updating readme with newer competitive benchmark results, updating Benchmarks, bumping version number, ReleaseNotes, including docs in nuget package. Remove old Fastenshtein versions from competitive benchmark Updating Benchmarks Altering message Project file upgrades, fixing License link, including docs, version bump --- README.md | 15 +- .../Fastenshtein.Benchmarking/Benchmarks.cs | 91 ++++--- .../CompetitiveMultiThreadBenchmark.cs | 185 +++++++------- .../CompetitiveSingleThreadBenchmark.cs | 163 ++++++------- .../FastenshteinBenchmark.cs | 88 ++++--- .../FastenshteinDisassembly.cs | 33 ++- .../FastenshteinOld/Fastenshtein_1_0_0_8.cs | 229 +++++++++--------- .../Fastenshtein.Benchmarking/Program.cs | 66 +++-- .../Fastenshtein.Benchmarking/RandomWords.cs | 43 ++-- src/Fastenshtein/Fastenshtein.csproj | 11 +- src/NuGet/Fastenshtein.dll.nuspec | 37 --- src/NuGet/Package.bat | 13 - 12 files changed, 433 insertions(+), 541 deletions(-) delete mode 100644 src/NuGet/Fastenshtein.dll.nuspec delete mode 100644 src/NuGet/Package.bat diff --git a/README.md b/README.md index ef823e5..f90260b 100644 --- a/README.md +++ b/README.md @@ -7,14 +7,13 @@ Fastenshtein is an optimized and fully unit tested Levenshtein implementation. I From the included brenchmarking tests comparing random words of 3 to 20 random chars to other Nuget Levenshtein implementations. 
-| Method | Mean | StdDev | Scaled | Scaled-StdDev | Gen 0 | Allocated | -|---------------------- |------------ |---------- |------- |-------------- |---------- |---------- | -| Fastenshtein | 16.2006 ms | 0.0069 ms | 1.00 | 0.00 | - | 20.48 kB | -| FastenshteinStatic | 17.2029 ms | 0.0234 ms | 1.06 | 0.00 | - | 2.81 MB | -| StringSimilarity | 24.1955 ms | 0.0280 ms | 1.49 | 0.00 | 329.1667 | 5.87 MB | -| NinjaNye | 35.9226 ms | 0.0152 ms | 2.22 | 0.00 | 6337.5000 | 44.21 MB | -| TNXStringManipulation | 45.4600 ms | 0.0065 ms | 2.81 | 0.00 | 3329.1667 | 24.63 MB | -| MinimumEditDistance | 207.9967 ms | 0.0893 ms | 12.84 | 0.01 | 3404.1667 | 25.59 MB | +| Method | Mean | Ratio | Rank | Gen0 | Allocated | Alloc Ratio | +|------------------------ |---------:|------:|-----:|---------:|-----------:|------------:| +| Fastenshtein | 1.077 ms | 1.00 | 1 | - | 6345 B | 1.000 | +| FastenshteinStatic | 1.122 ms | 1.04 | 2 | 3.9063 | 265441 B | 41.835 | +| NinjaNye | 1.899 ms | 1.76 | 4 | 76.1719 | 4274593 B | 673.695 | +| StringSimilarity | 2.899 ms | 2.69 | 5 | 7.8125 | 543770 B | 85.701 | +| FuzzyStringsNetStandard | 7.351 ms | 6.81 | 6 | 414.0625 | 22967283 B | 3,619.745 | ## Usage diff --git a/benchmarks/Fastenshtein.Benchmarking/Benchmarks.cs b/benchmarks/Fastenshtein.Benchmarking/Benchmarks.cs index a9f3d6d..3530772 100644 --- a/benchmarks/Fastenshtein.Benchmarking/Benchmarks.cs +++ b/benchmarks/Fastenshtein.Benchmarking/Benchmarks.cs @@ -1,47 +1,46 @@ -namespace Fastenshtein.Benchmarking -{ - public class BenchmarkSmallWordsSingleThread : FastenshteinBenchmark - { - protected override string[] CreateTestData() => RandomWords.Create(90, 5); - } - - public class BenchmarkNormalWordsSingleThread : FastenshteinBenchmark - { - protected override string[] CreateTestData() => RandomWords.Create(60, 20); - } - - public class BenchmarkLargeWordsSingleThread : FastenshteinBenchmark - { - protected override string[] CreateTestData() => RandomWords.Create(10, 400); - } - - public 
class CompetitiveBenchmarkSmallWordsSingleThread : CompetitiveSingleThreadBenchmark - { - protected override string[] CreateTestData() => RandomWords.Create(90, 5); - } - - public class CompetitiveBenchmarkNormalWordsSingleThread : CompetitiveSingleThreadBenchmark - { - protected override string[] CreateTestData() => RandomWords.Create(60, 20); - } - - public class CompetitiveBenchmarkLargeWordsSingleThread : CompetitiveSingleThreadBenchmark - { - protected override string[] CreateTestData() => RandomWords.Create(20, 400); - } - - public class CompetitiveBenchmarkSmallWordsMultiThread : CompetitiveMultiThreadBenchmark - { - protected override string[] CreateTestData() => RandomWords.Create(100, 5); - } - - public class CompetitiveBenchmarkNormalWordsMultiThread : CompetitiveMultiThreadBenchmark - { - protected override string[] CreateTestData() => RandomWords.Create(90, 20); - } - - public class CompetitiveBenchmarkLargeWordsMultiThread : CompetitiveMultiThreadBenchmark - { - protected override string[] CreateTestData() => RandomWords.Create(50, 400); - } +namespace Fastenshtein.Benchmarking; + +public class BenchmarkSmallWordsSingleThread : FastenshteinBenchmark +{ + protected override string[] CreateTestData() => RandomWords.Create(90, 5); +} + +public class BenchmarkNormalWordsSingleThread : FastenshteinBenchmark +{ + protected override string[] CreateTestData() => RandomWords.Create(60, 20); +} + +public class BenchmarkLargeWordsSingleThread : FastenshteinBenchmark +{ + protected override string[] CreateTestData() => RandomWords.Create(10, 400); +} + +public class CompetitiveBenchmarkSmallWordsSingleThread : CompetitiveSingleThreadBenchmark +{ + protected override string[] CreateTestData() => RandomWords.Create(90, 5); +} + +public class CompetitiveBenchmarkNormalWordsSingleThread : CompetitiveSingleThreadBenchmark +{ + protected override string[] CreateTestData() => RandomWords.Create(60, 20); +} + +public class CompetitiveBenchmarkLargeWordsSingleThread : 
CompetitiveSingleThreadBenchmark +{ + protected override string[] CreateTestData() => RandomWords.Create(20, 400); +} + +public class CompetitiveBenchmarkSmallWordsMultiThread : CompetitiveMultiThreadBenchmark +{ + protected override string[] CreateTestData() => RandomWords.Create(100, 5); +} + +public class CompetitiveBenchmarkNormalWordsMultiThread : CompetitiveMultiThreadBenchmark +{ + protected override string[] CreateTestData() => RandomWords.Create(90, 20); +} + +public class CompetitiveBenchmarkLargeWordsMultiThread : CompetitiveMultiThreadBenchmark +{ + protected override string[] CreateTestData() => RandomWords.Create(50, 400); } diff --git a/benchmarks/Fastenshtein.Benchmarking/CompetitiveMultiThreadBenchmark.cs b/benchmarks/Fastenshtein.Benchmarking/CompetitiveMultiThreadBenchmark.cs index 8946530..89f1951 100644 --- a/benchmarks/Fastenshtein.Benchmarking/CompetitiveMultiThreadBenchmark.cs +++ b/benchmarks/Fastenshtein.Benchmarking/CompetitiveMultiThreadBenchmark.cs @@ -1,128 +1,105 @@ -namespace Fastenshtein.Benchmarking -{ - using BenchmarkDotNet.Attributes; - using System.Threading.Tasks; +namespace Fastenshtein.Benchmarking; - [RankColumn] - public abstract class CompetitiveMultiThreadBenchmark - { - protected string[] words; +using BenchmarkDotNet.Attributes; +using System.Threading.Tasks; - protected abstract string[] CreateTestData(); +[RankColumn] +public abstract class CompetitiveMultiThreadBenchmark +{ + private string[] _words; - [GlobalSetup] - public void SetUp() - { - this.words = this.CreateTestData(); - } + protected abstract string[] CreateTestData(); - /* - * To add your own Levenshtein to the benchmarking alter the below code. - * Replace YourLevenshtein with your method. 
- */ - ////[Benchmark] - ////public void YourLevenshtein() - ////{ - //// Parallel.For(0, words.Length, i => - //// { - //// for (int j = 0; j < words.Length; j++) - //// { - //// YourLevenshtein(words[i], words[j]); - //// } - //// }); - ////} + [GlobalSetup] + public void SetUp() + => _words = CreateTestData(); - [Benchmark] - public void Fastenshtein() - { - Parallel.For(0, words.Length, i => - { - var levenshtein = new global::Fastenshtein.Levenshtein(words[i]); + /* + * To add your own Levenshtein to the benchmarking alter the below code. + * Replace YourLevenshtein with your method. + */ + ////[Benchmark] + ////public void YourLevenshtein() + ////{ + //// var words = _words; + //// Parallel.For(0, words.Length, i => + //// { + //// for (int j = 0; j < words.Length; j++) + //// { + //// YourLevenshtein(words[i], words[j]); + //// } + //// }); + ////} - for (int j = 0; j < words.Length; j++) - { - levenshtein.DistanceFrom(words[j]); - } - }); - } - - [Benchmark] - public void FastenshteinStatic() + [Benchmark(Baseline = true)] + public void Fastenshtein() + { + var words = _words; + Parallel.For(0, words.Length, i => { - Parallel.For(0, words.Length, i => - { - for (int j = 0; j < words.Length; j++) - { - global::Fastenshtein.Levenshtein.Distance(words[i], words[j]); - } - }); - } + var levenshtein = new global::Fastenshtein.Levenshtein(words[i]); - [Benchmark(Baseline = true)] - public void Fastenshtein_1_0_0_8() - { - Parallel.For(0, words.Length, i => + for (int j = 0; j < words.Length; j++) { - var levenshtein = new global::Fastenshtein.Benchmarking.FastenshteinOld.Fastenshtein_1_0_0_8(words[i]); - - for (int j = 0; j < words.Length; j++) - { - levenshtein.DistanceFrom(words[j]); - } - }); - } + levenshtein.DistanceFrom(words[j]); + } + }); + } - [Benchmark] - public void FastenshteinStatic_1_0_0_8() + [Benchmark] + public void FastenshteinStatic() + { + var words = _words; + Parallel.For(0, words.Length, i => { - Parallel.For(0, words.Length, i => + for 
(int j = 0; j < words.Length; j++) { - for (int j = 0; j < words.Length; j++) - { - global::Fastenshtein.Benchmarking.FastenshteinOld.Fastenshtein_1_0_0_8.Distance(words[i], words[j]); - } - }); - } + global::Fastenshtein.Levenshtein.Distance(words[i], words[j]); + } + }); + } - [Benchmark] - public void StringSimilarity() - { - // I've read the source code it is thread safe - var lev = new global::F23.StringSimilarity.Levenshtein(); + [Benchmark] + public void StringSimilarity() + { + // I've read the source code it is thread safe + var lev = new global::F23.StringSimilarity.Levenshtein(); - Parallel.For(0, words.Length, i => + var words = _words; + Parallel.For(0, words.Length, i => + { + for (int j = 0; j < words.Length; j++) { - for (int j = 0; j < words.Length; j++) - { - // why does it return a double ?? - lev.Distance(words[i], words[j]); - } - }); - } + // why does it return a double ?? + lev.Distance(words[i], words[j]); + } + }); + } - [Benchmark] - public void NinjaNye() + [Benchmark] + public void NinjaNye() + { + var words = _words; + Parallel.For(0, words.Length, i => { - Parallel.For(0, words.Length, i => + for (int j = 0; j < words.Length; j++) { - for (int j = 0; j < words.Length; j++) - { - global::NinjaNye.SearchExtensions.Levenshtein.LevenshteinProcessor.LevenshteinDistance(words[i], words[j]); - } - }); - } + global::NinjaNye.SearchExtensions.Levenshtein.LevenshteinProcessor.LevenshteinDistance(words[i], words[j]); + } + }); + } - [Benchmark] - public void FuzzyStringsNetStandard() + [Benchmark] + public void FuzzyStringsNetStandard() + { + var words = _words; + Parallel.For(0, words.Length, i => { - Parallel.For(0, words.Length, i => + for (int j = 0; j < words.Length; j++) { - for (int j = 0; j < words.Length; j++) - { - global::DuoVia.FuzzyStrings.LevenshteinDistanceExtensions.LevenshteinDistance(words[i], words[j], true); - } - }); - } + global::DuoVia.FuzzyStrings.LevenshteinDistanceExtensions.LevenshteinDistance(words[i], words[j], true); 
+ } + }); } } \ No newline at end of file diff --git a/benchmarks/Fastenshtein.Benchmarking/CompetitiveSingleThreadBenchmark.cs b/benchmarks/Fastenshtein.Benchmarking/CompetitiveSingleThreadBenchmark.cs index 53c6303..6d2a8ff 100644 --- a/benchmarks/Fastenshtein.Benchmarking/CompetitiveSingleThreadBenchmark.cs +++ b/benchmarks/Fastenshtein.Benchmarking/CompetitiveSingleThreadBenchmark.cs @@ -1,125 +1,102 @@ -namespace Fastenshtein.Benchmarking -{ - using BenchmarkDotNet.Attributes; +namespace Fastenshtein.Benchmarking; - [RankColumn] - public abstract class CompetitiveSingleThreadBenchmark - { - protected string[] words; +using BenchmarkDotNet.Attributes; - protected abstract string[] CreateTestData(); +[RankColumn, MemoryDiagnoser] +public abstract class CompetitiveSingleThreadBenchmark +{ + private string[] _words; - [GlobalSetup] - public void SetUp() - { - this.words = this.CreateTestData(); - } + protected abstract string[] CreateTestData(); - /* - * To add your own Levenshtein to the benchmarking alter the below code. - * Replace YourLevenshtein with your method. - */ - ////[Benchmark] - ////public void YourLevenshtein() - ////{ - //// for (int i = 0; i < words.Length; i++) - //// { - //// for (int j = 0; j < words.Length; j++) - //// { - //// YourLevenshtein(words[i], words[j]); - //// } - //// } - ////} + [GlobalSetup] + public void SetUp() + => _words = CreateTestData(); - [Benchmark] - public void Fastenshtein() - { - for (int i = 0; i < words.Length; i++) - { - var levenshtein = new global::Fastenshtein.Levenshtein(words[i]); + /* + * To add your own Levenshtein to the benchmarking alter the below code. + * Replace YourLevenshtein with your method. 
+ */ + ////[Benchmark] + ////public void YourLevenshtein() + ////{ + //// var words = _words; + //// for (int i = 0; i < words.Length; i++) + //// { + //// for (int j = 0; j < words.Length; j++) + //// { + //// YourLevenshtein(words[i], words[j]); + //// } + //// } + ////} - for (int j = 0; j < words.Length; j++) - { - levenshtein.DistanceFrom(words[j]); - } - } - } - - [Benchmark] - public void FastenshteinStatic() + [Benchmark(Baseline = true)] + public void Fastenshtein() + { + var words = _words; + for (int i = 0; i < words.Length; i++) { - for (int i = 0; i < words.Length; i++) - { - for (int j = 0; j < words.Length; j++) - { - global::Fastenshtein.Levenshtein.Distance(words[i], words[j]); - } - } - } + var levenshtein = new global::Fastenshtein.Levenshtein(words[i]); - [Benchmark(Baseline = true)] - public void Fastenshtein_1_0_0_8() - { - for (int i = 0; i < words.Length; i++) + for (int j = 0; j < words.Length; j++) { - var levenshtein = new global::Fastenshtein.Benchmarking.FastenshteinOld.Fastenshtein_1_0_0_8(words[i]); - - for (int j = 0; j < words.Length; j++) - { - levenshtein.DistanceFrom(words[j]); - } + levenshtein.DistanceFrom(words[j]); } } + } - [Benchmark] - public void FastenshteinStatic_1_0_0_8() + [Benchmark] + public void FastenshteinStatic() + { + var words = _words; + for (int i = 0; i < words.Length; i++) { - for (int i = 0; i < words.Length; i++) + for (int j = 0; j < words.Length; j++) { - for (int j = 0; j < words.Length; j++) - { - global::Fastenshtein.Benchmarking.FastenshteinOld.Fastenshtein_1_0_0_8.Distance(words[i], words[j]); - } + global::Fastenshtein.Levenshtein.Distance(words[i], words[j]); } } + } - [Benchmark] - public void StringSimilarity() - { - // I've read the source code it is thread safe - var lev = new global::F23.StringSimilarity.Levenshtein(); + [Benchmark] + public void StringSimilarity() + { + // I've read the source code it is thread safe + var lev = new global::F23.StringSimilarity.Levenshtein(); - for (int i = 
0; i < words.Length; i++) + var words = _words; + for (int i = 0; i < words.Length; i++) + { + for (int j = 0; j < words.Length; j++) { - for (int j = 0; j < words.Length; j++) - { - // why does it return a double ?? - lev.Distance(words[i], words[j]); - } + // why does it return a double ?? + lev.Distance(words[i], words[j]); } } + } - [Benchmark] - public void NinjaNye() + [Benchmark] + public void NinjaNye() + { + var words = _words; + for (int i = 0; i < words.Length; i++) { - for (int i = 0; i < words.Length; i++) + for (int j = 0; j < words.Length; j++) { - for (int j = 0; j < words.Length; j++) - { - global::NinjaNye.SearchExtensions.Levenshtein.LevenshteinProcessor.LevenshteinDistance(words[i], words[j]); - } + global::NinjaNye.SearchExtensions.Levenshtein.LevenshteinProcessor.LevenshteinDistance(words[i], words[j]); } } + } - [Benchmark] - public void FuzzyStringsNetStandard() + [Benchmark] + public void FuzzyStringsNetStandard() + { + var words = _words; + for (int i = 0; i < words.Length; i++) { - for (int i = 0; i < words.Length; i++) + for (int j = 0; j < words.Length; j++) { - for (int j = 0; j < words.Length; j++) - { - global::DuoVia.FuzzyStrings.LevenshteinDistanceExtensions.LevenshteinDistance(words[i], words[j]); - } + global::DuoVia.FuzzyStrings.LevenshteinDistanceExtensions.LevenshteinDistance(words[i], words[j]); } } } diff --git a/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs b/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs index ccb8647..81cc946 100644 --- a/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs +++ b/benchmarks/Fastenshtein.Benchmarking/FastenshteinBenchmark.cs @@ -1,71 +1,69 @@ using BenchmarkDotNet.Attributes; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -namespace Fastenshtein.Benchmarking +namespace Fastenshtein.Benchmarking; + +[RankColumn] +public abstract class FastenshteinBenchmark { - [RankColumn] - 
public abstract class FastenshteinBenchmark - { - protected string[] words; + private string[] _words; - protected abstract string[] CreateTestData(); + protected abstract string[] CreateTestData(); - [GlobalSetup] - public void SetUp() => this.words = this.CreateTestData(); + [GlobalSetup] + public void SetUp() => _words = CreateTestData(); - [Benchmark] - public void Fastenshtein() + [Benchmark] + public void Fastenshtein() + { + var words = _words; + for (int i = 0; i < words.Length; i++) { - for (int i = 0; i < words.Length; i++) - { - var levenshtein = new global::Fastenshtein.Levenshtein(words[i]); + var levenshtein = new global::Fastenshtein.Levenshtein(words[i]); - for (int j = 0; j < words.Length; j++) - { - levenshtein.DistanceFrom(words[j]); - } + for (int j = 0; j < words.Length; j++) + { + levenshtein.DistanceFrom(words[j]); } } + } - [Benchmark] - public void FastenshteinStatic() + [Benchmark] + public void FastenshteinStatic() + { + var words = _words; + for (int i = 0; i < words.Length; i++) { - for (int i = 0; i < words.Length; i++) + for (int j = 0; j < words.Length; j++) { - for (int j = 0; j < words.Length; j++) - { - global::Fastenshtein.Levenshtein.Distance(words[i], words[j]); - } + global::Fastenshtein.Levenshtein.Distance(words[i], words[j]); } } + } - [Benchmark(Baseline = true)] - public void Fastenshtein_1_0_0_8() + [Benchmark(Baseline = true)] + public void Fastenshtein_1_0_0_8() + { + var words = _words; + for (int i = 0; i < words.Length; i++) { - for (int i = 0; i < words.Length; i++) - { - var levenshtein = new global::Fastenshtein.Benchmarking.FastenshteinOld.Fastenshtein_1_0_0_8(words[i]); + var levenshtein = new global::Fastenshtein.Benchmarking.FastenshteinOld.Fastenshtein_1_0_0_8(words[i]); - for (int j = 0; j < words.Length; j++) - { - levenshtein.DistanceFrom(words[j]); - } + for (int j = 0; j < words.Length; j++) + { + levenshtein.DistanceFrom(words[j]); } } + } - [Benchmark] - public void FastenshteinStatic_1_0_0_8() + 
[Benchmark] + public void FastenshteinStatic_1_0_0_8() + { + var words = _words; + for (int i = 0; i < words.Length; i++) { - for (int i = 0; i < words.Length; i++) + for (int j = 0; j < words.Length; j++) { - for (int j = 0; j < words.Length; j++) - { - global::Fastenshtein.Benchmarking.FastenshteinOld.Fastenshtein_1_0_0_8.Distance(words[i], words[j]); - } + global::Fastenshtein.Benchmarking.FastenshteinOld.Fastenshtein_1_0_0_8.Distance(words[i], words[j]); } } } diff --git a/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs b/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs index 8ee0bce..ae58286 100644 --- a/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs +++ b/benchmarks/Fastenshtein.Benchmarking/FastenshteinDisassembly.cs @@ -2,25 +2,24 @@ using BenchmarkDotNet.Diagnosers; using System.Diagnostics.CodeAnalysis; -namespace Fastenshtein.Benchmarking +namespace Fastenshtein.Benchmarking; + +[DisassemblyDiagnoser()] +public class FastenshteinDisassembly { - [DisassemblyDiagnoser(/*printSource: true,*/), MemoryDiagnoser] - public class FastenshteinDisassembly + [Benchmark] + [SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")] + public int Fastenshtein() { - [Benchmark] - [SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")] - public int Fastenshtein() - { - var levenshtein = new global::Fastenshtein.Levenshtein("test"); - return levenshtein.DistanceFrom("test"); - } + var levenshtein = new global::Fastenshtein.Levenshtein("test"); + return levenshtein.DistanceFrom("test"); + } - [Benchmark(Baseline = true)] - [SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")] - public int Fastenshtein_1_0_0_8() - { - var levenshtein = new 
global::Fastenshtein.Benchmarking.FastenshteinOld.Fastenshtein_1_0_0_8("test"); - return levenshtein.DistanceFrom("test"); - } + [Benchmark(Baseline = true)] + [SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Must be an instance method for BenchmarkDotNet")] + public int Fastenshtein_1_0_0_8() + { + var levenshtein = new global::Fastenshtein.Benchmarking.FastenshteinOld.Fastenshtein_1_0_0_8("test"); + return levenshtein.DistanceFrom("test"); } } diff --git a/benchmarks/Fastenshtein.Benchmarking/FastenshteinOld/Fastenshtein_1_0_0_8.cs b/benchmarks/Fastenshtein.Benchmarking/FastenshteinOld/Fastenshtein_1_0_0_8.cs index 7a34e6b..f7559c6 100644 --- a/benchmarks/Fastenshtein.Benchmarking/FastenshteinOld/Fastenshtein_1_0_0_8.cs +++ b/benchmarks/Fastenshtein.Benchmarking/FastenshteinOld/Fastenshtein_1_0_0_8.cs @@ -1,156 +1,151 @@ -namespace Fastenshtein.Benchmarking.FastenshteinOld +namespace Fastenshtein.Benchmarking.FastenshteinOld; + +internal class Fastenshtein_1_0_0_8 { - using System; - using System.Collections.Generic; - using System.Text; + /* + * WARRING this class is performance critical (Speed). + */ + + private readonly string storedValue; + private readonly int[] costs; + + /// + /// Creates a new instance with a value to test other values against + /// + /// Value to compare other values to. + public Fastenshtein_1_0_0_8(string value) + { + this.storedValue = value; + // Create matrix row + this.costs = new int[this.storedValue.Length]; + } - internal class Fastenshtein_1_0_0_8 + /// + /// gets the length of the stored value that is tested against + /// + public int StoredLength => this.storedValue.Length; + + /// + /// Compares a value to the stored value. + /// Not thread safe. + /// + /// Difference. 0 complete match. + public int DistanceFrom(string value) { - /* - * WARRING this class is performance critical (Speed). 
- */ - - private readonly string storedValue; - private readonly int[] costs; - - /// - /// Creates a new instance with a value to test other values against - /// - /// Value to compare other values to. - public Fastenshtein_1_0_0_8(string value) + if (costs.Length == 0) { - this.storedValue = value; - // Create matrix row - this.costs = new int[this.storedValue.Length]; + return value.Length; } - /// - /// gets the length of the stored value that is tested against - /// - public int StoredLength => this.storedValue.Length; - - /// - /// Compares a value to the stored value. - /// Not thread safe. - /// - /// Difference. 0 complete match. - public int DistanceFrom(string value) + // Add indexing for insertion to first row + for (int i = 0; i < this.costs.Length;) { - if (costs.Length == 0) - { - return value.Length; - } + this.costs[i] = ++i; + } - // Add indexing for insertion to first row - for (int i = 0; i < this.costs.Length;) - { - this.costs[i] = ++i; - } + for (int i = 0; i < value.Length; i++) + { + // cost of the first index + int cost = i; + int previousCost = i; - for (int i = 0; i < value.Length; i++) + // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker + char value1Char = value[i]; + + for (int j = 0; j < this.storedValue.Length; j++) { - // cost of the first index - int cost = i; - int previousCost = i; + int currentCost = cost; - // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker - char value1Char = value[i]; + // assigning this here reduces the array reads we do, improvement of the old version + cost = costs[j]; - for (int j = 0; j < this.storedValue.Length; j++) + if (value1Char != this.storedValue[j]) { - int currentCost = cost; - - // assigning this here reduces the array reads we do, improvement of the old version - cost = costs[j]; - - if (value1Char != this.storedValue[j]) + if (previousCost < currentCost) { - if (previousCost < currentCost) - { - 
currentCost = previousCost; - } - - if (cost < currentCost) - { - currentCost = cost; - } + currentCost = previousCost; + } - ++currentCost; + if (cost < currentCost) + { + currentCost = cost; } - /* - * Improvement on the older versions. - * Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why? - */ - costs[j] = currentCost; - previousCost = currentCost; + ++currentCost; } + + /* + * Improvement on the older versions. + * Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why? + */ + costs[j] = currentCost; + previousCost = currentCost; } + } + + return this.costs[this.costs.Length - 1]; + } - return this.costs[this.costs.Length - 1]; + /// + /// Compares the two values to find the minimum Levenshtein distance. + /// Thread safe. + /// + /// Difference. 0 complete match. + public static int Distance(string value1, string value2) + { + if (value2.Length == 0) + { + return value1.Length; } - /// - /// Compares the two values to find the minimum Levenshtein distance. - /// Thread safe. - /// - /// Difference. 0 complete match. 
- public static int Distance(string value1, string value2) + int[] costs = new int[value2.Length]; + + // Add indexing for insertion to first row + for (int i = 0; i < costs.Length;) { - if (value2.Length == 0) - { - return value1.Length; - } + costs[i] = ++i; + } - int[] costs = new int[value2.Length]; + for (int i = 0; i < value1.Length; i++) + { + // cost of the first index + int cost = i; + int previousCost = i; - // Add indexing for insertion to first row - for (int i = 0; i < costs.Length;) - { - costs[i] = ++i; - } + // cache value for inner loop to avoid index lookup and bounds checking; profiling showed this is quicker + char value1Char = value1[i]; - for (int i = 0; i < value1.Length; i++) + for (int j = 0; j < value2.Length; j++) { - // cost of the first index - int cost = i; - int previousCost = i; + int currentCost = cost; - // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker - char value1Char = value1[i]; + // assigning this here reduces the array reads we do, improvement of the old version + cost = costs[j]; - for (int j = 0; j < value2.Length; j++) + if (value1Char != value2[j]) { - int currentCost = cost; - - // assigning this here reduces the array reads we do, improvement of the old version - cost = costs[j]; - - if (value1Char != value2[j]) + if (previousCost < currentCost) { - if (previousCost < currentCost) - { - currentCost = previousCost; - } - - if (cost < currentCost) - { - currentCost = cost; - } + currentCost = previousCost; + } - ++currentCost; + if (cost < currentCost) + { + currentCost = cost; } - /* - * Improvement on the older versions. - * Swapping the variables here results in a performance improvement for modern intel CPU’s, but I have no idea why? - */ - costs[j] = currentCost; - previousCost = currentCost; + ++currentCost; } - } - return costs[costs.Length - 1]; + /* + * Improvement on the older versions.
+ * Swapping the variables here results in a performance improvement for modern Intel CPUs, but I have no idea why. + */ + costs[j] = currentCost; + previousCost = currentCost; + } } + + return costs[costs.Length - 1]; } } \ No newline at end of file diff --git a/benchmarks/Fastenshtein.Benchmarking/Program.cs b/benchmarks/Fastenshtein.Benchmarking/Program.cs index 9845e20..f5483f9 100644 --- a/benchmarks/Fastenshtein.Benchmarking/Program.cs +++ b/benchmarks/Fastenshtein.Benchmarking/Program.cs @@ -1,43 +1,41 @@ -namespace Fastenshtein.Benchmarking +namespace Fastenshtein.Benchmarking; + +using BenchmarkDotNet.Running; +using System; + +static class Program { - using BenchmarkDotNet.Configs; - using BenchmarkDotNet.Running; - using System; + /* + * If you want to add your method to the benchmarking, + * there are just two files that need altering: + * CompetitiveMultiThreadBenchmark & CompetitiveSingleThreadBenchmark. + */ - static class Program + static void Main(string[] args) { - /* - * If you want to add your method to the benchmarking. - * There are just two files that need altering - * BenchmarkMultiThread & BenchmarkSingleThread. 
- */ + DateTime startTime = DateTime.UtcNow; - static void Main(string[] args) + if (args.Length != 0 && string.Equals(args[0], "d", StringComparison.OrdinalIgnoreCase)) { - DateTime startTime = DateTime.UtcNow; - - if (args.Length != 0 && string.Equals(args[0], "d", StringComparison.OrdinalIgnoreCase)) - { - _ = BenchmarkRunner.Run(); - } - else if (args.Length != 0 && string.Equals(args[0], "c", StringComparison.OrdinalIgnoreCase)) - { - _ = BenchmarkRunner.Run(); - _ = BenchmarkRunner.Run(); - _ = BenchmarkRunner.Run(); - - _ = BenchmarkRunner.Run(); - _ = BenchmarkRunner.Run(); - _ = BenchmarkRunner.Run(); - } - else - { - _ = BenchmarkRunner.Run(); - _ = BenchmarkRunner.Run(); - _ = BenchmarkRunner.Run(); - } + _ = BenchmarkRunner.Run(); + } + else if (args.Length != 0 && string.Equals(args[0], "c", StringComparison.OrdinalIgnoreCase)) + { + _ = BenchmarkRunner.Run(); + _ = BenchmarkRunner.Run(); + _ = BenchmarkRunner.Run(); - Console.WriteLine("Completed in : " + (DateTime.UtcNow - startTime)); + _ = BenchmarkRunner.Run(); + _ = BenchmarkRunner.Run(); + _ = BenchmarkRunner.Run(); + } + else + { + _ = BenchmarkRunner.Run(); + _ = BenchmarkRunner.Run(); + _ = BenchmarkRunner.Run(); } + + Console.WriteLine("Completed in : " + (DateTime.UtcNow - startTime)); } } \ No newline at end of file diff --git a/benchmarks/Fastenshtein.Benchmarking/RandomWords.cs b/benchmarks/Fastenshtein.Benchmarking/RandomWords.cs index 906cd09..7ecfa16 100644 --- a/benchmarks/Fastenshtein.Benchmarking/RandomWords.cs +++ b/benchmarks/Fastenshtein.Benchmarking/RandomWords.cs @@ -1,33 +1,32 @@ -namespace Fastenshtein.Benchmarking +namespace Fastenshtein.Benchmarking; + +using System; + +public static class RandomWords { - using System; + private static readonly char[] Letters = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' }; - public static class RandomWords + public static string[] Create(int size, int 
maxWordSize) { - private static readonly char[] Letters = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' }; + var words = new string[size]; - public static string[] Create(int size, int maxWordSize) - { - var words = new string[size]; + // using a const seed to make sure runs of the performance tests are consistent. + var random = new Random(69); - // using a const seed to make sure runs of the performance tests are consistent. - var random = new Random(69); + for (var i = 0; i < words.Length; i++) + { + var wordSize = random.Next(3, maxWordSize); - for (var i = 0; i < words.Length; i++) + words[i] = string.Create(wordSize, random, static (word, r) => { - var wordSize = random.Next(3, maxWordSize); - - words[i] = string.Create(wordSize, random, static (word, r) => + for (var j = 0; j < word.Length; j++) { - for (var j = 0; j < word.Length; j++) - { - var index = r.Next(0, Letters.Length); - word[j] = Letters[index]; - } - }); - } - - return words; + var index = r.Next(0, Letters.Length); + word[j] = Letters[index]; + } + }); } + + return words; } } diff --git a/src/Fastenshtein/Fastenshtein.csproj b/src/Fastenshtein/Fastenshtein.csproj index 4508f4f..f66134e 100644 --- a/src/Fastenshtein/Fastenshtein.csproj +++ b/src/Fastenshtein/Fastenshtein.csproj @@ -10,16 +10,17 @@ DanHartley One of the fastest Levenshtein distance packages on NuGet. Supports .NET Framework and .NET. Levenshtein calculates the shortest possible distance between two strings. Producing a count of the number of insertions, deletions and substitutions to make one string into another. 
Copyright © 2017 - LICENSE + MIT https://github.com/DanHarltey/Fastenshtein GIT https://github.com/DanHarltey/Fastenshtein.git - 1.0.0.9 - 1.0.0.9 - 1.0.0.9 - Adding .Net 8 support, AOT support, nullable reference types, deterministic build, SourceLink and symbols package + 1.0.10 + 1.0.10 + 1.0.10 + Performance improvement in the DistanceFrom method. Levenshtein Distance;String;Fuzzy Matching;Search;netstandard; nuget-readme.md + True diff --git a/src/NuGet/Fastenshtein.dll.nuspec b/src/NuGet/Fastenshtein.dll.nuspec deleted file mode 100644 index a806351..0000000 --- a/src/NuGet/Fastenshtein.dll.nuspec +++ /dev/null @@ -1,37 +0,0 @@ - - - - Fastenshtein - Fastenshtein - 1.0.0.5 - DanHartley - DanHartley - https://raw.githubusercontent.com/DanHarltey/Fastenshtein/master/LICENSE - https://github.com/DanHarltey/Fastenshtein - - false - The fastest Levenshtein on NuGet. Supports .NET Framework and .NET Core (.NET Standard 1.0). - - Levenshtein calculates the shortest possible distance between two strings. Producing a count of the number of insertions, deletions and substitutions to make one string into another. 
- Method renamed to help with SQL server hosting - Freely available under the MIT license - Levenshtein Distance String Fuzzy Matching Search netstandard - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/src/NuGet/Package.bat b/src/NuGet/Package.bat deleted file mode 100644 index fd1e843..0000000 --- a/src/NuGet/Package.bat +++ /dev/null @@ -1,13 +0,0 @@ -rmdir /s /q ".\Package\" -rmdir /s /q "..\Fastenshtein\bin\" -rmdir /s /q "..\FastenshteinFramework\bin\" -rmdir /s /q "..\FastenshteinPcl\bin\" - -"C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\MSBuild\15.0\Bin\msbuild.exe" ..\Fastenshtein\Fastenshtein.csproj /p:Configuration=Release - -"C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\MSBuild\15.0\Bin\msbuild.exe" ..\FastenshteinFramework\FastenshteinFramework.csproj /p:Configuration=Release;TargetFrameworkVersion=v4.0;TargetFrameworkProfile=Client - -"C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\MSBuild\15.0\Bin\msbuild.exe" ..\FastenshteinPcl\FastenshteinPcl.csproj /p:Configuration=Release - -C:\NuGet.exe pack Fastenshtein.dll.nuspec -@pause \ No newline at end of file