// LongestSequence is the result of LongestRepeatingSequence: the repeated
// substring that was found and how many times it occurs in the input.
//
// NOTE(review): only the header of this type is visible in the patch context;
// the fields are restated from the struct literals built by
// LongestRepeatingSequence so that this block is self-contained. Confirm
// against the declaration in strings/stringsutil.go.
type LongestSequence struct {
	Sequence string
	Count    int
}

// LongestRepeatingSequence finds the longest repeating non-overlapping
// sequence in a string.
//
// The comparison is done byte by byte, so for input containing multi-byte
// UTF-8 characters the returned sequence is a byte substring of s. When
// several sequences share the maximum length, the one whose first occurrence
// ends earliest in s is returned.
//
// The result's Count is the number of non-overlapping occurrences of the
// sequence in s as reported by strings.Count. If s contains no repeated
// sequence (including the empty and single-byte inputs), the zero
// LongestSequence is returned: Sequence is "" and Count is 0.
func LongestRepeatingSequence(s string) LongestSequence {
	n := len(s)
	if n < 2 {
		// Fewer than two bytes cannot contain the same byte twice.
		return LongestSequence{}
	}

	// Only two rows of the (n+1)x(n+1) dynamic-programming table are kept.
	// While processing row i, curr[j] (for j > i) holds the length of the
	// longest common suffix of s[:i] and s[:j] whose two occurrences do not
	// overlap, and prev is the same data for row i-1.
	prev := make([]int, n+1)
	curr := make([]int, n+1)

	maxLen, endPos := 0, 0
	for i := 1; i <= n; i++ {
		for j := i + 1; j <= n; j++ {
			// prev[j-1] < j-i keeps the occurrence ending at i from running
			// into the occurrence ending at j.
			if s[i-1] == s[j-1] && prev[j-1] < j-i {
				curr[j] = prev[j-1] + 1
				if curr[j] > maxLen {
					maxLen, endPos = curr[j], i
				}
			} else {
				curr[j] = 0
			}
		}
		// No explicit reset of the recycled row is needed: row i writes every
		// cell in [i+1, n], and row i+1 only reads cells in [i+1, n-1].
		prev, curr = curr, prev
	}

	if maxLen == 0 {
		// Returning early matters: strings.Count(s, "") is the rune count of
		// s plus one, which would be reported as a bogus repeat count.
		return LongestSequence{}
	}

	// A cell in row i never exceeds i, so endPos-maxLen is never negative.
	sequence := s[endPos-maxLen : endPos]
	return LongestSequence{
		Sequence: sequence,
		Count:    strings.Count(s, sequence),
	}
}