From 9e9050b7493f3f8291203f160d5c2c9cc746b7c3 Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Fri, 24 May 2024 17:41:41 +0300 Subject: [PATCH 1/4] builtin: reduce allocations for s.index_kmp/1 and s.replace_once/2 --- vlib/builtin/string.v | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index c2df03b41c1352..1e476fb605cd2d 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -4,6 +4,7 @@ module builtin import strconv +import strings /* Note: A V string should be/is immutable from the point of view of @@ -337,12 +338,19 @@ pub fn (a string) clone() string { } // replace_once replaces the first occurrence of `rep` with the string passed in `with`. +@[manualfree] pub fn (s string) replace_once(rep string, with string) string { idx := s.index_(rep) if idx == -1 { return s.clone() } - return s.substr(0, idx) + with + s.substr(idx + rep.len, s.len) + mut sb := strings.new_builder(idx + with.len + (s.len - (idx + rep.len))) + sb.write_string(s.substr_unsafe(0, idx)) + sb.write_string(with) + sb.write_string(s.substr_unsafe(idx + rep.len, s.len)) + res := sb.str() + unsafe { sb.free() } + return res } // replace replaces all occurrences of `rep` with the string passed in `with`. @@ -1245,30 +1253,42 @@ pub fn (s string) last_index(needle string) ?int { return idx } -// index_kmp does KMP search. +const kmp_stack_buffer_size = 20 + +// index_kmp does KMP search inside the string `s` for the needle `p`. +// It returns the first found index where the string `p` is found. +// It returns -1, when the needle `p` is not present in `s`. @[direct_array_access; manualfree] fn (s string) index_kmp(p string) int { if p.len > s.len { return -1 } - mut prefix := []int{len: p.len} + mut stack_prefixes := [kmp_stack_buffer_size]int{} + mut p_prefixes := unsafe { &stack_prefixes[0] } + if p.len > kmp_stack_buffer_size { + p_prefixes = unsafe { vcalloc(p.len * sizeof(int)) } + } defer { - unsafe { prefix.free() } + if p.len > kmp_stack_buffer_size { + unsafe { free(p_prefixes) } + } } mut j := 0 for i := 1; i < p.len; i++ { for unsafe { p.str[j] != p.str[i] } && j > 0 { - j = prefix[j - 1] + j = unsafe { p_prefixes[j - 1] } } if unsafe { p.str[j] == p.str[i] } { j++ } - prefix[i] = j + unsafe { + p_prefixes[i] = j + } } j = 0 for i in 0 .. s.len { for unsafe { p.str[j] != s.str[i] } && j > 0 { - j = prefix[j - 1] + j = unsafe { p_prefixes[j - 1] } } if unsafe { p.str[j] == s.str[i] } { j++ From 4ffa7f8088749f25e4b858f2d8139bd56f173480 Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Fri, 24 May 2024 18:24:03 +0300 Subject: [PATCH 2/4] builtin: reduce allocations for s.replace/2 --- vlib/builtin/string.v | 58 +++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index 1e476fb605cd2d..470a5635641e0e 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -353,8 +353,9 @@ pub fn (s string) replace_once(rep string, with string) string { return res } +const replace_stack_buffer_size = 10 // replace replaces all occurrences of `rep` with the string passed in `with`. -@[direct_array_access] +@[direct_array_access; manualfree] pub fn (s string) replace(rep string, with string) string { if s.len == 0 || rep.len == 0 || rep.len > s.len { return s.clone() @@ -364,9 +365,17 @@ pub fn (s string) replace(rep string, with string) string { } // TODO: PERF Allocating ints is expensive. Should be a stack array // Get locations of all reps within this string - mut idxs := []int{cap: s.len / rep.len} + mut pidxs_len := 0 + pidxs_cap := s.len / rep.len + mut stack_idxs := [replace_stack_buffer_size]int{} + mut pidxs := unsafe { &stack_idxs[0] } + if pidxs_cap > replace_stack_buffer_size { + pidxs = unsafe { malloc(sizeof(int) * pidxs_cap) } + } defer { - unsafe { idxs.free() } + if pidxs_cap > replace_stack_buffer_size { + unsafe { free(pidxs) } + } } mut idx := 0 for { @@ -374,41 +383,36 @@ pub fn (s string) replace(rep string, with string) string { if idx == -1 { break } - idxs << idx + unsafe { + pidxs[pidxs_len] = idx + pidxs_len++ + } idx += rep.len } // Dont change the string if there's nothing to replace - if idxs.len == 0 { + if pidxs_len == 0 { return s.clone() } // Now we know the number of replacements we need to do and we can calc the len of the new string - new_len := s.len + idxs.len * (with.len - rep.len) + new_len := s.len + pidxs_len * (with.len - rep.len) mut b := unsafe { malloc_noscan(new_len + 1) } // add space for the null byte at the end // Fill the new string mut b_i := 0 mut s_idx := 0 - for _, rep_pos in idxs { - for i in s_idx .. rep_pos { // copy everything up to piece being replaced - unsafe { - b[b_i] = s[i] - } - b_i++ - } + for j in 0 .. pidxs_len { + rep_pos := unsafe { pidxs[j] } + // copy everything up to piece being replaced + before_len := rep_pos - s_idx + unsafe { vmemcpy(&b[b_i], &s[s_idx], before_len) } + b_i += before_len s_idx = rep_pos + rep.len // move string index past replacement - for i in 0 .. with.len { // copy replacement piece - unsafe { - b[b_i] = with[i] - } - b_i++ - } + // copy replacement piece + unsafe { vmemcpy(&b[b_i], &with[0], with.len) } + b_i += with.len } - if s_idx < s.len { // if any original after last replacement, copy it - for i in s_idx .. s.len { - unsafe { - b[b_i] = s[i] - } - b_i++ - } + if s_idx < s.len { + // if any original after last replacement, copy it + unsafe { vmemcpy(&b[b_i], &s[s_idx], s.len - s_idx) } } unsafe { b[new_len] = 0 @@ -453,7 +457,7 @@ pub fn (s string) replace_each(vals []string) string { // The string already found is set to `/del`, to avoid duplicate searches. for i in 0 .. rep.len { unsafe { - s_.str[idx + i] = 127 + s_.str[idx + i] = 0 } } // We need to remember both the position in the string, From 203730438cb10e3f1fbac401f18d6d0bcb661e31 Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Fri, 24 May 2024 19:20:17 +0300 Subject: [PATCH 3/4] builtin: restore s.replace_once/2 from master --- vlib/builtin/string.v | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index 470a5635641e0e..3c83cb6083713c 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -4,7 +4,6 @@ module builtin import strconv -import strings /* Note: A V string should be/is immutable from the point of view of @@ -338,19 +337,12 @@ pub fn (a string) clone() string { } // replace_once replaces the first occurrence of `rep` with the string passed in `with`. -@[manualfree] pub fn (s string) replace_once(rep string, with string) string { idx := s.index_(rep) if idx == -1 { return s.clone() } - mut sb := strings.new_builder(idx + with.len + (s.len - (idx + rep.len))) - sb.write_string(s.substr_unsafe(0, idx)) - sb.write_string(with) - sb.write_string(s.substr_unsafe(idx + rep.len, s.len)) - res := sb.str() - unsafe { sb.free() } - return res + return s.substr(0, idx) + with + s.substr(idx + rep.len, s.len) } const replace_stack_buffer_size = 10 From 92e4dfcb0ae87087c25b37f7d070c1625a8d000b Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Fri, 24 May 2024 19:34:20 +0300 Subject: [PATCH 4/4] ci: fix -cstrict builds with clang-18, remove obsolete TODO comment --- vlib/builtin/string.v | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index 3c83cb6083713c..ff187b7307bdc0 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -355,14 +355,12 @@ pub fn (s string) replace(rep string, with string) string { if !s.contains(rep) { return s.clone() } - // TODO: PERF Allocating ints is expensive. Should be a stack array - // Get locations of all reps within this string mut pidxs_len := 0 pidxs_cap := s.len / rep.len mut stack_idxs := [replace_stack_buffer_size]int{} mut pidxs := unsafe { &stack_idxs[0] } if pidxs_cap > replace_stack_buffer_size { - pidxs = unsafe { malloc(sizeof(int) * pidxs_cap) } + pidxs = unsafe { &int(malloc(sizeof(int) * pidxs_cap)) } } defer { if pidxs_cap > replace_stack_buffer_size { @@ -1262,7 +1260,7 @@ fn (s string) index_kmp(p string) int { mut stack_prefixes := [kmp_stack_buffer_size]int{} mut p_prefixes := unsafe { &stack_prefixes[0] } if p.len > kmp_stack_buffer_size { - p_prefixes = unsafe { vcalloc(p.len * sizeof(int)) } + p_prefixes = unsafe { &int(vcalloc(p.len * sizeof(int))) } } defer { if p.len > kmp_stack_buffer_size {