Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

default last in strutils.find to -1 #18173

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 63 additions & 87 deletions lib/pure/strutils.nim
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ import parseutils
from math import pow, floor, log10
from algorithm import fill, reverse
import std/enumutils
import std/strbasics

from unicode import toLower, toUpper
export toLower, toUpper
Expand Down Expand Up @@ -1805,58 +1806,17 @@ func join*[T: not string](a: openArray[T], sep: string = ""): string =
add(result, $x)

type
SkipTable* = array[char, int]
SkipTable* {.deprecated: "no longer used in strutils.find".} = array[char, int]

func initSkipTable*(a: var SkipTable, sub: string) {.rtl,
extern: "nsuInitSkipTable".} =
## Preprocess table `a` for `sub`.
let m = len(sub)
fill(a, m)

for i in 0 ..< m - 1:
a[sub[i]] = m - 1 - i

func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int {.
rtl, extern: "nsuFindStrA".} =
## Searches for `sub` in `s` inside range `start..last` using preprocessed
## table `a`. If `last` is unspecified, it defaults to `s.high` (the last
## element).
##
## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
let
last = if last == 0: s.high else: last
subLast = sub.len - 1

if subLast == -1:
# this was an empty needle string,
# we count this as match in the first possible position:
return start

# This is an implementation of the Boyer-Moore Horspool algorithms
# https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm
var skip = start

while last - skip >= subLast:
var i = subLast
while s[skip + i] == sub[i]:
if i == 0:
return skip
dec i
inc skip, a[s[skip + subLast]]
return -1

when not (defined(js) or defined(nimdoc) or defined(nimscript)):
func c_memchr(cstr: pointer, c: char, n: csize_t): pointer {.
importc: "memchr", header: "<string.h>".}
func c_strstr(haystack, needle: cstring): cstring {.
importc: "strstr", header: "<string.h>".}

const hasCStringBuiltin = true
else:
const hasCStringBuiltin = false
extern: "nsuInitSkipTable", deprecated: "is a no-op".} =
## Deprecated: Does nothing. Exists solely for backwards compatibility.
discard

func find*(s: string, sub: char, start: Natural = 0, last = 0): int {.rtl,
extern: "nsuFindChar".} =
extern: "nsuFindChar", deprecated: "use strbasics.indexOf", raises: [].} =
## Deprecated: use `strbasics.indexOf func<strbasics.html#indexOf,openArray[char],char>`_.
##
## Searches for `sub` in `s` inside range `start..last` (both ends included).
## If `last` is unspecified, it defaults to `s.high` (the last element).
##
Expand All @@ -1865,26 +1825,25 @@ func find*(s: string, sub: char, start: Natural = 0, last = 0): int {.rtl,
## Use `s[start..last].rfind` for a `start`-origin index.
##
## See also:
## * `strbasics.indexOf func<strbasics.html#indexOf,openArray[char],char>`_
## * `rfind func<#rfind,string,char,Natural>`_
## * `replace func<#replace,string,char,char>`_
if s.len == 0:
return -1
let last = if last == 0: s.high else: last
when nimvm:
for i in int(start)..last:
if sub == s[i]: return i
if last < 0:
return -1
let index: int = strbasics.indexOf(s[start..last], sub)
if index == -1:
return -1
else:
when hasCStringBuiltin:
let L = last-start+1
if L > 0:
let found = c_memchr(s[start].unsafeAddr, sub, cast[csize_t](L))
if not found.isNil:
return cast[ByteAddress](found) -% cast[ByteAddress](s.cstring)
else:
for i in int(start)..last:
if sub == s[i]: return i
return -1
return start + index

func find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {.
rtl, extern: "nsuFindCharSet".} =
rtl, extern: "nsuFindCharSet", deprecated: "use strbasics.indexOf",
raises: [].} =
## Deprecated: use `strbasics.indexOf func<strbasics.html#indexOf,openArray[char],set[char]>`_.
##
## Searches for `chars` in `s` inside range `start..last` (both ends included).
## If `last` is unspecified, it defaults to `s.high` (the last element).
##
Expand All @@ -1893,15 +1852,23 @@ func find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {.
## Use `s[start..last].find` for a `start`-origin index.
##
## See also:
## * `strbasics.indexOf func<strbasics.html#indexOf,openArray[char],set[char]>`_
## * `rfind func<#rfind,string,set[char],Natural>`_
## * `multiReplace func<#multiReplace,string,varargs[]>`_
let last = if last == 0: s.high else: last
for i in int(start)..last:
if s[i] in chars: return i
return -1
if s.len == 0:
return -1
let start: int = min(start, s.high)
let last: int = if last == 0: s.high else: min(last, s.high)
let index: int = strbasics.indexOf(s[start..last], chars)
if index == -1:
return -1
else:
return start + index

func find*(s, sub: string, start: Natural = 0, last = 0): int {.rtl,
extern: "nsuFindStr".} =
extern: "nsuFindStr", deprecated: "use strbasics.indexOf", raises: [].} =
## Deprecated: use `strbasics.indexOf func<strbasics.html#indexOf,openArray[char],openArray[char]>`_.
##
## Searches for `sub` in `s` inside range `start..last` (both ends included).
## If `last` is unspecified, it defaults to `s.high` (the last element).
##
Expand All @@ -1910,33 +1877,42 @@ func find*(s, sub: string, start: Natural = 0, last = 0): int {.rtl,
## Use `s[start..last].find` for a `start`-origin index.
##
## See also:
## * `strbasics.indexOf func<strbasics.html#indexOf,openArray[char],openArray[char]>`_
## * `rfind func<#rfind,string,string,Natural>`_
## * `replace func<#replace,string,string,string>`_
if sub.len > s.len - start: return -1
if sub.len == 0:
if last <= 0:
if start <= s.len:
return start
else:
return -1
elif last < start:
return -1
elif last < 0:
return -1
if s.len == 0: return -1
if sub.len == 1: return find(s, sub[0], start, last)

template useSkipTable {.dirty.} =
var a {.noinit.}: SkipTable
initSkipTable(a, sub)
result = find(a, s, sub, start, last)
let start: int = min(start, s.high)
let last: int = if last == 0: s.high else: min(last, s.high)

when not hasCStringBuiltin:
useSkipTable()
let index: int = strbasics.indexOf(s[start..last], sub)
if index == -1:
return -1
else:
when nimvm:
useSkipTable()
else:
when hasCStringBuiltin:
if last == 0 and s.len > start:
let found = c_strstr(s[start].unsafeAddr, sub)
if not found.isNil:
result = cast[ByteAddress](found) -% cast[ByteAddress](s.cstring)
else:
result = -1
else:
useSkipTable()
else:
useSkipTable()
return start + index

func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int {.
    rtl, extern: "nsuFindStrA", deprecated: "use strbasics.indexOf".} =
  ## Deprecated: use `strbasics.indexOf func<strbasics.html#indexOf,openArray[char],openArray[char]>`_.
  ##
  ## Forwards to `find(s, sub, start, last)`. The table `a` is accepted only
  ## so that existing call sites keep compiling; it is never read.
  ##
  ## See also:
  ## * `find func<#find,string,string,Natural,int>`_
  ## * `strbasics.indexOf func<strbasics.html#indexOf,openArray[char],openArray[char]>`_
  result = strutils.find(s, sub, start = start, last = last)

func rfind*(s: string, sub: char, start: Natural = 0, last = -1): int {.rtl,
extern: "nsuRFindChar".} =
Expand Down
127 changes: 127 additions & 0 deletions lib/std/strbasics.nim
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
##
## Experimental API, subject to change.

from algorithm import fill

const whitespaces = {' ', '\t', '\v', '\r', '\l', '\f'}

proc add*(x: var string, y: openArray[char]) =
Expand Down Expand Up @@ -113,3 +115,128 @@ func strip*(a: var string, leading = true, trailing = true, chars: set[char] = w
assert c == "X"

setSlice(a, stripSlice(a, leading, trailing, chars))

func indexOfUsingBoyerMooreHorspool(haystack: openArray[char],
                                    needle: openArray[char]): int =
  ## Searches `haystack` for the leftmost occurrence of `needle` with the
  ## Boyer-Moore-Horspool algorithm
  ## (https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm).
  ##
  ## Returns the 0-based index of the match inside `haystack`, or -1 when
  ## there is none. An empty `needle` matches at index 0.
  if needle.len == 0:
    # Without this guard `needle.high` is -1 and the comparison loop below
    # would index `haystack[-1]`.
    return 0

  let lastIdx = needle.high

  # Shift table: for every character, how far the search window may move
  # when that character is aligned with the last position of the needle.
  # Characters that do not occur in `needle[0 ..< lastIdx]` allow a shift by
  # the full needle length.
  var table {.noinit.}: array[char, int]
  fill(table, needle.len)
  for i in 0 ..< lastIdx:
    table[needle[i]] = lastIdx - i

  var skip = 0
  let rightEndpoint = haystack.len - needle.len
  while skip <= rightEndpoint:
    # Compare the window right-to-left; reaching index 0 means a full match.
    var i = lastIdx
    while haystack[skip + i] == needle[i]:
      if i == 0:
        return skip
      dec i
    skip += table[haystack[skip + lastIdx]]
  return -1

when defined(js) or defined(nimdoc) or defined(nimscript):
  # The C string functions are not declared for these targets.
  const hasCStringBuiltin: bool = false
else:
  func c_memchr(cstr: pointer, c: char, n: csize_t): pointer {.
    importc: "memchr", header: "<string.h>".}
  func c_strstr(haystack, needle: cstring): cstring {.
    importc: "strstr", header: "<string.h>".}

  # NOTE(review): this is `false` even though `memchr`/`strstr` are declared
  # just above, so every code path guarded by `hasCStringBuiltin` is compiled
  # out on all targets. Confirm whether that is intentional before changing it.
  const hasCStringBuiltin: bool = false

func indexOf*(haystack: openArray[char], needle: char): int =
  ## Returns the index of the leftmost `needle` in `haystack`, or -1 when
  ## `haystack` does not contain it. An empty `haystack` always yields -1.
  ##
  ## The index counts from the start of the slice that was passed in. This
  ## differs from
  ## `strutils.find <strutils.html#find,string,char,Natural,int>`_, whose
  ## result is based on the start of the string, not the start of the slice.
  ##
  ## See also:
  ## * `deprecated strutils.find<strutils.html#find,string,char,Natural,int>`_
  runnableExamples:
    doAssert "abcabc".indexOf('b') == 1
    doAssert "abcdef".toOpenArray(3, 5).indexOf('e') == 1
    doAssert "abc".indexOf('z') == -1
  if haystack.len == 0:
    return -1
  when nimvm:
    # The VM branch always uses the plain Nim scan.
    result = system.find(haystack, needle)
  else:
    when not hasCStringBuiltin:
      result = system.find(haystack, needle)
    else:
      # `haystack[0]` is valid here: the empty case returned above.
      let hit = c_memchr(haystack[0].unsafeAddr, needle,
                         csize_t(haystack.len))
      result =
        if hit.isNil:
          -1
        else:
          # Distance in bytes from the first element is the element index.
          cast[ByteAddress](hit) -% cast[ByteAddress](haystack[0].unsafeAddr)

func indexOf*(haystack: openArray[char], needles: set[char]): int =
  ## Returns the index of the leftmost character of `haystack` that is a
  ## member of `needles`, or -1 when no character of `haystack` is in the set.
  ##
  ## The index counts from the start of the slice that was passed in. This
  ## differs from
  ## `strutils.find <strutils.html#find,string,set[char],Natural,int>`_, whose
  ## result is based on the start of the string, not the start of the slice.
  ##
  ## See also:
  ## * `deprecated strutils.find<strutils.html#find,string,set[char],Natural,int>`_
  runnableExamples:
    doAssert "abcabc".indexOf({'b', 'c', 'z'}) == 1
    doAssert "abcabc".toOpenArray(3, 5).indexOf({'c', 'z'}) == 2
    doAssert "abcabc".indexOf({'x'}) == -1
  result = -1
  for position, ch in haystack:
    if ch in needles:
      return position

func indexOf*(haystack: openArray[char], needle: openArray[char]): int =
  ## Searches for `needle` in `haystack`. Returns the leftmost index of `needle`
  ## if it is found. Otherwise, returns -1. An empty `needle` is found at
  ## index 0. Note that this differs from
  ## `strutils.find <strutils.html#find,string,string,Natural,int>`_ in that
  ## `strutils.find` returns an index based on the start of the string, not the
  ## start of the slice.
  ##
  ## See also:
  ## * `deprecated strutils.find<strutils.html#find,string,string,Natural,int>`_
  runnableExamples:
    doAssert "abcabc".indexOf("") == 0
    doAssert "abcabc".indexOf("a") == 0
    doAssert "abcabc".indexOf("bc") == 1
    doAssert "abcabc".toOpenArray(2, 5).indexOf("bc") == 2
    doAssert "abcabc".indexOf("z") == -1
  # The search is always done in Nim. C `strstr` is deliberately not used:
  # it needs NUL-terminated arguments, which an `openArray[char]` (a slice or
  # an array) does not guarantee, and it cannot see past an embedded '\0'.
  if needle.len == 0:
    0
  elif haystack.len < needle.len:
    -1
  else:
    indexOfUsingBoyerMooreHorspool(haystack, needle)
Loading