Skip to content

Commit

Permalink
字符串搜索算法
Browse files Browse the repository at this point in the history
  • Loading branch information
copi143 committed Nov 1, 2024
1 parent 73166e7 commit 1b08f9c
Show file tree
Hide file tree
Showing 9 changed files with 104 additions and 8 deletions.
2 changes: 2 additions & 0 deletions include/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ mkheader_h_recurse(libc-base)
mkheader_hpp_recurse(libc-base)
mkheader_h_recurse(data-structure)
mkheader_hpp_recurse(data-structure)
mkheader_h_recurse(algorithm)
mkheader_hpp_recurse(algorithm)
mkheader_recurse(misc)
mkheader_recurse(libc)
mkheader_hpp_recurse(libc++)
Expand Down
12 changes: 12 additions & 0 deletions include/algorithm.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Generated by macro 'mkheader'.
// This file is automatically generated, please do not modify it.
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "algorithm/base.h"
#include "algorithm/bm.h"
#include "algorithm/kmp.h"
#ifdef __cplusplus
}
#endif
3 changes: 3 additions & 0 deletions include/algorithm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Generated by macro 'mkheader'.
// This file is automatically generated, please do not modify it.
#pragma once
60 changes: 60 additions & 0 deletions include/algorithm/bm.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#pragma once
#include "base.h"

/**
 * Find the first occurrence of `pattern` in `text` using the
 * Boyer-Moore-Horspool bad-character heuristic.
 *
 * Neither buffer has to be NUL-terminated; only the first `textlen` /
 * `patternlen` bytes are examined.
 *
 * @param text        haystack bytes
 * @param textlen     number of bytes of `text` to search
 * @param pattern     needle bytes
 * @param patternlen  number of bytes in `pattern`
 * @return offset of the first match inside `text`, or -1 when there is no
 *         match (or a length is negative). An empty pattern matches at 0.
 */
static ssize_t bm_nsearch(cstr text, i32 textlen, cstr pattern, i32 patternlen) {
    if (textlen < 0 || patternlen < 0) return -1;
    if (patternlen == 0) return 0;          // empty needle: match at the start
    if (patternlen > textlen) return -1;    // needle cannot fit

    const i32 last = patternlen - 1;

    // bc[c] = distance from the right-most occurrence of byte c (ignoring the
    // final pattern byte) to the end of the pattern; patternlen if c is absent.
    i32 bc[256];
    for (i32 c = 0; c < 256; c++) {
        bc[c] = patternlen;
    }
    for (i32 k = 0; k < last; k++) {
        bc[(byte)pattern[k]] = last - k;
    }

    // `end` is the text index aligned with the last pattern byte. It only ever
    // moves forward, which guarantees termination.
    i32 end = last;
    for (;;) {
        // Compare right-to-left with separate cursors so `end` is untouched.
        i32 t = end;
        i32 p = last;
        while (text[t] == pattern[p]) {
            if (p == 0) return t;
            t--;
            p--;
        }

        // Horspool: always shift by the byte under the window's last position,
        // not by the byte at the mismatch position.
        const i32 shift = bc[(byte)text[end]];
        // Stop when the next window would run past the text; comparing against
        // the remaining room also avoids overflowing `end`.
        if (shift > textlen - 1 - end) return -1;
        end += shift;
    }
}

/**
 * NUL-terminated convenience wrapper around bm_nsearch().
 *
 * Measures both strings with strlen() and forwards the lengths.
 *
 * @param text     NUL-terminated haystack
 * @param pattern  NUL-terminated needle
 * @return whatever bm_nsearch() returns for these inputs
 */
static ssize_t bm_search(cstr text, cstr pattern) {
    const i32 text_length    = strlen(text);
    const i32 pattern_length = strlen(pattern);
    return bm_nsearch(text, text_length, pattern, pattern_length);
}

/**
 * Precompute a reusable search context for `pattern`.
 *
 * The returned blob is a single calloc() allocation of 4-byte words:
 *
 *   word 0                      pattern length
 *   next PADDING_UP(len,4)/4    copy of the pattern bytes, zero padded
 *   last 256 words              bad-character shift table
 *
 * @param pattern     needle bytes (need not be NUL-terminated)
 * @param patternlen  number of bytes in `pattern`
 * @return pointer to the blob, to be released with free(); NULL if
 *         `patternlen` is negative or the allocation fails.
 */
static void *bm_nprepare(cstr pattern, i32 patternlen) {
    if (patternlen < 0) return NULL;

    // Number of 4-byte words occupied by the (padded) pattern copy.
    const i32 pattern_words = PADDING_UP(patternlen, 4) / 4;

    // The cast is kept because this header is also pulled into C++ builds.
    i32 *data = (i32 *)calloc(1 + pattern_words + 256, 4);
    if (data == NULL) return NULL;

    data[0] = patternlen;
    memcpy(data + 1, pattern, patternlen);

    // Shift table lives right after the padded pattern copy.
    i32 *bc = data + 1 + pattern_words;
    for (i32 c = 0; c < 256; c++) {
        bc[c] = patternlen;
    }
    // The final pattern byte is deliberately excluded so every shift is >= 1.
    for (i32 k = 0; k < patternlen - 1; k++) {
        bc[(byte)pattern[k]] = patternlen - k - 1;
    }

    return data;
}

/**
 * NUL-terminated convenience wrapper around bm_nprepare().
 *
 * @param pattern  NUL-terminated needle
 * @return whatever bm_nprepare() returns for this pattern and its strlen()
 */
static void *bm_prepare(cstr pattern) {
    const i32 pattern_length = strlen(pattern);
    return bm_nprepare(pattern, pattern_length);
}

/**
 * Search `text` using a context previously built by bm_nprepare().
 *
 * The blob is read as: word 0 = pattern length, followed by the padded
 * pattern bytes, followed by the 256-entry bad-character shift table.
 *
 * @param text     haystack bytes (need not be NUL-terminated)
 * @param textlen  number of bytes of `text` to search
 * @param data     blob in the layout described above; not modified
 * @return offset of the first match inside `text`, or -1 when there is no
 *         match, `data` is null, or a length is negative. An empty pattern
 *         matches at 0.
 */
static ssize_t bm_run(cstr text, i32 textlen, const void *data) {
    if (!data || textlen < 0) return -1;

    const i32 patternlen = *(const i32 *)data;
    if (patternlen < 0) return -1;
    if (patternlen == 0) return 0;          // empty needle: match at the start
    if (patternlen > textlen) return -1;    // needle cannot fit

    cstr       pattern = (cstr)data + 4;
    const i32 *bc      = (const i32 *)data + 1 + PADDING_UP(patternlen, 4) / 4;
    const i32  last    = patternlen - 1;

    // `end` is the text index aligned with the last pattern byte. It only ever
    // moves forward, which guarantees termination.
    i32 end = last;
    for (;;) {
        // Compare right-to-left with separate cursors so `end` is untouched.
        i32 t = end;
        i32 p = last;
        while (text[t] == pattern[p]) {
            if (p == 0) return t;
            t--;
            p--;
        }

        // Horspool: always shift by the byte under the window's last position,
        // not by the byte at the mismatch position.
        const i32 shift = bc[(byte)text[end]];
        // Stop when the next window would run past the text; comparing against
        // the remaining room also avoids overflowing `end`.
        if (shift > textlen - 1 - end) return -1;
        end += shift;
    }
}
14 changes: 7 additions & 7 deletions include/algorithm/kmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ static ssize_t kmp_nsearch(cstr text, i32 textlen, cstr pattern, i32 patternlen)
}
if (j == patternlen) {
free(lps);
return i - j;
return i - j + 1;
}
}

Expand All @@ -35,7 +35,7 @@ static ssize_t kmp_search(cstr text, cstr pattern) {
}

static void *kmp_nprepare(cstr pattern, i32 patternlen) {
i32 *data = (i32 *)malloc(1 + PADDING_UP(patternlen, 4) + patternlen * 4);
i32 *data = (i32 *)calloc(1 + PADDING_UP(patternlen, 4) / 4 + patternlen, 4);
data[0] = patternlen;
memcpy(data + 1, pattern, patternlen);
i32 *lps = data + 1 + PADDING_UP(patternlen, 4) / 4;
Expand All @@ -49,17 +49,17 @@ static void *kmp_nprepare(cstr pattern, i32 patternlen) {
}
}

return lps - 1;
return data;
}

/**
 * NUL-terminated convenience wrapper around kmp_nprepare().
 *
 * @param pattern  NUL-terminated needle
 * @return whatever kmp_nprepare() returns for this pattern and its strlen()
 */
static void *kmp_prepare(cstr pattern) {
    const i32 pattern_length = strlen(pattern);
    return kmp_nprepare(pattern, pattern_length);
}

static ssize_t kmp_nrun(cstr text, i32 textlen, void *data) {
i32 patternlen = *(i32 *)data;
static ssize_t kmp_nrun(cstr text, i32 textlen, const void *data) {
i32 patternlen = *(const i32 *)data;
cstr pattern = (cstr)data + 4;
i32 *lps = (i32 *)data + 1 + PADDING_UP(patternlen, 4) / 4;
var lps = (const i32 *)data + 1 + PADDING_UP(patternlen, 4) / 4;

for (int i = 0, j = 0; i < textlen; i++) {
if (pattern[j] == text[i]) {
Expand All @@ -68,7 +68,7 @@ static ssize_t kmp_nrun(cstr text, i32 textlen, void *data) {
j = lps[j - 1];
i--;
}
if (j == patternlen) return i - j;
if (j == patternlen) return i - j + 1;
}

return -1;
Expand Down
1 change: 1 addition & 0 deletions include/define/config/undef.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#undef __wur
#undef __nonnull
#undef __attr_dealloc
#undef __attr_access
#undef __attribute_pure__

#undef INFINITY
Expand Down
2 changes: 1 addition & 1 deletion include/define/define/attribute.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
# define __attr_readonly(...)
# define __attr_writeonly(...)
#else
# define __attr_access(x) __attr(access x) // 这个 redefine 没有问题,忽略掉
# define __attr_access(x) __attr(access x)
# define __attr_readonly(...) __attr(access(read_only, ##__VA_ARGS__))
# define __attr_writeonly(...) __attr(access(write_only, ##__VA_ARGS__))
#endif
Expand Down
1 change: 1 addition & 0 deletions test/c/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
benchmark(c-rbtree rbtree-bench.cpp)
gtest(c-rbtree rbtree-test.cpp)
benchmark(c-avltree avltree-bench.cpp)
testexec(string-search string-search.c)
17 changes: 17 additions & 0 deletions test/c/string-search.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include <stdio.h>
#include <stdlib.h>

#define NO_STD 0
#include <algorithm.h>

/*
 * Smoke test for the string-search helpers: runs the same four cases
 * (match in the middle, match at offset 0, short pattern, no match) through
 * kmp_search() and bm_search() and checks the reported offsets.
 *
 * Declared as main(void): before C23 an empty parameter list is not a
 * prototype.
 */
int main(void) {
    // Knuth-Morris-Pratt
    assert(kmp_search("hello, world!", "world") == 7);
    assert(kmp_search("hello, world!", "hello") == 0);
    assert(kmp_search("hello, world!", "lo") == 3);
    assert(kmp_search("hello, world!", "wl") == -1);

    // Boyer-Moore
    assert(bm_search("hello, world!", "world") == 7);
    assert(bm_search("hello, world!", "hello") == 0);
    assert(bm_search("hello, world!", "lo") == 3);
    assert(bm_search("hello, world!", "wl") == -1);

    return 0;
}

0 comments on commit 1b08f9c

Please sign in to comment.