-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscws_header.lua
214 lines (164 loc) · 3.85 KB
/
scws_header.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
local ffi = require("ffi")
-- Registers the C type and function declarations of the scws_* API with the
-- FFI so they can be referenced from Lua through the `ffi` module.
--
-- The declarations appear in dependency order; later groups use typedefs
-- introduced by earlier ones (xtree_st holds a pool_t, struct scws_rule holds
-- an xtree_t, struct scws_st holds an xdict_t and a rule_t), so the order of
-- the text below must be preserved:
--
--   * pool  : struct pheap, struct pclean, pool_st / pool_t;
--             pool_new, pool_free, pmalloc, pmalloc_x, pmalloc_z,
--             pstrdup, pstrndup.
--   * xtree : struct tree_node (node_st / node_t), xtree_st / xtree_t;
--             xtree_hasher, xtree_new, xtree_free, xtree_put, xtree_nput,
--             xtree_get, xtree_nget, xtree_optimize, xtree_to_xdb.
--   * rule  : struct scws_rule_item (rule_item_t),
--             struct scws_rule_attr (rule_attr_t),
--             struct scws_rule (rule_st / rule_t);
--             scws_rule_new, scws_rule_free, scws_rule_get,
--             scws_rule_checkbit, scws_rule_attr_ratio, scws_rule_check.
--   * xdict : struct scws_word (word_st / word_t),
--             struct scws_xdict (xdict_st / xdict_t);
--             xdict_open, xdict_close, xdict_add, xdict_query.
--   * scws  : struct scws_result (scws_res_t), struct scws_topword
--             (scws_top_t), struct scws_zchar, struct scws_st (scws_t);
--             scws_new, scws_free, scws_fork, scws_add_dict, scws_set_dict,
--             scws_set_charset, scws_set_rule, scws_set_ignore,
--             scws_set_multi, scws_set_debug, scws_set_duality,
--             scws_send_text, scws_get_result, scws_free_result,
--             scws_get_tops, scws_free_tops, scws_get_words, scws_has_word.
--
-- This statement only declares symbols: no ffi.load call appears here, so the
-- shared library that exports these functions has to be loaded elsewhere.
-- Nothing is returned after the call, so the file is loaded purely for this
-- side effect.
--
-- NOTE(review): the struct layouts and literal array sizes (e.g. items[32],
-- name[17]) presumably mirror the C headers of the libscws build in use --
-- confirm against the installed headers, because field order and sizes here
-- determine how the FFI reads memory returned by the library.
ffi.cdef[[
struct pheap
{
int size;
int used;
char block[0];
};
struct pclean
{
void *obj;
struct pclean *nxt;
};
typedef struct
{
int size;
int dirty;
struct pheap *heap;
struct pclean *clean;
} pool_st, *pool_t;
pool_t pool_new();
void pool_free(pool_t p);
void *pmalloc(pool_t p, int size);
void *pmalloc_x(pool_t p, int size, char c);
void *pmalloc_z(pool_t p, int size);
char *pstrdup(pool_t p, const char *s);
char *pstrndup(pool_t p, const char *s, int l);
typedef struct tree_node node_st, *node_t;
struct tree_node
{
char *key;
void *value;
int vlen;
node_t left;
node_t right;
};
typedef struct
{
pool_t p;
int base;
int prime;
int count;
node_t *trees;
} xtree_st, *xtree_t;
int xtree_hasher(xtree_t xt, const char *key, int len);
xtree_t xtree_new(int base, int prime);
void xtree_free(xtree_t xt);
void xtree_put(xtree_t xt, const char *value, const char *key);
void xtree_nput(xtree_t xt, void *value, int vlen, const char *key, int len);
void *xtree_get(xtree_t xt, const char *key, int *vlen);
void *xtree_nget(xtree_t xt, const char *key, int len, int *vlen);
void xtree_optimize(xtree_t xt);
void xtree_to_xdb(xtree_t xt, const char *fpath);
typedef struct scws_rule_item
{
short flag;
char zmin;
char zmax;
char name[17];
char attr[3];
float tf;
float idf;
unsigned int bit;
unsigned int inc;
unsigned int exc;
} *rule_item_t;
typedef struct scws_rule_attr *rule_attr_t;
struct scws_rule_attr
{
char attr1[2];
char attr2[2];
unsigned char npath[2];
short ratio;
rule_attr_t next;
};
typedef struct scws_rule
{
xtree_t tree;
rule_attr_t attr;
struct scws_rule_item items[32];
} rule_st, *rule_t;
rule_t scws_rule_new(const char *fpath, unsigned char *mblen);
void scws_rule_free(rule_t r);
rule_item_t scws_rule_get(rule_t r, const char *str, int len);
int scws_rule_checkbit(rule_t r, const char *str, int len, unsigned int bit);
int scws_rule_attr_ratio(rule_t r, const char *attr1, const char *attr2, const unsigned char *npath);
int scws_rule_check(rule_t r, rule_item_t cr, const char *str, int len);
typedef struct scws_word
{
float tf;
float idf;
unsigned char flag;
char attr[3];
} word_st, *word_t;
typedef struct scws_xdict
{
void *xdict;
int xmode;
struct scws_xdict *next;
} xdict_st, *xdict_t;
xdict_t xdict_open(const char *fpath, int mode);
void xdict_close(xdict_t xd);
xdict_t xdict_add(xdict_t xd, const char *fpath, int mode, unsigned char *ml);
word_t xdict_query(xdict_t xd, const char *key, int len);
typedef struct scws_result *scws_res_t;
struct scws_result
{
int off;
float idf;
unsigned char len;
char attr[3];
scws_res_t next;
};
typedef struct scws_topword *scws_top_t;
struct scws_topword
{
char *word;
float weight;
short times;
char attr[2];
scws_top_t next;
};
struct scws_zchar
{
int start;
int end;
};
typedef struct scws_st scws_st, *scws_t;
struct scws_st
{
scws_t p;
xdict_t d;
rule_t r;
unsigned char *mblen;
unsigned int mode;
unsigned char *txt;
int zis;
int len;
int off;
int wend;
scws_res_t res0;
scws_res_t res1;
word_t **wmap;
struct scws_zchar *zmap;
};
scws_t scws_new();
void scws_free(scws_t s);
scws_t scws_fork(scws_t s);
int scws_add_dict(scws_t s, const char *fpath, int mode);
int scws_set_dict(scws_t s, const char *fpath, int mode);
void scws_set_charset(scws_t s, const char *cs);
void scws_set_rule(scws_t s, const char *fpath);
void scws_set_ignore(scws_t s, int yes);
void scws_set_multi(scws_t s, int mode);
void scws_set_debug(scws_t s, int yes);
void scws_set_duality(scws_t s, int yes);
void scws_send_text(scws_t s, const char *text, int len);
scws_res_t scws_get_result(scws_t s);
void scws_free_result(scws_res_t result);
scws_top_t scws_get_tops(scws_t s, int limit, char *xattr);
void scws_free_tops(scws_top_t tops);
scws_top_t scws_get_words(scws_t s, char *xattr);
int scws_has_word(scws_t s, char *xattr);
]]