-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
Copy pathpcre.nim
473 lines (412 loc) · 15.7 KB
/
pcre.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
#
#
# Nim's Runtime Library
# (c) Copyright 2015 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
# The current PCRE version information.
const
  # PCRE version information exposed by this wrapper. Entries must be indented
  # below `const`; at column 0 the section does not parse.
  PCRE_MAJOR*      = 8             ## Major version number
  PCRE_MINOR*      = 36            ## Minor version number
  PCRE_PRERELEASE* = true          ## Prerelease field of the version info
  PCRE_DATE*       = "2014-09-26"  ## Date string of the version info
# When an application links to a PCRE DLL in Windows, the symbols that are
# imported have to be identified as such. When building PCRE, the appropriate
# export setting is defined in pcre_internal.h, which includes this file. So we
# don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL.
# By default, we use the standard "extern" declarations.
# Allow for C++ users
# Public options. Some are compile-time only, some are run-time only, and some
# are both. Most of the compile-time options are saved with the compiled regex
# so that they can be inspected during studying (and therefore JIT compiling).
# Note that pcre_study() has its own set of options. Originally, all the options
# defined here used distinct bits. However, almost all the bits in a 32-bit word
# are now used, so in order to conserve them, option bits that were previously
# only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
# also be used for compile-time options that affect only compiling and are not
# relevant for studying or JIT compiling.
#
# Some options for pcre_compile() change its behaviour but do not affect the
# behaviour of the execution functions. Other options are passed through to the
# execution functions and affect their behaviour, with or without affecting the
# behaviour of pcre_compile().
#
# Options that can be passed to pcre_compile() are tagged Cx below, with these
# variants:
#
# C1 Affects compile only
# C2 Does not affect compile; affects exec, dfa_exec
# C3 Affects compile, exec, dfa_exec
# C4 Affects compile, exec, dfa_exec, study
# C5 Affects compile, exec, study
#
# Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged
# with E and D, respectively. They take precedence over C3, C4, and C5 settings
# passed from pcre_compile(). Those that are compatible with JIT execution are
# flagged with J.
const
  # Option bits (low 16 bits). The tag after each value records where the bit
  # is honoured: C1..C5 are the pcre_compile() variants, E = pcre_exec(),
  # D = pcre_dfa_exec(), J = compatible with JIT execution.
  # Names marked "Synonyms" deliberately share one value.
  CASELESS*        = 0x00000001  # C1
  MULTILINE*       = 0x00000002  # C1
  DOTALL*          = 0x00000004  # C1
  EXTENDED*        = 0x00000008  # C1
  ANCHORED*        = 0x00000010  # C4 E D
  DOLLAR_ENDONLY*  = 0x00000020  # C2
  EXTRA*           = 0x00000040  # C1
  NOTBOL*          = 0x00000080  #    E D J
  NOTEOL*          = 0x00000100  #    E D J
  UNGREEDY*        = 0x00000200  # C1
  NOTEMPTY*        = 0x00000400  #    E D J
  UTF8*            = 0x00000800  # C4        )
  UTF16*           = 0x00000800  # C4        ) Synonyms
  UTF32*           = 0x00000800  # C4        )
  NO_AUTO_CAPTURE* = 0x00001000  # C1
  NO_UTF8_CHECK*   = 0x00002000  # C1 E D J  )
  NO_UTF16_CHECK*  = 0x00002000  # C1 E D J  ) Synonyms
  NO_UTF32_CHECK*  = 0x00002000  # C1 E D J  )
  AUTO_CALLOUT*    = 0x00004000  # C1
  PARTIAL_SOFT*    = 0x00008000  #    E D J  ) Synonyms
  PARTIAL*         = 0x00008000  #    E D J  )
# This pair use the same bit.
const
  # Two distinct options overlaid on one bit: the first is tagged as
  # compile-only (C1), the second as pcre_dfa_exec()-only (D).
  NEVER_UTF*    = 0x00010000  # C1        ) Overlaid
  DFA_SHORTEST* = 0x00010000  #      D    ) Overlaid
# This pair use the same bit.
const
  # Two distinct options overlaid on one bit: the first is tagged as
  # compile-only (C1), the second as pcre_dfa_exec()-only (D).
  NO_AUTO_POSSESS* = 0x00020000  # C1        ) Overlaid
  DFA_RESTART*     = 0x00020000  #      D    ) Overlaid
const
  # Remaining option bits. The NEWLINE_* values are not independent single
  # bits: CRLF is CR or'ed with LF, and ANYCRLF is ANY or'ed with CR.
  FIRSTLINE*         = 0x00040000  # C3
  DUPNAMES*          = 0x00080000  # C1
  NEWLINE_CR*        = 0x00100000  # C3 E D
  NEWLINE_LF*        = 0x00200000  # C3 E D
  NEWLINE_CRLF*      = 0x00300000  # C3 E D
  NEWLINE_ANY*       = 0x00400000  # C3 E D
  NEWLINE_ANYCRLF*   = 0x00500000  # C3 E D
  BSR_ANYCRLF*       = 0x00800000  # C3 E D
  BSR_UNICODE*       = 0x01000000  # C3 E D
  JAVASCRIPT_COMPAT* = 0x02000000  # C5
  NO_START_OPTIMIZE* = 0x04000000  # C2 E D    ) Synonyms
  NO_START_OPTIMISE* = 0x04000000  # C2 E D    )
  PARTIAL_HARD*      = 0x08000000  #    E D J
  NOTEMPTY_ATSTART*  = 0x10000000  #    E D J
  UCP*               = 0x20000000  # C3
# Exec-time and get/set-time error codes
const
  # Exec-time and get/set-time error codes. All are negative; several names
  # intentionally alias the same value (noted in the trailing doc comments).
  ERROR_NOMATCH*          = -1
  ERROR_NULL*             = -2
  ERROR_BADOPTION*        = -3
  ERROR_BADMAGIC*         = -4
  ERROR_UNKNOWN_OPCODE*   = -5
  ERROR_UNKNOWN_NODE*     = -5   ## For backward compatibility
  ERROR_NOMEMORY*         = -6
  ERROR_NOSUBSTRING*      = -7
  ERROR_MATCHLIMIT*       = -8
  ERROR_CALLOUT*          = -9   ## Never used by PCRE itself
  ERROR_BADUTF8*          = -10  ## Same for 8/16/32
  ERROR_BADUTF16*         = -10  ## Same for 8/16/32
  ERROR_BADUTF32*         = -10  ## Same for 8/16/32
  ERROR_BADUTF8_OFFSET*   = -11  ## Same for 8/16
  ERROR_BADUTF16_OFFSET*  = -11  ## Same for 8/16
  ERROR_PARTIAL*          = -12
  ERROR_BADPARTIAL*       = -13
  ERROR_INTERNAL*         = -14
  ERROR_BADCOUNT*         = -15
  ERROR_DFA_UITEM*        = -16
  ERROR_DFA_UCOND*        = -17
  ERROR_DFA_UMLIMIT*      = -18
  ERROR_DFA_WSSIZE*       = -19
  ERROR_DFA_RECURSE*      = -20
  ERROR_RECURSIONLIMIT*   = -21
  ERROR_NULLWSLIMIT*      = -22  ## No longer actually used
  ERROR_BADNEWLINE*       = -23
  ERROR_BADOFFSET*        = -24
  ERROR_SHORTUTF8*        = -25
  ERROR_SHORTUTF16*       = -25  ## Same for 8/16
  ERROR_RECURSELOOP*      = -26
  ERROR_JIT_STACKLIMIT*   = -27
  ERROR_BADMODE*          = -28
  ERROR_BADENDIANNESS*    = -29
  ERROR_DFA_BADRESTART*   = -30
  ERROR_JIT_BADOPTION*    = -31
  ERROR_BADLENGTH*        = -32
  ERROR_UNSET*            = -33
# Specific error codes for UTF-8 validity checks
const
  # Specific error codes for UTF-8 validity checks; numbered consecutively
  # from 0 to 22.
  UTF8_ERR0*  = 0
  UTF8_ERR1*  = 1
  UTF8_ERR2*  = 2
  UTF8_ERR3*  = 3
  UTF8_ERR4*  = 4
  UTF8_ERR5*  = 5
  UTF8_ERR6*  = 6
  UTF8_ERR7*  = 7
  UTF8_ERR8*  = 8
  UTF8_ERR9*  = 9
  UTF8_ERR10* = 10
  UTF8_ERR11* = 11
  UTF8_ERR12* = 12
  UTF8_ERR13* = 13
  UTF8_ERR14* = 14
  UTF8_ERR15* = 15
  UTF8_ERR16* = 16
  UTF8_ERR17* = 17
  UTF8_ERR18* = 18
  UTF8_ERR19* = 19
  UTF8_ERR20* = 20
  UTF8_ERR21* = 21
  UTF8_ERR22* = 22  # Unused (was non-character)
# Specific error codes for UTF-16 validity checks
const
  # Specific error codes for UTF-16 validity checks.
  UTF16_ERR0* = 0
  UTF16_ERR1* = 1
  UTF16_ERR2* = 2
  UTF16_ERR3* = 3
  UTF16_ERR4* = 4  # Unused (was non-character)
# Specific error codes for UTF-32 validity checks
const
  # Specific error codes for UTF-32 validity checks. Note that the unused
  # slot here is ERR2, not the last entry.
  UTF32_ERR0* = 0
  UTF32_ERR1* = 1
  UTF32_ERR2* = 2  # Unused (was non-character)
  UTF32_ERR3* = 3
# Request types for pcre_fullinfo()
const
  # Request types for pcre_fullinfo(); passed as the `what` argument of
  # `fullinfo`. FIRSTBYTE and FIRSTCHAR are aliases of the same request.
  INFO_OPTIONS*             = 0
  INFO_SIZE*                = 1
  INFO_CAPTURECOUNT*        = 2
  INFO_BACKREFMAX*          = 3
  INFO_FIRSTBYTE*           = 4
  INFO_FIRSTCHAR*           = 4   ## For backwards compatibility
  INFO_FIRSTTABLE*          = 5
  INFO_LASTLITERAL*         = 6
  INFO_NAMEENTRYSIZE*       = 7
  INFO_NAMECOUNT*           = 8
  INFO_NAMETABLE*           = 9
  INFO_STUDYSIZE*           = 10
  INFO_DEFAULT_TABLES*      = 11
  INFO_OKPARTIAL*           = 12
  INFO_JCHANGED*            = 13
  INFO_HASCRORLF*           = 14
  INFO_MINLENGTH*           = 15
  INFO_JIT*                 = 16
  INFO_JITSIZE*             = 17
  INFO_MAXLOOKBEHIND*       = 18
  INFO_FIRSTCHARACTER*      = 19
  INFO_FIRSTCHARACTERFLAGS* = 20
  INFO_REQUIREDCHAR*        = 21
  INFO_REQUIREDCHARFLAGS*   = 22
  INFO_MATCHLIMIT*          = 23
  INFO_RECURSIONLIMIT*      = 24
  INFO_MATCH_EMPTY*         = 25
# Request types for pcre_config(). Do not re-arrange, in order to remain
# compatible.
const
  # Request types for pcre_config(); passed as the `what` argument of
  # `config`. The numbering is part of the C ABI: do not re-arrange.
  CONFIG_UTF8*                   = 0
  CONFIG_NEWLINE*                = 1
  CONFIG_LINK_SIZE*              = 2
  CONFIG_POSIX_MALLOC_THRESHOLD* = 3
  CONFIG_MATCH_LIMIT*            = 4
  CONFIG_STACKRECURSE*           = 5
  CONFIG_UNICODE_PROPERTIES*     = 6
  CONFIG_MATCH_LIMIT_RECURSION*  = 7
  CONFIG_BSR*                    = 8
  CONFIG_JIT*                    = 9
  CONFIG_UTF16*                  = 10
  CONFIG_JITTARGET*              = 11
  CONFIG_UTF32*                  = 12
  CONFIG_PARENS_LIMIT*           = 13
# Request types for pcre_study(). Do not re-arrange, in order to remain
# compatible.
const
  # Request types for pcre_study(); passed as the `options` argument of
  # `study`. Each value is a separate bit, so they can be or'ed together.
  STUDY_JIT_COMPILE*              = 0x0001
  STUDY_JIT_PARTIAL_SOFT_COMPILE* = 0x0002
  STUDY_JIT_PARTIAL_HARD_COMPILE* = 0x0004
  STUDY_EXTRA_NEEDED*             = 0x0008
# Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine
# these bits, just add new ones on the end, in order to remain compatible.
const
  # Bit flags for the pcre[16|32]_extra structure (the `flags` field of
  # `ExtraData`). One bit per flag; new flags may only be appended.
  EXTRA_STUDY_DATA*            = 0x0001
  EXTRA_MATCH_LIMIT*           = 0x0002
  EXTRA_CALLOUT_DATA*          = 0x0004
  EXTRA_TABLES*                = 0x0008
  EXTRA_MATCH_LIMIT_RECURSION* = 0x0010
  EXTRA_MARK*                  = 0x0020
  EXTRA_EXECUTABLE_JIT*        = 0x0040
# Types
type
  # Field-less object types used purely as pointer targets: the procs in this
  # file refer to `Pcre` and `JitStack` only through `ptr`.
  # NOTE(review): the *16 / *32 variants are presumably the handles of the
  # 16-bit and 32-bit libraries; no proc in this file uses them - confirm.
  Pcre*       = object
  Pcre16*     = object
  Pcre32*     = object
  JitStack*   = object
  JitStack16* = object
  JitStack32* = object
# The two record types below keep the C field names (snake_case), so style
# checking is switched off around them when the compiler supports it and
# restored by the matching pop at the end of this region.
when defined(nimHasStyleChecks):
  {.push styleChecks: off.}

# The structure for passing additional data to pcre_exec(). This is defined in
# such a way as to be extensible. Always add new fields at the end, in order
# to remain compatible.
type
  ExtraData* = object
    flags*: clong                  ## Bits for which fields are set
    study_data*: pointer           ## Opaque data from pcre_study()
    match_limit*: clong            ## Maximum number of calls to match()
    callout_data*: pointer         ## Data passed back in callouts
    tables*: pointer               ## Pointer to character tables
    match_limit_recursion*: clong  ## Max recursive calls to match()
    mark*: pointer                 ## For passing back a mark pointer
    executable_jit*: pointer       ## Contains a pointer to a compiled jit code

# The structure for passing out data via the pcre_callout_function. We use a
# structure so that new fields can be added on the end in future versions,
# without changing the API of the function, thereby allowing old clients to
# work without modification.
type
  CalloutBlock* = object
    version*         : cint       ## Identifies version of block
    # ------------------------ Version 0 -------------------------------
    callout_number*  : cint       ## Number compiled into pattern
    offset_vector*   : ptr cint   ## The offset vector
    subject*         : cstring    ## The subject being matched
    subject_length*  : cint       ## The length of the subject
    start_match*     : cint       ## Offset to start of this match attempt
    current_position*: cint       ## Where we currently are in the subject
    capture_top*     : cint       ## Max current capture
    capture_last*    : cint       ## Most recently closed capture
    callout_data*    : pointer    ## Data passed in with the call
    # ------------------- Added for Version 1 --------------------------
    pattern_position*: cint       ## Offset to next item in the pattern
    next_item_length*: cint       ## Length of next item in the pattern
    # ------------------- Added for Version 2 --------------------------
    mark*            : pointer    ## Pointer to current mark or NULL
    # ------------------------------------------------------------------

when defined(nimHasStyleChecks):
  {.pop.}
# User defined callback which provides a stack just before the match starts.
type
  # User-supplied callback type: a cdecl proc that receives one untyped
  # pointer and returns a `ptr JitStack`. It is the `callback` parameter type
  # of `assign_jit_stack`.
  JitCallback* = proc (a: pointer): ptr JitStack {.cdecl.}
# Select how the C library is bound:
#  * by default the procs are imported from a dynamic library whose file name
#    depends on the host OS (and, on Windows, on `nimOldDlls` / `cpu64`);
#  * when compiled with `-d:usePcreHeader` they are bound through <pcre.h>.
# Either branch pushes exactly one pragma, so the two trailing pops at the end
# of the proc list stay balanced whichever branch is taken.
when not defined(usePcreHeader):
  when hostOS == "windows":
    when defined(nimOldDlls):
      const pcreDll = "pcre.dll"
    elif defined(cpu64):
      const pcreDll = "pcre64.dll"
    else:
      const pcreDll = "pcre32.dll"
  elif hostOS == "macosx":
    # "(.3|.1|)" is a dynlib name pattern listing the accepted version
    # suffixes, including the empty one.
    const pcreDll = "libpcre(.3|.1|).dylib"
  else:
    const pcreDll = "libpcre.so(.3|.1|)"
  {.push dynlib: pcreDll.}
else:
  {.push header: "<pcre.h>".}

# Until the matching pop, every proc uses the C calling convention and is
# imported under its Nim name prefixed with "pcre_" ("$1" is the Nim name).
{.push cdecl, importc: "pcre_$1".}
# Exported PCRE functions
# None of the procs in this section has a Nim body: each is a foreign
# declaration that the enclosing `{.push cdecl, importc: "pcre_$1".}` maps
# onto the C symbol "pcre_" & <Nim name> (e.g. `compile` -> `pcre_compile`).
# Parameters of the same type that sit next to each other are grouped; names,
# order and types are unchanged.

proc compile*(pattern: cstring; options: cint; errptr: ptr cstring;
              erroffset: ptr cint; tableptr: pointer): ptr Pcre

proc compile2*(pattern: cstring; options: cint; errorcodeptr: ptr cint;
               errptr: ptr cstring; erroffset: ptr cint;
               tableptr: pointer): ptr Pcre

proc config*(what: cint; where: pointer): cint

proc copy_named_substring*(code: ptr Pcre; subject: cstring;
                           ovector: ptr cint; stringcount: cint;
                           stringname, buffer: cstring;
                           buffersize: cint): cint

proc copy_substring*(subject: cstring; ovector: ptr cint;
                     stringcount, stringnumber: cint; buffer: cstring;
                     buffersize: cint): cint

proc dfa_exec*(code: ptr Pcre; extra: ptr ExtraData; subject: cstring;
               length, startoffset, options: cint; ovector: ptr cint;
               ovecsize: cint; workspace: ptr cint; wscount: cint): cint

proc exec*(code: ptr Pcre; extra: ptr ExtraData; subject: cstring;
           length, startoffset, options: cint; ovector: ptr cint;
           ovecsize: cint): cint

proc jit_exec*(code: ptr Pcre; extra: ptr ExtraData; subject: cstring;
               length, startoffset, options: cint; ovector: ptr cint;
               ovecsize: cint; jstack: ptr JitStack): cint

proc free_substring*(stringptr: cstring)

proc free_substring_list*(stringptr: cstringArray)

proc fullinfo*(code: ptr Pcre; extra: ptr ExtraData; what: cint;
               where: pointer): cint

proc get_named_substring*(code: ptr Pcre; subject: cstring;
                          ovector: ptr cint; stringcount: cint;
                          stringname: cstring;
                          stringptr: cstringArray): cint

proc get_stringnumber*(code: ptr Pcre; name: cstring): cint

proc get_stringtable_entries*(code: ptr Pcre; name: cstring;
                              first, last: cstringArray): cint

proc get_substring*(subject: cstring; ovector: ptr cint;
                    stringcount, stringnumber: cint;
                    stringptr: cstringArray): cint

proc get_substring_list*(subject: cstring; ovector: ptr cint;
                         stringcount: cint;
                         listptr: ptr cstringArray): cint

proc maketables*(): pointer

proc refcount*(code: ptr Pcre; adjust: cint): cint

proc study*(code: ptr Pcre; options: cint;
            errptr: ptr cstring): ptr ExtraData

proc free_study*(extra: ptr ExtraData)

proc version*(): cstring
# Utility functions for byte order swaps.
# Bodiless foreign declaration; under the enclosing
# `{.push cdecl, importc: "pcre_$1".}` it binds the C symbol
# `pcre_pattern_to_host_byte_order`.
proc pattern_to_host_byte_order*(code: ptr Pcre; extra: ptr ExtraData;
                                 tables: pointer): cint
# JIT compiler related functions.
# Bodiless foreign declarations; the enclosing
# `{.push cdecl, importc: "pcre_$1".}` binds each to "pcre_" & <Nim name>.

proc jit_stack_alloc*(startsize, maxsize: cint): ptr JitStack

proc jit_stack_free*(stack: ptr JitStack)

proc assign_jit_stack*(extra: ptr ExtraData; callback: JitCallback;
                       data: pointer)

proc jit_free_unused_memory*()
# There was an odd function with `var cstring` instead of `ptr`
# Legacy overload of `study` whose error-message slot is `var cstring` rather
# than `ptr cstring`. Marked deprecated; it is still declared inside the
# `importc: "pcre_$1"` push, so it binds the same C symbol as the `ptr`
# overload.
proc study*(code: ptr Pcre; options: cint;
            errptr: var cstring): ptr ExtraData {.deprecated.}
{.pop.} # closes `{.push cdecl, importc: "pcre_$1".}`
{.pop.} # closes the `dynlib: pcreDll` / `header: "<pcre.h>"` push
type
  # Deprecated pointer aliases; use `ptr Pcre` / `ptr JitStack` directly, as
  # the proc signatures in this file do.
  PPcre* {.deprecated.} = ptr Pcre
  PJitStack* {.deprecated.} = ptr JitStack