Skip to content

Commit

Permalink
Update regexp.c to use pcre2
Browse files Browse the repository at this point in the history
  • Loading branch information
tobil4sk committed Feb 19, 2022
1 parent 2e98a47 commit bd4b7ac
Showing 1 changed file with 43 additions and 34 deletions.
77 changes: 43 additions & 34 deletions src/std/regexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,103 +20,112 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <hl.h>
#define PCRE_STATIC
#include <pcre.h>

#include <pcre2.h>

typedef struct _ereg ereg;

/*
 * File-wide matching limits shared by every regexp object.
 * This listing is a rendered diff of the PCRE1 -> PCRE2 port, so the removed
 * declaration and the one replacing it both appear.
 */
/* PCRE1 (removed side): extra block whose flags and match_limit_recursion are
 * filled in by hl_regexp_new_options and passed to pcre16_exec. */
static pcre16_extra limit;
/* PCRE2 (added side): match context created in hl_regexp_new_options and
 * passed to pcre2_match_16 by hl_regexp_match. */
static pcre2_match_context_16 *match_context;

/*
 * Regexp object returned by hl_regexp_new_options.
 * This listing is a rendered diff of the PCRE1 -> PCRE2 port, so the removed
 * PCRE1 members (p, matches, nmatches) appear next to the PCRE2 members that
 * replace them (regex, n_groups, match_data).
 */
struct _ereg {
/* Cleanup callback; hl_regexp_new_options sets it to regexp_finalize.
 * NOTE(review): presumably must stay the first member for
 * hl_gc_alloc_finalizer - confirm against the GC implementation. */
void (*finalize)( ereg * );
/* PCRE1 (removed side): compiled pattern */
pcre16 *p;
/* PCRE1 (removed side): malloc'ed offsets array of 3 * nmatches ints */
int *matches;
/* PCRE1 (removed side): capture count plus one */
int nmatches;
/* The compiled regex code */
pcre2_code_16 *regex;
/* Number of capture groups plus one (hl_regexp_new_options increments the
 * count reported by PCRE2_INFO_CAPTURECOUNT) */
int n_groups;

/* Pointer to the allocated memory for match data */
pcre2_match_data_16 *match_data;
/* Whether the last string was matched successfully */
bool matched;
};

/*
 * Finalizer stored in ereg.finalize: releases the native resources owned by
 * the regexp object e.
 * This listing is a rendered diff, so the removed PCRE1 calls appear before
 * the PCRE2 calls that replace them.
 */
static void regexp_finalize( ereg *e ) {
/* PCRE1 (removed side): free the compiled pattern and the offsets array */
pcre16_free(e->p);
free(e->matches);
/* PCRE2 (added side): free the compiled code and the match data block */
pcre2_code_free_16(e->regex);
pcre2_match_data_free_16(e->match_data);
}

/*
 * Compiles the pattern str with the option letters in opts and returns a new
 * regexp object.
 *
 * str  : zero-terminated pattern (passed to PCRE as a 16-bit string).
 * opts : zero-terminated list of flags: 'i' caseless, 's' dot-all,
 *        'm' multiline, 'g' ungreedy, 'u' accepted and ignored.
 *
 * Returns NULL when opts contains an unknown letter. On a compilation error a
 * dynamic holding "Regexp compilation error : <message> in <pattern>" is
 * thrown with hl_throw.
 *
 * This listing is a rendered diff of the PCRE1 -> PCRE2 port: removed PCRE1
 * lines are kept immediately before the PCRE2 lines that replace them.
 */
HL_PRIM ereg *hl_regexp_new_options( vbyte *str, vbyte *opts ) {
ereg *r;
const char *error;
int err_offset;
int errorcode;
pcre16 *p;
int error_code;
size_t error_offset;
pcre2_code_16 *p;
uchar *o = (uchar*)opts;
int options = PCRE_JAVASCRIPT_COMPAT | PCRE_UCP | PCRE_UTF16;
int options = PCRE2_UCP | PCRE2_UTF;
while( *o ) {
switch( *o++ ) {
case 'i':
options |= PCRE_CASELESS;
options |= PCRE2_CASELESS;
break;
case 's':
options |= PCRE_DOTALL;
options |= PCRE2_DOTALL;
break;
case 'm':
options |= PCRE_MULTILINE;
options |= PCRE2_MULTILINE;
break;
case 'u':
break;
case 'g':
options |= PCRE_UNGREEDY;
options |= PCRE2_UNGREEDY;
break;
default:
return NULL;
}
}
p = pcre16_compile2((PCRE_SPTR16)str,options,&errorcode,&error,&err_offset,NULL);
p = pcre2_compile_16((PCRE2_SPTR16)str,PCRE2_ZERO_TERMINATED,options,&error_code,&error_offset,NULL);
if( p == NULL ) {
hl_buffer *b = hl_alloc_buffer();
vdynamic *d = hl_alloc_dynamic(&hlt_bytes);
PCRE2_UCHAR error_buffer[256];
// the buffer length is expressed in code units, not bytes
pcre2_get_error_message_16(error_code,error_buffer,sizeof(error_buffer) / sizeof(error_buffer[0]));
hl_buffer_str(b,USTR("Regexp compilation error : "));
hl_buffer_cstr(b,error);
hl_buffer_str(b,error_buffer);
hl_buffer_str(b,USTR(" in "));
hl_buffer_str(b,(uchar*)str);
d->v.bytes = (vbyte*)hl_buffer_content(b,NULL);
hl_throw(d);
}
r = (ereg*)hl_gc_alloc_finalizer(sizeof(ereg));
r->finalize = regexp_finalize;
r->p = p;
r->nmatches = 0;
r->regex = p;
r->matched = 0;
pcre16_fullinfo(p,NULL,PCRE_INFO_CAPTURECOUNT,&r->nmatches);
r->nmatches++;
r->matches = (int*)malloc(sizeof(int) * 3 * r->nmatches);
limit.flags = PCRE_EXTRA_MATCH_LIMIT_RECURSION;
limit.match_limit_recursion = 3500; // adapted based on Windows 1MB stack size
r->n_groups = 0;
pcre2_pattern_info_16(p,PCRE2_INFO_CAPTURECOUNT,&r->n_groups);
// one extra slot for the whole match
r->n_groups++;
r->match_data = pcre2_match_data_create_from_pattern_16(r->regex, NULL);
// without match data the object is unusable; the finalizer will still
// release the compiled code once r is collected
if( r->match_data == NULL )
hl_error("Failed to allocate regexp match data");

// the match context is shared by every regexp object: create and configure
// it once instead of leaking a fresh context on each call
if( match_context == NULL ) {
match_context = pcre2_match_context_create_16(NULL);
if( match_context != NULL )
pcre2_set_depth_limit_16(match_context, 3500); // adapted based on Windows 1MB stack size
}
return r;
}

/*
 * Returns the start offset of group m in the last match and, when len is not
 * NULL, stores the group's length (end offset minus start offset) in *len.
 * Calls hl_error when the regexp has no recorded match or when m is outside
 * [0, group count).
 * This listing is a rendered diff of the PCRE1 -> PCRE2 port: removed PCRE1
 * lines are kept immediately before the PCRE2 lines that replace them.
 */
HL_PRIM int hl_regexp_matched_pos( ereg *e, int m, int *len ) {
int start;
/* PCRE2 (added side): offsets are read from the match data block as
 * (start, end) pairs, narrowed from size_t to int below */
size_t *matches = pcre2_get_ovector_pointer_16(e->match_data);
if( !e->matched )
/* removed-side message, followed by the added-side message */
hl_error("Calling matchedPos() on an unmatched regexp");
if( m < 0 || m >= e->nmatches )
hl_error("Calling regexp_matched_pos() on an unmatched regexp");
if( m < 0 || m >= e->n_groups )
hl_error("Matched index %d outside bounds",m);
/* PCRE1 (removed side): offsets come from the object's own int array */
start = e->matches[m*2];
if( len ) *len = e->matches[m*2+1] - start;
/* PCRE2 (added side) */
start = matches[m*2];
if( len ) *len = matches[m*2+1] - start;
return start;
}

/*
 * Returns the number of groups available after a successful match (the
 * capture count plus one, as stored at construction), or -1 when the last
 * match attempt did not succeed.
 * This listing is a rendered diff: the removed PCRE1 return statement is
 * kept immediately before the PCRE2 one that replaces it.
 */
HL_PRIM int hl_regexp_matched_num( ereg *e ) {
if( !e->matched )
return -1;
else
/* PCRE1 (removed side) */
return e->nmatches;
/* PCRE2 (added side) */
return e->n_groups;
}

/*
 * Runs the compiled pattern of e against the 16-bit string s.
 * pos is passed to PCRE as the start offset and pos+len as the subject
 * length. The outcome is recorded in e->matched.
 * Returns true on a match and false when PCRE reports "no match"; any other
 * negative result is reported through hl_error.
 * This listing is a rendered diff of the PCRE1 -> PCRE2 port: removed PCRE1
 * lines are kept immediately before the PCRE2 lines that replace them.
 */
HL_PRIM bool hl_regexp_match( ereg *e, vbyte *s, int pos, int len ) {
/* PCRE1 (removed side): offsets written to e->matches, limits taken from `limit` */
int res = pcre16_exec(e->p,&limit,(PCRE_SPTR16)s,pos+len,pos,PCRE_NO_UTF16_CHECK,e->matches,e->nmatches * 3);
/* PCRE2 (added side): offsets written to e->match_data, limits taken from
 * match_context; UTF validity checking of s is skipped */
int res = pcre2_match_16(e->regex,(PCRE2_SPTR16)s,pos+len,pos,PCRE2_NO_UTF_CHECK,e->match_data,match_context);
e->matched = res >= 0;
if( res >= 0 )
return true;
/* anything other than "no match" is a genuine matching error */
if( res != PCRE_ERROR_NOMATCH )
hl_error("An error occurred while running pcre_exec");
if( res != PCRE2_ERROR_NOMATCH )
hl_error("An error occurred while running pcre2_match_16()");
return false;
}

Expand Down

0 comments on commit bd4b7ac

Please sign in to comment.