Skip to content

Commit

Permalink
Cache pcre subpattern table
Browse files Browse the repository at this point in the history
Recreating this over and over is pointless, cache this as well.
Fixes phpGH-14361.
  • Loading branch information
nielsdos committed Jun 1, 2024
1 parent 4fca8a6 commit 569c291
Showing 1 changed file with 41 additions and 44 deletions.
85 changes: 41 additions & 44 deletions ext/pcre/php_pcre.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ char *php_pcre_version;

struct _pcre_cache_entry {
pcre2_code *re;
zend_string **subpats_table;
uint32_t preg_options;
uint32_t capture_count;
uint32_t name_count;
uint32_t compile_options;
uint32_t refcount;
};
Expand Down Expand Up @@ -90,6 +90,8 @@ static MUTEX_T pcre_mt = NULL;

ZEND_TLS HashTable char_tables;

static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats, bool persistent);

static void php_pcre_free_char_table(zval *data)
{/*{{{*/
void *ptr = Z_PTR_P(data);
Expand Down Expand Up @@ -163,6 +165,9 @@ static void php_free_pcre_cache(zval *data) /* {{{ */
{
pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
if (!pce) return;
if (pce->subpats_table) {
free_subpats_table(pce->subpats_table, pce->capture_count + 1, true);
}
pcre2_code_free(pce->re);
free(pce);
}
Expand All @@ -172,6 +177,9 @@ static void php_efree_pcre_cache(zval *data) /* {{{ */
{
pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
if (!pce) return;
if (pce->subpats_table) {
free_subpats_table(pce->subpats_table, pce->capture_count + 1, false);
}
pcre2_code_free(pce->re);
efree(pce);
}
Expand Down Expand Up @@ -520,20 +528,21 @@ static int pcre_clean_cache(zval *data, void *arg)
}
/* }}} */

static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats, bool persistent) {
uint32_t i;
for (i = 0; i < num_subpats; i++) {
if (subpat_names[i]) {
zend_string_release_ex(subpat_names[i], false);
zend_string_release_ex(subpat_names[i], persistent);
}
}
efree(subpat_names);
pefree(subpat_names, persistent);
}

/* {{{ static make_subpats_table */
static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce, bool persistent)
{
uint32_t name_cnt = pce->name_count, name_size, ni = 0;
uint32_t num_subpats = pce->capture_count + 1;
uint32_t name_size, ni = 0;
char *name_table;
zend_string **subpat_names;
int rc1, rc2;
Expand All @@ -545,14 +554,21 @@ static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *
return NULL;
}

subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
subpat_names = pecalloc(num_subpats, sizeof(zend_string *), persistent);
while (ni++ < name_cnt) {
unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
const char *name = name_table + 2;
subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
/* Note: if we're making persistent strings, they will only be used within this thread.
* Although we will be storing them in user-exposed arrays, they cannot cause problems
* because they only live in this thread and the last reference is deleted on shutdown
* instead of by user code. */
subpat_names[name_idx] = zend_string_init(name, strlen(name), persistent);
if (persistent) {
GC_MAKE_PERSISTENT_LOCAL(subpat_names[name_idx]);
}
if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
free_subpats_table(subpat_names, num_subpats);
free_subpats_table(subpat_names, num_subpats, persistent);
return NULL;
}
name_table += name_size;
Expand Down Expand Up @@ -843,7 +859,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
return NULL;
}

rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
uint32_t name_count;
rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &name_count);
if (rc < 0) {
if (key != regex) {
zend_string_release_ex(key, 0);
Expand All @@ -853,6 +870,17 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
return NULL;
}

/* Compute and cache the subpattern table to avoid computing it again over and over. */
new_entry.subpats_table = make_subpats_table(name_count, &new_entry, !PCRE_G(per_request_cache));
if (!new_entry.subpats_table) {
if (key != regex) {
zend_string_release_ex(key, 0);
}
/* Warning already emitted by make_subpats_table() */
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}

/*
* Interned strings are not duplicated when stored in HashTable,
* but all the interned strings created during HTTP request are removed
Expand Down Expand Up @@ -1209,11 +1237,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
* allocate the table only if there are any named subpatterns.
*/
subpat_names = NULL;
if (subpats && pce->name_count > 0) {
subpat_names = make_subpats_table(num_subpats, pce);
if (!subpat_names) {
RETURN_FALSE;
}
if (subpats) {
subpat_names = pce->subpats_table;
}

matched = 0;
Expand All @@ -1225,9 +1250,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
if (!match_data) {
PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
if (subpat_names) {
free_subpats_table(subpat_names, num_subpats);
}
RETURN_FALSE;
}
}
Expand Down Expand Up @@ -1274,9 +1296,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
if (subpats != NULL) {
/* Try to get the list of substrings and display a warning if failed. */
if (UNEXPECTED(offsets[1] < offsets[0])) {
if (subpat_names) {
free_subpats_table(subpat_names, num_subpats);
}
if (match_sets) efree(match_sets);
php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
RETURN_FALSE;
Expand Down Expand Up @@ -1440,10 +1459,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
}
}

if (subpat_names) {
free_subpats_table(subpat_names, num_subpats);
}

if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
/* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
if ((pce->compile_options & PCRE2_UTF)
Expand Down Expand Up @@ -1857,18 +1872,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin

/* Calculate the size of the offsets array, and allocate memory for it. */
num_subpats = pce->capture_count + 1;

/*
* Build a mapping from subpattern numbers to their names. We will
* allocate the table only if there are any named subpatterns.
*/
subpat_names = NULL;
if (UNEXPECTED(pce->name_count > 0)) {
subpat_names = make_subpats_table(num_subpats, pce);
if (!subpat_names) {
return NULL;
}
}
subpat_names = pce->subpats_table;

alloc_len = 0;
result = NULL;
Expand All @@ -1888,9 +1892,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
if (!match_data) {
PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
if (subpat_names) {
free_subpats_table(subpat_names, num_subpats);
}
mdata_used = old_mdata_used;
return NULL;
}
Expand Down Expand Up @@ -2041,10 +2042,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
}
mdata_used = old_mdata_used;

if (UNEXPECTED(subpat_names)) {
free_subpats_table(subpat_names, num_subpats);
}

return result;
}
/* }}} */
Expand Down

0 comments on commit 569c291

Please sign in to comment.