Skip to content

Commit

Permalink
ext/pgsql: pg_convert/pg_insert/pg_update/pg_delete caching regexes.
Browse files Browse the repository at this point in the history
Close GH-15039
  • Loading branch information
devnexen committed Jul 20, 2024
1 parent 2cfcfe0 commit ba54ceb
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 77 deletions.
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ PHP NEWS
. Added getApiVersion() and removed from getAttribute().
(SakiTakamachi)

- PGSQL:
. pg_convert/pg_insert/pg_update/pg_delete: regexes are now cached.
(David Carlier)

- Standard:
. Fix references in request_parse_body() options array. (nielsdos)
. Add RoundingMode enum. (timwolla, saki)
Expand Down
146 changes: 70 additions & 76 deletions ext/pgsql/pgsql.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,8 +490,55 @@ static PHP_GINIT_FUNCTION(pgsql)
#if defined(COMPILE_DL_PGSQL) && defined(ZTS)
ZEND_TSRMLS_CACHE_UPDATE();
#endif

size_t i = 0;
memset(pgsql_globals, 0, sizeof(zend_pgsql_globals));
zend_hash_init(&pgsql_globals->connections, 0, NULL, NULL, 1);

/*
 * ADD_REGEX(reg): build a zend_string from the pattern literal `reg`
 * (persistent allocation, third argument `true`) and store it in the next
 * free slot of pgsql_globals->regexes, then advance the slot counter `i`.
 *
 * The assertion guards against registering more patterns than the array
 * holds (PGSQL_MAX_REGEXES).
 *
 * Registration order is significant: php_pgsql_convert() selects patterns
 * by hard-coded index (PGG(regexes[0]) ... PGG(regexes[10])), so inserting
 * or reordering entries changes which pattern each data type is checked
 * against.
 *
 * Patterns are written with `#` delimiters and trailing modifiers because
 * they are later passed to pcre_get_compiled_regex_cache().
 */
#define ADD_REGEX(reg) \
do { \
ZEND_ASSERT(i < PGSQL_MAX_REGEXES); \
pgsql_globals->regexes[i ++] = zend_string_init(reg, strlen(reg), true);\
} while(0)
ADD_REGEX("#^([+-]{0,1}[0-9]+)$#n");
ADD_REGEX("#^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$#n");
ADD_REGEX("#^[+-]{0,1}(inf)(inity){0,1}$#ni");
ADD_REGEX("#^[0-9]+$#n");
ADD_REGEX("#^((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])(\\/[0-9]{1,3})?$#n");
ADD_REGEX("#^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))(\\/[0-9]{1,3})?$#n");
ADD_REGEX("#^([0-9]{4}[/-][0-9]{1,2}[/-][0-9]{1,2})(([ \\t]+|T)(([0-9]{1,2}:[0-9]{1,2}){1}(:[0-9]{1,2}){0,1}(\\.[0-9]+){0,1}([ \\t]*([+-][0-9]{1,4}(:[0-9]{1,2}){0,1}|[-a-zA-Z_/+]{1,50})){0,1})){0,1}$#ni");
ADD_REGEX("#^([0-9]{4}[/-][0-9]{1,2}[/-][0-9]{1,2})$#ni");
ADD_REGEX("#^(([0-9]{1,2}:[0-9]{1,2}){1}(:[0-9]{1,2}){0,1}){0,1}$#ni");
ADD_REGEX("#^(@?[ \\t]+)?("
/* Textual time units and their abbreviations: */
"(([-+]?[ \\t]+)?"
"[0-9]+(\\.[0-9]*)?[ \\t]*"
"(millenniums|millennia|millennium|mil|mils|"
"centuries|century|cent|c|"
"decades|decade|dec|decs|"
"years|year|y|"
"months|month|mon|"
"weeks|week|w|"
"days|day|d|"
"hours|hour|hr|hrs|h|"
"minutes|minute|mins|min|m|"
"seconds|second|secs|sec|s))+|"
/* Textual time units plus (dd)* hh[:mm[:ss]] */
"((([-+]?[ \\t]+)?"
"[0-9]+(\\.[0-9]*)?[ \\t]*"
"(millenniums|millennia|millennium|mil|mils|"
"centuries|century|cent|c|"
"decades|decade|dec|decs|"
"years|year|y|"
"months|month|mon|"
"weeks|week|w|"
"days|day|d))+"
"([-+]?[ \\t]+"
"([0-9]+[ \\t]+)+" /* dd */
"(([0-9]{1,2}:){0,2}[0-9]{0,2})" /* hh:[mm:[ss]] */
")?))"
"([ \\t]+ago)?$#ni");
ADD_REGEX("#^([0-9a-f]{2,2}:){5,5}[0-9a-f]{2,2}$#ni");
}

static void php_libpq_version(char *buf, size_t len)
Expand Down Expand Up @@ -560,6 +607,9 @@ PHP_MSHUTDOWN_FUNCTION(pgsql)
UNREGISTER_INI_ENTRIES();
zend_hash_destroy(&PGG(connections));

for (size_t i = 0; i < PGSQL_MAX_REGEXES; i ++)
zend_string_release_ex(PGG(regexes[i]), true);

return SUCCESS;
}

Expand Down Expand Up @@ -4671,12 +4721,11 @@ static php_pgsql_data_type php_pgsql_get_data_type(const zend_string *type_name)
/* {{{ php_pgsql_convert_match
* Test the field value against the specified regular expression.
*/
static int php_pgsql_convert_match(const zend_string *str, const char *regex , size_t regex_len, int icase)
static int php_pgsql_convert_match(const zend_string *str, zend_string *regex)
{
pcre_cache_entry *centry;
pcre2_code *re;
PCRE2_SIZE err_offset;
int res, errnumber;
uint32_t options = PCRE2_NO_AUTO_CAPTURE;
int res;
size_t i;
pcre2_match_data *match_data;

Expand All @@ -4689,27 +4738,21 @@ static int php_pgsql_convert_match(const zend_string *str, const char *regex , s
}
}

if (icase) {
options |= PCRE2_CASELESS;
}

re = pcre2_compile((PCRE2_SPTR)regex, regex_len, options, &errnumber, &err_offset, php_pcre_cctx());
if (NULL == re) {
PCRE2_UCHAR err_msg[128];
pcre2_get_error_message(errnumber, err_msg, sizeof(err_msg));
php_error_docref(NULL, E_WARNING, "Cannot compile regex: '%s'", err_msg);
centry = pcre_get_compiled_regex_cache(regex);
if (NULL == centry) {
return FAILURE;
}

re = php_pcre_pce_re(centry);
match_data = php_pcre_create_match_data(0, re);
if (NULL == match_data) {
pcre2_code_free(re);
php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
return FAILURE;
}
php_pcre_pce_incref(centry);
res = pcre2_match(re, (PCRE2_SPTR)ZSTR_VAL(str), ZSTR_LEN(str), 0, 0, match_data, php_pcre_mctx());
php_pcre_free_match_data(match_data);
pcre2_code_free(re);
php_pcre_pce_decref(centry);

if (res == PCRE2_ERROR_NOMATCH) {
return FAILURE;
Expand Down Expand Up @@ -4890,14 +4933,12 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
}
else {
/* FIXME: better regex must be used */
#define REGEX0 "^([+-]{0,1}[0-9]+)$"
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 0) == FAILURE) {
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[0])) == FAILURE) {
err = 1;
}
else {
ZVAL_STRINGL(&new_val, Z_STRVAL_P(val), Z_STRLEN_P(val));
}
#undef REGEX0
}
break;

Expand Down Expand Up @@ -4933,11 +4974,9 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
ZVAL_STR(&new_val, ZSTR_KNOWN(ZEND_STR_NULL));
}
else {
#define REGEX0 "^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$"
#define REGEX1 "^[+-]{0,1}(inf)(inity){0,1}$"
/* better regex? */
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 0) == FAILURE) {
if (php_pgsql_convert_match(Z_STR_P(val), REGEX1, sizeof(REGEX1)-1, 1) == FAILURE) {
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[1])) == FAILURE) {
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[2])) == FAILURE) {
err = 1;
} else {
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
Expand All @@ -4946,8 +4985,6 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
else {
ZVAL_STRING(&new_val, Z_STRVAL_P(val));
}
#undef REGEX0
#undef REGEX1
}
break;

Expand Down Expand Up @@ -5043,7 +5080,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
}
else {
/* better regex? */
if (php_pgsql_convert_match(Z_STR_P(val), "^[0-9]+$", sizeof("^[0-9]+$")-1, 0) == FAILURE) {
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[3])) == FAILURE) {
err = 1;
}
else {
Expand Down Expand Up @@ -5083,20 +5120,16 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
ZVAL_STR(&new_val, ZSTR_KNOWN(ZEND_STR_NULL));
}
else {
#define REGEX0 "^((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])(\\/[0-9]{1,3})?$"
#define REGEX1 "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))(\\/[0-9]{1,3})?$"
/* The inet type holds an IPv4 or IPv6 host address, and optionally its subnet, all in one field. See the documentation for more.
The regex may still not be perfect, but it catches most IP variants. We might decide to remove the regex
altogether and let the server side handle it. */
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 0) == FAILURE
&& php_pgsql_convert_match(Z_STR_P(val), REGEX1, sizeof(REGEX1)-1, 0) == FAILURE) {
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[4])) == FAILURE
&& php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[5])) == FAILURE) {
err = 2;
}
else {
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
}
#undef REGEX0
#undef REGEX1
}
break;

Expand Down Expand Up @@ -5127,14 +5160,12 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
} else if (zend_string_equals_literal_ci(Z_STR_P(val), "now()")) {
ZVAL_STRINGL(&new_val, "NOW()", sizeof("NOW()")-1);
} else {
#define REGEX0 "^([0-9]{4}[/-][0-9]{1,2}[/-][0-9]{1,2})(([ \\t]+|T)(([0-9]{1,2}:[0-9]{1,2}){1}(:[0-9]{1,2}){0,1}(\\.[0-9]+){0,1}([ \\t]*([+-][0-9]{1,4}(:[0-9]{1,2}){0,1}|[-a-zA-Z_/+]{1,50})){0,1})){0,1}$"
/* better regex? */
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 1) == FAILURE) {
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[6])) == FAILURE) {
err = 1;
} else {
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
}
#undef REGEX0
}
break;

Expand All @@ -5158,15 +5189,13 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
ZVAL_STR(&new_val, ZSTR_KNOWN(ZEND_STR_NULL));
}
else {
#define REGEX0 "^([0-9]{4}[/-][0-9]{1,2}[/-][0-9]{1,2})$"
/* FIXME: better regex must be used */
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 1) == FAILURE) {
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[7])) == FAILURE) {
err = 1;
}
else {
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
}
#undef REGEX0
}
break;

Expand All @@ -5190,15 +5219,13 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
ZVAL_STR(&new_val, ZSTR_KNOWN(ZEND_STR_NULL));
}
else {
#define REGEX0 "^(([0-9]{1,2}:[0-9]{1,2}){1}(:[0-9]{1,2}){0,1}){0,1}$"
/* FIXME: better regex must be used */
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 1) == FAILURE) {
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[8])) == FAILURE) {
err = 1;
}
else {
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
}
#undef REGEX0
}
break;

Expand Down Expand Up @@ -5239,44 +5266,13 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
unit markings. For example, '1 12:59:10' is read the same as '1 day 12 hours 59 min 10
sec'.
*/
#define REGEX0 \
"^(@?[ \\t]+)?(" \
/* Textual time units and their abbreviations: */ \
"(([-+]?[ \\t]+)?" \
"[0-9]+(\\.[0-9]*)?[ \\t]*" \
"(millenniums|millennia|millennium|mil|mils|" \
"centuries|century|cent|c|" \
"decades|decade|dec|decs|" \
"years|year|y|" \
"months|month|mon|" \
"weeks|week|w|" \
"days|day|d|" \
"hours|hour|hr|hrs|h|" \
"minutes|minute|mins|min|m|" \
"seconds|second|secs|sec|s))+|" \
/* Textual time units plus (dd)* hh[:mm[:ss]] */ \
"((([-+]?[ \\t]+)?" \
"[0-9]+(\\.[0-9]*)?[ \\t]*" \
"(millenniums|millennia|millennium|mil|mils|" \
"centuries|century|cent|c|" \
"decades|decade|dec|decs|" \
"years|year|y|" \
"months|month|mon|" \
"weeks|week|w|" \
"days|day|d))+" \
"([-+]?[ \\t]+" \
"([0-9]+[ \\t]+)+" /* dd */ \
"(([0-9]{1,2}:){0,2}[0-9]{0,2})" /* hh:[mm:[ss]] */ \
")?))" \
"([ \\t]+ago)?$"

if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 1) == FAILURE) {

if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[9])) == FAILURE) {
err = 1;
}
else {
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
}
#undef REGEX0
}
break;

Expand Down Expand Up @@ -5341,14 +5337,12 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
ZVAL_STR(&new_val, ZSTR_KNOWN(ZEND_STR_NULL));
}
else {
#define REGEX0 "^([0-9a-f]{2,2}:){5,5}[0-9a-f]{2,2}$"
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 1) == FAILURE) {
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[10])) == FAILURE) {
err = 1;
}
else {
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
}
#undef REGEX0
}
break;

Expand Down
5 changes: 4 additions & 1 deletion ext/pgsql/php_pgsql.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,17 @@ static const php_stream_ops php_stream_pgsql_fd_ops = {
php_pgsql_fd_set_option
};

/* Number of slots in the module-global `regexes` array. The module shutdown
 * loop releases every slot up to this count, so it is expected to equal the
 * number of ADD_REGEX() registrations made in PHP_GINIT_FUNCTION(pgsql). */
#define PGSQL_MAX_REGEXES 11

ZEND_BEGIN_MODULE_GLOBALS(pgsql)
zend_long num_links,num_persistent;
zend_long max_links,max_persistent;
bool allow_persistent;
int ignore_notices;
int ignore_notices;
zend_long auto_reset_persistent;
int log_notices;
zend_object *default_link; /* default link when connection is omitted */
zend_string *regexes[PGSQL_MAX_REGEXES];
HashTable field_oids;
HashTable table_oids;
HashTable connections;
Expand Down

0 comments on commit ba54ceb

Please sign in to comment.