/* * RE_wchar_execute - execute a RE on pg_wchar data * * Returns TRUE on match, FALSE on no match * * re --- the compiled pattern as returned by RE_compile_and_cache * data --- the data to match against (need not be null-terminated) * data_len --- the length of the data string * start_search -- the offset in the data to start searching * nmatch, pmatch --- optional return area for match details * * Data is given as array of pg_wchar which is what Spencer's regex package * wants. */ static bool RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len, int start_search, int nmatch, regmatch_t *pmatch) { int regexec_result; char errMsg[100]; /* Perform RE match and return result */ regexec_result = pg_regexec(re, data, data_len, start_search, NULL, /* no details */ nmatch, pmatch, 0); if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH) { /* re failed??? */ CHECK_FOR_INTERRUPTS(); pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); ereport(ERROR, (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), errmsg("regular expression failed: %s", errMsg))); } return (regexec_result == REG_OKAY); }
/* * regexp_fixed_prefix - extract fixed prefix, if any, for a regexp * * The result is NULL if there is no fixed prefix, else a palloc'd string. * If it is an exact match, not just a prefix, *exact is returned as TRUE. */ char * regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation, bool *exact) { char *result; regex_t *re; int cflags; int re_result; pg_wchar *str; size_t slen; size_t maxlen; char errMsg[100]; *exact = false; /* default result */ /* Compile RE */ cflags = REG_ADVANCED; if (case_insensitive) cflags |= REG_ICASE; re = RE_compile_and_cache(text_re, cflags, collation); /* Examine it to see if there's a fixed prefix */ re_result = pg_regprefix(re, &str, &slen); switch (re_result) { case REG_NOMATCH: return NULL; case REG_PREFIX: /* continue with wchar conversion */ break; case REG_EXACT: *exact = true; /* continue with wchar conversion */ break; default: /* re failed??? */ CHECK_FOR_INTERRUPTS(); pg_regerror(re_result, re, errMsg, sizeof(errMsg)); ereport(ERROR, (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), errmsg("regular expression failed: %s", errMsg))); break; } /* Convert pg_wchar result back to database encoding */ maxlen = pg_database_encoding_max_length() * slen + 1; result = (char *) palloc(maxlen); slen = pg_wchar2mb_with_len(str, result, slen); Assert(slen < maxlen); free(str); return result; }
/* * RE_compile_and_execute - compile and execute a RE * * Returns TRUE on match, FALSE on no match * * text_re --- the pattern, expressed as an *untoasted* TEXT object * dat --- the data to match against (need not be null-terminated) * dat_len --- the length of the data string * cflags --- compile options for the pattern * nmatch, pmatch --- optional return area for match details * * Both pattern and data are given in the database encoding. We internally * convert to array of pg_wchar which is what Spencer's regex package wants. */ static bool RE_compile_and_execute(text *text_re, char *dat, int dat_len, int cflags, int nmatch, regmatch_t *pmatch) { pg_wchar *data; size_t data_len; int regexec_result; regex_t *re; char errMsg[100]; /* Convert data string to wide characters */ data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar)); data_len = pg_mb2wchar_with_len(dat, data, dat_len); /* Compile RE */ re = RE_compile_and_cache(text_re, cflags); /* Perform RE match and return result */ regexec_result = pg_regexec(re, data, data_len, 0, NULL, /* no details */ nmatch, pmatch, 0); pfree(data); if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH) { /* re failed??? */ pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); ereport(ERROR, (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), errmsg("regular expression failed: %s", errMsg))); } return (regexec_result == REG_OKAY); }
/* * RE_compile_and_cache - compile a RE, caching if possible * * Returns regex_t * * * text_re --- the pattern, expressed as an *untoasted* TEXT object * cflags --- compile options for the pattern * * Pattern is given in the database encoding. We internally convert to * array of pg_wchar which is what Spencer's regex package wants. */ static regex_t * RE_compile_and_cache(text *text_re, int cflags) { int text_re_len = VARSIZE(text_re); pg_wchar *pattern; size_t pattern_len; int i; int regcomp_result; cached_re_str re_temp; char errMsg[100]; /* * Look for a match among previously compiled REs. Since the data * structure is self-organizing with most-used entries at the front, our * search strategy can just be to scan from the front. */ for (i = 0; i < num_res; i++) { if (VARSIZE(re_array[i].cre_pat) == text_re_len && memcmp(re_array[i].cre_pat, text_re, text_re_len) == 0 && re_array[i].cre_flags == cflags) { /* * Found a match; move it to front if not there already. */ if (i > 0) { re_temp = re_array[i]; memmove(&re_array[1], &re_array[0], i * sizeof(cached_re_str)); re_array[0] = re_temp; } return &re_array[0].cre_re; } } /* * Couldn't find it, so try to compile the new RE. To avoid leaking * resources on failure, we build into the re_temp local. */ /* Convert pattern string to wide characters */ pattern = (pg_wchar *) palloc((text_re_len - VARHDRSZ + 1) * sizeof(pg_wchar)); pattern_len = pg_mb2wchar_with_len(VARDATA(text_re), pattern, text_re_len - VARHDRSZ); regcomp_result = pg_regcomp(&re_temp.cre_re, pattern, pattern_len, cflags); pfree(pattern); if (regcomp_result != REG_OKAY) { /* re didn't compile */ pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg)); /* XXX should we pg_regfree here? */ ereport(ERROR, (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), errmsg("invalid regular expression: %s", errMsg))); } /* * use malloc/free for the cre_pat field because the storage has to * persist across transactions */ re_temp.cre_pat = malloc(text_re_len); if (re_temp.cre_pat == NULL) { pg_regfree(&re_temp.cre_re); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); } memcpy(re_temp.cre_pat, text_re, text_re_len); re_temp.cre_flags = cflags; /* * Okay, we have a valid new item in re_temp; insert it into the storage * array. Discard last entry if needed. */ if (num_res >= MAX_CACHED_RES) { --num_res; Assert(num_res < MAX_CACHED_RES); pg_regfree(&re_array[num_res].cre_re); free(re_array[num_res].cre_pat); } if (num_res > 0) memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str)); re_array[0] = re_temp; num_res++; return &re_array[0].cre_re; }
/* * RE_compile_and_cache - compile a RE, caching if possible * * Returns regex_t * * * text_re --- the pattern, expressed as a TEXT object * cflags --- compile options for the pattern * collation --- collation to use for LC_CTYPE-dependent behavior * * Pattern is given in the database encoding. We internally convert to * an array of pg_wchar, which is what Spencer's regex package wants. */ static regex_t * RE_compile_and_cache(text *text_re, int cflags, Oid collation) { int text_re_len = VARSIZE_ANY_EXHDR(text_re); char *text_re_val = VARDATA_ANY(text_re); pg_wchar *pattern; int pattern_len; int i; int regcomp_result; cached_re_str re_temp; char errMsg[100]; /* * Look for a match among previously compiled REs. Since the data * structure is self-organizing with most-used entries at the front, our * search strategy can just be to scan from the front. */ for (i = 0; i < num_res; i++) { if (re_array[i].cre_pat_len == text_re_len && re_array[i].cre_flags == cflags && re_array[i].cre_collation == collation && memcmp(re_array[i].cre_pat, text_re_val, text_re_len) == 0) { /* * Found a match; move it to front if not there already. */ if (i > 0) { re_temp = re_array[i]; memmove(&re_array[1], &re_array[0], i * sizeof(cached_re_str)); re_array[0] = re_temp; } return &re_array[0].cre_re; } } /* * Couldn't find it, so try to compile the new RE. To avoid leaking * resources on failure, we build into the re_temp local. */ /* Convert pattern string to wide characters */ pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar)); pattern_len = pg_mb2wchar_with_len(text_re_val, pattern, text_re_len); regcomp_result = pg_regcomp(&re_temp.cre_re, pattern, pattern_len, cflags, collation); pfree(pattern); if (regcomp_result != REG_OKAY) { /* re didn't compile (no need for pg_regfree, if so) */ /* * Here and in other places in this file, do CHECK_FOR_INTERRUPTS * before reporting a regex error. This is so that if the regex * library aborts and returns REG_CANCEL, we don't print an error * message that implies the regex was invalid. */ CHECK_FOR_INTERRUPTS(); pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg)); ereport(ERROR, (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), errmsg("invalid regular expression: %s", errMsg))); } /* * We use malloc/free for the cre_pat field because the storage has to * persist across transactions, and because we want to get control back on * out-of-memory. The Max() is because some malloc implementations return * NULL for malloc(0). */ re_temp.cre_pat = malloc(Max(text_re_len, 1)); if (re_temp.cre_pat == NULL) { pg_regfree(&re_temp.cre_re); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); } memcpy(re_temp.cre_pat, text_re_val, text_re_len); re_temp.cre_pat_len = text_re_len; re_temp.cre_flags = cflags; re_temp.cre_collation = collation; /* * Okay, we have a valid new item in re_temp; insert it into the storage * array. Discard last entry if needed. */ if (num_res >= MAX_CACHED_RES) { --num_res; Assert(num_res < MAX_CACHED_RES); pg_regfree(&re_array[num_res].cre_re); free(re_array[num_res].cre_pat); } if (num_res > 0) memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str)); re_array[0] = re_temp; num_res++; return &re_array[0].cre_re; }