/* {{{ php_mbregex_compile_pattern */
/*
 * Return a compiled regex for `pattern` (length `patlen`).
 *
 * The compiled-regex cache MBREX(ht_rc) is consulted first, keyed by the raw
 * pattern text. A cached entry is only reused when its options, encoding and
 * syntax all equal the requested ones; otherwise a new regex is compiled and
 * stored under the same key.
 *
 * Returns NULL after raising an E_WARNING when compilation fails.
 */
static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, size_t patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax)
{
	php_mb_regex_t *cached;
	php_mb_regex_t *compiled = NULL;
	OnigErrorInfo einfo;
	int status;

	cached = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);

	/* Cache hit: same pattern text compiled with identical settings. */
	if (cached != NULL
			&& onig_get_options(cached) == options
			&& onig_get_encoding(cached) == enc
			&& onig_get_syntax(cached) == syntax) {
		return cached;
	}

	status = onig_new(&compiled, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &einfo);
	if (status != ONIG_NORMAL) {
		OnigUChar msg[ONIG_MAX_ERROR_MESSAGE_LEN];

		onig_error_code_to_str(msg, status, &einfo);
		php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", msg);
		return NULL;
	}

	if (MBREX(search_re) == cached) {
		/* reuse the new rc? see bug #72399 */
		MBREX(search_re) = NULL;
	}
	zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, compiled);

	return compiled;
}
/* {{{ php_mb_regex_set_options */
/*
 * Replace the module-wide default regex options and syntax.
 *
 * `prev_options` / `prev_syntax` are optional out-parameters: when non-NULL
 * they receive the value that was in effect before this call.
 */
static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
{
	/* options: hand back the old value (if asked for), then overwrite */
	if (prev_options) {
		*prev_options = MBREX(regex_default_options);
	}
	MBREX(regex_default_options) = options;

	/* syntax: same procedure */
	if (prev_syntax) {
		*prev_syntax = MBREX(regex_default_syntax);
	}
	MBREX(regex_default_syntax) = syntax;
}
/* {{{ php_mb_regex_set_default_mbctype */
/*
 * Set the default regex encoding from an encoding name.
 *
 * Returns SUCCESS when the name resolves to an encoding, FAILURE (leaving the
 * current default untouched) when it resolves to ONIG_ENCODING_UNDEF.
 */
int php_mb_regex_set_default_mbctype(const char *encname)
{
	OnigEncoding resolved = _php_mb_regex_name2mbctype(encname);

	if (resolved != ONIG_ENCODING_UNDEF) {
		MBREX(default_mbctype) = resolved;
		return SUCCESS;
	}

	return FAILURE;
}
/* {{{ php_mbregex_compile_pattern */
/*
 * Return a compiled regex for `pattern` (length `patlen`).
 *
 * MBREX(ht_rc) caches compiled regexes keyed by the raw pattern text. The
 * cached entry is reused only when its options, encoding and syntax match the
 * requested ones; otherwise the pattern is compiled again and the cache entry
 * is replaced.
 *
 * Returns NULL after raising an E_WARNING when compilation fails.
 */
static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax)
{
	int err_code = 0;
	php_mb_regex_t *retval = NULL, *rc = NULL;
	OnigErrorInfo err_info;
	OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];

	rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
	if (!rc || rc->options != options || rc->enc != enc || rc->syntax != syntax) {
		if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
			/*
			 * onig_error_code_to_str() is variadic and expects a POINTER to the
			 * error info; passing the struct by value is undefined behaviour.
			 */
			onig_error_code_to_str(err_str, err_code, &err_info);
			php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
			retval = NULL;
			goto out;
		}
		if (rc == MBREX(search_re)) {
			/*
			 * The cache entry that the current search regex points at is about
			 * to be replaced; drop the reference so it cannot be used after the
			 * old entry is gone (see bug #72399).
			 * NOTE(review): assumes the hash table releases the old regex on
			 * update -- confirm against the ht_rc destructor.
			 */
			MBREX(search_re) = NULL;
		}
		zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);
	} else {
		/* rc is necessarily non-NULL here: reuse the cached regex */
		retval = rc;
	}

out:
	return retval;
}
/* {{{ _php_mb_regex_ereg_replace_exec */
/*
 * Shared worker for the mbregex replace functions.
 *
 * `options`     - base Oniguruma options supplied by the caller; combined with
 *                 either the parsed option string or the module defaults.
 * `is_callable` - zero:     arguments are (pattern, replacement, string [, options])
 *                 non-zero: arguments are (pattern, callback,    string [, options])
 *
 * Result placed in return_value:
 *   - FALSE on argument, compile, eval or search errors;
 *   - NULL when the subject string is not valid in the current regex encoding;
 *   - otherwise the subject with every match replaced.
 *
 * In string mode "\0".."\9" in the replacement are substituted with the
 * corresponding capture group. With the (deprecated) 'e' option the expanded
 * replacement is evaluated as PHP code and its result is used instead.
 */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
{
	zval *arg_pattern_zval;

	char *arg_pattern;
	size_t arg_pattern_len;

	char *replace;
	size_t replace_len;

	zend_fcall_info arg_replace_fci;
	zend_fcall_info_cache arg_replace_fci_cache;

	char *string;
	size_t string_len;

	char *p;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigRegion *regs = NULL;
	smart_str out_buf = {0};
	smart_str eval_buf = {0};
	smart_str *pbuf;
	size_t i;
	int err, eval, n;
	OnigUChar *pos;
	OnigUChar *string_lim;
	char *description = NULL;
	char pat_buf[6];

	const mbfl_encoding *enc;

	{
		const char *current_enc_name;
		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
		if (current_enc_name == NULL ||
			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
			php_error_docref(NULL, E_WARNING, "Unknown error");
			RETURN_FALSE;
		}
	}
	eval = 0;
	{
		char *option_str = NULL;
		size_t option_str_len = 0;

		if (!is_callable) {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s",
						&arg_pattern_zval,
						&replace, &replace_len,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		} else {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s",
						&arg_pattern_zval,
						&arg_replace_fci, &arg_replace_fci_cache,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		}

		if (!php_mb_check_encoding(
			string,
			string_len,
			_php_mb_regex_mbctype2name(MBREX(current_mbctype))
		)) {
			RETURN_NULL();
		}

		if (option_str != NULL) {
			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
		} else {
			options |= MBREX(regex_default_options);
			syntax = MBREX(regex_default_syntax);
		}
	}
	if (eval && !is_callable) {
		php_error_docref(NULL, E_DEPRECATED, "The 'e' option is deprecated, use mb_ereg_replace_callback instead");
	}
	if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
		arg_pattern = Z_STRVAL_P(arg_pattern_zval);
		arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
	} else {
		/* FIXME: this code is not multibyte aware! */
		convert_to_long_ex(arg_pattern_zval);
		pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
		pat_buf[1] = '\0';
		pat_buf[2] = '\0';
		pat_buf[3] = '\0';
		pat_buf[4] = '\0';
		pat_buf[5] = '\0';

		arg_pattern = pat_buf;
		arg_pattern_len = 1;
	}
	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax);
	if (re == NULL) {
		RETURN_FALSE;
	}

	/*
	 * Reject 'e' + callback BEFORE allocating `description`, so this early
	 * return has nothing to release.
	 */
	if (is_callable && eval) {
		php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
		RETURN_FALSE;
	}

	if (eval || is_callable) {
		/* the replacement is staged in eval_buf rather than written straight to the output */
		pbuf = &eval_buf;
		description = zend_make_compiled_string_description("mbregex replace");
	} else {
		pbuf = &out_buf;
		description = NULL;
	}

	/* do the actual work */
	err = 0;
	pos = (OnigUChar *)string;
	string_lim = (OnigUChar*)(string + string_len);
	regs = onig_region_new();
	while (err >= 0) {
		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
		if (err <= -2) {
			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(err_str, err);
			php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
			break;
		}
		if (err >= 0) {
#if moriyoshi_0
			if (regs->beg[0] == regs->end[0]) {
				php_error_docref(NULL, E_WARNING, "Empty regular expression");
				break;
			}
#endif
			/* copy the part of the string before the match */
			smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));

			if (!is_callable) {
				/* copy replacement and backrefs */
				i = 0;
				p = replace;
				while (i < replace_len) {
					int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
					n = -1;
					if ((replace_len - i) >= 2 && fwd == 1 &&
						p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
						n = p[1] - '0';
					}
					if (n >= 0 && n < regs->num_regs) {
						if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && (size_t)regs->end[n] <= string_len) {
							smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
						}
						p += 2;
						i += 2;
					} else {
						smart_str_appendl(pbuf, p, fwd);
						p += fwd;
						i += fwd;
					}
				}
			}

			if (eval) {
				zval v;
				zend_string *eval_str;
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				if (eval_buf.s) {
					eval_str = eval_buf.s;
				} else {
					eval_str = ZSTR_EMPTY_ALLOC();
				}

				/* do eval */
				if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) {
					efree(description);
					zend_throw_error(NULL, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str));
					/*
					 * We return from here, so the region object itself must be
					 * released too (free_self = 1), not just its contents.
					 */
					onig_region_free(regs, 1);
					smart_str_free(&out_buf);
					smart_str_free(&eval_buf);
					RETURN_FALSE;
				}

				/* result of eval */
				convert_to_string(&v);
				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
				/* Clean up */
				smart_str_free(&eval_buf);
				zval_dtor(&v);
			} else if (is_callable) {
				zval args[1];
				zval subpats, retval;
				int g;

				array_init(&subpats);
				for (g = 0; g < regs->num_regs; g++) {
					if (regs->beg[g] >= 0) {
						add_next_index_stringl(&subpats, string + regs->beg[g], regs->end[g] - regs->beg[g]);
					} else {
						/* unset group: avoid forming a pointer before `string`; the entry is still "" */
						add_next_index_stringl(&subpats, "", 0);
					}
				}

				ZVAL_COPY_VALUE(&args[0], &subpats);
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				arg_replace_fci.param_count = 1;
				arg_replace_fci.params = args;
				arg_replace_fci.retval = &retval;
				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
						!Z_ISUNDEF(retval)) {
					convert_to_string_ex(&retval);
					smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
					smart_str_free(&eval_buf);
					zval_ptr_dtor(&retval);
				} else {
					if (!EG(exception)) {
						php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
					}
				}
				zval_ptr_dtor(&subpats);
			}

			/*
			 * Advance past the match. For a match that did not move us forward
			 * (empty match at `pos`), copy one byte through and step over it so
			 * the loop makes progress.
			 */
			n = regs->end[0];
			if ((pos - (OnigUChar *)string) < n) {
				pos = (OnigUChar *)string + n;
			} else {
				if (pos < string_lim) {
					smart_str_appendl(&out_buf, (char *)pos, 1);
				}
				pos++;
			}
		} else { /* nomatch */
			/* stick that last bit of string on our output */
			if (string_lim - pos > 0) {
				smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
			}
		}
		/* release the region contents only; the object is reused by the next search */
		onig_region_free(regs, 0);
	}

	if (description) {
		efree(description);
	}
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
	smart_str_free(&eval_buf);

	if (err <= -2) {
		smart_str_free(&out_buf);
		RETVAL_FALSE;
	} else if (out_buf.s) {
		smart_str_0(&out_buf);
		RETVAL_STR(out_buf.s);
	} else {
		RETVAL_EMPTY_STRING();
	}
}
/* {{{ _php_mb_regex_ereg_exec */
/*
 * Common worker taking (pattern, string [, array]).
 *
 * Compiles the pattern with the default options (plus ONIG_OPTION_IGNORECASE
 * when `icase` is non-zero) and the default syntax, then runs one search over
 * the whole string.
 *
 * Result placed in return_value:
 *   - FALSE on argument/encoding/compile failure, on an empty pattern, or
 *     when the search does not succeed;
 *   - otherwise an integer: 1 when no result array was passed, else the byte
 *     length of the whole match (a zero length is reported as 1).
 *
 * A passed result array is always emptied first. On a match it receives one
 * entry per capture group: the captured text, or FALSE when the group is
 * unset, empty or out of range.
 */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	zval *arg_pattern, *array = NULL;
	char *string;
	size_t string_len;
	php_mb_regex_t *re;
	OnigRegion *regs = NULL;
	OnigOptionType options;
	OnigUChar *subject, *subject_end;
	int encoding_ok;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z/", &arg_pattern, &string, &string_len, &array) == FAILURE) {
		RETURN_FALSE;
	}

	encoding_ok = php_mb_check_encoding(
		string,
		string_len,
		_php_mb_regex_mbctype2name(MBREX(current_mbctype))
	) ? 1 : 0;

	/* The output array is reset whether or not the subject is well-formed. */
	if (array != NULL) {
		zval_dtor(array);
		array_init(array);
	}
	if (!encoding_ok) {
		RETURN_FALSE;
	}

	options = MBREX(regex_default_options);
	if (icase) {
		options |= ONIG_OPTION_IGNORECASE;
	}

	/* compile the regular expression from the supplied regex */
	if (Z_TYPE_P(arg_pattern) != IS_STRING) {
		/* we convert numbers to integers and treat them as a string */
		if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) {
			convert_to_long_ex(arg_pattern);	/* get rid of decimal places */
		}
		convert_to_string_ex(arg_pattern);
		/* don't bother doing an extended regex with just a number */
	}

	/* No region has been allocated yet, so the failures below can return directly. */
	if (Z_STRLEN_P(arg_pattern) == 0) {
		php_error_docref(NULL, E_WARNING, "empty pattern");
		RETURN_FALSE;
	}

	re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax));
	if (re == NULL) {
		RETURN_FALSE;
	}

	regs = onig_region_new();
	subject = (OnigUChar *)string;
	subject_end = (OnigUChar *)(string + string_len);

	/* actually execute the regular expression */
	if (onig_search(re, subject, subject_end, subject, subject_end, regs, 0) < 0) {
		RETVAL_FALSE;
	} else {
		int matched_bytes = 1;

		if (array != NULL) {
			int idx;

			matched_bytes = regs->end[0] - regs->beg[0];
			for (idx = 0; idx < regs->num_regs; idx++) {
				int from = regs->beg[idx];
				int to = regs->end[idx];

				if (from < 0 || from >= to || (size_t)to > string_len) {
					add_index_bool(array, idx, 0);
				} else {
					add_index_stringl(array, idx, string + from, to - from);
				}
			}
		}

		if (matched_bytes == 0) {
			matched_bytes = 1;
		}
		RETVAL_LONG(matched_bytes);
	}

	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
}
/* {{{ php_mb_regex_get_default_mbctype */
/* Name of the default regex encoding, as produced by _php_mb_regex_mbctype2name(). */
const char *php_mb_regex_get_default_mbctype(void)
{
	const char *encoding_name = _php_mb_regex_mbctype2name(MBREX(default_mbctype));

	return encoding_name;
}
/* {{{ php_mb_regex_get_mbctype */
/* Name of the regex encoding currently in effect, as produced by _php_mb_regex_mbctype2name(). */
const char *php_mb_regex_get_mbctype(void)
{
	const char *encoding_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));

	return encoding_name;
}
/* {{{ _php_mb_regex_ereg_search_exec */
/*
 * Shared worker for the stateful search functions. Takes optional
 * (pattern [, options]) arguments and continues the search stored in the
 * module globals (search_str / search_re / search_pos / search_regs).
 *
 * `mode` selects what a successful match returns:
 *   1       - array(match offset, match length)
 *   2       - array of capture groups (text, or FALSE for an unset or
 *             out-of-range group)
 *   other   - TRUE
 *
 * FALSE is returned when no regex or subject string is available, when the
 * pattern fails to compile, when nothing matches, or on a search error.
 * After a match search_pos is moved to the end of the match; after a
 * mismatch it is moved to the end of the subject.
 */
static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	char *arg_pattern = NULL, *arg_options = NULL;
	size_t arg_pattern_len, arg_options_len;
	int err;
	/*
	 * Region offsets are signed (-1 marks a group that did not take part in
	 * the match), so they are kept in ints; storing them in size_t would turn
	 * the `beg >= 0` test below into a tautology.
	 */
	int n, i, beg, end;
	size_t pos, len;
	OnigOptionType option;
	OnigUChar *str;
	OnigSyntaxType *syntax;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
		return;
	}

	option = MBREX(regex_default_options);

	if (arg_options) {
		option = 0;
		/*
		 * NOTE(review): `syntax` is filled in here but the compile call below
		 * uses MBREX(regex_default_syntax), so a syntax selected through the
		 * option string has no effect. Left as-is to preserve behaviour --
		 * confirm whether that is intended.
		 */
		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
	}

	if (arg_pattern) {
		/* create regex pattern buffer */
		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
			RETURN_FALSE;
		}
	}

	pos = MBREX(search_pos);
	str = NULL;
	len = 0;
	if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
		len = Z_STRLEN(MBREX(search_str));
	}

	if (MBREX(search_re) == NULL) {
		php_error_docref(NULL, E_WARNING, "No regex given");
		RETURN_FALSE;
	}

	if (str == NULL) {
		php_error_docref(NULL, E_WARNING, "No string given");
		RETURN_FALSE;
	}

	/* discard the regions of the previous search before starting a new one */
	if (MBREX(search_regs)) {
		onig_region_free(MBREX(search_regs), 1);
	}
	MBREX(search_regs) = onig_region_new();

	err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
	if (err == ONIG_MISMATCH) {
		MBREX(search_pos) = len;
		RETVAL_FALSE;
	} else if (err <= -2) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(err_str, err);
		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
		RETVAL_FALSE;
	} else {
		switch (mode) {
		case 1:
			array_init(return_value);
			beg = MBREX(search_regs)->beg[0];
			end = MBREX(search_regs)->end[0];
			add_next_index_long(return_value, beg);
			add_next_index_long(return_value, end - beg);
			break;
		case 2:
			array_init(return_value);
			n = MBREX(search_regs)->num_regs;
			for (i = 0; i < n; i++) {
				beg = MBREX(search_regs)->beg[i];
				end = MBREX(search_regs)->end[i];
				if (beg >= 0 && beg <= end && (size_t)end <= len) {
					add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
				} else {
					add_index_bool(return_value, i, 0);
				}
			}
			break;
		default:
			RETVAL_TRUE;
			break;
		}
		/* group 0 always took part in a successful match, so end >= 0 here */
		end = MBREX(search_regs)->end[0];
		if (pos <= (size_t)end) {
			MBREX(search_pos) = end;
		} else {
			MBREX(search_pos) = pos + 1;
		}
	}

	/* no match or error: the regions carry nothing useful, drop them */
	if (err < 0) {
		onig_region_free(MBREX(search_regs), 1);
		MBREX(search_regs) = (OnigRegion *)NULL;
	}
}