Exemple #1
0
/* {{{ php_mbregex_compile_pattern */
/*
 * Look up `pattern` in the MBREX(ht_rc) table and return the stored regex when
 * it was built with the same options, encoding and syntax. Otherwise compile a
 * new regex with onig_new(), store it in the table under the pattern bytes and
 * return it.
 *
 * Returns NULL (after emitting an E_WARNING) when compilation fails.
 */
static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, size_t patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax)
{
	php_mb_regex_t *cached = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
	php_mb_regex_t *compiled = NULL;
	OnigErrorInfo err_info;
	int status;

	/* Table hit with identical compile settings: nothing to build. */
	if (cached
			&& onig_get_options(cached) == options
			&& onig_get_encoding(cached) == enc
			&& onig_get_syntax(cached) == syntax) {
		return cached;
	}

	status = onig_new(&compiled, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info);
	if (status != ONIG_NORMAL) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];

		onig_error_code_to_str(err_str, status, &err_info);
		php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
		return NULL;
	}

	/*
	 * The entry about to be replaced may also be the regex remembered for
	 * mb_ereg_search*(); forget it before the table is updated (bug #72399).
	 */
	if (MBREX(search_re) == cached) {
		MBREX(search_re) = NULL;
	}
	zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, compiled);

	return compiled;
}
Exemple #2
0
/* {{{ _php_mb_regex_set_options */
/*
 * Install `options` and `syntax` as the default regex options/syntax.
 * When `prev_options` / `prev_syntax` are non-NULL, the value being replaced
 * is written through them first.
 */
static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
{
	/* Default option flags: report the old value, then overwrite. */
	if (prev_options) {
		*prev_options = MBREX(regex_default_options);
	}
	MBREX(regex_default_options) = options;

	/* Default syntax: same sequence. */
	if (prev_syntax) {
		*prev_syntax = MBREX(regex_default_syntax);
	}
	MBREX(regex_default_syntax) = syntax;
}
Exemple #3
0
/* {{{ php_mb_regex_set_default_mbctype */
/*
 * Resolve `encname` to an Oniguruma encoding and store it as the default
 * mbctype. Returns SUCCESS when the name resolves, FAILURE (leaving the
 * current default untouched) when it maps to ONIG_ENCODING_UNDEF.
 */
int php_mb_regex_set_default_mbctype(const char *encname)
{
	OnigEncoding resolved = _php_mb_regex_name2mbctype(encname);

	if (resolved != ONIG_ENCODING_UNDEF) {
		MBREX(default_mbctype) = resolved;
		return SUCCESS;
	}

	return FAILURE;
}
Exemple #4
0
/* {{{ php_mbregex_compile_pattern */
/*
 * Look up `pattern` in the MBREX(ht_rc) table and return the stored regex when
 * its options, encoding and syntax match the requested ones. Otherwise compile
 * a new regex with onig_new(), store it in the table under the pattern bytes
 * and return it.
 *
 * Returns NULL (after emitting an E_WARNING) when compilation fails.
 */
static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax)
{
	int err_code = 0;
	php_mb_regex_t *retval = NULL, *rc = NULL;
	OnigErrorInfo err_info;
	OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];

	rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
	if (rc && rc->options == options && rc->enc == enc && rc->syntax == syntax) {
		/* Stored regex was compiled with identical settings. */
		return rc;
	}

	if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
		/*
		 * onig_error_code_to_str() is variadic and reads an OnigErrorInfo
		 * pointer; passing the struct by value is undefined behaviour.
		 */
		onig_error_code_to_str(err_str, err_code, &err_info);
		php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
		return NULL;
	}

	/*
	 * The entry about to be replaced may also be the regex remembered for
	 * mb_ereg_search*(). Forget it so no stale pointer survives the table
	 * update (see bug #72399).
	 * NOTE(review): assumes the table releases the replaced regex - confirm
	 * against the ht_rc destructor.
	 */
	if (rc == MBREX(search_re)) {
		MBREX(search_re) = NULL;
	}
	zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);

	return retval;
}
Exemple #5
0
/* {{{ _php_mb_regex_ereg_replace_exec */
/*
 * Shared worker for the multibyte regex replace functions.
 *
 * options     - Oniguruma option flags supplied by the caller; OR'ed with the
 *               defaults when no option string is given, otherwise handed to
 *               _php_mb_regex_init_options() together with the option string.
 * is_callable - non-zero when the replacement argument is a callback
 *               (parameters parsed as "zfs|s"); zero when it is a replacement
 *               string (parameters parsed as "zss|s").
 *
 * The PHP return value is:
 *   - FALSE on parameter, pattern-compile or search errors, when the 'e'
 *     option is combined with a callback, or when evaluated code fails;
 *   - NULL when the subject string is not valid in the current encoding;
 *   - otherwise the subject with every match replaced.
 */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
{
	zval *arg_pattern_zval;

	char *arg_pattern;
	size_t arg_pattern_len;

	char *replace;
	size_t replace_len;

	zend_fcall_info arg_replace_fci;
	zend_fcall_info_cache arg_replace_fci_cache;

	char *string;
	size_t string_len;

	char *p;
	php_mb_regex_t *re;
	OnigSyntaxType *syntax;
	OnigRegion *regs = NULL;
	smart_str out_buf = {0};
	smart_str eval_buf = {0};
	smart_str *pbuf;
	size_t i;
	int err, eval, n;
	OnigUChar *pos;
	OnigUChar *string_lim;
	char *description = NULL;
	char pat_buf[6];

	const mbfl_encoding *enc;

	{
		const char *current_enc_name;
		current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
		if (current_enc_name == NULL ||
			(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
			php_error_docref(NULL, E_WARNING, "Unknown error");
			RETURN_FALSE;
		}
	}
	eval = 0;
	{
		char *option_str = NULL;
		size_t option_str_len = 0;

		if (!is_callable) {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s",
						&arg_pattern_zval,
						&replace, &replace_len,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		} else {
			if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s",
						&arg_pattern_zval,
						&arg_replace_fci, &arg_replace_fci_cache,
						&string, &string_len,
						&option_str, &option_str_len) == FAILURE) {
				RETURN_FALSE;
			}
		}

		/* Refuse subjects that are not valid in the current regex encoding. */
		if (!php_mb_check_encoding(
		string,
		string_len,
		_php_mb_regex_mbctype2name(MBREX(current_mbctype))
		)) {
			RETURN_NULL();
		}

		if (option_str != NULL) {
			_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
		} else {
			options |= MBREX(regex_default_options);
			syntax = MBREX(regex_default_syntax);
		}
	}
	if (eval && !is_callable) {
		php_error_docref(NULL, E_DEPRECATED, "The 'e' option is deprecated, use mb_ereg_replace_callback instead");
	}
	if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
		arg_pattern = Z_STRVAL_P(arg_pattern_zval);
		arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
	} else {
		/* A non-string pattern is converted to an integer and used as a
		 * single-byte pattern. */
		/* FIXME: this code is not multibyte aware! */
		convert_to_long_ex(arg_pattern_zval);
		pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
		pat_buf[1] = '\0';
		pat_buf[2] = '\0';
		pat_buf[3] = '\0';
		pat_buf[4] = '\0';
		pat_buf[5] = '\0';

		arg_pattern = pat_buf;
		arg_pattern_len = 1;
	}
	/* create regex pattern buffer */
	re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax);
	if (re == NULL) {
		RETURN_FALSE;
	}

	/*
	 * Reject 'e' + callback before anything is allocated, so this early
	 * return has nothing to release.
	 */
	if (is_callable && eval) {
		php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
		RETURN_FALSE;
	}

	if (eval || is_callable) {
		/* The expanded replacement is staged in eval_buf, not appended
		 * directly to the output. */
		pbuf = &eval_buf;
		description = zend_make_compiled_string_description("mbregex replace");
	} else {
		pbuf = &out_buf;
		description = NULL;
	}

	/* do the actual work */
	err = 0;
	pos = (OnigUChar *)string;
	string_lim = (OnigUChar*)(string + string_len);
	regs = onig_region_new();
	while (err >= 0) {
		err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
		if (err <= -2) {
			OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
			onig_error_code_to_str(err_str, err);
			php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
			break;
		}
		if (err >= 0) {
#if moriyoshi_0
			if (regs->beg[0] == regs->end[0]) {
				php_error_docref(NULL, E_WARNING, "Empty regular expression");
				break;
			}
#endif
			/* copy the part of the string before the match */
			smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));

			if (!is_callable) {
				/* copy replacement and backrefs */
				i = 0;
				p = replace;
				while (i < replace_len) {
					int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
					n = -1;
					/* "\0".."\9" is a back-reference only when the backslash
					 * is a single-byte character in this encoding. */
					if ((replace_len - i) >= 2 && fwd == 1 &&
					p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
						n = p[1] - '0';
					}
					if (n >= 0 && n < regs->num_regs) {
						if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && (size_t)regs->end[n] <= string_len) {
							smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
						}
						p += 2;
						i += 2;
					} else {
						smart_str_appendl(pbuf, p, fwd);
						p += fwd;
						i += fwd;
					}
				}
			}

			if (eval) {
				zval v;
				zend_string *eval_str;
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				if (eval_buf.s) {
					eval_str = eval_buf.s;
				} else {
					eval_str = ZSTR_EMPTY_ALLOC();
				}

				/* do eval */
				if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) {
					efree(description);
					zend_throw_error(NULL, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str));
					/* free_self = 1: release the region object itself too,
					 * since we return without reaching the common cleanup. */
					onig_region_free(regs, 1);
					smart_str_free(&out_buf);
					smart_str_free(&eval_buf);
					RETURN_FALSE;
				}

				/* result of eval */
				convert_to_string(&v);
				smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
				/* Clean up */
				smart_str_free(&eval_buf);
				zval_dtor(&v);
			} else if (is_callable) {
				zval args[1];
				zval subpats, retval;
				int grp;

				/* Hand every capture group of this match to the callback as
				 * one array argument. */
				array_init(&subpats);
				for (grp = 0; grp < regs->num_regs; grp++) {
					add_next_index_stringl(&subpats, string + regs->beg[grp], regs->end[grp] - regs->beg[grp]);
				}

				ZVAL_COPY_VALUE(&args[0], &subpats);
				/* null terminate buffer */
				smart_str_0(&eval_buf);

				arg_replace_fci.param_count = 1;
				arg_replace_fci.params = args;
				arg_replace_fci.retval = &retval;
				if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
						!Z_ISUNDEF(retval)) {
					convert_to_string_ex(&retval);
					smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
					smart_str_free(&eval_buf);
					zval_ptr_dtor(&retval);
				} else {
					if (!EG(exception)) {
						php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
					}
				}
				zval_ptr_dtor(&subpats);
			}

			/* Advance past the match; for a match that did not move the
			 * cursor, copy one byte through and step forward. */
			n = regs->end[0];
			if ((pos - (OnigUChar *)string) < n) {
				pos = (OnigUChar *)string + n;
			} else {
				if (pos < string_lim) {
					smart_str_appendl(&out_buf, (char *)pos, 1);
				}
				pos++;
			}
		} else { /* nomatch */
			/* stick that last bit of string on our output */
			if (string_lim - pos > 0) {
				smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
			}
		}
		/* free_self = 0: reset the region contents, keep the object for the
		 * next iteration. */
		onig_region_free(regs, 0);
	}

	if (description) {
		efree(description);
	}
	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
	smart_str_free(&eval_buf);

	if (err <= -2) {
		smart_str_free(&out_buf);
		RETVAL_FALSE;
	} else if (out_buf.s) {
		smart_str_0(&out_buf);
		RETVAL_STR(out_buf.s);
	} else {
		RETVAL_EMPTY_STRING();
	}
}
Exemple #6
0
/* {{{ _php_mb_regex_ereg_exec */
/*
 * Shared worker for the multibyte regex match functions.
 *
 * icase - non-zero adds ONIG_OPTION_IGNORECASE to the default options.
 *
 * PHP parameters ("zs|z/"): pattern, subject string, optional array that
 * receives the capture groups. The array, when supplied, is always reset to
 * an empty array before any result is produced.
 *
 * The PHP return value is FALSE on invalid encoding, empty pattern, compile
 * failure or no match. On a match it is the byte length of the whole match
 * when an array was supplied (a zero length is reported as 1), and 1
 * otherwise.
 */
static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	zval *arg_pattern, *array = NULL;
	char *string;
	size_t string_len;
	php_mb_regex_t *re;
	OnigRegion *regs = NULL;
	OnigOptionType options;
	OnigUChar *subject_start, *subject_end;
	int encoding_ok;
	int match_len = 1;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z/", &arg_pattern, &string, &string_len, &array) == FAILURE) {
		RETURN_FALSE;
	}

	/* Validate first, then reset the output array on both outcomes. */
	encoding_ok = php_mb_check_encoding(
		string,
		string_len,
		_php_mb_regex_mbctype2name(MBREX(current_mbctype))
	) ? 1 : 0;

	if (array != NULL) {
		zval_dtor(array);
		array_init(array);
	}
	if (!encoding_ok) {
		RETURN_FALSE;
	}

	options = MBREX(regex_default_options);
	if (icase) {
		options |= ONIG_OPTION_IGNORECASE;
	}

	/* Non-string patterns are used in their string form; doubles are first
	 * truncated to integers. */
	if (Z_TYPE_P(arg_pattern) != IS_STRING) {
		if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) {
			convert_to_long_ex(arg_pattern);
		}
		convert_to_string_ex(arg_pattern);
	}

	if (Z_STRLEN_P(arg_pattern) == 0) {
		php_error_docref(NULL, E_WARNING, "empty pattern");
		RETURN_FALSE;
	}

	re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax));
	if (re == NULL) {
		RETURN_FALSE;
	}

	regs = onig_region_new();
	subject_start = (OnigUChar *)string;
	subject_end = (OnigUChar *)(string + string_len);

	/* Search the whole subject, starting at its first byte. */
	if (onig_search(re, subject_start, subject_end, subject_start, subject_end, regs, 0) >= 0) {
		if (array != NULL) {
			int group;

			match_len = regs->end[0] - regs->beg[0];
			for (group = 0; group < regs->num_regs; group++) {
				int from = regs->beg[group];
				int to = regs->end[group];

				/* Groups that are unset, empty or out of range become FALSE. */
				if (from < 0 || from >= to || (size_t)to > string_len) {
					add_index_bool(array, group, 0);
				} else {
					add_index_stringl(array, group, (char *)&string[from], to - from);
				}
			}
		}

		if (match_len == 0) {
			match_len = 1;
		}
		RETVAL_LONG(match_len);
	} else {
		RETVAL_FALSE;
	}

	if (regs != NULL) {
		onig_region_free(regs, 1);
	}
}
Exemple #7
0
/* {{{ php_mb_regex_get_default_mbctype */
const char *php_mb_regex_get_default_mbctype(void)
{
	return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
}
Exemple #8
0
/* {{{ php_mb_regex_get_mbctype */
const char *php_mb_regex_get_mbctype(void)
{
	return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
}
Exemple #9
0
/* {{{ _php_mb_regex_ereg_search_exec */
/*
 * Shared worker for the stateful mb_ereg_search*() functions. Runs one search
 * on the stored subject MBREX(search_str), starting at MBREX(search_pos), and
 * records the new position and match regions in the module globals.
 *
 * PHP parameters ("|ss"): optional pattern and optional option string. When a
 * pattern is given it is compiled and becomes the stored search regex;
 * otherwise the previously stored regex is reused.
 *
 * mode - shape of the PHP return value on a match:
 *          1       array(byte offset, byte length) of the whole match;
 *          2       array of capture groups (FALSE for groups with no valid
 *                  range);
 *          other   TRUE.
 *
 * The PHP return value is FALSE on compile failure, missing regex or subject,
 * no match, or search error.
 */
static void
_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	char *arg_pattern = NULL, *arg_options = NULL;
	size_t arg_pattern_len, arg_options_len;
	int err;
	/* Region offsets are plain ints (negative for unset groups); keep them
	 * signed so the ">= 0" range checks are meaningful. */
	int n, i, beg, end;
	size_t pos, len;
	OnigOptionType option;
	OnigUChar *str;
	OnigSyntaxType *syntax;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
		return;
	}

	option = MBREX(regex_default_options);
	syntax = MBREX(regex_default_syntax);

	if (arg_options) {
		/* An explicit option string replaces the defaults; the syntax it
		 * selects is honoured below, as in the replace worker. */
		option = 0;
		_php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
	}

	if (arg_pattern) {
		/* create regex pattern buffer */
		if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
			RETURN_FALSE;
		}
	}

	pos = MBREX(search_pos);
	str = NULL;
	len = 0;
	if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
		str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
		len = Z_STRLEN(MBREX(search_str));
	}

	if (MBREX(search_re) == NULL) {
		php_error_docref(NULL, E_WARNING, "No regex given");
		RETURN_FALSE;
	}

	if (str == NULL) {
		php_error_docref(NULL, E_WARNING, "No string given");
		RETURN_FALSE;
	}

	/* Replace the regions left over from the previous search. */
	if (MBREX(search_regs)) {
		onig_region_free(MBREX(search_regs), 1);
	}
	MBREX(search_regs) = onig_region_new();

	err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
	if (err == ONIG_MISMATCH) {
		/* No further match: park the cursor at the end of the subject. */
		MBREX(search_pos) = len;
		RETVAL_FALSE;
	} else if (err <= -2) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
		onig_error_code_to_str(err_str, err);
		php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
		RETVAL_FALSE;
	} else {
		switch (mode) {
		case 1:
			array_init(return_value);
			beg = MBREX(search_regs)->beg[0];
			end = MBREX(search_regs)->end[0];
			add_next_index_long(return_value, beg);
			add_next_index_long(return_value, end - beg);
			break;
		case 2:
			array_init(return_value);
			n = MBREX(search_regs)->num_regs;
			for (i = 0; i < n; i++) {
				beg = MBREX(search_regs)->beg[i];
				end = MBREX(search_regs)->end[i];
				if (beg >= 0 && beg <= end && (size_t)end <= len) {
					add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
				} else {
					add_index_bool(return_value, i, 0);
				}
			}
			break;
		default:
			RETVAL_TRUE;
			break;
		}
		/* Continue the next search from the end of this match. */
		end = MBREX(search_regs)->end[0];
		if (pos <= (size_t)end) {
			MBREX(search_pos) = end;
		} else {
			MBREX(search_pos) = pos + 1;
		}
	}

	if (err < 0) {
		onig_region_free(MBREX(search_regs), 1);
		MBREX(search_regs) = (OnigRegion *)NULL;
	}
}