Exemple #1
0
/* {{{ intl_errors_set_code( intl_error* err, UErrorCode err_code )
 */
void intl_errors_set_code( intl_error* err, UErrorCode err_code )
{
	if(err) {
		intl_error_set_code( err, err_code );
	}
	intl_error_set_code( NULL, err_code );
}
Exemple #2
0
static inline int convert_cp(UChar32* pcp, zval *zcp) {
	zend_long cp = -1;

	if (Z_TYPE_P(zcp) == IS_LONG) {
		cp = Z_LVAL_P(zcp);
	} else if (Z_TYPE_P(zcp) == IS_STRING) {
		int32_t i = 0;
		size_t zcp_len = Z_STRLEN_P(zcp);

		if (ZEND_SIZE_T_INT_OVFL(zcp_len)) {
			intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
			intl_error_set_custom_msg(NULL, "Input string is too long.", 0);
			return FAILURE;
		}

		U8_NEXT(Z_STRVAL_P(zcp), i, zcp_len, cp);
		if ((size_t)i != zcp_len) {
			intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
			intl_error_set_custom_msg(NULL, "Passing a UTF-8 character for codepoint requires a string which is exactly one UTF-8 codepoint long.", 0);
			return FAILURE;
		}
	} else {
		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
		intl_error_set_custom_msg(NULL, "Invalid parameter for unicode point.  Must be either integer or UTF-8 sequence.", 0);
		return FAILURE;
	}
	if ((cp < UCHAR_MIN_VALUE) || (cp > UCHAR_MAX_VALUE)) {
		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
		intl_error_set_custom_msg(NULL, "Codepoint out of range", 0);
		return FAILURE;
	}
	*pcp = (UChar32)cp;
	return SUCCESS;
}
Exemple #3
0
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
		const char *domain, int32_t domain_len, uint32_t option, int mode)
{
	UChar* ustring = NULL;
	int ustring_len = 0;
	UErrorCode status;
	char     *converted_utf8;
	size_t    converted_utf8_len;
	UChar     converted[MAXPATHLEN];
	int32_t   converted_ret_len;

	/* convert the string to UTF-16. */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status);

	if (U_FAILURE(status)) {
		intl_error_set_code(NULL, status);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
		if (ustring) {
			efree(ustring);
		}
		RETURN_FALSE;
	} else {
		UParseError parse_error;

		status = U_ZERO_ERROR;
		if (mode == INTL_IDN_TO_ASCII) {
			converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
		} else {
			converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
		}
		efree(ustring);

		if (U_FAILURE(status)) {
			intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 );
			RETURN_FALSE;
		}

		status = U_ZERO_ERROR;
		intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);

		if (U_FAILURE(status)) {
			/* Set global error code. */
			intl_error_set_code(NULL, status);

			/* Set error messages. */
			intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
			efree(converted_utf8);
			RETURN_FALSE;
		}
	}

	/* return the allocated string, not a duplicate */
	RETVAL_STRINGL(converted_utf8, converted_utf8_len);
	//????
	efree(converted_utf8);
}
Exemple #4
0
static UBool enumCharNames_callback(enumCharNames_data *context,
                                    UChar32 code, UCharNameChoice nameChoice,
                                    const char *name, int32_t length) {
	zval retval;
	zval args[3];

	ZVAL_NULL(&retval);
	ZVAL_LONG(&args[0], code);
	ZVAL_LONG(&args[1], nameChoice);
	ZVAL_STRINGL(&args[2], name, length);

	context->fci.retval = &retval;
	context->fci.param_count = 3;
	context->fci.params = args;

	if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
		intl_error_set_custom_msg(NULL, "enumCharNames callback failed", 0);
		zval_dtor(&retval);
		zval_dtor(&args[2]);
		return 0;
	}
	zval_dtor(&retval);
	zval_dtor(&args[2]);
	return 1;
}
Exemple #5
0
static UBool enumCharType_callback(enumCharType_data *context,
                                   UChar32 start, UChar32 limit,
                                   UCharCategory type) {
	zval retval;
	zval args[3];

	ZVAL_NULL(&retval);
	/* Note that $start is INclusive, while $limit is EXclusive
	 * Therefore (0, 32, 15) means CPs 0..31 are of type 15
	 */
	ZVAL_LONG(&args[0], start);
	ZVAL_LONG(&args[1], limit);
	ZVAL_LONG(&args[2], type);

	context->fci.retval = &retval;
	context->fci.param_count = 3;
	context->fci.params = args;

	if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
		intl_errors_set_custom_msg(NULL, "enumCharTypes callback failed", 0);
		zval_dtor(&retval);
		return 0;
	}
	zval_dtor(&retval);
	return 1;
}
Exemple #6
0
/* {{{ clone handler for Transliterator */
static zend_object *Transliterator_clone_obj( zval *object )
{
	Transliterator_object *to_orig,
	                      *to_new;
	zend_object 		  *ret_val;
	intl_error_reset( NULL );

	to_orig = Z_INTL_TRANSLITERATOR_P( object );
	intl_error_reset( INTL_DATA_ERROR_P( to_orig ) );
	ret_val = Transliterator_ce_ptr->create_object( Z_OBJCE_P( object ) );
	to_new  = php_intl_transliterator_fetch_object( ret_val );

	zend_objects_clone_members( &to_new->zo, &to_orig->zo );

	if( to_orig->utrans != NULL )
	{
		zval			tempz; /* dummy zval to pass to transliterator_object_construct */

		/* guaranteed to return NULL if it fails */
		UTransliterator *utrans = utrans_clone( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to_orig ) );

		if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to_orig ) ) )
			goto err;

		ZVAL_OBJ(&tempz, ret_val);
		transliterator_object_construct( &tempz, utrans,
			TRANSLITERATOR_ERROR_CODE_P( to_orig ) );

		if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to_orig ) ) )
		{
			zend_string *err_msg;
err:

			if( utrans != NULL )
				transliterator_object_destroy( to_new );

			/* set the error anyway, in case in the future we decide not to
			 * throw an error. It also helps build the error message */
			intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to_orig ) );
			intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to_orig ),
				"Could not clone transliterator", 0 );

			err_msg = intl_error_get_message( TRANSLITERATOR_ERROR_P( to_orig ) );
			zend_throw_error( NULL, "%s", ZSTR_VAL(err_msg) );
			zend_string_free( err_msg ); /* if it's changed into a warning */
			/* do not destroy tempz; we need to return something */
		}
	}
	else
	{
		/* We shouldn't have unconstructed objects in the first place */
		php_error_docref( NULL, E_WARNING,
			"Cloning unconstructed transliterator." );
	}

	return ret_val;
}
Exemple #7
0
/* {{{ resourcebundle_array_fetch */
static void resourcebundle_array_fetch(zval *object, zval *offset, zval *return_value, int fallback)
{
	int32_t     meindex = 0;
	char *      mekey = NULL;
    zend_bool    is_numeric = 0;
	char         *pbuf;
	ResourceBundle_object *rb;

	intl_error_reset( NULL );
	RESOURCEBUNDLE_METHOD_FETCH_OBJECT;

	if(Z_TYPE_P(offset) == IS_LONG) {
		is_numeric = 1;
		meindex = (int32_t)Z_LVAL_P(offset);
		rb->child = ures_getByIndex( rb->me, meindex, rb->child, &INTL_DATA_ERROR_CODE(rb) );
	} else if(Z_TYPE_P(offset) == IS_STRING) {
		mekey = Z_STRVAL_P(offset);
		rb->child = ures_getByKey(rb->me, mekey, rb->child, &INTL_DATA_ERROR_CODE(rb) );
	} else {
		intl_errors_set(INTL_DATA_ERROR_P(rb), U_ILLEGAL_ARGUMENT_ERROR,
			"resourcebundle_get: index should be integer or string", 0);
		RETURN_NULL();
	}

	intl_error_set_code( NULL, INTL_DATA_ERROR_CODE(rb) );
	if (U_FAILURE(INTL_DATA_ERROR_CODE(rb))) {
		if (is_numeric) {
			spprintf( &pbuf, 0, "Cannot load resource element %d", meindex );
		} else {
			spprintf( &pbuf, 0, "Cannot load resource element '%s'", mekey );
		}
		intl_errors_set_custom_msg( INTL_DATA_ERROR_P(rb), pbuf, 1 );
		efree(pbuf);
		RETURN_NULL();
	}

	if (!fallback && (INTL_DATA_ERROR_CODE(rb) == U_USING_FALLBACK_WARNING || INTL_DATA_ERROR_CODE(rb) == U_USING_DEFAULT_WARNING)) {
		UErrorCode icuerror;
		const char * locale = ures_getLocaleByType( rb->me, ULOC_ACTUAL_LOCALE, &icuerror );
		if (is_numeric) {
			spprintf( &pbuf, 0, "Cannot load element %d without fallback from to %s", meindex, locale );
		} else {
			spprintf( &pbuf, 0, "Cannot load element '%s' without fallback from to %s", mekey, locale );
		}
		intl_errors_set_custom_msg( INTL_DATA_ERROR_P(rb), pbuf, 1 );
		efree(pbuf);
		RETURN_NULL();
	}

	resourcebundle_extract_value( return_value, rb );
}
Exemple #8
0
/* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
static int php_intl_idn_check_status(UErrorCode err, const char *msg)
{
	intl_error_set_code(NULL, err);
	if (U_FAILURE(err)) {
		char *buff;
		spprintf(&buff, 0, "%s: %s",
			get_active_function_name(),
			msg);
		intl_error_set_custom_msg(NULL, buff, 1);
		efree(buff);
		return FAILURE;
	}

	return SUCCESS;
}
Exemple #9
0
/* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode)
{
	intl_error_set_code(NULL, err);
	if (U_FAILURE(err)) {
		char *buff;
		spprintf(&buff, 0, "%s: %s",
			mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8",
			msg);
		intl_error_set_custom_msg(NULL, buff, 1);
		efree(buff);
		return FAILURE;
	}

	return SUCCESS;
}
Exemple #10
0
static void regexp_ctor(INTERNAL_FUNCTION_PARAMETERS)
{
    zval *object;
    Regexp_object *ro;
    char *pattern;
    int32_t pattern_len;
    UChar *upattern = NULL;
    int32_t upattern_len = 0;
    zval *zflags = NULL;
    uint32_t flags = 0;
    UParseError pe = { -1, -1, {0}, {0} };

    intl_error_reset(NULL TSRMLS_CC);
    object = return_value;
    if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &pattern, &pattern_len, &zflags)) {
        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "bad arguments", 0 TSRMLS_CC);
        zval_dtor(object);
        RETURN_NULL();
    }
    if (NULL != zflags) {
        switch (Z_TYPE_P(zflags)) {
            case IS_LONG:
                flags = (uint32_t) Z_LVAL_P(zflags);
                break;
            case IS_STRING:
            {
                const char *p;

                for (p = Z_STRVAL_P(zflags); '\0' != *p; p++) {
                    switch (*p) {
                        case 'i': flags |= UREGEX_CASE_INSENSITIVE; break;
                        case 'm': flags |= UREGEX_MULTILINE;        break;
                        case 's': flags |= UREGEX_DOTALL;           break;
                        case 'x': flags |= UREGEX_COMMENTS;         break;
                        case 'w': flags |= UREGEX_UWORD;            break;
                        default:
                            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "invalid modifier", 0 TSRMLS_CC);
                            zval_dtor(object);
                            RETURN_NULL();
                    }
                }
                break;
            }
            default:
                intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "bad arguments", 0 TSRMLS_CC);
                zval_dtor(object);
                RETURN_NULL();
        }
    }
    ro = (Regexp_object *) zend_object_store_get_object(object TSRMLS_CC);
    intl_convert_utf8_to_utf16(&upattern, &upattern_len, pattern, pattern_len, REGEXP_ERROR_CODE_P(ro));
    INTL_CTOR_CHECK_STATUS(ro, "string conversion of pattern to UTF-16 failed");
    ro->uregex = uregex_open(upattern, upattern_len, flags, &pe, REGEXP_ERROR_CODE_P(ro));
    efree(upattern);
    if (U_FAILURE(REGEXP_ERROR_CODE(ro))) {
        intl_error_set_code(NULL, REGEXP_ERROR_CODE(ro) TSRMLS_CC);
        if (-1 != pe.line) {
            regexp_parse_error_to_string(pe, pattern, pattern_len);
        } else {
            intl_error_set_custom_msg(NULL, "unable to compile ICU regular expression", 0 TSRMLS_CC);
        }
        zval_dtor(object);
        RETURN_NULL();
    }
}
Exemple #11
0
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
		const zend_string *domain, uint32_t option, int mode)
{
	UChar* ustring = NULL;
	int ustring_len = 0;
	UErrorCode status;
	zend_string *u8str;

	/* convert the string to UTF-16. */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&ustring, &ustring_len, ZSTR_VAL(domain), ZSTR_LEN(domain), &status);

	if (U_FAILURE(status)) {
		intl_error_set_code(NULL, status);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
		if (ustring) {
			efree(ustring);
		}
		RETURN_FALSE;
	} else {
		UChar       converted[MAXPATHLEN];
		int32_t     converted_ret_len;

		status = U_ZERO_ERROR;

#if U_ICU_VERSION_MAJOR_NUM >= 55
		UIDNAInfo info = UIDNA_INFO_INITIALIZER;
		UIDNA *idna = uidna_openUTS46((int32_t)option, &status);

		if (U_FAILURE(status)) {
			intl_error_set( NULL, status, "idn_to_ascii: failed to create an UIDNA instance", 0 );
			RETURN_FALSE;
		}

		if (mode == INTL_IDN_TO_ASCII) {
			converted_ret_len = uidna_nameToASCII(idna, ustring, ustring_len, converted, MAXPATHLEN, &info, &status);
		} else {
			converted_ret_len = uidna_nameToUnicode(idna, ustring, ustring_len, converted, MAXPATHLEN, &info, &status);
		}
		uidna_close(idna);
#else
		UParseError parse_error;
		if (mode == INTL_IDN_TO_ASCII) {
			converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
		} else {
			converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
		}
#endif
		efree(ustring);

		if (U_FAILURE(status)) {
			intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 );
			RETURN_FALSE;
		}

		status = U_ZERO_ERROR;
		u8str = intl_convert_utf16_to_utf8(converted, converted_ret_len, &status);

		if (!u8str) {
			/* Set global error code. */
			intl_error_set_code(NULL, status);

			/* Set error messages. */
			intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
			RETURN_FALSE;
		}
	}

	/* return the allocated string, not a duplicate */
	RETVAL_NEW_STR(u8str);
}
static int create_transliterator( char *str_id, int str_id_len, zend_long direction, zval *object )
{
	Transliterator_object *to;
	UChar	              *ustr_id    = NULL;
	int32_t               ustr_id_len = 0;
	UTransliterator       *utrans;
	UParseError           parse_error   = {0, -1};

	intl_error_reset( NULL );

	if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) )
	{
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
			"transliterator_create: invalid direction", 0 );
		return FAILURE;
	}

	object_init_ex( object, Transliterator_ce_ptr );
	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* fetch zend object from zval "object" into "to" */

	/* Convert transliterator id to UTF-16 */
	intl_convert_utf8_to_utf16( &ustr_id, &ustr_id_len, str_id, str_id_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL, "String conversion of id to UTF-16 failed", 0 );
		zval_dtor( object );
		return FAILURE;
	}

	/* Open ICU Transliterator. */
	utrans = utrans_openU( ustr_id, ustr_id_len, (UTransDirection ) direction,
		NULL, -1, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
	if (ustr_id) {
		efree( ustr_id );
	}
	
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		char *buf = NULL;
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		spprintf( &buf, 0, "transliterator_create: unable to open ICU transliterator"
			" with id \"%s\"", str_id );
		if( buf == NULL ) {
			intl_error_set_custom_msg( NULL,
				"transliterator_create: unable to open ICU transliterator", 0 );
		}
		else
		{
			intl_error_set_custom_msg( NULL, buf, /* copy message */ 1 );
			efree( buf );
		}
		zval_dtor( object );
		return FAILURE;
	}
	
	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
	/* no need to close the transliterator manually on construction error */
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL,
			"transliterator_create: internal constructor call failed", 0 );
		zval_dtor( object );
		return FAILURE;
	}

	return SUCCESS;
}
Exemple #13
0
/* {{{ void intl_error_set( intl_error* err, UErrorCode code, char* msg, int copyMsg )
 * Set error code and message.
 */
void intl_error_set( intl_error* err, UErrorCode code, char* msg, int copyMsg )
{
	intl_error_set_code( err, code );
	intl_error_set_custom_msg( err, msg, copyMsg );
}
Exemple #14
0
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	unsigned char* domain;
	int domain_len;
	long option = 0;
	UChar* ustring = NULL;
	int ustring_len = 0;
	UErrorCode status;
	char     *converted_utf8;
	int32_t   converted_utf8_len;
	UChar     converted[MAXPATHLEN];
	int32_t   converted_ret_len;

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", (char **)&domain, &domain_len, &option) == FAILURE) {
		return;
	}

	if (domain_len < 1) {
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "idn_to_ascii: empty domain name", 0 TSRMLS_CC );
		RETURN_FALSE;
	}

	/* convert the string to UTF-16. */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) domain, domain_len, &status );

	if (U_FAILURE(status)) {
		intl_error_set_code(NULL, status TSRMLS_CC);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
		efree(ustring);
		RETURN_FALSE;
	} else {
		UParseError parse_error;

		status = U_ZERO_ERROR;
		if (mode == INTL_IDN_TO_ASCII) {
			converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
		} else {
			converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
		}
		efree(ustring);

		if (U_FAILURE(status)) {
			intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 TSRMLS_CC );
			RETURN_FALSE;
		}

		status = U_ZERO_ERROR;
		intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);

		if (U_FAILURE(status)) {
			/* Set global error code. */
			intl_error_set_code(NULL, status TSRMLS_CC);

			/* Set error messages. */
			intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );
			efree(converted_utf8);
			RETURN_FALSE;
		}
	}

	/* return the allocated string, not a duplicate */
	RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
}
Exemple #15
0
/* {{{ grapheme_strpos_utf16 - strrpos using utf16*/
int
grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case TSRMLS_DC)
{
	UChar *uhaystack, *puhaystack, *uneedle;
	int32_t uhaystack_len, uneedle_len;
	int ret_pos;
	unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
	UBreakIterator* bi;
	UErrorCode status;

	*puchar_pos = -1;

	/* convert the strings to UTF-16. */

	uhaystack = NULL;
	uhaystack_len = 0;
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, (char *) haystack, haystack_len, &status );

	if ( U_FAILURE( status ) ) {
		/* Set global error code. */
		intl_error_set_code( NULL, status TSRMLS_CC );

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
		efree( uhaystack );
		return -1;
	}

	/* get a pointer to the haystack taking into account the offset */
	bi = NULL;
	status = U_ZERO_ERROR;
	bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );
	
	puhaystack = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset);
	uhaystack_len = (uhaystack_len - ( puhaystack - uhaystack));

	if ( NULL == puhaystack ) {
	
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
		
		efree( uhaystack );
		ubrk_close (bi);
					
		return -1;
	}

	if ( f_ignore_case ) {
		grapheme_intl_case_fold(&uhaystack, &puhaystack, &uhaystack_len, &status );
	}

	uneedle = NULL;
	uneedle_len = 0;
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );

	if ( U_FAILURE( status ) ) {
		/* Set global error code. */
		intl_error_set_code( NULL, status TSRMLS_CC );

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
		efree( uhaystack );
		efree( uneedle );
		ubrk_close (bi);
		
		return -1;
	}

	if ( f_ignore_case ) {
		grapheme_intl_case_fold(&uneedle, &uneedle, &uneedle_len, &status );
	}

	ret_pos = grapheme_memnstr_grapheme(bi, puhaystack, uneedle, uneedle_len, puhaystack + uhaystack_len );
	
	*puchar_pos = ubrk_current(bi);

	efree( uhaystack );
	efree( uneedle );
	ubrk_close (bi);

	return ret_pos;
}
Exemple #16
0
/* {{{ grapheme_strrpos_utf16 - strrpos using utf16 */
int
grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC)
{
    UChar *uhaystack, *puhaystack, *uhaystack_end, *uneedle;
    int32_t uhaystack_len, uneedle_len;
    UErrorCode status;
    unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
    UBreakIterator* bi = NULL;
    int ret_pos, pos;

    /* convert the strings to UTF-16. */
    uhaystack = NULL;
    uhaystack_len = 0;
    status = U_ZERO_ERROR;
    intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, (char *) haystack, haystack_len, &status );

    if ( U_FAILURE( status ) ) {
        /* Set global error code. */
        intl_error_set_code( NULL, status TSRMLS_CC );

        /* Set error messages. */
        intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
        efree( uhaystack );
        return -1;
    }

    if ( f_ignore_case ) {
        grapheme_intl_case_fold(&uhaystack, &uhaystack, &uhaystack_len, &status );
    }

    /* get a pointer to the haystack taking into account the offset */
    bi = NULL;
    status = U_ZERO_ERROR;
    bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );

    puhaystack = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset);

    if ( NULL == puhaystack ) {
        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
        efree( uhaystack );
        ubrk_close (bi);
        return -1;
    }

    uneedle = NULL;
    uneedle_len = 0;
    status = U_ZERO_ERROR;
    intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );

    if ( U_FAILURE( status ) ) {
        /* Set global error code. */
        intl_error_set_code( NULL, status TSRMLS_CC );

        /* Set error messages. */
        intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
        efree( uhaystack );
        efree( uneedle );
        ubrk_close (bi);
        return -1;
    }

    if ( f_ignore_case ) {
        grapheme_intl_case_fold(&uneedle, &uneedle, &uneedle_len, &status );
    }

    ret_pos = -1;   /* -1 represents 'not found' */

    /* back up until there's needle_len characters to compare */

    uhaystack_end = uhaystack + uhaystack_len;
    pos = ubrk_last(bi);
    puhaystack = uhaystack + pos;

    while ( uhaystack_end - puhaystack < uneedle_len ) {

        pos = ubrk_previous(bi);

        if ( UBRK_DONE == pos ) {
            break;
        }

        puhaystack = uhaystack + pos;
    }

    /* is there enough haystack left to hold the needle? */
    if ( ( uhaystack_end - puhaystack ) < uneedle_len ) {
        /* not enough, not found */
        goto exit;
    }

    while ( UBRK_DONE != pos ) {

        if (!u_memcmp(uneedle, puhaystack, uneedle_len)) {  /* needle_len - 1 in zend memnstr? */

            /* does the grapheme in the haystack end at the same place as the last grapheme in the needle? */

            if ( ubrk_isBoundary(bi, pos + uneedle_len) ) {

                /* found it, get grapheme count offset */
                ret_pos = grapheme_count_graphemes(bi, uhaystack, pos);
                break;
            }

            /* set position back */
            ubrk_isBoundary(bi, pos);
        }

        pos = ubrk_previous(bi);
        puhaystack = uhaystack + pos;
    }

exit:
    efree( uhaystack );
    efree( uneedle );
    ubrk_close (bi);

    return ret_pos;
}
static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)
{
    zval		*object		= getThis();
    char		*rules;
    int			rules_len;
    zend_bool	compiled	= 0;
    UErrorCode	status		= U_ZERO_ERROR;
    intl_error_reset(NULL TSRMLS_CC);

    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|b",
                              &rules, &rules_len, &compiled) == FAILURE) {
        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
                       "rbbi_create_instance: bad arguments", 0 TSRMLS_CC);
        RETURN_NULL();
    }

    // instantiation of ICU object
    RuleBasedBreakIterator *rbbi;

    if (!compiled) {
        UnicodeString	rulesStr;
        UParseError		parseError = UParseError();
        if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
                == FAILURE) {
            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
                           "rbbi_create_instance: rules were not a valid UTF-8 string",
                           0 TSRMLS_CC);
            RETURN_NULL();
        }

        rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
        intl_error_set_code(NULL, status TSRMLS_CC);
        if (U_FAILURE(status)) {
            char *msg;
            smart_str parse_error_str;
            parse_error_str = intl_parse_error_to_string(&parseError);
            spprintf(&msg, 0, "rbbi_create_instance: unable to create "
                     "RuleBasedBreakIterator from rules (%s)", parse_error_str.c);
            smart_str_free(&parse_error_str);
            intl_error_set_custom_msg(NULL, msg, 1 TSRMLS_CC);
            efree(msg);
            delete rbbi;
            RETURN_NULL();
        }
    } else { // compiled
#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
        rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
        if (U_FAILURE(status)) {
            intl_error_set(NULL, status, "rbbi_create_instance: unable to "
                           "create instance from compiled rules", 0 TSRMLS_CC);
            delete rbbi;
            RETURN_NULL();
        }
#else
        intl_error_set(NULL, U_UNSUPPORTED_ERROR, "rbbi_create_instance: "
                       "compiled rules require ICU >= 4.8", 0 TSRMLS_CC);
        RETURN_NULL();
#endif
    }

    breakiterator_object_create(return_value, rbbi TSRMLS_CC);
}