/* {{{ intl_errors_set_code( intl_error* err, UErrorCode err_code )
 * Store err_code in the given error object (when one is supplied) and,
 * unconditionally, in the error selected by passing NULL.
 */
void intl_errors_set_code( intl_error* err, UErrorCode err_code )
{
	if( err != NULL )
		intl_error_set_code( err, err_code );

	intl_error_set_code( NULL, err_code );
}
/* Turn a PHP value describing a codepoint into a UChar32.
 *
 * pcp - receives the codepoint on success; not written on failure
 * zcp - either an integer, or a string consisting of exactly one UTF-8
 *       encoded character
 *
 * Returns SUCCESS, or FAILURE after recording U_ILLEGAL_ARGUMENT_ERROR and an
 * explanatory message via intl_error_set_code()/intl_error_set_custom_msg().
 */
static inline int convert_cp(UChar32* pcp, zval *zcp)
{
	zend_long cp = -1;

	switch (Z_TYPE_P(zcp)) {
		case IS_LONG:
			cp = Z_LVAL_P(zcp);
			break;

		case IS_STRING: {
			size_t zcp_len = Z_STRLEN_P(zcp);
			int32_t i = 0;

			/* the offset used below is an int32_t, so the length must fit in an int */
			if (ZEND_SIZE_T_INT_OVFL(zcp_len)) {
				intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
				intl_error_set_custom_msg(NULL, "Input string is too long.", 0);
				return FAILURE;
			}

			/* decode the first character; afterwards i is the offset just past it */
			U8_NEXT(Z_STRVAL_P(zcp), i, zcp_len, cp);

			/* anything left over (or nothing consumed at all) is not "one character" */
			if (zcp_len != (size_t)i) {
				intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
				intl_error_set_custom_msg(NULL, "Passing a UTF-8 character for codepoint requires a string which is exactly one UTF-8 codepoint long.", 0);
				return FAILURE;
			}
			break;
		}

		default:
			intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
			intl_error_set_custom_msg(NULL, "Invalid parameter for unicode point.  Must be either integer or UTF-8 sequence.", 0);
			return FAILURE;
	}

	if (!((cp >= UCHAR_MIN_VALUE) && (cp <= UCHAR_MAX_VALUE))) {
		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
		intl_error_set_custom_msg(NULL, "Codepoint out of range", 0);
		return FAILURE;
	}

	*pcp = (UChar32)cp;
	return SUCCESS;
}
/* Convert a domain name with ICU's uidna_IDNToASCII() (mode ==
 * INTL_IDN_TO_ASCII) or uidna_IDNToUnicode() (any other mode).
 *
 * domain, domain_len - UTF-8 input
 * option             - handed to ICU as the options argument
 * mode               - selects the conversion direction
 *
 * On success return_value becomes the converted UTF-8 string. On any failure
 * the error is recorded via intl_error_set*() with a NULL error object and
 * return_value becomes false.
 */
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
		const char *domain, int32_t domain_len, uint32_t option, int mode)
{
	UChar* ustring = NULL;
	int ustring_len = 0;
	UErrorCode status;
	/* start out NULL so that the failure path below never frees garbage */
	char *converted_utf8 = NULL;
	size_t converted_utf8_len = 0;
	UChar converted[MAXPATHLEN];
	int32_t converted_ret_len;
	UParseError parse_error;

	/* convert the string to UTF-16. */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status);

	if (U_FAILURE(status)) {
		intl_error_set_code(NULL, status);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
		if (ustring) {
			efree(ustring);
		}
		RETURN_FALSE;
	}

	status = U_ZERO_ERROR;
	if (mode == INTL_IDN_TO_ASCII) {
		converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
	} else {
		converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
	}
	/* the UTF-16 input is no longer needed, whatever the outcome */
	efree(ustring);

	if (U_FAILURE(status)) {
		intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 );
		RETURN_FALSE;
	}

	status = U_ZERO_ERROR;
	intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);

	if (U_FAILURE(status)) {
		/* Set global error code. */
		intl_error_set_code(NULL, status);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );

		/* the converter may have failed before handing back a buffer */
		if (converted_utf8) {
			efree(converted_utf8);
		}
		RETURN_FALSE;
	}

	/* The two-argument RETVAL_STRINGL() stores a copy of the buffer in
	 * return_value, so our own allocation has to be released here. */
	RETVAL_STRINGL(converted_utf8, converted_utf8_len);
	efree(converted_utf8);
}
/* Forward one (code, nameChoice, name) triple to the callable described by
 * context->fci / context->fci_cache.
 *
 * Returns 1 when zend_call_function() did not report FAILURE; otherwise
 * records U_INTERNAL_PROGRAM_ERROR with a message and returns 0.
 */
static UBool enumCharNames_callback(enumCharNames_data *context,
		UChar32 code, UCharNameChoice nameChoice,
		const char *name, int32_t length)
{
	zval retval;
	zval args[3];
	UBool call_ok;

	ZVAL_NULL(&retval);
	ZVAL_LONG(&args[0], code);
	ZVAL_LONG(&args[1], nameChoice);
	ZVAL_STRINGL(&args[2], name, length);

	context->fci.retval = &retval;
	context->fci.param_count = 3;
	context->fci.params = args;

	call_ok = (zend_call_function(&context->fci, &context->fci_cache) != FAILURE);
	if (!call_ok) {
		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
		intl_error_set_custom_msg(NULL, "enumCharNames callback failed", 0);
	}

	/* the return value and the string argument are released on both outcomes */
	zval_dtor(&retval);
	zval_dtor(&args[2]);

	return call_ok;
}
/* Forward one (start, limit, type) range to the callable described by
 * context->fci / context->fci_cache.
 *
 * Returns 1 when zend_call_function() did not report FAILURE; otherwise
 * records U_INTERNAL_PROGRAM_ERROR with a message and returns 0.
 */
static UBool enumCharType_callback(enumCharType_data *context,
		UChar32 start, UChar32 limit, UCharCategory type)
{
	zval retval;
	zval args[3];
	UBool call_ok;

	ZVAL_NULL(&retval);

	/* Note that $start is INclusive, while $limit is EXclusive
	 * Therefore (0, 32, 15) means CPs 0..31 are of type 15
	 */
	ZVAL_LONG(&args[0], start);
	ZVAL_LONG(&args[1], limit);
	ZVAL_LONG(&args[2], type);

	context->fci.retval = &retval;
	context->fci.param_count = 3;
	context->fci.params = args;

	call_ok = (zend_call_function(&context->fci, &context->fci_cache) != FAILURE);
	if (!call_ok) {
		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
		intl_errors_set_custom_msg(NULL, "enumCharTypes callback failed", 0);
	}

	/* all three arguments are plain integers, so only retval needs releasing */
	zval_dtor(&retval);

	return call_ok;
}
/* {{{ clone handler for Transliterator
 * Creates a new object of the same class, copies the members over and, when
 * the original wraps an ICU transliterator, clones that as well. The new
 * object is returned in every case; a failed clone additionally throws an
 * error, and an unconstructed original raises a warning.
 */
static zend_object *Transliterator_clone_obj( zval *object )
{
	Transliterator_object	*to_orig,
							*to_new;
	zend_object				*ret_val;
	UTransliterator			*utrans;

	intl_error_reset( NULL );

	to_orig = Z_INTL_TRANSLITERATOR_P( object );
	intl_error_reset( INTL_DATA_ERROR_P( to_orig ) );

	ret_val = Transliterator_ce_ptr->create_object( Z_OBJCE_P( object ) );
	to_new  = php_intl_transliterator_fetch_object( ret_val );

	zend_objects_clone_members( &to_new->zo, &to_orig->zo );

	if( to_orig->utrans == NULL )
	{
		/* We shouldn't have unconstructed objects in the first place */
		php_error_docref( NULL, E_WARNING,
			"Cloning unconstructed transliterator." );
		return ret_val;
	}

	/* guaranteed to return NULL if it fails */
	utrans = utrans_clone( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to_orig ) );

	/* only attempt construction when the ICU clone itself went through */
	if( !U_FAILURE( TRANSLITERATOR_ERROR_CODE( to_orig ) ) )
	{
		zval tempz; /* dummy zval to pass to transliterator_object_construct */

		ZVAL_OBJ( &tempz, ret_val );
		transliterator_object_construct( &tempz, utrans,
			TRANSLITERATOR_ERROR_CODE_P( to_orig ) );
	}

	/* reached with a failure code from either the clone or the construction */
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to_orig ) ) )
	{
		zend_string *err_msg;

		if( utrans != NULL )
			transliterator_object_destroy( to_new );

		/* set the error anyway, in case in the future we decide not to
		 * throw an error. It also helps build the error message */
		intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to_orig ) );
		intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to_orig ),
			"Could not clone transliterator", 0 );

		err_msg = intl_error_get_message( TRANSLITERATOR_ERROR_P( to_orig ) );
		zend_throw_error( NULL, "%s", ZSTR_VAL(err_msg) );
		zend_string_free( err_msg ); /* if it's changed into a warning */
		/* do not destroy the new object; we need to return something */
	}

	return ret_val;
}
/* {{{ resourcebundle_array_fetch
 * Look up one element of a resource bundle and extract it into return_value.
 *
 * object       - the bundle object (consumed by RESOURCEBUNDLE_METHOD_FETCH_OBJECT)
 * offset       - integer index or string key of the element
 * return_value - receives the extracted value, or null on any error
 * fallback     - when zero, a lookup that ICU flagged with
 *                U_USING_FALLBACK_WARNING or U_USING_DEFAULT_WARNING is
 *                treated as an error
 */
static void resourcebundle_array_fetch(zval *object, zval *offset, zval *return_value, int fallback)
{
	int32_t     meindex = 0;
	char *      mekey = NULL;
	zend_bool   is_numeric = 0;
	char       *pbuf;
	ResourceBundle_object *rb;

	intl_error_reset( NULL );
	RESOURCEBUNDLE_METHOD_FETCH_OBJECT;

	if(Z_TYPE_P(offset) == IS_LONG) {
		is_numeric = 1;
		meindex = (int32_t)Z_LVAL_P(offset);
		rb->child = ures_getByIndex( rb->me, meindex, rb->child, &INTL_DATA_ERROR_CODE(rb) );
	} else if(Z_TYPE_P(offset) == IS_STRING) {
		mekey = Z_STRVAL_P(offset);
		rb->child = ures_getByKey(rb->me, mekey, rb->child, &INTL_DATA_ERROR_CODE(rb) );
	} else {
		intl_errors_set(INTL_DATA_ERROR_P(rb), U_ILLEGAL_ARGUMENT_ERROR,
			"resourcebundle_get: index should be integer or string", 0);
		RETURN_NULL();
	}

	/* mirror the object's status (which may be a mere warning) into the global error */
	intl_error_set_code( NULL, INTL_DATA_ERROR_CODE(rb) );
	if (U_FAILURE(INTL_DATA_ERROR_CODE(rb))) {
		if (is_numeric) {
			spprintf( &pbuf, 0, "Cannot load resource element %d", meindex );
		} else {
			spprintf( &pbuf, 0, "Cannot load resource element '%s'", mekey );
		}
		intl_errors_set_custom_msg( INTL_DATA_ERROR_P(rb), pbuf, 1 );
		efree(pbuf);
		RETURN_NULL();
	}

	if (!fallback && (INTL_DATA_ERROR_CODE(rb) == U_USING_FALLBACK_WARNING || INTL_DATA_ERROR_CODE(rb) == U_USING_DEFAULT_WARNING)) {
		/* ICU functions return early when the incoming status already signals
		 * a failure, so the status must start out as U_ZERO_ERROR rather than
		 * as whatever happens to be on the stack. */
		UErrorCode icuerror = U_ZERO_ERROR;
		const char * locale = ures_getLocaleByType( rb->me, ULOC_ACTUAL_LOCALE, &icuerror );
		if (is_numeric) {
			spprintf( &pbuf, 0, "Cannot load element %d without fallback from to %s", meindex, locale );
		} else {
			spprintf( &pbuf, 0, "Cannot load element '%s' without fallback from to %s", mekey, locale );
		}
		intl_errors_set_custom_msg( INTL_DATA_ERROR_P(rb), pbuf, 1 );
		efree(pbuf);
		RETURN_NULL();
	}

	resourcebundle_extract_value( return_value, rb );
}
/* Function counterpart of INTL_CHECK_STATUS whose message prefix is the name
 * of the currently executing function.
 *
 * err - status to record (it is stored even when it is not a failure)
 * msg - text appended after "<function name>: " when err is a failure
 *
 * Returns SUCCESS when err is not a failure, FAILURE otherwise.
 */
static int php_intl_idn_check_status(UErrorCode err, const char *msg)
{
	char *full_msg;

	intl_error_set_code(NULL, err);

	if (!U_FAILURE(err)) {
		return SUCCESS;
	}

	spprintf(&full_msg, 0, "%s: %s", get_active_function_name(), msg);
	/* third argument 1: the message is copied, so our buffer can go right away */
	intl_error_set_custom_msg(NULL, full_msg, 1);
	efree(full_msg);

	return FAILURE;
}
/* Function counterpart of INTL_CHECK_STATUS whose message prefix depends on
 * the conversion mode.
 *
 * err  - status to record (it is stored even when it is not a failure)
 * msg  - text appended after the prefix when err is a failure
 * mode - INTL_IDN_TO_ASCII selects the prefix "idn_to_ascii", anything else
 *        selects "idn_to_utf8"
 *
 * Returns SUCCESS when err is not a failure, FAILURE otherwise.
 */
static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode)
{
	const char *prefix;
	char *full_msg;

	intl_error_set_code(NULL, err);

	if (!U_FAILURE(err)) {
		return SUCCESS;
	}

	if (mode == INTL_IDN_TO_ASCII) {
		prefix = "idn_to_ascii";
	} else {
		prefix = "idn_to_utf8";
	}

	spprintf(&full_msg, 0, "%s: %s", prefix, msg);
	/* third argument 1: the message is copied, so our buffer can go right away */
	intl_error_set_custom_msg(NULL, full_msg, 1);
	efree(full_msg);

	return FAILURE;
}
static void regexp_ctor(INTERNAL_FUNCTION_PARAMETERS) { zval *object; Regexp_object *ro; char *pattern; int32_t pattern_len; UChar *upattern = NULL; int32_t upattern_len = 0; zval *zflags = NULL; uint32_t flags = 0; UParseError pe = { -1, -1, {0}, {0} }; intl_error_reset(NULL TSRMLS_CC); object = return_value; if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &pattern, &pattern_len, &zflags)) { intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "bad arguments", 0 TSRMLS_CC); zval_dtor(object); RETURN_NULL(); } if (NULL != zflags) { switch (Z_TYPE_P(zflags)) { case IS_LONG: flags = (uint32_t) Z_LVAL_P(zflags); break; case IS_STRING: { const char *p; for (p = Z_STRVAL_P(zflags); '\0' != *p; p++) { switch (*p) { case 'i': flags |= UREGEX_CASE_INSENSITIVE; break; case 'm': flags |= UREGEX_MULTILINE; break; case 's': flags |= UREGEX_DOTALL; break; case 'x': flags |= UREGEX_COMMENTS; break; case 'w': flags |= UREGEX_UWORD; break; default: intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "invalid modifier", 0 TSRMLS_CC); zval_dtor(object); RETURN_NULL(); } } break; } default: intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "bad arguments", 0 TSRMLS_CC); zval_dtor(object); RETURN_NULL(); } } ro = (Regexp_object *) zend_object_store_get_object(object TSRMLS_CC); intl_convert_utf8_to_utf16(&upattern, &upattern_len, pattern, pattern_len, REGEXP_ERROR_CODE_P(ro)); INTL_CTOR_CHECK_STATUS(ro, "string conversion of pattern to UTF-16 failed"); ro->uregex = uregex_open(upattern, upattern_len, flags, &pe, REGEXP_ERROR_CODE_P(ro)); efree(upattern); if (U_FAILURE(REGEXP_ERROR_CODE(ro))) { intl_error_set_code(NULL, REGEXP_ERROR_CODE(ro) TSRMLS_CC); if (-1 != pe.line) { regexp_parse_error_to_string(pe, pattern, pattern_len); } else { intl_error_set_custom_msg(NULL, "unable to compile ICU regular expression", 0 TSRMLS_CC); } zval_dtor(object); RETURN_NULL(); } }
/* Convert a domain name to its ASCII (mode == INTL_IDN_TO_ASCII) or Unicode
 * (any other mode) form.
 *
 * With ICU >= 55 the conversion goes through a UTS #46 UIDNA instance
 * (uidna_nameToASCII / uidna_nameToUnicode); older ICU versions use
 * uidna_IDNToASCII / uidna_IDNToUnicode.
 *
 * domain - UTF-8 input
 * option - handed to ICU as the options argument
 * mode   - selects the conversion direction
 *
 * On success return_value becomes the converted UTF-8 string. On any failure
 * the error is recorded via intl_error_set*() with a NULL error object and
 * return_value becomes false.
 */
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
		const zend_string *domain, uint32_t option, int mode)
{
	UChar* ustring = NULL;
	int ustring_len = 0;
	UErrorCode status;
	zend_string *u8str;

	/* convert the string to UTF-16. */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&ustring, &ustring_len, ZSTR_VAL(domain), ZSTR_LEN(domain), &status);

	if (U_FAILURE(status)) {
		intl_error_set_code(NULL, status);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
		if (ustring) {
			efree(ustring);
		}
		RETURN_FALSE;
	} else {
		UChar converted[MAXPATHLEN];
		int32_t converted_ret_len;

		status = U_ZERO_ERROR;

#if U_ICU_VERSION_MAJOR_NUM >= 55
		/* NOTE(review): info is filled in by ICU but never inspected here;
		 * confirm whether a non-zero info.errors should fail the call. */
		UIDNAInfo info = UIDNA_INFO_INITIALIZER;
		UIDNA *idna = uidna_openUTS46((int32_t)option, &status);

		if (U_FAILURE(status)) {
			intl_error_set( NULL, status, "idn_to_ascii: failed to create an UIDNA instance", 0 );
			/* the UTF-16 input must not outlive this early exit */
			efree(ustring);
			RETURN_FALSE;
		}

		if (mode == INTL_IDN_TO_ASCII) {
			converted_ret_len = uidna_nameToASCII(idna, ustring, ustring_len, converted, MAXPATHLEN, &info, &status);
		} else {
			converted_ret_len = uidna_nameToUnicode(idna, ustring, ustring_len, converted, MAXPATHLEN, &info, &status);
		}
		uidna_close(idna);
#else
		UParseError parse_error;

		if (mode == INTL_IDN_TO_ASCII) {
			converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
		} else {
			converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
		}
#endif
		efree(ustring);

		if (U_FAILURE(status)) {
			intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 );
			RETURN_FALSE;
		}

		status = U_ZERO_ERROR;
		u8str = intl_convert_utf16_to_utf8(converted, converted_ret_len, &status);

		if (!u8str) {
			/* Set global error code. */
			intl_error_set_code(NULL, status);

			/* Set error messages. */
			intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
			RETURN_FALSE;
		}
	}

	/* return the allocated string, not a duplicate */
	RETVAL_NEW_STR(u8str);
}
/* Initialise `object` as a Transliterator for the given ICU id.
 *
 * str_id, str_id_len - UTF-8 transliterator id
 * direction          - TRANSLITERATOR_FORWARD or TRANSLITERATOR_REVERSE
 * object             - zval that is turned into the new object
 *
 * Returns SUCCESS, or FAILURE after recording the error. Once the object has
 * been initialised, every failure path also destroys it again.
 */
static int create_transliterator( char *str_id, int str_id_len, zend_long direction, zval *object )
{
	Transliterator_object	*to;
	UTransliterator			*utrans;
	UChar					*id_utf16		= NULL;
	int32_t					id_utf16_len	= 0;
	UParseError				parse_error		= {0, -1};

	intl_error_reset( NULL );

	if( direction != TRANSLITERATOR_FORWARD && direction != TRANSLITERATOR_REVERSE )
	{
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
			"transliterator_create: invalid direction", 0 );
		return FAILURE;
	}

	object_init_ex( object, Transliterator_ce_ptr );
	TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* fetch zend object from zval "object" into "to" */

	/* Convert transliterator id to UTF-16 */
	intl_convert_utf8_to_utf16( &id_utf16, &id_utf16_len, str_id, str_id_len,
		TRANSLITERATOR_ERROR_CODE_P( to ) );
	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		intl_error_set_custom_msg( NULL, "String conversion of id to UTF-16 failed", 0 );
		zval_dtor( object );
		return FAILURE;
	}

	/* Open ICU Transliterator. */
	utrans = utrans_openU( id_utf16, id_utf16_len, (UTransDirection ) direction,
		NULL, -1, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );

	/* the UTF-16 id is only needed for the open call */
	if( id_utf16 != NULL )
		efree( id_utf16 );

	if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
	{
		char *detail = NULL;

		intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
		spprintf( &detail, 0, "transliterator_create: unable to open ICU transliterator"
			" with id \"%s\"", str_id );

		if( detail != NULL )
		{
			intl_error_set_custom_msg( NULL, detail, /* copy message */ 1 );
			efree( detail );
		}
		else
		{
			/* no formatted message available: fall back to the static text */
			intl_error_set_custom_msg( NULL,
				"transliterator_create: unable to open ICU transliterator", 0 );
		}

		zval_dtor( object );
		return FAILURE;
	}

	transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );

	if( !U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
		return SUCCESS;

	/* no need to close the transliterator manually on construction error */
	intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) );
	intl_error_set_custom_msg( NULL,
		"transliterator_create: internal constructor call failed", 0 );
	zval_dtor( object );
	return FAILURE;
}
/* {{{ void intl_error_set( intl_error* err, UErrorCode code, char* msg, int copyMsg )
 * Convenience wrapper: stores the error code first and the custom message
 * second, both on the same error object.
 *
 * err     - forwarded unchanged to both setters
 * code    - error code to store
 * msg     - custom message to store
 * copyMsg - forwarded to intl_error_set_custom_msg() as its copy flag
 */
void intl_error_set( intl_error* err, UErrorCode code, char* msg, int copyMsg )
{
	intl_error_set_code(
		err,
		code );

	intl_error_set_custom_msg(
		err,
		msg,
		copyMsg );
}
/* Shared body of the IDN conversion functions: parses (domain [, option]) and
 * converts the domain with uidna_IDNToASCII() (mode == INTL_IDN_TO_ASCII) or
 * uidna_IDNToUnicode() (any other mode).
 *
 * Returns the converted UTF-8 string. Returns false, after recording the
 * error, when the domain is empty or a conversion step fails. Returns
 * without touching return_value when argument parsing fails.
 */
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	unsigned char* domain;
	int domain_len;
	long option = 0;
	UChar* ustring = NULL;
	int ustring_len = 0;
	UErrorCode status;
	/* start out NULL so that the failure path below never frees garbage */
	char *converted_utf8 = NULL;
	int32_t converted_utf8_len;
	UChar converted[MAXPATHLEN];
	int32_t converted_ret_len;
	UParseError parse_error;

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", (char **)&domain, &domain_len, &option) == FAILURE) {
		return;
	}

	if (domain_len < 1) {
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "idn_to_ascii: empty domain name", 0 TSRMLS_CC );
		RETURN_FALSE;
	}

	/* convert the string to UTF-16. */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) domain, domain_len, &status );

	if (U_FAILURE(status)) {
		intl_error_set_code(NULL, status TSRMLS_CC);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );

		/* a failed conversion may leave no buffer behind */
		if (ustring) {
			efree(ustring);
		}
		RETURN_FALSE;
	}

	status = U_ZERO_ERROR;
	if (mode == INTL_IDN_TO_ASCII) {
		converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
	} else {
		converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
	}
	/* the UTF-16 input is no longer needed, whatever the outcome */
	efree(ustring);

	if (U_FAILURE(status)) {
		intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 TSRMLS_CC );
		RETURN_FALSE;
	}

	status = U_ZERO_ERROR;
	intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);

	if (U_FAILURE(status)) {
		/* Set global error code. */
		intl_error_set_code(NULL, status TSRMLS_CC);

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );

		/* the converter may have failed before handing back a buffer */
		if (converted_utf8) {
			efree(converted_utf8);
		}
		RETURN_FALSE;
	}

	/* return the allocated string, not a duplicate */
	RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
}
/* {{{ grapheme_strpos_utf16 - strrpos using utf16*/ int grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case TSRMLS_DC) { UChar *uhaystack, *puhaystack, *uneedle; int32_t uhaystack_len, uneedle_len; int ret_pos; unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE]; UBreakIterator* bi; UErrorCode status; *puchar_pos = -1; /* convert the strings to UTF-16. */ uhaystack = NULL; uhaystack_len = 0; status = U_ZERO_ERROR; intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, (char *) haystack, haystack_len, &status ); if ( U_FAILURE( status ) ) { /* Set global error code. */ intl_error_set_code( NULL, status TSRMLS_CC ); /* Set error messages. */ intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC ); efree( uhaystack ); return -1; } /* get a pointer to the haystack taking into account the offset */ bi = NULL; status = U_ZERO_ERROR; bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC ); puhaystack = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset); uhaystack_len = (uhaystack_len - ( puhaystack - uhaystack)); if ( NULL == puhaystack ) { intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC ); efree( uhaystack ); ubrk_close (bi); return -1; } if ( f_ignore_case ) { grapheme_intl_case_fold(&uhaystack, &puhaystack, &uhaystack_len, &status ); } uneedle = NULL; uneedle_len = 0; status = U_ZERO_ERROR; intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status ); if ( U_FAILURE( status ) ) { /* Set global error code. */ intl_error_set_code( NULL, status TSRMLS_CC ); /* Set error messages. 
*/ intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC ); efree( uhaystack ); efree( uneedle ); ubrk_close (bi); return -1; } if ( f_ignore_case ) { grapheme_intl_case_fold(&uneedle, &uneedle, &uneedle_len, &status ); } ret_pos = grapheme_memnstr_grapheme(bi, puhaystack, uneedle, uneedle_len, puhaystack + uhaystack_len ); *puchar_pos = ubrk_current(bi); efree( uhaystack ); efree( uneedle ); ubrk_close (bi); return ret_pos; }
/* {{{ grapheme_strrpos_utf16 - strrpos using utf16
 * Converts haystack and needle to UTF-16 (case-folding both when
 * f_ignore_case is set) and walks the break iterator backwards from the end
 * of the haystack, looking for the last position at which the needle matches
 * and ends on a boundary.
 *
 * Returns the value of grapheme_count_graphemes() for the match, or -1 when
 * nothing is found, a conversion fails or the offset lies outside the string
 * (the latter two with the error recorded).
 */
int grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC)
{
	UChar *uhaystack, *puhaystack, *uhaystack_end, *uneedle;
	int32_t uhaystack_len, uneedle_len;
	UErrorCode status;
	unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
	UBreakIterator* bi = NULL;
	int ret_pos, pos;

	/* convert the strings to UTF-16. */
	uhaystack = NULL;
	uhaystack_len = 0;
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, (char *) haystack, haystack_len, &status );

	if ( U_FAILURE( status ) ) {
		/* Set global error code. */
		intl_error_set_code( NULL, status TSRMLS_CC );

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );

		/* a failed conversion may leave no buffer behind */
		if ( uhaystack ) {
			efree( uhaystack );
		}
		return -1;
	}

	if ( f_ignore_case ) {
		grapheme_intl_case_fold(&uhaystack, &uhaystack, &uhaystack_len, &status );
	}

	/* get a pointer to the haystack taking into account the offset */
	status = U_ZERO_ERROR;
	bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );

	puhaystack = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset);

	if ( NULL == puhaystack ) {
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
		efree( uhaystack );
		ubrk_close (bi);
		return -1;
	}

	uneedle = NULL;
	uneedle_len = 0;
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );

	if ( U_FAILURE( status ) ) {
		/* Set global error code. */
		intl_error_set_code( NULL, status TSRMLS_CC );

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
		efree( uhaystack );
		if ( uneedle ) {
			efree( uneedle );
		}
		ubrk_close (bi);
		return -1;
	}

	if ( f_ignore_case ) {
		grapheme_intl_case_fold(&uneedle, &uneedle, &uneedle_len, &status );
	}

	ret_pos = -1;   /* -1 represents 'not found' */

	/* back up until there's needle_len characters to compare */

	uhaystack_end = uhaystack + uhaystack_len;
	pos = ubrk_last(bi);
	puhaystack = uhaystack + pos;

	while ( uhaystack_end - puhaystack < uneedle_len ) {

		pos = ubrk_previous(bi);

		if ( UBRK_DONE == pos ) {
			break;
		}

		puhaystack = uhaystack + pos;
	}

	/* is there enough haystack left to hold the needle? */
	if ( ( uhaystack_end - puhaystack ) < uneedle_len ) {
		/* not enough, not found */
		goto exit;
	}

	while ( UBRK_DONE != pos ) {

		/* Derive the candidate pointer only from a valid boundary: once
		 * ubrk_previous() yields UBRK_DONE, uhaystack + pos would point in
		 * front of the buffer, which must not even be computed. */
		puhaystack = uhaystack + pos;

		if (!u_memcmp(uneedle, puhaystack, uneedle_len)) {  /* needle_len - 1 in zend memnstr? */

			/* does the grapheme in the haystack end at the same place as the last grapheme in the needle? */

			if ( ubrk_isBoundary(bi, pos + uneedle_len) ) {

				/* found it, get grapheme count offset */
				ret_pos = grapheme_count_graphemes(bi, uhaystack, pos);
				break;
			}

			/* set position back */
			ubrk_isBoundary(bi, pos);
		}

		pos = ubrk_previous(bi);
	}

exit:
	efree( uhaystack );
	efree( uneedle );
	ubrk_close (bi);

	return ret_pos;
}
// Shared constructor body: parses (rules [, compiled]) and wraps a new
// RuleBasedBreakIterator in return_value.
//
// When `compiled` is false the rules are treated as UTF-8 rule source text;
// otherwise they are handed to ICU as binary rules, which this code only
// supports for ICU >= 4.8. Every failure records an error and returns null.
static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)
{
	zval		*object		= getThis();
	char		*rules;
	int			rules_len;
	zend_bool	compiled	= 0;
	UErrorCode	status		= U_ZERO_ERROR;

	// instantiation of ICU object
	RuleBasedBreakIterator *rbbi;

	intl_error_reset(NULL TSRMLS_CC);

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|b",
			&rules, &rules_len, &compiled) == FAILURE) {
		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
			"rbbi_create_instance: bad arguments", 0 TSRMLS_CC);
		RETURN_NULL();
	}

	if (compiled) {
#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
		rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
		if (U_FAILURE(status)) {
			intl_error_set(NULL, status, "rbbi_create_instance: unable to "
				"create instance from compiled rules", 0 TSRMLS_CC);
			delete rbbi;
			RETURN_NULL();
		}
#else
		intl_error_set(NULL, U_UNSUPPORTED_ERROR, "rbbi_create_instance: "
			"compiled rules require ICU >= 4.8", 0 TSRMLS_CC);
		RETURN_NULL();
#endif
	} else {
		// rule source text: convert it, then let ICU parse it
		UnicodeString	rulesStr;
		UParseError		parseError = UParseError();

		if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
				== FAILURE) {
			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
				"rbbi_create_instance: rules were not a valid UTF-8 string",
				0 TSRMLS_CC);
			RETURN_NULL();
		}

		rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
		intl_error_set_code(NULL, status TSRMLS_CC);

		if (U_FAILURE(status)) {
			char		*msg;
			smart_str	parse_error_str;

			// include the parse error details in the message
			parse_error_str = intl_parse_error_to_string(&parseError);
			spprintf(&msg, 0, "rbbi_create_instance: unable to create "
				"RuleBasedBreakIterator from rules (%s)", parse_error_str.c);
			smart_str_free(&parse_error_str);

			intl_error_set_custom_msg(NULL, msg, 1 TSRMLS_CC);
			efree(msg);
			delete rbbi;
			RETURN_NULL();
		}
	}

	breakiterator_object_create(return_value, rbbi TSRMLS_CC);
}