/* {{{ grapheme_memnstr_grapheme: find needle in haystack using grapheme boundaries
 *
 * Scans the UTF-16 haystack for the first occurrence of needle whose end
 * falls on a boundary reported by the break iterator.
 *
 * bi          break iterator; its text is (re)attached to the haystack here
 * haystack    first UTF-16 code unit to search
 * needle      UTF-16 code units to look for
 * needle_len  number of code units in needle
 * end         one past the last code unit of haystack
 *
 * Returns the value of grapheme_count_graphemes() for the code units that
 * precede the match, or -1 when there is no match, the needle is empty, the
 * needle is longer than the haystack, or the iterator cannot be attached to
 * the haystack.
 */
inline int32_t
grapheme_memnstr_grapheme(UBreakIterator *bi, UChar *haystack, UChar *needle, int32_t needle_len, UChar *end)
{
	UChar *p = haystack;
	UChar ne;
	int text_is_set = 0;

	/* An empty needle has no last code unit to pre-test; bail out rather
	 * than read needle[-1]. */
	if (needle_len < 1) {
		return -1;
	}

	/* A needle longer than the haystack can never match; checking first
	 * also avoids forming a pointer before the start of the buffer. */
	if ((end - haystack) < needle_len) {
		return -1;
	}

	ne = needle[needle_len - 1];

	/* from here on, end is the last position where a match may start */
	end -= needle_len;

	while (p <= end) {
		p = u_memchr(p, *needle, (int32_t)(end - p + 1));
		if (p == NULL) {
			return -1;
		}

		/* needle_len - 1 works because the last code unit is compared
		 * separately (and if needle_len is 1, u_memchr already tested it) */
		if (ne == p[needle_len - 1] && !u_memcmp(needle, p, needle_len - 1)) {

			/* Attach the haystack to the iterator once, on the first
			 * candidate; the text does not change between candidates. */
			if (!text_is_set) {
				UErrorCode status = U_ZERO_ERROR;

				ubrk_setText(bi, haystack, (int32_t)(end - haystack) + needle_len, &status);
				if (U_FAILURE(status)) {
					/* boundaries cannot be tested without the text */
					return -1;
				}
				text_is_set = 1;
			}

			/* does the grapheme end here? */
			if (ubrk_isBoundary(bi, (int32_t)(p - haystack) + needle_len)) {
				/* found it, get grapheme count offset */
				return grapheme_count_graphemes(bi, haystack, (int32_t)(p - haystack));
			}
		}

		p++;
	}

	return -1;
}
/* {{{ grapheme_strrpos_utf16 - strrpos using utf16
 *
 * Converts haystack and needle from UTF-8 to UTF-16 (optionally case folding
 * both), then walks the break iterator backwards from the end of the haystack
 * looking for the last position where the needle matches and also ends on an
 * iterator boundary.
 *
 * haystack / haystack_len  UTF-8 bytes to search and their length
 * needle / needle_len      UTF-8 bytes to look for and their length
 * offset                   passed to grapheme_get_haystack_offset(); a NULL
 *                          result is reported as an illegal argument
 * f_ignore_case            non-zero to case fold both strings before comparing
 *
 * Returns the value of grapheme_count_graphemes() for the match position, or
 * -1 when nothing matches or an error was recorded through intl_error_*.
 *
 * NOTE(review): the offset is only validated here; the backward scan starts at
 * ubrk_last() and runs until the iterator is exhausted, so it is not limited
 * by the offset. Confirm against the callers whether that is intended.
 */
int
grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC)
{
	UChar *uhaystack = NULL, *puhaystack, *uhaystack_end, *uneedle = NULL;
	int32_t uhaystack_len = 0, uneedle_len = 0;
	UErrorCode status;
	unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
	UBreakIterator* bi = NULL;
	int ret_pos = -1;   /* -1 represents 'not found' and is also the error result */
	int pos;

	/* convert the haystack to UTF-16. */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, (char *) haystack, haystack_len, &status );

	if ( U_FAILURE( status ) ) {
		/* Set global error code. */
		intl_error_set_code( NULL, status TSRMLS_CC );

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
		goto cleanup;
	}

	if ( f_ignore_case ) {
		grapheme_intl_case_fold(&uhaystack, &uhaystack, &uhaystack_len, &status );

		/* a failed fold would make the comparison below silently case sensitive */
		if ( U_FAILURE( status ) ) {
			intl_error_set_code( NULL, status TSRMLS_CC );
			intl_error_set_custom_msg( NULL, "Error case folding input string", 0 TSRMLS_CC );
			goto cleanup;
		}
	}

	/* obtain the break iterator and validate the offset against the haystack */
	status = U_ZERO_ERROR;
	bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );

	if ( U_FAILURE( status ) ) {
		intl_error_set_code( NULL, status TSRMLS_CC );
		intl_error_set_custom_msg( NULL, "Failed to get iterator", 0 TSRMLS_CC );
		goto cleanup;
	}

	puhaystack = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset);

	if ( NULL == puhaystack ) {
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
		goto cleanup;
	}

	/* convert the needle to UTF-16. */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );

	if ( U_FAILURE( status ) ) {
		/* Set global error code. */
		intl_error_set_code( NULL, status TSRMLS_CC );

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
		goto cleanup;
	}

	if ( f_ignore_case ) {
		grapheme_intl_case_fold(&uneedle, &uneedle, &uneedle_len, &status );

		if ( U_FAILURE( status ) ) {
			intl_error_set_code( NULL, status TSRMLS_CC );
			intl_error_set_custom_msg( NULL, "Error case folding needle string", 0 TSRMLS_CC );
			goto cleanup;
		}
	}

	/* back up until there's needle_len characters to compare */
	uhaystack_end = uhaystack + uhaystack_len;
	pos = ubrk_last(bi);
	puhaystack = uhaystack + pos;

	while ( uhaystack_end - puhaystack < uneedle_len ) {

		pos = ubrk_previous(bi);

		if ( UBRK_DONE == pos ) {
			break;
		}

		puhaystack = uhaystack + pos;
	}

	/* is there enough haystack left to hold the needle? */
	if ( ( uhaystack_end - puhaystack ) < uneedle_len ) {
		/* not enough, not found */
		goto cleanup;
	}

	while ( UBRK_DONE != pos ) {

		/* Derive the pointer only from a valid position, so that UBRK_DONE
		 * never produces a pointer before the start of the buffer. */
		puhaystack = uhaystack + pos;

		if (!u_memcmp(uneedle, puhaystack, uneedle_len)) {

			/* does the grapheme in the haystack end at the same place as the last grapheme in the needle? */
			if ( ubrk_isBoundary(bi, pos + uneedle_len) ) {

				/* found it, get grapheme count offset */
				ret_pos = grapheme_count_graphemes(bi, uhaystack, pos);
				break;
			}

			/* the boundary test moved the iterator; set position back */
			ubrk_isBoundary(bi, pos);
		}

		pos = ubrk_previous(bi);
	}

cleanup:
	/* single exit path: release whatever was acquired before getting here */
	if ( uhaystack ) {
		efree( uhaystack );
	}
	if ( uneedle ) {
		efree( uneedle );
	}
	if ( bi ) {
		ubrk_close (bi);
	}

	return ret_pos;
}
/* {{{ grapheme_strpos_utf16 - strpos/strrpos using utf16
 *
 * Converts haystack and needle from UTF-8 to UTF-16 and searches with an ICU
 * string search object (usearch_*) that is given the grapheme break iterator.
 *
 * haystack / haystack_len  UTF-8 bytes to search and their length
 * needle / needle_len      UTF-8 bytes to look for and their length
 * offset                   when non-zero, translated through
 *                          grapheme_get_haystack_offset() and used as the
 *                          search start offset
 * puchar_pos               optional out parameter: set to -1 on entry and to
 *                          the UTF-16 match position when a match is accepted
 * f_ignore_case            non-zero to set the collator strength to
 *                          UCOL_SECONDARY before searching
 * last                     non-zero to use usearch_last(), zero to use
 *                          usearch_next()
 *
 * Returns the value of grapheme_count_graphemes() for the match position, or
 * -1 when there is no accepted match.
 *
 * NOTE(review): STRPOS_CHECK_STATUS is a macro defined outside this view; it
 * presumably records the error, releases the locals and returns on failure -
 * confirm against its definition before renaming or reordering anything here.
 */
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last)
{
	UChar *uhaystack = NULL, *uneedle = NULL;
	int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
	unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
	UBreakIterator* bi = NULL;
	UErrorCode status;
	UStringSearch* src = NULL;
	UCollator *coll;

	/* report "no match" through the out parameter unless a match is accepted below */
	if(puchar_pos) {
		*puchar_pos = -1;
	}

	/* convert the strings to UTF-16. */
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, haystack, haystack_len, &status );
	STRPOS_CHECK_STATUS(status, "Error converting input string to UTF-16");

	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, needle, needle_len, &status );
	STRPOS_CHECK_STATUS(status, "Error converting needle string to UTF-16");

	/* get the break iterator and attach the converted haystack to it */
	status = U_ZERO_ERROR;
	bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status );
	STRPOS_CHECK_STATUS(status, "Failed to get iterator");
	status = U_ZERO_ERROR;
	ubrk_setText(bi, uhaystack, uhaystack_len, &status);
	STRPOS_CHECK_STATUS(status, "Failed to set up iterator");

	/* create the search object over the haystack, handing it the break iterator */
	status = U_ZERO_ERROR;
	src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, "", bi, &status);
	STRPOS_CHECK_STATUS(status, "Error creating search object");

	if(f_ignore_case) {
		/* lower the collator strength to UCOL_SECONDARY, then reset the search
		 * so that the changed attribute is picked up */
		coll = usearch_getCollator(src);
		status = U_ZERO_ERROR;
		ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_SECONDARY, &status);
		STRPOS_CHECK_STATUS(status, "Error setting collation strength");
		usearch_reset(src);
	}

	if(offset != 0) {
		/* translate the caller's offset into a UTF-16 position; -1 means it is not usable */
		offset_pos = grapheme_get_haystack_offset(bi, offset);
		if(offset_pos == -1) {
			status = U_ILLEGAL_ARGUMENT_ERROR;
			STRPOS_CHECK_STATUS(status, "Invalid search offset");
		}
		status = U_ZERO_ERROR;
		usearch_setOffset(src, offset_pos, &status);
		STRPOS_CHECK_STATUS(status, "Invalid search offset");
	}

	if(last) {
		char_pos = usearch_last(src, &status);
		if(char_pos < offset_pos) {
			/* last one is before our start offset, so treat it as not found */
			char_pos = USEARCH_DONE;
		}
	} else {
		char_pos = usearch_next(src, &status);
	}
	STRPOS_CHECK_STATUS(status, "Error looking up string");

	/* accept the match only when it starts on a boundary of the break iterator */
	if(char_pos != USEARCH_DONE && ubrk_isBoundary(bi, char_pos)) {
		ret_pos = grapheme_count_graphemes(bi, uhaystack,char_pos);
		if(puchar_pos) {
			*puchar_pos = char_pos;
		}
	} else {
		ret_pos = -1;
	}

	/* release the converted strings, the break iterator and the search object */
	if (uhaystack) {
		efree( uhaystack );
	}
	if (uneedle) {
		efree( uneedle );
	}
	ubrk_close (bi);
	usearch_close (src);

	return ret_pos;
}