Ejemplo n.º 1
0
/* {{{ grapheme_extract_bytecount_iter - grapheme iterator for grapheme_extract MAXBYTES */
static inline int32_t
grapheme_extract_bytecount_iter(UBreakIterator *bi, int32_t bsize, unsigned char *pstr, int32_t str_len)
{
	int pos = 0, prev_pos = 0;
	int ret_pos = 0, prev_ret_pos = 0;

	while ( 1 ) {
		pos = ubrk_next(bi);

		if ( UBRK_DONE == pos ) {
			break;
		}

		prev_ret_pos = ret_pos;
		U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);

		if ( ret_pos > bsize ) {
			ret_pos = prev_ret_pos;
			break;
		}

		if ( prev_ret_pos == ret_pos ) {
			/* something wrong - malformed utf8? */
			break;
		}

		prev_pos = pos;
	}

	return ret_pos;
}
Ejemplo n.º 2
0
/* {{{ grapheme_extract_charcount_iter - grapheme iterator for grapheme_extract MAXCHARS */
static inline int32_t
grapheme_extract_charcount_iter(UBreakIterator *bi, int32_t csize, unsigned char *pstr, int32_t str_len)
{
	int pos = 0, prev_pos = 0;
	int ret_pos = 0, prev_ret_pos = 0;

	while ( 1 ) {
		pos = ubrk_next(bi);

		if ( UBRK_DONE == pos ) {
			break;
		}

		/* if we are beyond our limit, then the loop is done */
		if ( pos > csize ) {
			break;
		}

		/* update our pointer in the original UTF-8 buffer by as many characters
		   as ubrk_next iterated over */

		prev_ret_pos = ret_pos;
		U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);

		if ( prev_ret_pos == ret_pos ) {
			/* something wrong - malformed utf8? */
			break;
		}

		prev_pos = pos;
	}

	return ret_pos;
}
Ejemplo n.º 3
0
/* {{{ grapheme_extract_count_iter - grapheme iterator for grapheme_extract COUNT */
static inline int32_t
grapheme_extract_count_iter(UBreakIterator *bi, int32_t size, unsigned char *pstr, int32_t str_len)
{
	int pos = 0, next_pos = 0;
	int ret_pos = 0;

	while ( size ) {
		next_pos = ubrk_next(bi);

		if ( UBRK_DONE == next_pos ) {
			break;
		}
		pos = next_pos;
		size--;
	}

	/* pos is one past the last UChar - and represent the number of code units to
		advance in the utf-8 buffer
	*/

	U8_FWD_N(pstr, ret_pos, str_len, pos);

	return ret_pos;
}
Ejemplo n.º 4
0
/* {{{	strstr_common_handler */
static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
{
	char *haystack, *needle;
	const char *found;
	size_t haystack_len, needle_len;
	int32_t ret_pos, uchar_pos;
	zend_bool part = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|b", &haystack, &haystack_len, &needle, &needle_len, &part) == FAILURE) {

		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
			 "grapheme_strstr: unable to parse input param", 0 );

		RETURN_FALSE;
	}

	if (needle_len == 0) {

		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );

		RETURN_FALSE;
	}


	if ( !f_ignore_case ) {

		/* ASCII optimization: quick check to see if the string might be there
		 * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
		*/
		found = php_memnstr(haystack, needle, needle_len, haystack + haystack_len);

		/* if it isn't there the we are done */
		if ( !found ) {
			RETURN_FALSE;
		}

		/* if it is there, and if the haystack is ascii, we are all done */
		if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
			size_t found_offset = found - haystack;

			if (part) {
				RETURN_STRINGL(haystack, found_offset);
			} else {
				RETURN_STRINGL(found, haystack_len - found_offset);
			}
		}

	}

	/* need to work in utf16 */
	ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ );

	if ( ret_pos < 0 ) {
		RETURN_FALSE;
	}

	/* uchar_pos is the 'nth' Unicode character position of the needle */

	ret_pos = 0;
	U8_FWD_N(haystack, ret_pos, haystack_len, uchar_pos);

	if (part) {
		RETURN_STRINGL(haystack, ret_pos);
	} else {
		RETURN_STRINGL(haystack + ret_pos, haystack_len - ret_pos);
	}

}
Ejemplo n.º 5
0
static void TestFwdBack(){ 
    static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00};
    static const uint16_t fwd_unsafe[] ={1, 5, 6, 7,  9, 10, 11, 13, 14, 15, 16,  20, };
    static const uint16_t fwd_safe[]   ={1, 5, 6, 7, 9, 10, 11,  12, 13, 14, 15, 16, 17, 18};
    static const uint16_t back_unsafe[]={17, 16, 12, 11, 9, 7, 6, 5, 1, 0};
    static const uint16_t back_safe[]  ={17, 16, 15, 14, 13, 12, 11, 10, 9, 7, 6, 5, 1, 0};

    static const uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1, 5};
    static const uint16_t fwd_N_unsafe[] ={0, 1, 6, 10, 11, 14, 15};
    static const uint16_t fwd_N_safe[]   ={0, 1, 6, 10, 11, 13, 14, 18}; /*safe macro keeps it at the end of the string */
    static const uint16_t back_N_unsafe[]={18, 17, 12, 7, 6, 1, 0};
    static const uint16_t back_N_safe[]  ={18, 17, 15, 12, 11, 9, 7, 0};   


    uint32_t offunsafe=0, offsafe=0;

    uint32_t i=0;
    while(offunsafe < sizeof(input)){
        UTF8_FWD_1_UNSAFE(input, offunsafe);
        if(offunsafe != fwd_unsafe[i]){
            log_err("ERROR: Forward_unsafe offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
        }
        i++;
    }

    i=0;
    while(offunsafe < sizeof(input)){
        U8_FWD_1_UNSAFE(input, offunsafe);
        if(offunsafe != fwd_unsafe[i]){
            log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
        }
        i++;
    }

    i=0;
    while(offsafe < sizeof(input)){
        UTF8_FWD_1_SAFE(input, offsafe, sizeof(input));
        if(offsafe != fwd_safe[i]){
            log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
        }
        i++;
    }

    i=0;
    while(offsafe < sizeof(input)){
        U8_FWD_1(input, offsafe, sizeof(input));
        if(offsafe != fwd_safe[i]){
            log_err("ERROR: U8_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
        }
        i++;
    }

    offunsafe=sizeof(input);
    i=0;
    while(offunsafe > 0){
        UTF8_BACK_1_UNSAFE(input, offunsafe);
        if(offunsafe != back_unsafe[i]){
            log_err("ERROR: Backward_unsafe offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
        }
        i++;
    }

    offunsafe=sizeof(input);
    i=0;
    while(offunsafe > 0){
        U8_BACK_1_UNSAFE(input, offunsafe);
        if(offunsafe != back_unsafe[i]){
            log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
        }
        i++;
    }

    i=0;
    offsafe=sizeof(input);
    while(offsafe > 0){
        UTF8_BACK_1_SAFE(input, 0,  offsafe);
        if(offsafe != back_safe[i]){
            log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
        }
        i++;
    }

    i=0;
    offsafe=sizeof(input);
    while(offsafe > 0){
        U8_BACK_1(input, 0,  offsafe);
        if(offsafe != back_safe[i]){
            log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
        }
        i++;
    }

    offunsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){  
        UTF8_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != fwd_N_unsafe[i]){
            log_err("ERROR: Forward_N_unsafe offset=%d expected:%d, Got:%d\n", i, fwd_N_unsafe[i], offunsafe);
        }
    }

    offunsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){  
        U8_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != fwd_N_unsafe[i]){
            log_err("ERROR: U8_FWD_N_UNSAFE offset=%d expected:%d, Got:%d\n", i, fwd_N_unsafe[i], offunsafe);
        }
    }

    offsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
            log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
        }
    
    }

    offsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
            log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
        }
    
    }

    offunsafe=sizeof(input);
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
        UTF8_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != back_N_unsafe[i]){
            log_err("ERROR: backward_N_unsafe offset=%d expected:%d, Got:%d\n", i, back_N_unsafe[i], offunsafe);
        }
    }

    offunsafe=sizeof(input);
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
        U8_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != back_N_unsafe[i]){
            log_err("ERROR: U8_BACK_N_UNSAFE offset=%d expected:%d, Got:%d\n", i, back_N_unsafe[i], offunsafe);
        }
    }

    offsafe=sizeof(input);
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);
        }
    }

    offsafe=sizeof(input);
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        U8_BACK_N(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);
        }
    }
}
Ejemplo n.º 6
0
void
UTF8CollationIterator::forwardNumCodePoints(int32_t num, UErrorCode & /*errorCode*/) {
    U8_FWD_N(u8, pos, length, num);
}
Ejemplo n.º 7
0
/* keep this in sync with utf16tst.c's TestNulTerminated() */
static void TestNulTerminated() {
    static const uint8_t input[]={
        /*  0 */  0x61,
        /*  1 */  0xf0, 0x90, 0x90, 0x81,
        /*  5 */  0xc0, 0x80,
        /*  7 */  0xdf, 0x80,
        /*  9 */  0xc2,
        /* 10 */  0x62,
        /* 11 */  0xfd, 0xbe,
        /* 13 */  0xe0, 0xa0, 0x80,
        /* 16 */  0xe2, 0x82, 0xac,
        /* 19 */  0xf0, 0x90, 0x90,
        /* 22 */  0x00
        /* 23 */
    };
    static const UChar32 result[]={
        0x61,
        0x10401,
        U_SENTINEL,
        0x7c0,
        U_SENTINEL,
        0x62,
        U_SENTINEL,
        0x800,
        0x20ac,
        U_SENTINEL,
        0
    };

    UChar32 c, c2, expected;
    int32_t i0, i=0, j, k, expectedIndex;
    int32_t cpIndex=0;
    do {
        i0=i;
        U8_NEXT(input, i, -1, c);
        expected=result[cpIndex];
        if(c!=expected) {
            log_err("U8_NEXT(from %d)=U+%04x != U+%04x\n", i0, c, expected);
        }
        j=i0;
        U8_NEXT_OR_FFFD(input, j, -1, c);
        if(expected<0) { expected=0xfffd; }
        if(c!=expected) {
            log_err("U8_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x\n", i0, c, expected);
        }
        if(j!=i) {
            log_err("U8_NEXT_OR_FFFD() moved to index %d but U8_NEXT() moved to %d\n", j, i);
        }
        j=i0;
        U8_FWD_1(input, j, -1);
        if(j!=i) {
            log_err("U8_FWD_1() moved to index %d but U8_NEXT() moved to %d\n", j, i);
        }
        ++cpIndex;
        /*
         * Move by this many code points from the start.
         * U8_FWD_N() stops at the end of the string, that is, at the NUL if necessary.
         */
        expectedIndex= (c==0) ? i-1 : i;
        k=0;
        U8_FWD_N(input, k, -1, cpIndex);
        if(k!=expectedIndex) {
            log_err("U8_FWD_N(code points from 0) moved to index %d but expected %d\n", k, expectedIndex);
        }
    } while(c!=0);

    i=0;
    do {
        j=i0=i;
        U8_NEXT(input, i, -1, c);
        do {
            U8_GET(input, 0, j, -1, c2);
            if(c2!=c) {
                log_err("U8_NEXT(from %d)=U+%04x != U+%04x=U8_GET(at %d)\n", i0, c, c2, j);
            }
            U8_GET_OR_FFFD(input, 0, j, -1, c2);
            expected= (c>=0) ? c : 0xfffd;
            if(c2!=expected) {
                log_err("U8_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x=U8_GET_OR_FFFD(at %d)\n", i0, expected, c2, j);
            }
            /* U8_SET_CP_LIMIT moves from a non-lead byte to the limit of the code point */
            k=j+1;
            U8_SET_CP_LIMIT(input, 0, k, -1);
            if(k!=i) {
                log_err("U8_NEXT() moved to %d but U8_SET_CP_LIMIT(%d) moved to %d\n", i, j+1, k);
            }
        } while(++j<i);
    } while(c!=0);
}