Ejemplo n.º 1
0
static UBool readLine(UCHARBUF *f, UnicodeString &fileLine, IcuToolErrorCode &errorCode) {
    int32_t lineLength;
    const UChar *line = ucbuf_readline(f, &lineLength, errorCode);
    if(line == NULL || errorCode.isFailure()) { return FALSE; }
    // Strip trailing CR/LF, comments, and spaces.
    const UChar *comment = u_memchr(line, 0x23, lineLength);  // '#'
    if(comment != NULL) {
        lineLength = (int32_t)(comment - line);
    } else {
        while(lineLength > 0 && (line[lineLength - 1] == CARRIAGE_RETURN_CHARACTER || line[lineLength - 1] == LINEFEED_CHARACTER)) { --lineLength; }
    }
    while(lineLength > 0 && u_isspace(line[lineLength - 1])) { --lineLength; }
    fileLine.setTo(FALSE, line, lineLength);
    return TRUE;
}
Ejemplo n.º 2
0
/* {{{ grapheme_memnstr_grapheme: find needle in haystack using grapheme boundaries */
inline int32_t
grapheme_memnstr_grapheme(UBreakIterator *bi, UChar *haystack, UChar *needle, int32_t needle_len, UChar *end)
{
	UChar *p = haystack;
	UChar ne = needle[needle_len-1];
	UErrorCode status;
	int32_t grapheme_offset;
	
	end -= needle_len;

	while (p <= end) {

		if ((p = u_memchr(p, *needle, (end-p+1))) && ne == p[needle_len-1]) {

			if (!u_memcmp(needle, p, needle_len - 1)) {  /* needle_len - 1 works because if needle_len is 1, we've already tested the char */

				/* does the grapheme end here? */

				status = U_ZERO_ERROR;
				ubrk_setText (bi, haystack, (end - haystack) + needle_len, &status);

				if ( ubrk_isBoundary (bi, (p - haystack) + needle_len) ) {

					/* found it, get grapheme count offset */
					grapheme_offset = grapheme_count_graphemes(bi, haystack, (p - haystack));

					return grapheme_offset;
				}
			}
		}

		if (p == NULL) {
			return -1;
		}

		p++;
	}

	return -1;
}
Ejemplo n.º 3
0
U_CAPI UChar* U_EXPORT2
u_memchr32(const UChar* s, UChar32 c, int32_t count) {
    if ((uint32_t) c <= U_BMP_MAX) {
        /* find BMP code point */
        return u_memchr(s, (UChar) c, count);
    } else if (count < 2) {
        /* too short for a surrogate pair */
        return NULL;
    } else if ((uint32_t) c <= UCHAR_MAX_VALUE) {
        /* find supplementary code point as surrogate pair */
        const UChar* limit = s + count - 1; /* -1 so that we do not need a separate check for the trail unit */
        UChar lead = U16_LEAD(c), trail = U16_TRAIL(c);

        do {
            if (*s == lead && *(s + 1) == trail) {
                return (UChar*) s;
            }
        } while (++s != limit);
        return NULL;
    } else {
        /* not a Unicode code point, not findable */
        return NULL;
    }
}
Ejemplo n.º 4
0
U_CAPI UChar* U_EXPORT2
u_strFindFirst(const UChar* s, int32_t length,
               const UChar* sub, int32_t subLength) {
    const UChar* start, * p, * q, * subLimit;
    UChar c, cs, cq;

    if (sub == NULL || subLength < -1) {
        return (UChar*) s;
    }
    if (s == NULL || length < -1) {
        return NULL;
    }

    start = s;

    if (length < 0 && subLength < 0) {
        /* both strings are NUL-terminated */
        if ((cs = *sub++) == 0) {
            return (UChar*) s;
        }
        if (*sub == 0 && !U16_IS_SURROGATE(cs)) {
            /* the substring consists of a single, non-surrogate BMP code point */
            return u_strchr(s, cs);
        }

        while ((c = *s++) != 0) {
            if (c == cs) {
                /* found first substring UChar, compare rest */
                p = s;
                q = sub;
                for (; ;) {
                    if ((cq = *q) == 0) {
                        if (isMatchAtCPBoundary(start, s - 1, p, NULL)) {
                            return (UChar*) (s - 1); /* well-formed match */
                        } else {
                            break; /* no match because surrogate pair is split */
                        }
                    }
                    if ((c = *p) == 0) {
                        return NULL; /* no match, and none possible after s */
                    }
                    if (c != cq) {
                        break; /* no match */
                    }
                    ++p;
                    ++q;
                }
            }
        }

        /* not found */
        return NULL;
    }

    if (subLength < 0) {
        subLength = u_strlen(sub);
    }
    if (subLength == 0) {
        return (UChar*) s;
    }

    /* get sub[0] to search for it fast */
    cs = *sub++;
    --subLength;
    subLimit = sub + subLength;

    if (subLength == 0 && !U16_IS_SURROGATE(cs)) {
        /* the substring consists of a single, non-surrogate BMP code point */
        return length < 0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
    }

    if (length < 0) {
        /* s is NUL-terminated */
        while ((c = *s++) != 0) {
            if (c == cs) {
                /* found first substring UChar, compare rest */
                p = s;
                q = sub;
                for (; ;) {
                    if (q == subLimit) {
                        if (isMatchAtCPBoundary(start, s - 1, p, NULL)) {
                            return (UChar*) (s - 1); /* well-formed match */
                        } else {
                            break; /* no match because surrogate pair is split */
                        }
                    }
                    if ((c = *p) == 0) {
                        return NULL; /* no match, and none possible after s */
                    }
                    if (c != *q) {
                        break; /* no match */
                    }
                    ++p;
                    ++q;
                }
            }
        }
    } else {
        const UChar* limit, * preLimit;

        /* subLength was decremented above */
        if (length <= subLength) {
            return NULL; /* s is shorter than sub */
        }

        limit = s + length;

        /* the substring must start before preLimit */
        preLimit = limit - subLength;

        while (s != preLimit) {
            c = *s++;
            if (c == cs) {
                /* found first substring UChar, compare rest */
                p = s;
                q = sub;
                for (; ;) {
                    if (q == subLimit) {
                        if (isMatchAtCPBoundary(start, s - 1, p, limit)) {
                            return (UChar*) (s - 1); /* well-formed match */
                        } else {
                            break; /* no match because surrogate pair is split */
                        }
                    }
                    if (*p != *q) {
                        break; /* no match */
                    }
                    ++p;
                    ++q;
                }
            }
        }
    }

    /* not found */
    return NULL;
}