// Reads the next line from f and stores its significant part in fileLine.
//
// "Significant" means: everything from the first '#' onward is dropped
// (comment), trailing CR/LF units are dropped, and finally any trailing
// units for which u_isspace() is true are dropped.
//
// Returns FALSE when ucbuf_readline() yields NULL or errorCode reports a
// failure; fileLine is left untouched in that case. Returns TRUE otherwise.
//
// NOTE(review): fileLine is set with setTo(FALSE, ...) directly on the pointer
// returned by ucbuf_readline(), so it presumably aliases the reader's buffer
// rather than owning a copy -- confirm the lifetime against the callers.
static UBool readLine(UCHARBUF *f, UnicodeString &fileLine, IcuToolErrorCode &errorCode) {
    int32_t length;
    const UChar *text = ucbuf_readline(f, &length, errorCode);
    if(text == NULL || errorCode.isFailure()) {
        return FALSE;
    }

    const UChar *hash = u_memchr(text, 0x23, length);  // 0x23 is '#'
    if(hash != NULL) {
        // Cutting at the comment start also removes the line terminator.
        length = (int32_t)(hash - text);
    } else {
        // No comment: peel CR and LF units off the end one at a time.
        for(; length > 0; --length) {
            const UChar last = text[length - 1];
            if(last != CARRIAGE_RETURN_CHARACTER && last != LINEFEED_CHARACTER) {
                break;
            }
        }
    }

    // Drop whatever white space precedes the comment or the line end.
    while(length > 0 && u_isspace(text[length - 1])) {
        --length;
    }

    fileLine.setTo(FALSE, text, length);
    return TRUE;
}
/* {{{ grapheme_memnstr_grapheme: find needle in haystack using grapheme boundaries
 *
 * Scans the UTF-16 range [haystack, end) for the first occurrence of the
 * needle_len code units at needle whose end coincides with a boundary reported
 * by the break iterator bi.
 *
 * bi          break iterator; its text is (re)set to the haystack here
 * haystack    first code unit of the text to search
 * needle      first code unit of the pattern
 * needle_len  number of code units in needle
 * end         one past the last code unit of haystack
 *
 * Returns the value of grapheme_count_graphemes() for the text preceding the
 * match (the original comment calls this the "grapheme count offset"), or -1
 * when there is no match, when needle_len is not positive, when the needle is
 * longer than the haystack, or when ubrk_setText() reports an error.
 */
inline int32_t grapheme_memnstr_grapheme(UBreakIterator *bi, UChar *haystack, UChar *needle, int32_t needle_len, UChar *end)
{
	UChar *p = haystack;
	UChar ne;
	UErrorCode status;

	/* An empty needle would make needle[needle_len-1] read before the buffer,
	 * and a needle longer than the haystack would move `end` in front of
	 * `haystack`; neither case can produce a match. */
	if (needle_len <= 0 || (end - haystack) < needle_len) {
		return -1;
	}

	ne = needle[needle_len-1];

	/* From here on `end` is the last position where a match may start. */
	end -= needle_len;

	while (p <= end) {
		/* jump to the next occurrence of the needle's first code unit */
		p = u_memchr(p, *needle, (int32_t)(end - p + 1));
		if (p == NULL) {
			return -1;
		}

		/* Cheap last-unit test first; needle_len - 1 in the compare works
		 * because the last unit has already been checked. */
		if (ne == p[needle_len-1] && !u_memcmp(needle, p, needle_len - 1)) {
			/* does the grapheme end here? */
			status = U_ZERO_ERROR;
			ubrk_setText(bi, haystack, (int32_t)(end - haystack) + needle_len, &status);
			if (U_FAILURE(status)) {
				/* the iterator text is not what we asked for; do not trust it */
				return -1;
			}

			if (ubrk_isBoundary(bi, (int32_t)(p - haystack) + needle_len)) {
				/* found it, get grapheme count offset */
				return grapheme_count_graphemes(bi, haystack, (int32_t)(p - haystack));
			}
		}

		p++;
	}

	return -1;
}
/*
 * Finds the first occurrence of code point c in the count UTF-16 code units
 * starting at s.
 *
 * - c <= U_BMP_MAX: delegated to u_memchr() as a single-unit search.
 * - U_BMP_MAX < c <= UCHAR_MAX_VALUE: searched as a lead/trail surrogate pair;
 *   this needs at least two units, so count < 2 yields NULL.
 * - anything else (including negative c, which is large once cast to
 *   unsigned): not a code point, yields NULL.
 *
 * Returns a pointer to the first matching unit, or NULL if there is none.
 */
U_CAPI UChar* U_EXPORT2 u_memchr32(const UChar* s, UChar32 c, int32_t count) {
    const uint32_t cp = (uint32_t) c;
    UChar lead, trail;
    int32_t i, lastLead;

    if (cp <= U_BMP_MAX) {
        /* a BMP code point is a single code unit */
        return u_memchr(s, (UChar) c, count);
    }
    if (count < 2) {
        /* no room for a surrogate pair */
        return NULL;
    }
    if (cp > UCHAR_MAX_VALUE) {
        /* outside the code point range, cannot occur */
        return NULL;
    }

    /* supplementary code point: look for its two surrogates side by side */
    lead = U16_LEAD(c);
    trail = U16_TRAIL(c);

    /* The lead may sit at index count - 2 at most, so s[i + 1] is always
     * inside the buffer and the trail needs no separate bounds check. */
    lastLead = count - 1;
    for (i = 0; i != lastLead; ++i) {
        if (s[i] == lead && s[i + 1] == trail) {
            return (UChar*) (s + i);
        }
    }
    return NULL;
}
/*
 * Finds the first occurrence of the UTF-16 string sub inside s.
 *
 * length and subLength are counts of code units; a value of -1 means the
 * corresponding string is NUL-terminated.
 *
 * Returns
 *   - s itself when sub is NULL, subLength < -1, or sub is empty;
 *   - NULL when s is NULL, length < -1, or no acceptable match exists;
 *   - otherwise a pointer to the first unit of the match.
 *
 * A candidate whose units all compare equal is accepted only when
 * isMatchAtCPBoundary() agrees, i.e. the match does not split a surrogate
 * pair in s.
 */
U_CAPI UChar* U_EXPORT2 u_strFindFirst(const UChar* s, int32_t length, const UChar* sub, int32_t subLength) {
    const UChar* textStart;
    const UChar* hay;    /* cursor in s while comparing the tail of sub */
    const UChar* pat;    /* cursor in sub while comparing its tail */
    const UChar* patEnd; /* one past the last unit of sub (counted case) */
    UChar unit, first, patUnit;

    if (sub == NULL || subLength < -1) {
        return (UChar*) s;
    }
    if (s == NULL || length < -1) {
        return NULL;
    }

    textStart = s;

    if (length < 0 && subLength < 0) {
        /* Case 1: both strings are NUL-terminated. */
        first = *sub++;
        if (first == 0) {
            return (UChar*) s;
        }
        if (*sub == 0 && !U16_IS_SURROGATE(first)) {
            /* one non-surrogate unit: a plain character search is enough */
            return u_strchr(s, first);
        }

        while ((unit = *s++) != 0) {
            if (unit != first) {
                continue;
            }
            /* first unit matched at s - 1; compare the remaining units */
            for (hay = s, pat = sub; (patUnit = *pat) != 0; ++hay, ++pat) {
                unit = *hay;
                if (unit == 0) {
                    return NULL; /* s ran out: no match here or later */
                }
                if (unit != patUnit) {
                    break; /* mismatch */
                }
            }
            /* patUnit is 0 only if the whole of sub was consumed */
            if (patUnit == 0 && isMatchAtCPBoundary(textStart, s - 1, hay, NULL)) {
                return (UChar*) (s - 1); /* well-formed match */
            }
            /* otherwise: mismatch, or the match would split a surrogate pair */
        }

        return NULL; /* not found */
    }

    if (subLength < 0) {
        subLength = u_strlen(sub);
    }
    if (subLength == 0) {
        return (UChar*) s;
    }

    /* Peel off sub[0] so it can be searched for quickly; from here on
     * subLength counts only the units after it. */
    first = *sub++;
    --subLength;
    patEnd = sub + subLength;

    if (subLength == 0 && !U16_IS_SURROGATE(first)) {
        /* one non-surrogate unit: a plain character search is enough */
        if (length < 0) {
            return u_strchr(s, first);
        }
        return u_memchr(s, first, length);
    }

    if (length < 0) {
        /* Case 2: s is NUL-terminated, sub has a known length. */
        while ((unit = *s++) != 0) {
            if (unit != first) {
                continue;
            }
            /* first unit matched at s - 1; compare the remaining units */
            for (hay = s, pat = sub; pat != patEnd; ++hay, ++pat) {
                unit = *hay;
                if (unit == 0) {
                    return NULL; /* s ran out: no match here or later */
                }
                if (unit != *pat) {
                    break; /* mismatch */
                }
            }
            if (pat == patEnd && isMatchAtCPBoundary(textStart, s - 1, hay, NULL)) {
                return (UChar*) (s - 1); /* well-formed match */
            }
            /* otherwise: mismatch, or the match would split a surrogate pair */
        }
    } else {
        /* Case 3: s has a known length. */
        const UChar* limit;
        const UChar* preLimit;

        /* subLength no longer includes the first unit, hence "<=" */
        if (length <= subLength) {
            return NULL; /* s is shorter than sub */
        }

        limit = s + length;

        /* a match has to begin before preLimit to fit inside s */
        preLimit = limit - subLength;

        while (s != preLimit) {
            unit = *s++;
            if (unit != first) {
                continue;
            }
            /* first unit matched at s - 1; compare the remaining units */
            for (hay = s, pat = sub; pat != patEnd && *hay == *pat; ++hay, ++pat) {
                /* empty: the loop header does all the work */
            }
            if (pat == patEnd && isMatchAtCPBoundary(textStart, s - 1, hay, limit)) {
                return (UChar*) (s - 1); /* well-formed match */
            }
            /* otherwise: mismatch, or the match would split a surrogate pair */
        }
    }

    return NULL; /* not found */
}