示例#1
0
文件: uchar.c 项目: icu-project/icu4c
/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
U_CAPI UBool U_EXPORT2
u_isIDIgnorable(UChar32 c) {
    if(c<=0x9f) {
        return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c);
    } else {
        uint32_t props;
        GET_PROPS(c, props);
        return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR);
    }
}
示例#2
0
//----------------------------------------------------------------------------------------
//
//  stripRules    Return a rules string without unnecessary
//                characters.
//
//----------------------------------------------------------------------------------------
UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
    UnicodeString strippedRules;
    int rulesLength = rules.length();
    for (int idx = 0; idx < rulesLength; ) {
        UChar ch = rules[idx++];
        if (ch == chPound) {
            while (idx < rulesLength
                    && ch != chCR && ch != chLF && ch != chNEL)
            {
                ch = rules[idx++];
            }
        }
        if (!u_isISOControl(ch)) {
            strippedRules.append(ch);
        }
    }
    // strippedRules = strippedRules.unescape();
    return strippedRules;
}
示例#3
0
bool IsInterchangeValid(const char32 ch) {
  return IsValidCodepoint(ch) &&
         !(ch >= 0xFDD0 && ch <= 0xFDEF) &&  // Noncharacters.
         !(ch >= 0xFFFE && ch <= 0xFFFF) && !(ch >= 0x1FFFE && ch <= 0x1FFFF) &&
         !(ch >= 0x2FFFE && ch <= 0x2FFFF) &&
         !(ch >= 0x3FFFE && ch <= 0x3FFFF) &&
         !(ch >= 0x4FFFE && ch <= 0x4FFFF) &&
         !(ch >= 0x5FFFE && ch <= 0x5FFFF) &&
         !(ch >= 0x6FFFE && ch <= 0x6FFFF) &&
         !(ch >= 0x7FFFE && ch <= 0x7FFFF) &&
         !(ch >= 0x8FFFE && ch <= 0x8FFFF) &&
         !(ch >= 0x9FFFE && ch <= 0x9FFFF) &&
         !(ch >= 0xAFFFE && ch <= 0xAFFFF) &&
         !(ch >= 0xBFFFE && ch <= 0xBFFFF) &&
         !(ch >= 0xCFFFE && ch <= 0xCFFFF) &&
         !(ch >= 0xDFFFE && ch <= 0xDFFFF) &&
         !(ch >= 0xEFFFE && ch <= 0xEFFFF) &&
         !(ch >= 0xFFFFE && ch <= 0xFFFFF) &&
         !(ch >= 0x10FFFE && ch <= 0x10FFFF) &&
         (!u_isISOControl(static_cast<UChar32>(ch)) || ch == '\n' ||
          ch == '\f' || ch == '\t' || ch == '\r');
}
示例#4
0
bool IsInterchangeValid7BitAscii(const char32 ch) {
  return IsValidCodepoint(ch) &&
      ch <= 128 &&
      (!u_isISOControl(static_cast<UChar32>(ch)) ||
       ch == '\n' || ch == '\f' || ch == '\t' || ch == '\r');
}