/*
 * Reports whether a code point may be ignored inside a Java or Unicode
 * identifier.
 *
 * For c <= 0x9f the answer is: c is an ISO control (per u_isISOControl) that
 * is not matched by IS_THAT_ASCII_CONTROL_SPACE.
 * For every other c the answer is: the general category looked up through
 * GET_PROPS/GET_CATEGORY equals U_FORMAT_CHAR.
 */
U_CAPI UBool U_EXPORT2
u_isIDIgnorable(UChar32 c) {
    uint32_t properties;

    if(c<=0x9f) {
        /* Low range: decided by the control test alone, no property lookup. */
        return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c);
    }

    /* Everything above 0x9f: ignorable exactly when it is a format character. */
    GET_PROPS(c, properties);
    return (UBool)(GET_CATEGORY(properties)==U_FORMAT_CHAR);
}
//---------------------------------------------------------------------------------------- // // stripRules Return a rules string without unnecessary // characters. // //---------------------------------------------------------------------------------------- UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) { UnicodeString strippedRules; int rulesLength = rules.length(); for (int idx = 0; idx < rulesLength; ) { UChar ch = rules[idx++]; if (ch == chPound) { while (idx < rulesLength && ch != chCR && ch != chLF && ch != chNEL) { ch = rules[idx++]; } } if (!u_isISOControl(ch)) { strippedRules.append(ch); } } // strippedRules = strippedRules.unescape(); return strippedRules; }
bool IsInterchangeValid(const char32 ch) { return IsValidCodepoint(ch) && !(ch >= 0xFDD0 && ch <= 0xFDEF) && // Noncharacters. !(ch >= 0xFFFE && ch <= 0xFFFF) && !(ch >= 0x1FFFE && ch <= 0x1FFFF) && !(ch >= 0x2FFFE && ch <= 0x2FFFF) && !(ch >= 0x3FFFE && ch <= 0x3FFFF) && !(ch >= 0x4FFFE && ch <= 0x4FFFF) && !(ch >= 0x5FFFE && ch <= 0x5FFFF) && !(ch >= 0x6FFFE && ch <= 0x6FFFF) && !(ch >= 0x7FFFE && ch <= 0x7FFFF) && !(ch >= 0x8FFFE && ch <= 0x8FFFF) && !(ch >= 0x9FFFE && ch <= 0x9FFFF) && !(ch >= 0xAFFFE && ch <= 0xAFFFF) && !(ch >= 0xBFFFE && ch <= 0xBFFFF) && !(ch >= 0xCFFFE && ch <= 0xCFFFF) && !(ch >= 0xDFFFE && ch <= 0xDFFFF) && !(ch >= 0xEFFFE && ch <= 0xEFFFF) && !(ch >= 0xFFFFE && ch <= 0xFFFFF) && !(ch >= 0x10FFFE && ch <= 0x10FFFF) && (!u_isISOControl(static_cast<UChar32>(ch)) || ch == '\n' || ch == '\f' || ch == '\t' || ch == '\r'); }
// Returns true when `ch` passes IsValidCodepoint, lies in the 7-bit ASCII range
// (U+0000..U+007F), and is not an ISO control character other than '\n', '\f',
// '\t' or '\r'.
bool IsInterchangeValid7BitAscii(const char32 ch) {
  // The upper bound is 0x7F, the largest 7-bit value.  A bound of 128 would
  // let U+0080 through this range test and leave its rejection entirely to the
  // control-character check below.
  return IsValidCodepoint(ch) && ch <= 0x7F &&
         (!u_isISOControl(static_cast<UChar32>(ch)) || ch == '\n' ||
          ch == '\f' || ch == '\t' || ch == '\r');
}