/**
 * Scan a Unicode identifier out of a string, beginning at a given offset.
 *
 * The first code point must satisfy u_isIDStart(); every following code
 * point must satisfy u_isIDPart(). Scanning stops at the first code point
 * that does not qualify, or at the end of the string.
 *
 * @param str the string to parse
 * @param pos INPUT-OUTPUT parameter. On INPUT, the offset of the first
 * character to examine. Callers are expected to pass pos < str.length()
 * and not to point at a whitespace character. On OUTPUT, the offset just
 * past the last character consumed. When the very first code point is not
 * an identifier start, pos is left unchanged.
 * @return the identifier that was scanned, or an empty string if there is
 * no valid identifier at pos.
 */
UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {
    // assert(pos < str.length());
    UnicodeString identifier;
    int32_t cursor = pos;

    while (cursor < str.length()) {
        const UChar32 cp = str.char32At(cursor);
        const bool atFirstChar = (identifier.length() == 0);

        if (atFirstChar && !u_isIDStart(cp)) {
            // Not an identifier at all: report failure without moving pos.
            identifier.truncate(0);
            return identifier;
        }
        if (!atFirstChar && !u_isIDPart(cp)) {
            // First code point past the end of the identifier.
            break;
        }

        identifier.append(cp);
        // Step by one or two UTF-16 code units, depending on the code point.
        cursor += U16_LENGTH(cp);
    }

    pos = cursor;
    return identifier;
}
// // RBBISymbolTable::parseReference This function from the abstract symbol table interface // looks for a $variable name in the source text. // It does not look it up, only scans for it. // It is used by the UnicodeSet parser. // // This implementation is lifted pretty much verbatim // from the rules based transliterator implementation. // I didn't see an obvious way of sharing it. // UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, ParsePosition& pos, int32_t limit) const { int32_t start = pos.getIndex(); int32_t i = start; UnicodeString result; while (i < limit) { UChar c = text.charAt(i); if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { break; } ++i; } if (i == start) { // No valid name chars return result; // Indicate failure with empty string } pos.setIndex(i); text.extractBetween(start, i, result); return result; }
// Reports whether a code point may start an identifier.
//
// When built with HAS_REAL_ICU, anything ICU's u_isIDStart() accepts is
// accepted here. In every build, four additional code points are accepted.
bool IsIdStart(codepoint_t ch)
{
#ifdef HAS_REAL_ICU
    if (u_isIDStart(ch))
    {
        return true;
    }
#endif

    // Following codepoints are treated as part of ID_Start
    // for backwards compatibility as per section 2.5 of the Unicode 8 spec
    // See http://www.unicode.org/reports/tr31/tr31-23.html#Backward_Compatibility
    // The exact list is in PropList.txt in the Unicode database
    const bool isBackwardCompatibleStart =
        ch == 0x2118 ||  // SCRIPT CAPITAL P
        ch == 0x212E ||  // ESTIMATED SYMBOL
        ch == 0x309B ||  // KATAKANA-HIRAGANA VOICED SOUND MARK
        ch == 0x309C;    // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK

    return isBackwardCompatibleStart;
}
// Native helper: reports whether codePoint is accepted by ICU's u_isIDStart().
// The JNIEnv* and jclass arguments are unused.
static jboolean Character_isUnicodeIdentifierStartImpl(JNIEnv*, jclass, jint codePoint) {
    const jboolean isIdentifierStart = u_isIDStart(codePoint);
    return isIdentifierStart;
}
//static jboolean Character_isUnicodeIdentifierStartImpl(JNIEnv*, jclass, jint codePoint) { JNIEXPORT jboolean JNICALL Java_java_lang_Character_isUnicodeIdentifierStartImpl(JNIEnv*, jclass, jint codePoint) { return u_isIDStart(codePoint); }
// Reports whether codePoint is accepted by ICU's u_isIDStart().
// NOTE(review): the '$' in the function name is not a standard identifier
// character; this presumably relies on a compiler extension — confirm the
// toolchain accepts it.
jboolean fastiva_vm_Character_C$__isUnicodeIdentifierStartImpl(jint codePoint) {
    const jboolean isIdentifierStart = u_isIDStart(codePoint);
    return isIdentifierStart;
}