Ejemplo n.º 1
0
/**
 * Parse a Unicode identifier out of a string, starting at a given
 * position.
 *
 * The first code point must satisfy u_isIDStart(); each following
 * code point is consumed for as long as it satisfies u_isIDPart().
 *
 * @param str the string to parse
 * @param pos INPUT-OUTPUT parameter.  On INPUT, the index of the first
 * character to examine.  Callers are expected to pass
 * pos < str.length() and must not point at a whitespace character.
 * On OUTPUT, the index just past the last parsed character.  It is
 * left unchanged when no identifier is found.
 * @return the Unicode identifier, or an empty string if there is no
 * valid identifier at pos.
 */
UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {
    // Precondition (not enforced): pos < str.length()
    UnicodeString ident;
    int32_t cursor = pos;
    const int32_t limit = str.length();

    if (cursor < limit) {
        // The leading code point decides whether there is an identifier at all.
        const UChar32 first = str.char32At(cursor);
        if (!u_isIDStart(first)) {
            return ident;  // empty result; pos is deliberately not touched
        }
        ident.append(first);
        cursor += U16_LENGTH(first);

        // Consume the continuation code points; step by 1 or 2 code units
        // depending on whether the code point is supplementary.
        while (cursor < limit) {
            const UChar32 next = str.char32At(cursor);
            if (!u_isIDPart(next)) {
                break;
            }
            ident.append(next);
            cursor += U16_LENGTH(next);
        }
    }

    pos = cursor;
    return ident;
}
Ejemplo n.º 2
0
//
// RBBISymbolTable::parseReference   Part of the abstract symbol table interface.
//                                   Scans the source text for a $variable name,
//                                   beginning at pos and stopping before limit.
//                                   The name is only scanned, not looked up.
//                                   It is used by the UnicodeSet parser.
//
//                                   The first character must pass u_isIDStart() and
//                                   every character must pass u_isIDPart().  Text is
//                                   examined one UTF-16 code unit at a time.
//
//                                   On success the name is returned and pos is moved
//                                   just past it.  On failure an empty string is
//                                   returned and pos is left unchanged.
//
//                                   This implementation is lifted pretty much verbatim
//                                   from the rules based transliterator implementation.
//
UnicodeString   RBBISymbolTable::parseReference(const UnicodeString& text,
                                                ParsePosition& pos, int32_t limit) const
{
    const int32_t begin = pos.getIndex();
    int32_t end = begin;

    // Advance 'end' across the longest run of acceptable name characters.
    for (; end < limit; ++end) {
        const UChar unit = text.charAt(end);
        if (end == begin && !u_isIDStart(unit)) {
            break;      // the leading character must be able to start an identifier
        }
        if (!u_isIDPart(unit)) {
            break;
        }
    }

    UnicodeString name;
    if (end != begin) {
        // At least one name character was found: consume it and extract the name.
        pos.setIndex(end);
        text.extractBetween(begin, end, name);
    }
    // Otherwise 'name' is still empty, which signals failure to the caller.
    return name;
}
Ejemplo n.º 3
0
        // Reports whether the code point may begin an identifier.
        //
        // When HAS_REAL_ICU is defined, ICU's u_isIDStart() is consulted first.
        // Regardless of that, four code points are always accepted; the list is
        // hard-coded below.
        bool IsIdStart(codepoint_t ch)
        {
#ifdef HAS_REAL_ICU
            const bool icuAccepts = u_isIDStart(ch);
            if (icuAccepts)
            {
                return true;
            }
#endif
            // These code points are treated as part of ID_Start for backwards
            // compatibility, as per section 2.5 of the Unicode 8 spec.
            // See http://www.unicode.org/reports/tr31/tr31-23.html#Backward_Compatibility
            // The exact list is in PropList.txt in the Unicode database.
            return ch == 0x2118     // SCRIPT CAPITAL P
                || ch == 0x212E     // ESTIMATED SYMBOL
                || ch == 0x309B     // KATAKANA-HIRAGANA VOICED SOUND MARK
                || ch == 0x309C;    // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
        }
/**
 * Native helper that reports whether a code point may start a Unicode
 * identifier, by delegating to ICU's u_isIDStart().
 * The JNIEnv* and jclass arguments are unused.
 *
 * @param codePoint the code point to test
 * @return the result of u_isIDStart(codePoint), converted to jboolean
 */
static jboolean Character_isUnicodeIdentifierStartImpl(JNIEnv*, jclass, jint codePoint) {
    const jboolean startsIdentifier = u_isIDStart(codePoint);
    return startsIdentifier;
}
Ejemplo n.º 5
0
//static jboolean Character_isUnicodeIdentifierStartImpl(JNIEnv*, jclass, jint codePoint) {
/**
 * Exported JNI function that reports whether a code point may start a
 * Unicode identifier, by delegating to ICU's u_isIDStart().
 * The JNIEnv* and jclass arguments are unused.
 *
 * @param codePoint the code point to test
 * @return the result of u_isIDStart(codePoint), converted to jboolean
 */
JNIEXPORT jboolean JNICALL
Java_java_lang_Character_isUnicodeIdentifierStartImpl(JNIEnv*, jclass, jint codePoint) {
    const jboolean startsIdentifier = u_isIDStart(codePoint);
    return startsIdentifier;
}
Ejemplo n.º 6
0
/**
 * Reports whether a code point may start a Unicode identifier, by
 * delegating to ICU's u_isIDStart().
 *
 * @param codePoint the code point to test
 * @return the result of u_isIDStart(codePoint), converted to jboolean
 */
jboolean fastiva_vm_Character_C$__isUnicodeIdentifierStartImpl(jint codePoint) {
    const jboolean startsIdentifier = u_isIDStart(codePoint);
    return startsIdentifier;
}