// Walks the word-break boundaries of |text| starting from |position| and
// returns the first boundary that sits next to an alphanumeric character.
//
// forward == true:  visits boundaries after |position|; a boundary qualifies
//                   when it lies before the end of the text and the character
//                   just before it is alphanumeric. Returns text.length() when
//                   no boundary qualifies.
// forward == false: visits boundaries before |position|; a boundary qualifies
//                   when it is non-zero and the character at it is
//                   alphanumeric. Returns 0 when no boundary qualifies.
int findNextWordFromIndex(StringView text, int position, bool forward)
{
    TextBreakIterator* iterator = wordBreakIterator(text);

    if (!forward) {
        for (int boundary = textBreakPreceding(iterator, position); boundary != TextBreakDone; boundary = textBreakPreceding(iterator, boundary)) {
            // Accept the break only if the character following it is alphanumeric.
            if (boundary != 0 && u_isalnum(text[boundary]))
                return boundary;
        }

        return 0;
    }

    for (int boundary = textBreakFollowing(iterator, position); boundary != TextBreakDone; boundary = textBreakFollowing(iterator, boundary)) {
        // Accept the break only if the character preceding it is alphanumeric.
        const bool isBeforeEnd = static_cast<unsigned>(boundary) < text.length();
        if (isBeforeEnd && u_isalnum(text[boundary - 1]))
            return boundary;
    }

    return text.length();
}
Esempio n. 2
0
int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward)
{
    UBreakIterator* it = wordBreakIterator(chars, len);

    if (forward) {
        position = ubrk_following(it, position);
        while (position != UBRK_DONE) {
            // We stop searching when the character preceeding the break
            // is alphanumeric.
            if (position < len && u_isalnum(chars[position - 1]))
                return position;

            position = ubrk_following(it, position);
        }

        return len;
    } else {
        position = ubrk_preceding(it, position);
        while (position != UBRK_DONE) {
            // We stop searching when the character following the break
            // is alphanumeric.
            if (position > 0 && u_isalnum(chars[position]))
                return position;

            position = ubrk_preceding(it, position);
        }

        return 0;
    }
}
Esempio n. 3
0
/*
 * u_iscclass
 *
 * Tests whether `codepoint` belongs to at least one of the character classes
 * selected by the `flags` bitmask (enum_cclass_* values). Returns 1 on a
 * match and 0 otherwise.
 *
 * When PARROT_HAS_ICU is true, each selected class is checked with the
 * corresponding ICU predicate and `interp` is unused.
 *
 * Otherwise a limited fallback is used:
 *   - codepoints below 256 are looked up in Parrot_iso_8859_1_typetable;
 *   - enum_cclass_any (exact match on `flags`) always succeeds;
 *   - word / alphabetic (exact match on `flags`) are approximated by
 *     "codepoint < 0x2b0";
 *   - whitespace, numeric and newline use hard-coded codepoint lists;
 *   - if nothing matched and `flags` contains any class other than
 *     whitespace, numeric or newline, an EXCEPTION_LIBRARY_ERROR
 *     ("no ICU lib loaded") is thrown through `interp`.
 */
static int
u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
{
    ASSERT_ARGS(u_iscclass)
#if PARROT_HAS_ICU
    UNUSED(interp);
            /* XXX which one
               return u_charDigitValue(codepoint);
               */
    /* Each test is independent: the first selected class that the
     * codepoint belongs to yields 1. */
    if ((flags & enum_cclass_uppercase)    && u_isupper(codepoint))  return 1;
    if ((flags & enum_cclass_lowercase)    && u_islower(codepoint))  return 1;
    if ((flags & enum_cclass_alphabetic)   && u_isalpha(codepoint))  return 1;
    if ((flags & enum_cclass_numeric)      && u_isdigit(codepoint))  return 1;
    if ((flags & enum_cclass_hexadecimal)  && u_isxdigit(codepoint)) return 1;
    if ((flags & enum_cclass_whitespace)   && u_isspace(codepoint))  return 1;
    if ((flags & enum_cclass_printing)     && u_isprint(codepoint))  return 1;
    if ((flags & enum_cclass_graphical)    && u_isgraph(codepoint))  return 1;
    if ((flags & enum_cclass_blank)        && u_isblank(codepoint))  return 1;
    if ((flags & enum_cclass_control)      && u_iscntrl(codepoint))  return 1;
    if ((flags & enum_cclass_alphanumeric) && u_isalnum(codepoint))  return 1;
    /* "word" means alphanumeric or underscore. */
    if ((flags & enum_cclass_word)         &&
        (u_isalnum(codepoint) || codepoint == '_'))                  return 1;
    /* NOTE(review): UCHAR_LINE_BREAK appears to be an enumerated (int-valued)
     * ICU property rather than a binary one, so u_hasBinaryProperty() may
     * never report true here, leaving only U+2028 / U+2029 -- confirm against
     * the ICU documentation. */
    if ((flags & enum_cclass_newline)      &&
        (codepoint == 0x2028 || codepoint == 0x2029 ||
         u_hasBinaryProperty(codepoint, UCHAR_LINE_BREAK)))          return 1;

    return 0;
#else
    /* Codepoints below 256 are answered from the ISO-8859-1 type table. */
    if (codepoint < 256)
        return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;

    /* Exact comparison: only a request for precisely "any" matches here. */
    if (flags == enum_cclass_any)
        return 1;

    /* All codepoints from u+0100 to u+02af are alphabetic, so we
     * cheat on the WORD and ALPHABETIC properties to include these
     * (and incorrectly exclude all others).  This is a stopgap until
     * ICU is everywhere, or we have better non-ICU unicode support. */
    /* Exact comparison again: combined flag sets do not take this path. */
    if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
        return (codepoint < 0x2b0);

    if (flags & enum_cclass_whitespace) {
        /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
        switch (codepoint) {
          case 0x1680: case 0x180e: case 0x2000: case 0x2001:
          case 0x2002: case 0x2003: case 0x2004: case 0x2005:
          case 0x2006: case 0x2007: case 0x2008: case 0x2009:
          case 0x200a: case 0x2028: case 0x2029: case 0x202f:
          case 0x205f: case 0x3000:
            return 1;
          default:
            /* Not whitespace; keep checking the other requested classes. */
            break;
        }
    }

    if (flags & enum_cclass_numeric) {
        /* from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
        /* Each line below covers one run of ten consecutive codepoints. */
        if (codepoint >= 0x0660 && codepoint <= 0x0669) return 1;
        if (codepoint >= 0x06f0 && codepoint <= 0x06f9) return 1;
        if (codepoint >= 0x07c0 && codepoint <= 0x07c9) return 1;
        if (codepoint >= 0x0966 && codepoint <= 0x096f) return 1;
        if (codepoint >= 0x09e6 && codepoint <= 0x09ef) return 1;
        if (codepoint >= 0x0a66 && codepoint <= 0x0a6f) return 1;
        if (codepoint >= 0x0ae6 && codepoint <= 0x0aef) return 1;
        if (codepoint >= 0x0b66 && codepoint <= 0x0b6f) return 1;
        if (codepoint >= 0x0be6 && codepoint <= 0x0bef) return 1;
        if (codepoint >= 0x0c66 && codepoint <= 0x0c6f) return 1;
        if (codepoint >= 0x0ce6 && codepoint <= 0x0cef) return 1;
        if (codepoint >= 0x0d66 && codepoint <= 0x0d6f) return 1;
        if (codepoint >= 0x0e50 && codepoint <= 0x0e59) return 1;
        if (codepoint >= 0x0ed0 && codepoint <= 0x0ed9) return 1;
        if (codepoint >= 0x0f20 && codepoint <= 0x0f29) return 1;
        if (codepoint >= 0x1040 && codepoint <= 0x1049) return 1;
        if (codepoint >= 0x17e0 && codepoint <= 0x17e9) return 1;
        if (codepoint >= 0x1810 && codepoint <= 0x1819) return 1;
        if (codepoint >= 0x1946 && codepoint <= 0x194f) return 1;
        if (codepoint >= 0x19d0 && codepoint <= 0x19d9) return 1;
        if (codepoint >= 0x1b50 && codepoint <= 0x1b59) return 1;
        if (codepoint >= 0xff10 && codepoint <= 0xff19) return 1;
    }

    if (flags & enum_cclass_newline) {
        /* from http://www.unicode.org/Public/UNIDATA/extracted/DerivedLineBreak.txt
         * Line_Break=Mandatory_Break*/
        if (codepoint == 0x2028 || codepoint == 0x2029) return 1;
    }

    /* Nothing matched so far. If the caller asked for any class this
     * fallback has no data for (anything other than whitespace, numeric
     * or newline), report that ICU is required. */
    if (flags & ~(enum_cclass_whitespace | enum_cclass_numeric | enum_cclass_newline))
        Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_LIBRARY_ERROR,
            "no ICU lib loaded");

    return 0;
#endif
}
// Native helper: reports whether |codePoint| is a letter or a digit by
// delegating to u_isalnum(). The JNIEnv and jclass arguments are ignored.
static jboolean Character_isLetterOrDigitImpl(JNIEnv*, jclass, jint codePoint) {
    return static_cast<jboolean>(u_isalnum(codePoint));
}
Esempio n. 5
0
//static jboolean Character_isLetterOrDigitImpl(JNIEnv*, jclass, jint codePoint) {
// Exported JNI entry point: reports whether |codePoint| is a letter or a
// digit by delegating to u_isalnum(). The JNIEnv and jclass arguments are
// ignored.
JNIEXPORT jboolean JNICALL
Java_java_lang_Character_isLetterOrDigitImpl(JNIEnv*, jclass, jint codePoint) {
    return static_cast<jboolean>(u_isalnum(codePoint));
}
Esempio n. 6
0
// Reports whether |codePoint| is a letter or a digit by delegating to
// u_isalnum().
jboolean fastiva_vm_Character_C$__isLetterOrDigitImpl(jint codePoint) {
    return static_cast<jboolean>(u_isalnum(codePoint));
}
Esempio n. 7
0
// Returns true when u_isalnum() classifies |ch| as alphanumeric
// (i.e. its result is non-zero), false otherwise.
bool IsAlphaNumeric(wxChar ch)
{
	return u_isalnum(ch) != 0;
}
Esempio n. 8
0
// Reports whether the code point |c| is alphanumeric, i.e. a letter or a
// digit, as decided by u_isalnum().
// The original note on this function states that this holds for characters
// in the general categories "L" (letters) and "Nd" (decimal digit numbers).
bool
BUnicodeChar::IsAlNum(uint32 c)
{
	// A temporary BUnicodeChar is constructed and discarded before the
	// lookup; presumably its constructor performs required setup -- verify
	// against the constructor's definition.
	BUnicodeChar();
	return u_isalnum(c) != 0;
}