// Scans word breaks of |text| starting from |position| and returns the first
// one that sits next to an alphanumeric character.
//
// forward == true:  walks the breaks that follow |position|; accepts a break
//                   that lies before the end of |text| and whose preceding
//                   character is alphanumeric. Returns text.length() if the
//                   iterator runs out of breaks.
// forward == false: walks the breaks that precede |position|; accepts a
//                   non-zero break whose following character is alphanumeric.
//                   Returns 0 if the iterator runs out of breaks.
int findNextWordFromIndex(StringView text, int position, bool forward)
{
    TextBreakIterator* breakIterator = wordBreakIterator(text);

    if (!forward) {
        for (int boundary = textBreakPreceding(breakIterator, position);
             boundary != TextBreakDone;
             boundary = textBreakPreceding(breakIterator, boundary)) {
            // Accept only a break that is immediately followed by an
            // alphanumeric character; a break at offset 0 is never accepted
            // here (0 is the fall-through result anyway).
            if (boundary != 0 && u_isalnum(text[boundary]))
                return boundary;
        }
        return 0;
    }

    for (int boundary = textBreakFollowing(breakIterator, position);
         boundary != TextBreakDone;
         boundary = textBreakFollowing(breakIterator, boundary)) {
        // Accept only a break that is immediately preceded by an alphanumeric
        // character; a break at the very end of the text is handled by the
        // fall-through return below.
        const bool beforeEnd = static_cast<unsigned>(boundary) < text.length();
        if (beforeEnd && u_isalnum(text[boundary - 1]))
            return boundary;
    }
    return text.length();
}
int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward) { UBreakIterator* it = wordBreakIterator(chars, len); if (forward) { position = ubrk_following(it, position); while (position != UBRK_DONE) { // We stop searching when the character preceeding the break // is alphanumeric. if (position < len && u_isalnum(chars[position - 1])) return position; position = ubrk_following(it, position); } return len; } else { position = ubrk_preceding(it, position); while (position != UBRK_DONE) { // We stop searching when the character following the break // is alphanumeric. if (position > 0 && u_isalnum(chars[position])) return position; position = ubrk_preceding(it, position); } return 0; } }
/*
 * u_iscclass
 *
 * Tests whether `codepoint` belongs to at least one of the character classes
 * selected by the `enum_cclass_*` bits set in `flags`.
 *
 * Returns 1 on a match and 0 otherwise.
 *
 * With PARROT_HAS_ICU the classification is delegated to ICU's u_is*()
 * predicates, one per class bit, and `interp` is unused.
 *
 * Without ICU only a limited fallback is available:
 *   - codepoints below 256 are looked up in Parrot_iso_8859_1_typetable;
 *   - `flags == enum_cclass_any` always matches;
 *   - `flags` equal to exactly `word` or exactly `alphabetic` matches
 *     codepoints below 0x2b0;
 *   - whitespace, numeric and newline are matched against hard-coded lists;
 *   - if nothing matched and `flags` asks for any class other than
 *     whitespace/numeric/newline, Parrot_ex_throw_from_c_noargs() is called
 *     with EXCEPTION_LIBRARY_ERROR ("no ICU lib loaded").
 */
static int
u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
{
    ASSERT_ARGS(u_iscclass)
#if PARROT_HAS_ICU
    UNUSED(interp);
    /* XXX which one
       return u_charDigitValue(codepoint);
       */
    /* Each test below is independent: the first selected class that the
     * codepoint belongs to makes the whole function return 1. */
    if ((flags & enum_cclass_uppercase) && u_isupper(codepoint)) return 1;
    if ((flags & enum_cclass_lowercase) && u_islower(codepoint)) return 1;
    if ((flags & enum_cclass_alphabetic) && u_isalpha(codepoint)) return 1;
    if ((flags & enum_cclass_numeric) && u_isdigit(codepoint)) return 1;
    if ((flags & enum_cclass_hexadecimal) && u_isxdigit(codepoint)) return 1;
    if ((flags & enum_cclass_whitespace) && u_isspace(codepoint)) return 1;
    if ((flags & enum_cclass_printing) && u_isprint(codepoint)) return 1;
    if ((flags & enum_cclass_graphical) && u_isgraph(codepoint)) return 1;
    if ((flags & enum_cclass_blank) && u_isblank(codepoint)) return 1;
    if ((flags & enum_cclass_control) && u_iscntrl(codepoint)) return 1;
    if ((flags & enum_cclass_alphanumeric) && u_isalnum(codepoint)) return 1;
    /* "word" is alphanumeric plus the underscore. */
    if ((flags & enum_cclass_word) && (u_isalnum(codepoint) || codepoint == '_')) return 1;
    /* NOTE(review): UCHAR_LINE_BREAK appears to be an enumerated (int-valued)
     * ICU property rather than a binary one, so the u_hasBinaryProperty()
     * call may never be true and only U+2028/U+2029 would match here --
     * confirm against the ICU uchar.h documentation. */
    if ((flags & enum_cclass_newline) && (codepoint == 0x2028 || codepoint == 0x2029 || u_hasBinaryProperty(codepoint, UCHAR_LINE_BREAK))) return 1;
    return 0;
#else
    /* Latin-1 range: the type table holds the class bits for each codepoint. */
    if (codepoint < 256) return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
    if (flags == enum_cclass_any) return 1;
    /* All codepoints from u+0100 to u+02af are alphabetic, so we
     * cheat on the WORD and ALPHABETIC properties to include these
     * (and incorrectly exclude all others). This is a stopgap until
     * ICU is everywhere, or we have better non-ICU unicode support.
     *
     * Note that this is an equality test, not a bit test: it only applies
     * when `flags` is exactly one of these two classes, and it returns
     * immediately without consulting the checks below.
     */
    if (flags == enum_cclass_word || flags == enum_cclass_alphabetic) return (codepoint < 0x2b0);
    if (flags & enum_cclass_whitespace) {
        /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
        switch (codepoint) {
            case 0x1680: case 0x180e: case 0x2000: case 0x2001:
            case 0x2002: case 0x2003: case 0x2004: case 0x2005:
            case 0x2006: case 0x2007: case 0x2008: case 0x2009:
            case 0x200a: case 0x2028: case 0x2029: case 0x202f:
            case 0x205f: case 0x3000:
                return 1;
            default:
                break;
        }
    }
    if (flags & enum_cclass_numeric) {
        /* from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
        /* Each range below covers ten consecutive codepoints. */
        if (codepoint >= 0x0660 && codepoint <= 0x0669) return 1;
        if (codepoint >= 0x06f0 && codepoint <= 0x06f9) return 1;
        if (codepoint >= 0x07c0 && codepoint <= 0x07c9) return 1;
        if (codepoint >= 0x0966 && codepoint <= 0x096f) return 1;
        if (codepoint >= 0x09e6 && codepoint <= 0x09ef) return 1;
        if (codepoint >= 0x0a66 && codepoint <= 0x0a6f) return 1;
        if (codepoint >= 0x0ae6 && codepoint <= 0x0aef) return 1;
        if (codepoint >= 0x0b66 && codepoint <= 0x0b6f) return 1;
        if (codepoint >= 0x0be6 && codepoint <= 0x0bef) return 1;
        if (codepoint >= 0x0c66 && codepoint <= 0x0c6f) return 1;
        if (codepoint >= 0x0ce6 && codepoint <= 0x0cef) return 1;
        if (codepoint >= 0x0d66 && codepoint <= 0x0d6f) return 1;
        if (codepoint >= 0x0e50 && codepoint <= 0x0e59) return 1;
        if (codepoint >= 0x0ed0 && codepoint <= 0x0ed9) return 1;
        if (codepoint >= 0x0f20 && codepoint <= 0x0f29) return 1;
        if (codepoint >= 0x1040 && codepoint <= 0x1049) return 1;
        if (codepoint >= 0x17e0 && codepoint <= 0x17e9) return 1;
        if (codepoint >= 0x1810 && codepoint <= 0x1819) return 1;
        if (codepoint >= 0x1946 && codepoint <= 0x194f) return 1;
        if (codepoint >= 0x19d0 && codepoint <= 0x19d9) return 1;
        if (codepoint >= 0x1b50 && codepoint <= 0x1b59) return 1;
        if (codepoint >= 0xff10 && codepoint <= 0xff19) return 1;
    }
    if (flags & enum_cclass_newline) {
        /* from http://www.unicode.org/Public/UNIDATA/extracted/DerivedLineBreak.txt
         * Line_Break=Mandatory_Break */
        if (codepoint == 0x2028 || codepoint == 0x2029) return 1;
    }
    /* Nothing matched. If the caller asked for any class this fallback cannot
     * evaluate (anything other than whitespace/numeric/newline), report the
     * missing ICU support instead of silently answering "no". */
    if (flags & ~(enum_cclass_whitespace | enum_cclass_numeric | enum_cclass_newline)) Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_LIBRARY_ERROR, "no ICU lib loaded");
    return 0;
#endif
}
// Native implementation behind Character.isLetterOrDigitImpl: forwards the
// code point to ICU's u_isalnum() and returns its answer as a jboolean.
// The JNIEnv and jclass arguments are not used.
static jboolean Character_isLetterOrDigitImpl(JNIEnv*, jclass, jint codePoint)
{
    const jboolean isLetterOrDigit = u_isalnum(codePoint);
    return isLetterOrDigit;
}
//static jboolean Character_isLetterOrDigitImpl(JNIEnv*, jclass, jint codePoint) { JNIEXPORT jboolean JNICALL Java_java_lang_Character_isLetterOrDigitImpl(JNIEnv*, jclass, jint codePoint) { return u_isalnum(codePoint); }
// Reports whether |codePoint| is a letter or a digit by delegating to ICU's
// u_isalnum(); the result is handed back as a jboolean.
jboolean fastiva_vm_Character_C$__isLetterOrDigitImpl(jint codePoint)
{
    const jboolean answer = u_isalnum(codePoint);
    return answer;
}
// Returns true when ICU's u_isalnum() classifies |ch| as alphanumeric.
bool IsAlphaNumeric(wxChar ch)
{
    // u_isalnum() yields an ICU boolean; compare against zero to make the
    // conversion to bool explicit.
    return u_isalnum(ch) != 0;
}
// Determines whether the specified code point is an alphanumeric character // (letter or digit). // True for characters with general categories // "L" (letters) and "Nd" (decimal digit numbers). bool BUnicodeChar::IsAlNum(uint32 c) { BUnicodeChar(); return u_isalnum(c); }