/**
 * Skips leading white space in a UTF-16 string.
 *
 * A code point is treated as white space when it is U+0020 or when
 * u_isUWhiteSpace() returns true for it.
 *
 * @param s      The string to scan; may be NULL.
 * @param length Number of UChar units in s, or a negative value when s is
 *               NUL-terminated.
 * @return NULL when s is NULL; otherwise a pointer to the first code point
 *         that is not white space. When the string consists only of white
 *         space the result points at the end of the string (s + length for a
 *         bounded string, the terminating NUL otherwise).
 */
U_CAPI const UChar * U_EXPORT2
u_strSkipWhiteSpace(const UChar *s, int32_t length) {
    int32_t i = 0, toReturn = 0;
    UChar32 c = 0;

    if(s == NULL) {
        return NULL;
    }
    if(length == 0) {
        return s;
    }

    if(length > 0) {
        for(;;) {
            /*
             * Record the position BEFORE testing for the end, so that an
             * all-white-space string yields s + length rather than a pointer
             * to its last white space unit.
             */
            toReturn = i;
            if(i >= length) {
                break;
            }
            U16_NEXT(s, i, length, c);
            if(!(c == 0x20 || u_isUWhiteSpace(c))) {
                break;
            }
        }
    } else {
        /* NUL-terminated input: the explicit c == 0 test stops at the terminator. */
        for(;;) {
            toReturn = i;
            U16_NEXT(s, i, length, c);
            if(!(c == 0x20 || u_isUWhiteSpace(c)) || c == 0) {
                break;
            }
        }
    }
    return s + toReturn;
}
static float *getFloatArray(const UnicodeString &numbers, int32_t &arraySize) { int32_t offset = -1; arraySize = 1; while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) { arraySize += 1; } float *array = NEW_ARRAY(float, arraySize); char number[32]; le_int32 count = 0; le_int32 start = 0, end = 0; le_int32 len = 0; // trim leading whitespace while(u_isUWhiteSpace(numbers[start])) { start += 1; } while((end = numbers.indexOf(CH_COMMA, start)) >= 0) { len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV); number[len] = '\0'; start = end + 1; sscanf(number, "%f", &array[count++]); // trim whiteapce following the comma while(u_isUWhiteSpace(numbers[start])) { start += 1; } } while(u_isUWhiteSpace(numbers[start])) { start += 1; } // trim trailing whitespace end = numbers.length(); while(u_isUWhiteSpace(numbers[end - 1])) { end -= 1; } len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV); number[len] = '\0'; sscanf(number, "%f", &array[count]); return array; }
/**
 * Replaces the contents of this set with the scripts named in a
 * white-space-separated list.
 *
 * Each name is looked up with u_getPropertyValueEnum(UCHAR_SCRIPT, ...).
 *
 * @param scriptString Script names separated by white space.
 * @param status       ICU error code. Set to U_ILLEGAL_ARGUMENT_ERROR when a
 *                     name is not a recognized script or is too long to be
 *                     one. Parsing stops at the first failure, so scripts
 *                     parsed before it remain set.
 * @return *this.
 */
ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
    resetAll();
    if (U_FAILURE(status)) {
        return *this;
    }
    UnicodeString oneScriptName;
    for (int32_t i=0; i<scriptString.length();) {
        UChar32 c = scriptString.char32At(i);
        i = scriptString.moveIndex32(i, 1);
        if (!u_isUWhiteSpace(c)) {
            oneScriptName.append(c);
            // Keep accumulating unless this was the last code point, in which
            // case fall through and process the final name.
            if (i < scriptString.length()) {
                continue;
            }
        }
        if (oneScriptName.length() > 0) {
            char buf[40];
            const int32_t maxNameLength = (int32_t)sizeof(buf) - 1;
            // A name that does not fit would leave buf unfilled and the lookup
            // below would read indeterminate bytes; reject it explicitly.
            if (oneScriptName.length() > maxNameLength) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
                return *this;
            }
            oneScriptName.extract(0, oneScriptName.length(), buf, maxNameLength, US_INV);
            buf[maxNameLength] = 0;
            int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
            if (sc == UCHAR_INVALID_CODE) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
            } else {
                this->set((UScriptCode)sc, status);
            }
            if (U_FAILURE(status)) {
                return *this;
            }
            oneScriptName.remove();
        }
    }
    return *this;
}
/**
 * Finds where the trailing run of white space begins in a UTF-16 string.
 *
 * A code point is treated as white space when it is U+0020 or when
 * u_isUWhiteSpace() returns true for it.
 *
 * @param s      The string to scan; may be NULL.
 * @param length Number of UChar units in s, or a negative value to have the
 *               length computed with u_strlen().
 * @return NULL when s is NULL; otherwise a pointer to the first unit of the
 *         trailing white space, which is the end of the string when there is
 *         none and s itself when the whole string is white space.
 */
U_CAPI const UChar * U_EXPORT2
u_strTrailingWhiteSpaceStart(const UChar *s, int32_t length) {
    int32_t pos;
    int32_t trailingStart;
    UChar32 cp = 0;

    if(s == NULL) {
        return NULL;
    }
    if(length == 0) {
        return s;
    }
    if(length < 0) {
        length = u_strlen(s);
    }

    pos = length;
    trailingStart = pos;
    /* Walk backwards one code point at a time while white space continues. */
    while(pos > 0) {
        U16_PREV(s, 0, pos, cp);
        if(cp != 0x20 && !u_isUWhiteSpace(cp)) {
            break;
        }
        trailingStart = pos;
    }
    return s + trailingStart;
}
// Maps a code point to the CharClass used by the grapheme validator.
// The checks run in priority order: Vedic accent, grapheme-link (virama),
// white space, then combining marks and the zero-width joiners.
Validator::CharClass ValidateGrapheme::UnicodeToCharClass(char32 ch) const {
  if (IsVedicAccent(ch)) {
    return CharClass::kVedicMark;
  }
  if (u_hasBinaryProperty(ch, UCHAR_GRAPHEME_LINK)) {
    return CharClass::kVirama;
  }
  if (u_isUWhiteSpace(ch)) {
    return CharClass::kWhitespace;
  }

  switch (u_charType(ch)) {
    case U_NON_SPACING_MARK:
    case U_ENCLOSING_MARK:
    case U_COMBINING_SPACING_MARK:
      return CharClass::kCombiner;
    default:
      break;
  }

  // The ZeroWidth[Non]Joiner characters are mapped to kCombiner as they
  // always combine with the previous character.
  if (ch == kZeroWidthNonJoiner || ch == kZeroWidthJoiner) {
    return CharClass::kCombiner;
  }
  return CharClass::kOther;
}
static void countLeadingSpaces(const CString& utf8String, int32_t& pointerOffset, int32_t& characterOffset) { pointerOffset = 0; characterOffset = 0; const char* stringData = utf8String.data(); UChar32 character = 0; while (static_cast<unsigned>(pointerOffset) < utf8String.length()) { int32_t nextPointerOffset = pointerOffset; U8_NEXT(stringData, nextPointerOffset, static_cast<int32_t>(utf8String.length()), character); if (character < 0 || !u_isUWhiteSpace(character)) return; pointerOffset = nextPointerOffset; characterOffset++; } }
bool GetRawBytesFromHexUnicodeText(std::vector<char>& cs, const std::vector<ucs4_t>& ucs) { std::vector<int> tmp_hex; BOOST_FOREACH(ucs4_t u, ucs) { if (u_isUWhiteSpace(u) && tmp_hex.size()%2==0) continue; if (!isxdigit(u)) return false; int hex = (u > ucs4_t('9'))? (u | 0x20) - 'a' + 10: u - '0'; tmp_hex.push_back(hex); } if (tmp_hex.empty() || tmp_hex.size() % 2 != 0) return false; for (size_t i=0; i<tmp_hex.size(); i+=2) cs.push_back(char((tmp_hex[i]<<4) | tmp_hex[i+1])); return true; }
bool IsWhitespace(const char32 ch) { ASSERT_HOST_MSG(IsValidCodepoint(ch), "Invalid Unicode codepoint: 0x%x\n", ch); return u_isUWhiteSpace(static_cast<UChar32>(ch)); }
/*
 * Entry point carrying the _4_0 suffix; it forwards its argument to
 * u_isUWhiteSpace() and returns that result unchanged.
 *
 * imp: common/uchar.c
 * hdr: common/unicode/uchar.h
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isUWhiteSpace_4_0(UChar32 c) {
    const UBool isWhiteSpace = u_isUWhiteSpace(c);
    return isWhiteSpace;
}
// Returns true when u_isUWhiteSpace() reports exactly 1 for ch.
bool IsWhitespace(codepoint_t ch) {
  if (u_isUWhiteSpace(ch) != 1) {
    return false;
  }
  return true;
}