Esempio n. 1
0
/**
 * Skips leading white space in a UTF-16 string.
 *
 * A code point counts as white space when it is U+0020 or when
 * u_isUWhiteSpace() reports it as such.
 *
 * @param s      String to scan; may be NULL.
 * @param length Number of UChars in s, or a negative value when s is
 *               NUL-terminated.
 * @return NULL when s is NULL; otherwise a pointer to the first code point
 *         that is not white space. When every code point is white space the
 *         result points at the end of the string (s+length, or the
 *         terminating NUL for a NUL-terminated string).
 */
U_CAPI const UChar * U_EXPORT2
u_strSkipWhiteSpace(const UChar *s, int32_t length) {
    int32_t i = 0, toReturn = 0;
    UChar32 c = 0;
    if(s == NULL) {
        return NULL;
    }
    if(length == 0) {
        return s;
    }
    if(length > 0) {
        for(;;) {
            /* Record the position before testing for the end of the string,
             * so that an all-white-space input yields s+length instead of
             * the start of its last white-space code point. */
            toReturn = i;
            if(i >= length) {
                break;
            }
            U16_NEXT(s, i, length, c);
            if(!(c == 0x20 || u_isUWhiteSpace(c))) {
                break;
            }
        }
    } else {
        for(;;) {
            toReturn = i;
            U16_NEXT(s, i, length, c);
            /* The terminating NUL always ends the scan. */
            if(c == 0 || !(c == 0x20 || u_isUWhiteSpace(c))) {
                break;
            }
        }
    }
    return s+toReturn;
}
Esempio n. 2
0
static float *getFloatArray(const UnicodeString &numbers, int32_t &arraySize)
{
    int32_t offset = -1;

    arraySize = 1;
    while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
        arraySize += 1;
    }

    float *array = NEW_ARRAY(float, arraySize);
    char number[32];
    le_int32 count = 0;
    le_int32 start = 0, end = 0;
    le_int32 len = 0;

    // trim leading whitespace
    while(u_isUWhiteSpace(numbers[start])) {
        start += 1;
    }

    while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
        len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
        number[len] = '\0';
        start = end + 1;

        sscanf(number, "%f", &array[count++]);

        // trim whiteapce following the comma
        while(u_isUWhiteSpace(numbers[start])) {
            start += 1;
        }
    }

    while(u_isUWhiteSpace(numbers[start])) {
        start += 1;
    }

    // trim trailing whitespace
    end = numbers.length();
    while(u_isUWhiteSpace(numbers[end - 1])) {
        end -= 1;
    }

    len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
    number[len] = '\0';
    sscanf(number, "%f", &array[count]);

    return array;
}
Esempio n. 3
0
/**
 * Replaces the contents of this set with the scripts named in scriptString.
 *
 * The string is split into tokens at code points for which u_isUWhiteSpace()
 * is true; each token is looked up as a UCHAR_SCRIPT property value name.
 *
 * @param scriptString White-space-separated script names.
 * @param status       ICU error code. If it already indicates failure the set
 *                     is only reset. Set to U_ILLEGAL_ARGUMENT_ERROR when a
 *                     token is not a recognized script name; parsing stops at
 *                     the first failure.
 * @return *this
 */
ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
    resetAll();
    if (U_FAILURE(status)) {
        return *this;
    }
    UnicodeString oneScriptName;
    for (int32_t i=0; i<scriptString.length();) {
        UChar32 c = scriptString.char32At(i);
        i = scriptString.moveIndex32(i, 1);
        if (!u_isUWhiteSpace(c)) {
            oneScriptName.append(c);
            // Keep accumulating unless this was the last code point, in which
            // case fall through and process the final token.
            if (i < scriptString.length()) {
                continue;
            }
        }
        if (oneScriptName.length() > 0) {
            char buf[40];
            const int32_t maxNameLength = static_cast<int32_t>(sizeof(buf)) - 1;
            if (oneScriptName.length() > maxNameLength) {
                // The token does not fit in buf, so extract() would leave buf
                // unfilled. Reject it here rather than look up whatever the
                // buffer happens to contain.
                status = U_ILLEGAL_ARGUMENT_ERROR;
                return *this;
            }
            oneScriptName.extract(0, oneScriptName.length(), buf, maxNameLength, US_INV);
            buf[sizeof(buf)-1] = 0;
            int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
            if (sc == UCHAR_INVALID_CODE) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
            } else {
                this->set((UScriptCode)sc, status);
            }
            if (U_FAILURE(status)) {
                return *this;
            }
            oneScriptName.remove();
        }
    }
    return *this;
}
Esempio n. 4
0
/**
 * Finds where the trailing run of white space in a UTF-16 string begins.
 *
 * A code point counts as white space when it is U+0020 or when
 * u_isUWhiteSpace() reports it as such.
 *
 * @param s      String to scan; may be NULL.
 * @param length Number of UChars in s, or a negative value to have the
 *               length computed with u_strlen().
 * @return NULL when s is NULL; otherwise a pointer just past the last code
 *         point that is not white space (s itself when the whole string is
 *         white space or empty).
 */
U_CAPI const UChar * U_EXPORT2
u_strTrailingWhiteSpaceStart(const UChar *s, int32_t length) {
    if(s == NULL) {
        return NULL;
    }
    if(length == 0) {
        return s;
    }

    int32_t trailingStart = (length < 0) ? u_strlen(s) : length;

    /* Walk backwards one code point at a time while it is white space. */
    while(trailingStart > 0) {
        int32_t previous = trailingStart;
        UChar32 codePoint = 0;
        U16_PREV(s, 0, previous, codePoint);
        if(codePoint != 0x20 && !u_isUWhiteSpace(codePoint)) {
            break;
        }
        trailingStart = previous;
    }

    return s + trailingStart;
}
Esempio n. 5
0
// Maps a code point to the character class used by the grapheme validator.
// The checks are ordered: Vedic accent, then grapheme-link (virama), then
// white space, then combining marks and joiners; anything else is kOther.
Validator::CharClass ValidateGrapheme::UnicodeToCharClass(char32 ch) const {
  if (IsVedicAccent(ch)) {
    return CharClass::kVedicMark;
  }
  if (u_hasBinaryProperty(ch, UCHAR_GRAPHEME_LINK)) {
    return CharClass::kVirama;
  }
  if (u_isUWhiteSpace(ch)) {
    return CharClass::kWhitespace;
  }
  switch (u_charType(ch)) {
    case U_NON_SPACING_MARK:
    case U_ENCLOSING_MARK:
    case U_COMBINING_SPACING_MARK:
      return CharClass::kCombiner;
    default:
      break;
  }
  // The ZeroWidth[Non]Joiner characters are mapped to kCombiner as they
  // always combine with the previous character.
  const bool is_joiner = ch == kZeroWidthNonJoiner || ch == kZeroWidthJoiner;
  return is_joiner ? CharClass::kCombiner : CharClass::kOther;
}
Esempio n. 6
0
static void countLeadingSpaces(const CString& utf8String, int32_t& pointerOffset, int32_t& characterOffset)
{
    pointerOffset = 0;
    characterOffset = 0;
    const char* stringData = utf8String.data();
    UChar32 character = 0;
    while (static_cast<unsigned>(pointerOffset) < utf8String.length()) {
        int32_t nextPointerOffset = pointerOffset;
        U8_NEXT(stringData, nextPointerOffset, static_cast<int32_t>(utf8String.length()), character);

        if (character < 0 || !u_isUWhiteSpace(character))
            return;

        pointerOffset = nextPointerOffset;
        characterOffset++;
    }
}
Esempio n. 7
0
bool GetRawBytesFromHexUnicodeText(std::vector<char>& cs, const std::vector<ucs4_t>& ucs)
{
	std::vector<int> tmp_hex;

	BOOST_FOREACH(ucs4_t u, ucs)
	{
		if (u_isUWhiteSpace(u) && tmp_hex.size()%2==0)
			continue;
		if (!isxdigit(u))
			return false;

		int hex = (u > ucs4_t('9'))? (u | 0x20) - 'a' + 10: u - '0';
		tmp_hex.push_back(hex);
	}

	if (tmp_hex.empty() || tmp_hex.size() % 2 != 0)
		return false;

	for (size_t i=0; i<tmp_hex.size(); i+=2)
		cs.push_back(char((tmp_hex[i]<<4) | tmp_hex[i+1]));

	return true;
}
Esempio n. 8
0
bool IsWhitespace(const char32 ch) {
  ASSERT_HOST_MSG(IsValidCodepoint(ch),
                  "Invalid Unicode codepoint: 0x%x\n", ch);
  return u_isUWhiteSpace(static_cast<UChar32>(ch));
}
Esempio n. 9
0
/*
 * imp: common/uchar.c
 * hdr: common/unicode/uchar.h
 * @stable ICU 2.0
 *
 * Version-suffixed entry point that forwards to u_isUWhiteSpace() and
 * returns its result unchanged.
 */
U_CAPI UBool U_EXPORT2
u_isUWhiteSpace_4_0(UChar32 c)
{
    const UBool isWhiteSpace = u_isUWhiteSpace(c);
    return isWhiteSpace;
}
Esempio n. 10
0
 // Returns true if ch is white space according to u_isUWhiteSpace().
 bool IsWhitespace(codepoint_t ch)
 {
     // UBool is an integer-like truth value: test for nonzero instead of
     // comparing against the literal 1.
     return u_isUWhiteSpace(ch) != 0;
 }