Esempio n. 1
0
U_NAMESPACE_BEGIN

/**
 * Parse an integer at pos, either of the form \d+ or of the form
 * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
 * or octal format.
 * @param pos INPUT-OUTPUT parameter.  On input, the first
 * character to parse.  On output, the character after the last
 * parsed character.
 */
int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit) {
    int32_t count = 0;
    int32_t value = 0;
    int32_t p = pos;
    int8_t radix = 10;

    if (p < limit && rule.charAt(p) == 48 /*0*/) {
        if (p+1 < limit && (rule.charAt(p+1) == 0x78 /*x*/ || rule.charAt(p+1) == 0x58 /*X*/)) {
            p += 2;
            radix = 16;
        }
        else {
            p++;
            count = 1;
            radix = 8;
        }
    }

    while (p < limit) {
        int32_t d = u_digit(rule.charAt(p++), radix);
        if (d < 0) {
            --p;
            break;
        }
        ++count;
        int32_t v = (value * radix) + d;
        if (v <= value) {
            // If there are too many input digits, at some point
            // the value will go negative, e.g., if we have seen
            // "0x8000000" already and there is another '0', when
            // we parse the next 0 the value will go negative.
            return 0;
        }
        value = v;
    }
    if (count > 0) {
        pos = p;
    }
    return value;
}
Esempio n. 2
0
//
//   replaceCharRefs
//
//      replace the char entities &lt;  &amp; &#123; &#x12ab; etc. in a string
//       with the corresponding actual character.
//
void
UXMLParser::replaceCharRefs(UnicodeString &s, UErrorCode &status) {
    UnicodeString result;
    UnicodeString replacement;
    int     i;

    mAmps.reset(s);
    // See the initialization for the regex matcher mAmps.
    //    Which entity we've matched is determined by which capture group has content,
    //      which is flaged by start() of that group not being -1.
    while (mAmps.find()) {
        if (mAmps.start(1, status) != -1) {
            replacement.setTo((UChar)x_AMP);
        } else if (mAmps.start(2, status) != -1) {
            replacement.setTo((UChar)x_LT);
        } else if (mAmps.start(3, status) != -1) {
            replacement.setTo((UChar)x_GT);
        } else if (mAmps.start(4, status) != -1) {
            replacement.setTo((UChar)x_APOS);
        } else if (mAmps.start(5, status) != -1) {
            replacement.setTo((UChar)x_QUOT);
        } else if (mAmps.start(6, status) != -1) {
            UnicodeString hexString = mAmps.group(6, status);
            UChar32 val = 0;
            for (i=0; i<hexString.length(); i++) {
                val = (val << 4) + u_digit(hexString.charAt(i), 16);
            }
            // TODO:  some verification that the character is valid
            replacement.setTo(val);
        } else if (mAmps.start(7, status) != -1) {
            UnicodeString decimalString = mAmps.group(7, status);
            UChar32 val = 0;
            for (i=0; i<decimalString.length(); i++) {
                val = val*10 + u_digit(decimalString.charAt(i), 10);
            }
            // TODO:  some verification that the character is valid
            replacement.setTo(val);
        } else {
            // An unrecognized &entity;  Leave it alone.
            //  TODO:  check that it really looks like an entity, and is not some
            //         random & in the text.
            replacement = mAmps.group((int32_t)0, status);
        }
        mAmps.appendReplacement(result, replacement, status);
    }
    mAmps.appendTail(result);
    s = result;
}
static void setCharField(JNIEnv* env, jobject obj, const char* fieldName, const UnicodeString& value) {
    if (value.length() == 0) {
        return;
    }
    jfieldID fid = env->GetFieldID(JniConstants::localeDataClass, fieldName, "C");
    env->SetCharField(obj, fid, value.charAt(0));
}
Esempio n. 4
0
UBool
LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
{
    return child.indexOf(root) == 0 &&
      (child.length() == root.length() ||
       child.charAt(root.length()) == UNDERSCORE_CHAR);
}
Esempio n. 5
0
static UnicodeString parseHex(const UnicodeString &in) {
    // Convert a series of hex numbers in a Unicode String to a string with the
    // corresponding characters.
    // The conversion is _really_ annoying.  There must be some function to just do it.
    UnicodeString result;
    UChar32 cc = 0;
    for (int32_t i=0; i<in.length(); i++) {
        UChar c = in.charAt(i);
        if (c == 0x20) {   // Space
            if (cc > 0) {
               result.append(cc);
               cc = 0;
            }
        } else if (c>=0x30 && c<=0x39) {
            cc = (cc<<4) + (c - 0x30);
        } else if ((c>=0x41 && c<=0x46) || (c>=0x61 && c<=0x66)) {
            cc = (cc<<4) + (c & 0x0f)+9;
        }
        // else do something with bad input.
    }
    if (cc > 0) {
        result.append(cc);
    }
    return result;
}
// fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
// Modifies s in place.
static void fixQuotes(UnicodeString& s) {
  QuoteState state = OUTSIDE;
  int32_t len = s.length();
  int32_t dest = 0;
  for (int32_t i = 0; i < len; ++i) {
    UChar ch = s.charAt(i);
    if (ch == u_apos) {
      if (state == INSIDE_EMPTY) {
        s.setCharAt(dest, ch);
        ++dest;
      }
    } else {
      s.setCharAt(dest, ch);
      ++dest;
    }

    // Update state
    switch (state) {
      case OUTSIDE:
        state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE;
        break;
      case INSIDE_EMPTY:
      case INSIDE_FULL:
        state = ch == u_apos ? OUTSIDE : INSIDE_FULL;
        break;
      default:
        break;
    }
  }
  s.truncate(dest);
}
Esempio n. 7
0
void TextGroup::addRun(const UnicodeString &input, UBiDiDirection direction, int32_t start, int32_t end)
{
    std::string text;
    input.tempSubString(start, end - start).toUTF8String(text);
    printf("Hominlinx-->======TextGroup::addRun[%s]==== %d\n",text.c_str(), input.charAt(0) );

    runs_.emplace_back(text, script_, lang_, uciDirectionToHB(direction));
}
Esempio n. 8
0
 TestReplaceable (const UnicodeString& text, 
                  const UnicodeString& newStyles) {
     chars = text;
     UnicodeString s;
     for (int i = 0; i < text.length(); ++i) {
         if (i < newStyles.length()) {
             s.append(newStyles.charAt(i));
         } else {
             if (text.charAt(i) == NO_STYLE_MARK) {
                 s.append(NO_STYLE);
             } else {
                 s.append((UChar)(i + 0x0031));
             }
         }
     }
     this->styles = s;
 }
Esempio n. 9
0
/**
 * Parse a pattern string starting at offset pos.  Keywords are
 * matched case-insensitively.  Spaces may be skipped and may be
 * optional or required.  Integer values may be parsed, and if
 * they are, they will be returned in the given array.  If
 * successful, the offset of the next non-space character is
 * returned.  On failure, -1 is returned.
 * @param pattern must only contain lowercase characters, which
 * will match their uppercase equivalents as well.  A space
 * character matches one or more required spaces.  A '~' character
 * matches zero or more optional spaces.  A '#' character matches
 * an integer and stores it in parsedInts, which the caller must
 * ensure has enough capacity.
 * @param parsedInts array to receive parsed integers.  Caller
 * must ensure that parsedInts.length is >= the number of '#'
 * signs in 'pattern'.
 * @return the position after the last character parsed, or -1 if
 * the parse failed
 */
int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
                              const UnicodeString& pattern, int32_t* parsedInts) {
    // TODO Update this to handle surrogates
    int32_t p;
    int32_t intCount = 0; // number of integers parsed
    for (int32_t i=0; i<pattern.length(); ++i) {
        UChar cpat = pattern.charAt(i);
        UChar c;
        switch (cpat) {
        case 32 /*' '*/:
            if (pos >= limit) {
                return -1;
            }
            c = rule.charAt(pos++);
            if (!PatternProps::isWhiteSpace(c)) {
                return -1;
            }
            // FALL THROUGH to skipWhitespace
            U_FALLTHROUGH;
        case 126 /*'~'*/:
            pos = skipWhitespace(rule, pos);
            break;
        case 35 /*'#'*/:
            p = pos;
            parsedInts[intCount++] = parseInteger(rule, p, limit);
            if (p == pos) {
                // Syntax error; failed to parse integer
                return -1;
            }
            pos = p;
            break;
        default:
            if (pos >= limit) {
                return -1;
            }
            c = (UChar) u_tolower(rule.charAt(pos++));
            if (c != cpat) {
                return -1;
            }
            break;
        }
    }
    return pos;
}
Esempio n. 10
0
UBool
SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{
    int32_t len = argKeyword.length();
    if (len < 1){
        return FALSE;
    }
    CharacterClass type = classifyCharacter(argKeyword.charAt(0));
    if( type != tStartKeyword ){
        return FALSE;
    }

    for (int32_t i = 0; i < argKeyword.length(); ++i) {
        type = classifyCharacter(argKeyword.charAt(i));
        if( type != tStartKeyword && type != tContinueKeyword ){
            return FALSE;
        }
    }
    return TRUE;
}
Esempio n. 11
0
int32_t
MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) {
    // If the identifier contains only ASCII digits, then it is an argument _number_
    // and must not have leading zeros (except "0" itself).
    // Otherwise it is an argument _name_.
    if(start>=limit) {
        return UMSGPAT_ARG_NAME_NOT_VALID;
    }
    int32_t number;
    // Defer numeric errors until we know there are only digits.
    UBool badNumber;
    UChar c=s.charAt(start++);
    if(c==0x30) {
        if(start==limit) {
            return 0;
        } else {
            number=0;
            badNumber=TRUE;  // leading zero
        }
    } else if(0x31<=c && c<=0x39) {
        number=c-0x30;
        badNumber=FALSE;
    } else {
        return UMSGPAT_ARG_NAME_NOT_NUMBER;
    }
    while(start<limit) {
        c=s.charAt(start++);
        if(0x30<=c && c<=0x39) {
            if(number>=INT32_MAX/10) {
                badNumber=TRUE;  // overflow
            }
            number=number*10+(c-0x30);
        } else {
            return UMSGPAT_ARG_NAME_NOT_NUMBER;
        }
    }
    // There are only ASCII digits.
    if(badNumber) {
        return UMSGPAT_ARG_NAME_NOT_VALID;
    } else {
        return number;
    }
}
Esempio n. 12
0
U_NAMESPACE_BEGIN

int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit) {
    int32_t count = 0;
    int32_t value = 0;
    int32_t p = pos;
    int8_t radix = 10;

    if (p < limit && rule.charAt(p) == 48 /*0*/) {
        if (p+1 < limit && (rule.charAt(p+1) == 0x78 /*x*/ || rule.charAt(p+1) == 0x58 /*X*/)) {
            p += 2;
            radix = 16;
        }
        else {
            p++;
            count = 1;
            radix = 8;
        }
    }

    while (p < limit) {
        int32_t d = u_digit(rule.charAt(p++), radix);
        if (d < 0) {
            --p;
            break;
        }
        ++count;
        int32_t v = (value * radix) + d;
        if (v <= value) {
            // If there are too many input digits, at some point
            // the value will go negative, e.g., if we have seen
            // "0x8000000" already and there is another '0', when
            // we parse the next 0 the value will go negative.
            return 0;
        }
        value = v;
    }
    if (count > 0) {
        pos = p;
    }
    return value;
}
UBool checkEqual(const PluralRules &test, char *result, int32_t max) {
    UnicodeString key;
    UBool isEqual = TRUE;
    for (int32_t i=0; i<max; ++i) {
        key= test.select(i);
        if ( key.charAt(0)!=result[i] ) {
            isEqual = FALSE;
        }
    }
    return isEqual;
}
Esempio n. 14
0
U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth)
{
    int i;
    for (i=0; i<s.length(); i++) {
        RBBIDebugPrintf("%c", s.charAt(i));
        // putc(s.charAt(i), stdout);
    }
    for (i=s.length(); i<minWidth; i++) {
        RBBIDebugPrintf(" ");
    }
}
Esempio n. 15
0
void UObjectTest::TestMFCCompatibility() {
#if U_HAVE_DEBUG_LOCATION_NEW
    /* Make sure that it compiles with MFC's debuggable new usage. */
    UnicodeString *str = new(__FILE__, __LINE__) UnicodeString();
    str->append((UChar)0x0040); // Is it usable?
    if(str->charAt(0) != 0x0040) {
        errln("debug new doesn't work.");
    }
    UnicodeString::operator delete(str, __FILE__, __LINE__);
#endif
}
Esempio n. 16
0
void
PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
    int32_t i=*curIndex;

    localeName.remove();
    while (i< localeData.length()) {
       if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
           break;
       }
       i++;
    }

    while (i< localeData.length()) {
       if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
           break;
       }
       localeName+=localeData.charAt(i++);
    }
    *curIndex=i;
}
Esempio n. 17
0
// Computes the restriction level of a string, according to UTS 39 section 5.2.
URestrictionLevel SpoofImpl::getRestrictionLevel(const UnicodeString& input, UErrorCode& status) const {
    // Section 5.2 step 1:
    if (!fAllowedCharsSet->containsAll(input)) {
        return USPOOF_UNRESTRICTIVE;
    }

    // Section 5.2 step 2
    // Java use a static UnicodeSet for this test.  In C++, avoid the static variable
    // and just do a simple for loop.
    UBool allASCII = TRUE;
    for (int32_t i=0, length=input.length(); i<length; i++) {
        if (input.charAt(i) > 0x7f) {
            allASCII = FALSE;
            break;
        }
    }
    if (allASCII) {
        return USPOOF_ASCII;
    }

    // Section 5.2 steps 3:
    ScriptSet resolvedScriptSet;
    getResolvedScriptSet(input, resolvedScriptSet, status);
    if (U_FAILURE(status)) { return USPOOF_UNRESTRICTIVE; }

    // Section 5.2 step 4:
    if (!resolvedScriptSet.isEmpty()) {
        return USPOOF_SINGLE_SCRIPT_RESTRICTIVE;
    }

    // Section 5.2 step 5:
    ScriptSet resolvedNoLatn;
    getResolvedScriptSetWithout(input, USCRIPT_LATIN, resolvedNoLatn, status);
    if (U_FAILURE(status)) { return USPOOF_UNRESTRICTIVE; }

    // Section 5.2 step 6:
    if (resolvedNoLatn.test(USCRIPT_HAN_WITH_BOPOMOFO, status)
            || resolvedNoLatn.test(USCRIPT_JAPANESE, status)
            || resolvedNoLatn.test(USCRIPT_KOREAN, status)) {
        return USPOOF_HIGHLY_RESTRICTIVE;
    }

    // Section 5.2 step 7:
    if (!resolvedNoLatn.isEmpty()
            && !resolvedNoLatn.test(USCRIPT_CYRILLIC, status)
            && !resolvedNoLatn.test(USCRIPT_GREEK, status)
            && !resolvedNoLatn.test(USCRIPT_CHEROKEE, status)) {
        return USPOOF_MODERATELY_RESTRICTIVE;
    }

    // Section 5.2 step 8:
    return USPOOF_MINIMALLY_RESTRICTIVE;
}
Esempio n. 18
0
int32_t NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
                                    UErrorCode &status) {
    if (unistr.length() == 0) {
        // Nothing to insert.
        return 0;
    } else if (unistr.length() == 1) {
        // Fast path: insert using insertCodePoint.
        return insertCodePoint(index, unistr.charAt(0), field, status);
    } else {
        return insert(index, unistr, 0, unistr.length(), field, status);
    }
}
Esempio n. 19
0
 void fixStyles(int32_t start, int32_t limit, int32_t newLen) {
     UChar newStyle = NO_STYLE;
     if (start != limit && styles.charAt(start) != NO_STYLE) {
         newStyle = styles.charAt(start);
     } else if (start > 0 && getCharAt(start-1) != NO_STYLE_MARK) {
         newStyle = styles.charAt(start-1);
     } else if (limit < styles.length()) {
         newStyle = styles.charAt(limit);
     }
     // dumb implementation for now.
     UnicodeString s;
     for (int i = 0; i < newLen; ++i) {
         // this doesn't really handle an embedded NO_STYLE_MARK
         // in the middle of a long run of characters right -- but
         // that case shouldn't happen anyway
         if (getCharAt(start+i) == NO_STYLE_MARK) {
             s.append(NO_STYLE);
         } else {
             s.append(newStyle);
         }
     }
     styles.replaceBetween(start, limit, s);
 }
Esempio n. 20
0
int32_t
NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
                            Field field, UErrorCode &status) {
    int32_t count = end - start;
    int32_t position = prepareForInsert(index, count, status);
    if (U_FAILURE(status)) {
        return count;
    }
    for (int32_t i = 0; i < count; i++) {
        getCharPtr()[position + i] = unistr.charAt(start + i);
        getFieldPtr()[position + i] = field;
    }
    return count;
}
Esempio n. 21
0
void ReplaceableTest::check(const UnicodeString& transliteratorName, 
                            const UnicodeString& test, 
                            const UnicodeString& shouldProduceStyles) 
{
    UErrorCode status = U_ZERO_ERROR;
    TestReplaceable *tr = new TestReplaceable(test, "");
    UnicodeString expectedStyles = shouldProduceStyles;
    UnicodeString original = tr->toString();

    Transliterator* t;
    if (transliteratorName.charAt(0) == 0x2A /*'*'*/) {
        UnicodeString rules(transliteratorName);
        rules.remove(0,1);
        UParseError pe;
        t = Transliterator::createFromRules("test", rules, UTRANS_FORWARD,
                                            pe, status);

        // test clone()
        TestReplaceable *tr2 = (TestReplaceable *)tr->clone();
        if(tr2 != NULL) {
            delete tr;
            tr = tr2;
        }
    } else {
        t = Transliterator::createInstance(transliteratorName, UTRANS_FORWARD, status);
    }
    if (U_FAILURE(status)) {
        log("FAIL: failed to create the ");
        log(transliteratorName);
        errln(" transliterator.");
        delete tr;
        return;
    }
    t->transliterate(*tr);
    UnicodeString newStyles = tr->getStyles();
    if (newStyles != expectedStyles) {
        errln("FAIL Styles: " + transliteratorName + "{" + original + "} => "
            + tr->toString() + "; should be {" + expectedStyles + "}!");
    } else {
        log("OK: ");
        log(transliteratorName);
        log("(");
        log(original);
        log(") => ");
        logln(tr->toString());
    }
    delete tr;
    delete t;
}
// populatePrefixSuffix Adds a specific prefix-suffix pair to result for a
// given variant and log10 value.
// variant is 'zero', 'one', 'two', 'few', 'many', or 'other'.
// formatStr is the format string from which the prefix and suffix are
// extracted. It is usually of form 'Pefix 000 suffix'.
// populatePrefixSuffix returns the number of 0's found in formatStr
// before the decimal point.
// In the special case that formatStr contains only spaces for prefix
// and suffix, populatePrefixSuffix returns log10Value + 1.
static int32_t populatePrefixSuffix(
    const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status) {
  if (U_FAILURE(status)) {
    return 0;
  }
  int32_t firstIdx = formatStr.indexOf(kZero, UPRV_LENGTHOF(kZero), 0);
  // We must have 0's in format string.
  if (firstIdx == -1) {
    status = U_INTERNAL_PROGRAM_ERROR;
    return 0;
  }
  int32_t lastIdx = formatStr.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx);
  CDFUnit* unit = createCDFUnit(variant, log10Value, result, status);
  if (U_FAILURE(status)) {
    return 0;
  }

  // Return -1 if we are not overwriting an existing value
  if (unit->isSet() && !overwrite) {
    return -1;
  }
  unit->markAsSet();

  // Everything up to first 0 is the prefix
  unit->prefix = formatStr.tempSubString(0, firstIdx);
  fixQuotes(unit->prefix);
  // Everything beyond the last 0 is the suffix
  unit->suffix = formatStr.tempSubString(lastIdx + 1);
  fixQuotes(unit->suffix);

  // If there is effectively no prefix or suffix, ignore the actual number of
  // 0's and act as if the number of 0's matches the size of the number.
  if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) {
    return log10Value + 1;
  }

  // Calculate number of zeros before decimal point
  int32_t idx = firstIdx + 1;
  while (idx <= lastIdx && formatStr.charAt(idx) == u_0) {
    ++idx;
  }
  return (idx - firstIdx);
}
Esempio n. 23
0
UnicodeString
PluralFormat::insertFormattedNumber(double number,
                                    UnicodeString & message,
                                    UnicodeString & appendTo,
                                    FieldPosition & pos) const
{
	UnicodeString result;
	int32_t braceStack = 0;
	int32_t startIndex = 0;

	if (message.length() == 0)
	{
		return result;
	}
	appendTo = numberFormat->format(number, appendTo, pos);
	for (int32_t i = 0; i < message.length(); ++i)
	{
		switch (message.charAt(i))
		{
		case LEFTBRACE:
			++braceStack;
			break;
		case RIGHTBRACE:
			--braceStack;
			break;
		case NUMBER_SIGN:
			if (braceStack == 0)
			{
				result += UnicodeString(message, startIndex, i);
				result += appendTo;
				startIndex = i + 1;
			}
			break;
		}
	}
	if (startIndex < message.length())
	{
		result += UnicodeString(message, startIndex, message.length() - startIndex);
	}
	appendTo = result;
	return result;
}
Esempio n. 24
0
//
// RBBISymbolTable::parseReference   This function from the abstract symbol table interface
//                                   looks for a $variable name in the source text.
//                                   It does not look it up, only scans for it.
//                                   It is used by the UnicodeSet parser.
//
//                                   This implementation is lifted pretty much verbatim
//                                   from the rules based transliterator implementation.
//                                   I didn't see an obvious way of sharing it.
//
UnicodeString   RBBISymbolTable::parseReference(const UnicodeString& text,
                                                ParsePosition& pos, int32_t limit) const
{
    int32_t start = pos.getIndex();
    int32_t i = start;
    UnicodeString result;
    while (i < limit) {
        UChar c = text.charAt(i);
        if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
            break;
        }
        ++i;
    }
    if (i == start) { // No valid name chars
        return result; // Indicate failure with empty string
    }
    pos.setIndex(i);
    text.extractBetween(start, i, result);
    return result;
}
Esempio n. 25
0
void
RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
{
    // iterate through the characters...
    UnicodeString result;

    int start = 0;
    while (start != -1 && start < description.length()) {
        // seek to the first non-whitespace character...
        while (start < description.length()
            && PatternProps::isWhiteSpace(description.charAt(start))) {
            ++start;
        }

        // locate the next semicolon in the text and copy the text from
        // our current position up to that semicolon into the result
        int32_t p = description.indexOf(gSemiColon, start);
        if (p == -1) {
            // or if we don't find a semicolon, just copy the rest of
            // the string into the result
            result.append(description, start, description.length() - start);
            start = -1;
        }
        else if (p < description.length()) {
            result.append(description, start, p + 1 - start);
            start = p + 1;
        }

        // when we get here, we've seeked off the end of the sring, and
        // we terminate the loop (we continue until *start* is -1 rather
        // than until *p* is -1, because otherwise we'd miss the last
        // rule in the description)
        else {
            start = -1;
        }
    }

    description.setTo(result);
}
Esempio n. 26
0
static void
printUnicodeString(const char *announce, const UnicodeString &s) {
    static char out[200];
    int32_t i, length;

    // output the string, converted to the platform encoding

    // Note for Windows: The "platform encoding" defaults to the "ANSI codepage",
    // which is different from the "OEM codepage" in the console window.
    // However, if you pipe the output into a file and look at it with Notepad
    // or similar, then "ANSI" characters will show correctly.
    // Production code should be aware of what encoding is required,
    // and use a UConverter or at least a charset name explicitly.
    out[s.extract(0, 99, out)]=0;
    printf("%s%s {", announce, out);

    // output the code units (not code points)
    length=s.length();
    for(i=0; i<length; ++i) {
        printf(" %04x", s.charAt(i));
    }
    printf(" }\n");
}
Esempio n. 27
0
int32_t
NumberStringBuilder::splice(int32_t startThis, int32_t endThis,  const UnicodeString &unistr,
                            int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
    int32_t thisLength = endThis - startThis;
    int32_t otherLength = endOther - startOther;
    int32_t count = otherLength - thisLength;
    int32_t position;
    if (count > 0) {
        // Overall, chars need to be added.
        position = prepareForInsert(startThis, count, status);
    } else {
        // Overall, chars need to be removed or kept the same.
        position = remove(startThis, -count);
    }
    if (U_FAILURE(status)) {
        return count;
    }
    for (int32_t i = 0; i < otherLength; i++) {
        getCharPtr()[position + i] = unistr.charAt(startOther + i);
        getFieldPtr()[position + i] = field;
    }
    return count;
}
//constructor
NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
: unassigned(), prohibited(), labelSeparatorSet(){
        
    mapping = NULL;
    bundle = NULL;


    const char* testDataName = IntlTest::loadTestData(status);
    
    if(U_FAILURE(status)){
        return;
    }
    
    bundle = ures_openDirect(testDataName,"idna_rules",&status);
    
    if(bundle != NULL && U_SUCCESS(status)){
        // create the mapping transliterator
        int32_t ruleLen = 0;
        const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
        int32_t mapRuleLen = 0;
        const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status);
        UnicodeString rule(mapRuleUChar, mapRuleLen);
        rule.append(ruleUChar, ruleLen);

        mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
                                                   UTRANS_FORWARD, parseError,status);
        if(U_FAILURE(status)) {
          return;
        }

        //create the unassigned set
        int32_t patternLen =0;
        const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
        unassigned.applyPattern(UnicodeString(pattern, patternLen), status);

        //create prohibited set
        patternLen=0;
        pattern =  ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
        UnicodeString test(pattern,patternLen);
        prohibited.applyPattern(test,status);
#ifdef DEBUG
        if(U_FAILURE(status)){
            printf("Construction of Unicode set failed\n");
        }

        if(U_SUCCESS(status)){
            if(prohibited.contains((UChar) 0x644)){
                printf("The string contains 0x644 ... damn !!\n");
            }
            UnicodeString temp;
            prohibited.toPattern(temp,TRUE);

            for(int32_t i=0;i<temp.length();i++){
                printf("%c", (char)temp.charAt(i));
            }
            printf("\n");
        }
#endif
        
        //create label separator set
        patternLen=0;
        pattern =  ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
        labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
    }

    if(U_SUCCESS(status) && 
        (mapping == NULL)
      ){
        status = U_MEMORY_ALLOCATION_ERROR;
        delete mapping;
        ures_close(bundle);
        mapping = NULL;
        bundle = NULL;
    }
        
}
Esempio n. 29
0
/**
 * Append c to buf, unless buf is empty or buf already ends in c.
 */
static void _smartAppend(UnicodeString& buf, UChar c) {
    if (buf.length() != 0 &&
        buf.charAt(buf.length() - 1) != c) {
        buf.append(c);
    }
}
Esempio n. 30
0
//---------------------------------------------------------------------
//
//   dump    Output the compiled form of the pattern.
//           Debugging function only.
//
//---------------------------------------------------------------------
void   RegexPattern::dumpOp(int32_t index) const {
    (void)index;  // Suppress warnings in non-debug build.
#if defined(REGEX_DEBUG)
    static const char * const opNames[] = {URX_OPCODE_NAMES};
    int32_t op          = fCompiledPat->elementAti(index);
    int32_t val         = URX_VAL(op);
    int32_t type        = URX_TYPE(op);
    int32_t pinnedType  = type;
    if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
        pinnedType = 0;
    }

    printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
    switch (type) {
    case URX_NOP:
    case URX_DOTANY:
    case URX_DOTANY_ALL:
    case URX_FAIL:
    case URX_CARET:
    case URX_DOLLAR:
    case URX_BACKSLASH_G:
    case URX_BACKSLASH_X:
    case URX_END:
    case URX_DOLLAR_M:
    case URX_CARET_M:
        // Types with no operand field of interest.
        break;

    case URX_RESERVED_OP:
    case URX_START_CAPTURE:
    case URX_END_CAPTURE:
    case URX_STATE_SAVE:
    case URX_JMP:
    case URX_JMP_SAV:
    case URX_JMP_SAV_X:
    case URX_BACKSLASH_B:
    case URX_BACKSLASH_BU:
    case URX_BACKSLASH_D:
    case URX_BACKSLASH_Z:
    case URX_STRING_LEN:
    case URX_CTR_INIT:
    case URX_CTR_INIT_NG:
    case URX_CTR_LOOP:
    case URX_CTR_LOOP_NG:
    case URX_RELOC_OPRND:
    case URX_STO_SP:
    case URX_LD_SP:
    case URX_BACKREF:
    case URX_STO_INP_LOC:
    case URX_JMPX:
    case URX_LA_START:
    case URX_LA_END:
    case URX_BACKREF_I:
    case URX_LB_START:
    case URX_LB_CONT:
    case URX_LB_END:
    case URX_LBN_CONT:
    case URX_LBN_END:
    case URX_LOOP_C:
    case URX_LOOP_DOT_I:
    case URX_BACKSLASH_H:
    case URX_BACKSLASH_R:
    case URX_BACKSLASH_V:
        // types with an integer operand field.
        printf("%d", val);
        break;

    case URX_ONECHAR:
    case URX_ONECHAR_I:
        printf("%c", val<256?val:'?');
        break;

    case URX_STRING:
    case URX_STRING_I:
        {
            int32_t lengthOp       = fCompiledPat->elementAti(index+1);
            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
            int32_t length = URX_VAL(lengthOp);
            int32_t i;
            for (i=val; i<val+length; i++) {
                UChar c = fLiteralText[i];
                if (c < 32 || c >= 256) {c = '.';}
                printf("%c", c);
            }
        }
        break;

    case URX_SETREF:
    case URX_LOOP_SR_I:
        {
            UnicodeString s;
            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
            set->toPattern(s, TRUE);
            for (int32_t i=0; i<s.length(); i++) {
                printf("%c", s.charAt(i));
            }
        }
        break;

    case URX_STATIC_SETREF:
    case URX_STAT_SETREF_N:
        {
            UnicodeString s;
            if (val & URX_NEG_SET) {
                printf("NOT ");
                val &= ~URX_NEG_SET;
            }
            UnicodeSet *set = fStaticSets[val];
            set->toPattern(s, TRUE);
            for (int32_t i=0; i<s.length(); i++) {
                printf("%c", s.charAt(i));
            }
        }
        break;


    default:
        printf("??????");
        break;
    }
    printf("\n");
#endif
}