Ejemplo n.º 1
0
/**
 * Parse an ID into pieces.  Take IDs of the form T, T/V, S-T,
 * S-T/V, or S/V-T.  If the source is missing, return a source of
 * ANY.
 * @param id the id string, in any of several forms
 * @return an array of 4 strings: source, target, variant, and
 * isSourcePresent.  If the source is not present, ANY will be
 * given as the source, and isSourcePresent will be NULL.  Otherwise
 * isSourcePresent will be non-NULL.  The target may be empty if the
 * id is not well-formed.  The variant may be empty.
 */
void TransliteratorIDParser::IDtoSTV(const UnicodeString & id,
                                     UnicodeString & source,
                                     UnicodeString & target,
                                     UnicodeString & variant,
                                     UBool & isSourcePresent)
{
	source = ANY;
	target.truncate(0);
	variant.truncate(0);

	int32_t sep = id.indexOf(TARGET_SEP);
	int32_t var = id.indexOf(VARIANT_SEP);
	if (var < 0)
	{
		var = id.length();
	}
	isSourcePresent = FALSE;

	if (sep < 0)
	{
		// Form: T/V or T (or /V)
		id.extractBetween(0, var, target);
		id.extractBetween(var, id.length(), variant);
	}
	else if (sep < var)
	{
		// Form: S-T/V or S-T (or -T/V or -T)
		if (sep > 0)
		{
			id.extractBetween(0, sep, source);
			isSourcePresent = TRUE;
		}
		id.extractBetween(++sep, var, target);
		id.extractBetween(var, id.length(), variant);
	}
	else
	{
		// Form: (S/V-T or /V-T)
		if (var > 0)
		{
			id.extractBetween(0, var, source);
			isSourcePresent = TRUE;
		}
		id.extractBetween(var, sep++, variant);
		id.extractBetween(sep, id.length(), target);
	}

	if (variant.length() > 0)
	{
		variant.remove(0, 1);
	}
}
Ejemplo n.º 2
0
/**
 * Do a normalization using the iterative API in the given direction.
 * @param dir either +1 or -1
 */
void NormalizerConformanceTest::iterativeNorm(const UnicodeString& str,
                                              UNormalizationMode mode, int32_t options,
                                              UnicodeString& result,
                                              int8_t dir) {
    UErrorCode status = U_ZERO_ERROR;
    normalizer.setText(str, status);
    normalizer.setMode(mode);
    normalizer.setOption(-1, 0);        // reset all options
    normalizer.setOption(options, 1);   // set desired options
    result.truncate(0);
    if (U_FAILURE(status)) {
        return;
    }
    UChar32 ch;
    if (dir > 0) {
        for (ch = normalizer.first(); ch != Normalizer::DONE;
             ch = normalizer.next()) {
            result.append(ch);
        }
    } else {
        for (ch = normalizer.last(); ch != Normalizer::DONE;
             ch = normalizer.previous()) {
            result.insert(0, ch);
        }
    }
}
Ejemplo n.º 3
0
/**
 * Parse a Unicode identifier from the given string at the given
 * position.  Return the identifier, or an empty string if there
 * is no identifier.
 * @param str the string to parse
 * @param pos INPUT-OUPUT parameter.  On INPUT, pos is the
 * first character to examine.  It must be less than str.length(),
 * and it must not point to a whitespace character.  That is, must
 * have pos < str.length().  On
 * OUTPUT, the position after the last parsed character.
 * @return the Unicode identifier, or an empty string if there is
 * no valid identifier at pos.
 */
UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {
    // assert(pos < str.length());
    UnicodeString buf;
    int p = pos;
    while (p < str.length()) {
        UChar32 ch = str.char32At(p);
        if (buf.length() == 0) {
            if (u_isIDStart(ch)) {
                buf.append(ch);
            } else {
                buf.truncate(0);
                return buf;
            }
        } else {
            if (u_isIDPart(ch)) {
                buf.append(ch);
            } else {
                break;
            }
        }
        p += U16_LENGTH(ch);
    }
    pos = p;
    return buf;
}
// fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
// Modifies s in place.
static void fixQuotes(UnicodeString& s) {
  QuoteState state = OUTSIDE;
  int32_t len = s.length();
  int32_t dest = 0;
  for (int32_t i = 0; i < len; ++i) {
    UChar ch = s.charAt(i);
    if (ch == u_apos) {
      if (state == INSIDE_EMPTY) {
        s.setCharAt(dest, ch);
        ++dest;
      }
    } else {
      s.setCharAt(dest, ch);
      ++dest;
    }

    // Update state
    switch (state) {
      case OUTSIDE:
        state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE;
        break;
      case INSIDE_EMPTY:
      case INSIDE_FULL:
        state = ch == u_apos ? OUTSIDE : INSIDE_FULL;
        break;
      default:
        break;
    }
  }
  s.truncate(dest);
}
Ejemplo n.º 5
0
void TransliteratorSpec::setupNext() {
    isNextLocale = FALSE;
    if (isSpecLocale) {
        nextSpec = spec;
        int32_t i = nextSpec.lastIndexOf(LOCALE_SEP);
        // If i == 0 then we have _FOO, so we fall through
        // to the scriptName.
        if (i > 0) {
            nextSpec.truncate(i);
            isNextLocale = TRUE;
        } else {
            nextSpec = scriptName; // scriptName may be empty
        }
    } else {
        // spec is a script, so we are at the end
        nextSpec.truncate(0);
    }
}
Ejemplo n.º 6
0
/**
 * Given source, target, and variant strings, concatenate them into a
 * full ID.  If the source is empty, then "Any" will be used for the
 * source, so the ID will always be of the form s-t/v or s-t.
 */
void TransliteratorIDParser::STVtoID(const UnicodeString& source,
                                     const UnicodeString& target,
                                     const UnicodeString& variant,
                                     UnicodeString& id) {
    id = source;
    if (id.length() == 0) {
        id.setTo(ANY, 3);
    }
    id.append(TARGET_SEP).append(target);
    if (variant.length() != 0) {
        id.append(VARIANT_SEP).append(variant);
    }
    // NUL-terminate the ID string for getTerminatedBuffer.
    // This prevents valgrind and Purify warnings.
    id.append((UChar)0);
    id.truncate(id.length()-1);
}
Ejemplo n.º 7
0
//
//  createElement
//      We've just matched an element start tag.  Create and fill in a UXMLElement object
//      for it.
//
UXMLElement *
UXMLParser::createElement(RegexMatcher  &mEl, UErrorCode &status) {
    // First capture group is the element's name.
    UXMLElement *el = new UXMLElement(this, intern(mEl.group(1, status), status), status);

    // Scan for attributes.
    int32_t   pos = mEl.end(1, status);  // The position after the end of the tag name

    while (mAttrValue.lookingAt(pos, status)) {  // loop runs once per attribute on this element.
        UnicodeString attName  = mAttrValue.group(1, status);
        UnicodeString attValue = mAttrValue.group(2, status);

        // Trim the quotes from the att value.  These are left over from the original regex
        //   that parsed the attribue, which couldn't conveniently strip them.
        attValue.remove(0,1);                    // one char from the beginning
        attValue.truncate(attValue.length()-1);  // and one from the end.
        
        // XML Attribue value normalization. 
        // This is one of the really screwy parts of the XML spec.
        // See http://www.w3.org/TR/2004/REC-xml11-20040204/#AVNormalize
        // Note that non-validating parsers must treat all entities as type CDATA
        //   which simplifies things some.

        // Att normalization step 1:  normalize any newlines in the attribute value
        mNewLineNormalizer.reset(attValue);
        attValue = mNewLineNormalizer.replaceAll(fOneLF, status);

        // Next change all xml white space chars to plain \u0020 spaces.
        mAttrNormalizer.reset(attValue);
        UnicodeString oneSpace((UChar)0x0020);
        attValue = mAttrNormalizer.replaceAll(oneSpace, status);

        // Replace character entities.
        replaceCharRefs(attValue, status);

        // Save the attribute name and value in our document structure.
        el->fAttNames.addElement((void *)intern(attName, status), status);
        el->fAttValues.addElement(attValue.clone(), status);
        pos = mAttrValue.end(2, status);
    }
    fPos = mEl.end(0, status);
    return el;
}
Ejemplo n.º 8
0
/**
 * Helper for TestPatterns()
 */
void TestChoiceFormat::_testPattern(const char* pattern,
                                    UBool isValid,
                                    double v1, const char* str1,
                                    double v2, const char* str2,
                                    double v3, const char* str3) {
    UErrorCode ec = U_ZERO_ERROR;
    ChoiceFormat fmt(pattern, ec);
    if (!isValid) {
        if (U_FAILURE(ec)) {
            logln((UnicodeString)"Ok: " + pattern + " failed");
        } else {
            logln((UnicodeString)"FAIL: " + pattern + " accepted");
        }
        return;
    }
    if (U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: ChoiceFormat(" + pattern + ") failed");
        return;
    } else {
        logln((UnicodeString)"Ok: Pattern: " + pattern);
    }
    UnicodeString out;
    logln((UnicodeString)"  toPattern: " + fmt.toPattern(out));

    double v[] = {v1, v2, v3};
    const char* str[] = {str1, str2, str3};
    for (int32_t i=0; i<3; ++i) {
        out.truncate(0);
        fmt.format(v[i], out);
        if (out == str[i]) {
            logln((UnicodeString)"Ok: " + v[i] + " => " + out); 
        } else {
            errln((UnicodeString)"FAIL: " + v[i] + " => " + out +
                  ", expected " + str[i]);
        }
    }
}
Ejemplo n.º 9
0
/**
 * Attempt to find an entry in a single resource bundle.  This is
 * a one-sided lookup.  findInStaticStore() performs up to two such
 * lookups, one for the source, and one for the target.
 *
 * Do not perform fallback.  Return 0 on failure.
 *
 * On success, create a new Entry object, populate it, and return it.
 * The caller owns the returned object.
 */
TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen,
                                            const TransliteratorSpec& specToFind,
                                            const UnicodeString& variant,
                                            UTransDirection direction)
{
    UnicodeString utag;
    UnicodeString resStr;
    int32_t pass;

    for (pass=0; pass<2; ++pass) {
        utag.truncate(0);
        // First try either TransliteratorTo_xxx or
        // TransliterateFrom_xxx, then try the bidirectional
        // Transliterate_xxx.  This precedence order is arbitrary
        // but must be consistent and documented.
        if (pass == 0) {
            utag.append(direction == UTRANS_FORWARD ?
                        TRANSLITERATE_TO : TRANSLITERATE_FROM, -1);
        } else {
            utag.append(TRANSLITERATE, -1);
        }
        UnicodeString s(specToFind.get());
        utag.append(s.toUpper(""));
        UErrorCode status = U_ZERO_ERROR;
        ResourceBundle subres(specToOpen.getBundle().get(
            CharString().appendInvariantChars(utag, status).data(), status));
        if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
            continue;
        }

        s.truncate(0);
        if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) {
            continue;
        }

        if (variant.length() != 0) {
            status = U_ZERO_ERROR;
            resStr = subres.getStringEx(
                CharString().appendInvariantChars(variant, status).data(), status);
            if (U_SUCCESS(status)) {
                // Exit loop successfully
                break;
            }
        } else {
            // Variant is empty, which means match the first variant listed.
            status = U_ZERO_ERROR;
            resStr = subres.getStringEx(1, status);
            if (U_SUCCESS(status)) {
                // Exit loop successfully
                break;
            }
        }
    }

    if (pass==2) {
        // Failed
        return NULL;
    }

    // We have succeeded in loading a string from the locale
    // resources.  Create a new registry entry to hold it and return it.
    TransliteratorEntry *entry = new TransliteratorEntry();
    if (entry != 0) {
        // The direction is always forward for the
        // TransliterateTo_xxx and TransliterateFrom_xxx
        // items; those are unidirectional forward rules.
        // For the bidirectional Transliterate_xxx items,
        // the direction is the value passed in to this
        // function.
        int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction;
        entry->entryType = TransliteratorEntry::LOCALE_RULES;
        entry->stringArg = resStr;
        entry->intArg = dir;
    }

    return entry;
}
Ejemplo n.º 10
0
U_CDECL_END

/**
 * Parse a compound ID, consisting of an optional forward global
 * filter, a separator, one or more single IDs delimited by
 * separators, an an optional reverse global filter.  The
 * separator is a semicolon.  The global filters are UnicodeSet
 * patterns.  The reverse global filter must be enclosed in
 * parentheses.
 * @param id the pattern the parse
 * @param dir the direction.
 * @param canonID OUTPUT parameter that receives the canonical ID,
 * consisting of canonical IDs for all elements, as returned by
 * parseSingleID(), separated by semicolons.  Previous contents
 * are discarded.
 * @param list OUTPUT parameter that receives a list of SingleID
 * objects representing the parsed IDs.  Previous contents are
 * discarded.
 * @param globalFilter OUTPUT parameter that receives a pointer to
 * a newly created global filter for this ID in this direction, or
 * NULL if there is none.
 * @return TRUE if the parse succeeds, that is, if the entire
 * id is consumed without syntax error.
 */
UBool TransliteratorIDParser::parseCompoundID(const UnicodeString & id, int32_t dir,
        UnicodeString & canonID,
        UVector & list,
        UnicodeSet *& globalFilter)
{
	UErrorCode ec = U_ZERO_ERROR;
	int32_t i;
	int32_t pos = 0;
	int32_t withParens = 1;
	list.removeAllElements();
	UnicodeSet * filter;
	globalFilter = NULL;
	canonID.truncate(0);

	// Parse leading global filter, if any
	withParens = 0; // parens disallowed
	filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);
	if (filter != NULL)
	{
		if (!ICU_Utility::parseChar(id, pos, ID_DELIM))
		{
			// Not a global filter; backup and resume
			canonID.truncate(0);
			pos = 0;
		}
		if (dir == FORWARD)
		{
			globalFilter = filter;
		}
		else
		{
			delete filter;
		}
		filter = NULL;
	}

	UBool sawDelimiter = TRUE;
	for (;;)
	{
		SingleID * single = parseSingleID(id, pos, dir, ec);
		if (single == NULL)
		{
			break;
		}
		if (dir == FORWARD)
		{
			list.addElement(single, ec);
		}
		else
		{
			list.insertElementAt(single, 0, ec);
		}
		if (U_FAILURE(ec))
		{
			goto FAIL;
		}
		if (!ICU_Utility::parseChar(id, pos, ID_DELIM))
		{
			sawDelimiter = FALSE;
			break;
		}
	}

	if (list.size() == 0)
	{
		goto FAIL;
	}

	// Construct canonical ID
	for (i = 0; i < list.size(); ++i)
	{
		SingleID * single = (SingleID *) list.elementAt(i);
		canonID.append(single->canonID);
		if (i != (list.size() - 1))
		{
			canonID.append(ID_DELIM);
		}
	}

	// Parse trailing global filter, if any, and only if we saw
	// a trailing delimiter after the IDs.
	if (sawDelimiter)
	{
		withParens = 1; // parens required
		filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);
		if (filter != NULL)
		{
			// Don't require trailing ';', but parse it if present
			ICU_Utility::parseChar(id, pos, ID_DELIM);

			if (dir == REVERSE)
			{
				globalFilter = filter;
			}
			else
			{
				delete filter;
			}
			filter = NULL;
		}
	}

	// Trailing unparsed text is a syntax error
	ICU_Utility::skipWhitespace(id, pos, TRUE);
	if (pos != id.length())
	{
		goto FAIL;
	}

	return TRUE;

FAIL:
	UObjectDeleter * save = list.setDeleter(_deleteSingleID);
	list.removeAllElements();
	list.setDeleter(save);
	delete globalFilter;
	globalFilter = NULL;
	return FALSE;
}
Ejemplo n.º 11
0
UXMLElement *
UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) {
    char bytes[4096], charsetBuffer[100];
    FileStream *f;
    const char *charset, *pb;
    UnicodeString src;
    UConverter *cnv;
    UChar *buffer, *pu;
    int32_t fileLength, bytesLength, length, capacity;
    UBool flush;

    if(U_FAILURE(errorCode)) {
        return NULL;
    }

    f=T_FileStream_open(filename, "rb");
    if(f==NULL) {
        errorCode=U_FILE_ACCESS_ERROR;
        return NULL;
    }

    bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
    if(bytesLength<(int32_t)sizeof(bytes)) {
        // we have already read the entire file
        fileLength=bytesLength;
    } else {
        // get the file length
        fileLength=T_FileStream_size(f);
    }

    /*
     * get the charset:
     * 1. Unicode signature
     * 2. treat as ISO-8859-1 and read XML encoding="charser"
     * 3. default to UTF-8
     */
    charset=ucnv_detectUnicodeSignature(bytes, bytesLength, NULL, &errorCode);
    if(U_SUCCESS(errorCode) && charset!=NULL) {
        // open converter according to Unicode signature
        cnv=ucnv_open(charset, &errorCode);
    } else {
        // read as Latin-1 and parse the XML declaration and encoding
        cnv=ucnv_open("ISO-8859-1", &errorCode);
        if(U_FAILURE(errorCode)) {
            // unexpected error opening Latin-1 converter
            goto exit;
        }

        buffer=src.getBuffer(bytesLength);
        if(buffer==NULL) {
            // unexpected failure to reserve some string capacity
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            goto exit;
        }
        pb=bytes;
        pu=buffer;
        ucnv_toUnicode(
            cnv,
            &pu, buffer+src.getCapacity(),
            &pb, bytes+bytesLength,
            NULL, TRUE, &errorCode);
        src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0);
        ucnv_close(cnv);
        cnv=NULL;
        if(U_FAILURE(errorCode)) {
            // unexpected error in conversion from Latin-1
            src.remove();
            goto exit;
        }

        // parse XML declaration
        if(mXMLDecl.reset(src).lookingAt(0, errorCode)) {
            int32_t declEnd=mXMLDecl.end(errorCode);
            // go beyond <?xml
            int32_t pos=src.indexOf((UChar)x_l)+1;

            mAttrValue.reset(src);
            while(pos<declEnd && mAttrValue.lookingAt(pos, errorCode)) {  // loop runs once per attribute on this element.
                UnicodeString attName  = mAttrValue.group(1, errorCode);
                UnicodeString attValue = mAttrValue.group(2, errorCode);

                // Trim the quotes from the att value.  These are left over from the original regex
                //   that parsed the attribue, which couldn't conveniently strip them.
                attValue.remove(0,1);                    // one char from the beginning
                attValue.truncate(attValue.length()-1);  // and one from the end.

                if(attName==UNICODE_STRING("encoding", 8)) {
                    length=attValue.extract(0, 0x7fffffff, charsetBuffer, (int32_t)sizeof(charsetBuffer));
                    charset=charsetBuffer;
                    break;
                }
                pos = mAttrValue.end(2, errorCode);
            }

            if(charset==NULL) {
                // default to UTF-8
                charset="UTF-8";
            }
            cnv=ucnv_open(charset, &errorCode);
        }
    }

    if(U_FAILURE(errorCode)) {
        // unable to open the converter
        goto exit;
    }

    // convert the file contents
    capacity=fileLength;        // estimated capacity
    src.getBuffer(capacity);
    src.releaseBuffer(0);       // zero length
    flush=FALSE;
    for(;;) {
        // convert contents of bytes[bytesLength]
        pb=bytes;
        for(;;) {
            length=src.length();
            buffer=src.getBuffer(capacity);
            if(buffer==NULL) {
                // unexpected failure to reserve some string capacity
                errorCode=U_MEMORY_ALLOCATION_ERROR;
                goto exit;
            }

            pu=buffer+length;
            ucnv_toUnicode(
                cnv, &pu, buffer+src.getCapacity(),
                &pb, bytes+bytesLength,
                NULL, FALSE, &errorCode);
            src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
                errorCode=U_ZERO_ERROR;
                capacity=(3*src.getCapacity())/2; // increase capacity by 50%
            } else {
                break;
            }
        }

        if(U_FAILURE(errorCode)) {
            break; // conversion error
        }

        if(flush) {
            break; // completely converted the file
        }

        // read next block
        bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
        if(bytesLength==0) {
            // reached end of file, convert once more to flush the converter
            flush=TRUE;
        }
    };

exit:
    ucnv_close(cnv);
    T_FileStream_close(f);

    if(U_SUCCESS(errorCode)) {
        return parse(src, errorCode);
    } else {
        return NULL;
    }
}
Ejemplo n.º 12
0
/**
 * Test new closure API
 */
void TestChoiceFormat::TestClosures(void) {
    // Construct open, half-open, half-open (the other way), and closed
    // intervals.  Do this both using arrays and using a pattern.

    // 'fmt1' is created using arrays
    UBool T = TRUE, F = FALSE;
    // 0:   ,1)
    // 1: [1,2]
    // 2: (2,3]
    // 3: (3,4)
    // 4: [4,5)
    // 5: [5,
    double limits[]  = { 0, 1, 2, 3, 4, 5 };
    UBool closures[] = { F, F, T, T, F, F };
    UnicodeString fmts[] = {
        ",1)", "[1,2]", "(2,3]", "(3,4)", "[4,5)", "[5,"
    };
    ChoiceFormat fmt1(limits, closures, fmts, 6);

    // 'fmt2' is created using a pattern; it should be equivalent
    UErrorCode status = U_ZERO_ERROR;
    const char* PAT = "0#,1)|1#[1,2]|2<(2,3]|3<(3,4)|4#[4,5)|5#[5,";
    ChoiceFormat fmt2(PAT, status);
    if (U_FAILURE(status)) {
        errln("FAIL: ChoiceFormat constructor failed");
        return;
    }
    
    // Check the patterns
    UnicodeString str;
    fmt1.toPattern(str);
    if (str == PAT) {
        logln("Ok: " + str);
    } else {
        errln("FAIL: " + str + ", expected " + PAT);
    }
    str.truncate(0);

    // Check equality
    if (fmt1 != fmt2) {
        errln("FAIL: fmt1 != fmt2");
    }

#if 0  // ICU 4.8 deprecates and disables the ChoiceFormat getters.
    int32_t i;
    int32_t count2 = 0;
    const double *limits2 = fmt2.getLimits(count2);
    const UBool *closures2 = fmt2.getClosures(count2);

    if((count2 != 6) || !limits2 || !closures2) {
        errln("FAIL: couldn't get limits or closures");
    } else {
        for(i=0;i<count2;i++) {
          logln("#%d/%d: limit %g closed %s\n",
                i, count2,
                limits2[i],
                closures2[i] ?"T":"F");
          if(limits2[i] != limits[i]) {
            errln("FAIL: limit #%d = %g, should be %g\n", i, limits2[i], limits[i]);
          }
          if((closures2[i]!=0) != (closures[i]!=0)) {
            errln("FAIL: closure #%d = %s, should be %s\n", i, closures2[i]?"T":"F", closures[i]?"T":"F");
          }
        }
    }
#endif

    // Now test both format objects
    UnicodeString exp[] = {
        /*-0.5 => */ ",1)",
        /* 0.0 => */ ",1)",
        /* 0.5 => */ ",1)",
        /* 1.0 => */ "[1,2]",
        /* 1.5 => */ "[1,2]",
        /* 2.0 => */ "[1,2]",
        /* 2.5 => */ "(2,3]",
        /* 3.0 => */ "(2,3]",
        /* 3.5 => */ "(3,4)",
        /* 4.0 => */ "[4,5)",
        /* 4.5 => */ "[4,5)",
        /* 5.0 => */ "[5,",
        /* 5.5 => */ "[5,"
    };

    // Each format object should behave exactly the same
    ChoiceFormat* FMT[] = { &fmt1, &fmt2 };
    for (int32_t pass=0; pass<2; ++pass) {
        int32_t j=0;
        for (int32_t ix=-5; ix<=55; ix+=5) {
            double x = ix / 10.0; // -0.5 to 5.5 step +0.5
            FMT[pass]->format(x, str);
            if (str == exp[j]) {
                logln((UnicodeString)"Ok: " + x + " => " + str);
            } else {
                errln((UnicodeString)"FAIL: " + x + " => " + str +
                      ", expected " + exp[j]);
            }
            str.truncate(0);
            ++j;
        }
    }
}
Ejemplo n.º 13
0
void
ChoiceFormat::applyPattern(const UnicodeString& pattern,
                           UParseError& parseError,
                           UErrorCode& status)
{
    if (U_FAILURE(status)) 
    {
        return;
    }

    // Clear error struct
    parseError.offset = -1;
    parseError.preContext[0] = parseError.postContext[0] = (UChar)0;

    // Perform 2 passes.  The first computes the number of limits in
    // this pattern (fCount), which is 1 more than the number of
    // literal VERTICAL_BAR characters.
    int32_t count = 1;
    int32_t i;
    for (i=0; i<pattern.length(); ++i) {
        UChar c = pattern[i];
        if (c == SINGLE_QUOTE) {
            // Skip over the entire quote, including embedded
            // contiguous pairs of SINGLE_QUOTE.
            for (;;) {
                do {
                    ++i;
                } while (i<pattern.length() &&
                         pattern[i] != SINGLE_QUOTE);
                if ((i+1)<pattern.length() &&
                    pattern[i+1] == SINGLE_QUOTE) {
                    // SINGLE_QUOTE pair; skip over it
                    ++i;
                } else {
                    break;
                }
            }
        } else if (c == VERTICAL_BAR) {
            ++count;
        }
    }

    // Allocate the required storage.
    double *newLimits = (double*) uprv_malloc( sizeof(double) * count);
    /* test for NULL */
    if (newLimits == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    UBool *newClosures = (UBool*) uprv_malloc( sizeof(UBool) * count);
    /* test for NULL */
    if (newClosures == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(newLimits);
        return;
    }
    UnicodeString *newFormats = new UnicodeString[count];
    /* test for NULL */
    if (newFormats == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(newLimits);
        uprv_free(newClosures);
        return;
    }

    // Perform the second pass
    int32_t k = 0; // index into newXxx[] arrays
    UnicodeString buf; // scratch buffer
    UBool inQuote = FALSE;
    UBool inNumber = TRUE; // TRUE before < or #, FALSE after

    for (i=0; i<pattern.length(); ++i) {
        UChar c = pattern[i];
        if (c == SINGLE_QUOTE) {
            // Check for SINGLE_QUOTE pair indicating a literal quote
            if ((i+1) < pattern.length() &&
                pattern[i+1] == SINGLE_QUOTE) {
                buf += SINGLE_QUOTE;
                ++i;
            } else {
                inQuote = !inQuote;
            }
        } else if (inQuote) {
            buf += c;
        } else if (c == LESS_THAN || c == LESS_EQUAL || c == LESS_EQUAL2) {
            if (!inNumber || buf.length() == 0) {
                goto error;
            }
            inNumber = FALSE;

            double limit;
            buf.trim();
            if (!buf.compare(gPositiveInfinity, POSITIVE_INF_STRLEN)) {
                limit = uprv_getInfinity();
            } else if (!buf.compare(gNegativeInfinity, NEGATIVE_INF_STRLEN)) {
                limit = -uprv_getInfinity();
            } else {
                limit = stod(buf);
            }

            if (k == count) {
                // This shouldn't happen.  If it does, it means that
                // the count determined in the first pass did not
                // match the number of elements found in the second
                // pass.
                goto error;
            }
            newLimits[k] = limit;
            newClosures[k] = (c == LESS_THAN);

            if (k > 0 && limit <= newLimits[k-1]) {
                // Each limit must be strictly > than the previous
                // limit.  One exception: Two subsequent limits may be
                // == if the first closure is FALSE and the second
                // closure is TRUE.  This places the limit value in
                // the second interval.
                if (!(limit == newLimits[k-1] &&
                      !newClosures[k-1] &&
                      newClosures[k])) {
                    goto error;
                }
            }

            buf.truncate(0);
        } else if (c == VERTICAL_BAR) {
            if (inNumber) {
                goto error;
            }
            inNumber = TRUE;

            newFormats[k] = buf;
            ++k;
            buf.truncate(0);
        } else {
            buf += c;
        }        
    }

    if (k != (count-1) || inNumber || inQuote) {
        goto error;
    }
    newFormats[k] = buf;

    // Don't modify this object until the parse succeeds
    uprv_free(fChoiceLimits);
    uprv_free(fClosures);
    delete[] fChoiceFormats;
    fCount = count;
    fChoiceLimits  = newLimits;
    fClosures      = newClosures;
    fChoiceFormats = newFormats;
    return;

error:
    status = U_ILLEGAL_ARGUMENT_ERROR;
    syntaxError(pattern,i,parseError);
    uprv_free(newLimits);
    uprv_free(newClosures);
    delete[] newFormats;
    return;

}
Ejemplo n.º 14
0
int32_t StringReplacer::replace(Replaceable& text,
                                int32_t start,
                                int32_t limit,
                                int32_t& cursor) {
    int32_t outLen;
    int32_t newStart = 0;

    // NOTE: It should be possible to _always_ run the complex
    // processing code; just slower.  If not, then there is a bug
    // in the complex processing code.

    // Simple (no nested replacers) Processing Code :
    if (!isComplex) {
        text.handleReplaceBetween(start, limit, output);
        outLen = output.length();

        // Setup default cursor position (for cursorPos within output)
        newStart = cursorPos;
    }

    // Complex (nested replacers) Processing Code :
    else {
        /* When there are segments to be copied, use the Replaceable.copy()
         * API in order to retain out-of-band data.  Copy everything to the
         * end of the string, then copy them back over the key.  This preserves
         * the integrity of indices into the key and surrounding context while
         * generating the output text.
         */
        UnicodeString buf;
        int32_t oOutput; // offset into 'output'
        isComplex = FALSE;

        // The temporary buffer starts at tempStart, and extends
        // to destLimit.  The start of the buffer has a single
        // character from before the key.  This provides style
        // data when addition characters are filled into the
        // temporary buffer.  If there is nothing to the left, use
        // the non-character U+FFFF, which Replaceable subclasses
        // should treat specially as a "no-style character."
        // destStart points to the point after the style context
        // character, so it is tempStart+1 or tempStart+2.
        int32_t tempStart = text.length(); // start of temp buffer
        int32_t destStart = tempStart; // copy new text to here
        if (start > 0) {
            int32_t len = UTF_CHAR_LENGTH(text.char32At(start-1));
            text.copy(start-len, start, tempStart);
            destStart += len;
        } else {
            UnicodeString str((UChar) 0xFFFF);
            text.handleReplaceBetween(tempStart, tempStart, str);
            destStart++;
        }
        int32_t destLimit = destStart;

        for (oOutput=0; oOutput<output.length(); ) {
            if (oOutput == cursorPos) {
                // Record the position of the cursor
                newStart = destLimit - destStart; // relative to start
            }
            UChar32 c = output.char32At(oOutput);
            UnicodeReplacer* r = data->lookupReplacer(c);
            if (r == NULL) {
                // Accumulate straight (non-segment) text.
                buf.append(c);
            } else {
                isComplex = TRUE;

                // Insert any accumulated straight text.
                if (buf.length() > 0) {
                    text.handleReplaceBetween(destLimit, destLimit, buf);
                    destLimit += buf.length();
                    buf.truncate(0);
                }

                // Delegate output generation to replacer object
                int32_t len = r->replace(text, destLimit, destLimit, cursor);
                destLimit += len;
            }
            oOutput += UTF_CHAR_LENGTH(c);
        }
        // Insert any accumulated straight text.
        if (buf.length() > 0) {
            text.handleReplaceBetween(destLimit, destLimit, buf);
            destLimit += buf.length();
        }
        if (oOutput == cursorPos) {
            // Record the position of the cursor
            newStart = destLimit - destStart; // relative to start
        }

        outLen = destLimit - destStart;

        // Copy new text to start, and delete it
        text.copy(destStart, destLimit, start);
        text.handleReplaceBetween(tempStart + outLen, destLimit + outLen, EMPTY);

        // Delete the old text (the key)
        text.handleReplaceBetween(start + outLen, limit + outLen, EMPTY);
    }        

    if (hasCursor) {
        // Adjust the cursor for positions outside the key.  These
        // refer to code points rather than code units.  If cursorPos
        // is within the output string, then use newStart, which has
        // already been set above.
        if (cursorPos < 0) {
            newStart = start;
            int32_t n = cursorPos;
            // Outside the output string, cursorPos counts code points
            while (n < 0 && newStart > 0) {
                newStart -= UTF_CHAR_LENGTH(text.char32At(newStart-1));
                ++n;
            }
            newStart += n;
        } else if (cursorPos > output.length()) {
            newStart = start + outLen;
            int32_t n = cursorPos - output.length();
            // Outside the output string, cursorPos counts code points
            while (n > 0 && newStart < text.length()) {
                newStart += UTF_CHAR_LENGTH(text.char32At(newStart));
                --n;
            }
            newStart += n;
        } else {
            // Cursor is within output string.  It has been set up above
            // to be relative to start.
            newStart += start;
        }

        cursor = newStart;
    }

    return outLen;
}
Ejemplo n.º 15
0
UnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& id,
                                              const Locale& inLocale,
                                              UnicodeString& result) {
    UErrorCode status = U_ZERO_ERROR;

    ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status);

    // Suspend checking status until later...

    result.truncate(0);

    // Normalize the ID
    UnicodeString source, target, variant;
    UBool sawSource;
    TransliteratorIDParser::IDtoSTV(id, source, target, variant, sawSource);
    if (target.length() < 1) {
        // No target; malformed id
        return result;
    }
    if (variant.length() > 0) { // Change "Foo" to "/Foo"
        variant.insert(0, VARIANT_SEP);
    }
    UnicodeString ID(source);
    ID.append(TARGET_SEP).append(target).append(variant);

    // build the char* key
    if (uprv_isInvariantUString(ID.getBuffer(), ID.length())) {
        char key[200];
        uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX);
        int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX);
        ID.extract(0, (int32_t)(sizeof(key)-length), key+length, (int32_t)(sizeof(key)-length), US_INV);

        // Try to retrieve a UnicodeString from the bundle.
        UnicodeString resString = bundle.getStringEx(key, status);

        if (U_SUCCESS(status) && resString.length() != 0) {
            return result = resString; // [sic] assign & return
        }

#if !UCONFIG_NO_FORMATTING
        // We have failed to get a name from the locale data.  This is
        // typical, since most transliterators will not have localized
        // name data.  The next step is to retrieve the MessageFormat
        // pattern from the locale data and to use it to synthesize the
        // name from the ID.

        status = U_ZERO_ERROR;
        resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status);

        if (U_SUCCESS(status) && resString.length() != 0) {
            MessageFormat msg(resString, inLocale, status);
            // Suspend checking status until later...

            // We pass either 2 or 3 Formattable objects to msg.
            Formattable args[3];
            int32_t nargs;
            args[0].setLong(2); // # of args to follow
            args[1].setString(source);
            args[2].setString(target);
            nargs = 3;

            // Use display names for the scripts, if they exist
            UnicodeString s;
            length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX);
            for (int j=1; j<=2; ++j) {
                status = U_ZERO_ERROR;
                uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX);
                args[j].getString(s);
                if (uprv_isInvariantUString(s.getBuffer(), s.length())) {
                    s.extract(0, sizeof(key)-length-1, key+length, (int32_t)sizeof(key)-length-1, US_INV);

                    resString = bundle.getStringEx(key, status);

                    if (U_SUCCESS(status)) {
                        args[j] = resString;
                    }
                }
            }

            status = U_ZERO_ERROR;
            FieldPosition pos; // ignored by msg
            msg.format(args, nargs, result, pos, status);
            if (U_SUCCESS(status)) {
                result.append(variant);
                return result;
            }
        }
#endif
    }

    // We should not reach this point unless there is something
    // wrong with the build or the RB_DISPLAY_NAME_PATTERN has
    // been deleted from the root RB_LOCALE_ELEMENTS resource.
    result = ID;
    return result;
}
Ejemplo n.º 16
0
void 
IntlTestNumberFormat::tryIt(double aNumber)
{
    const int32_t DEPTH = 10;
    Formattable number[DEPTH];
    UnicodeString string[DEPTH];

    int32_t numberMatch = 0;
    int32_t stringMatch = 0;
    UnicodeString errMsg;
    int32_t i;
    for (i=0; i<DEPTH; ++i)
    {
        errMsg.truncate(0); // if non-empty, we failed this iteration
        UErrorCode status = U_ZERO_ERROR;
        string[i] = "(n/a)"; // "format was never done" value
        if (i == 0) {
            number[i].setDouble(aNumber);
        } else {
            fFormat->parse(string[i-1], number[i], status);
            if (U_FAILURE(status)) {
                number[i].setDouble(1234.5); // "parse failed" value
                errMsg = "**** FAIL: Parse of " + prettify(string[i-1]) + " failed.";
                --i; // don't show empty last line: "1234.5 F> (n/a) P>"
                break;
            }
        }
        // Convert from long to double
        if (number[i].getType() == Formattable::kLong)
            number[i].setDouble(number[i].getLong());
        else if (number[i].getType() == Formattable::kInt64)
            number[i].setDouble((double)number[i].getInt64());
        else if (number[i].getType() != Formattable::kDouble)
        {
            errMsg = ("**** FAIL: Parse of " + prettify(string[i-1])
                + " returned non-numeric Formattable, type " + UnicodeString(formattableTypeName(number[i].getType()))
                + ", Locale=" + UnicodeString(fLocale.getName())
                + ", longValue=" + number[i].getLong());
            break;
        }
        string[i].truncate(0);
        fFormat->format(number[i].getDouble(), string[i]);
        if (i > 0)
        {
            if (numberMatch == 0 && number[i] == number[i-1])
                numberMatch = i;
            else if (numberMatch > 0 && number[i] != number[i-1])
            {
                errMsg = ("**** FAIL: Numeric mismatch after match.");
                break;
            }
            if (stringMatch == 0 && string[i] == string[i-1])
                stringMatch = i;
            else if (stringMatch > 0 && string[i] != string[i-1])
            {
                errMsg = ("**** FAIL: String mismatch after match.");
                break;
            }
        }
        if (numberMatch > 0 && stringMatch > 0)
            break;
    }
    if (i == DEPTH)
        --i;

    if (stringMatch > 2 || numberMatch > 2)
    {
        errMsg = ("**** FAIL: No string and/or number match within 2 iterations.");
    }

    if (errMsg.length() != 0)
    {
        for (int32_t k=0; k<=i; ++k)
        {
            logln((UnicodeString)"" + k + ": " + number[k].getDouble() + " F> " +
                  prettify(string[k]) + " P> ");
        }
        errln(errMsg);
    }
}