/** * Parse an ID into pieces. Take IDs of the form T, T/V, S-T, * S-T/V, or S/V-T. If the source is missing, return a source of * ANY. * @param id the id string, in any of several forms * @return an array of 4 strings: source, target, variant, and * isSourcePresent. If the source is not present, ANY will be * given as the source, and isSourcePresent will be NULL. Otherwise * isSourcePresent will be non-NULL. The target may be empty if the * id is not well-formed. The variant may be empty. */ void TransliteratorIDParser::IDtoSTV(const UnicodeString & id, UnicodeString & source, UnicodeString & target, UnicodeString & variant, UBool & isSourcePresent) { source = ANY; target.truncate(0); variant.truncate(0); int32_t sep = id.indexOf(TARGET_SEP); int32_t var = id.indexOf(VARIANT_SEP); if (var < 0) { var = id.length(); } isSourcePresent = FALSE; if (sep < 0) { // Form: T/V or T (or /V) id.extractBetween(0, var, target); id.extractBetween(var, id.length(), variant); } else if (sep < var) { // Form: S-T/V or S-T (or -T/V or -T) if (sep > 0) { id.extractBetween(0, sep, source); isSourcePresent = TRUE; } id.extractBetween(++sep, var, target); id.extractBetween(var, id.length(), variant); } else { // Form: (S/V-T or /V-T) if (var > 0) { id.extractBetween(0, var, source); isSourcePresent = TRUE; } id.extractBetween(var, sep++, variant); id.extractBetween(sep, id.length(), target); } if (variant.length() > 0) { variant.remove(0, 1); } }
/** * Do a normalization using the iterative API in the given direction. * @param dir either +1 or -1 */ void NormalizerConformanceTest::iterativeNorm(const UnicodeString& str, UNormalizationMode mode, int32_t options, UnicodeString& result, int8_t dir) { UErrorCode status = U_ZERO_ERROR; normalizer.setText(str, status); normalizer.setMode(mode); normalizer.setOption(-1, 0); // reset all options normalizer.setOption(options, 1); // set desired options result.truncate(0); if (U_FAILURE(status)) { return; } UChar32 ch; if (dir > 0) { for (ch = normalizer.first(); ch != Normalizer::DONE; ch = normalizer.next()) { result.append(ch); } } else { for (ch = normalizer.last(); ch != Normalizer::DONE; ch = normalizer.previous()) { result.insert(0, ch); } } }
/**
 * Read a Unicode identifier from str beginning at pos.
 *
 * @param str the string to parse
 * @param pos INPUT-OUTPUT parameter. On input, the index of the first
 *            character to examine. On output, the index just past the
 *            last character of the identifier. It is left unchanged when
 *            the character at pos cannot start an identifier.
 * @return the identifier, or an empty string if no identifier starts
 *         at pos
 */
UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {
    UnicodeString ident;
    int32_t cursor = pos;

    if (cursor < str.length()) {
        UChar32 c = str.char32At(cursor);
        if (!u_isIDStart(c)) {
            // Not an identifier: report nothing and do not advance pos.
            return ident;
        }
        // Consume the start character, then every following part character.
        do {
            ident.append(c);
            cursor += U16_LENGTH(c);
            if (cursor >= str.length()) {
                break;
            }
            c = str.char32At(cursor);
        } while (u_isIDPart(c));
    }

    pos = cursor;
    return ident;
}
// fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j. // Modifies s in place. static void fixQuotes(UnicodeString& s) { QuoteState state = OUTSIDE; int32_t len = s.length(); int32_t dest = 0; for (int32_t i = 0; i < len; ++i) { UChar ch = s.charAt(i); if (ch == u_apos) { if (state == INSIDE_EMPTY) { s.setCharAt(dest, ch); ++dest; } } else { s.setCharAt(dest, ch); ++dest; } // Update state switch (state) { case OUTSIDE: state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE; break; case INSIDE_EMPTY: case INSIDE_FULL: state = ch == u_apos ? OUTSIDE : INSIDE_FULL; break; default: break; } } s.truncate(dest); }
void TransliteratorSpec::setupNext() { isNextLocale = FALSE; if (isSpecLocale) { nextSpec = spec; int32_t i = nextSpec.lastIndexOf(LOCALE_SEP); // If i == 0 then we have _FOO, so we fall through // to the scriptName. if (i > 0) { nextSpec.truncate(i); isNextLocale = TRUE; } else { nextSpec = scriptName; // scriptName may be empty } } else { // spec is a script, so we are at the end nextSpec.truncate(0); } }
/** * Given source, target, and variant strings, concatenate them into a * full ID. If the source is empty, then "Any" will be used for the * source, so the ID will always be of the form s-t/v or s-t. */ void TransliteratorIDParser::STVtoID(const UnicodeString& source, const UnicodeString& target, const UnicodeString& variant, UnicodeString& id) { id = source; if (id.length() == 0) { id.setTo(ANY, 3); } id.append(TARGET_SEP).append(target); if (variant.length() != 0) { id.append(VARIANT_SEP).append(variant); } // NUL-terminate the ID string for getTerminatedBuffer. // This prevents valgrind and Purify warnings. id.append((UChar)0); id.truncate(id.length()-1); }
// // createElement // We've just matched an element start tag. Create and fill in a UXMLElement object // for it. // UXMLElement * UXMLParser::createElement(RegexMatcher &mEl, UErrorCode &status) { // First capture group is the element's name. UXMLElement *el = new UXMLElement(this, intern(mEl.group(1, status), status), status); // Scan for attributes. int32_t pos = mEl.end(1, status); // The position after the end of the tag name while (mAttrValue.lookingAt(pos, status)) { // loop runs once per attribute on this element. UnicodeString attName = mAttrValue.group(1, status); UnicodeString attValue = mAttrValue.group(2, status); // Trim the quotes from the att value. These are left over from the original regex // that parsed the attribue, which couldn't conveniently strip them. attValue.remove(0,1); // one char from the beginning attValue.truncate(attValue.length()-1); // and one from the end. // XML Attribue value normalization. // This is one of the really screwy parts of the XML spec. // See http://www.w3.org/TR/2004/REC-xml11-20040204/#AVNormalize // Note that non-validating parsers must treat all entities as type CDATA // which simplifies things some. // Att normalization step 1: normalize any newlines in the attribute value mNewLineNormalizer.reset(attValue); attValue = mNewLineNormalizer.replaceAll(fOneLF, status); // Next change all xml white space chars to plain \u0020 spaces. mAttrNormalizer.reset(attValue); UnicodeString oneSpace((UChar)0x0020); attValue = mAttrNormalizer.replaceAll(oneSpace, status); // Replace character entities. replaceCharRefs(attValue, status); // Save the attribute name and value in our document structure. el->fAttNames.addElement((void *)intern(attName, status), status); el->fAttValues.addElement(attValue.clone(), status); pos = mAttrValue.end(2, status); } fPos = mEl.end(0, status); return el; }
/** * Helper for TestPatterns() */ void TestChoiceFormat::_testPattern(const char* pattern, UBool isValid, double v1, const char* str1, double v2, const char* str2, double v3, const char* str3) { UErrorCode ec = U_ZERO_ERROR; ChoiceFormat fmt(pattern, ec); if (!isValid) { if (U_FAILURE(ec)) { logln((UnicodeString)"Ok: " + pattern + " failed"); } else { logln((UnicodeString)"FAIL: " + pattern + " accepted"); } return; } if (U_FAILURE(ec)) { errln((UnicodeString)"FAIL: ChoiceFormat(" + pattern + ") failed"); return; } else { logln((UnicodeString)"Ok: Pattern: " + pattern); } UnicodeString out; logln((UnicodeString)" toPattern: " + fmt.toPattern(out)); double v[] = {v1, v2, v3}; const char* str[] = {str1, str2, str3}; for (int32_t i=0; i<3; ++i) { out.truncate(0); fmt.format(v[i], out); if (out == str[i]) { logln((UnicodeString)"Ok: " + v[i] + " => " + out); } else { errln((UnicodeString)"FAIL: " + v[i] + " => " + out + ", expected " + str[i]); } } }
/**
 * Look for an entry in a single resource bundle, without any fallback.
 * This is a one-sided lookup; the comment in the original source notes
 * that findInStaticStore() performs up to two of them, one for the
 * source and one for the target.
 *
 * @param specToOpen the spec whose bundle is searched
 * @param specToFind the spec whose upper-cased name forms the resource tag
 * @param variant    the variant to fetch; when empty, the string at
 *                   index 1 of the resource is used
 * @param direction  the requested direction
 * @return a newly allocated entry owned by the caller, or NULL if
 *         nothing was found or the entry could not be allocated
 */
TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen,
                                                          const TransliteratorSpec& specToFind,
                                                          const UnicodeString& variant,
                                                          UTransDirection direction)
{
    UnicodeString utag;
    UnicodeString resStr;
    int32_t pass;

    // Pass 0 tries the unidirectional tag (TRANSLITERATE_TO or
    // TRANSLITERATE_FROM, chosen by direction); pass 1 tries the
    // bidirectional TRANSLITERATE tag. This precedence order is
    // arbitrary but must be consistent and documented.
    for (pass=0; pass<2; ++pass) {
        utag.truncate(0);
        if (pass == 0) {
            utag.append(direction == UTRANS_FORWARD ?
                        TRANSLITERATE_TO : TRANSLITERATE_FROM, -1);
        } else {
            utag.append(TRANSLITERATE, -1);
        }
        UnicodeString s(specToFind.get());
        utag.append(s.toUpper(""));

        UErrorCode status = U_ZERO_ERROR;
        ResourceBundle subres(specToOpen.getBundle().get(
                CharString().appendInvariantChars(utag, status).data(), status));
        if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
            continue;
        }

        // Reject a resource whose locale name differs from specToOpen.
        s.truncate(0);
        if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) {
            continue;
        }

        status = U_ZERO_ERROR;
        if (variant.length() != 0) {
            resStr = subres.getStringEx(
                    CharString().appendInvariantChars(variant, status).data(), status);
        } else {
            // Variant is empty, which means match the first variant listed.
            resStr = subres.getStringEx(1, status);
        }
        if (U_SUCCESS(status)) {
            // Found it; 'pass' keeps the index of the successful pass.
            break;
        }
    }

    if (pass == 2) {
        // Neither pass produced a string.
        return NULL;
    }

    // We have succeeded in loading a string from the locale
    // resources.  Create a new registry entry to hold it and return it.
    TransliteratorEntry *entry = new TransliteratorEntry();
    if (entry == 0) {
        return entry;
    }

    // The direction is always forward for the unidirectional items
    // found in pass 0. For the bidirectional items found in pass 1,
    // the direction is the value passed in to this function.
    int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction;
    entry->entryType = TransliteratorEntry::LOCALE_RULES;
    entry->stringArg = resStr;
    entry->intArg = dir;
    return entry;
}
U_CDECL_END

/**
 * Parse a compound ID, consisting of an optional forward global
 * filter, a separator, one or more single IDs delimited by
 * separators, and an optional reverse global filter.  The
 * separator is a semicolon.  The global filters are UnicodeSet
 * patterns.  The reverse global filter must be enclosed in
 * parentheses.
 * @param id the pattern to parse
 * @param dir the direction.
 * @param canonID OUTPUT parameter that receives the canonical ID,
 * consisting of canonical IDs for all elements, as returned by
 * parseSingleID(), separated by semicolons.  Previous contents
 * are discarded.
 * @param list OUTPUT parameter that receives a list of SingleID
 * objects representing the parsed IDs.  Previous contents are
 * discarded.  For any direction other than FORWARD the elements are
 * stored in reverse order of appearance.
 * @param globalFilter OUTPUT parameter that receives a pointer to
 * a newly created global filter for this ID in this direction, or
 * NULL if there is none.
 * @return TRUE if the parse succeeds, that is, if the entire
 * id is consumed without syntax error.  On failure the list is
 * emptied and globalFilter is deleted and set to NULL.
 */
UBool TransliteratorIDParser::parseCompoundID(const UnicodeString & id, int32_t dir,
                                              UnicodeString & canonID,
                                              UVector & list,
                                              UnicodeSet *& globalFilter) {
    UErrorCode ec = U_ZERO_ERROR;
    int32_t i;
    int32_t pos = 0;
    int32_t withParens = 1;
    list.removeAllElements();
    UnicodeSet * filter;
    globalFilter = NULL;
    canonID.truncate(0);

    // Parse leading global filter, if any
    withParens = 0; // parens disallowed
    filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);
    if (filter != NULL) {
        if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) {
            // Not a global filter; backup and resume
            canonID.truncate(0);
            pos = 0;
        }
        // NOTE(review): the parsed filter is kept as the global filter
        // for FORWARD even when the delimiter was missing and the parse
        // position was rewound above -- confirm this is intended.
        if (dir == FORWARD) {
            globalFilter = filter;
        } else {
            delete filter;
        }
        filter = NULL;
    }

    // Stays TRUE only if the last single ID was followed by a delimiter.
    UBool sawDelimiter = TRUE;
    for (;;) {
        SingleID * single = parseSingleID(id, pos, dir, ec);
        if (single == NULL) {
            break;
        }
        // Append for FORWARD; otherwise prepend so the list ends up reversed.
        if (dir == FORWARD) {
            list.addElement(single, ec);
        } else {
            list.insertElementAt(single, 0, ec);
        }
        if (U_FAILURE(ec)) {
            goto FAIL;
        }
        if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) {
            sawDelimiter = FALSE;
            break;
        }
    }

    // At least one single ID is required.
    if (list.size() == 0) {
        goto FAIL;
    }

    // Construct canonical ID
    for (i = 0; i < list.size(); ++i) {
        SingleID * single = (SingleID *) list.elementAt(i);
        canonID.append(single->canonID);
        if (i != (list.size() - 1)) {
            canonID.append(ID_DELIM);
        }
    }

    // Parse trailing global filter, if any, and only if we saw
    // a trailing delimiter after the IDs.
    if (sawDelimiter) {
        withParens = 1; // parens required
        filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);
        if (filter != NULL) {
            // Don't require trailing ';', but parse it if present
            ICU_Utility::parseChar(id, pos, ID_DELIM);

            // The trailing filter only applies in the REVERSE direction.
            if (dir == REVERSE) {
                globalFilter = filter;
            } else {
                delete filter;
            }
            filter = NULL;
        }
    }

    // Trailing unparsed text is a syntax error
    ICU_Utility::skipWhitespace(id, pos, TRUE);
    if (pos != id.length()) {
        goto FAIL;
    }

    return TRUE;

 FAIL:
    // Temporarily install a deleter so that emptying the list also
    // disposes of its elements, then restore the previous deleter.
    UObjectDeleter * save = list.setDeleter(_deleteSingleID);
    list.removeAllElements();
    list.setDeleter(save);
    delete globalFilter;
    globalFilter = NULL;
    return FALSE;
}
/**
 * Read an XML file, convert it to Unicode, and parse it.
 *
 * The charset is chosen as follows:
 *   1. a Unicode signature (BOM) at the start of the file, else
 *   2. the encoding="..." attribute of the XML declaration, found by
 *      reading the first block of the file as ISO-8859-1, else
 *   3. UTF-8.
 *
 * @param filename  path of the file to read
 * @param errorCode in/out error code. Nothing is done if it already
 *                  indicates a failure. Set to U_FILE_ACCESS_ERROR if the
 *                  file cannot be opened and U_MEMORY_ALLOCATION_ERROR if
 *                  string capacity cannot be reserved.
 * @return the result of parse() on the converted text, or NULL on failure
 */
UXMLElement *
UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) {
    char bytes[4096], charsetBuffer[100];
    FileStream *f;
    const char *charset, *pb;
    UnicodeString src;
    UConverter *cnv;
    UChar *buffer, *pu;
    int32_t fileLength, bytesLength, length, capacity;
    UBool flush;

    if(U_FAILURE(errorCode)) {
        return NULL;
    }

    f=T_FileStream_open(filename, "rb");
    if(f==NULL) {
        errorCode=U_FILE_ACCESS_ERROR;
        return NULL;
    }

    bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
    if(bytesLength<(int32_t)sizeof(bytes)) {
        // we have already read the entire file
        fileLength=bytesLength;
    } else {
        // get the file length
        fileLength=T_FileStream_size(f);
    }

    /*
     * get the charset:
     * 1. Unicode signature
     * 2. treat as ISO-8859-1 and read XML encoding="charset"
     * 3. default to UTF-8
     */
    charset=ucnv_detectUnicodeSignature(bytes, bytesLength, NULL, &errorCode);
    if(U_SUCCESS(errorCode) && charset!=NULL) {
        // open converter according to Unicode signature
        cnv=ucnv_open(charset, &errorCode);
    } else {
        // read as Latin-1 and parse the XML declaration and encoding
        cnv=ucnv_open("ISO-8859-1", &errorCode);
        if(U_FAILURE(errorCode)) {
            // unexpected error opening Latin-1 converter
            goto exit;
        }

        buffer=src.getBuffer(bytesLength);
        if(buffer==NULL) {
            // unexpected failure to reserve some string capacity
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            goto exit;
        }
        pb=bytes;
        pu=buffer;
        ucnv_toUnicode(
            cnv,
            &pu, buffer+src.getCapacity(),
            &pb, bytes+bytesLength,
            NULL, TRUE, &errorCode);
        src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0);
        ucnv_close(cnv);
        cnv=NULL;
        if(U_FAILURE(errorCode)) {
            // unexpected error in conversion from Latin-1
            src.remove();
            goto exit;
        }

        // parse XML declaration
        if(mXMLDecl.reset(src).lookingAt(0, errorCode)) {
            int32_t declEnd=mXMLDecl.end(errorCode);
            // go beyond <?xml
            int32_t pos=src.indexOf((UChar)x_l)+1;

            mAttrValue.reset(src);
            while(pos<declEnd && mAttrValue.lookingAt(pos, errorCode)) {  // loop runs once per attribute on this element.
                UnicodeString attName  = mAttrValue.group(1, errorCode);
                UnicodeString attValue = mAttrValue.group(2, errorCode);

                // Trim the quotes from the att value.  These are left over from the original regex
                //   that parsed the attribute, which couldn't conveniently strip them.
                attValue.remove(0,1);                    // one char from the beginning
                attValue.truncate(attValue.length()-1);  // and one from the end.

                if(attName==UNICODE_STRING("encoding", 8)) {
                    length=attValue.extract(0, 0x7fffffff, charsetBuffer, (int32_t)sizeof(charsetBuffer));
                    // extract() does not terminate the buffer when the name
                    // fills it; force a terminator so ucnv_open() never
                    // reads past the end of charsetBuffer.
                    charsetBuffer[sizeof(charsetBuffer)-1]=0;
                    charset=charsetBuffer;
                    break;
                }
                pos = mAttrValue.end(2, errorCode);
            }
        }

        // No signature and no usable encoding declaration (or no XML
        // declaration at all): default to UTF-8. The converter must be
        // opened on this path too; otherwise the conversion below would
        // run with cnv==NULL.
        if(charset==NULL) {
            charset="UTF-8";
        }
        cnv=ucnv_open(charset, &errorCode);
    }

    if(U_FAILURE(errorCode)) {
        // unable to open the converter
        goto exit;
    }

    // convert the file contents
    capacity=fileLength;        // estimated capacity
    src.getBuffer(capacity);
    src.releaseBuffer(0);       // zero length
    flush=FALSE;
    for(;;) {
        // convert contents of bytes[bytesLength]
        pb=bytes;
        for(;;) {
            length=src.length();
            buffer=src.getBuffer(capacity);
            if(buffer==NULL) {
                // unexpected failure to reserve some string capacity
                errorCode=U_MEMORY_ALLOCATION_ERROR;
                goto exit;
            }

            // Append after the text converted so far.
            // NOTE(review): 'flush' is tracked below, but FALSE is always
            // passed as the converter's flush argument -- confirm intent.
            pu=buffer+length;
            ucnv_toUnicode(
                cnv, &pu, buffer+src.getCapacity(),
                &pb, bytes+bytesLength,
                NULL, FALSE, &errorCode);

            // Keep what has been converted, also when the target buffer
            // merely overflowed; releasing with length 0 in that case
            // would throw away all text converted so far.
            src.releaseBuffer(
                (U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR) ?
                    (int32_t)(pu-buffer) : 0);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
                errorCode=U_ZERO_ERROR;
                capacity=(3*src.getCapacity())/2; // increase capacity by 50%
            } else {
                break;
            }
        }

        if(U_FAILURE(errorCode)) {
            break; // conversion error
        }

        if(flush) {
            break; // completely converted the file
        }

        // read next block
        bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
        if(bytesLength==0) {
            // reached end of file, convert once more to flush the converter
            flush=TRUE;
        }
    }

exit:
    ucnv_close(cnv);
    T_FileStream_close(f);

    if(U_SUCCESS(errorCode)) {
        return parse(src, errorCode);
    } else {
        return NULL;
    }
}
/** * Test new closure API */ void TestChoiceFormat::TestClosures(void) { // Construct open, half-open, half-open (the other way), and closed // intervals. Do this both using arrays and using a pattern. // 'fmt1' is created using arrays UBool T = TRUE, F = FALSE; // 0: ,1) // 1: [1,2] // 2: (2,3] // 3: (3,4) // 4: [4,5) // 5: [5, double limits[] = { 0, 1, 2, 3, 4, 5 }; UBool closures[] = { F, F, T, T, F, F }; UnicodeString fmts[] = { ",1)", "[1,2]", "(2,3]", "(3,4)", "[4,5)", "[5," }; ChoiceFormat fmt1(limits, closures, fmts, 6); // 'fmt2' is created using a pattern; it should be equivalent UErrorCode status = U_ZERO_ERROR; const char* PAT = "0#,1)|1#[1,2]|2<(2,3]|3<(3,4)|4#[4,5)|5#[5,"; ChoiceFormat fmt2(PAT, status); if (U_FAILURE(status)) { errln("FAIL: ChoiceFormat constructor failed"); return; } // Check the patterns UnicodeString str; fmt1.toPattern(str); if (str == PAT) { logln("Ok: " + str); } else { errln("FAIL: " + str + ", expected " + PAT); } str.truncate(0); // Check equality if (fmt1 != fmt2) { errln("FAIL: fmt1 != fmt2"); } #if 0 // ICU 4.8 deprecates and disables the ChoiceFormat getters. 
int32_t i; int32_t count2 = 0; const double *limits2 = fmt2.getLimits(count2); const UBool *closures2 = fmt2.getClosures(count2); if((count2 != 6) || !limits2 || !closures2) { errln("FAIL: couldn't get limits or closures"); } else { for(i=0;i<count2;i++) { logln("#%d/%d: limit %g closed %s\n", i, count2, limits2[i], closures2[i] ?"T":"F"); if(limits2[i] != limits[i]) { errln("FAIL: limit #%d = %g, should be %g\n", i, limits2[i], limits[i]); } if((closures2[i]!=0) != (closures[i]!=0)) { errln("FAIL: closure #%d = %s, should be %s\n", i, closures2[i]?"T":"F", closures[i]?"T":"F"); } } } #endif // Now test both format objects UnicodeString exp[] = { /*-0.5 => */ ",1)", /* 0.0 => */ ",1)", /* 0.5 => */ ",1)", /* 1.0 => */ "[1,2]", /* 1.5 => */ "[1,2]", /* 2.0 => */ "[1,2]", /* 2.5 => */ "(2,3]", /* 3.0 => */ "(2,3]", /* 3.5 => */ "(3,4)", /* 4.0 => */ "[4,5)", /* 4.5 => */ "[4,5)", /* 5.0 => */ "[5,", /* 5.5 => */ "[5," }; // Each format object should behave exactly the same ChoiceFormat* FMT[] = { &fmt1, &fmt2 }; for (int32_t pass=0; pass<2; ++pass) { int32_t j=0; for (int32_t ix=-5; ix<=55; ix+=5) { double x = ix / 10.0; // -0.5 to 5.5 step +0.5 FMT[pass]->format(x, str); if (str == exp[j]) { logln((UnicodeString)"Ok: " + x + " => " + str); } else { errln((UnicodeString)"FAIL: " + x + " => " + str + ", expected " + exp[j]); } str.truncate(0); ++j; } } }
/**
 * Parse a choice pattern and, if it is well-formed, replace this
 * object's limits, closures, and format strings with the parsed ones.
 * This object is left untouched when parsing fails.
 *
 * @param pattern    the pattern to parse
 * @param parseError cleared on entry; filled in through syntaxError()
 *                   when the pattern is rejected
 * @param status     in/out error code. Nothing is done if it already
 *                   indicates a failure. Set to U_MEMORY_ALLOCATION_ERROR
 *                   if storage cannot be allocated and to
 *                   U_ILLEGAL_ARGUMENT_ERROR if the pattern is rejected.
 */
void ChoiceFormat::applyPattern(const UnicodeString& pattern,
                                UParseError& parseError,
                                UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }

    // Clear error struct
    parseError.offset = -1;
    parseError.preContext[0] = parseError.postContext[0] = (UChar)0;

    // Perform 2 passes.  The first computes the number of limits in
    // this pattern (fCount), which is 1 more than the number of
    // literal VERTICAL_BAR characters.
    int32_t count = 1;
    int32_t i;
    for (i=0; i<pattern.length(); ++i) {
        UChar c = pattern[i];
        if (c == SINGLE_QUOTE) {
            // Skip over the entire quote, including embedded
            // contiguous pairs of SINGLE_QUOTE.
            for (;;) {
                // Advance to the next SINGLE_QUOTE or the end of the pattern.
                do {
                    ++i;
                } while (i<pattern.length() &&
                         pattern[i] != SINGLE_QUOTE);
                if ((i+1)<pattern.length() &&
                    pattern[i+1] == SINGLE_QUOTE) {
                    // SINGLE_QUOTE pair; skip over it
                    ++i;
                } else {
                    break;
                }
            }
        } else if (c == VERTICAL_BAR) {
            ++count;
        }
    }

    // Allocate the required storage.
    double *newLimits = (double*) uprv_malloc( sizeof(double) * count);
    /* test for NULL */
    if (newLimits == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    UBool *newClosures = (UBool*) uprv_malloc( sizeof(UBool) * count);
    /* test for NULL */
    if (newClosures == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(newLimits);
        return;
    }
    UnicodeString *newFormats = new UnicodeString[count];
    /* test for NULL */
    if (newFormats == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(newLimits);
        uprv_free(newClosures);
        return;
    }

    // Perform the second pass
    int32_t k = 0; // index into newXxx[] arrays
    UnicodeString buf; // scratch buffer
    UBool inQuote = FALSE;
    UBool inNumber = TRUE; // TRUE before < or #, FALSE after

    for (i=0; i<pattern.length(); ++i) {
        UChar c = pattern[i];
        if (c == SINGLE_QUOTE) {
            // Check for SINGLE_QUOTE pair indicating a literal quote
            if ((i+1) < pattern.length() &&
                pattern[i+1] == SINGLE_QUOTE) {
                buf += SINGLE_QUOTE;
                ++i;
            } else {
                inQuote = !inQuote;
            }
        } else if (inQuote) {
            // Quoted text is copied verbatim.
            buf += c;
        } else if (c == LESS_THAN || c == LESS_EQUAL || c == LESS_EQUAL2) {
            // A relation character ends the number part; the number
            // must be non-empty and must not already have been ended.
            if (!inNumber || buf.length() == 0) {
                goto error;
            }
            inNumber = FALSE;

            // Convert the accumulated text to a limit, recognizing the
            // two infinity strings specially.
            double limit;
            buf.trim();
            if (!buf.compare(gPositiveInfinity, POSITIVE_INF_STRLEN)) {
                limit = uprv_getInfinity();
            } else if (!buf.compare(gNegativeInfinity, NEGATIVE_INF_STRLEN)) {
                limit = -uprv_getInfinity();
            } else {
                limit = stod(buf);
            }
            if (k == count) {
                // This shouldn't happen.  If it does, it means that
                // the count determined in the first pass did not
                // match the number of elements found in the second
                // pass.
                goto error;
            }
            newLimits[k] = limit;
            // The closure flag is TRUE only for LESS_THAN.
            newClosures[k] = (c == LESS_THAN);

            if (k > 0 && limit <= newLimits[k-1]) {
                // Each limit must be strictly > than the previous
                // limit.  One exception: Two subsequent limits may be
                // == if the first closure is FALSE and the second
                // closure is TRUE.  This places the limit value in
                // the second interval.
                if (!(limit == newLimits[k-1] &&
                      !newClosures[k-1] &&
                      newClosures[k])) {
                    goto error;
                }
            }

            buf.truncate(0);
        } else if (c == VERTICAL_BAR) {
            // A bar ends the format text of entry k; it is an error if
            // the number part of this entry was never completed.
            if (inNumber) {
                goto error;
            }
            inNumber = TRUE;

            newFormats[k] = buf;
            ++k;
            buf.truncate(0);
        } else {
            buf += c;
        }
    }

    // The last entry must be complete: all but one entry stored, the
    // final number part ended, and no quote left open.
    if (k != (count-1) || inNumber || inQuote) {
        goto error;
    }
    newFormats[k] = buf;

    // Don't modify this object until the parse succeeds
    uprv_free(fChoiceLimits);
    uprv_free(fClosures);
    delete[] fChoiceFormats;
    fCount = count;
    fChoiceLimits  = newLimits;
    fClosures      = newClosures;
    fChoiceFormats = newFormats;
    return;

error:
    // Report the failure at the current pattern index and release the
    // storage allocated for this parse.
    status = U_ILLEGAL_ARGUMENT_ERROR;
    syntaxError(pattern,i,parseError);
    uprv_free(newLimits);
    uprv_free(newClosures);
    delete[] newFormats;
    return;
}
/**
 * Replace the range [start, limit) of text with this replacer's output
 * and, if this replacer has a cursor, report the new cursor position.
 *
 * @param text   the text being modified
 * @param start  start of the range to replace (the key)
 * @param limit  end of the range to replace, exclusive
 * @param cursor OUTPUT: set to the new cursor position only when
 *               hasCursor is true. It is also passed on to any nested
 *               replacers.
 * @return the length of the text that replaced the key
 */
int32_t StringReplacer::replace(Replaceable& text,
                                int32_t start,
                                int32_t limit,
                                int32_t& cursor) {
    int32_t outLen;
    int32_t newStart = 0;

    // NOTE: It should be possible to _always_ run the complex
    // processing code; just slower.  If not, then there is a bug
    // in the complex processing code.

    // Simple (no nested replacers) Processing Code :
    if (!isComplex) {
        text.handleReplaceBetween(start, limit, output);
        outLen = output.length();

        // Setup default cursor position (for cursorPos within output)
        newStart = cursorPos;
    }

    // Complex (nested replacers) Processing Code :
    else {
        /* When there are segments to be copied, use the Replaceable.copy()
         * API in order to retain out-of-band data.  Copy everything to the
         * end of the string, then copy them back over the key.  This preserves
         * the integrity of indices into the key and surrounding context while
         * generating the output text.
         */
        UnicodeString buf;
        int32_t oOutput; // offset into 'output'

        // Cleared here and set again below only if a nested replacer is
        // actually found in 'output'.
        isComplex = FALSE;

        // The temporary buffer starts at tempStart, and extends
        // to destLimit.  The start of the buffer has a single
        // character from before the key.  This provides style
        // data when additional characters are filled into the
        // temporary buffer.  If there is nothing to the left, use
        // the non-character U+FFFF, which Replaceable subclasses
        // should treat specially as a "no-style character."
        // destStart points to the point after the style context
        // character, so it is tempStart+1 or tempStart+2.
        int32_t tempStart = text.length(); // start of temp buffer
        int32_t destStart = tempStart; // copy new text to here
        if (start > 0) {
            int32_t len = UTF_CHAR_LENGTH(text.char32At(start-1));
            text.copy(start-len, start, tempStart);
            destStart += len;
        } else {
            UnicodeString str((UChar) 0xFFFF);
            text.handleReplaceBetween(tempStart, tempStart, str);
            destStart++;
        }
        int32_t destLimit = destStart;

        // Walk 'output' one code point at a time, building the
        // replacement text in the temporary buffer.
        for (oOutput=0; oOutput<output.length(); ) {
            if (oOutput == cursorPos) {
                // Record the position of the cursor
                newStart = destLimit - destStart; // relative to start
            }
            UChar32 c = output.char32At(oOutput);
            UnicodeReplacer* r = data->lookupReplacer(c);
            if (r == NULL) {
                // Accumulate straight (non-segment) text.
                buf.append(c);
            } else {
                isComplex = TRUE;

                // Insert any accumulated straight text.
                if (buf.length() > 0) {
                    text.handleReplaceBetween(destLimit, destLimit, buf);
                    destLimit += buf.length();
                    buf.truncate(0);
                }

                // Delegate output generation to replacer object
                int32_t len = r->replace(text, destLimit, destLimit, cursor);
                destLimit += len;
            }
            oOutput += UTF_CHAR_LENGTH(c);
        }
        // Insert any accumulated straight text.
        if (buf.length() > 0) {
            text.handleReplaceBetween(destLimit, destLimit, buf);
            destLimit += buf.length();
        }
        // The cursor may sit exactly at the end of 'output'.
        if (oOutput == cursorPos) {
            // Record the position of the cursor
            newStart = destLimit - destStart; // relative to start
        }

        outLen = destLimit - destStart;

        // Copy new text to start, and delete it
        // (the copy inserts outLen units before the temporary buffer,
        // so the buffer's offsets are shifted by outLen below)
        text.copy(destStart, destLimit, start);
        text.handleReplaceBetween(tempStart + outLen, destLimit + outLen, EMPTY);

        // Delete the old text (the key)
        text.handleReplaceBetween(start + outLen, limit + outLen, EMPTY);
    }

    if (hasCursor) {
        // Adjust the cursor for positions outside the key.  These
        // refer to code points rather than code units.  If cursorPos
        // is within the output string, then use newStart, which has
        // already been set above.
        if (cursorPos < 0) {
            newStart = start;
            int32_t n = cursorPos;
            // Outside the output string, cursorPos counts code points
            while (n < 0 && newStart > 0) {
                newStart -= UTF_CHAR_LENGTH(text.char32At(newStart-1));
                ++n;
            }
            // Apply whatever part of the offset could not be consumed
            // because the start of the text was reached.
            newStart += n;
        } else if (cursorPos > output.length()) {
            newStart = start + outLen;
            int32_t n = cursorPos - output.length();
            // Outside the output string, cursorPos counts code points
            while (n > 0 && newStart < text.length()) {
                newStart += UTF_CHAR_LENGTH(text.char32At(newStart));
                --n;
            }
            // Apply whatever part of the offset could not be consumed
            // because the end of the text was reached.
            newStart += n;
        } else {
            // Cursor is within output string.  It has been set up above
            // to be relative to start.
            newStart += start;
        }

        cursor = newStart;
    }

    return outLen;
}
/**
 * Produce a display name for a transliterator ID in the given locale.
 *
 * The lookup proceeds in this order:
 *   1. a resource string stored under RB_DISPLAY_NAME_PREFIX + ID;
 *   2. (only when formatting is compiled in) a name synthesized from
 *      the RB_DISPLAY_NAME_PATTERN message pattern, using resource
 *      strings under RB_SCRIPT_DISPLAY_NAME_PREFIX for the source and
 *      target where they exist;
 *   3. the normalized ID itself.
 *
 * @param id       the transliterator ID
 * @param inLocale the locale whose resources are consulted
 * @param result   receives the display name; left empty if the ID has
 *                 no target
 * @return a reference to result
 */
UnicodeString& U_EXPORT2
Transliterator::getDisplayName(const UnicodeString& id,
                               const Locale& inLocale,
                               UnicodeString& result) {
    UErrorCode status = U_ZERO_ERROR;

    ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status);

    // Suspend checking status until later...

    result.truncate(0);

    // Normalize the ID
    UnicodeString source, target, variant;
    UBool sawSource;
    TransliteratorIDParser::IDtoSTV(id, source, target, variant, sawSource);
    if (target.length() < 1) {
        // No target; malformed id
        return result;
    }
    if (variant.length() > 0) { // Change "Foo" to "/Foo"
        variant.insert(0, VARIANT_SEP);
    }
    UnicodeString ID(source);
    ID.append(TARGET_SEP).append(target).append(variant);

    // build the char* key
    if (uprv_isInvariantUString(ID.getBuffer(), ID.length())) {
        char key[200];
        uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX);
        int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX);
        ID.extract(0, (int32_t)(sizeof(key)-length), key+length, (int32_t)(sizeof(key)-length), US_INV);
        // extract() leaves the buffer unterminated when the ID fills the
        // remaining capacity. Force a terminator so the key is always a
        // valid C string; an over-long ID then simply fails the lookup.
        key[sizeof(key)-1] = 0;

        // Try to retrieve a UnicodeString from the bundle.
        UnicodeString resString = bundle.getStringEx(key, status);

        if (U_SUCCESS(status) && resString.length() != 0) {
            return result = resString; // [sic] assign & return
        }

#if !UCONFIG_NO_FORMATTING
        // We have failed to get a name from the locale data.  This is
        // typical, since most transliterators will not have localized
        // name data.  The next step is to retrieve the MessageFormat
        // pattern from the locale data and to use it to synthesize the
        // name from the ID.

        status = U_ZERO_ERROR;
        resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status);

        if (U_SUCCESS(status) && resString.length() != 0) {
            MessageFormat msg(resString, inLocale, status);
            // Suspend checking status until later...

            // Three Formattable objects are passed to msg: a count
            // followed by the source and the target.
            Formattable args[3];
            int32_t nargs;
            args[0].setLong(2); // # of args to follow
            args[1].setString(source);
            args[2].setString(target);
            nargs = 3;

            // Use display names for the scripts, if they exist
            UnicodeString s;
            length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX);
            for (int j=1; j<=2; ++j) {
                status = U_ZERO_ERROR;
                uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX);
                args[j].getString(s);
                if (uprv_isInvariantUString(s.getBuffer(), s.length())) {
                    s.extract(0, sizeof(key)-length-1, key+length, (int32_t)sizeof(key)-length-1, US_INV);
                    // As above: guarantee termination even when the
                    // name fills the remaining capacity.
                    key[sizeof(key)-1] = 0;

                    resString = bundle.getStringEx(key, status);

                    if (U_SUCCESS(status)) {
                        args[j] = resString;
                    }
                }
            }

            status = U_ZERO_ERROR;
            FieldPosition pos; // ignored by msg
            msg.format(args, nargs, result, pos, status);
            if (U_SUCCESS(status)) {
                result.append(variant);
                return result;
            }
        }
#endif
    }

    // We should not reach this point unless there is something
    // wrong with the build or the RB_DISPLAY_NAME_PATTERN has
    // been deleted from the root RB_LOCALE_ELEMENTS resource.
    result = ID;
    return result;
}
void IntlTestNumberFormat::tryIt(double aNumber) { const int32_t DEPTH = 10; Formattable number[DEPTH]; UnicodeString string[DEPTH]; int32_t numberMatch = 0; int32_t stringMatch = 0; UnicodeString errMsg; int32_t i; for (i=0; i<DEPTH; ++i) { errMsg.truncate(0); // if non-empty, we failed this iteration UErrorCode status = U_ZERO_ERROR; string[i] = "(n/a)"; // "format was never done" value if (i == 0) { number[i].setDouble(aNumber); } else { fFormat->parse(string[i-1], number[i], status); if (U_FAILURE(status)) { number[i].setDouble(1234.5); // "parse failed" value errMsg = "**** FAIL: Parse of " + prettify(string[i-1]) + " failed."; --i; // don't show empty last line: "1234.5 F> (n/a) P>" break; } } // Convert from long to double if (number[i].getType() == Formattable::kLong) number[i].setDouble(number[i].getLong()); else if (number[i].getType() == Formattable::kInt64) number[i].setDouble((double)number[i].getInt64()); else if (number[i].getType() != Formattable::kDouble) { errMsg = ("**** FAIL: Parse of " + prettify(string[i-1]) + " returned non-numeric Formattable, type " + UnicodeString(formattableTypeName(number[i].getType())) + ", Locale=" + UnicodeString(fLocale.getName()) + ", longValue=" + number[i].getLong()); break; } string[i].truncate(0); fFormat->format(number[i].getDouble(), string[i]); if (i > 0) { if (numberMatch == 0 && number[i] == number[i-1]) numberMatch = i; else if (numberMatch > 0 && number[i] != number[i-1]) { errMsg = ("**** FAIL: Numeric mismatch after match."); break; } if (stringMatch == 0 && string[i] == string[i-1]) stringMatch = i; else if (stringMatch > 0 && string[i] != string[i-1]) { errMsg = ("**** FAIL: String mismatch after match."); break; } } if (numberMatch > 0 && stringMatch > 0) break; } if (i == DEPTH) --i; if (stringMatch > 2 || numberMatch > 2) { errMsg = ("**** FAIL: No string and/or number match within 2 iterations."); } if (errMsg.length() != 0) { for (int32_t k=0; k<=i; ++k) { logln((UnicodeString)"" + k + ": " + 
number[k].getDouble() + " F> " + prettify(string[k]) + " P> "); } errln(errMsg); } }