예제 #1
0
 virtual void dataerrln( const UnicodeString &message ) {
     char buffer[4000];
     message.extract(0, message.length(), buffer, sizeof(buffer));
     buffer[3999] = 0; /* NULL terminate */
     log_data_err(buffer);
 }
예제 #2
0
void  U_EXPORT2 
DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton,
                 const UnicodeString& bestMatchSkeleton,
                 const UnicodeString& bestIntervalPattern,
                 int8_t differenceInfo,
                 UnicodeString& adjustedPtn) {
    adjustedPtn = bestIntervalPattern;
    int32_t inputSkeletonFieldWidth[] = 
    {
    //       A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
             0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    //   P   Q   R   S   T   U   V   W   X   Y   Z
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,
    //       a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    //   p   q   r   s   t   u   v   w   x   y   z
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
    };

    int32_t bestMatchSkeletonFieldWidth[] = 
    {
    //       A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
             0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    //   P   Q   R   S   T   U   V   W   X   Y   Z
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,
    //       a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    //   p   q   r   s   t   u   v   w   x   y   z
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
    };

    DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth);
    DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth);
    if ( differenceInfo == 2 ) {
        adjustedPtn.findAndReplace("v", "z");
    }

    UBool inQuote = false;
    UChar prevCh = 0;
    int32_t count = 0;

    const int8_t PATTERN_CHAR_BASE = 0x41;
    
    // loop through the pattern string character by character 
    int32_t adjustedPtnLength = adjustedPtn.length();
    int32_t i;
    for (i = 0; i < adjustedPtnLength; ++i) {
        UChar ch = adjustedPtn.charAt(i);
        if (ch != prevCh && count > 0) {
            // check the repeativeness of pattern letter
            UChar skeletonChar = prevCh;
            if ( skeletonChar ==  CAP_L ) {
                // there is no "L" (always be "M") in skeleton, 
                // but there is "L" in pattern.
                // for skeleton "M+", the pattern might be "...L..." 
                skeletonChar = CAP_M;
            }
            int32_t fieldCount = bestMatchSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)];
            int32_t inputFieldCount = inputSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)];
            if ( fieldCount == count && inputFieldCount > fieldCount ) {
                count = inputFieldCount - fieldCount;
                int32_t j;
                for ( j = 0; j < count; ++j ) {
                    adjustedPtn.insert(i, prevCh);    
                }                    
                i += count;
                adjustedPtnLength += count;
            }
            count = 0;
        }
        if (ch == '\'') {
            // Consecutive single quotes are a single quote literal,
            // either outside of quotes or between quotes
            if ((i+1) < adjustedPtn.length() && adjustedPtn.charAt(i+1) == '\'') {
                ++i;
            } else {
                inQuote = ! inQuote;
            }
        } 
        else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/) 
                    || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
            // ch is a date-time pattern character 
            prevCh = ch;
            ++count;
        }
    }
    if ( count > 0 ) {
        // last item
        // check the repeativeness of pattern letter
        UChar skeletonChar = prevCh;
        if ( skeletonChar == CAP_L ) {
            // there is no "L" (always be "M") in skeleton, 
            // but there is "L" in pattern.
            // for skeleton "M+", the pattern might be "...L..." 
            skeletonChar = CAP_M;
        }
        int32_t fieldCount = bestMatchSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)];
        int32_t inputFieldCount = inputSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)];
        if ( fieldCount == count && inputFieldCount > fieldCount ) {
            count = inputFieldCount - fieldCount;
            int32_t j;
            for ( j = 0; j < count; ++j ) {
                adjustedPtn.append(prevCh);    
            }                    
        }
    }
}
예제 #3
0
void  U_EXPORT2 
DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, 
                                        UnicodeString& dateSkeleton, 
                                        UnicodeString& normalizedDateSkeleton, 
                                        UnicodeString& timeSkeleton,
                                        UnicodeString& normalizedTimeSkeleton) {
    // dateSkeleton follows the sequence of y*M*E*d*
    // timeSkeleton follows the sequence of hm*[v|z]?
    int32_t ECount = 0;
    int32_t dCount = 0;
    int32_t MCount = 0;
    int32_t yCount = 0;
    int32_t hCount = 0;
    int32_t HCount = 0;
    int32_t mCount = 0;
    int32_t vCount = 0;
    int32_t zCount = 0;
    int32_t i;

    for (i = 0; i < skeleton.length(); ++i) {
        UChar ch = skeleton[i];
        switch ( ch ) {
          case CAP_E:
            dateSkeleton.append(ch);
            ++ECount;
            break;
          case LOW_D:
            dateSkeleton.append(ch);
            ++dCount;
            break;
          case CAP_M:
            dateSkeleton.append(ch);
            ++MCount;
            break;
          case LOW_Y:
            dateSkeleton.append(ch);
            ++yCount;
            break;
          case CAP_G:
          case CAP_Y:
          case LOW_U:
          case CAP_Q:
          case LOW_Q:
          case CAP_L:
          case LOW_L:
          case CAP_W:
          case LOW_W:
          case CAP_D:
          case CAP_F:
          case LOW_G:
          case LOW_E:
          case LOW_C:
            normalizedDateSkeleton.append(ch);
            dateSkeleton.append(ch);
            break;
          case LOW_A:
            // 'a' is implicitly handled 
            timeSkeleton.append(ch);
            break;
          case LOW_H:
            timeSkeleton.append(ch);
            ++hCount;
            break;
          case CAP_H:
            timeSkeleton.append(ch);
            ++HCount;
            break;
          case LOW_M:
            timeSkeleton.append(ch);
            ++mCount;
            break;
          case LOW_Z:
            ++zCount;
            timeSkeleton.append(ch);
            break;
          case LOW_V:
            ++vCount;
            timeSkeleton.append(ch);
            break;
          case CAP_V:
          case CAP_Z:
          case LOW_K:
          case CAP_K:
          case LOW_J:
          case LOW_S:
          case CAP_S:
          case CAP_A:
            timeSkeleton.append(ch);
            normalizedTimeSkeleton.append(ch);
            break;     
        }
    }

    /* generate normalized form for date*/
    if ( yCount != 0 ) {
        normalizedDateSkeleton.append(LOW_Y);
    }
    if ( MCount != 0 ) {
        if ( MCount < 3 ) {
            normalizedDateSkeleton.append(CAP_M);
        } else {
            int32_t i;
            for ( i = 0; i < MCount && i < MAX_M_COUNT; ++i ) {
                 normalizedDateSkeleton.append(CAP_M);
            }
        }
    }
    if ( ECount != 0 ) {
        if ( ECount <= 3 ) {
            normalizedDateSkeleton.append(CAP_E);
        } else {
            int32_t i;
            for ( i = 0; i < ECount && i < MAX_E_COUNT; ++i ) {
                 normalizedDateSkeleton.append(CAP_E);
            }
        }
    }
    if ( dCount != 0 ) {
        normalizedDateSkeleton.append(LOW_D);
    }

    /* generate normalized form for time */
    if ( HCount != 0 ) {
        normalizedTimeSkeleton.append(CAP_H);
    }
    else if ( hCount != 0 ) {
        normalizedTimeSkeleton.append(LOW_H);
    }
    if ( mCount != 0 ) {
        normalizedTimeSkeleton.append(LOW_M);
    }
    if ( zCount != 0 ) {
        normalizedTimeSkeleton.append(LOW_Z);
    }
    if ( vCount != 0 ) {
        normalizedTimeSkeleton.append(LOW_V);
    }
}
예제 #4
0
void 
IntlTestNumberFormat::tryIt(double aNumber)
{
    const int32_t DEPTH = 10;
    Formattable number[DEPTH];
    UnicodeString string[DEPTH];

    int32_t numberMatch = 0;
    int32_t stringMatch = 0;
    UnicodeString errMsg;
    int32_t i;
    for (i=0; i<DEPTH; ++i)
    {
        errMsg.truncate(0); // if non-empty, we failed this iteration
        UErrorCode status = U_ZERO_ERROR;
        string[i] = "(n/a)"; // "format was never done" value
        if (i == 0) {
            number[i].setDouble(aNumber);
        } else {
            fFormat->parse(string[i-1], number[i], status);
            if (U_FAILURE(status)) {
                number[i].setDouble(1234.5); // "parse failed" value
                errMsg = "**** FAIL: Parse of " + prettify(string[i-1]) + " failed.";
                --i; // don't show empty last line: "1234.5 F> (n/a) P>"
                break;
            }
        }
        // Convert from long to double
        if (number[i].getType() == Formattable::kLong)
            number[i].setDouble(number[i].getLong());
        else if (number[i].getType() == Formattable::kInt64)
            number[i].setDouble((double)number[i].getInt64());
        else if (number[i].getType() != Formattable::kDouble)
        {
            errMsg = ("**** FAIL: Parse of " + prettify(string[i-1])
                + " returned non-numeric Formattable, type " + UnicodeString(formattableTypeName(number[i].getType()))
                + ", Locale=" + UnicodeString(fLocale.getName())
                + ", longValue=" + number[i].getLong());
            break;
        }
        string[i].truncate(0);
        fFormat->format(number[i].getDouble(), string[i]);
        if (i > 0)
        {
            if (numberMatch == 0 && number[i] == number[i-1])
                numberMatch = i;
            else if (numberMatch > 0 && number[i] != number[i-1])
            {
                errMsg = ("**** FAIL: Numeric mismatch after match.");
                break;
            }
            if (stringMatch == 0 && string[i] == string[i-1])
                stringMatch = i;
            else if (stringMatch > 0 && string[i] != string[i-1])
            {
                errMsg = ("**** FAIL: String mismatch after match.");
                break;
            }
        }
        if (numberMatch > 0 && stringMatch > 0)
            break;
    }
    if (i == DEPTH)
        --i;

    if (stringMatch > 2 || numberMatch > 2)
    {
        errMsg = ("**** FAIL: No string and/or number match within 2 iterations.");
    }

    if (errMsg.length() != 0)
    {
        for (int32_t k=0; k<=i; ++k)
        {
            logln((UnicodeString)"" + k + ": " + number[k].getDouble() + " F> " +
                  prettify(string[k]) + " P> ");
        }
        errln(errMsg);
    }
}
예제 #5
0
파일: choicfmt.cpp 프로젝트: Botyto/Core
void
ChoiceFormat::applyPattern(const UnicodeString & pattern,
                           UParseError & parseError,
                           UErrorCode & status)
{
	if (U_FAILURE(status))
	{
		return;
	}

	// Clear error struct
	parseError.offset = -1;
	parseError.preContext[0] = parseError.postContext[0] = (UChar)0;

	// Perform 2 passes.  The first computes the number of limits in
	// this pattern (fCount), which is 1 more than the number of
	// literal VERTICAL_BAR characters.
	int32_t count = 1;
	int32_t i;
	for (i = 0; i < pattern.length(); ++i)
	{
		UChar c = pattern[i];
		if (c == SINGLE_QUOTE)
		{
			// Skip over the entire quote, including embedded
			// contiguous pairs of SINGLE_QUOTE.
			for (;;)
			{
				do
				{
					++i;
				}
				while (i < pattern.length() &&
				       pattern[i] != SINGLE_QUOTE);
				if ((i + 1) < pattern.length() &&
				    pattern[i + 1] == SINGLE_QUOTE)
				{
					// SINGLE_QUOTE pair; skip over it
					++i;
				}
				else
				{
					break;
				}
			}
		}
		else if (c == VERTICAL_BAR)
		{
			++count;
		}
	}

	// Allocate the required storage.
	double * newLimits = (double *) uprv_malloc(sizeof(double) * count);
	/* test for NULL */
	if (newLimits == 0)
	{
		status = U_MEMORY_ALLOCATION_ERROR;
		return;
	}
	UBool * newClosures = (UBool *) uprv_malloc(sizeof(UBool) * count);
	/* test for NULL */
	if (newClosures == 0)
	{
		status = U_MEMORY_ALLOCATION_ERROR;
		uprv_free(newLimits);
		return;
	}
	UnicodeString * newFormats = new UnicodeString[count];
	/* test for NULL */
	if (newFormats == 0)
	{
		status = U_MEMORY_ALLOCATION_ERROR;
		uprv_free(newLimits);
		uprv_free(newClosures);
		return;
	}

	// Perform the second pass
	int32_t k = 0; // index into newXxx[] arrays
	UnicodeString buf; // scratch buffer
	UBool inQuote = FALSE;
	UBool inNumber = TRUE; // TRUE before < or #, FALSE after

	for (i = 0; i < pattern.length(); ++i)
	{
		UChar c = pattern[i];
		if (c == SINGLE_QUOTE)
		{
			// Check for SINGLE_QUOTE pair indicating a literal quote
			if ((i + 1) < pattern.length() &&
			    pattern[i + 1] == SINGLE_QUOTE)
			{
				buf += SINGLE_QUOTE;
				++i;
			}
			else
			{
				inQuote = !inQuote;
			}
		}
		else if (inQuote)
		{
			buf += c;
		}
		else if (c == LESS_THAN || c == LESS_EQUAL || c == LESS_EQUAL2)
		{
			if (!inNumber || buf.length() == 0)
			{
				goto error;
			}
			inNumber = FALSE;

			double limit;
			buf.trim();
			if (!buf.compare(gPositiveInfinity, POSITIVE_INF_STRLEN))
			{
				limit = uprv_getInfinity();
			}
			else if (!buf.compare(gNegativeInfinity, NEGATIVE_INF_STRLEN))
			{
				limit = -uprv_getInfinity();
			}
			else
			{
				limit = stod(buf);
			}

			if (k == count)
			{
				// This shouldn't happen.  If it does, it means that
				// the count determined in the first pass did not
				// match the number of elements found in the second
				// pass.
				goto error;
			}
			newLimits[k] = limit;
			newClosures[k] = (c == LESS_THAN);

			if (k > 0 && limit <= newLimits[k - 1])
			{
				// Each limit must be strictly > than the previous
				// limit.  One exception: Two subsequent limits may be
				// == if the first closure is FALSE and the second
				// closure is TRUE.  This places the limit value in
				// the second interval.
				if (!(limit == newLimits[k - 1] &&
				      !newClosures[k - 1] &&
				      newClosures[k]))
				{
					goto error;
				}
			}

			buf.truncate(0);
		}
		else if (c == VERTICAL_BAR)
		{
			if (inNumber)
			{
				goto error;
			}
			inNumber = TRUE;

			newFormats[k] = buf;
			++k;
			buf.truncate(0);
		}
		else
		{
			buf += c;
		}
	}

	if (k != (count - 1) || inNumber || inQuote)
	{
		goto error;
	}
	newFormats[k] = buf;

	// Don't modify this object until the parse succeeds
	uprv_free(fChoiceLimits);
	uprv_free(fClosures);
	delete[] fChoiceFormats;
	fCount = count;
	fChoiceLimits  = newLimits;
	fClosures      = newClosures;
	fChoiceFormats = newFormats;
	return;

error:
	status = U_ILLEGAL_ARGUMENT_ERROR;
	syntaxError(pattern, i, parseError);
	uprv_free(newLimits);
	uprv_free(newClosures);
	delete[] newFormats;
	return;

}
예제 #6
0
/**
 * Return true if the given position, in the given pattern, appears
 * to be the start of a UnicodeSet pattern.
 */
UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
    return ((pos+1) < pattern.length() &&
            pattern.charAt(pos) == (UChar)91/*[*/) ||
        resemblesPropertyPattern(pattern, pos);
}
예제 #7
0
파일: nfrs.cpp 프로젝트: MIPS/external-icu
void
NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
{
    // start by creating a Vector whose elements are Strings containing
    // the descriptions of the rules (one rule per element).  The rules
    // are separated by semicolons (there's no escape facility: ALL
    // semicolons are rule delimiters)

    if (U_FAILURE(status)) {
        return;
    }

    // ensure we are starting with an empty rule list
    rules.deleteAll();

    // dlf - the original code kept a separate description array for no reason,
    // so I got rid of it.  The loop was too complex so I simplified it.

    UnicodeString currentDescription;
    int32_t oldP = 0;
    while (oldP < description.length()) {
        int32_t p = description.indexOf(gSemicolon, oldP);
        if (p == -1) {
            p = description.length();
        }
        currentDescription.setTo(description, oldP, p - oldP);
        NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
        oldP = p + 1;
    }

    // for rules that didn't specify a base value, their base values
    // were initialized to 0.  Make another pass through the list and
    // set all those rules' base values.  We also remove any special
    // rules from the list and put them into their own member variables
    int64_t defaultBaseValue = 0;

    // (this isn't a for loop because we might be deleting items from
    // the vector-- we want to make sure we only increment i when
    // we _didn't_ delete aything from the vector)
    int32_t rulesSize = rules.size();
    for (int32_t i = 0; i < rulesSize; i++) {
        NFRule* rule = rules[i];
        int64_t baseValue = rule->getBaseValue();

        if (baseValue == 0) {
            // if the rule's base value is 0, fill in a default
            // base value (this will be 1 plus the preceding
            // rule's base value for regular rule sets, and the
            // same as the preceding rule's base value in fraction
            // rule sets)
            rule->setBaseValue(defaultBaseValue, status);
        }
        else {
            // if it's a regular rule that already knows its base value,
            // check to make sure the rules are in order, and update
            // the default base value for the next rule
            if (baseValue < defaultBaseValue) {
                // throw new IllegalArgumentException("Rules are not in order");
                status = U_PARSE_ERROR;
                return;
            }
            defaultBaseValue = baseValue;
        }
        if (!fIsFractionRuleSet) {
            ++defaultBaseValue;
        }
    }
}
예제 #8
0
파일: nptrans.cpp 프로젝트: winlibs/icu4c
//constructor
NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
: unassigned(), prohibited(), labelSeparatorSet(){
        
    mapping = NULL;
    bundle = NULL;


    const char* testDataName = IntlTest::loadTestData(status);
    
    if(U_FAILURE(status)){
        return;
    }
    
    bundle = ures_openDirect(testDataName,"idna_rules",&status);
    
    if(bundle != NULL && U_SUCCESS(status)){
        // create the mapping transliterator
        int32_t ruleLen = 0;
        const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
        int32_t mapRuleLen = 0;
        const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status);
        UnicodeString rule(mapRuleUChar, mapRuleLen);
        rule.append(ruleUChar, ruleLen);

        mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
                                                   UTRANS_FORWARD, parseError,status);
        if(U_FAILURE(status)) {
          return;
        }

        //create the unassigned set
        int32_t patternLen =0;
        const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
        unassigned.applyPattern(UnicodeString(pattern, patternLen), status);

        //create prohibited set
        patternLen=0;
        pattern =  ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
        UnicodeString test(pattern,patternLen);
        prohibited.applyPattern(test,status);
#ifdef NPTRANS_DEBUG
        if(U_FAILURE(status)){
            printf("Construction of Unicode set failed\n");
        }

        if(U_SUCCESS(status)){
            if(prohibited.contains((UChar) 0x644)){
                printf("The string contains 0x644 ... !!\n");
            }
            UnicodeString temp;
            prohibited.toPattern(temp,TRUE);

            for(int32_t i=0;i<temp.length();i++){
                printf("%c", (char)temp.charAt(i));
            }
            printf("\n");
        }
#endif
        
        //create label separator set
        patternLen=0;
        pattern =  ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
        labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
    }

    if(U_SUCCESS(status) && 
        (mapping == NULL)
      ){
        status = U_MEMORY_ALLOCATION_ERROR;
        delete mapping;
        ures_close(bundle);
        mapping = NULL;
        bundle = NULL;
    }
        
}
void
NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
{
    // start by creating a Vector whose elements are Strings containing
    // the descriptions of the rules (one rule per element).  The rules
    // are separated by semicolons (there's no escape facility: ALL
    // semicolons are rule delimiters)

    if (U_FAILURE(status)) {
        return;
    }

    // dlf - the original code kept a separate description array for no reason,
    // so I got rid of it.  The loop was too complex so I simplified it.

    UnicodeString currentDescription;
    int32_t oldP = 0;
    while (oldP < description.length()) {
        int32_t p = description.indexOf(gSemicolon, oldP);
        if (p == -1) {
            p = description.length();
        }
        currentDescription.setTo(description, oldP, p - oldP);
        NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
        oldP = p + 1;
    }

    // for rules that didn't specify a base value, their base values
    // were initialized to 0.  Make another pass through the list and
    // set all those rules' base values.  We also remove any special
    // rules from the list and put them into their own member variables
    int64_t defaultBaseValue = 0;

    // (this isn't a for loop because we might be deleting items from
    // the vector-- we want to make sure we only increment i when
    // we _didn't_ delete aything from the vector)
    uint32_t i = 0;
    while (i < rules.size()) {
        NFRule* rule = rules[i];

        switch (rule->getType()) {
            // if the rule's base value is 0, fill in a default
            // base value (this will be 1 plus the preceding
            // rule's base value for regular rule sets, and the
            // same as the preceding rule's base value in fraction
            // rule sets)
        case NFRule::kNoBase:
            rule->setBaseValue(defaultBaseValue, status);
            if (!isFractionRuleSet()) {
                ++defaultBaseValue;
            }
            ++i;
            break;

            // if it's the negative-number rule, copy it into its own
            // data member and delete it from the list
        case NFRule::kNegativeNumberRule:
            negativeNumberRule = rules.remove(i);
            break;

            // if it's the improper fraction rule, copy it into the
            // correct element of fractionRules
        case NFRule::kImproperFractionRule:
            fractionRules[0] = rules.remove(i);
            break;

            // if it's the proper fraction rule, copy it into the
            // correct element of fractionRules
        case NFRule::kProperFractionRule:
            fractionRules[1] = rules.remove(i);
            break;

            // if it's the master rule, copy it into the
            // correct element of fractionRules
        case NFRule::kMasterRule:
            fractionRules[2] = rules.remove(i);
            break;

            // if it's a regular rule that already knows its base value,
            // check to make sure the rules are in order, and update
            // the default base value for the next rule
        default:
            if (rule->getBaseValue() < defaultBaseValue) {
                // throw new IllegalArgumentException("Rules are not in order");
                status = U_PARSE_ERROR;
                return;
            }
            defaultBaseValue = rule->getBaseValue();
            if (!isFractionRuleSet()) {
                ++defaultBaseValue;
            }
            ++i;
            break;
        }
    }
}
예제 #10
0
파일: tmutfmt.cpp 프로젝트: Akesure/jxcore
void 
TimeUnitFormat::parseObject(const UnicodeString& source, 
                            Formattable& result,
                            ParsePosition& pos) const {
    double resultNumber = -1; 
    UBool withNumberFormat = false;
    TimeUnit::UTimeUnitFields resultTimeUnit = TimeUnit::UTIMEUNIT_FIELD_COUNT;
    int32_t oldPos = pos.getIndex();
    int32_t newPos = -1;
    int32_t longestParseDistance = 0;
    UnicodeString* countOfLongestMatch = NULL;
#ifdef TMUTFMT_DEBUG
    char res[1000];
    source.extract(0, source.length(), res, "UTF-8");
    std::cout << "parse source: " << res << "\n";           
#endif
    // parse by iterating through all available patterns
    // and looking for the longest match.
    for (TimeUnit::UTimeUnitFields i = TimeUnit::UTIMEUNIT_YEAR;
         i < TimeUnit::UTIMEUNIT_FIELD_COUNT;
         i = (TimeUnit::UTimeUnitFields)(i+1)) {
        Hashtable* countToPatterns = fTimeUnitToCountToPatterns[i];
        int32_t elemPos = -1;
        const UHashElement* elem = NULL;
        while ((elem = countToPatterns->nextElement(elemPos)) != NULL){
            const UHashTok keyTok = elem->key;
            UnicodeString* count = (UnicodeString*)keyTok.pointer;
#ifdef TMUTFMT_DEBUG
            count->extract(0, count->length(), res, "UTF-8");
            std::cout << "parse plural count: " << res << "\n";           
#endif
            const UHashTok valueTok = elem->value;
            // the value is a pair of MessageFormat*
            MessageFormat** patterns = (MessageFormat**)valueTok.pointer;
            for (UTimeUnitFormatStyle style = UTMUTFMT_FULL_STYLE; style < UTMUTFMT_FORMAT_STYLE_COUNT;
                 style = (UTimeUnitFormatStyle)(style + 1)) {
                MessageFormat* pattern = patterns[style];
                pos.setErrorIndex(-1);
                pos.setIndex(oldPos);
                // see if we can parse
                Formattable parsed;
                pattern->parseObject(source, parsed, pos);
                if (pos.getErrorIndex() != -1 || pos.getIndex() == oldPos) {
                    continue;
                }
    #ifdef TMUTFMT_DEBUG
                std::cout << "parsed.getType: " << parsed.getType() << "\n";
    #endif
                double tmpNumber = 0;
                if (pattern->getArgTypeCount() != 0) {
                    // pattern with Number as beginning, such as "{0} d".
                    // check to make sure that the timeUnit is consistent
                    Formattable& temp = parsed[0];
                    if (temp.getType() == Formattable::kDouble) {
                        tmpNumber = temp.getDouble();
                    } else if (temp.getType() == Formattable::kLong) {
                        tmpNumber = temp.getLong();
                    } else {
                        continue;
                    }
                    UnicodeString select = fPluralRules->select(tmpNumber);
    #ifdef TMUTFMT_DEBUG
                    select.extract(0, select.length(), res, "UTF-8");
                    std::cout << "parse plural select count: " << res << "\n"; 
    #endif
                    if (*count != select) {
                        continue;
                    }
                }
                int32_t parseDistance = pos.getIndex() - oldPos;
                if (parseDistance > longestParseDistance) {
                    if (pattern->getArgTypeCount() != 0) {
                        resultNumber = tmpNumber;
                        withNumberFormat = true;
                    } else {
                        withNumberFormat = false;
                    }
                    resultTimeUnit = i;
                    newPos = pos.getIndex();
                    longestParseDistance = parseDistance;
                    countOfLongestMatch = count;
                }
            }
        }
    }
    /* After find the longest match, parse the number.
     * Result number could be null for the pattern without number pattern.
     * such as unit pattern in Arabic.
     * When result number is null, use plural rule to set the number.
     */
    if (withNumberFormat == false && longestParseDistance != 0) {
        // set the number using plurrual count
        if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ZERO, 4)) {
            resultNumber = 0;
        } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_ONE, 3)) {
            resultNumber = 1;
        } else if (0 == countOfLongestMatch->compare(PLURAL_COUNT_TWO, 3)) {
            resultNumber = 2;
        } else {
            // should not happen.
            // TODO: how to handle?
            resultNumber = 3;
        }
    }
    if (longestParseDistance == 0) {
        pos.setIndex(oldPos);
        pos.setErrorIndex(0);
    } else {
        UErrorCode status = U_ZERO_ERROR;
        TimeUnitAmount* tmutamt = new TimeUnitAmount(resultNumber, resultTimeUnit, status);
        if (U_SUCCESS(status)) {
            result.adoptObject(tmutamt);
            pos.setIndex(newPos);
            pos.setErrorIndex(-1);
        } else {
            pos.setIndex(oldPos);
            pos.setErrorIndex(0);
        }
    }
}
예제 #11
0
void   RegexPattern::dumpOp(int32_t index) const {
    static const char * const opNames[] = {URX_OPCODE_NAMES};
    int32_t op          = fCompiledPat->elementAti(index);
    int32_t val         = URX_VAL(op);
    int32_t type        = URX_TYPE(op);
    int32_t pinnedType  = type;
    if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
        pinnedType = 0;
    }

    REGEX_DUMP_DEBUG_PRINTF(("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]));
    switch (type) {
    case URX_NOP:
    case URX_DOTANY:
    case URX_DOTANY_ALL:
    case URX_FAIL:
    case URX_CARET:
    case URX_DOLLAR:
    case URX_BACKSLASH_G:
    case URX_BACKSLASH_X:
    case URX_END:
    case URX_DOLLAR_M:
    case URX_CARET_M:
        // Types with no operand field of interest.
        break;

    case URX_RESERVED_OP:
    case URX_START_CAPTURE:
    case URX_END_CAPTURE:
    case URX_STATE_SAVE:
    case URX_JMP:
    case URX_JMP_SAV:
    case URX_JMP_SAV_X:
    case URX_BACKSLASH_B:
    case URX_BACKSLASH_BU:
    case URX_BACKSLASH_D:
    case URX_BACKSLASH_Z:
    case URX_STRING_LEN:
    case URX_CTR_INIT:
    case URX_CTR_INIT_NG:
    case URX_CTR_LOOP:
    case URX_CTR_LOOP_NG:
    case URX_RELOC_OPRND:
    case URX_STO_SP:
    case URX_LD_SP:
    case URX_BACKREF:
    case URX_STO_INP_LOC:
    case URX_JMPX:
    case URX_LA_START:
    case URX_LA_END:
    case URX_BACKREF_I:
    case URX_LB_START:
    case URX_LB_CONT:
    case URX_LB_END:
    case URX_LBN_CONT:
    case URX_LBN_END:
    case URX_LOOP_C:
    case URX_LOOP_DOT_I:
        // types with an integer operand field.
        REGEX_DUMP_DEBUG_PRINTF(("%d", val));
        break;

    case URX_ONECHAR:
    case URX_ONECHAR_I:
        REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
        break;

    case URX_STRING:
    case URX_STRING_I:
        {
            int32_t lengthOp       = fCompiledPat->elementAti(index+1);
            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
            int32_t length = URX_VAL(lengthOp);
            int32_t i;
            for (i=val; i<val+length; i++) {
                UChar c = fLiteralText[i];
                if (c < 32 || c >= 256) {c = '.';}
                REGEX_DUMP_DEBUG_PRINTF(("%c", c));
            }
        }
        break;

    case URX_SETREF:
    case URX_LOOP_SR_I:
        {
            UnicodeString s;
            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
            set->toPattern(s, TRUE);
            for (int32_t i=0; i<s.length(); i++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
            }
        }
        break;

    case URX_STATIC_SETREF:
    case URX_STAT_SETREF_N:
        {
            UnicodeString s;
            if (val & URX_NEG_SET) {
                REGEX_DUMP_DEBUG_PRINTF(("NOT "));
                val &= ~URX_NEG_SET;
            }
            UnicodeSet *set = fStaticSets[val];
            set->toPattern(s, TRUE);
            for (int32_t i=0; i<s.length(); i++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
            }
        }
        break;


    default:
        REGEX_DUMP_DEBUG_PRINTF(("??????"));
        break;
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
}
예제 #12
0
void grid_renderer<T>::process(text_symbolizer const& sym,
                              Feature const& feature,
                              proj_transform const& prj_trans)
{
    typedef  coord_transform2<CoordTransform,geometry_type> path_type;

    bool placement_found = false;
    text_placement_info_ptr placement_options = sym.get_placement_options()->get_placement_info();
    while (!placement_found && placement_options->next())
    {
        expression_ptr name_expr = sym.get_name();
        if (!name_expr) return;
        value_type result = boost::apply_visitor(evaluate<Feature,value_type>(feature),*name_expr);
        UnicodeString text = result.to_unicode();

        if ( sym.get_text_transform() == UPPERCASE)
        {
            text = text.toUpper();
        }
        else if ( sym.get_text_transform() == LOWERCASE)
        {
            text = text.toLower();
        }
        else if ( sym.get_text_transform() == CAPITALIZE)
        {
            text = text.toTitle(NULL);
        }

        if ( text.length() <= 0 ) continue;
        color const& fill = sym.get_fill();

        face_set_ptr faces;

        if (sym.get_fontset().size() > 0)
        {
            faces = font_manager_.get_face_set(sym.get_fontset());
        }
        else
        {
            faces = font_manager_.get_face_set(sym.get_face_name());
        }

        stroker_ptr strk = font_manager_.get_stroker();
        if (!(faces->size() > 0 && strk))
        {
            throw config_error("Unable to find specified font face '" + sym.get_face_name() + "'");
        }
        text_renderer<T> ren(pixmap_, faces, *strk);
        ren.set_pixel_size(placement_options->text_size * (scale_factor_ * (1.0/pixmap_.get_resolution())));
        ren.set_fill(fill);
        ren.set_halo_fill(sym.get_halo_fill());
        ren.set_halo_radius(sym.get_halo_radius() * scale_factor_);
        ren.set_opacity(sym.get_text_opacity());

        // /pixmap_.get_resolution() ?
        box2d<double> dims(0,0,width_,height_);
        placement_finder<label_collision_detector4> finder(detector_,dims);

        string_info info(text);

        faces->get_string_info(info);
        unsigned num_geom = feature.num_geometries();
        for (unsigned i=0; i<num_geom; ++i)
        {
            geometry_type const& geom = feature.get_geometry(i);
            if (geom.num_points() == 0) continue; // don't bother with empty geometries
            while (!placement_found && placement_options->next_position_only())
            {
                placement text_placement(info, sym, scale_factor_);
                text_placement.avoid_edges = sym.get_avoid_edges();
                if (sym.get_label_placement() == POINT_PLACEMENT ||
                        sym.get_label_placement() == INTERIOR_PLACEMENT)
                {
                    double label_x, label_y, z=0.0;
                    if (sym.get_label_placement() == POINT_PLACEMENT)
                        geom.label_position(&label_x, &label_y);
                    else
                        geom.label_interior_position(&label_x, &label_y);
                    prj_trans.backward(label_x,label_y, z);
                    t_.forward(&label_x,&label_y);

                    double angle = 0.0;
                    expression_ptr angle_expr = sym.get_orientation();
                    if (angle_expr)
                    {
                        // apply rotation
                        value_type result = boost::apply_visitor(evaluate<Feature,value_type>(feature),*angle_expr);
                        angle = result.to_double();
                    }

                    finder.find_point_placement(text_placement, placement_options,
                                                label_x, label_y,
                                                angle, sym.get_line_spacing(),
                                                sym.get_character_spacing());

                    finder.update_detector(text_placement);
                }
                else if ( geom.num_points() > 1 && sym.get_label_placement() == LINE_PLACEMENT)
                {
                    path_type path(t_,geom,prj_trans);
                    finder.find_line_placements<path_type>(text_placement, placement_options, path);
                }

                if (!text_placement.placements.size()) continue;
                placement_found = true;

                for (unsigned int ii = 0; ii < text_placement.placements.size(); ++ii)
                {
                    double x = text_placement.placements[ii].starting_x;
                    double y = text_placement.placements[ii].starting_y;
                    ren.prepare_glyphs(&text_placement.placements[ii]);
                    ren.render_id(feature.id(),x,y,2);
                }
            }
        }
    }
    if (placement_found)
        pixmap_.add_feature(feature);
}
예제 #13
0
NFSubstitution::NFSubstitution(int32_t _pos,
                               const NFRuleSet* _ruleSet,
                               const RuleBasedNumberFormat* formatter,
                               const UnicodeString& description,
                               UErrorCode& status)
                               : pos(_pos), ruleSet(NULL), numberFormat(NULL)
{
    // the description should begin and end with the same character.
    // If it doesn't that's a syntax error.  Otherwise,
    // makeSubstitution() was the only thing that needed to know
    // about these characters, so strip them off
    UnicodeString workingDescription(description);
    if (description.length() >= 2
        && description.charAt(0) == description.charAt(description.length() - 1))
    {
        workingDescription.remove(description.length() - 1, 1);
        workingDescription.remove(0, 1);
    }
    else if (description.length() != 0) {
        // throw new IllegalArgumentException("Illegal substitution syntax");
        status = U_PARSE_ERROR;
        return;
    }

    // if the description was just two paired token characters
    // (i.e., "<<" or ">>"), it uses the rule set it belongs to to
    // format its result
    if (workingDescription.length() == 0) {
        this->ruleSet = _ruleSet;
    }
    // if the description contains a rule set name, that's the rule
    // set we use to format the result: get a reference to the
    // names rule set
    else if (workingDescription.charAt(0) == gPercent) {
        this->ruleSet = formatter->findRuleSet(workingDescription, status);
    }
    // if the description begins with 0 or #, treat it as a
    // DecimalFormat pattern, and initialize a DecimalFormat with
    // that pattern (then set it to use the DecimalFormatSymbols
    // belonging to our formatter)
    else if (workingDescription.charAt(0) == gPound || workingDescription.charAt(0) ==gZero) {
        DecimalFormatSymbols* sym = formatter->getDecimalFormatSymbols();
        if (!sym) {
            status = U_MISSING_RESOURCE_ERROR;
            return;
        }
        this->numberFormat = new DecimalFormat(workingDescription, *sym, status);
        /* test for NULL */
        if (this->numberFormat == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (U_FAILURE(status)) {
            delete (DecimalFormat*)this->numberFormat;
            this->numberFormat = NULL;
            return;
        }
        // this->numberFormat->setDecimalFormatSymbols(formatter->getDecimalFormatSymbols());
    }
    // if the description is ">>>", this substitution bypasses the
    // usual rule-search process and always uses the rule that precedes
    // it in its own rule set's rule list (this is used for place-value
    // notations: formats where you want to see a particular part of
    // a number even when it's 0)
    else if (workingDescription.charAt(0) == gGreaterThan) {
        // this causes problems when >>> is used in a frationalPartSubstitution
        // this->ruleSet = NULL;
        this->ruleSet = _ruleSet;
        this->numberFormat = NULL;
    }
    // and of the description is none of these things, it's a syntax error
    else {
        // throw new IllegalArgumentException("Illegal substitution syntax");
        status = U_PARSE_ERROR;
    }
}
예제 #14
0
U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
                          const icu::UnicodeString &id, 
                          int32_t *position,
                          UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return 0;
    }
    int32_t result = 0;

    IdentifierInfo *identifierInfo = NULL;
    if ((This->fChecks) & (USPOOF_RESTRICTION_LEVEL | USPOOF_MIXED_NUMBERS)) {
        identifierInfo = This->getIdentifierInfo(*status);
        if (U_FAILURE(*status)) {
            goto cleanupAndReturn;
        }
        identifierInfo->setIdentifier(id, *status);
        identifierInfo->setIdentifierProfile(*This->fAllowedCharsSet);
    }


    if ((This->fChecks) & USPOOF_RESTRICTION_LEVEL) {
        URestrictionLevel idRestrictionLevel = identifierInfo->getRestrictionLevel(*status);
        if (idRestrictionLevel > This->fRestrictionLevel) {
            result |= USPOOF_RESTRICTION_LEVEL;
        }
        if (This->fChecks & USPOOF_AUX_INFO) {
            result |= idRestrictionLevel;
        }
    }

    if ((This->fChecks) & USPOOF_MIXED_NUMBERS) {
        const UnicodeSet *numerics = identifierInfo->getNumerics();
        if (numerics->size() > 1) {
            result |= USPOOF_MIXED_NUMBERS;
        }

        // TODO: ICU4J returns the UnicodeSet of the numerics found in the identifier.
        //       We have no easy way to do the same in C.
        // if (checkResult != null) {
        //     checkResult.numerics = numerics;
        // }
    }


    if (This->fChecks & (USPOOF_CHAR_LIMIT)) {
        int32_t i;
        UChar32 c;
        int32_t length = id.length();
        for (i=0; i<length ;) {
            c = id.char32At(i);
            i += U16_LENGTH(c);
            if (!This->fAllowedCharsSet->contains(c)) {
                result |= USPOOF_CHAR_LIMIT;
                break;
            }
        }
    }

    if (This->fChecks & 
        (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) {
        // These are the checks that need to be done on NFD input
        UnicodeString nfdText;
        gNfdNormalizer->normalize(id, nfdText, *status);
        int32_t nfdLength = nfdText.length();

        if (This->fChecks & USPOOF_INVISIBLE) {
           
            // scan for more than one occurence of the same non-spacing mark
            // in a sequence of non-spacing marks.
            int32_t     i;
            UChar32     c;
            UChar32     firstNonspacingMark = 0;
            UBool       haveMultipleMarks = FALSE;  
            UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
            
            for (i=0; i<nfdLength ;) {
                c = nfdText.char32At(i);
                i += U16_LENGTH(c);
                if (u_charType(c) != U_NON_SPACING_MARK) {
                    firstNonspacingMark = 0;
                    if (haveMultipleMarks) {
                        marksSeenSoFar.clear();
                        haveMultipleMarks = FALSE;
                    }
                    continue;
                }
                if (firstNonspacingMark == 0) {
                    firstNonspacingMark = c;
                    continue;
                }
                if (!haveMultipleMarks) {
                    marksSeenSoFar.add(firstNonspacingMark);
                    haveMultipleMarks = TRUE;
                }
                if (marksSeenSoFar.contains(c)) {
                    // report the error, and stop scanning.
                    // No need to find more than the first failure.
                    result |= USPOOF_INVISIBLE;
                    break;
                }
                marksSeenSoFar.add(c);
            }
        }
       
        
        if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE)) {
            // The basic test is the same for both whole and mixed script confusables.
            // Compute the set of scripts that every input character has a confusable in.
            // For this computation an input character is always considered to be
            // confusable with itself in its own script.
            //
            // If the number of such scripts is two or more, and the input consisted of
            // characters all from a single script, we have a whole script confusable.
            // (The two scripts will be the original script and the one that is confusable)
            //
            // If the number of such scripts >= one, and the original input contained characters from
            // more than one script, we have a mixed script confusable.  (We can transform
            // some of the characters, and end up with a visually similar string all in
            // one script.)

            if (identifierInfo == NULL) {
                identifierInfo = This->getIdentifierInfo(*status);
                if (U_FAILURE(*status)) {
                    goto cleanupAndReturn;
                }
                identifierInfo->setIdentifier(id, *status);
            }

            int32_t scriptCount = identifierInfo->getScriptCount();
            
            ScriptSet scripts;
            This->wholeScriptCheck(nfdText, &scripts, *status);
            int32_t confusableScriptCount = scripts.countMembers();
            //printf("confusableScriptCount = %d\n", confusableScriptCount);
            
            if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 2 &&
                scriptCount == 1) {
                result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
            }
        
            if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 1 &&
                scriptCount > 1) {
                result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
            }
        }
    }

cleanupAndReturn:
    This->releaseIdentifierInfo(identifierInfo);
    if (position != NULL) {
        *position = 0;
    }
    return result;
}
예제 #15
0
UVector*
ZoneMeta::createMetazoneMappings(const UnicodeString &tzid) {
    UVector *mzMappings = NULL;
    UErrorCode status = U_ZERO_ERROR;

    UnicodeString canonicalID;
    UResourceBundle *rb = ures_openDirect(NULL, gMetaZones, &status);
    ures_getByKey(rb, gMetazoneInfo, rb, &status);
    getCanonicalCLDRID(tzid, canonicalID, status);

    if (U_SUCCESS(status)) {
        char tzKey[ZID_KEY_MAX + 1];
        int32_t tzKeyLen = canonicalID.extract(0, canonicalID.length(), tzKey, sizeof(tzKey), US_INV);
        tzKey[tzKeyLen] = 0;

        // tzid keys are using ':' as separators
        char *p = tzKey;
        while (*p) {
            if (*p == '/') {
                *p = ':';
            }
            p++;
        }

        ures_getByKey(rb, tzKey, rb, &status);

        if (U_SUCCESS(status)) {
            UResourceBundle *mz = NULL;
            while (ures_hasNext(rb)) {
                mz = ures_getNextResource(rb, mz, &status);

                const UChar *mz_name = ures_getStringByIndex(mz, 0, NULL, &status);
                const UChar *mz_from = gDefaultFrom;
                const UChar *mz_to = gDefaultTo;

                if (ures_getSize(mz) == 3) {
                    mz_from = ures_getStringByIndex(mz, 1, NULL, &status);
                    mz_to   = ures_getStringByIndex(mz, 2, NULL, &status);
                }

                if(U_FAILURE(status)){
                    status = U_ZERO_ERROR;
                    continue;
                }
                // We do not want to use SimpleDateformat to parse boundary dates,
                // because this code could be triggered by the initialization code
                // used by SimpleDateFormat.
                UDate from = parseDate(mz_from, status);
                UDate to = parseDate(mz_to, status);
                if (U_FAILURE(status)) {
                    status = U_ZERO_ERROR;
                    continue;
                }

                OlsonToMetaMappingEntry *entry = (OlsonToMetaMappingEntry*)uprv_malloc(sizeof(OlsonToMetaMappingEntry));
                if (entry == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    break;
                }
                entry->mzid = mz_name;
                entry->from = from;
                entry->to = to;

                if (mzMappings == NULL) {
                    mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status);
                    if (U_FAILURE(status)) {
                        delete mzMappings;
                        deleteOlsonToMetaMappingEntry(entry);
                        uprv_free(entry);
                        break;
                    }
                }

                mzMappings->addElement(entry, status);
                if (U_FAILURE(status)) {
                    break;
                }
            }
            ures_close(mz);
            if (U_FAILURE(status)) {
                if (mzMappings != NULL) {
                    delete mzMappings;
                    mzMappings = NULL;
                }
            }
        }
    }
    ures_close(rb);
    return mzMappings;
}
예제 #16
0
void Win32DateTimeTest::testLocales(TestLog *log)
{
    SYSTEMTIME winNow;
    UDate icuNow = 0;
    SYSTEMTIME st;
    FILETIME ft;
    UnicodeString zoneID;
    const TimeZone *tz = TimeZone::createDefault();
    TIME_ZONE_INFORMATION tzi;

    tz->getID(zoneID);
    if (! uprv_getWindowsTimeZoneInfo(&tzi, zoneID.getBuffer(), zoneID.length())) {
        UBool found = FALSE;
        int32_t ec = TimeZone::countEquivalentIDs(zoneID);

        for (int z = 0; z < ec; z += 1) {
            UnicodeString equiv = TimeZone::getEquivalentID(zoneID, z);

            if (found = uprv_getWindowsTimeZoneInfo(&tzi, equiv.getBuffer(), equiv.length())) {
                break;
            }
        }

        if (! found) {
            GetTimeZoneInformation(&tzi);
        }
    }

    GetSystemTime(&st);
    SystemTimeToFileTime(&st, &ft);
    SystemTimeToTzSpecificLocalTime(&tzi, &st, &winNow);

    int64_t wftNow = ((int64_t) ft.dwHighDateTime << 32) + ft.dwLowDateTime;
    UErrorCode status = U_ZERO_ERROR;

    int64_t udtsNow = utmscale_fromInt64(wftNow, UDTS_WINDOWS_FILE_TIME, &status);

    icuNow = (UDate) utmscale_toInt64(udtsNow, UDTS_ICU4C_TIME, &status);

    int32_t lcidCount = 0;
    Win32Utilities::LCIDRecord *lcidRecords = Win32Utilities::getLocales(lcidCount);

    for(int i = 0; i < lcidCount; i += 1) {
        UErrorCode status = U_ZERO_ERROR;
        WCHAR longDateFormat[81], longTimeFormat[81], wdBuffer[256], wtBuffer[256];
        int32_t calType = 0;

        // NULL localeID means ICU didn't recognize this locale
        if (lcidRecords[i].localeID == NULL) {
            continue;
        }

        GetLocaleInfoW(lcidRecords[i].lcid, LOCALE_SLONGDATE,   longDateFormat, 81);
        GetLocaleInfoW(lcidRecords[i].lcid, LOCALE_STIMEFORMAT, longTimeFormat, 81);
        GetLocaleInfoW(lcidRecords[i].lcid, LOCALE_RETURN_NUMBER|LOCALE_ICALENDARTYPE, (LPWSTR) calType, sizeof(int32_t));

        char localeID[64];

        uprv_strcpy(localeID, lcidRecords[i].localeID);
        uprv_strcat(localeID, getCalendarType(calType));

        UnicodeString ubBuffer, udBuffer, utBuffer;
        Locale ulocale(localeID);
        int32_t wdLength, wtLength;

        wdLength = GetDateFormatW(lcidRecords[i].lcid, DATE_LONGDATE, &winNow, NULL, wdBuffer, UPRV_LENGTHOF(wdBuffer));
        wtLength = GetTimeFormatW(lcidRecords[i].lcid, 0, &winNow, NULL, wtBuffer, UPRV_LENGTHOF(wtBuffer));

        if (uprv_strchr(localeID, '@') > 0) {
            uprv_strcat(localeID, ";");
        } else {
            uprv_strcat(localeID, "@");
        }

        uprv_strcat(localeID, "compat=host");

        Locale wlocale(localeID);
        DateFormat *wbf = DateFormat::createDateTimeInstance(DateFormat::kFull, DateFormat::kFull, wlocale);
        DateFormat *wdf = DateFormat::createDateInstance(DateFormat::kFull, wlocale);
        DateFormat *wtf = DateFormat::createTimeInstance(DateFormat::kFull, wlocale);

        wbf->format(icuNow, ubBuffer);
        wdf->format(icuNow, udBuffer);
        wtf->format(icuNow, utBuffer);

        if (ubBuffer.indexOf(wdBuffer, wdLength - 1, 0) < 0) {
            UnicodeString baseName(wlocale.getBaseName());
            UnicodeString expected(wdBuffer);

            log->errln("DateTime format error for locale " + baseName + ": expected date \"" + expected +
                       "\" got \"" + ubBuffer + "\"");
        }

        if (ubBuffer.indexOf(wtBuffer, wtLength - 1, 0) < 0) {
            UnicodeString baseName(wlocale.getBaseName());
            UnicodeString expected(wtBuffer);

            log->errln("DateTime format error for locale " + baseName + ": expected time \"" + expected +
                       "\" got \"" + ubBuffer + "\"");
        }

        if (udBuffer.compare(wdBuffer) != 0) {
            UnicodeString baseName(wlocale.getBaseName());
            UnicodeString expected(wdBuffer);

            log->errln("Date format error for locale " + baseName + ": expected \"" + expected +
                       "\" got \"" + udBuffer + "\"");
        }

        if (utBuffer.compare(wtBuffer) != 0) {
            UnicodeString baseName(wlocale.getBaseName());
            UnicodeString expected(wtBuffer);

            log->errln("Time format error for locale " + baseName + ": expected \"" + expected +
                       "\" got \"" + utBuffer + "\"");
        }
        delete wbf;
        delete wdf;
        delete wtf;
    }

    Win32Utilities::freeLocales(lcidRecords);
    delete tz;
}
예제 #17
0
UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
    if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
        UErrorCode status = U_ZERO_ERROR;
        const UCaseProps *csp = ucase_getSingleton(&status);
        if (U_SUCCESS(status)) {
            UnicodeSet foldSet(*this);
            UnicodeString str;
            USetAdder sa = {
                (USet *)&foldSet,
                _set_add,
                _set_addRange,
                _set_addString,
                NULL // don't need remove()
            };

            // start with input set to guarantee inclusion
            // USET_CASE: remove strings because the strings will actually be reduced (folded);
            //            therefore, start with no strings and add only those needed
            if (attribute & USET_CASE_INSENSITIVE) {
                foldSet.strings->removeAllElements();
            }

            int32_t n = getRangeCount();
            UChar32 result;
            const UChar *full;
            int32_t locCache = 0;

            for (int32_t i=0; i<n; ++i) {
                UChar32 start = getRangeStart(i);
                UChar32 end   = getRangeEnd(i);

                if (attribute & USET_CASE_INSENSITIVE) {
                    // full case closure
                    for (UChar32 cp=start; cp<=end; ++cp) {
                        ucase_addCaseClosure(csp, cp, &sa);
                    }
                } else {
                    // add case mappings
                    // (does not add long s for regular s, or Kelvin for k, for example)
                    for (UChar32 cp=start; cp<=end; ++cp) {
                        result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullFolding(csp, cp, &full, 0);
                        addCaseMapping(foldSet, result, full, str);
                    }
                }
            }
            if (strings != NULL && strings->size() > 0) {
                if (attribute & USET_CASE_INSENSITIVE) {
                    for (int32_t j=0; j<strings->size(); ++j) {
                        str = *(const UnicodeString *) strings->elementAt(j);
                        str.foldCase();
                        if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) {
                            foldSet.add(str); // does not map to code points: add the folded string itself
                        }
                    }
                } else {
                    Locale root("");
#if !UCONFIG_NO_BREAK_ITERATION
                    BreakIterator *bi = BreakIterator::createWordInstance(root, status);
#endif
                    if (U_SUCCESS(status)) {
                        const UnicodeString *pStr;

                        for (int32_t j=0; j<strings->size(); ++j) {
                            pStr = (const UnicodeString *) strings->elementAt(j);
                            (str = *pStr).toLower(root);
                            foldSet.add(str);
#if !UCONFIG_NO_BREAK_ITERATION
                            (str = *pStr).toTitle(bi, root);
                            foldSet.add(str);
#endif
                            (str = *pStr).toUpper(root);
                            foldSet.add(str);
                            (str = *pStr).foldCase();
                            foldSet.add(str);
                        }
                    }
#if !UCONFIG_NO_BREAK_ITERATION
                    delete bi;
#endif
                }
            }
            *this = foldSet;
        }
    }
    return *this;
}
예제 #18
0
void
RuleParser::getNextToken(const UnicodeString& ruleData,
                         int32_t *ruleIndex,
                         UnicodeString& token,
                         tokenType& type,
                         UErrorCode &status)
{
    int32_t curIndex= *ruleIndex;
    UChar ch;
    tokenType prevType=none;

    if (U_FAILURE(status)) {
        return;
    }
    while (curIndex<ruleData.length()) {
        ch = ruleData.charAt(curIndex);
        if ( !inRange(ch, type) ) {
            status = U_ILLEGAL_CHARACTER;
            return;
        }
        switch (type) {
        case tSpace:
            if ( *ruleIndex != curIndex ) { // letter
                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                *ruleIndex=curIndex;
                type=prevType;
                getKeyType(token, type, status);
                return;
            }
            else {
                *ruleIndex=*ruleIndex+1;
            }
            break; // consective space
        case tColon:
        case tSemiColon:
            if ( *ruleIndex != curIndex ) {
                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                *ruleIndex=curIndex;
                type=prevType;
                getKeyType(token, type, status);
                return;
            }
            else {
                *ruleIndex=curIndex+1;
                return;
            }
        case tLetter:
             if ((type==prevType)||(prevType==none)) {
                prevType=type;
                break;
             }
             break;
        case tNumber:
             if ((type==prevType)||(prevType==none)) {
                prevType=type;
                break;
             }
             else {
                *ruleIndex=curIndex+1;
                return;
             }
         case tDot:
             if (prevType==none) {  // first dot
                prevType=type;
                continue;
             }
             else {
                 if ( *ruleIndex != curIndex ) {
                    token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                    *ruleIndex=curIndex;  // letter
                    type=prevType;
                    getKeyType(token, type, status);
                    return;
                 }
                 else {  // two consective dots
                    *ruleIndex=curIndex+2;
                    return;
                 }
             }
         default:
             status = U_UNEXPECTED_TOKEN;
             return;
        }
        curIndex++;
    }
    if ( curIndex>=ruleData.length() ) {
        if ( (type == tLetter)||(type == tNumber) ) {
            token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
            getKeyType(token, type, status);
            if (U_FAILURE(status)) {
                return;
            }
        }
        *ruleIndex = ruleData.length();
    }
}
예제 #19
0
U_CAPI void U_EXPORT2
umsg_vparse(const UMessageFormat *fmt,
            const UChar    *source,
            int32_t        sourceLength,
            int32_t        *count,
            va_list        ap,
            UErrorCode     *status)
{
    //check arguments
    if(status==NULL||U_FAILURE(*status))
    {
        return;
    }
    if(fmt==NULL||source==NULL || sourceLength<-1 || count==NULL){
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(sourceLength==-1){
        sourceLength=u_strlen(source);
    }

    UnicodeString srcString(source,sourceLength);
    Formattable *args = ((const MessageFormat*)fmt)->parse(source,*count,*status);
    UDate *aDate;
    double *aDouble;
    UChar *aString;
    int32_t* aInt;
    int64_t* aInt64;
    UnicodeString temp;
    int len =0;
    // assign formattables to varargs
    for(int32_t i = 0; i < *count; i++) {
        switch(args[i].getType()) {

        case Formattable::kDate:
            aDate = va_arg(ap, UDate*);
            if(aDate){
                *aDate = args[i].getDate();
            }else{
                *status=U_ILLEGAL_ARGUMENT_ERROR;
            }
            break;

        case Formattable::kDouble:
            aDouble = va_arg(ap, double*);
            if(aDouble){
                *aDouble = args[i].getDouble();
            }else{
                *status=U_ILLEGAL_ARGUMENT_ERROR;
            }
            break;

        case Formattable::kLong:
            aInt = va_arg(ap, int32_t*);
            if(aInt){
                *aInt = (int32_t) args[i].getLong();
            }else{
                *status=U_ILLEGAL_ARGUMENT_ERROR;
            }
            break;

        case Formattable::kInt64:
            aInt64 = va_arg(ap, int64_t*);
            if(aInt64){
                *aInt64 = args[i].getInt64();
            }else{
                *status=U_ILLEGAL_ARGUMENT_ERROR;
            }
            break;

        case Formattable::kString:
            aString = va_arg(ap, UChar*);
            if(aString){
                args[i].getString(temp);
                len = temp.length();
                temp.extract(0,len,aString);
                aString[len]=0;
            }else{
                *status= U_ILLEGAL_ARGUMENT_ERROR;
            }
            break;

        case Formattable::kObject:
            // This will never happen because MessageFormat doesn't
            // support kObject.  When MessageFormat is changed to
            // understand MeasureFormats, modify this code to do the
            // right thing. [alan]
            U_ASSERT(FALSE);
            break;

        // better not happen!
        case Formattable::kArray:
            U_ASSERT(FALSE);
            break;
        }
    }

    // clean up
    delete [] args;
}
예제 #20
0
UBool
RuleParser::isValidKeyword(const UnicodeString& token) {
    return PatternProps::isIdentifier(token.getBuffer(), token.length());
}
예제 #21
0
파일: nfrs.cpp 프로젝트: MIPS/external-icu
UBool
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
{
    // try matching each rule in the rule set against the text being
    // parsed.  Whichever one matches the most characters is the one
    // that determines the value we return.

    result.setLong(0);

    // dump out if there's no text to parse
    if (text.length() == 0) {
        return 0;
    }

    ParsePosition highWaterMark;
    ParsePosition workingPos = pos;

#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> %x '", this);
    dumpUS(stderr, name);
    fprintf(stderr, "' text '");
    dumpUS(stderr, text);
    fprintf(stderr, "'\n");
    fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
#endif
    // Try each of the negative rules, fraction rules, infinity rules and NaN rules
    for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
        if (nonNumericalRules[i]) {
            Formattable tempResult;
            UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
            if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                result = tempResult;
                highWaterMark = workingPos;
            }
            workingPos = pos;
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue other with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif

    // finally, go through the regular rules one at a time.  We start
    // at the end of the list because we want to try matching the most
    // sigificant rule first (this helps ensure that we parse
    // "five thousand three hundred six" as
    // "(five thousand) (three hundred) (six)" rather than
    // "((five thousand three) hundred) (six)").  Skip rules whose
    // base values are higher than the upper bound (again, this helps
    // limit ambiguity by making sure the rules that match a rule's
    // are less significant than the rule containing the substitutions)/
    {
        int64_t ub = util64_fromDouble(upperBound);
#ifdef RBNF_DEBUG
        {
            char ubstr[64];
            util64_toa(ub, ubstr, 64);
            char ubstrhex[64];
            util64_toa(ub, ubstrhex, 64, 16);
            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
        }
#endif
        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
                continue;
            }
            Formattable tempResult;
            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                result = tempResult;
                highWaterMark = workingPos;
            }
            workingPos = pos;
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> exit\n");
#endif
    // finally, update the parse postion we were passed to point to the
    // first character we didn't use, and return the result that
    // corresponds to that string of characters
    pos = highWaterMark;

    return 1;
}
예제 #22
0
void
PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
{
    int32_t ruleIndex=0;
    UnicodeString token;
    tokenType type;
    tokenType prevType=none;
    RuleChain *ruleChain=NULL;
    AndConstraint *curAndConstraint=NULL;
    OrConstraint *orNode=NULL;
    RuleChain *lastChain=NULL;

    if (U_FAILURE(status)) {
        return;
    }
    UnicodeString ruleData = data.toLower("");
    while (ruleIndex< ruleData.length()) {
        mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
        if (U_FAILURE(status)) {
            return;
        }
        mParser->checkSyntax(prevType, type, status);
        if (U_FAILURE(status)) {
            return;
        }
        switch (type) {
        case tAnd:
            U_ASSERT(curAndConstraint != NULL);
            curAndConstraint = curAndConstraint->add();
            break;
        case tOr:
            lastChain = &rules;
            while (lastChain->next !=NULL) {
                lastChain = lastChain->next;
            }
            orNode=lastChain->ruleHeader;
            while (orNode->next != NULL) {
                orNode = orNode->next;
            }
            orNode->next= new OrConstraint();
            orNode=orNode->next;
            orNode->next=NULL;
            curAndConstraint = orNode->add();
            break;
        case tIs:
            U_ASSERT(curAndConstraint != NULL);
            curAndConstraint->rangeHigh=-1;
            break;
        case tNot:
            U_ASSERT(curAndConstraint != NULL);
            curAndConstraint->notIn=TRUE;
            break;
        case tIn:
            U_ASSERT(curAndConstraint != NULL);
            curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
            curAndConstraint->integerOnly = TRUE;
            break;
        case tWithin:
            U_ASSERT(curAndConstraint != NULL);
            curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
            break;
        case tNumber:
            U_ASSERT(curAndConstraint != NULL);
            if ( (curAndConstraint->op==AndConstraint::MOD)&&
                 (curAndConstraint->opNum == -1 ) ) {
                curAndConstraint->opNum=getNumberValue(token);
            }
            else {
                if (curAndConstraint->rangeLow == -1) {
                    curAndConstraint->rangeLow=getNumberValue(token);
                }
                else {
                    curAndConstraint->rangeHigh=getNumberValue(token);
                }
            }
            break;
        case tMod:
            U_ASSERT(curAndConstraint != NULL);
            curAndConstraint->op=AndConstraint::MOD;
            break;
        case tKeyword:
            if (ruleChain==NULL) {
                ruleChain = &rules;
            }
            else {
                while (ruleChain->next!=NULL){
                    ruleChain=ruleChain->next;
                }
                ruleChain=ruleChain->next=new RuleChain();
            }
            if (ruleChain->ruleHeader != NULL) {
                delete ruleChain->ruleHeader;
            }
            orNode = ruleChain->ruleHeader = new OrConstraint();
            curAndConstraint = orNode->add();
            ruleChain->keyword = token;
            break;
        default:
            break;
        }
        prevType=type;
    }
}
예제 #23
0
void font_face_set::get_string_info(string_info & info)
{
    unsigned width = 0;
    unsigned height = 0;
    UErrorCode err = U_ZERO_ERROR;
    UnicodeString const& ustr = info.get_string();
    const UChar * text = ustr.getBuffer();
    UBiDi * bidi = ubidi_openSized(ustr.length(),0,&err);

    if (U_SUCCESS(err))
    {
        ubidi_setPara(bidi,text,ustr.length(), UBIDI_DEFAULT_LTR,0,&err);

        if (U_SUCCESS(err))
        {
            int32_t count = ubidi_countRuns(bidi,&err);
            int32_t logicalStart;
            int32_t length;

            for (int32_t i=0; i< count;++i)
            {
                if (UBIDI_LTR == ubidi_getVisualRun(bidi,i,&logicalStart,&length))
                {
                    do {
                        UChar ch = text[logicalStart++];
                        dimension_t char_dim = character_dimensions(ch);
                        info.add_info(ch, char_dim.width, char_dim.height);
                        width += char_dim.width;
                        height = char_dim.height > height ? char_dim.height : height;

                    } while (--length > 0);
                }
                else
                {
                    logicalStart += length;

                    int32_t j=0,i=length;
                    UnicodeString arabic;
                    UChar * buf = arabic.getBuffer(length);
                    do {
                        UChar ch = text[--logicalStart];
                        buf[j++] = ch;
                    } while (--i > 0);

                    arabic.releaseBuffer(length);
                    if ( *arabic.getBuffer() >= 0x0600 && *arabic.getBuffer() <= 0x06ff)
                    {
                        UnicodeString shaped;
                        u_shapeArabic(arabic.getBuffer(),arabic.length(),shaped.getBuffer(arabic.length()),arabic.length(),
                                      U_SHAPE_LETTERS_SHAPE|U_SHAPE_LENGTH_FIXED_SPACES_NEAR|
                                      U_SHAPE_TEXT_DIRECTION_VISUAL_LTR
                                      ,&err);

                        shaped.releaseBuffer(arabic.length());

                        if (U_SUCCESS(err))
                        {
                            for (int j=0;j<shaped.length();++j)
                            {
                                dimension_t char_dim = character_dimensions(shaped[j]);
                                info.add_info(shaped[j], char_dim.width, char_dim.height);
                                width += char_dim.width;
                                height = char_dim.height > height ? char_dim.height : height;
                            }
                        }
                    } else {
                        // Non-Arabic RTL
                        for (int j=0;j<arabic.length();++j)
                        {
                            dimension_t char_dim = character_dimensions(arabic[j]);
                            info.add_info(arabic[j], char_dim.width, char_dim.height);
                            width += char_dim.width;
                            height = char_dim.height > height ? char_dim.height : height;
                        }
                    }
                }
            }
        }
        ubidi_close(bidi);
    }

    info.set_dimensions(width, height);
}
예제 #24
0
UBool TransliteratorSpec::hasFallback() const {
    return nextSpec.length() != 0;
}
예제 #25
0
int32_t  U_EXPORT2 
DateIntervalFormat::splitPatternInto2Part(const UnicodeString& intervalPattern) {
    UBool inQuote = false;
    UChar prevCh = 0;
    int32_t count = 0;

    /* repeatedPattern used to record whether a pattern has already seen.
       It is a pattern applies to first calendar if it is first time seen,
       otherwise, it is a pattern applies to the second calendar
     */
    UBool patternRepeated[] = 
    {
    //       A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
             0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    //   P   Q   R   S   T   U   V   W   X   Y   Z
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,
    //       a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    //   p   q   r   s   t   u   v   w   x   y   z
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
    };

    int8_t PATTERN_CHAR_BASE = 0x41;
    
    /* loop through the pattern string character by character looking for
     * the first repeated pattern letter, which breaks the interval pattern
     * into 2 parts. 
     */
    int32_t i;
    UBool foundRepetition = false;
    for (i = 0; i < intervalPattern.length(); ++i) {
        UChar ch = intervalPattern.charAt(i);
        
        if (ch != prevCh && count > 0) {
            // check the repeativeness of pattern letter
            UBool repeated = patternRepeated[(int)(prevCh - PATTERN_CHAR_BASE)];
            if ( repeated == FALSE ) {
                patternRepeated[prevCh - PATTERN_CHAR_BASE] = TRUE;
            } else {
                foundRepetition = true;
                break;
            }
            count = 0;
        }
        if (ch == '\'') {
            // Consecutive single quotes are a single quote literal,
            // either outside of quotes or between quotes
            if ((i+1) < intervalPattern.length() && 
                intervalPattern.charAt(i+1) == '\'') {
                ++i;
            } else {
                inQuote = ! inQuote;
            }
        } 
        else if (!inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
                    || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
            // ch is a date-time pattern character 
            prevCh = ch;
            ++count;
        }
    }
    // check last pattern char, distinguish
    // "dd MM" ( no repetition ), 
    // "d-d"(last char repeated ), and 
    // "d-d MM" ( repetition found )
    if ( count > 0 && foundRepetition == FALSE ) {
        if ( patternRepeated[(int)(prevCh - PATTERN_CHAR_BASE)] == FALSE ) {
            count = 0;
        }
    }
    return (i - count);
}
예제 #26
0
const UChar* U_EXPORT2
ZoneMeta::getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return NULL;
    }

    int32_t len = tzid.length();
    if (len > ZID_KEY_MAX) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    // Checking the cached results
    umtx_initOnce(gCanonicalIDCacheInitOnce, &initCanonicalIDCache, status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    const UChar *canonicalID = NULL;

    UErrorCode tmpStatus = U_ZERO_ERROR;
    UChar utzid[ZID_KEY_MAX + 1];
    tzid.extract(utzid, ZID_KEY_MAX + 1, tmpStatus);
    U_ASSERT(tmpStatus == U_ZERO_ERROR);    // we checked the length of tzid already

    // Check if it was already cached
    umtx_lock(&gZoneMetaLock);
    {
        canonicalID = (const UChar *)uhash_get(gCanonicalIDCache, utzid);
    }
    umtx_unlock(&gZoneMetaLock);

    if (canonicalID != NULL) {
        return canonicalID;
    }

    // If not, resolve CLDR canonical ID with resource data
    UBool isInputCanonical = FALSE;
    char id[ZID_KEY_MAX + 1];
    const UChar* idChars = tzid.getBuffer();

    u_UCharsToChars(idChars,id,len);
    id[len] = (char) 0; // Make sure it is null terminated.

    // replace '/' with ':'
    char *p = id;
    while (*p++) {
        if (*p == '/') {
            *p = ':';
        }
    }

    UResourceBundle *top = ures_openDirect(NULL, gKeyTypeData, &tmpStatus);
    UResourceBundle *rb = ures_getByKey(top, gTypeMapTag, NULL, &tmpStatus);
    ures_getByKey(rb, gTimezoneTag, rb, &tmpStatus);
    ures_getByKey(rb, id, rb, &tmpStatus);
    if (U_SUCCESS(tmpStatus)) {
        // type entry (canonical) found
        // the input is the canonical ID. resolve to const UChar*
        canonicalID = TimeZone::findID(tzid);
        isInputCanonical = TRUE;
    }

    if (canonicalID == NULL) {
        // If a map element not found, then look for an alias
        tmpStatus = U_ZERO_ERROR;
        ures_getByKey(top, gTypeAliasTag, rb, &tmpStatus);
        ures_getByKey(rb, gTimezoneTag, rb, &tmpStatus);
        const UChar *canonical = ures_getStringByKey(rb,id,NULL,&tmpStatus);
        if (U_SUCCESS(tmpStatus)) {
            // canonical map found
            canonicalID = canonical;
        }

        if (canonicalID == NULL) {
            // Dereference the input ID using the tz data
            const UChar *derefer = TimeZone::dereferOlsonLink(tzid);
            if (derefer == NULL) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
            } else {
                len = u_strlen(derefer);
                u_UCharsToChars(derefer,id,len);
                id[len] = (char) 0; // Make sure it is null terminated.

                // replace '/' with ':'
                char *p = id;
                while (*p++) {
                    if (*p == '/') {
                        *p = ':';
                    }
                }

                // If a dereference turned something up then look for an alias.
                // rb still points to the alias table, so we don't have to go looking
                // for it.
                tmpStatus = U_ZERO_ERROR;
                canonical = ures_getStringByKey(rb,id,NULL,&tmpStatus);
                if (U_SUCCESS(tmpStatus)) {
                    // canonical map for the dereferenced ID found
                    canonicalID = canonical;
                } else {
                    canonicalID = derefer;
                    isInputCanonical = TRUE;
                }
            }
        }
    }
    ures_close(rb);
    ures_close(top);

    if (U_SUCCESS(status)) {
        U_ASSERT(canonicalID != NULL);  // canocanilD must be non-NULL here

        // Put the resolved canonical ID to the cache
        umtx_lock(&gZoneMetaLock);
        {
            const UChar* idInCache = (const UChar *)uhash_get(gCanonicalIDCache, utzid);
            if (idInCache == NULL) {
                const UChar* key = ZoneMeta::findTimeZoneID(tzid);
                U_ASSERT(key != NULL);
                if (key != NULL) {
                    idInCache = (const UChar *)uhash_put(gCanonicalIDCache, (void *)key, (void *)canonicalID, &status);
                    U_ASSERT(idInCache == NULL);
                }
            }
            if (U_SUCCESS(status) && isInputCanonical) {
                // Also put canonical ID itself into the cache if not exist
                const UChar *canonicalInCache = (const UChar*)uhash_get(gCanonicalIDCache, canonicalID);
                if (canonicalInCache == NULL) {
                    canonicalInCache = (const UChar *)uhash_put(gCanonicalIDCache, (void *)canonicalID, (void *)canonicalID, &status);
                    U_ASSERT(canonicalInCache == NULL);
                }
            }
        }
        umtx_unlock(&gZoneMetaLock);
    }

    return canonicalID;
}
예제 #27
0
void 
DateIntervalFormat::initializePattern(UErrorCode& status) {
    if ( U_FAILURE(status) ) {
        return;
    }
    const Locale& locale = fDateFormat->getSmpFmtLocale();
    if ( fSkeleton.isEmpty() ) {
        UnicodeString fullPattern;
        fDateFormat->toPattern(fullPattern);
#ifdef DTITVFMT_DEBUG
    char result[1000];
    char result_1[1000];
    char mesg[2000];
    fSkeleton.extract(0,  fSkeleton.length(), result, "UTF-8");
    sprintf(mesg, "in getBestSkeleton: fSkeleton: %s; \n", result);
    PRINTMESG(mesg)
#endif
        // fSkeleton is already set by createDateIntervalInstance()
        // or by createInstance(UnicodeString skeleton, .... )
        fSkeleton = fDtpng->getSkeleton(fullPattern, status);
        if ( U_FAILURE(status) ) {
            return;    
        }
    }

    // initialize the fIntervalPattern ordering
    int8_t i;
    for ( i = 0; i < DateIntervalInfo::kIPI_MAX_INDEX; ++i ) {
        fIntervalPatterns[i].laterDateFirst = fInfo->getDefaultOrder();
    }

    /* Check whether the skeleton is a combination of date and time.
     * For the complication reason 1 explained above.
     */
    UnicodeString dateSkeleton;
    UnicodeString timeSkeleton;
    UnicodeString normalizedTimeSkeleton;
    UnicodeString normalizedDateSkeleton;


    /* the difference between time skeleton and normalizedTimeSkeleton are:
     * 1. (Formerly, normalized time skeleton folded 'H' to 'h'; no longer true)
     * 2. 'a' is omitted in normalized time skeleton.
     * 3. there is only one appearance for 'h' or 'H', 'm','v', 'z' in normalized 
     *    time skeleton
     *
     * The difference between date skeleton and normalizedDateSkeleton are:
     * 1. both 'y' and 'd' appear only once in normalizeDateSkeleton
     * 2. 'E' and 'EE' are normalized into 'EEE'
     * 3. 'MM' is normalized into 'M'
     */
    getDateTimeSkeleton(fSkeleton, dateSkeleton, normalizedDateSkeleton,
                        timeSkeleton, normalizedTimeSkeleton);

#ifdef DTITVFMT_DEBUG
    char result[1000];
    char result_1[1000];
    char mesg[2000];
    fSkeleton.extract(0,  fSkeleton.length(), result, "UTF-8");
    sprintf(mesg, "in getBestSkeleton: fSkeleton: %s; \n", result);
    PRINTMESG(mesg)
#endif


    UBool found = setSeparateDateTimePtn(normalizedDateSkeleton, 
                                         normalizedTimeSkeleton);

    if ( found == false ) {
        // use fallback
        // TODO: if user asks "m"(minute), but "d"(day) differ
        if ( timeSkeleton.length() != 0 ) {
            if ( dateSkeleton.length() == 0 ) {
                // prefix with yMd
                timeSkeleton.insert(0, gDateFormatSkeleton[DateFormat::kShort]);
                UnicodeString pattern = fDtpng->getBestPattern(timeSkeleton, status);
                if ( U_FAILURE(status) ) {
                    return;    
                }
                // for fall back interval patterns,
                // the first part of the pattern is empty,
                // the second part of the pattern is the full-pattern
                // should be used in fall-back.
                setPatternInfo(UCAL_DATE, NULL, &pattern, fInfo->getDefaultOrder()); 
                setPatternInfo(UCAL_MONTH, NULL, &pattern, fInfo->getDefaultOrder()); 
                setPatternInfo(UCAL_YEAR, NULL, &pattern, fInfo->getDefaultOrder()); 
            } else {
                // TODO: fall back
            }
        } else {
            // TODO: fall back
        }
        return;
    } // end of skeleton not found
    // interval patterns for skeleton are found in resource 
    if ( timeSkeleton.length() == 0 ) {
        // done
    } else if ( dateSkeleton.length() == 0 ) {
        // prefix with yMd
        timeSkeleton.insert(0, gDateFormatSkeleton[DateFormat::kShort]);
        UnicodeString pattern = fDtpng->getBestPattern(timeSkeleton, status);
        if ( U_FAILURE(status) ) {
            return;    
        }
        // for fall back interval patterns,
        // the first part of the pattern is empty,
        // the second part of the pattern is the full-pattern
        // should be used in fall-back.
        setPatternInfo(UCAL_DATE, NULL, &pattern, fInfo->getDefaultOrder()); 
        setPatternInfo(UCAL_MONTH, NULL, &pattern, fInfo->getDefaultOrder()); 
        setPatternInfo(UCAL_YEAR, NULL, &pattern, fInfo->getDefaultOrder()); 
    } else {
        /* if both present,
         * 1) when the year, month, or day differs, 
         * concatenate the two original expressions with a separator between, 
         * 2) otherwise, present the date followed by the 
         * range expression for the time. 
         */
        /*
         * 1) when the year, month, or day differs, 
         * concatenate the two original expressions with a separator between, 
         */
        // if field exists, use fall back
        UnicodeString skeleton = fSkeleton;
        if ( !fieldExistsInSkeleton(UCAL_DATE, dateSkeleton) ) {
            // prefix skeleton with 'd'
            skeleton.insert(0, LOW_D);
            setFallbackPattern(UCAL_DATE, skeleton, status);
        }
        if ( !fieldExistsInSkeleton(UCAL_MONTH, dateSkeleton) ) {
            // then prefix skeleton with 'M'
            skeleton.insert(0, CAP_M);
            setFallbackPattern(UCAL_MONTH, skeleton, status);
        }
        if ( !fieldExistsInSkeleton(UCAL_YEAR, dateSkeleton) ) {
            // then prefix skeleton with 'y'
            skeleton.insert(0, LOW_Y);
            setFallbackPattern(UCAL_YEAR, skeleton, status);
        }
        
        /*
         * 2) otherwise, present the date followed by the 
         * range expression for the time. 
         */
        // Need the Date/Time pattern for concatnation the date with
        // the time interval.
        // The date/time pattern ( such as {0} {1} ) is saved in
        // calendar, that is why need to get the CalendarData here.
        CalendarData* calData = new CalendarData(locale, NULL, status);

        if ( U_FAILURE(status) ) {
            delete calData;
            return;
        }

        if ( calData == NULL ) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
       
        const UResourceBundle* dateTimePatternsRes = calData->getByKey(
                                           gDateTimePatternsTag, status);
        int32_t dateTimeFormatLength;
        const UChar* dateTimeFormat = ures_getStringByIndex(
                                            dateTimePatternsRes,
                                            (int32_t)DateFormat::kDateTime,
                                            &dateTimeFormatLength, &status);
        if ( U_FAILURE(status) ) {
            return;
        }

        UnicodeString datePattern = fDtpng->getBestPattern(dateSkeleton, status);

        concatSingleDate2TimeInterval(dateTimeFormat, dateTimeFormatLength,
                                      datePattern, UCAL_AM_PM, status);
        concatSingleDate2TimeInterval(dateTimeFormat, dateTimeFormatLength,
                                      datePattern, UCAL_HOUR, status);
        concatSingleDate2TimeInterval(dateTimeFormat, dateTimeFormatLength,
                                      datePattern, UCAL_MINUTE, status);
        delete calData;
    }
}
예제 #28
0
const UVector* U_EXPORT2
ZoneMeta::getMetazoneMappings(const UnicodeString &tzid) {
    UErrorCode status = U_ZERO_ERROR;
    UChar tzidUChars[ZID_KEY_MAX + 1];
    tzid.extract(tzidUChars, ZID_KEY_MAX + 1, status);
    if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
        return NULL;
    }

    umtx_initOnce(gOlsonToMetaInitOnce, &olsonToMetaInit, status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    // get the mapping from cache
    const UVector *result = NULL;

    umtx_lock(&gZoneMetaLock);
    {
        result = (UVector*) uhash_get(gOlsonToMeta, tzidUChars);
    }
    umtx_unlock(&gZoneMetaLock);

    if (result != NULL) {
        return result;
    }

    // miss the cache - create new one
    UVector *tmpResult = createMetazoneMappings(tzid);
    if (tmpResult == NULL) {
        // not available
        return NULL;
    }

    // put the new one into the cache
    umtx_lock(&gZoneMetaLock);
    {
        // make sure it's already created
        result = (UVector*) uhash_get(gOlsonToMeta, tzidUChars);
        if (result == NULL) {
            // add the one just created
            int32_t tzidLen = tzid.length() + 1;
            UChar *key = (UChar*)uprv_malloc(tzidLen * sizeof(UChar));
            if (key == NULL) {
                // memory allocation error..  just return NULL
                result = NULL;
                delete tmpResult;
            } else {
                tzid.extract(key, tzidLen, status);
                uhash_put(gOlsonToMeta, key, tmpResult, &status);
                if (U_FAILURE(status)) {
                    // delete the mapping
                    result = NULL;
                    delete tmpResult;
                } else {
                    result = tmpResult;
                }
            }
        } else {
            // another thread already put the one
            delete tmpResult;
        }
    }
    umtx_unlock(&gZoneMetaLock);

    return result;
}
예제 #29
0
UBool 
DateIntervalFormat::setSeparateDateTimePtn(
                                 const UnicodeString& dateSkeleton,
                                 const UnicodeString& timeSkeleton) {
    const UnicodeString* skeleton;
    // if both date and time skeleton present,
    // the final interval pattern might include time interval patterns
    // ( when, am_pm, hour, minute differ ),
    // but not date interval patterns ( when year, month, day differ ).
    // For year/month/day differ, it falls back to fall-back pattern.
    if ( timeSkeleton.length() != 0  ) {
        skeleton = &timeSkeleton;
    } else {
        skeleton = &dateSkeleton;
    }

    /* interval patterns for skeleton "dMMMy" (but not "dMMMMy") 
     * are defined in resource,
     * interval patterns for skeleton "dMMMMy" are calculated by
     * 1. get the best match skeleton for "dMMMMy", which is "dMMMy"
     * 2. get the interval patterns for "dMMMy",
     * 3. extend "MMM" to "MMMM" in above interval patterns for "dMMMMy" 
     * getBestSkeleton() is step 1.
     */
    // best skeleton, and the difference information
    int8_t differenceInfo = 0;
    const UnicodeString* bestSkeleton = fInfo->getBestSkeleton(*skeleton, 
                                                               differenceInfo);
    /* best skeleton could be NULL.
       For example: in "ca" resource file,
       interval format is defined as following
           intervalFormats{
                fallback{"{0} - {1}"}
            }
       there is no skeletons/interval patterns defined,
       and the best skeleton match could be NULL
     */
    if ( bestSkeleton == NULL ) {
        return false; 
    } 
   
    // difference:
    // 0 means the best matched skeleton is the same as input skeleton
    // 1 means the fields are the same, but field width are different
    // 2 means the only difference between fields are v/z,
    // -1 means there are other fields difference 
    if ( differenceInfo == -1 ) { 
        // skeleton has different fields, not only  v/z difference
        return false;
    }

    if ( timeSkeleton.length() == 0 ) {
        UnicodeString extendedSkeleton;
        UnicodeString extendedBestSkeleton;
        // only has date skeleton
        setIntervalPattern(UCAL_DATE, skeleton, bestSkeleton, differenceInfo,
                           &extendedSkeleton, &extendedBestSkeleton);

        UBool extended = setIntervalPattern(UCAL_MONTH, skeleton, bestSkeleton, 
                                     differenceInfo,
                                     &extendedSkeleton, &extendedBestSkeleton);
                                              
        if ( extended ) {
            bestSkeleton = &extendedBestSkeleton;
            skeleton = &extendedSkeleton;
        }
        setIntervalPattern(UCAL_YEAR, skeleton, bestSkeleton, differenceInfo,
                           &extendedSkeleton, &extendedBestSkeleton);
    } else {
        setIntervalPattern(UCAL_MINUTE, skeleton, bestSkeleton, differenceInfo);
        setIntervalPattern(UCAL_HOUR, skeleton, bestSkeleton, differenceInfo);
        setIntervalPattern(UCAL_AM_PM, skeleton, bestSkeleton, differenceInfo);
    }
    return true;
}
예제 #30
0
int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
                                         uint8_t *result, int32_t resultLength)
                                         const
{
    return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
}