Пример #1
0
/**
 * Scans the body of a heredoc whose opening line has already been consumed.
 *
 * The closing identifier is the text between the "<<<" marker (3 code units
 * past TokenStart) and the end of the opening line. Current is already past
 * the newline, so one unit is dropped and the rest is trimmed, because a
 * newline can be more than one character. Surrounding double quotes are
 * removed from the identifier.
 *
 * Difference from PHP: the PHP scanner returns T_ENCAPSED_AND_WHITESPACE when
 * the string has no embedded variables. Embedded variables are not tracked
 * here, so a heredoc is always reported like a single quoted string.
 *
 * @param buffer scanner buffer; TokenStart points at "<<<" and Current is
 *               just past the opening line
 * @return T_CONSTANT_ENCAPSED_STRING when the closing identifier was found,
 *         otherwise the non-zero code returned by pelet::SkipToIdentifier
 */
int pelet::HandleHeredoc(BufferClass *buffer) {
	const UnicodeString doubleQuote = UNICODE_STRING("\"", 1);

	// everything after "<<<" up to (but excluding) the unit before Current
	const int32_t rawLength = buffer->Current - buffer->TokenStart - 3 - 1;
	UnicodeString identifier(buffer->TokenStart + 3, rawLength);
	identifier.trim();

	// strip the optional double quotes around the identifier
	if (identifier.startsWith(doubleQuote)) {
		identifier.remove(0, 1);
	}
	if (identifier.endsWith(doubleQuote)) {
		identifier.remove(identifier.length() - 1, 1);
	}

	// top up the buffer when fewer than two units remain
	if ((buffer->Limit - buffer->Current) < 2) {
		buffer->AppendToLexeme(1);
	}

	const int skipResult = pelet::SkipToIdentifier(buffer, identifier);
	if (skipResult) {
		return skipResult;
	}
	return T_CONSTANT_ENCAPSED_STRING;
}
Пример #2
0
/**
 * Builds the three frozen "ignorable character" sets.
 *
 * If any of the three allocations returns NULL, every set is deleted, the
 * members are reset to NULL and status is set to U_MEMORY_ALLOCATION_ERROR.
 *
 * NOTE(review): status is passed to the UnicodeSet constructors but is not
 * inspected here; only NULL pointers are treated as failure.
 *
 * @param status ICU error code, written on allocation failure
 */
SimpleDateFormatStaticSets::SimpleDateFormatStaticSets(UErrorCode &status)
: fDateIgnorables(NULL),
  fTimeIgnorables(NULL),
  fOtherIgnorables(NULL)
{
    fDateIgnorables  = new UnicodeSet(UNICODE_STRING("[-,./[:whitespace:]]", 20), status);
    fTimeIgnorables  = new UnicodeSet(UNICODE_STRING("[-.:[:whitespace:]]", 19),  status);
    fOtherIgnorables = new UnicodeSet(UNICODE_STRING("[:whitespace:]", 14),       status);

    const bool allAllocated =
        fDateIgnorables != NULL && fTimeIgnorables != NULL && fOtherIgnorables != NULL;

    if (allAllocated) {
        // Everything is in place: make the sets immutable and finish.
        fDateIgnorables->freeze();
        fTimeIgnorables->freeze();
        fOtherIgnorables->freeze();
        return;
    }

    // At least one allocation failed: release whatever was created.
    delete fDateIgnorables;
    fDateIgnorables = NULL;
    delete fTimeIgnorables;
    fTimeIgnorables = NULL;
    delete fOtherIgnorables;
    fOtherIgnorables = NULL;

    status = U_MEMORY_ALLOCATION_ERROR;
}
Пример #3
0
/*
 * Demonstrates several ways of initializing Unicode strings from C string
 * literals and prints the results with printf()/printUnicodeString():
 *   - UNICODE_STRING() for a C++ UnicodeString,
 *   - U_STRING_DECL()/U_STRING_INIT() for a C UChar array,
 *   - u_charsToUChars()/u_UCharsToChars() for a char * <-> UChar * round trip,
 *   - UnicodeString::unescape() and u_unescape() for escape sequences.
 * Every macro that takes a literal also takes its length, which is counted
 * by hand here.
 */
static void
demoUnicodeStringInit() {
    // *** Make sure to read about invariant characters in utypes.h! ***
    // Initialization of Unicode strings from C literals works _only_ for
    // invariant characters!

    printf("\n* demoUnicodeStringInit() ---------- ***\n\n");

    // the string literal is 32 chars long - this must be counted for the macro
    UnicodeString invariantOnly=UNICODE_STRING("such characters are safe 123 %-.", 32);

    /*
     * In C, we need two macros: one to declare the UChar[] array, and
     * one to populate it; the second one is a noop on platforms where
     * wchar_t is compatible with UChar and ASCII-based.
     * The length of the string literal must be counted for both macros.
     */
    /* declare the invString array for the string */
    U_STRING_DECL(invString, "such characters are safe 123 %-.", 32);
    /* populate it with the characters */
    U_STRING_INIT(invString, "such characters are safe 123 %-.", 32);

    // compare the C and C++ strings
    printf("C and C++ Unicode strings are equal: %d\n", invariantOnly==UnicodeString(TRUE, invString, 32));

    /*
     * convert between char * and UChar * strings that
     * contain only invariant characters
     */
    static const char *cs1="such characters are safe 123 %-.";
    static UChar us1[40];
    static char cs2[40];
    // 33 = the 32 characters of the literal plus the terminating NUL
    u_charsToUChars(cs1, us1, 33); /* include the terminating NUL */
    u_UCharsToChars(us1, cs2, 33);
    printf("char * -> UChar * -> char * with only "
           "invariant characters: \"%s\"\n",
           cs2);

    // initialize a UnicodeString from a string literal that contains
    // escape sequences written with invariant characters
    // do not forget to duplicate the backslashes for ICU to see them
    // then, count each double backslash only once!
    UnicodeString german=UNICODE_STRING(
        "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n", 64).
        unescape();
    printUnicodeString("german UnicodeString from unescaping:\n    ", german);

    /*
     * C: convert and unescape a char * string with only invariant
     * characters to fill a UChar * string
     */
    UChar buffer[200];
    int32_t length;
    length=u_unescape(
        "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n",
        buffer, UPRV_LENGTHOF(buffer));
    printf("german C Unicode string from char * unescaping: (length %d)\n    ", length);
    // NOTE(review): this constructor is not given a length, so it presumably
    // relies on buffer being NUL-terminated by u_unescape() - confirm.
    printUnicodeString("", UnicodeString(buffer));
}
Пример #4
0
/**
 * Advances the buffer line by line until a line equal to the given identifier
 * (with or without a trailing semicolon) is found.
 *
 * On a match, Current is left on the line terminator, or on the semicolon
 * when the matching line ended with one. Semicolons and newlines are NOT part
 * of the heredoc / nowdoc, and the parser looks for the semicolon itself.
 *
 * Do NOT cache buffer->Current across calls to buffer->AppendToLexeme, since
 * the pointer may change when the buffer is refilled.
 *
 * NOTE(review): '\r' and '\n' each end a line and each trigger
 * IncrementLine(), so a "\r\n" pair appears to be counted as two lines -
 * confirm against BufferClass::IncrementLine.
 *
 * @param buffer     scanner buffer positioned at the first line to examine
 * @param identifier closing identifier (taken by value; a ';' is appended locally)
 * @return 0 when the identifier line was found, T_ERROR_UNTERMINATED_STRING
 *         when a NUL is reached first
 */
int pelet::SkipToIdentifier(BufferClass *buffer, UnicodeString identifier) {
	const UnicodeString semicolon = UNICODE_STRING(";", 1);

	// compare against "identifier;" so both terminated and bare forms match
	identifier.append(';');

	UChar c = *buffer->Current;
	for (;;) {

		// collect one line, stopping at NUL or at a line terminator
		UnicodeString line;
		for (; c != 0 && c != '\n' && c != '\r'; c = *(++buffer->Current)) {
			line.append(c);

			// only refill when the buffer is nearly exhausted; this avoids
			// useless copying of the buffer to remove slack
			if ((buffer->Limit - buffer->Current) < 2) {
				buffer->AppendToLexeme(1);
			}
		}

		// ran out of input before seeing the identifier
		if (c == 0) {
			return T_ERROR_UNTERMINATED_STRING;
		}

		// a newline is being consumed; keep line numbering in lint errors right
		buffer->IncrementLine();

		// the semicolon is OPTIONAL: normalize the line to the "...;" form
		const bool hasEndingSemicolon = line.endsWith(semicolon);
		if (!hasEndingSemicolon) {
			line.append(semicolon);
		}

		if (line.compare(identifier) == 0) {
			// step back onto the semicolon so the parser still sees it
			if (hasEndingSemicolon) {
				buffer->Current--;
			}
			return 0;
		}

		// not the identifier: move past the line terminator and keep going
		if ((buffer->Limit - buffer->Current) < 2) {
			buffer->AppendToLexeme(1);
		}
		c = *(++buffer->Current);
	}
}
Пример #5
0
void RBBISetBuilder::printRanges()
{
	RangeDescriptor    *   rlRange;
	int                    i;

	RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n");
	for (rlRange = fRangeList; rlRange != 0; rlRange = rlRange->fNext)
	{
		RBBIDebugPrintf("%2i  %4x-%4x  ", rlRange->fNum, rlRange->fStartChar, rlRange->fEndChar);

		for (i = 0; i < rlRange->fIncludesSets->size(); i++)
		{
			RBBINode    *   usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
			UnicodeString   setName = UNICODE_STRING("anon", 4);
			RBBINode    *   setRef = usetNode->fParent;
			if (setRef != NULL)
			{
				RBBINode * varRef = setRef->fParent;
				if (varRef != NULL  &&  varRef->fType == RBBINode::varRef)
				{
					setName = varRef->fText;
				}
			}
			RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf("  ");
		}
		RBBIDebugPrintf("\n");
	}
}
void RBBISetBuilder::printSets() {
    int                   i;

    RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n");
    for (i=0; ; i++) {
        RBBINode        *usetNode;
        RBBINode        *setRef;
        RBBINode        *varRef;
        UnicodeString    setName;

        usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(i);
        if (usetNode == NULL) {
            break;
        }

        RBBIDebugPrintf("%3d    ", i);
        setName = UNICODE_STRING("anonymous", 9);
        setRef = usetNode->fParent;
        if (setRef != NULL) {
            varRef = setRef->fParent;
            if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
                setName = varRef->fText;
            }
        }
        RBBI_DEBUG_printUnicodeString(setName);
        RBBIDebugPrintf("   ");
        RBBI_DEBUG_printUnicodeString(usetNode->fText);
        RBBIDebugPrintf("\n");
        if (usetNode->fLeftChild != NULL) {
            usetNode->fLeftChild->printTree(TRUE);
        }
    }
    RBBIDebugPrintf("\n");
}
Пример #7
0
/**
 * Constructs the "Any-BreakInternal" transliterator. The insertion string is
 * initialized to SPACE, the break iterator pointer starts out NULL and an
 * empty UVector32 is allocated for the boundaries.
 *
 * NOTE(review): the UErrorCode handed to the UVector32 constructor is a local
 * and is not checked afterwards.
 *
 * @param adoptedFilter filter passed on to the Transliterator base class
 */
BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
      fInsertion(SPACE)
{
    UErrorCode vectorStatus = U_ZERO_ERROR;

    bi         = NULL;
    boundaries = new UVector32(vectorStatus);
}
Пример #8
0
void MessageFormatRegressionTest::Test4142938() 
{
    UnicodeString pat = CharsToUnicodeString("''Vous'' {0,choice,0#n''|1#}avez s\\u00E9lectionn\\u00E9 "
        "{0,choice,0#aucun|1#{0}} client{0,choice,0#s|1#|2#s} "
        "personnel{0,choice,0#s|1#|2#s}.");
    UErrorCode status = U_ZERO_ERROR;
    MessageFormat *mf = new MessageFormat(pat, status);
    failure(status, "new MessageFormat");

    UnicodeString PREFIX [] = {
        CharsToUnicodeString("'Vous' n'avez s\\u00E9lectionn\\u00E9 aucun clients personnels."),
        CharsToUnicodeString("'Vous' avez s\\u00E9lectionn\\u00E9 "),
        CharsToUnicodeString("'Vous' avez s\\u00E9lectionn\\u00E9 ")
    };  
    UnicodeString SUFFIX [] = {
        UnicodeString(),
        UNICODE_STRING(" client personnel.", 18),
        UNICODE_STRING(" clients personnels.", 20)
    };

    for (int i=0; i<3; i++) {
        UnicodeString out;
        //out = mf->format(new Object[]{new Integer(i)});
        Formattable objs [] = {
            Formattable((int32_t)i)
        };
        FieldPosition pos(FieldPosition::DONT_CARE);
        out = mf->format(objs, 1, out, pos, status);
        if (!failure(status, "mf->format", TRUE)) {
            if (SUFFIX[i] == "") {
                if (out != PREFIX[i])
                    errln((UnicodeString)"" + i + ": Got \"" + out + "\"; Want \"" + PREFIX[i] + "\"");
            }
            else {
                if (!out.startsWith(PREFIX[i]) ||
                    !out.endsWith(SUFFIX[i]))
                    errln((UnicodeString)"" + i + ": Got \"" + out + "\"; Want \"" + PREFIX[i] + "\"...\"" +
                          SUFFIX[i] + "\"");
            }
        }
    }

    delete mf;
}
void PluralRulesTest::testOrdinal() {
    IcuTestErrorCode errorCode(*this, "testOrdinal");
    LocalPointer<PluralRules> pr(PluralRules::forLocale("en", UPLURAL_TYPE_ORDINAL, errorCode));
    if (errorCode.logIfFailureAndReset("PluralRules::forLocale(en, UPLURAL_TYPE_ORDINAL) failed")) {
        return;
    }
    UnicodeString keyword = pr->select(2.);
    if (keyword != UNICODE_STRING("two", 3)) {
        dataerrln("PluralRules(en-ordinal).select(2) failed");
    }
}
Пример #10
0
void RBBISetBuilder::printRangeGroups()
{
	RangeDescriptor    *   rlRange;
	RangeDescriptor    *   tRange;
	int                    i;
	int                    lastPrintedGroupNum = 0;

	RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");
	for (rlRange = fRangeList; rlRange != 0; rlRange = rlRange->fNext)
	{
		int groupNum = rlRange->fNum & 0xbfff;
		if (groupNum > lastPrintedGroupNum)
		{
			lastPrintedGroupNum = groupNum;
			RBBIDebugPrintf("%2i  ", groupNum);

			if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}

			for (i = 0; i < rlRange->fIncludesSets->size(); i++)
			{
				RBBINode    *   usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
				UnicodeString   setName = UNICODE_STRING("anon", 4);
				RBBINode    *   setRef = usetNode->fParent;
				if (setRef != NULL)
				{
					RBBINode * varRef = setRef->fParent;
					if (varRef != NULL  &&  varRef->fType == RBBINode::varRef)
					{
						setName = varRef->fText;
					}
				}
				RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
			}

			i = 0;
			for (tRange = rlRange; tRange != 0; tRange = tRange->fNext)
			{
				if (tRange->fNum == rlRange->fNum)
				{
					if (i++ % 5 == 0)
					{
						RBBIDebugPrintf("\n    ");
					}
					RBBIDebugPrintf("  %05x-%05x", tRange->fStartChar, tRange->fEndChar);
				}
			}
			RBBIDebugPrintf("\n");
		}
	}
	RBBIDebugPrintf("\n");
}
Пример #11
0
void
StringTest::Test_UNICODE_STRING() {
    UnicodeString ustringVar=UNICODE_STRING("aZ0 -", 5);
    if( ustringVar.length()!=5 ||
        ustringVar[0]!=0x61 ||
        ustringVar[1]!=0x5a ||
        ustringVar[2]!=0x30 ||
        ustringVar[3]!=0x20 ||
        ustringVar[4]!=0x2d
    ) {
        errln("Test_UNICODE_STRING: UNICODE_STRING does not work right! "
              "See unistr.h and utypes.h with platform.h.");
    }
}
//-------------------------------------------------------------------------------------
//
//   RangeDescriptor::setDictionaryFlag
//
//            Character Category Numbers that include characters from
//            the original Unicode Set named "dictionary" have bit 14
//            set to 1.  The RBBI runtime engine uses this to trigger
//            use of the word dictionary.
//
//            This function looks through the Unicode Sets that it
//            (the range) includes, and sets the bit in fNum when
//            "dictionary" is among them.
//
//            TODO:  a faster way would be to find the set node for
//                   "dictionary" just once, rather than looking it
//                   up by name every time.
//
//-------------------------------------------------------------------------------------
void RangeDescriptor::setDictionaryFlag() {
    int i;

    for (i=0; i<this->fIncludesSets->size(); i++) {
        RBBINode       *usetNode    = (RBBINode *)fIncludesSets->elementAt(i);
        UnicodeString   setName;
        RBBINode       *setRef = usetNode->fParent;
        if (setRef != NULL) {
            RBBINode *varRef = setRef->fParent;
            if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
                setName = varRef->fText;
            }
        }
        if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) {   // TODO:  no string literals.
            this->fNum |= 0x4000;
            break;
        }
    }
}
U_CDECL_END

/**
 * Constructs the "Name-Any" transliterator and fills the member set `legal`
 * by handing a USetAdder that targets it to uprv_getCharNameCharacters().
 *
 * @param adoptedFilter filter passed on to the Transliterator base class
 */
NameUnicodeTransliterator::NameUnicodeTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(UNICODE_STRING("Name-Any", 8), adoptedFilter)
{
    // Only the single-code-point callback is supplied; the others stay NULL.
    USetAdder legalAdder = {
        (USet *)&legal, // USet* == UnicodeSet*
        _set_add,
        NULL,           // _set_addRange is not needed
        NULL,           // _set_addString is not needed
        NULL            // remove() is not needed
    };

    // Get the legal character set
    uprv_getCharNameCharacters(&legalAdder);
}
Пример #14
0
 SubStringCharIter() {
     setText(UNICODE_STRING("abc", 3));
 }
void DataDrivenNumberFormatTestSuite::run(const char *fileName, UBool runAllTests) {
    fFileLineNumber = 0;
    fFormatTestNumber = 0;
    UErrorCode status = U_ZERO_ERROR;
    for (int32_t i = 0; i < UPRV_LENGTHOF(fPreviousFormatters); ++i) {
        delete fPreviousFormatters[i];
        fPreviousFormatters[i] = newFormatter(status);
    }
    if (!assertSuccess("Can't create previous formatters", status)) {
        return;
    }
    CharString path(getSourceTestData(status), status);
    path.appendPathPart(fileName, status);
    const char *codePage = "UTF-8";
    LocalUCHARBUFPointer f(ucbuf_open(path.data(), &codePage, TRUE, FALSE, &status));
    if (!assertSuccess("Can't open data file", status)) {
        return;
    }
    UnicodeString columnValues[kNumberFormatTestTupleFieldCount];
    ENumberFormatTestTupleField columnTypes[kNumberFormatTestTupleFieldCount];
    int32_t columnCount;
    int32_t state = 0;
    while(U_SUCCESS(status)) {
        // Read a new line if necessary.
        if(fFileLine.isEmpty()) {
            if(!readLine(f.getAlias(), status)) { break; }
            if (fFileLine.isEmpty() && state == 2) {
                state = 0;
            }
            continue;
        }
        if (fFileLine.startsWith("//")) {
            fFileLine.remove();
            continue;
        }
        // Initial setup of test.
        if (state == 0) {
            if (fFileLine.startsWith(UNICODE_STRING("test ", 5))) {
                fFileTestName = fFileLine;
                fTuple.clear();
            } else if(fFileLine.startsWith(UNICODE_STRING("set ", 4))) {
                setTupleField(status);
            } else if(fFileLine.startsWith(UNICODE_STRING("begin", 5))) {
                state = 1;
            } else {
                showError("Unrecognized verb.");
                return;
            }
        // column specification
        } else if (state == 1) {
            columnCount = splitBy(columnValues, UPRV_LENGTHOF(columnValues), 0x9);
            for (int32_t i = 0; i < columnCount; ++i) {
                columnTypes[i] = NumberFormatTestTuple::getFieldByName(
                    columnValues[i]);
                if (columnTypes[i] == kNumberFormatTestTupleFieldCount) {
                    showError("Unrecognized field name.");
                    return;
                }
            }
            state = 2;
        // run the tests
        } else {
            int32_t columnsInThisRow = splitBy(columnValues, columnCount, 0x9);
            for (int32_t i = 0; i < columnsInThisRow; ++i) {
                fTuple.setField(
                        columnTypes[i], columnValues[i].unescape(), status);
            }
            for (int32_t i = columnsInThisRow; i < columnCount; ++i) {
                fTuple.clearField(columnTypes[i], status);
            }
            if (U_FAILURE(status)) {
                showError("Invalid column values");
                return;
            }
            if (!breaksC() || runAllTests) {
                UnicodeString errorMessage;
                if (!isPass(fTuple, errorMessage, status)) {
                    showFailure(errorMessage);
                }
            }
        }
        fFileLine.remove();
    }
}
Пример #16
0
/**
 * Reads an XML file, converts its contents to Unicode and parses the result.
 *
 * The charset is chosen in this order:
 *   1. a Unicode signature at the start of the file;
 *   2. the encoding="..." attribute of the XML declaration, which is located
 *      by decoding the first block of the file as ISO-8859-1;
 *   3. UTF-8, when neither of the above yields a charset (including files
 *      that have no XML declaration at all).
 *
 * @param filename   path of the file to read (opened in binary mode)
 * @param errorCode  in/out ICU error code; the function does nothing and
 *                   returns NULL when it already indicates a failure
 * @return the value returned by parse() for the converted text, or NULL when
 *         the file cannot be opened or converted
 */
UXMLElement *
UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) {
    char bytes[4096], charsetBuffer[100];
    FileStream *f;
    const char *charset, *pb;
    UnicodeString src;
    UConverter *cnv=NULL;
    UChar *buffer, *pu;
    int32_t fileLength, bytesLength, length, capacity;
    UBool flush;

    if(U_FAILURE(errorCode)) {
        return NULL;
    }

    f=T_FileStream_open(filename, "rb");
    if(f==NULL) {
        errorCode=U_FILE_ACCESS_ERROR;
        return NULL;
    }

    bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
    if(bytesLength<(int32_t)sizeof(bytes)) {
        // we have already read the entire file
        fileLength=bytesLength;
    } else {
        // get the file length
        fileLength=T_FileStream_size(f);
    }

    /*
     * get the charset:
     * 1. Unicode signature
     * 2. treat as ISO-8859-1 and read XML encoding="charset"
     * 3. default to UTF-8
     */
    charset=ucnv_detectUnicodeSignature(bytes, bytesLength, NULL, &errorCode);
    if(U_SUCCESS(errorCode) && charset!=NULL) {
        // open converter according to Unicode signature
        cnv=ucnv_open(charset, &errorCode);
    } else {
        // read as Latin-1 and parse the XML declaration and encoding
        cnv=ucnv_open("ISO-8859-1", &errorCode);
        if(U_FAILURE(errorCode)) {
            // unexpected error opening Latin-1 converter
            goto exit;
        }

        buffer=src.getBuffer(bytesLength);
        if(buffer==NULL) {
            // unexpected failure to reserve some string capacity
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            goto exit;
        }
        pb=bytes;
        pu=buffer;
        ucnv_toUnicode(
            cnv,
            &pu, buffer+src.getCapacity(),
            &pb, bytes+bytesLength,
            NULL, TRUE, &errorCode);
        src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0);
        ucnv_close(cnv);
        cnv=NULL;
        if(U_FAILURE(errorCode)) {
            // unexpected error in conversion from Latin-1
            src.remove();
            goto exit;
        }

        // parse XML declaration
        if(mXMLDecl.reset(src).lookingAt(0, errorCode)) {
            int32_t declEnd=mXMLDecl.end(errorCode);
            // go beyond <?xml
            int32_t pos=src.indexOf((UChar)x_l)+1;

            mAttrValue.reset(src);
            while(pos<declEnd && mAttrValue.lookingAt(pos, errorCode)) {  // loop runs once per attribute on this element.
                UnicodeString attName  = mAttrValue.group(1, errorCode);
                UnicodeString attValue = mAttrValue.group(2, errorCode);

                // Trim the quotes from the att value.  These are left over from the original regex
                //   that parsed the attribute, which couldn't conveniently strip them.
                attValue.remove(0,1);                    // one char from the beginning
                attValue.truncate(attValue.length()-1);  // and one from the end.

                if(attName==UNICODE_STRING("encoding", 8)) {
                    length=attValue.extract(0, 0x7fffffff, charsetBuffer, (int32_t)sizeof(charsetBuffer));
                    // An over-long encoding name may fill the buffer without room for
                    //   the terminator; make sure ucnv_open() never reads past the array.
                    charsetBuffer[sizeof(charsetBuffer)-1]=0;
                    charset=charsetBuffer;
                    break;
                }
                pos = mAttrValue.end(2, errorCode);
            }
        }

        // No signature and no usable encoding attribute (or no XML declaration
        //   at all): default to UTF-8.  Opening the converter here, rather than
        //   only when a declaration was found, keeps cnv from staying NULL for
        //   files without a declaration.
        if(charset==NULL) {
            charset="UTF-8";
        }
        cnv=ucnv_open(charset, &errorCode);
    }

    if(U_FAILURE(errorCode)) {
        // unable to open the converter
        goto exit;
    }

    // convert the file contents
    capacity=fileLength;        // estimated capacity
    src.getBuffer(capacity);
    src.releaseBuffer(0);       // zero length
    flush=FALSE;
    for(;;) {
        // convert contents of bytes[bytesLength]
        pb=bytes;
        for(;;) {
            length=src.length();
            buffer=src.getBuffer(capacity);
            if(buffer==NULL) {
                // unexpected failure to reserve some string capacity
                errorCode=U_MEMORY_ALLOCATION_ERROR;
                goto exit;
            }

            pu=buffer+length;
            ucnv_toUnicode(
                cnv, &pu, buffer+src.getCapacity(),
                &pb, bytes+bytesLength,
                NULL, flush, &errorCode);
            // Keep what has been written on overflow as well: pb has already
            //   advanced past the bytes that produced it, so dropping the text
            //   would lose that part of the file.
            src.releaseBuffer(
                (U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR) ?
                    (int32_t)(pu-buffer) : 0);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
                errorCode=U_ZERO_ERROR;
                capacity=(3*src.getCapacity())/2; // increase capacity by 50%
            } else {
                break;
            }
        }

        if(U_FAILURE(errorCode)) {
            break; // conversion error
        }

        if(flush) {
            break; // completely converted the file
        }

        // read next block
        bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
        if(bytesLength==0) {
            // reached end of file, convert once more to flush the converter
            flush=TRUE;
        }
    };

exit:
    // cnv is NULL here when no converter is open; it is closed unconditionally
    ucnv_close(cnv);
    T_FileStream_close(f);

    if(U_SUCCESS(errorCode)) {
        return parse(src, errorCode);
    } else {
        return NULL;
    }
}
Пример #17
0
/**
 * Formats the date/time held by cal and appends the result to appendTo.
 *
 * If getStringForDay() supplies a relative-day string for the day difference
 * between cal and now, that string stands in for the date part; otherwise
 * the date pattern is used. Depending on which of the date and time patterns
 * are present, the output is the time alone, the date (or relative string)
 * alone, or both merged through fCombinedFormat.
 *
 * NOTE(review): the local status is never reported to the caller, and the
 * shared fDateTimeFormatter is reconfigured (setContext/applyPattern) from
 * this const method - confirm callers do not rely on its previous state.
 *
 * @param cal      calendar holding the date/time to format
 * @param appendTo string the result is appended to
 * @param pos      field position passed through to the underlying formatter
 * @return appendTo
 */
UnicodeString& RelativeDateFormat::format(  Calendar& cal,
                                UnicodeString& appendTo,
                                FieldPosition& pos) const {

    UErrorCode status = U_ZERO_ERROR;
    UnicodeString relativeDayString;
    UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);

    // calculate the difference, in days, between 'cal' and now.
    int dayDiff = dayDifference(cal, status);

    // look up string
    int32_t len = 0;
    const UChar *theString = getStringForDay(dayDiff, len, status);
    if(U_SUCCESS(status) && (theString!=NULL)) {
        // found a relative string
        relativeDayString.setTo(theString, len);
    }

    // The relative string starts the output when there is one, a date pattern
    // exists, and either no combining happens or the combined pattern puts
    // the date first. In that case capitalization is applied to the relative
    // string here and switched off in the formatter.
    if ( relativeDayString.length() > 0 && !fDatePattern.isEmpty() &&
         (fTimePattern.isEmpty() || fCombinedFormat == NULL || fCombinedHasDateAtStart)) {
#if !UCONFIG_NO_BREAK_ITERATION
        // capitalize relativeDayString according to context for relative, set formatter no context
        if ( u_islower(relativeDayString.char32At(0)) && fCapitalizationBrkIter!= NULL &&
             ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
               (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && fCapitalizationOfRelativeUnitsForUIListMenu) ||
               (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && fCapitalizationOfRelativeUnitsForStandAlone) ) ) {
            // titlecase first word of relativeDayString
            relativeDayString.toTitle(fCapitalizationBrkIter, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
        }
#endif
        fDateTimeFormatter->setContext(UDISPCTX_CAPITALIZATION_NONE, status);
    } else {
        // set our context for the formatter
        fDateTimeFormatter->setContext(capitalizationContext, status);
    }

    if (fDatePattern.isEmpty()) {
        // time only
        fDateTimeFormatter->applyPattern(fTimePattern);
        fDateTimeFormatter->format(cal,appendTo,pos);
    } else if (fTimePattern.isEmpty() || fCombinedFormat == NULL) {
        // date only: the relative string, if any, is appended as-is
        if (relativeDayString.length() > 0) {
            appendTo.append(relativeDayString);
        } else {
            fDateTimeFormatter->applyPattern(fDatePattern);
            fDateTimeFormatter->format(cal,appendTo,pos);
        }
    } else {
        // date and time: build a combined pattern and format with it
        UnicodeString datePattern;
        if (relativeDayString.length() > 0) {
            // Need to quote the relativeDayString to make it a legal date pattern
            relativeDayString.findAndReplace(UNICODE_STRING("'", 1), UNICODE_STRING("''", 2)); // double any existing APOSTROPHE
            relativeDayString.insert(0, APOSTROPHE); // add APOSTROPHE at beginning...
            relativeDayString.append(APOSTROPHE); // and at end
            datePattern.setTo(relativeDayString);
        } else {
            datePattern.setTo(fDatePattern);
        }
        UnicodeString combinedPattern;
        fCombinedFormat->format(fTimePattern, datePattern, combinedPattern, status);
        fDateTimeFormatter->applyPattern(combinedPattern);
        fDateTimeFormatter->format(cal,appendTo,pos);
    }

    return appendTo;
}
Пример #18
0
/**
 * Constructs the "Any-Title" transliterator. NULL is passed as the second
 * argument of the CaseMapTransliterator base constructor.
 */
TitlecaseTransliterator::TitlecaseTransliterator()
    : CaseMapTransliterator(UNICODE_STRING("Any-Title", 9), NULL)
{
    // Two characters of preceding context are required, e.g. to handle "can't".
    setMaximumContextLength(2);
}
Пример #19
0
/**
 * Constructs the "Any-Name" transliterator.
 *
 * @param adoptedFilter filter passed on to the Transliterator base class
 */
UnicodeNameTransliterator::UnicodeNameTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(UNICODE_STRING("Any-Name", 8), adoptedFilter)
{
}
Пример #20
0
//----------------------------------------------------------------------------------------
//
//  doParseActions       Do some action during rule parsing.
//                       Called by the parse state machine.
//                       Actions build the parse tree and Unicode Sets,
//                       and maintain the parse stack for nested expressions.
//
//                       TODO:  unify EParseAction and RBBI_RuleParseAction enum types.
//                              They represent exactly the same thing.  They're separate
//                              only to work around enum forward declaration restrictions
//                              in some compilers, while at the same time avoiding multiple
//                              definitions problems.  I'm sure that there's a better way.
//
//----------------------------------------------------------------------------------------
UBool RBBIRuleScanner::doParseActions(EParseAction action)
{
    RBBINode *n       = NULL;

    UBool   returnVal = TRUE;

    switch ((RBBI_RuleParseAction)action) {

    case doExprStart:
        pushNewNode(RBBINode::opStart);
        fRuleNum++;
        break;


    case doExprOrOperator:
    {
        fixOpStack(RBBINode::precOpCat);
        RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
        RBBINode  *orNode      = pushNewNode(RBBINode::opOr);
        orNode->fLeftChild     = operandNode;
        operandNode->fParent   = orNode;
    }
    break;

    case doExprCatOperator:
        // concatenation operator.
        // For the implicit concatenation of adjacent terms in an expression that are
        //   not separated by any other operator.  Action is invoked between the
        //   actions for the two terms.
    {
        fixOpStack(RBBINode::precOpCat);
        RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
        RBBINode  *catNode     = pushNewNode(RBBINode::opCat);
        catNode->fLeftChild    = operandNode;
        operandNode->fParent   = catNode;
    }
    break;

    case doLParen:
        // Open Paren.
        //   The openParen node is a dummy operation type with a low precedence,
        //     which has the affect of ensuring that any real binary op that
        //     follows within the parens binds more tightly to the operands than
        //     stuff outside of the parens.
        pushNewNode(RBBINode::opLParen);
        break;

    case doExprRParen:
        fixOpStack(RBBINode::precLParen);
        break;

    case doNOP:
        break;

    case doStartAssign:
        // We've just scanned "$variable = "
        // The top of the node stack has the $variable ref node.

        // Save the start position of the RHS text in the StartExpression node
        //   that precedes the $variableReference node on the stack.
        //   This will eventually be used when saving the full $variable replacement
        //   text as a string.
        n = fNodeStack[fNodeStackPtr-1];
        n->fFirstPos = fNextIndex;              // move past the '='

        // Push a new start-of-expression node; needed to keep parse of the
        //   RHS expression happy.
        pushNewNode(RBBINode::opStart);
        break;




    case doEndAssign:
    {
        // We have reached the end of an assignement statement.
        //   Current scan char is the ';' that terminates the assignment.

        // Terminate expression, leaves expression parse tree rooted in TOS node.
        fixOpStack(RBBINode::precStart);

        RBBINode *startExprNode  = fNodeStack[fNodeStackPtr-2];
        RBBINode *varRefNode     = fNodeStack[fNodeStackPtr-1];
        RBBINode *RHSExprNode    = fNodeStack[fNodeStackPtr];

        // Save original text of right side of assignment, excluding the terminating ';'
        //  in the root of the node for the right-hand-side expression.
        RHSExprNode->fFirstPos = startExprNode->fFirstPos;
        RHSExprNode->fLastPos  = fScanIndex;
        fRB->fRules.extractBetween(RHSExprNode->fFirstPos, RHSExprNode->fLastPos, RHSExprNode->fText);

        // Expression parse tree becomes l. child of the $variable reference node.
        varRefNode->fLeftChild = RHSExprNode;
        RHSExprNode->fParent   = varRefNode;

        // Make a symbol table entry for the $variableRef node.
        fSymbolTable->addEntry(varRefNode->fText, varRefNode, *fRB->fStatus);
        if (U_FAILURE(*fRB->fStatus)) {
            // This is a round-about way to get the parse position set
            //  so that duplicate symbols error messages include a line number.
            UErrorCode t = *fRB->fStatus;
            *fRB->fStatus = U_ZERO_ERROR;
            error(t);
        }

        // Clean up the stack.
        delete startExprNode;
        fNodeStackPtr-=3;
        break;
    }

    case doEndOfRule:
    {
        fixOpStack(RBBINode::precStart);      // Terminate expression, leaves expression
        if (U_FAILURE(*fRB->fStatus)) {       //   parse tree rooted in TOS node.
            break;
        }
#ifdef RBBI_DEBUG
        if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {
            printNodeStack("end of rule");
        }
#endif
        U_ASSERT(fNodeStackPtr == 1);

        // If this rule includes a look-ahead '/', add a endMark node to the
        //   expression tree.
        if (fLookAheadRule) {
            RBBINode  *thisRule       = fNodeStack[fNodeStackPtr];
            RBBINode  *endNode        = pushNewNode(RBBINode::endMark);
            RBBINode  *catNode        = pushNewNode(RBBINode::opCat);
            fNodeStackPtr -= 2;
            catNode->fLeftChild       = thisRule;
            catNode->fRightChild      = endNode;
            fNodeStack[fNodeStackPtr] = catNode;
            endNode->fVal             = fRuleNum;
            endNode->fLookAheadEnd    = TRUE;
        }

        // All rule expressions are ORed together.
        // The ';' that terminates an expression really just functions as a '|' with
        //   a low operator prededence.
        //
        // Each of the four sets of rules are collected separately.
        //  (forward, reverse, safe_forward, safe_reverse)
        //  OR this rule into the appropriate group of them.
        //
        RBBINode **destRules = (fReverseRule? &fRB->fReverseTree : fRB->fDefaultTree);

        if (*destRules != NULL) {
            // This is not the first rule encounted.
            // OR previous stuff  (from *destRules)
            // with the current rule expression (on the Node Stack)
            //  with the resulting OR expression going to *destRules
            //
            RBBINode  *thisRule    = fNodeStack[fNodeStackPtr];
            RBBINode  *prevRules   = *destRules;
            RBBINode  *orNode      = pushNewNode(RBBINode::opOr);
            orNode->fLeftChild     = prevRules;
            prevRules->fParent     = orNode;
            orNode->fRightChild    = thisRule;
            thisRule->fParent      = orNode;
            *destRules             = orNode;
        }
        else
        {
            // This is the first rule encountered (for this direction).
            // Just move its parse tree from the stack to *destRules.
            *destRules = fNodeStack[fNodeStackPtr];
        }
        fReverseRule   = FALSE;   // in preparation for the next rule.
        fLookAheadRule = FALSE;
        fNodeStackPtr  = 0;
    }
    break;


    case doRuleError:
        error(U_BRK_RULE_SYNTAX);
        returnVal = FALSE;
        break;


    case doVariableNameExpectedErr:
        error(U_BRK_RULE_SYNTAX);
        break;


    //
    //  Unary operands  + ? *
    //    These all appear after the operand to which they apply.
    //    When we hit one, the operand (may be a whole sub expression)
    //    will be on the top of the stack.
    //    Unary Operator becomes TOS, with the old TOS as its one child.
    case doUnaryOpPlus:
    {
        RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
        RBBINode  *plusNode    = pushNewNode(RBBINode::opPlus);
        plusNode->fLeftChild   = operandNode;
        operandNode->fParent   = plusNode;
    }
    break;

    case doUnaryOpQuestion:
    {
        RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
        RBBINode  *qNode       = pushNewNode(RBBINode::opQuestion);
        qNode->fLeftChild      = operandNode;
        operandNode->fParent   = qNode;
    }
    break;

    case doUnaryOpStar:
    {
        RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
        RBBINode  *starNode    = pushNewNode(RBBINode::opStar);
        starNode->fLeftChild   = operandNode;
        operandNode->fParent   = starNode;
    }
    break;

    case doRuleChar:
        // A "Rule Character" is any single character that is a literal part
        // of the regular expression.  Like a, b and c in the expression "(abc*) | [:L:]"
        // These are pretty uncommon in break rules; the terms are more commonly
        //  sets.  To keep things uniform, treat these characters like as
        // sets that just happen to contain only one character.
    {
        n = pushNewNode(RBBINode::setRef);
        findSetFor(fC.fChar, n);
        n->fFirstPos = fScanIndex;
        n->fLastPos  = fNextIndex;
        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
        break;
    }

    case doDotAny:
        // scanned a ".", meaning match any single character.
    {
        n = pushNewNode(RBBINode::setRef);
        findSetFor(kAny, n);
        n->fFirstPos = fScanIndex;
        n->fLastPos  = fNextIndex;
        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
        break;
    }

    case doSlash:
        // Scanned a '/', which identifies a look-ahead break position in a rule.
        n = pushNewNode(RBBINode::lookAhead);
        n->fVal      = fRuleNum;
        n->fFirstPos = fScanIndex;
        n->fLastPos  = fNextIndex;
        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
        fLookAheadRule = TRUE;
        break;


    case doStartTagValue:
        // Scanned a '{', the opening delimiter for a tag value within a rule.
        n = pushNewNode(RBBINode::tag);
        n->fVal      = 0;
        n->fFirstPos = fScanIndex;
        n->fLastPos  = fNextIndex;
        break;

    case doTagDigit:
        // Just scanned a decimal digit that's part of a tag value
    {
        n = fNodeStack[fNodeStackPtr];
        uint32_t v = u_charDigitValue(fC.fChar);
        U_ASSERT(v < 10);
        n->fVal = n->fVal*10 + v;
        break;
    }

    case doTagValue:
        n = fNodeStack[fNodeStackPtr];
        n->fLastPos = fNextIndex;
        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
        break;

    case doTagExpectedError:
        error(U_BRK_MALFORMED_RULE_TAG);
        returnVal = FALSE;
        break;

    case doOptionStart:
        // Scanning a !!option.   At the start of string.
        fOptionStart = fScanIndex;
        break;

    case doOptionEnd:
    {
        UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart);
        if (opt == UNICODE_STRING("chain", 5)) {
            fRB->fChainRules = TRUE;
        } else if (opt == UNICODE_STRING("LBCMNoChain", 11)) {
            fRB->fLBCMNoChain = TRUE;
        } else if (opt == UNICODE_STRING("forward", 7)) {
            fRB->fDefaultTree   = &fRB->fForwardTree;
        } else if (opt == UNICODE_STRING("reverse", 7)) {
            fRB->fDefaultTree   = &fRB->fReverseTree;
        } else if (opt == UNICODE_STRING("safe_forward", 12)) {
            fRB->fDefaultTree   = &fRB->fSafeFwdTree;
        } else if (opt == UNICODE_STRING("safe_reverse", 12)) {
            fRB->fDefaultTree   = &fRB->fSafeRevTree;
        } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) {
            fRB->fLookAheadHardBreak = TRUE;
        } else {
            error(U_BRK_UNRECOGNIZED_OPTION);
        }
    }
    break;

    case doReverseDir:
        fReverseRule = TRUE;
        break;

    case doStartVariableName:
        n = pushNewNode(RBBINode::varRef);
        if (U_FAILURE(*fRB->fStatus)) {
            break;
        }
        n->fFirstPos = fScanIndex;
        break;

    case doEndVariableName:
        n = fNodeStack[fNodeStackPtr];
        if (n==NULL || n->fType != RBBINode::varRef) {
            error(U_BRK_INTERNAL_ERROR);
            break;
        }
        n->fLastPos = fScanIndex;
        fRB->fRules.extractBetween(n->fFirstPos+1, n->fLastPos, n->fText);
        // Look the newly scanned name up in the symbol table
        //   If there's an entry, set the l. child of the var ref to the replacement expression.
        //   (We also pass through here when scanning assignments, but no harm is done, other
        //    than a slight wasted effort that seems hard to avoid.  Lookup will be null)
        n->fLeftChild = fSymbolTable->lookupNode(n->fText);
        break;

    case doCheckVarDef:
        n = fNodeStack[fNodeStackPtr];
        if (n->fLeftChild == NULL) {
            error(U_BRK_UNDEFINED_VARIABLE);
            returnVal = FALSE;
        }
        break;

    case doExprFinished:
        break;

    case doRuleErrorAssignExpr:
        error(U_BRK_ASSIGN_ERROR);
        returnVal = FALSE;
        break;

    case doExit:
        returnVal = FALSE;
        break;

    case doScanUnicodeSet:
        scanSet();
        break;

    default:
        error(U_BRK_INTERNAL_ERROR);
        returnVal = FALSE;
        break;
    }
    return returnVal;
}
Пример #21
0
/**
 * Default constructor for the lower-casing transliterator.
 *
 * Forwards two arguments to the CaseMapTransliterator base class: the
 * 9-character string "Any-Lower" (presumably the transliterator ID) and
 * ucase_toFullLower. No other state is set up in this constructor.
 */
LowercaseTransliterator::LowercaseTransliterator()
    : CaseMapTransliterator(UNICODE_STRING("Any-Lower", 9), ucase_toFullLower) {}
Пример #22
0
/**
 * Returns the name reported by the wrapped dataset.
 *
 * m_pDataset is cast to UGC::UGDatasetVector*, and the result of
 * GetName().Cstr() on it is passed through the project's single-argument
 * UNICODE_STRING helper to build the returned _tstring.
 *
 * NOTE(review): m_pDataset is dereferenced without a null check -- confirm
 * that callers guarantee it is set before GetName() is used.
 */
_tstring SMCDatasetVector::GetName()
{
	UGC::UGDatasetVector* const vectorDataset = (UGC::UGDatasetVector*)m_pDataset;
	return UNICODE_STRING(vectorDataset->GetName().Cstr());
}
Пример #23
0
 ICUBreakIteratorService()
     : ICULocaleService(UNICODE_STRING("Break Iterator", 14))
 {
     UErrorCode status = U_ZERO_ERROR;
     registerFactory(new ICUBreakIteratorFactory(), status);
 }
Пример #24
0
/**
 * Default constructor for the upper-casing transliterator.
 *
 * Forwards two arguments to the CaseMapTransliterator base class: the
 * 9-character string "Any-Upper" (presumably the transliterator ID) and
 * ucase_toFullUpper. No other state is set up in this constructor.
 */
UppercaseTransliterator::UppercaseTransliterator()
    : CaseMapTransliterator(UNICODE_STRING("Any-Upper", 9), ucase_toFullUpper) {}