static UBool getSystemTimeInformation(TimeZone *tz, SYSTEMTIME &daylightDate, SYSTEMTIME &standardDate, int32_t &bias, int32_t &daylightBias, int32_t &standardBias) { UErrorCode status = U_ZERO_ERROR; UBool result = TRUE; BasicTimeZone *btz = (BasicTimeZone*)tz; // we should check type InitialTimeZoneRule *initial = NULL; AnnualTimeZoneRule *std = NULL, *dst = NULL; btz->getSimpleRulesNear(uprv_getUTCtime(), initial, std, dst, status); if (U_SUCCESS(status)) { if (std == NULL || dst == NULL) { bias = -1 * (initial->getRawOffset()/60000); daylightBias = 0; // Do not use DST. Set 0 to all stadardDate/daylightDate fields standardDate.wYear = standardDate.wMonth = standardDate.wDayOfWeek = standardDate.wDay = standardDate.wHour = standardDate.wMinute = standardDate.wSecond = standardDate.wMilliseconds = 0; daylightDate.wYear = daylightDate.wMonth = daylightDate.wDayOfWeek = daylightDate.wDay = daylightDate.wHour = daylightDate.wMinute = daylightDate.wSecond = daylightDate.wMilliseconds = 0; } else { U_ASSERT(std->getRule()->getDateRuleType() == DateTimeRule::DOW); U_ASSERT(dst->getRule()->getDateRuleType() == DateTimeRule::DOW); bias = -1 * (std->getRawOffset()/60000); daylightBias = -1 * (dst->getDSTSavings()/60000); // Always use DOW type rule int32_t hour, min, sec, mil; standardDate.wYear = 0; standardDate.wMonth = std->getRule()->getRuleMonth() + 1; standardDate.wDay = std->getRule()->getRuleWeekInMonth(); if (standardDate.wDay < 0) { standardDate.wDay = 5; } standardDate.wDayOfWeek = std->getRule()->getRuleDayOfWeek() - 1; mil = std->getRule()->getRuleMillisInDay(); hour = mil/3600000; mil %= 3600000; min = mil/60000; mil %= 60000; sec = mil/1000; mil %= 1000; standardDate.wHour = hour; standardDate.wMinute = min; standardDate.wSecond = sec; standardDate.wMilliseconds = mil; daylightDate.wYear = 0; daylightDate.wMonth = dst->getRule()->getRuleMonth() + 1; daylightDate.wDay = dst->getRule()->getRuleWeekInMonth(); if (daylightDate.wDay < 0) { daylightDate.wDay = 5; 
} daylightDate.wDayOfWeek = dst->getRule()->getRuleDayOfWeek() - 1; mil = dst->getRule()->getRuleMillisInDay(); hour = mil/3600000; mil %= 3600000; min = mil/60000; mil %= 60000; sec = mil/1000; mil %= 1000; daylightDate.wHour = hour; daylightDate.wMinute = min; daylightDate.wSecond = sec; daylightDate.wMilliseconds = mil; } } else { result = FALSE; } delete initial; delete std; delete dst; return result; }
/**
 * Produces the generic non-location display name of a time zone.
 *
 * The lookup order is: (1) a generic name defined directly for the zone,
 * (2) the standard name, when the zone does not observe daylight saving time
 * around the given date and that name differs from the meta zone's generic
 * name, (3) the meta zone's generic name, replaced by a partial location name
 * when the zone's offsets differ from those of the meta zone's reference
 * ("golden") zone.
 *
 * @param tz    The time zone.
 * @param type  UTZGNM_LONG or UTZGNM_SHORT.
 * @param date  The date for which the name is requested.
 * @param name  Receives the result; left bogus when no name is available or
 *              an internal call fails.
 * @return A reference to name.
 */
UnicodeString&
TZGNCore::formatGenericNonLocationName(const TimeZone& tz, UTimeZoneGenericNameType type, UDate date, UnicodeString& name) const {
    U_ASSERT(type == UTZGNM_LONG || type == UTZGNM_SHORT);
    name.setToBogus();

    const UChar* uID = ZoneMeta::getCanonicalCLDRID(tz);
    if (uID == NULL) {
        return name;
    }

    UnicodeString tzID(TRUE, uID, -1);

    // Try to get a name from time zone first
    UTimeZoneNameType nameType = (type == UTZGNM_LONG) ? UTZNM_LONG_GENERIC : UTZNM_SHORT_GENERIC;
    fTimeZoneNames->getTimeZoneDisplayName(tzID, nameType, name);
    if (!name.isEmpty()) {
        return name;
    }

    // Try meta zone
    UChar mzIDBuf[32];
    UnicodeString mzID(mzIDBuf, 0, UPRV_LENGTHOF(mzIDBuf));
    fTimeZoneNames->getMetaZoneID(tzID, date, mzID);
    if (mzID.isEmpty()) {
        return name;
    }

    UErrorCode status = U_ZERO_ERROR;
    UBool useStandard = FALSE;
    int32_t raw, sav;   // offsets of tz at `date`; reused for the golden zone check below
    UChar tmpNameBuf[64];

    tz.getOffset(date, FALSE, raw, sav, status);
    if (U_FAILURE(status)) {
        return name;
    }

    if (sav == 0) {
        useStandard = TRUE;

        TimeZone *tmptz = tz.clone();
        if (tmptz == NULL) {
            // Could not clone the zone; report "no name" instead of
            // dereferencing a null pointer below.
            return name;
        }

        // Check if the zone actually uses daylight saving time around the time
        BasicTimeZone *btz = NULL;
        if (dynamic_cast<OlsonTimeZone *>(tmptz) != NULL
            || dynamic_cast<SimpleTimeZone *>(tmptz) != NULL
            || dynamic_cast<RuleBasedTimeZone *>(tmptz) != NULL
            || dynamic_cast<VTimeZone *>(tmptz) != NULL) {
            btz = (BasicTimeZone*)tmptz;
        }

        if (btz != NULL) {
            TimeZoneTransition before;
            UBool beforTrs = btz->getPreviousTransition(date, TRUE, before);
            if (beforTrs
                    && (date - before.getTime() < kDstCheckRange)
                    && before.getFrom()->getDSTSavings() != 0) {
                useStandard = FALSE;
            } else {
                TimeZoneTransition after;
                UBool afterTrs = btz->getNextTransition(date, FALSE, after);
                if (afterTrs
                        && (after.getTime() - date < kDstCheckRange)
                        && after.getTo()->getDSTSavings() != 0) {
                    useStandard = FALSE;
                }
            }
        } else {
            // If not BasicTimeZone... only if the instance is not an ICU's implementation.
            // We may get a wrong answer in edge case, but it should practically work OK.
            //
            // The probes write into their own variables. Writing them into
            // raw/sav would replace the offsets at `date` with offsets at
            // date +/- kDstCheckRange, and those are what the golden zone
            // comparison further down would then use.
            int32_t probeRaw = 0, probeSav = 0;
            tmptz->getOffset(date - kDstCheckRange, FALSE, probeRaw, probeSav, status);
            if (probeSav != 0) {
                useStandard = FALSE;
            } else {
                tmptz->getOffset(date + kDstCheckRange, FALSE, probeRaw, probeSav, status);
                if (probeSav != 0) {
                    useStandard = FALSE;
                }
            }
            if (U_FAILURE(status)) {
                delete tmptz;
                return name;
            }
        }
        delete tmptz;
    }

    if (useStandard) {
        UTimeZoneNameType stdNameType = (nameType == UTZNM_LONG_GENERIC)
            ? UTZNM_LONG_STANDARD : UTZNM_SHORT_STANDARD;
        UnicodeString stdName(tmpNameBuf, 0, UPRV_LENGTHOF(tmpNameBuf));
        fTimeZoneNames->getDisplayName(tzID, stdNameType, date, stdName);
        if (!stdName.isEmpty()) {
            name.setTo(stdName);

            // TODO: revisit this issue later
            // In CLDR, the same display name is used for both generic and standard
            // for some meta zones in some locales.  This looks like a data bug.
            // For now, we check if the standard name is different from its generic
            // name below.
            UChar genNameBuf[64];
            UnicodeString mzGenericName(genNameBuf, 0, UPRV_LENGTHOF(genNameBuf));
            fTimeZoneNames->getMetaZoneDisplayName(mzID, nameType, mzGenericName);
            if (stdName.caseCompare(mzGenericName, 0) == 0) {
                name.setToBogus();
            }
        }
    }

    if (name.isEmpty()) {
        // Get a name from meta zone
        UnicodeString mzName(tmpNameBuf, 0, UPRV_LENGTHOF(tmpNameBuf));
        fTimeZoneNames->getMetaZoneDisplayName(mzID, nameType, mzName);
        if (!mzName.isEmpty()) {
            // Check if we need to use a partial location format.
            // This check is done by comparing offset with the meta zone's
            // golden zone at the given date.
            UChar idBuf[32];
            UnicodeString goldenID(idBuf, 0, UPRV_LENGTHOF(idBuf));
            fTimeZoneNames->getReferenceZoneID(mzID, fTargetRegion, goldenID);
            if (!goldenID.isEmpty() && goldenID != tzID) {
                TimeZone *goldenZone = TimeZone::createTimeZone(goldenID);
                int32_t raw1, sav1;

                // Check offset in the golden zone with wall time.
                // With getOffset(date, false, offsets1),
                // you may get incorrect results because of time overlap at DST->STD
                // transition.
                goldenZone->getOffset(date + raw + sav, TRUE, raw1, sav1, status);
                delete goldenZone;
                if (U_SUCCESS(status)) {
                    if (raw != raw1 || sav != sav1) {
                        // Now we need to use a partial location format
                        getPartialLocationName(tzID, mzID, (nameType == UTZNM_LONG_GENERIC), mzName, name);
                    } else {
                        name.setTo(mzName);
                    }
                }
            } else {
                name.setTo(mzName);
            }
        }
    }
    return name;
}
/**
 * Implements the ToUnicode operation for a single label.
 *
 * The label is name-prepped when it contains non-ASCII code units, checked
 * for the ACE prefix, decoded from Punycode, and the decoded form is verified
 * by converting it back with uidna_toASCII() and comparing the result with
 * the prepared input. If any step fails the original input is copied to the
 * destination and *status is reset to U_ZERO_ERROR, following the
 * "ToUnicode never fails" rule quoted below.
 *
 * @param src           Input label.
 * @param srcLength     Length of src, or -1 if it is NUL-terminated.
 * @param dest          Output buffer.
 * @param destCapacity  Capacity of dest, in UChars.
 * @param options       UIDNA_* option bits; UIDNA_ALLOW_UNASSIGNED is read here,
 *                      and the full value is forwarded to uidna_toASCII().
 * @param nameprep      String-prep profile passed to usprep_prepare().
 * @param parseError    Receives parse error details from the called functions.
 * @param status        In/out error code; see u_terminateUChars() for what is
 *                      reported when dest is too small.
 * @return The value returned by u_terminateUChars() for the required length,
 *         or 0 when srcLength is 0 or less than -1.
 */
static int32_t
_internal_toUnicode(const UChar* src, int32_t srcLength,
                    UChar* dest, int32_t destCapacity,
                    int32_t options,
                    UStringPrepProfile* nameprep,
                    UParseError* parseError,
                    UErrorCode* status)
{
    //get the options
    //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
    int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;

    // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];

    //initialize pointers to stack buffers
    UChar  *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
    int32_t b1Len, b2Len, b1PrimeLen, b3Len,
            b1Capacity = MAX_LABEL_BUFFER_SIZE,
            b2Capacity = MAX_LABEL_BUFFER_SIZE,
            b3Capacity = MAX_LABEL_BUFFER_SIZE,
            reqLength=0;

    b1Len = 0;
    UBool* caseFlags = NULL;

    UBool srcIsASCII = TRUE;
    /*UBool srcIsLDH = TRUE;
    int32_t failPos =0;*/

    // step 1: find out if all the codepoints in src are ASCII
    if(srcLength==-1){
        srcLength = 0;
        for(;src[srcLength]!=0;){
            if(src[srcLength]> 0x7f){
                srcIsASCII = FALSE;
            }/*else if(isLDHChar(src[srcLength])==FALSE){
                // here we do not assemble surrogates
                // since we know that LDH code points
                // are in the ASCII range only
                srcIsLDH = FALSE;
                failPos = srcLength;
            }*/
            srcLength++;
        }
    }else if(srcLength > 0){
        for(int32_t j=0; j<srcLength; j++){
            if(src[j]> 0x7f){
                srcIsASCII = FALSE;
            }/*else if(isLDHChar(src[j])==FALSE){
                // here we do not assemble surrogates
                // since we know that LDH code points
                // are in the ASCII range only
                srcIsLDH = FALSE;
                failPos = j;
            }*/
        }
    }else{
        return 0;
    }

    if(srcIsASCII == FALSE){
        // step 2: process the string
        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // redo processing of string
            /* we do not have enough room so grow the buffer*/
            b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
            if(b1==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }

            *status = U_ZERO_ERROR; // reset error

            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
        }
        //bail out on error
        if(U_FAILURE(*status)){
            goto CLEANUP;
        }
    }else{
        //just point src to b1
        b1 = (UChar*) src;
        b1Len = srcLength;
    }

    // The RFC states that
    // <quote>
    // ToUnicode never fails. If any step fails, then the original input
    // is returned immediately in that step.
    // </quote>

    //step 3: verify ACE Prefix
    if(startsWithPrefix(b1,b1Len)){

        //step 4: Remove the ACE Prefix
        b1Prime = b1 + ACE_PREFIX_LENGTH;
        b1PrimeLen  = b1Len - ACE_PREFIX_LENGTH;

        //step 5: Decode using punycode
        b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);

        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // redo processing of string
            /* we do not have enough room so grow the buffer*/
            b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
            if(b2==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }

            *status = U_ZERO_ERROR; // reset error

            b2Len =  u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
        }

        //step 6:Apply toASCII
        b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);

        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // redo processing of string
            /* we do not have enough room so grow the buffer*/
            b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
            if(b3==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }

            *status = U_ZERO_ERROR; // reset error

            b3Len =  uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
        }
        //bail out on error
        if(U_FAILURE(*status)){
            goto CLEANUP;
        }

        //step 7: verify
        if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
            // Cause the original to be returned.
            *status = U_IDNA_VERIFICATION_ERROR;
            goto CLEANUP;
        }

        //step 8: return output of step 5
        reqLength = b2Len;
        if(b2Len <= destCapacity) {
            uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
        }
    }
    else{
        // See the start of this if statement for why this is commented out.
        // verify that STD3 ASCII rules are satisfied
        /*if(useSTD3ASCIIRules == TRUE){
            if( srcIsLDH == FALSE // source contains some non-LDH characters
                || src[0] ==  HYPHEN || src[srcLength-1] == HYPHEN){
                *status = U_IDNA_STD3_ASCII_RULES_ERROR;

                // populate the parseError struct
                if(srcIsLDH==FALSE){
                    // failPos is always set the index of failure
                    uprv_syntaxError(src,failPos, srcLength,parseError);
                }else if(src[0] == HYPHEN){
                    // fail position is 0
                    uprv_syntaxError(src,0,srcLength,parseError);
                }else{
                    // the last index in the source is always length-1
                    uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
                }

                goto CLEANUP;
            }
        }*/
        // just return the source
        //copy the source to destination
        if(srcLength <= destCapacity){
            uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
        }
        reqLength = srcLength;
    }


CLEANUP:

    // Release every buffer that was replaced by a heap allocation.
    // b1 may also alias src (ASCII input), which is not ours to free.
    if(b1 != b1Stack && b1!=src){
        uprv_free(b1);
    }
    if(b2 != b2Stack){
        uprv_free(b2);
    }
    // b3 is grown on overflow in step 6 exactly like b1 and b2 and used to be
    // leaked on every exit path.
    if(b3 != b3Stack){
        uprv_free(b3);
    }
    uprv_free(caseFlags);

    // The RFC states that
    // <quote>
    // ToUnicode never fails. If any step fails, then the original input
    // is returned immediately in that step.
    // </quote>
    // So if any step fails lets copy source to destination
    if(U_FAILURE(*status)){
        //copy the source to destination
        if(dest && srcLength <= destCapacity){
            // srcLength should have already been set earlier.
            U_ASSERT(srcLength >= 0);
            uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
        }
        reqLength = srcLength;
        *status = U_ZERO_ERROR;
    }

    return u_terminateUChars(dest, destCapacity, reqLength, status);
}
// Computes the memory layout information of `type`: offsets for fields still
// flagged uFieldFlagsConstrained, the strong/weak reference tables in
// type->Refs, and the resulting ValueSize/ObjectSize.
//
// Returns immediately unless type->IsClosed() is true.
//
// NOTE(review): the reference tables are filled through
// type->Refs.StrongCount++ / WeakCount++, so this presumably relies on both
// counters being 0 on entry - confirm against the callers.
void uBuildMemory(uType* type)
{
    U_ASSERT(type);

    if (!type->IsClosed())
        return;

    // strongCount/weakCount: number of reference slots needed in type->Refs.
    // objOffset: running end offset of the instance fields (starts after the
    //            uObject header when U_IS_OBJECT(type) is true).
    // typeOffset: running size of the storage needed for constrained static fields.
    size_t strongCount = 0,
        weakCount = 0,
        objOffset = U_IS_OBJECT(type)
            ? sizeof(uObject)
            : 0,
        typeOffset = 0;

    if (type->Base)
        type->Base->Build();

    // Pass 1: build the field types that are needed, count references and
    // assign block-relative offsets to constrained static fields.
    for (size_t i = 0; i < type->FieldCount; i++)
    {
        uFieldInfo& f = type->Fields[i];
        U_ASSERT(f.Type);

        // Field types other than `type` itself and object types are built first;
        // their ValueSize and Refs are read below.
        if (f.Type != type && !U_IS_OBJECT(f.Type))
            f.Type->Build();

        if ((f.Flags & uFieldFlagsStatic) == 0)
        {
            // An unconstrained instance field already has its offset; remember
            // where it ends so constrained fields can be placed after it in pass 2.
            if ((f.Flags & uFieldFlagsConstrained) == 0)
                objOffset = f.Offset + f.Type->ValueSize;

            if (U_IS_VALUE(f.Type))
            {
                // A value-type field contributes all references of its own type.
                strongCount += f.Type->Refs.StrongCount;
                weakCount += f.Type->Refs.WeakCount;
            }
            else if ((f.Flags & uFieldFlagsWeak) != 0)
                weakCount++;
            else
                strongCount++;
        }
        else if ((f.Flags & uFieldFlagsConstrained) != 0)
        {
            // Constrained static field: give it an aligned offset relative to the
            // start of the static storage area; made absolute in pass 2.
            uAlignField(typeOffset, f.Type);
            f.Offset = typeOffset;
            typeOffset += f.Type->ValueSize;
        }
    }

    // One zero-filled allocation, laid out as
    //   [strong ref infos][weak ref infos][constrained static field storage].
    // It is never released in this function (see the original "Leak" note).
    // NOTE(review): the malloc() result is passed to memset() without a null check.
    size_t size = typeOffset + (strongCount + weakCount) * sizeof(uRefInfo<size_t>);
    uint8_t* ptr = (uint8_t*)malloc(size); // Leak
    memset(ptr, 0, size);

    type->Refs.Strong = (uRefInfo<size_t>*)ptr;
    ptr += strongCount * sizeof(uRefInfo<size_t>);

    type->Refs.Weak = (uRefInfo<size_t>*)ptr;
    ptr += weakCount * sizeof(uRefInfo<size_t>);
    // ptr now points at the start of the constrained static field storage.

    // Pass 2: finalize offsets and fill in the reference tables.
    for (size_t i = 0; i < type->FieldCount; i++)
    {
// DEBUG_NAME expands to an extra leading initializer (a name built from
// type->FullName and the field index) for the uRefInfo aggregates below when
// DEBUG_ARC is defined, and to nothing otherwise.
#ifdef DEBUG_ARC
#define DEBUG_NAME ((Xli::String)type->FullName + "[" + (int)i + "]").CopyPtr(), // Leak
#else
#define DEBUG_NAME
#endif
        uFieldInfo& f = type->Fields[i];

        if ((f.Flags & uFieldFlagsStatic) == 0)
        {
            // Constrained instance field: place it at the next aligned offset
            // and clear the flag now that the offset is final.
            if ((f.Flags & uFieldFlagsConstrained) != 0)
            {
                uAlignField(objOffset, f.Type);
                f.Flags &= ~uFieldFlagsConstrained;
                f.Offset = objOffset;
                objOffset += f.Type->ValueSize;
            }

            if (U_IS_VALUE(f.Type))
            {
                f.Flags &= ~uFieldFlagsWeak;

                // Copy the field type's reference entries, combined with this
                // field's offset (presumably rebasing them onto the enclosing
                // type - depends on uRefInfo's operator+, not visible here).
                for (size_t j = 0; j < f.Type->Refs.StrongCount; j++)
                    type->Refs.Strong[type->Refs.StrongCount++] = f.Type->Refs.Strong[j] + f.Offset;

                for (size_t j = 0; j < f.Type->Refs.WeakCount; j++)
                    type->Refs.Weak[type->Refs.WeakCount++] = f.Type->Refs.Weak[j] + f.Offset;
            }
            else if ((f.Flags & uFieldFlagsWeak) != 0)
            {
                uRefInfo<size_t> ref = {DEBUG_NAME f.Offset};
                type->Refs.Weak[type->Refs.WeakCount++] = ref;
            }
            else
            {
                uRefInfo<size_t> ref = {DEBUG_NAME f.Offset};
                type->Refs.Strong[type->Refs.StrongCount++] = ref;
            }
        }
        else
        {
            // Constrained static field: turn the block-relative offset from
            // pass 1 into an absolute address inside the allocation above.
            if ((f.Flags & uFieldFlagsConstrained) != 0)
            {
                f.Flags &= ~uFieldFlagsConstrained;
                f.Offset += (uintptr_t)ptr;
            }

            // For static fields f.Offset is used as an address; reference-holding
            // ones are added to the _WeakRefs / _StrongRefs collections.
            if ((f.Flags & uFieldFlagsWeak) != 0)
            {
                uRefInfo<uWeakObject**> ref = {DEBUG_NAME (uWeakObject**)f.Offset};
                _WeakRefs->Add(ref);
            }
            else if (U_IS_OBJECT(f.Type))
            {
                uRefInfo<uObject**> ref = {DEBUG_NAME (uObject**)f.Offset};
                _StrongRefs->Add(ref);
            }
        }
#undef DEBUG_NAME
    }

    if (U_IS_VALUE(type))
    {
        // Value type: the aligned end of the fields becomes ValueSize (it must
        // match a previously set, non-zero ValueSize).
        if (objOffset != 0)
        {
            uAlignField(objOffset, type);
            U_ASSERT(type->ValueSize == objOffset || type->ValueSize == 0);
            type->ValueSize = objOffset;
        }

        type->ObjectSize = sizeof(uObject) + type->ValueSize;
    }
    else
    {
        // Otherwise ObjectSize only ever grows: it must cover the base type
        // and the end of the last field.
        if (type->Base && type->Base->ObjectSize > objOffset)
            objOffset = type->Base->ObjectSize;

        if (objOffset > type->ObjectSize)
            type->ObjectSize = objOffset;
    }

#ifdef DEBUG_UNSAFE
    // Debug-only check: count how many instance fields cover each byte of the object.
    uint8_t* layout = (uint8_t*)U_ALLOCA(type->ObjectSize);
    memset(layout, 0, type->ObjectSize);

    for (size_t i = 0; i < type->FieldCount; i++)
    {
        uFieldInfo& f = type->Fields[i];

        if ((f.Flags & uFieldFlagsStatic) == 0)
        {
            for (size_t j = 0; j < f.Type->ValueSize; j++)
            {
                U_ASSERT(f.Offset + j < type->ObjectSize);
                layout[f.Offset + j]++;
            }
        }
    }

    // Verify that no fields are overlapping
    for (size_t i = 0; i < type->ObjectSize; i++)
        U_ASSERT(layout[i] < 2);
#endif
}
/*
 * Divides a range of dictionary characters into words, using the dictionary
 * costs (plus a Katakana run heuristic) to pick the cheapest segmentation,
 * and pushes the resulting boundaries onto foundBreaks.
 *
 * The range is processed in NFKC-normalized form; boundaries are mapped back
 * to positions in the original text before they are pushed.
 *
 * @param text A UText representing the text
 * @param rangeStart The start of the range of dictionary characters
 * @param rangeEnd The end of the range of dictionary characters
 * @param foundBreaks Stack that receives the break positions, in ascending
 *        order, as indices into text
 * @return The number of breaks pushed onto foundBreaks; 0 if the range is
 *         empty or if extraction/normalization fails
 */
int32_t
CjkBreakEngine::divideUpDictionaryRange( UText *text,
        int32_t rangeStart,
        int32_t rangeEnd,
        UStack &foundBreaks ) const {
    if (rangeStart >= rangeEnd) {
        return 0;
    }

    const size_t defaultInputLength = 80;
    size_t inputLength = rangeEnd - rangeStart;
    // TODO: Replace by UnicodeString.
    AutoBuffer<UChar, defaultInputLength> charString(inputLength);

    // Normalize the input string and put it in normalizedText.
    // The map from the indices of the normalized input to the raw
    // input is kept in charPositions.
    UErrorCode status = U_ZERO_ERROR;
    utext_extract(text, rangeStart, rangeEnd, charString.elems(), inputLength, &status);
    if (U_FAILURE(status)) {
        return 0;
    }

    UnicodeString inputString(charString.elems(), inputLength);
    // TODO: Use Normalizer2.
    UNormalizationMode norm_mode = UNORM_NFKC;
    UBool isNormalized =
        Normalizer::quickCheck(inputString, norm_mode, status) == UNORM_YES ||
        Normalizer::isNormalized(inputString, norm_mode, status);

    // TODO: Replace by UVector32.
    AutoBuffer<int32_t, defaultInputLength> charPositions(inputLength + 1);
    int numChars = 0;
    UText normalizedText = UTEXT_INITIALIZER;
    // Needs to be declared here because normalizedText holds onto its buffer.
    UnicodeString normalizedString;
    if (isNormalized) {
        // Already normalized: charPositions[k] is the offset in inputString
        // reached after stepping over k code points.
        int32_t index = 0;
        charPositions[0] = 0;
        while(index < inputString.length()) {
            index = inputString.moveIndex32(index, 1);
            charPositions[++numChars] = index;
        }
        utext_openUnicodeString(&normalizedText, &inputString, &status);
    }
    else {
        Normalizer::normalize(inputString, norm_mode, 0, normalizedString, status);
        if (U_FAILURE(status)) {
            return 0;
        }
        charPositions.resize(normalizedString.length() + 1);
        // Step through the raw input with a normalizing iterator and record the
        // iterator's source index after each next() call.
        Normalizer normalizer(charString.elems(), inputLength, norm_mode);
        int32_t index = 0;
        charPositions[0] = 0;
        while(index < normalizer.endIndex()){
            /* UChar32 uc = */ normalizer.next();
            charPositions[++numChars] = index = normalizer.getIndex();
        }
        utext_openUnicodeString(&normalizedText, &normalizedString, &status);
    }

    if (U_FAILURE(status)) {
        return 0;
    }

    // From this point on, all the indices refer to the indices of
    // the normalized input string.

    // bestSnlp[i] is the snlp of the best segmentation of the first i
    // characters in the range to be matched.
    // kuint32max marks a position that no segmentation has reached yet.
    // TODO: Replace by UVector32.
    AutoBuffer<uint32_t, defaultInputLength> bestSnlp(numChars + 1);
    bestSnlp[0] = 0;
    for(int i = 1; i <= numChars; i++) {
        bestSnlp[i] = kuint32max;
    }

    // prev[i] is the index of the last CJK character in the previous word in
    // the best segmentation of the first i characters.
    // TODO: Replace by UVector32.
    AutoBuffer<int, defaultInputLength> prev(numChars + 1);
    for(int i = 0; i <= numChars; i++){
        prev[i] = -1;
    }

    const size_t maxWordSize = 20;
    // TODO: Replace both with UVector32.
    AutoBuffer<int32_t, maxWordSize> values(numChars);
    AutoBuffer<int32_t, maxWordSize> lengths(numChars);

    // Dynamic programming to find the best segmentation.
    bool is_prev_katakana = false;
    for (int32_t i = 0; i < numChars; ++i) {
        //utext_setNativeIndex(text, rangeStart + i);
        // NOTE(review): i counts characters (see charPositions) but is used
        // here as a native index into normalizedText; presumably the two only
        // coincide when the text has no supplementary characters - confirm.
        utext_setNativeIndex(&normalizedText, i);
        // Position i cannot be reached by any segmentation found so far.
        // Note that is_prev_katakana is left untouched in this case.
        if (bestSnlp[i] == kuint32max)
            continue;

        int32_t count;
        // limit maximum word length matched to size of current substring
        int32_t maxSearchLength = (i + maxWordSize < (size_t) numChars)? maxWordSize : (numChars - i);

        fDictionary->matches(&normalizedText, maxSearchLength, lengths.elems(), count, maxSearchLength, values.elems());

        // if there are no single character matches found in the dictionary
        // starting with this character, treat character as a 1-character word
        // with the highest value possible, i.e. the least likely to occur.
        // Exclude Korean characters from this treatment, as they should be left
        // together by default.
        if((count == 0 || lengths[0] != 1) &&
                !fHangulWordSet.contains(utext_current32(&normalizedText))) {
            values[count] = maxSnlp;
            lengths[count++] = 1;
        }

        // Relax every position that can be reached by one candidate word
        // starting at i.
        for (int j = 0; j < count; j++) {
            uint32_t newSnlp = bestSnlp[i] + values[j];
            if (newSnlp < bestSnlp[lengths[j] + i]) {
                bestSnlp[lengths[j] + i] = newSnlp;
                prev[lengths[j] + i] = i;
            }
        }

        // In Japanese,
        // Katakana word in single character is pretty rare. So we apply
        // the following heuristic to Katakana: any continuous run of Katakana
        // characters is considered a candidate word with a default cost
        // specified in the katakanaCost table according to its length.
        //utext_setNativeIndex(text, rangeStart + i);
        // Reset the cursor to i before looking at the current character again.
        utext_setNativeIndex(&normalizedText, i);
        bool is_katakana = isKatakana(utext_current32(&normalizedText));
        if (!is_prev_katakana && is_katakana) {
            int j = i + 1;
            utext_next32(&normalizedText);
            // Find the end of the continuous run of Katakana characters
            while (j < numChars && (j - i) < kMaxKatakanaGroupLength &&
                    isKatakana(utext_current32(&normalizedText))) {
                utext_next32(&normalizedText);
                ++j;
            }
            // Runs that reach kMaxKatakanaGroupLength are not offered as a candidate word.
            if ((j - i) < kMaxKatakanaGroupLength) {
                uint32_t newSnlp = bestSnlp[i] + getKatakanaCost(j - i);
                if (newSnlp < bestSnlp[j]) {
                    bestSnlp[j] = newSnlp;
                    prev[j] = i;
                }
            }
        }
        is_prev_katakana = is_katakana;
    }

    // Start pushing the optimal offset index into t_boundary (t for tentative).
    // prev[numChars] is guaranteed to be meaningful.
    // We'll first push in the reverse order, i.e.,
    // t_boundary[0] = numChars, and afterwards do a swap.
    // TODO: Replace by UVector32.
    AutoBuffer<int, maxWordSize> t_boundary(numChars + 1);

    int numBreaks = 0;
    // No segmentation found, set boundary to end of range
    if (bestSnlp[numChars] == kuint32max) {
        t_boundary[numBreaks++] = numChars;
    } else {
        // Walk the prev[] chain back from the end of the range to its start.
        for (int i = numChars; i > 0; i = prev[i]) {
            t_boundary[numBreaks++] = i;
        }
        U_ASSERT(prev[t_boundary[numBreaks - 1]] == 0);
    }

    // Reverse offset index in t_boundary.
    // Don't add a break for the start of the dictionary range if there is one
    // there already.
    if (foundBreaks.size() == 0 || foundBreaks.peeki() < rangeStart) {
        t_boundary[numBreaks++] = 0;
    }

    // Now that we're done, convert positions in t_boundary[] (indices in
    // the normalized input string) back to indices in the raw input string
    // while reversing t_boundary and pushing values to foundBreaks.
    for (int i = numBreaks-1; i >= 0; i--) {
        foundBreaks.push(charPositions[t_boundary[i]] + rangeStart, status);
    }

    utext_close(&normalizedText);
    return numBreaks;
}
/**
 * Signals the pthread condition variable wrapped by cond.
 *
 * @param cond  The condition variable to signal; its fCondition member is
 *              handed to pthread_cond_signal(). A non-zero return code
 *              trips U_ASSERT.
 */
U_CAPI void U_EXPORT2
umtx_condSignal(UConditionVar *cond) {
    const int rc = pthread_cond_signal(&cond->fCondition);
    U_ASSERT(rc == 0);
    (void)rc;   // rc is otherwise unused if U_ASSERT expands to nothing
}
/*
 * Data swapper for bidi/shaping property data.
 *
 * Validates the data header and format version, reads the 16 leading
 * indexes and, when length>=0, copies the data and swaps the indexes[], the
 * trie and the mirrors[] array; the jgArray[] bytes are only copied.
 *
 * With length<0 (preflighting) nothing is written and only the total size is
 * computed.
 *
 * Returns headerSize plus the data size taken from indexes[UBIDI_IX_LENGTH],
 * or 0 with *pErrorCode set when the data is rejected.
 */
static int32_t U_CALLCONV
ubidi_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    const UDataInfo *info;
    const uint8_t *src;
    uint8_t *dst;
    const int32_t *rawIndexes;
    int32_t indexes[16];
    int32_t headerSize, dataSize, pos, sectionBytes, k;
    UBool swapping;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    info=(const UDataInfo *)((const char *)inData+4);
    if( info->dataFormat[0]!=UBIDI_FMT_0 ||    /* dataFormat="BiDi" */
        info->dataFormat[1]!=UBIDI_FMT_1 ||
        info->dataFormat[2]!=UBIDI_FMT_2 ||
        info->dataFormat[3]!=UBIDI_FMT_3 ||
        !((info->formatVersion[0]==1 &&
           info->formatVersion[2]==UTRIE_SHIFT &&
           info->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
          info->formatVersion[0]==2)
    ) {
        udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    src=(const uint8_t *)inData+headerSize;
    dst=(uint8_t *)outData+headerSize;
    rawIndexes=(const int32_t *)src;

    /* a negative length requests preflighting: sizes only, no swapping */
    swapping=(UBool)(length>=0);

    if(swapping) {
        length-=headerSize;
        if(length<16*4) {
            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
    for(k=0; k<16; ++k) {
        indexes[k]=udata_readInt32(ds, rawIndexes[k]);
    }

    /* get the total length of the data */
    dataSize=indexes[UBIDI_IX_LENGTH];

    if(!swapping) {
        return headerSize+dataSize;
    }

    if(length<dataSize) {
        udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy the data for inaccessible bytes */
    if(src!=dst) {
        uprv_memcpy(dst, src, dataSize);
    }

    pos=0;

    /* swap the int32_t indexes[] */
    sectionBytes=indexes[UBIDI_IX_INDEX_TOP]*4;
    ds->swapArray32(ds, src, sectionBytes, dst, pErrorCode);
    pos+=sectionBytes;

    /* swap the UTrie */
    sectionBytes=indexes[UBIDI_IX_TRIE_SIZE];
    utrie2_swapAnyVersion(ds, src+pos, sectionBytes, dst+pos, pErrorCode);
    pos+=sectionBytes;

    /* swap the uint32_t mirrors[] */
    sectionBytes=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
    ds->swapArray32(ds, src+pos, sectionBytes, dst+pos, pErrorCode);
    pos+=sectionBytes;

    /* just skip the uint8_t jgArray[] */
    sectionBytes=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
    pos+=sectionBytes;

    /* the sections above must add up to the size recorded in the indexes */
    U_ASSERT(pos==dataSize);

    return headerSize+dataSize;
}
/** * Override Calendar to compute several fields specific to the Islamic * calendar system. These are: * * <ul><li>ERA * <li>YEAR * <li>MONTH * <li>DAY_OF_MONTH * <li>DAY_OF_YEAR * <li>EXTENDED_YEAR</ul> * * The DAY_OF_WEEK and DOW_LOCAL fields are already set when this * method is called. The getGregorianXxx() methods return Gregorian * calendar equivalents for the given Julian day. * @draft ICU 2.4 */ void IslamicCalendar::handleComputeFields(int32_t julianDay, UErrorCode &status) { int32_t year, month, dayOfMonth, dayOfYear; int32_t startDate; int32_t days = julianDay - CIVIL_EPOC; if (cType == CIVIL || cType == TBLA) { if(cType == TBLA) { days = julianDay - ASTRONOMICAL_EPOC; } // Use the civil calendar approximation, which is just arithmetic year = (int)ClockMath::floorDivide( (double)(30 * days + 10646) , 10631.0 ); month = (int32_t)uprv_ceil((days - 29 - yearStart(year)) / 29.5 ); month = month<11?month:11; startDate = monthStart(year, month); } else if(cType == ASTRONOMICAL){ // Guess at the number of elapsed full months since the epoch int32_t months = (int32_t)uprv_floor((double)days / CalendarAstronomer::SYNODIC_MONTH); startDate = (int32_t)uprv_floor(months * CalendarAstronomer::SYNODIC_MONTH); double age = moonAge(internalGetTime(), status); if (U_FAILURE(status)) { status = U_MEMORY_ALLOCATION_ERROR; return; } if ( days - startDate >= 25 && age > 0) { // If we're near the end of the month, assume next month and search backwards months++; } // Find out the last time that the new moon was actually visible at this longitude // This returns midnight the night that the moon was visible at sunset. 
while ((startDate = trueMonthStart(months)) > days) { // If it was after the date in question, back up a month and try again months--; } year = months / 12 + 1; month = months % 12; } else if(cType == UMALQURA) { int32_t umalquraStartdays = yearStart(UMALQURA_YEAR_START) ; if( days < umalquraStartdays){ //Use Civil calculation year = (int)ClockMath::floorDivide( (double)(30 * days + 10646) , 10631.0 ); month = (int32_t)uprv_ceil((days - 29 - yearStart(year)) / 29.5 ); month = month<11?month:11; startDate = monthStart(year, month); }else{ int y =UMALQURA_YEAR_START-1, m =0; long d = 1; while(d > 0){ y++; d = days - yearStart(y) +1; if(d == handleGetYearLength(y)){ m=11; break; }else if(d < handleGetYearLength(y) ){ int monthLen = handleGetMonthLength(y, m); m=0; while(d > monthLen){ d -= monthLen; m++; monthLen = handleGetMonthLength(y, m); } break; } } year = y; month = m; } } else { // invalid 'civil' U_ASSERT(false); // should not get here, out of range year=month=0; } dayOfMonth = (days - monthStart(year, month)) + 1; // Now figure out the day of the year. dayOfYear = (days - monthStart(year, 0)) + 1; internalSet(UCAL_ERA, 0); internalSet(UCAL_YEAR, year); internalSet(UCAL_EXTENDED_YEAR, year); internalSet(UCAL_MONTH, month); internalSet(UCAL_DAY_OF_MONTH, dayOfMonth); internalSet(UCAL_DAY_OF_YEAR, dayOfYear); }
//------------------------------------------------------------------------------ // // scanSet Construct a UnicodeSet from the text at the current scan // position. Advance the scan position to the first character // after the set. // // A new RBBI setref node referring to the set is pushed onto the node // stack. // // The scan position is normally under the control of the state machine // that controls rule parsing. UnicodeSets, however, are parsed by // the UnicodeSet constructor, not by the RBBI rule parser. // //------------------------------------------------------------------------------ void RBBIRuleScanner::scanSet() { UnicodeSet *uset; ParsePosition pos; int startPos; int i; if (U_FAILURE(*fRB->fStatus)) { return; } pos.setIndex(fScanIndex); startPos = fScanIndex; UErrorCode localStatus = U_ZERO_ERROR; uset = new UnicodeSet(); if (uset == NULL) { localStatus = U_MEMORY_ALLOCATION_ERROR; } else { uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus); } if (U_FAILURE(localStatus)) { // TODO: Get more accurate position of the error from UnicodeSet's return info. // UnicodeSet appears to not be reporting correctly at this time. #ifdef RBBI_DEBUG RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex()); #endif error(localStatus); delete uset; return; } // Verify that the set contains at least one code point. // U_ASSERT(uset!=NULL); if (uset->isEmpty()) { // This set is empty. // Make it an error, because it almost certainly is not what the user wanted. // Also, avoids having to think about corner cases in the tree manipulation code // that occurs later on. error(U_BRK_RULE_EMPTY_SET); delete uset; return; } // Advance the RBBI parse postion over the UnicodeSet pattern. // Don't just set fScanIndex because the line/char positions maintained // for error reporting would be thrown off. 
i = pos.getIndex(); for (;;) { if (fNextIndex >= i) { break; } nextCharLL(); } if (U_SUCCESS(*fRB->fStatus)) { RBBINode *n; n = pushNewNode(RBBINode::setRef); if (U_FAILURE(*fRB->fStatus)) { return; } n->fFirstPos = startPos; n->fLastPos = fNextIndex; fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); // findSetFor() serves several purposes here: // - Adopts storage for the UnicodeSet, will be responsible for deleting. // - Mantains collection of all sets in use, needed later for establishing // character categories for run time engine. // - Eliminates mulitiple instances of the same set. // - Creates a new uset node if necessary (if this isn't a duplicate.) findSetFor(n->fText, n, uset); } }
/**
 * Sets a numeric attribute of a number formatter.
 *
 * For a DecimalFormat the attribute is mapped onto the matching setter;
 * boolean attributes treat any non-zero newValue as true. Any other formatter
 * is expected to be a RuleBasedNumberFormat, for which only
 * UNUM_LENIENT_PARSE is handled (and only when collation is compiled in).
 * Unsupported attributes are silently ignored.
 *
 * @param fmt       The formatter to modify.
 * @param attr      The attribute to set.
 * @param newValue  The new value of the attribute.
 */
U_CAPI void U_EXPORT2
unum_setAttribute(    UNumberFormat*          fmt,
            UNumberFormatAttribute  attr,
            int32_t                 newValue)
{
    if (((NumberFormat*)fmt)->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
        DecimalFormat* df = (DecimalFormat*) fmt;
        switch(attr) {
        case UNUM_PARSE_INT_ONLY:
            df->setParseIntegerOnly(newValue!=0);
            break;

        case UNUM_GROUPING_USED:
            df->setGroupingUsed(newValue!=0);
            break;

        case UNUM_DECIMAL_ALWAYS_SHOWN:
            df->setDecimalSeparatorAlwaysShown(newValue!=0);
            break;

        case UNUM_MAX_INTEGER_DIGITS:
            df->setMaximumIntegerDigits(newValue);
            break;

        case UNUM_MIN_INTEGER_DIGITS:
            df->setMinimumIntegerDigits(newValue);
            break;

        case UNUM_INTEGER_DIGITS:
            // Sets both bounds to the same value.
            df->setMinimumIntegerDigits(newValue);
            df->setMaximumIntegerDigits(newValue);
            break;

        case UNUM_MAX_FRACTION_DIGITS:
            df->setMaximumFractionDigits(newValue);
            break;

        case UNUM_MIN_FRACTION_DIGITS:
            df->setMinimumFractionDigits(newValue);
            break;

        case UNUM_FRACTION_DIGITS:
            // Sets both bounds to the same value.
            df->setMinimumFractionDigits(newValue);
            df->setMaximumFractionDigits(newValue);
            break;

        case UNUM_SIGNIFICANT_DIGITS_USED:
            df->setSignificantDigitsUsed(newValue!=0);
            break;

        case UNUM_MAX_SIGNIFICANT_DIGITS:
            df->setMaximumSignificantDigits(newValue);
            break;

        case UNUM_MIN_SIGNIFICANT_DIGITS:
            df->setMinimumSignificantDigits(newValue);
            break;

        case UNUM_MULTIPLIER:
            df->setMultiplier(newValue);
            break;

        case UNUM_GROUPING_SIZE:
            df->setGroupingSize(newValue);
            break;

        case UNUM_ROUNDING_MODE:
            df->setRoundingMode((DecimalFormat::ERoundingMode)newValue);
            break;

        case UNUM_FORMAT_WIDTH:
            df->setFormatWidth(newValue);
            break;

        case UNUM_PADDING_POSITION:
            /** The position at which padding will take place. */
            df->setPadPosition((DecimalFormat::EPadPosition)newValue);
            break;

        case UNUM_SECONDARY_GROUPING_SIZE:
            df->setSecondaryGroupingSize(newValue);
            break;

        default:
            /* Shouldn't get here anyway */
            break;
        }
    } else {
        U_ASSERT(((NumberFormat*)fmt)->getDynamicClassID() == RuleBasedNumberFormat::getStaticClassID());
        if (attr == UNUM_LENIENT_PARSE) {
#if !UCONFIG_NO_COLLATION
            // Compare against zero instead of casting to UBool: a narrowing
            // cast would turn values such as 256 into FALSE, unlike the
            // other boolean attributes above.
            ((RuleBasedNumberFormat*)fmt)->setLenient(newValue != 0);
#endif
        }
    }
}
/**
 * Retrieves a text attribute of a number formatter.
 *
 * A DecimalFormat supports the prefix/suffix, padding character and currency
 * code tags. Any other formatter is expected to be a RuleBasedNumberFormat,
 * which supports UNUM_DEFAULT_RULESET and UNUM_PUBLIC_RULESETS (all rule set
 * names, each followed by a semicolon).
 *
 * @param fmt           The formatter to query.
 * @param tag           The attribute to retrieve.
 * @param result        Destination buffer; may be NULL together with
 *                      resultLength==0 for pure preflighting.
 * @param resultLength  Capacity of result.
 * @param status        In/out error code; set to U_UNSUPPORTED_ERROR for a
 *                      tag the formatter does not support.
 * @return The value returned by UnicodeString::extract() for the attribute
 *         text, or -1 if status was already a failure or the tag is
 *         unsupported.
 */
U_CAPI int32_t U_EXPORT2
unum_getTextAttribute(const UNumberFormat*  fmt,
            UNumberFormatTextAttribute      tag,
            UChar*                          result,
            int32_t                         resultLength,
            UErrorCode*                     status)
{
    if (U_FAILURE(*status)) {
        return -1;
    }

    // For pure preflighting the string stays an empty dummy;
    // in every other case it aliases the caller's buffer.
    UnicodeString text;
    const UBool preflightOnly = (result == NULL && resultLength == 0);
    if (!preflightOnly) {
        text.setTo(result, 0, resultLength);
    }

    const NumberFormat* nf = (const NumberFormat*)fmt;
    if (nf->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
        const DecimalFormat* decFmt = (const DecimalFormat*)fmt;
        switch (tag) {
        case UNUM_POSITIVE_PREFIX:
            decFmt->getPositivePrefix(text);
            break;

        case UNUM_POSITIVE_SUFFIX:
            decFmt->getPositiveSuffix(text);
            break;

        case UNUM_NEGATIVE_PREFIX:
            decFmt->getNegativePrefix(text);
            break;

        case UNUM_NEGATIVE_SUFFIX:
            decFmt->getNegativeSuffix(text);
            break;

        case UNUM_PADDING_CHARACTER:
            text = decFmt->getPadCharacterString();
            break;

        case UNUM_CURRENCY_CODE:
            text = UnicodeString(decFmt->getCurrency());
            break;

        default:
            *status = U_UNSUPPORTED_ERROR;
            return -1;
        }
    } else {
        U_ASSERT(nf->getDynamicClassID() == RuleBasedNumberFormat::getStaticClassID());
        const RuleBasedNumberFormat* ruleFmt = (const RuleBasedNumberFormat*)fmt;
        switch (tag) {
        case UNUM_DEFAULT_RULESET:
            text = ruleFmt->getDefaultRuleSetName();
            break;

        case UNUM_PUBLIC_RULESETS: {
            // Concatenate all rule set names, each terminated by ';'.
            const int32_t nameCount = ruleFmt->getNumberOfRuleSetNames();
            for (int idx = 0; idx < nameCount; ++idx) {
                text.append(ruleFmt->getRuleSetName(idx));
                text.append((UChar)0x003b); // semicolon
            }
            break;
        }

        default:
            *status = U_UNSUPPORTED_ERROR;
            return -1;
        }
    }

    return text.extract(result, resultLength, *status);
}
/**
 * Reads a numeric attribute of a number formatter.
 *
 * For a DecimalFormat the attribute is mapped onto the matching getter. Any
 * other formatter is expected to be a RuleBasedNumberFormat, for which only
 * UNUM_LENIENT_PARSE is answered (and only when collation is compiled in).
 *
 * @param fmt   The formatter to query.
 * @param attr  The attribute to read.
 * @return The attribute value, or -1 if the attribute is not supported.
 */
U_CAPI int32_t U_EXPORT2
unum_getAttribute(const UNumberFormat*          fmt,
          UNumberFormatAttribute  attr)
{
    // Stays -1 for every attribute that is not handled below.
    int32_t value = -1;
    const NumberFormat* nf = (const NumberFormat*)fmt;

    if (nf->getDynamicClassID() != DecimalFormat::getStaticClassID()) {
        U_ASSERT(nf->getDynamicClassID() == RuleBasedNumberFormat::getStaticClassID());
#if !UCONFIG_NO_COLLATION
        if (attr == UNUM_LENIENT_PARSE) {
            value = ((const RuleBasedNumberFormat*)fmt)->isLenient();
        }
#endif
        return value;
    }

    const DecimalFormat* decFmt = (const DecimalFormat*)fmt;
    switch (attr) {
    case UNUM_PARSE_INT_ONLY:
        value = decFmt->isParseIntegerOnly();
        break;

    case UNUM_GROUPING_USED:
        value = decFmt->isGroupingUsed();
        break;

    case UNUM_DECIMAL_ALWAYS_SHOWN:
        value = decFmt->isDecimalSeparatorAlwaysShown();
        break;

    case UNUM_MAX_INTEGER_DIGITS:
        value = decFmt->getMaximumIntegerDigits();
        break;

    case UNUM_MIN_INTEGER_DIGITS:
    case UNUM_INTEGER_DIGITS:
        // TBD: what should UNUM_INTEGER_DIGITS return? Currently the minimum.
        value = decFmt->getMinimumIntegerDigits();
        break;

    case UNUM_MAX_FRACTION_DIGITS:
        value = decFmt->getMaximumFractionDigits();
        break;

    case UNUM_MIN_FRACTION_DIGITS:
    case UNUM_FRACTION_DIGITS:
        // TBD: what should UNUM_FRACTION_DIGITS return? Currently the minimum.
        value = decFmt->getMinimumFractionDigits();
        break;

    case UNUM_SIGNIFICANT_DIGITS_USED:
        value = decFmt->areSignificantDigitsUsed();
        break;

    case UNUM_MAX_SIGNIFICANT_DIGITS:
        value = decFmt->getMaximumSignificantDigits();
        break;

    case UNUM_MIN_SIGNIFICANT_DIGITS:
        value = decFmt->getMinimumSignificantDigits();
        break;

    case UNUM_MULTIPLIER:
        value = decFmt->getMultiplier();
        break;

    case UNUM_GROUPING_SIZE:
        value = decFmt->getGroupingSize();
        break;

    case UNUM_ROUNDING_MODE:
        value = decFmt->getRoundingMode();
        break;

    case UNUM_FORMAT_WIDTH:
        value = decFmt->getFormatWidth();
        break;

    case UNUM_PADDING_POSITION:
        value = decFmt->getPadPosition();
        break;

    case UNUM_SECONDARY_GROUPING_SIZE:
        value = decFmt->getSecondaryGroupingSize();
        break;

    default:
        /* enums out of sync? unsupported enum? */
        break;
    }

    return value;
}
/**
 * Constructs the enumeration around an existing UEnumeration by storing the
 * pointer in uenum.
 *
 * @param _uenum  The UEnumeration to wrap; asserted to be non-null.
 */
UStringEnumeration::UStringEnumeration(UEnumeration* _uenum)
    : uenum(_uenum)
{
    U_ASSERT(uenum != NULL);
}
// Return the keyword string stored in gKeywords for plural form p.
// p must lie in [ZERO, COUNT); this is only checked by assertion.
const char *StandardPlural::getKeyword(Form p) {
    U_ASSERT(p >= ZERO && p < COUNT);
    const char *keyword = gKeywords[p];
    return keyword;
}
//----------------------------------------------------------------------------- // // buildStateTable() Determine the set of runtime DFA states and the // transition tables for these states, by the algorithm // of fig. 3.44 in Aho. // // Most of the comments are quotes of Aho's psuedo-code. // //----------------------------------------------------------------------------- void RBBITableBuilder::buildStateTable() { if (U_FAILURE(*fStatus)) { return; } RBBIStateDescriptor *failState; // Set it to NULL to avoid uninitialized warning RBBIStateDescriptor *initialState = NULL; // // Add a dummy state 0 - the stop state. Not from Aho. int lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1; failState = new RBBIStateDescriptor(lastInputSymbol, fStatus); if (failState == NULL) { *fStatus = U_MEMORY_ALLOCATION_ERROR; goto ExitBuildSTdeleteall; } failState->fPositions = new UVector(*fStatus); if (failState->fPositions == NULL) { *fStatus = U_MEMORY_ALLOCATION_ERROR; } if (failState->fPositions == NULL || U_FAILURE(*fStatus)) { goto ExitBuildSTdeleteall; } fDStates->addElement(failState, *fStatus); if (U_FAILURE(*fStatus)) { goto ExitBuildSTdeleteall; } // initially, the only unmarked state in Dstates is firstpos(root), // where toot is the root of the syntax tree for (r)#; initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus); if (initialState == NULL) { *fStatus = U_MEMORY_ALLOCATION_ERROR; } if (U_FAILURE(*fStatus)) { goto ExitBuildSTdeleteall; } initialState->fPositions = new UVector(*fStatus); if (initialState->fPositions == NULL) { *fStatus = U_MEMORY_ALLOCATION_ERROR; } if (U_FAILURE(*fStatus)) { goto ExitBuildSTdeleteall; } setAdd(initialState->fPositions, fTree->fFirstPosSet); fDStates->addElement(initialState, *fStatus); if (U_FAILURE(*fStatus)) { goto ExitBuildSTdeleteall; } // while there is an unmarked state T in Dstates do begin for (;;) { RBBIStateDescriptor *T = NULL; int32_t tx; for (tx=1; tx<fDStates->size(); tx++) { RBBIStateDescriptor 
*temp; temp = (RBBIStateDescriptor *)fDStates->elementAt(tx); if (temp->fMarked == FALSE) { T = temp; break; } } if (T == NULL) { break; } // mark T; T->fMarked = TRUE; // for each input symbol a do begin int32_t a; for (a = 1; a<=lastInputSymbol; a++) { // let U be the set of positions that are in followpos(p) // for some position p in T // such that the symbol at position p is a; UVector *U = NULL; RBBINode *p; int32_t px; for (px=0; px<T->fPositions->size(); px++) { p = (RBBINode *)T->fPositions->elementAt(px); if ((p->fType == RBBINode::leafChar) && (p->fVal == a)) { if (U == NULL) { U = new UVector(*fStatus); if (U == NULL) { *fStatus = U_MEMORY_ALLOCATION_ERROR; goto ExitBuildSTdeleteall; } } setAdd(U, p->fFollowPos); } } // if U is not empty and not in DStates then int32_t ux = 0; UBool UinDstates = FALSE; if (U != NULL) { U_ASSERT(U->size() > 0); int ix; for (ix=0; ix<fDStates->size(); ix++) { RBBIStateDescriptor *temp2; temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix); if (setEquals(U, temp2->fPositions)) { delete U; U = temp2->fPositions; ux = ix; UinDstates = TRUE; break; } } // Add U as an unmarked state to Dstates if (!UinDstates) { RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus); if (newState == NULL) { *fStatus = U_MEMORY_ALLOCATION_ERROR; } if (U_FAILURE(*fStatus)) { goto ExitBuildSTdeleteall; } newState->fPositions = U; fDStates->addElement(newState, *fStatus); if (U_FAILURE(*fStatus)) { return; } ux = fDStates->size()-1; } // Dtran[T, a] := U; T->fDtran->setElementAt(ux, a); } } } return; // delete local pointers only if error occured. ExitBuildSTdeleteall: delete initialState; delete failState; }
//------------------------------------------------------------------------------
//
//  doParseAction        Do some action during rule parsing.
//                       Called by the parse state machine.
//                       Actions build the parse tree and Unicode Sets,
//                       and maintain the parse stack for nested expressions.
//
//                       Returns TRUE when parsing should continue. Returns FALSE
//                       when the action asked to stop (an error action or doExit)
//                       or when *fRB->fStatus holds a failure after the action.
//
//                       TODO:  unify EParseAction and RBBI_RuleParseAction enum types.
//                              They represent exactly the same thing.  They're separate
//                              only to work around enum forward declaration restrictions
//                              in some compilers, while at the same time avoiding multiple
//                              definitions problems.  I'm sure that there's a better way.
//
//------------------------------------------------------------------------------
UBool RBBIRuleScanner::doParseActions(int32_t action)
{
    RBBINode *n       = NULL;

    UBool   returnVal = TRUE;

    switch (action) {

    case doExprStart:
        pushNewNode(RBBINode::opStart);
        fRuleNum++;
        break;


    case doNoChain:
        // Scanned a '^' while on the rule start state.
        fNoChainInRule = TRUE;
        break;


    case doExprOrOperator:
        {
            // The operand popped here becomes the left child of the new OR node.
            fixOpStack(RBBINode::precOpCat);
            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
            RBBINode  *orNode      = pushNewNode(RBBINode::opOr);
            if (U_FAILURE(*fRB->fStatus)) {
                break;
            }
            orNode->fLeftChild     = operandNode;
            operandNode->fParent   = orNode;
        }
        break;

    case doExprCatOperator:
        // concatenation operator.
        // For the implicit concatenation of adjacent terms in an expression that are
        //   not separated by any other operator.  Action is invoked between the
        //   actions for the two terms.
        {
            fixOpStack(RBBINode::precOpCat);
            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
            RBBINode  *catNode     = pushNewNode(RBBINode::opCat);
            if (U_FAILURE(*fRB->fStatus)) {
                break;
            }
            catNode->fLeftChild    = operandNode;
            operandNode->fParent   = catNode;
        }
        break;

    case doLParen:
        // Open Paren.
        //   The openParen node is a dummy operation type with a low precedence,
        //     which has the effect of ensuring that any real binary op that
        //     follows within the parens binds more tightly to the operands than
        //     stuff outside of the parens.
        pushNewNode(RBBINode::opLParen);
        break;

    case doExprRParen:
        fixOpStack(RBBINode::precLParen);
        break;

    case doNOP:
        break;

    case doStartAssign:
        // We've just scanned "$variable = "
        // The top of the node stack has the $variable ref node.

        // Save the start position of the RHS text in the StartExpression node
        //   that precedes the $variableReference node on the stack.
        //   This will eventually be used when saving the full $variable replacement
        //   text as a string.
        n = fNodeStack[fNodeStackPtr-1];
        n->fFirstPos = fNextIndex;              // move past the '='

        // Push a new start-of-expression node; needed to keep parse of the
        //   RHS expression happy.
        pushNewNode(RBBINode::opStart);
        break;




    case doEndAssign:
        {
            // We have reached the end of an assignment statement.
            //   Current scan char is the ';' that terminates the assignment.

            // Terminate expression, leaves expression parse tree rooted in TOS node.
            fixOpStack(RBBINode::precStart);

            // Expected stack layout, top last: start-expression, $variable ref, RHS expression.
            RBBINode *startExprNode  = fNodeStack[fNodeStackPtr-2];
            RBBINode *varRefNode     = fNodeStack[fNodeStackPtr-1];
            RBBINode *RHSExprNode    = fNodeStack[fNodeStackPtr];

            // Save original text of right side of assignment, excluding the terminating ';'
            //  in the root of the node for the right-hand-side expression.
            RHSExprNode->fFirstPos = startExprNode->fFirstPos;
            RHSExprNode->fLastPos  = fScanIndex;
            fRB->fRules.extractBetween(RHSExprNode->fFirstPos, RHSExprNode->fLastPos, RHSExprNode->fText);

            // Expression parse tree becomes l. child of the $variable reference node.
            varRefNode->fLeftChild = RHSExprNode;
            RHSExprNode->fParent   = varRefNode;

            // Make a symbol table entry for the $variableRef node.
            fSymbolTable->addEntry(varRefNode->fText, varRefNode, *fRB->fStatus);
            if (U_FAILURE(*fRB->fStatus)) {
                // This is a round-about way to get the parse position set
                //  so that duplicate symbols error messages include a line number.
                UErrorCode t = *fRB->fStatus;
                *fRB->fStatus = U_ZERO_ERROR;
                error(t);
            }

            // Clean up the stack.
            delete startExprNode;
            fNodeStackPtr-=3;
            break;
        }

    case doEndOfRule:
        {
        fixOpStack(RBBINode::precStart);      // Terminate expression, leaves expression
        if (U_FAILURE(*fRB->fStatus)) {       //   parse tree rooted in TOS node.
            break;
        }
#ifdef RBBI_DEBUG
        if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeStack("end of rule");}
#endif
        U_ASSERT(fNodeStackPtr == 1);
        RBBINode *thisRule = fNodeStack[fNodeStackPtr];

        // If this rule includes a look-ahead '/', add a endMark node to the
        //   expression tree.
        if (fLookAheadRule) {
            RBBINode  *endNode        = pushNewNode(RBBINode::endMark);
            RBBINode  *catNode        = pushNewNode(RBBINode::opCat);
            if (U_FAILURE(*fRB->fStatus)) {
                break;
            }
            // Drop the two nodes just pushed; catNode then replaces the rule at the top of the stack.
            fNodeStackPtr -= 2;
            catNode->fLeftChild       = thisRule;
            catNode->fRightChild      = endNode;
            fNodeStack[fNodeStackPtr] = catNode;
            endNode->fVal             = fRuleNum;
            endNode->fLookAheadEnd    = TRUE;
            thisRule                  = catNode;

            // TODO: Disable chaining out of look-ahead (hard break) rules.
            //   The break on rule match is forced, so there is no point in building up
            //   the state table to chain into another rule for a longer match.
        }

        // Mark this node as being the root of a rule.
        thisRule->fRuleRoot = TRUE;

        // Flag if chaining into this rule is wanted.
        //
        if (fRB->fChainRules &&         // If rule chaining is enabled globally via !!chain
                !fNoChainInRule) {      //     and no '^' chain-in inhibit was on this rule
            thisRule->fChainIn = TRUE;
        }


        // All rule expressions are ORed together.
        // The ';' that terminates an expression really just functions as a '|' with
        //   a low operator precedence.
        //
        // Each of the four sets of rules are collected separately.
        //  (forward, reverse, safe_forward, safe_reverse)
        //  OR this rule into the appropriate group of them.
        //
        RBBINode **destRules = (fReverseRule? &fRB->fReverseTree : fRB->fDefaultTree);

        if (*destRules != NULL) {
            // This is not the first rule encountered.
            // OR previous stuff  (from *destRules)
            // with the current rule expression (on the Node Stack)
            //  with the resulting OR expression going to *destRules
            //
            // NOTE(review): this declaration shadows the outer thisRule; it is
            //   initialized from the same stack slot.
            RBBINode  *thisRule    = fNodeStack[fNodeStackPtr];
            RBBINode  *prevRules   = *destRules;
            RBBINode  *orNode      = pushNewNode(RBBINode::opOr);
            if (U_FAILURE(*fRB->fStatus)) {
                break;
            }
            orNode->fLeftChild     = prevRules;
            prevRules->fParent     = orNode;
            orNode->fRightChild    = thisRule;
            thisRule->fParent      = orNode;
            *destRules             = orNode;
        }
        else
        {
            // This is the first rule encountered (for this direction).
            // Just move its parse tree from the stack to *destRules.
            *destRules = fNodeStack[fNodeStackPtr];
        }
        fReverseRule   = FALSE;   // in preparation for the next rule.
        fLookAheadRule = FALSE;
        fNoChainInRule = FALSE;
        fNodeStackPtr  = 0;
        }
        break;


    case doRuleError:
        error(U_BRK_RULE_SYNTAX);
        returnVal = FALSE;
        break;


    case doVariableNameExpectedErr:
        error(U_BRK_RULE_SYNTAX);
        break;


    //
    //  Unary operands  + ? *
    //    These all appear after the operand to which they apply.
    //    When we hit one, the operand (may be a whole sub expression)
    //    will be on the top of the stack.
    //    Unary Operator becomes TOS, with the old TOS as its one child.
    case doUnaryOpPlus:
        {
            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
            RBBINode  *plusNode    = pushNewNode(RBBINode::opPlus);
            if (U_FAILURE(*fRB->fStatus)) {
                break;
            }
            plusNode->fLeftChild   = operandNode;
            operandNode->fParent   = plusNode;
        }
        break;

    case doUnaryOpQuestion:
        {
            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
            RBBINode  *qNode       = pushNewNode(RBBINode::opQuestion);
            if (U_FAILURE(*fRB->fStatus)) {
                break;
            }
            qNode->fLeftChild      = operandNode;
            operandNode->fParent   = qNode;
        }
        break;

    case doUnaryOpStar:
        {
            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
            RBBINode  *starNode    = pushNewNode(RBBINode::opStar);
            if (U_FAILURE(*fRB->fStatus)) {
                break;
            }
            starNode->fLeftChild   = operandNode;
            operandNode->fParent   = starNode;
        }
        break;

    case doRuleChar:
        // A "Rule Character" is any single character that is a literal part
        // of the regular expression.  Like a, b and c in the expression "(abc*) | [:L:]"
        // These are pretty uncommon in break rules; the terms are more commonly
        //  sets.  To keep things uniform, treat these characters as
        //  sets that just happen to contain only one character.
        {
            n = pushNewNode(RBBINode::setRef);
            if (U_FAILURE(*fRB->fStatus)) {
                break;
            }
            findSetFor(UnicodeString(fC.fChar), n);
            n->fFirstPos = fScanIndex;
            n->fLastPos  = fNextIndex;
            fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
            break;
        }

    case doDotAny:
        // scanned a ".", meaning match any single character.
        {
            n = pushNewNode(RBBINode::setRef);
            if (U_FAILURE(*fRB->fStatus)) {
                break;
            }
            findSetFor(UnicodeString(TRUE, kAny, 3), n);
            n->fFirstPos = fScanIndex;
            n->fLastPos  = fNextIndex;
            fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
            break;
        }

    case doSlash:
        // Scanned a '/', which identifies a look-ahead break position in a rule.
        n = pushNewNode(RBBINode::lookAhead);
        if (U_FAILURE(*fRB->fStatus)) {
            break;
        }
        n->fVal      = fRuleNum;
        n->fFirstPos = fScanIndex;
        n->fLastPos  = fNextIndex;
        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
        fLookAheadRule = TRUE;
        break;


    case doStartTagValue:
        // Scanned a '{', the opening delimiter for a tag value within a rule.
        n = pushNewNode(RBBINode::tag);
        if (U_FAILURE(*fRB->fStatus)) {
            break;
        }
        n->fVal      = 0;
        n->fFirstPos = fScanIndex;
        n->fLastPos  = fNextIndex;
        break;

    case doTagDigit:
        // Just scanned a decimal digit that's part of a tag value
        {
            n = fNodeStack[fNodeStackPtr];
            uint32_t v = u_charDigitValue(fC.fChar);
            U_ASSERT(v < 10);
            // Accumulate the decimal tag value one digit at a time.
            n->fVal = n->fVal*10 + v;
            break;
        }

    case doTagValue:
        // The tag value is complete: record where it ends and save its source text.
        n = fNodeStack[fNodeStackPtr];
        n->fLastPos = fNextIndex;
        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
        break;

    case doTagExpectedError:
        error(U_BRK_MALFORMED_RULE_TAG);
        returnVal = FALSE;
        break;

    case doOptionStart:
        // Scanning a !!option.   At the start of string.
        fOptionStart = fScanIndex;
        break;

    case doOptionEnd:
        {
            // The option name is the rule text from fOptionStart up to the current scan position.
            UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart);
            if (opt == UNICODE_STRING("chain", 5)) {
                fRB->fChainRules = TRUE;
            } else if (opt == UNICODE_STRING("LBCMNoChain", 11)) {
                fRB->fLBCMNoChain = TRUE;
            } else if (opt == UNICODE_STRING("forward", 7)) {
                fRB->fDefaultTree   = &fRB->fForwardTree;
            } else if (opt == UNICODE_STRING("reverse", 7)) {
                fRB->fDefaultTree   = &fRB->fReverseTree;
            } else if (opt == UNICODE_STRING("safe_forward", 12)) {
                fRB->fDefaultTree   = &fRB->fSafeFwdTree;
            } else if (opt == UNICODE_STRING("safe_reverse", 12)) {
                fRB->fDefaultTree   = &fRB->fSafeRevTree;
            } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) {
                fRB->fLookAheadHardBreak = TRUE;
            } else {
                error(U_BRK_UNRECOGNIZED_OPTION);
            }
        }
        break;

    case doReverseDir:
        fReverseRule = TRUE;
        break;

    case doStartVariableName:
        n = pushNewNode(RBBINode::varRef);
        if (U_FAILURE(*fRB->fStatus)) {
            break;
        }
        n->fFirstPos = fScanIndex;
        break;

    case doEndVariableName:
        n = fNodeStack[fNodeStackPtr];
        if (n==NULL || n->fType != RBBINode::varRef) {
            error(U_BRK_INTERNAL_ERROR);
            break;
        }
        n->fLastPos = fScanIndex;
        // fFirstPos+1: the stored name starts one position after the recorded
        //   start (presumably skipping the leading '$' - confirm).
        fRB->fRules.extractBetween(n->fFirstPos+1, n->fLastPos, n->fText);
        // Look the newly scanned name up in the symbol table
        //   If there's an entry, set the l. child of the var ref to the replacement expression.
        //   (We also pass through here when scanning assignments, but no harm is done, other
        //    than a slight wasted effort that seems hard to avoid.  Lookup will be null)
        n->fLeftChild = fSymbolTable->lookupNode(n->fText);
        break;

    case doCheckVarDef:
        // A variable reference without a replacement expression is undefined.
        n = fNodeStack[fNodeStackPtr];
        if (n->fLeftChild == NULL) {
            error(U_BRK_UNDEFINED_VARIABLE);
            returnVal = FALSE;
        }
        break;

    case doExprFinished:
        break;

    case doRuleErrorAssignExpr:
        error(U_BRK_ASSIGN_ERROR);
        returnVal = FALSE;
        break;

    case doExit:
        returnVal = FALSE;
        break;

    case doScanUnicodeSet:
        scanSet();
        break;

    default:
        error(U_BRK_INTERNAL_ERROR);
        returnVal = FALSE;
        break;
    }
    // A failure recorded in the status forces FALSE even if the action itself did not ask to stop.
    return returnVal && U_SUCCESS(*fRB->fStatus);
}
// Call pthread_cond_broadcast() on the condition variable held in cond->fCondition.
// A non-zero return from pthread is only detected by assertion.
U_CAPI void U_EXPORT2
umtx_condBroadcast(UConditionVar *cond) {
    const int rc = pthread_cond_broadcast(&cond->fCondition);
    (void)rc;   // keeps builds without assertions free of unused-variable warnings
    U_ASSERT(rc == 0);
}
//------------------------------------------------------------------------------ // // findSetFor given a UnicodeString, // - find the corresponding Unicode Set (uset node) // (create one if necessary) // - Set fLeftChild of the caller's node (should be a setRef node) // to the uset node // Maintain a hash table of uset nodes, so the same one is always used // for the same string. // If a "to adopt" set is provided and we haven't seen this key before, // add the provided set to the hash table. // If the string is one (32 bit) char in length, the set contains // just one element which is the char in question. // If the string is "any", return a set containing all chars. // //------------------------------------------------------------------------------ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) { RBBISetTableEl *el; // First check whether we've already cached a set for this string. // If so, just use the cached set in the new node. // delete any set provided by the caller, since we own it. el = (RBBISetTableEl *)uhash_get(fSetTable, &s); if (el != NULL) { delete setToAdopt; node->fLeftChild = el->val; U_ASSERT(node->fLeftChild->fType == RBBINode::uset); return; } // Haven't seen this set before. // If the caller didn't provide us with a prebuilt set, // create a new UnicodeSet now. if (setToAdopt == NULL) { if (s.compare(kAny, -1) == 0) { setToAdopt = new UnicodeSet(0x000000, 0x10ffff); } else { UChar32 c; c = s.char32At(0); setToAdopt = new UnicodeSet(c, c); } } // // Make a new uset node to refer to this UnicodeSet // This new uset node becomes the child of the caller's setReference node. // RBBINode *usetNode = new RBBINode(RBBINode::uset); if (usetNode == NULL) { error(U_MEMORY_ALLOCATION_ERROR); return; } usetNode->fInputSet = setToAdopt; usetNode->fParent = node; node->fLeftChild = usetNode; usetNode->fText = s; // // Add the new uset node to the list of all uset nodes. 
// fRB->fUSetNodes->addElement(usetNode, *fRB->fStatus); // // Add the new set to the set hash table. // el = (RBBISetTableEl *)uprv_malloc(sizeof(RBBISetTableEl)); UnicodeString *tkey = new UnicodeString(s); if (tkey == NULL || el == NULL || setToAdopt == NULL) { // Delete to avoid memory leak delete tkey; tkey = NULL; uprv_free(el); el = NULL; delete setToAdopt; setToAdopt = NULL; error(U_MEMORY_ALLOCATION_ERROR); return; } el->key = tkey; el->val = usetNode; uhash_put(fSetTable, el->key, el, fRB->fStatus); return; }
/*
 * Decode a Punycode string into UTF-16.
 *
 * src/srcLength     input; srcLength==-1 means NUL-terminated
 * dest/destCapacity output buffer; dest may be NULL only if destCapacity==0
 * caseFlags         optional; when not NULL, receives one flag per output code
 *                   unit that was written
 * pErrorCode        in/out error code
 *
 * Returns 0 on any error. Otherwise returns the result of u_terminateUChars()
 * for the decoded length. The length keeps being counted even after the output
 * stops fitting into dest.
 *
 * Errors set here: U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 * U_INVALID_CHAR_FOUND for a code unit that is not a basic code point or not a
 * Punycode digit, U_ILLEGAL_CHAR_FOUND for truncated input or overflow.
 */
U_CFUNC int32_t
u_strFromPunycode(const UChar *src, int32_t srcLength,
                  UChar *dest, int32_t destCapacity,
                  UBool *caseFlags,
                  UErrorCode *pErrorCode) {
    int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
            destCPCount, firstSupplementaryIndex, cpLength;
    UChar b;

    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /*
     * Handle the basic code points:
     * Let basicLength be the number of input code points
     * before the last delimiter, or 0 if there is none,
     * then copy the first basicLength code points to the output.
     *
     * The two following loops iterate backward.
     */
    for(j=srcLength; j>0;) {
        if(src[--j]==DELIMITER) {
            break;
        }
    }
    destLength=basicLength=destCPCount=j;
    U_ASSERT(destLength>=0);

    while(j>0) {
        b=src[--j];
        if(!IS_BASIC(b)) {
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }

        if(j<destCapacity) {
            dest[j]=(UChar)b;

            if(caseFlags!=NULL) {
                caseFlags[j]=IS_BASIC_UPPERCASE(b);
            }
        }
    }

    /* Initialize the state: */
    n=INITIAL_N;
    i=0;
    bias=INITIAL_BIAS;
    firstSupplementaryIndex=1000000000;

    /*
     * Main decoding loop:
     * Start just after the last delimiter if any
     * basic code points were copied; start at the beginning otherwise.
     */
    for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
        /*
         * in is the index of the next character to be consumed, and
         * destCPCount is the number of code points in the output array.
         *
         * Decode a generalized variable-length integer into delta,
         * which gets added to i.  The overflow checking is easier
         * if we increase i as we go, then subtract off its starting
         * value at the end to obtain delta.
         */
        for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
            if(in>=srcLength) {
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            /*
             * basicToDigit is indexed by a byte value. A 16-bit code unit above
             * 0xff must not be truncated into that range (U+0161 would otherwise
             * be read as 'a'); it is simply not a Punycode digit.
             */
            b=src[in++];
            digit= b<=0xff ? basicToDigit[b] : -1;
            if(digit<0) {
                *pErrorCode=U_INVALID_CHAR_FOUND;
                return 0;
            }
            if(digit>(0x7fffffff-i)/w) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            i+=digit*w;
            /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
            t=k-bias;
            if(t<TMIN) {
                t=TMIN;
            } else if(t>TMAX) {
                t=TMAX;
            }
            */
            t=k-bias;
            if(t<TMIN) {
                t=TMIN;
            } else if(k>=(bias+TMAX)) {
                t=TMAX;
            }
            if(digit<t) {
                break;
            }

            if(w>0x7fffffff/(BASE-t)) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }
            w*=BASE-t;
        }

        /*
         * Modification from sample code:
         * Increments destCPCount here,
         * where needed instead of in for() loop tail.
         */
        ++destCPCount;
        bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0));

        /*
         * i was supposed to wrap around from (incremented) destCPCount to 0,
         * incrementing n each time, so we'll fix that now:
         */
        if(i/destCPCount>(0x7fffffff-n)) {
            /* integer overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        n+=i/destCPCount;
        i%=destCPCount;
        /* not needed for Punycode: */
        /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */

        if(n>0x10ffff || U_IS_SURROGATE(n)) {
            /* Unicode code point overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        /* Insert n at position i of the output: */
        cpLength=U16_LENGTH(n);
        if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) {
            int32_t codeUnitIndex;

            /*
             * Handle indexes when supplementary code points are present.
             *
             * In almost all cases, there will be only BMP code points before i
             * and even in the entire string.
             * This is handled with the same efficiency as with UTF-32.
             *
             * Only the rare cases with supplementary code points are handled
             * more slowly - but not too bad since this is an insertion anyway.
             */
            if(i<=firstSupplementaryIndex) {
                codeUnitIndex=i;
                if(cpLength>1) {
                    firstSupplementaryIndex=codeUnitIndex;
                } else {
                    ++firstSupplementaryIndex;
                }
            } else {
                codeUnitIndex=firstSupplementaryIndex;
                U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
            }

            /* use the UChar index codeUnitIndex instead of the code point index i */
            if(codeUnitIndex<destLength) {
                uprv_memmove(dest+codeUnitIndex+cpLength,
                             dest+codeUnitIndex,
                             (destLength-codeUnitIndex)*U_SIZEOF_UCHAR);
                if(caseFlags!=NULL) {
                    uprv_memmove(caseFlags+codeUnitIndex+cpLength,
                                 caseFlags+codeUnitIndex,
                                 destLength-codeUnitIndex);
                }
            }
            if(cpLength==1) {
                /* BMP, insert one code unit */
                dest[codeUnitIndex]=(UChar)n;
            } else {
                /* supplementary character, insert two code units */
                dest[codeUnitIndex]=U16_LEAD(n);
                dest[codeUnitIndex+1]=U16_TRAIL(n);
            }
            if(caseFlags!=NULL) {
                /* Case of last character determines uppercase flag: */
                caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]);
                if(cpLength==2) {
                    caseFlags[codeUnitIndex+1]=FALSE;
                }
            }
        }
        destLength+=cpLength;
        U_ASSERT(destLength>=0);
        ++i;
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
//------------------------------------------------------------------------------
//
//  Parse RBBI rules.   The state machine for rules parsing is here.
//                      The state tables are hand-written in the file rbbirpt.txt,
//                      and converted to the form used here by a perl
//                      script rbbicst.pl
//
//                      Does nothing if *fRB->fStatus already holds a failure.
//                      Problems found while parsing are reported through error().
//
//------------------------------------------------------------------------------
void RBBIRuleScanner::parse() {
    uint16_t                state;
    const RBBIRuleTableEl  *tableEl;

    if (U_FAILURE(*fRB->fStatus)) {
        return;
    }

    state = 1;
    nextChar(fC);
    //
    // Main loop for the rule parsing state machine.
    //   Runs once per state transition.
    //   Each time through optionally performs, depending on the state table,
    //      - an advance to the next input char
    //      - an action to be performed.
    //      - pushing or popping a state to/from the local state return stack.
    //
    for (;;) {
        //  Bail out if anything has gone wrong.
        //  RBBI rule file parsing stops on the first error encountered.
        if (U_FAILURE(*fRB->fStatus)) {
            break;
        }

        // Quit if state == 0.  This is the normal way to exit the state machine.
        //
        if (state == 0) {
            break;
        }

        // Find the state table element that matches the input char from the rule, or the
        //    class of the input character.  Start with the first table row for this
        //    state, then linearly scan forward until we find a row that matches the
        //    character.  The last row for each state always matches all characters, so
        //    the search will stop there, if not before.
        //
        tableEl = &gRuleParseStateTable[state];
        #ifdef RBBI_DEBUG
            if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) {
                RBBIDebugPrintf("char, line, col = (\'%c\', %d, %d) state=%s ",
                    fC.fChar, fLineNum, fCharNum, RBBIRuleStateNames[state]);
            }
        #endif

        for (;;) {
            #ifdef RBBI_DEBUG
                if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf("."); fflush(stdout);}
            #endif
            if (tableEl->fCharClass < 127 && fC.fEscaped == FALSE &&   tableEl->fCharClass == fC.fChar) {
                // Table row specified an individual character, not a set, and
                //   the input character is not escaped, and
                //   the input character matched it.
                break;
            }
            if (tableEl->fCharClass == 255) {
                // Table row specified default, match anything character class.
                break;
            }
            if (tableEl->fCharClass == 254 && fC.fEscaped)  {
                // Table row specified "escaped" and the char was escaped.
                break;
            }
            if (tableEl->fCharClass == 253 && fC.fEscaped &&
                (fC.fChar == 0x50 || fC.fChar == 0x70 ))  {
                // Table row specified "escaped P" and the char is either 'p' or 'P'.
                break;
            }
            if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1)  {
                // Table row specified eof and we hit eof on the input.
                break;
            }

            if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 &&   // Table specs a char class &&
                fC.fEscaped == FALSE &&                                      //   char is not escaped &&
                fC.fChar != (UChar32)-1) {                                   //   char is not EOF
                // Classes 128..239 index fRuleSets, offset by 128.
                U_ASSERT((tableEl->fCharClass-128) < UPRV_LENGTHOF(fRuleSets));
                if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) {
                    // Table row specified a character class, or set of characters,
                    //   and the current char matches it.
                    break;
                }
            }

            // No match on this row, advance to the next row for this state,
            tableEl++;
        }
        if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPuts("");}

        //
        // We've found the row of the state table that matches the current input
        //   character from the rules string.
        // Perform any action specified by this row in the state table.
        if (doParseActions((int32_t)tableEl->fAction) == FALSE) {
            // Break out of the state machine loop if
            //   the action signalled some kind of error, or
            //   the action was to exit, occurs on normal end-of-rules-input.
            break;
        }

        if (tableEl->fPushState != 0) {
            fStackPtr++;
            if (fStackPtr >= kStackSize) {
                error(U_BRK_INTERNAL_ERROR);
                RBBIDebugPuts("RBBIRuleScanner::parse() - state stack overflow.");
                // Step back so the store below stays inside the stack.
                fStackPtr--;
            }
            fStack[fStackPtr] = tableEl->fPushState;
        }

        if (tableEl->fNextChar) {
            nextChar(fC);
        }

        // Get the next state from the table entry, or from the
        //   state stack if the next state was specified as "pop".
        if (tableEl->fNextState != 255) {
            state = tableEl->fNextState;
        } else {
            state = fStack[fStackPtr];
            fStackPtr--;
            if (fStackPtr < 0) {
                error(U_BRK_INTERNAL_ERROR);
                RBBIDebugPuts("RBBIRuleScanner::parse() - state stack underflow.");
                // Restore a non-negative stack pointer after reporting the underflow.
                fStackPtr++;
            }
        }

    }

    if (U_FAILURE(*fRB->fStatus)) {
        return;
    }

    // If there are no forward rules set an error.
    //
    if (fRB->fForwardTree == NULL) {
        error(U_BRK_RULE_SYNTAX);
        return;
    }

    //
    // If there were NO user specified reverse rules, set up the equivalent of ".*;"
    //
    if (fRB->fReverseTree == NULL) {
        fRB->fReverseTree  = pushNewNode(RBBINode::opStar);
        RBBINode  *operand = pushNewNode(RBBINode::setRef);
        if (U_FAILURE(*fRB->fStatus)) {
            return;
        }
        findSetFor(UnicodeString(TRUE, kAny, 3), operand);
        fRB->fReverseTree->fLeftChild = operand;
        operand->fParent              = fRB->fReverseTree;
        // Remove the two nodes pushed above from the node stack.
        fNodeStackPtr -= 2;
    }


    //
    // Parsing of the input RBBI rules is complete.
    // We now have a parse tree for the rule expressions
    // and a list of all UnicodeSets that are referenced.
    //
#ifdef RBBI_DEBUG
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "symbols")) {fSymbolTable->rbbiSymtablePrint();}
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ptree")) {
        RBBIDebugPrintf("Completed Forward Rules Parse Tree...\n");
        fRB->fForwardTree->printTree(TRUE);
        RBBIDebugPrintf("\nCompleted Reverse Rules Parse Tree...\n");
        fRB->fReverseTree->printTree(TRUE);
        RBBIDebugPrintf("\nCompleted Safe Point Forward Rules Parse Tree...\n");
        fRB->fSafeFwdTree->printTree(TRUE);
        RBBIDebugPrintf("\nCompleted Safe Point Reverse Rules Parse Tree...\n");
        fRB->fSafeRevTree->printTree(TRUE);
    }
#endif
}
void uRelease(uObject* object) { if (object) { if (Xli::AtomicDecrement(&object->__retains) == 0) { if (!uTryClearWeak(object)) return; #ifdef DEBUG_ARC uThreadData* thread = uGetThreadData(); if (thread->AutoReleasePtr >= thread->AutoReleaseStack) { uAutoReleaseFrame* frame = thread->AutoReleasePtr; if (frame->AllocCount > 0) { frame->FreeCount++; frame->FreeSize += object->__size; } } #endif uType* type = object->__type; switch (type->Type) { case uTypeTypeClass: { uType* baseType = type; do { if (baseType->fp_Finalize) { try { (*baseType->fp_Finalize)(object); } catch (...) { Xli::Error->WriteFormat("Runtime Error: Unhandled exception in finalizer for %s\n", baseType->FullName); } } } while ((baseType = baseType->Base)); uReleaseStruct(type, object); break; } case uTypeTypeStruct: // This must be a boxed value, so append size of object header uReleaseStruct(type, (uint8_t*)object + sizeof(uObject)); break; case uTypeTypeDelegate: uRelease(((uDelegate*)object)->_object); uRelease(((uDelegate*)object)->_prev); break; case uTypeTypeArray: { uArray* array = (uArray*)object; uArrayType* arrayType = (uArrayType*)type; uType* elmType = arrayType->ElementType; switch (elmType->Type) { case uTypeTypeClass: case uTypeTypeInterface: case uTypeTypeDelegate: case uTypeTypeArray: for (uObject** objAddr = (uObject**)array->_ptr; array->_length--; objAddr++) uRelease(*objAddr); break; case uTypeTypeStruct: for (uint8_t* address = (uint8_t*)array->_ptr; array->_length--; address += elmType->ValueSize) uReleaseStruct(elmType, address); break; default: break; } break; } default: break; } #if DEBUG_ARC >= 2 Xli::Error->WriteFormat("free %s #%d (%d bytes)%s\n", object->__type->FullName, object->__id, object->__size, uGetCaller().Ptr()); #endif #ifdef DEBUG_DUMPS uEnterCritical(); _HeapObjects->Remove(object); uExitCritical(); #endif U_ASSERT(object->__type != ::g::Uno::Type_typeof()); U_FREE_OBJECT(object); return; } if (object->__retains < 0) { #if DEBUG_ARC >= 4 
Xli::Error->WriteFormat("*** BAD OBJECT: %s #%d (%d retains) ***%s\n", object->__type->FullName, object->__id, object->__retains, uGetCaller().Ptr()); #else Xli::Error->WriteFormat("*** BAD OBJECT: 0x%llx ***\n", (uintptr_t)object); #endif U_FATAL(); } else { #if DEBUG_ARC >= 3 Xli::Error->WriteFormat("release %s #%d (%d bytes, %d retains)%s\n", object->__type->FullName, object->__id, object->__size, object->__retains, uGetCaller().Ptr()); #endif } } }
// Create the ICUNumberFormatService held in gService.
// gService must still be NULL on entry (checked in debug builds). The
// numfmt_cleanup hook is registered before the service is allocated.
static void U_CALLCONV
initNumberFormatService()
{
    U_ASSERT(gService == NULL);

    ucln_i18n_registerCleanup(UCLN_I18N_NUMFMT, numfmt_cleanup);

    gService = new ICUNumberFormatService();
}
// Allocate storage for an object of the given type and size with
// U_MALLOC_OBJECT and pass it to uInitObject(), returning its result.
// Both a type and a non-zero size are required (checked by assertion).
uObject* uNew(uType* type, size_t size)
{
    U_ASSERT(type && size);

    return uInitObject(type,
                       U_MALLOC_OBJECT(size, type),
                       size);
}
// Test driver for UTimeDate: checks construction from numbers and strings,
// Julian day numbers, day-of-week / day-of-year, validity and strftime(),
// then echoes every date read from standard input and finally formats the
// current time into a ULog::static_date record.
int U_EXPORT main (int argc, char* argv[])
{
   U_ULIB_INIT(argv);

   U_TRACE(5,"main(%d)",argc)

   // The same date built from numbers and parsed from a string
   UTimeDate data1(31,12,99), data2("31/12/99");

   // Julian day numbers for known dates
   U_ASSERT( UTimeDate("14/09/1752").getJulian() == 2361222 )
   U_ASSERT( UTimeDate("31/12/1900").getJulian() == 2415385 )
   U_ASSERT( UTimeDate("01/01/1970").getJulian() == 2440588 )

   U_ASSERT( data1 == data2 )
   U_ASSERT( data1.getDayOfWeek() == 5 ) // Friday
   U_ASSERT( data2.getDayOfYear() == 365 )

   U_ASSERT( UTimeDate("1/3/00").getDayOfWeek() == 3 ) // Wednesday
   U_ASSERT( UTimeDate(31,12,0).getDayOfYear() == 366 )

   // (day of year, year) constructor: day 60 of 2000 must equal 29 February
   UTimeDate data3(60,2000);
   UTimeDate data4("29/02/00");

   U_ASSERT( data3 == data4 )
   U_ASSERT( data3.getDayOfYear() == 60 )

   // ... while day 60 of 1901 must equal 1 March
   UTimeDate data5(60,1901);
   UTimeDate data6("1/3/1901");

   U_ASSERT( data5 == data6 )

   U_ASSERT( UTimeDate(17, 5, 2002).isValid() == true )  // TRUE   May 17th 2002 is valid
   U_ASSERT( UTimeDate(30, 2, 2002).isValid() == false ) // FALSE  Feb 30th does not exist
   U_ASSERT( UTimeDate(29, 2, 2004).isValid() == true )  // TRUE   2004 is a leap year

   UTimeDate data7(29, 2, 2004);

   UString x = data7.strftime("%Y-%m-%d");

   U_ASSERT( x == U_STRING_FROM_CONSTANT("2004-02-29") )

   // NOTE(review): this repeats the first Julian-day assertion above
   U_ASSERT( UTimeDate("14/09/1752").getJulian() == 2361222 )

   cout << "Date: " << data6.strftime("%d/%m/%y") << '\n';

   // Echo every date that can be read from standard input
   while (cin >> data6) cout << data6 << '\n';

   U_ASSERT( UTimeDate::getSecondFromTime("19030314104248Z", true, "%4u%2u%2u%2u%2u%2uZ") < u_now->tv_sec )

   /*
   typedef struct static_date {
      struct timeval _timeval;             // => u_now
      char lock1[1];
      char date1[17+1];                    // 18/06/12 18:45:56
      char lock2[1];
      char date2[26+1];                    // 04/Jun/2012:18:18:37 +0200
      char lock3[1];
      char date3[6+29+2+12+2+19+1];        // Date: Wed, 20 Jun 2012 11:43:17 GMT\r\nServer: ULib\r\nConnection: close\r\n
   } static_date;
   */

   ULog::static_date log_data;

   // Fill the three date buffers; date1 and date2 use u_now adjusted by
   // u_now_adjust, date3 uses the unadjusted u_now seconds.
   (void) u_strftime2(log_data.date1, 17,                "%d/%m/%y %T",    u_now->tv_sec + u_now_adjust);
   (void) u_strftime2(log_data.date2, 26,                "%d/%b/%Y:%T %z", u_now->tv_sec + u_now_adjust);
   (void) u_strftime2(log_data.date3, 6+29+2+12+2+17+2,  "Date: %a, %d %b %Y %T GMT\r\nServer: ULib\r\nConnection: close\r\n", u_now->tv_sec);

   U_INTERNAL_DUMP("date1 = %.17S date2 = %.26S date3+6 = %.29S", log_data.date1, log_data.date2, log_data.date3+6)

   // Disabled: advances u_now one second at a time and prints the three
   // buffers after each UTimeDate::updateTime() call.
   /*
   for (int i = 0; i < 360; ++i)
      {
      u_now->tv_sec++;

      UTimeDate::updateTime(log_data.date1   + 12);
      UTimeDate::updateTime(log_data.date2   + 15);
      UTimeDate::updateTime(log_data.date3+6 + 20);

      cout.write(log_data.date1, 17);
      cout.write(" - ", 3);
      cout.write(log_data.date2, 26);
      cout.write(" - ", 3);
      cout.write(log_data.date3+6, 29);
      cout.put('\n');
      }
   */
}
/*
 * Returns the generic location name for the given canonical time zone ID, or
 * NULL when no name is available or an error occurs.
 *
 * A miss in fLocationNamesMap is resolved by formatting either the country
 * name (primary zone of its country) or the exemplar city name with
 * fRegionFormat. The outcome, including "no name" (stored as gEmpty), is then
 * cached, and a successfully cached name is also added to fGNamesTrie.
 *
 * This method updates the cache and must be called with a lock
 */
const UChar*
TZGNCore::getGenericLocationName(const UnicodeString& tzCanonicalID) {
    U_ASSERT(!tzCanonicalID.isEmpty());
    if (tzCanonicalID.length() > ZID_KEY_MAX) {
        return NULL;
    }

    UErrorCode status = U_ZERO_ERROR;
    UChar tzIDKey[ZID_KEY_MAX + 1];
    int32_t tzIDKeyLen = tzCanonicalID.extract(tzIDKey, ZID_KEY_MAX + 1, status);
    U_ASSERT(status == U_ZERO_ERROR);   // already checked length above
    tzIDKey[tzIDKeyLen] = 0;

    const UChar *locname = (const UChar *)uhash_get(fLocationNamesMap, tzIDKey);

    if (locname != NULL) {
        // gEmpty indicates the name is not available
        if (locname == gEmpty) {
            return NULL;
        }
        return locname;
    }

    // Construct location name
    UnicodeString name;
    UnicodeString usCountryCode;
    UBool isPrimary = FALSE;

    ZoneMeta::getCanonicalCountry(tzCanonicalID, usCountryCode, &isPrimary);

    if (!usCountryCode.isEmpty()) {
        if (isPrimary) {
            // If this is the primary zone in the country, use the country name.
            char countryCode[ULOC_COUNTRY_CAPACITY];
            U_ASSERT(usCountryCode.length() < ULOC_COUNTRY_CAPACITY);
            int32_t ccLen = usCountryCode.extract(0, usCountryCode.length(), countryCode, sizeof(countryCode), US_INV);
            countryCode[ccLen] = 0;

            UnicodeString country;
            fLocaleDisplayNames->regionDisplayName(countryCode, country);
            fRegionFormat.format(country, name, status);
        } else {
            // If this is not the primary zone in the country,
            // use the exemplar city name.

            // getExemplarLocationName should return a non-empty string
            // if the time zone is associated with a region

            UnicodeString city;
            fTimeZoneNames->getExemplarLocationName(tzCanonicalID, city);
            fRegionFormat.format(city, name, status);
        }
        if (U_FAILURE(status)) {
            return NULL;
        }
    }

    locname = name.isEmpty() ? NULL : fStringPool.get(name, status);
    if (U_SUCCESS(status)) {
        // Cache the result
        const UChar* cacheID = ZoneMeta::findTimeZoneID(tzCanonicalID);
        U_ASSERT(cacheID != NULL);
        if (locname == NULL) {
            // gEmpty to indicate - no location name available
            uhash_put(fLocationNamesMap, (void *)cacheID, (void *)gEmpty, &status);
        } else {
            uhash_put(fLocationNamesMap, (void *)cacheID, (void *)locname, &status);
            if (U_FAILURE(status)) {
                locname = NULL;
            } else {
                // put the name info into the trie
                // The cast names the same type as the variable and the sizeof.
                GNameInfo *nameinfo = (GNameInfo *)uprv_malloc(sizeof(GNameInfo));
                if (nameinfo != NULL) {
                    nameinfo->type = UTZGNM_LOCATION;
                    nameinfo->tzID = cacheID;
                    fGNamesTrie.put(locname, nameinfo, status);
                }
            }
        }
    }

    return locname;
}
//--------------------------------------------------------------------- // // dump Output the compiled form of the pattern. // Debugging function only. // //--------------------------------------------------------------------- void RegexPattern::dumpOp(int32_t index) const { (void)index; // Suppress warnings in non-debug build. #if defined(REGEX_DEBUG) static const char * const opNames[] = {URX_OPCODE_NAMES}; int32_t op = fCompiledPat->elementAti(index); int32_t val = URX_VAL(op); int32_t type = URX_TYPE(op); int32_t pinnedType = type; if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) { pinnedType = 0; } printf("%4d %08x %-15s ", index, op, opNames[pinnedType]); switch (type) { case URX_NOP: case URX_DOTANY: case URX_DOTANY_ALL: case URX_FAIL: case URX_CARET: case URX_DOLLAR: case URX_BACKSLASH_G: case URX_BACKSLASH_X: case URX_END: case URX_DOLLAR_M: case URX_CARET_M: // Types with no operand field of interest. break; case URX_RESERVED_OP: case URX_START_CAPTURE: case URX_END_CAPTURE: case URX_STATE_SAVE: case URX_JMP: case URX_JMP_SAV: case URX_JMP_SAV_X: case URX_BACKSLASH_B: case URX_BACKSLASH_BU: case URX_BACKSLASH_D: case URX_BACKSLASH_Z: case URX_STRING_LEN: case URX_CTR_INIT: case URX_CTR_INIT_NG: case URX_CTR_LOOP: case URX_CTR_LOOP_NG: case URX_RELOC_OPRND: case URX_STO_SP: case URX_LD_SP: case URX_BACKREF: case URX_STO_INP_LOC: case URX_JMPX: case URX_LA_START: case URX_LA_END: case URX_BACKREF_I: case URX_LB_START: case URX_LB_CONT: case URX_LB_END: case URX_LBN_CONT: case URX_LBN_END: case URX_LOOP_C: case URX_LOOP_DOT_I: case URX_BACKSLASH_H: case URX_BACKSLASH_R: case URX_BACKSLASH_V: // types with an integer operand field. 
printf("%d", val); break; case URX_ONECHAR: case URX_ONECHAR_I: if (val < 0x20) { printf("%#x", val); } else { printf("'%s'", CStr(UnicodeString(val))()); } break; case URX_STRING: case URX_STRING_I: { int32_t lengthOp = fCompiledPat->elementAti(index+1); U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN); int32_t length = URX_VAL(lengthOp); UnicodeString str(fLiteralText, val, length); printf("%s", CStr(str)()); } break; case URX_SETREF: case URX_LOOP_SR_I: { UnicodeString s; UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); set->toPattern(s, TRUE); printf("%s", CStr(s)()); } break; case URX_STATIC_SETREF: case URX_STAT_SETREF_N: { UnicodeString s; if (val & URX_NEG_SET) { printf("NOT "); val &= ~URX_NEG_SET; } UnicodeSet *set = fStaticSets[val]; set->toPattern(s, TRUE); printf("%s", CStr(s)()); } break; default: printf("??????"); break; } printf("\n"); #endif }
/*
 * Returns the partial location name for the given time zone / meta zone /
 * length combination, or NULL if the name could not be formatted.
 *
 * On a miss in fPartialLocationNamesMap the location part is chosen as
 * follows: the country name when the zone is the reference zone of the meta
 * zone in its country, otherwise the exemplar city, and finally the canonical
 * ID itself when neither is available. The location is combined with
 * mzDisplayName through fFallbackFormat, and the result is added to the map
 * and to fGNamesTrie.
 *
 * This method updates the cache and must be called with a lock
 */
const UChar*
TZGNCore::getPartialLocationName(const UnicodeString& tzCanonicalID,
                        const UnicodeString& mzID, UBool isLong,
                        const UnicodeString& mzDisplayName) {
    U_ASSERT(!tzCanonicalID.isEmpty());
    U_ASSERT(!mzID.isEmpty());
    U_ASSERT(!mzDisplayName.isEmpty());

    PartialLocationKey key;
    key.tzID = ZoneMeta::findTimeZoneID(tzCanonicalID);
    key.mzID = ZoneMeta::findMetaZoneID(mzID);
    key.isLong = isLong;
    U_ASSERT(key.tzID != NULL && key.mzID != NULL);

    const UChar* uplname = (const UChar*)uhash_get(fPartialLocationNamesMap, (void *)&key);
    if (uplname != NULL) {
        return uplname;
    }

    UnicodeString location;
    UnicodeString usCountryCode;
    ZoneMeta::getCanonicalCountry(tzCanonicalID, usCountryCode);
    if (!usCountryCode.isEmpty()) {
        char countryCode[ULOC_COUNTRY_CAPACITY];
        U_ASSERT(usCountryCode.length() < ULOC_COUNTRY_CAPACITY);
        int32_t ccLen = usCountryCode.extract(0, usCountryCode.length(), countryCode, sizeof(countryCode), US_INV);
        countryCode[ccLen] = 0;

        UnicodeString regionalGolden;
        fTimeZoneNames->getReferenceZoneID(mzID, countryCode, regionalGolden);
        if (tzCanonicalID == regionalGolden) {
            // Use country name
            fLocaleDisplayNames->regionDisplayName(countryCode, location);
        } else {
            // Otherwise, use exemplar city name
            fTimeZoneNames->getExemplarLocationName(tzCanonicalID, location);
        }
    } else {
        fTimeZoneNames->getExemplarLocationName(tzCanonicalID, location);
        if (location.isEmpty()) {
            // This could happen when the time zone is not associated with a country,
            // and its ID is not hierarchical, for example, CST6CDT.
            // We use the canonical ID itself as the location for this case.
            location.setTo(tzCanonicalID);
        }
    }

    UErrorCode status = U_ZERO_ERROR;
    UnicodeString name;
    fFallbackFormat.format(location, mzDisplayName, name, status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    uplname = fStringPool.get(name, status);
    if (U_SUCCESS(status)) {
        // Add the name to cache
        PartialLocationKey* cacheKey = (PartialLocationKey *)uprv_malloc(sizeof(PartialLocationKey));
        if (cacheKey != NULL) {
            cacheKey->tzID = key.tzID;
            cacheKey->mzID = key.mzID;
            cacheKey->isLong = key.isLong;
            uhash_put(fPartialLocationNamesMap, (void *)cacheKey, (void *)uplname, &status);
            if (U_FAILURE(status)) {
                // NOTE(review): if fPartialLocationNamesMap was given a key
                // deleter, uhash_put may already have released cacheKey on
                // failure - confirm this is not a double free.
                uprv_free(cacheKey);
            } else {
                // put the name to the local trie as well
                // The cast names the same type as the variable and the sizeof.
                GNameInfo *nameinfo = (GNameInfo *)uprv_malloc(sizeof(GNameInfo));
                if (nameinfo != NULL) {
                    nameinfo->type = isLong ? UTZGNM_LONG : UTZGNM_SHORT;
                    nameinfo->tzID = key.tzID;
                    fGNamesTrie.put(uplname, nameinfo, status);
                }
            }
        }
    }
    return uplname;
}
//----------------------------------------------------------------------------- // // calcChainedFollowPos. Modify the previously calculated followPos sets // to implement rule chaining. NOT described by Aho // //----------------------------------------------------------------------------- void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree) { UVector endMarkerNodes(*fStatus); UVector leafNodes(*fStatus); int32_t i; if (U_FAILURE(*fStatus)) { return; } // get a list of all endmarker nodes. tree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus); // get a list all leaf nodes tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus); if (U_FAILURE(*fStatus)) { return; } // Get all nodes that can be the start a match, which is FirstPosition() // of the portion of the tree corresponding to user-written rules. // See the tree description in bofFixup(). RBBINode *userRuleRoot = tree; if (fRB->fSetBuilder->sawBOF()) { userRuleRoot = tree->fLeftChild->fRightChild; } U_ASSERT(userRuleRoot != NULL); UVector *matchStartNodes = userRuleRoot->fFirstPosSet; // Iteratate over all leaf nodes, // int32_t endNodeIx; int32_t startNodeIx; for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) { RBBINode *tNode = (RBBINode *)leafNodes.elementAt(endNodeIx); RBBINode *endNode = NULL; // Identify leaf nodes that correspond to overall rule match positions. // These include an endMarkerNode in their followPos sets. for (i=0; i<endMarkerNodes.size(); i++) { if (tNode->fFollowPos->contains(endMarkerNodes.elementAt(i))) { endNode = tNode; break; } } if (endNode == NULL) { // node wasn't an end node. Try again with the next. continue; } // We've got a node that can end a match. // Line Break Specific hack: If this node's val correspond to the $CM char class, // don't chain from it. // TODO: Add rule syntax for this behavior, get specifics out of here and // into the rule file. 
if (fRB->fLBCMNoChain) { UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal); if (c != -1) { // c == -1 occurs with sets containing only the {eof} marker string. ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK); if (cLBProp == U_LB_COMBINING_MARK) { continue; } } } // Now iterate over the nodes that can start a match, looking for ones // with the same char class as our ending node. RBBINode *startNode; for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) { startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx); if (startNode->fType != RBBINode::leafChar) { continue; } if (endNode->fVal == startNode->fVal) { // The end val (character class) of one possible match is the // same as the start of another. // Add all nodes from the followPos of the start node to the // followPos set of the end node, which will have the effect of // letting matches transition from a match state at endNode // to the second char of a match starting with startNode. setAdd(endNode->fFollowPos, startNode->fFollowPos); } } } }
/**
 * Sample 06: frequency distribution of the characters in the UTF-8 file
 * "data06.txt".
 *
 * Reads the file in BUFFERSIZE chunks, converts it one code point at a time
 * with ucnv_getNextUChar(), counts letters and the "ie" / "gh" digraphs, and
 * prints every code point that occurred at least once.
 *
 * @return U_ZERO_ERROR on success;
 *         U_FILE_ACCESS_ERROR if the data file cannot be opened;
 *         U_MEMORY_ALLOCATION_ERROR if the frequency table cannot be allocated;
 *         U_UNSUPPORTED_ERROR if a code point outside the table is found;
 *         the ucnv_open() error code if the converter cannot be opened.
 */
UErrorCode convsample_06()
{
  printf("\n\n==============================================\n"
         "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");

  FILE *f;
  int32_t count;
  char inBuf[BUFFERSIZE];
  const char *source;
  const char *sourceLimit;
  int32_t uBufSize = 0;
  UConverter *conv;
  UErrorCode status = U_ZERO_ERROR;
  uint32_t letters=0, total=0;

  CharFreqInfo   *info;
  UChar32   charCount = 0x10000;  /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
  UChar32   p;

  uint32_t ie = 0;
  uint32_t gh = 0;
  UChar32 l = 0;

  f = fopen("data06.txt", "r");
  if(!f)
  {
    fprintf(stderr, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
    return U_FILE_ACCESS_ERROR;
  }

  info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
  if(!info)
  {
    // size_t has no portable %d conversion; print it as unsigned long.
    fprintf(stderr, " Couldn't allocate %lu bytes for freq counter\n",
            (unsigned long)(sizeof(CharFreqInfo)*charCount));
    // Without the table nothing below can run: release the file and stop.
    fclose(f);
    return U_MEMORY_ALLOCATION_ERROR;
  }

  /* reset frequencies */
  for(p=0;p<charCount;p++)
  {
    info[p].codepoint = p;
    info[p].frequency = 0;
  }

  // **************************** START SAMPLE *******************
  conv = ucnv_open("utf-8", &status);
  assert(U_SUCCESS(status));
  if(U_FAILURE(status))
  {
    // Reached only when assert() is compiled out; do not use a NULL converter.
    free(info);
    fclose(f);
    return status;
  }

  uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
  printf("input bytes %d / min chars %d = %d UChars\n",
         BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);

  // grab another buffer's worth
  while((!feof(f)) &&
        ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
  {
    // Convert bytes to unicode
    source = inBuf;
    sourceLimit = inBuf + count;

    while(source < sourceLimit)
    {
      p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
      if(U_FAILURE(status))
      {
        // Report the failure, clear it and carry on with the rest of the input.
        fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
        status = U_ZERO_ERROR;
        continue;
      }
      // NOTE(review): U_ASSERT is handed the status code itself; presumably
      // this sample defines U_ASSERT to test U_FAILURE(x) - confirm.
      U_ASSERT(status);
      total++;

      if(u_isalpha(p))
        letters++;

      if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
        ie++;

      if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
        gh++;

      // Valid table indices are 0 .. charCount-1, so charCount itself must be
      // rejected as well.
      if(p>=charCount)
      {
        fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
        free(info);
        fclose(f);
        ucnv_close(conv);
        return U_UNSUPPORTED_ERROR;
      }
      info[p].frequency++;
      l = p;
    }
  }

  fclose(f);
  ucnv_close(conv);

  printf("%d letters out of %d total UChars.\n", letters, total);
  printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);

  // now, we could sort it..

  //  qsort(info, charCount, sizeof(info[0]), charfreq_compare);

  for(p=0;p<charCount;p++)
  {
    if(info[p].frequency)
    {
      printf("% 5d U+%06X ", info[p].frequency, p);
      if(p <= 0xFFFF)
      {
        prettyPrintUChar((UChar)p);
      }
      printf("\n");
    }
  }
  free(info);
  // ***************************** END SAMPLE ********************

  printf("\n");

  return U_ZERO_ERROR;
}
/** * Currently, getDouble() depends on atof() to do its conversion. * * WARNING!! * This is an extremely costly function. ~1/2 of the conversion time * can be linked to this function. */ double DigitList::getDouble() const { // TODO: fix thread safety. Can probably be finessed some by analyzing // what public const functions can see which DigitLists. // Like precompute fDouble for DigitLists coming in from a parse // or from a Formattable::set(), but not for any others. if (fHaveDouble) { return fDouble; } DigitList *nonConstThis = const_cast<DigitList *>(this); if (gDecimal == 0) { char rep[MAX_DIGITS]; // For machines that decide to change the decimal on you, // and try to be too smart with localization. // This normally should be just a '.'. sprintf(rep, "%+1.1f", 1.0); gDecimal = rep[2]; } if (isZero()) { nonConstThis->fDouble = 0.0; if (decNumberIsNegative(fDecNumber)) { nonConstThis->fDouble /= -1; } } else if (isInfinite()) { if (std::numeric_limits<double>::has_infinity) { nonConstThis->fDouble = std::numeric_limits<double>::infinity(); } else { nonConstThis->fDouble = std::numeric_limits<double>::max(); } if (!isPositive()) { nonConstThis->fDouble = -fDouble; } } else { MaybeStackArray<char, MAX_DBL_DIGITS+18> s; // Note: 14 is a magic constant from the decNumber library documentation, // the max number of extra characters beyond the number of digits // needed to represent the number in string form. Add a few more // for the additional digits we retain. // Round down to appx. double precision, if the number is longer than that. // Copy the number first, so that we don't modify the original. if (getCount() > MAX_DBL_DIGITS + 3) { DigitList numToConvert(*this); numToConvert.reduce(); // Removes any trailing zeros, so that digit count is good. numToConvert.round(MAX_DBL_DIGITS+3); uprv_decNumberToString(numToConvert.fDecNumber, s); // TODO: how many extra digits should be included for an accurate conversion? 
} else { uprv_decNumberToString(this->fDecNumber, s); } U_ASSERT(uprv_strlen(&s[0]) < MAX_DBL_DIGITS+18); if (gDecimal != '.') { char *decimalPt = strchr(s, '.'); if (decimalPt != NULL) { *decimalPt = gDecimal; } } char *end = NULL; nonConstThis->fDouble = uprv_strtod(s, &end); } nonConstThis->fHaveDouble = TRUE; return fDouble; }