U_NAMESPACE_BEGIN U_CFUNC UChar U_CALLCONV uregex_utext_unescape_charAt(int32_t offset, void *ct) { struct URegexUTextUnescapeCharContext *context = (struct URegexUTextUnescapeCharContext *)ct; UChar32 c; if (offset == context->lastOffset + 1) { c = UTEXT_NEXT32(context->text); context->lastOffset++; } else if (offset == context->lastOffset) { c = UTEXT_PREVIOUS32(context->text); UTEXT_NEXT32(context->text); } else { utext_moveIndex32(context->text, offset - context->lastOffset - 1); c = UTEXT_NEXT32(context->text); context->lastOffset = offset; } // !!!: Doesn't handle characters outside BMP if (U_IS_BMP(c)) { return (UChar)c; } else { return 0; } }
//----------------------------------------------------------------------------------- // // handlePrevious() // // Iterate backwards, according to the logic of the reverse rules. // This version handles the exact style backwards rules. // // The logic of this function is very similar to handleNext(), above. // //----------------------------------------------------------------------------------- int32_t BreakIterator::handlePrevious(const RBBIStateTable *statetable) { int32_t state; int16_t category = 0; RBBIRunMode mode; RBBIStateTableRow *row; UChar32 c; int32_t lookaheadStatus = 0; int32_t result = 0; int32_t initialPosition = 0; int32_t lookaheadResult = 0; UBool lookAheadHardBreak = (statetable->fFlags & RBBI_LOOKAHEAD_HARD_BREAK) != 0; #ifdef RBBI_DEBUG if (fTrace) { RBBIDebugPuts("Handle Previous pos char state category"); } #endif // handlePrevious() never gets the rule status. // Flag the status as invalid; if the user ever asks for status, we will need // to back up, then re-find the break position using handleNext(), which does // get the status value. fLastStatusIndexValid = FALSE; fLastRuleStatusIndex = 0; // if we're already at the start of the text, return DONE. if (fText == NULL || fData == NULL || UTEXT_GETNATIVEINDEX(fText)==0) { return BreakIterator::DONE; } // Set up the starting char. initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText); result = initialPosition; c = UTEXT_PREVIOUS32(fText); // Set the initial state for the state machine state = START_STATE; row = (RBBIStateTableRow *) (statetable->fTableData + (statetable->fRowLen * state)); category = 3; mode = RBBI_RUN; if (statetable->fFlags & RBBI_BOF_REQUIRED) { category = 2; mode = RBBI_START; } // loop until we reach the start of the text or transition to state 0 // for (;;) { if (c == U_SENTINEL) { // Reached end of input string. if (mode == RBBI_END || *(int32_t *)fHeader->fFormatVersion == 1 ) { // We have already run the loop one last time with the // character set to the psueudo {eof} value. Now it is time // to unconditionally bail out. // (Or we have an old format binary rule file that does not support {eof}.) if (lookaheadResult < result) { // We ran off the end of the string with a pending look-ahead match. // Treat this as if the look-ahead condition had been met, and return // the match at the / position from the look-ahead rule. result = lookaheadResult; lookaheadStatus = 0; } else if (result == initialPosition) { // Ran off start, no match found. // move one index one (towards the start, since we are doing a previous()) UTEXT_SETNATIVEINDEX(fText, initialPosition); UTEXT_PREVIOUS32(fText); // TODO: shouldn't be necessary. We're already at beginning. Check. } break; } // Run the loop one last time with the fake end-of-input character category. mode = RBBI_END; category = 1; } // // Get the char category. An incoming category of 1 or 2 means that // we are preset for doing the beginning or end of input, and // that we shouldn't get a category from an actual text input character. // if (mode == RBBI_RUN) { // look up the current character's character category, which tells us // which column in the state table to look at. // Note: the 16 in UTRIE_GET16 refers to the size of the data being returned, // not the size of the character going in, which is a UChar32. // UTRIE_GET16(&fTrie, c, category); // Check the dictionary bit in the character's category. // Counter is only used by dictionary based iterators (subclasses). // Chars that need to be handled by a dictionary have a flag bit set // in their category values. // if ((category & 0x4000) != 0) { fDictionaryCharCount++; // And off the dictionary flag bit. category &= ~0x4000; } } #ifdef RBBI_DEBUG if (fTrace) { RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(fText)); if (0x20<=c && c<0x7f) { RBBIDebugPrintf("\"%c\" ", c); } else { RBBIDebugPrintf("%5x ", c); } RBBIDebugPrintf("%3d %3d\n", state, category); } #endif // State Transition - move machine to its next state // state = row->fNextState[category]; row = (RBBIStateTableRow *) (statetable->fTableData + (statetable->fRowLen * state)); if (row->fAccepting == -1) { // Match found, common case. result = (int32_t)UTEXT_GETNATIVEINDEX(fText); } if (row->fLookAhead != 0) { if (lookaheadStatus != 0 && row->fAccepting == lookaheadStatus) { // Lookahead match is completed. result = lookaheadResult; lookaheadStatus = 0; // TODO: make a standalone hard break in a rule work. if (lookAheadHardBreak) { UTEXT_SETNATIVEINDEX(fText, result); return result; } // Look-ahead completed, but other rules may match further. Continue on // TODO: junk this feature? I don't think it's used anywhwere. goto continueOn; } int32_t r = (int32_t)UTEXT_GETNATIVEINDEX(fText); lookaheadResult = r; lookaheadStatus = row->fLookAhead; goto continueOn; } if (row->fAccepting != 0) { // Because this is an accepting state, any in-progress look-ahead match // is no longer relavant. Clear out the pending lookahead status. lookaheadStatus = 0; } continueOn: if (state == STOP_STATE) { // This is the normal exit from the lookup state machine. // We have advanced through the string until it is certain that no // longer match is possible, no matter what characters follow. break; } // Move (backwards) to the next character to process. // If this is a beginning-of-input loop iteration, don't advance // the input position. The next iteration will be processing the // first real input character. if (mode == RBBI_RUN) { c = UTEXT_PREVIOUS32(fText); } else { if (mode == RBBI_START) { mode = RBBI_RUN; } } } // The state machine is done. Check whether it found a match... // If the iterator failed to advance in the match engine, force it ahead by one. // (This really indicates a defect in the break rules. They should always match // at least one character.) if (result == initialPosition) { UTEXT_SETNATIVEINDEX(fText, initialPosition); UTEXT_PREVIOUS32(fText); result = (int32_t)UTEXT_GETNATIVEINDEX(fText); } // Leave the iterator at our result position. UTEXT_SETNATIVEINDEX(fText, result); #ifdef RBBI_DEBUG if (fTrace) { RBBIDebugPrintf("result = %d\n\n", result); } #endif return result; }
/** * Sets the iterator to refer to the last boundary position before the * specified position. * @offset The position to begin searching for a break from. * @return The position of the last boundary before the starting position. */ int32_t BreakIterator::preceding(int32_t offset) { // if we have cached break positions and offset is in the range // covered by them, use them if (fCachedBreakPositions != NULL) { // TODO: binary search? // TODO: What if offset is outside range, but break is not? if (offset > fCachedBreakPositions[0] && offset <= fCachedBreakPositions[fNumCachedBreakPositions - 1]) { fPositionInCache = 0; while (fPositionInCache < fNumCachedBreakPositions && offset > fCachedBreakPositions[fPositionInCache]) ++fPositionInCache; --fPositionInCache; // If we're at the beginning of the cache, need to reevaluate the // rule status if (fPositionInCache <= 0) { fLastStatusIndexValid = FALSE; } utext_setNativeIndex(fText, fCachedBreakPositions[fPositionInCache]); return fCachedBreakPositions[fPositionInCache]; } else { reset(); } } // if the offset passed in is already past the end of the text, // just return DONE; if it's before the beginning, return the // text's starting offset if (fText == NULL || offset > utext_nativeLength(fText)) { // return BreakIterator::DONE; return last(); } else if (offset < 0) { return first(); } // if we start by updating the current iteration position to the // position specified by the caller, we can just use previous() // to carry out this operation if (fSafeFwdTable != NULL) { // new rule syntax utext_setNativeIndex(fText, offset); int32_t newOffset = (int32_t)UTEXT_GETNATIVEINDEX(fText); if (newOffset != offset) { // Will come here if specified offset was not a code point boundary AND // the underlying implmentation is using UText, which snaps any non-code-point-boundary // indices to the containing code point. // For breakitereator::preceding only, these non-code-point indices need to be moved // up to refer to the following codepoint. UTEXT_NEXT32(fText); offset = (int32_t)UTEXT_GETNATIVEINDEX(fText); } // TODO: (synwee) would it be better to just check for being in the middle of a surrogate pair, // rather than adjusting the position unconditionally? // (Change would interact with safe rules.) // TODO: change RBBI behavior for off-boundary indices to match that of UText? // affects only preceding(), seems cleaner, but is slightly different. UTEXT_PREVIOUS32(fText); handleNext(fSafeFwdTable); int32_t result = (int32_t)UTEXT_GETNATIVEINDEX(fText); while (result >= offset) { result = previous(); } return result; } if (fSafeRevTable != NULL) { // backup plan if forward safe table is not available // TODO: check whether this path can be discarded // It's probably OK to say that rules must supply both safe tables // if they use safe tables at all. We have certainly never described // to anyone how to work with just one safe table. utext_setNativeIndex(fText, offset); UTEXT_NEXT32(fText); // handle previous will give result <= offset handlePrevious(fSafeRevTable); // next will give result 0 or 1 boundary away from offset, // most of the time // we have to int32_t oldresult = next(); while (oldresult < offset) { int32_t result = next(); if (result >= offset) { return oldresult; } oldresult = result; } int32_t result = previous(); if (result >= offset) { return previous(); } return result; } // old rule syntax utext_setNativeIndex(fText, offset); return previous(); }
/** * Sets the iterator to refer to the first boundary position following * the specified position. * @offset The position from which to begin searching for a break position. * @return The position of the first break after the current position. */ int32_t BreakIterator::following(int32_t offset) { // if we have cached break positions and offset is in the range // covered by them, use them // TODO: could use binary search // TODO: what if offset is outside range, but break is not? if (fCachedBreakPositions != NULL) { if (offset >= fCachedBreakPositions[0] && offset < fCachedBreakPositions[fNumCachedBreakPositions - 1]) { fPositionInCache = 0; // We are guaranteed not to leave the array due to range test above while (offset >= fCachedBreakPositions[fPositionInCache]) { ++fPositionInCache; } int32_t pos = fCachedBreakPositions[fPositionInCache]; utext_setNativeIndex(fText, pos); return pos; } else { reset(); } } // if the offset passed in is already past the end of the text, // just return DONE; if it's before the beginning, return the // text's starting offset fLastRuleStatusIndex = 0; fLastStatusIndexValid = TRUE; if (fText == NULL || offset >= utext_nativeLength(fText)) { last(); return next(); } else if (offset < 0) { return first(); } // otherwise, set our internal iteration position (temporarily) // to the position passed in. If this is the _beginning_ position, // then we can just use next() to get our return value int32_t result = 0; if (fSafeRevTable != NULL) { // new rule syntax utext_setNativeIndex(fText, offset); // move forward one codepoint to prepare for moving back to a // safe point. // this handles offset being between a supplementary character UTEXT_NEXT32(fText); // handlePrevious will move most of the time to < 1 boundary away handlePrevious(fSafeRevTable); int32_t result = next(); while (result <= offset) { result = next(); } return result; } if (fSafeFwdTable != NULL) { // backup plan if forward safe table is not available utext_setNativeIndex(fText, offset); UTEXT_PREVIOUS32(fText); // handle next will give result >= offset handleNext(fSafeFwdTable); // previous will give result 0 or 1 boundary away from offset, // most of the time // we have to int32_t oldresult = previous(); while (oldresult > offset) { int32_t result = previous(); if (result <= offset) { return oldresult; } oldresult = result; } int32_t result = next(); if (result <= offset) { return next(); } return result; } // otherwise, we have to sync up first. Use handlePrevious() to back // up to a known break position before the specified position (if // we can determine that the specified position is a break position, // we don't back up at all). This may or may not be the last break // position at or before our starting position. Advance forward // from here until we've passed the starting position. The position // we stop on will be the first break position after the specified one. // old rule syntax utext_setNativeIndex(fText, offset); if (offset==0 || offset==1 && utext_getNativeIndex(fText)==0) { return next(); } result = previous(); while (result != BreakIterator::DONE && result <= offset) { result = next(); } return result; }
/** * Advances the iterator backwards, to the last boundary preceding this one. * @return The position of the last boundary position preceding this one. */ int32_t BreakIterator::previous(void) { int32_t result; int32_t startPos; // if we have cached break positions and we're still in the range // covered by them, just move one step backward in the cache if (fCachedBreakPositions != NULL) { if (fPositionInCache > 0) { --fPositionInCache; // If we're at the beginning of the cache, need to reevaluate the // rule status if (fPositionInCache <= 0) { fLastStatusIndexValid = FALSE; } int32_t pos = fCachedBreakPositions[fPositionInCache]; utext_setNativeIndex(fText, pos); return pos; } else { reset(); } } // if we're already sitting at the beginning of the text, return DONE if (fText == NULL || (startPos = current()) == 0) { fLastRuleStatusIndex = 0; fLastStatusIndexValid = TRUE; return BreakIterator::DONE; } if (fSafeRevTable != NULL || fSafeFwdTable != NULL) { result = handlePrevious(fReverseTable); if (fDictionaryCharCount > 0) { result = checkDictionary(result, startPos, TRUE); } return result; } // old rule syntax // set things up. handlePrevious() will back us up to some valid // break position before the current position (we back our internal // iterator up one step to prevent handlePrevious() from returning // the current position), but not necessarily the last one before // where we started int32_t start = current(); UTEXT_PREVIOUS32(fText); int32_t lastResult = handlePrevious(fReverseTable); if (lastResult == UBRK_DONE) { lastResult = 0; utext_setNativeIndex(fText, 0); } result = lastResult; int32_t lastTag = 0; UBool breakTagValid = FALSE; // iterate forward from the known break position until we pass our // starting point. The last break position before the starting // point is our return value for (;;) { result = next(); if (result == BreakIterator::DONE || result >= start) { break; } lastResult = result; lastTag = fLastRuleStatusIndex; breakTagValid = TRUE; } // fLastBreakTag wants to have the value for section of text preceding // the result position that we are to return (in lastResult.) If // the backwards rules overshot and the above loop had to do two or more // next()s to move up to the desired return position, we will have a valid // tag value. But, if handlePrevious() took us to exactly the correct result positon, // we wont have a tag value for that position, which is only set by handleNext(). // set the current iteration position to be the last break position // before where we started, and then return that value utext_setNativeIndex(fText, lastResult); fLastRuleStatusIndex = lastTag; // for use by getRuleStatus() fLastStatusIndexValid = breakTagValid; // No need to check the dictionary; it will have been handled by // next() return lastResult; }
//------------------------------------------------------------------------------- // // checkDictionary This function handles all processing of characters in // the "dictionary" set. It will determine the appropriate // course of action, and possibly set up a cache in the // process. // //------------------------------------------------------------------------------- int32_t BreakIterator::checkDictionary(int32_t startPos, int32_t endPos, UBool reverse) { #if 1 return reverse ? startPos : endPos; #else // Reset the old break cache first. uint32_t dictionaryCount = fDictionaryCharCount; reset(); if (dictionaryCount <= 1 || (endPos - startPos) <= 1) { return (reverse ? startPos : endPos); } // Starting from the starting point, scan towards the proposed result, // looking for the first dictionary character (which may be the one // we're on, if we're starting in the middle of a range). utext_setNativeIndex(fText, reverse ? endPos : startPos); if (reverse) { UTEXT_PREVIOUS32(fText); } int32_t rangeStart = startPos; int32_t rangeEnd = endPos; uint16_t category; int32_t current; UErrorCode status = U_ZERO_ERROR; UStack breaks(status); int32_t foundBreakCount = 0; UChar32 c = utext_current32(fText); UTRIE_GET16(&fData->fTrie, c, category); // Is the character we're starting on a dictionary character? If so, we // need to back up to include the entire run; otherwise the results of // the break algorithm will differ depending on where we start. Since // the result is cached and there is typically a non-dictionary break // within a small number of words, there should be little performance impact. if (category & 0x4000) { if (reverse) { do { utext_next32(fText); // TODO: recast to work directly with postincrement. c = utext_current32(fText); UTRIE_GET16(&fData->fTrie, c, category); } while (c != U_SENTINEL && (category & 0x4000)); // Back up to the last dictionary character rangeEnd = (int32_t)UTEXT_GETNATIVEINDEX(fText); if (c == U_SENTINEL) { // c = fText->last32(); // TODO: why was this if needed? c = UTEXT_PREVIOUS32(fText); } else { c = UTEXT_PREVIOUS32(fText); } } else { do { c = UTEXT_PREVIOUS32(fText); UTRIE_GET16(&fData->fTrie, c, category); } while (c != U_SENTINEL && (category & 0x4000)); // Back up to the last dictionary character if (c == U_SENTINEL) { // c = fText->first32(); c = utext_current32(fText); } else { utext_next32(fText); c = utext_current32(fText); } rangeStart = (int32_t)UTEXT_GETNATIVEINDEX(fText);; } UTRIE_GET16(&fData->fTrie, c, category); } // Loop through the text, looking for ranges of dictionary characters. // For each span, find the appropriate break engine, and ask it to find // any breaks within the span. // Note: we always do this in the forward direction, so that the break // cache is built in the right order. if (reverse) { utext_setNativeIndex(fText, rangeStart); c = utext_current32(fText); UTRIE_GET16(&fData->fTrie, c, category); } while(U_SUCCESS(status)) { while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) < rangeEnd && (category & 0x4000) == 0) { utext_next32(fText); // TODO: tweak for post-increment operation c = utext_current32(fText); UTRIE_GET16(&fData->fTrie, c, category); } if (current >= rangeEnd) { break; } // We now have a dictionary character. Get the appropriate language object // to deal with it. const LanguageBreakEngine *lbe = getLanguageBreakEngine(c); // Ask the language object if there are any breaks. It will leave the text // pointer on the other side of its range, ready to search for the next one. if (lbe != NULL) { foundBreakCount += lbe->findBreaks(fText, rangeStart, rangeEnd, FALSE, fBreakType, breaks); } // Reload the loop variables for the next go-round c = utext_current32(fText); UTRIE_GET16(&fData->fTrie, c, category); } // If we found breaks, build a new break cache. The first and last entries must // be the original starting and ending position. if (foundBreakCount > 0) { int32_t totalBreaks = foundBreakCount; if (startPos < breaks.elementAti(0)) { totalBreaks += 1; } if (endPos > breaks.peeki()) { totalBreaks += 1; } fCachedBreakPositions = (int32_t *)uprv_malloc(totalBreaks * sizeof(int32_t)); if (fCachedBreakPositions != NULL) { int32_t out = 0; fNumCachedBreakPositions = totalBreaks; if (startPos < breaks.elementAti(0)) { fCachedBreakPositions[out++] = startPos; } for (int32_t i = 0; i < foundBreakCount; ++i) { fCachedBreakPositions[out++] = breaks.elementAti(i); } if (endPos > fCachedBreakPositions[out-1]) { fCachedBreakPositions[out] = endPos; } // If there are breaks, then by definition, we are replacing the original // proposed break by one of the breaks we found. Use following() and // preceding() to do the work. They should never recurse in this case. if (reverse) { return preceding(endPos - 1); } else { return following(startPos); } } // If the allocation failed, just fall through to the "no breaks found" case. } // If we get here, there were no language-based breaks. Set the text pointer // to the original proposed break. utext_setNativeIndex(fText, reverse ? startPos : endPos); return (reverse ? startPos : endPos); #endif }
static void TestAPI(void) { UErrorCode status = U_ZERO_ERROR; UBool gFailed = FALSE; (void)gFailed; /* Suppress set but not used warning. */ /* Open */ { UText utLoc = UTEXT_INITIALIZER; const char * cString = "\x61\x62\x63\x64"; UChar uString[] = {0x41, 0x42, 0x43, 0}; UText *uta; UText *utb; UChar c; uta = utext_openUChars(NULL, uString, -1, &status); TEST_SUCCESS(status); c = utext_next32(uta); TEST_ASSERT(c == 0x41); utb = utext_close(uta); TEST_ASSERT(utb == NULL); uta = utext_openUTF8(&utLoc, cString, -1, &status); TEST_SUCCESS(status); TEST_ASSERT(uta == &utLoc); uta = utext_close(&utLoc); TEST_ASSERT(uta == &utLoc); } /* utext_clone() */ { UChar uString[] = {0x41, 0x42, 0x43, 0}; int64_t len; UText *uta; UText *utb; status = U_ZERO_ERROR; uta = utext_openUChars(NULL, uString, -1, &status); TEST_SUCCESS(status); utb = utext_clone(NULL, uta, FALSE, FALSE, &status); TEST_SUCCESS(status); TEST_ASSERT(utb != NULL); TEST_ASSERT(utb != uta); len = utext_nativeLength(uta); TEST_ASSERT(len == u_strlen(uString)); utext_close(uta); utext_close(utb); } /* basic access functions */ { UChar uString[] = {0x41, 0x42, 0x43, 0}; UText *uta; UChar32 c; int64_t len; UBool b; int64_t i; status = U_ZERO_ERROR; uta = utext_openUChars(NULL, uString, -1, &status); TEST_ASSERT(uta!=NULL); TEST_SUCCESS(status); b = utext_isLengthExpensive(uta); TEST_ASSERT(b==TRUE); len = utext_nativeLength(uta); TEST_ASSERT(len == u_strlen(uString)); b = utext_isLengthExpensive(uta); TEST_ASSERT(b==FALSE); c = utext_char32At(uta, 0); TEST_ASSERT(c==uString[0]); c = utext_current32(uta); TEST_ASSERT(c==uString[0]); c = utext_next32(uta); TEST_ASSERT(c==uString[0]); c = utext_current32(uta); TEST_ASSERT(c==uString[1]); c = utext_previous32(uta); TEST_ASSERT(c==uString[0]); c = utext_current32(uta); TEST_ASSERT(c==uString[0]); c = utext_next32From(uta, 1); TEST_ASSERT(c==uString[1]); c = utext_next32From(uta, u_strlen(uString)); TEST_ASSERT(c==U_SENTINEL); c = utext_previous32From(uta, 2); TEST_ASSERT(c==uString[1]); i = utext_getNativeIndex(uta); TEST_ASSERT(i == 1); utext_setNativeIndex(uta, 0); b = utext_moveIndex32(uta, 1); TEST_ASSERT(b==TRUE); i = utext_getNativeIndex(uta); TEST_ASSERT(i==1); b = utext_moveIndex32(uta, u_strlen(uString)-1); TEST_ASSERT(b==TRUE); i = utext_getNativeIndex(uta); TEST_ASSERT(i==u_strlen(uString)); b = utext_moveIndex32(uta, 1); TEST_ASSERT(b==FALSE); i = utext_getNativeIndex(uta); TEST_ASSERT(i==u_strlen(uString)); utext_setNativeIndex(uta, 0); c = UTEXT_NEXT32(uta); TEST_ASSERT(c==uString[0]); c = utext_current32(uta); TEST_ASSERT(c==uString[1]); c = UTEXT_PREVIOUS32(uta); TEST_ASSERT(c==uString[0]); c = UTEXT_PREVIOUS32(uta); TEST_ASSERT(c==U_SENTINEL); utext_close(uta); } { /* * UText opened on a NULL string with zero length */ UText *uta; UChar32 c; status = U_ZERO_ERROR; uta = utext_openUChars(NULL, NULL, 0, &status); TEST_SUCCESS(status); c = UTEXT_NEXT32(uta); TEST_ASSERT(c == U_SENTINEL); utext_close(uta); uta = utext_openUTF8(NULL, NULL, 0, &status); TEST_SUCCESS(status); c = UTEXT_NEXT32(uta); TEST_ASSERT(c == U_SENTINEL); utext_close(uta); } { /* * extract */ UText *uta; UChar uString[] = {0x41, 0x42, 0x43, 0}; UChar buf[100]; int32_t i; /* Test pinning of input bounds */ UChar uString2[] = {0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0}; UChar * uString2Ptr = uString2 + 5; status = U_ZERO_ERROR; uta = utext_openUChars(NULL, uString, -1, &status); TEST_SUCCESS(status); status = U_ZERO_ERROR; i = utext_extract(uta, 0, 100, NULL, 0, &status); TEST_ASSERT(status==U_BUFFER_OVERFLOW_ERROR); TEST_ASSERT(i == u_strlen(uString)); status = U_ZERO_ERROR; memset(buf, 0, sizeof(buf)); i = utext_extract(uta, 0, 100, buf, 100, &status); TEST_SUCCESS(status); TEST_ASSERT(i == u_strlen(uString)); i = u_strcmp(uString, buf); TEST_ASSERT(i == 0); utext_close(uta); /* Test pinning of input bounds */ status = U_ZERO_ERROR; uta = utext_openUChars(NULL, uString2Ptr, -1, &status); TEST_SUCCESS(status); status = U_ZERO_ERROR; memset(buf, 0, sizeof(buf)); i = utext_extract(uta, -3, 20, buf, 100, &status); TEST_SUCCESS(status); TEST_ASSERT(i == u_strlen(uString2Ptr)); i = u_strcmp(uString2Ptr, buf); TEST_ASSERT(i == 0); utext_close(uta); } { /* * Copy, Replace, isWritable * Can't create an editable UText from plain C, so all we * can easily do is check that errors returned. */ UText *uta; UChar uString[] = {0x41, 0x42, 0x43, 0}; UBool b; status = U_ZERO_ERROR; uta = utext_openUChars(NULL, uString, -1, &status); TEST_SUCCESS(status); b = utext_isWritable(uta); TEST_ASSERT(b == FALSE); b = utext_hasMetaData(uta); TEST_ASSERT(b == FALSE); utext_replace(uta, 0, 1, /* start, limit */ uString, -1, /* replacement, replacement length */ &status); TEST_ASSERT(status == U_NO_WRITE_PERMISSION); utext_copy(uta, 0, 1, /* start, limit */ 2, /* destination index */ FALSE, /* move flag */ &status); TEST_ASSERT(status == U_NO_WRITE_PERMISSION); utext_close(uta); } }