Exemplo n.º 1
0
//-----------------------------------------------------------------------------
//
//  sortedAdd  Add a value to a vector of sorted values (ints).
//             Do not replicate entries; if the value is already there, do not
//                add a second one.
//             Lazily create the vector if it does not already exist.
//
//-----------------------------------------------------------------------------
void RBBITableBuilder::sortedAdd(UVector ** vector, int32_t val)
{
	int32_t i;

	if (*vector == NULL)
	{
		*vector = new UVector(*fStatus);
	}
	if (*vector == NULL || U_FAILURE(*fStatus))
	{
		return;
	}
	UVector * vec = *vector;
	int32_t  vSize = vec->size();
	for (i = 0; i < vSize; i++)
	{
		int32_t valAtI = vec->elementAti(i);
		if (valAtI == val)
		{
			// The value is already in the vector.  Don't add it again.
			return;
		}
		if (valAtI > val)
		{
			break;
		}
	}
	vec->insertElementAt(val, i, *fStatus);
}
Exemplo n.º 2
0
/**
 * Register a source-target/variant in the specDAG.  Variant may be
 * empty, but source and target must not be.  If variant is empty then
 * the special variant NO_VARIANT is stored in slot zero of the
 * UVector of variants.
 */
void TransliteratorRegistry::registerSTV(const UnicodeString& source,
                                         const UnicodeString& target,
                                         const UnicodeString& variant) {
    // assert(source.length() > 0);
    // assert(target.length() > 0);
    UErrorCode status = U_ZERO_ERROR;
    Hashtable *targets = (Hashtable*) specDAG.get(source);
    if (targets == 0) {
        targets = new Hashtable(TRUE, status);
        if (U_FAILURE(status) || targets == 0) {
            return;
        }
        targets->setValueDeleter(uprv_deleteUObject);
        specDAG.put(source, targets, status);
    }
    UVector *variants = (UVector*) targets->get(target);
    if (variants == 0) {
        variants = new UVector(uprv_deleteUObject,
                               uhash_compareCaselessUnicodeString, status);
        if (variants == 0) {
            return;
        }
        targets->put(target, variants, status);
    }
    // assert(NO_VARIANT == "");
    // We add the variant string.  If it is the special "no variant"
    // string, that is, the empty string, we add it at position zero.
    if (!variants->contains((void*) &variant)) {
    	UnicodeString *tempus; // Used for null pointer check.
        if (variant.length() > 0) {
        	tempus = new UnicodeString(variant);
        	if (tempus != NULL) {
        		variants->addElement(tempus, status);
        	}
        } else {
        	tempus = new UnicodeString();  // = NO_VARIANT
        	if (tempus != NULL) {
        		variants->insertElementAt(tempus, 0, status);
        	}
        }
    }
}
Exemplo n.º 3
0
U_CDECL_END

/**
 * Parse a compound ID, consisting of an optional forward global
 * filter, a separator, one or more single IDs delimited by
 * separators, an an optional reverse global filter.  The
 * separator is a semicolon.  The global filters are UnicodeSet
 * patterns.  The reverse global filter must be enclosed in
 * parentheses.
 * @param id the pattern the parse
 * @param dir the direction.
 * @param canonID OUTPUT parameter that receives the canonical ID,
 * consisting of canonical IDs for all elements, as returned by
 * parseSingleID(), separated by semicolons.  Previous contents
 * are discarded.
 * @param list OUTPUT parameter that receives a list of SingleID
 * objects representing the parsed IDs.  Previous contents are
 * discarded.
 * @param globalFilter OUTPUT parameter that receives a pointer to
 * a newly created global filter for this ID in this direction, or
 * NULL if there is none.
 * @return TRUE if the parse succeeds, that is, if the entire
 * id is consumed without syntax error.
 */
UBool TransliteratorIDParser::parseCompoundID(const UnicodeString & id, int32_t dir,
        UnicodeString & canonID,
        UVector & list,
        UnicodeSet *& globalFilter)
{
	UErrorCode ec = U_ZERO_ERROR;
	int32_t i;
	int32_t pos = 0;
	int32_t withParens = 1;
	list.removeAllElements();
	UnicodeSet * filter;
	globalFilter = NULL;
	canonID.truncate(0);

	// Parse leading global filter, if any
	withParens = 0; // parens disallowed
	filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);
	if (filter != NULL)
	{
		if (!ICU_Utility::parseChar(id, pos, ID_DELIM))
		{
			// Not a global filter; backup and resume
			canonID.truncate(0);
			pos = 0;
		}
		if (dir == FORWARD)
		{
			globalFilter = filter;
		}
		else
		{
			delete filter;
		}
		filter = NULL;
	}

	UBool sawDelimiter = TRUE;
	for (;;)
	{
		SingleID * single = parseSingleID(id, pos, dir, ec);
		if (single == NULL)
		{
			break;
		}
		if (dir == FORWARD)
		{
			list.addElement(single, ec);
		}
		else
		{
			list.insertElementAt(single, 0, ec);
		}
		if (U_FAILURE(ec))
		{
			goto FAIL;
		}
		if (!ICU_Utility::parseChar(id, pos, ID_DELIM))
		{
			sawDelimiter = FALSE;
			break;
		}
	}

	if (list.size() == 0)
	{
		goto FAIL;
	}

	// Construct canonical ID
	for (i = 0; i < list.size(); ++i)
	{
		SingleID * single = (SingleID *) list.elementAt(i);
		canonID.append(single->canonID);
		if (i != (list.size() - 1))
		{
			canonID.append(ID_DELIM);
		}
	}

	// Parse trailing global filter, if any, and only if we saw
	// a trailing delimiter after the IDs.
	if (sawDelimiter)
	{
		withParens = 1; // parens required
		filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);
		if (filter != NULL)
		{
			// Don't require trailing ';', but parse it if present
			ICU_Utility::parseChar(id, pos, ID_DELIM);

			if (dir == REVERSE)
			{
				globalFilter = filter;
			}
			else
			{
				delete filter;
			}
			filter = NULL;
		}
	}

	// Trailing unparsed text is a syntax error
	ICU_Utility::skipWhitespace(id, pos, TRUE);
	if (pos != id.length())
	{
		goto FAIL;
	}

	return TRUE;

FAIL:
	UObjectDeleter * save = list.setDeleter(_deleteSingleID);
	list.removeAllElements();
	list.setDeleter(save);
	delete globalFilter;
	globalFilter = NULL;
	return FALSE;
}
Exemplo n.º 4
0
void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
    const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
    if (U_FAILURE(errorCode)) { return; }

    const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
    const UnicodeString &overflowBoundary =
        *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);

    // We make a sorted array of elements.
    // Some of the input may be redundant.
    // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
    // We filter out those cases.
    UnicodeSetIterator iter(*initialLabels_);
    while (iter.next()) {
        const UnicodeString *item = &iter.getString();
        LocalPointer<UnicodeString> ownedItem;
        UBool checkDistinct;
        int32_t itemLength = item->length();
        if (!item->hasMoreChar32Than(0, itemLength, 1)) {
            checkDistinct = FALSE;
        } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
                item->charAt(itemLength - 2) != 0x2a) {
            // Use a label if it is marked with one trailing star,
            // even if the label string sorts the same when all contractions are suppressed.
            ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
            item = ownedItem.getAlias();
            if (item == NULL) {
                errorCode = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            checkDistinct = FALSE;
        } else {
            checkDistinct = TRUE;
        }
        if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
            // Ignore a primary-ignorable or non-alphabetic index character.
        } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
            // Ignore an index character that will land in the overflow bucket.
        } else if (checkDistinct &&
                collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
            // Ignore a multi-code point index character that does not sort distinctly
            // from the sequence of its separate characters.
        } else {
            int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
            if (insertionPoint < 0) {
                indexCharacters.insertElementAt(
                    ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
            } else {
                const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
                if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
                    indexCharacters.setElementAt(
                        ownedString(*item, ownedItem, errorCode), insertionPoint);
                }
            }
        }
    }
    if (U_FAILURE(errorCode)) { return; }

    // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element

    int32_t size = indexCharacters.size() - 1;
    if (size > maxLabelCount_) {
        int32_t count = 0;
        int32_t old = -1;
        for (int32_t i = 0; i < indexCharacters.size();) {
            ++count;
            int32_t bump = count * maxLabelCount_ / size;
            if (bump == old) {
                indexCharacters.removeElementAt(i);
            } else {
                old = bump;
                ++i;
            }
        }
    }
}