// Rebuild the mapping string stored for entry `index` of the key/value vectors.
//
// The length field of the key (USPOOF_KEY_LENGTH_FIELD) selects how the value is read:
//   0     - the value itself is the single UChar of the mapping.
//   1, 2  - the value is a start offset into fStringTable; the mapping is
//           (field + 1) code units long.
//   3     - the value is a start offset into fStringTable; the length comes from
//           fStringLengthsTable, which is walked two entries at a time as
//           (last string index with this length, length).
// Any other field value trips an assertion and yields an empty string.
UnicodeString ConfusabledataBuilder::getMapping(int32_t index) {
    const int32_t keyWord     = fKeyVec->elementAti(index);
    const int32_t valueWord   = fValueVec->elementAti(index);
    const int32_t lengthField = USPOOF_KEY_LENGTH_FIELD(keyWord);

    if (lengthField == 0) {
        // Single code unit stored directly in the value slot.
        return UnicodeString(static_cast<UChar>(valueWord));
    }

    if (lengthField == 1 || lengthField == 2) {
        // Short string: the length is encoded directly in the key.
        return UnicodeString(*fStringTable, valueWord, lengthField + 1);
    }

    if (lengthField != 3) {
        U_ASSERT(FALSE);
        return UnicodeString();
    }

    // Long string: take the length from the first pair whose "last index"
    // is not below this string's start offset.
    int32_t length = 0;
    for (int32_t pairStart = 0; pairStart < fStringLengthsTable->size(); pairStart += 2) {
        const int32_t lastIndexForLength = fStringLengthsTable->elementAti(pairStart);
        if (valueWord <= lastIndexForLength) {
            length = fStringLengthsTable->elementAti(pairStart + 1);
            break;
        }
    }
    U_ASSERT(length>=3);
    return UnicodeString(*fStringTable, valueWord, length);
}
//--------------------------------------------------------------------------------------
//
//  confusableLookup()   This is the heart of the confusable skeleton generation
//                       implementation.
//
//                       Given a source character, produce the corresponding
//                       replacement character(s), appending them to the dest string.
//
//    inChar     the source code point to look up.
//    tableMask  bit(s) selecting which confusable table the key must belong to;
//               tested against the top byte of each key.
//    dest       receives the replacement. It is appended to, never cleared.
//
//    Returns the number of UTF-16 code units appended to dest. A character with
//    no applicable key maps to itself, and the count then reflects what
//    dest.append(inChar) actually added.
//
//---------------------------------------------------------------------------------------
int32_t SpoofImpl::confusableLookup(UChar32 inChar, int32_t tableMask, UnicodeString &dest) const {
    const int32_t *keys     = fSpoofData->fCFUKeys;
    const int32_t  keyCount = fSpoofData->fRawData->fCFUKeysSize;

    // Binary search the spoof data key table for inChar. This relies on the
    // key table being ordered by code point. keyIndex stays -1 when there is
    // no key for inChar; an empty table is treated the same way instead of
    // being dereferenced.
    int32_t keyIndex = -1;
    if (keyCount > 0) {
        int32_t low   = 0;
        int32_t limit = keyCount;
        do {
            const int32_t probe     = low + (limit - low) / 2;
            const UChar32 probeChar = keys[probe] & 0x1fffff;
            if (inChar == probeChar) {
                keyIndex = probe;
                break;
            } else if (inChar < probeChar) {
                limit = probe;
            } else {
                low = probe;
            }
        } while (limit - low > 1);

        // The loop can end with one unchecked candidate left at 'low'.
        if (keyIndex < 0 && (keys[low] & 0x1fffff) == inChar) {
            keyIndex = low;
        }
    }

    if (keyIndex >= 0) {
        const int32_t hitFlags = keys[keyIndex] & 0xff000000;
        if ((hitFlags & tableMask) == 0) {
            // We found the right key char, but the entry doesn't pertain to the
            // table we need. See if there is an adjacent key that does.
            // NOTE(review): this scan compares the low 24 bits of the key, while
            // the binary search above compares only the low 21 bits. Both masks
            // are kept as they were; confirm bits 21-23 of a key are always zero.
            int32_t altIndex = -1;
            if (hitFlags & USPOOF_KEY_MULTIPLE_VALUES) {
                // Look backward first, never stepping before the first key.
                for (int32_t i = keyIndex - 1;
                     i >= 0 && (keys[i] & 0x00ffffff) == inChar;
                     --i) {
                    const int32_t altFlags = keys[i] & 0xff000000;
                    if (altFlags & tableMask) {
                        altIndex = i;
                        break;
                    }
                }
                // Then forward, never stepping past the last key.
                if (altIndex < 0) {
                    for (int32_t i = keyIndex + 1;
                         i < keyCount && (keys[i] & 0x00ffffff) == inChar;
                         ++i) {
                        const int32_t altFlags = keys[i] & 0xff000000;
                        if (altFlags & tableMask) {
                            altIndex = i;
                            break;
                        }
                    }
                }
            }
            keyIndex = altIndex;
        }
    }

    if (keyIndex < 0) {
        // No key entry for this char & table. The input char maps to itself.
        // Report what was really appended rather than a fixed 0, so the return
        // value means the same thing on every path.
        const int32_t lengthBefore = dest.length();
        dest.append(inChar);
        return dest.length() - lengthBefore;
    }

    const int32_t keyFlags = keys[keyIndex] & 0xff000000;
    int32_t stringLen = USPOOF_KEY_LENGTH_FIELD(keyFlags) + 1;

    // Value is either a UChar (for strings of length 1) or
    // an index into the string table (for longer strings)
    const uint16_t value = fSpoofData->fCFUValues[keyIndex];
    if (stringLen == 1) {
        dest.append(static_cast<UChar>(value));
        return 1;
    }

    // String length of 4 from the above lookup is used for all strings of length >= 4.
    // For these, get the real length from the string lengths table,
    // which maps string table indexes to lengths.
    // All strings of the same length are stored contiguously in the string table.
    // 'value' from the lookup above is the starting index for the desired string.
    if (stringLen == 4) {
        const int32_t stringLengthsLimit = fSpoofData->fRawData->fCFUStringLengthsSize;
        int32_t ix;
        for (ix = 0; ix < stringLengthsLimit; ix++) {
            if (fSpoofData->fCFUStringLengths[ix].fLastString >= value) {
                stringLen = fSpoofData->fCFUStringLengths[ix].fStrLength;
                break;
            }
        }
        U_ASSERT(ix < stringLengthsLimit);
    }

    U_ASSERT(value + stringLen <= fSpoofData->fRawData->fCFUStringTableLen);
    UChar *src = &fSpoofData->fCFUStrings[value];
    dest.append(src, stringLen);
    return stringLen;
}