static UBool * getResultsManually(const char** encodings, int32_t num_encodings, const char *utf8, int32_t length, const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) { UBool* resultsManually; int32_t i; resultsManually = (UBool*) uprv_malloc(gCountAvailable); uprv_memset(resultsManually, 0, gCountAvailable); for(i = 0 ; i < num_encodings ; i++) { UErrorCode status = U_ZERO_ERROR; /* get unicode set for that converter */ USet* set; UConverter* test_converter; UChar32 cp; int32_t encIndex, offset; set = uset_openEmpty(); test_converter = ucnv_open(encodings[i], &status); ucnv_getUnicodeSet(test_converter, set, whichSet, &status); if (excludedCodePoints != NULL) { uset_addAll(set, excludedCodePoints); } uset_freeze(set); offset = 0; cp = 0; encIndex = findIndex(encodings[i]); /* * The following is almost, but not entirely, the same as * resultsManually[encIndex] = * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); * They might be different if the set contains strings, * or if the utf8 string contains an illegal sequence. * * The UConverterSelector does not currently handle strings that can be * converted, and it treats an illegal sequence as convertible * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. */ resultsManually[encIndex] = TRUE; while(offset<length) { U8_NEXT(utf8, offset, length, cp); if (cp >= 0 && !uset_contains(set, cp)) { resultsManually[encIndex] = FALSE; break; } } uset_close(set); ucnv_close(test_converter); } return resultsManually; }
/**
 * JNI entry point: reports whether the round-trip Unicode set of the converter
 * named by name1 contains the whole round-trip Unicode set of the converter
 * named by name2.
 *
 * Returns JNI_FALSE if either name cannot be read as UTF chars, if either
 * converter cannot be opened, or if either Unicode set cannot be obtained;
 * otherwise returns the result of set1.containsAll(set2).
 */
static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) {
    ScopedUtfChars name1Chars(env, name1);
    if (name1Chars.c_str() == NULL) {
        return JNI_FALSE;
    }
    ScopedUtfChars name2Chars(env, name2);
    if (name2Chars.c_str() == NULL) {
        return JNI_FALSE;
    }

    UErrorCode errorCode = U_ZERO_ERROR;

    icu::LocalUConverterPointer converter1(ucnv_open(name1Chars.c_str(), &errorCode));
    if (U_FAILURE(errorCode)) {
        return JNI_FALSE;
    }
    icu::UnicodeSet set1;
    // getAlias() hands over the raw pointer without dereferencing it.
    ucnv_getUnicodeSet(converter1.getAlias(), set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
    if (U_FAILURE(errorCode)) {
        return JNI_FALSE;
    }

    icu::LocalUConverterPointer converter2(ucnv_open(name2Chars.c_str(), &errorCode));
    if (U_FAILURE(errorCode)) {
        return JNI_FALSE;
    }
    icu::UnicodeSet set2;
    ucnv_getUnicodeSet(converter2.getAlias(), set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
    if (U_FAILURE(errorCode)) {
        return JNI_FALSE;
    }

    return set1.containsAll(set2) ? JNI_TRUE : JNI_FALSE;
}
static void generateSelectorData(UConverterSelector* result, UPropsVectors *upvec, const USet* excludedCodePoints, const UConverterUnicodeSet whichSet, UErrorCode* status) { if (U_FAILURE(*status)) { return; } int32_t columns = (result->encodingsCount+31)/32; // set errorValue to all-ones for (int32_t col = 0; col < columns; col++) { upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP, col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status); } for (int32_t i = 0; i < result->encodingsCount; ++i) { uint32_t mask; uint32_t column; int32_t item_count; int32_t j; UConverter* test_converter = ucnv_open(result->encodings[i], status); if (U_FAILURE(*status)) { return; } USet* unicode_point_set; unicode_point_set = uset_open(1, 0); // empty set ucnv_getUnicodeSet(test_converter, unicode_point_set, whichSet, status); if (U_FAILURE(*status)) { ucnv_close(test_converter); return; } column = i / 32; mask = 1 << (i%32); // now iterate over intervals on set i! item_count = uset_getItemCount(unicode_point_set); for (j = 0; j < item_count; ++j) { UChar32 start_char; UChar32 end_char; UErrorCode smallStatus = U_ZERO_ERROR; uset_getItem(unicode_point_set, j, &start_char, &end_char, NULL, 0, &smallStatus); if (U_FAILURE(smallStatus)) { // this will be reached for the converters that fill the set with // strings. Those should be ignored by our system } else { upvec_setValue(upvec, start_char, end_char, column, static_cast<uint32_t>(~0), mask, status); } } ucnv_close(test_converter); uset_close(unicode_point_set); if (U_FAILURE(*status)) { return; } } // handle excluded encodings! 
Simply set their values to all 1's in the upvec if (excludedCodePoints) { int32_t item_count = uset_getItemCount(excludedCodePoints); for (int32_t j = 0; j < item_count; ++j) { UChar32 start_char; UChar32 end_char; uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0, status); for (int32_t col = 0; col < columns; col++) { upvec_setValue(upvec, start_char, end_char, col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status); } } } // alright. Now, let's put things in the same exact form you'd get when you // unserialize things. result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status); result->pv = upvec_cloneArray(upvec, &result->pvCount, NULL, status); result->pvCount *= columns; // number of uint32_t = rows * columns result->ownPv = TRUE; }