static UBool * getResultsManually(const char** encodings, int32_t num_encodings, const char *utf8, int32_t length, const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) { UBool* resultsManually; int32_t i; resultsManually = (UBool*) uprv_malloc(gCountAvailable); uprv_memset(resultsManually, 0, gCountAvailable); for(i = 0 ; i < num_encodings ; i++) { UErrorCode status = U_ZERO_ERROR; /* get unicode set for that converter */ USet* set; UConverter* test_converter; UChar32 cp; int32_t encIndex, offset; set = uset_openEmpty(); test_converter = ucnv_open(encodings[i], &status); ucnv_getUnicodeSet(test_converter, set, whichSet, &status); if (excludedCodePoints != NULL) { uset_addAll(set, excludedCodePoints); } uset_freeze(set); offset = 0; cp = 0; encIndex = findIndex(encodings[i]); /* * The following is almost, but not entirely, the same as * resultsManually[encIndex] = * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); * They might be different if the set contains strings, * or if the utf8 string contains an illegal sequence. * * The UConverterSelector does not currently handle strings that can be * converted, and it treats an illegal sequence as convertible * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. */ resultsManually[encIndex] = TRUE; while(offset<length) { U8_NEXT(utf8, offset, length, cp); if (cp >= 0 && !uset_contains(set, cp)) { resultsManually[encIndex] = FALSE; break; } } uset_close(set); ucnv_close(test_converter); } return resultsManually; }
static void TestSerialized() { uint16_t buffer[1000]; USerializedSet sset; USet *set; UErrorCode errorCode; UChar32 c; int32_t length; /* use a pattern that generates both BMP and supplementary code points */ U_STRING_DECL(pattern, "[:Cf:]", 6); U_STRING_INIT(pattern, "[:Cf:]", 6); errorCode=U_ZERO_ERROR; set=uset_openPattern(pattern, -1, &errorCode); if(U_FAILURE(errorCode)) { log_data_err("uset_openPattern([:Cf:]) failed - %s (Are you missing data?)\n", u_errorName(errorCode)); return; } length=uset_serialize(set, buffer, UPRV_LENGTHOF(buffer), &errorCode); if(U_FAILURE(errorCode)) { log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode)); uset_close(set); return; } uset_getSerializedSet(&sset, buffer, length); for(c=0; c<=0x10ffff; ++c) { if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) { log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c); break; } } uset_close(set); }
U_CDECL_BEGIN
/*
 * Handles one range [start, limit) of code points that share the value CE.
 * NOTE(review): the signature and U_CALLCONV suggest this is a range
 * enumeration callback - confirm against the code that registers it.
 *
 * context is a contContext. For a special CE:
 *  - tagged CONTRACTION_TAG, or SPEC_PROC_TAG while addPrefixes is set: every
 *    code point in the range that is not in removedContractions is passed to
 *    addSpecial();
 *  - otherwise, tagged EXPANSION_TAG with an expansions set present: every code
 *    point in the range is added to that set.
 * Either walk stops early once *status holds a failure.
 *
 * Returns FALSE if *status is a failure on exit, TRUE otherwise.
 */
static UBool U_CALLCONV
_processSpecials(const void *context, UChar32 start, UChar32 limit, uint32_t CE)
{
    contContext *ctx = (contContext *)context;
    UErrorCode *status = ctx->status;
    USet *expansions = ctx->expansions;
    USet *removed = ctx->removedContractions;
    UBool addPrefixes = ctx->addPrefixes;
    UChar contraction[internalBufferSize];
    /* The seed character sits mid-buffer: the contraction may grow either way. */
    const int32_t middle = internalBufferSize/2;

    if(isSpecial(CE)) {
        if((getCETag(CE) == SPEC_PROC_TAG && addPrefixes) || getCETag(CE) == CONTRACTION_TAG) {
            for(; start < limit && U_SUCCESS(*status); start++) {
                /* suppressed contractions are not added */
                if(removed && uset_contains(removed, start)) {
                    continue;
                }
                contraction[middle] = (UChar)start;
                addSpecial(ctx, contraction, internalBufferSize, CE, middle, middle+1, status);
            }
        } else if(expansions && getCETag(CE) == EXPANSION_TAG) {
            for(; start < limit && U_SUCCESS(*status); start++) {
                uset_add(expansions, start);
            }
        }
    }

    return U_FAILURE(*status) ? FALSE : TRUE;
}
/*
 * Reports whether code point c is a member of the set that
 * getSmartSet(isPreviousCharacter) returns.
 */
bool isCharacterSmartReplaceExempt(UChar32 c, bool isPreviousCharacter)
{
    const USet *smartSet = getSmartSet(isPreviousCharacter);

    return uset_contains(smartSet, c);
}
/*
 * Helper for expectContainment(): compares uset_containsRange() for
 * [rangeStart, rangeEnd] with the expected answer isIn and logs the outcome.
 * pat is the printable form of the set, used only in the log messages.
 */
static void expectContainment_checkRange(const USet* set, const char* pat,
                                         int32_t rangeStart, int32_t rangeEnd,
                                         UBool isIn) {
    if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
        log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
                    (isIn ? "contains" : "does not contain"),
                    rangeStart, rangeEnd);
    } else {
        log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat,
                    (isIn ? "does not contain" : "contains"),
                    rangeStart, rangeEnd);
    }
}

/*
 * Checks that every item named in list is (isIn == TRUE) or is not
 * (isIn == FALSE) contained in set, logging one line per item.
 *
 * list is a char string: each character outside braces is tested with
 * uset_contains(); each "{...}" group is tested as a string with
 * uset_containsString(). Runs of consecutive single characters are also
 * tested as a range with uset_containsRange().
 *
 * A '{' without a matching '}' is reported with log_err() and ends the scan.
 */
static void expectContainment(const USet* set,
                              const char* list,
                              UBool isIn) {
    const char* p = list;
    UChar ustr[4096];
    /* capacity in UChars, which is the unit the ICU string APIs expect */
    const int32_t ustrCapacity = (int32_t)(sizeof(ustr) / sizeof(ustr[0]));
    char *pat;
    UErrorCode ec;
    int32_t rangeStart = -1, rangeEnd = -1, length;

    ec = U_ZERO_ERROR;
    length = uset_toPattern(set, ustr, ustrCapacity, TRUE, &ec);
    if(U_FAILURE(ec)) {
        log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec));
        return;
    }
    pat=aescstrdup(ustr, length);

    while (*p) {
        if (*p=='{') {
            const char* stringStart = ++p;
            int32_t stringLength = 0;
            int32_t copyLength;
            char strCopy[64];

            /* find the closing brace without running past the terminator */
            while (*p != 0 && *p != '}') {
                ++p;
            }
            if (*p == 0) {
                log_err("FAIL: unterminated '{' in expectContainment() list \"%s\"\n", list);
                break;
            }
            stringLength = (int32_t)(p - stringStart);
            ++p; /* step over the '}' */

            if (stringLength > ustrCapacity) {
                log_err("FAIL: string of length %d in expectContainment() list is too long to test\n",
                        (int)stringLength);
                continue;
            }

            /* strCopy is only for display; truncate rather than overflow it */
            copyLength = stringLength;
            if (copyLength > (int32_t)sizeof(strCopy) - 1) {
                copyLength = (int32_t)sizeof(strCopy) - 1;
            }
            strncpy(strCopy, stringStart, copyLength);
            strCopy[copyLength] = 0;

            u_charsToUChars(stringStart, ustr, stringLength);

            if (uset_containsString(set, ustr, stringLength) == isIn) {
                log_verbose("Ok: %s %s \"%s\"\n", pat,
                            (isIn ? "contains" : "does not contain"),
                            strCopy);
            } else {
                log_data_err("FAIL: %s %s \"%s\" (Are you missing data?)\n", pat,
                            (isIn ? "does not contain" : "contains"),
                            strCopy);
            }
        }

        else {
            UChar32 c;

            u_charsToUChars(p, ustr, 1);
            c = ustr[0];

            if (uset_contains(set, c) == isIn) {
                log_verbose("Ok: %s %s '%c'\n", pat,
                            (isIn ? "contains" : "does not contain"),
                            *p);
            } else {
                log_data_err("FAIL: %s %s '%c' (Are you missing data?)\n", pat,
                            (isIn ? "does not contain" : "contains"),
                            *p);
            }

            /* Test the range API too by looking for ranges */
            if (c == rangeEnd+1) {
                rangeEnd = c;
            } else {
                if (rangeStart >= 0) {
                    expectContainment_checkRange(set, pat, rangeStart, rangeEnd, isIn);
                }
                rangeStart = rangeEnd = c;
            }

            ++p;
        }
    }

    /* report the range that was still open when the list ended */
    if (rangeStart >= 0) {
        expectContainment_checkRange(set, pat, rangeStart, rangeEnd, isIn);
    }
}
/*
 * Handler for a scanset ("[...]") conversion.
 *
 * fmt points just past the opening '['; the set pattern is parsed starting one
 * UChar earlier, and *fmtConsumed receives the value returned by
 * uset_applyPattern(). Code points are then read from input for as long as
 * they belong to the set and the width budget (info->fWidth, or INT32_MAX when
 * the width is negative) is not used up. Unless info->fSkipArg is set, they
 * are stored as UTF-16 through args[0].ptrValue and terminated with 0.
 *
 * Returns -1 if no character matched (nothing else is written in that case).
 * Otherwise sets *argConverted to !info->fSkipArg and returns how much of the
 * budget was used: 1 per BMP code point, 2 per supplementary code point.
 */
static int32_t
u_scanf_scanset_handler(UFILE *input,
                        u_scanf_spec_info *info,
                        ufmt_args *args,
                        const UChar *fmt,
                        int32_t *fmtConsumed,
                        int32_t *argConverted)
{
    /* the budget is the field width when one was given, else "unlimited" */
    const int32_t budget = (info->fWidth >= 0) ? info->fWidth : INT32_MAX;
    USet *scanset = uset_open(0, -1);   /* start > end: an empty set */
    UErrorCode status = U_ZERO_ERROR;
    UChar *alias = (UChar*) (args[0].ptrValue);
    int32_t chLeft = budget;
    UChar32 c = 0;
    UBool isNotEOF = FALSE;
    UBool readCharacter = FALSE;

    /* fmt - 1 is the '[' that opens the scanset */
    *fmtConsumed = uset_applyPattern(scanset, fmt - 1, -1, 0, &status);

    if (U_SUCCESS(status)) {
        /* take one code point at a time while it stays inside the scanset */
        while (chLeft > 0) {
            isNotEOF = ufile_getch32(input, &c);
            if (!isNotEOF || !uset_contains(scanset, c)) {
                break;
            }

            readCharacter = TRUE;
            if (!info->fSkipArg) {
                int32_t idx = 0;
                UBool isError = FALSE;

                U16_APPEND(alias, idx, chLeft, c, isError);
                if (isError) {
                    break;
                }
                alias += idx;
            }
            chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
        }

        /*
         * With budget left and no EOF, the loop stopped on a character that
         * was read but not stored: give it back to the input.
         */
        if (isNotEOF && chLeft > 0) {
            u_fungetc(c, input);
        }
    }

    uset_close(scanset);

    /* matching nothing at all is a failure */
    if (!readCharacter) {
        return -1;
    }

    /* terminate the stored string */
    if (!info->fSkipArg) {
        *alias = 0x00;
    }

    /* one argument was converted unless it was skipped */
    *argConverted = !info->fSkipArg;

    return budget - chLeft;
}