/*
 * Build the exemplar-character set of the requested type from the locale
 * data held by `uld`.
 *
 * uld     - locale data object; its `bundle` is queried and its
 *           `noSubstitute` flag decides whether a defaulted lookup counts
 *           as a missing resource.
 * fillIn  - optional set to receive the pattern; when NULL a new set is
 *           opened and returned instead.
 * options - extra pattern options, OR-ed with USET_IGNORE_SPACE.
 * extype  - selects which exemplar resource key is looked up.
 * status  - in/out error code. Any non-zero status from the resource
 *           lookup (warnings included) is copied into it.
 *
 * Returns `fillIn` (or the newly opened set) once the pattern call has been
 * made, or NULL when `*status` already indicated failure on entry, when an
 * argument is invalid, or when the resource lookup failed. Note that NULL is
 * returned in those cases even if the caller supplied `fillIn`.
 */
U_CAPI USet* U_EXPORT2
ulocdata_getExemplarSet(ULocaleData *uld, USet *fillIn,
                        uint32_t options, ULocaleDataExemplarSetType extype,
                        UErrorCode *status)
{
    static const char* const exemplarSetTypes[] = {
        "ExemplarCharacters",
        "AuxExemplarCharacters"
    };
    const uint32_t typeCount =
        (uint32_t)(sizeof(exemplarSetTypes) / sizeof(exemplarSetTypes[0]));

    const UChar *exemplarChars = NULL;
    int32_t len = 0;
    UErrorCode localStatus = U_ZERO_ERROR;

    if (U_FAILURE(*status)) {
        return NULL;
    }

    /*
     * extype indexes the key table directly, so reject anything outside it
     * (the unsigned cast also catches negative values) instead of reading
     * past the end of the array. uld is dereferenced below, so a NULL
     * object is rejected the same way.
     */
    if (uld == NULL || (uint32_t)extype >= typeCount) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    exemplarChars = ures_getStringByKey(uld->bundle, exemplarSetTypes[extype],
                                        &len, &localStatus);

    /* With substitution disabled, a value taken from the default data is
       reported as missing. */
    if ((localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute) {
        localStatus = U_MISSING_RESOURCE_ERROR;
    }

    /* Propagate warnings as well as errors from the lookup. */
    if (localStatus != U_ZERO_ERROR) {
        *status = localStatus;
    }

    if (U_FAILURE(*status)) {
        return NULL;
    }

    if (fillIn != NULL) {
        uset_applyPattern(fillIn, exemplarChars, len,
                          USET_IGNORE_SPACE | options, status);
    } else {
        fillIn = uset_openPatternOptions(exemplarChars, len,
                                         USET_IGNORE_SPACE | options, status);
    }

    return fillIn;
}
// Fill `unsafe` with the code points this function treats as unsafe for
// `coll` and return the resulting set size.
//
// The set is cleared first, then receives: everything matching the
// "[[:^tccc=0:][:^lccc=0:]]" pattern, the Thai/Lao prevowel ranges, all
// surrogate code points, and every code point of each contraction string
// of `coll` except the last code point of that string.
//
// coll   - collator whose contractions are examined.
// unsafe - set to overwrite with the result.
// status - in/out error code, passed to the pattern, contraction and item
//          calls. Additionally set to U_MEMORY_ALLOCATION_ERROR if the
//          scratch set cannot be opened, and to U_BUFFER_OVERFLOW_ERROR if a
//          contraction string does not fit the internal buffer.
//
// Returns uset_size(unsafe) on every path.
U_CAPI int32_t U_EXPORT2
ucol_getUnsafeSet(const UCollator *coll, USet *unsafe, UErrorCode *status)
{
    // cccpattern = "[[:^tccc=0:][:^lccc=0:]]", unfortunately variant
    static const UChar cccpattern[25] = {
        0x5b, 0x5b, 0x3a, 0x5e, 0x74, 0x63, 0x63, 0x63, 0x3d, 0x30, 0x3a, 0x5d,
        0x5b, 0x3a, 0x5e, 0x6c, 0x63, 0x63, 0x63, 0x3d, 0x30, 0x3a, 0x5d, 0x5d,
        0x00
    };

    UChar buffer[internalBufferSize];
    int32_t len = 0;
    int32_t i = 0, j = 0;
    int32_t contsSize = 0;
    UChar32 c = 0;
    USet *contractions = NULL;

    uset_clear(unsafe);

    // add chars that fail the fcd check
    uset_applyPattern(unsafe, cccpattern, 24, USET_IGNORE_SPACE, status);

    // add Thai/Lao prevowels
    uset_addRange(unsafe, 0xe40, 0xe44);
    uset_addRange(unsafe, 0xec0, 0xec4);

    // add lead/trail surrogates
    uset_addRange(unsafe, 0xd800, 0xdfff);

    contractions = uset_open(0, 0);
    if (contractions == NULL) {
        // Presumably an allocation failure; do not hand a NULL set to the
        // contraction query.
        if (U_SUCCESS(*status)) {
            *status = U_MEMORY_ALLOCATION_ERROR;
        }
        return uset_size(unsafe);
    }

    contsSize = ucol_getContractions(coll, contractions, status);

    // Contraction set consists only of strings
    // to get unsafe code points, we need to
    // break the strings apart and add them to the unsafe set
    for (i = 0; i < contsSize; i++) {
        len = uset_getItem(contractions, i, NULL, NULL,
                           buffer, internalBufferSize, status);

        // A reported length larger than the buffer means the string was not
        // fully extracted; iterating `len` units would read past `buffer`.
        if (len > (int32_t)internalBufferSize) {
            if (U_SUCCESS(*status)) {
                *status = U_BUFFER_OVERFLOW_ERROR;
            }
            break;
        }

        if (len > 0) {
            j = 0;
            while (j < len) {
                U16_NEXT(buffer, j, len, c);
                // j now points past c; the final code point of the string
                // (j == len) is deliberately not added.
                if (j < len) {
                    uset_add(unsafe, c);
                }
            }
        }
    }

    uset_close(contractions);

    return uset_size(unsafe);
}
/*
 * Handle a scanset conversion: read code points from `input` for as long as
 * they belong to the set described in the format string, storing them
 * through the pointer in args[0] unless the argument is being skipped.
 *
 * input        - stream to read from.
 * info         - conversion spec; fWidth (when >= 0) caps the number of
 *                UTF-16 units consumed, fSkipArg suppresses storing.
 * args         - args[0].ptrValue is the destination buffer.
 * fmt          - format text positioned just after the opening '['.
 * fmtConsumed  - receives the value returned by uset_applyPattern.
 * argConverted - receives 1 if a value was stored, 0 if skipped; left
 *                untouched when nothing matched.
 *
 * Returns the number of UTF-16 units consumed, or -1 if no character
 * matched.
 */
static int32_t
u_scanf_scanset_handler(UFILE *input, u_scanf_spec_info *info, ufmt_args *args,
                        const UChar *fmt, int32_t *fmtConsumed,
                        int32_t *argConverted)
{
    UErrorCode    parseStatus = U_ZERO_ERROR;
    UChar        *dest        = (UChar *)(args[0].ptrValue);
    /* An explicit width bounds the read; otherwise it is effectively
       unlimited. */
    const int32_t limit       = (info->fWidth >= 0) ? info->fWidth : INT32_MAX;
    int32_t       remaining   = limit;
    UChar32       ch          = 0;
    UBool         gotChar     = FALSE;
    UBool         matchedAny  = FALSE;

    /* Start from an empty set. */
    USet *allowed = uset_open(0, -1);

    /* Step back one unit so the pattern handed to the parser begins at
       the '['. */
    fmt -= 1;
    *fmtConsumed = uset_applyPattern(allowed, fmt, -1, 0, &parseStatus);

    if (U_SUCCESS(parseStatus)) {
        while (remaining > 0) {
            gotChar = ufile_getch32(input, &ch);

            /* Stop at end of input or at the first non-member. */
            if (!gotChar || !uset_contains(allowed, ch)) {
                break;
            }

            matchedAny = TRUE;

            if (!info->fSkipArg) {
                int32_t written  = 0;
                UBool   overflow = FALSE;

                U16_APPEND(dest, written, remaining, ch, overflow);
                if (overflow) {
                    /* The code point does not fit in what is left of the
                       width. */
                    break;
                }
                dest += written;
            }

            /* A supplementary code point costs two units. */
            remaining -= (1 + U_IS_SUPPLEMENTARY(ch));
        }

        /* The last character read was not consumed by the conversion;
           return it to the stream. */
        if (gotChar && remaining > 0) {
            u_fungetc(ch, input);
        }
    }

    uset_close(allowed);

    /* Nothing matched: the conversion fails. */
    if (!matchedAny) {
        return -1;
    }

    /* Terminate the stored string. */
    if (!info->fSkipArg) {
        *dest = 0x00;
    }

    /* One argument converted, unless it was skipped. */
    *argConverted = !info->fSkipArg;

    return limit - remaining;
}