/**
 * Picks one element of this monkey's set at a pseudo-random index and appends
 * it to both output strings.
 *
 * The index is computed as m_rand() % uset_size(set) and the code point is
 * fetched with uset_charAt(). The same one-code-point string is appended to
 * both `test` and `alternate`.
 *
 * @param test      string that receives the chosen code point
 * @param alternate second string that receives the same code point
 */
void SetMonkey::append(UnicodeString &test, UnicodeString &alternate)
{
    int32_t size = uset_size(set);

    // An empty set offers nothing to choose from, and the modulo below would
    // divide by zero; leave both strings untouched in that case.
    if (size <= 0) {
        return;
    }

    int32_t index = m_rand() % size;
    UChar32 ch = uset_charAt(set, index);
    UnicodeString str(ch);

    test.append(str);
    alternate.append(str); // flip case, or some junk?
}
/**
 * Rebuilds `unsafe` as the set of "unsafe" code points for a collator.
 *
 * The set is cleared and then filled with:
 *   - everything matching the pattern "[[:^tccc=0:][:^lccc=0:]]",
 *   - the ranges U+0E40..U+0E44 and U+0EC0..U+0EC4 (Thai/Lao prevowels),
 *   - the surrogate range U+D800..U+DFFF,
 *   - every code point of each contraction string reported by
 *     ucol_getContractions(), except the last code point of each string.
 *
 * @param coll   collator whose contractions are inspected
 * @param unsafe set that is cleared and receives the result
 * @param status in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR if the
 *               temporary contraction set cannot be created
 * @return uset_size(unsafe) at the time of return
 */
U_CAPI int32_t U_EXPORT2
ucol_getUnsafeSet( const UCollator *coll,
                  USet *unsafe,
                  UErrorCode *status)
{
    UChar buffer[internalBufferSize];
    int32_t len = 0;

    uset_clear(unsafe);

    // cccpattern = "[[:^tccc=0:][:^lccc=0:]]", unfortunately variant
    static const UChar cccpattern[25] = { 0x5b, 0x5b, 0x3a, 0x5e, 0x74, 0x63, 0x63, 0x63, 0x3d, 0x30, 0x3a, 0x5d,
                                    0x5b, 0x3a, 0x5e, 0x6c, 0x63, 0x63, 0x63, 0x3d, 0x30, 0x3a, 0x5d, 0x5d, 0x00 };

    // add chars that fail the fcd check
    uset_applyPattern(unsafe, cccpattern, 24, USET_IGNORE_SPACE, status);

    // add Thai/Lao prevowels
    uset_addRange(unsafe, 0xe40, 0xe44);
    uset_addRange(unsafe, 0xec0, 0xec4);
    // add lead/trail surrogates
    uset_addRange(unsafe, 0xd800, 0xdfff);

    USet *contractions = uset_open(0,0);
    if(contractions == NULL) {
        // Without the scratch set there is nothing to iterate; report the
        // failure (unless an earlier error is already recorded) and return
        // what has been collected so far.
        if(U_SUCCESS(*status)) {
            *status = U_MEMORY_ALLOCATION_ERROR;
        }
        return uset_size(unsafe);
    }

    int32_t i = 0, j = 0;
    int32_t contsSize = ucol_getContractions(coll, contractions, status);
    UChar32 c = 0;
    // Receivers for range items. The contraction set is expected to hold only
    // strings, but real storage keeps uset_getItem from writing through NULL
    // should a range item ever show up (such items yield len == 0 and are skipped).
    UChar32 rangeStart = 0, rangeEnd = 0;

    // Contraction set consists only of strings
    // to get unsafe code points, we need to
    // break the strings apart and add them to the unsafe set
    for(i = 0; i < contsSize; i++) {
        len = uset_getItem(contractions, i, &rangeStart, &rangeEnd, buffer, internalBufferSize, status);
        if(U_FAILURE(*status)) {
            // On failure (e.g. buffer overflow) len may be larger than the
            // buffer, so its contents must not be walked.
            break;
        }
        if(len > 0) {
            j = 0;
            while(j < len) {
                U16_NEXT(buffer, j, len, c);
                // j now points past c; the final code point of the string
                // (j == len) is deliberately not added.
                if(j < len) {
                    uset_add(unsafe, c);
                }
            }
        }
    }

    uset_close(contractions);

    return uset_size(unsafe);
}
/**
 * Checks that iterating `set` with uset_getItem() yields exactly the elements
 * described by `items`, and that uset_size() matches the number of elements.
 *
 * `items` is a sequence of expected elements in iteration order: a plain
 * character stands for a single code point, and "{...}" stands for a string
 * element. Sets whose pattern starts with "[^" are only checked for
 * emptiness and are otherwise skipped.
 *
 * Mismatches are reported through log_err / log_data_err; the function
 * returns at the first element-level failure.
 *
 * @param set   the set under test
 * @param items description of the expected contents
 */
static void expectItems(const USet* set,
                        const char* items) {
    const char* p = items;
    UChar ustr[4096], itemStr[4096];
    char buf[4096];
    char *pat;
    UErrorCode ec;
    int32_t expectedSize = 0;
    int32_t itemCount = uset_getItemCount(set);
    int32_t itemIndex = 0;
    UChar32 start = 1, end = 0;
    int32_t itemLen = 0, length;
    /* Capacities are counted in UChars (not bytes). */
    const int32_t ustrCapacity = (int32_t)(sizeof(ustr) / sizeof(ustr[0]));
    const int32_t itemStrCapacity = (int32_t)(sizeof(itemStr) / sizeof(itemStr[0]));

    ec = U_ZERO_ERROR;
    /* Reserve one slot so the pattern can always be NUL-terminated below. */
    length = uset_toPattern(set, ustr, ustrCapacity - 1, TRUE, &ec);
    if (U_FAILURE(ec)) {
        log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec));
        return;
    }
    ustr[length] = 0;
    pat=aescstrdup(ustr, length);

    if (uset_isEmpty(set) != (strlen(items)==0)) {
        log_data_err("FAIL: %s should return %s from isEmpty (Are you missing data?)\n",
                pat,
                strlen(items)==0 ? "TRUE" : "FALSE");
    }

    /* Don't test patterns starting with "[^" */
    if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) {
        return;
    }

    while (*p) {

        ++expectedSize;

        if (start > end || start == -1) {
            /* Fetch our next item */
            if (itemIndex >= itemCount) {
                log_data_err("FAIL: ran out of items iterating %s (Are you missing data?)\n", pat);
                return;
            }

            /* Reserve one slot so itemStr[itemLen] = 0 stays in bounds. */
            itemLen = uset_getItem(set, itemIndex, &start, &end,
                                   itemStr, itemStrCapacity - 1, &ec);
            if (U_FAILURE(ec) || itemLen < 0) {
                log_err("FAIL: uset_getItem => %s\n", u_errorName(ec));
                return;
            }

            if (itemLen == 0) {
                log_verbose("Ok: %s item %d is %c-%c\n", pat,
                            itemIndex, oneUCharToChar(start),
                            oneUCharToChar(end));
            } else {
                itemStr[itemLen] = 0;
                u_UCharsToChars(itemStr, buf, itemLen+1);
                log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf);
            }

            ++itemIndex;
        }

        if (*p=='{') {
            const char* stringStart = ++p;
            int32_t stringLength = 0;
            int32_t copyLength = 0;
            char strCopy[64];

            /* Find the closing brace without running off the end of items. */
            while (*p != 0 && *p != '}') {
                ++p;
            }
            if (*p == 0) {
                log_err("FAIL: for %s the expected items \"%s\" have an unterminated '{'\n", pat, items);
                return;
            }
            stringLength = (int32_t)(p - stringStart);
            ++p; /* step past '}' */

            if (stringLength >= ustrCapacity) {
                log_err("FAIL: for %s an expected string of length %d is too long\n", pat, stringLength);
                return;
            }

            /* strCopy is only used in messages; truncate it to fit. */
            copyLength = stringLength;
            if (copyLength > (int32_t)sizeof(strCopy) - 1) {
                copyLength = (int32_t)sizeof(strCopy) - 1;
            }
            strncpy(strCopy, stringStart, copyLength);
            strCopy[copyLength] = 0;

            u_charsToUChars(stringStart, ustr, stringLength);
            ustr[stringLength] = 0;

            if (itemLen == 0) {
                log_err("FAIL: for %s expect \"%s\" next, but got a char\n", pat, strCopy);
                return;
            }

            if (u_strcmp(ustr, itemStr) != 0) {
                log_err("FAIL: for %s expect \"%s\" next\n", pat, strCopy);
                return;
            }
        }

        else {
            UChar32 c;

            u_charsToUChars(p, ustr, 1);
            c = ustr[0];

            if (itemLen != 0) {
                log_err("FAIL: for %s expect '%c' next, but got a string\n", pat, *p);
                return;
            }

            /* Walk through the current range one code point per expected char. */
            if (c != start++) {
                log_err("FAIL: for %s expect '%c' next\n", pat, *p);
                return;
            }

            ++p;
        }
    }

    if (uset_size(set) == expectedSize) {
        log_verbose("Ok: %s size is %d\n", pat, expectedSize);
    } else {
        log_err("FAIL: %s size is %d, expected %d\n",
                pat, uset_size(set), expectedSize);
    }
}