bool operator() (int a, int b) const { // if (col) { UErrorCode status = U_ZERO_ERROR; int ret = (int)ucol_strcollUTF8(col, cont->get(a).c_str(), cont->get(a).length(), cont->get(b).c_str(), cont->get(b).length(), &status); STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */}) return (decreasing)?(ret > 0):(ret < 0);
/** * Compare elements in 2 character vectors, with collation * * @param e1 character vector * @param e2 character vector * @param opts_collator passed to stri__ucol_open() * @param type [internal] vector of length 2, * type[0]: 0 for ==, -1 for < and 1 for >, * type[1]: 0 or 1 (whether to negate the results) * * @return logical vector * * @version 0.2-1 (Marek Gagolewski, 2014-03-19) * * @version 0.2-3 (Marek Gagolewski, 2014-05-07) * opts_collator == NA no longer allowed * * @version 0.3-1 (Marek Gagolewski, 2014-11-04) * Issue #112: str_prepare_arg* retvals were not PROTECTed from gc */ SEXP stri_cmp_logical(SEXP e1, SEXP e2, SEXP opts_collator, SEXP type) { // we'll perform a collator-based cmp // type is an internal arg, check manually, error() allowed here if (!Rf_isInteger(type) || LENGTH(type) != 2) Rf_error(MSG__INCORRECT_INTERNAL_ARG); int _type = INTEGER(type)[0]; int _negate = INTEGER(type)[1]; if (_type > 1 || _type < -1 || _negate < 0 || _negate > 1) Rf_error(MSG__INCORRECT_INTERNAL_ARG); PROTECT(e1 = stri_prepare_arg_string(e1, "e1")); // prepare string argument PROTECT(e2 = stri_prepare_arg_string(e2, "e2")); // prepare string argument // call stri__ucol_open after prepare_arg: // if prepare_arg had failed, we would have a mem leak UCollator* col = NULL; col = stri__ucol_open(opts_collator); STRI__ERROR_HANDLER_BEGIN(2) R_len_t vectorize_length = stri__recycling_rule(true, 2, LENGTH(e1), LENGTH(e2)); StriContainerUTF8 e1_cont(e1, vectorize_length); StriContainerUTF8 e2_cont(e2, vectorize_length); SEXP ret; STRI__PROTECT(ret = Rf_allocVector(LGLSXP, vectorize_length)); int* ret_tab = LOGICAL(ret); for (R_len_t i = 0; i < vectorize_length; ++i) { if (e1_cont.isNA(i) || e2_cont.isNA(i)) { ret_tab[i] = NA_LOGICAL; continue; } R_len_t cur1_n = e1_cont.get(i).length(); const char* cur1_s = e1_cont.get(i).c_str(); R_len_t cur2_n = e2_cont.get(i).length(); const char* cur2_s = e2_cont.get(i).c_str(); // with collation UErrorCode status = U_ZERO_ERROR; ret_tab[i] = (_type == (int)ucol_strcollUTF8(col, cur1_s, cur1_n, cur2_s, cur2_n, &status )); STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */}) if (_negate) ret_tab[i] = !ret_tab[i]; }
int main() { UErrorCode status = U_ZERO_ERROR; UCollator *coll = ucol_open(0, &status); ucol_setStrength(coll, UCOL_PRIMARY); for (int i = 0; i < PASSES; ++i) { UCollationResult coll_res = ucol_strcollUTF8(coll, INPUT1, -1, INPUT2, -1, &status); (void)(coll_res); } ucol_close(coll); return 0; }
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result) { int32_t sortklen1, sortklen2, sortklenmax, sortklenmin; int temp=0, gSortklen1=0,gSortklen2=0; UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result; uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a; uint32_t sLen = u_strlen(source); uint32_t tLen = u_strlen(target); char buffer[256]; uint32_t len; UErrorCode status = U_ZERO_ERROR; UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status); UCharIterator sIter, tIter; compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen); if (compareResult != result) { log_err("ucol_strcoll with explicit length returned wrong result (%i exp. %i): %s, %s\n", compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1)); } compareResulta = ucol_strcoll(myCollation, source, -1, target, -1); if (compareResulta != result) { log_err("ucol_strcoll with null terminated strings returned wrong result (%i exp. %i): %s, %s\n", compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1)); } uiter_setString(&sIter, source, sLen); uiter_setString(&tIter, target, tLen); compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); if(compareResultIter != result) { log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); } /* convert the strings to UTF-8 and do try comparing with char iterator and ucol_strcollUTF8 */ { char utf8Source[256], utf8Target[256]; int32_t utf8SourceLen = 0, utf8TargetLen = 0; u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status); if(U_FAILURE(status)) { /* probably buffer is not big enough */ log_verbose("Src UTF-8 buffer too small! Will not compare!\n"); } else { u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status); if(U_SUCCESS(status)) { { /* ucol_strcollUTF8 */ compareResulta = ucol_strcollUTF8(myCollation, utf8Source, utf8SourceLen, utf8Target, utf8TargetLen, &status); if (U_FAILURE(status)) { log_err("Error in ucol_strcollUTF8 with explicit length\n"); status = U_ZERO_ERROR; } else if (compareResulta != result) { log_err("ucol_strcollUTF8 with explicit length returned wrong result (%i exp. %i): %s, %s\n", compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1)); } compareResulta = ucol_strcollUTF8(myCollation, utf8Source, -1, utf8Target, -1, &status); if (U_FAILURE(status)) { log_err("Error in ucol_strcollUTF8 with null terminated strings\n"); status = U_ZERO_ERROR; } else if (compareResulta != result) { log_err("ucol_strcollUTF8 with null terminated strings returned wrong result (%i exp. %i): %s, %s\n", compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1)); } } { /* char iterator over UTF8 */ UCollationResult compareResultUTF8Iter = result, compareResultUTF8IterNorm = result; uiter_setUTF8(&sIter, utf8Source, utf8SourceLen); uiter_setUTF8(&tIter, utf8Target, utf8TargetLen); compareResultUTF8Iter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); sIter.move(&sIter, 0, UITER_START); tIter.move(&tIter, 0, UITER_START); compareResultUTF8IterNorm = ucol_strcollIter(myCollation, &sIter, &tIter, &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); if(compareResultUTF8Iter != compareResultIter) { log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); } if(compareResultUTF8Iter != compareResultUTF8IterNorm) { log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); } } } else { log_verbose("Target UTF-8 buffer too small! Did not compare!\n"); } if(U_FAILURE(status)) { log_verbose("UTF-8 strcoll failed! Ignoring result\n"); } } } /* testing the partial sortkeys */ if(1) { /*!QUICK*/ int32_t i = 0; int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */ int32_t partialSizesSize = 1; if(getTestOption(QUICK_OPTION) <= 0) { partialSizesSize = 7; } /*log_verbose("partial sortkey test piecesize=");*/ for(i = 0; i < partialSizesSize; i++) { UCollationResult partialSKResult = result, partialNormalizedSKResult = result; /*log_verbose("%i ", partialSizes[i]);*/ partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status); if(partialSKResult != result) { log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n", partialSKResult, result, aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]); } if(getTestOption(QUICK_OPTION) <= 0 && norm != UCOL_ON) { /*log_verbose("N ");*/ ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); if(partialSKResult != partialNormalizedSKResult) { log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n", aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]); } } } /*log_verbose("\n");*/ } sortklen1=ucol_getSortKey(myCollation, source, sLen, NULL, 0); sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0); sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2); sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2); (void)sortklenmin; /* Suppress set but not used warning. */ sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1); ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1); sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1); ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1); /* Check that sort key generated with null terminated string is identical */ /* to that generted with a length specified. */ if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 || uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) { log_err("Sort Keys from null terminated and explicit length strings differ.\n"); } /*memcmp(sortKey1, sortKey2,sortklenmax);*/ temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2); gSortklen1 = uprv_strlen((const char *)sortKey1)+1; gSortklen2 = uprv_strlen((const char *)sortKey2)+1; if(sortklen1 != gSortklen1){ log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1); log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len)); } if(sortklen2!= gSortklen2){ log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2); log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len)); } if(temp < 0) { keyResult=UCOL_LESS; } else if(temp > 0) { keyResult= UCOL_GREATER; } else { keyResult = UCOL_EQUAL; } reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result ); free(sortKey1); free(sortKey2); free(sortKey1a); free(sortKey2a); }