Example #1
0
   bool operator() (int a, int b) const
   {
//      if (col) {
         UErrorCode status = U_ZERO_ERROR;
         int ret = (int)ucol_strcollUTF8(col,
            cont->get(a).c_str(), cont->get(a).length(),
            cont->get(b).c_str(), cont->get(b).length(), &status);
         STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
         return (decreasing)?(ret > 0):(ret < 0);
Example #2
0
/**
 * Compare elements in 2 character vectors, with collation
 *
 * @param e1 character vector
 * @param e2 character vector
 * @param opts_collator passed to stri__ucol_open()
 * @param type [internal] vector of length 2,
 * type[0]: 0 for ==, -1 for < and 1 for >,
 * type[1]: 0 or 1 (whether to negate the results)
 *
 * @return logical vector
 *
 * @version 0.2-1  (Marek Gagolewski, 2014-03-19)
 *
 * @version 0.2-3 (Marek Gagolewski, 2014-05-07)
 *          opts_collator == NA no longer allowed
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 */
SEXP stri_cmp_logical(SEXP e1, SEXP e2, SEXP opts_collator, SEXP type)
{
   // we'll perform a collator-based cmp
   // type is an internal arg, check manually, error() allowed here
   if (!Rf_isInteger(type) || LENGTH(type) != 2)
      Rf_error(MSG__INCORRECT_INTERNAL_ARG);
   int _type = INTEGER(type)[0];
   int _negate = INTEGER(type)[1];
   if (_type > 1 || _type < -1 || _negate < 0 || _negate > 1)
      Rf_error(MSG__INCORRECT_INTERNAL_ARG);

   PROTECT(e1 = stri_prepare_arg_string(e1, "e1")); // prepare string argument
   PROTECT(e2 = stri_prepare_arg_string(e2, "e2")); // prepare string argument

   // call stri__ucol_open after prepare_arg:
   // if prepare_arg had failed, we would have a mem leak
   UCollator* col = NULL;
   col = stri__ucol_open(opts_collator);

   STRI__ERROR_HANDLER_BEGIN(2)

   R_len_t vectorize_length = stri__recycling_rule(true, 2, LENGTH(e1), LENGTH(e2));

   StriContainerUTF8 e1_cont(e1, vectorize_length);
   StriContainerUTF8 e2_cont(e2, vectorize_length);

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(LGLSXP, vectorize_length));
   int* ret_tab = LOGICAL(ret);

   for (R_len_t i = 0; i < vectorize_length; ++i)
   {
      if (e1_cont.isNA(i) || e2_cont.isNA(i)) {
         ret_tab[i] = NA_LOGICAL;
         continue;
      }

      R_len_t     cur1_n = e1_cont.get(i).length();
      const char* cur1_s = e1_cont.get(i).c_str();
      R_len_t     cur2_n = e2_cont.get(i).length();
      const char* cur2_s = e2_cont.get(i).c_str();

      // with collation
      UErrorCode status = U_ZERO_ERROR;
      ret_tab[i] = (_type == (int)ucol_strcollUTF8(col,
         cur1_s, cur1_n, cur2_s, cur2_n, &status
      ));
      STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})

      if (_negate)
         ret_tab[i] = !ret_tab[i];
   }
int main() {
	UErrorCode status = U_ZERO_ERROR;
	UCollator *coll = ucol_open(0, &status);
	ucol_setStrength(coll, UCOL_PRIMARY);

	for (int i = 0; i < PASSES; ++i) {
		UCollationResult coll_res = ucol_strcollUTF8(coll, INPUT1, -1, INPUT2, -1, &status);
		(void)(coll_res);
	}

	ucol_close(coll);

	return 0;
}
Example #4
0
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t sortklen1, sortklen2, sortklenmax, sortklenmin;
    int temp=0, gSortklen1=0,gSortklen2=0;
    UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
    uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
    uint32_t sLen = u_strlen(source);
    uint32_t tLen = u_strlen(target);
    char buffer[256];
    uint32_t len;
    UErrorCode status = U_ZERO_ERROR;
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    UCharIterator sIter, tIter;

    compareResult  = ucol_strcoll(myCollation, source, sLen, target, tLen);
    if (compareResult != result) {
        log_err("ucol_strcoll with explicit length returned wrong result (%i exp. %i): %s, %s\n",
            compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1));
    }
    compareResulta = ucol_strcoll(myCollation, source, -1,   target, -1); 
    if (compareResulta != result) {
        log_err("ucol_strcoll with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
            compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1));
    }

    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    if(compareResultIter != result) {
        log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* convert the strings to UTF-8 and do try comparing with char iterator and ucol_strcollUTF8 */
    {
        char utf8Source[256], utf8Target[256];
        int32_t utf8SourceLen = 0, utf8TargetLen = 0;

        u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
        if(U_FAILURE(status)) { /* probably buffer is not big enough */
            log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
        } else {
            u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
            if(U_SUCCESS(status)) {
                {
                    /* ucol_strcollUTF8 */
                    compareResulta = ucol_strcollUTF8(myCollation, utf8Source, utf8SourceLen, utf8Target, utf8TargetLen, &status);
                    if (U_FAILURE(status)) {
                        log_err("Error in ucol_strcollUTF8 with explicit length\n");
                        status = U_ZERO_ERROR;
                    } else if (compareResulta != result) {
                        log_err("ucol_strcollUTF8 with explicit length returned wrong result (%i exp. %i): %s, %s\n",
                            compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                    compareResulta = ucol_strcollUTF8(myCollation, utf8Source, -1, utf8Target, -1, &status);
                    if (U_FAILURE(status)) {
                        log_err("Error in ucol_strcollUTF8 with null terminated strings\n");
                        status = U_ZERO_ERROR;
                    } else if (compareResulta != result) {
                        log_err("ucol_strcollUTF8 with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
                            compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                }

                {
                    /* char iterator over UTF8 */
                    UCollationResult compareResultUTF8Iter = result, compareResultUTF8IterNorm = result;

                    uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
                    uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
                    compareResultUTF8Iter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                    sIter.move(&sIter, 0, UITER_START);
                    tIter.move(&tIter, 0, UITER_START);
                    compareResultUTF8IterNorm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
                    if(compareResultUTF8Iter != compareResultIter) {
                        log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                    if(compareResultUTF8Iter != compareResultUTF8IterNorm) {
                        log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                }
            } else {
                log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
            }
            if(U_FAILURE(status)) {
                log_verbose("UTF-8 strcoll failed! Ignoring result\n");
            }
        }
    }

    /* testing the partial sortkeys */
    if(1) { /*!QUICK*/
      int32_t i = 0;
      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
      int32_t partialSizesSize = 1;
      if(getTestOption(QUICK_OPTION) <= 0) {
        partialSizesSize = 7;
      }
      /*log_verbose("partial sortkey test piecesize=");*/
      for(i = 0; i < partialSizesSize; i++) {
        UCollationResult partialSKResult = result, partialNormalizedSKResult = result;
        /*log_verbose("%i ", partialSizes[i]);*/

        partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
        if(partialSKResult != result) {
          log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n", 
            partialSKResult, result,
            aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
        }

        if(getTestOption(QUICK_OPTION) <= 0 && norm != UCOL_ON) {
          /*log_verbose("N ");*/
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(partialSKResult != partialNormalizedSKResult) {
            log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n", 
              aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
          }
        }
      }
      /*log_verbose("\n");*/
    }

    sortklen1=ucol_getSortKey(myCollation, source, sLen,  NULL, 0);
    sortklen2=ucol_getSortKey(myCollation, target, tLen,  NULL, 0);

    sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);
    sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2);
    (void)sortklenmin;  /* Suppress set but not used warning. */

    sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    ucol_getSortKey(myCollation, source, sLen, sortKey1,  sortklen1+1);
    ucol_getSortKey(myCollation, source, -1,   sortKey1a, sortklen1+1);
    
    sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    ucol_getSortKey(myCollation, target, tLen, sortKey2,  sortklen2+1);
    ucol_getSortKey(myCollation, target, -1,   sortKey2a, sortklen2+1);

    /* Check that sort key generated with null terminated string is identical  */
    /*  to that generted with a length specified.                              */
    if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
        uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    /*memcmp(sortKey1, sortKey2,sortklenmax);*/
    temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
    gSortklen1 = uprv_strlen((const char *)sortKey1)+1;
    gSortklen2 = uprv_strlen((const char *)sortKey2)+1;
    if(sortklen1 != gSortklen1){
        log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len));
    }
    if(sortklen2!= gSortklen2){
        log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len));
    }

    if(temp < 0) {
        keyResult=UCOL_LESS;
    }
    else if(temp > 0) {
        keyResult= UCOL_GREATER;
    }
    else {
        keyResult = UCOL_EQUAL;
    }
    reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
    free(sortKey1);
    free(sortKey2);
    free(sortKey1a);
    free(sortKey2a);

}