Example #1
0
/**
 * Computes, converter by converter, whether every code point of a UTF-8
 * string is contained in that converter's Unicode set.
 *
 * @param encodings          names of the converters to check
 * @param num_encodings      number of entries in encodings
 * @param utf8               UTF-8 text to examine
 * @param length             number of bytes of utf8 to examine
 * @param excludedCodePoints optional set (may be NULL) whose members are added
 *                           to each converter's set before the check
 * @param whichSet           selector passed through to ucnv_getUnicodeSet()
 * @return a buffer of gCountAvailable bytes allocated with uprv_malloc() and
 *         zero-filled, in which the slot findIndex(encodings[i]) is TRUE when
 *         no code point outside the set was found; NULL if the allocation
 *         failed. The function does not free the buffer; presumably the
 *         caller releases it with uprv_free() (confirm against callers).
 */
static UBool *
getResultsManually(const char** encodings, int32_t num_encodings,
                   const char *utf8, int32_t length,
                   const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
  UBool* resultsManually;
  int32_t i;

  resultsManually = (UBool*) uprv_malloc(gCountAvailable);
  if (resultsManually == NULL) {
    /* out of memory: do not hand a NULL pointer to uprv_memset() */
    return NULL;
  }
  uprv_memset(resultsManually, 0, gCountAvailable);

  for(i = 0 ; i < num_encodings ; i++) {
    UErrorCode status = U_ZERO_ERROR;
    /* get unicode set for that converter */
    USet* set;
    UConverter* test_converter;
    UChar32 cp;
    int32_t encIndex, offset;

    set = uset_openEmpty();
    test_converter = ucnv_open(encodings[i], &status);
    ucnv_getUnicodeSet(test_converter, set,
                       whichSet, &status);
    if (excludedCodePoints != NULL) {
      uset_addAll(set, excludedCodePoints);
    }
    uset_freeze(set);
    offset = 0;
    cp = 0;

    encIndex = findIndex(encodings[i]);
    /*
     * The following is almost, but not entirely, the same as
     * resultsManually[encIndex] =
     *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
     * They might be different if the set contains strings,
     * or if the utf8 string contains an illegal sequence.
     *
     * The UConverterSelector does not currently handle strings that can be
     * converted, and it treats an illegal sequence as convertible
     * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
     */
    /*
     * Only write a slot that lies inside the allocated buffer; an index
     * outside [0, gCountAvailable) would otherwise corrupt memory.
     */
    if (encIndex >= 0 && encIndex < gCountAvailable) {
      resultsManually[encIndex] = TRUE;
      while(offset<length) {
        U8_NEXT(utf8, offset, length, cp);
        /* cp < 0 marks an illegal sequence, which is deliberately ignored */
        if (cp >= 0 && !uset_contains(set, cp)) {
          resultsManually[encIndex] = FALSE;
          break;
        }
      }
    }
    uset_close(set);
    ucnv_close(test_converter);
  }
  return resultsManually;
}
Example #2
0
File: utr.c Project: julp/ugrep
// Builds a frozen set holding every code point that occurs in ustr.
// ustr is taken as a plain list of characters, not as a bracketed class;
// when negate is set, the resulting set is complemented before freezing.
USet *create_set_from_ustring(UString *ustr, UBool negate, error_t **UNUSED(error))
{
    USet *result = uset_openEmpty();

    uset_addAllCodePoints(result, ustr->ptr, ustr->len);

    if (negate) {
        uset_complement(result);
    }

    // no further modification happens past this point
    uset_freeze(result);
    return result;
}
Example #3
0
/*
 * Checks the span functions (forward and backward, UTF-16 and UTF-8) against
 * the set returned by openIDSet(), first while the set is still modifiable
 * and then again after it has been frozen. The expected values require the
 * first code point of the sample text to be in the set and the second one
 * to be outside of it.
 */
static void TestSpan() {
    /* U+0E01 followed by U+3000, as UTF-16 and as UTF-8 */
    static const UChar text16[2]={ 0xe01, 0x3000 };
    static const char* text8="\xE0\xB8\x81\xE3\x80\x80";

    USet *set=openIDSet();

    if (NULL == set) {
        log_data_err("openIDSet() returned NULL (Are you missing data?)\n");
        return;
    }

    /* modifiable set, UTF-16 */
    if(!(
        uset_span(set, text16, 2, USET_SPAN_CONTAINED)==1 &&
        uset_span(set, text16, 2, USET_SPAN_NOT_CONTAINED)==0 &&
        uset_spanBack(set, text16, 2, USET_SPAN_CONTAINED)==2 &&
        uset_spanBack(set, text16, 2, USET_SPAN_NOT_CONTAINED)==1
    )) {
        log_err("uset_span() or uset_spanBack() does not work\n");
    }

    /* modifiable set, UTF-8 */
    if(!(
        uset_spanUTF8(set, text8, 6, USET_SPAN_CONTAINED)==3 &&
        uset_spanUTF8(set, text8, 6, USET_SPAN_NOT_CONTAINED)==0 &&
        uset_spanBackUTF8(set, text8, 6, USET_SPAN_CONTAINED)==6 &&
        uset_spanBackUTF8(set, text8, 6, USET_SPAN_NOT_CONTAINED)==3
    )) {
        log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n");
    }

    /* the same expectations must hold once the set is frozen */
    uset_freeze(set);

    /* frozen set, UTF-16 */
    if(!(
        uset_span(set, text16, 2, USET_SPAN_CONTAINED)==1 &&
        uset_span(set, text16, 2, USET_SPAN_NOT_CONTAINED)==0 &&
        uset_spanBack(set, text16, 2, USET_SPAN_CONTAINED)==2 &&
        uset_spanBack(set, text16, 2, USET_SPAN_NOT_CONTAINED)==1
    )) {
        log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n");
    }

    /* frozen set, UTF-8 */
    if(!(
        uset_spanUTF8(set, text8, 6, USET_SPAN_CONTAINED)==3 &&
        uset_spanUTF8(set, text8, 6, USET_SPAN_NOT_CONTAINED)==0 &&
        uset_spanBackUTF8(set, text8, 6, USET_SPAN_CONTAINED)==6 &&
        uset_spanBackUTF8(set, text8, 6, USET_SPAN_NOT_CONTAINED)==3
    )) {
        log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n");
    }

    uset_close(set);
}
Example #4
0
/*
 * Exercises the freeze/thaw API on the set returned by openIDSet():
 *  - a clone must compare equal to its source,
 *  - a frozen clone must report itself frozen and must not change when
 *    uset_addRange() is called on it,
 *  - uset_cloneAsThawed() must yield a set that is not frozen and that
 *    accepts the same uset_addRange() call.
 * Every set opened here is closed on every exit path.
 */
static void TestFreezable() {
    USet *idSet;
    USet *frozen;
    USet *thawed;

    idSet=openIDSet();

    if (idSet == NULL) {
        /* nothing was opened, so there is nothing to close */
        log_data_err("openIDSet() returned NULL. (Are you missing data?)\n");
        return;
    }

    frozen=uset_clone(idSet);

    if (frozen == NULL) {
        log_err("uset_Clone() returned NULL\n");
        /* idSet is still open here; release it before bailing out */
        uset_close(idSet);
        return;
    }

    if(!uset_equals(frozen, idSet)) {
        log_err("uset_clone() did not make an equal copy\n");
    }

    uset_freeze(frozen);
    /* must have no effect: the check below expects frozen to still equal idSet */
    uset_addRange(frozen, 0xd802, 0xd805);

    if(uset_isFrozen(idSet) || !uset_isFrozen(frozen) || !uset_equals(frozen, idSet)) {
        log_err("uset_freeze() or uset_isFrozen() does not work\n");
    }

    thawed=uset_cloneAsThawed(frozen);

    if (thawed == NULL) {
        log_err("uset_cloneAsThawed(frozen) returned NULL");
        uset_close(frozen);
        uset_close(idSet);
        return;
    }

    /* the same modification must take effect on the thawed copy */
    uset_addRange(thawed, 0xd802, 0xd805);

    if(uset_isFrozen(thawed) || uset_equals(thawed, idSet) || !uset_containsRange(thawed, 0xd802, 0xd805)) {
        log_err("uset_cloneAsThawed() does not work\n");
    }

    uset_close(idSet);
    uset_close(frozen);
    uset_close(thawed);
}
Example #5
0
File: utr.c Project: julp/ugrep
// Builds a frozen set from a command-line argument holding a class pattern
// (i.e. text surrounded by square brackets). The argument is first converted
// with ustring_convert_argv_from_local(), then compiled by uset_openPattern().
// Returns NULL when the conversion fails or when the pattern is rejected; in
// the latter case the failure is recorded through icu_error_set().
// When negate is set, the set is complemented before it is frozen.
USet *create_set_from_argv(const char *cpattern, UBool negate, error_t **error)
{
    UErrorCode status = U_ZERO_ERROR;
    UString *pattern;
    USet *result;

    pattern = ustring_convert_argv_from_local(cpattern, error, TRUE);
    if (NULL == pattern) {
        return NULL;
    }

    result = uset_openPattern(pattern->ptr, pattern->len, &status);
    // the converted pattern is no longer needed, whatever the outcome
    ustring_destroy(pattern);

    if (U_FAILURE(status)) {
        icu_error_set(error, FATAL, status, "uset_openPattern");
        return NULL;
    }

    if (negate) {
        uset_complement(result);
    }
    uset_freeze(result);

    return result;
}