Esempio n. 1
0
static UBool *
getResultsManually(const char** encodings, int32_t num_encodings,
                   const char *utf8, int32_t length,
                   const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
  UBool* resultsManually;
  int32_t i;

  resultsManually = (UBool*) uprv_malloc(gCountAvailable);
  uprv_memset(resultsManually, 0, gCountAvailable);

  for(i = 0 ; i < num_encodings ; i++) {
    UErrorCode status = U_ZERO_ERROR;
    /* get unicode set for that converter */
    USet* set;
    UConverter* test_converter;
    UChar32 cp;
    int32_t encIndex, offset;

    set = uset_openEmpty();
    test_converter = ucnv_open(encodings[i], &status);
    ucnv_getUnicodeSet(test_converter, set,
                       whichSet, &status);
    if (excludedCodePoints != NULL) {
      uset_addAll(set, excludedCodePoints);
    }
    uset_freeze(set);
    offset = 0;
    cp = 0;

    encIndex = findIndex(encodings[i]);
    /*
     * The following is almost, but not entirely, the same as
     * resultsManually[encIndex] =
     *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
     * They might be different if the set contains strings,
     * or if the utf8 string contains an illegal sequence.
     *
     * The UConverterSelector does not currently handle strings that can be
     * converted, and it treats an illegal sequence as convertible
     * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
     */
    resultsManually[encIndex] = TRUE;
    while(offset<length) {
      U8_NEXT(utf8, offset, length, cp);
      if (cp >= 0 && !uset_contains(set, cp)) {
        resultsManually[encIndex] = FALSE;
        break;
      }
    }
    uset_close(set);
    ucnv_close(test_converter);
  }
  return resultsManually;
}
Esempio n. 2
0
static void
TestSerialized() {
    uint16_t buffer[1000];
    USerializedSet sset;
    USet *set;
    UErrorCode errorCode;
    UChar32 c;
    int32_t length;

    /* use a pattern that generates both BMP and supplementary code points */
    U_STRING_DECL(pattern, "[:Cf:]", 6);
    U_STRING_INIT(pattern, "[:Cf:]", 6);

    errorCode=U_ZERO_ERROR;
    set=uset_openPattern(pattern, -1, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("uset_openPattern([:Cf:]) failed - %s (Are you missing data?)\n", u_errorName(errorCode));
        return;
    }

    length=uset_serialize(set, buffer, UPRV_LENGTHOF(buffer), &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode));
        uset_close(set);
        return;
    }

    uset_getSerializedSet(&sset, buffer, length);
    for(c=0; c<=0x10ffff; ++c) {
        if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) {
            log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c);
            break;
        }
    }

    uset_close(set);
}
Esempio n. 3
0
U_CDECL_BEGIN
static UBool U_CALLCONV
_processSpecials(const void *context, UChar32 start, UChar32 limit, uint32_t CE)
{
    UErrorCode *status = ((contContext *)context)->status;
    USet *expansions = ((contContext *)context)->expansions;
    USet *removed = ((contContext *)context)->removedContractions;
    UBool addPrefixes = ((contContext *)context)->addPrefixes;
    UChar contraction[internalBufferSize];
    if(isSpecial(CE)) {
      if(((getCETag(CE) == SPEC_PROC_TAG && addPrefixes) || getCETag(CE) == CONTRACTION_TAG)) {
        while(start < limit && U_SUCCESS(*status)) {
            // if there are suppressed contractions, we don't
            // want to add them.
            if(removed && uset_contains(removed, start)) {
                start++;
                continue;
            }
            // we start our contraction from middle, since we don't know if it
            // will grow toward right or left
            contraction[internalBufferSize/2] = (UChar)start;
            addSpecial(((contContext *)context), contraction, internalBufferSize, CE, internalBufferSize/2, internalBufferSize/2+1, status);
            start++;
        }
      } else if(expansions && getCETag(CE) == EXPANSION_TAG) {
        while(start < limit && U_SUCCESS(*status)) {
          uset_add(expansions, start++);
        }
      }
    }
    if(U_FAILURE(*status)) {
        return FALSE;
    } else {
        return TRUE;
    }
}
Esempio n. 4
0
bool isCharacterSmartReplaceExempt(UChar32 c, bool isPreviousCharacter)
{
    return uset_contains(getSmartSet(isPreviousCharacter), c);
}
Esempio n. 5
0
static void expectContainment(const USet* set,
                              const char* list,
                              UBool isIn) {
    const char* p = list;
    UChar ustr[4096];
    char *pat;
    UErrorCode ec;
    int32_t rangeStart = -1, rangeEnd = -1, length;
            
    ec = U_ZERO_ERROR;
    length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
    if(U_FAILURE(ec)) {
        log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec));
        return;
    }
    pat=aescstrdup(ustr, length);

    while (*p) {
        if (*p=='{') {
            const char* stringStart = ++p;
            int32_t stringLength = 0;
            char strCopy[64];

            while (*p++ != '}') {
            }
            stringLength = (int32_t)(p - stringStart - 1);
            strncpy(strCopy, stringStart, stringLength);
            strCopy[stringLength] = 0;

            u_charsToUChars(stringStart, ustr, stringLength);
            
            if (uset_containsString(set, ustr, stringLength) == isIn) {
                log_verbose("Ok: %s %s \"%s\"\n", pat,
                            (isIn ? "contains" : "does not contain"),
                            strCopy);
            } else {
                log_data_err("FAIL: %s %s \"%s\" (Are you missing data?)\n", pat,
                        (isIn ? "does not contain" : "contains"),
                        strCopy);
            }
        }

        else {
            UChar32 c;

            u_charsToUChars(p, ustr, 1);
            c = ustr[0];

            if (uset_contains(set, c) == isIn) {
                log_verbose("Ok: %s %s '%c'\n", pat,
                            (isIn ? "contains" : "does not contain"),
                            *p);
            } else {
                log_data_err("FAIL: %s %s '%c' (Are you missing data?)\n", pat,
                        (isIn ? "does not contain" : "contains"),
                        *p);
            }

            /* Test the range API too by looking for ranges */
            if (c == rangeEnd+1) {
                rangeEnd = c;
            } else {
                if (rangeStart >= 0) {
                    if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
                        log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
                                    (isIn ? "contains" : "does not contain"),
                                    rangeStart, rangeEnd);
                    } else {
                        log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat,
                                (isIn ? "does not contain" : "contains"),
                                rangeStart, rangeEnd);
                    }
                }
                rangeStart = rangeEnd = c;
            }

            ++p;
        }
    }

    if (rangeStart >= 0) {
        if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
            log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
                        (isIn ? "contains" : "does not contain"),
                        rangeStart, rangeEnd);
        } else {
            log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat,
                    (isIn ? "does not contain" : "contains"),
                    rangeStart, rangeEnd);
        }
    }
}
Esempio n. 6
0
static int32_t
u_scanf_scanset_handler(UFILE       *input,
                        u_scanf_spec_info *info,
                        ufmt_args   *args,
                        const UChar *fmt,
                        int32_t     *fmtConsumed,
                        int32_t     *argConverted)
{
    USet        *scanset;
    UErrorCode  status = U_ZERO_ERROR;
    int32_t     chLeft = INT32_MAX;
    UChar32     c;
    UChar       *alias = (UChar*) (args[0].ptrValue);
    UBool       isNotEOF = FALSE;
    UBool       readCharacter = FALSE;

    /* Create an empty set */
    scanset = uset_open(0, -1);

    /* Back up one to get the [ */
    fmt--;

    /* truncate to the width, if specified and alias the target */
    if(info->fWidth >= 0) {
        chLeft = info->fWidth;
    }

    /* parse the scanset from the fmt string */
    *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);

    /* verify that the parse was successful */
    if (U_SUCCESS(status)) {
        c=0;

        /* grab characters one at a time and make sure they are in the scanset */
        while(chLeft > 0) {
            if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
                readCharacter = TRUE;
                if (!info->fSkipArg) {
                    int32_t idx = 0;
                    UBool isError = FALSE;

                    U16_APPEND(alias, idx, chLeft, c, isError);
                    if (isError) {
                        break;
                    }
                    alias += idx;
                }
                chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
            }
            else {
                /* if the character's not in the scanset, break out */
                break;
            }
        }

        /* put the final character we read back on the input */
        if(isNotEOF && chLeft > 0) {
            u_fungetc(c, input);
        }
    }

    uset_close(scanset);

    /* if we didn't match at least 1 character, fail */
    if(!readCharacter)
        return -1;
    /* otherwise, add the terminator */
    else if (!info->fSkipArg) {
        *alias = 0x00;
    }

    /* we converted 1 arg */
    *argConverted = !info->fSkipArg;
    return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
}