static void
addClosureMapping(UChar32 src, UChar32 dest) {
    uint32_t value;

    if(beVerbose) {
        printf("add closure mapping U+%04lx->U+%04lx\n",
                (unsigned long)src, (unsigned long)dest);
    }

    value=upvec_getValue(pv, src, 0);
    if(value&UCASE_EXCEPTION) {
        Props *p=excProps+(value>>UGENCASE_EXC_SHIFT);
        int32_t i;

        /* append dest to src's closure array */
        for(i=0;; ++i) {
            if(i==LENGTHOF(p->closure)) {
                fprintf(stderr, "closure[] overflow for U+%04lx->U+%04lx\n",
                                (unsigned long)src, (unsigned long)dest);
                exit(U_BUFFER_OVERFLOW_ERROR);
            } else if(p->closure[i]==dest) {
                break; /* do not store duplicates */
            } else if(p->closure[i]==0) {
                p->closure[i]=dest;
                break;
            }
        }
    } else {
Exemple #2
0
/* Improve code coverage of UPropsVectors */
static void TestUPropsVector() {
    UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    UPropsVectors *pv = upvec_open(100, &errorCode);
    if (pv != NULL) {
        log_err("Should have returned NULL if UErrorCode is an error.");
        return;
    }
    errorCode = U_ZERO_ERROR;
    pv = upvec_open(-1, &errorCode);
    if (pv != NULL || U_SUCCESS(errorCode)) {
        log_err("Should have returned NULL if column is less than 0.\n");
        return;
    }
    errorCode = U_ZERO_ERROR;
    pv = upvec_open(100, &errorCode);
    if (pv == NULL || U_FAILURE(errorCode)) {
        log_err("Unable to open UPropsVectors.\n");
        return;
    }

    if (upvec_getValue(pv, 0, 1) != 0) {
        log_err("upvec_getValue should return 0.\n");
    }
    if (upvec_getRow(pv, 0, NULL, NULL) == NULL) {
        log_err("upvec_getRow should not return NULL.\n");
    }
    if (upvec_getArray(pv, NULL, NULL) != NULL) {
        log_err("upvec_getArray should return NULL.\n");
    }

    upvec_close(pv);
}
extern void
setProps(Props *p) {
    UErrorCode errorCode;
    uint32_t value, oldValue;
    int32_t delta;
    UBool isCaseIgnorable;

    /* get the non-UnicodeData.txt properties */
    value=oldValue=upvec_getValue(pv, p->code, 0);

    /* default: map to self */
    delta=0;

    if(p->gc==U_TITLECASE_LETTER) {
        /* the Titlecase property is read late, from UnicodeData.txt */
        value|=UCASE_TITLE;
    }

    if(p->upperCase!=0) {
        /* uppercase mapping as delta if the character is lowercase */
        if((value&UCASE_TYPE_MASK)==UCASE_LOWER) {
            delta=p->upperCase-p->code;
        } else {
            value|=UCASE_EXCEPTION;
        }
    }
    if(p->lowerCase!=0) {
        /* lowercase mapping as delta if the character is uppercase or titlecase */
        if((value&UCASE_TYPE_MASK)>=UCASE_UPPER) {
            delta=p->lowerCase-p->code;
        } else {
            value|=UCASE_EXCEPTION;
        }
    }
    if(p->upperCase!=p->titleCase) {
        value|=UCASE_EXCEPTION;
    }
    if(p->closure[0]!=0) {
        value|=UCASE_EXCEPTION;
    }
    if(p->specialCasing!=NULL) {
        value|=UCASE_EXCEPTION;
    }
    if(p->caseFolding!=NULL) {
        value|=UCASE_EXCEPTION;
    }

    if(delta<UCASE_MIN_DELTA || UCASE_MAX_DELTA<delta) {
        value|=UCASE_EXCEPTION;
    }

    if(p->cc!=0) {
        if(value&UCASE_DOT_MASK) {
            fprintf(stderr, "gencase: a soft-dotted character has cc!=0\n");
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
        if(p->cc==230) {
            value|=UCASE_ABOVE;
        } else {
            value|=UCASE_OTHER_ACCENT;
        }
    }

    /* encode case-ignorable as delta==1 on uncased characters */
    isCaseIgnorable=FALSE;
    if((value&UCASE_TYPE_MASK)==UCASE_NONE) {
        if(ucdVersion>=UNI_4_1) {
            /*
             * Unicode 4.1 and up: (D47a) Word_Break=MidLetter or Mn, Me, Cf, Lm, Sk
             * Unicode 5.1 and up: Word_Break=(MidLetter or MidNumLet) or Mn, Me, Cf, Lm, Sk
             *   The UGENCASE_IS_MID_LETTER_SHIFT bit is set for both WB=MidLetter and WB=MidNumLet.
             */
            if(
                (U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
                (upvec_getValue(pv, p->code, 1)&U_MASK(UGENCASE_IS_MID_LETTER_SHIFT))!=0
            ) {
                isCaseIgnorable=TRUE;
            }
        } else {
            /* before Unicode 4.1: Mn, Me, Cf, Lm, Sk or 0027 or 00AD or 2019 */
            if(
                (U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
                p->code==0x27 || p->code==0xad || p->code==0x2019
            ) {
                isCaseIgnorable=TRUE;
            }
        }
    }

    if(isCaseIgnorable && p->code!=0x307) {
        /*
         * We use one of the delta/exception bits, which works because we only
         * store the case-ignorable flag for uncased characters.
         * There is no delta for uncased characters (see checks above).
         * If there is an exception for an uncased, case-ignorable character
         * (although there should not be any case mappings if it's uncased)
         * then we have a problem.
         * There is one character which is case-ignorable but has an exception:
         * U+0307 is uncased, Mn, has conditional special casing and
         * is therefore handled in code instead.
         */
        if(value&UCASE_EXCEPTION) {
            fprintf(stderr, "gencase error: unable to encode case-ignorable for U+%04lx with exceptions\n",
                            (unsigned long)p->code);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }

        delta=1;
    }

    /* handle exceptions */
    if(value&UCASE_EXCEPTION) {
        /* simply store exceptions for later processing and encoding */
        value|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT;
        uprv_memcpy(excProps+exceptionsCount, p, sizeof(*p));
        if(++exceptionsCount==MAX_EXC_COUNT) {
            fprintf(stderr, "gencase: too many exceptions\n");
            exit(U_INDEX_OUTOFBOUNDS_ERROR);
        }
    } else {
        /* store the simple case mapping delta */
        value|=((uint32_t)delta<<UCASE_DELTA_SHIFT)&UCASE_DELTA_MASK;
    }

    errorCode=U_ZERO_ERROR;
    if(value!=oldValue) {
        upvec_setValue(pv, p->code, p->code, 0, value, 0xffffffff, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
                            u_errorName(errorCode));
            exit(errorCode);
        }
    }

    /* add the multi-character case folding to the "unfold" data */
    if(p->caseFolding!=NULL) {
        int32_t length=p->caseFolding->full[0];
        if(length>1 && u_strHasMoreChar32Than(p->caseFolding->full+1, length, 1)) {
            addUnfolding(p->code, p->caseFolding->full+1, length);
        }
    }
}