Exemplo n.º 1
0
/* read a range like start or start..end */
U_CAPI int32_t U_EXPORT2
u_parseCodePointRangeAnyTerminator(const char *s,
                                   uint32_t *pStart, uint32_t *pEnd,
                                   const char **terminator,
                                   UErrorCode *pErrorCode) {
    char *end;
    uint32_t value;

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(s==NULL || pStart==NULL || pEnd==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* read the start code point */
    s=u_skipWhitespace(s);
    value=(uint32_t)uprv_strtoul(s, &end, 16);
    if(end<=s || value>=0x110000) {
        *pErrorCode=U_PARSE_ERROR;
        return 0;
    }
    *pStart=*pEnd=value;

    /* is there a "..end"? */
    s=u_skipWhitespace(end);
    if(*s!='.' || s[1]!='.') {
        *terminator=end;
        return 1;
    }
    s=u_skipWhitespace(s+2);

    /* read the end code point */
    value=(uint32_t)uprv_strtoul(s, &end, 16);
    if(end<=s || value>=0x110000) {
        *pErrorCode=U_PARSE_ERROR;
        return 0;
    }
    *pEnd=value;

    /* is this a valid range? */
    if(value<*pStart) {
        *pErrorCode=U_PARSE_ERROR;
        return 0;
    }

    *terminator=end;
    return value-*pStart+1;
}
Exemplo n.º 2
0
static void U_CALLCONV
ageLineFn(void *context,
          char *fields[][2], int32_t fieldCount,
          UErrorCode *pErrorCode) {
    char *s, *numberLimit;
    uint32_t value, start, end, version;

    u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 0 at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }

    /* ignore "unassigned" (the default is already set to 0.0) */
    s=(char *)u_skipWhitespace(fields[1][0]);
    if(0==uprv_strncmp(s, "unassigned", 10)) {
        return;
    }

    /* parse version number */
    value=(uint32_t)uprv_strtoul(s, &numberLimit, 10);
    if(s==numberLimit || value==0 || value>15 || (*numberLimit!='.' && *numberLimit!=' ' && *numberLimit!='\t' && *numberLimit!=0)) {
        fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    version=value<<4;

    /* parse minor version number */
    if(*numberLimit=='.') {
        s=(char *)u_skipWhitespace(numberLimit+1);
        value=(uint32_t)uprv_strtoul(s, &numberLimit, 10);
        if(s==numberLimit || value>15 || (*numberLimit!=' ' && *numberLimit!='\t' && *numberLimit!=0)) {
            fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]);
            *pErrorCode=U_PARSE_ERROR;
            exit(U_PARSE_ERROR);
        }
        version|=value;
    }

    if(start==0 && end==0x10ffff) {
        /* Also set bits for initialValue and errorValue. */
        end=UPVEC_MAX_CP;
    }
    upvec_setValue(pv, start, end, 0, version<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops error: unable to set character age: %s\n", u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
Exemplo n.º 3
0
U_CAPI int8_t U_EXPORT2
ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps) {
    const char *s=*ps;
    char *end;
    uint8_t byte;
    int8_t bLen;

    bLen=0;
    for(;;) {
        /* skip an optional plus sign */
        if(bLen>0 && *s=='+') {
            ++s;
        }
        if(*s!='\\') {
            break;
        }

        if( s[1]!='x' ||
            (byte=(uint8_t)uprv_strtoul(s+2, &end, 16), end)!=s+4
        ) {
            fprintf(stderr, "ucm error: byte must be formatted as \\xXX (2 hex digits) - \"%s\"\n", line);
            return -1;
        }

        if(bLen==UCNV_EXT_MAX_BYTES) {
            fprintf(stderr, "ucm error: too many bytes on \"%s\"\n", line);
            return -1;
        }
        bytes[bLen++]=byte;
        s=end;
    }

    *ps=s;
    return bLen;
}
Exemplo n.º 4
0
U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char *integerString, int32_t radix)
{
    char *end;
    return uprv_strtoul(integerString, &end, radix);

}
Exemplo n.º 5
0
static void U_CALLCONV
unicodeDataLineFn(void *context,
                  char *fields[][2], int32_t fieldCount,
                  UErrorCode *pErrorCode) {
    char *end;
    UErrorCode errorCode;
    UChar32 c;

    errorCode=U_ZERO_ERROR;

    /* get the character code, field 0 */
    c=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
    if(end<=fields[0][0] || end!=fields[0][1]) {
        fprintf(stderr, "genbidi: syntax error in field 0 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* get Mirrored flag, field 9 */
    if(*fields[9][0]=='Y') {
        upvec_setValue(pv, c, c, 0, U_MASK(UBIDI_IS_MIRRORED_SHIFT), U_MASK(UBIDI_IS_MIRRORED_SHIFT), &errorCode);
        if(U_FAILURE(*pErrorCode)) {
            fprintf(stderr, "genbidi error: unable to set 'is mirrored' for U+%04lx, code: %s\n",
                            (long)c, u_errorName(errorCode));
            exit(errorCode);
        }
    } else if(fields[9][1]-fields[9][0]!=1 || *fields[9][0]!='N') {
        fprintf(stderr, "genbidi: syntax error in field 9 at U+%04lx\n",
            (long)c);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
}
Exemplo n.º 6
0
/*
 * parse a list of code points
 * store them as a string in dest[destCapacity]
 * set the first code point in *pFirst
 * @return The length of the string in numbers of UChars.
 */
U_CAPI int32_t U_EXPORT2
u_parseString(const char *s,
              UChar *dest, int32_t destCapacity,
              uint32_t *pFirst,
              UErrorCode *pErrorCode) {
    char *end;
    uint32_t value;
    int32_t destLength;

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(pFirst!=NULL) {
        *pFirst=0xffffffff;
    }

    destLength=0;
    for(;;) {
        s=u_skipWhitespace(s);
        if(*s==';' || *s==0) {
            if(destLength<destCapacity) {
                dest[destLength]=0;
            } else if(destLength==destCapacity) {
                *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
            } else {
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            }
            return destLength;
        }

        /* read one code point */
        value=(uint32_t)uprv_strtoul(s, &end, 16);
        if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {
            *pErrorCode=U_PARSE_ERROR;
            return 0;
        }

        /* store the first code point */
        if(pFirst!=NULL) {
            *pFirst=value;
            pFirst=NULL;
        }

        /* append it to the destination array */
        if((destLength+U16_LENGTH(value))<=destCapacity) {
            U16_APPEND_UNSAFE(dest, destLength, value);
        } else {
            destLength+=U16_LENGTH(value);
        }

        /* go to the following characters */
        s=end;
    }
}
Exemplo n.º 7
0
U_CDECL_BEGIN

static void U_CALLCONV
strprepProfileLineFn(void * /*context*/,
              char *fields[][2], int32_t fieldCount,
              UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *map;
    uint32_t code;
    int32_t length;
   /*UBool* mapWithNorm = (UBool*) context;*/
    const char* typeName;
    uint32_t rangeStart=0,rangeEnd =0;
    const char *s;

    s = u_skipWhitespace(fields[0][0]);
    if (*s == '@') {
        /* a special directive introduced in 4.2 */
        return;
    }

    if(fieldCount != 3){
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return;
    }

    typeName = fields[2][0];
    map = fields[1][0];
   
    if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){

        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);

        /* store the range */
        compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){

        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);

        /* store the range */
        compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
        /* get the character code, field 0 */
        code=(uint32_t)uprv_strtoul(s, &end, 16);

        /* parse the mapping string */
        length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
        
        /* store the mapping */
        compareMapping(code,mapping, length,USPREP_MAP);

    }else{
        *pErrorCode = U_INVALID_FORMAT_ERROR;
    }

}
Exemplo n.º 8
0
static void U_CALLCONV
nameAliasesLineFn(void *context,
       char *fields[][2], int32_t fieldCount,
       UErrorCode *pErrorCode) {
    char *name;
    int16_t length=0;
    static uint32_t prevCode=0;
    uint32_t code=0;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }
    /* get the character code */
    code=uprv_strtoul(fields[0][0], NULL, 16);

    /* get the character name */
    name=fields[1][0];
    length=getName(&name, fields[1][1]);
    if(length==0 || length>=sizeof(cpNameAliases[cpNameAliasesTop].nameAlias)) {
        fprintf(stderr, "gennames: error - name alias %s empty or too long for code point U+%04lx\n",
                name, (unsigned long)code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* check for non-character code points */
    if(!U_IS_UNICODE_CHAR(code)) {
        fprintf(stderr, "gennames: error - name alias for non-character code point U+%04lx\n",
                (unsigned long)code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* check that the code points (code) are in ascending order */
    if(code<=prevCode && code>0) {
        fprintf(stderr, "gennames: error - NameAliases entries out of order, U+%04lx after U+%04lx\n",
                (unsigned long)code, (unsigned long)prevCode);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    prevCode=code;

    if(cpNameAliasesTop>=LENGTHOF(cpNameAliases)) {
        fprintf(stderr, "gennames: error - too many name aliases\n");
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    cpNameAliases[cpNameAliasesTop].code=code;
    uprv_memcpy(cpNameAliases[cpNameAliasesTop].nameAlias, name, length);
    cpNameAliases[cpNameAliasesTop].nameAlias[length]=0;
    ++cpNameAliasesTop;

    parseName(name, length);
}
Exemplo n.º 9
0
UChar32
PreparsedUCD::parseCodePoint(const char *s, UErrorCode &errorCode) {
    char *end;
    uint32_t value=(uint32_t)uprv_strtoul(s, &end, 16);
    if(end<=s || *end!=0 || value>=0x110000) {
        fprintf(stderr,
                "error in preparsed UCD: '%s' is not a valid code point on line %ld\n",
                s, (long)lineNumber);
        errorCode=U_PARSE_ERROR;
        return U_SENTINEL;
    }
    return (UChar32)value;
}
Exemplo n.º 10
0
static void U_CALLCONV
specialCasingLineFn(void *context,
                    char *fields[][2], int32_t fieldCount,
                    UErrorCode *pErrorCode) {
    char *end;

    /* get code point */
    specialCasings[specialCasingCount].code=(UChar32)uprv_strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
    end=(char *)u_skipWhitespace(end);
    if(end<=fields[0][0] || end!=fields[0][1]) {
        fprintf(stderr, "gencase: syntax error in SpecialCasing.txt field 0 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* is this a complex mapping? */
    if(*(end=(char *)u_skipWhitespace(fields[4][0]))!=0 && *end!=';' && *end!='#') {
        /* there is some condition text in the fifth field */
        specialCasings[specialCasingCount].isComplex=TRUE;

        /* do not store any actual mappings for this */
        specialCasings[specialCasingCount].lowerCase[0]=0;
        specialCasings[specialCasingCount].upperCase[0]=0;
        specialCasings[specialCasingCount].titleCase[0]=0;
    } else {
        /* just set the "complex" flag and get the case mappings */
        specialCasings[specialCasingCount].isComplex=FALSE;
        specialCasings[specialCasingCount].lowerCase[0]=
            (UChar)u_parseString(fields[1][0], specialCasings[specialCasingCount].lowerCase+1, 31, NULL, pErrorCode);
        specialCasings[specialCasingCount].upperCase[0]=
            (UChar)u_parseString(fields[3][0], specialCasings[specialCasingCount].upperCase+1, 31, NULL, pErrorCode);
        specialCasings[specialCasingCount].titleCase[0]=
            (UChar)u_parseString(fields[2][0], specialCasings[specialCasingCount].titleCase+1, 31, NULL, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            fprintf(stderr, "gencase: error parsing special casing at %s\n", fields[0][0]);
            exit(*pErrorCode);
        }

        uset_add(caseSensitive, (UChar32)specialCasings[specialCasingCount].code);
        _set_addAll(caseSensitive, specialCasings[specialCasingCount].lowerCase+1, specialCasings[specialCasingCount].lowerCase[0]);
        _set_addAll(caseSensitive, specialCasings[specialCasingCount].upperCase+1, specialCasings[specialCasingCount].upperCase[0]);
        _set_addAll(caseSensitive, specialCasings[specialCasingCount].titleCase+1, specialCasings[specialCasingCount].titleCase[0]);
    }

    if(++specialCasingCount==MAX_SPECIAL_CASING_COUNT) {
        fprintf(stderr, "gencase: too many special casing mappings\n");
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        exit(U_INDEX_OUTOFBOUNDS_ERROR);
    }
}
Exemplo n.º 11
0
/**
 * Split a string into pieces based on the given delimiter
 * character.  Then, parse the resultant fields from hex into
 * characters.  That is, "0040 0400;0C00;0899" -> new String[] {
 * "\u0040\u0400", "\u0C00", "\u0899" }.  The output is assumed to
 * be of the proper length already, and exactly output.length
 * fields are parsed.  If there are too few an exception is
 * thrown.  If there are too many the extras are ignored.
 *
 * @return FALSE upon failure
 */
UBool NormalizerConformanceTest::hexsplit(const char *s, char delimiter,
                                          UnicodeString output[], int32_t outputLength) {
    const char *t = s;
    char *end = NULL;
    UChar32 c;
    int32_t i;
    for (i=0; i<outputLength; ++i) {
        // skip whitespace
        while(*t == ' ' || *t == '\t') {
            ++t;
        }

        // read a sequence of code points
        output[i].remove();
        for(;;) {
            c = (UChar32)uprv_strtoul(t, &end, 16);

            if( (char *)t == end ||
                (uint32_t)c > 0x10ffff ||
                (*end != ' ' && *end != '\t' && *end != delimiter)
            ) {
                errln(UnicodeString("Bad field ", "") + (i + 1) + " in " + UnicodeString(s, ""));
                return FALSE;
            }

            output[i].append(c);

            t = (const char *)end;

            // skip whitespace
            while(*t == ' ' || *t == '\t') {
                ++t;
            }

            if(*t == delimiter) {
                ++t;
                break;
            }
            if(*t == 0) {
                if((i + 1) == outputLength) {
                    return TRUE;
                } else {
                    errln(UnicodeString("Missing field(s) in ", "") + s + " only " + (i + 1) + " out of " + outputLength);
                    return FALSE;
                }
            }
        }
    }
    return TRUE;
}
Exemplo n.º 12
0
static void U_CALLCONV
mirrorLineFn(void *context,
             char *fields[][2], int32_t fieldCount,
             UErrorCode *pErrorCode) {
    char *end;
    UChar32 src, mirror;

    src=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
    if(end<=fields[0][0] || end!=fields[0][1]) {
        fprintf(stderr, "genbidi: syntax error in BidiMirroring.txt field 0 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    mirror=(UChar32)uprv_strtoul(fields[1][0], &end, 16);
    if(end<=fields[1][0] || end!=fields[1][1]) {
        fprintf(stderr, "genbidi: syntax error in BidiMirroring.txt field 1 at %s\n", fields[1][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    addMirror(src, mirror);
}
Exemplo n.º 13
0
 // sets the desired transformation data.
 // should be populated from a command line argument
 // so far the only acceptable format is offset-<hex constant>
 // eventually others (mask-<hex constant>?) may be enabled
 // more complex functions may be more difficult
 void setTransform(const char *t) {
     if (strncmp(t, "offset-", 7) == 0) {
         char *end;
         unsigned long base = uprv_strtoul(t + 7, &end, 16);
         if (end == (t + 7) || *end != 0 || base > 0x10FF80) {
             fprintf(stderr, "Syntax for offset value in --transform offset-%s invalid!\n", t + 7);
             usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
         }
         transformType = DictionaryData::TRANSFORM_TYPE_OFFSET;
         transformConstant = (UChar32)base;
     }
     else {
         fprintf(stderr, "Invalid transform specified: %s\n", t);
         usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
     }
 }
Exemplo n.º 14
0
static void
getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
    if( (name[0]=='i' || name[0]=='I') &&
        (name[1]=='b' || name[1]=='B') &&
        (name[2]=='m' || name[2]=='M')
    ) {
        name+=3;
        if(*name=='-') {
            ++name;
        }
        *pPlatform=UCNV_IBM;
        *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
    } else {
        *pPlatform=UCNV_UNKNOWN;
        *pCCSID=0;
    }
}
Exemplo n.º 15
0
static void U_CALLCONV
normalizationCorrectionsLineFn(void *context,
                    char *fields[][2], int32_t fieldCount,
                    UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *s;
    uint32_t code;
    int32_t length;
    UVersionInfo version;
    UVersionInfo thisVersion;

    /* get the character code, field 0 */
    code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gensprep: error parsing NormalizationCorrections.txt mapping at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }
    /* Original (erroneous) decomposition */
    s = fields[1][0];

    /* parse the mapping string */
    length=u_parseCodePoints(s, mapping, sizeof(mapping)/4, pErrorCode);

    /* ignore corrected decomposition */

    u_versionFromString(version,fields[3][0] );
    u_versionFromString(thisVersion, "3.2.0");



    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gensprep error parsing NormalizationCorrections.txt of U+%04lx - %s\n",
                (long)code, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }

    /* store the mapping */
    if( version[0] > thisVersion[0] || 
        ((version[0]==thisVersion[0]) && (version[1] > thisVersion[1]))
        ){
        storeMapping(code,mapping, length, USPREP_MAP, pErrorCode);
    }
    setUnicodeVersionNC(version);
}
Exemplo n.º 16
0
static void U_CALLCONV
strprepProfileLineFn(void *context,
              char *fields[][2], int32_t fieldCount,
              UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *map;
    uint32_t code;
    int32_t length;
    UStringPrepProfile* data = (UStringPrepProfile*) context;
    const char* typeName;
    uint32_t rangeStart=0,rangeEnd =0;

    typeName = fields[2][0];
    map = fields[1][0];

    if(strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);

        /* store the range */
        compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_UNASSIGNED);

    }else if(strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);

        /* store the range */
        compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_PROHIBITED);

    }else if(strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
        /* get the character code, field 0 */
        code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);

        /* parse the mapping string */
        length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);

        /* compare the mapping */
        compareMapping(data, code,mapping, length,USPREP_MAP);
    }else{
        *pErrorCode = U_INVALID_FORMAT_ERROR;
    }

}
Exemplo n.º 17
0
/*
 * parse a list of code points
 * store them as a UTF-32 string in dest[destCapacity]
 * return the number of code points
 */
U_CAPI int32_t U_EXPORT2
u_parseCodePoints(const char *s,
                  uint32_t *dest, int32_t destCapacity,
                  UErrorCode *pErrorCode) {
    char *end;
    uint32_t value;
    int32_t count;

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    count=0;
    for(;;) {
        s=u_skipWhitespace(s);
        if(*s==';' || *s==0) {
            return count;
        }

        /* read one code point */
        value=(uint32_t)uprv_strtoul(s, &end, 16);
        if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {
            *pErrorCode=U_PARSE_ERROR;
            return 0;
        }

        /* append it to the destination array */
        if(count<destCapacity) {
            dest[count++]=value;
        } else {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }

        /* go to the following characters */
        s=end;
    }
}
Exemplo n.º 18
0
U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray, const char *versionString) {
    char *end;
    uint16_t part=0;

    if(versionArray==NULL) {
        return;
    }

    if(versionString!=NULL) {
        for(;;) {
            versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
            if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
                break;
            }
            versionString=end+1;
        }
    }

    while(part<U_MAX_VERSION_LENGTH) {
        versionArray[part++]=0;
    }
}
Exemplo n.º 19
0
// Returns TRUE for "ok to continue parsing fields".
UBool
PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
                            UErrorCode &errorCode) {
    CharString pBuffer;
    const char *p=field;
    const char *v=strchr(p, '=');
    int binaryValue;
    if(*p=='-') {
        if(v!=NULL) {
            fprintf(stderr,
                    "error in preparsed UCD: mix of binary-property-no and "
                    "enum-property syntax '%s' on line %ld\n",
                    field, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
            return FALSE;
        }
        binaryValue=0;
        ++p;
    } else if(v==NULL) {
        binaryValue=1;
    } else {
        binaryValue=-1;
        // Copy out the property name rather than modifying the field (writing a NUL).
        pBuffer.append(p, (int32_t)(v-p), errorCode);
        p=pBuffer.data();
        ++v;
    }
    int32_t prop=pnames->getPropertyEnum(p);
    if(prop<0) {
        for(int32_t i=0;; ++i) {
            if(i==UPRV_LENGTHOF(ppucdProperties)) {
                // Ignore unknown property names.
                return TRUE;
            }
            if(0==uprv_stricmp(p, ppucdProperties[i].name)) {
                prop=ppucdProperties[i].prop;
                U_ASSERT(prop>=0);
                break;
            }
        }
    }
    if(prop<UCHAR_BINARY_LIMIT) {
        if(binaryValue>=0) {
            props.binProps[prop]=(UBool)binaryValue;
        } else {
            // No binary value for a binary property.
            fprintf(stderr,
                    "error in preparsed UCD: enum-property syntax '%s' "
                    "for binary property on line %ld\n",
                    field, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
        }
    } else if(binaryValue>=0) {
        // Binary value for a non-binary property.
        fprintf(stderr,
                "error in preparsed UCD: binary-property syntax '%s' "
                "for non-binary property on line %ld\n",
                field, (long)lineNumber);
        errorCode=U_PARSE_ERROR;
    } else if (prop < UCHAR_INT_START) {
        fprintf(stderr,
                "error in preparsed UCD: prop value is invalid: '%d' for line %ld\n",
                prop, (long)lineNumber);
        errorCode=U_PARSE_ERROR;
    } else if(prop<UCHAR_INT_LIMIT) {
        int32_t value=pnames->getPropertyValueEnum(prop, v);
        if(value==UCHAR_INVALID_CODE && prop==UCHAR_CANONICAL_COMBINING_CLASS) {
            // TODO: Make getPropertyValueEnum(UCHAR_CANONICAL_COMBINING_CLASS, v) work.
            char *end;
            unsigned long ccc=uprv_strtoul(v, &end, 10);
            if(v<end && *end==0 && ccc<=254) {
                value=(int32_t)ccc;
            }
        }
        if(value==UCHAR_INVALID_CODE) {
            fprintf(stderr,
                    "error in preparsed UCD: '%s' is not a valid value on line %ld\n",
                    field, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
        } else {
            props.intProps[prop-UCHAR_INT_START]=value;
        }
    } else if(*v=='<') {
        // Do not parse default values like <code point>, just set null values.
        switch(prop) {
        case UCHAR_BIDI_MIRRORING_GLYPH:
            props.bmg=U_SENTINEL;
            break;
        case UCHAR_BIDI_PAIRED_BRACKET:
            props.bpb=U_SENTINEL;
            break;
        case UCHAR_SIMPLE_CASE_FOLDING:
            props.scf=U_SENTINEL;
            break;
        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
            props.slc=U_SENTINEL;
            break;
        case UCHAR_SIMPLE_TITLECASE_MAPPING:
            props.stc=U_SENTINEL;
            break;
        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
            props.suc=U_SENTINEL;
            break;
        case UCHAR_CASE_FOLDING:
            props.cf.remove();
            break;
        case UCHAR_LOWERCASE_MAPPING:
            props.lc.remove();
            break;
        case UCHAR_TITLECASE_MAPPING:
            props.tc.remove();
            break;
        case UCHAR_UPPERCASE_MAPPING:
            props.uc.remove();
            break;
        case UCHAR_SCRIPT_EXTENSIONS:
            props.scx.clear();
            break;
        default:
            fprintf(stderr,
                    "error in preparsed UCD: '%s' is not a valid default value on line %ld\n",
                    field, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
        }
    } else {
        char c;
        switch(prop) {
        case UCHAR_NUMERIC_VALUE:
            props.numericValue=v;
            c=*v;
            if('0'<=c && c<='9' && v[1]==0) {
                props.digitValue=c-'0';
            } else {
                props.digitValue=-1;
            }
            break;
        case UCHAR_NAME:
            props.name=v;
            break;
        case UCHAR_AGE:
            u_versionFromString(props.age, v);  // Writes 0.0.0.0 if v is not numeric.
            break;
        case UCHAR_BIDI_MIRRORING_GLYPH:
            props.bmg=parseCodePoint(v, errorCode);
            break;
        case UCHAR_BIDI_PAIRED_BRACKET:
            props.bpb=parseCodePoint(v, errorCode);
            break;
        case UCHAR_SIMPLE_CASE_FOLDING:
            props.scf=parseCodePoint(v, errorCode);
            break;
        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
            props.slc=parseCodePoint(v, errorCode);
            break;
        case UCHAR_SIMPLE_TITLECASE_MAPPING:
            props.stc=parseCodePoint(v, errorCode);
            break;
        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
            props.suc=parseCodePoint(v, errorCode);
            break;
        case UCHAR_CASE_FOLDING:
            parseString(v, props.cf, errorCode);
            break;
        case UCHAR_LOWERCASE_MAPPING:
            parseString(v, props.lc, errorCode);
            break;
        case UCHAR_TITLECASE_MAPPING:
            parseString(v, props.tc, errorCode);
            break;
        case UCHAR_UPPERCASE_MAPPING:
            parseString(v, props.uc, errorCode);
            break;
        case PPUCD_NAME_ALIAS:
            props.nameAlias=v;
            break;
        case PPUCD_CONDITIONAL_CASE_MAPPINGS:
        case PPUCD_TURKIC_CASE_FOLDING:
            // No need to parse their values: They are hardcoded in the runtime library.
            break;
        case UCHAR_SCRIPT_EXTENSIONS:
            parseScriptExtensions(v, props.scx, errorCode);
            break;
        default:
            // Ignore unhandled properties.
            return TRUE;
        }
    }
    if(U_SUCCESS(errorCode)) {
        newValues.add((UChar32)prop);
        return TRUE;
    } else {
        return FALSE;
    }
}
Exemplo n.º 20
0
/*
 * state table row grammar (ebnf-style):
 * (whitespace is allowed between all tokens)
 *
 * row=[[firstentry ','] entry (',' entry)*]
 * firstentry="initial" | "surrogates"
 *            (initial state (default for state 0), output is all surrogate pairs)
 * entry=range [':' nextstate] ['.' action]
 * range=number ['-' number]
 * nextstate=number
 *           (0..7f)
 * action='u' | 's' | 'p' | 'i'
 *        (unassigned, state change only, surrogate pair, illegal)
 * number=(1- or 2-digit hexadecimal number)
 */
static const char *
parseState(const char *s, int32_t state[256], uint32_t *pFlags) {
    const char *t;
    uint32_t start, end, i;
    int32_t entry;

    /* initialize the state: all illegal with U+ffff */
    for(i=0; i<256; ++i) {
        state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0xffff);
    }

    /* skip leading white space */
    s=u_skipWhitespace(s);

    /* is there an "initial" or "surrogates" directive? */
    if(uprv_strncmp("initial", s, 7)==0) {
        *pFlags=MBCS_STATE_FLAG_DIRECT;
        s=u_skipWhitespace(s+7);
        if(*s++!=',') {
            return s-1;
        }
    } else if(*pFlags==0 && uprv_strncmp("surrogates", s, 10)==0) {
        *pFlags=MBCS_STATE_FLAG_SURROGATES;
        s=u_skipWhitespace(s+10);
        if(*s++!=',') {
            return s-1;
        }
    } else if(*s==0) {
        /* empty state row: all-illegal */
        return NULL;
    }

    for(;;) {
        /* read an entry, the start of the range first */
        s=u_skipWhitespace(s);
        start=uprv_strtoul(s, (char **)&t, 16);
        if(s==t || 0xff<start) {
            return s;
        }
        s=u_skipWhitespace(t);

        /* read the end of the range if there is one */
        if(*s=='-') {
            s=u_skipWhitespace(s+1);
            end=uprv_strtoul(s, (char **)&t, 16);
            if(s==t || end<start || 0xff<end) {
                return s;
            }
            s=u_skipWhitespace(t);
        } else {
            end=start;
        }

        /* determine the state entrys for this range */
        if(*s!=':' && *s!='.') {
            /* the default is: final state with valid entries */
            entry=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_16, 0);
        } else {
            entry=MBCS_ENTRY_TRANSITION(0, 0);
            if(*s==':') {
                /* get the next state, default to 0 */
                s=u_skipWhitespace(s+1);
                i=uprv_strtoul(s, (char **)&t, 16);
                if(s!=t) {
                    if(0x7f<i) {
                        return s;
                    }
                    s=u_skipWhitespace(t);
                    entry=MBCS_ENTRY_SET_STATE(entry, i);
                }
            }

            /* get the state action, default to valid */
            if(*s=='.') {
                /* this is a final state */
                entry=MBCS_ENTRY_SET_FINAL(entry);

                s=u_skipWhitespace(s+1);
                if(*s=='u') {
                    /* unassigned set U+fffe */
                    entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_UNASSIGNED, 0xfffe);
                    s=u_skipWhitespace(s+1);
                } else if(*s=='p') {
                    if(*pFlags!=MBCS_STATE_FLAG_DIRECT) {
                        entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_VALID_16_PAIR);
                    } else {
                        entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_VALID_16);
                    }
                    s=u_skipWhitespace(s+1);
                } else if(*s=='s') {
                    entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_CHANGE_ONLY);
                    s=u_skipWhitespace(s+1);
                } else if(*s=='i') {
                    /* illegal set U+ffff */
                    entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_ILLEGAL, 0xffff);
                    s=u_skipWhitespace(s+1);
                } else {
                    /* default to valid */
                    entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_VALID_16);
                }
            } else {
                /* this is an intermediate state, nothing to do */
            }
        }

        /* adjust "final valid" states according to the state flags */
        if(MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16) {
            switch(*pFlags) {
            case 0:
                /* no adjustment */
                break;
            case MBCS_STATE_FLAG_DIRECT:
                /* set the valid-direct code point to "unassigned"==0xfffe */
                entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_VALID_DIRECT_16, 0xfffe);
                break;
            case MBCS_STATE_FLAG_SURROGATES:
                entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_VALID_16_PAIR, 0);
                break;
            default:
                break;
            }
        }

        /* set this entry for the range */
        for(i=start; i<=end; ++i) {
            state[i]=entry;
        }

        if(*s==',') {
            ++s;
        } else {
            return *s==0 ? NULL : s;
        }
    }
}
Exemplo n.º 21
0
/* parse a mapping line; must not be empty */
U_CAPI UBool U_EXPORT2
ucm_parseMappingLine(UCMapping *m,
                     UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
                     uint8_t bytes[UCNV_EXT_MAX_BYTES],
                     const char *line) {
    const char *s;
    char *end;
    UChar32 cp;
    int32_t u16Length;
    int8_t uLen, bLen, f;

    s=line;
    uLen=bLen=0;

    /* parse code points */
    for(;;) {
        /* skip an optional plus sign */
        if(uLen>0 && *s=='+') {
            ++s;
        }
        if(*s!='<') {
            break;
        }

        if( s[1]!='U' ||
            (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 ||
            *end!='>'
        ) {
            fprintf(stderr, "ucm error: Unicode code point must be formatted as <UXXXX> (1..6 hex digits) - \"%s\"\n", line);
            return FALSE;
        }
        if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) {
            fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line);
            return FALSE;
        }

        if(uLen==UCNV_EXT_MAX_UCHARS) {
            fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line);
            return FALSE;
        }
        codePoints[uLen++]=cp;
        s=end+1;
    }

    if(uLen==0) {
        fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line);
        return FALSE;
    } else if(uLen==1) {
        m->u=codePoints[0];
    } else {
        UErrorCode errorCode=U_ZERO_ERROR;
        u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode);
        if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) ||
            u16Length>UCNV_EXT_MAX_UCHARS
        ) {
            fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line);
            return FALSE;
        }
    }

    s=u_skipWhitespace(s);

    /* parse bytes */
    bLen=ucm_parseBytes(bytes, line, &s);

    if(bLen<0) {
        return FALSE;
    } else if(bLen==0) {
        fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line);
        return FALSE;
    } else if(bLen<=4) {
        uprv_memcpy(m->b.bytes, bytes, bLen);
    }

    /* skip everything until the fallback indicator, even the start of a comment */
    for(;;) {
        if(*s==0) {
            f=-1; /* no fallback indicator */
            break;
        } else if(*s=='|') {
            f=(int8_t)(s[1]-'0');
            if((uint8_t)f>4) {
                fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line);
                return FALSE;
            }
            break;
        }
        ++s;
    }

    m->uLen=uLen;
    m->bLen=bLen;
    m->f=f;
    return TRUE;
}
Exemplo n.º 22
0
static void U_CALLCONV
lineFn(void *context,
       char *fields[][2], int32_t fieldCount,
       UErrorCode *pErrorCode) {
    Options *storeOptions=(Options *)context;
    char *names[4];
    int16_t lengths[4]={ 0, 0, 0, 0 };
    static uint32_t prevCode=0;
    uint32_t code=0;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }
    /* get the character code */
    code=uprv_strtoul(fields[0][0], NULL, 16);

    /* get the character name */
    if(storeOptions->storeNames) {
        names[0]=fields[1][0];
        lengths[0]=getName(names+0, fields[1][1]);
        if(names[0][0]=='<') {
            /* do not store pseudo-names in <> brackets */
            lengths[0]=0;
        }
    }

    /* store 1.0 names */
    /* get the second character name, the one from Unicode 1.0 */
    if(storeOptions->store10Names) {
        names[1]=fields[10][0];
        lengths[1]=getName(names+1, fields[10][1]);
        if(names[1][0]=='<') {
            /* do not store pseudo-names in <> brackets */
            lengths[1]=0;
        }
    }

    /* get the ISO 10646 comment */
    if(storeOptions->storeISOComments) {
        names[2]=fields[11][0];
        lengths[2]=getName(names+2, fields[11][1]);
    }

    if(lengths[0]+lengths[1]+lengths[2]==0) {
        return;
    }

    /* check for non-character code points */
    if(!U_IS_UNICODE_CHAR(code)) {
        fprintf(stderr, "gennames: error - properties for non-character code point U+%04lx\n",
                (unsigned long)code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* check that the code points (code) are in ascending order */
    if(code<=prevCode && code>0) {
        fprintf(stderr, "gennames: error - UnicodeData entries out of order, U+%04lx after U+%04lx\n",
                (unsigned long)code, (unsigned long)prevCode);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    prevCode=code;

    parseName(names[0], lengths[0]);
    parseName(names[1], lengths[1]);
    parseName(names[2], lengths[2]);

    if(cpNameAliasesIndex<cpNameAliasesTop && code>=cpNameAliases[cpNameAliasesIndex].code) {
        if(code==cpNameAliases[cpNameAliasesIndex].code) {
            names[3]=cpNameAliases[cpNameAliasesIndex].nameAlias;
            lengths[3]=(int16_t)uprv_strlen(cpNameAliases[cpNameAliasesIndex].nameAlias);
            ++cpNameAliasesIndex;
        } else {
            fprintf(stderr, "gennames: error - NameAlias but no UnicodeData entry for U+%04lx\n",
                    (unsigned long)code);
            *pErrorCode=U_PARSE_ERROR;
            exit(U_PARSE_ERROR);
        }
    }

    /*
     * set the count argument to
     * 1: only store regular names, or only store ISO 10646 comments
     * 2: store regular and 1.0 names
     * 3: store names and ISO 10646 comment
     * 4: also store name alias
     *
     * addLine() will ignore empty trailing names
     */
    if(storeOptions->storeNames) {
        /* store names and comments as parsed according to storeOptions */
        addLine(code, names, lengths, LENGTHOF(names));
    } else {
        /* store only ISO 10646 comments */
        addLine(code, names+2, lengths+2, 1);
    }
}
Exemplo n.º 23
0
/* read a range like start or start..end */
U_CAPI int32_t U_EXPORT2
u_parseCodePointRange(const char *s,
                      uint32_t *pStart, uint32_t *pEnd,
                      UErrorCode *pErrorCode) {
    char *end;
    uint32_t value;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(s==NULL || pStart==NULL || pEnd==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    }

    s=u_skipWhitespace(s);
    if(*s==';' || *s==0) {
        *pErrorCode=U_PARSE_ERROR;
        return 0;
    }

    /* read the start code point */
    value=(uint32_t)uprv_strtoul(s, &end, 16);
    if(end<=s || (*end!=' ' && *end!='\t' && *end!='.' && *end!=';') || value>=0x110000) {
        *pErrorCode=U_PARSE_ERROR;
        return 0;
    }
    *pStart=*pEnd=value;

    /* is there a "..end"? */
    s=u_skipWhitespace(end);
    if(*s==';' || *s==0) {
        return 1;
    }

    if(*s!='.' || s[1]!='.') {
        *pErrorCode=U_PARSE_ERROR;
        return 0;
    }
    s+=2;

    /* read the end code point */
    value=(uint32_t)uprv_strtoul(s, &end, 16);
    if(end<=s || (*end!=' ' && *end!='\t' && *end!=';') || value>=0x110000) {
        *pErrorCode=U_PARSE_ERROR;
        return 0;
    }
    *pEnd=value;

    /* is this a valid range? */
    if(value<*pStart) {
        *pErrorCode=U_PARSE_ERROR;
        return 0;
    }

    /* no garbage after that? */
    s=u_skipWhitespace(end);
    if(*s==';' || *s==0) {
        return value-*pStart+1;
    } else {
        *pErrorCode=U_PARSE_ERROR;
        return 0;
    }
}
Exemplo n.º 24
0
//----------------------------------------------------------------------------
//
//  main      for gendict
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[ARG_HELP].doesOccur || options[ARG_QMARK].doesOccur) {
        //  -? or -h for help.
        usageAndDie(U_ZERO_ERROR);
    }

    UBool verbose = options[ARG_VERBOSE].doesOccur;

    if (argc < 3) {
        fprintf(stderr, "input and output file must both be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    const char *outFileName  = argv[2];
    const char *wordFileName = argv[1];

    startTime = uprv_getRawUTCtime(); // initialize start timer

	if (options[ARG_ICUDATADIR].doesOccur) {
        u_setDataDirectory(options[ARG_ICUDATADIR].value);
    }

    const char *copyright = NULL;
    if (options[ARG_COPYRIGHT].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

    if (options[ARG_UCHARS].doesOccur == options[ARG_BYTES].doesOccur) {
        fprintf(stderr, "you must specify exactly one type of trie to output!\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    UBool isBytesTrie = options[ARG_BYTES].doesOccur;
    if (isBytesTrie != options[ARG_TRANSFORM].doesOccur) {
        fprintf(stderr, "you must provide a transformation for a bytes trie, and must not provide one for a uchars trie!\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    IcuToolErrorCode status("gendict/main()");

#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO
    const char* outDir=NULL;

    UNewDataMemory *pData;
    char msg[1024];
    UErrorCode tempstatus = U_ZERO_ERROR;

    /* write message with just the name */ // potential for a buffer overflow here...
    sprintf(msg, "gendict writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dataInfo, NULL, &tempstatus);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &tempstatus);
    return (int)tempstatus;

#else
    //  Read in the dictionary source file
    if (verbose) { printf("Opening file %s...\n", wordFileName); }
    const char *codepage = "UTF-8";
    UCHARBUF *f = ucbuf_open(wordFileName, &codepage, TRUE, FALSE, status);
    if (status.isFailure()) {
        fprintf(stderr, "error opening input file: ICU Error \"%s\"\n", status.errorName());
        exit(status.reset());
    }
    if (verbose) { printf("Initializing dictionary builder of type %s...\n", (isBytesTrie ? "BytesTrie" : "UCharsTrie")); }
    DataDict dict(isBytesTrie, status);
    if (status.isFailure()) {
        fprintf(stderr, "new DataDict: ICU Error \"%s\"\n", status.errorName());
        exit(status.reset());
    }
    if (options[ARG_TRANSFORM].doesOccur) {
        dict.setTransform(options[ARG_TRANSFORM].value);
    }

    UnicodeString fileLine;
    if (verbose) { puts("Adding words to dictionary..."); }
    UBool hasValues = FALSE;
    UBool hasValuelessContents = FALSE;
    int lineCount = 0;
    int wordCount = 0;
    int minlen = 255;
    int maxlen = 0;
    UBool isOk = TRUE;
    while (readLine(f, fileLine, status)) {
        lineCount++;
        if (fileLine.isEmpty()) continue;
        
        // Parse word [spaces value].
        int32_t keyLen;
        for (keyLen = 0; keyLen < fileLine.length() && !u_isspace(fileLine[keyLen]); ++keyLen) {}
        if (keyLen == 0) {
            fprintf(stderr, "Error: no word on line %i!\n", lineCount);
            isOk = FALSE;
            continue;
        }
        int32_t valueStart;
        for (valueStart = keyLen;
            valueStart < fileLine.length() && u_isspace(fileLine[valueStart]);
            ++valueStart) {}

        if (keyLen < valueStart) {
            int32_t valueLength = fileLine.length() - valueStart;
            if (valueLength > 15) {
                fprintf(stderr, "Error: value too long on line %i!\n", lineCount);
                isOk = FALSE;
                continue;
            }
            char s[16];
            fileLine.extract(valueStart, valueLength, s, 16, US_INV);
            char *end;
            unsigned long value = uprv_strtoul(s, &end, 0);
            if (end == s || *end != 0 || (int32_t)uprv_strlen(s) != valueLength || value > 0xffffffff) {
                fprintf(stderr, "Error: value syntax error or value too large on line %i!\n", lineCount);
                isOk = FALSE;
                continue;
            }
            dict.addWord(fileLine.tempSubString(0, keyLen), (int32_t)value, status);
            hasValues = TRUE;
            wordCount++;
            if (keyLen < minlen) minlen = keyLen;
            if (keyLen > maxlen) maxlen = keyLen;
        } else {
            dict.addWord(fileLine.tempSubString(0, keyLen), 0, status);
            hasValuelessContents = TRUE;
            wordCount++;
            if (keyLen < minlen) minlen = keyLen;
            if (keyLen > maxlen) maxlen = keyLen;
        }

        if (status.isFailure()) {
            fprintf(stderr, "ICU Error \"%s\": Failed to add word to trie at input line %d in input file\n",
                status.errorName(), lineCount);
            exit(status.reset());
        }
    }
    if (verbose) { printf("Processed %d lines, added %d words, minlen %d, maxlen %d\n", lineCount, wordCount, minlen, maxlen); }

    if (!isOk && status.isSuccess()) {
        status.set(U_ILLEGAL_ARGUMENT_ERROR);
    }
    if (hasValues && hasValuelessContents) {
        fprintf(stderr, "warning: file contained both valued and unvalued strings!\n");
    }

    if (verbose) { printf("Serializing data...isBytesTrie? %d\n", isBytesTrie); }
    int32_t outDataSize;
    const void *outData;
    UnicodeString usp;
    if (isBytesTrie) {
        StringPiece sp = dict.serializeBytes(status);
        outDataSize = sp.size();
        outData = sp.data();
    } else {
        dict.serializeUChars(usp, status);
        outDataSize = usp.length() * U_SIZEOF_UCHAR;
        outData = usp.getBuffer();
    }
    if (status.isFailure()) {
        fprintf(stderr, "gendict: got failure of type %s while serializing, if U_ILLEGAL_ARGUMENT_ERROR possibly due to duplicate dictionary entries\n", status.errorName());
        exit(status.reset());
    }
    if (verbose) { puts("Opening output file..."); }
    UNewDataMemory *pData = udata_create(NULL, NULL, outFileName, &dataInfo, copyright, status);
    if (status.isFailure()) {
        fprintf(stderr, "gendict: could not open output file \"%s\", \"%s\"\n", outFileName, status.errorName());
        exit(status.reset());
    }

    if (verbose) { puts("Writing to output file..."); }
    int32_t indexes[DictionaryData::IX_COUNT] = {
        DictionaryData::IX_COUNT * sizeof(int32_t), 0, 0, 0, 0, 0, 0, 0
    };
    int32_t size = outDataSize + indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
    indexes[DictionaryData::IX_RESERVED1_OFFSET] = size;
    indexes[DictionaryData::IX_RESERVED2_OFFSET] = size;
    indexes[DictionaryData::IX_TOTAL_SIZE] = size;

    indexes[DictionaryData::IX_TRIE_TYPE] = isBytesTrie ? DictionaryData::TRIE_TYPE_BYTES : DictionaryData::TRIE_TYPE_UCHARS;
    if (hasValues) {
        indexes[DictionaryData::IX_TRIE_TYPE] |= DictionaryData::TRIE_HAS_VALUES;
    }

    indexes[DictionaryData::IX_TRANSFORM] = dict.getTransform();
    udata_writeBlock(pData, indexes, sizeof(indexes));
    udata_writeBlock(pData, outData, outDataSize);
    size_t bytesWritten = udata_finish(pData, status);
    if (status.isFailure()) {
        fprintf(stderr, "gendict: error \"%s\" writing the output file\n", status.errorName());
        exit(status.reset());
    }

    if (bytesWritten != (size_t)size) {
        fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

    printf("%s: done writing\t%s (%ds).\n", progName, outFileName, elapsedTime());

#ifdef TEST_GENDICT
    if (isBytesTrie) {
        BytesTrie::Iterator it(outData, outDataSize, status);
        while (it.hasNext()) {
            it.next(status);
            const StringPiece s = it.getString();
            int32_t val = it.getValue();
            printf("%s -> %i\n", s.data(), val);
        }
    } else {
        UCharsTrie::Iterator it((const UChar *)outData, outDataSize, status);
        while (it.hasNext()) {
            it.next(status);
            const UnicodeString s = it.getString();
            int32_t val = it.getValue();
            char tmp[1024];
            s.extract(0, s.length(), tmp, 1024);
            printf("%s -> %i\n", tmp, val);
        }
    }
#endif

    return 0;
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
}
static void U_CALLCONV
lineFn(void *context,
       char *fields[][2], int32_t fieldCount,
       UErrorCode *pErrorCode) {
    char *names[3];
    int16_t lengths[3];
    static uint32_t prevCode=0;
    uint32_t code=0;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }
    /* get the character code */
    code=uprv_strtoul(fields[0][0], NULL, 16);

    /* get the character name */
    names[0]=fields[1][0];
    lengths[0]=getName(names+0, fields[1][1]);
    if(names[0][0]=='<') {
        /* do not store pseudo-names in <> brackets */
        lengths[0]=0;
    }

    /* store 1.0 names */
    /* get the second character name, the one from Unicode 1.0 */
    /* do not store pseudo-names in <> brackets */
    names[1]=fields[10][0];
    lengths[1]=getName(names+1, fields[10][1]);
    if(*(UBool *)context && names[1][0]!='<') {
        /* keep the name */
    } else {
        lengths[1]=0;
    }

    /* get the ISO 10646 comment */
    names[2]=fields[11][0];
    lengths[2]=getName(names+2, fields[11][1]);

    if(lengths[0]+lengths[1]+lengths[2]==0) {
        return;
    }

    /* check for non-character code points */
    if(!UTF_IS_UNICODE_CHAR(code)) {
        fprintf(stderr, "gennames: error - properties for non-character code point U+%04lx\n",
                (unsigned long)code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* check that the code points (code) are in ascending order */
    if(code<=prevCode && code>0) {
        fprintf(stderr, "gennames: error - UnicodeData entries out of order, U+%04lx after U+%04lx\n",
                (unsigned long)code, (unsigned long)prevCode);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    prevCode=code;

    parseName(names[0], lengths[0]);
    parseName(names[1], lengths[1]);
    parseName(names[2], lengths[2]);

    /*
     * set the count argument to
     * 1: only store regular names
     * 2: store regular and 1.0 names
     * 3: store names and ISO 10646 comment
     */
    addLine(code, names, lengths, 3);
}
Exemplo n.º 26
0
static void U_CALLCONV
strprepProfileLineFn(void *context,
              char *fields[][2], int32_t fieldCount,
              UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *map;
    uint32_t code;
    int32_t length;
   /*UBool* mapWithNorm = (UBool*) context;*/
    const char* typeName;
    uint32_t rangeStart=0,rangeEnd =0;
    const char* filename = (const char*) context;
    const char *s;

    s = u_skipWhitespace(fields[0][0]);
    if (*s == '@') {
        /* special directive */
        s++;
        length = fields[0][1] - s;
        if (length >= NORMALIZE_DIRECTIVE_LEN
            && uprv_strncmp(s, NORMALIZE_DIRECTIVE, NORMALIZE_DIRECTIVE_LEN) == 0) {
            options[NORMALIZE].doesOccur = TRUE;
            return;
        }
        else if (length >= CHECK_BIDI_DIRECTIVE_LEN
            && uprv_strncmp(s, CHECK_BIDI_DIRECTIVE, CHECK_BIDI_DIRECTIVE_LEN) == 0) {
            options[CHECK_BIDI].doesOccur = TRUE;
            return;
        }
        else {
            fprintf(stderr, "gensprep error parsing a directive %s.", fields[0][0]);
        }
    }

    typeName = fields[2][0];
    map = fields[1][0];

    if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){

        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
            return;
        }

        /* store the range */
        storeRange(rangeStart,rangeEnd,USPREP_UNASSIGNED, pErrorCode);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){

        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
            return;
        }

        /* store the range */
        storeRange(rangeStart,rangeEnd,USPREP_PROHIBITED, pErrorCode);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){

        /* get the character code, field 0 */
        code=(uint32_t)uprv_strtoul(s, &end, 16);
        if(end<=s || end!=fields[0][1]) {
            fprintf(stderr, "gensprep: syntax error in field 0 at %s\n", fields[0][0]);
            *pErrorCode=U_PARSE_ERROR;
            exit(U_PARSE_ERROR);
        }

        /* parse the mapping string */
        length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
        
        /* store the mapping */
        storeMapping(code,mapping, length,USPREP_MAP, pErrorCode);

    }else{
        *pErrorCode = U_INVALID_FORMAT_ERROR;
    }
    
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gensprep error parsing  %s line %s at %s. Error: %s\n",filename,
               fields[0][0],fields[2][0],u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }

}
Exemplo n.º 27
0
static void U_CALLCONV
unicodeDataLineFn(void *context,
                  char *fields[][2], int32_t fieldCount,
                  UErrorCode *pErrorCode) {
    uint32_t decomp[40];
    Norm norm;
    const char *s;
    char *end;
    uint32_t code, value;
    int32_t length;
    UBool isCompat, something=FALSE;

    /* ignore First and Last entries for ranges */
    if( *fields[1][0]=='<' &&
        (length=(int32_t)(fields[1][1]-fields[1][0]))>=9 &&
        (0==uprv_memcmp(", First>", fields[1][1]-8, 8) || 0==uprv_memcmp(", Last>", fields[1][1]-7, 7))
    ) {
        return;
    }

    /* reset the properties */
    uprv_memset(&norm, 0, sizeof(Norm));

    /* get the character code, field 0 */
    code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
    if(end<=fields[0][0] || end!=fields[0][1]) {
        fprintf(stderr, "gennorm: syntax error in field 0 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* get canonical combining class, field 3 */
    value=(uint32_t)uprv_strtoul(fields[3][0], &end, 10);
    if(end<=fields[3][0] || end!=fields[3][1] || value>0xff) {
        fprintf(stderr, "gennorm: syntax error in field 3 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    if(value>0) {
        norm.udataCC=(uint8_t)value;
        something=TRUE;
    }

    /* get the decomposition, field 5 */
    if(fields[5][0]<fields[5][1]) {
        if(*(s=fields[5][0])=='<') {
            ++s;
            isCompat=TRUE;

            /* skip and ignore the compatibility type name */
            do {
                if(s==fields[5][1]) {
                    /* missing '>' */
                    fprintf(stderr, "gennorm: syntax error in field 5 at %s\n", fields[0][0]);
                    *pErrorCode=U_PARSE_ERROR;
                    exit(U_PARSE_ERROR);
                }
            } while(*s++!='>');
        } else {
            isCompat=FALSE;
        }

        /* parse the decomposition string */
        length=u_parseCodePoints(s, decomp, sizeof(decomp)/4, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            fprintf(stderr, "gennorm error parsing UnicodeData.txt decomposition of U+%04lx - %s\n",
                    (long)code, u_errorName(*pErrorCode));
            exit(*pErrorCode);
        }

        /* store the string */
        if(length>0) {
            something=TRUE;
            if(isCompat) {
                norm.lenNFKD=(uint8_t)length;
                norm.nfkd=decomp;
            } else {
                if(length>2) {
                    fprintf(stderr, "gennorm: error - length of NFD(U+%04lx) = %ld >2 in UnicodeData - illegal\n",
                            (long)code, (long)length);
                    *pErrorCode=U_PARSE_ERROR;
                    exit(U_PARSE_ERROR);
                }
                norm.lenNFD=(uint8_t)length;
                norm.nfd=decomp;
            }
        }
    }

    /* check for non-character code points */
    if((code&0xfffe)==0xfffe || (uint32_t)(code-0xfdd0)<0x20 || code>0x10ffff) {
        fprintf(stderr, "gennorm: error - properties for non-character code point U+%04lx\n",
                (long)code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    if(something) {
        /* there are normalization values, so store them */
#if 0
        if(beVerbose) {
            printf("store values for U+%04lx: cc=%d, lenNFD=%ld, lenNFKD=%ld\n",
                   (long)code, norm.udataCC, (long)norm.lenNFD, (long)norm.lenNFKD);
        }
#endif
        storeNorm(code, &norm);
    }
}
Exemplo n.º 28
0
static void U_CALLCONV
unicodeDataLineFn(void *context,
                  char *fields[][2], int32_t fieldCount,
                  UErrorCode *pErrorCode) {
    Props p;
    char *end;
    static UChar32 prevCode=0;
    UChar32 value;
    int32_t i;

    /* reset the properties */
    uprv_memset(&p, 0, sizeof(Props));

    /* get the character code, field 0 */
    p.code=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
    if(end<=fields[0][0] || end!=fields[0][1]) {
        fprintf(stderr, "gencase: syntax error in field 0 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* get general category, field 2 */
    i=getTokenIndex(genCategoryNames, U_CHAR_CATEGORY_COUNT, fields[2][0]);
    if(i>=0) {
        p.gc=(uint8_t)i;
    } else {
        fprintf(stderr, "gencase: unknown general category \"%s\" at code 0x%lx\n",
            fields[2][0], (unsigned long)p.code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* get canonical combining class, field 3 */
    value=(UChar32)uprv_strtoul(fields[3][0], &end, 10);
    if(end<=fields[3][0] || end!=fields[3][1] || value>0xff) {
        fprintf(stderr, "gencase: syntax error in field 3 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    p.cc=(uint8_t)value;

    /* get uppercase mapping, field 12 */
    value=(UChar32)uprv_strtoul(fields[12][0], &end, 16);
    if(end!=fields[12][1]) {
        fprintf(stderr, "gencase: syntax error in field 12 at code 0x%lx\n",
            (unsigned long)p.code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    if(value!=0 && value!=p.code) {
        p.upperCase=value;
        uset_add(caseSensitive, p.code);
        uset_add(caseSensitive, value);
    }

    /* get lowercase value, field 13 */
    value=(UChar32)uprv_strtoul(fields[13][0], &end, 16);
    if(end!=fields[13][1]) {
        fprintf(stderr, "gencase: syntax error in field 13 at code 0x%lx\n",
            (unsigned long)p.code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    if(value!=0 && value!=p.code) {
        p.lowerCase=value;
        uset_add(caseSensitive, p.code);
        uset_add(caseSensitive, value);
    }

    /* get titlecase value, field 14 */
    value=(UChar32)uprv_strtoul(fields[14][0], &end, 16);
    if(end!=fields[14][1]) {
        fprintf(stderr, "gencase: syntax error in field 14 at code 0x%lx\n",
            (unsigned long)p.code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    if(value!=0 && value!=p.code) {
        p.titleCase=value;
        uset_add(caseSensitive, p.code);
        uset_add(caseSensitive, value);
    }

    /* set additional properties from previously parsed files */
    if(specialCasingIndex<specialCasingCount && p.code==specialCasings[specialCasingIndex].code) {
        p.specialCasing=specialCasings+specialCasingIndex++;
    } else {
        p.specialCasing=NULL;
    }
    if(caseFoldingIndex<caseFoldingCount && p.code==caseFoldings[caseFoldingIndex].code) {
        p.caseFolding=caseFoldings+caseFoldingIndex++;

        /* ignore "Common" mappings (simple==full) that map to the same code point as the regular lowercase mapping */
        if( p.caseFolding->status=='C' &&
            p.caseFolding->simple==p.lowerCase
        ) {
            p.caseFolding=NULL;
        }
    } else {
        p.caseFolding=NULL;
    }

    /* check for non-character code points */
    if((p.code&0xfffe)==0xfffe || (uint32_t)(p.code-0xfdd0)<0x20) {
        fprintf(stderr, "gencase: error - properties for non-character code point U+%04lx\n",
                (unsigned long)p.code);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* check that the code points (p.code) are in ascending order */
    if(p.code<=prevCode && p.code>0) {
        fprintf(stderr, "gencase: error - UnicodeData entries out of order, U+%04lx after U+%04lx\n",
                (unsigned long)p.code, (unsigned long)prevCode);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* properties for a single code point */
    setProps(&p);

    prevCode=p.code;
}
Exemplo n.º 29
0
static void U_CALLCONV
caseFoldingLineFn(void *context,
                  char *fields[][2], int32_t fieldCount,
                  UErrorCode *pErrorCode) {
    char *end;
    static UChar32 prevCode=0;
    int32_t count;
    char status;

    /* get code point */
    caseFoldings[caseFoldingCount].code=(UChar32)uprv_strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
    end=(char *)u_skipWhitespace(end);
    if(end<=fields[0][0] || end!=fields[0][1]) {
        fprintf(stderr, "gencase: syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* get the status of this mapping */
    caseFoldings[caseFoldingCount].status=status=*u_skipWhitespace(fields[1][0]);
    if(status!='L' && status!='E' && status!='C' && status!='S' && status!='F' && status!='I' && status!='T') {
        fprintf(stderr, "gencase: unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* ignore all case folding mappings that are the same as the UnicodeData.txt lowercase mappings */
    if(status=='L') {
        return;
    }

    /* get the mapping */
    count=caseFoldings[caseFoldingCount].full[0]=
        (UChar)u_parseString(fields[2][0], caseFoldings[caseFoldingCount].full+1, 31, (uint32_t *)&caseFoldings[caseFoldingCount].simple, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gencase: error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }

    /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
    if(count==0 || count>2 || (count==2 && UTF_IS_SINGLE(caseFoldings[caseFoldingCount].full[1]))) {
        caseFoldings[caseFoldingCount].simple=0;
    }

    /* update the case-sensitive set */
    if(status!='T') {
        uset_add(caseSensitive, (UChar32)caseFoldings[caseFoldingCount].code);
        _set_addAll(caseSensitive, caseFoldings[caseFoldingCount].full+1, caseFoldings[caseFoldingCount].full[0]);
    }

    /* check the status */
    if(status=='S') {
        /* check if there was a full mapping for this code point before */
        if( caseFoldingCount>0 &&
            caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code &&
            caseFoldings[caseFoldingCount-1].status=='F'
        ) {
            /* merge the two entries */
            caseFoldings[caseFoldingCount-1].simple=caseFoldings[caseFoldingCount].simple;
            return;
        }
    } else if(status=='F') {
        /* check if there was a simple mapping for this code point before */
        if( caseFoldingCount>0 &&
            caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code &&
            caseFoldings[caseFoldingCount-1].status=='S'
        ) {
            /* merge the two entries */
            uprv_memcpy(caseFoldings[caseFoldingCount-1].full, caseFoldings[caseFoldingCount].full, 32*U_SIZEOF_UCHAR);
            return;
        }
    } else if(status=='I' || status=='T') {
        /* check if there was a default mapping for this code point before (remove it) */
        while(caseFoldingCount>0 &&
              caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code
        ) {
            prevCode=0;
            --caseFoldingCount;
        }
        /* store only a marker for special handling for cases like dotless i */
        caseFoldings[caseFoldingCount].simple=0;
        caseFoldings[caseFoldingCount].full[0]=0;
    }

    /* check that the code points (caseFoldings[caseFoldingCount].code) are in ascending order */
    if(caseFoldings[caseFoldingCount].code<=prevCode && caseFoldings[caseFoldingCount].code>0) {
        fprintf(stderr, "gencase: error - CaseFolding entries out of order, U+%04lx after U+%04lx\n",
                (unsigned long)caseFoldings[caseFoldingCount].code,
                (unsigned long)prevCode);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    prevCode=caseFoldings[caseFoldingCount].code;

    if(++caseFoldingCount==MAX_CASE_FOLDING_COUNT) {
        fprintf(stderr, "gencase: too many case folding mappings\n");
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        exit(U_INDEX_OUTOFBOUNDS_ERROR);
    }
}
Exemplo n.º 30
0
extern int
main(int argc, char* argv[]) {
    UBool sourceTOC, verbose;
    uint32_t maxSize;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    } else if(argc<2) {
        argc=-1;
    }

    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        FILE *where = argc < 0 ? stderr : stdout;

        /*
         * Broken into chucks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(where,
                "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] maxsize listfile\n", argc < 0 ? 'u' : 'U', *argv);
        if (options[0].doesOccur || options[1].doesOccur) {
            fprintf(where, "\n"
                "Read the list file (default: standard input) and create a common data\n"
                "file from specified files. Omit any files larger than maxsize, if maxsize > 0.\n");
            fprintf(where, "\n"
            "Options:\n"
            "\t-h, -?, --help              this usage text\n"
            "\t-v, --verbose               verbose output\n"
            "\t-c, --copyright             include the ICU copyright notice\n"
            "\t-C, --comment comment       include a comment string\n"
            "\t-d, --destdir dir           destination directory\n");
            fprintf(where,
            "\t-n, --name filename         output filename, without .type extension\n"
            "\t                            (default: " U_ICUDATA_NAME ")\n"
            "\t-t, --type filetype         type of the destination file\n"
            "\t                            (default: \" dat \")\n"
            "\t-S, --source tocfile        write a .c source file with the table of\n"
            "\t                            contents\n"
            "\t-e, --entrypoint name       override the c entrypoint name\n"
            "\t                            (default: \"<name>_<type>\")\n");
        }
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    sourceTOC=options[8].doesOccur;

    verbose = options[2].doesOccur;

    maxSize=(uint32_t)uprv_strtoul(argv[1], NULL, 0);

    createCommonDataFile(options[4].doesOccur ? options[4].value : NULL,
                         options[6].doesOccur ? options[6].value : NULL,
                         options[9].doesOccur ? options[9].value : options[6].doesOccur ? options[6].value : NULL,
                         options[7].doesOccur ? options[7].value : NULL,
                         options[10].doesOccur ? options[10].value : NULL,
                         options[3].doesOccur ? U_COPYRIGHT_STRING : options[5].doesOccur ? options[5].value : NULL,
                         argc == 2 ? NULL : argv[2],
                         maxSize, sourceTOC, verbose, NULL);

    return 0;
}