Example #1
0
/* parse a mapping line; must not be empty */
U_CAPI UBool U_EXPORT2
ucm_parseMappingLine(UCMapping *m,
                     UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
                     uint8_t bytes[UCNV_EXT_MAX_BYTES],
                     const char *line) {
    const char *s;
    char *end;
    UChar32 cp;
    int32_t u16Length;
    int8_t uLen, bLen, f;

    s=line;
    uLen=bLen=0;

    /* parse code points */
    for(;;) {
        /* skip an optional plus sign */
        if(uLen>0 && *s=='+') {
            ++s;
        }
        if(*s!='<') {
            break;
        }

        if( s[1]!='U' ||
            (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 ||
            *end!='>'
        ) {
            fprintf(stderr, "ucm error: Unicode code point must be formatted as <UXXXX> (1..6 hex digits) - \"%s\"\n", line);
            return FALSE;
        }
        if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) {
            fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line);
            return FALSE;
        }

        if(uLen==UCNV_EXT_MAX_UCHARS) {
            fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line);
            return FALSE;
        }
        codePoints[uLen++]=cp;
        s=end+1;
    }

    if(uLen==0) {
        fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line);
        return FALSE;
    } else if(uLen==1) {
        m->u=codePoints[0];
    } else {
        UErrorCode errorCode=U_ZERO_ERROR;
        u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode);
        if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) ||
            u16Length>UCNV_EXT_MAX_UCHARS
        ) {
            fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line);
            return FALSE;
        }
    }

    s=u_skipWhitespace(s);

    /* parse bytes */
    bLen=ucm_parseBytes(bytes, line, &s);

    if(bLen<0) {
        return FALSE;
    } else if(bLen==0) {
        fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line);
        return FALSE;
    } else if(bLen<=4) {
        uprv_memcpy(m->b.bytes, bytes, bLen);
    }

    /* skip everything until the fallback indicator, even the start of a comment */
    for(;;) {
        if(*s==0) {
            f=-1; /* no fallback indicator */
            break;
        } else if(*s=='|') {
            f=(int8_t)(s[1]-'0');
            if((uint8_t)f>4) {
                fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line);
                return FALSE;
            }
            break;
        }
        ++s;
    }

    m->uLen=uLen;
    m->bLen=bLen;
    m->f=f;
    return TRUE;
}
static void
readHeader(ConvData *data,
           FileStream* convFile,
           const char* converterName,
           UErrorCode *pErrorCode) {
    char line[1024];
    char *s, *key, *value;
    const UConverterStaticData *prototype;
    UConverterStaticData *staticData;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    staticData=&data->staticData;
    staticData->platform=UCNV_IBM;
    staticData->subCharLen=0;

    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
        /* basic parsing and handling of state-related items */
        if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
            continue;
        }

        /* stop at the beginning of the mapping section */
        if(uprv_strcmp(line, "CHARMAP")==0) {
            break;
        }

        /* collect the information from the header field, ignore unknown keys */
        if(uprv_strcmp(key, "code_set_name")==0) {
            if(*value!=0) {
                uprv_strcpy((char *)staticData->name, value);
                getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
            }
        } else if(uprv_strcmp(key, "subchar")==0) {
            uint8_t bytes[UCNV_EXT_MAX_BYTES];
            int8_t length;

            s=value;
            length=ucm_parseBytes(bytes, line, (const char **)&s);
            if(1<=length && length<=4 && *s==0) {
                staticData->subCharLen=length;
                uprv_memcpy(staticData->subChar, bytes, length);
            } else {
                fprintf(stderr, "error: illegal <subchar> %s\n", value);
                *pErrorCode=U_INVALID_TABLE_FORMAT;
                return;
            }
        } else if(uprv_strcmp(key, "subchar1")==0) {
            uint8_t bytes[UCNV_EXT_MAX_BYTES];

            s=value;
            if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
                staticData->subChar1=bytes[0];
            } else {
                fprintf(stderr, "error: illegal <subchar1> %s\n", value);
                *pErrorCode=U_INVALID_TABLE_FORMAT;
                return;
            }
        }
    }

    /* copy values from the UCMFile to the static data */
    staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
    staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
    staticData->conversionType=data->ucm->states.conversionType;

    if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
        fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
        *pErrorCode=U_INVALID_TABLE_FORMAT;
        return;
    }

    /*
     * Now that we know the type, copy any 'default' values from the table.
     * We need not check the type any further because the parser only
     * recognizes what we have prototypes for.
     *
     * For delta (extension-only) tables, copy values from the base file
     * instead, see createConverter().
     */
    if(data->ucm->baseName[0]==0) {
        prototype=ucnv_converterStaticData[staticData->conversionType];
        if(prototype!=NULL) {
            if(staticData->name[0]==0) {
                uprv_strcpy((char *)staticData->name, prototype->name);
            }

            if(staticData->codepage==0) {
                staticData->codepage=prototype->codepage;
            }

            if(staticData->platform==0) {
                staticData->platform=prototype->platform;
            }

            if(staticData->minBytesPerChar==0) {
                staticData->minBytesPerChar=prototype->minBytesPerChar;
            }

            if(staticData->maxBytesPerChar==0) {
                staticData->maxBytesPerChar=prototype->maxBytesPerChar;
            }

            if(staticData->subCharLen==0) {
                staticData->subCharLen=prototype->subCharLen;
                if(prototype->subCharLen>0) {
                    uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
                }
            }
        }
    }

    if(data->ucm->states.outputType<0) {
        data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
    }

    if( staticData->subChar1!=0 &&
            (staticData->minBytesPerChar>1 ||
                (staticData->conversionType!=UCNV_MBCS &&
                 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
    ) {
        fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
        *pErrorCode=U_INVALID_TABLE_FORMAT;
    }
}