static uint8_t *
readFile(const char *path, const char *name, int32_t &length, char &type) {
    char filename[1024];
    FILE *file;
    uint8_t *data;
    UErrorCode errorCode;
    int32_t fileLength, typeEnum;

    makeFullFilename(path, name, filename, (int32_t)sizeof(filename));

    /* open the input file, get its length, allocate memory for it, read the file */
    file=fopen(filename, "rb");
    if(file==NULL) {
        fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    /* get the file length */
    fileLength=getFileLength(file);
    if(ferror(file) || fileLength<=0) {
        fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
        fclose(file);
        exit(U_FILE_ACCESS_ERROR);
    }

    /* allocate the buffer, pad to multiple of 16 */
    length=(fileLength+0xf)&~0xf;
    data=(uint8_t *)malloc(length);
    if(data==NULL) {
        fclose(file);
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    /* read the file */
    if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) {
        fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
        fclose(file);
        free(data);
        exit(U_FILE_ACCESS_ERROR);
    }

    /* pad the file to a multiple of 16 using the usual padding byte */
    if(fileLength<length) {
        memset(data+fileLength, 0xaa, length-fileLength);
    }

    fclose(file);

    // minimum check for ICU-format data
    errorCode=U_ZERO_ERROR;
    typeEnum=getTypeEnumForInputData(data, length, &errorCode);
    if(typeEnum<0 || U_FAILURE(errorCode)) {
        fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
        free(data);
        exit(U_INVALID_FORMAT_ERROR);
    }
    type=makeTypeLetter(typeEnum);

    return data;
}
Пример #2
0
char
Package::getInType() {
    return makeTypeLetter(inCharset, inIsBigEndian);
}
Пример #3
0
void
Package::readPackage(const char *filename) {
    UDataSwapper *ds;
    const UDataInfo *pInfo;
    UErrorCode errorCode;

    const uint8_t *inBytes;

    int32_t length, offset, i;
    int32_t itemLength, typeEnum;
    char type;

    const UDataOffsetTOCEntry *inEntries;

    extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));

    /* read the file */
    inData=readFile(NULL, filename, inLength, type);
    length=inLength;

    /*
     * swap the header - even if the swapping itself is a no-op
     * because it tells us the header length
     */
    errorCode=U_ZERO_ERROR;
    makeTypeProps(type, inCharset, inIsBigEndian);
    ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
                filename, u_errorName(errorCode));
        exit(errorCode);
    }

    ds->printError=printPackageError;
    ds->printErrorContext=stderr;

    headerLength=sizeof(header);
    if(length<headerLength) {
        headerLength=length;
    }
    headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
    if(U_FAILURE(errorCode)) {
        exit(errorCode);
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
        pInfo->dataFormat[1]==0x6d &&
        pInfo->dataFormat[2]==0x6e &&
        pInfo->dataFormat[3]==0x44 &&
        pInfo->formatVersion[0]==1
    )) {
        fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
                pInfo->dataFormat[0], pInfo->dataFormat[1],
                pInfo->dataFormat[2], pInfo->dataFormat[3],
                pInfo->formatVersion[0]);
        exit(U_UNSUPPORTED_ERROR);
    }
    inIsBigEndian=(UBool)pInfo->isBigEndian;
    inCharset=pInfo->charsetFamily;

    inBytes=(const uint8_t *)inData+headerLength;
    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);

    /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
    length-=headerLength;
    if(length<4) {
        /* itemCount does not fit */
        offset=0x7fffffff;
    } else {
        itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
        if(itemCount==0) {
            offset=4;
        } else if(length<(4+8*itemCount)) {
            /* ToC table does not fit */
            offset=0x7fffffff;
        } else {
            /* offset of the last item plus at least 20 bytes for its header */
            offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
        }
    }
    if(length<offset) {
        fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
                        (long)length);
        exit(U_INDEX_OUTOFBOUNDS_ERROR);
    }
    /* do not modify the package length variable until the last item's length is set */

    if(itemCount>0) {
        char prefix[MAX_PKG_NAME_LENGTH+4];
        char *s, *inItemStrings;
        int32_t inPkgNameLength, prefixLength, stringsOffset;

        if(itemCount>MAX_FILE_COUNT) {
            fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT);
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        /* swap the item name strings */
        stringsOffset=4+8*itemCount;
        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;

        // don't include padding bytes at the end of the item names
        while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
            --itemLength;
        }

        if((inStringTop+itemLength)>STRING_STORE_SIZE) {
            fprintf(stderr, "icupkg: total length of item name strings too long\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        inItemStrings=inStrings+inStringTop;
        ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
            exit(U_INVALID_FORMAT_ERROR);
        }
        inStringTop+=itemLength;

        // reset the Item entries
        memset(items, 0, itemCount*sizeof(Item));

        inPkgNameLength=strlen(inPkgName);
        memcpy(prefix, inPkgName, inPkgNameLength);
        prefixLength=inPkgNameLength;

        /*
         * Get the common prefix of the items.
         * New-style ICU .dat packages use tree separators ('/') between package names,
         * tree names, and item names,
         * while old-style ICU .dat packages (before multi-tree support)
         * use an underscore ('_') between package and item names.
         */
        offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
        s=inItemStrings+offset;
        if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
            0==memcmp(s, inPkgName, inPkgNameLength) &&
            s[inPkgNameLength]=='_'
        ) {
            // old-style .dat package
            prefix[prefixLength++]='_';
        } else {
            // new-style .dat package
            prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
            // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
            // then the test in the loop below will fail
        }
        prefix[prefixLength]=0;

        /* read the ToC table */
        for(i=0; i<itemCount; ++i) {
            // skip the package part of the item name, error if it does not match the actual package name
            // or if nothing follows the package name
            offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
            s=inItemStrings+offset;
            if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
                fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
                        s, prefix);
                exit(U_UNSUPPORTED_ERROR);
            }
            items[i].name=s+prefixLength;

            // set the item's data
            items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
            if(i>0) {
                items[i-1].length=(int32_t)(items[i].data-items[i-1].data);

                // set the previous item's platform type
                typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
                if(typeEnum<0 || U_FAILURE(errorCode)) {
                    fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
                    exit(U_INVALID_FORMAT_ERROR);
                }
                items[i-1].type=makeTypeLetter(typeEnum);
            }
            items[i].isDataOwned=FALSE;
        }
        // set the last item's length
        items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);

        // set the last item's platform type
        typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
        if(typeEnum<0 || U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
            exit(U_INVALID_FORMAT_ERROR);
        }
        items[itemCount-1].type=makeTypeLetter(typeEnum);

        if(type!=U_ICUDATA_TYPE_LETTER[0]) {
            // sort the item names for the local charset
            sortItems();
        }
    }

    udata_closeSwapper(ds);
}