static uint8_t * readFile(const char *path, const char *name, int32_t &length, char &type) { char filename[1024]; FILE *file; uint8_t *data; UErrorCode errorCode; int32_t fileLength, typeEnum; makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); /* open the input file, get its length, allocate memory for it, read the file */ file=fopen(filename, "rb"); if(file==NULL) { fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); exit(U_FILE_ACCESS_ERROR); } /* get the file length */ fileLength=getFileLength(file); if(ferror(file) || fileLength<=0) { fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); fclose(file); exit(U_FILE_ACCESS_ERROR); } /* allocate the buffer, pad to multiple of 16 */ length=(fileLength+0xf)&~0xf; data=(uint8_t *)malloc(length); if(data==NULL) { fclose(file); exit(U_MEMORY_ALLOCATION_ERROR); } /* read the file */ if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) { fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); fclose(file); free(data); exit(U_FILE_ACCESS_ERROR); } /* pad the file to a multiple of 16 using the usual padding byte */ if(fileLength<length) { memset(data+fileLength, 0xaa, length-fileLength); } fclose(file); // minimum check for ICU-format data errorCode=U_ZERO_ERROR; typeEnum=getTypeEnumForInputData(data, length, &errorCode); if(typeEnum<0 || U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); free(data); exit(U_INVALID_FORMAT_ERROR); } type=makeTypeLetter(typeEnum); return data; }
char Package::getInType() { return makeTypeLetter(inCharset, inIsBigEndian); }
void Package::readPackage(const char *filename) { UDataSwapper *ds; const UDataInfo *pInfo; UErrorCode errorCode; const uint8_t *inBytes; int32_t length, offset, i; int32_t itemLength, typeEnum; char type; const UDataOffsetTOCEntry *inEntries; extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); /* read the file */ inData=readFile(NULL, filename, inLength, type); length=inLength; /* * swap the header - even if the swapping itself is a no-op * because it tells us the header length */ errorCode=U_ZERO_ERROR; makeTypeProps(type, inCharset, inIsBigEndian); ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", filename, u_errorName(errorCode)); exit(errorCode); } ds->printError=printPackageError; ds->printErrorContext=stderr; headerLength=sizeof(header); if(length<headerLength) { headerLength=length; } headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); if(U_FAILURE(errorCode)) { exit(errorCode); } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ pInfo->dataFormat[1]==0x6d && pInfo->dataFormat[2]==0x6e && pInfo->dataFormat[3]==0x44 && pInfo->formatVersion[0]==1 )) { fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); exit(U_UNSUPPORTED_ERROR); } inIsBigEndian=(UBool)pInfo->isBigEndian; inCharset=pInfo->charsetFamily; inBytes=(const uint8_t *)inData+headerLength; inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ length-=headerLength; if(length<4) { /* itemCount does not fit */ offset=0x7fffffff; } else { itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); if(itemCount==0) { offset=4; } else if(length<(4+8*itemCount)) { /* ToC table does not fit */ offset=0x7fffffff; } else { /* offset of the last item plus at least 20 bytes for its header */ offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); } } if(length<offset) { fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", (long)length); exit(U_INDEX_OUTOFBOUNDS_ERROR); } /* do not modify the package length variable until the last item's length is set */ if(itemCount>0) { char prefix[MAX_PKG_NAME_LENGTH+4]; char *s, *inItemStrings; int32_t inPkgNameLength, prefixLength, stringsOffset; if(itemCount>MAX_FILE_COUNT) { fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT); exit(U_BUFFER_OVERFLOW_ERROR); } /* swap the item name strings */ stringsOffset=4+8*itemCount; itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; // don't include padding bytes at the end of the item names while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { --itemLength; } if((inStringTop+itemLength)>STRING_STORE_SIZE) { fprintf(stderr, "icupkg: total length of item name strings too long\n"); exit(U_BUFFER_OVERFLOW_ERROR); } inItemStrings=inStrings+inStringTop; ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); exit(U_INVALID_FORMAT_ERROR); } inStringTop+=itemLength; // reset the Item entries memset(items, 0, itemCount*sizeof(Item)); inPkgNameLength=strlen(inPkgName); memcpy(prefix, inPkgName, inPkgNameLength); prefixLength=inPkgNameLength; /* * Get the common prefix of the items. * New-style ICU .dat packages use tree separators ('/') between package names, * tree names, and item names, * while old-style ICU .dat packages (before multi-tree support) * use an underscore ('_') between package and item names. */ offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; s=inItemStrings+offset; if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 0==memcmp(s, inPkgName, inPkgNameLength) && s[inPkgNameLength]=='_' ) { // old-style .dat package prefix[prefixLength++]='_'; } else { // new-style .dat package prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR // then the test in the loop below will fail } prefix[prefixLength]=0; /* read the ToC table */ for(i=0; i<itemCount; ++i) { // skip the package part of the item name, error if it does not match the actual package name // or if nothing follows the package name offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; s=inItemStrings+offset; if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", s, prefix); exit(U_UNSUPPORTED_ERROR); } items[i].name=s+prefixLength; // set the item's data items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); if(i>0) { items[i-1].length=(int32_t)(items[i].data-items[i-1].data); // set the previous item's platform type typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); if(typeEnum<0 || U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); exit(U_INVALID_FORMAT_ERROR); } items[i-1].type=makeTypeLetter(typeEnum); } items[i].isDataOwned=FALSE; } // set the last item's length items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); // set the last item's platform type typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); if(typeEnum<0 || U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); exit(U_INVALID_FORMAT_ERROR); } items[itemCount-1].type=makeTypeLetter(typeEnum); if(type!=U_ICUDATA_TYPE_LETTER[0]) { // sort the item names for the local charset sortItems(); } } udata_closeSwapper(ds); }