/*
 * Serialize a CompactShortArray into a data file being built.
 *
 * Layout written, in order: the format version, fCount, kBlockShift and
 * kBlockMask as four 32-bit words, then the fIndex block
 * (UCMP16_kIndexCount entries), then the fArray block (fCount entries),
 * and finally zero padding up to the next multiple of 4 bytes.
 *
 * @param pData  the data memory to write to
 * @param array  the compact array to serialize
 * @return the number of bytes written, including the end padding
 */
U_CAPI uint32_t U_EXPORT2
udata_write_ucmp16(UNewDataMemory *pData, const CompactShortArray *array)
{
    const int32_t indexBytes = (int32_t)(sizeof(array->fIndex[0]) * UCMP16_kIndexCount);
    const int32_t arrayBytes = (int32_t)(sizeof(array->fArray[0]) * array->fCount);
    int32_t written;

    /* fixed-size header: four 32-bit words */
    udata_write32(pData, ICU_UCMP16_VERSION);
    udata_write32(pData, array->fCount);
    udata_write32(pData, array->kBlockShift);
    udata_write32(pData, array->kBlockMask);
    written = 4 * 4;

    /* variable-size payload: index table followed by the value array */
    udata_writeBlock(pData, array->fIndex, indexBytes);
    written += indexBytes;

    udata_writeBlock(pData, array->fArray, arrayBytes);
    written += arrayBytes;

    /* end padding: one byte at a time until the total is a multiple of 4 */
    for (; (written % 4) != 0; ++written) {
        udata_writePadding(pData, 1);
    }

    return (uint32_t)written;
}
void BiDiPropsBuilder::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } UNewDataMemory *pData=udata_create(path, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &dataInfo, withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "genprops: udata_create(%s, ubidi.icu) failed - %s\n", path, u_errorName(errorCode)); return; } udata_writeBlock(pData, indexes, sizeof(indexes)); udata_writeBlock(pData, trieBlock, trieSize); udata_writeBlock(pData, mirrors, 4*mirrorTop); UChar32 jgStart=indexes[UBIDI_IX_JG_START]; UChar32 jgLimit=indexes[UBIDI_IX_JG_LIMIT]; udata_writeBlock(pData, jgArray+(jgStart-MIN_JG_START), jgLimit-jgStart); UChar32 jgStart2=indexes[UBIDI_IX_JG_START2]; UChar32 jgLimit2=indexes[UBIDI_IX_JG_LIMIT2]; udata_writeBlock(pData, jgArray2+(jgStart2-MIN_JG_START2), jgLimit2-jgStart2); long dataLength=udata_finish(pData, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "genprops error: bidipropsbuilder %d writing the output file\n", errorCode); return; } if(dataLength!=indexes[UBIDI_IX_LENGTH]) { fprintf(stderr, "udata_finish(ubidi.icu) reports %ld bytes written but should be %ld\n", dataLength, (long)indexes[UBIDI_IX_LENGTH]); errorCode=U_INTERNAL_PROGRAM_ERROR; } }
/*
 * Write one converter's data as "<cnvName>.cnv" into cnvDir.
 *
 * The file body is the UConverterStaticData structure followed by the base
 * table (if data->cnvData is set) and/or the extension table (if
 * data->extData is set), each written by its own write() callback, which
 * returns the number of bytes it should have produced.
 *
 * @param data     converter data to serialize
 * @param cnvName  converter (file base) name
 * @param cnvDir   output directory passed to udata_create()
 * @param status   in/out ICU error code; nothing is done if it already
 *                 indicates a failure. On return it holds the I/O error from
 *                 udata_create()/udata_finish(), or U_INTERNAL_PROGRAM_ERROR
 *                 if the number of bytes in the file differs from the number
 *                 of bytes counted while writing.
 */
static void
writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
{
    UNewDataMemory *mem = NULL;
    uint32_t sz2;
    uint32_t size = 0;
    int32_t tableType;

    if(U_FAILURE(*status)) {
        return;
    }

    tableType=TABLE_NONE;
    if(data->cnvData!=NULL) {
        tableType|=TABLE_BASE;
    }
    if(data->extData!=NULL) {
        tableType|=TABLE_EXT;
    }

    mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo,
                       haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
    if(U_FAILURE(*status)) {
        fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
                cnvName, "cnv", u_errorName(*status));
        return;
    }

    if(VERBOSE) {
        printf("- Opened udata %s.%s\n", cnvName, "cnv");
    }

    /* all read only, clean, platform independent data.  Mmmm. :)  */
    udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
    size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */

    /* Now, write the table */
    if(tableType&TABLE_BASE) {
        size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
    }
    if(tableType&TABLE_EXT) {
        size += data->extData->write(data->extData, &data->staticData, mem, tableType);
    }

    sz2 = udata_finish(mem, status);
    if(U_FAILURE(*status)) {
        /*
         * Report the real I/O error and keep it in *status; falling through
         * to the size comparison would replace it with
         * U_INTERNAL_PROGRAM_ERROR and hide the cause.
         */
        fprintf(stderr, "error: unable to finish writing the udata %s.%s: %s\n",
                cnvName, "cnv", u_errorName(*status));
        return;
    }

    if(size != sz2) {
        fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n",
                (unsigned int)sz2, (unsigned int)size);
        *status=U_INTERNAL_PROGRAM_ERROR;
    }

    if(VERBOSE) {
        printf("- Wrote %u bytes to the udata.\n", (unsigned int)sz2);
    }
}
int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) { int32_t length; int8_t* data = builder.createData(length); UNewDataMemory *pdata; UErrorCode status = U_ZERO_ERROR; pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo, useCopyright ? U_COPYRIGHT_STRING : 0, &status); if (U_FAILURE(status)) { die("Unable to create data memory"); } udata_writeBlock(pdata, data, length); int32_t dataLength = (int32_t) udata_finish(pdata, &status); if (U_FAILURE(status)) { die("Error writing output file"); } if (dataLength != length) { die("Written file doesn't match expected size"); } return dataLength; }
void PNamesBuilderImpl::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } UNewDataMemory *pdata=udata_create(path, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo, withCopyright ? U_COPYRIGHT_STRING : 0, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "genprops: udata_create(%s, pnames.icu) failed - %s\n", path, u_errorName(errorCode)); return; } udata_writeBlock(pdata, indexes, PropNameData::IX_COUNT*4); udata_writeBlock(pdata, valueMaps.getBuffer(), valueMaps.size()*4); udata_writeBlock(pdata, bytesTries.data(), bytesTries.length()); udata_writeBlock(pdata, nameGroups.data(), nameGroups.length()); int32_t dataLength=(int32_t)udata_finish(pdata, &errorCode); if(dataLength!=indexes[PropNameData::IX_TOTAL_SIZE]) { fprintf(stderr, "udata_finish(pnames.icu) reports %ld bytes written but should be %ld\n", (long)dataLength, (long)indexes[PropNameData::IX_TOTAL_SIZE]); errorCode=U_INTERNAL_PROGRAM_ERROR; } }
//---------------------------------------------------------------------------- // // main for gencfu // //---------------------------------------------------------------------------- int main(int argc, char **argv) { UErrorCode status = U_ZERO_ERROR; const char *confFileName; const char *confWSFileName; const char *outFileName; const char *outDir = NULL; const char *copyright = NULL; // // Pick up and check the command line arguments, // using the standard ICU tool utils option handling. // U_MAIN_INIT_ARGS(argc, argv); progName = argv[0]; argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); if(argc<0) { // Unrecognized option fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } if(options[0].doesOccur || options[1].doesOccur) { // -? or -h for help. usageAndDie(0); } if (!(options[3].doesOccur && options[4].doesOccur && options[5].doesOccur)) { fprintf(stderr, "confusables file, whole script confusables file and output file must all be specified.\n"); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } confFileName = options[3].value; confWSFileName = options[4].value; outFileName = options[5].value; if (options[6].doesOccur) { u_setDataDirectory(options[6].value); } status = U_ZERO_ERROR; /* Combine the directory with the file name */ if(options[7].doesOccur) { outDir = options[7].value; } if (options[8].doesOccur) { copyright = U_COPYRIGHT_STRING; } #if UCONFIG_NO_REGULAR_EXPRESSIONS || UCONFIG_NO_NORMALIZATION || UCONFIG_NO_FILE_IO // spoof detection data file parsing is dependent on regular expressions. // TODO: have the tool return an error status. Requires fixing the ICU data build // so that it doesn't abort entirely on that error. 
UNewDataMemory *pData; char msg[1024]; /* write message with just the name */ sprintf(msg, "gencfu writes dummy %s because of UCONFIG_NO_REGULAR_EXPRESSIONS and/or UCONFIG_NO_NORMALIZATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName); fprintf(stderr, "%s\n", msg); /* write the dummy data file */ pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status); udata_writeBlock(pData, msg, strlen(msg)); udata_finish(pData, &status); return (int)status; #else /* Initialize ICU */ u_init(&status); if (U_FAILURE(status)) { fprintf(stderr, "%s: can not initialize ICU. status = %s\n", argv[0], u_errorName(status)); exit(1); } status = U_ZERO_ERROR; // Read in the confusables source file int32_t confusablesLen = 0; const char *confusables = readFile(confFileName, &confusablesLen); if (confusables == NULL) { printf("gencfu: error reading file \"%s\"\n", confFileName); exit(-1); } int32_t wsConfusablesLen = 0; const char *wsConfsables = readFile(confWSFileName, &wsConfusablesLen); if (wsConfsables == NULL) { printf("gencfu: error reading file \"%s\"\n", confFileName); exit(-1); } // // Create the Spoof Detector from the source confusables files. // This will compile the data. // UParseError parseError; parseError.line = 0; parseError.offset = 0; int32_t errType; USpoofChecker *sc = uspoof_openFromSource(confusables, confusablesLen, wsConfsables, wsConfusablesLen, &errType, &parseError, &status); if (U_FAILURE(status)) { const char *errFile = (errType == USPOOF_WHOLE_SCRIPT_CONFUSABLE)? confWSFileName : confFileName; fprintf(stderr, "gencfu: uspoof_openFromSource error \"%s\" at file %s, line %d, column %d\n", u_errorName(status), errFile, (int)parseError.line, (int)parseError.offset); exit(status); }; // // Get the compiled rule data from the USpoofChecker. 
// uint32_t outDataSize; uint8_t *outData; outDataSize = uspoof_serialize(sc, NULL, 0, &status); if (status != U_BUFFER_OVERFLOW_ERROR) { fprintf(stderr, "gencfu: uspoof_serialize() returned %s\n", u_errorName(status)); exit(status); } status = U_ZERO_ERROR; outData = new uint8_t[outDataSize]; uspoof_serialize(sc, outData, outDataSize, &status); // Copy the data format version numbers from the spoof data header into the UDataMemory header. uprv_memcpy(dh.info.formatVersion, reinterpret_cast<SpoofDataHeader *>(outData)->fFormatVersion, sizeof(dh.info.formatVersion)); // // Create the output file // size_t bytesWritten; UNewDataMemory *pData; pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status); if(U_FAILURE(status)) { fprintf(stderr, "gencfu: Could not open output file \"%s\", \"%s\"\n", outFileName, u_errorName(status)); exit(status); } // Write the data itself. udata_writeBlock(pData, outData, outDataSize); // finish up bytesWritten = udata_finish(pData, &status); if(U_FAILURE(status)) { fprintf(stderr, "gencfu: Error %d writing the output file\n", status); exit(status); } if (bytesWritten != outDataSize) { fprintf(stderr, "gencfu: Error writing to output file \"%s\"\n", outFileName); exit(-1); } uspoof_close(sc); delete [] outData; delete [] confusables; delete [] wsConfsables; u_cleanup(); printf("gencfu: tool completed successfully.\n"); return 0; #endif // UCONFIG_NO_REGULAR_EXPRESSIONS }
//---------------------------------------------------------------------------- // // main for genctd // //---------------------------------------------------------------------------- int main(int argc, char **argv) { UErrorCode status = U_ZERO_ERROR; const char *wordFileName; const char *outFileName; const char *outDir = NULL; const char *copyright = NULL; // // Pick up and check the command line arguments, // using the standard ICU tool utils option handling. // U_MAIN_INIT_ARGS(argc, argv); progName = argv[0]; argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); if(argc<0) { // Unrecognized option fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } if(options[0].doesOccur || options[1].doesOccur) { // -? or -h for help. usageAndDie(0); } if (!options[3].doesOccur || argc < 2) { fprintf(stderr, "input and output file must both be specified.\n"); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } outFileName = options[3].value; wordFileName = argv[1]; if (options[4].doesOccur) { u_setDataDirectory(options[4].value); } status = U_ZERO_ERROR; /* Combine the directory with the file name */ if(options[5].doesOccur) { outDir = options[5].value; } if (options[6].doesOccur) { copyright = U_COPYRIGHT_STRING; } #if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO UNewDataMemory *pData; char msg[1024]; /* write message with just the name */ sprintf(msg, "genctd writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName); fprintf(stderr, "%s\n", msg); /* write the dummy data file */ pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status); udata_writeBlock(pData, msg, strlen(msg)); udata_finish(pData, &status); return (int)status; #else /* Initialize ICU */ u_init(&status); if (U_FAILURE(status)) { fprintf(stderr, "%s: can not initialize ICU. 
status = %s\n", argv[0], u_errorName(status)); exit(1); } status = U_ZERO_ERROR; // // Read in the dictionary source file // long result; long wordFileSize; FILE *file; char *wordBufferC; MutableTrieDictionary *mtd = NULL; file = fopen(wordFileName, "rb"); if( file == 0 ) { //cannot find file //create 1-line dummy file: ie 1 char, 1 value UNewDataMemory *pData; char msg[1024]; /* write message with just the name */ sprintf(msg, "%s not found, genctd writes dummy %s", wordFileName, outFileName); fprintf(stderr, "%s\n", msg); UChar c = 0x0020; mtd = new MutableTrieDictionary(c, status, TRUE); mtd->addWord(&c, 1, status, 1); } else { //read words in from input file fseek(file, 0, SEEK_END); wordFileSize = ftell(file); fseek(file, 0, SEEK_SET); wordBufferC = new char[wordFileSize+10]; result = (long)fread(wordBufferC, 1, wordFileSize, file); if (result != wordFileSize) { fprintf(stderr, "Error reading file \"%s\"\n", wordFileName); exit (-1); } wordBufferC[wordFileSize]=0; fclose(file); // // Look for a Unicode Signature (BOM) on the word file // int32_t signatureLength; const char * wordSourceC = wordBufferC; const char* encoding = ucnv_detectUnicodeSignature( wordSourceC, wordFileSize, &signatureLength, &status); if (U_FAILURE(status)) { exit(status); } if(encoding!=NULL ){ wordSourceC += signatureLength; wordFileSize -= signatureLength; } // // Open a converter to take the rule file to UTF-16 // UConverter* conv; conv = ucnv_open(encoding, &status); if (U_FAILURE(status)) { fprintf(stderr, "ucnv_open: ICU Error \"%s\"\n", u_errorName(status)); exit(status); } // // Convert the words to UChar. // Preflight first to determine required buffer size. 
// uint32_t destCap = ucnv_toUChars(conv, NULL, // dest, 0, // destCapacity, wordSourceC, wordFileSize, &status); if (status != U_BUFFER_OVERFLOW_ERROR) { fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); exit(status); }; status = U_ZERO_ERROR; UChar *wordSourceU = new UChar[destCap+1]; ucnv_toUChars(conv, wordSourceU, // dest, destCap+1, wordSourceC, wordFileSize, &status); if (U_FAILURE(status)) { fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); exit(status); }; ucnv_close(conv); // Get rid of the original file buffer delete[] wordBufferC; // Create a MutableTrieDictionary, and loop through all the lines, inserting // words. // First, pick a median character. UChar *current = wordSourceU + (destCap/2); UChar uc = *current++; UnicodeSet breaks; breaks.add(0x000A); // Line Feed breaks.add(0x000D); // Carriage Return breaks.add(0x2028); // Line Separator breaks.add(0x2029); // Paragraph Separator do { // Look for line break while (uc && !breaks.contains(uc)) { uc = *current++; } // Now skip to first non-line-break while (uc && breaks.contains(uc)) { uc = *current++; } } while (uc && (breaks.contains(uc) || u_isspace(uc))); mtd = new MutableTrieDictionary(uc, status); if (U_FAILURE(status)) { fprintf(stderr, "new MutableTrieDictionary: ICU Error \"%s\"\n", u_errorName(status)); exit(status); } // Now add the words. Words are non-space characters at the beginning of // lines, and must be at least one UChar. If a word has an associated value, // the value should follow the word on the same line after a tab character. 
current = wordSourceU; UChar *candidate = current; uc = *current++; int32_t length = 0; int count = 0; while (uc) { while (uc && !u_isspace(uc)) { ++length; uc = *current++; } UnicodeString valueString; UChar candidateValue; if(uc == 0x0009){ //separator is a tab char, read in number after space while (uc && u_isspace(uc)) { uc = *current++; } while (uc && !u_isspace(uc)) { valueString.append(uc); uc = *current++; } } if (length > 0) { count++; if(valueString.length() > 0){ mtd->setValued(TRUE); uint32_t value = 0; char* s = new char[valueString.length()]; valueString.extract(0,valueString.length(), s, valueString.length()); int n = sscanf(s, "%ud", &value); U_ASSERT(n == 1); U_ASSERT(value >= 0); mtd->addWord(candidate, length, status, (uint16_t)value); delete[] s; } else { mtd->addWord(candidate, length, status); } if (U_FAILURE(status)) { fprintf(stderr, "MutableTrieDictionary::addWord: ICU Error \"%s\" at line %d in input file\n", u_errorName(status), count); exit(status); } } // Find beginning of next line while (uc && !breaks.contains(uc)) { uc = *current++; } // Find next non-line-breaking character while (uc && breaks.contains(uc)) { uc = *current++; } candidate = current-1; length = 0; } // Get rid of the Unicode text buffer delete[] wordSourceU; } // Now, create a CompactTrieDictionary from the mutable dictionary CompactTrieDictionary *ctd = new CompactTrieDictionary(*mtd, status); if (U_FAILURE(status)) { fprintf(stderr, "new CompactTrieDictionary: ICU Error \"%s\"\n", u_errorName(status)); exit(status); } // Get rid of the MutableTrieDictionary delete mtd; // // Get the binary data from the dictionary. 
// uint32_t outDataSize = ctd->dataSize(); const uint8_t *outData = (const uint8_t *)ctd->data(); // // Create the output file // size_t bytesWritten; UNewDataMemory *pData; pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status); if(U_FAILURE(status)) { fprintf(stderr, "genctd: Could not open output file \"%s\", \"%s\"\n", outFileName, u_errorName(status)); exit(status); } // Write the data itself. udata_writeBlock(pData, outData, outDataSize); // finish up bytesWritten = udata_finish(pData, &status); if(U_FAILURE(status)) { fprintf(stderr, "genctd: error \"%s\" writing the output file\n", u_errorName(status)); exit(status); } if (bytesWritten != outDataSize) { fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName); exit(-1); } // Get rid of the CompactTrieDictionary delete ctd; u_cleanup(); printf("genctd: tool completed successfully.\n"); return 0; #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ }
/*
 * Build and write the bidi properties data.
 *
 * Column 0 of every properties-vector row below UPVEC_FIRST_SPECIAL_CP is
 * stored in a trie; non-zero Joining_Group values from column 1 are collected
 * in a byte array that covers a contiguous code point range and is padded to
 * a multiple of 4 bytes. The resulting indexes, trie, mirroring table and
 * Joining_Group array are then written either as a .c source file with
 * hardcoded data (using a UTrie2) or as a binary data file.
 *
 * Any failure terminates the process via exit() with an ICU error code.
 *
 * @param dataDir  output directory
 * @param csource  if TRUE, write ubidi_props_data.c; otherwise write the
 *                 binary data file
 */
extern void
generateData(const char *dataDir, UBool csource) {
    static int32_t indexes[UBIDI_IX_TOP]={
        UBIDI_IX_TOP
    };
    static uint8_t trieBlock[40000];
    static uint8_t jgArray[0x300]; /* at most for U+0600..U+08FF */

    const uint32_t *row;
    UChar32 start, end, prev, jgStart;
    int32_t i;

    UNewDataMemory *pData;
    UNewTrie *pTrie;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t trieSize;
    long dataLength;

    makeMirror();

    pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
    if(pTrie==NULL) {
        fprintf(stderr, "genbidi error: unable to create a UNewTrie\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    /* prev==0 means that no Joining_Group value has been seen yet */
    prev=jgStart=0;
    for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL && start<UPVEC_FIRST_SPECIAL_CP; ++i) {
        /* store most values from vector column 0 in the trie */
        if(!utrie_setRange32(pTrie, start, end+1, *row, TRUE)) {
            fprintf(stderr, "genbidi error: unable to set trie value (overflow)\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        /* store Joining_Group values from vector column 1 in a simple byte array */
        if(row[1]!=0) {
            if(start<0x600 || 0x8ff<end) {
                fprintf(stderr, "genbidi error: Joining_Group for out-of-range code points U+%04lx..U+%04lx\n",
                        (long)start, (long)end);
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }

            if(prev==0) {
                /* first code point with any value */
                prev=jgStart=start;
            } else {
                /* add No_Joining_Group for code points between prev and start */
                while(prev<start) {
                    jgArray[prev++ -jgStart]=0;
                }
            }

            /* set Joining_Group value for start..end */
            while(prev<=end) {
                jgArray[prev++ -jgStart]=(uint8_t)row[1];
            }
        }
    }

    /* finish jgArray, pad to multiple of 4 */
    while((prev-jgStart)&3) {
        jgArray[prev++ -jgStart]=0;
    }
    indexes[UBIDI_IX_JG_START]=jgStart;
    indexes[UBIDI_IX_JG_LIMIT]=prev;

    trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genbidi error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
        exit(errorCode);
    }

    indexes[UBIDI_IX_TRIE_SIZE]=trieSize;
    indexes[UBIDI_IX_MIRROR_LENGTH]=mirrorTop;
    indexes[UBIDI_IX_LENGTH]=
        (int32_t)sizeof(indexes)+
        trieSize+
        4*mirrorTop+
        (prev-jgStart);

    if(beVerbose) {
        printf("trie size in bytes:                    %5d\n", (int)trieSize);
        printf("size in bytes of mirroring table:      %5d\n", (int)(4*mirrorTop));
        printf("length of Joining_Group array:         %5d (U+%04x..U+%04x)\n", (int)(prev-jgStart), (int)jgStart, (int)(prev-1));
        printf("data size:                             %5d\n", (int)indexes[UBIDI_IX_LENGTH]);
    }

    indexes[UBIDI_MAX_VALUES_INDEX]=
        ((int32_t)U_CHAR_DIRECTION_COUNT-1)|
        (((int32_t)U_JT_COUNT-1)<<UBIDI_JT_SHIFT)|
        (((int32_t)U_JG_COUNT-1)<<UBIDI_MAX_JG_SHIFT);

    if(csource) {
        /* write .c file for hardcoded data */
        UTrie trie={ NULL };
        UTrie2 *trie2;
        FILE *f;

        utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genbidi error: failed to utrie_unserialize(ubidi.icu trie) - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* use UTrie2 */
        dataInfo.formatVersion[0]=2;
        dataInfo.formatVersion[2]=0;
        dataInfo.formatVersion[3]=0;
        trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genbidi error: utrie2_fromUTrie() failed - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }
        {
            /* delete lead surrogate code unit values */
            UChar lead;
            UTrie2 *frozenTrie=trie2;

            /*
             * Work on a thawed copy and release the frozen source right away
             * so that it is not leaked when trie2 is replaced.
             */
            trie2=utrie2_cloneAsThawed(frozenTrie, &errorCode);
            utrie2_close(frozenTrie);
            if(U_FAILURE(errorCode)) {
                /* trie2 is not usable here; do not dereference it below */
                fprintf(
                    stderr,
                    "genbidi error: deleting lead surrogate code unit values failed - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }
            for(lead=0xd800; lead<0xdc00; ++lead) {
                utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
            }
            utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(
                    stderr,
                    "genbidi error: deleting lead surrogate code unit values failed - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }
        }

        f=usrc_create(dataDir, "ubidi_props_data.c");
        if(f!=NULL) {
            usrc_writeArray(f,
                "static const UVersionInfo ubidi_props_dataVersion={",
                dataInfo.dataVersion, 8, 4,
                "};\n\n");
            usrc_writeArray(f,
                "static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={",
                indexes, 32, UBIDI_IX_TOP,
                "};\n\n");
            usrc_writeUTrie2Arrays(f,
                "static const uint16_t ubidi_props_trieIndex[%ld]={\n", NULL,
                trie2,
                "\n};\n\n");
            usrc_writeArray(f,
                "static const uint32_t ubidi_props_mirrors[%ld]={\n",
                mirrors, 32, mirrorTop,
                "\n};\n\n");
            usrc_writeArray(f,
                "static const uint8_t ubidi_props_jgArray[%ld]={\n",
                jgArray, 8, prev-jgStart,
                "\n};\n\n");
            fputs(
                "static const UBiDiProps ubidi_props_singleton={\n"
                " NULL,\n"
                " ubidi_props_indexes,\n"
                " ubidi_props_mirrors,\n"
                " ubidi_props_jgArray,\n",
                f);
            usrc_writeUTrie2Struct(f,
                " {\n",
                trie2, "ubidi_props_trieIndex", NULL,
                " },\n");
            usrc_writeArray(f, " { ", dataInfo.formatVersion, 8, 4, " }\n");
            fputs("};\n", f);
            fclose(f);
        }
        utrie2_close(trie2);
    } else {
        /* write the data */
        pData=udata_create(dataDir, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &dataInfo,
                        haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genbidi: unable to create data memory, %s\n", u_errorName(errorCode));
            exit(errorCode);
        }

        udata_writeBlock(pData, indexes, sizeof(indexes));
        udata_writeBlock(pData, trieBlock, trieSize);
        udata_writeBlock(pData, mirrors, 4*mirrorTop);
        udata_writeBlock(pData, jgArray, prev-jgStart);

        /* finish up */
        dataLength=udata_finish(pData, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genbidi: error %d writing the output file\n", errorCode);
            exit(errorCode);
        }

        if(dataLength!=indexes[UBIDI_IX_LENGTH]) {
            fprintf(stderr, "genbidi: data length %ld != calculated size %d\n",
                dataLength, (int)indexes[UBIDI_IX_LENGTH]);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
    }

    utrie_close(pTrie);
    upvec_close(pv);
}
/*
 * Write the extension table of a converter to pData.
 *
 * For an extension-only table (TABLE_BASE not set in tableType) an
 * _MBCSHeader and the 4-byte-padded base table name are written first.
 * Then the indexes[] array is filled with the byte offset ("top") and length
 * of every sub-table, padding entries are appended where needed to keep the
 * following sub-table aligned, and finally indexes[] and all sub-tables are
 * written in the same order in which their offsets were computed.
 *
 * Side effects visible in this function: padding is appended to
 * extData->ucm->baseName, to the fromUTableUChars/fromUTableValues/fromUBytes
 * tables (via utm_alloc) and to extData->stage3 (stage3Top is incremented).
 *
 * @param cnvData     the converter object; cast to CnvExtData here
 * @param staticData  not referenced in this function
 * @param pData       the data memory to write to
 * @param tableType   TABLE_* flags; TABLE_BASE means a base table precedes
 *                    this extension data and no separate header is written
 * @return the number of bytes that should have been written
 *         (header, if any, plus extension data)
 */
static uint32_t
CnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData,
            UNewDataMemory *pData, int32_t tableType) {
    CnvExtData *extData=(CnvExtData *)cnvData;
    int32_t length, top, headerSize;

    int32_t indexes[UCNV_EXT_INDEXES_MIN_LENGTH]={ 0 };

    if(tableType&TABLE_BASE) {
        headerSize=0;
    } else {
        _MBCSHeader header={ { 0, 0, 0, 0 }, 0, 0, 0, 0, 0, 0, 0 };

        /* write the header and base table name for an extension-only table */
        /* length includes the terminating NUL of the base name */
        length=(int32_t)uprv_strlen(extData->ucm->baseName)+1;
        while(length&3) {
            /* add padding */
            extData->ucm->baseName[length++]=0;
        }

        headerSize=sizeof(header)+length;

        /* fill the header */
        header.version[0]=4;
        header.version[1]=2;
        header.flags=(uint32_t)((headerSize<<8)|MBCS_OUTPUT_EXT_ONLY);

        /* write the header and the base table name */
        udata_writeBlock(pData, &header, sizeof(header));
        udata_writeBlock(pData, extData->ucm->baseName, length);
    }

    /* fill indexes[] - offsets/indexes are in units of the target array */
    /* top is the running byte offset from the start of indexes[] */
    top=0;

    indexes[UCNV_EXT_INDEXES_LENGTH]=length=UCNV_EXT_INDEXES_MIN_LENGTH;
    top+=length*4;

    indexes[UCNV_EXT_TO_U_INDEX]=top;
    indexes[UCNV_EXT_TO_U_LENGTH]=length=utm_countItems(extData->toUTable);
    top+=length*4;

    indexes[UCNV_EXT_TO_U_UCHARS_INDEX]=top;
    indexes[UCNV_EXT_TO_U_UCHARS_LENGTH]=length=utm_countItems(extData->toUUChars);
    top+=length*2;

    indexes[UCNV_EXT_FROM_U_UCHARS_INDEX]=top;
    length=utm_countItems(extData->fromUTableUChars);
    top+=length*2;

    if(top&3) {
        /* add padding */
        /* one extra item is appended to both fromU tables, which share one length */
        *((UChar *)utm_alloc(extData->fromUTableUChars))=0;
        *((uint32_t *)utm_alloc(extData->fromUTableValues))=0;
        ++length;
        top+=2;
    }
    indexes[UCNV_EXT_FROM_U_LENGTH]=length;

    indexes[UCNV_EXT_FROM_U_VALUES_INDEX]=top;
    top+=length*4;

    indexes[UCNV_EXT_FROM_U_BYTES_INDEX]=top;
    length=utm_countItems(extData->fromUBytes);
    top+=length;

    if(top&1) {
        /* add padding */
        *((uint8_t *)utm_alloc(extData->fromUBytes))=0;
        ++length;
        ++top;
    }
    indexes[UCNV_EXT_FROM_U_BYTES_LENGTH]=length;

    /* stage 1 and stage 2 are written back to back and share one start offset */
    indexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]=top;
    indexes[UCNV_EXT_FROM_U_STAGE_1_LENGTH]=length=extData->stage1Top;
    indexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]=length+=extData->stage2Top;
    top+=length*2;

    indexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]=top;
    length=extData->stage3Top;
    top+=length*2;

    if(top&3) {
        /* add padding */
        /* stage3Top itself grows, so the write below includes the padding item */
        extData->stage3[extData->stage3Top++]=0;
        ++length;
        top+=2;
    }
    indexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]=length;

    indexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]=top;
    indexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]=length=extData->stage3bTop;
    top+=length*4;

    /* total size in bytes of the extension data, starting at indexes[] */
    indexes[UCNV_EXT_SIZE]=top;

    /* statistics */
    indexes[UCNV_EXT_COUNT_BYTES]=
        (extData->maxInBytes<<16)|
        (extData->maxOutBytes<<8)|
        extData->maxBytesPerUChar;
    indexes[UCNV_EXT_COUNT_UCHARS]=
        (extData->maxInUChars<<16)|
        (extData->maxOutUChars<<8)|
        extData->maxUCharsPerByte;

    indexes[UCNV_EXT_FLAGS]=extData->ucm->ext->unicodeMask;

    /* write the extension data */
    /* the order of these writes must match the order of the offsets computed above */
    udata_writeBlock(pData, indexes, sizeof(indexes));
    udata_writeBlock(pData, utm_getStart(extData->toUTable), indexes[UCNV_EXT_TO_U_LENGTH]*4);
    udata_writeBlock(pData, utm_getStart(extData->toUUChars), indexes[UCNV_EXT_TO_U_UCHARS_LENGTH]*2);

    udata_writeBlock(pData, utm_getStart(extData->fromUTableUChars), indexes[UCNV_EXT_FROM_U_LENGTH]*2);
    udata_writeBlock(pData, utm_getStart(extData->fromUTableValues), indexes[UCNV_EXT_FROM_U_LENGTH]*4);
    udata_writeBlock(pData, utm_getStart(extData->fromUBytes), indexes[UCNV_EXT_FROM_U_BYTES_LENGTH]);

    udata_writeBlock(pData, extData->stage1, extData->stage1Top*2);
    udata_writeBlock(pData, extData->stage2, extData->stage2Top*2);
    udata_writeBlock(pData, extData->stage3, extData->stage3Top*2);
    udata_writeBlock(pData, extData->stage3b, extData->stage3bTop*4);

    /* disabled debugging aid: dumps the non-default entries of the stage tables */
#if 0
    {
        int32_t i, j;

        length=extData->stage1Top;
        printf("\nstage1[%x]:\n", length);

        for(i=0; i<length; ++i) {
            if(extData->stage1[i]!=length) {
                printf("stage1[%04x]=%04x\n", i, extData->stage1[i]);
            }
        }

        j=length;
        length=extData->stage2Top;
        printf("\nstage2[%x]:\n", length);

        for(i=0; i<length; ++j, ++i) {
            if(extData->stage2[i]!=0) {
                printf("stage12[%04x]=%04x\n", j, extData->stage2[i]);
            }
        }

        length=extData->stage3Top;
        printf("\nstage3[%x]:\n", length);

        for(i=0; i<length; ++i) {
            if(extData->stage3[i]!=0) {
                printf("stage3[%04x]=%04x\n", i, extData->stage3[i]);
            }
        }

        length=extData->stage3bTop;
        printf("\nstage3b[%x]:\n", length);

        for(i=0; i<length; ++i) {
            if(extData->stage3b[i]!=0) {
                printf("stage3b[%04x]=%08x\n", i, extData->stage3b[i]);
            }
        }
    }
#endif

    if(VERBOSE) {
        printf("size of extension data: %ld\n", (long)top);
    }

    /* return the number of bytes that should have been written */
    return (uint32_t)(headerSize+top);
}
/*
 * Create a common data file, or a .c table-of-contents source file, from a
 * list of data files.
 *
 * The list is read from dataFile (or from stdin when dataFile is NULL). Each
 * item is handed to addFile(); the collected files are then sorted with
 * compareFiles. Depending on sourceTOC, either
 *  - a binary common data file is written: a table of contents (count, then
 *    basename offset and file offset per file), the basenames, and then the
 *    contents of every file, each padded to a 16-byte boundary; or
 *  - a C source file "<name>_<type>.c" (or "<name>.c" when type is empty) is
 *    written that declares the data symbols and a table of contents struct
 *    named "<entrypointName>_dat".
 *
 * Fatal problems terminate the process via exit() with an ICU error code.
 * If no files are listed, a message is printed and the function returns
 * without writing anything.
 *
 * @param destDir         output directory; NULL means u_getDataDirectory()
 * @param name            base name of the output; NULL means COMMON_DATA_NAME
 * @param entrypointName  symbol prefix for the generated struct (source mode)
 * @param type            type/extension; NULL means DATA_TYPE
 * @param source          passed to addFile(); NULL means "."
 * @param copyRight       copyright string for the binary file; NULL means
 *                        U_COPYRIGHT_STRING
 * @param dataFile        file containing the list of files; NULL means stdin
 * @param max_size        stored in the file-level maxSize variable
 * @param sourceTOC       if TRUE, write the .c table of contents instead of
 *                        the binary file
 * @param verbose         if TRUE, print progress messages
 * @param gencmnFileName  if not NULL, receives the name of the generated .c
 *                        file (source mode only)
 */
U_CAPI void U_EXPORT2
createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
                     const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
    static char buffer[4096];
    char *line;
    char *linePtr;
    char *s = NULL;
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t i, fileOffset, basenameOffset, length, nread;
    FileStream *in, *file;

    line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE);
    if (line == NULL) {
        fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE);
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    /* linePtr keeps the start of the allocation; line is moved while parsing */
    linePtr = line;

    maxSize = max_size;

    /* apply defaults for arguments that were not supplied */
    if (destDir == NULL) {
        destDir = u_getDataDirectory();
    }
    if (name == NULL) {
        name = COMMON_DATA_NAME;
    }
    if (type == NULL) {
        type = DATA_TYPE;
    }
    if (source == NULL) {
        source = ".";
    }

    if (dataFile == NULL) {
        in = T_FileStream_stdin();
    } else {
        in = T_FileStream_open(dataFile, "r");
        if(in == NULL) {
            fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
            exit(U_FILE_ACCESS_ERROR);
        }
    }

    if (verbose) {
        if(sourceTOC) {
            printf("generating %s_%s.c (table of contents source file)\n", name, type);
        } else {
            printf("generating %s.%s (common data file with table of contents)\n", name, type);
        }
    }

    /* read the list of files and get their lengths */
    /*
     * s carries parsing state across iterations: if it points at remaining
     * text from the current line, the next item is taken from there;
     * otherwise a new line is read into the start of the line buffer.
     */
    while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr),
                                                             LINE_BUFFER_SIZE))!=NULL) {
        /* remove trailing newline characters and parse space separated items */
        if (s != NULL && *s != 0) {
            line=s;
        } else {
            s=line;
        }
        /* terminate the current item at the first space or line end */
        while(*s!=0) {
            if(*s==' ') {
                *s=0;
                ++s;
                break;
            } else if(*s=='\r' || *s=='\n') {
                *s=0;
                break;
            }
            ++s;
        }

        /* check for comment */
        /*
         * NOTE(review): only the current item is skipped here; further
         * space-separated items on the same line are still processed by the
         * following iterations -- confirm that this is intended for comment
         * lines that contain spaces.
         */

        if (*line == '#') {
            continue;
        }

        /* add the file */
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
        {
          char *t;
          /* replace every alternate separator with the primary one */
          while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
            *t = U_FILE_SEP_CHAR;
          }
        }
#endif
        addFile(getLongPathname(line), name, source, sourceTOC, verbose);
    }

    uprv_free(linePtr);

    if(in!=T_FileStream_stdin()) {
        T_FileStream_close(in);
    }

    if(fileCount==0) {
        fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
        return;
    }

    /* sort the files by basename */
    qsort(files, fileCount, sizeof(File), compareFiles);

    if(!sourceTOC) {
        UNewDataMemory *out;

        /* determine the offsets of all basenames and files in this common one */
        /* the basenames follow the count (4 bytes) and one 8-byte entry per file */
        basenameOffset=4+8*fileCount;
        /* the first file starts at the next 16-byte boundary after the basenames */
        fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
        for(i=0; i<fileCount; ++i) {
            files[i].fileOffset=fileOffset;
            fileOffset+=(files[i].fileSize+15)&~0xf;
            files[i].basenameOffset=basenameOffset;
            basenameOffset+=files[i].basenameLength;
        }

        /* create the output file */
        out=udata_create(destDir, type, name,
                         &dataInfo,
                         copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
                         &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
                destDir, name, type,
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* write the table of contents */
        udata_write32(out, fileCount);
        for(i=0; i<fileCount; ++i) {
            udata_write32(out, files[i].basenameOffset);
            udata_write32(out, files[i].fileOffset);
        }

        /* write the basenames */
        for(i=0; i<fileCount; ++i) {
            udata_writeString(out, files[i].basename, files[i].basenameLength);
        }
        /* length tracks the size of the preceding item; only its low 4 bits matter for padding */
        length=4+8*fileCount+basenameTotal;

        /* copy the files */
        for(i=0; i<fileCount; ++i) {
            /* pad to 16-align the next file */
            length&=0xf;
            if(length!=0) {
                udata_writePadding(out, 16-length);
            }

            if (verbose) {
                printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
            }

            /* copy the next file */
            file=T_FileStream_open(files[i].pathname, "rb");
            if(file==NULL) {
                fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
                exit(U_FILE_ACCESS_ERROR);
            }
            /* copy in buffer-sized pieces; nread counts the bytes actually read */
            for(nread = 0;;) {
                length=T_FileStream_read(file, buffer, sizeof(buffer));
                if(length <= 0) {
                    break;
                }
                nread += length;
                udata_writeBlock(out, buffer, length);
            }
            T_FileStream_close(file);
            /* remember this file's size for the padding before the next item */
            length=files[i].fileSize;

            if (nread != files[i].fileSize) {
              fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
                exit(U_FILE_ACCESS_ERROR);
            }
        }

        /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
        length&=0xf;
        if(length!=0) {
            udata_writePadding(out, 16-length);
        }

        /* finish */
        udata_finish(out, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }
    } else {
        /* write a .c source file with the table of contents */
        char *filename;
        FileStream *out;

        /* create the output filename */
        /* the file name is assembled in the static buffer, which is reused for output text below */
        filename=s=buffer;
        uprv_strcpy(filename, destDir);
        s=filename+uprv_strlen(filename);
        if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
            *s++=U_FILE_SEP_CHAR;
        }
        uprv_strcpy(s, name);
        if(*(type)!=0) {
            s+=uprv_strlen(s);
            *s++='_';
            uprv_strcpy(s, type);
        }
        s+=uprv_strlen(s);
        uprv_strcpy(s, ".c");

        /* open the output file */
        out=T_FileStream_open(filename, "w");
        if (gencmnFileName != NULL) {
            uprv_strcpy(gencmnFileName, filename);
        }
        if(out==NULL) {
            fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
            exit(U_FILE_ACCESS_ERROR);
        }

        /* write the source file */
        /* from here on buffer holds output text; filename (an alias of buffer) is no longer valid */
        sprintf(buffer,
            "/*\n"
            " * ICU common data table of contents for %s.%s\n"
            " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
            " */\n\n"
            "#include \"unicode/utypes.h\"\n"
            "#include \"unicode/udata.h\"\n"
            "\n"
            "/* external symbol declarations for data (%d files) */\n",
                name, type, fileCount);
        T_FileStream_writeLine(out, buffer);

        /* one extern declaration per file, comma-separated */
        sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
        T_FileStream_writeLine(out, buffer);
        for(i=1; i<fileCount; ++i) {
            sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
            T_FileStream_writeLine(out, buffer);
        }
        T_FileStream_writeLine(out, ";\n\n");

        /* the struct definition and the start of its initializer */
        sprintf(
            buffer,
            "U_EXPORT struct {\n"
            " uint16_t headerSize;\n"
            " uint8_t magic1, magic2;\n"
            " UDataInfo info;\n"
            " char padding[%lu];\n"
            " uint32_t count, reserved;\n"
            " struct {\n"
            " const char *name;\n"
            " const void *data;\n"
            " } toc[%lu];\n"
            "} U_EXPORT2 %s_dat = {\n"
            " 32, 0xda, 0x27, {\n"
            " %lu, 0,\n"
            " %u, %u, %u, 0,\n"
            " {0x54, 0x6f, 0x43, 0x50},\n"
            " {1, 0, 0, 0},\n"
            " {0, 0, 0, 0}\n"
            " },\n"
            " \"\", %lu, 0, {\n",
            (unsigned long)32-4-sizeof(UDataInfo),
            (unsigned long)fileCount,
            entrypointName,
            (unsigned long)sizeof(UDataInfo),
            U_IS_BIG_ENDIAN,
            U_CHARSET_FAMILY,
            U_SIZEOF_UCHAR,
            (unsigned long)fileCount
        );
        T_FileStream_writeLine(out, buffer);

        /* one { name, data } table entry per file, comma-separated */
        sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
        T_FileStream_writeLine(out, buffer);
        for(i=1; i<fileCount; ++i) {
            sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
            T_FileStream_writeLine(out, buffer);
        }

        T_FileStream_writeLine(out, "\n }\n};\n");
        T_FileStream_close(out);

        uprv_free(symPrefix);
    }
}
/*
 * Serialize the main properties trie plus the additional properties and
 * emit them either as a hardcoded C source file or as a binary data file.
 *
 * dataDir  destination directory, passed to usrc_create() / udata_create()
 * csource  nonzero: write "uchar_props_data.c" via the usrc_* helpers;
 *          zero:    write a binary data file via the udata_* helpers
 *
 * Any failure is reported on stderr and terminates the process with the
 * UErrorCode as exit status.
 */
extern void
generateData(const char *dataDir, UBool csource) {
    static int32_t indexes[UPROPS_INDEX_COUNT]={
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0
    };
    static uint8_t trieBlock[40000];
    static uint8_t additionalProps[120000];

    UNewDataMemory *pData;
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t size = 0;
    int32_t trieSize, additionalPropsSize, offset;
    long dataLength;

    trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
        exit(errorCode);
    }

    offset=sizeof(indexes)/4;               /* uint32_t offset to the properties trie */

    /* round up trie size to 4-alignment */
    trieSize=(trieSize+3)&~3;
    offset+=trieSize>>2;
    indexes[UPROPS_PROPS32_INDEX]=          /* set indexes to the same offsets for empty */
    indexes[UPROPS_EXCEPTIONS_INDEX]=       /* structures from the old format version 3 */
    indexes[UPROPS_EXCEPTIONS_TOP_INDEX]=   /* so that less runtime code has to be changed */
    indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=offset;

    if(beVerbose) {
        printf("trie size in bytes: %5u\n", (int)trieSize);
    }

    if(csource) {
        /* write .c file for hardcoded data */
        UTrie trie={ NULL };
        UTrie2 *frozenTrie2;
        UTrie2 *trie2;
        FILE *f;

        utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genprops error: failed to utrie_unserialize(uprops.icu main trie) - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* use UTrie2 */
        frozenTrie2=utrie2_fromUTrie(&trie, 0, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genprops error: utrie2_fromUTrie() failed - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }
        {
            /* delete lead surrogate code unit values */
            UChar lead;

            trie2=utrie2_cloneAsThawed(frozenTrie2, &errorCode);
            /*
             * The thawed clone is a separate object (see utrie2.h), so the
             * frozen source is released here instead of being leaked.
             */
            utrie2_close(frozenTrie2);
            /* check before trie2 is dereferenced below: a failed clone must not be used */
            if(U_FAILURE(errorCode)) {
                fprintf(
                    stderr,
                    "genprops error: utrie2_cloneAsThawed() failed - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }
            for(lead=0xd800; lead<0xdc00; ++lead) {
                utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
            }
            utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(
                    stderr,
                    "genprops error: deleting lead surrogate code unit values failed - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }
        }

        f=usrc_create(dataDir, "uchar_props_data.c");
        if(f!=NULL) {
            /* unused
            usrc_writeArray(f,
                "static const UVersionInfo formatVersion={",
                dataInfo.formatVersion, 8, 4,
                "};\n\n");
             */
            usrc_writeArray(f,
                "static const UVersionInfo dataVersion={",
                dataInfo.dataVersion, 8, 4,
                "};\n\n");
            usrc_writeUTrie2Arrays(f,
                "static const uint16_t propsTrie_index[%ld]={\n", NULL,
                trie2,
                "\n};\n\n");
            usrc_writeUTrie2Struct(f,
                "static const UTrie2 propsTrie={\n",
                trie2, "propsTrie_index", NULL,
                "};\n\n");

            additionalPropsSize=writeAdditionalData(f, additionalProps, sizeof(additionalProps), indexes);
            size=4*offset+additionalPropsSize;      /* total size of data */

            usrc_writeArray(f,
                "static const int32_t indexes[UPROPS_INDEX_COUNT]={",
                indexes, 32, UPROPS_INDEX_COUNT,
                "};\n\n");
            fclose(f);
        }
        /* NOTE(review): when usrc_create() fails nothing is written and size stays 0 */
        utrie2_close(trie2);
    } else {
        /* write the data */
        pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo,
                        haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genprops: unable to create data memory, %s\n", u_errorName(errorCode));
            exit(errorCode);
        }

        additionalPropsSize=writeAdditionalData(NULL, additionalProps, sizeof(additionalProps), indexes);
        size=4*offset+additionalPropsSize;      /* total size of data */

        udata_writeBlock(pData, indexes, sizeof(indexes));
        udata_writeBlock(pData, trieBlock, trieSize);
        udata_writeBlock(pData, additionalProps, additionalPropsSize);

        /* finish up */
        dataLength=udata_finish(pData, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genprops: error %d writing the output file\n", errorCode);
            exit(errorCode);
        }

        /* cross-check the bytes actually written against the computed layout */
        if(dataLength!=(long)size) {
            fprintf(stderr, "genprops: data length %ld != calculated size %lu\n",
                dataLength, (unsigned long)size);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
    }

    if(beVerbose) {
        printf("data size: %6lu\n", (unsigned long)size);
    }
}
//---------------------------------------------------------------------------- // // main for gendict // //---------------------------------------------------------------------------- int main(int argc, char **argv) { // // Pick up and check the command line arguments, // using the standard ICU tool utils option handling. // U_MAIN_INIT_ARGS(argc, argv); progName = argv[0]; argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); if(argc<0) { // Unrecognized option fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } if(options[ARG_HELP].doesOccur || options[ARG_QMARK].doesOccur) { // -? or -h for help. usageAndDie(U_ZERO_ERROR); } UBool verbose = options[ARG_VERBOSE].doesOccur; if (argc < 3) { fprintf(stderr, "input and output file must both be specified.\n"); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } const char *outFileName = argv[2]; const char *wordFileName = argv[1]; startTime = uprv_getRawUTCtime(); // initialize start timer if (options[ARG_ICUDATADIR].doesOccur) { u_setDataDirectory(options[ARG_ICUDATADIR].value); } const char *copyright = NULL; if (options[ARG_COPYRIGHT].doesOccur) { copyright = U_COPYRIGHT_STRING; } if (options[ARG_UCHARS].doesOccur == options[ARG_BYTES].doesOccur) { fprintf(stderr, "you must specify exactly one type of trie to output!\n"); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } UBool isBytesTrie = options[ARG_BYTES].doesOccur; if (isBytesTrie != options[ARG_TRANSFORM].doesOccur) { fprintf(stderr, "you must provide a transformation for a bytes trie, and must not provide one for a uchars trie!\n"); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } IcuToolErrorCode status("gendict/main()"); #if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO const char* outDir=NULL; UNewDataMemory *pData; char msg[1024]; UErrorCode tempstatus = U_ZERO_ERROR; /* write message with just the name */ // potential for a buffer overflow here... 
sprintf(msg, "gendict writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName); fprintf(stderr, "%s\n", msg); /* write the dummy data file */ pData = udata_create(outDir, NULL, outFileName, &dataInfo, NULL, &tempstatus); udata_writeBlock(pData, msg, strlen(msg)); udata_finish(pData, &tempstatus); return (int)tempstatus; #else // Read in the dictionary source file if (verbose) { printf("Opening file %s...\n", wordFileName); } const char *codepage = "UTF-8"; UCHARBUF *f = ucbuf_open(wordFileName, &codepage, TRUE, FALSE, status); if (status.isFailure()) { fprintf(stderr, "error opening input file: ICU Error \"%s\"\n", status.errorName()); exit(status.reset()); } if (verbose) { printf("Initializing dictionary builder of type %s...\n", (isBytesTrie ? "BytesTrie" : "UCharsTrie")); } DataDict dict(isBytesTrie, status); if (status.isFailure()) { fprintf(stderr, "new DataDict: ICU Error \"%s\"\n", status.errorName()); exit(status.reset()); } if (options[ARG_TRANSFORM].doesOccur) { dict.setTransform(options[ARG_TRANSFORM].value); } UnicodeString fileLine; if (verbose) { puts("Adding words to dictionary..."); } UBool hasValues = FALSE; UBool hasValuelessContents = FALSE; int lineCount = 0; int wordCount = 0; int minlen = 255; int maxlen = 0; UBool isOk = TRUE; while (readLine(f, fileLine, status)) { lineCount++; if (fileLine.isEmpty()) continue; // Parse word [spaces value]. 
int32_t keyLen; for (keyLen = 0; keyLen < fileLine.length() && !u_isspace(fileLine[keyLen]); ++keyLen) {} if (keyLen == 0) { fprintf(stderr, "Error: no word on line %i!\n", lineCount); isOk = FALSE; continue; } int32_t valueStart; for (valueStart = keyLen; valueStart < fileLine.length() && u_isspace(fileLine[valueStart]); ++valueStart) {} if (keyLen < valueStart) { int32_t valueLength = fileLine.length() - valueStart; if (valueLength > 15) { fprintf(stderr, "Error: value too long on line %i!\n", lineCount); isOk = FALSE; continue; } char s[16]; fileLine.extract(valueStart, valueLength, s, 16, US_INV); char *end; unsigned long value = uprv_strtoul(s, &end, 0); if (end == s || *end != 0 || (int32_t)uprv_strlen(s) != valueLength || value > 0xffffffff) { fprintf(stderr, "Error: value syntax error or value too large on line %i!\n", lineCount); isOk = FALSE; continue; } dict.addWord(fileLine.tempSubString(0, keyLen), (int32_t)value, status); hasValues = TRUE; wordCount++; if (keyLen < minlen) minlen = keyLen; if (keyLen > maxlen) maxlen = keyLen; } else { dict.addWord(fileLine.tempSubString(0, keyLen), 0, status); hasValuelessContents = TRUE; wordCount++; if (keyLen < minlen) minlen = keyLen; if (keyLen > maxlen) maxlen = keyLen; } if (status.isFailure()) { fprintf(stderr, "ICU Error \"%s\": Failed to add word to trie at input line %d in input file\n", status.errorName(), lineCount); exit(status.reset()); } } if (verbose) { printf("Processed %d lines, added %d words, minlen %d, maxlen %d\n", lineCount, wordCount, minlen, maxlen); } if (!isOk && status.isSuccess()) { status.set(U_ILLEGAL_ARGUMENT_ERROR); } if (hasValues && hasValuelessContents) { fprintf(stderr, "warning: file contained both valued and unvalued strings!\n"); } if (verbose) { printf("Serializing data...isBytesTrie? 
%d\n", isBytesTrie); } int32_t outDataSize; const void *outData; UnicodeString usp; if (isBytesTrie) { StringPiece sp = dict.serializeBytes(status); outDataSize = sp.size(); outData = sp.data(); } else { dict.serializeUChars(usp, status); outDataSize = usp.length() * U_SIZEOF_UCHAR; outData = usp.getBuffer(); } if (status.isFailure()) { fprintf(stderr, "gendict: got failure of type %s while serializing, if U_ILLEGAL_ARGUMENT_ERROR possibly due to duplicate dictionary entries\n", status.errorName()); exit(status.reset()); } if (verbose) { puts("Opening output file..."); } UNewDataMemory *pData = udata_create(NULL, NULL, outFileName, &dataInfo, copyright, status); if (status.isFailure()) { fprintf(stderr, "gendict: could not open output file \"%s\", \"%s\"\n", outFileName, status.errorName()); exit(status.reset()); } if (verbose) { puts("Writing to output file..."); } int32_t indexes[DictionaryData::IX_COUNT] = { DictionaryData::IX_COUNT * sizeof(int32_t), 0, 0, 0, 0, 0, 0, 0 }; int32_t size = outDataSize + indexes[DictionaryData::IX_STRING_TRIE_OFFSET]; indexes[DictionaryData::IX_RESERVED1_OFFSET] = size; indexes[DictionaryData::IX_RESERVED2_OFFSET] = size; indexes[DictionaryData::IX_TOTAL_SIZE] = size; indexes[DictionaryData::IX_TRIE_TYPE] = isBytesTrie ? 
DictionaryData::TRIE_TYPE_BYTES : DictionaryData::TRIE_TYPE_UCHARS; if (hasValues) { indexes[DictionaryData::IX_TRIE_TYPE] |= DictionaryData::TRIE_HAS_VALUES; } indexes[DictionaryData::IX_TRANSFORM] = dict.getTransform(); udata_writeBlock(pData, indexes, sizeof(indexes)); udata_writeBlock(pData, outData, outDataSize); size_t bytesWritten = udata_finish(pData, status); if (status.isFailure()) { fprintf(stderr, "gendict: error \"%s\" writing the output file\n", status.errorName()); exit(status.reset()); } if (bytesWritten != (size_t)size) { fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName); exit(U_INTERNAL_PROGRAM_ERROR); } printf("%s: done writing\t%s (%ds).\n", progName, outFileName, elapsedTime()); #ifdef TEST_GENDICT if (isBytesTrie) { BytesTrie::Iterator it(outData, outDataSize, status); while (it.hasNext()) { it.next(status); const StringPiece s = it.getString(); int32_t val = it.getValue(); printf("%s -> %i\n", s.data(), val); } } else { UCharsTrie::Iterator it((const UChar *)outData, outDataSize, status); while (it.hasNext()) { it.next(status); const UnicodeString s = it.getString(); int32_t val = it.getValue(); char tmp[1024]; s.extract(0, s.length(), tmp, 1024); printf("%s -> %i\n", tmp, val); } } #endif return 0; #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ }
//---------------------------------------------------------------------------- // // main for genbrk // //---------------------------------------------------------------------------- int main(int argc, char **argv) { UErrorCode status = U_ZERO_ERROR; const char *ruleFileName; const char *outFileName; const char *outDir = NULL; const char *copyright = NULL; // // Pick up and check the command line arguments, // using the standard ICU tool utils option handling. // U_MAIN_INIT_ARGS(argc, argv); progName = argv[0]; argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); if(argc<0) { // Unrecognized option fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } if(options[0].doesOccur || options[1].doesOccur) { // -? or -h for help. usageAndDie(0); } if (!(options[3].doesOccur && options[4].doesOccur)) { fprintf(stderr, "rule file and output file must both be specified.\n"); usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); } ruleFileName = options[3].value; outFileName = options[4].value; if (options[5].doesOccur) { u_setDataDirectory(options[5].value); } /* Initialize ICU */ u_init(&status); if (U_FAILURE(status)) { fprintf(stderr, "%s: can not initialize ICU. 
status = %s\n", argv[0], u_errorName(status)); exit(1); } status = U_ZERO_ERROR; /* Combine the directory with the file name */ if(options[6].doesOccur) { outDir = options[6].value; } if (options[7].doesOccur) { copyright = U_COPYRIGHT_STRING; } #if UCONFIG_NO_BREAK_ITERATION UNewDataMemory *pData; char msg[1024]; /* write message with just the name */ sprintf(msg, "genbrk writes dummy %s because of UCONFIG_NO_BREAK_ITERATION, see uconfig.h", outFileName); fprintf(stderr, "%s\n", msg); /* write the dummy data file */ pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status); udata_writeBlock(pData, msg, strlen(msg)); udata_finish(pData, &status); return (int)status; #else // // Read in the rule source file // long result; long ruleFileSize; FILE *file; char *ruleBufferC; file = fopen(ruleFileName, "rb"); if( file == 0 ) { fprintf(stderr, "Could not open file \"%s\"\n", ruleFileName); exit(-1); } fseek(file, 0, SEEK_END); ruleFileSize = ftell(file); fseek(file, 0, SEEK_SET); ruleBufferC = new char[ruleFileSize+10]; result = (long)fread(ruleBufferC, 1, ruleFileSize, file); if (result != ruleFileSize) { fprintf(stderr, "Error reading file \"%s\"\n", ruleFileName); exit (-1); } ruleBufferC[ruleFileSize]=0; fclose(file); // // Look for a Unicode Signature (BOM) on the rule file // int32_t signatureLength; const char * ruleSourceC = ruleBufferC; const char* encoding = ucnv_detectUnicodeSignature( ruleSourceC, ruleFileSize, &signatureLength, &status); if (U_FAILURE(status)) { exit(status); } if(encoding!=NULL ){ ruleSourceC += signatureLength; ruleFileSize -= signatureLength; } // // Open a converter to take the rule file to UTF-16 // UConverter* conv; conv = ucnv_open(encoding, &status); if (U_FAILURE(status)) { fprintf(stderr, "ucnv_open: ICU Error \"%s\"\n", u_errorName(status)); exit(status); } // // Convert the rules to UChar. // Preflight first to determine required buffer size. 
// uint32_t destCap = ucnv_toUChars(conv, NULL, // dest, 0, // destCapacity, ruleSourceC, ruleFileSize, &status); if (status != U_BUFFER_OVERFLOW_ERROR) { fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); exit(status); }; status = U_ZERO_ERROR; UChar *ruleSourceU = new UChar[destCap+1]; ucnv_toUChars(conv, ruleSourceU, // dest, destCap+1, ruleSourceC, ruleFileSize, &status); if (U_FAILURE(status)) { fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); exit(status); }; ucnv_close(conv); // // Put the source rules into a UnicodeString // UnicodeString ruleSourceS(FALSE, ruleSourceU, destCap); // // Create the break iterator from the rules // This will compile the rules. // UParseError parseError; parseError.line = 0; parseError.offset = 0; RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(ruleSourceS, parseError, status); if (U_FAILURE(status)) { fprintf(stderr, "createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", u_errorName(status), (int)parseError.line, (int)parseError.offset); exit(status); }; // // Get the compiled rule data from the break iterator. // uint32_t outDataSize; const uint8_t *outData; outData = bi->getBinaryRules(outDataSize); // Copy the data format version numbers from the RBBI data header into the UDataMemory header. uprv_memcpy(dh.info.formatVersion, ((RBBIDataHeader *)outData)->fFormatVersion, sizeof(dh.info.formatVersion)); // // Create the output file // size_t bytesWritten; UNewDataMemory *pData; pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status); if(U_FAILURE(status)) { fprintf(stderr, "genbrk: Could not open output file \"%s\", \"%s\"\n", outFileName, u_errorName(status)); exit(status); } // Write the data itself. 
udata_writeBlock(pData, outData, outDataSize); // finish up bytesWritten = udata_finish(pData, &status); if(U_FAILURE(status)) { fprintf(stderr, "genbrk: error %d writing the output file\n", status); exit(status); } if (bytesWritten != outDataSize) { fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName); exit(-1); } delete bi; delete[] ruleSourceU; delete[] ruleBufferC; u_cleanup(); printf("genbrk: tool completed successfully.\n"); return 0; #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ }
extern void generateData(const char *dataDir, const char* bundleName) { static uint8_t sprepTrieBlock[100000]; UNewDataMemory *pData; UErrorCode errorCode=U_ZERO_ERROR; int32_t size, dataLength; char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100); #if UCONFIG_NO_IDNA size=0; #else int32_t sprepTrieSize; /* sort and add mapping data */ storeMappingData(); sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode)); exit(errorCode); } size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes); if(beVerbose) { printf("size of sprep trie %5u bytes\n", (int)sprepTrieSize); printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size); printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR); printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex); printf("Maximum length of the mapping string is : %i \n", (int)maxLength); } #endif fileName[0]=0; uprv_strcat(fileName,bundleName); /* write the data */ pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo, haveCopyright ? 
U_COPYRIGHT_STRING : NULL, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode); exit(errorCode); } #if !UCONFIG_NO_IDNA indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize; indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR; udata_writeBlock(pData, indexes, sizeof(indexes)); udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize); udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]); #endif /* finish up */ dataLength=udata_finish(pData, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode); exit(errorCode); } if(dataLength!=size) { fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n", (long)dataLength, (long)size); exit(U_INTERNAL_PROGRAM_ERROR); } #if !UCONFIG_NO_IDNA /* done with writing the data .. close the hashtable */ if (hashTable != NULL) { uhash_close(hashTable); } #endif }
/*
 * Write the complete alias table to `out`.
 *
 * Layout, in the order written below:
 *   1. the table of contents: the number of sections (8, or 9 when a
 *      normalized string section is present) followed by the size of each
 *      section in uint16_t units
 *   2. the converter list (column headers)
 *   3. the tag list (row headers), with the EMPTY and ALL tags last
 *   4. the sorted unique alias list
 *   5. the preresolved alias-to-converter list, parallel to (4)
 *   6. the tag x converter array of list offsets, EMPTY/ALL rows last
 *   7. the alias lists, preceded by one filler unit so indexing starts at 1
 *   8. the table options
 *   9. the tag strings, the alias strings and, unless the table is
 *      UCNV_IO_UNNORMALIZED, the normalized form of both
 *
 * Sorts knownAliases in place and resets aliasListsSize before the lists are
 * rebuilt by createOneAliasList().
 * Exits with U_MEMORY_ALLOCATION_ERROR if a work buffer cannot be allocated.
 */
static void
writeAliasTable(UNewDataMemory *out) {
    uint32_t i, j;
    uint32_t uniqueAliasesSize;
    uint16_t aliasOffset = (uint16_t)(tagBlock.top/sizeof(uint16_t));
    size_t aliasArrListsBytes = tagCount * converterCount * sizeof(uint16_t);
    size_t uniqueAliasesBytes = knownAliasesCount * sizeof(uint16_t);
    uint16_t *aliasArrLists = (uint16_t *)uprv_malloc(aliasArrListsBytes);
    uint16_t *uniqueAliases = (uint16_t *)uprv_malloc(uniqueAliasesBytes);
    uint16_t *uniqueAliasesToConverter = (uint16_t *)uprv_malloc(uniqueAliasesBytes);

    /*
     * The buffers are filled in right below, so a failed allocation must
     * stop here. A NULL result for a zero-byte request is not an error.
     */
    if ((aliasArrListsBytes != 0 && aliasArrLists == NULL) ||
        (uniqueAliasesBytes != 0 &&
            (uniqueAliases == NULL || uniqueAliasesToConverter == NULL))
    ) {
        fprintf(stderr, "error: out of memory while writing the alias table\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    qsort(knownAliases, knownAliasesCount, sizeof(knownAliases[0]), compareAliases);
    uniqueAliasesSize = resolveAliases(uniqueAliases, uniqueAliasesToConverter, aliasOffset);

    /* Array index starts at 1. aliasLists[0] is the size of the lists section. */
    aliasListsSize = 0;

    /* write the offsets of all the aliases lists in a 2D array, and create the lists. */
    for (i = 0; i < tagCount; ++i) {
        for (j = 0; j < converterCount; ++j) {
            createOneAliasList(aliasArrLists, i, j, aliasOffset);
        }
    }

    /* Write the size of the TOC */
    if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) {
        udata_write32(out, 8);
    }
    else {
        udata_write32(out, 9);
    }

    /* Write the sizes of each section */
    /* All sizes are the number of uint16_t units, not bytes */
    udata_write32(out, converterCount);
    udata_write32(out, tagCount);
    udata_write32(out, uniqueAliasesSize);  /* list of aliases */
    udata_write32(out, uniqueAliasesSize);  /* The preresolved form of mapping an untagged alias to a converter */
    udata_write32(out, tagCount * converterCount);
    udata_write32(out, aliasListsSize + 1);
    udata_write32(out, sizeof(tableOptions) / sizeof(uint16_t));
    udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
    if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
        udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
    }

    /* write the table of converters */
    /* Think of this as the column headers */
    for(i=0; i<converterCount; ++i) {
        udata_write16(out, (uint16_t)(converters[i].converter + aliasOffset));
    }

    /* write the table of tags */
    /* Think of this as the row headers */
    for(i=UCNV_NUM_RESERVED_TAGS; i<tagCount; ++i) {
        udata_write16(out, tags[i].tag);
    }
    /* The empty tag is considered the leftover list, and put that at the end of the priority list. */
    udata_write16(out, tags[EMPTY_TAG_NUM].tag);
    udata_write16(out, tags[ALL_TAG_NUM].tag);

    /* Write the unique list of aliases */
    udata_writeBlock(out, uniqueAliases, uniqueAliasesSize * sizeof(uint16_t));

    /* Write the preresolved alias-to-converter list (parallel to the unique list of aliases) */
    udata_writeBlock(out, uniqueAliasesToConverter, uniqueAliasesSize * sizeof(uint16_t));

    /* Write the array to the lists */
    /* The first two rows (2*converterCount entries) are skipped here and written afterwards. */
    udata_writeBlock(out, (const void *)(aliasArrLists + (2*converterCount)), (((tagCount - 2) * converterCount) * sizeof(uint16_t)));
    /* Now write the leftover part of the array for the EMPTY and ALL lists */
    udata_writeBlock(out, (const void *)aliasArrLists, (2 * converterCount * sizeof(uint16_t)));

    /* Offset the next array to make the index start at 1. */
    udata_write16(out, 0xDEAD);

    /* Write the lists */
    udata_writeBlock(out, (const void *)aliasLists, aliasListsSize * sizeof(uint16_t));

    /* Write any options for the alias table. */
    udata_writeBlock(out, (const void *)&tableOptions, sizeof(tableOptions));

    /* write the tags strings */
    udata_writeString(out, tagBlock.store, tagBlock.top);

    /* write the aliases strings */
    udata_writeString(out, stringBlock.store, stringBlock.top);

    /* write the normalized aliases strings */
    if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
        size_t normalizedBytes = tagBlock.top + stringBlock.top;
        char *normalizedStrings = (char *)uprv_malloc(normalizedBytes);

        if (normalizedBytes != 0 && normalizedStrings == NULL) {
            fprintf(stderr, "error: out of memory while writing the alias table\n");
            exit(U_MEMORY_ALLOCATION_ERROR);
        }
        createNormalizedAliasStrings(normalizedStrings, tagBlock.store, tagBlock.top);
        createNormalizedAliasStrings(normalizedStrings + tagBlock.top, stringBlock.store, stringBlock.top);

        /* Write out the complete normalized array. */
        udata_writeString(out, normalizedStrings, tagBlock.top + stringBlock.top);
        uprv_free(normalizedStrings);
    }

    uprv_free(uniqueAliasesToConverter);
    uprv_free(uniqueAliases);
    uprv_free(aliasArrLists);
}
extern int main(int argc, char* argv[]) { static char buffer[4096]; char line[512]; FileStream *in, *file; char *s; UErrorCode errorCode=U_ZERO_ERROR; uint32_t i, fileOffset, basenameOffset, length, nread; UBool sourceTOC, verbose; const char *entrypointName = NULL; U_MAIN_INIT_ARGS(argc, argv); /* preset then read command line options */ options[4].value=u_getDataDirectory(); options[6].value=COMMON_DATA_NAME; options[7].value=DATA_TYPE; options[10].value="."; argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); /* error handling, printing usage message */ if(argc<0) { fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); } else if(argc<2) { argc=-1; } if(argc<0 || options[0].doesOccur || options[1].doesOccur) { FILE *where = argc < 0 ? stderr : stdout; /* * Broken into chucks because the C89 standard says the minimum * required supported string length is 509 bytes. */ fprintf(where, "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] maxsize listfile\n", argc < 0 ? 'u' : 'U', *argv); if (options[0].doesOccur || options[1].doesOccur) { fprintf(where, "\n" "Read the list file (default: standard input) and create a common data\n" "file from specified files. 
Omit any files larger than maxsize, if maxsize > 0.\n"); fprintf(where, "\n" "Options:\n" "\t-h, -?, --help this usage text\n" "\t-v, --verbose verbose output\n" "\t-c, --copyright include the ICU copyright notice\n" "\t-C, --comment comment include a comment string\n" "\t-d, --destdir dir destination directory\n"); fprintf(where, "\t-n, --name filename output filename, without .type extension\n" "\t (default: " COMMON_DATA_NAME ")\n" "\t-t, --type filetype type of the destination file\n" "\t (default: \"" DATA_TYPE "\")\n" "\t-S, --source tocfile write a .c source file with the table of\n" "\t contents\n" "\t-e, --entrypoint name override the c entrypoint name\n" "\t (default: \"<name>_<type>\")\n"); } return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } sourceTOC=options[8].doesOccur; verbose = options[2].doesOccur; maxSize=(uint32_t)uprv_strtoul(argv[1], NULL, 0); if(argc==2) { in=T_FileStream_stdin(); } else { in=T_FileStream_open(argv[2], "r"); if(in==NULL) { fprintf(stderr, "gencmn: unable to open input file %s\n", argv[2]); exit(U_FILE_ACCESS_ERROR); } } if (verbose) { if(sourceTOC) { printf("generating %s_%s.c (table of contents source file)\n", options[6].value, options[7].value); } else { printf("generating %s.%s (common data file with table of contents)\n", options[6].value, options[7].value); } } /* read the list of files and get their lengths */ while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) { /* remove trailing newline characters */ s=line; while(*s!=0) { if(*s=='\r' || *s=='\n') { *s=0; break; } ++s; } /* check for comment */ if (*line == '#') { continue; } /* add the file */ #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) { char *t; while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) { *t = U_FILE_SEP_CHAR; } } #endif addFile(getLongPathname(line), sourceTOC, verbose); } if(in!=T_FileStream_stdin()) { T_FileStream_close(in); } if(fileCount==0) { fprintf(stderr, "gencmn: no files listed in %s\n", argc==2 ? 
"<stdin>" : argv[2]); return 0; } /* sort the files by basename */ qsort(files, fileCount, sizeof(File), compareFiles); if(!sourceTOC) { UNewDataMemory *out; /* determine the offsets of all basenames and files in this common one */ basenameOffset=4+8*fileCount; fileOffset=(basenameOffset+(basenameTotal+15))&~0xf; for(i=0; i<fileCount; ++i) { files[i].fileOffset=fileOffset; fileOffset+=(files[i].fileSize+15)&~0xf; files[i].basenameOffset=basenameOffset; basenameOffset+=files[i].basenameLength; } /* create the output file */ out=udata_create(options[4].value, options[7].value, options[6].value, &dataInfo, options[3].doesOccur ? U_COPYRIGHT_STRING : options[5].value, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n", options[4].value, options[6].value, options[7].value, u_errorName(errorCode)); exit(errorCode); } /* write the table of contents */ udata_write32(out, fileCount); for(i=0; i<fileCount; ++i) { udata_write32(out, files[i].basenameOffset); udata_write32(out, files[i].fileOffset); } /* write the basenames */ for(i=0; i<fileCount; ++i) { udata_writeString(out, files[i].basename, files[i].basenameLength); } length=4+8*fileCount+basenameTotal; /* copy the files */ for(i=0; i<fileCount; ++i) { /* pad to 16-align the next file */ length&=0xf; if(length!=0) { udata_writePadding(out, 16-length); } if (verbose) { printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? 
"" : "s"); } /* copy the next file */ file=T_FileStream_open(files[i].pathname, "rb"); if(file==NULL) { fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname); exit(U_FILE_ACCESS_ERROR); } for(nread = 0;;) { length=T_FileStream_read(file, buffer, sizeof(buffer)); if(length <= 0) { break; } nread += length; udata_writeBlock(out, buffer, length); } T_FileStream_close(file); length=files[i].fileSize; if (nread != files[i].fileSize) { fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s"); exit(U_FILE_ACCESS_ERROR); } } /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */ length&=0xf; if(length!=0) { udata_writePadding(out, 16-length); } /* finish */ udata_finish(out, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode)); exit(errorCode); } } else { /* write a .c source file with the table of contents */ char *filename; FileStream *out; /* create the output filename */ filename=s=buffer; uprv_strcpy(filename, options[4].value); s=filename+uprv_strlen(filename); if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) { *s++=U_FILE_SEP_CHAR; } uprv_strcpy(s, options[6].value); if(*(options[7].value)!=0) { s+=uprv_strlen(s); *s++='_'; uprv_strcpy(s, options[7].value); } s+=uprv_strlen(s); uprv_strcpy(s, ".c"); /* open the output file */ out=T_FileStream_open(filename, "w"); if(out==NULL) { fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename); exit(U_FILE_ACCESS_ERROR); } /* If an entrypoint is specified, use it. 
*/ if(options[9].doesOccur) { entrypointName = options[9].value; } else { entrypointName = options[6].value; } /* write the source file */ sprintf(buffer, "/*\n" " * ICU common data table of contents for %s.%s ,\n" " * Automatically generated by icu/source/tools/gencmn/gencmn .\n" " */\n\n" "#include \"unicode/utypes.h\"\n" "#include \"unicode/udata.h\"\n" "\n" "/* external symbol declarations for data */\n", options[6].value, options[7].value); T_FileStream_writeLine(out, buffer); sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname); T_FileStream_writeLine(out, buffer); for(i=1; i<fileCount; ++i) { sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname); T_FileStream_writeLine(out, buffer); } T_FileStream_writeLine(out, ";\n\n"); sprintf( buffer, "U_EXPORT struct {\n" " uint16_t headerSize;\n" " uint8_t magic1, magic2;\n" " UDataInfo info;\n" " char padding[%lu];\n" " uint32_t count, reserved;\n" " struct {\n" " const char *name;\n" " const void *data;\n" " } toc[%lu];\n" "} U_EXPORT2 %s_dat = {\n" " 32, 0xda, 0x27, {\n" " %lu, 0,\n" " %u, %u, %u, 0,\n" " {0x54, 0x6f, 0x43, 0x50},\n" " {1, 0, 0, 0},\n" " {0, 0, 0, 0}\n" " },\n" " \"\", %lu, 0, {\n", (unsigned long)32-4-sizeof(UDataInfo), (unsigned long)fileCount, entrypointName, (unsigned long)sizeof(UDataInfo), U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, U_SIZEOF_UCHAR, (unsigned long)fileCount ); T_FileStream_writeLine(out, buffer); sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname); T_FileStream_writeLine(out, buffer); for(i=1; i<fileCount; ++i) { sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname); T_FileStream_writeLine(out, buffer); } T_FileStream_writeLine(out, "\n }\n};\n"); T_FileStream_close(out); uprv_free(symPrefix); } return 0; }