Пример #1
0
static void
writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
{
    UNewDataMemory *mem = NULL;
    uint32_t sz2;
    uint32_t size = 0;
    int32_t tableType;

    if(U_FAILURE(*status))
      {
        return;
      }

    tableType=TABLE_NONE;
    if(data->cnvData!=NULL) {
        tableType|=TABLE_BASE;
    }
    if(data->extData!=NULL) {
        tableType|=TABLE_EXT;
    }

    mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);

    if(U_FAILURE(*status))
      {
        fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
                cnvName,
                "cnv",
                u_errorName(*status));
        return;
      }

    if(VERBOSE)
      {
        printf("- Opened udata %s.%s\n", cnvName, "cnv");
      }


    /* all read only, clean, platform independent data.  Mmmm. :)  */
    udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
    size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
    /* Now, write the table */
    if(tableType&TABLE_BASE) {
        size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
    }
    if(tableType&TABLE_EXT) {
        size += data->extData->write(data->extData, &data->staticData, mem, tableType);
    }

    sz2 = udata_finish(mem, status);
    if(size != sz2)
    {
        fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
        *status=U_INTERNAL_PROGRAM_ERROR;
    }
    if(VERBOSE)
    {
      printf("- Wrote %u bytes to the udata.\n", (int)sz2);
    }
}
Пример #2
0
void
BiDiPropsBuilder::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }

    UNewDataMemory *pData=udata_create(path, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &dataInfo,
                                       withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops: udata_create(%s, ubidi.icu) failed - %s\n",
                path, u_errorName(errorCode));
        return;
    }

    udata_writeBlock(pData, indexes, sizeof(indexes));
    udata_writeBlock(pData, trieBlock, trieSize);
    udata_writeBlock(pData, mirrors, 4*mirrorTop);
    UChar32 jgStart=indexes[UBIDI_IX_JG_START];
    UChar32 jgLimit=indexes[UBIDI_IX_JG_LIMIT];
    udata_writeBlock(pData, jgArray+(jgStart-MIN_JG_START), jgLimit-jgStart);
    UChar32 jgStart2=indexes[UBIDI_IX_JG_START2];
    UChar32 jgLimit2=indexes[UBIDI_IX_JG_LIMIT2];
    udata_writeBlock(pData, jgArray2+(jgStart2-MIN_JG_START2), jgLimit2-jgStart2);

    long dataLength=udata_finish(pData, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: bidipropsbuilder %d writing the output file\n", errorCode);
        return;
    }

    if(dataLength!=indexes[UBIDI_IX_LENGTH]) {
        fprintf(stderr,
                "udata_finish(ubidi.icu) reports %ld bytes written but should be %ld\n",
                dataLength, (long)indexes[UBIDI_IX_LENGTH]);
        errorCode=U_INTERNAL_PROGRAM_ERROR;
    }
}
Пример #3
0
int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
    int32_t length;
    int8_t* data = builder.createData(length);

    UNewDataMemory *pdata;
    UErrorCode status = U_ZERO_ERROR;

    pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
                         useCopyright ? U_COPYRIGHT_STRING : 0, &status);
    if (U_FAILURE(status)) {
        die("Unable to create data memory");
    }

    udata_writeBlock(pdata, data, length);

    int32_t dataLength = (int32_t) udata_finish(pdata, &status);
    if (U_FAILURE(status)) {
        die("Error writing output file");
    }
    if (dataLength != length) {
        die("Written file doesn't match expected size");
    }

    return dataLength;
}
Пример #4
0
U_CAPI void U_EXPORT2
udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode) {
    if(U_SUCCESS(*pErrorCode)) {
        udata_finish(udata_create(dir, type, name, &dummyDataInfo, NULL, pErrorCode), pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            fprintf(stderr, "error %s writing dummy data file %s" U_FILE_SEP_STRING "%s.%s\n",
                    u_errorName(*pErrorCode), dir, name, type);
            exit(*pErrorCode);
        }
    }
}
Пример #5
0
extern int
main(int argc, const char *argv[]) {
    UNewDataMemory *pData;
    UErrorCode errorCode=U_ZERO_ERROR;
    char stringValue[]={'E', 'X', 'A', 'M', 'P', 'L', 'E', '\0'};
    uint16_t intValue=2000;
    
    long dataLength;
    uint32_t size;
#ifdef WIN32
    char *currdir = _getcwd(NULL, 0);
#else
    char *currdir = getcwd(NULL, 0);
#endif

    pData=udata_create(currdir, DATA_TYPE, DATA_NAME, &dataInfo,
                       U_COPYRIGHT_STRING, &errorCode);

    if(currdir != NULL) {
        free(currdir);
    }


    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "Error: unable to create data memory, error %d\n", errorCode);
        exit(errorCode);
    }

    /* write the data to the file */
    /* a 16 bit value  and a String*/
    printf("Writing uint16_t value of %d\n", intValue);
    udata_write16(pData, intValue);
    printf("Writing string value of %s\n", stringValue);
    udata_writeString(pData, stringValue, sizeof(stringValue));

    /* finish up */
    dataLength=udata_finish(pData, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "Error: error %d writing the output file\n", errorCode);
        exit(errorCode);
    }
    size=sizeof(stringValue) + sizeof(intValue);


    if(dataLength!=(long)size) {
        fprintf(stderr, "Error: data length %ld != calculated size %lu\n", dataLength, size);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }
    return 0;
}
Пример #6
0
void
PNamesBuilderImpl::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    UNewDataMemory *pdata=udata_create(path, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
                                       withCopyright ? U_COPYRIGHT_STRING : 0, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops: udata_create(%s, pnames.icu) failed - %s\n",
                path, u_errorName(errorCode));
        return;
    }

    udata_writeBlock(pdata, indexes, PropNameData::IX_COUNT*4);
    udata_writeBlock(pdata, valueMaps.getBuffer(), valueMaps.size()*4);
    udata_writeBlock(pdata, bytesTries.data(), bytesTries.length());
    udata_writeBlock(pdata, nameGroups.data(), nameGroups.length());

    int32_t dataLength=(int32_t)udata_finish(pdata, &errorCode);
    if(dataLength!=indexes[PropNameData::IX_TOTAL_SIZE]) {
        fprintf(stderr,
                "udata_finish(pnames.icu) reports %ld bytes written but should be %ld\n",
                (long)dataLength, (long)indexes[PropNameData::IX_TOTAL_SIZE]);
        errorCode=U_INTERNAL_PROGRAM_ERROR;
    }
}
Пример #7
0
//----------------------------------------------------------------------------
//
//  main      for gencfu
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    UErrorCode  status = U_ZERO_ERROR;
    const char *confFileName;
    const char *confWSFileName;
    const char *outFileName;
    const char *outDir = NULL;
    const char *copyright = NULL;

    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[0].doesOccur || options[1].doesOccur) {
        //  -? or -h for help.
        usageAndDie(0);
    }

    if (!(options[3].doesOccur && options[4].doesOccur && options[5].doesOccur)) {
        fprintf(stderr, "confusables file, whole script confusables file and output file must all be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    confFileName   = options[3].value;
    confWSFileName = options[4].value;
    outFileName    = options[5].value;

    if (options[6].doesOccur) {
        u_setDataDirectory(options[6].value);
    }

    status = U_ZERO_ERROR;

    /* Combine the directory with the file name */
    if(options[7].doesOccur) {
        outDir = options[7].value;
    }
    if (options[8].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

#if UCONFIG_NO_REGULAR_EXPRESSIONS || UCONFIG_NO_NORMALIZATION || UCONFIG_NO_FILE_IO
    // spoof detection data file parsing is dependent on regular expressions.
    // TODO: have the tool return an error status.  Requires fixing the ICU data build
    //       so that it doesn't abort entirely on that error.

    UNewDataMemory *pData;
    char msg[1024];

    /* write message with just the name */
    sprintf(msg, "gencfu writes dummy %s because of UCONFIG_NO_REGULAR_EXPRESSIONS and/or UCONFIG_NO_NORMALIZATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &status);
    return (int)status;

#else
    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;

    //  Read in the confusables source file

    int32_t      confusablesLen = 0;
    const char  *confusables = readFile(confFileName, &confusablesLen);
    if (confusables == NULL) {
        printf("gencfu: error reading file  \"%s\"\n", confFileName);
        exit(-1);
    }

    int32_t     wsConfusablesLen = 0;
    const char *wsConfsables =  readFile(confWSFileName, &wsConfusablesLen);
    if (wsConfsables == NULL) {
        printf("gencfu: error reading file  \"%s\"\n", confFileName);
        exit(-1);
    }

    //
    //  Create the Spoof Detector from the source confusables files.
    //     This will compile the data.
    //
    UParseError parseError;
    parseError.line = 0;
    parseError.offset = 0;
    int32_t errType;
    USpoofChecker *sc = uspoof_openFromSource(confusables, confusablesLen,
                                              wsConfsables, wsConfusablesLen,
                                              &errType, &parseError, &status);
    if (U_FAILURE(status)) {
        const char *errFile = 
            (errType == USPOOF_WHOLE_SCRIPT_CONFUSABLE)? confWSFileName : confFileName;
        fprintf(stderr, "gencfu: uspoof_openFromSource error \"%s\"  at file %s, line %d, column %d\n",
                u_errorName(status), errFile, (int)parseError.line, (int)parseError.offset);
        exit(status);
    };


    //
    //  Get the compiled rule data from the USpoofChecker.
    //
    uint32_t        outDataSize;
    uint8_t        *outData;
    outDataSize = uspoof_serialize(sc, NULL, 0, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        fprintf(stderr, "gencfu: uspoof_serialize() returned %s\n", u_errorName(status));
        exit(status);
    }
    status = U_ZERO_ERROR;
    outData = new uint8_t[outDataSize];
    uspoof_serialize(sc, outData, outDataSize, &status);

    // Copy the data format version numbers from the spoof data header into the UDataMemory header.
    
    uprv_memcpy(dh.info.formatVersion, 
                reinterpret_cast<SpoofDataHeader *>(outData)->fFormatVersion,
                sizeof(dh.info.formatVersion));

    //
    //  Create the output file
    //
    size_t bytesWritten;
    UNewDataMemory *pData;
    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "gencfu: Could not open output file \"%s\", \"%s\"\n", 
                         outFileName, u_errorName(status));
        exit(status);
    }


    //  Write the data itself.
    udata_writeBlock(pData, outData, outDataSize);
    // finish up 
    bytesWritten = udata_finish(pData, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "gencfu: Error %d writing the output file\n", status);
        exit(status);
    }
    
    if (bytesWritten != outDataSize) {
        fprintf(stderr, "gencfu: Error writing to output file \"%s\"\n", outFileName);
        exit(-1);
    }

    uspoof_close(sc);
    delete [] outData;
    delete [] confusables;
    delete [] wsConfsables;
    u_cleanup();
    printf("gencfu: tool completed successfully.\n");
    return 0;
#endif   // UCONFIG_NO_REGULAR_EXPRESSIONS
}
Пример #8
0
//----------------------------------------------------------------------------
//
//  main      for genctd
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    UErrorCode  status = U_ZERO_ERROR;
    const char *wordFileName;
    const char *outFileName;
    const char *outDir = NULL;
    const char *copyright = NULL;

    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[0].doesOccur || options[1].doesOccur) {
        //  -? or -h for help.
        usageAndDie(0);
    }

    if (!options[3].doesOccur || argc < 2) {
        fprintf(stderr, "input and output file must both be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    outFileName  = options[3].value;
    wordFileName = argv[1];

    if (options[4].doesOccur) {
        u_setDataDirectory(options[4].value);
    }

    status = U_ZERO_ERROR;

    /* Combine the directory with the file name */
    if(options[5].doesOccur) {
        outDir = options[5].value;
    }
    if (options[6].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO

    UNewDataMemory *pData;
    char msg[1024];

    /* write message with just the name */
    sprintf(msg, "genctd writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &status);
    return (int)status;

#else
    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;

    //
    //  Read in the dictionary source file
    //
    long        result;
    long        wordFileSize;
    FILE        *file;
    char        *wordBufferC;
    MutableTrieDictionary *mtd = NULL;
    
    file = fopen(wordFileName, "rb");
    if( file == 0 ) { //cannot find file
        //create 1-line dummy file: ie 1 char, 1 value
        UNewDataMemory *pData;
        char msg[1024];

        /* write message with just the name */
        sprintf(msg, "%s not found, genctd writes dummy %s", wordFileName, outFileName);
        fprintf(stderr, "%s\n", msg);

        UChar c = 0x0020;
        mtd = new MutableTrieDictionary(c, status, TRUE);
        mtd->addWord(&c, 1, status, 1);

    } else { //read words in from input file
        fseek(file, 0, SEEK_END);
        wordFileSize = ftell(file);
        fseek(file, 0, SEEK_SET);
        wordBufferC = new char[wordFileSize+10];
    
        result = (long)fread(wordBufferC, 1, wordFileSize, file);
        if (result != wordFileSize)  {
            fprintf(stderr, "Error reading file \"%s\"\n", wordFileName);
            exit (-1);
        }
        wordBufferC[wordFileSize]=0;
        fclose(file);
    
        //
        // Look for a Unicode Signature (BOM) on the word file
        //
        int32_t        signatureLength;
        const char *   wordSourceC = wordBufferC;
        const char*    encoding = ucnv_detectUnicodeSignature(
                               wordSourceC, wordFileSize, &signatureLength, &status);
        if (U_FAILURE(status)) {
            exit(status);
        }
        if(encoding!=NULL ){
            wordSourceC  += signatureLength;
            wordFileSize -= signatureLength;
        }
    
        //
        // Open a converter to take the rule file to UTF-16
        //
        UConverter* conv;
        conv = ucnv_open(encoding, &status);
        if (U_FAILURE(status)) {
            fprintf(stderr, "ucnv_open: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        }
    
        //
        // Convert the words to UChar.
        //  Preflight first to determine required buffer size.
        //
        uint32_t destCap = ucnv_toUChars(conv,
                           NULL,           //  dest,
                           0,              //  destCapacity,
                           wordSourceC,
                           wordFileSize,
                           &status);
        if (status != U_BUFFER_OVERFLOW_ERROR) {
            fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        };
    
        status = U_ZERO_ERROR;
        UChar *wordSourceU = new UChar[destCap+1];
        ucnv_toUChars(conv,
                      wordSourceU,     //  dest,
                      destCap+1,
                      wordSourceC,
                      wordFileSize,
                      &status);
        if (U_FAILURE(status)) {
            fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        };
        ucnv_close(conv);
    
        // Get rid of the original file buffer
        delete[] wordBufferC;
    
        // Create a MutableTrieDictionary, and loop through all the lines, inserting
        // words.
    
        // First, pick a median character.
        UChar *current = wordSourceU + (destCap/2);
        UChar uc = *current++;
        UnicodeSet breaks;
        breaks.add(0x000A);     // Line Feed
        breaks.add(0x000D);     // Carriage Return
        breaks.add(0x2028);     // Line Separator
        breaks.add(0x2029);     // Paragraph Separator
    
        do { 
            // Look for line break
            while (uc && !breaks.contains(uc)) {
                uc = *current++;
            }
            // Now skip to first non-line-break
            while (uc && breaks.contains(uc)) {
                uc = *current++;
            }
        }
        while (uc && (breaks.contains(uc) || u_isspace(uc)));
    
        mtd = new MutableTrieDictionary(uc, status);
        
        if (U_FAILURE(status)) {
            fprintf(stderr, "new MutableTrieDictionary: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        }
        
        // Now add the words. Words are non-space characters at the beginning of
        // lines, and must be at least one UChar. If a word has an associated value,
        // the value should follow the word on the same line after a tab character.
        current = wordSourceU;
        UChar *candidate = current;
        uc = *current++;
        int32_t length = 0;
        int count = 0;
                
        while (uc) {
            while (uc && !u_isspace(uc)) {
                ++length;
                uc = *current++;
            }
            
            UnicodeString valueString;
            UChar candidateValue;
            if(uc == 0x0009){ //separator is a tab char, read in number after space
            	while (uc && u_isspace(uc)) {
            		uc = *current++;
            	}
                while (uc && !u_isspace(uc)) {
                    valueString.append(uc);
                    uc = *current++;
                }
            }
            
            if (length > 0) {
                count++;
                if(valueString.length() > 0){
                    mtd->setValued(TRUE);
    
                    uint32_t value = 0;
                    char* s = new char[valueString.length()];
                    valueString.extract(0,valueString.length(), s, valueString.length());
                    int n = sscanf(s, "%ud", &value);
                    U_ASSERT(n == 1);
                    U_ASSERT(value >= 0); 
                    mtd->addWord(candidate, length, status, (uint16_t)value);
                    delete[] s;
                } else {
                    mtd->addWord(candidate, length, status);
                }
    
                if (U_FAILURE(status)) {
                    fprintf(stderr, "MutableTrieDictionary::addWord: ICU Error \"%s\" at line %d in input file\n",
                            u_errorName(status), count);
                    exit(status);
                }
            }
    
            // Find beginning of next line
            while (uc && !breaks.contains(uc)) {
                uc = *current++;
            }
            // Find next non-line-breaking character
            while (uc && breaks.contains(uc)) {
                uc = *current++;
            }
            candidate = current-1;
            length = 0;
        }
    
        // Get rid of the Unicode text buffer
        delete[] wordSourceU;
    }

    // Now, create a CompactTrieDictionary from the mutable dictionary
    CompactTrieDictionary *ctd = new CompactTrieDictionary(*mtd, status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "new CompactTrieDictionary: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    }
    
    // Get rid of the MutableTrieDictionary
    delete mtd;

    //
    //  Get the binary data from the dictionary.
    //
    uint32_t        outDataSize = ctd->dataSize();
    const uint8_t  *outData = (const uint8_t *)ctd->data();

    //
    //  Create the output file
    //
    size_t bytesWritten;
    UNewDataMemory *pData;
    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genctd: Could not open output file \"%s\", \"%s\"\n", 
                         outFileName, u_errorName(status));
        exit(status);
    }


    //  Write the data itself.
    udata_writeBlock(pData, outData, outDataSize);
    // finish up 
    bytesWritten = udata_finish(pData, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genctd: error \"%s\" writing the output file\n", u_errorName(status));
        exit(status);
    }
    
    if (bytesWritten != outDataSize) {
        fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
        exit(-1);
    }
    
    // Get rid of the CompactTrieDictionary
    delete ctd;

    u_cleanup();

    printf("genctd: tool completed successfully.\n");
    return 0;

#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
}
Пример #9
0
extern void
generateData(const char *dataDir, UBool csource) {
    static int32_t indexes[UBIDI_IX_TOP]={
        UBIDI_IX_TOP
    };
    static uint8_t trieBlock[40000];
    static uint8_t jgArray[0x300]; /* at most for U+0600..U+08FF */

    const uint32_t *row;
    UChar32 start, end, prev, jgStart;
    int32_t i;

    UNewDataMemory *pData;
    UNewTrie *pTrie;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t trieSize;
    long dataLength;

    makeMirror();

    pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
    if(pTrie==NULL) {
        fprintf(stderr, "genbidi error: unable to create a UNewTrie\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    prev=jgStart=0;
    for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL && start<UPVEC_FIRST_SPECIAL_CP; ++i) {
        /* store most values from vector column 0 in the trie */
        if(!utrie_setRange32(pTrie, start, end+1, *row, TRUE)) {
            fprintf(stderr, "genbidi error: unable to set trie value (overflow)\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        /* store Joining_Group values from vector column 1 in a simple byte array */
        if(row[1]!=0) {
            if(start<0x600 || 0x8ff<end) {
                fprintf(stderr, "genbidi error: Joining_Group for out-of-range code points U+%04lx..U+%04lx\n",
                        (long)start, (long)end);
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }

            if(prev==0) {
                /* first code point with any value */
                prev=jgStart=start;
            } else {
                /* add No_Joining_Group for code points between prev and start */
                while(prev<start) {
                    jgArray[prev++ -jgStart]=0;
                }
            }

            /* set Joining_Group value for start..end */
            while(prev<=end) {
                jgArray[prev++ -jgStart]=(uint8_t)row[1];
            }
        }
    }

    /* finish jgArray, pad to multiple of 4 */
    while((prev-jgStart)&3) {
        jgArray[prev++ -jgStart]=0;
    }
    indexes[UBIDI_IX_JG_START]=jgStart;
    indexes[UBIDI_IX_JG_LIMIT]=prev;

    trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genbidi error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
        exit(errorCode);
    }

    indexes[UBIDI_IX_TRIE_SIZE]=trieSize;
    indexes[UBIDI_IX_MIRROR_LENGTH]=mirrorTop;
    indexes[UBIDI_IX_LENGTH]=
        (int32_t)sizeof(indexes)+
        trieSize+
        4*mirrorTop+
        (prev-jgStart);

    if(beVerbose) {
        printf("trie size in bytes:                    %5d\n", (int)trieSize);
        printf("size in bytes of mirroring table:      %5d\n", (int)(4*mirrorTop));
        printf("length of Joining_Group array:         %5d (U+%04x..U+%04x)\n", (int)(prev-jgStart), (int)jgStart, (int)(prev-1));
        printf("data size:                             %5d\n", (int)indexes[UBIDI_IX_LENGTH]);
    }

    indexes[UBIDI_MAX_VALUES_INDEX]=
        ((int32_t)U_CHAR_DIRECTION_COUNT-1)|
        (((int32_t)U_JT_COUNT-1)<<UBIDI_JT_SHIFT)|
        (((int32_t)U_JG_COUNT-1)<<UBIDI_MAX_JG_SHIFT);

    if(csource) {
        /* write .c file for hardcoded data */
        UTrie trie={ NULL };
        UTrie2 *trie2;
        FILE *f;

        utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genbidi error: failed to utrie_unserialize(ubidi.icu trie) - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* use UTrie2 */
        dataInfo.formatVersion[0]=2;
        dataInfo.formatVersion[2]=0;
        dataInfo.formatVersion[3]=0;
        trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genbidi error: utrie2_fromUTrie() failed - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }
        {
            /* delete lead surrogate code unit values */
            UChar lead;
            trie2=utrie2_cloneAsThawed(trie2, &errorCode);
            for(lead=0xd800; lead<0xdc00; ++lead) {
                utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
            }
            utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(
                    stderr,
                    "genbidi error: deleting lead surrogate code unit values failed - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }
        }

        f=usrc_create(dataDir, "ubidi_props_data.c");
        if(f!=NULL) {
            usrc_writeArray(f,
                "static const UVersionInfo ubidi_props_dataVersion={",
                dataInfo.dataVersion, 8, 4,
                "};\n\n");
            usrc_writeArray(f,
                "static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={",
                indexes, 32, UBIDI_IX_TOP,
                "};\n\n");
            usrc_writeUTrie2Arrays(f,
                "static const uint16_t ubidi_props_trieIndex[%ld]={\n", NULL,
                trie2,
                "\n};\n\n");
            usrc_writeArray(f,
                "static const uint32_t ubidi_props_mirrors[%ld]={\n",
                mirrors, 32, mirrorTop,
                "\n};\n\n");
            usrc_writeArray(f,
                "static const uint8_t ubidi_props_jgArray[%ld]={\n",
                jgArray, 8, prev-jgStart,
                "\n};\n\n");
            fputs(
                "static const UBiDiProps ubidi_props_singleton={\n"
                "  NULL,\n"
                "  ubidi_props_indexes,\n"
                "  ubidi_props_mirrors,\n"
                "  ubidi_props_jgArray,\n",
                f);
            usrc_writeUTrie2Struct(f,
                "  {\n",
                trie2, "ubidi_props_trieIndex", NULL,
                "  },\n");
            usrc_writeArray(f, "  { ", dataInfo.formatVersion, 8, 4, " }\n");
            fputs("};\n", f);
            fclose(f);
        }
        utrie2_close(trie2);
    } else {
        /* write the data */
        pData=udata_create(dataDir, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &dataInfo,
                        haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genbidi: unable to create data memory, %s\n", u_errorName(errorCode));
            exit(errorCode);
        }

        udata_writeBlock(pData, indexes, sizeof(indexes));
        udata_writeBlock(pData, trieBlock, trieSize);
        udata_writeBlock(pData, mirrors, 4*mirrorTop);
        udata_writeBlock(pData, jgArray, prev-jgStart);

        /* finish up */
        dataLength=udata_finish(pData, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genbidi: error %d writing the output file\n", errorCode);
            exit(errorCode);
        }

        if(dataLength!=indexes[UBIDI_IX_LENGTH]) {
            fprintf(stderr, "genbidi: data length %ld != calculated size %d\n",
                dataLength, (int)indexes[UBIDI_IX_LENGTH]);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
    }

    utrie_close(pTrie);
    upvec_close(pv);
}
Пример #10
0
U_CAPI void U_EXPORT2
createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
                     const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
    static char buffer[4096];
    char *line;
    char *linePtr;
    char *s = NULL;
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t i, fileOffset, basenameOffset, length, nread;
    FileStream *in, *file;

    line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE);
    if (line == NULL) {
        fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE);
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    linePtr = line;

    maxSize = max_size;

    if (destDir == NULL) {
        destDir = u_getDataDirectory();
    }
    if (name == NULL) {
        name = COMMON_DATA_NAME;
    }
    if (type == NULL) {
        type = DATA_TYPE;
    }
    if (source == NULL) {
        source = ".";
    }

    if (dataFile == NULL) {
        in = T_FileStream_stdin();
    } else {
        in = T_FileStream_open(dataFile, "r");
        if(in == NULL) {
            fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
            exit(U_FILE_ACCESS_ERROR);
        }
    }

    if (verbose) {
        if(sourceTOC) {
            printf("generating %s_%s.c (table of contents source file)\n", name, type);
        } else {
            printf("generating %s.%s (common data file with table of contents)\n", name, type);
        }
    }

    /* read the list of files and get their lengths */
    while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr),
                                                             LINE_BUFFER_SIZE))!=NULL) {
        /* remove trailing newline characters and parse space separated items */
        if (s != NULL && *s != 0) {
            line=s;
        } else {
            s=line;
        }
        while(*s!=0) {
            if(*s==' ') {
                *s=0;
                ++s;
                break;
            } else if(*s=='\r' || *s=='\n') {
                *s=0;
                break;
            }
            ++s;
        }

        /* check for comment */

        if (*line == '#') {
            continue;
        }

        /* add the file */
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
        {
          char *t;
          while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
            *t = U_FILE_SEP_CHAR;
          }
        }
#endif
        addFile(getLongPathname(line), name, source, sourceTOC, verbose);
    }

    uprv_free(linePtr);

    if(in!=T_FileStream_stdin()) {
        T_FileStream_close(in);
    }

    if(fileCount==0) {
        fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
        return;
    }

    /* sort the files by basename */
    qsort(files, fileCount, sizeof(File), compareFiles);

    if(!sourceTOC) {
        UNewDataMemory *out;

        /* determine the offsets of all basenames and files in this common one */
        basenameOffset=4+8*fileCount;
        fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
        for(i=0; i<fileCount; ++i) {
            files[i].fileOffset=fileOffset;
            fileOffset+=(files[i].fileSize+15)&~0xf;
            files[i].basenameOffset=basenameOffset;
            basenameOffset+=files[i].basenameLength;
        }

        /* create the output file */
        out=udata_create(destDir, type, name,
                         &dataInfo,
                         copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
                         &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
                destDir, name, type,
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* write the table of contents */
        udata_write32(out, fileCount);
        for(i=0; i<fileCount; ++i) {
            udata_write32(out, files[i].basenameOffset);
            udata_write32(out, files[i].fileOffset);
        }

        /* write the basenames */
        for(i=0; i<fileCount; ++i) {
            udata_writeString(out, files[i].basename, files[i].basenameLength);
        }
        length=4+8*fileCount+basenameTotal;

        /* copy the files */
        for(i=0; i<fileCount; ++i) {
            /* pad to 16-align the next file */
            length&=0xf;
            if(length!=0) {
                udata_writePadding(out, 16-length);
            }

            if (verbose) {
                printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
            }

            /* copy the next file */
            file=T_FileStream_open(files[i].pathname, "rb");
            if(file==NULL) {
                fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
                exit(U_FILE_ACCESS_ERROR);
            }
            for(nread = 0;;) {
                length=T_FileStream_read(file, buffer, sizeof(buffer));
                if(length <= 0) {
                    break;
                }
                nread += length;
                udata_writeBlock(out, buffer, length);
            }
            T_FileStream_close(file);
            length=files[i].fileSize;

            if (nread != files[i].fileSize) {
              fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
                exit(U_FILE_ACCESS_ERROR);
            }
        }

        /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
        length&=0xf;
        if(length!=0) {
            udata_writePadding(out, 16-length);
        }

        /* finish */
        udata_finish(out, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }
    } else {
        /* write a .c source file with the table of contents */
        char *filename;
        FileStream *out;

        /* create the output filename */
        filename=s=buffer;
        uprv_strcpy(filename, destDir);
        s=filename+uprv_strlen(filename);
        if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
            *s++=U_FILE_SEP_CHAR;
        }
        uprv_strcpy(s, name);
        if(*(type)!=0) {
            s+=uprv_strlen(s);
            *s++='_';
            uprv_strcpy(s, type);
        }
        s+=uprv_strlen(s);
        uprv_strcpy(s, ".c");

        /* open the output file */
        out=T_FileStream_open(filename, "w");
        if (gencmnFileName != NULL) {
            uprv_strcpy(gencmnFileName, filename);
        }
        if(out==NULL) {
            fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
            exit(U_FILE_ACCESS_ERROR);
        }

        /* write the source file */
        sprintf(buffer,
            "/*\n"
            " * ICU common data table of contents for %s.%s\n"
            " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
            " */\n\n"
            "#include \"unicode/utypes.h\"\n"
            "#include \"unicode/udata.h\"\n"
            "\n"
            "/* external symbol declarations for data (%d files) */\n",
                name, type, fileCount);
        T_FileStream_writeLine(out, buffer);

        sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
        T_FileStream_writeLine(out, buffer);
        for(i=1; i<fileCount; ++i) {
            sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
            T_FileStream_writeLine(out, buffer);
        }
        T_FileStream_writeLine(out, ";\n\n");

        sprintf(
            buffer,
            "U_EXPORT struct {\n"
            "    uint16_t headerSize;\n"
            "    uint8_t magic1, magic2;\n"
            "    UDataInfo info;\n"
            "    char padding[%lu];\n"
            "    uint32_t count, reserved;\n"
            "    struct {\n"
            "        const char *name;\n"
            "        const void *data;\n"
            "    } toc[%lu];\n"
            "} U_EXPORT2 %s_dat = {\n"
            "    32, 0xda, 0x27, {\n"
            "        %lu, 0,\n"
            "        %u, %u, %u, 0,\n"
            "        {0x54, 0x6f, 0x43, 0x50},\n"
            "        {1, 0, 0, 0},\n"
            "        {0, 0, 0, 0}\n"
            "    },\n"
            "    \"\", %lu, 0, {\n",
            (unsigned long)32-4-sizeof(UDataInfo),
            (unsigned long)fileCount,
            entrypointName,
            (unsigned long)sizeof(UDataInfo),
            U_IS_BIG_ENDIAN,
            U_CHARSET_FAMILY,
            U_SIZEOF_UCHAR,
            (unsigned long)fileCount
        );
        T_FileStream_writeLine(out, buffer);

        sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
        T_FileStream_writeLine(out, buffer);
        for(i=1; i<fileCount; ++i) {
            sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
            T_FileStream_writeLine(out, buffer);
        }

        T_FileStream_writeLine(out, "\n    }\n};\n");
        T_FileStream_close(out);

        uprv_free(symPrefix);
    }
}
Пример #11
0
extern void
generateData(const char *dataDir, UBool csource) {
    static int32_t indexes[UPROPS_INDEX_COUNT]={
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0
    };
    static uint8_t trieBlock[40000];
    static uint8_t additionalProps[120000];

    UNewDataMemory *pData;
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t size = 0;
    int32_t trieSize, additionalPropsSize, offset;
    long dataLength;

    trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
        exit(errorCode);
    }

    offset=sizeof(indexes)/4;               /* uint32_t offset to the properties trie */

    /* round up trie size to 4-alignment */
    trieSize=(trieSize+3)&~3;
    offset+=trieSize>>2;
    indexes[UPROPS_PROPS32_INDEX]=          /* set indexes to the same offsets for empty */
    indexes[UPROPS_EXCEPTIONS_INDEX]=       /* structures from the old format version 3 */
    indexes[UPROPS_EXCEPTIONS_TOP_INDEX]=   /* so that less runtime code has to be changed */
    indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=offset;

    if(beVerbose) {
        printf("trie size in bytes:                    %5u\n", (int)trieSize);
    }

    if(csource) {
        /* write .c file for hardcoded data */
        UTrie trie={ NULL };
        UTrie2 *trie2;
        FILE *f;

        utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genprops error: failed to utrie_unserialize(uprops.icu main trie) - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* use UTrie2 */
        trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genprops error: utrie2_fromUTrie() failed - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }
        {
            /* delete lead surrogate code unit values */
            UChar lead;
            trie2=utrie2_cloneAsThawed(trie2, &errorCode);
            for(lead=0xd800; lead<0xdc00; ++lead) {
                utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
            }
            utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(
                    stderr,
                    "genprops error: deleting lead surrogate code unit values failed - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }
        }

        f=usrc_create(dataDir, "uchar_props_data.c");
        if(f!=NULL) {
            /* unused
            usrc_writeArray(f,
                "static const UVersionInfo formatVersion={",
                dataInfo.formatVersion, 8, 4,
                "};\n\n");
             */
            usrc_writeArray(f,
                "static const UVersionInfo dataVersion={",
                dataInfo.dataVersion, 8, 4,
                "};\n\n");
            usrc_writeUTrie2Arrays(f,
                "static const uint16_t propsTrie_index[%ld]={\n", NULL,
                trie2,
                "\n};\n\n");
            usrc_writeUTrie2Struct(f,
                "static const UTrie2 propsTrie={\n",
                trie2, "propsTrie_index", NULL,
                "};\n\n");

            additionalPropsSize=writeAdditionalData(f, additionalProps, sizeof(additionalProps), indexes);
            size=4*offset+additionalPropsSize;      /* total size of data */

            usrc_writeArray(f,
                "static const int32_t indexes[UPROPS_INDEX_COUNT]={",
                indexes, 32, UPROPS_INDEX_COUNT,
                "};\n\n");
            fclose(f);
        }
        utrie2_close(trie2);
    } else {
        /* write the data */
        pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo,
                        haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genprops: unable to create data memory, %s\n", u_errorName(errorCode));
            exit(errorCode);
        }

        additionalPropsSize=writeAdditionalData(NULL, additionalProps, sizeof(additionalProps), indexes);
        size=4*offset+additionalPropsSize;      /* total size of data */

        udata_writeBlock(pData, indexes, sizeof(indexes));
        udata_writeBlock(pData, trieBlock, trieSize);
        udata_writeBlock(pData, additionalProps, additionalPropsSize);

        /* finish up */
        dataLength=udata_finish(pData, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genprops: error %d writing the output file\n", errorCode);
            exit(errorCode);
        }

        if(dataLength!=(long)size) {
            fprintf(stderr, "genprops: data length %ld != calculated size %lu\n",
                dataLength, (unsigned long)size);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
    }

    if(beVerbose) {
        printf("data size:                            %6lu\n", (unsigned long)size);
    }
}
Пример #12
0
//----------------------------------------------------------------------------
//
//  main      for gendict
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[ARG_HELP].doesOccur || options[ARG_QMARK].doesOccur) {
        //  -? or -h for help.
        usageAndDie(U_ZERO_ERROR);
    }

    UBool verbose = options[ARG_VERBOSE].doesOccur;

    if (argc < 3) {
        fprintf(stderr, "input and output file must both be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    const char *outFileName  = argv[2];
    const char *wordFileName = argv[1];

    startTime = uprv_getRawUTCtime(); // initialize start timer

	if (options[ARG_ICUDATADIR].doesOccur) {
        u_setDataDirectory(options[ARG_ICUDATADIR].value);
    }

    const char *copyright = NULL;
    if (options[ARG_COPYRIGHT].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

    if (options[ARG_UCHARS].doesOccur == options[ARG_BYTES].doesOccur) {
        fprintf(stderr, "you must specify exactly one type of trie to output!\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    UBool isBytesTrie = options[ARG_BYTES].doesOccur;
    if (isBytesTrie != options[ARG_TRANSFORM].doesOccur) {
        fprintf(stderr, "you must provide a transformation for a bytes trie, and must not provide one for a uchars trie!\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    IcuToolErrorCode status("gendict/main()");

#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO
    const char* outDir=NULL;

    UNewDataMemory *pData;
    char msg[1024];
    UErrorCode tempstatus = U_ZERO_ERROR;

    /* write message with just the name */ // potential for a buffer overflow here...
    sprintf(msg, "gendict writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dataInfo, NULL, &tempstatus);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &tempstatus);
    return (int)tempstatus;

#else
    //  Read in the dictionary source file
    if (verbose) { printf("Opening file %s...\n", wordFileName); }
    const char *codepage = "UTF-8";
    UCHARBUF *f = ucbuf_open(wordFileName, &codepage, TRUE, FALSE, status);
    if (status.isFailure()) {
        fprintf(stderr, "error opening input file: ICU Error \"%s\"\n", status.errorName());
        exit(status.reset());
    }
    if (verbose) { printf("Initializing dictionary builder of type %s...\n", (isBytesTrie ? "BytesTrie" : "UCharsTrie")); }
    DataDict dict(isBytesTrie, status);
    if (status.isFailure()) {
        fprintf(stderr, "new DataDict: ICU Error \"%s\"\n", status.errorName());
        exit(status.reset());
    }
    if (options[ARG_TRANSFORM].doesOccur) {
        dict.setTransform(options[ARG_TRANSFORM].value);
    }

    UnicodeString fileLine;
    if (verbose) { puts("Adding words to dictionary..."); }
    UBool hasValues = FALSE;
    UBool hasValuelessContents = FALSE;
    int lineCount = 0;
    int wordCount = 0;
    int minlen = 255;
    int maxlen = 0;
    UBool isOk = TRUE;
    while (readLine(f, fileLine, status)) {
        lineCount++;
        if (fileLine.isEmpty()) continue;
        
        // Parse word [spaces value].
        int32_t keyLen;
        for (keyLen = 0; keyLen < fileLine.length() && !u_isspace(fileLine[keyLen]); ++keyLen) {}
        if (keyLen == 0) {
            fprintf(stderr, "Error: no word on line %i!\n", lineCount);
            isOk = FALSE;
            continue;
        }
        int32_t valueStart;
        for (valueStart = keyLen;
            valueStart < fileLine.length() && u_isspace(fileLine[valueStart]);
            ++valueStart) {}

        if (keyLen < valueStart) {
            int32_t valueLength = fileLine.length() - valueStart;
            if (valueLength > 15) {
                fprintf(stderr, "Error: value too long on line %i!\n", lineCount);
                isOk = FALSE;
                continue;
            }
            char s[16];
            fileLine.extract(valueStart, valueLength, s, 16, US_INV);
            char *end;
            unsigned long value = uprv_strtoul(s, &end, 0);
            if (end == s || *end != 0 || (int32_t)uprv_strlen(s) != valueLength || value > 0xffffffff) {
                fprintf(stderr, "Error: value syntax error or value too large on line %i!\n", lineCount);
                isOk = FALSE;
                continue;
            }
            dict.addWord(fileLine.tempSubString(0, keyLen), (int32_t)value, status);
            hasValues = TRUE;
            wordCount++;
            if (keyLen < minlen) minlen = keyLen;
            if (keyLen > maxlen) maxlen = keyLen;
        } else {
            dict.addWord(fileLine.tempSubString(0, keyLen), 0, status);
            hasValuelessContents = TRUE;
            wordCount++;
            if (keyLen < minlen) minlen = keyLen;
            if (keyLen > maxlen) maxlen = keyLen;
        }

        if (status.isFailure()) {
            fprintf(stderr, "ICU Error \"%s\": Failed to add word to trie at input line %d in input file\n",
                status.errorName(), lineCount);
            exit(status.reset());
        }
    }
    if (verbose) { printf("Processed %d lines, added %d words, minlen %d, maxlen %d\n", lineCount, wordCount, minlen, maxlen); }

    if (!isOk && status.isSuccess()) {
        status.set(U_ILLEGAL_ARGUMENT_ERROR);
    }
    if (hasValues && hasValuelessContents) {
        fprintf(stderr, "warning: file contained both valued and unvalued strings!\n");
    }

    if (verbose) { printf("Serializing data...isBytesTrie? %d\n", isBytesTrie); }
    int32_t outDataSize;
    const void *outData;
    UnicodeString usp;
    if (isBytesTrie) {
        StringPiece sp = dict.serializeBytes(status);
        outDataSize = sp.size();
        outData = sp.data();
    } else {
        dict.serializeUChars(usp, status);
        outDataSize = usp.length() * U_SIZEOF_UCHAR;
        outData = usp.getBuffer();
    }
    if (status.isFailure()) {
        fprintf(stderr, "gendict: got failure of type %s while serializing, if U_ILLEGAL_ARGUMENT_ERROR possibly due to duplicate dictionary entries\n", status.errorName());
        exit(status.reset());
    }
    if (verbose) { puts("Opening output file..."); }
    UNewDataMemory *pData = udata_create(NULL, NULL, outFileName, &dataInfo, copyright, status);
    if (status.isFailure()) {
        fprintf(stderr, "gendict: could not open output file \"%s\", \"%s\"\n", outFileName, status.errorName());
        exit(status.reset());
    }

    if (verbose) { puts("Writing to output file..."); }
    int32_t indexes[DictionaryData::IX_COUNT] = {
        DictionaryData::IX_COUNT * sizeof(int32_t), 0, 0, 0, 0, 0, 0, 0
    };
    int32_t size = outDataSize + indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
    indexes[DictionaryData::IX_RESERVED1_OFFSET] = size;
    indexes[DictionaryData::IX_RESERVED2_OFFSET] = size;
    indexes[DictionaryData::IX_TOTAL_SIZE] = size;

    indexes[DictionaryData::IX_TRIE_TYPE] = isBytesTrie ? DictionaryData::TRIE_TYPE_BYTES : DictionaryData::TRIE_TYPE_UCHARS;
    if (hasValues) {
        indexes[DictionaryData::IX_TRIE_TYPE] |= DictionaryData::TRIE_HAS_VALUES;
    }

    indexes[DictionaryData::IX_TRANSFORM] = dict.getTransform();
    udata_writeBlock(pData, indexes, sizeof(indexes));
    udata_writeBlock(pData, outData, outDataSize);
    size_t bytesWritten = udata_finish(pData, status);
    if (status.isFailure()) {
        fprintf(stderr, "gendict: error \"%s\" writing the output file\n", status.errorName());
        exit(status.reset());
    }

    if (bytesWritten != (size_t)size) {
        fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

    printf("%s: done writing\t%s (%ds).\n", progName, outFileName, elapsedTime());

#ifdef TEST_GENDICT
    if (isBytesTrie) {
        BytesTrie::Iterator it(outData, outDataSize, status);
        while (it.hasNext()) {
            it.next(status);
            const StringPiece s = it.getString();
            int32_t val = it.getValue();
            printf("%s -> %i\n", s.data(), val);
        }
    } else {
        UCharsTrie::Iterator it((const UChar *)outData, outDataSize, status);
        while (it.hasNext()) {
            it.next(status);
            const UnicodeString s = it.getString();
            int32_t val = it.getValue();
            char tmp[1024];
            s.extract(0, s.length(), tmp, 1024);
            printf("%s -> %i\n", tmp, val);
        }
    }
#endif

    return 0;
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
}
Пример #13
0
//----------------------------------------------------------------------------
//
//  main      for genbrk
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    UErrorCode  status = U_ZERO_ERROR;
    const char *ruleFileName;
    const char *outFileName;
    const char *outDir = NULL;
    const char *copyright = NULL;

    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[0].doesOccur || options[1].doesOccur) {
        //  -? or -h for help.
        usageAndDie(0);
    }

    if (!(options[3].doesOccur && options[4].doesOccur)) {
        fprintf(stderr, "rule file and output file must both be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    ruleFileName = options[3].value;
    outFileName  = options[4].value;

    if (options[5].doesOccur) {
        u_setDataDirectory(options[5].value);
    }

    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;

    /* Combine the directory with the file name */
    if(options[6].doesOccur) {
        outDir = options[6].value;
    }
    if (options[7].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

#if UCONFIG_NO_BREAK_ITERATION

    UNewDataMemory *pData;
    char msg[1024];

    /* write message with just the name */
    sprintf(msg, "genbrk writes dummy %s because of UCONFIG_NO_BREAK_ITERATION, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &status);
    return (int)status;

#else

    //
    //  Read in the rule source file
    //
    long        result;
    long        ruleFileSize;
    FILE        *file;
    char        *ruleBufferC;

    file = fopen(ruleFileName, "rb");
    if( file == 0 ) {
        fprintf(stderr, "Could not open file \"%s\"\n", ruleFileName);
        exit(-1);
    }
    fseek(file, 0, SEEK_END);
    ruleFileSize = ftell(file);
    fseek(file, 0, SEEK_SET);
    ruleBufferC = new char[ruleFileSize+10];

    result = (long)fread(ruleBufferC, 1, ruleFileSize, file);
    if (result != ruleFileSize)  {
        fprintf(stderr, "Error reading file \"%s\"\n", ruleFileName);
        exit (-1);
    }
    ruleBufferC[ruleFileSize]=0;
    fclose(file);

    //
    // Look for a Unicode Signature (BOM) on the rule file
    //
    int32_t        signatureLength;
    const char *   ruleSourceC = ruleBufferC;
    const char*    encoding = ucnv_detectUnicodeSignature(
                           ruleSourceC, ruleFileSize, &signatureLength, &status);
    if (U_FAILURE(status)) {
        exit(status);
    }
    if(encoding!=NULL ){
        ruleSourceC  += signatureLength;
        ruleFileSize -= signatureLength;
    }

    //
    // Open a converter to take the rule file to UTF-16
    //
    UConverter* conv;
    conv = ucnv_open(encoding, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ucnv_open: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    }

    //
    // Convert the rules to UChar.
    //  Preflight first to determine required buffer size.
    //
    uint32_t destCap = ucnv_toUChars(conv,
                       NULL,           //  dest,
                       0,              //  destCapacity,
                       ruleSourceC,
                       ruleFileSize,
                       &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    };

    status = U_ZERO_ERROR;
    UChar *ruleSourceU = new UChar[destCap+1];
    ucnv_toUChars(conv,
                  ruleSourceU,     //  dest,
                  destCap+1,
                  ruleSourceC,
                  ruleFileSize,
                  &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    };
    ucnv_close(conv);


    //
    //  Put the source rules into a UnicodeString
    //
    UnicodeString ruleSourceS(FALSE, ruleSourceU, destCap);

    //
    //  Create the break iterator from the rules
    //     This will compile the rules.
    //
    UParseError parseError;
    parseError.line = 0;
    parseError.offset = 0;
    RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(ruleSourceS, parseError, status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
                u_errorName(status), (int)parseError.line, (int)parseError.offset);
        exit(status);
    };


    //
    //  Get the compiled rule data from the break iterator.
    //
    uint32_t        outDataSize;
    const uint8_t  *outData;
    outData = bi->getBinaryRules(outDataSize);

    // Copy the data format version numbers from the RBBI data header into the UDataMemory header.
    uprv_memcpy(dh.info.formatVersion, ((RBBIDataHeader *)outData)->fFormatVersion, sizeof(dh.info.formatVersion));

    //
    //  Create the output file
    //
    size_t bytesWritten;
    UNewDataMemory *pData;
    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genbrk: Could not open output file \"%s\", \"%s\"\n", 
                         outFileName, u_errorName(status));
        exit(status);
    }


    //  Write the data itself.
    udata_writeBlock(pData, outData, outDataSize);
    // finish up 
    bytesWritten = udata_finish(pData, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genbrk: error %d writing the output file\n", status);
        exit(status);
    }
    
    if (bytesWritten != outDataSize) {
        fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
        exit(-1);
    }

    delete bi;
    delete[] ruleSourceU;
    delete[] ruleBufferC;
    u_cleanup();


    printf("genbrk: tool completed successfully.\n");
    return 0;

#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
}
Пример #14
0
extern void
generateData(const char *dataDir, const char* bundleName) {
    static uint8_t sprepTrieBlock[100000];

    UNewDataMemory *pData;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t size, dataLength;
    char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100);

#if UCONFIG_NO_IDNA

    size=0;

#else

    int32_t sprepTrieSize;

    /* sort and add mapping data */
    storeMappingData();
    
    sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
    
    size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
    if(beVerbose) {
        printf("size of sprep trie              %5u bytes\n", (int)sprepTrieSize);
        printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
        printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR);
        printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
        printf("Maximum length of the mapping string is : %i \n", (int)maxLength);
    }

#endif

    fileName[0]=0;
    uprv_strcat(fileName,bundleName);
    /* write the data */
    pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
                       haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
        exit(errorCode);
    }

#if !UCONFIG_NO_IDNA

    indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
    indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
    
    udata_writeBlock(pData, indexes, sizeof(indexes));
    udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
    udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
    

#endif

    /* finish up */
    dataLength=udata_finish(pData, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
        exit(errorCode);
    }

    if(dataLength!=size) {
        fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
            (long)dataLength, (long)size);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

#if !UCONFIG_NO_IDNA
    /* done with writing the data .. close the hashtable */
    if (hashTable != NULL) {
        uhash_close(hashTable);
    }
#endif
}
Пример #15
0
extern int
main(int argc, char* argv[]) {
    int i, n;
    char pathBuf[512];
    FileStream *in;
    UNewDataMemory *out;
    UErrorCode errorCode=U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[DESTDIR].value=options[SOURCEDIR].value=u_getDataDirectory();
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }
    if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
        fprintf(stderr,
            "usage: %s [-options] [convrtrs.txt]\n"
            "\tread convrtrs.txt and create " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE "\n"
            "options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-v or --verbose     prints out extra information about the alias table\n"
            "\t-q or --quiet       do not display warnings and progress\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-s or --sourcedir   source directory, followed by the path\n",
            argv[0]);
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[VERBOSE].doesOccur) {
        verbose = TRUE;
    }

    if(options[QUIET].doesOccur) {
        quiet = TRUE;
    }

    if(argc>=2) {
        path=argv[1];
    } else {
        path=options[SOURCEDIR].value;
        if(path!=NULL && *path!=0) {
            char *end;

            uprv_strcpy(pathBuf, path);
            end = uprv_strchr(pathBuf, 0);
            if(*(end-1)!=U_FILE_SEP_CHAR) {
                *(end++)=U_FILE_SEP_CHAR;
            }
            uprv_strcpy(end, "convrtrs.txt");
            path=pathBuf;
        } else {
            path = "convrtrs.txt";
        }
    }

    uprv_memset(stringStore, 0, sizeof(stringStore));
    uprv_memset(tagStore, 0, sizeof(tagStore));
    uprv_memset(converters, 0, sizeof(converters));
    uprv_memset(tags, 0, sizeof(tags));
    uprv_memset(aliasLists, 0, sizeof(aliasLists));
    uprv_memset(knownAliases, 0, sizeof(aliasLists));


    in=T_FileStream_open(path, "r");
    if(in==NULL) {
        fprintf(stderr, "gencnval: unable to open input file %s\n", path);
        exit(U_FILE_ACCESS_ERROR);
    }
    parseFile(in);
    T_FileStream_close(in);

    /* create the output file */
    out=udata_create(options[DESTDIR].value, DATA_TYPE, DATA_NAME, &dataInfo,
                     options[COPYRIGHT].doesOccur ? U_COPYRIGHT_STRING : NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode));
        exit(errorCode);
    }

    /* write the table of aliases based on a tag/converter name combination */
    writeAliasTable(out);

    /* finish */
    udata_finish(out, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gencnval: error finishing output file - %s\n", u_errorName(errorCode));
        exit(errorCode);
    }

    /* clean up tags */
    for (i = 0; i < MAX_TAG_COUNT; i++) {
        for (n = 0; n < MAX_CONV_COUNT; n++) {
            if (tags[i].aliasList[n].aliases!=NULL) {
                uprv_free(tags[i].aliasList[n].aliases);
            }
        }
    }

    return 0;
}
extern int
main(int argc, char* argv[]) {
    static char buffer[4096];
    char line[512];
    FileStream *in, *file;
    char *s;
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t i, fileOffset, basenameOffset, length, nread;
    UBool sourceTOC, verbose;
    const char *entrypointName = NULL;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[4].value=u_getDataDirectory();
    options[6].value=COMMON_DATA_NAME;
    options[7].value=DATA_TYPE;
    options[10].value=".";
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    } else if(argc<2) {
        argc=-1;
    }

    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        FILE *where = argc < 0 ? stderr : stdout;
        
        /*
         * Broken into chucks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(where,
                "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] maxsize listfile\n", argc < 0 ? 'u' : 'U', *argv);
        if (options[0].doesOccur || options[1].doesOccur) {
            fprintf(where, "\n"
                "Read the list file (default: standard input) and create a common data\n"
                "file from specified files. Omit any files larger than maxsize, if maxsize > 0.\n");
            fprintf(where, "\n"
            "Options:\n"
            "\t-h, -?, --help              this usage text\n"
            "\t-v, --verbose               verbose output\n"
            "\t-c, --copyright             include the ICU copyright notice\n"
            "\t-C, --comment comment       include a comment string\n"
            "\t-d, --destdir dir           destination directory\n");
            fprintf(where,
            "\t-n, --name filename         output filename, without .type extension\n"
            "\t                            (default: " COMMON_DATA_NAME ")\n"
            "\t-t, --type filetype         type of the destination file\n"
            "\t                            (default: \"" DATA_TYPE "\")\n"
            "\t-S, --source tocfile        write a .c source file with the table of\n"
            "\t                            contents\n"
            "\t-e, --entrypoint name       override the c entrypoint name\n"
            "\t                            (default: \"<name>_<type>\")\n");
        }
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    sourceTOC=options[8].doesOccur;

    verbose = options[2].doesOccur;

    maxSize=(uint32_t)uprv_strtoul(argv[1], NULL, 0);

    if(argc==2) {
        in=T_FileStream_stdin();
    } else {
        in=T_FileStream_open(argv[2], "r");
        if(in==NULL) {
            fprintf(stderr, "gencmn: unable to open input file %s\n", argv[2]);
            exit(U_FILE_ACCESS_ERROR);
        }
    }

    if (verbose) {
        if(sourceTOC) {
            printf("generating %s_%s.c (table of contents source file)\n", options[6].value, options[7].value);
        } else {
            printf("generating %s.%s (common data file with table of contents)\n", options[6].value, options[7].value);
        }
    }

    /* read the list of files and get their lengths */
    while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
        /* remove trailing newline characters */
        s=line;
        while(*s!=0) {
            if(*s=='\r' || *s=='\n') {
                *s=0;
                break;
            }
            ++s;
        }

        /* check for comment */

        if (*line == '#') {
            continue;
        }

        /* add the file */
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
        {
          char *t;
          while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
            *t = U_FILE_SEP_CHAR;
          }
        }
#endif
        addFile(getLongPathname(line), sourceTOC, verbose);
    }

    if(in!=T_FileStream_stdin()) {
        T_FileStream_close(in);
    }

    if(fileCount==0) {
        fprintf(stderr, "gencmn: no files listed in %s\n", argc==2 ? "<stdin>" : argv[2]);
        return 0;
    }

    /* sort the files by basename */
    qsort(files, fileCount, sizeof(File), compareFiles);

    if(!sourceTOC) {
        UNewDataMemory *out;

        /* determine the offsets of all basenames and files in this common one */
        basenameOffset=4+8*fileCount;
        fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
        for(i=0; i<fileCount; ++i) {
            files[i].fileOffset=fileOffset;
            fileOffset+=(files[i].fileSize+15)&~0xf;
            files[i].basenameOffset=basenameOffset;
            basenameOffset+=files[i].basenameLength;
        }

        /* create the output file */
        out=udata_create(options[4].value, options[7].value, options[6].value,
                         &dataInfo,
                         options[3].doesOccur ? U_COPYRIGHT_STRING : options[5].value,
                         &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
                options[4].value, options[6].value, options[7].value,
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* write the table of contents */
        udata_write32(out, fileCount);
        for(i=0; i<fileCount; ++i) {
            udata_write32(out, files[i].basenameOffset);
            udata_write32(out, files[i].fileOffset);
        }

        /* write the basenames */
        for(i=0; i<fileCount; ++i) {
            udata_writeString(out, files[i].basename, files[i].basenameLength);
        }
        length=4+8*fileCount+basenameTotal;

        /* copy the files */
        for(i=0; i<fileCount; ++i) {
            /* pad to 16-align the next file */
            length&=0xf;
            if(length!=0) {
                udata_writePadding(out, 16-length);
            }

            if (verbose) {
                printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
            }

            /* copy the next file */
            file=T_FileStream_open(files[i].pathname, "rb");
            if(file==NULL) {
                fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
                exit(U_FILE_ACCESS_ERROR);
            }
            for(nread = 0;;) {
                length=T_FileStream_read(file, buffer, sizeof(buffer));
                if(length <= 0) {
                    break;
                }
                nread += length;
                udata_writeBlock(out, buffer, length);
            }
            T_FileStream_close(file);
            length=files[i].fileSize;

            if (nread != files[i].fileSize) {
              fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
                exit(U_FILE_ACCESS_ERROR);
            }
        }

        /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
        length&=0xf;
        if(length!=0) {
            udata_writePadding(out, 16-length);
        }

        /* finish */
        udata_finish(out, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }
    } else {
        /* write a .c source file with the table of contents */
        char *filename;
        FileStream *out;

        /* create the output filename */
        filename=s=buffer;
        uprv_strcpy(filename, options[4].value);
        s=filename+uprv_strlen(filename);
        if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
            *s++=U_FILE_SEP_CHAR;
        }
        uprv_strcpy(s, options[6].value);
        if(*(options[7].value)!=0) {
            s+=uprv_strlen(s);
            *s++='_';
            uprv_strcpy(s, options[7].value);
        }
        s+=uprv_strlen(s);
        uprv_strcpy(s, ".c");

        /* open the output file */
        out=T_FileStream_open(filename, "w");
        if(out==NULL) {
            fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
            exit(U_FILE_ACCESS_ERROR);
        }

        /* If an entrypoint is specified, use it. */
        if(options[9].doesOccur) {
            entrypointName = options[9].value;
        } else {
            entrypointName = options[6].value;
        }


        /* write the source file */
        sprintf(buffer,
            "/*\n"
            " * ICU common data table of contents for %s.%s ,\n"
            " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
            " */\n\n"
            "#include \"unicode/utypes.h\"\n"
            "#include \"unicode/udata.h\"\n"
            "\n"
            "/* external symbol declarations for data */\n",
            options[6].value, options[7].value);
        T_FileStream_writeLine(out, buffer);

        sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
        T_FileStream_writeLine(out, buffer);
        for(i=1; i<fileCount; ++i) {
            sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
            T_FileStream_writeLine(out, buffer);
        }
        T_FileStream_writeLine(out, ";\n\n");

        sprintf(
            buffer,
            "U_EXPORT struct {\n"
            "    uint16_t headerSize;\n"
            "    uint8_t magic1, magic2;\n"
            "    UDataInfo info;\n"
            "    char padding[%lu];\n"
            "    uint32_t count, reserved;\n"
            "    struct {\n"
            "        const char *name;\n"
            "        const void *data;\n"
            "    } toc[%lu];\n"
            "} U_EXPORT2 %s_dat = {\n"
            "    32, 0xda, 0x27, {\n"
            "        %lu, 0,\n"
            "        %u, %u, %u, 0,\n"
            "        {0x54, 0x6f, 0x43, 0x50},\n"
            "        {1, 0, 0, 0},\n"
            "        {0, 0, 0, 0}\n"
            "    },\n"
            "    \"\", %lu, 0, {\n",
            (unsigned long)32-4-sizeof(UDataInfo),
            (unsigned long)fileCount,
            entrypointName,
            (unsigned long)sizeof(UDataInfo),
            U_IS_BIG_ENDIAN,
            U_CHARSET_FAMILY,
            U_SIZEOF_UCHAR,
            (unsigned long)fileCount
        );
        T_FileStream_writeLine(out, buffer);

        sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
        T_FileStream_writeLine(out, buffer);
        for(i=1; i<fileCount; ++i) {
            sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
            T_FileStream_writeLine(out, buffer);
        }

        T_FileStream_writeLine(out, "\n    }\n};\n");
        T_FileStream_close(out);

        uprv_free(symPrefix);
    }

    return 0;
}