Esempio n. 1
0
int main(int argc, char *argv[]) {
    UErrorCode status = U_ZERO_ERROR;
    u_init(&status);
    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
        // Note: u_init() will try to open ICU property data.
        //       failures here are expected when building ICU from scratch.
        //       ignore them.
        fprintf(stderr, "genpname: can not initialize ICU.  Status = %s\n",
            u_errorName(status));
        exit(1);
    }

    genpname app;
    U_MAIN_INIT_ARGS(argc, argv);
    int retVal = app.MMain(argc, argv);
    u_cleanup();
    return retVal;
}
Esempio n. 2
0
void UPerfTest::init(UOption addOptions[], int32_t addOptionsCount,
                     UErrorCode& status) {
    //initialize the argument list
    U_MAIN_INIT_ARGS(_argc, _argv);

    resolvedFileName = NULL;

    // add specific options
    int32_t optionsCount = OPTIONS_COUNT;
    if (addOptionsCount > 0) {
        memcpy(options+optionsCount, addOptions, addOptionsCount*sizeof(UOption));
        optionsCount += addOptionsCount;
    }

    //parse the arguments
    _remainingArgc = u_parseArgs(_argc, (char**)_argv, optionsCount, options);

    // copy back values for additional options
    if (addOptionsCount > 0) {
        memcpy(addOptions, options+OPTIONS_COUNT, addOptionsCount*sizeof(UOption));
    }

    // Now setup the arguments
    if(_argc==1 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(options[VERBOSE].doesOccur) {
        verbose = TRUE;
    }

    if(options[SOURCEDIR].doesOccur) {
        sourceDir = options[SOURCEDIR].value;
    }

    if(options[ENCODING].doesOccur) {
        encoding = options[ENCODING].value;
    }

    if(options[USELEN].doesOccur) {
        uselen = TRUE;
    }

    if(options[FILE_NAME].doesOccur){
        fileName = options[FILE_NAME].value;
    }

    if(options[PASSES].doesOccur) {
        passes = atoi(options[PASSES].value);
    }
    if(options[ITERATIONS].doesOccur) {
        iterations = atoi(options[ITERATIONS].value);
        if(options[TIME].doesOccur) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
    } else if(options[TIME].doesOccur) {
        time = atoi(options[TIME].value);
    } else {
        iterations = 1000; // some default
    }

    if(options[LINE_MODE].doesOccur) {
        line_mode = TRUE;
        bulk_mode = FALSE;
    }

    if(options[BULK_MODE].doesOccur) {
        bulk_mode = TRUE;
        line_mode = FALSE;
    }
    
    if(options[LOCALE].doesOccur) {
        locale = options[LOCALE].value;
    }

    int32_t len = 0;
    if(fileName!=NULL){
        //pre-flight
        ucbuf_resolveFileName(sourceDir, fileName, NULL, &len, &status);
        resolvedFileName = (char*) uprv_malloc(len);
        if(resolvedFileName==NULL){
            status= U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if(status == U_BUFFER_OVERFLOW_ERROR){
            status = U_ZERO_ERROR;
        }
        ucbuf_resolveFileName(sourceDir, fileName, resolvedFileName, &len, &status);
        ucharBuf = ucbuf_open(resolvedFileName,&encoding,TRUE,FALSE,&status);

        if(U_FAILURE(status)){
            printf("Could not open the input file %s. Error: %s\n", fileName, u_errorName(status));
            return;
        }
    }
}
Esempio n. 3
0
int main(int argc, const char* const argv[])
{
    int nerrors = 0;
    UBool   defaultDataFound;
    TestNode *root;
    const char *warnOrErr = "Failure"; 
    UDate startTime, endTime;
    int32_t diffTime;

    /* initial check for the default converter */
    UErrorCode errorCode = U_ZERO_ERROR;
    UResourceBundle *rb;
    UConverter *cnv;

    U_MAIN_INIT_ARGS(argc, argv);

    startTime = uprv_getRawUTCtime();

    gOrigArgc = argc;
    gOrigArgv = argv;
    if (!initArgs(argc, argv, NULL, NULL)) {
        /* Error already displayed. */
        return -1;
    }
    
    /* Check whether ICU will initialize without forcing the build data directory into
     *  the ICU_DATA path.  Success here means either the data dll contains data, or that
     *  this test program was run with ICU_DATA set externally.  Failure of this check
     *  is normal when ICU data is not packaged into a shared library.
     *
     *  Whether or not this test succeeds, we want to cleanup and reinitialize
     *  with a data path so that data loading from individual files can be tested.
     */
    defaultDataFound = TRUE;
    u_init(&errorCode);
    if (U_FAILURE(errorCode)) {
        fprintf(stderr,
            "#### Note:  ICU Init without build-specific setDataDirectory() failed. %s\n", u_errorName(errorCode));
        defaultDataFound = FALSE;
    }
    u_cleanup();
#ifdef URES_DEBUG
    fprintf(stderr, "After initial u_cleanup: RB cache %s empty.\n", ures_dumpCacheContents()?"WAS NOT":"was");
#endif

    while (getTestOption(REPEAT_TESTS_OPTION) > 0) {   /* Loop runs once per complete execution of the tests
                                  *   used for -r  (repeat) test option.                */
        if (!initArgs(argc, argv, NULL, NULL)) {
            /* Error already displayed. */
            return -1;
        }
        errorCode = U_ZERO_ERROR;

        /* Initialize ICU */
        if (!defaultDataFound) {
            ctest_setICU_DATA();    /* u_setDataDirectory() must happen Before u_init() */
        }
        u_init(&errorCode);
        if (U_FAILURE(errorCode)) {
            fprintf(stderr,
                "#### ERROR! %s: u_init() failed with status = \"%s\".\n" 
                "*** Check the ICU_DATA environment variable and \n"
                "*** check that the data files are present.\n", argv[0], u_errorName(errorCode));
                if(!getTestOption(WARN_ON_MISSING_DATA_OPTION)) {
                    fprintf(stderr, "*** Exiting.  Use the '-w' option if data files were\n*** purposely removed, to continue test anyway.\n");
                    u_cleanup();
                    return 1;
                }
        }
        


        /* try more data */
        cnv = ucnv_open(TRY_CNV_2, &errorCode);
        if(cnv != 0) {
            /* ok */
            ucnv_close(cnv);
        } else {
            fprintf(stderr,
                    "*** %s! The converter for " TRY_CNV_2 " cannot be opened.\n"
                    "*** Check the ICU_DATA environment variable and \n"
                    "*** check that the data files are present.\n", warnOrErr);
            if(!getTestOption(WARN_ON_MISSING_DATA_OPTION)) {
                fprintf(stderr, "*** Exitting.  Use the '-w' option if data files were\n*** purposely removed, to continue test anyway.\n");
                u_cleanup();
                return 1;
            }
        }

        rb = ures_open(NULL, "en", &errorCode);
        if(U_SUCCESS(errorCode)) {
            /* ok */
            ures_close(rb);
        } else {
            fprintf(stderr,
                    "*** %s! The \"en\" locale resource bundle cannot be opened.\n"
                    "*** Check the ICU_DATA environment variable and \n"
                    "*** check that the data files are present.\n", warnOrErr);
            if(!getTestOption(WARN_ON_MISSING_DATA_OPTION)) {
                fprintf(stderr, "*** Exitting.  Use the '-w' option if data files were\n*** purposely removed, to continue test anyway.\n");
                u_cleanup();
                return 1;
            }
        }

        errorCode = U_ZERO_ERROR;
        rb = ures_open(NULL, NULL, &errorCode);
        if(U_SUCCESS(errorCode)) {
            /* ok */
            if (errorCode == U_USING_DEFAULT_WARNING || errorCode == U_USING_FALLBACK_WARNING) {
                fprintf(stderr,
                        "#### Note: The default locale %s is not available\n", uloc_getDefault());
            }
            ures_close(rb);
        } else {
            fprintf(stderr,
                    "*** %s! Can not open a resource bundle for the default locale %s\n", warnOrErr, uloc_getDefault());
            if(!getTestOption(WARN_ON_MISSING_DATA_OPTION)) {
                fprintf(stderr, "*** Exitting.  Use the '-w' option if data files were\n"
                    "*** purposely removed, to continue test anyway.\n");
                u_cleanup();
                return 1;
            }
        }
        fprintf(stdout, "Default locale for this run is %s\n", uloc_getDefault());

        /* Build a tree of all tests.   
         *   Subsequently will be used to find / iterate the tests to run */
        root = NULL;
        addAllTests(&root);

        /*  Tests acutally run HERE.   TODO:  separate command line option parsing & setting from test execution!! */
        nerrors = runTestRequest(root, argc, argv);

        setTestOption(REPEAT_TESTS_OPTION, DECREMENT_OPTION_VALUE);
        if (getTestOption(REPEAT_TESTS_OPTION) > 0) {
            printf("Repeating tests %d more time(s)\n", getTestOption(REPEAT_TESTS_OPTION));
        }
        cleanUpTestTree(root);

#ifdef CTST_LEAK_CHECK
        ctst_freeAll();
        /* To check for leaks */
        u_cleanup(); /* nuke the hashtable.. so that any still-open cnvs are leaked */
        
        if(getTestOption(VERBOSITY_OPTION) && ctst_allocated_total>0) {
          fprintf(stderr,"ctst_freeAll():  cleaned up after %d allocations (queue of %d)\n", ctst_allocated_total, CTST_MAX_ALLOC);
        }
#ifdef URES_DEBUG
        if(ures_dumpCacheContents()) {
          fprintf(stderr, "Error: After final u_cleanup, RB cache was not empty.\n");
          nerrors++;
        } else {
          fprintf(stderr,"OK: After final u_cleanup, RB cache was empty.\n");
        }
#endif
#endif

    }  /* End of loop that repeats the entire test, if requested.  (Normally doesn't loop)  */

#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
    unistr_printLengths();
#endif

    endTime = uprv_getRawUTCtime();
    diffTime = (int32_t)(endTime - startTime);
    printf("Elapsed Time: %02d:%02d:%02d.%03d\n",
        (int)((diffTime%U_MILLIS_PER_DAY)/U_MILLIS_PER_HOUR),
        (int)((diffTime%U_MILLIS_PER_HOUR)/U_MILLIS_PER_MINUTE),
        (int)((diffTime%U_MILLIS_PER_MINUTE)/U_MILLIS_PER_SECOND),
        (int)(diffTime%U_MILLIS_PER_SECOND));

    return nerrors ? 1 : 0;
}
extern int
main(int argc, char* argv[]) {
    UVersionInfo version;
    UBool store10Names=FALSE;
    UErrorCode errorCode = U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* Initialize ICU */
    u_init(&errorCode);
    if (U_FAILURE(errorCode) && errorCode != U_FILE_ACCESS_ERROR) {
        /* Note: u_init() will try to open ICU property data.
         *       failures here are expected when building ICU from scratch.
         *       ignore them.
         */
        fprintf(stderr, "%s: can not initialize ICU.  errorCode = %s\n",
            argv[0], u_errorName(errorCode));
        exit(1);
    }

    /* preset then read command line options */
    options[5].value=u_getDataDirectory();
    options[6].value="4.1";
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    } else if(argc<2) {
        argc=-1;
    }
    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        /*
         * Broken into chucks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(stderr,
            "Usage: %s [-1[+|-]] [-v[+|-]] [-c[+|-]] filename\n"
            "\n"
            "Read the UnicodeData.txt file and \n"
            "create a binary file " DATA_NAME "." DATA_TYPE " with the character names\n"
            "\n"
            "\tfilename  absolute path/filename for the Unicode database text file\n"
            "\t\t(default: standard input)\n"
            "\n",
            argv[0]);
        fprintf(stderr,
            "Options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-v or --verbose     verbose output\n"
            "\t-q or --quiet       no output\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-u or --unicode     Unicode version, followed by the version like 3.0.0\n"
            "\t-1 or --unicode1-names  store Unicode 1.0 character names\n");
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    /* get the options values */
    beVerbose=options[2].doesOccur;
    beQuiet=options[3].doesOccur;
    haveCopyright=options[4].doesOccur;
    store10Names=options[7].doesOccur;

    /* set the Unicode version */
    u_versionFromString(version, options[6].value);
    uprv_memcpy(dataInfo.dataVersion, version, 4);
    ucdVersion=findUnicodeVersion(version);

    init();
    parseDB(argc>=2 ? argv[1] : "-", store10Names);
    compress();
    generateData(options[5].value);

    u_cleanup();
    return 0;
}
int
main(int argc,
     char* argv[])
{
    UErrorCode  status    = U_ZERO_ERROR;
    const char *arg       = NULL;
    const char *outputDir = NULL; /* NULL = no output directory, use current */
    const char *inputDir  = NULL;
    const char *encoding  = "";
    int         i;

    U_MAIN_INIT_ARGS(argc, argv);

    argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr, "%s: error in command line argument \"%s\"\n", argv[0], argv[-argc]);
    } else if(argc<2) {
        argc = -1;
    }

    if(options[VERSION].doesOccur) {
        fprintf(stderr,
                "%s version %s (ICU version %s).\n"
                "%s\n",
                argv[0], GENRB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
        return U_ZERO_ERROR;
    }

    if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
        /*
         * Broken into chucks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(stderr,
                "Usage: %s [OPTIONS] [FILES]\n"
                "\tReads the list of resource bundle source files and creates\n"
                "\tbinary version of reosurce bundles (.res files)\n",
                argv[0]);
        fprintf(stderr,
                "Options:\n"
                "\t-h or -? or --help       this usage text\n"
                "\t-q or --quiet            do not display warnings\n"
                "\t-v or --verbose          print extra information when processing files\n"
                "\t-V or --version          prints out version number and exits\n"
                "\t-c or --copyright        include copyright notice\n");
        fprintf(stderr,
                "\t-e or --encoding         encoding of source files\n"
                "\t-d of --destdir          destination directory, followed by the path, defaults to %s\n"
                "\t-s or --sourcedir        source directory for files followed by path, defaults to %s\n"
                "\t-i or --icudatadir       directory for locating any needed intermediate data files,\n"
                "\t                         followed by path, defaults to %s\n",
                u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory());
        fprintf(stderr,
                "\t-j or --write-java       write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"
                "\t                         defaults to ASCII and \\uXXXX format.\n"
                "\t-p or --package-name     For ICU4J: package name for writing the ListResourceBundle for ICU4J,\n"
                "\t                         defaults to com.ibm.icu.impl.data\n");
        fprintf(stderr,
                "\t-b or --bundle-name      bundle name for writing the ListResourceBundle for ICU4J,\n"
                "\t                         defaults to LocaleElements\n"
                "\t-x or --write-xliff      write a XLIFF file for the resource bundle. Followed by an optional output file name.\n"
                "\t-k or --strict           use pedantic parsing of syntax\n"
                /*added by Jing*/
                "\t-l or --language         For XLIFF: language code compliant with ISO 639.\n");

        return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[VERBOSE].doesOccur) {
        setVerbose(TRUE);
    }

    if(options[QUIET].doesOccur) {
        setShowWarning(FALSE);
    }
    if(options[STRICT].doesOccur) {
        setStrict(TRUE);
    }
    if(options[COPYRIGHT].doesOccur){
        setIncludeCopyright(TRUE);
    }

    if(options[SOURCEDIR].doesOccur) {
        inputDir = options[SOURCEDIR].value;
    }

    if(options[DESTDIR].doesOccur) {
        outputDir = options[DESTDIR].value;
    }
    if(options[PACKAGE_NAME].doesOccur) {
        gPackageName = options[PACKAGE_NAME].value;
        if(!strcmp(gPackageName, "ICUDATA"))
        {
            gPackageName = U_ICUDATA_NAME;
        }
        if(gPackageName[0] == 0)
        {
            gPackageName = NULL;
        }
    }

    if(options[ENCODING].doesOccur) {
        encoding = options[ENCODING].value;
    }

    if(options[ICUDATADIR].doesOccur) {
        u_setDataDirectory(options[ICUDATADIR].value);
    }
    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
        /* Note: u_init() will try to open ICU property data.
         *       failures here are expected when building ICU from scratch.
         *       ignore them.
        */
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;
    if(options[WRITE_JAVA].doesOccur) {
        write_java = TRUE;
        outputEnc = options[WRITE_JAVA].value;
    }

    if(options[BUNDLE_NAME].doesOccur) {
        bundleName = options[BUNDLE_NAME].value;
    }

    if(options[WRITE_XLIFF].doesOccur) {
        write_xliff = TRUE;
        if(options[WRITE_XLIFF].value != NULL){
            xliffOutputFileName = options[WRITE_XLIFF].value;
        }
    }

    if(options[NO_BINARY_COLLATION].doesOccur) {
      initParser(FALSE);
    } else {
      initParser(TRUE);
    }

    /*added by Jing*/
    if(options[LANGUAGE].doesOccur) {
        language = options[LANGUAGE].value;
    }

    /* generate the binary files */
    for(i = 1; i < argc; ++i) {
        status = U_ZERO_ERROR;
        arg    = getLongPathname(argv[i]);

        if (inputDir) {
            uprv_strcpy(theCurrentFileName, inputDir);
            uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING);
        } else {
            *theCurrentFileName = 0;
        }
        uprv_strcat(theCurrentFileName, arg);

        if (isVerbose()) {
            printf("Processing file \"%s\"\n", theCurrentFileName);
        }
        processFile(arg, encoding, inputDir, outputDir, gPackageName, &status);
    }

    /* Dont return warnings as a failure */
    if (! U_FAILURE(status)) {
        return 0;
    }

    return status;
}
//----------------------------------------------------------------------------
//
//  main      for genctd
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    UErrorCode  status = U_ZERO_ERROR;
    const char *wordFileName;
    const char *outFileName;
    const char *outDir = NULL;
    const char *copyright = NULL;

    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[0].doesOccur || options[1].doesOccur) {
        //  -? or -h for help.
        usageAndDie(0);
    }

    if (!options[3].doesOccur || argc < 2) {
        fprintf(stderr, "input and output file must both be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    outFileName  = options[3].value;
    wordFileName = argv[1];

    if (options[4].doesOccur) {
        u_setDataDirectory(options[4].value);
    }

    status = U_ZERO_ERROR;

    /* Combine the directory with the file name */
    if(options[5].doesOccur) {
        outDir = options[5].value;
    }
    if (options[6].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO

    UNewDataMemory *pData;
    char msg[1024];

    /* write message with just the name */
    sprintf(msg, "genctd writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &status);
    return (int)status;

#else
    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;

    //
    //  Read in the dictionary source file
    //
    long        result;
    long        wordFileSize;
    FILE        *file;
    char        *wordBufferC;
    MutableTrieDictionary *mtd = NULL;
    
    file = fopen(wordFileName, "rb");
    if( file == 0 ) { //cannot find file
        //create 1-line dummy file: ie 1 char, 1 value
        UNewDataMemory *pData;
        char msg[1024];

        /* write message with just the name */
        sprintf(msg, "%s not found, genctd writes dummy %s", wordFileName, outFileName);
        fprintf(stderr, "%s\n", msg);

        UChar c = 0x0020;
        mtd = new MutableTrieDictionary(c, status, TRUE);
        mtd->addWord(&c, 1, status, 1);

    } else { //read words in from input file
        fseek(file, 0, SEEK_END);
        wordFileSize = ftell(file);
        fseek(file, 0, SEEK_SET);
        wordBufferC = new char[wordFileSize+10];
    
        result = (long)fread(wordBufferC, 1, wordFileSize, file);
        if (result != wordFileSize)  {
            fprintf(stderr, "Error reading file \"%s\"\n", wordFileName);
            exit (-1);
        }
        wordBufferC[wordFileSize]=0;
        fclose(file);
    
        //
        // Look for a Unicode Signature (BOM) on the word file
        //
        int32_t        signatureLength;
        const char *   wordSourceC = wordBufferC;
        const char*    encoding = ucnv_detectUnicodeSignature(
                               wordSourceC, wordFileSize, &signatureLength, &status);
        if (U_FAILURE(status)) {
            exit(status);
        }
        if(encoding!=NULL ){
            wordSourceC  += signatureLength;
            wordFileSize -= signatureLength;
        }
    
        //
        // Open a converter to take the rule file to UTF-16
        //
        UConverter* conv;
        conv = ucnv_open(encoding, &status);
        if (U_FAILURE(status)) {
            fprintf(stderr, "ucnv_open: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        }
    
        //
        // Convert the words to UChar.
        //  Preflight first to determine required buffer size.
        //
        uint32_t destCap = ucnv_toUChars(conv,
                           NULL,           //  dest,
                           0,              //  destCapacity,
                           wordSourceC,
                           wordFileSize,
                           &status);
        if (status != U_BUFFER_OVERFLOW_ERROR) {
            fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        };
    
        status = U_ZERO_ERROR;
        UChar *wordSourceU = new UChar[destCap+1];
        ucnv_toUChars(conv,
                      wordSourceU,     //  dest,
                      destCap+1,
                      wordSourceC,
                      wordFileSize,
                      &status);
        if (U_FAILURE(status)) {
            fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        };
        ucnv_close(conv);
    
        // Get rid of the original file buffer
        delete[] wordBufferC;
    
        // Create a MutableTrieDictionary, and loop through all the lines, inserting
        // words.
    
        // First, pick a median character.
        UChar *current = wordSourceU + (destCap/2);
        UChar uc = *current++;
        UnicodeSet breaks;
        breaks.add(0x000A);     // Line Feed
        breaks.add(0x000D);     // Carriage Return
        breaks.add(0x2028);     // Line Separator
        breaks.add(0x2029);     // Paragraph Separator
    
        do { 
            // Look for line break
            while (uc && !breaks.contains(uc)) {
                uc = *current++;
            }
            // Now skip to first non-line-break
            while (uc && breaks.contains(uc)) {
                uc = *current++;
            }
        }
        while (uc && (breaks.contains(uc) || u_isspace(uc)));
    
        mtd = new MutableTrieDictionary(uc, status);
        
        if (U_FAILURE(status)) {
            fprintf(stderr, "new MutableTrieDictionary: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        }
        
        // Now add the words. Words are non-space characters at the beginning of
        // lines, and must be at least one UChar. If a word has an associated value,
        // the value should follow the word on the same line after a tab character.
        current = wordSourceU;
        UChar *candidate = current;
        uc = *current++;
        int32_t length = 0;
        int count = 0;
                
        while (uc) {
            while (uc && !u_isspace(uc)) {
                ++length;
                uc = *current++;
            }
            
            UnicodeString valueString;
            UChar candidateValue;
            if(uc == 0x0009){ //separator is a tab char, read in number after space
            	while (uc && u_isspace(uc)) {
            		uc = *current++;
            	}
                while (uc && !u_isspace(uc)) {
                    valueString.append(uc);
                    uc = *current++;
                }
            }
            
            if (length > 0) {
                count++;
                if(valueString.length() > 0){
                    mtd->setValued(TRUE);
    
                    uint32_t value = 0;
                    char* s = new char[valueString.length()];
                    valueString.extract(0,valueString.length(), s, valueString.length());
                    int n = sscanf(s, "%ud", &value);
                    U_ASSERT(n == 1);
                    U_ASSERT(value >= 0); 
                    mtd->addWord(candidate, length, status, (uint16_t)value);
                    delete[] s;
                } else {
                    mtd->addWord(candidate, length, status);
                }
    
                if (U_FAILURE(status)) {
                    fprintf(stderr, "MutableTrieDictionary::addWord: ICU Error \"%s\" at line %d in input file\n",
                            u_errorName(status), count);
                    exit(status);
                }
            }
    
            // Find beginning of next line
            while (uc && !breaks.contains(uc)) {
                uc = *current++;
            }
            // Find next non-line-breaking character
            while (uc && breaks.contains(uc)) {
                uc = *current++;
            }
            candidate = current-1;
            length = 0;
        }
    
        // Get rid of the Unicode text buffer
        delete[] wordSourceU;
    }

    // Now, create a CompactTrieDictionary from the mutable dictionary
    CompactTrieDictionary *ctd = new CompactTrieDictionary(*mtd, status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "new CompactTrieDictionary: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    }
    
    // Get rid of the MutableTrieDictionary
    delete mtd;

    //
    //  Get the binary data from the dictionary.
    //
    uint32_t        outDataSize = ctd->dataSize();
    const uint8_t  *outData = (const uint8_t *)ctd->data();

    //
    //  Create the output file
    //
    size_t bytesWritten;
    UNewDataMemory *pData;
    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genctd: Could not open output file \"%s\", \"%s\"\n", 
                         outFileName, u_errorName(status));
        exit(status);
    }


    //  Write the data itself.
    udata_writeBlock(pData, outData, outDataSize);
    // finish up 
    bytesWritten = udata_finish(pData, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genctd: error \"%s\" writing the output file\n", u_errorName(status));
        exit(status);
    }
    
    if (bytesWritten != outDataSize) {
        fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
        exit(-1);
    }
    
    // Get rid of the CompactTrieDictionary
    delete ctd;

    u_cleanup();

    printf("genctd: tool completed successfully.\n");
    return 0;

#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
}
Esempio n. 7
0
int
main(int argc,
     char* argv[])
{
    UErrorCode  status    = U_ZERO_ERROR;
    const char *arg       = NULL;
    const char *outputDir = NULL; /* NULL = no output directory, use current */
    const char *inputDir  = NULL;
    const char *encoding  = "";
    int         i;

    U_MAIN_INIT_ARGS(argc, argv);

    argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr, "%s: error in command line argument \"%s\"\n", argv[0], argv[-argc]);
    } else if(argc<2) {
        argc = -1;
    }
    if(options[WRITE_POOL_BUNDLE].doesOccur && options[USE_POOL_BUNDLE].doesOccur) {
        fprintf(stderr, "%s: cannot combine --writePoolBundle and --usePoolBundle\n", argv[0]);
        argc = -1;
    }
    if(options[FORMAT_VERSION].doesOccur) {
        const char *s = options[FORMAT_VERSION].value;
        if(uprv_strlen(s) != 1 || (s[0] != '1' && s[0] != '2')) {
            fprintf(stderr, "%s: unsupported --formatVersion %s\n", argv[0], s);
            argc = -1;
        } else if(s[0] == '1' &&
                  (options[WRITE_POOL_BUNDLE].doesOccur || options[USE_POOL_BUNDLE].doesOccur)
        ) {
            fprintf(stderr, "%s: cannot combine --formatVersion 1 with --writePoolBundle or --usePoolBundle\n", argv[0]);
            argc = -1;
        } else {
            setFormatVersion(s[0] - '0');
        }
    }

    if(options[VERSION].doesOccur) {
        fprintf(stderr,
                "%s version %s (ICU version %s).\n"
                "%s\n",
                argv[0], GENRB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
        return U_ZERO_ERROR;
    }

    if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
        /*
         * Broken into chunks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(stderr,
                "Usage: %s [OPTIONS] [FILES]\n"
                "\tReads the list of resource bundle source files and creates\n"
                "\tbinary version of reosurce bundles (.res files)\n",
                argv[0]);
        fprintf(stderr,
                "Options:\n"
                "\t-h or -? or --help       this usage text\n"
                "\t-q or --quiet            do not display warnings\n"
                "\t-v or --verbose          print extra information when processing files\n"
                "\t-V or --version          prints out version number and exits\n"
                "\t-c or --copyright        include copyright notice\n");
        fprintf(stderr,
                "\t-e or --encoding         encoding of source files\n"
                "\t-d of --destdir          destination directory, followed by the path, defaults to %s\n"
                "\t-s or --sourcedir        source directory for files followed by path, defaults to %s\n"
                "\t-i or --icudatadir       directory for locating any needed intermediate data files,\n"
                "\t                         followed by path, defaults to %s\n",
                u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory());
        fprintf(stderr,
                "\t-j or --write-java       write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"
                "\t                         defaults to ASCII and \\uXXXX format.\n");
                /* This option is deprecated and should not be used ever.
                "\t-p or --package-name     For ICU4J: package name for writing the ListResourceBundle for ICU4J,\n"
                "\t                         defaults to com.ibm.icu.impl.data\n"); */
        fprintf(stderr,
                "\t-b or --bundle-name      bundle name for writing the ListResourceBundle for ICU4J,\n"
                "\t                         defaults to LocaleElements\n"
                "\t-x or --write-xliff      write an XLIFF file for the resource bundle. Followed by\n"
                "\t                         an optional output file name.\n"
                "\t-k or --strict           use pedantic parsing of syntax\n"
                /*added by Jing*/
                "\t-l or --language         for XLIFF: language code compliant with BCP 47.\n");
        fprintf(stderr,
                "\t-C or --noBinaryCollation  do not generate binary collation image;\n"
                "\t                           makes .res file smaller but collator instantiation much slower;\n"
                "\t                           maintains ability to get tailoring rules\n"
                "\t-R or --omitCollationRules do not include collation (tailoring) rules;\n"
                "\t                           makes .res file smaller and maintains collator instantiation speed\n"
                "\t                           but tailoring rules will not be available (they are rarely used)\n");
        fprintf(stderr,
                "\t      --formatVersion      write a .res file compatible with the requested formatVersion (single digit);\n"
                "\t                           for example, --formatVersion 1\n");
        fprintf(stderr,
                "\t      --writePoolBundle    write a pool.res file with all of the keys of all input bundles\n"
                "\t      --usePoolBundle [path-to-pool.res]  point to keys from the pool.res keys pool bundle if they are available there;\n"
                "\t                           makes .res files smaller but dependent on the pool bundle\n"
                "\t                           (--writePoolBundle and --usePoolBundle cannot be combined)\n");

        return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[VERBOSE].doesOccur) {
        setVerbose(TRUE);
    }

    if(options[QUIET].doesOccur) {
        setShowWarning(FALSE);
    }
    if(options[STRICT].doesOccur) {
        setStrict(TRUE);
    }
    if(options[COPYRIGHT].doesOccur){
        setIncludeCopyright(TRUE);
    }

    if(options[SOURCEDIR].doesOccur) {
        inputDir = options[SOURCEDIR].value;
    }

    if(options[DESTDIR].doesOccur) {
        outputDir = options[DESTDIR].value;
    }
    /* This option is deprecated and should never be used.
    if(options[PACKAGE_NAME].doesOccur) {
        gPackageName = options[PACKAGE_NAME].value;
        if(!strcmp(gPackageName, "ICUDATA"))
        {
            gPackageName = U_ICUDATA_NAME;
        }
        if(gPackageName[0] == 0)
        {
            gPackageName = NULL;
        }
    }*/

    if(options[ENCODING].doesOccur) {
        encoding = options[ENCODING].value;
    }

    if(options[ICUDATADIR].doesOccur) {
        u_setDataDirectory(options[ICUDATADIR].value);
    }
    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
        /* Note: u_init() will try to open ICU property data.
         *       failures here are expected when building ICU from scratch.
         *       ignore them.
        */
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;
    if(options[WRITE_JAVA].doesOccur) {
        write_java = TRUE;
        outputEnc = options[WRITE_JAVA].value;
    }

    if(options[BUNDLE_NAME].doesOccur) {
        bundleName = options[BUNDLE_NAME].value;
    }

    if(options[WRITE_XLIFF].doesOccur) {
        write_xliff = TRUE;
        if(options[WRITE_XLIFF].value != NULL){
            xliffOutputFileName = options[WRITE_XLIFF].value;
        }
    }

    initParser(options[NO_BINARY_COLLATION].doesOccur, options[NO_COLLATION_RULES].doesOccur);

    /*added by Jing*/
    if(options[LANGUAGE].doesOccur) {
        language = options[LANGUAGE].value;
    }

    if(options[WRITE_POOL_BUNDLE].doesOccur) {
        newPoolBundle = bundle_open(NULL, TRUE, &status);
        if(U_FAILURE(status)) {
            fprintf(stderr, "unable to create an empty bundle for the pool keys: %s\n", u_errorName(status));
            return status;
        } else {
            const char *poolResName = "pool.res";
            char *nameWithoutSuffix = uprv_malloc(uprv_strlen(poolResName) + 1);
            if (nameWithoutSuffix == NULL) {
                fprintf(stderr, "out of memory error\n");
                return U_MEMORY_ALLOCATION_ERROR;
            }
            uprv_strcpy(nameWithoutSuffix, poolResName);
            *uprv_strrchr(nameWithoutSuffix, '.') = 0;
            newPoolBundle->fLocale = nameWithoutSuffix;
        }
    }

    if(options[USE_POOL_BUNDLE].doesOccur) {
        const char *poolResName = "pool.res";
        FileStream *poolFile;
        int32_t poolFileSize;
        int32_t indexLength;
        /*
         * TODO: Consolidate inputDir/filename handling from main() and processFile()
         * into a common function, and use it here as well.
         * Try to create toolutil functions for dealing with dir/filenames and
         * loading ICU data files without udata_open().
         * Share code with icupkg?
         * Also, make_res_filename() seems to be unused. Review and remove.
         */
        if (options[USE_POOL_BUNDLE].value!=NULL) {
            uprv_strcpy(theCurrentFileName, options[USE_POOL_BUNDLE].value);
            uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING);
        } else if (inputDir) {
            uprv_strcpy(theCurrentFileName, inputDir);
            uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING);
        } else {
            *theCurrentFileName = 0;
        }
        uprv_strcat(theCurrentFileName, poolResName);
        poolFile = T_FileStream_open(theCurrentFileName, "rb");
        if (poolFile == NULL) {
            fprintf(stderr, "unable to open pool bundle file %s\n", theCurrentFileName);
            return 1;
        }
        poolFileSize = T_FileStream_size(poolFile);
        if (poolFileSize < 32) {
            fprintf(stderr, "the pool bundle file %s is too small\n", theCurrentFileName);
            return 1;
        }
        poolBundle.fBytes = (uint8_t *)uprv_malloc((poolFileSize + 15) & ~15);
        if (poolFileSize > 0 && poolBundle.fBytes == NULL) {
            fprintf(stderr, "unable to allocate memory for the pool bundle file %s\n", theCurrentFileName);
            return U_MEMORY_ALLOCATION_ERROR;
        } else {
            UDataSwapper *ds;
            const DataHeader *header;
            int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize);
            int32_t keysBottom;
            if (bytesRead != poolFileSize) {
                fprintf(stderr, "unable to read the pool bundle file %s\n", theCurrentFileName);
                return 1;
            }
            /*
             * Swap the pool bundle so that a single checked-in file can be used.
             * The swapper functions also test that the data looks like
             * a well-formed .res file.
             */
            ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead,
                                               U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status);
            if (U_FAILURE(status)) {
                fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n",
                        theCurrentFileName, u_errorName(status));
                return status;
            }
            ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status);
            udata_closeSwapper(ds);
            if (U_FAILURE(status)) {
                fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n",
                        theCurrentFileName, u_errorName(status));
                return status;
            }
            header = (const DataHeader *)poolBundle.fBytes;
            if (header->info.formatVersion[0]!=2) {
                fprintf(stderr, "invalid format of pool bundle file %s\n", theCurrentFileName);
                return U_INVALID_FORMAT_ERROR;
            }
            poolBundle.fKeys = (const char *)header + header->dataHeader.headerSize;
            poolBundle.fIndexes = (const int32_t *)poolBundle.fKeys + 1;
            indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff;
            if (indexLength <= URES_INDEX_POOL_CHECKSUM) {
                fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", theCurrentFileName);
                return U_INVALID_FORMAT_ERROR;
            }
            keysBottom = (1 + indexLength) * 4;
            poolBundle.fKeys += keysBottom;
            poolBundle.fKeysLength = (poolBundle.fIndexes[URES_INDEX_KEYS_TOP] * 4) - keysBottom;
            poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM];
        }
        for (i = 0; i < poolBundle.fKeysLength; ++i) {
            if (poolBundle.fKeys[i] == 0) {
                ++poolBundle.fKeysCount;
            }
        }
        T_FileStream_close(poolFile);
        setUsePoolBundle(TRUE);
    }

    if(options[INCLUDE_UNIHAN_COLL].doesOccur) {
        gIncludeUnihanColl = TRUE;
    }

    if((argc-1)!=1) {
        printf("genrb number of files: %d\n", argc - 1);
    }
    /* generate the binary files */
    for(i = 1; i < argc; ++i) {
        status = U_ZERO_ERROR;
        arg    = getLongPathname(argv[i]);

        if (inputDir) {
            uprv_strcpy(theCurrentFileName, inputDir);
            uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING);
        } else {
            *theCurrentFileName = 0;
        }
        uprv_strcat(theCurrentFileName, arg);

        if (isVerbose()) {
            printf("Processing file \"%s\"\n", theCurrentFileName);
        }
        processFile(arg, encoding, inputDir, outputDir, gPackageName, &status);
    }

    uprv_free(poolBundle.fBytes);

    if(options[WRITE_POOL_BUNDLE].doesOccur) {
        char outputFileName[256];
        bundle_write(newPoolBundle, outputDir, NULL, outputFileName, sizeof(outputFileName), &status);
        bundle_close(newPoolBundle, &status);
        if(U_FAILURE(status)) {
            fprintf(stderr, "unable to write the pool bundle: %s\n", u_errorName(status));
        }
    }

    /* Dont return warnings as a failure */
    if (U_SUCCESS(status)) {
        return 0;
    }

    return status;
}
Esempio n. 8
0
int
main(int argc,
     char* argv[])
{
    UErrorCode  status    = U_ZERO_ERROR;
    const char *arg       = NULL;
    const char *outputDir = NULL; /* NULL = no output directory, use current */
    const char *inputDir  = NULL;
    const char *encoding  = "";
    int         i;
    UBool illegalArg = FALSE;

    U_MAIN_INIT_ARGS(argc, argv);

    options[JAVA_PACKAGE].value = "com.ibm.icu.impl.data";
    options[BUNDLE_NAME].value = "LocaleElements";
    argc = u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr, "%s: error in command line argument \"%s\"\n", argv[0], argv[-argc]);
        illegalArg = TRUE;
    } else if(argc<2) {
        illegalArg = TRUE;
    }
    if(options[WRITE_POOL_BUNDLE].doesOccur && options[USE_POOL_BUNDLE].doesOccur) {
        fprintf(stderr, "%s: cannot combine --writePoolBundle and --usePoolBundle\n", argv[0]);
        illegalArg = TRUE;
    }
    if(options[FORMAT_VERSION].doesOccur) {
        const char *s = options[FORMAT_VERSION].value;
        if(uprv_strlen(s) != 1 || (s[0] < '1' && '3' < s[0])) {
            fprintf(stderr, "%s: unsupported --formatVersion %s\n", argv[0], s);
            illegalArg = TRUE;
        } else if(s[0] == '1' &&
                  (options[WRITE_POOL_BUNDLE].doesOccur || options[USE_POOL_BUNDLE].doesOccur)
        ) {
            fprintf(stderr, "%s: cannot combine --formatVersion 1 with --writePoolBundle or --usePoolBundle\n", argv[0]);
            illegalArg = TRUE;
        } else {
            setFormatVersion(s[0] - '0');
        }
    }

    if((options[JAVA_PACKAGE].doesOccur || options[BUNDLE_NAME].doesOccur) &&
            !options[WRITE_JAVA].doesOccur) {
        fprintf(stderr,
                "%s error: command line argument --java-package or --bundle-name "
                "without --write-java\n",
                argv[0]);
        illegalArg = TRUE;
    }

    if(options[VERSION].doesOccur) {
        fprintf(stderr,
                "%s version %s (ICU version %s).\n"
                "%s\n",
                argv[0], GENRB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
        if(!illegalArg) {
            return U_ZERO_ERROR;
        }
    }

    if(illegalArg || options[HELP1].doesOccur || options[HELP2].doesOccur) {
        /*
         * Broken into chunks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(stderr,
                "Usage: %s [OPTIONS] [FILES]\n"
                "\tReads the list of resource bundle source files and creates\n"
                "\tbinary version of resource bundles (.res files)\n",
                argv[0]);
        fprintf(stderr,
                "Options:\n"
                "\t-h or -? or --help       this usage text\n"
                "\t-q or --quiet            do not display warnings\n"
                "\t-v or --verbose          print extra information when processing files\n"
                "\t-V or --version          prints out version number and exits\n"
                "\t-c or --copyright        include copyright notice\n");
        fprintf(stderr,
                "\t-e or --encoding         encoding of source files\n"
                "\t-d of --destdir          destination directory, followed by the path, defaults to %s\n"
                "\t-s or --sourcedir        source directory for files followed by path, defaults to %s\n"
                "\t-i or --icudatadir       directory for locating any needed intermediate data files,\n"
                "\t                         followed by path, defaults to %s\n",
                u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory());
        fprintf(stderr,
                "\t-j or --write-java       write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"
                "\t                         defaults to ASCII and \\uXXXX format.\n"
                "\t      --java-package     For --write-java: package name for writing the ListResourceBundle,\n"
                "\t                         defaults to com.ibm.icu.impl.data\n");
        fprintf(stderr,
                "\t-b or --bundle-name      For --write-java: root resource bundle name for writing the ListResourceBundle,\n"
                "\t                         defaults to LocaleElements\n"
                "\t-x or --write-xliff      write an XLIFF file for the resource bundle. Followed by\n"
                "\t                         an optional output file name.\n"
                "\t-k or --strict           use pedantic parsing of syntax\n"
                /*added by Jing*/
                "\t-l or --language         for XLIFF: language code compliant with BCP 47.\n");
        fprintf(stderr,
                "\t-C or --noBinaryCollation  do not generate binary collation image;\n"
                "\t                           makes .res file smaller but collator instantiation much slower;\n"
                "\t                           maintains ability to get tailoring rules\n"
                "\t-R or --omitCollationRules do not include collation (tailoring) rules;\n"
                "\t                           makes .res file smaller and maintains collator instantiation speed\n"
                "\t                           but tailoring rules will not be available (they are rarely used)\n");
        fprintf(stderr,
                "\t      --formatVersion      write a .res file compatible with the requested formatVersion (single digit);\n"
                "\t                           for example, --formatVersion 1\n");
        fprintf(stderr,
                "\t      --writePoolBundle    write a pool.res file with all of the keys of all input bundles\n"
                "\t      --usePoolBundle [path-to-pool.res]  point to keys from the pool.res keys pool bundle if they are available there;\n"
                "\t                           makes .res files smaller but dependent on the pool bundle\n"
                "\t                           (--writePoolBundle and --usePoolBundle cannot be combined)\n");

        return illegalArg ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[VERBOSE].doesOccur) {
        setVerbose(TRUE);
    }

    if(options[QUIET].doesOccur) {
        setShowWarning(FALSE);
    }
    if(options[STRICT].doesOccur) {
        setStrict(TRUE);
    }
    if(options[COPYRIGHT].doesOccur){
        setIncludeCopyright(TRUE);
    }

    if(options[SOURCEDIR].doesOccur) {
        inputDir = options[SOURCEDIR].value;
    }

    if(options[DESTDIR].doesOccur) {
        outputDir = options[DESTDIR].value;
    }

    if(options[ENCODING].doesOccur) {
        encoding = options[ENCODING].value;
    }

    if(options[ICUDATADIR].doesOccur) {
        u_setDataDirectory(options[ICUDATADIR].value);
    }
    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
        /* Note: u_init() will try to open ICU property data.
         *       failures here are expected when building ICU from scratch.
         *       ignore them.
        */
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;
    if(options[WRITE_JAVA].doesOccur) {
        write_java = TRUE;
        outputEnc = options[WRITE_JAVA].value;
    }

    if(options[WRITE_XLIFF].doesOccur) {
        write_xliff = TRUE;
        if(options[WRITE_XLIFF].value != NULL){
            xliffOutputFileName = options[WRITE_XLIFF].value;
        }
    }

    initParser();

    /*added by Jing*/
    if(options[LANGUAGE].doesOccur) {
        language = options[LANGUAGE].value;
    }

    LocalPointer<SRBRoot> newPoolBundle;
    if(options[WRITE_POOL_BUNDLE].doesOccur) {
        newPoolBundle.adoptInsteadAndCheckErrorCode(new SRBRoot(NULL, TRUE, status), status);
        if(U_FAILURE(status)) {
            fprintf(stderr, "unable to create an empty bundle for the pool keys: %s\n", u_errorName(status));
            return status;
        } else {
            const char *poolResName = "pool.res";
            char *nameWithoutSuffix = static_cast<char *>(uprv_malloc(uprv_strlen(poolResName) + 1));
            if (nameWithoutSuffix == NULL) {
                fprintf(stderr, "out of memory error\n");
                return U_MEMORY_ALLOCATION_ERROR;
            }
            uprv_strcpy(nameWithoutSuffix, poolResName);
            *uprv_strrchr(nameWithoutSuffix, '.') = 0;
            newPoolBundle->fLocale = nameWithoutSuffix;
        }
    }

    if(options[USE_POOL_BUNDLE].doesOccur) {
        const char *poolResName = "pool.res";
        FileStream *poolFile;
        int32_t poolFileSize;
        int32_t indexLength;
        /*
         * TODO: Consolidate inputDir/filename handling from main() and processFile()
         * into a common function, and use it here as well.
         * Try to create toolutil functions for dealing with dir/filenames and
         * loading ICU data files without udata_open().
         * Share code with icupkg?
         * Also, make_res_filename() seems to be unused. Review and remove.
         */
        CharString poolFileName;
        if (options[USE_POOL_BUNDLE].value!=NULL) {
            poolFileName.append(options[USE_POOL_BUNDLE].value, status);
        } else if (inputDir) {
            poolFileName.append(inputDir, status);
        }
        poolFileName.appendPathPart(poolResName, status);
        if (U_FAILURE(status)) {
            return status;
        }
        poolFile = T_FileStream_open(poolFileName.data(), "rb");
        if (poolFile == NULL) {
            fprintf(stderr, "unable to open pool bundle file %s\n", poolFileName.data());
            return 1;
        }
        poolFileSize = T_FileStream_size(poolFile);
        if (poolFileSize < 32) {
            fprintf(stderr, "the pool bundle file %s is too small\n", poolFileName.data());
            return 1;
        }
        poolBundle.fBytes = new uint8_t[(poolFileSize + 15) & ~15];
        if (poolFileSize > 0 && poolBundle.fBytes == NULL) {
            fprintf(stderr, "unable to allocate memory for the pool bundle file %s\n", poolFileName.data());
            return U_MEMORY_ALLOCATION_ERROR;
        }

        UDataSwapper *ds;
        const DataHeader *header;
        int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize);
        if (bytesRead != poolFileSize) {
            fprintf(stderr, "unable to read the pool bundle file %s\n", poolFileName.data());
            return 1;
        }
        /*
         * Swap the pool bundle so that a single checked-in file can be used.
         * The swapper functions also test that the data looks like
         * a well-formed .res file.
         */
        ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead,
                                           U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status);
        if (U_FAILURE(status)) {
            fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n",
                    poolFileName.data(), u_errorName(status));
            return status;
        }
        ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status);
        udata_closeSwapper(ds);
        if (U_FAILURE(status)) {
            fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n",
                    poolFileName.data(), u_errorName(status));
            return status;
        }
        header = (const DataHeader *)poolBundle.fBytes;
        if (header->info.formatVersion[0] < 2) {
            fprintf(stderr, "invalid format of pool bundle file %s\n", poolFileName.data());
            return U_INVALID_FORMAT_ERROR;
        }
        const int32_t *pRoot = (const int32_t *)(
                (const char *)header + header->dataHeader.headerSize);
        poolBundle.fIndexes = pRoot + 1;
        indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff;
        if (indexLength <= URES_INDEX_POOL_CHECKSUM) {
            fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", poolFileName.data());
            return U_INVALID_FORMAT_ERROR;
        }
        int32_t keysBottom = 1 + indexLength;
        int32_t keysTop = poolBundle.fIndexes[URES_INDEX_KEYS_TOP];
        poolBundle.fKeys = (const char *)(pRoot + keysBottom);
        poolBundle.fKeysLength = (keysTop - keysBottom) * 4;
        poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM];

        for (i = 0; i < poolBundle.fKeysLength; ++i) {
            if (poolBundle.fKeys[i] == 0) {
                ++poolBundle.fKeysCount;
            }
        }

        // 16BitUnits[] begins with strings-v2.
        // The strings-v2 may optionally be terminated by what looks like
        // an explicit string length that exceeds the number of remaining 16-bit units.
        int32_t stringUnitsLength = (poolBundle.fIndexes[URES_INDEX_16BIT_TOP] - keysTop) * 2;
        if (stringUnitsLength >= 2 && getFormatVersion() >= 3) {
            poolBundle.fStrings = new PseudoListResource(NULL, status);
            if (poolBundle.fStrings == NULL) {
                fprintf(stderr, "unable to allocate memory for the pool bundle strings %s\n",
                        poolFileName.data());
                return U_MEMORY_ALLOCATION_ERROR;
            }
            // The PseudoListResource constructor call did not allocate further memory.
            assert(U_SUCCESS(status));
            const UChar *p = (const UChar *)(pRoot + keysTop);
            int32_t remaining = stringUnitsLength;
            do {
                int32_t first = *p;
                int8_t numCharsForLength;
                int32_t length;
                if (!U16_IS_TRAIL(first)) {
                    // NUL-terminated
                    numCharsForLength = 0;
                    for (length = 0;
                         length < remaining && p[length] != 0;
                         ++length) {}
                } else if (first < 0xdfef) {
                    numCharsForLength = 1;
                    length = first & 0x3ff;
                } else if (first < 0xdfff && remaining >= 2) {
                    numCharsForLength = 2;
                    length = ((first - 0xdfef) << 16) | p[1];
                } else if (first == 0xdfff && remaining >= 3) {
                    numCharsForLength = 3;
                    length = ((int32_t)p[1] << 16) | p[2];
                } else {
                    break;  // overrun
                }
                // Check for overrun before changing remaining,
                // so that it is always accurate after the loop body.
                if ((numCharsForLength + length) >= remaining ||
                        p[numCharsForLength + length] != 0) {
                    break;  // overrun or explicitly terminated
                }
                int32_t poolStringIndex = stringUnitsLength - remaining;
                // Maximum pool string index when suffix-sharing the last character.
                int32_t maxStringIndex = poolStringIndex + numCharsForLength + length - 1;
                if (maxStringIndex >= RES_MAX_OFFSET) {
                    // pool string index overrun
                    break;
                }
                p += numCharsForLength;
                remaining -= numCharsForLength;
                if (length != 0) {
                    StringResource *sr =
                            new StringResource(poolStringIndex, numCharsForLength,
                                               p, length, status);
                    if (sr == NULL) {
                        fprintf(stderr, "unable to allocate memory for a pool bundle string %s\n",
                                poolFileName.data());
                        return U_MEMORY_ALLOCATION_ERROR;
                    }
                    poolBundle.fStrings->add(sr);
                    poolBundle.fStringIndexLimit = maxStringIndex + 1;
                    // The StringResource constructor did not allocate further memory.
                    assert(U_SUCCESS(status));
                }
                p += length + 1;
                remaining -= length + 1;
            } while (remaining > 0);
            if (poolBundle.fStrings->fCount == 0) {
                delete poolBundle.fStrings;
                poolBundle.fStrings = NULL;
            }
        }

        T_FileStream_close(poolFile);
        setUsePoolBundle(TRUE);
        if (isVerbose() && poolBundle.fStrings != NULL) {
            printf("number of shared strings: %d\n", (int)poolBundle.fStrings->fCount);
            int32_t length = poolBundle.fStringIndexLimit + 1;  // incl. last NUL
            printf("16-bit units for strings: %6d = %6d bytes\n",
                   (int)length, (int)length * 2);
        }
    }

    if(!options[FORMAT_VERSION].doesOccur && getFormatVersion() == 3 &&
            poolBundle.fStrings == NULL &&
            !options[WRITE_POOL_BUNDLE].doesOccur) {
        // If we just default to formatVersion 3
        // but there are no pool bundle strings to share
        // and we do not write a pool bundle,
        // then write formatVersion 2 which is just as good.
        setFormatVersion(2);
    }

    if(options[INCLUDE_UNIHAN_COLL].doesOccur) {
        puts("genrb option --includeUnihanColl ignored: \n"
                "CLDR 26/ICU 54 unihan data is small, except\n"
                "the ucadata-unihan.icu version of the collation root data\n"
                "is about 300kB larger than the ucadata-implicithan.icu version.");
    }

    if((argc-1)!=1) {
        printf("genrb number of files: %d\n", argc - 1);
    }
    /* generate the binary files */
    for(i = 1; i < argc; ++i) {
        status = U_ZERO_ERROR;
        arg    = getLongPathname(argv[i]);

        CharString theCurrentFileName;
        if (inputDir) {
            theCurrentFileName.append(inputDir, status);
        }
        theCurrentFileName.appendPathPart(arg, status);
        if (U_FAILURE(status)) {
            break;
        }

        gCurrentFileName = theCurrentFileName.data();
        if (isVerbose()) {
            printf("Processing file \"%s\"\n", theCurrentFileName.data());
        }
        processFile(arg, encoding, inputDir, outputDir, NULL,
                    newPoolBundle.getAlias(),
                    options[NO_BINARY_COLLATION].doesOccur, status);
    }

    poolBundle.close();

    if(U_SUCCESS(status) && options[WRITE_POOL_BUNDLE].doesOccur) {
        char outputFileName[256];
        newPoolBundle->write(outputDir, NULL, outputFileName, sizeof(outputFileName), status);
        if(U_FAILURE(status)) {
            fprintf(stderr, "unable to write the pool bundle: %s\n", u_errorName(status));
        }
    }

    u_cleanup();

    /* Dont return warnings as a failure */
    if (U_SUCCESS(status)) {
        return 0;
    }

    return status;
}
Esempio n. 9
0
extern int
main(int argc, char *argv[]) {
    FILE *in, *out;
    const char *pname;
    char *data;
    int32_t length;
    UBool ishelp;
    int rc;

    UDataSwapper *ds;
    const UDataInfo *pInfo;
    UErrorCode errorCode;
    uint8_t outCharset;
    UBool outIsBigEndian;

    U_MAIN_INIT_ARGS(argc, argv);

    fprintf(stderr, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n");

    /* get the program basename */
    pname=strrchr(argv[0], U_FILE_SEP_CHAR);
    if(pname==NULL) {
        pname=strrchr(argv[0], '/');
    }
    if(pname!=NULL) {
        ++pname;
    } else {
        pname=argv[0];
    }

    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
    ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
    if(ishelp || argc!=3) {
        return printUsage(pname, ishelp);
    }

    /* parse the output type option */
    data=(char *)options[OPT_OUT_TYPE].value;
    if(data[0]==0 || data[1]!=0) {
        /* the type must be exactly one letter */
        return printUsage(pname, FALSE);
    }
    switch(data[0]) {
    case 'l':
        outIsBigEndian=FALSE;
        outCharset=U_ASCII_FAMILY;
        break;
    case 'b':
        outIsBigEndian=TRUE;
        outCharset=U_ASCII_FAMILY;
        break;
    case 'e':
        outIsBigEndian=TRUE;
        outCharset=U_EBCDIC_FAMILY;
        break;
    default:
        return printUsage(pname, FALSE);
    }

    in=out=NULL;
    data=NULL;

    /* open the input file, get its length, allocate memory for it, read the file */
    in=fopen(argv[1], "rb");
    if(in==NULL) {
        fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]);
        rc=2;
        goto done;
    }

    length=fileSize(in);
    if(length<DEFAULT_PADDING_LENGTH) {
        fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]);
        rc=2;
        goto done;
    }

    /*
     * +15: udata_swapPackage() may need to add a few padding bytes to the
     * last item if charset swapping is done,
     * because the last item may be resorted into the middle and then needs
     * additional padding bytes
     */
    data=(char *)malloc(length+DEFAULT_PADDING_LENGTH);
    if(data==NULL) {
        fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]);
        rc=2;
        goto done;
    }

    /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */
    uprv_memset(data+length-DEFAULT_PADDING_LENGTH, 0xaa, DEFAULT_PADDING_LENGTH);

    if(length!=(int32_t)fread(data, 1, length, in)) {
        fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]);
        rc=3;
        goto done;
    }

    fclose(in);
    in=NULL;

    /* swap the data in-place */
    errorCode=U_ZERO_ERROR;
    ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
                pname, argv[1], u_errorName(errorCode));
        rc=4;
        goto done;
    }

    ds->printError=printError;
    ds->printErrorContext=stderr;

    /* speculative cast, protected by the following length check */
    pInfo=(const UDataInfo *)((const char *)data+4);

    if( length>=20 &&
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
        pInfo->dataFormat[1]==0x6d &&
        pInfo->dataFormat[2]==0x6e &&
        pInfo->dataFormat[3]==0x44
    ) {
        /*
         * swap the .dat package
         * udata_swapPackage() needs to rename ToC name entries from the old package
         * name to the new one.
         * We pass it the filenames, and udata_swapPackage() will extract the
         * package names.
         */
        length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode);
        udata_closeSwapper(ds);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n",
                    pname, argv[1], u_errorName(errorCode));
            rc=4;
            goto done;
        }
    } else {
        /* swap the data, which is not a .dat package */
        length=udata_swap(ds, data, length, data, &errorCode);
        udata_closeSwapper(ds);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n",
                    pname, argv[1], u_errorName(errorCode));
            rc=4;
            goto done;
        }
    }

    out=fopen(argv[2], "wb");
    if(out==NULL) {
        fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]);
        rc=5;
        goto done;
    }

    if(length!=(int32_t)fwrite(data, 1, length, out)) {
        fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]);
        rc=6;
        goto done;
    }

    fclose(out);
    out=NULL;

    /* all done */
    rc=0;

done:
    if(in!=NULL) {
        fclose(in);
    }
    if(out!=NULL) {
        fclose(out);
    }
    if(data!=NULL) {
        free(data);
    }
    return rc;
}
Esempio n. 10
0
int
main(int argc, char* argv[]) {
    int result = 0;
    /* FileStream  *out; */
    UPKGOptions  o;
    CharList    *tail;
    UBool        needsHelp = FALSE;
    UErrorCode   status = U_ZERO_ERROR;
    /* char         tmp[1024]; */
    uint32_t i;
    int32_t n;

    U_MAIN_INIT_ARGS(argc, argv);

    progname = argv[0];

    options[MODE].value = "common";

    /* read command line options */
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    /* I've decided to simply print an error and quit. This tool has too
    many options to just display them all of the time. */

    if(options[HELP].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
        needsHelp = TRUE;
    }
    else {
        if(!needsHelp && argc<0) {
            fprintf(stderr,
                "%s: error in command line argument \"%s\"\n",
                progname,
                argv[-argc]);
            fprintf(stderr, "Run '%s --help' for help.\n", progname);
            return 1;
        }


#ifndef WINDOWS_WITH_MSVC
        if(!options[BLDOPT].doesOccur) {
            if (pkg_getOptionsFromICUConfig(&options[BLDOPT]) != 0) {
                fprintf(stderr, " required parameter is missing: -O is required \n");
                fprintf(stderr, "Run '%s --help' for help.\n", progname);
                return 1;
            }
        }
#else
        if(options[BLDOPT].doesOccur) {
            fprintf(stdout, "Warning: You are using the -O option which is not needed for MSVC build on Windows.\n");
        }
#endif

        if(!options[NAME].doesOccur) /* -O we already have - don't report it. */
        {
            fprintf(stderr, " required parameter -p is missing \n");
            fprintf(stderr, "Run '%s --help' for help.\n", progname);
            return 1;
        }

        if(argc == 1) {
            fprintf(stderr,
                "No input files specified.\n"
                "Run '%s --help' for help.\n", progname);
            return 1;
        }
    }   /* end !needsHelp */

    if(argc<0 || needsHelp  ) {
        fprintf(stderr,
            "usage: %s [-options] [-] [packageFile] \n"
            "\tProduce packaged ICU data from the given list(s) of files.\n"
            "\t'-' by itself means to read from stdin.\n"
            "\tpackageFile is a text file containing the list of files to package.\n",
            progname);

        fprintf(stderr, "\n options:\n");
        for(i=0;i<(sizeof(options)/sizeof(options[0]));i++) {
            fprintf(stderr, "%-5s -%c %s%-10s  %s\n",
                (i<1?"[REQ]":""),
                options[i].shortName,
                options[i].longName ? "or --" : "     ",
                options[i].longName ? options[i].longName : "",
                options_help[i]);
        }

        fprintf(stderr, "modes: (-m option)\n");
        for(i=0;i<(sizeof(modes)/sizeof(modes[0]));i++) {
            fprintf(stderr, "   %-9s ", modes[i].name);
            if (modes[i].alt_name) {
                fprintf(stderr, "/ %-9s", modes[i].alt_name);
            } else {
                fprintf(stderr, "           ");
            }
            fprintf(stderr, "  %s\n", modes[i].desc);
        }
        return 1;
    }

    /* OK, fill in the options struct */
    uprv_memset(&o, 0, sizeof(o));

    o.mode      = options[MODE].value;
    o.version   = options[REVISION].doesOccur ? options[REVISION].value : 0;

    o.shortName = options[NAME].value;
    {
        int32_t len = (int32_t)uprv_strlen(o.shortName);
        char *csname, *cp;
        const char *sp;

        cp = csname = (char *) uprv_malloc((len + 1 + 1) * sizeof(*o.cShortName));
        if (*(sp = o.shortName)) {
            *cp++ = isalpha(*sp) ? * sp : '_';
            for (++sp; *sp; ++sp) {
                *cp++ = isalnum(*sp) ? *sp : '_';
            }
        }
        *cp = 0;

        o.cShortName = csname;
    }

    if(options[LIBNAME].doesOccur) { /* get libname from shortname, or explicit -L parameter */
      o.libName = options[LIBNAME].value;
    } else {
      o.libName = o.shortName;
    }

    if(options[QUIET].doesOccur) {
      o.quiet = TRUE;
    } else {
      o.quiet = FALSE;
    }

    o.verbose   = options[VERBOSE].doesOccur;

#ifndef WINDOWS_WITH_MSVC /* on UNIX, we'll just include the file... */
    o.options   = options[BLDOPT].value;
#endif
    if(options[COPYRIGHT].doesOccur) {
        o.comment = U_COPYRIGHT_STRING;
    } else if (options[COMMENT].doesOccur) {
        o.comment = options[COMMENT].value;
    }

    if( options[DESTDIR].doesOccur ) {
        o.targetDir = options[DESTDIR].value;
    } else {
        o.targetDir = ".";  /* cwd */
    }

    o.rebuild   = options[REBUILD].doesOccur;

    if( options[TEMPDIR].doesOccur ) {
        o.tmpDir    = options[TEMPDIR].value;
    } else {
        o.tmpDir    = o.targetDir;
    }

    if( options[INSTALL].doesOccur ) {
        o.install  = options[INSTALL].value;
    } else {
        o.install = NULL;
    }

    if( options[SOURCEDIR].doesOccur ) {
        o.srcDir   = options[SOURCEDIR].value;
    } else {
        o.srcDir   = ".";
    }

    if( options[ENTRYPOINT].doesOccur ) {
        o.entryName = options[ENTRYPOINT].value;
    } else {
        o.entryName = o.cShortName;
    }

    /* OK options are set up. Now the file lists. */
    tail = NULL;
    for( n=1; n<argc; n++) {
        o.fileListFiles = pkg_appendToList(o.fileListFiles, &tail, uprv_strdup(argv[n]));
    }

    /* load the files */
    loadLists(&o, &status);
    if( U_FAILURE(status) ) {
        fprintf(stderr, "error loading input file lists: %s\n", u_errorName(status));
        return 2;
    }

    result = pkg_executeOptions(&o);

    if (pkgDataFlags != NULL) {
        for (n = 0; n < PKGDATA_FLAGS_SIZE; n++) {
            if (pkgDataFlags[n] != NULL) {
                uprv_free(pkgDataFlags[n]);
            }
        }
        uprv_free(pkgDataFlags);
    }

    if (o.cShortName != NULL) {
        uprv_free((char *)o.cShortName);
    }
    if (o.fileListFiles != NULL) {
        pkg_deleteList(o.fileListFiles);
    }
    if (o.filePaths != NULL) {
        pkg_deleteList(o.filePaths);
    }
    if (o.files != NULL) {
        pkg_deleteList(o.files);
    }

    return result;
}
Esempio n. 11
0
//----------------------------------------------------------------------------
//
//  main      for genbrk
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    UErrorCode  status = U_ZERO_ERROR;
    const char *ruleFileName;
    const char *outFileName;
    const char *outDir = NULL;
    const char *copyright = NULL;

    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[0].doesOccur || options[1].doesOccur) {
        //  -? or -h for help.
        usageAndDie(0);
    }

    if (!(options[3].doesOccur && options[4].doesOccur)) {
        fprintf(stderr, "rule file and output file must both be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    ruleFileName = options[3].value;
    outFileName  = options[4].value;

    if (options[5].doesOccur) {
        u_setDataDirectory(options[5].value);
    }

    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;

    /* Combine the directory with the file name */
    if(options[6].doesOccur) {
        outDir = options[6].value;
    }
    if (options[7].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

#if UCONFIG_NO_BREAK_ITERATION

    UNewDataMemory *pData;
    char msg[1024];

    /* write message with just the name */
    sprintf(msg, "genbrk writes dummy %s because of UCONFIG_NO_BREAK_ITERATION, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &status);
    return (int)status;

#else

    //
    //  Read in the rule source file
    //
    long        result;
    long        ruleFileSize;
    FILE        *file;
    char        *ruleBufferC;

    file = fopen(ruleFileName, "rb");
    if( file == 0 ) {
        fprintf(stderr, "Could not open file \"%s\"\n", ruleFileName);
        exit(-1);
    }
    fseek(file, 0, SEEK_END);
    ruleFileSize = ftell(file);
    fseek(file, 0, SEEK_SET);
    ruleBufferC = new char[ruleFileSize+10];

    result = (long)fread(ruleBufferC, 1, ruleFileSize, file);
    if (result != ruleFileSize)  {
        fprintf(stderr, "Error reading file \"%s\"\n", ruleFileName);
        exit (-1);
    }
    ruleBufferC[ruleFileSize]=0;
    fclose(file);

    //
    // Look for a Unicode Signature (BOM) on the rule file
    //
    int32_t        signatureLength;
    const char *   ruleSourceC = ruleBufferC;
    const char*    encoding = ucnv_detectUnicodeSignature(
                           ruleSourceC, ruleFileSize, &signatureLength, &status);
    if (U_FAILURE(status)) {
        exit(status);
    }
    if(encoding!=NULL ){
        ruleSourceC  += signatureLength;
        ruleFileSize -= signatureLength;
    }

    //
    // Open a converter to take the rule file to UTF-16
    //
    UConverter* conv;
    conv = ucnv_open(encoding, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ucnv_open: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    }

    //
    // Convert the rules to UChar.
    //  Preflight first to determine required buffer size.
    //
    uint32_t destCap = ucnv_toUChars(conv,
                       NULL,           //  dest,
                       0,              //  destCapacity,
                       ruleSourceC,
                       ruleFileSize,
                       &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    };

    status = U_ZERO_ERROR;
    UChar *ruleSourceU = new UChar[destCap+1];
    ucnv_toUChars(conv,
                  ruleSourceU,     //  dest,
                  destCap+1,
                  ruleSourceC,
                  ruleFileSize,
                  &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    };
    ucnv_close(conv);


    //
    //  Put the source rules into a UnicodeString
    //
    UnicodeString ruleSourceS(FALSE, ruleSourceU, destCap);

    //
    //  Create the break iterator from the rules
    //     This will compile the rules.
    //
    UParseError parseError;
    parseError.line = 0;
    parseError.offset = 0;
    RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(ruleSourceS, parseError, status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
                u_errorName(status), (int)parseError.line, (int)parseError.offset);
        exit(status);
    };


    //
    //  Get the compiled rule data from the break iterator.
    //
    uint32_t        outDataSize;
    const uint8_t  *outData;
    outData = bi->getBinaryRules(outDataSize);

    // Copy the data format version numbers from the RBBI data header into the UDataMemory header.
    uprv_memcpy(dh.info.formatVersion, ((RBBIDataHeader *)outData)->fFormatVersion, sizeof(dh.info.formatVersion));

    //
    //  Create the output file
    //
    size_t bytesWritten;
    UNewDataMemory *pData;
    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genbrk: Could not open output file \"%s\", \"%s\"\n", 
                         outFileName, u_errorName(status));
        exit(status);
    }


    //  Write the data itself.
    udata_writeBlock(pData, outData, outDataSize);
    // finish up 
    bytesWritten = udata_finish(pData, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genbrk: error %d writing the output file\n", status);
        exit(status);
    }
    
    if (bytesWritten != outDataSize) {
        fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
        exit(-1);
    }

    delete bi;
    delete[] ruleSourceU;
    delete[] ruleBufferC;
    u_cleanup();


    printf("genbrk: tool completed successfully.\n");
    return 0;

#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
}
Esempio n. 12
0
extern int
main(int argc, char* argv[]) {
    UBool verbose = TRUE;
    char writeCode;

    U_MAIN_INIT_ARGS(argc, argv);

    options[kOptDestDir].value = ".";

    /* read command line options */
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }
    if(argc<0 || options[kOptHelpH].doesOccur || options[kOptHelpQuestionMark].doesOccur) {
        fprintf(stderr,
            "usage: %s [-options] filename1 filename2 ...\n"
            "\tread each binary input file and \n"
            "\tcreate a .c file with a byte array that contains the input file's data\n"
            "options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-n or --name        symbol prefix, followed by the prefix\n"
            "\t-e or --entrypoint  entry point name, followed by the name (_dat will be appended)\n"
            "\t-r or --revision    Specify a version\n"
            , argv[0]);
#ifdef CAN_GENERATE_OBJECTS
        fprintf(stderr,
            "\t-o or --object      write a .obj file instead of .c\n"
            "\t-m or --match-arch file.o  match the architecture (CPU, 32/64 bits) of the specified .o\n"
            "\t                    ELF format defaults to i386. Windows defaults to the native platform.\n");
#endif
        fprintf(stderr,
            "\t-f or --filename    Specify an alternate base filename. (default: symbolname_typ)\n"
            "\t-a or --assembly    Create assembly file. (possible values are: ");

        printAssemblyHeadersToStdErr();
    } else {
        const char *message, *filename;
        /* TODO: remove void (*writeCode)(const char *, const char *); */

        if(options[kOptAssembly].doesOccur) {
            message="generating assembly code for %s\n";
            writeCode = CALL_WRITEASSEMBLY;
            /* TODO: remove writeCode=&writeAssemblyCode; */

            if (!checkAssemblyHeaderName(options[kOptAssembly].value)) {
                fprintf(stderr,
                    "Assembly type \"%s\" is unknown.\n", options[kOptAssembly].value);
                return -1;
            }
        }
#ifdef CAN_GENERATE_OBJECTS
        else if(options[kOptObject].doesOccur) {
            message="generating object code for %s\n";
            writeCode = CALL_WRITEOBJECT;
            /* TODO: remove writeCode=&writeObjectCode; */
        }
#endif
        else
        {
            message="generating C code for %s\n";
            writeCode = CALL_WRITECCODE;
            /* TODO: remove writeCode=&writeCCode; */
        }
        while(--argc) {
            filename=getLongPathname(argv[argc]);
            if (verbose) {
                fprintf(stdout, message, filename);
            }

            switch (writeCode) {
            case CALL_WRITECCODE:
                writeCCode(filename, options[kOptDestDir].value,
                           options[kOptName].doesOccur ? options[kOptName].value : NULL,
                           options[kOptFilename].doesOccur ? options[kOptFilename].value : NULL,
                           NULL);
                break;
            case CALL_WRITEASSEMBLY:
                writeAssemblyCode(filename, options[kOptDestDir].value,
                                  options[kOptEntryPoint].doesOccur ? options[kOptEntryPoint].value : NULL,
                                  options[kOptFilename].doesOccur ? options[kOptFilename].value : NULL,
                                  NULL);
                break;
#ifdef CAN_GENERATE_OBJECTS
            case CALL_WRITEOBJECT:
                writeObjectCode(filename, options[kOptDestDir].value,
                                options[kOptEntryPoint].doesOccur ? options[kOptEntryPoint].value : NULL,
                                options[kOptMatchArch].doesOccur ? options[kOptMatchArch].value : NULL,
                                options[kOptFilename].doesOccur ? options[kOptFilename].value : NULL,
                                NULL);
                break;
#endif
            default:
                /* Should never occur. */
                break;
            }
            /* TODO: remove writeCode(filename, options[kOptDestDir].value); */
        }
    }

    return 0;
}
Esempio n. 13
0
extern int
main(int argc, char* argv[]) {
    UVersionInfo version;
    Options moreOptions={ TRUE, FALSE, TRUE };
    UErrorCode errorCode = U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* Initialize ICU */
    u_init(&errorCode);
    if (U_FAILURE(errorCode) && errorCode != U_FILE_ACCESS_ERROR) {
        /* Note: u_init() will try to open ICU property data.
         *       failures here are expected when building ICU from scratch.
         *       ignore them.
         */
        fprintf(stderr, "%s: can not initialize ICU.  errorCode = %s\n",
            argv[0], u_errorName(errorCode));
        exit(1);
    }

    /* preset then read command line options */
    options[DESTDIR].value=u_getDataDirectory();
    options[UNICODE].value="4.1";
    argc=u_parseArgs(argc, argv, LENGTHOF(options), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    } else if(argc<2) {
        argc=-1;
    }
    if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
        /*
         * Broken into chucks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(stderr,
            "Usage: %s [-1[+|-]] [-v[+|-]] [-c[+|-]] [filename_ud [filename_na]]\n"
            "\n"
            "Read the UnicodeData.txt file and \n"
            "create a binary file " DATA_NAME "." DATA_TYPE " with the character names\n"
            "\n"
            "\tfilename_ud  absolute path/filename for the UnicodeData.txt file\n"
            "\t             (default: standard input)\n"
            "\tfilename_na  absolute path/filename for the NameAliases.txt file\n"
            "\t             (default: no name aliases)\n"
            "\n",
            argv[0]);
        fprintf(stderr,
            "Options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-v or --verbose     verbose output\n"
            "\t-q or --quiet       no output\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-u or --unicode     Unicode version, followed by the version like 3.0.0\n");
        fprintf(stderr,
            "\t-1 or --unicode1-names     store Unicode 1.0 character names\n"
            "\t      --no-iso-comments    do not store ISO comments\n"
            "\t      --only-iso-comments  write ucomment.icu with only ISO comments\n");
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    /* get the options values */
    beVerbose=options[VERBOSE].doesOccur;
    beQuiet=options[QUIET].doesOccur;
    haveCopyright=options[COPYRIGHT].doesOccur;
    moreOptions.store10Names=options[UNICODE1_NAMES].doesOccur;
    moreOptions.storeISOComments=!options[NO_ISO_COMMENTS].doesOccur;
    if(options[ONLY_ISO_COMMENTS].doesOccur) {
        moreOptions.storeNames=moreOptions.store10Names=FALSE;
        moreOptions.storeISOComments=TRUE;
    }

    /* set the Unicode version */
    u_versionFromString(version, options[UNICODE].value);
    uprv_memcpy(dataInfo.dataVersion, version, 4);
    ucdVersion=findUnicodeVersion(version);

    init();
    if(argc>=3) {
        parseNameAliases(argv[2], &moreOptions);
    }
    parseDB(argc>=2 ? argv[1] : "-", &moreOptions);
    compress();
    generateData(options[DESTDIR].value, &moreOptions);

    u_cleanup();
    return 0;
}
Esempio n. 14
0
extern int
main(int argc, char* argv[]) {
    int i, n;
    char pathBuf[512];
    FileStream *in;
    UNewDataMemory *out;
    UErrorCode errorCode=U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[DESTDIR].value=options[SOURCEDIR].value=u_getDataDirectory();
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }
    if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
        fprintf(stderr,
            "usage: %s [-options] [convrtrs.txt]\n"
            "\tread convrtrs.txt and create " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE "\n"
            "options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-v or --verbose     prints out extra information about the alias table\n"
            "\t-q or --quiet       do not display warnings and progress\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-s or --sourcedir   source directory, followed by the path\n",
            argv[0]);
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[VERBOSE].doesOccur) {
        verbose = TRUE;
    }

    if(options[QUIET].doesOccur) {
        quiet = TRUE;
    }

    if(argc>=2) {
        path=argv[1];
    } else {
        path=options[SOURCEDIR].value;
        if(path!=NULL && *path!=0) {
            char *end;

            uprv_strcpy(pathBuf, path);
            end = uprv_strchr(pathBuf, 0);
            if(*(end-1)!=U_FILE_SEP_CHAR) {
                *(end++)=U_FILE_SEP_CHAR;
            }
            uprv_strcpy(end, "convrtrs.txt");
            path=pathBuf;
        } else {
            path = "convrtrs.txt";
        }
    }

    uprv_memset(stringStore, 0, sizeof(stringStore));
    uprv_memset(tagStore, 0, sizeof(tagStore));
    uprv_memset(converters, 0, sizeof(converters));
    uprv_memset(tags, 0, sizeof(tags));
    uprv_memset(aliasLists, 0, sizeof(aliasLists));
    uprv_memset(knownAliases, 0, sizeof(aliasLists));


    in=T_FileStream_open(path, "r");
    if(in==NULL) {
        fprintf(stderr, "gencnval: unable to open input file %s\n", path);
        exit(U_FILE_ACCESS_ERROR);
    }
    parseFile(in);
    T_FileStream_close(in);

    /* create the output file */
    out=udata_create(options[DESTDIR].value, DATA_TYPE, DATA_NAME, &dataInfo,
                     options[COPYRIGHT].doesOccur ? U_COPYRIGHT_STRING : NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode));
        exit(errorCode);
    }

    /* write the table of aliases based on a tag/converter name combination */
    writeAliasTable(out);

    /* finish */
    udata_finish(out, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gencnval: error finishing output file - %s\n", u_errorName(errorCode));
        exit(errorCode);
    }

    /* clean up tags */
    for (i = 0; i < MAX_TAG_COUNT; i++) {
        for (n = 0; n < MAX_CONV_COUNT; n++) {
            if (tags[i].aliasList[n].aliases!=NULL) {
                uprv_free(tags[i].aliasList[n].aliases);
            }
        }
    }

    return 0;
}
Esempio n. 15
0
extern int
main(int argc, char *argv[]) {
    const char *pname, *sourcePath, *destPath, *inFilename, *outFilename, *outComment;
    char outType;
    UBool isHelp, isModified, isPackage;
    int result = 0;

    Package *pkg, *listPkg, *addListPkg;

    U_MAIN_INIT_ARGS(argc, argv);

    /* get the program basename */
    pname=findBasename(argv[0]);

    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
    isHelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
    if(isHelp) {
        printUsage(pname, TRUE);
        return U_ZERO_ERROR;
    }

    pkg=new Package;
    if(pkg==NULL) {
        fprintf(stderr, "icupkg: not enough memory\n");
        return U_MEMORY_ALLOCATION_ERROR;
    }
    isModified=FALSE;

    int autoPrefix=0;
    if(options[OPT_AUTO_TOC_PREFIX].doesOccur) {
        pkg->setAutoPrefix();
        ++autoPrefix;
    }
    if(options[OPT_AUTO_TOC_PREFIX_WITH_TYPE].doesOccur) {
        if(options[OPT_TOC_PREFIX].doesOccur) {
            fprintf(stderr, "icupkg: --auto_toc_prefix_with_type and also --toc_prefix\n");
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
        pkg->setAutoPrefixWithType();
        ++autoPrefix;
    }
    if(argc<2 || 3<argc || autoPrefix>1) {
        printUsage(pname, FALSE);
        return U_ILLEGAL_ARGUMENT_ERROR;
    }

    if(options[OPT_SOURCEDIR].doesOccur) {
        sourcePath=options[OPT_SOURCEDIR].value;
    } else {
        // work relative to the current working directory
        sourcePath=NULL;
    }
    if(options[OPT_DESTDIR].doesOccur) {
        destPath=options[OPT_DESTDIR].value;
    } else {
        // work relative to the current working directory
        destPath=NULL;
    }

    if(0==strcmp(argv[1], "new")) {
        if(autoPrefix) {
            fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but no input package\n");
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
        inFilename=NULL;
        isPackage=TRUE;
    } else {
        inFilename=argv[1];
        if(isPackageName(inFilename)) {
            pkg->readPackage(inFilename);
            isPackage=TRUE;
        } else {
            /* swap a single file (icuswap replacement) rather than work on a package */
            pkg->addFile(sourcePath, inFilename);
            isPackage=FALSE;
        }
    }

    if(argc>=3) {
        outFilename=argv[2];
        if(0!=strcmp(argv[1], argv[2])) {
            isModified=TRUE;
        }
    } else if(isPackage) {
        outFilename=NULL;
    } else /* !isPackage */ {
        outFilename=inFilename;
        isModified=(UBool)(sourcePath!=destPath);
    }

    /* parse the output type option */
    if(options[OPT_OUT_TYPE].doesOccur) {
        const char *type=options[OPT_OUT_TYPE].value;
        if(type[0]==0 || type[1]!=0) {
            /* the type must be exactly one letter */
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
        outType=type[0];
        switch(outType) {
        case 'l':
        case 'b':
        case 'e':
            break;
        default:
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }

        /*
         * Set the isModified flag if the output type differs from the
         * input package type.
         * If we swap a single file, just assume that we are modifying it.
         * The Package class does not give us access to the item and its type.
         */
        isModified|=(UBool)(!isPackage || outType!=pkg->getInType());
    } else if(isPackage) {
        outType=pkg->getInType(); // default to input type
    } else /* !isPackage: swap single file */ {
        outType=0; /* tells extractItem() to not swap */
    }

    if(options[OPT_WRITEPKG].doesOccur) {
        isModified=TRUE;
    }

    if(!isPackage) {
        /*
         * icuswap tool replacement: Only swap a single file.
         * Check that irrelevant options are not set.
         */
        if( options[OPT_COMMENT].doesOccur ||
            options[OPT_COPYRIGHT].doesOccur ||
            options[OPT_MATCHMODE].doesOccur ||
            options[OPT_REMOVE_LIST].doesOccur ||
            options[OPT_ADD_LIST].doesOccur ||
            options[OPT_EXTRACT_LIST].doesOccur ||
            options[OPT_LIST_ITEMS].doesOccur
        ) {
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
        if(isModified) {
            pkg->extractItem(destPath, outFilename, 0, outType);
        }

        delete pkg;
        return result;
    }

    /* Work with a package. */

    if(options[OPT_COMMENT].doesOccur) {
        outComment=options[OPT_COMMENT].value;
    } else if(options[OPT_COPYRIGHT].doesOccur) {
        outComment=U_COPYRIGHT_STRING;
    } else {
        outComment=NULL;
    }

    if(options[OPT_MATCHMODE].doesOccur) {
        if(0==strcmp(options[OPT_MATCHMODE].value, "noslash")) {
            pkg->setMatchMode(Package::MATCH_NOSLASH);
        } else {
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
    }

    /* remove items */
    if(options[OPT_REMOVE_LIST].doesOccur) {
        listPkg=new Package();
        if(listPkg==NULL) {
            fprintf(stderr, "icupkg: not enough memory\n");
            exit(U_MEMORY_ALLOCATION_ERROR);
        }
        if(readList(NULL, options[OPT_REMOVE_LIST].value, FALSE, listPkg)) {
            pkg->removeItems(*listPkg);
            delete listPkg;
            isModified=TRUE;
        } else {
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
    }

    /*
     * add items
     * use a separate Package so that its memory and items stay around
     * as long as the main Package
     */
    addListPkg=NULL;
    if(options[OPT_ADD_LIST].doesOccur) {
        addListPkg=new Package();
        if(addListPkg==NULL) {
            fprintf(stderr, "icupkg: not enough memory\n");
            exit(U_MEMORY_ALLOCATION_ERROR);
        }
        if(readList(sourcePath, options[OPT_ADD_LIST].value, TRUE, addListPkg)) {
            pkg->addItems(*addListPkg);
            // delete addListPkg; deferred until after writePackage()
            isModified=TRUE;
        } else {
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
    }

    /* extract items */
    if(options[OPT_EXTRACT_LIST].doesOccur) {
        listPkg=new Package();
        if(listPkg==NULL) {
            fprintf(stderr, "icupkg: not enough memory\n");
            exit(U_MEMORY_ALLOCATION_ERROR);
        }
        if(readList(NULL, options[OPT_EXTRACT_LIST].value, FALSE, listPkg)) {
            pkg->extractItems(destPath, *listPkg, outType);
            delete listPkg;
        } else {
            printUsage(pname, FALSE);
            return U_ILLEGAL_ARGUMENT_ERROR;
        }
    }

    /* list items */
    if(options[OPT_LIST_ITEMS].doesOccur) {
        int32_t i;
        if (options[OPT_LIST_FILE].doesOccur) {
            FileStream *out;
            out = T_FileStream_open(options[OPT_LIST_FILE].value, "w");
            if (out != NULL) {
                for(i=0; i<pkg->getItemCount(); ++i) {
                    T_FileStream_writeLine(out, pkg->getItem(i)->name);
                    T_FileStream_writeLine(out, "\n");
                }
                T_FileStream_close(out);
            } else {
                return U_ILLEGAL_ARGUMENT_ERROR;
            }
        } else {
            for(i=0; i<pkg->getItemCount(); ++i) {
                fprintf(stdout, "%s\n", pkg->getItem(i)->name);
            }
        }
    }

    /* check dependencies between items */
    if(!pkg->checkDependencies()) {
        /* some dependencies are not fulfilled */
        return U_MISSING_RESOURCE_ERROR;
    }

    /* write the output .dat package if there are any modifications */
    if(isModified) {
        char outFilenameBuffer[1024]; // for auto-generated output filename, if necessary

        if(outFilename==NULL || outFilename[0]==0) {
            if(inFilename==NULL || inFilename[0]==0) {
                fprintf(stderr, "icupkg: unable to auto-generate an output filename if there is no input filename\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }

            /*
             * auto-generate a filename:
             * copy the inFilename,
             * and if the last basename character matches the input file's type,
             * then replace it with the output file's type
             */
            char suffix[6]="?.dat";
            char *s;

            suffix[0]=pkg->getInType();
            strcpy(outFilenameBuffer, inFilename);
            s=strchr(outFilenameBuffer, 0);
            if((s-outFilenameBuffer)>5 && 0==memcmp(s-5, suffix, 5)) {
                *(s-5)=outType;
            }
            outFilename=outFilenameBuffer;
        }
        if(options[OPT_TOC_PREFIX].doesOccur) {
            pkg->setPrefix(options[OPT_TOC_PREFIX].value);
        }
        result = writePackageDatFile(outFilename, outComment, NULL, NULL, pkg, outType);
    }

    delete addListPkg;
    delete pkg;
    return result;
}
Esempio n. 16
0
UPerfTest::UPerfTest(int32_t argc, const char* argv[], UErrorCode& status){
    
    _argc = argc;
    _argv = argv;
    ucharBuf = NULL;
    encoding = "";
    uselen = FALSE;
    fileName = NULL;
    sourceDir = ".";
    lines = NULL;
    numLines = 0;
    line_mode = TRUE;
    buffer = NULL;
    bufferLen = 0;
    verbose = FALSE;
    bulk_mode = FALSE;
    passes = iterations = time = 0;
    locale = NULL;
    
    //initialize the argument list
    U_MAIN_INIT_ARGS(argc, argv);
    //parse the arguments
    _remainingArgc = u_parseArgs(argc, (char**)argv, (int32_t)(sizeof(options)/sizeof(options[0])), options);

    // Now setup the arguments
    if(argc==1 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(options[VERBOSE].doesOccur) {
        verbose = TRUE;
    }

    if(options[SOURCEDIR].doesOccur) {
        sourceDir = options[SOURCEDIR].value;
    }

    if(options[ENCODING].doesOccur) {
        encoding = options[ENCODING].value;
    }

    if(options[USELEN].doesOccur) {
        uselen = TRUE;
    }

    if(options[FILE_NAME].doesOccur){
        fileName = options[FILE_NAME].value;
    }

    if(options[PASSES].doesOccur) {
        passes = atoi(options[PASSES].value);
    }
    if(options[ITERATIONS].doesOccur) {
        iterations = atoi(options[ITERATIONS].value);
    }
 
    if(options[TIME].doesOccur) {
        time = atoi(options[TIME].value);
    }
    
    if(options[LINE_MODE].doesOccur) {
        line_mode = TRUE;
        bulk_mode = FALSE;
    }

    if(options[BULK_MODE].doesOccur) {
        bulk_mode = TRUE;
        line_mode = FALSE;
    }
    
    if(options[LOCALE].doesOccur) {
      locale = options[LOCALE].value;
    }

    if(time > 0 && iterations >0){
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    int32_t len = 0;
    resolvedFileName = NULL;
    if(fileName!=NULL){
        //pre-flight
        ucbuf_resolveFileName(sourceDir, fileName,resolvedFileName,&len, &status);
        resolvedFileName = (char*) uprv_malloc(len);
        if(fileName==NULL){
            status= U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if(status == U_BUFFER_OVERFLOW_ERROR){
            status = U_ZERO_ERROR;
        }
        ucbuf_resolveFileName(sourceDir, fileName, resolvedFileName, &len, &status);
        ucharBuf = ucbuf_open(resolvedFileName,&encoding,TRUE,FALSE,&status);

        if(U_FAILURE(status)){
            printf("Could not open the input file %s. Error: %s\n", fileName, u_errorName(status));
            return;
        }
    }
}
Esempio n. 17
0
extern int
main(int argc, char *argv[]) {
    FILE *in, *out;
    const char *pname;
    char *data;
    int32_t length;
    UBool ishelp;
    int rc;

    UDataSwapper *ds;
    UErrorCode errorCode;
    uint8_t outCharset;
    UBool outIsBigEndian;

    U_MAIN_INIT_ARGS(argc, argv);

    /* get the program basename */
    pname=strrchr(argv[0], U_FILE_SEP_CHAR);
    if(pname==NULL) {
        pname=strrchr(argv[0], '/');
    }
    if(pname!=NULL) {
        ++pname;
    } else {
        pname=argv[0];
    }

    argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
    ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
    if(ishelp || argc!=3) {
        return printUsage(pname, ishelp);
    }

    /* parse the output type option */
    data=(char *)options[OPT_OUT_TYPE].value;
    if(data[0]==0 || data[1]!=0) {
        /* the type must be exactly one letter */
        return printUsage(pname, FALSE);
    }
    switch(data[0]) {
    case 'l':
        outIsBigEndian=FALSE;
        outCharset=U_ASCII_FAMILY;
        break;
    case 'b':
        outIsBigEndian=TRUE;
        outCharset=U_ASCII_FAMILY;
        break;
    case 'e':
        outIsBigEndian=TRUE;
        outCharset=U_EBCDIC_FAMILY;
        break;
    default:
        return printUsage(pname, FALSE);
    }

    in=out=NULL;
    data=NULL;

    /* udata_swapPackage() needs the filenames */
    inFilename=argv[1];
    outFilename=argv[2];

    /* open the input file, get its length, allocate memory for it, read the file */
    in=fopen(argv[1], "rb");
    if(in==NULL) {
        fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]);
        rc=2;
        goto done;
    }

    length=fileSize(in);
    if(length<=0) {
        fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]);
        rc=2;
        goto done;
    }

    /*
     * +15: udata_swapPackage() may need to add a few padding bytes to the
     * last item if charset swapping is done,
     * because the last item may be resorted into the middle and then needs
     * additional padding bytes
     */
    data=(char *)malloc(length+15);
    if(data==NULL) {
        fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]);
        rc=2;
        goto done;
    }

    /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */
    uprv_memset(data+length-15, 0xaa, 15);

    if(length!=(int32_t)fread(data, 1, length, in)) {
        fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]);
        rc=3;
        goto done;
    }

    fclose(in);
    in=NULL;

    /* swap the data in-place */
    errorCode=U_ZERO_ERROR;
    ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
                pname, argv[1], u_errorName(errorCode));
        rc=4;
        goto done;
    }

    ds->printError=printError;
    ds->printErrorContext=stderr;

    length=udata_swap(ds, data, length, data, &errorCode);
    udata_closeSwapper(ds);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n",
                pname, argv[1], u_errorName(errorCode));
        rc=4;
        goto done;
    }

    out=fopen(argv[2], "wb");
    if(out==NULL) {
        fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]);
        rc=5;
        goto done;
    }

    if(length!=(int32_t)fwrite(data, 1, length, out)) {
        fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]);
        rc=6;
        goto done;
    }

    fclose(out);
    out=NULL;

    /* all done */
    rc=0;

done:
    if(in!=NULL) {
        fclose(in);
    }
    if(out!=NULL) {
        fclose(out);
    }
    if(data!=NULL) {
        free(data);
    }
    return rc;
}
Esempio n. 18
0
extern int
main(int argc, char* argv[]) {
    UBool sourceTOC, verbose;
    uint32_t maxSize;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    } else if(argc<2) {
        argc=-1;
    }

    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        FILE *where = argc < 0 ? stderr : stdout;

        /*
         * Broken into chucks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(where,
                "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] maxsize listfile\n", argc < 0 ? 'u' : 'U', *argv);
        if (options[0].doesOccur || options[1].doesOccur) {
            fprintf(where, "\n"
                "Read the list file (default: standard input) and create a common data\n"
                "file from specified files. Omit any files larger than maxsize, if maxsize > 0.\n");
            fprintf(where, "\n"
            "Options:\n"
            "\t-h, -?, --help              this usage text\n"
            "\t-v, --verbose               verbose output\n"
            "\t-c, --copyright             include the ICU copyright notice\n"
            "\t-C, --comment comment       include a comment string\n"
            "\t-d, --destdir dir           destination directory\n");
            fprintf(where,
            "\t-n, --name filename         output filename, without .type extension\n"
            "\t                            (default: " U_ICUDATA_NAME ")\n"
            "\t-t, --type filetype         type of the destination file\n"
            "\t                            (default: \" dat \")\n"
            "\t-S, --source tocfile        write a .c source file with the table of\n"
            "\t                            contents\n"
            "\t-e, --entrypoint name       override the c entrypoint name\n"
            "\t                            (default: \"<name>_<type>\")\n");
        }
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    sourceTOC=options[8].doesOccur;

    verbose = options[2].doesOccur;

    maxSize=(uint32_t)uprv_strtoul(argv[1], NULL, 0);

    createCommonDataFile(options[4].doesOccur ? options[4].value : NULL,
                         options[6].doesOccur ? options[6].value : NULL,
                         options[9].doesOccur ? options[9].value : options[6].doesOccur ? options[6].value : NULL,
                         options[7].doesOccur ? options[7].value : NULL,
                         options[10].doesOccur ? options[10].value : NULL,
                         options[3].doesOccur ? U_COPYRIGHT_STRING : options[5].doesOccur ? options[5].value : NULL,
                         argc == 2 ? NULL : argv[2],
                         maxSize, sourceTOC, verbose, NULL);

    return 0;
}
Esempio n. 19
0
extern int
main(int argc, char* argv[]) {
    char filename[300];
    const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
    char *basename=NULL;
    UErrorCode errorCode=U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[DESTDIR].value=u_getDataDirectory();
    options[SOURCEDIR].value="";
    options[UNICODE_VERSION].value="";
    options[ICUDATADIR].value=u_getDataDirectory();
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }
    if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
        /*
         * Broken into chucks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(stderr,
            "Usage: %s [-options] [suffix]\n"
            "\n"
            "read the UnicodeData.txt file and other Unicode properties files and\n"
            "create a binary file " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE " with the bidi/shaping properties\n"
            "\n",
            argv[0]);
        fprintf(stderr,
            "Options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-v or --verbose     verbose output\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-u or --unicode     Unicode version, followed by the version like 3.0.0\n"
            "\t-C or --csource     generate a .c source file rather than the .icu binary\n");
        fprintf(stderr,
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-s or --sourcedir   source directory, followed by the path\n"
            "\t-i or --icudatadir  directory for locating any needed intermediate data files,\n"
            "\t                    followed by path, defaults to %s\n"
            "\tsuffix              suffix that is to be appended with a '-'\n"
            "\t                    to the source file basenames before opening;\n"
            "\t                    'genbidi new' will read UnicodeData-new.txt etc.\n",
            u_getDataDirectory());
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    /* get the options values */
    beVerbose=options[VERBOSE].doesOccur;
    haveCopyright=options[COPYRIGHT].doesOccur;
    srcDir=options[SOURCEDIR].value;
    destDir=options[DESTDIR].value;

    if(argc>=2) {
        suffix=argv[1];
    } else {
        suffix=NULL;
    }

    if(options[UNICODE_VERSION].doesOccur) {
        setUnicodeVersion(options[UNICODE_VERSION].value);
    }
    /* else use the default dataVersion in store.c */

    if (options[ICUDATADIR].doesOccur) {
        u_setDataDirectory(options[ICUDATADIR].value);
    }

    /* prepare the filename beginning with the source dir */
    uprv_strcpy(filename, srcDir);
    basename=filename+uprv_strlen(filename);
    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
        *basename++=U_FILE_SEP_CHAR;
    }

    /* initialize */
    pv=upvec_open(2, &errorCode);

    /* process BidiMirroring.txt */
    writeUCDFilename(basename, "BidiMirroring", suffix);
    parseBidiMirroring(filename, &errorCode);

    /* process additional properties files */
    *basename=0;

    parseBinariesFile(filename, basename, suffix, &propListBinaries, &errorCode);

    parseSingleEnumFile(filename, basename, suffix, &jtSingleEnum, &errorCode);

    parseSingleEnumFile(filename, basename, suffix, &jgSingleEnum, &errorCode);

    /* process UnicodeData.txt */
    writeUCDFilename(basename, "UnicodeData", suffix);
    parseDB(filename, &errorCode);

    /* set proper bidi class for unassigned code points (Cn) */
    parseTwoFieldFile(filename, basename, "DerivedBidiClass", suffix, bidiClassLineFn, &errorCode);

    /* process parsed data */
    if(U_SUCCESS(errorCode)) {
        /* write the properties data file */
        generateData(destDir, options[CSOURCE].doesOccur);
    }

    u_cleanup();
    return errorCode;
}
Esempio n. 20
0
int main(int argc, char* argv[])
{
    ConvData data;
    UErrorCode err = U_ZERO_ERROR, localError;
    char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    const char* destdir, *arg;
    size_t destdirlen;
    char* dot = NULL, *outBasename;
    char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    UVersionInfo icuVersion;
    UBool printFilename;

    err = U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* Set up the ICU version number */
    u_getVersion(icuVersion);
    uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));

    /* preset then read command line options */
    options[OPT_DESTDIR].value=u_getDataDirectory();
    argc=u_parseArgs(argc, argv, LENGTHOF(options), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    } else if(argc<2) {
        argc=-1;
    }
    if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
        FILE *stdfile=argc<0 ? stderr : stdout;
        fprintf(stdfile,
            "usage: %s [-options] files...\n"
            "\tread .ucm codepage mapping files and write .cnv files\n"
            "options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-V or --version     show a version message\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-v or --verbose     Turn on verbose output\n",
            argv[0]);
        fprintf(stdfile,
            "\t      --small       Generate smaller .cnv files. They will be\n"
            "\t                    significantly smaller but may not be compatible with\n"
            "\t                    older versions of ICU and will require heap memory\n"
            "\t                    allocation when loaded.\n"
            "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[OPT_VERSION].doesOccur) {
        printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
               dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
        printf("%s\n", U_COPYRIGHT_STRING);
        exit(0);
    }

    /* get the options values */
    haveCopyright = options[OPT_COPYRIGHT].doesOccur;
    destdir = options[OPT_DESTDIR].value;
    VERBOSE = options[OPT_VERBOSE].doesOccur;
    SMALL = options[OPT_SMALL].doesOccur;

    if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
        IGNORE_SISO_CHECK = TRUE;
    }

    if (destdir != NULL && *destdir != 0) {
        uprv_strcpy(outFileName, destdir);
        destdirlen = uprv_strlen(destdir);
        outBasename = outFileName + destdirlen;
        if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
            *outBasename++ = U_FILE_SEP_CHAR;
            ++destdirlen;
        }
    } else {
        destdirlen = 0;
        outBasename = outFileName;
    }

#if DEBUG
    {
      int i;
      printf("makeconv: processing %d files...\n", argc - 1);
      for(i=1; i<argc; ++i) {
        printf("%s ", argv[i]);
      }
      printf("\n");
      fflush(stdout);
    }
#endif

    err = U_ZERO_ERROR;
    printFilename = (UBool) (argc > 2 || VERBOSE);
    for (++argv; --argc; ++argv)
    {
        arg = getLongPathname(*argv);

        /* Check for potential buffer overflow */
        if(strlen(arg) > UCNV_MAX_FULL_FILE_NAME_LENGTH)
        {
            fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
            return U_BUFFER_OVERFLOW_ERROR;
        }

        /*produces the right destination path for display*/
        if (destdirlen != 0)
        {
            const char *basename;

            /* find the last file sepator */
            basename = findBasename(arg);
            uprv_strcpy(outBasename, basename);
        }
        else
        {
            uprv_strcpy(outFileName, arg);
        }

        /*removes the extension if any is found*/
        dot = uprv_strrchr(outBasename, '.');
        if (dot)
        {
            *dot = '\0';
        }

        /* the basename without extension is the converter name */
        uprv_strcpy(cnvName, outBasename);

        /*Adds the target extension*/
        uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);

#if DEBUG
        printf("makeconv: processing %s  ...\n", arg);
        fflush(stdout);
#endif
        localError = U_ZERO_ERROR;
        initConvData(&data);
        createConverter(&data, arg, &localError);

        if (U_FAILURE(localError))
        {
            /* if an error is found, print out an error msg and keep going */
            fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
                u_errorName(localError));
            if(U_SUCCESS(err)) {
                err = localError;
            }
        }
        else
        {
            /* Insure the static data name matches the  file name */
            /* Changed to ignore directory and only compare base name
             LDH 1/2/08*/
            char *p;
            p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */

            if(p == NULL)            /* OK, try alternate */
            {
                p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
                if(p == NULL)
                {
                    p=cnvName; /* If no separators, no problem */
                }
            }
            else
            {
                p++;   /* If found separtor, don't include it in compare */
            }
            if(uprv_stricmp(p,data.staticData.name))
            {
                fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
                    cnvName,  CONVERTER_FILE_EXTENSION,
                    data.staticData.name);
            }

            uprv_strcpy((char*)data.staticData.name, cnvName);

            if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
                fprintf(stderr,
                    "Error: A converter name must contain only invariant characters.\n"
                    "%s is not a valid converter name.\n",
                    data.staticData.name);
                if(U_SUCCESS(err)) {
                    err = U_INVALID_TABLE_FORMAT;
                }
            }

            uprv_strcpy(cnvNameWithPkg, cnvName);

            localError = U_ZERO_ERROR;
            writeConverterData(&data, cnvNameWithPkg, destdir, &localError);

            if(U_FAILURE(localError))
            {
                /* if an error is found, print out an error msg and keep going*/
                fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
                    u_errorName(localError));
                if(U_SUCCESS(err)) {
                    err = localError;
                }
            }
            else if (printFilename)
            {
                puts(outBasename);
            }
        }
        fflush(stdout);
        fflush(stderr);

        cleanupConvData(&data);
    }

    return err;
}
Esempio n. 21
0
extern "C" int
main(int argc, char* argv[]) {
    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[SOURCEDIR].value="";
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[HELP_H]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }
    if(!options[OUTPUT_FILENAME].doesOccur) {
        argc=-1;
    }
    if( argc<2 ||
        options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur
    ) {
        /*
         * Broken into chunks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(stderr,
            "Usage: %s [-options] infiles+ -o outputfilename\n"
            "\n"
            "Reads the infiles with normalization data and\n"
            "creates a binary file (outputfilename) with the data.\n"
            "\n",
            argv[0]);
        fprintf(stderr,
            "Options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-v or --verbose     verbose output\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-u or --unicode     Unicode version, followed by the version like 5.2.0\n");
        fprintf(stderr,
            "\t-s or --sourcedir   source directory, followed by the path\n"
            "\t-o or --output      output filename\n");
        fprintf(stderr,
            "\t      --fast        optimize the .nrm file for fast normalization,\n"
            "\t                    which might increase its size  (Writes fully decomposed\n"
            "\t                    regular mappings instead of delta mappings.\n"
            "\t                    You should measure the runtime speed to make sure that\n"
            "\t                    this is a good trade-off.)\n");
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    beVerbose=options[VERBOSE].doesOccur;
    haveCopyright=options[COPYRIGHT].doesOccur;

    IcuToolErrorCode errorCode("gennorm2/main()");

#if UCONFIG_NO_NORMALIZATION

    fprintf(stderr,
        "gennorm2 writes a dummy binary data file "
        "because UCONFIG_NO_NORMALIZATION is set, \n"
        "see icu/source/common/unicode/uconfig.h\n");
    udata_createDummy(NULL, NULL, options[OUTPUT_FILENAME].value, errorCode);
    // Should not return an error since this is the expected behaviour if UCONFIG_NO_NORMALIZATION is on.
    // return U_UNSUPPORTED_ERROR;
    return 0;

#else

    LocalPointer<Normalizer2DataBuilder> builder(new Normalizer2DataBuilder(errorCode));
    errorCode.assertSuccess();

    if(options[UNICODE_VERSION].doesOccur) {
        builder->setUnicodeVersion(options[UNICODE_VERSION].value);
    }

    if(options[OPT_FAST].doesOccur) {
        builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST);
    }

    // prepare the filename beginning with the source dir
    CharString filename(options[SOURCEDIR].value, errorCode);
    int32_t pathLength=filename.length();
    if( pathLength>0 &&
        filename[pathLength-1]!=U_FILE_SEP_CHAR &&
        filename[pathLength-1]!=U_FILE_ALT_SEP_CHAR
    ) {
        filename.append(U_FILE_SEP_CHAR, errorCode);
        pathLength=filename.length();
    }

    for(int i=1; i<argc; ++i) {
        printf("gennorm2: processing %s\n", argv[i]);
        filename.append(argv[i], errorCode);
        LocalStdioFilePointer f(fopen(filename.data(), "r"));
        if(f==NULL) {
            fprintf(stderr, "gennorm2 error: unable to open %s\n", filename.data());
            exit(U_FILE_ACCESS_ERROR);
        }
        builder->setOverrideHandling(Normalizer2DataBuilder::OVERRIDE_PREVIOUS);
        parseFile(f.getAlias(), *builder);
        filename.truncate(pathLength);
    }

    builder->writeBinaryFile(options[OUTPUT_FILENAME].value);

    return errorCode.get();

#endif
}
Esempio n. 22
0
extern int
main(int argc, char* argv[]) {
#if !UCONFIG_NO_IDNA
    char* filename = NULL;
#endif
    const char *srcDir=NULL, *destDir=NULL, *icuUniDataDir=NULL;
    const char *bundleName=NULL, *inputFileName = NULL;
    char *basename=NULL;
    int32_t sprepOptions = 0;

    UErrorCode errorCode=U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[DESTDIR].value=u_getDataDirectory();
    options[SOURCEDIR].value="";
    options[UNICODE_VERSION].value="0"; /* don't assume the unicode version */
    options[BUNDLE_NAME].value = DATA_NAME;
    options[NORMALIZE].value = "";

    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }
    if(argc<0 || options[HELP].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
        return printHelp(argc, argv);
        
    }

    /* get the options values */
    beVerbose=options[VERBOSE].doesOccur;
    haveCopyright=options[COPYRIGHT].doesOccur;
    srcDir=options[SOURCEDIR].value;
    destDir=options[DESTDIR].value;
    bundleName = options[BUNDLE_NAME].value;
    if(options[NORMALIZE].doesOccur) {
        icuUniDataDir = options[NORMALIZE].value;
    } else {
        icuUniDataDir = options[NORM_CORRECTION_DIR].value;
    }

    if(argc<2) {
        /* print the help message */
        return printHelp(argc, argv);
    } else {
        inputFileName = argv[1];
    }
    if(!options[UNICODE_VERSION].doesOccur){
        return printHelp(argc, argv);
    }
    if(options[ICUDATADIR].doesOccur) {
        u_setDataDirectory(options[ICUDATADIR].value);
    }
#if UCONFIG_NO_IDNA

    fprintf(stderr,
        "gensprep writes dummy " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE
        " because UCONFIG_NO_IDNA is set, \n"
        "see icu/source/common/unicode/uconfig.h\n");
    generateData(destDir, bundleName);

#else

    setUnicodeVersion(options[UNICODE_VERSION].value);
    filename = (char* ) uprv_malloc(uprv_strlen(srcDir) + 300); /* hopefully this should be enough */
   
    /* prepare the filename beginning with the source dir */
    if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL && uprv_strchr(srcDir,U_FILE_ALT_SEP_CHAR) == NULL){
        filename[0] = '.';
        filename[1] = U_FILE_SEP_CHAR;
        uprv_strcpy(filename+2,srcDir);
    }else{
        uprv_strcpy(filename, srcDir);
    }
    
    basename=filename+uprv_strlen(filename);
    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
        *basename++=U_FILE_SEP_CHAR;
    }
    
    /* initialize */
    init();

    /* process the file */
    uprv_strcpy(basename,inputFileName);
    parseMappings(filename,FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "Could not open file %s for reading. Error: %s \n", filename, u_errorName(errorCode));
        return errorCode;
    }
    
    if(options[NORMALIZE].doesOccur){ /* this option might be set by @normalize;; in the source file */
        /* set up directory for NormalizationCorrections.txt */
        uprv_strcpy(filename,icuUniDataDir);
        basename=filename+uprv_strlen(filename);
        if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
            *basename++=U_FILE_SEP_CHAR;
        }

        *basename++=U_FILE_SEP_CHAR;
        uprv_strcpy(basename,NORM_CORRECTIONS_FILE_NAME);
    
        parseNormalizationCorrections(filename,&errorCode);
        if(U_FAILURE(errorCode)){
            fprintf(stderr,"Could not open file %s for reading \n", filename);
            return errorCode;
        }
        sprepOptions |= _SPREP_NORMALIZATION_ON;
    }
    
    if(options[CHECK_BIDI].doesOccur){ /* this option might be set by @check-bidi;; in the source file */
        sprepOptions |= _SPREP_CHECK_BIDI_ON;
    }

    setOptions(sprepOptions);

    /* process parsed data */
    if(U_SUCCESS(errorCode)) {
        /* write the data file */
        generateData(destDir, bundleName);

        cleanUpData();
    }

    uprv_free(filename);

    u_cleanup();

#endif

    return errorCode;
}
Esempio n. 23
0
extern int
main(int argc, char* argv[]) {
#if !UCONFIG_NO_NORMALIZATION
    char filename[300];
#endif
    const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
    char *basename=NULL;
    UErrorCode errorCode=U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[4].value=u_getDataDirectory();
    options[5].value="";
    options[6].value="3.0.0";
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }
    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        /*
         * Broken into chucks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(stderr,
            "Usage: %s [-options] [suffix]\n"
            "\n"
            "Read the UnicodeData.txt file and other Unicode properties files and\n"
            "create a binary file " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE " with the normalization data\n"
            "\n",
            argv[0]);
        fprintf(stderr,
            "Options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-v or --verbose     verbose output\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-u or --unicode     Unicode version, followed by the version like 3.0.0\n");
        fprintf(stderr,
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-s or --sourcedir   source directory, followed by the path\n"
            "\tsuffix              suffix that is to be appended with a '-'\n"
            "\t                    to the source file basenames before opening;\n"
            "\t                    'gennorm new' will read UnicodeData-new.txt etc.\n");
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    /* get the options values */
    beVerbose=options[2].doesOccur;
    haveCopyright=options[3].doesOccur;
    srcDir=options[5].value;
    destDir=options[4].value;

    if(argc>=2) {
        suffix=argv[1];
    } else {
        suffix=NULL;
    }

#if UCONFIG_NO_NORMALIZATION

    fprintf(stderr,
        "gennorm writes a dummy " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE
        " because UCONFIG_NO_NORMALIZATION is set, \n"
        "see icu/source/common/unicode/uconfig.h\n");
    generateData(destDir);

#else

    setUnicodeVersion(options[6].value);

    /* prepare the filename beginning with the source dir */
    uprv_strcpy(filename, srcDir);
    basename=filename+uprv_strlen(filename);
    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
        *basename++=U_FILE_SEP_CHAR;
    }

    /* initialize */
    init();

    /* process DerivedNormalizationProps.txt (name changed for Unicode 3.2, to <=31 characters) */
    if(suffix==NULL) {
        uprv_strcpy(basename, "DerivedNormalizationProps.txt");
    } else {
        uprv_strcpy(basename, "DerivedNormalizationProps");
        basename[30]='-';
        uprv_strcpy(basename+31, suffix);
        uprv_strcat(basename+31, ".txt");
    }
    parseDerivedNormalizationProperties(filename, &errorCode, FALSE);
    if(U_FAILURE(errorCode)) {
        /* can be only U_FILE_ACCESS_ERROR - try filename from before Unicode 3.2 */
        if(suffix==NULL) {
            uprv_strcpy(basename, "DerivedNormalizationProperties.txt");
        } else {
            uprv_strcpy(basename, "DerivedNormalizationProperties");
            basename[30]='-';
            uprv_strcpy(basename+31, suffix);
            uprv_strcat(basename+31, ".txt");
        }
        parseDerivedNormalizationProperties(filename, &errorCode, TRUE);
    }

    /* process UnicodeData.txt */
    if(suffix==NULL) {
        uprv_strcpy(basename, "UnicodeData.txt");
    } else {
        uprv_strcpy(basename, "UnicodeData");
        basename[11]='-';
        uprv_strcpy(basename+12, suffix);
        uprv_strcat(basename+12, ".txt");
    }
    parseDB(filename, &errorCode);

    /* process parsed data */
    if(U_SUCCESS(errorCode)) {
        processData();

        /* write the properties data file */
        generateData(destDir);

        cleanUpData();
    }

#endif

    return errorCode;
}
Esempio n. 24
0
//----------------------------------------------------------------------------
//
//  main      for gendict
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[ARG_HELP].doesOccur || options[ARG_QMARK].doesOccur) {
        //  -? or -h for help.
        usageAndDie(U_ZERO_ERROR);
    }

    UBool verbose = options[ARG_VERBOSE].doesOccur;

    if (argc < 3) {
        fprintf(stderr, "input and output file must both be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    const char *outFileName  = argv[2];
    const char *wordFileName = argv[1];

    startTime = uprv_getRawUTCtime(); // initialize start timer

	if (options[ARG_ICUDATADIR].doesOccur) {
        u_setDataDirectory(options[ARG_ICUDATADIR].value);
    }

    const char *copyright = NULL;
    if (options[ARG_COPYRIGHT].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

    if (options[ARG_UCHARS].doesOccur == options[ARG_BYTES].doesOccur) {
        fprintf(stderr, "you must specify exactly one type of trie to output!\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    UBool isBytesTrie = options[ARG_BYTES].doesOccur;
    if (isBytesTrie != options[ARG_TRANSFORM].doesOccur) {
        fprintf(stderr, "you must provide a transformation for a bytes trie, and must not provide one for a uchars trie!\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    IcuToolErrorCode status("gendict/main()");

#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO
    const char* outDir=NULL;

    UNewDataMemory *pData;
    char msg[1024];
    UErrorCode tempstatus = U_ZERO_ERROR;

    /* write message with just the name */ // potential for a buffer overflow here...
    sprintf(msg, "gendict writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dataInfo, NULL, &tempstatus);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &tempstatus);
    return (int)tempstatus;

#else
    //  Read in the dictionary source file
    if (verbose) { printf("Opening file %s...\n", wordFileName); }
    const char *codepage = "UTF-8";
    UCHARBUF *f = ucbuf_open(wordFileName, &codepage, TRUE, FALSE, status);
    if (status.isFailure()) {
        fprintf(stderr, "error opening input file: ICU Error \"%s\"\n", status.errorName());
        exit(status.reset());
    }
    if (verbose) { printf("Initializing dictionary builder of type %s...\n", (isBytesTrie ? "BytesTrie" : "UCharsTrie")); }
    DataDict dict(isBytesTrie, status);
    if (status.isFailure()) {
        fprintf(stderr, "new DataDict: ICU Error \"%s\"\n", status.errorName());
        exit(status.reset());
    }
    if (options[ARG_TRANSFORM].doesOccur) {
        dict.setTransform(options[ARG_TRANSFORM].value);
    }

    UnicodeString fileLine;
    if (verbose) { puts("Adding words to dictionary..."); }
    UBool hasValues = FALSE;
    UBool hasValuelessContents = FALSE;
    int lineCount = 0;
    int wordCount = 0;
    int minlen = 255;
    int maxlen = 0;
    UBool isOk = TRUE;
    while (readLine(f, fileLine, status)) {
        lineCount++;
        if (fileLine.isEmpty()) continue;
        
        // Parse word [spaces value].
        int32_t keyLen;
        for (keyLen = 0; keyLen < fileLine.length() && !u_isspace(fileLine[keyLen]); ++keyLen) {}
        if (keyLen == 0) {
            fprintf(stderr, "Error: no word on line %i!\n", lineCount);
            isOk = FALSE;
            continue;
        }
        int32_t valueStart;
        for (valueStart = keyLen;
            valueStart < fileLine.length() && u_isspace(fileLine[valueStart]);
            ++valueStart) {}

        if (keyLen < valueStart) {
            int32_t valueLength = fileLine.length() - valueStart;
            if (valueLength > 15) {
                fprintf(stderr, "Error: value too long on line %i!\n", lineCount);
                isOk = FALSE;
                continue;
            }
            char s[16];
            fileLine.extract(valueStart, valueLength, s, 16, US_INV);
            char *end;
            unsigned long value = uprv_strtoul(s, &end, 0);
            if (end == s || *end != 0 || (int32_t)uprv_strlen(s) != valueLength || value > 0xffffffff) {
                fprintf(stderr, "Error: value syntax error or value too large on line %i!\n", lineCount);
                isOk = FALSE;
                continue;
            }
            dict.addWord(fileLine.tempSubString(0, keyLen), (int32_t)value, status);
            hasValues = TRUE;
            wordCount++;
            if (keyLen < minlen) minlen = keyLen;
            if (keyLen > maxlen) maxlen = keyLen;
        } else {
            dict.addWord(fileLine.tempSubString(0, keyLen), 0, status);
            hasValuelessContents = TRUE;
            wordCount++;
            if (keyLen < minlen) minlen = keyLen;
            if (keyLen > maxlen) maxlen = keyLen;
        }

        if (status.isFailure()) {
            fprintf(stderr, "ICU Error \"%s\": Failed to add word to trie at input line %d in input file\n",
                status.errorName(), lineCount);
            exit(status.reset());
        }
    }
    if (verbose) { printf("Processed %d lines, added %d words, minlen %d, maxlen %d\n", lineCount, wordCount, minlen, maxlen); }

    if (!isOk && status.isSuccess()) {
        status.set(U_ILLEGAL_ARGUMENT_ERROR);
    }
    if (hasValues && hasValuelessContents) {
        fprintf(stderr, "warning: file contained both valued and unvalued strings!\n");
    }

    if (verbose) { printf("Serializing data...isBytesTrie? %d\n", isBytesTrie); }
    int32_t outDataSize;
    const void *outData;
    UnicodeString usp;
    if (isBytesTrie) {
        StringPiece sp = dict.serializeBytes(status);
        outDataSize = sp.size();
        outData = sp.data();
    } else {
        dict.serializeUChars(usp, status);
        outDataSize = usp.length() * U_SIZEOF_UCHAR;
        outData = usp.getBuffer();
    }
    if (status.isFailure()) {
        fprintf(stderr, "gendict: got failure of type %s while serializing, if U_ILLEGAL_ARGUMENT_ERROR possibly due to duplicate dictionary entries\n", status.errorName());
        exit(status.reset());
    }
    if (verbose) { puts("Opening output file..."); }
    UNewDataMemory *pData = udata_create(NULL, NULL, outFileName, &dataInfo, copyright, status);
    if (status.isFailure()) {
        fprintf(stderr, "gendict: could not open output file \"%s\", \"%s\"\n", outFileName, status.errorName());
        exit(status.reset());
    }

    if (verbose) { puts("Writing to output file..."); }
    int32_t indexes[DictionaryData::IX_COUNT] = {
        DictionaryData::IX_COUNT * sizeof(int32_t), 0, 0, 0, 0, 0, 0, 0
    };
    int32_t size = outDataSize + indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
    indexes[DictionaryData::IX_RESERVED1_OFFSET] = size;
    indexes[DictionaryData::IX_RESERVED2_OFFSET] = size;
    indexes[DictionaryData::IX_TOTAL_SIZE] = size;

    indexes[DictionaryData::IX_TRIE_TYPE] = isBytesTrie ? DictionaryData::TRIE_TYPE_BYTES : DictionaryData::TRIE_TYPE_UCHARS;
    if (hasValues) {
        indexes[DictionaryData::IX_TRIE_TYPE] |= DictionaryData::TRIE_HAS_VALUES;
    }

    indexes[DictionaryData::IX_TRANSFORM] = dict.getTransform();
    udata_writeBlock(pData, indexes, sizeof(indexes));
    udata_writeBlock(pData, outData, outDataSize);
    size_t bytesWritten = udata_finish(pData, status);
    if (status.isFailure()) {
        fprintf(stderr, "gendict: error \"%s\" writing the output file\n", status.errorName());
        exit(status.reset());
    }

    if (bytesWritten != (size_t)size) {
        fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

    printf("%s: done writing\t%s (%ds).\n", progName, outFileName, elapsedTime());

#ifdef TEST_GENDICT
    if (isBytesTrie) {
        BytesTrie::Iterator it(outData, outDataSize, status);
        while (it.hasNext()) {
            it.next(status);
            const StringPiece s = it.getString();
            int32_t val = it.getValue();
            printf("%s -> %i\n", s.data(), val);
        }
    } else {
        UCharsTrie::Iterator it((const UChar *)outData, outDataSize, status);
        while (it.hasNext()) {
            it.next(status);
            const UnicodeString s = it.getString();
            int32_t val = it.getValue();
            char tmp[1024];
            s.extract(0, s.length(), tmp, 1024);
            printf("%s -> %i\n", tmp, val);
        }
    }
#endif

    return 0;
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
}
Esempio n. 25
0
//----------------------------------------------------------------------------
//
//  main      for gencfu
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    UErrorCode  status = U_ZERO_ERROR;
    const char *confFileName;
    const char *confWSFileName;
    const char *outFileName;
    const char *outDir = NULL;
    const char *copyright = NULL;

    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[0].doesOccur || options[1].doesOccur) {
        //  -? or -h for help.
        usageAndDie(0);
    }

    if (!(options[3].doesOccur && options[4].doesOccur && options[5].doesOccur)) {
        fprintf(stderr, "confusables file, whole script confusables file and output file must all be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    confFileName   = options[3].value;
    confWSFileName = options[4].value;
    outFileName    = options[5].value;

    if (options[6].doesOccur) {
        u_setDataDirectory(options[6].value);
    }

    status = U_ZERO_ERROR;

    /* Combine the directory with the file name */
    if(options[7].doesOccur) {
        outDir = options[7].value;
    }
    if (options[8].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

#if UCONFIG_NO_REGULAR_EXPRESSIONS || UCONFIG_NO_NORMALIZATION || UCONFIG_NO_FILE_IO
    // spoof detection data file parsing is dependent on regular expressions.
    // TODO: have the tool return an error status.  Requires fixing the ICU data build
    //       so that it doesn't abort entirely on that error.

    UNewDataMemory *pData;
    char msg[1024];

    /* write message with just the name */
    sprintf(msg, "gencfu writes dummy %s because of UCONFIG_NO_REGULAR_EXPRESSIONS and/or UCONFIG_NO_NORMALIZATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &status);
    return (int)status;

#else
    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;

    //  Read in the confusables source file

    int32_t      confusablesLen = 0;
    const char  *confusables = readFile(confFileName, &confusablesLen);
    if (confusables == NULL) {
        printf("gencfu: error reading file  \"%s\"\n", confFileName);
        exit(-1);
    }

    int32_t     wsConfusablesLen = 0;
    const char *wsConfsables =  readFile(confWSFileName, &wsConfusablesLen);
    if (wsConfsables == NULL) {
        printf("gencfu: error reading file  \"%s\"\n", confFileName);
        exit(-1);
    }

    //
    //  Create the Spoof Detector from the source confusables files.
    //     This will compile the data.
    //
    UParseError parseError;
    parseError.line = 0;
    parseError.offset = 0;
    int32_t errType;
    USpoofChecker *sc = uspoof_openFromSource(confusables, confusablesLen,
                                              wsConfsables, wsConfusablesLen,
                                              &errType, &parseError, &status);
    if (U_FAILURE(status)) {
        const char *errFile = 
            (errType == USPOOF_WHOLE_SCRIPT_CONFUSABLE)? confWSFileName : confFileName;
        fprintf(stderr, "gencfu: uspoof_openFromSource error \"%s\"  at file %s, line %d, column %d\n",
                u_errorName(status), errFile, (int)parseError.line, (int)parseError.offset);
        exit(status);
    };


    //
    //  Get the compiled rule data from the USpoofChecker.
    //
    uint32_t        outDataSize;
    uint8_t        *outData;
    outDataSize = uspoof_serialize(sc, NULL, 0, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        fprintf(stderr, "gencfu: uspoof_serialize() returned %s\n", u_errorName(status));
        exit(status);
    }
    status = U_ZERO_ERROR;
    outData = new uint8_t[outDataSize];
    uspoof_serialize(sc, outData, outDataSize, &status);

    // Copy the data format version numbers from the spoof data header into the UDataMemory header.
    
    uprv_memcpy(dh.info.formatVersion, 
                reinterpret_cast<SpoofDataHeader *>(outData)->fFormatVersion,
                sizeof(dh.info.formatVersion));

    //
    //  Create the output file
    //
    size_t bytesWritten;
    UNewDataMemory *pData;
    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "gencfu: Could not open output file \"%s\", \"%s\"\n", 
                         outFileName, u_errorName(status));
        exit(status);
    }


    //  Write the data itself.
    udata_writeBlock(pData, outData, outDataSize);
    // finish up 
    bytesWritten = udata_finish(pData, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "gencfu: Error %d writing the output file\n", status);
        exit(status);
    }
    
    if (bytesWritten != outDataSize) {
        fprintf(stderr, "gencfu: Error writing to output file \"%s\"\n", outFileName);
        exit(-1);
    }

    uspoof_close(sc);
    delete [] outData;
    delete [] confusables;
    delete [] wsConfsables;
    u_cleanup();
    printf("gencfu: tool completed successfully.\n");
    return 0;
#endif   // UCONFIG_NO_REGULAR_EXPRESSIONS
}
extern int
main(int argc, char* argv[]) {
    static char buffer[4096];
    char line[512];
    FileStream *in, *file;
    char *s;
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t i, fileOffset, basenameOffset, length, nread;
    UBool sourceTOC, verbose;
    const char *entrypointName = NULL;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[4].value=u_getDataDirectory();
    options[6].value=COMMON_DATA_NAME;
    options[7].value=DATA_TYPE;
    options[10].value=".";
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    } else if(argc<2) {
        argc=-1;
    }

    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        FILE *where = argc < 0 ? stderr : stdout;
        
        /*
         * Broken into chucks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(where,
                "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] maxsize listfile\n", argc < 0 ? 'u' : 'U', *argv);
        if (options[0].doesOccur || options[1].doesOccur) {
            fprintf(where, "\n"
                "Read the list file (default: standard input) and create a common data\n"
                "file from specified files. Omit any files larger than maxsize, if maxsize > 0.\n");
            fprintf(where, "\n"
            "Options:\n"
            "\t-h, -?, --help              this usage text\n"
            "\t-v, --verbose               verbose output\n"
            "\t-c, --copyright             include the ICU copyright notice\n"
            "\t-C, --comment comment       include a comment string\n"
            "\t-d, --destdir dir           destination directory\n");
            fprintf(where,
            "\t-n, --name filename         output filename, without .type extension\n"
            "\t                            (default: " COMMON_DATA_NAME ")\n"
            "\t-t, --type filetype         type of the destination file\n"
            "\t                            (default: \"" DATA_TYPE "\")\n"
            "\t-S, --source tocfile        write a .c source file with the table of\n"
            "\t                            contents\n"
            "\t-e, --entrypoint name       override the c entrypoint name\n"
            "\t                            (default: \"<name>_<type>\")\n");
        }
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    sourceTOC=options[8].doesOccur;

    verbose = options[2].doesOccur;

    maxSize=(uint32_t)uprv_strtoul(argv[1], NULL, 0);

    if(argc==2) {
        in=T_FileStream_stdin();
    } else {
        in=T_FileStream_open(argv[2], "r");
        if(in==NULL) {
            fprintf(stderr, "gencmn: unable to open input file %s\n", argv[2]);
            exit(U_FILE_ACCESS_ERROR);
        }
    }

    if (verbose) {
        if(sourceTOC) {
            printf("generating %s_%s.c (table of contents source file)\n", options[6].value, options[7].value);
        } else {
            printf("generating %s.%s (common data file with table of contents)\n", options[6].value, options[7].value);
        }
    }

    /* read the list of files and get their lengths */
    while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
        /* remove trailing newline characters */
        s=line;
        while(*s!=0) {
            if(*s=='\r' || *s=='\n') {
                *s=0;
                break;
            }
            ++s;
        }

        /* check for comment */

        if (*line == '#') {
            continue;
        }

        /* add the file */
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
        {
          char *t;
          while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
            *t = U_FILE_SEP_CHAR;
          }
        }
#endif
        addFile(getLongPathname(line), sourceTOC, verbose);
    }

    if(in!=T_FileStream_stdin()) {
        T_FileStream_close(in);
    }

    if(fileCount==0) {
        fprintf(stderr, "gencmn: no files listed in %s\n", argc==2 ? "<stdin>" : argv[2]);
        return 0;
    }

    /* sort the files by basename */
    qsort(files, fileCount, sizeof(File), compareFiles);

    if(!sourceTOC) {
        UNewDataMemory *out;

        /* determine the offsets of all basenames and files in this common one */
        basenameOffset=4+8*fileCount;
        fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
        for(i=0; i<fileCount; ++i) {
            files[i].fileOffset=fileOffset;
            fileOffset+=(files[i].fileSize+15)&~0xf;
            files[i].basenameOffset=basenameOffset;
            basenameOffset+=files[i].basenameLength;
        }

        /* create the output file */
        out=udata_create(options[4].value, options[7].value, options[6].value,
                         &dataInfo,
                         options[3].doesOccur ? U_COPYRIGHT_STRING : options[5].value,
                         &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
                options[4].value, options[6].value, options[7].value,
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* write the table of contents */
        udata_write32(out, fileCount);
        for(i=0; i<fileCount; ++i) {
            udata_write32(out, files[i].basenameOffset);
            udata_write32(out, files[i].fileOffset);
        }

        /* write the basenames */
        for(i=0; i<fileCount; ++i) {
            udata_writeString(out, files[i].basename, files[i].basenameLength);
        }
        length=4+8*fileCount+basenameTotal;

        /* copy the files */
        for(i=0; i<fileCount; ++i) {
            /* pad to 16-align the next file */
            length&=0xf;
            if(length!=0) {
                udata_writePadding(out, 16-length);
            }

            if (verbose) {
                printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
            }

            /* copy the next file */
            file=T_FileStream_open(files[i].pathname, "rb");
            if(file==NULL) {
                fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
                exit(U_FILE_ACCESS_ERROR);
            }
            for(nread = 0;;) {
                length=T_FileStream_read(file, buffer, sizeof(buffer));
                if(length <= 0) {
                    break;
                }
                nread += length;
                udata_writeBlock(out, buffer, length);
            }
            T_FileStream_close(file);
            length=files[i].fileSize;

            if (nread != files[i].fileSize) {
              fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
                exit(U_FILE_ACCESS_ERROR);
            }
        }

        /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
        length&=0xf;
        if(length!=0) {
            udata_writePadding(out, 16-length);
        }

        /* finish */
        udata_finish(out, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }
    } else {
        /* write a .c source file with the table of contents */
        char *filename;
        FileStream *out;

        /* create the output filename */
        filename=s=buffer;
        uprv_strcpy(filename, options[4].value);
        s=filename+uprv_strlen(filename);
        if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
            *s++=U_FILE_SEP_CHAR;
        }
        uprv_strcpy(s, options[6].value);
        if(*(options[7].value)!=0) {
            s+=uprv_strlen(s);
            *s++='_';
            uprv_strcpy(s, options[7].value);
        }
        s+=uprv_strlen(s);
        uprv_strcpy(s, ".c");

        /* open the output file */
        out=T_FileStream_open(filename, "w");
        if(out==NULL) {
            fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
            exit(U_FILE_ACCESS_ERROR);
        }

        /* If an entrypoint is specified, use it. */
        if(options[9].doesOccur) {
            entrypointName = options[9].value;
        } else {
            entrypointName = options[6].value;
        }


        /* write the source file */
        sprintf(buffer,
            "/*\n"
            " * ICU common data table of contents for %s.%s ,\n"
            " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
            " */\n\n"
            "#include \"unicode/utypes.h\"\n"
            "#include \"unicode/udata.h\"\n"
            "\n"
            "/* external symbol declarations for data */\n",
            options[6].value, options[7].value);
        T_FileStream_writeLine(out, buffer);

        sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
        T_FileStream_writeLine(out, buffer);
        for(i=1; i<fileCount; ++i) {
            sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
            T_FileStream_writeLine(out, buffer);
        }
        T_FileStream_writeLine(out, ";\n\n");

        sprintf(
            buffer,
            "U_EXPORT struct {\n"
            "    uint16_t headerSize;\n"
            "    uint8_t magic1, magic2;\n"
            "    UDataInfo info;\n"
            "    char padding[%lu];\n"
            "    uint32_t count, reserved;\n"
            "    struct {\n"
            "        const char *name;\n"
            "        const void *data;\n"
            "    } toc[%lu];\n"
            "} U_EXPORT2 %s_dat = {\n"
            "    32, 0xda, 0x27, {\n"
            "        %lu, 0,\n"
            "        %u, %u, %u, 0,\n"
            "        {0x54, 0x6f, 0x43, 0x50},\n"
            "        {1, 0, 0, 0},\n"
            "        {0, 0, 0, 0}\n"
            "    },\n"
            "    \"\", %lu, 0, {\n",
            (unsigned long)32-4-sizeof(UDataInfo),
            (unsigned long)fileCount,
            entrypointName,
            (unsigned long)sizeof(UDataInfo),
            U_IS_BIG_ENDIAN,
            U_CHARSET_FAMILY,
            U_SIZEOF_UCHAR,
            (unsigned long)fileCount
        );
        T_FileStream_writeLine(out, buffer);

        sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
        T_FileStream_writeLine(out, buffer);
        for(i=1; i<fileCount; ++i) {
            sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
            T_FileStream_writeLine(out, buffer);
        }

        T_FileStream_writeLine(out, "\n    }\n};\n");
        T_FileStream_close(out);

        uprv_free(symPrefix);
    }

    return 0;
}