Пример #1
0
extern int
main(int argc, char* argv[]) {
#if !UCONFIG_NO_IDNA
    char* filename = NULL;
#endif
    const char *srcDir=NULL, *destDir=NULL, *icuUniDataDir=NULL;
    const char *bundleName=NULL, *inputFileName = NULL;
    char *basename=NULL;
    int32_t sprepOptions = 0;

    UErrorCode errorCode=U_ZERO_ERROR;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[DESTDIR].value=u_getDataDirectory();
    options[SOURCEDIR].value="";
    options[UNICODE_VERSION].value="0"; /* don't assume the unicode version */
    options[BUNDLE_NAME].value = DATA_NAME;
    options[NORMALIZE].value = "";

    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }
    if(argc<0 || options[HELP].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
        return printHelp(argc, argv);
        
    }

    /* get the options values */
    beVerbose=options[VERBOSE].doesOccur;
    haveCopyright=options[COPYRIGHT].doesOccur;
    srcDir=options[SOURCEDIR].value;
    destDir=options[DESTDIR].value;
    bundleName = options[BUNDLE_NAME].value;
    if(options[NORMALIZE].doesOccur) {
        icuUniDataDir = options[NORMALIZE].value;
    } else {
        icuUniDataDir = options[NORM_CORRECTION_DIR].value;
    }

    if(argc<2) {
        /* print the help message */
        return printHelp(argc, argv);
    } else {
        inputFileName = argv[1];
    }
    if(!options[UNICODE_VERSION].doesOccur){
        return printHelp(argc, argv);
    }
    if(options[ICUDATADIR].doesOccur) {
        u_setDataDirectory(options[ICUDATADIR].value);
    }
#if UCONFIG_NO_IDNA

    fprintf(stderr,
        "gensprep writes dummy " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE
        " because UCONFIG_NO_IDNA is set, \n"
        "see icu/source/common/unicode/uconfig.h\n");
    generateData(destDir, bundleName);

#else

    setUnicodeVersion(options[UNICODE_VERSION].value);
    filename = (char* ) uprv_malloc(uprv_strlen(srcDir) + 300); /* hopefully this should be enough */
   
    /* prepare the filename beginning with the source dir */
    if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL && uprv_strchr(srcDir,U_FILE_ALT_SEP_CHAR) == NULL){
        filename[0] = '.';
        filename[1] = U_FILE_SEP_CHAR;
        uprv_strcpy(filename+2,srcDir);
    }else{
        uprv_strcpy(filename, srcDir);
    }
    
    basename=filename+uprv_strlen(filename);
    if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
        *basename++=U_FILE_SEP_CHAR;
    }
    
    /* initialize */
    init();

    /* process the file */
    uprv_strcpy(basename,inputFileName);
    parseMappings(filename,FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "Could not open file %s for reading. Error: %s \n", filename, u_errorName(errorCode));
        return errorCode;
    }
    
    if(options[NORMALIZE].doesOccur){ /* this option might be set by @normalize;; in the source file */
        /* set up directory for NormalizationCorrections.txt */
        uprv_strcpy(filename,icuUniDataDir);
        basename=filename+uprv_strlen(filename);
        if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
            *basename++=U_FILE_SEP_CHAR;
        }

        *basename++=U_FILE_SEP_CHAR;
        uprv_strcpy(basename,NORM_CORRECTIONS_FILE_NAME);
    
        parseNormalizationCorrections(filename,&errorCode);
        if(U_FAILURE(errorCode)){
            fprintf(stderr,"Could not open file %s for reading \n", filename);
            return errorCode;
        }
        sprepOptions |= _SPREP_NORMALIZATION_ON;
    }
    
    if(options[CHECK_BIDI].doesOccur){ /* this option might be set by @check-bidi;; in the source file */
        sprepOptions |= _SPREP_CHECK_BIDI_ON;
    }

    setOptions(sprepOptions);

    /* process parsed data */
    if(U_SUCCESS(errorCode)) {
        /* write the data file */
        generateData(destDir, bundleName);

        cleanUpData();
    }

    uprv_free(filename);

#endif

    return errorCode;
}
Пример #2
0
/*
 * CodeSet_Init --
 *
 *    Locate the ICU data file and hand it to ICU, so that later codeset
 *    conversions can use ICU.
 *
 *    Search order as implemented below:
 *      - devel builds (unless TEST_CUSTOM_ICU_DATA_FILE is defined) first try
 *        ICU_DATA_FILE_PATH and, on non-Windows hosts, the directory of the
 *        current module;
 *      - otherwise the caller-supplied icuDataDir;
 *      - otherwise a default: the module directory on Windows (the file is
 *        memory-mapped there), or the library / POSIX_ICU_DIR location
 *        followed by "/icu" elsewhere.
 *
 * Results:
 *    TRUE if ICU was set up, or if setup failed but
 *    CODESET_CAN_FALLBACK_ON_NON_ICU allows falling back to the old codeset
 *    code. FALSE otherwise.
 *
 * Side effects:
 *    Sets dontUseIcu. On success calls u_setDataDirectory() or, for the
 *    memory-mapped case, udata_setCommonData()/udata_setAppData().
 */
Bool
CodeSet_Init(const char *icuDataDir) // IN: ICU data file location in Current code page.
                                     //     Default is used if NULL.
{
#ifdef NO_ICU
   /* Nothing required if not using ICU. */
   return TRUE;
#else // NO_ICU
   DynBuf dbpath;
#ifdef _WIN32
   DWORD attribs;
   utf16_t *modPath = NULL;
   utf16_t *lastSlash;
   utf16_t *wpath;
   HANDLE hFile = INVALID_HANDLE_VALUE;
   HANDLE hMapping = NULL;
   void *memMappedData = NULL;
#else
   struct stat finfo;
#endif
   char *path = NULL;
   Bool ret = FALSE;

   DynBuf_Init(&dbpath);

#ifdef USE_ICU
   /*
    * We're using system ICU, which finds its own data. So nothing to
    * do here.
    */
   dontUseIcu = FALSE;
   ret = TRUE;
   goto exit;
#endif

  /*
   * ********************* WARNING
   * Must avoid recursive calls into the codeset library here, hence
   * the idiotic hoop-jumping. DO NOT change any of these calls to
   * wrapper equivalents or call any other functions that may perform
   * string conversion.
   * ********************* WARNING
   */

#ifdef _WIN32 // {

#if vmx86_devel && !defined(TEST_CUSTOM_ICU_DATA_FILE)
   /*
    * Devel builds use toolchain directory first.
    */
   {
      WCHAR icuFilePath[MAX_PATH] = { 0 };
      DWORD n = ExpandEnvironmentStringsW(ICU_DATA_FILE_PATH,
                                          icuFilePath, ARRAYSIZE(icuFilePath));
      if (n > 0 && n < ARRAYSIZE(icuFilePath)) {
         attribs = GetFileAttributesW(icuFilePath);
         /*
          * The path must exist AND must not be a directory. (An "||" here
          * would accept any existing directory as the data file.)
          */
         if ((INVALID_FILE_ATTRIBUTES != attribs) &&
             (attribs & FILE_ATTRIBUTE_DIRECTORY) == 0) {
            if (!CodeSetOld_Utf16leToCurrent((const char *) icuFilePath,
                                             n * sizeof *icuFilePath,
                                             &path, NULL)) {
               goto exit;
            }
            goto found;
         }
      }
   }
#endif

   if (icuDataDir) {
      /*
       * Data file must be in the specified directory.
       */
      size_t length = strlen(icuDataDir);

      if (!DynBuf_Append(&dbpath, icuDataDir, length)) {
         goto exit;
      }
      /* Add a separator only if the caller's directory lacks a trailing one. */
      if (length && icuDataDir[length - 1] != DIRSEPC) {
         if (!DynBuf_Append(&dbpath, DIRSEPS, strlen(DIRSEPS))) {
            goto exit;
         }
      }
      if (!DynBuf_Append(&dbpath, ICU_DATA_FILE, strlen(ICU_DATA_FILE)) ||
          !DynBuf_Append(&dbpath, "\0", 1)) {
         goto exit;
      }

      /*
       * Check for file existence.
       */
      attribs = GetFileAttributesA(DynBuf_Get(&dbpath));

      if ((INVALID_FILE_ATTRIBUTES == attribs) ||
          (attribs & FILE_ATTRIBUTE_DIRECTORY)) {
         goto exit;
      }

      /* path now owns the buffer; it is released with free() at exit. */
      path = (char *) DynBuf_Detach(&dbpath);
   } else {
      /*
       * Data file must be in the directory of the current module
       * (i.e. the module that contains CodeSet_Init()).
       */
      HMODULE hModule = W32Util_GetModuleByAddress((void *) CodeSet_Init);
      if (!hModule) {
         goto exit;
      }

      modPath = CodeSetGetModulePath(hModule);
      if (!modPath) {
         goto exit;
      }

      lastSlash = wcsrchr(modPath, DIRSEPC_W);
      if (!lastSlash) {
         goto exit;
      }

      /* Cut off the module file name, keeping only its directory. */
      *lastSlash = L'\0';

      /* Build the UTF-16 path; lengths passed to DynBuf_Append are in bytes. */
      if (!DynBuf_Append(&dbpath, modPath,
                         wcslen(modPath) * sizeof(utf16_t)) ||
          !DynBuf_Append(&dbpath, DIRSEPS_W,
                         wcslen(DIRSEPS_W) * sizeof(utf16_t)) ||
          !DynBuf_Append(&dbpath, ICU_DATA_FILE_W,
                         wcslen(ICU_DATA_FILE_W) * sizeof(utf16_t)) ||
          !DynBuf_Append(&dbpath, L"\0", 2)) {
         goto exit;
      }

      /*
       * Since u_setDataDirectory can't handle UTF-16, we would have to
       * now convert this path to local encoding. But that fails when
       * the module is in a path containing characters not in the
       * local encoding (see 282524). So we'll memory-map the file
       * instead and call udata_setCommonData() below.
       */
      wpath = (utf16_t *) DynBuf_Get(&dbpath);
      hFile = CreateFileW(wpath, GENERIC_READ, 0, NULL, OPEN_EXISTING, 0,
                          NULL);
      if (INVALID_HANDLE_VALUE == hFile) {
         goto exit;
      }
      hMapping = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
      if (NULL == hMapping) {
         goto exit;
      }
      memMappedData = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
      if (NULL == memMappedData) {
         goto exit;
      }
   }

#else // } _WIN32 {

#if vmx86_devel && !defined(TEST_CUSTOM_ICU_DATA_FILE)
   {
      char *modPath;
      char *lastSlash;

      /*
       * Devel builds use toolchain directory first.
       */
      if (stat(ICU_DATA_FILE_PATH, &finfo) >= 0 && !S_ISDIR(finfo.st_mode)) {
         if ((path = strdup(ICU_DATA_FILE_PATH)) == NULL) {
            goto exit;
         }
         goto found;
      }

      /*
       * Then we try module directory, if we can get it.
       */
      modPath = CodeSetGetModulePath(HGMP_PRIVILEGE);
      if (modPath) {
         lastSlash = strrchr(modPath, DIRSEPC);
         if (lastSlash) {
            *lastSlash = '\0';

            if (DynBuf_Append(&dbpath, modPath, strlen(modPath)) &&
                DynBuf_Append(&dbpath, DIRSEPS, strlen(DIRSEPS)) &&
                DynBuf_Append(&dbpath, ICU_DATA_FILE,
                              strlen(ICU_DATA_FILE)) &&
                DynBuf_Append(&dbpath, "\0", 1)) {

               if ((stat((const char *) DynBuf_Get(&dbpath), &finfo) >= 0) &&
                   !S_ISDIR(finfo.st_mode)) {
                  free(modPath);
                  path = DynBuf_Detach(&dbpath);
                  goto found;
               } else {
                  /* Not there: reset the buffer for the next candidate. */
                  DynBuf_SetSize(&dbpath, 0);
               }
            }
         }

         free(modPath);
      }
   }
#endif // vmx86_devel

   if (icuDataDir) {
      /* Use the caller-specified ICU data dir. */
      if (!DynBuf_Append(&dbpath, icuDataDir, strlen(icuDataDir))) {
         goto exit;
      }
   } else {
      /* Use a default ICU data dir. */
#   if defined __APPLE__
      Location_GetLibrary_Type *Location_GetLibrary =
         Location_GetLibrary_Addr();

      if (Location_GetLibrary) {
         char *libDir = Location_GetLibrary();
         Bool success =    libDir
                        && DynBuf_Append(&dbpath, libDir, strlen(libDir));

         free(libDir);
         if (!success) {
            goto exit;
         }
      } else
#   endif

      {
         if (!DynBuf_Append(&dbpath, POSIX_ICU_DIR, strlen(POSIX_ICU_DIR))) {
            goto exit;
         }
      }

      if (!DynBuf_Append(&dbpath, "/icu", strlen("/icu"))) {
         goto exit;
      }
   }
   if (!DynBuf_Append(&dbpath, DIRSEPS, strlen(DIRSEPS)) ||
       !DynBuf_Append(&dbpath, ICU_DATA_FILE, strlen(ICU_DATA_FILE)) ||
       !DynBuf_Append(&dbpath, "\0", 1)) {
      goto exit;
   }

   /*
    * Check for file existence. (DO NOT CHANGE TO 'stat' WRAPPER).
    */
   path = (char *) DynBuf_Detach(&dbpath);
   if (stat(path, &finfo) < 0 || S_ISDIR(finfo.st_mode)) {
      goto exit;
   }

#endif // } _WIN32

#if vmx86_devel && !defined(TEST_CUSTOM_ICU_DATA_FILE)
found:
#endif

#ifdef _WIN32
   if (memMappedData) {
      /*
       * Tell ICU to use this mapped data.
       */
      UErrorCode uerr = U_ZERO_ERROR;
      ASSERT(memMappedData);

      udata_setCommonData(memMappedData, &uerr);
      if (uerr != U_ZERO_ERROR) {
         UnmapViewOfFile(memMappedData);
         goto exit;
      }
      udata_setAppData(ICU_DATA_ITEM, memMappedData, &uerr);
      if (uerr != U_ZERO_ERROR) {
         UnmapViewOfFile(memMappedData);
         goto exit;
      }
      /*
       * On success the view is not unmapped here; only the handles are
       * closed below. NOTE(review): presumably ICU keeps using the mapping.
       */
   } else {
#endif
      /*
       * Tell ICU to use this directory.
       */
      u_setDataDirectory(path);
#ifdef _WIN32
   }
#endif

   dontUseIcu = FALSE;
   ret = TRUE;

  exit:
   if (!ret) {
      /*
       * There was an error initing ICU, but if we can fall back on
       * non-ICU (old CodeSet) then things are OK.
       */
      if (CODESET_CAN_FALLBACK_ON_NON_ICU) {
         ret = TRUE;
         dontUseIcu = TRUE;

#ifdef _WIN32
         OutputDebugStringW(L"CodeSet_Init: no ICU\n");
#endif
      }
   }

   /* Common cleanup for success and failure. */
#ifdef _WIN32
   free(modPath);
   if (hMapping) {
      CloseHandle(hMapping);
   }
   if (hFile != INVALID_HANDLE_VALUE) {
      CloseHandle(hFile);
   }
#endif
   free(path);
   DynBuf_Destroy(&dbpath);

   return ret;
#endif
}
Пример #3
0
//----------------------------------------------------------------------------
//
//  main      for gencfu
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    UErrorCode  status = U_ZERO_ERROR;
    const char *confFileName;
    const char *confWSFileName;
    const char *outFileName;
    const char *outDir = NULL;
    const char *copyright = NULL;

    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[0].doesOccur || options[1].doesOccur) {
        //  -? or -h for help.
        usageAndDie(0);
    }

    if (!(options[3].doesOccur && options[4].doesOccur && options[5].doesOccur)) {
        fprintf(stderr, "confusables file, whole script confusables file and output file must all be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    confFileName   = options[3].value;
    confWSFileName = options[4].value;
    outFileName    = options[5].value;

    if (options[6].doesOccur) {
        u_setDataDirectory(options[6].value);
    }

    status = U_ZERO_ERROR;

    /* Combine the directory with the file name */
    if(options[7].doesOccur) {
        outDir = options[7].value;
    }
    if (options[8].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

#if UCONFIG_NO_REGULAR_EXPRESSIONS || UCONFIG_NO_NORMALIZATION || UCONFIG_NO_FILE_IO
    // spoof detection data file parsing is dependent on regular expressions.
    // TODO: have the tool return an error status.  Requires fixing the ICU data build
    //       so that it doesn't abort entirely on that error.

    UNewDataMemory *pData;
    char msg[1024];

    /* write message with just the name */
    sprintf(msg, "gencfu writes dummy %s because of UCONFIG_NO_REGULAR_EXPRESSIONS and/or UCONFIG_NO_NORMALIZATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &status);
    return (int)status;

#else
    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;

    //  Read in the confusables source file

    int32_t      confusablesLen = 0;
    const char  *confusables = readFile(confFileName, &confusablesLen);
    if (confusables == NULL) {
        printf("gencfu: error reading file  \"%s\"\n", confFileName);
        exit(-1);
    }

    int32_t     wsConfusablesLen = 0;
    const char *wsConfsables =  readFile(confWSFileName, &wsConfusablesLen);
    if (wsConfsables == NULL) {
        printf("gencfu: error reading file  \"%s\"\n", confFileName);
        exit(-1);
    }

    //
    //  Create the Spoof Detector from the source confusables files.
    //     This will compile the data.
    //
    UParseError parseError;
    parseError.line = 0;
    parseError.offset = 0;
    int32_t errType;
    USpoofChecker *sc = uspoof_openFromSource(confusables, confusablesLen,
                                              wsConfsables, wsConfusablesLen,
                                              &errType, &parseError, &status);
    if (U_FAILURE(status)) {
        const char *errFile = 
            (errType == USPOOF_WHOLE_SCRIPT_CONFUSABLE)? confWSFileName : confFileName;
        fprintf(stderr, "gencfu: uspoof_openFromSource error \"%s\"  at file %s, line %d, column %d\n",
                u_errorName(status), errFile, (int)parseError.line, (int)parseError.offset);
        exit(status);
    };


    //
    //  Get the compiled rule data from the USpoofChecker.
    //
    uint32_t        outDataSize;
    uint8_t        *outData;
    outDataSize = uspoof_serialize(sc, NULL, 0, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        fprintf(stderr, "gencfu: uspoof_serialize() returned %s\n", u_errorName(status));
        exit(status);
    }
    status = U_ZERO_ERROR;
    outData = new uint8_t[outDataSize];
    uspoof_serialize(sc, outData, outDataSize, &status);

    // Copy the data format version numbers from the spoof data header into the UDataMemory header.
    
    uprv_memcpy(dh.info.formatVersion, 
                reinterpret_cast<SpoofDataHeader *>(outData)->fFormatVersion,
                sizeof(dh.info.formatVersion));

    //
    //  Create the output file
    //
    size_t bytesWritten;
    UNewDataMemory *pData;
    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "gencfu: Could not open output file \"%s\", \"%s\"\n", 
                         outFileName, u_errorName(status));
        exit(status);
    }


    //  Write the data itself.
    udata_writeBlock(pData, outData, outDataSize);
    // finish up 
    bytesWritten = udata_finish(pData, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "gencfu: Error %d writing the output file\n", status);
        exit(status);
    }
    
    if (bytesWritten != outDataSize) {
        fprintf(stderr, "gencfu: Error writing to output file \"%s\"\n", outFileName);
        exit(-1);
    }

    uspoof_close(sc);
    delete [] outData;
    delete confusables;
    delete wsConfsables;
    u_cleanup();
    printf("gencfu: tool completed successfully.\n");
    return 0;
#endif   // UCONFIG_NO_REGULAR_EXPRESSIONS
}
Пример #4
0
/*
 * Exercises u_setMemoryFunctions(): NULL function pointers must be rejected,
 * installed functions must be used by ICU once it is initialized, and a
 * reset of ICU must put the default heap back.
 */
static void TestHeapFunctions() {
    UVersionInfo     versionAfterReset = {0,0,0,0};
    UResourceBundle *bundle            = NULL;
    UErrorCode       status            = U_ZERO_ERROR;
    /* Keep a copy of the ICU data directory so that it can be restored
     * once u_cleanup() has run. */
    char            *savedDataDir      = safeGetICUDataDirectory();

    /* Check that ICU can be cleaned up and brought back.
     *  A failure at this point usually means an ICU service did not clean up,
     *  likely because an earlier test accidentally left something open. */
    ctest_resetICU();

    /* Take ICU down before touching the memory functions. */
    u_cleanup();

    /* Each of the three function pointers is required: passing NULL for
     * any one of them must be reported as an illegal argument. */
    status = U_ZERO_ERROR;
    u_setMemoryFunctions(&gContext, NULL, myMemRealloc, myMemFree, &status);
    TEST_STATUS(status, U_ILLEGAL_ARGUMENT_ERROR);

    status = U_ZERO_ERROR;
    u_setMemoryFunctions(&gContext, myMemAlloc, NULL, myMemFree, &status);
    TEST_STATUS(status, U_ILLEGAL_ARGUMENT_ERROR);

    status = U_ZERO_ERROR;
    u_setMemoryFunctions(&gContext, myMemAlloc, myMemRealloc, NULL, &status);
    TEST_STATUS(status, U_ILLEGAL_ARGUMENT_ERROR);

    /* The context pointer, on the other hand, may be NULL or non-NULL. */
    status = U_ZERO_ERROR;
    u_setMemoryFunctions(NULL, myMemAlloc, myMemRealloc, myMemFree, &status);
    TEST_STATUS(status, U_ZERO_ERROR);
    u_setMemoryFunctions(&gContext, myMemAlloc, myMemRealloc, myMemFree, &status);
    TEST_STATUS(status, U_ZERO_ERROR);

    /* Restore the data directory and bring ICU back up with the test heap
     * functions installed. */
    status = U_ZERO_ERROR;
    u_setDataDirectory(savedDataDir);
    u_init(&status);
    TEST_STATUS(status, U_ZERO_ERROR);

    /* Opening a resource bundle has to allocate through the test heap. */
    gBlockCount = 0;
    status = U_ZERO_ERROR;
    bundle = ures_open(NULL, "es", &status);
    TEST_STATUS(status, U_ZERO_ERROR);
    if (gBlockCount == 0) {
        log_err("Heap functions are not being called from ICU.\n");
    }
    ures_close(bundle);

    /* Resetting ICU is expected to reinstate the default heap; properties
     * must still load afterwards without an explicit u_init(). */
    ctest_resetICU();
    u_getUnicodeVersion(versionAfterReset);
    if (!(versionAfterReset[0] > 0)) {
        log_err("Properties doesn't reinitialize without u_init.\n");
    }
    status = U_ZERO_ERROR;
    u_init(&status);
    TEST_STATUS(status, U_ZERO_ERROR);

    /* From here on, ICU work must not reach the test heap any more. */
    gBlockCount = 0;
    status = U_ZERO_ERROR;
    bundle = ures_open(NULL, "fr", &status);
    TEST_STATUS(status, U_ZERO_ERROR);
    if (gBlockCount != 0) {
        log_err("Heap functions did not reset after u_cleanup.\n");
    }
    ures_close(bundle);
    free(savedDataDir);

    ctest_resetICU();
}
Пример #5
0
//----------------------------------------------------------------------------
//
//  main      for genbrk
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    UErrorCode  status = U_ZERO_ERROR;
    const char *ruleFileName;
    const char *outFileName;
    const char *outDir = NULL;
    const char *copyright = NULL;

    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[0].doesOccur || options[1].doesOccur) {
        //  -? or -h for help.
        usageAndDie(0);
    }

    if (!(options[3].doesOccur && options[4].doesOccur)) {
        fprintf(stderr, "rule file and output file must both be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    ruleFileName = options[3].value;
    outFileName  = options[4].value;

    if (options[5].doesOccur) {
        u_setDataDirectory(options[5].value);
    }

    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;

    /* Combine the directory with the file name */
    if(options[6].doesOccur) {
        outDir = options[6].value;
    }
    if (options[7].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

#if UCONFIG_NO_BREAK_ITERATION

    UNewDataMemory *pData;
    char msg[1024];

    /* write message with just the name */
    sprintf(msg, "genbrk writes dummy %s because of UCONFIG_NO_BREAK_ITERATION, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &status);
    return (int)status;

#else

    //
    //  Read in the rule source file
    //
    long        result;
    long        ruleFileSize;
    FILE        *file;
    char        *ruleBufferC;

    file = fopen(ruleFileName, "rb");
    if( file == 0 ) {
        fprintf(stderr, "Could not open file \"%s\"\n", ruleFileName);
        exit(-1);
    }
    fseek(file, 0, SEEK_END);
    ruleFileSize = ftell(file);
    fseek(file, 0, SEEK_SET);
    ruleBufferC = new char[ruleFileSize+10];

    result = (long)fread(ruleBufferC, 1, ruleFileSize, file);
    if (result != ruleFileSize)  {
        fprintf(stderr, "Error reading file \"%s\"\n", ruleFileName);
        exit (-1);
    }
    ruleBufferC[ruleFileSize]=0;
    fclose(file);

    //
    // Look for a Unicode Signature (BOM) on the rule file
    //
    int32_t        signatureLength;
    const char *   ruleSourceC = ruleBufferC;
    const char*    encoding = ucnv_detectUnicodeSignature(
                           ruleSourceC, ruleFileSize, &signatureLength, &status);
    if (U_FAILURE(status)) {
        exit(status);
    }
    if(encoding!=NULL ){
        ruleSourceC  += signatureLength;
        ruleFileSize -= signatureLength;
    }

    //
    // Open a converter to take the rule file to UTF-16
    //
    UConverter* conv;
    conv = ucnv_open(encoding, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ucnv_open: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    }

    //
    // Convert the rules to UChar.
    //  Preflight first to determine required buffer size.
    //
    uint32_t destCap = ucnv_toUChars(conv,
                       NULL,           //  dest,
                       0,              //  destCapacity,
                       ruleSourceC,
                       ruleFileSize,
                       &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    };

    status = U_ZERO_ERROR;
    UChar *ruleSourceU = new UChar[destCap+1];
    ucnv_toUChars(conv,
                  ruleSourceU,     //  dest,
                  destCap+1,
                  ruleSourceC,
                  ruleFileSize,
                  &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    };
    ucnv_close(conv);


    //
    //  Put the source rules into a UnicodeString
    //
    UnicodeString ruleSourceS(FALSE, ruleSourceU, destCap);

    //
    //  Create the break iterator from the rules
    //     This will compile the rules.
    //
    UParseError parseError;
    parseError.line = 0;
    parseError.offset = 0;
    RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(ruleSourceS, parseError, status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
                u_errorName(status), (int)parseError.line, (int)parseError.offset);
        exit(status);
    };


    //
    //  Get the compiled rule data from the break iterator.
    //
    uint32_t        outDataSize;
    const uint8_t  *outData;
    outData = bi->getBinaryRules(outDataSize);

    // Copy the data format version numbers from the RBBI data header into the UDataMemory header.
    uprv_memcpy(dh.info.formatVersion, ((RBBIDataHeader *)outData)->fFormatVersion, sizeof(dh.info.formatVersion));

    //
    //  Create the output file
    //
    size_t bytesWritten;
    UNewDataMemory *pData;
    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genbrk: Could not open output file \"%s\", \"%s\"\n", 
                         outFileName, u_errorName(status));
        exit(status);
    }


    //  Write the data itself.
    udata_writeBlock(pData, outData, outDataSize);
    // finish up 
    bytesWritten = udata_finish(pData, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genbrk: error %d writing the output file\n", status);
        exit(status);
    }
    
    if (bytesWritten != outDataSize) {
        fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
        exit(-1);
    }

    delete bi;
    delete[] ruleSourceU;
    delete[] ruleBufferC;
    u_cleanup();


    printf("genbrk: tool completed successfully.\n");
    return 0;

#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
}
Пример #6
0
//----------------------------------------------------------------------------
//
//  main      for genctd
//
//  Reads a word list, builds a MutableTrieDictionary from the leading run of
//  non-space characters on each line, compacts it into a
//  CompactTrieDictionary and writes the binary image with udata_create().
//
//  Command line: options[3] supplies the output file name and the first
//  non-option argument is the word list; options[4], [5] and [6] supply the
//  ICU data directory, the output directory and the copyright flag.
//
//  Returns 0 on success.  Failure paths print a message to stderr and call
//  exit() or usageAndDie().  When break iteration or file I/O is configured
//  out, a dummy data file is written and the UErrorCode is returned.
//
//----------------------------------------------------------------------------
int  main(int argc, char **argv) {
    UErrorCode  status = U_ZERO_ERROR;
    const char *wordFileName;
    const char *outFileName;
    const char *outDir = NULL;
    const char *copyright = NULL;

    //
    // Pick up and check the command line arguments,
    //    using the standard ICU tool utils option handling.
    //
    U_MAIN_INIT_ARGS(argc, argv);
    progName = argv[0];
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    if(argc<0) {
        // Unrecognized option; -argc indexes the offending argument.
        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(options[0].doesOccur || options[1].doesOccur) {
        //  -? or -h for help.
        usageAndDie(0);
    }

    if (!options[3].doesOccur || argc < 2) {
        fprintf(stderr, "input and output file must both be specified.\n");
        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
    }
    outFileName  = options[3].value;
    wordFileName = argv[1];

    if (options[4].doesOccur) {
        u_setDataDirectory(options[4].value);
    }

    status = U_ZERO_ERROR;

    /* Combine the directory with the file name */
    if(options[5].doesOccur) {
        outDir = options[5].value;
    }
    if (options[6].doesOccur) {
        copyright = U_COPYRIGHT_STRING;
    }

#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO

    UNewDataMemory *pData;
    char msg[1024];

    /* write message with just the name; bounded so that a long output
       file name cannot overrun msg */
    snprintf(msg, sizeof(msg), "genctd writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
    fprintf(stderr, "%s\n", msg);

    /* write the dummy data file */
    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
    udata_writeBlock(pData, msg, strlen(msg));
    udata_finish(pData, &status);
    return (int)status;

#else
    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;

    //
    //  Read in the dictionary source file
    //
    long        result;
    long        wordFileSize;
    FILE        *file;
    char        *wordBufferC;

    file = fopen(wordFileName, "rb");
    if( file == 0 ) {
        fprintf(stderr, "Could not open file \"%s\"\n", wordFileName);
        exit(-1);
    }
    // Determine the file size.  fseek()/ftell() can fail, and a negative
    // size must not reach the allocation and fread() below.
    if (fseek(file, 0, SEEK_END) != 0 ||
        (wordFileSize = ftell(file)) < 0 ||
        fseek(file, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Could not determine the size of file \"%s\"\n", wordFileName);
        fclose(file);
        exit(-1);
    }
    wordBufferC = new char[wordFileSize+10];

    result = (long)fread(wordBufferC, 1, wordFileSize, file);
    if (result != wordFileSize)  {
        fprintf(stderr, "Error reading file \"%s\"\n", wordFileName);
        exit (-1);
    }
    wordBufferC[wordFileSize]=0;
    fclose(file);

    //
    // Look for a Unicode Signature (BOM) on the word file
    //
    int32_t        signatureLength;
    const char *   wordSourceC = wordBufferC;
    const char*    encoding = ucnv_detectUnicodeSignature(
                           wordSourceC, wordFileSize, &signatureLength, &status);
    if (U_FAILURE(status)) {
        exit(status);
    }
    if(encoding!=NULL ){
        // Skip the signature bytes; they are not part of the word data.
        wordSourceC  += signatureLength;
        wordFileSize -= signatureLength;
    }

    //
    // Open a converter to take the rule file to UTF-16
    //
    UConverter* conv;
    conv = ucnv_open(encoding, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ucnv_open: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    }

    //
    // Convert the words to UChar.
    //  Preflight first to determine required buffer size.
    //
    uint32_t destCap = ucnv_toUChars(conv,
                       NULL,           //  dest,
                       0,              //  destCapacity,
                       wordSourceC,
                       wordFileSize,
                       &status);
    // The preflight call is expected to report a buffer overflow.
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    }

    status = U_ZERO_ERROR;
    UChar *wordSourceU = new UChar[destCap+1];
    ucnv_toUChars(conv,
                  wordSourceU,     //  dest,
                  destCap+1,
                  wordSourceC,
                  wordFileSize,
                  &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    }
    ucnv_close(conv);

    // Get rid of the original file buffer
    delete[] wordBufferC;

    // Create a MutableTrieDictionary, and loop through all the lines, inserting
    // words.

    // First, pick a median character: start in the middle of the text and
    // advance to the first character of a following line that is neither a
    // line break nor white space.
    UChar *current = wordSourceU + (destCap/2);
    UChar uc = *current++;
    UnicodeSet breaks;
    breaks.add(0x000A);     // Line Feed
    breaks.add(0x000D);     // Carriage Return
    breaks.add(0x2028);     // Line Separator
    breaks.add(0x2029);     // Paragraph Separator

    do { 
        // Look for line break
        while (uc && !breaks.contains(uc)) {
            uc = *current++;
        }
        // Now skip to first non-line-break
        while (uc && breaks.contains(uc)) {
            uc = *current++;
        }
    }
    while (uc && (breaks.contains(uc) || u_isspace(uc)));

    MutableTrieDictionary *mtd = new MutableTrieDictionary(uc, status);
    
    if (U_FAILURE(status)) {
        fprintf(stderr, "new MutableTrieDictionary: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    }
    
    // Now add the words. Words are non-space characters at the beginning of
    // lines, and must be at least one UChar.
    current = wordSourceU;
    UChar *candidate = current;
    uc = *current++;
    int32_t length = 0;

    while (uc) {
        while (uc && !u_isspace(uc)) {
            ++length;
            uc = *current++;
        }
        if (length > 0) {
            mtd->addWord(candidate, length, status);
            if (U_FAILURE(status)) {
                fprintf(stderr, "MutableTrieDictionary::addWord: ICU Error \"%s\"\n",
                        u_errorName(status));
                exit(status);
            }
        }
        // Find beginning of next line
        while (uc && !breaks.contains(uc)) {
            uc = *current++;
        }
        while (uc && breaks.contains(uc)) {
            uc = *current++;
        }
        // current is one past uc, so the next word starts at current-1.
        candidate = current-1;
        length = 0;
    }

    // Get rid of the Unicode text buffer
    delete[] wordSourceU;

    // Now, create a CompactTrieDictionary from the mutable dictionary
    CompactTrieDictionary *ctd = new CompactTrieDictionary(*mtd, status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "new CompactTrieDictionary: ICU Error \"%s\"\n", u_errorName(status));
        exit(status);
    }
    
    // Get rid of the MutableTrieDictionary
    delete mtd;

    //
    //  Get the binary data from the dictionary.
    //
    uint32_t        outDataSize = ctd->dataSize();
    const uint8_t  *outData = (const uint8_t *)ctd->data();

    //
    //  Create the output file
    //
    size_t bytesWritten;
    UNewDataMemory *pData;
    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genctd: Could not open output file \"%s\", \"%s\"\n", 
                         outFileName, u_errorName(status));
        exit(status);
    }


    //  Write the data itself.
    udata_writeBlock(pData, outData, outDataSize);
    // finish up 
    bytesWritten = udata_finish(pData, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "genctd: error \"%s\" writing the output file\n", u_errorName(status));
        exit(status);
    }
    
    if (bytesWritten != outDataSize) {
        fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
        exit(-1);
    }
    
    // Get rid of the CompactTrieDictionary
    delete ctd;

    u_cleanup();

    printf("genctd: tool completed successfully.\n");
    return 0;

#endif /* #if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO */
}
Пример #7
0
Файл: derb.c Проект: mojca/xetex
extern int
main(int argc, char* argv[]) {
    const char *encoding = NULL;
    const char *outputDir = NULL; /* NULL = no output directory, use current */
    const char *inputDir  = ".";
    int tostdout = 0;
    int prbom = 0;

    const char *pname;

    UResourceBundle *bundle = NULL;
    UErrorCode status = U_ZERO_ERROR;
    int32_t i = 0;

    UConverter *converter = NULL; // not used

    const char* arg;

    /* Get the name of tool. */
    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
    if (!pname) {
        pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR);
    }
#endif
    if (!pname) {
        pname = *argv;
    } else {
        ++pname;
    }

    /* error handling, printing usage message */
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "%s: error in command line argument \"%s\"\n", pname,
            argv[-argc]);
    }
    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        fprintf(argc < 0 ? stderr : stdout,
            "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n"
            " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n"
            " [ -t, --truncate [ size ] ]\n"
            " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n"
            " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n"
            " [ -A, --suppressAliases]\n"
            " bundle ...\n", argc < 0 ? 'u' : 'U',
            pname);
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[10].doesOccur) {
        fprintf(stderr,
                "%s version %s (ICU version %s).\n"
                "%s\n",
                pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
        return U_ZERO_ERROR;
    }
    if(options[2].doesOccur) {
        encoding = options[2].value;
    }

    if (options[3].doesOccur) {
      if(options[2].doesOccur) {
        fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname);
        return 3;
      }
      tostdout = 1;
    }

    if(options[4].doesOccur) {
        opt_truncate = TRUE;
        if(options[4].value != NULL) {
            truncsize = atoi(options[4].value); /* user defined printable size */
        } else {
            truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */
        }
    } else {
        opt_truncate = FALSE;
    }

    if(options[5].doesOccur) {
        verbose = TRUE;
    }

    if (options[6].doesOccur) {
        outputDir = options[6].value;
    }

    if(options[7].doesOccur) {
        inputDir = options[7].value; /* we'll use users resources */
    }

    if (options[8].doesOccur) {
        prbom = 1;
    }

    if (options[9].doesOccur) {
        u_setDataDirectory(options[9].value);
    }

    if (options[11].doesOccur) {
      suppressAliases = TRUE;
    }

    fflush(stderr); // use ustderr now.
    ustderr = u_finit(stderr, NULL, NULL);

    for (i = 1; i < argc; ++i) {
        static const UChar sp[] = { 0x0020 }; /* " " */
        char infile[4096]; /* XXX Sloppy. */
        char locale[64];
        const char *thename = 0, *p, *q;
        UBool fromICUData = FALSE;

        arg = getLongPathname(argv[i]);

        if (verbose) {
          u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]);
        }

        p = uprv_strrchr(arg, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
        if (p == NULL) {
            p = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
        }
#endif
        if (!p) {
            p = arg;
        } else {
            p++;
        }
        q = uprv_strrchr(p, '.');
        if (!q) {
            for (q = p; *q; ++q)
                ;
        }
        uprv_strncpy(locale, p, q - p);
        locale[q - p] = 0;

        if (!(fromICUData = !uprv_strcmp(inputDir, "-"))) {
            UBool absfilename = *arg == U_FILE_SEP_CHAR;
#if U_PLATFORM_HAS_WIN32_API && U_PLATFORM != U_PF_CYGWIN
            if (!absfilename) {
                absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0])
                    && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR);
            }
#endif
            if (absfilename) {
                thename = arg;
            } else {
                q = uprv_strrchr(arg, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
                if (q == NULL) {
                    q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
                }
#endif
                uprv_strcpy(infile, inputDir);
                if(q != NULL) {
                    uprv_strcat(infile, U_FILE_SEP_STRING);
                    strncat(infile, arg, q-arg);
                }
                thename = infile;
            }
        }
        status = U_ZERO_ERROR;
        if (thename) {
            bundle = ures_openDirect(thename, locale, &status);
        } else {
            bundle = ures_open(fromICUData ? 0 : inputDir, locale, &status);
        }
        if (status == U_ZERO_ERROR) {
            UFILE *out = NULL;

            const char *filename = 0;
            const char *ext = 0;

            if (!locale[0] || !tostdout) {
                filename = uprv_strrchr(arg, U_FILE_SEP_CHAR);

#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
                if (!filename) {
                    filename = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
                }
#endif
                if (!filename) {
                    filename = arg;
                } else {
                    ++filename;
                }
                ext = uprv_strrchr(arg, '.');
                if (!ext) {
                    ext = filename + uprv_strlen(filename);
                }
            }

            if (tostdout) {
                out = u_get_stdout();
            } else {
                char thefile[4096], *tp;
                int32_t len;

                if (outputDir) {
                    uprv_strcpy(thefile, outputDir);
                    uprv_strcat(thefile, U_FILE_SEP_STRING);
                } else {
                    *thefile = 0;
                }
                uprv_strcat(thefile, filename);
                tp = thefile + uprv_strlen(thefile);
                len = (int32_t)uprv_strlen(ext);
                if (len) {
                    tp -= len - 1;
                } else {
                    *tp++ = '.';
                }
                uprv_strcpy(tp, "txt");

                out = u_fopen(thefile, "w", NULL, encoding);
                if (!out) {
                  u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile);
                  u_fclose(ustderr);
                  return 4;
                }
            }

            // now, set the callback.
            ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status);
            if (U_FAILURE(status)) {
              u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname);
              u_fclose(ustderr);
              if(!tostdout) {
                u_fclose(out);
              }
              return 3;
            }

            if (prbom) { /* XXX: Should be done only for UTFs */
              u_fputc(0xFEFF, out);
            }
            u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName()));
            u_fprintf(out, "// This file was dumped by derb(8) from ");
            if (thename) {
              u_fprintf(out, "%s", thename);
            } else if (fromICUData) {
              u_fprintf(out, "the ICU internal %s locale", locale);
            }

            u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n");

            if (locale[0]) {
              u_fprintf(out, "%s", locale);
            } else {
              u_fprintf(out, "%.*s%.*S", (int32_t)(ext - filename),  filename, (int32_t)(sizeof(sp)/sizeof(*sp)), sp);
            }
            printOutBundle(out, converter, bundle, 0, pname, &status);

            if (!tostdout) {
                u_fclose(out);
            }
        }
        else {
            reportError(pname, &status, "opening resource file");
        }

        ures_close(bundle);
    }

    ucnv_close(converter);

    return 0;
}
Пример #8
0
/*
 * Entry point of genrb: parses the command line, optionally prepares a new
 * pool bundle (--writePoolBundle) or loads an existing pool.res
 * (--usePoolBundle), then runs processFile() on every input file.
 *
 * Returns 0 when no input file failed (warnings are not failures).
 * Otherwise returns U_ILLEGAL_ARGUMENT_ERROR for bad arguments, 1 when the
 * pool bundle file cannot be opened or read, or the failing UErrorCode.
 * --help and --version return U_ZERO_ERROR; a failing u_init() other than
 * U_FILE_ACCESS_ERROR ends the process with exit(1).
 */
int
main(int argc,
     char* argv[])
{
    UErrorCode  status    = U_ZERO_ERROR;
    /* status is reset for each input file; this keeps the first failure */
    UErrorCode  firstFailure = U_ZERO_ERROR;
    const char *arg       = NULL;
    const char *outputDir = NULL; /* NULL = no output directory, use current */
    const char *inputDir  = NULL;
    const char *encoding  = "";
    int         i;
    UBool illegalArg = FALSE;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset option defaults, then read the command line */
    options[JAVA_PACKAGE].value = "com.ibm.icu.impl.data";
    options[BUNDLE_NAME].value = "LocaleElements";
    argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr, "%s: error in command line argument \"%s\"\n", argv[0], argv[-argc]);
    } else if(argc<2) {
        /* no input files: treated like an argument error below */
        argc = -1;
    }
    if(options[WRITE_POOL_BUNDLE].doesOccur && options[USE_POOL_BUNDLE].doesOccur) {
        fprintf(stderr, "%s: cannot combine --writePoolBundle and --usePoolBundle\n", argv[0]);
        argc = -1;
    }
    if(options[FORMAT_VERSION].doesOccur) {
        const char *s = options[FORMAT_VERSION].value;
        if(uprv_strlen(s) != 1 || (s[0] != '1' && s[0] != '2')) {
            fprintf(stderr, "%s: unsupported --formatVersion %s\n", argv[0], s);
            argc = -1;
        } else if(s[0] == '1' &&
                  (options[WRITE_POOL_BUNDLE].doesOccur || options[USE_POOL_BUNDLE].doesOccur)
        ) {
            fprintf(stderr, "%s: cannot combine --formatVersion 1 with --writePoolBundle or --usePoolBundle\n", argv[0]);
            argc = -1;
        } else {
            setFormatVersion(s[0] - '0');
        }
    }

    if(options[VERSION].doesOccur) {
        fprintf(stderr,
                "%s version %s (ICU version %s).\n"
                "%s\n",
                argv[0], GENRB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
        return U_ZERO_ERROR;
    }

    if(argc<0) {
        illegalArg = TRUE;
    } else if((options[JAVA_PACKAGE].doesOccur || options[BUNDLE_NAME].doesOccur) &&
              !options[WRITE_JAVA].doesOccur) {
        fprintf(stderr,
                "%s error: command line argument --java-package or --bundle-name "
                "without --write-java\n",
                argv[0]);
        illegalArg = TRUE;
    }

    if(illegalArg || options[HELP1].doesOccur || options[HELP2].doesOccur) {
        /*
         * Broken into chunks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(stderr,
                "Usage: %s [OPTIONS] [FILES]\n"
                "\tReads the list of resource bundle source files and creates\n"
                "\tbinary version of resource bundles (.res files)\n",
                argv[0]);
        fprintf(stderr,
                "Options:\n"
                "\t-h or -? or --help       this usage text\n"
                "\t-q or --quiet            do not display warnings\n"
                "\t-v or --verbose          print extra information when processing files\n"
                "\t-V or --version          prints out version number and exits\n"
                "\t-c or --copyright        include copyright notice\n");
        fprintf(stderr,
                "\t-e or --encoding         encoding of source files\n"
                "\t-d or --destdir          destination directory, followed by the path, defaults to %s\n"
                "\t-s or --sourcedir        source directory for files followed by path, defaults to %s\n"
                "\t-i or --icudatadir       directory for locating any needed intermediate data files,\n"
                "\t                         followed by path, defaults to %s\n",
                u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory());
        fprintf(stderr,
                "\t-j or --write-java       write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"
                "\t                         defaults to ASCII and \\uXXXX format.\n"
                "\t      --java-package     For --write-java: package name for writing the ListResourceBundle,\n"
                "\t                         defaults to com.ibm.icu.impl.data\n");
        fprintf(stderr,
                "\t-b or --bundle-name      For --write-java: root resource bundle name for writing the ListResourceBundle,\n"
                "\t                         defaults to LocaleElements\n"
                "\t-x or --write-xliff      write an XLIFF file for the resource bundle. Followed by\n"
                "\t                         an optional output file name.\n"
                "\t-k or --strict           use pedantic parsing of syntax\n"
                /*added by Jing*/
                "\t-l or --language         for XLIFF: language code compliant with BCP 47.\n");
        fprintf(stderr,
                "\t-C or --noBinaryCollation  do not generate binary collation image;\n"
                "\t                           makes .res file smaller but collator instantiation much slower;\n"
                "\t                           maintains ability to get tailoring rules\n"
                "\t-R or --omitCollationRules do not include collation (tailoring) rules;\n"
                "\t                           makes .res file smaller and maintains collator instantiation speed\n"
                "\t                           but tailoring rules will not be available (they are rarely used)\n");
        fprintf(stderr,
                "\t      --formatVersion      write a .res file compatible with the requested formatVersion (single digit);\n"
                "\t                           for example, --formatVersion 1\n");
        fprintf(stderr,
                "\t      --writePoolBundle    write a pool.res file with all of the keys of all input bundles\n"
                "\t      --usePoolBundle [path-to-pool.res]  point to keys from the pool.res keys pool bundle if they are available there;\n"
                "\t                           makes .res files smaller but dependent on the pool bundle\n"
                "\t                           (--writePoolBundle and --usePoolBundle cannot be combined)\n");

        return illegalArg ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[VERBOSE].doesOccur) {
        setVerbose(TRUE);
    }

    if(options[QUIET].doesOccur) {
        setShowWarning(FALSE);
    }
    if(options[STRICT].doesOccur) {
        setStrict(TRUE);
    }
    if(options[COPYRIGHT].doesOccur){
        setIncludeCopyright(TRUE);
    }

    if(options[SOURCEDIR].doesOccur) {
        inputDir = options[SOURCEDIR].value;
    }

    if(options[DESTDIR].doesOccur) {
        outputDir = options[DESTDIR].value;
    }

    if(options[ENCODING].doesOccur) {
        encoding = options[ENCODING].value;
    }

    if(options[ICUDATADIR].doesOccur) {
        u_setDataDirectory(options[ICUDATADIR].value);
    }
    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
        /* Note: u_init() will try to open ICU property data.
         *       failures here are expected when building ICU from scratch.
         *       ignore them.
        */
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }
    status = U_ZERO_ERROR;
    if(options[WRITE_JAVA].doesOccur) {
        write_java = TRUE;
        outputEnc = options[WRITE_JAVA].value;
    }

    if(options[WRITE_XLIFF].doesOccur) {
        write_xliff = TRUE;
        if(options[WRITE_XLIFF].value != NULL){
            xliffOutputFileName = options[WRITE_XLIFF].value;
        }
    }

    initParser(options[NO_COLLATION_RULES].doesOccur);

    /*added by Jing*/
    if(options[LANGUAGE].doesOccur) {
        language = options[LANGUAGE].value;
    }

    if(options[WRITE_POOL_BUNDLE].doesOccur) {
        newPoolBundle = bundle_open(NULL, TRUE, &status);
        if(U_FAILURE(status)) {
            fprintf(stderr, "unable to create an empty bundle for the pool keys: %s\n", u_errorName(status));
            return status;
        } else {
            /* the new bundle's locale is "pool.res" with the suffix cut off */
            const char *poolResName = "pool.res";
            char *nameWithoutSuffix = uprv_malloc(uprv_strlen(poolResName) + 1);
            if (nameWithoutSuffix == NULL) {
                fprintf(stderr, "out of memory error\n");
                return U_MEMORY_ALLOCATION_ERROR;
            }
            uprv_strcpy(nameWithoutSuffix, poolResName);
            *uprv_strrchr(nameWithoutSuffix, '.') = 0;
            newPoolBundle->fLocale = nameWithoutSuffix;
        }
    }

    if(options[USE_POOL_BUNDLE].doesOccur) {
        const char *poolResName = "pool.res";
        FileStream *poolFile;
        int32_t poolFileSize;
        int32_t indexLength;
        /*
         * TODO: Consolidate inputDir/filename handling from main() and processFile()
         * into a common function, and use it here as well.
         * Try to create toolutil functions for dealing with dir/filenames and
         * loading ICU data files without udata_open().
         * Share code with icupkg?
         * Also, make_res_filename() seems to be unused. Review and remove.
         */
        /* directory of pool.res: the option value, else inputDir, else none */
        if (options[USE_POOL_BUNDLE].value!=NULL) {
            uprv_strcpy(theCurrentFileName, options[USE_POOL_BUNDLE].value);
            uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING);
        } else if (inputDir) {
            uprv_strcpy(theCurrentFileName, inputDir);
            uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING);
        } else {
            *theCurrentFileName = 0;
        }
        uprv_strcat(theCurrentFileName, poolResName);
        poolFile = T_FileStream_open(theCurrentFileName, "rb");
        if (poolFile == NULL) {
            fprintf(stderr, "unable to open pool bundle file %s\n", theCurrentFileName);
            return 1;
        }
        poolFileSize = T_FileStream_size(poolFile);
        if (poolFileSize < 32) {
            fprintf(stderr, "the pool bundle file %s is too small\n", theCurrentFileName);
            return 1;
        }
        /* allocation size is rounded up to a multiple of 16 bytes */
        poolBundle.fBytes = (uint8_t *)uprv_malloc((poolFileSize + 15) & ~15);
        if (poolFileSize > 0 && poolBundle.fBytes == NULL) {
            fprintf(stderr, "unable to allocate memory for the pool bundle file %s\n", theCurrentFileName);
            return U_MEMORY_ALLOCATION_ERROR;
        } else {
            UDataSwapper *ds;
            const DataHeader *header;
            int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize);
            int32_t keysBottom;
            if (bytesRead != poolFileSize) {
                fprintf(stderr, "unable to read the pool bundle file %s\n", theCurrentFileName);
                return 1;
            }
            /*
             * Swap the pool bundle so that a single checked-in file can be used.
             * The swapper functions also test that the data looks like
             * a well-formed .res file.
             */
            ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead,
                                               U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status);
            if (U_FAILURE(status)) {
                fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n",
                        theCurrentFileName, u_errorName(status));
                return status;
            }
            ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status);
            udata_closeSwapper(ds);
            if (U_FAILURE(status)) {
                fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n",
                        theCurrentFileName, u_errorName(status));
                return status;
            }
            header = (const DataHeader *)poolBundle.fBytes;
            if (header->info.formatVersion[0]!=2) {
                fprintf(stderr, "invalid format of pool bundle file %s\n", theCurrentFileName);
                return U_INVALID_FORMAT_ERROR;
            }
            /* the indexes start one 32-bit word after the end of the header */
            poolBundle.fKeys = (const char *)header + header->dataHeader.headerSize;
            poolBundle.fIndexes = (const int32_t *)poolBundle.fKeys + 1;
            indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff;
            if (indexLength <= URES_INDEX_POOL_CHECKSUM) {
                fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", theCurrentFileName);
                return U_INVALID_FORMAT_ERROR;
            }
            /* the keys follow that first word and the indexes */
            keysBottom = (1 + indexLength) * 4;
            poolBundle.fKeys += keysBottom;
            poolBundle.fKeysLength = (poolBundle.fIndexes[URES_INDEX_KEYS_TOP] * 4) - keysBottom;
            poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM];
        }
        /* count the keys: each one ends with a NUL byte */
        for (i = 0; i < poolBundle.fKeysLength; ++i) {
            if (poolBundle.fKeys[i] == 0) {
                ++poolBundle.fKeysCount;
            }
        }
        T_FileStream_close(poolFile);
        setUsePoolBundle(TRUE);
    }

    if(options[INCLUDE_UNIHAN_COLL].doesOccur) {
        gIncludeUnihanColl = TRUE;
    }

    if((argc-1)!=1) {
        printf("genrb number of files: %d\n", argc - 1);
    }
    /* generate the binary files */
    for(i = 1; i < argc; ++i) {
        status = U_ZERO_ERROR;
        arg    = getLongPathname(argv[i]);

        if (inputDir) {
            uprv_strcpy(theCurrentFileName, inputDir);
            uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING);
        } else {
            *theCurrentFileName = 0;
        }
        uprv_strcat(theCurrentFileName, arg);

        if (isVerbose()) {
            printf("Processing file \"%s\"\n", theCurrentFileName);
        }
        processFile(arg, encoding, inputDir, outputDir, NULL,
                    options[NO_BINARY_COLLATION].doesOccur,
                    &status);
        /* Keep processing the remaining files, but remember the failure so
           that a later success cannot hide it from the exit status. */
        if (U_FAILURE(status) && U_SUCCESS(firstFailure)) {
            firstFailure = status;
        }
    }

    uprv_free(poolBundle.fBytes);

    if(options[WRITE_POOL_BUNDLE].doesOccur) {
        char outputFileName[256];
        bundle_write(newPoolBundle, outputDir, NULL, outputFileName, sizeof(outputFileName), &status);
        bundle_close(newPoolBundle, &status);
        if(U_FAILURE(status)) {
            fprintf(stderr, "unable to write the pool bundle: %s\n", u_errorName(status));
        }
    }

    u_cleanup();

    /* Report a failure from any input file, not only from the last one. */
    if (U_SUCCESS(status) && U_FAILURE(firstFailure)) {
        status = firstFailure;
    }

    /* Dont return warnings as a failure */
    if (U_SUCCESS(status)) {
        return 0;
    }

    return status;
}
Пример #9
0
/*
 * Exercises u_setAtomicIncDecFunctions(): the call is expected to be rejected
 * with U_INVALID_STATE_ERROR while ICU is initialized, rejected with
 * U_ILLEGAL_ARGUMENT_ERROR for a NULL function pointer, and accepted after
 * u_cleanup().  The test then checks that umtx_atomic_inc()/umtx_atomic_dec()
 * go through the installed functions, and no longer do so after ICU has been
 * reset.  The statements depend on ICU's global state, so their order matters.
 */
static void TestIncDecFunctions() {
    UErrorCode   status = U_ZERO_ERROR;
    int32_t      t = 1; /* random value to make sure that Inc/dec works */
    char         *dataDir;

    /* Save ICU's data dir so that it can be restored
       after cleanup and reinit.  */
    dataDir = safeGetICUDataDirectory();

    /* Verify that ICU can be cleaned up and reinitialized successfully.
     *  Failure here usually means that some ICU service didn't clean up successfully,
     *  probably because some earlier test accidentally left something open. */
    ctest_resetICU();

    /* Can not set inc/dec functions if ICU is already initialized */
    u_setAtomicIncDecFunctions(&gIncDecContext, myIncFunc, myDecFunc,  &status);
    TEST_STATUS(status, U_INVALID_STATE_ERROR);

    /* Clean up ICU */
    u_cleanup();

    /* Can not set functions with NULL values */
    status = U_ZERO_ERROR;
    u_setAtomicIncDecFunctions(&gIncDecContext, NULL, myDecFunc,  &status);
    TEST_STATUS(status, U_ILLEGAL_ARGUMENT_ERROR);
    status = U_ZERO_ERROR;
    u_setAtomicIncDecFunctions(&gIncDecContext, myIncFunc, NULL,  &status);
    TEST_STATUS(status, U_ILLEGAL_ARGUMENT_ERROR);

    /* u_setAtomicIncDecFunctions() should work with null or non-null context pointer */
    status = U_ZERO_ERROR;
    gExpectedContext = NULL;
    u_setAtomicIncDecFunctions(NULL, myIncFunc, myDecFunc,  &status);
    TEST_STATUS(status, U_ZERO_ERROR);
    gExpectedContext = &gIncDecContext;
    u_setAtomicIncDecFunctions(&gIncDecContext, myIncFunc, myDecFunc,  &status);
    TEST_STATUS(status, U_ZERO_ERROR);


    /* After reinitializing ICU, we should not be able to set the inc/dec funcs again. */
    status = U_ZERO_ERROR;
    u_setDataDirectory(dataDir);
    u_init(&status);
    TEST_STATUS(status, U_ZERO_ERROR);
    gExpectedContext = &gIncDecContext;
    u_setAtomicIncDecFunctions(&gIncDecContext, myIncFunc, myDecFunc,  &status);
    TEST_STATUS(status, U_INVALID_STATE_ERROR);

    /* Doing ICU operations should cause our functions to be called */
    gIncCount = 0;
    gDecCount = 0;
    umtx_atomic_inc(&t);
    TEST_ASSERT(t == 2);
    umtx_atomic_dec(&t);
    TEST_ASSERT(t == 1);
    TEST_ASSERT(gIncCount > 0);
    TEST_ASSERT(gDecCount > 0);


    /* Cleanup should cancel use of our inc/dec functions. */
    /* Additional ICU operations should not use them */
    ctest_resetICU();
    gIncCount = 0;
    gDecCount = 0;
    status = U_ZERO_ERROR;
    u_setDataDirectory(dataDir);
    u_init(&status);
    TEST_ASSERT(gIncCount == 0);
    TEST_ASSERT(gDecCount == 0);

    /* status is not passed to the inc/dec calls below, so this check only
       confirms that it is still U_ZERO_ERROR */
    status = U_ZERO_ERROR;
    umtx_atomic_inc(&t);
    umtx_atomic_dec(&t);
    TEST_STATUS(status, U_ZERO_ERROR);
    TEST_ASSERT(gIncCount == 0);
    TEST_ASSERT(gDecCount == 0);

    /* release the data directory string obtained from safeGetICUDataDirectory() */
    free(dataDir);
}
Пример #10
0
/**
 * Test installing user-supplied mutex callbacks with u_setMutexFunctions().
 *
 * The checks are performed in this order:
 *  1. With ICU freshly reset, setting the callbacks is expected to report
 *     U_INVALID_STATE_ERROR.
 *  2. After u_cleanup(), passing NULL for any one of the four callbacks is
 *     expected to report U_ILLEGAL_ARGUMENT_ERROR.
 *  3. With all four callbacks non-NULL, both a NULL and a non-NULL context
 *     pointer are expected to be accepted.
 *  4. After u_init(), setting the callbacks again is expected to report
 *     U_INVALID_STATE_ERROR.
 *  5. Opening a resource bundle is expected to bump the test mutex counters.
 *  6. After ctest_resetICU(), no test mutex is expected to remain active, and
 *     further ICU work is expected to leave the counters at zero.
 *
 * The statement order matters: each step depends on the ICU state left behind
 * by the previous one.
 */
static void TestMutexFunctions() {
    UErrorCode       status = U_ZERO_ERROR;
    UResourceBundle *rb     = NULL;
    char            *icuDataDir;

    /* Start with a clean failure count; it is consulted at the end of the test.
     * NOTE(review): presumably incremented by TEST_STATUS/TEST_ASSERT - confirm. */
    gMutexFailures = 0;

    /*  Save initial ICU state so that it can be restored later.
     *  u_cleanup(), which is called in this test, resets ICU's state.
     *  The returned string is released with free() at the end of this function.
     */
    icuDataDir = safeGetICUDataDirectory();

    /* Verify that ICU can be cleaned up and reinitialized successfully.
     *  Failure here usually means that some ICU service didn't clean up successfully,
     *  probably because some earlier test accidentally left something open. */
    ctest_resetICU();

    /* Can not set mutex functions if ICU is already initialized */
    u_setMutexFunctions(&gContext, myMutexInit, myMutexDestroy, myMutexLock, myMutexUnlock, &status);
    TEST_STATUS(status, U_INVALID_STATE_ERROR);

    /* Un-initialize ICU */
    u_cleanup();

    /* Can not set Mutex functions with NULL values.
     * Each call below passes NULL for exactly one callback: init, destroy, lock, unlock. */
    status = U_ZERO_ERROR;
    u_setMutexFunctions(&gContext, NULL, myMutexDestroy, myMutexLock, myMutexUnlock, &status);
    TEST_STATUS(status, U_ILLEGAL_ARGUMENT_ERROR);
    status = U_ZERO_ERROR;
    u_setMutexFunctions(&gContext, myMutexInit, NULL, myMutexLock, myMutexUnlock, &status);
    TEST_STATUS(status, U_ILLEGAL_ARGUMENT_ERROR);
    status = U_ZERO_ERROR;
    u_setMutexFunctions(&gContext, myMutexInit, myMutexDestroy, NULL, myMutexUnlock, &status);
    TEST_STATUS(status, U_ILLEGAL_ARGUMENT_ERROR);
    status = U_ZERO_ERROR;
    u_setMutexFunctions(&gContext, myMutexInit, myMutexDestroy, myMutexLock, NULL, &status);
    TEST_STATUS(status, U_ILLEGAL_ARGUMENT_ERROR);

    /* u_setMutexFunctions() should work with null or non-null context pointer */
    status = U_ZERO_ERROR;
    u_setMutexFunctions(NULL, myMutexInit, myMutexDestroy, myMutexLock, myMutexUnlock, &status);
    TEST_STATUS(status, U_ZERO_ERROR);
    /* status is not reset here; the check above expects it to still be U_ZERO_ERROR. */
    u_setMutexFunctions(&gContext, myMutexInit, myMutexDestroy, myMutexLock, myMutexUnlock, &status);
    TEST_STATUS(status, U_ZERO_ERROR);


    /* After reinitializing ICU, we should not be able to set the mutex funcs again. */
    status = U_ZERO_ERROR;
    u_setDataDirectory(icuDataDir);
    u_init(&status);
    TEST_STATUS(status, U_ZERO_ERROR);
    u_setMutexFunctions(&gContext, myMutexInit, myMutexDestroy, myMutexLock, myMutexUnlock, &status);
    TEST_STATUS(status, U_INVALID_STATE_ERROR);

    /* Doing ICU operations should cause mutexes to be created through our test mutex functions.
     * NOTE(review): gBlockCount is not used anywhere else in this function; this reset
     * looks like a leftover from another test - confirm before removing. */
    gBlockCount = 0;
    status = U_ZERO_ERROR;
    /*
     * Note: If we get assertion failures here because
     * uresbund.c:resbMutex's fMagic is wrong, check if ures_flushCache() did
     * flush and delete the cache. If it fails to empty the cache, it will not
     * delete it and ures_cleanup() will not destroy resbMutex.
     * That would leave a mutex from the default implementation which does not
     * pass this test implementation's assertions.
     */
    rb = ures_open(NULL, "es", &status);
    TEST_STATUS(status, U_ZERO_ERROR);
    TEST_ASSERT(gTotalMutexesInitialized > 0);
    TEST_ASSERT(gTotalMutexesActive > 0);

    ures_close(rb);

    /* Cleanup should destroy all of the mutexes. */
    ctest_resetICU();
    /* Clear status for the u_init() call further below. */
    status = U_ZERO_ERROR;
    TEST_ASSERT(gTotalMutexesInitialized > 0);
    TEST_ASSERT(gTotalMutexesActive == 0);


    /* Additional ICU operations should no longer use our dummy test mutexes.
     * The counters are zeroed first so that any use of the test callbacks shows up below.
     * NOTE(review): unlike the earlier re-initialization, the data directory is not set
     * explicitly before this u_init() - presumably ctest_resetICU() takes care of it; confirm. */
    gTotalMutexesInitialized = 0;
    gTotalMutexesActive      = 0;
    u_init(&status);
    TEST_STATUS(status, U_ZERO_ERROR);

    status = U_ZERO_ERROR;
    rb = ures_open(NULL, "fr", &status);
    TEST_STATUS(status, U_ZERO_ERROR);
    TEST_ASSERT(gTotalMutexesInitialized == 0);
    TEST_ASSERT(gTotalMutexesActive == 0);

    ures_close(rb);
    free(icuDataDir);

    /* If anything failed, print hints about the usual cause. */
    if(gMutexFailures) {
      log_info("Note: these failures may be caused by ICU failing to initialize/uninitialize properly.\n");
      log_verbose("Check for prior tests which may not have closed all open resources. See the internal function ures_flushCache()\n");
    }
}