Esempio n. 1
0
static void print_icu_converters(const struct config_t *p_config)
{
    int32_t count;
    int32_t i;

    count = ucnv_countAvailable();
    if (p_config->xmloutput)
        fprintf(p_config->outfile, "<converters count=\"%d\" default=\"%s\">\n",
                count, ucnv_getDefaultName());
    else
    {
        fprintf(p_config->outfile, "Available ICU converters: %d\n", count);
        fprintf(p_config->outfile, "Default ICU Converter is: '%s'\n",
                ucnv_getDefaultName());
    }

    for (i = 0; i < count; i++)
    {
        if (p_config->xmloutput)
            fprintf(p_config->outfile, "<converter id=\"%s\"/>\n",
                    ucnv_getAvailableName(i));
        else
            fprintf(p_config->outfile, "%s\n", ucnv_getAvailableName(i));
    }

    if (p_config->xmloutput)
        fprintf(p_config->outfile, "</converters>\n");
    else
        fprintf(p_config->outfile, "\n");
}
Esempio n. 2
0
int main()
{

  printf("Default Converter=%s\n", ucnv_getDefaultName() );
  
  convsample_02();  // C  , u->koi8r, conv
  convsample_03();  // C,   iterate

  convsample_05();  // C,  utf8->u, getNextUChar
  convsample_06(); // C freq counter thingy

  convsample_12();  // C,  sjis->u, conv
  convsample_13();  // C,  big5->u, getNextU
  
  convsample_20();  // C, callback
  convsample_21();  // C, callback debug
  
  convsample_40();  // C,   cp37 -> UTF16 [data02.bin -> data40.utf16]
  
  convsample_46();  // C,  UTF16 -> latin3 [data41.utf16 -> data46.out]
  
  printf("End of converter samples.\n");
  
  fflush(stdout);
  fflush(stderr);
  
  return 0;
}
Esempio n. 3
0
QTextCodec *QIcuCodec::defaultCodecUnlocked()
{
    QCoreGlobalData *globalData = QCoreGlobalData::instance();
    if (!globalData)
        return 0;
    QTextCodec *c = globalData->codecForLocale.loadAcquire();
    if (c)
        return c;

#if defined(QT_LOCALE_IS_UTF8)
    const char *name = "UTF-8";
#else
    const char *name = ucnv_getDefaultName();
#endif
    c = codecForNameUnlocked(name);
    globalData->codecForLocale.storeRelease(c);
    return c;
}
CFStringEncoding
CFStringGetSystemEncoding (void)
{
#if defined(_WIN32)
  return kCFStringEncodingASCII;
#else
  if (_kCFStringSystemEncoding == kCFStringEncodingInvalidId)
    {
      GSMutexLock (&_kCFStringEncodingLock);
      if (_kCFStringSystemEncoding == kCFStringEncodingInvalidId)
        {
          const char *name;
          const char *defaultName;
          UErrorCode err = U_ZERO_ERROR;

          defaultName = ucnv_getDefaultName ();
          name = ucnv_getStandardName (defaultName, "MIME", &err);
          if (name != NULL)
            {
              _kCFStringSystemEncoding =
                CFStringConvertStandardNameToEncoding (name, -1);
            }
          else
            {
              name = ucnv_getStandardName (defaultName, "IANA", &err);
              if (name != NULL)
                _kCFStringSystemEncoding =
                  CFStringConvertStandardNameToEncoding (name, -1);
              else
                _kCFStringSystemEncoding = kCFStringEncodingInvalidId;
            }
        }
      GSMutexUnlock (&_kCFStringEncodingLock);
    }
  return _kCFStringSystemEncoding;
#endif
}
Esempio n. 5
0
extern int
main(int argc, char* argv[]) {
    const char *encoding = NULL;
    const char *outputDir = NULL; /* NULL = no output directory, use current */
    const char *inputDir  = ".";
    int tostdout = 0;
    int prbom = 0;

    const char *pname;

    UResourceBundle *bundle = NULL;
    UErrorCode status = U_ZERO_ERROR;
    int32_t i = 0;

    UConverter *converter;

    const char* arg;

    /* Get the name of tool. */
    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
    if (!pname) {
        pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR);
    }
#endif
    if (!pname) {
        pname = *argv;
    } else {
        ++pname;
    }

    /* error handling, printing usage message */
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "%s: error in command line argument \"%s\"\n", pname,
            argv[-argc]);
    }
    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        fprintf(argc < 0 ? stderr : stdout,
            "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n"
            " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n"
            " [ -t, --truncate [ size ] ]\n"
            " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n"
            " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n"
            " [ -A, --suppressAliases]\n"
            " bundle ...\n", argc < 0 ? 'u' : 'U',
            pname);
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[10].doesOccur) {
        fprintf(stderr,
                "%s version %s (ICU version %s).\n"
                "%s\n",
                pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
        return U_ZERO_ERROR;
    }
    if(options[2].doesOccur) {
        encoding = options[2].value;
    }

    if (options[3].doesOccur) {
        tostdout = 1;
    }

    if(options[4].doesOccur) {
        trunc = TRUE;
        if(options[4].value != NULL) {
            truncsize = atoi(options[4].value); /* user defined printable size */
        } else {
            truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */
        }
    } else {
        trunc = FALSE;
    }

    if(options[5].doesOccur) {
        verbose = TRUE;
    }

    if (options[6].doesOccur) {
        outputDir = options[6].value;
    }

    if(options[7].doesOccur) {
        inputDir = options[7].value; /* we'll use users resources */
    }

    if (options[8].doesOccur) {
        prbom = 1;
    }

    if (options[9].doesOccur) {
        u_setDataDirectory(options[9].value);
    }

    if (options[11].doesOccur) {
      suppressAliases = TRUE;
    }

    converter = ucnv_open(encoding, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: couldn't create %s converter for encoding\n", pname, encoding ? encoding : ucnv_getDefaultName());
        return 2;
    }
    ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: couldn't configure converter for encoding\n", pname);
        return 3;
    }

    defaultConverter = ucnv_open(0, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: couldn't create %s converter for encoding\n", ucnv_getDefaultName(), pname);
        return 2;
    }

    for (i = 1; i < argc; ++i) {
        static const UChar sp[] = { 0x0020 }; /* " " */
        char infile[4096]; /* XXX Sloppy. */
        char locale[64];
        const char *thename = 0, *p, *q;
        UBool fromICUData = FALSE;

        arg = getLongPathname(argv[i]);

        if (verbose) {
            printf("processing bundle \"%s\"\n", argv[i]);
        }

        p = uprv_strrchr(arg, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
        if (p == NULL) {
            p = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
        }
#endif
        if (!p) {
            p = arg;
        } else {
            p++;
        }
        q = uprv_strrchr(p, '.');
        if (!q) {
            for (q = p; *q; ++q)
                ;
        }
        uprv_strncpy(locale, p, q - p);
        locale[q - p] = 0;

        if (!(fromICUData = !uprv_strcmp(inputDir, "-"))) {
            UBool absfilename = *arg == U_FILE_SEP_CHAR;
#ifdef U_WINDOWS
            if (!absfilename) {
                absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0])
                    && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR);
            }
#endif
            if (absfilename) {
                thename = arg;
            } else {
                q = uprv_strrchr(arg, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
                if (q == NULL) {
                    q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
                }
#endif
                uprv_strcpy(infile, inputDir);
                if(q != NULL) {
                    uprv_strcat(infile, U_FILE_SEP_STRING);
                    strncat(infile, arg, q-arg);
                }
                thename = infile;
            }
        }
        status = U_ZERO_ERROR;
        if (thename) {
            bundle = ures_openDirect(thename, locale, &status);
        } else {
            bundle = ures_open(fromICUData ? 0 : inputDir, locale, &status);
        }
        if (status == U_ZERO_ERROR) {
            FILE *out;

            const char *filename = 0;
            const char *ext = 0;

            if (!locale || !tostdout) {
                filename = uprv_strrchr(arg, U_FILE_SEP_CHAR);

#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
                if (!filename) {
                    filename = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
                }
#endif
                if (!filename) {
                    filename = arg;
                } else {
                    ++filename;
                }
                ext = uprv_strrchr(arg, '.');
                if (!ext) {
                    ext = filename + uprv_strlen(filename);
                }
            }

            if (tostdout) {
                out = stdout;
#if defined(U_WINDOWS) || defined(U_CYGWIN)
                if (setmode(fileno(out), O_BINARY) == -1) {
                    fprintf(stderr, "%s: couldn't set standard output to binary mode\n", pname);
                    return 4;
                }
#endif
            } else {
                char thefile[4096], *tp;
                int32_t len;

                if (outputDir) {
                    uprv_strcpy(thefile, outputDir);
                    uprv_strcat(thefile, U_FILE_SEP_STRING);
                } else {
                    *thefile = 0;
                }
                uprv_strcat(thefile, filename);
                tp = thefile + uprv_strlen(thefile);
                len = (int32_t)uprv_strlen(ext);
                if (len) {
                    tp -= len - 1;
                } else {
                    *tp++ = '.';
                }
                uprv_strcpy(tp, "txt");

                out = fopen(thefile, "w");
                if (!out) {
                    fprintf(stderr, "%s: couldn't create %s\n", pname, thefile);
                    return 4;
                }
            }

            if (prbom) { /* XXX: Should be done only for UTFs */
                static const UChar bom[] = { 0xFEFF };
                printString(out, converter, bom, (int32_t)(sizeof(bom)/sizeof(*bom)));
            }

            printCString(out, converter, "// -*- Coding: ", -1);
            printCString(out, converter, encoding ? encoding : getEncodingName(ucnv_getDefaultName()), -1);
            printCString(out, converter, "; -*-\n//\n", -1);
            printCString(out, converter, "// This file was dumped by derb(8) from ", -1);
            if (thename) {
                printCString(out, converter, thename, -1);
            } else if (fromICUData) {
                printCString(out, converter, "the ICU internal ", -1);
                printCString(out, converter, locale, -1);
                printCString(out, converter, " locale", -1);
            }

            printCString(out, converter, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n", -1);

            if (locale) {
                printCString(out, converter, locale, -1);
            } else {
                printCString(out, converter, filename, (int32_t)(ext - filename));
                printString(out, converter, sp, (int32_t)(sizeof(sp)/sizeof(*sp)));
            }
            printOutBundle(out, converter, bundle, 0, pname, &status);

            if (out != stdout) {
                fclose(out);
            }
        }
        else {
            reportError(pname, &status, "opening resource file");
        }

        ures_close(bundle);
    }

    ucnv_close(defaultConverter);
    ucnv_close(converter);

    return 0;
}
Esempio n. 6
0
int main(int argc, char **argv)
{
    FILE *outfile;
    int ret = 0;
    int seenf = 0;

    size_t bufsz = DEFAULT_BUFSZ;

    const char *fromcpage = 0;
    const char *tocpage = 0;
    const char *translit = 0;
    const char *outfilestr = 0;
    int fallback = 0;

    UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
    const void *fromuctxt = 0;
    UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
    const void *touctxt = 0;

    char **iter;
    char **end = argv + argc;

    const char *pname;

    int printConvs = 0, printCanon = 0;
    const char *printName = 0;
    int printTranslits = 0;

    int verbose = 0;

    // Get and prettify pname.
    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
#ifdef WIN32
    if (!pname) {
        pname = uprv_strrchr(*argv, '/');
    }
#endif
    if (!pname) {
        pname = *argv;
    } else {
        ++pname;
    }

    // First, get the arguments from command-line
    // to know the codepages to convert between

    // XXX When you add to this loop, you need to add to the similar loop
    // below.

    for (iter = argv + 1; iter != end; iter++) {
        // Check for from charset
        if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
            iter++;
            if (iter != end)
                fromcpage = *iter;
            else
                usage(pname, 1);
        } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
            iter++;
            if (iter != end)
                tocpage = *iter;
            else
                usage(pname, 1);
        } else if (strcmp("-x", *iter) == 0) {
            iter++;
            if (iter != end)
                translit = *iter;
            else
                usage(pname, 1);
        } else if (!strcmp("--fallback", *iter)) {
            fallback = 1;
        } else if (!strcmp("--no-fallback", *iter)) {
            fallback = 0;
        } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
            iter++;
            if (iter != end) {
                bufsz = atoi(*iter);
                if ((int) bufsz <= 0) {
                    initMsg(pname);
                    UnicodeString str(*iter);
                    initMsg(pname);
                    u_wmsg(stderr, "badBlockSize", str.getBuffer());
                    return 3;
                }
            } else {
                usage(pname, 1);
            }
        } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
            if (printTranslits) {
                usage(pname, 1);
            }
            printConvs = 1;
        } else if (strcmp("--default-code", *iter) == 0) {
            if (printTranslits) {
                usage(pname, 1);
            }
            printName = ucnv_getDefaultName();
        } else if (strcmp("--list-code", *iter) == 0) {
            if (printTranslits) {
                usage(pname, 1);
            }

            iter++;
            if (iter != end) {
                UErrorCode e = U_ZERO_ERROR;
                printName = ucnv_getAlias(*iter, 0, &e);
                if (U_FAILURE(e) || !printName) {
                    UnicodeString str(*iter);
                    initMsg(pname);
                    u_wmsg(stderr, "noSuchCodeset", str.getBuffer());
                    return 2;
                }
            } else
                usage(pname, 1);
        } else if (strcmp("--canon", *iter) == 0) {
            printCanon = 1;
        } else if (strcmp("-L", *iter) == 0
            || !strcmp("--list-transliterators", *iter)) {
            if (printConvs) {
                usage(pname, 1);
            }
            printTranslits = 1;
        } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
            || !strcmp("--help", *iter)) {
            usage(pname, 0);
        } else if (!strcmp("-c", *iter)) {
            fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
        } else if (!strcmp("--to-callback", *iter)) {
            iter++;
            if (iter != end) {
                const struct callback_ent *cbe = findCallback(*iter);
                if (cbe) {
                    fromucallback = cbe->fromu;
                    fromuctxt = cbe->fromuctxt;
                } else {
                    UnicodeString str(*iter);
                    initMsg(pname);
                    u_wmsg(stderr, "unknownCallback", str.getBuffer());
                    return 4;
                }
            } else {
                usage(pname, 1);
            }
        } else if (!strcmp("--from-callback", *iter)) {
            iter++;
            if (iter != end) {
                const struct callback_ent *cbe = findCallback(*iter);
                if (cbe) {
                    toucallback = cbe->tou;
                    touctxt = cbe->touctxt;
                } else {
                    UnicodeString str(*iter);
                    initMsg(pname);
                    u_wmsg(stderr, "unknownCallback", str.getBuffer());
                    return 4;
                }
            } else {
                usage(pname, 1);
            }
        } else if (!strcmp("-i", *iter)) {
            toucallback = UCNV_TO_U_CALLBACK_SKIP;
        } else if (!strcmp("--callback", *iter)) {
            iter++;
            if (iter != end) {
                const struct callback_ent *cbe = findCallback(*iter);
                if (cbe) {
                    fromucallback = cbe->fromu;
                    fromuctxt = cbe->fromuctxt;
                    toucallback = cbe->tou;
                    touctxt = cbe->touctxt;
                } else {
                    UnicodeString str(*iter);
                    initMsg(pname);
                    u_wmsg(stderr, "unknownCallback", str.getBuffer());
                    return 4;
                }
            } else {
                usage(pname, 1);
            }
        } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
            verbose = 0;
        } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
            verbose = 1;
        } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
            printf("%s v2.0\n", pname);
            return 0;
        } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
            ++iter;
            if (iter != end && !outfilestr) {
                outfilestr = *iter;
            } else {
                usage(pname, 1);
            }
        } else if (**iter == '-' && (*iter)[1]) {
            usage(pname, 1);
        }
    }

    if (printConvs || printName) {
        return printConverters(pname, printName, printCanon) ? 2 : 0;
    } else if (printTranslits) {
        return printTransliterators(printCanon) ? 3 : 0;
    }

    if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
        fromcpage = ucnv_getDefaultName();
    }
    if (!tocpage || !uprv_strcmp(tocpage, "-")) {
        tocpage = ucnv_getDefaultName();
    }

    // Open the correct output file or connect to stdout for reading input
    if (outfilestr != 0 && strcmp(outfilestr, "-")) {
        outfile = fopen(outfilestr, "wb");
        if (outfile == 0) {
            UnicodeString str1(outfilestr, "");
            UnicodeString str2(strerror(errno), "");
            initMsg(pname);
            u_wmsg(stderr, "cantCreateOutputF",
                str1.getBuffer(), str2.getBuffer());
            return 1;
        }
    } else {
        outfilestr = "-";
        outfile = stdout;
#ifdef WIN32
        if (setmode(fileno(outfile), O_BINARY) == -1) {
            u_wmsg(stderr, "cantSetOutBinMode");
            exit(-1);
        }
#endif
    }

    /* Loop again on the arguments to find all the input files, and
    convert them. XXX Cheap and sloppy. */

    for (iter = argv + 1; iter != end; iter++) {
        if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
            iter++;
        } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
            iter++;
        } else if (strcmp("-x", *iter) == 0) {
            iter++;
        } else if (!strcmp("--fallback", *iter)) {
            ;
        } else if (!strcmp("--no-fallback", *iter)) {
            ;
        } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
            iter++;
        } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
            ;
        } else if (strcmp("--default-code", *iter) == 0) {
            ;
        } else if (strcmp("--list-code", *iter) == 0) {
            ;
        } else if (strcmp("--canon", *iter) == 0) {
            ;
        } else if (strcmp("-L", *iter) == 0
            || !strcmp("--list-transliterators", *iter)) {
            ;
        } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
            || !strcmp("--help", *iter)) {
            ;
        } else if (!strcmp("-c", *iter)) {
            ;
        } else if (!strcmp("--to-callback", *iter)) {
            iter++;
        } else if (!strcmp("--from-callback", *iter)) {
            iter++;
        } else if (!strcmp("-i", *iter)) {
            ;
        } else if (!strcmp("--callback", *iter)) {
            iter++;
        } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
            ;
        } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
            ;
        } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
            ;
        } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
            ++iter;
        } else {
            seenf = 1;
            if (!convertFile
                (pname, fromcpage, toucallback, touctxt, tocpage,
                fromucallback, fromuctxt, fallback, bufsz, translit, *iter,
                outfile, verbose)) {
                goto error_exit;
            }
        }
    }

    if (!seenf) {
        if (!convertFile
            (pname, fromcpage, toucallback, touctxt, tocpage,
            fromucallback, fromuctxt, fallback, bufsz, translit, 0, outfile,
            verbose)) {
            goto error_exit;
        }
    }

    goto normal_exit;
error_exit:
    ret = 1;
normal_exit:

    if (outfile != stdout)
        fclose(outfile);

    return ret;
}
/**
 * Convert character vector between given encodings
 *
 * @param str     input character vector or list of raw vectors
 * @param from  source encoding, \code{NULL} or \code{""} for default enc
 * @param to    target encoding, \code{NULL} or \code{""} for default enc
 * @param to_raw single logical, should list of raw vectors be returned?
 * @return a converted character vector or list of raw vectors
 *
 * @version 0.1 (Marek Gagolewski)
 * @version 0.2 (Marek Gagolewski) arg to_raw_added, encoding marking
 * @version 0.3 (Marek Gagolewski, 2013-06-16) make StriException-friendly
 * @version 0.4 (Marek Gagolewski, 2013-08-08) use StriContainerListRaw
 * @version 0.5 (Marek Gagolewski, 2013-11-20) BUGFIX call stri_encode_from_marked if necessary
 */
SEXP stri_encode(SEXP str, SEXP from, SEXP to, SEXP to_raw)
{
   const char* selected_from = stri__prepare_arg_enc(from, "from", true);
   if (!selected_from && Rf_isVectorAtomic(str))
      return stri_encode_from_marked(str, to, to_raw);

   str = stri_prepare_arg_list_raw(str, "str");
   const char* selected_to   = stri__prepare_arg_enc(to, "to", true);
   bool to_raw_logical = stri__prepare_arg_logical_1_notNA(to_raw, "to_raw");

   UConverter* uconv_from = NULL;
   UConverter* uconv_to = NULL;

   STRI__ERROR_HANDLER_BEGIN
   StriContainerListRaw str_cont(str);
   R_len_t str_n = str_cont.get_n();

   // get the number of strings to convert; if == 0, then you know what's the result
   if (str_n <= 0) return Rf_allocVector(to_raw_logical?VECSXP:STRSXP, 0);

   // Open converters
   uconv_from = stri__ucnv_open(selected_from);
   uconv_to = stri__ucnv_open(selected_to);


   // Get target encoding mark
   UErrorCode err = U_ZERO_ERROR;
   const char* uconv_to_name = ucnv_getName(uconv_to, &err);
   if (U_FAILURE(err))
      throw StriException(err);
   cetype_t encmark_to = CE_BYTES; // all other cases than the below ones
      // - bytes enc (this is reasonable, isn't it?)
   if (!to_raw_logical) { // otherwise not needed
      if (!strcmp(uconv_to_name, "US-ASCII") || !strcmp(uconv_to_name, "UTF-8"))
         encmark_to = CE_UTF8; // no CE for ASCII, will be auto-detected by mkCharLenCE
      else if (!strcmp(uconv_to_name, "ISO-8859-1"))
         encmark_to = CE_LATIN1;
      else if (!strcmp(uconv_to_name, ucnv_getDefaultName()))
         encmark_to = CE_NATIVE;
   }

   // Prepare out val
   SEXP ret;
   PROTECT(ret = Rf_allocVector(to_raw_logical?VECSXP:STRSXP, str_n));

   String8 buf(0); // will be extended in a moment

   for (R_len_t i=0; i<str_n; ++i) {
      if (str_cont.isNA(i)) {
         if (to_raw_logical) SET_VECTOR_ELT(ret, i, R_NilValue);
         else                SET_STRING_ELT(ret, i, NA_STRING);
         continue;
      }

      const char* curd = str_cont.get(i).c_str();
      R_len_t curn     = str_cont.get(i).length();

      err = U_ZERO_ERROR;
      UnicodeString encs(curd, curn, uconv_from, err); // FROM -> UTF-16 [this is the slow part]
      if (U_FAILURE(err))
         throw StriException(err);

      R_len_t curn_tmp = encs.length();
      const UChar* curs_tmp = encs.getBuffer(); // The buffer contents is (probably) not NUL-terminated.
      if (!curs_tmp)
         throw StriException(MSG__INTERNAL_ERROR);

      R_len_t bufneed = UCNV_GET_MAX_BYTES_FOR_STRING(curn_tmp, ucnv_getMaxCharSize(uconv_to));
      // "The calculated size is guaranteed to be sufficient for this conversion."
      buf.resize(bufneed);

      err = U_ZERO_ERROR;
//      bufneed = encs.extract(buf.data(), buf.size(), uconv_to, err); // UTF-16 -> TO
      ucnv_resetFromUnicode(uconv_to);
      bufneed = ucnv_fromUChars(uconv_to, buf.data(), buf.size(), curs_tmp, curn_tmp, &err);
      if (bufneed <= buf.size()) {
         if (U_FAILURE(err))
            throw StriException(err);
      }
      else {// larger buffer needed [this shouldn't happen?]
//         warning("buf extending");
         buf.resize(bufneed);
         err = U_ZERO_ERROR;
         bufneed = ucnv_fromUChars(uconv_to, buf.data(), buf.size(), curs_tmp, curn_tmp, &err);
         if (U_FAILURE(err))
            throw StriException(err);
         if (bufneed > buf.size())
            throw StriException(MSG__INTERNAL_ERROR);
      }

      if (to_raw_logical) {
         SEXP outobj = Rf_allocVector(RAWSXP, bufneed);
         memcpy(RAW(outobj), buf.data(), (size_t)bufneed);
         SET_VECTOR_ELT(ret, i, outobj);
      }
      else {
         SET_STRING_ELT(ret, i, Rf_mkCharLenCE(buf.data(), bufneed, encmark_to));
      }
   }

   if (uconv_from) {
      ucnv_close(uconv_from);
      uconv_from = NULL;
   }
   if (uconv_to) {
      ucnv_close(uconv_to);
      uconv_to = NULL;
   }

   UNPROTECT(1);
   return ret;

   STRI__ERROR_HANDLER_END({
      if (uconv_from)
         ucnv_close(uconv_from);
      if (uconv_to)
         ucnv_close(uconv_to);
   })
}
Esempio n. 8
0
U_CDECL_END

int main(int argc, char* argv[])
{
    int32_t nerrors = 0;
    TestNode *root = NULL;
    UErrorCode errorCode = U_ZERO_ERROR;
    UDate startTime, endTime;
    int32_t diffTime;

    startTime = uprv_getRawUTCtime();

    /* Check whether ICU will initialize without forcing the build data directory into
    *  the ICU_DATA path.  Success here means either the data dll contains data, or that
    *  this test program was run with ICU_DATA set externally.  Failure of this check
    *  is normal when ICU data is not packaged into a shared library.
    *
    *  Whether or not this test succeeds, we want to cleanup and reinitialize
    *  with a data path so that data loading from individual files can be tested.
    */
    u_init(&errorCode);
    if (U_FAILURE(errorCode)) {
        fprintf(stderr,
            "#### Note:  ICU Init without build-specific setDataDirectory() failed.\n");
    }
    u_cleanup();
    errorCode = U_ZERO_ERROR;
    if (!initArgs(argc, argv, argHandler, (void *) &STANDARD_TEST_FILE)) {
        /* Error already displayed. */
        return -1;
    }

    /* Initialize ICU */
    ctest_setICU_DATA();    /* u_setDataDirectory() must happen Before u_init() */
    u_init(&errorCode);
    if (U_FAILURE(errorCode)) {
        fprintf(stderr,
            "#### ERROR! %s: u_init() failed with status = \"%s\".\n"
            "*** Check the ICU_DATA environment variable and \n"
            "*** check that the data files are present.\n", argv[0], u_errorName(errorCode));
        return 1;
    }

    fprintf(stdout, "Default charset for this run is %s\n", ucnv_getDefaultName());

    addAllTests(&root);
    nerrors = runTestRequest(root, argc, argv);

#if 1
    {
        FILE* fileToRemove = fopen(STANDARD_TEST_FILE, "r");
        /* This should delete any temporary files. */
        if (fileToRemove) {
            fclose(fileToRemove);
            log_verbose("Deleting: %s\n", STANDARD_TEST_FILE);
            if (remove(STANDARD_TEST_FILE) != 0) {
                /* Maybe someone didn't close the file correctly. */
                fprintf(stderr, "FAIL: Could not delete %s\n", STANDARD_TEST_FILE);
                nerrors += 1;
            }
        }
    }
#endif

    cleanUpTestTree(root);
    DataDrivenLogger::cleanUp();
    u_cleanup();

    endTime = uprv_getRawUTCtime();
    diffTime = (int32_t)(endTime - startTime);
    printf("Elapsed Time: %02d:%02d:%02d.%03d\n",
        (int)((diffTime%U_MILLIS_PER_DAY)/U_MILLIS_PER_HOUR),
        (int)((diffTime%U_MILLIS_PER_HOUR)/U_MILLIS_PER_MINUTE),
        (int)((diffTime%U_MILLIS_PER_MINUTE)/U_MILLIS_PER_SECOND),
        (int)(diffTime%U_MILLIS_PER_SECOND));

    return nerrors;
}
Esempio n. 9
0
extern int
main(int argc, char* argv[]) {
    const char *encoding = NULL;
    const char *outputDir = NULL; /* NULL = no output directory, use current */
    const char *inputDir  = ".";
    int tostdout = 0;
    int prbom = 0;

    const char *pname;

    UResourceBundle *bundle = NULL;
    int32_t i = 0;

    const char* arg;

    /* Get the name of tool. */
    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
    if (!pname) {
        pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR);
    }
#endif
    if (!pname) {
        pname = *argv;
    } else {
        ++pname;
    }

    /* error handling, printing usage message */
    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "%s: error in command line argument \"%s\"\n", pname,
            argv[-argc]);
    }
    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        fprintf(argc < 0 ? stderr : stdout,
            "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n"
            " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n"
            " [ -t, --truncate [ size ] ]\n"
            " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n"
            " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n"
            " [ -A, --suppressAliases]\n"
            " bundle ...\n", argc < 0 ? 'u' : 'U',
            pname);
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    if(options[10].doesOccur) {
        fprintf(stderr,
                "%s version %s (ICU version %s).\n"
                "%s\n",
                pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
        return U_ZERO_ERROR;
    }
    if(options[2].doesOccur) {
        encoding = options[2].value;
    }

    if (options[3].doesOccur) {
      if(options[2].doesOccur) {
        fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname);
        return 3;
      }
      tostdout = 1;
    }

    if(options[4].doesOccur) {
        opt_truncate = TRUE;
        if(options[4].value != NULL) {
            truncsize = atoi(options[4].value); /* user defined printable size */
        } else {
            truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */
        }
    } else {
        opt_truncate = FALSE;
    }

    if(options[5].doesOccur) {
        verbose = TRUE;
    }

    if (options[6].doesOccur) {
        outputDir = options[6].value;
    }

    if(options[7].doesOccur) {
        inputDir = options[7].value; /* we'll use users resources */
    }

    if (options[8].doesOccur) {
        prbom = 1;
    }

    if (options[9].doesOccur) {
        u_setDataDirectory(options[9].value);
    }

    if (options[11].doesOccur) {
      suppressAliases = TRUE;
    }

    fflush(stderr); // use ustderr now.
    ustderr = u_finit(stderr, NULL, NULL);

    for (i = 1; i < argc; ++i) {
        static const UChar sp[] = { 0x0020 }; /* " " */

        arg = getLongPathname(argv[i]);

        if (verbose) {
          u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]);
        }

        icu::CharString locale;
        UErrorCode status = U_ZERO_ERROR;
        {
            const char *p = findBasename(arg);
            const char *q = uprv_strrchr(p, '.');
            if (q == NULL) {
                locale.append(p, status);
            } else {
                locale.append(p, (int32_t)(q - p), status);
            }
        }
        if (U_FAILURE(status)) {
            return status;
        }

        icu::CharString infile;
        const char *thename = NULL;
        UBool fromICUData = !uprv_strcmp(inputDir, "-");
        if (!fromICUData) {
            UBool absfilename = *arg == U_FILE_SEP_CHAR;
#if U_PLATFORM_HAS_WIN32_API
            if (!absfilename) {
                absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0])
                    && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR);
            }
#endif
            if (absfilename) {
                thename = arg;
            } else {
                const char *q = uprv_strrchr(arg, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
                if (q == NULL) {
                    q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
                }
#endif
                infile.append(inputDir, status);
                if(q != NULL) {
                    infile.appendPathPart(icu::StringPiece(arg, (int32_t)(q - arg)), status);
                }
                if (U_FAILURE(status)) {
                    return status;
                }
                thename = infile.data();
            }
        }
        if (thename) {
            bundle = ures_openDirect(thename, locale.data(), &status);
        } else {
            bundle = ures_open(fromICUData ? 0 : inputDir, locale.data(), &status);
        }
        if (U_SUCCESS(status)) {
            UFILE *out = NULL;

            const char *filename = 0;
            const char *ext = 0;

            if (locale.isEmpty() || !tostdout) {
                filename = findBasename(arg);
                ext = uprv_strrchr(filename, '.');
                if (!ext) {
                    ext = uprv_strchr(filename, 0);
                }
            }

            if (tostdout) {
                out = u_get_stdout();
            } else {
                icu::CharString thefile;
                if (outputDir) {
                    thefile.append(outputDir, status);
                }
                thefile.appendPathPart(filename, status);
                if (*ext) {
                    thefile.truncate(thefile.length() - (int32_t)uprv_strlen(ext));
                }
                thefile.append(".txt", status);
                if (U_FAILURE(status)) {
                    return status;
                }

                out = u_fopen(thefile.data(), "w", NULL, encoding);
                if (!out) {
                  u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile.data());
                  u_fclose(ustderr);
                  return 4;
                }
            }

            // now, set the callback.
            ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status);
            if (U_FAILURE(status)) {
              u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname);
              u_fclose(ustderr);
              if(!tostdout) {
                u_fclose(out);
              }
              return 3;
            }

            if (prbom) { /* XXX: Should be done only for UTFs */
              u_fputc(0xFEFF, out);
            }
            u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName()));
            u_fprintf(out, "// This file was dumped by derb(8) from ");
            if (thename) {
              u_fprintf(out, "%s", thename);
            } else if (fromICUData) {
              u_fprintf(out, "the ICU internal %s locale", locale.data());
            }

            u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n");

            if (!locale.isEmpty()) {
              u_fprintf(out, "%s", locale.data());
            } else {
              u_fprintf(out, "%.*s%.*S", (int32_t)(ext - filename),  filename, UPRV_LENGTHOF(sp), sp);
            }
            printOutBundle(out, bundle, 0, pname, &status);

            if (!tostdout) {
                u_fclose(out);
            }
        }
        else {
            reportError(pname, &status, "opening resource file");
        }

        ures_close(bundle);
    }

    return 0;
}
Esempio n. 10
0
void cmd_version(UBool noLoad)
{
    UVersionInfo icu;
    char str[200];
    printf("<ICUINFO>\n");
    printf("International Components for Unicode for C/C++\n");
    printf("%s\n", U_COPYRIGHT_STRING);
    printf("Compiled-Version: %s\n", U_ICU_VERSION);
    u_getVersion(icu);
    u_versionToString(icu, str);
    printf("Runtime-Version: %s\n", str);
    printf("Compiled-Unicode-Version: %s\n", U_UNICODE_VERSION);
    u_getUnicodeVersion(icu);
    u_versionToString(icu, str);
    printf("Runtime-Unicode-Version: %s\n", U_UNICODE_VERSION);
    printf("Platform: %s\n", getPlatform());
#if defined(U_BUILD)
    printf("Build: %s\n", U_BUILD);
#if defined(U_HOST)
    if(strcmp(U_BUILD,U_HOST)) {
      printf("Host: %s\n", U_HOST);
    }
#endif
#endif
#if defined(U_CC)
    printf("C compiler: %s\n", U_CC);
#endif
#if defined(U_CXX)
    printf("C++ compiler: %s\n", U_CXX);
#endif
#if defined(CYGWINMSVC)
    printf("Cygwin: CYGWINMSVC\n");
#endif
    printf("ICUDATA: %s\n", U_ICUDATA_NAME);
    do_init();
    printf("Data Directory: %s\n", u_getDataDirectory());
    printf("ICU Initialization returned: %s\n", u_errorName(initStatus));
    printf( "Default locale: %s\n", uloc_getDefault());
    {
      UErrorCode subStatus = U_ZERO_ERROR;
      ulocdata_getCLDRVersion(icu, &subStatus);
      if(U_SUCCESS(subStatus)) {
	u_versionToString(icu, str);
	printf("CLDR-Version: %s\n", str);
      } else {
	printf("CLDR-Version: %s\n", u_errorName(subStatus));
      }
    }
    
#if !UCONFIG_NO_CONVERSION
    if(noLoad == FALSE)
    {
      printf("Default converter: %s\n", ucnv_getDefaultName());
    }
#endif
#if !UCONFIG_NO_FORMATTING
    {
      UChar buf[100];
      char buf2[100];
      UErrorCode subsubStatus= U_ZERO_ERROR;
      int32_t len;

      len = ucal_getDefaultTimeZone(buf, 100, &subsubStatus);
      if(U_SUCCESS(subsubStatus)&&len>0) {
	u_UCharsToChars(buf, buf2, len+1);
	printf("Default TZ: %s\n", buf2);
      } else {
	printf("Default TZ: %s\n", u_errorName(subsubStatus));
      }
    }
    {
      UErrorCode subStatus = U_ZERO_ERROR;
      const char *tzVer = ucal_getTZDataVersion(&subStatus);
      if(U_FAILURE(subStatus)) {
	tzVer = u_errorName(subStatus);
      }
      printf("TZ data version: %s\n", tzVer);
    }
#endif
    
#if U_ENABLE_DYLOAD
    const char *pluginFile = uplug_getPluginFile();
    printf("Plugin file is: %s\n", (pluginFile&&*pluginFile)?pluginFile:"(not set. try setting ICU_PLUGINS to a directory.)");
#else
    fprintf(stderr, "Dynamic Loading: is disabled. No plugins will be loaded at start-up.\n");
#endif
    printf("</ICUINFO>\n\n");
}
Esempio n. 11
0
const char *__hs_ucnv_getDefaultName(void)
{
    return ucnv_getDefaultName();
}