Esempio n. 1
0
void
Init_nkf()
{
    VALUE mNKF = rb_define_module("NKF");

    rb_define_module_function(mNKF, "nkf", rb_nkf_convert, 2);
    rb_define_module_function(mNKF, "guess", rb_nkf_guess, 1);
    rb_define_alias(rb_singleton_class(mNKF), "guess", "guess");

    rb_define_const(mNKF, "AUTO",	Qnil);
    rb_define_const(mNKF, "NOCONV",	Qnil);
    rb_define_const(mNKF, "UNKNOWN",	Qnil);
    rb_define_const(mNKF, "BINARY",	rb_enc_from_encoding(rb_nkf_enc_get("BINARY")));
    rb_define_const(mNKF, "ASCII",	rb_enc_from_encoding(rb_nkf_enc_get("US-ASCII")));
    rb_define_const(mNKF, "JIS",	rb_enc_from_encoding(rb_nkf_enc_get("ISO-2022-JP")));
    rb_define_const(mNKF, "EUC",	rb_enc_from_encoding(rb_nkf_enc_get("EUC-JP")));
    rb_define_const(mNKF, "SJIS",	rb_enc_from_encoding(rb_nkf_enc_get("Shift_JIS")));
    // rb_define_const(mNKF, "UTF8",	rb_enc_from_encoding(rb_utf8_encoding()));
    rb_define_const(mNKF, "UTF8",	rb_enc_from_encoding(rb_nkf_enc_get("UTF-8")));
    rb_define_const(mNKF, "UTF16",	rb_enc_from_encoding(rb_nkf_enc_get("UTF-16BE")));
    rb_define_const(mNKF, "UTF32",	rb_enc_from_encoding(rb_nkf_enc_get("UTF-32BE")));

    /* Full version string of nkf */
    rb_define_const(mNKF, "VERSION", rb_str_new2(RUBY_NKF_VERSION));
    /* Version of nkf */
    rb_define_const(mNKF, "NKF_VERSION", rb_str_new2(NKF_VERSION));
    /* Release date of nkf */
    rb_define_const(mNKF, "NKF_RELEASE_DATE", rb_str_new2(NKF_RELEASE_DATE));
}
Esempio n. 2
0
static VALUE
rb_nkf_guess(VALUE obj, VALUE src)
{
    reinit();

    input_ctr = 0;
    StringValue(src);
    input = (unsigned char *)RSTRING_PTR(src);
    i_len = RSTRING_LEN(src);

    guess_f = TRUE;
    kanji_convert( NULL );
    guess_f = FALSE;

    return rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code()));
}
Esempio n. 3
0
static VALUE
rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
{
    reinit();
    StringValue(opt);
    nkf_split_options(RSTRING_PTR(opt));
    if (!output_encoding) rb_raise(rb_eArgError, "no output encoding given");

    switch (nkf_enc_to_index(output_encoding)) {
    case UTF_8_BOM:    output_encoding = nkf_enc_from_index(UTF_8); break;
    case UTF_16BE_BOM: output_encoding = nkf_enc_from_index(UTF_16BE); break;
    case UTF_16LE_BOM: output_encoding = nkf_enc_from_index(UTF_16LE); break;
    case UTF_32BE_BOM: output_encoding = nkf_enc_from_index(UTF_32BE); break;
    case UTF_32LE_BOM: output_encoding = nkf_enc_from_index(UTF_32LE); break;
    }
    output_bom_f = FALSE;

    incsize = INCSIZE;

    input_ctr = 0;
    StringValue(src);
    input = (unsigned char *)RSTRING_PTR(src);
    i_len = RSTRING_LEN(src);
    result = rb_str_new(0, i_len*3 + 10);

    output_ctr = 0;
    output     = (unsigned char *)RSTRING_PTR(result);
    o_len      = RSTRING_LEN(result);
    *output    = '\0';

    kanji_convert(NULL);
    rb_str_set_len(result, output_ctr);
    OBJ_INFECT(result, src);

    rb_enc_associate(result, rb_nkf_enc_get(nkf_enc_name(output_encoding)));

    return result;
}