void Init_nkf() { VALUE mNKF = rb_define_module("NKF"); rb_define_module_function(mNKF, "nkf", rb_nkf_convert, 2); rb_define_module_function(mNKF, "guess", rb_nkf_guess, 1); rb_define_alias(rb_singleton_class(mNKF), "guess", "guess"); rb_define_const(mNKF, "AUTO", Qnil); rb_define_const(mNKF, "NOCONV", Qnil); rb_define_const(mNKF, "UNKNOWN", Qnil); rb_define_const(mNKF, "BINARY", rb_enc_from_encoding(rb_nkf_enc_get("BINARY"))); rb_define_const(mNKF, "ASCII", rb_enc_from_encoding(rb_nkf_enc_get("US-ASCII"))); rb_define_const(mNKF, "JIS", rb_enc_from_encoding(rb_nkf_enc_get("ISO-2022-JP"))); rb_define_const(mNKF, "EUC", rb_enc_from_encoding(rb_nkf_enc_get("EUC-JP"))); rb_define_const(mNKF, "SJIS", rb_enc_from_encoding(rb_nkf_enc_get("Shift_JIS"))); // rb_define_const(mNKF, "UTF8", rb_enc_from_encoding(rb_utf8_encoding())); rb_define_const(mNKF, "UTF8", rb_enc_from_encoding(rb_nkf_enc_get("UTF-8"))); rb_define_const(mNKF, "UTF16", rb_enc_from_encoding(rb_nkf_enc_get("UTF-16BE"))); rb_define_const(mNKF, "UTF32", rb_enc_from_encoding(rb_nkf_enc_get("UTF-32BE"))); /* Full version string of nkf */ rb_define_const(mNKF, "VERSION", rb_str_new2(RUBY_NKF_VERSION)); /* Version of nkf */ rb_define_const(mNKF, "NKF_VERSION", rb_str_new2(NKF_VERSION)); /* Release date of nkf */ rb_define_const(mNKF, "NKF_RELEASE_DATE", rb_str_new2(NKF_RELEASE_DATE)); }
static VALUE rb_nkf_guess(VALUE obj, VALUE src) { reinit(); input_ctr = 0; StringValue(src); input = (unsigned char *)RSTRING_PTR(src); i_len = RSTRING_LEN(src); guess_f = TRUE; kanji_convert( NULL ); guess_f = FALSE; return rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code())); }
static VALUE rb_nkf_convert(VALUE obj, VALUE opt, VALUE src) { reinit(); StringValue(opt); nkf_split_options(RSTRING_PTR(opt)); if (!output_encoding) rb_raise(rb_eArgError, "no output encoding given"); switch (nkf_enc_to_index(output_encoding)) { case UTF_8_BOM: output_encoding = nkf_enc_from_index(UTF_8); break; case UTF_16BE_BOM: output_encoding = nkf_enc_from_index(UTF_16BE); break; case UTF_16LE_BOM: output_encoding = nkf_enc_from_index(UTF_16LE); break; case UTF_32BE_BOM: output_encoding = nkf_enc_from_index(UTF_32BE); break; case UTF_32LE_BOM: output_encoding = nkf_enc_from_index(UTF_32LE); break; } output_bom_f = FALSE; incsize = INCSIZE; input_ctr = 0; StringValue(src); input = (unsigned char *)RSTRING_PTR(src); i_len = RSTRING_LEN(src); result = rb_str_new(0, i_len*3 + 10); output_ctr = 0; output = (unsigned char *)RSTRING_PTR(result); o_len = RSTRING_LEN(result); *output = '\0'; kanji_convert(NULL); rb_str_set_len(result, output_ctr); OBJ_INFECT(result, src); rb_enc_associate(result, rb_nkf_enc_get(nkf_enc_name(output_encoding))); return result; }