rb_encoding* rb_nkf_enc_get(const char *name) { #if 0 int idx = rb_enc_find_index(name); if (idx < 0) { nkf_encoding *nkf_enc = nkf_enc_find(name); idx = rb_enc_find_index(nkf_enc_name(nkf_enc_to_base_encoding(nkf_enc))); if (idx < 0) { idx = rb_define_dummy_encoding(name); } else { rb_encoding *rb_enc = rb_enc_from_index(idx); idx = rb_enc_replicate(name, rb_enc); } } return rb_enc_from_index(idx); #else struct nkf_osx_name_enc_table { const char* name; CFStringEncoding enc; }; static struct nkf_osx_name_enc_table table[] = { { "BINARY", kCFStringEncodingNonLossyASCII }, { "US-ASCII", kCFStringEncodingASCII }, { "ISO-2022-JP", kCFStringEncodingISO_2022_JP }, { "ISO-2022-JP-1", kCFStringEncodingISO_2022_JP_1 }, { "ISO-2022-JP-2", kCFStringEncodingISO_2022_JP_2 }, { "ISO-2022-JP-3", kCFStringEncodingISO_2022_JP_3 }, { "EUC-JP", kCFStringEncodingEUC_JP }, { "Shift_JIS", kCFStringEncodingShiftJIS }, { "UTF-8", kCFStringEncodingUTF8 }, { "UTF-16", kCFStringEncodingUTF16 }, { "UTF-16BE", kCFStringEncodingUTF16BE }, { "UTF-16LE", kCFStringEncodingUTF16LE }, { "UTF-32", kCFStringEncodingUTF32 }, { "UTF-32BE", kCFStringEncodingUTF32BE }, { "UTF-32LE", kCFStringEncodingUTF32LE }, { NULL, kCFStringEncodingNonLossyASCII } }; struct nkf_osx_name_enc_table* ptr = table; while (ptr->name) { if (strcmp(name, ptr->name) == 0) return &(ptr->enc); ptr++; } return &(ptr->enc); #endif }
/*
 * Resolve an encoding name to an rb_encoding.
 *
 * If rb_enc_find_index() already knows the name, that encoding is returned.
 * Otherwise the name is looked up through nkf, and the name of its base
 * encoding is tried instead:
 *   - base encoding found     -> `name` is registered via rb_enc_replicate()
 *   - base encoding not found -> `name` is registered via
 *                                rb_define_dummy_encoding()
 * In both cases the newly registered encoding is returned.
 *
 * NOTE(review): the result of nkf_enc_find() is used without a check here;
 * presumably callers only pass names nkf knows -- confirm.
 */
rb_encoding*
rb_nkf_enc_get(const char *name)
{
    int enc_idx = rb_enc_find_index(name);
    nkf_encoding *nkf_enc;
    int base_idx;

    if (enc_idx >= 0) {
        return rb_enc_from_index(enc_idx);
    }

    nkf_enc = nkf_enc_find(name);
    base_idx = rb_enc_find_index(nkf_enc_name(nkf_enc_to_base_encoding(nkf_enc)));

    if (base_idx < 0) {
        enc_idx = rb_define_dummy_encoding(name);
    } else {
        enc_idx = rb_enc_replicate(name, rb_enc_from_index(base_idx));
    }

    return rb_enc_from_index(enc_idx);
}
/*
 * Return the JOHAB encoding.
 *
 * Each known alias is tried in turn with rb_enc_find_index(); the first one
 * that yields a positive index wins (an index of 0 is not treated as a hit).
 * If none is found, a dummy encoding is defined under the first alias and
 * the remaining names are registered as aliases of it.
 */
static rb_encoding *
pg_find_or_create_johab(void)
{
    static const char * const aliases[] = { "JOHAB", "Windows-1361", "CP1361" };
    const size_t alias_count = sizeof(aliases) / sizeof(aliases[0]);
    size_t n;
    int idx;

    for (n = 0; n < alias_count; n++) {
        idx = rb_enc_find_index(aliases[n]);
        if (idx > 0) {
            return rb_enc_from_index(idx);
        }
    }

    /* Not defined yet: create it under the canonical (first) name. */
    idx = rb_define_dummy_encoding(aliases[0]);
    for (n = 1; n < alias_count; n++) {
        ENC_ALIAS(aliases[n], aliases[0]);
    }

    return rb_enc_from_index(idx);
}
/*
 * Transcode the string in *self.
 *
 * Arguments (argc/argv):
 *   argv[0]          target encoding (encoding object or name)
 *   argv[1]          optional source encoding; when omitted the encoding
 *                    of the string itself is used
 *   trailing Hash    optional options hash; its sym_invalid entry may be
 *                    sym_ignore, which sets INVALID_IGNORE
 *
 * Returns:
 *   -1          source and target encodings are the same; *self untouched
 *   to_encidx   otherwise.  When both encodings are ASCII-compatible and the
 *               string's coderange is 7BIT, the index is returned without
 *               converting and *self is untouched.  In the remaining cases
 *               *self is replaced by the converted string; if the target
 *               name was not a known encoding, a dummy encoding is defined
 *               for it and its index is returned.
 *
 * Raises ArgumentError for a wrong argument count, an unsupported
 * conversion, or input that could not be fully converted.
 */
static int
str_transcode(int argc, VALUE *argv, VALUE *self)
{
    VALUE dest;
    VALUE str = *self;
    long blen, slen;
    unsigned char *buf, *bp, *sp, *fromp;
    rb_encoding *from_enc, *to_enc;
    const char *from_e, *to_e;
    int from_encidx, to_encidx;
    VALUE from_encval, to_encval;
    const rb_transcoder *my_transcoder;
    rb_transcoding my_transcoding;
    int final_encoding = 0;
    int options = 0;

    /*
     * Only look at the last argument when there is one: with argc == 0,
     * argv[argc-1] would read before the start of argv.  The arity check
     * below then reports the error.
     */
    if (argc > 0) {
        VALUE opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
        if (!NIL_P(opt)) {
            VALUE v;

            argc--; /* the options hash is not an encoding argument */
            v = rb_hash_aref(opt, sym_invalid);
            /*
             * NOTE(review): the error is raised when the option is absent
             * (nil), while unrecognized non-nil values are silently
             * accepted.  The message suggests the reverse was intended;
             * behavior is left unchanged here -- confirm before altering.
             */
            if (NIL_P(v)) {
                rb_raise(rb_eArgError, "unknown value for invalid: setting");
            }
            else if (v==sym_ignore) {
                options |= INVALID_IGNORE;
            }
        }
    }
    if (argc < 1 || argc > 2) {
        rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
    }

    /* Target encoding: fall back to the raw name when it is not a known encoding. */
    if ((to_encidx = rb_to_encoding_index(to_encval = argv[0])) < 0) {
        to_enc = 0;
        to_encidx = 0;
        to_e = StringValueCStr(to_encval);
    }
    else {
        to_enc = rb_enc_from_index(to_encidx);
        to_e = rb_enc_name(to_enc);
    }

    /* Source encoding: the string's own encoding unless given explicitly. */
    if (argc==1) {
        from_encidx = rb_enc_get_index(str);
        from_enc = rb_enc_from_index(from_encidx);
        from_e = rb_enc_name(from_enc);
    }
    else if ((from_encidx = rb_to_encoding_index(from_encval = argv[1])) < 0) {
        from_enc = 0;
        from_e = StringValueCStr(from_encval);
    }
    else {
        from_enc = rb_enc_from_index(from_encidx);
        from_e = rb_enc_name(from_enc);
    }

    if (from_enc && from_enc == to_enc) {
        return -1;
    }
    /* 7-bit content in two ASCII-compatible encodings needs no byte conversion. */
    if (from_enc && to_enc && rb_enc_asciicompat(from_enc) && rb_enc_asciicompat(to_enc)) {
        if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
            return to_encidx;
        }
    }
    if (encoding_equal(from_e, to_e)) {
        return -1;
    }

    do { /* loop for multistep transcoding */
        /* later, maybe use smaller intermediate strings for very long strings */
        if (!(my_transcoder = transcode_dispatch(from_e, to_e))) {
            rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_e, to_e);
        }

        my_transcoding.transcoder = my_transcoder;

        /*
         * Optional preprocessing pass into a fresh temporary string.
         * NOTE(review): on the first iteration my_transcoding.flush_func has
         * not been assigned yet when the preprocessor runs -- confirm the
         * preprocessor does not rely on it.
         */
        if (my_transcoder->preprocessor) {
            fromp = sp = (unsigned char *)RSTRING_PTR(str);
            slen = RSTRING_LEN(str);
            blen = slen + 30; /* len + margin */
            dest = rb_str_tmp_new(blen);
            bp = (unsigned char *)RSTRING_PTR(dest);
            my_transcoding.ruby_string_dest = dest;
            (*my_transcoder->preprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
            if (fromp != sp+slen) {
                rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
            }
            /* re-read the buffer start before computing the final length */
            buf = (unsigned char *)RSTRING_PTR(dest);
            *bp = '\0';
            rb_str_set_len(dest, bp - buf);
            str = dest;
        }

        /* Main conversion pass. */
        fromp = sp = (unsigned char *)RSTRING_PTR(str);
        slen = RSTRING_LEN(str);
        blen = slen + 30; /* len + margin */
        dest = rb_str_tmp_new(blen);
        bp = (unsigned char *)RSTRING_PTR(dest);
        my_transcoding.ruby_string_dest = dest;
        my_transcoding.flush_func = str_transcoding_resize;

        transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding, options);
        if (fromp != sp+slen) {
            rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
        }
        buf = (unsigned char *)RSTRING_PTR(dest);
        *bp = '\0';
        rb_str_set_len(dest, bp - buf);

        /* Optional postprocessing pass, again into a fresh temporary string. */
        if (my_transcoder->postprocessor) {
            str = dest;
            fromp = sp = (unsigned char *)RSTRING_PTR(str);
            slen = RSTRING_LEN(str);
            blen = slen + 30; /* len + margin */
            dest = rb_str_tmp_new(blen);
            bp = (unsigned char *)RSTRING_PTR(dest);
            my_transcoding.ruby_string_dest = dest;
            (*my_transcoder->postprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
            if (fromp != sp+slen) {
                rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
            }
            buf = (unsigned char *)RSTRING_PTR(dest);
            *bp = '\0';
            rb_str_set_len(dest, bp - buf);
        }

        if (encoding_equal(my_transcoder->to_encoding, to_e)) {
            final_encoding = 1;
        }
        else {
            /* Intermediate step: continue from this transcoder's output encoding. */
            from_e = my_transcoder->to_encoding;
            str = dest;
        }
    } while (!final_encoding);

    /* set encoding */
    if (!to_enc) {
        to_encidx = rb_define_dummy_encoding(to_e);
    }
    *self = dest;

    return to_encidx;
}