/*
 * Return the code page number of the encoding +enc+.
 *
 * Finding the code page through Encoding#names is slow, so every result
 * (INVALID_CODE_PAGE included) is stored in the rb_code_page hash, keyed by
 * the encoding name, and later calls are answered from that hash.
 *
 *   - NULL +enc+               -> system_code_page() (not cached)
 *   - US-ASCII, ASCII-8BIT     -> 1252
 *   - a name "CP<n>" with n!=0 -> n (first such entry of Encoding#names)
 *   - anything else            -> INVALID_CODE_PAGE
 */
static UINT
code_page(rb_encoding *enc)
{
    VALUE code_page_value, name_key;
    VALUE encoding, names_ary, name;
    char *enc_name;
    struct RString fake_str;
    ID names;
    long i;

    if (!enc)
        return system_code_page();

    enc_name = (char *)rb_enc_name(enc);

    /* Lookup key: a String header on the C stack that borrows enc_name's
     * bytes.  It is only ever passed to rb_hash_lookup(). */
    fake_str.basic.flags = T_STRING|RSTRING_NOEMBED;
    fake_str.basic.klass = rb_cString;
    fake_str.as.heap.len = strlen(enc_name);
    fake_str.as.heap.ptr = enc_name;
    fake_str.as.heap.aux.capa = fake_str.as.heap.len;
    name_key = (VALUE)&fake_str;
    ENCODING_CODERANGE_SET(name_key, rb_usascii_encindex(), ENC_CODERANGE_7BIT);

    code_page_value = rb_hash_lookup(rb_code_page, name_key);
    if (code_page_value != Qnil)
        return (UINT)FIX2INT(code_page_value);

    /* Cache miss: from here on use a real String as the key that gets stored. */
    name_key = rb_usascii_str_new2(enc_name);

    /* map US-ASCII and ASCII-8bit as code page 1252 (us-ascii).
     * Checked before Encoding#names is called, because this path never
     * needs the list of names. */
    if (enc == rb_usascii_encoding() || enc == rb_ascii8bit_encoding()) {
        UINT cp = 1252;
        rb_hash_aset(rb_code_page, name_key, INT2FIX(cp));
        return cp;
    }

    encoding = rb_enc_from_encoding(enc);
    if (!NIL_P(encoding)) {
        CONST_ID(names, "names");
        names_ary = rb_funcall(encoding, names, 0);

        for (i = 0; i < RARRAY_LEN(names_ary); i++) {
            name = RARRAY_PTR(names_ary)[i];
            if (strncmp("CP", RSTRING_PTR(name), 2) == 0) {
                int cp = atoi(RSTRING_PTR(name) + 2);
                /* 0 means the text after "CP" did not yield a number. */
                if (cp != 0) {
                    rb_hash_aset(rb_code_page, name_key, INT2FIX(cp));
                    return (UINT)cp;
                }
            }
        }
    }

    /* Remember the failure too, so the slow path is not repeated. */
    rb_hash_aset(rb_code_page, name_key, INT2FIX(INVALID_CODE_PAGE));
    return INVALID_CODE_PAGE;
}
/*
 * call-seq:
 *   enc.inspect -> string
 *
 * Returns a programmer-oriented string representation of the encoding.
 * Dummy encodings are marked with a trailing " (dummy)".
 *
 *   Encoding::UTF_8.inspect       #=> "#<Encoding:UTF-8>"
 *   Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
 */
static VALUE
enc_inspect(VALUE self)
{
    rb_encoding *enc = (rb_encoding *)DATA_PTR(self);
    const char *dummy_mark = "";
    VALUE result;

    if (enc_dummy_p(self)) {
        dummy_mark = " (dummy)";
    }

    result = rb_sprintf("#<%s:%s%s>",
                        rb_obj_classname(self),
                        rb_enc_name(enc),
                        dummy_mark);

    /* Tag the result as US-ASCII with a 7-bit code range. */
    ENCODING_CODERANGE_SET(result, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
    return result;
}
/*
 * Return the code page number of the encoding +enc+.
 *
 * Finding the code page through Encoding#names is slow, so results are
 * stored in the rb_code_page hash, keyed by the encoding name.
 *
 *   - NULL +enc+               -> system_code_page() (not cached)
 *   - a name "CP<n>" with n!=0 -> n (first such entry of Encoding#names)
 *   - anything else            -> system_code_page(); the hash stores -1 for
 *                                 this case and system_code_page() is called
 *                                 again on every later hit
 */
static UINT
fenix_code_page(rb_encoding *enc)
{
    VALUE code_page_value, name_key;
    VALUE encoding, names_ary = Qundef, name;
    char *enc_name;
    struct RString fake_str;
    ID names;
    long i;

    if (!enc)
        return system_code_page();

    enc_name = (char *)rb_enc_name(enc);

    /* Lookup key: a frozen String header on the C stack that borrows
     * enc_name's bytes.  It is only ever passed to rb_hash_lookup(). */
    fake_str.basic.flags = T_STRING|RSTRING_NOEMBED;
    fake_str.basic.klass = rb_cString;
    fake_str.as.heap.len = strlen(enc_name);
    fake_str.as.heap.ptr = enc_name;
    fake_str.as.heap.aux.capa = fake_str.as.heap.len;
    name_key = (VALUE)&fake_str;
    ENCODING_CODERANGE_SET(name_key, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
    OBJ_FREEZE(name_key);

    code_page_value = rb_hash_lookup(rb_code_page, name_key);
    if (code_page_value != Qnil) {
        int cached = FIX2INT(code_page_value);

        /* -1 is the "no CP name found" marker written at the end of this
         * function. */
        if (cached == -1)
            return system_code_page();
        return (UINT)cached;
    }

    /* Cache miss: from here on use a real String as the key that gets stored. */
    name_key = rb_usascii_str_new2(enc_name);

    encoding = rb_enc_from_encoding(enc);
    if (!NIL_P(encoding)) {
        CONST_ID(names, "names");
        names_ary = rb_funcall(encoding, names, 0);
    }

    if (names_ary != Qundef) {
        for (i = 0; i < RARRAY_LEN(names_ary); i++) {
            name = RARRAY_PTR(names_ary)[i];
            if (strncmp("CP", RSTRING_PTR(name), 2) == 0) {
                int code_page = atoi(RSTRING_PTR(name) + 2);
                /* 0 means the text after "CP" did not yield a number; keep
                 * scanning instead of caching and returning code page 0. */
                if (code_page != 0) {
                    rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page));
                    return (UINT)code_page;
                }
            }
        }
    }

    rb_hash_aset(rb_code_page, name_key, INT2FIX(-1));
    return system_code_page();
}
/*
 * Return the encoding index that corresponds to the locale charmap.
 *
 * The charmap name comes from rb_locale_charmap().  A nil charmap selects
 * US-ASCII; a charmap name that rb_enc_find_index() cannot resolve selects
 * ASCII-8BIT.  If no encoding is registered under the name "locale" yet,
 * that name is aliased to the chosen index.
 */
int
rb_locale_encindex(void)
{
    VALUE charmap = rb_locale_charmap(rb_cEncoding);
    int locale_idx;

    if (NIL_P(charmap)) {
        locale_idx = rb_usascii_encindex();
    }
    else {
        locale_idx = rb_enc_find_index(StringValueCStr(charmap));
        if (locale_idx < 0) {
            locale_idx = rb_ascii8bit_encindex();
        }
    }

    if (rb_enc_registered("locale") < 0) {
        enc_alias_internal("locale", locale_idx);
    }

    return locale_idx;
}
/** * Document-module: MessagePack * * MessagePack is a binary-based efficient object serialization library. * It enables to exchange structured objects between many languages like JSON. * But unlike JSON, it is very fast and small. * * You can install MessagePack with rubygems. * * gem install msgpack * * Simple usage is as follows: * * require 'msgpack' * msg = [1,2,3].to_msgpack #=> "\x93\x01\x02\x03" * MessagePack.unpack(msg) #=> [1,2,3] * * Use Unpacker class for streaming deserialization. * */ void Init_msgpack(void) { mMessagePack = rb_define_module("MessagePack"); rb_define_const(mMessagePack, "VERSION", rb_str_new2(MESSAGEPACK_VERSION)); #ifdef COMPAT_HAVE_ENCODING s_enc_ascii8bit = rb_ascii8bit_encindex(); s_enc_utf8 = rb_utf8_encindex(); s_enc_usascii = rb_usascii_encindex(); s_enc_utf8_value = rb_enc_from_encoding(rb_utf8_encoding()); #endif Init_msgpack_unpack(mMessagePack); Init_msgpack_pack(mMessagePack); }
/*
 * Choose the YAML parser encoding for the IO-like object +src+ from the
 * value of its #external_encoding and store it in *parser_encoding.
 *
 *   nil (treated as ASCII-8BIT) / ASCII-8BIT -> YAML_ANY_ENCODING
 *   US-ASCII, UTF-8                          -> YAML_UTF8_ENCODING
 *   UTF-16LE                                 -> YAML_UTF16LE_ENCODING
 *   UTF-16BE                                 -> YAML_UTF16BE_ENCODING
 *   any other encoding                       -> YAML_ANY_ENCODING
 *
 * Always returns +src+ unchanged.
 */
static VALUE
transcode_io(VALUE src, int *parser_encoding)
{
    VALUE ext_enc = rb_funcall(src, rb_intern("external_encoding"), 0);
    int enc_idx;

    /* if no encoding is returned, assume ascii8bit. */
    enc_idx = NIL_P(ext_enc) ? rb_ascii8bit_encindex()
                             : rb_to_encoding_index(ext_enc);

    if (enc_idx == rb_usascii_encindex() || enc_idx == rb_utf8_encindex()) {
        /* US-ASCII is treated as UTF-8 */
        *parser_encoding = YAML_UTF8_ENCODING;
    }
    else if (enc_idx == rb_enc_find_index("UTF-16LE")) {
        *parser_encoding = YAML_UTF16LE_ENCODING;
    }
    else if (enc_idx == rb_enc_find_index("UTF-16BE")) {
        *parser_encoding = YAML_UTF16BE_ENCODING;
    }
    else if (enc_idx == rb_ascii8bit_encindex()) {
        /* Just guess on ASCII-8BIT */
        *parser_encoding = YAML_ANY_ENCODING;
    }
    else {
        /* An external encoding we don't know how to handle:
         * fall back to YAML_ANY_ENCODING. */
        *parser_encoding = YAML_ANY_ENCODING;
    }

    return src;
}
/*
 * Choose the YAML parser encoding for the IO-like object +src+ from the
 * value of its #external_encoding and store it in *parser_encoding.
 *
 *   nil (treated as ASCII-8BIT) / ASCII-8BIT -> YAML_ANY_ENCODING
 *   US-ASCII, UTF-8                          -> YAML_UTF8_ENCODING
 *   UTF-16LE                                 -> YAML_UTF16LE_ENCODING
 *   UTF-16BE                                 -> YAML_UTF16BE_ENCODING
 *
 * Returns +src+ unchanged.  Any other external encoding raises
 * ArgumentError, and *parser_encoding is left untouched.
 */
static VALUE
transcode_io(VALUE src, int *parser_encoding)
{
    VALUE ext_enc;
    int enc_idx;
    int yaml_encoding;

    ext_enc = rb_funcall(src, rb_intern("external_encoding"), 0);

    /* if no encoding is returned, assume ascii8bit. */
    if (NIL_P(ext_enc))
        enc_idx = rb_ascii8bit_encindex();
    else
        enc_idx = rb_to_encoding_index(ext_enc);

    if (enc_idx == rb_usascii_encindex() || enc_idx == rb_utf8_encindex()) {
        /* US-ASCII is treated as UTF-8 */
        yaml_encoding = YAML_UTF8_ENCODING;
    }
    else if (enc_idx == rb_enc_find_index("UTF-16LE")) {
        yaml_encoding = YAML_UTF16LE_ENCODING;
    }
    else if (enc_idx == rb_enc_find_index("UTF-16BE")) {
        yaml_encoding = YAML_UTF16BE_ENCODING;
    }
    else if (enc_idx == rb_ascii8bit_encindex()) {
        /* Just guess on ASCII-8BIT */
        yaml_encoding = YAML_ANY_ENCODING;
    }
    else {
        rb_raise(rb_eArgError, "YAML file must be UTF-8, UTF-16LE, or UTF-16BE, not %s",
                 rb_enc_name(rb_enc_from_index(enc_idx)));
        return Qnil;
    }

    *parser_encoding = yaml_encoding;
    return src;
}
/*
 * Return the code page number of the encoding +enc+.
 *
 *   - NULL +enc+           -> system_code_page()
 *   - US-ASCII, ASCII-8BIT -> 1252
 *   - UTF-8                -> CP_UTF8
 *   - otherwise            -> rb_code_page.table[index of enc] when that
 *                             index is below rb_code_page.count, else
 *                             INVALID_CODE_PAGE
 */
static UINT
code_page(rb_encoding *enc)
{
    int idx;

    if (enc == NULL) {
        return system_code_page();
    }

    idx = rb_enc_to_index(enc);

    /* map US-ASCII and ASCII-8bit as code page 1252 (us-ascii) */
    if (idx == rb_usascii_encindex() || idx == rb_ascii8bit_encindex()) {
        return 1252;
    }

    if (idx == rb_utf8_encindex()) {
        return CP_UTF8;
    }

    /* Reject indexes that fall outside the table. */
    if (idx < 0 || (unsigned int)idx >= rb_code_page.count) {
        return INVALID_CODE_PAGE;
    }

    return rb_code_page.table[idx];
}
/* Spec helper: returns rb_usascii_encindex() converted to a Ruby Integer. */
static VALUE
encoding_spec_rb_usascii_encindex(VALUE self)
{
    int usascii_idx = rb_usascii_encindex();

    return INT2NUM(usascii_idx);
}