int rb_comparator_func(void * ctx, int a_len, const void * a, int b_len, const void * b) { VALUE comparator; VALUE a_str; VALUE b_str; VALUE comparison; #ifdef HAVE_RUBY_ENCODING_H rb_encoding * internal_encoding; internal_encoding = rb_default_internal_encoding(); #endif comparator = (VALUE)ctx; a_str = rb_str_new((const char *)a, a_len); b_str = rb_str_new((const char *)b, b_len); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(a_str, rb_utf8_encindex()); rb_enc_associate_index(b_str, rb_utf8_encindex()); if(internal_encoding) { a_str = rb_str_export_to_enc(a_str, internal_encoding); b_str = rb_str_export_to_enc(b_str, internal_encoding); } #endif comparison = rb_funcall(comparator, rb_intern("compare"), 2, a_str, b_str); return NUM2INT(comparison); }
static VALUE stringForceUTF8(VALUE arg) { if (rb_type(arg) == RUBY_T_STRING && ENCODING_IS_ASCII8BIT(arg)) rb_enc_associate_index(arg, rb_utf8_encindex()); return arg; }
static VALUE stringForceUTF8(VALUE arg) { if (RB_TYPE_P(arg, RUBY_T_STRING) && ENCODING_IS_ASCII8BIT(arg)) rb_enc_associate_index(arg, rb_utf8_encindex()); return arg; }
static VALUE transcode_string(VALUE src, int * parser_encoding) { int utf8 = rb_utf8_encindex(); int utf16le = rb_enc_find_index("UTF16_LE"); int utf16be = rb_enc_find_index("UTF16_BE"); int source_encoding = rb_enc_get_index(src); if (source_encoding == utf8) { *parser_encoding = YAML_UTF8_ENCODING; return src; } if (source_encoding == utf16le) { *parser_encoding = YAML_UTF16LE_ENCODING; return src; } if (source_encoding == utf16be) { *parser_encoding = YAML_UTF16BE_ENCODING; return src; } src = rb_str_export_to_enc(src, rb_utf8_encoding()); RB_GC_GUARD(src); *parser_encoding = YAML_UTF8_ENCODING; return src; }
/** * Document-module: MessagePack * * MessagePack is a binary-based efficient object serialization library. * It enables to exchange structured objects between many languages like JSON. * But unlike JSON, it is very fast and small. * * You can install MessagePack with rubygems. * * gem install msgpack * * Simple usage is as follows: * * require 'msgpack' * msg = [1,2,3].to_msgpack #=> "\x93\x01\x02\x03" * MessagePack.unpack(msg) #=> [1,2,3] * * Use Unpacker class for streaming deserialization. * */ void Init_msgpack(void) { mMessagePack = rb_define_module("MessagePack"); rb_define_const(mMessagePack, "VERSION", rb_str_new2(MESSAGEPACK_VERSION)); #ifdef COMPAT_HAVE_ENCODING s_enc_ascii8bit = rb_ascii8bit_encindex(); s_enc_utf8 = rb_utf8_encindex(); s_enc_usascii = rb_usascii_encindex(); s_enc_utf8_value = rb_enc_from_encoding(rb_utf8_encoding()); #endif Init_msgpack_unpack(mMessagePack); Init_msgpack_pack(mMessagePack); }
static VALUE transcode_io(VALUE src, int * parser_encoding) { VALUE io_external_encoding; int io_external_enc_index; io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0); /* if no encoding is returned, assume ascii8bit. */ if (NIL_P(io_external_encoding)) { io_external_enc_index = rb_ascii8bit_encindex(); } else { io_external_enc_index = rb_to_encoding_index(io_external_encoding); } /* Treat US-ASCII as utf_8 */ if (io_external_enc_index == rb_usascii_encindex()) { *parser_encoding = YAML_UTF8_ENCODING; return src; } if (io_external_enc_index == rb_utf8_encindex()) { *parser_encoding = YAML_UTF8_ENCODING; return src; } if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) { *parser_encoding = YAML_UTF16LE_ENCODING; return src; } if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) { *parser_encoding = YAML_UTF16BE_ENCODING; return src; } /* Just guess on ASCII-8BIT */ if (io_external_enc_index == rb_ascii8bit_encindex()) { *parser_encoding = YAML_ANY_ENCODING; return src; } /* If the external encoding is something we don't know how to handle, * fall back to YAML_ANY_ENCODING. */ *parser_encoding = YAML_ANY_ENCODING; return src; }
static VALUE transcode_io(VALUE src, int * parser_encoding) { VALUE io_external_encoding; int io_external_enc_index; io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0); /* if no encoding is returned, assume ascii8bit. */ if (NIL_P(io_external_encoding)) { io_external_enc_index = rb_ascii8bit_encindex(); } else { io_external_enc_index = rb_to_encoding_index(io_external_encoding); } /* Treat US-ASCII as utf_8 */ if (io_external_enc_index == rb_usascii_encindex()) { *parser_encoding = YAML_UTF8_ENCODING; return src; } if (io_external_enc_index == rb_utf8_encindex()) { *parser_encoding = YAML_UTF8_ENCODING; return src; } if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) { *parser_encoding = YAML_UTF16LE_ENCODING; return src; } if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) { *parser_encoding = YAML_UTF16BE_ENCODING; return src; } /* Just guess on ASCII-8BIT */ if (io_external_enc_index == rb_ascii8bit_encindex()) { *parser_encoding = YAML_ANY_ENCODING; return src; } rb_raise(rb_eArgError, "YAML file must be UTF-8, UTF-16LE, or UTF-16BE, not %s", rb_enc_name(rb_enc_from_index(io_external_enc_index))); return Qnil; }
static int rb_filesystem_encindex(void) { int idx; #if defined NO_LOCALE_CHARMAP idx = rb_enc_to_index(rb_default_external_encoding()); #elif defined _WIN32 || defined __CYGWIN__ char cp[sizeof(int) * 8 / 3 + 4]; snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP()); idx = rb_enc_find_index(cp); if (idx < 0) idx = rb_ascii8bit_encindex(); #elif defined __APPLE__ idx = rb_utf8_encindex(); #else idx = rb_locale_encindex(); #endif if (rb_enc_registered("filesystem") < 0) enc_alias_internal("filesystem", idx); return idx; }
/* Return code page number of the encoding. Cache code page into a hash for performance since finding the code page in Encoding#names is slow. */ static UINT code_page(rb_encoding *enc) { int enc_idx; if (!enc) return system_code_page(); enc_idx = rb_enc_to_index(enc); /* map US-ASCII and ASCII-8bit as code page 1252 (us-ascii) */ if (enc_idx == rb_usascii_encindex() || enc_idx == rb_ascii8bit_encindex()) { return 1252; } if (enc_idx == rb_utf8_encindex()) { return CP_UTF8; } if (0 <= enc_idx && (unsigned int)enc_idx < rb_code_page.count) return rb_code_page.table[enc_idx]; return INVALID_CODE_PAGE; }
static VALUE encoding_spec_rb_utf8_encindex(VALUE self) { return INT2NUM(rb_utf8_encindex()); }
static VALUE step(VALUE self) { sqlite3StmtRubyPtr ctx; sqlite3_stmt *stmt; int value, length; VALUE list; rb_encoding * internal_encoding; Data_Get_Struct(self, sqlite3StmtRuby, ctx); REQUIRE_OPEN_STMT(ctx); if(ctx->done_p) return Qnil; { VALUE db = rb_iv_get(self, "@connection"); rb_funcall(db, rb_intern("encoding"), 0); internal_encoding = rb_default_internal_encoding(); } stmt = ctx->st; value = sqlite3_step(stmt); length = sqlite3_column_count(stmt); list = rb_ary_new2((long)length); switch(value) { case SQLITE_ROW: { int i; for(i = 0; i < length; i++) { switch(sqlite3_column_type(stmt, i)) { case SQLITE_INTEGER: rb_ary_push(list, LL2NUM(sqlite3_column_int64(stmt, i))); break; case SQLITE_FLOAT: rb_ary_push(list, rb_float_new(sqlite3_column_double(stmt, i))); break; case SQLITE_TEXT: { VALUE str = rb_tainted_str_new( (const char *)sqlite3_column_text(stmt, i), (long)sqlite3_column_bytes(stmt, i) ); rb_enc_associate_index(str, rb_utf8_encindex()); if(internal_encoding) str = rb_str_export_to_enc(str, internal_encoding); rb_ary_push(list, str); } break; case SQLITE_BLOB: { VALUE str = rb_tainted_str_new( (const char *)sqlite3_column_blob(stmt, i), (long)sqlite3_column_bytes(stmt, i) ); rb_ary_push(list, str); } break; case SQLITE_NULL: rb_ary_push(list, Qnil); break; default: rb_raise(rb_eRuntimeError, "bad type"); } } } break; case SQLITE_DONE: ctx->done_p = 1; return Qnil; break; default: sqlite3_reset(stmt); ctx->done_p = 0; CHECK(sqlite3_db_handle(ctx->st), value); } return list; }
VALUE require_compiled(VALUE fname, VALUE* result, int bLoad) { VALUE path; char* szName1 = 0; VALUE retval = Qtrue; if (TYPE(fname) != T_STRING) rb_raise(rb_eLoadError, "can not load non-string"); szName1 = RSTRING_PTR(fname); if ( String_endsWith(szName1,".rb") ) { rb_str_chop_bang(fname); rb_str_chop_bang(fname); rb_str_chop_bang(fname); } //rb_funcall(fname, rb_intern("sub!"), 2, rb_str_new2(".rb"), rb_str_new2("") ); szName1 = RSTRING_PTR(fname); if ( strcmp("strscan",szName1)==0 || strcmp("enumerator",szName1)==0 || strcmp("stringio",szName1)==0 || strcmp("socket",szName1)==0 ) return Qtrue; RHO_LOCK(require_lock); if ( !bLoad && isAlreadyLoaded(fname) == Qtrue ) goto RCompExit; path = find_file(fname); if ( path != 0 ) { VALUE seq; RAWLOG_INFO1("require_compiled: %s", szName1); //optimize require //rb_ary_push(GET_VM()->loaded_features, path); rb_ary_push(GET_VM()->loaded_features, fname); #ifdef RHODES_EMULATOR if ( strstr( RSTRING_PTR(path), ".rb") == 0 ) rb_str_cat(path,".rb",3); GET_VM()->src_encoding_index = rb_utf8_encindex(); rb_load(path, 0); if( rho_simconf_getBool("reload_app_changes") ) { if ( strncmp( RSTRING_PTR(path), rho_native_rhopath(), strlen(rho_native_rhopath()) ) == 0 ) rb_ary_delete(GET_VM()->loaded_features, fname); } #else //rb_gc_disable(); seq = loadISeqFromFile(path); //*result = rb_funcall(seq, rb_intern("eval"), 0 ); *result = rb_iseq_eval(seq); //rb_gc_enable(); #endif goto RCompExit; } RAWLOG_ERROR1("require_compiled: error: can not find %s", RSTRING_PTR(fname)); retval = Qnil; RCompExit: RHO_UNLOCK(require_lock); return retval; }
/* * call-seq: * parser.parse(yaml) * * Parse the YAML document contained in +yaml+. Events will be called on * the handler set on the parser instance. * * See Psych::Parser and Psych::Parser#handler */ static VALUE parse(int argc, VALUE *argv, VALUE self) { VALUE yaml, path; yaml_parser_t * parser; yaml_event_t event; int done = 0; int tainted = 0; #ifdef HAVE_RUBY_ENCODING_H int encoding = rb_utf8_encindex(); rb_encoding * internal_enc = rb_default_internal_encoding(); #endif VALUE handler = rb_iv_get(self, "@handler"); if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) { if(rb_respond_to(yaml, id_path)) path = rb_funcall(yaml, id_path, 0); else path = rb_str_new2("<unknown>"); } Data_Get_Struct(self, yaml_parser_t, parser); if (OBJ_TAINTED(yaml)) tainted = 1; if(rb_respond_to(yaml, id_read)) { yaml_parser_set_input(parser, io_reader, (void *)yaml); if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1; } else { StringValue(yaml); yaml_parser_set_input_string( parser, (const unsigned char *)RSTRING_PTR(yaml), (size_t)RSTRING_LEN(yaml) ); } while(!done) { if(!yaml_parser_parse(parser, &event)) { VALUE exception; exception = make_exception(parser, path); yaml_parser_delete(parser); yaml_parser_initialize(parser); rb_exc_raise(exception); } switch(event.type) { case YAML_STREAM_START_EVENT: rb_funcall(handler, id_start_stream, 1, INT2NUM((long)event.data.stream_start.encoding) ); break; case YAML_DOCUMENT_START_EVENT: { /* Get a list of tag directives (if any) */ VALUE tag_directives = rb_ary_new(); /* Grab the document version */ VALUE version = event.data.document_start.version_directive ? rb_ary_new3( (long)2, INT2NUM((long)event.data.document_start.version_directive->major), INT2NUM((long)event.data.document_start.version_directive->minor) ) : rb_ary_new(); if(event.data.document_start.tag_directives.start) { yaml_tag_directive_t *start = event.data.document_start.tag_directives.start; yaml_tag_directive_t *end = event.data.document_start.tag_directives.end; for(; start != end; start++) { VALUE handle = Qnil; VALUE prefix = Qnil; if(start->handle) { handle = rb_str_new2((const char *)start->handle); if (tainted) OBJ_TAINT(handle); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(handle, encoding, internal_enc); #endif } if(start->prefix) { prefix = rb_str_new2((const char *)start->prefix); if (tainted) OBJ_TAINT(prefix); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(prefix, encoding, internal_enc); #endif } rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); } } rb_funcall(handler, id_start_document, 3, version, tag_directives, event.data.document_start.implicit == 1 ? Qtrue : Qfalse ); } break; case YAML_DOCUMENT_END_EVENT: rb_funcall(handler, id_end_document, 1, event.data.document_end.implicit == 1 ? Qtrue : Qfalse ); break; case YAML_ALIAS_EVENT: { VALUE alias = Qnil; if(event.data.alias.anchor) { alias = rb_str_new2((const char *)event.data.alias.anchor); if (tainted) OBJ_TAINT(alias); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(alias, encoding, internal_enc); #endif } rb_funcall(handler, id_alias, 1, alias); } break; case YAML_SCALAR_EVENT: { VALUE anchor = Qnil; VALUE tag = Qnil; VALUE plain_implicit, quoted_implicit, style; VALUE val = rb_str_new( (const char *)event.data.scalar.value, (long)event.data.scalar.length ); if (tainted) OBJ_TAINT(val); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(val, encoding, internal_enc); #endif if(event.data.scalar.anchor) { anchor = rb_str_new2((const char *)event.data.scalar.anchor); if (tainted) OBJ_TAINT(anchor); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(anchor, encoding, internal_enc); #endif } if(event.data.scalar.tag) { tag = rb_str_new2((const char *)event.data.scalar.tag); if (tainted) OBJ_TAINT(tag); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(tag, encoding, internal_enc); #endif } plain_implicit = event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue; quoted_implicit = event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue; style = INT2NUM((long)event.data.scalar.style); rb_funcall(handler, id_scalar, 6, val, anchor, tag, plain_implicit, quoted_implicit, style); } break; case YAML_SEQUENCE_START_EVENT: { VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; if(event.data.sequence_start.anchor) { anchor = rb_str_new2((const char *)event.data.sequence_start.anchor); if (tainted) OBJ_TAINT(anchor); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(anchor, encoding, internal_enc); #endif } tag = Qnil; if(event.data.sequence_start.tag) { tag = rb_str_new2((const char *)event.data.sequence_start.tag); if (tainted) OBJ_TAINT(tag); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(tag, encoding, internal_enc); #endif } implicit = event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue; style = INT2NUM((long)event.data.sequence_start.style); rb_funcall(handler, id_start_sequence, 4, anchor, tag, implicit, style); } break; case YAML_SEQUENCE_END_EVENT: rb_funcall(handler, id_end_sequence, 0); break; case YAML_MAPPING_START_EVENT: { VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; if(event.data.mapping_start.anchor) { anchor = rb_str_new2((const char *)event.data.mapping_start.anchor); if (tainted) OBJ_TAINT(anchor); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(anchor, encoding, internal_enc); #endif } if(event.data.mapping_start.tag) { tag = rb_str_new2((const char *)event.data.mapping_start.tag); if (tainted) OBJ_TAINT(tag); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(tag, encoding, internal_enc); #endif } implicit = event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue; style = INT2NUM((long)event.data.mapping_start.style); rb_funcall(handler, id_start_mapping, 4, anchor, tag, implicit, style); } break; case YAML_MAPPING_END_EVENT: rb_funcall(handler, id_end_mapping, 0); break; case YAML_NO_EVENT: rb_funcall(handler, id_empty, 0); break; case YAML_STREAM_END_EVENT: rb_funcall(handler, id_end_stream, 0); done = 1; break; } yaml_event_delete(&event); } return self; }
/* * call-seq: * parser.parse(yaml) * * Parse the YAML document contained in +yaml+. Events will be called on * the handler set on the parser instance. * * See Psych::Parser and Psych::Parser#handler */ static VALUE parse(int argc, VALUE *argv, VALUE self) { VALUE yaml, path; yaml_parser_t * parser; yaml_event_t event; int done = 0; int tainted = 0; int state = 0; int parser_encoding = YAML_ANY_ENCODING; int encoding = rb_utf8_encindex(); rb_encoding * internal_enc = rb_default_internal_encoding(); VALUE handler = rb_iv_get(self, "@handler"); if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) { if(rb_respond_to(yaml, id_path)) path = rb_funcall(yaml, id_path, 0); else path = rb_str_new2("<unknown>"); } TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser); yaml_parser_delete(parser); yaml_parser_initialize(parser); if (OBJ_TAINTED(yaml)) tainted = 1; if (rb_respond_to(yaml, id_read)) { yaml = transcode_io(yaml, &parser_encoding); yaml_parser_set_encoding(parser, parser_encoding); yaml_parser_set_input(parser, io_reader, (void *)yaml); if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1; } else { StringValue(yaml); yaml = transcode_string(yaml, &parser_encoding); yaml_parser_set_encoding(parser, parser_encoding); yaml_parser_set_input_string( parser, (const unsigned char *)RSTRING_PTR(yaml), (size_t)RSTRING_LEN(yaml) ); } while(!done) { VALUE event_args[5]; VALUE start_line, start_column, end_line, end_column; if(!yaml_parser_parse(parser, &event)) { VALUE exception; exception = make_exception(parser, path); yaml_parser_delete(parser); yaml_parser_initialize(parser); rb_exc_raise(exception); } start_line = INT2NUM((long)event.start_mark.line); start_column = INT2NUM((long)event.start_mark.column); end_line = INT2NUM((long)event.end_mark.line); end_column = INT2NUM((long)event.end_mark.column); event_args[0] = handler; event_args[1] = start_line; event_args[2] = start_column; event_args[3] = end_line; event_args[4] = end_column; rb_protect(protected_event_location, (VALUE)event_args, &state); switch(event.type) { case YAML_STREAM_START_EVENT: { VALUE args[2]; args[0] = handler; args[1] = INT2NUM((long)event.data.stream_start.encoding); rb_protect(protected_start_stream, (VALUE)args, &state); } break; case YAML_DOCUMENT_START_EVENT: { VALUE args[4]; /* Get a list of tag directives (if any) */ VALUE tag_directives = rb_ary_new(); /* Grab the document version */ VALUE version = event.data.document_start.version_directive ? rb_ary_new3( (long)2, INT2NUM((long)event.data.document_start.version_directive->major), INT2NUM((long)event.data.document_start.version_directive->minor) ) : rb_ary_new(); if(event.data.document_start.tag_directives.start) { yaml_tag_directive_t *start = event.data.document_start.tag_directives.start; yaml_tag_directive_t *end = event.data.document_start.tag_directives.end; for(; start != end; start++) { VALUE handle = Qnil; VALUE prefix = Qnil; if(start->handle) { handle = rb_str_new2((const char *)start->handle); if (tainted) OBJ_TAINT(handle); PSYCH_TRANSCODE(handle, encoding, internal_enc); } if(start->prefix) { prefix = rb_str_new2((const char *)start->prefix); if (tainted) OBJ_TAINT(prefix); PSYCH_TRANSCODE(prefix, encoding, internal_enc); } rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); } } args[0] = handler; args[1] = version; args[2] = tag_directives; args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse; rb_protect(protected_start_document, (VALUE)args, &state); } break; case YAML_DOCUMENT_END_EVENT: { VALUE args[2]; args[0] = handler; args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse; rb_protect(protected_end_document, (VALUE)args, &state); } break; case YAML_ALIAS_EVENT: { VALUE args[2]; VALUE alias = Qnil; if(event.data.alias.anchor) { alias = rb_str_new2((const char *)event.data.alias.anchor); if (tainted) OBJ_TAINT(alias); PSYCH_TRANSCODE(alias, encoding, internal_enc); } args[0] = handler; args[1] = alias; rb_protect(protected_alias, (VALUE)args, &state); } break; case YAML_SCALAR_EVENT: { VALUE args[7]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE plain_implicit, quoted_implicit, style; VALUE val = rb_str_new( (const char *)event.data.scalar.value, (long)event.data.scalar.length ); if (tainted) OBJ_TAINT(val); PSYCH_TRANSCODE(val, encoding, internal_enc); if(event.data.scalar.anchor) { anchor = rb_str_new2((const char *)event.data.scalar.anchor); if (tainted) OBJ_TAINT(anchor); PSYCH_TRANSCODE(anchor, encoding, internal_enc); } if(event.data.scalar.tag) { tag = rb_str_new2((const char *)event.data.scalar.tag); if (tainted) OBJ_TAINT(tag); PSYCH_TRANSCODE(tag, encoding, internal_enc); } plain_implicit = event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue; quoted_implicit = event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue; style = INT2NUM((long)event.data.scalar.style); args[0] = handler; args[1] = val; args[2] = anchor; args[3] = tag; args[4] = plain_implicit; args[5] = quoted_implicit; args[6] = style; rb_protect(protected_scalar, (VALUE)args, &state); } break; case YAML_SEQUENCE_START_EVENT: { VALUE args[5]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; if(event.data.sequence_start.anchor) { anchor = rb_str_new2((const char *)event.data.sequence_start.anchor); if (tainted) OBJ_TAINT(anchor); PSYCH_TRANSCODE(anchor, encoding, internal_enc); } tag = Qnil; if(event.data.sequence_start.tag) { tag = rb_str_new2((const char *)event.data.sequence_start.tag); if (tainted) OBJ_TAINT(tag); PSYCH_TRANSCODE(tag, encoding, internal_enc); } implicit = event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue; style = INT2NUM((long)event.data.sequence_start.style); args[0] = handler; args[1] = anchor; args[2] = tag; args[3] = implicit; args[4] = style; rb_protect(protected_start_sequence, (VALUE)args, &state); } break; case YAML_SEQUENCE_END_EVENT: rb_protect(protected_end_sequence, handler, &state); break; case YAML_MAPPING_START_EVENT: { VALUE args[5]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; if(event.data.mapping_start.anchor) { anchor = rb_str_new2((const char *)event.data.mapping_start.anchor); if (tainted) OBJ_TAINT(anchor); PSYCH_TRANSCODE(anchor, encoding, internal_enc); } if(event.data.mapping_start.tag) { tag = rb_str_new2((const char *)event.data.mapping_start.tag); if (tainted) OBJ_TAINT(tag); PSYCH_TRANSCODE(tag, encoding, internal_enc); } implicit = event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue; style = INT2NUM((long)event.data.mapping_start.style); args[0] = handler; args[1] = anchor; args[2] = tag; args[3] = implicit; args[4] = style; rb_protect(protected_start_mapping, (VALUE)args, &state); } break; case YAML_MAPPING_END_EVENT: rb_protect(protected_end_mapping, handler, &state); break; case YAML_NO_EVENT: rb_protect(protected_empty, handler, &state); break; case YAML_STREAM_END_EVENT: rb_protect(protected_end_stream, handler, &state); done = 1; break; } yaml_event_delete(&event); if (state) rb_jump_tag(state); } return self; }
/** * @param str the string to be scrubbed * @param repl the replacement character * @return If given string is invalid, returns a new string. Otherwise, returns Qnil. */ static VALUE str_scrub0(int argc, VALUE *argv, VALUE str) { int cr = ENC_CODERANGE(str); rb_encoding *enc; int encidx; VALUE repl; if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) return Qnil; enc = STR_ENC_GET(str); rb_scan_args(argc, argv, "01", &repl); if (argc != 0) { repl = str_compat_and_valid(repl, enc); } if (rb_enc_dummy_p(enc)) { return Qnil; } encidx = rb_enc_to_index(enc); #define DEFAULT_REPLACE_CHAR(str) do { \ static const char replace[sizeof(str)-1] = str; \ rep = replace; replen = (int)sizeof(replace); \ } while (0) if (rb_enc_asciicompat(enc)) { const char *p = RSTRING_PTR(str); const char *e = RSTRING_END(str); const char *p1 = p; const char *rep; long replen; int rep7bit_p; VALUE buf = Qnil; if (rb_block_given_p()) { rep = NULL; replen = 0; rep7bit_p = FALSE; } else if (!NIL_P(repl)) { rep = RSTRING_PTR(repl); replen = RSTRING_LEN(repl); rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT); } else if (encidx == rb_utf8_encindex()) { DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD"); rep7bit_p = FALSE; } else { DEFAULT_REPLACE_CHAR("?"); rep7bit_p = TRUE; } cr = ENC_CODERANGE_7BIT; p = search_nonascii(p, e); if (!p) { p = e; } while (p < e) { int ret = rb_enc_precise_mbclen(p, e, enc); if (MBCLEN_NEEDMORE_P(ret)) { break; } else if (MBCLEN_CHARFOUND_P(ret)) { cr = ENC_CODERANGE_VALID; p += MBCLEN_CHARFOUND_LEN(ret); } else if (MBCLEN_INVALID_P(ret)) { /* * p1~p: valid ascii/multibyte chars * p ~e: invalid bytes + unknown bytes */ long clen = rb_enc_mbmaxlen(enc); if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str)); if (p > p1) { rb_str_buf_cat(buf, p1, p - p1); } if (e - p < clen) clen = e - p; if (clen <= 2) { clen = 1; } else { const char *q = p; clen--; for (; clen > 1; clen--) { ret = rb_enc_precise_mbclen(q, q + clen, enc); if (MBCLEN_NEEDMORE_P(ret)) break; if (MBCLEN_INVALID_P(ret)) continue; UNREACHABLE; } } if (rep) { rb_str_buf_cat(buf, rep, replen); if (!rep7bit_p) cr = ENC_CODERANGE_VALID; } else { repl = rb_yield(rb_enc_str_new(p, clen, enc)); repl = str_compat_and_valid(repl, enc); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID) cr = ENC_CODERANGE_VALID; } p += clen; p1 = p; p = search_nonascii(p, e); if (!p) { p = e; break; } } else { UNREACHABLE; } } if (NIL_P(buf)) { if (p == e) { ENC_CODERANGE_SET(str, cr); return Qnil; } buf = rb_str_buf_new(RSTRING_LEN(str)); } if (p1 < p) { rb_str_buf_cat(buf, p1, p - p1); } if (p < e) { if (rep) { rb_str_buf_cat(buf, rep, replen); if (!rep7bit_p) cr = ENC_CODERANGE_VALID; } else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID) cr = ENC_CODERANGE_VALID; } } ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr); return buf; } else { /* ASCII incompatible */ const char *p = RSTRING_PTR(str); const char *e = RSTRING_END(str); const char *p1 = p; VALUE buf = Qnil; const char *rep; long replen; long mbminlen = rb_enc_mbminlen(enc); if (!NIL_P(repl)) { rep = RSTRING_PTR(repl); replen = RSTRING_LEN(repl); } else if (!strcasecmp(rb_enc_name(enc), "UTF-16BE")) { DEFAULT_REPLACE_CHAR("\xFF\xFD"); } else if (!strcasecmp(rb_enc_name(enc), "UTF-16LE")) { DEFAULT_REPLACE_CHAR("\xFD\xFF"); } else if (!strcasecmp(rb_enc_name(enc), "UTF-32BE")) { DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD"); } else if (!strcasecmp(rb_enc_name(enc), "UTF-32lE")) { DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00"); } else { DEFAULT_REPLACE_CHAR("?"); } while (p < e) { int ret = rb_enc_precise_mbclen(p, e, enc); if (MBCLEN_NEEDMORE_P(ret)) { break; } else if (MBCLEN_CHARFOUND_P(ret)) { p += MBCLEN_CHARFOUND_LEN(ret); } else if (MBCLEN_INVALID_P(ret)) { const char *q = p; long clen = rb_enc_mbmaxlen(enc); if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str)); if (p > p1) rb_str_buf_cat(buf, p1, p - p1); if (e - p < clen) clen = e - p; if (clen <= mbminlen * 2) { clen = mbminlen; } else { clen -= mbminlen; for (; clen > mbminlen; clen-=mbminlen) { ret = rb_enc_precise_mbclen(q, q + clen, enc); if (MBCLEN_NEEDMORE_P(ret)) break; if (MBCLEN_INVALID_P(ret)) continue; UNREACHABLE; } } if (rep) { rb_str_buf_cat(buf, rep, replen); } else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); } p += clen; p1 = p; } else { UNREACHABLE; } } if (NIL_P(buf)) { if (p == e) { ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID); return Qnil; } buf = rb_str_buf_new(RSTRING_LEN(str)); } if (p1 < p) { rb_str_buf_cat(buf, p1, p - p1); } if (p < e) { if (rep) { rb_str_buf_cat(buf, rep, replen); } else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); } } ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID); return buf; } }
/* * call-seq: * parser.parse(yaml) * * Parse the YAML document contained in +yaml+. Events will be called on * the handler set on the parser instance. * * See Psych::Parser and Psych::Parser#handler */ static VALUE parse(VALUE self, VALUE yaml) { yaml_parser_t * parser; yaml_event_t event; int done = 0; #ifdef HAVE_RUBY_ENCODING_H int encoding = rb_utf8_encindex(); rb_encoding * internal_enc = rb_default_internal_encoding(); #endif VALUE handler = rb_iv_get(self, "@handler"); Data_Get_Struct(self, yaml_parser_t, parser); if(rb_respond_to(yaml, id_read)) { yaml_parser_set_input(parser, io_reader, (void *)yaml); } else { StringValue(yaml); yaml_parser_set_input_string( parser, (const unsigned char *)RSTRING_PTR(yaml), (size_t)RSTRING_LEN(yaml) ); } while(!done) { if(!yaml_parser_parse(parser, &event)) { size_t line = parser->mark.line; size_t column = parser->mark.column; rb_raise(ePsychSyntaxError, "couldn't parse YAML at line %d column %d", (int)line, (int)column); } switch(event.type) { case YAML_STREAM_START_EVENT: rb_funcall(handler, id_start_stream, 1, INT2NUM((long)event.data.stream_start.encoding) ); break; case YAML_DOCUMENT_START_EVENT: { /* Get a list of tag directives (if any) */ VALUE tag_directives = rb_ary_new(); /* Grab the document version */ VALUE version = event.data.document_start.version_directive ? rb_ary_new3( (long)2, INT2NUM((long)event.data.document_start.version_directive->major), INT2NUM((long)event.data.document_start.version_directive->minor) ) : rb_ary_new(); if(event.data.document_start.tag_directives.start) { yaml_tag_directive_t *start = event.data.document_start.tag_directives.start; yaml_tag_directive_t *end = event.data.document_start.tag_directives.end; for(; start != end; start++) { VALUE handle = Qnil; VALUE prefix = Qnil; if(start->handle) { handle = rb_str_new2((const char *)start->handle); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(handle, encoding, internal_enc); #endif } if(start->prefix) { prefix = rb_str_new2((const char *)start->prefix); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(prefix, encoding, internal_enc); #endif } rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); } } rb_funcall(handler, id_start_document, 3, version, tag_directives, event.data.document_start.implicit == 1 ? Qtrue : Qfalse ); } break; case YAML_DOCUMENT_END_EVENT: rb_funcall(handler, id_end_document, 1, event.data.document_end.implicit == 1 ? Qtrue : Qfalse ); break; case YAML_ALIAS_EVENT: { VALUE alias = Qnil; if(event.data.alias.anchor) { alias = rb_str_new2((const char *)event.data.alias.anchor); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(alias, encoding, internal_enc); #endif } rb_funcall(handler, id_alias, 1, alias); } break; case YAML_SCALAR_EVENT: { VALUE anchor = Qnil; VALUE tag = Qnil; VALUE plain_implicit, quoted_implicit, style; VALUE val = rb_str_new( (const char *)event.data.scalar.value, (long)event.data.scalar.length ); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(val, encoding, internal_enc); #endif if(event.data.scalar.anchor) { anchor = rb_str_new2((const char *)event.data.scalar.anchor); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(anchor, encoding, internal_enc); #endif } if(event.data.scalar.tag) { tag = rb_str_new2((const char *)event.data.scalar.tag); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(tag, encoding, internal_enc); #endif } plain_implicit = event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue; quoted_implicit = event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue; style = INT2NUM((long)event.data.scalar.style); rb_funcall(handler, id_scalar, 6, val, anchor, tag, plain_implicit, quoted_implicit, style); } break; case YAML_SEQUENCE_START_EVENT: { VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; if(event.data.sequence_start.anchor) { anchor = rb_str_new2((const char *)event.data.sequence_start.anchor); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(anchor, encoding, internal_enc); #endif } tag = Qnil; if(event.data.sequence_start.tag) { tag = rb_str_new2((const char *)event.data.sequence_start.tag); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(tag, encoding, internal_enc); #endif } implicit = event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue; style = INT2NUM((long)event.data.sequence_start.style); rb_funcall(handler, id_start_sequence, 4, anchor, tag, implicit, style); } break; case YAML_SEQUENCE_END_EVENT: rb_funcall(handler, id_end_sequence, 0); break; case YAML_MAPPING_START_EVENT: { VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; if(event.data.mapping_start.anchor) { anchor = rb_str_new2((const char *)event.data.mapping_start.anchor); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(anchor, encoding, internal_enc); #endif } if(event.data.mapping_start.tag) { tag = rb_str_new2((const char *)event.data.mapping_start.tag); #ifdef HAVE_RUBY_ENCODING_H PSYCH_TRANSCODE(tag, encoding, internal_enc); #endif } implicit = event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue; style = INT2NUM((long)event.data.mapping_start.style); rb_funcall(handler, id_start_mapping, 4, anchor, tag, implicit, style); } break; case YAML_MAPPING_END_EVENT: rb_funcall(handler, id_end_mapping, 0); break; case YAML_NO_EVENT: rb_funcall(handler, id_empty, 0); break; case YAML_STREAM_END_EVENT: rb_funcall(handler, id_end_stream, 0); done = 1; break; } } return self; }