Пример #1
0
int rb_comparator_func(void * ctx, int a_len, const void * a, int b_len, const void * b)
{
  VALUE comparator;
  VALUE a_str;
  VALUE b_str;
  VALUE comparison;
#ifdef HAVE_RUBY_ENCODING_H
  rb_encoding * internal_encoding;

  internal_encoding = rb_default_internal_encoding();
#endif

  comparator = (VALUE)ctx;
  a_str = rb_str_new((const char *)a, a_len);
  b_str = rb_str_new((const char *)b, b_len);

#ifdef HAVE_RUBY_ENCODING_H
  rb_enc_associate_index(a_str, rb_utf8_encindex());
  rb_enc_associate_index(b_str, rb_utf8_encindex());

  if(internal_encoding) {
    a_str = rb_str_export_to_enc(a_str, internal_encoding);
    b_str = rb_str_export_to_enc(b_str, internal_encoding);
  }
#endif

  comparison = rb_funcall(comparator, rb_intern("compare"), 2, a_str, b_str);

  return NUM2INT(comparison);
}
Пример #2
0
static VALUE stringForceUTF8(VALUE arg)
{
    if (rb_type(arg) == RUBY_T_STRING && ENCODING_IS_ASCII8BIT(arg))
        rb_enc_associate_index(arg, rb_utf8_encindex());

    return arg;
}
Пример #3
0
static VALUE stringForceUTF8(VALUE arg)
{
	if (RB_TYPE_P(arg, RUBY_T_STRING) && ENCODING_IS_ASCII8BIT(arg))
		rb_enc_associate_index(arg, rb_utf8_encindex());

	return arg;
}
Пример #4
0
static VALUE transcode_string(VALUE src, int * parser_encoding)
{
    int utf8    = rb_utf8_encindex();
    int utf16le = rb_enc_find_index("UTF16_LE");
    int utf16be = rb_enc_find_index("UTF16_BE");
    int source_encoding = rb_enc_get_index(src);

    if (source_encoding == utf8) {
	*parser_encoding = YAML_UTF8_ENCODING;
	return src;
    }

    if (source_encoding == utf16le) {
	*parser_encoding = YAML_UTF16LE_ENCODING;
	return src;
    }

    if (source_encoding == utf16be) {
	*parser_encoding = YAML_UTF16BE_ENCODING;
	return src;
    }

    src = rb_str_export_to_enc(src, rb_utf8_encoding());
    RB_GC_GUARD(src);

    *parser_encoding = YAML_UTF8_ENCODING;
    return src;
}
Пример #5
0
/**
 * Document-module: MessagePack
 *
 * MessagePack is a binary-based efficient object serialization library.
 * It enables to exchange structured objects between many languages like JSON.
 * But unlike JSON, it is very fast and small.
 *
 * You can install MessagePack with rubygems.
 *
 *   gem install msgpack
 *
 * Simple usage is as follows:
 *
 *   require 'msgpack'
 *   msg = [1,2,3].to_msgpack  #=> "\x93\x01\x02\x03"
 *   MessagePack.unpack(msg)   #=> [1,2,3]
 *
 * Use Unpacker class for streaming deserialization.
 *
 */
void Init_msgpack(void)
{
	mMessagePack = rb_define_module("MessagePack");

	rb_define_const(mMessagePack, "VERSION", rb_str_new2(MESSAGEPACK_VERSION));

#ifdef COMPAT_HAVE_ENCODING
	s_enc_ascii8bit = rb_ascii8bit_encindex();
	s_enc_utf8 = rb_utf8_encindex();
	s_enc_usascii = rb_usascii_encindex();
	s_enc_utf8_value = rb_enc_from_encoding(rb_utf8_encoding());
#endif

	Init_msgpack_unpack(mMessagePack);
	Init_msgpack_pack(mMessagePack);
}
Пример #6
0
static VALUE transcode_io(VALUE src, int * parser_encoding)
{
    VALUE io_external_encoding;
    int io_external_enc_index;

    io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0);

    /* if no encoding is returned, assume ascii8bit. */
    if (NIL_P(io_external_encoding)) {
	io_external_enc_index = rb_ascii8bit_encindex();
    } else {
	io_external_enc_index = rb_to_encoding_index(io_external_encoding);
    }

    /* Treat US-ASCII as utf_8 */
    if (io_external_enc_index == rb_usascii_encindex()) {
	*parser_encoding = YAML_UTF8_ENCODING;
	return src;
    }

    if (io_external_enc_index == rb_utf8_encindex()) {
	*parser_encoding = YAML_UTF8_ENCODING;
	return src;
    }

    if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) {
	*parser_encoding = YAML_UTF16LE_ENCODING;
	return src;
    }

    if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) {
	*parser_encoding = YAML_UTF16BE_ENCODING;
	return src;
    }

    /* Just guess on ASCII-8BIT */
    if (io_external_enc_index == rb_ascii8bit_encindex()) {
	*parser_encoding = YAML_ANY_ENCODING;
	return src;
    }

    /* If the external encoding is something we don't know how to handle,
     * fall back to YAML_ANY_ENCODING. */
    *parser_encoding = YAML_ANY_ENCODING;

    return src;
}
Пример #7
0
static VALUE transcode_io(VALUE src, int * parser_encoding)
{
    VALUE io_external_encoding;
    int io_external_enc_index;

    io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0);

    /* if no encoding is returned, assume ascii8bit. */
    if (NIL_P(io_external_encoding)) {
	io_external_enc_index = rb_ascii8bit_encindex();
    } else {
	io_external_enc_index = rb_to_encoding_index(io_external_encoding);
    }

    /* Treat US-ASCII as utf_8 */
    if (io_external_enc_index == rb_usascii_encindex()) {
	*parser_encoding = YAML_UTF8_ENCODING;
	return src;
    }

    if (io_external_enc_index == rb_utf8_encindex()) {
	*parser_encoding = YAML_UTF8_ENCODING;
	return src;
    }

    if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) {
	*parser_encoding = YAML_UTF16LE_ENCODING;
	return src;
    }

    if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) {
	*parser_encoding = YAML_UTF16BE_ENCODING;
	return src;
    }

    /* Just guess on ASCII-8BIT */
    if (io_external_enc_index == rb_ascii8bit_encindex()) {
	*parser_encoding = YAML_ANY_ENCODING;
	return src;
    }

    rb_raise(rb_eArgError, "YAML file must be UTF-8, UTF-16LE, or UTF-16BE, not %s",
	    rb_enc_name(rb_enc_from_index(io_external_enc_index)));

    return Qnil;
}
Пример #8
0
static int
rb_filesystem_encindex(void)
{
    int idx;
#if defined NO_LOCALE_CHARMAP
    idx = rb_enc_to_index(rb_default_external_encoding());
#elif defined _WIN32 || defined __CYGWIN__
    char cp[sizeof(int) * 8 / 3 + 4];
    snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP());
    idx = rb_enc_find_index(cp);
    if (idx < 0) idx = rb_ascii8bit_encindex();
#elif defined __APPLE__
    idx = rb_utf8_encindex();
#else
    idx = rb_locale_encindex();
#endif

    if (rb_enc_registered("filesystem") < 0) enc_alias_internal("filesystem", idx);

    return idx;
}
Пример #9
0
/*
  Return code page number of the encoding.
  Cache code page into a hash for performance since finding the code page in
  Encoding#names is slow.
*/
static UINT
code_page(rb_encoding *enc)
{
    int enc_idx;

    if (!enc)
	return system_code_page();

    enc_idx = rb_enc_to_index(enc);

    /* map US-ASCII and ASCII-8bit as code page 1252 (us-ascii) */
    if (enc_idx == rb_usascii_encindex() || enc_idx == rb_ascii8bit_encindex()) {
	return 1252;
    }
    if (enc_idx == rb_utf8_encindex()) {
	return CP_UTF8;
    }

    if (0 <= enc_idx && (unsigned int)enc_idx < rb_code_page.count)
	return rb_code_page.table[enc_idx];

    return INVALID_CODE_PAGE;
}
Пример #10
0
static VALUE encoding_spec_rb_utf8_encindex(VALUE self) {
  return INT2NUM(rb_utf8_encindex());
}
Пример #11
0
static VALUE step(VALUE self)
{
  sqlite3StmtRubyPtr ctx;
  sqlite3_stmt *stmt;
  int value, length;
  VALUE list;
  rb_encoding * internal_encoding;

  Data_Get_Struct(self, sqlite3StmtRuby, ctx);

  REQUIRE_OPEN_STMT(ctx);

  if(ctx->done_p) return Qnil;

  {
      VALUE db          = rb_iv_get(self, "@connection");
      rb_funcall(db, rb_intern("encoding"), 0);
      internal_encoding = rb_default_internal_encoding();
  }

  stmt = ctx->st;

  value = sqlite3_step(stmt);
  length = sqlite3_column_count(stmt);
  list = rb_ary_new2((long)length);

  switch(value) {
    case SQLITE_ROW:
      {
        int i;
        for(i = 0; i < length; i++) {
          switch(sqlite3_column_type(stmt, i)) {
            case SQLITE_INTEGER:
              rb_ary_push(list, LL2NUM(sqlite3_column_int64(stmt, i)));
              break;
            case SQLITE_FLOAT:
              rb_ary_push(list, rb_float_new(sqlite3_column_double(stmt, i)));
              break;
            case SQLITE_TEXT:
              {
                VALUE str = rb_tainted_str_new(
                    (const char *)sqlite3_column_text(stmt, i),
                    (long)sqlite3_column_bytes(stmt, i)
                );
                rb_enc_associate_index(str, rb_utf8_encindex());
                if(internal_encoding)
                  str = rb_str_export_to_enc(str, internal_encoding);
                rb_ary_push(list, str);
              }
              break;
            case SQLITE_BLOB:
              {
                VALUE str = rb_tainted_str_new(
                    (const char *)sqlite3_column_blob(stmt, i),
                    (long)sqlite3_column_bytes(stmt, i)
                );
                rb_ary_push(list, str);
              }
              break;
            case SQLITE_NULL:
              rb_ary_push(list, Qnil);
              break;
            default:
              rb_raise(rb_eRuntimeError, "bad type");
          }
        }
      }
      break;
    case SQLITE_DONE:
      ctx->done_p = 1;
      return Qnil;
      break;
    default:
      sqlite3_reset(stmt);
      ctx->done_p = 0;
      CHECK(sqlite3_db_handle(ctx->st), value);
  }

  return list;
}
Пример #12
0
VALUE require_compiled(VALUE fname, VALUE* result, int bLoad)
{
    VALUE path;
    char* szName1 = 0;
    VALUE retval = Qtrue;
    
    if (TYPE(fname) != T_STRING)
        rb_raise(rb_eLoadError, "can not load non-string");

    szName1 = RSTRING_PTR(fname);

    if ( String_endsWith(szName1,".rb") ) 
    {
        rb_str_chop_bang(fname); rb_str_chop_bang(fname); rb_str_chop_bang(fname);
    }
    //rb_funcall(fname, rb_intern("sub!"), 2, rb_str_new2(".rb"), rb_str_new2("") );
    szName1 = RSTRING_PTR(fname);

    if ( strcmp("strscan",szName1)==0 || strcmp("enumerator",szName1)==0 ||
        strcmp("stringio",szName1)==0 || strcmp("socket",szName1)==0 )
        return Qtrue;

    RHO_LOCK(require_lock);

    if ( !bLoad && isAlreadyLoaded(fname) == Qtrue )
        goto RCompExit;

    path = find_file(fname);

    if ( path != 0 )
    {
        VALUE seq;

        RAWLOG_INFO1("require_compiled: %s", szName1);

        //optimize require
        //rb_ary_push(GET_VM()->loaded_features, path);
        rb_ary_push(GET_VM()->loaded_features, fname);

#ifdef RHODES_EMULATOR
        if ( strstr( RSTRING_PTR(path), ".rb") == 0 )
            rb_str_cat(path,".rb",3);

        GET_VM()->src_encoding_index = rb_utf8_encindex();
        rb_load(path, 0);

        if( rho_simconf_getBool("reload_app_changes") )
        {
            if ( strncmp( RSTRING_PTR(path), rho_native_rhopath(), strlen(rho_native_rhopath()) ) == 0 )
                rb_ary_delete(GET_VM()->loaded_features, fname);
        }
#else
        //rb_gc_disable();
        seq = loadISeqFromFile(path);
        

        //*result = rb_funcall(seq, rb_intern("eval"), 0 );
        *result = rb_iseq_eval(seq);
        
        //rb_gc_enable();
#endif
        goto RCompExit;
    }

    RAWLOG_ERROR1("require_compiled: error: can not find %s", RSTRING_PTR(fname));
    retval = Qnil;

RCompExit:
    RHO_UNLOCK(require_lock);
    return retval;
}
Пример #13
0
/*
 * call-seq:
 *    parser.parse(yaml)
 *
 * Parse the YAML document contained in +yaml+.  Events will be called on
 * the handler set on the parser instance.
 *
 * See Psych::Parser and Psych::Parser#handler
 */
static VALUE parse(int argc, VALUE *argv, VALUE self)
{
    VALUE yaml, path;
    yaml_parser_t * parser;
    yaml_event_t event;
    int done = 0;
    int tainted = 0;
#ifdef HAVE_RUBY_ENCODING_H
    int encoding = rb_utf8_encindex();
    rb_encoding * internal_enc = rb_default_internal_encoding();
#endif
    VALUE handler = rb_iv_get(self, "@handler");

    if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) {
	if(rb_respond_to(yaml, id_path))
	    path = rb_funcall(yaml, id_path, 0);
	else
	    path = rb_str_new2("<unknown>");
    }

    Data_Get_Struct(self, yaml_parser_t, parser);

    if (OBJ_TAINTED(yaml)) tainted = 1;

    if(rb_respond_to(yaml, id_read)) {
	yaml_parser_set_input(parser, io_reader, (void *)yaml);
	if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1;
    } else {
	StringValue(yaml);
	yaml_parser_set_input_string(
		parser,
		(const unsigned char *)RSTRING_PTR(yaml),
		(size_t)RSTRING_LEN(yaml)
		);
    }

    while(!done) {
	if(!yaml_parser_parse(parser, &event)) {
	    VALUE exception;

	    exception = make_exception(parser, path);
	    yaml_parser_delete(parser);
	    yaml_parser_initialize(parser);

	    rb_exc_raise(exception);
	}

	switch(event.type) {
	  case YAML_STREAM_START_EVENT:

	    rb_funcall(handler, id_start_stream, 1,
		       INT2NUM((long)event.data.stream_start.encoding)
		);
	    break;
	  case YAML_DOCUMENT_START_EVENT:
	    {
		/* Get a list of tag directives (if any) */
		VALUE tag_directives = rb_ary_new();
		/* Grab the document version */
		VALUE version = event.data.document_start.version_directive ?
		    rb_ary_new3(
			(long)2,
			INT2NUM((long)event.data.document_start.version_directive->major),
			INT2NUM((long)event.data.document_start.version_directive->minor)
			) : rb_ary_new();

		if(event.data.document_start.tag_directives.start) {
		    yaml_tag_directive_t *start =
			event.data.document_start.tag_directives.start;
		    yaml_tag_directive_t *end =
			event.data.document_start.tag_directives.end;
		    for(; start != end; start++) {
			VALUE handle = Qnil;
			VALUE prefix = Qnil;
			if(start->handle) {
			    handle = rb_str_new2((const char *)start->handle);
			    if (tainted) OBJ_TAINT(handle);
#ifdef HAVE_RUBY_ENCODING_H
			    PSYCH_TRANSCODE(handle, encoding, internal_enc);
#endif
			}

			if(start->prefix) {
			    prefix = rb_str_new2((const char *)start->prefix);
			    if (tainted) OBJ_TAINT(prefix);
#ifdef HAVE_RUBY_ENCODING_H
			    PSYCH_TRANSCODE(prefix, encoding, internal_enc);
#endif
			}

			rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
		    }
		}
		rb_funcall(handler, id_start_document, 3,
			   version, tag_directives,
			   event.data.document_start.implicit == 1 ? Qtrue : Qfalse
		    );
	    }
	    break;
	  case YAML_DOCUMENT_END_EVENT:
	    rb_funcall(handler, id_end_document, 1,
		       event.data.document_end.implicit == 1 ? Qtrue : Qfalse
		);
	    break;
	  case YAML_ALIAS_EVENT:
	    {
		VALUE alias = Qnil;
		if(event.data.alias.anchor) {
		    alias = rb_str_new2((const char *)event.data.alias.anchor);
		    if (tainted) OBJ_TAINT(alias);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(alias, encoding, internal_enc);
#endif
		}

		rb_funcall(handler, id_alias, 1, alias);
	    }
	    break;
	  case YAML_SCALAR_EVENT:
	    {
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE plain_implicit, quoted_implicit, style;
		VALUE val = rb_str_new(
		    (const char *)event.data.scalar.value,
		    (long)event.data.scalar.length
		    );
		if (tainted) OBJ_TAINT(val);

#ifdef HAVE_RUBY_ENCODING_H
		PSYCH_TRANSCODE(val, encoding, internal_enc);
#endif

		if(event.data.scalar.anchor) {
		    anchor = rb_str_new2((const char *)event.data.scalar.anchor);
		    if (tainted) OBJ_TAINT(anchor);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
		}

		if(event.data.scalar.tag) {
		    tag = rb_str_new2((const char *)event.data.scalar.tag);
		    if (tainted) OBJ_TAINT(tag);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
		}

		plain_implicit =
		    event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;

		quoted_implicit =
		    event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.scalar.style);

		rb_funcall(handler, id_scalar, 6,
			   val, anchor, tag, plain_implicit, quoted_implicit, style);
	    }
	    break;
	  case YAML_SEQUENCE_START_EVENT:
	    {
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE implicit, style;
		if(event.data.sequence_start.anchor) {
		    anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
		    if (tainted) OBJ_TAINT(anchor);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
		}

		tag = Qnil;
		if(event.data.sequence_start.tag) {
		    tag = rb_str_new2((const char *)event.data.sequence_start.tag);
		    if (tainted) OBJ_TAINT(tag);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
		}

		implicit =
		    event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.sequence_start.style);

		rb_funcall(handler, id_start_sequence, 4,
			   anchor, tag, implicit, style);
	    }
	    break;
	  case YAML_SEQUENCE_END_EVENT:
	    rb_funcall(handler, id_end_sequence, 0);
	    break;
	  case YAML_MAPPING_START_EVENT:
	    {
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE implicit, style;
		if(event.data.mapping_start.anchor) {
		    anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
		    if (tainted) OBJ_TAINT(anchor);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
		}

		if(event.data.mapping_start.tag) {
		    tag = rb_str_new2((const char *)event.data.mapping_start.tag);
		    if (tainted) OBJ_TAINT(tag);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
		}

		implicit =
		    event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.mapping_start.style);

		rb_funcall(handler, id_start_mapping, 4,
			   anchor, tag, implicit, style);
	    }
	    break;
	  case YAML_MAPPING_END_EVENT:
	    rb_funcall(handler, id_end_mapping, 0);
	    break;
	  case YAML_NO_EVENT:
	    rb_funcall(handler, id_empty, 0);
	    break;
	  case YAML_STREAM_END_EVENT:
	    rb_funcall(handler, id_end_stream, 0);
	    done = 1;
	    break;
	}
	yaml_event_delete(&event);
    }

    return self;
}
Пример #14
0
/*
 * call-seq:
 *    parser.parse(yaml)
 *
 * Parse the YAML document contained in +yaml+.  Events will be called on
 * the handler set on the parser instance.
 *
 * See Psych::Parser and Psych::Parser#handler
 */
static VALUE parse(int argc, VALUE *argv, VALUE self)
{
    VALUE yaml, path;
    yaml_parser_t * parser;
    yaml_event_t event;
    int done = 0;
    int tainted = 0;
    int state = 0;
    int parser_encoding = YAML_ANY_ENCODING;
    int encoding = rb_utf8_encindex();
    rb_encoding * internal_enc = rb_default_internal_encoding();
    VALUE handler = rb_iv_get(self, "@handler");

    if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) {
	if(rb_respond_to(yaml, id_path))
	    path = rb_funcall(yaml, id_path, 0);
	else
	    path = rb_str_new2("<unknown>");
    }

    TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser);

    yaml_parser_delete(parser);
    yaml_parser_initialize(parser);

    if (OBJ_TAINTED(yaml)) tainted = 1;

    if (rb_respond_to(yaml, id_read)) {
	yaml = transcode_io(yaml, &parser_encoding);
	yaml_parser_set_encoding(parser, parser_encoding);
	yaml_parser_set_input(parser, io_reader, (void *)yaml);
	if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1;
    } else {
	StringValue(yaml);
	yaml = transcode_string(yaml, &parser_encoding);
	yaml_parser_set_encoding(parser, parser_encoding);
	yaml_parser_set_input_string(
		parser,
		(const unsigned char *)RSTRING_PTR(yaml),
		(size_t)RSTRING_LEN(yaml)
		);
    }

    while(!done) {
	VALUE event_args[5];
	VALUE start_line, start_column, end_line, end_column;

	if(!yaml_parser_parse(parser, &event)) {
	    VALUE exception;

	    exception = make_exception(parser, path);
	    yaml_parser_delete(parser);
	    yaml_parser_initialize(parser);

	    rb_exc_raise(exception);
	}

	start_line = INT2NUM((long)event.start_mark.line);
	start_column = INT2NUM((long)event.start_mark.column);
	end_line = INT2NUM((long)event.end_mark.line);
	end_column = INT2NUM((long)event.end_mark.column);

	event_args[0] = handler;
	event_args[1] = start_line;
	event_args[2] = start_column;
	event_args[3] = end_line;
	event_args[4] = end_column;
	rb_protect(protected_event_location, (VALUE)event_args, &state);

	switch(event.type) {
	    case YAML_STREAM_START_EVENT:
	      {
		  VALUE args[2];

		  args[0] = handler;
		  args[1] = INT2NUM((long)event.data.stream_start.encoding);
		  rb_protect(protected_start_stream, (VALUE)args, &state);
	      }
	      break;
	  case YAML_DOCUMENT_START_EVENT:
	    {
		VALUE args[4];
		/* Get a list of tag directives (if any) */
		VALUE tag_directives = rb_ary_new();
		/* Grab the document version */
		VALUE version = event.data.document_start.version_directive ?
		    rb_ary_new3(
			(long)2,
			INT2NUM((long)event.data.document_start.version_directive->major),
			INT2NUM((long)event.data.document_start.version_directive->minor)
			) : rb_ary_new();

		if(event.data.document_start.tag_directives.start) {
		    yaml_tag_directive_t *start =
			event.data.document_start.tag_directives.start;
		    yaml_tag_directive_t *end =
			event.data.document_start.tag_directives.end;
		    for(; start != end; start++) {
			VALUE handle = Qnil;
			VALUE prefix = Qnil;
			if(start->handle) {
			    handle = rb_str_new2((const char *)start->handle);
			    if (tainted) OBJ_TAINT(handle);
			    PSYCH_TRANSCODE(handle, encoding, internal_enc);
			}

			if(start->prefix) {
			    prefix = rb_str_new2((const char *)start->prefix);
			    if (tainted) OBJ_TAINT(prefix);
			    PSYCH_TRANSCODE(prefix, encoding, internal_enc);
			}

			rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
		    }
		}
		args[0] = handler;
		args[1] = version;
		args[2] = tag_directives;
		args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
		rb_protect(protected_start_document, (VALUE)args, &state);
	    }
	    break;
	  case YAML_DOCUMENT_END_EVENT:
	    {
		VALUE args[2];

		args[0] = handler;
		args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
		rb_protect(protected_end_document, (VALUE)args, &state);
	    }
	    break;
	  case YAML_ALIAS_EVENT:
	    {
		VALUE args[2];
		VALUE alias = Qnil;
		if(event.data.alias.anchor) {
		    alias = rb_str_new2((const char *)event.data.alias.anchor);
		    if (tainted) OBJ_TAINT(alias);
		    PSYCH_TRANSCODE(alias, encoding, internal_enc);
		}

		args[0] = handler;
		args[1] = alias;
		rb_protect(protected_alias, (VALUE)args, &state);
	    }
	    break;
	  case YAML_SCALAR_EVENT:
	    {
		VALUE args[7];
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE plain_implicit, quoted_implicit, style;
		VALUE val = rb_str_new(
		    (const char *)event.data.scalar.value,
		    (long)event.data.scalar.length
		    );
		if (tainted) OBJ_TAINT(val);

		PSYCH_TRANSCODE(val, encoding, internal_enc);

		if(event.data.scalar.anchor) {
		    anchor = rb_str_new2((const char *)event.data.scalar.anchor);
		    if (tainted) OBJ_TAINT(anchor);
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
		}

		if(event.data.scalar.tag) {
		    tag = rb_str_new2((const char *)event.data.scalar.tag);
		    if (tainted) OBJ_TAINT(tag);
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
		}

		plain_implicit =
		    event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;

		quoted_implicit =
		    event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.scalar.style);

		args[0] = handler;
		args[1] = val;
		args[2] = anchor;
		args[3] = tag;
		args[4] = plain_implicit;
		args[5] = quoted_implicit;
		args[6] = style;
		rb_protect(protected_scalar, (VALUE)args, &state);
	    }
	    break;
	  case YAML_SEQUENCE_START_EVENT:
	    {
		VALUE args[5];
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE implicit, style;
		if(event.data.sequence_start.anchor) {
		    anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
		    if (tainted) OBJ_TAINT(anchor);
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
		}

		tag = Qnil;
		if(event.data.sequence_start.tag) {
		    tag = rb_str_new2((const char *)event.data.sequence_start.tag);
		    if (tainted) OBJ_TAINT(tag);
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
		}

		implicit =
		    event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.sequence_start.style);

		args[0] = handler;
		args[1] = anchor;
		args[2] = tag;
		args[3] = implicit;
		args[4] = style;

		rb_protect(protected_start_sequence, (VALUE)args, &state);
	    }
	    break;
	  case YAML_SEQUENCE_END_EVENT:
	    rb_protect(protected_end_sequence, handler, &state);
	    break;
	  case YAML_MAPPING_START_EVENT:
	    {
		VALUE args[5];
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE implicit, style;
		if(event.data.mapping_start.anchor) {
		    anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
		    if (tainted) OBJ_TAINT(anchor);
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
		}

		if(event.data.mapping_start.tag) {
		    tag = rb_str_new2((const char *)event.data.mapping_start.tag);
		    if (tainted) OBJ_TAINT(tag);
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
		}

		implicit =
		    event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.mapping_start.style);

		args[0] = handler;
		args[1] = anchor;
		args[2] = tag;
		args[3] = implicit;
		args[4] = style;

		rb_protect(protected_start_mapping, (VALUE)args, &state);
	    }
	    break;
	  case YAML_MAPPING_END_EVENT:
	    rb_protect(protected_end_mapping, handler, &state);
	    break;
	  case YAML_NO_EVENT:
	    rb_protect(protected_empty, handler, &state);
	    break;
	  case YAML_STREAM_END_EVENT:
	    rb_protect(protected_end_stream, handler, &state);
	    done = 1;
	    break;
	}
	yaml_event_delete(&event);
	if (state) rb_jump_tag(state);
    }

    return self;
}
Пример #15
0
/**
 * @param str the string to be scrubbed
 * @param repl the replacement character
 * @return If given string is invalid, returns a new string. Otherwise, returns Qnil.
 */
static VALUE
str_scrub0(int argc, VALUE *argv, VALUE str)
{
    int cr = ENC_CODERANGE(str);
    rb_encoding *enc;
    int encidx;
    VALUE repl;

    if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID)
	return Qnil;

    enc = STR_ENC_GET(str);
    rb_scan_args(argc, argv, "01", &repl);
    if (argc != 0) {
	repl = str_compat_and_valid(repl, enc);
    }

    if (rb_enc_dummy_p(enc)) {
	return Qnil;
    }
    encidx = rb_enc_to_index(enc);

#define DEFAULT_REPLACE_CHAR(str) do { \
	static const char replace[sizeof(str)-1] = str; \
	rep = replace; replen = (int)sizeof(replace); \
    } while (0)

    if (rb_enc_asciicompat(enc)) {
	const char *p = RSTRING_PTR(str);
	const char *e = RSTRING_END(str);
	const char *p1 = p;
	const char *rep;
	long replen;
	int rep7bit_p;
	VALUE buf = Qnil;
	if (rb_block_given_p()) {
	    rep = NULL;
	    replen = 0;
	    rep7bit_p = FALSE;
	}
	else if (!NIL_P(repl)) {
	    rep = RSTRING_PTR(repl);
	    replen = RSTRING_LEN(repl);
	    rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT);
	}
	else if (encidx == rb_utf8_encindex()) {
	    DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD");
	    rep7bit_p = FALSE;
	}
	else {
	    DEFAULT_REPLACE_CHAR("?");
	    rep7bit_p = TRUE;
	}
	cr = ENC_CODERANGE_7BIT;

	p = search_nonascii(p, e);
	if (!p) {
	    p = e;
	}
	while (p < e) {
	    int ret = rb_enc_precise_mbclen(p, e, enc);
	    if (MBCLEN_NEEDMORE_P(ret)) {
		break;
	    }
	    else if (MBCLEN_CHARFOUND_P(ret)) {
		cr = ENC_CODERANGE_VALID;
		p += MBCLEN_CHARFOUND_LEN(ret);
	    }
	    else if (MBCLEN_INVALID_P(ret)) {
		/*
		 * p1~p: valid ascii/multibyte chars
		 * p ~e: invalid bytes + unknown bytes
		 */
		long clen = rb_enc_mbmaxlen(enc);
		if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
		if (p > p1) {
		    rb_str_buf_cat(buf, p1, p - p1);
		}

		if (e - p < clen) clen = e - p;
		if (clen <= 2) {
		    clen = 1;
		}
		else {
		    const char *q = p;
		    clen--;
		    for (; clen > 1; clen--) {
			ret = rb_enc_precise_mbclen(q, q + clen, enc);
			if (MBCLEN_NEEDMORE_P(ret)) break;
			if (MBCLEN_INVALID_P(ret)) continue;
			UNREACHABLE;
		    }
		}
		if (rep) {
		    rb_str_buf_cat(buf, rep, replen);
		    if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
		}
		else {
		    repl = rb_yield(rb_enc_str_new(p, clen, enc));
		    repl = str_compat_and_valid(repl, enc);
		    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
		    if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
			cr = ENC_CODERANGE_VALID;
		}
		p += clen;
		p1 = p;
		p = search_nonascii(p, e);
		if (!p) {
		    p = e;
		    break;
		}
	    }
	    else {
		UNREACHABLE;
	    }
	}
	if (NIL_P(buf)) {
	    if (p == e) {
		ENC_CODERANGE_SET(str, cr);
		return Qnil;
	    }
	    buf = rb_str_buf_new(RSTRING_LEN(str));
	}
	if (p1 < p) {
	    rb_str_buf_cat(buf, p1, p - p1);
	}
	if (p < e) {
	    if (rep) {
		rb_str_buf_cat(buf, rep, replen);
		if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
	    }
	    else {
		repl = rb_yield(rb_enc_str_new(p, e-p, enc));
		repl = str_compat_and_valid(repl, enc);
		rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
		if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
		    cr = ENC_CODERANGE_VALID;
	    }
	}
	ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr);
	return buf;
    }
    else {
	/* ASCII incompatible */
	const char *p = RSTRING_PTR(str);
	const char *e = RSTRING_END(str);
	const char *p1 = p;
	VALUE buf = Qnil;
	const char *rep;
	long replen;
	long mbminlen = rb_enc_mbminlen(enc);
	if (!NIL_P(repl)) {
	    rep = RSTRING_PTR(repl);
	    replen = RSTRING_LEN(repl);
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-16BE")) {
	    DEFAULT_REPLACE_CHAR("\xFF\xFD");
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-16LE")) {
	    DEFAULT_REPLACE_CHAR("\xFD\xFF");
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-32BE")) {
	    DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD");
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-32lE")) {
	    DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00");
	}
	else {
	    DEFAULT_REPLACE_CHAR("?");
	}

	while (p < e) {
	    int ret = rb_enc_precise_mbclen(p, e, enc);
	    if (MBCLEN_NEEDMORE_P(ret)) {
		break;
	    }
	    else if (MBCLEN_CHARFOUND_P(ret)) {
		p += MBCLEN_CHARFOUND_LEN(ret);
	    }
	    else if (MBCLEN_INVALID_P(ret)) {
		const char *q = p;
		long clen = rb_enc_mbmaxlen(enc);
		if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
		if (p > p1) rb_str_buf_cat(buf, p1, p - p1);

		if (e - p < clen) clen = e - p;
		if (clen <= mbminlen * 2) {
		    clen = mbminlen;
		}
		else {
		    clen -= mbminlen;
		    for (; clen > mbminlen; clen-=mbminlen) {
			ret = rb_enc_precise_mbclen(q, q + clen, enc);
			if (MBCLEN_NEEDMORE_P(ret)) break;
			if (MBCLEN_INVALID_P(ret)) continue;
			UNREACHABLE;
		    }
		}
		if (rep) {
		    rb_str_buf_cat(buf, rep, replen);
		}
		else {
		    repl = rb_yield(rb_enc_str_new(p, e-p, enc));
		    repl = str_compat_and_valid(repl, enc);
		    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
		}
		p += clen;
		p1 = p;
	    }
	    else {
		UNREACHABLE;
	    }
	}
	if (NIL_P(buf)) {
	    if (p == e) {
		ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
		return Qnil;
	    }
	    buf = rb_str_buf_new(RSTRING_LEN(str));
	}
	if (p1 < p) {
	    rb_str_buf_cat(buf, p1, p - p1);
	}
	if (p < e) {
	    if (rep) {
		rb_str_buf_cat(buf, rep, replen);
	    }
	    else {
		repl = rb_yield(rb_enc_str_new(p, e-p, enc));
		repl = str_compat_and_valid(repl, enc);
		rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
	    }
	}
	ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID);
	return buf;
    }
}
Пример #16
0
/*
 * call-seq:
 *    parser.parse(yaml)
 *
 * Parse the YAML document contained in +yaml+.  Events will be called on
 * the handler set on the parser instance.
 *
 * See Psych::Parser and Psych::Parser#handler
 */
static VALUE parse(VALUE self, VALUE yaml)
{
    yaml_parser_t * parser;
    yaml_event_t event;
    int done = 0;
#ifdef HAVE_RUBY_ENCODING_H
    int encoding = rb_utf8_encindex();
    rb_encoding * internal_enc = rb_default_internal_encoding();
#endif
    VALUE handler = rb_iv_get(self, "@handler");

    Data_Get_Struct(self, yaml_parser_t, parser);

    if(rb_respond_to(yaml, id_read)) {
	yaml_parser_set_input(parser, io_reader, (void *)yaml);
    } else {
	StringValue(yaml);
	yaml_parser_set_input_string(
		parser,
		(const unsigned char *)RSTRING_PTR(yaml),
		(size_t)RSTRING_LEN(yaml)
		);
    }

    while(!done) {
	if(!yaml_parser_parse(parser, &event)) {
	    size_t line   = parser->mark.line;
	    size_t column = parser->mark.column;

	    rb_raise(ePsychSyntaxError, "couldn't parse YAML at line %d column %d",
		    (int)line, (int)column);
	}

	switch(event.type) {
	  case YAML_STREAM_START_EVENT:

	    rb_funcall(handler, id_start_stream, 1,
		       INT2NUM((long)event.data.stream_start.encoding)
		);
	    break;
	  case YAML_DOCUMENT_START_EVENT:
	    {
		/* Get a list of tag directives (if any) */
		VALUE tag_directives = rb_ary_new();
		/* Grab the document version */
		VALUE version = event.data.document_start.version_directive ?
		    rb_ary_new3(
			(long)2,
			INT2NUM((long)event.data.document_start.version_directive->major),
			INT2NUM((long)event.data.document_start.version_directive->minor)
			) : rb_ary_new();

		if(event.data.document_start.tag_directives.start) {
		    yaml_tag_directive_t *start =
			event.data.document_start.tag_directives.start;
		    yaml_tag_directive_t *end =
			event.data.document_start.tag_directives.end;
		    for(; start != end; start++) {
			VALUE handle = Qnil;
			VALUE prefix = Qnil;
			if(start->handle) {
			    handle = rb_str_new2((const char *)start->handle);
#ifdef HAVE_RUBY_ENCODING_H
			    PSYCH_TRANSCODE(handle, encoding, internal_enc);
#endif
			}

			if(start->prefix) {
			    prefix = rb_str_new2((const char *)start->prefix);
#ifdef HAVE_RUBY_ENCODING_H
			    PSYCH_TRANSCODE(prefix, encoding, internal_enc);
#endif
			}

			rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
		    }
		}
		rb_funcall(handler, id_start_document, 3,
			   version, tag_directives,
			   event.data.document_start.implicit == 1 ? Qtrue : Qfalse
		    );
	    }
	    break;
	  case YAML_DOCUMENT_END_EVENT:
	    rb_funcall(handler, id_end_document, 1,
		       event.data.document_end.implicit == 1 ? Qtrue : Qfalse
		);
	    break;
	  case YAML_ALIAS_EVENT:
	    {
		VALUE alias = Qnil;
		if(event.data.alias.anchor) {
		    alias = rb_str_new2((const char *)event.data.alias.anchor);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(alias, encoding, internal_enc);
#endif
		}

		rb_funcall(handler, id_alias, 1, alias);
	    }
	    break;
	  case YAML_SCALAR_EVENT:
	    {
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE plain_implicit, quoted_implicit, style;
		VALUE val = rb_str_new(
		    (const char *)event.data.scalar.value,
		    (long)event.data.scalar.length
		    );

#ifdef HAVE_RUBY_ENCODING_H
		PSYCH_TRANSCODE(val, encoding, internal_enc);
#endif

		if(event.data.scalar.anchor) {
		    anchor = rb_str_new2((const char *)event.data.scalar.anchor);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
		}

		if(event.data.scalar.tag) {
		    tag = rb_str_new2((const char *)event.data.scalar.tag);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
		}

		plain_implicit =
		    event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;

		quoted_implicit =
		    event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.scalar.style);

		rb_funcall(handler, id_scalar, 6,
			   val, anchor, tag, plain_implicit, quoted_implicit, style);
	    }
	    break;
	  case YAML_SEQUENCE_START_EVENT:
	    {
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE implicit, style;
		if(event.data.sequence_start.anchor) {
		    anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
		}

		tag = Qnil;
		if(event.data.sequence_start.tag) {
		    tag = rb_str_new2((const char *)event.data.sequence_start.tag);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
		}

		implicit =
		    event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.sequence_start.style);

		rb_funcall(handler, id_start_sequence, 4,
			   anchor, tag, implicit, style);
	    }
	    break;
	  case YAML_SEQUENCE_END_EVENT:
	    rb_funcall(handler, id_end_sequence, 0);
	    break;
	  case YAML_MAPPING_START_EVENT:
	    {
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE implicit, style;
		if(event.data.mapping_start.anchor) {
		    anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
		}

		if(event.data.mapping_start.tag) {
		    tag = rb_str_new2((const char *)event.data.mapping_start.tag);
#ifdef HAVE_RUBY_ENCODING_H
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
		}

		implicit =
		    event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.mapping_start.style);

		rb_funcall(handler, id_start_mapping, 4,
			   anchor, tag, implicit, style);
	    }
	    break;
	  case YAML_MAPPING_END_EVENT:
	    rb_funcall(handler, id_end_mapping, 0);
	    break;
	  case YAML_NO_EVENT:
	    rb_funcall(handler, id_empty, 0);
	    break;
	  case YAML_STREAM_END_EVENT:
	    rb_funcall(handler, id_end_stream, 0);
	    done = 1;
	    break;
	}
    }

    return self;
}