Ejemplo n.º 1
0
VALUE
rb_enc_associate_index(VALUE obj, int idx)
{
/*    enc_check_capable(obj);*/
    if (rb_enc_get_index(obj) == idx)
	return obj;
    if (SPECIAL_CONST_P(obj)) {
	rb_raise(rb_eArgError, "cannot set encoding");
    }
    if (!ENC_CODERANGE_ASCIIONLY(obj) ||
	!rb_enc_asciicompat(rb_enc_from_index(idx))) {
	ENC_CODERANGE_CLEAR(obj);
    }
    rb_enc_set_index(obj, idx);
    return obj;
}
Ejemplo n.º 2
0
static rb_encoding*
enc_compatible_str(VALUE str1, VALUE str2)
{
    int idx1 = enc_get_index_str(str1);
    int idx2 = enc_get_index_str(str2);

    if (idx1 < 0 || idx2 < 0)
        return 0;

    if (idx1 == idx2) {
	return rb_enc_from_index(idx1);
    }
    else {
	return enc_compatible_latter(str1, str2, idx1, idx2);
    }
}
Ejemplo n.º 3
0
/*
 * call-seq:
 *   enc.inspect -> string
 *
 * Returns a string which represents the encoding for programmers.
 *
 *   Encoding::UTF_8.inspect       #=> "#<Encoding:UTF-8>"
 *   Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
 */
static VALUE
enc_inspect(VALUE self)
{
    rb_encoding *enc;

    if (!is_data_encoding(self)) {
	not_encoding(self);
    }
    if (!(enc = DATA_PTR(self)) || rb_enc_from_index(rb_enc_to_index(enc)) != enc) {
	rb_raise(rb_eTypeError, "broken Encoding");
    }
    return rb_enc_sprintf(rb_usascii_encoding(),
			  "#<%"PRIsVALUE":%s%s%s>", rb_obj_class(self),
			  rb_enc_name(enc),
			  (ENC_DUMMY_P(enc) ? " (dummy)" : ""),
			  enc_autoload_p(enc) ? " (autoload)" : "");
}
Ejemplo n.º 4
0
static VALUE transcode_io(VALUE src, int * parser_encoding)
{
    VALUE io_external_encoding;
    int io_external_enc_index;

    io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0);

    /* if no encoding is returned, assume ascii8bit. */
    if (NIL_P(io_external_encoding)) {
	io_external_enc_index = rb_ascii8bit_encindex();
    } else {
	io_external_enc_index = rb_to_encoding_index(io_external_encoding);
    }

    /* Treat US-ASCII as utf_8 */
    if (io_external_enc_index == rb_usascii_encindex()) {
	*parser_encoding = YAML_UTF8_ENCODING;
	return src;
    }

    if (io_external_enc_index == rb_utf8_encindex()) {
	*parser_encoding = YAML_UTF8_ENCODING;
	return src;
    }

    if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) {
	*parser_encoding = YAML_UTF16LE_ENCODING;
	return src;
    }

    if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) {
	*parser_encoding = YAML_UTF16BE_ENCODING;
	return src;
    }

    /* Just guess on ASCII-8BIT */
    if (io_external_enc_index == rb_ascii8bit_encindex()) {
	*parser_encoding = YAML_ANY_ENCODING;
	return src;
    }

    rb_raise(rb_eArgError, "YAML file must be UTF-8, UTF-16LE, or UTF-16BE, not %s",
	    rb_enc_name(rb_enc_from_index(io_external_enc_index)));

    return Qnil;
}
Ejemplo n.º 5
0
static VALUE
file_path_convert(VALUE name)
{
#ifndef _WIN32 /* non Windows == Unix */
    rb_encoding *fname_encoding = rb_enc_from_index(ENCODING_GET(name));
    rb_encoding *fs_encoding;
    if (rb_default_internal_encoding() != NULL
	    && rb_usascii_encoding() != fname_encoding
	    && rb_ascii8bit_encoding() != fname_encoding
	    && (fs_encoding = rb_filesystem_encoding()) != fname_encoding
	    && !rb_enc_str_asciionly_p(name)) {
	/* Don't call rb_filesystem_encoding() before US-ASCII and ASCII-8BIT */
	/* fs_encoding should be ascii compatible */
	name = rb_str_conv_enc(name, fname_encoding, fs_encoding);
    }
#endif
    return name;
}
Ejemplo n.º 6
0
static rb_encoding *
must_encindex(int index)
{
    rb_encoding *enc = rb_enc_from_index(index);
    if (!enc) {
	rb_raise(rb_eEncodingError, "encoding index out of bound: %d",
		 index);
    }
    if (ENC_TO_ENCINDEX(enc) != (int)(index & ENC_INDEX_MASK)) {
	rb_raise(rb_eEncodingError, "wrong encoding index %d for %s (expected %d)",
		 index, rb_enc_name(enc), ENC_TO_ENCINDEX(enc));
    }
    if (enc_autoload_p(enc) && enc_autoload(enc) == -1) {
	rb_loaderror("failed to load encoding (%s)",
		     rb_enc_name(enc));
    }
    return enc;
}
Ejemplo n.º 7
0
static void
sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
    ox_sax_buf_init(&dr->buf, io);
    dr->buf.dr = dr;
    stack_init(&dr->stack);
    dr->handler = handler;
    dr->value_obj = rb_data_object_alloc(ox_sax_value_class, dr, 0, 0);
    rb_gc_register_address(&dr->value_obj);
    dr->options = *options;
    dr->hints = 0;
    dr->err = 0;
    has_init(&dr->has, handler);
#if HAS_ENCODING_SUPPORT
    if ('\0' == *ox_default_options.encoding) {
	VALUE	encoding;

	dr->encoding = 0;
	if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
	    int	e = rb_enc_get_index(encoding);
	    if (0 <= e) {
		dr->encoding = rb_enc_from_index(e);
	    }
	}
    } else {
        dr->encoding = rb_enc_find(ox_default_options.encoding);
    }
#elif HAS_PRIVATE_ENCODING
    if ('\0' == *ox_default_options.encoding) {
	VALUE	encoding;

	if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
	    dr->encoding = encoding;
	} else {
	    dr->encoding = Qnil;
	}
    } else {
        dr->encoding = rb_str_new2(ox_default_options.encoding);
    }
#else
    dr->encoding = 0;
#endif
}
Ejemplo n.º 8
0
void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value) {
    bool bad_encoding = false;
    rb_encoding* string_encoding = rb_enc_from_index(ENCODING_GET(value));
    if (type == UPB_TYPE_STRING) {
        bad_encoding =
            string_encoding != kRubyStringUtf8Encoding &&
            string_encoding != kRubyStringASCIIEncoding;
    } else {
        bad_encoding =
            string_encoding != kRubyString8bitEncoding;
    }
    // Check that encoding is UTF-8 or ASCII (for string fields) or ASCII-8BIT
    // (for bytes fields).
    if (bad_encoding) {
        rb_raise(rb_eTypeError, "Encoding for '%s' fields must be %s (was %s)",
                 (type == UPB_TYPE_STRING) ? "string" : "bytes",
                 (type == UPB_TYPE_STRING) ? "UTF-8 or ASCII" : "ASCII-8BIT",
                 rb_enc_name(string_encoding));
    }
}
Ejemplo n.º 9
0
static VALUE
str_encode_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE newstr = str;
    int encidx = str_transcode(argc, argv, &newstr);
    int cr = 0;

    if (encidx < 0) return str;
    rb_str_shared_replace(str, newstr);
    rb_enc_associate_index(str, encidx);

    /* transcoded string never be broken. */
    if (rb_enc_asciicompat(rb_enc_from_index(encidx))) {
	rb_str_coderange_scan_restartable(RSTRING_PTR(str), RSTRING_END(str), 0, &cr);
    }
    else {
	cr = ENC_CODERANGE_VALID;
    }
    ENC_CODERANGE_SET(str, cr);
    return str;
}
Ejemplo n.º 10
0
Archivo: encoding.c Proyecto: 217/ruby
int
rb_enc_find_index(const char *name)
{
    int i = rb_enc_registered(name);
    rb_encoding *enc;

    if (i < 0) {
	i = load_encoding(name);
    }
    else if (!(enc = rb_enc_from_index(i))) {
	if (i != UNSPECIFIED_ENCODING) {
	    rb_raise(rb_eArgError, "encoding %s is not registered", name);
	}
    }
    else if (enc_autoload_p(enc)) {
	if (enc_autoload(enc) < 0) {
	    rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
		    name);
	    return 0;
	}
    }
    return i;
}
Ejemplo n.º 11
0
Archivo: encoding.c Proyecto: 217/ruby
rb_encoding *
rb_filesystem_encoding(void)
{
    return rb_enc_from_index(rb_filesystem_encindex());
}
Ejemplo n.º 12
0
Archivo: encoding.c Proyecto: 217/ruby
rb_encoding *
rb_locale_encoding(void)
{
    return rb_enc_from_index(rb_locale_encindex());
}
Ejemplo n.º 13
0
static VALUE encoding_spec_rb_enc_from_index(VALUE self, VALUE index) {
  return rb_str_new2(rb_enc_from_index(NUM2INT(index))->name);
}
Ejemplo n.º 14
0
void
rb_encdb_set_unicode(int index)
{
    rb_enc_from_index(index)->flags |= ONIGENC_FLAG_UNICODE;
}
Ejemplo n.º 15
0
static int
str_transcode(int argc, VALUE *argv, VALUE *self)
{
    VALUE dest;
    VALUE str = *self;
    long blen, slen;
    unsigned char *buf, *bp, *sp, *fromp;
    rb_encoding *from_enc, *to_enc;
    const char *from_e, *to_e;
    int from_encidx, to_encidx;
    VALUE from_encval, to_encval;
    const rb_transcoder *my_transcoder;
    rb_transcoding my_transcoding;
    int final_encoding = 0;
    VALUE opt;
    int options = 0;

    opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
    if (!NIL_P(opt)) {
	VALUE v;

	argc--;
	v = rb_hash_aref(opt, sym_invalid);
	if (NIL_P(v)) {
	    rb_raise(rb_eArgError, "unknown value for invalid: setting");
	}
	else if (v==sym_ignore) {
	    options |= INVALID_IGNORE;
	}
    }
    if (argc < 1 || argc > 2) {
	rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
    }
    if ((to_encidx = rb_to_encoding_index(to_encval = argv[0])) < 0) {
	to_enc = 0;
	to_encidx = 0;
	to_e = StringValueCStr(to_encval);
    }
    else {
	to_enc = rb_enc_from_index(to_encidx);
	to_e = rb_enc_name(to_enc);
    }
    if (argc==1) {
	from_encidx = rb_enc_get_index(str);
	from_enc = rb_enc_from_index(from_encidx);
	from_e = rb_enc_name(from_enc);
    }
    else if ((from_encidx = rb_to_encoding_index(from_encval = argv[1])) < 0) {
	from_enc = 0;
	from_e = StringValueCStr(from_encval);
    }
    else {
	from_enc = rb_enc_from_index(from_encidx);
	from_e = rb_enc_name(from_enc);
    }

    if (from_enc && from_enc == to_enc) {
	return -1;
    }
    if (from_enc && to_enc && rb_enc_asciicompat(from_enc) && rb_enc_asciicompat(to_enc)) {
	if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
	    return to_encidx;
	}
    }
    if (encoding_equal(from_e, to_e)) {
	return -1;
    }

    do { /* loop for multistep transcoding */
	/* later, maybe use smaller intermediate strings for very long strings */
	if (!(my_transcoder = transcode_dispatch(from_e, to_e))) {
	    rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_e, to_e);
	}

	my_transcoding.transcoder = my_transcoder;

	if (my_transcoder->preprocessor) {
	    fromp = sp = (unsigned char *)RSTRING_PTR(str);
	    slen = RSTRING_LEN(str);
	    blen = slen + 30; /* len + margin */
	    dest = rb_str_tmp_new(blen);
	    bp = (unsigned char *)RSTRING_PTR(dest);
	    my_transcoding.ruby_string_dest = dest;
	    (*my_transcoder->preprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
	    if (fromp != sp+slen) {
		rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
	    }
	    buf = (unsigned char *)RSTRING_PTR(dest);
	    *bp = '\0';
	    rb_str_set_len(dest, bp - buf);
	    str = dest;
	}
	fromp = sp = (unsigned char *)RSTRING_PTR(str);
	slen = RSTRING_LEN(str);
	blen = slen + 30; /* len + margin */
	dest = rb_str_tmp_new(blen);
	bp = (unsigned char *)RSTRING_PTR(dest);
	my_transcoding.ruby_string_dest = dest;
	my_transcoding.flush_func = str_transcoding_resize;

	transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding, options);
	if (fromp != sp+slen) {
	    rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
	}
	buf = (unsigned char *)RSTRING_PTR(dest);
	*bp = '\0';
	rb_str_set_len(dest, bp - buf);
	if (my_transcoder->postprocessor) {
	    str = dest;
	    fromp = sp = (unsigned char *)RSTRING_PTR(str);
	    slen = RSTRING_LEN(str);
	    blen = slen + 30; /* len + margin */
	    dest = rb_str_tmp_new(blen);
	    bp = (unsigned char *)RSTRING_PTR(dest);
	    my_transcoding.ruby_string_dest = dest;
	    (*my_transcoder->postprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
	    if (fromp != sp+slen) {
		rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
	    }
	    buf = (unsigned char *)RSTRING_PTR(dest);
	    *bp = '\0';
	    rb_str_set_len(dest, bp - buf);
	}

	if (encoding_equal(my_transcoder->to_encoding, to_e)) {
	    final_encoding = 1;
	}
	else {
	    from_e = my_transcoder->to_encoding;
	    str = dest;
	}
    } while (!final_encoding);
    /* set encoding */
    if (!to_enc) {
	to_encidx = rb_define_dummy_encoding(to_e);
    }
    *self = dest;

    return to_encidx;
}
Ejemplo n.º 16
0
Archivo: encoding.c Proyecto: 217/ruby
rb_encoding*
rb_enc_get(VALUE obj)
{
    return rb_enc_from_index(rb_enc_get_index(obj));
}
Ejemplo n.º 17
0
void
rb_encdb_set_unicode(int index)
{
    ((rb_raw_encoding *)rb_enc_from_index(index))->flags |= ONIGENC_FLAG_UNICODE;
}
Ejemplo n.º 18
0
VALUE bamfcsv_parse_string(VALUE self, VALUE string) {
  char *buf = RSTRING_PTR(string);
  long bufsize = RSTRING_LEN(string);
  rb_encoding *enc = rb_enc_from_index(ENCODING_GET(string));

  unsigned long num_rows = 1, cell_count = 1;
  int quote_count = 0, quotes_matched = 1;

  VALUE matrix = rb_ary_new();
  VALUE row = rb_ary_new();

  char *cur = buf, *cell_start = buf;

  if (bufsize > 0 && *(buf+bufsize-1) == '\n') {
    *(buf+bufsize-1) = 0;
    --bufsize;
  }

  VALUE dbl_dquote = rb_str_new("\"\"", 2), dquote = rb_str_new("\"", 1);
  ID gsub_bang = rb_intern("gsub!");
  
  for (; cur < buf+bufsize; cur++) {

    if (*cur == '"') {
      if (0 == quote_count && cell_start != cur) /* Quotes begin past opening of cell */
        rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %lu, cell %lu: Quoted cell must open with '\"'", num_rows, cell_count);
      else
        ++quote_count;
    }

    quotes_matched = !(quote_count & 1); /* count is even */

    if (quotes_matched) { 

      if (*cur == ',') {
        
        if (quote_count && *(cur-1) != '"')
          rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu.", num_rows, cell_count);

        VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
        if (quote_count)
          rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);

        rb_ary_push(row, cell_str);
        cell_start = cur+1;

        quote_count = 0;
        ++cell_count;

      } else if (*cur == '\n') {
        
        if (quote_count && !quotes_end_line(cur))
            rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOL", num_rows, cell_count);

        VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
        if (quote_count)
          rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
        /* Completely blank lines don't even get a nil. This matches CSV's behavior. */
        if (cell_count > 1 || cell_str != Qnil)
          rb_ary_push(row, cell_str);
        rb_ary_push(matrix, row);
        row = rb_ary_new();
        cell_start = cur+1;
        
        quote_count = 0;
        ++num_rows;
        cell_count = 0;

      } else if (quote_count && *cur != '\r' && *cur != '"')
        rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %lu, cell %lu", num_rows, cell_count);
      
    }

  }

  if (!quotes_matched) /* Reached EOF without matching quotes */
    rb_raise(BAMFCSV_MalformedCSVError_class, "Illegal quoting on line %lu, cell %lu: File ends without closing '\"'", num_rows, cell_count);
  else if (quote_count && !quotes_end_line(cur)) /* Quotes closed before end of final cell */
    rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOF", num_rows, cell_count);

  VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
  if (quote_count)
    rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
  /* Completely blank lines don't even get a nil. This matches CSV's behavior. */
  if (cell_count > 1 || cell_str != Qnil)
    rb_ary_push(row, cell_str);
  rb_ary_push(matrix, row);

  return matrix;

}
Ejemplo n.º 19
0
Archivo: encoding.c Proyecto: 217/ruby
rb_encoding*
rb_enc_compatible(VALUE str1, VALUE str2)
{
    int idx1, idx2;
    rb_encoding *enc1, *enc2;

    idx1 = rb_enc_get_index(str1);
    idx2 = rb_enc_get_index(str2);

    if (idx1 < 0 || idx2 < 0)
        return 0;

    if (idx1 == idx2) {
	return rb_enc_from_index(idx1);
    }
    enc1 = rb_enc_from_index(idx1);
    enc2 = rb_enc_from_index(idx2);

    if (TYPE(str2) == T_STRING && RSTRING_LEN(str2) == 0)
	return (idx1 == ENCINDEX_US_ASCII && rb_enc_asciicompat(enc2)) ? enc2 : enc1;
    if (TYPE(str1) == T_STRING && RSTRING_LEN(str1) == 0)
	return (idx2 == ENCINDEX_US_ASCII && rb_enc_asciicompat(enc1)) ? enc1 : enc2;
    if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
	return 0;
    }

    /* objects whose encoding is the same of contents */
    if (BUILTIN_TYPE(str2) != T_STRING && idx2 == ENCINDEX_US_ASCII)
	return enc1;
    if (BUILTIN_TYPE(str1) != T_STRING && idx1 == ENCINDEX_US_ASCII)
	return enc2;

    if (BUILTIN_TYPE(str1) != T_STRING) {
	VALUE tmp = str1;
	int idx0 = idx1;
	str1 = str2;
	str2 = tmp;
	idx1 = idx2;
	idx2 = idx0;
    }
    if (BUILTIN_TYPE(str1) == T_STRING) {
	int cr1, cr2;

	cr1 = rb_enc_str_coderange(str1);
	if (BUILTIN_TYPE(str2) == T_STRING) {
	    cr2 = rb_enc_str_coderange(str2);
	    if (cr1 != cr2) {
		/* may need to handle ENC_CODERANGE_BROKEN */
		if (cr1 == ENC_CODERANGE_7BIT) return enc2;
		if (cr2 == ENC_CODERANGE_7BIT) return enc1;
	    }
	    if (cr2 == ENC_CODERANGE_7BIT) {
		if (idx1 == ENCINDEX_ASCII) return enc2;
		return enc1;
	    }
	}
	if (cr1 == ENC_CODERANGE_7BIT)
	    return enc2;
    }
    return 0;
}
Ejemplo n.º 20
0
static rb_encoding *
str_to_encoding(VALUE enc)
{
    return rb_enc_from_index(str_to_encindex(enc));
}