示例#1
0
static rb_encoding*
enc_compatible_latter(VALUE str1, VALUE str2, int idx1, int idx2)
{
    int isstr1, isstr2;
    rb_encoding *enc1 = rb_enc_from_index(idx1);
    rb_encoding *enc2 = rb_enc_from_index(idx2);

    isstr2 = RB_TYPE_P(str2, T_STRING);
    if (isstr2 && RSTRING_LEN(str2) == 0)
	return enc1;
    isstr1 = RB_TYPE_P(str1, T_STRING);
    if (isstr1 && RSTRING_LEN(str1) == 0)
	return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
    if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
	return 0;
    }

    /* objects whose encoding is the same of contents */
    if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
	return enc1;
    if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
	return enc2;

    if (!isstr1) {
	VALUE tmp = str1;
	int idx0 = idx1;
	str1 = str2;
	str2 = tmp;
	idx1 = idx2;
	idx2 = idx0;
	idx0 = isstr1;
	isstr1 = isstr2;
	isstr2 = idx0;
    }
    if (isstr1) {
	int cr1, cr2;

	cr1 = rb_enc_str_coderange(str1);
	if (isstr2) {
	    cr2 = rb_enc_str_coderange(str2);
	    if (cr1 != cr2) {
		/* may need to handle ENC_CODERANGE_BROKEN */
		if (cr1 == ENC_CODERANGE_7BIT) return enc2;
		if (cr2 == ENC_CODERANGE_7BIT) return enc1;
	    }
	    if (cr2 == ENC_CODERANGE_7BIT) {
		return enc1;
	    }
	}
	if (cr1 == ENC_CODERANGE_7BIT)
	    return enc2;
    }
    return 0;
}
示例#2
0
static VALUE
str_compat_and_valid(VALUE str, rb_encoding *enc)
{
    int cr;
    str = StringValue(str);
    cr = rb_enc_str_coderange(str);
    if (cr == ENC_CODERANGE_BROKEN) {
#ifdef PRIsVALUE
	rb_raise(rb_eArgError, "replacement must be valid byte sequence '%+"PRIsVALUE"'", str);
#else
	str = rb_inspect(str);
	rb_raise(rb_eArgError, "replacement must be valid byte sequence '%s'", RSTRING_PTR(str));
	RB_GC_GUARD(str);
#endif
    }
    else if (cr == ENC_CODERANGE_7BIT) {
	rb_encoding *e = STR_ENC_GET(str);
	if (!rb_enc_asciicompat(enc)) {
	    rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
		    rb_enc_name(enc), rb_enc_name(e));
	}
    }
    else { /* ENC_CODERANGE_VALID */
	rb_encoding *e = STR_ENC_GET(str);
	if (enc != e) {
	    rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
		    rb_enc_name(enc), rb_enc_name(e));
	}
    }
    return str;
}
示例#3
0
static VALUE encoding_spec_rb_enc_str_coderange(VALUE self, VALUE str) {
  int coderange = rb_enc_str_coderange(str);

  switch(coderange) {
  case ENC_CODERANGE_UNKNOWN:
    return ID2SYM(rb_intern("coderange_unknown"));
  case ENC_CODERANGE_7BIT:
    return ID2SYM(rb_intern("coderange_7bit"));
  case ENC_CODERANGE_VALID:
    return ID2SYM(rb_intern("coderange_valid"));
  case ENC_CODERANGE_BROKEN:
    return ID2SYM(rb_intern("coderange_broken"));
  default:
    return ID2SYM(rb_intern("coderange_unrecognized"));
  }
}
示例#4
0
VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) {
  rb_encoding* desired_encoding = (type == UPB_TYPE_STRING) ?
      kRubyStringUtf8Encoding : kRubyString8bitEncoding;
  VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding);

  // Note: this will not duplicate underlying string data unless necessary.
  value = rb_str_encode(value, desired_encoding_value, 0, Qnil);

  if (type == UPB_TYPE_STRING &&
      rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
    rb_raise(rb_eEncodingError, "String is invalid UTF-8");
  }

  // Ensure the data remains valid.  Since we called #encode a moment ago,
  // this does not freeze the string the user assigned.
  rb_obj_freeze(value);

  return value;
}
示例#5
0
文件: encoding.c 项目: 217/ruby
rb_encoding*
rb_enc_compatible(VALUE str1, VALUE str2)
{
    int idx1, idx2;
    rb_encoding *enc1, *enc2;

    idx1 = rb_enc_get_index(str1);
    idx2 = rb_enc_get_index(str2);

    if (idx1 < 0 || idx2 < 0)
        return 0;

    if (idx1 == idx2) {
	return rb_enc_from_index(idx1);
    }
    enc1 = rb_enc_from_index(idx1);
    enc2 = rb_enc_from_index(idx2);

    if (TYPE(str2) == T_STRING && RSTRING_LEN(str2) == 0)
	return (idx1 == ENCINDEX_US_ASCII && rb_enc_asciicompat(enc2)) ? enc2 : enc1;
    if (TYPE(str1) == T_STRING && RSTRING_LEN(str1) == 0)
	return (idx2 == ENCINDEX_US_ASCII && rb_enc_asciicompat(enc1)) ? enc1 : enc2;
    if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
	return 0;
    }

    /* objects whose encoding is the same of contents */
    if (BUILTIN_TYPE(str2) != T_STRING && idx2 == ENCINDEX_US_ASCII)
	return enc1;
    if (BUILTIN_TYPE(str1) != T_STRING && idx1 == ENCINDEX_US_ASCII)
	return enc2;

    if (BUILTIN_TYPE(str1) != T_STRING) {
	VALUE tmp = str1;
	int idx0 = idx1;
	str1 = str2;
	str2 = tmp;
	idx1 = idx2;
	idx2 = idx0;
    }
    if (BUILTIN_TYPE(str1) == T_STRING) {
	int cr1, cr2;

	cr1 = rb_enc_str_coderange(str1);
	if (BUILTIN_TYPE(str2) == T_STRING) {
	    cr2 = rb_enc_str_coderange(str2);
	    if (cr1 != cr2) {
		/* may need to handle ENC_CODERANGE_BROKEN */
		if (cr1 == ENC_CODERANGE_7BIT) return enc2;
		if (cr2 == ENC_CODERANGE_7BIT) return enc1;
	    }
	    if (cr2 == ENC_CODERANGE_7BIT) {
		if (idx1 == ENCINDEX_ASCII) return enc2;
		return enc1;
	    }
	}
	if (cr1 == ENC_CODERANGE_7BIT)
	    return enc2;
    }
    return 0;
}
示例#6
0
文件: escape.c 项目: riddochc/ruby
static VALUE
optimized_unescape(VALUE str, VALUE encoding)
{
    long i, len, beg = 0;
    VALUE dest = 0;
    const char *cstr;
    int cr, origenc, encidx = rb_to_encoding_index(encoding);

    len  = RSTRING_LEN(str);
    cstr = RSTRING_PTR(str);

    for (i = 0; i < len; ++i) {
	char buf[1];
	const char c = cstr[i];
	int clen = 0;
	if (c == '%') {
	    if (i + 3 > len) break;
	    if (!ISXDIGIT(cstr[i+1])) continue;
	    if (!ISXDIGIT(cstr[i+2])) continue;
	    buf[0] = ((char_to_number(cstr[i+1]) << 4)
		      | char_to_number(cstr[i+2]));
	    clen = 2;
	}
	else if (c == '+') {
	    buf[0] = ' ';
	}
	else {
	    continue;
	}

	if (!dest) {
	    dest = rb_str_buf_new(len);
	}

	rb_str_cat(dest, cstr + beg, i - beg);
	i += clen;
	beg = i + 1;

	rb_str_cat(dest, buf, 1);
    }

    if (dest) {
	rb_str_cat(dest, cstr + beg, len - beg);
	preserve_original_state(str, dest);
	cr = ENC_CODERANGE_UNKNOWN;
    }
    else {
	dest = rb_str_dup(str);
	cr = ENC_CODERANGE(str);
    }
    origenc = rb_enc_get_index(str);
    if (origenc != encidx) {
	rb_enc_associate_index(dest, encidx);
	if (!ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(dest))) {
	    rb_enc_associate_index(dest, origenc);
	    if (cr != ENC_CODERANGE_UNKNOWN)
		ENC_CODERANGE_SET(dest, cr);
	}
    }
    return dest;
}