Exemplo n.º 1
0
VALUE
rb_grn_context_rb_string_encode (grn_ctx *context, VALUE rb_string)
{
#ifdef HAVE_RUBY_ENCODING_H
    rb_encoding *encoding, *to_encode;

    encoding = rb_enc_get(rb_string);
    to_encode = rb_grn_encoding_to_ruby_encoding(context->encoding);
    if (rb_enc_to_index(encoding) != rb_enc_to_index(to_encode))
	rb_string = rb_str_encode(rb_string, rb_enc_from_encoding(to_encode),
				  0, Qnil);
#endif
    return rb_string;
}
Exemplo n.º 2
0
Arquivo: encoding.c Projeto: 217/ruby
static int
enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
{
    int overridden = FALSE;

    if (def->index != -2)
	/* Already set */
	overridden = TRUE;

    if (NIL_P(encoding)) {
	def->index = -1;
	def->enc = 0;
	st_insert(enc_table.names, (st_data_t)strdup(name),
		  (st_data_t)UNSPECIFIED_ENCODING);
    }
    else {
	def->index = rb_enc_to_index(rb_to_encoding(encoding));
	def->enc = 0;
	enc_alias_internal(name, def->index);
    }

    if (def == &default_external)
	enc_set_filesystem_encoding();

    return overridden;
}
Exemplo n.º 3
0
/*
 * call-seq:
 *    res.check -> nil
 *
 * Raises appropriate exception if PG::Result is in a bad state.
 */
VALUE
pg_result_check( VALUE self )
{
	VALUE error, exception, klass;
	VALUE rb_pgconn = rb_iv_get( self, "@connection" );
	PGconn *conn = pg_get_pgconn(rb_pgconn);
	PGresult *result;
#ifdef M17N_SUPPORTED
	rb_encoding *enc = pg_conn_enc_get( conn );
#endif
	char * sqlstate;

	Data_Get_Struct(self, PGresult, result);

	if(result == NULL)
	{
		error = rb_str_new2( PQerrorMessage(conn) );
	}
	else
	{
		switch (PQresultStatus(result))
		{
		case PGRES_TUPLES_OK:
		case PGRES_COPY_OUT:
		case PGRES_COPY_IN:
#ifdef HAVE_CONST_PGRES_COPY_BOTH
		case PGRES_COPY_BOTH:
#endif
#ifdef HAVE_CONST_PGRES_SINGLE_TUPLE
		case PGRES_SINGLE_TUPLE:
#endif
		case PGRES_EMPTY_QUERY:
		case PGRES_COMMAND_OK:
			return Qnil;
		case PGRES_BAD_RESPONSE:
		case PGRES_FATAL_ERROR:
		case PGRES_NONFATAL_ERROR:
			error = rb_str_new2( PQresultErrorMessage(result) );
			break;
		default:
			error = rb_str_new2( "internal error : unknown result status." );
		}
	}

#ifdef M17N_SUPPORTED
	rb_enc_set_index( error, rb_enc_to_index(enc) );
#endif

	sqlstate = PQresultErrorField( result, PG_DIAG_SQLSTATE );
	klass = lookup_error_class( sqlstate );
	exception = rb_exc_new3( klass, error );
	rb_iv_set( exception, "@connection", rb_pgconn );
	rb_iv_set( exception, "@result", self );
	rb_exc_raise( exception );

	/* Not reached */
	return Qnil;
}
Exemplo n.º 4
0
Arquivo: encoding.c Projeto: 217/ruby
static int
enc_set_filesystem_encoding(void)
{
    int idx;
#if defined NO_LOCALE_CHARMAP
    idx = rb_enc_to_index(rb_default_external_encoding());
#elif defined _WIN32 || defined __CYGWIN__
    char cp[sizeof(int) * 8 / 3 + 4];
    snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP());
    idx = rb_enc_find_index(cp);
    if (idx < 0) idx = rb_ascii8bit_encindex();
#else
    idx = rb_enc_to_index(rb_default_external_encoding());
#endif

    enc_alias_internal("filesystem", idx);
    return idx;
}
Exemplo n.º 5
0
Arquivo: encoding.c Projeto: 217/ruby
static int
check_encoding(rb_encoding *enc)
{
    int index = rb_enc_to_index(enc);
    if (rb_enc_from_index(index) != enc)
	return -1;
    if (enc_autoload_p(enc)) {
	index = enc_autoload(enc);
    }
    return index;
}
Exemplo n.º 6
0
/*
 * Result constructor
 */
VALUE
pg_new_result(PGresult *result, VALUE rb_pgconn)
{
	PGconn *conn = pg_get_pgconn( rb_pgconn );
	VALUE val = Data_Wrap_Struct(rb_cPGresult, NULL, pgresult_gc_free, result);
#ifdef M17N_SUPPORTED
	rb_encoding *enc = pg_conn_enc_get( conn );
	ENCODING_SET( val, rb_enc_to_index(enc) );
#endif

	rb_iv_set( val, "@connection", rb_pgconn );

	return val;
}
Exemplo n.º 7
0
int
Init_enc_set_filesystem_encoding(void)
{
    int idx;
#if defined NO_LOCALE_CHARMAP
# error NO_LOCALE_CHARMAP defined
#elif defined _WIN32 || defined __CYGWIN__
    char cp[SIZEOF_CP_NAME];
    CP_FORMAT(cp, AreFileApisANSI() ? GetACP() : GetOEMCP());
    idx = rb_enc_find_index(cp);
    if (idx < 0) idx = ENCINDEX_ASCII;
#else
    idx = rb_enc_to_index(rb_default_external_encoding());
#endif
    return idx;
}
Exemplo n.º 8
0
/*
 * call-seq:
 *   enc.inspect -> string
 *
 * Returns a string which represents the encoding for programmers.
 *
 *   Encoding::UTF_8.inspect       #=> "#<Encoding:UTF-8>"
 *   Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
 */
static VALUE
enc_inspect(VALUE self)
{
    rb_encoding *enc;

    if (!is_data_encoding(self)) {
	not_encoding(self);
    }
    if (!(enc = DATA_PTR(self)) || rb_enc_from_index(rb_enc_to_index(enc)) != enc) {
	rb_raise(rb_eTypeError, "broken Encoding");
    }
    return rb_enc_sprintf(rb_usascii_encoding(),
			  "#<%"PRIsVALUE":%s%s%s>", rb_obj_class(self),
			  rb_enc_name(enc),
			  (ENC_DUMMY_P(enc) ? " (dummy)" : ""),
			  enc_autoload_p(enc) ? " (autoload)" : "");
}
Exemplo n.º 9
0
Arquivo: file.c Projeto: 0x00evil/ruby
/*
  Return code page number of the encoding.
  Cache code page into a hash for performance since finding the code page in
  Encoding#names is slow.
*/
static UINT
code_page(rb_encoding *enc)
{
    int enc_idx;

    if (!enc)
	return system_code_page();

    enc_idx = rb_enc_to_index(enc);

    /* map US-ASCII and ASCII-8bit as code page 1252 (us-ascii) */
    if (enc_idx == rb_usascii_encindex() || enc_idx == rb_ascii8bit_encindex()) {
	return 1252;
    }
    if (enc_idx == rb_utf8_encindex()) {
	return CP_UTF8;
    }

    if (0 <= enc_idx && (unsigned int)enc_idx < rb_code_page.count)
	return rb_code_page.table[enc_idx];

    return INVALID_CODE_PAGE;
}
Exemplo n.º 10
0
Arquivo: encoding.c Projeto: 217/ruby
VALUE
rb_enc_associate(VALUE obj, rb_encoding *enc)
{
    return rb_enc_associate_index(obj, rb_enc_to_index(enc));
}
Exemplo n.º 11
0
static VALUE encoding_spec_rb_enc_to_index(VALUE self, VALUE name) {
  return INT2NUM(rb_enc_to_index(rb_enc_find(RSTRING_PTR(name))));
}
Exemplo n.º 12
0
int
Init_enc_set_filesystem_encoding(void)
{
    return rb_enc_to_index(rb_default_external_encoding());
}
Exemplo n.º 13
0
/**
 * @param str the string to be scrubbed
 * @param repl the replacement character
 * @return If given string is invalid, returns a new string. Otherwise, returns Qnil.
 */
static VALUE
str_scrub0(int argc, VALUE *argv, VALUE str)
{
    int cr = ENC_CODERANGE(str);
    rb_encoding *enc;
    int encidx;
    VALUE repl;

    if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID)
	return Qnil;

    enc = STR_ENC_GET(str);
    rb_scan_args(argc, argv, "01", &repl);
    if (argc != 0) {
	repl = str_compat_and_valid(repl, enc);
    }

    if (rb_enc_dummy_p(enc)) {
	return Qnil;
    }
    encidx = rb_enc_to_index(enc);

#define DEFAULT_REPLACE_CHAR(str) do { \
	static const char replace[sizeof(str)-1] = str; \
	rep = replace; replen = (int)sizeof(replace); \
    } while (0)

    if (rb_enc_asciicompat(enc)) {
	const char *p = RSTRING_PTR(str);
	const char *e = RSTRING_END(str);
	const char *p1 = p;
	const char *rep;
	long replen;
	int rep7bit_p;
	VALUE buf = Qnil;
	if (rb_block_given_p()) {
	    rep = NULL;
	    replen = 0;
	    rep7bit_p = FALSE;
	}
	else if (!NIL_P(repl)) {
	    rep = RSTRING_PTR(repl);
	    replen = RSTRING_LEN(repl);
	    rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT);
	}
	else if (encidx == rb_utf8_encindex()) {
	    DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD");
	    rep7bit_p = FALSE;
	}
	else {
	    DEFAULT_REPLACE_CHAR("?");
	    rep7bit_p = TRUE;
	}
	cr = ENC_CODERANGE_7BIT;

	p = search_nonascii(p, e);
	if (!p) {
	    p = e;
	}
	while (p < e) {
	    int ret = rb_enc_precise_mbclen(p, e, enc);
	    if (MBCLEN_NEEDMORE_P(ret)) {
		break;
	    }
	    else if (MBCLEN_CHARFOUND_P(ret)) {
		cr = ENC_CODERANGE_VALID;
		p += MBCLEN_CHARFOUND_LEN(ret);
	    }
	    else if (MBCLEN_INVALID_P(ret)) {
		/*
		 * p1~p: valid ascii/multibyte chars
		 * p ~e: invalid bytes + unknown bytes
		 */
		long clen = rb_enc_mbmaxlen(enc);
		if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
		if (p > p1) {
		    rb_str_buf_cat(buf, p1, p - p1);
		}

		if (e - p < clen) clen = e - p;
		if (clen <= 2) {
		    clen = 1;
		}
		else {
		    const char *q = p;
		    clen--;
		    for (; clen > 1; clen--) {
			ret = rb_enc_precise_mbclen(q, q + clen, enc);
			if (MBCLEN_NEEDMORE_P(ret)) break;
			if (MBCLEN_INVALID_P(ret)) continue;
			UNREACHABLE;
		    }
		}
		if (rep) {
		    rb_str_buf_cat(buf, rep, replen);
		    if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
		}
		else {
		    repl = rb_yield(rb_enc_str_new(p, clen, enc));
		    repl = str_compat_and_valid(repl, enc);
		    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
		    if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
			cr = ENC_CODERANGE_VALID;
		}
		p += clen;
		p1 = p;
		p = search_nonascii(p, e);
		if (!p) {
		    p = e;
		    break;
		}
	    }
	    else {
		UNREACHABLE;
	    }
	}
	if (NIL_P(buf)) {
	    if (p == e) {
		ENC_CODERANGE_SET(str, cr);
		return Qnil;
	    }
	    buf = rb_str_buf_new(RSTRING_LEN(str));
	}
	if (p1 < p) {
	    rb_str_buf_cat(buf, p1, p - p1);
	}
	if (p < e) {
	    if (rep) {
		rb_str_buf_cat(buf, rep, replen);
		if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
	    }
	    else {
		repl = rb_yield(rb_enc_str_new(p, e-p, enc));
		repl = str_compat_and_valid(repl, enc);
		rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
		if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
		    cr = ENC_CODERANGE_VALID;
	    }
	}
	ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr);
	return buf;
    }
    else {
	/* ASCII incompatible */
	const char *p = RSTRING_PTR(str);
	const char *e = RSTRING_END(str);
	const char *p1 = p;
	VALUE buf = Qnil;
	const char *rep;
	long replen;
	long mbminlen = rb_enc_mbminlen(enc);
	if (!NIL_P(repl)) {
	    rep = RSTRING_PTR(repl);
	    replen = RSTRING_LEN(repl);
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-16BE")) {
	    DEFAULT_REPLACE_CHAR("\xFF\xFD");
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-16LE")) {
	    DEFAULT_REPLACE_CHAR("\xFD\xFF");
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-32BE")) {
	    DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD");
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-32lE")) {
	    DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00");
	}
	else {
	    DEFAULT_REPLACE_CHAR("?");
	}

	while (p < e) {
	    int ret = rb_enc_precise_mbclen(p, e, enc);
	    if (MBCLEN_NEEDMORE_P(ret)) {
		break;
	    }
	    else if (MBCLEN_CHARFOUND_P(ret)) {
		p += MBCLEN_CHARFOUND_LEN(ret);
	    }
	    else if (MBCLEN_INVALID_P(ret)) {
		const char *q = p;
		long clen = rb_enc_mbmaxlen(enc);
		if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
		if (p > p1) rb_str_buf_cat(buf, p1, p - p1);

		if (e - p < clen) clen = e - p;
		if (clen <= mbminlen * 2) {
		    clen = mbminlen;
		}
		else {
		    clen -= mbminlen;
		    for (; clen > mbminlen; clen-=mbminlen) {
			ret = rb_enc_precise_mbclen(q, q + clen, enc);
			if (MBCLEN_NEEDMORE_P(ret)) break;
			if (MBCLEN_INVALID_P(ret)) continue;
			UNREACHABLE;
		    }
		}
		if (rep) {
		    rb_str_buf_cat(buf, rep, replen);
		}
		else {
		    repl = rb_yield(rb_enc_str_new(p, e-p, enc));
		    repl = str_compat_and_valid(repl, enc);
		    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
		}
		p += clen;
		p1 = p;
	    }
	    else {
		UNREACHABLE;
	    }
	}
	if (NIL_P(buf)) {
	    if (p == e) {
		ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
		return Qnil;
	    }
	    buf = rb_str_buf_new(RSTRING_LEN(str));
	}
	if (p1 < p) {
	    rb_str_buf_cat(buf, p1, p - p1);
	}
	if (p < e) {
	    if (rep) {
		rb_str_buf_cat(buf, rep, replen);
	    }
	    else {
		repl = rb_yield(rb_enc_str_new(p, e-p, enc));
		repl = str_compat_and_valid(repl, enc);
		rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
	    }
	}
	ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID);
	return buf;
    }
}
Exemplo n.º 14
0
static VALUE
optimized_unescape(VALUE str, VALUE encoding)
{
    long i, len, beg = 0;
    VALUE dest = 0;
    const char *cstr;
    rb_encoding *enc = rb_to_encoding(encoding);
    int cr, origenc, encidx = rb_enc_to_index(enc);

    len  = RSTRING_LEN(str);
    cstr = RSTRING_PTR(str);

    for (i = 0; i < len; ++i) {
	char buf[1];
	const char c = cstr[i];
	int clen = 0;
	if (c == '%') {
	    if (i + 3 > len) break;
	    if (!ISXDIGIT(cstr[i+1])) continue;
	    if (!ISXDIGIT(cstr[i+2])) continue;
	    buf[0] = ((char_to_number(cstr[i+1]) << 4)
		      | char_to_number(cstr[i+2]));
	    clen = 2;
	}
	else if (c == '+') {
	    buf[0] = ' ';
	}
	else {
	    continue;
	}

	if (!dest) {
	    dest = rb_str_buf_new(len);
	}

	rb_str_cat(dest, cstr + beg, i - beg);
	i += clen;
	beg = i + 1;

	rb_str_cat(dest, buf, 1);
    }

    if (dest) {
	rb_str_cat(dest, cstr + beg, len - beg);
	preserve_original_state(str, dest);
	cr = ENC_CODERANGE_UNKNOWN;
    }
    else {
	dest = rb_str_dup(str);
	cr = ENC_CODERANGE(str);
    }
    origenc = rb_enc_get_index(str);
    if (origenc != encidx) {
	rb_enc_associate_index(dest, encidx);
	if (!ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(dest))) {
	    rb_enc_associate_index(dest, origenc);
	    if (cr != ENC_CODERANGE_UNKNOWN)
		ENC_CODERANGE_SET(dest, cr);
	}
    }
    return dest;
}