/*
 * Returns rb_string expressed in the Ruby encoding that corresponds to
 * context->encoding.
 *
 * When HAVE_RUBY_ENCODING_H is defined and the string's encoding index
 * differs from the context's, the string is transcoded with rb_str_encode()
 * and the transcoded value is returned.  In every other case the argument
 * is returned unchanged.
 */
VALUE
rb_grn_context_rb_string_encode (grn_ctx *context, VALUE rb_string)
{
#ifdef HAVE_RUBY_ENCODING_H
    {
        rb_encoding *string_encoding = rb_enc_get(rb_string);
        rb_encoding *context_encoding =
            rb_grn_encoding_to_ruby_encoding(context->encoding);

        /* Compare by index so that only a real mismatch triggers a transcode. */
        if (rb_enc_to_index(string_encoding) != rb_enc_to_index(context_encoding)) {
            rb_string = rb_str_encode(rb_string,
                                      rb_enc_from_encoding(context_encoding),
                                      0, Qnil);
        }
    }
#endif
    return rb_string;
}
/*
 * Stores a new default encoding in *def and registers it under name.
 *
 * def      - the default-encoding slot to update
 * encoding - an encoding object, or nil to mark the default as unspecified
 * name     - the name the default is registered under
 *
 * A nil encoding sets def->index to -1 and inserts a strdup()ed copy of name
 * into enc_table.names mapped to UNSPECIFIED_ENCODING.  Any other value is
 * converted with rb_to_encoding() and aliased to name.  When def is
 * default_external the filesystem encoding alias is refreshed as well.
 *
 * Returns TRUE if the slot had already been set (its index was not -2),
 * FALSE otherwise.
 */
static int
enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
{
    /* Sample the previous state before anything below overwrites it. */
    const int overridden = (def->index != -2) ? TRUE : FALSE;

    if (!NIL_P(encoding)) {
        def->index = rb_enc_to_index(rb_to_encoding(encoding));
        def->enc = 0;
        enc_alias_internal(name, def->index);
    }
    else {
        def->index = -1;
        def->enc = 0;
        st_insert(enc_table.names, (st_data_t)strdup(name),
                  (st_data_t)UNSPECIFIED_ENCODING);
    }

    if (def == &default_external) {
        enc_set_filesystem_encoding();
    }

    return overridden;
}
/*
 * call-seq:
 *    res.check -> nil
 *
 * Returns nil when the wrapped PGresult reports a non-error status; otherwise
 * builds an exception and raises it.
 *
 * The exception message comes from the connection when there is no result
 * struct, from the result for the bad-response/fatal/non-fatal statuses, and
 * is a fixed "internal error" text for any status not listed here.  The
 * exception class is chosen by lookup_error_class() from the result's
 * SQLSTATE field, and the exception carries @connection and @result.
 */
VALUE
pg_result_check( VALUE self )
{
	VALUE rb_pgconn = rb_iv_get( self, "@connection" );
	PGconn *conn = pg_get_pgconn(rb_pgconn);
#ifdef M17N_SUPPORTED
	rb_encoding *enc = pg_conn_enc_get( conn );
#endif
	PGresult *result;
	VALUE message, exception;

	Data_Get_Struct(self, PGresult, result);

	if (result != NULL) {
		switch (PQresultStatus(result)) {
		case PGRES_BAD_RESPONSE:
		case PGRES_FATAL_ERROR:
		case PGRES_NONFATAL_ERROR:
			message = rb_str_new2( PQresultErrorMessage(result) );
			break;

		case PGRES_TUPLES_OK:
		case PGRES_COPY_OUT:
		case PGRES_COPY_IN:
#ifdef HAVE_CONST_PGRES_COPY_BOTH
		case PGRES_COPY_BOTH:
#endif
#ifdef HAVE_CONST_PGRES_SINGLE_TUPLE
		case PGRES_SINGLE_TUPLE:
#endif
		case PGRES_EMPTY_QUERY:
		case PGRES_COMMAND_OK:
			/* Nothing wrong with this result. */
			return Qnil;

		default:
			message = rb_str_new2( "internal error : unknown result status." );
			break;
		}
	}
	else {
		/* No result struct at all: report the connection-level error. */
		message = rb_str_new2( PQerrorMessage(conn) );
	}

#ifdef M17N_SUPPORTED
	rb_enc_set_index( message, rb_enc_to_index(enc) );
#endif

	/* The SQLSTATE lookup is performed even when result is NULL. */
	exception = rb_exc_new3(
		lookup_error_class( PQresultErrorField( result, PG_DIAG_SQLSTATE ) ),
		message );
	rb_iv_set( exception, "@connection", rb_pgconn );
	rb_iv_set( exception, "@result", self );
	rb_exc_raise( exception );

	/* Not reached */
	return Qnil;
}
/*
 * Determines the filesystem encoding index, registers it under the alias
 * "filesystem", and returns it.
 *
 * On _WIN32 / __CYGWIN__ builds (without NO_LOCALE_CHARMAP) the index is
 * looked up from a "CP<number>" name built from the ANSI or OEM code page,
 * falling back to the ASCII-8BIT index when that name is not found.  All
 * other builds use the default external encoding.
 */
static int
enc_set_filesystem_encoding(void)
{
    int fs_index;
#if defined NO_LOCALE_CHARMAP
    fs_index = rb_enc_to_index(rb_default_external_encoding());
#elif defined _WIN32 || defined __CYGWIN__
    char codepage_name[sizeof(int) * 8 / 3 + 4];

    snprintf(codepage_name, sizeof codepage_name, "CP%d",
             AreFileApisANSI() ? GetACP() : GetOEMCP());
    fs_index = rb_enc_find_index(codepage_name);
    if (fs_index < 0) {
        fs_index = rb_ascii8bit_encindex();
    }
#else
    fs_index = rb_enc_to_index(rb_default_external_encoding());
#endif

    enc_alias_internal("filesystem", fs_index);
    return fs_index;
}
/*
 * Validates enc and returns its index.
 *
 * Returns -1 when looking the encoding's index back up does not yield the
 * same pointer.  For an encoding flagged by enc_autoload_p() the result of
 * enc_autoload() is returned instead of the originally computed index.
 */
static int
check_encoding(rb_encoding *enc)
{
    const int idx = rb_enc_to_index(enc);

    /* The index must round-trip to the very same encoding object. */
    if (rb_enc_from_index(idx) != enc) {
        return -1;
    }

    return enc_autoload_p(enc) ? enc_autoload(enc) : idx;
}
/*
 * Result constructor.
 *
 * Wraps result in a new rb_cPGresult object whose free function is
 * pgresult_gc_free, and stores rb_pgconn in its @connection instance
 * variable.  With M17N_SUPPORTED the wrapper is additionally tagged with the
 * encoding index obtained from the connection.
 */
VALUE
pg_new_result(PGresult *result, VALUE rb_pgconn)
{
	PGconn *conn = pg_get_pgconn( rb_pgconn );
	VALUE wrapper = Data_Wrap_Struct(rb_cPGresult, NULL, pgresult_gc_free, result);
#ifdef M17N_SUPPORTED
	rb_encoding *conn_encoding = pg_conn_enc_get( conn );

	ENCODING_SET( wrapper, rb_enc_to_index(conn_encoding) );
#endif

	rb_iv_set( wrapper, "@connection", rb_pgconn );

	return wrapper;
}
/*
 * Computes and returns the filesystem encoding index.
 *
 * Building with NO_LOCALE_CHARMAP defined is rejected at compile time.  On
 * _WIN32 / __CYGWIN__ the index is found from the code-page name produced by
 * CP_FORMAT for the ANSI or OEM code page, with ENCINDEX_ASCII as the
 * fallback when the name is unknown.  Elsewhere the default external
 * encoding's index is used.
 */
int
Init_enc_set_filesystem_encoding(void)
{
    int fs_index;
#if defined NO_LOCALE_CHARMAP
# error NO_LOCALE_CHARMAP defined
#elif defined _WIN32 || defined __CYGWIN__
    char codepage_name[SIZEOF_CP_NAME];

    CP_FORMAT(codepage_name, AreFileApisANSI() ? GetACP() : GetOEMCP());
    fs_index = rb_enc_find_index(codepage_name);
    if (fs_index < 0) {
        fs_index = ENCINDEX_ASCII;
    }
#else
    fs_index = rb_enc_to_index(rb_default_external_encoding());
#endif
    return fs_index;
}
/*
 * call-seq:
 *   enc.inspect -> string
 *
 * Returns a programmer-oriented description of the encoding.
 *
 *   Encoding::UTF_8.inspect       #=> "#<Encoding:UTF-8>"
 *   Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
 *
 * " (dummy)" and " (autoload)" are appended for encodings flagged as such.
 * Raises TypeError ("broken Encoding") when the wrapped pointer is NULL or
 * does not round-trip through its own index.
 */
static VALUE
enc_inspect(VALUE self)
{
    rb_encoding *enc;
    const char *dummy_suffix;
    const char *autoload_suffix;

    if (!is_data_encoding(self)) {
        not_encoding(self);
    }

    enc = DATA_PTR(self);
    if (!enc || rb_enc_from_index(rb_enc_to_index(enc)) != enc) {
        rb_raise(rb_eTypeError, "broken Encoding");
    }

    dummy_suffix = ENC_DUMMY_P(enc) ? " (dummy)" : "";
    autoload_suffix = enc_autoload_p(enc) ? " (autoload)" : "";

    return rb_enc_sprintf(rb_usascii_encoding(),
                          "#<%"PRIsVALUE":%s%s%s>",
                          rb_obj_class(self),
                          rb_enc_name(enc),
                          dummy_suffix,
                          autoload_suffix);
}
/*
 * Returns the code page number for enc.
 *
 * A NULL encoding yields the system code page.  US-ASCII and ASCII-8BIT both
 * map to 1252 and UTF-8 maps to CP_UTF8.  Every other encoding is looked up
 * by index in the rb_code_page table, which caches code pages because finding
 * them through Encoding#names is slow.  An index outside the table yields
 * INVALID_CODE_PAGE.
 */
static UINT
code_page(rb_encoding *enc)
{
    int idx;

    if (!enc) {
        return system_code_page();
    }

    idx = rb_enc_to_index(enc);

    /* map US-ASCII and ASCII-8bit as code page 1252 (us-ascii) */
    if (idx == rb_usascii_encindex() || idx == rb_ascii8bit_encindex()) {
        return 1252;
    }
    if (idx == rb_utf8_encindex()) {
        return CP_UTF8;
    }

    /* Anything outside the cached table has no known code page. */
    if (idx < 0 || (unsigned int)idx >= rb_code_page.count) {
        return INVALID_CODE_PAGE;
    }
    return rb_code_page.table[idx];
}
/*
 * Associates obj with the encoding enc by resolving enc to its index and
 * delegating to rb_enc_associate_index().  Returns whatever that call
 * returns.
 */
VALUE
rb_enc_associate(VALUE obj, rb_encoding *enc)
{
    const int enc_index = rb_enc_to_index(enc);

    return rb_enc_associate_index(obj, enc_index);
}
/*
 * Spec helper: finds the encoding named by the Ruby string `name` with
 * rb_enc_find() and returns its rb_enc_to_index() value as a Ruby Integer.
 */
static VALUE
encoding_spec_rb_enc_to_index(VALUE self, VALUE name)
{
    const char *encoding_name = RSTRING_PTR(name);
    rb_encoding *found = rb_enc_find(encoding_name);
    int found_index = rb_enc_to_index(found);

    return INT2NUM(found_index);
}
int Init_enc_set_filesystem_encoding(void) { return rb_enc_to_index(rb_default_external_encoding()); }
/**
 * Builds a copy of str in which every invalid byte sequence has been
 * replaced.
 *
 * @param argc number of optional arguments (0 or 1)
 * @param argv optional arguments; argv[0], when present, is the replacement
 *             string, which is passed through str_compat_and_valid()
 * @param str the string to be scrubbed
 * @return If given string is invalid, returns a new string.
 *         Otherwise, returns Qnil.
 *
 * Qnil is also returned, without scanning, when the cached code range of str
 * is already 7BIT or VALID, and when the encoding of str is a dummy encoding.
 * When a scan finds nothing to replace, the code range of str is updated
 * before Qnil is returned.
 */
static VALUE
str_scrub0(int argc, VALUE *argv, VALUE str)
{
    int cr = ENC_CODERANGE(str);
    rb_encoding *enc;
    int encidx;
    VALUE repl;

    /* The cached code range already says there is nothing to scrub. */
    if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID)
        return Qnil;

    enc = STR_ENC_GET(str);
    rb_scan_args(argc, argv, "01", &repl);
    if (argc != 0) {
        repl = str_compat_and_valid(repl, enc);
    }

    if (rb_enc_dummy_p(enc)) {
        return Qnil;
    }
    encidx = rb_enc_to_index(enc);

/*
 * Points `rep` at a static copy of the string literal and sets `replen` to
 * its length.  The array is sized sizeof(str)-1, so it holds the literal's
 * bytes without the terminating NUL.  Relies on `rep` and `replen` being
 * declared in the enclosing scope.
 */
#define DEFAULT_REPLACE_CHAR(str) do { \
        static const char replace[sizeof(str)-1] = str; \
        rep = replace; replen = (int)sizeof(replace); \
    } while (0)

    if (rb_enc_asciicompat(enc)) {
        const char *p = RSTRING_PTR(str);
        const char *e = RSTRING_END(str);
        const char *p1 = p;
        const char *rep;
        long replen;
        int rep7bit_p;
        VALUE buf = Qnil;

        /*
         * Pick the replacement: a block takes precedence (rep == NULL means
         * "yield each invalid chunk"), then the explicit repl argument, then
         * a built-in default that depends on whether the encoding is UTF-8.
         */
        if (rb_block_given_p()) {
            rep = NULL;
            replen = 0;
            rep7bit_p = FALSE;
        }
        else if (!NIL_P(repl)) {
            rep = RSTRING_PTR(repl);
            replen = RSTRING_LEN(repl);
            rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT);
        }
        else if (encidx == rb_utf8_encindex()) {
            DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD");
            rep7bit_p = FALSE;
        }
        else {
            DEFAULT_REPLACE_CHAR("?");
            rep7bit_p = TRUE;
        }

        /* Start from 7BIT and upgrade to VALID as non-ASCII content is seen. */
        cr = ENC_CODERANGE_7BIT;

        /* Skip ahead to the first non-ASCII byte, or to the end if none. */
        p = search_nonascii(p, e);
        if (!p) {
            p = e;
        }
        while (p < e) {
            int ret = rb_enc_precise_mbclen(p, e, enc);
            if (MBCLEN_NEEDMORE_P(ret)) {
                /* Truncated character at the tail; handled after the loop. */
                break;
            }
            else if (MBCLEN_CHARFOUND_P(ret)) {
                cr = ENC_CODERANGE_VALID;
                p += MBCLEN_CHARFOUND_LEN(ret);
            }
            else if (MBCLEN_INVALID_P(ret)) {
                /*
                 * p1~p: valid ascii/multibyte chars
                 * p ~e: invalid bytes + unknown bytes
                 */
                long clen = rb_enc_mbmaxlen(enc);
                /* The output buffer is created lazily, on the first invalid run. */
                if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
                /* Flush the valid span collected so far. */
                if (p > p1) {
                    rb_str_buf_cat(buf, p1, p - p1);
                }

                /*
                 * Decide how many bytes (clen) this replacement consumes:
                 * never more than what remains, a single byte for short
                 * candidates, otherwise shrink the candidate one byte at a
                 * time until it looks like a truncated character.
                 */
                if (e - p < clen) clen = e - p;
                if (clen <= 2) {
                    clen = 1;
                }
                else {
                    const char *q = p;
                    clen--;
                    for (; clen > 1; clen--) {
                        ret = rb_enc_precise_mbclen(q, q + clen, enc);
                        if (MBCLEN_NEEDMORE_P(ret)) break;
                        if (MBCLEN_INVALID_P(ret)) continue;
                        UNREACHABLE;
                    }
                }
                if (rep) {
                    rb_str_buf_cat(buf, rep, replen);
                    if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
                }
                else {
                    /* Block form: yield the invalid bytes and append the result. */
                    repl = rb_yield(rb_enc_str_new(p, clen, enc));
                    repl = str_compat_and_valid(repl, enc);
                    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
                    if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
                        cr = ENC_CODERANGE_VALID;
                }
                p += clen;
                p1 = p;
                /* Jump over the following run of plain ASCII bytes. */
                p = search_nonascii(p, e);
                if (!p) {
                    p = e;
                    break;
                }
            }
            else {
                UNREACHABLE;
            }
        }
        if (NIL_P(buf)) {
            if (p == e) {
                /* Whole string scanned without a replacement: record the result. */
                ENC_CODERANGE_SET(str, cr);
                return Qnil;
            }
            buf = rb_str_buf_new(RSTRING_LEN(str));
        }
        /* Copy the trailing valid span. */
        if (p1 < p) {
            rb_str_buf_cat(buf, p1, p - p1);
        }
        /* Anything left is an incomplete character at the end of the string. */
        if (p < e) {
            if (rep) {
                rb_str_buf_cat(buf, rep, replen);
                if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
            }
            else {
                repl = rb_yield(rb_enc_str_new(p, e-p, enc));
                repl = str_compat_and_valid(repl, enc);
                rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
                if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
                    cr = ENC_CODERANGE_VALID;
            }
        }
        ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr);
        return buf;
    }
    else {
        /* ASCII incompatible */
        const char *p = RSTRING_PTR(str);
        const char *e = RSTRING_END(str);
        const char *p1 = p;
        VALUE buf = Qnil;
        const char *rep;
        long replen;
        long mbminlen = rb_enc_mbminlen(enc);

        /*
         * Pick the replacement: the explicit repl argument, else a default
         * chosen by encoding name (compared case-insensitively).
         *
         * NOTE(review): unlike the ASCII-compatible branch there is no
         * rb_block_given_p() check here, and every arm assigns `rep`, so the
         * `else` (yield) paths below look unreachable unless
         * RSTRING_PTR(repl) can be NULL -- confirm whether a block is meant
         * to be honoured for these encodings.
         */
        if (!NIL_P(repl)) {
            rep = RSTRING_PTR(repl);
            replen = RSTRING_LEN(repl);
        }
        else if (!strcasecmp(rb_enc_name(enc), "UTF-16BE")) {
            DEFAULT_REPLACE_CHAR("\xFF\xFD");
        }
        else if (!strcasecmp(rb_enc_name(enc), "UTF-16LE")) {
            DEFAULT_REPLACE_CHAR("\xFD\xFF");
        }
        else if (!strcasecmp(rb_enc_name(enc), "UTF-32BE")) {
            DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD");
        }
        else if (!strcasecmp(rb_enc_name(enc), "UTF-32lE")) {
            DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00");
        }
        else {
            DEFAULT_REPLACE_CHAR("?");
        }

        while (p < e) {
            int ret = rb_enc_precise_mbclen(p, e, enc);
            if (MBCLEN_NEEDMORE_P(ret)) {
                /* Truncated character at the tail; handled after the loop. */
                break;
            }
            else if (MBCLEN_CHARFOUND_P(ret)) {
                p += MBCLEN_CHARFOUND_LEN(ret);
            }
            else if (MBCLEN_INVALID_P(ret)) {
                const char *q = p;
                long clen = rb_enc_mbmaxlen(enc);
                /* The output buffer is created lazily, on the first invalid run. */
                if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
                /* Flush the valid span collected so far. */
                if (p > p1) rb_str_buf_cat(buf, p1, p - p1);

                /*
                 * Same shrinking search as the ASCII-compatible branch, but
                 * stepping in units of mbminlen instead of single bytes.
                 */
                if (e - p < clen) clen = e - p;
                if (clen <= mbminlen * 2) {
                    clen = mbminlen;
                }
                else {
                    clen -= mbminlen;
                    for (; clen > mbminlen; clen-=mbminlen) {
                        ret = rb_enc_precise_mbclen(q, q + clen, enc);
                        if (MBCLEN_NEEDMORE_P(ret)) break;
                        if (MBCLEN_INVALID_P(ret)) continue;
                        UNREACHABLE;
                    }
                }
                if (rep) {
                    rb_str_buf_cat(buf, rep, replen);
                }
                else {
                    /*
                     * NOTE(review): this yields e-p bytes (everything up to
                     * the end of the string) although only clen bytes are
                     * consumed below; the ASCII-compatible branch yields
                     * clen here -- confirm which is intended.
                     */
                    repl = rb_yield(rb_enc_str_new(p, e-p, enc));
                    repl = str_compat_and_valid(repl, enc);
                    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
                }
                p += clen;
                p1 = p;
            }
            else {
                UNREACHABLE;
            }
        }
        if (NIL_P(buf)) {
            if (p == e) {
                /* Whole string scanned without a replacement: mark it valid. */
                ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
                return Qnil;
            }
            buf = rb_str_buf_new(RSTRING_LEN(str));
        }
        /* Copy the trailing valid span. */
        if (p1 < p) {
            rb_str_buf_cat(buf, p1, p - p1);
        }
        /* Anything left is an incomplete character at the end of the string. */
        if (p < e) {
            if (rep) {
                rb_str_buf_cat(buf, rep, replen);
            }
            else {
                repl = rb_yield(rb_enc_str_new(p, e-p, enc));
                repl = str_compat_and_valid(repl, enc);
                rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
            }
        }
        ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID);
        return buf;
    }
}
/*
 * Decodes URL-style escapes in str and returns the result as a new string.
 *
 * '+' becomes a space and '%' followed by two hex digits becomes the byte
 * they denote.  A '%' not followed by two hex digits is copied through
 * unchanged, and scanning stops at a '%' that has fewer than two characters
 * after it.  When nothing needed decoding the result is a duplicate of str.
 *
 * If the encoding of str differs from the requested one, the result is
 * tagged with the requested encoding; should its code range then not be
 * clean, the original encoding is restored (and, for the duplicate case,
 * the original code range as well).
 */
static VALUE
optimized_unescape(VALUE str, VALUE encoding)
{
    rb_encoding *target_enc = rb_to_encoding(encoding);
    int target_idx = rb_enc_to_index(target_enc);
    int source_idx, coderange;
    long total = RSTRING_LEN(str);
    const char *src = RSTRING_PTR(str);
    long pos, pending = 0;      /* pending: start of the not-yet-copied span */
    VALUE out = 0;              /* allocated on the first decoded character */

    for (pos = 0; pos < total; ++pos) {
        const char ch = src[pos];
        char decoded[1];
        int consumed = 0;       /* extra source bytes eaten by a %XX escape */

        if (ch == '+') {
            decoded[0] = ' ';
        }
        else if (ch == '%') {
            if (pos + 3 > total) break;
            if (!ISXDIGIT(src[pos+1]) || !ISXDIGIT(src[pos+2])) continue;
            decoded[0] = ((char_to_number(src[pos+1]) << 4)
                          | char_to_number(src[pos+2]));
            consumed = 2;
        }
        else {
            continue;
        }

        if (!out) {
            out = rb_str_buf_new(total);
        }

        /* Copy the literal span before this escape, then the decoded byte. */
        rb_str_cat(out, src + pending, pos - pending);
        pos += consumed;
        pending = pos + 1;
        rb_str_cat(out, decoded, 1);
    }

    if (out) {
        rb_str_cat(out, src + pending, total - pending);
        preserve_original_state(str, out);
        coderange = ENC_CODERANGE_UNKNOWN;
    }
    else {
        out = rb_str_dup(str);
        coderange = ENC_CODERANGE(str);
    }

    source_idx = rb_enc_get_index(str);
    if (source_idx == target_idx) {
        return out;
    }

    rb_enc_associate_index(out, target_idx);
    if (ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(out))) {
        return out;
    }

    /* Not clean under the requested encoding: fall back to the original. */
    rb_enc_associate_index(out, source_idx);
    if (coderange != ENC_CODERANGE_UNKNOWN) {
        ENC_CODERANGE_SET(out, coderange);
    }
    return out;
}