/*
 * call-seq:
 *    typemap.typecast_copy_get( field_str, fieldno, format, encoding )
 *
 * Cast a single field string that was received through
 * PG::Connection#get_copy_data.
 *
 * The implementation found here simply hands the work over to the
 * #default_type_map. Derive from this class and override the method to
 * change that behaviour.
 *
 * Parameters:
 * * +field_str+ : The String received from the server.
 * * +fieldno+ : The field number, counted from left to right.
 * * +format+ : The format code (0 = text, 1 = binary).
 * * +encoding+ : The encoding of the connection, which is also the
 *   encoding the returned value should get.
 *
 */
static VALUE
pg_tmir_typecast_copy_get( VALUE self, VALUE field_str, VALUE fieldno, VALUE format, VALUE enc )
{
	t_tmir *this = DATA_PTR( self );
	t_typemap *fallback = DATA_PTR( this->typemap.default_typemap );
	/* The type map callbacks work on encoding indexes, not Encoding objects. */
	int enc_idx = rb_to_encoding_index( enc );
	int field_index = NUM2INT(fieldno);
	int format_code = NUM2INT(format);

	return fallback->funcs.typecast_copy_get( fallback, field_str, field_index, format_code, enc_idx );
}
/*
 * call-seq:
 *   enc.names -> array
 *
 * Returns an array holding the name of the encoding together with all of
 * its aliases.
 *
 *   Encoding::WINDOWS_31J.names  #=> ["Windows-31J", "CP932", "csWindows31J"]
 */
static VALUE
enc_names(VALUE self)
{
    /*
     * Context handed to enc_names_i for every entry of the name table:
     *   [0] the encoding index of the receiver,
     *   [1] the array that is returned to the caller.
     */
    VALUE ctx[2];

    ctx[0] = (VALUE)rb_to_encoding_index(self);
    ctx[1] = rb_ary_new2(0);

    st_foreach(enc_table.names, enc_names_i, (st_data_t)ctx);

    return ctx[1];
}
/*
 * call-seq:
 *    coder.encode( value [, encoding] )
 *
 * Converts the given Ruby object into its string representation. No data
 * is sent to or received from the database server.
 *
 * +encoding+ defaults to ASCII-8BIT when it is omitted.
 *
 * A nil value is passed through.
 *
 */
static VALUE
pg_coder_encode(int argc, VALUE *argv, VALUE self)
{
	t_pg_coder *this = DATA_PTR(self);
	VALUE value;
	VALUE intermediate;
	VALUE res;
	int enc_idx;
	int reserved_len, written_len;

	switch( argc ){
		case 1:
			enc_idx = rb_ascii8bit_encindex();
			break;
		case 2:
			enc_idx = rb_to_encoding_index(argv[1]);
			break;
		default:
			rb_raise(rb_eArgError, "wrong number of arguments (%i for 1..2)", argc);
	}

	value = argv[0];
	if( NIL_P(value) )
		return Qnil;

	if( !this->enc_func ){
		rb_raise(rb_eRuntimeError, "no encoder function defined");
	}

	/* First pass: no output buffer is given, the encoder only reports a length. */
	reserved_len = this->enc_func( this, value, NULL, &intermediate, enc_idx );

	if( reserved_len == -1 ){
		/* The encoder already produced a String in +intermediate+; use it as is. */
		OBJ_INFECT(intermediate, value);
		return intermediate;
	}

	res = rb_str_new(NULL, reserved_len);
	PG_ENCODING_SET_NOCHECK(res, enc_idx);

	/* Second pass: let the encoder write into the freshly allocated buffer. */
	written_len = this->enc_func( this, value, RSTRING_PTR(res), &intermediate, enc_idx );

	/* Writing more than the first pass announced would have overrun the buffer. */
	if( written_len > reserved_len ){
		rb_bug("%s: result length of first encoder run (%i) is less than second run (%i)",
			rb_obj_classname( self ), reserved_len, written_len );
	}

	rb_str_set_len( res, written_len );
	OBJ_INFECT(res, value);

	/* Keep +intermediate+ visible to the GC until the second pass is finished. */
	RB_GC_GUARD(intermediate);

	return res;
}
/*
 * Choose the YAML parser encoding that matches the external encoding of
 * the IO object +src+ and store it in *parser_encoding.
 *
 *   US-ASCII, UTF-8  -> YAML_UTF8_ENCODING
 *   UTF-16LE         -> YAML_UTF16LE_ENCODING
 *   UTF-16BE         -> YAML_UTF16BE_ENCODING
 *   anything else    -> YAML_ANY_ENCODING
 *
 * An IO that reports no external encoding is treated like ASCII-8BIT.
 * +src+ is always returned unchanged.
 */
static VALUE
transcode_io(VALUE src, int * parser_encoding)
{
    VALUE ext_enc = rb_funcall(src, rb_intern("external_encoding"), 0);
    int enc_idx;

    /* No encoding reported: behave as if it were ASCII-8BIT. */
    enc_idx = NIL_P(ext_enc) ? rb_ascii8bit_encindex()
                             : rb_to_encoding_index(ext_enc);

    if (enc_idx == rb_usascii_encindex() || enc_idx == rb_utf8_encindex()) {
        /* US-ASCII is handled as UTF-8. */
        *parser_encoding = YAML_UTF8_ENCODING;
    }
    else if (enc_idx == rb_enc_find_index("UTF-16LE")) {
        *parser_encoding = YAML_UTF16LE_ENCODING;
    }
    else if (enc_idx == rb_enc_find_index("UTF-16BE")) {
        *parser_encoding = YAML_UTF16BE_ENCODING;
    }
    else {
        /*
         * ASCII-8BIT as well as every encoding we do not know how to
         * handle: fall back to YAML_ANY_ENCODING.
         */
        *parser_encoding = YAML_ANY_ENCODING;
    }

    return src;
}
/*
 * Choose the YAML parser encoding that matches the external encoding of
 * the IO object +src+ and store it in *parser_encoding.
 *
 *   US-ASCII, UTF-8  -> YAML_UTF8_ENCODING
 *   UTF-16LE         -> YAML_UTF16LE_ENCODING
 *   UTF-16BE         -> YAML_UTF16BE_ENCODING
 *   ASCII-8BIT       -> YAML_ANY_ENCODING
 *
 * An IO that reports no external encoding is treated like ASCII-8BIT.
 * Any other external encoding raises ArgumentError.
 *
 * Returns +src+ unchanged.
 */
static VALUE
transcode_io(VALUE src, int * parser_encoding)
{
    VALUE ext_enc = rb_funcall(src, rb_intern("external_encoding"), 0);
    int enc_idx;

    /* No encoding reported: behave as if it were ASCII-8BIT. */
    enc_idx = NIL_P(ext_enc) ? rb_ascii8bit_encindex()
                             : rb_to_encoding_index(ext_enc);

    if (enc_idx == rb_usascii_encindex() || enc_idx == rb_utf8_encindex()) {
        /* US-ASCII is handled as UTF-8. */
        *parser_encoding = YAML_UTF8_ENCODING;
    }
    else if (enc_idx == rb_enc_find_index("UTF-16LE")) {
        *parser_encoding = YAML_UTF16LE_ENCODING;
    }
    else if (enc_idx == rb_enc_find_index("UTF-16BE")) {
        *parser_encoding = YAML_UTF16BE_ENCODING;
    }
    else if (enc_idx == rb_ascii8bit_encindex()) {
        /* Binary data: just guess. */
        *parser_encoding = YAML_ANY_ENCODING;
    }
    else {
        rb_raise(rb_eArgError, "YAML file must be UTF-8, UTF-16LE, or UTF-16BE, not %s",
                 rb_enc_name(rb_enc_from_index(enc_idx)));
    }

    return src;
}
/*
 * Spec helper: exposes rb_to_encoding_index() to Ruby.
 *
 * Returns the encoding index computed for +obj+, boxed as a Ruby Integer.
 */
static VALUE
encoding_spec_rb_to_encoding_index(VALUE self, VALUE obj)
{
    int enc_index = rb_to_encoding_index(obj);

    return INT2NUM(enc_index);
}
/*
 * URL-decode +str+: every "%XX" sequence (two hex digits) becomes the byte
 * it denotes and every '+' becomes a space. A '%' that is not followed by
 * two hex digits is kept literally; a '%' with fewer than two characters
 * after it ends the scan and the remainder is copied verbatim.
 *
 * The result is always a new String. It is tagged with +encoding+ when the
 * decoded bytes have a clean code range in that encoding; otherwise it
 * keeps the encoding of +str+.
 */
static VALUE
optimized_unescape(VALUE str, VALUE encoding)
{
    const int encidx = rb_to_encoding_index(encoding);
    const long total = RSTRING_LEN(str);
    const char *src = RSTRING_PTR(str);
    VALUE out = 0;      /* allocated lazily, on the first decoded byte */
    long flushed = 0;   /* src[0, flushed) has already been emitted    */
    long pos;
    int cr, origenc;

    for (pos = 0; pos < total; ++pos) {
        const char ch = src[pos];
        char decoded[1];
        int skip = 0;   /* extra input bytes consumed by a %XX escape */

        if (ch == '+') {
            decoded[0] = ' ';
        }
        else if (ch == '%') {
            /* Not enough room left for two hex digits: stop scanning. */
            if (pos + 3 > total) break;
            if (!ISXDIGIT(src[pos+1]) || !ISXDIGIT(src[pos+2])) continue;
            decoded[0] = ((char_to_number(src[pos+1]) << 4)
                          | char_to_number(src[pos+2]));
            skip = 2;
        }
        else {
            continue;
        }

        if (!out) {
            out = rb_str_buf_new(total);
        }

        /* Emit the untouched run before this escape, then the decoded byte. */
        rb_str_cat(out, src + flushed, pos - flushed);
        pos += skip;
        flushed = pos + 1;
        rb_str_cat(out, decoded, 1);
    }

    if (!out) {
        /* Nothing had to be decoded: hand back a copy, code range still valid. */
        out = rb_str_dup(str);
        cr = ENC_CODERANGE(str);
    }
    else {
        rb_str_cat(out, src + flushed, total - flushed);
        /* NOTE(review): helper defined elsewhere; presumably carries over
         * encoding/taint of str -- confirm against its definition. */
        preserve_original_state(str, out);
        cr = ENC_CODERANGE_UNKNOWN;
    }

    origenc = rb_enc_get_index(str);
    if (origenc == encidx) {
        return out;
    }

    /* Try the requested encoding; keep it only if the bytes are clean in it. */
    rb_enc_associate_index(out, encidx);
    if (ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(out))) {
        return out;
    }

    /* Otherwise roll back to the encoding (and known code range) of str. */
    rb_enc_associate_index(out, origenc);
    if (cr != ENC_CODERANGE_UNKNOWN) {
        ENC_CODERANGE_SET(out, cr);
    }
    return out;
}
/*
 * Advance the prepared statement by one row.
 *
 * Returns an Array with one element per result column, or nil once the
 * statement has been run to completion (the statement is then flagged as
 * done, and every later call returns nil right away).
 *
 * Column values are mapped as follows:
 *   SQLITE_INTEGER -> Integer
 *   SQLITE_FLOAT   -> Float
 *   SQLITE_TEXT    -> tainted String, tagged with the connection encoding
 *                     (UTF-8 if the connection reports none) and exported
 *                     to the default internal encoding when one is set
 *   SQLITE_BLOB    -> tainted String, no encoding handling
 *   SQLITE_NULL    -> nil
 * Any other column type raises RuntimeError ("bad type").
 *
 * Result codes other than SQLITE_ROW and SQLITE_DONE are handed to CHECK().
 */
static VALUE step(VALUE self)
{
  sqlite3StmtRubyPtr ctx;
  sqlite3_stmt *stmt;
  int rc, ncols, col;
  VALUE row;
#ifdef HAVE_RUBY_ENCODING_H
  rb_encoding * internal_encoding;
  int enc_index;
#endif

  Data_Get_Struct(self, sqlite3StmtRuby, ctx);

  REQUIRE_OPEN_STMT(ctx);

  if(ctx->done_p) return Qnil;

#ifdef HAVE_RUBY_ENCODING_H
  {
    VALUE db       = rb_iv_get(self, "@connection");
    VALUE encoding = rb_funcall(db, rb_intern("encoding"), 0);

    if(NIL_P(encoding)) {
      enc_index = rb_utf8_encindex();
    } else {
      enc_index = rb_to_encoding_index(encoding);
    }
    internal_encoding = rb_default_internal_encoding();
  }
#endif

  stmt  = ctx->st;
  rc    = sqlite3_step(stmt);
  ncols = sqlite3_column_count(stmt);
  row   = rb_ary_new2((long)ncols);

  if(rc == SQLITE_DONE) {
    ctx->done_p = 1;
    return Qnil;
  }

  if(rc != SQLITE_ROW) {
    CHECK(sqlite3_db_handle(ctx->st), rc);
    return row;
  }

  for(col = 0; col < ncols; col++) {
    VALUE cell;

    switch(sqlite3_column_type(stmt, col)) {
      case SQLITE_INTEGER:
        cell = LL2NUM(sqlite3_column_int64(stmt, col));
        break;
      case SQLITE_FLOAT:
        cell = rb_float_new(sqlite3_column_double(stmt, col));
        break;
      case SQLITE_TEXT:
        cell = rb_tainted_str_new(
            (const char *)sqlite3_column_text(stmt, col),
            (long)sqlite3_column_bytes(stmt, col)
        );
#ifdef HAVE_RUBY_ENCODING_H
        rb_enc_associate_index(cell, enc_index);
        if(internal_encoding)
          cell = rb_str_export_to_enc(cell, internal_encoding);
#endif
        break;
      case SQLITE_BLOB:
        cell = rb_tainted_str_new(
            (const char *)sqlite3_column_blob(stmt, col),
            (long)sqlite3_column_bytes(stmt, col)
        );
        break;
      case SQLITE_NULL:
        cell = Qnil;
        break;
      default:
        rb_raise(rb_eRuntimeError, "bad type");
    }

    rb_ary_push(row, cell);
  }

  return row;
}
/*
 * Transcode the string *self.
 *
 * Arguments (argc/argv, as passed to a Ruby method of arity -1):
 *   argv[0]  target encoding (Encoding object or name), required
 *   argv[1]  source encoding, optional; defaults to the encoding of *self
 *   last     optional Hash of options; only the key held in sym_invalid is
 *            examined, and the value sym_ignore sets INVALID_IGNORE
 *
 * Encodings that rb_to_encoding_index() does not know are handled by name
 * only; if the target is such an encoding, a dummy encoding is defined
 * for it after a successful conversion.
 *
 * Returns:
 *   -1          nothing to do: source and target encoding are the same;
 *               *self is left untouched
 *   index >= 0  the encoding index the caller should tag the result with.
 *               *self is replaced by the converted string, except when
 *               both encodings are ASCII compatible and the string is
 *               known to be 7bit -- then *self is left untouched.
 *
 * Raises ArgumentError for a wrong number of arguments, for an option
 * Hash without an entry for sym_invalid, for an unsupported conversion and
 * for input that could not be converted completely.
 */
static int
str_transcode(int argc, VALUE *argv, VALUE *self)
{
    VALUE dest;
    VALUE str = *self;
    long blen, slen;
    unsigned char *buf, *bp, *sp, *fromp;
    rb_encoding *from_enc, *to_enc;
    const char *from_e, *to_e;
    int from_encidx, to_encidx;
    VALUE from_encval, to_encval;
    const rb_transcoder *my_transcoder;
    rb_transcoding my_transcoding;
    int final_encoding = 0;
    VALUE opt = Qnil;
    int options = 0;

    /*
     * Only look for a trailing option Hash when there is at least one
     * argument: with argc == 0, argv[argc-1] would read in front of the
     * argument vector. The arity check below then reports the error.
     */
    if (argc > 0) {
        opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
    }
    if (!NIL_P(opt)) {
        VALUE v;

        argc--;
        v = rb_hash_aref(opt, sym_invalid);
        /*
         * NOTE(review): a missing entry raises while an unrecognised
         * non-nil value is silently accepted; the message suggests the
         * opposite was intended. Behaviour kept as is -- confirm.
         */
        if (NIL_P(v)) {
            rb_raise(rb_eArgError, "unknown value for invalid: setting");
        }
        else if (v==sym_ignore) {
            options |= INVALID_IGNORE;
        }
    }
    if (argc < 1 || argc > 2) {
        rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
    }

    /* Resolve the target encoding; fall back to its plain name if unknown. */
    if ((to_encidx = rb_to_encoding_index(to_encval = argv[0])) < 0) {
        to_enc = 0;
        to_encidx = 0;
        to_e = StringValueCStr(to_encval);
    }
    else {
        to_enc = rb_enc_from_index(to_encidx);
        to_e = rb_enc_name(to_enc);
    }

    /* Resolve the source encoding: the string's own one unless given. */
    if (argc==1) {
        from_encidx = rb_enc_get_index(str);
        from_enc = rb_enc_from_index(from_encidx);
        from_e = rb_enc_name(from_enc);
    }
    else if ((from_encidx = rb_to_encoding_index(from_encval = argv[1])) < 0) {
        from_enc = 0;
        from_e = StringValueCStr(from_encval);
    }
    else {
        from_enc = rb_enc_from_index(from_encidx);
        from_e = rb_enc_name(from_enc);
    }

    if (from_enc && from_enc == to_enc) {
        return -1;
    }
    /* A 7bit string reads the same in every ASCII compatible encoding. */
    if (from_enc && to_enc && rb_enc_asciicompat(from_enc) && rb_enc_asciicompat(to_enc)) {
        if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
            return to_encidx;
        }
    }
    if (encoding_equal(from_e, to_e)) {
        return -1;
    }

    do { /* loop for multistep transcoding */
        /* later, maybe use smaller intermediate strings for very long strings */
        if (!(my_transcoder = transcode_dispatch(from_e, to_e))) {
            rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_e, to_e);
        }

        my_transcoding.transcoder = my_transcoder;

        /* Optional preprocessing pass; its output becomes the input below. */
        if (my_transcoder->preprocessor) {
            fromp = sp = (unsigned char *)RSTRING_PTR(str);
            slen = RSTRING_LEN(str);
            blen = slen + 30; /* len + margin */
            dest = rb_str_tmp_new(blen);
            bp = (unsigned char *)RSTRING_PTR(dest);
            my_transcoding.ruby_string_dest = dest;
            (*my_transcoder->preprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
            if (fromp != sp+slen) {
                rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
            }
            buf = (unsigned char *)RSTRING_PTR(dest);
            *bp = '\0';
            rb_str_set_len(dest, bp - buf);
            str = dest;
        }

        /* Main conversion pass. */
        fromp = sp = (unsigned char *)RSTRING_PTR(str);
        slen = RSTRING_LEN(str);
        blen = slen + 30; /* len + margin */
        dest = rb_str_tmp_new(blen);
        bp = (unsigned char *)RSTRING_PTR(dest);
        my_transcoding.ruby_string_dest = dest;
        my_transcoding.flush_func = str_transcoding_resize;

        transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding, options);
        if (fromp != sp+slen) {
            rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
        }
        buf = (unsigned char *)RSTRING_PTR(dest);
        *bp = '\0';
        rb_str_set_len(dest, bp - buf);

        /* Optional postprocessing pass over the converted data. */
        if (my_transcoder->postprocessor) {
            str = dest;
            fromp = sp = (unsigned char *)RSTRING_PTR(str);
            slen = RSTRING_LEN(str);
            blen = slen + 30; /* len + margin */
            dest = rb_str_tmp_new(blen);
            bp = (unsigned char *)RSTRING_PTR(dest);
            my_transcoding.ruby_string_dest = dest;
            (*my_transcoder->postprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
            if (fromp != sp+slen) {
                rb_raise(rb_eArgError, "not fully converted, %td bytes left", sp+slen-fromp);
            }
            buf = (unsigned char *)RSTRING_PTR(dest);
            *bp = '\0';
            rb_str_set_len(dest, bp - buf);
        }

        if (encoding_equal(my_transcoder->to_encoding, to_e)) {
            final_encoding = 1;
        }
        else {
            /* Intermediate step only: continue from what we reached. */
            from_e = my_transcoder->to_encoding;
            str = dest;
        }
    } while (!final_encoding);

    /* set encoding */
    if (!to_enc) {
        to_encidx = rb_define_dummy_encoding(to_e);
    }
    *self = dest;

    return to_encidx;
}