int rb_comparator_func(void * ctx, int a_len, const void * a, int b_len, const void * b) { VALUE comparator; VALUE a_str; VALUE b_str; VALUE comparison; #ifdef HAVE_RUBY_ENCODING_H rb_encoding * internal_encoding; internal_encoding = rb_default_internal_encoding(); #endif comparator = (VALUE)ctx; a_str = rb_str_new((const char *)a, a_len); b_str = rb_str_new((const char *)b, b_len); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(a_str, rb_utf8_encindex()); rb_enc_associate_index(b_str, rb_utf8_encindex()); if(internal_encoding) { a_str = rb_str_export_to_enc(a_str, internal_encoding); b_str = rb_str_export_to_enc(b_str, internal_encoding); } #endif comparison = rb_funcall(comparator, rb_intern("compare"), 2, a_str, b_str); return NUM2INT(comparison); }
static VALUE rb_raise_mysql2_error(mysql_client_wrapper *wrapper) { VALUE rb_error_msg = rb_str_new2(mysql_error(wrapper->client)); VALUE rb_sql_state = rb_tainted_str_new2(mysql_sqlstate(wrapper->client)); VALUE e; #ifdef HAVE_RUBY_ENCODING_H if (wrapper->server_version < 50500) { /* MySQL < 5.5 uses mixed encoding, just call it binary. */ int err_enc = rb_ascii8bit_encindex(); rb_enc_associate_index(rb_error_msg, err_enc); rb_enc_associate_index(rb_sql_state, err_enc); } else { /* MySQL >= 5.5 uses UTF-8 errors internally and converts them to the connection encoding. */ rb_encoding *default_internal_enc = rb_default_internal_encoding(); rb_encoding *conn_enc = rb_to_encoding(wrapper->encoding); rb_enc_associate(rb_error_msg, conn_enc); rb_enc_associate(rb_sql_state, conn_enc); if (default_internal_enc) { rb_error_msg = rb_str_export_to_enc(rb_error_msg, default_internal_enc); rb_sql_state = rb_str_export_to_enc(rb_sql_state, default_internal_enc); } } #endif e = rb_exc_new3(cMysql2Error, rb_error_msg); rb_funcall(e, intern_error_number_eql, 1, UINT2NUM(mysql_errno(wrapper->client))); rb_funcall(e, intern_sql_state_eql, 1, rb_sql_state); rb_exc_raise(e); return Qnil; }
static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE rb_dict_file, const VALUE rb_ratio, const VALUE rb_topics, const VALUE rb_sections) { #ifdef HAVE_RUBY_ENCODING_H int enc = rb_enc_find_index("UTF-8"); #endif long int length = RSTRING_LEN(rb_str); char *text = StringValuePtr(rb_str); char *dictionary_file = StringValuePtr(rb_dict_file); int ratio = NUM2INT(rb_ratio); size_t result_len; OtsArticle *doc = ots_new_article(); VALUE summary; VALUE topics; VALUE result; VALUE sections; if (!ots_load_xml_dictionary(doc, dictionary_file)) { ots_free_article(doc); rb_raise(rb_eRuntimeError, "Cannot load dictionary file"); return Qnil; } ots_parse_stream(text, length, doc); ots_grade_doc(doc); ots_highlight_doc(doc, ratio); summary = rb_str_new2(ots_get_doc_text(doc, &result_len)); topics = rb_str_new2((const char *)doc->title); sections = rb_ary_new(); GList *li; for (li = (GList *) ots_get_doc_sections(doc); li != NULL; li = li->next) { VALUE section = rb_str_new2(li->data); rb_ary_push(sections, section); } #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(summary, enc); rb_enc_associate_index(summary, enc); #endif ots_free_article(doc); if (rb_topics == Qtrue) { result = rb_ary_new(); rb_ary_push(result, summary); rb_ary_push(result, topics); return result; } else if (rb_sections == Qtrue) { return sections; } else { return summary; } }
static VALUE stringForceUTF8(VALUE arg) { if (RB_TYPE_P(arg, RUBY_T_STRING) && ENCODING_IS_ASCII8BIT(arg)) rb_enc_associate_index(arg, rb_utf8_encindex()); return arg; }
static VALUE stringForceUTF8(VALUE arg) { if (rb_type(arg) == RUBY_T_STRING && ENCODING_IS_ASCII8BIT(arg)) rb_enc_associate_index(arg, rb_utf8_encindex()); return arg; }
static int yajl_found_string(void * ctx, const unsigned char * stringVal, unsigned int stringLen) { VALUE str = rb_str_new((const char *)stringVal, stringLen); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(str, utf8Encoding); #endif yajl_set_static_value(ctx, str); yajl_check_and_fire_callback(ctx); return 1; }
/* Set string encoding to UTF8 See http://tenderlovemaking.com/2009/06/26/string-encoding-in-ruby-1-9-c-extensions.html */ static VALUE str_new(const char *str) { VALUE string = rb_str_new2(str); #ifdef HAVE_RUBY_ENCODING_H int enc = rb_enc_find_index("UTF-8"); if(enc != -1) rb_enc_associate_index(string, enc); #endif return string; }
/* * Document-method: new * * call-seq: initialize([:pretty => false[, :indent => ' '][, :terminator => "\n"]]) * * :pretty will enable/disable beautifying or "pretty priting" the output string. * * :indent is the character(s) used to indent the output string. * * :terminator allows you to specify a character to be used as the termination character after a full JSON string has been generated by * the encoder. This would be especially useful when encoding in chunks (via a block or callback during the encode process), to be able to * determine when the last chunk of the current encode is sent. * If you specify this option to be nil, it will be ignored if encoding directly to an IO or simply returning a string. But if a block is used, * the encoder will still pass it - I hope that makes sense ;). */ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) { yajl_encoder_wrapper * wrapper; yajl_gen_config cfg; VALUE opts, obj, indent; const char * indentString = " "; int beautify = 0; /* Scan off config vars */ if (rb_scan_args(argc, argv, "01", &opts) == 1) { Check_Type(opts, T_HASH); if (rb_hash_aref(opts, sym_pretty) == Qtrue) { beautify = 1; indent = rb_hash_aref(opts, sym_indent); if (indent != Qnil) { #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(indent, utf8Encoding); #endif Check_Type(indent, T_STRING); indentString = RSTRING_PTR(indent); } } } cfg = (yajl_gen_config) { beautify, indentString }; obj = Data_Make_Struct(klass, yajl_encoder_wrapper, yajl_encoder_wrapper_mark, yajl_encoder_wrapper_free, wrapper); wrapper->encoder = yajl_gen_alloc(&cfg, NULL); wrapper->on_progress_callback = Qnil; if (opts != Qnil && rb_funcall(opts, intern_has_key, 1, sym_terminator) == Qtrue) { wrapper->terminator = rb_hash_aref(opts, sym_terminator); #ifdef HAVE_RUBY_ENCODING_H if (TYPE(wrapper->terminator) == T_STRING) { rb_enc_associate_index(wrapper->terminator, utf8Encoding); } #endif } else { wrapper->terminator = 0; } rb_obj_call_init(obj, 0, 0); return obj; }
int enc_cb(void * _self, int columns, char **data, char **names) { VALUE self = (VALUE)_self; int index = rb_enc_find_index("UTF-8"); VALUE enc = rb_str_new2(data[0]); rb_enc_associate_index(enc, index); rb_iv_set(self, "@encoding", enc); return 0; }
/* * Document-method: encode * * call-seq: encode(obj[, io[, &block]]) * * +obj+ is the Ruby object to encode to JSON * * +io+ is an optional IO used to stream the encoded JSON string to. * If +io+ isn't specified, this method will return the resulting JSON string. If +io+ is specified, this method returns nil * * If an optional block is passed, it's called when encoding is complete and passed the resulting JSON string * * It should be noted that you can reuse an instance of this class to continue encoding multiple JSON * to the same stream. Just continue calling this method, passing it the same IO object with new/different * ruby objects to encode. This is how streaming is accomplished. */ static VALUE rb_yajl_encoder_encode(int argc, VALUE * argv, VALUE self) { yajl_encoder_wrapper * wrapper; const unsigned char * buffer; unsigned int len; VALUE obj, io, blk, outBuff; GetEncoder(self, wrapper); rb_scan_args(argc, argv, "11&", &obj, &io, &blk); if (blk != Qnil) { wrapper->on_progress_callback = blk; } /* begin encode process */ yajl_encode_part(wrapper, obj, io); /* just make sure we output the remaining buffer */ yajl_gen_get_buf(wrapper->encoder, &buffer, &len); outBuff = rb_str_new((const char *)buffer, len); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(outBuff, utf8Encoding); #endif yajl_gen_clear(wrapper->encoder); if (io != Qnil) { rb_io_write(io, outBuff); if (wrapper->terminator != 0 && wrapper->terminator != Qnil) { rb_io_write(io, wrapper->terminator); } return Qnil; } else if (blk != Qnil) { rb_funcall(blk, intern_call, 1, outBuff); if (wrapper->terminator != 0) { rb_funcall(blk, intern_call, 1, wrapper->terminator); } return Qnil; } else { if (wrapper->terminator != 0 && wrapper->terminator != Qnil) { rb_str_concat(outBuff, wrapper->terminator); } return outBuff; } return Qnil; }
VALUE do_sqlite3_cConnection_quote_string(VALUE self, VALUE string) { const char *source = rb_str_ptr_readonly(string); // Wrap the escaped string in single-quotes, this is DO's convention char *escaped_with_quotes = sqlite3_mprintf("%Q", source); if(!escaped_with_quotes) { rb_memerror(); } VALUE result = rb_str_new2(escaped_with_quotes); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(result, FIX2INT(rb_iv_get(self, "@encoding_id"))); #endif sqlite3_free(escaped_with_quotes); return result; }
VALUE unserialize(VALUE self, VALUE string) { Check_Type(string, T_STRING); QByteArray byte_array(RSTRING_PTR(string), RSTRING_LEN(string)); QDataStream data_stream(byte_array); QVariant variant; data_stream >> variant; // FIXME return a Ruby object for the variant instead of its qDebug QString output; QDebug debug(&output); debug << variant; VALUE result = rb_str_new2(output.toUtf8().data()); rb_enc_associate_index(string, rb_enc_find_index("UTF-8")); return result; }
static int yajl_found_hash_key(void * ctx, const unsigned char * stringVal, unsigned int stringLen) { yajl_parser_wrapper * wrapper; GetParser((VALUE)ctx, wrapper); VALUE keyStr; if (wrapper->symbolizeKeys) { char buf[stringLen+1]; memcpy(buf, stringVal, stringLen); buf[stringLen] = 0; yajl_set_static_value(ctx, ID2SYM(rb_intern(buf))); } else { keyStr = rb_str_new((const char *)stringVal, stringLen); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(keyStr, utf8Encoding); #endif yajl_set_static_value(ctx, keyStr); } yajl_check_and_fire_callback(ctx); return 1; }
static VALUE str_encode_bang(int argc, VALUE *argv, VALUE str) { VALUE newstr = str; int encidx = str_transcode(argc, argv, &newstr); int cr = 0; if (encidx < 0) return str; rb_str_shared_replace(str, newstr); rb_enc_associate_index(str, encidx); /* transcoded string never be broken. */ if (rb_enc_asciicompat(rb_enc_from_index(encidx))) { rb_str_coderange_scan_restartable(RSTRING_PTR(str), RSTRING_END(str), 0, &cr); } else { cr = ENC_CODERANGE_VALID; } ENC_CODERANGE_SET(str, cr); return str; }
void rb_enc_copy(VALUE obj1, VALUE obj2) { rb_enc_associate_index(obj1, rb_enc_get_index(obj2)); }
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc) { return rb_enc_associate_index(obj, rb_enc_to_index(enc)); }
static VALUE encoding_spec_rb_enc_associate_index(VALUE self, VALUE obj, VALUE enc) { return rb_enc_associate_index(obj, rb_enc_find_index(RSTRING_PTR(enc))); }
static VALUE step(VALUE self) { sqlite3StmtRubyPtr ctx; sqlite3_stmt *stmt; int value, length; VALUE list; rb_encoding * internal_encoding; Data_Get_Struct(self, sqlite3StmtRuby, ctx); REQUIRE_OPEN_STMT(ctx); if(ctx->done_p) return Qnil; { VALUE db = rb_iv_get(self, "@connection"); rb_funcall(db, rb_intern("encoding"), 0); internal_encoding = rb_default_internal_encoding(); } stmt = ctx->st; value = sqlite3_step(stmt); length = sqlite3_column_count(stmt); list = rb_ary_new2((long)length); switch(value) { case SQLITE_ROW: { int i; for(i = 0; i < length; i++) { switch(sqlite3_column_type(stmt, i)) { case SQLITE_INTEGER: rb_ary_push(list, LL2NUM(sqlite3_column_int64(stmt, i))); break; case SQLITE_FLOAT: rb_ary_push(list, rb_float_new(sqlite3_column_double(stmt, i))); break; case SQLITE_TEXT: { VALUE str = rb_tainted_str_new( (const char *)sqlite3_column_text(stmt, i), (long)sqlite3_column_bytes(stmt, i) ); rb_enc_associate_index(str, rb_utf8_encindex()); if(internal_encoding) str = rb_str_export_to_enc(str, internal_encoding); rb_ary_push(list, str); } break; case SQLITE_BLOB: { VALUE str = rb_tainted_str_new( (const char *)sqlite3_column_blob(stmt, i), (long)sqlite3_column_bytes(stmt, i) ); rb_ary_push(list, str); } break; case SQLITE_NULL: rb_ary_push(list, Qnil); break; default: rb_raise(rb_eRuntimeError, "bad type"); } } } break; case SQLITE_DONE: ctx->done_p = 1; return Qnil; break; default: sqlite3_reset(stmt); ctx->done_p = 0; CHECK(sqlite3_db_handle(ctx->st), value); } return list; }
/* * call-seq: * parser.parse(yaml) * * Parse the YAML document contained in +yaml+. Events will be called on * the handler set on the parser instance. * * See Psych::Parser and Psych::Parser#handler */ static VALUE parse(VALUE self, VALUE yaml) { yaml_parser_t parser; yaml_event_t event; yaml_parser_initialize(&parser); if(rb_respond_to(yaml, id_read)) { yaml_parser_set_input(&parser, io_reader, (void *)yaml); } else { yaml_parser_set_input_string( &parser, (const unsigned char *)StringValuePtr(yaml), (size_t)RSTRING_LEN(yaml) ); } int done = 0; #ifdef HAVE_RUBY_ENCODING_H int encoding = rb_enc_find_index("ASCII-8BIT"); #endif VALUE handler = rb_iv_get(self, "@handler"); while(!done) { if(!yaml_parser_parse(&parser, &event)) { size_t line = parser.mark.line; size_t column = parser.mark.column; yaml_parser_delete(&parser); rb_raise(ePsychSyntaxError, "couldn't parse YAML at line %d column %d", (int)line, (int)column); } switch(event.type) { case YAML_STREAM_START_EVENT: #ifdef HAVE_RUBY_ENCODING_H switch(event.data.stream_start.encoding) { case YAML_ANY_ENCODING: break; case YAML_UTF8_ENCODING: encoding = rb_enc_find_index("UTF-8"); break; case YAML_UTF16LE_ENCODING: encoding = rb_enc_find_index("UTF-16LE"); break; case YAML_UTF16BE_ENCODING: encoding = rb_enc_find_index("UTF-16BE"); break; default: break; } #endif rb_funcall(handler, id_start_stream, 1, INT2NUM((long)event.data.stream_start.encoding) ); break; case YAML_DOCUMENT_START_EVENT: { // Grab the document version VALUE version = event.data.document_start.version_directive ? rb_ary_new3( (long)2, INT2NUM((long)event.data.document_start.version_directive->major), INT2NUM((long)event.data.document_start.version_directive->minor) ) : rb_ary_new(); // Get a list of tag directives (if any) VALUE tag_directives = rb_ary_new(); if(event.data.document_start.tag_directives.start) { yaml_tag_directive_t *start = event.data.document_start.tag_directives.start; yaml_tag_directive_t *end = event.data.document_start.tag_directives.end; for(; start != end; start++) { VALUE handle = Qnil; if(start->handle) { handle = rb_str_new2((const char *)start->handle); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(handle, encoding); #endif } VALUE prefix = Qnil; if(start->prefix) { prefix = rb_str_new2((const char *)start->prefix); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(prefix, encoding); #endif } VALUE pair = rb_ary_new3((long)2, handle, prefix); rb_ary_push(tag_directives, pair); } } rb_funcall(handler, id_start_document, 3, version, tag_directives, event.data.document_start.implicit == 1 ? Qtrue : Qfalse ); } break; case YAML_DOCUMENT_END_EVENT: rb_funcall(handler, id_end_document, 1, event.data.document_end.implicit == 1 ? Qtrue : Qfalse ); break; case YAML_ALIAS_EVENT: { VALUE alias = Qnil; if(event.data.alias.anchor) { alias = rb_str_new2((const char *)event.data.alias.anchor); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(alias, encoding); #endif } rb_funcall(handler, id_alias, 1, alias); } break; case YAML_SCALAR_EVENT: { VALUE val = rb_str_new( (const char *)event.data.scalar.value, (long)event.data.scalar.length ); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(val, encoding); #endif VALUE anchor = Qnil; if(event.data.scalar.anchor) { anchor = rb_str_new2((const char *)event.data.scalar.anchor); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(anchor, encoding); #endif } VALUE tag = Qnil; if(event.data.scalar.tag) { tag = rb_str_new2((const char *)event.data.scalar.tag); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(tag, encoding); #endif } VALUE plain_implicit = event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue; VALUE quoted_implicit = event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue; VALUE style = INT2NUM((long)event.data.scalar.style); rb_funcall(handler, id_scalar, 6, val, anchor, tag, plain_implicit, quoted_implicit, style); } break; case YAML_SEQUENCE_START_EVENT: { VALUE anchor = Qnil; if(event.data.sequence_start.anchor) { anchor = rb_str_new2((const char *)event.data.sequence_start.anchor); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(anchor, encoding); #endif } VALUE tag = Qnil; if(event.data.sequence_start.tag) { tag = rb_str_new2((const char *)event.data.sequence_start.tag); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(tag, encoding); #endif } VALUE implicit = event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue; VALUE style = INT2NUM((long)event.data.sequence_start.style); rb_funcall(handler, id_start_sequence, 4, anchor, tag, implicit, style); } break; case YAML_SEQUENCE_END_EVENT: rb_funcall(handler, id_end_sequence, 0); break; case YAML_MAPPING_START_EVENT: { VALUE anchor = Qnil; if(event.data.mapping_start.anchor) { anchor = rb_str_new2((const char *)event.data.mapping_start.anchor); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(anchor, encoding); #endif } VALUE tag = Qnil; if(event.data.mapping_start.tag) { tag = rb_str_new2((const char *)event.data.mapping_start.tag); #ifdef HAVE_RUBY_ENCODING_H rb_enc_associate_index(tag, encoding); #endif } VALUE implicit = event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue; VALUE style = INT2NUM((long)event.data.mapping_start.style); rb_funcall(handler, id_start_mapping, 4, anchor, tag, implicit, style); } break; case YAML_MAPPING_END_EVENT: rb_funcall(handler, id_end_mapping, 0); break; case YAML_NO_EVENT: rb_funcall(handler, id_empty, 0); break; case YAML_STREAM_END_EVENT: rb_funcall(handler, id_end_stream, 0); done = 1; break; } } return self; }
static VALUE optimized_unescape(VALUE str, VALUE encoding) { long i, len, beg = 0; VALUE dest = 0; const char *cstr; int cr, origenc, encidx = rb_to_encoding_index(encoding); len = RSTRING_LEN(str); cstr = RSTRING_PTR(str); for (i = 0; i < len; ++i) { char buf[1]; const char c = cstr[i]; int clen = 0; if (c == '%') { if (i + 3 > len) break; if (!ISXDIGIT(cstr[i+1])) continue; if (!ISXDIGIT(cstr[i+2])) continue; buf[0] = ((char_to_number(cstr[i+1]) << 4) | char_to_number(cstr[i+2])); clen = 2; } else if (c == '+') { buf[0] = ' '; } else { continue; } if (!dest) { dest = rb_str_buf_new(len); } rb_str_cat(dest, cstr + beg, i - beg); i += clen; beg = i + 1; rb_str_cat(dest, buf, 1); } if (dest) { rb_str_cat(dest, cstr + beg, len - beg); preserve_original_state(str, dest); cr = ENC_CODERANGE_UNKNOWN; } else { dest = rb_str_dup(str); cr = ENC_CODERANGE(str); } origenc = rb_enc_get_index(str); if (origenc != encidx) { rb_enc_associate_index(dest, encidx); if (!ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(dest))) { rb_enc_associate_index(dest, origenc); if (cr != ENC_CODERANGE_UNKNOWN) ENC_CODERANGE_SET(dest, cr); } } return dest; }
static VALUE encoding_spec_rb_enc_associate_index(VALUE self, VALUE obj, VALUE index) { return rb_enc_associate_index(obj, FIX2INT(index)); }