/*
 * Encodes the receiver's key/value pairs into a single binary String.
 *
 * Each pair becomes one length-prefixed entry: a single length byte followed
 * by "key=value", or by just "key" when the value is nil.  The length byte is
 * the entry size truncated to 8 bits.
 */
static VALUE
dnssd_tr_encode(VALUE self)
{
	/* Kept volatile so the array is not reclaimed while the output buffer
	 * is allocated or while keys/values are converted to strings. */
	volatile VALUE pairs = rb_funcall2(self, rb_intern("to_a"), 0, 0);
	/* array of [key, value] pairs */
	VALUE *entries = RARRAY(pairs)->ptr;
	VALUE encoded = rb_str_buf_new(dnssd_tr_convert_pairs(pairs));
	long idx = 0;

	while (idx < RARRAY(pairs)->len) {
		VALUE entry = entries[idx++];
		VALUE key = RARRAY(entry)->ptr[0];
		VALUE value = RARRAY(entry)->ptr[1];
		int has_value = !NIL_P(value);
		uint8_t entry_len;

		if (has_value) {
			/* +1 accounts for the '=' separator */
			entry_len = (uint8_t)(RSTRING(key)->len + RSTRING(value)->len + 1);
		} else {
			entry_len = (uint8_t)RSTRING(key)->len;
		}

		rb_str_buf_cat(encoded, (const char *)&entry_len, 1);
		rb_str_buf_append(encoded, key);
		if (has_value) {
			rb_str_buf_cat(encoded, "=", 1);
			rb_str_buf_append(encoded, value);
		}
	}
	return encoded;
}
/*
 * Performs one partial read of up to `length` bytes from b->io and appends
 * the data to `string`.
 *
 * When `string` is empty the IO reads straight into it; otherwise the IO
 * reads into the reusable b->io_buffer (created on first use) and the bytes
 * are then copied onto the end of `string`.
 *
 * Returns 0 when the read method returned nil; otherwise the length of
 * `string` (direct path) or the number of bytes copied (buffered path).
 */
size_t _msgpack_buffer_read_from_io_to_string(msgpack_buffer_t* b, VALUE string, size_t length)
{
    const int direct_read = (RSTRING_LEN(string) == 0);
    VALUE target;

    if(direct_read) {
        target = string;
    } else {
        /* copy via io_buffer */
        if(b->io_buffer == Qnil) {
            b->io_buffer = rb_str_buf_new(0);
        }
        target = b->io_buffer;
    }

    VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, LONG2NUM(length), target);
    if(ret == Qnil) {
        return 0;
    }

    if(direct_read) {
        return RSTRING_LEN(string);
    }

    size_t rl = RSTRING_LEN(b->io_buffer);
    rb_str_buf_cat(string, (const void*)RSTRING_PTR(b->io_buffer), rl);
    return rl;
}
/*
 * Builds a Hash-literal-like description of the process environment:
 * {"NAME"=>"value", ...}.
 *
 * Names are emitted verbatim between double quotes; values go through
 * rb_inspect.  Entries without '=' contribute no text of their own, although
 * the ", " separator is still emitted for them when they are not first.
 * The result is tainted.
 */
static VALUE
env_inspect(VALUE rcv, SEL sel)
{
    rb_secure(4);

    VALUE result = rb_str_buf_new2("{");
    char **entry;

    for (entry = GET_ENVIRON(); *entry != NULL; entry++) {
	const char *eq = strchr(*entry, '=');

	/* separator before every entry except the first */
	if (entry != GET_ENVIRON()) {
	    rb_str_buf_cat2(result, ", ");
	}
	if (eq == NULL) {
	    continue;
	}

	rb_str_buf_cat2(result, "\"");
	rb_str_buf_cat(result, *entry, eq - *entry);
	rb_str_buf_cat2(result, "\"=>");
	rb_str_buf_append(result, rb_inspect(rb_str_new2(eq + 1)));
    }

    rb_str_buf_cat2(result, "}");
    OBJ_TAINT(result);
    return result;
}
/*
 * Coerces `str` to a String, appends one NUL byte to it, and returns a
 * pointer to its bytes.
 *
 * Note that the String is modified: every call makes it one byte longer.
 * NOTE(review): presumably the extra byte gives UTF-16 consumers a two-byte
 * terminator -- confirm against callers.
 */
static char *
utf16_string_value_ptr(VALUE str)
{
    char *bytes;

    StringValue(str);
    rb_str_buf_cat(str, "\x00", 1L);
    bytes = RSTRING_PTR(str);

    return bytes;
}
/*
 * libcurl write callback: delivers `size * nmemb` bytes from `stream` to
 * `handler` and reports that many bytes as consumed.
 *
 * A String handler has the bytes appended to it.  Any other handler receives
 * them as a new String passed to its #call method.  When encoding support is
 * compiled in, the String involved is first exported to the default internal
 * encoding, or to utf8Encoding when no default internal encoding is set.
 *
 * NOTE(review): for a String handler the append goes to whatever
 * rb_str_export_to_enc returns; if that is ever a new object, the caller's
 * original String would not receive the data -- confirm.
 */
static size_t
data_handler(char * stream, size_t size, size_t nmemb, VALUE handler)
{
    const size_t nbytes = size * nmemb;
#ifdef HAVE_RUBY_ENCODING_H
    rb_encoding *target_enc = rb_default_internal_encoding();

    if (!target_enc) {
        target_enc = utf8Encoding;
    }
#endif

    if (TYPE(handler) != T_STRING) {
        /* callable handler: hand over the chunk as its own String */
        VALUE chunk = rb_str_new(stream, nbytes);
#ifdef HAVE_RUBY_ENCODING_H
        chunk = rb_str_export_to_enc(chunk, target_enc);
#endif
        rb_funcall(handler, rb_intern("call"), 1, chunk);
        return nbytes;
    }

#ifdef HAVE_RUBY_ENCODING_H
    handler = rb_str_export_to_enc(handler, target_enc);
#endif
    rb_str_buf_cat(handler, stream, nbytes);
    return nbytes;
}
/*
 * Moves up to `length` bytes from the buffer's chunks onto the end of
 * `string` without reading from any IO, and returns the number of bytes
 * actually moved (less than `length` when the chunks run out).
 *
 * Unless DISABLE_BUFFER_READ_REFERENCE_OPTIMIZE is defined, a request that
 * fits in the head chunk, targets an empty `string`, has a mapped string
 * available and is at least b->read_reference_threshold long is served by
 * replacing `string` with a reference into the mapped string instead of
 * copying.
 */
size_t msgpack_buffer_read_to_string_nonblock(msgpack_buffer_t* b, VALUE string, size_t length)
{
    size_t avail = msgpack_buffer_top_readable_size(b);

#ifndef DISABLE_BUFFER_READ_REFERENCE_OPTIMIZE
    /* optimize */
    const bool can_refer =
            length <= avail
            && RSTRING_LEN(string) == 0
            && b->head->mapped_string != NO_MAPPED_STRING
            && length >= b->read_reference_threshold;

    if(can_refer) {
        VALUE s = _msgpack_buffer_refer_head_mapped_string(b, length);
#ifndef HAVE_RB_STR_REPLACE
        /* TODO MRI 1.8 */
        rb_funcall(string, s_replace, 1, s);
#else
        rb_str_replace(string, s);
#endif
        /* here doesn't have to call ENCODING_SET because
         * encoding of s is always ASCII-8BIT */
        _msgpack_buffer_consumed(b, length);
        return length;
    }
#endif

    size_t remaining = length;

    /* drain whole chunks while the request is larger than the head chunk */
    while(remaining > avail) {
        rb_str_buf_cat(string, b->read_buffer, avail);
        remaining -= avail;

        if(!_msgpack_buffer_shift_chunk(b)) {
            /* no more chunks: report what was delivered so far */
            return length - remaining;
        }
        avail = msgpack_buffer_top_readable_size(b);
    }

    /* the rest fits in the current chunk */
    rb_str_buf_cat(string, b->read_buffer, remaining);
    _msgpack_buffer_consumed(b, remaining);
    return length;
}
/*
 * Appends `n` bytes from `s` to the dump buffer and ORs arg->infection into
 * the buffer's flags.  When a destination IO is set and the buffer has
 * reached BUFSIZ bytes, the buffer is written to the IO and emptied.
 */
static void
w_nbyte(const char *s, long n, struct dump_arg *arg)
{
    VALUE out = arg->str;

    rb_str_buf_cat(out, s, n);
    RBASIC(out)->flags |= arg->infection;

    if (!arg->dest) {
	return;
    }
    if (RSTRING_LEN(out) < BUFSIZ) {
	return;
    }

    /* flush */
    rb_io_write(arg->dest, out);
    rb_str_resize(out, 0);
}
/* @overload append_crc16(buf)
 *   Appends a CRC16 checksum to a string, modifying the string in place.
 *
 *   The two checksum bytes are appended in the host's in-memory byte order.
 *
 *   @param buf [String] the starting string
 *   @return [String] the original string plus its CRC16
 *   @raise [TypeError] if buf is not a String
 */
static VALUE
mm_gps_add_CRC16(VALUE klass, VALUE str)
{
    union {
        ushort u;
        char s[2];
    } checksum;

    Check_Type(str, T_STRING);

    checksum.u = CRC16(RSTRING_PTR(str), RSTRING_LEN(str));
    rb_str_buf_cat(str, checksum.s, sizeof(checksum.s));

    return str;
}
/*
 * Reads the raw bytes of the archive file named by the archive's path.
 *
 * With a block, each chunk of up to DATA_BUFSIZE bytes is yielded as a String
 * and nil is returned.  Without a block, the whole file content is returned
 * as one String (nil when the file is empty).
 *
 * Raises RuntimeError when the archive has no path, and Error when the file
 * cannot be opened or a read error occurs.
 */
static VALUE zipruby_archive_read(VALUE self) {
  VALUE retval = Qnil;
  struct zipruby_archive *p_archive;
  FILE *fzip;
  char buf[DATA_BUFSIZE];
  size_t n;
  int block_given;
  int read_failed;

  Data_Get_Struct(self, struct zipruby_archive, p_archive);

  if (NIL_P(p_archive->path)) {
    rb_raise(rb_eRuntimeError, "invalid Zip::Archive");
  }

#ifdef _WIN32
  if (fopen_s(&fzip, RSTRING_PTR(p_archive->path), "rb") != 0) {
    rb_raise(Error, "Read archive failed: Cannot open archive");
  }
#else
  if ((fzip = fopen(RSTRING_PTR(p_archive->path), "rb")) == NULL) {
    rb_raise(Error, "Read archive failed: Cannot open archive");
  }
#endif

  block_given = rb_block_given_p();

  /* fread() returns a size_t item count and never -1; a short or zero count
   * means either end-of-file or an error, told apart with ferror() below. */
  while ((n = fread(buf, 1, sizeof(buf), fzip)) > 0) {
    if (block_given) {
      rb_yield(rb_str_new(buf, n));
    } else {
      if (NIL_P(retval)) {
        retval = rb_str_new(buf, n);
      } else {
        rb_str_buf_cat(retval, buf, n);
      }
    }
  }

  /* Capture the error state before the stream is closed. */
  read_failed = ferror(fzip);

#if defined(RUBY_VM) && defined(_WIN32)
  _fclose_nolock(fzip);
#elif defined(RUBY_WIN32_H)
#undef fclose
  fclose(fzip);
#define fclose(f) rb_w32_fclose(f)
#else
  fclose(fzip);
#endif

  if (read_failed) {
    rb_raise(Error, "Read archive failed");
  }

  return retval;
}
/*
 * Appends `data_length` bytes of `data` to the Ruby output buffer held in the
 * context's custom data and adds that count to its running buffer_length.
 * A zero-length write touches nothing.  Always returns true.
 */
bool mustache_write_to_buffer(mustache_context_t* m_ctx, char* data, size_t data_length)
{
    if (data_length != 0) {
        mustache_ruby_context_t* ruby_ctx = (mustache_ruby_context_t*) m_ctx->custom;

        rb_str_buf_cat(ruby_ctx->buffer, data, data_length);
        ruby_ctx->buffer_length += data_length;
    }

    return true;
}
/*
 * Returns a description of the exception: a copy of the class name alone
 * when the exception's string form is empty, otherwise
 * "#<ClassName: message>".
 */
static VALUE
exc_inspect(VALUE exc)
{
    VALUE klass = CLASS_OF(exc);
    VALUE message = rb_obj_as_string(exc);
    VALUE class_name;
    VALUE result;

    if (RSTRING_LEN(message) == 0) {
	return rb_str_dup(rb_class_name(klass));
    }

    result = rb_str_buf_new2("#<");
    class_name = rb_class_name(klass);
    rb_str_buf_append(result, class_name);
    rb_str_buf_cat(result, ": ", 2);
    rb_str_buf_append(result, message);
    rb_str_buf_cat(result, ">", 1);

    return result;
}
/*
 * Appends `n` bytes from `s` to the dump buffer.  When a destination IO is
 * set and the buffer has reached BUFSIZ bytes, the buffer is tainted if
 * arg->taint is set, written to the IO, and emptied.
 */
static void
w_nbyte(const char *s, int n, struct dump_arg *arg)
{
    VALUE out = arg->str;

    rb_str_buf_cat(out, s, n);

    if (!arg->dest || RSTRING_LEN(out) < BUFSIZ) {
	return;
    }

    /* flush */
    if (arg->taint) {
	OBJ_TAINT(out);
    }
    rb_io_write(arg->dest, out);
    rb_str_resize(out, 0);
}
/*
 * Pushes one String onto the string buffer: its byte length is recorded in
 * sb->string_sizes, its bytes are appended to sb->buffer, and the element
 * count is incremented.  Returns self.
 *
 * Raises TypeError when `str` is not a String.
 */
VALUE
rb_fairy_string_buffer_push(VALUE self, VALUE str)
{
  fairy_string_buffer_t *sb;

  GetFairyStringBufferPtr(self, sb);

  if (!RB_TYPE_P(str, T_STRING)) {
    rb_raise(rb_eTypeError, "wrong argument type (expected String)");
  }

  rb_fairy_fixnum_buffer_push_long(sb->string_sizes, RSTRING_LEN(str));
  rb_str_buf_cat(sb->buffer, RSTRING_PTR(str), RSTRING_LEN(str));
  sb->size += 1;

  return self;
}
/*
 * Creates a new Ruby String holding the first `bsiz` bytes of `buf`.
 * Embedded NUL bytes are preserved.  A `bsiz` of zero or less yields an
 * empty String.
 *
 * The bytes are appended in one call rather than one call per byte.
 */
extern VALUE StringRaw(const char *buf, int bsiz){
  VALUE vval = rb_str_buf_new2("");

  /* Guard keeps the "nothing appended" result for non-positive sizes. */
  if (bsiz > 0) {
    rb_str_buf_cat(vval, buf, bsiz);
  }
  return vval;
}
/*
 * Packs the receiver (an Array of Fixnums) into a String, one byte per
 * element.  Each element's value is narrowed to a char before it is
 * appended.
 */
static VALUE
fast_pack(VALUE self)
{
  VALUE packed = rb_str_buf_new(0);
  long idx = 0;

  while (idx < RARRAY_LEN(self)) {
    char byte = FIX2LONG(RARRAY_PTR(self)[idx]);

    rb_str_buf_cat(packed, &byte, sizeof(char));
    idx++;
  }

  return packed;
}
/**
 * RubyWatchman.dump(serializable)
 *
 * Converts the Ruby object, `serializable`, into a binary string in the
 * Watchman binary protocol format.
 *
 * Examples of serializable objects include arrays, hashes, strings, numbers
 * (integers, floats), booleans, and nil.
 */
VALUE RubyWatchman_dump(VALUE self, VALUE serializable) {
    watchman_t *w = watchman_init();
    watchman_dump(w, serializable);

    // update header with final length information
    //
    // The length field sits at the end of the header inside a byte buffer, so
    // its address is not guaranteed to be suitably aligned for a uint64_t.
    // Copy the value byte by byte (host byte order, exactly what a direct
    // store would have written) instead of storing through a cast pointer.
    uint64_t payload_len = w->len - sizeof(WATCHMAN_HEADER) + 1;
    unsigned char *len_field =
        (unsigned char *)(w->data + sizeof(WATCHMAN_HEADER) - sizeof(uint64_t) - 1);
    const unsigned char *len_bytes = (const unsigned char *)&payload_len;
    size_t i;
    for (i = 0; i < sizeof(payload_len); i++) {
        len_field[i] = len_bytes[i];
    }

    // prepare final return value
    VALUE serialized = rb_str_buf_new(w->len);
    rb_str_buf_cat(serialized, (const char*)w->data, w->len);
    watchman_free(w);
    return serialized;
}
/*
 * Converts a Ruby String to CESU-8: the string is transcoded to utf16enc and
 * every 16-bit code unit is then written out individually with
 * rb_uv_to_utf8.
 *
 * Raises EncodingError (through clean_raise) when the transcoding to UTF-16
 * did not take place.
 */
static VALUE encode_cesu8(struct state *state, VALUE str)
{
  duk_context *ctx = state->ctx;
  VALUE res = rb_str_new(0, 0);

  VALUE utf16 = rb_str_conv_enc(str, rb_enc_get(str), utf16enc);
  /* rb_str_conv_enc hands back its argument when it could not convert */
  if (utf16 == str && rb_enc_get(str) != utf16enc) {
    clean_raise(ctx, rb_eEncodingError, "cannot convert Ruby string to UTF-16");
  }

  long len = RSTRING_LEN(utf16) / 2;
  unsigned short *bytes = (unsigned short *)RSTRING_PTR(utf16);

  char buf[8];

  /* The index has the same type as `len` so long strings cannot overflow it. */
  for (long i = 0; i < len; i++) {
    int length = rb_uv_to_utf8(buf, bytes[i]);
    rb_str_buf_cat(res, (char*)buf, length);
  }

  /* `bytes` points into utf16's storage and rb_str_buf_cat may allocate;
   * keep utf16 visibly alive until the loop is done. */
  RB_GC_GUARD(utf16);

  return res;
}
/*
 * call-seq:
 *    conn.putline( str)         -> nil
 *    conn.putline( ary)         -> nil
 *    conn.putline( str) { ... } -> nil
 *
 * Sends the string to the backend server.
 * You have to open the stream with a +COPY+ command using +copy_stdin+.
 *
 * If +str+ doesn't end in a newline, one is appended; an empty +str+ is
 * therefore sent as a single newline.  If the argument is +ary+, a line
 * will be built using +stringize_line+.  Any other object is converted
 * with +to_s+.
 *
 * If the connection is in nonblocking mode and no data could be sent
 * the closure will be called and its value will be returned.
 */
VALUE
pgconn_putline( VALUE self, VALUE arg)
{
    struct pgconn_data *c;
    VALUE str;
    const char *p;
    int l;
    int r;

    switch (TYPE( arg)) {
        case T_STRING:
            str = arg;
            break;
        case T_ARRAY:
            str = pgconn_stringize_line( self, arg);
            break;
        default:
            str = rb_obj_as_string( arg);
            break;
    }

    /* Check the length first: an empty string has no last byte to inspect. */
    if (RSTRING_LEN( str) == 0 ||
            RSTRING_PTR( str)[ RSTRING_LEN( str) - 1] != '\n') {
        VALUE t;

        /* Work on a copy so the caller's string is left untouched. */
        t = rb_str_dup( str);
        rb_str_buf_cat( t, "\n", 1);
        str = t;
    }

    Data_Get_Struct( self, struct pgconn_data, c);
    p = pgconn_destring( c, str, &l);
    r = PQputCopyData( c->conn, p, l);
    if (r < 0)
        rb_raise( rb_ePgConnCopy, "Copy from stdin failed.");
    else if (r == 0)
        return rb_yield( Qnil);
    return Qnil;
}
static void http_field(void *data, const char *field, size_t flen, const char *value, size_t vlen) { char *ch, *end; VALUE req = (VALUE)data; VALUE v = Qnil; VALUE f = Qnil; VALIDATE_MAX_LENGTH(flen, FIELD_NAME); VALIDATE_MAX_LENGTH(vlen, FIELD_VALUE); v = rb_str_new(value, vlen); f = rb_str_dup(global_http_prefix); f = rb_str_buf_cat(f, field, flen); for(ch = RSTRING_PTR(f) + RSTRING_LEN(global_http_prefix), end = RSTRING_PTR(f) + RSTRING_LEN(f); ch < end; ch++) { if(*ch == '-') { *ch = '_'; } else { *ch = toupper(*ch); } } rb_hash_aset(req, f, v); }
/*
 * Converts a CESU-8 byte string into a UTF-8 Ruby String.
 *
 * Each sequence decoded by utf8_to_uv is stored as one 16-bit unit in an
 * intermediate String, which is then tagged as utf16enc and transcoded to
 * UTF-8.  Raises EncodingError (through clean_raise) when the transcoding
 * hands back the intermediate String itself.
 */
static VALUE decode_cesu8(struct state *state, VALUE str)
{
  duk_context *ctx = state->ctx;
  VALUE utf16 = rb_str_new(0, 0);

  const char *cursor = RSTRING_PTR(str);
  const char *limit = RSTRING_END(str);
  long consumed;

  for (; cursor < limit; cursor += consumed) {
    /* in: bytes available, out: bytes used by this sequence */
    consumed = (limit - cursor);
    unsigned short unit = utf8_to_uv(cursor, &consumed);
    rb_str_buf_cat(utf16, (char*)&unit, 2);
  }

  rb_enc_associate(utf16, utf16enc);
  VALUE utf8 = rb_str_conv_enc(utf16, utf16enc, rb_utf8_encoding());
  if (utf8 == utf16) {
    clean_raise(ctx, rb_eEncodingError, "cannot convert JavaScript string to UTF-16");
  }

  return utf8;
}
// Takes data streamed from libcurl and writes it to a Ruby string buffer. static size_t session_write_handler(char* stream, size_t size, size_t nmemb, VALUE out) { rb_str_buf_cat(out, stream, size * nmemb); return size * nmemb; }
/*
 * Decodes a run of consecutive JSON "\uXXXX" escapes into UTF-8 and appends
 * the result to `buffer`.
 *
 * `source` must point at the 'u' of the first escape (the byte before it is
 * checked to be the backslash).  All directly following "\uXXXX" escapes are
 * collected as UTF-16 code units, surrogate pairs are combined, and each
 * resulting code point is written as 1-4 UTF-8 bytes.
 *
 * With `flags == strictConversion` an unpaired surrogate raises
 * JSON::ParserError; a high surrogate at the very end of the run raises
 * regardless of `flags`.
 *
 * Returns a pointer just past the last escape consumed
 * (source + 5 + (n - 1) * 6, where n is the number of escapes).
 *
 * NOTE(review): neither the scan loop (p[1]) nor the hex-digit reads are
 * bounded against sourceEnd beyond the `p < sourceEnd` test -- presumably
 * the caller guarantees complete escapes; confirm.
 */
char *JSON_convert_UTF16_to_UTF8 (
        VALUE buffer,
        char *source,
        char *sourceEnd,
        ConversionFlags flags)
{
    UTF16 *tmp, *tmpPtr, *tmpEnd;
    char buf[5];
    long n = 0, i;
    char *p = source - 1;

    /* Count the consecutive "\uXXXX" escapes (6 bytes each). */
    while (p < sourceEnd && p[0] == '\\' && p[1] == 'u') {
        p += 6;
        n++;
    }
    /* First hex digit of the first escape. */
    p = source + 1;
    buf[4] = 0;
    tmpPtr = tmp = ALLOC_N(UTF16, n);
    tmpEnd = tmp + n;
    /* Parse each group of four hex digits into one UTF-16 code unit. */
    for (i = 0; i < n; i++) {
        buf[0] = *p++;
        buf[1] = *p++;
        buf[2] = *p++;
        buf[3] = *p++;
        tmpPtr[i] = strtol(buf, NULL, 16);
        /* skip the "\u" that introduces the next escape */
        p += 2;
    }

    while (tmpPtr < tmpEnd) {
        UTF32 ch;
        unsigned short bytesToWrite = 0;
        const UTF32 byteMask = 0xBF;
        const UTF32 byteMark = 0x80;
        ch = *tmpPtr++;
        /* If we have a surrogate pair, convert to UTF32 first. */
        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
            /* If the 16 bits following the high surrogate are in the source
             * buffer... */
            if (tmpPtr < tmpEnd) {
                UTF32 ch2 = *tmpPtr;
                /* If it's a low surrogate, convert to UTF32. */
                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
                    ++tmpPtr;
                } else if (flags == strictConversion) {
                    /* it's an unpaired high surrogate */
                    /* free the scratch array before raising: rb_raise does
                     * not return */
                    ruby_xfree(tmp);
                    rb_raise(rb_path2class("JSON::ParserError"),
                            "source sequence is illegal/malformed near %s", source);
                }
            } else {
                /* We don't have the 16 bits following the high surrogate.
                 */
                ruby_xfree(tmp);
                rb_raise(rb_path2class("JSON::ParserError"),
                        "partial character in source, but hit end near %s", source);
                /* not reached in practice: rb_raise does not return */
                break;
            }
        } else if (flags == strictConversion) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                ruby_xfree(tmp);
                rb_raise(rb_path2class("JSON::ParserError"),
                        "source sequence is illegal/malformed near %s", source);
            }
        }
        /* Figure out how many bytes the result will require */
        if (ch < (UTF32) 0x80) {
            bytesToWrite = 1;
        } else if (ch < (UTF32) 0x800) {
            bytesToWrite = 2;
        } else if (ch < (UTF32) 0x10000) {
            bytesToWrite = 3;
        } else if (ch < (UTF32) 0x110000) {
            bytesToWrite = 4;
        } else {
            /* out of range: substitute the replacement character */
            bytesToWrite = 3;
            ch = UNI_REPLACEMENT_CHAR;
        }
        buf[0] = 0;
        buf[1] = 0;
        buf[2] = 0;
        buf[3] = 0;
        /* The bytes are produced last-to-first, so start past the end. */
        p = buf + bytesToWrite;
        switch (bytesToWrite) {
            /* note: everything falls through. */
            case 4: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
            case 3: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
            case 2: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
            case 1: *--p = (UTF8) (ch | firstByteMark[bytesToWrite]);
        }
        /* p is back at buf[0] here */
        rb_str_buf_cat(buffer, p, bytesToWrite);
    }
    ruby_xfree(tmp);
    /* Advance past everything consumed: 5 bytes for "uXXXX" of the first
     * escape plus 6 for each further "\uXXXX". */
    source += 5 + (n - 1) * 6;
    return source;
}
/*
 * Appends the bytes of `str` to the memory buffer's backing String (as
 * returned by GET_BUF).  Always returns nil.
 */
VALUE rb_thrift_memory_buffer_write(VALUE self, VALUE str) {
  VALUE backing = GET_BUF(self);
  const char *bytes = RSTRING_PTR(str);
  long nbytes = RSTRING_LEN(str);

  rb_str_buf_cat(backing, bytes, nbytes);
  return Qnil;
}
/*
 * Creates the empty String that collects response data when the caller did
 * not supply a handler.  With encoding support it is exported to the default
 * internal encoding, or to utf8Encoding when none is set.
 */
static VALUE streamly_new_response_buffer(void) {
  VALUE str = rb_str_new2("");
#ifdef HAVE_RUBY_ENCODING_H
  rb_encoding *default_internal_enc = rb_default_internal_encoding();
  if (default_internal_enc) {
    str = rb_str_export_to_enc(str, default_internal_enc);
  } else {
    str = rb_str_export_to_enc(str, utf8Encoding);
  }
#endif
  return str;
}

/*
 * Document-method: initialize
 *
 * call-seq: initialize(args)
 *
 * +args+ should be a Hash and is required
 * This Hash should at least contain +:url+ and +:method+ keys.
 * You may also provide the following optional keys:
 *   +:headers+ - should be a Hash of name/value pairs
 *   +:response_header_handler+ - can be a string or object that responds to #call
 *     If an object was passed, its #call method will be called and passed the current chunk of data
 *   +:response_body_handler+ - can be a string or object that responds to #call
 *     If an object was passed, its #call method will be called and passed the current chunk of data
 *   +:payload+ - If +:method+ is either +:post+ or +:put+ this will be used as the request body
 *   +:username+, +:password+ - credentials sent with basic/digest authentication
 *
 * Raises Streamly's error class when +:method+ or +:url+ is missing, when a
 * POST/PUT request has no +:payload+, or when no curl handle can be created.
 * Raises TypeError when +:url+, +:payload+ (for POST/PUT), +:username+ or
 * +:password+ cannot be converted to a String.
 */
static VALUE rb_streamly_init(int argc, VALUE * argv, VALUE self) {
  struct curl_instance * instance;
  VALUE args, url, payload, headers, username, password, credentials;

  GetInstance(self, instance);

  instance->handle = curl_easy_init();
  instance->request_headers = NULL;
  instance->request_method = Qnil;
  instance->request_payload_handler = Qnil;
  instance->response_header_handler = Qnil;
  instance->response_body_handler = Qnil;
  instance->options = Qnil;

  /* Every curl_easy_setopt below needs a valid handle. */
  if (instance->handle == NULL) {
    rb_raise(eStreamlyError, "Unable to initialize a curl handle");
  }

  rb_scan_args(argc, argv, "10", &args);

  // Ensure our args parameter is a hash
  Check_Type(args, T_HASH);

  instance->request_method = rb_hash_aref(args, sym_method);
  url = rb_hash_aref(args, sym_url);
  payload = rb_hash_aref(args, sym_payload);
  headers = rb_hash_aref(args, sym_headers);
  username = rb_hash_aref(args, sym_username);
  password = rb_hash_aref(args, sym_password);
  instance->response_header_handler = rb_hash_aref(args, sym_response_header_handler);
  instance->response_body_handler = rb_hash_aref(args, sym_response_body_handler);

  // First lets verify we have a :method key
  if (NIL_P(instance->request_method)) {
    rb_raise(eStreamlyError, "You must specify a :method");
  } else {
    // OK, a :method was specified, but if it's POST or PUT we require a :payload
    if (instance->request_method == sym_post || instance->request_method == sym_put) {
      if (NIL_P(payload)) {
        rb_raise(eStreamlyError, "You must specify a :payload for POST and PUT requests");
      }
    }
  }

  // Now verify a :url was provided
  if (NIL_P(url)) {
    rb_raise(eStreamlyError, "You must specify a :url to request");
  }

  if (NIL_P(instance->response_header_handler)) {
    instance->response_header_handler = streamly_new_response_buffer();
  }
  if (instance->request_method != sym_head && NIL_P(instance->response_body_handler)) {
    instance->response_body_handler = streamly_new_response_buffer();
  }

  if (!NIL_P(headers)) {
    Check_Type(headers, T_HASH);
    rb_iterate(rb_each, headers, each_http_header, self);
    curl_easy_setopt(instance->handle, CURLOPT_HTTPHEADER, instance->request_headers);
  }

  // So far so good, lets start setting up our request
  // Numeric curl options are read as `long`, so pass long constants.

  // Set the type of request
  if (instance->request_method == sym_head) {
    curl_easy_setopt(instance->handle, CURLOPT_NOBODY, 1L);
  } else if (instance->request_method == sym_get) {
    curl_easy_setopt(instance->handle, CURLOPT_HTTPGET, 1L);
  } else if (instance->request_method == sym_post) {
    StringValue(payload);
    curl_easy_setopt(instance->handle, CURLOPT_POST, 1L);
    // This function keeps no reference to the payload String, so let curl
    // take its own copy. The size must be set before the copy is made.
    curl_easy_setopt(instance->handle, CURLOPT_POSTFIELDSIZE, (long)RSTRING_LEN(payload));
    curl_easy_setopt(instance->handle, CURLOPT_COPYPOSTFIELDS, RSTRING_PTR(payload));

    // TODO: get streaming upload working (multipart via CURLOPT_HTTPPOST, or
    // CURLOPT_READFUNCTION / CURLOPT_READDATA / CURLOPT_INFILESIZE)
  } else if (instance->request_method == sym_put) {
    StringValue(payload);
    curl_easy_setopt(instance->handle, CURLOPT_CUSTOMREQUEST, "PUT");
    curl_easy_setopt(instance->handle, CURLOPT_POSTFIELDSIZE, (long)RSTRING_LEN(payload));
    curl_easy_setopt(instance->handle, CURLOPT_COPYPOSTFIELDS, RSTRING_PTR(payload));

    // TODO: get streaming upload working (CURLOPT_UPLOAD with
    // CURLOPT_READFUNCTION / CURLOPT_READDATA / CURLOPT_INFILESIZE)
  } else if (instance->request_method == sym_delete) {
    curl_easy_setopt(instance->handle, CURLOPT_CUSTOMREQUEST, "DELETE");
  }

  // Other common options
  curl_easy_setopt(instance->handle, CURLOPT_URL, StringValueCStr(url));
  curl_easy_setopt(instance->handle, CURLOPT_FOLLOWLOCATION, 1L);
  curl_easy_setopt(instance->handle, CURLOPT_MAXREDIRS, 3L);

  // Response header handling
  curl_easy_setopt(instance->handle, CURLOPT_HEADERFUNCTION, &header_handler);
  curl_easy_setopt(instance->handle, CURLOPT_HEADERDATA, instance->response_header_handler);

  // Response body handling
  if (instance->request_method != sym_head) {
    curl_easy_setopt(instance->handle, CURLOPT_ENCODING, "identity, deflate, gzip");
    curl_easy_setopt(instance->handle, CURLOPT_WRITEFUNCTION, &data_handler);
    curl_easy_setopt(instance->handle, CURLOPT_WRITEDATA, instance->response_body_handler);
  }

  if (!NIL_P(username) || !NIL_P(password)) {
    // Build "username:password"; either side may be empty
    credentials = rb_str_new2("");
    if (!NIL_P(username)) {
      StringValue(username);
      rb_str_buf_cat(credentials, RSTRING_PTR(username), RSTRING_LEN(username));
    }
    rb_str_buf_cat(credentials, ":", 1);
    if (!NIL_P(password)) {
      StringValue(password);
      rb_str_buf_cat(credentials, RSTRING_PTR(password), RSTRING_LEN(password));
    }
    curl_easy_setopt(instance->handle, CURLOPT_HTTPAUTH, (long)(CURLAUTH_BASIC | CURLAUTH_DIGEST));
    curl_easy_setopt(instance->handle, CURLOPT_USERPWD, RSTRING_PTR(credentials));
    // Keep the String alive while curl reads from its buffer. rb_gc_mark is
    // only meaningful inside a GC mark function, so it is not used here.
    RB_GC_GUARD(credentials);
  }

  // NOTE(review): certificate and host verification are switched off for
  // every request -- confirm this is intended.
  curl_easy_setopt(instance->handle, CURLOPT_SSL_VERIFYPEER, 0L);
  curl_easy_setopt(instance->handle, CURLOPT_SSL_VERIFYHOST, 0L);

  curl_easy_setopt(instance->handle, CURLOPT_ERRORBUFFER, instance->error_buffer);

  return self;
}
/*
 * Appends a single "?" to `str` with rb_str_buf_cat and returns the same
 * (modified) String.
 */
VALUE string_spec_rb_str_buf_cat(VALUE self, VALUE str) {
  static const char suffix[] = "?";

  rb_str_buf_cat(str, suffix, strlen(suffix));
  return str;
}
/**
 * Replaces the invalid byte sequences of a string.
 *
 * The replacement is, in order of precedence: the result of the given block
 * (called with the invalid bytes), the optional `repl` argument, or a
 * per-encoding default (U+FFFD for UTF-8/UTF-16/UTF-32, "?" otherwise).
 *
 * Strings whose code range is already known to be 7bit or valid, and strings
 * in a dummy encoding, are left alone.  When scanning finds nothing to
 * replace, the code range is cached on `str`.
 *
 * @param str the string to be scrubbed
 * @param repl the replacement character
 * @return If given string is invalid, returns a new string. Otherwise, returns Qnil.
 */
static VALUE
str_scrub0(int argc, VALUE *argv, VALUE str)
{
    int cr = ENC_CODERANGE(str);
    rb_encoding *enc;
    int encidx;
    VALUE repl;

    if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID)
        return Qnil;

    enc = STR_ENC_GET(str);
    rb_scan_args(argc, argv, "01", &repl);
    if (argc != 0) {
        repl = str_compat_and_valid(repl, enc);
    }

    if (rb_enc_dummy_p(enc)) {
        return Qnil;
    }
    encidx = rb_enc_to_index(enc);

/* Points rep/replen at a static copy of the literal, without its NUL. */
#define DEFAULT_REPLACE_CHAR(str) do { \
        static const char replace[sizeof(str)-1] = str; \
        rep = replace; replen = (int)sizeof(replace); \
    } while (0)

    if (rb_enc_asciicompat(enc)) {
        const char *p = RSTRING_PTR(str);
        const char *e = RSTRING_END(str);
        const char *p1 = p;     /* start of the pending, not yet copied, valid run */
        const char *rep;        /* replacement bytes, or NULL to ask the block */
        long replen;
        int rep7bit_p;
        VALUE buf = Qnil;       /* output, created on the first invalid sequence */
        if (rb_block_given_p()) {
            rep = NULL;
            replen = 0;
            rep7bit_p = FALSE;
        }
        else if (!NIL_P(repl)) {
            rep = RSTRING_PTR(repl);
            replen = RSTRING_LEN(repl);
            rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT);
        }
        else if (encidx == rb_utf8_encindex()) {
            DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD");
            rep7bit_p = FALSE;
        }
        else {
            DEFAULT_REPLACE_CHAR("?");
            rep7bit_p = TRUE;
        }
        cr = ENC_CODERANGE_7BIT;

        /* ASCII bytes are always valid here: jump to the first non-ASCII one */
        p = search_nonascii(p, e);
        if (!p) {
            p = e;
        }
        while (p < e) {
            int ret = rb_enc_precise_mbclen(p, e, enc);
            if (MBCLEN_NEEDMORE_P(ret)) {
                /* truncated character at the end; handled after the loop */
                break;
            }
            else if (MBCLEN_CHARFOUND_P(ret)) {
                cr = ENC_CODERANGE_VALID;
                p += MBCLEN_CHARFOUND_LEN(ret);
            }
            else if (MBCLEN_INVALID_P(ret)) {
                /*
                 * p1~p: valid ascii/multibyte chars
                 * p ~e: invalid bytes + unknown bytes
                 */
                long clen = rb_enc_mbmaxlen(enc);
                if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
                if (p > p1) {
                    rb_str_buf_cat(buf, p1, p - p1);
                }

                /* Work out how many bytes (clen) make up the invalid sequence. */
                if (e - p < clen) clen = e - p;
                if (clen <= 2) {
                    clen = 1;
                }
                else {
                    const char *q = p;
                    clen--;
                    for (; clen > 1; clen--) {
                        ret = rb_enc_precise_mbclen(q, q + clen, enc);
                        if (MBCLEN_NEEDMORE_P(ret)) break;
                        if (MBCLEN_INVALID_P(ret)) continue;
                        UNREACHABLE;
                    }
                }
                if (rep) {
                    rb_str_buf_cat(buf, rep, replen);
                    if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
                }
                else {
                    repl = rb_yield(rb_enc_str_new(p, clen, enc));
                    repl = str_compat_and_valid(repl, enc);
                    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
                    if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
                        cr = ENC_CODERANGE_VALID;
                }
                p += clen;
                p1 = p;
                p = search_nonascii(p, e);
                if (!p) {
                    p = e;
                    break;
                }
            }
            else {
                UNREACHABLE;
            }
        }
        if (NIL_P(buf)) {
            if (p == e) {
                /* nothing had to be replaced: remember the code range */
                ENC_CODERANGE_SET(str, cr);
                return Qnil;
            }
            buf = rb_str_buf_new(RSTRING_LEN(str));
        }
        if (p1 < p) {
            rb_str_buf_cat(buf, p1, p - p1);
        }
        if (p < e) {
            /* trailing incomplete character */
            if (rep) {
                rb_str_buf_cat(buf, rep, replen);
                if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
            }
            else {
                repl = rb_yield(rb_enc_str_new(p, e-p, enc));
                repl = str_compat_and_valid(repl, enc);
                rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
                if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
                    cr = ENC_CODERANGE_VALID;
            }
        }
        ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr);
        return buf;
    }
    else {
        /* ASCII incompatible */
        const char *p = RSTRING_PTR(str);
        const char *e = RSTRING_END(str);
        const char *p1 = p;
        VALUE buf = Qnil;
        const char *rep;        /* replacement bytes, or NULL to ask the block */
        long replen;
        long mbminlen = rb_enc_mbminlen(enc);
        if (rb_block_given_p()) {
            /* A block takes precedence, as in the ASCII-compatible branch;
             * without this the rb_yield paths below could never run. */
            rep = NULL;
            replen = 0;
        }
        else if (!NIL_P(repl)) {
            rep = RSTRING_PTR(repl);
            replen = RSTRING_LEN(repl);
        }
        else if (!strcasecmp(rb_enc_name(enc), "UTF-16BE")) {
            DEFAULT_REPLACE_CHAR("\xFF\xFD");
        }
        else if (!strcasecmp(rb_enc_name(enc), "UTF-16LE")) {
            DEFAULT_REPLACE_CHAR("\xFD\xFF");
        }
        else if (!strcasecmp(rb_enc_name(enc), "UTF-32BE")) {
            DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD");
        }
        else if (!strcasecmp(rb_enc_name(enc), "UTF-32lE")) {
            DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00");
        }
        else {
            DEFAULT_REPLACE_CHAR("?");
        }

        while (p < e) {
            int ret = rb_enc_precise_mbclen(p, e, enc);
            if (MBCLEN_NEEDMORE_P(ret)) {
                break;
            }
            else if (MBCLEN_CHARFOUND_P(ret)) {
                p += MBCLEN_CHARFOUND_LEN(ret);
            }
            else if (MBCLEN_INVALID_P(ret)) {
                const char *q = p;
                long clen = rb_enc_mbmaxlen(enc);
                if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
                if (p > p1) rb_str_buf_cat(buf, p1, p - p1);

                /* Same search as above, in steps of the minimum char width. */
                if (e - p < clen) clen = e - p;
                if (clen <= mbminlen * 2) {
                    clen = mbminlen;
                }
                else {
                    clen -= mbminlen;
                    for (; clen > mbminlen; clen-=mbminlen) {
                        ret = rb_enc_precise_mbclen(q, q + clen, enc);
                        if (MBCLEN_NEEDMORE_P(ret)) break;
                        if (MBCLEN_INVALID_P(ret)) continue;
                        UNREACHABLE;
                    }
                }
                if (rep) {
                    rb_str_buf_cat(buf, rep, replen);
                }
                else {
                    /* Yield only the clen bytes being replaced, not the
                     * whole remainder of the string. */
                    repl = rb_yield(rb_enc_str_new(p, clen, enc));
                    repl = str_compat_and_valid(repl, enc);
                    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
                }
                p += clen;
                p1 = p;
            }
            else {
                UNREACHABLE;
            }
        }
        if (NIL_P(buf)) {
            if (p == e) {
                ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
                return Qnil;
            }
            buf = rb_str_buf_new(RSTRING_LEN(str));
        }
        if (p1 < p) {
            rb_str_buf_cat(buf, p1, p - p1);
        }
        if (p < e) {
            /* trailing incomplete character */
            if (rep) {
                rb_str_buf_cat(buf, rep, replen);
            }
            else {
                repl = rb_yield(rb_enc_str_new(p, e-p, enc));
                repl = str_compat_and_valid(repl, enc);
                rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
            }
        }
        ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID);
        return buf;
    }
}
/*
 * Appends the UTF-8 content of `string` to `buffer` as the body of a JSON
 * string (without surrounding quotes).
 *
 * Printable ASCII (0x20..0x7f) is copied as is, except that '"', '\\' and
 * '/' are backslash-escaped.  \n, \r, \t, \f and \b use their short escapes.
 * Everything else is emitted through unicode_escape; code points above the
 * BMP are emitted as a surrogate pair of two escapes.
 *
 * Raises JSON::GeneratorError for a truncated or malformed UTF-8 sequence.
 * For surrogate code points and code points above UNI_MAX_UTF16 it raises
 * only when `flags == strictConversion`; otherwise the replacement
 * character is escaped instead.
 */
void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags)
{
    /* NOTE(review): buf is never used in this function. */
    char buf[7];
    const UTF8* source = (UTF8 *) RSTRING_PTR(string);
    const UTF8* sourceEnd = source + RSTRING_LEN(string);

    while (source < sourceEnd) {
        UTF32 ch = 0;
        /* number of continuation bytes implied by the lead byte */
        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
        if (source + extraBytesToRead >= sourceEnd) {
            rb_raise(rb_path2class("JSON::GeneratorError"),
                    "partial character in source, but hit end");
        }
        if (!isLegalUTF8(source, extraBytesToRead+1)) {
            rb_raise(rb_path2class("JSON::GeneratorError"),
                    "source sequence is illegal/malformed");
        }
        /*
         * The cases all fall through. See "Note A" below.
         */
        switch (extraBytesToRead) {
            case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
            case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
            case 3: ch += *source++; ch <<= 6;
            case 2: ch += *source++; ch <<= 6;
            case 1: ch += *source++; ch <<= 6;
            case 0: ch += *source++;
        }
        /* remove the lead/continuation marker bits accumulated above */
        ch -= offsetsFromUTF8[extraBytesToRead];

        if (ch <= UNI_MAX_BMP) {
            /* Target is a character <= 0xFFFF */
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                if (flags == strictConversion) {
                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
                    rb_raise(rb_path2class("JSON::GeneratorError"),
                            "source sequence is illegal/malformed");
                } else {
                    unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
                }
            } else {
                /* normal case */
                if (ch == '"') {
                    rb_str_buf_cat2(buffer, "\\\"");
                } else if (ch == '\\') {
                    rb_str_buf_cat2(buffer, "\\\\");
                } else if (ch == '/') {
                    rb_str_buf_cat2(buffer, "\\/");
                } else if (ch >= 0x20 && ch <= 0x7f) {
                    /* single-byte character: copy the byte just consumed */
                    rb_str_buf_cat(buffer, (char *) source - 1, 1);
                } else if (ch == '\n') {
                    rb_str_buf_cat2(buffer, "\\n");
                } else if (ch == '\r') {
                    rb_str_buf_cat2(buffer, "\\r");
                } else if (ch == '\t') {
                    rb_str_buf_cat2(buffer, "\\t");
                } else if (ch == '\f') {
                    rb_str_buf_cat2(buffer, "\\f");
                } else if (ch == '\b') {
                    rb_str_buf_cat2(buffer, "\\b");
                } else if (ch < 0x20) {
                    /* remaining control characters */
                    unicode_escape(buffer, (UTF16) ch);
                } else {
                    /* non-ASCII BMP characters; same action as the branch above */
                    unicode_escape(buffer, (UTF16) ch);
                }
            }
        } else if (ch > UNI_MAX_UTF16) {
            if (flags == strictConversion) {
                source -= (extraBytesToRead+1); /* return to the start */
                rb_raise(rb_path2class("JSON::GeneratorError"),
                        "source sequence is illegal/malformed");
            } else {
                unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
            }
        } else {
            /* target is a character in range 0xFFFF - 0x10FFFF. */
            /* split into a high and a low surrogate, one escape each */
            ch -= halfBase;
            unicode_escape(buffer, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
            unicode_escape(buffer, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
        }
    }
}
/* @overload gsub(pattern, replacement)
 *
 *   Returns the receiver with all matches of PATTERN replaced by REPLACEMENT,
 *   inheriting any taint and untrust from the receiver and from REPLACEMENT.
 *
 *   The REPLACEMENT is used as a specification for what to replace matches
 *   with:
 *
 *   <table>
 *     <thead>
 *       <tr><th>Specification</th><th>Replacement</th></tr>
 *     </thead>
 *     <tbody>
 *       <tr>
 *         <td><code>\1</code>, <code>\2</code>, …, <code>\</code><em>n</em></td>
 *         <td>Numbered sub-match <em>n</em></td>
 *       </tr>
 *       <tr>
 *         <td><code>\k&lt;</code><em>name</em><code>></code></td>
 *         <td>Named sub-match <em>name</em></td>
 *       </tr>
 *     </tbody>
 *   </table>
 *
 *   The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …,
 *   `$`_n_ are updated accordingly.
 *
 *   @param [Regexp, #to_str] pattern
 *   @param [#to_str] replacement
 *   @return [U::String]
 *
 * @overload gsub(pattern, replacements)
 *
 *   Returns the receiver with all matches of PATTERN replaced by
 *   REPLACEMENTS#[_match_], where _match_ is the matched substring, inheriting
 *   any taint and untrust from the receiver and from the
 *   REPLACEMENTS#[_match_]es, as well as any taint on REPLACEMENTS.
 *
 *   The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …,
 *   `$`_n_ are updated accordingly.
 *
 *   @param [Regexp, #to_str] pattern
 *   @param [#to_hash] replacements
 *   @raise [RuntimeError] If any replacement is the result being constructed
 *   @raise [Exception] Any error raised by REPLACEMENTS#default, if it gets
 *     called
 *   @return [U::String]
 *
 * @overload gsub(pattern){ |match| … }
 *
 *   Returns the receiver with all matches of PATTERN replaced by the results
 *   of the given block, inheriting any taint and untrust from the receiver and
 *   from the results of the given block.
 *
 *   The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …,
 *   `$`_n_ are updated accordingly.
 *
 *   @param [Regexp, #to_str] pattern
 *   @yieldparam [U::String] match
 *   @yieldreturn [#to_str]
 *   @return [U::String]
 *
 * @overload gsub(pattern)
 *
 *   Returns an Enumerator over the matches of PATTERN in the receiver.
 *
 *   The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …,
 *   `$`_n_ will be updated accordingly.
 *
 *   @param [Regexp, #to_str] pattern
 *   @return [Enumerator] */
VALUE
rb_u_string_gsub(int argc, VALUE *argv, VALUE self)
{
        VALUE pattern, replacement;
        VALUE replacements = Qnil;      /* Hash form of the second argument, if any */
        bool use_block = false;
        bool tainted = false;

        /* With a pattern only: hand back an Enumerator when no block was
         * given, otherwise take replacements from the block. */
        if (argc == 1) {
                RETURN_ENUMERATOR(self, argc, argv);
                use_block = true;
        }

        if (rb_scan_args(argc, argv, "11", &pattern, &replacement) == 2) {
                /* The second argument is either Hash-like or String-like. */
                replacements = rb_check_convert_type(replacement, T_HASH, "Hash", "to_hash");
                if (NIL_P(replacements))
                        StringValue(replacement);
                if (OBJ_TAINTED(replacement))
                        tainted = true;
        }

        pattern = rb_u_pattern_argument(pattern, true);

        VALUE str = rb_str_to_str(self);
        long begin = rb_reg_search(pattern, str, 0, 0);
        /* No match at all: the receiver itself is returned. */
        if (begin < 0)
                return self;

        const char *base = RSTRING_PTR(str);
        const char *p = base;           /* first byte not yet copied to the result */
        const char *end = RSTRING_END(str);
        VALUE substituted = rb_u_str_buf_new(RSTRING_LEN(str) + 30);
        do {
                /* The match data of the search that just succeeded. */
                VALUE match = rb_backref_get();
                struct re_registers *registers = RMATCH_REGS(match);
                VALUE result;

                if (use_block || !NIL_P(replacements)) {
                        if (use_block) {
                                VALUE ustr = rb_u_string_new_rb(rb_reg_nth_match(0, match));
                                result = rb_u_string_object_as_string(rb_yield(ustr));
                        } else {
                                VALUE ustr = rb_u_string_new_c(self, base + registers->beg[0], registers->end[0] - registers->beg[0]);
                                result = rb_u_string_object_as_string(rb_hash_aref(replacements, ustr));
                        }

                        /* User code must not hand back the buffer being built. */
                        if (result == substituted)
                                rb_u_raise(rb_eRuntimeError, "result of block is string being built; please try not to cheat");
                } else
                        result =
#ifdef HAVE_RB_REG_REGSUB4
                                rb_reg_regsub(replacement, str, registers, pattern);
#else
                                rb_reg_regsub(replacement, str, registers);
#endif

                if (OBJ_TAINTED(result))
                        tainted = true;

                const struct rb_u_string *value = RVAL2USTRING_ANY(result);

                /* Copy the unmatched text before this match, then the replacement. */
                rb_str_buf_cat(substituted, p, registers->beg[0] - (p - base));
                rb_str_buf_cat(substituted, USTRING_STR(value), USTRING_LENGTH(value));
                OBJ_INFECT(substituted, result);

                p = base + registers->end[0];
                /* NOTE(review): for an empty match p is moved on with u_next,
                 * but the next search still starts at registers->end[0] and
                 * the character stepped over is not copied here -- confirm
                 * the behaviour for patterns that can match the empty
                 * string. */
                if (registers->beg[0] == registers->end[0])
                        p = u_next(p);
                if (p >= end)
                        break;

                begin = rb_reg_search(pattern, str, registers->end[0], 0);
        } while (begin >= 0);

        /* Copy whatever follows the last match. */
        if (p < end)
                rb_str_buf_cat(substituted, p, end - p);

        /* NOTE(review): the result of this search is unused; presumably it is
         * run only for its effect on the back-reference variables, and the
         * start offset `end - p` looks unusual -- confirm. */
        rb_reg_search(pattern, str, end - p, 0);

        RBASIC(substituted)->klass = rb_obj_class(str);
        OBJ_INFECT(substituted, str);
        if (tainted)
                OBJ_TAINT(substituted);

        return rb_u_string_new_rb(substituted);
}
/*
 * Turns a phrase into a question: appends "?" to `str` in place and returns
 * the same String.
 */
VALUE ss_phrase_as_question(VALUE self, VALUE str) {
  static const char suffix[] = "?";

  rb_str_buf_cat(str, suffix, strlen(suffix));
  return str;
}