VALUE rb_str_ends_with_p( VALUE str, VALUE oth) { long i; char *s, *o; VALUE ost; #ifdef HAVE_HEADER_RUBY_H #else if (!rb_str_comparable( str, oth)) return Qnil; #endif ost = rb_string_value( &oth); i = RSTRING_LEN( ost); if (i > RSTRING_LEN( str)) return Qnil; s = RSTRING_END( str); o = RSTRING_END( ost); for (; i; i--) if (*--s != *--o) return Qnil; #ifdef HAVE_HEADER_RUBY_H return INT2FIX( RSTRING_LEN( str) - RSTRING_LEN( ost)); #else return INT2FIX( rb_str_strlen( str) - rb_str_strlen( ost)); #endif }
static mrb_value mrb_js_obj_get(mrb_state *mrb, mrb_value klass) { mrb_value str; mrb_get_args(mrb, "S", &str); NPObject *window; NPN_GetValue(MRB_UD_NPP(mrb), NPNVWindowNPObject, &window); NPUTF8 *s = (NPUTF8 *)NPN_MemAlloc(RSTRING_LEN(str)); std::copy(RSTRING_PTR(str), RSTRING_END(str), s); NPString evaluated_str = { s, RSTRING_LEN(str) }; NPVariant result; std::string js_str(RSTRING_PTR(str), RSTRING_LEN(str)); NPN_Evaluate(MRB_UD_NPP(mrb), window, &evaluated_str, &result); NPN_ReleaseObject(window); NPN_MemFree(s); mrb_value ret; if (!convert_js_to_mrb(MRB_UD_NPP(mrb), result, mrb, &ret)){ return mrb_nil_value(); } return ret; }
static int load_encoding(const char *name) { VALUE enclib = rb_sprintf("enc/%s.so", name); VALUE verbose = ruby_verbose; VALUE debug = ruby_debug; VALUE errinfo; char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3; int loaded; int idx; while (s < e) { if (!ISALNUM(*s)) *s = '_'; else if (ISUPPER(*s)) *s = (char)TOLOWER(*s); ++s; } FL_UNSET(enclib, FL_TAINT); enclib = rb_fstring(enclib); ruby_verbose = Qfalse; ruby_debug = Qfalse; errinfo = rb_errinfo(); loaded = rb_require_internal(enclib, rb_safe_level()); ruby_verbose = verbose; ruby_debug = debug; rb_set_errinfo(errinfo); if (loaded < 0 || 1 < loaded) return -1; if ((idx = rb_enc_registered(name)) < 0) return -1; if (enc_autoload_p(enc_table.list[idx].enc)) return -1; return idx; }
static int load_encoding(const char *name) { VALUE enclib = rb_sprintf("enc/%s.so", name); VALUE verbose = ruby_verbose; VALUE debug = ruby_debug; VALUE loaded; char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3; int idx; while (s < e) { if (!ISALNUM(*s)) *s = '_'; else if (ISUPPER(*s)) *s = TOLOWER(*s); ++s; } OBJ_FREEZE(enclib); ruby_verbose = Qfalse; ruby_debug = Qfalse; loaded = rb_protect(require_enc, enclib, 0); ruby_verbose = verbose; ruby_debug = debug; rb_set_errinfo(Qnil); if (NIL_P(loaded)) return -1; if ((idx = rb_enc_registered(name)) < 0) return -1; if (enc_autoload_p(enc_table.list[idx].enc)) return -1; return idx; }
VALUE rb_str_vcatf(VALUE str, const char *fmt, va_list ap) { rb_printf_buffer_extra buffer; #define f buffer.base VALUE klass; StringValue(str); rb_str_modify(str); f._flags = __SWR | __SSTR; f._bf._size = 0; f._w = rb_str_capacity(str); f._bf._base = (unsigned char *)str; f._p = (unsigned char *)RSTRING_END(str); klass = RBASIC(str)->klass; RBASIC_CLEAR_CLASS(str); f.vwrite = ruby__sfvwrite; f.vextra = ruby__sfvextra; buffer.value = 0; BSD_vfprintf(&f, fmt, ap); RBASIC_SET_CLASS_RAW(str, klass); rb_str_resize(str, (char *)f._p - RSTRING_PTR(str)); #undef f return str; }
static mrb_value mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) { mrb_int utf8_len = mrb_utf8_strlen(str); if (utf8_len > 1) { mrb_int len = RSTRING_LEN(str); char *buf = (char *)mrb_malloc(mrb, (size_t)len); unsigned char* p = (unsigned char*)buf; unsigned char* e = (unsigned char*)buf + len; unsigned char* r = (unsigned char*)RSTRING_END(str); memcpy(buf, RSTRING_PTR(str), len); mrb_str_modify(mrb, mrb_str_ptr(str)); while (p<e) { mrb_int clen = utf8len(p); r -= clen; memcpy(r, p, clen); p += clen; } mrb_free(mrb, buf); } return str; }
static size_t str_format_len(mrb_value str) { size_t dump_len = 0; char *src; for (src = RSTRING_PTR(str); src < RSTRING_END(str); src++) { switch (*src) { case 0x07:/* BEL */ /* fall through */ case 0x08:/* BS */ /* fall through */ case 0x09:/* HT */ /* fall through */ case 0x0A:/* LF */ /* fall through */ case 0x0B:/* VT */ /* fall through */ case 0x0C:/* FF */ /* fall through */ case 0x0D:/* CR */ /* fall through */ case 0x22:/* " */ /* fall through */ case 0x27:/* ' */ /* fall through */ case 0x3F:/* ? */ /* fall through */ case 0x5C:/* \ */ /* fall through */ dump_len += 2; break; default: dump_len++; break; } } return dump_len; }
static char* str_to_format(mrb_value str, char *buf) { char *src; char *dst; for (src = RSTRING_PTR(str), dst = buf; src < RSTRING_END(str); src++) { switch (*src) { case 0x07:/* BEL */ *dst++ = '\\'; *dst++ = 'a'; break; case 0x08:/* BS */ *dst++ = '\\'; *dst++ = 'b'; break; case 0x09:/* HT */ *dst++ = '\\'; *dst++ = 't'; break; case 0x0A:/* LF */ *dst++ = '\\'; *dst++ = 'n'; break; case 0x0B:/* VT */ *dst++ = '\\'; *dst++ = 'v'; break; case 0x0C:/* FF */ *dst++ = '\\'; *dst++ = 'f'; break; case 0x0D:/* CR */ *dst++ = '\\'; *dst++ = 'r'; break; case 0x22:/* " */ *dst++ = '\\'; *dst++ = '\"'; break; case 0x27:/* ' */ *dst++ = '\\'; *dst++ = '\''; break; case 0x3F:/* ? */ *dst++ = '\\'; *dst++ = '\?'; break; case 0x5C:/* \ */ *dst++ = '\\'; *dst++ = '\\'; break; default: *dst++ = *src; break; } } return buf; }
bool convert_mrb_to_js_string(mrb_state *mrb, mrb_value value, NPP npp, NPVariant *result) { NPUTF8 *ptr = (NPUTF8 *)NPN_MemAlloc(RSTRING_LEN(value)); if (!ptr){ return false; } std::copy(RSTRING_PTR(value), RSTRING_END(value), ptr); STRINGN_TO_NPVARIANT(ptr, RSTRING_LEN(value), *result); return true; }
static VALUE rb_str_blank_as(VALUE str) { rb_encoding *enc; char *s, *e; enc = STR_ENC_GET(str); s = RSTRING_PTR(str); if (!s || RSTRING_LEN(str) == 0) return Qtrue; e = RSTRING_END(str); while (s < e) { int n; unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc); switch (cc) { case 9: case 0xa: case 0xb: case 0xc: case 0xd: case 0x20: case 0x85: case 0xa0: case 0x1680: case 0x180e: case 0x2000: case 0x2001: case 0x2002: case 0x2003: case 0x2004: case 0x2005: case 0x2006: case 0x2007: case 0x2008: case 0x2009: case 0x200a: case 0x2028: case 0x2029: case 0x202f: case 0x205f: case 0x3000: /* found */ break; default: return Qfalse; } s += n; } return Qtrue; }
static VALUE bug_str_s_cstr_noembed(VALUE self, VALUE str) { VALUE str2 = rb_str_new(NULL, 0); long capacity = RSTRING_LEN(str) + TERM_LEN(str); char *buf = ALLOC_N(char, capacity); Check_Type(str, T_STRING); FL_SET((str2), STR_NOEMBED); memcpy(buf, RSTRING_PTR(str), capacity); RBASIC(str2)->flags &= ~RSTRING_EMBED_LEN_MASK; RSTRING(str2)->as.heap.aux.capa = capacity; RSTRING(str2)->as.heap.ptr = buf; RSTRING(str2)->as.heap.len = RSTRING_LEN(str); TERM_FILL(RSTRING_END(str2), TERM_LEN(str)); return str2; }
static VALUE rb_str_blank(VALUE str) { rb_encoding *enc; char *s, *e; enc = STR_ENC_GET(str); s = RSTRING_PTR(str); if (!s || RSTRING_LEN(str) == 0) return Qtrue; e = RSTRING_END(str); while (s < e) { int n; unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc); if (!rb_isspace(cc) && cc != 0) return Qfalse; s += n; } return Qtrue; }
static VALUE str_encode_bang(int argc, VALUE *argv, VALUE str) { VALUE newstr = str; int encidx = str_transcode(argc, argv, &newstr); int cr = 0; if (encidx < 0) return str; rb_str_shared_replace(str, newstr); rb_enc_associate_index(str, encidx); /* transcoded string never be broken. */ if (rb_enc_asciicompat(rb_enc_from_index(encidx))) { rb_str_coderange_scan_restartable(RSTRING_PTR(str), RSTRING_END(str), 0, &cr); } else { cr = ENC_CODERANGE_VALID; } ENC_CODERANGE_SET(str, cr); return str; }
VALUE rb_str_vcatf(VALUE str, const char *fmt, va_list ap) { rb_printf_buffer f; VALUE klass; StringValue(str); rb_str_modify(str); f._flags = __SWR | __SSTR; f._bf._size = 0; f._w = rb_str_capacity(str); f._bf._base = (unsigned char *)str; f._p = (unsigned char *)RSTRING_END(str); klass = RBASIC(str)->klass; RBASIC(str)->klass = 0; f.vwrite = ruby__sfvwrite; BSD_vfprintf(&f, fmt, ap); RBASIC(str)->klass = klass; rb_str_resize(str, (char *)f._p - RSTRING_PTR(str)); return str; }
static VALUE decode_cesu8(struct state *state, VALUE str) { duk_context *ctx = state->ctx; VALUE res = rb_str_new(0, 0); const char *ptr = RSTRING_PTR(str); const char *end = RSTRING_END(str); long len; while (ptr < end) { len = (end - ptr); unsigned short code = utf8_to_uv(ptr, &len); rb_str_buf_cat(res, (char*)&code, 2); ptr += len; } rb_enc_associate(res, utf16enc); VALUE utf8res = rb_str_conv_enc(res, utf16enc, rb_utf8_encoding()); if (utf8res == res) { clean_raise(ctx, rb_eEncodingError, "cannot convert JavaScript string to UTF-16"); } return utf8res; }
static char* str_to_format(mrb_value str, char *buf) { char *src, *dst; for (src = RSTRING_PTR(str), dst = buf; src < RSTRING_END(str);) { switch (*src) { case 0x07:/* BEL */ memcpy(dst, "\\a", 2); dst+=2; src+=2; break; case 0x08:/* BS */ memcpy(dst, "\\b", 2); dst+=2; src+=2; break; case 0x09:/* HT */ memcpy(dst, "\\t", 2); dst+=2; src+=2; break; case 0x0A:/* LF */ memcpy(dst, "\\n", 2); dst+=2; src+=2; break; case 0x0B:/* VT */ memcpy(dst, "\\v", 2); dst+=2; src+=2; break; case 0x0C:/* FF */ memcpy(dst, "\\f", 2); dst+=2; src+=2; break; case 0x0D:/* CR */ memcpy(dst, "\\r", 2); dst+=2; src+=2; break; case 0x22:/* " */ memcpy(dst, "\\\"", 2); dst+=2; src+=2; break; case 0x27:/* ' */ memcpy(dst, "\\\'", 2); dst+=2; src+=2; break; case 0x3F:/* ? */ memcpy(dst, "\\\?", 2); dst+=2; src+=2; break; case 0x5C:/* \ */ memcpy(dst, "\\\\", 2); dst+=2; src+=2; break; default: *dst++ = *src++; break; } } return buf; }
mrb_value mrb_str_format(mrb_state *mrb, int argc, const mrb_value *argv, mrb_value fmt) { const char *p, *end; char *buf; long blen, bsiz; mrb_value result; int width, prec, flags = FNONE; int nextarg = 1; int posarg = 0; mrb_value nextvalue; mrb_value tmp; mrb_value str; mrb_value hash = mrb_undef_value(); #define CHECK_FOR_WIDTH(f) \ if ((f) & FWIDTH) { \ mrb_raise(mrb, E_ARGUMENT_ERROR, "width given twice"); \ } \ if ((f) & FPREC0) { \ mrb_raise(mrb, E_ARGUMENT_ERROR, "width after precision"); \ } #define CHECK_FOR_FLAGS(f) \ if ((f) & FWIDTH) { \ mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after width"); \ } \ if ((f) & FPREC0) { \ mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision"); \ } ++argc; --argv; mrb_string_value(mrb, &fmt); fmt = mrb_str_new4(mrb, fmt); p = RSTRING_PTR(fmt); end = p + RSTRING_LEN(fmt); blen = 0; bsiz = 120; result = mrb_str_buf_new(mrb, bsiz); buf = RSTRING_PTR(result); memset(buf, 0, bsiz); for (; p < end; p++) { const char *t; int n; mrb_sym id = 0; for (t = p; t < end && *t != '%'; t++) ; PUSH(p, t - p); if (t >= end) goto sprint_exit; /* end of fmt string */ p = t + 1; /* skip `%' */ width = prec = -1; nextvalue = mrb_undef_value(); retry: switch (*p) { default: mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed format string - %%%c", *p); break; case ' ': CHECK_FOR_FLAGS(flags); flags |= FSPACE; p++; goto retry; case '#': CHECK_FOR_FLAGS(flags); flags |= FSHARP; p++; goto retry; case '+': CHECK_FOR_FLAGS(flags); flags |= FPLUS; p++; goto retry; case '-': CHECK_FOR_FLAGS(flags); flags |= FMINUS; p++; goto retry; case '0': CHECK_FOR_FLAGS(flags); flags |= FZERO; p++; goto retry; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': n = 0; GETNUM(n, width); if (*p == '$') { if (!UNDEF_P(nextvalue)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "value given twice - %d$", n); } nextvalue = GETPOSARG(n); p++; goto retry; } CHECK_FOR_WIDTH(flags); width = n; flags |= FWIDTH; goto retry; case '<': case '{': { const char *start = p; char term = (*p == '<') ? '>' : '}'; mrb_value symname; for (; p < end && *p != term; ) p++; if (id) { mrb_raise(mrb, E_ARGUMENT_ERROR, "name%.*s after <%s>", (int)(p - start + 1), start, mrb_sym2name(mrb, id)); } symname = mrb_str_new(mrb, start + 1, p - start - 1); id = mrb_intern(mrb, RSTRING_PTR(symname)); nextvalue = GETNAMEARG(mrb_symbol_value(id), start, (int)(p - start + 1)); if (UNDEF_P(nextvalue)) { mrb_raise(mrb, E_KEY_ERROR, "key%.*s not found", (int)(p - start + 1), start); } if (term == '}') goto format_s; p++; goto retry; } case '*': CHECK_FOR_WIDTH(flags); flags |= FWIDTH; GETASTER(width); if (width < 0) { flags |= FMINUS; width = -width; } p++; goto retry; case '.': if (flags & FPREC0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice"); } flags |= FPREC|FPREC0; prec = 0; p++; if (*p == '*') { GETASTER(prec); if (prec < 0) { /* ignore negative precision */ flags &= ~FPREC; } p++; goto retry; } GETNUM(prec, precision); goto retry; case '\n': case '\0': p--; case '%': if (flags != FNONE) { mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %%"); } PUSH("%", 1); break; case 'c': { mrb_value val = GETARG(); mrb_value tmp; unsigned int c; int n; #ifdef INCLUDE_ENCODING mrb_encoding *enc = mrb_enc_get(mrb, fmt); #endif //INCLUDE_ENCODING tmp = mrb_check_string_type(mrb, val); if (!mrb_nil_p(tmp)) { if (RSTRING_LEN(tmp) != 1 ) { mrb_raise(mrb, E_ARGUMENT_ERROR, "%%c requires a character"); } #ifdef INCLUDE_ENCODING c = mrb_enc_codepoint_len(mrb, RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); #else c = RSTRING_PTR(tmp)[0]; n = 1; #endif //INCLUDE_ENCODING } else { c = mrb_fixnum(val); n = mrb_enc_codelen(mrb, c, enc); } if (n <= 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character"); } if (!(flags & FWIDTH)) { CHECK(n); mrb_enc_mbcput(c, &buf[blen], enc); blen += n; } else if ((flags & FMINUS)) { CHECK(n); mrb_enc_mbcput(c, &buf[blen], enc); blen += n; FILL(' ', width-1); } else { FILL(' ', width-1); CHECK(n); mrb_enc_mbcput(c, &buf[blen], enc); blen += n; } } break; case 's': case 'p': format_s: { mrb_value arg = GETARG(); long len, slen; #ifdef INCLUDE_ENCODING mrb_encoding *enc = mrb_enc_get(mrb, fmt); #endif //INCLUDE_ENCODING if (*p == 'p') arg = mrb_inspect(mrb, arg); str = mrb_obj_as_string(mrb, arg); len = RSTRING_LEN(str); mrb_str_set_len(mrb, result, blen); if (flags&(FPREC|FWIDTH)) { slen = RSTRING_LEN(str); if (slen < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence"); } if ((flags&FPREC) && (prec < slen)) { #ifdef INCLUDE_ENCODING char *p = mrb_enc_nth(mrb, RSTRING_PTR(str), RSTRING_END(str),prec, enc); #else char *p = RSTRING_PTR(str) + prec; #endif //INCLUDE_ENCODING slen = prec; len = p - RSTRING_PTR(str); } /* need to adjust multi-byte string pos */ if ((flags&FWIDTH) && (width > slen)) { width -= (int)slen; if (!(flags&FMINUS)) { CHECK(width); while (width--) { buf[blen++] = ' '; } } CHECK(len); memcpy(&buf[blen], RSTRING_PTR(str), len); blen += len; if (flags&FMINUS) { CHECK(width); while (width--) { buf[blen++] = ' '; } } mrb_enc_associate(mrb, result, enc); break; } } PUSH(RSTRING_PTR(str), len); mrb_enc_associate(mrb, result, enc); } break; case 'd': case 'i': case 'o': case 'x': case 'X': case 'b': case 'B': case 'u': { mrb_value val = GETARG(); char fbuf[32], nbuf[64], *s; const char *prefix = 0; int sign = 0, dots = 0; char sc = 0; long v = 0, org_v = 0; int base; int len; switch (*p) { case 'd': case 'i': case 'u': sign = 1; break; case 'o': case 'x': case 'X': case 'b': case 'B': if (flags&(FPLUS|FSPACE)) sign = 1; break; } if (flags & FSHARP) { switch (*p) { case 'o': prefix = "0"; break; case 'x': prefix = "0x"; break; case 'X': prefix = "0X"; break; case 'b': prefix = "0b"; break; case 'B': prefix = "0B"; break; } } bin_retry: switch (mrb_type(val)) { case MRB_TT_FLOAT: if (FIXABLE(mrb_float(val))) { val = mrb_fixnum_value((mrb_int)mrb_float(val)); goto bin_retry; } val = mrb_flt2big(mrb, mrb_float(val)); if (FIXNUM_P(val)) goto bin_retry; break; case MRB_TT_STRING: val = mrb_str_to_inum(mrb, val, 0, TRUE); goto bin_retry; case MRB_TT_FIXNUM: v = (long)mrb_fixnum(val); break; default: val = mrb_Integer(mrb, val); goto bin_retry; } switch (*p) { case 'o': base = 8; break; case 'x': case 'X': base = 16; break; case 'b': case 'B': base = 2; break; case 'u': case 'd': case 'i': default: base = 10; break; } if (base == 2) { org_v = v; if ( v < 0 && !sign ) { val = mrb_fix2binstr(mrb, mrb_fixnum_value(v), base); dots = 1; } else { val = mrb_fix2str(mrb, mrb_fixnum_value(v), base); } v = mrb_fixnum(mrb_str_to_inum(mrb, val, 10, 0/*Qfalse*/)); } if (sign) { char c = *p; if (c == 'i') c = 'd'; /* %d and %i are identical */ if (base == 2) c = 'd'; if (v < 0) { v = -v; sc = '-'; width--; } else if (flags & FPLUS) { sc = '+'; width--; } else if (flags & FSPACE) { sc = ' '; width--; } snprintf(fbuf, sizeof(fbuf), "%%l%c", c); snprintf(nbuf, sizeof(nbuf), fbuf, v); s = nbuf; } else { char c = *p; if (c == 'X') c = 'x'; if (base == 2) c = 'd'; s = nbuf; if (v < 0) { dots = 1; } snprintf(fbuf, sizeof(fbuf), "%%l%c", c); snprintf(++s, sizeof(nbuf) - 1, fbuf, v); if (v < 0) { char d = 0; s = remove_sign_bits(s, base); switch (base) { case 16: d = 'f'; break; case 8: d = '7'; break; case 2: d = '1'; break; } if (d && *s != d) { *--s = d; } } } len = (int)strlen(s); if (dots) { prec -= 2; width -= 2; } if (*p == 'X') { char *pp = s; int c; #ifdef INCLUDE_ENCODING mrb_encoding *enc = mrb_enc_get(mrb, fmt); #endif //INCLUDE_ENCODING while ((c = (int)(unsigned char)*pp) != 0) { #ifdef INCLUDE_ENCODING *pp = mrb_enc_toupper(c, enc); #else *pp = toupper(c); #endif //INCLUDE_ENCODING pp++; } } if (prefix && !prefix[1]) { /* octal */ if (dots) { prefix = 0; } else if (len == 1 && *s == '0') { len = 0; if (flags & FPREC) prec--; } else if ((flags & FPREC) && (prec > len)) { prefix = 0; } } else if (len == 1 && *s == '0') { prefix = 0; } if (prefix) { width -= (int)strlen(prefix); } if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { prec = width; width = 0; } else { if (prec < len) { if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0; prec = len; } width -= prec; } if (!(flags&FMINUS)) { CHECK(width); while (width-- > 0) { buf[blen++] = ' '; } } if (sc) PUSH(&sc, 1); if (prefix) { int plen = (int)strlen(prefix); PUSH(prefix, plen); } CHECK(prec - len); if (dots) PUSH("..", 2); if (v < 0 || (base == 2 && org_v < 0)) { char c = sign_bits(base, p); while (len < prec--) { buf[blen++] = c; } } else if ((flags & (FMINUS|FPREC)) != FMINUS) { char c = '0'; while (len < prec--) { buf[blen++] = c; } } PUSH(s, len); CHECK(width); while (width-- > 0) { buf[blen++] = ' '; } } break; case 'f': case 'g': case 'G': case 'e': case 'E': case 'a': case 'A': { mrb_value val = GETARG(); double fval; int i, need = 6; char fbuf[32]; fval = mrb_float(mrb_Float(mrb, val)); if (isnan(fval) || isinf(fval)) { const char *expr; if (isnan(fval)) { expr = "NaN"; } else { expr = "Inf"; } need = (int)strlen(expr); if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS)) need++; if ((flags & FWIDTH) && need < width) need = width; CHECK(need + 1); snprintf(&buf[blen], need + 1, "%*s", need, ""); if (flags & FMINUS) { if (!isnan(fval) && fval < 0.0) buf[blen++] = '-'; else if (flags & FPLUS) buf[blen++] = '+'; else if (flags & FSPACE) blen++; memcpy(&buf[blen], expr, strlen(expr)); } else { if (!isnan(fval) && fval < 0.0) buf[blen + need - strlen(expr) - 1] = '-'; else if (flags & FPLUS) buf[blen + need - strlen(expr) - 1] = '+'; else if ((flags & FSPACE) && need > width) blen++; memcpy(&buf[blen + need - strlen(expr)], expr, strlen(expr)); } blen += strlen(&buf[blen]); break; } fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec); need = 0; if (*p != 'e' && *p != 'E') { i = INT_MIN; frexp(fval, &i); if (i > 0) need = BIT_DIGITS(i); } need += (flags&FPREC) ? prec : 6; if ((flags&FWIDTH) && need < width) need = width; need += 20; CHECK(need); snprintf(&buf[blen], need, fbuf, fval); blen += strlen(&buf[blen]); } break; } flags = FNONE; } sprint_exit: /* XXX - We cannot validate the number of arguments if (digit)$ style used. */ if (posarg >= 0 && nextarg < argc) { const char *mesg = "too many arguments for format string"; if (RTEST(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, "%s", mesg); if (RTEST(ruby_verbose)) mrb_warn("%s", mesg); } mrb_str_resize(mrb, result, blen); return result; }
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt) { enum {default_float_precision = 6}; rb_encoding *enc; const char *p, *end; char *buf; long blen, bsiz; VALUE result; long scanned = 0; int coderange = ENC_CODERANGE_7BIT; int width, prec, flags = FNONE; int nextarg = 1; int posarg = 0; int tainted = 0; VALUE nextvalue; VALUE tmp; VALUE str; volatile VALUE hash = Qundef; #define CHECK_FOR_WIDTH(f) \ if ((f) & FWIDTH) { \ rb_raise(rb_eArgError, "width given twice"); \ } \ if ((f) & FPREC0) { \ rb_raise(rb_eArgError, "width after precision"); \ } #define CHECK_FOR_FLAGS(f) \ if ((f) & FWIDTH) { \ rb_raise(rb_eArgError, "flag after width"); \ } \ if ((f) & FPREC0) { \ rb_raise(rb_eArgError, "flag after precision"); \ } ++argc; --argv; if (OBJ_TAINTED(fmt)) tainted = 1; StringValue(fmt); enc = rb_enc_get(fmt); fmt = rb_str_new4(fmt); p = RSTRING_PTR(fmt); end = p + RSTRING_LEN(fmt); blen = 0; bsiz = 120; result = rb_str_buf_new(bsiz); rb_enc_copy(result, fmt); buf = RSTRING_PTR(result); memset(buf, 0, bsiz); ENC_CODERANGE_SET(result, coderange); for (; p < end; p++) { const char *t; int n; VALUE sym = Qnil; for (t = p; t < end && *t != '%'; t++) ; PUSH(p, t - p); if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange); ENC_CODERANGE_SET(result, coderange); } if (t >= end) { /* end of fmt string */ goto sprint_exit; } p = t + 1; /* skip `%' */ width = prec = -1; nextvalue = Qundef; retry: switch (*p) { default: if (rb_enc_isprint(*p, enc)) rb_raise(rb_eArgError, "malformed format string - %%%c", *p); else rb_raise(rb_eArgError, "malformed format string"); break; case ' ': CHECK_FOR_FLAGS(flags); flags |= FSPACE; p++; goto retry; case '#': CHECK_FOR_FLAGS(flags); flags |= FSHARP; p++; goto retry; case '+': CHECK_FOR_FLAGS(flags); flags |= FPLUS; p++; goto retry; case '-': CHECK_FOR_FLAGS(flags); flags |= FMINUS; p++; goto retry; case '0': CHECK_FOR_FLAGS(flags); flags |= FZERO; p++; goto retry; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': n = 0; GETNUM(n, width); if (*p == '$') { if (nextvalue != Qundef) { rb_raise(rb_eArgError, "value given twice - %d$", n); } nextvalue = GETPOSARG(n); p++; goto retry; } CHECK_FOR_WIDTH(flags); width = n; flags |= FWIDTH; goto retry; case '<': case '{': { const char *start = p; char term = (*p == '<') ? '>' : '}'; int len; for (; p < end && *p != term; ) { p += rb_enc_mbclen(p, end, enc); } if (p >= end) { rb_raise(rb_eArgError, "malformed name - unmatched parenthesis"); } #if SIZEOF_INT < SIZEOF_SIZE_T if ((size_t)(p - start) >= INT_MAX) { const int message_limit = 20; len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start); rb_enc_raise(enc, rb_eArgError, "too long name (%"PRIdSIZE" bytes) - %.*s...%c", (size_t)(p - start - 2), len, start, term); } #endif len = (int)(p - start + 1); /* including parenthesis */ if (sym != Qnil) { rb_enc_raise(enc, rb_eArgError, "named%.*s after <%"PRIsVALUE">", len, start, rb_sym2str(sym)); } CHECKNAMEARG(start, len, enc); get_hash(&hash, argc, argv); sym = rb_check_symbol_cstr(start + 1, len - 2 /* without parenthesis */, enc); if (sym != Qnil) nextvalue = rb_hash_lookup2(hash, sym, Qundef); if (nextvalue == Qundef) { rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start); } if (term == '}') goto format_s; p++; goto retry; } case '*': CHECK_FOR_WIDTH(flags); flags |= FWIDTH; GETASTER(width); if (width < 0) { flags |= FMINUS; width = -width; } p++; goto retry; case '.': if (flags & FPREC0) { rb_raise(rb_eArgError, "precision given twice"); } flags |= FPREC|FPREC0; prec = 0; p++; if (*p == '*') { GETASTER(prec); if (prec < 0) { /* ignore negative precision */ flags &= ~FPREC; } p++; goto retry; } GETNUM(prec, precision); goto retry; case '\n': case '\0': p--; case '%': if (flags != FNONE) { rb_raise(rb_eArgError, "invalid format character - %%"); } PUSH("%", 1); break; case 'c': { VALUE val = GETARG(); VALUE tmp; unsigned int c; int n; tmp = rb_check_string_type(val); if (!NIL_P(tmp)) { if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) { rb_raise(rb_eArgError, "%%c requires a character"); } c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); RB_GC_GUARD(tmp); } else { c = NUM2INT(val); n = rb_enc_codelen(c, enc); } if (n <= 0) { rb_raise(rb_eArgError, "invalid character"); } if (!(flags & FWIDTH)) { CHECK(n); rb_enc_mbcput(c, &buf[blen], enc); blen += n; } else if ((flags & FMINUS)) { CHECK(n); rb_enc_mbcput(c, &buf[blen], enc); blen += n; FILL(' ', width-1); } else { FILL(' ', width-1); CHECK(n); rb_enc_mbcput(c, &buf[blen], enc); blen += n; } } break; case 's': case 'p': format_s: { VALUE arg = GETARG(); long len, slen; if (*p == 'p') arg = rb_inspect(arg); str = rb_obj_as_string(arg); if (OBJ_TAINTED(str)) tainted = 1; len = RSTRING_LEN(str); rb_str_set_len(result, blen); if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { int cr = coderange; scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); ENC_CODERANGE_SET(result, (cr == ENC_CODERANGE_UNKNOWN ? ENC_CODERANGE_BROKEN : (coderange = cr))); } enc = rb_enc_check(result, str); if (flags&(FPREC|FWIDTH)) { slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc); if (slen < 0) { rb_raise(rb_eArgError, "invalid mbstring sequence"); } if ((flags&FPREC) && (prec < slen)) { char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str), prec, enc); slen = prec; len = p - RSTRING_PTR(str); } /* need to adjust multi-byte string pos */ if ((flags&FWIDTH) && (width > slen)) { width -= (int)slen; if (!(flags&FMINUS)) { CHECK(width); while (width--) { buf[blen++] = ' '; } } CHECK(len); memcpy(&buf[blen], RSTRING_PTR(str), len); RB_GC_GUARD(str); blen += len; if (flags&FMINUS) { CHECK(width); while (width--) { buf[blen++] = ' '; } } rb_enc_associate(result, enc); break; } } PUSH(RSTRING_PTR(str), len); RB_GC_GUARD(str); rb_enc_associate(result, enc); } break; case 'd': case 'i': case 'o': case 'x': case 'X': case 'b': case 'B': case 'u': { volatile VALUE val = GETARG(); int valsign; char nbuf[64], *s; const char *prefix = 0; int sign = 0, dots = 0; char sc = 0; long v = 0; int base, bignum = 0; int len; switch (*p) { case 'd': case 'i': case 'u': sign = 1; break; case 'o': case 'x': case 'X': case 'b': case 'B': if (flags&(FPLUS|FSPACE)) sign = 1; break; } if (flags & FSHARP) { switch (*p) { case 'o': prefix = "0"; break; case 'x': prefix = "0x"; break; case 'X': prefix = "0X"; break; case 'b': prefix = "0b"; break; case 'B': prefix = "0B"; break; } } bin_retry: switch (TYPE(val)) { case T_FLOAT: if (FIXABLE(RFLOAT_VALUE(val))) { val = LONG2FIX((long)RFLOAT_VALUE(val)); goto bin_retry; } val = rb_dbl2big(RFLOAT_VALUE(val)); if (FIXNUM_P(val)) goto bin_retry; bignum = 1; break; case T_STRING: val = rb_str_to_inum(val, 0, TRUE); goto bin_retry; case T_BIGNUM: bignum = 1; break; case T_FIXNUM: v = FIX2LONG(val); break; default: val = rb_Integer(val); goto bin_retry; } switch (*p) { case 'o': base = 8; break; case 'x': case 'X': base = 16; break; case 'b': case 'B': base = 2; break; case 'u': case 'd': case 'i': default: base = 10; break; } if (base != 10) { int numbits = ffs(base)-1; size_t abs_nlz_bits; size_t numdigits = rb_absint_numwords(val, numbits, &abs_nlz_bits); long i; if (INT_MAX-1 < numdigits) /* INT_MAX is used because rb_long2int is used later. */ rb_raise(rb_eArgError, "size too big"); if (sign) { if (numdigits == 0) numdigits = 1; tmp = rb_str_new(NULL, numdigits); valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp), 1, CHAR_BIT-numbits, INTEGER_PACK_BIG_ENDIAN); for (i = 0; i < RSTRING_LEN(tmp); i++) RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]]; s = RSTRING_PTR(tmp); if (valsign < 0) { sc = '-'; width--; } else if (flags & FPLUS) { sc = '+'; width--; } else if (flags & FSPACE) { sc = ' '; width--; } } else { /* Following conditional "numdigits++" guarantees the * most significant digit as * - '1'(bin), '7'(oct) or 'f'(hex) for negative numbers * - '0' for zero * - not '0' for positive numbers. * * It also guarantees the most significant two * digits will not be '11'(bin), '77'(oct), 'ff'(hex) * or '00'. */ if (numdigits == 0 || ((abs_nlz_bits != (size_t)(numbits-1) || !rb_absint_singlebit_p(val)) && (!bignum ? v < 0 : BIGNUM_NEGATIVE_P(val)))) numdigits++; tmp = rb_str_new(NULL, numdigits); valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp), 1, CHAR_BIT-numbits, INTEGER_PACK_2COMP | INTEGER_PACK_BIG_ENDIAN); for (i = 0; i < RSTRING_LEN(tmp); i++) RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]]; s = RSTRING_PTR(tmp); dots = valsign < 0; } len = rb_long2int(RSTRING_END(tmp) - s); } else if (!bignum) { valsign = 1; if (v < 0) { v = -v; sc = '-'; width--; valsign = -1; } else if (flags & FPLUS) { sc = '+'; width--; } else if (flags & FSPACE) { sc = ' '; width--; } snprintf(nbuf, sizeof(nbuf), "%ld", v); s = nbuf; len = (int)strlen(s); } else { tmp = rb_big2str(val, 10); s = RSTRING_PTR(tmp); valsign = 1; if (s[0] == '-') { s++; sc = '-'; width--; valsign = -1; } else if (flags & FPLUS) { sc = '+'; width--; } else if (flags & FSPACE) { sc = ' '; width--; } len = rb_long2int(RSTRING_END(tmp) - s); } if (dots) { prec -= 2; width -= 2; } if (*p == 'X') { char *pp = s; int c; while ((c = (int)(unsigned char)*pp) != 0) { *pp = rb_enc_toupper(c, enc); pp++; } } if (prefix && !prefix[1]) { /* octal */ if (dots) { prefix = 0; } else if (len == 1 && *s == '0') { len = 0; if (flags & FPREC) prec--; } else if ((flags & FPREC) && (prec > len)) { prefix = 0; } } else if (len == 1 && *s == '0') { prefix = 0; } if (prefix) { width -= (int)strlen(prefix); } if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { prec = width; width = 0; } else { if (prec < len) { if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0; prec = len; } width -= prec; } if (!(flags&FMINUS)) { CHECK(width); while (width-- > 0) { buf[blen++] = ' '; } } if (sc) PUSH(&sc, 1); if (prefix) { int plen = (int)strlen(prefix); PUSH(prefix, plen); } CHECK(prec - len); if (dots) PUSH("..", 2); if (!sign && valsign < 0) { char c = sign_bits(base, p); while (len < prec--) { buf[blen++] = c; } } else if ((flags & (FMINUS|FPREC)) != FMINUS) { while (len < prec--) { buf[blen++] = '0'; } } PUSH(s, len); RB_GC_GUARD(tmp); CHECK(width); while (width-- > 0) { buf[blen++] = ' '; } } break; case 'f': { VALUE val = GETARG(), num, den; int sign = (flags&FPLUS) ? 1 : 0, zero = 0; long len, done = 0; int prefix = 0; if (!RB_TYPE_P(val, T_RATIONAL)) { nextvalue = val; goto float_value; } if (!(flags&FPREC)) prec = default_float_precision; den = rb_rational_den(val); num = rb_rational_num(val); if (FIXNUM_P(num)) { if ((SIGNED_VALUE)num < 0) { long n = -FIX2LONG(num); num = LONG2FIX(n); sign = -1; } } else if (rb_num_negative_p(num)) { sign = -1; num = rb_funcallv(num, idUMinus, 0, 0); } if (den != INT2FIX(1) || prec > 1) { const ID idDiv = rb_intern("div"); VALUE p10 = rb_int_positive_pow(10, prec); VALUE den_2 = rb_funcall(den, idDiv, 1, INT2FIX(2)); num = rb_funcallv(num, '*', 1, &p10); num = rb_funcallv(num, '+', 1, &den_2); num = rb_funcallv(num, idDiv, 1, &den); } else if (prec >= 0) { zero = prec; } val = rb_obj_as_string(num); len = RSTRING_LEN(val) + zero; if (prec >= len) ++len; /* integer part 0 */ if (sign || (flags&FSPACE)) ++len; if (prec > 0) ++len; /* period */ CHECK(len > width ? len : width); if (sign || (flags&FSPACE)) { buf[blen++] = sign > 0 ? '+' : sign < 0 ? '-' : ' '; prefix++; done++; } len = RSTRING_LEN(val) + zero; t = RSTRING_PTR(val); if (len > prec) { memcpy(&buf[blen], t, len - prec); blen += len - prec; done += len - prec; } else { buf[blen++] = '0'; done++; } if (prec > 0) { buf[blen++] = '.'; done++; } if (zero) { FILL('0', zero); done += zero; } else if (prec > len) { FILL('0', prec - len); memcpy(&buf[blen], t, len); blen += len; done += prec; } else if (prec > 0) { memcpy(&buf[blen], t + len - prec, prec); blen += prec; done += prec; } if ((flags & FWIDTH) && width > done) { int fill = ' '; long shifting = 0; if (!(flags&FMINUS)) { shifting = done; if (flags&FZERO) { shifting -= prefix; fill = '0'; } blen -= shifting; memmove(&buf[blen + width - done], &buf[blen], shifting); } FILL(fill, width - done); blen += shifting; } RB_GC_GUARD(val); break; } case 'g': case 'G': case 'e': case 'E': /* TODO: rational support */ case 'a': case 'A': float_value: { VALUE val = GETARG(); double fval; int i, need; char fbuf[32]; fval = RFLOAT_VALUE(rb_Float(val)); if (isnan(fval) || isinf(fval)) { const char *expr; if (isnan(fval)) { expr = "NaN"; } else { expr = "Inf"; } need = (int)strlen(expr); if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS)) need++; if ((flags & FWIDTH) && need < width) need = width; CHECK(need + 1); snprintf(&buf[blen], need + 1, "%*s", need, ""); if (flags & FMINUS) { if (!isnan(fval) && fval < 0.0) buf[blen++] = '-'; else if (flags & FPLUS) buf[blen++] = '+'; else if (flags & FSPACE) blen++; memcpy(&buf[blen], expr, strlen(expr)); } else { if (!isnan(fval) && fval < 0.0) buf[blen + need - strlen(expr) - 1] = '-'; else if (flags & FPLUS) buf[blen + need - strlen(expr) - 1] = '+'; else if ((flags & FSPACE) && need > width) blen++; memcpy(&buf[blen + need - strlen(expr)], expr, strlen(expr)); } blen += strlen(&buf[blen]); break; } fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec); need = 0; if (*p != 'e' && *p != 'E') { i = INT_MIN; frexp(fval, &i); if (i > 0) need = BIT_DIGITS(i); } need += (flags&FPREC) ? prec : default_float_precision; if ((flags&FWIDTH) && need < width) need = width; need += 20; CHECK(need); snprintf(&buf[blen], need, fbuf, fval); blen += strlen(&buf[blen]); } break; } flags = FNONE; } sprint_exit: RB_GC_GUARD(fmt); /* XXX - We cannot validate the number of arguments if (digit)$ style used. */ if (posarg >= 0 && nextarg < argc) { const char *mesg = "too many arguments for format string"; if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg); if (RTEST(ruby_verbose)) rb_warn("%s", mesg); } rb_str_resize(result, blen); if (tainted) OBJ_TAINT(result); return result; }
/** * @param str the string to be scrubbed * @param repl the replacement character * @return If given string is invalid, returns a new string. Otherwise, returns Qnil. */ static VALUE str_scrub0(int argc, VALUE *argv, VALUE str) { int cr = ENC_CODERANGE(str); rb_encoding *enc; int encidx; VALUE repl; if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) return Qnil; enc = STR_ENC_GET(str); rb_scan_args(argc, argv, "01", &repl); if (argc != 0) { repl = str_compat_and_valid(repl, enc); } if (rb_enc_dummy_p(enc)) { return Qnil; } encidx = rb_enc_to_index(enc); #define DEFAULT_REPLACE_CHAR(str) do { \ static const char replace[sizeof(str)-1] = str; \ rep = replace; replen = (int)sizeof(replace); \ } while (0) if (rb_enc_asciicompat(enc)) { const char *p = RSTRING_PTR(str); const char *e = RSTRING_END(str); const char *p1 = p; const char *rep; long replen; int rep7bit_p; VALUE buf = Qnil; if (rb_block_given_p()) { rep = NULL; replen = 0; rep7bit_p = FALSE; } else if (!NIL_P(repl)) { rep = RSTRING_PTR(repl); replen = RSTRING_LEN(repl); rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT); } else if (encidx == rb_utf8_encindex()) { DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD"); rep7bit_p = FALSE; } else { DEFAULT_REPLACE_CHAR("?"); rep7bit_p = TRUE; } cr = ENC_CODERANGE_7BIT; p = search_nonascii(p, e); if (!p) { p = e; } while (p < e) { int ret = rb_enc_precise_mbclen(p, e, enc); if (MBCLEN_NEEDMORE_P(ret)) { break; } else if (MBCLEN_CHARFOUND_P(ret)) { cr = ENC_CODERANGE_VALID; p += MBCLEN_CHARFOUND_LEN(ret); } else if (MBCLEN_INVALID_P(ret)) { /* * p1~p: valid ascii/multibyte chars * p ~e: invalid bytes + unknown bytes */ long clen = rb_enc_mbmaxlen(enc); if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str)); if (p > p1) { rb_str_buf_cat(buf, p1, p - p1); } if (e - p < clen) clen = e - p; if (clen <= 2) { clen = 1; } else { const char *q = p; clen--; for (; clen > 1; clen--) { ret = rb_enc_precise_mbclen(q, q + clen, enc); if (MBCLEN_NEEDMORE_P(ret)) break; if (MBCLEN_INVALID_P(ret)) continue; UNREACHABLE; } } if (rep) { rb_str_buf_cat(buf, rep, replen); if (!rep7bit_p) cr = ENC_CODERANGE_VALID; } else { repl = rb_yield(rb_enc_str_new(p, clen, enc)); repl = str_compat_and_valid(repl, enc); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID) cr = ENC_CODERANGE_VALID; } p += clen; p1 = p; p = search_nonascii(p, e); if (!p) { p = e; break; } } else { UNREACHABLE; } } if (NIL_P(buf)) { if (p == e) { ENC_CODERANGE_SET(str, cr); return Qnil; } buf = rb_str_buf_new(RSTRING_LEN(str)); } if (p1 < p) { rb_str_buf_cat(buf, p1, p - p1); } if (p < e) { if (rep) { rb_str_buf_cat(buf, rep, replen); if (!rep7bit_p) cr = ENC_CODERANGE_VALID; } else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID) cr = ENC_CODERANGE_VALID; } } ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr); return buf; } else { /* ASCII incompatible */ const char *p = RSTRING_PTR(str); const char *e = RSTRING_END(str); const char *p1 = p; VALUE buf = Qnil; const char *rep; long replen; long mbminlen = rb_enc_mbminlen(enc); if (!NIL_P(repl)) { rep = RSTRING_PTR(repl); replen = RSTRING_LEN(repl); } else if (!strcasecmp(rb_enc_name(enc), "UTF-16BE")) { DEFAULT_REPLACE_CHAR("\xFF\xFD"); } else if (!strcasecmp(rb_enc_name(enc), "UTF-16LE")) { DEFAULT_REPLACE_CHAR("\xFD\xFF"); } else if (!strcasecmp(rb_enc_name(enc), "UTF-32BE")) { DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD"); } else if (!strcasecmp(rb_enc_name(enc), "UTF-32lE")) { DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00"); } else { DEFAULT_REPLACE_CHAR("?"); } while (p < e) { int ret = rb_enc_precise_mbclen(p, e, enc); if (MBCLEN_NEEDMORE_P(ret)) { break; } else if (MBCLEN_CHARFOUND_P(ret)) { p += MBCLEN_CHARFOUND_LEN(ret); } else if (MBCLEN_INVALID_P(ret)) { const char *q = p; long clen = rb_enc_mbmaxlen(enc); if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str)); if (p > p1) rb_str_buf_cat(buf, p1, p - p1); if (e - p < clen) clen = e - p; if (clen <= mbminlen * 2) { clen = mbminlen; } else { clen -= mbminlen; for (; clen > mbminlen; clen-=mbminlen) { ret = rb_enc_precise_mbclen(q, q + clen, enc); if (MBCLEN_NEEDMORE_P(ret)) break; if (MBCLEN_INVALID_P(ret)) continue; UNREACHABLE; } } if (rep) { rb_str_buf_cat(buf, rep, replen); } else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); } p += clen; p1 = p; } else { UNREACHABLE; } } if (NIL_P(buf)) { if (p == e) { ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID); return Qnil; } buf = rb_str_buf_new(RSTRING_LEN(str)); } if (p1 < p) { rb_str_buf_cat(buf, p1, p - p1); } if (p < e) { if (rep) { rb_str_buf_cat(buf, rep, replen); } else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); repl = str_compat_and_valid(repl, enc); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); } } ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID); return buf; } }
/********************************************************* * main *********************************************************/ static mrb_value mrb_value_to_string(mrb_state* mrb, mrb_value value) { mrb_value str; if (mrb_nil_p(value)) { return mrb_str_new_cstr(mrb, "null"); } switch (mrb_type(value)) { case MRB_TT_FIXNUM: case MRB_TT_FLOAT: case MRB_TT_TRUE: case MRB_TT_FALSE: case MRB_TT_UNDEF: str = mrb_funcall(mrb, value, "to_s", 0, NULL); break; case MRB_TT_SYMBOL: value = mrb_funcall(mrb, value, "to_s", 0, NULL); /* FALLTHROUGH */ case MRB_TT_STRING: { int ai = mrb_gc_arena_save(mrb); char* ptr = RSTRING_PTR(value); char* end = RSTRING_END(value); str = mrb_str_new_cstr(mrb, "\""); while (ptr < end && *ptr) { switch (*ptr) { case '\\': str = mrb_str_cat_cstr(mrb, str, "\\\\"); break; case '"': str = mrb_str_cat_cstr(mrb, str, "\\\""); break; case '\b': str = mrb_str_cat_cstr(mrb, str, "\\b"); break; case '\f': str = mrb_str_cat_cstr(mrb, str, "\\f"); break; case '\n': str = mrb_str_cat_cstr(mrb, str, "\\n"); break; case '\r': str = mrb_str_cat_cstr(mrb, str, "\\r"); break; case '\t': str = mrb_str_cat_cstr(mrb, str, "\\t"); break; default: // TODO: handle unicode str = mrb_str_cat(mrb, str, ptr, 1); } ptr++; } mrb_str_cat_cstr(mrb, str, "\""); mrb_gc_arena_restore(mrb, ai); } break; case MRB_TT_HASH: { mrb_value keys; int n, l; str = mrb_str_new_cstr(mrb, "{"); keys = mrb_hash_keys(mrb, value); l = RARRAY_LEN(keys); for (n = 0; n < l; n++) { mrb_value obj; int ai = mrb_gc_arena_save(mrb); mrb_value key = mrb_ary_entry(keys, n); mrb_value enckey = mrb_funcall(mrb, key, "to_s", 0, NULL); enckey = mrb_funcall(mrb, enckey, "inspect", 0, NULL); mrb_str_concat(mrb, str, enckey); mrb_str_cat_cstr(mrb, str, ":"); obj = mrb_hash_get(mrb, value, key); mrb_str_concat(mrb, str, mrb_value_to_string(mrb, obj)); if (n != l - 1) { mrb_str_cat_cstr(mrb, str, ","); } mrb_gc_arena_restore(mrb, ai); } mrb_str_cat_cstr(mrb, str, "}"); break; } case MRB_TT_ARRAY: { int n, l; str = mrb_str_new_cstr(mrb, "["); l = RARRAY_LEN(value); for (n = 0; n < l; n++) { int ai = mrb_gc_arena_save(mrb); mrb_value obj = mrb_ary_entry(value, n); mrb_str_concat(mrb, str, mrb_value_to_string(mrb, obj)); if (n != l - 1) { mrb_str_cat_cstr(mrb, str, ","); } mrb_gc_arena_restore(mrb, ai); } mrb_str_cat_cstr(mrb, str, "]"); break; } default: mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid argument"); } return str; }
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt) { rb_encoding *enc; const char *p, *end; char *buf; long blen, bsiz; VALUE result; long scanned = 0; int coderange = ENC_CODERANGE_7BIT; int width, prec, flags = FNONE; int nextarg = 1; int posarg = 0; int tainted = 0; VALUE nextvalue; VALUE tmp; VALUE str; volatile VALUE hash = Qundef; #define CHECK_FOR_WIDTH(f) \ if ((f) & FWIDTH) { \ rb_raise(rb_eArgError, "width given twice"); \ } \ if ((f) & FPREC0) { \ rb_raise(rb_eArgError, "width after precision"); \ } #define CHECK_FOR_FLAGS(f) \ if ((f) & FWIDTH) { \ rb_raise(rb_eArgError, "flag after width"); \ } \ if ((f) & FPREC0) { \ rb_raise(rb_eArgError, "flag after precision"); \ } ++argc; --argv; if (OBJ_TAINTED(fmt)) tainted = 1; StringValue(fmt); enc = rb_enc_get(fmt); fmt = rb_str_new4(fmt); p = RSTRING_PTR(fmt); end = p + RSTRING_LEN(fmt); blen = 0; bsiz = 120; result = rb_str_buf_new(bsiz); rb_enc_copy(result, fmt); buf = RSTRING_PTR(result); memset(buf, 0, bsiz); ENC_CODERANGE_SET(result, coderange); for (; p < end; p++) { const char *t; int n; ID id = 0; for (t = p; t < end && *t != '%'; t++) ; PUSH(p, t - p); if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange); ENC_CODERANGE_SET(result, coderange); } if (t >= end) { /* end of fmt string */ goto sprint_exit; } p = t + 1; /* skip `%' */ width = prec = -1; nextvalue = Qundef; retry: switch (*p) { default: if (rb_enc_isprint(*p, enc)) rb_raise(rb_eArgError, "malformed format string - %%%c", *p); else rb_raise(rb_eArgError, "malformed format string"); break; case ' ': CHECK_FOR_FLAGS(flags); flags |= FSPACE; p++; goto retry; case '#': CHECK_FOR_FLAGS(flags); flags |= FSHARP; p++; goto retry; case '+': CHECK_FOR_FLAGS(flags); flags |= FPLUS; p++; goto retry; case '-': CHECK_FOR_FLAGS(flags); flags |= FMINUS; p++; goto retry; case '0': CHECK_FOR_FLAGS(flags); flags |= FZERO; p++; goto retry; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': n = 0; GETNUM(n, width); if (*p == '$') { if (nextvalue != Qundef) { rb_raise(rb_eArgError, "value given twice - %d$", n); } nextvalue = GETPOSARG(n); p++; goto retry; } CHECK_FOR_WIDTH(flags); width = n; flags |= FWIDTH; goto retry; case '<': case '{': { const char *start = p; char term = (*p == '<') ? '>' : '}'; int len; for (; p < end && *p != term; ) { p += rb_enc_mbclen(p, end, enc); } if (p >= end) { rb_raise(rb_eArgError, "malformed name - unmatched parenthesis"); } #if SIZEOF_INT < SIZEOF_SIZE_T if ((size_t)(p - start) >= INT_MAX) { const int message_limit = 20; len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start); rb_enc_raise(enc, rb_eArgError, "too long name (%"PRIdSIZE" bytes) - %.*s...%c", (size_t)(p - start - 2), len, start, term); } #endif len = (int)(p - start + 1); /* including parenthesis */ if (id) { rb_enc_raise(enc, rb_eArgError, "named%.*s after <%s>", len, start, rb_id2name(id)); } nextvalue = GETNAMEARG((id = rb_check_id_cstr(start + 1, len - 2 /* without parenthesis */, enc), ID2SYM(id)), start, len, enc); if (nextvalue == Qundef) { rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start); } if (term == '}') goto format_s; p++; goto retry; } case '*': CHECK_FOR_WIDTH(flags); flags |= FWIDTH; GETASTER(width); if (width < 0) { flags |= FMINUS; width = -width; } p++; goto retry; case '.': if (flags & FPREC0) { rb_raise(rb_eArgError, "precision given twice"); } flags |= FPREC|FPREC0; prec = 0; p++; if (*p == '*') { GETASTER(prec); if (prec < 0) { /* ignore negative precision */ flags &= ~FPREC; } p++; goto retry; } GETNUM(prec, precision); goto retry; case '\n': case '\0': p--; case '%': if (flags != FNONE) { rb_raise(rb_eArgError, "invalid format character - %%"); } PUSH("%", 1); break; case 'c': { VALUE val = GETARG(); VALUE tmp; unsigned int c; int n; tmp = rb_check_string_type(val); if (!NIL_P(tmp)) { if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) { rb_raise(rb_eArgError, "%%c requires a character"); } c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); RB_GC_GUARD(tmp); } else { c = NUM2INT(val); n = rb_enc_codelen(c, enc); } if (n <= 0) { rb_raise(rb_eArgError, "invalid character"); } if (!(flags & FWIDTH)) { CHECK(n); rb_enc_mbcput(c, &buf[blen], enc); blen += n; } else if ((flags & FMINUS)) { CHECK(n); rb_enc_mbcput(c, &buf[blen], enc); blen += n; FILL(' ', width-1); } else { FILL(' ', width-1); CHECK(n); rb_enc_mbcput(c, &buf[blen], enc); blen += n; } } break; case 's': case 'p': format_s: { VALUE arg = GETARG(); long len, slen; if (*p == 'p') arg = rb_inspect(arg); str = rb_obj_as_string(arg); if (OBJ_TAINTED(str)) tainted = 1; len = RSTRING_LEN(str); rb_str_set_len(result, blen); if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { int cr = coderange; scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); ENC_CODERANGE_SET(result, (cr == ENC_CODERANGE_UNKNOWN ? ENC_CODERANGE_BROKEN : (coderange = cr))); } enc = rb_enc_check(result, str); if (flags&(FPREC|FWIDTH)) { slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc); if (slen < 0) { rb_raise(rb_eArgError, "invalid mbstring sequence"); } if ((flags&FPREC) && (prec < slen)) { char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str), prec, enc); slen = prec; len = p - RSTRING_PTR(str); } /* need to adjust multi-byte string pos */ if ((flags&FWIDTH) && (width > slen)) { width -= (int)slen; if (!(flags&FMINUS)) { CHECK(width); while (width--) { buf[blen++] = ' '; } } CHECK(len); memcpy(&buf[blen], RSTRING_PTR(str), len); RB_GC_GUARD(str); blen += len; if (flags&FMINUS) { CHECK(width); while (width--) { buf[blen++] = ' '; } } rb_enc_associate(result, enc); break; } } PUSH(RSTRING_PTR(str), len); RB_GC_GUARD(str); rb_enc_associate(result, enc); } break; case 'd': case 'i': case 'o': case 'x': case 'X': case 'b': case 'B': case 'u': { volatile VALUE val = GETARG(); char fbuf[32], nbuf[64], *s; const char *prefix = 0; int sign = 0, dots = 0; char sc = 0; long v = 0; int base, bignum = 0; int len; switch (*p) { case 'd': case 'i': case 'u': sign = 1; break; case 'o': case 'x': case 'X': case 'b': case 'B': if (flags&(FPLUS|FSPACE)) sign = 1; break; } if (flags & FSHARP) { switch (*p) { case 'o': prefix = "0"; break; case 'x': prefix = "0x"; break; case 'X': prefix = "0X"; break; case 'b': prefix = "0b"; break; case 'B': prefix = "0B"; break; } } bin_retry: switch (TYPE(val)) { case T_FLOAT: if (FIXABLE(RFLOAT_VALUE(val))) { val = LONG2FIX((long)RFLOAT_VALUE(val)); goto bin_retry; } val = rb_dbl2big(RFLOAT_VALUE(val)); if (FIXNUM_P(val)) goto bin_retry; bignum = 1; break; case T_STRING: val = rb_str_to_inum(val, 0, TRUE); goto bin_retry; case T_BIGNUM: bignum = 1; break; case T_FIXNUM: v = FIX2LONG(val); break; default: val = rb_Integer(val); goto bin_retry; } switch (*p) { case 'o': base = 8; break; case 'x': case 'X': base = 16; break; case 'b': case 'B': base = 2; break; case 'u': case 'd': case 'i': default: base = 10; break; } if (!bignum) { if (base == 2) { val = rb_int2big(v); goto bin_retry; } if (sign) { char c = *p; if (c == 'i') c = 'd'; /* %d and %i are identical */ if (v < 0) { v = -v; sc = '-'; width--; } else if (flags & FPLUS) { sc = '+'; width--; } else if (flags & FSPACE) { sc = ' '; width--; } snprintf(fbuf, sizeof(fbuf), "%%l%c", c); snprintf(nbuf, sizeof(nbuf), fbuf, v); s = nbuf; } else { s = nbuf; if (v < 0) { dots = 1; } snprintf(fbuf, sizeof(fbuf), "%%l%c", *p == 'X' ? 'x' : *p); snprintf(++s, sizeof(nbuf) - 1, fbuf, v); if (v < 0) { char d = 0; s = remove_sign_bits(s, base); switch (base) { case 16: d = 'f'; break; case 8: d = '7'; break; } if (d && *s != d) { *--s = d; } } } len = (int)strlen(s); } else { if (sign) { tmp = rb_big2str(val, base); s = RSTRING_PTR(tmp); if (s[0] == '-') { s++; sc = '-'; width--; } else if (flags & FPLUS) { sc = '+'; width--; } else if (flags & FSPACE) { sc = ' '; width--; } } else { if (!RBIGNUM_SIGN(val)) { val = rb_big_clone(val); rb_big_2comp(val); } tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val)); s = RSTRING_PTR(tmp); if (*s == '-') { dots = 1; if (base == 10) { rb_warning("negative number for %%u specifier"); } s = remove_sign_bits(++s, base); switch (base) { case 16: if (s[0] != 'f') *--s = 'f'; break; case 8: if (s[0] != '7') *--s = '7'; break; case 2: if (s[0] != '1') *--s = '1'; break; } } } len = rb_long2int(RSTRING_END(tmp) - s); } if (dots) { prec -= 2; width -= 2; } if (*p == 'X') { char *pp = s; int c; while ((c = (int)(unsigned char)*pp) != 0) { *pp = rb_enc_toupper(c, enc); pp++; } } if (prefix && !prefix[1]) { /* octal */ if (dots) { prefix = 0; } else if (len == 1 && *s == '0') { len = 0; if (flags & FPREC) prec--; } else if ((flags & FPREC) && (prec > len)) { prefix = 0; } } else if (len == 1 && *s == '0') { prefix = 0; } if (prefix) { width -= (int)strlen(prefix); } if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { prec = width; width = 0; } else { if (prec < len) { if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0; prec = len; } width -= prec; } if (!(flags&FMINUS)) { CHECK(width); while (width-- > 0) { buf[blen++] = ' '; } } if (sc) PUSH(&sc, 1); if (prefix) { int plen = (int)strlen(prefix); PUSH(prefix, plen); } CHECK(prec - len); if (dots) PUSH("..", 2); if (!bignum && v < 0) { char c = sign_bits(base, p); while (len < prec--) { buf[blen++] = c; } } else if ((flags & (FMINUS|FPREC)) != FMINUS) { char c; if (!sign && bignum && !RBIGNUM_SIGN(val)) c = sign_bits(base, p); else c = '0'; while (len < prec--) { buf[blen++] = c; } } PUSH(s, len); RB_GC_GUARD(tmp); CHECK(width); while (width-- > 0) { buf[blen++] = ' '; } } break; case 'f': case 'g': case 'G': case 'e': case 'E': case 'a': case 'A': { VALUE val = GETARG(); double fval; int i, need = 6; char fbuf[32]; fval = RFLOAT_VALUE(rb_Float(val)); if (isnan(fval) || isinf(fval)) { const char *expr; if (isnan(fval)) { expr = "NaN"; } else { expr = "Inf"; } need = (int)strlen(expr); if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS)) need++; if ((flags & FWIDTH) && need < width) need = width; CHECK(need + 1); snprintf(&buf[blen], need + 1, "%*s", need, ""); if (flags & FMINUS) { if (!isnan(fval) && fval < 0.0) buf[blen++] = '-'; else if (flags & FPLUS) buf[blen++] = '+'; else if (flags & FSPACE) blen++; memcpy(&buf[blen], expr, strlen(expr)); } else { if (!isnan(fval) && fval < 0.0) buf[blen + need - strlen(expr) - 1] = '-'; else if (flags & FPLUS) buf[blen + need - strlen(expr) - 1] = '+'; else if ((flags & FSPACE) && need > width) blen++; memcpy(&buf[blen + need - strlen(expr)], expr, strlen(expr)); } blen += strlen(&buf[blen]); break; } fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec); need = 0; if (*p != 'e' && *p != 'E') { i = INT_MIN; frexp(fval, &i); if (i > 0) need = BIT_DIGITS(i); } need += (flags&FPREC) ? prec : 6; if ((flags&FWIDTH) && need < width) need = width; need += 20; CHECK(need); snprintf(&buf[blen], need, fbuf, fval); blen += strlen(&buf[blen]); } break; } flags = FNONE; } sprint_exit: RB_GC_GUARD(fmt); /* XXX - We cannot validate the number of arguments if (digit)$ style used. */ if (posarg >= 0 && nextarg < argc) { const char *mesg = "too many arguments for format string"; if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg); if (RTEST(ruby_verbose)) rb_warn("%s", mesg); } rb_str_resize(result, blen); if (tainted) OBJ_TAINT(result); return result; }
/* @overload gsub(pattern, replacement) * * Returns the receiver with all matches of PATTERN replaced by REPLACEMENT, * inheriting any taint and untrust from the receiver and from REPLACEMENT. * * The REPLACEMENT is used as a specification for what to replace matches * with: * * <table> * <thead> * <tr><th>Specification</th><th>Replacement</th></tr> * </thead> * <tbody> * <tr> * <td><code>\1</code>, <code>\2</code>, …, <code>\</code><em>n</em></td> * <td>Numbered sub-match <em>n</em></td> * </tr> * <tr> * <td><code>\k<</code><em>name</em><code>></code></td> * <td>Named sub-match <em>name</em></td> * </tr> * </tbody> * </table> * * The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …, * `$`_n_ are updated accordingly. * * @param [Regexp, #to_str] pattern * @param [#to_str] replacement * @return [U::String] * * @overload gsub(pattern, replacements) * * Returns the receiver with all matches of PATTERN replaced by * REPLACEMENTS#[_match_], where _match_ is the matched substring, inheriting * any taint and untrust from the receiver and from the * REPLACEMENTS#[_match_]es, as well as any taint on REPLACEMENTS. * * The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …, * `$`_n_ are updated accordingly. * * @param [Regexp, #to_str] pattern * @param [#to_hash] replacements * @raise [RuntimeError] If any replacement is the result being constructed * @raise [Exception] Any error raised by REPLACEMENTS#default, if it gets * called * @return [U::String] * * @overload gsub(pattern){ |match| … } * * Returns the receiver with all matches of PATTERN replaced by the results * of the given block, inheriting any taint and untrust from the receiver and * from the results of the given block. * * The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …, * `$`_n_ are updated accordingly. * * @param [Regexp, #to_str] pattern * @yieldparam [U::String] match * @yieldreturn [#to_str] * @return [U::String] * * @overload gsub(pattern) * * Returns an Enumerator over the matches of PATTERN in the receiver. * * The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …, * `$`_n_ will be updated accordingly. * * @param [Regexp, #to_str] pattern * @return [Enumerator] */ VALUE rb_u_string_gsub(int argc, VALUE *argv, VALUE self) { VALUE pattern, replacement; VALUE replacements = Qnil; bool use_block = false; bool tainted = false; if (argc == 1) { RETURN_ENUMERATOR(self, argc, argv); use_block = true; } if (rb_scan_args(argc, argv, "11", &pattern, &replacement) == 2) { replacements = rb_check_convert_type(replacement, T_HASH, "Hash", "to_hash"); if (NIL_P(replacements)) StringValue(replacement); if (OBJ_TAINTED(replacement)) tainted = true; } pattern = rb_u_pattern_argument(pattern, true); VALUE str = rb_str_to_str(self); long begin = rb_reg_search(pattern, str, 0, 0); if (begin < 0) return self; const char *base = RSTRING_PTR(str); const char *p = base; const char *end = RSTRING_END(str); VALUE substituted = rb_u_str_buf_new(RSTRING_LEN(str) + 30); do { VALUE match = rb_backref_get(); struct re_registers *registers = RMATCH_REGS(match); VALUE result; if (use_block || !NIL_P(replacements)) { if (use_block) { VALUE ustr = rb_u_string_new_rb(rb_reg_nth_match(0, match)); result = rb_u_string_object_as_string(rb_yield(ustr)); } else { VALUE ustr = rb_u_string_new_c(self, base + registers->beg[0], registers->end[0] - registers->beg[0]); result = rb_u_string_object_as_string(rb_hash_aref(replacements, ustr)); } if (result == substituted) rb_u_raise(rb_eRuntimeError, "result of block is string being built; please try not to cheat"); } else result = #ifdef HAVE_RB_REG_REGSUB4 rb_reg_regsub(replacement, str, registers, pattern); #else rb_reg_regsub(replacement, str, registers); #endif if (OBJ_TAINTED(result)) tainted = true; const struct rb_u_string *value = RVAL2USTRING_ANY(result); rb_str_buf_cat(substituted, p, registers->beg[0] - (p - base)); rb_str_buf_cat(substituted, USTRING_STR(value), USTRING_LENGTH(value)); OBJ_INFECT(substituted, result); p = base + registers->end[0]; if (registers->beg[0] == registers->end[0]) p = u_next(p); if (p >= end) break; begin = rb_reg_search(pattern, str, registers->end[0], 0); } while (begin >= 0); if (p < end) rb_str_buf_cat(substituted, p, end - p); rb_reg_search(pattern, str, end - p, 0); RBASIC(substituted)->klass = rb_obj_class(str); OBJ_INFECT(substituted, str); if (tainted) OBJ_TAINT(substituted); return rb_u_string_new_rb(substituted); }