Esempio n. 1
0
/*
 * Test whether +str+ ends with +oth+.
 *
 * +oth+ is coerced with rb_string_value() before anything else looks at
 * it.  Returns nil when the coerced suffix is longer than +str+, when
 * the strings are not comparable (only checked when HAVE_HEADER_RUBY_H
 * is not defined), or when the trailing bytes differ.  Otherwise returns
 * the position at which the suffix starts as a Fixnum: the difference of
 * the RSTRING_LEN() values when HAVE_HEADER_RUBY_H is defined, the
 * difference of the rb_str_strlen() values otherwise.
 */
VALUE
rb_str_ends_with_p( VALUE str, VALUE oth)
{
    long i;
    char *s, *o;
    VALUE ost;

    /* Coerce first: rb_str_comparable() must only ever see Strings. */
    ost = rb_string_value( &oth);
#ifndef HAVE_HEADER_RUBY_H
    if (!rb_str_comparable( str, ost))
        return Qnil;
#endif
    i = RSTRING_LEN( ost);
    if (i > RSTRING_LEN( str))
        return Qnil;
    /* Walk both strings backwards from their ends, byte by byte. */
    s = RSTRING_END( str);
    o = RSTRING_END( ost);
    for (; i; i--)
        if (*--s != *--o)
            return Qnil;
#ifdef HAVE_HEADER_RUBY_H
    return INT2FIX( RSTRING_LEN( str) - RSTRING_LEN( ost));
#else
    return INT2FIX( rb_str_strlen( str) - rb_str_strlen( ost));
#endif
}
Esempio n. 2
0
/*
 * Evaluate the JavaScript source passed as the single String argument in
 * the plugin's window object and convert the result to an mruby value.
 *
 * Returns nil when the window object cannot be obtained, when the script
 * copy cannot be allocated, when evaluation fails, or when the result
 * cannot be converted by convert_js_to_mrb().
 */
static mrb_value mrb_js_obj_get(mrb_state *mrb, mrb_value klass)
{
    mrb_value str;
    mrb_get_args(mrb, "S", &str);

    /* Do not touch `window` unless the browser really handed one out. */
    NPObject *window = NULL;
    if (NPN_GetValue(MRB_UD_NPP(mrb), NPNVWindowNPObject, &window) != NPERR_NO_ERROR || !window){
        return mrb_nil_value();
    }

    /* The script is copied into browser-allocated memory for NPN_Evaluate. */
    NPUTF8 *s = (NPUTF8 *)NPN_MemAlloc(RSTRING_LEN(str));
    if (!s && RSTRING_LEN(str) > 0){
        NPN_ReleaseObject(window);
        return mrb_nil_value();
    }
    std::copy(RSTRING_PTR(str), RSTRING_END(str), s);
    NPString evaluated_str = { s, RSTRING_LEN(str) };

    NPVariant result;
    bool evaluated = NPN_Evaluate(MRB_UD_NPP(mrb), window, &evaluated_str, &result);
    NPN_ReleaseObject(window);
    NPN_MemFree(s);
    if (!evaluated){
        /* `result` is not valid when evaluation failed. */
        return mrb_nil_value();
    }

    /* NOTE(review): ownership of `result` after conversion is decided by
     * convert_js_to_mrb(), which is not visible here - confirm it releases
     * the variant when appropriate. */
    mrb_value ret;
    if (!convert_js_to_mrb(MRB_UD_NPP(mrb), result, mrb, &ret)){
        return mrb_nil_value();
    }

    return ret;
}
Esempio n. 3
0
/*
 * Require the extension library for the encoding called +name+ and
 * return its index in the encoding table.
 *
 * The library path is "enc/<name>.so" where, inside <name>, every
 * non-alphanumeric character is replaced by '_' and upper-case letters
 * are lowered.  ruby_verbose, ruby_debug and the current errinfo are
 * saved before the require and put back afterwards.
 *
 * Returns -1 when rb_require_internal() reports anything other than 0 or
 * 1, when the encoding is still not registered, or when it is still
 * marked as autoload.
 */
static int
load_encoding(const char *name)
{
    VALUE enclib = rb_sprintf("enc/%s.so", name);
    VALUE saved_verbose = ruby_verbose;
    VALUE saved_debug = ruby_debug;
    VALUE saved_errinfo;
    char *cur = RSTRING_PTR(enclib) + 4;	/* skip "enc/" */
    char *stop = RSTRING_END(enclib) - 3;	/* stop before ".so" */
    int status;
    int idx;

    for (; cur < stop; ++cur) {
	if (!ISALNUM(*cur)) {
	    *cur = '_';
	}
	else if (ISUPPER(*cur)) {
	    *cur = (char)TOLOWER(*cur);
	}
    }
    FL_UNSET(enclib, FL_TAINT);
    enclib = rb_fstring(enclib);

    /* Silence warnings/debug output while requiring the library. */
    ruby_verbose = Qfalse;
    ruby_debug = Qfalse;
    saved_errinfo = rb_errinfo();
    status = rb_require_internal(enclib, rb_safe_level());
    ruby_verbose = saved_verbose;
    ruby_debug = saved_debug;
    rb_set_errinfo(saved_errinfo);

    if (status != 0 && status != 1) {
	return -1;
    }
    idx = rb_enc_registered(name);
    if (idx < 0) {
	return -1;
    }
    if (enc_autoload_p(enc_table.list[idx].enc)) {
	return -1;
    }
    return idx;
}
Esempio n. 4
0
File: encoding.c Progetto: 217/ruby
/*
 * Require the extension library for the encoding called +name+ and
 * return its index in the encoding table.
 *
 * The library path is "enc/<name>.so" where, inside <name>, every
 * non-alphanumeric character is replaced by '_' and upper-case letters
 * are lowered.  The require runs under rb_protect() with ruby_verbose
 * and ruby_debug switched off.
 *
 * The errinfo that was current on entry is saved and restored, so a
 * failed load neither leaks its own exception nor wipes out one that was
 * already pending before the call.
 *
 * Returns -1 when the protected require yields nil, when the encoding is
 * still not registered, or when it is still marked as autoload.
 */
static int
load_encoding(const char *name)
{
    VALUE enclib = rb_sprintf("enc/%s.so", name);
    VALUE verbose = ruby_verbose;
    VALUE debug = ruby_debug;
    VALUE errinfo;
    VALUE loaded;
    char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
    int idx;

    /* Normalise only the <name> part, between "enc/" and ".so". */
    while (s < e) {
	if (!ISALNUM(*s)) *s = '_';
	else if (ISUPPER(*s)) *s = TOLOWER(*s);
	++s;
    }
    OBJ_FREEZE(enclib);
    ruby_verbose = Qfalse;
    ruby_debug = Qfalse;
    errinfo = rb_errinfo();
    loaded = rb_protect(require_enc, enclib, 0);
    ruby_verbose = verbose;
    ruby_debug = debug;
    /* Put back whatever was pending before, instead of forcing nil. */
    rb_set_errinfo(errinfo);
    if (NIL_P(loaded)) return -1;
    if ((idx = rb_enc_registered(name)) < 0) return -1;
    if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
    return idx;
}
Esempio n. 5
0
/*
 * Append printf-style formatted output to +str+ and return +str+.
 *
 * The string is made modifiable, then a printf buffer is set up with its
 * write position at the current end of the string and handed to
 * BSD_vfprintf().  The string's class is cleared while the formatter
 * runs and put back afterwards; finally the string is resized to the
 * position the formatter reached.
 */
VALUE
rb_str_vcatf(VALUE str, const char *fmt, va_list ap)
{
    rb_printf_buffer_extra buffer;
    VALUE saved_klass;

    StringValue(str);
    rb_str_modify(str);

    buffer.base._flags = __SWR | __SSTR;
    buffer.base._bf._size = 0;
    buffer.base._w = rb_str_capacity(str);
    /* The VALUE itself is stored in _base; presumably the write callback
     * uses it to reach the string - confirm against ruby__sfvwrite. */
    buffer.base._bf._base = (unsigned char *)str;
    buffer.base._p = (unsigned char *)RSTRING_END(str);

    saved_klass = RBASIC(str)->klass;
    RBASIC_CLEAR_CLASS(str);
    buffer.base.vwrite = ruby__sfvwrite;
    buffer.base.vextra = ruby__sfvextra;
    buffer.value = 0;
    BSD_vfprintf(&buffer.base, fmt, ap);
    RBASIC_SET_CLASS_RAW(str, saved_klass);

    /* _p now marks the end of the written data. */
    rb_str_resize(str, (char *)buffer.base._p - RSTRING_PTR(str));

    return str;
}
Esempio n. 6
0
/*
 * Reverse +str+ in place, character by character (character length is
 * taken from utf8len()), and return +str+.
 *
 * Strings of at most one character, as counted by mrb_utf8_strlen(), are
 * returned untouched.
 */
static mrb_value
mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
{
  mrb_int utf8_len = mrb_utf8_strlen(str);
  if (utf8_len > 1) {
    mrb_int len;
    char *buf;
    unsigned char *p, *e, *r;

    /*
     * Make the string writable BEFORE taking any pointer into it or
     * allocating: mrb_str_modify() may replace the underlying buffer, and
     * if it raises nothing has been allocated yet.
     */
    mrb_str_modify(mrb, mrb_str_ptr(str));

    len = RSTRING_LEN(str);
    buf = (char *)mrb_malloc(mrb, (size_t)len);
    memcpy(buf, RSTRING_PTR(str), len);

    p = (unsigned char *)buf;
    e = p + len;
    r = (unsigned char *)RSTRING_END(str);

    /* Read characters front-to-back from the copy, write them back-to-front. */
    while (p < e) {
      mrb_int clen = utf8len(p);
      /* Never run past the copy on a truncated trailing sequence. */
      if (clen > e - p) clen = (mrb_int)(e - p);
      r -= clen;
      memcpy(r, p, clen);
      p += clen;
    }
    mrb_free(mrb, buf);
  }

  return str;
}
Esempio n. 7
0
/*
 * Return the number of bytes needed to hold +str+ once escaped: the
 * bytes BEL, BS, HT, LF, VT, FF, CR, '"', '\'', '?' and '\\' count as
 * two bytes each, every other byte counts as one.
 */
static size_t
str_format_len(mrb_value str)
{
  size_t total = 0;
  char *cursor = RSTRING_PTR(str);

  while (cursor < RSTRING_END(str)) {
    switch (*cursor) {
    case 0x07: case 0x08: case 0x09: case 0x0A:  /* BEL BS HT LF */
    case 0x0B: case 0x0C: case 0x0D:             /* VT FF CR */
    case 0x22: case 0x27: case 0x3F: case 0x5C:  /* " ' ? \ */
      total += 2;
      break;

    default:
      total += 1;
      break;
    }
    cursor++;
  }

  return total;
}
Esempio n. 8
0
/*
 * Write the escaped form of +str+ into +buf+ and return +buf+.
 *
 * BEL, BS, HT, LF, VT, FF, CR, '"', '\'', '?' and '\\' are written as a
 * backslash followed by their escape letter; every other byte is copied
 * unchanged.  No terminator is written and +buf+ is not bounds-checked.
 */
static char*
str_to_format(mrb_value str, char *buf)
{
  char *in = RSTRING_PTR(str);
  char *out = buf;

  while (in < RSTRING_END(str)) {
    char esc = 0;  /* escape letter, or 0 when the byte is copied as is */

    switch (*in) {
    case 0x07: esc = 'a';  break;  /* BEL */
    case 0x08: esc = 'b';  break;  /* BS  */
    case 0x09: esc = 't';  break;  /* HT  */
    case 0x0A: esc = 'n';  break;  /* LF  */
    case 0x0B: esc = 'v';  break;  /* VT  */
    case 0x0C: esc = 'f';  break;  /* FF  */
    case 0x0D: esc = 'r';  break;  /* CR  */
    case 0x22: esc = '\"'; break;  /* "   */
    case 0x27: esc = '\''; break;  /* '   */
    case 0x3F: esc = '\?'; break;  /* ?   */
    case 0x5C: esc = '\\'; break;  /* \   */
    default: break;
    }

    if (esc) {
      *out++ = '\\';
      *out++ = esc;
    }
    else {
      *out++ = *in;
    }
    in++;
  }

  return buf;
}
Esempio n. 9
0
/*
 * Copy the bytes of the mruby string +value+ into memory obtained from
 * NPN_MemAlloc() and store them in +result+ as a string variant.
 *
 * Returns false, leaving +result+ untouched, when the allocation yields
 * a null pointer; returns true otherwise.
 */
bool convert_mrb_to_js_string(mrb_state *mrb, mrb_value value, NPP npp, NPVariant *result)
{
    NPUTF8 *copy = (NPUTF8 *)NPN_MemAlloc(RSTRING_LEN(value));

    if (copy){
        const char *first = RSTRING_PTR(value);
        const char *last = RSTRING_END(value);

        std::copy(first, last, copy);
        STRINGN_TO_NPVARIANT(copy, RSTRING_LEN(value), *result);
        return true;
    }

    return false;
}
Esempio n. 10
0
/*
 * Return Qtrue when +str+ is empty or every code point in it belongs to
 * the fixed list of blank code points below; Qfalse as soon as any other
 * code point is found.  Code points are decoded with the string's own
 * encoding.
 */
static VALUE
rb_str_blank_as(VALUE str)
{
  rb_encoding *enc = STR_ENC_GET(str);
  char *cur = RSTRING_PTR(str);
  char *stop;

  if (!cur || RSTRING_LEN(str) == 0) {
    return Qtrue;
  }

  for (stop = RSTRING_END(str); cur < stop; ) {
    int width;
    unsigned int cp = rb_enc_codepoint_len(cur, stop, &width, enc);
    int blank =
        (cp >= 0x9 && cp <= 0xd)          /* 9, 0xa, 0xb, 0xc, 0xd */
        || cp == 0x20
        || cp == 0x85
        || cp == 0xa0
        || cp == 0x1680
        || cp == 0x180e
        || (cp >= 0x2000 && cp <= 0x200a) /* 0x2000 .. 0x200a */
        || cp == 0x2028
        || cp == 0x2029
        || cp == 0x202f
        || cp == 0x205f
        || cp == 0x3000;

    if (!blank) {
      return Qfalse;
    }
    cur += width;
  }
  return Qtrue;
}
Esempio n. 11
0
/*
 * Build a new String that holds a copy of +str+'s bytes (terminator
 * included) in a separately allocated, non-embedded buffer, and return
 * it.
 *
 * +str+ is type-checked before any of its fields are read and before the
 * buffer is allocated, so a non-String argument raises without touching
 * invalid memory or leaking the buffer.
 */
static VALUE
bug_str_s_cstr_noembed(VALUE self, VALUE str)
{
    VALUE str2;
    long capacity;
    char *buf;

    Check_Type(str, T_STRING);
    str2 = rb_str_new(NULL, 0);
    /* Bytes of the content plus the encoding-dependent terminator. */
    capacity = RSTRING_LEN(str) + TERM_LEN(str);
    buf = ALLOC_N(char, capacity);
    FL_SET((str2), STR_NOEMBED);
    memcpy(buf, RSTRING_PTR(str), capacity);
    RBASIC(str2)->flags &= ~RSTRING_EMBED_LEN_MASK;
    RSTRING(str2)->as.heap.aux.capa = capacity;
    RSTRING(str2)->as.heap.ptr = buf;
    RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
    TERM_FILL(RSTRING_END(str2), TERM_LEN(str));
    return str2;
}
Esempio n. 12
0
/*
 * Return Qtrue when +str+ is empty or consists only of code points that
 * are either 0 or accepted by rb_isspace(); Qfalse otherwise.  Code
 * points are decoded with the string's own encoding.
 */
static VALUE
rb_str_blank(VALUE str)
{
  rb_encoding *enc = STR_ENC_GET(str);
  char *cur = RSTRING_PTR(str);
  char *stop;

  if (!cur || RSTRING_LEN(str) == 0) {
    return Qtrue;
  }

  for (stop = RSTRING_END(str); cur < stop; ) {
    int width;
    unsigned int cp = rb_enc_codepoint_len(cur, stop, &width, enc);

    if (!(rb_isspace(cp) || cp == 0)) {
      return Qfalse;
    }
    cur += width;
  }
  return Qtrue;
}
Esempio n. 13
0
/*
 * Transcode +str+ in place according to the given arguments and return
 * it.
 *
 * When str_transcode() returns a negative index, +str+ is returned
 * unchanged.  Otherwise +str+ takes over the contents of the transcoded
 * string, is associated with the resulting encoding, and gets its code
 * range set: scanned for ASCII-compatible encodings, otherwise marked
 * valid.
 */
static VALUE
str_encode_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE converted = str;
    int coderange = 0;
    int encidx = str_transcode(argc, argv, &converted);

    if (encidx < 0) {
	return str;
    }

    rb_str_shared_replace(str, converted);
    rb_enc_associate_index(str, encidx);

    /* transcoded string never be broken. */
    if (!rb_enc_asciicompat(rb_enc_from_index(encidx))) {
	coderange = ENC_CODERANGE_VALID;
    }
    else {
	rb_str_coderange_scan_restartable(RSTRING_PTR(str), RSTRING_END(str), 0, &coderange);
    }
    ENC_CODERANGE_SET(str, coderange);

    return str;
}
Esempio n. 14
0
/*
 * Append printf-style formatted output to +str+ and return +str+.
 *
 * The string is made modifiable, a printf buffer is pointed at its
 * current end, and BSD_vfprintf() writes through ruby__sfvwrite.  The
 * string's class is set to 0 while the formatter runs and put back
 * afterwards; finally the string is resized to the position the
 * formatter reached.
 */
VALUE
rb_str_vcatf(VALUE str, const char *fmt, va_list ap)
{
    rb_printf_buffer sink;
    VALUE saved_klass;

    StringValue(str);
    rb_str_modify(str);

    sink._flags = __SWR | __SSTR;
    sink._bf._size = 0;
    sink._w = rb_str_capacity(str);
    /* The VALUE itself is stored in _base; presumably the write callback
     * uses it to reach the string - confirm against ruby__sfvwrite. */
    sink._bf._base = (unsigned char *)str;
    sink._p = (unsigned char *)RSTRING_END(str);

    saved_klass = RBASIC(str)->klass;
    RBASIC(str)->klass = 0;
    sink.vwrite = ruby__sfvwrite;
    BSD_vfprintf(&sink, fmt, ap);
    RBASIC(str)->klass = saved_klass;

    /* _p now marks the end of the written data. */
    rb_str_resize(str, (char *)sink._p - RSTRING_PTR(str));

    return str;
}
Esempio n. 15
0
/*
 * Decode +str+ unit by unit with utf8_to_uv(), collect each decoded
 * value as a 2-byte unit in a temporary string tagged with utf16enc, and
 * return that string converted to UTF-8.
 *
 * If rb_str_conv_enc() hands back the very same object (no conversion
 * happened), an EncodingError is raised through clean_raise().
 */
static VALUE decode_cesu8(struct state *state, VALUE str)
{
  duk_context *ctx = state->ctx;
  VALUE utf16 = rb_str_new(0, 0);
  const char *cursor = RSTRING_PTR(str);
  const char *limit = RSTRING_END(str);
  long consumed;
  VALUE converted;

  for (; cursor < limit; cursor += consumed) {
    unsigned short unit;

    /* In: bytes still available.  Out: bytes used by this unit. */
    consumed = (limit - cursor);
    unit = utf8_to_uv(cursor, &consumed);
    rb_str_buf_cat(utf16, (char*)&unit, 2);
  }

  rb_enc_associate(utf16, utf16enc);
  converted = rb_str_conv_enc(utf16, utf16enc, rb_utf8_encoding());
  if (converted == utf16) {
    clean_raise(ctx, rb_eEncodingError, "cannot convert JavaScript string to UTF-16");
  }

  return converted;
}
Esempio n. 16
0
/*
 * Write the escaped form of +str+ into +buf+ and return +buf+.
 *
 * BEL, BS, HT, LF, VT, FF, CR, '"', '\'', '?' and '\\' are written as a
 * two-byte backslash escape; every other byte is copied unchanged.  No
 * terminator is written and +buf+ is not bounds-checked.
 *
 * Each escape consumes exactly ONE source byte while producing two
 * output bytes, so +src+ advances by one in every case.
 */
static char*
str_to_format(mrb_value str, char *buf)
{
  char *src, *dst;

  for (src = RSTRING_PTR(str), dst = buf; src < RSTRING_END(str);) {
    switch (*src) {
    case 0x07:/* BEL */  memcpy(dst, "\\a", 2); dst+=2; src++; break;
    case 0x08:/* BS  */  memcpy(dst, "\\b", 2); dst+=2; src++; break;
    case 0x09:/* HT  */  memcpy(dst, "\\t", 2); dst+=2; src++; break;
    case 0x0A:/* LF  */  memcpy(dst, "\\n", 2); dst+=2; src++; break;
    case 0x0B:/* VT  */  memcpy(dst, "\\v", 2); dst+=2; src++; break;
    case 0x0C:/* FF  */  memcpy(dst, "\\f", 2); dst+=2; src++; break;
    case 0x0D:/* CR  */  memcpy(dst, "\\r", 2); dst+=2; src++; break;
    case 0x22:/* "   */  memcpy(dst, "\\\"", 2); dst+=2; src++; break;
    case 0x27:/* '   */  memcpy(dst, "\\\'", 2); dst+=2; src++; break;
    case 0x3F:/* ?   */  memcpy(dst, "\\\?", 2); dst+=2; src++; break;
    case 0x5C:/* \   */  memcpy(dst, "\\\\", 2); dst+=2; src++; break;
    default: *dst++ = *src++; break;
    }
  }

  return buf;
}
Esempio n. 17
0
/*
 * sprintf-style formatter: builds and returns a new string from the
 * format string +fmt+ and the +argc+ values in +argv+.
 *
 * The format is scanned left to right.  Literal text is copied to the
 * result; each '%' starts a directive made of optional flags
 * (' ', '#', '+', '-', '0'), an optional width (digits or '*'), an
 * optional precision ('.' followed by digits or '*'), an optional
 * positional ("N$") or named ("<name>" / "{name}") argument reference,
 * and a conversion character (c, s, p, d, i, o, x, X, b, B, u, f, g, G,
 * e, E, a, A, or '%').
 *
 * Raises E_ARGUMENT_ERROR on malformed directives (messages are at the
 * raise sites below) and E_KEY_ERROR when a named reference is not found.
 *
 * NOTE(review): PUSH, CHECK, FILL, GETARG, GETPOSARG, GETNAMEARG, GETNUM,
 * GETASTER, the F* flag constants, fmt_setup, sign_bits and
 * remove_sign_bits are defined outside this block; comments about them
 * below describe how they are used here, not their implementation.
 */
mrb_value
mrb_str_format(mrb_state *mrb, int argc, const mrb_value *argv, mrb_value fmt)
{
  const char *p, *end;
  char *buf;
  long blen, bsiz;
  mrb_value result;

  int width, prec, flags = FNONE;
  int nextarg = 1;
  int posarg = 0;
  mrb_value nextvalue;
  mrb_value tmp;
  mrb_value str;
  mrb_value hash = mrb_undef_value();

#define CHECK_FOR_WIDTH(f)                                                  \
  if ((f) & FWIDTH) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "width given twice");         \
  }                                                                         \
  if ((f) & FPREC0) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "width after precision");     \
  }
#define CHECK_FOR_FLAGS(f)                                                  \
  if ((f) & FWIDTH) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after width");          \
  }                                                                         \
  if ((f) & FPREC0) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision");      \
  }

  /* Shift by one slot so that argument numbering starts at 1 (nextarg
   * is initialised to 1 above). */
  ++argc;
  --argv;
  mrb_string_value(mrb, &fmt);
  fmt = mrb_str_new4(mrb, fmt);
  p = RSTRING_PTR(fmt);
  end = p + RSTRING_LEN(fmt);
  /* blen: bytes written so far; bsiz: initial capacity requested for the
   * result buffer. */
  blen = 0;
  bsiz = 120;
  result = mrb_str_buf_new(mrb, bsiz);
  buf = RSTRING_PTR(result);
  memset(buf, 0, bsiz);

  for (; p < end; p++) {
    const char *t;
    int n;
    mrb_sym id = 0;

    /* Find the next '%' and emit the literal text in front of it. */
    for (t = p; t < end && *t != '%'; t++) ;
    PUSH(p, t - p);
    if (t >= end)
      goto sprint_exit; /* end of fmt string */

    p = t + 1;    /* skip `%' */

    width = prec = -1;
    nextvalue = mrb_undef_value();

    /* Re-entered after each flag/width/precision/argument-reference part
     * until a conversion character is reached. */
retry:
    switch (*p) {
      default:
        mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed format string - %%%c", *p);
        break;

    case ' ':
      CHECK_FOR_FLAGS(flags);
      flags |= FSPACE;
      p++;
      goto retry;

    case '#':
      CHECK_FOR_FLAGS(flags);
      flags |= FSHARP;
      p++;
      goto retry;

    case '+':
      CHECK_FOR_FLAGS(flags);
      flags |= FPLUS;
      p++;
      goto retry;

    case '-':
      CHECK_FOR_FLAGS(flags);
      flags |= FMINUS;
      p++;
      goto retry;

    case '0':
      CHECK_FOR_FLAGS(flags);
      flags |= FZERO;
      p++;
      goto retry;

    /* A number is either a positional reference ("N$") or a width. */
    case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      n = 0;
      GETNUM(n, width);
      if (*p == '$') {
        if (!UNDEF_P(nextvalue)) {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "value given twice - %d$", n);
        }
        nextvalue = GETPOSARG(n);
        p++;
        goto retry;
      }
      CHECK_FOR_WIDTH(flags);
      width = n;
      flags |= FWIDTH;
      goto retry;

    /* Named reference: "<name>" continues parsing the directive,
     * "{name}" jumps straight to the string conversion. */
    case '<':
    case '{':
      {
        const char *start = p;
        char term = (*p == '<') ? '>' : '}';
	mrb_value symname;

        for (; p < end && *p != term; )
          p++;
        if (id) {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "name%.*s after <%s>",
               (int)(p - start + 1), start, mrb_sym2name(mrb, id));
        }
        symname = mrb_str_new(mrb, start + 1, p - start - 1);
        id = mrb_intern(mrb, RSTRING_PTR(symname));
        nextvalue = GETNAMEARG(mrb_symbol_value(id), start, (int)(p - start + 1));
        if (UNDEF_P(nextvalue)) {
          mrb_raise(mrb, E_KEY_ERROR, "key%.*s not found", (int)(p - start + 1), start);
        }
        if (term == '}') goto format_s;
        p++;
        goto retry;
      }

    /* Width taken from the argument list; a negative width means
     * left-justify with the absolute value. */
    case '*':
      CHECK_FOR_WIDTH(flags);
      flags |= FWIDTH;
      GETASTER(width);
      if (width < 0) {
        flags |= FMINUS;
        width = -width;
      }
      p++;
      goto retry;

    case '.':
      if (flags & FPREC0) {
        mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice");
      }
      flags |= FPREC|FPREC0;

      prec = 0;
      p++;
      if (*p == '*') {
        GETASTER(prec);
        if (prec < 0) {  /* ignore negative precision */
          flags &= ~FPREC;
        }
        p++;
        goto retry;
      }

      GETNUM(prec, precision);
      goto retry;

    /* '%' followed by newline or NUL: step back so the character is
     * processed again by the outer loop, then emit a literal '%'. */
    case '\n':
    case '\0':
      p--;
      /* fall through */
    case '%':
      if (flags != FNONE) {
        mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %%");
      }
      PUSH("%", 1);
      break;

    /* %c: a one-byte string or a fixnum character code. */
    case 'c':
      {
        mrb_value val = GETARG();
        mrb_value tmp;
        unsigned int c;
        int n;
#ifdef INCLUDE_ENCODING
        mrb_encoding *enc = mrb_enc_get(mrb, fmt);
#endif //INCLUDE_ENCODING

        tmp = mrb_check_string_type(mrb, val);
        if (!mrb_nil_p(tmp)) {
          if (RSTRING_LEN(tmp) != 1 ) {
            mrb_raise(mrb, E_ARGUMENT_ERROR, "%%c requires a character");
          }
#ifdef INCLUDE_ENCODING
          c = mrb_enc_codepoint_len(mrb, RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
#else
          c = RSTRING_PTR(tmp)[0];
          n = 1;
#endif //INCLUDE_ENCODING
        }
        else {
          c = mrb_fixnum(val);
          /* NOTE(review): `enc` is only declared under INCLUDE_ENCODING but is
           * used here and below unconditionally - confirm how this builds
           * without INCLUDE_ENCODING. */
          n = mrb_enc_codelen(mrb, c, enc);
        }
        if (n <= 0) {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
        }
        if (!(flags & FWIDTH)) {
          CHECK(n);
          mrb_enc_mbcput(c, &buf[blen], enc);
          blen += n;
        }
        else if ((flags & FMINUS)) {
          CHECK(n);
          mrb_enc_mbcput(c, &buf[blen], enc);
          blen += n;
          FILL(' ', width-1);
        }
        else {
          FILL(' ', width-1);
          CHECK(n);
          mrb_enc_mbcput(c, &buf[blen], enc);
          blen += n;
        }
      }
      break;

    /* %s uses the argument's string form, %p its inspect form. */
    case 's':
    case 'p':
format_s:
      {
        mrb_value arg = GETARG();
        long len, slen;
#ifdef INCLUDE_ENCODING
        mrb_encoding *enc = mrb_enc_get(mrb, fmt);
#endif //INCLUDE_ENCODING

        if (*p == 'p') arg = mrb_inspect(mrb, arg);
        str = mrb_obj_as_string(mrb, arg);
        len = RSTRING_LEN(str);
        mrb_str_set_len(mrb, result, blen);
        if (flags&(FPREC|FWIDTH)) {
          slen = RSTRING_LEN(str);
          if (slen < 0) {
            mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence");
          }
          /* Precision truncates the string. */
          if ((flags&FPREC) && (prec < slen)) {
#ifdef INCLUDE_ENCODING
            char *p = mrb_enc_nth(mrb, RSTRING_PTR(str), RSTRING_END(str),prec, enc);
#else
            char *p = RSTRING_PTR(str) + prec;
#endif //INCLUDE_ENCODING
            slen = prec;
            len = p - RSTRING_PTR(str);
          }
          /* need to adjust multi-byte string pos */
          if ((flags&FWIDTH) && (width > slen)) {
            width -= (int)slen;
            /* Pad on the left unless '-' was given, then on the right. */
            if (!(flags&FMINUS)) {
              CHECK(width);
              while (width--) {
                buf[blen++] = ' ';
              }
            }
            CHECK(len);
            memcpy(&buf[blen], RSTRING_PTR(str), len);
            blen += len;
            if (flags&FMINUS) {
              CHECK(width);
              while (width--) {
                buf[blen++] = ' ';
              }
            }
            mrb_enc_associate(mrb, result, enc);
            break;
          }
        }
        PUSH(RSTRING_PTR(str), len);
        mrb_enc_associate(mrb, result, enc);
      }
      break;

    /* Integer conversions. */
    case 'd':
    case 'i':
    case 'o':
    case 'x':
    case 'X':
    case 'b':
    case 'B':
    case 'u':
      {
        mrb_value val = GETARG();
        char fbuf[32], nbuf[64], *s;
        const char *prefix = 0;
        int sign = 0, dots = 0;
        char sc = 0;
        long v = 0, org_v = 0;
        int base;
        int len;

        /* d/i/u always print a sign; the other bases only with '+' or ' '. */
        switch (*p) {
        case 'd':
        case 'i':
        case 'u':
          sign = 1; break;
        case 'o':
        case 'x':
        case 'X':
        case 'b':
        case 'B':
          if (flags&(FPLUS|FSPACE)) sign = 1;
          break;
        }
        /* '#' selects a radix prefix. */
        if (flags & FSHARP) {
          switch (*p) {
          case 'o': prefix = "0"; break;
          case 'x': prefix = "0x"; break;
          case 'X': prefix = "0X"; break;
          case 'b': prefix = "0b"; break;
          case 'B': prefix = "0B"; break;
          }
        }

        /* Coerce the argument until it is a fixnum (or leaves the loop as
         * the result of mrb_flt2big). */
bin_retry:
        switch (mrb_type(val)) {
        case MRB_TT_FLOAT:
          if (FIXABLE(mrb_float(val))) {
            val = mrb_fixnum_value((mrb_int)mrb_float(val));
            goto bin_retry;
          }
          val = mrb_flt2big(mrb, mrb_float(val));
          if (FIXNUM_P(val)) goto bin_retry;
          break;
        case MRB_TT_STRING:
          val = mrb_str_to_inum(mrb, val, 0, TRUE);
          goto bin_retry;
        case MRB_TT_FIXNUM:
          v = (long)mrb_fixnum(val);
          break;
        default:
          val = mrb_Integer(mrb, val);
          goto bin_retry;
        }

        switch (*p) {
        case 'o':
          base = 8; break;
        case 'x':
        case 'X':
          base = 16; break;
        case 'b':
        case 'B':
          base = 2; break;
        case 'u':
        case 'd':
        case 'i':
        default:
          base = 10; break;
        }

        /* Binary: the digits are produced as a string first and then read
         * back as a base-10 number, so the decimal printing below emits
         * the binary digits. */
        if (base == 2) {
          org_v = v;
          if ( v < 0 && !sign ) {
            val = mrb_fix2binstr(mrb, mrb_fixnum_value(v), base);
            dots = 1;
          }
          else {
            val = mrb_fix2str(mrb, mrb_fixnum_value(v), base);
          }
          v = mrb_fixnum(mrb_str_to_inum(mrb, val, 10, 0/*Qfalse*/));
        }
        if (sign) {
          char c = *p;
          if (c == 'i') c = 'd'; /* %d and %i are identical */
          if (base == 2) c = 'd';
          /* The sign character is kept in `sc` and takes one column of the
           * width. */
          if (v < 0) {
            v = -v;
            sc = '-';
            width--;
          }
          else if (flags & FPLUS) {
            sc = '+';
            width--;
          }
          else if (flags & FSPACE) {
            sc = ' ';
            width--;
          }
          snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
          snprintf(nbuf, sizeof(nbuf), fbuf, v);
          s = nbuf;
        }
        else {
          char c = *p;
          if (c == 'X') c = 'x';
          if (base == 2) c = 'd';
          s = nbuf;
          if (v < 0) {
            dots = 1;
          }
          snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
          /* Written from nbuf[1] so one byte is free in front for the
           * `*--s = d` below. */
          snprintf(++s, sizeof(nbuf) - 1, fbuf, v);
          if (v < 0) {
            char d = 0;

            s = remove_sign_bits(s, base);
            switch (base) {
            case 16: d = 'f'; break;
            case 8:  d = '7'; break;
            case 2:  d = '1'; break;
            }

            if (d && *s != d) {
              *--s = d;
            }
          }
        }
        len = (int)strlen(s);

        /* The ".." marker takes two columns. */
        if (dots) {
          prec -= 2;
          width -= 2;
        }

        if (*p == 'X') {
          char *pp = s;
          int c;
#ifdef INCLUDE_ENCODING
          mrb_encoding *enc = mrb_enc_get(mrb, fmt);
#endif //INCLUDE_ENCODING
          while ((c = (int)(unsigned char)*pp) != 0) {
#ifdef INCLUDE_ENCODING
            *pp = mrb_enc_toupper(c, enc);
#else
            *pp = toupper(c);
#endif //INCLUDE_ENCODING
            pp++;
          }
        }
        if (prefix && !prefix[1]) { /* octal */
          if (dots) {
            prefix = 0;
          }
          else if (len == 1 && *s == '0') {
            len = 0;
            if (flags & FPREC) prec--;
          }
          else if ((flags & FPREC) && (prec > len)) {
            prefix = 0;
          }
        }
        else if (len == 1 && *s == '0') {
          prefix = 0;
        }
        if (prefix) {
          width -= (int)strlen(prefix);
        }
        /* '0' flag without '-' or precision: the whole width becomes the
         * precision (zero fill). */
        if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
          prec = width;
          width = 0;
        }
        else {
          if (prec < len) {
            if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
            prec = len;
          }
          width -= prec;
        }
        /* Output order: left padding, sign, prefix, "..", fill digits,
         * number, right padding. */
        if (!(flags&FMINUS)) {
          CHECK(width);
          while (width-- > 0) {
            buf[blen++] = ' ';
          }
        }
        if (sc) PUSH(&sc, 1);
        if (prefix) {
          int plen = (int)strlen(prefix);
          PUSH(prefix, plen);
        }
        CHECK(prec - len);
        if (dots) PUSH("..", 2);
        if (v < 0 || (base == 2 && org_v < 0)) {
          char c = sign_bits(base, p);
          while (len < prec--) {
            buf[blen++] = c;
          }
        }
        else if ((flags & (FMINUS|FPREC)) != FMINUS) {
          char c = '0';
          while (len < prec--) {
            buf[blen++] = c;
          }
        }
        PUSH(s, len);
        CHECK(width);
        while (width-- > 0) {
          buf[blen++] = ' ';
        }
      }
      break;

    /* Floating-point conversions. */
    case 'f':
    case 'g':
    case 'G':
    case 'e':
    case 'E':
    case 'a':
    case 'A':
      {
        mrb_value val = GETARG();
        double fval;
        int i, need = 6;
        char fbuf[32];

        fval = mrb_float(mrb_Float(mrb, val));
        /* NaN and infinities are written by hand as "NaN" / "Inf". */
        if (isnan(fval) || isinf(fval)) {
          const char *expr;

          if (isnan(fval)) {
            expr = "NaN";
          }
          else {
            expr = "Inf";
          }
          need = (int)strlen(expr);
          if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
            need++;
          if ((flags & FWIDTH) && need < width)
            need = width;

          CHECK(need + 1);
          /* Pre-fill the field with `need` spaces, then overwrite parts. */
          snprintf(&buf[blen], need + 1, "%*s", need, "");
          if (flags & FMINUS) {
            if (!isnan(fval) && fval < 0.0)
              buf[blen++] = '-';
            else if (flags & FPLUS)
              buf[blen++] = '+';
            else if (flags & FSPACE)
              blen++;
            memcpy(&buf[blen], expr, strlen(expr));
          }
          else {
            if (!isnan(fval) && fval < 0.0)
              buf[blen + need - strlen(expr) - 1] = '-';
            else if (flags & FPLUS)
              buf[blen + need - strlen(expr) - 1] = '+';
            else if ((flags & FSPACE) && need > width)
              blen++;
            memcpy(&buf[blen + need - strlen(expr)], expr,
            strlen(expr));
          }
          blen += strlen(&buf[blen]);
          break;
        }

        /* Finite values: build a format string in fbuf and let snprintf
         * do the conversion; `need` is the space reserved for it. */
        fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
        need = 0;
        if (*p != 'e' && *p != 'E') {
          i = INT_MIN;
          frexp(fval, &i);
          if (i > 0)
            need = BIT_DIGITS(i);
        }
        need += (flags&FPREC) ? prec : 6;
        if ((flags&FWIDTH) && need < width)
          need = width;
        need += 20;

        CHECK(need);
        snprintf(&buf[blen], need, fbuf, fval);
        blen += strlen(&buf[blen]);
      }
      break;
    }
    /* Flags apply to a single directive only. */
    flags = FNONE;
  }

  sprint_exit:
  /* XXX - We cannot validate the number of arguments if (digit)$ style used.
   */
  /* Left-over arguments: raise when ruby_debug is set, warn when
   * ruby_verbose is set. */
  if (posarg >= 0 && nextarg < argc) {
    const char *mesg = "too many arguments for format string";
    if (RTEST(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, "%s", mesg);
    if (RTEST(ruby_verbose)) mrb_warn("%s", mesg);
  }
  /* Set the result to the number of bytes actually written. */
  mrb_str_resize(mrb, result, blen);

  return result;
}
Esempio n. 18
0
VALUE
rb_str_format(int argc, const VALUE *argv, VALUE fmt)
{
    enum {default_float_precision = 6};
    rb_encoding *enc;
    const char *p, *end;
    char *buf;
    long blen, bsiz;
    VALUE result;

    long scanned = 0;
    int coderange = ENC_CODERANGE_7BIT;
    int width, prec, flags = FNONE;
    int nextarg = 1;
    int posarg = 0;
    int tainted = 0;
    VALUE nextvalue;
    VALUE tmp;
    VALUE str;
    volatile VALUE hash = Qundef;

#define CHECK_FOR_WIDTH(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "width given twice");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "width after precision"); \
    }
#define CHECK_FOR_FLAGS(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "flag after width");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "flag after precision"); \
    }

    ++argc;
    --argv;
    if (OBJ_TAINTED(fmt)) tainted = 1;
    StringValue(fmt);
    enc = rb_enc_get(fmt);
    fmt = rb_str_new4(fmt);
    p = RSTRING_PTR(fmt);
    end = p + RSTRING_LEN(fmt);
    blen = 0;
    bsiz = 120;
    result = rb_str_buf_new(bsiz);
    rb_enc_copy(result, fmt);
    buf = RSTRING_PTR(result);
    memset(buf, 0, bsiz);
    ENC_CODERANGE_SET(result, coderange);

    for (; p < end; p++) {
	const char *t;
	int n;
	VALUE sym = Qnil;

	for (t = p; t < end && *t != '%'; t++) ;
	PUSH(p, t - p);
	if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
	    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
	    ENC_CODERANGE_SET(result, coderange);
	}
	if (t >= end) {
	    /* end of fmt string */
	    goto sprint_exit;
	}
	p = t + 1;		/* skip `%' */

	width = prec = -1;
	nextvalue = Qundef;
      retry:
	switch (*p) {
	  default:
	    if (rb_enc_isprint(*p, enc))
		rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
	    else
		rb_raise(rb_eArgError, "malformed format string");
	    break;

	  case ' ':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSPACE;
	    p++;
	    goto retry;

	  case '#':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSHARP;
	    p++;
	    goto retry;

	  case '+':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FPLUS;
	    p++;
	    goto retry;

	  case '-':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FMINUS;
	    p++;
	    goto retry;

	  case '0':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FZERO;
	    p++;
	    goto retry;

	  case '1': case '2': case '3': case '4':
	  case '5': case '6': case '7': case '8': case '9':
	    n = 0;
	    GETNUM(n, width);
	    if (*p == '$') {
		if (nextvalue != Qundef) {
		    rb_raise(rb_eArgError, "value given twice - %d$", n);
		}
		nextvalue = GETPOSARG(n);
		p++;
		goto retry;
	    }
	    CHECK_FOR_WIDTH(flags);
	    width = n;
	    flags |= FWIDTH;
	    goto retry;

	  case '<':
	  case '{':
	    {
		const char *start = p;
		char term = (*p == '<') ? '>' : '}';
		int len;

		for (; p < end && *p != term; ) {
		    p += rb_enc_mbclen(p, end, enc);
		}
		if (p >= end) {
		    rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
		}
#if SIZEOF_INT < SIZEOF_SIZE_T
		if ((size_t)(p - start) >= INT_MAX) {
		    const int message_limit = 20;
		    len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start);
		    rb_enc_raise(enc, rb_eArgError,
				 "too long name (%"PRIdSIZE" bytes) - %.*s...%c",
				 (size_t)(p - start - 2), len, start, term);
		}
#endif
		len = (int)(p - start + 1); /* including parenthesis */
		if (sym != Qnil) {
		    rb_enc_raise(enc, rb_eArgError, "named%.*s after <%"PRIsVALUE">",
				 len, start, rb_sym2str(sym));
		}
		CHECKNAMEARG(start, len, enc);
		get_hash(&hash, argc, argv);
		sym = rb_check_symbol_cstr(start + 1,
					   len - 2 /* without parenthesis */,
					   enc);
		if (sym != Qnil) nextvalue = rb_hash_lookup2(hash, sym, Qundef);
		if (nextvalue == Qundef) {
		    rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start);
		}
		if (term == '}') goto format_s;
		p++;
		goto retry;
	    }

	  case '*':
	    CHECK_FOR_WIDTH(flags);
	    flags |= FWIDTH;
	    GETASTER(width);
	    if (width < 0) {
		flags |= FMINUS;
		width = -width;
	    }
	    p++;
	    goto retry;

	  case '.':
	    if (flags & FPREC0) {
		rb_raise(rb_eArgError, "precision given twice");
	    }
	    flags |= FPREC|FPREC0;

	    prec = 0;
	    p++;
	    if (*p == '*') {
		GETASTER(prec);
		if (prec < 0) {	/* ignore negative precision */
		    flags &= ~FPREC;
		}
		p++;
		goto retry;
	    }

	    GETNUM(prec, precision);
	    goto retry;

	  case '\n':
	  case '\0':
	    p--;
	  case '%':
	    if (flags != FNONE) {
		rb_raise(rb_eArgError, "invalid format character - %%");
	    }
	    PUSH("%", 1);
	    break;

	  case 'c':
	    {
		VALUE val = GETARG();
		VALUE tmp;
		unsigned int c;
		int n;

		tmp = rb_check_string_type(val);
		if (!NIL_P(tmp)) {
		    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
			rb_raise(rb_eArgError, "%%c requires a character");
		    }
		    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
		    RB_GC_GUARD(tmp);
		}
		else {
		    c = NUM2INT(val);
		    n = rb_enc_codelen(c, enc);
		}
		if (n <= 0) {
		    rb_raise(rb_eArgError, "invalid character");
		}
		if (!(flags & FWIDTH)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
		else if ((flags & FMINUS)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		    FILL(' ', width-1);
		}
		else {
		    FILL(' ', width-1);
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
	    }
	    break;

	  case 's':
	  case 'p':
	  format_s:
	    {
		VALUE arg = GETARG();
		long len, slen;

		if (*p == 'p') arg = rb_inspect(arg);
		str = rb_obj_as_string(arg);
		if (OBJ_TAINTED(str)) tainted = 1;
		len = RSTRING_LEN(str);
		rb_str_set_len(result, blen);
		if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
		    int cr = coderange;
		    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
		    ENC_CODERANGE_SET(result,
				      (cr == ENC_CODERANGE_UNKNOWN ?
				       ENC_CODERANGE_BROKEN : (coderange = cr)));
		}
		enc = rb_enc_check(result, str);
		if (flags&(FPREC|FWIDTH)) {
		    slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
		    if (slen < 0) {
			rb_raise(rb_eArgError, "invalid mbstring sequence");
		    }
		    if ((flags&FPREC) && (prec < slen)) {
			char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
					     prec, enc);
			slen = prec;
			len = p - RSTRING_PTR(str);
		    }
		    /* need to adjust multi-byte string pos */
		    if ((flags&FWIDTH) && (width > slen)) {
			width -= (int)slen;
			if (!(flags&FMINUS)) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			CHECK(len);
			memcpy(&buf[blen], RSTRING_PTR(str), len);
			RB_GC_GUARD(str);
			blen += len;
			if (flags&FMINUS) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			rb_enc_associate(result, enc);
			break;
		    }
		}
		PUSH(RSTRING_PTR(str), len);
		RB_GC_GUARD(str);
		rb_enc_associate(result, enc);
	    }
	    break;

	  case 'd':
	  case 'i':
	  case 'o':
	  case 'x':
	  case 'X':
	  case 'b':
	  case 'B':
	  case 'u':
	    {
		volatile VALUE val = GETARG();
                int valsign;
		char nbuf[64], *s;
		const char *prefix = 0;
		int sign = 0, dots = 0;
		char sc = 0;
		long v = 0;
		int base, bignum = 0;
		int len;

		switch (*p) {
		  case 'd':
		  case 'i':
		  case 'u':
		    sign = 1; break;
		  case 'o':
		  case 'x':
		  case 'X':
		  case 'b':
		  case 'B':
		    if (flags&(FPLUS|FSPACE)) sign = 1;
		    break;
		}
		if (flags & FSHARP) {
		    switch (*p) {
		      case 'o':
			prefix = "0"; break;
		      case 'x':
			prefix = "0x"; break;
		      case 'X':
			prefix = "0X"; break;
		      case 'b':
			prefix = "0b"; break;
		      case 'B':
			prefix = "0B"; break;
		    }
		}

	      bin_retry:
		switch (TYPE(val)) {
		  case T_FLOAT:
		    if (FIXABLE(RFLOAT_VALUE(val))) {
			val = LONG2FIX((long)RFLOAT_VALUE(val));
			goto bin_retry;
		    }
		    val = rb_dbl2big(RFLOAT_VALUE(val));
		    if (FIXNUM_P(val)) goto bin_retry;
		    bignum = 1;
		    break;
		  case T_STRING:
		    val = rb_str_to_inum(val, 0, TRUE);
		    goto bin_retry;
		  case T_BIGNUM:
		    bignum = 1;
		    break;
		  case T_FIXNUM:
		    v = FIX2LONG(val);
		    break;
		  default:
		    val = rb_Integer(val);
		    goto bin_retry;
		}

		switch (*p) {
		  case 'o':
		    base = 8; break;
		  case 'x':
		  case 'X':
		    base = 16; break;
		  case 'b':
		  case 'B':
		    base = 2; break;
		  case 'u':
		  case 'd':
		  case 'i':
		  default:
		    base = 10; break;
		}

                if (base != 10) {
                    int numbits = ffs(base)-1;
                    size_t abs_nlz_bits;
                    size_t numdigits = rb_absint_numwords(val, numbits, &abs_nlz_bits);
                    long i;
                    if (INT_MAX-1 < numdigits) /* INT_MAX is used because rb_long2int is used later. */
                        rb_raise(rb_eArgError, "size too big");
                    if (sign) {
                        if (numdigits == 0)
                            numdigits = 1;
                        tmp = rb_str_new(NULL, numdigits);
                        valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp),
                                1, CHAR_BIT-numbits, INTEGER_PACK_BIG_ENDIAN);
                        for (i = 0; i < RSTRING_LEN(tmp); i++)
                            RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]];
                        s = RSTRING_PTR(tmp);
                        if (valsign < 0) {
                            sc = '-';
                            width--;
                        }
                        else if (flags & FPLUS) {
                            sc = '+';
                            width--;
                        }
                        else if (flags & FSPACE) {
                            sc = ' ';
                            width--;
                        }
                    }
                    else {
                        /* Following conditional "numdigits++" guarantees the
                         * most significant digit as
                         * - '1'(bin), '7'(oct) or 'f'(hex) for negative numbers
                         * - '0' for zero
                         * - not '0' for positive numbers.
                         *
                         * It also guarantees the most significant two
                         * digits will not be '11'(bin), '77'(oct), 'ff'(hex)
                         * or '00'.  */
                        if (numdigits == 0 ||
                                ((abs_nlz_bits != (size_t)(numbits-1) ||
                                  !rb_absint_singlebit_p(val)) &&
                                 (!bignum ? v < 0 : BIGNUM_NEGATIVE_P(val))))
                            numdigits++;
                        tmp = rb_str_new(NULL, numdigits);
                        valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp),
                                1, CHAR_BIT-numbits, INTEGER_PACK_2COMP | INTEGER_PACK_BIG_ENDIAN);
                        for (i = 0; i < RSTRING_LEN(tmp); i++)
                            RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]];
                        s = RSTRING_PTR(tmp);
                        dots = valsign < 0;
                    }
                    len = rb_long2int(RSTRING_END(tmp) - s);
                }
                else if (!bignum) {
                    valsign = 1;
                    if (v < 0) {
                        v = -v;
                        sc = '-';
                        width--;
                        valsign = -1;
                    }
                    else if (flags & FPLUS) {
                        sc = '+';
                        width--;
                    }
                    else if (flags & FSPACE) {
                        sc = ' ';
                        width--;
                    }
                    snprintf(nbuf, sizeof(nbuf), "%ld", v);
                    s = nbuf;
		    len = (int)strlen(s);
		}
		else {
                    tmp = rb_big2str(val, 10);
                    s = RSTRING_PTR(tmp);
                    valsign = 1;
                    if (s[0] == '-') {
                        s++;
                        sc = '-';
                        width--;
                        valsign = -1;
                    }
                    else if (flags & FPLUS) {
                        sc = '+';
                        width--;
                    }
                    else if (flags & FSPACE) {
                        sc = ' ';
                        width--;
                    }
		    len = rb_long2int(RSTRING_END(tmp) - s);
		}

		if (dots) {
		    prec -= 2;
		    width -= 2;
		}

		if (*p == 'X') {
		    char *pp = s;
		    int c;
		    while ((c = (int)(unsigned char)*pp) != 0) {
			*pp = rb_enc_toupper(c, enc);
			pp++;
		    }
		}
		if (prefix && !prefix[1]) { /* octal */
		    if (dots) {
			prefix = 0;
		    }
		    else if (len == 1 && *s == '0') {
			len = 0;
			if (flags & FPREC) prec--;
		    }
		    else if ((flags & FPREC) && (prec > len)) {
			prefix = 0;
		    }
		}
		else if (len == 1 && *s == '0') {
		    prefix = 0;
		}
		if (prefix) {
		    width -= (int)strlen(prefix);
		}
		if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
		    prec = width;
		    width = 0;
		}
		else {
		    if (prec < len) {
			if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
			prec = len;
		    }
		    width -= prec;
		}
		if (!(flags&FMINUS)) {
		    CHECK(width);
		    while (width-- > 0) {
			buf[blen++] = ' ';
		    }
		}
		if (sc) PUSH(&sc, 1);
		if (prefix) {
		    int plen = (int)strlen(prefix);
		    PUSH(prefix, plen);
		}
		CHECK(prec - len);
		if (dots) PUSH("..", 2);
		if (!sign && valsign < 0) {
		    char c = sign_bits(base, p);
		    while (len < prec--) {
			buf[blen++] = c;
		    }
		}
		else if ((flags & (FMINUS|FPREC)) != FMINUS) {
		    while (len < prec--) {
			buf[blen++] = '0';
		    }
		}
		PUSH(s, len);
		RB_GC_GUARD(tmp);
		CHECK(width);
		while (width-- > 0) {
		    buf[blen++] = ' ';
		}
	    }
	    break;

	  case 'f':
	    {
		VALUE val = GETARG(), num, den;
		int sign = (flags&FPLUS) ? 1 : 0, zero = 0;
		long len, done = 0;
		int prefix = 0;
		if (!RB_TYPE_P(val, T_RATIONAL)) {
		    nextvalue = val;
		    goto float_value;
		}
		if (!(flags&FPREC)) prec = default_float_precision;
		den = rb_rational_den(val);
		num = rb_rational_num(val);
		if (FIXNUM_P(num)) {
		    if ((SIGNED_VALUE)num < 0) {
			long n = -FIX2LONG(num);
			num = LONG2FIX(n);
			sign = -1;
		    }
		}
		else if (rb_num_negative_p(num)) {
		    sign = -1;
		    num = rb_funcallv(num, idUMinus, 0, 0);
		}
		if (den != INT2FIX(1) || prec > 1) {
		    const ID idDiv = rb_intern("div");
		    VALUE p10 = rb_int_positive_pow(10, prec);
		    VALUE den_2 = rb_funcall(den, idDiv, 1, INT2FIX(2));
		    num = rb_funcallv(num, '*', 1, &p10);
		    num = rb_funcallv(num, '+', 1, &den_2);
		    num = rb_funcallv(num, idDiv, 1, &den);
		}
		else if (prec >= 0) {
		    zero = prec;
		}
		val = rb_obj_as_string(num);
		len = RSTRING_LEN(val) + zero;
		if (prec >= len) ++len; /* integer part 0 */
		if (sign || (flags&FSPACE)) ++len;
		if (prec > 0) ++len; /* period */
		CHECK(len > width ? len : width);
		if (sign || (flags&FSPACE)) {
		    buf[blen++] = sign > 0 ? '+' : sign < 0 ? '-' : ' ';
		    prefix++;
		    done++;
		}
		len = RSTRING_LEN(val) + zero;
		t = RSTRING_PTR(val);
		if (len > prec) {
		    memcpy(&buf[blen], t, len - prec);
		    blen += len - prec;
		    done += len - prec;
		}
		else {
		    buf[blen++] = '0';
		    done++;
		}
		if (prec > 0) {
		    buf[blen++] = '.';
		    done++;
		}
		if (zero) {
		    FILL('0', zero);
		    done += zero;
		}
		else if (prec > len) {
		    FILL('0', prec - len);
		    memcpy(&buf[blen], t, len);
		    blen += len;
		    done += prec;
		}
		else if (prec > 0) {
		    memcpy(&buf[blen], t + len - prec, prec);
		    blen += prec;
		    done += prec;
		}
		if ((flags & FWIDTH) && width > done) {
		    int fill = ' ';
		    long shifting = 0;
		    if (!(flags&FMINUS)) {
			shifting = done;
			if (flags&FZERO) {
			    shifting -= prefix;
			    fill = '0';
			}
			blen -= shifting;
			memmove(&buf[blen + width - done], &buf[blen], shifting);
		    }
		    FILL(fill, width - done);
		    blen += shifting;
		}
		RB_GC_GUARD(val);
		break;
	    }
	  case 'g':
	  case 'G':
	  case 'e':
	  case 'E':
	    /* TODO: rational support */
	  case 'a':
	  case 'A':
	  float_value:
	    {
		VALUE val = GETARG();
		double fval;
		int i, need;
		char fbuf[32];

		fval = RFLOAT_VALUE(rb_Float(val));
		if (isnan(fval) || isinf(fval)) {
		    const char *expr;

		    if (isnan(fval)) {
			expr = "NaN";
		    }
		    else {
			expr = "Inf";
		    }
		    need = (int)strlen(expr);
		    if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
			need++;
		    if ((flags & FWIDTH) && need < width)
			need = width;

		    CHECK(need + 1);
		    snprintf(&buf[blen], need + 1, "%*s", need, "");
		    if (flags & FMINUS) {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen++] = '-';
			else if (flags & FPLUS)
			    buf[blen++] = '+';
			else if (flags & FSPACE)
			    blen++;
			memcpy(&buf[blen], expr, strlen(expr));
		    }
		    else {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen + need - strlen(expr) - 1] = '-';
			else if (flags & FPLUS)
			    buf[blen + need - strlen(expr) - 1] = '+';
			else if ((flags & FSPACE) && need > width)
			    blen++;
			memcpy(&buf[blen + need - strlen(expr)], expr,
			       strlen(expr));
		    }
		    blen += strlen(&buf[blen]);
		    break;
		}

		fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
		need = 0;
		if (*p != 'e' && *p != 'E') {
		    i = INT_MIN;
		    frexp(fval, &i);
		    if (i > 0)
			need = BIT_DIGITS(i);
		}
		need += (flags&FPREC) ? prec : default_float_precision;
		if ((flags&FWIDTH) && need < width)
		    need = width;
		need += 20;

		CHECK(need);
		snprintf(&buf[blen], need, fbuf, fval);
		blen += strlen(&buf[blen]);
	    }
	    break;
	}
	flags = FNONE;
    }

  sprint_exit:
    RB_GC_GUARD(fmt);
    /* XXX - We cannot validate the number of arguments if (digit)$ style used.
     */
    if (posarg >= 0 && nextarg < argc) {
	const char *mesg = "too many arguments for format string";
	if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
	if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
    }
    rb_str_resize(result, blen);

    if (tainted) OBJ_TAINT(result);
    return result;
}
Esempio n. 19
0
/**
 * Replace the invalid byte sequences of a string.
 *
 * @param argc number of optional arguments (0 or 1)
 * @param argv optional arguments; argv[0], when present, is the replacement
 *             and is checked with str_compat_and_valid() against the
 *             encoding of str
 * @param str the string to be scrubbed
 * @return If given string is invalid, returns a new string. Otherwise, returns Qnil.
 *
 * Qnil is returned when the coderange of str is already 7bit/valid, when
 * the encoding is a dummy encoding, or when the scan finds nothing to
 * replace (in that case the coderange of str is updated as a side effect).
 *
 * When a block is given, each invalid byte sequence is yielded to it and
 * the block result (checked with str_compat_and_valid()) is used as the
 * replacement; the block takes precedence over the replacement argument.
 * Without block and argument the replacement is U+FFFD for UTF-8,
 * UTF-16BE/LE and UTF-32BE/LE, and "?" for every other encoding.
 */
static VALUE
str_scrub0(int argc, VALUE *argv, VALUE str)
{
    int cr = ENC_CODERANGE(str);
    rb_encoding *enc;
    int encidx;
    VALUE repl;

    /* already known to be valid: nothing to scrub */
    if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID)
	return Qnil;

    enc = STR_ENC_GET(str);
    rb_scan_args(argc, argv, "01", &repl);
    if (argc != 0) {
	repl = str_compat_and_valid(repl, enc);
    }

    if (rb_enc_dummy_p(enc)) {
	return Qnil;
    }
    encidx = rb_enc_to_index(enc);

    /* point rep/replen at a static copy of the literal, without the NUL */
#define DEFAULT_REPLACE_CHAR(str) do { \
	static const char replace[sizeof(str)-1] = str; \
	rep = replace; replen = (int)sizeof(replace); \
    } while (0)

    if (rb_enc_asciicompat(enc)) {
	const char *p = RSTRING_PTR(str);
	const char *e = RSTRING_END(str);
	const char *p1 = p;
	const char *rep;
	long replen;
	int rep7bit_p;
	VALUE buf = Qnil;
	if (rb_block_given_p()) {
	    /* rep == NULL means "ask the block" below */
	    rep = NULL;
	    replen = 0;
	    rep7bit_p = FALSE;
	}
	else if (!NIL_P(repl)) {
	    rep = RSTRING_PTR(repl);
	    replen = RSTRING_LEN(repl);
	    rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT);
	}
	else if (encidx == rb_utf8_encindex()) {
	    DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD");
	    rep7bit_p = FALSE;
	}
	else {
	    DEFAULT_REPLACE_CHAR("?");
	    rep7bit_p = TRUE;
	}
	/* coderange of the result; raised to VALID once a non-7bit part is kept */
	cr = ENC_CODERANGE_7BIT;

	p = search_nonascii(p, e);
	if (!p) {
	    p = e;
	}
	while (p < e) {
	    int ret = rb_enc_precise_mbclen(p, e, enc);
	    if (MBCLEN_NEEDMORE_P(ret)) {
		/* truncated character at the end; handled after the loop */
		break;
	    }
	    else if (MBCLEN_CHARFOUND_P(ret)) {
		cr = ENC_CODERANGE_VALID;
		p += MBCLEN_CHARFOUND_LEN(ret);
	    }
	    else if (MBCLEN_INVALID_P(ret)) {
		/*
		 * p1~p: valid ascii/multibyte chars
		 * p ~e: invalid bytes + unknown bytes
		 */
		long clen = rb_enc_mbmaxlen(enc);
		if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
		if (p > p1) {
		    rb_str_buf_cat(buf, p1, p - p1);
		}

		/* determine how many bytes (clen) form the invalid sequence */
		if (e - p < clen) clen = e - p;
		if (clen <= 2) {
		    clen = 1;
		}
		else {
		    const char *q = p;
		    clen--;
		    for (; clen > 1; clen--) {
			ret = rb_enc_precise_mbclen(q, q + clen, enc);
			if (MBCLEN_NEEDMORE_P(ret)) break;
			if (MBCLEN_INVALID_P(ret)) continue;
			UNREACHABLE;
		    }
		}
		if (rep) {
		    rb_str_buf_cat(buf, rep, replen);
		    if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
		}
		else {
		    repl = rb_yield(rb_enc_str_new(p, clen, enc));
		    repl = str_compat_and_valid(repl, enc);
		    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
		    if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
			cr = ENC_CODERANGE_VALID;
		}
		p += clen;
		p1 = p;
		p = search_nonascii(p, e);
		if (!p) {
		    p = e;
		    break;
		}
	    }
	    else {
		UNREACHABLE;
	    }
	}
	if (NIL_P(buf)) {
	    if (p == e) {
		/* whole string scanned without replacement: cache the result */
		ENC_CODERANGE_SET(str, cr);
		return Qnil;
	    }
	    buf = rb_str_buf_new(RSTRING_LEN(str));
	}
	if (p1 < p) {
	    rb_str_buf_cat(buf, p1, p - p1);
	}
	if (p < e) {
	    /* trailing incomplete character: replace the whole rest */
	    if (rep) {
		rb_str_buf_cat(buf, rep, replen);
		if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
	    }
	    else {
		repl = rb_yield(rb_enc_str_new(p, e-p, enc));
		repl = str_compat_and_valid(repl, enc);
		rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
		if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
		    cr = ENC_CODERANGE_VALID;
	    }
	}
	ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr);
	return buf;
    }
    else {
	/* ASCII incompatible */
	const char *p = RSTRING_PTR(str);
	const char *e = RSTRING_END(str);
	const char *p1 = p;
	VALUE buf = Qnil;
	const char *rep;
	long replen;
	long mbminlen = rb_enc_mbminlen(enc);
	if (rb_block_given_p()) {
	    /*
	     * rep == NULL selects the rb_yield paths below, exactly as in
	     * the ASCII-compatible branch; without this the block would
	     * never be called for these encodings.
	     */
	    rep = NULL;
	    replen = 0;
	}
	else if (!NIL_P(repl)) {
	    rep = RSTRING_PTR(repl);
	    replen = RSTRING_LEN(repl);
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-16BE")) {
	    DEFAULT_REPLACE_CHAR("\xFF\xFD");
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-16LE")) {
	    DEFAULT_REPLACE_CHAR("\xFD\xFF");
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-32BE")) {
	    DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD");
	}
	else if (!strcasecmp(rb_enc_name(enc), "UTF-32lE")) {
	    DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00");
	}
	else {
	    DEFAULT_REPLACE_CHAR("?");
	}

	while (p < e) {
	    int ret = rb_enc_precise_mbclen(p, e, enc);
	    if (MBCLEN_NEEDMORE_P(ret)) {
		/* truncated character at the end; handled after the loop */
		break;
	    }
	    else if (MBCLEN_CHARFOUND_P(ret)) {
		p += MBCLEN_CHARFOUND_LEN(ret);
	    }
	    else if (MBCLEN_INVALID_P(ret)) {
		const char *q = p;
		long clen = rb_enc_mbmaxlen(enc);
		if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
		if (p > p1) rb_str_buf_cat(buf, p1, p - p1);

		/* determine clen in units of mbminlen bytes */
		if (e - p < clen) clen = e - p;
		if (clen <= mbminlen * 2) {
		    clen = mbminlen;
		}
		else {
		    clen -= mbminlen;
		    for (; clen > mbminlen; clen-=mbminlen) {
			ret = rb_enc_precise_mbclen(q, q + clen, enc);
			if (MBCLEN_NEEDMORE_P(ret)) break;
			if (MBCLEN_INVALID_P(ret)) continue;
			UNREACHABLE;
		    }
		}
		if (rep) {
		    rb_str_buf_cat(buf, rep, replen);
		}
		else {
		    /* yield only the clen invalid bytes that are skipped below */
		    repl = rb_yield(rb_enc_str_new(p, clen, enc));
		    repl = str_compat_and_valid(repl, enc);
		    rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
		}
		p += clen;
		p1 = p;
	    }
	    else {
		UNREACHABLE;
	    }
	}
	if (NIL_P(buf)) {
	    if (p == e) {
		/* whole string scanned without replacement: cache the result */
		ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
		return Qnil;
	    }
	    buf = rb_str_buf_new(RSTRING_LEN(str));
	}
	if (p1 < p) {
	    rb_str_buf_cat(buf, p1, p - p1);
	}
	if (p < e) {
	    /* trailing incomplete character: replace the whole rest */
	    if (rep) {
		rb_str_buf_cat(buf, rep, replen);
	    }
	    else {
		repl = rb_yield(rb_enc_str_new(p, e-p, enc));
		repl = str_compat_and_valid(repl, enc);
		rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
	    }
	}
	ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID);
	return buf;
    }
}
Esempio n. 20
0
/*********************************************************
 * main
 *********************************************************/
/*
 * Serialize an mruby value as JSON text, returned as a new mruby String.
 *
 *   nil                            -> null
 *   Fixnum / Float / true / false  -> result of calling to_s on the value
 *   Symbol / String                -> double-quoted, escaped JSON string
 *   Hash                           -> {"key":value,...} (keys go through to_s)
 *   Array                          -> [value,...]
 *
 * Hash and Array elements are serialized recursively.  Any other type
 * raises ArgumentError ("invalid argument").
 */
static mrb_value
mrb_value_to_string(mrb_state* mrb, mrb_value value) {
  mrb_value str;

  if (mrb_nil_p(value)) {
    return mrb_str_new_cstr(mrb, "null");
  }

  switch (mrb_type(value)) {
  case MRB_TT_FIXNUM:
  case MRB_TT_FLOAT:
  case MRB_TT_TRUE:
  case MRB_TT_FALSE:
  case MRB_TT_UNDEF:
    str = mrb_funcall(mrb, value, "to_s", 0, NULL);
    break;
  case MRB_TT_SYMBOL:
    value = mrb_funcall(mrb, value, "to_s", 0, NULL);
    /* FALLTHROUGH */
  case MRB_TT_STRING:
    {
      static const char hexdigits[] = "0123456789abcdef";
      int ai;
      char* ptr = RSTRING_PTR(value);
      char* end = RSTRING_END(value);
      /* Create the result before saving the arena so that it is still
       * registered there after the restore below, like the results of
       * the other branches. */
      str = mrb_str_new_cstr(mrb, "\""); 
      ai = mrb_gc_arena_save(mrb);
      /* Walk by length, not up to the first NUL: an embedded "\0" must
       * not truncate the output. */
      while (ptr < end) {
        unsigned char c = (unsigned char)*ptr;
        switch (c) {
        case '\\':
          str = mrb_str_cat_cstr(mrb, str, "\\\\");
          break;
        case '"':
          str = mrb_str_cat_cstr(mrb, str, "\\\"");
          break;
        case '\b':
          str = mrb_str_cat_cstr(mrb, str, "\\b");
          break;
        case '\f':
          str = mrb_str_cat_cstr(mrb, str, "\\f");
          break;
        case '\n':
          str = mrb_str_cat_cstr(mrb, str, "\\n");
          break;
        case '\r':
          str = mrb_str_cat_cstr(mrb, str, "\\r");
          break;
        case '\t':
          str = mrb_str_cat_cstr(mrb, str, "\\t");
          break;
        default:
          if (c < 0x20) {
            /* JSON forbids raw control characters inside strings;
             * emit them as \u00XX. */
            char esc[6];
            esc[0] = '\\';
            esc[1] = 'u';
            esc[2] = '0';
            esc[3] = '0';
            esc[4] = hexdigits[c >> 4];
            esc[5] = hexdigits[c & 0x0f];
            str = mrb_str_cat(mrb, str, esc, 6);
          }
          else {
            // TODO: handle unicode
            str = mrb_str_cat(mrb, str, ptr, 1);
          }
        }
        ptr++;
      }
      mrb_str_cat_cstr(mrb, str, "\""); 
      mrb_gc_arena_restore(mrb, ai);
    }
    break;
  case MRB_TT_HASH:
    {
      mrb_value keys;
      int n, l;
      str = mrb_str_new_cstr(mrb, "{");
      keys = mrb_hash_keys(mrb, value);
      l = RARRAY_LEN(keys);
      for (n = 0; n < l; n++) {
        mrb_value obj;
        /* release the temporaries of each iteration from the GC arena */
        int ai = mrb_gc_arena_save(mrb);
        mrb_value key = mrb_ary_entry(keys, n);
        mrb_value enckey = mrb_funcall(mrb, key, "to_s", 0, NULL);
        /* Quote the key with the JSON string rules above rather than
         * with Ruby's inspect, whose escapes are not JSON escapes. */
        mrb_str_concat(mrb, str, mrb_value_to_string(mrb, enckey));
        mrb_str_cat_cstr(mrb, str, ":");
        obj = mrb_hash_get(mrb, value, key);
        mrb_str_concat(mrb, str, mrb_value_to_string(mrb, obj));
        if (n != l - 1) {
          mrb_str_cat_cstr(mrb, str, ",");
        }
        mrb_gc_arena_restore(mrb, ai);
      }
      mrb_str_cat_cstr(mrb, str, "}");
      break;
    }
  case MRB_TT_ARRAY:
    {
      int n, l;
      str = mrb_str_new_cstr(mrb, "[");
      l = RARRAY_LEN(value);
      for (n = 0; n < l; n++) {
        /* release the temporaries of each iteration from the GC arena */
        int ai = mrb_gc_arena_save(mrb);
        mrb_value obj = mrb_ary_entry(value, n);
        mrb_str_concat(mrb, str, mrb_value_to_string(mrb, obj));
        if (n != l - 1) {
          mrb_str_cat_cstr(mrb, str, ",");
        }
        mrb_gc_arena_restore(mrb, ai);
      }
      mrb_str_cat_cstr(mrb, str, "]");
      break;
    }
  default:
    mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid argument");
  }
  return str;
}
Esempio n. 21
0
VALUE
rb_str_format(int argc, const VALUE *argv, VALUE fmt)
{
    rb_encoding *enc;
    const char *p, *end;
    char *buf;
    long blen, bsiz;
    VALUE result;

    long scanned = 0;
    int coderange = ENC_CODERANGE_7BIT;
    int width, prec, flags = FNONE;
    int nextarg = 1;
    int posarg = 0;
    int tainted = 0;
    VALUE nextvalue;
    VALUE tmp;
    VALUE str;
    volatile VALUE hash = Qundef;

#define CHECK_FOR_WIDTH(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "width given twice");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "width after precision"); \
    }
#define CHECK_FOR_FLAGS(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "flag after width");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "flag after precision"); \
    }

    ++argc;
    --argv;
    if (OBJ_TAINTED(fmt)) tainted = 1;
    StringValue(fmt);
    enc = rb_enc_get(fmt);
    fmt = rb_str_new4(fmt);
    p = RSTRING_PTR(fmt);
    end = p + RSTRING_LEN(fmt);
    blen = 0;
    bsiz = 120;
    result = rb_str_buf_new(bsiz);
    rb_enc_copy(result, fmt);
    buf = RSTRING_PTR(result);
    memset(buf, 0, bsiz);
    ENC_CODERANGE_SET(result, coderange);

    for (; p < end; p++) {
	const char *t;
	int n;
	ID id = 0;

	for (t = p; t < end && *t != '%'; t++) ;
	PUSH(p, t - p);
	if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
	    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
	    ENC_CODERANGE_SET(result, coderange);
	}
	if (t >= end) {
	    /* end of fmt string */
	    goto sprint_exit;
	}
	p = t + 1;		/* skip `%' */

	width = prec = -1;
	nextvalue = Qundef;
      retry:
	switch (*p) {
	  default:
	    if (rb_enc_isprint(*p, enc))
		rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
	    else
		rb_raise(rb_eArgError, "malformed format string");
	    break;

	  case ' ':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSPACE;
	    p++;
	    goto retry;

	  case '#':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSHARP;
	    p++;
	    goto retry;

	  case '+':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FPLUS;
	    p++;
	    goto retry;

	  case '-':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FMINUS;
	    p++;
	    goto retry;

	  case '0':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FZERO;
	    p++;
	    goto retry;

	  case '1': case '2': case '3': case '4':
	  case '5': case '6': case '7': case '8': case '9':
	    n = 0;
	    GETNUM(n, width);
	    if (*p == '$') {
		if (nextvalue != Qundef) {
		    rb_raise(rb_eArgError, "value given twice - %d$", n);
		}
		nextvalue = GETPOSARG(n);
		p++;
		goto retry;
	    }
	    CHECK_FOR_WIDTH(flags);
	    width = n;
	    flags |= FWIDTH;
	    goto retry;

	  case '<':
	  case '{':
	    {
		const char *start = p;
		char term = (*p == '<') ? '>' : '}';
		int len;

		for (; p < end && *p != term; ) {
		    p += rb_enc_mbclen(p, end, enc);
		}
		if (p >= end) {
		    rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
		}
#if SIZEOF_INT < SIZEOF_SIZE_T
		if ((size_t)(p - start) >= INT_MAX) {
		    const int message_limit = 20;
		    len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start);
		    rb_enc_raise(enc, rb_eArgError,
				 "too long name (%"PRIdSIZE" bytes) - %.*s...%c",
				 (size_t)(p - start - 2), len, start, term);
		}
#endif
		len = (int)(p - start + 1); /* including parenthesis */
		if (id) {
		    rb_enc_raise(enc, rb_eArgError, "named%.*s after <%s>",
				 len, start, rb_id2name(id));
		}
		nextvalue = GETNAMEARG((id = rb_check_id_cstr(start + 1,
							      len - 2 /* without parenthesis */,
							      enc),
					ID2SYM(id)),
				       start, len, enc);
		if (nextvalue == Qundef) {
		    rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start);
		}
		if (term == '}') goto format_s;
		p++;
		goto retry;
	    }

	  case '*':
	    CHECK_FOR_WIDTH(flags);
	    flags |= FWIDTH;
	    GETASTER(width);
	    if (width < 0) {
		flags |= FMINUS;
		width = -width;
	    }
	    p++;
	    goto retry;

	  case '.':
	    if (flags & FPREC0) {
		rb_raise(rb_eArgError, "precision given twice");
	    }
	    flags |= FPREC|FPREC0;

	    prec = 0;
	    p++;
	    if (*p == '*') {
		GETASTER(prec);
		if (prec < 0) {	/* ignore negative precision */
		    flags &= ~FPREC;
		}
		p++;
		goto retry;
	    }

	    GETNUM(prec, precision);
	    goto retry;

	  case '\n':
	  case '\0':
	    p--;
	  case '%':
	    if (flags != FNONE) {
		rb_raise(rb_eArgError, "invalid format character - %%");
	    }
	    PUSH("%", 1);
	    break;

	  case 'c':
	    {
		VALUE val = GETARG();
		VALUE tmp;
		unsigned int c;
		int n;

		tmp = rb_check_string_type(val);
		if (!NIL_P(tmp)) {
		    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
			rb_raise(rb_eArgError, "%%c requires a character");
		    }
		    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
		    RB_GC_GUARD(tmp);
		}
		else {
		    c = NUM2INT(val);
		    n = rb_enc_codelen(c, enc);
		}
		if (n <= 0) {
		    rb_raise(rb_eArgError, "invalid character");
		}
		if (!(flags & FWIDTH)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
		else if ((flags & FMINUS)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		    FILL(' ', width-1);
		}
		else {
		    FILL(' ', width-1);
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
	    }
	    break;

	  case 's':
	  case 'p':
	  format_s:
	    {
		VALUE arg = GETARG();
		long len, slen;

		if (*p == 'p') arg = rb_inspect(arg);
		str = rb_obj_as_string(arg);
		if (OBJ_TAINTED(str)) tainted = 1;
		len = RSTRING_LEN(str);
		rb_str_set_len(result, blen);
		if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
		    int cr = coderange;
		    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
		    ENC_CODERANGE_SET(result,
				      (cr == ENC_CODERANGE_UNKNOWN ?
				       ENC_CODERANGE_BROKEN : (coderange = cr)));
		}
		enc = rb_enc_check(result, str);
		if (flags&(FPREC|FWIDTH)) {
		    slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
		    if (slen < 0) {
			rb_raise(rb_eArgError, "invalid mbstring sequence");
		    }
		    if ((flags&FPREC) && (prec < slen)) {
			char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
					     prec, enc);
			slen = prec;
			len = p - RSTRING_PTR(str);
		    }
		    /* need to adjust multi-byte string pos */
		    if ((flags&FWIDTH) && (width > slen)) {
			width -= (int)slen;
			if (!(flags&FMINUS)) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			CHECK(len);
			memcpy(&buf[blen], RSTRING_PTR(str), len);
			RB_GC_GUARD(str);
			blen += len;
			if (flags&FMINUS) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			rb_enc_associate(result, enc);
			break;
		    }
		}
		PUSH(RSTRING_PTR(str), len);
		RB_GC_GUARD(str);
		rb_enc_associate(result, enc);
	    }
	    break;

	  case 'd':
	  case 'i':
	  case 'o':
	  case 'x':
	  case 'X':
	  case 'b':
	  case 'B':
	  case 'u':
	    {
		volatile VALUE val = GETARG();
		char fbuf[32], nbuf[64], *s;
		const char *prefix = 0;
		int sign = 0, dots = 0;
		char sc = 0;
		long v = 0;
		int base, bignum = 0;
		int len;

		switch (*p) {
		  case 'd':
		  case 'i':
		  case 'u':
		    sign = 1; break;
		  case 'o':
		  case 'x':
		  case 'X':
		  case 'b':
		  case 'B':
		    if (flags&(FPLUS|FSPACE)) sign = 1;
		    break;
		}
		if (flags & FSHARP) {
		    switch (*p) {
		      case 'o':
			prefix = "0"; break;
		      case 'x':
			prefix = "0x"; break;
		      case 'X':
			prefix = "0X"; break;
		      case 'b':
			prefix = "0b"; break;
		      case 'B':
			prefix = "0B"; break;
		    }
		}

	      bin_retry:
		switch (TYPE(val)) {
		  case T_FLOAT:
		    if (FIXABLE(RFLOAT_VALUE(val))) {
			val = LONG2FIX((long)RFLOAT_VALUE(val));
			goto bin_retry;
		    }
		    val = rb_dbl2big(RFLOAT_VALUE(val));
		    if (FIXNUM_P(val)) goto bin_retry;
		    bignum = 1;
		    break;
		  case T_STRING:
		    val = rb_str_to_inum(val, 0, TRUE);
		    goto bin_retry;
		  case T_BIGNUM:
		    bignum = 1;
		    break;
		  case T_FIXNUM:
		    v = FIX2LONG(val);
		    break;
		  default:
		    val = rb_Integer(val);
		    goto bin_retry;
		}

		switch (*p) {
		  case 'o':
		    base = 8; break;
		  case 'x':
		  case 'X':
		    base = 16; break;
		  case 'b':
		  case 'B':
		    base = 2; break;
		  case 'u':
		  case 'd':
		  case 'i':
		  default:
		    base = 10; break;
		}

		if (!bignum) {
		    if (base == 2) {
			val = rb_int2big(v);
			goto bin_retry;
		    }
		    if (sign) {
			char c = *p;
			if (c == 'i') c = 'd'; /* %d and %i are identical */
			if (v < 0) {
			    v = -v;
			    sc = '-';
			    width--;
			}
			else if (flags & FPLUS) {
			    sc = '+';
			    width--;
			}
			else if (flags & FSPACE) {
			    sc = ' ';
			    width--;
			}
			snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
			snprintf(nbuf, sizeof(nbuf), fbuf, v);
			s = nbuf;
		    }
		    else {
			s = nbuf;
			if (v < 0) {
			    dots = 1;
			}
			snprintf(fbuf, sizeof(fbuf), "%%l%c", *p == 'X' ? 'x' : *p);
			snprintf(++s, sizeof(nbuf) - 1, fbuf, v);
			if (v < 0) {
			    char d = 0;

			    s = remove_sign_bits(s, base);
			    switch (base) {
			      case 16:
				d = 'f'; break;
			      case 8:
				d = '7'; break;
			    }
			    if (d && *s != d) {
				*--s = d;
			    }
			}
		    }
		    len = (int)strlen(s);
		}
		else {
		    if (sign) {
			tmp = rb_big2str(val, base);
			s = RSTRING_PTR(tmp);
			if (s[0] == '-') {
			    s++;
			    sc = '-';
			    width--;
			}
			else if (flags & FPLUS) {
			    sc = '+';
			    width--;
			}
			else if (flags & FSPACE) {
			    sc = ' ';
			    width--;
			}
		    }
		    else {
			if (!RBIGNUM_SIGN(val)) {
			    val = rb_big_clone(val);
			    rb_big_2comp(val);
			}
			tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val));
			s = RSTRING_PTR(tmp);
			if (*s == '-') {
			    dots = 1;
			    if (base == 10) {
				rb_warning("negative number for %%u specifier");
			    }
			    s = remove_sign_bits(++s, base);
			    switch (base) {
			      case 16:
				if (s[0] != 'f') *--s = 'f'; break;
			      case 8:
				if (s[0] != '7') *--s = '7'; break;
			      case 2:
				if (s[0] != '1') *--s = '1'; break;
			    }
			}
		    }
		    len = rb_long2int(RSTRING_END(tmp) - s);
		}

		if (dots) {
		    prec -= 2;
		    width -= 2;
		}

		if (*p == 'X') {
		    char *pp = s;
		    int c;
		    while ((c = (int)(unsigned char)*pp) != 0) {
			*pp = rb_enc_toupper(c, enc);
			pp++;
		    }
		}
		if (prefix && !prefix[1]) { /* octal */
		    if (dots) {
			prefix = 0;
		    }
		    else if (len == 1 && *s == '0') {
			len = 0;
			if (flags & FPREC) prec--;
		    }
		    else if ((flags & FPREC) && (prec > len)) {
			prefix = 0;
		    }
		}
		else if (len == 1 && *s == '0') {
		    prefix = 0;
		}
		if (prefix) {
		    width -= (int)strlen(prefix);
		}
		if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
		    prec = width;
		    width = 0;
		}
		else {
		    if (prec < len) {
			if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
			prec = len;
		    }
		    width -= prec;
		}
		if (!(flags&FMINUS)) {
		    CHECK(width);
		    while (width-- > 0) {
			buf[blen++] = ' ';
		    }
		}
		if (sc) PUSH(&sc, 1);
		if (prefix) {
		    int plen = (int)strlen(prefix);
		    PUSH(prefix, plen);
		}
		CHECK(prec - len);
		if (dots) PUSH("..", 2);
		if (!bignum && v < 0) {
		    char c = sign_bits(base, p);
		    while (len < prec--) {
			buf[blen++] = c;
		    }
		}
		else if ((flags & (FMINUS|FPREC)) != FMINUS) {
		    char c;

		    if (!sign && bignum && !RBIGNUM_SIGN(val))
			c = sign_bits(base, p);
		    else
			c = '0';
		    while (len < prec--) {
			buf[blen++] = c;
		    }
		}
		PUSH(s, len);
		RB_GC_GUARD(tmp);
		CHECK(width);
		while (width-- > 0) {
		    buf[blen++] = ' ';
		}
	    }
	    break;

	  case 'f':
	  case 'g':
	  case 'G':
	  case 'e':
	  case 'E':
	  case 'a':
	  case 'A':
	    {
		VALUE val = GETARG();
		double fval;
		int i, need = 6;
		char fbuf[32];

		fval = RFLOAT_VALUE(rb_Float(val));
		if (isnan(fval) || isinf(fval)) {
		    const char *expr;

		    if (isnan(fval)) {
			expr = "NaN";
		    }
		    else {
			expr = "Inf";
		    }
		    need = (int)strlen(expr);
		    if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
			need++;
		    if ((flags & FWIDTH) && need < width)
			need = width;

		    CHECK(need + 1);
		    snprintf(&buf[blen], need + 1, "%*s", need, "");
		    if (flags & FMINUS) {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen++] = '-';
			else if (flags & FPLUS)
			    buf[blen++] = '+';
			else if (flags & FSPACE)
			    blen++;
			memcpy(&buf[blen], expr, strlen(expr));
		    }
		    else {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen + need - strlen(expr) - 1] = '-';
			else if (flags & FPLUS)
			    buf[blen + need - strlen(expr) - 1] = '+';
			else if ((flags & FSPACE) && need > width)
			    blen++;
			memcpy(&buf[blen + need - strlen(expr)], expr,
			       strlen(expr));
		    }
		    blen += strlen(&buf[blen]);
		    break;
		}

		fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
		need = 0;
		if (*p != 'e' && *p != 'E') {
		    i = INT_MIN;
		    frexp(fval, &i);
		    if (i > 0)
			need = BIT_DIGITS(i);
		}
		need += (flags&FPREC) ? prec : 6;
		if ((flags&FWIDTH) && need < width)
		    need = width;
		need += 20;

		CHECK(need);
		snprintf(&buf[blen], need, fbuf, fval);
		blen += strlen(&buf[blen]);
	    }
	    break;
	}
	flags = FNONE;
    }

  sprint_exit:
    RB_GC_GUARD(fmt);
    /* XXX - We cannot validate the number of arguments if (digit)$ style used.
     */
    if (posarg >= 0 && nextarg < argc) {
	const char *mesg = "too many arguments for format string";
	if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
	if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
    }
    rb_str_resize(result, blen);

    if (tainted) OBJ_TAINT(result);
    return result;
}
Esempio n. 22
0
/* @overload gsub(pattern, replacement)
 *
 *   Returns the receiver with all matches of PATTERN replaced by REPLACEMENT,
 *   inheriting any taint and untrust from the receiver and from REPLACEMENT.
 *
 *   The REPLACEMENT is used as a specification for what to replace matches
 *   with:
 *
 *   <table>
 *     <thead>
 *       <tr><th>Specification</th><th>Replacement</th></tr>
 *     </thead>
 *     <tbody>
 *       <tr>
 *         <td><code>\1</code>, <code>\2</code>, …, <code>\</code><em>n</em></td>
 *         <td>Numbered sub-match <em>n</em></td>
 *       </tr>
 *       <tr>
 *         <td><code>\k&lt;</code><em>name</em><code>></code></td>
 *         <td>Named sub-match <em>name</em></td>
 *       </tr>
 *     </tbody>
 *   </table>
 *
 *   The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …,
 *   `$`_n_ are updated accordingly.
 *
 *   @param [Regexp, #to_str] pattern
 *   @param [#to_str] replacement
 *   @return [U::String]
 *
 * @overload gsub(pattern, replacements)
 *
 *   Returns the receiver with all matches of PATTERN replaced by
 *   REPLACEMENTS#[_match_], where _match_ is the matched substring, inheriting
 *   any taint and untrust from the receiver and from the
 *   REPLACEMENTS#[_match_]es, as well as any taint on REPLACEMENTS.
 *
 *   The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …,
 *   `$`_n_ are updated accordingly.
 *
 *   @param [Regexp, #to_str] pattern
 *   @param [#to_hash] replacements
 *   @raise [RuntimeError] If any replacement is the result being constructed
 *   @raise [Exception] Any error raised by REPLACEMENTS#default, if it gets
 *     called
 *   @return [U::String]
 *
 * @overload gsub(pattern){ |match| … }
 *
 *   Returns the receiver with all matches of PATTERN replaced by the results
 *   of the given block, inheriting any taint and untrust from the receiver and
 *   from the results of the given block.
 *
 *   The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …,
 *   `$`_n_ are updated accordingly.
 *
 *   @param [Regexp, #to_str] pattern
 *   @yieldparam [U::String] match
 *   @yieldreturn [#to_str]
 *   @raise [RuntimeError] If the block returns the result being constructed
 *   @return [U::String]
 *
 * @overload gsub(pattern)
 *
 *   Returns an Enumerator over the matches of PATTERN in the receiver.
 *
 *   The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …,
 *   `$`_n_ will be updated accordingly.
 *
 *   @param [Regexp, #to_str] pattern
 *   @return [Enumerator] */
VALUE
rb_u_string_gsub(int argc, VALUE *argv, VALUE self)
{
        VALUE pattern, replacement;
        VALUE replacements = Qnil;
        bool use_block = false;
        bool tainted = false;

        /* One argument means the block form; RETURN_ENUMERATOR returns early
         * with an Enumerator when no block was given. */
        if (argc == 1) {
                RETURN_ENUMERATOR(self, argc, argv);
                use_block = true;
        }

        /* Two arguments: the second is either a Hash-like table of
         * replacements or a replacement specification string. */
        if (rb_scan_args(argc, argv, "11", &pattern, &replacement) == 2) {
                replacements = rb_check_convert_type(replacement, T_HASH,
                                                     "Hash", "to_hash");
                if (NIL_P(replacements))
                        StringValue(replacement);
                if (OBJ_TAINTED(replacement))
                        tainted = true;
        }

        pattern = rb_u_pattern_argument(pattern, true);

        VALUE str = rb_str_to_str(self);
        long begin = rb_reg_search(pattern, str, 0, 0);
        if (begin < 0)
                return self;

        const char *base = RSTRING_PTR(str);
        const char *p = base;           /* first input byte not yet copied */
        const char *end = RSTRING_END(str);
        long offset = 0;                /* byte offset the current match was searched from */
        long last = 0;                  /* offset of the most recent successful search */
        VALUE substituted = rb_u_str_buf_new(RSTRING_LEN(str) + 30);
        do {
                VALUE match = rb_backref_get();
                struct re_registers *registers = RMATCH_REGS(match);
                /* Capture the match bounds up front so that nothing below
                 * depends on the match data after user code has run. */
                const long match_begin = registers->beg[0];
                const long match_end = registers->end[0];
                VALUE result;

                last = offset;

                if (use_block || !NIL_P(replacements)) {
                        if (use_block) {
                                VALUE ustr = rb_u_string_new_rb(rb_reg_nth_match(0, match));
                                result = rb_u_string_object_as_string(rb_yield(ustr));
                        } else {
                                VALUE ustr = rb_u_string_new_c(self,
                                                               base + match_begin,
                                                               match_end - match_begin);
                                result = rb_u_string_object_as_string(rb_hash_aref(replacements, ustr));
                        }

                        if (result == substituted)
                                rb_u_raise(rb_eRuntimeError,
                                           "result of block is string being built; please try not to cheat");
                } else
                        result =
#ifdef HAVE_RB_REG_REGSUB4
                        rb_reg_regsub(replacement, str, registers, pattern);
#else
                        rb_reg_regsub(replacement, str, registers);
#endif

                if (OBJ_TAINTED(result))
                        tainted = true;

                const struct rb_u_string *value = RVAL2USTRING_ANY(result);

                /* Copy the unmatched input between the previous match and this
                 * one, then the replacement itself. */
                rb_str_buf_cat(substituted, p, match_begin - (p - base));
                rb_str_buf_cat(substituted, USTRING_STR(value), USTRING_LENGTH(value));
                OBJ_INFECT(substituted, result);

                p = base + match_end;
                if (p >= end)
                        break;

                /* After an empty match, resume searching one character further
                 * on so the same empty match is not found again.  The copy
                 * cursor p stays put, so the skipped character is still copied
                 * as part of the unmatched input (by the next iteration or by
                 * the tail copy below). */
                offset = match_end;
                if (match_begin == match_end)
                        offset = u_next(p) - base;

                begin = rb_reg_search(pattern, str, offset, 0);
        } while (begin >= 0);

        if (p < end)
                rb_str_buf_cat(substituted, p, end - p);

        /* Repeat the last successful search so that the match-related special
         * variables describe the final match rather than a failed search. */
        rb_reg_search(pattern, str, last, 0);

        RBASIC(substituted)->klass = rb_obj_class(str);
        OBJ_INFECT(substituted, str);
        if (tainted)
                OBJ_TAINT(substituted);

        return rb_u_string_new_rb(substituted);
}