Exemple #1
0
mrb_value
mrb_str_format(mrb_state *mrb, int argc, const mrb_value *argv, mrb_value fmt)
{
  const char *p, *end;
  char *buf;
  mrb_int blen;
  mrb_int bsiz;
  mrb_value result;
  mrb_int n;
  mrb_int width;
  mrb_int prec;
  int flags = FNONE;
  int nextarg = 1;
  int posarg = 0;
  mrb_value nextvalue;
  mrb_value tmp;
  mrb_value str;
  mrb_value hash = mrb_undef_value();

#define CHECK_FOR_WIDTH(f)                                                  \
  if ((f) & FWIDTH) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "width given twice");         \
  }                                                                         \
  if ((f) & FPREC0) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "width after precision");     \
  }
#define CHECK_FOR_FLAGS(f)                                                  \
  if ((f) & FWIDTH) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after width");          \
  }                                                                         \
  if ((f) & FPREC0) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision");      \
  }

  ++argc;
  --argv;
  mrb_string_value(mrb, &fmt);
  p = RSTRING_PTR(fmt);
  end = p + RSTRING_LEN(fmt);
  blen = 0;
  bsiz = 120;
  result = mrb_str_buf_new(mrb, bsiz);
  buf = RSTRING_PTR(result);
  memset(buf, 0, bsiz);

  for (; p < end; p++) {
    const char *t;
    mrb_sym id = 0;

    for (t = p; t < end && *t != '%'; t++) ;
    PUSH(p, t - p);
    if (t >= end)
      goto sprint_exit; /* end of fmt string */

    p = t + 1;    /* skip `%' */

    width = prec = -1;
    nextvalue = mrb_undef_value();

retry:
    switch (*p) {
      default:
        mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed format string - \\%%S", mrb_str_new(mrb, p, 1));
        break;

      case ' ':
        CHECK_FOR_FLAGS(flags);
        flags |= FSPACE;
        p++;
        goto retry;

      case '#':
        CHECK_FOR_FLAGS(flags);
        flags |= FSHARP;
        p++;
        goto retry;

      case '+':
        CHECK_FOR_FLAGS(flags);
        flags |= FPLUS;
        p++;
        goto retry;

      case '-':
        CHECK_FOR_FLAGS(flags);
        flags |= FMINUS;
        p++;
        goto retry;

      case '0':
        CHECK_FOR_FLAGS(flags);
        flags |= FZERO;
        p++;
        goto retry;

      case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        n = 0;
        GETNUM(n, width);
        if (*p == '$') {
          if (!mrb_undef_p(nextvalue)) {
            mrb_raisef(mrb, E_ARGUMENT_ERROR, "value given twice - %S$", mrb_fixnum_value(n));
          }
          nextvalue = GETPOSARG(n);
          p++;
          goto retry;
        }
        CHECK_FOR_WIDTH(flags);
        width = n;
        flags |= FWIDTH;
        goto retry;

      case '<':
      case '{': {
        const char *start = p;
        char term = (*p == '<') ? '>' : '}';
        mrb_value symname;

        for (; p < end && *p != term; )
          p++;
        if (id) {
          mrb_raisef(mrb, E_ARGUMENT_ERROR, "name%S after <%S>",
                     mrb_str_new(mrb, start, p - start + 1), mrb_sym2str(mrb, id));
        }
        symname = mrb_str_new(mrb, start + 1, p - start - 1);
        id = mrb_intern_str(mrb, symname);
        nextvalue = GETNAMEARG(mrb_symbol_value(id), start, (int)(p - start + 1));
        if (mrb_undef_p(nextvalue)) {
          mrb_raisef(mrb, E_KEY_ERROR, "key%S not found", mrb_str_new(mrb, start, p - start + 1));
        }
        if (term == '}') goto format_s;
        p++;
        goto retry;
      }

      case '*':
        CHECK_FOR_WIDTH(flags);
        flags |= FWIDTH;
        GETASTER(width);
        if (width < 0) {
          flags |= FMINUS;
          width = -width;
        }
        p++;
        goto retry;

      case '.':
        if (flags & FPREC0) {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice");
        }
        flags |= FPREC|FPREC0;

        prec = 0;
        p++;
        if (*p == '*') {
          GETASTER(prec);
          if (prec < 0) {  /* ignore negative precision */
            flags &= ~FPREC;
          }
          p++;
          goto retry;
        }

        GETNUM(prec, precision);
        goto retry;

      case '\n':
      case '\0':
        p--;

      case '%':
        if (flags != FNONE) {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %");
        }
        PUSH("%", 1);
        break;

      case 'c': {
        mrb_value val = GETARG();
        mrb_value tmp;
        unsigned int c;

        tmp = mrb_check_string_type(mrb, val);
        if (!mrb_nil_p(tmp)) {
          if (RSTRING_LEN(tmp) != 1 ) {
            mrb_raise(mrb, E_ARGUMENT_ERROR, "%c requires a character");
          }
          c = RSTRING_PTR(tmp)[0];
          n = 1;
        }
        else {
          c = mrb_fixnum(val);
          n = 1;
        }
        if (n <= 0) {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
        }
        if (!(flags & FWIDTH)) {
          CHECK(n);
          buf[blen] = c;
          blen += n;
        }
        else if ((flags & FMINUS)) {
          CHECK(n);
          buf[blen] = c;
          blen += n;
          FILL(' ', width-1);
        }
        else {
          FILL(' ', width-1);
          CHECK(n);
          buf[blen] = c;
          blen += n;
        }
      }
      break;

      case 's':
      case 'p':
  format_s:
      {
        mrb_value arg = GETARG();
        mrb_int len;
        mrb_int slen;

        if (*p == 'p') arg = mrb_inspect(mrb, arg);
        str = mrb_obj_as_string(mrb, arg);
        len = RSTRING_LEN(str);
        RSTRING_LEN(result) = blen;
        if (flags&(FPREC|FWIDTH)) {
          slen = RSTRING_LEN(str);
          if (slen < 0) {
            mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence");
          }
          if ((flags&FPREC) && (prec < slen)) {
            char *p = RSTRING_PTR(str) + prec;
            slen = prec;
            len = p - RSTRING_PTR(str);
          }
          /* need to adjust multi-byte string pos */
          if ((flags&FWIDTH) && (width > slen)) {
            width -= (int)slen;
            if (!(flags&FMINUS)) {
              CHECK(width);
              while (width--) {
                buf[blen++] = ' ';
              }
            }
            CHECK(len);
            memcpy(&buf[blen], RSTRING_PTR(str), len);
            blen += len;
            if (flags&FMINUS) {
              CHECK(width);
              while (width--) {
                buf[blen++] = ' ';
              }
            }
            break;
          }
        }
        PUSH(RSTRING_PTR(str), len);
      }
      break;

      case 'd':
      case 'i':
      case 'o':
      case 'x':
      case 'X':
      case 'b':
      case 'B':
      case 'u': {
        mrb_value val = GETARG();
        char fbuf[32], nbuf[64], *s;
        const char *prefix = NULL;
        int sign = 0, dots = 0;
        char sc = 0;
        mrb_int v = 0, org_v = 0;
        int base;
        mrb_int len;

        switch (*p) {
          case 'd':
          case 'i':
          case 'u':
            sign = 1; break;
          case 'o':
          case 'x':
          case 'X':
          case 'b':
          case 'B':
            if (flags&(FPLUS|FSPACE)) sign = 1;
            break;
          default:
            break;
        }
        if (flags & FSHARP) {
          switch (*p) {
            case 'o': prefix = "0"; break;
            case 'x': prefix = "0x"; break;
            case 'X': prefix = "0X"; break;
            case 'b': prefix = "0b"; break;
            case 'B': prefix = "0B"; break;
            default: break;
          }
        }

  bin_retry:
        switch (mrb_type(val)) {
          case MRB_TT_FLOAT:
            if (FIXABLE(mrb_float(val))) {
              val = mrb_fixnum_value((mrb_int)mrb_float(val));
              goto bin_retry;
            }
            val = mrb_flt2big(mrb, mrb_float(val));
            if (mrb_fixnum_p(val)) goto bin_retry;
            break;
          case MRB_TT_STRING:
            val = mrb_str_to_inum(mrb, val, 0, TRUE);
            goto bin_retry;
          case MRB_TT_FIXNUM:
            v = mrb_fixnum(val);
            break;
          default:
            val = mrb_Integer(mrb, val);
            goto bin_retry;
        }

        switch (*p) {
          case 'o':
            base = 8; break;
          case 'x':
          case 'X':
            base = 16; break;
          case 'b':
          case 'B':
            base = 2; break;
          case 'u':
          case 'd':
          case 'i':
          default:
            base = 10; break;
        }

        if (base == 2) {
          org_v = v;
          if ( v < 0 && !sign ) {
            val = mrb_fix2binstr(mrb, mrb_fixnum_value(v), base);
            dots = 1;
          }
          else {
            val = mrb_fix2str(mrb, mrb_fixnum_value(v), base);
          }
          v = mrb_fixnum(mrb_str_to_inum(mrb, val, 10, 0/*Qfalse*/));
        }
        if (sign) {
          char c = *p;
          if (c == 'i') c = 'd'; /* %d and %i are identical */
          if (base == 2) c = 'd';
          if (v < 0) {
            v = -v;
            sc = '-';
            width--;
          }
          else if (flags & FPLUS) {
            sc = '+';
            width--;
          }
          else if (flags & FSPACE) {
            sc = ' ';
            width--;
          }
          snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
          snprintf(nbuf, sizeof(nbuf), fbuf, v);
          s = nbuf;
        }
        else {
          char c = *p;
          if (c == 'X') c = 'x';
          if (base == 2) c = 'd';
          s = nbuf;
          if (v < 0) {
            dots = 1;
          }
          snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
          snprintf(++s, sizeof(nbuf) - 1, fbuf, v);
          if (v < 0) {
            char d;

            s = remove_sign_bits(s, base);
            switch (base) {
              case 16: d = 'f'; break;
              case 8:  d = '7'; break;
              case 2:  d = '1'; break;
              default: d = 0; break;
            }

            if (d && *s != d) {
              *--s = d;
            }
          }
        }
        {
          size_t size;
          size = strlen(s);
          /* PARANOID: assert(size <= MRB_INT_MAX) */
          len = (mrb_int)size;
        }

        if (dots) {
          prec -= 2;
          width -= 2;
        }

        if (*p == 'X') {
          char *pp = s;
          int c;
          while ((c = (int)(unsigned char)*pp) != 0) {
            *pp = toupper(c);
            pp++;
          }
        }

        if (prefix && !prefix[1]) { /* octal */
          if (dots) {
            prefix = NULL;
          }
          else if (len == 1 && *s == '0') {
            len = 0;
            if (flags & FPREC) prec--;
          }
          else if ((flags & FPREC) && (prec > len)) {
            prefix = NULL;
          }
        }
        else if (len == 1 && *s == '0') {
          prefix = NULL;
        }

        if (prefix) {
          size_t size;
          size = strlen(prefix);
          /* PARANOID: assert(size <= MRB_INT_MAX).
           *  this check is absolutely paranoid. */
          width -= (mrb_int)size;
        }

        if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
          prec = width;
          width = 0;
        }
        else {
          if (prec < len) {
            if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
            prec = len;
          }
          width -= prec;
        }

        if (!(flags&FMINUS)) {
          CHECK(width);
          while (width-- > 0) {
            buf[blen++] = ' ';
          }
        }

        if (sc) PUSH(&sc, 1);

        if (prefix) {
          int plen = (int)strlen(prefix);
          PUSH(prefix, plen);
        }
        CHECK(prec - len);
        if (dots) PUSH("..", 2);

        if (v < 0 || (base == 2 && org_v < 0)) {
          char c = sign_bits(base, p);
          while (len < prec--) {
            buf[blen++] = c;
          }
        }
        else if ((flags & (FMINUS|FPREC)) != FMINUS) {
          char c = '0';
          while (len < prec--) {
            buf[blen++] = c;
          }
        }

        PUSH(s, len);
        CHECK(width);
        while (width-- > 0) {
          buf[blen++] = ' ';
        }
      }
      break;

      case 'f':
      case 'g':
      case 'G':
      case 'e':
      case 'E':
      case 'a':
      case 'A': {
        mrb_value val = GETARG();
        double fval;
        int i, need = 6;
        char fbuf[32];

        fval = mrb_float(mrb_Float(mrb, val));
        if (isnan(fval) || isinf(fval)) {
          const char *expr;
          const int elen = 3;

          if (isnan(fval)) {
            expr = "NaN";
          }
          else {
            expr = "Inf";
          }
          need = elen;
          if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
            need++;
          if ((flags & FWIDTH) && need < width)
            need = width;

          CHECK(need + 1);
          n = snprintf(&buf[blen], need + 1, "%*s", need, "");
          if (flags & FMINUS) {
            if (!isnan(fval) && fval < 0.0)
              buf[blen++] = '-';
            else if (flags & FPLUS)
              buf[blen++] = '+';
            else if (flags & FSPACE)
              blen++;
            memcpy(&buf[blen], expr, elen);
          }
          else {
            if (!isnan(fval) && fval < 0.0)
              buf[blen + need - elen - 1] = '-';
            else if (flags & FPLUS)
              buf[blen + need - elen - 1] = '+';
            else if ((flags & FSPACE) && need > width)
              blen++;
            memcpy(&buf[blen + need - elen], expr, elen);
          }
          blen += strlen(&buf[blen]);
          break;
        }

        fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
        need = 0;
        if (*p != 'e' && *p != 'E') {
          i = INT_MIN;
          frexp(fval, &i);
          if (i > 0)
            need = BIT_DIGITS(i);
        }
        need += (flags&FPREC) ? prec : 6;
        if ((flags&FWIDTH) && need < width)
          need = width;
        need += 20;

        CHECK(need);
        n = snprintf(&buf[blen], need, fbuf, fval);
        blen += n;
      }
      break;
    }
    flags = FNONE;
  }

  sprint_exit:
#if 0
  /* XXX - We cannot validate the number of arguments if (digit)$ style used.
   */
  if (posarg >= 0 && nextarg < argc) {
    const char *mesg = "too many arguments for format string";
    if (mrb_test(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, mesg);
    if (mrb_test(ruby_verbose)) mrb_warn("%s", mesg);
  }
#endif
  mrb_str_resize(mrb, result, blen);

  return result;
}
Exemple #2
0
mrb_value
mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fmt)
{
  const char *p, *end;
  char *buf;
  mrb_int blen;
  mrb_int bsiz;
  mrb_value result;
  mrb_int n;
  mrb_int width;
  mrb_int prec;
  int nextarg = 1;
  int posarg = 0;
  mrb_value nextvalue;
  mrb_value str;
  mrb_value hash = mrb_undef_value();

#define CHECK_FOR_WIDTH(f)                                                  \
  if ((f) & FWIDTH) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "width given twice");         \
  }                                                                         \
  if ((f) & FPREC0) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "width after precision");     \
  }
#define CHECK_FOR_FLAGS(f)                                                  \
  if ((f) & FWIDTH) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after width");          \
  }                                                                         \
  if ((f) & FPREC0) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision");      \
  }

  ++argc;
  --argv;
  mrb_to_str(mrb, fmt);
  p = RSTRING_PTR(fmt);
  end = p + RSTRING_LEN(fmt);
  blen = 0;
  bsiz = 120;
  result = mrb_str_new_capa(mrb, bsiz);
  buf = RSTRING_PTR(result);
  memset(buf, 0, bsiz);

  for (; p < end; p++) {
    const char *t;
    mrb_sym id = 0;
    int flags = FNONE;

    for (t = p; t < end && *t != '%'; t++) ;
    if (t + 1 == end) ++t;
    PUSH(p, t - p);
    if (t >= end)
      goto sprint_exit; /* end of fmt string */

    p = t + 1;    /* skip '%' */

    width = prec = -1;
    nextvalue = mrb_undef_value();

retry:
    switch (*p) {
      default:
        mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed format string - \\%%S", mrb_str_new(mrb, p, 1));
        break;

      case ' ':
        CHECK_FOR_FLAGS(flags);
        flags |= FSPACE;
        p++;
        goto retry;

      case '#':
        CHECK_FOR_FLAGS(flags);
        flags |= FSHARP;
        p++;
        goto retry;

      case '+':
        CHECK_FOR_FLAGS(flags);
        flags |= FPLUS;
        p++;
        goto retry;

      case '-':
        CHECK_FOR_FLAGS(flags);
        flags |= FMINUS;
        p++;
        goto retry;

      case '0':
        CHECK_FOR_FLAGS(flags);
        flags |= FZERO;
        p++;
        goto retry;

      case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        n = 0;
        GETNUM(n, width);
        if (*p == '$') {
          if (!mrb_undef_p(nextvalue)) {
            mrb_raisef(mrb, E_ARGUMENT_ERROR, "value given twice - %S$", mrb_fixnum_value(n));
          }
          nextvalue = GETPOSARG(n);
          p++;
          goto retry;
        }
        CHECK_FOR_WIDTH(flags);
        width = n;
        flags |= FWIDTH;
        goto retry;

      case '<':
      case '{': {
        const char *start = p;
        char term = (*p == '<') ? '>' : '}';
        mrb_value symname;

        for (; p < end && *p != term; )
          p++;
        if (id) {
          mrb_raisef(mrb, E_ARGUMENT_ERROR, "name%S after <%S>",
                     mrb_str_new(mrb, start, p - start + 1), mrb_sym2str(mrb, id));
        }
        symname = mrb_str_new(mrb, start + 1, p - start - 1);
        id = mrb_intern_str(mrb, symname);
        nextvalue = GETNAMEARG(mrb_symbol_value(id), start, (mrb_int)(p - start + 1));
        if (mrb_undef_p(nextvalue)) {
          mrb_raisef(mrb, E_KEY_ERROR, "key%S not found", mrb_str_new(mrb, start, p - start + 1));
        }
        if (term == '}') goto format_s;
        p++;
        goto retry;
      }

      case '*':
        CHECK_FOR_WIDTH(flags);
        flags |= FWIDTH;
        GETASTER(width);
        if (width < 0) {
          flags |= FMINUS;
          width = -width;
        }
        p++;
        goto retry;

      case '.':
        if (flags & FPREC0) {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice");
        }
        flags |= FPREC|FPREC0;

        prec = 0;
        p++;
        if (*p == '*') {
          GETASTER(prec);
          if (prec < 0) {  /* ignore negative precision */
            flags &= ~FPREC;
          }
          p++;
          goto retry;
        }

        GETNUM(prec, precision);
        goto retry;

      case '\n':
      case '\0':
        p--;
        /* fallthrough */
      case '%':
        if (flags != FNONE) {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %");
        }
        PUSH("%", 1);
        break;

      case 'c': {
        mrb_value val = GETARG();
        mrb_value tmp;
        char *c;

        tmp = mrb_check_string_type(mrb, val);
        if (!mrb_nil_p(tmp)) {
          if (RSTRING_LEN(tmp) != 1) {
            mrb_raise(mrb, E_ARGUMENT_ERROR, "%c requires a character");
          }
        }
        else if (mrb_fixnum_p(val)) {
          mrb_int n = mrb_fixnum(val);

          if (n < 0x80) {
            char buf[1];

            buf[0] = (char)n;
            tmp = mrb_str_new(mrb, buf, 1);
          }
          else {
            tmp = mrb_funcall(mrb, val, "chr", 0);
            mrb_check_type(mrb, tmp, MRB_TT_STRING);
          }
        }
        else {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
        }
        c = RSTRING_PTR(tmp);
        n = RSTRING_LEN(tmp);
        if (!(flags & FWIDTH)) {
          PUSH(c, n);
        }
        else if ((flags & FMINUS)) {
          PUSH(c, n);
          if (width>0) FILL(' ', width-1);
        }
        else {
          if (width>0) FILL(' ', width-1);
          PUSH(c, n);
        }
      }
      break;

      case 's':
      case 'p':
  format_s:
      {
        mrb_value arg = GETARG();
        mrb_int len;
        mrb_int slen;

        if (*p == 'p') arg = mrb_inspect(mrb, arg);
        str = mrb_obj_as_string(mrb, arg);
        len = RSTRING_LEN(str);
        if (RSTRING(result)->flags & MRB_STR_EMBED) {
          mrb_int tmp_n = len;
          RSTRING(result)->flags &= ~MRB_STR_EMBED_LEN_MASK;
          RSTRING(result)->flags |= tmp_n << MRB_STR_EMBED_LEN_SHIFT;
        }
        else {
          RSTRING(result)->as.heap.len = blen;
        }
        if (flags&(FPREC|FWIDTH)) {
          slen = RSTRING_LEN(str);
          if (slen < 0) {
            mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence");
          }
          if ((flags&FPREC) && (prec < slen)) {
            char *p = RSTRING_PTR(str) + prec;
            slen = prec;
            len = (mrb_int)(p - RSTRING_PTR(str));
          }
          /* need to adjust multi-byte string pos */
          if ((flags&FWIDTH) && (width > slen)) {
            width -= (int)slen;
            if (!(flags&FMINUS)) {
              FILL(' ', width);
            }
            PUSH(RSTRING_PTR(str), len);
            if (flags&FMINUS) {
              FILL(' ', width);
            }
            break;
          }
        }
        PUSH(RSTRING_PTR(str), len);
      }
      break;

      case 'd':
      case 'i':
      case 'o':
      case 'x':
      case 'X':
      case 'b':
      case 'B':
      case 'u': {
        mrb_value val = GETARG();
        char nbuf[68], *s;
        const char *prefix = NULL;
        int sign = 0, dots = 0;
        char sc = 0;
        mrb_int v = 0;
        int base;
        mrb_int len;

        if (flags & FSHARP) {
          switch (*p) {
            case 'o': prefix = "0"; break;
            case 'x': prefix = "0x"; break;
            case 'X': prefix = "0X"; break;
            case 'b': prefix = "0b"; break;
            case 'B': prefix = "0B"; break;
            default: break;
          }
        }

  bin_retry:
        switch (mrb_type(val)) {
#ifndef MRB_WITHOUT_FLOAT
          case MRB_TT_FLOAT:
            val = mrb_flo_to_fixnum(mrb, val);
            if (mrb_fixnum_p(val)) goto bin_retry;
            break;
#endif
          case MRB_TT_STRING:
            val = mrb_str_to_inum(mrb, val, 0, TRUE);
            goto bin_retry;
          case MRB_TT_FIXNUM:
            v = mrb_fixnum(val);
            break;
          default:
            val = mrb_Integer(mrb, val);
            goto bin_retry;
        }

        switch (*p) {
          case 'o':
            base = 8; break;
          case 'x':
          case 'X':
            base = 16; break;
          case 'b':
          case 'B':
            base = 2; break;
          case 'u':
          case 'd':
          case 'i':
            sign = 1;
          default:
            base = 10; break;
        }

        if (sign) {
          if (v >= 0) {
            if (flags & FPLUS) {
              sc = '+';
              width--;
            }
            else if (flags & FSPACE) {
              sc = ' ';
              width--;
            }
          }
          else {
            sc = '-';
            width--;
          }
          mrb_assert(base == 10);
          snprintf(nbuf, sizeof(nbuf), "%" MRB_PRId, v);
          s = nbuf;
          if (v < 0) s++;       /* skip minus sign */
        }
        else {
          s = nbuf;
          if (v < 0) {
            dots = 1;
          }
          switch (base) {
          case 2:
            if (v < 0) {
              val = mrb_fix2binstr(mrb, mrb_fixnum_value(v), base);
            }
            else {
              val = mrb_fixnum_to_str(mrb, mrb_fixnum_value(v), base);
            }
            strncpy(++s, RSTRING_PTR(val), sizeof(nbuf)-1);
            break;
          case 8:
            snprintf(++s, sizeof(nbuf)-1, "%" MRB_PRIo, v);
            break;
          case 16:
            snprintf(++s, sizeof(nbuf)-1, "%" MRB_PRIx, v);
            break;
          }
          if (v < 0) {
            char d;

            s = remove_sign_bits(s, base);
            switch (base) {
              case 16: d = 'f'; break;
              case 8:  d = '7'; break;
              case 2:  d = '1'; break;
              default: d = 0; break;
            }

            if (d && *s != d) {
              *--s = d;
            }
          }
        }
        {
          size_t size;
          size = strlen(s);
          /* PARANOID: assert(size <= MRB_INT_MAX) */
          len = (mrb_int)size;
        }

        if (*p == 'X') {
          char *pp = s;
          int c;
          while ((c = (int)(unsigned char)*pp) != 0) {
            *pp = toupper(c);
            pp++;
          }
        }

        if (prefix && !prefix[1]) { /* octal */
          if (dots) {
            prefix = NULL;
          }
          else if (len == 1 && *s == '0') {
            len = 0;
            if (flags & FPREC) prec--;
          }
          else if ((flags & FPREC) && (prec > len)) {
            prefix = NULL;
          }
        }
        else if (len == 1 && *s == '0') {
          prefix = NULL;
        }

        if (prefix) {
          size_t size;
          size = strlen(prefix);
          /* PARANOID: assert(size <= MRB_INT_MAX).
           *  this check is absolutely paranoid. */
          width -= (mrb_int)size;
        }

        if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
          prec = width;
          width = 0;
        }
        else {
          if (prec < len) {
            if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
            prec = len;
          }
          width -= prec;
        }

        if (!(flags&FMINUS) && width > 0) {
          FILL(' ', width);
          width = 0;
        }

        if (sc) PUSH(&sc, 1);

        if (prefix) {
          int plen = (int)strlen(prefix);
          PUSH(prefix, plen);
        }
        if (dots) {
          prec -= 2;
          width -= 2;
          PUSH("..", 2);
        }

        if (prec > len) {
          CHECK(prec - len);
          if ((flags & (FMINUS|FPREC)) != FMINUS) {
            char c = '0';
            FILL(c, prec - len);
          } else if (v < 0) {
            char c = sign_bits(base, p);
            FILL(c, prec - len);
          }
        }
        PUSH(s, len);
        if (width > 0) {
          FILL(' ', width);
        }
      }
      break;

#ifndef MRB_WITHOUT_FLOAT
      case 'f':
      case 'g':
      case 'G':
      case 'e':
      case 'E':
      case 'a':
      case 'A': {
        mrb_value val = GETARG();
        double fval;
        mrb_int i;
        mrb_int need = 6;
        char fbuf[32];
        int frexp_result;

        fval = mrb_float(mrb_Float(mrb, val));
        if (!isfinite(fval)) {
          const char *expr;
          const mrb_int elen = 3;
          char sign = '\0';

          if (isnan(fval)) {
            expr = "NaN";
          }
          else {
            expr = "Inf";
          }
          need = elen;
          if (!isnan(fval) && fval < 0.0)
            sign = '-';
          else if (flags & (FPLUS|FSPACE))
            sign = (flags & FPLUS) ? '+' : ' ';
          if (sign)
            ++need;
          if ((flags & FWIDTH) && need < width)
            need = width;

          if (need < 0) {
            mrb_raise(mrb, E_ARGUMENT_ERROR, "width too big");
          }
          FILL(' ', need);
          if (flags & FMINUS) {
            if (sign)
              buf[blen - need--] = sign;
            memcpy(&buf[blen - need], expr, elen);
          }
          else {
            if (sign)
              buf[blen - elen - 1] = sign;
            memcpy(&buf[blen - elen], expr, elen);
          }
          break;
        }

        fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
        need = 0;
        if (*p != 'e' && *p != 'E') {
          i = INT_MIN;
          frexp(fval, &frexp_result);
          i = (mrb_int)frexp_result;
          if (i > 0)
            need = BIT_DIGITS(i);
        }
        if (need > MRB_INT_MAX - ((flags&FPREC) ? prec : 6)) {
        too_big_width:
          mrb_raise(mrb, E_ARGUMENT_ERROR,
                    (width > prec ? "width too big" : "prec too big"));
        }
        need += (flags&FPREC) ? prec : 6;
        if ((flags&FWIDTH) && need < width)
          need = width;
        if (need > MRB_INT_MAX - 20) {
          goto too_big_width;
        }
        need += 20;

        CHECK(need);
        n = snprintf(&buf[blen], need, fbuf, fval);
        if (n < 0 || n >= need) {
          mrb_raise(mrb, E_RUNTIME_ERROR, "formatting error");
        }
        blen += n;
      }
      break;
#endif
    }
    flags = FNONE;
  }

  sprint_exit:
#if 0
  /* XXX - We cannot validate the number of arguments if (digit)$ style used.
   */
  if (posarg >= 0 && nextarg < argc) {
    const char *mesg = "too many arguments for format string";
    if (mrb_test(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, mesg);
    if (mrb_test(ruby_verbose)) mrb_warn(mrb, "%S", mrb_str_new_cstr(mrb, mesg));
  }
#endif
  mrb_str_resize(mrb, result, blen);

  return result;
}
Exemple #3
0
VALUE
rb_str_format(int argc, const VALUE *argv, VALUE fmt)
{
    enum {default_float_precision = 6};
    rb_encoding *enc;
    const char *p, *end;
    char *buf;
    long blen, bsiz;
    VALUE result;

    long scanned = 0;
    int coderange = ENC_CODERANGE_7BIT;
    int width, prec, flags = FNONE;
    int nextarg = 1;
    int posarg = 0;
    int tainted = 0;
    VALUE nextvalue;
    VALUE tmp;
    VALUE str;
    volatile VALUE hash = Qundef;

#define CHECK_FOR_WIDTH(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "width given twice");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "width after precision"); \
    }
#define CHECK_FOR_FLAGS(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "flag after width");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "flag after precision"); \
    }

    ++argc;
    --argv;
    if (OBJ_TAINTED(fmt)) tainted = 1;
    StringValue(fmt);
    enc = rb_enc_get(fmt);
    fmt = rb_str_new4(fmt);
    p = RSTRING_PTR(fmt);
    end = p + RSTRING_LEN(fmt);
    blen = 0;
    bsiz = 120;
    result = rb_str_buf_new(bsiz);
    rb_enc_copy(result, fmt);
    buf = RSTRING_PTR(result);
    memset(buf, 0, bsiz);
    ENC_CODERANGE_SET(result, coderange);

    for (; p < end; p++) {
	const char *t;
	int n;
	VALUE sym = Qnil;

	for (t = p; t < end && *t != '%'; t++) ;
	PUSH(p, t - p);
	if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
	    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
	    ENC_CODERANGE_SET(result, coderange);
	}
	if (t >= end) {
	    /* end of fmt string */
	    goto sprint_exit;
	}
	p = t + 1;		/* skip `%' */

	width = prec = -1;
	nextvalue = Qundef;
      retry:
	switch (*p) {
	  default:
	    if (rb_enc_isprint(*p, enc))
		rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
	    else
		rb_raise(rb_eArgError, "malformed format string");
	    break;

	  case ' ':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSPACE;
	    p++;
	    goto retry;

	  case '#':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSHARP;
	    p++;
	    goto retry;

	  case '+':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FPLUS;
	    p++;
	    goto retry;

	  case '-':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FMINUS;
	    p++;
	    goto retry;

	  case '0':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FZERO;
	    p++;
	    goto retry;

	  case '1': case '2': case '3': case '4':
	  case '5': case '6': case '7': case '8': case '9':
	    n = 0;
	    GETNUM(n, width);
	    if (*p == '$') {
		if (nextvalue != Qundef) {
		    rb_raise(rb_eArgError, "value given twice - %d$", n);
		}
		nextvalue = GETPOSARG(n);
		p++;
		goto retry;
	    }
	    CHECK_FOR_WIDTH(flags);
	    width = n;
	    flags |= FWIDTH;
	    goto retry;

	  case '<':
	  case '{':
	    {
		const char *start = p;
		char term = (*p == '<') ? '>' : '}';
		int len;

		for (; p < end && *p != term; ) {
		    p += rb_enc_mbclen(p, end, enc);
		}
		if (p >= end) {
		    rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
		}
#if SIZEOF_INT < SIZEOF_SIZE_T
		if ((size_t)(p - start) >= INT_MAX) {
		    const int message_limit = 20;
		    len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start);
		    rb_enc_raise(enc, rb_eArgError,
				 "too long name (%"PRIdSIZE" bytes) - %.*s...%c",
				 (size_t)(p - start - 2), len, start, term);
		}
#endif
		len = (int)(p - start + 1); /* including parenthesis */
		if (sym != Qnil) {
		    rb_enc_raise(enc, rb_eArgError, "named%.*s after <%"PRIsVALUE">",
				 len, start, rb_sym2str(sym));
		}
		CHECKNAMEARG(start, len, enc);
		get_hash(&hash, argc, argv);
		sym = rb_check_symbol_cstr(start + 1,
					   len - 2 /* without parenthesis */,
					   enc);
		if (sym != Qnil) nextvalue = rb_hash_lookup2(hash, sym, Qundef);
		if (nextvalue == Qundef) {
		    rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start);
		}
		if (term == '}') goto format_s;
		p++;
		goto retry;
	    }

	  case '*':
	    CHECK_FOR_WIDTH(flags);
	    flags |= FWIDTH;
	    GETASTER(width);
	    if (width < 0) {
		flags |= FMINUS;
		width = -width;
	    }
	    p++;
	    goto retry;

	  case '.':
	    if (flags & FPREC0) {
		rb_raise(rb_eArgError, "precision given twice");
	    }
	    flags |= FPREC|FPREC0;

	    prec = 0;
	    p++;
	    if (*p == '*') {
		GETASTER(prec);
		if (prec < 0) {	/* ignore negative precision */
		    flags &= ~FPREC;
		}
		p++;
		goto retry;
	    }

	    GETNUM(prec, precision);
	    goto retry;

	  case '\n':
	  case '\0':
	    p--;
	  case '%':
	    if (flags != FNONE) {
		rb_raise(rb_eArgError, "invalid format character - %%");
	    }
	    PUSH("%", 1);
	    break;

	  case 'c':
	    {
		VALUE val = GETARG();
		VALUE tmp;
		unsigned int c;
		int n;

		tmp = rb_check_string_type(val);
		if (!NIL_P(tmp)) {
		    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
			rb_raise(rb_eArgError, "%%c requires a character");
		    }
		    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
		    RB_GC_GUARD(tmp);
		}
		else {
		    c = NUM2INT(val);
		    n = rb_enc_codelen(c, enc);
		}
		if (n <= 0) {
		    rb_raise(rb_eArgError, "invalid character");
		}
		if (!(flags & FWIDTH)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
		else if ((flags & FMINUS)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		    FILL(' ', width-1);
		}
		else {
		    FILL(' ', width-1);
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
	    }
	    break;

	  case 's':
	  case 'p':
	  format_s:
	    {
		VALUE arg = GETARG();
		long len, slen;

		if (*p == 'p') arg = rb_inspect(arg);
		str = rb_obj_as_string(arg);
		if (OBJ_TAINTED(str)) tainted = 1;
		len = RSTRING_LEN(str);
		rb_str_set_len(result, blen);
		if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
		    int cr = coderange;
		    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
		    ENC_CODERANGE_SET(result,
				      (cr == ENC_CODERANGE_UNKNOWN ?
				       ENC_CODERANGE_BROKEN : (coderange = cr)));
		}
		enc = rb_enc_check(result, str);
		if (flags&(FPREC|FWIDTH)) {
		    slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
		    if (slen < 0) {
			rb_raise(rb_eArgError, "invalid mbstring sequence");
		    }
		    if ((flags&FPREC) && (prec < slen)) {
			char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
					     prec, enc);
			slen = prec;
			len = p - RSTRING_PTR(str);
		    }
		    /* need to adjust multi-byte string pos */
		    if ((flags&FWIDTH) && (width > slen)) {
			width -= (int)slen;
			if (!(flags&FMINUS)) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			CHECK(len);
			memcpy(&buf[blen], RSTRING_PTR(str), len);
			RB_GC_GUARD(str);
			blen += len;
			if (flags&FMINUS) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			rb_enc_associate(result, enc);
			break;
		    }
		}
		PUSH(RSTRING_PTR(str), len);
		RB_GC_GUARD(str);
		rb_enc_associate(result, enc);
	    }
	    break;

	  case 'd':
	  case 'i':
	  case 'o':
	  case 'x':
	  case 'X':
	  case 'b':
	  case 'B':
	  case 'u':
	    {
		volatile VALUE val = GETARG();
                int valsign;
		char nbuf[64], *s;
		const char *prefix = 0;
		int sign = 0, dots = 0;
		char sc = 0;
		long v = 0;
		int base, bignum = 0;
		int len;

		switch (*p) {
		  case 'd':
		  case 'i':
		  case 'u':
		    sign = 1; break;
		  case 'o':
		  case 'x':
		  case 'X':
		  case 'b':
		  case 'B':
		    if (flags&(FPLUS|FSPACE)) sign = 1;
		    break;
		}
		if (flags & FSHARP) {
		    switch (*p) {
		      case 'o':
			prefix = "0"; break;
		      case 'x':
			prefix = "0x"; break;
		      case 'X':
			prefix = "0X"; break;
		      case 'b':
			prefix = "0b"; break;
		      case 'B':
			prefix = "0B"; break;
		    }
		}

	      bin_retry:
		switch (TYPE(val)) {
		  case T_FLOAT:
		    if (FIXABLE(RFLOAT_VALUE(val))) {
			val = LONG2FIX((long)RFLOAT_VALUE(val));
			goto bin_retry;
		    }
		    val = rb_dbl2big(RFLOAT_VALUE(val));
		    if (FIXNUM_P(val)) goto bin_retry;
		    bignum = 1;
		    break;
		  case T_STRING:
		    val = rb_str_to_inum(val, 0, TRUE);
		    goto bin_retry;
		  case T_BIGNUM:
		    bignum = 1;
		    break;
		  case T_FIXNUM:
		    v = FIX2LONG(val);
		    break;
		  default:
		    val = rb_Integer(val);
		    goto bin_retry;
		}

		switch (*p) {
		  case 'o':
		    base = 8; break;
		  case 'x':
		  case 'X':
		    base = 16; break;
		  case 'b':
		  case 'B':
		    base = 2; break;
		  case 'u':
		  case 'd':
		  case 'i':
		  default:
		    base = 10; break;
		}

                if (base != 10) {
                    int numbits = ffs(base)-1;
                    size_t abs_nlz_bits;
                    size_t numdigits = rb_absint_numwords(val, numbits, &abs_nlz_bits);
                    long i;
                    if (INT_MAX-1 < numdigits) /* INT_MAX is used because rb_long2int is used later. */
                        rb_raise(rb_eArgError, "size too big");
                    if (sign) {
                        if (numdigits == 0)
                            numdigits = 1;
                        tmp = rb_str_new(NULL, numdigits);
                        valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp),
                                1, CHAR_BIT-numbits, INTEGER_PACK_BIG_ENDIAN);
                        for (i = 0; i < RSTRING_LEN(tmp); i++)
                            RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]];
                        s = RSTRING_PTR(tmp);
                        if (valsign < 0) {
                            sc = '-';
                            width--;
                        }
                        else if (flags & FPLUS) {
                            sc = '+';
                            width--;
                        }
                        else if (flags & FSPACE) {
                            sc = ' ';
                            width--;
                        }
                    }
                    else {
                        /* Following conditional "numdigits++" guarantees the
                         * most significant digit as
                         * - '1'(bin), '7'(oct) or 'f'(hex) for negative numbers
                         * - '0' for zero
                         * - not '0' for positive numbers.
                         *
                         * It also guarantees the most significant two
                         * digits will not be '11'(bin), '77'(oct), 'ff'(hex)
                         * or '00'.  */
                        if (numdigits == 0 ||
                                ((abs_nlz_bits != (size_t)(numbits-1) ||
                                  !rb_absint_singlebit_p(val)) &&
                                 (!bignum ? v < 0 : BIGNUM_NEGATIVE_P(val))))
                            numdigits++;
                        tmp = rb_str_new(NULL, numdigits);
                        valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp),
                                1, CHAR_BIT-numbits, INTEGER_PACK_2COMP | INTEGER_PACK_BIG_ENDIAN);
                        for (i = 0; i < RSTRING_LEN(tmp); i++)
                            RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]];
                        s = RSTRING_PTR(tmp);
                        dots = valsign < 0;
                    }
                    len = rb_long2int(RSTRING_END(tmp) - s);
                }
                else if (!bignum) {
                    valsign = 1;
                    if (v < 0) {
                        v = -v;
                        sc = '-';
                        width--;
                        valsign = -1;
                    }
                    else if (flags & FPLUS) {
                        sc = '+';
                        width--;
                    }
                    else if (flags & FSPACE) {
                        sc = ' ';
                        width--;
                    }
                    snprintf(nbuf, sizeof(nbuf), "%ld", v);
                    s = nbuf;
		    len = (int)strlen(s);
		}
		else {
                    tmp = rb_big2str(val, 10);
                    s = RSTRING_PTR(tmp);
                    valsign = 1;
                    if (s[0] == '-') {
                        s++;
                        sc = '-';
                        width--;
                        valsign = -1;
                    }
                    else if (flags & FPLUS) {
                        sc = '+';
                        width--;
                    }
                    else if (flags & FSPACE) {
                        sc = ' ';
                        width--;
                    }
		    len = rb_long2int(RSTRING_END(tmp) - s);
		}

		if (dots) {
		    prec -= 2;
		    width -= 2;
		}

		if (*p == 'X') {
		    char *pp = s;
		    int c;
		    while ((c = (int)(unsigned char)*pp) != 0) {
			*pp = rb_enc_toupper(c, enc);
			pp++;
		    }
		}
		if (prefix && !prefix[1]) { /* octal */
		    if (dots) {
			prefix = 0;
		    }
		    else if (len == 1 && *s == '0') {
			len = 0;
			if (flags & FPREC) prec--;
		    }
		    else if ((flags & FPREC) && (prec > len)) {
			prefix = 0;
		    }
		}
		else if (len == 1 && *s == '0') {
		    prefix = 0;
		}
		if (prefix) {
		    width -= (int)strlen(prefix);
		}
		if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
		    prec = width;
		    width = 0;
		}
		else {
		    if (prec < len) {
			if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
			prec = len;
		    }
		    width -= prec;
		}
		if (!(flags&FMINUS)) {
		    CHECK(width);
		    while (width-- > 0) {
			buf[blen++] = ' ';
		    }
		}
		if (sc) PUSH(&sc, 1);
		if (prefix) {
		    int plen = (int)strlen(prefix);
		    PUSH(prefix, plen);
		}
		CHECK(prec - len);
		if (dots) PUSH("..", 2);
		if (!sign && valsign < 0) {
		    char c = sign_bits(base, p);
		    while (len < prec--) {
			buf[blen++] = c;
		    }
		}
		else if ((flags & (FMINUS|FPREC)) != FMINUS) {
		    while (len < prec--) {
			buf[blen++] = '0';
		    }
		}
		PUSH(s, len);
		RB_GC_GUARD(tmp);
		CHECK(width);
		while (width-- > 0) {
		    buf[blen++] = ' ';
		}
	    }
	    break;

	  case 'f':
	    {
		VALUE val = GETARG(), num, den;
		int sign = (flags&FPLUS) ? 1 : 0, zero = 0;
		long len, done = 0;
		int prefix = 0;
		if (!RB_TYPE_P(val, T_RATIONAL)) {
		    nextvalue = val;
		    goto float_value;
		}
		if (!(flags&FPREC)) prec = default_float_precision;
		den = rb_rational_den(val);
		num = rb_rational_num(val);
		if (FIXNUM_P(num)) {
		    if ((SIGNED_VALUE)num < 0) {
			long n = -FIX2LONG(num);
			num = LONG2FIX(n);
			sign = -1;
		    }
		}
		else if (rb_num_negative_p(num)) {
		    sign = -1;
		    num = rb_funcallv(num, idUMinus, 0, 0);
		}
		if (den != INT2FIX(1) || prec > 1) {
		    const ID idDiv = rb_intern("div");
		    VALUE p10 = rb_int_positive_pow(10, prec);
		    VALUE den_2 = rb_funcall(den, idDiv, 1, INT2FIX(2));
		    num = rb_funcallv(num, '*', 1, &p10);
		    num = rb_funcallv(num, '+', 1, &den_2);
		    num = rb_funcallv(num, idDiv, 1, &den);
		}
		else if (prec >= 0) {
		    zero = prec;
		}
		val = rb_obj_as_string(num);
		len = RSTRING_LEN(val) + zero;
		if (prec >= len) ++len; /* integer part 0 */
		if (sign || (flags&FSPACE)) ++len;
		if (prec > 0) ++len; /* period */
		CHECK(len > width ? len : width);
		if (sign || (flags&FSPACE)) {
		    buf[blen++] = sign > 0 ? '+' : sign < 0 ? '-' : ' ';
		    prefix++;
		    done++;
		}
		len = RSTRING_LEN(val) + zero;
		t = RSTRING_PTR(val);
		if (len > prec) {
		    memcpy(&buf[blen], t, len - prec);
		    blen += len - prec;
		    done += len - prec;
		}
		else {
		    buf[blen++] = '0';
		    done++;
		}
		if (prec > 0) {
		    buf[blen++] = '.';
		    done++;
		}
		if (zero) {
		    FILL('0', zero);
		    done += zero;
		}
		else if (prec > len) {
		    FILL('0', prec - len);
		    memcpy(&buf[blen], t, len);
		    blen += len;
		    done += prec;
		}
		else if (prec > 0) {
		    memcpy(&buf[blen], t + len - prec, prec);
		    blen += prec;
		    done += prec;
		}
		if ((flags & FWIDTH) && width > done) {
		    int fill = ' ';
		    long shifting = 0;
		    if (!(flags&FMINUS)) {
			shifting = done;
			if (flags&FZERO) {
			    shifting -= prefix;
			    fill = '0';
			}
			blen -= shifting;
			memmove(&buf[blen + width - done], &buf[blen], shifting);
		    }
		    FILL(fill, width - done);
		    blen += shifting;
		}
		RB_GC_GUARD(val);
		break;
	    }
	  case 'g':
	  case 'G':
	  case 'e':
	  case 'E':
	    /* TODO: rational support */
	  case 'a':
	  case 'A':
	  float_value:
	    {
		VALUE val = GETARG();
		double fval;
		int i, need;
		char fbuf[32];

		fval = RFLOAT_VALUE(rb_Float(val));
		if (isnan(fval) || isinf(fval)) {
		    const char *expr;

		    if (isnan(fval)) {
			expr = "NaN";
		    }
		    else {
			expr = "Inf";
		    }
		    need = (int)strlen(expr);
		    if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
			need++;
		    if ((flags & FWIDTH) && need < width)
			need = width;

		    CHECK(need + 1);
		    snprintf(&buf[blen], need + 1, "%*s", need, "");
		    if (flags & FMINUS) {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen++] = '-';
			else if (flags & FPLUS)
			    buf[blen++] = '+';
			else if (flags & FSPACE)
			    blen++;
			memcpy(&buf[blen], expr, strlen(expr));
		    }
		    else {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen + need - strlen(expr) - 1] = '-';
			else if (flags & FPLUS)
			    buf[blen + need - strlen(expr) - 1] = '+';
			else if ((flags & FSPACE) && need > width)
			    blen++;
			memcpy(&buf[blen + need - strlen(expr)], expr,
			       strlen(expr));
		    }
		    blen += strlen(&buf[blen]);
		    break;
		}

		fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
		need = 0;
		if (*p != 'e' && *p != 'E') {
		    i = INT_MIN;
		    frexp(fval, &i);
		    if (i > 0)
			need = BIT_DIGITS(i);
		}
		need += (flags&FPREC) ? prec : default_float_precision;
		if ((flags&FWIDTH) && need < width)
		    need = width;
		need += 20;

		CHECK(need);
		snprintf(&buf[blen], need, fbuf, fval);
		blen += strlen(&buf[blen]);
	    }
	    break;
	}
	flags = FNONE;
    }

  sprint_exit:
    RB_GC_GUARD(fmt);
    /* XXX - We cannot validate the number of arguments if (digit)$ style used.
     */
    if (posarg >= 0 && nextarg < argc) {
	const char *mesg = "too many arguments for format string";
	if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
	if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
    }
    rb_str_resize(result, blen);

    if (tainted) OBJ_TAINT(result);
    return result;
}
Exemple #4
0
VALUE
rb_str_format(int argc, const VALUE *argv, VALUE fmt)
{
    rb_encoding *enc;
    const char *p, *end;
    char *buf;
    long blen, bsiz;
    VALUE result;

    long scanned = 0;
    int coderange = ENC_CODERANGE_7BIT;
    int width, prec, flags = FNONE;
    int nextarg = 1;
    int posarg = 0;
    int tainted = 0;
    VALUE nextvalue;
    VALUE tmp;
    VALUE str;
    volatile VALUE hash = Qundef;

#define CHECK_FOR_WIDTH(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "width given twice");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "width after precision"); \
    }
#define CHECK_FOR_FLAGS(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "flag after width");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "flag after precision"); \
    }

    ++argc;
    --argv;
    if (OBJ_TAINTED(fmt)) tainted = 1;
    StringValue(fmt);
    enc = rb_enc_get(fmt);
    fmt = rb_str_new4(fmt);
    p = RSTRING_PTR(fmt);
    end = p + RSTRING_LEN(fmt);
    blen = 0;
    bsiz = 120;
    result = rb_str_buf_new(bsiz);
    rb_enc_copy(result, fmt);
    buf = RSTRING_PTR(result);
    memset(buf, 0, bsiz);
    ENC_CODERANGE_SET(result, coderange);

    for (; p < end; p++) {
	const char *t;
	int n;
	ID id = 0;

	for (t = p; t < end && *t != '%'; t++) ;
	PUSH(p, t - p);
	if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
	    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
	    ENC_CODERANGE_SET(result, coderange);
	}
	if (t >= end) {
	    /* end of fmt string */
	    goto sprint_exit;
	}
	p = t + 1;		/* skip `%' */

	width = prec = -1;
	nextvalue = Qundef;
      retry:
	switch (*p) {
	  default:
	    if (rb_enc_isprint(*p, enc))
		rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
	    else
		rb_raise(rb_eArgError, "malformed format string");
	    break;

	  case ' ':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSPACE;
	    p++;
	    goto retry;

	  case '#':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSHARP;
	    p++;
	    goto retry;

	  case '+':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FPLUS;
	    p++;
	    goto retry;

	  case '-':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FMINUS;
	    p++;
	    goto retry;

	  case '0':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FZERO;
	    p++;
	    goto retry;

	  case '1': case '2': case '3': case '4':
	  case '5': case '6': case '7': case '8': case '9':
	    n = 0;
	    GETNUM(n, width);
	    if (*p == '$') {
		if (nextvalue != Qundef) {
		    rb_raise(rb_eArgError, "value given twice - %d$", n);
		}
		nextvalue = GETPOSARG(n);
		p++;
		goto retry;
	    }
	    CHECK_FOR_WIDTH(flags);
	    width = n;
	    flags |= FWIDTH;
	    goto retry;

	  case '<':
	  case '{':
	    {
		const char *start = p;
		char term = (*p == '<') ? '>' : '}';
		int len;

		for (; p < end && *p != term; ) {
		    p += rb_enc_mbclen(p, end, enc);
		}
		if (p >= end) {
		    rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
		}
#if SIZEOF_INT < SIZEOF_SIZE_T
		if ((size_t)(p - start) >= INT_MAX) {
		    const int message_limit = 20;
		    len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start);
		    rb_enc_raise(enc, rb_eArgError,
				 "too long name (%"PRIdSIZE" bytes) - %.*s...%c",
				 (size_t)(p - start - 2), len, start, term);
		}
#endif
		len = (int)(p - start + 1); /* including parenthesis */
		if (id) {
		    rb_enc_raise(enc, rb_eArgError, "named%.*s after <%s>",
				 len, start, rb_id2name(id));
		}
		nextvalue = GETNAMEARG((id = rb_check_id_cstr(start + 1,
							      len - 2 /* without parenthesis */,
							      enc),
					ID2SYM(id)),
				       start, len, enc);
		if (nextvalue == Qundef) {
		    rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start);
		}
		if (term == '}') goto format_s;
		p++;
		goto retry;
	    }

	  case '*':
	    CHECK_FOR_WIDTH(flags);
	    flags |= FWIDTH;
	    GETASTER(width);
	    if (width < 0) {
		flags |= FMINUS;
		width = -width;
	    }
	    p++;
	    goto retry;

	  case '.':
	    if (flags & FPREC0) {
		rb_raise(rb_eArgError, "precision given twice");
	    }
	    flags |= FPREC|FPREC0;

	    prec = 0;
	    p++;
	    if (*p == '*') {
		GETASTER(prec);
		if (prec < 0) {	/* ignore negative precision */
		    flags &= ~FPREC;
		}
		p++;
		goto retry;
	    }

	    GETNUM(prec, precision);
	    goto retry;

	  case '\n':
	  case '\0':
	    p--;
	  case '%':
	    if (flags != FNONE) {
		rb_raise(rb_eArgError, "invalid format character - %%");
	    }
	    PUSH("%", 1);
	    break;

	  case 'c':
	    {
		VALUE val = GETARG();
		VALUE tmp;
		unsigned int c;
		int n;

		tmp = rb_check_string_type(val);
		if (!NIL_P(tmp)) {
		    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
			rb_raise(rb_eArgError, "%%c requires a character");
		    }
		    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
		    RB_GC_GUARD(tmp);
		}
		else {
		    c = NUM2INT(val);
		    n = rb_enc_codelen(c, enc);
		}
		if (n <= 0) {
		    rb_raise(rb_eArgError, "invalid character");
		}
		if (!(flags & FWIDTH)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
		else if ((flags & FMINUS)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		    FILL(' ', width-1);
		}
		else {
		    FILL(' ', width-1);
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
	    }
	    break;

	  case 's':
	  case 'p':
	  format_s:
	    {
		VALUE arg = GETARG();
		long len, slen;

		if (*p == 'p') arg = rb_inspect(arg);
		str = rb_obj_as_string(arg);
		if (OBJ_TAINTED(str)) tainted = 1;
		len = RSTRING_LEN(str);
		rb_str_set_len(result, blen);
		if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
		    int cr = coderange;
		    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
		    ENC_CODERANGE_SET(result,
				      (cr == ENC_CODERANGE_UNKNOWN ?
				       ENC_CODERANGE_BROKEN : (coderange = cr)));
		}
		enc = rb_enc_check(result, str);
		if (flags&(FPREC|FWIDTH)) {
		    slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
		    if (slen < 0) {
			rb_raise(rb_eArgError, "invalid mbstring sequence");
		    }
		    if ((flags&FPREC) && (prec < slen)) {
			char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
					     prec, enc);
			slen = prec;
			len = p - RSTRING_PTR(str);
		    }
		    /* need to adjust multi-byte string pos */
		    if ((flags&FWIDTH) && (width > slen)) {
			width -= (int)slen;
			if (!(flags&FMINUS)) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			CHECK(len);
			memcpy(&buf[blen], RSTRING_PTR(str), len);
			RB_GC_GUARD(str);
			blen += len;
			if (flags&FMINUS) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			rb_enc_associate(result, enc);
			break;
		    }
		}
		PUSH(RSTRING_PTR(str), len);
		RB_GC_GUARD(str);
		rb_enc_associate(result, enc);
	    }
	    break;

	  case 'd':
	  case 'i':
	  case 'o':
	  case 'x':
	  case 'X':
	  case 'b':
	  case 'B':
	  case 'u':
	    {
		volatile VALUE val = GETARG();
		char fbuf[32], nbuf[64], *s;
		const char *prefix = 0;
		int sign = 0, dots = 0;
		char sc = 0;
		long v = 0;
		int base, bignum = 0;
		int len;

		switch (*p) {
		  case 'd':
		  case 'i':
		  case 'u':
		    sign = 1; break;
		  case 'o':
		  case 'x':
		  case 'X':
		  case 'b':
		  case 'B':
		    if (flags&(FPLUS|FSPACE)) sign = 1;
		    break;
		}
		if (flags & FSHARP) {
		    switch (*p) {
		      case 'o':
			prefix = "0"; break;
		      case 'x':
			prefix = "0x"; break;
		      case 'X':
			prefix = "0X"; break;
		      case 'b':
			prefix = "0b"; break;
		      case 'B':
			prefix = "0B"; break;
		    }
		}

	      bin_retry:
		switch (TYPE(val)) {
		  case T_FLOAT:
		    if (FIXABLE(RFLOAT_VALUE(val))) {
			val = LONG2FIX((long)RFLOAT_VALUE(val));
			goto bin_retry;
		    }
		    val = rb_dbl2big(RFLOAT_VALUE(val));
		    if (FIXNUM_P(val)) goto bin_retry;
		    bignum = 1;
		    break;
		  case T_STRING:
		    val = rb_str_to_inum(val, 0, TRUE);
		    goto bin_retry;
		  case T_BIGNUM:
		    bignum = 1;
		    break;
		  case T_FIXNUM:
		    v = FIX2LONG(val);
		    break;
		  default:
		    val = rb_Integer(val);
		    goto bin_retry;
		}

		switch (*p) {
		  case 'o':
		    base = 8; break;
		  case 'x':
		  case 'X':
		    base = 16; break;
		  case 'b':
		  case 'B':
		    base = 2; break;
		  case 'u':
		  case 'd':
		  case 'i':
		  default:
		    base = 10; break;
		}

		if (!bignum) {
		    if (base == 2) {
			val = rb_int2big(v);
			goto bin_retry;
		    }
		    if (sign) {
			char c = *p;
			if (c == 'i') c = 'd'; /* %d and %i are identical */
			if (v < 0) {
			    v = -v;
			    sc = '-';
			    width--;
			}
			else if (flags & FPLUS) {
			    sc = '+';
			    width--;
			}
			else if (flags & FSPACE) {
			    sc = ' ';
			    width--;
			}
			snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
			snprintf(nbuf, sizeof(nbuf), fbuf, v);
			s = nbuf;
		    }
		    else {
			s = nbuf;
			if (v < 0) {
			    dots = 1;
			}
			snprintf(fbuf, sizeof(fbuf), "%%l%c", *p == 'X' ? 'x' : *p);
			snprintf(++s, sizeof(nbuf) - 1, fbuf, v);
			if (v < 0) {
			    char d = 0;

			    s = remove_sign_bits(s, base);
			    switch (base) {
			      case 16:
				d = 'f'; break;
			      case 8:
				d = '7'; break;
			    }
			    if (d && *s != d) {
				*--s = d;
			    }
			}
		    }
		    len = (int)strlen(s);
		}
		else {
		    if (sign) {
			tmp = rb_big2str(val, base);
			s = RSTRING_PTR(tmp);
			if (s[0] == '-') {
			    s++;
			    sc = '-';
			    width--;
			}
			else if (flags & FPLUS) {
			    sc = '+';
			    width--;
			}
			else if (flags & FSPACE) {
			    sc = ' ';
			    width--;
			}
		    }
		    else {
			if (!RBIGNUM_SIGN(val)) {
			    val = rb_big_clone(val);
			    rb_big_2comp(val);
			}
			tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val));
			s = RSTRING_PTR(tmp);
			if (*s == '-') {
			    dots = 1;
			    if (base == 10) {
				rb_warning("negative number for %%u specifier");
			    }
			    s = remove_sign_bits(++s, base);
			    switch (base) {
			      case 16:
				if (s[0] != 'f') *--s = 'f'; break;
			      case 8:
				if (s[0] != '7') *--s = '7'; break;
			      case 2:
				if (s[0] != '1') *--s = '1'; break;
			    }
			}
		    }
		    len = rb_long2int(RSTRING_END(tmp) - s);
		}

		if (dots) {
		    prec -= 2;
		    width -= 2;
		}

		if (*p == 'X') {
		    char *pp = s;
		    int c;
		    while ((c = (int)(unsigned char)*pp) != 0) {
			*pp = rb_enc_toupper(c, enc);
			pp++;
		    }
		}
		if (prefix && !prefix[1]) { /* octal */
		    if (dots) {
			prefix = 0;
		    }
		    else if (len == 1 && *s == '0') {
			len = 0;
			if (flags & FPREC) prec--;
		    }
		    else if ((flags & FPREC) && (prec > len)) {
			prefix = 0;
		    }
		}
		else if (len == 1 && *s == '0') {
		    prefix = 0;
		}
		if (prefix) {
		    width -= (int)strlen(prefix);
		}
		if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
		    prec = width;
		    width = 0;
		}
		else {
		    if (prec < len) {
			if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
			prec = len;
		    }
		    width -= prec;
		}
		if (!(flags&FMINUS)) {
		    CHECK(width);
		    while (width-- > 0) {
			buf[blen++] = ' ';
		    }
		}
		if (sc) PUSH(&sc, 1);
		if (prefix) {
		    int plen = (int)strlen(prefix);
		    PUSH(prefix, plen);
		}
		CHECK(prec - len);
		if (dots) PUSH("..", 2);
		if (!bignum && v < 0) {
		    char c = sign_bits(base, p);
		    while (len < prec--) {
			buf[blen++] = c;
		    }
		}
		else if ((flags & (FMINUS|FPREC)) != FMINUS) {
		    char c;

		    if (!sign && bignum && !RBIGNUM_SIGN(val))
			c = sign_bits(base, p);
		    else
			c = '0';
		    while (len < prec--) {
			buf[blen++] = c;
		    }
		}
		PUSH(s, len);
		RB_GC_GUARD(tmp);
		CHECK(width);
		while (width-- > 0) {
		    buf[blen++] = ' ';
		}
	    }
	    break;

	  case 'f':
	  case 'g':
	  case 'G':
	  case 'e':
	  case 'E':
	  case 'a':
	  case 'A':
	    {
		VALUE val = GETARG();
		double fval;
		int i, need = 6;
		char fbuf[32];

		fval = RFLOAT_VALUE(rb_Float(val));
		if (isnan(fval) || isinf(fval)) {
		    const char *expr;

		    if (isnan(fval)) {
			expr = "NaN";
		    }
		    else {
			expr = "Inf";
		    }
		    need = (int)strlen(expr);
		    if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
			need++;
		    if ((flags & FWIDTH) && need < width)
			need = width;

		    CHECK(need + 1);
		    snprintf(&buf[blen], need + 1, "%*s", need, "");
		    if (flags & FMINUS) {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen++] = '-';
			else if (flags & FPLUS)
			    buf[blen++] = '+';
			else if (flags & FSPACE)
			    blen++;
			memcpy(&buf[blen], expr, strlen(expr));
		    }
		    else {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen + need - strlen(expr) - 1] = '-';
			else if (flags & FPLUS)
			    buf[blen + need - strlen(expr) - 1] = '+';
			else if ((flags & FSPACE) && need > width)
			    blen++;
			memcpy(&buf[blen + need - strlen(expr)], expr,
			       strlen(expr));
		    }
		    blen += strlen(&buf[blen]);
		    break;
		}

		fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
		need = 0;
		if (*p != 'e' && *p != 'E') {
		    i = INT_MIN;
		    frexp(fval, &i);
		    if (i > 0)
			need = BIT_DIGITS(i);
		}
		need += (flags&FPREC) ? prec : 6;
		if ((flags&FWIDTH) && need < width)
		    need = width;
		need += 20;

		CHECK(need);
		snprintf(&buf[blen], need, fbuf, fval);
		blen += strlen(&buf[blen]);
	    }
	    break;
	}
	flags = FNONE;
    }

  sprint_exit:
    RB_GC_GUARD(fmt);
    /* XXX - We cannot validate the number of arguments if (digit)$ style used.
     */
    if (posarg >= 0 && nextarg < argc) {
	const char *mesg = "too many arguments for format string";
	if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
	if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
    }
    rb_str_resize(result, blen);

    if (tainted) OBJ_TAINT(result);
    return result;
}