Esempio n. 1
0
/*
 * Resolves an encoding name to its mrb_encoding.
 *
 * `enc` is first converted in place with mrb_string_value().  The resulting
 * string must itself be tagged with an ASCII-compatible encoding, and its
 * contents must be a name that mrb_enc_find_index() recognises.
 *
 * Raises E_ARGUMENT_ERROR when the name's own encoding is not ASCII
 * compatible, or when no encoding with that name is found.
 */
static mrb_encoding *
to_encoding(mrb_state *mrb, mrb_value enc)
{
  mrb_encoding *name_enc;
  int enc_index;

  mrb_string_value(mrb, &enc);

  /* The name has to be readable as ASCII before it can be looked up. */
  name_enc = mrb_enc_get(mrb, enc);
  if (!mrb_enc_asciicompat(mrb, name_enc)) {
    mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid name encoding (non ASCII)");
  }

  enc_index = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc));
  if (enc_index < 0) {
    mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %s", RSTRING_PTR(enc));
  }

  return mrb_enc_from_index(mrb, enc_index);
}
Esempio n. 2
0
/*
 * Returns the encoding index designated by `enc`, or -1 when none can be
 * determined.
 *
 * Lookup order:
 *   1. enc_check_encoding() -- a non-negative result is returned as is.
 *   2. Otherwise `enc` is passed through mrb_check_string_type(); a nil
 *      result yields -1.
 *   3. The resulting string must carry an ASCII-compatible encoding
 *      (otherwise -1); its contents are then looked up by name with
 *      mrb_enc_find_index(), whose result is returned unchanged.
 */
int
mrb_to_encoding_index(mrb_state *mrb, mrb_value enc)
{
    int idx;

    idx = enc_check_encoding(mrb, enc);
    if (idx >= 0) {
        return idx;
    }

    /*
     * Convert before testing for nil.  The assignment is deliberately kept
     * out of the mrb_nil_p() argument: if mrb_nil_p is a function-like macro
     * that expands its argument more than once, an embedded assignment would
     * run the conversion repeatedly.
     */
    enc = mrb_check_string_type(mrb, enc);
    if (mrb_nil_p(enc)) {
        return -1;
    }
    if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) {
        return -1;
    }
    return mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc));
}
Esempio n. 3
0
/*
 * Expands the printf-style directives in `fmt` into a newly built string.
 *
 * mrb   - interpreter state, used for allocation and for raising errors.
 * argc  - number of values in argv.
 * argv  - values consumed by the directives.
 * fmt   - format specification; converted with mrb_string_value() and passed
 *         through mrb_str_new4() before it is scanned.
 *
 * Directives handled by the switch below: flags ' ', '#', '+', '-', '0';
 * a decimal width or "N$" positional reference; "<name>" / "{name}" named
 * references; '*' width; '.' precision; "%%"; and the conversions
 * c, s, p, d, i, o, x, X, b, B, u, f, g, G, e, E, a, A.
 *
 * Returns the result string, resized to the number of bytes produced.
 *
 * Raises E_ARGUMENT_ERROR for malformed directives (unknown conversion,
 * duplicated or misordered flags/width/precision, unterminated named
 * reference, bad %c argument) and E_KEY_ERROR when a named reference is not
 * found.
 *
 * NOTE(review): PUSH, CHECK, FILL, GETNUM, GETARG, GETPOSARG, GETNAMEARG and
 * GETASTER are macros defined outside this block.  Several locals here (for
 * example `tmp` and `hash`) are never referenced directly, so they are
 * presumably used by those macros by name -- do not rename or remove locals
 * without checking the macro definitions.
 */
mrb_value
mrb_str_format(mrb_state *mrb, int argc, const mrb_value *argv, mrb_value fmt)
{
  const char *p, *end;
  char *buf;
  long blen, bsiz;
  mrb_value result;

  int width, prec, flags = FNONE;
  int nextarg = 1;
  int posarg = 0;
  mrb_value nextvalue;
  mrb_value tmp;
  mrb_value str;
  mrb_value hash = mrb_undef_value();

#define CHECK_FOR_WIDTH(f)                                                  \
  if ((f) & FWIDTH) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "width given twice");         \
  }                                                                         \
  if ((f) & FPREC0) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "width after precision");     \
  }
#define CHECK_FOR_FLAGS(f)                                                  \
  if ((f) & FWIDTH) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after width");          \
  }                                                                         \
  if ((f) & FPREC0) {                                                       \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision");      \
  }

  /* Shift the argument window by one slot; nextarg starts at 1 to match. */
  ++argc;
  --argv;
  mrb_string_value(mrb, &fmt);
  fmt = mrb_str_new4(mrb, fmt);
  p = RSTRING_PTR(fmt);
  end = p + RSTRING_LEN(fmt);
  blen = 0;
  bsiz = 120;
  result = mrb_str_buf_new(mrb, bsiz);
  buf = RSTRING_PTR(result);
  memset(buf, 0, bsiz);

  for (; p < end; p++) {
    const char *t;
    int n;
    mrb_sym id = 0;

    /* Copy the literal text up to the next '%' verbatim. */
    for (t = p; t < end && *t != '%'; t++) ;
    PUSH(p, t - p);
    if (t >= end)
      goto sprint_exit; /* end of fmt string */

    p = t + 1;    /* skip `%' */

    width = prec = -1;
    nextvalue = mrb_undef_value();

    /* Each flag/width/precision piece jumps back here until a conversion
     * character ends the directive. */
retry:
    switch (*p) {
    default:
      mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed format string - %%%c", *p);
      break;

    case ' ':
      CHECK_FOR_FLAGS(flags);
      flags |= FSPACE;
      p++;
      goto retry;

    case '#':
      CHECK_FOR_FLAGS(flags);
      flags |= FSHARP;
      p++;
      goto retry;

    case '+':
      CHECK_FOR_FLAGS(flags);
      flags |= FPLUS;
      p++;
      goto retry;

    case '-':
      CHECK_FOR_FLAGS(flags);
      flags |= FMINUS;
      p++;
      goto retry;

    case '0':
      CHECK_FOR_FLAGS(flags);
      flags |= FZERO;
      p++;
      goto retry;

    case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      /* A number is either a positional reference ("N$") or the width. */
      n = 0;
      GETNUM(n, width);
      if (*p == '$') {
        if (!UNDEF_P(nextvalue)) {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "value given twice - %d$", n);
        }
        nextvalue = GETPOSARG(n);
        p++;
        goto retry;
      }
      CHECK_FOR_WIDTH(flags);
      width = n;
      flags |= FWIDTH;
      goto retry;

    case '<':
    case '{':
      {
        const char *start = p;
        char term = (*p == '<') ? '>' : '}';
        mrb_value symname;

        for (; p < end && *p != term; )
          p++;
        if (p >= end) {
          /* No closing '>' / '}' before the end of the format.  Without
           * this check the code below would step p past `end` and the
           * next switch (*p) would read outside the format string. */
          mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed name - unmatched parenthesis");
        }
        if (id) {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "name%.*s after <%s>",
               (int)(p - start + 1), start, mrb_sym2name(mrb, id));
        }
        /* The name is the text strictly between the delimiters. */
        symname = mrb_str_new(mrb, start + 1, p - start - 1);
        id = mrb_intern(mrb, RSTRING_PTR(symname));
        nextvalue = GETNAMEARG(mrb_symbol_value(id), start, (int)(p - start + 1));
        if (UNDEF_P(nextvalue)) {
          mrb_raise(mrb, E_KEY_ERROR, "key%.*s not found", (int)(p - start + 1), start);
        }
        /* "%{name}" is complete on its own and is formatted like %s;
         * "%<name>" must still be followed by a conversion character. */
        if (term == '}') goto format_s;
        p++;
        goto retry;
      }

    case '*':
      CHECK_FOR_WIDTH(flags);
      flags |= FWIDTH;
      GETASTER(width);
      if (width < 0) {
        /* a negative '*' width means left-justify with the absolute value */
        flags |= FMINUS;
        width = -width;
      }
      p++;
      goto retry;

    case '.':
      if (flags & FPREC0) {
        mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice");
      }
      flags |= FPREC|FPREC0;

      prec = 0;
      p++;
      if (*p == '*') {
        GETASTER(prec);
        if (prec < 0) {  /* ignore negative precision */
          flags &= ~FPREC;
        }
        p++;
        goto retry;
      }

      GETNUM(prec, precision);
      goto retry;

    case '\n':
    case '\0':
      /* Undo the advance so the outer loop's p++ revisits this character. */
      p--;
      /* fall through */
    case '%':
      if (flags != FNONE) {
        mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %%");
      }
      PUSH("%", 1);
      break;

    case 'c':
      {
        mrb_value val = GETARG();
        mrb_value tmp;
        unsigned int c;
        int n;
#ifdef INCLUDE_ENCODING
        mrb_encoding *enc = mrb_enc_get(mrb, fmt);
#endif //INCLUDE_ENCODING
        /* NOTE(review): `enc` is declared only under INCLUDE_ENCODING but is
         * passed to mrb_enc_codelen / mrb_enc_mbcput unconditionally below.
         * Unless those are macros that discard the argument, this case does
         * not compile without INCLUDE_ENCODING -- confirm. */

        /* Accept either a one-byte string or an integer character code. */
        tmp = mrb_check_string_type(mrb, val);
        if (!mrb_nil_p(tmp)) {
          if (RSTRING_LEN(tmp) != 1 ) {
            mrb_raise(mrb, E_ARGUMENT_ERROR, "%%c requires a character");
          }
#ifdef INCLUDE_ENCODING
          c = mrb_enc_codepoint_len(mrb, RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
#else
          c = RSTRING_PTR(tmp)[0];
          n = 1;
#endif //INCLUDE_ENCODING
        }
        else {
          c = mrb_fixnum(val);
          n = mrb_enc_codelen(mrb, c, enc);
        }
        if (n <= 0) {
          mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
        }
        if (!(flags & FWIDTH)) {
          CHECK(n);
          mrb_enc_mbcput(c, &buf[blen], enc);
          blen += n;
        }
        else if ((flags & FMINUS)) {
          /* left-justified: character first, then padding */
          CHECK(n);
          mrb_enc_mbcput(c, &buf[blen], enc);
          blen += n;
          FILL(' ', width-1);
        }
        else {
          /* right-justified: padding first, then character */
          FILL(' ', width-1);
          CHECK(n);
          mrb_enc_mbcput(c, &buf[blen], enc);
          blen += n;
        }
      }
      break;

    case 's':
    case 'p':
format_s:
      {
        mrb_value arg = GETARG();
        long len, slen;
#ifdef INCLUDE_ENCODING
        mrb_encoding *enc = mrb_enc_get(mrb, fmt);
#endif //INCLUDE_ENCODING
        /* NOTE(review): as in the %c case, `enc` is used by
         * mrb_enc_associate below outside the #ifdef -- confirm that builds
         * without INCLUDE_ENCODING are supported. */

        if (*p == 'p') arg = mrb_inspect(mrb, arg);
        str = mrb_obj_as_string(mrb, arg);
        len = RSTRING_LEN(str);
        mrb_str_set_len(mrb, result, blen);
        if (flags&(FPREC|FWIDTH)) {
          slen = RSTRING_LEN(str);
          if (slen < 0) {
            mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence");
          }
          if ((flags&FPREC) && (prec < slen)) {
            /* Precision truncates the string: `cut` marks where the copy
             * stops.  (Named `cut` so it does not shadow the format
             * cursor `p`.) */
#ifdef INCLUDE_ENCODING
            char *cut = mrb_enc_nth(mrb, RSTRING_PTR(str), RSTRING_END(str),prec, enc);
#else
            char *cut = RSTRING_PTR(str) + prec;
#endif //INCLUDE_ENCODING
            slen = prec;
            len = cut - RSTRING_PTR(str);
          }
          /* need to adjust multi-byte string pos */
          if ((flags&FWIDTH) && (width > slen)) {
            /* from here on `width` is the number of padding blanks */
            width -= (int)slen;
            if (!(flags&FMINUS)) {
              CHECK(width);
              while (width--) {
                buf[blen++] = ' ';
              }
            }
            CHECK(len);
            memcpy(&buf[blen], RSTRING_PTR(str), len);
            blen += len;
            if (flags&FMINUS) {
              CHECK(width);
              while (width--) {
                buf[blen++] = ' ';
              }
            }
            mrb_enc_associate(mrb, result, enc);
            break;
          }
        }
        PUSH(RSTRING_PTR(str), len);
        mrb_enc_associate(mrb, result, enc);
      }
      break;

    case 'd':
    case 'i':
    case 'o':
    case 'x':
    case 'X':
    case 'b':
    case 'B':
    case 'u':
      {
        mrb_value val = GETARG();
        char fbuf[32], nbuf[64], *s;
        const char *prefix = 0;
        int sign = 0, dots = 0;
        char sc = 0;
        long v = 0, org_v = 0;
        int base;
        int len;

        /* Decimal conversions are always signed; the others are signed
         * only when '+' or ' ' was requested. */
        switch (*p) {
        case 'd':
        case 'i':
        case 'u':
          sign = 1; break;
        case 'o':
        case 'x':
        case 'X':
        case 'b':
        case 'B':
          if (flags&(FPLUS|FSPACE)) sign = 1;
          break;
        }
        if (flags & FSHARP) {
          switch (*p) {
          case 'o': prefix = "0"; break;
          case 'x': prefix = "0x"; break;
          case 'X': prefix = "0X"; break;
          case 'b': prefix = "0b"; break;
          case 'B': prefix = "0B"; break;
          }
        }

        /* Coerce the argument until it is a fixnum, re-dispatching on the
         * type after every conversion step. */
bin_retry:
        switch (mrb_type(val)) {
        case MRB_TT_FLOAT:
          if (FIXABLE(mrb_float(val))) {
            val = mrb_fixnum_value((mrb_int)mrb_float(val));
            goto bin_retry;
          }
          val = mrb_flt2big(mrb, mrb_float(val));
          if (FIXNUM_P(val)) goto bin_retry;
          break;
        case MRB_TT_STRING:
          val = mrb_str_to_inum(mrb, val, 0, TRUE);
          goto bin_retry;
        case MRB_TT_FIXNUM:
          v = (long)mrb_fixnum(val);
          break;
        default:
          val = mrb_Integer(mrb, val);
          goto bin_retry;
        }

        switch (*p) {
        case 'o':
          base = 8; break;
        case 'x':
        case 'X':
          base = 16; break;
        case 'b':
        case 'B':
          base = 2; break;
        case 'u':
        case 'd':
        case 'i':
        default:
          base = 10; break;
        }

        if (base == 2) {
          /* There is no binary conversion in the C format used below, so
           * the value is rendered to a string in base 2 first and that
           * string is re-read as a base-10 number, to be printed with %ld. */
          org_v = v;
          if ( v < 0 && !sign ) {
            val = mrb_fix2binstr(mrb, mrb_fixnum_value(v), base);
            dots = 1;
          }
          else {
            val = mrb_fix2str(mrb, mrb_fixnum_value(v), base);
          }
          v = mrb_fixnum(mrb_str_to_inum(mrb, val, 10, 0/*Qfalse*/));
        }
        if (sign) {
          char c = *p;
          if (c == 'i') c = 'd'; /* %d and %i are identical */
          if (base == 2) c = 'd';
          /* The sign character is emitted separately via `sc`; reserve one
           * column of the width for it. */
          if (v < 0) {
            v = -v;
            sc = '-';
            width--;
          }
          else if (flags & FPLUS) {
            sc = '+';
            width--;
          }
          else if (flags & FSPACE) {
            sc = ' ';
            width--;
          }
          snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
          snprintf(nbuf, sizeof(nbuf), fbuf, v);
          s = nbuf;
        }
        else {
          char c = *p;
          if (c == 'X') c = 'x';
          if (base == 2) c = 'd';
          s = nbuf;
          if (v < 0) {
            dots = 1;
          }
          snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
          /* Digits start at nbuf[1], leaving nbuf[0] free for the `*--s`
           * below. */
          snprintf(++s, sizeof(nbuf) - 1, fbuf, v);
          if (v < 0) {
            char d = 0;

            s = remove_sign_bits(s, base);
            switch (base) {
            case 16: d = 'f'; break;
            case 8:  d = '7'; break;
            case 2:  d = '1'; break;
            }

            /* make sure the digits start with one sign digit */
            if (d && *s != d) {
              *--s = d;
            }
          }
        }
        len = (int)strlen(s);

        if (dots) {
          /* account for the two characters of the ".." marker */
          prec -= 2;
          width -= 2;
        }

        if (*p == 'X') {
          char *pp = s;
          int c;
#ifdef INCLUDE_ENCODING
          mrb_encoding *enc = mrb_enc_get(mrb, fmt);
#endif //INCLUDE_ENCODING
          while ((c = (int)(unsigned char)*pp) != 0) {
#ifdef INCLUDE_ENCODING
            *pp = mrb_enc_toupper(c, enc);
#else
            *pp = toupper(c);
#endif //INCLUDE_ENCODING
            pp++;
          }
        }
        if (prefix && !prefix[1]) { /* octal */
          if (dots) {
            prefix = 0;
          }
          else if (len == 1 && *s == '0') {
            len = 0;
            if (flags & FPREC) prec--;
          }
          else if ((flags & FPREC) && (prec > len)) {
            prefix = 0;
          }
        }
        else if (len == 1 && *s == '0') {
          prefix = 0;
        }
        if (prefix) {
          width -= (int)strlen(prefix);
        }
        if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
          /* plain '0' flag: the whole field is filled through the
           * precision loop below instead of with blanks */
          prec = width;
          width = 0;
        }
        else {
          if (prec < len) {
            if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
            prec = len;
          }
          width -= prec;
        }
        if (!(flags&FMINUS)) {
          CHECK(width);
          while (width-- > 0) {
            buf[blen++] = ' ';
          }
        }
        if (sc) PUSH(&sc, 1);
        if (prefix) {
          int plen = (int)strlen(prefix);
          PUSH(prefix, plen);
        }
        CHECK(prec - len);
        if (dots) PUSH("..", 2);
        if (v < 0 || (base == 2 && org_v < 0)) {
          /* negative value: fill up to the precision with sign_bits() */
          char c = sign_bits(base, p);
          while (len < prec--) {
            buf[blen++] = c;
          }
        }
        else if ((flags & (FMINUS|FPREC)) != FMINUS) {
          char c = '0';
          while (len < prec--) {
            buf[blen++] = c;
          }
        }
        PUSH(s, len);
        /* Trailing blanks; a no-op when the leading-blank loop above has
         * already consumed `width`. */
        CHECK(width);
        while (width-- > 0) {
          buf[blen++] = ' ';
        }
      }
      break;

    case 'f':
    case 'g':
    case 'G':
    case 'e':
    case 'E':
    case 'a':
    case 'A':
      {
        mrb_value val = GETARG();
        double fval;
        int i, need = 6;
        char fbuf[32];

        fval = mrb_float(mrb_Float(mrb, val));
        if (isnan(fval) || isinf(fval)) {
          /* NaN and infinities are written by hand as "NaN" / "Inf" with
           * optional sign and padding, not through snprintf. */
          const char *expr;

          if (isnan(fval)) {
            expr = "NaN";
          }
          else {
            expr = "Inf";
          }
          need = (int)strlen(expr);
          if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
            need++;
          if ((flags & FWIDTH) && need < width)
            need = width;

          CHECK(need + 1);
          /* pre-fill the field with `need` blanks, then overwrite in place */
          snprintf(&buf[blen], need + 1, "%*s", need, "");
          if (flags & FMINUS) {
            if (!isnan(fval) && fval < 0.0)
              buf[blen++] = '-';
            else if (flags & FPLUS)
              buf[blen++] = '+';
            else if (flags & FSPACE)
              blen++;
            memcpy(&buf[blen], expr, strlen(expr));
          }
          else {
            if (!isnan(fval) && fval < 0.0)
              buf[blen + need - strlen(expr) - 1] = '-';
            else if (flags & FPLUS)
              buf[blen + need - strlen(expr) - 1] = '+';
            else if ((flags & FSPACE) && need > width)
              blen++;
            memcpy(&buf[blen + need - strlen(expr)], expr,
            strlen(expr));
          }
          blen += strlen(&buf[blen]);
          break;
        }

        /* Finite value: build a C format in fbuf and let snprintf do the
         * work; `need` is the buffer space reserved for its output. */
        fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
        need = 0;
        if (*p != 'e' && *p != 'E') {
          i = INT_MIN;
          frexp(fval, &i);
          if (i > 0)
            need = BIT_DIGITS(i);
        }
        need += (flags&FPREC) ? prec : 6;
        if ((flags&FWIDTH) && need < width)
          need = width;
        need += 20;

        CHECK(need);
        snprintf(&buf[blen], need, fbuf, fval);
        blen += strlen(&buf[blen]);
      }
      break;
    }
    /* directive finished: the next one starts with no flags set */
    flags = FNONE;
  }

  sprint_exit:
  /* XXX - We cannot validate the number of arguments if (digit)$ style used.
   */
  if (posarg >= 0 && nextarg < argc) {
    const char *mesg = "too many arguments for format string";
    if (RTEST(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, "%s", mesg);
    if (RTEST(ruby_verbose)) mrb_warn("%s", mesg);
  }
  mrb_str_resize(mrb, result, blen);

  return result;
}