static mrb_encoding * to_encoding(mrb_state *mrb, mrb_value enc) { int idx; //StringValue(enc); mrb_string_value(mrb, &enc); if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) { mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid name encoding (non ASCII)"); } //idx = mrb_enc_find_index(StringValueCStr(enc)); idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc)); if (idx < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %s", RSTRING_PTR(enc)); } return mrb_enc_from_index(mrb, idx); }
int mrb_to_encoding_index(mrb_state *mrb, mrb_value enc) { int idx; idx = enc_check_encoding(mrb, enc); if (idx >= 0) { return idx; } else if (mrb_nil_p(enc = mrb_check_string_type(mrb, enc))) { return -1; } if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) { return -1; } //return mrb_enc_find_index(StringValueCStr(enc)); return mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc)); }
mrb_value mrb_str_format(mrb_state *mrb, int argc, const mrb_value *argv, mrb_value fmt) { const char *p, *end; char *buf; long blen, bsiz; mrb_value result; int width, prec, flags = FNONE; int nextarg = 1; int posarg = 0; mrb_value nextvalue; mrb_value tmp; mrb_value str; mrb_value hash = mrb_undef_value(); #define CHECK_FOR_WIDTH(f) \ if ((f) & FWIDTH) { \ mrb_raise(mrb, E_ARGUMENT_ERROR, "width given twice"); \ } \ if ((f) & FPREC0) { \ mrb_raise(mrb, E_ARGUMENT_ERROR, "width after precision"); \ } #define CHECK_FOR_FLAGS(f) \ if ((f) & FWIDTH) { \ mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after width"); \ } \ if ((f) & FPREC0) { \ mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision"); \ } ++argc; --argv; mrb_string_value(mrb, &fmt); fmt = mrb_str_new4(mrb, fmt); p = RSTRING_PTR(fmt); end = p + RSTRING_LEN(fmt); blen = 0; bsiz = 120; result = mrb_str_buf_new(mrb, bsiz); buf = RSTRING_PTR(result); memset(buf, 0, bsiz); for (; p < end; p++) { const char *t; int n; mrb_sym id = 0; for (t = p; t < end && *t != '%'; t++) ; PUSH(p, t - p); if (t >= end) goto sprint_exit; /* end of fmt string */ p = t + 1; /* skip `%' */ width = prec = -1; nextvalue = mrb_undef_value(); retry: switch (*p) { default: mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed format string - %%%c", *p); break; case ' ': CHECK_FOR_FLAGS(flags); flags |= FSPACE; p++; goto retry; case '#': CHECK_FOR_FLAGS(flags); flags |= FSHARP; p++; goto retry; case '+': CHECK_FOR_FLAGS(flags); flags |= FPLUS; p++; goto retry; case '-': CHECK_FOR_FLAGS(flags); flags |= FMINUS; p++; goto retry; case '0': CHECK_FOR_FLAGS(flags); flags |= FZERO; p++; goto retry; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': n = 0; GETNUM(n, width); if (*p == '$') { if (!UNDEF_P(nextvalue)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "value given twice - %d$", n); } nextvalue = GETPOSARG(n); p++; goto retry; } CHECK_FOR_WIDTH(flags); width = n; flags |= FWIDTH; goto retry; case '<': case '{': { const char *start = p; char term = (*p == '<') ? '>' : '}'; mrb_value symname; for (; p < end && *p != term; ) p++; if (id) { mrb_raise(mrb, E_ARGUMENT_ERROR, "name%.*s after <%s>", (int)(p - start + 1), start, mrb_sym2name(mrb, id)); } symname = mrb_str_new(mrb, start + 1, p - start - 1); id = mrb_intern(mrb, RSTRING_PTR(symname)); nextvalue = GETNAMEARG(mrb_symbol_value(id), start, (int)(p - start + 1)); if (UNDEF_P(nextvalue)) { mrb_raise(mrb, E_KEY_ERROR, "key%.*s not found", (int)(p - start + 1), start); } if (term == '}') goto format_s; p++; goto retry; } case '*': CHECK_FOR_WIDTH(flags); flags |= FWIDTH; GETASTER(width); if (width < 0) { flags |= FMINUS; width = -width; } p++; goto retry; case '.': if (flags & FPREC0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice"); } flags |= FPREC|FPREC0; prec = 0; p++; if (*p == '*') { GETASTER(prec); if (prec < 0) { /* ignore negative precision */ flags &= ~FPREC; } p++; goto retry; } GETNUM(prec, precision); goto retry; case '\n': case '\0': p--; case '%': if (flags != FNONE) { mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %%"); } PUSH("%", 1); break; case 'c': { mrb_value val = GETARG(); mrb_value tmp; unsigned int c; int n; #ifdef INCLUDE_ENCODING mrb_encoding *enc = mrb_enc_get(mrb, fmt); #endif //INCLUDE_ENCODING tmp = mrb_check_string_type(mrb, val); if (!mrb_nil_p(tmp)) { if (RSTRING_LEN(tmp) != 1 ) { mrb_raise(mrb, E_ARGUMENT_ERROR, "%%c requires a character"); } #ifdef INCLUDE_ENCODING c = mrb_enc_codepoint_len(mrb, RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); #else c = RSTRING_PTR(tmp)[0]; n = 1; #endif //INCLUDE_ENCODING } else { c = mrb_fixnum(val); n = mrb_enc_codelen(mrb, c, enc); } if (n <= 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character"); } if (!(flags & FWIDTH)) { CHECK(n); mrb_enc_mbcput(c, &buf[blen], enc); blen += n; } else if ((flags & FMINUS)) { CHECK(n); mrb_enc_mbcput(c, &buf[blen], enc); blen += n; FILL(' ', width-1); } else { FILL(' ', width-1); CHECK(n); mrb_enc_mbcput(c, &buf[blen], enc); blen += n; } } break; case 's': case 'p': format_s: { mrb_value arg = GETARG(); long len, slen; #ifdef INCLUDE_ENCODING mrb_encoding *enc = mrb_enc_get(mrb, fmt); #endif //INCLUDE_ENCODING if (*p == 'p') arg = mrb_inspect(mrb, arg); str = mrb_obj_as_string(mrb, arg); len = RSTRING_LEN(str); mrb_str_set_len(mrb, result, blen); if (flags&(FPREC|FWIDTH)) { slen = RSTRING_LEN(str); if (slen < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence"); } if ((flags&FPREC) && (prec < slen)) { #ifdef INCLUDE_ENCODING char *p = mrb_enc_nth(mrb, RSTRING_PTR(str), RSTRING_END(str),prec, enc); #else char *p = RSTRING_PTR(str) + prec; #endif //INCLUDE_ENCODING slen = prec; len = p - RSTRING_PTR(str); } /* need to adjust multi-byte string pos */ if ((flags&FWIDTH) && (width > slen)) { width -= (int)slen; if (!(flags&FMINUS)) { CHECK(width); while (width--) { buf[blen++] = ' '; } } CHECK(len); memcpy(&buf[blen], RSTRING_PTR(str), len); blen += len; if (flags&FMINUS) { CHECK(width); while (width--) { buf[blen++] = ' '; } } mrb_enc_associate(mrb, result, enc); break; } } PUSH(RSTRING_PTR(str), len); mrb_enc_associate(mrb, result, enc); } break; case 'd': case 'i': case 'o': case 'x': case 'X': case 'b': case 'B': case 'u': { mrb_value val = GETARG(); char fbuf[32], nbuf[64], *s; const char *prefix = 0; int sign = 0, dots = 0; char sc = 0; long v = 0, org_v = 0; int base; int len; switch (*p) { case 'd': case 'i': case 'u': sign = 1; break; case 'o': case 'x': case 'X': case 'b': case 'B': if (flags&(FPLUS|FSPACE)) sign = 1; break; } if (flags & FSHARP) { switch (*p) { case 'o': prefix = "0"; break; case 'x': prefix = "0x"; break; case 'X': prefix = "0X"; break; case 'b': prefix = "0b"; break; case 'B': prefix = "0B"; break; } } bin_retry: switch (mrb_type(val)) { case MRB_TT_FLOAT: if (FIXABLE(mrb_float(val))) { val = mrb_fixnum_value((mrb_int)mrb_float(val)); goto bin_retry; } val = mrb_flt2big(mrb, mrb_float(val)); if (FIXNUM_P(val)) goto bin_retry; break; case MRB_TT_STRING: val = mrb_str_to_inum(mrb, val, 0, TRUE); goto bin_retry; case MRB_TT_FIXNUM: v = (long)mrb_fixnum(val); break; default: val = mrb_Integer(mrb, val); goto bin_retry; } switch (*p) { case 'o': base = 8; break; case 'x': case 'X': base = 16; break; case 'b': case 'B': base = 2; break; case 'u': case 'd': case 'i': default: base = 10; break; } if (base == 2) { org_v = v; if ( v < 0 && !sign ) { val = mrb_fix2binstr(mrb, mrb_fixnum_value(v), base); dots = 1; } else { val = mrb_fix2str(mrb, mrb_fixnum_value(v), base); } v = mrb_fixnum(mrb_str_to_inum(mrb, val, 10, 0/*Qfalse*/)); } if (sign) { char c = *p; if (c == 'i') c = 'd'; /* %d and %i are identical */ if (base == 2) c = 'd'; if (v < 0) { v = -v; sc = '-'; width--; } else if (flags & FPLUS) { sc = '+'; width--; } else if (flags & FSPACE) { sc = ' '; width--; } snprintf(fbuf, sizeof(fbuf), "%%l%c", c); snprintf(nbuf, sizeof(nbuf), fbuf, v); s = nbuf; } else { char c = *p; if (c == 'X') c = 'x'; if (base == 2) c = 'd'; s = nbuf; if (v < 0) { dots = 1; } snprintf(fbuf, sizeof(fbuf), "%%l%c", c); snprintf(++s, sizeof(nbuf) - 1, fbuf, v); if (v < 0) { char d = 0; s = remove_sign_bits(s, base); switch (base) { case 16: d = 'f'; break; case 8: d = '7'; break; case 2: d = '1'; break; } if (d && *s != d) { *--s = d; } } } len = (int)strlen(s); if (dots) { prec -= 2; width -= 2; } if (*p == 'X') { char *pp = s; int c; #ifdef INCLUDE_ENCODING mrb_encoding *enc = mrb_enc_get(mrb, fmt); #endif //INCLUDE_ENCODING while ((c = (int)(unsigned char)*pp) != 0) { #ifdef INCLUDE_ENCODING *pp = mrb_enc_toupper(c, enc); #else *pp = toupper(c); #endif //INCLUDE_ENCODING pp++; } } if (prefix && !prefix[1]) { /* octal */ if (dots) { prefix = 0; } else if (len == 1 && *s == '0') { len = 0; if (flags & FPREC) prec--; } else if ((flags & FPREC) && (prec > len)) { prefix = 0; } } else if (len == 1 && *s == '0') { prefix = 0; } if (prefix) { width -= (int)strlen(prefix); } if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { prec = width; width = 0; } else { if (prec < len) { if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0; prec = len; } width -= prec; } if (!(flags&FMINUS)) { CHECK(width); while (width-- > 0) { buf[blen++] = ' '; } } if (sc) PUSH(&sc, 1); if (prefix) { int plen = (int)strlen(prefix); PUSH(prefix, plen); } CHECK(prec - len); if (dots) PUSH("..", 2); if (v < 0 || (base == 2 && org_v < 0)) { char c = sign_bits(base, p); while (len < prec--) { buf[blen++] = c; } } else if ((flags & (FMINUS|FPREC)) != FMINUS) { char c = '0'; while (len < prec--) { buf[blen++] = c; } } PUSH(s, len); CHECK(width); while (width-- > 0) { buf[blen++] = ' '; } } break; case 'f': case 'g': case 'G': case 'e': case 'E': case 'a': case 'A': { mrb_value val = GETARG(); double fval; int i, need = 6; char fbuf[32]; fval = mrb_float(mrb_Float(mrb, val)); if (isnan(fval) || isinf(fval)) { const char *expr; if (isnan(fval)) { expr = "NaN"; } else { expr = "Inf"; } need = (int)strlen(expr); if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS)) need++; if ((flags & FWIDTH) && need < width) need = width; CHECK(need + 1); snprintf(&buf[blen], need + 1, "%*s", need, ""); if (flags & FMINUS) { if (!isnan(fval) && fval < 0.0) buf[blen++] = '-'; else if (flags & FPLUS) buf[blen++] = '+'; else if (flags & FSPACE) blen++; memcpy(&buf[blen], expr, strlen(expr)); } else { if (!isnan(fval) && fval < 0.0) buf[blen + need - strlen(expr) - 1] = '-'; else if (flags & FPLUS) buf[blen + need - strlen(expr) - 1] = '+'; else if ((flags & FSPACE) && need > width) blen++; memcpy(&buf[blen + need - strlen(expr)], expr, strlen(expr)); } blen += strlen(&buf[blen]); break; } fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec); need = 0; if (*p != 'e' && *p != 'E') { i = INT_MIN; frexp(fval, &i); if (i > 0) need = BIT_DIGITS(i); } need += (flags&FPREC) ? prec : 6; if ((flags&FWIDTH) && need < width) need = width; need += 20; CHECK(need); snprintf(&buf[blen], need, fbuf, fval); blen += strlen(&buf[blen]); } break; } flags = FNONE; } sprint_exit: /* XXX - We cannot validate the number of arguments if (digit)$ style used. */ if (posarg >= 0 && nextarg < argc) { const char *mesg = "too many arguments for format string"; if (RTEST(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, "%s", mesg); if (RTEST(ruby_verbose)) mrb_warn("%s", mesg); } mrb_str_resize(mrb, result, blen); return result; }