/*
 * Inspects the bytes stored immediately after the content of +str+ (where
 * its C-string terminator lives) and returns them as a one-character string
 * in the encoding of +str+.
 *
 * The window examined is rb_enc_mbminlen() bytes wide.  If those bytes form
 * a complete character its code point is returned, except that a zero code
 * point yields nil.  If they do not form a character, the first byte after
 * the content is returned as-is.
 */
static VALUE
bug_str_cstr_term_char(VALUE str)
{
    rb_encoding *const encoding = rb_enc_get(str);
    char *head;
    long nbytes;
    char *term;
    char *term_end;
    int found;
    int codepoint;

    RSTRING_GETMEM(str, head, nbytes);
    term = head + nbytes;                          /* first byte past the content */
    term_end = term + rb_enc_mbminlen(encoding);   /* one minimal character further */

    found = rb_enc_precise_mbclen(term, term_end, encoding);
    if (!MBCLEN_CHARFOUND_P(found)) {
        /* not a valid character: report the raw byte */
        codepoint = (unsigned char)*term;
        return rb_enc_uint_chr((unsigned int)codepoint, encoding);
    }

    codepoint = rb_enc_mbc_to_codepoint(term, term_end, encoding);
    if (codepoint == 0) {
        return Qnil;
    }
    return rb_enc_uint_chr((unsigned int)codepoint, encoding);
}
/*
 * Resolves a "::"-separated constant path such as "Foo::Bar" to the class
 * or module it names, starting the lookup at Object.
 *
 * Raises ArgumentError when the string's encoding is not ASCII compatible,
 * when the path starts with '#' (anonymous class), when a single ':' is
 * found, or when a segment is not a defined constant.  Raises TypeError
 * when a segment resolves to something that is neither a class nor a module.
 */
VALUE
rb_path_to_class(VALUE pathname)
{
    rb_encoding *const enc = rb_enc_get(pathname);
    const char *const path = RSTRING_PTR(pathname);
    const char *segment = path;   /* start of the segment being resolved */
    const char *cursor = path;    /* scanning position */
    VALUE scope = rb_cObject;

    if (!rb_enc_asciicompat(enc)) {
        rb_raise(rb_eArgError, "invalid class path encoding (non ASCII)");
    }
    if (path[0] == '#') {
        rb_raise(rb_eArgError, "can't retrieve anonymous class %s", path);
    }

    while (*cursor) {
        ID name;
        int type;

        /* advance to the end of this segment */
        for (; *cursor && *cursor != ':'; cursor++)
            ;
        name = rb_intern3(segment, cursor - segment, enc);

        if (cursor[0] == ':') {
            if (cursor[1] != ':') {
                /* a lone ':' is not a valid separator */
                rb_raise(rb_eArgError, "undefined class/module %.*s", (int)(cursor-path), path);
            }
            cursor += 2;
            segment = cursor;
        }

        if (!rb_const_defined(scope, name)) {
            rb_raise(rb_eArgError, "undefined class/module %.*s", (int)(cursor-path), path);
        }
        scope = rb_const_get_at(scope, name);

        type = TYPE(scope);
        if (type != T_MODULE && type != T_CLASS) {
            rb_raise(rb_eTypeError, "%s does not refer to class/module", path);
        }
    }

    return scope;
}
/*
 * Return a pathname with +repl+ added as a suffix to the basename.
 *
 * If self has no extension part, +repl+ is appended.
 *
 *	Pathname.new('/usr/bin/shutdown').sub_ext('.rb')
 *	    #=> #<Pathname:/usr/bin/shutdown.rb>
 *
 * Implementation notes: the path is cut where ruby_enc_find_extname()
 * reports the extension to start.  When no extension is reported the whole
 * path is kept; when the reported extension length is at most 1 the cut
 * point is moved past it, so that text is kept as well.  The new string
 * inherits taint from the original path and is wrapped in a new instance of
 * self's class.
 */
static VALUE
path_sub_ext(VALUE self, VALUE repl)
{
    VALUE path = get_strpath(self);
    VALUE replaced;
    const char *head;
    const char *cut;
    long n;

    StringValue(repl);

    head = RSTRING_PTR(path);
    n = RSTRING_LEN(path);   /* in: path length, out: extension length */
    cut = ruby_enc_find_extname(head, &n, rb_enc_get(path));

    if (!cut) {
        cut = head + RSTRING_LEN(path);
    }
    else {
        cut = (n <= 1) ? cut + n : cut;
    }

    replaced = rb_str_subseq(path, 0, cut - head);
    rb_str_append(replaced, repl);
    OBJ_INFECT(replaced, path);

    return rb_class_new_instance(1, &replaced, rb_obj_class(self));
}
/*
 * Builds a new string from the printf-style template +fmt+ and the +argc+
 * values in +argv+.
 *
 * Template text is copied literally up to each '%'.  What follows a '%' is
 * parsed by the `retry:` switch below, one character at a time:
 *
 *   ' ' '#' '+' '-' '0'   flags (rejected after a width or precision)
 *   1-9...                width, or an absolute argument index when the
 *                         digits are followed by '$'
 *   '*'                   width taken from the argument list; a negative
 *                         value means left-justify with the absolute width
 *   '.' digits | '.*'     precision; a negative '*' precision is ignored
 *   <name>  {name}        value looked up by symbol in the hash argument;
 *                         <name> continues parsing the directive, {name}
 *                         formats the value as %s immediately
 *
 * and finally a conversion character:
 *
 *   c              one character (a one-character string or an integer)
 *   s, p           rb_obj_as_string() of the argument; p applies
 *                  rb_inspect() first
 *   d i u          decimal integer
 *   o x X b B      octal / hexadecimal / binary integer; negative values
 *                  without a sign flag are shown in two's complement
 *                  prefixed by ".."
 *   f              fixed point; Rational arguments are handled here with
 *                  integer arithmetic, everything else is treated as float
 *   g G e E a A    floating point, delegated to snprintf()
 *   %  '\n'  NUL   a literal '%'
 *
 * Raises ArgumentError for malformed directives and KeyError when a named
 * reference is not present in the hash.  After formatting, leftover
 * arguments raise ArgumentError when ruby_debug is set and produce a
 * warning when ruby_verbose is set.
 *
 * The result starts with the encoding of +fmt+ and is tainted when +fmt+ or
 * any %s/%p argument string is tainted.
 *
 * NOTE(review): PUSH, CHECK, FILL, GETARG, GETPOSARG, GETNUM, GETASTER,
 * CHECKNAMEARG, get_hash, fmt_setup, sign_bits and BIT_DIGITS are defined
 * outside this block.  They appear to read and update the locals declared
 * here (buf, blen, bsiz, result, nextarg, posarg, nextvalue, p, end) —
 * confirm against their definitions before changing any of those names.
 */
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
{
    enum {default_float_precision = 6};
    rb_encoding *enc;
    const char *p, *end;
    char *buf;
    long blen, bsiz;
    VALUE result;

    long scanned = 0;
    int coderange = ENC_CODERANGE_7BIT;
    int width, prec, flags = FNONE;
    int nextarg = 1;
    int posarg = 0;
    int tainted = 0;
    VALUE nextvalue;
    VALUE tmp;
    VALUE str;
    volatile VALUE hash = Qundef;

#define CHECK_FOR_WIDTH(f) \
    if ((f) & FWIDTH) { \
        rb_raise(rb_eArgError, "width given twice"); \
    } \
    if ((f) & FPREC0) { \
        rb_raise(rb_eArgError, "width after precision"); \
    }
#define CHECK_FOR_FLAGS(f) \
    if ((f) & FWIDTH) { \
        rb_raise(rb_eArgError, "flag after width"); \
    } \
    if ((f) & FPREC0) { \
        rb_raise(rb_eArgError, "flag after precision"); \
    }

    /* shift the argument vector by one slot; nextarg starts at 1
     * (presumably so the GETARG-family macros can index from 1 — confirm) */
    ++argc;
    --argv;
    if (OBJ_TAINTED(fmt)) tainted = 1;
    StringValue(fmt);
    enc = rb_enc_get(fmt);
    /* scan the string returned by rb_str_new4() rather than the caller's object */
    fmt = rb_str_new4(fmt);
    p = RSTRING_PTR(fmt);
    end = p + RSTRING_LEN(fmt);
    blen = 0;
    bsiz = 120;
    result = rb_str_buf_new(bsiz);
    rb_enc_copy(result, fmt);
    buf = RSTRING_PTR(result);
    memset(buf, 0, bsiz);
    ENC_CODERANGE_SET(result, coderange);

    /* one iteration per literal run plus the directive that follows it */
    for (; p < end; p++) {
        const char *t;
        int n;
        VALUE sym = Qnil;

        /* copy everything up to the next '%' verbatim */
        for (t = p; t < end && *t != '%'; t++) ;
        PUSH(p, t - p);
        /* keep the code range of the output written so far up to date */
        if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
            scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
            ENC_CODERANGE_SET(result, coderange);
        }
        if (t >= end) {
            /* end of fmt string */
            goto sprint_exit;
        }
        p = t + 1;		/* skip `%' */

        width = prec = -1;
        nextvalue = Qundef;
      retry:
        switch (*p) {
          default:
            if (rb_enc_isprint(*p, enc))
                rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
            else
                rb_raise(rb_eArgError, "malformed format string");
            break;

          /* ---- flags: record the bit and keep parsing the directive ---- */
          case ' ':
            CHECK_FOR_FLAGS(flags);
            flags |= FSPACE;
            p++;
            goto retry;

          case '#':
            CHECK_FOR_FLAGS(flags);
            flags |= FSHARP;
            p++;
            goto retry;

          case '+':
            CHECK_FOR_FLAGS(flags);
            flags |= FPLUS;
            p++;
            goto retry;

          case '-':
            CHECK_FOR_FLAGS(flags);
            flags |= FMINUS;
            p++;
            goto retry;

          case '0':
            CHECK_FOR_FLAGS(flags);
            flags |= FZERO;
            p++;
            goto retry;

          /* ---- digits: either "N$" (positional argument) or a width ---- */
          case '1': case '2': case '3': case '4':
          case '5': case '6': case '7': case '8': case '9':
            n = 0;
            GETNUM(n, width);
            if (*p == '$') {
                if (nextvalue != Qundef) {
                    rb_raise(rb_eArgError, "value given twice - %d$", n);
                }
                nextvalue = GETPOSARG(n);
                p++;
                goto retry;
            }
            CHECK_FOR_WIDTH(flags);
            width = n;
            flags |= FWIDTH;
            goto retry;

          /* ---- named reference: <name> or {name} ---- */
          case '<':
          case '{':
            {
                const char *start = p;
                char term = (*p == '<') ? '>' : '}';
                int len;

                /* advance character by character to the closing delimiter */
                for (; p < end && *p != term; ) {
                    p += rb_enc_mbclen(p, end, enc);
                }
                if (p >= end) {
                    rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
                }
#if SIZEOF_INT < SIZEOF_SIZE_T
                if ((size_t)(p - start) >= INT_MAX) {
                    const int message_limit = 20;
                    len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start);
                    rb_enc_raise(enc, rb_eArgError,
                                 "too long name (%"PRIdSIZE" bytes) - %.*s...%c",
                                 (size_t)(p - start - 2), len, start, term);
                }
#endif
                len = (int)(p - start + 1); /* including parenthesis */
                /* sym is reset to nil at the top of every directive, so this
                 * fires only for a second name inside the same directive */
                if (sym != Qnil) {
                    rb_enc_raise(enc, rb_eArgError, "named%.*s after <%"PRIsVALUE">",
                                 len, start, rb_sym2str(sym));
                }
                CHECKNAMEARG(start, len, enc);
                get_hash(&hash, argc, argv);
                sym = rb_check_symbol_cstr(start + 1,
                                           len - 2 /* without parenthesis */,
                                           enc);
                if (sym != Qnil) nextvalue = rb_hash_lookup2(hash, sym, Qundef);
                if (nextvalue == Qundef) {
                    rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start);
                }
                /* {name} formats right away as a string; <name> goes on to
                 * read flags/width/conversion */
                if (term == '}') goto format_s;
                p++;
                goto retry;
            }

          /* ---- '*': width supplied as an argument ---- */
          case '*':
            CHECK_FOR_WIDTH(flags);
            flags |= FWIDTH;
            GETASTER(width);
            if (width < 0) {
                flags |= FMINUS;
                width = -width;
            }
            p++;
            goto retry;

          /* ---- '.': precision, literal or '*' ---- */
          case '.':
            if (flags & FPREC0) {
                rb_raise(rb_eArgError, "precision given twice");
            }
            flags |= FPREC|FPREC0;

            prec = 0;
            p++;
            if (*p == '*') {
                GETASTER(prec);
                if (prec < 0) {	/* ignore negative precision */
                    flags &= ~FPREC;
                }
                p++;
                goto retry;
            }

            GETNUM(prec, precision);
            goto retry;

          /* ---- literal percent; newline/NUL after '%' step back one
           *      character and fall through to the same handling ---- */
          case '\n':
          case '\0':
            p--;
          case '%':
            if (flags != FNONE) {
                rb_raise(rb_eArgError, "invalid format character - %%");
            }
            PUSH("%", 1);
            break;

          /* ---- %c ---- */
          case 'c':
            {
                VALUE val = GETARG();
                VALUE tmp;
                unsigned int c;
                int n;

                tmp = rb_check_string_type(val);
                if (!NIL_P(tmp)) {
                    /* string argument: must be exactly one character long */
                    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
                        rb_raise(rb_eArgError, "%%c requires a character");
                    }
                    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
                    RB_GC_GUARD(tmp);
                }
                else {
                    /* otherwise the argument is taken as an integer code */
                    c = NUM2INT(val);
                    n = rb_enc_codelen(c, enc);
                }
                if (n <= 0) {
                    rb_raise(rb_eArgError, "invalid character");
                }
                /* the character occupies one column, hence width-1 padding */
                if (!(flags & FWIDTH)) {
                    CHECK(n);
                    rb_enc_mbcput(c, &buf[blen], enc);
                    blen += n;
                }
                else if ((flags & FMINUS)) {
                    CHECK(n);
                    rb_enc_mbcput(c, &buf[blen], enc);
                    blen += n;
                    FILL(' ', width-1);
                }
                else {
                    FILL(' ', width-1);
                    CHECK(n);
                    rb_enc_mbcput(c, &buf[blen], enc);
                    blen += n;
                }
            }
            break;

          /* ---- %s / %p (also the target of {name}) ---- */
          case 's':
          case 'p':
          format_s:
            {
                VALUE arg = GETARG();
                long len, slen;

                if (*p == 'p') arg = rb_inspect(arg);
                str = rb_obj_as_string(arg);
                if (OBJ_TAINTED(str)) tainted = 1;
                len = RSTRING_LEN(str);
                /* publish the current length and code range on result before
                 * rb_enc_check() compares it with str */
                rb_str_set_len(result, blen);
                if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
                    int cr = coderange;
                    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
                    ENC_CODERANGE_SET(result,
                                      (cr == ENC_CODERANGE_UNKNOWN ?
                                       ENC_CODERANGE_BROKEN : (coderange = cr)));
                }
                enc = rb_enc_check(result, str);
                if (flags&(FPREC|FWIDTH)) {
                    /* width and precision count characters, not bytes */
                    slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
                    if (slen < 0) {
                        rb_raise(rb_eArgError, "invalid mbstring sequence");
                    }
                    if ((flags&FPREC) && (prec < slen)) {
                        /* truncate to prec characters; len becomes the byte count */
                        char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
                                             prec, enc);
                        slen = prec;
                        len = p - RSTRING_PTR(str);
                    }
                    /* need to adjust multi-byte string pos */
                    if ((flags&FWIDTH) && (width > slen)) {
                        width -= (int)slen;
                        if (!(flags&FMINUS)) {
                            CHECK(width);
                            while (width--) {
                                buf[blen++] = ' ';
                            }
                        }
                        CHECK(len);
                        memcpy(&buf[blen], RSTRING_PTR(str), len);
                        RB_GC_GUARD(str);
                        blen += len;
                        if (flags&FMINUS) {
                            CHECK(width);
                            while (width--) {
                                buf[blen++] = ' ';
                            }
                        }
                        rb_enc_associate(result, enc);
                        break;
                    }
                }
                PUSH(RSTRING_PTR(str), len);
                RB_GC_GUARD(str);
                rb_enc_associate(result, enc);
            }
            break;

          /* ---- integer conversions ---- */
          case 'd':
          case 'i':
          case 'o':
          case 'x':
          case 'X':
          case 'b':
          case 'B':
          case 'u':
            {
                volatile VALUE val = GETARG();
                int valsign;
                char nbuf[64], *s;
                const char *prefix = 0;
                int sign = 0, dots = 0;
                char sc = 0;
                long v = 0;
                int base, bignum = 0;
                int len;

                /* sign != 0: print magnitude with an explicit sign character;
                 * sign == 0: non-decimal two's complement form */
                switch (*p) {
                  case 'd':
                  case 'i':
                  case 'u':
                    sign = 1; break;
                  case 'o':
                  case 'x':
                  case 'X':
                  case 'b':
                  case 'B':
                    if (flags&(FPLUS|FSPACE)) sign = 1;
                    break;
                }
                /* '#' selects a radix prefix for the non-decimal conversions */
                if (flags & FSHARP) {
                    switch (*p) {
                      case 'o':
                        prefix = "0"; break;
                      case 'x':
                        prefix = "0x"; break;
                      case 'X':
                        prefix = "0X"; break;
                      case 'b':
                        prefix = "0b"; break;
                      case 'B':
                        prefix = "0B"; break;
                    }
                }

                /* coerce the argument until it is a Fixnum (v) or a Bignum */
              bin_retry:
                switch (TYPE(val)) {
                  case T_FLOAT:
                    if (FIXABLE(RFLOAT_VALUE(val))) {
                        val = LONG2FIX((long)RFLOAT_VALUE(val));
                        goto bin_retry;
                    }
                    val = rb_dbl2big(RFLOAT_VALUE(val));
                    if (FIXNUM_P(val)) goto bin_retry;
                    bignum = 1;
                    break;
                  case T_STRING:
                    val = rb_str_to_inum(val, 0, TRUE);
                    goto bin_retry;
                  case T_BIGNUM:
                    bignum = 1;
                    break;
                  case T_FIXNUM:
                    v = FIX2LONG(val);
                    break;
                  default:
                    val = rb_Integer(val);
                    goto bin_retry;
                }

                switch (*p) {
                  case 'o':
                    base = 8; break;
                  case 'x':
                  case 'X':
                    base = 16; break;
                  case 'b':
                  case 'B':
                    base = 2; break;
                  case 'u':
                  case 'd':
                  case 'i':
                  default:
                    base = 10; break;
                }

                if (base != 10) {
                    /* power-of-two base: unpack the integer numbits bits at a
                     * time, one output byte per digit, then map each byte
                     * through ruby_digitmap */
                    int numbits = ffs(base)-1;
                    size_t abs_nlz_bits;
                    size_t numdigits = rb_absint_numwords(val, numbits, &abs_nlz_bits);
                    long i;
                    if (INT_MAX-1 < numdigits) /* INT_MAX is used because rb_long2int is used later. */
                        rb_raise(rb_eArgError, "size too big");
                    if (sign) {
                        if (numdigits == 0)
                            numdigits = 1;
                        tmp = rb_str_new(NULL, numdigits);
                        valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp),
                                                  1, CHAR_BIT-numbits, INTEGER_PACK_BIG_ENDIAN);
                        for (i = 0; i < RSTRING_LEN(tmp); i++)
                            RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]];
                        s = RSTRING_PTR(tmp);
                        if (valsign < 0) {
                            sc = '-';
                            width--;
                        }
                        else if (flags & FPLUS) {
                            sc = '+';
                            width--;
                        }
                        else if (flags & FSPACE) {
                            sc = ' ';
                            width--;
                        }
                    }
                    else {
                        /* Following conditional "numdigits++" guarantees the
                         * most significant digit as
                         * - '1'(bin), '7'(oct) or 'f'(hex) for negative numbers
                         * - '0' for zero
                         * - not '0' for positive numbers.
                         *
                         * It also guarantees the most significant two
                         * digits will not be '11'(bin), '77'(oct), 'ff'(hex)
                         * or '00'.  */
                        if (numdigits == 0 ||
                                ((abs_nlz_bits != (size_t)(numbits-1) ||
                                  !rb_absint_singlebit_p(val)) &&
                                 (!bignum ? v < 0 : BIGNUM_NEGATIVE_P(val))))
                            numdigits++;
                        tmp = rb_str_new(NULL, numdigits);
                        valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp),
                                                  1, CHAR_BIT-numbits, INTEGER_PACK_2COMP | INTEGER_PACK_BIG_ENDIAN);
                        for (i = 0; i < RSTRING_LEN(tmp); i++)
                            RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]];
                        s = RSTRING_PTR(tmp);
                        /* negative two's complement output gets a ".." marker */
                        dots = valsign < 0;
                    }
                    len = rb_long2int(RSTRING_END(tmp) - s);
                }
                else if (!bignum) {
                    /* decimal Fixnum: format the magnitude with snprintf */
                    valsign = 1;
                    if (v < 0) {
                        v = -v;
                        sc = '-';
                        width--;
                        valsign = -1;
                    }
                    else if (flags & FPLUS) {
                        sc = '+';
                        width--;
                    }
                    else if (flags & FSPACE) {
                        sc = ' ';
                        width--;
                    }
                    snprintf(nbuf, sizeof(nbuf), "%ld", v);
                    s = nbuf;
                    len = (int)strlen(s);
                }
                else {
                    /* decimal Bignum: take the digits from rb_big2str and
                     * strip a leading '-' into sc */
                    tmp = rb_big2str(val, 10);
                    s = RSTRING_PTR(tmp);
                    valsign = 1;
                    if (s[0] == '-') {
                        s++;
                        sc = '-';
                        width--;
                        valsign = -1;
                    }
                    else if (flags & FPLUS) {
                        sc = '+';
                        width--;
                    }
                    else if (flags & FSPACE) {
                        sc = ' ';
                        width--;
                    }
                    len = rb_long2int(RSTRING_END(tmp) - s);
                }

                /* the ".." marker takes two columns of both budgets */
                if (dots) {
                    prec -= 2;
                    width -= 2;
                }

                if (*p == 'X') {
                    char *pp = s;
                    int c;
                    while ((c = (int)(unsigned char)*pp) != 0) {
                        *pp = rb_enc_toupper(c, enc);
                        pp++;
                    }
                }
                if (prefix && !prefix[1]) { /* octal */
                    if (dots) {
                        prefix = 0;
                    }
                    else if (len == 1 && *s == '0') {
                        len = 0;
                        if (flags & FPREC) prec--;
                    }
                    else if ((flags & FPREC) && (prec > len)) {
                        prefix = 0;
                    }
                }
                else if (len == 1 && *s == '0') {
                    prefix = 0;
                }
                if (prefix) {
                    width -= (int)strlen(prefix);
                }
                /* a bare '0' flag turns the remaining width into zero padding */
                if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
                    prec = width;
                    width = 0;
                }
                else {
                    if (prec < len) {
                        if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
                        prec = len;
                    }
                    width -= prec;
                }
                /* emit: left padding, sign, prefix, "..", digit fill, digits,
                 * right padding */
                if (!(flags&FMINUS)) {
                    CHECK(width);
                    while (width-- > 0) {
                        buf[blen++] = ' ';
                    }
                }
                if (sc) PUSH(&sc, 1);
                if (prefix) {
                    int plen = (int)strlen(prefix);
                    PUSH(prefix, plen);
                }
                CHECK(prec - len);
                if (dots) PUSH("..", 2);
                if (!sign && valsign < 0) {
                    /* negative two's complement: pad with the character
                     * returned by sign_bits() instead of '0' */
                    char c = sign_bits(base, p);
                    while (len < prec--) {
                        buf[blen++] = c;
                    }
                }
                else if ((flags & (FMINUS|FPREC)) != FMINUS) {
                    while (len < prec--) {
                        buf[blen++] = '0';
                    }
                }
                PUSH(s, len);
                RB_GC_GUARD(tmp);
                CHECK(width);
                while (width-- > 0) {
                    buf[blen++] = ' ';
                }
            }
            break;

          /* ---- %f: Rational arguments are formatted with integer math ---- */
          case 'f':
            {
                VALUE val = GETARG(), num, den;
                int sign = (flags&FPLUS) ? 1 : 0, zero = 0;
                long len, done = 0;
                int prefix = 0;
                if (!RB_TYPE_P(val, T_RATIONAL)) {
                    /* not a Rational: hand the value to the float branch */
                    nextvalue = val;
                    goto float_value;
                }
                if (!(flags&FPREC)) prec = default_float_precision;
                den = rb_rational_den(val);
                num = rb_rational_num(val);
                /* work on the magnitude; remember the sign separately */
                if (FIXNUM_P(num)) {
                    if ((SIGNED_VALUE)num < 0) {
                        long n = -FIX2LONG(num);
                        num = LONG2FIX(n);
                        sign = -1;
                    }
                }
                else if (rb_num_negative_p(num)) {
                    sign = -1;
                    num = rb_funcallv(num, idUMinus, 0, 0);
                }
                if (den != INT2FIX(1) || prec > 1) {
                    /* num = (num * 10**prec + den/2).div(den): the value
                     * scaled by 10**prec, rounded to an integer */
                    const ID idDiv = rb_intern("div");
                    VALUE p10 = rb_int_positive_pow(10, prec);
                    VALUE den_2 = rb_funcall(den, idDiv, 1, INT2FIX(2));
                    num = rb_funcallv(num, '*', 1, &p10);
                    num = rb_funcallv(num, '+', 1, &den_2);
                    num = rb_funcallv(num, idDiv, 1, &den);
                }
                else if (prec >= 0) {
                    /* integer value with prec 0 or 1: the fraction is all zeros */
                    zero = prec;
                }
                val = rb_obj_as_string(num);
                /* first pass: compute the space needed and reserve it */
                len = RSTRING_LEN(val) + zero;
                if (prec >= len) ++len; /* integer part 0 */
                if (sign || (flags&FSPACE)) ++len;
                if (prec > 0) ++len; /* period */
                CHECK(len > width ? len : width);
                /* second pass: write sign, integer part, '.', fraction;
                 * done counts the characters written */
                if (sign || (flags&FSPACE)) {
                    buf[blen++] = sign > 0 ? '+' : sign < 0 ? '-' : ' ';
                    prefix++;
                    done++;
                }
                len = RSTRING_LEN(val) + zero;
                t = RSTRING_PTR(val);
                if (len > prec) {
                    memcpy(&buf[blen], t, len - prec);
                    blen += len - prec;
                    done += len - prec;
                }
                else {
                    buf[blen++] = '0';
                    done++;
                }
                if (prec > 0) {
                    buf[blen++] = '.';
                    done++;
                }
                if (zero) {
                    FILL('0', zero);
                    done += zero;
                }
                else if (prec > len) {
                    FILL('0', prec - len);
                    memcpy(&buf[blen], t, len);
                    blen += len;
                    done += prec;
                }
                else if (prec > 0) {
                    memcpy(&buf[blen], t + len - prec, prec);
                    blen += prec;
                    done += prec;
                }
                /* pad to width: when right-justifying, shift what was just
                 * written to the right (keeping the sign in place for zero
                 * padding) and fill the gap */
                if ((flags & FWIDTH) && width > done) {
                    int fill = ' ';
                    long shifting = 0;
                    if (!(flags&FMINUS)) {
                        shifting = done;
                        if (flags&FZERO) {
                            shifting -= prefix;
                            fill = '0';
                        }
                        blen -= shifting;
                        memmove(&buf[blen + width - done], &buf[blen], shifting);
                    }
                    FILL(fill, width - done);
                    blen += shifting;
                }
                RB_GC_GUARD(val);
                break;
            }
          /* ---- floating point conversions ---- */
          case 'g':
          case 'G':
          case 'e':
          case 'E':
            /* TODO: rational support */
          case 'a':
          case 'A':
          float_value:
            {
                VALUE val = GETARG();
                double fval;
                int i, need;
                char fbuf[32];

                fval = RFLOAT_VALUE(rb_Float(val));
                if (isnan(fval) || isinf(fval)) {
                    /* non-finite values are written by hand as "NaN"/"Inf" */
                    const char *expr;

                    if (isnan(fval)) {
                        expr = "NaN";
                    }
                    else {
                        expr = "Inf";
                    }
                    need = (int)strlen(expr);
                    if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
                        need++;
                    if ((flags & FWIDTH) && need < width)
                        need = width;

                    CHECK(need + 1);
                    /* pre-fill the field with `need` spaces (NUL terminated),
                     * then overwrite the sign and the text in place */
                    snprintf(&buf[blen], need + 1, "%*s", need, "");
                    if (flags & FMINUS) {
                        if (!isnan(fval) && fval < 0.0)
                            buf[blen++] = '-';
                        else if (flags & FPLUS)
                            buf[blen++] = '+';
                        else if (flags & FSPACE)
                            blen++;
                        memcpy(&buf[blen], expr, strlen(expr));
                    }
                    else {
                        if (!isnan(fval) && fval < 0.0)
                            buf[blen + need - strlen(expr) - 1] = '-';
                        else if (flags & FPLUS)
                            buf[blen + need - strlen(expr) - 1] = '+';
                        else if ((flags & FSPACE) && need > width)
                            blen++;
                        memcpy(&buf[blen + need - strlen(expr)], expr,
                               strlen(expr));
                    }
                    blen += strlen(&buf[blen]);
                    break;
                }

                /* finite value: build a C format in fbuf and let snprintf do
                 * the work; `need` is an estimate of the output size */
                fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
                need = 0;
                if (*p != 'e' && *p != 'E') {
                    /* non-exponent forms may print every integer digit: size
                     * them from the binary exponent */
                    i = INT_MIN;
                    frexp(fval, &i);
                    if (i > 0)
                        need = BIT_DIGITS(i);
                }
                need += (flags&FPREC) ? prec : default_float_precision;
                if ((flags&FWIDTH) && need < width)
                    need = width;
                need += 20;

                CHECK(need);
                snprintf(&buf[blen], need, fbuf, fval);
                blen += strlen(&buf[blen]);
            }
            break;
        }
        /* directive finished: flags do not carry over to the next one */
        flags = FNONE;
    }

  sprint_exit:
    RB_GC_GUARD(fmt);
    /* XXX - We cannot validate the number of arguments if (digit)$ style used.
     */
    if (posarg >= 0 && nextarg < argc) {
        const char *mesg = "too many arguments for format string";
        if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
        if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
    }
    /* trim the buffer to the bytes actually produced */
    rb_str_resize(result, blen);

    if (tainted) OBJ_TAINT(result);
    return result;
}
/*
 * Spec helper: looks up the encoding of +obj+ with rb_enc_get() and returns
 * that encoding's name as a new Ruby string.  +self+ is unused.
 */
static VALUE
encoding_spec_rb_enc_get(VALUE self, VALUE obj)
{
    rb_encoding *encoding = rb_enc_get(obj);
    const char *encoding_name = encoding->name;

    return rb_str_new2(encoding_name);
}
/*
 * buffer.write(string, offset = 0, length = nil, encoding = nil) -> Integer
 *
 * Copies +string+ into the buffer starting at byte +offset+ and returns the
 * number of bytes written.
 *
 *   string    converted with StringValue, then exported to the target
 *             encoding before copying
 *   offset    Fixnum byte position inside the buffer (default 0); raises
 *             ArgumentError when it is not smaller than the buffer length
 *   length    Fixnum upper bound on the bytes to write (default: as much as
 *             fits); never more than the space left in the buffer nor the
 *             byte size of the exported string
 *   encoding  object whose encoding is used as the external encoding
 *             (default: Encoding.default_external)
 *
 * Only whole characters are written: a write that would stop in the middle
 * of a multi-byte character is shortened to the previous character boundary.
 * The number of characters written is stored under
 * RUV_BUFFER_CHAR_WRITTEN_SYM on the class of the buffer object.
 */
VALUE ruv_buffer_write(int argc, VALUE* argv, VALUE rb_buffer) {
    VALUE rb_str, rb_offset, rb_length, rb_extern_enc, rb_cBuffer;
    size_t offset, length, max_length, str_len;
    size_t char_count = 0;
    ruv_buffer_t *buffer;
    rb_encoding *rb_extern_encoding;

    Data_Get_Struct(rb_buffer, ruv_buffer_t, buffer);

    /* one required argument (string) and up to three optional ones */
    rb_scan_args(argc, argv, "13", &rb_str, &rb_offset, &rb_length, &rb_extern_enc);
    StringValue(rb_str);

    /* encoding: use the specified external encoding if provided,
     * otherwise Encoding.default_external */
    if (!NIL_P(rb_extern_enc)) {
        rb_extern_encoding = rb_enc_get(rb_extern_enc);
    } else {
        rb_extern_encoding = rb_default_external_encoding();
    }

    /* convert to the external encoding; all sizes below refer to this copy */
    rb_str = rb_str_export_to_enc(rb_str, rb_extern_encoding);
    str_len = (size_t)RSTRING_LEN(rb_str);

    /* offset: either specified in params or 0 */
    if (!NIL_P(rb_offset)) {
        Check_Type(rb_offset, T_FIXNUM);
        offset = NUM2SIZET(rb_offset);
        if (offset >= buffer->length) {
            rb_raise(rb_eArgError, "Overflow! offset is larger than buffer size.");
        }
    } else {
        offset = 0;
    }

    /* max length: the smaller of the available space and the whole string */
    max_length = MIN(buffer->length - offset, str_len);

    /* length: number of bytes to write (may still split a character here) */
    if (!NIL_P(rb_length)) {
        Check_Type(rb_length, T_FIXNUM);
        length = NUM2SIZET(rb_length);
        /* a caller-supplied length must not run past the buffer or the
         * string: memcpy below trusts this value */
        if (length > max_length) {
            length = max_length;
        }
    } else {
        length = max_length;
    }

    /* Always derive the character count so it is defined on every path. */
    char_count = (size_t)rb_str_sublen(rb_str, (long)length);

    /* If we are not writing the whole string, re-adjust length so we don't
     * write half a character (utf8, etc):
     *   1) char count from the byte length (computed above)
     *   2) byte length back from the char count
     * If a trailing partial character was counted in step 1, step 2 rounds
     * up past the limit; drop characters until the bytes fit again. */
    if (length != str_len) {
        const size_t limit = length;

        length = (size_t)rb_str_offset(rb_str, (long)char_count);
        while (length > limit && char_count > 0) {
            char_count--;
            length = (size_t)rb_str_offset(rb_str, (long)char_count);
        }
    }

    memcpy(buffer->data + offset, RSTRING_PTR(rb_str), length);

    /* record how many characters were written (stored on the class) */
    rb_cBuffer = rb_obj_class(rb_buffer);
    rb_iv_set(rb_cBuffer, RUV_BUFFER_CHAR_WRITTEN_SYM, SIZET2NUM(char_count));

    return SIZET2NUM(length);
}
// TODO: can we fail allocating memory? static VALUE fenix_file_expand_path(int argc, VALUE *argv) { size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0; size_t buffer_len = 0; char *fullpath = NULL; wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL, *wdir = NULL; wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL; UINT cp; VALUE result = Qnil, path = Qnil, dir = Qnil; wchar_t wfullpath_buffer[PATH_BUFFER_SIZE]; wchar_t path_drive = L'\0', dir_drive = L'\0'; int ignore_dir = 0; rb_encoding *path_encoding; int tainted = 0; // prepare for rb_file_absolute_path() int abs_mode = 0; // retrieve path and dir from argv rb_scan_args(argc, argv, "11", &path, &dir); /* tainted if path is tainted */ tainted = OBJ_TAINTED(path); // get path encoding if (NIL_P(dir)) { path_encoding = rb_enc_get(path); } else { path_encoding = rb_enc_check(path, dir); } cp = fenix_code_page(path_encoding); // printf("code page: %i\n", cp); // coerce them to string path = fenix_coerce_to_path(path); // convert char * to wchar_t // path fenix_path_to_wchar(path, &wpath, &wpath_pos, &wpath_len, cp); // wprintf(L"wpath: '%s' with (%i) characters long.\n", wpath, wpath_len); /* determine if we need the user's home directory */ /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */ if (abs_mode == 0 && ((wpath_len == 1 && wpath_pos[0] == L'~') || (wpath_len >= 2 && wpath_pos[0] == L'~' && IS_DIR_SEPARATOR_P(wpath_pos[1])))) { /* tainted if expanding '~' */ tainted = 1; // wprintf(L"wpath requires expansion.\n"); whome = fenix_home_dir(); if (whome == NULL) { free(wpath); rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'"); } whome_len = wcslen(whome); if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) { free(wpath); rb_raise(rb_eArgError, "non-absolute home"); } // wprintf(L"whome: '%s' with (%i) characters long.\n", whome, whome_len); /* ignores dir since we are expading home */ ignore_dir = 1; /* exclude ~ from 
the result */ wpath_pos++; wpath_len--; /* exclude separator if present */ if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) { // wprintf(L"excluding expansion character and separator\n"); wpath_pos++; wpath_len--; } } else if (wpath_len >= 2 && wpath_pos[1] == L':') { if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) { /* ignore dir since path contains a drive letter and a root slash */ // wprintf(L"Ignore dir since we have drive letter and root slash\n"); ignore_dir = 1; } else { /* determine if we ignore dir or not later */ path_drive = wpath_pos[0]; } } else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') { wchar_t *wuser = wpath_pos + 1; wchar_t *pos = wuser; char *user; /* tainted if expanding '~' */ tainted = 1; while (!IS_DIR_SEPARATOR_P(*pos) && *pos != '\0') pos++; *pos = '\0'; size = WideCharToMultiByte(cp, 0, wuser, -1, NULL, 0, NULL, NULL); user = (char *)malloc(size * sizeof(char)); WideCharToMultiByte(cp, 0, wuser, -1, user, size, NULL, NULL); /* convert to VALUE and set the path encoding */ result = rb_enc_str_new(user, size - 1, path_encoding); free(wpath); if (user) free(user); rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result)); } /* convert dir */ if (!ignore_dir && !NIL_P(dir)) { // coerce them to string dir = fenix_coerce_to_path(dir); // convert char * to wchar_t // dir fenix_path_to_wchar(dir, &wdir, NULL, &wdir_len, cp); // wprintf(L"wdir: '%s' with (%i) characters long.\n", wdir, wdir_len); if (wdir_len >= 2 && wdir[1] == L':') { dir_drive = wdir[0]; if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) { wdir_len = 2; } } else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) { /* UNC path */ if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) { /* cut the UNC path tail to '//host/share' */ size_t separators = 0; size_t pos = 2; while (pos < wdir_len && separators < 2) { if (IS_DIR_SEPARATOR_P(wdir[pos])) { separators++; } pos++; } if (separators == 2) wdir_len = pos - 1; // wprintf(L"UNC wdir: '%s' with (%i) 
characters.\n", wdir, wdir_len); } } } /* determine if we ignore dir or not */ if (!ignore_dir && path_drive && dir_drive) { if (towupper(path_drive) == towupper(dir_drive)) { /* exclude path drive letter to use dir */ // wprintf(L"excluding path drive letter\n"); wpath_pos += 2; wpath_len -= 2; } else { /* ignore dir since path drive is different from dir drive */ ignore_dir = 1; wdir_len = 0; } } if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) { /* ignore dir since path has UNC root */ ignore_dir = 1; wdir_len = 0; } else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) && !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) { /* ignore dir since path has root slash and dir doesn't have drive or UNC root */ ignore_dir = 1; wdir_len = 0; } // wprintf(L"wpath_len: %i\n", wpath_len); // wprintf(L"wdir_len: %i\n", wdir_len); // wprintf(L"whome_len: %i\n", whome_len); buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1; // wprintf(L"buffer_len: %i\n", buffer_len + 1); buffer = buffer_pos = (wchar_t *)malloc((buffer_len + 1) * sizeof(wchar_t)); /* add home */ if (whome_len) { // wprintf(L"Copying whome...\n"); wcsncpy(buffer_pos, whome, whome_len); buffer_pos += whome_len; } /* Add separator if required */ if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) { // wprintf(L"Adding separator after whome\n"); buffer_pos[0] = L'\\'; buffer_pos++; } if (wdir_len) { /* tainted if dir is used and dir is tainted */ if (!tainted && OBJ_TAINTED(dir)) tainted = 1; // wprintf(L"Copying wdir...\n"); wcsncpy(buffer_pos, wdir, wdir_len); buffer_pos += wdir_len; } /* add separator if required */ if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) { // wprintf(L"Adding separator after wdir\n"); buffer_pos[0] = L'\\'; buffer_pos++; } /* now deal with path */ if (wpath_len) { // wprintf(L"Copying wpath...\n"); wcsncpy(buffer_pos, wpath_pos, wpath_len); buffer_pos += wpath_len; } /* GetFullPathNameW requires at least "." 
to determine current directory */ if (wpath_len == 0) { // wprintf(L"Adding '.' to buffer\n"); buffer_pos[0] = L'.'; buffer_pos++; } /* Ensure buffer is NULL terminated */ buffer_pos[0] = L'\0'; /* tainted if path is relative */ if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer))) { tainted = 1; } // wprintf(L"buffer: '%s'\n", buffer); // FIXME: Make this more robust // Determine require buffer size size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL); if (size) { if (size > PATH_BUFFER_SIZE) { // allocate enough memory to contain the response wfullpath = (wchar_t *)malloc(size * sizeof(wchar_t)); size = GetFullPathNameW(buffer, size, wfullpath, NULL); } else { wfullpath = wfullpath_buffer; } // wprintf(L"wfullpath: '%s'\n", wfullpath); /* Calculate the new size and leave the garbage out */ // size = wcslen(wfullpath); /* Remove any trailing slashes */ if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) && wfullpath[size - 2] != L':' && !(size == 2 && IS_DIR_UNC_P(wfullpath))) { // wprintf(L"Removing trailing slash\n"); size -= 1; wfullpath[size] = L'\0'; } // wprintf(L"wfullpath: '%s'\n", wfullpath); /* Remove any trailing dot */ if (wfullpath[size - 1] == L'.') { // wprintf(L"Removing trailing dot\n"); size -= 1; wfullpath[size] = L'\0'; } /* removes trailing invalid ':$DATA' */ size = fenix_remove_invalid_alternative_data(wfullpath, size); // sanitize backslashes with forwardslashes fenix_replace_wchar(wfullpath, L'\\', L'/'); // wprintf(L"wfullpath: '%s'\n", wfullpath); // What CodePage should we use? // cp = AreFileApisANSI() ? 
CP_ACP : CP_OEMCP; // convert to char * size = WideCharToMultiByte(cp, 0, wfullpath, -1, NULL, 0, NULL, NULL); fullpath = (char *)malloc(size * sizeof(char)); WideCharToMultiByte(cp, 0, wfullpath, -1, fullpath, size, NULL, NULL); /* convert to VALUE and set the path encoding */ result = rb_enc_str_new(fullpath, size - 1, path_encoding); /* makes the result object tainted if expanding tainted strings or returning modified path */ if (tainted) OBJ_TAINT(result); } // TODO: better cleanup if (buffer) free(buffer); if (wpath) free(wpath); if (wdir) free(wdir); if (whome) free(whome); if (wfullpath && wfullpath != wfullpath_buffer) free(wfullpath); if (fullpath) free(fullpath); return result; }
/*
 * Expand +fname+ (optionally relative to +dname+) into a full path using
 * the wide-character Win32 API and store it in +result+.
 *
 * fname     - path to expand; may be nil.
 * dname     - base directory; may be nil.  It is ignored when +fname+
 *             starts with '~' (home expansion), has a drive letter followed
 *             by a root slash, has a UNC root, names a different drive than
 *             +dname+, or is rooted while +dname+ has neither a drive nor a
 *             UNC root.
 * abs_mode  - when 0, a leading '~' is expanded to the home directory and
 *             a leading '~user' raises ArgumentError; otherwise '~' is
 *             treated as an ordinary character.
 * long_name - when non-zero, the result is passed through
 *             replace_to_long_name().
 * result    - String that is truncated to length 0 and then filled with
 *             the expanded path.
 *
 * Returns +result+ associated with the path encoding, with backslashes
 * replaced by forward slashes, and tainted when an input was tainted, when
 * '~' was expanded, or when the assembled path was relative.
 *
 * Raises ArgumentError when the home directory cannot be found, when it is
 * not absolute, or when '~user' is used with abs_mode == 0.
 *
 * Memory: wpath and wdir are released with free() on every path, matching
 * the final cleanup; whome, buffer and a heap-allocated wfullpath are
 * released with xfree().
 * NOTE(review): this assumes mbstr_to_wstr() allocates with malloc(), as the
 * final cleanup implies -- confirm against its definition.
 */
VALUE
rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result)
{
    size_t size = 0, whome_len = 0;
    size_t buffer_len = 0;
    long wpath_len = 0, wdir_len = 0;
    wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL;
    wchar_t *wdir = NULL, *wdir_pos = NULL;
    wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
    UINT path_cp, cp;
    VALUE path = fname, dir = dname;
    wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
    wchar_t path_drive = L'\0', dir_drive = L'\0';
    int ignore_dir = 0;
    rb_encoding *path_encoding;
    int tainted = 0;

    /* tainted if path is tainted */
    tainted = OBJ_TAINTED(path);

    /* get path encoding */
    if (NIL_P(dir)) {
        path_encoding = rb_enc_get(path);
    }
    else {
        path_encoding = rb_enc_check(path, dir);
    }

    cp = path_cp = code_page(path_encoding);

    /* workaround invalid codepage */
    if (path_cp == INVALID_CODE_PAGE) {
        cp = CP_UTF8;
        if (!NIL_P(path)) {
            path = fix_string_encoding(path, path_encoding);
        }
    }

    /* convert char * to wchar_t */
    if (!NIL_P(path)) {
        wpath = mbstr_to_wstr(cp, RSTRING_PTR(path), (int)RSTRING_LEN(path), &wpath_len);
        wpath_pos = wpath;
    }

    /* determine if we need the user's home directory */
    /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */
    if (abs_mode == 0 && wpath_len > 0 && wpath_pos[0] == L'~' &&
        (wpath_len == 1 || IS_DIR_SEPARATOR_P(wpath_pos[1]))) {
        /* tainted if expanding '~' */
        tainted = 1;

        whome = home_dir();
        if (whome == NULL) {
            free(wpath);
            rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
        }
        whome_len = wcslen(whome);

        if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
            free(wpath);
            xfree(whome);
            rb_raise(rb_eArgError, "non-absolute home");
        }

        if (path_cp == INVALID_CODE_PAGE || rb_enc_str_asciionly_p(path)) {
            /* use filesystem encoding if expanding home dir */
            path_encoding = rb_filesystem_encoding();
            cp = path_cp = system_code_page();
        }

        /* ignores dir since we are expanding home */
        ignore_dir = 1;

        /* exclude ~ from the result */
        wpath_pos++;
        wpath_len--;

        /* exclude separator if present */
        if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
            wpath_pos++;
            wpath_len--;
        }
    }
    else if (wpath_len >= 2 && wpath_pos[1] == L':') {
        if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
            /* ignore dir since path contains a drive letter and a root slash */
            ignore_dir = 1;
        }
        else {
            /* determine if we ignore dir or not later */
            path_drive = wpath_pos[0];
            wpath_pos += 2;
            wpath_len -= 2;
        }
    }
    else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
        /* '~user' form: build the error message before releasing wpath,
         * because wpath_pos still points into it */
        result = rb_str_new_cstr("can't find user ");
        result = append_wstr(result, wpath_pos + 1,
                             user_length_in_path(wpath_pos + 1, wpath_len - 1),
                             cp, path_cp, path_encoding);

        free(wpath);

        rb_exc_raise(rb_exc_new_str(rb_eArgError, result));
    }

    /* convert dir */
    if (!ignore_dir && !NIL_P(dir)) {
        /* fix string encoding */
        if (path_cp == INVALID_CODE_PAGE) {
            dir = fix_string_encoding(dir, path_encoding);
        }

        /* convert char * to wchar_t */
        if (!NIL_P(dir)) {
            wdir = mbstr_to_wstr(cp, RSTRING_PTR(dir), (int)RSTRING_LEN(dir), &wdir_len);
            wdir_pos = wdir;
        }

        if (abs_mode == 0 && wdir_len > 0 && wdir_pos[0] == L'~' &&
            (wdir_len == 1 || IS_DIR_SEPARATOR_P(wdir_pos[1]))) {
            /* tainted if expanding '~' */
            tainted = 1;

            whome = home_dir();
            if (whome == NULL) {
                free(wpath);
                free(wdir);
                rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
            }
            whome_len = wcslen(whome);

            if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
                free(wpath);
                free(wdir);
                xfree(whome);
                rb_raise(rb_eArgError, "non-absolute home");
            }

            /* exclude ~ from the result */
            wdir_pos++;
            wdir_len--;

            /* exclude separator if present */
            if (wdir_len && IS_DIR_SEPARATOR_P(wdir_pos[0])) {
                wdir_pos++;
                wdir_len--;
            }
        }
        else if (wdir_len >= 2 && wdir[1] == L':') {
            dir_drive = wdir[0];
            if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
                /* path is rooted: keep only the "X:" part of dir */
                wdir_len = 2;
            }
        }
        else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
            /* UNC path */
            if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
                /* cut the UNC path tail to '//host/share' */
                long separators = 0;
                long pos = 2;
                while (pos < wdir_len && separators < 2) {
                    if (IS_DIR_SEPARATOR_P(wdir[pos])) {
                        separators++;
                    }
                    pos++;
                }
                if (separators == 2)
                    wdir_len = pos - 1;
            }
        }
        else if (abs_mode == 0 && wdir_len >= 2 && wdir_pos[0] == L'~') {
            result = rb_str_new_cstr("can't find user ");
            result = append_wstr(result, wdir_pos + 1,
                                 user_length_in_path(wdir_pos + 1, wdir_len - 1),
                                 cp, path_cp, path_encoding);
            free(wpath);
            free(wdir);
            rb_exc_raise(rb_exc_new_str(rb_eArgError, result));
        }
    }

    /* determine if we ignore dir or not */
    if (!ignore_dir && path_drive && dir_drive) {
        if (towupper(path_drive) != towupper(dir_drive)) {
            /* ignore dir since path drive is different from dir drive */
            ignore_dir = 1;
            wdir_len = 0;
            dir_drive = 0;
        }
    }

    if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
        /* ignore dir since path has UNC root */
        ignore_dir = 1;
        wdir_len = 0;
    }
    else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
             !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
        /* ignore dir since path has root slash and dir doesn't have drive or UNC root */
        ignore_dir = 1;
        wdir_len = 0;
    }

    /* room for home, dir and path plus one separator (or '.') after each */
    buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;

    buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t));

    /* add home */
    if (whome_len) {
        wcsncpy(buffer_pos, whome, whome_len);
        buffer_pos += whome_len;
    }

    /* Add separator if required */
    if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
        buffer_pos[0] = L'\\';
        buffer_pos++;
    }
    else if (!dir_drive && path_drive) {
        /* drive-relative path without a usable dir drive: restore "X:" */
        *buffer_pos++ = path_drive;
        *buffer_pos++ = L':';
    }

    if (wdir_len) {
        /* tainted if dir is used and dir is tainted */
        if (!tainted && OBJ_TAINTED(dir))
            tainted = 1;

        wcsncpy(buffer_pos, wdir_pos, wdir_len);
        buffer_pos += wdir_len;
    }

    /* add separator if required */
    if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
        buffer_pos[0] = L'\\';
        buffer_pos++;
    }

    /* now deal with path */
    if (wpath_len) {
        wcsncpy(buffer_pos, wpath_pos, wpath_len);
        buffer_pos += wpath_len;
    }

    /* GetFullPathNameW requires at least "." to determine current directory */
    if (wpath_len == 0) {
        buffer_pos[0] = L'.';
        buffer_pos++;
    }

    /* Ensure buffer is NULL terminated */
    buffer_pos[0] = L'\0';

    /* tainted if path is relative */
    if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer)))
        tainted = 1;

    /* FIXME: Make this more robust */
    /* NOTE(review): a failing GetFullPathNameW() returns 0, which is not
     * checked here; the trimming below would then index before the start of
     * wfullpath. */
    /* Determine require buffer size */
    size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
    if (size > PATH_BUFFER_SIZE) {
        /* allocate more memory than alloted originally by PATH_BUFFER_SIZE */
        wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t));
        size = GetFullPathNameW(buffer, size, wfullpath, NULL);
    }
    else {
        wfullpath = wfullpath_buffer;
    }

    /* Remove any trailing slashes */
    if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
        wfullpath[size - 2] != L':' &&
        !(size == 2 && IS_DIR_UNC_P(wfullpath))) {
        size -= 1;
        wfullpath[size] = L'\0';
    }

    /* Remove any trailing dot */
    if (wfullpath[size - 1] == L'.') {
        size -= 1;
        wfullpath[size] = L'\0';
    }

    /* removes trailing invalid ':$DATA' */
    size = remove_invalid_alternative_data(wfullpath, size);

    /* Replace the trailing path to long name */
    if (long_name)
        size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer));

    /* sanitize backslashes with forwardslashes */
    replace_wchar(wfullpath, L'\\', L'/');

    /* convert to VALUE and set the path encoding */
    rb_str_set_len(result, 0);
    result = append_wstr(result, wfullpath, size, cp, path_cp, path_encoding);

    /* makes the result object tainted if expanding tainted strings or returning modified path */
    if (tainted)
        OBJ_TAINT(result);

    /* TODO: better cleanup */
    if (buffer)
        xfree(buffer);

    free(wpath);
    free(wdir);

    if (whome)
        xfree(whome);

    if (wfullpath != wfullpath_buffer)
        xfree(wfullpath);

    rb_enc_associate(result, path_encoding);
    return result;
}
/*
 *  call-seq:
 *     Dir.new( string ) -> aDir
 *     Dir.new( string, options ) -> aDir
 *
 *  Returns a new directory object for the named directory.
 *
 *  The optional second argument must be a Hash (or respond to +to_hash+);
 *  anything else raises TypeError.  Two keys are read from it:
 *
 *  <code>:external_encoding</code>:: encoding used for the directory
 *                                    (defaults to the filesystem encoding).
 *  <code>:internal_encoding</code>:: internal encoding; only accepted
 *                                    together with
 *                                    <code>:external_encoding</code>,
 *                                    otherwise ArgumentError is raised.  It
 *                                    is ignored with a warning when it equals
 *                                    the external encoding.
 *
 *  Raises a system-call error (via rb_sys_fail) when the directory cannot
 *  be opened.
 */
static VALUE
dir_initialize(int argc, VALUE *argv, VALUE dir)
{
    struct dir_data *dp;
    static rb_encoding *fs_encoding;
    rb_encoding *intencoding, *extencoding;
    VALUE dirname, opt;
    static VALUE sym_intenc, sym_extenc;

    /* one-time initialisation of the option keys and the default encoding */
    if (!sym_intenc) {
        sym_intenc = ID2SYM(rb_intern("internal_encoding"));
        sym_extenc = ID2SYM(rb_intern("external_encoding"));
        fs_encoding = rb_filesystem_encoding();
    }

    intencoding = NULL;
    extencoding = fs_encoding;
    rb_scan_args(argc, argv, "11", &dirname, &opt);

    if (!NIL_P(opt)) {
        VALUE v, extenc = Qnil, intenc = Qnil;

        /* rb_convert_type raises TypeError for a non-convertible argument.
         * The non-raising rb_check_convert_type would hand nil to
         * rb_hash_aref below. */
        opt = rb_convert_type(opt, T_HASH, "Hash", "to_hash");

        v = rb_hash_aref(opt, sym_intenc);
        if (!NIL_P(v)) intenc = v;

        v = rb_hash_aref(opt, sym_extenc);
        if (!NIL_P(v)) extenc = v;

        if (!NIL_P(extenc)) {
            extencoding = rb_to_encoding(extenc);
            if (!NIL_P(intenc)) {
                intencoding = rb_to_encoding(intenc);
                if (extencoding == intencoding) {
                    rb_warn("Ignoring internal encoding '%s': it is identical to external encoding '%s'",
                            RSTRING_PTR(rb_inspect(intenc)),
                            RSTRING_PTR(rb_inspect(extenc)));
                    intencoding = NULL;
                }
            }
        }
        else if (!NIL_P(intenc)) {
            rb_raise(rb_eArgError, "External encoding must be specified when internal encoding is given");
        }
    }

    {
        /* Transcode dirname to the external encoding unless it is already in
         * US-ASCII, ASCII-8BIT, (on Apple) UTF-8, or the external encoding
         * itself.  When transcoding and no internal encoding was requested,
         * dirname's own encoding becomes the internal encoding. */
        rb_encoding *dirname_encoding = rb_enc_get(dirname);
        if (rb_usascii_encoding() != dirname_encoding
            && rb_ascii8bit_encoding() != dirname_encoding
#if defined __APPLE__
            && rb_utf8_encoding() != dirname_encoding
#endif
            && extencoding != dirname_encoding) {
            if (!intencoding) intencoding = dirname_encoding;
            dirname = rb_str_transcode(dirname, rb_enc_from_encoding(extencoding));
        }
    }
    FilePathValue(dirname);

    Data_Get_Struct(dir, struct dir_data, dp);
    /* release whatever a previous initialize left behind */
    if (dp->dir) closedir(dp->dir);
    if (dp->path) xfree(dp->path);
    dp->dir = NULL;
    dp->path = NULL;
    dp->intenc = intencoding;
    dp->extenc = extencoding;
    dp->dir = opendir(RSTRING_PTR(dirname));
    if (dp->dir == NULL) {
        /* out of file descriptors: run the GC once and retry */
        if (errno == EMFILE || errno == ENFILE) {
            rb_gc();
            dp->dir = opendir(RSTRING_PTR(dirname));
        }
        if (dp->dir == NULL) {
            rb_sys_fail(RSTRING_PTR(dirname));
        }
    }
    dp->path = strdup(RSTRING_PTR(dirname));

    return dir;
}
/*
 * Build a new String by expanding the printf-style directives in +fmt+.
 *
 * argc, argv - the values to format.  Both are shifted by one on entry
 *              (++argc; --argv) and nextarg starts at 1 -- presumably so the
 *              argument-fetching macros (GETARG, GETPOSARG, ... defined
 *              elsewhere) can use 1-based positions; confirm against them.
 * fmt        - the format; converted with StringValue() and then replaced by
 *              the result of rb_str_new4() before it is scanned.
 *
 * Returns +result+: a buffer created by rb_str_buf_new(), given fmt's
 * encoding through rb_enc_copy(), resized to the number of bytes written,
 * and tainted when fmt or a string produced for %s/%p was tainted.
 *
 * Raises ArgumentError for the malformed directives reported by the
 * rb_raise() calls below, and KeyError when a %<name> or %{name} reference
 * yields Qundef.
 *
 * NOTE(review): PUSH, CHECK, FILL, GETARG, GETPOSARG, GETNAMEARG, GETASTER
 * and GETNUM are macros defined outside this block; they appear to read and
 * update the locals declared here (buf, blen, bsiz, result, nextarg, posarg,
 * nextvalue, tmp, hash, p, end), so those names must not be changed.
 */
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt) {
    rb_encoding *enc;
    const char *p, *end;
    char *buf;
    long blen, bsiz;
    VALUE result;
    long scanned = 0;
    int coderange = ENC_CODERANGE_7BIT;
    int width, prec, flags = FNONE;
    int nextarg = 1;
    int posarg = 0;
    int tainted = 0;
    VALUE nextvalue;
    VALUE tmp;
    VALUE str;
    volatile VALUE hash = Qundef;

/* Reject a width that follows an earlier width or a precision. */
#define CHECK_FOR_WIDTH(f) \
    if ((f) & FWIDTH) { \
    rb_raise(rb_eArgError, "width given twice"); \
    } \
    if ((f) & FPREC0) { \
    rb_raise(rb_eArgError, "width after precision"); \
    }
/* Reject a flag character that follows a width or a precision. */
#define CHECK_FOR_FLAGS(f) \
    if ((f) & FWIDTH) { \
    rb_raise(rb_eArgError, "flag after width"); \
    } \
    if ((f) & FPREC0) { \
    rb_raise(rb_eArgError, "flag after precision"); \
    }

    ++argc;
    --argv;
    if (OBJ_TAINTED(fmt)) tainted = 1;
    StringValue(fmt);
    enc = rb_enc_get(fmt);
    fmt = rb_str_new4(fmt);
    p = RSTRING_PTR(fmt);
    end = p + RSTRING_LEN(fmt);
    blen = 0;
    bsiz = 120;
    result = rb_str_buf_new(bsiz);
    rb_enc_copy(result, fmt);
    buf = RSTRING_PTR(result);
    memset(buf, 0, bsiz);
    ENC_CODERANGE_SET(result, coderange);

    /* One iteration per directive: copy the literal text up to the next
     * '%', then interpret the directive that follows it. */
    for (; p < end; p++) {
        const char *t;
        int n;
        ID id = 0;

        for (t = p; t < end && *t != '%'; t++) ;
        PUSH(p, t - p);
        /* keep the result's code range in step with the bytes written */
        if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
            scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
            ENC_CODERANGE_SET(result, coderange);
        }
        if (t >= end) {
            /* end of fmt string */
            goto sprint_exit;
        }
        p = t + 1; /* skip `%' */

        width = prec = -1;
        nextvalue = Qundef;
      /* re-entered after each flag, width, precision or argument selector */
      retry:
        switch (*p) {
          default:
            if (rb_enc_isprint(*p, enc))
                rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
            else
                rb_raise(rb_eArgError, "malformed format string");
            break;

          case ' ':
            CHECK_FOR_FLAGS(flags);
            flags |= FSPACE;
            p++;
            goto retry;

          case '#':
            CHECK_FOR_FLAGS(flags);
            flags |= FSHARP;
            p++;
            goto retry;

          case '+':
            CHECK_FOR_FLAGS(flags);
            flags |= FPLUS;
            p++;
            goto retry;

          case '-':
            CHECK_FOR_FLAGS(flags);
            flags |= FMINUS;
            p++;
            goto retry;

          case '0':
            CHECK_FOR_FLAGS(flags);
            flags |= FZERO;
            p++;
            goto retry;

          /* a number is either "N$" (positional argument) or a width */
          case '1': case '2':
          case '3': case '4': case '5': case '6': case '7': case '8': case '9':
            n = 0;
            GETNUM(n, width);
            if (*p == '$') {
                if (nextvalue != Qundef) {
                    rb_raise(rb_eArgError, "value given twice - %d$", n);
                }
                nextvalue = GETPOSARG(n);
                p++;
                goto retry;
            }
            CHECK_FOR_WIDTH(flags);
            width = n;
            flags |= FWIDTH;
            goto retry;

          /* %<name> selects a named argument for a following directive;
           * %{name} selects it and formats it as a string immediately */
          case '<':
          case '{':
            {
                const char *start = p;
                char term = (*p == '<') ? '>' : '}';
                int len;

                for (; p < end && *p != term; ) {
                    p += rb_enc_mbclen(p, end, enc);
                }
                if (p >= end) {
                    rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
                }
#if SIZEOF_INT < SIZEOF_SIZE_T
                if ((size_t)(p - start) >= INT_MAX) {
                    const int message_limit = 20;
                    len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start);
                    rb_enc_raise(enc, rb_eArgError, "too long name (%"PRIdSIZE" bytes) - %.*s...%c", (size_t)(p - start - 2), len, start, term);
                }
#endif
                len = (int)(p - start + 1); /* including parenthesis */
                /* id is non-zero only when a name was already seen in this directive */
                if (id) {
                    rb_enc_raise(enc, rb_eArgError, "named%.*s after <%s>", len, start, rb_id2name(id));
                }
                nextvalue = GETNAMEARG((id = rb_check_id_cstr(start + 1, len - 2 /* without parenthesis */, enc), ID2SYM(id)), start, len, enc);
                if (nextvalue == Qundef) {
                    rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start);
                }
                if (term == '}') goto format_s;
                p++;
                goto retry;
            }

          /* width taken from the argument list; negative means left-justify */
          case '*':
            CHECK_FOR_WIDTH(flags);
            flags |= FWIDTH;
            GETASTER(width);
            if (width < 0) {
                flags |= FMINUS;
                width = -width;
            }
            p++;
            goto retry;

          case '.':
            if (flags & FPREC0) {
                rb_raise(rb_eArgError, "precision given twice");
            }
            flags |= FPREC|FPREC0;

            prec = 0;
            p++;
            if (*p == '*') {
                GETASTER(prec);
                if (prec < 0) { /* ignore negative precision */
                    flags &= ~FPREC;
                }
                p++;
                goto retry;
            }

            GETNUM(prec, precision);
            goto retry;

          /* a '%' followed by newline or NUL is emitted as a literal '%';
           * p is stepped back so that character is processed as text */
          case '\n':
          case '\0':
            p--;
            /* fall through */
          case '%':
            if (flags != FNONE) {
                rb_raise(rb_eArgError, "invalid format character - %%");
            }
            PUSH("%", 1);
            break;

          /* %c: a one-character string or an integer code point */
          case 'c':
            {
                VALUE val = GETARG();
                VALUE tmp;
                unsigned int c;
                int n;

                tmp = rb_check_string_type(val);
                if (!NIL_P(tmp)) {
                    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
                        rb_raise(rb_eArgError, "%%c requires a character");
                    }
                    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
                    RB_GC_GUARD(tmp);
                }
                else {
                    c = NUM2INT(val);
                    n = rb_enc_codelen(c, enc);
                }
                if (n <= 0) {
                    rb_raise(rb_eArgError, "invalid character");
                }
                if (!(flags & FWIDTH)) {
                    CHECK(n);
                    rb_enc_mbcput(c, &buf[blen], enc);
                    blen += n;
                }
                else if ((flags & FMINUS)) {
                    CHECK(n);
                    rb_enc_mbcput(c, &buf[blen], enc);
                    blen += n;
                    FILL(' ', width-1);
                }
                else {
                    FILL(' ', width-1);
                    CHECK(n);
                    rb_enc_mbcput(c, &buf[blen], enc);
                    blen += n;
                }
            }
            break;

          /* %s uses rb_obj_as_string(arg); %p applies rb_inspect() first */
          case 's':
          case 'p':
          format_s:
            {
                VALUE arg = GETARG();
                long len, slen;

                if (*p == 'p') arg = rb_inspect(arg);
                str = rb_obj_as_string(arg);
                if (OBJ_TAINTED(str)) tainted = 1;
                len = RSTRING_LEN(str);
                rb_str_set_len(result, blen);
                if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
                    int cr = coderange;
                    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
                    ENC_CODERANGE_SET(result, (cr == ENC_CODERANGE_UNKNOWN ?
                                               ENC_CODERANGE_BROKEN : (coderange = cr)));
                }
                /* the result continues in the encoding rb_enc_check() returns */
                enc = rb_enc_check(result, str);
                if (flags&(FPREC|FWIDTH)) {
                    /* width and precision count characters, not bytes */
                    slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
                    if (slen < 0) {
                        rb_raise(rb_eArgError, "invalid mbstring sequence");
                    }
                    if ((flags&FPREC) && (prec < slen)) {
                        /* this p shadows the format cursor for this scope only */
                        char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str), prec, enc);
                        slen = prec;
                        len = p - RSTRING_PTR(str);
                    }
                    /* need to adjust multi-byte string pos */
                    if ((flags&FWIDTH) && (width > slen)) {
                        width -= (int)slen;
                        if (!(flags&FMINUS)) {
                            CHECK(width);
                            while (width--) {
                                buf[blen++] = ' ';
                            }
                        }
                        CHECK(len);
                        memcpy(&buf[blen], RSTRING_PTR(str), len);
                        RB_GC_GUARD(str);
                        blen += len;
                        if (flags&FMINUS) {
                            CHECK(width);
                            while (width--) {
                                buf[blen++] = ' ';
                            }
                        }
                        rb_enc_associate(result, enc);
                        break;
                    }
                }
                PUSH(RSTRING_PTR(str), len);
                RB_GC_GUARD(str);
                rb_enc_associate(result, enc);
            }
            break;

          /* integer directives */
          case 'd':
          case 'i':
          case 'o':
          case 'x':
          case 'X':
          case 'b':
          case 'B':
          case 'u':
            {
                volatile VALUE val = GETARG();
                char fbuf[32], nbuf[64], *s;
                const char *prefix = 0;
                int sign = 0, dots = 0;
                char sc = 0;
                long v = 0;
                int base, bignum = 0;
                int len;

                /* sign != 0: print an explicit sign; otherwise a negative
                 * value takes the remove_sign_bits()/".." path below */
                switch (*p) {
                  case 'd':
                  case 'i':
                  case 'u':
                    sign = 1; break;
                  case 'o':
                  case 'x':
                  case 'X':
                  case 'b':
                  case 'B':
                    if (flags&(FPLUS|FSPACE)) sign = 1;
                    break;
                }
                if (flags & FSHARP) {
                    switch (*p) {
                      case 'o':
                        prefix = "0"; break;
                      case 'x':
                        prefix = "0x"; break;
                      case 'X':
                        prefix = "0X"; break;
                      case 'b':
                        prefix = "0b"; break;
                      case 'B':
                        prefix = "0B"; break;
                    }
                }

              /* coerce val until it is a Fixnum (v) or a Bignum (bignum = 1) */
              bin_retry:
                switch (TYPE(val)) {
                  case T_FLOAT:
                    if (FIXABLE(RFLOAT_VALUE(val))) {
                        val = LONG2FIX((long)RFLOAT_VALUE(val));
                        goto bin_retry;
                    }
                    val = rb_dbl2big(RFLOAT_VALUE(val));
                    if (FIXNUM_P(val)) goto bin_retry;
                    bignum = 1;
                    break;
                  case T_STRING:
                    val = rb_str_to_inum(val, 0, TRUE);
                    goto bin_retry;
                  case T_BIGNUM:
                    bignum = 1;
                    break;
                  case T_FIXNUM:
                    v = FIX2LONG(val);
                    break;
                  default:
                    val = rb_Integer(val);
                    goto bin_retry;
                }

                switch (*p) {
                  case 'o':
                    base = 8; break;
                  case 'x':
                  case 'X':
                    base = 16; break;
                  case 'b':
                  case 'B':
                    base = 2; break;
                  case 'u':
                  case 'd':
                  case 'i':
                  default:
                    base = 10; break;
                }

                if (!bignum) {
                    /* binary output always goes through the Bignum path */
                    if (base == 2) {
                        val = rb_int2big(v);
                        goto bin_retry;
                    }
                    if (sign) {
                        char c = *p;
                        if (c == 'i') c = 'd'; /* %d and %i are identical */
                        if (v < 0) {
                            v = -v;
                            sc = '-';
                            width--;
                        }
                        else if (flags & FPLUS) {
                            sc = '+';
                            width--;
                        }
                        else if (flags & FSPACE) {
                            sc = ' ';
                            width--;
                        }
                        /* build "%l<c>" and let snprintf do the conversion */
                        snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
                        snprintf(nbuf, sizeof(nbuf), fbuf, v);
                        s = nbuf;
                    }
                    else {
                        s = nbuf;
                        if (v < 0) {
                            dots = 1;
                        }
                        snprintf(fbuf, sizeof(fbuf), "%%l%c", *p == 'X' ? 'x' : *p);
                        /* written from nbuf[1] so one digit can be prepended below */
                        snprintf(++s, sizeof(nbuf) - 1, fbuf, v);
                        if (v < 0) {
                            char d = 0;

                            s = remove_sign_bits(s, base);
                            switch (base) {
                              case 16:
                                d = 'f'; break;
                              case 8:
                                d = '7'; break;
                            }
                            if (d && *s != d) {
                                *--s = d;
                            }
                        }
                    }
                    len = (int)strlen(s);
                }
                else {
                    if (sign) {
                        tmp = rb_big2str(val, base);
                        s = RSTRING_PTR(tmp);
                        if (s[0] == '-') {
                            s++;
                            sc = '-';
                            width--;
                        }
                        else if (flags & FPLUS) {
                            sc = '+';
                            width--;
                        }
                        else if (flags & FSPACE) {
                            sc = ' ';
                            width--;
                        }
                    }
                    else {
                        /* negative Bignum: convert a two's-complement copy */
                        if (!RBIGNUM_SIGN(val)) {
                            val = rb_big_clone(val);
                            rb_big_2comp(val);
                        }
                        tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val));
                        s = RSTRING_PTR(tmp);
                        if (*s == '-') {
                            dots = 1;
                            if (base == 10) {
                                rb_warning("negative number for %%u specifier");
                            }
                            s = remove_sign_bits(++s, base);
                            switch (base) {
                              case 16:
                                if (s[0] != 'f') *--s = 'f'; break;
                              case 8:
                                if (s[0] != '7') *--s = '7'; break;
                              case 2:
                                if (s[0] != '1') *--s = '1'; break;
                            }
                        }
                    }
                    len = rb_long2int(RSTRING_END(tmp) - s);
                }

                /* reserve room for the ".." pushed further down */
                if (dots) {
                    prec -= 2;
                    width -= 2;
                }

                if (*p == 'X') {
                    char *pp = s;
                    int c;
                    while ((c = (int)(unsigned char)*pp) != 0) {
                        *pp = rb_enc_toupper(c, enc);
                        pp++;
                    }
                }
                if (prefix && !prefix[1]) { /* octal */
                    if (dots) {
                        prefix = 0;
                    }
                    else if (len == 1 && *s == '0') {
                        len = 0;
                        if (flags & FPREC) prec--;
                    }
                    else if ((flags & FPREC) && (prec > len)) {
                        prefix = 0;
                    }
                }
                else if (len == 1 && *s == '0') {
                    prefix = 0;
                }
                if (prefix) {
                    width -= (int)strlen(prefix);
                }
                /* a bare '0' flag turns the remaining width into precision */
                if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
                    prec = width;
                    width = 0;
                }
                else {
                    if (prec <
                        len) {
                        if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
                        prec = len;
                    }
                    width -= prec;
                }
                /* output order: left padding, sign, prefix, "..", fill, digits, right padding */
                if (!(flags&FMINUS)) {
                    CHECK(width);
                    while (width-- > 0) {
                        buf[blen++] = ' ';
                    }
                }
                if (sc) PUSH(&sc, 1);
                if (prefix) {
                    int plen = (int)strlen(prefix);
                    PUSH(prefix, plen);
                }
                CHECK(prec - len);
                if (dots) PUSH("..", 2);
                if (!bignum && v < 0) {
                    char c = sign_bits(base, p);
                    while (len < prec--) {
                        buf[blen++] = c;
                    }
                }
                else if ((flags & (FMINUS|FPREC)) != FMINUS) {
                    char c;

                    if (!sign && bignum && !RBIGNUM_SIGN(val))
                        c = sign_bits(base, p);
                    else
                        c = '0';
                    while (len < prec--) {
                        buf[blen++] = c;
                    }
                }
                PUSH(s, len);
                RB_GC_GUARD(tmp);
                CHECK(width);
                while (width-- > 0) {
                    buf[blen++] = ' ';
                }
            }
            break;

          /* floating-point directives */
          case 'f':
          case 'g':
          case 'G':
          case 'e':
          case 'E':
          case 'a':
          case 'A':
            {
                VALUE val = GETARG();
                double fval;
                int i, need = 6;
                char fbuf[32];

                fval = RFLOAT_VALUE(rb_Float(val));
                /* NaN and infinities are written by hand as "NaN"/"Inf" */
                if (isnan(fval) || isinf(fval)) {
                    const char *expr;

                    if (isnan(fval)) {
                        expr = "NaN";
                    }
                    else {
                        expr = "Inf";
                    }
                    need = (int)strlen(expr);
                    if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
                        need++;
                    if ((flags & FWIDTH) && need < width)
                        need = width;

                    CHECK(need + 1);
                    /* fill the field with spaces, then overwrite sign and text */
                    snprintf(&buf[blen], need + 1, "%*s", need, "");
                    if (flags & FMINUS) {
                        if (!isnan(fval) && fval < 0.0)
                            buf[blen++] = '-';
                        else if (flags & FPLUS)
                            buf[blen++] = '+';
                        else if (flags & FSPACE)
                            blen++;
                        memcpy(&buf[blen], expr, strlen(expr));
                    }
                    else {
                        if (!isnan(fval) && fval < 0.0)
                            buf[blen + need - strlen(expr) - 1] = '-';
                        else if (flags & FPLUS)
                            buf[blen + need - strlen(expr) - 1] = '+';
                        else if ((flags & FSPACE) && need > width)
                            blen++;
                        memcpy(&buf[blen + need - strlen(expr)], expr, strlen(expr));
                    }
                    blen += strlen(&buf[blen]);
                    break;
                }

                /* finite values: build a C format in fbuf and use snprintf */
                fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
                need = 0;
                if (*p != 'e' && *p != 'E') {
                    i = INT_MIN;
                    frexp(fval, &i);
                    if (i > 0)
                        need = BIT_DIGITS(i);
                }
                need += (flags&FPREC) ?
                    prec : 6;
                if ((flags&FWIDTH) && need < width)
                    need = width;
                need += 20;

                CHECK(need);
                snprintf(&buf[blen], need, fbuf, fval);
                blen += strlen(&buf[blen]);
            }
            break;
        }
        flags = FNONE;
    }

  sprint_exit:
    RB_GC_GUARD(fmt);
    /* XXX - We cannot validate the number of arguments if (digit)$ style used.
     */
    if (posarg >= 0 && nextarg < argc) {
        const char *mesg = "too many arguments for format string";
        if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
        if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
    }
    rb_str_resize(result, blen);

    if (tainted) OBJ_TAINT(result);
    return result;
}
/**
 * filter_html
 *
 * Scans +str+ byte by byte and, outside of markup, replaces every word
 * found in the tree stored in +self+ with the link template applied to that
 * word (url_base % [word, word]).
 *
 * Text is skipped while inside a tag (from BEGIN_TAG to END_TAG).  When the
 * tag name starts with A_TAG, SCRIPT_TAG, PRE_TAG, IFRAME_TAG or OBJECT_TAG,
 * skipping continues until a BEGIN_TAG is followed, two bytes later, by the
 * same tag name again.
 *
 * Matching walks the tree with search_child(); the last position at which
 * a node had end_flag set is used, so the longest registered word wins.
 * When a walk fails without a complete word, scanning resumes right after
 * the position where the walk started.
 *
 * self - object wrapping the root node; its LINK_URL_VARIABLE instance
 *        variable supplies the template (DEAULT_LINK_URL when nil).
 * str  - text to filter; converted with StringValuePtr().
 *
 * Returns +str+ itself when nothing was replaced, otherwise a new String
 * built from the untouched pieces and the generated links, each piece
 * passed through add_encode() with the encoding of +str+.
 **/
static VALUE t_filter_html(VALUE self, VALUE str) {
    node root, now, ret;
    bool in_tag;
    char *text;
    const char* inner_tag;
    long i, head_i, tail_i, copy_head_i, total_len;
    VALUE change_str, url_base, word;
    rb_encoding *enc;
    ID id_concat, id_format;

    id_concat = rb_intern("concat");
    id_format = rb_intern("%");

    change_str = rb_str_new2(EMPTY_STRING);
    /* Convert first: StringValuePtr may replace str, and the encoding must
     * be read from the string that is actually scanned. */
    text = StringValuePtr(str);
    enc = rb_enc_get(str);
    Data_Get_Struct(self, struct _node, root);
    url_base = rb_iv_get(self, LINK_URL_VARIABLE);
    if (url_base == Qnil) {
        url_base = rb_str_new2(DEAULT_LINK_URL);
    }

    now = root;
    total_len = strlen(text);
    head_i = -1;      /* start of the match in progress, -1 when none */
    tail_i = -1;      /* end of the longest complete word so far, -1 when none */
    copy_head_i = 0;  /* first byte not yet copied into change_str */
    in_tag = false;
    inner_tag = NULL;

    /* i runs up to and including total_len so that the terminating NUL
     * flushes a match that reaches the end of the text. */
    for (i = 0; i <= total_len; i++) {
        if (!in_tag && text[i] == BEGIN_TAG) {
            in_tag = true;
            if (strncasecmp(&text[i + 1], A_TAG, strlen(A_TAG)) == 0) {
                inner_tag = A_TAG;
            } else if (strncasecmp(&text[i + 1], SCRIPT_TAG, strlen(SCRIPT_TAG)) == 0) {
                inner_tag = SCRIPT_TAG;
            } else if (strncasecmp(&text[i + 1], PRE_TAG, strlen(PRE_TAG)) == 0) {
                inner_tag = PRE_TAG;
            } else if (strncasecmp(&text[i + 1], IFRAME_TAG, strlen(IFRAME_TAG)) == 0) {
                inner_tag = IFRAME_TAG;
            } else if (strncasecmp(&text[i + 1], OBJECT_TAG, strlen(OBJECT_TAG)) == 0) {
                inner_tag = OBJECT_TAG;
            }
            continue;
        }
        if (in_tag && !inner_tag && text[i] == END_TAG) {
            in_tag = false;
            continue;
        }
        /* Bound check: text[i + 2] may only be examined while it lies within
         * the string or on its terminating NUL.  Without it a BEGIN_TAG in
         * the last byte made strncasecmp read past the terminator. */
        if (inner_tag && text[i] == BEGIN_TAG && i + 2 <= total_len) {
            if (strncasecmp(&text[i + 2], inner_tag, strlen(inner_tag)) == 0) {
                inner_tag = NULL;
                continue;
            }
        }
        if (in_tag) {
            continue;
        }

        ret = search_child(now, text[i]);

        if (ret && i != total_len) {
            if (head_i == -1) {
                head_i = i;
            }
            if (ret->end_flag) {
                tail_i = i;
            }
            now = ret;
        } else {
            if (head_i != -1) {
                if (tail_i != -1) {
                    /* copy the untouched text preceding the word */
                    if (copy_head_i < head_i) {
                        rb_funcall(
                            change_str,
                            id_concat,
                            1,
                            add_encode(rb_str_new(&text[copy_head_i], (head_i - copy_head_i)), enc)
                        );
                    }

                    /* append the link built from the matched word */
                    word = rb_str_new(&text[head_i], (tail_i - head_i + 1));
                    rb_funcall(
                        change_str,
                        id_concat,
                        1,
                        add_encode(rb_funcall(url_base, id_format, 1, rb_assoc_new(word, word)), enc)
                    );
                    /* resume right after the word */
                    i = tail_i;
                    copy_head_i = tail_i + 1;
                    tail_i = -1;
                } else {
                    /* no complete word: retry from the byte after the start */
                    i = head_i;
                }
                head_i = -1;
            }
            now = root;
        }
    }

    if (copy_head_i == 0) {
        return str;
    } else {
        rb_funcall(
            change_str,
            id_concat,
            1,
            add_encode(rb_str_new(&text[copy_head_i], (total_len - copy_head_i)), enc)
        );
        /* text points into str: keep str alive until the last use of text */
        RB_GC_GUARD(str);
        return change_str;
    }
}
/*
 * Interns the first +len+ bytes of +string+ with rb_intern3(), using the
 * encoding obtained from +enc+ via rb_enc_get(), and reports whether the
 * resulting ID is the ID of +sym+.
 *
 * Returns Qtrue when the IDs are equal, Qfalse otherwise.
 */
VALUE symbol_spec_rb_intern3_c_compare(VALUE self, VALUE string, VALUE len, VALUE enc, VALUE sym) {
  const char *name = RSTRING_PTR(string);
  long name_len = FIX2LONG(len);
  rb_encoding *encoding = rb_enc_get(enc);
  ID interned = rb_intern3(name, name_len, encoding);

  if (SYM2ID(sym) == interned) {
    return Qtrue;
  }
  return Qfalse;
}
/*
 * Interns the first +len+ bytes of +string+ with rb_intern3(), using the
 * encoding obtained from +enc+ via rb_enc_get(), and returns the resulting
 * ID as a Symbol.
 */
VALUE symbol_spec_rb_intern3(VALUE self, VALUE string, VALUE len, VALUE enc) {
  const char *name = RSTRING_PTR(string);
  long name_len = FIX2LONG(len);
  rb_encoding *encoding = rb_enc_get(enc);
  ID interned = rb_intern3(name, name_len, encoding);

  return ID2SYM(interned);
}
/*
 * Returns a copy of +str+ in which the entities &apos; &amp; &quot; &gt;
 * &lt; and the numeric references &#NNN; / &#xHHH; (also &#XHHH;) are
 * replaced by the characters they denote.
 *
 * Numeric references are replaced only when the value is below a limit
 * that depends on the encoding name of +str+: 0x10ffff for "UTF-8", 256
 * for "ISO-8859-1" and 128 for everything else (names compared
 * case-insensitively).  A reference that overflows, reaches the limit, or
 * lacks the closing ';' is left untouched.
 *
 * When nothing is replaced the result is rb_str_dup(str); otherwise a new
 * buffer is built and passed with +str+ to preserve_original_state().
 *
 * After a failed match the scan resumes at the first byte the attempt did
 * not consume, so an entity that directly follows a malformed one is still
 * decoded (e.g. the "&amp;" in "&a&amp;").
 */
static VALUE
optimized_unescape_html(VALUE str)
{
    enum {UNICODE_MAX = 0x10ffff};
    rb_encoding *enc = rb_enc_get(str);
    unsigned long charlimit = (strcasecmp(rb_enc_name(enc), "UTF-8") == 0 ? UNICODE_MAX :
                               strcasecmp(rb_enc_name(enc), "ISO-8859-1") == 0 ? 256 :
                               128);
    long i, len, beg = 0;   /* beg: first byte not yet copied to dest */
    size_t clen, plen;      /* plen: length of the literal run before '&' */
    int overflow;
    const char *cstr;
    char buf[6];
    VALUE dest = 0;         /* created lazily on the first replacement */

    len  = RSTRING_LEN(str);
    cstr = RSTRING_PTR(str);

    for (i = 0; i < len; i++) {
        unsigned long cc;
        char c = cstr[i];
        if (c != '&') continue;
        plen = i - beg;
        if (++i >= len) break;
        c = (unsigned char)cstr[i];

        /*
         * Every failure path below ends with "--i; continue;".  The loop's
         * i++ then lands on the first byte this attempt did not consume, so
         * a '&' sitting there starts a fresh match instead of being skipped.
         */
        switch (c) {
          case 'a':
            ++i;
            if (len - i >= 4 && memcmp(&cstr[i], "pos;", 4) == 0) {
                c = '\'';
                i += 3;
            }
            else if (len - i >= 3 && memcmp(&cstr[i], "mp;", 3) == 0) {
                c = '&';
                i += 2;
            }
            else {
                --i;
                continue;
            }
            break;
          case 'q':
            ++i;
            if (len - i >= 4 && memcmp(&cstr[i], "uot;", 4) == 0) {
                c = '"';
                i += 3;
            }
            else {
                --i;
                continue;
            }
            break;
          case 'g':
            ++i;
            if (len - i >= 2 && memcmp(&cstr[i], "t;", 2) == 0) {
                c = '>';
                i += 1;
            }
            else {
                --i;
                continue;
            }
            break;
          case 'l':
            ++i;
            if (len - i >= 2 && memcmp(&cstr[i], "t;", 2) == 0) {
                c = '<';
                i += 1;
            }
            else {
                --i;
                continue;
            }
            break;
          case '#':
            /* decimal: at least one digit plus ';' must remain */
            if (len - ++i >= 2 && ISDIGIT(cstr[i])) {
                cc = ruby_scan_digits(&cstr[i], len-i, 10, &clen, &overflow);
            }
            /* hexadecimal: 'x' or 'X', then at least one digit plus ';' */
            else if ((cstr[i] == 'x' || cstr[i] == 'X') && len - ++i >= 2 && ISXDIGIT(cstr[i])) {
                cc = ruby_scan_digits(&cstr[i], len-i, 16, &clen, &overflow);
            }
            else {
                --i;
                continue;
            }
            i += clen;
            if (overflow || cc >= charlimit || cstr[i] != ';') {
                --i;
                continue;
            }
            if (!dest) {
                dest = rb_str_buf_new(len);
            }
            rb_str_cat(dest, cstr + beg, plen);
            if (charlimit > 256) {
                /* only the UTF-8 limit exceeds 256: encode the code point */
                rb_str_cat(dest, buf, rb_enc_mbcput((OnigCodePoint)cc, buf, enc));
            }
            else {
                c = (unsigned char)cc;
                rb_str_cat(dest, &c, 1);
            }
            beg = i + 1;
            continue;
          default:
            --i;
            continue;
        }

        /* named entity matched: emit the pending literal run and the char */
        if (!dest) {
            dest = rb_str_buf_new(len);
        }
        rb_str_cat(dest, cstr + beg, plen);
        rb_str_cat(dest, &c, 1);
        beg = i + 1;
    }

    if (dest) {
        rb_str_cat(dest, cstr + beg, len - beg);
        preserve_original_state(str, dest);
        return dest;
    }
    else {
        return rb_str_dup(str);
    }
}
VALUE Trenni_Native_parse_markup(VALUE self, VALUE buffer, VALUE delegate, VALUE entities) { VALUE string = rb_funcall(buffer, id_read, 0); rb_encoding *encoding = rb_enc_get(string); VALUE pcdata = Qnil; VALUE empty_string = rb_obj_freeze(rb_enc_str_new("", 0, encoding)); const char *s, *p, *pe, *eof; unsigned long cs, top = 0, stack[2] = {0}; unsigned long codepoint = 0; Token identifier = {0}, cdata = {0}, characters = {0}, entity = {0}, doctype = {0}, comment = {0}, instruction = {0}; unsigned self_closing = 0, has_value = 0; s = p = RSTRING_PTR(string); eof = pe = p + RSTRING_LEN(string); #line 42 "markup.c" { cs = Trenni_markup_parser_start; top = 0; } #line 48 "markup.c" { if ( p == pe ) goto _test_eof; goto _resume; _again: switch ( cs ) { case 48: goto st48; case 49: goto st49; case 50: goto st50; case 1: goto st1; case 2: goto st2; case 0: goto st0; case 3: goto st3; case 4: goto st4; case 5: goto st5; case 51: goto st51; case 6: goto st6; case 7: goto st7; case 8: goto st8; case 9: goto st9; case 10: goto st10; case 11: goto st11; case 12: goto st12; case 13: goto st13; case 14: goto st14; case 15: goto st15; case 16: goto st16; case 17: goto st17; case 18: goto st18; case 19: goto st19; case 52: goto st52; case 20: goto st20; case 21: goto st21; case 22: goto st22; case 23: goto st23; case 24: goto st24; case 25: goto st25; case 26: goto st26; case 53: goto st53; case 27: goto st27; case 28: goto st28; case 29: goto st29; case 30: goto st30; case 31: goto st31; case 32: goto st32; case 33: goto st33; case 34: goto st34; case 35: goto st35; case 54: goto st54; case 36: goto st36; case 37: goto st37; case 55: goto st55; case 38: goto st38; case 39: goto st39; case 40: goto st40; case 41: goto st41; case 56: goto st56; case 42: goto st42; case 43: goto st43; case 44: goto st44; case 57: goto st57; case 45: goto st45; case 46: goto st46; case 47: goto st47; default: break; } if ( ++p == pe ) goto _test_eof; _resume: switch ( cs ) { st48: if ( ++p == pe ) 
goto _test_eof48; case 48: switch( (*p) ) { case 38: goto tr88; case 60: goto tr89; } goto tr87; tr93: #line 32 "markup.rl" { characters.begin = p; } goto st49; tr87: #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 32 "markup.rl" { characters.begin = p; } goto st49; tr96: #line 158 "markup.rl" { rb_funcall(delegate, id_open_tag_end, 1, self_closing == 1 ? Qtrue : Qfalse); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 32 "markup.rl" { characters.begin = p; } goto st49; tr99: #line 92 "markup.rl" { comment.end = p; rb_funcall(delegate, id_comment, 1, Trenni_token(comment, encoding)); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 32 "markup.rl" { characters.begin = p; } goto st49; tr102: #line 78 "markup.rl" { doctype.end = p; rb_funcall(delegate, id_doctype, 1, Trenni_token(doctype, encoding)); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 32 "markup.rl" { characters.begin = p; } goto st49; tr105: #line 177 "markup.rl" { cdata.end = p; rb_funcall(delegate, id_cdata, 1, Trenni_token(cdata, encoding)); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 32 "markup.rl" { characters.begin = p; } goto st49; tr108: #line 165 "markup.rl" { rb_funcall(delegate, id_close_tag, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s)); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 32 "markup.rl" { characters.begin = p; } goto st49; tr111: #line 112 "markup.rl" { instruction.end = p; rb_funcall(delegate, id_instruction, 1, Trenni_token(instruction, encoding)); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 32 "markup.rl" { characters.begin = p; } goto st49; st49: if ( ++p == pe ) goto _test_eof49; case 49: #line 264 "markup.c" switch( (*p) ) { case 38: goto tr91; case 60: goto tr92; } goto st49; tr91: #line 36 "markup.rl" { characters.end = p; Trenni_append_token(&pcdata, encoding, 
characters); } #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 50; goto st42;}} goto st50; tr94: #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 50; goto st42;}} goto st50; tr88: #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 50; goto st42;}} goto st50; tr97: #line 158 "markup.rl" { rb_funcall(delegate, id_open_tag_end, 1, self_closing == 1 ? Qtrue : Qfalse); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 50; goto st42;}} goto st50; tr100: #line 92 "markup.rl" { comment.end = p; rb_funcall(delegate, id_comment, 1, Trenni_token(comment, encoding)); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 50; goto st42;}} goto st50; tr103: #line 78 "markup.rl" { doctype.end = p; rb_funcall(delegate, id_doctype, 1, Trenni_token(doctype, encoding)); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 50; goto st42;}} goto st50; tr106: #line 177 "markup.rl" { cdata.end = p; rb_funcall(delegate, id_cdata, 1, Trenni_token(cdata, encoding)); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 50; goto st42;}} goto st50; tr109: #line 165 "markup.rl" { rb_funcall(delegate, id_close_tag, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s)); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 10 
"/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 50; goto st42;}} goto st50; tr112: #line 112 "markup.rl" { instruction.end = p; rb_funcall(delegate, id_instruction, 1, Trenni_token(instruction, encoding)); } #line 25 "markup.rl" { } #line 18 "markup.rl" { pcdata = Qnil; } #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 50; goto st42;}} goto st50; st50: if ( ++p == pe ) goto _test_eof50; case 50: #line 397 "markup.c" switch( (*p) ) { case 38: goto tr94; case 60: goto tr95; } goto tr93; tr89: #line 129 "markup.rl" { } #line 162 "markup.rl" { } #line 102 "markup.rl" { instruction.begin = p; } #line 88 "markup.rl" { comment.begin = p; } #line 74 "markup.rl" { doctype.begin = p; } #line 173 "markup.rl" { cdata.begin = p; } goto st1; tr92: #line 36 "markup.rl" { characters.end = p; Trenni_append_token(&pcdata, encoding, characters); } #line 22 "markup.rl" { } #line 28 "markup.rl" { rb_funcall(delegate, id_text, 1, pcdata); } #line 129 "markup.rl" { } #line 162 "markup.rl" { } #line 102 "markup.rl" { instruction.begin = p; } #line 88 "markup.rl" { comment.begin = p; } #line 74 "markup.rl" { doctype.begin = p; } #line 173 "markup.rl" { cdata.begin = p; } goto st1; tr95: #line 22 "markup.rl" { } #line 28 "markup.rl" { rb_funcall(delegate, id_text, 1, pcdata); } #line 129 "markup.rl" { } #line 162 "markup.rl" { } #line 102 "markup.rl" { instruction.begin = p; } #line 88 "markup.rl" { comment.begin = p; } #line 74 "markup.rl" { doctype.begin = p; } #line 173 "markup.rl" { cdata.begin = p; } goto st1; tr98: #line 158 "markup.rl" { rb_funcall(delegate, id_open_tag_end, 1, self_closing == 1 ? 
Qtrue : Qfalse); } #line 129 "markup.rl" { } #line 162 "markup.rl" { } #line 102 "markup.rl" { instruction.begin = p; } #line 88 "markup.rl" { comment.begin = p; } #line 74 "markup.rl" { doctype.begin = p; } #line 173 "markup.rl" { cdata.begin = p; } goto st1; tr101: #line 92 "markup.rl" { comment.end = p; rb_funcall(delegate, id_comment, 1, Trenni_token(comment, encoding)); } #line 129 "markup.rl" { } #line 162 "markup.rl" { } #line 102 "markup.rl" { instruction.begin = p; } #line 88 "markup.rl" { comment.begin = p; } #line 74 "markup.rl" { doctype.begin = p; } #line 173 "markup.rl" { cdata.begin = p; } goto st1; tr104: #line 78 "markup.rl" { doctype.end = p; rb_funcall(delegate, id_doctype, 1, Trenni_token(doctype, encoding)); } #line 129 "markup.rl" { } #line 162 "markup.rl" { } #line 102 "markup.rl" { instruction.begin = p; } #line 88 "markup.rl" { comment.begin = p; } #line 74 "markup.rl" { doctype.begin = p; } #line 173 "markup.rl" { cdata.begin = p; } goto st1; tr107: #line 177 "markup.rl" { cdata.end = p; rb_funcall(delegate, id_cdata, 1, Trenni_token(cdata, encoding)); } #line 129 "markup.rl" { } #line 162 "markup.rl" { } #line 102 "markup.rl" { instruction.begin = p; } #line 88 "markup.rl" { comment.begin = p; } #line 74 "markup.rl" { doctype.begin = p; } #line 173 "markup.rl" { cdata.begin = p; } goto st1; tr110: #line 165 "markup.rl" { rb_funcall(delegate, id_close_tag, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s)); } #line 129 "markup.rl" { } #line 162 "markup.rl" { } #line 102 "markup.rl" { instruction.begin = p; } #line 88 "markup.rl" { comment.begin = p; } #line 74 "markup.rl" { doctype.begin = p; } #line 173 "markup.rl" { cdata.begin = p; } goto st1; tr113: #line 112 "markup.rl" { instruction.end = p; rb_funcall(delegate, id_instruction, 1, Trenni_token(instruction, encoding)); } #line 129 "markup.rl" { } #line 162 "markup.rl" { } #line 102 "markup.rl" { instruction.begin = p; } #line 88 "markup.rl" { comment.begin = p; } 
#line 74 "markup.rl" { doctype.begin = p; } #line 173 "markup.rl" { cdata.begin = p; } goto st1; st1: if ( ++p == pe ) goto _test_eof1; case 1: #line 675 "markup.c" switch( (*p) ) { case 33: goto st15; case 47: goto st36; case 63: goto st38; case 96: goto tr1; } if ( (*p) < 59 ) { if ( 0 <= (*p) && (*p) <= 44 ) goto tr1; } else if ( (*p) > 64 ) { if ( (*p) > 94 ) { if ( 123 <= (*p) ) goto tr1; } else if ( (*p) >= 91 ) goto tr1; } else goto tr1; goto tr0; tr0: #line 10 "markup.rl" { identifier.begin = p; } goto st2; st2: if ( ++p == pe ) goto _test_eof2; case 2: #line 704 "markup.c" switch( (*p) ) { case 32: goto tr6; case 47: goto tr7; case 62: goto tr8; case 96: goto tr1; } if ( (*p) < 14 ) { if ( (*p) > 8 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto tr6; } else if ( (*p) >= 0 ) goto tr1; } else if ( (*p) > 44 ) { if ( (*p) < 91 ) { if ( 59 <= (*p) && (*p) <= 64 ) goto tr1; } else if ( (*p) > 94 ) { if ( 123 <= (*p) ) goto tr1; } else goto tr1; } else goto tr1; goto st2; tr1: #line 169 "markup.rl" { Trenni_raise_error("could not parse tag", buffer, p-s); } goto st0; tr69: #line 118 "markup.rl" { Trenni_raise_error("could not parse instruction", buffer, p-s); } goto st0; tr75: #line 42 "markup.rl" { Trenni_raise_error("could not parse entity", buffer, p-s); } goto st0; #line 747 "markup.c" st0: cs = 0; goto _out; tr6: #line 14 "markup.rl" { identifier.end = p; } #line 122 "markup.rl" { // Reset self-closing state - we don't know yet. 
self_closing = 0; rb_funcall(delegate, id_open_tag_begin, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s)); } goto st3; tr14: #line 14 "markup.rl" { identifier.end = p; } #line 148 "markup.rl" { if (has_value == 1) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata); } else if (has_value == 2) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string); } else { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue); } } goto st3; tr26: #line 140 "markup.rl" { has_value = 1; } #line 148 "markup.rl" { if (has_value == 1) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata); } else if (has_value == 2) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string); } else { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue); } } goto st3; tr32: #line 144 "markup.rl" { has_value = 2; } #line 148 "markup.rl" { if (has_value == 1) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata); } else if (has_value == 2) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string); } else { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue); } } goto st3; st3: if ( ++p == pe ) goto _test_eof3; case 3: #line 816 "markup.c" switch( (*p) ) { case 32: goto st3; case 47: goto tr11; case 62: goto st51; case 96: goto tr1; } if ( (*p) < 14 ) { if ( (*p) > 8 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto st3; } else if ( (*p) >= 0 ) goto tr1; } else if ( (*p) > 44 ) { if ( (*p) < 91 ) { if ( 59 <= (*p) && (*p) <= 64 ) goto tr1; } else if ( (*p) > 94 ) { if ( 123 <= (*p) ) goto tr1; } else goto tr1; } else goto tr1; goto tr9; tr9: #line 136 "markup.rl" { has_value = 0; } #line 10 "markup.rl" { identifier.begin = p; } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: #line 855 "markup.c" switch( (*p) ) { case 
32: goto tr14; case 47: goto tr15; case 61: goto tr16; case 62: goto tr17; case 96: goto tr1; } if ( (*p) < 14 ) { if ( (*p) > 8 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto tr14; } else if ( (*p) >= 0 ) goto tr1; } else if ( (*p) > 44 ) { if ( (*p) < 91 ) { if ( 59 <= (*p) && (*p) <= 64 ) goto tr1; } else if ( (*p) > 94 ) { if ( 123 <= (*p) ) goto tr1; } else goto tr1; } else goto tr1; goto st4; tr7: #line 14 "markup.rl" { identifier.end = p; } #line 122 "markup.rl" { // Reset self-closing state - we don't know yet. self_closing = 0; rb_funcall(delegate, id_open_tag_begin, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s)); } #line 132 "markup.rl" { self_closing = 1; } goto st5; tr11: #line 132 "markup.rl" { self_closing = 1; } goto st5; tr15: #line 14 "markup.rl" { identifier.end = p; } #line 148 "markup.rl" { if (has_value == 1) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata); } else if (has_value == 2) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string); } else { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue); } } #line 132 "markup.rl" { self_closing = 1; } goto st5; tr27: #line 140 "markup.rl" { has_value = 1; } #line 148 "markup.rl" { if (has_value == 1) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata); } else if (has_value == 2) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string); } else { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue); } } #line 132 "markup.rl" { self_closing = 1; } goto st5; tr33: #line 144 "markup.rl" { has_value = 2; } #line 148 "markup.rl" { if (has_value == 1) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata); } else if (has_value == 2) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string); } else { rb_funcall(delegate, id_attribute, 2, 
Trenni_token(identifier, encoding), Qtrue); } } #line 132 "markup.rl" { self_closing = 1; } goto st5; st5: if ( ++p == pe ) goto _test_eof5; case 5: #line 968 "markup.c" if ( (*p) == 62 ) goto st51; goto tr1; tr8: #line 14 "markup.rl" { identifier.end = p; } #line 122 "markup.rl" { // Reset self-closing state - we don't know yet. self_closing = 0; rb_funcall(delegate, id_open_tag_begin, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s)); } goto st51; tr17: #line 14 "markup.rl" { identifier.end = p; } #line 148 "markup.rl" { if (has_value == 1) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata); } else if (has_value == 2) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string); } else { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue); } } goto st51; tr28: #line 140 "markup.rl" { has_value = 1; } #line 148 "markup.rl" { if (has_value == 1) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata); } else if (has_value == 2) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string); } else { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue); } } goto st51; tr34: #line 144 "markup.rl" { has_value = 2; } #line 148 "markup.rl" { if (has_value == 1) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata); } else if (has_value == 2) { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string); } else { rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue); } } goto st51; st51: if ( ++p == pe ) goto _test_eof51; case 51: #line 1037 "markup.c" switch( (*p) ) { case 38: goto tr97; case 60: goto tr98; } goto tr96; tr16: #line 14 "markup.rl" { identifier.end = p; } goto st6; st6: if ( ++p == pe ) goto _test_eof6; case 6: #line 1053 "markup.c" switch( (*p) ) { case 34: goto st7; case 39: goto 
st12; } goto tr1; st7: if ( ++p == pe ) goto _test_eof7; case 7: switch( (*p) ) { case 34: goto st11; case 38: goto tr22; case 60: goto tr1; } goto tr20; tr20: #line 18 "markup.rl" { pcdata = Qnil; } #line 32 "markup.rl" { characters.begin = p; } goto st8; tr29: #line 32 "markup.rl" { characters.begin = p; } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: #line 1089 "markup.c" switch( (*p) ) { case 34: goto tr24; case 38: goto tr25; case 60: goto tr1; } goto st8; tr24: #line 36 "markup.rl" { characters.end = p; Trenni_append_token(&pcdata, encoding, characters); } #line 22 "markup.rl" { } goto st9; tr30: #line 22 "markup.rl" { } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: #line 1116 "markup.c" switch( (*p) ) { case 32: goto tr26; case 47: goto tr27; case 62: goto tr28; } if ( 9 <= (*p) && (*p) <= 13 ) goto tr26; goto tr1; tr22: #line 18 "markup.rl" { pcdata = Qnil; } #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 10; goto st42;}} goto st10; tr25: #line 36 "markup.rl" { characters.end = p; Trenni_append_token(&pcdata, encoding, characters); } #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 10; goto st42;}} goto st10; tr31: #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 10; goto st42;}} goto st10; st10: if ( ++p == pe ) goto _test_eof10; case 10: #line 1151 "markup.c" switch( (*p) ) { case 34: goto tr30; case 38: goto tr31; case 60: goto tr1; } goto tr29; st11: if ( ++p == pe ) goto _test_eof11; case 11: switch( (*p) ) { case 32: goto tr32; case 47: goto tr33; case 62: goto tr34; } if ( 9 <= (*p) && (*p) <= 13 ) goto tr32; goto tr1; st12: if ( ++p == pe ) goto _test_eof12; case 12: switch( (*p) ) { case 38: goto tr36; case 39: goto st11; case 60: goto tr1; } goto tr35; tr35: #line 18 "markup.rl" { pcdata = Qnil; } #line 32 "markup.rl" { characters.begin = p; } goto 
st13; tr39: #line 32 "markup.rl" { characters.begin = p; } goto st13; st13: if ( ++p == pe ) goto _test_eof13; case 13: #line 1200 "markup.c" switch( (*p) ) { case 38: goto tr38; case 39: goto tr24; case 60: goto tr1; } goto st13; tr36: #line 18 "markup.rl" { pcdata = Qnil; } #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 14; goto st42;}} goto st14; tr38: #line 36 "markup.rl" { characters.end = p; Trenni_append_token(&pcdata, encoding, characters); } #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 14; goto st42;}} goto st14; tr40: #line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{stack[top++] = 14; goto st42;}} goto st14; st14: if ( ++p == pe ) goto _test_eof14; case 14: #line 1233 "markup.c" switch( (*p) ) { case 38: goto tr40; case 39: goto tr30; case 60: goto tr1; } goto tr39; st15: if ( ++p == pe ) goto _test_eof15; case 15: switch( (*p) ) { case 45: goto st16; case 68: goto st20; case 91: goto st27; } goto st0; st16: if ( ++p == pe ) goto _test_eof16; case 16: if ( (*p) == 45 ) goto st17; goto st0; st17: if ( ++p == pe ) goto _test_eof17; case 17: if ( (*p) == 45 ) goto st18; goto st17; st18: if ( ++p == pe ) goto _test_eof18; case 18: if ( (*p) == 45 ) goto st19; goto st17; st19: if ( ++p == pe ) goto _test_eof19; case 19: switch( (*p) ) { case 45: goto st19; case 62: goto st52; } goto st17; st52: if ( ++p == pe ) goto _test_eof52; case 52: switch( (*p) ) { case 38: goto tr100; case 60: goto tr101; } goto tr99; st20: if ( ++p == pe ) goto _test_eof20; case 20: if ( (*p) == 79 ) goto st21; goto st0; st21: if ( ++p == pe ) goto _test_eof21; case 21: if ( (*p) == 67 ) goto st22; goto st0; st22: if ( ++p == pe ) goto _test_eof22; case 22: if ( (*p) == 84 ) goto st23; goto st0; st23: if ( ++p == pe ) goto _test_eof23; case 23: if ( (*p) == 89 ) goto st24; goto st0; st24: if ( ++p == pe ) goto _test_eof24; 
case 24: if ( (*p) == 80 ) goto st25; goto st0; st25: if ( ++p == pe ) goto _test_eof25; case 25: if ( (*p) == 69 ) goto st26; goto st0; st26: if ( ++p == pe ) goto _test_eof26; case 26: if ( (*p) == 62 ) goto st53; goto st26; st53: if ( ++p == pe ) goto _test_eof53; case 53: switch( (*p) ) { case 38: goto tr103; case 60: goto tr104; } goto tr102; st27: if ( ++p == pe ) goto _test_eof27; case 27: if ( (*p) == 67 ) goto st28; goto st0; st28: if ( ++p == pe ) goto _test_eof28; case 28: if ( (*p) == 68 ) goto st29; goto st0; st29: if ( ++p == pe ) goto _test_eof29; case 29: if ( (*p) == 65 ) goto st30; goto st0; st30: if ( ++p == pe ) goto _test_eof30; case 30: if ( (*p) == 84 ) goto st31; goto st0; st31: if ( ++p == pe ) goto _test_eof31; case 31: if ( (*p) == 65 ) goto st32; goto st0; st32: if ( ++p == pe ) goto _test_eof32; case 32: if ( (*p) == 91 ) goto st33; goto st0; st33: if ( ++p == pe ) goto _test_eof33; case 33: if ( (*p) == 93 ) goto st34; goto st33; st34: if ( ++p == pe ) goto _test_eof34; case 34: if ( (*p) == 93 ) goto st35; goto st33; st35: if ( ++p == pe ) goto _test_eof35; case 35: switch( (*p) ) { case 62: goto st54; case 93: goto st35; } goto st33; st54: if ( ++p == pe ) goto _test_eof54; case 54: switch( (*p) ) { case 38: goto tr106; case 60: goto tr107; } goto tr105; st36: if ( ++p == pe ) goto _test_eof36; case 36: switch( (*p) ) { case 47: goto tr1; case 96: goto tr1; } if ( (*p) < 59 ) { if ( 0 <= (*p) && (*p) <= 44 ) goto tr1; } else if ( (*p) > 64 ) { if ( (*p) > 94 ) { if ( 123 <= (*p) ) goto tr1; } else if ( (*p) >= 91 ) goto tr1; } else goto tr1; goto tr65; tr65: #line 10 "markup.rl" { identifier.begin = p; } goto st37; st37: if ( ++p == pe ) goto _test_eof37; case 37: #line 1451 "markup.c" switch( (*p) ) { case 47: goto tr1; case 62: goto tr67; case 96: goto tr1; } if ( (*p) < 59 ) { if ( 0 <= (*p) && (*p) <= 44 ) goto tr1; } else if ( (*p) > 64 ) { if ( (*p) > 94 ) { if ( 123 <= (*p) ) goto tr1; } else if ( (*p) >= 91 ) goto tr1; } else 
goto tr1; goto st37; tr67: #line 14 "markup.rl" { identifier.end = p; } goto st55; st55: if ( ++p == pe ) goto _test_eof55; case 55: #line 1479 "markup.c" switch( (*p) ) { case 38: goto tr109; case 60: goto tr110; } goto tr108; st38: if ( ++p == pe ) goto _test_eof38; case 38: switch( (*p) ) { case 47: goto tr69; case 96: goto tr69; } if ( (*p) < 59 ) { if ( 0 <= (*p) && (*p) <= 44 ) goto tr69; } else if ( (*p) > 64 ) { if ( (*p) > 94 ) { if ( 123 <= (*p) ) goto tr69; } else if ( (*p) >= 91 ) goto tr69; } else goto tr69; goto tr68; tr68: #line 10 "markup.rl" { identifier.begin = p; } goto st39; st39: if ( ++p == pe ) goto _test_eof39; case 39: #line 1515 "markup.c" switch( (*p) ) { case 32: goto tr71; case 47: goto tr69; case 96: goto tr69; } if ( (*p) < 14 ) { if ( (*p) > 8 ) { if ( 9 <= (*p) && (*p) <= 13 ) goto tr71; } else if ( (*p) >= 0 ) goto tr69; } else if ( (*p) > 44 ) { if ( (*p) < 91 ) { if ( 59 <= (*p) && (*p) <= 64 ) goto tr69; } else if ( (*p) > 94 ) { if ( 123 <= (*p) ) goto tr69; } else goto tr69; } else goto tr69; goto st39; tr71: #line 14 "markup.rl" { identifier.end = p; } #line 106 "markup.rl" { } goto st40; st40: if ( ++p == pe ) goto _test_eof40; case 40: #line 1552 "markup.c" if ( (*p) == 63 ) goto tr73; goto st40; tr73: #line 109 "markup.rl" { } goto st41; st41: if ( ++p == pe ) goto _test_eof41; case 41: #line 1565 "markup.c" switch( (*p) ) { case 62: goto st56; case 63: goto tr73; } goto st40; st56: if ( ++p == pe ) goto _test_eof56; case 56: switch( (*p) ) { case 38: goto tr112; case 60: goto tr113; } goto tr111; st42: if ( ++p == pe ) goto _test_eof42; case 42: if ( (*p) == 35 ) goto st43; if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr77; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto tr77; } else goto tr77; goto tr75; st43: if ( ++p == pe ) goto _test_eof43; case 43: if ( (*p) == 120 ) goto st45; if ( 48 <= (*p) && (*p) <= 57 ) goto tr78; goto tr75; tr78: #line 46 "markup.rl" { entity.begin = p; } goto st44; 
st44: if ( ++p == pe ) goto _test_eof44; case 44: #line 1614 "markup.c" if ( (*p) == 59 ) goto tr81; if ( 48 <= (*p) && (*p) <= 57 ) goto st44; goto tr75; tr81: #line 66 "markup.rl" { entity.end = p; codepoint = strtoul(entity.begin, (char **)&entity.end, 10); Trenni_append_codepoint(&pcdata, encoding, codepoint); } #line 8 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{cs = stack[--top];goto _again;}} goto st57; tr84: #line 58 "markup.rl" { entity.end = p; codepoint = strtoul(entity.begin, (char **)&entity.end, 16); Trenni_append_codepoint(&pcdata, encoding, codepoint); } #line 8 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{cs = stack[--top];goto _again;}} goto st57; tr86: #line 50 "markup.rl" { entity.end = p; Trenni_append_string(&pcdata, encoding, rb_funcall(entities, id_key_get, 1, Trenni_token(entity, encoding)) ); } #line 8 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl" {{cs = stack[--top];goto _again;}} goto st57; st57: if ( ++p == pe ) goto _test_eof57; case 57: #line 1660 "markup.c" goto st0; st45: if ( ++p == pe ) goto _test_eof45; case 45: if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto tr82; } else if ( (*p) > 70 ) { if ( 97 <= (*p) && (*p) <= 102 ) goto tr82; } else goto tr82; goto tr75; tr82: #line 46 "markup.rl" { entity.begin = p; } goto st46; st46: if ( ++p == pe ) goto _test_eof46; case 46: #line 1685 "markup.c" if ( (*p) == 59 ) goto tr84; if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st46; } else if ( (*p) > 70 ) { if ( 97 <= (*p) && (*p) <= 102 ) goto st46; } else goto st46; goto tr75; tr77: #line 46 "markup.rl" { entity.begin = p; } goto st47; st47: if ( ++p == pe ) goto _test_eof47; case 47: #line 1707 "markup.c" if ( (*p) == 59 ) goto tr86; if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st47; } else if ( (*p) > 90 ) { if ( 97 <= (*p) && (*p) <= 122 ) goto st47; } else goto st47; goto tr75; } _test_eof48: cs 
= 48; goto _test_eof; _test_eof49: cs = 49; goto _test_eof; _test_eof50: cs = 50; goto _test_eof; _test_eof1: cs = 1; goto _test_eof; _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; _test_eof4: cs = 4; goto _test_eof; _test_eof5: cs = 5; goto _test_eof; _test_eof51: cs = 51; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; _test_eof13: cs = 13; goto _test_eof; _test_eof14: cs = 14; goto _test_eof; _test_eof15: cs = 15; goto _test_eof; _test_eof16: cs = 16; goto _test_eof; _test_eof17: cs = 17; goto _test_eof; _test_eof18: cs = 18; goto _test_eof; _test_eof19: cs = 19; goto _test_eof; _test_eof52: cs = 52; goto _test_eof; _test_eof20: cs = 20; goto _test_eof; _test_eof21: cs = 21; goto _test_eof; _test_eof22: cs = 22; goto _test_eof; _test_eof23: cs = 23; goto _test_eof; _test_eof24: cs = 24; goto _test_eof; _test_eof25: cs = 25; goto _test_eof; _test_eof26: cs = 26; goto _test_eof; _test_eof53: cs = 53; goto _test_eof; _test_eof27: cs = 27; goto _test_eof; _test_eof28: cs = 28; goto _test_eof; _test_eof29: cs = 29; goto _test_eof; _test_eof30: cs = 30; goto _test_eof; _test_eof31: cs = 31; goto _test_eof; _test_eof32: cs = 32; goto _test_eof; _test_eof33: cs = 33; goto _test_eof; _test_eof34: cs = 34; goto _test_eof; _test_eof35: cs = 35; goto _test_eof; _test_eof54: cs = 54; goto _test_eof; _test_eof36: cs = 36; goto _test_eof; _test_eof37: cs = 37; goto _test_eof; _test_eof55: cs = 55; goto _test_eof; _test_eof38: cs = 38; goto _test_eof; _test_eof39: cs = 39; goto _test_eof; _test_eof40: cs = 40; goto _test_eof; _test_eof41: cs = 41; goto _test_eof; _test_eof56: cs = 56; goto _test_eof; _test_eof42: cs = 42; goto _test_eof; _test_eof43: cs = 43; goto _test_eof; _test_eof44: cs = 44; goto _test_eof; _test_eof57: cs = 
57; goto _test_eof; _test_eof45: cs = 45; goto _test_eof; _test_eof46: cs = 46; goto _test_eof; _test_eof47: cs = 47; goto _test_eof; _test_eof: {} if ( p == eof ) { switch ( cs ) { case 42: case 43: case 44: case 45: case 46: case 47: #line 42 "markup.rl" { Trenni_raise_error("could not parse entity", buffer, p-s); } break; case 53: #line 78 "markup.rl" { doctype.end = p; rb_funcall(delegate, id_doctype, 1, Trenni_token(doctype, encoding)); } break; case 26: #line 84 "markup.rl" { Trenni_raise_error("could not parse doctype", buffer, p-s); } break; case 52: #line 92 "markup.rl" { comment.end = p; rb_funcall(delegate, id_comment, 1, Trenni_token(comment, encoding)); } break; case 17: case 18: case 19: #line 98 "markup.rl" { Trenni_raise_error("could not parse comment", buffer, p-s); } break; case 56: #line 112 "markup.rl" { instruction.end = p; rb_funcall(delegate, id_instruction, 1, Trenni_token(instruction, encoding)); } break; case 38: case 39: case 40: case 41: #line 118 "markup.rl" { Trenni_raise_error("could not parse instruction", buffer, p-s); } break; case 51: #line 158 "markup.rl" { rb_funcall(delegate, id_open_tag_end, 1, self_closing == 1 ? 
Qtrue : Qfalse); } break; case 55: #line 165 "markup.rl" { rb_funcall(delegate, id_close_tag, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s)); } break; case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 36: case 37: #line 169 "markup.rl" { Trenni_raise_error("could not parse tag", buffer, p-s); } break; case 54: #line 177 "markup.rl" { cdata.end = p; rb_funcall(delegate, id_cdata, 1, Trenni_token(cdata, encoding)); } break; case 33: case 34: case 35: #line 183 "markup.rl" { Trenni_raise_error("could not parse cdata", buffer, p-s); } break; case 50: #line 22 "markup.rl" { } #line 28 "markup.rl" { rb_funcall(delegate, id_text, 1, pcdata); } break; case 49: #line 36 "markup.rl" { characters.end = p; Trenni_append_token(&pcdata, encoding, characters); } #line 22 "markup.rl" { } #line 28 "markup.rl" { rb_funcall(delegate, id_text, 1, pcdata); } break; #line 1913 "markup.c" } } _out: {} } #line 214 "markup.rl" if (p != eof) { Trenni_raise_error("could not parse all input", buffer, p-s); } return Qnil; }
/*
 * Look up the encoding attached to +obj+ with rb_enc_get() and return the
 * value index_of_encoding() reports for it.
 */
int rb_enc_get_index(VALUE obj)
{
  int enc_index = index_of_encoding(rb_enc_get(obj));

  return enc_index;
}
/*
 * Spec helper wrapping rb_enc_nth().
 *
 * str   - Ruby String whose bytes are scanned, using its own encoding
 *         (as returned by rb_enc_get).
 * index - Fixnum passed to rb_enc_nth() as the position to locate.
 *
 * Returns, via LONG2NUM, the distance in bytes between the pointer that
 * rb_enc_nth() yields and the first byte of +str+. +self+ is unused.
 */
static VALUE encoding_spec_rb_enc_nth(VALUE self, VALUE str, VALUE index)
{
  char *head = RSTRING_PTR(str);
  char *tail = head + RSTRING_LEN(str);
  long nth = FIX2LONG(index);
  char *found = rb_enc_nth(head, tail, nth, rb_enc_get(str));

  return LONG2NUM(found - head);
}