Example #1
0
/*
 * Inspect the terminator stored just past the end of +str+'s bytes.
 *
 * Reads rb_enc_mbminlen(enc) bytes starting at the end of the string.  If
 * they form a character in the string's encoding, its code point is used
 * (nil is returned when that code point is zero); otherwise the first
 * byte is used as-is.  The value is returned as a one-character string
 * in the same encoding.
 */
static VALUE
bug_str_cstr_term_char(VALUE str)
{
    rb_encoding *enc = rb_enc_get(str);
    char *term;
    long nbytes;
    int code;

    RSTRING_GETMEM(str, term, nbytes);
    term += nbytes;			/* now points at the terminator */
    nbytes = rb_enc_mbminlen(enc);	/* reused: width of the terminator */
    code = rb_enc_precise_mbclen(term, term + nbytes, enc);
    if (MBCLEN_CHARFOUND_P(code)) {
	code = rb_enc_mbc_to_codepoint(term, term + nbytes, enc);
	if (code == 0) return Qnil;
    }
    else {
	/* not a valid character: fall back to the raw first byte */
	code = (unsigned char)*term;
    }
    return rb_enc_uint_chr((unsigned int)code, enc);
}
Example #2
0
/*
 * Resolve a "::"-separated constant path such as "Foo::Bar" to the class
 * or module it names, starting the lookup from Object.
 *
 * Raises ArgumentError when the path encoding is not ASCII compatible,
 * when the path starts with '#' (anonymous class), when a segment is
 * followed by a single ':' or when a segment is not a defined constant.
 * Raises TypeError when a segment resolves to something that is neither
 * a class nor a module.
 */
VALUE
rb_path_to_class(VALUE pathname)
{
    rb_encoding *enc = rb_enc_get(pathname);
    const char *path = RSTRING_PTR(pathname);
    const char *segment, *cur;
    VALUE klass = rb_cObject;
    ID name;

    if (!rb_enc_asciicompat(enc)) {
	rb_raise(rb_eArgError, "invalid class path encoding (non ASCII)");
    }
    if (path[0] == '#') {
	rb_raise(rb_eArgError, "can't retrieve anonymous class %s", path);
    }

    segment = cur = path;
    while (*cur != '\0') {
	int well_formed = 1;
	int type;

	/* advance to the end of the current segment */
	while (*cur != '\0' && *cur != ':') cur++;
	name = rb_intern3(segment, cur - segment, enc);

	if (cur[0] == ':') {
	    if (cur[1] == ':') {
		/* step over the "::" separator */
		cur += 2;
		segment = cur;
	    }
	    else {
		/* a lone ':' is reported with cur still on the colon */
		well_formed = 0;
	    }
	}
	if (!well_formed || !rb_const_defined(klass, name)) {
	    rb_raise(rb_eArgError, "undefined class/module %.*s", (int)(cur-path), path);
	}

	klass = rb_const_get_at(klass, name);
	type = TYPE(klass);
	if (type != T_MODULE && type != T_CLASS) {
	    rb_raise(rb_eTypeError, "%s does not refer to class/module", path);
	}
    }

    return klass;
}
Example #3
0
/*
 * Return a new pathname whose extension is replaced by +repl+.
 *
 * When the path has no extension, +repl+ is simply appended to it.
 *
 *	Pathname.new('/usr/bin/shutdown').sub_ext('.rb')
 *	    #=> #<Pathname:/usr/bin/shutdown.rb>
 */
static VALUE
path_sub_ext(VALUE self, VALUE repl)
{
    VALUE path_str = get_strpath(self);
    VALUE replaced;
    const char *head;
    const char *ext_start;
    long ext_len;
    long keep;          /* number of leading bytes of the path to retain */

    StringValue(repl);
    head = RSTRING_PTR(path_str);
    ext_len = RSTRING_LEN(path_str);
    ext_start = ruby_enc_find_extname(head, &ext_len, rb_enc_get(path_str));
    if (!ext_start) {
        /* no extension found: keep the whole path */
        keep = RSTRING_LEN(path_str);
    }
    else {
        keep = ext_start - head;
        if (ext_len <= 1) {
            /* an extension of at most one byte (presumably a lone '.')
             * stays part of the retained prefix */
            keep += ext_len;
        }
    }
    replaced = rb_str_subseq(path_str, 0, keep);
    rb_str_append(replaced, repl);
    OBJ_INFECT(replaced, path_str);
    return rb_class_new_instance(1, &replaced, rb_obj_class(self));
}
Example #4
0
/*
 * Build a new string by interpreting +fmt+ as a printf-style format and
 * substituting the +argc+ values found in +argv+.
 *
 * The format is scanned one '%' directive at a time: literal text between
 * directives is copied into the result buffer, then flags, width,
 * precision and an optional positional ("N$") or named ("<name>" /
 * "{name}") argument reference are parsed before the conversion character
 * is dispatched.
 *
 * Raises ArgumentError for malformed directives and KeyError when a named
 * reference has no value.  The result starts with the encoding of +fmt+
 * and is tainted when +fmt+ or a string inserted by %s/%p is tainted.
 *
 * NOTE(review): PUSH, CHECK, FILL, GETARG, GETPOSARG, GETNUM, GETASTER,
 * CHECKNAMEARG, get_hash, fmt_setup and sign_bits are defined outside this
 * block; remarks about them below are assumptions to be confirmed.
 */
VALUE
rb_str_format(int argc, const VALUE *argv, VALUE fmt)
{
    enum {default_float_precision = 6};
    rb_encoding *enc;
    const char *p, *end;
    char *buf;
    long blen, bsiz;
    VALUE result;

    long scanned = 0;
    int coderange = ENC_CODERANGE_7BIT;
    int width, prec, flags = FNONE;
    int nextarg = 1;
    int posarg = 0;
    int tainted = 0;
    VALUE nextvalue;
    VALUE tmp;
    VALUE str;
    volatile VALUE hash = Qundef;

/* Reject a width that is repeated or that follows a precision. */
#define CHECK_FOR_WIDTH(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "width given twice");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "width after precision"); \
    }
/* Reject a flag character that follows a width or a precision. */
#define CHECK_FOR_FLAGS(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "flag after width");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "flag after precision"); \
    }

    /* Shift the argument vector by one so that argv[1] is the first
     * caller-supplied value, matching nextarg's initial value of 1. */
    ++argc;
    --argv;
    if (OBJ_TAINTED(fmt)) tainted = 1;
    StringValue(fmt);
    enc = rb_enc_get(fmt);
    /* Scan a copy of the format rather than the caller's string
     * (presumably so it cannot change underneath us -- confirm). */
    fmt = rb_str_new4(fmt);
    p = RSTRING_PTR(fmt);
    end = p + RSTRING_LEN(fmt);
    /* Output buffer: blen bytes used out of bsiz, initially zero-filled. */
    blen = 0;
    bsiz = 120;
    result = rb_str_buf_new(bsiz);
    rb_enc_copy(result, fmt);
    buf = RSTRING_PTR(result);
    memset(buf, 0, bsiz);
    ENC_CODERANGE_SET(result, coderange);

    for (; p < end; p++) {
	const char *t;
	int n;
	VALUE sym = Qnil;

	/* Copy the literal text up to the next '%' (or the end). */
	for (t = p; t < end && *t != '%'; t++) ;
	PUSH(p, t - p);
	/* Scan only the bytes appended since the last scan to keep the
	 * result's coderange up to date. */
	if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
	    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
	    ENC_CODERANGE_SET(result, coderange);
	}
	if (t >= end) {
	    /* end of fmt string */
	    goto sprint_exit;
	}
	p = t + 1;		/* skip `%' */

	/* -1 marks width/precision as "not given" for this directive. */
	width = prec = -1;
	nextvalue = Qundef;
	/* Re-entered after every flag, width, precision or argument
	 * reference until a conversion character ends the directive. */
      retry:
	switch (*p) {
	  default:
	    if (rb_enc_isprint(*p, enc))
		rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
	    else
		rb_raise(rb_eArgError, "malformed format string");
	    break;

	  case ' ':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSPACE;
	    p++;
	    goto retry;

	  case '#':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSHARP;
	    p++;
	    goto retry;

	  case '+':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FPLUS;
	    p++;
	    goto retry;

	  case '-':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FMINUS;
	    p++;
	    goto retry;

	  case '0':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FZERO;
	    p++;
	    goto retry;

	  /* A decimal number is either a positional reference ("N$") or
	   * the field width. */
	  case '1': case '2': case '3': case '4':
	  case '5': case '6': case '7': case '8': case '9':
	    n = 0;
	    GETNUM(n, width);
	    if (*p == '$') {
		if (nextvalue != Qundef) {
		    rb_raise(rb_eArgError, "value given twice - %d$", n);
		}
		nextvalue = GETPOSARG(n);
		p++;
		goto retry;
	    }
	    CHECK_FOR_WIDTH(flags);
	    width = n;
	    flags |= FWIDTH;
	    goto retry;

	  /* Named reference: %<name> continues parsing the directive,
	   * %{name} formats the value immediately as if by %s. */
	  case '<':
	  case '{':
	    {
		const char *start = p;
		char term = (*p == '<') ? '>' : '}';
		int len;

		/* Advance character by character to the closing bracket. */
		for (; p < end && *p != term; ) {
		    p += rb_enc_mbclen(p, end, enc);
		}
		if (p >= end) {
		    rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
		}
#if SIZEOF_INT < SIZEOF_SIZE_T
		/* The name length is stored in an int below; refuse names
		 * that would not fit, quoting only a short prefix. */
		if ((size_t)(p - start) >= INT_MAX) {
		    const int message_limit = 20;
		    len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start);
		    rb_enc_raise(enc, rb_eArgError,
				 "too long name (%"PRIdSIZE" bytes) - %.*s...%c",
				 (size_t)(p - start - 2), len, start, term);
		}
#endif
		len = (int)(p - start + 1); /* including parenthesis */
		if (sym != Qnil) {
		    rb_enc_raise(enc, rb_eArgError, "named%.*s after <%"PRIsVALUE">",
				 len, start, rb_sym2str(sym));
		}
		CHECKNAMEARG(start, len, enc);
		get_hash(&hash, argc, argv);
		sym = rb_check_symbol_cstr(start + 1,
					   len - 2 /* without parenthesis */,
					   enc);
		if (sym != Qnil) nextvalue = rb_hash_lookup2(hash, sym, Qundef);
		if (nextvalue == Qundef) {
		    rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start);
		}
		if (term == '}') goto format_s;
		p++;
		goto retry;
	    }

	  /* Width taken from the argument list; a negative value means
	   * left-justify with the absolute value as the width. */
	  case '*':
	    CHECK_FOR_WIDTH(flags);
	    flags |= FWIDTH;
	    GETASTER(width);
	    if (width < 0) {
		flags |= FMINUS;
		width = -width;
	    }
	    p++;
	    goto retry;

	  case '.':
	    if (flags & FPREC0) {
		rb_raise(rb_eArgError, "precision given twice");
	    }
	    flags |= FPREC|FPREC0;

	    prec = 0;
	    p++;
	    if (*p == '*') {
		GETASTER(prec);
		if (prec < 0) {	/* ignore negative precision */
		    flags &= ~FPREC;
		}
		p++;
		goto retry;
	    }

	    GETNUM(prec, precision);
	    goto retry;

	  /* '%' followed by a newline or NUL emits a literal '%'; p is
	   * stepped back so that the following byte is not consumed. */
	  case '\n':
	  case '\0':
	    p--;
	  case '%':
	    if (flags != FNONE) {
		rb_raise(rb_eArgError, "invalid format character - %%");
	    }
	    PUSH("%", 1);
	    break;

	  /* %c: a one-character string or an integer code point. */
	  case 'c':
	    {
		VALUE val = GETARG();
		VALUE tmp;
		unsigned int c;
		int n;

		tmp = rb_check_string_type(val);
		if (!NIL_P(tmp)) {
		    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
			rb_raise(rb_eArgError, "%%c requires a character");
		    }
		    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
		    RB_GC_GUARD(tmp);
		}
		else {
		    c = NUM2INT(val);
		    n = rb_enc_codelen(c, enc);
		}
		if (n <= 0) {
		    rb_raise(rb_eArgError, "invalid character");
		}
		/* The character counts as one column; pad with width-1
		 * spaces after it (FMINUS) or before it. */
		if (!(flags & FWIDTH)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
		else if ((flags & FMINUS)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		    FILL(' ', width-1);
		}
		else {
		    FILL(' ', width-1);
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
	    }
	    break;

	  /* %s inserts the value's string form, %p its inspect form.
	   * format_s is also the target of the %{name} shortcut above. */
	  case 's':
	  case 'p':
	  format_s:
	    {
		VALUE arg = GETARG();
		long len, slen;

		if (*p == 'p') arg = rb_inspect(arg);
		str = rb_obj_as_string(arg);
		if (OBJ_TAINTED(str)) tainted = 1;
		len = RSTRING_LEN(str);
		/* Sync the result's length and coderange with the buffer
		 * before the encoding compatibility check below. */
		rb_str_set_len(result, blen);
		if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
		    int cr = coderange;
		    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
		    ENC_CODERANGE_SET(result,
				      (cr == ENC_CODERANGE_UNKNOWN ?
				       ENC_CODERANGE_BROKEN : (coderange = cr)));
		}
		enc = rb_enc_check(result, str);
		if (flags&(FPREC|FWIDTH)) {
		    /* Width and precision are measured in characters
		     * (slen), while copying works in bytes (len). */
		    slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
		    if (slen < 0) {
			rb_raise(rb_eArgError, "invalid mbstring sequence");
		    }
		    if ((flags&FPREC) && (prec < slen)) {
			char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
					     prec, enc);
			slen = prec;
			len = p - RSTRING_PTR(str);
		    }
		    /* need to adjust multi-byte string pos */
		    if ((flags&FWIDTH) && (width > slen)) {
			width -= (int)slen;
			if (!(flags&FMINUS)) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			CHECK(len);
			memcpy(&buf[blen], RSTRING_PTR(str), len);
			RB_GC_GUARD(str);
			blen += len;
			if (flags&FMINUS) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			rb_enc_associate(result, enc);
			break;
		    }
		}
		PUSH(RSTRING_PTR(str), len);
		RB_GC_GUARD(str);
		rb_enc_associate(result, enc);
	    }
	    break;

	  /* Integer conversions. */
	  case 'd':
	  case 'i':
	  case 'o':
	  case 'x':
	  case 'X':
	  case 'b':
	  case 'B':
	  case 'u':
	    {
		volatile VALUE val = GETARG();
                int valsign;
		char nbuf[64], *s;
		const char *prefix = 0;
		int sign = 0, dots = 0;
		char sc = 0;
		long v = 0;
		int base, bignum = 0;
		int len;

		/* Decimal conversions are always signed; the other bases
		 * are signed only when '+' or ' ' was given. */
		switch (*p) {
		  case 'd':
		  case 'i':
		  case 'u':
		    sign = 1; break;
		  case 'o':
		  case 'x':
		  case 'X':
		  case 'b':
		  case 'B':
		    if (flags&(FPLUS|FSPACE)) sign = 1;
		    break;
		}
		/* '#' selects a radix prefix for the non-decimal bases. */
		if (flags & FSHARP) {
		    switch (*p) {
		      case 'o':
			prefix = "0"; break;
		      case 'x':
			prefix = "0x"; break;
		      case 'X':
			prefix = "0X"; break;
		      case 'b':
			prefix = "0b"; break;
		      case 'B':
			prefix = "0B"; break;
		    }
		}

		/* Coerce the argument until it is a Fixnum or a Bignum. */
	      bin_retry:
		switch (TYPE(val)) {
		  case T_FLOAT:
		    if (FIXABLE(RFLOAT_VALUE(val))) {
			val = LONG2FIX((long)RFLOAT_VALUE(val));
			goto bin_retry;
		    }
		    val = rb_dbl2big(RFLOAT_VALUE(val));
		    if (FIXNUM_P(val)) goto bin_retry;
		    bignum = 1;
		    break;
		  case T_STRING:
		    val = rb_str_to_inum(val, 0, TRUE);
		    goto bin_retry;
		  case T_BIGNUM:
		    bignum = 1;
		    break;
		  case T_FIXNUM:
		    v = FIX2LONG(val);
		    break;
		  default:
		    val = rb_Integer(val);
		    goto bin_retry;
		}

		switch (*p) {
		  case 'o':
		    base = 8; break;
		  case 'x':
		  case 'X':
		    base = 16; break;
		  case 'b':
		  case 'B':
		    base = 2; break;
		  case 'u':
		  case 'd':
		  case 'i':
		  default:
		    base = 10; break;
		}

                if (base != 10) {
                    /* Power-of-two bases: pack the integer into one byte
                     * per digit, then map each byte through
                     * ruby_digitmap to its digit character. */
                    int numbits = ffs(base)-1;
                    size_t abs_nlz_bits;
                    size_t numdigits = rb_absint_numwords(val, numbits, &abs_nlz_bits);
                    long i;
                    if (INT_MAX-1 < numdigits) /* INT_MAX is used because rb_long2int is used later. */
                        rb_raise(rb_eArgError, "size too big");
                    if (sign) {
                        /* Signed form: digits of the magnitude plus an
                         * explicit sign character. */
                        if (numdigits == 0)
                            numdigits = 1;
                        tmp = rb_str_new(NULL, numdigits);
                        valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp),
                                1, CHAR_BIT-numbits, INTEGER_PACK_BIG_ENDIAN);
                        for (i = 0; i < RSTRING_LEN(tmp); i++)
                            RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]];
                        s = RSTRING_PTR(tmp);
                        if (valsign < 0) {
                            sc = '-';
                            width--;
                        }
                        else if (flags & FPLUS) {
                            sc = '+';
                            width--;
                        }
                        else if (flags & FSPACE) {
                            sc = ' ';
                            width--;
                        }
                    }
                    else {
                        /* Following conditional "numdigits++" guarantees the
                         * most significant digit as
                         * - '1'(bin), '7'(oct) or 'f'(hex) for negative numbers
                         * - '0' for zero
                         * - not '0' for positive numbers.
                         *
                         * It also guarantees the most significant two
                         * digits will not be '11'(bin), '77'(oct), 'ff'(hex)
                         * or '00'.  */
                        if (numdigits == 0 ||
                                ((abs_nlz_bits != (size_t)(numbits-1) ||
                                  !rb_absint_singlebit_p(val)) &&
                                 (!bignum ? v < 0 : BIGNUM_NEGATIVE_P(val))))
                            numdigits++;
                        tmp = rb_str_new(NULL, numdigits);
                        valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp),
                                1, CHAR_BIT-numbits, INTEGER_PACK_2COMP | INTEGER_PACK_BIG_ENDIAN);
                        for (i = 0; i < RSTRING_LEN(tmp); i++)
                            RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]];
                        s = RSTRING_PTR(tmp);
                        /* Negative two's-complement output gets a ".."
                         * marker in front of its digits. */
                        dots = valsign < 0;
                    }
                    len = rb_long2int(RSTRING_END(tmp) - s);
                }
                else if (!bignum) {
                    /* Decimal Fixnum: print the magnitude into nbuf and
                     * keep the sign character separately in sc. */
                    valsign = 1;
                    if (v < 0) {
                        v = -v;
                        sc = '-';
                        width--;
                        valsign = -1;
                    }
                    else if (flags & FPLUS) {
                        sc = '+';
                        width--;
                    }
                    else if (flags & FSPACE) {
                        sc = ' ';
                        width--;
                    }
                    snprintf(nbuf, sizeof(nbuf), "%ld", v);
                    s = nbuf;
		    len = (int)strlen(s);
		}
		else {
                    /* Decimal Bignum: stringify it and strip a leading
                     * '-' into sc. */
                    tmp = rb_big2str(val, 10);
                    s = RSTRING_PTR(tmp);
                    valsign = 1;
                    if (s[0] == '-') {
                        s++;
                        sc = '-';
                        width--;
                        valsign = -1;
                    }
                    else if (flags & FPLUS) {
                        sc = '+';
                        width--;
                    }
                    else if (flags & FSPACE) {
                        sc = ' ';
                        width--;
                    }
		    len = rb_long2int(RSTRING_END(tmp) - s);
		}

		/* The ".." marker takes two columns. */
		if (dots) {
		    prec -= 2;
		    width -= 2;
		}

		if (*p == 'X') {
		    char *pp = s;
		    int c;
		    while ((c = (int)(unsigned char)*pp) != 0) {
			*pp = rb_enc_toupper(c, enc);
			pp++;
		    }
		}
		if (prefix && !prefix[1]) { /* octal */
		    if (dots) {
			prefix = 0;
		    }
		    else if (len == 1 && *s == '0') {
			len = 0;
			if (flags & FPREC) prec--;
		    }
		    else if ((flags & FPREC) && (prec > len)) {
			prefix = 0;
		    }
		}
		else if (len == 1 && *s == '0') {
		    prefix = 0;
		}
		if (prefix) {
		    width -= (int)strlen(prefix);
		}
		/* With only the '0' flag, the width is converted into
		 * precision so the padding is emitted as leading digits;
		 * otherwise width becomes the space padding left over after
		 * max(prec, len) digits. */
		if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
		    prec = width;
		    width = 0;
		}
		else {
		    if (prec < len) {
			if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
			prec = len;
		    }
		    width -= prec;
		}
		/* Output order: left padding, sign, prefix, "..", fill
		 * digits, the digits themselves, right padding. */
		if (!(flags&FMINUS)) {
		    CHECK(width);
		    while (width-- > 0) {
			buf[blen++] = ' ';
		    }
		}
		if (sc) PUSH(&sc, 1);
		if (prefix) {
		    int plen = (int)strlen(prefix);
		    PUSH(prefix, plen);
		}
		CHECK(prec - len);
		if (dots) PUSH("..", 2);
		if (!sign && valsign < 0) {
		    /* Negative two's-complement value: fill with the
		     * character returned by sign_bits instead of '0'. */
		    char c = sign_bits(base, p);
		    while (len < prec--) {
			buf[blen++] = c;
		    }
		}
		else if ((flags & (FMINUS|FPREC)) != FMINUS) {
		    while (len < prec--) {
			buf[blen++] = '0';
		    }
		}
		PUSH(s, len);
		RB_GC_GUARD(tmp);
		CHECK(width);
		while (width-- > 0) {
		    buf[blen++] = ' ';
		}
	    }
	    break;

	  /* %f: Rational arguments are formatted with integer arithmetic;
	   * any other value is handed on to float_value. */
	  case 'f':
	    {
		VALUE val = GETARG(), num, den;
		int sign = (flags&FPLUS) ? 1 : 0, zero = 0;
		long len, done = 0;
		int prefix = 0;
		if (!RB_TYPE_P(val, T_RATIONAL)) {
		    nextvalue = val;
		    goto float_value;
		}
		if (!(flags&FPREC)) prec = default_float_precision;
		den = rb_rational_den(val);
		num = rb_rational_num(val);
		/* Work on the magnitude and remember the sign in `sign'. */
		if (FIXNUM_P(num)) {
		    if ((SIGNED_VALUE)num < 0) {
			long n = -FIX2LONG(num);
			num = LONG2FIX(n);
			sign = -1;
		    }
		}
		else if (rb_num_negative_p(num)) {
		    sign = -1;
		    num = rb_funcallv(num, idUMinus, 0, 0);
		}
		if (den != INT2FIX(1) || prec > 1) {
		    /* num = (num * 10**prec + den/2).div(den): the value
		     * scaled by 10**prec, rounded to an integer. */
		    const ID idDiv = rb_intern("div");
		    VALUE p10 = rb_int_positive_pow(10, prec);
		    VALUE den_2 = rb_funcall(den, idDiv, 1, INT2FIX(2));
		    num = rb_funcallv(num, '*', 1, &p10);
		    num = rb_funcallv(num, '+', 1, &den_2);
		    num = rb_funcallv(num, idDiv, 1, &den);
		}
		else if (prec >= 0) {
		    /* Integral value left unscaled: the fractional part is
		     * `zero' trailing '0' characters. */
		    zero = prec;
		}
		val = rb_obj_as_string(num);
		/* First pass: count the bytes needed to reserve space. */
		len = RSTRING_LEN(val) + zero;
		if (prec >= len) ++len; /* integer part 0 */
		if (sign || (flags&FSPACE)) ++len;
		if (prec > 0) ++len; /* period */
		CHECK(len > width ? len : width);
		if (sign || (flags&FSPACE)) {
		    buf[blen++] = sign > 0 ? '+' : sign < 0 ? '-' : ' ';
		    prefix++;
		    done++;
		}
		/* Second pass: emit the integer part, the period and the
		 * fraction; `done' counts the bytes written. */
		len = RSTRING_LEN(val) + zero;
		t = RSTRING_PTR(val);
		if (len > prec) {
		    memcpy(&buf[blen], t, len - prec);
		    blen += len - prec;
		    done += len - prec;
		}
		else {
		    buf[blen++] = '0';
		    done++;
		}
		if (prec > 0) {
		    buf[blen++] = '.';
		    done++;
		}
		if (zero) {
		    FILL('0', zero);
		    done += zero;
		}
		else if (prec > len) {
		    FILL('0', prec - len);
		    memcpy(&buf[blen], t, len);
		    blen += len;
		    done += prec;
		}
		else if (prec > 0) {
		    memcpy(&buf[blen], t + len - prec, prec);
		    blen += prec;
		    done += prec;
		}
		if ((flags & FWIDTH) && width > done) {
		    /* Pad to width.  Unless left-justified, the text just
		     * written is moved right and the gap is filled; with
		     * '0' the sign stays in place and zeros follow it. */
		    int fill = ' ';
		    long shifting = 0;
		    if (!(flags&FMINUS)) {
			shifting = done;
			if (flags&FZERO) {
			    shifting -= prefix;
			    fill = '0';
			}
			blen -= shifting;
			memmove(&buf[blen + width - done], &buf[blen], shifting);
		    }
		    FILL(fill, width - done);
		    blen += shifting;
		}
		RB_GC_GUARD(val);
		break;
	    }
	  case 'g':
	  case 'G':
	  case 'e':
	  case 'E':
	    /* TODO: rational support */
	  case 'a':
	  case 'A':
	  float_value:
	    {
		VALUE val = GETARG();
		double fval;
		int i, need;
		char fbuf[32];

		fval = RFLOAT_VALUE(rb_Float(val));
		/* NaN and infinities are written by hand as "NaN"/"Inf"
		 * with an optional sign, padded to the field width. */
		if (isnan(fval) || isinf(fval)) {
		    const char *expr;

		    if (isnan(fval)) {
			expr = "NaN";
		    }
		    else {
			expr = "Inf";
		    }
		    need = (int)strlen(expr);
		    if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
			need++;
		    if ((flags & FWIDTH) && need < width)
			need = width;

		    CHECK(need + 1);
		    /* Pre-fill the field with spaces and a NUL terminator,
		     * then overwrite the sign and text in place. */
		    snprintf(&buf[blen], need + 1, "%*s", need, "");
		    if (flags & FMINUS) {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen++] = '-';
			else if (flags & FPLUS)
			    buf[blen++] = '+';
			else if (flags & FSPACE)
			    blen++;
			memcpy(&buf[blen], expr, strlen(expr));
		    }
		    else {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen + need - strlen(expr) - 1] = '-';
			else if (flags & FPLUS)
			    buf[blen + need - strlen(expr) - 1] = '+';
			else if ((flags & FSPACE) && need > width)
			    blen++;
			memcpy(&buf[blen + need - strlen(expr)], expr,
			       strlen(expr));
		    }
		    blen += strlen(&buf[blen]);
		    break;
		}

		/* Finite value: fmt_setup fills fbuf (presumably with the
		 * equivalent C format -- confirm), which is then passed to
		 * snprintf as the format string. */
		fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
		/* Upper bound on the output size: digits implied by the
		 * binary exponent (except for %e/%E), the precision, the
		 * width, plus 20 bytes of slack. */
		need = 0;
		if (*p != 'e' && *p != 'E') {
		    i = INT_MIN;
		    frexp(fval, &i);
		    if (i > 0)
			need = BIT_DIGITS(i);
		}
		need += (flags&FPREC) ? prec : default_float_precision;
		if ((flags&FWIDTH) && need < width)
		    need = width;
		need += 20;

		CHECK(need);
		snprintf(&buf[blen], need, fbuf, fval);
		blen += strlen(&buf[blen]);
	    }
	    break;
	}
	/* Flags apply to a single directive only. */
	flags = FNONE;
    }

  sprint_exit:
    RB_GC_GUARD(fmt);
    /* XXX - We cannot validate the number of arguments if (digit)$ style used.
     */
    /* Left-over arguments raise in debug mode and warn in verbose mode. */
    if (posarg >= 0 && nextarg < argc) {
	const char *mesg = "too many arguments for format string";
	if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
	if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
    }
    /* Set the result's length to the number of bytes written. */
    rb_str_resize(result, blen);

    if (tainted) OBJ_TAINT(result);
    return result;
}
Example #5
0
/* Return the name of the encoding reported by rb_enc_get(obj) as a new
 * Ruby string. */
static VALUE encoding_spec_rb_enc_get(VALUE self, VALUE obj) {
  rb_encoding *enc = rb_enc_get(obj);
  const char *enc_name = enc->name;

  return rb_str_new2(enc_name);
}
Example #6
0
// Copy a Ruby string into the buffer's backing storage.
//
// Arguments (via argv): string, then optional offset, length and
// external encoding.
//   string   - converted with StringValue, then exported to the target encoding
//   offset   - byte position in the buffer to start writing at (default 0)
//   length   - maximum number of bytes to write; capped at the space left in
//              the buffer and at the byte size of the exported string
//   encoding - object whose encoding is used for the export; defaults to the
//              default external encoding
//
// Only whole characters are written.  The number of characters written is
// stored under RUV_BUFFER_CHAR_WRITTEN_SYM on the buffer's class, and the
// number of bytes written is returned.
//
// Raises ArgumentError when offset is not smaller than the buffer length,
// and TypeError (from Check_Type) when offset or length is not a Fixnum.
VALUE ruv_buffer_write(int argc, VALUE* argv, VALUE rb_buffer) {
    VALUE           rb_str, rb_offset, rb_length, rb_extern_enc, rb_cBuffer;
    size_t          offset, length, max_length, char_count;
    ruv_buffer_t    *buffer;
    rb_encoding     *rb_extern_encoding;

    Data_Get_Struct(rb_buffer, ruv_buffer_t, buffer);

    rb_scan_args(argc, argv, "13", &rb_str, &rb_offset, &rb_length, &rb_extern_enc);
    StringValue(rb_str);

    // encoding: use specified external encoding if provided
    // otherwise use Encoding.default_external as default
    if(!NIL_P(rb_extern_enc)) {
        rb_extern_encoding = rb_enc_get(rb_extern_enc);
    } else {
        rb_extern_encoding = rb_default_external_encoding();
    }

    // convert to external encoding
    rb_str = rb_str_export_to_enc(rb_str, rb_extern_encoding);

    // offset: either specified in params or 0
    if(!NIL_P(rb_offset)) {
        Check_Type(rb_offset, T_FIXNUM);
        offset = NUM2SIZET(rb_offset);
        if(offset >= buffer->length) {
            rb_raise(rb_eArgError, "Overflow! offset is larger than buffer size.");
        }
    } else {
        offset = 0;
    }

    // max length: the smaller of the max available space or the whole ruby string
    max_length = MIN(buffer->length - offset, (size_t)RSTRING_LEN(rb_str));

    // length: number of bytes to write. (include half chars)
    if(!NIL_P(rb_length)) {
        Check_Type(rb_length, T_FIXNUM);
        length = NUM2SIZET(rb_length);
        // A caller-supplied length must never make memcpy read past the
        // end of the string or write past the end of the buffer.
        if(length > max_length) {
            length = max_length;
        }
    } else {
        length = max_length;
    }

    // Number of characters covered by the first `length` bytes. Computed
    // unconditionally so the value reported below is always initialized.
    char_count = (size_t)rb_str_sublen(rb_str, (long)length);

    // If we are not writing the whole string into the buffer,
    // re-adjust length so we don't write half a character (utf8, etc):
    // convert the char count back to a byte length.
    // This way only whole chars are written to the buffer.
    if(length != (size_t)RSTRING_LEN(rb_str)) {
        length = (size_t)rb_str_offset(rb_str, (long)char_count);
    }

    memcpy(buffer->data + offset, RSTRING_PTR(rb_str), length);

    // set instance variable so we know how many characters were written
    rb_cBuffer = rb_obj_class(rb_buffer);
    rb_iv_set(rb_cBuffer, RUV_BUFFER_CHAR_WRITTEN_SYM, SIZET2NUM(char_count));

    return SIZET2NUM(length);
}
Example #7
0
/*
 * Expand a path into an absolute, normalized Windows path.
 *
 * argv[0] is the path to expand; the optional argv[1] is the directory the
 * path is resolved against.  A leading "~" (alone or followed by a directory
 * separator) is replaced with the home directory returned by
 * fenix_home_dir(); "~user" is not supported and raises ArgumentError.
 * The assembled path is handed to GetFullPathNameW(), then a trailing
 * separator, a trailing dot and an invalid ':$DATA' suffix are stripped and
 * backslashes are turned into forward slashes.
 *
 * Returns a new String carrying the encoding of path (checked against dir
 * when dir is given), or nil when GetFullPathNameW() returns 0.
 *
 * Raises ArgumentError when the home directory cannot be found, when it is
 * not absolute, or when "~user" expansion is requested.
 *
 * Every heap buffer owned by this function is released before rb_raise(),
 * because rb_raise() does not return.
 *
 * TODO: malloc() results are still not checked.
 */
static VALUE
fenix_file_expand_path(int argc, VALUE *argv)
{
	size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0;
	size_t buffer_len = 0;
	char *fullpath = NULL;
	wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL, *wdir = NULL;
	wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
	UINT cp;
	VALUE result = Qnil, path = Qnil, dir = Qnil;
	wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
	wchar_t path_drive = L'\0', dir_drive = L'\0';
	int ignore_dir = 0;
	rb_encoding *path_encoding;
	int tainted = 0;
	/* prepare for rb_file_absolute_path(); always 0 here, so '~' is expanded */
	int abs_mode = 0;

	/* retrieve path and dir from argv */
	rb_scan_args(argc, argv, "11", &path, &dir);

	/* tainted if path is tainted */
	tainted = OBJ_TAINTED(path);

	/* get path encoding */
	if (NIL_P(dir)) {
		path_encoding = rb_enc_get(path);
	} else {
		path_encoding = rb_enc_check(path, dir);
	}
	cp = fenix_code_page(path_encoding);

	/* coerce path to a string */
	path = fenix_coerce_to_path(path);

	/* convert path from char * to wchar_t * */
	fenix_path_to_wchar(path, &wpath, &wpath_pos, &wpath_len, cp);

	/* determine if we need the user's home directory */
	/* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */
	if (abs_mode == 0 && ((wpath_len == 1 && wpath_pos[0] == L'~') ||
		(wpath_len >= 2 && wpath_pos[0] == L'~' && IS_DIR_SEPARATOR_P(wpath_pos[1])))) {
		/* tainted if expanding '~' */
		tainted = 1;

		whome = fenix_home_dir();
		if (whome == NULL) {
			free(wpath);
			rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
		}
		whome_len = wcslen(whome);

		if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
			free(wpath);
			/* whome is owned by this function as well; release it before raising */
			free(whome);
			rb_raise(rb_eArgError, "non-absolute home");
		}

		/* ignores dir since we are expanding home */
		ignore_dir = 1;

		/* exclude ~ from the result */
		wpath_pos++;
		wpath_len--;

		/* exclude separator if present */
		if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
			wpath_pos++;
			wpath_len--;
		}
	} else if (wpath_len >= 2 && wpath_pos[1] == L':') {
		if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
			/* ignore dir since path contains a drive letter and a root slash */
			ignore_dir = 1;
		} else {
			/* determine if we ignore dir or not later */
			path_drive = wpath_pos[0];
		}
	} else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
		/* "~user" form: not supported, report the user name in the error */
		wchar_t *wuser = wpath_pos + 1;
		wchar_t *pos = wuser;
		char *user;

		/* tainted if expanding '~' */
		tainted = 1;

		/* cut the path after the user name */
		while (!IS_DIR_SEPARATOR_P(*pos) && *pos != '\0')
			pos++;

		*pos = '\0';
		size = WideCharToMultiByte(cp, 0, wuser, -1, NULL, 0, NULL, NULL);
		user = (char *)malloc(size * sizeof(char));
		WideCharToMultiByte(cp, 0, wuser, -1, user, size, NULL, NULL);

		/* convert to VALUE and set the path encoding */
		result = rb_enc_str_new(user, size - 1, path_encoding);

		free(wpath);
		free(user);

		rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
	}

	/* convert dir */
	if (!ignore_dir && !NIL_P(dir)) {
		/* coerce dir to a string */
		dir = fenix_coerce_to_path(dir);

		/* convert dir from char * to wchar_t * */
		fenix_path_to_wchar(dir, &wdir, NULL, &wdir_len, cp);

		if (wdir_len >= 2 && wdir[1] == L':') {
			dir_drive = wdir[0];
			/* rooted path: only the drive part ("X:") of dir is kept */
			if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
				wdir_len = 2;
			}
		} else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
			/* UNC path */
			if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
				/* cut the UNC path tail to '//host/share' */
				size_t separators = 0;
				size_t pos = 2;
				while (pos < wdir_len && separators < 2) {
					if (IS_DIR_SEPARATOR_P(wdir[pos])) {
						separators++;
					}
					pos++;
				}
				if (separators == 2)
					wdir_len = pos - 1;
			}
		}
	}

	/* determine if we ignore dir or not */
	if (!ignore_dir && path_drive && dir_drive) {
		if (towupper(path_drive) == towupper(dir_drive)) {
			/* exclude path drive letter to use dir */
			wpath_pos += 2;
			wpath_len -= 2;
		} else {
			/* ignore dir since path drive is different from dir drive */
			ignore_dir = 1;
			wdir_len = 0;
		}
	}

	if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
		/* ignore dir since path has UNC root */
		ignore_dir = 1;
		wdir_len = 0;
	} else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
		!dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
		/* ignore dir since path has root slash and dir doesn't have drive or UNC root */
		ignore_dir = 1;
		wdir_len = 0;
	}

	/* room for home + separator, dir + separator, path (or ".") and the terminator */
	buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;

	buffer = buffer_pos = (wchar_t *)malloc((buffer_len + 1) * sizeof(wchar_t));

	/* add home */
	if (whome_len) {
		wcsncpy(buffer_pos, whome, whome_len);
		buffer_pos += whome_len;
	}

	/* add separator if required */
	if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
		buffer_pos[0] = L'\\';
		buffer_pos++;
	}

	if (wdir_len) {
		/* tainted if dir is used and dir is tainted */
		if (!tainted && OBJ_TAINTED(dir))
			tainted = 1;

		wcsncpy(buffer_pos, wdir, wdir_len);
		buffer_pos += wdir_len;
	}

	/* add separator if required */
	if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
		buffer_pos[0] = L'\\';
		buffer_pos++;
	}

	/* now deal with path */
	if (wpath_len) {
		wcsncpy(buffer_pos, wpath_pos, wpath_len);
		buffer_pos += wpath_len;
	}

	/* GetFullPathNameW requires at least "." to determine current directory */
	if (wpath_len == 0) {
		buffer_pos[0] = L'.';
		buffer_pos++;
	}

	/* Ensure buffer is NUL terminated */
	buffer_pos[0] = L'\0';

	/* tainted if path is relative */
	if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer))) {
		tainted = 1;
	}

	/* FIXME: Make this more robust */
	/* Try the stack buffer first; the return value tells us if it was too small */
	size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
	if (size) {
		if (size > PATH_BUFFER_SIZE) {
			/* allocate enough memory to contain the response */
			wfullpath = (wchar_t *)malloc(size * sizeof(wchar_t));
			size = GetFullPathNameW(buffer, size, wfullpath, NULL);
		} else {
			wfullpath = wfullpath_buffer;
		}

		/* Remove any trailing slashes */
		if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
			wfullpath[size - 2] != L':' &&
			!(size == 2 && IS_DIR_UNC_P(wfullpath))) {
			size -= 1;
			wfullpath[size] = L'\0';
		}

		/* Remove any trailing dot */
		if (wfullpath[size - 1] == L'.') {
			size -= 1;
			wfullpath[size] = L'\0';
		}

		/* removes trailing invalid ':$DATA' */
		size = fenix_remove_invalid_alternative_data(wfullpath, size);

		/* sanitize backslashes with forwardslashes */
		fenix_replace_wchar(wfullpath, L'\\', L'/');

		/* convert to char * using the code page of the path encoding */
		size = WideCharToMultiByte(cp, 0, wfullpath, -1, NULL, 0, NULL, NULL);
		fullpath = (char *)malloc(size * sizeof(char));
		WideCharToMultiByte(cp, 0, wfullpath, -1, fullpath, size, NULL, NULL);

		/* convert to VALUE and set the path encoding (size counts the NUL) */
		result = rb_enc_str_new(fullpath, size - 1, path_encoding);

		/* makes the result object tainted if expanding tainted strings or returning modified path */
		if (tainted)
			OBJ_TAINT(result);
	}

	/* TODO: better cleanup */
	/* free(NULL) is a no-op, so the pointers need no individual guards */
	free(buffer);
	free(wpath);
	free(wdir);
	free(whome);

	/* wfullpath may alias the stack buffer, which must not be freed */
	if (wfullpath != wfullpath_buffer)
		free(wfullpath);

	free(fullpath);

	return result;
}
Example #8
0
File: file.c Project: 0x00evil/ruby
/*
 * Expand fname into a full Windows path, optionally relative to dname, and
 * store the text in result.
 *
 * fname     - path to expand (may be nil, in which case "." is expanded).
 * dname     - base directory, or nil.
 * abs_mode  - when non-zero, a leading '~' in fname/dname is NOT expanded.
 * long_name - when non-zero, replace_to_long_name() is applied to the
 *             expanded path.
 * result    - String whose length is reset to 0 and which then receives the
 *             expanded path; the value returned by append_wstr() is what is
 *             finally returned, associated with the path encoding.
 *
 * Raises ArgumentError when the home directory cannot be found, when it is
 * not absolute, or when a "~user" form is used.
 *
 * NOTE(review): wpath is released with xfree() on some error paths and with
 * free() on others and in the final cleanup; wdir is always released with
 * free().  Which one matches the allocator used by mbstr_to_wstr() is not
 * visible here -- confirm and make the calls consistent.
 */
VALUE
rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result)
{
    size_t size = 0, whome_len = 0;
    size_t buffer_len = 0;
    long wpath_len = 0, wdir_len = 0;
    /* NOTE(review): fullpath is never assigned in this function, so the
     * xfree(fullpath) in the cleanup section never runs. */
    char *fullpath = NULL;
    wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL;
    wchar_t *wdir = NULL, *wdir_pos = NULL;
    wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
    UINT path_cp, cp;
    VALUE path = fname, dir = dname;
    wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
    wchar_t path_drive = L'\0', dir_drive = L'\0';
    int ignore_dir = 0;
    rb_encoding *path_encoding;
    int tainted = 0;

    /* tainted if path is tainted */
    tainted = OBJ_TAINTED(path);

    /* get path encoding */
    if (NIL_P(dir)) {
	path_encoding = rb_enc_get(path);
    }
    else {
	path_encoding = rb_enc_check(path, dir);
    }

    /* cp is the code page used for conversions; path_cp remembers the one
     * derived from the path encoding (possibly INVALID_CODE_PAGE) */
    cp = path_cp = code_page(path_encoding);

    /* workaround invalid codepage */
    if (path_cp == INVALID_CODE_PAGE) {
	cp = CP_UTF8;
	if (!NIL_P(path)) {
	    path = fix_string_encoding(path, path_encoding);
	}
    }

    /* convert char * to wchar_t */
    if (!NIL_P(path)) {
	wpath = mbstr_to_wstr(cp, RSTRING_PTR(path), (int)RSTRING_LEN(path), &wpath_len);
	wpath_pos = wpath;
    }

    /* determine if we need the user's home directory */
    /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */
    if (abs_mode == 0 && wpath_len > 0 && wpath_pos[0] == L'~' &&
	(wpath_len == 1 || IS_DIR_SEPARATOR_P(wpath_pos[1]))) {
	/* tainted if expanding '~' */
	tainted = 1;

	whome = home_dir();
	if (whome == NULL) {
	    xfree(wpath);
	    rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
	}
	whome_len = wcslen(whome);

	if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
	    xfree(wpath);
	    xfree(whome);
	    rb_raise(rb_eArgError, "non-absolute home");
	}

	if (path_cp == INVALID_CODE_PAGE || rb_enc_str_asciionly_p(path)) {
	    /* use filesystem encoding if expanding home dir */
	    path_encoding = rb_filesystem_encoding();
	    cp = path_cp = system_code_page();
	}

	/* ignores dir since we are expanding home */
	ignore_dir = 1;

	/* exclude ~ from the result */
	wpath_pos++;
	wpath_len--;

	/* exclude separator if present */
	if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
	    wpath_pos++;
	    wpath_len--;
	}
    }
    else if (wpath_len >= 2 && wpath_pos[1] == L':') {
	if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
	    /* ignore dir since path contains a drive letter and a root slash */
	    ignore_dir = 1;
	}
	else {
	    /* determine if we ignore dir or not later */
	    /* the "X:" prefix is stripped here; it is re-emitted below when
	     * dir supplies no drive of its own */
	    path_drive = wpath_pos[0];
	    wpath_pos += 2;
	    wpath_len -= 2;
	}
    }
    else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
	/* "~user" form: build the error message, free wpath, then raise */
	result = rb_str_new_cstr("can't find user ");
	result = append_wstr(result, wpath_pos + 1, user_length_in_path(wpath_pos + 1, wpath_len - 1),
			     cp, path_cp, path_encoding);

	if (wpath)
	    xfree(wpath);

	rb_exc_raise(rb_exc_new_str(rb_eArgError, result));
    }

    /* convert dir */
    if (!ignore_dir && !NIL_P(dir)) {
	/* fix string encoding */
	if (path_cp == INVALID_CODE_PAGE) {
	    dir = fix_string_encoding(dir, path_encoding);
	}

	/* convert char * to wchar_t */
	if (!NIL_P(dir)) {
	    wdir = mbstr_to_wstr(cp, RSTRING_PTR(dir), (int)RSTRING_LEN(dir), &wdir_len);
	    wdir_pos = wdir;
	}

	/* dir itself may start with '~': expand it the same way as path */
	if (abs_mode == 0 && wdir_len > 0 && wdir_pos[0] == L'~' &&
	    (wdir_len == 1 || IS_DIR_SEPARATOR_P(wdir_pos[1]))) {
	    /* tainted if expanding '~' */
	    tainted = 1;

	    whome = home_dir();
	    if (whome == NULL) {
		free(wpath);
		free(wdir);
		rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
	    }
	    whome_len = wcslen(whome);

	    if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
		free(wpath);
		free(wdir);
		xfree(whome);
		rb_raise(rb_eArgError, "non-absolute home");
	    }

	    /* exclude ~ from the result */
	    wdir_pos++;
	    wdir_len--;

	    /* exclude separator if present */
	    if (wdir_len && IS_DIR_SEPARATOR_P(wdir_pos[0])) {
		wdir_pos++;
		wdir_len--;
	    }
	}
	else if (wdir_len >= 2 && wdir[1] == L':') {
	    dir_drive = wdir[0];
	    /* rooted path: only the drive part ("X:") of dir is kept */
	    if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
		wdir_len = 2;
	    }
	}
	else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
	    /* UNC path */
	    if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
		/* cut the UNC path tail to '//host/share' */
		long separators = 0;
		long pos = 2;
		while (pos < wdir_len && separators < 2) {
		    if (IS_DIR_SEPARATOR_P(wdir[pos])) {
			separators++;
		    }
		    pos++;
		}
		if (separators == 2)
		    wdir_len = pos - 1;
	    }
	}
	else if (abs_mode == 0 && wdir_len >= 2 && wdir_pos[0] == L'~') {
	    /* "~user" form in dir: build the message, free buffers, raise */
	    result = rb_str_new_cstr("can't find user ");
	    result = append_wstr(result, wdir_pos + 1, user_length_in_path(wdir_pos + 1, wdir_len - 1),
				 cp, path_cp, path_encoding);
	    if (wpath)
		free(wpath);

	    if (wdir)
		free(wdir);

	    rb_exc_raise(rb_exc_new_str(rb_eArgError, result));
	}
    }

    /* determine if we ignore dir or not */
    if (!ignore_dir && path_drive && dir_drive) {
	if (towupper(path_drive) != towupper(dir_drive)) {
	    /* ignore dir since path drive is different from dir drive */
	    ignore_dir = 1;
	    wdir_len = 0;
	    dir_drive = 0;
	}
    }

    if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
	/* ignore dir since path has UNC root */
	ignore_dir = 1;
	wdir_len = 0;
    }
    else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
	     !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
	/* ignore dir since path has root slash and dir doesn't have drive or UNC root */
	ignore_dir = 1;
	wdir_len = 0;
    }

    /* room for home + separator, dir + separator, path (or ".") and the
     * terminator; one extra wchar_t is added in the allocation below */
    buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;

    buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t));

    /* add home */
    if (whome_len) {
	wcsncpy(buffer_pos, whome, whome_len);
	buffer_pos += whome_len;
    }

    /* Add separator if required */
    if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
	buffer_pos[0] = L'\\';
	buffer_pos++;
    }
    else if (!dir_drive && path_drive) {
	/* re-emit the drive prefix that was stripped from path above */
	*buffer_pos++ = path_drive;
	*buffer_pos++ = L':';
    }

    if (wdir_len) {
	/* tainted if dir is used and dir is tainted */
	if (!tainted && OBJ_TAINTED(dir))
	    tainted = 1;

	wcsncpy(buffer_pos, wdir_pos, wdir_len);
	buffer_pos += wdir_len;
    }

    /* add separator if required */
    if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
	buffer_pos[0] = L'\\';
	buffer_pos++;
    }

    /* now deal with path */
    if (wpath_len) {
	wcsncpy(buffer_pos, wpath_pos, wpath_len);
	buffer_pos += wpath_len;
    }

    /* GetFullPathNameW requires at least "." to determine current directory */
    if (wpath_len == 0) {
	buffer_pos[0] = L'.';
	buffer_pos++;
    }

    /* Ensure buffer is NULL terminated */
    buffer_pos[0] = L'\0';

    /* tainted if path is relative */
    if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer)))
	tainted = 1;

    /* FIXME: Make this more robust */
    /* Determine require buffer size */
    /* NOTE(review): a return value of 0 from GetFullPathNameW is not handled;
     * the wfullpath[size - 1] accesses below would then index out of bounds. */
    size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
    if (size > PATH_BUFFER_SIZE) {
	/* allocate more memory than alloted originally by PATH_BUFFER_SIZE */
	wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t));
	size = GetFullPathNameW(buffer, size, wfullpath, NULL);
    }
    else {
	wfullpath = wfullpath_buffer;
    }

    /* Remove any trailing slashes */
    if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
	wfullpath[size - 2] != L':' &&
	!(size == 2 && IS_DIR_UNC_P(wfullpath))) {
	size -= 1;
	wfullpath[size] = L'\0';
    }

    /* Remove any trailing dot */
    if (wfullpath[size - 1] == L'.') {
	size -= 1;
	wfullpath[size] = L'\0';
    }

    /* removes trailing invalid ':$DATA' */
    size = remove_invalid_alternative_data(wfullpath, size);

    /* Replace the trailing path to long name */
    /* wfullpath is passed by address, so the helper may replace the buffer;
     * the last argument tells it whether the current buffer is heap-allocated */
    if (long_name)
	size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer));

    /* sanitize backslashes with forwardslashes */
    replace_wchar(wfullpath, L'\\', L'/');

    /* convert to VALUE and set the path encoding */
    rb_str_set_len(result, 0);
    result = append_wstr(result, wfullpath, size, cp, path_cp, path_encoding);

    /* makes the result object tainted if expanding tainted strings or returning modified path */
    if (tainted)
	OBJ_TAINT(result);

    /* TODO: better cleanup */
    if (buffer)
	xfree(buffer);

    if (wpath)
	free(wpath);

    if (wdir)
	free(wdir);

    if (whome)
	xfree(whome);

    /* the stack buffer must not be freed */
    if (wfullpath != wfullpath_buffer)
	xfree(wfullpath);

    if (fullpath)
	xfree(fullpath);

    rb_enc_associate(result, path_encoding);
    return result;
}
Example #9
0
/*
 *  call-seq:
 *     Dir.new( string ) -> aDir
 *
 *  Returns a new directory object for the named directory.
 *
 *  An optional second argument is converted to a Hash and may carry
 *  +external_encoding+ and +internal_encoding+ entries.  Giving an
 *  internal encoding without an external one raises ArgumentError.
 */
static VALUE
dir_initialize(int argc, VALUE *argv, VALUE dir)
{
    /* option keys and filesystem encoding, looked up on the first call only */
    static rb_encoding *fs_encoding;
    static VALUE sym_intenc, sym_extenc;
    struct dir_data *data;
    rb_encoding *enc_internal = NULL;
    rb_encoding *enc_external;
    rb_encoding *enc_dirname;
    VALUE dirname, opt;
    int transcode;

    if (!sym_intenc) {
	sym_intenc = ID2SYM(rb_intern("internal_encoding"));
	sym_extenc = ID2SYM(rb_intern("external_encoding"));
	fs_encoding = rb_filesystem_encoding();
    }
    enc_external = fs_encoding;

    rb_scan_args(argc, argv, "11", &dirname, &opt);

    if (!NIL_P(opt)) {
	VALUE opt_int, opt_ext;

	opt = rb_check_convert_type(opt, T_HASH, "Hash", "to_hash");
	opt_int = rb_hash_aref(opt, sym_intenc);
	opt_ext = rb_hash_aref(opt, sym_extenc);

	if (NIL_P(opt_ext)) {
	    if (!NIL_P(opt_int)) {
		rb_raise(rb_eArgError, "External encoding must be specified when internal encoding is given");
	    }
	}
	else {
	    enc_external = rb_to_encoding(opt_ext);
	    if (!NIL_P(opt_int)) {
		enc_internal = rb_to_encoding(opt_int);
		if (enc_external == enc_internal) {
		    rb_warn("Ignoring internal encoding '%s': it is identical to external encoding '%s'",
			    RSTRING_PTR(rb_inspect(opt_int)),
			    RSTRING_PTR(rb_inspect(opt_ext)));
		    enc_internal = NULL;
		}
	    }
	}
    }

    /* The name is transcoded to the external encoding unless it is already
     * in one of the encodings that are passed through unchanged. */
    enc_dirname = rb_enc_get(dirname);
    transcode = enc_dirname != rb_usascii_encoding()
	&& enc_dirname != rb_ascii8bit_encoding();
#if defined __APPLE__
    transcode = transcode && enc_dirname != rb_utf8_encoding();
#endif
    transcode = transcode && enc_dirname != enc_external;
    if (transcode) {
	if (enc_internal == NULL) enc_internal = enc_dirname;
	dirname = rb_str_transcode(dirname, rb_enc_from_encoding(enc_external));
    }
    FilePathValue(dirname);

    /* Release whatever a previous initialization left behind. */
    Data_Get_Struct(dir, struct dir_data, data);
    if (data->dir) closedir(data->dir);
    if (data->path) xfree(data->path);
    data->dir = NULL;
    data->path = NULL;
    data->intenc = enc_internal;
    data->extenc = enc_external;

    /* Running out of file descriptors gets one retry after a GC run. */
    data->dir = opendir(RSTRING_PTR(dirname));
    if (data->dir == NULL && (errno == EMFILE || errno == ENFILE)) {
	rb_gc();
	data->dir = opendir(RSTRING_PTR(dirname));
    }
    if (data->dir == NULL) {
	rb_sys_fail(RSTRING_PTR(dirname));
    }
    data->path = strdup(RSTRING_PTR(dirname));

    return dir;
}
Example #10
0
/*
 * Build a new String by expanding the format string fmt with the values in
 * argv (argc entries) and return it.
 *
 * The result starts out with fmt's encoding and is marked tainted when fmt
 * or any value formatted through %s/%p/%{name} is tainted.  Malformed
 * directives raise ArgumentError; a missing named reference raises KeyError.
 *
 * The helper macros used throughout (PUSH, CHECK, FILL, GETARG, GETPOSARG,
 * GETNAMEARG, GETASTER, GETNUM) are defined outside this function; they
 * presumably operate on the locals buf/blen/bsiz/result and
 * nextarg/posarg/nextvalue/hash declared here -- confirm against their
 * definitions.
 */
VALUE
rb_str_format(int argc, const VALUE *argv, VALUE fmt)
{
    rb_encoding *enc;
    const char *p, *end;
    char *buf;
    long blen, bsiz;
    VALUE result;

    long scanned = 0;
    int coderange = ENC_CODERANGE_7BIT;
    int width, prec, flags = FNONE;
    int nextarg = 1;
    int posarg = 0;
    int tainted = 0;
    VALUE nextvalue;
    VALUE tmp;
    VALUE str;
    volatile VALUE hash = Qundef;

/* a width may be given only once and never after a precision */
#define CHECK_FOR_WIDTH(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "width given twice");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "width after precision"); \
    }
/* flags must precede both width and precision */
#define CHECK_FOR_FLAGS(f)				 \
    if ((f) & FWIDTH) {					 \
	rb_raise(rb_eArgError, "flag after width");	 \
    }							 \
    if ((f) & FPREC0) {					 \
	rb_raise(rb_eArgError, "flag after precision"); \
    }

    /* shift argc/argv by one; nextarg starts at 1, so this presumably makes
     * argument numbering 1-based for the GET* macros -- TODO confirm */
    ++argc;
    --argv;
    if (OBJ_TAINTED(fmt)) tainted = 1;
    StringValue(fmt);
    enc = rb_enc_get(fmt);
    fmt = rb_str_new4(fmt);
    p = RSTRING_PTR(fmt);
    end = p + RSTRING_LEN(fmt);
    /* blen: bytes written so far; bsiz: size requested for the output buffer */
    blen = 0;
    bsiz = 120;
    result = rb_str_buf_new(bsiz);
    rb_enc_copy(result, fmt);
    buf = RSTRING_PTR(result);
    memset(buf, 0, bsiz);
    ENC_CODERANGE_SET(result, coderange);

    for (; p < end; p++) {
	const char *t;
	int n;
	ID id = 0;

	/* copy literal text up to the next '%' (or the end of fmt) */
	for (t = p; t < end && *t != '%'; t++) ;
	PUSH(p, t - p);
	if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
	    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
	    ENC_CODERANGE_SET(result, coderange);
	}
	if (t >= end) {
	    /* end of fmt string */
	    goto sprint_exit;
	}
	p = t + 1;		/* skip `%' */

	width = prec = -1;
	nextvalue = Qundef;
	/* each flag/width/precision component jumps back here until a
	 * conversion character ends the directive */
      retry:
	switch (*p) {
	  default:
	    if (rb_enc_isprint(*p, enc))
		rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
	    else
		rb_raise(rb_eArgError, "malformed format string");
	    break;

	  /* flag characters */
	  case ' ':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSPACE;
	    p++;
	    goto retry;

	  case '#':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FSHARP;
	    p++;
	    goto retry;

	  case '+':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FPLUS;
	    p++;
	    goto retry;

	  case '-':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FMINUS;
	    p++;
	    goto retry;

	  case '0':
	    CHECK_FOR_FLAGS(flags);
	    flags |= FZERO;
	    p++;
	    goto retry;

	  /* a number is either a positional argument ("n$") or the width */
	  case '1': case '2': case '3': case '4':
	  case '5': case '6': case '7': case '8': case '9':
	    n = 0;
	    GETNUM(n, width);
	    if (*p == '$') {
		if (nextvalue != Qundef) {
		    rb_raise(rb_eArgError, "value given twice - %d$", n);
		}
		nextvalue = GETPOSARG(n);
		p++;
		goto retry;
	    }
	    CHECK_FOR_WIDTH(flags);
	    width = n;
	    flags |= FWIDTH;
	    goto retry;

	  /* named reference: %<name> continues parsing the directive,
	   * %{name} is formatted right away through the %s path */
	  case '<':
	  case '{':
	    {
		const char *start = p;
		char term = (*p == '<') ? '>' : '}';
		int len;

		for (; p < end && *p != term; ) {
		    p += rb_enc_mbclen(p, end, enc);
		}
		if (p >= end) {
		    rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
		}
#if SIZEOF_INT < SIZEOF_SIZE_T
		if ((size_t)(p - start) >= INT_MAX) {
		    const int message_limit = 20;
		    len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start);
		    rb_enc_raise(enc, rb_eArgError,
				 "too long name (%"PRIdSIZE" bytes) - %.*s...%c",
				 (size_t)(p - start - 2), len, start, term);
		}
#endif
		len = (int)(p - start + 1); /* including parenthesis */
		if (id) {
		    rb_enc_raise(enc, rb_eArgError, "named%.*s after <%s>",
				 len, start, rb_id2name(id));
		}
		nextvalue = GETNAMEARG((id = rb_check_id_cstr(start + 1,
							      len - 2 /* without parenthesis */,
							      enc),
					ID2SYM(id)),
				       start, len, enc);
		if (nextvalue == Qundef) {
		    rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start);
		}
		if (term == '}') goto format_s;
		p++;
		goto retry;
	    }

	  /* width taken from the argument list; negative means left-justify */
	  case '*':
	    CHECK_FOR_WIDTH(flags);
	    flags |= FWIDTH;
	    GETASTER(width);
	    if (width < 0) {
		flags |= FMINUS;
		width = -width;
	    }
	    p++;
	    goto retry;

	  /* precision, either literal digits or '*' */
	  case '.':
	    if (flags & FPREC0) {
		rb_raise(rb_eArgError, "precision given twice");
	    }
	    flags |= FPREC|FPREC0;

	    prec = 0;
	    p++;
	    if (*p == '*') {
		GETASTER(prec);
		if (prec < 0) {	/* ignore negative precision */
		    flags &= ~FPREC;
		}
		p++;
		goto retry;
	    }

	    GETNUM(prec, precision);
	    goto retry;

	  /* '%' followed by newline or NUL emits a literal '%' and leaves the
	   * following character to be processed as ordinary text */
	  case '\n':
	  case '\0':
	    p--;
	  case '%':
	    if (flags != FNONE) {
		rb_raise(rb_eArgError, "invalid format character - %%");
	    }
	    PUSH("%", 1);
	    break;

	  /* single character: a one-character string or a codepoint number */
	  case 'c':
	    {
		VALUE val = GETARG();
		VALUE tmp;
		unsigned int c;
		int n;

		tmp = rb_check_string_type(val);
		if (!NIL_P(tmp)) {
		    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
			rb_raise(rb_eArgError, "%%c requires a character");
		    }
		    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
		    RB_GC_GUARD(tmp);
		}
		else {
		    c = NUM2INT(val);
		    n = rb_enc_codelen(c, enc);
		}
		if (n <= 0) {
		    rb_raise(rb_eArgError, "invalid character");
		}
		if (!(flags & FWIDTH)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
		else if ((flags & FMINUS)) {
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		    FILL(' ', width-1);
		}
		else {
		    FILL(' ', width-1);
		    CHECK(n);
		    rb_enc_mbcput(c, &buf[blen], enc);
		    blen += n;
		}
	    }
	    break;

	  /* string conversion; %p formats the rb_inspect() form of the value */
	  case 's':
	  case 'p':
	  format_s:
	    {
		VALUE arg = GETARG();
		long len, slen;

		if (*p == 'p') arg = rb_inspect(arg);
		str = rb_obj_as_string(arg);
		if (OBJ_TAINTED(str)) tainted = 1;
		len = RSTRING_LEN(str);
		rb_str_set_len(result, blen);
		if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
		    int cr = coderange;
		    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
		    ENC_CODERANGE_SET(result,
				      (cr == ENC_CODERANGE_UNKNOWN ?
				       ENC_CODERANGE_BROKEN : (coderange = cr)));
		}
		/* the result and the value must have compatible encodings */
		enc = rb_enc_check(result, str);
		if (flags&(FPREC|FWIDTH)) {
		    /* width and precision count characters, not bytes */
		    slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
		    if (slen < 0) {
			rb_raise(rb_eArgError, "invalid mbstring sequence");
		    }
		    if ((flags&FPREC) && (prec < slen)) {
			char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
					     prec, enc);
			slen = prec;
			len = p - RSTRING_PTR(str);
		    }
		    /* need to adjust multi-byte string pos */
		    if ((flags&FWIDTH) && (width > slen)) {
			width -= (int)slen;
			if (!(flags&FMINUS)) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			CHECK(len);
			memcpy(&buf[blen], RSTRING_PTR(str), len);
			RB_GC_GUARD(str);
			blen += len;
			if (flags&FMINUS) {
			    CHECK(width);
			    while (width--) {
				buf[blen++] = ' ';
			    }
			}
			rb_enc_associate(result, enc);
			break;
		    }
		}
		PUSH(RSTRING_PTR(str), len);
		RB_GC_GUARD(str);
		rb_enc_associate(result, enc);
	    }
	    break;

	  /* integer conversions */
	  case 'd':
	  case 'i':
	  case 'o':
	  case 'x':
	  case 'X':
	  case 'b':
	  case 'B':
	  case 'u':
	    {
		volatile VALUE val = GETARG();
		char fbuf[32], nbuf[64], *s;
		const char *prefix = 0;
		int sign = 0, dots = 0;
		char sc = 0;
		long v = 0;
		int base, bignum = 0;
		int len;

		/* decimal conversions always print a sign; the other bases
		 * only do so when '+' or ' ' was given */
		switch (*p) {
		  case 'd':
		  case 'i':
		  case 'u':
		    sign = 1; break;
		  case 'o':
		  case 'x':
		  case 'X':
		  case 'b':
		  case 'B':
		    if (flags&(FPLUS|FSPACE)) sign = 1;
		    break;
		}
		if (flags & FSHARP) {
		    switch (*p) {
		      case 'o':
			prefix = "0"; break;
		      case 'x':
			prefix = "0x"; break;
		      case 'X':
			prefix = "0X"; break;
		      case 'b':
			prefix = "0b"; break;
		      case 'B':
			prefix = "0B"; break;
		    }
		}

		/* coerce the value until it is a Fixnum or a Bignum */
	      bin_retry:
		switch (TYPE(val)) {
		  case T_FLOAT:
		    if (FIXABLE(RFLOAT_VALUE(val))) {
			val = LONG2FIX((long)RFLOAT_VALUE(val));
			goto bin_retry;
		    }
		    val = rb_dbl2big(RFLOAT_VALUE(val));
		    if (FIXNUM_P(val)) goto bin_retry;
		    bignum = 1;
		    break;
		  case T_STRING:
		    val = rb_str_to_inum(val, 0, TRUE);
		    goto bin_retry;
		  case T_BIGNUM:
		    bignum = 1;
		    break;
		  case T_FIXNUM:
		    v = FIX2LONG(val);
		    break;
		  default:
		    val = rb_Integer(val);
		    goto bin_retry;
		}

		switch (*p) {
		  case 'o':
		    base = 8; break;
		  case 'x':
		  case 'X':
		    base = 16; break;
		  case 'b':
		  case 'B':
		    base = 2; break;
		  case 'u':
		  case 'd':
		  case 'i':
		  default:
		    base = 10; break;
		}

		if (!bignum) {
		    /* binary output always goes through the Bignum path */
		    if (base == 2) {
			val = rb_int2big(v);
			goto bin_retry;
		    }
		    if (sign) {
			char c = *p;
			if (c == 'i') c = 'd'; /* %d and %i are identical */
			if (v < 0) {
			    v = -v;
			    sc = '-';
			    width--;
			}
			else if (flags & FPLUS) {
			    sc = '+';
			    width--;
			}
			else if (flags & FSPACE) {
			    sc = ' ';
			    width--;
			}
			snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
			snprintf(nbuf, sizeof(nbuf), fbuf, v);
			s = nbuf;
		    }
		    else {
			/* unsigned rendering: a negative value is shown with a
			 * ".." prefix (see dots below); the digits are written
			 * at nbuf+1 so one more digit can be prepended */
			s = nbuf;
			if (v < 0) {
			    dots = 1;
			}
			snprintf(fbuf, sizeof(fbuf), "%%l%c", *p == 'X' ? 'x' : *p);
			snprintf(++s, sizeof(nbuf) - 1, fbuf, v);
			if (v < 0) {
			    char d = 0;

			    s = remove_sign_bits(s, base);
			    switch (base) {
			      case 16:
				d = 'f'; break;
			      case 8:
				d = '7'; break;
			    }
			    if (d && *s != d) {
				*--s = d;
			    }
			}
		    }
		    len = (int)strlen(s);
		}
		else {
		    if (sign) {
			tmp = rb_big2str(val, base);
			s = RSTRING_PTR(tmp);
			if (s[0] == '-') {
			    s++;
			    sc = '-';
			    width--;
			}
			else if (flags & FPLUS) {
			    sc = '+';
			    width--;
			}
			else if (flags & FSPACE) {
			    sc = ' ';
			    width--;
			}
		    }
		    else {
			/* negative Bignum without sign: convert a copy with
			 * rb_big_2comp() before turning it into digits */
			if (!RBIGNUM_SIGN(val)) {
			    val = rb_big_clone(val);
			    rb_big_2comp(val);
			}
			tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val));
			s = RSTRING_PTR(tmp);
			if (*s == '-') {
			    dots = 1;
			    if (base == 10) {
				rb_warning("negative number for %%u specifier");
			    }
			    s = remove_sign_bits(++s, base);
			    switch (base) {
			      case 16:
				if (s[0] != 'f') *--s = 'f'; break;
			      case 8:
				if (s[0] != '7') *--s = '7'; break;
			      case 2:
				if (s[0] != '1') *--s = '1'; break;
			    }
			}
		    }
		    len = rb_long2int(RSTRING_END(tmp) - s);
		}

		/* the ".." prefix takes two columns */
		if (dots) {
		    prec -= 2;
		    width -= 2;
		}

		if (*p == 'X') {
		    char *pp = s;
		    int c;
		    while ((c = (int)(unsigned char)*pp) != 0) {
			*pp = rb_enc_toupper(c, enc);
			pp++;
		    }
		}
		if (prefix && !prefix[1]) { /* octal */
		    if (dots) {
			prefix = 0;
		    }
		    else if (len == 1 && *s == '0') {
			len = 0;
			if (flags & FPREC) prec--;
		    }
		    else if ((flags & FPREC) && (prec > len)) {
			prefix = 0;
		    }
		}
		else if (len == 1 && *s == '0') {
		    prefix = 0;
		}
		if (prefix) {
		    width -= (int)strlen(prefix);
		}
		/* zero padding without '-' or a precision: the whole width
		 * becomes the precision */
		if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
		    prec = width;
		    width = 0;
		}
		else {
		    if (prec < len) {
			if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
			prec = len;
		    }
		    width -= prec;
		}
		if (!(flags&FMINUS)) {
		    CHECK(width);
		    while (width-- > 0) {
			buf[blen++] = ' ';
		    }
		}
		if (sc) PUSH(&sc, 1);
		if (prefix) {
		    int plen = (int)strlen(prefix);
		    PUSH(prefix, plen);
		}
		CHECK(prec - len);
		if (dots) PUSH("..", 2);
		/* pad up to the precision: with the character returned by
		 * sign_bits() for negative values shown unsigned, else '0' */
		if (!bignum && v < 0) {
		    char c = sign_bits(base, p);
		    while (len < prec--) {
			buf[blen++] = c;
		    }
		}
		else if ((flags & (FMINUS|FPREC)) != FMINUS) {
		    char c;

		    if (!sign && bignum && !RBIGNUM_SIGN(val))
			c = sign_bits(base, p);
		    else
			c = '0';
		    while (len < prec--) {
			buf[blen++] = c;
		    }
		}
		PUSH(s, len);
		RB_GC_GUARD(tmp);
		CHECK(width);
		while (width-- > 0) {
		    buf[blen++] = ' ';
		}
	    }
	    break;

	  /* floating point conversions */
	  case 'f':
	  case 'g':
	  case 'G':
	  case 'e':
	  case 'E':
	  case 'a':
	  case 'A':
	    {
		VALUE val = GETARG();
		double fval;
		int i, need = 6;
		char fbuf[32];

		fval = RFLOAT_VALUE(rb_Float(val));
		/* NaN and infinity are laid out by hand as "NaN"/"Inf" */
		if (isnan(fval) || isinf(fval)) {
		    const char *expr;

		    if (isnan(fval)) {
			expr = "NaN";
		    }
		    else {
			expr = "Inf";
		    }
		    need = (int)strlen(expr);
		    if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
			need++;
		    if ((flags & FWIDTH) && need < width)
			need = width;

		    CHECK(need + 1);
		    /* pre-fill the field with spaces, then place sign and text */
		    snprintf(&buf[blen], need + 1, "%*s", need, "");
		    if (flags & FMINUS) {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen++] = '-';
			else if (flags & FPLUS)
			    buf[blen++] = '+';
			else if (flags & FSPACE)
			    blen++;
			memcpy(&buf[blen], expr, strlen(expr));
		    }
		    else {
			if (!isnan(fval) && fval < 0.0)
			    buf[blen + need - strlen(expr) - 1] = '-';
			else if (flags & FPLUS)
			    buf[blen + need - strlen(expr) - 1] = '+';
			else if ((flags & FSPACE) && need > width)
			    blen++;
			memcpy(&buf[blen + need - strlen(expr)], expr,
			       strlen(expr));
		    }
		    blen += strlen(&buf[blen]);
		    break;
		}

		/* finite values: fmt_setup() fills fbuf with a format string
		 * that is then handed to snprintf(); `need` is the number of
		 * bytes reserved for the output */
		fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
		need = 0;
		if (*p != 'e' && *p != 'E') {
		    i = INT_MIN;
		    frexp(fval, &i);
		    if (i > 0)
			need = BIT_DIGITS(i);
		}
		need += (flags&FPREC) ? prec : 6;
		if ((flags&FWIDTH) && need < width)
		    need = width;
		need += 20;

		CHECK(need);
		snprintf(&buf[blen], need, fbuf, fval);
		blen += strlen(&buf[blen]);
	    }
	    break;
	}
	/* directive finished: flags do not carry over to the next one */
	flags = FNONE;
    }

  sprint_exit:
    RB_GC_GUARD(fmt);
    /* XXX - We cannot validate the number of arguments if (digit)$ style used.
     */
    /* leftover arguments raise in debug mode and only warn in verbose mode */
    if (posarg >= 0 && nextarg < argc) {
	const char *mesg = "too many arguments for format string";
	if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
	if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
    }
    /* trim the result to the bytes actually written */
    rb_str_resize(result, blen);

    if (tainted) OBJ_TAINT(result);
    return result;
}
Example #11
0
/**
 * filter_html
 *
 * Scans the bytes of +str+ and replaces every registered word found outside
 * of markup with `url_base % [word, word]`, where url_base is the instance
 * variable LINK_URL_VARIABLE (or DEAULT_LINK_URL when that is nil).
 *
 * Bytes between BEGIN_TAG and END_TAG are never matched, and neither is the
 * content of A_TAG / SCRIPT_TAG / PRE_TAG / IFRAME_TAG / OBJECT_TAG elements.
 * Words are looked up by walking from the root node stored in +self+ with
 * search_child(); presumably this is a trie of the registered words -- the
 * node type is defined elsewhere in the file.
 *
 * Returns +str+ itself when nothing was replaced, otherwise a new string.
 *
 * NOTE: the length is taken with strlen(), so scanning stops at the first
 * NUL byte of +str+.
 **/
static VALUE
t_filter_html(VALUE self, VALUE str)
{
    node root, now, ret;
    bool in_tag;
    char *text;
    const char* inner_tag;
    long i, head_i, tail_i, copy_head_i, total_len;
    VALUE change_str, url_base, word;
    rb_encoding *enc;
    ID concat_id, format_id;

    concat_id = rb_intern("concat");
    format_id = rb_intern("%");

    change_str = rb_str_new2(EMPTY_STRING);
    /* Convert first: StringValuePtr may replace str with its String form,
     * and the encoding must be taken from that converted object. */
    text = StringValuePtr(str);
    enc = rb_enc_get(str);

    Data_Get_Struct(self, struct _node, root);
    url_base = rb_iv_get(self, LINK_URL_VARIABLE);
    if (url_base == Qnil) {
        url_base = rb_str_new2(DEAULT_LINK_URL);
    }

    now = root;
    total_len = strlen(text);
    head_i = -1;        /* start of the candidate word, -1 when none */
    tail_i = -1;        /* end of the longest complete word seen so far */
    copy_head_i = 0;    /* first byte not yet copied into change_str */
    in_tag = false;
    inner_tag = NULL;

    /* The loop deliberately visits i == total_len (the terminating NUL) so
     * that a word still pending at the end of the text gets flushed. */
    for(i = 0; i <= total_len; i++) {
        if (!in_tag && text[i] == BEGIN_TAG) {
            in_tag = true;
            /* Remember elements whose whole content must be skipped. */
            if (strncasecmp(&text[i + 1], A_TAG, strlen(A_TAG)) == 0) {
                inner_tag = A_TAG;
            } else if (strncasecmp(&text[i + 1], SCRIPT_TAG, strlen(SCRIPT_TAG)) == 0) {
                inner_tag = SCRIPT_TAG;
            } else if (strncasecmp(&text[i + 1], PRE_TAG, strlen(PRE_TAG)) == 0) {
                inner_tag = PRE_TAG;
            } else if (strncasecmp(&text[i + 1], IFRAME_TAG, strlen(IFRAME_TAG)) == 0) {
                inner_tag = IFRAME_TAG;
            } else if (strncasecmp(&text[i + 1], OBJECT_TAG, strlen(OBJECT_TAG)) == 0) {
                inner_tag = OBJECT_TAG;
            }
            continue;
        }

        if (in_tag && !inner_tag && text[i] == END_TAG) {
            in_tag = false;
            continue;
        }

        if (inner_tag && text[i] == BEGIN_TAG) {
            /* Look for the closing tag name two bytes ahead (the byte at
             * i + 1 is not inspected).  Only do so while i + 2 is still
             * inside the buffer including its NUL terminator; otherwise a
             * BEGIN_TAG in the last position would read out of bounds. */
            if (i + 2 <= total_len &&
                strncasecmp(&text[i + 2], inner_tag, strlen(inner_tag)) == 0) {
                inner_tag = NULL;
                continue;
            }
        }

        if (in_tag) {
            continue;
        }

        ret = search_child(now, text[i]);

        if (ret && i != total_len) {
            if (head_i == -1) {
                head_i = i;
            }

            if (ret->end_flag) {
                tail_i = i;
            }
            now = ret;
        } else {
            if (head_i != -1) {
                if (tail_i != -1) {
                    /* Emit the untouched text preceding the word ... */
                    if (copy_head_i < head_i) {
                        rb_funcall(
                            change_str,
                            concat_id,
                            1,
                            add_encode(rb_str_new(&text[copy_head_i], (head_i - copy_head_i)), enc)
                        );
                    }

                    /* ... then the word itself, formatted through url_base. */
                    word = rb_str_new(&text[head_i], (tail_i - head_i + 1));
                    rb_funcall(
                        change_str,
                        concat_id,
                        1,
                        add_encode(rb_funcall(url_base, format_id, 1, rb_assoc_new(word, word)), enc)
                    );
                    /* Resume scanning right after the replaced word. */
                    i = tail_i;
                    copy_head_i = tail_i + 1;
                    tail_i = -1;
                } else {
                    /* No complete word: retry from the byte after head_i. */
                    i = head_i;
                }
                head_i = -1;
            }
            now = root;
        }
    }

    if (copy_head_i == 0) {
        /* Nothing was replaced; hand the input back unchanged. */
        return str;
    } else {
        rb_funcall(
            change_str,
            concat_id,
            1,
            add_encode(rb_str_new(&text[copy_head_i], (total_len - copy_head_i)), enc)
        );
        return change_str;
    }
}
Example #12
0
/*
 * Interns the first `len` bytes of `string` with rb_intern3, using the
 * encoding obtained from `enc`, and returns Qtrue when the resulting ID is
 * the same as the ID of `sym`, Qfalse otherwise.
 */
VALUE symbol_spec_rb_intern3_c_compare(VALUE self, VALUE string, VALUE len, VALUE enc, VALUE sym) {
  const char *bytes = RSTRING_PTR(string);
  long nbytes = FIX2LONG(len);
  rb_encoding *encoding = rb_enc_get(enc);
  ID interned = rb_intern3(bytes, nbytes, encoding);

  if (SYM2ID(sym) == interned) {
    return Qtrue;
  }
  return Qfalse;
}
Example #13
0
/*
 * Interns the first `len` bytes of `string` with rb_intern3, using the
 * encoding obtained from `enc`, and returns the result as a Symbol.
 */
VALUE symbol_spec_rb_intern3(VALUE self, VALUE string, VALUE len, VALUE enc) {
  const char *bytes = RSTRING_PTR(string);
  long nbytes = FIX2LONG(len);
  rb_encoding *encoding = rb_enc_get(enc);
  ID interned = rb_intern3(bytes, nbytes, encoding);

  return ID2SYM(interned);
}
Example #14
0
/*
 * Returns a copy of +str+ in which the entities &apos; &amp; &quot; &gt;
 * &lt; and the numeric references &#DDD; / &#xHHH; / &#XHHH; are replaced
 * by the characters they denote.
 *
 * Numeric references are only decoded when the code point is below
 * `charlimit`, which depends on the encoding name of +str+: UNICODE_MAX for
 * UTF-8 (so U+10FFFF itself is left as is), 256 for ISO-8859-1, and 128 for
 * everything else.  Anything that does not form a complete, valid entity is
 * copied through untouched.
 *
 * When nothing is decoded the result is rb_str_dup(str); otherwise a new
 * buffer is built and passed through preserve_original_state() (defined
 * elsewhere in the file) together with +str+.
 */
static VALUE
optimized_unescape_html(VALUE str)
{
    enum {UNICODE_MAX = 0x10ffff};
    rb_encoding *enc = rb_enc_get(str);
    unsigned long charlimit = (strcasecmp(rb_enc_name(enc), "UTF-8") == 0 ? UNICODE_MAX :
			       strcasecmp(rb_enc_name(enc), "ISO-8859-1") == 0 ? 256 :
			       128);
    long i, len, beg = 0;	/* beg: first byte not yet copied to dest */
    size_t clen, plen;
    int overflow;
    const char *cstr;
    char buf[6];
    VALUE dest = 0;

    len  = RSTRING_LEN(str);
    cstr = RSTRING_PTR(str);

    for (i = 0; i < len; i++) {
	unsigned long cc;
	long amp;
	char c = cstr[i];
	if (c != '&') continue;
	/*
	 * Remember where the '&' is.  Whenever the text after it turns out
	 * not to be an entity, rewind to this position so the loop's i++
	 * resumes at the very next byte.  Without the rewind one unexamined
	 * byte is skipped, and an '&' sitting there (e.g. "&a&amp;") would
	 * never be recognized.
	 */
	amp = i;
	plen = i - beg;		/* length of the literal run before '&' */
	if (++i >= len) break;
	c = (unsigned char)cstr[i];
	switch (c) {
	  case 'a':
	    ++i;
	    if (len - i >= 4 && memcmp(&cstr[i], "pos;", 4) == 0) {
		c = '\'';
		i += 3;
	    }
	    else if (len - i >= 3 && memcmp(&cstr[i], "mp;", 3) == 0) {
		c = '&';
		i += 2;
	    }
	    else {
		i = amp;
		continue;
	    }
	    break;
	  case 'q':
	    ++i;
	    if (len - i >= 4 && memcmp(&cstr[i], "uot;", 4) == 0) {
		c = '"';
		i += 3;
	    }
	    else {
		i = amp;
		continue;
	    }
	    break;
	  case 'g':
	    ++i;
	    if (len - i >= 2 && memcmp(&cstr[i], "t;", 2) == 0) {
		c = '>';
		i += 1;
	    }
	    else {
		i = amp;
		continue;
	    }
	    break;
	  case 'l':
	    ++i;
	    if (len - i >= 2 && memcmp(&cstr[i], "t;", 2) == 0) {
		c = '<';
		i += 1;
	    }
	    else {
		i = amp;
		continue;
	    }
	    break;
	  case '#':
	    ++i;
	    /* Decimal form needs at least one digit plus the ';'. */
	    if (len - i >= 2 && ISDIGIT(cstr[i])) {
		cc = ruby_scan_digits(&cstr[i], len-i, 10, &clen, &overflow);
	    }
	    /* Hex form needs 'x'/'X', at least one digit, and the ';'. */
	    else if (len - i >= 3 && (cstr[i] == 'x' || cstr[i] == 'X') &&
		     ISXDIGIT(cstr[i + 1])) {
		++i;
		cc = ruby_scan_digits(&cstr[i], len-i, 16, &clen, &overflow);
	    }
	    else {
		i = amp;
		continue;
	    }
	    i += clen;
	    /* The digits may run up to the end of the string; do not look
	     * at cstr[len] in that case. */
	    if (overflow || cc >= charlimit || i >= len || cstr[i] != ';') {
		i = amp;
		continue;
	    }
	    if (!dest) {
		dest = rb_str_buf_new(len);
	    }
	    rb_str_cat(dest, cstr + beg, plen);
	    if (charlimit > 256) {
		/* Multibyte-capable encoding: encode the code point. */
		rb_str_cat(dest, buf, rb_enc_mbcput((OnigCodePoint)cc, buf, enc));
	    }
	    else {
		/* Single-byte encoding: the code point is the byte. */
		c = (unsigned char)cc;
		rb_str_cat(dest, &c, 1);
	    }
	    beg = i + 1;
	    continue;
	  default:
	    i = amp;
	    continue;
	}
	/* A named entity matched: emit the pending literal run and then the
	 * decoded character; i now points at the terminating ';'. */
	if (!dest) {
	    dest = rb_str_buf_new(len);
	}
	rb_str_cat(dest, cstr + beg, plen);
	rb_str_cat(dest, &c, 1);
	beg = i + 1;
    }

    if (dest) {
	/* Copy whatever follows the last decoded entity. */
	rb_str_cat(dest, cstr + beg, len - beg);
	preserve_original_state(str, dest);
	return dest;
    }
    else {
	return rb_str_dup(str);
    }
}
Example #15
0
VALUE Trenni_Native_parse_markup(VALUE self, VALUE buffer, VALUE delegate, VALUE entities) {
	VALUE string = rb_funcall(buffer, id_read, 0);
	
	rb_encoding *encoding = rb_enc_get(string);
	
	VALUE pcdata = Qnil;
	
	VALUE empty_string = rb_obj_freeze(rb_enc_str_new("", 0, encoding));
	
	const char *s, *p, *pe, *eof;
	unsigned long cs, top = 0, stack[2] = {0};
	unsigned long codepoint = 0;
	
	Token identifier = {0}, cdata = {0}, characters = {0}, entity = {0}, doctype = {0}, comment = {0}, instruction = {0};
	unsigned self_closing = 0, has_value = 0;
	
	s = p = RSTRING_PTR(string);
	eof = pe = p + RSTRING_LEN(string);
	
	
#line 42 "markup.c"
	{
	cs = Trenni_markup_parser_start;
	top = 0;
	}

#line 48 "markup.c"
	{
	if ( p == pe )
		goto _test_eof;
	goto _resume;

_again:
	switch ( cs ) {
		case 48: goto st48;
		case 49: goto st49;
		case 50: goto st50;
		case 1: goto st1;
		case 2: goto st2;
		case 0: goto st0;
		case 3: goto st3;
		case 4: goto st4;
		case 5: goto st5;
		case 51: goto st51;
		case 6: goto st6;
		case 7: goto st7;
		case 8: goto st8;
		case 9: goto st9;
		case 10: goto st10;
		case 11: goto st11;
		case 12: goto st12;
		case 13: goto st13;
		case 14: goto st14;
		case 15: goto st15;
		case 16: goto st16;
		case 17: goto st17;
		case 18: goto st18;
		case 19: goto st19;
		case 52: goto st52;
		case 20: goto st20;
		case 21: goto st21;
		case 22: goto st22;
		case 23: goto st23;
		case 24: goto st24;
		case 25: goto st25;
		case 26: goto st26;
		case 53: goto st53;
		case 27: goto st27;
		case 28: goto st28;
		case 29: goto st29;
		case 30: goto st30;
		case 31: goto st31;
		case 32: goto st32;
		case 33: goto st33;
		case 34: goto st34;
		case 35: goto st35;
		case 54: goto st54;
		case 36: goto st36;
		case 37: goto st37;
		case 55: goto st55;
		case 38: goto st38;
		case 39: goto st39;
		case 40: goto st40;
		case 41: goto st41;
		case 56: goto st56;
		case 42: goto st42;
		case 43: goto st43;
		case 44: goto st44;
		case 57: goto st57;
		case 45: goto st45;
		case 46: goto st46;
		case 47: goto st47;
	default: break;
	}

	if ( ++p == pe )
		goto _test_eof;
_resume:
	switch ( cs )
	{
st48:
	if ( ++p == pe )
		goto _test_eof48;
case 48:
	switch( (*p) ) {
		case 38: goto tr88;
		case 60: goto tr89;
	}
	goto tr87;
tr93:
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st49;
tr87:
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st49;
tr96:
#line 158 "markup.rl"
	{
		rb_funcall(delegate, id_open_tag_end, 1, self_closing == 1 ? Qtrue : Qfalse);
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st49;
tr99:
#line 92 "markup.rl"
	{
		comment.end = p;
		
		rb_funcall(delegate, id_comment, 1, Trenni_token(comment, encoding));
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st49;
tr102:
#line 78 "markup.rl"
	{
		doctype.end = p;
		
		rb_funcall(delegate, id_doctype, 1, Trenni_token(doctype, encoding));
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st49;
tr105:
#line 177 "markup.rl"
	{
		cdata.end = p;
		
		rb_funcall(delegate, id_cdata, 1, Trenni_token(cdata, encoding));
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st49;
tr108:
#line 165 "markup.rl"
	{
		rb_funcall(delegate, id_close_tag, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s));
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st49;
tr111:
#line 112 "markup.rl"
	{
		instruction.end = p;
		
		rb_funcall(delegate, id_instruction, 1, Trenni_token(instruction, encoding));
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st49;
st49:
	if ( ++p == pe )
		goto _test_eof49;
case 49:
#line 264 "markup.c"
	switch( (*p) ) {
		case 38: goto tr91;
		case 60: goto tr92;
	}
	goto st49;
tr91:
#line 36 "markup.rl"
	{
		characters.end = p;
		
		Trenni_append_token(&pcdata, encoding, characters);
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 50; goto st42;}}
	goto st50;
tr94:
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 50; goto st42;}}
	goto st50;
tr88:
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 50; goto st42;}}
	goto st50;
tr97:
#line 158 "markup.rl"
	{
		rb_funcall(delegate, id_open_tag_end, 1, self_closing == 1 ? Qtrue : Qfalse);
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 50; goto st42;}}
	goto st50;
tr100:
#line 92 "markup.rl"
	{
		comment.end = p;
		
		rb_funcall(delegate, id_comment, 1, Trenni_token(comment, encoding));
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 50; goto st42;}}
	goto st50;
tr103:
#line 78 "markup.rl"
	{
		doctype.end = p;
		
		rb_funcall(delegate, id_doctype, 1, Trenni_token(doctype, encoding));
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 50; goto st42;}}
	goto st50;
tr106:
#line 177 "markup.rl"
	{
		cdata.end = p;
		
		rb_funcall(delegate, id_cdata, 1, Trenni_token(cdata, encoding));
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 50; goto st42;}}
	goto st50;
tr109:
#line 165 "markup.rl"
	{
		rb_funcall(delegate, id_close_tag, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s));
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 50; goto st42;}}
	goto st50;
tr112:
#line 112 "markup.rl"
	{
		instruction.end = p;
		
		rb_funcall(delegate, id_instruction, 1, Trenni_token(instruction, encoding));
	}
#line 25 "markup.rl"
	{
	}
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 50; goto st42;}}
	goto st50;
st50:
	if ( ++p == pe )
		goto _test_eof50;
case 50:
#line 397 "markup.c"
	switch( (*p) ) {
		case 38: goto tr94;
		case 60: goto tr95;
	}
	goto tr93;
tr89:
#line 129 "markup.rl"
	{
	}
#line 162 "markup.rl"
	{
	}
#line 102 "markup.rl"
	{
		instruction.begin = p;
	}
#line 88 "markup.rl"
	{
		comment.begin = p;
	}
#line 74 "markup.rl"
	{
		doctype.begin = p;
	}
#line 173 "markup.rl"
	{
		cdata.begin = p;
	}
	goto st1;
tr92:
#line 36 "markup.rl"
	{
		characters.end = p;
		
		Trenni_append_token(&pcdata, encoding, characters);
	}
#line 22 "markup.rl"
	{
	}
#line 28 "markup.rl"
	{
		rb_funcall(delegate, id_text, 1, pcdata);
	}
#line 129 "markup.rl"
	{
	}
#line 162 "markup.rl"
	{
	}
#line 102 "markup.rl"
	{
		instruction.begin = p;
	}
#line 88 "markup.rl"
	{
		comment.begin = p;
	}
#line 74 "markup.rl"
	{
		doctype.begin = p;
	}
#line 173 "markup.rl"
	{
		cdata.begin = p;
	}
	goto st1;
tr95:
#line 22 "markup.rl"
	{
	}
#line 28 "markup.rl"
	{
		rb_funcall(delegate, id_text, 1, pcdata);
	}
#line 129 "markup.rl"
	{
	}
#line 162 "markup.rl"
	{
	}
#line 102 "markup.rl"
	{
		instruction.begin = p;
	}
#line 88 "markup.rl"
	{
		comment.begin = p;
	}
#line 74 "markup.rl"
	{
		doctype.begin = p;
	}
#line 173 "markup.rl"
	{
		cdata.begin = p;
	}
	goto st1;
tr98:
#line 158 "markup.rl"
	{
		rb_funcall(delegate, id_open_tag_end, 1, self_closing == 1 ? Qtrue : Qfalse);
	}
#line 129 "markup.rl"
	{
	}
#line 162 "markup.rl"
	{
	}
#line 102 "markup.rl"
	{
		instruction.begin = p;
	}
#line 88 "markup.rl"
	{
		comment.begin = p;
	}
#line 74 "markup.rl"
	{
		doctype.begin = p;
	}
#line 173 "markup.rl"
	{
		cdata.begin = p;
	}
	goto st1;
tr101:
#line 92 "markup.rl"
	{
		comment.end = p;
		
		rb_funcall(delegate, id_comment, 1, Trenni_token(comment, encoding));
	}
#line 129 "markup.rl"
	{
	}
#line 162 "markup.rl"
	{
	}
#line 102 "markup.rl"
	{
		instruction.begin = p;
	}
#line 88 "markup.rl"
	{
		comment.begin = p;
	}
#line 74 "markup.rl"
	{
		doctype.begin = p;
	}
#line 173 "markup.rl"
	{
		cdata.begin = p;
	}
	goto st1;
tr104:
#line 78 "markup.rl"
	{
		doctype.end = p;
		
		rb_funcall(delegate, id_doctype, 1, Trenni_token(doctype, encoding));
	}
#line 129 "markup.rl"
	{
	}
#line 162 "markup.rl"
	{
	}
#line 102 "markup.rl"
	{
		instruction.begin = p;
	}
#line 88 "markup.rl"
	{
		comment.begin = p;
	}
#line 74 "markup.rl"
	{
		doctype.begin = p;
	}
#line 173 "markup.rl"
	{
		cdata.begin = p;
	}
	goto st1;
tr107:
#line 177 "markup.rl"
	{
		cdata.end = p;
		
		rb_funcall(delegate, id_cdata, 1, Trenni_token(cdata, encoding));
	}
#line 129 "markup.rl"
	{
	}
#line 162 "markup.rl"
	{
	}
#line 102 "markup.rl"
	{
		instruction.begin = p;
	}
#line 88 "markup.rl"
	{
		comment.begin = p;
	}
#line 74 "markup.rl"
	{
		doctype.begin = p;
	}
#line 173 "markup.rl"
	{
		cdata.begin = p;
	}
	goto st1;
tr110:
#line 165 "markup.rl"
	{
		rb_funcall(delegate, id_close_tag, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s));
	}
#line 129 "markup.rl"
	{
	}
#line 162 "markup.rl"
	{
	}
#line 102 "markup.rl"
	{
		instruction.begin = p;
	}
#line 88 "markup.rl"
	{
		comment.begin = p;
	}
#line 74 "markup.rl"
	{
		doctype.begin = p;
	}
#line 173 "markup.rl"
	{
		cdata.begin = p;
	}
	goto st1;
tr113:
#line 112 "markup.rl"
	{
		instruction.end = p;
		
		rb_funcall(delegate, id_instruction, 1, Trenni_token(instruction, encoding));
	}
#line 129 "markup.rl"
	{
	}
#line 162 "markup.rl"
	{
	}
#line 102 "markup.rl"
	{
		instruction.begin = p;
	}
#line 88 "markup.rl"
	{
		comment.begin = p;
	}
#line 74 "markup.rl"
	{
		doctype.begin = p;
	}
#line 173 "markup.rl"
	{
		cdata.begin = p;
	}
	goto st1;
st1:
	if ( ++p == pe )
		goto _test_eof1;
case 1:
#line 675 "markup.c"
	switch( (*p) ) {
		case 33: goto st15;
		case 47: goto st36;
		case 63: goto st38;
		case 96: goto tr1;
	}
	if ( (*p) < 59 ) {
		if ( 0 <= (*p) && (*p) <= 44 )
			goto tr1;
	} else if ( (*p) > 64 ) {
		if ( (*p) > 94 ) {
			if ( 123 <= (*p) )
				goto tr1;
		} else if ( (*p) >= 91 )
			goto tr1;
	} else
		goto tr1;
	goto tr0;
tr0:
#line 10 "markup.rl"
	{
		identifier.begin = p;
	}
	goto st2;
st2:
	if ( ++p == pe )
		goto _test_eof2;
case 2:
#line 704 "markup.c"
	switch( (*p) ) {
		case 32: goto tr6;
		case 47: goto tr7;
		case 62: goto tr8;
		case 96: goto tr1;
	}
	if ( (*p) < 14 ) {
		if ( (*p) > 8 ) {
			if ( 9 <= (*p) && (*p) <= 13 )
				goto tr6;
		} else if ( (*p) >= 0 )
			goto tr1;
	} else if ( (*p) > 44 ) {
		if ( (*p) < 91 ) {
			if ( 59 <= (*p) && (*p) <= 64 )
				goto tr1;
		} else if ( (*p) > 94 ) {
			if ( 123 <= (*p) )
				goto tr1;
		} else
			goto tr1;
	} else
		goto tr1;
	goto st2;
tr1:
#line 169 "markup.rl"
	{
		Trenni_raise_error("could not parse tag", buffer, p-s);
	}
	goto st0;
tr69:
#line 118 "markup.rl"
	{
		Trenni_raise_error("could not parse instruction", buffer, p-s);
	}
	goto st0;
tr75:
#line 42 "markup.rl"
	{
		Trenni_raise_error("could not parse entity", buffer, p-s);
	}
	goto st0;
#line 747 "markup.c"
st0:
cs = 0;
	goto _out;
tr6:
#line 14 "markup.rl"
	{
		identifier.end = p;
	}
#line 122 "markup.rl"
	{
		// Reset self-closing state - we don't know yet.
		self_closing = 0;
		
		rb_funcall(delegate, id_open_tag_begin, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s));
	}
	goto st3;
tr14:
#line 14 "markup.rl"
	{
		identifier.end = p;
	}
#line 148 "markup.rl"
	{
		if (has_value == 1) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata);
		} else if (has_value == 2) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string);
		} else {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue);
		}
	}
	goto st3;
tr26:
#line 140 "markup.rl"
	{
		has_value = 1;
	}
#line 148 "markup.rl"
	{
		if (has_value == 1) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata);
		} else if (has_value == 2) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string);
		} else {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue);
		}
	}
	goto st3;
tr32:
#line 144 "markup.rl"
	{
		has_value = 2;
	}
#line 148 "markup.rl"
	{
		if (has_value == 1) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata);
		} else if (has_value == 2) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string);
		} else {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue);
		}
	}
	goto st3;
st3:
	if ( ++p == pe )
		goto _test_eof3;
case 3:
#line 816 "markup.c"
	switch( (*p) ) {
		case 32: goto st3;
		case 47: goto tr11;
		case 62: goto st51;
		case 96: goto tr1;
	}
	if ( (*p) < 14 ) {
		if ( (*p) > 8 ) {
			if ( 9 <= (*p) && (*p) <= 13 )
				goto st3;
		} else if ( (*p) >= 0 )
			goto tr1;
	} else if ( (*p) > 44 ) {
		if ( (*p) < 91 ) {
			if ( 59 <= (*p) && (*p) <= 64 )
				goto tr1;
		} else if ( (*p) > 94 ) {
			if ( 123 <= (*p) )
				goto tr1;
		} else
			goto tr1;
	} else
		goto tr1;
	goto tr9;
tr9:
#line 136 "markup.rl"
	{
		has_value = 0;
	}
#line 10 "markup.rl"
	{
		identifier.begin = p;
	}
	goto st4;
st4:
	if ( ++p == pe )
		goto _test_eof4;
case 4:
#line 855 "markup.c"
	switch( (*p) ) {
		case 32: goto tr14;
		case 47: goto tr15;
		case 61: goto tr16;
		case 62: goto tr17;
		case 96: goto tr1;
	}
	if ( (*p) < 14 ) {
		if ( (*p) > 8 ) {
			if ( 9 <= (*p) && (*p) <= 13 )
				goto tr14;
		} else if ( (*p) >= 0 )
			goto tr1;
	} else if ( (*p) > 44 ) {
		if ( (*p) < 91 ) {
			if ( 59 <= (*p) && (*p) <= 64 )
				goto tr1;
		} else if ( (*p) > 94 ) {
			if ( 123 <= (*p) )
				goto tr1;
		} else
			goto tr1;
	} else
		goto tr1;
	goto st4;
tr7:
#line 14 "markup.rl"
	{
		identifier.end = p;
	}
#line 122 "markup.rl"
	{
		// Reset self-closing state - we don't know yet.
		self_closing = 0;
		
		rb_funcall(delegate, id_open_tag_begin, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s));
	}
#line 132 "markup.rl"
	{
		self_closing = 1;
	}
	goto st5;
tr11:
#line 132 "markup.rl"
	{
		self_closing = 1;
	}
	goto st5;
tr15:
#line 14 "markup.rl"
	{
		identifier.end = p;
	}
#line 148 "markup.rl"
	{
		if (has_value == 1) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata);
		} else if (has_value == 2) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string);
		} else {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue);
		}
	}
#line 132 "markup.rl"
	{
		self_closing = 1;
	}
	goto st5;
tr27:
#line 140 "markup.rl"
	{
		has_value = 1;
	}
#line 148 "markup.rl"
	{
		if (has_value == 1) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata);
		} else if (has_value == 2) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string);
		} else {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue);
		}
	}
#line 132 "markup.rl"
	{
		self_closing = 1;
	}
	goto st5;
tr33:
#line 144 "markup.rl"
	{
		has_value = 2;
	}
#line 148 "markup.rl"
	{
		if (has_value == 1) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata);
		} else if (has_value == 2) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string);
		} else {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue);
		}
	}
#line 132 "markup.rl"
	{
		self_closing = 1;
	}
	goto st5;
st5:
	if ( ++p == pe )
		goto _test_eof5;
case 5:
#line 968 "markup.c"
	if ( (*p) == 62 )
		goto st51;
	goto tr1;
tr8:
#line 14 "markup.rl"
	{
		identifier.end = p;
	}
#line 122 "markup.rl"
	{
		// Reset self-closing state - we don't know yet.
		self_closing = 0;
		
		rb_funcall(delegate, id_open_tag_begin, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s));
	}
	goto st51;
tr17:
#line 14 "markup.rl"
	{
		identifier.end = p;
	}
#line 148 "markup.rl"
	{
		if (has_value == 1) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata);
		} else if (has_value == 2) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string);
		} else {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue);
		}
	}
	goto st51;
tr28:
#line 140 "markup.rl"
	{
		has_value = 1;
	}
#line 148 "markup.rl"
	{
		if (has_value == 1) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata);
		} else if (has_value == 2) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string);
		} else {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue);
		}
	}
	goto st51;
tr34:
#line 144 "markup.rl"
	{
		has_value = 2;
	}
#line 148 "markup.rl"
	{
		if (has_value == 1) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), pcdata);
		} else if (has_value == 2) {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), empty_string);
		} else {
			rb_funcall(delegate, id_attribute, 2, Trenni_token(identifier, encoding), Qtrue);
		}
	}
	goto st51;
st51:
	if ( ++p == pe )
		goto _test_eof51;
case 51:
#line 1037 "markup.c"
	switch( (*p) ) {
		case 38: goto tr97;
		case 60: goto tr98;
	}
	goto tr96;
tr16:
#line 14 "markup.rl"
	{
		identifier.end = p;
	}
	goto st6;
st6:
	if ( ++p == pe )
		goto _test_eof6;
case 6:
#line 1053 "markup.c"
	switch( (*p) ) {
		case 34: goto st7;
		case 39: goto st12;
	}
	goto tr1;
st7:
	if ( ++p == pe )
		goto _test_eof7;
case 7:
	switch( (*p) ) {
		case 34: goto st11;
		case 38: goto tr22;
		case 60: goto tr1;
	}
	goto tr20;
tr20:
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st8;
tr29:
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st8;
st8:
	if ( ++p == pe )
		goto _test_eof8;
case 8:
#line 1089 "markup.c"
	switch( (*p) ) {
		case 34: goto tr24;
		case 38: goto tr25;
		case 60: goto tr1;
	}
	goto st8;
tr24:
#line 36 "markup.rl"
	{
		characters.end = p;
		
		Trenni_append_token(&pcdata, encoding, characters);
	}
#line 22 "markup.rl"
	{
	}
	goto st9;
tr30:
#line 22 "markup.rl"
	{
	}
	goto st9;
st9:
	if ( ++p == pe )
		goto _test_eof9;
case 9:
#line 1116 "markup.c"
	switch( (*p) ) {
		case 32: goto tr26;
		case 47: goto tr27;
		case 62: goto tr28;
	}
	if ( 9 <= (*p) && (*p) <= 13 )
		goto tr26;
	goto tr1;
tr22:
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 10; goto st42;}}
	goto st10;
tr25:
#line 36 "markup.rl"
	{
		characters.end = p;
		
		Trenni_append_token(&pcdata, encoding, characters);
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 10; goto st42;}}
	goto st10;
tr31:
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 10; goto st42;}}
	goto st10;
st10:
	if ( ++p == pe )
		goto _test_eof10;
case 10:
#line 1151 "markup.c"
	switch( (*p) ) {
		case 34: goto tr30;
		case 38: goto tr31;
		case 60: goto tr1;
	}
	goto tr29;
st11:
	if ( ++p == pe )
		goto _test_eof11;
case 11:
	switch( (*p) ) {
		case 32: goto tr32;
		case 47: goto tr33;
		case 62: goto tr34;
	}
	if ( 9 <= (*p) && (*p) <= 13 )
		goto tr32;
	goto tr1;
st12:
	if ( ++p == pe )
		goto _test_eof12;
case 12:
	switch( (*p) ) {
		case 38: goto tr36;
		case 39: goto st11;
		case 60: goto tr1;
	}
	goto tr35;
tr35:
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st13;
tr39:
#line 32 "markup.rl"
	{
		characters.begin = p;
	}
	goto st13;
st13:
	if ( ++p == pe )
		goto _test_eof13;
case 13:
#line 1200 "markup.c"
	switch( (*p) ) {
		case 38: goto tr38;
		case 39: goto tr24;
		case 60: goto tr1;
	}
	goto st13;
tr36:
#line 18 "markup.rl"
	{
		pcdata = Qnil;
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 14; goto st42;}}
	goto st14;
tr38:
#line 36 "markup.rl"
	{
		characters.end = p;
		
		Trenni_append_token(&pcdata, encoding, characters);
	}
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 14; goto st42;}}
	goto st14;
tr40:
#line 10 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{stack[top++] = 14; goto st42;}}
	goto st14;
st14:
	if ( ++p == pe )
		goto _test_eof14;
case 14:
#line 1233 "markup.c"
	switch( (*p) ) {
		case 38: goto tr40;
		case 39: goto tr30;
		case 60: goto tr1;
	}
	goto tr39;
st15:
	if ( ++p == pe )
		goto _test_eof15;
case 15:
	switch( (*p) ) {
		case 45: goto st16;
		case 68: goto st20;
		case 91: goto st27;
	}
	goto st0;
st16:
	if ( ++p == pe )
		goto _test_eof16;
case 16:
	if ( (*p) == 45 )
		goto st17;
	goto st0;
st17:
	if ( ++p == pe )
		goto _test_eof17;
case 17:
	if ( (*p) == 45 )
		goto st18;
	goto st17;
st18:
	if ( ++p == pe )
		goto _test_eof18;
case 18:
	if ( (*p) == 45 )
		goto st19;
	goto st17;
st19:
	if ( ++p == pe )
		goto _test_eof19;
case 19:
	switch( (*p) ) {
		case 45: goto st19;
		case 62: goto st52;
	}
	goto st17;
st52:
	if ( ++p == pe )
		goto _test_eof52;
case 52:
	switch( (*p) ) {
		case 38: goto tr100;
		case 60: goto tr101;
	}
	goto tr99;
st20:
	if ( ++p == pe )
		goto _test_eof20;
case 20:
	if ( (*p) == 79 )
		goto st21;
	goto st0;
st21:
	if ( ++p == pe )
		goto _test_eof21;
case 21:
	if ( (*p) == 67 )
		goto st22;
	goto st0;
st22:
	if ( ++p == pe )
		goto _test_eof22;
case 22:
	if ( (*p) == 84 )
		goto st23;
	goto st0;
st23:
	if ( ++p == pe )
		goto _test_eof23;
case 23:
	if ( (*p) == 89 )
		goto st24;
	goto st0;
st24:
	if ( ++p == pe )
		goto _test_eof24;
case 24:
	if ( (*p) == 80 )
		goto st25;
	goto st0;
st25:
	if ( ++p == pe )
		goto _test_eof25;
case 25:
	if ( (*p) == 69 )
		goto st26;
	goto st0;
st26:
	if ( ++p == pe )
		goto _test_eof26;
case 26:
	if ( (*p) == 62 )
		goto st53;
	goto st26;
st53:
	if ( ++p == pe )
		goto _test_eof53;
case 53:
	switch( (*p) ) {
		case 38: goto tr103;
		case 60: goto tr104;
	}
	goto tr102;
st27:
	if ( ++p == pe )
		goto _test_eof27;
case 27:
	if ( (*p) == 67 )
		goto st28;
	goto st0;
st28:
	if ( ++p == pe )
		goto _test_eof28;
case 28:
	if ( (*p) == 68 )
		goto st29;
	goto st0;
st29:
	if ( ++p == pe )
		goto _test_eof29;
case 29:
	if ( (*p) == 65 )
		goto st30;
	goto st0;
st30:
	if ( ++p == pe )
		goto _test_eof30;
case 30:
	if ( (*p) == 84 )
		goto st31;
	goto st0;
st31:
	if ( ++p == pe )
		goto _test_eof31;
case 31:
	if ( (*p) == 65 )
		goto st32;
	goto st0;
st32:
	if ( ++p == pe )
		goto _test_eof32;
case 32:
	if ( (*p) == 91 )
		goto st33;
	goto st0;
st33:
	if ( ++p == pe )
		goto _test_eof33;
case 33:
	if ( (*p) == 93 )
		goto st34;
	goto st33;
st34:
	if ( ++p == pe )
		goto _test_eof34;
case 34:
	if ( (*p) == 93 )
		goto st35;
	goto st33;
st35:
	if ( ++p == pe )
		goto _test_eof35;
case 35:
	switch( (*p) ) {
		case 62: goto st54;
		case 93: goto st35;
	}
	goto st33;
st54:
	if ( ++p == pe )
		goto _test_eof54;
case 54:
	switch( (*p) ) {
		case 38: goto tr106;
		case 60: goto tr107;
	}
	goto tr105;
st36:
	if ( ++p == pe )
		goto _test_eof36;
case 36:
	switch( (*p) ) {
		case 47: goto tr1;
		case 96: goto tr1;
	}
	if ( (*p) < 59 ) {
		if ( 0 <= (*p) && (*p) <= 44 )
			goto tr1;
	} else if ( (*p) > 64 ) {
		if ( (*p) > 94 ) {
			if ( 123 <= (*p) )
				goto tr1;
		} else if ( (*p) >= 91 )
			goto tr1;
	} else
		goto tr1;
	goto tr65;
tr65:
#line 10 "markup.rl"
	{
		identifier.begin = p;
	}
	goto st37;
st37:
	if ( ++p == pe )
		goto _test_eof37;
case 37:
#line 1451 "markup.c"
	switch( (*p) ) {
		case 47: goto tr1;
		case 62: goto tr67;
		case 96: goto tr1;
	}
	if ( (*p) < 59 ) {
		if ( 0 <= (*p) && (*p) <= 44 )
			goto tr1;
	} else if ( (*p) > 64 ) {
		if ( (*p) > 94 ) {
			if ( 123 <= (*p) )
				goto tr1;
		} else if ( (*p) >= 91 )
			goto tr1;
	} else
		goto tr1;
	goto st37;
tr67:
#line 14 "markup.rl"
	{
		identifier.end = p;
	}
	goto st55;
st55:
	if ( ++p == pe )
		goto _test_eof55;
case 55:
#line 1479 "markup.c"
	switch( (*p) ) {
		case 38: goto tr109;
		case 60: goto tr110;
	}
	goto tr108;
st38:
	if ( ++p == pe )
		goto _test_eof38;
case 38:
	switch( (*p) ) {
		case 47: goto tr69;
		case 96: goto tr69;
	}
	if ( (*p) < 59 ) {
		if ( 0 <= (*p) && (*p) <= 44 )
			goto tr69;
	} else if ( (*p) > 64 ) {
		if ( (*p) > 94 ) {
			if ( 123 <= (*p) )
				goto tr69;
		} else if ( (*p) >= 91 )
			goto tr69;
	} else
		goto tr69;
	goto tr68;
tr68:
#line 10 "markup.rl"
	{
		identifier.begin = p;
	}
	goto st39;
st39:
	if ( ++p == pe )
		goto _test_eof39;
case 39:
#line 1515 "markup.c"
	switch( (*p) ) {
		case 32: goto tr71;
		case 47: goto tr69;
		case 96: goto tr69;
	}
	if ( (*p) < 14 ) {
		if ( (*p) > 8 ) {
			if ( 9 <= (*p) && (*p) <= 13 )
				goto tr71;
		} else if ( (*p) >= 0 )
			goto tr69;
	} else if ( (*p) > 44 ) {
		if ( (*p) < 91 ) {
			if ( 59 <= (*p) && (*p) <= 64 )
				goto tr69;
		} else if ( (*p) > 94 ) {
			if ( 123 <= (*p) )
				goto tr69;
		} else
			goto tr69;
	} else
		goto tr69;
	goto st39;
tr71:
#line 14 "markup.rl"
	{
		identifier.end = p;
	}
#line 106 "markup.rl"
	{
	}
	goto st40;
st40:
	if ( ++p == pe )
		goto _test_eof40;
case 40:
#line 1552 "markup.c"
	if ( (*p) == 63 )
		goto tr73;
	goto st40;
tr73:
#line 109 "markup.rl"
	{
	}
	goto st41;
st41:
	if ( ++p == pe )
		goto _test_eof41;
case 41:
#line 1565 "markup.c"
	switch( (*p) ) {
		case 62: goto st56;
		case 63: goto tr73;
	}
	goto st40;
st56:
	if ( ++p == pe )
		goto _test_eof56;
case 56:
	switch( (*p) ) {
		case 38: goto tr112;
		case 60: goto tr113;
	}
	goto tr111;
st42:
	if ( ++p == pe )
		goto _test_eof42;
case 42:
	if ( (*p) == 35 )
		goto st43;
	if ( (*p) < 65 ) {
		if ( 48 <= (*p) && (*p) <= 57 )
			goto tr77;
	} else if ( (*p) > 90 ) {
		if ( 97 <= (*p) && (*p) <= 122 )
			goto tr77;
	} else
		goto tr77;
	goto tr75;
st43:
	if ( ++p == pe )
		goto _test_eof43;
case 43:
	if ( (*p) == 120 )
		goto st45;
	if ( 48 <= (*p) && (*p) <= 57 )
		goto tr78;
	goto tr75;
tr78:
#line 46 "markup.rl"
	{
		entity.begin = p;
	}
	goto st44;
st44:
	if ( ++p == pe )
		goto _test_eof44;
case 44:
#line 1614 "markup.c"
	if ( (*p) == 59 )
		goto tr81;
	if ( 48 <= (*p) && (*p) <= 57 )
		goto st44;
	goto tr75;
tr81:
#line 66 "markup.rl"
	{
		entity.end = p;
		
		codepoint = strtoul(entity.begin, (char **)&entity.end, 10);
		
		Trenni_append_codepoint(&pcdata, encoding, codepoint);
	}
#line 8 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{cs = stack[--top];goto _again;}}
	goto st57;
tr84:
#line 58 "markup.rl"
	{
		entity.end = p;
		
		codepoint = strtoul(entity.begin, (char **)&entity.end, 16);
		
		Trenni_append_codepoint(&pcdata, encoding, codepoint);
	}
#line 8 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{cs = stack[--top];goto _again;}}
	goto st57;
tr86:
#line 50 "markup.rl"
	{
		entity.end = p;
		
		Trenni_append_string(&pcdata, encoding, 
			rb_funcall(entities, id_key_get, 1, Trenni_token(entity, encoding))
		);
	}
#line 8 "/Users/samuel/Documents/Programming/ioquatix/trenni/parsers/trenni/entities.rl"
	{{cs = stack[--top];goto _again;}}
	goto st57;
st57:
	if ( ++p == pe )
		goto _test_eof57;
case 57:
#line 1660 "markup.c"
	goto st0;
st45:
	if ( ++p == pe )
		goto _test_eof45;
case 45:
	if ( (*p) < 65 ) {
		if ( 48 <= (*p) && (*p) <= 57 )
			goto tr82;
	} else if ( (*p) > 70 ) {
		if ( 97 <= (*p) && (*p) <= 102 )
			goto tr82;
	} else
		goto tr82;
	goto tr75;
tr82:
#line 46 "markup.rl"
	{
		entity.begin = p;
	}
	goto st46;
st46:
	if ( ++p == pe )
		goto _test_eof46;
case 46:
#line 1685 "markup.c"
	if ( (*p) == 59 )
		goto tr84;
	if ( (*p) < 65 ) {
		if ( 48 <= (*p) && (*p) <= 57 )
			goto st46;
	} else if ( (*p) > 70 ) {
		if ( 97 <= (*p) && (*p) <= 102 )
			goto st46;
	} else
		goto st46;
	goto tr75;
tr77:
#line 46 "markup.rl"
	{
		entity.begin = p;
	}
	goto st47;
st47:
	if ( ++p == pe )
		goto _test_eof47;
case 47:
#line 1707 "markup.c"
	if ( (*p) == 59 )
		goto tr86;
	if ( (*p) < 65 ) {
		if ( 48 <= (*p) && (*p) <= 57 )
			goto st47;
	} else if ( (*p) > 90 ) {
		if ( 97 <= (*p) && (*p) <= 122 )
			goto st47;
	} else
		goto st47;
	goto tr75;
	}
	_test_eof48: cs = 48; goto _test_eof; 
	_test_eof49: cs = 49; goto _test_eof; 
	_test_eof50: cs = 50; goto _test_eof; 
	_test_eof1: cs = 1; goto _test_eof; 
	_test_eof2: cs = 2; goto _test_eof; 
	_test_eof3: cs = 3; goto _test_eof; 
	_test_eof4: cs = 4; goto _test_eof; 
	_test_eof5: cs = 5; goto _test_eof; 
	_test_eof51: cs = 51; goto _test_eof; 
	_test_eof6: cs = 6; goto _test_eof; 
	_test_eof7: cs = 7; goto _test_eof; 
	_test_eof8: cs = 8; goto _test_eof; 
	_test_eof9: cs = 9; goto _test_eof; 
	_test_eof10: cs = 10; goto _test_eof; 
	_test_eof11: cs = 11; goto _test_eof; 
	_test_eof12: cs = 12; goto _test_eof; 
	_test_eof13: cs = 13; goto _test_eof; 
	_test_eof14: cs = 14; goto _test_eof; 
	_test_eof15: cs = 15; goto _test_eof; 
	_test_eof16: cs = 16; goto _test_eof; 
	_test_eof17: cs = 17; goto _test_eof; 
	_test_eof18: cs = 18; goto _test_eof; 
	_test_eof19: cs = 19; goto _test_eof; 
	_test_eof52: cs = 52; goto _test_eof; 
	_test_eof20: cs = 20; goto _test_eof; 
	_test_eof21: cs = 21; goto _test_eof; 
	_test_eof22: cs = 22; goto _test_eof; 
	_test_eof23: cs = 23; goto _test_eof; 
	_test_eof24: cs = 24; goto _test_eof; 
	_test_eof25: cs = 25; goto _test_eof; 
	_test_eof26: cs = 26; goto _test_eof; 
	_test_eof53: cs = 53; goto _test_eof; 
	_test_eof27: cs = 27; goto _test_eof; 
	_test_eof28: cs = 28; goto _test_eof; 
	_test_eof29: cs = 29; goto _test_eof; 
	_test_eof30: cs = 30; goto _test_eof; 
	_test_eof31: cs = 31; goto _test_eof; 
	_test_eof32: cs = 32; goto _test_eof; 
	_test_eof33: cs = 33; goto _test_eof; 
	_test_eof34: cs = 34; goto _test_eof; 
	_test_eof35: cs = 35; goto _test_eof; 
	_test_eof54: cs = 54; goto _test_eof; 
	_test_eof36: cs = 36; goto _test_eof; 
	_test_eof37: cs = 37; goto _test_eof; 
	_test_eof55: cs = 55; goto _test_eof; 
	_test_eof38: cs = 38; goto _test_eof; 
	_test_eof39: cs = 39; goto _test_eof; 
	_test_eof40: cs = 40; goto _test_eof; 
	_test_eof41: cs = 41; goto _test_eof; 
	_test_eof56: cs = 56; goto _test_eof; 
	_test_eof42: cs = 42; goto _test_eof; 
	_test_eof43: cs = 43; goto _test_eof; 
	_test_eof44: cs = 44; goto _test_eof; 
	_test_eof57: cs = 57; goto _test_eof; 
	_test_eof45: cs = 45; goto _test_eof; 
	_test_eof46: cs = 46; goto _test_eof; 
	_test_eof47: cs = 47; goto _test_eof; 

	_test_eof: {}
	if ( p == eof )
	{
	switch ( cs ) {
	case 42: 
	case 43: 
	case 44: 
	case 45: 
	case 46: 
	case 47: 
#line 42 "markup.rl"
	{
		Trenni_raise_error("could not parse entity", buffer, p-s);
	}
	break;
	case 53: 
#line 78 "markup.rl"
	{
		doctype.end = p;
		
		rb_funcall(delegate, id_doctype, 1, Trenni_token(doctype, encoding));
	}
	break;
	case 26: 
#line 84 "markup.rl"
	{
		Trenni_raise_error("could not parse doctype", buffer, p-s);
	}
	break;
	case 52: 
#line 92 "markup.rl"
	{
		comment.end = p;
		
		rb_funcall(delegate, id_comment, 1, Trenni_token(comment, encoding));
	}
	break;
	case 17: 
	case 18: 
	case 19: 
#line 98 "markup.rl"
	{
		Trenni_raise_error("could not parse comment", buffer, p-s);
	}
	break;
	case 56: 
#line 112 "markup.rl"
	{
		instruction.end = p;
		
		rb_funcall(delegate, id_instruction, 1, Trenni_token(instruction, encoding));
	}
	break;
	case 38: 
	case 39: 
	case 40: 
	case 41: 
#line 118 "markup.rl"
	{
		Trenni_raise_error("could not parse instruction", buffer, p-s);
	}
	break;
	case 51: 
#line 158 "markup.rl"
	{
		rb_funcall(delegate, id_open_tag_end, 1, self_closing == 1 ? Qtrue : Qfalse);
	}
	break;
	case 55: 
#line 165 "markup.rl"
	{
		rb_funcall(delegate, id_close_tag, 2, Trenni_token(identifier, encoding), ULONG2NUM(identifier.begin-s));
	}
	break;
	case 1: 
	case 2: 
	case 3: 
	case 4: 
	case 5: 
	case 6: 
	case 7: 
	case 8: 
	case 9: 
	case 10: 
	case 11: 
	case 12: 
	case 13: 
	case 14: 
	case 36: 
	case 37: 
#line 169 "markup.rl"
	{
		Trenni_raise_error("could not parse tag", buffer, p-s);
	}
	break;
	case 54: 
#line 177 "markup.rl"
	{
		cdata.end = p;
		
		rb_funcall(delegate, id_cdata, 1, Trenni_token(cdata, encoding));
	}
	break;
	case 33: 
	case 34: 
	case 35: 
#line 183 "markup.rl"
	{
		Trenni_raise_error("could not parse cdata", buffer, p-s);
	}
	break;
	case 50: 
#line 22 "markup.rl"
	{
	}
#line 28 "markup.rl"
	{
		rb_funcall(delegate, id_text, 1, pcdata);
	}
	break;
	case 49: 
#line 36 "markup.rl"
	{
		characters.end = p;
		
		Trenni_append_token(&pcdata, encoding, characters);
	}
#line 22 "markup.rl"
	{
	}
#line 28 "markup.rl"
	{
		rb_funcall(delegate, id_text, 1, pcdata);
	}
	break;
#line 1913 "markup.c"
	}
	}

	_out: {}
	}

#line 214 "markup.rl"

	
	if (p != eof) {
		Trenni_raise_error("could not parse all input", buffer, p-s);
	}
	
	return Qnil;
}
Example #16
0
/*
 * Look up the encoding associated with +obj+ via rb_enc_get() and return
 * whatever index_of_encoding() reports for it.
 */
int
rb_enc_get_index(VALUE obj)
{
    int idx = index_of_encoding(rb_enc_get(obj));

    return idx;
}
Example #17
0
/*
 * Spec helper around rb_enc_nth().
 *
 * Passes the byte range of +str+, the requested position and the encoding
 * obtained from rb_enc_get(str) to rb_enc_nth(), then returns the distance
 * in bytes between the returned pointer and the start of the string,
 * converted with LONG2NUM.
 *
 * +index+ is read with FIX2LONG -- presumably callers always pass a Fixnum;
 * confirm against the Ruby-side spec. +self+ is not used.
 */
static VALUE encoding_spec_rb_enc_nth(VALUE self, VALUE str, VALUE index) {
  char* base = RSTRING_PTR(str);
  char* limit = base + RSTRING_LEN(str);
  long nth = FIX2LONG(index);
  char* found = rb_enc_nth(base, limit, nth, rb_enc_get(str));

  return LONG2NUM(found - base);
}