コード例 #1
0
/*
 * Copy the string value of ssv into dsv, then make dsv's UTF-8 flag
 * agree with the flag on ssv.
 */
static void
my_sv_copypv(pTHX_ SV *const dsv, SV *const ssv)
{
    STRLEN src_len;
    const char * const src = SvPV_const(ssv, src_len);

    sv_setpvn(dsv, src, src_len);

    /* The flag is set explicitly in both directions after the byte copy. */
    if (!SvUTF8(ssv)) {
        SvUTF8_off(dsv);
        return;
    }
    SvUTF8_on(dsv);
}
コード例 #2
0
/*
 * Feed one chunk of the document to the parser, or finish the document.
 *
 * p_state - parser state; buf, pend_text, ignoring_element and the
 *           offset/line/column/mode fields are read and updated here.
 * chunk   - next piece of input, or NULL to signal end of input.
 * self    - passed through unchanged to report_event(), parse_buf() and
 *           flush_pending_text().
 *
 * chunk == NULL: whatever is left in p_state->buf is flushed (re-parsed,
 * reported as a comment, or reported as text), pending text is flushed,
 * E_END_DOCUMENT is reported and the position/mode state is reset.
 *
 * chunk != NULL: the chunk is appended to any buffered remainder and
 * handed to parse_buf(); whatever parse_buf() did not consume is kept in
 * p_state->buf for the next call.  On the very first data (offset == 0)
 * E_START_DOCUMENT is reported and, when warnings are enabled, the input
 * is checked for byte order marks that indicate undecoded data.
 */
EXTERN void
parse(pTHX_
      PSTATE* p_state,
      SV* chunk,
      SV* self)
{
    char *s, *beg, *end;
    U32 utf8 = 0;
    STRLEN len;

    if (!chunk) {
	/* eof */
	char empty[1];	/* only its address is used, as an empty beg/end range */
	if (p_state->buf && SvOK(p_state->buf)) {
	    /* flush it */
	    s = SvPV(p_state->buf, len);
	    end = s + len;
	    utf8 = SvUTF8(p_state->buf);
	    assert(len);

	    while (s < end) {
		if (p_state->literal_mode) {
		    /* "plaintext" that is not being closed: stop here, the
		     * rest is reported as text below */
		    if (strEQ(p_state->literal_mode, "plaintext") && !p_state->closing_plaintext)
			break;
		    /* otherwise leave literal mode, remember the element so
		     * report_event() can emit its end tag, and parse again */
		    p_state->pending_end_tag = p_state->literal_mode;
		    p_state->literal_mode = 0;
		    s = parse_buf(aTHX_ p_state, s, end, utf8, self);
		    continue;
		}

		/* Give parse_buf() one more go with no_dash_dash_comment_end
		 * set; the flag also keeps this branch from being re-entered. */
		if (!p_state->strict_comment && !p_state->no_dash_dash_comment_end && *s == '<') {
		    p_state->no_dash_dash_comment_end = 1;
		    s = parse_buf(aTHX_ p_state, s, end, utf8, self);
		    continue;
		}

		if (!p_state->strict_comment && *s == '<') {
		    /* some kind of unterminated markup.  Report rest as a comment */
		    token_pos_t token;
		    token.beg = s + 1;
		    token.end = end;
		    report_event(p_state, E_COMMENT, s, end, utf8, &token, 1, self);
		    s = end;
		}

		break;
	    }

	    if (s < end) {
		/* report rest as text */
		report_event(p_state, E_TEXT, s, end, utf8, 0, 0, self);
	    }
	    
	    SvREFCNT_dec(p_state->buf);
	    p_state->buf = 0;
	}
	if (p_state->pend_text && SvOK(p_state->pend_text))
	    flush_pending_text(p_state, self);

	if (p_state->ignoring_element) {
	    /* document not balanced */
	    SvREFCNT_dec(p_state->ignoring_element);
	    p_state->ignoring_element = 0;
	}
	report_event(p_state, E_END_DOCUMENT, empty, empty, 0, 0, 0, self);

	/* reset state */
	p_state->offset = 0;
	/* a zero line is left at zero; only a non-zero line is reset to 1 */
	if (p_state->line)
	    p_state->line = 1;
	p_state->column = 0;
	p_state->literal_mode = 0;
	p_state->is_cdata = 0;
	return;
    }

#ifdef UNICODE_HTML_PARSER
    /* in utf8_mode the chunk is downgraded first (fail_ok argument is 0) */
    if (p_state->utf8_mode)
	sv_utf8_downgrade(chunk, 0);
#endif

    if (p_state->buf && SvOK(p_state->buf)) {
	/* continue from the unparsed remainder of the previous call */
	sv_catsv(p_state->buf, chunk);
	beg = SvPV(p_state->buf, len);
	utf8 = SvUTF8(p_state->buf);
    }
    else {
	/* nothing buffered: parse the chunk's own string in place */
	beg = SvPV(chunk, len);
	utf8 = SvUTF8(chunk);
	if (p_state->offset == 0) {
	    report_event(p_state, E_START_DOCUMENT, beg, beg, 0, 0, 0, self);

	    /* Print warnings if we find unexpected Unicode BOM forms */
#ifdef UNICODE_HTML_PARSER
	    /* EF BB BF is the UTF-8 BOM; C3 AF C2 BB C2 BF is that same BOM
	     * with each byte UTF-8 encoded once more. */
	    if (DOWARN &&
		p_state->argspec_entity_decode &&
		!p_state->utf8_mode && (
                 (!utf8 && len >= 3 && strnEQ(beg, "\xEF\xBB\xBF", 3)) ||
		 (utf8 && len >= 6 && strnEQ(beg, "\xC3\xAF\xC2\xBB\xC2\xBF", 6)) ||
		 (!utf8 && probably_utf8_chunk(aTHX_ beg, len))
		)
	       )
	    {
		warn("Parsing of undecoded UTF-8 will give garbage when decoding entities");
	    }
	    if (DOWARN && utf8 && len >= 2 && strnEQ(beg, "\xFF\xFE", 2)) {
		warn("Parsing string decoded with wrong endianess");
	    }
#endif
	    if (DOWARN) {
		/* The UTF-32 marks contain NUL bytes, so they must be
		 * compared with memEQ: a strncmp-based compare stops at the
		 * first NUL and would match far too much.  UTF-32 is tested
		 * before UTF-16 because the UTF-32LE mark (FF FE 00 00)
		 * starts with the UTF-16LE mark (FF FE). */
		if (!utf8 && len >= 4 &&
		    (memEQ(beg, "\x00\x00\xFE\xFF", 4) ||
		     memEQ(beg, "\xFF\xFE\x00\x00", 4))
		    )
		{
		    warn("Parsing of undecoded UTF-32");
		}
		else if (!utf8 && len >= 2 &&
			 (strnEQ(beg, "\xFE\xFF", 2) || strnEQ(beg, "\xFF\xFE", 2))
		    )
		{
		    warn("Parsing of undecoded UTF-16");
		}
	    }
	}
    }

    if (!len)
	return; /* nothing to do */

    end = beg + len;
    s = parse_buf(aTHX_ p_state, beg, end, utf8, self);

    if (s == end || p_state->eof) {
	/* nothing left over: mark the buffer undefined but keep the SV so
	 * a later call can fill it again */
	if (p_state->buf) {
	    SvOK_off(p_state->buf);
	}
    }
    else {
	/* need to keep rest in buffer */
	if (p_state->buf) {
	    /* chop off some chars at the beginning */
	    if (SvOK(p_state->buf)) {
		/* beg came from buf itself, so s points into it */
		sv_chop(p_state->buf, s);
	    }
	    else {
		/* beg came from chunk: copy the remainder into buf */
		sv_setpvn(p_state->buf, s, end - s);
		if (utf8)
		    SvUTF8_on(p_state->buf);
		else
		    SvUTF8_off(p_state->buf);
	    }
	}
	else {
	    /* explicit-length constructor: newSVpv() would treat a zero
	     * length as "use strlen" */
	    p_state->buf = newSVpvn(s, end - s);
	    if (utf8)
		SvUTF8_on(p_state->buf);
	}
    }
    return;
}
コード例 #3
0
ファイル: encoding.c プロジェクト: hiromitsu-itou/.perlbrew
/*
 * Fill routine of the encoding layer: take raw bytes from the layer
 * below f, pass them to the "decode" method of e->enc and install the
 * decoded string as this layer's read buffer.
 *
 * Returns 0 once decoded data has been placed in the buffer.  Returns -1
 * if flushing f fails, or after flagging f with PERLIO_F_EOF (avail == 0)
 * or PERLIO_F_ERROR (any other avail) when no data could be obtained.
 */
IV
PerlIOEncode_fill(pTHX_ PerlIO * f)
{
    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
    dSP;
    IV code = 0;
    PerlIO *n;
    SSize_t avail;

    if (PerlIO_flush(f) != 0)
	return -1;
    n  = PerlIONext(f);
    if (!PerlIO_fast_gets(n)) {
	/* Things get too messy if we don't have a buffer layer
	   push a :perlio to do the job */
	char mode[8];
	n  = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
	if (!n) {
	    Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
	}
    }
    /* Set up for the "decode" method call below; the matching
       FREETMPS/LEAVE/POPSTACK are at the bottom of the function. */
    PUSHSTACKi(PERLSI_MAGIC);
    SPAGAIN;
    ENTER;
    SAVETMPS;
  retry:
    /* Work out how many raw bytes the layer below has for us.  If the
       fill returns non-zero and it is not a clean EOF, avail keeps that
       non-zero return value and ends up on the error path below. */
    avail = PerlIO_get_cnt(n);
    if (avail <= 0) {
	avail = PerlIO_fill(n);
	if (avail == 0) {
	    avail = PerlIO_get_cnt(n);
	}
	else {
	    if (!PerlIO_error(n) && PerlIO_eof(n))
		avail = 0;
	}
    }
    if (avail > 0 || (e->flags & NEEDS_LINES)) {
	STDCHAR *ptr = PerlIO_get_ptr(n);
	SSize_t use  = (avail >= 0) ? avail : 0;   /* bytes we plan to consume */
	SV *uni;
	char *s = NULL;
	STRLEN len = 0;
	e->base.ptr = e->base.end = (STDCHAR *) NULL;
	/* NOTE(review): PerlIOEncode_get_base is defined elsewhere; presumably
	   it (re)establishes this layer's buffer - confirm */
	(void) PerlIOEncode_get_base(aTHX_ f);
	if (!e->dataSV)
	    e->dataSV = newSV(0);
	if (SvTYPE(e->dataSV) < SVt_PV) {
	    sv_upgrade(e->dataSV,SVt_PV);
	}
	if (e->flags & NEEDS_LINES) {
	    /* Encoding needs whole lines (e.g. iso-2022-*)
	       search back from end of available data for
	       a line marker
	     */
	    STDCHAR *nl = ptr+use-1;
	    while (nl >= ptr) {
		if (*nl == '\n') {
		    break;
		}
		nl--;
	    }
	    if (nl >= ptr && *nl == '\n') {
		/* found a line - take up to and including that */
		use = (nl+1)-ptr;
	    }
	    else if (avail > 0) {
		/* No line, but not EOF - append avail to the pending data */
		sv_catpvn(e->dataSV, (char*)ptr, use);
		PerlIO_set_ptrcnt(n, ptr+use, 0);
		goto retry;
	    }
	    else if (!SvCUR(e->dataSV)) {
		/* nothing available and nothing pending */
		goto end_of_file;
	    }
	    /* otherwise: nothing new available, but pending data remains
	       and is decoded below */
	}
	if (SvCUR(e->dataSV)) {
	    /* something left over from last time - create a normal
	       SV with new data appended
	     */
	    if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
		if (e->flags & NEEDS_LINES) {
		    /* Have to grow buffer */
		    e->base.bufsiz = use + SvCUR(e->dataSV);
		    PerlIOEncode_get_base(aTHX_ f);
		}
		else {
		    /* cap the amount so pending + new fits in bufsiz */
	       use = e->base.bufsiz - SvCUR(e->dataSV);
	    }
	    }
	    sv_catpvn(e->dataSV,(char*)ptr,use);
	}
	else {
	    /* Create a "dummy" SV to represent the available data from layer below */
	    /* release any string buffer dataSV itself owns before aliasing */
	    if (SvLEN(e->dataSV) && SvPVX_const(e->dataSV)) {
		Safefree(SvPVX_mutable(e->dataSV));
	    }
	    if (use > (SSize_t)e->base.bufsiz) {
		if (e->flags & NEEDS_LINES) {
		    /* Have to grow buffer */
		    e->base.bufsiz = use;
		    PerlIOEncode_get_base(aTHX_ f);
		}
		else {
		    /* cap the amount at bufsiz */
	       use = e->base.bufsiz;
	    }
	    }
	    /* point dataSV straight at the lower layer's buffer (no copy) */
	    SvPV_set(e->dataSV, (char *) ptr);
	    SvLEN_set(e->dataSV, 0);  /* Hands off sv.c - it isn't yours */
	    SvCUR_set(e->dataSV,use);
	    SvPOK_only(e->dataSV);
	}
	SvUTF8_off(e->dataSV);
	/* call e->enc->decode(e->dataSV, e->chk) in scalar context */
	PUSHMARK(sp);
	XPUSHs(e->enc);
	XPUSHs(e->dataSV);
	XPUSHs(e->chk);
	PUTBACK;
	if (call_method("decode", G_SCALAR) != 1) {
	    Perl_die(aTHX_ "panic: decode did not return a value");
	}
	SPAGAIN;
	uni = POPs;
	PUTBACK;
	/* Now get translated string (forced to UTF-8) and use as buffer */
	/* if the result is not a string, len stays 0 and we retry below */
	if (SvPOK(uni)) {
	    s = SvPVutf8(uni, len);
#ifdef PARANOID_ENCODE_CHECKS
	    if (len && !is_utf8_string((U8*)s,len)) {
		Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
	    }
#endif
	}
	if (len > 0) {
	    /* Got _something */
	    /* if decode gave us back dataSV then data may vanish when
	       we do ptrcnt adjust - so take our copy now.
	       (The copy is a pain - need a put-it-here option for decode.)
	     */
	    sv_setpvn(e->bufsv,s,len);
	    e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
	    e->base.end = e->base.ptr + SvCUR(e->bufsv);
	    PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
	    SvUTF8_on(e->bufsv);

	    /* Adjust ptr/cnt not taking anything which
	       did not translate - not clear this is a win */
	    /* compute amount we took */
	    /* whatever is still in dataSV after decode counts as not taken */
	    use -= SvCUR(e->dataSV);
	    PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
	    /* and as we did not take it it isn't pending */
	    SvCUR_set(e->dataSV,0);
	} else {
	    /* Got nothing - assume partial character so we need some more */
	    /* Make sure e->dataSV is a normal SV before re-filling as
	       buffer alias will change under us
	     */
	    s = SvPV(e->dataSV,len);
	    sv_setpvn(e->dataSV,s,len);
	    PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
	    goto retry;
	}
    }
    else {
    end_of_file:
	/* also reached by goto from the NEEDS_LINES branch above */
	code = -1;
	if (avail == 0)
	    PerlIOBase(f)->flags |= PERLIO_F_EOF;
	else
	    PerlIOBase(f)->flags |= PERLIO_F_ERROR;
    }
    FREETMPS;
    LEAVE;
    POPSTACK;
    return code;
}
コード例 #4
0
/*
 * Report one parser event to the handler registered for it.
 *
 * p_state    - parser state.  offset/line/column are advanced past the
 *              beg..end range before any filtering is applied.
 * event      - event id; E_NONE only advances the position (and feeds
 *              skipped_text).
 * beg, end   - source range covered by the event.
 * utf8       - non-zero when the range is UTF-8; SVs built from it get
 *              the UTF-8 flag.
 * tokens     - num_tokens token ranges.  For E_START/E_END tokens[0] is
 *              the tag name; the attribute loops read tokens[1..] as
 *              (name, value) pairs.  A token whose beg is NULL stands
 *              for a boolean attribute value.
 * self       - pushed for ARG_SELF; also the invocant when the handler
 *              is a method name.
 *
 * Flow: a pending end tag is emitted first (recursively), positions are
 * updated, tag filters (ignore_tags / report_tags / ignore_elements) are
 * applied, text is buffered when unbroken_text is on, and finally the
 * arguments named by the handler's argspec are built and either pushed
 * onto the handler array or passed to the handler callback.  A Perl
 * exception raised by the callback is re-thrown.
 *
 * Events that end up ignored have their source text appended to
 * p_state->skipped_text when that SV exists.
 */
static void
report_event(PSTATE* p_state,
	     event_id_t event,
	     char *beg, char *end, U32 utf8,
	     token_pos_t *tokens, int num_tokens,
	     SV* self
	    )
{
    struct p_handler *h;
    dTHX;
    dSP;
    AV *array;
    STRLEN my_na;
    char *argspec;
    char *s;

    /* distance in characters (not bytes) when the data is UTF-8 */
#ifdef UNICODE_HTML_PARSER
    #define CHR_DIST(a,b) (utf8 ? utf8_distance((U8*)(a),(U8*)(b)) : (a) - (b))
#else
    #define CHR_DIST(a,b) ((a) - (b))
#endif

    /* capture offsets */
    STRLEN offset = p_state->offset;
    STRLEN line = p_state->line;
    STRLEN column = p_state->column;

#if 0
    {  /* used for debugging at some point */
	char *s = beg;
	int i;

	/* print debug output */
	switch(event) {
	case E_DECLARATION: printf("DECLARATION"); break;
	case E_COMMENT:     printf("COMMENT"); break;
	case E_START:       printf("START"); break;
	case E_END:         printf("END"); break;
	case E_TEXT:        printf("TEXT"); break;
	case E_PROCESS:     printf("PROCESS"); break;
	case E_NONE:        printf("NONE"); break;
	default:            printf("EVENT #%d", event); break;
	}

	printf(" [");
	while (s < end) {
	    if (*s == '\n') {
		putchar('\\'); putchar('n');
	    }
	    else
		putchar(*s);
	    s++;
	}
	printf("] %d\n", end - beg);
	for (i = 0; i < num_tokens; i++) {
	    printf("  token %d: %d %d\n",
		   i,
		   tokens[i].beg - beg,
		   tokens[i].end - tokens[i].beg);
	}
    }
#endif

    /* An end tag left pending is emitted before any event other than text
     * or comment.  The field is cleared first so the recursive call does
     * not come back here. */
    if (p_state->pending_end_tag && event != E_TEXT && event != E_COMMENT) {
	token_pos_t t;
	char dummy;	/* only its address is used, as an empty range */
	t.beg = p_state->pending_end_tag;
	t.end = p_state->pending_end_tag + strlen(p_state->pending_end_tag);
	p_state->pending_end_tag = 0;
	report_event(p_state, E_END, &dummy, &dummy, 0, &t, 1, self);
	SPAGAIN;
    }

    /* update offsets */
    p_state->offset += CHR_DIST(end, beg);
    if (line) {
	/* line/column are only tracked while line is non-zero */
	char *s = beg;
	char *nl = NULL;	/* last newline seen in the range */
	while (s < end) {
	    if (*s == '\n') {
		p_state->line++;
		nl = s;
	    }
	    s++;
	}
	if (nl)
	    p_state->column = CHR_DIST(end, nl) - 1;
	else
	    p_state->column += CHR_DIST(end, beg);
    }

    if (event == E_NONE)
	goto IGNORE_EVENT;
    
#ifdef MARKED_SECTION
    if (p_state->ms == MS_IGNORE)
	goto IGNORE_EVENT;
#endif

    /* tag filters */
    if (p_state->ignore_tags || p_state->report_tags || p_state->ignore_elements) {

	if (event == E_START || event == E_END) {
	    SV* tagname = p_state->tmp;

	    assert(num_tokens >= 1);
	    sv_setpvn(tagname, tokens[0].beg, tokens[0].end - tokens[0].beg);
	    if (utf8)
		SvUTF8_on(tagname);
	    else
		SvUTF8_off(tagname);
	    if (!CASE_SENSITIVE(p_state))
		sv_lower(aTHX_ tagname);

	    if (p_state->ignoring_element) {
		/* inside an ignored element: count nested start/end tags of
		 * the same name, stop ignoring when the depth reaches 0 */
		if (sv_eq(p_state->ignoring_element, tagname)) {
		    if (event == E_START)
			p_state->ignore_depth++;
		    else if (--p_state->ignore_depth == 0) {
			SvREFCNT_dec(p_state->ignoring_element);
			p_state->ignoring_element = 0;
		    }
		}
		goto IGNORE_EVENT;
	    }

	    if (p_state->ignore_elements &&
		hv_fetch_ent(p_state->ignore_elements, tagname, 0, 0))
	    {
		p_state->ignoring_element = newSVsv(tagname);
		p_state->ignore_depth = 1;
		goto IGNORE_EVENT;
	    }

	    if (p_state->ignore_tags &&
		hv_fetch_ent(p_state->ignore_tags, tagname, 0, 0))
	    {
		goto IGNORE_EVENT;
	    }
	    if (p_state->report_tags &&
		!hv_fetch_ent(p_state->report_tags, tagname, 0, 0))
	    {
		goto IGNORE_EVENT;
	    }
	}
	else if (p_state->ignoring_element) {
	    goto IGNORE_EVENT;
	}
    }

    /* pick the handler for this event, falling back to the default one */
    h = &p_state->handlers[event];
    if (!h->cb) {
	/* event = E_DEFAULT; */
	h = &p_state->handlers[E_DEFAULT];
	if (!h->cb)
	    goto IGNORE_EVENT;
    }

    if (SvTYPE(h->cb) != SVt_PVAV && !SvTRUE(h->cb)) {
	/* FALSE scalar ('' or 0) means IGNORE this event */
	return;
    }

    if (p_state->unbroken_text && event == E_TEXT) {
	/* should buffer text */
	if (!p_state->pend_text)
	    p_state->pend_text = newSV(256);
	if (SvOK(p_state->pend_text)) {
	    /* CDATA and non-CDATA text are never merged */
	    if (p_state->is_cdata != p_state->pend_text_is_cdata) {
		flush_pending_text(p_state, self);
		SPAGAIN;
		goto INIT_PEND_TEXT;
	    }
	}
	else {
	INIT_PEND_TEXT:
	    /* start a new run: remember where it began */
	    p_state->pend_text_offset = offset;
	    p_state->pend_text_line = line;
	    p_state->pend_text_column = column;
	    p_state->pend_text_is_cdata = p_state->is_cdata;
	    sv_setpvn(p_state->pend_text, "", 0);
	    if (!utf8)
		SvUTF8_off(p_state->pend_text);
	}
#ifdef UNICODE_HTML_PARSER
	/* make both sides agree on encoding before appending */
	if (utf8 && !SvUTF8(p_state->pend_text))
	    sv_utf8_upgrade(p_state->pend_text);
	if (utf8 || !SvUTF8(p_state->pend_text)) {
	    sv_catpvn(p_state->pend_text, beg, end - beg);
	}
	else {
	    SV *tmp = newSVpvn(beg, end - beg);
	    sv_utf8_upgrade(tmp);
	    sv_catsv(p_state->pend_text, tmp);
	    SvREFCNT_dec(tmp);
	}
#else
	sv_catpvn(p_state->pend_text, beg, end - beg);
#endif
	return;
    }
    else if (p_state->pend_text && SvOK(p_state->pend_text)) {
	flush_pending_text(p_state, self);
	SPAGAIN;
    }

    /* At this point we have decided to generate an event callback */

    argspec = h->argspec ? SvPV(h->argspec, my_na) : "";

    if (SvTYPE(h->cb) == SVt_PVAV) {
	
	if (*argspec == ARG_FLAG_FLAT_ARRAY) {
	    /* push the arguments directly onto the handler array */
	    argspec++;
	    array = (AV*)h->cb;
	}
	else {
	    /* start sub-array for accumulator array */
	    array = newAV();
	}
    }
    else {
	array = 0;
	if (*argspec == ARG_FLAG_FLAT_ARRAY)
	    argspec++;

	/* start argument stack for callback */
	ENTER;
	SAVETMPS;
	PUSHMARK(SP);
    }

    /* build one argument per argspec code */
    for (s = argspec; *s; s++) {
	SV* arg = 0;
	int push_arg = 1;
	enum argcode argcode = (enum argcode)*s;

	switch( argcode ) {

	case ARG_SELF:
	    arg = sv_mortalcopy(self);
	    break;

	case ARG_TOKENS:
	    if (num_tokens >= 1) {
		AV* av = newAV();
		SV* prev_token = &PL_sv_undef;
		int i;
		av_extend(av, num_tokens);
		for (i = 0; i < num_tokens; i++) {
		    if (tokens[i].beg) {
			prev_token = newSVpvn(tokens[i].beg, tokens[i].end-tokens[i].beg);
			if (utf8)
			    SvUTF8_on(prev_token);
			av_push(av, prev_token);
		    }
		    else { /* boolean */
			av_push(av, p_state->bool_attr_val
				? newSVsv(p_state->bool_attr_val)
				: newSVsv(prev_token));
		    }
		}
		arg = sv_2mortal(newRV_noinc((SV*)av));
	    }
	    break;

	case ARG_TOKENPOS:
	    if (num_tokens >= 1 && tokens[0].beg >= beg) {
		AV* av = newAV();
		int i;
		av_extend(av, num_tokens*2);
		for (i = 0; i < num_tokens; i++) {
		    if (tokens[i].beg) {
			av_push(av, newSViv(CHR_DIST(tokens[i].beg, beg)));
			av_push(av, newSViv(CHR_DIST(tokens[i].end, tokens[i].beg)));
		    }
		    else { /* boolean tag value */
			av_push(av, newSViv(0));
			av_push(av, newSViv(0));
		    }
		}
		arg = sv_2mortal(newRV_noinc((SV*)av));
	    }
	    break;

	case ARG_TOKEN0:
	case ARG_TAGNAME:
	    /* fall through */

	case ARG_TAG:
	    if (num_tokens >= 1) {
		arg = sv_2mortal(newSVpvn(tokens[0].beg,
					  tokens[0].end - tokens[0].beg));
		if (utf8)
		    SvUTF8_on(arg);
		if (!CASE_SENSITIVE(p_state) && argcode != ARG_TOKEN0)
		    sv_lower(aTHX_ arg);
		if (argcode == ARG_TAG && event != E_START) {
		    /* prefix character looked up by event id; assumes the
		     * event_id_t order matches this table - TODO confirm */
		    char *e_type = "!##/#?#";
		    sv_insert(arg, 0, 0, &e_type[event], 1);
		}
	    }
	    break;

	case ARG_ATTR:
	case ARG_ATTRARR:
	    if (event == E_START) {
		HV* hv;
		int i;
		if (argcode == ARG_ATTR) {
		    hv = newHV();
		    arg = sv_2mortal(newRV_noinc((SV*)hv));
		}
		else {
#ifdef __GNUC__
		    /* gcc -Wall reports this variable as possibly used uninitialized */
		    hv = 0;
#endif
		    push_arg = 0;  /* deal with argument pushing here */
		}

		for (i = 1; i < num_tokens; i += 2) {
		    SV* attrname = newSVpvn(tokens[i].beg,
					    tokens[i].end-tokens[i].beg);
		    SV* attrval;

		    if (utf8)
			SvUTF8_on(attrname);
		    if (tokens[i+1].beg) {
			char *beg = tokens[i+1].beg;
			STRLEN len = tokens[i+1].end - beg;
			/* strip the surrounding quotes from a quoted value */
			if (*beg == '"' || *beg == '\'') {
			    assert(len >= 2 && *beg == beg[len-1]);
			    beg++; len -= 2;
			}
			attrval = newSVpvn(beg, len);
			if (utf8)
			    SvUTF8_on(attrval);
			if (!p_state->attr_encoded) {
#ifdef UNICODE_HTML_PARSER
			    if (p_state->utf8_mode)
				sv_utf8_decode(attrval);
#endif
			    decode_entities(aTHX_ attrval, p_state->entity2char, 0);
			    if (p_state->utf8_mode)
				SvUTF8_off(attrval);
			}
		    }
		    else { /* boolean */
			if (p_state->bool_attr_val)
			    attrval = newSVsv(p_state->bool_attr_val);
			else
			    attrval = newSVsv(attrname);
		    }

		    if (!CASE_SENSITIVE(p_state))
			sv_lower(aTHX_ attrname);

		    if (argcode == ARG_ATTR) {
			/* first occurrence of an attribute wins; drop the
			 * value if it was not stored */
			if (hv_exists_ent(hv, attrname, 0) ||
			    !hv_store_ent(hv, attrname, attrval, 0)) {
			    SvREFCNT_dec(attrval);
			}
			SvREFCNT_dec(attrname);
		    }
		    else { /* ARG_ATTRARR */
			if (array) {
			    av_push(array, attrname);
			    av_push(array, attrval);
			}
			else {
			    XPUSHs(sv_2mortal(attrname));
			    XPUSHs(sv_2mortal(attrval));
			}
		    }
		}
	    }
	    else if (argcode == ARG_ATTRARR) {
		push_arg = 0;
	    }
	    break;

	case ARG_ATTRSEQ:       /* (v2 compatibility stuff) */
	    if (event == E_START) {
		AV* av = newAV();
		int i;
		for (i = 1; i < num_tokens; i += 2) {
		    SV* attrname = newSVpvn(tokens[i].beg,
					    tokens[i].end-tokens[i].beg);
		    if (utf8)
			SvUTF8_on(attrname);
		    if (!CASE_SENSITIVE(p_state))
			sv_lower(aTHX_ attrname);
		    av_push(av, attrname);
		}
		arg = sv_2mortal(newRV_noinc((SV*)av));
	    }
	    break;
	
	case ARG_TEXT:
	    arg = sv_2mortal(newSVpvn(beg, end - beg));
	    if (utf8)
		SvUTF8_on(arg);
	    break;

	case ARG_DTEXT:
	    if (event == E_TEXT) {
		arg = sv_2mortal(newSVpvn(beg, end - beg));
		if (utf8)
		    SvUTF8_on(arg);
		if (!p_state->is_cdata) {
#ifdef UNICODE_HTML_PARSER
		    if (p_state->utf8_mode)
			sv_utf8_decode(arg);
#endif
		    decode_entities(aTHX_ arg, p_state->entity2char, 1);
		    if (p_state->utf8_mode)
			SvUTF8_off(arg);
		}
	    }
	    break;
      
	case ARG_IS_CDATA:
	    if (event == E_TEXT) {
		arg = boolSV(p_state->is_cdata);
	    }
	    break;

        case ARG_SKIPPED_TEXT:
	    /* hand the accumulated SV over to the caller and start a new one */
	    arg = sv_2mortal(p_state->skipped_text);
	    p_state->skipped_text = newSVpvn("", 0);
            break;

	case ARG_OFFSET:
	    arg = sv_2mortal(newSViv(offset));
	    break;

	case ARG_OFFSET_END:
	    arg = sv_2mortal(newSViv(offset + CHR_DIST(end, beg)));
	    break;

	case ARG_LENGTH:
	    arg = sv_2mortal(newSViv(CHR_DIST(end, beg)));
	    break;

	case ARG_LINE:
	    arg = sv_2mortal(newSViv(line));
	    break;

	case ARG_COLUMN:
	    arg = sv_2mortal(newSViv(column));
	    break;

	case ARG_EVENT:
	    assert(event >= 0 && event < EVENT_COUNT);
	    arg = sv_2mortal(newSVpv(event_id_str[event], 0));
	    break;

	case ARG_LITERAL:
	{
	    /* the literal is stored inline in the argspec: one length byte
	     * followed by that many bytes; skip over it afterwards */
	    int len = (unsigned char)s[1];
	    arg = sv_2mortal(newSVpvn(s+2, len));
	    if (SvUTF8(h->argspec))
		SvUTF8_on(arg);
	    s += len + 1;
	}
	break;

	case ARG_UNDEF:
	    arg = sv_mortalcopy(&PL_sv_undef);
	    break;
      
	default:
	    arg = sv_2mortal(newSVpvf("Bad argspec %d", *s));
	    break;
	}

	if (push_arg) {
	    /* a code that produced nothing for this event yields undef */
	    if (!arg)
		arg = sv_mortalcopy(&PL_sv_undef);

	    if (array) {
		/* have to fix mortality here or add mortality to
		 * XPUSHs after removing it from the switch cases.
		 */
		av_push(array, SvREFCNT_inc(arg));
	    }
	    else {
		XPUSHs(arg);
	    }
	}
    }

    if (array) {
	/* a sub-array is appended to the handler array as one reference */
	if (array != (AV*)h->cb)
	    av_push((AV*)h->cb, newRV_noinc((SV*)array));
    }
    else {
	PUTBACK;

	/* a non-reference handler whose first argument is self is called
	 * as a method by name */
	if ((enum argcode)*argspec == ARG_SELF && !SvROK(h->cb)) {
	    char *method = SvPV(h->cb, my_na);
	    perl_call_method(method, G_DISCARD | G_EVAL | G_VOID);
	}
	else {
	    perl_call_sv(h->cb, G_DISCARD | G_EVAL | G_VOID);
	}

	/* propagate an exception raised inside the callback */
	if (SvTRUE(ERRSV)) {
	    RETHROW;
	}

	FREETMPS;
	LEAVE;
    }
    /* an event was delivered: whatever was skipped before it is forgotten */
    if (p_state->skipped_text)
	SvCUR_set(p_state->skipped_text, 0);
    return;

IGNORE_EVENT:
    /* the event is not delivered; keep its source text in skipped_text
     * when that SV exists */
    if (p_state->skipped_text) {
	if (event != E_TEXT && p_state->pend_text && SvOK(p_state->pend_text))
	    flush_pending_text(p_state, self);
#ifdef UNICODE_HTML_PARSER
	if (utf8 && !SvUTF8(p_state->skipped_text))
	    sv_utf8_upgrade(p_state->skipped_text);
	if (utf8 || !SvUTF8(p_state->skipped_text)) {
#endif
	    sv_catpvn(p_state->skipped_text, beg, end - beg);
#ifdef UNICODE_HTML_PARSER
	}
	else {
	    /* bytes going into a UTF-8 skipped_text: upgrade a copy first.
	     * The target must be skipped_text; pend_text is unrelated here
	     * and may not even be allocated. */
	    SV *tmp = newSVpvn(beg, end - beg);
	    sv_utf8_upgrade(tmp);
	    sv_catsv(p_state->skipped_text, tmp);
	    SvREFCNT_dec(tmp);
	}
#endif
    }
#undef CHR_DIST    
    return;
}