Example #1
0
/*
 * PJS_ConvertUC - obtain a raw character buffer for a Perl scalar.
 *
 * The scalar is first forced to a string.  If it is not flagged UTF-8, or
 * if it can be downgraded to bytes in place, the scalar's own buffer is
 * returned and *len receives its byte length.
 *
 * Otherwise the string is run through Encode::encode(PJS_STR_ENCODING, ...)
 * and the buffer of a copy of the result is returned.  In that case *len
 * receives the negated half of the encoded byte length (stored in the
 * unsigned STRLEN) -- presumably a marker for two-byte code units; confirm
 * against the callers.
 *
 * NOTE: sv itself may be modified (stringified and/or downgraded).
 */
char *
PJS_ConvertUC(
    pTHX_
    SV *sv,
    STRLEN *len
) {
    dSP;
    SV *encoded;
    char *buf;
    STRLEN nbytes;

    SvPV_force(sv, nbytes);

    /* Simple case: not UTF-8 flagged, or the downgrade to bytes worked. */
    if (!SvUTF8(sv) || sv_utf8_downgrade(sv, 1)) {
        buf = SvPV(sv, nbytes);
        *len = nbytes;
        return buf;
    }

    /* Wide characters present: let Encode produce the target encoding. */
    ENTER;
    SAVETMPS;

    PUSHMARK(SP);
    XPUSHs(sv_2mortal(newSVpv(PJS_STR_ENCODING, 0)));
    XPUSHs(sv_mortalcopy(sv));
    PUTBACK;

    call_pv("Encode::encode", G_SCALAR);

    SPAGAIN;
    /* Copy the result and register it with SAVEMORTALIZESV before the
     * temporaries of this scope are released below. */
    encoded = newSVsv(POPs);
    SAVEMORTALIZESV(encoded);
    buf = SvPV(encoded, nbytes);
    PUTBACK;

    FREETMPS;
    LEAVE;

    *len = -(nbytes / 2);
    return buf;
}
Example #2
0
/*
 * Perl_sv_pvbyten - downgrade sv from UTF-8 (fail_ok = FALSE) and return
 * its string buffer as produced by sv_pvn(), which also receives lp for
 * the length.
 */
char *
Perl_sv_pvbyten(pTHX_ SV *sv, STRLEN *lp)
{
    char *bytes;

    PERL_ARGS_ASSERT_SV_PVBYTEN;

    sv_utf8_downgrade(sv, FALSE);
    bytes = sv_pvn(sv, lp);

    return bytes;
}
Example #3
0
/*
 * Perl_sv_pvbyte - downgrade sv from UTF-8 (fail_ok = FALSE) and return
 * its string buffer as produced by sv_pv().
 */
char *
Perl_sv_pvbyte(pTHX_ SV *sv)
{
    char *bytes;

    PERL_ARGS_ASSERT_SV_PVBYTE;

    sv_utf8_downgrade(sv, FALSE);
    bytes = sv_pv(sv);

    return bytes;
}
Example #4
0
/* This is the main routine to deserialize a structure.
 * It rolls up all the other "top level" routines into one.
 *
 * dec          - decoder state; must not be NULL (asserted).
 * src          - scalar holding the packet. It is downgraded in place when
 *                flagged UTF-8, and chopped in place when the
 *                SRL_F_DECODER_DESTRUCTIVE_INCREMENTAL option is set.
 * into         - scalar handed to srl_read_single_value(); when NULL a
 *                fresh mortal scalar is created and used instead.
 * start_offset - passed through to srl_begin_decoding().
 *
 * Returns `into` (or the mortal created in its place).
 */
SV *
srl_decode_into(pTHX_ srl_decoder_t *dec, SV *src, SV* into, UV start_offset)
{
    assert(dec != NULL);
    /* The decoder works on bytes: downgrade a UTF-8 flagged source first
     * (fail_ok is 0 here). */
    if (SvUTF8(src))
        sv_utf8_downgrade(src, 0);
    srl_begin_decoding(aTHX_ dec, src, start_offset);
    srl_read_header(aTHX_ dec);
    if (SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_DECOMPRESS_SNAPPY)) {
        /* uncompress */
        uint32_t dest_len;
        SV *buf_sv;
        unsigned char *buf;
        unsigned char *old_pos;
        /* NOTE: the order of these initializers matters. The header length
         * is captured first; the next initializer may call
         * srl_read_varint_uv_length(), which presumably advances dec->pos
         * past the length varint -- confirm in that helper. */
        const ptrdiff_t sereal_header_len = dec->pos - dec->buf_start;
        const STRLEN compressed_packet_len =
                ( dec->proto_version_and_flags & SRL_PROTOCOL_ENCODING_MASK ) == SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL
                ? (STRLEN)srl_read_varint_uv_length(aTHX_ dec, " while reading compressed packet size")
                : (STRLEN)(dec->buf_end - dec->pos);
        int decompress_ok;
        int header_len;

        /* all decl's above here, or we break C89 compilers */

        /* Record how much of the original input this packet occupies
         * before the decoder is repointed at the decompressed buffer. */
        dec->bytes_consumed= compressed_packet_len + (dec->pos - dec->buf_start);

        header_len = csnappy_get_uncompressed_length(
                            (char *)dec->pos,
                            compressed_packet_len,
                            &dest_len
                         );
        if (header_len == CSNAPPY_E_HEADER_BAD)
            SRL_ERROR("Invalid Snappy header in Snappy-compressed Sereal packet");

        /* Let perl clean this up. Yes, it's not the most efficient thing
         * ever, but it's just one mortal per full decompression, so not
         * a bottle-neck. */
        buf_sv = sv_2mortal( newSV(sereal_header_len + dest_len + 1 ));
        buf = (unsigned char *)SvPVX(buf_sv);

        /* FIXME probably unnecessary to copy the Sereal header! */
        Copy(dec->buf_start, buf, sereal_header_len, unsigned char);

        /* Repoint the decoder at the new buffer: header copy first, then
         * room for dest_len bytes of decompressed payload. old_pos keeps
         * the position in the compressed input. */
        old_pos = dec->pos;
        dec->buf_start = buf;
        dec->pos = buf + sereal_header_len;
        dec->buf_end = dec->pos + dest_len;
        dec->buf_len = dest_len + sereal_header_len;

        /* Decompress the payload that follows the Snappy length header
         * directly into the new buffer at dec->pos. */
        decompress_ok = csnappy_decompress_noheader((char *)(old_pos + header_len),
                                                    compressed_packet_len - header_len,
                                                    (char *)dec->pos,
                                                    &dest_len);
        if (expect_false( decompress_ok != 0 ))
        {
            SRL_ERRORf1("Snappy decompression of Sereal packet payload failed with error %i!", decompress_ok);
        }
    }

    /* No target supplied by the caller: decode into a fresh mortal. */
    if (expect_true(!into)) {
        into= sv_2mortal(newSV_type(SVt_NULL));
    }
    srl_read_single_value(aTHX_ dec, into);
    /* assert(dec->pos == dec->buf_end); For now we disable this */
    if (expect_false(SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_NEEDS_FINALIZE))) {
        srl_finalize_structure(aTHX_ dec);
    }

    /* If we aren't reading from a decompressed buffer we have to remember the number
     * of bytes used for the user to query. */
    if (dec->bytes_consumed == 0)
        dec->bytes_consumed = dec->pos - dec->buf_start;

    /* Destructive incremental mode: remove the consumed bytes from the
     * front of the caller's scalar. */
    if (SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_DESTRUCTIVE_INCREMENTAL)) {
        STRLEN len;
        char *pv= SvPV(src,len);
        /* check the length here? do something different if the string is now exhausted? */
        /* NOTE(review): len is fetched but not compared against
         * dec->bytes_consumed before chopping. */
        sv_chop(src, pv + dec->bytes_consumed);
    }

    srl_clear_decoder(aTHX_ dec);
    return into;
}
Example #5
0
 /* Byte-string accessor: downgrade sv from UTF-8 (fail_ok = 0), then
  * return its string buffer, storing the length through lp. */
 static char *
 my_sv_2pvbyte(pTHX_ register SV *sv, STRLEN *lp)
 {
     char *bytes;

     sv_utf8_downgrade(sv, 0);
     bytes = SvPV(sv, *lp);

     return bytes;
 }
/*
 * Load - the main entry point for reading YAML.
 *
 * Parses the YAML text held in yaml_sv and pushes one mortal Perl value
 * onto the Perl stack for every document found in the stream (possibly
 * none).  A UTF-8 flagged input is downgraded to bytes on a mortal copy;
 * if that is not possible the function croaks.  Parser errors and
 * unexpected events also croak.
 */
void
Load(SV *yaml_sv)
{
    dXSARGS;
    perl_yaml_loader_t loader;
    SV *doc_sv;
    const unsigned char *input;
    STRLEN input_len;

    input = (const unsigned char *)SvPV_const(yaml_sv, input_len);

    if (DO_UTF8(yaml_sv)) {
        /* Downgrade a copy so the caller's scalar is left alone. */
        yaml_sv = sv_mortalcopy(yaml_sv);
        if (!sv_utf8_downgrade(yaml_sv, TRUE)) {
            croak("%s", "Wide character in YAML::XS::Load()");
        }
        input = (const unsigned char *)SvPV_const(yaml_sv, input_len);
    }

    /* Reset the stack pointer to the mark; results get pushed from here. */
    sp = mark;
    if (0 && (items || ax)) {} /* XXX Quiet the -Wall warnings for now. */

    yaml_parser_initialize(&loader.parser);
    loader.document = 0;
    yaml_parser_set_input_string(&loader.parser, input, input_len);

    /* The very first event has to be STREAM_START. */
    if (!yaml_parser_parse(&loader.parser, &loader.event)) {
        goto load_error;
    }
    if (loader.event.type != YAML_STREAM_START_EVENT) {
        croak("%sExpected STREAM_START_EVENT; Got: %d != %d",
              ERRMSG,
              loader.event.type,
              YAML_STREAM_START_EVENT);
    }

    /* The anchor table is mortal, so Perl releases it for us. */
    loader.anchors = newHV();
    sv_2mortal((SV *)loader.anchors);

    /* One iteration per document, until the stream ends. */
    for (;;) {
        loader.document++;

        /* Done with the previous event; release it before fetching more. */
        yaml_event_delete(&loader.event);
        if (!yaml_parser_parse(&loader.parser, &loader.event)) {
            goto load_error;
        }
        if (loader.event.type == YAML_STREAM_END_EVENT) {
            break;
        }

        doc_sv = load_node(&loader);

        /* Done with that event too; anchors are cleared per document. */
        yaml_event_delete(&loader.event);
        hv_clear(loader.anchors);
        if (!doc_sv) {
            break;
        }
        XPUSHs(sv_2mortal(doc_sv));

        if (!yaml_parser_parse(&loader.parser, &loader.event)) {
            goto load_error;
        }
        if (loader.event.type != YAML_DOCUMENT_END_EVENT) {
            croak("%sExpected DOCUMENT_END_EVENT", ERRMSG);
        }
    }

    /* Whatever ended the loop, the current event must be STREAM_END. */
    if (loader.event.type != YAML_STREAM_END_EVENT) {
        croak("%sExpected STREAM_END_EVENT; Got: %d != %d",
              ERRMSG,
              loader.event.type,
              YAML_STREAM_END_EVENT);
    }

    yaml_parser_delete(&loader.parser);
    PUTBACK;
    return;

load_error:
    croak("%s", loader_error_msg(&loader, NULL));
}
Example #7
0
/*
 * Feed one chunk of input to the parser, or signal end of document.
 *
 * p_state - parser state; its buffer (p_state->buf) carries unparsed
 *           input over from one call to the next.
 * chunk   - the next piece of input, or NULL to signal EOF. At EOF any
 *           buffered input is flushed, E_END_DOCUMENT is reported and the
 *           position/mode fields of p_state are reset.
 * self    - passed through to parse_buf(), report_event() and
 *           flush_pending_text().
 */
EXTERN void
parse(pTHX_
      PSTATE* p_state,
      SV* chunk,
      SV* self)
{
    char *s, *beg, *end;
    U32 utf8 = 0;
    STRLEN len;

    if (!chunk) {
	/* eof */
	/* Placeholder address for the zero-length E_END_DOCUMENT event below */
	char empty[1];
	if (p_state->buf && SvOK(p_state->buf)) {
	    /* flush it */
	    s = SvPV(p_state->buf, len);
	    end = s + len;
	    utf8 = SvUTF8(p_state->buf);
	    assert(len);

	    /* Each pass relaxes one parsing restriction and retries
	     * parse_buf() on what is left; the loop ends at the first pass
	     * where nothing more can be relaxed. */
	    while (s < end) {
		if (p_state->literal_mode) {
		    if (strEQ(p_state->literal_mode, "plaintext") && !p_state->closing_plaintext)
			break;
		    /* Leave literal mode, remembering it as a pending end tag */
		    p_state->pending_end_tag = p_state->literal_mode;
		    p_state->literal_mode = 0;
		    s = parse_buf(aTHX_ p_state, s, end, utf8, self);
		    continue;
		}

		if (!p_state->strict_comment && !p_state->no_dash_dash_comment_end && *s == '<') {
		    /* Retry once with no_dash_dash_comment_end set */
		    p_state->no_dash_dash_comment_end = 1;
		    s = parse_buf(aTHX_ p_state, s, end, utf8, self);
		    continue;
		}

		if (!p_state->strict_comment && *s == '<') {
		    /* some kind of unterminated markup.  Report the rest as a comment */
		    token_pos_t token;
		    token.beg = s + 1;
		    token.end = end;
		    report_event(p_state, E_COMMENT, s, end, utf8, &token, 1, self);
		    s = end;
		}

		break;
	    }

	    if (s < end) {
		/* report rest as text */
		report_event(p_state, E_TEXT, s, end, utf8, 0, 0, self);
	    }
	    
	    SvREFCNT_dec(p_state->buf);
	    p_state->buf = 0;
	}
	if (p_state->pend_text && SvOK(p_state->pend_text))
	    flush_pending_text(p_state, self);

	if (p_state->ignoring_element) {
	    /* document not balanced */
	    SvREFCNT_dec(p_state->ignoring_element);
	    p_state->ignoring_element = 0;
	}
	report_event(p_state, E_END_DOCUMENT, empty, empty, 0, 0, 0, self);

	/* reset state */
	p_state->offset = 0;
	/* line is only reset when it is already non-zero */
	if (p_state->line)
	    p_state->line = 1;
	p_state->column = 0;
	p_state->literal_mode = 0;
	p_state->is_cdata = 0;
	return;
    }

#ifdef UNICODE_HTML_PARSER
    /* In utf8_mode the chunk is downgraded in place (fail_ok is 0) */
    if (p_state->utf8_mode)
	sv_utf8_downgrade(chunk, 0);
#endif

    if (p_state->buf && SvOK(p_state->buf)) {
	/* Leftover input from an earlier call: append and parse the whole buffer */
	sv_catsv(p_state->buf, chunk);
	beg = SvPV(p_state->buf, len);
	utf8 = SvUTF8(p_state->buf);
    }
    else {
	/* Nothing buffered: parse directly out of the chunk */
	beg = SvPV(chunk, len);
	utf8 = SvUTF8(chunk);
	if (p_state->offset == 0) {
	    /* offset 0: this is the first input of a document */
	    report_event(p_state, E_START_DOCUMENT, beg, beg, 0, 0, 0, self);

	    /* Print warnings if we find unexpected Unicode BOM forms */
#ifdef UNICODE_HTML_PARSER
	    if (DOWARN &&
		p_state->argspec_entity_decode &&
		!p_state->utf8_mode && (
                 (!utf8 && len >= 3 && strnEQ(beg, "\xEF\xBB\xBF", 3)) ||
		 (utf8 && len >= 6 && strnEQ(beg, "\xC3\xAF\xC2\xBB\xC2\xBF", 6)) ||
		 (!utf8 && probably_utf8_chunk(aTHX_ beg, len))
		)
	       )
	    {
		warn("Parsing of undecoded UTF-8 will give garbage when decoding entities");
	    }
	    if (DOWARN && utf8 && len >= 2 && strnEQ(beg, "\xFF\xFE", 2)) {
		warn("Parsing string decoded with wrong endianess");
	    }
#endif
	    if (DOWARN) {
		/* The 4-byte UTF-32 check must come before the 2-byte UTF-16
		 * check, because "\xFE\xFF\x00\x00" also starts with "\xFE\xFF" */
		if (!utf8 && len >= 4 &&
		    (strnEQ(beg, "\x00\x00\xFE\xFF", 4) ||
		     strnEQ(beg, "\xFE\xFF\x00\x00", 4))
		    )
		{
		    warn("Parsing of undecoded UTF-32");
		}
		else if (!utf8 && len >= 2 &&
			 (strnEQ(beg, "\xFE\xFF", 2) || strnEQ(beg, "\xFF\xFE", 2))
		    )
		{
		    warn("Parsing of undecoded UTF-16");
		}
	    }
	}
    }

    if (!len)
	return; /* nothing to do */

    end = beg + len;
    /* s is where parse_buf() stopped; [s, end) is still unparsed */
    s = parse_buf(aTHX_ p_state, beg, end, utf8, self);

    if (s == end || p_state->eof) {
	/* Nothing to carry over: mark the buffer undefined but keep the SV */
	if (p_state->buf) {
	    SvOK_off(p_state->buf);
	}
    }
    else {
	/* need to keep rest in buffer */
	if (p_state->buf) {
	    /* chop off some chars at the beginning */
	    if (SvOK(p_state->buf)) {
		/* s points into the buffer itself here */
		sv_chop(p_state->buf, s);
	    }
	    else {
		/* s points into the chunk: copy the tail and mirror its UTF-8 flag */
		sv_setpvn(p_state->buf, s, end - s);
		if (utf8)
		    SvUTF8_on(p_state->buf);
		else
		    SvUTF8_off(p_state->buf);
	    }
	}
	else {
	    /* First leftover: create the buffer from the unparsed tail */
	    p_state->buf = newSVpv(s, end - s);
	    if (utf8)
		SvUTF8_on(p_state->buf);
	}
    }
    return;
}