/*
 * Return a char buffer holding the string value of sv and store a length
 * in *len.
 *
 * sv is first forced to a string.  When it is not flagged UTF-8, or when it
 * can be downgraded in place, the SV's own buffer is returned and *len is
 * its byte length.
 *
 * Otherwise a mortal copy of sv is handed to Encode::encode() together with
 * PJS_STR_ENCODING; the buffer of a copy of that result is returned and *len
 * is set to -(byte_length / 2).
 * NOTE(review): the negation is applied to a STRLEN, so if STRLEN is
 * unsigned the stored value wraps; presumably callers use it as a marker
 * meaning "count of 2-byte units" -- confirm against the callers.
 */
char *
PJS_ConvertUC(pTHX_ SV *sv, STRLEN *len)
{
    dSP;
    char *bytes;
    STRLEN nbytes;
    SV *encoded;

    SvPV_force(sv, nbytes);

    /* Plain-bytes path: nothing to encode, hand back the SV's own buffer. */
    if (!SvUTF8(sv) || sv_utf8_downgrade(sv, 1)) {
        bytes = SvPV(sv, nbytes);
        *len = nbytes;
        return bytes;
    }

    /* The string cannot be downgraded: call Encode::encode(encoding, copy). */
    ENTER;
    SAVETMPS;

    PUSHMARK(SP);
    XPUSHs(sv_2mortal(newSVpv(PJS_STR_ENCODING, 0)));
    XPUSHs(sv_mortalcopy(sv));
    PUTBACK;

    call_pv("Encode::encode", G_SCALAR);

    SPAGAIN;

    /*
     * Take our own copy of the returned scalar and register it with
     * SAVEMORTALIZESV before the temporaries of this scope are freed.
     */
    encoded = newSVsv(POPs);
    SAVEMORTALIZESV(encoded);
    bytes = SvPV(encoded, nbytes);
    PUTBACK;

    FREETMPS;
    LEAVE;

    *len = -(nbytes / 2);
    return bytes;
}
/*
 * Downgrade sv from UTF-8 (fail_ok is FALSE) and return the string pointer
 * obtained from sv_pvn(), which also receives lp for the length.
 */
char *
Perl_sv_pvbyten(pTHX_ SV *sv, STRLEN *lp)
{
    char *bytes;

    PERL_ARGS_ASSERT_SV_PVBYTEN;

    sv_utf8_downgrade(sv, FALSE);
    bytes = sv_pvn(sv, lp);

    return bytes;
}
/*
 * Downgrade sv from UTF-8 (fail_ok is FALSE) and return the string pointer
 * obtained from sv_pv().  No length is reported to the caller.
 */
char *
Perl_sv_pvbyte(pTHX_ SV *sv)
{
    char *bytes;

    PERL_ARGS_ASSERT_SV_PVBYTE;

    sv_utf8_downgrade(sv, FALSE);
    bytes = sv_pv(sv);

    return bytes;
}
/*
 * Main entry point for deserializing a structure; it strings together the
 * other "top level" decoding routines.
 *
 *   dec          - decoder state (must not be NULL)
 *   src          - scalar holding the packet; downgraded in place when it
 *                  is flagged UTF-8
 *   into         - scalar to decode into, or NULL to have a fresh mortal
 *                  scalar created
 *   start_offset - passed through to srl_begin_decoding()
 *
 * Returns the scalar that was decoded into.
 */
SV *
srl_decode_into(pTHX_ srl_decoder_t *dec, SV *src, SV* into, UV start_offset)
{
    assert(dec != NULL);

    if (SvUTF8(src)) {
        sv_utf8_downgrade(src, 0);
    }

    srl_begin_decoding(aTHX_ dec, src, start_offset);
    srl_read_header(aTHX_ dec);

    if (SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_DECOMPRESS_SNAPPY)) {
        /* Snappy-compressed payload: inflate it into a scratch buffer. */
        uint32_t inflated_len;
        SV *scratch_sv;
        unsigned char *scratch;
        unsigned char *packed_start;
        ptrdiff_t sereal_header_len;
        STRLEN compressed_packet_len;
        int snappy_rc;
        int snappy_header_len;
        /* all decl's above here, or we break C89 compilers */

        /* Must be captured before the optional varint read below. */
        sereal_header_len = dec->pos - dec->buf_start;

        if ((dec->proto_version_and_flags & SRL_PROTOCOL_ENCODING_MASK)
                == SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL)
        {
            compressed_packet_len = (STRLEN)srl_read_varint_uv_length(
                aTHX_ dec, " while reading compressed packet size");
        }
        else {
            compressed_packet_len = (STRLEN)(dec->buf_end - dec->pos);
        }

        dec->bytes_consumed = compressed_packet_len + (dec->pos - dec->buf_start);

        snappy_header_len = csnappy_get_uncompressed_length(
            (char *)dec->pos,
            compressed_packet_len,
            &inflated_len
        );
        if (snappy_header_len == CSNAPPY_E_HEADER_BAD) {
            SRL_ERROR("Invalid Snappy header in Snappy-compressed Sereal packet");
        }

        /* Let perl clean this up. Yes, it's not the most efficient thing
         * ever, but it's just one mortal per full decompression, so not
         * a bottle-neck. */
        scratch_sv = sv_2mortal(newSV(sereal_header_len + inflated_len + 1));
        scratch = (unsigned char *)SvPVX(scratch_sv);

        /* FIXME probably unnecessary to copy the Sereal header! */
        Copy(dec->buf_start, scratch, sereal_header_len, unsigned char);

        /* Re-point the decoder at the scratch buffer, remembering where
         * the compressed bytes started in the old one. */
        packed_start = dec->pos;
        dec->buf_start = scratch;
        dec->pos = scratch + sereal_header_len;
        dec->buf_end = dec->pos + inflated_len;
        dec->buf_len = inflated_len + sereal_header_len;

        snappy_rc = csnappy_decompress_noheader(
            (char *)(packed_start + snappy_header_len),
            compressed_packet_len - snappy_header_len,
            (char *)dec->pos,
            &inflated_len
        );
        if (expect_false( snappy_rc != 0 )) {
            SRL_ERRORf1("Snappy decompression of Sereal packet payload failed with error %i!", snappy_rc);
        }
    }

    if (expect_true(!into)) {
        into = sv_2mortal(newSV_type(SVt_NULL));
    }
    srl_read_single_value(aTHX_ dec, into);
    /* assert(dec->pos == dec->buf_end); For now we disable this */

    if (expect_false(SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_NEEDS_FINALIZE))) {
        srl_finalize_structure(aTHX_ dec);
    }

    /* If we aren't reading from a decompressed buffer we have to remember
     * the number of bytes used for the user to query. */
    if (dec->bytes_consumed == 0) {
        dec->bytes_consumed = dec->pos - dec->buf_start;
    }

    if (SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_DESTRUCTIVE_INCREMENTAL)) {
        STRLEN src_len;
        char *src_pv = SvPV(src, src_len);
        /* check the length here? do something different if the string is now exhausted? */
        sv_chop(src, src_pv + dec->bytes_consumed);
    }

    srl_clear_decoder(aTHX_ dec);
    return into;
}
/*
 * Downgrade sv from UTF-8 (fail_ok is 0) and return its string pointer via
 * SvPV(), storing the length in *lp.  lp is dereferenced unconditionally,
 * so it must not be NULL.
 */
static char *
my_sv_2pvbyte(pTHX_ register SV *sv, STRLEN *lp)
{
    char *bytes;

    sv_utf8_downgrade(sv, 0);
    bytes = SvPV(sv, *lp);

    return bytes;
}
/*
 * Main Load function: parse a YAML stream and push zero or more Perl
 * values (one per loaded document) onto the Perl stack.
 *
 * A UTF-8 flagged input is copied (mortal) and the copy is downgraded; if
 * that fails the function croaks with a "Wide character" message.
 *
 * NOTE(review): none of the croak() calls below is preceded by
 * yaml_parser_delete(), so the parser does not appear to be released on
 * those paths -- confirm whether that is handled elsewhere.
 */
void
Load(SV *yaml_sv)
{
    dXSARGS;
    perl_yaml_loader_t loader;
    SV *doc;
    const unsigned char *input;
    STRLEN input_len;

    input = (const unsigned char *)SvPV_const(yaml_sv, input_len);

    if (DO_UTF8(yaml_sv)) {
        /* Work on a mortal copy so the caller's scalar is left alone. */
        yaml_sv = sv_mortalcopy(yaml_sv);
        if (!sv_utf8_downgrade(yaml_sv, TRUE)) {
            croak("%s", "Wide character in YAML::XS::Load()");
        }
        input = (const unsigned char *)SvPV_const(yaml_sv, input_len);
    }

    /* Rewind the local stack pointer to the mark before pushing results. */
    sp = mark;
    if (0 && (items || ax)) {} /* XXX Quiet the -Wall warnings for now. */

    yaml_parser_initialize(&loader.parser);
    loader.document = 0;
    yaml_parser_set_input_string(&loader.parser, input, input_len);

    /* The very first event has to be STREAM_START. */
    if (!yaml_parser_parse(&loader.parser, &loader.event)) {
        goto load_error;
    }
    if (loader.event.type != YAML_STREAM_START_EVENT) {
        croak("%sExpected STREAM_START_EVENT; Got: %d != %d",
            ERRMSG,
            loader.event.type,
            YAML_STREAM_START_EVENT
        );
    }

    loader.anchors = newHV();
    sv_2mortal((SV *)loader.anchors);

    /* Load one node per document until the stream ends. */
    for (;;) {
        loader.document++;

        /* The previous event is no longer needed. */
        yaml_event_delete(&loader.event);
        if (!yaml_parser_parse(&loader.parser, &loader.event)) {
            goto load_error;
        }
        if (loader.event.type == YAML_STREAM_END_EVENT) {
            break;
        }

        doc = load_node(&loader);

        /* Done with that event too; anchors are cleared after each document. */
        yaml_event_delete(&loader.event);
        hv_clear(loader.anchors);
        if (!doc) {
            break;
        }
        XPUSHs(sv_2mortal(doc));

        if (!yaml_parser_parse(&loader.parser, &loader.event)) {
            goto load_error;
        }
        if (loader.event.type != YAML_DOCUMENT_END_EVENT) {
            croak("%sExpected DOCUMENT_END_EVENT", ERRMSG);
        }
    }

    /* Whatever ended the loop, the current event must be STREAM_END. */
    if (loader.event.type != YAML_STREAM_END_EVENT) {
        croak("%sExpected STREAM_END_EVENT; Got: %d != %d",
            ERRMSG,
            loader.event.type,
            YAML_STREAM_END_EVENT
        );
    }

    yaml_parser_delete(&loader.parser);
    PUTBACK;
    return;

load_error:
    croak("%s", loader_error_msg(&loader, NULL));
}
/*
 * Feed one chunk of input to the parser, or signal end of input.
 *
 *   p_state - parser state
 *   chunk   - next piece of the document, or NULL for end of input
 *   self    - passed through to every reported event
 *
 * With a NULL chunk, whatever is still buffered is flushed (re-parsed,
 * reported as a comment, or reported as text), pending text is flushed,
 * E_END_DOCUMENT is reported and the position/literal-mode state is reset.
 *
 * Otherwise the chunk is appended to any buffered remainder, parsed with
 * parse_buf(), and the unconsumed tail is kept in p_state->buf for the
 * next call.
 */
EXTERN void
parse(pTHX_ PSTATE* p_state, SV* chunk, SV* self)
{
    char *pos, *start, *limit;
    U32 is_utf8 = 0;
    STRLEN nbytes;

    if (!chunk) {
        /* eof */
        char nothing[1];  /* only its address is used, as an empty range */

        if (p_state->buf && SvOK(p_state->buf)) {
            /* flush it */
            pos = SvPV(p_state->buf, nbytes);
            limit = pos + nbytes;
            is_utf8 = SvUTF8(p_state->buf);
            assert(nbytes);

            while (pos < limit) {
                if (p_state->literal_mode) {
                    if (strEQ(p_state->literal_mode, "plaintext") &&
                        !p_state->closing_plaintext)
                    {
                        break;
                    }
                    /* Leave literal mode and give the buffer another pass. */
                    p_state->pending_end_tag = p_state->literal_mode;
                    p_state->literal_mode = 0;
                    pos = parse_buf(aTHX_ p_state, pos, limit, is_utf8, self);
                    continue;
                }

                if (!p_state->strict_comment && *pos == '<') {
                    if (!p_state->no_dash_dash_comment_end) {
                        /* Retry once with this flag switched on. */
                        p_state->no_dash_dash_comment_end = 1;
                        pos = parse_buf(aTHX_ p_state, pos, limit, is_utf8, self);
                        continue;
                    }
                    else {
                        /* some kind of unterminated markup.  Report rest as a comment */
                        token_pos_t comment_tok;
                        comment_tok.beg = pos + 1;
                        comment_tok.end = limit;
                        report_event(p_state, E_COMMENT, pos, limit, is_utf8,
                                     &comment_tok, 1, self);
                        pos = limit;
                    }
                }
                break;
            }

            if (pos < limit) {
                /* report rest as text */
                report_event(p_state, E_TEXT, pos, limit, is_utf8, 0, 0, self);
            }

            SvREFCNT_dec(p_state->buf);
            p_state->buf = 0;
        }

        if (p_state->pend_text && SvOK(p_state->pend_text)) {
            flush_pending_text(p_state, self);
        }

        if (p_state->ignoring_element) {
            /* document not balanced */
            SvREFCNT_dec(p_state->ignoring_element);
            p_state->ignoring_element = 0;
        }

        report_event(p_state, E_END_DOCUMENT, nothing, nothing, 0, 0, 0, self);

        /* reset state */
        p_state->offset = 0;
        if (p_state->line) {
            p_state->line = 1;
        }
        p_state->column = 0;
        p_state->literal_mode = 0;
        p_state->is_cdata = 0;

        return;
    }

#ifdef UNICODE_HTML_PARSER
    if (p_state->utf8_mode) {
        sv_utf8_downgrade(chunk, 0);
    }
#endif

    if (p_state->buf && SvOK(p_state->buf)) {
        /* Continue from the remainder saved by an earlier call. */
        sv_catsv(p_state->buf, chunk);
        start = SvPV(p_state->buf, nbytes);
        is_utf8 = SvUTF8(p_state->buf);
    }
    else {
        start = SvPV(chunk, nbytes);
        is_utf8 = SvUTF8(chunk);

        if (p_state->offset == 0) {
            report_event(p_state, E_START_DOCUMENT, start, start, 0, 0, 0, self);

            /* Print warnings if we find unexpected Unicode BOM forms */
#ifdef UNICODE_HTML_PARSER
            if (DOWARN &&
                p_state->argspec_entity_decode &&
                !p_state->utf8_mode &&
                (
                    (!is_utf8 && nbytes >= 3 && strnEQ(start, "\xEF\xBB\xBF", 3)) ||
                    (is_utf8 && nbytes >= 6 && strnEQ(start, "\xC3\xAF\xC2\xBB\xC2\xBF", 6)) ||
                    (!is_utf8 && probably_utf8_chunk(aTHX_ start, nbytes))
                )
               )
            {
                warn("Parsing of undecoded UTF-8 will give garbage when decoding entities");
            }
            if (DOWARN && is_utf8 && nbytes >= 2 && strnEQ(start, "\xFF\xFE", 2)) {
                warn("Parsing string decoded with wrong endianess");
            }
#endif
            if (DOWARN) {
                if (!is_utf8 && nbytes >= 4 &&
                    (strnEQ(start, "\x00\x00\xFE\xFF", 4) ||
                     strnEQ(start, "\xFE\xFF\x00\x00", 4))
                   )
                {
                    warn("Parsing of undecoded UTF-32");
                }
                else if (!is_utf8 && nbytes >= 2 &&
                         (strnEQ(start, "\xFE\xFF", 2) ||
                          strnEQ(start, "\xFF\xFE", 2))
                        )
                {
                    warn("Parsing of undecoded UTF-16");
                }
            }
        }
    }

    if (!nbytes) {
        return; /* nothing to do */
    }

    limit = start + nbytes;
    pos = parse_buf(aTHX_ p_state, start, limit, is_utf8, self);

    /* Everything consumed (or parser at eof): mark the buffer as empty. */
    if (pos == limit || p_state->eof) {
        if (p_state->buf) {
            SvOK_off(p_state->buf);
        }
        return;
    }

    /* need to keep rest in buffer */
    if (!p_state->buf) {
        p_state->buf = newSVpv(pos, limit - pos);
        if (is_utf8) {
            SvUTF8_on(p_state->buf);
        }
        return;
    }

    if (SvOK(p_state->buf)) {
        /* chop off some chars at the beginning */
        sv_chop(p_state->buf, pos);
        return;
    }

    /* Buffer exists but holds no value: refill it with the unparsed tail. */
    sv_setpvn(p_state->buf, pos, limit - pos);
    if (is_utf8) {
        SvUTF8_on(p_state->buf);
    }
    else {
        SvUTF8_off(p_state->buf);
    }
}