/*
 * Read a whole Snappy-compressed stream from ifile, decompress it and write
 * the result to ofile.
 *
 * Both streams are closed before returning, on every path.
 *
 * Returns 0 on success, otherwise a non-zero status:
 *   4  a buffer could not be allocated
 *   5  the input could not be read or is longer than MAX_INPUT_SIZE
 *   6  csnappy_get_uncompressed_length() rejected the input
 *   7  csnappy_decompress() failed
 *   8  the output could not be written completely
 */
static int do_decompress(FILE *ifile, FILE *ofile)
{
    char *ibuf = NULL, *obuf = NULL;
    uint32_t ilen, olen;
    int status, retval = 0;

    ibuf = (char *)malloc(MAX_INPUT_SIZE);
    if (!ibuf) {
        fprintf(stderr, "malloc failed to allocate %d.\n", MAX_INPUT_SIZE);
        retval = 4;
        goto out;
    }

    ilen = fread(ibuf, 1, MAX_INPUT_SIZE, ifile);
    /*
     * fread() that fills the buffer exactly does not raise EOF by itself, so
     * probe for one more byte: only a successful probe means the input
     * really is longer than the buffer.
     */
    if (!feof(ifile) && !ferror(ifile) && fgetc(ifile) != EOF) {
        fprintf(stderr, "input was longer than %d, aborting.\n", MAX_INPUT_SIZE);
        retval = 5;
        goto out;
    }
    if (ferror(ifile)) {
        fprintf(stderr, "failed to read input.\n");
        retval = 5;
        goto out;
    }

    if ((status = csnappy_get_uncompressed_length(ibuf, ilen, &olen)) < 0) {
        fprintf(stderr, "snappy_get_uncompressed_length returned %d.\n", status);
        retval = 6;
        goto out;
    }

    /* malloc(0) is allowed to return NULL; never ask for zero bytes. */
    obuf = (char *)malloc(olen ? olen : 1);
    if (!obuf) {
        fprintf(stderr, "malloc failed to allocate %lu.\n", (unsigned long)olen);
        retval = 4;
        goto out;
    }

    status = csnappy_decompress(ibuf, ilen, obuf, olen);
    if (status != CSNAPPY_E_OK) {
        fprintf(stderr, "snappy_decompress returned %d.\n", status);
        retval = 7;
        goto out;
    }

    if (fwrite(obuf, 1, olen, ofile) != olen) {
        fprintf(stderr, "failed to write %lu bytes of output.\n", (unsigned long)olen);
        retval = 8;
    }

out:
    free(obuf);
    free(ibuf);
    fclose(ifile);
    /* fclose() flushes buffered output, so a failure here is a write failure. */
    if (fclose(ofile) != 0 && retval == 0) {
        fprintf(stderr, "failed to flush output.\n");
        retval = 8;
    }
    return retval;
}
/* Top-level entry point for deserializing one Sereal packet out of src.
 * It strings together the individual stages: begin decoding, read the
 * header, optionally Snappy-decompress the body into a scratch buffer,
 * read the single top-level value, finalize, and record how many bytes
 * were consumed.
 *
 * Returns `into`; when the caller passes no `into`, a fresh mortal SV is
 * created and returned instead. */
SV *
srl_decode_into(pTHX_ srl_decoder_t *dec, SV *src, SV* into, UV start_offset)
{
    assert(dec != NULL);

    if (SvUTF8(src)) {
        sv_utf8_downgrade(src, 0);
    }

    srl_begin_decoding(aTHX_ dec, src, start_offset);
    srl_read_header(aTHX_ dec);

    if (SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_DECOMPRESS_SNAPPY)) {
        /* Snappy-compressed body. Every declaration stays at the top of
         * this block so C89 compilers keep working. */
        uint32_t uncompressed_len;
        SV *scratch_sv;
        unsigned char *scratch;
        unsigned char *compressed_start;
        int snappy_status;
        int snappy_header_len;
        /* Taken before the optional length varint is consumed below. */
        const ptrdiff_t sereal_header_len = dec->pos - dec->buf_start;
        STRLEN compressed_packet_len;

        if ((dec->proto_version_and_flags & SRL_PROTOCOL_ENCODING_MASK)
                == SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL)
        {
            /* Incremental flavour: an explicit varint carries the length. */
            compressed_packet_len =
                (STRLEN)srl_read_varint_uv_length(aTHX_ dec, " while reading compressed packet size");
        }
        else {
            /* Otherwise the compressed data runs to the end of the buffer. */
            compressed_packet_len = (STRLEN)(dec->buf_end - dec->pos);
        }

        dec->bytes_consumed = compressed_packet_len + (dec->pos - dec->buf_start);

        snappy_header_len = csnappy_get_uncompressed_length(
                                (char *)dec->pos,
                                compressed_packet_len,
                                &uncompressed_len);
        if (snappy_header_len == CSNAPPY_E_HEADER_BAD) {
            SRL_ERROR("Invalid Snappy header in Snappy-compressed Sereal packet");
        }

        /* The scratch buffer is a mortal SV, so perl reclaims it for us.
         * One mortal per full decompression is not a bottleneck. */
        scratch_sv = sv_2mortal(newSV(sereal_header_len + uncompressed_len + 1));
        scratch = (unsigned char *)SvPVX(scratch_sv);

        /* FIXME probably unnecessary to copy the Sereal header! */
        Copy(dec->buf_start, scratch, sereal_header_len, unsigned char);

        /* Re-point the decoder at the scratch buffer, remembering where
         * the compressed bytes started in the original one. */
        compressed_start = dec->pos;
        dec->buf_start = scratch;
        dec->pos = scratch + sereal_header_len;
        dec->buf_end = dec->pos + uncompressed_len;
        dec->buf_len = uncompressed_len + sereal_header_len;

        snappy_status = csnappy_decompress_noheader(
                            (char *)(compressed_start + snappy_header_len),
                            compressed_packet_len - snappy_header_len,
                            (char *)dec->pos,
                            &uncompressed_len);
        if (expect_false( snappy_status != 0 )) {
            SRL_ERRORf1("Snappy decompression of Sereal packet payload failed with error %i!", snappy_status);
        }
    }

    if (expect_true(!into)) {
        into = sv_2mortal(newSV_type(SVt_NULL));
    }

    srl_read_single_value(aTHX_ dec, into);
    /* assert(dec->pos == dec->buf_end); For now we disable this */

    if (expect_false(SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_NEEDS_FINALIZE))) {
        srl_finalize_structure(aTHX_ dec);
    }

    /* bytes_consumed was already set on the decompression path; if it is
     * still zero, record it now so the user can query it. */
    if (!dec->bytes_consumed) {
        dec->bytes_consumed = dec->pos - dec->buf_start;
    }

    if (SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_DESTRUCTIVE_INCREMENTAL)) {
        STRLEN src_len;
        char *src_pv = SvPV(src, src_len);
        /* check the length here? do something different if the string is now exhausted? */
        sv_chop(src, src_pv + dec->bytes_consumed);
    }

    srl_clear_decoder(aTHX_ dec);
    return into;
}
void _parse_header(pTHX_ srl_splitter_t *splitter) { int magic_string = 1; int high_magic_string = 1; U8 version_encoding; U8 version; U8 encoding_flags; UV header_len; int is_zlib_encoded = 0; int is_snappy_encoded = 0; int is_snappyincr_encoded = 0; // SRL_MAGIC_STRLEN + PROTOCOL_LENGTH + OPTIONAL-HEADER-SIZE(at least 1 byte) + DATA(at least 1 byte) if (splitter->input_len < SRL_MAGIC_STRLEN + 1 + 1 + 1){ croak("input Sereal string lacks data"); } else if ( (high_magic_string = strncmp(splitter->input_str, SRL_MAGIC_STRING, SRL_MAGIC_STRLEN)) && (magic_string = strncmp(splitter->input_str, SRL_MAGIC_STRING_HIGHBIT, SRL_MAGIC_STRLEN)) ) { croak("input Sereal string has wrong Sereal magic"); } splitter->pos += SRL_MAGIC_STRLEN; version_encoding = (U8)*(splitter->pos); version = (U8)(version_encoding & SRL_PROTOCOL_VERSION_MASK); encoding_flags = (U8)(version_encoding & SRL_PROTOCOL_ENCODING_MASK); if ( version <= 0 || ( version < 3 && high_magic_string ) || ( version > 2 && magic_string ) ) { croak("unsupported Sereal versions/protocol"); } switch(encoding_flags) { case SRL_PROTOCOL_ENCODING_RAW: /* no op */ SRL_SPLITTER_TRACE("encoding is raw %s", ""); break; case SRL_PROTOCOL_ENCODING_SNAPPY: SRL_SPLITTER_TRACE("encoding is snappy %s", ""); is_snappy_encoded = 1; break; case SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL: SRL_SPLITTER_TRACE("encoding is snappy incr %s", ""); is_snappy_encoded = is_snappyincr_encoded = 1; break; case SRL_PROTOCOL_ENCODING_ZLIB: SRL_SPLITTER_TRACE("encoding is zlib %s", ""); is_zlib_encoded = 1; break; default: croak("Sereal document encoded in an unknown format"); } SRL_SPLITTER_TRACE("header version is %hhu", version); // move after protocol version splitter->pos += 1; header_len= _read_varint_uv_nocheck(splitter); SRL_SPLITTER_TRACE("header len is %lu", header_len); //TODO: add code for processing the header splitter->pos += header_len; if (version < 2) { splitter->input_body_pos = splitter->input_str; } else { 
splitter->input_body_pos = splitter->pos; } if (is_snappy_encoded) { UV compressed_len; uint32_t uncompressed_len; int decompress_ok; char * new_input_str; if (is_snappyincr_encoded) { compressed_len = _read_varint_uv_nocheck(splitter); } else { compressed_len = splitter->input_len - (splitter->pos - splitter->input_str); } SRL_SPLITTER_TRACE("snappy compressed len %"UVuf, compressed_len); // splitter->pos is now at start of compressed payload int snappy_header_len; char *old_pos; old_pos = splitter->pos; snappy_header_len = csnappy_get_uncompressed_length( (char *)old_pos, compressed_len, &uncompressed_len ); if (snappy_header_len == CSNAPPY_E_HEADER_BAD) { croak("invalid Snappy header in Snappy-compressed Sereal packet"); } // allocate a new SV for uncompressed data SvREFCNT_dec(splitter->input_sv); splitter->input_sv = newSVpvs(""); new_input_str = SvGROW(splitter->input_sv, uncompressed_len); decompress_ok = csnappy_decompress_noheader((char *) (old_pos + snappy_header_len), compressed_len - snappy_header_len, (char *) new_input_str, &uncompressed_len); if ( decompress_ok != 0 ) { croak("Snappy decompression of Sereal packet payload failed"); } splitter->input_str = new_input_str; SRL_SPLITTER_TRACE(" decompress OK: uncompressed length: %d\n", uncompressed_len); splitter->pos = splitter->input_str;; splitter->input_len = uncompressed_len; splitter->input_body_pos = splitter->pos; } else if (is_zlib_encoded) { UV uncompressed_len = _read_varint_uv_nocheck(splitter); UV compressed_len = _read_varint_uv_nocheck(splitter); char * new_input_str; // splitter->pos is now at start of compressed payload SRL_SPLITTER_TRACE("unzipping %s", ""); SRL_SPLITTER_TRACE("compressed_len : %" UVuf, compressed_len); SRL_SPLITTER_TRACE("uncompressed_len : %" UVuf, uncompressed_len); mz_ulong tmp = uncompressed_len; // allocate a new SV for uncompressed data SvREFCNT_dec(splitter->input_sv); splitter->input_sv = newSVpvs(""); new_input_str = SvGROW(splitter->input_sv, 
uncompressed_len); char *compressed = splitter->pos; int decompress_ok = mz_uncompress( (unsigned char *) new_input_str, &tmp, (const unsigned char *) compressed, compressed_len ); if (decompress_ok != Z_OK) croak("ZLIB decompression of Sereal packet payload failed"); splitter->input_str = new_input_str; SRL_SPLITTER_TRACE(" decompress OK: length %lu\n", uncompressed_len); splitter->pos = splitter->input_str; splitter->input_len = (STRLEN)tmp; splitter->input_body_pos = splitter->pos; } }