예제 #1
0
/*
 * Read a complete Snappy stream from ifile, decompress it and write the
 * result to ofile.
 *
 * Both ifile and ofile are closed before returning, on every path.
 *
 * Returns 0 on success, otherwise:
 *   4  a buffer could not be allocated
 *   5  the input could not be read, or is longer than MAX_INPUT_SIZE
 *   6  csnappy_get_uncompressed_length() rejected the input
 *   7  csnappy_decompress() failed
 *   8  the decompressed data could not be written/flushed to ofile
 *      (previously such failures were silently reported as success)
 */
static int do_decompress(FILE *ifile, FILE *ofile)
{
	char *ibuf = NULL, *obuf = NULL;
	uint32_t ilen, olen;
	int status, retval = 0;

	if (!(ibuf = (char *)malloc(MAX_INPUT_SIZE))) {
		fprintf(stderr, "malloc failed to allocate %d.\n", MAX_INPUT_SIZE);
		retval = 4;
		goto out;
	}

	ilen = (uint32_t)fread(ibuf, 1, MAX_INPUT_SIZE, ifile);
	/*
	 * A completely filled buffer does not by itself mean the input is too
	 * long: feof() only becomes true after a read *past* the end. Probe one
	 * more byte so an input of exactly MAX_INPUT_SIZE bytes is accepted.
	 */
	if (ilen == (uint32_t)MAX_INPUT_SIZE && fgetc(ifile) != EOF) {
		fprintf(stderr, "input was longer than %d, aborting.\n", MAX_INPUT_SIZE);
		retval = 5;
		goto out;
	}
	/* Covers a failed fread() as well as a failed probe above. */
	if (ferror(ifile)) {
		fprintf(stderr, "error while reading input, aborting.\n");
		retval = 5;
		goto out;
	}

	if ((status = csnappy_get_uncompressed_length(ibuf, ilen, &olen)) < 0) {
		fprintf(stderr, "snappy_get_uncompressed_length returned %d.\n", status);
		retval = 6;
		goto out;
	}

	/*
	 * malloc(0) is allowed to return NULL, which would make a valid empty
	 * payload look like an out-of-memory condition; always ask for >= 1.
	 */
	if (!(obuf = (char *)malloc(olen ? olen : 1))) {
		fprintf(stderr, "malloc failed to allocate %d.\n", (int)olen);
		retval = 4;
		goto out;
	}

	status = csnappy_decompress(ibuf, ilen, obuf, olen);
	if (status != CSNAPPY_E_OK) {
		fprintf(stderr, "snappy_decompress returned %d.\n", status);
		retval = 7;
		goto out;
	}

	if (fwrite(obuf, 1, olen, ofile) != olen) {
		fprintf(stderr, "failed to write %lu bytes of output.\n",
			(unsigned long)olen);
		retval = 8;
	}

out:
	/* free(NULL) is a no-op, so a single cleanup path serves every exit. */
	free(obuf);
	free(ibuf);
	fclose(ifile);
	/* fclose() flushes buffered output; a failure here means data was lost. */
	if (fclose(ofile) != 0 && retval == 0) {
		fprintf(stderr, "failed to flush output.\n");
		retval = 8;
	}
	return retval;
}
예제 #2
0
/* Top-level entry point for deserializing one structure.
 *
 * Chains the other "top level" steps: begin decoding `src` at
 * `start_offset`, read the header, optionally Snappy-decompress the body
 * into a temporary buffer, read a single value into `into`, finalize if
 * the decoder asks for it, record the number of bytes consumed, and clear
 * the decoder.
 *
 * When `into` is NULL a new mortal SV is created and used as the target.
 * Returns the SV that received the decoded value.
 */
SV *
srl_decode_into(pTHX_ srl_decoder_t *dec, SV *src, SV* into, UV start_offset)
{
    assert(dec != NULL);

    if (SvUTF8(src)) {
        sv_utf8_downgrade(src, 0);
    }

    srl_begin_decoding(aTHX_ dec, src, start_offset);
    srl_read_header(aTHX_ dec);

    if (SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_DECOMPRESS_SNAPPY)) {
        /* Snappy-compressed body: inflate it into a scratch buffer and
         * point the decoder at that buffer instead of the input. */
        uint32_t inflated_len;
        SV *scratch_sv;
        unsigned char *scratch;
        unsigned char *packed_start;
        /* Evaluated before the next initializer, which may read a varint
         * through the decoder in the incremental case. */
        const ptrdiff_t prefix_len = dec->pos - dec->buf_start;
        const STRLEN packed_len =
                ( dec->proto_version_and_flags & SRL_PROTOCOL_ENCODING_MASK ) != SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL
                ? (STRLEN)(dec->buf_end - dec->pos)
                : (STRLEN)srl_read_varint_uv_length(aTHX_ dec, " while reading compressed packet size");
        int snappy_rc;
        int snappy_hdr_len;

        /* every declaration must stay above this line for C89 compilers */

        dec->bytes_consumed = packed_len + (dec->pos - dec->buf_start);

        snappy_hdr_len = csnappy_get_uncompressed_length((char *)dec->pos,
                                                         packed_len,
                                                         &inflated_len);
        if (snappy_hdr_len == CSNAPPY_E_HEADER_BAD) {
            SRL_ERROR("Invalid Snappy header in Snappy-compressed Sereal packet");
        }

        /* Remember where the compressed bytes live before dec->pos is
         * redirected to the scratch buffer. */
        packed_start = dec->pos;

        /* A mortal SV owns the scratch buffer so perl reclaims it for us;
         * one mortal per full decompression is cheap enough. */
        scratch_sv = sv_2mortal( newSV(prefix_len + inflated_len + 1 ));
        scratch = (unsigned char *)SvPVX(scratch_sv);

        /* FIXME copying the Sereal header is probably unnecessary! */
        Copy(dec->buf_start, scratch, prefix_len, unsigned char);

        /* Rebase the decoder onto the scratch buffer. */
        dec->buf_start = scratch;
        dec->pos = scratch + prefix_len;
        dec->buf_len = inflated_len + prefix_len;
        dec->buf_end = dec->pos + inflated_len;

        snappy_rc = csnappy_decompress_noheader((char *)(packed_start + snappy_hdr_len),
                                                packed_len - snappy_hdr_len,
                                                (char *)dec->pos,
                                                &inflated_len);
        if (expect_false( snappy_rc != 0 )) {
            SRL_ERRORf1("Snappy decompression of Sereal packet payload failed with error %i!", snappy_rc);
        }
    }

    if (expect_true(!into)) {
        into = sv_2mortal(newSV_type(SVt_NULL));
    }

    srl_read_single_value(aTHX_ dec, into);
    /* assert(dec->pos == dec->buf_end); disabled for now */

    if (expect_false(SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_NEEDS_FINALIZE))) {
        srl_finalize_structure(aTHX_ dec);
    }

    /* bytes_consumed was already set above when a compressed packet was
     * handled; otherwise derive it from the read position so the user can
     * query it. */
    if (!dec->bytes_consumed) {
        dec->bytes_consumed = dec->pos - dec->buf_start;
    }

    if (SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_DESTRUCTIVE_INCREMENTAL)) {
        STRLEN src_len;
        char *src_pv = SvPV(src, src_len);
        /* check the length here? do something different if the string is now exhausted? */
        sv_chop(src, src_pv + dec->bytes_consumed);
    }

    srl_clear_decoder(aTHX_ dec);
    return into;
}
예제 #3
0
/* Parse the Sereal document header held by `splitter` and leave the
 * splitter positioned at the start of the document body.
 *
 *  - validates the magic string and the version/encoding byte;
 *  - skips the optional header (its content is not interpreted yet);
 *  - for Snappy, incremental Snappy and zlib encodings, decompresses the
 *    body into a fresh SV and re-points input_sv, input_str, input_len,
 *    pos and input_body_pos at the decompressed data.
 *
 * croak()s on malformed input or on decompression failure.
 *
 * NOTE(review): this assumes splitter->pos == splitter->input_str on entry
 * (the magic is compared at input_str but skipped via pos) -- confirm
 * against the code that builds the splitter.
 */
void _parse_header(pTHX_ srl_splitter_t *splitter) {
    /* Both flags hold strncmp() results: 0 means "this magic matched". */
    int lacks_highbit_magic = 1;
    int lacks_plain_magic = 1;

    U8 version_encoding;
    U8 version;
    U8 encoding_flags;
    UV header_len;

    int is_zlib_encoded = 0;
    int is_snappy_encoded = 0;
    int is_snappyincr_encoded = 0;

    /* One past the last byte of the (still compressed) input; every length
     * decoded from the input is validated against it before use. */
    const char *input_end = splitter->input_str + splitter->input_len;

    // SRL_MAGIC_STRLEN + PROTOCOL_LENGTH + OPTIONAL-HEADER-SIZE(at least 1 byte) + DATA(at least 1 byte)
    if (splitter->input_len < SRL_MAGIC_STRLEN + 1 + 1 + 1){
        croak("input Sereal string lacks data");
    } else if ( (lacks_plain_magic = strncmp(splitter->input_str, SRL_MAGIC_STRING, SRL_MAGIC_STRLEN))
                  && (lacks_highbit_magic = strncmp(splitter->input_str, SRL_MAGIC_STRING_HIGHBIT, SRL_MAGIC_STRLEN)) ) {
        croak("input Sereal string has wrong Sereal magic");
    }

    splitter->pos += SRL_MAGIC_STRLEN;

    version_encoding = (U8)*(splitter->pos);
    version = (U8)(version_encoding & SRL_PROTOCOL_VERSION_MASK);
    encoding_flags = (U8)(version_encoding & SRL_PROTOCOL_ENCODING_MASK);

    /* Versions 1-2 must carry the plain magic, versions 3+ the high-bit one. */
    if (      version == 0
              || ( version < 3 && lacks_plain_magic )
              || ( version > 2 && lacks_highbit_magic ) ) {
        croak("unsupported Sereal versions/protocol");
    }

    switch(encoding_flags) {

    case SRL_PROTOCOL_ENCODING_RAW:
        /* no op */
        SRL_SPLITTER_TRACE("encoding is raw %s", "");
        break;

    case SRL_PROTOCOL_ENCODING_SNAPPY:
        SRL_SPLITTER_TRACE("encoding is snappy %s", "");
        is_snappy_encoded = 1;
        break;

    case SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL:
        SRL_SPLITTER_TRACE("encoding is snappy incr %s", "");
        is_snappy_encoded = is_snappyincr_encoded = 1;
        break;

    case SRL_PROTOCOL_ENCODING_ZLIB:
        SRL_SPLITTER_TRACE("encoding is zlib %s", "");
        is_zlib_encoded = 1;
        break;

    default:
        croak("Sereal document encoded in an unknown format");
    }

    SRL_SPLITTER_TRACE("header version is %hhu", version);

    // move after protocol version
    splitter->pos += 1;

    header_len = _read_varint_uv_nocheck(splitter);

    SRL_SPLITTER_TRACE("header len is %" UVuf, header_len);

    /* The varint reader does no bounds checking and header_len comes
     * straight from the input, so refuse to step outside the buffer. */
    if (splitter->pos > input_end
        || header_len > (UV)(input_end - splitter->pos)) {
        croak("input Sereal string has a truncated header");
    }

    //TODO: add code for processing the header
    splitter->pos += header_len;

    if (version < 2) {
        splitter->input_body_pos = splitter->input_str;
    } else {
        splitter->input_body_pos = splitter->pos;
    }

    if (is_snappy_encoded) {
        UV compressed_len;
        uint32_t uncompressed_len;
        int snappy_header_len;
        int decompress_ok;
        char *compressed;
        char *new_input_str;
        SV *decompressed_sv;

        if (is_snappyincr_encoded) {
            compressed_len = _read_varint_uv_nocheck(splitter);
            /* Declared packet size must fit in what is left of the input. */
            if (splitter->pos > input_end
                || compressed_len > (UV)(input_end - splitter->pos)) {
                croak("Snappy-compressed Sereal packet is truncated");
            }
        } else {
            compressed_len = (UV)(input_end - splitter->pos);
        }
        SRL_SPLITTER_TRACE("snappy compressed len %" UVuf, compressed_len);

        // splitter->pos is now at start of compressed payload
        compressed = splitter->pos;
        snappy_header_len = csnappy_get_uncompressed_length(
            compressed,
            compressed_len,
            &uncompressed_len
        );
        /* Any negative result is an error (CSNAPPY_E_HEADER_BAD included). */
        if (snappy_header_len < 0) {
            croak("invalid Snappy header in Snappy-compressed Sereal packet");
        }

        // allocate a new SV for uncompressed data
        decompressed_sv = newSVpvs("");
        new_input_str = SvGROW(decompressed_sv, uncompressed_len);

        decompress_ok = csnappy_decompress_noheader(compressed + snappy_header_len,
                                                    compressed_len - snappy_header_len,
                                                    new_input_str,
                                                    &uncompressed_len);
        if ( decompress_ok != 0 ) {
            SvREFCNT_dec(decompressed_sv);
            croak("Snappy decompression of Sereal packet payload failed");
        }

        /* Release the compressed input only now: `compressed` presumably
         * points into input_sv's buffer (TODO confirm), so dropping the
         * reference before decompressing could read freed memory. */
        SvREFCNT_dec(splitter->input_sv);
        splitter->input_sv = decompressed_sv;

        splitter->input_str = new_input_str;
        SRL_SPLITTER_TRACE(" decompress OK: uncompressed length: %" UVuf "\n", (UV)uncompressed_len);

        splitter->pos = splitter->input_str;
        splitter->input_len = uncompressed_len;
        splitter->input_body_pos = splitter->pos;

    } else if (is_zlib_encoded) {

        UV uncompressed_len = _read_varint_uv_nocheck(splitter);
        UV compressed_len = _read_varint_uv_nocheck(splitter);
        mz_ulong inflated_len = uncompressed_len; /* in: capacity, out: bytes produced */
        char *compressed;
        char *new_input_str;
        SV *decompressed_sv;
        int decompress_ok;

        // splitter->pos is now at start of compressed payload
        SRL_SPLITTER_TRACE("unzipping %s", "");
        SRL_SPLITTER_TRACE("compressed_len : %" UVuf, compressed_len);
        SRL_SPLITTER_TRACE("uncompressed_len : %" UVuf, uncompressed_len);

        /* Declared payload size must fit in what is left of the input. */
        if (splitter->pos > input_end
            || compressed_len > (UV)(input_end - splitter->pos)) {
            croak("ZLIB-compressed Sereal packet is truncated");
        }
        compressed = splitter->pos;

        // allocate a new SV for uncompressed data
        decompressed_sv = newSVpvs("");
        new_input_str = SvGROW(decompressed_sv, uncompressed_len);

        decompress_ok = mz_uncompress( (unsigned char *) new_input_str,
                                       &inflated_len,
                                       (const unsigned char *) compressed,
                                       compressed_len );

        if (decompress_ok != Z_OK) {
            SvREFCNT_dec(decompressed_sv);
            croak("ZLIB decompression of Sereal packet payload failed");
        }

        /* As in the Snappy branch: drop the compressed input only after
         * it has been fully read. */
        SvREFCNT_dec(splitter->input_sv);
        splitter->input_sv = decompressed_sv;

        splitter->input_str = new_input_str;
        SRL_SPLITTER_TRACE(" decompress OK: length %" UVuf "\n", uncompressed_len);

        splitter->pos = splitter->input_str;
        splitter->input_len = (STRLEN)inflated_len;
        splitter->input_body_pos = splitter->pos;

    }
}