Beispiel #1
0
SRL_STATIC_INLINE void
srl_merge_binary_utf8(pTHX_ srl_merger_t *mrg, ptable_entry_ptr ptable_entry)
{
    int ok;
    UV length, total_length;
    strtable_entry_ptr strtable_entry;
    srl_reader_char_ptr tag_ptr = mrg->ibuf.pos;

    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    mrg->ibuf.pos++; // skip tag in input buffer
    length = srl_read_varint_uv_length(aTHX_ mrg->pibuf, " while reading BINARY or STR_UTF8");

    assert((mrg->ibuf.pos - tag_ptr) > 0);
    assert((mrg->ibuf.pos - tag_ptr) <= SRL_MAX_VARINT_LENGTH);
    total_length = length + (mrg->ibuf.pos - tag_ptr);

    strtable_entry = srl_lookup_string(aTHX_ mrg, tag_ptr, total_length, &ok);

    if (ok) {
        // issue COPY tag
        srl_buf_cat_varint(aTHX_ &mrg->obuf, SRL_HDR_COPY, strtable_entry->offset);
        mrg->ibuf.pos += length;

        if (expect_false(ptable_entry)) {
            // update value in ptable entry
            // This is needed because if any of following tags will reffer to
            // this one as COPY we need to point them to original string.
            // By Sereal spec a COPY tag cannot reffer to another COPY tag.
            ptable_entry->value = INT2PTR(void *, strtable_entry->offset);
        }
    } else if (strtable_entry) {
Beispiel #2
0
/* Clone an encoder's configuration without its current state.
 *
 * A fresh encoder is obtained from srl_empty_encoder_struct() and the
 * configuration of `proto` (option flags and Snappy threshold) is copied
 * into it. Nothing else is taken from `proto`.
 *
 * Returns the newly built encoder. */
srl_encoder_t *
srl_build_encoder_struct_alike(pTHX_ srl_encoder_t *proto)
{
    srl_encoder_t *enc;

    enc = srl_empty_encoder_struct(aTHX);
    enc->flags = proto->flags;

    /* The flags may include SRL_F_COMPRESS_SNAPPY, so the threshold that
     * goes with that option has to travel with them; otherwise the clone
     * would compress using whatever srl_empty_encoder_struct() left in
     * the field instead of the value configured on the prototype. */
    enc->snappy_threshold = proto->snappy_threshold;

    DEBUG_ASSERT_BUF_SANE(enc);
    return enc;
}
Beispiel #3
0
/* Merge an array of `length` elements from the input into the output buffer.
 *
 * mrg    - merger whose input/output buffers are used
 * tag    - array tag being merged; for SRL_HDR_ARRAY the element count is
 *          written out as a varint after the tag, any other tag is emitted
 *          as a single byte via srl_buf_cat_tag_nocheck() (the caller passes
 *          ARRAYREF tags here, with the count derived from the tag itself)
 * length - number of elements that follow; each one is merged with
 *          srl_merge_single_value()
 */
SRL_STATIC_INLINE void
srl_merge_array(pTHX_ srl_merger_t *mrg, const U8 tag, UV length)
{
    /* Same type as `length`: a narrower counter would wrap around before
     * reaching a count above UINT_MAX and the loop would never terminate
     * on its own. */
    UV i;

    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    if (tag == SRL_HDR_ARRAY) {
        srl_buf_cat_varint(aTHX_ &mrg->obuf, tag, length);
    } else {
        srl_buf_cat_tag_nocheck(mrg, tag);
    }

    for (i = 0; i < length; ++i) {
        srl_merge_single_value(aTHX_ mrg);
    }

    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
}
Beispiel #4
0
/* Merge every element of the array `src` into the merger's output buffer.
 *
 * If the previous merge left obuf_last_successfull_offset set, the output
 * position is first rewound to that offset and the dedup tables are cleaned
 * up. The output buffer is then grown once, by the sum of SvLEN() over all
 * elements, before the elements are merged one after another.
 *
 * Croaks if av_fetch() returns NULL for any index. */
void
srl_merger_append_all(pTHX_ srl_merger_t *mrg, AV *src)
{
    STRLEN estimate = 0;
    SSize_t idx;
    const SSize_t last_idx = av_len(src);

    if (mrg->obuf_last_successfull_offset) {
        /* A non-zero offset means the previous merge did not run to
         * completion: drop whatever it wrote past that offset. */
        SRL_MERGER_TRACE("last merge operation has failed, need to do some cleanup (offset %"UVuf")",
                          mrg->obuf_last_successfull_offset);

        mrg->obuf.pos = mrg->obuf.body_pos + mrg->obuf_last_successfull_offset;
        srl_cleanup_dedup_tlbs(aTHX_ mrg, mrg->obuf_last_successfull_offset);
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
    }

    /* Pass 1: make sure every slot is present and add up a (rough)
     * size estimate for the output. */
    for (idx = 0; idx <= last_idx; ++idx) {
        SV **slot = av_fetch(src, idx, 0);

        if (expect_false(slot == NULL))
            croak("av_fetch returned NULL");

        estimate += SvLEN(*slot);
    }

    /* One allocation up front instead of growing per document. */
    GROW_BUF(&mrg->obuf, estimate);

    /* Pass 2: merge the documents in order. */
    for (idx = 0; idx <= last_idx; ++idx) {
        SV **slot = av_fetch(src, idx, 0);

        srl_set_input_buffer(aTHX_ mrg, *slot);
        srl_build_track_table(aTHX_ mrg);

        /* Remember where this document starts in the output; the marker
         * stays set if the merge below does not return normally. */
        mrg->obuf_last_successfull_offset = BODY_POS_OFS(&mrg->obuf);

        mrg->recursion_depth = 0;
        mrg->ibuf.pos = mrg->ibuf.body_pos + 1;
        srl_merge_single_value(aTHX_ mrg);

        /* Merge completed: count it and clear the failure marker. */
        mrg->cnt_of_merged_elements++;
        mrg->obuf_last_successfull_offset = 0;
    }
}
Beispiel #5
0
/* Merge the single document `src` into the merger's output buffer.
 *
 * The input buffer is pointed at `src` and its track table is built. If the
 * previous merge left obuf_last_successfull_offset set, the output position
 * is rewound to that offset and the dedup tables are cleaned up before the
 * new document is merged. */
void
srl_merger_append(pTHX_ srl_merger_t *mrg, SV *src)
{
    assert(mrg != NULL);

    srl_set_input_buffer(aTHX_ mrg, src);
    srl_build_track_table(aTHX_ mrg);

    if (mrg->obuf_last_successfull_offset != 0) {
        /* The marker is only cleared at the end of a completed merge, so
         * a non-zero value means the previous one was interrupted and its
         * partial output has to be discarded. */
        SRL_MERGER_TRACE("last merge operation has failed, need to do some cleanup (offset %"UVuf")",
                          mrg->obuf_last_successfull_offset);

        mrg->obuf.pos = mrg->obuf.body_pos + mrg->obuf_last_successfull_offset;
        srl_cleanup_dedup_tlbs(aTHX_ mrg, mrg->obuf_last_successfull_offset);
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
    }

    /* Reserve room for the whole input up front. This is only a lower
     * bound: varints may be re-encoded, so the output can end up needing
     * more space than the input occupies. */
    GROW_BUF(&mrg->obuf, (size_t) SRL_RDR_SIZE(mrg->pibuf));

    /* Record where this document starts in the output buffer. */
    mrg->obuf_last_successfull_offset = BODY_POS_OFS(&mrg->obuf);

    mrg->recursion_depth = 0;
    mrg->ibuf.pos = mrg->ibuf.body_pos + 1;
    srl_merge_single_value(aTHX_ mrg);

    /* Merge completed: count it and clear the failure marker. */
    mrg->cnt_of_merged_elements++;
    mrg->obuf_last_successfull_offset = 0;
}
Beispiel #6
0
/* Builds the C-level configuration and state struct.
 *
 * opt - option hash, or NULL for an all-defaults encoder. Recognised keys:
 *         no_shared_hashkeys  true  => SRL_F_SHARED_HASHKEYS is NOT set
 *                                      (it is set by default)
 *         croak_on_bless      true  => SRL_F_CROAK_ON_BLESS
 *         snappy              true  => SRL_F_COMPRESS_SNAPPY
 *         undef_unknown       true  => SRL_F_UNDEF_UNKNOWN
 *         stringify_unknown   true  => SRL_F_STRINGIFY_UNKNOWN
 *         warn_unknown        true  => SRL_F_WARN_UNKNOWN; a negative
 *                                      value additionally sets
 *                                      SRL_F_NOWARN_UNKNOWN_OVERLOAD
 *         snappy_threshold    defined => stored as enc->snappy_threshold
 *
 * Returns the new encoder. Croaks if both 'undef_unknown' and
 * 'stringify_unknown' are true. */
srl_encoder_t *
srl_build_encoder_struct(pTHX_ HV *opt)
{
    srl_encoder_t *enc;
    SV **svp;

    enc = srl_empty_encoder_struct(aTHX);
    enc->flags = 0;

    /* Default threshold, assigned up front so that it is in effect
     * regardless of whether an option hash was supplied. */
    enc->snappy_threshold = 1024;

    /* load options */
    if (opt != NULL) {
        int undef_unknown = 0;
        /* SRL_F_SHARED_HASHKEYS on by default */
        svp = hv_fetchs(opt, "no_shared_hashkeys", 0);
        if ( !svp || !SvTRUE(*svp) )
            enc->flags |= SRL_F_SHARED_HASHKEYS;

        svp = hv_fetchs(opt, "croak_on_bless", 0);
        if ( svp && SvTRUE(*svp) )
            enc->flags |= SRL_F_CROAK_ON_BLESS;

        svp = hv_fetchs(opt, "snappy", 0);
        if ( svp && SvTRUE(*svp) )
            enc->flags |= SRL_F_COMPRESS_SNAPPY;

        svp = hv_fetchs(opt, "undef_unknown", 0);
        if ( svp && SvTRUE(*svp) ) {
            undef_unknown = 1;
            enc->flags |= SRL_F_UNDEF_UNKNOWN;
        }

        svp = hv_fetchs(opt, "stringify_unknown", 0);
        if ( svp && SvTRUE(*svp) ) {
            /* NOTE(review): croak() does not return, so `enc` is not
             * released on this path unless its allocation is registered
             * for cleanup elsewhere - confirm. */
            if (expect_false( undef_unknown )) {
                croak("'undef_unknown' and 'stringify_unknown' "
                      "options are mutually exclusive");
            }
            enc->flags |= SRL_F_STRINGIFY_UNKNOWN;
        }

        svp = hv_fetchs(opt, "warn_unknown", 0);
        if ( svp && SvTRUE(*svp) ) {
            enc->flags |= SRL_F_WARN_UNKNOWN;
            if (SvIV(*svp) < 0)
                enc->flags |= SRL_F_NOWARN_UNKNOWN_OVERLOAD;
        }

        /* an explicit, defined value overrides the default set above */
        svp = hv_fetchs(opt, "snappy_threshold", 0);
        if ( svp && SvOK(*svp) )
            enc->snappy_threshold = SvIV(*svp);
    }
    else {
        /* SRL_F_SHARED_HASHKEYS on by default */
        enc->flags |= SRL_F_SHARED_HASHKEYS;
    }

    DEBUG_ASSERT_BUF_SANE(enc);
    return enc;
}
Beispiel #7
0
/* Merge exactly one value (including anything nested inside it) from the
 * merger's input buffer into its output buffer.
 *
 * The tag at the current input position is read with the track flag masked
 * off and dispatched on. Container tags recurse through srl_merge_array(),
 * srl_merge_hash() and srl_merge_object(); prefix tags that are followed by
 * another value (REFN, WEAKEN, EXTEND, OBJECTV, OBJECTV_FREEZE, PAD) are
 * handled iteratively by jumping back to `read_again`.
 *
 * If the current input offset is on top of mrg->tracked_offsets, the
 * input-offset -> output-offset mapping is stored before the value is
 * written, so later COPY/REFP/ALIAS/OBJECTV tags can be rewritten.
 *
 * Errors are reported through the SRL_RDR_ERROR* macros: recursion limit
 * exceeded, unexpected end of input, or an unimplemented tag. */
SRL_STATIC_INLINE void
srl_merge_single_value(pTHX_ srl_merger_t *mrg)
{
    U8 tag;
    UV length, offset;
    ptable_entry_ptr ptable_entry;

    assert(mrg->recursion_depth >= 0);

    /* Account for nesting once per call. This has to stay above the
     * read_again label: the function decrements the counter only once on
     * exit, so re-incrementing it on every `goto read_again` would make the
     * depth drift upwards with each REFN/WEAKEN/EXTEND/OBJECTV/PAD tag and
     * eventually trip the limit on flat documents. */
    if (expect_false(++mrg->recursion_depth > mrg->max_recursion_depth))
        SRL_RDR_ERRORf1(mrg->pibuf, "Reached recursion limit (%lu) during merging", mrg->max_recursion_depth);

read_again:
    ptable_entry = NULL;
    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    if (expect_false(SRL_RDR_DONE(mrg->pibuf)))
        SRL_RDR_ERROR(mrg->pibuf, "Unexpected termination of input buffer");

    tag = *mrg->ibuf.pos & ~SRL_HDR_TRACK_FLAG;
    SRL_REPORT_CURRENT_TAG(mrg, tag);

    if (mrg->tracked_offsets && !srl_stack_empty(mrg->tracked_offsets)) {
        UV itag_offset = SRL_RDR_BODY_POS_OFS(mrg->pibuf);
        if (expect_false(itag_offset == srl_stack_peek_nocheck(aTHX_ mrg->tracked_offsets))) {
            /* This tag is referenced from elsewhere in the input: record
             * where it is about to land in the output buffer. */
            srl_stack_pop_nocheck(mrg->tracked_offsets);
            ptable_entry = srl_store_tracked_offset(aTHX_ mrg, itag_offset, BODY_POS_OFS(&mrg->obuf));
        }
    }

    if (tag <= SRL_HDR_NEG_HIGH) {
        /* small integers: the tag is the whole value */
        srl_buf_cat_tag_nocheck(mrg, tag);
    } else if (tag >= SRL_HDR_ARRAYREF_LOW && tag <= SRL_HDR_ARRAYREF_HIGH) {
        srl_merge_array(aTHX_ mrg, tag, SRL_HDR_ARRAYREF_LEN_FROM_TAG(tag));
    } else if (tag >= SRL_HDR_HASHREF_LOW && tag <= SRL_HDR_HASHREF_HIGH) {
        srl_merge_hash(aTHX_ mrg, tag, SRL_HDR_HASHREF_LEN_FROM_TAG(tag));
    } else if (tag >= SRL_HDR_SHORT_BINARY_LOW) {
        srl_merge_short_binary(aTHX_ mrg, tag, ptable_entry);
    } else {
        switch (tag) {
            case SRL_HDR_VARINT:
            case SRL_HDR_ZIGZAG:
                srl_buf_cat_tag_nocheck(mrg, tag);
                srl_copy_varint(aTHX_ mrg);
                break;

            /* fixed-width values: the byte count passed includes the tag */
            case SRL_HDR_FLOAT:         srl_buf_copy_content_nocheck(aTHX_ mrg, 5);  break;
            case SRL_HDR_DOUBLE:        srl_buf_copy_content_nocheck(aTHX_ mrg, 9);  break;
            case SRL_HDR_LONG_DOUBLE:   srl_buf_copy_content_nocheck(aTHX_ mrg, 17); break;

            case SRL_HDR_TRUE:
            case SRL_HDR_FALSE:
            case SRL_HDR_UNDEF:
            case SRL_HDR_CANONICAL_UNDEF:
                srl_buf_cat_tag_nocheck(mrg, tag);
                break;

            case SRL_HDR_BINARY:
            case SRL_HDR_STR_UTF8:
                srl_merge_binary_utf8(aTHX_ mrg, ptable_entry);
                break;

            case SRL_HDR_HASH:
                mrg->ibuf.pos++; // skip tag in input buffer
                length = srl_read_varint_uv_count(aTHX_ mrg->pibuf, " while reading ARRAY or HASH");
                srl_merge_hash(aTHX_ mrg, tag, length);
                break;

            case SRL_HDR_ARRAY:
                mrg->ibuf.pos++; // skip tag in input buffer
                length = srl_read_varint_uv_count(aTHX_ mrg->pibuf, " while reading ARRAY or HASH");
                srl_merge_array(aTHX_ mrg, tag, length);
                break;

            default:
                switch (tag) {
                    case SRL_HDR_COPY:
                    case SRL_HDR_REFP:
                    case SRL_HDR_ALIAS:
                        mrg->ibuf.pos++; // skip tag in input buffer
                        offset = srl_read_varint_uv_offset(aTHX_ mrg->pibuf, " while reading COPY/ALIAS/REFP");
                        offset = srl_lookup_tracked_offset(aTHX_ mrg, offset); // convert ibuf offset to obuf offset
                        srl_buf_cat_varint(aTHX_ &mrg->obuf, tag, offset);

                        if (tag == SRL_HDR_REFP || tag == SRL_HDR_ALIAS) {
                            /* flag the referenced tag in the output as tracked */
                            SRL_SET_TRACK_FLAG(*(mrg->obuf.body_pos + offset));
                        }

                        break;

                    case SRL_HDR_REFN:
                    case SRL_HDR_WEAKEN:
                    case SRL_HDR_EXTEND:
                        /* prefix tag: emit it, then merge the value that follows */
                        srl_buf_cat_tag_nocheck(mrg, tag);
                        goto read_again;

                    case SRL_HDR_OBJECT:
                    case SRL_HDR_OBJECT_FREEZE:
                        srl_merge_object(aTHX_ mrg, tag);
                        break;

                    case SRL_HDR_REGEXP:
                        srl_buf_cat_tag_nocheck(mrg, tag);
                        srl_merge_stringish(aTHX_ mrg);

                        /* The pattern must be followed by a SHORT_BINARY
                         * holding the modifiers; make sure there is a byte
                         * to look at before dereferencing the position. */
                        if (expect_false(SRL_RDR_DONE(mrg->pibuf)))
                            SRL_RDR_ERROR(mrg->pibuf, "Unexpected termination of input buffer");

                        /* mask the track flag so that the range check below
                         * only accepts genuine SHORT_BINARY tags */
                        tag = *mrg->ibuf.pos & ~SRL_HDR_TRACK_FLAG;
                        if (expect_false(tag < SRL_HDR_SHORT_BINARY_LOW))
                            SRL_RDR_ERROR_UNEXPECTED(mrg->pibuf, tag, "SRL_HDR_SHORT_BINARY");

                        /* + 1 to copy the tag byte along with its payload */
                        srl_buf_copy_content_nocheck(aTHX_ mrg, SRL_HDR_SHORT_BINARY_LEN_FROM_TAG(tag) + 1);
                        break;

                    case SRL_HDR_OBJECTV:
                    case SRL_HDR_OBJECTV_FREEZE:
                        mrg->ibuf.pos++; // skip tag in input buffer
                        offset = srl_read_varint_uv_offset(aTHX_ mrg->pibuf, " while reading OBJECTV/OBJECTV_FREEZE");
                        offset = srl_lookup_tracked_offset(aTHX_ mrg, offset); // convert ibuf offset to obuf offset
                        srl_buf_cat_varint(aTHX_ &mrg->obuf, tag, offset);
                        goto read_again;

                    case SRL_HDR_PAD:
                        /* copy the whole run of PAD bytes; presumably
                         * srl_buf_cat_tag_nocheck() advances the input
                         * position, which is what ends this loop */
                        while (SRL_RDR_NOT_DONE(mrg->pibuf) && *mrg->ibuf.pos == SRL_HDR_PAD) {
                            srl_buf_cat_tag_nocheck(mrg, SRL_HDR_PAD);
                        }

                        goto read_again;

                     default:
                        SRL_RDR_ERROR_UNIMPLEMENTED(mrg->pibuf, tag, "");
                        break;
                }
        }
    }

    --mrg->recursion_depth;
    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
}
Beispiel #8
0
/* Finalise the merged document and return it as a new SV.
 *
 * mrg             - merger holding the output buffer
 * user_header_src - optional SV containing a Sereal document whose content
 *                   after the first SRL_MINIMALISTIC_HEADER_SIZE bytes is
 *                   embedded as the user header; NULL for no user header
 *
 * Steps, in order: discard the output of a failed last merge (if any),
 * write the merged-element count into the padding area (unless
 * SRL_F_TOPLEVEL_KEY_SCALAR is set), write the Sereal header in front of
 * the body inside the preallocated header space, optionally compress the
 * body, and copy the result into a fresh SV.
 *
 * Croaks when a user header is given but the protocol version is < 2, the
 * header is not a valid/raw Sereal document of the same protocol version,
 * is too short, or does not fit in SRL_PREALLOCATE_FOR_USER_HEADER. */
SV *
srl_merger_finish(pTHX_ srl_merger_t *mrg, SV *user_header_src)
{
    UV end_offset;
    UV body_offset;
    UV srl_start_offset = 0; /* offset from obuf.start at which the returned document begins */

    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    if (mrg->obuf_last_successfull_offset) {
        /* non-zero marker: rewind the output position to the recorded offset */
        SRL_MERGER_TRACE("last merge operation has failed, reset to offset %"UVuf"",
                          mrg->obuf_last_successfull_offset);

        mrg->obuf.pos = mrg->obuf.body_pos + mrg->obuf_last_successfull_offset;
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
    }

    /* store offset to the end of the document */
    end_offset = BODY_POS_OFS(&mrg->obuf);
    /* remember where body_pos sits relative to the buffer start, to verify
     * later that writing the header ends exactly there */
    body_offset = mrg->obuf.body_pos - mrg->obuf.start;

    if (!SRL_MRG_HAVE_OPTION(mrg, SRL_F_TOPLEVEL_KEY_SCALAR)) {
        /* temporarily jump to the padding area and write the number of
         * merged elements there as a varint */
        mrg->obuf.pos = mrg->obuf.start + mrg->obuf_padding_bytes_offset;
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

        srl_buf_cat_varint_nocheck(aTHX_ &mrg->obuf, 0, mrg->cnt_of_merged_elements);
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

        /* go back to the end of the document */
        mrg->obuf.pos = mrg->obuf.body_pos + end_offset;
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
    }

    if (user_header_src) {
        char *user_header;
        STRLEN user_header_len;
        U8 encoding_flags, protocol_version;
        IV proto_version_and_encoding_flags_int;
        UV need_space_for_sereal_and_user_headers = 0;

        if (mrg->protocol_version < 2)
            croak("Sereal version does not support headers");

        /* the user header must itself be a valid, raw (uncompressed) Sereal
         * document of the same protocol version as the merged body */
        user_header = (char*) SvPV(user_header_src, user_header_len);
        proto_version_and_encoding_flags_int = srl_validate_header_version(aTHX_ (srl_reader_char_ptr) user_header, user_header_len);
        if (expect_false(proto_version_and_encoding_flags_int < 1))
            croak("Bad Sereal header: Not a valid Sereal document.");

        protocol_version = (U8) (proto_version_and_encoding_flags_int & SRL_PROTOCOL_VERSION_MASK);
        if (expect_false(protocol_version != mrg->protocol_version))
            croak("The versions of body and header do not match");

        encoding_flags = (U8) (proto_version_and_encoding_flags_int & SRL_PROTOCOL_ENCODING_MASK);
        if (expect_false(encoding_flags != SRL_PROTOCOL_ENCODING_RAW))
            croak("The header has unsupported format.");

        if (expect_false(user_header_len < SRL_MINIMALISTIC_HEADER_SIZE))
            croak("Provided user header is too short");

        /* Byte arithmetic follows. The goal is to fit the user header into
         * the preallocated space in front of the body; the varint-encoded
         * length field makes the required size variable. */

        /* skip the first SRL_MINIMALISTIC_HEADER_SIZE bytes of the supplied
         * document; only what follows is embedded */
        user_header     += SRL_MINIMALISTIC_HEADER_SIZE;
        user_header_len -= SRL_MINIMALISTIC_HEADER_SIZE;

        /* total bytes needed for the Sereal header plus the user header */
        need_space_for_sereal_and_user_headers
            = 4                                             /* srl magic */ 
            + 1                                             /* byte for version */
            + 1                                             /* user_header bit field */
            + srl_varint_length(aTHX_ user_header_len + 1)  /* user_header_len in varint representation, add one because of bit field */
            + user_header_len;

        if (SRL_PREALLOCATE_FOR_USER_HEADER < need_space_for_sereal_and_user_headers) {
            croak("User header excided SRL_PREALLOCATE_FOR_USER_HEADER. Need to reallocate memory but too lazy to implement this"); // TODO
        }

        /* start the headers late enough in the preallocated area that they
         * use up exactly the remaining need_space_for_sereal_and_user_headers bytes */
        srl_start_offset = SRL_PREALLOCATE_FOR_USER_HEADER - need_space_for_sereal_and_user_headers;
        mrg->obuf.pos = mrg->obuf.start + srl_start_offset;

        srl_fill_header(aTHX_ mrg, user_header, user_header_len);
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

        /* sanity check: after the header, pos must sit one byte past the
         * previously recorded body_pos offset */
        if (expect_false(body_offset != (UV) (mrg->obuf.pos - mrg->obuf.start - 1))) {
            croak("Bizare! Body pointer has different offset after writing Sereal header! Current offset=%"UVuf", expected=%"UVuf,
                  (UV) (mrg->obuf.pos - mrg->obuf.start), body_offset);
        }

        /* advance pos by the body-relative end offset saved above */
        mrg->obuf.pos += end_offset;
    } else if (mrg->protocol_version > 1) {
        assert(SRL_PREALLOCATE_FOR_USER_HEADER > SRL_MINIMALISTIC_HEADER_SIZE);

        /* no user header: only the minimal Sereal header is written, placed
         * at the tail of the preallocated area */
        srl_start_offset = SRL_PREALLOCATE_FOR_USER_HEADER - SRL_MINIMALISTIC_HEADER_SIZE;
        mrg->obuf.pos = mrg->obuf.start + srl_start_offset;

        srl_fill_header(aTHX_ mrg, NULL, 0);
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

        /* same sanity check as in the user-header branch */
        if (expect_false(body_offset != (UV) (mrg->obuf.pos - mrg->obuf.start - 1))) {
            croak("Bizare! Body pointer has different offset after writing Sereal header!");
        }

        /* advance pos by the body-relative end offset saved above */
        mrg->obuf.pos += end_offset;
    }
    /* NOTE(review): with no user header and protocol_version <= 1 neither
     * branch runs, so no header is written here and srl_start_offset stays 0
     * - confirm this is intended. */

    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    if (SRL_MRG_HAVE_OPTION(mrg, SRL_F_COMPRESS_SNAPPY_INCREMENTAL)) {
        /* compress the body, then let the macro update body_pos for the
         * given protocol version */
        srl_compress_body(aTHX_ &mrg->obuf, body_offset, mrg->flags, 0, &mrg->snappy_workmem);
        SRL_UPDATE_BODY_POS(&mrg->obuf, mrg->protocol_version);
    }

    assert(srl_start_offset <= (UV) BUF_POS_OFS(&mrg->obuf));
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    /* copy out the bytes from srl_start_offset up to one byte short of pos */
    return newSVpvn((char *) mrg->obuf.start + srl_start_offset, BUF_POS_OFS(&mrg->obuf) - srl_start_offset - 1);
}