SRL_STATIC_INLINE void srl_merge_binary_utf8(pTHX_ srl_merger_t *mrg, ptable_entry_ptr ptable_entry) { int ok; UV length, total_length; strtable_entry_ptr strtable_entry; srl_reader_char_ptr tag_ptr = mrg->ibuf.pos; DEBUG_ASSERT_RDR_SANE(mrg->pibuf); DEBUG_ASSERT_BUF_SANE(&mrg->obuf); mrg->ibuf.pos++; // skip tag in input buffer length = srl_read_varint_uv_length(aTHX_ mrg->pibuf, " while reading BINARY or STR_UTF8"); assert((mrg->ibuf.pos - tag_ptr) > 0); assert((mrg->ibuf.pos - tag_ptr) <= SRL_MAX_VARINT_LENGTH); total_length = length + (mrg->ibuf.pos - tag_ptr); strtable_entry = srl_lookup_string(aTHX_ mrg, tag_ptr, total_length, &ok); if (ok) { // issue COPY tag srl_buf_cat_varint(aTHX_ &mrg->obuf, SRL_HDR_COPY, strtable_entry->offset); mrg->ibuf.pos += length; if (expect_false(ptable_entry)) { // update value in ptable entry // This is needed because if any of following tags will reffer to // this one as COPY we need to point them to original string. // By Sereal spec a COPY tag cannot reffer to another COPY tag. ptable_entry->value = INT2PTR(void *, strtable_entry->offset); } } else if (strtable_entry) {
/* Clone an encoder's configuration without any of its current state.
 *
 * A fresh encoder is obtained from srl_empty_encoder_struct() and the
 * option-derived settings of `proto` are copied into it:
 *   - flags            : the option flag word
 *   - snappy_threshold : the value srl_build_encoder_struct() derives from
 *                        the "snappy_threshold" option
 *
 * Returns the newly built encoder.
 */
srl_encoder_t *
srl_build_encoder_struct_alike(pTHX_ srl_encoder_t *proto)
{
    srl_encoder_t *enc;

    enc = srl_empty_encoder_struct(aTHX);

    enc->flags = proto->flags;
    /* The threshold is configuration, not per-run state: without this copy a
     * clone of an encoder built with a custom "snappy_threshold" would not
     * honour that setting. */
    enc->snappy_threshold = proto->snappy_threshold;

    DEBUG_ASSERT_BUF_SANE(enc);
    return enc;
}
/* Merge one array from the input buffer into the output buffer.
 *
 * mrg    - merger holding the input (pibuf/ibuf) and output (obuf) buffers
 * tag    - array tag being merged; SRL_HDR_ARRAY is written together with
 *          its element count as a varint, any other tag (the caller passes
 *          ARRAYREF_n tags, whose length is encoded in the tag itself) is
 *          emitted through srl_buf_cat_tag_nocheck()
 * length - number of elements that follow; each one is merged with
 *          srl_merge_single_value()
 */
SRL_STATIC_INLINE void
srl_merge_array(pTHX_ srl_merger_t *mrg, const U8 tag, UV length)
{
    /* The counter has the same width as `length`.  A narrower type (the
     * previous `unsigned int`) wraps around before reaching a count above
     * UINT_MAX, so `i < length` would stay true forever. */
    UV i;

    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    if (tag == SRL_HDR_ARRAY) {
        srl_buf_cat_varint(aTHX_ &mrg->obuf, tag, length);
    } else {
        srl_buf_cat_tag_nocheck(mrg, tag);
    }

    for (i = 0; i < length; ++i) {
        srl_merge_single_value(aTHX_ mrg);
    }

    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
}
/* Merge every document stored in the Perl array `src` into the output buffer.
 *
 * Steps:
 *   1. If the previous merge was left unfinished (non-zero
 *      obuf_last_successfull_offset), rewind the output position to that
 *      offset and clean the dedup tables accordingly.
 *   2. Sum SvLEN() over all elements and grow the output buffer once by that
 *      amount (a rough estimate only).
 *   3. For each element: install it as input buffer, build the track table,
 *      remember the current output offset as "last successful", merge the
 *      top-level value, bump the merged-element counter and clear the
 *      "last successful" marker again.
 *
 * Croaks with "av_fetch returned NULL" if an array slot cannot be fetched
 * during the size estimation pass.
 */
void
srl_merger_append_all(pTHX_ srl_merger_t *mrg, AV *src)
{
    const SSize_t last_idx = av_len(src);
    STRLEN estimate = 0;
    SSize_t idx;

    if (mrg->obuf_last_successfull_offset != 0) {
        /* A non-zero marker means the last merge operation did not reach
         * its end, so the partially written output has to be discarded. */
        SRL_MERGER_TRACE("last merge operation has failed, need to do some cleanup (offset %"UVuf")",
                         mrg->obuf_last_successfull_offset);

        mrg->obuf.pos = mrg->obuf.body_pos + mrg->obuf_last_successfull_offset;
        srl_cleanup_dedup_tlbs(aTHX_ mrg, mrg->obuf_last_successfull_offset);
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
    }

    /* first pass: estimate how much output space will be needed */
    for (idx = 0; idx <= last_idx; ++idx) {
        SV **slot = av_fetch(src, idx, 0);

        if (expect_false(slot == NULL))
            croak("av_fetch returned NULL");

        estimate += SvLEN(*slot);
    }

    /* one allocation up front instead of growing per document */
    GROW_BUF(&mrg->obuf, estimate);

    /* second pass: merge the documents one by one */
    for (idx = 0; idx <= last_idx; ++idx) {
        SV **slot = av_fetch(src, idx, 0);

        srl_set_input_buffer(aTHX_ mrg, *slot);
        srl_build_track_table(aTHX_ mrg);

        /* remember where this document starts in the output */
        mrg->obuf_last_successfull_offset = BODY_POS_OFS(&mrg->obuf);

        mrg->recursion_depth = 0;
        mrg->ibuf.pos = mrg->ibuf.body_pos + 1;
        srl_merge_single_value(aTHX_ mrg);

        mrg->cnt_of_merged_elements++;
        mrg->obuf_last_successfull_offset = 0;
    }
}
/* Merge a single document `src` into the merger's output buffer.
 *
 * The document is installed as the input buffer and its track table is
 * built first.  If the previous merge was left unfinished (non-zero
 * obuf_last_successfull_offset) the output position is rewound to that
 * offset and the dedup tables are cleaned.  The output buffer is then grown
 * by the size of the input, the current output offset is recorded as "last
 * successful", the top-level value is merged, the merged-element counter is
 * incremented and the marker is cleared.
 */
void
srl_merger_append(pTHX_ srl_merger_t *mrg, SV *src)
{
    assert(mrg != NULL);

    srl_set_input_buffer(aTHX_ mrg, src);
    srl_build_track_table(aTHX_ mrg);

    if (mrg->obuf_last_successfull_offset != 0) {
        /* A non-zero marker means the last merge operation did not reach
         * its end, so the partially written output has to be discarded. */
        SRL_MERGER_TRACE("last merge operation has failed, need to do some cleanup (offset %"UVuf")",
                         mrg->obuf_last_successfull_offset);

        mrg->obuf.pos = mrg->obuf.body_pos + mrg->obuf_last_successfull_offset;
        srl_cleanup_dedup_tlbs(aTHX_ mrg, mrg->obuf_last_successfull_offset);
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
    }

    /* Reserve as much room as the input occupies.  This may still be too
     * little: varints can take more space in the output than they did in
     * the input. */
    GROW_BUF(&mrg->obuf, (size_t) SRL_RDR_SIZE(mrg->pibuf));

    /* remember where this document starts in the output */
    mrg->obuf_last_successfull_offset = BODY_POS_OFS(&mrg->obuf);

    mrg->recursion_depth = 0;
    mrg->ibuf.pos = mrg->ibuf.body_pos + 1;
    srl_merge_single_value(aTHX_ mrg);

    mrg->cnt_of_merged_elements++;
    mrg->obuf_last_successfull_offset = 0;
}
/* Builds the C-level configuration and state struct.
 *
 * opt - options hash, may be NULL.  Recognised keys:
 *         no_shared_hashkeys : true value turns SRL_F_SHARED_HASHKEYS off
 *                              (the flag is on by default)
 *         croak_on_bless     : sets SRL_F_CROAK_ON_BLESS
 *         snappy             : sets SRL_F_COMPRESS_SNAPPY
 *         undef_unknown      : sets SRL_F_UNDEF_UNKNOWN
 *         stringify_unknown  : sets SRL_F_STRINGIFY_UNKNOWN; croaks when
 *                              undef_unknown is enabled as well
 *         warn_unknown       : sets SRL_F_WARN_UNKNOWN, and additionally
 *                              SRL_F_NOWARN_UNKNOWN_OVERLOAD if negative
 *         snappy_threshold   : stored as-is when defined, otherwise 1024
 *
 * With a NULL `opt` only SRL_F_SHARED_HASHKEYS is set.
 * Returns the new encoder.
 */
srl_encoder_t *
srl_build_encoder_struct(pTHX_ HV *opt)
{
    srl_encoder_t *enc = srl_empty_encoder_struct(aTHX);
    SV **svp;
    int undef_unknown_seen = 0;

    enc->flags = 0;

    if (opt == NULL) {
        /* no options given: only the default-on flag applies */
        enc->flags |= SRL_F_SHARED_HASHKEYS;
        DEBUG_ASSERT_BUF_SANE(enc);
        return enc;
    }

    /* SRL_F_SHARED_HASHKEYS stays on unless explicitly disabled */
    svp = hv_fetchs(opt, "no_shared_hashkeys", 0);
    if ( !(svp && SvTRUE(*svp)) )
        enc->flags |= SRL_F_SHARED_HASHKEYS;

    svp = hv_fetchs(opt, "croak_on_bless", 0);
    if ( svp && SvTRUE(*svp) )
        enc->flags |= SRL_F_CROAK_ON_BLESS;

    svp = hv_fetchs(opt, "snappy", 0);
    if ( svp && SvTRUE(*svp) )
        enc->flags |= SRL_F_COMPRESS_SNAPPY;

    svp = hv_fetchs(opt, "undef_unknown", 0);
    if ( svp && SvTRUE(*svp) ) {
        undef_unknown_seen = 1;
        enc->flags |= SRL_F_UNDEF_UNKNOWN;
    }

    svp = hv_fetchs(opt, "stringify_unknown", 0);
    if ( svp && SvTRUE(*svp) ) {
        /* the two "unknown" strategies cannot be combined */
        if (expect_false( undef_unknown_seen ))
            croak("'undef_unknown' and 'stringify_unknown' "
                  "options are mutually exclusive");
        enc->flags |= SRL_F_STRINGIFY_UNKNOWN;
    }

    svp = hv_fetchs(opt, "warn_unknown", 0);
    if ( svp && SvTRUE(*svp) ) {
        enc->flags |= SRL_F_WARN_UNKNOWN;
        /* a negative value additionally sets the NOWARN_UNKNOWN_OVERLOAD flag */
        if (SvIV(*svp) < 0)
            enc->flags |= SRL_F_NOWARN_UNKNOWN_OVERLOAD;
    }

    svp = hv_fetchs(opt, "snappy_threshold", 0);
    enc->snappy_threshold = ( svp && SvOK(*svp) ) ? SvIV(*svp) : 1024;

    DEBUG_ASSERT_BUF_SANE(enc);
    return enc;
}
SRL_STATIC_INLINE void srl_merge_single_value(pTHX_ srl_merger_t *mrg) { U8 tag; UV length, offset; ptable_entry_ptr ptable_entry; read_again: assert(mrg->recursion_depth >= 0); DEBUG_ASSERT_RDR_SANE(mrg->pibuf); DEBUG_ASSERT_BUF_SANE(&mrg->obuf); if (expect_false(++mrg->recursion_depth > mrg->max_recursion_depth)) SRL_RDR_ERRORf1(mrg->pibuf, "Reached recursion limit (%lu) during merging", mrg->max_recursion_depth); ptable_entry = NULL; if (expect_false(SRL_RDR_DONE(mrg->pibuf))) SRL_RDR_ERROR(mrg->pibuf, "Unexpected termination of input buffer"); tag = *mrg->ibuf.pos & ~SRL_HDR_TRACK_FLAG; SRL_REPORT_CURRENT_TAG(mrg, tag); if (mrg->tracked_offsets && !srl_stack_empty(mrg->tracked_offsets)) { UV itag_offset = SRL_RDR_BODY_POS_OFS(mrg->pibuf); if (expect_false(itag_offset == srl_stack_peek_nocheck(aTHX_ mrg->tracked_offsets))) { // trackme case srl_stack_pop_nocheck(mrg->tracked_offsets); ptable_entry = srl_store_tracked_offset(aTHX_ mrg, itag_offset, BODY_POS_OFS(&mrg->obuf)); } } if (tag <= SRL_HDR_NEG_HIGH) { srl_buf_cat_tag_nocheck(mrg, tag); } else if (tag >= SRL_HDR_ARRAYREF_LOW && tag <= SRL_HDR_ARRAYREF_HIGH) { srl_merge_array(aTHX_ mrg, tag, SRL_HDR_ARRAYREF_LEN_FROM_TAG(tag)); } else if (tag >= SRL_HDR_HASHREF_LOW && tag <= SRL_HDR_HASHREF_HIGH) { srl_merge_hash(aTHX_ mrg, tag, SRL_HDR_HASHREF_LEN_FROM_TAG(tag)); } else if (tag >= SRL_HDR_SHORT_BINARY_LOW) { srl_merge_short_binary(aTHX_ mrg, tag, ptable_entry); } else { switch (tag) { case SRL_HDR_VARINT: case SRL_HDR_ZIGZAG: srl_buf_cat_tag_nocheck(mrg, tag); srl_copy_varint(aTHX_ mrg); break; case SRL_HDR_FLOAT: srl_buf_copy_content_nocheck(aTHX_ mrg, 5); break; case SRL_HDR_DOUBLE: srl_buf_copy_content_nocheck(aTHX_ mrg, 9); break; case SRL_HDR_LONG_DOUBLE: srl_buf_copy_content_nocheck(aTHX_ mrg, 17); break; case SRL_HDR_TRUE: case SRL_HDR_FALSE: case SRL_HDR_UNDEF: case SRL_HDR_CANONICAL_UNDEF: srl_buf_cat_tag_nocheck(mrg, tag); break; case SRL_HDR_BINARY: case SRL_HDR_STR_UTF8: 
srl_merge_binary_utf8(aTHX_ mrg, ptable_entry); break; case SRL_HDR_HASH: mrg->ibuf.pos++; // skip tag in input buffer length = srl_read_varint_uv_count(aTHX_ mrg->pibuf, " while reading ARRAY or HASH"); srl_merge_hash(aTHX_ mrg, tag, length); break; case SRL_HDR_ARRAY: mrg->ibuf.pos++; // skip tag in input buffer length = srl_read_varint_uv_count(aTHX_ mrg->pibuf, " while reading ARRAY or HASH"); srl_merge_array(aTHX_ mrg, tag, length); break; default: switch (tag) { case SRL_HDR_COPY: case SRL_HDR_REFP: case SRL_HDR_ALIAS: mrg->ibuf.pos++; // skip tag in input buffer offset = srl_read_varint_uv_offset(aTHX_ mrg->pibuf, " while reading COPY/ALIAS/REFP"); offset = srl_lookup_tracked_offset(aTHX_ mrg, offset); // convert ibuf offset to obuf offset srl_buf_cat_varint(aTHX_ &mrg->obuf, tag, offset); if (tag == SRL_HDR_REFP || tag == SRL_HDR_ALIAS) { SRL_SET_TRACK_FLAG(*(mrg->obuf.body_pos + offset)); } break; case SRL_HDR_REFN: case SRL_HDR_WEAKEN: case SRL_HDR_EXTEND: srl_buf_cat_tag_nocheck(mrg, tag); goto read_again; case SRL_HDR_OBJECT: case SRL_HDR_OBJECT_FREEZE: srl_merge_object(aTHX_ mrg, tag); break; case SRL_HDR_REGEXP: srl_buf_cat_tag_nocheck(mrg, tag); srl_merge_stringish(aTHX_ mrg); tag = *mrg->ibuf.pos; if (expect_false(tag < SRL_HDR_SHORT_BINARY_LOW)) SRL_RDR_ERROR_UNEXPECTED(mrg->pibuf, tag, "SRL_HDR_SHORT_BINARY"); srl_buf_copy_content_nocheck(aTHX_ mrg, SRL_HDR_SHORT_BINARY_LEN_FROM_TAG(tag) + 1); break; case SRL_HDR_OBJECTV: case SRL_HDR_OBJECTV_FREEZE: mrg->ibuf.pos++; // skip tag in input buffer offset = srl_read_varint_uv_offset(aTHX_ mrg->pibuf, " while reading OBJECTV/OBJECTV_FREEZE"); offset = srl_lookup_tracked_offset(aTHX_ mrg, offset); // convert ibuf offset to obuf offset srl_buf_cat_varint(aTHX_ &mrg->obuf, tag, offset); goto read_again; case SRL_HDR_PAD: while (SRL_RDR_NOT_DONE(mrg->pibuf) && *mrg->ibuf.pos == SRL_HDR_PAD) { srl_buf_cat_tag_nocheck(mrg, SRL_HDR_PAD); } goto read_again; default: SRL_RDR_ERROR_UNIMPLEMENTED(mrg->pibuf, tag, 
""); break; } } } --mrg->recursion_depth; DEBUG_ASSERT_RDR_SANE(mrg->pibuf); DEBUG_ASSERT_BUF_SANE(&mrg->obuf); }
/* Finalise the merged document and return it as a new Perl string SV.
 *
 * mrg             - merger whose output buffer holds the merged body
 * user_header_src - optional SV holding a Sereal document that supplies the
 *                   user header; may be NULL
 *
 * Steps performed:
 *   1. If obuf_last_successfull_offset is non-zero, the output position is
 *      reset to that offset (the trace message calls this "last merge
 *      operation has failed").
 *   2. Unless SRL_F_TOPLEVEL_KEY_SCALAR is set, the number of merged elements
 *      is written as a varint at obuf_padding_bytes_offset.
 *   3. The Sereal header is written into the space in front of the body:
 *      with a user header when `user_header_src` is given, a header without
 *      user data when protocol_version > 1, nothing otherwise.
 *   4. If SRL_F_COMPRESS_SNAPPY_INCREMENTAL is set the body is compressed.
 *
 * Croaks when the user header is used with protocol version < 2, is not a
 * valid Sereal header, has a different protocol version than the body, is
 * not raw-encoded, is too short, does not fit into
 * SRL_PREALLOCATE_FOR_USER_HEADER, or when the body offset after writing
 * the header differs from the one recorded before.
 */
SV *
srl_merger_finish(pTHX_ srl_merger_t *mrg, SV *user_header_src)
{
    UV end_offset;              /* output position relative to body_pos (BODY_POS_OFS) */
    UV body_offset;             /* body_pos relative to the start of the buffer */
    UV srl_start_offset = 0;    /* where the returned document starts in obuf */

    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    if (mrg->obuf_last_successfull_offset) {
        /* drop whatever was written after the last recorded offset */
        SRL_MERGER_TRACE("last merge operation has failed, reset to offset %"UVuf"",
                         mrg->obuf_last_successfull_offset);

        mrg->obuf.pos = mrg->obuf.body_pos + mrg->obuf_last_successfull_offset;
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
    }

    /* store offset to the end of the document */
    end_offset = BODY_POS_OFS(&mrg->obuf);
    body_offset = mrg->obuf.body_pos - mrg->obuf.start;

    if (!SRL_MRG_HAVE_OPTION(mrg, SRL_F_TOPLEVEL_KEY_SCALAR)) {
        /* jump to the reserved padding bytes, write the element count
         * there, then restore the position to the end of the document */
        mrg->obuf.pos = mrg->obuf.start + mrg->obuf_padding_bytes_offset;
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

        srl_buf_cat_varint_nocheck(aTHX_ &mrg->obuf, 0, mrg->cnt_of_merged_elements);
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

        mrg->obuf.pos = mrg->obuf.body_pos + end_offset;
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
    }

    if (user_header_src) {
        char *user_header;
        STRLEN user_header_len;
        U8 encoding_flags, protocol_version;
        IV proto_version_and_encoding_flags_int;
        UV need_space_for_sereal_and_user_headers = 0;

        if (mrg->protocol_version < 2)
            croak("Sereal version does not support headers");

        user_header = (char*) SvPV(user_header_src, user_header_len);
        proto_version_and_encoding_flags_int = srl_validate_header_version(aTHX_ (srl_reader_char_ptr) user_header, user_header_len);

        if (expect_false(proto_version_and_encoding_flags_int < 1))
            croak("Bad Sereal header: Not a valid Sereal document.");

        /* the validated value carries both the protocol version and the
         * encoding flags; split them with the respective masks */
        protocol_version = (U8) (proto_version_and_encoding_flags_int & SRL_PROTOCOL_VERSION_MASK);
        if (expect_false(protocol_version != mrg->protocol_version))
            croak("The versions of body and header do not match");

        encoding_flags = (U8) (proto_version_and_encoding_flags_int & SRL_PROTOCOL_ENCODING_MASK);
        if (expect_false(encoding_flags != SRL_PROTOCOL_ENCODING_RAW))
            croak("The header has unsupported format.");

        if (expect_false(user_header_len < SRL_MINIMALISTIC_HEADER_SIZE))
            croak("Provided user header is too short");

        /* Here some byte magic goes. The main idea is to fit user_header
         * inside the preallocated space. However, due to varint it becomes
         * quite tricky. */

        /* skip the first SRL_MINIMALISTIC_HEADER_SIZE bytes of the supplied
         * document; only what follows is passed on as user header */
        user_header += SRL_MINIMALISTIC_HEADER_SIZE;
        user_header_len -= SRL_MINIMALISTIC_HEADER_SIZE;

        /* =srl + 1 byte for version + 1 byte for header */
        need_space_for_sereal_and_user_headers
            = 4 /* srl magic */
            + 1 /* byte for version */
            + 1 /* user_header bit field */
            + srl_varint_length(aTHX_ user_header_len + 1) /* user_header_len in varint representation, add one because of bit field */
            + user_header_len;

        if (SRL_PREALLOCATE_FOR_USER_HEADER < need_space_for_sereal_and_user_headers) {
            croak("User header excided SRL_PREALLOCATE_FOR_USER_HEADER. Need to reallocate memory but too lazy to implement this"); /* TODO */
        }

        /* move position to where Sereal and user headers should start */
        srl_start_offset = SRL_PREALLOCATE_FOR_USER_HEADER - need_space_for_sereal_and_user_headers;
        mrg->obuf.pos = mrg->obuf.start + srl_start_offset;

        srl_fill_header(aTHX_ mrg, user_header, user_header_len);
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

        /* after the header the position, minus one, must equal the body
         * offset recorded before */
        if (expect_false(body_offset != (UV) (mrg->obuf.pos - mrg->obuf.start - 1))) {
            croak("Bizare! Body pointer has different offset after writing Sereal header! Current offset=%"UVuf", expected=%"UVuf,
                  (UV) (mrg->obuf.pos - mrg->obuf.start), body_offset);
        }

        /* move the position past the body again */
        mrg->obuf.pos += end_offset;
    } else if (mrg->protocol_version > 1) {
        assert(SRL_PREALLOCATE_FOR_USER_HEADER > SRL_MINIMALISTIC_HEADER_SIZE);

        /* move position to where the Sereal header should start; without
         * user data only SRL_MINIMALISTIC_HEADER_SIZE bytes are reserved */
        srl_start_offset = SRL_PREALLOCATE_FOR_USER_HEADER - SRL_MINIMALISTIC_HEADER_SIZE;
        mrg->obuf.pos = mrg->obuf.start + srl_start_offset;

        srl_fill_header(aTHX_ mrg, NULL, 0);
        DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

        /* same consistency check as in the user-header branch */
        if (expect_false(body_offset != (UV) (mrg->obuf.pos - mrg->obuf.start - 1))) {
            croak("Bizare! Body pointer has different offset after writing Sereal header!");
        }

        /* move the position past the body again */
        mrg->obuf.pos += end_offset;
    }

    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    if (SRL_MRG_HAVE_OPTION(mrg, SRL_F_COMPRESS_SNAPPY_INCREMENTAL)) {
        srl_compress_body(aTHX_ &mrg->obuf, body_offset, mrg->flags, 0, &mrg->snappy_workmem);
        SRL_UPDATE_BODY_POS(&mrg->obuf, mrg->protocol_version);
    }

    assert(srl_start_offset <= (UV) BUF_POS_OFS(&mrg->obuf));
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    /* NOTE(review): the "- 1" presumably compensates for `pos` sitting one
     * byte past the end after `pos += end_offset` above - confirm against
     * the definition of BODY_POS_OFS / body_pos. */
    return newSVpvn((char *) mrg->obuf.start + srl_start_offset, BUF_POS_OFS(&mrg->obuf) - srl_start_offset - 1);
}