/*
 * Pop n frames off the iterator stack and reposition the reader on the
 * frame that becomes the new top: the read position is moved to that
 * frame's recorded offset and its idx is reset to its count.
 *
 * Raises an error as soon as a pop leaves the stack empty.
 * With n == 0 nothing is popped and the current frame is simply rewound.
 */
void
srl_iterator_step_out(pTHX_ srl_iterator_t *iter, UV n)
{
    srl_stack_t *stack = iter->stack;
    UV frames_left;
    UV frame_offset;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_TRACE("n=%"UVuf, n);
    SRL_ITER_ASSERT_STACK(iter);

    /* XXX no EOF assertion here: need ability to go back on last element */
    /* XXX n == 0 is deliberately not short-circuited (kept as a feature?) */

    for (frames_left = n; frames_left > 0; --frames_left) {
        srl_stack_pop_nocheck(stack);
        if (expect_false(srl_stack_empty(stack))) {
            SRL_ITER_ERROR("It was last object on stack, no more parents");
        }
    }

    /* rewind to the beginning of the frame now on top of the stack */
    frame_offset = stack->ptr->offset;
    iter->buf.pos = iter->buf.body_pos + frame_offset;
    stack->ptr->idx = stack->ptr->count;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
}
/*
 * Step forward until the stack depth equals expected_depth and the idx of
 * the top stack frame equals expected_idx.
 *
 * The iterator can only move forward: an error is raised when
 * expected_depth is greater than the current depth, or when, at the
 * expected depth, expected_idx is greater than the current idx.
 * Returns immediately when the iterator is already at the requested
 * depth/idx. An error is also raised if the loop ends (e.g. the stack
 * became empty) at a different depth or idx than requested.
 */
void
srl_iterator_next_until_depth_and_idx(pTHX_ srl_iterator_t *iter, UV expected_depth, U32 expected_idx)
{
    U32 current_idx;
    srl_stack_t *stack = iter->stack;
    IV current_depth = SRL_STACK_DEPTH(stack);

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_TRACE("expected_depth=%"UVuf" expected_idx=%u", expected_depth, expected_idx);
    SRL_ITER_ASSERT_STACK(iter);

    if (expect_false((IV) expected_depth > current_depth)) {
        SRL_ITER_ERRORf2("srl_iterator_next_until_depth() can only go forward, "
                         "so expected_depth=%"UVuf" should not be greater then current_depth=%"IVdf,
                         expected_depth, current_depth);
    }

    current_idx = stack->ptr->idx;
    if (expect_false((IV) expected_depth == current_depth && expected_idx == current_idx))
        return;

    while (expect_true(!srl_stack_empty(stack))) {
        srl_iterator_wrap_stack(aTHX_ iter, expected_depth);

        current_depth = SRL_STACK_DEPTH(stack);
        if (expect_false(srl_stack_empty(stack)))
            break;

        current_idx = stack->ptr->idx;
        if (current_depth == (IV) expected_depth && current_idx == expected_idx)
            break;

        if (expect_false(current_depth == (IV) expected_depth && expected_idx > current_idx)) {
            /* both values are U32, so they must be printed with %u (was %d) */
            SRL_ITER_ERRORf2("srl_iterator_next_until_depth() can only go forward, "
                             "so expected_idx=%u should not be greater then current_idx=%u",
                             expected_idx, current_idx);
        }

        srl_iterator_step_internal(aTHX_ iter);
    }

    if (expect_false(current_depth != (IV) expected_depth)) {
        /* expected_depth is a UV: print it with UVuf (was IVdf) */
        SRL_ITER_ERRORf2("func led to wrong stack depth, expected=%"UVuf", actual=%"IVdf,
                         expected_depth, current_depth);
    }

    if (expect_false(current_idx != expected_idx)) {
        SRL_ITER_ERRORf2("func led to wrong stack index, expected=%u, actual=%u",
                         expected_idx, current_idx);
    }

    SRL_ITER_TRACE("Reached expected stack depth: %"UVuf " and idx: %u",
                   expected_depth, expected_idx);
    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
}
/*
 * Step forward until the stack depth equals expected_depth and the idx of
 * the top stack frame equals expected_idx.
 *
 * Returns immediately when the iterator is already there. The iterator
 * can only move forward, so an error is raised when expected_depth is
 * greater than the current depth or when expected_idx is greater than the
 * idx currently recorded for the frame at expected_depth.
 */
void
srl_iterator_next_until_depth_and_idx(pTHX_ srl_iterator_t *iter, UV expected_depth, U32 expected_idx)
{
    IV current_depth = iter->stack.depth;
    srl_iterator_stack_ptr stack_ptr = iter->stack.ptr;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_ASSERT_STACK(iter);
    SRL_ITER_TRACE("expected_depth=%"UVuf" expected_idx=%u", expected_depth, expected_idx);
    SRL_ITER_REPORT_STACK_STATE(iter);

    if (expect_false((IV) expected_depth == current_depth && (IV) expected_idx == stack_ptr->idx))
        return;

    if (expect_false((IV) expected_depth > current_depth)) {
        SRL_ITER_ERRORf2("srl_iterator_next_until_depth() can only go forward, "
                         "so expected_depth=%"UVuf" should not be greater then current_depth=%"IVdf,
                         expected_depth, current_depth);
    }

    /* inspect the frame sitting at the requested depth */
    stack_ptr = iter->stack.begin + expected_depth;
    if (expect_false((IV) expected_idx > stack_ptr->idx)) {
        /* idx is cast to IV and printed with IVdf (was %u);
         * expected_depth is a UV and printed with UVuf (was IVdf) */
        SRL_ITER_ERRORf3("srl_iterator_next_until_depth() can only go forward, "
                         "so expected_idx=%u should not be greater then current "
                         "index (%"IVdf") at expected_depth=%"UVuf,
                         expected_idx, (IV) stack_ptr->idx, expected_depth);
    }

    stack_ptr = iter->stack.ptr;

    while (1) {
        srl_iterator_wrap_stack(iter, expected_depth, stack_ptr);

        if (iter->stack.depth == (IV) expected_depth) {
            if (stack_ptr->idx == (IV) expected_idx)
                break;
            assert(((IV) expected_idx > stack_ptr->idx) == 0);
        }

        srl_iterator_step_internal(iter, stack_ptr);
    }

    assert(stack_ptr->idx == (IV) expected_idx);
    assert(iter->stack.depth == (IV) expected_depth);
    SRL_ITER_TRACE("Reached expected stack depth: %"UVuf " and idx: %u",
                   expected_depth, expected_idx);
    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
}
/*
 * Undo a previous disjoin: pop frames until the stack is on a root frame,
 * remember that frame's offset, pop the root frame itself and move the
 * read position to the remembered offset.
 *
 * Raises an error when the stack depth is <= 0.
 */
void
srl_iterator_unite(pTHX_ srl_iterator_t *iter)
{
    srl_stack_t *stack = iter->pstack;
    UV root_offset;

    SRL_ITER_TRACE("--------------------------");

    if (expect_false(SRL_STACK_DEPTH(stack) <= 0)) {
        SRL_ITER_ERROR("There is nothing to unite. Please call disjoin first.");
    }

    /* drop everything sitting above the root frame */
    for (;;) {
        if (SRL_ITER_STACK_ON_ROOT(stack))
            break;
        srl_stack_pop(stack);
    }

    root_offset = stack->ptr->offset;
    srl_stack_pop(stack); /* remove SRL_ITER_STACK_ROOT_TAG */
    SRL_ITER_ASSERT_STACK(iter);

    iter->buf.pos = iter->buf.body_pos + root_offset;

    SRL_ITER_REPORT_STACK_STATE(iter);
    SRL_ITER_TRACE("ofs %"UVuf" body_ofs %"UVuf,
                   (UV) SRL_RDR_POS_OFS((iter)->pbuf),
                   (UV) SRL_RDR_BODY_POS_OFS((iter)->pbuf));
    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
}
SRL_STATIC_INLINE void srl_merge_binary_utf8(pTHX_ srl_merger_t *mrg, ptable_entry_ptr ptable_entry) { int ok; UV length, total_length; strtable_entry_ptr strtable_entry; srl_reader_char_ptr tag_ptr = mrg->ibuf.pos; DEBUG_ASSERT_RDR_SANE(mrg->pibuf); DEBUG_ASSERT_BUF_SANE(&mrg->obuf); mrg->ibuf.pos++; // skip tag in input buffer length = srl_read_varint_uv_length(aTHX_ mrg->pibuf, " while reading BINARY or STR_UTF8"); assert((mrg->ibuf.pos - tag_ptr) > 0); assert((mrg->ibuf.pos - tag_ptr) <= SRL_MAX_VARINT_LENGTH); total_length = length + (mrg->ibuf.pos - tag_ptr); strtable_entry = srl_lookup_string(aTHX_ mrg, tag_ptr, total_length, &ok); if (ok) { // issue COPY tag srl_buf_cat_varint(aTHX_ &mrg->obuf, SRL_HDR_COPY, strtable_entry->offset); mrg->ibuf.pos += length; if (expect_false(ptable_entry)) { // update value in ptable entry // This is needed because if any of following tags will reffer to // this one as COPY we need to point them to original string. // By Sereal spec a COPY tag cannot reffer to another COPY tag. ptable_entry->value = INT2PTR(void *, strtable_entry->offset); } } else if (strtable_entry) {
/*
 * Translate an array index into the stack-frame idx representation used
 * by the iterator, without moving the iterator.
 *
 * For idx >= 0 the result is count - idx; for idx < 0 it is -idx.
 * Returns SRL_ITER_NOT_FOUND when idx >= count (non-negative idx) or
 * when -idx > count (negative idx).
 */
IV
srl_iterator_array_exists(pTHX_ srl_iterator_t *iter, I32 idx)
{
    I32 s_idx;
    int out_of_range;
    srl_iterator_stack_ptr stack_ptr = iter->stack.ptr;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_ASSERT_EOF(iter, "array element");
    SRL_ITER_ASSERT_STACK(iter);
    SRL_ITER_ASSERT_ARRAY_ON_STACK(iter);

    SRL_ITER_TRACE("idx=%d", idx);
    SRL_ITER_REPORT_STACK_STATE(iter);

    if (idx < 0) {
        /* negative index */
        s_idx = -idx;
        out_of_range = (s_idx > (I32) stack_ptr->count);
    } else {
        /* non-negative index */
        s_idx = stack_ptr->count - idx;
        out_of_range = (idx >= (I32) stack_ptr->count);
    }

    if (out_of_range) {
        SRL_ITER_TRACE("Index is out of range, idx=%d count=%u", idx, stack_ptr->count);
        return SRL_ITER_NOT_FOUND;
    }

    return s_idx;
}
/*
 * Decode the value at the iterator's current position into a fresh
 * mortal SV and return it.
 *
 * A decoder struct is built on first use and kept in iter->dec. The
 * iterator's reader buffer is copied into the decoder before decoding,
 * so the iterator's own buffer fields are not written by this function.
 */
SV *
srl_iterator_decode(pTHX_ srl_iterator_t *iter)
{
    SV *into;

    SRL_ITER_ASSERT_EOF(iter, "serialized object");
    DEBUG_ASSERT_RDR_SANE(iter->pbuf);

    into = newSV_type(SVt_NULL);
    sv_2mortal(into);

    /* lazily create the decoder */
    if (iter->dec == NULL) {
        iter->dec = srl_build_decoder_struct(aTHX_ NULL, NULL);
    }

    /* hand the decoder a copy of our buffer state */
    Copy(&iter->buf, &iter->dec->buf, 1, srl_reader_buffer_t);
    DEBUG_ASSERT_RDR_SANE(iter->dec->pbuf);

    srl_decode_single_value(aTHX_ iter->dec, into, NULL);
    return into;
}
/*
 * Perform n single steps. Before each step the stack is wrapped with
 * srl_iterator_wrap_stack(iter, -1, stack_ptr).
 */
void
srl_iterator_step_in(pTHX_ srl_iterator_t *iter, UV n)
{
    srl_iterator_stack_ptr stack_ptr = iter->stack.ptr;
    UV steps_left = n;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_ASSERT_STACK(iter);

    SRL_ITER_TRACE("n=%"UVuf, n);
    SRL_ITER_REPORT_STACK_STATE(iter);

    while (steps_left > 0) {
        steps_left--;
        srl_iterator_wrap_stack(iter, -1, stack_ptr);
        srl_iterator_step_internal(iter, stack_ptr);
    }

    SRL_ITER_TRACE("Completed expected number of steps");
    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
}
/*
 * Bring the iterator back to its starting state: clear the stack, push a
 * single frame (tag 0, count 1) and move the read position to the first
 * tag of the body (body_pos + first_tag_offset).
 */
void
srl_iterator_reset(pTHX_ srl_iterator_t *iter)
{
    U8 tag = 0;

    SRL_ITER_TRACE();

    /* start over with one fresh frame on the stack */
    srl_stack_clear(iter->stack);
    srl_stack_push_and_set(iter, tag, 1);

    iter->buf.pos = iter->buf.body_pos + iter->first_tag_offset;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
}
/*
 * Emit an array header to the output buffer and merge its elements.
 *
 * mrg    - merger state (input buffer is read, output buffer is written)
 * tag    - SRL_HDR_ARRAY, or another array tag; for SRL_HDR_ARRAY the tag
 *          is written followed by `length` as a varint, otherwise only the
 *          tag is copied
 * length - number of elements; srl_merge_single_value() is called once
 *          per element
 */
SRL_STATIC_INLINE void
srl_merge_array(pTHX_ srl_merger_t *mrg, const U8 tag, UV length)
{
    /* The counter must be as wide as `length` (UV). A narrower counter
     * wraps before reaching a length larger than UINT_MAX and the loop
     * would never terminate normally. */
    UV i;

    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    if (tag == SRL_HDR_ARRAY) {
        srl_buf_cat_varint(aTHX_ &mrg->obuf, tag, length);
    } else {
        srl_buf_cat_tag_nocheck(mrg, tag);
    }

    for (i = 0; i < length; ++i) {
        srl_merge_single_value(aTHX_ mrg);
    }

    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
}
/*
 * Point the merger's input buffer at the string value of `src`, validate
 * the Sereal header, skip the header and decompress the body when the
 * document's encoding flags request it.
 *
 * Errors are raised for an invalid header, a protocol version outside
 * 1..3 and an unknown encoding.
 */
SRL_STATIC_INLINE void
srl_set_input_buffer(pTHX_ srl_merger_t *mrg, SV *src)
{
    STRLEN src_len;
    UV header_len;
    U8 encoding_flags;
    U8 protocol_version;
    srl_buffer_char *src_bytes;
    IV version_and_flags;

    SRL_RDR_CLEAR(&mrg->ibuf);

    src_bytes = (srl_buffer_char*) SvPV(src, src_len);
    mrg->ibuf.start = mrg->ibuf.pos = src_bytes;
    mrg->ibuf.end = mrg->ibuf.start + src_len;

    version_and_flags = srl_validate_header_version(aTHX_ (srl_reader_char_ptr) mrg->ibuf.start, src_len);

    if (version_and_flags == 0) {
        SRL_RDR_ERROR(mrg->pibuf, "Bad Sereal header: It seems your document was accidentally UTF-8 encoded");
    } else if (version_and_flags < 0) {
        SRL_RDR_ERROR(mrg->pibuf, "Bad Sereal header: Not a valid Sereal document.");
    }

    /* step over the 5 bytes examined by the header validation */
    mrg->ibuf.pos += 5;

    encoding_flags   = (U8) (version_and_flags & SRL_PROTOCOL_ENCODING_MASK);
    protocol_version = (U8) (version_and_flags & SRL_PROTOCOL_VERSION_MASK);

    if (expect_false(!(protocol_version >= 1 && protocol_version <= 3))) {
        SRL_RDR_ERRORf1(mrg->pibuf, "Unsupported Sereal protocol version %u", (unsigned int) protocol_version);
    }

    /* skip header in any case */
    header_len = srl_read_varint_uv_length(aTHX_ mrg->pibuf, " while reading header");
    mrg->ibuf.pos += header_len;

    switch (encoding_flags) {
        case SRL_PROTOCOL_ENCODING_RAW:
            /* no op */
            break;

        case SRL_PROTOCOL_ENCODING_SNAPPY:
        case SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL:
            srl_decompress_body_snappy(aTHX_ mrg->pibuf, encoding_flags, NULL);
            break;

        case SRL_PROTOCOL_ENCODING_ZLIB:
            srl_decompress_body_zlib(aTHX_ mrg->pibuf, NULL);
            break;

        default:
            SRL_RDR_ERROR(mrg->pibuf, "Sereal document encoded in an unknown format");
            break;
    }

    /* this *MUST* be done after srl_decompress_body* */
    SRL_RDR_UPDATE_BODY_POS(mrg->pibuf, protocol_version);
    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
}
/*
 * Advance the iterator by n positions at the stack depth it is on when
 * the function is entered. Steps taken while the stack is at a different
 * depth are not counted. n == 0 returns immediately.
 */
void
srl_iterator_next(pTHX_ srl_iterator_t *iter, UV n)
{
    IV expected_depth = iter->stack.depth;
    srl_iterator_stack_ptr stack_ptr = iter->stack.ptr;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_ASSERT_STACK(iter);

    SRL_ITER_TRACE("n=%"UVuf, n);
    SRL_ITER_REPORT_STACK_STATE(iter);

    if (expect_false(n == 0))
        return;

    for (;;) {
        int at_expected_depth;

        srl_iterator_wrap_stack(iter, expected_depth, stack_ptr);

        at_expected_depth = (iter->stack.depth == expected_depth);
        if (at_expected_depth && n == 0)
            break;              /* all requested positions consumed */
        if (at_expected_depth)
            n--;                /* only positions at our depth count */

        srl_iterator_step_internal(iter, stack_ptr);
    }

    if (expect_false(n != 0)) {
        SRL_ITER_ERRORf1("Failed to do %"UVuf" next steps. Likely EOF was reached", n);
    }

    if (expect_false(iter->stack.depth != expected_depth)) {
        SRL_ITER_ERRORf2("next() led to wrong stack depth, expected=%"IVdf", actual=%"IVdf,
                          expected_depth, iter->stack.depth);
    }

    SRL_ITER_TRACE("Did expected number of steps at depth %"IVdf, expected_depth);
    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
}
/*
 * Perform n single steps, stopping early if the stack becomes empty.
 * n == 0 returns immediately. An error is raised when fewer than n steps
 * could be made.
 */
void
srl_iterator_step_in(pTHX_ srl_iterator_t *iter, UV n)
{
    srl_stack_t *stack = iter->stack;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_TRACE("n=%"UVuf, n);
    SRL_ITER_ASSERT_STACK(iter);

    if (expect_false(n == 0))
        return;

    /* n counts the steps that are still owed */
    while (expect_true(!srl_stack_empty(stack)) && n != 0) {
        srl_iterator_step_internal(aTHX_ iter);
        --n;
    }

    if (expect_false(n != 0)) {
        SRL_ITER_ERRORf1("Failed to do %"UVuf" steps. Likely EOF was reached", n);
    }

    SRL_ITER_TRACE("Did expected number of steps");
    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
}
/*
 * Rewind the iterator to the beginning of the frame on top of the stack:
 * the read position is set to the frame's recorded offset (relative to
 * body_pos) and the frame's idx is reset to its count.
 */
SRL_STATIC_INLINE void
srl_iterator_restore_stack_position(pTHX_ srl_iterator_t *iter)
{
    SRL_ITER_ASSERT_STACK(iter);

    iter->buf.pos = iter->buf.body_pos + iter->stack.ptr->offset;
    iter->stack.ptr->idx = iter->stack.ptr->count;

    SRL_ITER_REPORT_STACK_STATE(iter);
    SRL_ITER_TRACE("ofs %"UVuf" body_ofs %"UVuf,
                   (UV) SRL_RDR_POS_OFS((iter)->pbuf),
                   (UV) SRL_RDR_BODY_POS_OFS((iter)->pbuf));
    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
}
/*
 * Advance the iterator by n positions at the stack depth it is on when
 * the function is entered. Steps taken while the stack is at a different
 * depth are not counted.
 *
 * n == 0 returns immediately. Errors are raised when the top frame's idx
 * is already 0, when fewer than n positions could be consumed, and when
 * the loop ends at a different depth.
 */
void
srl_iterator_next(pTHX_ srl_iterator_t *iter, UV n)
{
    srl_stack_t *stack = iter->stack;
    IV expected_depth = SRL_STACK_DEPTH(stack);

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_TRACE("n=%"UVuf, n);
    SRL_ITER_ASSERT_STACK(iter);

    if (expect_false(n == 0))
        return;

    if (expect_false(stack->ptr->idx == 0)) {
        SRL_ITER_ERROR("Nothing to parse at this depth");
    }

    for (;;) {
        if (expect_false(srl_stack_empty(stack)))
            break;

        if (SRL_STACK_DEPTH(stack) == expected_depth) {
            if (n == 0)
                break;          /* all requested positions consumed */
            n--;
        }

        srl_iterator_step_internal(aTHX_ iter);
        srl_iterator_wrap_stack(aTHX_ iter, expected_depth);
    }

    if (expect_false(n != 0)) {
        SRL_ITER_ERRORf1("Failed to do %"UVuf" next steps. Likely EOF was reached", n);
    }

    if (expect_false(SRL_STACK_DEPTH(stack) != expected_depth)) {
        SRL_ITER_ERRORf2("next() led to wrong stack depth, expected=%"IVdf", actual=%"IVdf,
                          expected_depth, SRL_STACK_DEPTH(stack));
    }

    SRL_ITER_TRACE("Did expected number of steps at depth %"IVdf, expected_depth);
    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
}
/*
 * Move the iterator to element `idx` of the array on top of the stack.
 *
 * idx is translated into the frame's idx representation (count - idx for
 * idx >= 0, -idx for idx < 0). Returns SRL_ITER_NOT_FOUND when the index
 * is out of range, otherwise the reader's body offset once the iterator
 * is at the element. An error is raised when the element lies behind the
 * current position.
 */
IV
srl_iterator_array_goto(pTHX_ srl_iterator_t *iter, I32 idx)
{
    I32 s_idx;
    int out_of_range;
    srl_iterator_stack_ptr stack_ptr = iter->stack.ptr;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_ASSERT_EOF(iter, "array element");
    SRL_ITER_ASSERT_STACK(iter);
    SRL_ITER_ASSERT_ARRAY_ON_STACK(iter);

    SRL_ITER_TRACE("idx=%d", idx);
    SRL_ITER_REPORT_STACK_STATE(iter);

    if (idx < 0) {
        /* negative index */
        s_idx = -idx;
        out_of_range = (s_idx > (I32) stack_ptr->count);
    } else {
        /* non-negative index */
        s_idx = stack_ptr->count - idx;
        out_of_range = (idx >= (I32) stack_ptr->count);
    }

    if (out_of_range) {
        SRL_ITER_TRACE("Index is out of range, idx=%d count=%u", idx, stack_ptr->count);
        return SRL_ITER_NOT_FOUND;
    }

    if (s_idx > stack_ptr->idx) {
        SRL_ITER_ERRORf2("Can't go backwards, idx=%d, count=%u", idx, stack_ptr->count);
    }

    if (s_idx != stack_ptr->idx) {
        /* srl_iterator_next guarantees that we remain on the current stack */
        srl_iterator_next(aTHX_ iter, stack_ptr->idx - s_idx);
        assert(stack_ptr->idx == s_idx);
    }

    /* either we were already there or we have just arrived */
    return SRL_RDR_BODY_POS_OFS(iter->pbuf);
}
/*
 * Pop n frames off the iterator stack and rewind the iterator to the
 * start of the frame that ends up on top (see
 * srl_iterator_restore_stack_position). An error is raised when a pop is
 * requested while the stack is on a root frame.
 *
 * With n == 0 nothing is popped and the current frame is rewound.
 */
void
srl_iterator_step_out(pTHX_ srl_iterator_t *iter, UV n)
{
    srl_stack_t *stack = iter->pstack;
    UV frames_left;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_ASSERT_STACK(iter);

    SRL_ITER_TRACE("n=%"UVuf, n);
    SRL_ITER_REPORT_STACK_STATE(iter);

    /* XXX no EOF assertion here: need ability to go back on last element */
    /* XXX n == 0 is deliberately not short-circuited (kept as a feature?) */

    for (frames_left = n; frames_left > 0; --frames_left) {
        if (expect_false(SRL_ITER_STACK_ON_ROOT(stack))) {
            SRL_ITER_ERROR("It was last object on stack, no more parents");
        }
        srl_stack_pop_nocheck(stack);
    }

    srl_iterator_restore_stack_position(aTHX_ iter);
}
/*
 * Return a pointer to the bytes of the hash key at the current position
 * and store its length in *len_out. The iterator's read position is
 * restored before returning, so the call does not advance the iterator.
 *
 * Accepted tags: SHORT_BINARY, BINARY, STR_UTF8, and COPY pointing at one
 * of those. Any other tag raises an error. An EOF error is raised when the
 * key content does not fit into the buffer.
 *
 * The returned pointer points into the iterator's buffer; it is not a
 * NUL-terminated copy.
 */
const char *
srl_iterator_hash_key(pTHX_ srl_iterator_t *iter, STRLEN *len_out)
{
    U8 tag;
    UV length, offset;
    const char *result = NULL;
    srl_reader_char_ptr orig_pos = iter->buf.pos;

    *len_out = 0;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_ASSERT_EOF(iter, "stringish");
    SRL_ITER_ASSERT_STACK(iter);
    SRL_ITER_ASSERT_HASH_ON_STACK(iter);

    tag = *iter->buf.pos & ~SRL_HDR_TRACK_FLAG;
    SRL_ITER_REPORT_TAG(iter, tag);
    iter->buf.pos++;

    switch (tag) {
        CASE_SRL_HDR_SHORT_BINARY:
            length = SRL_HDR_SHORT_BINARY_LEN_FROM_TAG(tag);
            break;

        case SRL_HDR_BINARY:
            length = srl_read_varint_uv_length(aTHX_ iter->pbuf, " while reading BINARY");
            break;

        case SRL_HDR_STR_UTF8:
            // TODO deal with UTF8
            length = srl_read_varint_uv_length(aTHX_ iter->pbuf, " while reading STR_UTF8");
            break;

        case SRL_HDR_COPY:
            offset = srl_read_varint_uv_offset(aTHX_ iter->pbuf, " while reading COPY tag");
            iter->buf.pos = iter->buf.body_pos + offset;

            /* Note we do NOT validate these items, as we have already read them
             * and if they were a problem we would not be here to process them! */
            tag = *iter->buf.pos & ~SRL_HDR_TRACK_FLAG;
            SRL_ITER_REPORT_TAG(iter, tag);
            iter->buf.pos++;

            switch (tag) {
                CASE_SRL_HDR_SHORT_BINARY:
                    length = SRL_HDR_SHORT_BINARY_LEN_FROM_TAG(tag);
                    break;

                case SRL_HDR_BINARY:
                    SET_UV_FROM_VARINT(iter->pbuf, length, iter->buf.pos);
                    break;

                case SRL_HDR_STR_UTF8:
                    // TODO deal with UTF8
                    SET_UV_FROM_VARINT(iter->pbuf, length, iter->buf.pos);
                    break;

                default:
                    SRL_RDR_ERROR_BAD_COPY(iter->pbuf, SRL_HDR_HASH);
            }

            break;

        default:
            SRL_RDR_ERROR_UNEXPECTED(iter->pbuf, tag, "stringish");
    }

    /* Compare the length against the number of bytes left instead of
     * computing pos + length: the length read on the COPY path is not
     * validated, and a huge value could wrap the pointer and slip past a
     * "pos + length >= end" test. */
    if (expect_false(   iter->buf.pos >= iter->buf.end
                     || length >= (UV) (iter->buf.end - iter->buf.pos)))
    {
        SRL_RDR_ERROR_EOF(iter->pbuf, "string content");
    }

    *len_out = length;
    result = (const char *) iter->buf.pos;
    iter->buf.pos = orig_pos; // restore original position

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    return result;
}
/*
 * Copy one complete value from the merger's input buffer to its output
 * buffer, dispatching on the tag at the current input position.
 *
 * Container tags recurse through srl_merge_array()/srl_merge_hash();
 * prefix tags (REFN, WEAKEN, EXTEND, OBJECTV*, PAD) are emitted and the
 * function jumps back to read the tag that follows them. Offsets carried
 * by COPY/REFP/ALIAS/OBJECTV* are translated from input-buffer offsets to
 * output-buffer offsets.
 *
 * mrg->recursion_depth is incremented once on entry and decremented once
 * on exit; exceeding mrg->max_recursion_depth raises an error.
 */
SRL_STATIC_INLINE void
srl_merge_single_value(pTHX_ srl_merger_t *mrg)
{
    U8 tag;
    UV length, offset;
    ptable_entry_ptr ptable_entry;

    assert(mrg->recursion_depth >= 0);

    /* Account for this call exactly once. This must happen before the
     * read_again label: jumping back for a prefix tag stays in the same
     * call frame and is matched by the single decrement at the end. */
    if (expect_false(++mrg->recursion_depth > mrg->max_recursion_depth))
        SRL_RDR_ERRORf1(mrg->pibuf, "Reached recursion limit (%lu) during merging", mrg->max_recursion_depth);

read_again:
    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);

    ptable_entry = NULL;
    if (expect_false(SRL_RDR_DONE(mrg->pibuf)))
        SRL_RDR_ERROR(mrg->pibuf, "Unexpected termination of input buffer");

    tag = *mrg->ibuf.pos & ~SRL_HDR_TRACK_FLAG;
    SRL_REPORT_CURRENT_TAG(mrg, tag);

    if (mrg->tracked_offsets && !srl_stack_empty(mrg->tracked_offsets)) {
        UV itag_offset = SRL_RDR_BODY_POS_OFS(mrg->pibuf);
        if (expect_false(itag_offset == srl_stack_peek_nocheck(aTHX_ mrg->tracked_offsets))) {
            // trackme case
            srl_stack_pop_nocheck(mrg->tracked_offsets);
            ptable_entry = srl_store_tracked_offset(aTHX_ mrg, itag_offset, BODY_POS_OFS(&mrg->obuf));
        }
    }

    if (tag <= SRL_HDR_NEG_HIGH) {
        srl_buf_cat_tag_nocheck(mrg, tag);
    } else if (tag >= SRL_HDR_ARRAYREF_LOW && tag <= SRL_HDR_ARRAYREF_HIGH) {
        srl_merge_array(aTHX_ mrg, tag, SRL_HDR_ARRAYREF_LEN_FROM_TAG(tag));
    } else if (tag >= SRL_HDR_HASHREF_LOW && tag <= SRL_HDR_HASHREF_HIGH) {
        srl_merge_hash(aTHX_ mrg, tag, SRL_HDR_HASHREF_LEN_FROM_TAG(tag));
    } else if (tag >= SRL_HDR_SHORT_BINARY_LOW) {
        srl_merge_short_binary(aTHX_ mrg, tag, ptable_entry);
    } else {
        switch (tag) {
            case SRL_HDR_VARINT:
            case SRL_HDR_ZIGZAG:
                srl_buf_cat_tag_nocheck(mrg, tag);
                srl_copy_varint(aTHX_ mrg);
                break;

            case SRL_HDR_FLOAT:
                srl_buf_copy_content_nocheck(aTHX_ mrg, 5);
                break;

            case SRL_HDR_DOUBLE:
                srl_buf_copy_content_nocheck(aTHX_ mrg, 9);
                break;

            case SRL_HDR_LONG_DOUBLE:
                srl_buf_copy_content_nocheck(aTHX_ mrg, 17);
                break;

            case SRL_HDR_TRUE:
            case SRL_HDR_FALSE:
            case SRL_HDR_UNDEF:
            case SRL_HDR_CANONICAL_UNDEF:
                srl_buf_cat_tag_nocheck(mrg, tag);
                break;

            case SRL_HDR_BINARY:
            case SRL_HDR_STR_UTF8:
                srl_merge_binary_utf8(aTHX_ mrg, ptable_entry);
                break;

            case SRL_HDR_HASH:
                mrg->ibuf.pos++; // skip tag in input buffer
                length = srl_read_varint_uv_count(aTHX_ mrg->pibuf, " while reading ARRAY or HASH");
                srl_merge_hash(aTHX_ mrg, tag, length);
                break;

            case SRL_HDR_ARRAY:
                mrg->ibuf.pos++; // skip tag in input buffer
                length = srl_read_varint_uv_count(aTHX_ mrg->pibuf, " while reading ARRAY or HASH");
                srl_merge_array(aTHX_ mrg, tag, length);
                break;

            default:
                switch (tag) {
                    case SRL_HDR_COPY:
                    case SRL_HDR_REFP:
                    case SRL_HDR_ALIAS:
                        mrg->ibuf.pos++; // skip tag in input buffer
                        offset = srl_read_varint_uv_offset(aTHX_ mrg->pibuf, " while reading COPY/ALIAS/REFP");
                        offset = srl_lookup_tracked_offset(aTHX_ mrg, offset); // convert ibuf offset to obuf offset
                        srl_buf_cat_varint(aTHX_ &mrg->obuf, tag, offset);

                        if (tag == SRL_HDR_REFP || tag == SRL_HDR_ALIAS) {
                            SRL_SET_TRACK_FLAG(*(mrg->obuf.body_pos + offset));
                        }

                        break;

                    case SRL_HDR_REFN:
                    case SRL_HDR_WEAKEN:
                    case SRL_HDR_EXTEND:
                        srl_buf_cat_tag_nocheck(mrg, tag);
                        goto read_again;

                    case SRL_HDR_OBJECT:
                    case SRL_HDR_OBJECT_FREEZE:
                        srl_merge_object(aTHX_ mrg, tag);
                        break;

                    case SRL_HDR_REGEXP:
                        srl_buf_cat_tag_nocheck(mrg, tag);
                        srl_merge_stringish(aTHX_ mrg);

                        /* the tag after the pattern must be a SHORT_BINARY */
                        tag = *mrg->ibuf.pos;
                        if (expect_false(tag < SRL_HDR_SHORT_BINARY_LOW))
                            SRL_RDR_ERROR_UNEXPECTED(mrg->pibuf, tag, "SRL_HDR_SHORT_BINARY");

                        srl_buf_copy_content_nocheck(aTHX_ mrg, SRL_HDR_SHORT_BINARY_LEN_FROM_TAG(tag) + 1);
                        break;

                    case SRL_HDR_OBJECTV:
                    case SRL_HDR_OBJECTV_FREEZE:
                        mrg->ibuf.pos++; // skip tag in input buffer
                        offset = srl_read_varint_uv_offset(aTHX_ mrg->pibuf, " while reading OBJECTV/OBJECTV_FREEZE");
                        offset = srl_lookup_tracked_offset(aTHX_ mrg, offset); // convert ibuf offset to obuf offset
                        srl_buf_cat_varint(aTHX_ &mrg->obuf, tag, offset);
                        goto read_again;

                    case SRL_HDR_PAD:
                        while (SRL_RDR_NOT_DONE(mrg->pibuf) && *mrg->ibuf.pos == SRL_HDR_PAD) {
                            srl_buf_cat_tag_nocheck(mrg, SRL_HDR_PAD);
                        }

                        goto read_again;

                    default:
                        SRL_RDR_ERROR_UNIMPLEMENTED(mrg->pibuf, tag, "");
                        break;
                }
        }
    }

    --mrg->recursion_depth;
    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
    DEBUG_ASSERT_BUF_SANE(&mrg->obuf);
}
/*
 * Main routine: consume exactly one item at the current read position.
 * Caller must ensure that EOF is NOT reached.
 *
 * The stack is wrapped first; if it is empty afterwards the function
 * returns without reading. Otherwise the idx of the top frame is
 * decremented once and the tag at the read position is consumed:
 *   - scalars are skipped,
 *   - non-empty containers push a new frame (hashes with length * 2),
 *   - REFN/WEAKEN/PAD are prefixes: the tag following them is consumed as
 *     part of the same item,
 *   - COPY/REFP/ALIAS are followed by a varint offset, which is skipped.
 * Unsupported tags raise an error.
 */
SRL_STATIC_INLINE void
srl_iterator_step_internal(pTHX_ srl_iterator_t *iter)
{
    U8 tag;
    UV length;
    srl_stack_t *stack = iter->stack;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);

    srl_iterator_wrap_stack(aTHX_ iter, -1);

    if (srl_stack_empty(stack))
        return;

    SRL_ITER_ASSERT_STACK(iter);
    stack->ptr->idx--;

    SRL_ITER_TRACE("stack->ptr: idx=%d depth=%d", stack->ptr->idx, (int) SRL_STACK_DEPTH(stack));
    SRL_ITER_ASSERT_STACK(iter);

read_again:
    tag = *iter->buf.pos & ~SRL_HDR_TRACK_FLAG;
    SRL_ITER_REPORT_TAG(iter, tag);
    iter->buf.pos++;

    /* No code which decrease step, next or stack's counters should be added here.
     * Otherwise the counters will be decreased twice for tags like REFN, WEAKEN, etc. */

    switch (tag) {
        CASE_SRL_HDR_SHORT_BINARY:
            iter->buf.pos += SRL_HDR_SHORT_BINARY_LEN_FROM_TAG(tag);
            break;

        case SRL_HDR_HASH:
            length = srl_read_varint_uv_count(aTHX_ iter->pbuf, " while reading HASH");
            if (length > 0) srl_stack_push_and_set(iter, tag, length * 2);
            break;

        case SRL_HDR_ARRAY:
            length = srl_read_varint_uv_count(aTHX_ iter->pbuf, " while reading ARRAY");
            if (length > 0) srl_stack_push_and_set(iter, tag, length);
            break;

        CASE_SRL_HDR_HASHREF:
            length = SRL_HDR_HASHREF_LEN_FROM_TAG(tag);
            if (length > 0) srl_stack_push_and_set(iter, tag, length * 2);
            break;

        CASE_SRL_HDR_ARRAYREF:
            length = SRL_HDR_ARRAYREF_LEN_FROM_TAG(tag);
            if (length > 0) srl_stack_push_and_set(iter, tag, length);
            break;

        CASE_SRL_HDR_POS:
        CASE_SRL_HDR_NEG:
            break;

        case SRL_HDR_VARINT:
        case SRL_HDR_ZIGZAG:
            srl_skip_varint(aTHX_ iter->pbuf);
            break;

        case SRL_HDR_FLOAT:         iter->buf.pos += 4;     break;
        case SRL_HDR_DOUBLE:        iter->buf.pos += 8;     break;
        case SRL_HDR_LONG_DOUBLE:   iter->buf.pos += 16;    break;

        case SRL_HDR_TRUE:
        case SRL_HDR_FALSE:
        case SRL_HDR_UNDEF:
        case SRL_HDR_CANONICAL_UNDEF:
            break;

        case SRL_HDR_REFN:
        case SRL_HDR_WEAKEN:
            goto read_again;

        case SRL_HDR_PAD:
            /* Skip the run of PAD bytes but leave the first non-PAD byte
             * in place: it is the tag that read_again has to consume.
             * (Testing with "*pos++ == PAD" would swallow that tag.) */
            while (SRL_RDR_NOT_DONE(iter->pbuf) && *iter->buf.pos == SRL_HDR_PAD) {
                iter->buf.pos++;
            }
            goto read_again;

        case SRL_HDR_BINARY:
        case SRL_HDR_STR_UTF8:
            length = srl_read_varint_uv_length(aTHX_ iter->pbuf, " while reading BINARY or STR_UTF8");
            iter->buf.pos += length;
            break;

        case SRL_HDR_COPY:
        case SRL_HDR_REFP:
        case SRL_HDR_ALIAS:
            /* all three tags are followed by a varint offset, not by a
             * nested value */
            srl_skip_varint(aTHX_ iter->pbuf);
            break;

        /* case SRL_HDR_OBJECTV: */
        /* case SRL_HDR_OBJECTV_FREEZE: */
        /* case SRL_HDR_REGEXP: */
        /* case SRL_HDR_OBJECT: */
        /* case SRL_HDR_OBJECT_FREEZE: */

        default:
            SRL_RDR_ERROR_UNIMPLEMENTED(iter->pbuf, tag, "");
            break;
    }

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
}
/*
 * Scan the merger's input buffer from the current position to its end and
 * collect every offset referenced by COPY, REFP, ALIAS, OBJECTV and
 * OBJECTV_FREEZE tags into the tracked_offsets stack. When at least one
 * offset was collected the stack is finally sorted (srl_stack_rsort) and
 * de-duplicated (srl_stack_dedupe).
 *
 * Tags that are not recognised raise an error.
 */
SRL_STATIC_INLINE void
srl_build_track_table(pTHX_ srl_merger_t *mrg)
{
    U8 tag;
    UV offset, length;

    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);

    if (mrg->tracked_offsets) {
        srl_stack_clear(mrg->tracked_offsets);
    }

    while (expect_true(BUF_NOT_DONE(mrg->pibuf))) {
        /* since we're doing full pass, it's not necessary to
         * add items into tracked_offsets here. They will be added
         * by corresponding REFP/ALIAS/COPY and other tags */

        tag = *mrg->ibuf.pos & ~SRL_HDR_TRACK_FLAG;
        SRL_REPORT_CURRENT_TAG(mrg, tag);
        mrg->ibuf.pos++;

        /* short binary: the payload length is encoded in the tag */
        if (tag >= SRL_HDR_SHORT_BINARY_LOW) {
            mrg->ibuf.pos += SRL_HDR_SHORT_BINARY_LEN_FROM_TAG(tag);
            continue;
        }

        /* tags outside (NEG_HIGH, ARRAYREF_LOW) need no further handling */
        if (tag <= SRL_HDR_NEG_HIGH || tag >= SRL_HDR_ARRAYREF_LOW) {
            continue;
        }

        switch (tag) {
            case SRL_HDR_VARINT:
            case SRL_HDR_ZIGZAG:
                srl_read_varint_uv(aTHX_ mrg->pibuf); // TODO test/implement srl_skip_varint()
                break;

            case SRL_HDR_FLOAT:
                mrg->ibuf.pos += 4;
                break;

            case SRL_HDR_DOUBLE:
                mrg->ibuf.pos += 8;
                break;

            case SRL_HDR_LONG_DOUBLE:
                mrg->ibuf.pos += 16;
                break;

            case SRL_HDR_BINARY:
            case SRL_HDR_STR_UTF8:
                length = srl_read_varint_uv_length(aTHX_ mrg->pibuf, " while reading BINARY or STR_UTF8");
                mrg->ibuf.pos += length;
                break;

            case SRL_HDR_HASH:
            case SRL_HDR_ARRAY:
                srl_read_varint_uv_count(aTHX_ mrg->pibuf, " while reading ARRAY or HASH");
                break;

            case SRL_HDR_COPY:
            case SRL_HDR_REFP:
            case SRL_HDR_ALIAS:
            case SRL_HDR_OBJECTV:
            case SRL_HDR_OBJECTV_FREEZE:
                offset = srl_read_varint_uv_offset(aTHX_ mrg->pibuf, " while reading COPY, OBJECTV or OBJECTV_FREEZE");
                srl_stack_push_val(SRL_GET_TRACKED_OFFSETS(mrg), offset);
                break;

            case SRL_HDR_TRUE:
            case SRL_HDR_FALSE:
            case SRL_HDR_UNDEF:
            case SRL_HDR_CANONICAL_UNDEF:
            case SRL_HDR_PAD:
            case SRL_HDR_REFN:
            case SRL_HDR_WEAKEN:
            case SRL_HDR_EXTEND:
            case SRL_HDR_REGEXP:
            case SRL_HDR_OBJECT:
            case SRL_HDR_OBJECT_FREEZE:
                /* nothing to skip beyond the tag itself */
                break;

            default:
                SRL_RDR_ERROR_UNIMPLEMENTED(mrg->pibuf, tag, "");
                break;
        }
    }

    if (mrg->tracked_offsets && !srl_stack_empty(mrg->tracked_offsets)) {
        srl_stack_rsort(aTHX_ mrg->tracked_offsets);
        srl_stack_dedupe(aTHX_ mrg->tracked_offsets);
    }

    DEBUG_ASSERT_RDR_SANE(mrg->pibuf);
}
/*
 * Search the hash on top of the stack, starting at the current position,
 * for a key whose bytes equal name[0 .. name_len-1].
 *
 * For every remaining key the frame's idx is decremented and the key tag
 * (SHORT_BINARY, BINARY, STR_UTF8 or COPY pointing at one of those) is
 * read. On a match the reader's current body offset is returned; the
 * iterator is left where it stands after reading the key tag. On a
 * mismatch the value is skipped with srl_iterator_next().
 *
 * Returns SRL_ITER_NOT_FOUND when no key matches. Raises an error for a
 * non-string key tag or when the key content does not fit into the
 * buffer.
 */
IV
srl_iterator_hash_exists(pTHX_ srl_iterator_t *iter, const char *name, STRLEN name_len)
{
    U8 tag;
    UV length, offset;
    const char *key_ptr;

    IV stack_depth = iter->stack.depth;
    srl_iterator_stack_ptr stack_ptr = iter->stack.ptr;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    SRL_ITER_ASSERT_EOF(iter, "stringish");
    SRL_ITER_ASSERT_STACK(iter);
    SRL_ITER_ASSERT_HASH_ON_STACK(iter);

    SRL_ITER_TRACE("name=%.*s", (int) name_len, name);
    SRL_ITER_REPORT_STACK_STATE(iter);

    while (stack_ptr->idx) {
        stack_ptr->idx--; // do not make it be part of while clause
        SRL_ITER_ASSERT_STACK(iter);

        assert(stack_ptr->idx % 2 == 1);
        assert(iter->stack.depth == stack_depth);
        DEBUG_ASSERT_RDR_SANE(iter->pbuf);

        tag = *iter->buf.pos & ~SRL_HDR_TRACK_FLAG;
        SRL_ITER_REPORT_TAG(iter, tag);
        iter->buf.pos++;

        switch (tag) {
            CASE_SRL_HDR_SHORT_BINARY:
                length = SRL_HDR_SHORT_BINARY_LEN_FROM_TAG(tag);
                key_ptr = (const char *) iter->buf.pos;
                iter->buf.pos += length;
                break;

            case SRL_HDR_BINARY:
                length = srl_read_varint_uv_length(aTHX_ iter->pbuf, " while reading BINARY");
                key_ptr = (const char *) iter->buf.pos;
                iter->buf.pos += length;
                break;

            case SRL_HDR_STR_UTF8:
                // TODO deal with UTF8
                length = srl_read_varint_uv_length(aTHX_ iter->pbuf, " while reading STR_UTF8");
                key_ptr = (const char *) iter->buf.pos;
                iter->buf.pos += length;
                break;

            case SRL_HDR_COPY:
                offset = srl_read_varint_uv_offset(aTHX_ iter->pbuf, " while reading COPY tag");
                key_ptr = (const char *) iter->buf.body_pos + offset;
                tag = *key_ptr & ~SRL_HDR_TRACK_FLAG;
                key_ptr++;

                /* Note we do NOT validate these items, as we have already read them
                 * and if they were a problem we would not be here to process them! */

                switch (tag) {
                    CASE_SRL_HDR_SHORT_BINARY:
                        length = SRL_HDR_SHORT_BINARY_LEN_FROM_TAG(tag);
                        break;

                    case SRL_HDR_BINARY:
                        SET_UV_FROM_VARINT(iter->pbuf, length, key_ptr);
                        break;

                    case SRL_HDR_STR_UTF8:
                        // TODO deal with UTF8
                        SET_UV_FROM_VARINT(iter->pbuf, length, key_ptr);
                        break;

                    default:
                        SRL_RDR_ERROR_BAD_COPY(iter->pbuf, SRL_HDR_HASH);
                }

                break;

            default:
                SRL_RDR_ERROR_UNEXPECTED(iter->pbuf, tag, "stringish");
        }

        /* The whole key, not just its first byte, has to lie inside the
         * buffer before it is handed to memcmp(). The length is compared
         * against the remaining byte count so no out-of-range pointer is
         * ever formed. */
        if (expect_false(   (srl_reader_char_ptr) key_ptr >= iter->buf.end
                         || length > (UV) (iter->buf.end - (srl_reader_char_ptr) key_ptr)))
        {
            SRL_RDR_ERROR_EOF(iter->pbuf, "string content");
        }

        if (length == name_len && memcmp(name, key_ptr, name_len) == 0) {
            SRL_ITER_TRACE("found key '%.*s' at offset %"UVuf,
                           (int) name_len, name, SRL_RDR_BODY_POS_OFS(iter->pbuf));
            return SRL_RDR_BODY_POS_OFS(iter->pbuf);
        }

        // srl_iterator_next guarantees that we remain on the current stack
        srl_iterator_next(aTHX_ iter, 1);
        stack_ptr = iter->stack.ptr;
    }

    SRL_ITER_TRACE("didn't found key '%.*s'", (int) name_len, name);
    return SRL_ITER_NOT_FOUND;
}
/*
 * Classify the item at the current position without moving the iterator
 * (the read position is restored before returning).
 *
 * PAD, REFN and WEAKEN tags are skipped; REFP and COPY are followed to the
 * offset they reference. Returns SRL_ITERATOR_OBJ_IS_HASH,
 * SRL_ITERATOR_OBJ_IS_ARRAY or SRL_ITERATOR_OBJ_IS_SCALAR. When length_ptr
 * is not NULL it is first set to 0 and, for hashes and arrays, to the
 * element count read from the tag or its varint. Any other tag raises an
 * error.
 */
UV
srl_iterator_object_info(pTHX_ srl_iterator_t *iter, UV *length_ptr)
{
    U8 tag;
    UV offset;
    UV type = 0;
    srl_reader_char_ptr orig_pos = iter->buf.pos;

    DEBUG_ASSERT_RDR_SANE(iter->pbuf);

    if (length_ptr != NULL) {
        *length_ptr = 0;
    }

    /* keep reading tags until one that describes the object is found */
    for (;;) {
        SRL_ITER_ASSERT_EOF(iter, "serialized object");

        tag = *iter->buf.pos & ~SRL_HDR_TRACK_FLAG;
        SRL_ITER_REPORT_TAG(iter, tag);
        iter->buf.pos++;

        switch (tag) {
            case SRL_HDR_PAD:
            case SRL_HDR_REFN:
            case SRL_HDR_WEAKEN:
                /* advanced pointer to the object */
                continue;

            case SRL_HDR_REFP:
            case SRL_HDR_COPY:
                offset = srl_read_varint_uv_offset(aTHX_ iter->pbuf, " while reading COPY tag");
                iter->buf.pos = iter->buf.body_pos + offset;
                continue;

            case SRL_HDR_HASH:
                type = SRL_ITERATOR_OBJ_IS_HASH;
                if (length_ptr != NULL) {
                    *length_ptr = srl_read_varint_uv_count(aTHX_ iter->pbuf, " while reading HASH");
                }
                break;

            CASE_SRL_HDR_HASHREF:
                type = SRL_ITERATOR_OBJ_IS_HASH;
                if (length_ptr != NULL) {
                    *length_ptr = SRL_HDR_HASHREF_LEN_FROM_TAG(tag);
                }
                break;

            case SRL_HDR_ARRAY:
                type = SRL_ITERATOR_OBJ_IS_ARRAY;
                if (length_ptr != NULL) {
                    *length_ptr = srl_read_varint_uv_count(aTHX_ iter->pbuf, " while reading ARRAY");
                }
                break;

            CASE_SRL_HDR_ARRAYREF:
                type = SRL_ITERATOR_OBJ_IS_ARRAY;
                if (length_ptr != NULL) {
                    *length_ptr = SRL_HDR_ARRAYREF_LEN_FROM_TAG(tag);
                }
                break;

            CASE_SRL_HDR_POS:
            CASE_SRL_HDR_NEG:
            CASE_SRL_HDR_SHORT_BINARY:
            case SRL_HDR_BINARY:
            case SRL_HDR_STR_UTF8:
            case SRL_HDR_VARINT:
            case SRL_HDR_ZIGZAG:
            case SRL_HDR_FLOAT:
            case SRL_HDR_DOUBLE:
            case SRL_HDR_LONG_DOUBLE:
            case SRL_HDR_TRUE:
            case SRL_HDR_FALSE:
            case SRL_HDR_UNDEF:
            case SRL_HDR_CANONICAL_UNDEF:
                type = SRL_ITERATOR_OBJ_IS_SCALAR;
                break;

            default:
                iter->buf.pos = orig_pos;
                SRL_RDR_ERROR_UNEXPECTED(iter->pbuf, tag, "ARRAY or HASH or SCALAR");
        }

        break; /* object classified (or error raised): stop reading */
    }

    iter->buf.pos = orig_pos;
    DEBUG_ASSERT_RDR_SANE(iter->pbuf);
    return type;
}
/*
 * Attach a Sereal document to the iterator.
 *
 * A reference to `src` is taken and stored in iter->document (the
 * previously held document, if any, is released). The reader buffer is
 * pointed at the document's string value, the header is validated and
 * skipped, and the body is decompressed when the encoding flags request
 * it; in that case iter->document is switched to the SV produced by the
 * decompression routine. Finally a root frame is pushed and the iterator
 * is reset.
 *
 * Errors are raised for an invalid header, a protocol version outside
 * 1..3 and an unknown encoding.
 */
void
srl_iterator_set(pTHX_ srl_iterator_t *iter, SV *src)
{
    SV *sv = NULL;
    STRLEN len;
    UV header_len;
    U8 encoding_flags;
    U8 protocol_version;
    srl_reader_char_ptr tmp;
    IV proto_version_and_encoding_flags_int;
    srl_iterator_stack_ptr stack_ptr = NULL;

    /* Take the new reference before releasing the old one: if `src` is
     * the SV we already hold, dropping our reference first could free it
     * before it is re-referenced. */
    SvREFCNT_inc(src);
    if (iter->document) {
        SvREFCNT_dec(iter->document);
        iter->document = NULL;
    }
    iter->document = src;

    tmp = (srl_reader_char_ptr) SvPV(src, len);
    iter->buf.start = iter->buf.pos = tmp;
    iter->buf.end = iter->buf.start + len;

    proto_version_and_encoding_flags_int = srl_validate_header_version(aTHX_ iter->buf.start, len);

    if (proto_version_and_encoding_flags_int < 1) {
        if (proto_version_and_encoding_flags_int == 0)
            SRL_RDR_ERROR(iter->pbuf, "Bad Sereal header: It seems your document was accidentally UTF-8 encoded");
        else
            SRL_RDR_ERROR(iter->pbuf, "Bad Sereal header: Not a valid Sereal document.");
    }

    /* step over the 5 bytes examined by the header validation */
    iter->buf.pos += 5;
    encoding_flags = (U8) (proto_version_and_encoding_flags_int & SRL_PROTOCOL_ENCODING_MASK);
    protocol_version = (U8) (proto_version_and_encoding_flags_int & SRL_PROTOCOL_VERSION_MASK);

    if (expect_false(protocol_version > 3 || protocol_version < 1)) {
        SRL_RDR_ERRORf1(iter->pbuf, "Unsupported Sereal protocol version %u", (unsigned int) protocol_version);
    }

    // skip header in any case
    header_len = srl_read_varint_uv_length(aTHX_ iter->pbuf, " while reading header");
    iter->buf.pos += header_len;

    if (encoding_flags == SRL_PROTOCOL_ENCODING_RAW) {
        /* no op */
    } else if (   encoding_flags == SRL_PROTOCOL_ENCODING_SNAPPY
               || encoding_flags == SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL)
    {
        srl_decompress_body_snappy(aTHX_ iter->pbuf, encoding_flags, &sv);
        /* from now on hold the SV handed back by the decompressor
         * instead of the original document */
        SvREFCNT_dec(iter->document);
        SvREFCNT_inc(sv);
        iter->document = sv;
    } else if (encoding_flags == SRL_PROTOCOL_ENCODING_ZLIB) {
        srl_decompress_body_zlib(aTHX_ iter->pbuf, &sv);
        SvREFCNT_dec(iter->document);
        SvREFCNT_inc(sv);
        iter->document = sv;
    } else {
        SRL_RDR_ERROR(iter->pbuf, "Sereal document encoded in an unknown format");
    }

    /* this function *MUST* be called after calling srl_decompress_body* */
    SRL_RDR_UPDATE_BODY_POS(iter->pbuf, protocol_version);
    DEBUG_ASSERT_RDR_SANE(iter->pbuf);

    srl_stack_push_and_set(iter, SRL_ITER_STACK_ROOT_TAG, 1, stack_ptr);
    srl_iterator_reset(aTHX_ iter);
}