/**
 * Take action when the offset for a toppar becomes unusable.
 *
 * Locality: toppar handler thread
 * Locks: toppar_lock() MUST be held
 */
void rd_kafka_offset_reset (rd_kafka_toppar_t *rktp, int64_t err_offset,
                            rd_kafka_resp_err_t err, const char *reason) {
        int64_t offset = RD_KAFKA_OFFSET_INVALID;
        rd_kafka_op_t *rko;

        /* Enqueue op for toppar handler thread if we're on the wrong thread. */
        if (!thrd_is_current(rktp->rktp_rkt->rkt_rk->rk_thread)) {
                rd_kafka_op_t *rko = rd_kafka_op_new(RD_KAFKA_OP_CALLBACK);
                rko->rko_op_cb = rd_kafka_offset_reset_op_cb;
                rko->rko_rktp = rd_kafka_toppar_keep(rktp);
                rko->rko_err = err;
                rko->rko_offset = err_offset;
                rko->rko_flags |= RD_KAFKA_OP_F_FREE;
                rko->rko_payload = rd_strdup(reason);
                rko->rko_len = strlen(reason);
                rd_kafka_q_enq(&rktp->rktp_ops, rko);
                return;
        }

        if (err_offset == RD_KAFKA_OFFSET_INVALID || err)
                offset = rktp->rktp_rkt->rkt_conf.auto_offset_reset;
        else
                offset = err_offset;

        if (offset == RD_KAFKA_OFFSET_INVALID) {
                /* Error, auto.offset.reset tells us to error out. */
                rko = rd_kafka_op_new(RD_KAFKA_OP_CONSUMER_ERR);

                rko->rko_err = err;
                rko->rko_rkmessage.offset = err_offset;
                rko->rko_rkmessage.partition = rktp->rktp_partition;
                rko->rko_payload = rd_strdup(reason);
                rko->rko_len = strlen(rko->rko_payload);
                rko->rko_flags |= RD_KAFKA_OP_F_FREE;
                rko->rko_rktp = rd_kafka_toppar_keep(rktp);

                rd_kafka_q_enq(&rktp->rktp_fetchq, rko);
                rd_kafka_toppar_set_fetch_state(
                        rktp, RD_KAFKA_TOPPAR_FETCH_NONE);

        } else {
                /* Query logical offset */
                rktp->rktp_query_offset = offset;
                rd_kafka_toppar_set_fetch_state(
                        rktp, RD_KAFKA_TOPPAR_FETCH_OFFSET_QUERY);
        }

        rd_kafka_dbg(rktp->rktp_rkt->rkt_rk, TOPIC, "OFFSET",
                     "%s [%"PRId32"]: offset reset (at offset %s) "
                     "to %s: %s: %s",
                     rktp->rktp_rkt->rkt_topic->str, rktp->rktp_partition,
                     rd_kafka_offset2str(err_offset),
                     rd_kafka_offset2str(offset), reason,
                     rd_kafka_err2str(err));

        if (rktp->rktp_fetch_state == RD_KAFKA_TOPPAR_FETCH_OFFSET_QUERY)
                rd_kafka_toppar_offset_request(rktp,
                                               rktp->rktp_query_offset, 0);
}
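The fallback consulted above is the topic's auto.offset.reset configuration. A minimal sketch of selecting that policy through the public topic-configuration API (the property name and values are standard librdkafka configuration; the helper name is made up for illustration):

#include <string.h>
#include <librdkafka/rdkafka.h>

/* Sketch: choose what rd_kafka_offset_reset() falls back to when an
 * offset becomes unusable. "smallest"/"largest" map to
 * RD_KAFKA_OFFSET_BEGINNING/END; "error" maps to the invalid/error
 * offset, which triggers the consumer-error path above. */
static rd_kafka_topic_conf_t *make_topic_conf (void) {
        char errstr[512];
        rd_kafka_topic_conf_t *tconf = rd_kafka_topic_conf_new();

        if (rd_kafka_topic_conf_set(tconf, "auto.offset.reset", "smallest",
                                    errstr, sizeof(errstr)) !=
            RD_KAFKA_CONF_OK) {
                /* Invalid property or value; errstr holds the reason. */
                rd_kafka_topic_conf_destroy(tconf);
                return NULL;
        }
        return tconf;
}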
/**
 * Commit a list of offsets asynchronously. Response will be queued on 'replyq'.
 * Optional 'op_cb' will be set on requesting op.
 */
rd_kafka_resp_err_t
rd_kafka_commit0 (rd_kafka_t *rk,
                  const rd_kafka_topic_partition_list_t *offsets,
                  rd_kafka_q_t *replyq,
                  void (*op_cb) (rd_kafka_t *, rd_kafka_op_t *)) {
        rd_kafka_cgrp_t *rkcg;
        rd_kafka_op_t *rko;

        if (!(rkcg = rd_kafka_cgrp_get(rk)))
                return RD_KAFKA_RESP_ERR__UNKNOWN_GROUP;

        rko = rd_kafka_op_new(RD_KAFKA_OP_OFFSET_COMMIT);
        rko->rko_op_cb = op_cb;
        rko->rko_replyq = replyq;
        if (replyq)
                rd_kafka_q_keep(rko->rko_replyq);

        if (offsets)
                rd_kafka_op_payload_set(
                        rko, rd_kafka_topic_partition_list_copy(offsets),
                        (void *)rd_kafka_topic_partition_list_destroy);

        rd_kafka_q_enq(&rkcg->rkcg_ops, rko);

        return RD_KAFKA_RESP_ERR_NO_ERROR;
}
/**
 * Commit a list of offsets asynchronously. Response will be queued on 'replyq'.
 * Optional \p cb will be set on requesting op.
 *
 * Makes a copy of \p offsets (may be NULL for current assignment)
 */
static rd_kafka_resp_err_t
rd_kafka_commit0 (rd_kafka_t *rk,
                  const rd_kafka_topic_partition_list_t *offsets,
                  rd_kafka_toppar_t *rktp,
                  rd_kafka_replyq_t replyq,
                  void (*cb) (rd_kafka_t *rk,
                              rd_kafka_resp_err_t err,
                              rd_kafka_topic_partition_list_t *offsets,
                              void *opaque),
                  void *opaque) {
        rd_kafka_cgrp_t *rkcg;
        rd_kafka_op_t *rko;

        if (!(rkcg = rd_kafka_cgrp_get(rk)))
                return RD_KAFKA_RESP_ERR__UNKNOWN_GROUP;

        rko = rd_kafka_op_new(RD_KAFKA_OP_OFFSET_COMMIT);
        rko->rko_replyq = replyq;
        rko->rko_u.offset_commit.cb = cb;
        rko->rko_u.offset_commit.opaque = opaque;
        if (rktp)
                rko->rko_rktp = rd_kafka_toppar_keep(rktp);

        if (offsets)
                rko->rko_u.offset_commit.partitions =
                        rd_kafka_topic_partition_list_copy(offsets);

        rd_kafka_q_enq(rkcg->rkcg_ops, rko);

        return RD_KAFKA_RESP_ERR_NO_ERROR;
}
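Both variants of rd_kafka_commit0() are internal; applications reach them through the public rd_kafka_commit(). A minimal sketch of an asynchronous commit of one explicit offset (the helper is hypothetical; the API calls are the public librdkafka ones):

#include <librdkafka/rdkafka.h>

/* Sketch: commit one explicit partition offset asynchronously.
 * rd_kafka_commit() with async=1 returns immediately; the result is
 * delivered later via the configured offset_commit_cb (if any). */
static rd_kafka_resp_err_t commit_one (rd_kafka_t *rk,
                                       const char *topic,
                                       int32_t partition,
                                       int64_t next_offset) {
        rd_kafka_resp_err_t err;
        rd_kafka_topic_partition_list_t *offsets =
                rd_kafka_topic_partition_list_new(1);

        /* The committed offset is the offset of the *next* message
         * to consume, i.e. last consumed offset + 1. */
        rd_kafka_topic_partition_list_add(offsets, topic,
                                          partition)->offset = next_offset;

        err = rd_kafka_commit(rk, offsets, 1/*async*/);
        rd_kafka_topic_partition_list_destroy(offsets);
        return err;
}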
/**
 * Enqueue ERR__THROTTLE op, if desired.
 */
void rd_kafka_op_throttle_time (rd_kafka_broker_t *rkb, rd_kafka_q_t *rkq,
                                int throttle_time) {
        rd_kafka_op_t *rko;

        rd_avg_add(&rkb->rkb_avg_throttle, throttle_time);

        if (!rkb->rkb_rk->rk_conf.quota_support)
                return;

        /* We send throttle events when:
         *  - throttle_time > 0
         *  - throttle_time == 0 and last throttle_time > 0
         */
        if (!throttle_time &&
            !rd_atomic32_get(&rkb->rkb_rk->rk_last_throttle))
                return;

        rd_atomic32_set(&rkb->rkb_rk->rk_last_throttle, throttle_time);

        rko = rd_kafka_op_new(RD_KAFKA_OP_THROTTLE);
        rko->rko_nodename = rd_strdup(rkb->rkb_nodename);
        rko->rko_flags |= RD_KAFKA_OP_F_FREE; /* free nodename */
        rko->rko_nodeid = rkb->rkb_nodeid;
        rko->rko_throttle_time = throttle_time;
        rd_kafka_q_enq(rkq, rko);
}
/**
 * Take action when the offset for a toppar becomes unusable.
 * NOTE: toppar_lock(rktp) must be held
 */
void rd_kafka_offset_reset (rd_kafka_toppar_t *rktp, int64_t err_offset,
                            rd_kafka_resp_err_t err, const char *reason) {
        int64_t offset = RD_KAFKA_OFFSET_ERROR;
        rd_kafka_op_t *rko;
        int64_t offset_reset = rktp->rktp_rkt->rkt_conf.auto_offset_reset;

        if (offset_reset == RD_KAFKA_OFFSET_END ||
            offset_reset == RD_KAFKA_OFFSET_BEGINNING ||
            offset_reset <= RD_KAFKA_OFFSET_TAIL_BASE) {
                offset = rktp->rktp_rkt->rkt_conf.auto_offset_reset;
                rktp->rktp_query_offset = offset;
                rktp->rktp_fetch_state = RD_KAFKA_TOPPAR_FETCH_OFFSET_QUERY;

        } else if (offset_reset == RD_KAFKA_OFFSET_ERROR) {
                rko = rd_kafka_op_new(RD_KAFKA_OP_ERR);

                rko->rko_err = err;
                rko->rko_rkmessage.offset = err_offset;
                rko->rko_rkmessage.rkt = rktp->rktp_rkt;
                rko->rko_rkmessage.partition = rktp->rktp_partition;
                rko->rko_payload = strdup(reason);
                rko->rko_len = strlen(rko->rko_payload);
                rko->rko_flags |= RD_KAFKA_OP_F_FREE;
                rd_kafka_topic_keep(rko->rko_rkmessage.rkt);

                rd_kafka_q_enq(&rktp->rktp_fetchq, rko);
                rktp->rktp_fetch_state = RD_KAFKA_TOPPAR_FETCH_NONE;
        }

        rd_kafka_dbg(rktp->rktp_rkt->rkt_rk, TOPIC, "OFFSET",
                     "%s [%"PRId32"]: offset reset (at offset %"PRId64") "
                     "to %"PRId64": %s: %s",
                     rktp->rktp_rkt->rkt_topic->str, rktp->rktp_partition,
                     err_offset, offset, reason, rd_kafka_err2str(err));
}
/**
 * Trigger offset_commit_cb op, if configured.
 *
 */
void rd_kafka_offset_commit_cb_op (rd_kafka_t *rk,
                                   rd_kafka_resp_err_t err,
                                   const rd_kafka_topic_partition_list_t
                                   *offsets) {
        rd_kafka_op_t *rko;

        if (!rk->rk_conf.offset_commit_cb)
                return;

        rko = rd_kafka_op_new(RD_KAFKA_OP_OFFSET_COMMIT|RD_KAFKA_OP_REPLY);
        rko->rko_err = err;
        rd_kafka_assert(NULL, offsets->cnt > 0);
        rd_kafka_op_payload_set(rko,
                                rd_kafka_topic_partition_list_copy(offsets),
                                (void *)rd_kafka_topic_partition_list_destroy);
        rd_kafka_q_enq(&rk->rk_rep, rko);
}
/**
 * Enqueue offset_commit_cb op, if configured.
 *
 */
void rd_kafka_offset_commit_cb_op (rd_kafka_t *rk,
                                   rd_kafka_resp_err_t err,
                                   const rd_kafka_topic_partition_list_t
                                   *offsets) {
        rd_kafka_op_t *rko;

        if (!(rk->rk_conf.enabled_events & RD_KAFKA_EVENT_OFFSET_COMMIT))
                return;

        rko = rd_kafka_op_new(RD_KAFKA_OP_OFFSET_COMMIT|RD_KAFKA_OP_REPLY);
        rko->rko_err = err;
        rko->rko_u.offset_commit.cb = rk->rk_conf.offset_commit_cb; /*maybe NULL*/
        rko->rko_u.offset_commit.opaque = rk->rk_conf.opaque;
        if (offsets)
                rko->rko_u.offset_commit.partitions =
                        rd_kafka_topic_partition_list_copy(offsets);
        rd_kafka_q_enq(rk->rk_rep, rko);
}
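The cb/opaque carried on the op are the ones registered on the global configuration. A sketch of such a callback and its registration (callback body is illustrative; the signature and rd_kafka_conf_set_offset_commit_cb() are the public API):

#include <stdio.h>
#include <inttypes.h>
#include <librdkafka/rdkafka.h>

/* Sketch: the callback that the OFFSET_COMMIT reply op eventually
 * invokes on the application's poll thread. */
static void on_offset_commit (rd_kafka_t *rk, rd_kafka_resp_err_t err,
                              rd_kafka_topic_partition_list_t *offsets,
                              void *opaque) {
        int i;

        if (err) {
                fprintf(stderr, "Commit failed: %s\n", rd_kafka_err2str(err));
                return;
        }

        for (i = 0; i < offsets->cnt; i++)
                printf("Committed %s [%"PRId32"] @ %"PRId64" (%s)\n",
                       offsets->elems[i].topic,
                       offsets->elems[i].partition,
                       offsets->elems[i].offset,
                       rd_kafka_err2str(offsets->elems[i].err));
}

/* Registration on the conf object, prior to rd_kafka_new():
 *   rd_kafka_conf_set_offset_commit_cb(conf, on_offset_commit); */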
/**
 * @brief Send request to queue, wait for response.
 *
 * @returns response on success or NULL if destq is disabled.
 */
rd_kafka_op_t *rd_kafka_op_req0 (rd_kafka_q_t *destq, rd_kafka_q_t *recvq,
                                 rd_kafka_op_t *rko, int timeout_ms) {
        rd_kafka_op_t *reply;

        /* Indicate to destination where to send reply. */
        rd_kafka_op_set_replyq(rko, recvq, NULL);

        /* Enqueue op */
        if (!rd_kafka_q_enq(destq, rko))
                return NULL;

        /* Wait for reply */
        reply = rd_kafka_q_pop(recvq, timeout_ms, 0);

        /* May be NULL for timeout */
        return reply;
}
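A typical caller pairs rd_kafka_op_req0() with a transient reply queue. A rough sketch of that request/reply pattern, assuming internal-header access; the helper name and error mapping are illustrative, and the internal signatures vary between the versions shown in these excerpts:

/* Sketch (internal API): synchronous request/reply over op queues.
 * Not a public interface. */
static rd_kafka_resp_err_t request_sync (rd_kafka_t *rk,
                                         rd_kafka_q_t *destq,
                                         rd_kafka_op_type_t type,
                                         int timeout_ms) {
        rd_kafka_resp_err_t err;
        rd_kafka_q_t *recvq = rd_kafka_q_new(rk); /* transient reply queue */
        rd_kafka_op_t *rko  = rd_kafka_op_new(type);
        rd_kafka_op_t *reply;

        /* Blocks for up to timeout_ms; returns NULL on timeout or if
         * destq was disabled. */
        reply = rd_kafka_op_req0(destq, recvq, rko, timeout_ms);

        err = reply ? reply->rko_err : RD_KAFKA_RESP_ERR__TIMED_OUT;
        if (reply)
                rd_kafka_op_destroy(reply);
        rd_kafka_q_destroy(recvq);
        return err;
}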
/**
 * Reply to 'rko_orig' using err,payload,len if a replyq is set up,
 * else do nothing.
 *
 * Returns 0 if 'rko_orig' did not have a replyq and nothing was enqueued,
 * else 1.
 */
int rd_kafka_op_reply (rd_kafka_op_t *rko_orig,
                       rd_kafka_resp_err_t err,
                       void *payload, size_t len, void (*free_cb) (void *)) {
        rd_kafka_op_t *rko;

        if (!rko_orig->rko_replyq)
                return 0;

        rko = rd_kafka_op_new(rko_orig->rko_type);
        rko->rko_err = err;
        rko->rko_payload = payload;
        rko->rko_len = len;
        rko->rko_free_cb = free_cb;
        if (free_cb)
                rko->rko_flags |= RD_KAFKA_OP_F_FREE;
        rko->rko_version = rko_orig->rko_version;

        return rd_kafka_q_enq(rko_orig->rko_replyq, rko);
}
/**
 * Send request to queue, wait for response.
 */
rd_kafka_op_t *rd_kafka_op_req0 (rd_kafka_q_t *destq, rd_kafka_q_t *recvq,
                                 rd_kafka_op_t *rko, int timeout_ms) {
        rd_kafka_op_t *reply;

        /* Indicate to destination where to send reply. */
        rko->rko_replyq = recvq;
        if (recvq)
                rd_kafka_q_keep(rko->rko_replyq);

        /* Enqueue op */
        rd_kafka_q_enq(destq, rko);

        /* Wait for reply */
        reply = rd_kafka_q_pop(recvq, timeout_ms, 0);

        /* May be NULL for timeout */
        return reply;
}
/**
 * Propagate an error event to the application on a specific queue.
 * \p optype should be RD_KAFKA_OP_ERR for generic errors and
 * RD_KAFKA_OP_CONSUMER_ERR for consumer errors.
 */
void rd_kafka_q_op_err (rd_kafka_q_t *rkq, rd_kafka_op_type_t optype,
                        rd_kafka_resp_err_t err, int32_t version,
                        rd_kafka_toppar_t *rktp, int64_t offset,
                        const char *fmt, ...) {
        va_list ap;
        char buf[2048];
        rd_kafka_op_t *rko;

        va_start(ap, fmt);
        rd_vsnprintf(buf, sizeof(buf), fmt, ap);
        va_end(ap);

        rko = rd_kafka_op_new(optype);
        rko->rko_version = version;
        rko->rko_err = err;
        rko->rko_u.err.offset = offset;
        rko->rko_u.err.errstr = rd_strdup(buf);
        if (rktp)
                rko->rko_rktp = rd_kafka_toppar_keep(rktp);

        rd_kafka_q_enq(rkq, rko);
}
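Ops of type RD_KAFKA_OP_CONSUMER_ERR enqueued this way eventually surface to the application as messages with a non-zero err field. A minimal consumer-side sketch using the public API (the helper name is illustrative):

#include <stdio.h>
#include <inttypes.h>
#include <librdkafka/rdkafka.h>

/* Sketch: errors propagated via rd_kafka_q_op_err() arrive as
 * rd_kafka_message_t with rkmessage->err set, interleaved with
 * normal messages. */
static void poll_once (rd_kafka_t *rk) {
        rd_kafka_message_t *rkmessage = rd_kafka_consumer_poll(rk, 1000);

        if (!rkmessage)
                return; /* poll timeout */

        if (rkmessage->err)
                fprintf(stderr, "Consumer error at offset %"PRId64": %s\n",
                        rkmessage->offset,
                        rd_kafka_message_errstr(rkmessage));
        else
                printf("Message at offset %"PRId64" (%zu bytes)\n",
                       rkmessage->offset, rkmessage->len);

        rd_kafka_message_destroy(rkmessage);
}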
/**
 * Enqueue op for app. Convenience function
 */
void rd_kafka_op_app (rd_kafka_q_t *rkq, rd_kafka_op_type_t type,
                      int op_flags, rd_kafka_toppar_t *rktp,
                      rd_kafka_resp_err_t err,
                      void *payload, size_t len,
                      void (*free_cb) (void *)) {
        rd_kafka_op_t *rko;

        rko = rd_kafka_op_new(type);
        if (rktp) {
                rko->rko_rktp = rd_kafka_toppar_keep(rktp);
                rko->rko_version = rktp->rktp_fetch_version;
                rko->rko_rkmessage.partition = rktp->rktp_partition;
        }

        rko->rko_err = err;
        rko->rko_payload = payload;
        rko->rko_len = len;
        rko->rko_flags |= op_flags;
        rko->rko_free_cb = free_cb;

        rd_kafka_q_enq(rkq, rko);
}
/**
 * Enqueue ERR__THROTTLE op, if desired.
 */
void rd_kafka_op_throttle_time (rd_kafka_broker_t *rkb, rd_kafka_q_t *rkq,
                                int throttle_time) {
        rd_kafka_op_t *rko;

        rd_avg_add(&rkb->rkb_avg_throttle, throttle_time);

        /* We send throttle events when:
         *  - throttle_time > 0
         *  - throttle_time == 0 and last throttle_time > 0
         */
        if (!rkb->rkb_rk->rk_conf.throttle_cb ||
            (!throttle_time &&
             !rd_atomic32_get(&rkb->rkb_rk->rk_last_throttle)))
                return;

        rd_atomic32_set(&rkb->rkb_rk->rk_last_throttle, throttle_time);

        rko = rd_kafka_op_new(RD_KAFKA_OP_THROTTLE);
        rd_kafka_op_set_prio(rko, RD_KAFKA_PRIO_HIGH);
        rko->rko_u.throttle.nodename = rd_strdup(rkb->rkb_nodename);
        rko->rko_u.throttle.nodeid = rkb->rkb_nodeid;
        rko->rko_u.throttle.throttle_time = throttle_time;
        rd_kafka_q_enq(rkq, rko);
}
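The throttle_cb gating this path is the application callback registered on the configuration. A sketch of a matching callback (body illustrative; the signature and rd_kafka_conf_set_throttle_cb() are public API):

#include <stdio.h>
#include <inttypes.h>
#include <librdkafka/rdkafka.h>

/* Sketch: the application-side callback that consumes THROTTLE ops.
 * throttle_time_ms == 0 signals that a previous throttling period has
 * ended (matching the "last throttle_time > 0" rule above). */
static void on_throttle (rd_kafka_t *rk, const char *broker_name,
                         int32_t broker_id, int throttle_time_ms,
                         void *opaque) {
        fprintf(stderr, "Broker %s (%"PRId32") throttled us for %d ms\n",
                broker_name, broker_id, throttle_time_ms);
}

/* Registration on the conf object, prior to rd_kafka_new():
 *   rd_kafka_conf_set_throttle_cb(conf, on_throttle); */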
/**
 * Send an op back to the application.
 *
 * Locality: Kafka thread
 */
void rd_kafka_op_app_reply (rd_kafka_q_t *rkq,
                            rd_kafka_op_type_t type,
                            rd_kafka_resp_err_t err,
                            int32_t version,
                            void *payload, size_t len) {
        rd_kafka_op_t *rko;

        rko = rd_kafka_op_new(type);

        if (err && !payload) {
                /* Provide human readable error string if not provided. */
                payload = rd_strdup(rd_kafka_err2str(err));
                len = strlen(payload);
        }

        rko->rko_flags |= RD_KAFKA_OP_F_FREE;
        rko->rko_version = version;
        rko->rko_payload = payload;
        rko->rko_len = len;
        rko->rko_err = err;

        rd_kafka_q_enq(rkq, rko);
}
/**
 * @brief Message parser for MsgVersion v2
 */
static rd_kafka_resp_err_t
rd_kafka_msgset_reader_msg_v2 (rd_kafka_msgset_reader_t *msetr) {
        rd_kafka_buf_t *rkbuf = msetr->msetr_rkbuf;
        rd_kafka_toppar_t *rktp = msetr->msetr_rktp;
        struct {
                int64_t Length;
                int64_t MsgAttributes;  /* int8_t, but int64 req. for varint */
                int64_t TimestampDelta;
                int64_t OffsetDelta;
                int64_t Offset;         /* Absolute offset */
                rd_kafkap_bytes_t Key;
                rd_kafkap_bytes_t Value;
                int64_t HeaderCnt;
        } hdr;
        rd_kafka_op_t *rko;
        rd_kafka_msg_t *rkm;
        /* Only log decoding errors if protocol debugging enabled. */
        int log_decode_errors =
                (rkbuf->rkbuf_rkb->rkb_rk->rk_conf.debug &
                 RD_KAFKA_DBG_PROTOCOL) ? LOG_DEBUG : 0;
        size_t message_end;

        rd_kafka_buf_read_varint(rkbuf, &hdr.Length);
        message_end = rd_slice_offset(&rkbuf->rkbuf_reader) +
                (size_t)hdr.Length;
        rd_kafka_buf_read_varint(rkbuf, &hdr.MsgAttributes);

        rd_kafka_buf_read_varint(rkbuf, &hdr.TimestampDelta);
        rd_kafka_buf_read_varint(rkbuf, &hdr.OffsetDelta);
        hdr.Offset = msetr->msetr_v2_hdr->BaseOffset + hdr.OffsetDelta;

        /* Skip message if outdated */
        if (hdr.Offset < rktp->rktp_offsets.fetch_offset) {
                rd_rkb_dbg(msetr->msetr_rkb, MSG, "MSG",
                           "Skip offset %"PRId64" < fetch_offset %"PRId64,
                           hdr.Offset, rktp->rktp_offsets.fetch_offset);
                rd_kafka_buf_skip_to(rkbuf, message_end);
                return RD_KAFKA_RESP_ERR_NO_ERROR; /* Continue with next msg */
        }

        rd_kafka_buf_read_bytes_varint(rkbuf, &hdr.Key);

        rd_kafka_buf_read_bytes_varint(rkbuf, &hdr.Value);

        /* Ignore headers for now */
        rd_kafka_buf_skip_to(rkbuf, message_end);

        /* Create op/message container for message. */
        rko = rd_kafka_op_new_fetch_msg(&rkm, rktp, msetr->msetr_tver->version,
                                        rkbuf,
                                        hdr.Offset,
                                        (size_t)RD_KAFKAP_BYTES_LEN(&hdr.Key),
                                        RD_KAFKAP_BYTES_IS_NULL(&hdr.Key) ?
                                        NULL : hdr.Key.data,
                                        (size_t)RD_KAFKAP_BYTES_LEN(&hdr.Value),
                                        RD_KAFKAP_BYTES_IS_NULL(&hdr.Value) ?
                                        NULL : hdr.Value.data);

        /* Set timestamp.
         *
         * When broker assigns the timestamps (LOG_APPEND_TIME) it will
         * assign the same timestamp for all messages in a MessageSet
         * using MaxTimestamp. */
        if ((msetr->msetr_v2_hdr->Attributes &
             RD_KAFKA_MSG_ATTR_LOG_APPEND_TIME) ||
            (hdr.MsgAttributes & RD_KAFKA_MSG_ATTR_LOG_APPEND_TIME)) {
                rkm->rkm_tstype = RD_KAFKA_TIMESTAMP_LOG_APPEND_TIME;
                rkm->rkm_timestamp = msetr->msetr_v2_hdr->MaxTimestamp;
        } else {
                rkm->rkm_tstype = RD_KAFKA_TIMESTAMP_CREATE_TIME;
                rkm->rkm_timestamp =
                        msetr->msetr_v2_hdr->BaseTimestamp + hdr.TimestampDelta;
        }

        /* Enqueue message on temporary queue */
        rd_kafka_q_enq(&msetr->msetr_rkq, rko);
        msetr->msetr_msgcnt++;

        return RD_KAFKA_RESP_ERR_NO_ERROR;

 err_parse:
        /* Count all parse errors as partial message errors. */
        rd_atomic64_add(&msetr->msetr_rkb->rkb_c.rx_partial, 1);
        return rkbuf->rkbuf_err;
}
/**
 * @brief Decompress MessageSet, pass the uncompressed MessageSet to
 *        the MessageSet reader.
 */
static rd_kafka_resp_err_t
rd_kafka_msgset_reader_decompress (rd_kafka_msgset_reader_t *msetr,
                                   int MsgVersion, int Attributes,
                                   int64_t Timestamp, int64_t Offset,
                                   const void *compressed,
                                   size_t compressed_size) {
        struct iovec iov = { .iov_base = NULL, .iov_len = 0 };
        rd_kafka_toppar_t *rktp = msetr->msetr_rktp;
        int codec = Attributes & RD_KAFKA_MSG_ATTR_COMPRESSION_MASK;
        rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR;
        rd_kafka_buf_t *rkbufz;

        switch (codec)
        {
#if WITH_ZLIB
        case RD_KAFKA_COMPRESSION_GZIP:
        {
                uint64_t outlenx = 0;

                /* Decompress Message payload */
                iov.iov_base = rd_gz_decompress(compressed,
                                                (int)compressed_size,
                                                &outlenx);
                if (unlikely(!iov.iov_base)) {
                        rd_rkb_dbg(msetr->msetr_rkb, MSG, "GZIP",
                                   "Failed to decompress Gzip "
                                   "message at offset %"PRId64
                                   " of %"PRIusz" bytes: "
                                   "ignoring message",
                                   Offset, compressed_size);
                        err = RD_KAFKA_RESP_ERR__BAD_COMPRESSION;
                        goto err;
                }

                iov.iov_len = (size_t)outlenx;
        }
        break;
#endif

#if WITH_SNAPPY
        case RD_KAFKA_COMPRESSION_SNAPPY:
        {
                const char *inbuf = compressed;
                size_t inlen = compressed_size;
                int r;
                static const unsigned char snappy_java_magic[] = {
                        0x82, 'S','N','A','P','P','Y', 0
                };
                static const size_t snappy_java_hdrlen = 8+4+4;

                /* snappy-java adds its own header (SnappyCodec)
                 * which is not compatible with the official Snappy
                 * implementation.
                 *   8: magic, 4: version, 4: compatible
                 * followed by any number of chunks:
                 *   4: length
                 * ...: snappy-compressed data. */
                if (likely(inlen > snappy_java_hdrlen + 4 &&
                           !memcmp(inbuf, snappy_java_magic, 8))) {
                        /* snappy-java framing */
                        char errstr[128];

                        inbuf  = inbuf + snappy_java_hdrlen;
                        inlen -= snappy_java_hdrlen;
                        iov.iov_base = rd_kafka_snappy_java_uncompress(
                                inbuf, inlen,
                                &iov.iov_len,
                                errstr, sizeof(errstr));

                        if (unlikely(!iov.iov_base)) {
                                rd_rkb_dbg(msetr->msetr_rkb, MSG, "SNAPPY",
                                           "%s [%"PRId32"]: "
                                           "Snappy decompression for message "
                                           "at offset %"PRId64" failed: %s: "
                                           "ignoring message",
                                           rktp->rktp_rkt->rkt_topic->str,
                                           rktp->rktp_partition, Offset,
                                           errstr);
                                err = RD_KAFKA_RESP_ERR__BAD_COMPRESSION;
                                goto err;
                        }

                } else {
                        /* No framing */

                        /* Acquire uncompressed length */
                        if (unlikely(!rd_kafka_snappy_uncompressed_length(
                                             inbuf, inlen, &iov.iov_len))) {
                                rd_rkb_dbg(msetr->msetr_rkb, MSG, "SNAPPY",
                                           "Failed to get length of Snappy "
                                           "compressed payload "
                                           "for message at offset %"PRId64
                                           " (%"PRIusz" bytes): "
                                           "ignoring message",
                                           Offset, inlen);
                                err = RD_KAFKA_RESP_ERR__BAD_COMPRESSION;
                                goto err;
                        }

                        /* Allocate output buffer for uncompressed data */
                        iov.iov_base = rd_malloc(iov.iov_len);
                        if (unlikely(!iov.iov_base)) {
                                rd_rkb_dbg(msetr->msetr_rkb, MSG, "SNAPPY",
                                           "Failed to allocate Snappy "
                                           "decompress buffer of size %"PRIusz
                                           " for message at offset %"PRId64
                                           " (%"PRIusz" bytes): %s: "
                                           "ignoring message",
                                           iov.iov_len, Offset, inlen,
                                           rd_strerror(errno));
                                err = RD_KAFKA_RESP_ERR__CRIT_SYS_RESOURCE;
                                goto err;
                        }

                        /* Uncompress to outbuf */
                        if (unlikely((r = rd_kafka_snappy_uncompress(
                                              inbuf, inlen, iov.iov_base)))) {
                                rd_rkb_dbg(msetr->msetr_rkb, MSG, "SNAPPY",
                                           "Failed to decompress Snappy "
                                           "payload for message at offset "
                                           "%"PRId64" (%"PRIusz" bytes): %s: "
                                           "ignoring message",
                                           Offset, inlen,
                                           rd_strerror(-r/*negative errno*/));
                                rd_free(iov.iov_base);
                                err = RD_KAFKA_RESP_ERR__BAD_COMPRESSION;
                                goto err;
                        }
                }
        }
        break;
#endif

        case RD_KAFKA_COMPRESSION_LZ4:
        {
                err = rd_kafka_lz4_decompress(msetr->msetr_rkb,
                                              /* Proper HC? */
                                              MsgVersion >= 1 ? 1 : 0,
                                              Offset,
                                              /* @warning Will modify
                                               *          compressed if no
                                               *          proper HC */
                                              (char *)compressed,
                                              compressed_size,
                                              &iov.iov_base, &iov.iov_len);
                if (err)
                        goto err;
        }
        break;

        default:
                rd_rkb_dbg(msetr->msetr_rkb, MSG, "CODEC",
                           "%s [%"PRId32"]: Message at offset %"PRId64
                           " with unsupported "
                           "compression codec 0x%x: message ignored",
                           rktp->rktp_rkt->rkt_topic->str,
                           rktp->rktp_partition,
                           Offset, (int)codec);

                err = RD_KAFKA_RESP_ERR__NOT_IMPLEMENTED;
                goto err;
        }

        rd_assert(iov.iov_base);

        /*
         * Decompression successful
         */

        /* Create a new buffer pointing to the uncompressed
         * allocated buffer (outbuf) and let messages keep a reference to
         * this new buffer. */
        rkbufz = rd_kafka_buf_new_shadow(iov.iov_base, iov.iov_len, rd_free);
        rkbufz->rkbuf_rkb = msetr->msetr_rkbuf->rkbuf_rkb;
        rd_kafka_broker_keep(rkbufz->rkbuf_rkb);

        /* In MsgVersion v0..1 the decompressed data contains
         * an inner MessageSet, pass it to a new MessageSet reader.
         *
         * For MsgVersion v2 the decompressed data are the list of messages.
         */

        if (MsgVersion <= 1) {
                /* Pass decompressed data (inner MessageSet)
                 * to new instance of the MessageSet parser. */
                rd_kafka_msgset_reader_t inner_msetr;
                rd_kafka_msgset_reader_init(&inner_msetr,
                                            rkbufz,
                                            msetr->msetr_rktp,
                                            msetr->msetr_tver,
                                            &msetr->msetr_rkq);

                if (MsgVersion == 1) {
                        /* postproc() will convert relative to
                         * absolute offsets */
                        inner_msetr.msetr_relative_offsets = 1;
                        inner_msetr.msetr_outer.offset = Offset;

                        /* Apply single LogAppendTime timestamp for
                         * all messages. */
                        if (Attributes & RD_KAFKA_MSG_ATTR_LOG_APPEND_TIME) {
                                inner_msetr.msetr_outer.tstype =
                                        RD_KAFKA_TIMESTAMP_LOG_APPEND_TIME;
                                inner_msetr.msetr_outer.timestamp = Timestamp;
                        }
                }

                /* Parse the inner MessageSet */
                err = rd_kafka_msgset_reader_run(&inner_msetr);

        } else {
                /* MsgVersion 2 */
                rd_kafka_buf_t *orig_rkbuf = msetr->msetr_rkbuf;

                /* Temporarily replace read buffer with uncompressed buffer */
                msetr->msetr_rkbuf = rkbufz;

                /* Read messages */
                err = rd_kafka_msgset_reader_msgs_v2(msetr);

                /* Restore original buffer */
                msetr->msetr_rkbuf = orig_rkbuf;
        }

        /* Lose our refcnt of the uncompressed rkbuf.
         * Individual messages/rko's will have their own reference. */
        rd_kafka_buf_destroy(rkbufz);

        return err;

 err:
        /* Enqueue error message:
         * Create op and push on temporary queue. */
        rd_kafka_q_op_err(&msetr->msetr_rkq, RD_KAFKA_OP_CONSUMER_ERR,
                          err, msetr->msetr_tver->version, rktp, Offset,
                          "Decompression (codec 0x%x) of message at %"PRIu64
                          " of %"PRIu64" bytes failed: %s",
                          codec, Offset, compressed_size,
                          rd_kafka_err2str(err));

        return err;
}


/**
 * @brief Message parser for MsgVersion v0..1
 *
 * @returns RD_KAFKA_RESP_ERR_NO_ERROR on success or on single-message errors,
 *          or any other error code when the MessageSet parser should stop
 *          parsing (such as for partial Messages).
 */
static rd_kafka_resp_err_t
rd_kafka_msgset_reader_msg_v0_1 (rd_kafka_msgset_reader_t *msetr) {
        rd_kafka_buf_t *rkbuf = msetr->msetr_rkbuf;
        rd_kafka_toppar_t *rktp = msetr->msetr_rktp;
        rd_kafka_broker_t *rkb = msetr->msetr_rkb;
        struct {
                int64_t  Offset;       /* MessageSet header */
                int32_t  MessageSize;  /* MessageSet header */
                uint32_t Crc;
                int8_t   MagicByte;    /* MsgVersion */
                int8_t   Attributes;
                int64_t  Timestamp;    /* v1 */
        } hdr; /* Message header */
        rd_kafkap_bytes_t Key;
        rd_kafkap_bytes_t Value;
        int32_t Value_len;
        rd_kafka_op_t *rko;
        size_t hdrsize = 6; /* Header size following MessageSize */
        rd_slice_t crc_slice;
        rd_kafka_msg_t *rkm;
        int relative_offsets = 0;
        const char *reloff_str = "";
        /* Only log decoding errors if protocol debugging enabled. */
        int log_decode_errors =
                (rkbuf->rkbuf_rkb->rkb_rk->rk_conf.debug &
                 RD_KAFKA_DBG_PROTOCOL) ? LOG_DEBUG : 0;
        size_t message_end;

        rd_kafka_buf_read_i64(rkbuf, &hdr.Offset);
        rd_kafka_buf_read_i32(rkbuf, &hdr.MessageSize);
        message_end = rd_slice_offset(&rkbuf->rkbuf_reader) + hdr.MessageSize;

        rd_kafka_buf_read_i32(rkbuf, &hdr.Crc);
        if (!rd_slice_narrow_copy_relative(&rkbuf->rkbuf_reader, &crc_slice,
                                           hdr.MessageSize - 4))
                rd_kafka_buf_check_len(rkbuf, hdr.MessageSize - 4);

        rd_kafka_buf_read_i8(rkbuf, &hdr.MagicByte);
        rd_kafka_buf_read_i8(rkbuf, &hdr.Attributes);

        if (hdr.MagicByte == 1) { /* MsgVersion */
                rd_kafka_buf_read_i64(rkbuf, &hdr.Timestamp);
                hdrsize += 8;
                /* MsgVersion 1 has relative offsets for compressed
                 * MessageSets */
                if (!(hdr.Attributes & RD_KAFKA_MSG_ATTR_COMPRESSION_MASK) &&
                    msetr->msetr_relative_offsets) {
                        relative_offsets = 1;
                        reloff_str = "relative ";
                }
        } else
                hdr.Timestamp = 0;

        /* Verify MessageSize */
        if (unlikely(hdr.MessageSize < (ssize_t)hdrsize))
                rd_kafka_buf_parse_fail(rkbuf,
                                        "Message at %soffset %"PRId64
                                        " MessageSize %"PRId32
                                        " < hdrsize %"PRIusz,
                                        reloff_str,
                                        hdr.Offset, hdr.MessageSize, hdrsize);

        /* Early check for partial messages */
        rd_kafka_buf_check_len(rkbuf, hdr.MessageSize - hdrsize);

        if (rkb->rkb_rk->rk_conf.check_crcs) {
                /* Verify CRC32 if desired. */
                uint32_t calc_crc;

                calc_crc = rd_slice_crc32(&crc_slice);
                rd_dassert(rd_slice_remains(&crc_slice) == 0);

                if (unlikely(hdr.Crc != calc_crc)) {
                        /* Propagate CRC error to application and
                         * continue with next message. */
                        rd_kafka_q_op_err(&msetr->msetr_rkq,
                                          RD_KAFKA_OP_CONSUMER_ERR,
                                          RD_KAFKA_RESP_ERR__BAD_MSG,
                                          msetr->msetr_tver->version,
                                          rktp,
                                          hdr.Offset,
                                          "Message at %soffset %"PRId64
                                          " (%"PRId32" bytes) "
                                          "failed CRC32 check "
                                          "(original 0x%"PRIx32" != "
                                          "calculated 0x%"PRIx32")",
                                          reloff_str, hdr.Offset,
                                          hdr.MessageSize, hdr.Crc, calc_crc);
                        rd_kafka_buf_skip_to(rkbuf, message_end);
                        rd_atomic64_add(&rkb->rkb_c.rx_err, 1);
                        /* Continue with next message */
                        return RD_KAFKA_RESP_ERR_NO_ERROR;
                }
        }

        /* Extract key */
        rd_kafka_buf_read_bytes(rkbuf, &Key);

        /* Extract Value */
        rd_kafka_buf_read_bytes(rkbuf, &Value);
        Value_len = RD_KAFKAP_BYTES_LEN(&Value);

        /* MessageSets may contain offsets earlier than we
         * requested (compressed MessageSets in particular),
         * drop the earlier messages.
         * Note: the inner offset may only be trusted for
         *       absolute offsets. KIP-31 introduced
         *       ApiVersion 2 that maintains relative offsets
         *       of compressed messages and the base offset
         *       in the outer message is the offset of
         *       the *LAST* message in the MessageSet.
         *       This requires us to assign offsets
         *       after all messages have been read from
         *       the messageset, and it also means
         *       we can't perform this offset check here
         *       in that case. */
        if (!relative_offsets &&
            hdr.Offset < rktp->rktp_offsets.fetch_offset)
                return RD_KAFKA_RESP_ERR_NO_ERROR; /* Continue with next msg */

        /* Handle compressed MessageSet */
        if (unlikely(hdr.Attributes & RD_KAFKA_MSG_ATTR_COMPRESSION_MASK))
                return rd_kafka_msgset_reader_decompress(
                        msetr, hdr.MagicByte, hdr.Attributes, hdr.Timestamp,
                        hdr.Offset, Value.data, Value_len);


        /* Pure uncompressed message, this is the innermost
         * handler after all compression and cascaded
         * MessageSets have been peeled off. */

        /* Create op/message container for message. */
        rko = rd_kafka_op_new_fetch_msg(&rkm, rktp, msetr->msetr_tver->version,
                                        rkbuf, hdr.Offset,
                                        (size_t)RD_KAFKAP_BYTES_LEN(&Key),
                                        RD_KAFKAP_BYTES_IS_NULL(&Key) ?
                                        NULL : Key.data,
                                        (size_t)RD_KAFKAP_BYTES_LEN(&Value),
                                        RD_KAFKAP_BYTES_IS_NULL(&Value) ?
                                        NULL : Value.data);

        /* Assign message timestamp.
         * If message was in a compressed MessageSet and the outer/wrapper
         * Message.Attribute had a LOG_APPEND_TIME set, use the
         * outer timestamp */
        if (msetr->msetr_outer.tstype == RD_KAFKA_TIMESTAMP_LOG_APPEND_TIME) {
                rkm->rkm_timestamp = msetr->msetr_outer.timestamp;
                rkm->rkm_tstype    = msetr->msetr_outer.tstype;

        } else if (hdr.MagicByte >= 1 && hdr.Timestamp) {
                rkm->rkm_timestamp = hdr.Timestamp;
                if (hdr.Attributes & RD_KAFKA_MSG_ATTR_LOG_APPEND_TIME)
                        rkm->rkm_tstype = RD_KAFKA_TIMESTAMP_LOG_APPEND_TIME;
                else
                        rkm->rkm_tstype = RD_KAFKA_TIMESTAMP_CREATE_TIME;
        }

        /* Enqueue message on temporary queue */
        rd_kafka_q_enq(&msetr->msetr_rkq, rko);
        msetr->msetr_msgcnt++;

        return RD_KAFKA_RESP_ERR_NO_ERROR; /* Continue */

 err_parse:
        /* Count all parse errors as partial message errors. */
        rd_atomic64_add(&msetr->msetr_rkb->rkb_c.rx_partial, 1);
        return rkbuf->rkbuf_err;
}
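The check_crcs branch above verifies a plain CRC-32 (the zlib/IEEE polynomial; v2 record batches use CRC-32C instead) over everything following the Crc field. A standalone sketch of the same check using zlib, independent of librdkafka's buffer types (the helper name is illustrative):

#include <stdint.h>
#include <stddef.h>
#include <zlib.h>

/* Sketch: verify the CRC of a Kafka MsgVersion 0/1 message.
 * `msg` points at the message body (everything after Offset and
 * MessageSize), i.e. Crc (4 bytes, big-endian) followed by the
 * CRC-covered remainder (MagicByte..Value). */
static int kafka_v01_crc_ok (const uint8_t *msg, size_t msg_size) {
        uint32_t wire_crc;
        uLong calc_crc;

        if (msg_size < 4)
                return 0;

        /* Crc field is big-endian on the wire. */
        wire_crc = ((uint32_t)msg[0] << 24) | ((uint32_t)msg[1] << 16) |
                   ((uint32_t)msg[2] << 8)  |  (uint32_t)msg[3];

        /* Standard CRC-32 over the rest of the message. */
        calc_crc = crc32(0L, Z_NULL, 0);
        calc_crc = crc32(calc_crc, msg + 4, (uInt)(msg_size - 4));

        return (uint32_t)calc_crc == wire_crc;
}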
void rd_kafka_op_app_reply2 (rd_kafka_t *rk, rd_kafka_op_t *rko) {
        rd_kafka_q_enq(&rk->rk_rep, rko);
}
/**
 * @brief Message parser for MsgVersion v2
 */
static rd_kafka_resp_err_t
rd_kafka_msgset_reader_msg_v2 (rd_kafka_msgset_reader_t *msetr) {
        rd_kafka_buf_t *rkbuf = msetr->msetr_rkbuf;
        rd_kafka_toppar_t *rktp = msetr->msetr_rktp;
        struct {
                int64_t Length;
                int8_t  MsgAttributes;
                int64_t TimestampDelta;
                int64_t OffsetDelta;
                int64_t Offset;  /* Absolute offset */
                rd_kafkap_bytes_t Key;
                rd_kafkap_bytes_t Value;
                rd_kafkap_bytes_t Headers;
        } hdr;
        rd_kafka_op_t *rko;
        rd_kafka_msg_t *rkm;
        /* Only log decoding errors if protocol debugging enabled. */
        int log_decode_errors =
                (rkbuf->rkbuf_rkb->rkb_rk->rk_conf.debug &
                 RD_KAFKA_DBG_PROTOCOL) ? LOG_DEBUG : 0;
        size_t message_end;

        rd_kafka_buf_read_varint(rkbuf, &hdr.Length);
        message_end = rd_slice_offset(&rkbuf->rkbuf_reader) +
                (size_t)hdr.Length;
        rd_kafka_buf_read_i8(rkbuf, &hdr.MsgAttributes);

        rd_kafka_buf_read_varint(rkbuf, &hdr.TimestampDelta);
        rd_kafka_buf_read_varint(rkbuf, &hdr.OffsetDelta);
        hdr.Offset = msetr->msetr_v2_hdr->BaseOffset + hdr.OffsetDelta;

        /* Skip message if outdated */
        if (hdr.Offset < rktp->rktp_offsets.fetch_offset) {
                rd_rkb_dbg(msetr->msetr_rkb, MSG, "MSG",
                           "%s [%"PRId32"]: "
                           "Skip offset %"PRId64" < fetch_offset %"PRId64,
                           rktp->rktp_rkt->rkt_topic->str,
                           rktp->rktp_partition,
                           hdr.Offset, rktp->rktp_offsets.fetch_offset);
                rd_kafka_buf_skip_to(rkbuf, message_end);
                return RD_KAFKA_RESP_ERR_NO_ERROR; /* Continue with next msg */
        }

        rd_kafka_buf_read_bytes_varint(rkbuf, &hdr.Key);
        rd_kafka_buf_read_bytes_varint(rkbuf, &hdr.Value);

        /* We parse the Headers later, just store the size (possibly
         * truncated) and pointer to the headers. */
        hdr.Headers.len = (int32_t)(message_end -
                                    rd_slice_offset(&rkbuf->rkbuf_reader));
        rd_kafka_buf_read_ptr(rkbuf, &hdr.Headers.data, hdr.Headers.len);

        /* Create op/message container for message. */
        rko = rd_kafka_op_new_fetch_msg(&rkm, rktp, msetr->msetr_tver->version,
                                        rkbuf,
                                        hdr.Offset,
                                        (size_t)RD_KAFKAP_BYTES_LEN(&hdr.Key),
                                        RD_KAFKAP_BYTES_IS_NULL(&hdr.Key) ?
                                        NULL : hdr.Key.data,
                                        (size_t)RD_KAFKAP_BYTES_LEN(&hdr.Value),
                                        RD_KAFKAP_BYTES_IS_NULL(&hdr.Value) ?
                                        NULL : hdr.Value.data);

        /* Store pointer to unparsed message headers, they will
         * be parsed on the first access.
         * This pointer points to the rkbuf payload.
         * Note: can't perform struct copy here due to const fields (MSVC) */
        rkm->rkm_u.consumer.binhdrs.len  = hdr.Headers.len;
        rkm->rkm_u.consumer.binhdrs.data = hdr.Headers.data;

        /* Set timestamp.
         *
         * When broker assigns the timestamps (LOG_APPEND_TIME) it will
         * assign the same timestamp for all messages in a MessageSet
         * using MaxTimestamp. */
        if ((msetr->msetr_v2_hdr->Attributes &
             RD_KAFKA_MSG_ATTR_LOG_APPEND_TIME) ||
            (hdr.MsgAttributes & RD_KAFKA_MSG_ATTR_LOG_APPEND_TIME)) {
                rkm->rkm_tstype = RD_KAFKA_TIMESTAMP_LOG_APPEND_TIME;
                rkm->rkm_timestamp = msetr->msetr_v2_hdr->MaxTimestamp;
        } else {
                rkm->rkm_tstype = RD_KAFKA_TIMESTAMP_CREATE_TIME;
                rkm->rkm_timestamp =
                        msetr->msetr_v2_hdr->BaseTimestamp + hdr.TimestampDelta;
        }

        /* Enqueue message on temporary queue */
        rd_kafka_q_enq(&msetr->msetr_rkq, rko);
        msetr->msetr_msgcnt++;
        msetr->msetr_msg_bytes += rkm->rkm_key_len + rkm->rkm_len;

        return RD_KAFKA_RESP_ERR_NO_ERROR;

 err_parse:
        /* Count all parse errors as partial message errors. */
        rd_atomic64_add(&msetr->msetr_rkb->rkb_c.rx_partial, 1);
        return rkbuf->rkbuf_err;
}
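The *_varint reads above use the encoding Kafka adopted for v2 records: seven payload bits per byte with the high bit as a continuation flag, and signed values zigzag-encoded. A self-contained sketch of such a decoder (illustrative; not librdkafka's rd_kafka_buf_read_varint()):

#include <stddef.h>
#include <stdint.h>

/* Sketch: decode a zigzag-encoded signed varint as used by Kafka's
 * v2 record format. Returns the number of bytes consumed, or 0 on
 * truncated or overlong input. */
static size_t read_varint (const uint8_t *buf, size_t len, int64_t *valp) {
        uint64_t u = 0;
        size_t i;

        for (i = 0; i < len && i < 10; i++) {
                u |= (uint64_t)(buf[i] & 0x7f) << (7 * i);
                if (!(buf[i] & 0x80)) {
                        /* Zigzag-decode: (u >> 1) ^ -(u & 1) maps
                         * 0,1,2,3,... back to 0,-1,1,-2,... */
                        *valp = (int64_t)(u >> 1) ^ -(int64_t)(u & 1);
                        return i + 1;
                }
        }
        return 0; /* truncated (or more than 10 continuation bytes) */
}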