Ejemplo n.º 1
0
/**
 * Move 'cnt' entries from 'srcq' to 'dstq'.
 * If 'cnt' == -1 all entries will be moved.
 * Returns the number of entries moved.
 */
int rd_kafka_q_move_cnt (rd_kafka_q_t *dstq, rd_kafka_q_t *srcq,
			    int cnt, int do_locks) {
	rd_kafka_op_t *rko;
        int mcnt = 0;

        if (do_locks) {
		mtx_lock(&srcq->rkq_lock);
		mtx_lock(&dstq->rkq_lock);
	}

	if (!dstq->rkq_fwdq && !srcq->rkq_fwdq) {
		if (cnt > 0 && dstq->rkq_qlen == 0)
			rd_kafka_q_io_event(dstq);

		/* Optimization, if 'cnt' is equal/larger than all
		 * items of 'srcq' we can move the entire queue. */
		if (cnt == -1 ||
                    cnt >= (int)srcq->rkq_qlen) {
                        rd_dassert(TAILQ_EMPTY(&srcq->rkq_q) ||
                                   srcq->rkq_qlen > 0);
			TAILQ_CONCAT(&dstq->rkq_q, &srcq->rkq_q, rko_link);
			mcnt = srcq->rkq_qlen;
                        dstq->rkq_qlen += srcq->rkq_qlen;
                        dstq->rkq_qsize += srcq->rkq_qsize;
			rd_kafka_q_reset(srcq);
		} else {
			while (mcnt < cnt &&
			       (rko = TAILQ_FIRST(&srcq->rkq_q))) {
				TAILQ_REMOVE(&srcq->rkq_q, rko, rko_link);
				TAILQ_INSERT_TAIL(&dstq->rkq_q, rko, rko_link);
                                srcq->rkq_qlen--;
                                dstq->rkq_qlen++;
                                srcq->rkq_qsize -= rko->rko_len;
                                dstq->rkq_qsize += rko->rko_len;
				mcnt++;
			}
		}
	} else
		mcnt = rd_kafka_q_move_cnt(dstq->rkq_fwdq ? dstq->rkq_fwdq:dstq,
					   srcq->rkq_fwdq ? srcq->rkq_fwdq:srcq,
					   cnt, do_locks);

	if (do_locks) {
		mtx_unlock(&dstq->rkq_lock);
		mtx_unlock(&srcq->rkq_lock);
	}

	return mcnt;
}
Ejemplo n.º 2
0
/**
 * Set/clear forward queue.
 * Queue forwarding enables message routing inside rdkafka.
 * Typical use is to re-route all fetched messages for all partitions
 * to one single queue.
 *
 * All access to rkq_fwdq are protected by rkq_lock.
 */
void rd_kafka_q_fwd_set0 (rd_kafka_q_t *srcq, rd_kafka_q_t *destq,
                          int do_lock) {

        if (do_lock)
                mtx_lock(&srcq->rkq_lock);
	if (srcq->rkq_fwdq) {
		rd_kafka_q_destroy(srcq->rkq_fwdq);
		srcq->rkq_fwdq = NULL;
	}
	if (destq) {
		rd_kafka_q_keep(destq);

		/* If rkq has ops in queue, append them to fwdq's queue.
		 * This is an irreversible operation. */
                if (srcq->rkq_qlen > 0) {
			rd_dassert(destq->rkq_flags & RD_KAFKA_Q_F_READY);
			rd_kafka_q_concat(destq, srcq);
		}

		srcq->rkq_fwdq = destq;
	}
        if (do_lock)
                mtx_unlock(&srcq->rkq_lock);
}
Ejemplo n.º 3
0
/**
 * @brief Build client-final-message
 * @returns -1 on error.
 */
static int
rd_kafka_sasl_scram_build_client_final_message (
        rd_kafka_transport_t *rktrans,
        const rd_chariov_t *salt,
        const char *server_nonce,
        const rd_chariov_t *server_first_msg,
        int itcnt, rd_chariov_t *out) {
        struct rd_kafka_sasl_scram_state *state = rktrans->rktrans_sasl.state;
        const rd_kafka_conf_t *conf = &rktrans->rktrans_rkb->rkb_rk->rk_conf;
        rd_chariov_t SaslPassword =
                { .ptr = conf->sasl.password,
                  .size = strlen(conf->sasl.password) };
        rd_chariov_t SaltedPassword =
                { .ptr = rd_alloca(EVP_MAX_MD_SIZE) };
        rd_chariov_t ClientKey =
                { .ptr = rd_alloca(EVP_MAX_MD_SIZE) };
        rd_chariov_t ServerKey =
                { .ptr = rd_alloca(EVP_MAX_MD_SIZE) };
        rd_chariov_t StoredKey =
                { .ptr = rd_alloca(EVP_MAX_MD_SIZE) };
        rd_chariov_t AuthMessage = RD_ZERO_INIT;
        rd_chariov_t ClientSignature =
                { .ptr = rd_alloca(EVP_MAX_MD_SIZE) };
        rd_chariov_t ServerSignature =
                { .ptr = rd_alloca(EVP_MAX_MD_SIZE) };
        const rd_chariov_t ClientKeyVerbatim =
                { .ptr = "Client Key", .size = 10 };
        const rd_chariov_t ServerKeyVerbatim =
                { .ptr = "Server Key", .size = 10 };
        rd_chariov_t ClientProof =
                { .ptr = rd_alloca(EVP_MAX_MD_SIZE) };
        rd_chariov_t client_final_msg_wo_proof;
        char *ClientProofB64;
        int i;

        /* Constructing the ClientProof attribute (p):
         *
         * p = Base64-encoded ClientProof
         * SaltedPassword  := Hi(Normalize(password), salt, i)
         * ClientKey       := HMAC(SaltedPassword, "Client Key")
         * StoredKey       := H(ClientKey)
         * AuthMessage     := client-first-message-bare + "," +
         *                    server-first-message + "," +
         *                    client-final-message-without-proof
         * ClientSignature := HMAC(StoredKey, AuthMessage)
         * ClientProof     := ClientKey XOR ClientSignature
         * ServerKey       := HMAC(SaltedPassword, "Server Key")
         * ServerSignature := HMAC(ServerKey, AuthMessage)
         */

        /* SaltedPassword  := Hi(Normalize(password), salt, i) */
        if (rd_kafka_sasl_scram_Hi(
                    rktrans, &SaslPassword, salt,
                    itcnt, &SaltedPassword) == -1)
                return -1;

        /* ClientKey       := HMAC(SaltedPassword, "Client Key") */
        if (rd_kafka_sasl_scram_HMAC(
                    rktrans, &SaltedPassword, &ClientKeyVerbatim,
                    &ClientKey) == -1)
                return -1;

        /* StoredKey       := H(ClientKey) */
        if (rd_kafka_sasl_scram_H(rktrans, &ClientKey, &StoredKey) == -1)
                return -1;

        /* client-final-message-without-proof */
        rd_kafka_sasl_scram_build_client_final_message_wo_proof(
                state, server_nonce, &client_final_msg_wo_proof);

        /* AuthMessage     := client-first-message-bare + "," +
         *                    server-first-message + "," +
         *                    client-final-message-without-proof */
        AuthMessage.size =
                state->first_msg_bare.size + 1 +
                server_first_msg->size + 1 +
                client_final_msg_wo_proof.size;
        AuthMessage.ptr = rd_alloca(AuthMessage.size+1);
        rd_snprintf(AuthMessage.ptr, AuthMessage.size+1,
                    "%.*s,%.*s,%.*s",
                    (int)state->first_msg_bare.size, state->first_msg_bare.ptr,
                    (int)server_first_msg->size, server_first_msg->ptr,
                    (int)client_final_msg_wo_proof.size,
                    client_final_msg_wo_proof.ptr);

        /*
         * Calculate ServerSignature for later verification when
         * server-final-message is received.
         */

        /* ServerKey       := HMAC(SaltedPassword, "Server Key") */
        if (rd_kafka_sasl_scram_HMAC(
                    rktrans, &SaltedPassword, &ServerKeyVerbatim,
                    &ServerKey) == -1) {
                rd_free(client_final_msg_wo_proof.ptr);
                return -1;
        }

        /* ServerSignature := HMAC(ServerKey, AuthMessage) */
        if (rd_kafka_sasl_scram_HMAC(rktrans, &ServerKey,
                                     &AuthMessage, &ServerSignature) == -1) {
                rd_free(client_final_msg_wo_proof.ptr);
                return -1;
        }

        /* Store the Base64 encoded ServerSignature for quick comparison */
        state->ServerSignatureB64 = rd_base64_encode(&ServerSignature);


        /*
         * Continue with client-final-message
         */

        /* ClientSignature := HMAC(StoredKey, AuthMessage) */
        if (rd_kafka_sasl_scram_HMAC(rktrans, &StoredKey,
                                     &AuthMessage, &ClientSignature) == -1) {
                rd_free(client_final_msg_wo_proof.ptr);
                return -1;
        }

        /* ClientProof     := ClientKey XOR ClientSignature */
        assert(ClientKey.size == ClientSignature.size);
        for (i = 0 ; i < (int)ClientKey.size ; i++)
                ClientProof.ptr[i] = ClientKey.ptr[i] ^ ClientSignature.ptr[i];
        ClientProof.size = ClientKey.size;


        /* Base64 encoded ClientProof */
        ClientProofB64 = rd_base64_encode(&ClientProof);

        /* Construct client-final-message */
        out->size = client_final_msg_wo_proof.size +
                strlen(",p=") + strlen(ClientProofB64);
        out->ptr = rd_malloc(out->size + 1);

        rd_snprintf(out->ptr, out->size+1,
                    "%.*s,p=%s",
                    (int)client_final_msg_wo_proof.size,
                    client_final_msg_wo_proof.ptr,
                    ClientProofB64);
        rd_free(ClientProofB64);
        rd_free(client_final_msg_wo_proof.ptr);

        return 0;
}


/**
 * @brief Handle first message from server
 *
 * Parse server response which looks something like:
 * "r=fyko+d2lbbFgONR....,s=QSXCR+Q6sek8bf92,i=4096"
 *
 * @returns -1 on error.
 */
static int
rd_kafka_sasl_scram_handle_server_first_message (rd_kafka_transport_t *rktrans,
                                                 const rd_chariov_t *in,
                                                 rd_chariov_t *out,
                                                 char *errstr,
                                                 size_t errstr_size) {
        struct rd_kafka_sasl_scram_state *state = rktrans->rktrans_sasl.state;
        char *server_nonce;
        rd_chariov_t salt_b64, salt;
        char *itcntstr;
        const char *endptr;
        int itcnt;
        char *attr_m;

        /* Mandatory future extension check */
        if ((attr_m = rd_kafka_sasl_scram_get_attr(
                     in, 'm', NULL, NULL, 0))) {
                rd_snprintf(errstr, errstr_size,
                            "Unsupported mandatory SCRAM extension");
                rd_free(attr_m);
                return -1;
        }

        /* Server nonce */
        if (!(server_nonce = rd_kafka_sasl_scram_get_attr(
                      in, 'r',
                      "Server nonce in server-first-message",
                      errstr, errstr_size)))
                return -1;

        if (strlen(server_nonce) <= state->cnonce.size ||
            strncmp(state->cnonce.ptr, server_nonce, state->cnonce.size)) {
                rd_snprintf(errstr, errstr_size,
                            "Server/client nonce mismatch in "
                            "server-first-message");
                rd_free(server_nonce);
                return -1;
        }

        /* Salt (Base64) */
        if (!(salt_b64.ptr = rd_kafka_sasl_scram_get_attr(
                      in, 's',
                      "Salt in server-first-message",
                      errstr, errstr_size))) {
                rd_free(server_nonce);
                return -1;
        }
        salt_b64.size = strlen(salt_b64.ptr);

        /* Convert Salt to binary */
        if (rd_base64_decode(&salt_b64, &salt) == -1) {
                rd_snprintf(errstr, errstr_size,
                            "Invalid Base64 Salt in server-first-message");
                rd_free(server_nonce);
                rd_free(salt_b64.ptr);
        }
        rd_free(salt_b64.ptr);

        /* Iteration count (as string) */
        if (!(itcntstr = rd_kafka_sasl_scram_get_attr(
                      in, 'i',
                      "Iteration count in server-first-message",
                      errstr, errstr_size))) {
                rd_free(server_nonce);
                rd_free(salt.ptr);
                return -1;
        }

        /* Iteration count (as int) */
        errno = 0;
        itcnt = (int)strtoul(itcntstr, (char **)&endptr, 10);
        if (itcntstr == endptr || *endptr != '\0' || errno != 0 ||
            itcnt > 1000000) {
                rd_snprintf(errstr, errstr_size,
                            "Invalid value (not integer or too large) "
                            "for Iteration count in server-first-message");
                rd_free(server_nonce);
                rd_free(salt.ptr);
                rd_free(itcntstr);
                return -1;
        }
        rd_free(itcntstr);

        /* Build client-final-message */
        if (rd_kafka_sasl_scram_build_client_final_message(
                    rktrans, &salt, server_nonce, in, itcnt, out) == -1) {
                rd_snprintf(errstr, errstr_size,
                            "Failed to build SCRAM client-final-message");
                rd_free(salt.ptr);
                rd_free(server_nonce);
                return -1;
        }

        rd_free(server_nonce);
        rd_free(salt.ptr);

        return 0;
}

/**
 * @brief Handle server-final-message
 * 
 *        This is the end of authentication and the SCRAM state
 *        will be freed at the end of this function regardless of
 *        authentication outcome.
 *
 * @returns -1 on failure
 */
static int
rd_kafka_sasl_scram_handle_server_final_message (
        rd_kafka_transport_t *rktrans,
        const rd_chariov_t *in,
        char *errstr, size_t errstr_size) {
        struct rd_kafka_sasl_scram_state *state = rktrans->rktrans_sasl.state;
        char *attr_v, *attr_e;

        if ((attr_e = rd_kafka_sasl_scram_get_attr(
                            in, 'e', "server-error in server-final-message",
                            errstr, errstr_size))) {
                /* Authentication failed */

                rd_snprintf(errstr, errstr_size,
                            "SASL SCRAM authentication failed: "
                            "broker responded with %s",
                            attr_e);
                rd_free(attr_e);
                return -1;

        } else if ((attr_v = rd_kafka_sasl_scram_get_attr(
                     in, 'v', "verifier in server-final-message",
                     errstr, errstr_size))) {
                const rd_kafka_conf_t *conf;

                /* Authentication succesful on server,
                 * but we need to verify the ServerSignature too. */
                rd_rkb_dbg(rktrans->rktrans_rkb, SECURITY | RD_KAFKA_DBG_BROKER,
                           "SCRAMAUTH",
                           "SASL SCRAM authentication succesful on server: "
                           "verifying ServerSignature");

                if (strcmp(attr_v, state->ServerSignatureB64)) {
                        rd_snprintf(errstr, errstr_size,
                                    "SASL SCRAM authentication failed: "
                                    "ServerSignature mismatch "
                                    "(server's %s != ours %s)",
                                    attr_v, state->ServerSignatureB64);
                        rd_free(attr_v);
                        return -1;
                }
                rd_free(attr_v);

                conf = &rktrans->rktrans_rkb->rkb_rk->rk_conf;

                rd_rkb_dbg(rktrans->rktrans_rkb, SECURITY | RD_KAFKA_DBG_BROKER,
                           "SCRAMAUTH",
                           "Authenticated as %s using %s",
                           conf->sasl.username,
                           conf->sasl.mechanisms);

                rd_kafka_sasl_auth_done(rktrans);
                return 0;

        } else {
                rd_snprintf(errstr, errstr_size,
                            "SASL SCRAM authentication failed: "
                            "no verifier or server-error returned from broker");
                return -1;
        }
}



/**
 * @brief Build client-first-message
 */
static void
rd_kafka_sasl_scram_build_client_first_message (
        rd_kafka_transport_t *rktrans,
        rd_chariov_t *out) {
        char *sasl_username;
        struct rd_kafka_sasl_scram_state *state = rktrans->rktrans_sasl.state;
        const rd_kafka_conf_t *conf = &rktrans->rktrans_rkb->rkb_rk->rk_conf;

        rd_kafka_sasl_scram_generate_nonce(&state->cnonce);

        sasl_username = rd_kafka_sasl_safe_string(conf->sasl.username);

        out->size = strlen("n,,n=,r=") + strlen(sasl_username) +
                state->cnonce.size;
        out->ptr = rd_malloc(out->size+1);

        rd_snprintf(out->ptr, out->size+1,
                    "n,,n=%s,r=%.*s",
                    sasl_username,
                    (int)state->cnonce.size, state->cnonce.ptr);
        rd_free(sasl_username);

        /* Save client-first-message-bare (skip gs2-header) */
        state->first_msg_bare.size = out->size-3;
        state->first_msg_bare.ptr  = rd_memdup(out->ptr+3,
                                               state->first_msg_bare.size);
}



/**
 * @brief SASL SCRAM client state machine
 * @returns -1 on failure (errstr set), else 0.
 */
static int rd_kafka_sasl_scram_fsm (rd_kafka_transport_t *rktrans,
                                    const rd_chariov_t *in,
                                    char *errstr, size_t errstr_size) {
        static const char *state_names[] = {
                "client-first-message",
                "server-first-message",
                "client-final-message",
        };
        struct rd_kafka_sasl_scram_state *state = rktrans->rktrans_sasl.state;
        rd_chariov_t out = RD_ZERO_INIT;
        int r = -1;
        rd_ts_t ts_start = rd_clock();
        int prev_state = state->state;

        rd_rkb_dbg(rktrans->rktrans_rkb, SECURITY, "SASLSCRAM",
                   "SASL SCRAM client in state %s",
                   state_names[state->state]);

        switch (state->state)
        {
        case RD_KAFKA_SASL_SCRAM_STATE_CLIENT_FIRST_MESSAGE:
                rd_dassert(!in); /* Not expecting any server-input */

                rd_kafka_sasl_scram_build_client_first_message(rktrans, &out);
                state->state = RD_KAFKA_SASL_SCRAM_STATE_SERVER_FIRST_MESSAGE;
                break;


        case RD_KAFKA_SASL_SCRAM_STATE_SERVER_FIRST_MESSAGE:
                rd_dassert(in); /* Requires server-input */

                if (rd_kafka_sasl_scram_handle_server_first_message(
                             rktrans, in, &out, errstr, errstr_size) == -1)
                        return -1;

                state->state = RD_KAFKA_SASL_SCRAM_STATE_CLIENT_FINAL_MESSAGE;
                break;

        case RD_KAFKA_SASL_SCRAM_STATE_CLIENT_FINAL_MESSAGE:
                rd_dassert(in);  /* Requires server-input */

                r = rd_kafka_sasl_scram_handle_server_final_message(
                        rktrans, in, errstr, errstr_size);
                break;
        }

        if (out.ptr) {
                r = rd_kafka_sasl_send(rktrans, out.ptr, (int)out.size,
                                       errstr, errstr_size);
                rd_free(out.ptr);
        }

        ts_start = (rd_clock() - ts_start) / 1000;
        if (ts_start >= 100)
                rd_rkb_dbg(rktrans->rktrans_rkb, SECURITY, "SCRAM",
                           "SASL SCRAM state %s handled in %"PRId64"ms",
                           state_names[prev_state], ts_start);


        return r;
}


/**
 * @brief Handle received frame from broker.
 */
static int rd_kafka_sasl_scram_recv (rd_kafka_transport_t *rktrans,
                                     const void *buf, size_t size,
                                     char *errstr, size_t errstr_size) {
        const rd_chariov_t in = { .ptr = (char *)buf, .size = size };
        return rd_kafka_sasl_scram_fsm(rktrans, &in, errstr, errstr_size);
}


/**
 * @brief Initialize and start SASL SCRAM (builtin) authentication.
 *
 * Returns 0 on successful init and -1 on error.
 *
 * @locality broker thread
 */
static int rd_kafka_sasl_scram_client_new (rd_kafka_transport_t *rktrans,
                                    const char *hostname,
                                    char *errstr, size_t errstr_size) {
        struct rd_kafka_sasl_scram_state *state;

        state = rd_calloc(1, sizeof(*state));
        state->state = RD_KAFKA_SASL_SCRAM_STATE_CLIENT_FIRST_MESSAGE;
        rktrans->rktrans_sasl.state = state;

        /* Kick off the FSM */
        return rd_kafka_sasl_scram_fsm(rktrans, NULL, errstr, errstr_size);
}



/**
 * @brief Validate SCRAM config and look up the hash function
 */
static int rd_kafka_sasl_scram_conf_validate (rd_kafka_t *rk,
                                              char *errstr,
                                              size_t errstr_size) {
        const char *mech = rk->rk_conf.sasl.mechanisms;

        if (!rk->rk_conf.sasl.username || !rk->rk_conf.sasl.password) {
                rd_snprintf(errstr, errstr_size,
                            "sasl.username and sasl.password must be set");
                return -1;
        }

        if (!strcmp(mech, "SCRAM-SHA-1")) {
                rk->rk_conf.sasl.scram_evp = EVP_sha1();
                rk->rk_conf.sasl.scram_H = SHA1;
                rk->rk_conf.sasl.scram_H_size = SHA_DIGEST_LENGTH;
        } else if (!strcmp(mech, "SCRAM-SHA-256")) {
                rk->rk_conf.sasl.scram_evp = EVP_sha256();
                rk->rk_conf.sasl.scram_H = SHA256;
                rk->rk_conf.sasl.scram_H_size = SHA256_DIGEST_LENGTH;
        } else if (!strcmp(mech, "SCRAM-SHA-512")) {
                rk->rk_conf.sasl.scram_evp = EVP_sha512();
                rk->rk_conf.sasl.scram_H = SHA512;
                rk->rk_conf.sasl.scram_H_size = SHA512_DIGEST_LENGTH;
        } else {
                rd_snprintf(errstr, errstr_size,
                            "Unsupported hash function: %s "
                            "(try SCRAM-SHA-512)",
                            mech);
                return -1;
        }

        return 0;
}




const struct rd_kafka_sasl_provider rd_kafka_sasl_scram_provider = {
        .name          = "SCRAM (builtin)",
        .client_new    = rd_kafka_sasl_scram_client_new,
        .recv          = rd_kafka_sasl_scram_recv,
        .close         = rd_kafka_sasl_scram_close,
        .conf_validate = rd_kafka_sasl_scram_conf_validate,
};
/**
 * @brief Decompress MessageSet, pass the uncompressed MessageSet to
 *        the MessageSet reader.
 */
static rd_kafka_resp_err_t
rd_kafka_msgset_reader_decompress (rd_kafka_msgset_reader_t *msetr,
                                   int MsgVersion, int Attributes,
                                   int64_t Timestamp, int64_t Offset,
                                   const void *compressed,
                                   size_t compressed_size) {
        struct iovec iov = { .iov_base = NULL, .iov_len = 0 };
        rd_kafka_toppar_t *rktp = msetr->msetr_rktp;
        int codec = Attributes & RD_KAFKA_MSG_ATTR_COMPRESSION_MASK;
        rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR;
        rd_kafka_buf_t *rkbufz;

        switch (codec)
        {
#if WITH_ZLIB
        case RD_KAFKA_COMPRESSION_GZIP:
        {
                uint64_t outlenx = 0;

                /* Decompress Message payload */
                iov.iov_base = rd_gz_decompress(compressed, (int)compressed_size,
                                                &outlenx);
                if (unlikely(!iov.iov_base)) {
                        rd_rkb_dbg(msetr->msetr_rkb, MSG, "GZIP",
                                   "Failed to decompress Gzip "
                                   "message at offset %"PRId64
                                   " of %"PRIusz" bytes: "
                                   "ignoring message",
                                   Offset, compressed_size);
                        err = RD_KAFKA_RESP_ERR__BAD_COMPRESSION;
                        goto err;
                }

                iov.iov_len = (size_t)outlenx;
        }
        break;
#endif

#if WITH_SNAPPY
        case RD_KAFKA_COMPRESSION_SNAPPY:
        {
                const char *inbuf = compressed;
                size_t inlen = compressed_size;
                int r;
                static const unsigned char snappy_java_magic[] =
                        { 0x82, 'S','N','A','P','P','Y', 0 };
                static const size_t snappy_java_hdrlen = 8+4+4;

                /* snappy-java adds its own header (SnappyCodec)
                 * which is not compatible with the official Snappy
                 * implementation.
                 *   8: magic, 4: version, 4: compatible
                 * followed by any number of chunks:
                 *   4: length
                 * ...: snappy-compressed data. */
                if (likely(inlen > snappy_java_hdrlen + 4 &&
                           !memcmp(inbuf, snappy_java_magic, 8))) {
                        /* snappy-java framing */
                        char errstr[128];

                        inbuf  = inbuf + snappy_java_hdrlen;
                        inlen -= snappy_java_hdrlen;
                        iov.iov_base = rd_kafka_snappy_java_uncompress(
                                inbuf, inlen,
                                &iov.iov_len,
                                errstr, sizeof(errstr));

                        if (unlikely(!iov.iov_base)) {
                                rd_rkb_dbg(msetr->msetr_rkb, MSG, "SNAPPY",
                                           "%s [%"PRId32"]: "
                                           "Snappy decompression for message "
                                           "at offset %"PRId64" failed: %s: "
                                           "ignoring message",
                                           rktp->rktp_rkt->rkt_topic->str,
                                           rktp->rktp_partition,
                                           Offset, errstr);
                                err = RD_KAFKA_RESP_ERR__BAD_COMPRESSION;
                                goto err;
                        }


                } else {
                        /* No framing */

                        /* Acquire uncompressed length */
                        if (unlikely(!rd_kafka_snappy_uncompressed_length(
                                             inbuf, inlen, &iov.iov_len))) {
                                rd_rkb_dbg(msetr->msetr_rkb, MSG, "SNAPPY",
                                           "Failed to get length of Snappy "
                                           "compressed payload "
                                           "for message at offset %"PRId64
                                           " (%"PRIusz" bytes): "
                                           "ignoring message",
                                           Offset, inlen);
                                err = RD_KAFKA_RESP_ERR__BAD_COMPRESSION;
                                goto err;
                        }

                        /* Allocate output buffer for uncompressed data */
                        iov.iov_base = rd_malloc(iov.iov_len);
                        if (unlikely(!iov.iov_base)) {
                                rd_rkb_dbg(msetr->msetr_rkb, MSG, "SNAPPY",
                                           "Failed to allocate Snappy "
                                           "decompress buffer of size %"PRIusz
                                           "for message at offset %"PRId64
                                           " (%"PRIusz" bytes): %s: "
                                           "ignoring message",
                                           iov.iov_len, Offset, inlen,
                                           rd_strerror(errno));
                                err = RD_KAFKA_RESP_ERR__CRIT_SYS_RESOURCE;
                                goto err;
                        }

                        /* Uncompress to outbuf */
                        if (unlikely((r = rd_kafka_snappy_uncompress(
                                              inbuf, inlen, iov.iov_base)))) {
                                rd_rkb_dbg(msetr->msetr_rkb, MSG, "SNAPPY",
                                           "Failed to decompress Snappy "
                                           "payload for message at offset "
                                           "%"PRId64" (%"PRIusz" bytes): %s: "
                                           "ignoring message",
                                           Offset, inlen,
                                           rd_strerror(-r/*negative errno*/));
                                rd_free(iov.iov_base);
                                err = RD_KAFKA_RESP_ERR__BAD_COMPRESSION;
                                goto err;
                        }
                }

        }
        break;
#endif

        case RD_KAFKA_COMPRESSION_LZ4:
        {
                err = rd_kafka_lz4_decompress(msetr->msetr_rkb,
                                              /* Proper HC? */
                                              MsgVersion >= 1 ? 1 : 0,
                                              Offset,
                                              /* @warning Will modify compressed
                                               *          if no proper HC */
                                              (char *)compressed,
                                              compressed_size,
                                              &iov.iov_base, &iov.iov_len);
                if (err)
                        goto err;
        }
        break;

        default:
                rd_rkb_dbg(msetr->msetr_rkb, MSG, "CODEC",
                           "%s [%"PRId32"]: Message at offset %"PRId64
                           " with unsupported "
                           "compression codec 0x%x: message ignored",
                           rktp->rktp_rkt->rkt_topic->str,
                           rktp->rktp_partition,
                           Offset, (int)codec);

                err = RD_KAFKA_RESP_ERR__NOT_IMPLEMENTED;
                goto err;
        }


        rd_assert(iov.iov_base);

        /*
         * Decompression successful
         */

        /* Create a new buffer pointing to the uncompressed
         * allocated buffer (outbuf) and let messages keep a reference to
         * this new buffer. */
        rkbufz = rd_kafka_buf_new_shadow(iov.iov_base, iov.iov_len, rd_free);
        rkbufz->rkbuf_rkb = msetr->msetr_rkbuf->rkbuf_rkb;
        rd_kafka_broker_keep(rkbufz->rkbuf_rkb);


        /* In MsgVersion v0..1 the decompressed data contains
         * an inner MessageSet, pass it to a new MessageSet reader.
         *
         * For MsgVersion v2 the decompressed data are the list of messages.
         */

        if (MsgVersion <= 1) {
                /* Pass decompressed data (inner Messageset)
                 * to new instance of the MessageSet parser. */
                rd_kafka_msgset_reader_t inner_msetr;
                rd_kafka_msgset_reader_init(&inner_msetr,
                                            rkbufz,
                                            msetr->msetr_rktp,
                                            msetr->msetr_tver,
                                            &msetr->msetr_rkq);

                if (MsgVersion == 1) {
                        /* postproc() will convert relative to
                         * absolute offsets */
                        inner_msetr.msetr_relative_offsets = 1;
                        inner_msetr.msetr_outer.offset = Offset;

                        /* Apply single LogAppendTime timestamp for
                         * all messages. */
                        if (Attributes & RD_KAFKA_MSG_ATTR_LOG_APPEND_TIME) {
                                inner_msetr.msetr_outer.tstype =
                                        RD_KAFKA_TIMESTAMP_LOG_APPEND_TIME;
                                inner_msetr.msetr_outer.timestamp = Timestamp;
                        }
                }

                /* Parse the inner MessageSet */
                err = rd_kafka_msgset_reader_run(&inner_msetr);


        } else {
                /* MsgVersion 2 */
                rd_kafka_buf_t *orig_rkbuf = msetr->msetr_rkbuf;

                /* Temporarily replace read buffer with uncompressed buffer */
                msetr->msetr_rkbuf = rkbufz;

                /* Read messages */
                err = rd_kafka_msgset_reader_msgs_v2(msetr);

                /* Restore original buffer */
                msetr->msetr_rkbuf = orig_rkbuf;
        }

        /* Loose our refcnt of the uncompressed rkbuf.
         * Individual messages/rko's will have their own reference. */
        rd_kafka_buf_destroy(rkbufz);

        return err;

 err:
        /* Enqueue error messsage:
         * Create op and push on temporary queue. */
        rd_kafka_q_op_err(&msetr->msetr_rkq, RD_KAFKA_OP_CONSUMER_ERR,
                          err, msetr->msetr_tver->version, rktp, Offset,
                          "Decompression (codec 0x%x) of message at %"PRIu64
                          " of %"PRIu64" bytes failed: %s",
                          codec, Offset, compressed_size, rd_kafka_err2str(err));

        return err;

}



/**
 * @brief Message parser for MsgVersion v0..1
 *
 * @returns RD_KAFKA_RESP_ERR_NO_ERROR on success or on single-message errors,
 *          or any other error code when the MessageSet parser should stop
 *          parsing (such as for partial Messages).
 */
static rd_kafka_resp_err_t
rd_kafka_msgset_reader_msg_v0_1 (rd_kafka_msgset_reader_t *msetr) {
        rd_kafka_buf_t *rkbuf = msetr->msetr_rkbuf;
        rd_kafka_toppar_t *rktp = msetr->msetr_rktp;
        rd_kafka_broker_t *rkb = msetr->msetr_rkb;
        struct {
                int64_t Offset;       /* MessageSet header */
                int32_t MessageSize;  /* MessageSet header */
                uint32_t Crc;
                int8_t  MagicByte;  /* MsgVersion */
                int8_t  Attributes;
                int64_t Timestamp;  /* v1 */
        } hdr; /* Message header */
        rd_kafkap_bytes_t Key;
        rd_kafkap_bytes_t Value;
        int32_t Value_len;
        rd_kafka_op_t *rko;
        size_t hdrsize = 6; /* Header size following MessageSize */
        rd_slice_t crc_slice;
        rd_kafka_msg_t *rkm;
        int relative_offsets = 0;
        const char *reloff_str = "";
        /* Only log decoding errors if protocol debugging enabled. */
        int log_decode_errors = (rkbuf->rkbuf_rkb->rkb_rk->rk_conf.debug &
                                 RD_KAFKA_DBG_PROTOCOL) ? LOG_DEBUG : 0;
        size_t message_end;

        rd_kafka_buf_read_i64(rkbuf, &hdr.Offset);
        rd_kafka_buf_read_i32(rkbuf, &hdr.MessageSize);
        message_end = rd_slice_offset(&rkbuf->rkbuf_reader) + hdr.MessageSize;

        rd_kafka_buf_read_i32(rkbuf, &hdr.Crc);
        if (!rd_slice_narrow_copy_relative(&rkbuf->rkbuf_reader, &crc_slice,
                                           hdr.MessageSize - 4))
                rd_kafka_buf_check_len(rkbuf, hdr.MessageSize - 4);

        rd_kafka_buf_read_i8(rkbuf, &hdr.MagicByte);
        rd_kafka_buf_read_i8(rkbuf, &hdr.Attributes);

        if (hdr.MagicByte == 1) { /* MsgVersion */
                rd_kafka_buf_read_i64(rkbuf, &hdr.Timestamp);
                hdrsize += 8;
                /* MsgVersion 1 has relative offsets for compressed MessageSets*/
                if (!(hdr.Attributes & RD_KAFKA_MSG_ATTR_COMPRESSION_MASK) &&
                    msetr->msetr_relative_offsets) {
                        relative_offsets = 1;
                        reloff_str = "relative ";
                }
        } else
                hdr.Timestamp = 0;

        /* Verify MessageSize */
        if (unlikely(hdr.MessageSize < (ssize_t)hdrsize))
                rd_kafka_buf_parse_fail(rkbuf,
                                        "Message at %soffset %"PRId64
                                        " MessageSize %"PRId32
                                        " < hdrsize %"PRIusz,
                                        reloff_str,
                                        hdr.Offset, hdr.MessageSize, hdrsize);

        /* Early check for partial messages */
        rd_kafka_buf_check_len(rkbuf, hdr.MessageSize - hdrsize);

        if (rkb->rkb_rk->rk_conf.check_crcs) {
                /* Verify CRC32 if desired. */
                uint32_t calc_crc;

                calc_crc = rd_slice_crc32(&crc_slice);
                rd_dassert(rd_slice_remains(&crc_slice) == 0);

                if (unlikely(hdr.Crc != calc_crc)) {
                        /* Propagate CRC error to application and
                         * continue with next message. */
                        rd_kafka_q_op_err(&msetr->msetr_rkq,
                                          RD_KAFKA_OP_CONSUMER_ERR,
                                          RD_KAFKA_RESP_ERR__BAD_MSG,
                                          msetr->msetr_tver->version,
                                          rktp,
                                          hdr.Offset,
                                          "Message at %soffset %"PRId64
                                          " (%"PRId32" bytes) "
                                          "failed CRC32 check "
                                          "(original 0x%"PRIx32" != "
                                          "calculated 0x%"PRIx32")",
                                          reloff_str, hdr.Offset,
                                          hdr.MessageSize, hdr.Crc, calc_crc);
                        rd_kafka_buf_skip_to(rkbuf, message_end);
                        rd_atomic64_add(&rkb->rkb_c.rx_err, 1);
                        /* Continue with next message */
                        return RD_KAFKA_RESP_ERR_NO_ERROR;
                }
        }


        /* Extract key */
        rd_kafka_buf_read_bytes(rkbuf, &Key);

        /* Extract Value */
        rd_kafka_buf_read_bytes(rkbuf, &Value);
        Value_len = RD_KAFKAP_BYTES_LEN(&Value);

        /* MessageSets may contain offsets earlier than we
         * requested (compressed MessageSets in particular),
         * drop the earlier messages.
         * Note: the inner offset may only be trusted for
         *       absolute offsets. KIP-31 introduced
         *       ApiVersion 2 that maintains relative offsets
         *       of compressed messages and the base offset
         *       in the outer message is the offset of
         *       the *LAST* message in the MessageSet.
         *       This requires us to assign offsets
         *       after all messages have been read from
         *       the messageset, and it also means
         *       we cant perform this offset check here
         *       in that case. */
        if (!relative_offsets &&
            hdr.Offset < rktp->rktp_offsets.fetch_offset)
                return RD_KAFKA_RESP_ERR_NO_ERROR; /* Continue with next msg */

        /* Handle compressed MessageSet */
        if (unlikely(hdr.Attributes & RD_KAFKA_MSG_ATTR_COMPRESSION_MASK))
                return rd_kafka_msgset_reader_decompress(
                        msetr, hdr.MagicByte, hdr.Attributes, hdr.Timestamp,
                        hdr.Offset, Value.data, Value_len);


        /* Pure uncompressed message, this is the innermost
         * handler after all compression and cascaded
         * MessageSets have been peeled off. */

        /* Create op/message container for message. */
        rko = rd_kafka_op_new_fetch_msg(&rkm, rktp, msetr->msetr_tver->version,
                                        rkbuf,
                                        hdr.Offset,
                                        (size_t)RD_KAFKAP_BYTES_LEN(&Key),
                                        RD_KAFKAP_BYTES_IS_NULL(&Key) ?
                                        NULL : Key.data,
                                        (size_t)RD_KAFKAP_BYTES_LEN(&Value),
                                        RD_KAFKAP_BYTES_IS_NULL(&Value) ?
                                        NULL : Value.data);

        /* Assign message timestamp.
         * If message was in a compressed MessageSet and the outer/wrapper
         * Message.Attribute had a LOG_APPEND_TIME set, use the
         * outer timestamp */
        if (msetr->msetr_outer.tstype == RD_KAFKA_TIMESTAMP_LOG_APPEND_TIME) {
                rkm->rkm_timestamp = msetr->msetr_outer.timestamp;
                rkm->rkm_tstype    = msetr->msetr_outer.tstype;

        } else if (hdr.MagicByte >= 1 && hdr.Timestamp) {
                rkm->rkm_timestamp = hdr.Timestamp;
                if (hdr.Attributes & RD_KAFKA_MSG_ATTR_LOG_APPEND_TIME)
                        rkm->rkm_tstype = RD_KAFKA_TIMESTAMP_LOG_APPEND_TIME;
                else
                        rkm->rkm_tstype = RD_KAFKA_TIMESTAMP_CREATE_TIME;
        }

        /* Enqueue message on temporary queue */
        rd_kafka_q_enq(&msetr->msetr_rkq, rko);
        msetr->msetr_msgcnt++;

        return RD_KAFKA_RESP_ERR_NO_ERROR; /* Continue */

 err_parse:
        /* Count all parse errors as partial message errors. */
        rd_atomic64_add(&msetr->msetr_rkb->rkb_c.rx_partial, 1);
        return rkbuf->rkbuf_err;
}
Ejemplo n.º 5
0
/**
 * Pop all available ops from a queue and call the provided 
 * callback for each op.
 * `max_cnt` limits the number of ops served, 0 = no limit.
 *
 * Returns the number of ops served.
 *
 * Locality: any thread.
 */
int rd_kafka_q_serve (rd_kafka_q_t *rkq, int timeout_ms,
                      int max_cnt, int cb_type,
                      int (*callback) (rd_kafka_t *rk, rd_kafka_op_t *rko,
                                       int cb_type, void *opaque),
                      void *opaque) {
        rd_kafka_t *rk = rkq->rkq_rk;
	rd_kafka_op_t *rko;
	rd_kafka_q_t localq;
        int cnt = 0;
        int handled = 0;

	mtx_lock(&rkq->rkq_lock);

        rd_dassert(TAILQ_EMPTY(&rkq->rkq_q) || rkq->rkq_qlen > 0);
	if (rkq->rkq_fwdq) {
                rd_kafka_q_t *fwdq = rkq->rkq_fwdq;
                int ret;
                rd_kafka_q_keep(fwdq);
                /* Since the q_pop may block we need to release the parent
                 * queue's lock. */
                mtx_unlock(&rkq->rkq_lock);
		ret = rd_kafka_q_serve(fwdq, timeout_ms, max_cnt,
                                       cb_type, callback, opaque);
                rd_kafka_q_destroy(fwdq);
		return ret;
	}

	if (timeout_ms == RD_POLL_INFINITE)
		timeout_ms = INT_MAX;

	/* Wait for op */
	while (!(rko = TAILQ_FIRST(&rkq->rkq_q)) && timeout_ms != 0) {
		if (cnd_timedwait_ms(&rkq->rkq_cond,
				     &rkq->rkq_lock,
				     timeout_ms) != thrd_success)
			break;

		timeout_ms = 0;
	}

	if (!rko) {
		mtx_unlock(&rkq->rkq_lock);
		return 0;
	}

	/* Move the first `max_cnt` ops. */
	rd_kafka_q_init(&localq, rkq->rkq_rk);
	rd_kafka_q_move_cnt(&localq, rkq, max_cnt == 0 ? -1/*all*/ : max_cnt,
			    0/*no-locks*/);

        mtx_unlock(&rkq->rkq_lock);

        rd_kafka_yield_thread = 0;

	/* Call callback for each op */
        while ((rko = TAILQ_FIRST(&localq.rkq_q))) {
		handled += callback(rk, rko, cb_type, opaque);
		rd_kafka_q_deq0(&localq, rko);
		rd_kafka_op_destroy(rko);
                cnt++;

                if (unlikely(rd_kafka_yield_thread)) {
                        /* Callback called rd_kafka_yield(), we must
                         * stop our callback dispatching and put the
                         * ops in localq back on the original queue head. */
                        if (!TAILQ_EMPTY(&localq.rkq_q))
                                rd_kafka_q_prepend(rkq, &localq);
                        break;
                }
	}

        /* Make sure no op was left unhandled. i.e.,
         * a consumer op ended up on the global queue. */
        rd_kafka_assert(NULL, handled == cnt);

	rd_kafka_q_destroy(&localq);

	return cnt;
}