Example 1
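// Reply with an error on whichever channel the transaction arrived on:
// batch parent, direct client socket, fabric proxy, or internal UDF request.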
void
as_transaction_error(as_transaction* tr, uint32_t error_code)
{
	if (tr->proto_fd_h) {
		if (tr->batch_shared) {
			as_batch_add_error(tr->batch_shared, tr->batch_index, error_code);
			// Clear this transaction's msgp so calling code does not free it.
			tr->msgp = 0;
		}
		else {
			as_msg_send_reply(tr->proto_fd_h, error_code, 0, 0, NULL, NULL, 0, NULL, NULL, as_transaction_trid(tr), NULL);
			tr->proto_fd_h = 0;
			MICROBENCHMARK_HIST_INSERT_P(error_hist);
			cf_atomic_int_incr(&g_config.err_tsvc_requests);
			if (error_code == AS_PROTO_RESULT_FAIL_TIMEOUT) {
				cf_atomic_int_incr(&g_config.err_tsvc_requests_timeout);
			}
		}
	}
	else if (tr->proxy_msg) {
		as_proxy_send_response(tr->proxy_node, tr->proxy_msg, error_code, 0, 0, NULL, NULL, 0, NULL, as_transaction_trid(tr), NULL);
		tr->proxy_msg = NULL;
	}
	else if (tr->udata.req_udata) {
		if (udf_rw_needcomplete(tr)) {
			udf_rw_complete(tr, error_code, __FILE__,__LINE__);
		}
	}
}
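
Note the ownership rule in the batch branch above: clearing tr->msgp tells the
caller not to free the message, since the batch machinery still owns it. A
minimal caller sketch of that contract follows - handle_request() is a
hypothetical stand-in for the real transaction service path, used only to
illustrate where the error path and the msgp cleanup meet.

// Sketch only - handle_request() is hypothetical; the msgp handling mirrors
// the contract as_transaction_error() establishes above.
static void
service_transaction(as_transaction* tr)
{
	uint32_t result = handle_request(tr); // hypothetical handler

	if (result != 0) {
		as_transaction_error(tr, result);
	}

	// For batch sub-transactions, as_transaction_error() has already set
	// tr->msgp to 0, so the batch parent's buffer is not freed here.
	if (tr->msgp) {
		cf_free(tr->msgp);
		tr->msgp = 0;
	}
}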
Example 2
/*
 * The workhorse function to process the acknowledgment for the duplicate op.
 * It is received after the intended node has finished performing the op. In
 * case of success the op will have been successfully performed and
 * replicated. In case of failure the op will not have been performed
 * anywhere.
 *
 * Retransmits are handled by making sure the op hangs from the write hash as
 * long as it has not been applied or failed. Any attempt to perform the next
 * operation has to wait behind it until it is finished. The operation is also
 * assigned a timestamp, so that there is some protection in case the op
 * arrives out of order or the same op comes back again - that would be a
 * duplicate op ...
 *
 * Received an op message - I'm the winner duplicate on this partition.
 * Perform the UDF op and replicate to all the nodes in the replica list. We
 * only replicate the subrecord if the partition is in the subrecord migration
 * phase. If not, ship both subrecord and record. In case the partition is a
 * read replica on this node, do the write and signal back that I'm done.
 *
 * THUS - PROLE SIDE
 *
 * is_write is misnamed. It differentiates between the 'duplicate' phase and
 *    the 'operation' phase. If is_write == false, we're in the 'duplicate'
 *    phase.
 *
 * Algorithm
 *
 * This code is called when the op is shipped to the winner node.
 *
 * 1. Assert that the current node is indeed the winner node.
 * 2. Assert that the cluster key matches.
 * 3. Create a transaction and apply the UDF. Create an internal transaction
 *    and make sure it does some sort of reservation, applies the write, and
 *    replicates to the replica set. Once the write is done it sends the op
 *    ack.
 *
 *    TODO: How do you handle retransmits?
 *    TODO: How do you handle partition reservation? Is it something special?
 *    TODO: How to send along with the replication request? The same infra
 *          should be used by normal replication as well.
 *
 *    There won't be any deadlock because the requests are triggered from the
 *    write. Get down to the UDF apply code, replicate to the replica set, and
 *    then make sure the response is sent back to the originating node. This
 *    node has to make sure the replication actually did succeed.
 *
 * In the response code you need to add the callback function.
 */
int
as_proxy_shipop_response_hdlr(msg *m, proxy_request *pr, bool *free_msg)
{
	int rv            = -1;
	write_request *wr = pr->wr;
	if (!wr) {
		return -1;
	}
	cf_assert((pr->fd_h == NULL), AS_PROXY, CF_WARNING, "fd_h set for shipop proxy response");

	// If there is a write request hanging from pr, then this is a response to
	// the proxy ship-op request. This node is the resolving node (the node at
	// which duplicate resolution was triggered). It could be:
	// 1. Non-originating node [where the request arrived as a regular proxy] -
	//    in that case send the response back to the proxy originating node.
	// 2. Originating node [where the request was sent from the client] - in
	//    that case send the response back to the client directly.

	// Case 1: Non-originating node.
	if (wr->proxy_msg) {
		// Remember that "digest" gets printed at the end of cf_detail_digest().
		// Fake the ORIGINATING proxy tid.
		uint32_t transaction_id = 0;
		msg_get_uint32(wr->proxy_msg, PROXY_FIELD_TID, &transaction_id);
		msg_set_uint32(m, PROXY_FIELD_TID, transaction_id);
		cf_detail_digest(AS_PROXY, &(wr->keyd), "SHIPPED_OP NON-ORIG :: Got Op Response(%p) :", wr);
		cf_detail_digest(AS_PROXY, &(wr->keyd), "SHIPPED_OP NON-ORIG :: Back Forwarding Response for tid (%d). : ", transaction_id);
		if (0 != (rv = as_fabric_send(wr->proxy_node, m, AS_FABRIC_PRIORITY_MEDIUM))) {
			cf_detail_digest(AS_PROXY, &wr->keyd, "SHIPPED_OP NONORIG Failed Forwarding Response");
			as_fabric_msg_put(m);
		}
		*free_msg = false;
	}
	// Case 2: Originating node.
	else {
		cf_detail_digest(AS_PROXY, &wr->keyd, "SHIPPED_OP ORIG Got Op Response");
		pthread_mutex_lock(&wr->lock);
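		// Hold the wr lock so a concurrent timeout can't release proto_fd_h
		// out from under us (see the missing-proto_fd case below).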
		if (wr->proto_fd_h) {
			if (!wr->proto_fd_h->fd) {
				cf_warning_digest(AS_PROXY, &wr->keyd, "SHIPPED_OP ORIG Missing fd in proto_fd ");
			}
			else {
				// Initialize so a failed msg_get_buf() leaves the send loop
				// below with nothing to send instead of reading garbage.
				as_proto *proto = NULL;
				size_t proto_sz = 0;
				if (0 != msg_get_buf(m, PROXY_FIELD_AS_PROTO, (byte **) &proto, &proto_sz, MSG_GET_DIRECT)) {
					cf_info(AS_PROXY, "msg get buf failed!");
				}
				size_t pos = 0;
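				// Write the proto buffer to the client socket, tolerating
				// partial writes and EWOULDBLOCK by retrying until done.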
				while (pos < proto_sz) {
					rv = send(wr->proto_fd_h->fd, (((uint8_t *)proto) + pos), proto_sz - pos, MSG_NOSIGNAL);
					if (rv > 0) {
						pos += rv;
					}
					else if (rv < 0) {
						if (errno != EWOULDBLOCK) {
							// Common message when a client aborts.
							cf_debug(AS_PROTO, "protocol proxy write fail: fd %d "
									"sz %zu pos %zu rv %d errno %d",
									wr->proto_fd_h->fd, proto_sz, pos, rv, errno);
							shutdown(wr->proto_fd_h->fd, SHUT_RDWR);
							break;
						}
						usleep(1); // yield
					}
					else {
						cf_info(AS_PROTO, "protocol write fail zero return: fd %d sz %zu pos %zu",
								wr->proto_fd_h->fd, proto_sz, pos);
						shutdown(wr->proto_fd_h->fd, SHUT_RDWR);
						break;
					}
				}
				cf_detail_digest(AS_PROXY, &wr->keyd, "SHIPPED_OP ORIG Response Sent to Client");
			}
			wr->proto_fd_h->t_inprogress = false;
			AS_RELEASE_FILE_HANDLE(wr->proto_fd_h);
			wr->proto_fd_h = 0;
		}
		else {
			// This may be NULL if the request has already timed out - the
			// wr's proto_fd_h will have been cleaned up by then.
			cf_detail_digest(AS_PROXY, &wr->keyd, "SHIPPED_OP ORIG Missing proto_fd ");

			// Note: This may be needed if this is the node where an internal
			// scan or query UDF was initiated, a migration happens to be in
			// progress, and the request got routed to the remote (winning)
			// node. This request may need the req_cb to be called.
			if (udf_rw_needcomplete_wr(wr)) {
				as_transaction tr;
				write_request_init_tr(&tr, wr);
				udf_rw_complete(&tr, 0, __FILE__, __LINE__);
				if (tr.proto_fd_h) {
					tr.proto_fd_h->t_inprogress = false;
					AS_RELEASE_FILE_HANDLE(tr.proto_fd_h);
					tr.proto_fd_h = 0;
				}
			}
		}
		pthread_mutex_unlock(&wr->lock);
	}

	// This node is the ship-op initiator - remove the request from the
	// global write hash.
	global_keyd gk;
	gk.ns_id = wr->rsv.ns->id;
	gk.keyd = wr->keyd;
	g_write_hash_delete(&gk);

	WR_RELEASE(pr->wr);
	pr->wr = NULL;
	return 0;
}
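
The final hash delete above is the other half of the retransmit protection
described in the header comment: the op hangs from the global write hash until
it has been applied or has failed. A minimal sketch of the keying and the
insert side of that scheme follows - g_write_hash_put() and its signature are
assumptions for illustration; only g_write_hash_delete() appears in the code
above.

// Sketch only: illustrates the (namespace id, digest) keying that serializes
// duplicate ops behind an in-flight write. g_write_hash_put() is an assumed
// helper, not the real API.
static bool
shipop_begin(write_request *wr)
{
	global_keyd gk;
	gk.ns_id = wr->rsv.ns->id; // same key fields used in the cleanup above
	gk.keyd  = wr->keyd;

	// If an op for this key is already in flight, the new one must hang
	// behind it - a retransmit of the same op is absorbed here too.
	return g_write_hash_put(&gk, wr);
}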