Пример #1
0
// Start processing a read transaction.
//
// Flow, as visible in this function:
//  1. If repl_ping_check() rejects the transaction, respond and finish with
//     TRANS_DONE_ERROR.
//  2. If neither duplicate resolution nor a replica ping is required, attempt
//     the read immediately. Any result other than TRANS_IN_PROGRESS is final.
//  3. Otherwise (or if the immediate read reported TRANS_IN_PROGRESS), create
//     an rw_request and insert it in the rw_request hash. If insertion does
//     not return TRANS_IN_PROGRESS, the rw_request is released and that status
//     is returned - with a response sent unless the status is TRANS_WAITING.
//  4. With the rw_request in the hash: start duplicate resolution, or start a
//     replica ping, or retry the local read - whichever applies.
//
// Returns the transaction_status describing how far the transaction got.
transaction_status
as_read_start(as_transaction* tr)
{
	BENCHMARK_START(tr, read, FROM_CLIENT);
	BENCHMARK_START(tr, batch_sub, FROM_BATCH);

	if (! repl_ping_check(tr)) {
		send_read_response(tr, NULL, NULL, 0, NULL);
		return TRANS_DONE_ERROR;
	}

	bool needs_dup_res = read_must_duplicate_resolve(tr);
	bool needs_ping = read_must_ping(tr);
	transaction_status status;

	if (! (needs_dup_res || needs_ping)) {
		// Nothing requires another node - attempt the read right away.
		status = read_local(tr);

		if (status != TRANS_IN_PROGRESS) {
			return status;
		}
		// TRANS_IN_PROGRESS here means the read must be retried under hash.
	}

	// From here on the transaction needs an rw_request in the hash.
	rw_request_hkey hkey = { tr->rsv.ns->id, tr->keyd };
	rw_request* rw = rw_request_create(&tr->keyd);

	status = rw_request_hash_insert(&hkey, rw, tr);

	if (status != TRANS_IN_PROGRESS) {
		// Not inserted - this transaction is finished as far as we're
		// concerned. Only TRANS_WAITING skips the response.
		rw_request_release(rw);

		if (status != TRANS_WAITING) {
			send_read_response(tr, NULL, NULL, 0, NULL);
		}

		return status;
	}

	// Duplicate resolution takes precedence over pinging replicas.
	if (needs_dup_res) {
		start_read_dup_res(rw, tr);
		return TRANS_IN_PROGRESS;
	}

	if (! needs_ping) {
		// Second attempt at the local read, this time under hash. It must
		// reach a final status, after which the hash entry is removed.
		status = read_local(tr);
		cf_assert(status != TRANS_IN_PROGRESS, AS_RW, "read in-progress");
		rw_request_hash_delete(&hkey, rw);

		return status;
	}

	// Replica ping - collect the other replicas as destination nodes.
	rw->n_dest_nodes = as_partition_get_other_replicas(tr->rsv.p,
			rw->dest_nodes);

	if (insufficient_replica_destinations(tr->rsv.ns, rw->n_dest_nodes)) {
		rw_request_hash_delete(&hkey, rw);
		tr->result_code = AS_ERR_UNAVAILABLE;
		send_read_response(tr, NULL, NULL, 0, NULL);
		return TRANS_DONE_ERROR;
	}

	start_repl_ping(rw, tr);

	return TRANS_IN_PROGRESS;
}
Пример #2
0
// Handle a replica-write ack message 'm' that arrived from 'node'.
//
// The message must carry namespace id, digest, transaction id and result code
// fields - if any is missing, a warning is logged and the ack is dropped.
//
// The ack is matched to an rw_request in the hash by { ns-id, digest }, then
// (under the rw_request's lock) validated against the request's tid and its
// destination node list. Once every destination node is marked complete, the
// rw_request's repl_write_cb is invoked - unless
// respond_client_on_master_completion is set - and the rw_request is removed
// from the hash.
//
// On every path the message is handed to as_fabric_msg_put(), and an
// rw_request obtained from the hash is passed to rw_request_release().
void
repl_write_handle_ack(cf_node node, msg* m)
{
	uint32_t ns_id;
	cf_digest* keyd;
	size_t digest_sz;
	uint32_t ack_tid;
	// TODO - result_code is currently ignored! What should we do with it?
	// Note - CLUSTER_KEY_MISMATCH not special, can't re-queue transaction.
	uint32_t result_code;
	// Set only when this ack completes the replica write - tells the shared
	// exit path below to also remove the rw_request from the hash.
	bool all_acked = false;

	if (msg_get_uint32(m, RW_FIELD_NS_ID, &ns_id) != 0) {
		cf_warning(AS_RW, "repl-write ack: no ns-id");
		goto put_msg;
	}

	if (msg_get_buf(m, RW_FIELD_DIGEST, (uint8_t**)&keyd, &digest_sz,
			MSG_GET_DIRECT) != 0) {
		cf_warning(AS_RW, "repl-write ack: no digest");
		goto put_msg;
	}

	if (msg_get_uint32(m, RW_FIELD_TID, &ack_tid) != 0) {
		cf_warning(AS_RW, "repl-write ack: no tid");
		goto put_msg;
	}

	if (msg_get_uint32(m, RW_FIELD_RESULT, &result_code) != 0) {
		cf_warning(AS_RW, "repl-write ack: no result_code");
		goto put_msg;
	}

	rw_request_hkey hkey = { ns_id, *keyd };
	rw_request* rw = rw_request_hash_get(&hkey);

	if (rw == NULL) {
		// Extra ack - the rw_request is already gone.
		goto put_msg;
	}

	pthread_mutex_lock(&rw->lock);

	if (rw->tid != ack_tid) {
		// Extra ack - this rw_request belongs to a newer transaction for the
		// same digest.
		goto unlock_and_release;
	}

	// Locate the acking node among this request's destinations.
	int idx = 0;

	while (idx < rw->n_dest_nodes && rw->dest_nodes[idx] != node) {
		idx++;
	}

	if (idx == rw->n_dest_nodes) {
		cf_warning(AS_RW, "repl-write ack: from non-dest node %lx", node);
		goto unlock_and_release;
	}

	if (rw->dest_complete[idx]) {
		// Extra ack - this destination was already marked complete.
		goto unlock_and_release;
	}

	rw->dest_complete[idx] = true;

	for (int k = 0; k < rw->n_dest_nodes; k++) {
		if (! rw->dest_complete[k]) {
			// Some destination has yet to ack - keep waiting.
			goto unlock_and_release;
		}
	}

	if (! (rw->from.any || rw->origin == FROM_NSUP ||
			rw->respond_client_on_master_completion)) {
		// Lost race against timeout in retransmit thread.
		goto unlock_and_release;
	}

	if (! rw->respond_client_on_master_completion) {
		rw->repl_write_cb(rw);
	}

	all_acked = true;

unlock_and_release:
	pthread_mutex_unlock(&rw->lock);

	if (all_acked) {
		// Done with this request - remove it from the hash (after unlocking,
		// before dropping our reference).
		rw_request_hash_delete(&hkey, rw);
	}

	rw_request_release(rw);

put_msg:
	as_fabric_msg_put(m);
}