// Start a read transaction.
//
// Flow, in order:
//  - If repl_ping_check() fails, a response is sent and TRANS_DONE_ERROR is
//    returned.
//  - If neither duplicate resolution nor a replica ping is required, the read
//    is first attempted via read_local() without an rw_request. Any result
//    other than TRANS_IN_PROGRESS is returned directly.
//  - Otherwise (or if the local read came back TRANS_IN_PROGRESS) an
//    rw_request is created and inserted in the hash. If the insert does not
//    return TRANS_IN_PROGRESS, the rw_request is released, a response is sent
//    unless the status is TRANS_WAITING, and the insert status is returned.
//  - With the rw_request in the hash: start duplicate resolution, or start a
//    replica ping, or retry read_local() under the hash.
//
// Returns TRANS_IN_PROGRESS when duplicate resolution or replica ping was
// started, otherwise the status produced by the step that ended the
// transaction.
transaction_status
as_read_start(as_transaction* tr)
{
	BENCHMARK_START(tr, read, FROM_CLIENT);
	BENCHMARK_START(tr, batch_sub, FROM_BATCH);

	if (! repl_ping_check(tr)) {
		send_read_response(tr, NULL, NULL, 0, NULL);
		return TRANS_DONE_ERROR;
	}

	bool needs_dup_res = read_must_duplicate_resolve(tr);
	bool needs_ping = read_must_ping(tr);
	transaction_status result;

	if (! (needs_dup_res || needs_ping)) {
		// No network hops needed, try reading.
		result = read_local(tr);

		if (result != TRANS_IN_PROGRESS) {
			return result;
		}
		// else - must try again under hash.
	}
	// else - there are duplicates, and we're configured to resolve them, or
	// we're required to ping replicas.

	// Create rw_request and add to hash.
	rw_request_hkey key = { tr->rsv.ns->id, tr->keyd };
	rw_request* req = rw_request_create(&tr->keyd);

	result = rw_request_hash_insert(&key, req, tr);

	// If rw_request isn't inserted in hash, transaction is finished.
	if (result != TRANS_IN_PROGRESS) {
		rw_request_release(req);

		if (result != TRANS_WAITING) {
			send_read_response(tr, NULL, NULL, 0, NULL);
		}

		return result;
	}
	// else - rw_request is now in hash, continue...

	if (needs_dup_res) {
		// Started duplicate resolution.
		start_read_dup_res(req, tr);
		return TRANS_IN_PROGRESS;
	}

	if (! needs_ping) {
		// Trying again under hash.
		result = read_local(tr);
		cf_assert(result != TRANS_IN_PROGRESS, AS_RW, "read in-progress");
		rw_request_hash_delete(&key, req);

		return result;
	}

	// Replica ping path - set up the nodes to which we'll ping.
	req->n_dest_nodes = as_partition_get_other_replicas(tr->rsv.p,
			req->dest_nodes);

	if (insufficient_replica_destinations(tr->rsv.ns, req->n_dest_nodes)) {
		rw_request_hash_delete(&key, req);
		tr->result_code = AS_ERR_UNAVAILABLE;
		send_read_response(tr, NULL, NULL, 0, NULL);
		return TRANS_DONE_ERROR;
	}

	// Started replica ping.
	start_repl_ping(req, tr);
	return TRANS_IN_PROGRESS;
}
// Handle a replica-write ack message `m` received from `node`.
//
// The ack must carry a namespace id, a digest, a transaction id and a result
// code; if any is missing a warning is logged and the ack is dropped. The
// (ns-id, digest) pair is used to look up the matching rw_request in the hash.
// Acks that do not match a live rw_request, carry a stale tid, repeat an
// already-recorded ack, or come from a node that is not a destination are
// dropped. When every destination node has acked, the rw_request's
// repl_write_cb is invoked (unless the client was already answered on master
// completion) and the rw_request is removed from the hash.
//
// Every path releases `m` via as_fabric_msg_put(), and every path after a
// successful hash lookup calls rw_request_release(rw).
void
repl_write_handle_ack(cf_node node, msg* m)
{
	uint32_t ns_id;

	if (msg_get_uint32(m, RW_FIELD_NS_ID, &ns_id) != 0) {
		cf_warning(AS_RW, "repl-write ack: no ns-id");
		as_fabric_msg_put(m);
		return;
	}

	cf_digest* keyd;
	size_t sz;

	if (msg_get_buf(m, RW_FIELD_DIGEST, (uint8_t**)&keyd, &sz,
			MSG_GET_DIRECT) != 0) {
		cf_warning(AS_RW, "repl-write ack: no digest");
		as_fabric_msg_put(m);
		return;
	}

	// The digest is copied by value into the hash key below, which reads
	// sizeof(cf_digest) bytes through keyd - reject any other reported length
	// rather than reading beyond the field.
	if (sz != sizeof(cf_digest)) {
		cf_warning(AS_RW, "repl-write ack: bad digest size %zu", sz);
		as_fabric_msg_put(m);
		return;
	}

	uint32_t tid;

	if (msg_get_uint32(m, RW_FIELD_TID, &tid) != 0) {
		cf_warning(AS_RW, "repl-write ack: no tid");
		as_fabric_msg_put(m);
		return;
	}

	// TODO - result_code is currently ignored! What should we do with it?
	// Note - CLUSTER_KEY_MISMATCH not special, can't re-queue transaction.
	uint32_t result_code;

	if (msg_get_uint32(m, RW_FIELD_RESULT, &result_code) != 0) {
		cf_warning(AS_RW, "repl-write ack: no result_code");
		as_fabric_msg_put(m);
		return;
	}

	rw_request_hkey hkey = { ns_id, *keyd };
	rw_request* rw = rw_request_hash_get(&hkey);

	if (! rw) {
		// Extra ack, after rw_request is already gone.
		as_fabric_msg_put(m);
		return;
	}

	pthread_mutex_lock(&rw->lock);

	if (rw->tid != tid) {
		// Extra ack, rw_request is that of newer transaction for same digest.
		pthread_mutex_unlock(&rw->lock);
		rw_request_release(rw);
		as_fabric_msg_put(m);
		return;
	}

	// Find the acking node among the destinations and mark it complete. The
	// index is kept after the loop to detect "node not found".
	int i;

	for (i = 0; i < rw->n_dest_nodes; i++) {
		if (rw->dest_nodes[i] != node) {
			continue;
		}

		if (rw->dest_complete[i]) {
			// Extra ack for this replica write.
			pthread_mutex_unlock(&rw->lock);
			rw_request_release(rw);
			as_fabric_msg_put(m);
			return;
		}

		rw->dest_complete[i] = true;

		break;
	}

	if (i == rw->n_dest_nodes) {
		cf_warning(AS_RW, "repl-write ack: from non-dest node %lx", node);
		pthread_mutex_unlock(&rw->lock);
		rw_request_release(rw);
		as_fabric_msg_put(m);
		return;
	}

	for (int j = 0; j < rw->n_dest_nodes; j++) {
		if (! rw->dest_complete[j]) {
			// Still haven't heard from all duplicates.
			pthread_mutex_unlock(&rw->lock);
			rw_request_release(rw);
			as_fabric_msg_put(m);
			return;
		}
	}

	if (! rw->from.any && rw->origin != FROM_NSUP &&
			! rw->respond_client_on_master_completion) {
		// Lost race against timeout in retransmit thread.
		pthread_mutex_unlock(&rw->lock);
		rw_request_release(rw);
		as_fabric_msg_put(m);
		return;
	}

	// All destinations have acked. Skip the callback if the client was already
	// answered on master completion.
	if (! rw->respond_client_on_master_completion) {
		rw->repl_write_cb(rw);
	}

	pthread_mutex_unlock(&rw->lock);

	rw_request_hash_delete(&hkey, rw);
	rw_request_release(rw);
	as_fabric_msg_put(m);
}