/*!
 * Processes a LAST APPLIED message.
 *
 * Records the seqno reported by the sender in its node entry. When the sender
 * is the node the group's current last applied value is attributed to
 * (group->last_node) and it reports a larger seqno, the group-wide value is
 * recomputed.
 *
 * @return the new group last applied value if it has grown, 0 otherwise
 */
gcs_seqno_t
gcs_group_handle_last_msg (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    assert (GCS_MSG_LAST        == msg->type);
    assert (sizeof(gcs_seqno_t) == msg->size);

    gcs_seqno_t const reported = gcs_seqno_gtoh(*(gcs_seqno_t*)(msg->buf));

    // NOTE: asserting (reported >= group->last_applied) here would be too
    // restrictive: it would require the application to send last applied
    // messages while holding TO, otherwise there is a race between threads.

    gcs_node_set_last_applied (&group->nodes[msg->sender_idx], reported);

    /* Only the node responsible for the current value can move it, and only
     * by reporting something greater than the current value. */
    if (msg->sender_idx != group->last_node) {
        return 0;
    }

    if (reported <= group->last_applied) {
        return 0;
    }

    gcs_seqno_t const previous = group->last_applied;

    group_redo_last_applied (group);

    if (previous >= group->last_applied) {
        return 0; // recomputation did not advance the value
    }

    gu_debug ("New COMMIT CUT %lld after %lld from %d",
              (long long)group->last_applied,
              (long long)reported, msg->sender_idx);

    return group->last_applied;
}
/*!
 * Processes a JOIN message, which reports the outcome of a state transfer.
 *
 * The message payload is a single gcs_seqno_t; a negative value is treated as
 * a negated errno describing a failed transfer.
 *
 * @return true (non-zero) if this node is the sender, so that the calling
 *         thread can be notified of success;
 *         0 if the message is rejected, or if the sender is another node;
 *         -ENOTRECOVERABLE if this node can no longer obtain the state and
 *         has to abort.
 */
int
gcs_group_handle_join_msg (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    int const   sender_idx = msg->sender_idx;
    gcs_node_t* sender     = &group->nodes[sender_idx];

    assert (GCS_MSG_JOIN == msg->type);

    // TODO: define an explicit type for the join message, like gcs_join_msg_t
    assert (msg->size == sizeof(gcs_seqno_t));

    /* Sample the sender's role before its status gets modified below. */
    bool const from_donor  = (GCS_NODE_STATE_DONOR  == sender->status);
    bool const from_joiner = (GCS_NODE_STATE_JOINER == sender->status);

    if (!from_donor && !from_joiner) {
        /* Sender is not a party to any state transfer: reject. */
        if (GCS_NODE_STATE_PRIM == sender->status) {
            gu_warn("Rejecting JOIN message from %d.%d (%s): new State Transfer"
                    " required.", sender_idx, sender->segment, sender->name);
        }
        else {
            // should we freak out and throw an error?
            gu_warn("Protocol violation. JOIN message sender %d.%d (%s) is not "
                    "in state transfer (%s). Message ignored.",
                    sender_idx, sender->segment, sender->name,
                    gcs_node_state_to_str(sender->status));
        }

        return 0;
    }

    gcs_seqno_t const seqno = gcs_seqno_gtoh(*(gcs_seqno_t*)msg->buf);
    const char*       peer_id;
    const char*       st_dir; // state transfer direction symbol

    if (from_donor) {
        peer_id = sender->joiner;
        st_dir  = "to";

        assert (group->last_applied_proto_ver >= 0);

        /* #454 - with last applied protocol version 0 we don't switch to
         *        JOINED here, instead going straight to SYNCED */
        if (0 != group->last_applied_proto_ver) {
            assert(sender->count_last_applied);
            sender->status = GCS_NODE_STATE_JOINED;
        }
    }
    else {
        peer_id = sender->donor;
        st_dir  = "from";

        /* #591: the (quorum.version < 2) alternative is to be removed after
         * quorum v1 is phased out. From v2 on a failed joiner goes back to
         * PRIM instead of being counted as joined. */
        if (group->quorum.version < 2 || seqno >= 0) {
            sender->status = GCS_NODE_STATE_JOINED;
            group->prim_num++;
        }
        else {
            sender->status = GCS_NODE_STATE_PRIM;
        }
    }

    /* Try to find the peer by id. #483: the sender's own slot is NOT skipped;
     * the (sender_idx == peer_idx) case is reported separately below. */
    gcs_node_t* peer      = NULL;
    const char* peer_name = "left the group";
    int         peer_idx  = -1;
    int         scan      = 0;

    while (scan < group->num &&
           0 != memcmp(peer_id, group->nodes[scan].id,
                       sizeof (group->nodes[scan].id))) {
        scan++;
    }

    if (scan < group->num) {
        peer_idx  = scan;
        peer      = &group->nodes[peer_idx];
        peer_name = peer->name;
    }
    else if (scan == group->num) {
        gu_warn ("Could not find peer: %s", peer_id);
    }

    if (seqno < 0) {
        gu_warn ("%d.%d (%s): State transfer %s %d.%d (%s) failed: %d (%s)",
                 sender_idx, sender->segment, sender->name, st_dir,
                 peer_idx, peer ? peer->segment : -1, peer_name,
                 (int)seqno, strerror((int)-seqno));

        if (from_donor && peer_idx == group->my_idx &&
            GCS_NODE_STATE_JOINER == group->nodes[peer_idx].status) {
            // this node will be waiting for SST forever. If it has only
            // one recv thread there is no (generic) way to wake it up.
            gu_fatal ("Will never receive state. Need to abort.");
            // return to core to shutdown the backend before aborting
            return -ENOTRECOVERABLE;
        }

        // #591: remove this check after quorum v1 is phased out
        if (group->quorum.version < 2 && !from_donor &&
            sender_idx == group->my_idx) {
            gu_fatal ("Faield to receive state. Need to abort.");
            return -ENOTRECOVERABLE;
        }
    }
    else if (sender_idx == peer_idx) {
        gu_info ("Member %d.%d (%s) resyncs itself to group",
                 sender_idx, sender->segment, sender->name);
    }
    else {
        gu_info ("%d.%d (%s): State transfer %s %d.%d (%s) complete.",
                 sender_idx, sender->segment, sender->name, st_dir,
                 peer_idx, peer ? peer->segment : -1, peer_name);
    }

    return (sender_idx == group->my_idx);
}
// Initialises core and backend objects + some common tests static inline void core_test_init () { long ret; action_t act; mark_point(); gu_config_t* config = gu_config_create (); fail_if (config == NULL); Core = gcs_core_create (config, NULL, "core_test", "aaa.bbb.ccc.ddd:xxxx", 0, 0); fail_if (NULL == Core); Backend = gcs_core_get_backend (Core); fail_if (NULL == Backend); Seqno = 0; // reset seqno ret = core_test_set_payload_size (FRAG_SIZE); fail_if (-EBADFD != ret, "Expected -EBADFD, got: %ld (%s)", ret, strerror(-ret)); ret = gcs_core_open (Core, "yadda-yadda", "owkmevc", 1); fail_if (-EINVAL != ret, "Expected -EINVAL, got %ld (%s)", ret, strerror(-ret)); ret = gcs_core_open (Core, "yadda-yadda", "dummy://", 1); fail_if (0 != ret, "Failed to open core connection: %ld (%s)", ret, strerror(-ret)); // receive first configuration message fail_if (CORE_RECV_ACT (&act, NULL, UNKNOWN_SIZE, GCS_ACT_CONF)); fail_if (core_test_check_conf(act.out, true, 0, 1)); free (act.out); // this will configure backend to have desired fragment size ret = core_test_set_payload_size (FRAG_SIZE); fail_if (0 != ret, "Failed to set up the message payload size: %ld (%s)", ret, strerror(-ret)); // try to send an action to check that everything's alright ret = gcs_core_send (Core, act1, sizeof(act1_str), GCS_ACT_TORDERED); fail_if (ret != sizeof(act1_str), "Expected %d, got %d (%s)", sizeof(act1_str), ret, strerror (-ret)); gu_warn ("Next CORE_RECV_ACT fails under valgrind"); act.in = act1; fail_if (CORE_RECV_ACT (&act, act1_str, sizeof(act1_str),GCS_ACT_TORDERED)); ret = gcs_core_send_join (Core, Seqno); fail_if (ret != 0, "gcs_core_send_join(): %ld (%s)", ret, strerror(-ret)); // no action to be received (we're joined already) ret = gcs_core_send_sync (Core, Seqno); fail_if (ret != 0, "gcs_core_send_sync(): %ld (%s)", ret, strerror(-ret)); fail_if (CORE_RECV_ACT(&act,NULL,sizeof(gcs_seqno_t),GCS_ACT_SYNC)); fail_if (Seqno != gcs_seqno_gtoh(*(gcs_seqno_t*)act.out)); 
gcs_core_send_lock_step (Core, true); mark_point(); }
/*!
 * Handles a state transfer request action.
 *
 * Layout of act->act.buf as read by this function:
 *
 *   donor name, NUL-terminated
 *   [ 'V', one version byte                       - optional version marker
 *     [ gu_uuid_t, gcs_seqno_t (group byte order) - only if version >= 2 ] ]
 *   request payload
 *
 * For version >= 2 the marker and the UUID/seqno pair are cut out of the
 * buffer in place (act->act.buf_len is reduced accordingly) after the values
 * have been extracted for donor selection. If this node is the selected
 * donor, the donor name is cut off as well, so that the action starts with
 * the request payload.
 *
 * On return act->id holds the donor index or a negative error code.
 *
 * NOTE(review): the donor name is measured with strlen(), i.e. the buffer is
 * assumed to contain a NUL terminator within buf_len bytes - this is not
 * verified here.
 *
 * @return 0 if request is ignored, request size if it should be passed up
 */
int
gcs_group_handle_state_request (gcs_group_t*         group,
                                struct gcs_act_rcvd* act)
{
    // pass only to sender and to one potential donor
    const char*      donor_name     = (const char*)act->act.buf;
    size_t           donor_name_len = strlen(donor_name);
    int              donor_idx      = -1;
    int const        joiner_idx     = act->sender_idx;
    const char*      joiner_name    = group->nodes[joiner_idx].name;
    gcs_node_state_t joiner_status  = group->nodes[joiner_idx].status;
    bool const       desync         = group_desync_request (donor_name);

    gu_uuid_t   ist_uuid    = {{0, }};
    gcs_seqno_t ist_seqno   = GCS_SEQNO_ILL;
    int         str_version = 1; // actually it's 0 or 1.

    assert (GCS_ACT_STATE_REQ == act->act.type);

    /* The version marker occupies the two bytes that follow the donor name's
     * terminator; probe them only if both lie inside the buffer. */
    if (act->act.buf_len > (ssize_t)(donor_name_len + 2) &&
        donor_name[donor_name_len + 1] == 'V') {
        str_version = (int)donor_name[donor_name_len + 2];
    }

    if (str_version >= 2) {
        size_t const ist_len = sizeof(ist_uuid) + sizeof(ist_seqno);
        size_t const head    = donor_name_len + 3 + ist_len;

        if (act->act.buf_len < (ssize_t)head) {
            /* The buffer is too short to hold the versioned header: parsing
             * it would read past the end of the buffer and the length passed
             * to memmove() below would wrap around. Drop the request the same
             * way as any other unacceptable request. */
            gu_error ("Malformed state transfer request from %d.%d (%s): "
                      "version %d header does not fit into %lld bytes. "
                      "Ignoring.",
                      joiner_idx, group->nodes[joiner_idx].segment,
                      joiner_name, str_version,
                      (long long)act->act.buf_len);

            if (group->my_idx == joiner_idx) {
                act->id = -ECANCELED;
                return act->act.buf_len;
            }

            gcs_group_ignore_action (group, act);
            return 0;
        }

        const char* ist_buf = donor_name + donor_name_len + 3;
        gcs_seqno_t wire_seqno;

        memcpy(&ist_uuid, ist_buf, sizeof(ist_uuid));
        /* ist_buf sits at an arbitrary byte offset: copy the seqno out rather
         * than dereferencing a possibly misaligned pointer. */
        memcpy(&wire_seqno, ist_buf + sizeof(ist_uuid), sizeof(wire_seqno));
        ist_seqno = gcs_seqno_gtoh(wire_seqno);

        // change act.buf's content to original version.
        // and it's safe to change act.buf_len
        memmove((char*)act->act.buf + donor_name_len + 1,
                (char*)act->act.buf + head,
                act->act.buf_len - head);
        act->act.buf_len -= sizeof(ist_uuid) + sizeof(ist_seqno) + 2;
    }

    if (joiner_status != GCS_NODE_STATE_PRIM && !desync) {

        const char* joiner_status_string = gcs_node_state_to_str(joiner_status);

        if (group->my_idx == joiner_idx) {
            gu_error ("Requesting state transfer while in %s. "
                      "Ignoring.", joiner_status_string);
            act->id = -ECANCELED;
            return act->act.buf_len;
        }
        else {
            gu_error ("Member %d.%d (%s) requested state transfer, "
                      "but its state is %s. Ignoring.",
                      joiner_idx, group->nodes[joiner_idx].segment, joiner_name,
                      joiner_status_string);
            gcs_group_ignore_action (group, act);
            return 0;
        }
    }

    donor_idx = group_select_donor(group, str_version, joiner_idx, donor_name,
                                   &ist_uuid, ist_seqno, desync);

    /* A valid donor index equals the joiner index exactly when this is a
     * desync request. */
    assert (donor_idx != joiner_idx || desync  || donor_idx < 0);
    assert (donor_idx == joiner_idx || !desync || donor_idx < 0);

    if (group->my_idx != joiner_idx && group->my_idx != donor_idx) {
        // if neither DONOR nor JOINER, ignore request
        gcs_group_ignore_action (group, act);
        return 0;
    }
    else if (group->my_idx == donor_idx) {
        act->act.buf_len -= donor_name_len + 1;
        memmove (*(void**)&act->act.buf,
                 ((char*)act->act.buf) + donor_name_len + 1,
                 act->act.buf_len);
        // now action starts with request, like it was supplied by application,
        // see gcs_request_state_transfer()
    }

    // Return index of donor (or error) in the seqno field to sender.
    // It will be used to detect error conditions (no available donor,
    // donor crashed and the like).
    // This may be ugly, well, any ideas?
    act->id = donor_idx;

    return act->act.buf_len;
}
/*! Extracts the seqno carried at the start of the message buffer, converted
 *  with gcs_seqno_gtoh(). */
static inline gcs_seqno_t
group_get_last_msg (gcs_recv_msg_t* msg)
{
    gcs_seqno_t const wire_value = *(gcs_seqno_t*)(msg->buf);

    return gcs_seqno_gtoh(wire_value);
}