/**
 * Send an appendentries message to every node except this server.
 *
 * Nodes are addressed by index: raft_send_appendentries is called once for
 * each index in [0, num_nodes) that differs from this server's own nodeid.
 *
 * @param me_ The Raft server. */
void raft_send_appendentries_all(raft_server_t* me_)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int peer = 0;

    while (peer < me->num_nodes)
    {
        /* never send to ourselves */
        if (peer != me->nodeid)
            raft_send_appendentries(me_, peer);
        peer++;
    }
}
/**
 * Send an appendentries message to every node except this server.
 *
 * Resets timeout_elapsed to zero first, then calls raft_send_appendentries
 * for each entry of me->nodes that is not this server's own node.
 *
 * @param me_ The Raft server. */
void raft_send_appendentries_all(raft_server_t* me_)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int idx;

    me->timeout_elapsed = 0;

    for (idx = 0; idx < me->num_nodes; idx++)
    {
        /* skip our own node */
        if (me->nodes[idx] == me->node)
            continue;

        raft_send_appendentries(me_, me->nodes[idx]);
    }
}
/**
 * Send an appendentries message to all other nodes.
 *
 * Walks the node indices 0 .. num_nodes-1 and calls raft_send_appendentries
 * for each one that is not this server's nodeid.
 *
 * @param me_ The Raft server. */
void raft_send_appendentries_all(raft_server_t* me_)
{
    raft_server_private_t* me = (void*)me_;
    int target;

    for (target = 0; target < me->num_nodes; target++)
    {
        if (target != me->nodeid)
            raft_send_appendentries(me_, target);
    }
}
/**
 * Receive a client entry, append it to the log and start replicating it.
 *
 * The entry is rejected when it is a voting configuration change while
 * another one is still recorded in voting_cfg_change_log_idx, or when this
 * server is not the leader.
 *
 * @param me_ The Raft server.
 * @param e   The entry received from the client.
 * @param r   Filled in with the entry's id, log index and term on success.
 * @return 0 on success, -1 if the entry was rejected. */
int raft_recv_entry(raft_server_t* me_, msg_entry_t* e, msg_entry_response_t *r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int idx;

    /* Only one voting cfg change at a time */
    if (raft_entry_is_voting_cfg_change(e) &&
        -1 != me->voting_cfg_change_log_idx)
        return -1;

    if (!raft_is_leader(me_))
        return -1;

    __log(me_, NULL, "received entry t:%d id: %d idx: %d",
          me->current_term, e->id, raft_get_current_idx(me_) + 1);

    /* Build the log entry from the message, stamped with our current term. */
    raft_entry_t ety;
    ety.id = e->id;
    ety.type = e->type;
    ety.term = me->current_term;
    memcpy(&ety.data, &e->data, sizeof(raft_entry_data_t));

    /* NOTE(review): the result of raft_append_entry is not checked here;
     * confirm whether it can fail and what should happen if it does. */
    raft_append_entry(me_, &ety);

    for (idx = 0; idx < me->num_nodes; idx++)
    {
        /* Consider only other, present, voting nodes. */
        if (me->node != me->nodes[idx] &&
            me->nodes[idx] &&
            raft_node_is_voting(me->nodes[idx]))
        {
            /* Only send new entries.
             * Don't send the entry to peers who are behind, to prevent them
             * from becoming congested. */
            int next_idx = raft_node_get_next_idx(me->nodes[idx]);

            if (raft_get_current_idx(me_) == next_idx)
                raft_send_appendentries(me_, me->nodes[idx]);
        }
    }

    /* if we're the only node, we can consider the entry committed */
    if (1 == me->num_nodes)
        me->commit_idx = raft_get_current_idx(me_);

    r->id = e->id;
    r->idx = raft_get_current_idx(me_);
    r->term = me->current_term;

    /* Remember where the pending voting configuration change lives. */
    if (raft_entry_is_voting_cfg_change(e))
        me->voting_cfg_change_log_idx = raft_get_current_idx(me_);

    return 0;
}
/**
 * Handle a response to an appendentries message.
 *
 * On success, every log index in [first_idx, current_idx] is passed to
 * log_mark_node_has_committed, and entries following last_applied_idx are
 * applied for as long as the next entry exists and its num_nodes count is
 * at least num_nodes / 2.
 * On failure, the responding node's next index is decremented and the
 * appendentries message is sent again.
 *
 * @param me_  The Raft server.
 * @param node Index of the node that sent the response.
 * @param r    The appendentries response.
 * @return Always 1. */
int raft_recv_appendentries_response(raft_server_t* me_, int node,
                                     msg_appendentries_response_t* r)
{
    raft_server_private_t* me = (void*)me_;
    raft_node_t* peer;
    int idx;

    __log(me_, NULL, "received appendentries response from: %d", node);

    peer = raft_get_node(me_, node);

    if (1 != r->success)
    {
        /* If AppendEntries fails because of log inconsistency:
         * decrement nextIndex and retry (Raft paper, §5.3) */
        assert(0 <= raft_node_get_next_idx(peer));
        // TODO does this have test coverage?
        // TODO can jump back to where node is different instead of iterating
        raft_node_set_next_idx(peer, raft_node_get_next_idx(peer) - 1);
        raft_send_appendentries(me_, node);
        return 1;
    }

    for (idx = r->first_idx; idx <= r->current_idx; idx++)
        log_mark_node_has_committed(me->log, idx);

    for (;;)
    {
        raft_entry_t* next = log_get_from_idx(me->log, me->last_applied_idx + 1);

        /* stop unless a majority has this entry */
        if (!next || next->num_nodes < me->num_nodes / 2)
            break;

        /* stop when raft_apply_entry returns 0 */
        if (0 == raft_apply_entry(me_))
            break;
    }

    return 1;
}
/**
 * Switch this server into the leader state.
 *
 * Sets the state to RAFT_STATE_LEADER, resets voted_for to -1, and for every
 * other node sets its next index to one past our current log index before
 * sending it an appendentries message.
 *
 * @param me_ The Raft server. */
void raft_become_leader(raft_server_t* me_)
{
    raft_server_private_t* me = (void*)me_;
    int idx;

    __log(me_, NULL, "becoming leader");

    raft_set_state(me_, RAFT_STATE_LEADER);
    me->voted_for = -1;

    for (idx = 0; idx < me->num_nodes; idx++)
    {
        /* nothing to do for ourselves */
        if (idx != me->nodeid)
        {
            raft_node_t* peer = raft_get_node(me_, idx);

            raft_node_set_next_idx(peer, raft_get_current_idx(me_) + 1);
            raft_send_appendentries(me_, idx);
        }
    }
}
/**
 * Switch this server into the leader state.
 *
 * Sets the state to RAFT_STATE_LEADER. Then, for every voting node other
 * than ourselves: sets its next index to one past our current log index,
 * resets its match index to 0 and sends it an appendentries message.
 *
 * @param me_ The Raft server. */
void raft_become_leader(raft_server_t* me_)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int idx;

    __log(me_, NULL, "becoming leader term:%d", raft_get_current_term(me_));

    raft_set_state(me_, RAFT_STATE_LEADER);

    for (idx = 0; idx < me->num_nodes; idx++)
    {
        raft_node_t* peer = me->nodes[idx];

        /* only other, voting nodes are initialised and contacted */
        if (peer != me->node && raft_node_is_voting(peer))
        {
            raft_node_set_next_idx(peer, raft_get_current_idx(me_) + 1);
            raft_node_set_match_idx(peer, 0);
            raft_send_appendentries(me_, peer);
        }
    }
}
/**
 * Receive an entry from a client, append it to the log and replicate it.
 *
 * The entry is stamped with the current term and appended; the result of
 * the append is passed to raft_send_entry_response for the sending node,
 * and an appendentries message is then sent to every other node.
 *
 * @param me_  The Raft server.
 * @param node Index of the node the entry came from.
 * @param e    The entry message.
 * @return Always 0. */
int raft_recv_entry(raft_server_t* me_, int node, msg_entry_t* e)
{
    raft_server_private_t* me = (void*)me_;
    raft_entry_t ety;
    int append_result;
    int peer;

    __log(me_, NULL, "received entry from: %d", node);

    /* Copy the message into a log entry for the current term. */
    ety.id = e->id;
    ety.len = e->len;
    ety.data = e->data;
    ety.term = me->current_term;

    append_result = raft_append_entry(me_, &ety);
    raft_send_entry_response(me_, node, e->id, append_result);

    for (peer = 0; peer < me->num_nodes; peer++)
    {
        if (peer != me->nodeid)
            raft_send_appendentries(me_, peer);
    }

    return 0;
}
/**
 * Receive an entry from a client, append it to the log and replicate it.
 *
 * The entry is stamped with the current term and appended, an appendentries
 * message is sent to every other node, and the response is filled in.
 *
 * @param me_  The Raft server.
 * @param node Index of the node the entry came from (used for logging).
 * @param e    The entry message.
 * @param r    Response to fill in: id is copied from the entry and
 *             was_committed is set to 1 when raft_append_entry returned 0,
 *             otherwise 0.
 * @return Always 0. */
int raft_recv_entry(raft_server_t* me_, int node, msg_entry_t* e,
                    msg_entry_response_t *r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    raft_entry_t ety;
    int append_result;
    int peer;

    __log(me_, "received entry from: %d", node);

    /* Copy the message into a log entry for the current term. */
    ety.id = e->id;
    ety.len = e->len;
    ety.data = e->data;
    ety.term = me->current_term;

    append_result = raft_append_entry(me_, &ety);

    for (peer = 0; peer < me->num_nodes; peer++)
    {
        /* do not send to ourselves */
        if (me->nodeid == peer)
            continue;

        raft_send_appendentries(me_, peer);
    }

    r->id = e->id;
    r->was_committed = (0 == append_result);

    return 0;
}
/**
 * Handle a response to an appendentries message (leader side).
 *
 * Processing order:
 *  - stale responses (current_idx not beyond the node's match index) are
 *    ignored;
 *  - responses received while not leader are rejected;
 *  - a response carrying a newer term makes this server adopt that term and
 *    become a follower; a response for any other non-current term is ignored;
 *  - responses from a node that is not in our configuration (node == NULL)
 *    are ignored;
 *  - on failure the node's next index is moved back and the request retried;
 *  - on success the node's next/match indices are advanced, the
 *    node_has_sufficient_logs callback may fire for a non-voting node, the
 *    commit index may be advanced, and any remaining entries are sent.
 *
 * @param me_  The Raft server.
 * @param node The node that sent the response; may be NULL when the sender
 *             is not part of our configuration.
 * @param r    The appendentries response.
 * @return 0 when the response was handled or ignored, -1 when this server is
 *         not the leader. */
int raft_recv_appendentries_response(raft_server_t* me_,
                                     raft_node_t* node,
                                     msg_appendentries_response_t* r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    __log(me_, node,
          "received appendentries response %s ci:%d rci:%d 1stidx:%d",
          r->success == 1 ? "SUCCESS" : "fail",
          raft_get_current_idx(me_),
          r->current_idx,
          r->first_idx);

    /* Stale response -- ignore.
     * node can be NULL at this point (it is only rejected further down, after
     * the term checks), so the match index is consulted only for a known
     * node. */
    if (node && r->current_idx != 0 &&
        r->current_idx <= raft_node_get_match_idx(node))
        return 0;

    if (!raft_is_leader(me_))
        return -1;

    /* If response contains term T > currentTerm: set currentTerm = T
       and convert to follower (§5.3) */
    if (me->current_term < r->term)
    {
        raft_set_current_term(me_, r->term);
        raft_become_follower(me_);
        return 0;
    }
    else if (me->current_term != r->term)
        return 0;

    /* stop processing, this is a node we don't have in our configuration */
    if (!node)
        return 0;

    if (0 == r->success)
    {
        /* If AppendEntries fails because of log inconsistency:
           decrement nextIndex and retry (§5.3) */
        int next_idx = raft_node_get_next_idx(node);
        assert(0 <= next_idx);

        if (r->current_idx < next_idx - 1)
            /* The follower is further behind than one entry: jump straight
             * to just past what it reports having, capped at our own log. */
            raft_node_set_next_idx(node, min(r->current_idx + 1, raft_get_current_idx(me_)));
        else
            raft_node_set_next_idx(node, next_idx - 1);

        /* retry */
        raft_send_appendentries(me_, node);
        return 0;
    }

    assert(r->current_idx <= raft_get_current_idx(me_));

    raft_node_set_next_idx(node, r->current_idx + 1);
    raft_node_set_match_idx(node, r->current_idx);

    /* Report a non-voting node once: no voting cfg change is recorded, its
     * log is within one entry of ours, a callback is registered and the node
     * has not been flagged before. */
    if (!raft_node_is_voting(node) &&
        -1 == me->voting_cfg_change_log_idx &&
        raft_get_current_idx(me_) <= r->current_idx + 1 &&
        me->cb.node_has_sufficient_logs &&
        0 == raft_node_has_sufficient_logs(node)
        )
    {
        raft_node_set_has_sufficient_logs(node);
        me->cb.node_has_sufficient_logs(me_, me->udata, node);
    }

    /* Update commit idx */
    int votes = 1; /* include me */
    int point = r->current_idx;
    int i;
    for (i = 0; i < me->num_nodes; i++)
    {
        if (me->node == me->nodes[i] || !raft_node_is_voting(me->nodes[i]))
            continue;

        int match_idx = raft_node_get_match_idx(me->nodes[i]);

        if (0 < match_idx)
        {
            /* raft_get_entry_from_idx can return NULL (its result is
             * NULL-checked below as well), so guard the dereference. */
            raft_entry_t* ety = raft_get_entry_from_idx(me_, match_idx);
            if (ety && ety->term == me->current_term && point <= match_idx)
                votes++;
        }
    }

    /* NOTE(review): the majority threshold uses num_nodes, while only voting
     * nodes are counted above -- confirm this is intended when non-voting
     * nodes are present. */
    if (me->num_nodes / 2 < votes && raft_get_commit_idx(me_) < point)
        raft_set_commit_idx(me_, point);

    /* Aggressively send remaining entries */
    if (raft_get_entry_from_idx(me_, raft_node_get_next_idx(node)))
        raft_send_appendentries(me_, node);

    /* periodic applies committed entries lazily */

    return 0;
}