/**
 * Handle a peer's reply to a RequestVote message.
 *
 * The reply is logged first. If this server is already leader nothing else
 * happens. Otherwise a granted vote is recorded in votes_for_me[node], and
 * raft_become_leader() is called once raft_votes_is_majority() accepts the
 * tally returned by raft_get_nvotes_for_me().
 *
 * @param me_  server receiving the reply
 * @param node index of the replying peer; must lie in [0, num_nodes)
 * @param r    the reply
 * @return 0 when the reply was processed or ignored; -1 when node is out of
 *         range (only reachable in NDEBUG builds, debug builds assert)
 */
int raft_recv_requestvote_response(raft_server_t* me_, int node,
                                   msg_requestvote_response_t* r)
{
    raft_server_private_t* me = (void*)me_;
    int votes;

    __log(me_, NULL, "node responded to requestvote: %d status: %s",
          node, r->vote_granted == 1 ? "granted" : "not granted");

    if (raft_is_leader(me_))
        return 0;

    /* node is used as an index into votes_for_me[] below. The assert alone
     * neither rejects negative values nor survives NDEBUG, so the range is
     * also enforced at run time. */
    assert(0 <= node && node < me->num_nodes);
    if (node < 0 || me->num_nodes <= node)
        return -1;

    /* NOTE(review): r->term is not compared with the current term here (a
     * check that did so was disabled). Confirm whether replies from another
     * term should be ignored. */

    if (1 == r->vote_granted)
    {
        me->votes_for_me[node] = 1;
        votes = raft_get_nvotes_for_me(me_);
        if (raft_votes_is_majority(me->num_nodes, votes))
            raft_become_leader(me_);
    }

    return 0;
}
/*
 * Walk all MAX_CLIENTS slots of server.clients and call remove_client() on
 * each slot that is in use and either has its `good` flag cleared or belongs
 * to a server that is not currently the raft leader.
 *
 * Slots with sock < 0 are skipped (presumably they are unused; confirm
 * against add_client/remove_client).
 */
static void drop_bads(void)
{
	for (int slot = 0; slot < MAX_CLIENTS; slot++) {
		Client *client = &server.clients[slot];

		if (client->sock >= 0) {
			/* Leadership is only queried for clients that are still good. */
			if (!(client->good && raft_is_leader(raft)))
				remove_client(client);
		}
	}
}
/**
 * Accept a client entry on the leader: append it to the local log and push
 * it to the voting peers that are already up to date.
 *
 * The call is refused with -1 when the entry is a voting configuration
 * change while another one is still recorded in voting_cfg_change_log_idx,
 * or when this server is not the leader.
 *
 * @param me_ server receiving the entry
 * @param e   entry submitted by the client
 * @param r   filled with the entry id, the log index it was given and the
 *            current term
 * @return 0 on acceptance, -1 on refusal
 */
int raft_recv_entry(raft_server_t* me_, msg_entry_t* e, msg_entry_response_t *r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    /* Only one voting cfg change at a time */
    if (raft_entry_is_voting_cfg_change(e) &&
        -1 != me->voting_cfg_change_log_idx)
    {
        return -1;
    }

    if (!raft_is_leader(me_))
    {
        return -1;
    }

    __log(me_, NULL, "received entry t:%d id: %d idx: %d",
          me->current_term, e->id, raft_get_current_idx(me_) + 1);

    /* Build the log record from the client message. */
    raft_entry_t record;
    record.term = me->current_term;
    record.id = e->id;
    record.type = e->type;
    memcpy(&record.data, &e->data, sizeof(raft_entry_data_t));

    /* NOTE(review): the result of raft_append_entry() is not inspected. */
    raft_append_entry(me_, &record);

    for (int k = 0; k < me->num_nodes; k++)
    {
        /* Consider only populated slots that are voting peers, not ourselves. */
        if (me->nodes[k] && me->node != me->nodes[k] &&
            raft_node_is_voting(me->nodes[k]))
        {
            /* Only send new entries.
             * Peers that are behind are left alone so they do not become
             * congested. */
            int peer_next = raft_node_get_next_idx(me->nodes[k]);

            if (peer_next == raft_get_current_idx(me_))
            {
                raft_send_appendentries(me_, me->nodes[k]);
            }
        }
    }

    /* With a single node the entry can be considered committed right away. */
    if (1 == me->num_nodes)
    {
        me->commit_idx = raft_get_current_idx(me_);
    }

    r->id = e->id;
    r->idx = raft_get_current_idx(me_);
    r->term = me->current_term;

    /* Remember where the pending voting configuration change sits. */
    if (raft_entry_is_voting_cfg_change(e))
    {
        me->voting_cfg_change_log_idx = raft_get_current_idx(me_);
    }

    return 0;
}
/*
 * Accept one pending connection on server.listener.
 *
 * The connection is handed to add_client() only while this server is the
 * raft leader; otherwise it is closed immediately. Progress and failures are
 * reported on stderr.
 *
 * Returns false when accept() fails or when the server is not the leader,
 * otherwise whatever add_client() returns.
 *
 * NOTE(review): if add_client() can fail, it is not visible here who closes
 * the descriptor; confirm.
 */
static bool accept_client(void)
{
	fprintf(stderr, "a new connection is queued\n");

	const int conn = accept(server.listener, NULL, NULL);
	if (-1 == conn) {
		const char *reason = strerror(errno);

		fprintf(stderr, "failed to accept a connection: %s\n", reason);
		return false;
	}
	fprintf(stderr, "a new connection fd=%d accepted\n", conn);

	if (raft_is_leader(raft))
		return add_client(conn);

	fprintf(stderr, "not a leader, disconnecting the accepted connection fd=%d\n", conn);
	close(conn);
	return false;
}
/**
 * Handle an incoming RequestVote message and fill in the reply.
 *
 * If the request carries a newer term, the current term is advanced to it
 * and the server becomes a follower. __should_grant_vote() then decides the
 * outcome. On a grant the vote is recorded via raft_vote_for_nodeid(), the
 * known leader is cleared and the election timer is reset.
 *
 * @param me_  server receiving the request
 * @param node node object of the requester; only forwarded to __log() here
 * @param vr   the vote request
 * @param r    reply to fill: vote_granted is 1 or 0, term is the current
 *             term after any update
 * @return always 0
 */
int raft_recv_requestvote(raft_server_t* me_,
                          raft_node_t* node,
                          msg_requestvote_t* vr,
                          msg_requestvote_response_t *r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    if (raft_get_current_term(me_) < vr->term)
    {
        raft_set_current_term(me_, vr->term);
        raft_become_follower(me_);
    }

    if (__should_grant_vote(me, vr))
    {
        /* It shouldn't be possible for a leader or candidate to grant a vote
         * Both states would have voted for themselves */
        assert(!(raft_is_leader(me_) || raft_is_candidate(me_)));

        raft_vote_for_nodeid(me_, vr->candidate_id);
        r->vote_granted = 1;

        /* A granted vote means an election is in progress: forget the old
         * leader and restart the election timer. */
        me->current_leader = NULL;
        me->timeout_elapsed = 0;
    }
    else
    {
        r->vote_granted = 0;
    }

    /* The %d conversion needs an int. The node pointer that used to be passed
     * here does not match it, so the candidate's id is logged instead.
     * (candidate_id is assumed to be an integer node id, since it is what
     * raft_vote_for_nodeid() receives; confirm.) */
    __log(me_, node, "node requested vote: %d replying: %s",
          (int)vr->candidate_id,
          r->vote_granted == 1 ? "granted" : "not granted");

    r->term = raft_get_current_term(me_);
    return 0;
}
/**
 * Handle an AppendEntries message and send the reply through cb.send.
 *
 * Steps, in order:
 *  - reset the election timer and step down if we are leader and the sender's
 *    term is not older than ours;
 *  - reject when the sender's term is older than ours;
 *  - when prev_log_idx is non-zero, reject unless the local log holds an
 *    entry at prev_log_idx whose term equals prev_log_term, and drop whatever
 *    is stored from prev_log_idx + 1 on;
 *  - advance the commit index and apply entries;
 *  - adopt the sender's term and append the carried entries.
 *
 * The reply is always sent (when cb.send is set), carrying success = 1 or 0.
 *
 * @param me_  server receiving the message
 * @param node index of the sending node, passed back to cb.send
 * @param ae   the AppendEntries message
 * @return always 1
 */
int raft_recv_appendentries(
    raft_server_t* me_,
    const int node,
    msg_appendentries_t* ae)
{
    int i;
    raft_server_private_t* me = (void*)me_;
    msg_appendentries_response_t r;

    /* The whole struct is handed to cb.send at `done`. Clear it first so the
     * rejection paths, which only set term and success, do not transmit
     * indeterminate current_idx / first_idx values. */
    memset(&r, 0, sizeof(r));

    me->timeout_elapsed = 0;

    __log(me_, NULL, "received appendentries from: %d", node);

    /* NOTE(review): the reply carries the term held before
     * raft_set_current_term() runs further down; confirm this is intended. */
    r.term = me->current_term;

    /* we've found a leader who is legitimate */
    if (raft_is_leader(me_) && me->current_term <= ae->term)
        raft_become_follower(me_);

    /* 1. Reply false if term < currentTerm (section 5.1) */
    if (ae->term < me->current_term)
    {
        __log(me_, NULL, "AE term is less than current term");
        r.success = 0;
        goto done;
    }

#if 0
    if (-1 != ae->prev_log_idx &&
        ae->prev_log_idx < raft_get_current_idx(me_))
    {
        __log(me_, NULL, "AE prev_idx is less than current idx");
        r.success = 0;
        goto done;
    }
#endif

    /* not the first appendentries we've received */
    if (0 != ae->prev_log_idx)
    {
        raft_entry_t* e;

        if ((e = raft_get_entry_from_idx(me_, ae->prev_log_idx)))
        {
            /* 2. Reply false if log doesn't contain an entry at prevLogIndex
             *    whose term matches prevLogTerm (section 5.3) */
            if (e->term != ae->prev_log_term)
            {
                __log(me_, NULL, "AE term doesn't match prev_idx");
                r.success = 0;
                goto done;
            }

            /* 3. If an existing entry conflicts with a new one (same index
             *    but different terms), delete the existing entry and all that
             *    follow it (section 5.3) */
            if (raft_get_entry_from_idx(me_, ae->prev_log_idx + 1))
            {
                log_delete(me->log, ae->prev_log_idx + 1);
            }
        }
        else
        {
            __log(me_, NULL, "AE no log at prev_idx");
            r.success = 0;
            goto done;
        }
    }

    /* 5. If leaderCommit > commitIndex, set commitIndex =
     *    min(leaderCommit, last log index) */
    if (raft_get_commit_idx(me_) < ae->leader_commit)
    {
        raft_entry_t* e;

        if ((e = log_peektail(me->log)))
        {
            raft_set_commit_idx(me_, e->id < ae->leader_commit ?
                                e->id : ae->leader_commit);
            /* Keep applying while raft_apply_entry() reports 1. */
            while (1 == raft_apply_entry(me_));
        }
    }

    if (raft_is_candidate(me_))
        raft_become_follower(me_);

    raft_set_current_term(me_, ae->term);

    /* append all entries to log */
    for (i = 0; i < ae->n_entries; i++)
    {
        msg_entry_t* cmd;
        raft_entry_t* c;

        cmd = &ae->entries[i];

        /* TODO: replace malloc with mempool/arena */
        c = malloc(sizeof(raft_entry_t));
        if (!c)
        {
            __log(me_, NULL, "AE failure; out of memory");
            r.success = 0;
            goto done;
        }
        c->term = me->current_term;
        c->len = cmd->len;
        c->id = cmd->id;
        c->data = malloc(cmd->len);
        /* malloc(0) may legitimately return NULL, so a NULL result only
         * counts as a failure when there is payload to store. */
        if (!c->data && 0 != cmd->len)
        {
            free(c);
            __log(me_, NULL, "AE failure; out of memory");
            r.success = 0;
            goto done;
        }
        if (0 != cmd->len)
            memcpy(c->data, cmd->data, cmd->len);

        /* NOTE(review): ownership of c after a failed append is not visible
         * here, so it is deliberately not freed on this path; confirm. */
        if (0 == raft_append_entry(me_, c))
        {
            __log(me_, NULL, "AE failure; couldn't append entry");
            r.success = 0;
            goto done;
        }
    }

    r.success = 1;
    r.current_idx = raft_get_current_idx(me_);
    r.first_idx = ae->prev_log_idx + 1;

done:
    if (me->cb.send)
        me->cb.send(me->cb_ctx, me, node, RAFT_MSG_APPENDENTRIES_RESPONSE,
                    (void*)&r, sizeof(msg_appendentries_response_t));
    return 1;
}
/**
 * Handle an AppendEntries message and fill in the reply for the caller.
 *
 * Steps, in order:
 *  - reset the election timer and step down if we are leader and the sender's
 *    term is not older than ours;
 *  - reject when the sender's term is older than ours;
 *  - when prev_log_idx is non-zero, reject unless the local log holds an
 *    entry at prev_log_idx whose term equals prev_log_term, and drop whatever
 *    is stored from prev_log_idx + 1 on;
 *  - advance the commit index and apply entries;
 *  - adopt the sender's term and append the carried entries.
 *
 * @param me_  server receiving the message
 * @param node index of the sending node (only logged here)
 * @param ae   the AppendEntries message
 * @param r    reply to fill. Every field is written on every path:
 *             current_idx and first_idx are 0 unless success is 1.
 * @return 0 when the message was handled (accepted or rejected), -1 when an
 *         entry could not be allocated or appended
 */
int raft_recv_appendentries(
    raft_server_t* me_,
    const int node,
    msg_appendentries_t* ae,
    msg_appendentries_response_t *r
    )
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int i;

    me->timeout_elapsed = 0;

    __log(me_, "received appendentries from: %d", node);

    /* Give the out-parameter defined contents up front: the rejection paths
     * below only touch `success`, and callers may forward the struct as is. */
    r->success = 0;
    r->current_idx = 0;
    r->first_idx = 0;
    r->term = me->current_term;

    /* we've found a leader who is legitimate */
    if (raft_is_leader(me_) && me->current_term <= ae->term)
        raft_become_follower(me_);

    /* 1. Reply false if term < currentTerm (§5.1) */
    if (ae->term < me->current_term)
    {
        __log(me_, "AE term is less than current term");
        r->success = 0;
        return 0;
    }

#if 0
    if (-1 != ae->prev_log_idx &&
        ae->prev_log_idx < raft_get_current_idx(me_))
    {
        __log(me_, "AE prev_idx is less than current idx");
        r->success = 0;
        return 0;
    }
#endif

    /* not the first appendentries we've received */
    if (0 != ae->prev_log_idx)
    {
        raft_entry_t* e = raft_get_entry_from_idx(me_, ae->prev_log_idx);

        if (!e)
        {
            __log(me_, "AE no log at prev_idx");
            r->success = 0;
            return 0;
        }

        /* 2. Reply false if log doesn't contain an entry at prevLogIndex
         *    whose term matches prevLogTerm (§5.3) */
        if (e->term != ae->prev_log_term)
        {
            __log(me_, "AE term doesn't match prev_idx");
            r->success = 0;
            return 0;
        }

        /* 3. If an existing entry conflicts with a new one (same index
         *    but different terms), delete the existing entry and all that
         *    follow it (§5.3) */
        if (raft_get_entry_from_idx(me_, ae->prev_log_idx + 1))
            log_delete(me->log, ae->prev_log_idx + 1);
    }

    /* 5. If leaderCommit > commitIndex, set commitIndex =
     *    min(leaderCommit, last log index) */
    if (raft_get_commit_idx(me_) < ae->leader_commit)
    {
        raft_entry_t* e = log_peektail(me->log);

        if (e)
        {
            int id = e->id < ae->leader_commit ? e->id : ae->leader_commit;

            raft_set_commit_idx(me_, id);
            /* Keep applying while raft_apply_entry() reports 0. */
            while (0 == raft_apply_entry(me_))
                ;
        }
    }

    if (raft_is_candidate(me_))
        raft_become_follower(me_);

    raft_set_current_term(me_, ae->term);

    /* append all entries to log */
    for (i = 0; i < ae->n_entries; i++)
    {
        msg_entry_t* cmd = &ae->entries[i];

        /* TODO: replace malloc with mempool/arena */
        /* The explicit casts on malloc are kept as found (the block casts
         * every void* conversion). */
        raft_entry_t* c = (raft_entry_t*)malloc(sizeof(raft_entry_t));
        if (!c)
        {
            __log(me_, "AE failure; out of memory");
            r->success = 0;
            return -1;
        }
        c->term = me->current_term;
        c->len = cmd->len;
        c->id = cmd->id;
        c->data = (unsigned char*)malloc(cmd->len);
        /* malloc(0) may legitimately return NULL, so a NULL result only
         * counts as a failure when there is payload to store. */
        if (!c->data && 0 != cmd->len)
        {
            free(c);
            __log(me_, "AE failure; out of memory");
            r->success = 0;
            return -1;
        }
        if (0 != cmd->len)
            memcpy(c->data, cmd->data, cmd->len);

        /* NOTE(review): ownership of c after a failed append is not visible
         * here, so it is deliberately not freed on this path; confirm. */
        if (-1 == raft_append_entry(me_, c))
        {
            __log(me_, "AE failure; couldn't append entry");
            r->success = 0;
            return -1;
        }
    }

    r->success = 1;
    r->current_idx = raft_get_current_idx(me_);
    r->first_idx = ae->prev_log_idx + 1;
    return 0;
}
/**
 * Handle a peer's reply to an AppendEntries message (leader side).
 *
 * Replies that do not advance the peer's match index are dropped as stale.
 * A reply with a newer term makes this server adopt the term and step down.
 * A failed reply moves the peer's next index back and triggers a resend. A
 * successful reply updates next/match index, may fire the
 * node_has_sufficient_logs callback for a non-voting peer, and may advance
 * the commit index.
 *
 * @param me_  server receiving the reply
 * @param node node object of the replying peer; may be NULL for a peer that
 *             is not part of the configuration
 * @param r    the reply
 * @return -1 when this server is not the leader, otherwise 0
 */
int raft_recv_appendentries_response(raft_server_t* me_,
                                     raft_node_t* node,
                                     msg_appendentries_response_t* r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    __log(me_, node,
          "received appendentries response %s ci:%d rci:%d 1stidx:%d",
          r->success == 1 ? "SUCCESS" : "fail",
          raft_get_current_idx(me_),
          r->current_idx,
          r->first_idx);

    /* Stale response -- ignore.
     * node can be NULL (see the check further down), so it must not be handed
     * to raft_node_get_match_idx() without testing it first. */
    if (node && r->current_idx != 0 &&
        r->current_idx <= raft_node_get_match_idx(node))
        return 0;

    if (!raft_is_leader(me_))
        return -1;

    /* If response contains term T > currentTerm: set currentTerm = T
       and convert to follower (§5.3) */
    if (me->current_term < r->term)
    {
        raft_set_current_term(me_, r->term);
        raft_become_follower(me_);
        return 0;
    }
    else if (me->current_term != r->term)
        return 0;

    /* stop processing, this is a node we don't have in our configuration */
    if (!node)
        return 0;

    if (0 == r->success)
    {
        /* If AppendEntries fails because of log inconsistency:
           decrement nextIndex and retry (§5.3) */
        int next_idx = raft_node_get_next_idx(node);
        assert(0 <= next_idx);

        /* Jump straight behind the peer's reported index when it is further
         * back than one step; otherwise step back by one. */
        if (r->current_idx < next_idx - 1)
            raft_node_set_next_idx(node,
                                   min(r->current_idx + 1,
                                       raft_get_current_idx(me_)));
        else
            raft_node_set_next_idx(node, next_idx - 1);

        /* retry */
        raft_send_appendentries(me_, node);
        return 0;
    }

    assert(r->current_idx <= raft_get_current_idx(me_));

    raft_node_set_next_idx(node, r->current_idx + 1);
    raft_node_set_match_idx(node, r->current_idx);

    /* Report a non-voting peer once: it has to be within one entry of our
     * log, no voting configuration change may be pending, and the callback
     * must be installed. */
    if (!raft_node_is_voting(node) &&
        -1 == me->voting_cfg_change_log_idx &&
        raft_get_current_idx(me_) <= r->current_idx + 1 &&
        me->cb.node_has_sufficient_logs &&
        0 == raft_node_has_sufficient_logs(node)
        )
    {
        raft_node_set_has_sufficient_logs(node);
        me->cb.node_has_sufficient_logs(me_, me->udata, node);
    }

    /* Update commit idx */
    int votes = 1; /* include me */
    int point = r->current_idx;
    int i;
    for (i = 0; i < me->num_nodes; i++)
    {
        /* Skip empty slots, ourselves and non-voting peers. */
        if (!me->nodes[i] ||
            me->node == me->nodes[i] ||
            !raft_node_is_voting(me->nodes[i]))
            continue;

        int match_idx = raft_node_get_match_idx(me->nodes[i]);

        if (0 < match_idx)
        {
            raft_entry_t* ety = raft_get_entry_from_idx(me_, match_idx);

            /* The lookup can come back empty; a peer counts only when the
             * entry exists. */
            if (ety && ety->term == me->current_term && point <= match_idx)
                votes++;
        }
    }

    if (me->num_nodes / 2 < votes && raft_get_commit_idx(me_) < point)
        raft_set_commit_idx(me_, point);

    /* Aggressively send remaining entries */
    if (raft_get_entry_from_idx(me_, raft_node_get_next_idx(node)))
        raft_send_appendentries(me_, node);

    /* periodic applies committed entries lazily */

    return 0;
}