/**
 * Periodic tick for the Raft server.
 *
 * Logs the currently elapsed timeout, lets a follower apply one pending
 * committed entry, then advances the timeout clock and either sends
 * appendentries to all nodes (leader) or starts an election (any other
 * state) once the relevant timeout has been reached.
 *
 * @param me_ The Raft server.
 * @param msec_since_last_period Time added to the timeout clock for this tick.
 * @return 0 normally; -1 if raft_apply_entry() returned -1.
 */
int raft_periodic(raft_server_t* me_, int msec_since_last_period)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    __log(me_, "periodic elapsed time: %d", me->timeout_elapsed);

    /* Only a follower applies entries here, and at most one per tick. */
    if (RAFT_STATE_FOLLOWER == me->state &&
        me->last_applied_idx < me->commit_idx &&
        -1 == raft_apply_entry(me_))
        return -1;

    me->timeout_elapsed += msec_since_last_period;

    if (RAFT_STATE_LEADER != me->state)
    {
        /* Not the leader: an expired election timeout starts an election. */
        if (me->election_timeout <= me->timeout_elapsed)
            raft_election_start(me_);
        return 0;
    }

    /* Leader: send appendentries to everyone and restart the timer. */
    if (me->request_timeout <= me->timeout_elapsed)
    {
        raft_send_appendentries_all(me_);
        me->timeout_elapsed = 0;
    }

    return 0;
}
/**
 * Apply every committed-but-unapplied log entry.
 *
 * Calls raft_apply_entry() repeatedly while the last-applied index is below
 * the commit index.
 *
 * @param me_ The Raft server.
 * @return 0 once the last-applied index has caught up with the commit index;
 *         otherwise the first non-zero code returned by raft_apply_entry()
 *         (RAFT_ERR_SHUTDOWN included).
 */
int raft_apply_all(raft_server_t* me_)
{
    while (raft_get_last_applied_idx(me_) < raft_get_commit_idx(me_))
    {
        int e = raft_apply_entry(me_);

        /* Bail out on any failure, not just RAFT_ERR_SHUTDOWN: a failed
         * apply that does not advance the last-applied index would
         * otherwise keep this loop spinning forever. */
        if (0 != e)
            return e;
    }

    return 0;
}
/*
 * With commit and last-applied indices both at 0 and nothing appended,
 * raft_apply_entry() must leave both indices at 0. After one entry has been
 * appended and raft_apply_entry() is called again, the test expects both
 * indices to read 1.
 */
void TestRaft_server_wont_apply_entry_if_we_dont_have_entry_to_apply(CuTest* tc)
{
    /* Zero-initialise so that every field this test does not assign
     * explicitly has a defined value when the entry is handed to
     * raft_append_entry(). */
    raft_entry_t ety = {0};
    char *str = "aaa";

    void *r = raft_new();
    raft_set_commit_idx(r, 0);
    raft_set_last_applied_idx(r, 0);

    /* Nothing in the log yet: applying must not move either index. */
    raft_apply_entry(r);
    CuAssertTrue(tc, 0 == raft_get_last_applied_idx(r));
    CuAssertTrue(tc, 0 == raft_get_commit_idx(r));

    ety.term = 1;
    ety.id = 1;
    ety.data.buf = str;
    ety.data.len = 3;
    raft_append_entry(r, &ety);

    raft_apply_entry(r);
    CuAssertTrue(tc, 1 == raft_get_last_applied_idx(r));
    CuAssertTrue(tc, 1 == raft_get_commit_idx(r));
}
/**
 * Handle a node's reply to an appendentries message.
 *
 * On success, every log index in [r->first_idx, r->current_idx] is marked
 * via log_mark_node_has_committed(), and entries following the last-applied
 * index are applied for as long as the majority check below holds.
 * On failure, the node's next index is decremented and appendentries is
 * sent to it again.
 *
 * @param me_  The Raft server.
 * @param node Identifier of the node that sent the response.
 * @param r    The appendentries response.
 * @return Always 1.
 */
int raft_recv_appendentries_response(raft_server_t* me_, int node, msg_appendentries_response_t* r)
{
    raft_server_private_t* me = (void*)me_;
    raft_node_t* peer;
    int idx;

    __log(me_, NULL, "received appendentries response from: %d", node);

    peer = raft_get_node(me_, node);

    if (1 != r->success)
    {
        /* If AppendEntries fails because of log inconsistency:
         * decrement nextIndex and retry (Raft paper, section 5.3). */
        assert(0 <= raft_node_get_next_idx(peer));
        /* TODO: does this have test coverage? */
        /* TODO: could jump back to where the node differs instead of
         * stepping back one index at a time. */
        raft_node_set_next_idx(peer, raft_node_get_next_idx(peer) - 1);
        raft_send_appendentries(me_, node);
        return 1;
    }

    /* Record that this node holds every entry the response covers. */
    for (idx = r->first_idx; idx <= r->current_idx; idx++)
        log_mark_node_has_committed(me->log, idx);

    /* Apply successive entries until one is missing, one has not yet
     * reached half of num_nodes, or raft_apply_entry() returns 0. */
    for (;;)
    {
        raft_entry_t* next = log_get_from_idx(me->log, me->last_applied_idx + 1);

        if (!next || me->num_nodes / 2 > next->num_nodes)
            break;

        if (0 == raft_apply_entry(me_))
            break;
    }

    return 1;
}
/*
 * With the commit index at 1 and the last-applied index at 0, appending one
 * entry and calling raft_apply_entry() must move the last-applied index to 1.
 */
void TestRaft_server_apply_entry_increments_last_applied_idx(CuTest* tc)
{
    /* Zero-initialise so that every field this test does not assign
     * explicitly has a defined value when the entry is handed to
     * raft_append_entry(). */
    raft_entry_t ety = {0};
    char *str = "aaa";

    ety.term = 1;

    void *r = raft_new();
    raft_set_commit_idx(r, 1);
    raft_set_last_applied_idx(r, 0);

    ety.id = 1;
    ety.data.buf = str;
    ety.data.len = 3;
    raft_append_entry(r, &ety);

    raft_apply_entry(r);
    CuAssertTrue(tc, 1 == raft_get_last_applied_idx(r));
}
/**
 * Periodic tick for the Raft server.
 *
 * Advances the timeout clock, then sends appendentries to all nodes
 * (leader, once the request timeout is reached) or starts an election
 * (other states, once the election timeout is reached and there is more
 * than one node). Finally applies at most one committed-but-unapplied entry.
 *
 * @param me_ The Raft server.
 * @param msec_since_last_period Time added to the timeout clock for this tick.
 * @return 0 normally; -1 if raft_apply_entry() returned -1.
 */
int raft_periodic(raft_server_t* me_, int msec_since_last_period)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    me->timeout_elapsed += msec_since_last_period;

    if (RAFT_STATE_LEADER == me->state)
    {
        if (me->timeout_elapsed >= me->request_timeout)
            raft_send_appendentries_all(me_);
    }
    else if (me->timeout_elapsed >= me->election_timeout && me->num_nodes > 1)
    {
        /* With a single node there is nobody to hold an election with. */
        raft_election_start(me_);
    }

    /* Nothing committed beyond what has already been applied. */
    if (!(me->last_applied_idx < me->commit_idx))
        return 0;

    return (-1 == raft_apply_entry(me_)) ? -1 : 0;
}
/**
 * Handle an appendentries message and send the response through the
 * registered send callback.
 *
 * Resets the timeout clock, validates the message against the local term
 * and log, updates the commit index, adopts the sender's term and appends
 * the carried entries to the log. The response is sent on every path,
 * whether the request was accepted or rejected.
 *
 * @param me_  The Raft server.
 * @param node Identifier of the node that sent the message.
 * @param ae   The appendentries message.
 * @return Always 1.
 */
int raft_recv_appendentries(
    raft_server_t* me_,
    const int node,
    msg_appendentries_t* ae)
{
    int i;
    raft_server_private_t* me = (void*)me_;
    msg_appendentries_response_t r;

    /* The whole struct is handed to the send callback by size, so clear it
     * first: rejection paths only set term and success, and would otherwise
     * transmit indeterminate stack bytes for the remaining fields. */
    memset(&r, 0, sizeof(r));

    me->timeout_elapsed = 0;

    __log(me_, NULL, "received appendentries from: %d", node);

    r.term = me->current_term;

    /* we've found a leader who is legitimate */
    if (raft_is_leader(me_) && me->current_term <= ae->term)
        raft_become_follower(me_);

    /* 1. Reply false if term < currentTerm (section 5.1) */
    if (ae->term < me->current_term)
    {
        __log(me_, NULL, "AE term is less than current term");
        r.success = 0;
        goto done;
    }

#if 0
    if (-1 != ae->prev_log_idx &&
        ae->prev_log_idx < raft_get_current_idx(me_))
    {
        __log(me_, NULL, "AE prev_idx is less than current idx");
        r.success = 0;
        goto done;
    }
#endif

    /* not the first appendentries we've received */
    if (0 != ae->prev_log_idx)
    {
        raft_entry_t* e = raft_get_entry_from_idx(me_, ae->prev_log_idx);

        if (!e)
        {
            __log(me_, NULL, "AE no log at prev_idx");
            r.success = 0;
            goto done;
        }

        /* 2. Reply false if log doesn't contain an entry at prevLogIndex
         * whose term matches prevLogTerm (section 5.3) */
        if (e->term != ae->prev_log_term)
        {
            __log(me_, NULL, "AE term doesn't match prev_idx");
            r.success = 0;
            goto done;
        }

        /* 3. If an existing entry conflicts with a new one (same index but
         * different terms), delete the existing entry and all that follow
         * it (section 5.3) */
        if (raft_get_entry_from_idx(me_, ae->prev_log_idx + 1))
            log_delete(me->log, ae->prev_log_idx + 1);
    }

    /* 5. If leaderCommit > commitIndex, set commitIndex =
     * min(leaderCommit, last log index) */
    /* NOTE(review): this runs before the entries below are appended, so the
     * tail used here is the pre-append tail - confirm that is intended. */
    if (raft_get_commit_idx(me_) < ae->leader_commit)
    {
        raft_entry_t* e = log_peektail(me->log);

        if (e)
        {
            raft_set_commit_idx(me_, e->id < ae->leader_commit ?
                                e->id : ae->leader_commit);
            while (1 == raft_apply_entry(me_))
                ;
        }
    }

    if (raft_is_candidate(me_))
        raft_become_follower(me_);

    raft_set_current_term(me_, ae->term);

    /* append all entries to log */
    for (i = 0; i < ae->n_entries; i++)
    {
        msg_entry_t* cmd = &ae->entries[i];
        raft_entry_t* c;

        /* TODO: replace malloc with mempool/arena */
        /* calloc so that any entry field not assigned below starts at zero */
        c = calloc(1, sizeof(*c));
        if (!c)
        {
            __log(me_, NULL, "AE failure; out of memory");
            r.success = 0;
            goto done;
        }

        c->term = me->current_term;
        c->len = cmd->len;
        c->id = cmd->id;
        c->data = malloc(cmd->len);
        /* A NULL result is only an error when bytes were actually requested */
        if (!c->data && 0 != cmd->len)
        {
            free(c);
            __log(me_, NULL, "AE failure; out of memory");
            r.success = 0;
            goto done;
        }
        if (0 != cmd->len)
            memcpy(c->data, cmd->data, cmd->len);

        if (0 == raft_append_entry(me_, c))
        {
            /* NOTE(review): c and c->data are not released here because it
             * is not visible whether raft_append_entry() retains the
             * pointer on failure - confirm ownership and free if it does
             * not. */
            __log(me_, NULL, "AE failure; couldn't append entry");
            r.success = 0;
            goto done;
        }
    }

    r.success = 1;
    r.current_idx = raft_get_current_idx(me_);
    r.first_idx = ae->prev_log_idx + 1;

done:
    if (me->cb.send)
        me->cb.send(me->cb_ctx, me, node, RAFT_MSG_APPENDENTRIES_RESPONSE,
                    (void*)&r, sizeof(msg_appendentries_response_t));
    return 1;
}
/**
 * Handle an appendentries message and fill in the caller-supplied response.
 *
 * Resets the timeout clock, validates the message against the local term
 * and log, updates the commit index, adopts the sender's term and appends
 * the carried entries to the log.
 *
 * @param me_  The Raft server.
 * @param node Identifier of the node that sent the message.
 * @param ae   The appendentries message.
 * @param r    Response to populate. term and success are always written;
 *             current_idx and first_idx are written only on acceptance.
 * @return 0 when the message was processed (accepted or rejected);
 *         -1 when an entry could not be allocated or appended.
 */
int raft_recv_appendentries(
    raft_server_t* me_,
    const int node,
    msg_appendentries_t* ae,
    msg_appendentries_response_t *r
    )
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int i;

    me->timeout_elapsed = 0;

    __log(me_, "received appendentries from: %d", node);

    r->term = me->current_term;

    /* we've found a leader who is legitimate */
    if (raft_is_leader(me_) && me->current_term <= ae->term)
        raft_become_follower(me_);

    /* 1. Reply false if term < currentTerm (section 5.1) */
    if (ae->term < me->current_term)
    {
        __log(me_, "AE term is less than current term");
        r->success = 0;
        return 0;
    }

#if 0
    if (-1 != ae->prev_log_idx &&
        ae->prev_log_idx < raft_get_current_idx(me_))
    {
        __log(me_, "AE prev_idx is less than current idx");
        r->success = 0;
        return 0;
    }
#endif

    /* not the first appendentries we've received */
    if (0 != ae->prev_log_idx)
    {
        raft_entry_t* e = raft_get_entry_from_idx(me_, ae->prev_log_idx);

        if (!e)
        {
            __log(me_, "AE no log at prev_idx");
            r->success = 0;
            return 0;
        }

        /* 2. Reply false if log doesn't contain an entry at prevLogIndex
         * whose term matches prevLogTerm (section 5.3) */
        if (e->term != ae->prev_log_term)
        {
            __log(me_, "AE term doesn't match prev_idx");
            r->success = 0;
            return 0;
        }

        /* 3. If an existing entry conflicts with a new one (same index but
         * different terms), delete the existing entry and all that follow
         * it (section 5.3) */
        if (raft_get_entry_from_idx(me_, ae->prev_log_idx + 1))
            log_delete(me->log, ae->prev_log_idx + 1);
    }

    /* 5. If leaderCommit > commitIndex, set commitIndex =
     * min(leaderCommit, last log index) */
    if (raft_get_commit_idx(me_) < ae->leader_commit)
    {
        raft_entry_t* e = log_peektail(me->log);

        if (e)
        {
            int id = e->id < ae->leader_commit ?
                     e->id : ae->leader_commit;
            raft_set_commit_idx(me_, id);
            while (0 == raft_apply_entry(me_))
                ;
        }
    }

    if (raft_is_candidate(me_))
        raft_become_follower(me_);

    raft_set_current_term(me_, ae->term);

    /* append all entries to log */
    for (i = 0; i < ae->n_entries; i++)
    {
        msg_entry_t* cmd = &ae->entries[i];

        /* TODO: replace malloc with mempool/arena */
        /* calloc so that any entry field not assigned below starts at zero */
        raft_entry_t* c = (raft_entry_t*)calloc(1, sizeof(raft_entry_t));
        if (!c)
        {
            __log(me_, "AE failure; out of memory");
            r->success = 0;
            return -1;
        }

        c->term = me->current_term;
        c->len = cmd->len;
        c->id = cmd->id;
        c->data = (unsigned char*)malloc(cmd->len);
        /* A NULL result is only an error when bytes were actually requested */
        if (!c->data && 0 != cmd->len)
        {
            free(c);
            __log(me_, "AE failure; out of memory");
            r->success = 0;
            return -1;
        }
        if (0 != cmd->len)
            memcpy(c->data, cmd->data, cmd->len);

        if (-1 == raft_append_entry(me_, c))
        {
            /* NOTE(review): c and c->data are not released here because it
             * is not visible whether raft_append_entry() retains the
             * pointer on failure - confirm ownership and free if it does
             * not. */
            __log(me_, "AE failure; couldn't append entry");
            r->success = 0;
            return -1;
        }
    }

    r->success = 1;
    r->current_idx = raft_get_current_idx(me_);
    r->first_idx = ae->prev_log_idx + 1;
    return 0;
}
/**
 * Apply log entries until the last-applied index is no longer below the
 * commit index.
 *
 * The result of raft_apply_entry() is not inspected.
 * NOTE(review): termination therefore relies on each call advancing the
 * last-applied index - confirm raft_apply_entry() cannot fail without
 * making progress.
 *
 * @param me_ The Raft server.
 */
void raft_apply_all(raft_server_t* me_)
{
    for (;;)
    {
        if (!(raft_get_last_applied_idx(me_) < raft_get_commit_idx(me_)))
            return;

        raft_apply_entry(me_);
    }
}