/*
 * An entry that has been appended can be fetched back by log index.
 * Two entries are appended; looking up index 2 must return an entry whose
 * payload equals the second entry's payload.
 */
void TestRaft_server_entry_is_retrieveable_using_idx(CuTest* tc)
{
    /* Zero-initialise so that members this test never assigns are not
     * indeterminate when the entries are handed to raft_append_entry() */
    raft_entry_t e1 = {0};
    raft_entry_t e2 = {0};
    raft_entry_t *ety_appended;
    char *str = "aaa";
    char *str2 = "bbb";

    void *r = raft_new();

    e1.term = 1;
    e1.id = 1;
    e1.data.buf = str;
    e1.data.len = 3;
    raft_append_entry(r, &e1);

    /* different ID so we can be successful */
    e2.term = 1;
    e2.id = 2;
    e2.data.buf = str2;
    e2.data.len = 3;
    raft_append_entry(r, &e2);

    CuAssertTrue(tc, NULL != (ety_appended = raft_get_entry_from_idx(r, 2)));
    CuAssertTrue(tc, !strncmp(ety_appended->data.buf, str2, 3));
}
/* Candidate 5.2 */
/*
 * The server holds two log entries at term 1, while the vote request
 * advertises last_log_idx 1 / last_log_term 1. The test expects the vote
 * to be refused.
 */
void TestRaft_follower_dont_grant_vote_if_candidate_has_a_less_complete_log(
    CuTest * tc)
{
    msg_requestvote_t rv;
    msg_requestvote_response_t rvr;

    void *r = raft_new();
    raft_add_node(r, (void*)1, 1);
    raft_add_node(r, (void*)2, 0);

    /* request vote */
    /* vote indicates candidate's log is not complete compared to follower */
    memset(&rv, 0, sizeof(msg_requestvote_t));
    rv.term = 1;
    rv.candidate_id = 1;
    rv.last_log_idx = 1;
    rv.last_log_term = 1;

    /* server's term and idx are more up-to-date */
    raft_set_current_term(r, 1);

    /* Zero-initialise so that members this test never assigns are not
     * indeterminate when the entry is handed to raft_append_entry() */
    raft_entry_t ety = {0};
    ety.term = 1;
    ety.id = 100;
    ety.data.len = 4;
    ety.data.buf = (unsigned char*)"aaa";
    raft_append_entry(r, &ety);

    /* second entry: same contents, different id */
    ety.id = 101;
    raft_append_entry(r, &ety);

    /* vote not granted */
    raft_recv_requestvote(r, 1, &rv, &rvr);
    CuAssertTrue(tc, 0 == rvr.vote_granted);
}
/* 5.3 */
/*
 * The log is seeded with two entries ("111", "222"). An appendentries
 * message with prev_log_idx 1 and a single new entry ("333") is then
 * delivered. The test expects the call to succeed, the log to still hold
 * two entries, and the entry at index 1 to be the untouched "111".
 */
void TestRaft_follower_recv_appendentries_delete_entries_if_conflict_with_new_entries(
    CuTest * tc)
{
    msg_appendentries_t ae;
    msg_appendentries_response_t aer;
    raft_entry_t *ety_appended;

    void *r = raft_new();
    raft_add_node(r, (void*)1, 1);
    raft_add_node(r, (void*)2, 0);

    raft_set_current_term(r, 1);

    /* Zero-initialise so that members this test never assigns are not
     * indeterminate when the entry is handed to raft_append_entry() */
    raft_entry_t ety = {0};

    /* increase log size */
    char *str1 = "111";
    ety.data.buf = str1;
    ety.data.len = 3;
    ety.id = 1;
    ety.term = 1;
    raft_append_entry(r, &ety);
    CuAssertTrue(tc, 1 == raft_get_log_count(r));

    /* this log will be overwritten by the appendentries below */
    char *str2 = "222";
    ety.data.buf = str2;
    ety.data.len = 3;
    ety.id = 2;
    ety.term = 1;
    raft_append_entry(r, &ety);
    CuAssertTrue(tc, 2 == raft_get_log_count(r));
    CuAssertTrue(tc, NULL != (ety_appended = raft_get_entry_from_idx(r, 2)));
    CuAssertTrue(tc, !strncmp(ety_appended->data.buf, str2, 3));

    /* pass a appendentry that is newer */
    msg_entry_t mety;

    memset(&ae, 0, sizeof(msg_appendentries_t));
    ae.term = 2;
    ae.prev_log_idx = 1;
    ae.prev_log_term = 1;

    /* include one entry */
    memset(&mety, 0, sizeof(msg_entry_t));
    char *str3 = "333";
    mety.data.buf = str3;
    mety.data.len = 3;
    mety.id = 3;
    ae.entries = &mety;
    ae.n_entries = 1;

    raft_recv_appendentries(r, 1, &ae, &aer);
    CuAssertTrue(tc, 1 == aer.success);
    CuAssertTrue(tc, 2 == raft_get_log_count(r));
    CuAssertTrue(tc, NULL != (ety_appended = raft_get_entry_from_idx(r, 1)));
    CuAssertTrue(tc, !strncmp(ety_appended->data.buf, str1, 3));
}
/* If commitidx > lastApplied: increment lastApplied, apply log[lastApplied]
 * to state machine (§5.3)
 *
 * Setup: follower, commit index 1, last-applied index 0, one entry in the
 * log. After one raft_periodic() call the last-applied index must be 1. */
void TestRaft_server_increment_lastApplied_when_lastApplied_lt_commitidx(
    CuTest* tc)
{
    /* Zero-initialise so that members this test never assigns are not
     * indeterminate when the entry is handed to raft_append_entry() */
    raft_entry_t ety = {0};

    void *r = raft_new();

    /* must be follower */
    raft_set_state(r, RAFT_STATE_FOLLOWER);
    raft_set_current_term(r, 1);
    raft_set_commit_idx(r, 1);
    raft_set_last_applied_idx(r, 0);

    /* need at least one entry */
    ety.term = 1;
    ety.id = 1;
    ety.data.buf = "aaa";
    ety.data.len = 3;
    raft_append_entry(r, &ety);

    /* let time lapse */
    raft_periodic(r, 1);
    CuAssertTrue(tc, 0 != raft_get_last_applied_idx(r));
    CuAssertTrue(tc, 1 == raft_get_last_applied_idx(r));
}
/*
 * A fresh server reports current index 0; after the first append the
 * current index is 1.
 */
void TestRaft_server_idx_starts_at_1(CuTest * tc)
{
    void *r = raft_new();

    CuAssertTrue(tc, 0 == raft_get_current_idx(r));

    /* Zero-initialise so that members this test never assigns are not
     * indeterminate when the entry is handed to raft_append_entry() */
    raft_entry_t ety = {0};
    ety.data.buf = "aaa";
    ety.data.len = 3;
    ety.id = 1;
    ety.term = 1;
    raft_append_entry(r, &ety);

    CuAssertTrue(tc, 1 == raft_get_current_idx(r));
}
/**
 * Accept a new entry on the leader, append it to the local log and start
 * replicating it.
 *
 * @param me_ the server
 * @param e   the entry received; id, type and data are copied into the log
 *            entry, which is stamped with the server's current term
 * @param r   filled on success with the entry id, the log index it was
 *            stored at and the current term
 * @return 0 on success; -1 if a voting configuration change is already
 *         pending and `e` is another one, if this server is not the leader,
 *         or if the entry could not be appended to the log
 */
int raft_recv_entry(raft_server_t* me_, msg_entry_t* e, msg_entry_response_t *r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int i;

    /* Only one voting cfg change at a time */
    if (raft_entry_is_voting_cfg_change(e) &&
        -1 != me->voting_cfg_change_log_idx)
        return -1;

    if (!raft_is_leader(me_))
        return -1;

    __log(me_, NULL, "received entry t:%d id: %d idx: %d",
          me->current_term, e->id, raft_get_current_idx(me_) + 1);

    raft_entry_t ety;
    ety.term = me->current_term;
    ety.id = e->id;
    ety.type = e->type;
    memcpy(&ety.data, &e->data, sizeof(raft_entry_data_t));

    /* Stop here when the append fails: otherwise we would replicate,
     * possibly advance commit_idx, and report an index for an entry that
     * never made it into the log. */
    if (-1 == raft_append_entry(me_, &ety))
        return -1;

    for (i = 0; i < me->num_nodes; i++)
    {
        if (me->node == me->nodes[i] || !me->nodes[i] ||
            !raft_node_is_voting(me->nodes[i]))
            continue;

        /* Only send new entries.
         * Don't send the entry to peers who are behind, to prevent them from
         * becoming congested. */
        int next_idx = raft_node_get_next_idx(me->nodes[i]);
        if (next_idx == raft_get_current_idx(me_))
            raft_send_appendentries(me_, me->nodes[i]);
    }

    /* if we're the only node, we can consider the entry committed */
    if (1 == me->num_nodes)
        me->commit_idx = raft_get_current_idx(me_);

    r->id = e->id;
    r->idx = raft_get_current_idx(me_);
    r->term = me->current_term;

    /* remember where the pending voting cfg change lives in the log */
    if (raft_entry_is_voting_cfg_change(e))
        me->voting_cfg_change_log_idx = raft_get_current_idx(me_);

    return 0;
}
/*
 * Appending an entry whose id is 0 must not grow the log: the current index
 * is 0 both before and after the append attempt.
 */
void TestRaft_server_entry_append_cant_append_if_id_is_zero(CuTest* tc)
{
    /* Zero-initialise so that members this test never assigns are not
     * indeterminate when the entry is handed to raft_append_entry() */
    raft_entry_t ety = {0};
    char *str = "aaa";

    ety.data.buf = str;
    ety.data.len = 3;
    ety.id = 0;
    ety.term = 1;

    void *r = raft_new();
    CuAssertTrue(tc, 0 == raft_get_current_idx(r));
    raft_append_entry(r, &ety);
    CuAssertTrue(tc, 0 == raft_get_current_idx(r));
}
/* TODO: no support for duplicate detection yet */
/*
 * Appends the same entry twice, then an entry with a different id, checking
 * the current index after each step.
 *
 * NOTE(review): the name starts with "T_est" rather than "TestRaft_",
 * presumably to keep it out of the generated test list while the TODO above
 * is open - confirm. The expected indices here (1 on a fresh server) also
 * differ from the sibling tests, which expect 0; revisit when enabling.
 */
void T_estRaft_server_append_entry_not_sucessful_if_entry_with_id_already_appended(
    CuTest* tc)
{
    void *r;
    /* Zero-initialise so that members this test never assigns are not
     * indeterminate when the entry is handed to raft_append_entry() */
    raft_entry_t ety = {0};
    char *str = "aaa";

    ety.data.buf = str;
    ety.data.len = 3;
    ety.id = 1;
    ety.term = 1;

    r = raft_new();
    CuAssertTrue(tc, 1 == raft_get_current_idx(r));
    raft_append_entry(r, &ety);
    raft_append_entry(r, &ety);
    CuAssertTrue(tc, 2 == raft_get_current_idx(r));

    /* different ID so we can be successful */
    ety.id = 2;
    raft_append_entry(r, &ety);
    CuAssertTrue(tc, 3 == raft_get_current_idx(r));
}
/*
 * Appends a single entry to a fresh server and checks that the current
 * index moves from 0 to 1.
 *
 * NOTE(review): despite the name, nothing here inspects the term of the
 * stored entry - only the index is asserted.
 */
void TestRaft_server_append_entry_means_entry_gets_current_term(CuTest* tc)
{
    /* Zero-initialise so that members this test never assigns are not
     * indeterminate when the entry is handed to raft_append_entry() */
    raft_entry_t ety = {0};
    char *str = "aaa";

    ety.data.buf = str;
    ety.data.len = 3;
    ety.id = 1;
    ety.term = 1;

    void *r = raft_new();
    CuAssertTrue(tc, 0 == raft_get_current_idx(r));
    raft_append_entry(r, &ety);
    CuAssertTrue(tc, 1 == raft_get_current_idx(r));
}
/*
 * With commit index 1, last-applied index 0 and one entry in the log, a
 * single raft_apply_entry() call must move the last-applied index to 1.
 */
void TestRaft_server_apply_entry_increments_last_applied_idx(CuTest* tc)
{
    /* Zero-initialise so that members this test never assigns are not
     * indeterminate when the entry is handed to raft_append_entry() */
    raft_entry_t ety = {0};
    char *str = "aaa";

    ety.term = 1;

    void *r = raft_new();
    raft_set_commit_idx(r, 1);
    raft_set_last_applied_idx(r, 0);

    ety.id = 1;
    ety.data.buf = str;
    ety.data.len = 3;
    raft_append_entry(r, &ety);
    raft_apply_entry(r);
    CuAssertTrue(tc, 1 == raft_get_last_applied_idx(r));
}
/**
 * Take an entry submitted by `node`, store it in the local log, answer the
 * sender with the append result and push appendentries to every other node.
 *
 * @param me_  the server
 * @param node id of the node the entry came from
 * @param e    the submitted entry (id, data and len are copied)
 * @return always 0
 */
int raft_recv_entry(raft_server_t* me_, int node, msg_entry_t* e)
{
    raft_server_private_t* me = (void*)me_;
    raft_entry_t entry;
    int append_rc;
    int peer;

    __log(me_, NULL, "received entry from: %d", node);

    /* build the log entry, stamped with our current term */
    entry.term = me->current_term;
    entry.id = e->id;
    entry.data = e->data;
    entry.len = e->len;

    append_rc = raft_append_entry(me_, &entry);
    raft_send_entry_response(me_, node, e->id, append_rc);

    /* replicate to everyone except ourselves */
    peer = 0;
    while (peer < me->num_nodes)
    {
        if (me->nodeid != peer)
            raft_send_appendentries(me_, peer);
        peer++;
    }

    return 0;
}
/**
 * Take an entry submitted by `node`, store it in the local log, push
 * appendentries to every other node and fill in the response.
 *
 * @param me_  the server
 * @param node id of the node the entry came from
 * @param e    the submitted entry (id, data and len are copied)
 * @param r    response: receives the entry id, and was_committed is set
 *             when raft_append_entry() returned 0
 * @return always 0
 */
int raft_recv_entry(raft_server_t* me_, int node, msg_entry_t* e,
                    msg_entry_response_t *r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    raft_entry_t entry;
    int append_rc;
    int peer;

    __log(me_, "received entry from: %d", node);

    /* build the log entry, stamped with our current term */
    entry.term = me->current_term;
    entry.id = e->id;
    entry.data = e->data;
    entry.len = e->len;
    append_rc = raft_append_entry(me_, &entry);

    /* replicate to everyone except ourselves */
    for (peer = 0; peer < me->num_nodes; peer++)
    {
        if (me->nodeid == peer)
            continue;
        raft_send_appendentries(me_, peer);
    }

    r->id = e->id;
    if (0 == append_rc)
        r->was_committed = 1;
    else
        r->was_committed = 0;

    return 0;
}
/*
 * With an empty log, raft_apply_entry() must leave both the last-applied
 * and the commit index at 0. After one entry has been appended, a second
 * raft_apply_entry() call is expected to bring both indices to 1.
 */
void TestRaft_server_wont_apply_entry_if_we_dont_have_entry_to_apply(CuTest* tc)
{
    /* Zero-initialise so that members this test never assigns are not
     * indeterminate when the entry is handed to raft_append_entry() */
    raft_entry_t ety = {0};
    char *str = "aaa";

    void *r = raft_new();
    raft_set_commit_idx(r, 0);
    raft_set_last_applied_idx(r, 0);

    /* nothing in the log yet: applying must be a no-op */
    raft_apply_entry(r);
    CuAssertTrue(tc, 0 == raft_get_last_applied_idx(r));
    CuAssertTrue(tc, 0 == raft_get_commit_idx(r));

    ety.term = 1;
    ety.id = 1;
    ety.data.buf = str;
    ety.data.len = 3;
    raft_append_entry(r, &ety);
    raft_apply_entry(r);
    CuAssertTrue(tc, 1 == raft_get_last_applied_idx(r));
    CuAssertTrue(tc, 1 == raft_get_commit_idx(r));
}
/**
 * Handle an appendentries message from `node` and send the reply through
 * the `send` callback, when one is registered.
 *
 * The reply carries the term this server had on entry and a `success`
 * flag. `current_idx` and `first_idx` are filled in on success and are
 * zero on every failure path.
 *
 * @param me_  the server
 * @param node id of the sending node
 * @param ae   the appendentries message
 * @return always 1; the outcome is reported in the reply's `success` field
 */
int raft_recv_appendentries(
    raft_server_t* me_,
    const int node,
    msg_appendentries_t* ae)
{
    int i;
    raft_server_private_t* me = (void*)me_;
    msg_appendentries_response_t r;

    /* Zero the reply up front: the failure paths only assign term and
     * success, and the whole struct is handed to the send callback. */
    memset(&r, 0, sizeof(r));

    me->timeout_elapsed = 0;

    __log(me_, NULL, "received appendentries from: %d", node);

    r.term = me->current_term;

    /* we've found a leader who is legitimate */
    if (raft_is_leader(me_) && me->current_term <= ae->term)
        raft_become_follower(me_);

    /* 1. Reply false if term < currentTerm (§5.1) */
    if (ae->term < me->current_term)
    {
        __log(me_, NULL, "AE term is less than current term");
        r.success = 0;
        goto done;
    }

#if 0
    if (-1 != ae->prev_log_idx &&
        ae->prev_log_idx < raft_get_current_idx(me_))
    {
        __log(me_, NULL, "AE prev_idx is less than current idx");
        r.success = 0;
        goto done;
    }
#endif

    /* not the first appendentries we've received */
    if (0 != ae->prev_log_idx)
    {
        raft_entry_t* e;

        if ((e = raft_get_entry_from_idx(me_, ae->prev_log_idx)))
        {
            /* 2. Reply false if log doesn't contain an entry at prevLogIndex
               whose term matches prevLogTerm (§5.3) */
            if (e->term != ae->prev_log_term)
            {
                __log(me_, NULL, "AE term doesn't match prev_idx");
                r.success = 0;
                goto done;
            }

            /* 3. If an existing entry conflicts with a new one (same index
               but different terms), delete the existing entry and all that
               follow it (§5.3) */
            /* NOTE(review): any entry found after prev_log_idx is deleted
               here, without comparing terms. */
            if (raft_get_entry_from_idx(me_, ae->prev_log_idx+1))
            {
                log_delete(me->log, ae->prev_log_idx+1);
            }
        }
        else
        {
            __log(me_, NULL, "AE no log at prev_idx");
            r.success = 0;
            goto done;
            //assert(0);
        }
    }

    /* 5. If leaderCommit > commitIndex, set commitIndex =
        min(leaderCommit, last log index) */
    if (raft_get_commit_idx(me_) < ae->leader_commit)
    {
        raft_entry_t* e;

        if ((e = log_peektail(me->log)))
        {
            /* NOTE(review): this uses the tail entry's id as the "last log
               index" - confirm ids and indices coincide. */
            raft_set_commit_idx(me_, e->id < ae->leader_commit ?
                                e->id : ae->leader_commit);
            /* apply for as long as raft_apply_entry() keeps returning 1 */
            while (1 == raft_apply_entry(me_));
        }
    }

    if (raft_is_candidate(me_))
        raft_become_follower(me_);

    raft_set_current_term(me_, ae->term);

    /* append all entries to log */
    for (i=0; i<ae->n_entries; i++)
    {
        msg_entry_t* cmd;
        raft_entry_t* c;

        cmd = &ae->entries[i];

        /* TODO: replace malloc with mempoll/arena */
        c = malloc(sizeof(raft_entry_t));
        if (!c)
        {
            __log(me_, NULL, "AE failure; out of memory");
            r.success = 0;
            goto done;
        }
        c->term = me->current_term;
        c->len = cmd->len;
        c->id = cmd->id;
        c->data = malloc(cmd->len);
        if (0 != cmd->len)
        {
            if (!c->data)
            {
                /* nothing else references c yet, so it is ours to free */
                free(c);
                __log(me_, NULL, "AE failure; out of memory");
                r.success = 0;
                goto done;
            }
            memcpy(c->data, cmd->data, cmd->len);
        }
        /* NOTE(review): c and c->data are not released when the append
           fails; who owns them after raft_append_entry() is not visible
           from here - confirm before adding a free. */
        if (0 == raft_append_entry(me_, c))
        {
            __log(me_, NULL, "AE failure; couldn't append entry");
            r.success = 0;
            goto done;
        }
    }

    r.success = 1;
    r.current_idx = raft_get_current_idx(me_);
    r.first_idx = ae->prev_log_idx + 1;

done:
    if (me->cb.send)
        me->cb.send(me->cb_ctx, me, node, RAFT_MSG_APPENDENTRIES_RESPONSE,
                    (void*)&r, sizeof(msg_appendentries_response_t));
    return 1;
}
/**
 * Handle an appendentries message from `node` and fill in the response.
 *
 * @param me_  the server
 * @param node id of the sending node
 * @param ae   the appendentries message
 * @param r    response: term is always set to the term this server had on
 *             entry, and success is set on every path; current_idx and
 *             first_idx are only written on success
 * @return 0 when the message was processed (accepted, or rejected by the
 *         term / prev-log checks); -1 when an entry could not be allocated
 *         or appended
 */
int raft_recv_appendentries(
    raft_server_t* me_,
    const int node,
    msg_appendentries_t* ae,
    msg_appendentries_response_t *r
    )
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    me->timeout_elapsed = 0;

    __log(me_, "received appendentries from: %d", node);

    r->term = me->current_term;

    /* we've found a leader who is legitimate */
    if (raft_is_leader(me_) && me->current_term <= ae->term)
        raft_become_follower(me_);

    /* 1. Reply false if term < currentTerm (§5.1) */
    if (ae->term < me->current_term)
    {
        __log(me_, "AE term is less than current term");
        r->success = 0;
        return 0;
    }

#if 0
    if (-1 != ae->prev_log_idx &&
        ae->prev_log_idx < raft_get_current_idx(me_))
    {
        __log(me_, "AE prev_idx is less than current idx");
        r->success = 0;
        return 0;
    }
#endif

    /* not the first appendentries we've received */
    if (0 != ae->prev_log_idx)
    {
        raft_entry_t* e = raft_get_entry_from_idx(me_, ae->prev_log_idx);

        if (e)
        {
            /* 2. Reply false if log doesn't contain an entry at prevLogIndex
               whose term matches prevLogTerm (§5.3) */
            if (e->term != ae->prev_log_term)
            {
                __log(me_, "AE term doesn't match prev_idx");
                r->success = 0;
                return 0;
            }

            /* 3. If an existing entry conflicts with a new one (same index
               but different terms), delete the existing entry and all that
               follow it (§5.3) */
            /* NOTE(review): any entry found after prev_log_idx is deleted
               here, without comparing terms. */
            raft_entry_t* e2;

            e2 = raft_get_entry_from_idx(me_, ae->prev_log_idx + 1);
            if (e2)
                log_delete(me->log, ae->prev_log_idx + 1);
        }
        else
        {
            __log(me_, "AE no log at prev_idx");
            r->success = 0;
            return 0;
        }
    }

    /* 5. If leaderCommit > commitIndex, set commitIndex =
        min(leaderCommit, last log index) */
    if (raft_get_commit_idx(me_) < ae->leader_commit)
    {
        raft_entry_t* e = log_peektail(me->log);

        if (e)
        {
            /* NOTE(review): this uses the tail entry's id as the "last log
               index" - confirm ids and indices coincide. */
            int id = e->id < ae->leader_commit ?
                     e->id : ae->leader_commit;
            raft_set_commit_idx(me_, id);
            /* apply for as long as raft_apply_entry() keeps returning 0 */
            while (0 == raft_apply_entry(me_))
                ;
        }
    }

    if (raft_is_candidate(me_))
        raft_become_follower(me_);

    raft_set_current_term(me_, ae->term);

    int i;

    /* append all entries to log */
    for (i = 0; i < ae->n_entries; i++)
    {
        msg_entry_t* cmd = &ae->entries[i];

        /* TODO: replace malloc with mempoll/arena */
        raft_entry_t* c = (raft_entry_t*)malloc(sizeof(raft_entry_t));
        if (!c)
        {
            __log(me_, "AE failure; out of memory");
            r->success = 0;
            return -1;
        }
        c->term = me->current_term;
        c->len = cmd->len;
        c->id = cmd->id;
        c->data = (unsigned char*)malloc(cmd->len);
        if (0 != cmd->len)
        {
            if (!c->data)
            {
                /* nothing else references c yet, so it is ours to free */
                free(c);
                __log(me_, "AE failure; out of memory");
                r->success = 0;
                return -1;
            }
            memcpy(c->data, cmd->data, cmd->len);
        }
        /* NOTE(review): c and c->data are not released when the append
           fails; who owns them after raft_append_entry() is not visible
           from here - confirm before adding a free. */
        if (-1 == raft_append_entry(me_, c))
        {
            __log(me_, "AE failure; couldn't append entry");
            r->success = 0;
            return -1;
        }
    }

    r->success = 1;
    r->current_idx = raft_get_current_idx(me_);
    r->first_idx = ae->prev_log_idx + 1;
    return 0;
}
/**
 * Handle an appendentries message from `node` and fill in the response.
 *
 * Resets the election timeout counter, reconciles terms with the sender,
 * validates the prev_log_idx / prev_log_term pair against the local log,
 * removes conflicting entries, appends the entries that are missing, and
 * advances the commit index towards ae->leader_commit.
 *
 * @param me_  the server
 * @param node the sending node; recorded as current_leader on success
 * @param ae   the appendentries message
 * @param r    response: term, success, current_idx and first_idx are set
 *             on every return path
 * @return 0 when the message was accepted (r->success == 1),
 *         -1 when it was rejected (r->success == 0, r->first_idx == 0)
 */
int raft_recv_appendentries(
    raft_server_t* me_,
    raft_node_t* node,
    msg_appendentries_t* ae,
    msg_appendentries_response_t *r
    )
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    /* hearing from a sender restarts the timeout count */
    me->timeout_elapsed = 0;

    /* only messages that carry entries are logged */
    if (0 < ae->n_entries)
        __log(me_, node, "recvd appendentries from: %lx, t:%d ci:%d lc:%d pli:%d plt:%d #%d",
              node,
              ae->term,
              raft_get_current_idx(me_),
              ae->leader_commit,
              ae->prev_log_idx,
              ae->prev_log_term,
              ae->n_entries);

    r->term = me->current_term;

    if (raft_is_candidate(me_) && me->current_term == ae->term)
    {
        /* a candidate seeing appendentries for its own term stands down
         * and clears its vote */
        me->voted_for = -1;
        raft_become_follower(me_);
    }
    else if (me->current_term < ae->term)
    {
        /* sender is ahead: adopt its term, report that term back */
        raft_set_current_term(me_, ae->term);
        r->term = ae->term;
        raft_become_follower(me_);
    }
    else if (ae->term < me->current_term)
    {
        /* 1. Reply false if term < currentTerm (§5.1) */
        __log(me_, node, "AE term %d is less than current term %d",
              ae->term, me->current_term);
        goto fail_with_current_idx;
    }

    /* Not the first appendentries we've received */
    /* NOTE: the log starts at 1 */
    if (0 < ae->prev_log_idx)
    {
        raft_entry_t* e = raft_get_entry_from_idx(me_, ae->prev_log_idx);

        if (!e)
        {
            __log(me_, node, "AE no log at prev_idx %d", ae->prev_log_idx);
            goto fail_with_current_idx;
        }

        /* 2. Reply false if log doesn't contain an entry at prevLogIndex
           whose term matches prevLogTerm (§5.3) */
        if (raft_get_current_idx(me_) < ae->prev_log_idx)
            goto fail_with_current_idx;

        if (e->term != ae->prev_log_term)
        {
            __log(me_, node, "AE term doesn't match prev_term (ie. %d vs %d) ci:%d pli:%d",
                  e->term, ae->prev_log_term, raft_get_current_idx(me_), ae->prev_log_idx);
            /* the entry about to be deleted must lie above commit_idx */
            assert(me->commit_idx < ae->prev_log_idx);
            /* Delete all the following log entries because they don't match */
            log_delete(me->log, ae->prev_log_idx);
            /* report the index just before the deleted entry; the `fail`
             * label is used so this value is not overwritten */
            r->current_idx = ae->prev_log_idx - 1;
            goto fail;
        }
    }

    /* 3. If an existing entry conflicts with a new one (same index
       but different terms), delete the existing entry and all that
       follow it (§5.3) */
    /* This branch only covers messages without entries: when the local log
     * extends more than one index past prev_log_idx, everything from
     * prev_log_idx + 1 on is deleted. */
    if (ae->n_entries == 0 && 0 < ae->prev_log_idx && ae->prev_log_idx + 1 < raft_get_current_idx(me_))
    {
        assert(me->commit_idx < ae->prev_log_idx + 1);
        log_delete(me->log, ae->prev_log_idx + 1);
    }

    r->current_idx = ae->prev_log_idx;

    int i;
    /* Walk over the entries that are already present with a matching term.
     * Stop at the first index that is either missing locally or holds an
     * entry of a different term (that one is deleted first). `i` is left
     * pointing at the first entry that still has to be appended. */
    for (i = 0; i < ae->n_entries; i++)
    {
        msg_entry_t* ety = &ae->entries[i];
        int ety_index = ae->prev_log_idx + 1 + i;
        raft_entry_t* existing_ety = raft_get_entry_from_idx(me_, ety_index);
        r->current_idx = ety_index;
        if (existing_ety && existing_ety->term != ety->term)
        {
            assert(me->commit_idx < ety_index);
            log_delete(me->log, ety_index);
            break;
        }
        else if (!existing_ety)
            break;
    }

    /* Pick up remainder in case of mismatch or missing entry */
    for (; i < ae->n_entries; i++)
    {
        int e = raft_append_entry(me_, &ae->entries[i]);
        if (-1 == e)
            goto fail_with_current_idx;
        r->current_idx = ae->prev_log_idx + 1 + i;
    }

    /* 4. If leaderCommit > commitIndex, set commitIndex =
        min(leaderCommit, index of most recent entry) */
    if (raft_get_commit_idx(me_) < ae->leader_commit)
    {
        int last_log_idx = max(raft_get_current_idx(me_), 1);
        raft_set_commit_idx(me_, min(last_log_idx, ae->leader_commit));
    }

    /* update current leader because we accepted appendentries from it */
    me->current_leader = node;

    r->success = 1;
    r->first_idx = ae->prev_log_idx + 1;
    return 0;

    /* failure exit that reports the local current index */
fail_with_current_idx:
    r->current_idx = raft_get_current_idx(me_);
    /* failure exit for callers that already set r->current_idx */
fail:
    r->success = 0;
    r->first_idx = 0;
    return -1;
}