/**
 * Looking up a node that was never added must yield NULL.
 *
 * Two nodes are added to a fresh server; lookups 0 and 1 are expected to
 * return a node, while lookup 2 is expected to return NULL.
 */
void TestRaft_server_cant_get_node_we_dont_have(CuTest * tc)
{
    void *server = raft_new();

    raft_add_node(server, (void*)1, 1);
    raft_add_node(server, (void*)2, 0);

    /* lookups that are expected to succeed */
    CuAssertTrue(tc, raft_get_node(server, 0) != NULL);
    CuAssertTrue(tc, raft_get_node(server, 1) != NULL);

    /* lookup that is expected to fail */
    CuAssertTrue(tc, raft_get_node(server, 2) == NULL);
}
/** Raft callback for sending request vote message
 *
 * Serializes a MSG_REQUESTVOTE message carrying a copy of @p m and queues it
 * for writing on the stream of the peer identified by @p nodeidx.
 *
 * @param raft      Raft server the target node is looked up in
 * @param user_data Unused by this callback
 * @param nodeidx   Index of the node the message is sent to
 * @param m         Request vote message to send
 * @return Always 0, including when the connection attempt reports -1 (the
 *         message is then silently dropped). */
static int __raft_send_requestvote(
    raft_server_t* raft,
    void *user_data,
    int nodeidx,
    msg_requestvote_t* m
    )
{
    raft_node_t* node = raft_get_node(raft, nodeidx);
    peer_connection_t* conn = raft_node_get_udata(node);

    int e = __connect_if_needed(conn);
    if (-1 == e)
        return 0;

    uv_buf_t bufs[1];
    char buf[RAFT_BUFLEN];
    msg_t msg;

    msg.type = MSG_REQUESTVOTE;
    msg.rv = *m;
    __peer_msg_serialize(tpl_map("S(I$(IIII))", &msg), bufs, buf);

    /* NOTE(review): buf lives on this stack frame; confirm that
     * __peer_msg_serialize/uv_write do not need it after we return. */
    conn->wreq.data = conn;
    e = uv_write(&conn->wreq, conn->stream, bufs, 1, __peer_write_cb);

    /* libuv reports failures as negative error codes, not only -1 */
    if (e < 0)
        uv_fatal(e);

    return 0;
}
/** Raft callback for sending appendentries message */ static int __raft_send_appendentries( raft_server_t* raft, void *user_data, int nodeidx, msg_appendentries_t* m ) { uv_buf_t bufs[3]; raft_node_t* node = raft_get_node(raft, nodeidx); peer_connection_t* conn = raft_node_get_udata(node); int e = __connect_if_needed(conn); if (-1 == e) return 0; char buf[RAFT_BUFLEN], *ptr = buf; msg_t msg; memset(&msg, 0, sizeof(msg)); msg.type = MSG_APPENDENTRIES; msg.ae.term = m->term; msg.ae.prev_log_idx = m->prev_log_idx; msg.ae.prev_log_term = m->prev_log_term; msg.ae.leader_commit = m->leader_commit; msg.ae.n_entries = m->n_entries; ptr += __peer_msg_serialize(tpl_map("S(I$(IIIII))", &msg), bufs, ptr); /* appendentries with payload */ if (0 < m->n_entries) { tpl_bin tb = { .sz = m->entries[0].data.len, .addr = m->entries[0].data.buf }; /* list of entries */ tpl_node *tn = tpl_map("IIB", &m->entries[0].id, &m->entries[0].term, &tb); size_t sz; tpl_pack(tn, 0); tpl_dump(tn, TPL_GETSIZE, &sz); e = tpl_dump(tn, TPL_MEM | TPL_PREALLOCD, ptr, RAFT_BUFLEN); assert(0 == e); bufs[1].len = sz; bufs[1].base = ptr; e = uv_write(&conn->wreq, conn->stream, bufs, 2, __peer_write_cb); if (-1 == e) uv_fatal(e); tpl_free(tn); } else {
/**
 * Handle an appendentries response received from a node.
 *
 * On success, every log index in [r->first_idx, r->current_idx] is marked as
 * committed by one more node, and entries following last_applied_idx are
 * applied for as long as enough nodes hold them.
 * On failure, the node's next index is decremented and appendentries is
 * sent to it again.
 *
 * @param me_  Raft server
 * @param node Index of the node the response came from
 * @param r    The response message
 * @return Always 1
 */
int raft_recv_appendentries_response(raft_server_t* me_, int node, msg_appendentries_response_t* r)
{
    raft_server_private_t* me = (void*)me_;
    raft_node_t* peer;
    int idx;

    __log(me_, NULL, "received appendentries response from: %d", node);

    peer = raft_get_node(me_, node);

    if (1 != r->success)
    {
        /* If AppendEntries fails because of log inconsistency:
         * decrement nextIndex and retry (§5.3) */
        assert(0 <= raft_node_get_next_idx(peer));
        /* TODO does this have test coverage? */
        /* TODO can jump back to where node is different instead of iterating */
        raft_node_set_next_idx(peer, raft_node_get_next_idx(peer) - 1);
        raft_send_appendentries(me_, node);
        return 1;
    }

    /* record that one more node holds each acknowledged entry */
    idx = r->first_idx;
    while (idx <= r->current_idx)
    {
        log_mark_node_has_committed(me->log, idx);
        idx++;
    }

    /* apply entries in order while a majority has them */
    for (;;)
    {
        raft_entry_t* ety = log_get_from_idx(me->log, me->last_applied_idx + 1);

        if (!ety || me->num_nodes / 2 > ety->num_nodes)
            break;

        if (0 == raft_apply_entry(me_))
            break;
    }

    return 1;
}
/**
 * Switch this server into the leader state.
 *
 * Sets the state to RAFT_STATE_LEADER, clears the vote (voted_for = -1) and,
 * for every node other than ourselves, sets its next index to one past our
 * current index and sends it an appendentries message.
 *
 * @param me_ Raft server
 */
void raft_become_leader(raft_server_t* me_)
{
    raft_server_private_t* me = (void*)me_;
    int idx;

    __log(me_, NULL, "becoming leader");

    raft_set_state(me_, RAFT_STATE_LEADER);
    me->voted_for = -1;

    for (idx = 0; idx < me->num_nodes; idx++)
    {
        /* every node except ourselves */
        if (me->nodeid != idx)
        {
            raft_node_t* peer = raft_get_node(me_, idx);

            raft_node_set_next_idx(peer, raft_get_current_idx(me_) + 1);
            raft_send_appendentries(me_, idx);
        }
    }
}
/**
 * Send an appendentries message to a node through the registered callback.
 *
 * Does nothing beyond logging when no send_appendentries callback is set.
 * The message carries our current term and node id and no entries.
 *
 * @param me_  Raft server
 * @param node Index of the node to send to
 */
void raft_send_appendentries(raft_server_t* me_, int node)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    __log(me_, "sending appendentries to: %d", node);

    if (!(me->cb.send_appendentries))
        return;

    raft_node_t* p = raft_get_node(me_, node);

    /* Zero-initialise so that fields not set explicitly below are never
     * handed to the callback with indeterminate values. */
    msg_appendentries_t ae = {0};

    ae.term = me->current_term;
    ae.leader_id = me->nodeid;
    /* NOTE(review): a *term* field is filled from the node's next *index*;
     * the original marks this line TODO - left unchanged. */
    ae.prev_log_term = raft_node_get_next_idx(p);
    // TODO:
    ae.prev_log_idx = 0;
    ae.n_entries = 0;

    me->cb.send_appendentries(me_, me->udata, node, &ae);
}
/**
 * Send an appendentries message to a node through the generic send callback.
 *
 * Does nothing beyond logging when no send callback is set. The message
 * carries our current term and node id and no entries; it is passed to the
 * callback as a raw msg_appendentries_t together with its size.
 *
 * @param me_  Raft server
 * @param node Index of the node to send to
 */
void raft_send_appendentries(raft_server_t* me_, int node)
{
    /* Zero-initialise: the struct is handed to the send callback as a byte
     * blob, so members not set below must not be left indeterminate. */
    msg_appendentries_t ae = {0};
    raft_server_private_t* me = (void*)me_;

    __log(me_, NULL, "sending appendentries to: %d", node);

    if (!(me->cb.send))
        return;

    raft_node_t* p = raft_get_node(me_, node);

    ae.term = me->current_term;
    ae.leader_id = me->nodeid;
    /* NOTE(review): a *term* field is filled from the node's next *index*;
     * the original marks this line TODO - left unchanged. */
    ae.prev_log_term = raft_node_get_next_idx(p);
    // TODO:
    ae.prev_log_idx = 0;
    ae.n_entries = 0;

    me->cb.send(me->cb_ctx, me, node, RAFT_MSG_APPENDENTRIES,
                (void*)&ae, sizeof(msg_appendentries_t));
}
/**
 * Add a node to the server, or mark an existing node as voting.
 *
 * If a node with @p id already exists it is set to voting and returned.
 * Otherwise the node array is grown by one slot, a new node is created with
 * raft_node_new(udata, id) and appended.
 *
 * @param me_     Raft server
 * @param udata   User data passed through to raft_node_new()
 * @param id      Id of the node
 * @param is_self Non-zero if this node represents the server itself; the
 *                server's own node pointer is then set to the new node
 * @return The existing or newly added node, or NULL if memory for the node
 *         array or the node could not be obtained (the node count is left
 *         unchanged in that case)
 */
raft_node_t* raft_add_node(raft_server_t* me_, void* udata, int id, int is_self)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    /* set to voting if node already exists */
    raft_node_t* node = raft_get_node(me_, id);
    if (node)
    {
        raft_node_set_voting(node, 1);
        return node;
    }

    /* Grow through a temporary so the existing array is neither leaked nor
     * lost if realloc fails; size by element type rather than a spelled-out
     * type name. */
    void* grown = realloc(me->nodes, sizeof(*me->nodes) * (me->num_nodes + 1));
    if (!grown)
        return NULL;
    me->nodes = grown;

    raft_node_t* added = raft_node_new(udata, id);
    assert(added);
    if (!added)
        return NULL;

    /* only count the node once it really exists */
    me->nodes[me->num_nodes] = added;
    me->num_nodes++;

    if (is_self)
        me->node = added;

    return added;
}
/** HTTP POST entry point for receiving entries from client
 * Provide the user with an ID
 *
 * Flow:
 *  - non-POST requests are rejected by returning -1 without responding;
 *  - if no leader is known, respond 503;
 *  - if another node is the leader, respond 301 with a Location header
 *    pointing at that node's HTTP address;
 *  - otherwise generate a ticket, submit it to Raft as a new entry and block
 *    until Raft reports the entry as committed (200 with the entry id as
 *    body) or as failed (400).
 *
 * @param self The h2o handler (unused here)
 * @param req  The request being served
 * @return -1 if the method is not POST, 0 after a redirect or success
 *         response, otherwise whatever h2oh_respond_with_error() returns */
static int __http_get_id(h2o_handler_t *self, h2o_req_t *req)
{
    static h2o_generator_t generator = { NULL, NULL };

    if (!h2o_memis(req->method.base, req->method.len, H2O_STRLIT("POST")))
        return -1;

    /* redirect to leader if needed */
    int leader = raft_get_current_leader(sv->raft);
    if (-1 == leader)
    {
        return h2oh_respond_with_error(req, 503, "Leader unavailable");
    }
    else if (leader != sv->node_idx)
    {
        raft_node_t* node = raft_get_node(sv->raft, leader);
        peer_connection_t* leader_conn = raft_node_get_udata(node);
        char leader_url[LEADER_URL_LEN];
        static h2o_iovec_t body = { .base = "", .len = 0 };

        req->res.status = 301;
        req->res.reason = "Moved Permanently";
        h2o_start_response(req, &generator);
        snprintf(leader_url, LEADER_URL_LEN, "http://%s:%d/",
                 inet_ntoa(leader_conn->addr.sin_addr),
                 leader_conn->http_port);
        h2o_add_header(&req->pool,
                       &req->res.headers,
                       H2O_TOKEN_LOCATION,
                       leader_url,
                       strlen(leader_url));
        h2o_send(req, &body, 1, 1);
        return 0;
    }

    int e;

    unsigned int ticket = __generate_ticket();

    /* NOTE(review): the entry payload points at the local `ticket`; confirm
     * that raft_recv_entry copies the data rather than keeping the pointer. */
    msg_entry_t entry;
    entry.id = rand();
    entry.data.buf = (void*)&ticket;
    entry.data.len = sizeof(ticket);

    uv_mutex_lock(&sv->raft_lock);

    msg_entry_response_t r;
    e = raft_recv_entry(sv->raft, sv->node_idx, &entry, &r);
    if (0 != e)
    {
        /* do not leave the raft lock held on the error path */
        uv_mutex_unlock(&sv->raft_lock);
        return h2oh_respond_with_error(req, 500, "BAD");
    }

    /* block until the entry is committed */
    int done = 0;
    do
    {
        uv_cond_wait(&sv->appendentries_received, &sv->raft_lock);
        e = raft_msg_entry_response_committed(sv->raft, &r);
        switch (e)
        {
        case 0:
            /* not committed yet */
            break;
        case 1:
            done = 1;
            uv_mutex_unlock(&sv->raft_lock);
            break;
        case -1:
            uv_mutex_unlock(&sv->raft_lock);
            return h2oh_respond_with_error(req, 400, "TRY AGAIN");
        default:
            /* any other value: keep waiting, as before */
            break;
        }
    }
    while (!done);

    /* serialize ID */
    char id_str[100];
    h2o_iovec_t body;
    snprintf(id_str, sizeof(id_str), "%d", entry.id);
    body = h2o_iovec_init(id_str, strlen(id_str));

    req->res.status = 200;
    req->res.reason = "OK";
    h2o_start_response(req, &generator);
    h2o_send(req, &body, 1, 1);
    return 0;
}