Esempio n. 1
0
int raft_recv_requestvote_response(raft_server_t* me_, int node,
        msg_requestvote_response_t* r)
{
    raft_server_private_t* me = (void*)me_;

    __log(me_, NULL, "node responded to requestvote: %d status: %s",
            node, r->vote_granted == 1 ? "granted" : "not granted");

    if (raft_is_leader(me_))
        return 0;

    assert(node < me->num_nodes);

//    if (r->term != raft_get_current_term(me_))
//        return 0;

    if (1 == r->vote_granted)
    {
        int votes;

        me->votes_for_me[node] = 1;
        votes = raft_get_nvotes_for_me(me_);
        if (raft_votes_is_majority(me->num_nodes, votes))
            raft_become_leader(me_);
    }

    return 0;
}
Esempio n. 2
0
static void drop_bads(void)
{
	int i;
	for (i = 0; i < MAX_CLIENTS; i++)
	{
		Client *c = server.clients + i;
		if (c->sock < 0) continue;
		if (!c->good || !raft_is_leader(raft)) remove_client(c);
	}
}
Esempio n. 3
0
int raft_recv_entry(raft_server_t* me_,
                    msg_entry_t* e,
                    msg_entry_response_t *r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int i;

    /* Only one voting cfg change at a time */
    if (raft_entry_is_voting_cfg_change(e))
        if (-1 != me->voting_cfg_change_log_idx)
            return -1;

    if (!raft_is_leader(me_))
        return -1;

    __log(me_, NULL, "received entry t:%d id: %d idx: %d",
          me->current_term, e->id, raft_get_current_idx(me_) + 1);

    raft_entry_t ety;
    ety.term = me->current_term;
    ety.id = e->id;
    ety.type = e->type;
    memcpy(&ety.data, &e->data, sizeof(raft_entry_data_t));
    raft_append_entry(me_, &ety);
    for (i = 0; i < me->num_nodes; i++)
    {
        if (me->node == me->nodes[i] || !me->nodes[i] ||
            !raft_node_is_voting(me->nodes[i]))
            continue;

        /* Only send new entries.
         * Don't send the entry to peers who are behind, to prevent them from
         * becoming congested. */
        int next_idx = raft_node_get_next_idx(me->nodes[i]);
        if (next_idx == raft_get_current_idx(me_))
            raft_send_appendentries(me_, me->nodes[i]);
    }

    /* if we're the only node, we can consider the entry committed */
    if (1 == me->num_nodes)
        me->commit_idx = raft_get_current_idx(me_);

    r->id = e->id;
    r->idx = raft_get_current_idx(me_);
    r->term = me->current_term;

    if (raft_entry_is_voting_cfg_change(e))
        me->voting_cfg_change_log_idx = raft_get_current_idx(me_);

    return 0;
}
Esempio n. 4
0
static bool accept_client(void)
{
	int fd;

	fprintf(stderr, "a new connection is queued\n");

	fd = accept(server.listener, NULL, NULL);
	if (fd == -1) {
		fprintf(stderr, "failed to accept a connection: %s\n", strerror(errno));
		return false;
	}
	fprintf(stderr, "a new connection fd=%d accepted\n", fd);
	
	if (!raft_is_leader(raft)) {
		fprintf(stderr, "not a leader, disconnecting the accepted connection fd=%d\n", fd);
		close(fd);
		return false;
	}

	return add_client(fd);
}
Esempio n. 5
0
int raft_recv_requestvote(raft_server_t* me_,
                          raft_node_t* node,
                          msg_requestvote_t* vr,
                          msg_requestvote_response_t *r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    if (raft_get_current_term(me_) < vr->term)
    {
        raft_set_current_term(me_, vr->term);
        raft_become_follower(me_);
    }

    if (__should_grant_vote(me, vr))
    {
        /* It shouldn't be possible for a leader or candidate to grant a vote
         * Both states would have voted for themselves */
        assert(!(raft_is_leader(me_) || raft_is_candidate(me_)));

        raft_vote_for_nodeid(me_, vr->candidate_id);
        r->vote_granted = 1;

        /* there must be in an election. */
        me->current_leader = NULL;

        me->timeout_elapsed = 0;
    }
    else
        r->vote_granted = 0;

    __log(me_, node, "node requested vote: %d replying: %s",
          node, r->vote_granted == 1 ? "granted" : "not granted");

    r->term = raft_get_current_term(me_);
    return 0;
}
Esempio n. 6
0
int raft_recv_appendentries(
        raft_server_t* me_,
        const int node,
        msg_appendentries_t* ae)
{
    int i;
    raft_server_private_t* me = (void*)me_;
    msg_appendentries_response_t r;

    me->timeout_elapsed = 0;

    __log(me_, NULL, "received appendentries from: %d", node);

    r.term = me->current_term;

    /* we've found a leader who is legitimate */
    if (raft_is_leader(me_) && me->current_term <= ae->term)
        raft_become_follower(me_);

    /* 1. Reply false if term < currentTerm (�1) */
    if (ae->term < me->current_term)
    {
        __log(me_, NULL, "AE term is less than current term");
        r.success = 0;
        goto done;
    }

#if 0
    if (-1 != ae->prev_log_idx &&
         ae->prev_log_idx < raft_get_current_idx(me_))
    {
        __log(me_, NULL, "AE prev_idx is less than current idx");
        r.success = 0;
        goto done;
    }
#endif

    /* not the first appendentries we've received */
    if (0 != ae->prev_log_idx)
    {
        raft_entry_t* e;

        if ((e = raft_get_entry_from_idx(me_, ae->prev_log_idx)))
        {
            /* 2. Reply false if log doesn抰 contain an entry at prevLogIndex
               whose term matches prevLogTerm (�3) */
            if (e->term != ae->prev_log_term)
            {
                __log(me_, NULL, "AE term doesn't match prev_idx");
                r.success = 0;
                goto done;
            }

            /* 3. If an existing entry conflicts with a new one (same index
            but different terms), delete the existing entry and all that
            follow it (�3) */
            raft_entry_t* e2;
            if ((e2 = raft_get_entry_from_idx(me_, ae->prev_log_idx+1)))
            {
                log_delete(me->log, ae->prev_log_idx+1);
            }
        }
        else
        {
            __log(me_, NULL, "AE no log at prev_idx");
            r.success = 0;
            goto done;
            //assert(0);
        }
    }

    /* 5. If leaderCommit > commitIndex, set commitIndex =
        min(leaderCommit, last log index) */
    if (raft_get_commit_idx(me_) < ae->leader_commit)
    {
        raft_entry_t* e;

        if ((e = log_peektail(me->log)))
        {
            raft_set_commit_idx(me_, e->id < ae->leader_commit ?
                    e->id : ae->leader_commit);
            while (1 == raft_apply_entry(me_));
        }
    }

    if (raft_is_candidate(me_))
        raft_become_follower(me_);

    raft_set_current_term(me_, ae->term);

    
    
    /* append all entries to log */
    for (i=0; i<ae->n_entries; i++)
    {
        msg_entry_t* cmd;
        raft_entry_t* c;

        cmd = &ae->entries[i];

        /* TODO: replace malloc with mempoll/arena */
        c = malloc(sizeof(raft_entry_t));
        c->term = me->current_term;
        c->len = cmd->len;
        c->id = cmd->id;
        c->data = malloc(cmd->len);
        memcpy(c->data, cmd->data, cmd->len);
        if (0 == raft_append_entry(me_, c))
        {
            __log(me_, NULL, "AE failure; couldn't append entry");
            r.success = 0;
            goto done;
        }
    }

    r.success = 1;
    r.current_idx = raft_get_current_idx(me_);
    r.first_idx = ae->prev_log_idx + 1;

done:
    if (me->cb.send)
        me->cb.send(me->cb_ctx, me, node, RAFT_MSG_APPENDENTRIES_RESPONSE,
                (void*)&r, sizeof(msg_appendentries_response_t));
    return 1;
}
Esempio n. 7
0
int raft_recv_appendentries(
    raft_server_t* me_,
    const int node,
    msg_appendentries_t* ae,
    msg_appendentries_response_t *r
    )
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    me->timeout_elapsed = 0;

    __log(me_, "received appendentries from: %d", node);

    r->term = me->current_term;

    /* we've found a leader who is legitimate */
    if (raft_is_leader(me_) && me->current_term <= ae->term)
        raft_become_follower(me_);

    /* 1. Reply false if term < currentTerm (§5.1) */
    if (ae->term < me->current_term)
    {
        __log(me_, "AE term is less than current term");
        r->success = 0;
        return 0;
    }

#if 0
    if (-1 != ae->prev_log_idx &&
        ae->prev_log_idx < raft_get_current_idx(me_))
    {
        __log(me_, "AE prev_idx is less than current idx");
        r->success = 0;
        return 0;
    }
#endif

    /* not the first appendentries we've received */
    if (0 != ae->prev_log_idx)
    {
        raft_entry_t* e = raft_get_entry_from_idx(me_, ae->prev_log_idx);

        if (e)
        {
            /* 2. Reply false if log doesn't contain an entry at prevLogIndex
               whose term matches prevLogTerm (§5.3) */
            if (e->term != ae->prev_log_term)
            {
                __log(me_, "AE term doesn't match prev_idx");
                r->success = 0;
                return 0;
            }

            /* 3. If an existing entry conflicts with a new one (same index
               but different terms), delete the existing entry and all that
               follow it (§5.3) */
            raft_entry_t* e2;

            e2 = raft_get_entry_from_idx(me_, ae->prev_log_idx + 1);

            if (e2)
                log_delete(me->log, ae->prev_log_idx + 1);
        }
        else
        {
            __log(me_, "AE no log at prev_idx");
            r->success = 0;
            return 0;
        }
    }

    /* 5. If leaderCommit > commitIndex, set commitIndex =
        min(leaderCommit, last log index) */
    if (raft_get_commit_idx(me_) < ae->leader_commit)
    {
        raft_entry_t* e = log_peektail(me->log);
        if (e)
        {
            int id = e->id < ae->leader_commit ?  e->id : ae->leader_commit;
            raft_set_commit_idx(me_, id);
            while (0 == raft_apply_entry(me_))
                ;
        }
    }

    if (raft_is_candidate(me_))
        raft_become_follower(me_);

    raft_set_current_term(me_, ae->term);

    int i;

    /* append all entries to log */
    for (i = 0; i < ae->n_entries; i++)
    {
        msg_entry_t* cmd = &ae->entries[i];

        /* TODO: replace malloc with mempoll/arena */
        raft_entry_t* c = (raft_entry_t*)malloc(sizeof(raft_entry_t));
        c->term = me->current_term;
        c->len = cmd->len;
        c->id = cmd->id;
        c->data = (unsigned char*)malloc(cmd->len);
        memcpy(c->data, cmd->data, cmd->len);
        if (-1 == raft_append_entry(me_, c))
        {
            __log(me_, "AE failure; couldn't append entry");
            r->success = 0;
            return -1;
        }
    }

    r->success = 1;
    r->current_idx = raft_get_current_idx(me_);
    r->first_idx = ae->prev_log_idx + 1;
    return 0;
}
Esempio n. 8
0
int raft_recv_appendentries_response(raft_server_t* me_,
                                     raft_node_t* node,
                                     msg_appendentries_response_t* r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    __log(me_, node,
          "received appendentries response %s ci:%d rci:%d 1stidx:%d",
          r->success == 1 ? "SUCCESS" : "fail",
          raft_get_current_idx(me_),
          r->current_idx,
          r->first_idx);

    /* Stale response -- ignore */
    if (r->current_idx != 0 && r->current_idx <= raft_node_get_match_idx(node))
        return 0;

    if (!raft_is_leader(me_))
        return -1;

    /* If response contains term T > currentTerm: set currentTerm = T
       and convert to follower (§5.3) */
    if (me->current_term < r->term)
    {
        raft_set_current_term(me_, r->term);
        raft_become_follower(me_);
        return 0;
    }
    else if (me->current_term != r->term)
        return 0;

    /* stop processing, this is a node we don't have in our configuration */
    if (!node)
        return 0;

    if (0 == r->success)
    {
        /* If AppendEntries fails because of log inconsistency:
           decrement nextIndex and retry (§5.3) */
        assert(0 <= raft_node_get_next_idx(node));

        int next_idx = raft_node_get_next_idx(node);
        assert(0 <= next_idx);
        if (r->current_idx < next_idx - 1)
            raft_node_set_next_idx(node, min(r->current_idx + 1, raft_get_current_idx(me_)));
        else
            raft_node_set_next_idx(node, next_idx - 1);

        /* retry */
        raft_send_appendentries(me_, node);
        return 0;
    }

    assert(r->current_idx <= raft_get_current_idx(me_));

    raft_node_set_next_idx(node, r->current_idx + 1);
    raft_node_set_match_idx(node, r->current_idx);

    if (!raft_node_is_voting(node) &&
        -1 == me->voting_cfg_change_log_idx &&
        raft_get_current_idx(me_) <= r->current_idx + 1 &&
        me->cb.node_has_sufficient_logs &&
        0 == raft_node_has_sufficient_logs(node)
        )
    {
        raft_node_set_has_sufficient_logs(node);
        me->cb.node_has_sufficient_logs(me_, me->udata, node);
    }

    /* Update commit idx */
    int votes = 1; /* include me */
    int point = r->current_idx;
    int i;
    for (i = 0; i < me->num_nodes; i++)
    {
        if (me->node == me->nodes[i] || !raft_node_is_voting(me->nodes[i]))
            continue;

        int match_idx = raft_node_get_match_idx(me->nodes[i]);

        if (0 < match_idx)
        {
            raft_entry_t* ety = raft_get_entry_from_idx(me_, match_idx);
            if (ety->term == me->current_term && point <= match_idx)
                votes++;
        }
    }

    if (me->num_nodes / 2 < votes && raft_get_commit_idx(me_) < point)
        raft_set_commit_idx(me_, point);

    /* Aggressively send remaining entries */
    if (raft_get_entry_from_idx(me_, raft_node_get_next_idx(node)))
        raft_send_appendentries(me_, node);

    /* periodic applies committed entries lazily */

    return 0;
}