Exemplo n.º 1
0
void raft_send_appendentries_all(raft_server_t* me_)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int i;

    for (i = 0; i < me->num_nodes; i++)
        if (me->nodeid != i)
            raft_send_appendentries(me_, i);
}
Exemplo n.º 2
0
void raft_send_appendentries_all(raft_server_t* me_)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int i;

    me->timeout_elapsed = 0;
    for (i = 0; i < me->num_nodes; i++)
        if (me->node != me->nodes[i])
            raft_send_appendentries(me_, me->nodes[i]);
}
Exemplo n.º 3
0
void raft_send_appendentries_all(raft_server_t* me_)
{
    raft_server_private_t* me = (void*)me_;
    int i;

    for (i=0; i<me->num_nodes; i++)
    {
        if (me->nodeid == i) continue;
        raft_send_appendentries(me_, i);
    }
}
Exemplo n.º 4
0
int raft_recv_entry(raft_server_t* me_,
                    msg_entry_t* e,
                    msg_entry_response_t *r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int i;

    /* Only one voting cfg change at a time */
    if (raft_entry_is_voting_cfg_change(e))
        if (-1 != me->voting_cfg_change_log_idx)
            return -1;

    if (!raft_is_leader(me_))
        return -1;

    __log(me_, NULL, "received entry t:%d id: %d idx: %d",
          me->current_term, e->id, raft_get_current_idx(me_) + 1);

    raft_entry_t ety;
    ety.term = me->current_term;
    ety.id = e->id;
    ety.type = e->type;
    memcpy(&ety.data, &e->data, sizeof(raft_entry_data_t));
    raft_append_entry(me_, &ety);
    for (i = 0; i < me->num_nodes; i++)
    {
        if (me->node == me->nodes[i] || !me->nodes[i] ||
            !raft_node_is_voting(me->nodes[i]))
            continue;

        /* Only send new entries.
         * Don't send the entry to peers who are behind, to prevent them from
         * becoming congested. */
        int next_idx = raft_node_get_next_idx(me->nodes[i]);
        if (next_idx == raft_get_current_idx(me_))
            raft_send_appendentries(me_, me->nodes[i]);
    }

    /* if we're the only node, we can consider the entry committed */
    if (1 == me->num_nodes)
        me->commit_idx = raft_get_current_idx(me_);

    r->id = e->id;
    r->idx = raft_get_current_idx(me_);
    r->term = me->current_term;

    if (raft_entry_is_voting_cfg_change(e))
        me->voting_cfg_change_log_idx = raft_get_current_idx(me_);

    return 0;
}
Exemplo n.º 5
0
int raft_recv_appendentries_response(raft_server_t* me_,
        int node, msg_appendentries_response_t* r)
{
    raft_server_private_t* me = (void*)me_;
    raft_node_t* p;

    __log(me_, NULL, "received appendentries response from: %d", node);

    p = raft_get_node(me_, node);

    if (1 == r->success)
    {
        int i;

        for (i=r->first_idx; i<=r->current_idx; i++)
            log_mark_node_has_committed(me->log, i);

        while (1)
        {
            raft_entry_t* e;

            e = log_get_from_idx(me->log, me->last_applied_idx + 1);

            /* majority has this */
            if (e && me->num_nodes / 2 <= e->num_nodes)
            {
                if (0 == raft_apply_entry(me_)) break;
            }
            else
            {
                break;
            }
        }
    }
    else
    {
        /* If AppendEntries fails because of log inconsistency:
           decrement nextIndex and retry (�3) */
        assert(0 <= raft_node_get_next_idx(p));
        // TODO does this have test coverage?
        // TODO can jump back to where node is different instead of iterating
        raft_node_set_next_idx(p, raft_node_get_next_idx(p)-1);
        raft_send_appendentries(me_, node);
    }

    return 1;
}
Exemplo n.º 6
0
void raft_become_leader(raft_server_t* me_)
{
    raft_server_private_t* me = (void*)me_;
    int i;

    __log(me_, NULL, "becoming leader");

    raft_set_state(me_,RAFT_STATE_LEADER);
    me->voted_for = -1;
    for (i=0; i<me->num_nodes; i++)
    {
        if (me->nodeid == i) continue;
        raft_node_t* p = raft_get_node(me_, i);
        raft_node_set_next_idx(p, raft_get_current_idx(me_)+1);
        raft_send_appendentries(me_, i);
    }
}
Exemplo n.º 7
0
void raft_become_leader(raft_server_t* me_)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    int i;

    __log(me_, NULL, "becoming leader term:%d", raft_get_current_term(me_));

    raft_set_state(me_, RAFT_STATE_LEADER);
    for (i = 0; i < me->num_nodes; i++)
    {
        if (me->node == me->nodes[i] || !raft_node_is_voting(me->nodes[i]))
            continue;

        raft_node_t* node = me->nodes[i];
        raft_node_set_next_idx(node, raft_get_current_idx(me_) + 1);
        raft_node_set_match_idx(node, 0);
        raft_send_appendentries(me_, node);
    }
}
Exemplo n.º 8
0
int raft_recv_entry(raft_server_t* me_, int node, msg_entry_t* e)
{
    raft_server_private_t* me = (void*)me_;
    raft_entry_t ety;
    int res, i;

    __log(me_, NULL, "received entry from: %d", node);

    ety.term = me->current_term;
    ety.id = e->id;
    ety.data = e->data;
    ety.len = e->len;
    res = raft_append_entry(me_, &ety);
    raft_send_entry_response(me_, node, e->id, res);
    for (i=0; i<me->num_nodes; i++)
    {
        if (me->nodeid == i) continue;
        raft_send_appendentries(me_,i);
    }
    return 0;
}
Exemplo n.º 9
0
int raft_recv_entry(raft_server_t* me_, int node, msg_entry_t* e,
                    msg_entry_response_t *r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;
    raft_entry_t ety;
    int res, i;

    __log(me_, "received entry from: %d", node);

    ety.term = me->current_term;
    ety.id = e->id;
    ety.data = e->data;
    ety.len = e->len;
    res = raft_append_entry(me_, &ety);
    for (i = 0; i < me->num_nodes; i++)
        if (me->nodeid != i)
            raft_send_appendentries(me_, i);

    r->id = e->id;
    r->was_committed = (0 == res);
    return 0;
}
Exemplo n.º 10
0
int raft_recv_appendentries_response(raft_server_t* me_,
                                     raft_node_t* node,
                                     msg_appendentries_response_t* r)
{
    raft_server_private_t* me = (raft_server_private_t*)me_;

    __log(me_, node,
          "received appendentries response %s ci:%d rci:%d 1stidx:%d",
          r->success == 1 ? "SUCCESS" : "fail",
          raft_get_current_idx(me_),
          r->current_idx,
          r->first_idx);

    /* Stale response -- ignore */
    if (r->current_idx != 0 && r->current_idx <= raft_node_get_match_idx(node))
        return 0;

    if (!raft_is_leader(me_))
        return -1;

    /* If response contains term T > currentTerm: set currentTerm = T
       and convert to follower (§5.3) */
    if (me->current_term < r->term)
    {
        raft_set_current_term(me_, r->term);
        raft_become_follower(me_);
        return 0;
    }
    else if (me->current_term != r->term)
        return 0;

    /* stop processing, this is a node we don't have in our configuration */
    if (!node)
        return 0;

    if (0 == r->success)
    {
        /* If AppendEntries fails because of log inconsistency:
           decrement nextIndex and retry (§5.3) */
        assert(0 <= raft_node_get_next_idx(node));

        int next_idx = raft_node_get_next_idx(node);
        assert(0 <= next_idx);
        if (r->current_idx < next_idx - 1)
            raft_node_set_next_idx(node, min(r->current_idx + 1, raft_get_current_idx(me_)));
        else
            raft_node_set_next_idx(node, next_idx - 1);

        /* retry */
        raft_send_appendentries(me_, node);
        return 0;
    }

    assert(r->current_idx <= raft_get_current_idx(me_));

    raft_node_set_next_idx(node, r->current_idx + 1);
    raft_node_set_match_idx(node, r->current_idx);

    if (!raft_node_is_voting(node) &&
        -1 == me->voting_cfg_change_log_idx &&
        raft_get_current_idx(me_) <= r->current_idx + 1 &&
        me->cb.node_has_sufficient_logs &&
        0 == raft_node_has_sufficient_logs(node)
        )
    {
        raft_node_set_has_sufficient_logs(node);
        me->cb.node_has_sufficient_logs(me_, me->udata, node);
    }

    /* Update commit idx */
    int votes = 1; /* include me */
    int point = r->current_idx;
    int i;
    for (i = 0; i < me->num_nodes; i++)
    {
        if (me->node == me->nodes[i] || !raft_node_is_voting(me->nodes[i]))
            continue;

        int match_idx = raft_node_get_match_idx(me->nodes[i]);

        if (0 < match_idx)
        {
            raft_entry_t* ety = raft_get_entry_from_idx(me_, match_idx);
            if (ety->term == me->current_term && point <= match_idx)
                votes++;
        }
    }

    if (me->num_nodes / 2 < votes && raft_get_commit_idx(me_) < point)
        raft_set_commit_idx(me_, point);

    /* Aggressively send remaining entries */
    if (raft_get_entry_from_idx(me_, raft_node_get_next_idx(node)))
        raft_send_appendentries(me_, node);

    /* periodic applies committed entries lazily */

    return 0;
}