/*!
 * Processes a GCS_MSG_LAST message: stores the seqno it carries as the
 * sender's last applied value and, if the sender is the node that currently
 * defines the group's last applied value, recomputes the latter.
 *
 * @param group group context
 * @param msg   received message, payload is exactly one gcs_seqno_t
 *
 * @return new last applied value if it has changed (increased), 0 otherwise
 */
gcs_seqno_t
gcs_group_handle_last_msg (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    assert (GCS_MSG_LAST        == msg->type);
    assert (sizeof(gcs_seqno_t) == msg->size);

    gcs_seqno_t const reported = gcs_seqno_gtoh(*(gcs_seqno_t*)(msg->buf));

    /* Deliberately no assert (reported >= group->last_applied): that would
     * require the application to send last applied messages while holding
     * TO, otherwise threads race with each other. */

    gcs_node_set_last_applied (&group->nodes[msg->sender_idx], reported);

    /* Only the node that was responsible for the current value can move it,
     * and only by reporting something greater. */
    if (msg->sender_idx != group->last_node) return 0;
    if (reported <= group->last_applied)     return 0;

    gcs_seqno_t const prev_cut = group->last_applied;

    group_redo_last_applied (group);

    if (group->last_applied <= prev_cut) return 0; /* recomputed, no advance */

    gu_debug ("New COMMIT CUT %lld after %lld from %d",
              (long long)group->last_applied,
              (long long)reported, msg->sender_idx);

    return group->last_applied;
}
/*!
 * Handles a JOIN message, which reports the outcome of a state transfer.
 *
 * Only a sender in DONOR or JOINER state is processed: its status is updated,
 * its state transfer peer is looked up by ID and the outcome is logged.
 * A JOIN message from a sender in any other state is logged and ignored.
 *
 * @param group group context
 * @param msg   received GCS_MSG_JOIN message; the payload is one gcs_seqno_t,
 *              a negative value is reported as a failed state transfer
 *
 * @return 1 if this node is the sender of a processed message (to notify the
 *         calling thread), 0 if it is not or if the message was ignored,
 *         -ENOTRECOVERABLE if this node can no longer receive state and the
 *         caller has to shut down the backend and abort.
 */
int
gcs_group_handle_join_msg  (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    int const   sender_idx = msg->sender_idx;
    gcs_node_t* sender    = &group->nodes[sender_idx];

    assert (GCS_MSG_JOIN == msg->type);

    // TODO: define an explicit type for the join message, like gcs_join_msg_t
    assert (msg->size == sizeof(gcs_seqno_t));

    if (GCS_NODE_STATE_DONOR  == sender->status ||
        GCS_NODE_STATE_JOINER == sender->status) {
        int j;
        gcs_seqno_t seqno     = gcs_seqno_gtoh(*(gcs_seqno_t*)msg->buf);
        gcs_node_t* peer      = NULL;
        const char* peer_id   = NULL;
        const char* peer_name = "left the group"; // used if peer is not found
        int         peer_idx  = -1;
        bool        from_donor = false;
        const char* st_dir    = NULL; // state transfer direction symbol

        if (GCS_NODE_STATE_DONOR == sender->status) {
            peer_id    = sender->joiner;
            from_donor = true;
            st_dir     = "to";

            assert (group->last_applied_proto_ver >= 0);

            /* #454 - with last applied protocol version 0 we don't switch to
             *        JOINED here, instead going straight to SYNCED */
            if (0 != group->last_applied_proto_ver) {
                assert(sender->count_last_applied);
                sender->status = GCS_NODE_STATE_JOINED;
            }
        }
        else {
            peer_id = sender->donor;
            st_dir  = "from";

            if (group->quorum.version < 2) {
                // #591 remove after quorum v1 is phased out
                sender->status = GCS_NODE_STATE_JOINED;
                group->prim_num++;
            }
            else {
                if (seqno >= 0) {
                    sender->status = GCS_NODE_STATE_JOINED;
                    group->prim_num++;
                }
                else {
                    // failed transfer: joiner goes back to PRIM
                    sender->status = GCS_NODE_STATE_PRIM;
                }
            }
        }

        // Try to find peer. (#483: the sender itself is not skipped, it may
        // turn out to be its own peer - see the "resyncs itself" case below.)
        for (j = 0; j < group->num; j++) {
            if (!memcmp(peer_id, group->nodes[j].id,
                        sizeof (group->nodes[j].id))) {
                peer_idx  = j;
                peer      = &group->nodes[peer_idx];
                peer_name = peer->name;
                break;
            }
        }

        if (j == group->num) {
            gu_warn ("Could not find peer: %s", peer_id);
        }

        if (seqno < 0) {
            // negative seqno carries a negated errno-style code
            gu_warn ("%d.%d (%s): State transfer %s %d.%d (%s) failed: %d (%s)",
                     sender_idx, sender->segment, sender->name, st_dir,
                     peer_idx, peer ? peer->segment : -1, peer_name,
                     (int)seqno, strerror((int)-seqno));

            if (from_donor && peer_idx == group->my_idx &&
                GCS_NODE_STATE_JOINER == group->nodes[peer_idx].status) {
                // this node will be waiting for SST forever. If it has only
                // one recv thread there is no (generic) way to wake it up.
                gu_fatal ("Will never receive state. Need to abort.");
                // return to core to shutdown the backend before aborting
                return -ENOTRECOVERABLE;
            }

            if (group->quorum.version < 2 && !from_donor && // #591
                sender_idx == group->my_idx) {
                // remove after quorum v1 is phased out
                gu_fatal ("Failed to receive state. Need to abort.");
                return -ENOTRECOVERABLE;
            }
        }
        else {
            if (sender_idx == peer_idx) {
                gu_info ("Member %d.%d (%s) resyncs itself to group",
                         sender_idx, sender->segment, sender->name);
            }
            else {
                gu_info ("%d.%d (%s): State transfer %s %d.%d (%s) complete.",
                         sender_idx, sender->segment, sender->name, st_dir,
                         peer_idx, peer ? peer->segment : -1, peer_name);
            }
        }
    }
    else {
        if (GCS_NODE_STATE_PRIM == sender->status) {
            gu_warn("Rejecting JOIN message from %d.%d (%s): new State Transfer"
                    " required.", sender_idx, sender->segment, sender->name);
        }
        else {
            // should we freak out and throw an error?
            gu_warn("Protocol violation. JOIN message sender %d.%d (%s) is not "
                    "in state transfer (%s). Message ignored.",
                    sender_idx, sender->segment, sender->name,
                    gcs_node_state_to_str(sender->status));
        }
        return 0;
    }

    return (sender_idx == group->my_idx);
}
Пример #3
0
// Initialises core and backend objects + some common tests.
//
// Creates the global Core with a fresh config, fetches its Backend, resets
// Seqno, opens the connection, consumes the first configuration action,
// configures the payload size and performs a send/receive round trip followed
// by JOIN and SYNC messages. On return the core is left in lock-step mode.
// Any unexpected result fails the running test via fail_if().
static inline void
core_test_init ()
{
    long     ret;
    action_t act;

    mark_point();

    gu_config_t* config = gu_config_create ();
    fail_if (config == NULL);

    Core = gcs_core_create (config, NULL, "core_test",
                            "aaa.bbb.ccc.ddd:xxxx", 0, 0);

    fail_if (NULL == Core);

    Backend = gcs_core_get_backend (Core);
    fail_if (NULL == Backend);

    Seqno = 0; // reset seqno

    // setting the payload size before the core is opened must be refused
    ret = core_test_set_payload_size (FRAG_SIZE);
    fail_if (-EBADFD != ret, "Expected -EBADFD, got: %ld (%s)",
             ret, strerror(-ret));

    // opening with a bogus third argument must be refused as well
    ret = gcs_core_open (Core, "yadda-yadda", "owkmevc", 1);
    fail_if (-EINVAL != ret, "Expected -EINVAL, got %ld (%s)",
             ret, strerror(-ret));

    ret = gcs_core_open (Core, "yadda-yadda", "dummy://", 1);
    fail_if (0 != ret, "Failed to open core connection: %ld (%s)",
             ret, strerror(-ret));

    // receive first configuration message
    fail_if (CORE_RECV_ACT (&act, NULL, UNKNOWN_SIZE, GCS_ACT_CONF));
    fail_if (core_test_check_conf(act.out, true, 0, 1));
    free (act.out);

    // this will configure backend to have desired fragment size
    ret = core_test_set_payload_size (FRAG_SIZE);
    fail_if (0 != ret, "Failed to set up the message payload size: %ld (%s)",
             ret, strerror(-ret));

    // try to send an action to check that everything's alright
    ret = gcs_core_send (Core, act1, sizeof(act1_str), GCS_ACT_TORDERED);
    // sizeof yields size_t and ret is long: the conversion specifiers must
    // match (%zu / %ld), passing them for %d is undefined behaviour
    fail_if (ret != (long)sizeof(act1_str), "Expected %zu, got %ld (%s)",
             sizeof(act1_str), ret, strerror (-ret));
    gu_warn ("Next CORE_RECV_ACT fails under valgrind");
    act.in = act1;
    fail_if (CORE_RECV_ACT (&act, act1_str, sizeof(act1_str),GCS_ACT_TORDERED));

    ret = gcs_core_send_join (Core, Seqno);
    fail_if (ret != 0, "gcs_core_send_join(): %ld (%s)",
             ret, strerror(-ret));
    // no action to be received (we're joined already)

    ret = gcs_core_send_sync (Core, Seqno);
    fail_if (ret != 0, "gcs_core_send_sync(): %ld (%s)",
             ret, strerror(-ret));
    fail_if (CORE_RECV_ACT(&act,NULL,sizeof(gcs_seqno_t),GCS_ACT_SYNC));
    // the SYNC action must carry back the seqno that was sent
    fail_if (Seqno != gcs_seqno_gtoh(*(gcs_seqno_t*)act.out));

    gcs_core_send_lock_step (Core, true);
    mark_point();
}
/*!
 * Handles a state transfer request action.
 *
 * The action buffer starts with the NUL-terminated name of the requested
 * donor. It may be followed by a 'V' marker and a version byte; for version 2
 * and above an IST UUID and seqno follow, which are extracted here and then
 * stripped, so that the buffer looks like an original (version 1) request
 * again. The rest of the buffer is the request itself.
 *
 * The request is passed up only on the joiner (the sender) and on the selected
 * donor; on the donor the leading donor name is stripped from the buffer too.
 * On return act->id holds the donor index (or a negative error code).
 *
 * @param group group context
 * @param act   received action of type GCS_ACT_STATE_REQ, modified in place
 *
 * @return 0 if request is ignored, request size if it should be passed up
 */
int
gcs_group_handle_state_request (gcs_group_t*         group,
                                struct gcs_act_rcvd* act)
{
    // pass only to sender and to one potential donor
    const char*      donor_name     = (const char*)act->act.buf;
    size_t           donor_name_len = strlen(donor_name);
    int              donor_idx      = -1;
    int const        joiner_idx     = act->sender_idx;
    const char*      joiner_name    = group->nodes[joiner_idx].name;
    gcs_node_state_t joiner_status  = group->nodes[joiner_idx].status;
    bool const       desync         = group_desync_request (donor_name);

    gu_uuid_t ist_uuid = {{0, }};
    gcs_seqno_t ist_seqno = GCS_SEQNO_ILL;
    int str_version = 1; // actually it's 0 or 1.

    assert (GCS_ACT_STATE_REQ == act->act.type);

    // The version byte lives at offset donor_name_len + 2, so the buffer must
    // hold at least donor_name_len + 3 bytes before it may be read.
    if (act->act.buf_len > (ssize_t)(donor_name_len + 2) &&
        donor_name[donor_name_len + 1] == 'V') {
        str_version = (int)donor_name[donor_name_len + 2];
    }

    if (str_version >= 2) {
        // layout: name '\0' 'V' version uuid seqno request...
        size_t const head = donor_name_len + 3 + sizeof(ist_uuid) +
                            sizeof(ist_seqno);
        const char*  ist_buf = donor_name + donor_name_len + 3;
        gcs_seqno_t  ist_seqno_wire;

        assert (act->act.buf_len >= (ssize_t)head);

        memcpy(&ist_uuid, ist_buf, sizeof(ist_uuid));
        // seqno sits at an arbitrary byte offset: copy it out instead of
        // dereferencing a possibly misaligned pointer
        memcpy(&ist_seqno_wire, ist_buf + sizeof(ist_uuid),
               sizeof(ist_seqno_wire));
        ist_seqno = gcs_seqno_gtoh(ist_seqno_wire);

        // change act.buf's content to original version.
        // and it's safe to change act.buf_len
        memmove((char*)act->act.buf + donor_name_len + 1,
                (char*)act->act.buf + head,
                act->act.buf_len - head);
        // 'V', version byte, uuid and seqno are gone
        act->act.buf_len -= sizeof(ist_uuid) + sizeof(ist_seqno) + 2;
    }

    if (joiner_status != GCS_NODE_STATE_PRIM && !desync) {

        const char* joiner_status_string = gcs_node_state_to_str(joiner_status);

        if (group->my_idx == joiner_idx) {
            gu_error ("Requesting state transfer while in %s. "
                      "Ignoring.", joiner_status_string);
            // still passed up, with the error code in the id field
            act->id = -ECANCELED;
            return act->act.buf_len;
        }
        else {
            gu_error ("Member %d.%d (%s) requested state transfer, "
                      "but its state is %s. Ignoring.",
                      joiner_idx, group->nodes[joiner_idx].segment, joiner_name,
                      joiner_status_string);
            gcs_group_ignore_action (group, act);
            return 0;
        }
    }

    donor_idx = group_select_donor(group,
                                   str_version,
                                   joiner_idx, donor_name,
                                   &ist_uuid, ist_seqno, desync);

    // unless selection failed, donor == joiner if and only if it is a desync
    assert (donor_idx != joiner_idx || desync  || donor_idx < 0);
    assert (donor_idx == joiner_idx || !desync || donor_idx < 0);

    if (group->my_idx != joiner_idx && group->my_idx != donor_idx) {
        // if neither DONOR nor JOINER, ignore request
        gcs_group_ignore_action (group, act);
        return 0;
    }
    else if (group->my_idx == donor_idx) {
        // strip the donor name together with its terminating NUL
        act->act.buf_len -= donor_name_len + 1;
        memmove ((char*)act->act.buf,
                 ((char*)act->act.buf) + donor_name_len + 1,
                 act->act.buf_len);
        // now action starts with request, like it was supplied by application,
        // see gcs_request_state_transfer()
    }

    // Return index of donor (or error) in the seqno field to sender.
    // It will be used to detect error conditions (no available donor,
    // donor crashed and the like).
    // This may be ugly, well, any ideas?
    act->id = donor_idx;

    return act->act.buf_len;
}
Пример #5
0
/*! Extracts the seqno carried at the start of the message buffer,
 *  converted with gcs_seqno_gtoh(). */
static inline gcs_seqno_t
group_get_last_msg (gcs_recv_msg_t* msg)
{
    const gcs_seqno_t* const payload = (gcs_seqno_t*)(msg->buf);

    return gcs_seqno_gtoh(*payload);
}