Exemple #1
0
static
GCS_BACKEND_OPEN_FN(dummy_open)
{
    long     ret   = -ENOMEM;
    dummy_t* dummy = backend->conn;
    gcs_comp_msg_t* comp;

    if (!dummy) {
        gu_debug ("Backend not initialized");
        return -EBADFD;
    }

    comp = gcs_comp_msg_new (true, false, 0, 1);
 
    if (comp) {
	ret = gcs_comp_msg_add (comp, "11111111-2222-3333-4444-555555555555");
	assert (0 == ret); // we have only one member, index = 0

        dummy->state = DUMMY_TRANS; // required by gcs_dummy_set_component()
        ret = gcs_dummy_set_component (backend, comp); // install new component
        if (ret >= 0) {                                // queue the message
            ret = gcs_comp_msg_size(comp);
            ret = gcs_dummy_inject_msg (backend, comp, ret, GCS_MSG_COMPONENT,
                                        GCS_SENDER_NONE);
            if (ret > 0) ret = 0;
        }
	gcs_comp_msg_delete (comp);
    }
    gu_debug ("Opened backend connection: %d (%s)", ret, strerror(-ret));
    return ret;
}
static int
group_find_ist_donor_by_state (gcs_group_t* const group,
                               int joiner_idx,
                               gcs_seqno_t ist_seqno,
                               gcs_node_state_t status)
{
    gcs_node_t* joiner = &group->nodes[joiner_idx];
    gcs_segment_t joiner_segment = joiner->segment;

    // find node who is ist potentially possible.
    // first highest cached seqno local node.
    // then highest cached seqno remote node.
    int idx = 0;
    int local_idx = -1;
    int remote_idx = -1;
    for (idx = 0; idx < group->num; idx++)
    {
        if (joiner_idx == idx) continue;

        gcs_node_t* const node = &group->nodes[idx];
        gcs_seqno_t const node_cached = gcs_node_cached(node);

        if (node->status >= status &&
            group_node_is_stateful(group, node) &&
            node_cached != GCS_SEQNO_ILL &&
            node_cached <= (ist_seqno + 1))
        {
            int* const idx_ptr =
                (joiner_segment == node->segment) ? &local_idx : &remote_idx;

            if (*idx_ptr == -1 ||
                node_cached >= gcs_node_cached(&group->nodes[*idx_ptr]))
            {
                *idx_ptr = idx;
            }
        }
    }
    if (local_idx >= 0)
    {
        gu_debug("local found. name[%s], seqno[%lld]",
                 group->nodes[local_idx].name,
                 (long long)gcs_node_cached(&group->nodes[local_idx]));
        return local_idx;
    }
    if (remote_idx >= 0)
    {
        gu_debug("remote found. name[%s], seqno[%lld]",
                 group->nodes[remote_idx].name,
                 (long long)gcs_node_cached(&group->nodes[remote_idx]));
        return remote_idx;
    }
    gu_debug("not found.");
    return -1;
}
Exemple #3
0
long gu_to_interrupt (gu_to_t *to, gu_seqno_t seqno)
{
    long rcode = 0;
    long err;
    to_waiter_t *w;

    assert (seqno >= 0);

    if ((err = gu_mutex_lock (&to->lock))) {
        gu_fatal("Mutex lock failed (%d): %s", err, strerror(err));
        abort();
    }
    if (seqno >= to->seqno) {
        if ((w = to_get_waiter (to, seqno)) == NULL) {
            gu_mutex_unlock(&to->lock);
            return -EAGAIN;
        }        
        /* we have a valid waiter now */

        switch (w->state) {
        case HOLDER:
            gu_debug ("trying to interrupt in use seqno: seqno = %llu, "
                      "TO seqno = %llu", seqno, to->seqno);
            /* gu_mutex_unlock (&to->lock); */
            rcode = -ERANGE;
            break;
        case CANCELED:
            gu_debug ("trying to interrupt canceled seqno: seqno = %llu, "
                      "TO seqno = %llu", seqno, to->seqno);
            /* gu_mutex_unlock (&to->lock); */
            rcode = -ERANGE;
            break;
        case WAIT:
            gu_debug ("signaling to interrupt wait seqno: seqno = %llu, "
                      "TO seqno = %llu", seqno, to->seqno);
            rcode    = to_wake_waiter (w);
        case RELEASED:
            w->state = INTERRUPTED;
            break;
        case INTERRUPTED:
            gu_debug ("TO waiter interrupt already seqno: seqno = %llu, "
                      "TO seqno = %llu", seqno, to->seqno);
            break;
        }
    } else {
        gu_debug ("trying to interrupt used seqno: cancel seqno = %llu, "
                  "TO seqno = %llu", seqno, to->seqno);
        /* gu_mutex_unlock (&to->lock); */
        rcode = -ERANGE;
    }

    gu_mutex_unlock (&to->lock);
    return rcode;
}
Exemple #4
0
int
gu_conf_set_log_callback (gu_log_cb_t callback)
{
    if (callback) {
        gu_debug ("Logging function changed by application");
        gu_log_cb = callback;
    } else {
        gu_debug ("Logging function restored to default");
        gu_log_cb = gu_log_cb_default;
    }
    return 0;
}
static int
group_find_ist_donor_by_name_in_string (
    gcs_group_t* const group,
    int joiner_idx,
    const char* str, int str_len,
    gcs_seqno_t ist_seqno,
    gcs_node_state_t status)
{
    assert (str != NULL);

    const char* begin = str;
    const char* end;

    gu_debug("ist_seqno[%lld]", (long long)ist_seqno);
    // return the highest cached seqno node.
    int ret = -1;
    do {
        end = strchr(begin, ',');
        int len = 0;
        if (end == NULL) {
            len = str_len - (begin - str);
        } else {
            len = end - begin;
        }
        assert (len >= 0);
        if (len == 0) break;
        int idx = group_find_ist_donor_by_name(
            group, joiner_idx, begin, len,
            ist_seqno, status);
        if (idx >= 0)
        {
            if (ret == -1 ||
                gcs_node_cached(&group->nodes[idx]) >=
                gcs_node_cached(&group->nodes[ret]))
            {
                ret = idx;
            }
        }
        begin = end + 1;
    } while (end != NULL);

    if (ret == -1) {
        gu_debug("not found");
    } else {
        gu_debug("found. name[%s], seqno[%lld]",
                 group->nodes[ret].name,
                 (long long)gcs_node_cached(&group->nodes[ret]));
    }
    return ret;
}
/*! Returns new last applied value if it has changes, 0 otherwise */
gcs_seqno_t
gcs_group_handle_last_msg (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    gcs_seqno_t seqno;

    assert (GCS_MSG_LAST        == msg->type);
    assert (sizeof(gcs_seqno_t) == msg->size);

    seqno = gcs_seqno_gtoh(*(gcs_seqno_t*)(msg->buf));

    // This assert is too restrictive. It requires application to send
    // last applied messages while holding TO, otherwise there's a race
    // between threads.
    // assert (seqno >= group->last_applied);

    gcs_node_set_last_applied (&group->nodes[msg->sender_idx], seqno);

    if (msg->sender_idx == group->last_node && seqno > group->last_applied) {
        /* node that was responsible for the last value, has changed it.
         * need to recompute it */
        gcs_seqno_t old_val = group->last_applied;

        group_redo_last_applied (group);

        if (old_val < group->last_applied) {
            gu_debug ("New COMMIT CUT %lld after %lld from %d",
                      (long long)group->last_applied,
                      (long long)seqno, msg->sender_idx);
            return group->last_applied;
        }
    }

    return 0;
}
Exemple #7
0
int
gu_conf_debug_on ()
{
    gu_log_max_level = GU_LOG_DEBUG;
    gu_debug ("Turning debug on");
    return 0;
}
Exemple #8
0
/*! Sets the new component view.
 *  The same component message should be injected in the queue separately
 *  (see gcs_dummy_inject_msg()) in order to model different race conditions */
long
gcs_dummy_set_component (gcs_backend_t*        backend,
                         const gcs_comp_msg_t* comp)
{
    dummy_t* dummy    = backend->conn;
    long     new_num  = gcs_comp_msg_num (comp);
    long     i;

    assert (dummy->state > DUMMY_CLOSED);

    if (dummy->memb_num != new_num) {
        void* tmp = gu_realloc (dummy->memb, new_num * sizeof(gcs_comp_memb_t));

        if (NULL == tmp) return -ENOMEM;

        dummy->memb     = tmp;
        dummy->memb_num = new_num;
    }

    for (i = 0; i < dummy->memb_num; i++) {
        strcpy ((char*)&dummy->memb[i], gcs_comp_msg_id (comp, i));
    }

    dummy->my_idx = gcs_comp_msg_self(comp);
    dummy->state  = gcs_comp_msg_primary(comp) ? DUMMY_PRIM : DUMMY_NON_PRIM;
    gu_debug ("Setting state to %s",
              DUMMY_PRIM == dummy->state ? "DUMMY_PRIM" : "DUMMY_NON_PRIM");

    return 0;
}
Exemple #9
0
GuChoiceMark
gu_choice_mark(GuChoice* ch)
{
	gu_assert(ch->path_idx <= gu_buf_length(ch->path));
	gu_debug("%p@%d: mark", ch, ch->path_idx);
	return (GuChoiceMark){ch->path_idx};
}
int
gcs_group_handle_sync_msg  (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    int const   sender_idx = msg->sender_idx;
    gcs_node_t* sender     = &group->nodes[sender_idx];

    assert (GCS_MSG_SYNC == msg->type);

    if (GCS_NODE_STATE_JOINED == sender->status ||
        /* #454 - at this layer we jump directly from DONOR to SYNCED */
        (0 == group->last_applied_proto_ver &&
         GCS_NODE_STATE_DONOR == sender->status)) {

        sender->status = GCS_NODE_STATE_SYNCED;
        sender->count_last_applied = true;

        group_redo_last_applied (group);//from now on this node must be counted

        gu_info ("Member %d.%d (%s) synced with group.",
                 sender_idx, sender->segment, sender->name);

        return (sender_idx == group->my_idx);
    }
    else {
        if (GCS_NODE_STATE_SYNCED != sender->status) {
            gu_warn ("SYNC message sender from non-JOINED %d.%d (%s). Ignored.",
                     sender_idx, sender->segment, sender->name);
        }
        else {
            gu_debug ("Redundant SYNC message from %d.%d (%s).",
                      sender_idx, sender->segment, sender->name);
        }
        return 0;
    }
}
Exemple #11
0
int
gu_conf_self_tstamp_on ()
{
    gu_debug ("Turning self timestamping on");
    gu_log_self_tstamp = true;
    return 0;
}
Exemple #12
0
int
gu_conf_self_tstamp_off ()
{
    gu_debug ("Turning self timestamping off");
    gu_log_self_tstamp = false;
    return 0;
}
Exemple #13
0
int
gu_conf_debug_off ()
{
    gu_debug ("Turning debug off");
    gu_log_max_level = GU_LOG_INFO;
    return 0;
}
Exemple #14
0
void
gu_choice_reset(GuChoice* ch, GuChoiceMark mark)
{
	gu_assert(ch->path_idx <= gu_buf_length(ch->path));
	gu_debug("%p@%d: reset %d", ch, ch->path_idx, mark.path_idx);
	gu_require(mark.path_idx <= ch->path_idx );
	ch->path_idx = mark.path_idx;
}
static int
group_find_ist_donor (gcs_group_t* const group,
                      int str_version,
                      int joiner_idx,
                      const char* str, int str_len,
                      gcs_seqno_t ist_seqno,
                      gcs_node_state_t status)
{
    int idx = -1;

    gcs_seqno_t conf_seqno = group->quorum.act_id;
    gcs_seqno_t lowest_cached_seqno = group_lowest_cached_seqno(group);
    if (lowest_cached_seqno == GCS_SEQNO_ILL)
    {
        gu_debug("fallback to sst. lowest_cached_seqno == GCS_SEQNO_ILL");
        return -1;
    }
    gcs_seqno_t const max_cached_range = conf_seqno - lowest_cached_seqno;
    gcs_seqno_t safety_gap = max_cached_range >> 7; /* 1.0 / 128 ~= 0.008 */
    safety_gap = safety_gap < (1 << 20) ? safety_gap : (1 << 20); /* Be sensible and don't reserve more than 1M */
    gcs_seqno_t safe_ist_seqno = lowest_cached_seqno + safety_gap;

    gu_debug("ist_seqno[%lld], lowest_cached_seqno[%lld],"
             "conf_seqno[%lld], safe_ist_seqno[%lld]",
             (long long)ist_seqno, (long long)lowest_cached_seqno,
             (long long)conf_seqno, (long long)safe_ist_seqno);

    if (ist_seqno < safe_ist_seqno) {
        // unsafe to perform ist.
        gu_debug("fallback to sst. ist_seqno < safe_ist_seqno");
        return -1;
    }

    if (str_len) {
        // find ist donor by name.
        idx = group_find_ist_donor_by_name_in_string(
            group, joiner_idx, str, str_len, ist_seqno, status);
        if (idx >= 0) return idx;
    }
    // find ist donor by status.
    idx = group_find_ist_donor_by_state(
        group, joiner_idx, ist_seqno, status);
    if (idx >= 0) return idx;
    return -1;
}
Exemple #16
0
int
gu_conf_set_log_file (FILE *file)
{
    gu_debug ("Log file changed by application");
    if (file) {
        gu_log_file = file;
    }
    else {
        gu_log_file = stderr;
    }

    return 0;
}
Exemple #17
0
static
GCS_BACKEND_RECV_FN(dummy_recv)
{
    long     ret = 0;
    dummy_t* conn = backend->conn;

    msg->sender_idx = GCS_SENDER_NONE;
    msg->type   = GCS_MSG_ERROR;

    assert (conn);

    /* skip it if we already have popped a message from the queue
     * in the previous call */
    if (gu_likely(DUMMY_CLOSED <= conn->state))
    {
        int err;
        dummy_msg_t** ptr = gu_fifo_get_head (conn->gc_q, &err);

        if (gu_likely(ptr != NULL)) {

            dummy_msg_t* dmsg = *ptr;

            assert (NULL != dmsg);

            msg->type       = dmsg->type;
            msg->sender_idx = dmsg->sender_idx;
            ret             = dmsg->len;
            msg->size       = ret;

            if (gu_likely(dmsg->len <= msg->buf_len)) {
                gu_fifo_pop_head (conn->gc_q);
                memcpy (msg->buf, dmsg->buf, dmsg->len);
                dummy_msg_destroy (dmsg);
            }
            else {
                // supplied recv buffer too short, leave the message in queue
                memcpy (msg->buf, dmsg->buf, msg->buf_len);
                gu_fifo_release (conn->gc_q);
            }
        }
        else {
            ret = -EBADFD; // closing
            gu_debug ("Returning %d: %s", ret, strerror(-ret));
        }
    }
    else {
        ret = -EBADFD;
    }

    return ret;
}
gcs_group_state_t
gcs_group_handle_state_msg (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    if (GCS_GROUP_WAIT_STATE_MSG == group->state) {

        gcs_state_msg_t* state = gcs_state_msg_read (msg->buf, msg->size);

        if (state) {

            const gu_uuid_t* state_uuid = gcs_state_msg_uuid (state);

            if (!gu_uuid_compare(&group->state_uuid, state_uuid)) {

                gu_info ("STATE EXCHANGE: got state msg: "GU_UUID_FORMAT
                         " from %d (%s)", GU_UUID_ARGS(state_uuid),
                         msg->sender_idx, gcs_state_msg_name(state));

                if (gu_log_debug) group_print_state_debug(state);

                gcs_node_record_state (&group->nodes[msg->sender_idx], state);
                group_post_state_exchange (group);
            }
            else {
                gu_debug ("STATE EXCHANGE: stray state msg: "GU_UUID_FORMAT
                          " from node %ld (%s), current state UUID: "
                          GU_UUID_FORMAT,
                          GU_UUID_ARGS(state_uuid),
                          msg->sender_idx, gcs_state_msg_name(state),
                          GU_UUID_ARGS(&group->state_uuid));

                if (gu_log_debug) group_print_state_debug(state);

                gcs_state_msg_destroy (state);
            }
        }
        else {
            gu_warn ("Could not parse state message from node %d",
                     msg->sender_idx, group->nodes[msg->sender_idx].name);
        }
    }

    return group->state;
}
Exemple #19
0
int
gu_choice_next(GuChoice* ch, int n_choices)
{
	gu_assert(n_choices >= 0);
	gu_assert(ch->path_idx <= gu_buf_length(ch->path));
	if (n_choices == 0) {
		return -1;
	}
	int i = 0;
	if (gu_buf_length(ch->path) > ch->path_idx) {
		i = (int) gu_buf_get(ch->path, size_t, ch->path_idx);
		gu_assert(i <= n_choices);
	} else {
		gu_buf_push(ch->path, size_t, n_choices);
		i = n_choices;
	}
	int ret = (i == 0) ? -1 : n_choices - i;
	gu_debug("%p@%d: %d", ch, ch->path_idx, ret);
	ch->path_idx++;
	return ret;
}
gcs_group_state_t
gcs_group_handle_comp_msg (gcs_group_t* group, const gcs_comp_msg_t* comp)
{
    long        new_idx, old_idx;
    gcs_node_t* new_nodes = NULL;
    ulong       new_memb  = 0;

    const bool prim_comp     = gcs_comp_msg_primary  (comp);
    const bool bootstrap     = gcs_comp_msg_bootstrap(comp);
    const long new_my_idx    = gcs_comp_msg_self     (comp);
    const long new_nodes_num = gcs_comp_msg_num      (comp);

    group_check_comp_msg (prim_comp, new_my_idx, new_nodes_num);

    if (new_my_idx >= 0) {
        gu_info ("New COMPONENT: primary = %s, bootstrap = %s, my_idx = %ld, "
                 "memb_num = %ld", prim_comp ? "yes" : "no",
                 bootstrap ? "yes" : "no", new_my_idx, new_nodes_num);

        new_nodes = group_nodes_init (group, comp);

        if (!new_nodes) {
            gu_fatal ("Could not allocate memory for %ld-node component.",
                      gcs_comp_msg_num (comp));
            assert(0);
            return (gcs_group_state_t)-ENOMEM;
        }

        if (GCS_GROUP_PRIMARY == group->state) {
            gu_debug ("#281: Saving %s over %s",
                      gcs_node_state_to_str(group->nodes[group->my_idx].status),
                      gcs_node_state_to_str(group->prim_state));
            group->prim_state = group->nodes[group->my_idx].status;
        }
    }
    else {
        // Self-leave message
        gu_info ("Received self-leave message.");
        assert (0 == new_nodes_num);
        assert (!prim_comp);
    }

    if (prim_comp) {
        /* Got PRIMARY COMPONENT - Hooray! */
        assert (new_my_idx >= 0);
        if (group->state == GCS_GROUP_PRIMARY) {
            /* we come from previous primary configuration, relax */
        }
        else if (bootstrap)
        {
            /* Is there need to initialize something else in this case? */
            group->nodes[group->my_idx].bootstrap = true;
        }
        else {
            const bool first_component =
#ifndef GCS_CORE_TESTING
            (1 == group->num) && !strcmp (NODE_NO_ID, group->nodes[0].id);
#else
            (1 == group->num);
#endif

            if (1 == new_nodes_num && first_component) {
                /* bootstrap new configuration */
                assert (GCS_GROUP_NON_PRIMARY == group->state);
                assert (1 == group->num);
                assert (0 == group->my_idx);

                // This bootstraps initial primary component for state exchange
                gu_uuid_generate (&group->prim_uuid, NULL, 0);
                group->prim_seqno = 0;
                group->prim_num   = 1;
                group->state      = GCS_GROUP_PRIMARY;

                if (group->act_id < 0) {
                    // no history provided: start a new one
                    group->act_id  = GCS_SEQNO_NIL;
                    gu_uuid_generate (&group->group_uuid, NULL, 0);
                    gu_info ("Starting new group from scratch: "GU_UUID_FORMAT,
                             GU_UUID_ARGS(&group->group_uuid));
                }
// the following should be removed under #474
                group->nodes[0].status = GCS_NODE_STATE_JOINED;
                /* initialize node ID to the one given by the backend - this way
                 * we'll be recognized as coming from prev. conf. in node array
                 * remap below */
                strncpy ((char*)group->nodes[0].id, new_nodes[0].id,
                         sizeof (new_nodes[0].id) - 1);
                group->nodes[0].segment = new_nodes[0].segment;
            }
        }
    }
    else {
        group_go_non_primary (group);
    }

    /* Remap old node array to new one to preserve action continuity */
    for (new_idx = 0; new_idx < new_nodes_num; new_idx++) {
        /* find member index in old component by unique member id */
        for (old_idx = 0; old_idx < group->num; old_idx++) {
            // just scan through old group
            if (!strcmp(group->nodes[old_idx].id, new_nodes[new_idx].id)) {
                /* the node was in previous configuration with us */
                /* move node context to new node array */
                gcs_node_move (&new_nodes[new_idx], &group->nodes[old_idx]);
                break;
            }
        }
        /* if wasn't found in new configuration, new member -
         * need to do state exchange */
        new_memb |= (old_idx == group->num);
    }

    /* free old nodes array */
    group_nodes_free (group);

    group->my_idx = new_my_idx;
    group->num    = new_nodes_num;
    group->nodes  = new_nodes;

    if (gcs_comp_msg_primary(comp) || bootstrap) {
        /* TODO: for now pretend that we always have new nodes and perform
         * state exchange because old states can carry outdated node status.
         * (also protocol voting needs to be redone)
         * However this means aborting ongoing actions. Find a way to avoid
         * this extra state exchange. Generate new state messages on behalf
         * of other nodes? see #238 */
        new_memb = true;
        /* if new nodes joined, reset ongoing actions and state messages */
        if (new_memb) {
            group_nodes_reset (group);
            group->state      = GCS_GROUP_WAIT_STATE_UUID;
            group->state_uuid = GU_UUID_NIL; // prepare for state exchange
        }
        else {
            if (GCS_GROUP_PRIMARY == group->state) {
                /* since we don't have any new nodes since last PRIMARY,
                   we skip state exchange */
                group_post_state_exchange (group);
            }
        }
        group_redo_last_applied (group);
    }

    return group->state;
}
Exemple #21
0
Fichier : map.c Projet : McMbuvi/GF
static void
gu_map_resize(GuMap* map)
{
	GuMapData* data = &map->data;
	GuMapData old_data = *data;
	size_t req_entries =
		gu_twin_prime_sup(GU_MAX(11, map->data.n_occupied * 4 / 3 + 1));

	size_t key_size = map->key_size;
	size_t key_alloc = 0;
	data->keys = gu_mem_buf_alloc(req_entries * key_size, &key_alloc);

	size_t value_size = map->value_size;
	size_t value_alloc = 0;
	if (value_size) {
		data->values = gu_mem_buf_alloc(req_entries * value_size,
						    &value_alloc);
		memset(data->values, 0, value_alloc);
	}
	
	data->n_entries = gu_twin_prime_inf(value_size ?
					    GU_MIN(key_alloc / key_size,
						   value_alloc / value_size)
					    : key_alloc / key_size);
	switch (map->kind) {
	case GU_MAP_GENERIC:
	case GU_MAP_WORD:
		memset(data->keys, 0, key_alloc);
		break;
	case GU_MAP_ADDR:
		for (size_t i = 0; i < data->n_entries; i++) {
			((const void**)data->keys)[i] = NULL;
		}
		break;
	default:
		gu_impossible();
	}

	gu_assert(data->n_entries > data->n_occupied);
	gu_debug("Resized to %d entries", data->n_entries);
	
	data->n_occupied = 0;
	data->zero_idx = SIZE_MAX;

	for (size_t i = 0; i < old_data.n_entries; i++) {
		if (gu_map_entry_is_free(map, &old_data, i)) {
			continue;
		}
		void* old_key = &old_data.keys[i * key_size];
		if (map->kind == GU_MAP_ADDR) {
			old_key = *(void**)old_key;
		}
		void* old_value = &old_data.values[i * value_size];

		memcpy(gu_map_insert(map, old_key),
		       old_value, map->value_size);
	}

	gu_mem_buf_free(old_data.keys);
	if (value_size) {
		gu_mem_buf_free(old_data.values);
	}
}
/*! Processes state messages and sets group parameters accordingly */
static void
group_post_state_exchange (gcs_group_t* group)
{
    const gcs_state_msg_t* states[group->num];
    gcs_state_quorum_t* quorum = &group->quorum;
    bool new_exchange = gu_uuid_compare (&group->state_uuid, &GU_UUID_NIL);
    long i;

    /* Collect state messages from nodes. */
    /* Looping here every time is suboptimal, but simply counting state messages
     * is not straightforward too: nodes may disappear, so the final count may
     * include messages from the disappeared nodes.
     * Let's put it this way: looping here is reliable and not that expensive.*/
    for (i = 0; i < group->num; i++) {
        states[i] = group->nodes[i].state_msg;
        if (NULL == states[i] ||
            (new_exchange &&
             gu_uuid_compare (&group->state_uuid,
                              gcs_state_msg_uuid(states[i]))))
            return; // not all states from THIS state exch. received, wait
    }
    gu_debug ("STATE EXCHANGE: "GU_UUID_FORMAT" complete.",
              GU_UUID_ARGS(&group->state_uuid));

    gcs_state_msg_get_quorum (states, group->num, quorum);

    if (quorum->version >= 0) {
        if (quorum->version < 2) {
            group->last_applied_proto_ver = 0;
        }
        else {
            group->last_applied_proto_ver = 1;
        }
    }
    else {
        gu_fatal ("Negative quorum version: %d", quorum->version);
        abort();
    }

    // Update each node state based on quorum outcome:
    // is it up to date, does it need SST and stuff
    for (i = 0; i < group->num; i++) {
        gcs_node_update_status (&group->nodes[i], quorum);
    }

    if (quorum->primary) {
        // primary configuration
        if (new_exchange) {
            // new state exchange happened
            group->state      = GCS_GROUP_PRIMARY;
            group->act_id     = quorum->act_id;
            group->conf_id    = quorum->conf_id + 1;
            group->group_uuid = quorum->group_uuid;
            group->prim_uuid  = group->state_uuid;
            group->state_uuid = GU_UUID_NIL;
        }
        else {
            // no state exchange happend, processing old state messages
            assert (GCS_GROUP_PRIMARY == group->state);
            group->conf_id++;
        }

        group->prim_seqno = group->conf_id;
        group->prim_num   = 0;

        for (i = 0; i < group->num; i++) {
            group->prim_num += gcs_node_is_joined (group->nodes[i].status);
        }

        assert (group->prim_num > 0);
    }
    else {
        // non-primary configuration
        group_go_non_primary (group);
    }

    gu_info ("Quorum results:"
             "\n\tversion    = %u,"
             "\n\tcomponent  = %s,"
             "\n\tconf_id    = %lld,"
             "\n\tmembers    = %d/%d (joined/total),"
             "\n\tact_id     = %lld,"
             "\n\tlast_appl. = %lld,"
             "\n\tprotocols  = %d/%d/%d (gcs/repl/appl),"
             "\n\tgroup UUID = "GU_UUID_FORMAT,
             quorum->version,
             quorum->primary ? "PRIMARY" : "NON-PRIMARY",
             quorum->conf_id,
             group->prim_num, group->num,
             quorum->act_id,
             group->last_applied,
             quorum->gcs_proto_ver, quorum->repl_proto_ver,
             quorum->appl_proto_ver,
             GU_UUID_ARGS(&quorum->group_uuid));

    group_check_donor(group);
}
Exemple #23
0
/*!
 * Handle action fragment
 *
 * Unless a whole action is returned, contents of act is undefined
 *
 * In order to optimize branch prediction used gu_likely macros and odered and
 * nested if/else blocks according to branch probability.
 *
 * @return 0              - success,
 *         size of action - success, full action received,
 *         negative       - error.
 *
 * TODO: this function is too long, figure out a way to factor it into several
 *       smaller ones. Note that it is called for every GCS_MSG_ACTION message
 *       so it should be optimal.
 */
ssize_t
gcs_defrag_handle_frag (gcs_defrag_t*         df,
                        const gcs_act_frag_t* frg,
                        struct gcs_act*       act,
                        bool                  local)
{
    if (df->received) {
        /* another fragment of existing action */

        df->frag_no++;

        /* detect possible error condition */
        if (gu_unlikely((df->sent_id != frg->act_id) ||
                        (df->frag_no != frg->frag_no))) {
            if (local && df->reset &&
                (df->sent_id == frg->act_id) && (0 == frg->frag_no)) {
                /* df->sent_id was aborted halfway and is being taken care of
                 * by the sender thread. Forget about it.
                 * Reinit counters and continue with the new action.
                 * Note that for local actions no memory allocation is made.*/
                gu_debug ("Local action %lld reset.", frg->act_id);
                df->frag_no  = 0;
                df->received = 0;
                df->tail     = df->head;
                df->reset    = false;

                if (df->size != frg->act_size) {

                    df->size = frg->act_size;

#ifndef GCS_FOR_GARB
                    if (df->cache !=NULL) {
                        gcache_free (df->cache, df->head);
                    }
                    else {
                        free ((void*)df->head);
                    }

                    DF_ALLOC();
#endif /* GCS_FOR_GARB */
                }
            }
            else {
                gu_error ("Unordered fragment received. Protocol error.");
                gu_error ("Expected: %llu:%ld, received: %llu:%ld",
                          df->sent_id, df->frag_no, frg->act_id, frg->frag_no);
                gu_error ("Contents: '%.*s'", frg->frag_len, (char*)frg->frag);
                df->frag_no--; // revert counter in hope that we get good frag
                assert(0);
                return -EPROTO;
            }
        }
    }
    else {
        /* new action */
        if (gu_likely(0 == frg->frag_no)) {

            df->size    = frg->act_size;
            df->sent_id = frg->act_id;
            df->reset   = false;

#ifndef GCS_FOR_GARB
            DF_ALLOC();
#else
            /* we don't store actions locally at all */
            df->head = NULL;
            df->tail = df->head;
#endif
        }
        else {
            /* not a first fragment */
            if (!local && df->reset) {
                /* can happen after configuration change,
                   just ignore this message calmly */
                gu_debug ("Ignoring fragment %lld:%ld after action reset",
                          frg->act_id, frg->frag_no);
                return 0;
            }
            else {
                ((char*)frg->frag)[frg->frag_len - 1] = '\0';
                gu_error ("Unordered fragment received. Protocol error.");
                gu_error ("Expected: any:0(first), received: %lld:%ld",
                          frg->act_id, frg->frag_no);
                gu_error ("Contents: '%s', local: %s, reset: %s",
                          (char*)frg->frag, local ? "yes" : "no",
                          df->reset ? "yes" : "no");
                assert(0);
                return -EPROTO;
            }
        }
    }

    df->received += frg->frag_len;
    assert (df->received <= df->size);

#ifndef GCS_FOR_GARB
    assert (df->tail);
    memcpy (df->tail, frg->frag, frg->frag_len);
    df->tail += frg->frag_len;
#else
    /* we skip memcpy since have not allocated any buffer */
    assert (NULL == df->tail);
    assert (NULL == df->head);
#endif

    if (df->received == df->size) {
        act->buf     = df->head;
        act->buf_len = df->received;
        gcs_defrag_init (df, df->cache);
        return act->buf_len;
    }
    else {
        return 0;
    }
}