コード例 #1
0
/*
 * Handles a SYNC message.
 *
 * A sender in JOINED state (or in DONOR state when
 * group->last_applied_proto_ver is 0) is promoted to SYNCED, flagged to be
 * counted for last applied, and the group's last applied value is
 * recomputed via group_redo_last_applied().
 *
 * @return non-zero if the sender was promoted and is this node
 *         (sender index equals group->my_idx), 0 otherwise, including the
 *         case when the message was ignored.
 */
int
gcs_group_handle_sync_msg  (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    int const         idx  = msg->sender_idx;
    gcs_node_t* const node = &group->nodes[idx];

    assert (GCS_MSG_SYNC == msg->type);

    /* #454 - at this layer we jump directly from DONOR to SYNCED */
    bool const donor_shortcut = (0 == group->last_applied_proto_ver &&
                                 GCS_NODE_STATE_DONOR == node->status);

    if (GCS_NODE_STATE_JOINED != node->status && !donor_shortcut) {
        /* Sender is not eligible for promotion: only report and ignore. */
        if (GCS_NODE_STATE_SYNCED == node->status) {
            gu_debug ("Redundant SYNC message from %d.%d (%s).",
                      idx, node->segment, node->name);
        }
        else {
            gu_warn ("SYNC message sender from non-JOINED %d.%d (%s). Ignored.",
                     idx, node->segment, node->name);
        }

        return 0;
    }

    node->status             = GCS_NODE_STATE_SYNCED;
    node->count_last_applied = true;

    /* from now on this node must be counted */
    group_redo_last_applied (group);

    gu_info ("Member %d.%d (%s) synced with group.",
             idx, node->segment, node->name);

    return (idx == group->my_idx);
}
コード例 #2
0
/*
 * Aborts the process if this node is a JOINER whose recorded donor ID is
 * non-empty and no other member of the group carries that ID.
 * Does nothing in every other situation.
 */
static void
group_check_donor (gcs_group_t* group)
{
    gcs_node_state_t const state = group->nodes[group->my_idx].status;
    const char*      const donor = group->nodes[group->my_idx].donor;
    long idx;

    /* Only a joiner depends on a donor. */
    if (GCS_NODE_STATE_JOINER != state) return;

    /* No donor recorded - nothing to look for. */
    if (0 == memcmp (donor, group_empty_id, sizeof(group_empty_id))) return;

    for (idx = 0; idx < group->num; idx++)
    {
        if (idx == group->my_idx) continue; /* never match ourselves */

        if (0 == memcmp (donor, group->nodes[idx].id,
                         sizeof (group->nodes[idx].id)))
        {
            return; /* donor is still a member */
        }
    }

    gu_warn ("Donor %s is no longer in the group. State transfer cannot "
             "be completed, need to abort. Aborting...", donor);

    gu_abort();
}
コード例 #3
0
/*
 * Records a new "last applied" seqno for the node.
 *
 * A value lower than the one already stored is treated as bogus: it is
 * logged and ignored, so node->last_applied never decreases here.
 *
 * @param node  node to update
 * @param seqno last applied seqno reported for that node
 */
static inline void
gcs_node_set_last_applied (gcs_node_t* node, gcs_seqno_t seqno)
{
    if (gu_unlikely(seqno < node->last_applied)) {
        /* Explicit casts keep the arguments in sync with %lld regardless of
         * which integer type gcs_seqno_t is defined as. */
        gu_warn ("Received bogus LAST message: %lld, from node %s, "
                 "expected >= %lld. Ignoring.",
                 (long long)seqno, node->id, (long long)node->last_applied);
    } else {
        node->last_applied = seqno;
    }
}
コード例 #4
0
ファイル: gcs_dummy.c プロジェクト: latinovic/galera
/*
 * Computes the payload size available for a requested packet size:
 * the packet size, capped at the connection's max_pkt_size, minus the
 * connection's header size. A warning is logged when the request exceeds
 * the maximum.
 */
static
GCS_BACKEND_MSG_SIZE_FN(dummy_msg_size)
{
    const long max_pkt_size = backend->conn->max_pkt_size;

    if (pkt_size > max_pkt_size) {
        /* max_pkt_size is long, so %ld is required; pkt_size is cast so the
         * format stays correct whatever integer type the macro gives it. */
        gu_warn ("Requested packet size: %ld, "
                 "maximum possible packet size: %ld",
                 (long)pkt_size, max_pkt_size);
        return (max_pkt_size - backend->conn->hdr_size);
    }

    return (pkt_size - backend->conn->hdr_size);
}
コード例 #5
0
ファイル: gu_to.c プロジェクト: latinovic/galera
/*
 * Destroys a TO object and sets the caller's pointer to NULL.
 *
 * @param to address of the pointer to the object to destroy
 * @return 0 on success,
 *         -EBUSY if the object is still marked as used,
 *         negated error code of gu_mutex_destroy() if the lock could not
 *         be destroyed (in which case the object is not freed).
 */
long gu_to_destroy (gu_to_t** to)
{
    gu_to_t *t = *to;
    long      ret;
    ssize_t    i;

    gu_mutex_lock (&t->lock);
    if (t->used) {
        gu_mutex_unlock (&t->lock);
        return -EBUSY;
    }

    for (i = 0; i < t->qlen; i++) {
        to_waiter_t *w = t->queue + i;
#ifdef TO_USE_SIGNAL
        if (gu_cond_destroy (&w->cond)) {
            // @todo: what if someone is waiting?
            /* i is ssize_t, hence %zd */
            gu_warn ("Failed to destroy condition %zd. Should not happen", i);
        }
#else
        if (pthread_mutex_destroy (&w->mtx)) {
            // @todo: what if someone is waiting?
            /* i is ssize_t, hence %zd */
            gu_warn ("Failed to destroy mutex %zd. Should not happen", i);
        }
#endif
    }
    /* With qlen reset, a repeated call will not touch the waiters again. */
    t->qlen = 0;

    gu_mutex_unlock (&t->lock);
    /* What else can be done here? */
    ret = gu_mutex_destroy (&t->lock);
    if (ret) return -ret; // application can retry

    gu_free (t->queue);
    gu_free (t);
    *to = NULL;
    return 0;
}
コード例 #6
0
ファイル: gu_to.c プロジェクト: latinovic/galera
/*
 * Cancels the waiter registered for the given seqno.
 *
 * A waiter whose seqno is ahead of the current TO seqno, or equal to it but
 * not in HOLDER state, is woken up and marked CANCELED. Cancelling the
 * current holder or an already used seqno is refused.
 *
 * Aborts the process if the lock cannot be taken or if no waiter slot can
 * be obtained for seqno.
 *
 * @param to    TO object
 * @param seqno seqno to cancel, must be non-negative
 * @return result of to_wake_waiter() when the waiter was cancelled,
 *         -ECANCELED when cancellation was refused.
 */
long gu_to_cancel (gu_to_t *to, gu_seqno_t seqno)
{
    long         err;
    to_waiter_t *w;

    assert (seqno >= 0);

    if ((err = gu_mutex_lock (&to->lock))) {
        /* err is long: print with %ld and narrow explicitly for strerror */
        gu_fatal("Mutex lock failed (%ld): %s", err, strerror((int)err));
        abort();
    }

    // Check for queue overflow. This is totally unrecoverable. Abort.
    if ((w = to_get_waiter (to, seqno)) == NULL) {
        gu_mutex_unlock(&to->lock);
        abort();
    }
    /* we have a valid waiter now */

    if ((seqno > to->seqno) ||
        (seqno == to->seqno && w->state != HOLDER)) {
        err = to_wake_waiter (w);
        w->state = CANCELED;
    } else if (seqno == to->seqno && w->state == HOLDER) {
        /* Seqnos are printed as signed values through explicit casts so
         * that format and argument types always agree. */
        gu_warn("tried to cancel current TO holder, state %d seqno %lld",
                 (int)w->state, (long long)seqno);
        err = -ECANCELED;
    } else {
        gu_warn("trying to cancel used seqno: state %d cancel seqno = %lld, "
                "TO seqno = %lld", (int)w->state, (long long)seqno,
                (long long)to->seqno);
        err = -ECANCELED;
    }

    gu_mutex_unlock (&to->lock);
    return err;
}
コード例 #7
0
/*!
 * Handles action message. Is called often - therefore, inlined
 *
 * Dispatches the fragment to one of the node's two defragmenters:
 * node->oob for GCS_ACT_SERVICE fragments, node->app for everything else.
 *
 * @param node  node the fragment was received from
 * @param frg   fragment to process
 * @param act   action descriptor passed through to the defragmenter
 * @param local passed through to the defragmenter unchanged
 *
 * @return the value returned by gcs_defrag_handle_frag()
 */
static inline ssize_t
gcs_node_handle_act_frag (gcs_node_t*           node,
                          const gcs_act_frag_t* frg,
                          struct gcs_act*       act,
                          bool                  local)
{
    if (gu_likely(GCS_ACT_SERVICE != frg->act_type)) {
        return gcs_defrag_handle_frag (&node->app, frg, act, local);
    }

    /* The only remaining possibility is GCS_ACT_SERVICE, so no further
     * type check (or "unrecognised type" fallback) is needed. */
    return gcs_defrag_handle_frag (&node->oob, frg, act, local);
}
コード例 #8
0
/*
 * Handles a state message received during state exchange.
 *
 * The message is processed only while the group is in
 * GCS_GROUP_WAIT_STATE_MSG state. A message carrying the current state UUID
 * is recorded for the sending node and group_post_state_exchange() is run;
 * a message with any other UUID is logged as stray and destroyed.
 *
 * @return group->state as it is after the message has been handled
 */
gcs_group_state_t
gcs_group_handle_state_msg (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    if (GCS_GROUP_WAIT_STATE_MSG == group->state) {

        /* int, like the sender index in the other message handlers */
        int const        sender_idx = msg->sender_idx;
        gcs_state_msg_t* state = gcs_state_msg_read (msg->buf, msg->size);

        if (state) {

            const gu_uuid_t* state_uuid = gcs_state_msg_uuid (state);

            if (!gu_uuid_compare(&group->state_uuid, state_uuid)) {

                gu_info ("STATE EXCHANGE: got state msg: " GU_UUID_FORMAT
                         " from %d (%s)", GU_UUID_ARGS(state_uuid),
                         sender_idx, gcs_state_msg_name(state));

                if (gu_log_debug) group_print_state_debug(state);

                /* NOTE(review): state is not destroyed on this path, so
                 * the node presumably keeps it - confirm against
                 * gcs_node_record_state(). */
                gcs_node_record_state (&group->nodes[sender_idx], state);
                group_post_state_exchange (group);
            }
            else {
                gu_debug ("STATE EXCHANGE: stray state msg: " GU_UUID_FORMAT
                          " from node %d (%s), current state UUID: "
                          GU_UUID_FORMAT,
                          GU_UUID_ARGS(state_uuid),
                          sender_idx, gcs_state_msg_name(state),
                          GU_UUID_ARGS(&group->state_uuid));

                if (gu_log_debug) group_print_state_debug(state);

                gcs_state_msg_destroy (state);
            }
        }
        else {
            /* the node name needs its own conversion specifier */
            gu_warn ("Could not parse state message from node %d (%s)",
                     sender_idx, group->nodes[sender_idx].name);
        }
    }

    return group->state;
}
コード例 #9
0
/*
 * Handles a state UUID message.
 *
 * The UUID is accepted only while the group is in
 * GCS_GROUP_WAIT_STATE_UUID state and only from node 0 (the
 * representative); the group then switches to GCS_GROUP_WAIT_STATE_MSG.
 * Any other UUID message is logged as stray and otherwise ignored.
 *
 * @return group->state as it is after the message has been handled
 */
gcs_group_state_t
gcs_group_handle_uuid_msg  (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    /* int, like the sender index in the other message handlers */
    int const              sender_idx = msg->sender_idx;
    /* the message buffer is only read, so keep it const */
    const gu_uuid_t* const uuid       = (const gu_uuid_t*)msg->buf;

    assert (msg->size == sizeof(gu_uuid_t));

    if (GCS_GROUP_WAIT_STATE_UUID == group->state &&
        0 == sender_idx /* check that it is from the representative */) {
        group->state_uuid = *uuid;
        group->state      = GCS_GROUP_WAIT_STATE_MSG;
    }
    else {
        gu_warn ("Stray state UUID msg: " GU_UUID_FORMAT
                 " from node %d (%s), current group state %s",
                 GU_UUID_ARGS(uuid),
                 sender_idx, group->nodes[sender_idx].name,
                 gcs_group_state_str[group->state]);
    }

    return group->state;
}
コード例 #10
0
/*
 * Looks for a donor candidate for the given joiner: a node other than the
 * joiner whose status is at least `status` and for which
 * group_node_is_stateful() holds.
 *
 * A candidate in the joiner's own segment is returned immediately. A
 * candidate from another segment is returned only if the joiner's segment
 * holds no other node with status >= GCS_NODE_STATE_JOINER.
 *
 * @return index of the chosen node, or -EAGAIN if none can be chosen now
 */
static int
group_find_node_by_state (gcs_group_t*     const group,
                          int              const joiner_idx,
                          gcs_node_state_t const status)
{
    gcs_segment_t const joiner_segment = group->nodes[joiner_idx].segment;
    int  i;
    int  candidate       = -1;    /* last suitable node seen so far */
    bool segment_has_peers = false; /* same-segment nodes that may yet
                                     * become donors */

    for (i = 0; i < group->num; i++) {

        if (i == joiner_idx) continue; /* skip joiner */

        gcs_node_t* const node = &group->nodes[i];
        bool const suitable = (node->status >= status &&
                               group_node_is_stateful (group, node));

        if (suitable) candidate = i;

        if (joiner_segment != node->segment) continue;

        /* node shares the joiner's segment */
        if (suitable) return i;

        if (node->status >= GCS_NODE_STATE_JOINER) segment_has_peers = true;
    }

    /* Have not found suitable donor in the same segment. */
    if (segment_has_peers || candidate < 0) {
        /* wait for a suitable donor to appear in the same segment */
        return -EAGAIN;
    }

    if (joiner_idx == group->my_idx) {
        gu_warn ("There are no nodes in the same segment that will ever "
                 "be able to become donors, yet there is a suitable donor "
                 "outside. Will use that one.");
    }

    return candidate;
}
コード例 #11
0
/*!
 * Handles a JOIN message, which reports the outcome of a state transfer.
 * The message payload is a single gcs_seqno_t; a negative value denotes
 * a failed transfer and carries the negated error code.
 *
 * Only senders in DONOR or JOINER state are processed, messages from nodes
 * in any other state are logged and ignored.
 *
 * @return true (non-zero) if this node is the sender, to notify the calling
 *         thread of success,
 *         0 if the sender is another node or the message was ignored,
 *         -ENOTRECOVERABLE if this node can no longer obtain state and
 *         has to abort.
 */
int
gcs_group_handle_join_msg  (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    int const   sender_idx = msg->sender_idx;
    gcs_node_t* sender    = &group->nodes[sender_idx];

    assert (GCS_MSG_JOIN == msg->type);

    // TODO: define an explicit type for the join message, like gcs_join_msg_t
    assert (msg->size == sizeof(gcs_seqno_t));

    if (GCS_NODE_STATE_DONOR  == sender->status ||
        GCS_NODE_STATE_JOINER == sender->status) {
        int j;
        gcs_seqno_t seqno     = gcs_seqno_gtoh(*(gcs_seqno_t*)msg->buf);
        gcs_node_t* peer      = NULL;
        const char* peer_id   = NULL;
        const char* peer_name = "left the group"; // used if peer is not found
        int         peer_idx  = -1;
        bool        from_donor = false;
        const char* st_dir    = NULL; // state transfer direction symbol

        if (GCS_NODE_STATE_DONOR == sender->status) {
            peer_id    = sender->joiner;
            from_donor = true;
            st_dir     = "to";

            assert (group->last_applied_proto_ver >= 0);

            if (0 == group->last_applied_proto_ver) {
                /* #454 - we don't switch to JOINED here,
                 *        instead going straight to SYNCED */
            }
            else {
                assert(sender->count_last_applied);
                sender->status = GCS_NODE_STATE_JOINED;
            }
        }
        else {
            peer_id = sender->donor;
            st_dir  = "from";

            if (group->quorum.version < 2) {
                // #591 remove after quorum v1 is phased out
                sender->status = GCS_NODE_STATE_JOINED;
                group->prim_num++;
            }
            else {
                if (seqno >= 0) {
                    sender->status = GCS_NODE_STATE_JOINED;
                    group->prim_num++;
                }
                else {
                    // failed transfer: joiner falls back to PRIM
                    sender->status = GCS_NODE_STATE_PRIM;
                }
            }
        }

        // Try to find peer.
        for (j = 0; j < group->num; j++) {
// #483            if (j == sender_idx) continue;
            if (!memcmp(peer_id, group->nodes[j].id,
                        sizeof (group->nodes[j].id))) {
                peer_idx  = j;
                peer      = &group->nodes[peer_idx];
                peer_name = peer->name;
                break;
            }
        }

        if (j == group->num) {
            gu_warn ("Could not find peer: %s", peer_id);
        }

        if (seqno < 0) {
            gu_warn ("%d.%d (%s): State transfer %s %d.%d (%s) failed: %d (%s)",
                     sender_idx, sender->segment, sender->name, st_dir,
                     peer_idx, peer ? peer->segment : -1, peer_name,
                     (int)seqno, strerror((int)-seqno));

            if (from_donor && peer_idx == group->my_idx &&
                GCS_NODE_STATE_JOINER == group->nodes[peer_idx].status) {
                // this node will be waiting for SST forever. If it has only
                // one recv thread there is no (generic) way to wake it up.
                gu_fatal ("Will never receive state. Need to abort.");
                // return to core to shutdown the backend before aborting
                return -ENOTRECOVERABLE;
            }

            if (group->quorum.version < 2 && !from_donor && // #591
                sender_idx == group->my_idx) {
                // remove after quorum v1 is phased out
                gu_fatal ("Failed to receive state. Need to abort.");
                return -ENOTRECOVERABLE;
            }
        }
        else {
            if (sender_idx == peer_idx) {
                gu_info ("Member %d.%d (%s) resyncs itself to group",
                         sender_idx, sender->segment, sender->name);
            }
            else {
                gu_info ("%d.%d (%s): State transfer %s %d.%d (%s) complete.",
                         sender_idx, sender->segment, sender->name, st_dir,
                         peer_idx, peer ? peer->segment : -1, peer_name);
            }
        }
    }
    else {
        if (GCS_NODE_STATE_PRIM == sender->status) {
            gu_warn("Rejecting JOIN message from %d.%d (%s): new State Transfer"
                    " required.", sender_idx, sender->segment, sender->name);
        }
        else {
            // should we freak out and throw an error?
            gu_warn("Protocol violation. JOIN message sender %d.%d (%s) is not "
                    "in state transfer (%s). Message ignored.",
                    sender_idx, sender->segment, sender->name,
                    gcs_node_state_to_str(sender->status));
        }
        return 0;
    }

    return (sender_idx == group->my_idx);
}
コード例 #12
0
/*!
 * Selects and returns the index of state transfer donor, if available.
 * Updates donor and joiner status if state transfer is possible.
 *
 * With desync set the requesting node itself becomes the "donor", provided
 * its status is at least GCS_NODE_STATE_SYNCED; otherwise the choice is
 * delegated to gcs_group_find_donor().
 *
 * @return
 *         donor index or negative error code:
 *         -EHOSTUNREACH if requested donor is not available
 *         -EAGAIN       if there were no nodes in the proper state.
 */
static int
group_select_donor (gcs_group_t* group,
                    int const str_version,
                    int const joiner_idx,
                    const char* const donor_string,
                    const gu_uuid_t* ist_uuid, gcs_seqno_t ist_seqno,
                    bool const desync)
{
    static gcs_node_state_t const min_donor_state = GCS_NODE_STATE_SYNCED;
    int  const donor_len      = strlen(donor_string);
    bool const required_donor = (donor_len > 0);
    int  donor_idx;

    if (!desync) {
        donor_idx = gcs_group_find_donor(group,
                                         str_version,
                                         joiner_idx,
                                         donor_string, donor_len,
                                         ist_uuid, ist_seqno);
    }
    else {
        /* sender wants to become "donor" itself */
        assert(donor_len > 0);

        if (group->nodes[joiner_idx].status >= min_donor_state) {
            donor_idx = joiner_idx;
        }
        else {
            donor_idx = -EAGAIN;
        }
    }

    if (donor_idx < 0) {
        /* no donor could be selected: report and hand the error back */
        gu_warn ("Member %d.%d (%s) requested state transfer from '%s', "
                 "but it is impossible to select State Transfer donor: %s",
                 joiner_idx, group->nodes[joiner_idx].segment,
                 group->nodes[joiner_idx].name,
                 required_donor ? donor_string : "*any*", strerror (-donor_idx));

        return donor_idx;
    }

    assert(donor_idx != joiner_idx || desync);

    gcs_node_t* const joiner = &group->nodes[joiner_idx];
    gcs_node_t* const donor  = &group->nodes[donor_idx];

    if (!desync) {
        gu_info ("Member %d.%d (%s) requested state transfer from '%s'. "
                 "Selected %d.%d (%s)(%s) as donor.",
                 joiner_idx, joiner->segment, joiner->name,
                 required_donor ? donor_string : "*any*",
                 donor_idx, donor->segment, donor->name,
                 gcs_node_state_to_str(donor->status));
    }
    else {
        gu_info ("Member %d.%d (%s) desyncs itself from group",
                 donor_idx, donor->segment, donor->name);
    }

    // reserve donor, confirm joiner (! assignment order is significant !)
    // when donor and joiner are the same node, DONOR must be written last
    joiner->status = GCS_NODE_STATE_JOINER;
    donor->status  = GCS_NODE_STATE_DONOR;
    memcpy (donor->joiner, joiner->id, GCS_COMP_MEMB_ID_MAX_LEN+1);
    memcpy (joiner->donor, donor->id,  GCS_COMP_MEMB_ID_MAX_LEN+1);

    return donor_idx;
}
コード例 #13
0
ファイル: gcs_core_test.c プロジェクト: cyclefusion/galera
// Initialises core and backend objects + some common tests:
// creates the core, checks that it rejects operations before being opened
// and rejects an unknown backend, opens it on the dummy backend, receives
// the first configuration, sends one test action, then sends JOIN and SYNC
// and finally switches the core to lock-step mode.
static inline void
core_test_init ()
{
    long     ret;
    action_t act;

    mark_point();

    gu_config_t* config = gu_config_create ();
    fail_if (config == NULL);

    Core = gcs_core_create (config, NULL, "core_test",
                            "aaa.bbb.ccc.ddd:xxxx", 0, 0);

    fail_if (NULL == Core);

    Backend = gcs_core_get_backend (Core);
    fail_if (NULL == Backend);

    Seqno = 0; // reset seqno

    // core is not opened yet, so this is expected to fail
    ret = core_test_set_payload_size (FRAG_SIZE);
    fail_if (-EBADFD != ret, "Expected -EBADFD, got: %ld (%s)",
             ret, strerror(-ret));

    // bogus backend URL must be rejected
    ret = gcs_core_open (Core, "yadda-yadda", "owkmevc", 1);
    fail_if (-EINVAL != ret, "Expected -EINVAL, got %ld (%s)",
             ret, strerror(-ret));

    ret = gcs_core_open (Core, "yadda-yadda", "dummy://", 1);
    fail_if (0 != ret, "Failed to open core connection: %ld (%s)",
             ret, strerror(-ret));

    // receive first configuration message
    fail_if (CORE_RECV_ACT (&act, NULL, UNKNOWN_SIZE, GCS_ACT_CONF));
    fail_if (core_test_check_conf(act.out, true, 0, 1));
    free (act.out);

    // this will configure backend to have desired fragment size
    ret = core_test_set_payload_size (FRAG_SIZE);
    fail_if (0 != ret, "Failed to set up the message payload size: %ld (%s)",
             ret, strerror(-ret));

    // try to send an action to check that everything's alright
    ret = gcs_core_send (Core, act1, sizeof(act1_str), GCS_ACT_TORDERED);
    // sizeof yields size_t (%zu) and ret is long (%ld)
    fail_if (ret != sizeof(act1_str), "Expected %zu, got %ld (%s)",
             sizeof(act1_str), ret, strerror (-ret));
    gu_warn ("Next CORE_RECV_ACT fails under valgrind");
    act.in = act1;
    fail_if (CORE_RECV_ACT (&act, act1_str, sizeof(act1_str),GCS_ACT_TORDERED));

    ret = gcs_core_send_join (Core, Seqno);
    fail_if (ret != 0, "gcs_core_send_join(): %ld (%s)",
             ret, strerror(-ret));
    // no action to be received (we're joined already)

    ret = gcs_core_send_sync (Core, Seqno);
    fail_if (ret != 0, "gcs_core_send_sync(): %ld (%s)",
             ret, strerror(-ret));
    fail_if (CORE_RECV_ACT(&act,NULL,sizeof(gcs_seqno_t),GCS_ACT_SYNC));
    fail_if (Seqno != gcs_seqno_gtoh(*(gcs_seqno_t*)act.out));

    gcs_core_send_lock_step (Core, true);
    mark_point();
}
コード例 #14
0
ファイル: gcs_fc.c プロジェクト: cyclefusion/galera
/*! Processes a new action added to a slave queue.
 *
 *  Adds act_size to the accounted queue size and decides how long the
 *  caller should pause:
 *   - at or below the soft limit no throttling is applied;
 *   - at or above the hard limit the result is either a complete stop
 *     (max_throttle == 0.0) or an error;
 *   - in between, the sleep is derived from a desired rate that is a
 *     linear function of the queue size (scale * size + offset); sleeps
 *     shorter than min_sleep are skipped.
 *
 *  @param fc       flow control context
 *  @param act_size size of the action that was added to the queue
 *
 *  @return length of sleep in nanoseconds or negative error code
 *          or GU_TIME_ETERNITY for complete stop */
long long
gcs_fc_process (gcs_fc_t* fc, ssize_t act_size)
{
    fc->size += act_size;
    fc->act_count++;

    if (fc->size <= fc->soft_limit) {
        /* normal operation */
        if (gu_unlikely(fc->debug > 0 && !(fc->act_count % fc->debug))) {
            gu_info ("FC: queue size: %zdb (%4.1f%% of soft limit)",
                     fc->size, ((double)fc->size)/fc->soft_limit*100.0);
        }
        return 0;
    }
    else if (fc->size >= fc->hard_limit) {
        if (0.0 == fc->max_throttle) {
            /* we can accept total service outage */
            return GU_TIME_ETERNITY;
        }
        else {
            gu_error ("Recv queue hard limit exceeded. Can't continue.");
            return -ENOMEM;
        }
    }
//    else if (!(fc->act_count & 7)) { // do this for every 8th action
    else {
        long long end   = gu_time_monotonic();
        /* seconds elapsed since the reference point fc->start */
        double interval = ((end - fc->start) * 1.0e-9);

        if (gu_unlikely (0 == fc->last_sleep)) {
            /* just tripped the soft limit, preparing constants for throttle */

            fc->max_rate = (double)(fc->size - fc->init_size) / interval;

            double s = (1.0 - fc->max_throttle)/(fc->soft_limit-fc->hard_limit);
            assert (s < 0.0);

            fc->scale  = s * fc->max_rate;
            fc->offset = (1.0 - s*fc->soft_limit) * fc->max_rate;

            // calculate time interval from the soft limit
            interval = interval * (double)(fc->size - fc->soft_limit) /
                (fc->size - fc->init_size);
            assert (interval >= 0.0);

            // Move reference point to soft limit
            fc->last_sleep = fc->soft_limit;
            fc->start      = end - interval;

            gu_warn("Soft recv queue limit exceeded, starting replication "
                    "throttle. Measured avg. rate: %f bytes/sec; "
                    "Throttle parameters: scale=%f, offset=%f",
                    fc->max_rate, fc->scale, fc->offset);
        }

        /* throttling operation */
        double desired_rate = fc->size * fc->scale + fc->offset; // linear decay
        //double desired_rate = fc->max_rate * fc->max_throttle; // square wave
        assert (desired_rate <= fc->max_rate);

        /* time the bytes queued since the last sleep should have taken at
         * the desired rate, minus the time they actually took
         * (named sleep_sec so it does not shadow POSIX sleep()) */
        double sleep_sec = (double)(fc->size - fc->last_sleep) / desired_rate
            - interval;

        if (gu_unlikely(fc->debug > 0 && !(fc->act_count % fc->debug))) {
            gu_info ("FC: queue size: %zdb, length: %zd, "
                     "measured rate: %fb/s, desired rate: %fb/s, "
                     "interval: %5.3fs, sleep: %5.4fs. "
                     "Sleeps initiated: %zd, for a total of %6.3fs",
                     fc->size, fc->act_count,
                     ((double)(fc->size - fc->last_sleep))/interval,
                     desired_rate, interval, sleep_sec, fc->sleep_count,
                     fc->sleeps);
            fc->sleep_count = 0;
            fc->sleeps = 0.0;
        }

        if (gu_likely(sleep_sec < min_sleep)) {
#if 0
            gu_info ("Skipping sleep: desired_rate = %f, sleep = %f (%f), "
                     "interval = %f, fc->scale = %f, fc->offset = %f, "
                     "fc->size = %zd",
                     desired_rate, sleep_sec, min_sleep, interval,
                     fc->scale, fc->offset, fc->size);
#endif
            return 0;
        }

        /* a sleep is due: restart measurement from the current state */
        fc->last_sleep = fc->size;
        fc->start      = end;
        fc->sleep_count++;
        fc->sleeps += sleep_sec;

        return (1000000000LL * sleep_sec);
    }

    /* not reached: every branch above returns */
    return 0;
}