Пример #1
0
/*
 * Select the peer that should carry out the fencing operation described
 * by 'op'.
 *
 * For the current device (op->devices->data, when a device list is set)
 * the candidates are searched with find_best_peer() in three passes:
 *   1. FIND_PEER_SKIP_TARGET | FIND_PEER_VERIFIED_ONLY
 *   2. FIND_PEER_SKIP_TARGET   (skipped, returning NULL, while the query
 *      timer is still set and fewer replies than
 *      min(replies_expected, active peers) have arrived)
 *   3. FIND_PEER_TARGET_ONLY
 *
 * If none of the passes yields a peer and the op has st_opt_topology set,
 * stonith_topology_next() is called and the search is repeated for as
 * long as it returns pcmk_ok.
 *
 * Returns the chosen peer, or NULL when nobody was found or when we are
 * still waiting for query replies.
 */
static st_query_result_t *
stonith_choose_peer(remote_fencing_op_t * op)
{
    const char *dev_name = NULL;
    st_query_result_t *candidate = NULL;
    uint32_t live_peers = fencing_active_peers();

    for (;;) {
        if (op->devices == NULL) {
            crm_trace("Checking for someone to fence %s", op->target);
        } else {
            /* NOTE: dev_name keeps its previous value when op->devices is NULL */
            dev_name = op->devices->data;
            crm_trace("Checking for someone to fence %s with %s", op->target, dev_name);
        }

        /* Pass 1: verified peers other than the target */
        candidate = find_best_peer(dev_name, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
        if (candidate != NULL) {
            crm_trace("Found verified peer %s for %s", candidate->host, dev_name?dev_name:"<any>");
            return candidate;
        }

        /* Hold off on unverified devices while query replies are outstanding */
        if (op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, live_peers)) {
            crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
            return NULL;
        }

        /* Pass 2: any peer other than the target */
        candidate = find_best_peer(dev_name, op, FIND_PEER_SKIP_TARGET);
        if (candidate != NULL) {
            crm_trace("Found best unverified peer %s", candidate->host);
            return candidate;
        }

        /* Pass 3: the target itself */
        candidate = find_best_peer(dev_name, op, FIND_PEER_TARGET_ONLY);
        if (candidate != NULL) {
            crm_trace("%s will fence itself", candidate->host);
            return candidate;
        }

        /* Try the next fencing level if there is one */
        if (!is_set(op->call_options, st_opt_topology)) {
            break;
        }
        if (stonith_topology_next(op) != pcmk_ok) {
            break;
        }
    }

    crm_notice("Couldn't find anyone to fence %s with %s", op->target, dev_name?dev_name:"<any>");
    return NULL;
}
Пример #2
0
/*
 * Walk op->query_results and return the first peer able to take part in
 * fencing op->target.
 *
 * With st_opt_topology set, a peer qualifies when its device_list contains
 * the current device (op->devices->data, compared via sort_strings); the
 * matched entry is removed from that peer's list before the peer is
 * returned.  Without topology, the first peer reporting devices > 0 wins.
 *
 * When no peer qualifies and topology is in use, stonith_topology_next()
 * is called and the scan repeats while it returns pcmk_ok.
 *
 * Returns the selected peer or NULL if none was found.
 */
static st_query_result_t *stonith_choose_peer(remote_fencing_op_t *op)
{
    GListPtr entry = NULL;

    do {
        if(op->devices == NULL) {
            crm_trace("Checking for someone to fence %s", op->target);
        } else {
            crm_trace("Checking for someone to fence %s with %s", op->target, (char*)op->devices->data);
        }

        entry = op->query_results;
        while(entry != NULL) {
            st_query_result_t *candidate = entry->data;
            GListPtr hit = NULL;

            entry = entry->next;

            if(!is_set(op->call_options, st_opt_topology)) {
                /* No topology: Use the current best peer */
                if(candidate && candidate->devices > 0) {
                    crm_trace("Simple fencing");
                    return candidate;
                }
                continue;
            }

            /* Topology: does this peer have the next device of the current level? */
            if(op->devices == NULL) {
                continue;
            }

            hit = g_list_find_custom(candidate->device_list, op->devices->data, sort_strings);
            if(hit == NULL) {
                continue;
            }

            /* The count logged here is taken before the entry is removed */
            crm_trace("Removing %s from %s (%d remaining)", (char*)hit->data, candidate->host, g_list_length(candidate->device_list));
            candidate->device_list = g_list_remove(candidate->device_list, hit->data);
            return candidate;
        }

        /* Try the next fencing level if there is one */
    } while(is_set(op->call_options, st_opt_topology)
            && stonith_topology_next(op) == pcmk_ok);

    if(op->devices == NULL) {
        crm_trace("Couldn't find anyone to fence %s", op->target);
    } else {
        crm_trace("Couldn't find anyone to fence %s with %s", op->target, (char*)op->devices->data);
    }

    return NULL;
}
Пример #3
0
/*
 * Choose a peer to execute the fencing operation 'op'.
 *
 * For the current device (if op->devices is set) find_best_peer() is tried
 * with progressively weaker restrictions:
 *   FIND_PEER_SKIP_TARGET | FIND_PEER_VERIFIED_ONLY, then
 *   FIND_PEER_SKIP_TARGET, then
 *   FIND_PEER_TARGET_ONLY.
 * The first non-NULL result is returned.
 *
 * If all three lookups fail and st_opt_topology is set, the search moves
 * on via stonith_topology_next() and repeats while that returns pcmk_ok.
 *
 * Returns the selected peer, or NULL after logging that nobody was found.
 */
static st_query_result_t *
stonith_choose_peer(remote_fencing_op_t * op)
{
    const char *dev_name = NULL;
    st_query_result_t *chosen = NULL;

    do {
        if (op->devices == NULL) {
            crm_trace("Checking for someone to fence %s", op->target);
        } else {
            /* dev_name is left at its previous value when no device is set */
            dev_name = op->devices->data;
            crm_trace("Checking for someone to fence %s with %s", op->target,
                      (char *)op->devices->data);
        }

        /* Each fallback only runs when the previous lookup found nothing */
        chosen = find_best_peer(dev_name, op, FIND_PEER_SKIP_TARGET | FIND_PEER_VERIFIED_ONLY);
        if (chosen == NULL) {
            chosen = find_best_peer(dev_name, op, FIND_PEER_SKIP_TARGET);
        }
        if (chosen == NULL) {
            chosen = find_best_peer(dev_name, op, FIND_PEER_TARGET_ONLY);
        }
        if (chosen != NULL) {
            return chosen;
        }

        /* Try the next fencing level if there is one */
    } while (is_set(op->call_options, st_opt_topology)
             && stonith_topology_next(op) == pcmk_ok);

    if (op->devices == NULL) {
        crm_debug("Couldn't find anyone to fence %s", op->target);
    } else {
        crm_notice("Couldn't find anyone to fence %s with %s", op->target,
                   (char *)op->devices->data);
    }

    return NULL;
}
Пример #4
0
/*
 * Create a remote fencing operation owned by this node and, unless it is a
 * manual acknowledgement, broadcast a STONITH_OP_QUERY to the cluster.
 *
 * client      - requesting client; when NULL the client id is read from
 *               the F_STONITH_CLIENTID attribute of 'request'
 * request     - XML request the operation is created from
 * manual_ack  - when TRUE the op is only created and returned; no query is
 *               sent and no timer is started
 *
 * Returns the operation.  NULL is returned if the created op has no action.
 * If stonith_topology_next() fails the op is marked st_failed, completed
 * via remote_op_done() with -EINVAL and returned; an op in state
 * st_duplicate is returned without sending a query.
 */
remote_fencing_op_t *
initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack)
{
    remote_fencing_op_t *op = NULL;
    xmlNode *query_msg = NULL;
    int wait_secs = 0;
    const char *requester = client ? client->id
                                   : crm_element_value(request, F_STONITH_CLIENTID);

    CRM_LOG_ASSERT(requester != NULL);
    op = create_remote_stonith_op(requester, request, FALSE);
    op->owner = TRUE;

    if (manual_ack) {
        crm_notice("Initiating manual confirmation for %s: %s",
                   op->target, op->id);
        return op;
    }

    CRM_CHECK(op->action, return NULL);

    if (stonith_topology_next(op) != pcmk_ok) {
        op->state = st_failed;
    }

    if (op->state == st_failed) {
        crm_warn("Initiation of remote operation %s for %s: failed (%s)", op->action,
                 op->target, op->id);
        remote_op_done(op, NULL, -EINVAL, FALSE);
        return op;
    }

    if (op->state == st_duplicate) {
        crm_info("Initiating remote operation %s for %s: %s (duplicate)", op->action,
                 op->target, op->id);
        return op;
    }

    crm_notice("Initiating remote operation %s for %s: %s (%d)", op->action, op->target,
               op->id, op->state);

    /* Build the query message; attributes are added in a fixed order */
    query_msg = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, 0);

    crm_xml_add(query_msg, F_STONITH_REMOTE_OP_ID, op->id);
    crm_xml_add(query_msg, F_STONITH_TARGET, op->target);
    crm_xml_add(query_msg, F_STONITH_ACTION, op->action);
    crm_xml_add(query_msg, F_STONITH_ORIGIN, op->originator);
    crm_xml_add(query_msg, F_STONITH_CLIENTID, op->client_id);
    crm_xml_add(query_msg, F_STONITH_CLIENTNAME, op->client_name);
    crm_xml_add_int(query_msg, F_STONITH_TIMEOUT, op->base_timeout);

    /* NULL destination node; the message is released once it has been sent */
    send_cluster_message(NULL, crm_msg_stonith_ng, query_msg, FALSE);
    free_xml(query_msg);

    /* The scaled timeout is stored in an int before being multiplied by 1000
     * for g_timeout_add() */
    wait_secs = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
    op->query_timer = g_timeout_add((1000 * wait_secs), remote_op_query_timeout, op);

    return op;
}
Пример #5
0
/*
 * Handle the result of a fencing action that was executed for a remote
 * fencing operation.
 *
 * The operation id (F_STONITH_REMOTE_OP_ID) and the result code
 * (F_STONITH_RC) are located in 'msg' via XPath.  The matching op is looked
 * up in remote_op_list; if it is unknown but the result is pcmk_ok, an op
 * is created from the reply so the success gets recorded.
 *
 * Depending on the reply this either
 *   - finishes the op (broadcast replies, final device of a topology
 *     level, or a plain success with no devices left), or
 *   - advances to the next device / topology level and calls
 *     call_remote_stonith() again.
 *
 * Returns -EPROTO if a required attribute is missing, -EOPNOTSUPP if the
 * op is unknown, pcmk_ok for a processed broadcast, and otherwise the
 * result code carried by the reply.
 */
int
process_remote_stonith_exec(xmlNode * msg)
{
    int rc = 0;
    const char *id = NULL;
    const char *device = NULL;
    remote_fencing_op_t *op = NULL;
    xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);

    CRM_CHECK(dev != NULL, return -EPROTO);

    id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
    CRM_CHECK(id != NULL, return -EPROTO);

    /* From here on 'dev' is the node that carries the result code */
    dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
    CRM_CHECK(dev != NULL, return -EPROTO);

    crm_element_value_int(dev, F_STONITH_RC, &rc);

    device = crm_element_value(dev, F_STONITH_DEVICE);

    if (remote_op_list) {
        op = g_hash_table_lookup(remote_op_list, id);
    }

    if (op == NULL && rc == pcmk_ok) {
        /* Record successful fencing operations */
        const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);

        op = create_remote_stonith_op(client_id, dev, TRUE);
    }

    if (op == NULL) {
        /* Could be for an event that began before we started */
        /* TODO: Record the op for later querying */
        crm_info("Unknown or expired remote op: %s", id);
        return -EOPNOTSUPP;
    }

    if (op->devices && device && safe_str_neq(op->devices->data, device)) {
        /* list data is an untyped pointer; cast it for the %s conversion */
        crm_err
            ("Received outdated reply for device %s (instead of %s) to %s node %s. Operation already timed out at remote level.",
             device, (const char *)op->devices->data, op->action, op->target);
        return rc;
    }

    if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) {
        /* Arguments follow the format: client_name@originator.<first 8 chars of id> */
        crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)",
                  op->action, op->target, op->client_name, op->originator, op->id,
                  pcmk_strerror(rc), rc);
        if (rc == pcmk_ok) {
            op->state = st_done;
        } else {
            op->state = st_failed;
        }
        remote_op_done(op, msg, rc, FALSE);
        return pcmk_ok;
    } else if (safe_str_neq(op->originator, stonith_our_uname)) {
        /* If this isn't a remote level broadcast, and we are not the
         * originator of the operation, we should not be receiving this msg. */
        crm_err
            ("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)",
             stonith_our_uname, device ? device : "<unknown>", op->target);
        return rc;
    }

    if (is_set(op->call_options, st_opt_topology)) {
        /* Read from the top-level message, which is not necessarily the node
         * 'device' above was taken from, hence a separate variable. */
        const char *msg_device = crm_element_value(msg, F_STONITH_DEVICE);

        crm_notice("Call to %s for %s on behalf of %s@%s: %s (%d)",
                   msg_device ? msg_device : "<unknown>", op->target,
                   op->client_name, op->originator,
                   pcmk_strerror(rc), rc);

        /* We own the op, and it is complete. Broadcast the result to all nodes
         * and notify our local clients. */
        if (op->state == st_done) {
            remote_op_done(op, msg, rc, FALSE);
            return rc;
        }

        /* An operation completed successfully but has not yet been marked as done.
         * Continue the topology if more devices exist at the current level, otherwise
         * mark as done. */
        if (rc == pcmk_ok) {
            if (op->devices) {
                /* Success, are there any more? */
                op->devices = op->devices->next;
            }
            /* If no more devices at this fencing level, we are done,
             * else we need to continue with executing the next device in the list */
            if (op->devices == NULL) {
                crm_trace("Marking complex fencing op for %s as complete", op->target);
                op->state = st_done;
                remote_op_done(op, msg, rc, FALSE);
                return rc;
            }
        } else {
            /* This device failed, time to try another topology level. If no other
             * levels are available, mark this operation as failed and report results. */
            if (stonith_topology_next(op) != pcmk_ok) {
                op->state = st_failed;
                remote_op_done(op, msg, rc, FALSE);
                return rc;
            }
        }
    } else if (rc == pcmk_ok && op->devices == NULL) {
        crm_trace("All done for %s", op->target);

        op->state = st_done;
        remote_op_done(op, msg, rc, FALSE);
        return rc;
    }

    /* Retry on failure or execute the rest of the topology */
    crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->client_name,
              op->originator, rc);
    call_remote_stonith(op, NULL);
    return rc;
}
Пример #6
0
/*
 * Create a remote fencing operation from 'request' and register it in
 * remote_op_list (the table is created on first use).
 *
 * client   - id of the requesting client; copied when non-NULL
 * request  - XML request; the node carrying F_STONITH_TARGET supplies the
 *            target, action and owner, while timeout, call options and
 *            client name are read from 'request' itself
 * peer     - when TRUE and the target node was found, the op id is taken
 *            from F_STONITH_REMOTE; an op already registered under that
 *            id is returned as-is.  Otherwise a fresh UUID is generated.
 *
 * Returns the (new or existing) operation, or NULL when a peer request
 * lacks F_STONITH_REMOTE or memory could not be allocated.  The op is
 * stored in remote_op_list, whose value-destroy function is free_remote_op.
 * A new op starts in state st_query and is set to st_failed if
 * stonith_topology_next() does not return pcmk_ok.
 */
void *create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
{
    remote_fencing_op_t *op = NULL;
    xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE);

    if(remote_op_list == NULL) {
        remote_op_list = g_hash_table_new_full(
        crm_str_hash, g_str_equal, NULL, free_remote_op);
    }

    if(peer && dev) {
        const char *peer_id = crm_element_value(dev, F_STONITH_REMOTE);
        CRM_CHECK(peer_id != NULL, return NULL);

        op = g_hash_table_lookup(remote_op_list, peer_id);
        if(op) {
            crm_debug("%s already exists", peer_id);
            return op;
        }
    }

    op = calloc(1, sizeof *op);
    /* Every statement below dereferences op, so bail out on allocation failure */
    CRM_CHECK(op != NULL, return NULL);

    crm_element_value_int(request, F_STONITH_TIMEOUT, (int*)&(op->base_timeout));

    if(peer && dev) {
        op->id = crm_element_value_copy(dev, F_STONITH_REMOTE);
        crm_trace("Recorded new stonith op: %s", op->id);
    } else {
        op->id = crm_generate_uuid();
        crm_trace("Generated new stonith op: %s", op->id);
    }

    /* The table was created without a key-destroy function and uses op->id
     * itself as the key */
    g_hash_table_replace(remote_op_list, op->id, op);
    CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL);

    op->state = st_query;
    op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
    op->originator = crm_element_value_copy(dev, F_STONITH_OWNER);

    if(op->originator == NULL) {
        /* Local request */
        op->originator = strdup(stonith_our_uname);
    }

    if(client) {
        op->client_id = strdup(client);
    }

    op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);

    op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
    op->request = copy_xml(request); /* TODO: Figure out how to avoid this */
    crm_element_value_int(request, F_STONITH_CALLOPTS, (int*)&(op->call_options));

    if(op->call_options & st_opt_cs_nodeid) {
        /* The target was given as a numeric node id; translate it to a name */
        int nodeid = crm_atoi(op->target, NULL);
        crm_node_t *node = crm_get_peer(nodeid, NULL);

        /* Ensure the conversion only happens once */
        op->call_options &= ~st_opt_cs_nodeid;

        /* Only replace the target when the peer entry actually has a name;
         * strdup(NULL) is undefined and would also discard the existing
         * target string.  Otherwise the target keeps the original value. */
        if(node && node->uname) {
            free(op->target);
            op->target = strdup(node->uname);
        }
    }

    if(stonith_topology_next(op) != pcmk_ok) {
        op->state = st_failed;
    }
    return op;
}