/*
 * Check if this node is heartbeating and is connected to all other
 * heartbeating nodes.
 *
 * The heartbeat and network node maps are sampled up to
 * O2CB_MAP_STABILIZE_COUNT times, one second apart, until they agree.
 *
 * Returns 0 as soon as both maps match, -EINVAL if o2nm_this_node() reports
 * O2NM_MAX_NODES or this node's bit is missing from the heartbeat map, and
 * -ENOTCONN if the maps still differ after the last attempt (the nodes
 * present in the heartbeat map but absent from the network map are logged).
 */
static int o2cb_cluster_check(void)
{
	u8 node_num;
	int i;
	unsigned long hbmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long netmap[BITS_TO_LONGS(O2NM_MAX_NODES)];

	node_num = o2nm_this_node();
	if (node_num == O2NM_MAX_NODES) {
		printk(KERN_ERR "o2cb: This node has not been configured.\n");
		return -EINVAL;
	}

	/*
	 * o2dlm expects o2net sockets to be created. If not, then
	 * dlm_join_domain() fails with a stack of errors which are both cryptic
	 * and incomplete. The idea here is to detect upfront whether we have
	 * managed to connect to all nodes or not. If not, then list the nodes
	 * to allow the user to check the configuration (incorrect IP, firewall,
	 * etc.) Yes, this is racy. But it's not the end of the world.
	 */
#define	O2CB_MAP_STABILIZE_COUNT	60
	for (i = 0; i < O2CB_MAP_STABILIZE_COUNT; ++i) {
		o2hb_fill_node_map(hbmap, sizeof(hbmap));
		if (!test_bit(node_num, hbmap)) {
			printk(KERN_ERR "o2cb: %s heartbeat has not been "
			       "started.\n", (o2hb_global_heartbeat_active() ?
					      "Global" : "Local"));
			return -EINVAL;
		}
		o2net_fill_node_map(netmap, sizeof(netmap));
		/* Force set the current node to allow easy compare */
		set_bit(node_num, netmap);
		if (!memcmp(hbmap, netmap, sizeof(hbmap)))
			return 0;
		/*
		 * Sleep only between attempts: once the last attempt has
		 * failed there is nothing left to wait for.
		 */
		if (i < O2CB_MAP_STABILIZE_COUNT - 1)
			msleep(1000);
	}

	/*
	 * NOTE(review): the continuation printk() calls below carry no log
	 * level; consider KERN_CONT so the node list stays on one line --
	 * confirm against the target kernel version.
	 */
	printk(KERN_ERR "o2cb: This node could not connect to nodes:");
	for (i = find_next_bit(hbmap, O2NM_MAX_NODES, 0);
	     i < O2NM_MAX_NODES;
	     i = find_next_bit(hbmap, O2NM_MAX_NODES, i + 1)) {
		/* Heartbeating, but no network connection to it. */
		if (!test_bit(i, netmap))
			printk(" %d", i);
	}
	printk(".\n");

	return -ENOTCONN;
}
/*
 * Store this node's number, as reported by o2nm_this_node(), in @node.
 *
 * Returns 0 on success, -ENOENT if the reported number is
 * O2NM_INVALID_NODE_NUM, and -EOVERFLOW if it is not below O2NM_MAX_NODES.
 * @node is written only on success.
 */
static int o2cb_cluster_this_node(unsigned int *node)
{
	int self = o2nm_this_node();
	int status = 0;

	if (self == O2NM_INVALID_NODE_NUM)
		status = -ENOENT;
	else if (self >= O2NM_MAX_NODES)
		status = -EOVERFLOW;
	else
		*node = self;

	return status;
}
/* Example #3 (0) */
/*
 * Work handler that decides whether this node has to fence itself.
 *
 * With qs_lock held it reads the heartbeating and connected counts and checks
 * whether the lowest node in the heartbeat bitmap is also set in the
 * connected bitmap.  No decision is taken when this node is not in the
 * heartbeat bitmap or when exactly one node is heartbeating.  Otherwise:
 *
 *  - odd heartbeating count: quorum is (count + 1) / 2;
 *  - even heartbeating count: quorum is count / 2;
 *  - fewer connections than the quorum means fence;
 *  - with an even count, exactly a quorum of connections that does not
 *    include the lowest heartbeating node also means fence.
 *
 * o2quo_fence_self() is invoked only after the lock has been released.
 * @work is not referenced.
 */
static void o2quo_make_decision(struct work_struct *work)
{
	struct o2quo_state *qs = &o2quo_state;
	int lowest_hb;
	int lowest_reachable = 0;
	int must_fence = 0;
	int odd_count;
	int quorum;

	spin_lock(&qs->qs_lock);

	lowest_hb = find_first_bit(qs->qs_hb_bm, O2NM_MAX_NODES);
	if (lowest_hb != O2NM_MAX_NODES)
		lowest_reachable = test_bit(lowest_hb, qs->qs_conn_bm);

	mlog(0, "heartbeating: %d, connected: %d, lowest: %d (%sreachable)\n",
	     qs->qs_heartbeating, qs->qs_connected, lowest_hb,
	     lowest_reachable ? "" : "un");

	/* Only decide when we are heartbeating and we are not alone. */
	if (test_bit(o2nm_this_node(), qs->qs_hb_bm) &&
	    qs->qs_heartbeating != 1) {
		odd_count = qs->qs_heartbeating & 1;

		/*
		 * An odd cluster needs a strict majority.  An even cluster
		 * can split into two equal halves that each talk amongst
		 * themselves, so half is enough as long as that half holds
		 * the lowest numbered node (checked below).
		 */
		if (odd_count)
			quorum = (qs->qs_heartbeating + 1) / 2;
		else
			quorum = qs->qs_heartbeating / 2;

		if (qs->qs_connected < quorum) {
			mlog(ML_ERROR,
			     "fencing this node because it is only connected "
			     "to %u nodes and %u is needed to make a quorum "
			     "out of %u heartbeating nodes\n",
			     qs->qs_connected, quorum, qs->qs_heartbeating);
			must_fence = 1;
		} else if (!odd_count && qs->qs_connected == quorum &&
			   !lowest_reachable) {
			mlog(ML_ERROR,
			     "fencing this node because it is connected to a "
			     "half-quorum of %u out of %u nodes which doesn't "
			     "include the lowest active node %u\n",
			     quorum, qs->qs_heartbeating, lowest_hb);
			must_fence = 1;
		}
	}

	spin_unlock(&qs->qs_lock);

	/* Fence outside the lock. */
	if (must_fence)
		o2quo_fence_self();
}