/* * Check if this node is heartbeating and is connected to all other * heartbeating nodes. */ static int o2cb_cluster_check(void) { u8 node_num; int i; unsigned long hbmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long netmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; node_num = o2nm_this_node(); if (node_num == O2NM_MAX_NODES) { printk(KERN_ERR "o2cb: This node has not been configured.\n"); return -EINVAL; } /* * o2dlm expects o2net sockets to be created. If not, then * dlm_join_domain() fails with a stack of errors which are both cryptic * and incomplete. The idea here is to detect upfront whether we have * managed to connect to all nodes or not. If not, then list the nodes * to allow the user to check the configuration (incorrect IP, firewall, * etc.) Yes, this is racy. But its not the end of the world. */ #define O2CB_MAP_STABILIZE_COUNT 60 for (i = 0; i < O2CB_MAP_STABILIZE_COUNT; ++i) { o2hb_fill_node_map(hbmap, sizeof(hbmap)); if (!test_bit(node_num, hbmap)) { printk(KERN_ERR "o2cb: %s heartbeat has not been " "started.\n", (o2hb_global_heartbeat_active() ? "Global" : "Local")); return -EINVAL; } o2net_fill_node_map(netmap, sizeof(netmap)); /* Force set the current node to allow easy compare */ set_bit(node_num, netmap); if (!memcmp(hbmap, netmap, sizeof(hbmap))) return 0; if (i < O2CB_MAP_STABILIZE_COUNT) msleep(1000); } printk(KERN_ERR "o2cb: This node could not connect to nodes:"); i = -1; while ((i = find_next_bit(hbmap, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { if (!test_bit(i, netmap)) printk(" %u", i); } printk(".\n"); return -ENOTCONN; }
/*
 * Report this node's number through @node.
 *
 * Returns 0 and stores the number in *@node on success.  Returns -ENOENT
 * when o2nm_this_node() yields O2NM_INVALID_NODE_NUM and -EOVERFLOW when
 * the value is not below O2NM_MAX_NODES; *@node is left untouched in both
 * failure cases.
 */
static int o2cb_cluster_this_node(unsigned int *node)
{
	int self = o2nm_this_node();
	int rc = 0;

	if (self == O2NM_INVALID_NODE_NUM)
		rc = -ENOENT;
	else if (self >= O2NM_MAX_NODES)
		rc = -EOVERFLOW;
	else
		*node = self;

	return rc;
}
static void o2quo_make_decision(struct work_struct *work) { int quorum; int lowest_hb, lowest_reachable = 0, fence = 0; struct o2quo_state *qs = &o2quo_state; spin_lock(&qs->qs_lock); lowest_hb = find_first_bit(qs->qs_hb_bm, O2NM_MAX_NODES); if (lowest_hb != O2NM_MAX_NODES) lowest_reachable = test_bit(lowest_hb, qs->qs_conn_bm); mlog(0, "heartbeating: %d, connected: %d, " "lowest: %d (%sreachable)\n", qs->qs_heartbeating, qs->qs_connected, lowest_hb, lowest_reachable ? "" : "un"); if (!test_bit(o2nm_this_node(), qs->qs_hb_bm) || qs->qs_heartbeating == 1) goto out; if (qs->qs_heartbeating & 1) { /* the odd numbered cluster case is straight forward -- * if we can't talk to the majority we're hosed */ quorum = (qs->qs_heartbeating + 1)/2; if (qs->qs_connected < quorum) { mlog(ML_ERROR, "fencing this node because it is " "only connected to %u nodes and %u is needed " "to make a quorum out of %u heartbeating nodes\n", qs->qs_connected, quorum, qs->qs_heartbeating); fence = 1; } } else { /* the even numbered cluster adds the possibility of each half * of the cluster being able to talk amongst themselves.. in * that case we're hosed if we can't talk to the group that has * the lowest numbered node */ quorum = qs->qs_heartbeating / 2; if (qs->qs_connected < quorum) { mlog(ML_ERROR, "fencing this node because it is " "only connected to %u nodes and %u is needed " "to make a quorum out of %u heartbeating nodes\n", qs->qs_connected, quorum, qs->qs_heartbeating); fence = 1; } else if ((qs->qs_connected == quorum) && !lowest_reachable) { mlog(ML_ERROR, "fencing this node because it is " "connected to a half-quorum of %u out of %u " "nodes which doesn't include the lowest active " "node %u\n", quorum, qs->qs_heartbeating, lowest_hb); fence = 1; } } out: spin_unlock(&qs->qs_lock); if (fence) o2quo_fence_self(); }