/**
 * Connect to the cman quorum service and begin watching it.
 *
 * Stores the poller, opens a cman handle, and does not proceed until
 * cman_is_quorate() reports true (re-checking after each sys::sleep(5)).
 * It then registers cmanCallbackFn for cman notifications and calls
 * watch() on the descriptor returned by getFd().
 *
 * @param p poller saved in the `poller` member.
 * @throws ErrnoException if cman_init() returns a null handle or
 *         cman_start_notification() returns non-zero.
 */
void Quorum::start(boost::shared_ptr<sys::Poller> p) {
    poller = p;

    QPID_LOG(debug, "Connecting to quorum service.");
    cman = cman_init(0);
    if (!cman) {
        throw ErrnoException("Can't connect to cman service");
    }

    // Only announce the wait when the cluster is not already quorate.
    if (!cman_is_quorate(cman)) {
        QPID_LOG(notice, "Waiting for cluster quorum.");
        for (;;) {
            if (cman_is_quorate(cman)) {
                break;
            }
            sys::sleep(5);
        }
    }

    const int status = cman_start_notification(cman, cmanCallbackFn);
    if (status != 0) {
        throw ErrnoException("Can't register for cman notifications");
    }

    watch(getFd());
}
static void delay_quorum(void) { int rv, tries = 0; while (1) { rv = cman_is_quorate(ch); if (rv) break; rv = cman_is_active(ch); if (!rv) { cman_finish(ch); die("lost cman connection"); } tries++; if (opt_delay_quorum < 0) goto retry_quorum; if (!opt_delay_quorum || tries >= opt_delay_quorum) { fprintf(stderr, "%s: continuing without quorum\n", prog_name); break; } retry_quorum: if (!(tries % 10)) fprintf(stderr, "%s: delaying for quorum\n", prog_name); sleep(1); } return; }
static void statechange(void) { cman_cluster_t info; cman_node_t *old; int i, j, rv; struct cman_node_address addrs[MAX_NODE_ADDRESSES]; int num_addrs; struct cman_node_address *addrptr = addrs; rv = cman_get_cluster(ch, &info); if (rv < 0) { log_error("cman_get_cluster error %d %d", rv, errno); /* keep going, this is just informational */ memset(&info, 0, sizeof(info)); } cluster_ringid_seq = info.ci_generation; cluster_quorate = cman_is_quorate(ch); old_node_count = cman_node_count; memcpy(&old_nodes, &cman_nodes, sizeof(old_nodes)); cman_node_count = 0; memset(&cman_nodes, 0, sizeof(cman_nodes)); rv = cman_get_nodes(ch, MAX_NODES, &cman_node_count, cman_nodes); if (rv < 0) { log_debug("cman_get_nodes error %d %d", rv, errno); return; } /* Never allow node ID 0 to be considered a member #315711 */ for (i = 0; i < cman_node_count; i++) { if (cman_nodes[i].cn_nodeid == 0) { cman_nodes[i].cn_member = 0; break; } } for (i = 0; i < old_node_count; i++) { if (old_nodes[i].cn_member && !is_cluster_member(old_nodes[i].cn_nodeid)) { log_debug("cluster node %d removed seq %u", old_nodes[i].cn_nodeid, cluster_ringid_seq); node_history_cluster_remove(old_nodes[i].cn_nodeid); del_configfs_node(old_nodes[i].cn_nodeid); } } for (i = 0; i < cman_node_count; i++) { if (cman_nodes[i].cn_member && !is_old_member(cman_nodes[i].cn_nodeid)) { log_debug("cluster node %d added seq %u", cman_nodes[i].cn_nodeid, cluster_ringid_seq); rv = cman_get_node_addrs(ch, cman_nodes[i].cn_nodeid, MAX_NODE_ADDRESSES, &num_addrs, addrs); if (rv < 0) { log_debug("cman_get_node_addrs failed, falling back to single-homed. 
"); num_addrs = 1; addrptr = &cman_nodes[i].cn_address; } node_history_cluster_add(cman_nodes[i].cn_nodeid); for (j = 0; j < num_addrs; j++) { add_configfs_node(cman_nodes[i].cn_nodeid, addrptr[j].cna_address, addrptr[j].cna_addrlen, (cman_nodes[i].cn_nodeid == our_nodeid)); } } else { /* look for any nodes that were members of both * old and new but have a new incarnation number * from old to new, indicating they left and rejoined * in between */ old = get_node(old_nodes, old_node_count, cman_nodes[i].cn_nodeid); if (!old) continue; if (cman_nodes[i].cn_incarnation == old->cn_incarnation) continue; log_debug("cluster node %d removed and added seq %u " "old %u new %u", cman_nodes[i].cn_nodeid, cluster_ringid_seq, old->cn_incarnation, cman_nodes[i].cn_incarnation); /* * remove (copied from above) */ node_history_cluster_remove(old_nodes[i].cn_nodeid); del_configfs_node(old_nodes[i].cn_nodeid); /* * add (copied from above) */ rv = cman_get_node_addrs(ch, cman_nodes[i].cn_nodeid, MAX_NODE_ADDRESSES, &num_addrs, addrs); if (rv < 0) { log_debug("cman_get_node_addrs failed, falling back to single-homed. "); num_addrs = 1; addrptr = &cman_nodes[i].cn_address; } node_history_cluster_add(cman_nodes[i].cn_nodeid); for (j = 0; j < num_addrs; j++) { add_configfs_node(cman_nodes[i].cn_nodeid, addrptr[j].cna_address, addrptr[j].cna_addrlen, (cman_nodes[i].cn_nodeid == our_nodeid)); } } } }