int cib_init(void) { if (is_corosync_cluster()) { #if SUPPORT_COROSYNC crm_cluster.destroy = cib_cs_destroy; crm_cluster.cpg.cpg_deliver_fn = cib_cs_dispatch; crm_cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } config_hash = crm_str_table_new(); if (startCib("cib.xml") == FALSE) { crm_crit("Cannot start CIB... terminating"); crm_exit(CRM_EX_NOINPUT); } if (stand_alone == FALSE) { if (is_corosync_cluster()) { crm_set_status_callback(&cib_peer_update_callback); } if (crm_cluster_connect(&crm_cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(CRM_EX_FATAL); } cib_our_uname = crm_cluster.uname; } else { cib_our_uname = strdup("localhost"); } cib_ipc_servers_init(&ipcs_ro, &ipcs_rw, &ipcs_shm, &ipc_ro_callbacks, &ipc_rw_callbacks); if (stand_alone) { cib_is_master = TRUE; } /* Create the mainloop and run it... */ mainloop = g_main_loop_new(NULL, FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_loop_run(mainloop); /* If main loop returned, clean up and exit. We disconnect in case * terminate_cib() was called with fast=-1. */ crm_cluster_disconnect(&crm_cluster); cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm); return crm_exit(CRM_EX_OK); }
gboolean decode_transition_magic( const char *magic, char **uuid, int *transition_id, int *action_id, int *op_status, int *op_rc, int *target_rc) { int res = 0; char *key = NULL; gboolean result = TRUE; CRM_CHECK(magic != NULL, return FALSE); CRM_CHECK(op_rc != NULL, return FALSE); CRM_CHECK(op_status != NULL, return FALSE); crm_malloc0(key, strlen(magic)); res = sscanf(magic, "%d:%d;%s", op_status, op_rc, key); if(res != 3) { crm_crit("Only found %d items in: %s", res, magic); result = FALSE; goto bail; } CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id, target_rc), result = FALSE; goto bail; );
/*! * \internal * \brief Respond to scheduler connection failure * * \param[in] user_data Ignored */ static void pe_ipc_destroy(gpointer user_data) { if (is_set(fsa_input_register, R_PE_REQUIRED)) { int rc = pcmk_ok; char *uuid_str = crm_generate_uuid(); crm_crit("Connection to the scheduler failed " CRM_XS " uuid=%s", uuid_str); /* * The scheduler died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. * */ rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents); } else { crm_info("Connection to the scheduler released"); } clear_bit(fsa_input_register, R_PE_CONNECTED); pe_subsystem = NULL; mainloop_set_trigger(fsa_source); return; }
static gboolean child_timeout_callback(gpointer p) { mainloop_child_t *child = p; child->timerid = 0; if (child->timeout) { crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid); return FALSE; } child->timeout = TRUE; crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid); if (kill(child->pid, SIGKILL) < 0) { if (errno == ESRCH) { /* Nothing left to do */ return FALSE; } crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid); } child->timerid = g_timeout_add(5000, child_timeout_callback, child); return FALSE; }
static void cib_cs_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to cluster layer, shutting down"); terminate_cib(__FUNCTION__, CRM_EX_DISCONNECT); } }
static void attrd_cpg_destroy(gpointer unused) { if (attrd_shutting_down()) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to cluster layer, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } }
static void attrd_cpg_destroy(gpointer unused) { if (shutting_down) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to Corosync service!"); attrd_error = ECONNRESET; attrd_shutdown(0); } }
void crmd_ha_connection_destroy(gpointer user_data) { crm_trace("Invoked"); if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL); trigger_fsa(fsa_source); }
/*! * \internal * \brief Request cluster shutdown if appropriate, otherwise exit immediately * * \param[in] nsig Signal that caused invocation (ignored) */ static void lrmd_shutdown(int nsig) { #ifdef ENABLE_PCMK_REMOTE crm_client_t *ipc_proxy = ipc_proxy_get_provider(); /* If there are active proxied IPC providers, then we may be running * resources, so notify the cluster that we wish to shut down. */ if (ipc_proxy) { if (shutting_down) { crm_notice("Waiting for cluster to stop resources before exiting"); return; } crm_info("Sending shutdown request to cluster"); if (ipc_proxy_shutdown_req(ipc_proxy) < 0) { crm_crit("Shutdown request failed, exiting immediately"); } else { /* We requested a shutdown. Now, we need to wait for an * acknowledgement from the proxy host (which ensures the proxy host * supports shutdown requests), then wait for all proxy hosts to * disconnect (which ensures that all resources have been stopped). */ shutting_down = TRUE; /* Stop accepting new proxy connections */ lrmd_tls_server_destroy(); /* Older crmd versions will never acknowledge our request, so set a * fairly short timeout to exit quickly in that case. If we get the * ack, we'll defuse this timer. */ shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL); /* Currently, we let the OS kill us if the clients don't disconnect * in a reasonable time. We could instead set a long timer here * (shorter than what the OS is likely to use) and exit immediately * if it pops. */ return; } } #endif lrmd_exit(NULL); }
static void tengine_stonith_connection_destroy(stonith_t * st, const char *event, xmlNode * msg) { if (is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencing daemon disconnected"); } /* cbchan will be garbage at this point, arrange for it to be reset */ stonith_api->state = stonith_disconnected; if (AM_I_DC) { fail_incompletable_stonith(transition_graph); trigger_graph(); } }
static void attrd_cib_destroy_cb(gpointer user_data) { cib_t *conn = user_data; conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ if (attrd_shutting_down()) { crm_info("Connection disconnection complete"); } else { /* eventually this should trigger a reconnect, not a shutdown */ crm_crit("Lost connection to the CIB manager, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } return; }
crm_node_t *crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; if(uname != NULL) { node = g_hash_table_lookup(crm_peer_cache, uname); } if(node == NULL && id > 0) { node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if(node && node->uname && uname) { crm_crit("Node %s and %s share the same cluster node id '%u'!", node->uname, uname, id); /* NOTE: Calling crm_new_peer() means the entry in * crm_peer_id_cache will point to the new entity */ /* TODO: Replace the old uname instead? */ node = crm_new_peer(id, uname); CRM_ASSERT(node->uname != NULL); } } if(node && uname && node->uname == NULL) { node->uname = crm_strdup(uname); crm_info("Node %u is now known as %s", id, uname); g_hash_table_insert(crm_peer_cache, node->uname, node); if(crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } if(node && id > 0 && id != node->id) { g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(node->id)); g_hash_table_insert(crm_peer_id_cache, GUINT_TO_POINTER(id), node); node->id = id; crm_info("Node %s now has id: %u", crm_str(uname), id); } return node; }
void * fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if (fsa_data == NULL) { crm_err("%s: No FSA data available", caller); } else if (fsa_data->data == NULL) { crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); } else if (fsa_data->data_type != a_type) { crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s", caller, fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; }
static gboolean child_timeout_callback(gpointer p) { mainloop_child_t *child = p; int rc = 0; child->timerid = 0; if (child->timeout) { crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid); return FALSE; } rc = child_kill_helper(child); if (rc == ESRCH) { /* Nothing left to do. pid doesn't exist */ return FALSE; } child->timeout = TRUE; crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid); child->timerid = g_timeout_add(5000, child_timeout_callback, child); return FALSE; }
static void tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) { if(te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } if (st_event == NULL) { crm_err("Notify data not found"); return; } crmd_alert_fencing_op(st_event); if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) { crm_notice("%s was successfully unfenced by %s (at the request of %s)", st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* TODO: Hook up st_event->device */ return; } else if (safe_str_eq("on", st_event->action)) { crm_err("Unfencing of %s by %s failed: %s (%d)", st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", pcmk_strerror(st_event->result), st_event->result); return; } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { crm_crit("We were allegedly just fenced by %s for %s!", st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */ qb_log_fini(); /* Try to get the above log message to disk - somehow */ /* Get out ASAP and do not come back up. * * Triggering a reboot is also not the worst idea either since * the rest of the cluster thinks we're safely down */ #ifdef RB_HALT_SYSTEM reboot(RB_HALT_SYSTEM); #endif /* * If reboot() fails or is not supported, coming back up will * probably lead to a situation where the other nodes set our * status to 'lost' because of the fencing callback and will * discard subsequent election votes with: * * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster) * * So just stay dead, something is seriously messed up anyway. * */ exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini() return; } /* Update the count of stonith failures for this target, in case we become * DC later. The current DC has already updated its fail count in * tengine_stonith_callback(). */ if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) { if (st_event->result == pcmk_ok) { st_fail_count_reset(st_event->target); } else { st_fail_count_increment(st_event->target); } } crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s " CRM_XS " initiator=%s ref=%s", st_event->target, st_event->result == pcmk_ok ? "" : " not", st_event->action, st_event->executioner ? st_event->executioner : "<anyone>", (st_event->client_origin? st_event->client_origin : "<unknown>"), pcmk_strerror(st_event->result), st_event->origin, st_event->id); if (st_event->result == pcmk_ok) { crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY); const char *uuid = NULL; gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if(AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. * Unfortunately, the controller doesn't have a simple, reliable way * to map hosts to guests. It might be possible to track this in the * peer cache via crm_remote_peer_cache_refresh(). For now, we rely * on the PE creating fence pseudo-events for the guests. */ if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) { /* Abort the current transition graph if it wasn't us * that invoked stonith to fence someone */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we don't currently have one */ } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target)) && !is_set(peer->flags, crm_remote_node)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc ? "was" : "may have been", st_event->target, fsa_our_dc ? fsa_our_dc : "<unset>"); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } add_stonith_cleanup(st_event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. */ if (is_set(peer->flags, crm_remote_node)) { remote_ra_fail(st_event->target); } crmd_peer_down(peer, TRUE); } }
gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent) { int lpc = 0; cs_error_t rc = CS_OK; int retries = 0; gboolean any = FALSE; cmap_handle_t cmap_handle; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; do { rc = cmap_initialize(&cmap_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return FALSE; } rc = cmap_fd_get(cmap_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } crm_peer_init(); crm_trace("Initializing corosync nodelist"); for (lpc = 0; TRUE; lpc++) { uint32_t nodeid = 0; char *name = NULL; char *key = NULL; key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &nodeid); free(key); if (rc != CS_OK) { break; } name = corosync_node_name(cmap_handle, nodeid); if (name != NULL) { GHashTableIter iter; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node && node->uname && strcasecmp(node->uname, name) == 0) { if (node->id && node->id != nodeid) { crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id, nodeid, name); crm_exit(CRM_EX_FATAL); } } } } if (nodeid > 0 || name != NULL) { crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); crm_get_peer(nodeid, name); } if (nodeid > 0 && name != NULL) { any = TRUE; if (xml_parent) { xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE); crm_xml_set_id(node, "%u", nodeid); crm_xml_add(node, XML_ATTR_UNAME, name); if (force_member) { crm_xml_add(node, XML_ATTR_TYPE, CRM_NODE_MEMBER); } } } free(name); } bail: cmap_finalize(cmap_handle); return any; }
static void tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) { if(te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%d", crm_system_name, getpid()); } if (st_event == NULL) { crm_err("Notify data not found"); return; } crmd_notify_fencing_op(st_event); if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) { crm_notice("%s was successfully unfenced by %s (at the request of %s)", st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* TODO: Hook up st_event->device */ return; } else if (safe_str_eq("on", st_event->action)) { crm_err("Unfencing of %s by %s failed: %s (%d)", st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", pcmk_strerror(st_event->result), st_event->result); return; } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { crm_crit("We were allegedly just fenced by %s for %s!", st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */ qb_log_fini(); /* Try to get the above log message to disk - somehow */ /* Get out ASAP and do not come back up. * * Triggering a reboot is also not the worst idea either since * the rest of the cluster thinks we're safely down */ #ifdef RB_HALT_SYSTEM reboot(RB_HALT_SYSTEM); #endif /* * If reboot() fails or is not supported, coming back up will * probably lead to a situation where the other nodes set our * status to 'lost' because of the fencing callback and will * discard subsequent election votes with: * * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster) * * So just stay dead, something is seriously messed up anyway. * */ exit(100); /* None of our wrappers since we already called qb_log_fini() */ return; } if (st_event->result == pcmk_ok && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) { st_fail_count_reset(st_event->target); } crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s", st_event->target, st_event->result == pcmk_ok ? "" : " not", st_event->action, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin, pcmk_strerror(st_event->result), st_event->id, st_event->client_origin ? st_event->client_origin : "<unknown>"); #if SUPPORT_CMAN if (st_event->result == pcmk_ok && is_cman_cluster()) { int local_rc = 0; int confirm = 0; char *target_copy = strdup(st_event->target); /* In case fenced hasn't noticed yet * * Any fencing that has been inititated will be completed by way of the fence_pcmk redirect */ local_rc = fenced_external(target_copy); if (local_rc != 0) { crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target, local_rc); } else { crm_notice("Notified CMAN that '%s' is now fenced", st_event->target); } /* In case fenced is already trying to shoot it */ confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY); if (confirm > 0) { int ignore = 0; int len = strlen(target_copy); errno = 0; local_rc = write(confirm, target_copy, len); ignore = write(confirm, "\n", 1); if(ignore < 0 && errno == EBADF) { crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target); } else if (local_rc < len) { crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc); } else { fsync(confirm); crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target); } close(confirm); } free(target_copy); } #endif if (st_event->result == pcmk_ok) { crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_REMOTE | CRM_GET_PEER_CLUSTER); const char *uuid = NULL; gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if(AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) { /* Abort the current transition graph if it wasn't us * that invoked stonith to fence someone */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we dont currently have one */ } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc ? "was" : "may have been", st_event->target, fsa_our_dc ? fsa_our_dc : "<unset>"); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target)); } crmd_peer_down(peer, TRUE); } }
/* coverity[+kill] */ void crm_abort(const char *file, const char *function, int line, const char *assert_condition, gboolean do_core, gboolean do_fork) { int rc = 0; int pid = 0; int status = 0; /* Implied by the parent's error logging below */ /* crm_write_blackbox(0); */ if(crm_is_daemon == FALSE) { /* This is a command line tool - do not fork */ /* crm_add_logfile(NULL); * Record it to a file? */ crm_enable_stderr(TRUE); /* Make sure stderr is enabled so we can tell the caller */ do_fork = FALSE; /* Just crash if needed */ } if (do_core == FALSE) { crm_err("%s: Triggered assert at %s:%d : %s", function, file, line, assert_condition); return; } else if (do_fork) { pid = fork(); } else { crm_err("%s: Triggered fatal assert at %s:%d : %s", function, file, line, assert_condition); } if (pid == -1) { crm_crit("%s: Cannot create core for non-fatal assert at %s:%d : %s", function, file, line, assert_condition); return; } else if(pid == 0) { /* Child process */ abort(); return; } /* Parent process */ crm_err("%s: Forked child %d to record non-fatal assert at %s:%d : %s", function, pid, file, line, assert_condition); crm_write_blackbox(SIGTRAP, NULL); do { rc = waitpid(pid, &status, 0); if(rc == pid) { return; /* Job done */ } } while(errno == EINTR); if (errno == ECHILD) { /* crm_mon does this */ crm_trace("Cannot wait on forked child %d - SIGCHLD is probably set to SIG_IGN", pid); return; } crm_perror(LOG_ERR, "Cannot wait on forked child %d", pid); }
/* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * crm_get_peer(unsigned int id, const char *uname) { GHashTableIter iter; crm_node_t *node = NULL; crm_node_t *by_id = NULL; crm_node_t *by_name = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (uname != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); by_name = node; break; } } } if (id > 0) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->id == id) { crm_trace("ID match: %u = %p", node->id, node); by_id = node; break; } } } node = by_id; /* Good default */ if(by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %u/%s", by_id, id, uname); } else if(by_id == NULL && by_name) { crm_trace("Only one: %p for %u/%s", by_name, id, uname); if(id && by_name->id) { crm_dump_peer_hash(LOG_WARNING, __FUNCTION__); crm_crit("Node %u and %u share the same name '%s'", id, by_name->id, uname); node = NULL; /* Create a new one */ } else { node = by_name; } } else if(by_name == NULL && by_id) { crm_trace("Only one: %p for %u/%s", by_id, id, uname); if(uname && by_id->uname) { crm_dump_peer_hash(LOG_WARNING, __FUNCTION__); crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct", uname, by_id->uname, id, uname); } } else if(uname && by_id->uname) { crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u", by_id->uname, by_name->uname, id); } else if(id && by_name->id) { crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname); } else { /* Simple merge */ /* Only corosync based clusters use nodeid's * * The functions that call crm_update_peer_state() only know nodeid * so 'by_id' is authorative when merging * * Same for crm_update_peer_proc() */ crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__); crm_info("Merging %p into %p", by_name, by_id); g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name); } if (node == NULL) { char *uniqueid = crm_generate_uuid(); node = calloc(1, sizeof(crm_node_t)); CRM_ASSERT(node); crm_info("Created entry %s/%p for node %s/%u (%d total)", uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); g_hash_table_replace(crm_peer_cache, uniqueid, node); } if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) { crm_info("Node %u is now known as %s", id, uname); } if(id > 0 && node->id == 0) { node->id = id; } if(uname && node->uname == NULL) { int lpc, len = strlen(uname); for (lpc = 0; lpc < len; lpc++) { if (uname[lpc] >= 'A' && uname[lpc] <= 'Z') { crm_warn("Node names with capitals are discouraged, consider changing '%s' to something else", uname); break; } } node->uname = strdup(uname); if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } if(node->uuid == NULL) { const char *uuid = crm_peer_uuid(node); if (uuid) { crm_info("Node %u has uuid %s", id, uuid); } else { crm_info("Cannot obtain a UUID for node %d/%s", id, node->uname); } } return node; }
void cib_notify_client(gpointer key, gpointer value, gpointer user_data) { IPC_Channel *ipc_client = NULL; HA_Message *update_msg = user_data; cib_client_t *client = value; const char *type = NULL; gboolean is_pre = FALSE; gboolean is_post = FALSE; gboolean is_confirm = FALSE; gboolean is_replace = FALSE; gboolean is_diff = FALSE; gboolean do_send = FALSE; int qlen = 0; int max_qlen = 0; CRM_DEV_ASSERT(client != NULL); CRM_DEV_ASSERT(update_msg != NULL); type = cl_get_string(update_msg, F_SUBTYPE); CRM_DEV_ASSERT(type != NULL); if(client == NULL) { crm_warn("Skipping NULL client"); return; } else if(client->channel == NULL) { crm_warn("Skipping client with NULL channel"); return; } else if(client->name == NULL) { crm_debug_2("Skipping unnammed client / comamnd channel"); return; } if(safe_str_eq(type, T_CIB_PRE_NOTIFY)) { is_pre = TRUE; } else if(safe_str_eq(type, T_CIB_POST_NOTIFY)) { is_post = TRUE; } else if(safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) { is_confirm = TRUE; } else if(safe_str_eq(type, T_CIB_DIFF_NOTIFY)) { is_diff = TRUE; } else if(safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) { is_replace = TRUE; } ipc_client = client->channel; qlen = ipc_client->send_queue->current_qlen; max_qlen = ipc_client->send_queue->max_qlen; #if 1 /* get_chan_status() causes memory to be allocated that isnt free'd * until the message is read (which messes up the memory stats) */ if(ipc_client->ops->get_chan_status(ipc_client) != IPC_CONNECT) { crm_debug_2("Skipping notification to disconnected" " client %s/%s", client->name, client->id); } else if(client->pre_notify && is_pre) { if(qlen < (int)(0.4 * max_qlen)) { do_send = TRUE; } else { crm_warn("Throttling pre-notifications due to" " high load: queue=%d (max=%d)", qlen, max_qlen); } } else if(client->post_notify && is_post) { if(qlen < (int)(0.7 * max_qlen)) { do_send = TRUE; } else { crm_warn("Throttling post-notifications due to" " extreme load: queue=%d (max=%d)", qlen, max_qlen); } /* these are critical */ } else #endif if(client->diffs && is_diff) { do_send = TRUE; } else if(client->confirmations && is_confirm) { do_send = TRUE; } else if(client->replace && is_replace) { do_send = TRUE; } if(do_send) { crm_debug_2("Notifying client %s/%s of %s update (queue=%d)", client->name, client->channel_name, type, qlen); if(ipc_client->send_queue->current_qlen >= ipc_client->send_queue->max_qlen) { /* We never want the CIB to exit because our client is slow */ crm_crit("%s-notification of client %s/%s failed - queue saturated", is_confirm?"Confirmation":is_post?"Post":"Pre", client->name, client->id); } else if(send_ipc_message(ipc_client, update_msg) == FALSE) { crm_warn("Notification of client %s/%s failed", client->name, client->id); } } else { crm_debug_3("Client %s/%s not interested in %s notifications", client->name, client->channel_name, type); } }
gboolean decode_transition_key( const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc) { int res = 0; gboolean done = FALSE; CRM_CHECK(uuid != NULL, return FALSE); CRM_CHECK(target_rc != NULL, return FALSE); CRM_CHECK(action_id != NULL, return FALSE); CRM_CHECK(transition_id != NULL, return FALSE); crm_malloc0(*uuid, strlen(key)); res = sscanf(key, "%d:%d:%d:%s", action_id, transition_id, target_rc, *uuid); switch(res) { case 4: /* Post Pacemaker 0.6 */ done = TRUE; break; case 3: case 2: /* this can be tricky - the UUID might start with an integer */ /* Until Pacemaker 0.6 */ done = TRUE; *target_rc = -1; res = sscanf(key, "%d:%d:%s", action_id, transition_id, *uuid); if(res == 2) { *action_id = -1; res = sscanf(key, "%d:%s", transition_id, *uuid); CRM_CHECK(res == 2, done = FALSE); } else if(res != 3) { CRM_CHECK(res == 3, done = FALSE); } break; case 1: /* Prior to Heartbeat 2.0.8 */ done = TRUE; *action_id = -1; *target_rc = -1; res = sscanf(key, "%d:%s", transition_id, *uuid); CRM_CHECK(res == 2, done = FALSE); break; default: crm_crit("Unhandled sscanf result (%d) for %s", res, key); } if(strlen(*uuid) != 36) { crm_warn("Bad UUID (%s) in sscanf result (%d) for %s", *uuid, res, key); } if(done == FALSE) { crm_err("Cannot decode '%s' rc=%d", key, res); crm_free(*uuid); *uuid = NULL; *target_rc = -1; *action_id = -1; *transition_id = -1; } return done; }
gboolean cib_notify_client(gpointer key, gpointer value, gpointer user_data) { int qlen = 0; int max_qlen = 500; const char *type = NULL; gboolean do_send = FALSE; gboolean do_remote = FALSE; IPC_Channel *ipc_client = NULL; cib_client_t *client = value; xmlNode *update_msg = user_data; CRM_CHECK(client != NULL, return TRUE); CRM_CHECK(update_msg != NULL, return TRUE); if (client == NULL) { crm_warn("Skipping NULL client"); return TRUE; } else if (client->channel == NULL) { crm_warn("Skipping client with NULL channel"); return FALSE; } else if (client->name == NULL) { crm_trace("Skipping unnammed client / comamnd channel"); return FALSE; } type = crm_element_value(update_msg, F_SUBTYPE); ipc_client = client->channel; do_remote = crm_str_eq(client->channel_name, "remote", FALSE); if (do_remote == FALSE) { qlen = ipc_client->send_queue->current_qlen; max_qlen = ipc_client->send_queue->max_qlen; } CRM_LOG_ASSERT(type != NULL); if (client->diffs && safe_str_eq(type, T_CIB_DIFF_NOTIFY)) { do_send = TRUE; } else if (client->replace && safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) { do_send = TRUE; } else if (client->confirmations && safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) { do_send = TRUE; } else if (client->pre_notify && safe_str_eq(type, T_CIB_PRE_NOTIFY)) { if (qlen < (int)(0.4 * max_qlen)) { do_send = TRUE; } else { crm_warn("Throttling pre-notifications due to" " high load: queue=%d (max=%d)", qlen, max_qlen); } } else if (client->post_notify && safe_str_eq(type, T_CIB_POST_NOTIFY)) { if (qlen < (int)(0.7 * max_qlen)) { do_send = TRUE; } else { crm_warn("Throttling post-notifications due to" " extreme load: queue=%d (max=%d)", qlen, max_qlen); } } if (do_send) { if (do_remote) { crm_debug("Sent %s notification to client %s/%s", type, client->name, client->id); cib_send_remote_msg(client->channel, update_msg, client->encrypted); } else if (ipc_client->send_queue->current_qlen >= ipc_client->send_queue->max_qlen) { /* We never want the CIB to exit because our client is slow */ crm_crit("%s-notification of client %s/%s failed - queue saturated", type, client->name, client->id); } else if (send_ipc_message(ipc_client, update_msg) == FALSE) { crm_warn("Notification of client %s/%s failed", client->name, client->id); return FALSE; } } return FALSE; }
/* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (node == NULL && uname != NULL) { node = g_hash_table_lookup(crm_peer_cache, uname); } if (node == NULL && id > 0) { node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node && node->uname && uname) { crm_crit("Node %s and %s share the same cluster node id '%u'!", node->uname, uname, id); /* NOTE: Calling crm_new_peer() means the entry in * crm_peer_id_cache will point to the new entity * * TO-DO: Replace the old uname instead? */ node = NULL; } } if (node == NULL) { crm_debug("Creating entry for node %s/%u", uname, id); node = calloc(1, sizeof(crm_node_t)); CRM_ASSERT(node); } if (id > 0 && node->id != id) { node->id = id; crm_info("Node %s now has id: %u", crm_str(uname), id); g_hash_table_replace(crm_peer_id_cache, GUINT_TO_POINTER(node->id), node); } if (uname && node->uname == NULL) { node->uname = strdup(uname); crm_info("Node %u is now known as %s", id, uname); g_hash_table_replace(crm_peer_cache, node->uname, node); if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } if (node && node->uname && node->uuid == NULL) { const char *uuid = get_node_uuid(id, node->uname); if(uuid) { node->uuid = strdup(uuid); crm_info("Node %u has uuid %s", id, node->uuid); } else { crm_warn("Cannot obtain a UUID for node %d/%s", id, node->uname); } } return node; }
static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); }
gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent) { int lpc = 0; int rc = CS_OK; int retries = 0; gboolean any = FALSE; cmap_handle_t cmap_handle; do { rc = cmap_initialize(&cmap_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return FALSE; } crm_peer_init(); crm_trace("Initializing corosync nodelist"); for (lpc = 0;; lpc++) { uint32_t nodeid = 0; char *name = NULL; char *key = NULL; key = g_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &nodeid); g_free(key); if (rc != CS_OK) { break; } name = corosync_node_name(cmap_handle, nodeid); if (name != NULL) { GHashTableIter iter; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node && node->uname && strcasecmp(node->uname, name) == 0) { if (node->id && node->id != nodeid) { crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id, nodeid, name); crm_exit(DAEMON_RESPAWN_STOP); } } } } if (nodeid > 0 || name != NULL) { crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); crm_get_peer(nodeid, name); } if (nodeid > 0 && name != NULL) { any = TRUE; if (xml_parent) { xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE); crm_xml_add_int(node, XML_ATTR_ID, nodeid); crm_xml_add(node, XML_ATTR_UNAME, name); if (force_member) { crm_xml_add(node, XML_ATTR_TYPE, CRM_NODE_MEMBER); } } } free(name); } cmap_finalize(cmap_handle); return any; }