/* this is called when the ctdb daemon received a ctdb request message from a local client over the unix domain socket */ static void daemon_request_message_from_client(struct ctdb_client *client, struct ctdb_req_message *c) { TDB_DATA data; int res; if (c->hdr.destnode == CTDB_CURRENT_NODE) { c->hdr.destnode = ctdb_get_pnn(client->ctdb); } /* maybe the message is for another client on this node */ if (ctdb_get_pnn(client->ctdb)==c->hdr.destnode) { ctdb_request_message(client->ctdb, (struct ctdb_req_header *)c); return; } /* its for a remote node */ data.dptr = &c->data[0]; data.dsize = c->datalen; res = ctdb_daemon_send_message(client->ctdb, c->hdr.destnode, c->srvid, data); if (res != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to send message to remote node %u\n", c->hdr.destnode)); } }
/*
 * Control handler: modify the flags on a node.
 *
 * indata.dptr is read as a struct ctdb_node_flag_change naming the node
 * (pnn) and the requested flags (new_flags). The DISCONNECTED bit is never
 * taken from new_flags; it is carried over from old_flags. When the target
 * is the local node, its STOPPED and BANNED bits are kept as they were.
 *
 * If the resulting flags differ from old_flags, the change is reported
 * with a CTDB_SRVID_SET_NODE_FLAGS message to the local node, and
 * ctdb_local_node_got_banned() is called if the local node just became
 * banned.
 *
 * Returns -1 if the PNN is out of range, 0 otherwise.
 */
int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_node_flag_change *change =
		(struct ctdb_node_flag_change *)indata.dptr;
	/* bits of the local node that other nodes are not allowed to modify */
	const uint32_t self_owned = NODE_FLAGS_STOPPED | NODE_FLAGS_BANNED;
	struct ctdb_node *node;
	uint32_t prev_flags;

	if (change->pnn >= ctdb->num_nodes) {
		DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n",
				 change->pnn, ctdb->num_nodes));
		return -1;
	}

	node = ctdb->nodes[change->pnn];
	prev_flags = node->flags;

	/* for a remote node, our own record is the reference for old_flags */
	if (change->pnn != ctdb->pnn) {
		change->old_flags = prev_flags;
	}

	/* take everything from new_flags except DISCONNECTED */
	node->flags = (change->new_flags & ~NODE_FLAGS_DISCONNECTED) |
		      (change->old_flags & NODE_FLAGS_DISCONNECTED);

	/* we don't let other nodes modify our STOPPED or BANNED status */
	if (change->pnn == ctdb->pnn) {
		node->flags = (node->flags & ~self_owned) |
			      (prev_flags & self_owned);
	}

	if (node->flags == change->old_flags) {
		DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n",
				   change->pnn, node->flags));
		return 0;
	}

	DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n",
			   change->pnn, node->flags));

	if (node->flags == 0 && ctdb->runstate <= CTDB_RUNSTATE_STARTUP) {
		DEBUG(DEBUG_ERR, (__location__ " Node %u became healthy - force recovery for startup\n",
				  change->pnn));
		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
	}

	/* tell the recovery daemon something has changed */
	change->new_flags = node->flags;
	ctdb_daemon_send_message(ctdb, ctdb->pnn,
				 CTDB_SRVID_SET_NODE_FLAGS, indata);

	/* if we have become banned, we should go into recovery mode */
	if (!(node->flags & NODE_FLAGS_BANNED)) {
		return 0;
	}
	if (change->old_flags & NODE_FLAGS_BANNED) {
		return 0;
	}
	if (node->pnn != ctdb->pnn) {
		return 0;
	}

	ctdb_local_node_got_banned(ctdb);
	return 0;
}
/* called when a health monitoring event script finishes */ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p) { struct ctdb_node *node = ctdb->nodes[ctdb->pnn]; TDB_DATA data; struct ctdb_node_flag_change c; uint32_t next_interval; int ret; TDB_DATA rddata; struct ctdb_srvid_message rd; const char *state_str = NULL; c.pnn = ctdb->pnn; c.old_flags = node->flags; rd.pnn = ctdb->pnn; rd.srvid = CTDB_SRVID_TAKEOVER_RUN_RESPONSE; rddata.dptr = (uint8_t *)&rd; rddata.dsize = sizeof(rd); if (status == -ECANCELED) { DEBUG(DEBUG_ERR,("Monitoring event was cancelled\n")); goto after_change_status; } if (status == -ETIME) { ctdb->monitor->event_script_timeouts++; if (ctdb->monitor->event_script_timeouts >= ctdb->tunable.monitor_timeout_count) { DEBUG(DEBUG_ERR, ("Maximum monitor timeout count %u reached." " Making node unhealthy\n", ctdb->tunable.monitor_timeout_count)); } else { /* We pretend this is OK. */ goto after_change_status; } } else { ctdb->monitor->event_script_timeouts = 0; } if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) { DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n")); node->flags |= NODE_FLAGS_UNHEALTHY; ctdb->monitor->next_interval = 5; ctdb_run_notification_script(ctdb, "unhealthy"); } else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) { DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n")); node->flags &= ~NODE_FLAGS_UNHEALTHY; ctdb->monitor->next_interval = 5; ctdb_run_notification_script(ctdb, "healthy"); } after_change_status: next_interval = ctdb->monitor->next_interval; ctdb->monitor->next_interval *= 2; if (ctdb->monitor->next_interval > ctdb->tunable.monitor_interval) { ctdb->monitor->next_interval = ctdb->tunable.monitor_interval; } tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context, timeval_current_ofs(next_interval, 0), ctdb_check_health, ctdb); if (c.old_flags == node->flags) { return; } c.new_flags = node->flags; data.dptr = (uint8_t *)&c; data.dsize = sizeof(c); 
/* ask the recovery daemon to push these changes out to all nodes */ ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_PUSH_NODE_FLAGS, data); if (c.new_flags & NODE_FLAGS_UNHEALTHY) { state_str = "UNHEALTHY"; } else { state_str = "HEALTHY"; } /* ask the recmaster to reallocate all addresses */ DEBUG(DEBUG_ERR, ("Node became %s. Ask recovery master to reallocate IPs\n", state_str)); ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_TAKEOVER_RUN, rddata); if (ret != 0) { DEBUG(DEBUG_ERR, (__location__ " Failed to send IP takeover run request\n")); } }
/*
 * Control handler: modify the flags on a node.
 *
 * indata.dptr is read as a struct ctdb_node_flag_change naming the node
 * (pnn) and the requested flags (new_flags). The DISCONNECTED bit is never
 * taken from new_flags; it is carried over from old_flags. When the target
 * is the local node, its STOPPED and BANNED bits are kept as they were.
 *
 * If the resulting flags differ from old_flags, the change is reported
 * with a CTDB_SRVID_SET_NODE_FLAGS message to the local node. If the local
 * node has just become banned, the databases are frozen, all IPs are
 * released and recovery mode is activated.
 *
 * Returns -1 if the PNN is out of range, 0 otherwise.
 */
int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)indata.dptr;
	struct ctdb_node *node;
	uint32_t old_flags;
	unsigned int i;	/* unsigned so that it matches the %u it is logged with */

	if (c->pnn >= ctdb->num_nodes) {
		DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n", c->pnn, ctdb->num_nodes));
		return -1;
	}

	node = ctdb->nodes[c->pnn];
	old_flags = node->flags;

	/* for a remote node, our own record is the reference for old_flags */
	if (c->pnn != ctdb->pnn) {
		c->old_flags = node->flags;
	}

	/* take everything from new_flags except DISCONNECTED */
	node->flags = c->new_flags & ~NODE_FLAGS_DISCONNECTED;
	node->flags |= (c->old_flags & NODE_FLAGS_DISCONNECTED);

	/* we don't let other nodes modify our STOPPED status */
	if (c->pnn == ctdb->pnn) {
		node->flags &= ~NODE_FLAGS_STOPPED;
		if (old_flags & NODE_FLAGS_STOPPED) {
			node->flags |= NODE_FLAGS_STOPPED;
		}
	}

	/* we don't let other nodes modify our BANNED status */
	if (c->pnn == ctdb->pnn) {
		node->flags &= ~NODE_FLAGS_BANNED;
		if (old_flags & NODE_FLAGS_BANNED) {
			node->flags |= NODE_FLAGS_BANNED;
		}
	}

	if (node->flags == c->old_flags) {
		DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", c->pnn, node->flags));
		return 0;
	}

	DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", c->pnn, node->flags));

	if (node->flags == 0 && !ctdb->done_startup) {
		DEBUG(DEBUG_ERR, (__location__ " Node %u became healthy - force recovery for startup\n",
				  c->pnn));
		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
	}

	/*
	 * Tell the recovery daemon something has changed. Report the flags
	 * that were actually applied: they can differ from the requested
	 * new_flags because DISCONNECTED, and for the local node STOPPED and
	 * BANNED, are carried over rather than taken from the request.
	 */
	c->new_flags = node->flags;
	ctdb_daemon_send_message(ctdb, ctdb->pnn,
				 CTDB_SRVID_SET_NODE_FLAGS, indata);

	/* if we have become banned, we should go into recovery mode */
	if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED) && (node->pnn == ctdb->pnn)) {
		/* make sure we are frozen */
		DEBUG(DEBUG_NOTICE,("This node has been banned - forcing freeze and recovery\n"));

		/*
		 * Reset the generation id to INVALID_GENERATION to make us
		 * ignore any REQ/REPLY CALL/DMASTER someone sends to us.
		 * We are now banned so we shouldn't service database calls
		 * anymore.
		 */
		ctdb->vnn_map->generation = INVALID_GENERATION;

		for (i = 1; i <= NUM_DB_PRIORITIES; i++) {
			if (ctdb_start_freeze(ctdb, i) != 0) {
				DEBUG(DEBUG_ERR,(__location__ " Failed to freeze db priority %u\n", i));
			}
		}
		ctdb_release_all_ips(ctdb);
		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
	}

	return 0;
}