Ejemplo n.º 1
0
/*
  Completion handler invoked once the startup event script has finished.

  ctdb   - daemon context whose monitor state is updated
  status - result of the startup event; 0 means it succeeded
  p      - private pointer supplied by the caller (not used here)

  On success the daemon is flagged as started, the monitor interval is
  set to 5 and the "startup" notification script is run.  Whether or not
  the event succeeded, a health check is scheduled afterwards.
 */
static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p)
{
	if (status == 0) {
		DEBUG(DEBUG_NOTICE,("startup event OK - enabling monitoring\n"));
		ctdb->done_startup = true;
		ctdb->monitor->next_interval = 5;
		ctdb_run_notification_script(ctdb, "startup");
	} else {
		DEBUG(DEBUG_ERR,("startup event failed\n"));
	}

	/* Queue the next health check, using whatever next_interval holds
	   at this point (it is left untouched on the failure path). */
	event_add_timed(ctdb->ev,
			ctdb->monitor->monitor_context,
			timeval_current_ofs(ctdb->monitor->next_interval, 0),
			ctdb_check_health,
			ctdb);
}
Ejemplo n.º 2
0
/*
  Completion handler invoked once the startup event script has finished.

  ctdb   - daemon context whose run state and monitor state are updated
  status - result of the startup event; 0 means it succeeded
  p      - private pointer supplied by the caller (not used here)

  Success: switch the run state to CTDB_RUNSTATE_RUNNING, set the monitor
  interval to 2, run the "startup" notification script, mark monitoring
  as active and schedule the first health check.

  Failure: log the error and arm a timer (offset 5) that calls
  ctdb_run_startup again; monitoring is not enabled.
 */
static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p)
{
	if (status == 0) {
		DEBUG(DEBUG_NOTICE,("startup event OK - enabling monitoring\n"));
		ctdb_set_runstate(ctdb, CTDB_RUNSTATE_RUNNING);
		ctdb->monitor->next_interval = 2;
		ctdb_run_notification_script(ctdb, "startup");

		ctdb->monitor->monitoring_mode = CTDB_MONITORING_ACTIVE;

		/* Health checking starts after the interval set just above. */
		tevent_add_timer(ctdb->ev,
				 ctdb->monitor->monitor_context,
				 timeval_current_ofs(ctdb->monitor->next_interval, 0),
				 ctdb_check_health,
				 ctdb);
	} else {
		DEBUG(DEBUG_ERR,("startup event failed\n"));

		/* Retry the startup event later instead of monitoring. */
		tevent_add_timer(ctdb->ev,
				 ctdb->monitor->monitor_context,
				 timeval_current_ofs(5, 0),
				 ctdb_run_startup,
				 ctdb);
	}
}
Ejemplo n.º 3
0
/*
  called when a health monitoring event script finishes
 */
static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
{
	struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
	TDB_DATA data;
	struct ctdb_node_flag_change c;
	uint32_t next_interval;
	int ret;
	TDB_DATA rddata;
	struct ctdb_srvid_message rd;
	const char *state_str = NULL;

	c.pnn = ctdb->pnn;
	c.old_flags = node->flags;

	rd.pnn   = ctdb->pnn;
	rd.srvid = CTDB_SRVID_TAKEOVER_RUN_RESPONSE;

	rddata.dptr = (uint8_t *)&rd;
	rddata.dsize = sizeof(rd);

	if (status == -ECANCELED) {
		DEBUG(DEBUG_ERR,("Monitoring event was cancelled\n"));
		goto after_change_status;
	}

	if (status == -ETIME) {
		ctdb->monitor->event_script_timeouts++;

		if (ctdb->monitor->event_script_timeouts >=
		    ctdb->tunable.monitor_timeout_count) {
			DEBUG(DEBUG_ERR,
			      ("Maximum monitor timeout count %u reached."
			       " Making node unhealthy\n",
			       ctdb->tunable.monitor_timeout_count));
		} else {
			/* We pretend this is OK. */
			goto after_change_status;
		}
	} else {
		ctdb->monitor->event_script_timeouts = 0;
	}

	if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
		DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
		node->flags |= NODE_FLAGS_UNHEALTHY;
		ctdb->monitor->next_interval = 5;

		ctdb_run_notification_script(ctdb, "unhealthy");
	} else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
		DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
		node->flags &= ~NODE_FLAGS_UNHEALTHY;
		ctdb->monitor->next_interval = 5;

		ctdb_run_notification_script(ctdb, "healthy");
	}

after_change_status:
	next_interval = ctdb->monitor->next_interval;

	ctdb->monitor->next_interval *= 2;
	if (ctdb->monitor->next_interval > ctdb->tunable.monitor_interval) {
		ctdb->monitor->next_interval = ctdb->tunable.monitor_interval;
	}

	tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
			 timeval_current_ofs(next_interval, 0),
			 ctdb_check_health, ctdb);

	if (c.old_flags == node->flags) {
		return;
	}

	c.new_flags = node->flags;

	data.dptr = (uint8_t *)&c;
	data.dsize = sizeof(c);

	/* ask the recovery daemon to push these changes out to all nodes */
	ctdb_daemon_send_message(ctdb, ctdb->pnn,
				 CTDB_SRVID_PUSH_NODE_FLAGS, data);

	if (c.new_flags & NODE_FLAGS_UNHEALTHY) {
		state_str = "UNHEALTHY";
	} else {
		state_str = "HEALTHY";
	}

	/* ask the recmaster to reallocate all addresses */
	DEBUG(DEBUG_ERR,
	      ("Node became %s. Ask recovery master to reallocate IPs\n",
	       state_str));
	ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_TAKEOVER_RUN, rddata);
	if (ret != 0) {
		DEBUG(DEBUG_ERR,
		      (__location__
		       " Failed to send IP takeover run request\n"));
	}
}