Beispiel #1
0
/*
  wait until we have finished initial recoveries before we start the
  monitoring events

  Timer callback. private_data must be the struct ctdb_context; ev, te and
  t are not used. Each invocation either re-arms itself with a one second
  delay (the "retry" tail at the bottom) or, once every check has passed,
  schedules ctdb_run_startup to run immediately.

  The callback keeps waiting while any of these holds:
    - the vnn map generation is INVALID_GENERATION
    - recovery_mode is not CTDB_RECOVERY_NORMAL
    - fast_start is not set and less than rerecovery_timeout + 3 seconds
      have elapsed since last_recovery_finished
    - the persistent database health was already checked for the current
      generation
    - ctdb_recheck_persistent_health() failed, but fewer than
      max_persistent_check_errors times so far

  When the health check has failed max_persistent_check_errors times,
  ctdb_shutdown_sequence(ctdb, 11) is called instead of retrying.
 */
static void ctdb_wait_until_recovered(struct tevent_context *ev,
				      struct tevent_timer *te,
				      struct timeval t, void *private_data)
{
	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
	/* number of invocations so far; only used to rate limit logging */
	static int count = 0;
	int ret;

	count++;

	/* log on the first 59 invocations, afterwards only on every 600th */
	if (count < 60 || count%600 == 0) {
		DEBUG(DEBUG_NOTICE,("CTDB_WAIT_UNTIL_RECOVERED\n"));
		if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_STOPPED) {
			DEBUG(DEBUG_NOTICE,("Node is STOPPED. Node will NOT recover.\n"));
		}
	}

	if (ctdb->vnn_map->generation == INVALID_GENERATION) {
		ctdb->db_persistent_startup_generation = INVALID_GENERATION;
		goto retry;
	}

	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
		ctdb->db_persistent_startup_generation = INVALID_GENERATION;

		DEBUG(DEBUG_NOTICE,(__location__ " in recovery. Wait one more second\n"));
		goto retry;
	}

	if (!fast_start && timeval_elapsed(&ctdb->last_recovery_finished) < (ctdb->tunable.rerecovery_timeout + 3)) {
		ctdb->db_persistent_startup_generation = INVALID_GENERATION;

		DEBUG(DEBUG_NOTICE,(__location__ " wait for pending recoveries to end. Wait one more second.\n"));
		goto retry;
	}

	/* the health check below already ran for this generation */
	if (ctdb->vnn_map->generation == ctdb->db_persistent_startup_generation) {
		DEBUG(DEBUG_INFO,(__location__ " skip ctdb_recheck_persistent_health() "
				  "until the next recovery\n"));
		goto retry;
	}

	/*
	  remember the generation before running the check, so that a failed
	  check is not repeated until the generation changes
	 */
	ctdb->db_persistent_startup_generation = ctdb->vnn_map->generation;
	ret = ctdb_recheck_persistent_health(ctdb);
	if (ret != 0) {
		ctdb->db_persistent_check_errors++;
		if (ctdb->db_persistent_check_errors < ctdb->max_persistent_check_errors) {
			/* first failure is logged as an error, repeats as warnings */
			DEBUG(ctdb->db_persistent_check_errors==1?DEBUG_ERR:DEBUG_WARNING,
			      (__location__ " ctdb_recheck_persistent_health() "
			      "failed (%llu of %llu times) - retry later\n",
			      (unsigned long long)ctdb->db_persistent_check_errors,
			      (unsigned long long)ctdb->max_persistent_check_errors));
			goto retry;
		}
		DEBUG(DEBUG_ALERT,(__location__
				  " ctdb_recheck_persistent_health() failed (%llu times) - prepare shutdown\n",
				  (unsigned long long)ctdb->db_persistent_check_errors));
		ctdb_shutdown_sequence(ctdb, 11);
		/* In case above returns due to duplicate shutdown */
		return;
	}
	ctdb->db_persistent_check_errors = 0;

	/* all checks passed: hand over to the startup handler right away */
	if (tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
			     timeval_current(), ctdb_run_startup, ctdb) == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to schedule ctdb_run_startup\n"));
	}
	return;

retry:
	/* look again in one second; without this timer the wait loop ends */
	if (tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
			     timeval_current_ofs(1, 0),
			     ctdb_wait_until_recovered, ctdb) == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to re-arm ctdb_wait_until_recovered\n"));
	}
}
Beispiel #2
0
/*
  wait until we have finished initial recoveries before we start the
  monitoring events
 */
static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_event *te, 
			      struct timeval t, void *private_data)
{
	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
	int ret;

	DEBUG(DEBUG_NOTICE,("CTDB_WAIT_UNTIL_RECOVERED\n"));

	if (ctdb->vnn_map->generation == INVALID_GENERATION) {
		ctdb->db_persistent_startup_generation = INVALID_GENERATION;

		DEBUG(DEBUG_NOTICE,(__location__ " generation is INVALID. Wait one more second\n"));
		event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
				     timeval_current_ofs(1, 0), 
				     ctdb_wait_until_recovered, ctdb);
		return;
	}

	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
		ctdb->db_persistent_startup_generation = INVALID_GENERATION;

		DEBUG(DEBUG_NOTICE,(__location__ " in recovery. Wait one more second\n"));
		event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
				     timeval_current_ofs(1, 0), 
				     ctdb_wait_until_recovered, ctdb);
		return;
	}


	if (timeval_elapsed(&ctdb->last_recovery_finished) < (ctdb->tunable.rerecovery_timeout + 3)) {
		ctdb->db_persistent_startup_generation = INVALID_GENERATION;

		DEBUG(DEBUG_NOTICE,(__location__ " wait for pending recoveries to end. Wait one more second.\n"));

		event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
				     timeval_current_ofs(1, 0), 
				     ctdb_wait_until_recovered, ctdb);
		return;
	}

	if (ctdb->vnn_map->generation == ctdb->db_persistent_startup_generation) {
		DEBUG(DEBUG_INFO,(__location__ " skip ctdb_recheck_persistent_health() "
				  "until the next recovery\n"));
		event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
				     timeval_current_ofs(1, 0),
				     ctdb_wait_until_recovered, ctdb);
		return;
	}

	ctdb->db_persistent_startup_generation = ctdb->vnn_map->generation;
	ret = ctdb_recheck_persistent_health(ctdb);
	if (ret != 0) {
		ctdb->db_persistent_check_errors++;
		if (ctdb->db_persistent_check_errors < ctdb->max_persistent_check_errors) {
			DEBUG(ctdb->db_persistent_check_errors==1?DEBUG_ERR:DEBUG_WARNING,
			      (__location__ "ctdb_recheck_persistent_health() "
			      "failed (%llu of %llu times) - retry later\n",
			      (unsigned long long)ctdb->db_persistent_check_errors,
			      (unsigned long long)ctdb->max_persistent_check_errors));
			event_add_timed(ctdb->ev,
					ctdb->monitor->monitor_context,
					timeval_current_ofs(1, 0),
					ctdb_wait_until_recovered, ctdb);
			return;
		}
		DEBUG(DEBUG_ALERT,(__location__
				  "ctdb_recheck_persistent_health() failed (%llu times) - prepare shutdown\n",
				  (unsigned long long)ctdb->db_persistent_check_errors));
		ctdb_stop_recoverd(ctdb);
		ctdb_stop_keepalive(ctdb);
		ctdb_stop_monitoring(ctdb);
		ctdb_release_all_ips(ctdb);
		if (ctdb->methods != NULL) {
			ctdb->methods->shutdown(ctdb);
		}
		ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
		DEBUG(DEBUG_ALERT,("ctdb_recheck_persistent_health() failed - Stopping CTDB daemon\n"));
		exit(11);
	}
	ctdb->db_persistent_check_errors = 0;
	DEBUG(DEBUG_NOTICE,(__location__
			   "ctdb_start_monitoring: ctdb_recheck_persistent_health() OK\n"));

	DEBUG(DEBUG_NOTICE,(__location__ " Recoveries finished. Running the \"startup\" event.\n"));
	event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
			     timeval_current_ofs(1, 0), 
			     ctdb_check_health, ctdb);
}