Пример #1
0
/*
  Handle a ctdb request message that a local client sent to the daemon
  over the unix domain socket.

  A destination of CTDB_CURRENT_NODE is first rewritten to this node's
  pnn.  A message addressed to this node is passed to
  ctdb_request_message(); anything else is forwarded with
  ctdb_daemon_send_message(), and a forwarding failure is only logged.
 */
static void daemon_request_message_from_client(struct ctdb_client *client, 
					       struct ctdb_req_message *c)
{
	/* resolve the "current node" placeholder to our own pnn */
	if (c->hdr.destnode == CTDB_CURRENT_NODE) {
		c->hdr.destnode = ctdb_get_pnn(client->ctdb);
	}

	if (c->hdr.destnode != ctdb_get_pnn(client->ctdb)) {
		/* the destination is a remote node */
		TDB_DATA payload;
		int rc;

		payload.dsize = c->datalen;
		payload.dptr = &c->data[0];

		rc = ctdb_daemon_send_message(client->ctdb, c->hdr.destnode,
					      c->srvid, payload);
		if (rc != 0) {
			DEBUG(DEBUG_ERR,(__location__ " Failed to send message to remote node %u\n",
				 c->hdr.destnode));
		}
		return;
	}

	/* addressed to this node - maybe for another client here */
	ctdb_request_message(client->ctdb, (struct ctdb_req_header *)c);
}
Пример #2
0
/*
  Control handler: modify the flags recorded for a node.

  indata.dptr is interpreted as a struct ctdb_node_flag_change.
  Returns -1 when the pnn in the request is not below ctdb->num_nodes,
  and 0 otherwise (including when the flags end up unchanged).
 */
int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_node_flag_change *change =
		(struct ctdb_node_flag_change *)indata.dptr;
	struct ctdb_node *target;
	uint32_t prev_flags;
	uint32_t flags;
	int is_self;

	if (change->pnn >= ctdb->num_nodes) {
		DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n", change->pnn, ctdb->num_nodes));
		return -1;
	}

	target = ctdb->nodes[change->pnn];
	prev_flags = target->flags;
	is_self = (change->pnn == ctdb->pnn);

	/* for any node but ourselves, our stored flags are the "old" value */
	if (!is_self) {
		change->old_flags = target->flags;
	}

	/* take the requested flags, except DISCONNECTED which comes from old_flags */
	flags = (change->new_flags & ~NODE_FLAGS_DISCONNECTED) |
		(change->old_flags & NODE_FLAGS_DISCONNECTED);

	/* we don't let other nodes modify our STOPPED or BANNED status */
	if (is_self) {
		flags &= ~(NODE_FLAGS_STOPPED | NODE_FLAGS_BANNED);
		flags |= prev_flags & (NODE_FLAGS_STOPPED | NODE_FLAGS_BANNED);
	}

	target->flags = flags;

	if (target->flags == change->old_flags) {
		DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", change->pnn, target->flags));
		return 0;
	}

	DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", change->pnn, target->flags));

	/* a node with no flags set while we are still starting up forces a recovery */
	if (ctdb->runstate <= CTDB_RUNSTATE_STARTUP && target->flags == 0) {
		DEBUG(DEBUG_ERR, (__location__ " Node %u became healthy - force recovery for startup\n",
				  change->pnn));
		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
	}

	/* tell the recovery daemon something has changed, reporting the
	   flags that were actually stored */
	change->new_flags = target->flags;
	ctdb_daemon_send_message(ctdb, ctdb->pnn,
				 CTDB_SRVID_SET_NODE_FLAGS, indata);

	/* if we have become banned, we should go into recovery mode */
	if (target->pnn == ctdb->pnn &&
	    (target->flags & NODE_FLAGS_BANNED) &&
	    !(change->old_flags & NODE_FLAGS_BANNED)) {
		ctdb_local_node_got_banned(ctdb);
	}

	return 0;
}
Пример #3
0
/*
  called when a health monitoring event script finishes
 */
static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
{
	struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
	TDB_DATA data;
	struct ctdb_node_flag_change c;
	uint32_t next_interval;
	int ret;
	TDB_DATA rddata;
	struct ctdb_srvid_message rd;
	const char *state_str = NULL;

	c.pnn = ctdb->pnn;
	c.old_flags = node->flags;

	rd.pnn   = ctdb->pnn;
	rd.srvid = CTDB_SRVID_TAKEOVER_RUN_RESPONSE;

	rddata.dptr = (uint8_t *)&rd;
	rddata.dsize = sizeof(rd);

	if (status == -ECANCELED) {
		DEBUG(DEBUG_ERR,("Monitoring event was cancelled\n"));
		goto after_change_status;
	}

	if (status == -ETIME) {
		ctdb->monitor->event_script_timeouts++;

		if (ctdb->monitor->event_script_timeouts >=
		    ctdb->tunable.monitor_timeout_count) {
			DEBUG(DEBUG_ERR,
			      ("Maximum monitor timeout count %u reached."
			       " Making node unhealthy\n",
			       ctdb->tunable.monitor_timeout_count));
		} else {
			/* We pretend this is OK. */
			goto after_change_status;
		}
	} else {
		ctdb->monitor->event_script_timeouts = 0;
	}

	if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
		DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
		node->flags |= NODE_FLAGS_UNHEALTHY;
		ctdb->monitor->next_interval = 5;

		ctdb_run_notification_script(ctdb, "unhealthy");
	} else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
		DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
		node->flags &= ~NODE_FLAGS_UNHEALTHY;
		ctdb->monitor->next_interval = 5;

		ctdb_run_notification_script(ctdb, "healthy");
	}

after_change_status:
	next_interval = ctdb->monitor->next_interval;

	ctdb->monitor->next_interval *= 2;
	if (ctdb->monitor->next_interval > ctdb->tunable.monitor_interval) {
		ctdb->monitor->next_interval = ctdb->tunable.monitor_interval;
	}

	tevent_add_timer(ctdb->ev, ctdb->monitor->monitor_context,
			 timeval_current_ofs(next_interval, 0),
			 ctdb_check_health, ctdb);

	if (c.old_flags == node->flags) {
		return;
	}

	c.new_flags = node->flags;

	data.dptr = (uint8_t *)&c;
	data.dsize = sizeof(c);

	/* ask the recovery daemon to push these changes out to all nodes */
	ctdb_daemon_send_message(ctdb, ctdb->pnn,
				 CTDB_SRVID_PUSH_NODE_FLAGS, data);

	if (c.new_flags & NODE_FLAGS_UNHEALTHY) {
		state_str = "UNHEALTHY";
	} else {
		state_str = "HEALTHY";
	}

	/* ask the recmaster to reallocate all addresses */
	DEBUG(DEBUG_ERR,
	      ("Node became %s. Ask recovery master to reallocate IPs\n",
	       state_str));
	ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_TAKEOVER_RUN, rddata);
	if (ret != 0) {
		DEBUG(DEBUG_ERR,
		      (__location__
		       " Failed to send IP takeover run request\n"));
	}
}
Пример #4
0
/*
  modify flags on a node

  indata.dptr is interpreted as a struct ctdb_node_flag_change.
  Returns -1 when the pnn in the request is not below ctdb->num_nodes,
  and 0 otherwise (including when the flags end up unchanged).
 */
int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)indata.dptr;
	struct ctdb_node *node;
	uint32_t old_flags;
	unsigned int i;

	if (c->pnn >= ctdb->num_nodes) {
		DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n", c->pnn, ctdb->num_nodes));
		return -1;
	}

	node         = ctdb->nodes[c->pnn];
	old_flags    = node->flags;

	/* for any node but ourselves, our stored flags are the "old" value */
	if (c->pnn != ctdb->pnn) {
		c->old_flags  = node->flags;
	}

	/* take the requested flags, except DISCONNECTED which comes from old_flags */
	node->flags   = c->new_flags & ~NODE_FLAGS_DISCONNECTED;
	node->flags  |= (c->old_flags & NODE_FLAGS_DISCONNECTED);

	/* we don't let other nodes modify our STOPPED or BANNED status */
	if (c->pnn == ctdb->pnn) {
		node->flags &= ~(NODE_FLAGS_STOPPED | NODE_FLAGS_BANNED);
		node->flags |= old_flags & (NODE_FLAGS_STOPPED | NODE_FLAGS_BANNED);
	}

	if (node->flags == c->old_flags) {
		DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", c->pnn, node->flags));
		return 0;
	}

	DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", c->pnn, node->flags));

	if (node->flags == 0 && !ctdb->done_startup) {
		DEBUG(DEBUG_ERR, (__location__ " Node %u became healthy - force recovery for startup\n",
				  c->pnn));
		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
	}

	/* tell the recovery daemon something has changed.  Report the flags
	   that were actually stored: the DISCONNECTED/STOPPED/BANNED handling
	   above can make them differ from the value that was requested. */
	c->new_flags = node->flags;
	ctdb_daemon_send_message(ctdb, ctdb->pnn,
				 CTDB_SRVID_SET_NODE_FLAGS, indata);

	/* if we have become banned, we should go into recovery mode */
	if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED) && (node->pnn == ctdb->pnn)) {
		/* make sure we are frozen */
		DEBUG(DEBUG_NOTICE,("This node has been banned - forcing freeze and recovery\n"));
		/* Reset the generation id to INVALID_GENERATION to make us
		   ignore any REQ/REPLY CALL/DMASTER someone sends to us.
		   We are now banned so we shouldn't service database calls
		   anymore.
		*/
		ctdb->vnn_map->generation = INVALID_GENERATION;

		/* a failed freeze is logged but does not stop the others */
		for (i = 1; i <= NUM_DB_PRIORITIES; i++) {
			if (ctdb_start_freeze(ctdb, i) != 0) {
				DEBUG(DEBUG_ERR,(__location__ " Failed to freeze db priority %u\n", i));
			}
		}
		ctdb_release_all_ips(ctdb);
		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
	}

	return 0;
}