Example #1
0
/*
 * Open a persistent connection to every valid backend that does not
 * have one yet.
 *
 * For each backend index accepted by VALID_BACKEND() whose entry in
 * slots[] is still NULL, a connection to the "postgres" database is
 * attempted using the sr_check_user / sr_check_password settings.
 * When make_persistent_db_connection() returns NULL the slot simply
 * stays NULL.
 */
static void establish_persistent_connection(void)
{
	int node;

	for (node = 0; node < NUM_BACKENDS; node++)
	{
		BackendInfo *info;

		/* Skip invalid backends and those that already have a connection */
		if (!VALID_BACKEND(node) || slots[node] != NULL)
			continue;

		info = pool_get_node_info(node);

		/* A failed attempt yields NULL, which leaves the slot empty */
		slots[node] = make_persistent_db_connection(info->backend_hostname,
													info->backend_port,
													"postgres",
													pool_config->sr_check_user,
													pool_config->sr_check_password,
													true);
	}
}
Example #2
0
/*
 * Send the description of one backend node to a PCP frontend.
 *
 * frontend: PCP connection the reply is written to.
 * buf:      node ID as decimal text; it is converted with atoi().
 *
 * The reply is the one-byte tag "i", a length word converted with
 * htonl(), and then five NUL-terminated strings: the "CommandComplete"
 * code, host name, port, status and weight. The length word covers the
 * five strings plus sizeof(int).
 *
 * An ERROR is reported through ereport() when pool_get_node_info()
 * returns NULL for the requested ID.
 */
static void
inform_node_info(PCP_CONNECTION *frontend,char *buf)
{
	char code[] = "CommandComplete";
	char port_str[6];
	char status[2];
	char weight_str[20];
	size_t host_len;
	size_t port_len;
	size_t status_len;
	size_t weight_len;
	int wsize;
	int node_id = atoi(buf);
	BackendInfo *bi = pool_get_node_info(node_id);

	if (bi == NULL)
		ereport(ERROR,
				(errmsg("informing node info failed"),
				 errdetail("invalid node ID")));

	ereport(DEBUG2,
			(errmsg("PCP: informing node info"),
			 errdetail("retrieved node information from shared memory")));

	/* Render the numeric fields as text */
	snprintf(port_str, sizeof(port_str), "%d", bi->backend_port);
	snprintf(status, sizeof(status), "%d", bi->backend_status);
	snprintf(weight_str, sizeof(weight_str), "%f", bi->backend_weight);

	/* Each string goes on the wire together with its terminating NUL */
	host_len = strlen(bi->backend_hostname) + 1;
	port_len = strlen(port_str) + 1;
	status_len = strlen(status) + 1;
	weight_len = strlen(weight_str) + 1;

	wsize = htonl(sizeof(code) + host_len + port_len + status_len +
				  weight_len + sizeof(int));

	pcp_write(frontend, "i", 1);
	pcp_write(frontend, &wsize, sizeof(int));
	pcp_write(frontend, code, sizeof(code));
	pcp_write(frontend, bi->backend_hostname, host_len);
	pcp_write(frontend, port_str, port_len);
	pcp_write(frontend, status, status_len);
	pcp_write(frontend, weight_str, weight_len);
	do_pcp_flush(frontend);
}
Example #3
0
/*
 * Build one report row per configured backend node.
 *
 * nrows: output; receives the number of rows in the returned array
 *        (0 when the allocation fails).
 *
 * Returns an array of NUM_BACKENDS entries obtained from malloc(), or
 * NULL when the allocation fails. Every field is filled in as text:
 * node index, host name, port, status, and backend_weight divided by
 * RAND_MAX. Nothing in this function frees the array.
 */
POOL_REPORT_NODES* get_nodes(int *nrows)
{
	int i;
	POOL_REPORT_NODES* nodes = malloc(NUM_BACKENDS * sizeof(POOL_REPORT_NODES));
	BackendInfo *bi = NULL;

	if (nodes == NULL)
	{
		*nrows = 0;
		return NULL;
	}

	for (i = 0; i < NUM_BACKENDS; i++)
	{
		bi = pool_get_node_info(i);

		snprintf(nodes[i].node_id, POOLCONFIG_MAXSTATLEN, "%d", i);

		/*
		 * Bound the copy by the destination field, not by the source
		 * length, so an over-long host name is truncated (and still
		 * NUL-terminated) instead of overrunning the row.
		 */
		snprintf(nodes[i].hostname, sizeof(nodes[i].hostname), "%s",
				 bi->backend_hostname);

		snprintf(nodes[i].port, POOLCONFIG_MAXIDENTLEN, "%d", bi->backend_port);
		snprintf(nodes[i].status, POOLCONFIG_MAXSTATLEN, "%d", bi->backend_status);
		snprintf(nodes[i].lb_weight, POOLCONFIG_MAXWEIGHTLEN, "%f",
				 bi->backend_weight/RAND_MAX);
	}

	*nrows = i;

	return nodes;
}
Example #4
0
/*
 * Decide where to send queries(thus expecting response)
 *
 * query_context: context whose where_to_send[] map and
 *                virtual_master_node_id are filled in by this function.
 * query:         query text; it is compared against the
 *                NO_LOAD_BALANCE comment prefix and passed to the
 *                query-classification helpers.
 * node:          parse tree node of the query, inspected with IsA()
 *                and the pool_has_*() helpers.
 *
 * The node map is cleared first and then set according to the running
 * mode (raw, master/slave, replication/parallel). Afterwards EXECUTE
 * and DEALLOCATE statements get their own handling, and finally
 * virtual_master_node_id is set to the lowest backend index that is
 * marked in where_to_send[].
 *
 * Returns early, after pool_error(), when query_context is NULL or the
 * mode is not recognized; in the latter case virtual_master_node_id is
 * not updated.
 *
 * NOTE(review): the result of pool_get_session_context() is
 * dereferenced without a NULL check, and query is passed to
 * strncasecmp() unchecked -- confirm that callers guarantee both.
 */
void pool_where_to_send(POOL_QUERY_CONTEXT *query_context, char *query, Node *node)
{
	POOL_SESSION_CONTEXT *session_context;
	POOL_CONNECTION_POOL *backend;
	int i;

	if (!query_context)
	{
		pool_error("pool_where_to_send: no query context");
		return;
	}

	session_context = pool_get_session_context();
	backend = session_context->backend;

	/*
	 * Zap out DB node map
	 */
	pool_clear_node_to_be_sent(query_context);

	/*
	 * If there is "NO LOAD BALANCE" comment, we send only to master node.
	 * Only the first NO_LOAD_BALANCE_COMMENT_SZ characters of the query
	 * are compared (case-insensitively). This path returns right away,
	 * so the EXECUTE/DEALLOCATE handling further down is skipped.
	 */
	if (!strncasecmp(query, NO_LOAD_BALANCE, NO_LOAD_BALANCE_COMMENT_SZ))
	{
		pool_set_node_to_be_sent(query_context,
								 MASTER_SLAVE ? PRIMARY_NODE_ID : REAL_MASTER_NODE_ID);
		/* The virtual master is the first node marked in the map */
		for (i=0;i<NUM_BACKENDS;i++)
		{
			if (query_context->where_to_send[i])
			{
				query_context->virtual_master_node_id = i;
				break;
			}
		}
		return;
	}

	/*
	 * In raw mode, we send only to master node. Simple enough.
	 */
	if (RAW_MODE)
	{
		pool_set_node_to_be_sent(query_context, REAL_MASTER_NODE_ID);
	}
	else if (MASTER_SLAVE && query_context->is_multi_statement)
	{
		/*
		 * If we are in master/slave mode and we have multi statement
		 * query, we should send it to primary server only. Otherwise
		 * it is possible to send a write query to standby servers
		 * because we only use the first element of the multi
		 * statement query and don't care about the rest.  Typical
		 * situation where we are bugged by this is, "BEGIN;DELETE
		 * FROM table;END". Note that from pgpool-II 3.1.0
		 * transactional statements such as "BEGIN" is unconditionally
		 * sent to all nodes(see send_to_where() for more details).
		 * Someday we might be able to understand all part of multi
		 * statement queries, but until that day we need this band
		 * aid.
		 *
		 * NOTE(review): the test below repeats the condition of the
		 * enclosing "else if", so it is always true here.
		 */
		if (query_context->is_multi_statement)
		{
			pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
		}
	}
	else if (MASTER_SLAVE)
	{
		POOL_DEST dest;
		POOL_MEMORY_POOL *old_context;

		/*
		 * Classify the query while the query context's memory context
		 * is current, then switch back to the previous one.
		 */
		old_context = pool_memory_context_switch_to(query_context->memory_context);
		dest = send_to_where(node, query);
		pool_memory_context_switch_to(old_context);

		pool_debug("send_to_where: %d query: %s", dest, query);

		/* Should be sent to primary only? */
		if (dest == POOL_PRIMARY)
		{
			pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
		}
		/* Should be sent to both primary and standby? */
		else if (dest == POOL_BOTH)
		{
			pool_setall_node_to_be_sent(query_context);
		}

		/*
		 * Ok, we might be able to load balance the SELECT query.
		 */
		else
		{
			/*
			 * Load balancing is only considered for SELECT queries on a
			 * V3 protocol connection with load_balance_mode enabled.
			 */
			if (pool_config->load_balance_mode &&
				is_select_query(node, query) &&
				MAJOR(backend) == PROTO_MAJOR_V3)
			{
				/*
				 * If (we are outside of an explicit transaction) OR
				 * (the transaction has not issued a write query yet, AND
				 *	the transaction has not failed, AND
				 *	transaction isolation level is not SERIALIZABLE)
				 * we might be able to load balance.
				 */
				if (TSTATE(backend, PRIMARY_NODE_ID) == 'I' ||
					(!pool_is_writing_transaction() &&
					 !pool_is_failed_transaction() &&
					 pool_get_transaction_isolation() != POOL_SERIALIZABLE))
				{
					/* Node info of the session's load balance node */
					BackendInfo *bkinfo = pool_get_node_info(session_context->load_balance_node_id);

					/*
					 * Load balance if possible
					 */

					/*
					 * If replication delay is too much, we prefer to send to the primary.
					 * This check applies only in streaming replication sub
					 * mode and when delay_threshold is non-zero.
					 */
					if (!strcmp(pool_config->master_slave_sub_mode, MODE_STREAMREP) &&
						pool_config->delay_threshold &&
						bkinfo->standby_delay > pool_config->delay_threshold)
					{
						pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
					}

					/*
					 * If a writing function call is used,
					 * we prefer to send to the primary.
					 */
					else if (pool_has_function_call(node))
					{
						pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
					}

					/*
					 * If system catalog is used in the SELECT, we
					 * prefer to send to the primary. Example: SELECT
					 * * FROM pg_class WHERE relname = 't1'; Because
					 * 't1' is a constant, it's hard to recognize as
					 * table name.  Most use case such query is
					 * against system catalog, and the table name can
					 * be a temporary table, it's best to query
					 * against primary system catalog.
					 * Please note that this test must be done *before*
					 * test using pool_has_temp_table.
					 */
					else if (pool_has_system_catalog(node))
					{
						pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
					}

					/*
					 * If temporary table is used in the SELECT,
					 * we prefer to send to the primary.
					 * The check is skipped when check_temp_table is off.
					 */
					else if (pool_config->check_temp_table && pool_has_temp_table(node))
					{
						pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
					}

					/*
					 * If unlogged table is used in the SELECT,
					 * we prefer to send to the primary.
					 */
					else if (pool_has_unlogged_table(node))
					{
						pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
					}

					/* Nothing forces the primary: use the load balance node */
					else
					{
						pool_set_node_to_be_sent(query_context,
												 session_context->load_balance_node_id);
					}
				}
				else
				{
					/* Send to the primary only */
					pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
				}
			}
			else
			{
				/* Send to the primary only */
				pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
			}
		}
	}
	else if (REPLICATION || PARALLEL_MODE)
	{
		if (is_select_query(node, query))
		{
			/*
			 * If a writing function call is used or replicate_select is true,
			 * we prefer to send to all nodes.
			 */
			if ((pool_has_function_call(node) || pool_config->replicate_select))
			{
				pool_setall_node_to_be_sent(query_context);
			}
			else if (pool_config->load_balance_mode &&
					 MAJOR(backend) == PROTO_MAJOR_V3 &&
					 TSTATE(backend, MASTER_NODE_ID) == 'I')
			{
				/* load balance */
				pool_set_node_to_be_sent(query_context,
										 session_context->load_balance_node_id);
			}
			else
			{
				/* only send to master node */
				pool_set_node_to_be_sent(query_context, REAL_MASTER_NODE_ID);
			}
		}
		/*
		 * Cursor statements follow the same load balance test as SELECT
		 * when the query context has loadbalance_cursor set; otherwise
		 * they go to all nodes.
		 */
		else if (IsA(node, DeclareCursorStmt) || IsA(node, ClosePortalStmt) || IsA(node, FetchStmt))
		{
			if (query_context->loadbalance_cursor)
			{
				if (pool_config->load_balance_mode &&
						 MAJOR(backend) == PROTO_MAJOR_V3 &&
					 TSTATE(backend, MASTER_NODE_ID) == 'I')
				{
					/* load balance */
					pool_set_node_to_be_sent(query_context,
											 session_context->load_balance_node_id);
				}
				else
				{
					/* only send to master node */
					pool_set_node_to_be_sent(query_context, REAL_MASTER_NODE_ID);
				}
			}
			else
			{
				/* send to all nodes */
				pool_setall_node_to_be_sent(query_context);
			}
		}
		else
		{
			/* send to all nodes */
			pool_setall_node_to_be_sent(query_context);
		}
	}
	else
	{
		pool_error("pool_where_to_send: unknown mode");
		return;
	}

	/*
	 * EXECUTE?
	 * Look up the named statement among the sent messages, trying kind
	 * 'Q' first and then 'P'. When one is found, pool_copy_prep_where()
	 * is called with that message's node map and this query's node map.
	 * NOTE(review): presumably this copies the stored map into this
	 * query's map -- confirm the argument order of pool_copy_prep_where().
	 */
	if (IsA(node, ExecuteStmt))
	{
		POOL_SENT_MESSAGE *msg;

		msg = pool_get_sent_message('Q', ((ExecuteStmt *)node)->name);
		if (!msg)
			msg = pool_get_sent_message('P', ((ExecuteStmt *)node)->name);
		if (msg)
			pool_copy_prep_where(msg->query_context->where_to_send,
								 query_context->where_to_send);
	}

	/*
	 * DEALLOCATE?
	 */
	else if (IsA(node, DeallocateStmt))
	{
		where_to_send_deallocate(query_context, node);
	}

	/*
	 * The virtual master is the lowest-numbered node marked in the map.
	 * If no node is marked, virtual_master_node_id is left unchanged.
	 */
	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (query_context->where_to_send[i])
		{
			query_context->virtual_master_node_id = i;
			break;
		}
	}

	return;
}
Example #5
0
/*
 * trigger_failover_command: execute specified command at failover.
 *                           command_line is null-terminated string.
 *
 * node:         ID of the backend node the command is run for; must be
 *               in the range 0 .. NUM_BACKENDS-1.
 * command_line: command template. The following placeholders are
 *               expanded before the command is handed to system():
 *                 %p  port of the node
 *                 %D  database (data) directory of the node
 *                 %d  node ID
 *                 %h  host name of the node
 *                 %m  new master node ID (get_next_master_node())
 *                 %M  old master node ID (MASTER_NODE_ID)
 *                 %%  a literal '%'
 *               Any other "%x" sequence is dropped, as is a lone '%'
 *               at the very end of the template.
 *
 * Returns 0 when command_line is NULL or empty, or when the expanded
 * command is empty; -1 when the node ID is out of range, no node info
 * is available, or the memory pool cannot be created; otherwise the
 * return value of system().
 *
 * NOTE(review): host name and data directory are inserted into the
 * shell command without quoting -- confirm they only ever come from
 * trusted configuration.
 */
static int trigger_failover_command(int node, const char *command_line)
{
	int r = 0;
	String *exec_cmd;
	char port_buf[6];
	char buf[2];
	BackendInfo *info;

	if (command_line == NULL || (strlen(command_line) == 0))
		return 0;

	/* check nodeID: valid IDs are 0 .. NUM_BACKENDS-1 */
	if (node < 0 || node >= NUM_BACKENDS)
		return -1;

	info = pool_get_node_info(node);
	if (!info)
		return -1;

	/* buf is a one-character string used to append literal characters */
	buf[1] = '\0';
	pool_memory = pool_memory_create(PREPARE_BLOCK_SIZE);
	if (!pool_memory)
	{
		pool_error("trigger_failover_command: pool_memory_create() failed");
		return -1;
	}
	exec_cmd = init_string("");

	while (*command_line)
	{
		if (*command_line == '%')
		{
			if (*(command_line + 1))
			{
				char val = *(command_line + 1);
				switch (val)
				{
					case 'p': /* port */
						snprintf(port_buf, sizeof(port_buf), "%d", info->backend_port);
						string_append_char(exec_cmd, port_buf);
						break;

					case 'D': /* database directory */
						string_append_char(exec_cmd, info->backend_data_directory);
						break;

					case 'd': /* node id */
						snprintf(port_buf, sizeof(port_buf), "%d", node);
						string_append_char(exec_cmd, port_buf);
						break;

					case 'h': /* host name */
						string_append_char(exec_cmd, info->backend_hostname);
						break;

					case 'm': /* new master node id */
						snprintf(port_buf, sizeof(port_buf), "%d", get_next_master_node());
						string_append_char(exec_cmd, port_buf);
						break;

					case 'M': /* old master node id */
						snprintf(port_buf, sizeof(port_buf), "%d", MASTER_NODE_ID);
						string_append_char(exec_cmd, port_buf);
						break;

					case '%': /* escape */
						string_append_char(exec_cmd, "%");
						break;

					default: /* ignore */
						break;
				}
				/* skip the placeholder letter; the '%' is skipped below */
				command_line++;
			}
		} else {
			buf[0] = *command_line;
			string_append_char(exec_cmd, buf);
		}
		command_line++;
	}

	if (strlen(exec_cmd->data) != 0)
	{
		pool_log("execute command: %s", exec_cmd->data);
		r = system(exec_cmd->data);
	}

	pool_memory_delete(pool_memory, 0);
	pool_memory = NULL;

	return r;
}
Example #6
0
/*
 * Check replication time lag
 *
 * Queries the current WAL location of every valid backend over its
 * persistent connection in slots[]: pg_current_xlog_location() on the
 * primary, pg_last_xlog_replay_location() on the others. The difference
 * between the primary's location and each other node's location is
 * stored in that node's standby_delay (0 for the primary itself, and 0
 * when the node is not behind). Depending on log_standby_delay and
 * delay_threshold the delay is also logged.
 *
 * Nothing is done when fewer than two nodes are configured or valid.
 * The function gives up, after logging an error, as soon as a valid
 * node has no persistent connection or its query fails.
 */
static void check_replication_time_lag(void)
{
	unsigned long long int lsn[MAX_NUM_BACKENDS];
	unsigned long long int lag;
	POOL_SELECT_RESULT *res;
	POOL_STATUS sts;
	BackendInfo *bkinfo;
	const char *log_mode;
	char *query;
	int healthy = 0;
	int node;

	/* If there's only one node, there's no point to do checking */
	if (NUM_BACKENDS <= 1)
		return;

	/* Count healthy nodes */
	for (node = 0; node < NUM_BACKENDS; node++)
	{
		if (VALID_BACKEND(node))
			healthy++;
	}

	/* With one or no active node there is nothing to compare */
	if (healthy <= 1)
		return;

	/* Pass 1: collect the WAL location of every valid node */
	for (node = 0; node < NUM_BACKENDS; node++)
	{
		if (!VALID_BACKEND(node))
			continue;

		if (slots[node] == NULL)
		{
			pool_debug("check_replication_time_lag: DB node is valid but no persistent connection");
			pool_error("check_replication_time_lag: could not connect to DB node %d, check sr_check_user and sr_check_password", node);

			return;
		}

		query = (PRIMARY_NODE_ID == node)
			? "SELECT pg_current_xlog_location()"
			: "SELECT pg_last_xlog_replay_location()";

		sts = do_query(slots[node]->con, query, &res, PROTO_MAJOR_V3);
		if (sts != POOL_CONTINUE)
		{
			pool_error("check_replication_time_lag: %s failed", query);
			return;
		}
		if (res == NULL)
		{
			pool_error("check_replication_time_lag: %s result is null", query);
			return;
		}
		if (res->numrows <= 0)
		{
			pool_error("check_replication_time_lag: %s returns no rows", query);
			free_select_result(res);
			return;
		}
		if (res->data[0] == NULL)
		{
			pool_error("check_replication_time_lag: %s returns no data", query);
			free_select_result(res);
			return;
		}

		/* A SQL NULL is recorded as location 0 */
		if (res->nullflags[0] == -1)
		{
			pool_log("check_replication_time_lag: %s returns NULL", query);
			lsn[node] = 0;
		}
		else
		{
			lsn[node] = text_to_lsn(res->data[0]);
		}
		free_select_result(res);
	}

	/* Pass 2: set standby delay value for every valid node */
	for (node = 0; node < NUM_BACKENDS; node++)
	{
		if (!VALID_BACKEND(node))
			continue;

		bkinfo = pool_get_node_info(node);

		/* Clamp at 0 so a node that is ahead never yields a wrapped value */
		if (lsn[PRIMARY_NODE_ID] > lsn[node])
			lag = lsn[PRIMARY_NODE_ID] - lsn[node];
		else
			lag = 0;

		if (PRIMARY_NODE_ID == node)
		{
			bkinfo->standby_delay = 0;
			continue;
		}

		bkinfo->standby_delay = lag;

		/* Log delay if necessary */
		log_mode = pool_config->log_standby_delay;
		if ((strcmp(log_mode, "always") == 0 && lag > 0) ||
			(pool_config->delay_threshold &&
			 strcmp(log_mode, "if_over_threshold") == 0 &&
			 lag > pool_config->delay_threshold))
		{
			pool_log("Replication of node:%d is behind %llu bytes from the primary server (node:%d)",
					 node, lsn[PRIMARY_NODE_ID] - lsn[node], PRIMARY_NODE_ID);
		}
	}
}