/*
 * Process a command-complete message coming from the backends.
 *
 * The command tag is read from the master node first and then from every
 * other valid backend.  For each node the transaction state is updated from
 * the tag it returned: a tag starting with "BEGIN" sets the state to 'T',
 * one starting with "COMMIT" or "ROLLBACK" sets it to 'I'.  The master's tag
 * is then forwarded to the frontend, preceded by a 'C' byte.
 *
 * Returns POOL_END when a read from any backend fails, when the copy of the
 * master's tag cannot be allocated, when a non-master tag length differs
 * from the master's for anything other than INSERT, or when writing to the
 * frontend fails.  Otherwise returns the result of pool_flush(frontend).
 */
POOL_STATUS CompletedResponse(POOL_CONNECTION *frontend,
							  POOL_CONNECTION_POOL *backend)
{
	int i;
	char *string = NULL;
	char *string1 = NULL;	/* private copy of the master's command tag */
	int len, len1 = 0;

	/* read command tag from the master node */
	string = pool_read_string(MASTER(backend), &len, 0);
	if (string == NULL)
		return POOL_END;
	else if (!strncmp(string, "BEGIN", 5))
		TSTATE(backend, MASTER_NODE_ID) = 'T';
	else if (!strncmp(string, "COMMIT", 6) || !strncmp(string, "ROLLBACK", 8))
		TSTATE(backend, MASTER_NODE_ID) = 'I';

	/*
	 * Keep our own copy of the master's tag; presumably the buffer returned
	 * by pool_read_string() does not survive the reads below.
	 */
	len1 = len;
	string1 = strdup(string);
	if (string1 == NULL)
	{
		pool_debug("CompletedResponse: strdup failed");
		return POOL_END;
	}

	for (i = 0; i < NUM_BACKENDS; i++)
	{
		if (!VALID_BACKEND(i) || IS_MASTER_NODE_ID(i))
			continue;

		/* read command tag from this node */
		string = pool_read_string(CONNECTION(backend, i), &len, 0);
		if (string == NULL)
		{
			/* do not leak the master's tag on this early exit */
			free(string1);
			return POOL_END;
		}
		else if (!strncmp(string, "BEGIN", 5))
			TSTATE(backend, i) = 'T';
		else if (!strncmp(string, "COMMIT", 6) || !strncmp(string, "ROLLBACK", 8))
			TSTATE(backend, i) = 'I';

		if (len != len1)
		{
			/* master's values first, then this node's, as the format says */
			pool_debug("CompletedResponse: message length does not match between master(%d \"%s\",) and %d th server (%d \"%s\",)",
					   len1, string1, i, len, string);

			/* we except INSERT, because INSERT response has OID */
			if (strncmp(string1, "INSERT", 6))
			{
				free(string1);
				return POOL_END;
			}
		}
	}

	/* forward to the frontend */
	if (pool_write(frontend, "C", 1) < 0)
	{
		free(string1);
		return POOL_END;
	}

	pool_debug("CompletedResponse: string: \"%s\"", string1);

	if (pool_write(frontend, string1, len1) < 0)
	{
		free(string1);
		return POOL_END;
	}

	free(string1);
	return pool_flush(frontend);
}
/* --------------------------------
 * Execute query cache look up
 *
 * The node is treated as a SelectStmt.  Statements carrying an INTO clause
 * or a locking clause are not looked up at all and yield POOL_END.
 * Otherwise the node is serialized with nodeToString(), the copy is stored
 * in parsed_query (declared outside this function) and handed to
 * pool_query_cache_lookup().
 *
 * When the lookup returns POOL_CONTINUE, parsed_query is released and reset
 * to NULL and free_parser() is called.  For any other status parsed_query
 * is left set.  Returns POOL_ERROR when the copy cannot be allocated,
 * otherwise the lookup status.
 * --------------------------------
 */
POOL_STATUS pool_execute_query_cache_lookup(POOL_CONNECTION *frontend,
											POOL_CONNECTION_POOL *backend,
											Node *node)
{
	SelectStmt *stmt = (SelectStmt *) node;
	POOL_STATUS result;

	/* not eligible for a lookup: report "cache not found" */
	if (stmt->intoClause || stmt->lockingClause)
		return POOL_END;

	parsed_query = strdup(nodeToString(node));
	if (parsed_query == NULL)
	{
		pool_error("pool_execute_query_cache_lookup: malloc failed");
		return POOL_ERROR;
	}

	result = pool_query_cache_lookup(frontend, parsed_query,
									 backend->info->database,
									 TSTATE(backend, MASTER_NODE_ID));
	if (result != POOL_CONTINUE)
		return result;

	/* the serialized query is no longer needed */
	free(parsed_query);
	parsed_query = NULL;
	free_parser();

	return result;
}
/*
 * Process an error message coming from the backends.
 *
 * An error string is read from every valid backend; the string read last is
 * forwarded to the frontend, preceded by an 'E' byte.  Afterwards, every
 * valid backend whose transaction state is 'T' is switched to 'E'.
 *
 * Returns POOL_END when a read fails, when no valid backend supplied a
 * message, or when writing to the frontend fails.  Otherwise returns
 * POOL_CONTINUE, or the result of raise_intentional_error_if_need() when a
 * session context exists.
 */
POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend,
						  POOL_CONNECTION_POOL *backend)
{
	char *string = NULL;
	int len = 0;
	int i;
	POOL_STATUS ret = POOL_CONTINUE;

	for (i = 0; i < NUM_BACKENDS; i++)
	{
		if (VALID_BACKEND(i))
		{
			/* read error message */
			string = pool_read_string(CONNECTION(backend, i), &len, 0);
			if (string == NULL)
				return POOL_END;
		}
	}

	/*
	 * No valid backend was found, so there is no message to forward.
	 * Bail out instead of writing a NULL buffer with a meaningless length.
	 */
	if (string == NULL)
		return POOL_END;

	/* forward to the frontend */
	if (pool_write(frontend, "E", 1) < 0)
		return POOL_END;

	if (pool_write_and_flush(frontend, string, len) < 0)
		return POOL_END;

	/*
	 * check session context, because this function is called
	 * by pool_do_auth too.
	 */
	if (pool_get_session_context())
		ret = raise_intentional_error_if_need(backend);

	/* change transaction state */
	for (i = 0; i < NUM_BACKENDS; i++)
	{
		if (VALID_BACKEND(i))
		{
			if (TSTATE(backend, i) == 'T')
				TSTATE(backend, i) = 'E';
		}
	}

	return ret;
}
/*
 * Send an extended-protocol message to the backends and wait for the
 * response of each of them.
 *
 * send_type selects the target nodes:
 *   <0: every valid node except node_id
 *    0: every valid node
 *   >0: node_id only
 *
 * kind/len/contents describe the message to send.  Returns POOL_CONTINUE on
 * success and POOL_END when rewriting, sending or waiting fails.
 */
POOL_STATUS pool_extended_send_and_wait(POOL_QUERY_CONTEXT *query_context,
										char *kind, int len, char *contents,
										int send_type, int node_id)
{
	POOL_SESSION_CONTEXT *session_context = pool_get_session_context();
	POOL_CONNECTION *frontend = session_context->frontend;
	POOL_CONNECTION_POOL *backend = session_context->backend;
	bool is_commit = is_commit_or_rollback_query(query_context->parse_tree);
	bool is_begin_read_write = false;
	char *rewritten_begin = NULL;	/* result of remove_read_write(); freed here */
	int rewritten_len = 0;
	char *payload = NULL;			/* message body sent to the current node */
	int payload_len = 0;
	char *logged_query = NULL;		/* query text reported by the error log */
	int idx;

	/*
	 * If the query is BEGIN READ WRITE or
	 * BEGIN ... SERIALIZABLE in master/slave mode,
	 * we send BEGIN to slaves/standbys instead.
	 * original_query which is BEGIN READ WRITE is sent to primary.
	 * rewritten_query which is BEGIN is sent to standbys.
	 */
	if (pool_need_to_treat_as_if_default_transaction(query_context))
	{
		is_begin_read_write = true;

		/* only a Parse message is rewritten */
		if (*kind == 'P')
		{
			rewritten_begin = remove_read_write(len, contents, &rewritten_len);
			if (rewritten_begin == NULL)
				return POOL_END;
		}
	}

	/* without a rewritten message every node receives the caller's bytes */
	if (rewritten_begin == NULL)
	{
		payload = contents;
		payload_len = len;
	}

	/* Send query */
	for (idx = 0; idx < NUM_BACKENDS; idx++)
	{
		if (!VALID_BACKEND(idx) ||
			(send_type < 0 && idx == node_id) ||
			(send_type > 0 && idx != node_id))
			continue;

		/*
		 * If in reset context, we send COMMIT/ABORT to nodes those
		 * are not in I(idle) state.  This will ensure that
		 * transactions are closed.
		 */
		if (is_commit && session_context->reset_context &&
			TSTATE(backend, idx) == 'I')
		{
			pool_unset_node_to_be_sent(query_context, idx);
			continue;
		}

		/* the primary gets the original message, the others the rewritten one */
		if (rewritten_begin)
		{
			if (REAL_PRIMARY_NODE_ID == idx)
			{
				payload = contents;
				payload_len = len;
			}
			else
			{
				payload = rewritten_begin;
				payload_len = rewritten_len;
			}
		}

		if (pool_config->log_per_node_statement)
		{
			char msgbuf[QUERY_STRING_BUFFER_LEN];

			if (*kind != 'P' && *kind != 'E')
			{
				snprintf(msgbuf, sizeof(msgbuf), "%c message", *kind);
			}
			else
			{
				char *stmt;

				/*
				 * Log the original query when there is no rewritten one,
				 * or when this is the primary receiving the original
				 * BEGIN; otherwise log the rewritten query.
				 */
				if (!query_context->rewritten_query ||
					(is_begin_read_write && REAL_PRIMARY_NODE_ID == idx))
					stmt = query_context->original_query;
				else
					stmt = query_context->rewritten_query;

				if (*kind == 'P')
					snprintf(msgbuf, sizeof(msgbuf), "Parse: %s", stmt);
				else
					snprintf(msgbuf, sizeof(msgbuf), "Execute: %s", stmt);
			}

			per_node_statement_log(backend, idx, msgbuf);
		}

		if (send_extended_protocol_message(backend, idx, kind, payload_len, payload) != POOL_CONTINUE)
			goto fail;
	}

	/* outside the BEGIN special case the logged query is the same for all nodes */
	if (!is_begin_read_write)
	{
		logged_query = query_context->rewritten_query
			? query_context->rewritten_query
			: query_context->original_query;
	}

	/* Wait for response */
	for (idx = 0; idx < NUM_BACKENDS; idx++)
	{
		if (!VALID_BACKEND(idx) ||
			(send_type < 0 && idx == node_id) ||
			(send_type > 0 && idx != node_id))
			continue;

		/*
		 * If in master/slave mode, we do not send COMMIT/ABORT to
		 * slaves/standbys if it's in I(idle) state.
		 */
		if (is_commit && MASTER_SLAVE && !IS_MASTER_NODE_ID(idx) &&
			TSTATE(backend, idx) == 'I')
			continue;

		if (is_begin_read_write)
		{
			logged_query = (REAL_PRIMARY_NODE_ID == idx)
				? query_context->original_query
				: query_context->rewritten_query;
		}

		if (wait_for_query_response(frontend, CONNECTION(backend, idx), MAJOR(backend)) != POOL_CONTINUE)
		{
			/* Cancel current transaction */
			CancelPacket cancel_packet;

			cancel_packet.protoVersion = htonl(PROTO_CANCEL);
			cancel_packet.pid = MASTER_CONNECTION(backend)->pid;
			cancel_packet.key = MASTER_CONNECTION(backend)->key;
			cancel_request(&cancel_packet);

			goto fail;
		}

		/*
		 * Check if some error detected.  If so, emit
		 * log. This is useful when invalid encoding error
		 * occurs. In this case, PostgreSQL does not report
		 * what statement caused that error and make users
		 * confused.
		 */
		per_node_error_log(backend, idx, logged_query, "pool_send_and_wait: Error or notice message from backend: ", true);
	}

	free(rewritten_begin);
	return POOL_CONTINUE;

fail:
	free(rewritten_begin);
	return POOL_END;
}
/*
 * Send a simple query to the backends and wait for the response of each of
 * them.
 *
 * send_type selects the target nodes:
 *   <0: every valid node except node_id
 *    0: every valid node
 *   >0: node_id only
 *
 * The query text comes from query_context: the rewritten query when one is
 * set, the original query otherwise.  Returns POOL_CONTINUE on success and
 * POOL_END when sending or waiting fails.
 */
POOL_STATUS pool_send_and_wait(POOL_QUERY_CONTEXT *query_context,
							   int send_type, int node_id)
{
	POOL_SESSION_CONTEXT *session_context = pool_get_session_context();
	POOL_CONNECTION *frontend = session_context->frontend;
	POOL_CONNECTION_POOL *backend = session_context->backend;
	bool is_commit = is_commit_or_rollback_query(query_context->parse_tree);
	bool is_begin_read_write = false;
	char *query = NULL;
	int query_len = 0;
	int idx;

	/*
	 * If the query is BEGIN READ WRITE or
	 * BEGIN ... SERIALIZABLE in master/slave mode,
	 * we send BEGIN to slaves/standbys instead.
	 * original_query which is BEGIN READ WRITE is sent to primary.
	 * rewritten_query which is BEGIN is sent to standbys.
	 */
	if (pool_need_to_treat_as_if_default_transaction(query_context))
	{
		/* the query is chosen per node inside the loops below */
		is_begin_read_write = true;
	}
	else if (query_context->rewritten_query)
	{
		query = query_context->rewritten_query;
		query_len = query_context->rewritten_length;
	}
	else
	{
		query = query_context->original_query;
		query_len = query_context->original_length;
	}

	/* Send query */
	for (idx = 0; idx < NUM_BACKENDS; idx++)
	{
		if (!VALID_BACKEND(idx) ||
			(send_type < 0 && idx == node_id) ||
			(send_type > 0 && idx != node_id))
			continue;

		/*
		 * COMMIT/ABORT is not sent to an idle node in two cases:
		 * - in master/slave mode, when the node is a slave/standby;
		 * - in reset context, where only nodes that are not in I(idle)
		 *   state need their transaction closed.
		 */
		if (is_commit &&
			((MASTER_SLAVE && !IS_MASTER_NODE_ID(idx) && TSTATE(backend, idx) == 'I') ||
			 (session_context->reset_context && TSTATE(backend, idx) == 'I')))
		{
			pool_unset_node_to_be_sent(query_context, idx);
			continue;
		}

		if (is_begin_read_write)
		{
			if (REAL_PRIMARY_NODE_ID == idx)
			{
				query_len = query_context->original_length;
				query = query_context->original_query;
			}
			else
			{
				query_len = query_context->rewritten_length;
				query = query_context->rewritten_query;
			}
		}

		per_node_statement_log(backend, idx, query);

		if (send_simplequery_message(CONNECTION(backend, idx), query_len, query, MAJOR(backend)) != POOL_CONTINUE)
			return POOL_END;
	}

	/* Wait for response */
	for (idx = 0; idx < NUM_BACKENDS; idx++)
	{
		if (!VALID_BACKEND(idx) ||
			(send_type < 0 && idx == node_id) ||
			(send_type > 0 && idx != node_id))
			continue;

#ifdef NOT_USED
		/*
		 * If in master/slave mode, we do not send COMMIT/ABORT to
		 * slaves/standbys if it's in I(idle) state.
		 */
		if (is_commit && MASTER_SLAVE && !IS_MASTER_NODE_ID(idx) && TSTATE(backend, idx) == 'I')
		{
			continue;
		}
#endif

		/* pick the text that was sent to this node, for the error log */
		if (is_begin_read_write)
		{
			query = (REAL_PRIMARY_NODE_ID == idx)
				? query_context->original_query
				: query_context->rewritten_query;
		}

		if (wait_for_query_response(frontend, CONNECTION(backend, idx), MAJOR(backend)) != POOL_CONTINUE)
		{
			/* Cancel current transaction */
			CancelPacket cancel_packet;

			cancel_packet.protoVersion = htonl(PROTO_CANCEL);
			cancel_packet.pid = MASTER_CONNECTION(backend)->pid;
			cancel_packet.key = MASTER_CONNECTION(backend)->key;
			cancel_request(&cancel_packet);

			return POOL_END;
		}

		/*
		 * Check if some error detected.  If so, emit
		 * log. This is useful when invalid encoding error
		 * occurs. In this case, PostgreSQL does not report
		 * what statement caused that error and make users
		 * confused.
		 */
		per_node_error_log(backend, idx, query, "pool_send_and_wait: Error or notice message from backend: ", true);
	}

	return POOL_CONTINUE;
}
/*
 * Decide where to send queries (thus expecting responses).
 *
 * Clears the node map of query_context and then marks the node(s) the query
 * has to be sent to, depending on the operating mode (raw, master/slave,
 * replication/parallel) and on the parsed statement.  Finally
 * virtual_master_node_id is set to the lowest node index that is marked in
 * where_to_send.
 *
 * query_context: context whose where_to_send map and
 *                virtual_master_node_id are updated; when NULL, an error
 *                is logged and nothing else happens.
 * query:         query string; also checked for the NO_LOAD_BALANCE prefix.
 * node:          parse tree node of the query.
 *
 * In an unknown mode an error is logged and the function returns with the
 * node map left cleared.
 */
void pool_where_to_send(POOL_QUERY_CONTEXT *query_context, char *query, Node *node)
{
	POOL_SESSION_CONTEXT *session_context;
	POOL_CONNECTION_POOL *backend;
	int i;

	if (!query_context)
	{
		pool_error("pool_where_to_send: no query context");
		return;
	}

	session_context = pool_get_session_context();
	backend = session_context->backend;

	/*
	 * Zap out DB node map
	 */
	pool_clear_node_to_be_sent(query_context);

	/*
	 * If there is "NO LOAD BALANCE" comment, we send only to master node.
	 * The comparison is a case-insensitive prefix match on the query.
	 */
	if (!strncasecmp(query, NO_LOAD_BALANCE, NO_LOAD_BALANCE_COMMENT_SZ))
	{
		pool_set_node_to_be_sent(query_context, MASTER_SLAVE ? PRIMARY_NODE_ID : REAL_MASTER_NODE_ID);

		/* the first marked node becomes the virtual master */
		for (i=0;i<NUM_BACKENDS;i++)
		{
			if (query_context->where_to_send[i])
			{
				query_context->virtual_master_node_id = i;
				break;
			}
		}
		return;
	}

	/*
	 * In raw mode, we send only to master node. Simple enough.
	 */
	if (RAW_MODE)
	{
		pool_set_node_to_be_sent(query_context, REAL_MASTER_NODE_ID);
	}
	else if (MASTER_SLAVE && query_context->is_multi_statement)
	{
		/*
		 * If we are in master/slave mode and we have a multi statement
		 * query, we should send it to primary server only. Otherwise
		 * it is possible to send a write query to standby servers
		 * because we only use the first element of the multi
		 * statement query and don't care about the rest.  Typical
		 * situation where we are bugged by this is, "BEGIN;DELETE
		 * FROM table;END". Note that from pgpool-II 3.1.0
		 * transactional statements such as "BEGIN" are unconditionally
		 * sent to all nodes (see send_to_where() for more details).
		 * Someday we might be able to understand all parts of multi
		 * statement queries, but until that day we need this band
		 * aid.
		 *
		 * NOTE(review): the inner check below repeats a condition the
		 * enclosing "else if" has already established.
		 */
		if (query_context->is_multi_statement)
		{
			pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
		}
	}
	else if (MASTER_SLAVE)
	{
		POOL_DEST dest;
		POOL_MEMORY_POOL *old_context;

		/* run send_to_where() inside the query context's memory context */
		old_context = pool_memory_context_switch_to(query_context->memory_context);
		dest = send_to_where(node, query);
		pool_memory_context_switch_to(old_context);

		pool_debug("send_to_where: %d query: %s", dest, query);

		/* Should be sent to primary only? */
		if (dest == POOL_PRIMARY)
		{
			pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
		}
		/* Should be sent to both primary and standby? */
		else if (dest == POOL_BOTH)
		{
			pool_setall_node_to_be_sent(query_context);
		}
		/*
		 * Ok, we might be able to load balance the SELECT query.
		 */
		else
		{
			if (pool_config->load_balance_mode &&
				is_select_query(node, query) &&
				MAJOR(backend) == PROTO_MAJOR_V3)
			{
				/*
				 * If (we are outside of an explicit transaction) OR
				 * (the transaction has not issued a write query yet, AND
				 * the transaction has not failed, AND
				 * transaction isolation level is not SERIALIZABLE)
				 * we might be able to load balance.
				 */
				if (TSTATE(backend, PRIMARY_NODE_ID) == 'I' ||
					(!pool_is_writing_transaction() &&
					 !pool_is_failed_transaction() &&
					 pool_get_transaction_isolation() != POOL_SERIALIZABLE))
				{
					/* info of the node chosen for load balancing in this session */
					BackendInfo *bkinfo = pool_get_node_info(session_context->load_balance_node_id);

					/*
					 * Load balance if possible
					 */

					/*
					 * If replication delay is too much, we prefer to send to the primary.
					 * Only checked in streaming replication sub mode with a
					 * non-zero delay_threshold.
					 */
					if (!strcmp(pool_config->master_slave_sub_mode, MODE_STREAMREP) &&
						pool_config->delay_threshold &&
						bkinfo->standby_delay > pool_config->delay_threshold)
					{
						pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
					}

					/*
					 * If a writing function call is used,
					 * we prefer to send to the primary.
					 */
					else if (pool_has_function_call(node))
					{
						pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
					}

					/*
					 * If system catalog is used in the SELECT, we
					 * prefer to send to the primary. Example: SELECT
					 * * FROM pg_class WHERE relname = 't1'; Because
					 * 't1' is a constant, it's hard to recognize as
					 * table name.  Most use cases of such a query are
					 * against the system catalog, and the table name can
					 * be a temporary table, so it's best to query
					 * against the primary system catalog.
					 * Please note that this test must be done *before*
					 * test using pool_has_temp_table.
					 */
					else if (pool_has_system_catalog(node))
					{
						pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
					}

					/*
					 * If temporary table is used in the SELECT,
					 * we prefer to send to the primary.
					 * The test is skipped when check_temp_table is off.
					 */
					else if (pool_config->check_temp_table && pool_has_temp_table(node))
					{
						pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
					}

					/*
					 * If unlogged table is used in the SELECT,
					 * we prefer to send to the primary.
					 */
					else if (pool_has_unlogged_table(node))
					{
						pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
					}
					else
					{
						/* nothing prevents it: send to the load balance node */
						pool_set_node_to_be_sent(query_context,
												 session_context->load_balance_node_id);
					}
				}
				else
				{
					/* Send to the primary only */
					pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
				}
			}
			else
			{
				/* Send to the primary only */
				pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
			}
		}
	}
	else if (REPLICATION || PARALLEL_MODE)
	{
		if (is_select_query(node, query))
		{
			/*
			 * If a writing function call is used or replicate_select is true,
			 * we prefer to send to all nodes.
			 */
			if ((pool_has_function_call(node) || pool_config->replicate_select))
			{
				pool_setall_node_to_be_sent(query_context);
			}
			else if (pool_config->load_balance_mode &&
					 MAJOR(backend) == PROTO_MAJOR_V3 &&
					 TSTATE(backend, MASTER_NODE_ID) == 'I')
			{
				/* load balance */
				pool_set_node_to_be_sent(query_context,
										 session_context->load_balance_node_id);
			}
			else
			{
				/* only send to master node */
				pool_set_node_to_be_sent(query_context, REAL_MASTER_NODE_ID);
			}
		}
		else if (IsA(node, DeclareCursorStmt) ||
				 IsA(node, ClosePortalStmt) ||
				 IsA(node, FetchStmt))
		{
			/* cursor statements follow the same rule as SELECT when flagged */
			if (query_context->loadbalance_cursor)
			{
				if (pool_config->load_balance_mode &&
					MAJOR(backend) == PROTO_MAJOR_V3 &&
					TSTATE(backend, MASTER_NODE_ID) == 'I')
				{
					/* load balance */
					pool_set_node_to_be_sent(query_context,
											 session_context->load_balance_node_id);
				}
				else
				{
					/* only send to master node */
					pool_set_node_to_be_sent(query_context, REAL_MASTER_NODE_ID);
				}
			}
			else
			{
				/* send to all nodes */
				pool_setall_node_to_be_sent(query_context);
			}
		}
		else
		{
			/* send to all nodes */
			pool_setall_node_to_be_sent(query_context);
		}
	}
	else
	{
		pool_error("pool_where_to_send: unknown mode");
		return;
	}

	/*
	 * EXECUTE?
	 * Look the statement name up among sent messages, first of kind 'Q',
	 * then of kind 'P'.  When a message is found, pool_copy_prep_where()
	 * is called with that message's node map and ours.
	 */
	if (IsA(node, ExecuteStmt))
	{
		POOL_SENT_MESSAGE *msg;

		msg = pool_get_sent_message('Q', ((ExecuteStmt *)node)->name);
		if (!msg)
			msg = pool_get_sent_message('P', ((ExecuteStmt *)node)->name);
		if (msg)
			pool_copy_prep_where(msg->query_context->where_to_send,
								 query_context->where_to_send);
	}

	/*
	 * DEALLOCATE?
	 */
	else if (IsA(node, DeallocateStmt))
	{
		where_to_send_deallocate(query_context, node);
	}

	/* the first marked node becomes the virtual master */
	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (query_context->where_to_send[i])
		{
			query_context->virtual_master_node_id = i;
			break;
		}
	}

	return;
}
/*
 * Reuse existing connection
 *
 * Stores the new startup packet in every valid backend slot, re-runs
 * authentication against the frontend and, for protocol major version 3,
 * applies the frontend's application_name to the backends and sends the
 * parameters to the frontend.  Finally a ReadyForQuery ('Z') message is
 * sent to the frontend.
 *
 * Returns true on success.  On every failure the frontend is closed,
 * connection_count_down() is called and false is returned.
 */
static bool connect_using_existing_connection(POOL_CONNECTION *frontend,
											  POOL_CONNECTION_POOL *backend,
											  StartupPacket *sp)
{
	int i, freed = 0;

	/*
	 * Save startup packet info.  The previous packet is released only
	 * once, through the first valid slot; every valid slot then points to
	 * the new packet.
	 */
	for (i = 0; i < NUM_BACKENDS; i++)
	{
		if (VALID_BACKEND(i))
		{
			if (!freed)
			{
				pool_free_startup_packet(backend->slots[i]->sp);
				freed = 1;
			}

			backend->slots[i]->sp = sp;
		}
	}

	/* Reuse existing connection to backend */
	if (pool_do_reauth(frontend, backend))
		goto fail;

	if (MAJOR(backend) == 3)
	{
		char command_buf[1024];

		/* If we have received application_name in the start up
		 * packet, we send SET command to backend. Also we add or
		 * replace existing application_name data.
		 *
		 * NOTE(review): application_name is placed into the command
		 * without quoting and may be truncated by the buffer size.
		 */
		if (sp->application_name)
		{
			snprintf(command_buf, sizeof(command_buf), "SET application_name TO '%s'", sp->application_name);

			for (i = 0; i < NUM_BACKENDS; i++)
			{
				if (!VALID_BACKEND(i))
					continue;

				if (do_command(frontend, CONNECTION(backend, i),
							   command_buf, MAJOR(backend),
							   MASTER_CONNECTION(backend)->pid,
							   MASTER_CONNECTION(backend)->key, 0) != POOL_CONTINUE)
				{
					pool_error("connect_using_existing_connection: do_command failed. command: %s", command_buf);

					/*
					 * Release the frontend like every other failure path
					 * of this function does.
					 */
					goto fail;
				}
			}

			pool_add_param(&MASTER(backend)->params, "application_name", sp->application_name);
		}

		if (send_params(frontend, backend))
			goto fail;
	}

	/* Send ReadyForQuery to frontend */
	pool_write(frontend, "Z", 1);

	if (MAJOR(backend) == 3)
	{
		int len;
		char tstate;

		/* message length in network byte order, then the transaction state */
		len = htonl(5);
		pool_write(frontend, &len, sizeof(len));
		tstate = TSTATE(backend, MASTER_NODE_ID);
		pool_write(frontend, &tstate, 1);
	}

	if (pool_flush(frontend) < 0)
		goto fail;

	return true;

fail:
	pool_close(frontend);
	connection_count_down();
	return false;
}