/* * ExecQueryUsingCursor: run a SELECT-like query using a cursor * * This feature allows result sets larger than RAM to be dealt with. * * Returns true if the query executed successfully, false otherwise. * * If pset.timing is on, total query time (exclusive of result-printing) is * stored into *elapsed_msec. */ static bool ExecQueryUsingCursor(const char *query, double *elapsed_msec) { bool OK = true; PGresult *results; PQExpBufferData buf; printQueryOpt my_popt = pset.popt; FILE *queryFout_copy = pset.queryFout; bool queryFoutPipe_copy = pset.queryFoutPipe; bool started_txn = false; bool did_pager = false; int ntuples; char fetch_cmd[64]; instr_time before, after; int flush_error; *elapsed_msec = 0; /* initialize print options for partial table output */ my_popt.topt.start_table = true; my_popt.topt.stop_table = false; my_popt.topt.prior_records = 0; if (pset.timing) INSTR_TIME_SET_CURRENT(before); /* if we're not in a transaction, start one */ if (PQtransactionStatus(pset.db) == PQTRANS_IDLE) { results = PQexec(pset.db, "BEGIN"); OK = AcceptResult(results) && (PQresultStatus(results) == PGRES_COMMAND_OK); PQclear(results); if (!OK) return false; started_txn = true; } /* Send DECLARE CURSOR */ initPQExpBuffer(&buf); appendPQExpBuffer(&buf, "DECLARE _psql_cursor NO SCROLL CURSOR FOR\n%s", query); results = PQexec(pset.db, buf.data); OK = AcceptResult(results) && (PQresultStatus(results) == PGRES_COMMAND_OK); PQclear(results); termPQExpBuffer(&buf); if (!OK) goto cleanup; if (pset.timing) { INSTR_TIME_SET_CURRENT(after); INSTR_TIME_SUBTRACT(after, before); *elapsed_msec += INSTR_TIME_GET_MILLISEC(after); } snprintf(fetch_cmd, sizeof(fetch_cmd), "FETCH FORWARD %d FROM _psql_cursor", pset.fetch_count); /* prepare to write output to \g argument, if any */ if (pset.gfname) { /* keep this code in sync with PrintQueryTuples */ pset.queryFout = stdout; /* so it doesn't get closed */ /* open file/pipe */ if (!setQFout(pset.gfname)) { pset.queryFout = queryFout_copy; pset.queryFoutPipe = queryFoutPipe_copy; OK = false; goto cleanup; } } /* clear any pre-existing error indication on the output stream */ clearerr(pset.queryFout); for (;;) { if (pset.timing) INSTR_TIME_SET_CURRENT(before); /* get FETCH_COUNT tuples at a time */ results = PQexec(pset.db, fetch_cmd); if (pset.timing) { INSTR_TIME_SET_CURRENT(after); INSTR_TIME_SUBTRACT(after, before); *elapsed_msec += INSTR_TIME_GET_MILLISEC(after); } if (PQresultStatus(results) != PGRES_TUPLES_OK) { /* shut down pager before printing error message */ if (did_pager) { ClosePager(pset.queryFout); pset.queryFout = queryFout_copy; pset.queryFoutPipe = queryFoutPipe_copy; did_pager = false; } OK = AcceptResult(results); psql_assert(!OK); PQclear(results); break; } ntuples = PQntuples(results); if (ntuples < pset.fetch_count) { /* this is the last result set, so allow footer decoration */ my_popt.topt.stop_table = true; } else if (pset.queryFout == stdout && !did_pager) { /* * If query requires multiple result sets, hack to ensure that * only one pager instance is used for the whole mess */ pset.queryFout = PageOutput(100000, my_popt.topt.pager); did_pager = true; } printQuery(results, &my_popt, pset.queryFout, pset.logfile); PQclear(results); /* after the first result set, disallow header decoration */ my_popt.topt.start_table = false; my_popt.topt.prior_records += ntuples; /* * Make sure to flush the output stream, so intermediate results are * visible to the client immediately. We check the results because if * the pager dies/exits/etc, there's no sense throwing more data at * it. */ flush_error = fflush(pset.queryFout); /* * Check if we are at the end, if a cancel was pressed, or if there * were any errors either trying to flush out the results, or more * generally on the output stream at all. If we hit any errors * writing things to the stream, we presume $PAGER has disappeared and * stop bothering to pull down more data. */ if (ntuples < pset.fetch_count || cancel_pressed || flush_error || ferror(pset.queryFout)) break; } /* close \g argument file/pipe, restore old setting */ if (pset.gfname) { /* keep this code in sync with PrintQueryTuples */ setQFout(NULL); pset.queryFout = queryFout_copy; pset.queryFoutPipe = queryFoutPipe_copy; free(pset.gfname); pset.gfname = NULL; } else if (did_pager) { ClosePager(pset.queryFout); pset.queryFout = queryFout_copy; pset.queryFoutPipe = queryFoutPipe_copy; } cleanup: if (pset.timing) INSTR_TIME_SET_CURRENT(before); /* * We try to close the cursor on either success or failure, but on failure * ignore the result (it's probably just a bleat about being in an aborted * transaction) */ results = PQexec(pset.db, "CLOSE _psql_cursor"); if (OK) { OK = AcceptResult(results) && (PQresultStatus(results) == PGRES_COMMAND_OK); } PQclear(results); if (started_txn) { results = PQexec(pset.db, OK ? "COMMIT" : "ROLLBACK"); OK &= AcceptResult(results) && (PQresultStatus(results) == PGRES_COMMAND_OK); PQclear(results); } if (pset.timing) { INSTR_TIME_SET_CURRENT(after); INSTR_TIME_SUBTRACT(after, before); *elapsed_msec += INSTR_TIME_GET_MILLISEC(after); } return OK; }
static void powa_main(Datum main_arg) { char *q1 = "SELECT powa_take_snapshot()"; static char *q2 = "SET application_name = 'POWA collector'"; instr_time begin; instr_time end; long time_to_wait; die_on_too_small_frequency(); /* Set up signal handlers, then unblock signalsl */ pqsignal(SIGHUP, powa_sighup); pqsignal(SIGTERM, powa_sigterm); BackgroundWorkerUnblockSignals(); /* We only connect when powa_frequency >0. If not, powa has been deactivated */ if (powa_frequency < 0) { elog(LOG, "POWA is deactivated (powa.frequency = %i), exiting", powa_frequency); exit(1); } // We got here: it means powa_frequency > 0. Let's connect /* Connect to POWA database */ BackgroundWorkerInitializeConnection(powa_database, NULL); elog(LOG, "POWA connected to %s", powa_database); StartTransactionCommand(); SetCurrentStatementStartTimestamp(); SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); SPI_execute(q2, false, 0); SPI_finish(); PopActiveSnapshot(); CommitTransactionCommand(); /* let's store the current time. It will be used to calculate a quite stable interval between each measure */ while (!got_sigterm) { /* We can get here with a new value of powa_frequency because of a reload. Let's suicide to disconnect if this value is <0 */ if (powa_frequency < 0) { elog(LOG, "POWA exits to disconnect from the database now"); exit(1); } INSTR_TIME_SET_CURRENT(begin); ResetLatch(&MyProc->procLatch); StartTransactionCommand(); SetCurrentStatementStartTimestamp(); SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); SPI_execute(q1, false, 0); SPI_finish(); PopActiveSnapshot(); CommitTransactionCommand(); INSTR_TIME_SET_CURRENT(end); INSTR_TIME_SUBTRACT(end, begin); /* Wait powa.frequency, compensate for work time of last snapshot */ /* If we got off schedule (because of a compact or delete, just do another operation right now */ time_to_wait = powa_frequency - INSTR_TIME_GET_MILLISEC(end); if (time_to_wait > 0) { WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, time_to_wait); } } proc_exit(0); }
/* * ProcessUtility hook */ static void pgss_ProcessUtility(Node *parsetree, const char *queryString, ParamListInfo params, bool isTopLevel, DestReceiver *dest, char *completionTag) { if (pgss_track_utility && pgss_enabled()) { instr_time start; instr_time duration; uint64 rows = 0; BufferUsage bufusage; bufusage = pgBufferUsage; INSTR_TIME_SET_CURRENT(start); nested_level++; PG_TRY(); { if (prev_ProcessUtility) prev_ProcessUtility(parsetree, queryString, params, isTopLevel, dest, completionTag); else standard_ProcessUtility(parsetree, queryString, params, isTopLevel, dest, completionTag); nested_level--; } PG_CATCH(); { nested_level--; PG_RE_THROW(); } PG_END_TRY(); INSTR_TIME_SET_CURRENT(duration); INSTR_TIME_SUBTRACT(duration, start); /* parse command tag to retrieve the number of affected rows. */ if (completionTag && sscanf(completionTag, "COPY " UINT64_FORMAT, &rows) != 1) rows = 0; /* calc differences of buffer counters. */ bufusage.shared_blks_hit = pgBufferUsage.shared_blks_hit - bufusage.shared_blks_hit; bufusage.shared_blks_read = pgBufferUsage.shared_blks_read - bufusage.shared_blks_read; bufusage.shared_blks_written = pgBufferUsage.shared_blks_written - bufusage.shared_blks_written; bufusage.local_blks_hit = pgBufferUsage.local_blks_hit - bufusage.local_blks_hit; bufusage.local_blks_read = pgBufferUsage.local_blks_read - bufusage.local_blks_read; bufusage.local_blks_written = pgBufferUsage.local_blks_written - bufusage.local_blks_written; bufusage.temp_blks_read = pgBufferUsage.temp_blks_read - bufusage.temp_blks_read; bufusage.temp_blks_written = pgBufferUsage.temp_blks_written - bufusage.temp_blks_written; pgss_store(queryString, INSTR_TIME_GET_DOUBLE(duration), rows, &bufusage); } else { if (prev_ProcessUtility) prev_ProcessUtility(parsetree, queryString, params, isTopLevel, dest, completionTag); else standard_ProcessUtility(parsetree, queryString, params, isTopLevel, dest, completionTag); } }
/* * SendQuery: send the query string to the backend * (and print out results) * * Note: This is the "front door" way to send a query. That is, use it to * send queries actually entered by the user. These queries will be subject to * single step mode. * To send "back door" queries (generated by slash commands, etc.) in a * controlled way, use PSQLexec(). * * Returns true if the query executed successfully, false otherwise. */ bool SendQuery(const char *query) { PGresult *results; PGTransactionStatusType transaction_status; double elapsed_msec = 0; bool OK, on_error_rollback_savepoint = false; static bool on_error_rollback_warning = false; if (!pset.db) { psql_error("You are currently not connected to a database.\n"); return false; } if (pset.singlestep) { char buf[3]; printf(_("***(Single step mode: verify command)*******************************************\n" "%s\n" "***(press return to proceed or enter x and return to cancel)********************\n"), query); fflush(stdout); if (fgets(buf, sizeof(buf), stdin) != NULL) if (buf[0] == 'x') return false; } else if (pset.echo == PSQL_ECHO_QUERIES) { puts(query); fflush(stdout); } if (pset.logfile) { fprintf(pset.logfile, _("********* QUERY **********\n" "%s\n" "**************************\n\n"), query); fflush(pset.logfile); } SetCancelConn(); transaction_status = PQtransactionStatus(pset.db); if (transaction_status == PQTRANS_IDLE && !pset.autocommit && !command_no_begin(query)) { results = PQexec(pset.db, "BEGIN"); if (PQresultStatus(results) != PGRES_COMMAND_OK) { psql_error("%s", PQerrorMessage(pset.db)); PQclear(results); ResetCancelConn(); return false; } PQclear(results); transaction_status = PQtransactionStatus(pset.db); } if (transaction_status == PQTRANS_INTRANS && pset.on_error_rollback != PSQL_ERROR_ROLLBACK_OFF && (pset.cur_cmd_interactive || pset.on_error_rollback == PSQL_ERROR_ROLLBACK_ON)) { if (on_error_rollback_warning == false && pset.sversion < 80000) { fprintf(stderr, _("The server (version %d.%d) does not support savepoints for ON_ERROR_ROLLBACK.\n"), pset.sversion / 10000, (pset.sversion / 100) % 100); on_error_rollback_warning = true; } else { results = PQexec(pset.db, "SAVEPOINT pg_psql_temporary_savepoint"); if (PQresultStatus(results) != PGRES_COMMAND_OK) { psql_error("%s", PQerrorMessage(pset.db)); PQclear(results); ResetCancelConn(); return false; } PQclear(results); on_error_rollback_savepoint = true; } } if (pset.fetch_count <= 0 || !is_select_command(query)) { /* Default fetch-it-all-and-print mode */ instr_time before, after; if (pset.timing) INSTR_TIME_SET_CURRENT(before); results = PQexec(pset.db, query); /* these operations are included in the timing result: */ ResetCancelConn(); OK = (AcceptResult(results) && ProcessCopyResult(results)); if (pset.timing) { INSTR_TIME_SET_CURRENT(after); INSTR_TIME_SUBTRACT(after, before); elapsed_msec = INSTR_TIME_GET_MILLISEC(after); } /* but printing results isn't: */ if (OK) OK = PrintQueryResults(results); } else { /* Fetch-in-segments mode */ OK = ExecQueryUsingCursor(query, &elapsed_msec); ResetCancelConn(); results = NULL; /* PQclear(NULL) does nothing */ } /* If we made a temporary savepoint, possibly release/rollback */ if (on_error_rollback_savepoint) { const char *svptcmd; transaction_status = PQtransactionStatus(pset.db); if (transaction_status == PQTRANS_INERROR) { /* We always rollback on an error */ svptcmd = "ROLLBACK TO pg_psql_temporary_savepoint"; } else if (transaction_status != PQTRANS_INTRANS) { /* If they are no longer in a transaction, then do nothing */ svptcmd = NULL; } else { /* * Do nothing if they are messing with savepoints themselves: If * the user did RELEASE or ROLLBACK, our savepoint is gone. If * they issued a SAVEPOINT, releasing ours would remove theirs. */ if (results && (strcmp(PQcmdStatus(results), "SAVEPOINT") == 0 || strcmp(PQcmdStatus(results), "RELEASE") == 0 || strcmp(PQcmdStatus(results), "ROLLBACK") == 0)) svptcmd = NULL; else svptcmd = "RELEASE pg_psql_temporary_savepoint"; } if (svptcmd) { PGresult *svptres; svptres = PQexec(pset.db, svptcmd); if (PQresultStatus(svptres) != PGRES_COMMAND_OK) { psql_error("%s", PQerrorMessage(pset.db)); PQclear(svptres); PQclear(results); ResetCancelConn(); return false; } PQclear(svptres); } } PQclear(results); /* Possible microtiming output */ if (OK && pset.timing) printf(_("Time: %.3f ms\n"), elapsed_msec); /* check for events that may occur during query execution */ if (pset.encoding != PQclientEncoding(pset.db) && PQclientEncoding(pset.db) >= 0) { /* track effects of SET CLIENT_ENCODING */ pset.encoding = PQclientEncoding(pset.db); pset.popt.topt.encoding = pset.encoding; SetVariable(pset.vars, "ENCODING", pg_encoding_to_char(pset.encoding)); } PrintNotifications(); return OK; }
/* * PSQLexecWatch * * This function is used for \watch command to send the query to * the server and print out the results. * * Returns 1 if the query executed successfully, 0 if it cannot be repeated, * e.g., because of the interrupt, -1 on error. */ int PSQLexecWatch(const char *query, const printQueryOpt *opt) { PGresult *res; double elapsed_msec = 0; instr_time before; instr_time after; if (!pset.db) { psql_error("You are currently not connected to a database.\n"); return 0; } SetCancelConn(); if (pset.timing) INSTR_TIME_SET_CURRENT(before); res = PQexec(pset.db, query); ResetCancelConn(); if (!AcceptResult(res)) { PQclear(res); return 0; } if (pset.timing) { INSTR_TIME_SET_CURRENT(after); INSTR_TIME_SUBTRACT(after, before); elapsed_msec = INSTR_TIME_GET_MILLISEC(after); } /* * If SIGINT is sent while the query is processing, the interrupt * will be consumed. The user's intention, though, is to cancel * the entire watch process, so detect a sent cancellation request and * exit in this case. */ if (cancel_pressed) { PQclear(res); return 0; } switch (PQresultStatus(res)) { case PGRES_TUPLES_OK: printQuery(res, opt, pset.queryFout, pset.logfile); break; case PGRES_COMMAND_OK: fprintf(pset.queryFout, "%s\n%s\n\n", opt->title, PQcmdStatus(res)); break; case PGRES_EMPTY_QUERY: psql_error(_("\\watch cannot be used with an empty query\n")); PQclear(res); return -1; case PGRES_COPY_OUT: case PGRES_COPY_IN: case PGRES_COPY_BOTH: psql_error(_("\\watch cannot be used with COPY\n")); PQclear(res); return -1; default: psql_error(_("unexpected result status for \\watch\n")); PQclear(res); return -1; } PQclear(res); fflush(pset.queryFout); /* Possible microtiming output */ if (pset.timing) printf(_("Time: %.3f ms\n"), elapsed_msec); return 1; }
/* * Rescan end pages to verify that they are (still) empty of tuples. * * Returns number of nondeletable pages (last nonempty page + 1). */ static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) { BlockNumber blkno; instr_time starttime; /* Initialize the starttime if we check for conflicting lock requests */ INSTR_TIME_SET_CURRENT(starttime); /* Strange coding of loop control is needed because blkno is unsigned */ blkno = vacrelstats->rel_pages; while (blkno > vacrelstats->nonempty_pages) { Buffer buf; Page page; OffsetNumber offnum, maxoff; bool hastup; /* * Check if another process requests a lock on our relation. We are * holding an AccessExclusiveLock here, so they will be waiting. We * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we * only check if that interval has elapsed once every 32 blocks to * keep the number of system calls and actual shared lock table * lookups to a minimum. */ if ((blkno % 32) == 0) { instr_time currenttime; instr_time elapsed; INSTR_TIME_SET_CURRENT(currenttime); elapsed = currenttime; INSTR_TIME_SUBTRACT(elapsed, starttime); if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000) >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL) { if (LockHasWaitersRelation(onerel, AccessExclusiveLock)) { ereport(elevel, (errmsg("\"%s\": suspending truncate due to conflicting lock request", RelationGetRelationName(onerel)))); vacrelstats->lock_waiter_detected = true; return blkno; } starttime = currenttime; } } /* * We don't insert a vacuum delay point here, because we have an * exclusive lock on the table which we want to hold for as short a * time as possible. We still need to check for interrupts however. */ CHECK_FOR_INTERRUPTS(); blkno--; buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno, RBM_NORMAL, vac_strategy); /* In this phase we only need shared access to the buffer */ LockBuffer(buf, BUFFER_LOCK_SHARE); page = BufferGetPage(buf); if (PageIsNew(page) || PageIsEmpty(page)) { /* PageIsNew probably shouldn't happen... */ UnlockReleaseBuffer(buf); continue; } hastup = false; maxoff = PageGetMaxOffsetNumber(page); for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum)) { ItemId itemid; itemid = PageGetItemId(page, offnum); /* * Note: any non-unused item should be taken as a reason to keep * this page. We formerly thought that DEAD tuples could be * thrown away, but that's not so, because we'd not have cleaned * out their index entries. */ if (ItemIdIsUsed(itemid)) { hastup = true; break; /* can stop scanning */ } } /* scan along page */ UnlockReleaseBuffer(buf); /* Done scanning if we found a tuple here */ if (hastup) return blkno + 1; } /* * If we fall out of the loop, all the previously-thought-to-be-empty * pages still are; we need not bother to look at the last known-nonempty * page. */ return vacrelstats->nonempty_pages; }
/* * Select next block to sample. * * Uses linear probing algorithm for picking next block. */ static BlockNumber system_time_nextsampleblock(SampleScanState *node) { SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state; HeapScanDesc scan = node->ss.ss_currentScanDesc; instr_time cur_time; /* First call within scan? */ if (sampler->doneblocks == 0) { /* First scan within query? */ if (sampler->step == 0) { /* Initialize now that we have scan descriptor */ SamplerRandomState randstate; /* If relation is empty, there's nothing to scan */ if (scan->rs_nblocks == 0) return InvalidBlockNumber; /* We only need an RNG during this setup step */ sampler_random_init_state(sampler->seed, randstate); /* Compute nblocks/firstblock/step only once per query */ sampler->nblocks = scan->rs_nblocks; /* Choose random starting block within the relation */ /* (Actually this is the predecessor of the first block visited) */ sampler->firstblock = sampler_random_fract(randstate) * sampler->nblocks; /* Find relative prime as step size for linear probing */ sampler->step = random_relative_prime(sampler->nblocks, randstate); } /* Reinitialize lb and start_time */ sampler->lb = sampler->firstblock; INSTR_TIME_SET_CURRENT(sampler->start_time); } /* If we've read all blocks in relation, we're done */ if (++sampler->doneblocks > sampler->nblocks) return InvalidBlockNumber; /* If we've used up all the allotted time, we're done */ INSTR_TIME_SET_CURRENT(cur_time); INSTR_TIME_SUBTRACT(cur_time, sampler->start_time); if (INSTR_TIME_GET_MILLISEC(cur_time) >= sampler->millis) return InvalidBlockNumber; /* * It's probably impossible for scan->rs_nblocks to decrease between scans * within a query; but just in case, loop until we select a block number * less than scan->rs_nblocks. We don't care if scan->rs_nblocks has * increased since the first scan. */ do { /* Advance lb, using uint64 arithmetic to forestall overflow */ sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks; } while (sampler->lb >= scan->rs_nblocks); return sampler->lb; }
/* * Like WaitLatch, but with an extra socket argument for WL_SOCKET_* * conditions. * * When waiting on a socket, WL_SOCKET_READABLE *must* be included in * 'wakeEvents'; WL_SOCKET_WRITEABLE is optional. The reason for this is * that EOF and error conditions are reported only via WL_SOCKET_READABLE. */ int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, long timeout) { int result = 0; int rc; instr_time start_time, cur_time; long cur_timeout; #ifdef HAVE_POLL struct pollfd pfds[3]; int nfds; #else struct timeval tv, *tvp; fd_set input_mask; fd_set output_mask; int hifd; #endif /* Ignore WL_SOCKET_* events if no valid socket is given */ if (sock == PGINVALID_SOCKET) wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE); Assert(wakeEvents != 0); /* must have at least one wake event */ /* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */ Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE); if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid) elog(ERROR, "cannot wait on a latch owned by another process"); /* * Initialize timeout if requested. We must record the current time so * that we can determine the remaining timeout if the poll() or select() * is interrupted. (On some platforms, select() will update the contents * of "tv" for us, but unfortunately we can't rely on that.) */ if (wakeEvents & WL_TIMEOUT) { INSTR_TIME_SET_CURRENT(start_time); Assert(timeout >= 0); cur_timeout = timeout; #ifndef HAVE_POLL tv.tv_sec = cur_timeout / 1000L; tv.tv_usec = (cur_timeout % 1000L) * 1000L; tvp = &tv; #endif } else { cur_timeout = -1; #ifndef HAVE_POLL tvp = NULL; #endif } waiting = true; do { /* * Clear the pipe, then check if the latch is set already. If someone * sets the latch between this and the poll()/select() below, the * setter will write a byte to the pipe (or signal us and the signal * handler will do that), and the poll()/select() will return * immediately. * * Note: we assume that the kernel calls involved in drainSelfPipe() * and SetLatch() will provide adequate synchronization on machines * with weak memory ordering, so that we cannot miss seeing is_set if * the signal byte is already in the pipe when we drain it. */ drainSelfPipe(); if ((wakeEvents & WL_LATCH_SET) && latch->is_set) { result |= WL_LATCH_SET; /* * Leave loop immediately, avoid blocking again. We don't attempt * to report any other events that might also be satisfied. */ break; } /* Must wait ... we use poll(2) if available, otherwise select(2) */ #ifdef HAVE_POLL nfds = 0; if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) { /* socket, if used, is always in pfds[0] */ pfds[0].fd = sock; pfds[0].events = 0; if (wakeEvents & WL_SOCKET_READABLE) pfds[0].events |= POLLIN; if (wakeEvents & WL_SOCKET_WRITEABLE) pfds[0].events |= POLLOUT; pfds[0].revents = 0; nfds++; } pfds[nfds].fd = selfpipe_readfd; pfds[nfds].events = POLLIN; pfds[nfds].revents = 0; nfds++; if (wakeEvents & WL_POSTMASTER_DEATH) { /* postmaster fd, if used, is always in pfds[nfds - 1] */ pfds[nfds].fd = postmaster_alive_fds[POSTMASTER_FD_WATCH]; pfds[nfds].events = POLLIN; pfds[nfds].revents = 0; nfds++; } /* Sleep */ rc = poll(pfds, nfds, (int) cur_timeout); /* Check return code */ if (rc < 0) { /* EINTR is okay, otherwise complain */ if (errno != EINTR) { waiting = false; ereport(ERROR, (errcode_for_socket_access(), errmsg("poll() failed: %m"))); } } else if (rc == 0) { /* timeout exceeded */ if (wakeEvents & WL_TIMEOUT) result |= WL_TIMEOUT; } else { /* at least one event occurred, so check revents values */ if ((wakeEvents & WL_SOCKET_READABLE) && (pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL))) { /* data available in socket, or EOF/error condition */ result |= WL_SOCKET_READABLE; } if ((wakeEvents & WL_SOCKET_WRITEABLE) && (pfds[0].revents & POLLOUT)) { result |= WL_SOCKET_WRITEABLE; } /* * We expect a POLLHUP when the remote end is closed, but because * we don't expect the pipe to become readable or to have any * errors either, treat those cases as postmaster death, too. */ if ((wakeEvents & WL_POSTMASTER_DEATH) && (pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL))) { /* * According to the select(2) man page on Linux, select(2) may * spuriously return and report a file descriptor as readable, * when it's not; and presumably so can poll(2). It's not * clear that the relevant cases would ever apply to the * postmaster pipe, but since the consequences of falsely * returning WL_POSTMASTER_DEATH could be pretty unpleasant, * we take the trouble to positively verify EOF with * PostmasterIsAlive(). */ if (!PostmasterIsAlive()) result |= WL_POSTMASTER_DEATH; } } #else /* !HAVE_POLL */ FD_ZERO(&input_mask); FD_ZERO(&output_mask); FD_SET(selfpipe_readfd, &input_mask); hifd = selfpipe_readfd; if (wakeEvents & WL_POSTMASTER_DEATH) { FD_SET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask); if (postmaster_alive_fds[POSTMASTER_FD_WATCH] > hifd) hifd = postmaster_alive_fds[POSTMASTER_FD_WATCH]; } if (wakeEvents & WL_SOCKET_READABLE) { FD_SET(sock, &input_mask); if (sock > hifd) hifd = sock; } if (wakeEvents & WL_SOCKET_WRITEABLE) { FD_SET(sock, &output_mask); if (sock > hifd) hifd = sock; } /* Sleep */ rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp); /* Check return code */ if (rc < 0) { /* EINTR is okay, otherwise complain */ if (errno != EINTR) { waiting = false; ereport(ERROR, (errcode_for_socket_access(), errmsg("select() failed: %m"))); } } else if (rc == 0) { /* timeout exceeded */ if (wakeEvents & WL_TIMEOUT) result |= WL_TIMEOUT; } else { /* at least one event occurred, so check masks */ if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask)) { /* data available in socket, or EOF */ result |= WL_SOCKET_READABLE; } if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask)) { result |= WL_SOCKET_WRITEABLE; } if ((wakeEvents & WL_POSTMASTER_DEATH) && FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask)) { /* * According to the select(2) man page on Linux, select(2) may * spuriously return and report a file descriptor as readable, * when it's not; and presumably so can poll(2). It's not * clear that the relevant cases would ever apply to the * postmaster pipe, but since the consequences of falsely * returning WL_POSTMASTER_DEATH could be pretty unpleasant, * we take the trouble to positively verify EOF with * PostmasterIsAlive(). */ if (!PostmasterIsAlive()) result |= WL_POSTMASTER_DEATH; } } #endif /* HAVE_POLL */ /* If we're not done, update cur_timeout for next iteration */ if (result == 0 && cur_timeout >= 0) { INSTR_TIME_SET_CURRENT(cur_time); INSTR_TIME_SUBTRACT(cur_time, start_time); cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time); if (cur_timeout < 0) cur_timeout = 0; #ifndef HAVE_POLL tv.tv_sec = cur_timeout / 1000L; tv.tv_usec = (cur_timeout % 1000L) * 1000L; #endif } } while (result == 0); waiting = false; return result; }
/* * ExecQueryUsingCursor: run a SELECT-like query using a cursor * * This feature allows result sets larger than RAM to be dealt with. * * Returns true if the query executed successfully, false otherwise. * * If pset.timing is on, total query time (exclusive of result-printing) is * stored into *elapsed_msec. */ static bool ExecQueryUsingCursor(const char *query, double *elapsed_msec) { bool OK = true; PGresult *results; PQExpBufferData buf; printQueryOpt my_popt = pset.popt; FILE *fout; bool is_pipe; bool is_pager = false; bool started_txn = false; int ntuples; int fetch_count; char fetch_cmd[64]; instr_time before, after; int flush_error; *elapsed_msec = 0; /* initialize print options for partial table output */ my_popt.topt.start_table = true; my_popt.topt.stop_table = false; my_popt.topt.prior_records = 0; if (pset.timing) INSTR_TIME_SET_CURRENT(before); /* if we're not in a transaction, start one */ if (PQtransactionStatus(pset.db) == PQTRANS_IDLE) { results = PQexec(pset.db, "BEGIN"); OK = AcceptResult(results) && (PQresultStatus(results) == PGRES_COMMAND_OK); ClearOrSaveResult(results); if (!OK) return false; started_txn = true; } /* Send DECLARE CURSOR */ initPQExpBuffer(&buf); appendPQExpBuffer(&buf, "DECLARE _psql_cursor NO SCROLL CURSOR FOR\n%s", query); results = PQexec(pset.db, buf.data); OK = AcceptResult(results) && (PQresultStatus(results) == PGRES_COMMAND_OK); ClearOrSaveResult(results); termPQExpBuffer(&buf); if (!OK) goto cleanup; if (pset.timing) { INSTR_TIME_SET_CURRENT(after); INSTR_TIME_SUBTRACT(after, before); *elapsed_msec += INSTR_TIME_GET_MILLISEC(after); } /* * In \gset mode, we force the fetch count to be 2, so that we will throw * the appropriate error if the query returns more than one row. */ if (pset.gset_prefix) fetch_count = 2; else fetch_count = pset.fetch_count; snprintf(fetch_cmd, sizeof(fetch_cmd), "FETCH FORWARD %d FROM _psql_cursor", fetch_count); /* prepare to write output to \g argument, if any */ if (pset.gfname) { if (!openQueryOutputFile(pset.gfname, &fout, &is_pipe)) { OK = false; goto cleanup; } if (is_pipe) disable_sigpipe_trap(); } else { fout = pset.queryFout; is_pipe = false; /* doesn't matter */ } /* clear any pre-existing error indication on the output stream */ clearerr(fout); for (;;) { if (pset.timing) INSTR_TIME_SET_CURRENT(before); /* get fetch_count tuples at a time */ results = PQexec(pset.db, fetch_cmd); if (pset.timing) { INSTR_TIME_SET_CURRENT(after); INSTR_TIME_SUBTRACT(after, before); *elapsed_msec += INSTR_TIME_GET_MILLISEC(after); } if (PQresultStatus(results) != PGRES_TUPLES_OK) { /* shut down pager before printing error message */ if (is_pager) { ClosePager(fout); is_pager = false; } OK = AcceptResult(results); Assert(!OK); ClearOrSaveResult(results); break; } if (pset.gset_prefix) { /* StoreQueryTuple will complain if not exactly one row */ OK = StoreQueryTuple(results); ClearOrSaveResult(results); break; } /* Note we do not deal with \gexec or \crosstabview modes here */ ntuples = PQntuples(results); if (ntuples < fetch_count) { /* this is the last result set, so allow footer decoration */ my_popt.topt.stop_table = true; } else if (fout == stdout && !is_pager) { /* * If query requires multiple result sets, hack to ensure that * only one pager instance is used for the whole mess */ fout = PageOutput(INT_MAX, &(my_popt.topt)); is_pager = true; } printQuery(results, &my_popt, fout, is_pager, pset.logfile); ClearOrSaveResult(results); /* after the first result set, disallow header decoration */ my_popt.topt.start_table = false; my_popt.topt.prior_records += ntuples; /* * Make sure to flush the output stream, so intermediate results are * visible to the client immediately. We check the results because if * the pager dies/exits/etc, there's no sense throwing more data at * it. */ flush_error = fflush(fout); /* * Check if we are at the end, if a cancel was pressed, or if there * were any errors either trying to flush out the results, or more * generally on the output stream at all. If we hit any errors * writing things to the stream, we presume $PAGER has disappeared and * stop bothering to pull down more data. */ if (ntuples < fetch_count || cancel_pressed || flush_error || ferror(fout)) break; } if (pset.gfname) { /* close \g argument file/pipe */ if (is_pipe) { pclose(fout); restore_sigpipe_trap(); } else fclose(fout); } else if (is_pager) { /* close transient pager */ ClosePager(fout); } cleanup: if (pset.timing) INSTR_TIME_SET_CURRENT(before); /* * We try to close the cursor on either success or failure, but on failure * ignore the result (it's probably just a bleat about being in an aborted * transaction) */ results = PQexec(pset.db, "CLOSE _psql_cursor"); if (OK) { OK = AcceptResult(results) && (PQresultStatus(results) == PGRES_COMMAND_OK); ClearOrSaveResult(results); } else PQclear(results); if (started_txn) { results = PQexec(pset.db, OK ? "COMMIT" : "ROLLBACK"); OK &= AcceptResult(results) && (PQresultStatus(results) == PGRES_COMMAND_OK); ClearOrSaveResult(results); } if (pset.timing) { INSTR_TIME_SET_CURRENT(after); INSTR_TIME_SUBTRACT(after, before); *elapsed_msec += INSTR_TIME_GET_MILLISEC(after); } return OK; }