/*
 * thread_DispatchOut
 *
 * Send the command in pParms->query_text to every entry of
 * pParms->dispatchResultPtrArray (db_count entries).
 *
 * For each entry the per-dispatch state (hasDispatched, sentSignal,
 * wasCanceled) is reset first.  Then:
 *	 - if shouldStillDispatchCommand() says no, the entry is marked as no
 *	   longer running and a couple of sanity diagnostics are logged;
 *	 - if libpq reports CONNECTION_BAD, the error is logged and recorded via
 *	   cdbdisp_appendMessage(), the connection is closed with PQfinish() and
 *	   its pointer set to NULL, and the entry is marked as no longer running;
 *	 - otherwise the command is handed to dispatchCommand().
 *
 * When built with USE_NONBLOCKING, the connections are switched to
 * nonblocking mode before sending; afterwards any unsent data is flushed and
 * the connections are switched back to blocking mode.
 *
 * Logging uses write_log() rather than elog(), because elog() is not
 * thread-safe.
 */
static void
thread_DispatchOut(DispatchCommandParms * pParms)
{
	CdbDispatchResult *dispatchResult;
	int			i,
				db_count = pParms->db_count;

	/*
	 * The pParms contains an array of SegmentDatabaseDescriptors
	 * to send commands through to.
	 */
	for (i = 0; i < db_count; i++)
	{
		dispatchResult = pParms->dispatchResultPtrArray[i];

		/*
		 * Don't use elog, it's not thread-safe
		 */
		if (DEBUG5 >= log_min_messages)
		{
			if (dispatchResult->segdbDesc->conn)
			{
				write_log
					("thread_DispatchCommand working on %d of %d commands. asyncStatus %d",
					 i + 1, db_count,
					 dispatchResult->segdbDesc->conn->asyncStatus);
			}
		}

		/* Start every entry from a clean per-dispatch state. */
		dispatchResult->hasDispatched = false;
		dispatchResult->sentSignal = DISPATCH_WAIT_NONE;
		dispatchResult->wasCanceled = false;

		if (!shouldStillDispatchCommand(pParms, dispatchResult))
		{
			/*
			 * Don't dispatch if cancellation pending or no connection.
			 */
			dispatchResult->stillRunning = false;

			if (PQisBusy(dispatchResult->segdbDesc->conn))
				write_log
					(" We thought we were done, because !shouldStillDispatchCommand(), but libpq says we are still busy");

			if (PQstatus(dispatchResult->segdbDesc->conn) == CONNECTION_BAD)
				write_log
					(" We thought we were done, because !shouldStillDispatchCommand(), but libpq says the connection died?");
		}
		else
		{
			/*
			 * Kick off the command over the libpq connection.
			 *
			 * If unsuccessful, proceed anyway, and check for lost connection
			 * below.
			 */
			if (PQisBusy(dispatchResult->segdbDesc->conn))
			{
				write_log
					("Trying to send to busy connection %s %d %d asyncStatus %d",
					 dispatchResult->segdbDesc->whoami, i, db_count,
					 dispatchResult->segdbDesc->conn->asyncStatus);
			}

			if (PQstatus(dispatchResult->segdbDesc->conn) == CONNECTION_BAD)
			{
				char	   *msg;

				/* msg points into the connection; use it before PQfinish(). */
				msg = PQerrorMessage(dispatchResult->segdbDesc->conn);

				write_log
					("Dispatcher noticed a problem before query transmit: %s (%s)",
					 msg ? msg : "unknown error",
					 dispatchResult->segdbDesc->whoami);

				/*
				 * Save error info for later.
				 */
				cdbdisp_appendMessage(dispatchResult, LOG,
									  ERRCODE_GP_INTERCONNECTION_ERROR,
									  "Error before transmit from %s: %s",
									  dispatchResult->segdbDesc->whoami,
									  msg ? msg : "unknown error");

				PQfinish(dispatchResult->segdbDesc->conn);
				dispatchResult->segdbDesc->conn = NULL;
				dispatchResult->stillRunning = false;

				continue;
			}

#ifdef USE_NONBLOCKING

			/*
			 * In 2000, Tom Lane said:
			 * "I believe that the nonblocking-mode code is pretty buggy, and don't
			 * recommend using it unless you really need it and want to help debug
			 * it.."
			 *
			 * Reading through the code, I'm not convinced the situation has
			 * improved in 2007... I still see some very questionable things
			 * about nonblocking mode, so for now, I'm disabling it.
			 */
			PQsetnonblocking(dispatchResult->segdbDesc->conn, TRUE);
#endif

			dispatchCommand(dispatchResult, pParms->query_text,
							pParms->query_text_len);
		}
	}

#ifdef USE_NONBLOCKING

	/*
	 * Is everything sent?  Well, if the network stack was too busy, and we
	 * are using nonblocking mode, some of the sends might not have completed.
	 * We can't use SELECT to wait unless they have received their work, or we
	 * will wait forever.  Make sure they do.
	 */

	/*
	 * debug loop to check to see if this really is needed
	 */
	for (i = 0; i < db_count; i++)
	{
		dispatchResult = pParms->dispatchResultPtrArray[i];

		if (!dispatchResult->stillRunning || !dispatchResult->hasDispatched)
			continue;
		if (PQstatus(dispatchResult->segdbDesc->conn) == CONNECTION_BAD)
			continue;

		if (dispatchResult->segdbDesc->conn->outCount > 0)
		{
			write_log("Yes, extra flushing is necessary %d", i);
			break;
		}
	}

	/*
	 * Check to see if any needed extra flushing.
	 */
	for (i = 0; i < db_count; i++)
	{
		int			flushResult;

		dispatchResult = pParms->dispatchResultPtrArray[i];

		if (!dispatchResult->stillRunning || !dispatchResult->hasDispatched)
			continue;
		if (PQstatus(dispatchResult->segdbDesc->conn) == CONNECTION_BAD)
			continue;

		/*
		 * If data remains unsent, send it.  Else we might be waiting for the
		 * result of a command the backend hasn't even got yet.
		 */
		flushResult = PQflush(dispatchResult->segdbDesc->conn);

		/*
		 * First time, go through the loop without waiting if we can't
		 * flush, in case we are using multiple network adapters, and
		 * other connections might be able to flush
		 */
		if (flushResult > 0)
			write_log("flushing didn't finish the work %d", i);
	}

	/*
	 * Our first attempt at doing more flushes may not have got everything
	 * out, so keep flushing until each connection can be switched back to
	 * blocking mode.
	 *
	 * Entries whose connection was closed above (conn == NULL) or has gone
	 * bad are skipped: PQstatus() reports CONNECTION_BAD for a NULL
	 * connection, and PQsetnonblocking() can never succeed on a bad
	 * connection, so retrying on one would spin forever.
	 */
	for (i = 0; i < db_count; i++)
	{
		dispatchResult = pParms->dispatchResultPtrArray[i];

		while (PQstatus(dispatchResult->segdbDesc->conn) != CONNECTION_BAD &&
			   PQisnonblocking(dispatchResult->segdbDesc->conn))
		{
			PQflush(dispatchResult->segdbDesc->conn);
			PQsetnonblocking(dispatchResult->segdbDesc->conn, FALSE);
		}
	}
#endif
}
/* * Receive a log stream starting at the specified position. * * If sysidentifier is specified, validate that both the system * identifier and the timeline matches the specified ones * (by sending an extra IDENTIFY_SYSTEM command) * * All received segments will be written to the directory * specified by basedir. * * The stream_stop callback will be called every time data * is received, and whenever a segment is completed. If it returns * true, the streaming will stop and the function * return. As long as it returns false, streaming will continue * indefinitely. * * standby_message_timeout controls how often we send a message * back to the master letting it know our progress, in seconds. * This message will only contain the write location, and never * flush or replay. * * Note: The log position *must* be at a log segment start! */ bool ReceiveXlogStream(PGconn *conn, XLogRecPtr startpos, uint32 timeline, char *sysidentifier, char *basedir, stream_stop_callback stream_stop, int standby_message_timeout, bool rename_partial) { char query[128]; char current_walfile_name[MAXPGPATH]; PGresult *res; char *copybuf = NULL; int64 last_status = -1; XLogRecPtr blockpos = InvalidXLogRecPtr; if (sysidentifier != NULL) { /* Validate system identifier and timeline hasn't changed */ res = PQexec(conn, "IDENTIFY_SYSTEM"); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, _("%s: could not send replication command \"%s\": %s"), progname, "IDENTIFY_SYSTEM", PQerrorMessage(conn)); PQclear(res); return false; } if (PQnfields(res) != 3 || PQntuples(res) != 1) { fprintf(stderr, _("%s: could not identify system: got %d rows and %d fields, expected %d rows and %d fields\n"), progname, PQntuples(res), PQnfields(res), 1, 3); PQclear(res); return false; } if (strcmp(sysidentifier, PQgetvalue(res, 0, 0)) != 0) { fprintf(stderr, _("%s: system identifier does not match between base backup and streaming connection\n"), progname); PQclear(res); return false; } if (timeline != 
atoi(PQgetvalue(res, 0, 1))) { fprintf(stderr, _("%s: timeline does not match between base backup and streaming connection\n"), progname); PQclear(res); return false; } PQclear(res); } /* Initiate the replication stream at specified location */ snprintf(query, sizeof(query), "START_REPLICATION %X/%X", startpos.xlogid, startpos.xrecoff); res = PQexec(conn, query); if (PQresultStatus(res) != PGRES_COPY_BOTH) { fprintf(stderr, _("%s: could not send replication command \"%s\": %s"), progname, "START_REPLICATION", PQresultErrorMessage(res)); PQclear(res); return false; } PQclear(res); /* * Receive the actual xlog data */ while (1) { int r; int xlogoff; int bytes_left; int bytes_written; int64 now; if (copybuf != NULL) { PQfreemem(copybuf); copybuf = NULL; } /* * Check if we should continue streaming, or abort at this point. */ if (stream_stop && stream_stop(blockpos, timeline, false)) { if (walfile != -1 && !close_walfile(basedir, current_walfile_name, rename_partial)) /* Potential error message is written by close_walfile */ goto error; return true; } /* * Potentially send a status message to the master */ now = localGetCurrentTimestamp(); if (standby_message_timeout > 0 && localTimestampDifferenceExceeds(last_status, now, standby_message_timeout)) { /* Time to send feedback! */ char replybuf[sizeof(StandbyReplyMessage) + 1]; StandbyReplyMessage *replymsg; replymsg = (StandbyReplyMessage *) (replybuf + 1); replymsg->write = blockpos; replymsg->flush = InvalidXLogRecPtr; replymsg->apply = InvalidXLogRecPtr; replymsg->sendTime = now; replybuf[0] = 'r'; if (PQputCopyData(conn, replybuf, sizeof(replybuf)) <= 0 || PQflush(conn)) { fprintf(stderr, _("%s: could not send feedback packet: %s"), progname, PQerrorMessage(conn)); goto error; } last_status = now; } r = PQgetCopyData(conn, ©buf, 1); if (r == 0) { /* * In async mode, and no data available. We block on reading but * not more than the specified timeout, so that we can send a * response back to the client. 
*/ fd_set input_mask; struct timeval timeout; struct timeval *timeoutptr; FD_ZERO(&input_mask); FD_SET(PQsocket(conn), &input_mask); if (standby_message_timeout) { TimestampTz targettime; long secs; int usecs; targettime = TimestampTzPlusMilliseconds(last_status, standby_message_timeout - 1); localTimestampDifference(now, targettime, &secs, &usecs); if (secs <= 0) timeout.tv_sec = 1; /* Always sleep at least 1 sec */ else timeout.tv_sec = secs; timeout.tv_usec = usecs; timeoutptr = &timeout; } else timeoutptr = NULL; r = select(PQsocket(conn) + 1, &input_mask, NULL, NULL, timeoutptr); if (r == 0 || (r < 0 && errno == EINTR)) { /* * Got a timeout or signal. Continue the loop and either * deliver a status packet to the server or just go back into * blocking. */ continue; } else if (r < 0) { fprintf(stderr, _("%s: select() failed: %s\n"), progname, strerror(errno)); goto error; } /* Else there is actually data on the socket */ if (PQconsumeInput(conn) == 0) { fprintf(stderr, _("%s: could not receive data from WAL stream: %s"), progname, PQerrorMessage(conn)); goto error; } continue; } if (r == -1) /* End of copy stream */ break; if (r == -2) { fprintf(stderr, _("%s: could not read COPY data: %s"), progname, PQerrorMessage(conn)); goto error; } if (copybuf[0] == 'k') { /* * keepalive message, sent in 9.2 and newer. We just ignore this * message completely, but need to skip past it in the stream. 
*/ if (r != STREAMING_KEEPALIVE_SIZE) { fprintf(stderr, _("%s: keepalive message has incorrect size %d\n"), progname, r); goto error; } continue; } else if (copybuf[0] != 'w') { fprintf(stderr, _("%s: unrecognized streaming header: \"%c\"\n"), progname, copybuf[0]); goto error; } if (r < STREAMING_HEADER_SIZE + 1) { fprintf(stderr, _("%s: streaming header too small: %d\n"), progname, r); goto error; } /* Extract WAL location for this block */ memcpy(&blockpos, copybuf + 1, 8); xlogoff = blockpos.xrecoff % XLOG_SEG_SIZE; /* * Verify that the initial location in the stream matches where we * think we are. */ if (walfile == -1) { /* No file open yet */ if (xlogoff != 0) { fprintf(stderr, _("%s: received transaction log record for offset %u with no file open\n"), progname, xlogoff); goto error; } } else { /* More data in existing segment */ /* XXX: store seek value don't reseek all the time */ if (lseek(walfile, 0, SEEK_CUR) != xlogoff) { fprintf(stderr, _("%s: got WAL data offset %08x, expected %08x\n"), progname, xlogoff, (int) lseek(walfile, 0, SEEK_CUR)); goto error; } } bytes_left = r - STREAMING_HEADER_SIZE; bytes_written = 0; while (bytes_left) { int bytes_to_write; /* * If crossing a WAL boundary, only write up until we reach * XLOG_SEG_SIZE. 
*/ if (xlogoff + bytes_left > XLOG_SEG_SIZE) bytes_to_write = XLOG_SEG_SIZE - xlogoff; else bytes_to_write = bytes_left; if (walfile == -1) { walfile = open_walfile(blockpos, timeline, basedir, current_walfile_name); if (walfile == -1) /* Error logged by open_walfile */ goto error; } if (write(walfile, copybuf + STREAMING_HEADER_SIZE + bytes_written, bytes_to_write) != bytes_to_write) { fprintf(stderr, _("%s: could not write %u bytes to WAL file \"%s\": %s\n"), progname, bytes_to_write, current_walfile_name, strerror(errno)); goto error; } /* Write was successful, advance our position */ bytes_written += bytes_to_write; bytes_left -= bytes_to_write; XLByteAdvance(blockpos, bytes_to_write); xlogoff += bytes_to_write; /* Did we reach the end of a WAL segment? */ if (blockpos.xrecoff % XLOG_SEG_SIZE == 0) { if (!close_walfile(basedir, current_walfile_name, false)) /* Error message written in close_walfile() */ goto error; xlogoff = 0; if (stream_stop != NULL) { /* * Callback when the segment finished, and return if it * told us to. */ if (stream_stop(blockpos, timeline, true)) return true; } } } /* No more data left to write, start receiving next copy packet */ } /* * The only way to get out of the loop is if the server shut down the * replication stream. If it's a controlled shutdown, the server will send * a shutdown message, and we'll return the latest xlog location that has * been streamed. 
*/ res = PQgetResult(conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) { fprintf(stderr, _("%s: unexpected termination of replication stream: %s"), progname, PQresultErrorMessage(res)); goto error; } PQclear(res); /* Complain if we've not reached stop point yet */ if (stream_stop != NULL && !stream_stop(blockpos, timeline, false)) { fprintf(stderr, _("%s: replication stream was terminated before stop point\n"), progname); goto error; } if (copybuf != NULL) PQfreemem(copybuf); if (walfile != -1 && close(walfile) != 0) fprintf(stderr, _("%s: could not close file \"%s\": %s\n"), progname, current_walfile_name, strerror(errno)); walfile = -1; return true; error: if (copybuf != NULL) PQfreemem(copybuf); if (walfile != -1 && close(walfile) != 0) fprintf(stderr, _("%s: could not close file \"%s\": %s\n"), progname, current_walfile_name, strerror(errno)); walfile = -1; return false; }
/********************************************************************************
 * dbi_flush
 *
 * Attempt to flush any data queued to the backend.
 *
 * This is a thin wrapper: the connection is handed straight to libpq's
 *
 *	   int PQflush(PGconn *conn);
 *
 * and whatever PQflush reports is returned to the caller unchanged.  Per
 * the libpq documentation that is 0 if successful (or if the send queue is
 * empty), -1 if it failed for some reason, or 1 if not all of the queued
 * data could be sent yet (possible only on a non-blocking connection).
 *
 * PQflush needs to be called on a non-blocking connection before calling
 * select to determine if a response has arrived.  If 0 is returned it
 * ensures that there is no data queued to the backend that has not actually
 * been sent.  Only applications that have used PQsetnonblocking have a need
 * for this.
 ********************************************************************************/
int
dbi_flush(DBI_conn *conn)
{
	int			flush_status;

	flush_status = PQflush(conn);

	return flush_status;
}