pg_tz * pg_tzenumerate_next(pg_tzenum *dir) { while (dir->depth >= 0) { struct dirent *direntry; char fullname[MAXPGPATH]; struct stat statbuf; direntry = ReadDir(dir->dirdesc[dir->depth], dir->dirname[dir->depth]); if (!direntry) { /* End of this directory */ FreeDir(dir->dirdesc[dir->depth]); pfree(dir->dirname[dir->depth]); dir->depth--; continue; } if (direntry->d_name[0] == '.') continue; snprintf(fullname, MAXPGPATH, "%s/%s", dir->dirname[dir->depth], direntry->d_name); if (stat(fullname, &statbuf) != 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat \"%s\": %m", fullname))); if (S_ISDIR(statbuf.st_mode)) { /* Step into the subdirectory */ if (dir->depth >= MAX_TZDIR_DEPTH - 1) ereport(ERROR, (errmsg_internal("timezone directory stack overflow"))); dir->depth++; dir->dirname[dir->depth] = pstrdup(fullname); dir->dirdesc[dir->depth] = AllocateDir(fullname); if (!dir->dirdesc[dir->depth]) ereport(ERROR, (errcode_for_file_access(), errmsg("could not open directory \"%s\": %m", fullname))); /* Start over reading in the new directory */ continue; } /* * Load this timezone using tzload() not pg_tzset(), so we don't fill * the cache */ if (tzload(fullname + dir->baselen, dir->tz.TZname, &dir->tz.state, TRUE) != 0) { /* Zone could not be loaded, ignore it */ continue; } if (!pg_tz_acceptable(&dir->tz)) { /* Ignore leap-second zones */ continue; } /* Timezone loaded OK. */ return &dir->tz; } /* Nothing more found */ return NULL; }
/* * While checking the mirroring configuration, call this procedure to set * that we are going to attempt connect and mirror to the standby. */ void FtsSetQDMirroring(Segment *seginfo) { int len; bool connectedToWalSendServer; bool walSendServerOk; LWLockAcquire(ftsQDMirrorLock, LW_EXCLUSIVE); if (ftsQDMirrorInfo->configurationChecked) { // Someone else finished the configuration check work first. LWLockRelease(ftsQDMirrorLock); return; } Assert(ftsQDMirrorInfo->state == QDMIRROR_STATE_NONE); len = strlen(seginfo->hostname); if (len >= sizeof(ftsQDMirrorInfo->name)) { LWLockRelease(ftsQDMirrorLock); elog(ERROR, "hostname too long for fts master mirror configuration storage"); } memcpy(ftsQDMirrorInfo->name, seginfo->hostname, len); ftsQDMirrorInfo->name[len] = 0; ftsQDMirrorInfo->port = seginfo->port; //ftsQDMirrorInfo->dbid = seginfo->dbid; //ftsQDMirrorInfo->valid = dbinfo->valid; /* * Connect to our WAL Send server, but not under lock. * */ ftsQDMirrorInfo->configurationChecked = true; if (Master_mirroring_administrator_disable) { LWLockRelease(ftsQDMirrorLock); disableQDMirroring_AdministratorDisabled(); return; } ftsQDMirrorInfo->state = QDMIRROR_STATE_CONNECTINGWALSENDSERVER; ftsQDMirrorInfo->updateMask |= QDMIRROR_UPDATEMASK_MASTERMIRRORING; //elog(NOTICE, "Master mirroring synchronizing"); /* Get the log message time from the NOTICE we just issued. */ gettimeofday(&ftsQDMirrorInfo->lastLogTimeVal, NULL); LWLockRelease(ftsQDMirrorLock); connectedToWalSendServer = WalSendServerClientConnect(/* complain */ true); if (!connectedToWalSendServer) { disableQDMirroring_WalSendServerError("Cannot connect to the WAL Send server"); return; } LWLockAcquire(ftsQDMirrorLock, LW_EXCLUSIVE); if (ftsQDMirrorInfo->state != QDMIRROR_STATE_CONNECTINGWALSENDSERVER) { /* * We've changed state while unlocked. */ if (ftsQDMirrorInfo->state == QDMIRROR_STATE_DISABLED) { LWLockRelease(ftsQDMirrorLock); elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master mirroring already disabled (connecting)"); return; } else { QDMIRRORState state = ftsQDMirrorInfo->state; LWLockRelease(ftsQDMirrorLock); ereport(ERROR, (errmsg_internal("Unexpected master mirroring state '%s'", QDMirroringStateToString(state)))); } } ftsQDMirrorInfo->state = QDMIRROR_STATE_POSITIONINGTOEND; LWLockRelease(ftsQDMirrorLock); walSendServerOk = XLogQDMirrorPositionToEnd(); if (!walSendServerOk) { disableQDMirroring_WalSendServerError("Error occurred during the PositionToEnd request to the WAL Send server"); return; } LWLockAcquire(ftsQDMirrorLock, LW_EXCLUSIVE); if (ftsQDMirrorInfo->state != QDMIRROR_STATE_POSITIONINGTOEND) { /* * We've changed state while unlocked. */ if (ftsQDMirrorInfo->state == QDMIRROR_STATE_DISABLED) { LWLockRelease(ftsQDMirrorLock); elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master mirroring already disabled (request PositionToEnd -- could not send to WAL Send server)"); return; } else { QDMIRRORState state = ftsQDMirrorInfo->state; LWLockRelease(ftsQDMirrorLock); elog(ERROR,"Unexpected master mirror state '%s'", QDMirroringStateToString(state)); } } ftsQDMirrorInfo->state = QDMIRROR_STATE_CATCHUPPENDING; LWLockRelease(ftsQDMirrorLock); elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "positioning master mirror to end complete -- next step is catch-up"); }
int pgpipe(int handles[2]) { SOCKET s; struct sockaddr_in serv_addr; int len = sizeof(serv_addr); handles[0] = handles[1] = INVALID_SOCKET; if ((s = socket(AF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET) { ereport(LOG, (errmsg_internal("pgpipe could not create socket: %ui", WSAGetLastError()))); return -1; } memset((void *) &serv_addr, 0, sizeof(serv_addr)); serv_addr.sin_family = AF_INET; serv_addr.sin_port = htons(0); serv_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); if (bind(s, (SOCKADDR *) &serv_addr, len) == SOCKET_ERROR) { ereport(LOG, (errmsg_internal("pgpipe could not bind: %ui", WSAGetLastError()))); closesocket(s); return -1; } if (listen(s, 1) == SOCKET_ERROR) { ereport(LOG, (errmsg_internal("pgpipe could not listen: %ui", WSAGetLastError()))); closesocket(s); return -1; } if (getsockname(s, (SOCKADDR *) &serv_addr, &len) == SOCKET_ERROR) { ereport(LOG, (errmsg_internal("pgpipe could not getsockname: %ui", WSAGetLastError()))); closesocket(s); return -1; } if ((handles[1] = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET) { ereport(LOG, (errmsg_internal("pgpipe could not create socket 2: %ui", WSAGetLastError()))); closesocket(s); return -1; } if (connect(handles[1], (SOCKADDR *) &serv_addr, len) == SOCKET_ERROR) { ereport(LOG, (errmsg_internal("pgpipe could not connect socket: %ui", WSAGetLastError()))); closesocket(s); return -1; } if ((handles[0] = accept(s, (SOCKADDR *) &serv_addr, &len)) == INVALID_SOCKET) { ereport(LOG, (errmsg_internal("pgpipe could not accept socket: %ui", WSAGetLastError()))); closesocket(handles[1]); handles[1] = INVALID_SOCKET; closesocket(s); return -1; } closesocket(s); return 0; }
/* * Attempt to execute an external shell command during recovery. * * 'command' is the shell command to be executed, 'commandName' is a * human-readable name describing the command emitted in the logs. If * 'failOnSignal' is true and the command is killed by a signal, a FATAL * error is thrown. Otherwise a WARNING is emitted. * * This is currently used for recovery_end_command and archive_cleanup_command. */ void ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal) { char xlogRecoveryCmd[MAXPGPATH]; char lastRestartPointFname[MAXPGPATH]; char *dp; char *endp; const char *sp; int rc; bool signaled; XLogSegNo restartSegNo; XLogRecPtr restartRedoPtr; TimeLineID restartTli; Assert(command && commandName); /* * Calculate the archive file cutoff point for use during log shipping * replication. All files earlier than this point can be deleted from the * archive, though there is no requirement to do so. */ GetOldestRestartPoint(&restartRedoPtr, &restartTli); XLByteToSeg(restartRedoPtr, restartSegNo); XLogFileName(lastRestartPointFname, restartTli, restartSegNo); /* * construct the command to be executed */ dp = xlogRecoveryCmd; endp = xlogRecoveryCmd + MAXPGPATH - 1; *endp = '\0'; for (sp = command; *sp; sp++) { if (*sp == '%') { switch (sp[1]) { case 'r': /* %r: filename of last restartpoint */ sp++; StrNCpy(dp, lastRestartPointFname, endp - dp); dp += strlen(dp); break; case '%': /* convert %% to a single % */ sp++; if (dp < endp) *dp++ = *sp; break; default: /* otherwise treat the % as not special */ if (dp < endp) *dp++ = *sp; break; } } else { if (dp < endp) *dp++ = *sp; } } *dp = '\0'; ereport(DEBUG3, (errmsg_internal("executing %s \"%s\"", commandName, command))); /* * execute the constructed command */ rc = system(xlogRecoveryCmd); if (rc != 0) { /* * If the failure was due to any sort of signal, it's best to punt and * abort recovery. See also detailed comments on signals in * RestoreArchivedFile(). */ signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; ereport((signaled && failOnSignal) ? FATAL : WARNING, /*------ translator: First %s represents a recovery.conf parameter name like "recovery_end_command", and the 2nd is the value of that parameter. */ (errmsg("%s \"%s\": return code %d", commandName, command, rc))); } }
/* * Attempt to retrieve the specified file from off-line archival storage. * If successful, fill "path" with its complete path (note that this will be * a temp file name that doesn't follow the normal naming convention), and * return TRUE. * * If not successful, fill "path" with the name of the normal on-line file * (which may or may not actually exist, but we'll try to use it), and return * FALSE. * * For fixed-size files, the caller may pass the expected size as an * additional crosscheck on successful recovery. If the file size is not * known, set expectedSize = 0. */ bool RestoreArchivedFile(char *path, const char *xlogfname, const char *recovername, off_t expectedSize) { char xlogpath[MAXPGPATH]; char xlogRestoreCmd[MAXPGPATH]; char lastRestartPointFname[MAXPGPATH]; char *dp; char *endp; const char *sp; int rc; bool signaled; struct stat stat_buf; XLogSegNo restartSegNo; XLogRecPtr restartRedoPtr; TimeLineID restartTli; /* In standby mode, restore_command might not be supplied */ if (recoveryRestoreCommand == NULL) goto not_available; /* * When doing archive recovery, we always prefer an archived log file even * if a file of the same name exists in XLOGDIR. The reason is that the * file in XLOGDIR could be an old, un-filled or partly-filled version * that was copied and restored as part of backing up $PGDATA. * * We could try to optimize this slightly by checking the local copy * lastchange timestamp against the archived copy, but we have no API to * do this, nor can we guarantee that the lastchange timestamp was * preserved correctly when we copied to archive. Our aim is robustness, * so we elect not to do this. * * If we cannot obtain the log file from the archive, however, we will try * to use the XLOGDIR file if it exists. This is so that we can make use * of log segments that weren't yet transferred to the archive. * * Notice that we don't actually overwrite any files when we copy back * from archive because the restore_command may inadvertently * restore inappropriate xlogs, or they may be corrupt, so we may wish to * fallback to the segments remaining in current XLOGDIR later. The * copy-from-archive filename is always the same, ensuring that we don't * run out of disk space on long recoveries. */ snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername); /* * Make sure there is no existing file named recovername. */ if (stat(xlogpath, &stat_buf) != 0) { if (errno != ENOENT) ereport(FATAL, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", xlogpath))); } else { if (unlink(xlogpath) != 0) ereport(FATAL, (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", xlogpath))); } /* * Calculate the archive file cutoff point for use during log shipping * replication. All files earlier than this point can be deleted from the * archive, though there is no requirement to do so. * * We initialise this with the filename of an InvalidXLogRecPtr, which * will prevent the deletion of any WAL files from the archive because of * the alphabetic sorting property of WAL filenames. * * Once we have successfully located the redo pointer of the checkpoint * from which we start recovery we never request a file prior to the redo * pointer of the last restartpoint. When redo begins we know that we have * successfully located it, so there is no need for additional status * flags to signify the point when we can begin deleting WAL files from * the archive. */ GetOldestRestartPoint(&restartRedoPtr, &restartTli); if (!XLogRecPtrIsInvalid(restartRedoPtr)) { XLByteToSeg(restartRedoPtr, restartSegNo); XLogFileName(lastRestartPointFname, restartTli, restartSegNo); /* we shouldn't need anything earlier than last restart point */ Assert(strcmp(lastRestartPointFname, xlogfname) <= 0); } else XLogFileName(lastRestartPointFname, 0, 0L); /* * construct the command to be executed */ dp = xlogRestoreCmd; endp = xlogRestoreCmd + MAXPGPATH - 1; *endp = '\0'; for (sp = recoveryRestoreCommand; *sp; sp++) { if (*sp == '%') { switch (sp[1]) { case 'p': /* %p: relative path of target file */ sp++; StrNCpy(dp, xlogpath, endp - dp); make_native_path(dp); dp += strlen(dp); break; case 'f': /* %f: filename of desired file */ sp++; StrNCpy(dp, xlogfname, endp - dp); dp += strlen(dp); break; case 'r': /* %r: filename of last restartpoint */ sp++; StrNCpy(dp, lastRestartPointFname, endp - dp); dp += strlen(dp); break; case '%': /* convert %% to a single % */ sp++; if (dp < endp) *dp++ = *sp; break; default: /* otherwise treat the % as not special */ if (dp < endp) *dp++ = *sp; break; } } else { if (dp < endp) *dp++ = *sp; } } *dp = '\0'; ereport(DEBUG3, (errmsg_internal("executing restore command \"%s\"", xlogRestoreCmd))); /* * Check signals before restore command and reset afterwards. */ PreRestoreCommand(); /* * Copy xlog from archival storage to XLOGDIR */ rc = system(xlogRestoreCmd); PostRestoreCommand(); if (rc == 0) { /* * command apparently succeeded, but let's make sure the file is * really there now and has the correct size. */ if (stat(xlogpath, &stat_buf) == 0) { if (expectedSize > 0 && stat_buf.st_size != expectedSize) { int elevel; /* * If we find a partial file in standby mode, we assume it's * because it's just being copied to the archive, and keep * trying. * * Otherwise treat a wrong-sized file as FATAL to ensure the * DBA would notice it, but is that too strong? We could try * to plow ahead with a local copy of the file ... but the * problem is that there probably isn't one, and we'd * incorrectly conclude we've reached the end of WAL and we're * done recovering ... */ if (StandbyMode && stat_buf.st_size < expectedSize) elevel = DEBUG1; else elevel = FATAL; ereport(elevel, (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu", xlogfname, (unsigned long) stat_buf.st_size, (unsigned long) expectedSize))); return false; } else { ereport(LOG, (errmsg("restored log file \"%s\" from archive", xlogfname))); strcpy(path, xlogpath); return true; } } else { /* stat failed */ if (errno != ENOENT) ereport(FATAL, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", xlogpath))); } } /* * Remember, we rollforward UNTIL the restore fails so failure here is * just part of the process... that makes it difficult to determine * whether the restore failed because there isn't an archive to restore, * or because the administrator has specified the restore program * incorrectly. We have to assume the former. * * However, if the failure was due to any sort of signal, it's best to * punt and abort recovery. (If we "return false" here, upper levels will * assume that recovery is complete and start up the database!) It's * essential to abort on child SIGINT and SIGQUIT, because per spec * system() ignores SIGINT and SIGQUIT while waiting; if we see one of * those it's a good bet we should have gotten it too. * * On SIGTERM, assume we have received a fast shutdown request, and exit * cleanly. It's pure chance whether we receive the SIGTERM first, or the * child process. If we receive it first, the signal handler will call * proc_exit, otherwise we do it here. If we or the child process received * SIGTERM for any other reason than a fast shutdown request, postmaster * will perform an immediate shutdown when it sees us exiting * unexpectedly. * * Per the Single Unix Spec, shells report exit status > 128 when a called * command died on a signal. Also, 126 and 127 are used to report * problems such as an unfindable command; treat those as fatal errors * too. */ if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM) proc_exit(1); signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; ereport(signaled ? FATAL : DEBUG2, (errmsg("could not restore file \"%s\" from archive: return code %d", xlogfname, rc))); not_available: /* * if an archived file is not available, there might still be a version of * this file in XLOGDIR, so return that as the filename to open. * * In many recovery scenarios we expect this to fail also, but if so that * just means we've reached the end of WAL. */ snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname); return false; }
/* * pgarch_archiveXlog * * Invokes system(3) to copy one archive file to wherever it should go * * Returns true if successful */ static bool pgarch_archiveXlog(char *xlog) { char xlogarchcmd[MAXPGPATH]; char pathname[MAXPGPATH]; char activitymsg[MAXFNAMELEN + 16]; char *dp; char *endp; const char *sp; int rc; snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog); /* * construct the command to be executed */ dp = xlogarchcmd; endp = xlogarchcmd + MAXPGPATH - 1; *endp = '\0'; for (sp = XLogArchiveCommand; *sp; sp++) { if (*sp == '%') { switch (sp[1]) { case 'p': /* %p: relative path of source file */ sp++; strlcpy(dp, pathname, endp - dp); make_native_path(dp); dp += strlen(dp); break; case 'f': /* %f: filename of source file */ sp++; strlcpy(dp, xlog, endp - dp); dp += strlen(dp); break; case '%': /* convert %% to a single % */ sp++; if (dp < endp) *dp++ = *sp; break; default: /* otherwise treat the % as not special */ if (dp < endp) *dp++ = *sp; break; } } else { if (dp < endp) *dp++ = *sp; } } *dp = '\0'; ereport(DEBUG3, (errmsg_internal("executing archive command \"%s\"", xlogarchcmd))); /* Report archive activity in PS display */ snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog); set_ps_display(activitymsg, false); rc = system(xlogarchcmd); if (rc != 0) { /* * If either the shell itself, or a called command, died on a signal, * abort the archiver. We do this because system() ignores SIGINT and * SIGQUIT while waiting; so a signal is very likely something that * should have interrupted us too. If we overreact it's no big deal, * the postmaster will just start the archiver again. * * Per the Single Unix Spec, shells report exit status > 128 when a * called command died on a signal. */ int lev = (WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128) ? FATAL : LOG; if (WIFEXITED(rc)) { ereport(lev, (errmsg("archive command failed with exit code %d", WEXITSTATUS(rc)), errdetail("The failed archive command was: %s", xlogarchcmd))); } else if (WIFSIGNALED(rc)) { #if defined(WIN32) ereport(lev, (errmsg("archive command was terminated by exception 0x%X", WTERMSIG(rc)), errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."), errdetail("The failed archive command was: %s", xlogarchcmd))); #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST ereport(lev, (errmsg("archive command was terminated by signal %d: %s", WTERMSIG(rc), WTERMSIG(rc) < NSIG ? sys_siglist[WTERMSIG(rc)] : "(unknown)"), errdetail("The failed archive command was: %s", xlogarchcmd))); #else ereport(lev, (errmsg("archive command was terminated by signal %d", WTERMSIG(rc)), errdetail("The failed archive command was: %s", xlogarchcmd))); #endif } else { ereport(lev, (errmsg("archive command exited with unrecognized status %d", rc), errdetail("The failed archive command was: %s", xlogarchcmd))); } snprintf(activitymsg, sizeof(activitymsg), "failed on %s", xlog); set_ps_display(activitymsg, false); return false; } ereport(DEBUG1, (errmsg("archived transaction log file \"%s\"", xlog))); snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog); set_ps_display(activitymsg, false); return true; }
/* * Set up for tuple conversion, matching input and output columns by * position. (Dropped columns are ignored in both input and output.) * * Note: the errdetail messages speak of indesc as the "returned" rowtype, * outdesc as the "expected" rowtype. This is okay for current uses but * might need generalization in future. */ TupleConversionMap * convert_tuples_by_position(TupleDesc indesc, TupleDesc outdesc, const char *msg) { TupleConversionMap *map; AttrNumber *attrMap; int nincols; int noutcols; int n; int i; int j; bool same; /* Verify compatibility and prepare attribute-number map */ n = outdesc->natts; attrMap = (AttrNumber *) palloc0(n * sizeof(AttrNumber)); j = 0; /* j is next physical input attribute */ nincols = noutcols = 0; /* these count non-dropped attributes */ same = true; for (i = 0; i < n; i++) { Form_pg_attribute att = outdesc->attrs[i]; Oid atttypid; int32 atttypmod; if (att->attisdropped) continue; /* attrMap[i] is already 0 */ noutcols++; atttypid = att->atttypid; atttypmod = att->atttypmod; for (; j < indesc->natts; j++) { att = indesc->attrs[j]; if (att->attisdropped) continue; nincols++; /* Found matching column, check type */ if (atttypid != att->atttypid || (atttypmod != att->atttypmod && atttypmod >= 0)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg_internal("%s", _(msg)), errdetail("Returned type %s does not match expected type %s in column %d.", format_type_with_typemod(att->atttypid, att->atttypmod), format_type_with_typemod(atttypid, atttypmod), noutcols))); attrMap[i] = (AttrNumber) (j + 1); j++; break; } if (attrMap[i] == 0) same = false; /* we'll complain below */ } /* Check for unused input columns */ for (; j < indesc->natts; j++) { if (indesc->attrs[j]->attisdropped) continue; nincols++; same = false; /* we'll complain below */ } /* Report column count mismatch using the non-dropped-column counts */ if (!same) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg_internal("%s", _(msg)), errdetail("Number of returned columns (%d) does not match " "expected column count (%d).", nincols, noutcols))); /* * Check to see if the map is one-to-one and the tuple types are the same. * (We check the latter because if they're not, we want to do conversion * to inject the right OID into the tuple datum.) */ if (indesc->natts == outdesc->natts && indesc->tdtypeid == outdesc->tdtypeid) { for (i = 0; i < n; i++) { if (attrMap[i] == (i + 1)) continue; /* * If it's a dropped column and the corresponding input column is * also dropped, we needn't convert. However, attlen and attalign * must agree. */ if (attrMap[i] == 0 && indesc->attrs[i]->attisdropped && indesc->attrs[i]->attlen == outdesc->attrs[i]->attlen && indesc->attrs[i]->attalign == outdesc->attrs[i]->attalign) continue; same = false; break; } } else same = false; if (same) { /* Runtime conversion is not needed */ pfree(attrMap); return NULL; } /* Prepare the map structure */ map = (TupleConversionMap *) palloc(sizeof(TupleConversionMap)); map->indesc = indesc; map->outdesc = outdesc; map->attrMap = attrMap; /* preallocate workspace for Datum arrays */ map->outvalues = (Datum *) palloc(n * sizeof(Datum)); map->outisnull = (bool *) palloc(n * sizeof(bool)); n = indesc->natts + 1; /* +1 for NULL */ map->invalues = (Datum *) palloc(n * sizeof(Datum)); map->inisnull = (bool *) palloc(n * sizeof(bool)); map->invalues[0] = (Datum) 0; /* set up the NULL entry */ map->inisnull[0] = true; return map; }
/* * Emit a PG error or notice, together with any available info about * the current Python error, previously set by PLy_exception_set(). * This should be used to propagate Python errors into PG. If fmt is * NULL, the Python error becomes the primary error message, otherwise * it becomes the detail. If there is a Python traceback, it is put * in the context. */ void PLy_elog(int elevel, const char *fmt,...) { char *xmsg; char *tbmsg; int tb_depth; StringInfoData emsg; PyObject *exc, *val, *tb; const char *primary = NULL; int sqlerrcode = 0; char *detail = NULL; char *hint = NULL; char *query = NULL; int position = 0; PyErr_Fetch(&exc, &val, &tb); if (exc != NULL) { PyErr_NormalizeException(&exc, &val, &tb); if (PyErr_GivenExceptionMatches(val, PLy_exc_spi_error)) PLy_get_spi_error_data(val, &sqlerrcode, &detail, &hint, &query, &position); else if (PyErr_GivenExceptionMatches(val, PLy_exc_fatal)) elevel = FATAL; } /* this releases our refcount on tb! */ PLy_traceback(exc, val, tb, &xmsg, &tbmsg, &tb_depth); if (fmt) { initStringInfo(&emsg); for (;;) { va_list ap; int needed; va_start(ap, fmt); needed = appendStringInfoVA(&emsg, dgettext(TEXTDOMAIN, fmt), ap); va_end(ap); if (needed == 0) break; enlargeStringInfo(&emsg, needed); } primary = emsg.data; /* Since we have a format string, we cannot have a SPI detail. */ Assert(detail == NULL); /* If there's an exception message, it goes in the detail. */ if (xmsg) detail = xmsg; } else { if (xmsg) primary = xmsg; } PG_TRY(); { ereport(elevel, (errcode(sqlerrcode ? sqlerrcode : ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg_internal("%s", primary ? primary : "no exception data"), (detail) ? errdetail_internal("%s", detail) : 0, (tb_depth > 0 && tbmsg) ? errcontext("%s", tbmsg) : 0, (hint) ? errhint("%s", hint) : 0, (query) ? internalerrquery(query) : 0, (position) ? internalerrposition(position) : 0)); } PG_CATCH(); { if (fmt) pfree(emsg.data); if (xmsg) pfree(xmsg); if (tbmsg) pfree(tbmsg); Py_XDECREF(exc); Py_XDECREF(val); PG_RE_THROW(); } PG_END_TRY(); if (fmt) pfree(emsg.data); if (xmsg) pfree(xmsg); if (tbmsg) pfree(tbmsg); Py_XDECREF(exc); Py_XDECREF(val); }
static const char * identify_system_timezone(void) { int i; char tzname[128]; char localtzname[256]; time_t t = time(NULL); struct tm *tm = localtime(&t); HKEY rootKey; int idx; if (!tm) { ereport(WARNING, (errmsg_internal("could not determine current date/time: localtime failed"))); return NULL; } memset(tzname, 0, sizeof(tzname)); strftime(tzname, sizeof(tzname) - 1, "%Z", tm); for (i = 0; win32_tzmap[i].stdname != NULL; i++) { if (strcmp(tzname, win32_tzmap[i].stdname) == 0 || strcmp(tzname, win32_tzmap[i].dstname) == 0) { elog(DEBUG4, "TZ \"%s\" matches Windows timezone \"%s\"", win32_tzmap[i].pgtzname, tzname); return win32_tzmap[i].pgtzname; } } /* * Localized Windows versions return localized names for the timezone. * Scan the registry to find the English name, and then try matching * against our table again. */ memset(localtzname, 0, sizeof(localtzname)); if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones", 0, KEY_READ, &rootKey) != ERROR_SUCCESS) { ereport(WARNING, (errmsg_internal("could not open registry key to identify Windows timezone: %i", (int) GetLastError()))); return NULL; } for (idx = 0;; idx++) { char keyname[256]; char zonename[256]; DWORD namesize; FILETIME lastwrite; HKEY key; LONG r; memset(keyname, 0, sizeof(keyname)); namesize = sizeof(keyname); if ((r = RegEnumKeyEx(rootKey, idx, keyname, &namesize, NULL, NULL, NULL, &lastwrite)) != ERROR_SUCCESS) { if (r == ERROR_NO_MORE_ITEMS) break; ereport(WARNING, (errmsg_internal("could not enumerate registry subkeys to identify Windows timezone: %i", (int) r))); break; } if ((r = RegOpenKeyEx(rootKey, keyname, 0, KEY_READ, &key)) != ERROR_SUCCESS) { ereport(WARNING, (errmsg_internal("could not open registry subkey to identify Windows timezone: %i", (int) r))); break; } memset(zonename, 0, sizeof(zonename)); namesize = sizeof(zonename); if ((r = RegQueryValueEx(key, "Std", NULL, NULL, zonename, &namesize)) != ERROR_SUCCESS) { ereport(WARNING, (errmsg_internal("could not query value for 'std' to identify Windows timezone \"%s\": %i", keyname, (int) r))); RegCloseKey(key); continue; /* Proceed to look at the next timezone */ } if (strcmp(tzname, zonename) == 0) { /* Matched zone */ strcpy(localtzname, keyname); RegCloseKey(key); break; } memset(zonename, 0, sizeof(zonename)); namesize = sizeof(zonename); if ((r = RegQueryValueEx(key, "Dlt", NULL, NULL, zonename, &namesize)) != ERROR_SUCCESS) { ereport(WARNING, (errmsg_internal("could not query value for 'dlt' to identify Windows timezone \"%s\": %i", keyname, (int) r))); RegCloseKey(key); continue; /* Proceed to look at the next timezone */ } if (strcmp(tzname, zonename) == 0) { /* Matched DST zone */ strcpy(localtzname, keyname); RegCloseKey(key); break; } RegCloseKey(key); } RegCloseKey(rootKey); if (localtzname[0]) { /* Found a localized name, so scan for that one too */ for (i = 0; win32_tzmap[i].stdname != NULL; i++) { if (strcmp(localtzname, win32_tzmap[i].stdname) == 0 || strcmp(localtzname, win32_tzmap[i].dstname) == 0) { elog(DEBUG4, "TZ \"%s\" matches localized Windows timezone \"%s\" (\"%s\")", win32_tzmap[i].pgtzname, tzname, localtzname); return win32_tzmap[i].pgtzname; } } } ereport(WARNING, (errmsg("could not find a match for Windows timezone \"%s\"", tzname))); return NULL; }
/* * Convert the last socket error code into errno */ static void TranslateSocketError(void) { switch (WSAGetLastError()) { case WSANOTINITIALISED: case WSAENETDOWN: case WSAEINPROGRESS: case WSAEINVAL: case WSAESOCKTNOSUPPORT: case WSAEFAULT: case WSAEINVALIDPROVIDER: case WSAEINVALIDPROCTABLE: case WSAEMSGSIZE: errno = EINVAL; break; case WSAEAFNOSUPPORT: errno = EAFNOSUPPORT; break; case WSAEMFILE: errno = EMFILE; break; case WSAENOBUFS: errno = ENOBUFS; break; case WSAEPROTONOSUPPORT: case WSAEPROTOTYPE: errno = EPROTONOSUPPORT; break; case WSAECONNREFUSED: errno = ECONNREFUSED; break; case WSAEINTR: errno = EINTR; break; case WSAENOTSOCK: errno = EBADFD; break; case WSAEOPNOTSUPP: errno = EOPNOTSUPP; break; case WSAEWOULDBLOCK: errno = EWOULDBLOCK; break; case WSAEACCES: errno = EACCES; break; case WSAENOTCONN: case WSAENETRESET: case WSAECONNRESET: case WSAESHUTDOWN: case WSAECONNABORTED: case WSAEDISCON: errno = ECONNREFUSED; /* ENOTCONN? */ break; default: ereport(NOTICE, (errmsg_internal("Unknown win32 socket error code: %i", WSAGetLastError()))); errno = EINVAL; } }
/* * Wait for activity on one or more sockets. * While waiting, allow signals to run * * NOTE! Currently does not implement exceptfds check, * since it is not used in postgresql! */ int pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval * timeout) { WSAEVENT events[FD_SETSIZE * 2]; /* worst case is readfds totally * different from writefds, so * 2*FD_SETSIZE sockets */ SOCKET sockets[FD_SETSIZE * 2]; int numevents = 0; int i; int r; DWORD timeoutval = WSA_INFINITE; FD_SET outreadfds; FD_SET outwritefds; int nummatches = 0; Assert(exceptfds == NULL); if (pgwin32_poll_signals()) return -1; FD_ZERO(&outreadfds); FD_ZERO(&outwritefds); /* * Write FDs are different in the way that it is only flagged by * WSASelectEvent() if we have tried to write to them first. So try an * empty write */ if (writefds) { for (i = 0; i < writefds->fd_count; i++) { char c; WSABUF buf; DWORD sent; buf.buf = &c; buf.len = 0; r = WSASend(writefds->fd_array[i], &buf, 1, &sent, 0, NULL, NULL); if (r == 0) /* Completed - means things are fine! */ FD_SET(writefds->fd_array[i], &outwritefds); else { /* Not completed */ if (WSAGetLastError() != WSAEWOULDBLOCK) /* * Not completed, and not just "would block", so an error * occured */ FD_SET(writefds->fd_array[i], &outwritefds); } } if (outwritefds.fd_count > 0) { memcpy(writefds, &outwritefds, sizeof(fd_set)); if (readfds) FD_ZERO(readfds); return outwritefds.fd_count; } } /* Now set up for an actual select */ if (timeout != NULL) { /* timeoutval is in milliseconds */ timeoutval = timeout->tv_sec * 1000 + timeout->tv_usec / 1000; } if (readfds != NULL) { for (i = 0; i < readfds->fd_count; i++) { events[numevents] = WSACreateEvent(); sockets[numevents] = readfds->fd_array[i]; numevents++; } } if (writefds != NULL) { for (i = 0; i < writefds->fd_count; i++) { if (!readfds || !FD_ISSET(writefds->fd_array[i], readfds)) { /* If the socket is not in the read list */ events[numevents] = WSACreateEvent(); sockets[numevents] = writefds->fd_array[i]; numevents++; } } } for (i = 0; i < numevents; i++) { int flags = 0; if (readfds && FD_ISSET(sockets[i], readfds)) flags |= FD_READ | FD_ACCEPT | FD_CLOSE; if (writefds && FD_ISSET(sockets[i], writefds)) flags |= FD_WRITE | FD_CLOSE; if (WSAEventSelect(sockets[i], events[i], flags) == SOCKET_ERROR) { TranslateSocketError(); for (i = 0; i < numevents; i++) WSACloseEvent(events[i]); return -1; } } events[numevents] = pgwin32_signal_event; r = WaitForMultipleObjectsEx(numevents + 1, events, FALSE, timeoutval, TRUE); if (r != WAIT_TIMEOUT && r != WAIT_IO_COMPLETION && r != (WAIT_OBJECT_0 + numevents)) { /* * We scan all events, even those not signalled, in case more than one * event has been tagged but Wait.. can only return one. */ WSANETWORKEVENTS resEvents; for (i = 0; i < numevents; i++) { ZeroMemory(&resEvents, sizeof(resEvents)); if (WSAEnumNetworkEvents(sockets[i], events[i], &resEvents) == SOCKET_ERROR) ereport(FATAL, (errmsg_internal("failed to enumerate network events: %i", (int) GetLastError()))); /* Read activity? */ if (readfds && FD_ISSET(sockets[i], readfds)) { if ((resEvents.lNetworkEvents & FD_READ) || (resEvents.lNetworkEvents & FD_ACCEPT) || (resEvents.lNetworkEvents & FD_CLOSE)) { FD_SET(sockets[i], &outreadfds); nummatches++; } } /* Write activity? */ if (writefds && FD_ISSET(sockets[i], writefds)) { if ((resEvents.lNetworkEvents & FD_WRITE) || (resEvents.lNetworkEvents & FD_CLOSE)) { FD_SET(sockets[i], &outwritefds); nummatches++; } } } } /* Clean up all handles */ for (i = 0; i < numevents; i++) { WSAEventSelect(sockets[i], events[i], 0); WSACloseEvent(events[i]); } if (r == WSA_WAIT_TIMEOUT) { if (readfds) FD_ZERO(readfds); if (writefds) FD_ZERO(writefds); return 0; } if (r == WAIT_OBJECT_0 + numevents) { pgwin32_dispatch_queued_signals(); errno = EINTR; if (readfds) FD_ZERO(readfds); if (writefds) FD_ZERO(writefds); return -1; } /* Overwrite socket sets with our resulting values */ if (readfds) memcpy(readfds, &outreadfds, sizeof(fd_set)); if (writefds) memcpy(writefds, &outwritefds, sizeof(fd_set)); return nummatches; }
int pgwin32_recv(SOCKET s, char *buf, int len, int f) { WSABUF wbuf; int r; DWORD b; DWORD flags = f; int n; if (pgwin32_poll_signals()) return -1; wbuf.len = len; wbuf.buf = buf; r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL); if (r != SOCKET_ERROR && b > 0) /* Read succeeded right away */ return b; if (r == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK) { TranslateSocketError(); return -1; } /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */ for (n = 0; n < 5; n++) { if (pgwin32_waitforsinglesocket(s, FD_READ | FD_CLOSE | FD_ACCEPT, INFINITE) == 0) return -1; /* errno already set */ r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL); if (r == SOCKET_ERROR) { if (WSAGetLastError() == WSAEWOULDBLOCK) { /* * There seem to be cases on win2k (at least) where WSARecv * can return WSAEWOULDBLOCK even when * pgwin32_waitforsinglesocket claims the socket is readable. * In this case, just sleep for a moment and try again. We try * up to 5 times - if it fails more than that it's not likely * to ever come back. */ pg_usleep(10000); continue; } TranslateSocketError(); return -1; } return b; } ereport(NOTICE, (errmsg_internal("Failed to read from ready socket (after retries)"))); errno = EWOULDBLOCK; return -1; }
int pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout) { static HANDLE waitevent = INVALID_HANDLE_VALUE; static SOCKET current_socket = -1; static int isUDP = 0; HANDLE events[2]; int r; if (waitevent == INVALID_HANDLE_VALUE) { waitevent = CreateEvent(NULL, TRUE, FALSE, NULL); if (waitevent == INVALID_HANDLE_VALUE) ereport(ERROR, (errmsg_internal("Failed to create socket waiting event: %i", (int) GetLastError()))); } else if (!ResetEvent(waitevent)) ereport(ERROR, (errmsg_internal("Failed to reset socket waiting event: %i", (int) GetLastError()))); /* * make sure we don't multiplex this kernel event object with a different * socket from a previous call */ if (current_socket != s) { if ( current_socket != -1 ) WSAEventSelect(current_socket, waitevent, 0); isUDP = isDataGram(s); } current_socket = s; if (WSAEventSelect(s, waitevent, what) == SOCKET_ERROR) { TranslateSocketError(); return 0; } events[0] = pgwin32_signal_event; events[1] = waitevent; /* * Just a workaround of unknown locking problem with writing in UDP socket * under high load: Client's pgsql backend sleeps infinitely in * WaitForMultipleObjectsEx, pgstat process sleeps in pgwin32_select(). * So, we will wait with small timeout(0.1 sec) and if sockect is still * blocked, try WSASend (see comments in pgwin32_select) and wait again. */ if ((what & FD_WRITE) && isUDP) { for(;;) { r = WaitForMultipleObjectsEx(2, events, FALSE, 100, TRUE); if ( r == WAIT_TIMEOUT ) { char c; WSABUF buf; DWORD sent; buf.buf = &c; buf.len = 0; r = WSASend(s, &buf, 1, &sent, 0, NULL, NULL); if (r == 0) /* Completed - means things are fine! */ return 1; else if ( WSAGetLastError() != WSAEWOULDBLOCK ) { TranslateSocketError(); return 0; } } else break; } } else r = WaitForMultipleObjectsEx(2, events, FALSE, timeout, TRUE); if (r == WAIT_OBJECT_0 || r == WAIT_IO_COMPLETION) { pgwin32_dispatch_queued_signals(); errno = EINTR; return 0; } if (r == WAIT_OBJECT_0 + 1) return 1; if (r == WAIT_TIMEOUT) return 0; ereport(ERROR, (errmsg_internal("Bad return from WaitForMultipleObjects: %i (%i)", r, (int) GetLastError()))); return 0; }
/* * ApplicableOpExpressionList walks over all filter clauses that relate to this * foreign table, and chooses applicable clauses that we know we can translate * into Mongo queries. Currently, these clauses include comparison expressions * that have a column and a constant as arguments. For example, "o_orderdate >= * date '1994-01-01' + interval '1' year" is an applicable expression. */ List * ApplicableOpExpressionList(RelOptInfo *baserel) { List *opExpressionList = NIL; List *restrictInfoList = baserel->baserestrictinfo; ListCell *restrictInfoCell = NULL; foreach(restrictInfoCell, restrictInfoList) { RestrictInfo *restrictInfo = (RestrictInfo *) lfirst(restrictInfoCell); Expr *expression = restrictInfo->clause; NodeTag expressionType = 0; OpExpr *opExpression = NULL; char *operatorName = NULL; char *mongoOperatorName = NULL; List *argumentList = NIL; Var *column = NULL; Const *constant = NULL; bool equalsOperator = false; bool constantIsArray = false; /* we only support operator expressions */ expressionType = nodeTag(expression); if (expressionType != T_OpExpr) { continue; } opExpression = (OpExpr *) expression; operatorName = get_opname(opExpression->opno); /* we only support =, <, >, <=, >=, and <> operators */ if (strncmp(operatorName, EQUALITY_OPERATOR_NAME, NAMEDATALEN) == 0) { equalsOperator = true; } mongoOperatorName = MongoOperatorName(operatorName); if (!equalsOperator && mongoOperatorName == NULL) { ereport(INFO, (errmsg_internal("Ignoring unsupported operator %s", operatorName))); continue; } /* * We only support simple binary operators that compare a column against * a constant. If the expression is a tree, we don't recurse into it. */ argumentList = opExpression->args; column = (Var *) FindArgumentOfType(argumentList, T_Var); constant = (Const *) FindArgumentOfType(argumentList, T_Const); /* * We don't push down operators where the constant is an array, since * conditional operators for arrays in MongoDB aren't properly defined. * For example, {similar_products : [ "B0009S4IJW", "6301964144" ]} * finds results that are equal to the array, but {similar_products: * {$gte: [ "B0009S4IJW", "6301964144" ]}} returns an empty set. */ if (constant != NULL) { Oid constantArrayTypeId = get_element_type(constant->consttype); if (constantArrayTypeId != InvalidOid) { constantIsArray = true; ereport(INFO, (errmsg_internal("Ignoring %s expression with array", operatorName))); } } else { ereport(INFO, (errmsg_internal("Ignoring %s expression without a constant", operatorName))); } if (column != NULL && constant != NULL && !constantIsArray) { opExpressionList = lappend(opExpressionList, opExpression); } }
/* * Generate an ephemeral DH key. Because this can take a long * time to compute, we can use precomputed parameters of the * common key sizes. * * Since few sites will bother to precompute these parameter * files, we also provide a fallback to the parameters provided * by the OpenSSL project. * * These values can be static (once loaded or computed) since * the OpenSSL library can efficiently generate random keys from * the information provided. */ static DH * tmp_dh_cb(SSL *s, int is_export, int keylength) { DH *r = NULL; static DH *dh = NULL; static DH *dh512 = NULL; static DH *dh1024 = NULL; static DH *dh2048 = NULL; static DH *dh4096 = NULL; switch (keylength) { case 512: if (dh512 == NULL) dh512 = load_dh_file(keylength); if (dh512 == NULL) dh512 = load_dh_buffer(file_dh512, sizeof file_dh512); r = dh512; break; case 1024: if (dh1024 == NULL) dh1024 = load_dh_file(keylength); if (dh1024 == NULL) dh1024 = load_dh_buffer(file_dh1024, sizeof file_dh1024); r = dh1024; break; case 2048: if (dh2048 == NULL) dh2048 = load_dh_file(keylength); if (dh2048 == NULL) dh2048 = load_dh_buffer(file_dh2048, sizeof file_dh2048); r = dh2048; break; case 4096: if (dh4096 == NULL) dh4096 = load_dh_file(keylength); if (dh4096 == NULL) dh4096 = load_dh_buffer(file_dh4096, sizeof file_dh4096); r = dh4096; break; default: if (dh == NULL) dh = load_dh_file(keylength); r = dh; } /* this may take a long time, but it may be necessary... */ if (r == NULL || 8 * DH_size(r) < keylength) { ereport(DEBUG2, (errmsg_internal("DH: generating parameters (%d bits)", keylength))); r = DH_generate_parameters(keylength, DH_GENERATOR_2, NULL, NULL); } return r; }
static int pg_SSPI_recvauth(Port *port) { int mtype; StringInfoData buf; SECURITY_STATUS r; CredHandle sspicred; CtxtHandle *sspictx = NULL, newctx; TimeStamp expiry; ULONG contextattr; SecBufferDesc inbuf; SecBufferDesc outbuf; SecBuffer OutBuffers[1]; SecBuffer InBuffers[1]; HANDLE token; TOKEN_USER *tokenuser; DWORD retlen; char accountname[MAXPGPATH]; char domainname[MAXPGPATH]; DWORD accountnamesize = sizeof(accountname); DWORD domainnamesize = sizeof(domainname); SID_NAME_USE accountnameuse; HMODULE secur32; QUERY_SECURITY_CONTEXT_TOKEN_FN _QuerySecurityContextToken; /* * SSPI auth is not supported for protocol versions before 3, because it * relies on the overall message length word to determine the SSPI payload * size in AuthenticationGSSContinue and PasswordMessage messages. * (This is, in fact, a design error in our SSPI support, because protocol * messages are supposed to be parsable without relying on the length * word; but it's not worth changing it now.) */ if (PG_PROTOCOL_MAJOR(FrontendProtocol) < 3) ereport(FATAL, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("SSPI is not supported in protocol version 2"))); /* * Acquire a handle to the server credentials. */ r = AcquireCredentialsHandle(NULL, "negotiate", SECPKG_CRED_INBOUND, NULL, NULL, NULL, NULL, &sspicred, &expiry); if (r != SEC_E_OK) pg_SSPI_error(ERROR, gettext_noop("could not acquire SSPI credentials handle"), r); /* * Loop through SSPI message exchange. This exchange can consist of * multiple messags sent in both directions. First message is always from * the client. All messages from client to server are password packets * (type 'p'). */ do { mtype = pq_getbyte(); if (mtype != 'p') { /* Only log error if client didn't disconnect. */ if (mtype != EOF) ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("expected SSPI response, got message type %d", mtype))); return STATUS_ERROR; } /* Get the actual SSPI token */ initStringInfo(&buf); if (pq_getmessage(&buf, PG_MAX_AUTH_TOKEN_LENGTH)) { /* EOF - pq_getmessage already logged error */ pfree(buf.data); return STATUS_ERROR; } /* Map to SSPI style buffer */ inbuf.ulVersion = SECBUFFER_VERSION; inbuf.cBuffers = 1; inbuf.pBuffers = InBuffers; InBuffers[0].pvBuffer = buf.data; InBuffers[0].cbBuffer = buf.len; InBuffers[0].BufferType = SECBUFFER_TOKEN; /* Prepare output buffer */ OutBuffers[0].pvBuffer = NULL; OutBuffers[0].BufferType = SECBUFFER_TOKEN; OutBuffers[0].cbBuffer = 0; outbuf.cBuffers = 1; outbuf.pBuffers = OutBuffers; outbuf.ulVersion = SECBUFFER_VERSION; elog(DEBUG4, "Processing received SSPI token of length %u", (unsigned int) buf.len); r = AcceptSecurityContext(&sspicred, sspictx, &inbuf, ASC_REQ_ALLOCATE_MEMORY, SECURITY_NETWORK_DREP, &newctx, &outbuf, &contextattr, NULL); /* input buffer no longer used */ pfree(buf.data); if (outbuf.cBuffers > 0 && outbuf.pBuffers[0].cbBuffer > 0) { /* * Negotiation generated data to be sent to the client. */ elog(DEBUG4, "sending SSPI response token of length %u", (unsigned int) outbuf.pBuffers[0].cbBuffer); port->gss->outbuf.length = outbuf.pBuffers[0].cbBuffer; port->gss->outbuf.value = outbuf.pBuffers[0].pvBuffer; sendAuthRequest(port, AUTH_REQ_GSS_CONT); FreeContextBuffer(outbuf.pBuffers[0].pvBuffer); } if (r != SEC_E_OK && r != SEC_I_CONTINUE_NEEDED) { if (sspictx != NULL) { DeleteSecurityContext(sspictx); free(sspictx); } FreeCredentialsHandle(&sspicred); pg_SSPI_error(ERROR, gettext_noop("could not accept SSPI security context"), r); } if (sspictx == NULL) { sspictx = malloc(sizeof(CtxtHandle)); if (sspictx == NULL) ereport(ERROR, (errmsg("out of memory"))); memcpy(sspictx, &newctx, sizeof(CtxtHandle)); } if (r == SEC_I_CONTINUE_NEEDED) elog(DEBUG4, "SSPI continue needed"); } while (r == SEC_I_CONTINUE_NEEDED); /* * Release service principal credentials */ FreeCredentialsHandle(&sspicred); /* * SEC_E_OK indicates that authentication is now complete. * * Get the name of the user that authenticated, and compare it to the pg * username that was specified for the connection. * * MingW is missing the export for QuerySecurityContextToken in the * secur32 library, so we have to load it dynamically. */ secur32 = LoadLibrary("SECUR32.DLL"); if (secur32 == NULL) ereport(ERROR, (errmsg_internal("could not load secur32.dll: %d", (int) GetLastError()))); _QuerySecurityContextToken = (QUERY_SECURITY_CONTEXT_TOKEN_FN) GetProcAddress(secur32, "QuerySecurityContextToken"); if (_QuerySecurityContextToken == NULL) { FreeLibrary(secur32); ereport(ERROR, (errmsg_internal("could not locate QuerySecurityContextToken in secur32.dll: %d", (int) GetLastError()))); } r = (_QuerySecurityContextToken) (sspictx, &token); if (r != SEC_E_OK) { FreeLibrary(secur32); pg_SSPI_error(ERROR, gettext_noop("could not get security token from context"), r); } FreeLibrary(secur32); /* * No longer need the security context, everything from here on uses the * token instead. */ DeleteSecurityContext(sspictx); free(sspictx); if (!GetTokenInformation(token, TokenUser, NULL, 0, &retlen) && GetLastError() != 122) ereport(ERROR, (errmsg_internal("could not get token user size: error code %d", (int) GetLastError()))); tokenuser = malloc(retlen); if (tokenuser == NULL) ereport(ERROR, (errmsg("out of memory"))); if (!GetTokenInformation(token, TokenUser, tokenuser, retlen, &retlen)) ereport(ERROR, (errmsg_internal("could not get user token: error code %d", (int) GetLastError()))); if (!LookupAccountSid(NULL, tokenuser->User.Sid, accountname, &accountnamesize, domainname, &domainnamesize, &accountnameuse)) ereport(ERROR, (errmsg_internal("could not lookup acconut sid: error code %d", (int) GetLastError()))); free(tokenuser); /* * Compare realm/domain if requested. In SSPI, always compare case * insensitive. */ if (pg_krb_realm && strlen(pg_krb_realm)) { if (pg_strcasecmp(pg_krb_realm, domainname)) { elog(DEBUG2, "SSPI domain (%s) and configured domain (%s) don't match", domainname, pg_krb_realm); return STATUS_ERROR; } } /* * We have the username (without domain/realm) in accountname, compare to * the supplied value. In SSPI, always compare case insensitive. */ if (pg_strcasecmp(port->user_name, accountname)) { /* GSS name and PGUSER are not equivalent */ elog(DEBUG2, "provided username (%s) and SSPI username (%s) don't match", port->user_name, accountname); return STATUS_ERROR; } return STATUS_OK; }
/* * Execute an incoming replication command. */ static bool HandleReplicationCommand(const char *cmd_string) { bool replication_started = false; int parse_rc; Node *cmd_node; MemoryContext cmd_context; MemoryContext old_context; elog(DEBUG1, "received replication command: %s", cmd_string); cmd_context = AllocSetContextCreate(CurrentMemoryContext, "Replication command context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); old_context = MemoryContextSwitchTo(cmd_context); replication_scanner_init(cmd_string); parse_rc = replication_yyparse(); if (parse_rc != 0) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), (errmsg_internal("replication command parser returned %d", parse_rc)))); cmd_node = replication_parse_result; switch (cmd_node->type) { case T_IdentifySystemCmd: IdentifySystem(); break; case T_StartReplicationCmd: StartReplication((StartReplicationCmd *) cmd_node); /* break out of the loop */ replication_started = true; break; case T_BaseBackupCmd: SendBaseBackup((BaseBackupCmd *) cmd_node); /* Send CommandComplete and ReadyForQuery messages */ EndCommand("SELECT", DestRemote); ReadyForQuery(DestRemote); /* ReadyForQuery did pq_flush for us */ break; default: ereport(FATAL, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid standby query string: %s", cmd_string))); } /* done */ MemoryContextSwitchTo(old_context); MemoryContextDelete(cmd_context); return replication_started; }
/* * ProcessQuery * Execute a single plannable query within a PORTAL_MULTI_QUERY * or PORTAL_ONE_RETURNING portal * * parsetree: the query tree * plan: the plan tree for the query * params: any parameters needed * dest: where to send results * completionTag: points to a buffer of size COMPLETION_TAG_BUFSIZE * in which to store a command completion status string. * * completionTag may be NULL if caller doesn't want a status string. * * Must be called in a memory context that will be reset or deleted on * error; otherwise the executor's memory usage will be leaked. */ static void ProcessQuery(Query *parsetree, Plan *plan, ParamListInfo params, DestReceiver *dest, char *completionTag) { int operation = parsetree->commandType; QueryDesc *queryDesc; ereport(DEBUG3, (errmsg_internal("ProcessQuery"))); /* * Must always set snapshot for plannable queries. Note we assume that * caller will take care of restoring ActiveSnapshot on exit/error. */ ActiveSnapshot = CopySnapshot(GetTransactionSnapshot()); /* * Create the QueryDesc object */ queryDesc = CreateQueryDesc(parsetree, plan, ActiveSnapshot, InvalidSnapshot, dest, params, false); /* * Set up to collect AFTER triggers */ AfterTriggerBeginQuery(); /* * Call ExecutorStart to prepare the plan for execution */ ExecutorStart(queryDesc, 0); /* * Run the plan to completion. */ ExecutorRun(queryDesc, ForwardScanDirection, 0L); /* * Build command completion status string, if caller wants one. */ if (completionTag) { Oid lastOid; switch (operation) { case CMD_SELECT: strcpy(completionTag, "SELECT"); break; case CMD_INSERT: if (queryDesc->estate->es_processed == 1) lastOid = queryDesc->estate->es_lastoid; else lastOid = InvalidOid; snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "INSERT %u %u", lastOid, queryDesc->estate->es_processed); break; case CMD_UPDATE: snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "UPDATE %u", queryDesc->estate->es_processed); break; case CMD_DELETE: snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "DELETE %u", queryDesc->estate->es_processed); break; default: strcpy(completionTag, "???"); break; } } /* Now take care of any queued AFTER triggers */ AfterTriggerEndQuery(queryDesc->estate); /* * Now, we close down all the scans and free allocated resources. */ ExecutorEnd(queryDesc); FreeQueryDesc(queryDesc); FreeSnapshot(ActiveSnapshot); ActiveSnapshot = NULL; }
void inv_truncate(LargeObjectDesc *obj_desc, int64 len) { int32 pageno = (int32) (len / LOBLKSIZE); int32 off; ScanKeyData skey[2]; SysScanDesc sd; HeapTuple oldtuple; Form_pg_largeobject olddata; struct { bytea hdr; char data[LOBLKSIZE]; /* make struct big enough */ int32 align_it; /* ensure struct is aligned well enough */ } workbuf; char *workb = VARDATA(&workbuf.hdr); HeapTuple newtup; Datum values[Natts_pg_largeobject]; bool nulls[Natts_pg_largeobject]; bool replace[Natts_pg_largeobject]; CatalogIndexState indstate; Assert(PointerIsValid(obj_desc)); /* enforce writability because snapshot is probably wrong otherwise */ Assert(obj_desc->flags & IFS_WRLOCK); /* * use errmsg_internal here because we don't want to expose INT64_FORMAT * in translatable strings; doing better is not worth the trouble */ if (len < 0 || len > MAX_LARGE_OBJECT_SIZE) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg_internal("invalid large object truncation target: " INT64_FORMAT, len))); open_lo_relation(); indstate = CatalogOpenIndexes(lo_heap_r); /* * Set up to find all pages with desired loid and pageno >= target */ ScanKeyInit(&skey[0], Anum_pg_largeobject_loid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(obj_desc->id)); ScanKeyInit(&skey[1], Anum_pg_largeobject_pageno, BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(pageno)); sd = systable_beginscan_ordered(lo_heap_r, lo_index_r, obj_desc->snapshot, 2, skey); /* * If possible, get the page the truncation point is in. The truncation * point may be beyond the end of the LO or in a hole. */ olddata = NULL; if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL) { if (HeapTupleHasNulls(oldtuple)) /* paranoia */ elog(ERROR, "null field found in pg_largeobject"); olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple); Assert(olddata->pageno >= pageno); } /* * If we found the page of the truncation point we need to truncate the * data in it. Otherwise if we're in a hole, we need to create a page to * mark the end of data. */ if (olddata != NULL && olddata->pageno == pageno) { /* First, load old data into workbuf */ bytea *datafield; int pagelen; bool pfreeit; getdatafield(olddata, &datafield, &pagelen, &pfreeit); memcpy(workb, VARDATA(datafield), pagelen); if (pfreeit) pfree(datafield); /* * Fill any hole */ off = len % LOBLKSIZE; if (off > pagelen) MemSet(workb + pagelen, 0, off - pagelen); /* compute length of new page */ SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ); /* * Form and insert updated tuple */ memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); memset(replace, false, sizeof(replace)); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); replace[Anum_pg_largeobject_data - 1] = true; newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r), values, nulls, replace); simple_heap_update(lo_heap_r, &newtup->t_self, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); } else { /* * If the first page we found was after the truncation point, we're in * a hole that we'll fill, but we need to delete the later page * because the loop below won't visit it again. */ if (olddata != NULL) { Assert(olddata->pageno > pageno); simple_heap_delete(lo_heap_r, &oldtuple->t_self); } /* * Write a brand new page. * * Fill the hole up to the truncation point */ off = len % LOBLKSIZE; if (off > 0) MemSet(workb, 0, off); /* compute length of new page */ SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ); /* * Form and insert new tuple */ memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id); values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls); simple_heap_insert(lo_heap_r, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); } /* * Delete any pages after the truncation point. If the initial search * didn't find a page, then of course there's nothing more to do. */ if (olddata != NULL) { while ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL) { simple_heap_delete(lo_heap_r, &oldtuple->t_self); } } systable_endscan_ordered(sd); CatalogCloseIndexes(indstate); /* * Advance command counter so that tuple updates will be seen by later * large-object operations in this transaction. */ CommandCounterIncrement(); }
/* * Set up for tuple conversion, matching input and output columns by name. * (Dropped columns are ignored in both input and output.) This is intended * for use when the rowtypes are related by inheritance, so we expect an exact * match of both type and typmod. The error messages will be a bit unhelpful * unless both rowtypes are named composite types. */ TupleConversionMap * convert_tuples_by_name(TupleDesc indesc, TupleDesc outdesc, const char *msg) { TupleConversionMap *map; AttrNumber *attrMap; int n; int i; bool same; /* Verify compatibility and prepare attribute-number map */ n = outdesc->natts; attrMap = (AttrNumber *) palloc0(n * sizeof(AttrNumber)); for (i = 0; i < n; i++) { Form_pg_attribute att = outdesc->attrs[i]; char *attname; Oid atttypid; int32 atttypmod; int j; if (att->attisdropped) continue; /* attrMap[i] is already 0 */ attname = NameStr(att->attname); atttypid = att->atttypid; atttypmod = att->atttypmod; for (j = 0; j < indesc->natts; j++) { att = indesc->attrs[j]; if (att->attisdropped) continue; if (strcmp(attname, NameStr(att->attname)) == 0) { /* Found it, check type */ if (atttypid != att->atttypid || atttypmod != att->atttypmod) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg_internal("%s", _(msg)), errdetail("Attribute \"%s\" of type %s does not match corresponding attribute of type %s.", attname, format_type_be(outdesc->tdtypeid), format_type_be(indesc->tdtypeid)))); attrMap[i] = (AttrNumber) (j + 1); break; } } if (attrMap[i] == 0) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg_internal("%s", _(msg)), errdetail("Attribute \"%s\" of type %s does not exist in type %s.", attname, format_type_be(outdesc->tdtypeid), format_type_be(indesc->tdtypeid)))); } /* * Check to see if the map is one-to-one and the tuple types are the same. * (We check the latter because if they're not, we want to do conversion * to inject the right OID into the tuple datum.) */ if (indesc->natts == outdesc->natts && indesc->tdtypeid == outdesc->tdtypeid) { same = true; for (i = 0; i < n; i++) { if (attrMap[i] == (i + 1)) continue; /* * If it's a dropped column and the corresponding input column is * also dropped, we needn't convert. However, attlen and attalign * must agree. */ if (attrMap[i] == 0 && indesc->attrs[i]->attisdropped && indesc->attrs[i]->attlen == outdesc->attrs[i]->attlen && indesc->attrs[i]->attalign == outdesc->attrs[i]->attalign) continue; same = false; break; } } else same = false; if (same) { /* Runtime conversion is not needed */ pfree(attrMap); return NULL; } /* Prepare the map structure */ map = (TupleConversionMap *) palloc(sizeof(TupleConversionMap)); map->indesc = indesc; map->outdesc = outdesc; map->attrMap = attrMap; /* preallocate workspace for Datum arrays */ map->outvalues = (Datum *) palloc(n * sizeof(Datum)); map->outisnull = (bool *) palloc(n * sizeof(bool)); n = indesc->natts + 1; /* +1 for NULL */ map->invalues = (Datum *) palloc(n * sizeof(Datum)); map->inisnull = (bool *) palloc(n * sizeof(bool)); map->invalues[0] = (Datum) 0; /* set up the NULL entry */ map->inisnull[0] = true; return map; }