Ejemplo n.º 1
0
pg_tz *
pg_tzenumerate_next(pg_tzenum *dir)
{
	while (dir->depth >= 0)
	{
		struct dirent *direntry;
		char		fullname[MAXPGPATH];
		struct stat statbuf;

		direntry = ReadDir(dir->dirdesc[dir->depth], dir->dirname[dir->depth]);

		if (!direntry)
		{
			/* End of this directory */
			FreeDir(dir->dirdesc[dir->depth]);
			pfree(dir->dirname[dir->depth]);
			dir->depth--;
			continue;
		}

		if (direntry->d_name[0] == '.')
			continue;

		snprintf(fullname, MAXPGPATH, "%s/%s",
				 dir->dirname[dir->depth], direntry->d_name);
		if (stat(fullname, &statbuf) != 0)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not stat \"%s\": %m", fullname)));

		if (S_ISDIR(statbuf.st_mode))
		{
			/* Step into the subdirectory */
			if (dir->depth >= MAX_TZDIR_DEPTH - 1)
				ereport(ERROR,
					 (errmsg_internal("timezone directory stack overflow")));
			dir->depth++;
			dir->dirname[dir->depth] = pstrdup(fullname);
			dir->dirdesc[dir->depth] = AllocateDir(fullname);
			if (!dir->dirdesc[dir->depth])
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not open directory \"%s\": %m",
								fullname)));

			/* Start over reading in the new directory */
			continue;
		}

		/*
		 * Load this timezone using tzload() not pg_tzset(), so we don't fill
		 * the cache
		 */
		if (tzload(fullname + dir->baselen, dir->tz.TZname, &dir->tz.state,
				   TRUE) != 0)
		{
			/* Zone could not be loaded, ignore it */
			continue;
		}

		if (!pg_tz_acceptable(&dir->tz))
		{
			/* Ignore leap-second zones */
			continue;
		}

		/* Timezone loaded OK. */
		return &dir->tz;
	}

	/* Nothing more found */
	return NULL;
}
Ejemplo n.º 2
0
/*
 * While checking the mirroring configuration, call this procedure to set
 * that we are going to attempt connect and mirror to the standby.
 */
void
FtsSetQDMirroring(Segment *seginfo)
{
	int len;
	bool connectedToWalSendServer;
	bool walSendServerOk;

	LWLockAcquire(ftsQDMirrorLock, LW_EXCLUSIVE);
	if (ftsQDMirrorInfo->configurationChecked)
	{
		// Someone else finished the configuration check work first.
		LWLockRelease(ftsQDMirrorLock);
		return;
	}

	Assert(ftsQDMirrorInfo->state == QDMIRROR_STATE_NONE);

	len = strlen(seginfo->hostname);
	if (len >= sizeof(ftsQDMirrorInfo->name))
	{
		LWLockRelease(ftsQDMirrorLock);
		elog(ERROR, "hostname too long for fts master mirror configuration storage");
	}
	memcpy(ftsQDMirrorInfo->name, seginfo->hostname, len);
	ftsQDMirrorInfo->name[len] = 0;
	ftsQDMirrorInfo->port = seginfo->port;

	//ftsQDMirrorInfo->dbid = seginfo->dbid;

	//ftsQDMirrorInfo->valid = dbinfo->valid;

	/*
	 * Connect to our WAL Send server, but not under lock.
	 *
	 */
	ftsQDMirrorInfo->configurationChecked = true;
	if (Master_mirroring_administrator_disable)
	{
		LWLockRelease(ftsQDMirrorLock);
		disableQDMirroring_AdministratorDisabled();
		return;
	}

	ftsQDMirrorInfo->state = QDMIRROR_STATE_CONNECTINGWALSENDSERVER;
	ftsQDMirrorInfo->updateMask |= QDMIRROR_UPDATEMASK_MASTERMIRRORING;
	//elog(NOTICE, "Master mirroring synchronizing");

	/* Get the log message time from the NOTICE we just issued. */

	gettimeofday(&ftsQDMirrorInfo->lastLogTimeVal, NULL);

	LWLockRelease(ftsQDMirrorLock);

	connectedToWalSendServer = WalSendServerClientConnect(/* complain */ true);
	if (!connectedToWalSendServer)
	{
		disableQDMirroring_WalSendServerError("Cannot connect to the WAL Send server");
		return;
	}

	LWLockAcquire(ftsQDMirrorLock, LW_EXCLUSIVE);
	if (ftsQDMirrorInfo->state != QDMIRROR_STATE_CONNECTINGWALSENDSERVER)
	{
		/*
		 * We've changed state while unlocked.
		 */
		if (ftsQDMirrorInfo->state == QDMIRROR_STATE_DISABLED)
		{
			LWLockRelease(ftsQDMirrorLock);

			elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
				 "Master mirroring already disabled (connecting)");
			return;
		}
		else
		{
			QDMIRRORState state = ftsQDMirrorInfo->state;

			LWLockRelease(ftsQDMirrorLock);
			ereport(ERROR, (errmsg_internal("Unexpected master mirroring state '%s'",
				 QDMirroringStateToString(state))));
		}
	}

	ftsQDMirrorInfo->state = QDMIRROR_STATE_POSITIONINGTOEND;
	LWLockRelease(ftsQDMirrorLock);

	walSendServerOk = XLogQDMirrorPositionToEnd();
	if (!walSendServerOk)
	{
		disableQDMirroring_WalSendServerError("Error occurred during the PositionToEnd request to the WAL Send server");
		return;
	}

	LWLockAcquire(ftsQDMirrorLock, LW_EXCLUSIVE);
	if (ftsQDMirrorInfo->state != QDMIRROR_STATE_POSITIONINGTOEND)
	{
		/*
		 * We've changed state while unlocked.
		 */
		if (ftsQDMirrorInfo->state == QDMIRROR_STATE_DISABLED)
		{
			LWLockRelease(ftsQDMirrorLock);

			elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
				 "Master mirroring already disabled (request PositionToEnd -- could not send to WAL Send server)");
			return;
		}
		else
		{
			QDMIRRORState state = ftsQDMirrorInfo->state;

			LWLockRelease(ftsQDMirrorLock);
			elog(ERROR,"Unexpected master mirror state '%s'",
				 QDMirroringStateToString(state));
		}
	}
	ftsQDMirrorInfo->state = QDMIRROR_STATE_CATCHUPPENDING;
	LWLockRelease(ftsQDMirrorLock);
	elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
		 "positioning master mirror to end complete -- next step is catch-up");
}
Ejemplo n.º 3
0
int
pgpipe(int handles[2])
{
	SOCKET		s;
	struct sockaddr_in serv_addr;
	int			len = sizeof(serv_addr);

	handles[0] = handles[1] = INVALID_SOCKET;

	if ((s = socket(AF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
	{
		ereport(LOG, (errmsg_internal("pgpipe could not create socket: %ui", WSAGetLastError())));
		return -1;
	}

	memset((void *) &serv_addr, 0, sizeof(serv_addr));
	serv_addr.sin_family = AF_INET;
	serv_addr.sin_port = htons(0);
	serv_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	if (bind(s, (SOCKADDR *) &serv_addr, len) == SOCKET_ERROR)
	{
		ereport(LOG, (errmsg_internal("pgpipe could not bind: %ui", WSAGetLastError())));
		closesocket(s);
		return -1;
	}
	if (listen(s, 1) == SOCKET_ERROR)
	{
		ereport(LOG, (errmsg_internal("pgpipe could not listen: %ui", WSAGetLastError())));
		closesocket(s);
		return -1;
	}
	if (getsockname(s, (SOCKADDR *) &serv_addr, &len) == SOCKET_ERROR)
	{
		ereport(LOG, (errmsg_internal("pgpipe could not getsockname: %ui", WSAGetLastError())));
		closesocket(s);
		return -1;
	}
	if ((handles[1] = socket(PF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET)
	{
		ereport(LOG, (errmsg_internal("pgpipe could not create socket 2: %ui", WSAGetLastError())));
		closesocket(s);
		return -1;
	}

	if (connect(handles[1], (SOCKADDR *) &serv_addr, len) == SOCKET_ERROR)
	{
		ereport(LOG, (errmsg_internal("pgpipe could not connect socket: %ui", WSAGetLastError())));
		closesocket(s);
		return -1;
	}
	if ((handles[0] = accept(s, (SOCKADDR *) &serv_addr, &len)) == INVALID_SOCKET)
	{
		ereport(LOG, (errmsg_internal("pgpipe could not accept socket: %ui", WSAGetLastError())));
		closesocket(handles[1]);
		handles[1] = INVALID_SOCKET;
		closesocket(s);
		return -1;
	}
	closesocket(s);
	return 0;
}
Ejemplo n.º 4
0
/*
 * Attempt to execute an external shell command during recovery.
 *
 * 'command' is the shell command to be executed, 'commandName' is a
 * human-readable name describing the command emitted in the logs. If
 * 'failOnSignal' is true and the command is killed by a signal, a FATAL
 * error is thrown. Otherwise a WARNING is emitted.
 *
 * This is currently used for recovery_end_command and archive_cleanup_command.
 */
void
ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
{
	char		xlogRecoveryCmd[MAXPGPATH];
	char		lastRestartPointFname[MAXPGPATH];
	char	   *dp;
	char	   *endp;
	const char *sp;
	int			rc;
	bool		signaled;
	XLogSegNo	restartSegNo;
	XLogRecPtr	restartRedoPtr;
	TimeLineID	restartTli;

	Assert(command && commandName);

	/*
	 * Calculate the archive file cutoff point for use during log shipping
	 * replication. All files earlier than this point can be deleted from the
	 * archive, though there is no requirement to do so.
	 */
	GetOldestRestartPoint(&restartRedoPtr, &restartTli);
	XLByteToSeg(restartRedoPtr, restartSegNo);
	XLogFileName(lastRestartPointFname, restartTli, restartSegNo);

	/*
	 * construct the command to be executed
	 */
	dp = xlogRecoveryCmd;
	endp = xlogRecoveryCmd + MAXPGPATH - 1;
	*endp = '\0';

	for (sp = command; *sp; sp++)
	{
		if (*sp == '%')
		{
			switch (sp[1])
			{
				case 'r':
					/* %r: filename of last restartpoint */
					sp++;
					StrNCpy(dp, lastRestartPointFname, endp - dp);
					dp += strlen(dp);
					break;
				case '%':
					/* convert %% to a single % */
					sp++;
					if (dp < endp)
						*dp++ = *sp;
					break;
				default:
					/* otherwise treat the % as not special */
					if (dp < endp)
						*dp++ = *sp;
					break;
			}
		}
		else
		{
			if (dp < endp)
				*dp++ = *sp;
		}
	}
	*dp = '\0';

	ereport(DEBUG3,
			(errmsg_internal("executing %s \"%s\"", commandName, command)));

	/*
	 * execute the constructed command
	 */
	rc = system(xlogRecoveryCmd);
	if (rc != 0)
	{
		/*
		 * If the failure was due to any sort of signal, it's best to punt and
		 * abort recovery. See also detailed comments on signals in
		 * RestoreArchivedFile().
		 */
		signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;

		ereport((signaled && failOnSignal) ? FATAL : WARNING,
		/*------
		   translator: First %s represents a recovery.conf parameter name like
		  "recovery_end_command", and the 2nd is the value of that parameter. */
				(errmsg("%s \"%s\": return code %d", commandName,
						command, rc)));
	}
}
Ejemplo n.º 5
0
/*
 * Attempt to retrieve the specified file from off-line archival storage.
 * If successful, fill "path" with its complete path (note that this will be
 * a temp file name that doesn't follow the normal naming convention), and
 * return TRUE.
 *
 * If not successful, fill "path" with the name of the normal on-line file
 * (which may or may not actually exist, but we'll try to use it), and return
 * FALSE.
 *
 * For fixed-size files, the caller may pass the expected size as an
 * additional crosscheck on successful recovery.  If the file size is not
 * known, set expectedSize = 0.
 */
bool
RestoreArchivedFile(char *path, const char *xlogfname,
					const char *recovername, off_t expectedSize)
{
	char		xlogpath[MAXPGPATH];
	char		xlogRestoreCmd[MAXPGPATH];
	char		lastRestartPointFname[MAXPGPATH];
	char	   *dp;
	char	   *endp;
	const char *sp;
	int			rc;
	bool		signaled;
	struct stat stat_buf;
	XLogSegNo	restartSegNo;
	XLogRecPtr	restartRedoPtr;
	TimeLineID	restartTli;

	/* In standby mode, restore_command might not be supplied */
	if (recoveryRestoreCommand == NULL)
		goto not_available;

	/*
	 * When doing archive recovery, we always prefer an archived log file even
	 * if a file of the same name exists in XLOGDIR.  The reason is that the
	 * file in XLOGDIR could be an old, un-filled or partly-filled version
	 * that was copied and restored as part of backing up $PGDATA.
	 *
	 * We could try to optimize this slightly by checking the local copy
	 * lastchange timestamp against the archived copy, but we have no API to
	 * do this, nor can we guarantee that the lastchange timestamp was
	 * preserved correctly when we copied to archive. Our aim is robustness,
	 * so we elect not to do this.
	 *
	 * If we cannot obtain the log file from the archive, however, we will try
	 * to use the XLOGDIR file if it exists.  This is so that we can make use
	 * of log segments that weren't yet transferred to the archive.
	 *
	 * Notice that we don't actually overwrite any files when we copy back
	 * from archive because the restore_command may inadvertently
	 * restore inappropriate xlogs, or they may be corrupt, so we may wish to
	 * fallback to the segments remaining in current XLOGDIR later. The
	 * copy-from-archive filename is always the same, ensuring that we don't
	 * run out of disk space on long recoveries.
	 */
	snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);

	/*
	 * Make sure there is no existing file named recovername.
	 */
	if (stat(xlogpath, &stat_buf) != 0)
	{
		if (errno != ENOENT)
			ereport(FATAL,
					(errcode_for_file_access(),
					 errmsg("could not stat file \"%s\": %m",
							xlogpath)));
	}
	else
	{
		if (unlink(xlogpath) != 0)
			ereport(FATAL,
					(errcode_for_file_access(),
					 errmsg("could not remove file \"%s\": %m",
							xlogpath)));
	}

	/*
	 * Calculate the archive file cutoff point for use during log shipping
	 * replication. All files earlier than this point can be deleted from the
	 * archive, though there is no requirement to do so.
	 *
	 * We initialise this with the filename of an InvalidXLogRecPtr, which
	 * will prevent the deletion of any WAL files from the archive because of
	 * the alphabetic sorting property of WAL filenames.
	 *
	 * Once we have successfully located the redo pointer of the checkpoint
	 * from which we start recovery we never request a file prior to the redo
	 * pointer of the last restartpoint. When redo begins we know that we have
	 * successfully located it, so there is no need for additional status
	 * flags to signify the point when we can begin deleting WAL files from
	 * the archive.
	 */
	GetOldestRestartPoint(&restartRedoPtr, &restartTli);
	if (!XLogRecPtrIsInvalid(restartRedoPtr))
	{
		XLByteToSeg(restartRedoPtr, restartSegNo);
		XLogFileName(lastRestartPointFname, restartTli, restartSegNo);
		/* we shouldn't need anything earlier than last restart point */
		Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
	}
	else
		XLogFileName(lastRestartPointFname, 0, 0L);

	/*
	 * construct the command to be executed
	 */
	dp = xlogRestoreCmd;
	endp = xlogRestoreCmd + MAXPGPATH - 1;
	*endp = '\0';

	for (sp = recoveryRestoreCommand; *sp; sp++)
	{
		if (*sp == '%')
		{
			switch (sp[1])
			{
				case 'p':
					/* %p: relative path of target file */
					sp++;
					StrNCpy(dp, xlogpath, endp - dp);
					make_native_path(dp);
					dp += strlen(dp);
					break;
				case 'f':
					/* %f: filename of desired file */
					sp++;
					StrNCpy(dp, xlogfname, endp - dp);
					dp += strlen(dp);
					break;
				case 'r':
					/* %r: filename of last restartpoint */
					sp++;
					StrNCpy(dp, lastRestartPointFname, endp - dp);
					dp += strlen(dp);
					break;
				case '%':
					/* convert %% to a single % */
					sp++;
					if (dp < endp)
						*dp++ = *sp;
					break;
				default:
					/* otherwise treat the % as not special */
					if (dp < endp)
						*dp++ = *sp;
					break;
			}
		}
		else
		{
			if (dp < endp)
				*dp++ = *sp;
		}
	}
	*dp = '\0';

	ereport(DEBUG3,
			(errmsg_internal("executing restore command \"%s\"",
							 xlogRestoreCmd)));

	/*
	 * Check signals before restore command and reset afterwards.
	 */
	PreRestoreCommand();

	/*
	 * Copy xlog from archival storage to XLOGDIR
	 */
	rc = system(xlogRestoreCmd);

	PostRestoreCommand();

	if (rc == 0)
	{
		/*
		 * command apparently succeeded, but let's make sure the file is
		 * really there now and has the correct size.
		 */
		if (stat(xlogpath, &stat_buf) == 0)
		{
			if (expectedSize > 0 && stat_buf.st_size != expectedSize)
			{
				int			elevel;

				/*
				 * If we find a partial file in standby mode, we assume it's
				 * because it's just being copied to the archive, and keep
				 * trying.
				 *
				 * Otherwise treat a wrong-sized file as FATAL to ensure the
				 * DBA would notice it, but is that too strong? We could try
				 * to plow ahead with a local copy of the file ... but the
				 * problem is that there probably isn't one, and we'd
				 * incorrectly conclude we've reached the end of WAL and we're
				 * done recovering ...
				 */
				if (StandbyMode && stat_buf.st_size < expectedSize)
					elevel = DEBUG1;
				else
					elevel = FATAL;
				ereport(elevel,
						(errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
								xlogfname,
								(unsigned long) stat_buf.st_size,
								(unsigned long) expectedSize)));
				return false;
			}
			else
			{
				ereport(LOG,
						(errmsg("restored log file \"%s\" from archive",
								xlogfname)));
				strcpy(path, xlogpath);
				return true;
			}
		}
		else
		{
			/* stat failed */
			if (errno != ENOENT)
				ereport(FATAL,
						(errcode_for_file_access(),
						 errmsg("could not stat file \"%s\": %m",
								xlogpath)));
		}
	}

	/*
	 * Remember, we rollforward UNTIL the restore fails so failure here is
	 * just part of the process... that makes it difficult to determine
	 * whether the restore failed because there isn't an archive to restore,
	 * or because the administrator has specified the restore program
	 * incorrectly.  We have to assume the former.
	 *
	 * However, if the failure was due to any sort of signal, it's best to
	 * punt and abort recovery.  (If we "return false" here, upper levels will
	 * assume that recovery is complete and start up the database!) It's
	 * essential to abort on child SIGINT and SIGQUIT, because per spec
	 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
	 * those it's a good bet we should have gotten it too.
	 *
	 * On SIGTERM, assume we have received a fast shutdown request, and exit
	 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
	 * child process. If we receive it first, the signal handler will call
	 * proc_exit, otherwise we do it here. If we or the child process received
	 * SIGTERM for any other reason than a fast shutdown request, postmaster
	 * will perform an immediate shutdown when it sees us exiting
	 * unexpectedly.
	 *
	 * Per the Single Unix Spec, shells report exit status > 128 when a called
	 * command died on a signal.  Also, 126 and 127 are used to report
	 * problems such as an unfindable command; treat those as fatal errors
	 * too.
	 */
	if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
		proc_exit(1);

	signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;

	ereport(signaled ? FATAL : DEBUG2,
		(errmsg("could not restore file \"%s\" from archive: return code %d",
				xlogfname, rc)));

not_available:

	/*
	 * if an archived file is not available, there might still be a version of
	 * this file in XLOGDIR, so return that as the filename to open.
	 *
	 * In many recovery scenarios we expect this to fail also, but if so that
	 * just means we've reached the end of WAL.
	 */
	snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
	return false;
}
Ejemplo n.º 6
0
/*
 * pgarch_archiveXlog
 *
 * Invokes system(3) to copy one archive file to wherever it should go
 *
 * Returns true if successful
 */
static bool
pgarch_archiveXlog(char *xlog)
{
	char		xlogarchcmd[MAXPGPATH];
	char		pathname[MAXPGPATH];
	char		activitymsg[MAXFNAMELEN + 16];
	char	   *dp;
	char	   *endp;
	const char *sp;
	int			rc;

	snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);

	/*
	 * construct the command to be executed
	 */
	dp = xlogarchcmd;
	endp = xlogarchcmd + MAXPGPATH - 1;
	*endp = '\0';

	for (sp = XLogArchiveCommand; *sp; sp++)
	{
		if (*sp == '%')
		{
			switch (sp[1])
			{
				case 'p':
					/* %p: relative path of source file */
					sp++;
					strlcpy(dp, pathname, endp - dp);
					make_native_path(dp);
					dp += strlen(dp);
					break;
				case 'f':
					/* %f: filename of source file */
					sp++;
					strlcpy(dp, xlog, endp - dp);
					dp += strlen(dp);
					break;
				case '%':
					/* convert %% to a single % */
					sp++;
					if (dp < endp)
						*dp++ = *sp;
					break;
				default:
					/* otherwise treat the % as not special */
					if (dp < endp)
						*dp++ = *sp;
					break;
			}
		}
		else
		{
			if (dp < endp)
				*dp++ = *sp;
		}
	}
	*dp = '\0';

	ereport(DEBUG3,
			(errmsg_internal("executing archive command \"%s\"",
							 xlogarchcmd)));

	/* Report archive activity in PS display */
	snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
	set_ps_display(activitymsg, false);

	rc = system(xlogarchcmd);
	if (rc != 0)
	{
		/*
		 * If either the shell itself, or a called command, died on a signal,
		 * abort the archiver.  We do this because system() ignores SIGINT and
		 * SIGQUIT while waiting; so a signal is very likely something that
		 * should have interrupted us too.  If we overreact it's no big deal,
		 * the postmaster will just start the archiver again.
		 *
		 * Per the Single Unix Spec, shells report exit status > 128 when a
		 * called command died on a signal.
		 */
		int			lev = (WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128) ? FATAL : LOG;

		if (WIFEXITED(rc))
		{
			ereport(lev,
					(errmsg("archive command failed with exit code %d",
							WEXITSTATUS(rc)),
					 errdetail("The failed archive command was: %s",
							   xlogarchcmd)));
		}
		else if (WIFSIGNALED(rc))
		{
#if defined(WIN32)
			ereport(lev,
				  (errmsg("archive command was terminated by exception 0x%X",
						  WTERMSIG(rc)),
				   errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
				   errdetail("The failed archive command was: %s",
							 xlogarchcmd)));
#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
			ereport(lev,
					(errmsg("archive command was terminated by signal %d: %s",
							WTERMSIG(rc),
			  WTERMSIG(rc) < NSIG ? sys_siglist[WTERMSIG(rc)] : "(unknown)"),
					 errdetail("The failed archive command was: %s",
							   xlogarchcmd)));
#else
			ereport(lev,
					(errmsg("archive command was terminated by signal %d",
							WTERMSIG(rc)),
					 errdetail("The failed archive command was: %s",
							   xlogarchcmd)));
#endif
		}
		else
		{
			ereport(lev,
				(errmsg("archive command exited with unrecognized status %d",
						rc),
				 errdetail("The failed archive command was: %s",
						   xlogarchcmd)));
		}

		snprintf(activitymsg, sizeof(activitymsg), "failed on %s", xlog);
		set_ps_display(activitymsg, false);

		return false;
	}
	ereport(DEBUG1,
			(errmsg("archived transaction log file \"%s\"", xlog)));

	snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
	set_ps_display(activitymsg, false);

	return true;
}
Ejemplo n.º 7
0
/*
 * Set up for tuple conversion, matching input and output columns by
 * position.  (Dropped columns are ignored in both input and output.)
 *
 * Note: the errdetail messages speak of indesc as the "returned" rowtype,
 * outdesc as the "expected" rowtype.  This is okay for current uses but
 * might need generalization in future.
 */
TupleConversionMap *
convert_tuples_by_position(TupleDesc indesc,
						   TupleDesc outdesc,
						   const char *msg)
{
	TupleConversionMap *map;
	AttrNumber *attrMap;
	int			nincols;
	int			noutcols;
	int			n;
	int			i;
	int			j;
	bool		same;

	/* Verify compatibility and prepare attribute-number map */
	n = outdesc->natts;
	attrMap = (AttrNumber *) palloc0(n * sizeof(AttrNumber));
	j = 0;						/* j is next physical input attribute */
	nincols = noutcols = 0;		/* these count non-dropped attributes */
	same = true;
	for (i = 0; i < n; i++)
	{
		Form_pg_attribute att = outdesc->attrs[i];
		Oid			atttypid;
		int32		atttypmod;

		if (att->attisdropped)
			continue;			/* attrMap[i] is already 0 */
		noutcols++;
		atttypid = att->atttypid;
		atttypmod = att->atttypmod;
		for (; j < indesc->natts; j++)
		{
			att = indesc->attrs[j];
			if (att->attisdropped)
				continue;
			nincols++;
			/* Found matching column, check type */
			if (atttypid != att->atttypid ||
				(atttypmod != att->atttypmod && atttypmod >= 0))
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg_internal("%s", _(msg)),
						 errdetail("Returned type %s does not match expected type %s in column %d.",
								   format_type_with_typemod(att->atttypid,
															att->atttypmod),
								   format_type_with_typemod(atttypid,
															atttypmod),
								   noutcols)));
			attrMap[i] = (AttrNumber) (j + 1);
			j++;
			break;
		}
		if (attrMap[i] == 0)
			same = false;		/* we'll complain below */
	}

	/* Check for unused input columns */
	for (; j < indesc->natts; j++)
	{
		if (indesc->attrs[j]->attisdropped)
			continue;
		nincols++;
		same = false;			/* we'll complain below */
	}

	/* Report column count mismatch using the non-dropped-column counts */
	if (!same)
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
				 errmsg_internal("%s", _(msg)),
				 errdetail("Number of returned columns (%d) does not match "
						   "expected column count (%d).",
						   nincols, noutcols)));

	/*
	 * Check to see if the map is one-to-one and the tuple types are the same.
	 * (We check the latter because if they're not, we want to do conversion
	 * to inject the right OID into the tuple datum.)
	 */
	if (indesc->natts == outdesc->natts &&
		indesc->tdtypeid == outdesc->tdtypeid)
	{
		for (i = 0; i < n; i++)
		{
			if (attrMap[i] == (i + 1))
				continue;

			/*
			 * If it's a dropped column and the corresponding input column is
			 * also dropped, we needn't convert.  However, attlen and attalign
			 * must agree.
			 */
			if (attrMap[i] == 0 &&
				indesc->attrs[i]->attisdropped &&
				indesc->attrs[i]->attlen == outdesc->attrs[i]->attlen &&
				indesc->attrs[i]->attalign == outdesc->attrs[i]->attalign)
				continue;

			same = false;
			break;
		}
	}
	else
		same = false;

	if (same)
	{
		/* Runtime conversion is not needed */
		pfree(attrMap);
		return NULL;
	}

	/* Prepare the map structure */
	map = (TupleConversionMap *) palloc(sizeof(TupleConversionMap));
	map->indesc = indesc;
	map->outdesc = outdesc;
	map->attrMap = attrMap;
	/* preallocate workspace for Datum arrays */
	map->outvalues = (Datum *) palloc(n * sizeof(Datum));
	map->outisnull = (bool *) palloc(n * sizeof(bool));
	n = indesc->natts + 1;		/* +1 for NULL */
	map->invalues = (Datum *) palloc(n * sizeof(Datum));
	map->inisnull = (bool *) palloc(n * sizeof(bool));
	map->invalues[0] = (Datum) 0;		/* set up the NULL entry */
	map->inisnull[0] = true;

	return map;
}
Ejemplo n.º 8
0
/*
 * Emit a PG error or notice, together with any available info about
 * the current Python error, previously set by PLy_exception_set().
 * This should be used to propagate Python errors into PG.  If fmt is
 * NULL, the Python error becomes the primary error message, otherwise
 * it becomes the detail.  If there is a Python traceback, it is put
 * in the context.
 */
void
PLy_elog(int elevel, const char *fmt,...)
{
	char	   *xmsg;
	char	   *tbmsg;
	int			tb_depth;
	StringInfoData emsg;
	PyObject   *exc,
			   *val,
			   *tb;
	const char *primary = NULL;
	int			sqlerrcode = 0;
	char	   *detail = NULL;
	char	   *hint = NULL;
	char	   *query = NULL;
	int			position = 0;

	PyErr_Fetch(&exc, &val, &tb);

	if (exc != NULL)
	{
		PyErr_NormalizeException(&exc, &val, &tb);

		if (PyErr_GivenExceptionMatches(val, PLy_exc_spi_error))
			PLy_get_spi_error_data(val, &sqlerrcode, &detail, &hint, &query, &position);
		else if (PyErr_GivenExceptionMatches(val, PLy_exc_fatal))
			elevel = FATAL;
	}

	/* this releases our refcount on tb! */
	PLy_traceback(exc, val, tb,
				  &xmsg, &tbmsg, &tb_depth);

	if (fmt)
	{
		initStringInfo(&emsg);
		for (;;)
		{
			va_list		ap;
			int			needed;

			va_start(ap, fmt);
			needed = appendStringInfoVA(&emsg, dgettext(TEXTDOMAIN, fmt), ap);
			va_end(ap);
			if (needed == 0)
				break;
			enlargeStringInfo(&emsg, needed);
		}
		primary = emsg.data;

		/* Since we have a format string, we cannot have a SPI detail. */
		Assert(detail == NULL);

		/* If there's an exception message, it goes in the detail. */
		if (xmsg)
			detail = xmsg;
	}
	else
	{
		if (xmsg)
			primary = xmsg;
	}

	PG_TRY();
	{
		ereport(elevel,
				(errcode(sqlerrcode ? sqlerrcode : ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
			  errmsg_internal("%s", primary ? primary : "no exception data"),
				 (detail) ? errdetail_internal("%s", detail) : 0,
				 (tb_depth > 0 && tbmsg) ? errcontext("%s", tbmsg) : 0,
				 (hint) ? errhint("%s", hint) : 0,
				 (query) ? internalerrquery(query) : 0,
				 (position) ? internalerrposition(position) : 0));
	}
	PG_CATCH();
	{
		if (fmt)
			pfree(emsg.data);
		if (xmsg)
			pfree(xmsg);
		if (tbmsg)
			pfree(tbmsg);
		Py_XDECREF(exc);
		Py_XDECREF(val);

		PG_RE_THROW();
	}
	PG_END_TRY();

	if (fmt)
		pfree(emsg.data);
	if (xmsg)
		pfree(xmsg);
	if (tbmsg)
		pfree(tbmsg);
	Py_XDECREF(exc);
	Py_XDECREF(val);
}
Ejemplo n.º 9
0
static const char *
identify_system_timezone(void)
{
	int			i;
	char		tzname[128];
	char		localtzname[256];
	time_t		t = time(NULL);
	struct tm  *tm = localtime(&t);
	HKEY		rootKey;
	int			idx;

	if (!tm)
	{
		ereport(WARNING,
				(errmsg_internal("could not determine current date/time: localtime failed")));
		return NULL;
	}

	memset(tzname, 0, sizeof(tzname));
	strftime(tzname, sizeof(tzname) - 1, "%Z", tm);

	for (i = 0; win32_tzmap[i].stdname != NULL; i++)
	{
		if (strcmp(tzname, win32_tzmap[i].stdname) == 0 ||
			strcmp(tzname, win32_tzmap[i].dstname) == 0)
		{
			elog(DEBUG4, "TZ \"%s\" matches Windows timezone \"%s\"",
				 win32_tzmap[i].pgtzname, tzname);
			return win32_tzmap[i].pgtzname;
		}
	}

	/*
	 * Localized Windows versions return localized names for the timezone.
	 * Scan the registry to find the English name, and then try matching
	 * against our table again.
	 */
	memset(localtzname, 0, sizeof(localtzname));
	if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
			   "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones",
					 0,
					 KEY_READ,
					 &rootKey) != ERROR_SUCCESS)
	{
		ereport(WARNING,
				(errmsg_internal("could not open registry key to identify Windows timezone: %i", (int) GetLastError())));
		return NULL;
	}

	for (idx = 0;; idx++)
	{
		char		keyname[256];
		char		zonename[256];
		DWORD		namesize;
		FILETIME	lastwrite;
		HKEY		key;
		LONG		r;

		memset(keyname, 0, sizeof(keyname));
		namesize = sizeof(keyname);
		if ((r = RegEnumKeyEx(rootKey,
							  idx,
							  keyname,
							  &namesize,
							  NULL,
							  NULL,
							  NULL,
							  &lastwrite)) != ERROR_SUCCESS)
		{
			if (r == ERROR_NO_MORE_ITEMS)
				break;
			ereport(WARNING,
					(errmsg_internal("could not enumerate registry subkeys to identify Windows timezone: %i", (int) r)));
			break;
		}

		if ((r = RegOpenKeyEx(rootKey, keyname, 0, KEY_READ, &key)) != ERROR_SUCCESS)
		{
			ereport(WARNING,
					(errmsg_internal("could not open registry subkey to identify Windows timezone: %i", (int) r)));
			break;
		}

		memset(zonename, 0, sizeof(zonename));
		namesize = sizeof(zonename);
		if ((r = RegQueryValueEx(key, "Std", NULL, NULL, zonename, &namesize)) != ERROR_SUCCESS)
		{
			ereport(WARNING,
					(errmsg_internal("could not query value for 'std' to identify Windows timezone \"%s\": %i",
									 keyname, (int) r)));
			RegCloseKey(key);
			continue; /* Proceed to look at the next timezone */
		}
		if (strcmp(tzname, zonename) == 0)
		{
			/* Matched zone */
			strcpy(localtzname, keyname);
			RegCloseKey(key);
			break;
		}
		memset(zonename, 0, sizeof(zonename));
		namesize = sizeof(zonename);
		if ((r = RegQueryValueEx(key, "Dlt", NULL, NULL, zonename, &namesize)) != ERROR_SUCCESS)
		{
			ereport(WARNING,
					(errmsg_internal("could not query value for 'dlt' to identify Windows timezone \"%s\": %i",
									 keyname, (int) r)));
			RegCloseKey(key);
			continue; /* Proceed to look at the next timezone */
		}
		if (strcmp(tzname, zonename) == 0)
		{
			/* Matched DST zone */
			strcpy(localtzname, keyname);
			RegCloseKey(key);
			break;
		}

		RegCloseKey(key);
	}

	RegCloseKey(rootKey);

	if (localtzname[0])
	{
		/* Found a localized name, so scan for that one too */
		for (i = 0; win32_tzmap[i].stdname != NULL; i++)
		{
			if (strcmp(localtzname, win32_tzmap[i].stdname) == 0 ||
				strcmp(localtzname, win32_tzmap[i].dstname) == 0)
			{
				elog(DEBUG4, "TZ \"%s\" matches localized Windows timezone \"%s\" (\"%s\")",
					 win32_tzmap[i].pgtzname, tzname, localtzname);
				return win32_tzmap[i].pgtzname;
			}
		}
	}

	ereport(WARNING,
			(errmsg("could not find a match for Windows timezone \"%s\"",
					tzname)));
	return NULL;
}
Ejemplo n.º 10
0
/*
 * Convert the last socket error code into errno
 */
static void
TranslateSocketError(void)
{
	switch (WSAGetLastError())
	{
		case WSANOTINITIALISED:
		case WSAENETDOWN:
		case WSAEINPROGRESS:
		case WSAEINVAL:
		case WSAESOCKTNOSUPPORT:
		case WSAEFAULT:
		case WSAEINVALIDPROVIDER:
		case WSAEINVALIDPROCTABLE:
		case WSAEMSGSIZE:
			errno = EINVAL;
			break;
		case WSAEAFNOSUPPORT:
			errno = EAFNOSUPPORT;
			break;
		case WSAEMFILE:
			errno = EMFILE;
			break;
		case WSAENOBUFS:
			errno = ENOBUFS;
			break;
		case WSAEPROTONOSUPPORT:
		case WSAEPROTOTYPE:
			errno = EPROTONOSUPPORT;
			break;
		case WSAECONNREFUSED:
			errno = ECONNREFUSED;
			break;
		case WSAEINTR:
			errno = EINTR;
			break;
		case WSAENOTSOCK:
			errno = EBADFD;
			break;
		case WSAEOPNOTSUPP:
			errno = EOPNOTSUPP;
			break;
		case WSAEWOULDBLOCK:
			errno = EWOULDBLOCK;
			break;
		case WSAEACCES:
			errno = EACCES;
			break;
		case WSAENOTCONN:
		case WSAENETRESET:
		case WSAECONNRESET:
		case WSAESHUTDOWN:
		case WSAECONNABORTED:
		case WSAEDISCON:
			errno = ECONNREFUSED;		/* ENOTCONN? */
			break;
		default:
			ereport(NOTICE,
					(errmsg_internal("Unknown win32 socket error code: %i", WSAGetLastError())));
			errno = EINVAL;
	}
}
Ejemplo n.º 11
0
/*
 * Wait for activity on one or more sockets.
 * While waiting, allow signals to run
 *
 * NOTE! Currently does not implement exceptfds check,
 * since it is not used in postgresql!
 */
int
pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval * timeout)
{
	WSAEVENT	events[FD_SETSIZE * 2]; /* worst case is readfds totally
										 * different from writefds, so
										 * 2*FD_SETSIZE sockets */
	SOCKET		sockets[FD_SETSIZE * 2];
	int			numevents = 0;
	int			i;
	int			r;
	DWORD		timeoutval = WSA_INFINITE;
	FD_SET		outreadfds;
	FD_SET		outwritefds;
	int			nummatches = 0;

	Assert(exceptfds == NULL);

	if (pgwin32_poll_signals())
		return -1;

	FD_ZERO(&outreadfds);
	FD_ZERO(&outwritefds);

	/*
	 * Write FDs are different in the way that it is only flagged by
	 * WSASelectEvent() if we have tried to write to them first. So try an
	 * empty write
	 */
	if (writefds)
	{
		for (i = 0; i < writefds->fd_count; i++)
		{
			char		c;
			WSABUF		buf;
			DWORD		sent;

			buf.buf = &c;
			buf.len = 0;

			r = WSASend(writefds->fd_array[i], &buf, 1, &sent, 0, NULL, NULL);
			if (r == 0)			/* Completed - means things are fine! */
				FD_SET(writefds->fd_array[i], &outwritefds);

			else
			{					/* Not completed */
				if (WSAGetLastError() != WSAEWOULDBLOCK)

					/*
					 * Not completed, and not just "would block", so an error
					 * occured
					 */
					FD_SET(writefds->fd_array[i], &outwritefds);
			}
		}
		if (outwritefds.fd_count > 0)
		{
			memcpy(writefds, &outwritefds, sizeof(fd_set));
			if (readfds)
				FD_ZERO(readfds);
			return outwritefds.fd_count;
		}
	}


	/* Now set up for an actual select */

	if (timeout != NULL)
	{
		/* timeoutval is in milliseconds */
		timeoutval = timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
	}

	if (readfds != NULL)
	{
		for (i = 0; i < readfds->fd_count; i++)
		{
			events[numevents] = WSACreateEvent();
			sockets[numevents] = readfds->fd_array[i];
			numevents++;
		}
	}
	if (writefds != NULL)
	{
		for (i = 0; i < writefds->fd_count; i++)
		{
			if (!readfds ||
				!FD_ISSET(writefds->fd_array[i], readfds))
			{
				/* If the socket is not in the read list */
				events[numevents] = WSACreateEvent();
				sockets[numevents] = writefds->fd_array[i];
				numevents++;
			}
		}
	}

	for (i = 0; i < numevents; i++)
	{
		int			flags = 0;

		if (readfds && FD_ISSET(sockets[i], readfds))
			flags |= FD_READ | FD_ACCEPT | FD_CLOSE;

		if (writefds && FD_ISSET(sockets[i], writefds))
			flags |= FD_WRITE | FD_CLOSE;

		if (WSAEventSelect(sockets[i], events[i], flags) == SOCKET_ERROR)
		{
			TranslateSocketError();
			for (i = 0; i < numevents; i++)
				WSACloseEvent(events[i]);
			return -1;
		}
	}

	events[numevents] = pgwin32_signal_event;
	r = WaitForMultipleObjectsEx(numevents + 1, events, FALSE, timeoutval, TRUE);
	if (r != WAIT_TIMEOUT && r != WAIT_IO_COMPLETION && r != (WAIT_OBJECT_0 + numevents))
	{
		/*
		 * We scan all events, even those not signalled, in case more than one
		 * event has been tagged but Wait.. can only return one.
		 */
		WSANETWORKEVENTS resEvents;

		for (i = 0; i < numevents; i++)
		{
			ZeroMemory(&resEvents, sizeof(resEvents));
			if (WSAEnumNetworkEvents(sockets[i], events[i], &resEvents) == SOCKET_ERROR)
				ereport(FATAL,
						(errmsg_internal("failed to enumerate network events: %i", (int) GetLastError())));
			/* Read activity? */
			if (readfds && FD_ISSET(sockets[i], readfds))
			{
				if ((resEvents.lNetworkEvents & FD_READ) ||
					(resEvents.lNetworkEvents & FD_ACCEPT) ||
					(resEvents.lNetworkEvents & FD_CLOSE))
				{
					FD_SET(sockets[i], &outreadfds);

					nummatches++;
				}
			}
			/* Write activity? */
			if (writefds && FD_ISSET(sockets[i], writefds))
			{
				if ((resEvents.lNetworkEvents & FD_WRITE) ||
					(resEvents.lNetworkEvents & FD_CLOSE))
				{
					FD_SET(sockets[i], &outwritefds);

					nummatches++;
				}
			}
		}
	}

	/* Clean up all handles */
	for (i = 0; i < numevents; i++)
	{
		WSAEventSelect(sockets[i], events[i], 0);
		WSACloseEvent(events[i]);
	}

	if (r == WSA_WAIT_TIMEOUT)
	{
		if (readfds)
			FD_ZERO(readfds);
		if (writefds)
			FD_ZERO(writefds);
		return 0;
	}

	if (r == WAIT_OBJECT_0 + numevents)
	{
		pgwin32_dispatch_queued_signals();
		errno = EINTR;
		if (readfds)
			FD_ZERO(readfds);
		if (writefds)
			FD_ZERO(writefds);
		return -1;
	}

	/* Overwrite socket sets with our resulting values */
	if (readfds)
		memcpy(readfds, &outreadfds, sizeof(fd_set));
	if (writefds)
		memcpy(writefds, &outwritefds, sizeof(fd_set));
	return nummatches;
}
Ejemplo n.º 12
0
int
pgwin32_recv(SOCKET s, char *buf, int len, int f)
{
	WSABUF		wbuf;
	int			r;
	DWORD		b;
	DWORD		flags = f;
	int		n;

	if (pgwin32_poll_signals())
		return -1;

	wbuf.len = len;
	wbuf.buf = buf;

	r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
	if (r != SOCKET_ERROR && b > 0)
		/* Read succeeded right away */
		return b;

	if (r == SOCKET_ERROR &&
		WSAGetLastError() != WSAEWOULDBLOCK)
	{
		TranslateSocketError();
		return -1;
	}

	/* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */

	for (n = 0; n < 5; n++)
	{
		if (pgwin32_waitforsinglesocket(s, FD_READ | FD_CLOSE | FD_ACCEPT,
										INFINITE) == 0)
			return -1; /* errno already set */
	
		r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
		if (r == SOCKET_ERROR)
		{
			if (WSAGetLastError() == WSAEWOULDBLOCK)
			{
				/*
				 * There seem to be cases on win2k (at least) where WSARecv
				 * can return WSAEWOULDBLOCK even when
				 * pgwin32_waitforsinglesocket claims the socket is readable.
				 * In this case, just sleep for a moment and try again. We try
				 * up to 5 times - if it fails more than that it's not likely
				 * to ever come back.
				 */
				pg_usleep(10000);
				continue;
			}
			TranslateSocketError();
			return -1;
		}
		return b;
	}
	ereport(NOTICE,
		(errmsg_internal("Failed to read from ready socket (after retries)")));
	errno = EWOULDBLOCK;
	return -1;
}
Ejemplo n.º 13
0
int
pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout)
{
	static HANDLE waitevent = INVALID_HANDLE_VALUE;
	static SOCKET current_socket = -1;
	static int    isUDP = 0;
	HANDLE		events[2];
	int			r;

	if (waitevent == INVALID_HANDLE_VALUE)
	{
		waitevent = CreateEvent(NULL, TRUE, FALSE, NULL);

		if (waitevent == INVALID_HANDLE_VALUE)
			ereport(ERROR,
					(errmsg_internal("Failed to create socket waiting event: %i", (int) GetLastError())));
	}
	else if (!ResetEvent(waitevent))
		ereport(ERROR,
				(errmsg_internal("Failed to reset socket waiting event: %i", (int) GetLastError())));

	/*
	 * make sure we don't multiplex this kernel event object with a different
	 * socket from a previous call
	 */

	if (current_socket != s) 
	{
		if ( current_socket != -1 )
			WSAEventSelect(current_socket, waitevent, 0);
		isUDP = isDataGram(s);
	}

	current_socket = s;

	if (WSAEventSelect(s, waitevent, what) == SOCKET_ERROR)
	{
		TranslateSocketError();
		return 0;
	}

	events[0] = pgwin32_signal_event;
	events[1] = waitevent;

	/* 
	 * Just a workaround of unknown locking problem with writing in UDP socket
	 * under high load: Client's pgsql backend sleeps infinitely in
	 * WaitForMultipleObjectsEx, pgstat process sleeps in pgwin32_select().
	 * So, we will wait with small timeout(0.1 sec) and if sockect is still
	 * blocked, try WSASend (see comments in pgwin32_select) and wait again.
	 */
	if ((what & FD_WRITE) && isUDP)
	{
		for(;;)
		{
			r = WaitForMultipleObjectsEx(2, events, FALSE, 100, TRUE);

			if ( r == WAIT_TIMEOUT )
			{
				char        c;
				WSABUF      buf;
				DWORD       sent;

				buf.buf = &c;
				buf.len = 0;

				r = WSASend(s, &buf, 1, &sent, 0, NULL, NULL);
				if (r == 0)         /* Completed - means things are fine! */
					return 1;
				else if ( WSAGetLastError() != WSAEWOULDBLOCK )
				{
					TranslateSocketError();
					return 0;
				}
			}
			else
				break;
		}
	}
	else
		r = WaitForMultipleObjectsEx(2, events, FALSE, timeout, TRUE);

	if (r == WAIT_OBJECT_0 || r == WAIT_IO_COMPLETION)
	{
		pgwin32_dispatch_queued_signals();
		errno = EINTR;
		return 0;
	}
	if (r == WAIT_OBJECT_0 + 1)
		return 1;
	if (r == WAIT_TIMEOUT)
		return 0;
	ereport(ERROR,
			(errmsg_internal("Bad return from WaitForMultipleObjects: %i (%i)", r, (int) GetLastError())));
	return 0;
}
Ejemplo n.º 14
0
/*
 * ApplicableOpExpressionList walks over all filter clauses that relate to this
 * foreign table, and chooses applicable clauses that we know we can translate
 * into Mongo queries. Currently, these clauses include comparison expressions
 * that have a column and a constant as arguments. For example, "o_orderdate >=
 * date '1994-01-01' + interval '1' year" is an applicable expression.
 */
List *
ApplicableOpExpressionList(RelOptInfo *baserel)
{
	List *opExpressionList = NIL;
	List *restrictInfoList = baserel->baserestrictinfo;
	ListCell *restrictInfoCell = NULL;

	foreach(restrictInfoCell, restrictInfoList)
	{
		RestrictInfo *restrictInfo = (RestrictInfo *) lfirst(restrictInfoCell);
		Expr *expression = restrictInfo->clause;
		NodeTag expressionType = 0;

		OpExpr *opExpression = NULL;
		char *operatorName = NULL;
		char *mongoOperatorName = NULL;
		List *argumentList = NIL;
		Var *column = NULL;
		Const *constant = NULL;
		bool equalsOperator = false;
		bool constantIsArray = false;

		/* we only support operator expressions */
		expressionType = nodeTag(expression);
		if (expressionType != T_OpExpr)
		{
			continue;
		}

		opExpression = (OpExpr *) expression;
		operatorName = get_opname(opExpression->opno);

		/* we only support =, <, >, <=, >=, and <> operators */
		if (strncmp(operatorName, EQUALITY_OPERATOR_NAME, NAMEDATALEN) == 0)
		{
			equalsOperator = true;
		}

		mongoOperatorName = MongoOperatorName(operatorName);
		if (!equalsOperator && mongoOperatorName == NULL)
		{
			ereport(INFO, (errmsg_internal("Ignoring unsupported operator %s", 
							               operatorName)));
			continue;
		}

		/*
		 * We only support simple binary operators that compare a column against
		 * a constant. If the expression is a tree, we don't recurse into it.
		 */
		argumentList = opExpression->args;
		column = (Var *) FindArgumentOfType(argumentList, T_Var);
		constant = (Const *) FindArgumentOfType(argumentList, T_Const);

		/*
		 * We don't push down operators where the constant is an array, since
		 * conditional operators for arrays in MongoDB aren't properly defined.
		 * For example, {similar_products : [ "B0009S4IJW", "6301964144" ]}
		 * finds results that are equal to the array, but {similar_products:
		 * {$gte: [ "B0009S4IJW", "6301964144" ]}} returns an empty set.
		 */
		if (constant != NULL)
		{
			Oid constantArrayTypeId = get_element_type(constant->consttype);
			if (constantArrayTypeId != InvalidOid)
			{
				constantIsArray = true;
				ereport(INFO, (errmsg_internal("Ignoring %s expression with array",
											   operatorName)));
			}
		}
		else
		{
			ereport(INFO, (errmsg_internal("Ignoring %s expression without a constant",
										   operatorName)));
		}

		if (column != NULL && constant != NULL && !constantIsArray)
		{
			opExpressionList = lappend(opExpressionList, opExpression);
		}
	}
Ejemplo n.º 15
0
/*
 *	Generate an ephemeral DH key.  Because this can take a long
 *	time to compute, we can use precomputed parameters of the
 *	common key sizes.
 *
 *	Since few sites will bother to precompute these parameter
 *	files, we also provide a fallback to the parameters provided
 *	by the OpenSSL project.
 *
 *	These values can be static (once loaded or computed) since
 *	the OpenSSL library can efficiently generate random keys from
 *	the information provided.
 */
static DH  *
tmp_dh_cb(SSL *s, int is_export, int keylength)
{
    DH		   *r = NULL;
    static DH  *dh = NULL;
    static DH  *dh512 = NULL;
    static DH  *dh1024 = NULL;
    static DH  *dh2048 = NULL;
    static DH  *dh4096 = NULL;

    switch (keylength)
    {
    case 512:
        if (dh512 == NULL)
            dh512 = load_dh_file(keylength);
        if (dh512 == NULL)
            dh512 = load_dh_buffer(file_dh512, sizeof file_dh512);
        r = dh512;
        break;

    case 1024:
        if (dh1024 == NULL)
            dh1024 = load_dh_file(keylength);
        if (dh1024 == NULL)
            dh1024 = load_dh_buffer(file_dh1024, sizeof file_dh1024);
        r = dh1024;
        break;

    case 2048:
        if (dh2048 == NULL)
            dh2048 = load_dh_file(keylength);
        if (dh2048 == NULL)
            dh2048 = load_dh_buffer(file_dh2048, sizeof file_dh2048);
        r = dh2048;
        break;

    case 4096:
        if (dh4096 == NULL)
            dh4096 = load_dh_file(keylength);
        if (dh4096 == NULL)
            dh4096 = load_dh_buffer(file_dh4096, sizeof file_dh4096);
        r = dh4096;
        break;

    default:
        if (dh == NULL)
            dh = load_dh_file(keylength);
        r = dh;
    }

    /* this may take a long time, but it may be necessary... */
    if (r == NULL || 8 * DH_size(r) < keylength)
    {
        ereport(DEBUG2,
                (errmsg_internal("DH: generating parameters (%d bits)",
                                 keylength)));
        r = DH_generate_parameters(keylength, DH_GENERATOR_2, NULL, NULL);
    }

    return r;
}
Ejemplo n.º 16
0
static int
pg_SSPI_recvauth(Port *port)
{
	int			mtype;
	StringInfoData buf;
	SECURITY_STATUS r;
	CredHandle	sspicred;
	CtxtHandle *sspictx = NULL,
				newctx;
	TimeStamp	expiry;
	ULONG		contextattr;
	SecBufferDesc inbuf;
	SecBufferDesc outbuf;
	SecBuffer	OutBuffers[1];
	SecBuffer	InBuffers[1];
	HANDLE		token;
	TOKEN_USER *tokenuser;
	DWORD		retlen;
	char		accountname[MAXPGPATH];
	char		domainname[MAXPGPATH];
	DWORD		accountnamesize = sizeof(accountname);
	DWORD		domainnamesize = sizeof(domainname);
	SID_NAME_USE accountnameuse;
	HMODULE		secur32;
	QUERY_SECURITY_CONTEXT_TOKEN_FN _QuerySecurityContextToken;

	/*
	 * SSPI auth is not supported for protocol versions before 3, because it
	 * relies on the overall message length word to determine the SSPI payload
	 * size in AuthenticationGSSContinue and PasswordMessage messages.
	 * (This is, in fact, a design error in our SSPI support, because protocol
	 * messages are supposed to be parsable without relying on the length
	 * word; but it's not worth changing it now.)
	 */
	if (PG_PROTOCOL_MAJOR(FrontendProtocol) < 3)
		ereport(FATAL,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("SSPI is not supported in protocol version 2")));

	/*
	 * Acquire a handle to the server credentials.
	 */
	r = AcquireCredentialsHandle(NULL,
								 "negotiate",
								 SECPKG_CRED_INBOUND,
								 NULL,
								 NULL,
								 NULL,
								 NULL,
								 &sspicred,
								 &expiry);
	if (r != SEC_E_OK)
		pg_SSPI_error(ERROR,
			   gettext_noop("could not acquire SSPI credentials handle"), r);

	/*
	 * Loop through SSPI message exchange. This exchange can consist of
	 * multiple messags sent in both directions. First message is always from
	 * the client. All messages from client to server are password packets
	 * (type 'p').
	 */
	do
	{
		mtype = pq_getbyte();
		if (mtype != 'p')
		{
			/* Only log error if client didn't disconnect. */
			if (mtype != EOF)
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("expected SSPI response, got message type %d",
								mtype)));
			return STATUS_ERROR;
		}

		/* Get the actual SSPI token */
		initStringInfo(&buf);
		if (pq_getmessage(&buf, PG_MAX_AUTH_TOKEN_LENGTH))
		{
			/* EOF - pq_getmessage already logged error */
			pfree(buf.data);
			return STATUS_ERROR;
		}

		/* Map to SSPI style buffer */
		inbuf.ulVersion = SECBUFFER_VERSION;
		inbuf.cBuffers = 1;
		inbuf.pBuffers = InBuffers;
		InBuffers[0].pvBuffer = buf.data;
		InBuffers[0].cbBuffer = buf.len;
		InBuffers[0].BufferType = SECBUFFER_TOKEN;

		/* Prepare output buffer */
		OutBuffers[0].pvBuffer = NULL;
		OutBuffers[0].BufferType = SECBUFFER_TOKEN;
		OutBuffers[0].cbBuffer = 0;
		outbuf.cBuffers = 1;
		outbuf.pBuffers = OutBuffers;
		outbuf.ulVersion = SECBUFFER_VERSION;


		elog(DEBUG4, "Processing received SSPI token of length %u",
			 (unsigned int) buf.len);

		r = AcceptSecurityContext(&sspicred,
								  sspictx,
								  &inbuf,
								  ASC_REQ_ALLOCATE_MEMORY,
								  SECURITY_NETWORK_DREP,
								  &newctx,
								  &outbuf,
								  &contextattr,
								  NULL);

		/* input buffer no longer used */
		pfree(buf.data);

		if (outbuf.cBuffers > 0 && outbuf.pBuffers[0].cbBuffer > 0)
		{
			/*
			 * Negotiation generated data to be sent to the client.
			 */
			elog(DEBUG4, "sending SSPI response token of length %u",
				 (unsigned int) outbuf.pBuffers[0].cbBuffer);

			port->gss->outbuf.length = outbuf.pBuffers[0].cbBuffer;
			port->gss->outbuf.value = outbuf.pBuffers[0].pvBuffer;

			sendAuthRequest(port, AUTH_REQ_GSS_CONT);

			FreeContextBuffer(outbuf.pBuffers[0].pvBuffer);
		}

		if (r != SEC_E_OK && r != SEC_I_CONTINUE_NEEDED)
		{
			if (sspictx != NULL)
			{
				DeleteSecurityContext(sspictx);
				free(sspictx);
			}
			FreeCredentialsHandle(&sspicred);
			pg_SSPI_error(ERROR,
				  gettext_noop("could not accept SSPI security context"), r);
		}

		if (sspictx == NULL)
		{
			sspictx = malloc(sizeof(CtxtHandle));
			if (sspictx == NULL)
				ereport(ERROR,
						(errmsg("out of memory")));

			memcpy(sspictx, &newctx, sizeof(CtxtHandle));
		}

		if (r == SEC_I_CONTINUE_NEEDED)
			elog(DEBUG4, "SSPI continue needed");

	} while (r == SEC_I_CONTINUE_NEEDED);


	/*
	 * Release service principal credentials
	 */
	FreeCredentialsHandle(&sspicred);


	/*
	 * SEC_E_OK indicates that authentication is now complete.
	 *
	 * Get the name of the user that authenticated, and compare it to the pg
	 * username that was specified for the connection.
	 *
	 * MingW is missing the export for QuerySecurityContextToken in the
	 * secur32 library, so we have to load it dynamically.
	 */

	secur32 = LoadLibrary("SECUR32.DLL");
	if (secur32 == NULL)
		ereport(ERROR,
				(errmsg_internal("could not load secur32.dll: %d",
								 (int) GetLastError())));

	_QuerySecurityContextToken = (QUERY_SECURITY_CONTEXT_TOKEN_FN)
		GetProcAddress(secur32, "QuerySecurityContextToken");
	if (_QuerySecurityContextToken == NULL)
	{
		FreeLibrary(secur32);
		ereport(ERROR,
				(errmsg_internal("could not locate QuerySecurityContextToken in secur32.dll: %d",
								 (int) GetLastError())));
	}

	r = (_QuerySecurityContextToken) (sspictx, &token);
	if (r != SEC_E_OK)
	{
		FreeLibrary(secur32);
		pg_SSPI_error(ERROR,
			   gettext_noop("could not get security token from context"), r);
	}

	FreeLibrary(secur32);

	/*
	 * No longer need the security context, everything from here on uses the
	 * token instead.
	 */
	DeleteSecurityContext(sspictx);
	free(sspictx);

	if (!GetTokenInformation(token, TokenUser, NULL, 0, &retlen) && GetLastError() != 122)
		ereport(ERROR,
			 (errmsg_internal("could not get token user size: error code %d",
							  (int) GetLastError())));

	tokenuser = malloc(retlen);
	if (tokenuser == NULL)
		ereport(ERROR,
				(errmsg("out of memory")));

	if (!GetTokenInformation(token, TokenUser, tokenuser, retlen, &retlen))
		ereport(ERROR,
				(errmsg_internal("could not get user token: error code %d",
								 (int) GetLastError())));

	if (!LookupAccountSid(NULL, tokenuser->User.Sid, accountname, &accountnamesize,
						  domainname, &domainnamesize, &accountnameuse))
		ereport(ERROR,
			  (errmsg_internal("could not lookup acconut sid: error code %d",
							   (int) GetLastError())));

	free(tokenuser);

	/*
	 * Compare realm/domain if requested. In SSPI, always compare case
	 * insensitive.
	 */
	if (pg_krb_realm && strlen(pg_krb_realm))
	{
		if (pg_strcasecmp(pg_krb_realm, domainname))
		{
			elog(DEBUG2,
				 "SSPI domain (%s) and configured domain (%s) don't match",
				 domainname, pg_krb_realm);

			return STATUS_ERROR;
		}
	}

	/*
	 * We have the username (without domain/realm) in accountname, compare to
	 * the supplied value. In SSPI, always compare case insensitive.
	 */
	if (pg_strcasecmp(port->user_name, accountname))
	{
		/* GSS name and PGUSER are not equivalent */
		elog(DEBUG2,
			 "provided username (%s) and SSPI username (%s) don't match",
			 port->user_name, accountname);

		return STATUS_ERROR;
	}

	return STATUS_OK;
}
Ejemplo n.º 17
0
/*
 * Execute an incoming replication command.
 */
static bool
HandleReplicationCommand(const char *cmd_string)
{
	bool		replication_started = false;
	int			parse_rc;
	Node	   *cmd_node;
	MemoryContext cmd_context;
	MemoryContext old_context;

	elog(DEBUG1, "received replication command: %s", cmd_string);

	cmd_context = AllocSetContextCreate(CurrentMemoryContext,
										"Replication command context",
										ALLOCSET_DEFAULT_MINSIZE,
										ALLOCSET_DEFAULT_INITSIZE,
										ALLOCSET_DEFAULT_MAXSIZE);
	old_context = MemoryContextSwitchTo(cmd_context);

	replication_scanner_init(cmd_string);
	parse_rc = replication_yyparse();
	if (parse_rc != 0)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 (errmsg_internal("replication command parser returned %d",
								  parse_rc))));

	cmd_node = replication_parse_result;

	switch (cmd_node->type)
	{
		case T_IdentifySystemCmd:
			IdentifySystem();
			break;

		case T_StartReplicationCmd:
			StartReplication((StartReplicationCmd *) cmd_node);

			/* break out of the loop */
			replication_started = true;
			break;

		case T_BaseBackupCmd:
			SendBaseBackup((BaseBackupCmd *) cmd_node);

			/* Send CommandComplete and ReadyForQuery messages */
			EndCommand("SELECT", DestRemote);
			ReadyForQuery(DestRemote);
			/* ReadyForQuery did pq_flush for us */
			break;

		default:
			ereport(FATAL,
					(errcode(ERRCODE_PROTOCOL_VIOLATION),
					 errmsg("invalid standby query string: %s", cmd_string)));
	}

	/* done */
	MemoryContextSwitchTo(old_context);
	MemoryContextDelete(cmd_context);

	return replication_started;
}
Ejemplo n.º 18
0
/*
 * ProcessQuery
 *		Execute a single plannable query within a PORTAL_MULTI_QUERY
 *		or PORTAL_ONE_RETURNING portal
 *
 *	parsetree: the query tree
 *	plan: the plan tree for the query
 *	params: any parameters needed
 *	dest: where to send results
 *	completionTag: points to a buffer of size COMPLETION_TAG_BUFSIZE
 *		in which to store a command completion status string.
 *
 * completionTag may be NULL if caller doesn't want a status string.
 *
 * Must be called in a memory context that will be reset or deleted on
 * error; otherwise the executor's memory usage will be leaked.
 */
static void
ProcessQuery(Query *parsetree,
             Plan *plan,
             ParamListInfo params,
             DestReceiver *dest,
             char *completionTag)
{
    int			operation = parsetree->commandType;
    QueryDesc  *queryDesc;

    ereport(DEBUG3,
            (errmsg_internal("ProcessQuery")));

    /*
     * Must always set snapshot for plannable queries.	Note we assume that
     * caller will take care of restoring ActiveSnapshot on exit/error.
     */
    ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());

    /*
     * Create the QueryDesc object
     */
    queryDesc = CreateQueryDesc(parsetree, plan,
                                ActiveSnapshot, InvalidSnapshot,
                                dest, params, false);

    /*
     * Set up to collect AFTER triggers
     */
    AfterTriggerBeginQuery();

    /*
     * Call ExecutorStart to prepare the plan for execution
     */
    ExecutorStart(queryDesc, 0);

    /*
     * Run the plan to completion.
     */
    ExecutorRun(queryDesc, ForwardScanDirection, 0L);

    /*
     * Build command completion status string, if caller wants one.
     */
    if (completionTag)
    {
        Oid			lastOid;

        switch (operation)
        {
        case CMD_SELECT:
            strcpy(completionTag, "SELECT");
            break;
        case CMD_INSERT:
            if (queryDesc->estate->es_processed == 1)
                lastOid = queryDesc->estate->es_lastoid;
            else
                lastOid = InvalidOid;
            snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
                     "INSERT %u %u", lastOid, queryDesc->estate->es_processed);
            break;
        case CMD_UPDATE:
            snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
                     "UPDATE %u", queryDesc->estate->es_processed);
            break;
        case CMD_DELETE:
            snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
                     "DELETE %u", queryDesc->estate->es_processed);
            break;
        default:
            strcpy(completionTag, "???");
            break;
        }
    }

    /* Now take care of any queued AFTER triggers */
    AfterTriggerEndQuery(queryDesc->estate);

    /*
     * Now, we close down all the scans and free allocated resources.
     */
    ExecutorEnd(queryDesc);

    FreeQueryDesc(queryDesc);

    FreeSnapshot(ActiveSnapshot);
    ActiveSnapshot = NULL;
}
Ejemplo n.º 19
0
void
inv_truncate(LargeObjectDesc *obj_desc, int64 len)
{
	int32		pageno = (int32) (len / LOBLKSIZE);
	int32		off;
	ScanKeyData skey[2];
	SysScanDesc sd;
	HeapTuple	oldtuple;
	Form_pg_largeobject olddata;
	struct
	{
		bytea		hdr;
		char		data[LOBLKSIZE];	/* make struct big enough */
		int32		align_it;	/* ensure struct is aligned well enough */
	}			workbuf;
	char	   *workb = VARDATA(&workbuf.hdr);
	HeapTuple	newtup;
	Datum		values[Natts_pg_largeobject];
	bool		nulls[Natts_pg_largeobject];
	bool		replace[Natts_pg_largeobject];
	CatalogIndexState indstate;

	Assert(PointerIsValid(obj_desc));

	/* enforce writability because snapshot is probably wrong otherwise */
	Assert(obj_desc->flags & IFS_WRLOCK);

	/*
	 * use errmsg_internal here because we don't want to expose INT64_FORMAT
	 * in translatable strings; doing better is not worth the trouble
	 */
	if (len < 0 || len > MAX_LARGE_OBJECT_SIZE)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg_internal("invalid large object truncation target: " INT64_FORMAT,
								 len)));

	open_lo_relation();

	indstate = CatalogOpenIndexes(lo_heap_r);

	/*
	 * Set up to find all pages with desired loid and pageno >= target
	 */
	ScanKeyInit(&skey[0],
				Anum_pg_largeobject_loid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(obj_desc->id));

	ScanKeyInit(&skey[1],
				Anum_pg_largeobject_pageno,
				BTGreaterEqualStrategyNumber, F_INT4GE,
				Int32GetDatum(pageno));

	sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
									obj_desc->snapshot, 2, skey);

	/*
	 * If possible, get the page the truncation point is in. The truncation
	 * point may be beyond the end of the LO or in a hole.
	 */
	olddata = NULL;
	if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
	{
		if (HeapTupleHasNulls(oldtuple))		/* paranoia */
			elog(ERROR, "null field found in pg_largeobject");
		olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
		Assert(olddata->pageno >= pageno);
	}

	/*
	 * If we found the page of the truncation point we need to truncate the
	 * data in it.  Otherwise if we're in a hole, we need to create a page to
	 * mark the end of data.
	 */
	if (olddata != NULL && olddata->pageno == pageno)
	{
		/* First, load old data into workbuf */
		bytea	   *datafield;
		int			pagelen;
		bool		pfreeit;

		getdatafield(olddata, &datafield, &pagelen, &pfreeit);
		memcpy(workb, VARDATA(datafield), pagelen);
		if (pfreeit)
			pfree(datafield);

		/*
		 * Fill any hole
		 */
		off = len % LOBLKSIZE;
		if (off > pagelen)
			MemSet(workb + pagelen, 0, off - pagelen);

		/* compute length of new page */
		SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

		/*
		 * Form and insert updated tuple
		 */
		memset(values, 0, sizeof(values));
		memset(nulls, false, sizeof(nulls));
		memset(replace, false, sizeof(replace));
		values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
		replace[Anum_pg_largeobject_data - 1] = true;
		newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
								   values, nulls, replace);
		simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
		CatalogIndexInsert(indstate, newtup);
		heap_freetuple(newtup);
	}
	else
	{
		/*
		 * If the first page we found was after the truncation point, we're in
		 * a hole that we'll fill, but we need to delete the later page
		 * because the loop below won't visit it again.
		 */
		if (olddata != NULL)
		{
			Assert(olddata->pageno > pageno);
			simple_heap_delete(lo_heap_r, &oldtuple->t_self);
		}

		/*
		 * Write a brand new page.
		 *
		 * Fill the hole up to the truncation point
		 */
		off = len % LOBLKSIZE;
		if (off > 0)
			MemSet(workb, 0, off);

		/* compute length of new page */
		SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

		/*
		 * Form and insert new tuple
		 */
		memset(values, 0, sizeof(values));
		memset(nulls, false, sizeof(nulls));
		values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
		values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
		values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
		newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
		simple_heap_insert(lo_heap_r, newtup);
		CatalogIndexInsert(indstate, newtup);
		heap_freetuple(newtup);
	}

	/*
	 * Delete any pages after the truncation point.  If the initial search
	 * didn't find a page, then of course there's nothing more to do.
	 */
	if (olddata != NULL)
	{
		while ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
		{
			simple_heap_delete(lo_heap_r, &oldtuple->t_self);
		}
	}

	systable_endscan_ordered(sd);

	CatalogCloseIndexes(indstate);

	/*
	 * Advance command counter so that tuple updates will be seen by later
	 * large-object operations in this transaction.
	 */
	CommandCounterIncrement();
}
Ejemplo n.º 20
0
/*
 * Set up for tuple conversion, matching input and output columns by name.
 * (Dropped columns are ignored in both input and output.)	This is intended
 * for use when the rowtypes are related by inheritance, so we expect an exact
 * match of both type and typmod.  The error messages will be a bit unhelpful
 * unless both rowtypes are named composite types.
 */
TupleConversionMap *
convert_tuples_by_name(TupleDesc indesc,
					   TupleDesc outdesc,
					   const char *msg)
{
	TupleConversionMap *map;
	AttrNumber *attrMap;
	int			n;
	int			i;
	bool		same;

	/* Verify compatibility and prepare attribute-number map */
	n = outdesc->natts;
	attrMap = (AttrNumber *) palloc0(n * sizeof(AttrNumber));
	for (i = 0; i < n; i++)
	{
		Form_pg_attribute att = outdesc->attrs[i];
		char	   *attname;
		Oid			atttypid;
		int32		atttypmod;
		int			j;

		if (att->attisdropped)
			continue;			/* attrMap[i] is already 0 */
		attname = NameStr(att->attname);
		atttypid = att->atttypid;
		atttypmod = att->atttypmod;
		for (j = 0; j < indesc->natts; j++)
		{
			att = indesc->attrs[j];
			if (att->attisdropped)
				continue;
			if (strcmp(attname, NameStr(att->attname)) == 0)
			{
				/* Found it, check type */
				if (atttypid != att->atttypid || atttypmod != att->atttypmod)
					ereport(ERROR,
							(errcode(ERRCODE_DATATYPE_MISMATCH),
							 errmsg_internal("%s", _(msg)),
							 errdetail("Attribute \"%s\" of type %s does not match corresponding attribute of type %s.",
									   attname,
									   format_type_be(outdesc->tdtypeid),
									   format_type_be(indesc->tdtypeid))));
				attrMap[i] = (AttrNumber) (j + 1);
				break;
			}
		}
		if (attrMap[i] == 0)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg_internal("%s", _(msg)),
					 errdetail("Attribute \"%s\" of type %s does not exist in type %s.",
							   attname,
							   format_type_be(outdesc->tdtypeid),
							   format_type_be(indesc->tdtypeid))));
	}

	/*
	 * Check to see if the map is one-to-one and the tuple types are the same.
	 * (We check the latter because if they're not, we want to do conversion
	 * to inject the right OID into the tuple datum.)
	 */
	if (indesc->natts == outdesc->natts &&
		indesc->tdtypeid == outdesc->tdtypeid)
	{
		same = true;
		for (i = 0; i < n; i++)
		{
			if (attrMap[i] == (i + 1))
				continue;

			/*
			 * If it's a dropped column and the corresponding input column is
			 * also dropped, we needn't convert.  However, attlen and attalign
			 * must agree.
			 */
			if (attrMap[i] == 0 &&
				indesc->attrs[i]->attisdropped &&
				indesc->attrs[i]->attlen == outdesc->attrs[i]->attlen &&
				indesc->attrs[i]->attalign == outdesc->attrs[i]->attalign)
				continue;

			same = false;
			break;
		}
	}
	else
		same = false;

	if (same)
	{
		/* Runtime conversion is not needed */
		pfree(attrMap);
		return NULL;
	}

	/* Prepare the map structure */
	map = (TupleConversionMap *) palloc(sizeof(TupleConversionMap));
	map->indesc = indesc;
	map->outdesc = outdesc;
	map->attrMap = attrMap;
	/* preallocate workspace for Datum arrays */
	map->outvalues = (Datum *) palloc(n * sizeof(Datum));
	map->outisnull = (bool *) palloc(n * sizeof(bool));
	n = indesc->natts + 1;		/* +1 for NULL */
	map->invalues = (Datum *) palloc(n * sizeof(Datum));
	map->inisnull = (bool *) palloc(n * sizeof(bool));
	map->invalues[0] = (Datum) 0;		/* set up the NULL entry */
	map->inisnull[0] = true;

	return map;
}