Пример #1
0
/*
 * SimpleLruFlush
 *		Flush dirty pages to disk during checkpoint or database shutdown.
 *
 * ctl			the SLRU area to flush
 * checkpoint	true when called for a checkpoint; this only relaxes the
 *				post-write sanity assertion (see below)
 *
 * Works in two passes: first every buffer slot is handed to
 * SimpleLruWritePage while the control lock is held exclusively, then each
 * file recorded in the flush data is fsync'd (if ctl->do_fsync) and closed.
 * An fsync failure is reported through SlruReportIOError only after all
 * files have been closed.
 */
void
SimpleLruFlush(SlruCtl ctl, bool checkpoint)
{
	SlruShared	shared = ctl->shared;
	SlruFlushData fdata;
	int			failedPage = 0;
	bool		allSynced = true;
	int			slot;
	int			fileIdx;

	/* Start with no files recorded as open. */
	fdata.num_files = 0;

	/*
	 * Pass 1: write out each slot under the exclusive control lock.
	 */
	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);

	for (slot = 0; slot < shared->num_slots; slot++)
	{
		SimpleLruWritePage(ctl, slot, &fdata);

		/*
		 * Outside of a checkpoint the slot must now be either empty or valid
		 * and clean.  During a checkpoint nothing can be asserted, because
		 * another process might have re-dirtied the page already; that is
		 * acceptable.
		 */
		Assert(checkpoint ||
			   shared->page_status[slot] == SLRU_PAGE_EMPTY ||
			   (shared->page_status[slot] == SLRU_PAGE_VALID &&
				!shared->page_dirty[slot]));
	}

	LWLockRelease(shared->ControlLock);

	/*
	 * Pass 2: fsync and close every file that pass 1 left open.
	 */
	for (fileIdx = 0; fileIdx < fdata.num_files; fileIdx++)
	{
		if (ctl->do_fsync)
		{
			if (MirroredFlatFile_Flush(&fdata.mirroredOpens[fileIdx],
									   /* suppressError */ true))
			{
				/*
				 * Record the failure for the deferred report.  The page
				 * number used is the first page of the failing segment; a
				 * later failure overwrites an earlier one.
				 */
				slru_errcause = SLRU_FSYNC_FAILED;
				slru_errno = errno;
				failedPage = fdata.segno[fileIdx] * SLRU_PAGES_PER_SEGMENT;
				allSynced = false;
			}
		}

		/* UNDONE: We don't have a suppressError for close... */
		MirroredFlatFile_Close(&fdata.mirroredOpens[fileIdx]);
	}

	if (allSynced)
		return;

	SlruReportIOError(ctl, failedPage, InvalidTransactionId);
}
Пример #2
0
/*
 * set_short_version
 *		Write out the PG_VERSION file, containing the short ("major.minor")
 *		form of PG_VERSION followed by a newline.
 *
 * mirror == true:	path must be NULL and dbDirNode must be valid.  The file
 *		is created in that database directory through the MirroredFlatFile
 *		API, which mirrors the file creation to our segment mirror.  All
 *		MirroredFlatFile calls are made with suppressError = false.
 *
 * mirror == false:	dbDirNode must be NULL and path must be valid.  The file
 *		is written directly as "<path>/PG_VERSION"; failure to open or close
 *		it raises ereport(ERROR).
 *
 * XXX: API is terrible, make it cleaner
 */
void
set_short_version(const char *path, DbDirNode *dbDirNode, bool mirror)
{
	char	   *short_version;
	bool		gotdot = false;
	int			end;
	size_t		full_len = strlen(PG_VERSION);

	/*
	 * Construct short version string (should match initdb.c).
	 *
	 * The buffer is one byte larger than a plain copy of PG_VERSION.  If the
	 * scan below runs all the way to the terminator (PG_VERSION is already
	 * just "major.minor"), the newline overwrites that terminator and the
	 * new terminator needs a byte of its own; a pstrdup'd copy would be
	 * overrun by one byte in that case.
	 */
	short_version = palloc(full_len + 2);
	memcpy(short_version, PG_VERSION, full_len + 1);

	/* Stop at the second dot, or at the first non-digit, non-dot character */
	for (end = 0; short_version[end] != '\0'; end++)
	{
		if (short_version[end] == '.')
		{
			Assert(end != 0);
			if (gotdot)
				break;
			else
				gotdot = true;
		}
		else if (short_version[end] < '0' || short_version[end] > '9')
		{
			/* gone past digits and dots */
			break;
		}
	}
	Assert(end > 0 && short_version[end - 1] != '.' && gotdot);

	/* Truncate after "major.minor" and append the newline */
	short_version[end++] = '\n';
	short_version[end] = '\0';

	if (mirror)
	{
		MirroredFlatFileOpen mirroredOpen;

		Insist(!PointerIsValid(path));
		Insist(PointerIsValid(dbDirNode));

		MirroredFlatFile_OpenInDbDir(&mirroredOpen, dbDirNode, "PG_VERSION",
							O_CREAT | O_WRONLY | PG_BINARY, S_IRUSR | S_IWUSR,
							/* suppressError */ false);

		/* "end" is now the length of the string including the newline */
		MirroredFlatFile_Append(&mirroredOpen, short_version,
								end,
								/* suppressError */ false);

		MirroredFlatFile_Flush(&mirroredOpen, /* suppressError */ false);
		MirroredFlatFile_Close(&mirroredOpen);
	}
	else
	{
		char	   *fullname;
		FILE	   *version_file;

		Insist(!PointerIsValid(dbDirNode));
		Insist(PointerIsValid(path));

		/* Now write the file; 11 is the length of "/PG_VERSION" */
		fullname = palloc(strlen(path) + 11 + 1);
		sprintf(fullname, "%s/PG_VERSION", path);
		version_file = AllocateFile(fullname, PG_BINARY_W);
		if (version_file == NULL)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not write to file \"%s\": %m",
							fullname)));
		fprintf(version_file, "%s", short_version);

		/* A nonzero result from FreeFile is treated as a write failure */
		if (FreeFile(version_file))
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not write to file \"%s\": %m",
							fullname)));

		pfree(fullname);
	}
	pfree(short_version);
}
Пример #3
0
/*
 * write_database_file
 *		Rebuild the flat "pg_database" file under "global" from the rows of
 *		the pg_database relation passed as "drel".
 *
 * Every database whose name passes name_okay() contributes one line:
 *		"dbname" oid tablespace frozenxid
 *
 * As a side effect, the oldest normal datfrozenxid among databases that
 * allow connections is tracked during the scan and, if one was found, is
 * passed to SetTransactionIdLimit() to set or update the XID wrap limit.
 *
 * NOTE(review): the "startup" argument is not referenced anywhere in this
 * body.
 */
static void
write_database_file(Relation drel, bool startup)
{
	StringInfoData contents;
	MirroredFlatFileOpen flatFile;
	HeapScanDesc dbscan;
	HeapTuple	dbtup;
	NameData	minXidDbName;
	TransactionId minFrozenXid = InvalidTransactionId;

	initStringInfo(&contents);

	MirroredFlatFile_Open(&flatFile,
						  "global",
						  "pg_database",
						  O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY,
						  S_IRUSR | S_IWUSR,
						  /* suppressError */ false,
						  /* atomic operation */ true,
						  /*isMirrorRecovery */ false);

	/*
	 * Walk pg_database, accumulating the file contents in memory.
	 */
	dbscan = heap_beginscan(drel, SnapshotNow, 0, NULL);
	for (;;)
	{
		Form_pg_database dbform;
		char	   *datname;
		Oid			tablespaceOid;
		TransactionId frozenXid;

		dbtup = heap_getnext(dbscan, ForwardScanDirection);
		if (dbtup == NULL)
			break;

		dbform = (Form_pg_database) GETSTRUCT(dbtup);
		datname = NameStr(dbform->datname);
		tablespaceOid = dbform->dattablespace;
		frozenXid = dbform->datfrozenxid;

		/*
		 * Track the oldest datfrozenxid.  This must match the logic in
		 * vac_truncate_clog() in vacuum.c.
		 *
		 * MPP-20053: databases that cannot be connected to are left out of
		 * the computation.
		 */
		if (dbform->datallowconn &&
			TransactionIdIsNormal(frozenXid) &&
			(minFrozenXid == InvalidTransactionId ||
			 TransactionIdPrecedes(frozenXid, minFrozenXid)))
		{
			minFrozenXid = frozenXid;
			namestrcpy(&minXidDbName, datname);
		}

		/*
		 * A name containing illegal characters is logged and gets no line in
		 * the file.
		 */
		if (!name_okay(datname))
		{
			ereport(LOG,
					(errmsg("invalid database name \"%s\"", datname)));
			continue;
		}

		/*
		 * Emit: "dbname" oid tablespace frozenxid
		 *
		 * The xids are not needed for backend startup, but are of use to
		 * autovacuum, and might also be helpful for forensic purposes.
		 */
		sputs_quote(&contents, datname);
		appendStringInfo(&contents, " %u %u %u\n",
						 HeapTupleGetOid(dbtup), tablespaceOid, frozenXid);

		/*
		 * MPP-10111 - During database expansion we need to be able to bring
		 * a database up in order to correct the filespace locations in the
		 * catalog, at a point where database paths cannot be resolved for
		 * databases stored outside "pg_default" or "pg_global".  A special
		 * guc passed at startup during that phase bypasses logic involving
		 * non-system tablespaces; a similar check exists in
		 * PersistentTablespace_GetPrimaryAndMirrorFilespaces().
		 *
		 * NOTE(review): as written, this is the last statement of the loop
		 * body, so the "continue" skips nothing.
		 */
		if (gp_before_filespace_setup && !IsBuiltinTablespace(tablespaceOid))
			continue;
	}
	heap_endscan(dbscan);

	/* Write the accumulated text in one append, then flush and close. */
	MirroredFlatFile_Append(&flatFile, contents.data, contents.len,
							/* suppressError */ false);
	MirroredFlatFile_Flush(&flatFile, /* suppressError */ false);
	MirroredFlatFile_Close(&flatFile);

	if (contents.maxlen > 0)
		pfree(contents.data);

	/*
	 * Set the transaction ID wrap limit using the oldest datfrozenxid, if
	 * any database qualified.
	 */
	if (minFrozenXid != InvalidTransactionId)
		SetTransactionIdLimit(minFrozenXid, &minXidDbName);
}
Пример #4
0
/*
 * SlruPhysicalWritePage
 *		Physical write of a page from a buffer slot.
 *
 * ctl		SLRU area being written
 * pageno	page to write; determines the segment file and the byte offset
 *			within it
 * slotno	buffer slot holding the page image
 * fdata	NULL for a standalone write; during SimpleLruFlush, a pointer to
 *			the open-file info so that files can be reused across writes
 *
 * Returns true on success.  On failure we cannot just ereport(ERROR), since
 * the caller has put state in shared memory that must be undone; instead
 * false is returned, with slru_errcause and slru_errno set so that
 * SlruReportIOError can make the report.
 *
 * For a standalone write the file is opened, written, fsync'd (if
 * ctl->do_fsync) and closed here.  During a flush the file is left open in
 * fdata for the caller to sync and close.
 */
static bool
SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
{
	SlruShared	shared = ctl->shared;
	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
	int			byteOffset = (pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
	char		segFileName[MAXPGPATH];

	/* true when this call owns the file and must sync/close it itself */
	bool		standalone = (fdata == NULL);

	MirroredFlatFileOpen	localOpen = MirroredFlatFileOpen_Init;
	MirroredFlatFileOpen	*cachedOpen = NULL;
	MirroredFlatFileOpen	*target = NULL;

	/*
	 * During a Flush, the segment's file may already be open; look it up.
	 */
	if (!standalone)
	{
		int			idx;

		for (idx = 0; idx < fdata->num_files; idx++)
		{
			if (fdata->segno[idx] != segno)
				continue;

			cachedOpen = &fdata->mirroredOpens[idx];
			break;
		}
	}

	if (cachedOpen != NULL && MirroredFlatFile_IsActive(cachedOpen))
	{
		/* Reuse the file opened by an earlier write of this flush. */
		target = cachedOpen;
	}
	else
	{
		/*
		 * Open the segment file, creating it if it does not exist yet.  That
		 * can be needed even for a page that is not first in its segment; we
		 * assume the OS can cope.  (Creating files only from
		 * SimpleLruZeroPage would not be enough: if, after a crash and
		 * restart, REDO replays the log from a checkpoint before the latest
		 * one, we may be asked to set the status of transactions already
		 * truncated from the commit log.  The easiest way to deal with that
		 * is to accept references to nonexistent files here and in
		 * SlruPhysicalReadPage.)
		 *
		 * More than one backend may be executing this code simultaneously
		 * for different pages of the same file, hence no O_EXCL, O_TRUNC or
		 * anything like that.
		 */
		SlruSimpleFileName(segFileName, segno);
		if (MirroredFlatFile_Open(&localOpen,
								  ctl->Dir,
								  segFileName,
								  O_RDWR | O_CREAT | PG_BINARY,
								  S_IRUSR | S_IWUSR,
								  /* suppressError */ true,
								  /* atomic operation */ false,
								  /*isMirrorRecovery */ false))
		{
			slru_errcause = SLRU_OPEN_FAILED;
			slru_errno = errno;
			return false;
		}

		if (!standalone && fdata->num_files < MAX_FLUSH_BUFFERS)
		{
			/* Record the open file in the flush data for later reuse. */
			target = &fdata->mirroredOpens[fdata->num_files];
			*target = localOpen;
			fdata->segno[fdata->num_files] = segno;
			fdata->num_files++;
		}
		else
		{
			/*
			 * Either this was a standalone write all along, or (unlikely) the
			 * flush data already holds MAX_FLUSH_BUFFERS files; in the
			 * latter case fall back to treating it as a standalone write.
			 */
			standalone = true;
			target = &localOpen;
		}
	}

	Assert(target != NULL);

	if (MirroredFlatFile_SeekSet(target, byteOffset) != byteOffset)
	{
		slru_errcause = SLRU_SEEK_FAILED;
		slru_errno = errno;
		goto write_failed;
	}

	if (MirroredFlatFile_Write(target,
							   byteOffset,
							   shared->page_buffer[slotno],
							   BLCKSZ,
							   /* suppressError */ true))
	{
		slru_errcause = SLRU_WRITE_FAILED;
		slru_errno = errno;
		goto write_failed;
	}

	/* Part of a Flush: the caller will fsync and close the file. */
	if (!standalone)
		return true;

	/*
	 * Not part of a Flush, so fsync now.  We assume this happens infrequently
	 * enough that it's not a performance issue.
	 */
	if (ctl->do_fsync &&
		MirroredFlatFile_Flush(target, /* suppressError */ true))
	{
		slru_errcause = SLRU_FSYNC_FAILED;
		slru_errno = errno;
		goto write_failed;
	}

	/* UNDONE: We don't have a suppressError for close... */
	MirroredFlatFile_Close(target);
	return true;

write_failed:
	/* Error details are already saved; close only a file we own. */
	if (standalone)
		MirroredFlatFile_Close(target);
	return false;
}