/*
 * get a tablespace location by oid.
 */
void
DispatchedFilespace_GetPathForTablespace(Oid tablespace, char **filespacePath, bool * found)
{
	DispatchedFilespaceDirEntry entry;

	Assert(NULL != filespacePath);
	Assert(OidIsValid(tablespace));

	*filespacePath = NULL;
	*found = FALSE;

	if (IsBuiltinTablespace(tablespace))
	{
		/*
		 * Optimize out the common cases.
		 */
		return;
	}

	Assert(NULL != DispatchedFilespaceDirHashTable);

	entry = (DispatchedFilespaceDirEntry) hash_search(
			DispatchedFilespaceDirHashTable, (void *) &tablespace, HASH_FIND,
			found);

	if (!*found)
		*filespacePath = NULL;
	else
		*filespacePath = pstrdup(entry->location);
}
PersistentTablespaceGetFilespaces
PersistentTablespace_TryGetPrimaryAndMirrorFilespaces(
													  Oid tablespaceOid,
 /* The tablespace OID for the create. */

													  char **primaryFilespaceLocation,
 /* The primary filespace directory path.  Return NULL for global and base. */

													  char **mirrorFilespaceLocation,

 /*
  * The primary filespace directory path.  Return NULL for global and base.
  * Or, returns NULL when mirror not configured.
  */

													  Oid *filespaceOid)
{
	*primaryFilespaceLocation = NULL;
	*mirrorFilespaceLocation = NULL;
	*filespaceOid = InvalidOid;

	if (IsBuiltinTablespace(tablespaceOid))
	{
		/*
		 * Optimize out the common cases.
		 */
		return PersistentTablespaceGetFilespaces_Ok;
	}

#ifdef MASTER_MIRROR_SYNC

	/*
	 * Can't rely on persistent tables or memory structures on the standby so
	 * get it from the cache maintained by the master mirror sync code
	 */
	if (IsStandbyMode())
	{
		if (!mmxlog_tablespace_get_filespace(
											 tablespaceOid,
											 filespaceOid))
		{
			if (!Debug_persistent_recovery_print)
			{
				/* Print this information when we are not doing other tracing. */
				mmxlog_print_tablespaces(
										 LOG,
										 "Standby Get Filespace for Tablespace");
			}
			return PersistentTablespaceGetFilespaces_TablespaceNotFound;
		}

		if (!mmxlog_filespace_get_path(
									   *filespaceOid,
									   primaryFilespaceLocation))
		{
			if (!Debug_persistent_recovery_print)
			{
				/* Print this information when we are not doing other tracing. */
				mmxlog_print_filespaces(
										LOG,
										"Standby Get Filespace Location");
			}
			return PersistentTablespaceGetFilespaces_FilespaceNotFound;
		}

		return PersistentTablespaceGetFilespaces_Ok;
	}
#endif

	/*
	 * MPP-10111 - There is a point during gpexpand where we need to bring the
	 * database up to fix the filespace locations for a segment.  At this
	 * point in time the old filespace locations are wrong and we should not
	 * trust anything currently stored there.  If the guc is set we prevent
	 * the lookup of a any non builtin filespaces.
	 */
	if (gp_before_filespace_setup)
		elog(ERROR, "can not lookup tablespace location: gp_before_filespace_setup=true");

	/*
	 * Important to make this call AFTER we check if we are the Standby
	 * Master.
	 */
	PersistentTablespace_VerifyInitScan();

	return PersistentFilespace_GetFilespaceFromTablespace(
														  tablespaceOid, primaryFilespaceLocation,
														  mirrorFilespaceLocation, filespaceOid);
}
Пример #3
0
/*
 * Open a relation during XLOG replay
 *
 * Note: this once had an API that allowed NULL return on failure, but it
 * no longer does; any failure results in elog().
 */
Relation
XLogOpenRelation(RelFileNode rnode)
{
	XLogRelDesc *res;
	XLogRelCacheEntry *hentry;
	bool		found;

	hentry = (XLogRelCacheEntry *)
		hash_search(_xlrelcache, (void *) &rnode, HASH_FIND, NULL);

	if (hentry)
	{
		res = hentry->rdesc;

		res->lessRecently->moreRecently = res->moreRecently;
		res->moreRecently->lessRecently = res->lessRecently;
	}
	else
	{
		/*
		 * We need to fault in the database directory on the standby.
		 */
		if (rnode.spcNode != GLOBALTABLESPACE_OID && IsStandbyMode())
		{
			char *primaryFilespaceLocation = NULL;

			char *dbPath;
			
			if (IsBuiltinTablespace(rnode.spcNode))
			{
				/*
				 * No filespace to fetch.
				 */
			}
			else
			{		
				char *mirrorFilespaceLocation = NULL;
			
				/*
				 * Investigate whether the containing directories exist to give more detail.
				 */
				PersistentTablespace_GetPrimaryAndMirrorFilespaces(
													rnode.spcNode,
													&primaryFilespaceLocation,
													&mirrorFilespaceLocation);
				if (primaryFilespaceLocation == NULL ||
					strlen(primaryFilespaceLocation) == 0)
				{
					elog(ERROR, "Empty primary filespace directory location");
				}
			
				if (mirrorFilespaceLocation != NULL)
				{
					pfree(mirrorFilespaceLocation);
					mirrorFilespaceLocation = NULL;
				}
			}
			
			dbPath = (char*)palloc(MAXPGPATH + 1);
			
			FormDatabasePath(
						dbPath,
						primaryFilespaceLocation,
						rnode.spcNode,
						rnode.dbNode);

			if (primaryFilespaceLocation != NULL)
			{
				pfree(primaryFilespaceLocation);
				primaryFilespaceLocation = NULL;
			}
			
			if (mkdir(dbPath, 0700) == 0)
			{
				if (Debug_persistent_recovery_print)
				{
					elog(PersistentRecovery_DebugPrintLevel(), 
						 "XLogOpenRelation: Re-created database directory \"%s\"",
						 dbPath);
				}
			}
			else
			{
				/*
				 * Allowed to already exist.
				 */
				if (errno != EEXIST)
				{
					elog(ERROR, "could not create database directory \"%s\": %m",
						 dbPath);
				}
				else
				{
					if (Debug_persistent_recovery_print)
					{
						elog(PersistentRecovery_DebugPrintLevel(), 
							 "XLogOpenRelation: Database directory \"%s\" already exists",
							 dbPath);
					}
				}
			}

			pfree(dbPath);
		}
		
		res = _xl_new_reldesc();

		sprintf(RelationGetRelationName(&(res->reldata)), "%u", rnode.relNode);

		res->reldata.rd_node = rnode;

		/*
		 * We set up the lockRelId in case anything tries to lock the dummy
		 * relation.  Note that this is fairly bogus since relNode may be
		 * different from the relation's OID.  It shouldn't really matter
		 * though, since we are presumably running by ourselves and can't have
		 * any lock conflicts ...
		 */
		res->reldata.rd_lockInfo.lockRelId.dbId = rnode.dbNode;
		res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode;

		hentry = (XLogRelCacheEntry *)
			hash_search(_xlrelcache, (void *) &rnode, HASH_ENTER, &found);

		if (found)
			elog(PANIC, "xlog relation already present on insert into cache");

		hentry->rdesc = res;

		res->reldata.rd_targblock = InvalidBlockNumber;
		res->reldata.rd_smgr = NULL;
		RelationOpenSmgr(&(res->reldata));

		/*
		 * Create the target file if it doesn't already exist.  This lets us
		 * cope if the replay sequence contains writes to a relation that is
		 * later deleted.  (The original coding of this routine would instead
		 * return NULL, causing the writes to be suppressed. But that seems
		 * like it risks losing valuable data if the filesystem loses an inode
		 * during a crash.	Better to write the data until we are actually
		 * told to delete the file.)
		 */
		// NOTE: We no longer re-create files automatically because
		// new FileRep persistent objects will ensure files exist.

		// UNDONE: Can't remove this block of code yet until boot time calls to this routine are analyzed...
		{
			MirrorDataLossTrackingState mirrorDataLossTrackingState;
			int64 mirrorDataLossTrackingSessionNum;
			bool mirrorDataLossOccurred;
			
			// UNDONE: What about the persistent rel files table???
			// UNDONE: This condition should not occur anymore.
			// UNDONE: segmentFileNum and AO?
			mirrorDataLossTrackingState = 
						FileRepPrimary_GetMirrorDataLossTrackingSessionNum(
														&mirrorDataLossTrackingSessionNum);
			smgrcreate(
				res->reldata.rd_smgr, 
				res->reldata.rd_isLocalBuf, 
				/* relationName */ NULL,		// Ok to be NULL -- we don't know the name here.
				mirrorDataLossTrackingState,
				mirrorDataLossTrackingSessionNum,
				/* ignoreAlreadyExists */ true,
				&mirrorDataLossOccurred);
			
		}
	}

	res->moreRecently = &(_xlrelarr[0]);
	res->lessRecently = _xlrelarr[0].lessRecently;
	_xlrelarr[0].lessRecently = res;
	res->lessRecently->moreRecently = res;

	Assert(&(res->reldata) != NULL);	// Assert what it says in the interface -- we don't return NULL anymore.

	return &(res->reldata);
}
Пример #4
0
/*
 * write_database_file: update the flat database file
 *
 * A side effect is to determine the oldest database's datfrozenxid
 * so we can set or update the XID wrap limit.
 *
 * Also, if "startup" is true, we tell relcache.c to clear out the relcache
 * init file in each database.  That's a bit nonmodular, but scanning
 * pg_database twice during system startup seems too high a price for keeping
 * things better separated.
 */
static void
write_database_file(Relation drel, bool startup)
{
	StringInfoData buffer;
	HeapScanDesc scan;
	HeapTuple	tuple;
	NameData	oldest_datname;
	TransactionId oldest_datfrozenxid = InvalidTransactionId;
	MirroredFlatFileOpen mirroredOpen;

	initStringInfo(&buffer);

	MirroredFlatFile_Open(
					&mirroredOpen,
					"global",
					"pg_database",
					O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY,
					S_IRUSR | S_IWUSR,
					/* suppressError */ false,
					/* atomic operation */ true,
					/*isMirrorRecovery */ false);
	/*
	 * Read pg_database and write the file.
	 */
	scan = heap_beginscan(drel, SnapshotNow, 0, NULL);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
		char	   *datname;
		Oid			datoid;
		Oid			dattablespace;
		TransactionId datfrozenxid;

		datname = NameStr(dbform->datname);
		datoid = HeapTupleGetOid(tuple);
		dattablespace = dbform->dattablespace;
		datfrozenxid = dbform->datfrozenxid;

		/*
		 * Identify the oldest datfrozenxid.  This must match
		 * the logic in vac_truncate_clog() in vacuum.c.
		 *
		 * MPP-20053: Skip databases that cannot be connected to in computing
		 * the oldest database.
		 */
		if (dbform->datallowconn && TransactionIdIsNormal(datfrozenxid))
		{
			if (oldest_datfrozenxid == InvalidTransactionId ||
				TransactionIdPrecedes(datfrozenxid, oldest_datfrozenxid))
			{
				oldest_datfrozenxid = datfrozenxid;
				namestrcpy(&oldest_datname, datname);
			}
		}

		/*
		 * Check for illegal characters in the database name.
		 */
		if (!name_okay(datname))
		{
			ereport(LOG,
					(errmsg("invalid database name \"%s\"", datname)));
			continue;
		}

		/*
		 * The file format is: "dbname" oid tablespace frozenxid
		 *
		 * The xids are not needed for backend startup, but are of use to
		 * autovacuum, and might also be helpful for forensic purposes.
		 */
		sputs_quote(&buffer, datname);
		appendStringInfo(&buffer, " %u %u %u\n",
						 datoid, dattablespace, datfrozenxid);

		/*
		 * MPP-10111 - During database expansion we need to be able to bring a
		 * database up in order to correct the filespace locations in the
		 * catalog.  At this point we will not be able to resolve database paths
		 * for databases not stored in "pg_default" or "pg_global".
		 *
		 * This is solved by passing a special guc to the startup during this
		 * phase of expand to bypass logic involving non-system tablespaces.
		 * Since we are bypassing the clearing of the relation cache on these
		 * databases we need to ensure that we don't try to use them at all
		 * elsewhere.  This is done with a similar check in
		 * PersistentTablespace_GetPrimaryAndMirrorFilespaces().
		 */
		if (gp_before_filespace_setup && !IsBuiltinTablespace(dattablespace))
			continue;
	}
	heap_endscan(scan);

	MirroredFlatFile_Append(&mirroredOpen, buffer.data, buffer.len,
							/* suppressError */ false);
	MirroredFlatFile_Flush(&mirroredOpen, /* suppressError */ false);
	MirroredFlatFile_Close(&mirroredOpen);

	if (buffer.maxlen > 0)
		pfree(buffer.data);

	/*
	 * Set the transaction ID wrap limit using the oldest datfrozenxid
	 */
	if (oldest_datfrozenxid != InvalidTransactionId)
		SetTransactionIdLimit(oldest_datfrozenxid, &oldest_datname);
}