/*
 * Look up the filespace location recorded for a tablespace OID.
 *
 * tablespace    - OID to look up; must be a valid OID.
 * filespacePath - out: pstrdup'd copy of the matching entry's location,
 *                 or NULL when nothing is found.
 * found         - out: whether DispatchedFilespaceDirHashTable holds an
 *                 entry for the OID.
 *
 * Built-in tablespaces (per IsBuiltinTablespace) are never probed: for
 * them *filespacePath is NULL and *found is FALSE.
 */
void
DispatchedFilespace_GetPathForTablespace(Oid tablespace, char **filespacePath, bool * found)
{
	DispatchedFilespaceDirEntry dirEntry;

	Assert(NULL != filespacePath);
	Assert(OidIsValid(tablespace));

	/* Pessimistic defaults; only a successful hash probe overrides them. */
	*filespacePath = NULL;
	*found = FALSE;

	/* The common built-in case needs no hash lookup at all. */
	if (!IsBuiltinTablespace(tablespace))
	{
		Assert(NULL != DispatchedFilespaceDirHashTable);

		dirEntry = (DispatchedFilespaceDirEntry)
			hash_search(DispatchedFilespaceDirHashTable,
						(void *) &tablespace,
						HASH_FIND,
						found);

		/* Hand back a private copy so the caller never aliases the entry. */
		*filespacePath = *found ? pstrdup(dirEntry->location) : NULL;
	}
}
/*
 * Resolve the filespace that backs a tablespace, reporting "not found"
 * conditions through the return code.
 *
 * tablespaceOid            - the tablespace OID to resolve.
 * primaryFilespaceLocation - out: primary filespace directory path.  Left
 *                            NULL for built-in tablespaces.
 * mirrorFilespaceLocation  - out: mirror filespace directory path.  Left
 *                            NULL for built-in tablespaces and on the
 *                            standby path below; per the original
 *                            interface notes it is also NULL when no
 *                            mirror is configured.
 * filespaceOid             - out: OID of the owning filespace, InvalidOid
 *                            for built-in tablespaces.
 *
 * Raises ERROR when gp_before_filespace_setup is set and the tablespace
 * is not built-in.
 */
PersistentTablespaceGetFilespaces
PersistentTablespace_TryGetPrimaryAndMirrorFilespaces(
	Oid			tablespaceOid,
	char	  **primaryFilespaceLocation,
	char	  **mirrorFilespaceLocation,
	Oid		   *filespaceOid)
{
	/* Start every output from a known "nothing found" state. */
	*primaryFilespaceLocation = NULL;
	*mirrorFilespaceLocation = NULL;
	*filespaceOid = InvalidOid;

	/* Optimize out the common cases: built-in tablespaces need no lookup. */
	if (IsBuiltinTablespace(tablespaceOid))
		return PersistentTablespaceGetFilespaces_Ok;

#ifdef MASTER_MIRROR_SYNC
	/*
	 * Can't rely on persistent tables or memory structures on the standby,
	 * so answer from the cache maintained by the master mirror sync code.
	 */
	if (IsStandbyMode())
	{
		PersistentTablespaceGetFilespaces standbyResult =
			PersistentTablespaceGetFilespaces_Ok;

		if (!mmxlog_tablespace_get_filespace(tablespaceOid, filespaceOid))
		{
			/* Print this information when we are not doing other tracing. */
			if (!Debug_persistent_recovery_print)
				mmxlog_print_tablespaces(LOG,
										 "Standby Get Filespace for Tablespace");

			standbyResult = PersistentTablespaceGetFilespaces_TablespaceNotFound;
		}
		else if (!mmxlog_filespace_get_path(*filespaceOid,
											primaryFilespaceLocation))
		{
			/* Print this information when we are not doing other tracing. */
			if (!Debug_persistent_recovery_print)
				mmxlog_print_filespaces(LOG,
										"Standby Get Filespace Location");

			standbyResult = PersistentTablespaceGetFilespaces_FilespaceNotFound;
		}

		return standbyResult;
	}
#endif

	/*
	 * MPP-10111 - There is a point during gpexpand where we need to bring
	 * the database up to fix the filespace locations for a segment.  At
	 * that point the old filespace locations are wrong and nothing stored
	 * there can be trusted.  If the guc is set we refuse the lookup of any
	 * non-built-in filespace.
	 */
	if (gp_before_filespace_setup)
		elog(ERROR, "can not lookup tablespace location: gp_before_filespace_setup=true");

	/*
	 * Important to make this call AFTER we check whether we are the
	 * Standby Master.
	 */
	PersistentTablespace_VerifyInitScan();

	return PersistentFilespace_GetFilespaceFromTablespace(tablespaceOid,
														  primaryFilespaceLocation,
														  mirrorFilespaceLocation,
														  filespaceOid);
}
/*
 * Helper for XLogOpenRelation: make sure the database directory that
 * contains rnode exists, creating it when missing.
 *
 * For a non-built-in tablespace the primary filespace location is fetched
 * first and must be non-empty, otherwise ERROR is raised.  A directory
 * that already exists (EEXIST) is accepted; any other mkdir failure
 * raises ERROR.
 */
static void
XLogOpenRelation_EnsureDatabaseDir(RelFileNode rnode)
{
	char	   *primaryLocation = NULL;
	char	   *dbDir;

	/* Built-in tablespaces have no filespace to fetch. */
	if (!IsBuiltinTablespace(rnode.spcNode))
	{
		char	   *mirrorLocation = NULL;

		/*
		 * Investigate whether the containing directories exist to give
		 * more detail.
		 */
		PersistentTablespace_GetPrimaryAndMirrorFilespaces(rnode.spcNode,
														   &primaryLocation,
														   &mirrorLocation);

		if (primaryLocation == NULL || primaryLocation[0] == '\0')
		{
			elog(ERROR, "Empty primary filespace directory location");
		}

		/* Only the primary path is needed here; drop the mirror copy. */
		if (mirrorLocation != NULL)
		{
			pfree(mirrorLocation);
			mirrorLocation = NULL;
		}
	}

	dbDir = (char *) palloc(MAXPGPATH + 1);
	FormDatabasePath(dbDir, primaryLocation, rnode.spcNode, rnode.dbNode);

	if (primaryLocation != NULL)
	{
		pfree(primaryLocation);
		primaryLocation = NULL;
	}

	if (mkdir(dbDir, 0700) != 0)
	{
		/* Allowed to already exist. */
		if (errno != EEXIST)
		{
			elog(ERROR, "could not create database directory \"%s\": %m", dbDir);
		}
		else if (Debug_persistent_recovery_print)
		{
			elog(PersistentRecovery_DebugPrintLevel(),
				 "XLogOpenRelation: Database directory \"%s\" already exists",
				 dbDir);
		}
	}
	else if (Debug_persistent_recovery_print)
	{
		elog(PersistentRecovery_DebugPrintLevel(),
			 "XLogOpenRelation: Re-created database directory \"%s\"",
			 dbDir);
	}

	pfree(dbDir);
}

/*
 * Open a relation during XLOG replay
 *
 * Returns the dummy Relation cached in _xlrelcache for rnode, building
 * and caching a new descriptor on a miss.  Either way the descriptor is
 * (re)linked next to _xlrelarr[0] in the moreRecently/lessRecently list
 * (presumably marking it as most recently used).
 *
 * Note: this once had an API that allowed NULL return on failure, but it
 * no longer does; any failure results in elog().
 */
Relation
XLogOpenRelation(RelFileNode rnode)
{
	XLogRelCacheEntry *cacheEntry;
	XLogRelDesc *desc;
	bool		alreadyCached;

	cacheEntry = (XLogRelCacheEntry *)
		hash_search(_xlrelcache, (void *) &rnode, HASH_FIND, NULL);

	if (cacheEntry != NULL)
	{
		/* Cache hit: unlink from its current list position; relinked below. */
		desc = cacheEntry->rdesc;

		desc->lessRecently->moreRecently = desc->moreRecently;
		desc->moreRecently->lessRecently = desc->lessRecently;
	}
	else
	{
		MirrorDataLossTrackingState lossTrackingState;
		int64		lossTrackingSessionNum;
		bool		lossOccurred;

		/* We need to fault in the database directory on the standby. */
		if (rnode.spcNode != GLOBALTABLESPACE_OID && IsStandbyMode())
			XLogOpenRelation_EnsureDatabaseDir(rnode);

		desc = _xl_new_reldesc();

		sprintf(RelationGetRelationName(&(desc->reldata)), "%u", rnode.relNode);

		desc->reldata.rd_node = rnode;

		/*
		 * We set up the lockRelId in case anything tries to lock the dummy
		 * relation.  Note that this is fairly bogus since relNode may be
		 * different from the relation's OID.  It shouldn't really matter
		 * though, since we are presumably running by ourselves and can't
		 * have any lock conflicts ...
		 */
		desc->reldata.rd_lockInfo.lockRelId.dbId = rnode.dbNode;
		desc->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode;

		cacheEntry = (XLogRelCacheEntry *)
			hash_search(_xlrelcache, (void *) &rnode, HASH_ENTER, &alreadyCached);

		/* The HASH_FIND above missed, so an existing entry is corruption. */
		if (alreadyCached)
			elog(PANIC, "xlog relation already present on insert into cache");

		cacheEntry->rdesc = desc;

		desc->reldata.rd_targblock = InvalidBlockNumber;
		desc->reldata.rd_smgr = NULL;
		RelationOpenSmgr(&(desc->reldata));

		/*
		 * Create the target file if it doesn't already exist.  This lets
		 * us cope if the replay sequence contains writes to a relation
		 * that is later deleted.  (The original coding of this routine
		 * would instead return NULL, causing the writes to be suppressed.
		 * But that seems like it risks losing valuable data if the
		 * filesystem loses an inode during a crash.  Better to write the
		 * data until we are actually told to delete the file.)
		 *
		 * NOTE: We no longer re-create files automatically because new
		 * FileRep persistent objects will ensure files exist.
		 *
		 * UNDONE: Can't remove this block of code yet until boot time
		 * calls to this routine are analyzed...
		 * UNDONE: What about the persistent rel files table???
		 * UNDONE: This condition should not occur anymore.
		 * UNDONE: segmentFileNum and AO?
		 */
		lossTrackingState =
			FileRepPrimary_GetMirrorDataLossTrackingSessionNum(
													&lossTrackingSessionNum);

		smgrcreate(desc->reldata.rd_smgr,
				   desc->reldata.rd_isLocalBuf,
				   NULL,		/* relationName: not known here, NULL is ok */
				   lossTrackingState,
				   lossTrackingSessionNum,
				   true,		/* ignoreAlreadyExists */
				   &lossOccurred);
	}

	/* Link the descriptor in immediately on the lessRecently side of _xlrelarr[0]. */
	desc->moreRecently = &(_xlrelarr[0]);
	desc->lessRecently = _xlrelarr[0].lessRecently;
	_xlrelarr[0].lessRecently = desc;
	desc->lessRecently->moreRecently = desc;

	/* Assert what it says in the interface -- we don't return NULL anymore. */
	Assert(&(desc->reldata) != NULL);

	return &(desc->reldata);
}
/*
 * write_database_file: update the flat database file
 *
 * Scans pg_database through drel and rewrites the mirrored flat file
 * "global"/"pg_database" with one line per database whose name passes
 * name_okay().
 *
 * A side effect is to determine the oldest database's datfrozenxid
 * so we can set or update the XID wrap limit.
 *
 * NOTE(review): earlier documentation said that when "startup" is true
 * the relcache init file of each database is cleared here.  This body
 * never reads "startup", so no such clearing happens in this function --
 * confirm whether that is intentional.
 */
static void
write_database_file(Relation drel, bool startup)
{
	StringInfoData contents;
	HeapScanDesc dbscan;
	HeapTuple	dbtuple;
	NameData	oldestName;
	TransactionId oldestFrozenXid = InvalidTransactionId;
	MirroredFlatFileOpen flatFile;

	initStringInfo(&contents);

	MirroredFlatFile_Open(&flatFile,
						  "global",
						  "pg_database",
						  O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY,
						  S_IRUSR | S_IWUSR,
						  false,	/* suppressError */
						  true,		/* atomic operation */
						  false);	/* isMirrorRecovery */

	/*
	 * Read pg_database and accumulate the file contents.
	 */
	dbscan = heap_beginscan(drel, SnapshotNow, 0, NULL);

	for (dbtuple = heap_getnext(dbscan, ForwardScanDirection);
		 dbtuple != NULL;
		 dbtuple = heap_getnext(dbscan, ForwardScanDirection))
	{
		Form_pg_database dbform = (Form_pg_database) GETSTRUCT(dbtuple);
		char	   *datname = NameStr(dbform->datname);
		Oid			datoid = HeapTupleGetOid(dbtuple);
		Oid			dattablespace = dbform->dattablespace;
		TransactionId datfrozenxid = dbform->datfrozenxid;

		/*
		 * Identify the oldest datfrozenxid.  This must match
		 * the logic in vac_truncate_clog() in vacuum.c.
		 *
		 * MPP-20053: Skip databases that cannot be connected to in
		 * computing the oldest database.
		 */
		if (dbform->datallowconn &&
			TransactionIdIsNormal(datfrozenxid) &&
			(oldestFrozenXid == InvalidTransactionId ||
			 TransactionIdPrecedes(datfrozenxid, oldestFrozenXid)))
		{
			oldestFrozenXid = datfrozenxid;
			namestrcpy(&oldestName, datname);
		}

		/*
		 * Databases with illegal characters in their name are logged and
		 * left out of the file (they were still considered above).
		 */
		if (!name_okay(datname))
		{
			ereport(LOG,
					(errmsg("invalid database name \"%s\"",
							datname)));
			continue;
		}

		/*
		 * The file format is: "dbname" oid tablespace frozenxid
		 *
		 * The xids are not needed for backend startup, but are of use to
		 * autovacuum, and might also be helpful for forensic purposes.
		 */
		sputs_quote(&contents, datname);
		appendStringInfo(&contents, " %u %u %u\n",
						 datoid, dattablespace, datfrozenxid);

		/*
		 * MPP-10111 - During database expansion we need to be able to
		 * bring a database up in order to correct the filespace locations
		 * in the catalog.  At that point database paths cannot be resolved
		 * for databases not stored in "pg_default" or "pg_global", so a
		 * special guc makes startup bypass logic involving non-system
		 * tablespaces.  A similar gp_before_filespace_setup check exists
		 * in the persistent tablespace filespace lookup.
		 *
		 * NOTE(review): nothing follows this check inside the loop, so the
		 * "continue" currently skips no work; it is retained unchanged.
		 */
		if (gp_before_filespace_setup && !IsBuiltinTablespace(dattablespace))
			continue;
	}

	heap_endscan(dbscan);

	/* Emit everything in one append, then flush and close the file. */
	MirroredFlatFile_Append(&flatFile,
							contents.data,
							contents.len,
							false);		/* suppressError */
	MirroredFlatFile_Flush(&flatFile,
						   false);		/* suppressError */
	MirroredFlatFile_Close(&flatFile);

	if (contents.maxlen > 0)
		pfree(contents.data);

	/*
	 * Set the transaction ID wrap limit using the oldest datfrozenxid.
	 * oldestName is only meaningful when a qualifying database was seen.
	 */
	if (oldestFrozenXid != InvalidTransactionId)
		SetTransactionIdLimit(oldestFrozenXid, &oldestName);
}