/* * Indicate we intend to create a filespace file as part of the current transaction. * * An XLOG IntentToCreate record is generated that will guard the subsequent file-system * create in case the transaction aborts. * * After 1 or more calls to this routine to mark intention about filespace files that are going * to be created, call ~_DoPendingCreates to do the actual file-system creates. (See its * note on XLOG flushing). */ void PersistentFilespace_MarkCreatePending( Oid filespaceOid, /* The filespace where the filespace lives. */ int16 primaryDbId, char *primaryFilespaceLocation, /* * The primary filespace directory path. NOT Blank padded. Just a NULL * terminated string. */ int16 mirrorDbId, char *mirrorFilespaceLocation, MirroredObjectExistenceState mirrorExistenceState, ItemPointer persistentTid, /* TID of the gp_persistent_rel_files tuple for the rel file */ int64 *persistentSerialNum, bool flushToXLog) /* When true, the XLOG record for this change will be flushed to disk. */ { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; FilespaceDirEntry filespaceDirEntry; TransactionId topXid; Datum values[Natts_gp_persistent_filespace_node]; char mirrorFilespaceLocationBlankPadded[FilespaceLocationBlankPaddedWithNullTermLen]; char primaryFilespaceLocationBlankPadded[FilespaceLocationBlankPaddedWithNullTermLen]; if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Skipping persistent filespace %u because we are before persistence work", filespaceOid); return; /* * The initdb process will load the persistent table once we out of * bootstrap mode. 
*/ } PersistentFilespace_VerifyInitScan(); PersistentFileSysObjName_SetFilespaceDir(&fsObjName, filespaceOid); topXid = GetTopTransactionId(); WRITE_PERSISTENT_STATE_ORDERED_LOCK; PersistentFilespace_BlankPadCopyLocation( primaryFilespaceLocationBlankPadded, primaryFilespaceLocation); PersistentFilespace_BlankPadCopyLocation( mirrorFilespaceLocationBlankPadded, mirrorFilespaceLocation); GpPersistentFilespaceNode_SetDatumValues( values, filespaceOid, primaryDbId, primaryFilespaceLocationBlankPadded, mirrorDbId, mirrorFilespaceLocationBlankPadded, PersistentFileSysState_CreatePending, /* createMirrorDataLossTrackingSessionNum */ 0, mirrorExistenceState, /* reserved */ 0, /* parentXid */ topXid, /* persistentSerialNum */ 0); /* This will be set by PersistentFileSysObj_AddTuple. */ PersistentFileSysObj_AddTuple( PersistentFsObjType_FilespaceDir, values, flushToXLog, persistentTid, persistentSerialNum); WRITE_FILESPACE_HASH_LOCK; filespaceDirEntry = PersistentFilespace_CreateDirUnderLock(filespaceOid); Assert(filespaceDirEntry != NULL); filespaceDirEntry->dbId1 = primaryDbId; memcpy(filespaceDirEntry->locationBlankPadded1, primaryFilespaceLocationBlankPadded, FilespaceLocationBlankPaddedWithNullTermLen); filespaceDirEntry->dbId2 = mirrorDbId; memcpy(filespaceDirEntry->locationBlankPadded2, mirrorFilespaceLocationBlankPadded, FilespaceLocationBlankPaddedWithNullTermLen); filespaceDirEntry->state = PersistentFileSysState_CreatePending; ItemPointerCopy(persistentTid, &filespaceDirEntry->persistentTid); filespaceDirEntry->persistentSerialNum = *persistentSerialNum; WRITE_FILESPACE_HASH_UNLOCK; /* * This XLOG must be generated under the persistent write-lock. 
*/ #ifdef MASTER_MIRROR_SYNC mmxlog_log_create_filespace(filespaceOid); #endif SIMPLE_FAULT_INJECTOR(FaultBeforePendingDeleteFilespaceEntry); /* * MPP-18228 To make adding 'Create Pending' entry to persistent table and * adding to the PendingDelete list atomic */ PendingDelete_AddCreatePendingEntryWrapper( &fsObjName, persistentTid, *persistentSerialNum); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent filespace directory: Add '%s' in state 'Created', mirror existence state '%s', serial number " INT64_FORMAT " at TID %s", PersistentFileSysObjName_ObjectName(&fsObjName), MirroredObjectExistenceState_Name(mirrorExistenceState), *persistentSerialNum, ItemPointerToString(persistentTid)); }
/*
 * Mark a buffer-pool relation in the persistent relation table so that it is
 * handled as 'Scan Incremental' by resynchronize.
 *
 * Parameters:
 *   relFileNode         - the tablespace, database, and relation OIDs of the
 *                         relation.
 *   persistentTid       - TID of the persistent tuple for the relation.
 *   persistentSerialNum - serial number for the relation; distinguishes the
 *                         uses of the tuple.
 *
 * Raises ERROR for an empty RelFileNode, a zero TID, or a zero serial number.
 * Returns without doing anything for relations skipped by
 * GpPersistent_SkipXLogInfo, in bootstrap mode, or before persistence work.
 *
 * NOTE(review): the messages below speak of "physically truncated" although
 * the routine is named for scan incremental resync; the wording is kept (only
 * the "physcially" typo is corrected) — confirm whether it should be renamed.
 */
void PersistentRelation_MarkBufPoolRelationForScanIncrementalResync(
	RelFileNode *relFileNode,
	ItemPointer persistentTid,
	int64		persistentSerialNum)
{
	PersistentFileSysObjName fsObjName;

	if (RelFileNode_IsEmpty(relFileNode))
		elog(ERROR, "Invalid RelFileNode (0,0,0)");

	if (GpPersistent_SkipXLogInfo(relFileNode->relNode))
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent relation '%s' because it is special",
				 relpath(*relFileNode));

		/* Resynchronize will always handle these relations as 'Scan Incremental'. */
		return;
	}

	if (IsBootstrapProcessingMode())
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent relation '%s' because we are in bootstrap mode",
				 relpath(*relFileNode));

		/* The initdb process will load the persistent table once we out of bootstrap mode. */
		return;
	}

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent relation '%s' because we are before persistence work",
				 relpath(*relFileNode));

		/* The initdb process will load the persistent table once we out of bootstrap mode. */
		return;
	}

	PersistentRelation_VerifyInitScan();

	PersistentFileSysObjName_SetRelationFile(
		&fsObjName,
		relFileNode,
		/* segmentFileNum */ 0);

	/* Do these checks after skipping out if in bootstrap mode. */
	if (PersistentStore_IsZeroTid(persistentTid))
		elog(ERROR, "TID for persistent '%s' tuple for mark physically truncated is invalid (0,0)",
			 PersistentFileSysObjName_TypeAndObjectName(&fsObjName));

	if (persistentSerialNum == 0)
		elog(ERROR, "Persistent '%s' serial number for mark physically truncated is invalid (0)",
			 PersistentFileSysObjName_TypeAndObjectName(&fsObjName));

	PersistentFileSysObj_MarkBufPoolRelationForScanIncrementalResync(
		&fsObjName,
		persistentTid,
		persistentSerialNum,
		/* flushToXlog */ true);

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "Persistent relation: '%s' marked physically truncated, serial number " INT64_FORMAT " at TID %s",
			 PersistentFileSysObjName_ObjectName(&fsObjName),
			 persistentSerialNum,
			 ItemPointerToString(persistentTid));
}
static void MirroredBufferPool_SetUpMirrorAccess( RelFileNode *relFileNode, /* The tablespace, database, and relation OIDs for the open. */ int32 segmentFileNum, char *relationName, /* For tracing only. Can be NULL in some execution paths. */ MirrorDataLossTrackingState mirrorDataLossTrackingState, int64 mirrorDataLossTrackingSessionNum, bool primaryOnly, bool mirrorOnly, StorageManagerMirrorMode *mirrorMode, bool *mirrorDataLossOccurred) { *mirrorMode = StorageManagerMirrorMode_None; *mirrorDataLossOccurred = false; // Assume. if (gp_initdb_mirrored) { /* Kludge for initdb */ *mirrorMode = StorageManagerMirrorMode_Both; } else { switch (mirrorDataLossTrackingState) { case MirrorDataLossTrackingState_MirrorNotConfigured: if (mirrorOnly) elog(ERROR, "No mirror configured for mirror only"); *mirrorMode = StorageManagerMirrorMode_PrimaryOnly; break; case MirrorDataLossTrackingState_MirrorCurrentlyUpInSync: if (primaryOnly) *mirrorMode = StorageManagerMirrorMode_PrimaryOnly; else if (!mirrorOnly) *mirrorMode = StorageManagerMirrorMode_Both; else *mirrorMode = StorageManagerMirrorMode_MirrorOnly; break; case MirrorDataLossTrackingState_MirrorCurrentlyUpInResync: if (primaryOnly) *mirrorMode = StorageManagerMirrorMode_PrimaryOnly; else if (!mirrorOnly) *mirrorMode = StorageManagerMirrorMode_Both; else *mirrorMode = StorageManagerMirrorMode_MirrorOnly; break; case MirrorDataLossTrackingState_MirrorDown: if (!mirrorOnly) *mirrorMode = StorageManagerMirrorMode_PrimaryOnly; else *mirrorMode = StorageManagerMirrorMode_MirrorOnly; // Mirror only operations fails from the outset. *mirrorDataLossOccurred = true; // Mirror communication is down. break; default: elog(ERROR, "Unexpected mirror data loss tracking state: %d", mirrorDataLossTrackingState); *mirrorMode = StorageManagerMirrorMode_None; // A happy optimizer is the sound of one hand clapping. 
} } if (Debug_persistent_print) { SUPPRESS_ERRCONTEXT_DECLARE; SUPPRESS_ERRCONTEXT_PUSH(); elog(Persistent_DebugPrintLevel(), "MirroredBufferPool_SetUpMirrorAccess %u/%u/%u, segment file #%d, relation name '%s': primaryOnly %s, mirrorOnly %s, mirror mode %s, mirror data loss occurred %s", relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum, (relationName == NULL ? "<null>" : relationName), (primaryOnly ? "true" : "false"), (mirrorOnly ? "true" : "false"), StorageManagerMirrorMode_Name(*mirrorMode), (*mirrorDataLossOccurred ? "true" : "false")); SUPPRESS_ERRCONTEXT_POP(); } }
/*
 * Record that the filespace directory has been physically removed.
 *
 * The persistent tuple is moved to state Free and the matching entry is
 * removed from the in-memory filespace directory table.  The entry must be in
 * 'Drop Pending' or 'Aborting Create' state, otherwise ERROR is raised.
 *
 * Parameters:
 *   filespaceOid        - OID of the dropped filespace.
 *   persistentTid       - TID of the persistent tuple for the filespace.
 *   persistentSerialNum - serial number for the filespace; distinguishes the
 *                         uses of the tuple.
 */
void PersistentFilespace_Dropped(
	Oid			filespaceOid,
	ItemPointer persistentTid,
	int64		persistentSerialNum)
{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	PersistentFileSysObjStateChangeResult changeResult;
	PersistentFileSysState priorState;
	PersistentFileSysObjName objName;
	FilespaceDirEntry dirEntry;
	bool		inDroppableState;

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
		{
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent filespace %u because we are before persistence work",
				 filespaceOid);
		}

		/* The initdb process will load the persistent table once we out of bootstrap mode. */
		return;
	}

	PersistentFilespace_VerifyInitScan();

	PersistentFileSysObjName_SetFilespaceDir(&objName, filespaceOid);

	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	dirEntry = PersistentFilespace_FindDirUnderLock(filespaceOid);
	if (dirEntry == NULL)
	{
		elog(ERROR, "Did not find persistent filespace entry %u",
			 filespaceOid);
	}

	inDroppableState =
		(dirEntry->state == PersistentFileSysState_DropPending ||
		 dirEntry->state == PersistentFileSysState_AbortingCreate);
	if (!inDroppableState)
	{
		elog(ERROR, "Persistent filespace entry %u expected to be in 'Drop Pending' or 'Aborting Create' (actual state '%s')",
			 filespaceOid,
			 PersistentFileSysObjState_Name(dirEntry->state));
	}

	changeResult = PersistentFileSysObj_StateChange(
		&objName,
		persistentTid,
		persistentSerialNum,
		PersistentFileSysState_Free,
		/* retryPossible */ false,
		/* flushToXlog */ false,
		&priorState,
		PersistentFilespace_DroppedVerifiedActionCallback);

	/* Keep the in-memory entry in step with the tuple, then discard it. */
	dirEntry->state = PersistentFileSysState_Free;
	PersistentFilespace_RemoveDirUnderLock(dirEntry);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

	if (Debug_persistent_print)
	{
		elog(Persistent_DebugPrintLevel(),
			 "Persistent filespace directory: '%s' changed state from '%s' to (Free), serial number " INT64_FORMAT " at TID %s (State-Change result '%s')",
			 PersistentFileSysObjName_ObjectName(&objName),
			 PersistentFileSysObjState_Name(priorState),
			 persistentSerialNum,
			 ItemPointerToString(persistentTid),
			 PersistentFileSysObjStateChangeResult_Name(changeResult));
	}
}
static bool DatabaseInfo_AddGpRelationNode( DatabaseInfo *info, HTAB *dbInfoRelHashTable, Oid relfilenode, int32 segmentFileNum, ItemPointer persistentTid, int64 persistentSerialNum, ItemPointer gpRelationNodeTid) { DbInfoRel *dbInfoRel; bool found; DbInfoGpRelationNode *dbInfoGpRelationNode; dbInfoRel = (DbInfoRel*) hash_search(dbInfoRelHashTable, (void *) &relfilenode, HASH_FIND, &found); //Changes to solve MPP-16346 if(!dbInfoRel) return found; if (found) { if (dbInfoRel->gpRelationNodesCount >= dbInfoRel->gpRelationNodesMaxCount) { DatabaseInfo_Grow( (void**)&dbInfoRel->gpRelationNodes, dbInfoRel->gpRelationNodesCount, &dbInfoRel->gpRelationNodesMaxCount, sizeof(DbInfoGpRelationNode)); } dbInfoGpRelationNode = &dbInfoRel->gpRelationNodes[dbInfoRel->gpRelationNodesCount]; dbInfoRel->gpRelationNodesCount++; } else { if (info->parentlessGpRelationNodesCount >= dbInfoRel->physicalSegmentFilesMaxCount) { DatabaseInfo_Grow( (void**)&info->parentlessGpRelationNodes, info->parentlessGpRelationNodesCount, &info->parentlessGpRelationNodesMaxCount, sizeof(DbInfoGpRelationNode)); } dbInfoGpRelationNode = &info->parentlessGpRelationNodes[info->parentlessGpRelationNodesCount]; info->parentlessGpRelationNodesCount++; } dbInfoGpRelationNode->gpRelationNodeTid = *gpRelationNodeTid; dbInfoGpRelationNode->relfilenodeOid = relfilenode; dbInfoGpRelationNode->segmentFileNum = segmentFileNum; dbInfoGpRelationNode->persistentTid = *persistentTid; dbInfoGpRelationNode->persistentSerialNum = persistentSerialNum; dbInfoGpRelationNode->logicalEof = 0; // This will obtained from the other sources later (e.g. aoseg / aocsseg). if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "DatabaseInfo_AddGpRelationNode: gp_relation_node TID %s, relfilenode %u, segment file #%d, persistent serial number " INT64_FORMAT ", persistent TID %s", ItemPointerToString(gpRelationNodeTid), relfilenode, segmentFileNum, persistentSerialNum, ItemPointerToString(persistentTid)); return found; }
void MirroredBufferPool_CopyToMirror( RelFileNode *relFileNode, char *relationName, ItemPointer persistentTid, int64 persistentSerialNum, MirrorDataLossTrackingState mirrorDataLossTrackingState, int64 mirrorDataLossTrackingSessionNum, int32 numOfBlocks, bool *mirrorDataLossOccurred) { MirroredBufferPoolOpen mirroredOpen; MirrorDataLossTrackingState currentMirrorDataLossTrackingState; int64 currentMirrorDataLossTrackingSessionNum; int32 numOfSegments; int32 numOfRemainingBlocks; int segmentFileNum; int32 numOfBlocksToCopy; int primaryError; Assert(numOfBlocks > 0); numOfSegments = ((numOfBlocks - 1) / RELSEG_SIZE) + 1; Assert(numOfSegments > 0); numOfRemainingBlocks = numOfBlocks; if (Debug_persistent_print) { SUPPRESS_ERRCONTEXT_DECLARE; SUPPRESS_ERRCONTEXT_PUSH(); elog(Persistent_DebugPrintLevel(), "MirroredBufferPool_CopyToMirror %u/%u/%u: copy %d blocks (%d segments). Mirror data loss tracking (state '%s', session num " INT64_FORMAT "), persistent serial num " INT64_FORMAT ", TID %s", relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, numOfBlocks, numOfSegments, MirrorDataLossTrackingState_Name(mirrorDataLossTrackingState), mirrorDataLossTrackingSessionNum, persistentSerialNum, ItemPointerToString(persistentTid)); SUPPRESS_ERRCONTEXT_POP(); } for (segmentFileNum = 0; segmentFileNum < numOfSegments; segmentFileNum++) { Assert(numOfRemainingBlocks > 0); LWLockAcquire(MirroredLock, LW_SHARED); currentMirrorDataLossTrackingState = FileRepPrimary_GetMirrorDataLossTrackingSessionNum( ¤tMirrorDataLossTrackingSessionNum); if (currentMirrorDataLossTrackingSessionNum != mirrorDataLossTrackingSessionNum) { *mirrorDataLossOccurred = true; LWLockRelease(MirroredLock); return; } Assert(currentMirrorDataLossTrackingState == mirrorDataLossTrackingState); MirroredBufferPool_DoOpen( &mirroredOpen, relFileNode, segmentFileNum, relationName, mirrorDataLossTrackingState, mirrorDataLossTrackingSessionNum, /* create */ false, /* mirrorOnly */ false, /* copyToMirror */ 
true, &primaryError, mirrorDataLossOccurred); if (primaryError != 0) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not open relation file '%s', relation name '%s': %s", relpath(*relFileNode), relationName, strerror(primaryError)))); } LWLockRelease(MirroredLock); if (*mirrorDataLossOccurred) { return; } if (numOfRemainingBlocks > RELSEG_SIZE) numOfBlocksToCopy = RELSEG_SIZE; else numOfBlocksToCopy = numOfRemainingBlocks; MirroredBufferPool_DoCopyDataToMirror( &mirroredOpen, numOfBlocksToCopy, mirrorDataLossOccurred); if (*mirrorDataLossOccurred) { MirroredBufferPool_Close(&mirroredOpen); return; } if (!MirroredBufferPool_Flush(&mirroredOpen)) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not flush relation file '%s', relation name '%s': %s", relpath(*relFileNode), relationName, strerror(primaryError)))); } if (mirroredOpen.mirrorDataLossOccurred) { *mirrorDataLossOccurred = true; MirroredBufferPool_Close(&mirroredOpen); return; } MirroredBufferPool_Close(&mirroredOpen); numOfRemainingBlocks -= numOfBlocksToCopy; } }
/*
 * Build the persistent-table information for one database.
 *
 * Steps, in order:
 *   1. Set gp_before_persistence_work so persistence information is not
 *      fetched from gp_relation_node while it is being (re)built.
 *   2. Seed gp_global_sequence with GpGlobalSequence_MaxSequenceTid frozen
 *      zero-valued tuples if the table is still empty.
 *   3. Look up the database's default tablespace in pg_database.
 *   4. Run the persistent build-time init scan, collect the database info,
 *      add a 'Created' database directory entry for every non-global
 *      tablespace, and populate gp_relation_node.
 *   5. Clear gp_before_persistence_work and force a checkpoint.
 *
 * Returns the count accumulated by PersistentBuild_PopulateGpRelationNode.
 * The `mirrored` argument is not used in this function.
 */
static int64 PersistentBuild_BuildDb(
	Oid			dbOid,
	bool		mirrored)
{
	int64		relCount = 0;
	Relation	globalSeqRel;
	Relation	pgDatabaseRel;
	HeapTuple	htup;
	HeapScanDesc seqScan;
	Form_pg_database dbForm;
	DatabaseInfo *dbInfo;
	Oid			dbDefaultTablespace;
	int			idx;
	bool		skipCatalogCollection;
	bool		collectGpRelationNodeInfo;
	bool		collectAppendOnlyCatalogSegmentInfo;

	/*
	 * Turn this on so we don't try to fetch persistence information from
	 * gp_relation_node for gp_relation_node and its index until we've done
	 * the assignment with PersistentRelation_AddCreated.
	 */
	gp_before_persistence_work = true;

	/*
	 * If the gp_global_sequence table hasn't been populated yet then we need
	 * to populate it before we can proceed with building the rest of the
	 * persistent tables.
	 */
	globalSeqRel = heap_open(GpGlobalSequenceRelationId, RowExclusiveLock);
	seqScan = heap_beginscan(globalSeqRel, SnapshotAny, 0, NULL);
	htup = heap_getnext(seqScan, ForwardScanDirection);
	if (!HeapTupleIsValid(htup))
	{
		Datum		values[Natts_gp_global_sequence];
		bool		nulls[Natts_gp_global_sequence];
		TupleDesc	seqDesc = RelationGetDescr(globalSeqRel);

		/* Insert N frozen tuples of value 0 */
		MemSet(nulls, false, sizeof(nulls));
		values[Anum_gp_global_sequence_sequence_num - 1] = Int64GetDatum(0);

		htup = heap_form_tuple(seqDesc, values, nulls);
		if (!HeapTupleIsValid(htup))
			elog(ERROR, "failed to build global sequence tuple");

		idx = 0;
		while (idx < GpGlobalSequence_MaxSequenceTid)
		{
			frozen_heap_insert(globalSeqRel, htup);
			idx++;
		}
	}
	heap_endscan(seqScan);
	heap_close(globalSeqRel, RowExclusiveLock);

	/* Lookup the information for the current database */
	pgDatabaseRel = heap_open(DatabaseRelationId, AccessShareLock);

	/* Fetch a copy of the tuple to scribble on */
	htup = SearchSysCacheCopy(DATABASEOID,
							  ObjectIdGetDatum(dbOid),
							  0, 0, 0);
	if (!HeapTupleIsValid(htup))
		elog(ERROR, "could not find tuple for database %u", dbOid);

	dbForm = (Form_pg_database) GETSTRUCT(htup);
	dbDefaultTablespace = dbForm->dattablespace;

	if (Debug_persistent_print)
	{
		elog(Persistent_DebugPrintLevel(),
			 "PersistentBuild_BuildDb: dbOid %u, '%s'",
			 dbOid,
			 dbForm->datname.data);
	}

	/*
	 * Special call here to scan the persistent meta-data structures so we
	 * are open for business and then we can add information.
	 */
	PersistentFileSysObj_BuildInitScan();

	/*
	 * In upgrade mode under the dispatch or utility role neither kind of
	 * catalog information is collected; in every other case both are.
	 */
	skipCatalogCollection =
		(gp_upgrade_mode &&
		 (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_UTILITY));
	collectGpRelationNodeInfo = !skipCatalogCollection;
	collectAppendOnlyCatalogSegmentInfo = !skipCatalogCollection;

	dbInfo = DatabaseInfo_Collect(
		dbOid,
		dbDefaultTablespace,
		collectGpRelationNodeInfo,
		collectAppendOnlyCatalogSegmentInfo,
		/* scanFileSystem */ true);

	/* One database directory entry per tablespace, except the global one. */
	for (idx = 0; idx < dbInfo->tablespacesCount; idx++)
	{
		ItemPointerData dirTid;
		DbDirNode	dirNode;
		Oid			spcOid = dbInfo->tablespaces[idx];

		if (spcOid != GLOBALTABLESPACE_OID)
		{
			dirNode.tablespace = spcOid;
			dirNode.database = dbOid;

			PersistentDatabase_AddCreated(
				&dirNode,
				&dirTid,
				/* flushToXLog */ false);
		}
	}

	PersistentBuild_PopulateGpRelationNode(
		dbInfo,
		dbDefaultTablespace,
		&relCount);

	heap_close(pgDatabaseRel, AccessShareLock);

	gp_before_persistence_work = false;

	/*
	 * Since we have written XLOG records with <persistentTid,
	 * persistentSerialNum> of zeroes because of the
	 * gp_before_persistence_work GUC, lets do a checkpoint to force out all
	 * buffer pool pages so we never try to redo those XLOG records in Crash
	 * Recovery.
	 */
	CreateCheckPoint(false, true);

	return relCount;
}
/*
 * Indicate we physically removed the relation file.
 *
 * Moves the persistent tuple for the relation segment file to state Free
 * under the persistent write-lock.
 *
 * Parameters:
 *   relFileNode         - the tablespace, database, and relation OIDs for
 *                         the dropped relation.
 *   segmentFileNum      - segment file number of the dropped file.
 *   persistentTid       - TID of the persistent tuple for the relation.
 *   persistentSerialNum - serial number for the relation; distinguishes the
 *                         uses of the tuple.
 *
 * Raises ERROR for an empty RelFileNode, a zero TID, or a zero serial number.
 * Returns without doing anything before persistence work.
 */
void PersistentRelation_Dropped(
	RelFileNode *relFileNode,
	int32		segmentFileNum,
	ItemPointer persistentTid,
	int64		persistentSerialNum)
{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	PersistentFileSysObjName fsObjName;

	PersistentFileSysState oldState;

	PersistentFileSysObjStateChangeResult stateChangeResult;

	if (RelFileNode_IsEmpty(relFileNode))
		elog(ERROR, "Invalid RelFileNode (0,0,0)");

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent relation '%s' because we are before persistence work",
				 relpath(*relFileNode));

		/* The initdb process will load the persistent table once we out of bootstrap mode. */
		return;
	}

	PersistentRelation_VerifyInitScan();

	PersistentFileSysObjName_SetRelationFile(
		&fsObjName,
		relFileNode,
		segmentFileNum);

	/*
	 * Do these checks after skipping out if in bootstrap mode.  The messages
	 * name this operation ("dropped"); they previously said "mark DROP
	 * pending", which is a different state transition.
	 */
	if (PersistentStore_IsZeroTid(persistentTid))
		elog(ERROR, "TID for persistent '%s' tuple for dropped is invalid (0,0)",
			 PersistentFileSysObjName_TypeAndObjectName(&fsObjName));

	if (persistentSerialNum == 0)
		elog(ERROR, "Persistent '%s' serial number for dropped is invalid (0)",
			 PersistentFileSysObjName_TypeAndObjectName(&fsObjName));

	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	stateChangeResult =
		PersistentFileSysObj_StateChange(
			&fsObjName,
			persistentTid,
			persistentSerialNum,
			PersistentFileSysState_Free,
			/* retryPossible */ false,
			/* flushToXlog */ false,
			&oldState,
			PersistentRelation_DroppedVerifiedActionCallback);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "Persistent relation: '%s' changed state from '%s' to (Free), serial number " INT64_FORMAT " at TID %s (State-Change result '%s')",
			 PersistentFileSysObjName_ObjectName(&fsObjName),
			 PersistentFileSysObjState_Name(oldState),
			 persistentSerialNum,
			 ItemPointerToString(persistentTid),
			 PersistentFileSysObjStateChangeResult_Name(stateChangeResult));
}
/* * Indicate we are aborting the create of a relation file. * * This state will make sure the relation gets dropped after a system crash. */ PersistentFileSysObjStateChangeResult PersistentRelfile_MarkAbortingCreate( PersistentFileSysObjName *fsObjName, /* The tablespace, database, and relation OIDs for the aborting create. */ ItemPointer persistentTid, /* TID of the gp_persistent_rel_files tuple for the relation. */ int64 persistentSerialNum, /* Serial number for the relation. Distinquishes the uses of the tuple. */ bool retryPossible) { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; RelFileNode *relFileNode = &fsObjName->variant.rel.relFileNode; PersistentFileSysObjStateChangeResult stateChangeResult; if(RelFileNode_IsEmpty(relFileNode)) elog(ERROR, "Invalid RelFileNode (0,0,0)"); if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Skipping persistent relation '%s' because we are before persistence work", relpath(*relFileNode)); return false; // The initdb process will load the persistent table once we out of bootstrap mode. } /* MPP-16543: When inserting tuples into AO table, row numbers will be * generated from gp_fastsequence catalog table, as part of the design, * these sequence numbers are not reusable, even if the AO insert * transaction is aborted. The entry in gp_fastsequence was inserted * using frozen_heap_insert, which means it's always visible. * Aborted AO insert transaction will cause inconsistency between * gp_fastsequence and pg_class, the solution is to introduce "frozen * delete" - inplace update tuple's MVCC header to make it invisible. 
*/ Relation gp_fastsequence_rel = heap_open(FastSequenceRelationId, RowExclusiveLock); HeapTuple tup; SysScanDesc scan; ScanKeyData skey; ScanKeyInit(&skey, Anum_gp_fastsequence_objid, BTEqualStrategyNumber, F_OIDEQ, relFileNode->relNode); scan = systable_beginscan(gp_fastsequence_rel, InvalidOid, false, SnapshotNow, 1, &skey); while (HeapTupleIsValid(tup = systable_getnext(scan))) { Form_gp_fastsequence found = (Form_gp_fastsequence) GETSTRUCT(tup); if (found->objid == relFileNode->relNode) { if (Debug_persistent_print) { elog(LOG, "frozen deleting gp_fastsequence entry for aborted AO insert transaction on relation %s", relpath(*relFileNode)); } frozen_heap_inplace_delete(gp_fastsequence_rel, tup); } } systable_endscan(scan); heap_close(gp_fastsequence_rel, RowExclusiveLock); PersistentRelfile_VerifyInitScan(); // Do this check after skipping out if in bootstrap mode. if (PersistentStore_IsZeroTid(persistentTid)) elog(ERROR, "TID for persistent '%s' tuple for mark DROP pending is invalid (0,0)", PersistentFileSysObjName_TypeAndObjectName(fsObjName)); if (persistentSerialNum == 0) elog(ERROR, "Persistent '%s' serial number for mark DROP pending is invalid (0)", PersistentFileSysObjName_TypeAndObjectName(fsObjName)); WRITE_PERSISTENT_STATE_ORDERED_LOCK; stateChangeResult = PersistentFileSysObj_StateChange( fsObjName, persistentTid, persistentSerialNum, PersistentFileSysState_AbortingCreate, retryPossible, /* flushToXlog */ false, /* oldState */ NULL, /* verifiedActionCallback */ NULL); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent relation: '%s' changed state from 'Create Pending' to 'Aborting Create', serial number " INT64_FORMAT " at TID %s (State-Change result '%s')", PersistentFileSysObjName_ObjectName(fsObjName), persistentSerialNum, ItemPointerToString(persistentTid), PersistentFileSysObjStateChangeResult_Name(stateChangeResult)); return stateChangeResult; }
/*
 * Populate the gp_relfile_node mapping table (and the persistent relation
 * file table) for one database from the collected DatabaseInfo, then rebuild
 * the mapping table's index.
 *
 * For every relation in info->dbInfoRelArray (relations in the global
 * tablespace are only processed when info->database is TemplateDbOid):
 *   - buffer-pool relations get a single segment-file-0 entry;
 *   - append-only / parquet relations get one entry per physical segment
 *     file, with physical files matched against the ao[cs]seg catalog
 *     information.
 * *count is incremented once per processed relation.
 *
 * Parameters:
 *   info              - collected database information.
 *   defaultTablespace - default tablespace of the database; used to open
 *                       gp_relfile_node directly.
 *   count             - in/out running count of relations handled.
 *
 * Raises ERROR when a cataloged segment with logical EOF > 0 has no physical
 * file, or when the gp_relfile_node index relation was not seen in `info`.
 */
static void PersistentBuild_PopulateGpRelationNode(
	DatabaseInfo *info,
	Oid			defaultTablespace,
	int64	   *count)
{
	Relation	gp_relfile_node;
	int			r;
	RelFileNode indexRelFileNode;
	bool		indexFound;
	Relation	gp_relfile_node_index;
	struct IndexInfo *indexInfo;

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "PersistentBuild_PopulateGpRelationNode: Enter for dbOid %u",
			 info->database);

	MemSet(&indexRelFileNode, 0, sizeof(RelFileNode));
	indexFound = false;

	gp_relfile_node =
		DirectOpen_GpRelfileNodeOpen(
			defaultTablespace,
			info->database);

	for (r = 0; r < info->dbInfoRelArrayCount; r++)
	{
		DbInfoRel  *dbInfoRel = &info->dbInfoRelArray[r];

		RelFileNode relFileNode;

		PersistentFileSysRelStorageMgr relStorageMgr;

		ItemPointerData persistentTid;
		int64		persistentSerialNum;

		/* Global-tablespace relations are handled only for TemplateDbOid. */
		if (dbInfoRel->reltablespace == GLOBALTABLESPACE_OID &&
			info->database != TemplateDbOid)
			continue;

		relFileNode.spcNode = dbInfoRel->reltablespace;
		relFileNode.dbNode =
			(dbInfoRel->reltablespace == GLOBALTABLESPACE_OID ?
			 0 : info->database);
		relFileNode.relNode = dbInfoRel->relfilenodeOid;

		/* Remember where the mapping table's index lives; it is rebuilt below. */
		if (dbInfoRel->relationOid == GpRelfileNodeOidIndexId)
		{
			indexRelFileNode = relFileNode;
			indexFound = true;
		}

		relStorageMgr = (
						 (dbInfoRel->relstorage == RELSTORAGE_AOROWS ||
						  dbInfoRel->relstorage == RELSTORAGE_PARQUET) ?
						 PersistentFileSysRelStorageMgr_AppendOnly :
						 PersistentFileSysRelStorageMgr_BufferPool);

		/*
		 * The gp_relation_node mapping table is empty, so use the physical
		 * files as the guide.
		 */
		if (relStorageMgr == PersistentFileSysRelStorageMgr_BufferPool)
		{
			PersistentFileSysRelStorageMgr localRelStorageMgr;
			PersistentFileSysRelBufpoolKind relBufpoolKind;

			GpPersistentRelfileNode_GetRelfileInfo(
				dbInfoRel->relkind,
				dbInfoRel->relstorage,
				dbInfoRel->relam,
				&localRelStorageMgr,
				&relBufpoolKind);
			Assert(localRelStorageMgr == PersistentFileSysRelStorageMgr_BufferPool);

			/*
			 * Heap tables only ever add a single segment_file_num=0 entry to
			 * gp_persistent_relation regardless of how many segment files
			 * there really are.
			 */
			PersistentRelfile_AddCreated(
				&relFileNode,
				/* segmentFileNum */ 0,
				relStorageMgr,
				relBufpoolKind,
				dbInfoRel->relname,
				&persistentTid,
				&persistentSerialNum,
				/* flushToXLog */ false);

			InsertGpRelfileNodeTuple(
				gp_relfile_node,
				dbInfoRel->relationOid, /* pg_class OID */
				dbInfoRel->relname,
				relFileNode.relNode,	/* pg_class relfilenode */
				/* segmentFileNum */ 0,
				/* updateIndex */ false,
				&persistentTid,
				persistentSerialNum);
		}
		else
		{
			int			a;
			int			p;

			/*
			 * Append-Only.
			 *
			 * Physical segment file #0 is not required to be present; a
			 * check that used to enforce it is disabled.
			 */

			/*
			 * Merge physical file existence and ao[cs]seg catalog logical
			 * EOFs.  `a` walks the catalog entries and only ever moves
			 * forward across iterations of the physical-file loop.
			 */
			a = 0;
			for (p = 0; p < dbInfoRel->physicalSegmentFilesCount; p++)
			{
				int			physicalSegmentFileNum = dbInfoRel->physicalSegmentFiles[p].segmentFileNum;

				bool		haveCatalogInfo;
				int64		logicalEof;

				/*
				 * There is mostly a 1:1 matching of physical files and
				 * logical files and we just have to match them up correctly.
				 * However there are several cases where this can diverge
				 * that we have to be able to handle.
				 *
				 * 1) Segment file 0 always exists as a physical file, but is
				 * only cataloged when it actually contains data - this only
				 * occurs for ao when data is inserted in utility mode.
				 *
				 * 2) Files created in aborted transactions where an initial
				 * frozen tuple never made it to disk may have a physical
				 * file with no logical file.
				 * XXX - These are leaked files that should probably be
				 * cleaned up at some point.
				 *
				 * 3) It is possible to have files that logically exist with
				 * a logical EOF of 0 but not exist in the filesystem.
				 * XXX - how does this happen, is it really safe?
				 */
				logicalEof = 0;
				haveCatalogInfo = false;

				/* If we exhaust the loop then we are in case 2 */
				while (a < dbInfoRel->appendOnlyCatalogSegmentInfoCount)
				{
					DbInfoAppendOnlyCatalogSegmentInfo *logicalSegInfo = \
						&dbInfoRel->appendOnlyCatalogSegmentInfo[a];

					/* Normal Case: both exist */
					if (logicalSegInfo->segmentFileNum == physicalSegmentFileNum)
					{
						logicalEof = logicalSegInfo->logicalEof;
						haveCatalogInfo = true;
						a++;
						break;	/* found */
					}

					/* case 1 or case 2 */
					else if (logicalSegInfo->segmentFileNum > physicalSegmentFileNum)
					{
						logicalEof = 0;
						haveCatalogInfo = false;
						break;	/* not found */
					}

					/* case 3 - skip over logical segments w/o physical files */
					else if (logicalSegInfo->logicalEof == 0)
					{
						a++;
						continue;	/* keep looking */
					}

					/* otherwise it is an error */
					else
					{
						elog(ERROR, "logical EOF greater than zero (" INT64_FORMAT ") for segment file #%d in relation '%s' but physical file is missing",
							 logicalSegInfo->logicalEof,
							 logicalSegInfo->segmentFileNum,
							 dbInfoRel->relname);
					}

					/* unreachable */
					Assert(false);
				}

				/*
				 * case 2) Ignore segment file left over from pre-Release 4.0
				 * aborted transaction whose initial frozen ao[cs]seg tuple
				 * never made it to disk.  This will be a file that can
				 * result in an upgrade complaint...
				 */
				if (physicalSegmentFileNum > 0 && !haveCatalogInfo)
					continue;

				PersistentRelfile_AddCreated(
					&relFileNode,
					physicalSegmentFileNum,
					relStorageMgr,
					PersistentFileSysRelBufpoolKind_None,
					dbInfoRel->relname,
					&persistentTid,
					&persistentSerialNum,
					/* flushToXLog */ false);

				InsertGpRelfileNodeTuple(
					gp_relfile_node,
					dbInfoRel->relationOid, /* pg_class OID */
					dbInfoRel->relname,
					relFileNode.relNode,	/* pg_class relfilenode */
					physicalSegmentFileNum,
					/* updateIndex */ false,
					&persistentTid,
					persistentSerialNum);
			}
		}
		(*count)++;
	}

	if (info->database != TemplateDbOid)
	{
		PersistentBuild_ScanGpPersistentRelationNodeForGlobal(
			gp_relfile_node,
			count);
	}

	/*
	 * Build the index for gp_relation_node.
	 *
	 * The problem is the session we are using is associated with one
	 * particular database of the cluster, but we need to iterate through all
	 * the databases.  So, unfortunately, the solution has been to use the
	 * "Direct Open" stuff.
	 *
	 * We do this because MyDatabaseId, the default tablespace of the session
	 * should not be changed.  The various caches and many other implicit
	 * things assume the object is for MyDatabaseId and the default
	 * tablespace.  For example, we cannot use CatalogUpdateIndexes called in
	 * InsertGpRelationNodeTuple because it will not do the right thing.
	 *
	 * Also, if they re-indexed gp_relation_node, it will have a different
	 * relfilenode and so we must have found it (above) and open it
	 * dynamically.
	 */

	/*
	 * The truncate below is non-transactional and acts on indexRelFileNode,
	 * which is still all zeroes if the index relation was never seen.  Check
	 * at run time, not only in assert-enabled builds.
	 */
	if (!indexFound)
		elog(ERROR, "could not find the gp_relfile_node index relation while populating database %u",
			 info->database);

	PersistentBuild_NonTransactionTruncate(
		&indexRelFileNode);

	gp_relfile_node_index =
		DirectOpen_GpRelfileNodeIndexOpenDynamic(
			GpRelfileNodeOidIndexId,
			indexRelFileNode.spcNode,
			indexRelFileNode.dbNode,
			indexRelFileNode.relNode);

	/* Unique index on columns 1, 2 and 6 of the mapping table. */
	indexInfo = makeNode(IndexInfo);

	indexInfo->ii_NumIndexAttrs = Natts_gp_relfile_node_index;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_KeyAttrNumbers[2] = 6;
	indexInfo->ii_Unique = true;

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "PersistentBuild_PopulateGpRelationNode: building gp_relfile_node_index %u/%u/%u for gp_relfile_node %u/%u/%u",
			 gp_relfile_node_index->rd_node.spcNode,
			 gp_relfile_node_index->rd_node.dbNode,
			 gp_relfile_node_index->rd_node.relNode,
			 gp_relfile_node->rd_node.spcNode,
			 gp_relfile_node->rd_node.dbNode,
			 gp_relfile_node->rd_node.relNode);

	index_build(
		gp_relfile_node,
		gp_relfile_node_index,
		indexInfo,
		false);

	DirectOpen_GpRelfileNodeIndexClose(gp_relfile_node_index);

	DirectOpen_GpRelfileNodeClose(gp_relfile_node);

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "PersistentBuild_PopulateGpRelationNode: Exit for dbOid %u",
			 info->database);
}
void PersistentRelfile_AddCreated( RelFileNode *relFileNode, /* The tablespace, database, and relation OIDs for the create. */ int32 segmentFileNum, PersistentFileSysRelStorageMgr relStorageMgr, PersistentFileSysRelBufpoolKind relBufpoolKind, char *relationName, ItemPointer persistentTid, /* Resulting TID of the gp_persistent_rel_files tuple for the relation. */ int64 *persistentSerialNum, /* Resulting serial number for the relation. Distinquishes the uses of the tuple. */ bool flushToXLog) /* When true, the XLOG record for this change will be flushed to disk. */ { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; XLogRecPtr mirrorBufpoolResyncCkptLoc; ItemPointerData previousFreeTid; Datum values[Natts_gp_persistent_relfile_node]; if(RelFileNode_IsEmpty(relFileNode)) elog(ERROR, "Invalid RelFileNode (0,0,0)"); MemSet(&previousFreeTid, 0, sizeof(ItemPointerData)); MemSet(&mirrorBufpoolResyncCkptLoc, 0, sizeof(XLogRecPtr)); if (!Persistent_BeforePersistenceWork()) elog(ERROR, "We can only add to persistent meta-data when special states"); // Verify PersistentFileSysObj_BuildInitScan has been called. PersistentRelfile_VerifyInitScan(); PersistentFileSysObjName_SetRelationFile( &fsObjName, relFileNode, segmentFileNum, is_tablespace_shared); WRITE_PERSISTENT_STATE_ORDERED_LOCK; GpPersistentRelfileNode_SetDatumValues( values, relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum, relStorageMgr, PersistentFileSysState_Created, relBufpoolKind, InvalidTransactionId, /* persistentSerialNum */ 0, // This will be set by PersistentFileSysObj_AddTuple. 
&previousFreeTid, is_tablespace_shared(relFileNode->spcNode)); PersistentFileSysObj_AddTuple( PersistentFsObjType_RelationFile, values, flushToXLog, persistentTid, persistentSerialNum); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent relation: Add '%s', relation name '%s', in state 'Created', relation storage manager '%s', , serial number " INT64_FORMAT " at TID %s", PersistentFileSysObjName_ObjectName(&fsObjName), relationName, PersistentFileSysRelStorageMgr_Name(relStorageMgr), *persistentSerialNum, ItemPointerToString(persistentTid)); }
/* * Indicate we intend to create a relation file as part of the current transaction. * * An XLOG IntentToCreate record is generated that will guard the subsequent file-system * create in case the transaction aborts. * * After 1 or more calls to this routine to mark intention about relation files that are going * to be created, call ~_DoPendingCreates to do the actual file-system creates. (See its * note on XLOG flushing). */ void PersistentRelfile_AddCreatePending( RelFileNode *relFileNode, /* The tablespace, database, and relation OIDs for the create. */ int32 segmentFileNum, PersistentFileSysRelStorageMgr relStorageMgr, PersistentFileSysRelBufpoolKind relBufpoolKind, bool bufferPoolBulkLoad, char *relationName, ItemPointer persistentTid, /* Resulting TID of the gp_persistent_relation_files tuple for the relation. */ int64 *serialNum, /* Resulting serial number for the relation. Distinquishes the uses of the tuple. */ bool flushToXLog, /* When true, the XLOG record for this change will be flushed to disk. */ bool isLocalBuf) { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; XLogRecPtr mirrorBufpoolResyncCkptLoc; ItemPointerData previousFreeTid; Datum values[Natts_gp_persistent_relfile_node]; if(RelFileNode_IsEmpty(relFileNode)) elog(ERROR, "Invalid RelFileNode (0,0,0)"); MemSet(&previousFreeTid, 0, sizeof(ItemPointerData)); MemSet(&mirrorBufpoolResyncCkptLoc, 0, sizeof(XLogRecPtr)); if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Skipping persistent relation '%s' because we are before persistence work", relpath(*relFileNode)); MemSet(persistentTid, 0, sizeof(ItemPointerData)); *serialNum = 0; return; // The initdb process will load the persistent table once we out of bootstrap mode. 
} PersistentRelfile_VerifyInitScan(); PersistentFileSysObjName_SetRelationFile( &fsObjName, relFileNode, segmentFileNum, is_tablespace_shared); WRITE_PERSISTENT_STATE_ORDERED_LOCK; GpPersistentRelfileNode_SetDatumValues( values, relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum, relStorageMgr, (bufferPoolBulkLoad ? PersistentFileSysState_BulkLoadCreatePending : PersistentFileSysState_CreatePending), relBufpoolKind, GetTopTransactionId(), /* persistentSerialNum */ 0, // This will be set by PersistentFileSysObj_AddTuple. &previousFreeTid, is_tablespace_shared(relFileNode->spcNode)); PersistentFileSysObj_AddTuple( PersistentFsObjType_RelationFile, values, flushToXLog, persistentTid, serialNum); /* * This XLOG must be generated under the persistent write-lock. */ #ifdef MASTER_MIRROR_SYNC mmxlog_log_create_relfilenode( relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum, persistentTid, serialNum); #endif #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( FaultBeforePendingDeleteRelationEntry, DDLNotSpecified, "", // databaseName ""); // tableName #endif /* * MPP-18228 * To make adding 'Create Pending' entry to persistent table and adding * to the PendingDelete list atomic */ PendingDelete_AddCreatePendingRelationEntry( &fsObjName, persistentTid, serialNum, relStorageMgr, relationName, isLocalBuf, bufferPoolBulkLoad); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent relation: Add '%s', relation name '%s' in state 'Create Pending', relation storage manager '%s', serial number " INT64_FORMAT " at TID %s", PersistentFileSysObjName_ObjectName(&fsObjName), relationName, PersistentFileSysRelStorageMgr_Name(relStorageMgr), *serialNum, ItemPointerToString(persistentTid)); }
/* * Indicate we intend to create a filespace file as part of the current transaction. * * An XLOG IntentToCreate record is generated that will guard the subsequent file-system * create in case the transaction aborts. * * After 1 or more calls to this routine to mark intention about filespace files that are going * to be created, call ~_DoPendingCreates to do the actual file-system creates. (See its * note on XLOG flushing). */ void PersistentFilespace_MarkCreatePending( Oid filespaceOid, /* The filespace where the filespace lives. */ int16 primaryDbId, char *primaryFilespaceLocation, /* * The primary filespace directory path. NOT Blank padded. * Just a NULL terminated string. */ int16 mirrorDbId, char *mirrorFilespaceLocation, MirroredObjectExistenceState mirrorExistenceState, ItemPointer persistentTid, /* TID of the gp_persistent_rel_files tuple for the rel file */ int64 *persistentSerialNum, bool flushToXLog) /* When true, the XLOG record for this change will be flushed to disk. */ { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; FilespaceDirEntry filespaceDirEntry; if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Skipping persistent filespace %u because we are before persistence work", filespaceOid); return; // The initdb process will load the persistent table once we out of bootstrap mode. 
} PersistentFilespace_VerifyInitScan(); PersistentFileSysObjName_SetFilespaceDir(&fsObjName,filespaceOid); WRITE_PERSISTENT_STATE_ORDERED_LOCK; filespaceDirEntry = PersistentFilespace_CreateDirUnderLock( filespaceOid); Assert(filespaceDirEntry != NULL); filespaceDirEntry->dbId1 = primaryDbId; PersistentFilespace_BlankPadCopyLocation( filespaceDirEntry->locationBlankPadded1, primaryFilespaceLocation); filespaceDirEntry->dbId2 = mirrorDbId; PersistentFilespace_BlankPadCopyLocation( filespaceDirEntry->locationBlankPadded2, mirrorFilespaceLocation); filespaceDirEntry->state = PersistentFileSysState_CreatePending; PersistentFilespace_AddTuple( filespaceDirEntry, /* createMirrorDataLossTrackingSessionNum */ 0, mirrorExistenceState, /* reserved */ 0, /* parentXid */ GetTopTransactionId(), flushToXLog); *persistentTid = filespaceDirEntry->persistentTid; *persistentSerialNum = filespaceDirEntry->persistentSerialNum; /* * This XLOG must be generated under the persistent write-lock. */ #ifdef MASTER_MIRROR_SYNC mmxlog_log_create_filespace(filespaceOid); #endif #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( FaultBeforePendingDeleteFilespaceEntry, DDLNotSpecified, "", // databaseName ""); // tableName #endif /* * MPP-18228 * To make adding 'Create Pending' entry to persistent table and adding * to the PendingDelete list atomic */ PendingDelete_AddCreatePendingEntryWrapper( &fsObjName, persistentTid, *persistentSerialNum); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent filespace directory: Add '%s' in state 'Created', mirror existence state '%s', serial number " INT64_FORMAT " at TID %s", PersistentFileSysObjName_ObjectName(&fsObjName), MirroredObjectExistenceState_Name(mirrorExistenceState), *persistentSerialNum, ItemPointerToString(persistentTid)); }
/* * Indicate we intend to create a relation file as part of the current transaction. * * This function adds an entry in 'gp_persistent_relation_node' for either a new table (segment file * # 0) or a new segment file under AO table (segment file # > 0 for row/column-oriented AO) with a state * 'Create Pending'. An XLOG IntentToCreate record is generated that will guard the subsequent file-system * create in case the transaction aborts. * * Paramaters * ----------- * relFileNode = The tablespace, database, and relation OIDs for the create * segmentFileNum = As the name implies ( 0 for heap * >= 0 for RO/CO AO as applicable) * relStorageMgr = Persistent Relation storage Manager * relBufpoolKind = Buffer pool type beneath corrosponding relation * TODO bufferPollBulkLoad = ??? * TODO mirrorExistenceState = ??? * TODO relDataSynchronizationState = ??? * flushToXlog = If true, the XLOG record for this change will be flushed to disk. * TODO isLocalBuf = ??? * * Return * ------ * relationName = Name of the relation used for either debugging or to store in PendingDelete LL. * persistentTid = Resulting TID of the gp_persistent_rel_files tuple for the relation * serialNum = Resulting serial number for the relation. 
Distinquishes the uses of the tuple */ void PersistentRelation_AddCreatePending( RelFileNode *relFileNode, int32 segmentFileNum, PersistentFileSysRelStorageMgr relStorageMgr, PersistentFileSysRelBufpoolKind relBufpoolKind, bool bufferPoolBulkLoad, MirroredObjectExistenceState mirrorExistenceState, MirroredRelDataSynchronizationState relDataSynchronizationState, char *relationName, ItemPointer persistentTid, int64 *serialNum, bool flushToXLog, bool isLocalBuf) { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; XLogRecPtr mirrorBufpoolResyncCkptLoc; Datum values[Natts_gp_persistent_relation_node]; if(RelFileNode_IsEmpty(relFileNode)) elog(ERROR, "Invalid RelFileNode (0,0,0)"); MemSet(&mirrorBufpoolResyncCkptLoc, 0, sizeof(XLogRecPtr)); if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Skipping persistent relation '%s' because we are before persistence work", relpath(*relFileNode)); MemSet(persistentTid, 0, sizeof(ItemPointerData)); *serialNum = 0; return; // The initdb process will load the persistent table once we out of bootstrap mode. } /* Verify if the needed shared mem data structures for persistent tables are setup and inited */ PersistentRelation_VerifyInitScan(); /* Setup the file system object name */ PersistentFileSysObjName_SetRelationFile( &fsObjName, relFileNode, segmentFileNum); WRITE_PERSISTENT_STATE_ORDERED_LOCK; /* Create a values array which will be used to create a 'gp_persistent_relation_node' tuple */ GpPersistentRelationNode_SetDatumValues( values, relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum, relStorageMgr, (bufferPoolBulkLoad ? 
PersistentFileSysState_BulkLoadCreatePending : PersistentFileSysState_CreatePending), /* createMirrorDataLossTrackingSessionNum */ 0, mirrorExistenceState, relDataSynchronizationState, /* mirrorBufpoolMarkedForScanIncrementalResync */ false, /* mirrorBufpoolResyncChangedPageCount */ 0, &mirrorBufpoolResyncCkptLoc, /* mirrorBufpoolResyncCkptBlockNum */ 0, /* mirrorAppendOnlyLossEof */ 0, /* mirrorAppendOnlyNewEof */ 0, relBufpoolKind, GetTopTransactionId(), /* persistentSerialNum */ 0); // This will be set by PersistentFileSysObj_AddTuple. /* Add a new tuple to 'gp_persistent_relation_node' table for the new relation/segment file * we intend to create. This will also create and apply a new persistent serial number. */ PersistentFileSysObj_AddTuple( PersistentFsObjType_RelationFile, values, flushToXLog, persistentTid, serialNum); /* * This XLOG must be generated under the persistent write-lock. */ #ifdef MASTER_MIRROR_SYNC mmxlog_log_create_relfilenode( relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum); #endif SIMPLE_FAULT_INJECTOR(FaultBeforePendingDeleteRelationEntry); /* We'll add an entry to the PendingDelete LinkedList (LL) to remeber what we * created in this transaction (or sub-transaction). If the transaction * aborts then we can search for all such entries in this LL and get rid of (delete) * such relations or segment files on the disk. 
* * MPP-18228 * To make adding 'Create Pending' entry to persistent table and adding * to the PendingDelete list atomic */ PendingDelete_AddCreatePendingRelationEntry( &fsObjName, persistentTid, serialNum, relStorageMgr, relationName, isLocalBuf, bufferPoolBulkLoad); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent relation: Add '%s', relation name '%s' in state 'Create Pending', relation storage manager '%s', mirror existence state '%s', relation data resynchronization state '%s', serial number " INT64_FORMAT " at TID %s", PersistentFileSysObjName_ObjectName(&fsObjName), relationName, PersistentFileSysRelStorageMgr_Name(relStorageMgr), MirroredObjectExistenceState_Name(mirrorExistenceState), MirroredRelDataSynchronizationState_Name(relDataSynchronizationState), *serialNum, ItemPointerToString(persistentTid)); }
Datum gp_persistent_build_all(PG_FUNCTION_ARGS) { bool mirrored = PG_GETARG_BOOL(0); Relation pg_filespace; Relation pg_tablespace; Relation pg_database; HeapScanDesc scan; HeapTuple tuple; Datum *d; bool *null; // UNDONE: Verify we are in some sort of single-user mode. // clear hash PersistentFilespace_Reset(); //clear dispatched file space info DispatchedFilespace_SeqSearch_Term(); /* * Re-build filespaces. */ d = (Datum *) palloc(sizeof(Datum) * Natts_pg_filespace); null = (bool *) palloc(sizeof(bool) * Natts_pg_filespace); pg_filespace = heap_open( FileSpaceRelationId, AccessShareLock); scan = heap_beginscan(pg_filespace, SnapshotNow, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Oid filespaceOid; if (!HeapTupleIsValid(tuple)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("filespace tuple is invalid"))); filespaceOid = HeapTupleGetOid(tuple); heap_deform_tuple(tuple, pg_filespace->rd_att, d, null); if (filespaceOid == SYSTEMFILESPACE_OID) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "gp_persistent_build_all: skip pg_system filespaceOid %u", filespaceOid); continue; } if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "gp_persistent_build_all: filespaceOid %u", filespaceOid); /* * Special call here to scan the persistent meta-data structures so we are open for * business and then we can add information. */ PersistentFileSysObj_BuildInitScan(); PersistentFilespace_AddCreated( filespaceOid, /* flushToXLog */ false); } heap_endscan(scan); heap_close(pg_filespace, AccessShareLock); pfree(d); pfree(null); /* * Re-build tablespaces. 
*/ d = (Datum *) palloc(sizeof(Datum) * Natts_pg_tablespace); null = (bool *) palloc(sizeof(bool) * Natts_pg_tablespace); pg_tablespace = heap_open( TableSpaceRelationId, AccessShareLock); scan = heap_beginscan(pg_tablespace, SnapshotNow, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Oid tablespaceOid; if (!HeapTupleIsValid(tuple)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("tablespace tuple is invalid"))); tablespaceOid = HeapTupleGetOid(tuple); heap_deform_tuple(tuple, pg_tablespace->rd_att, d, null); if (tablespaceOid == DEFAULTTABLESPACE_OID || tablespaceOid == GLOBALTABLESPACE_OID) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "gp_persistent_build_all: skip pg_default and pg_global tablespaceOid %u", tablespaceOid); continue; } if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "gp_persistent_build_all: tablespaceOid %u filespaceOid %u", tablespaceOid, DatumGetInt32(d[Anum_pg_tablespace_spcfsoid - 1])); /* * Special call here to scan the persistent meta-data structures so we are open for * business and then we can add information. */ PersistentFileSysObj_BuildInitScan(); PersistentTablespace_AddCreated( DatumGetInt32(d[Anum_pg_tablespace_spcfsoid - 1]), tablespaceOid, /* flushToXLog */ false); } heap_endscan(scan); heap_close(pg_tablespace, AccessShareLock); pfree(d); pfree(null); /* * Re-build databases. * Do template1 first since it will also populate the shared-object persistent objects. */ PersistentBuild_BuildDb( TemplateDbOid, mirrored); if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "gp_persistent_build_all: template1 complete"); /* * Now, the remaining databases. 
*/ pg_database = heap_open( DatabaseRelationId, AccessShareLock); scan = heap_beginscan(pg_database, SnapshotNow, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Oid dbOid; dbOid = HeapTupleGetOid(tuple); if (dbOid == TemplateDbOid) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "gp_persistent_build_all: skip template1"); continue; } if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "gp_persistent_build_all: dbOid %u", dbOid); PersistentBuild_BuildDb( dbOid, mirrored); } heap_endscan(scan); heap_close(pg_database, AccessShareLock); PersistentStore_FlushXLog(); PG_RETURN_INT32(1); }
void PersistentRelation_AddCreated( RelFileNode *relFileNode, /* The tablespace, database, and relation OIDs for the create. */ int32 segmentFileNum, PersistentFileSysRelStorageMgr relStorageMgr, PersistentFileSysRelBufpoolKind relBufpoolKind, MirroredObjectExistenceState mirrorExistenceState, MirroredRelDataSynchronizationState relDataSynchronizationState, int64 mirrorAppendOnlyLossEof, int64 mirrorAppendOnlyNewEof, char *relationName, ItemPointer persistentTid, /* Resulting TID of the gp_persistent_rel_files tuple for the relation. */ int64 *persistentSerialNum, /* Resulting serial number for the relation. Distinquishes the uses of the tuple. */ bool flushToXLog) /* When true, the XLOG record for this change will be flushed to disk. */ { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; XLogRecPtr mirrorBufpoolResyncCkptLoc; Datum values[Natts_gp_persistent_relation_node]; if(RelFileNode_IsEmpty(relFileNode)) elog(ERROR, "Invalid RelFileNode (0,0,0)"); MemSet(&mirrorBufpoolResyncCkptLoc, 0, sizeof(XLogRecPtr)); if (!Persistent_BeforePersistenceWork()) elog(ERROR, "We can only add to persistent meta-data when special states"); // Verify PersistentFileSysObj_BuildInitScan has been called. 
PersistentRelation_VerifyInitScan(); PersistentFileSysObjName_SetRelationFile( &fsObjName, relFileNode, segmentFileNum); WRITE_PERSISTENT_STATE_ORDERED_LOCK; GpPersistentRelationNode_SetDatumValues( values, relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum, relStorageMgr, PersistentFileSysState_Created, /* createMirrorDataLossTrackingSessionNum */ 0, mirrorExistenceState, relDataSynchronizationState, /* mirrorBufpoolMarkedForScanIncrementalResync */ false, /* mirrorBufpoolResyncChangedPageCount */ 0, &mirrorBufpoolResyncCkptLoc, /* mirrorBufpoolResyncCkptBlockNum */ 0, mirrorAppendOnlyLossEof, mirrorAppendOnlyNewEof, relBufpoolKind, InvalidTransactionId, /* persistentSerialNum */ 0); // This will be set by PersistentFileSysObj_AddTuple. PersistentFileSysObj_AddTuple( PersistentFsObjType_RelationFile, values, flushToXLog, persistentTid, persistentSerialNum); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent relation: Add '%s', relation name '%s', in state 'Created', relation storage manager '%s', mirror existence state '%s', relation data resynchronization state '%s', serial number " INT64_FORMAT " at TID %s", PersistentFileSysObjName_ObjectName(&fsObjName), relationName, PersistentFileSysRelStorageMgr_Name(relStorageMgr), MirroredObjectExistenceState_Name(mirrorExistenceState), MirroredRelDataSynchronizationState_Name(relDataSynchronizationState), *persistentSerialNum, ItemPointerToString(persistentTid)); }
static int64 PersistentBuild_TruncateAllGpRelationNode(void) { Relation pg_database; HeapScanDesc scan; HeapTuple tuple; int64 count; pg_database = heap_open( DatabaseRelationId, AccessShareLock); /* * Truncate gp_relation_node and its index in each database. */ scan = heap_beginscan(pg_database, SnapshotNow, 0, NULL); count = 0; while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_database form_pg_database = (Form_pg_database)GETSTRUCT(tuple); Oid dbOid; Oid dattablespace; RelFileNode relFileNode; SMgrRelation smgrRelation; Page btree_metapage; dbOid = HeapTupleGetOid(tuple); dattablespace = form_pg_database->dattablespace; if (dbOid == HcatalogDbOid) continue; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "PersistentBuild_TruncateAllGpRelationNode: dbOid %u, '%s'", dbOid, form_pg_database->datname.data); if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Truncating gp_relation_node %u/%u/%u in database oid %u ('%s')", relFileNode.spcNode, relFileNode.dbNode, relFileNode.relNode, dbOid, form_pg_database->datname.data); relFileNode.spcNode = dattablespace; relFileNode.dbNode = dbOid; relFileNode.relNode = GpRelfileNodeRelationId; /* * Truncate WITHOUT generating an XLOG record (i.e. pretend it is a temp relation). */ PersistentBuild_NonTransactionTruncate(&relFileNode); count++; /* * And, the index. Unfortunately, the relfilenode OID can change due to a * REINDEX {TABLE|INDEX} command. */ PersistentBuild_FindGpRelationNodeIndex( dbOid, dattablespace, &relFileNode); if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Truncating gp_relation_node_index %u/%u/%u in database oid %u ('%s'). relfilenode different %s, tablespace different %s", relFileNode.spcNode, relFileNode.dbNode, relFileNode.relNode, dbOid, form_pg_database->datname.data, ((relFileNode.relNode != GpRelfileNodeOidIndexId) ? "true" : "false"), ((relFileNode.spcNode != dattablespace) ? 
"true" : "false")); PersistentBuild_NonTransactionTruncate(&relFileNode); // The BTree needs an empty meta-data block. smgrRelation = smgropen(relFileNode); btree_metapage = (Page)palloc(BLCKSZ); _bt_initmetapage(btree_metapage, P_NONE, 0); smgrwrite( smgrRelation, /* blockNum */ 0, (char*)btree_metapage, /* isTemp */ false); smgrimmedsync(smgrRelation); pfree(btree_metapage); smgrclose(smgrRelation); count++; } heap_endscan(scan); heap_close(pg_database, AccessShareLock); return count; }
/*
 * Scan pg_appendonly of the database described by 'info' (opened through the
 * "direct open" interface with info->defaultTablespace / info->database) and
 * add one entry per row to pgAppendOnlyHashTable, keyed by relation id.
 */
static void
DatabaseInfo_CollectPgAppendOnly(
	DatabaseInfo 		*info,
	HTAB				*pgAppendOnlyHashTable)
{
	Relation pg_appendonly_rel;
	HeapScanDesc scan;
	HeapTuple tuple;

	pg_appendonly_rel =
			DirectOpen_PgAppendOnlyOpen(
							info->defaultTablespace,
							info->database);
	scan = heap_beginscan(pg_appendonly_rel, SnapshotNow, 0, NULL);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		AppendOnlyEntry *aoEntry;
		Oid relationId;

		/*
		 * GetAppendOnlyEntryFromTuple is handed the tuple itself, so there is
		 * no need to deform it into local arrays here first.
		 */
		aoEntry = GetAppendOnlyEntryFromTuple(
									pg_appendonly_rel,
									RelationGetDescr(pg_appendonly_rel),
									tuple,
									&relationId);
		Assert(aoEntry != NULL);

		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "DatabaseInfo_Collect: Append-Only entry for relation id %u, "
				 "blocksize %d, safefswritesize %d, compresslevel %d, major version %d, minor version %d, "
				 " checksum %s, compresstype %s, columnstore %s, segrelid %u, segidxid %u, blkdirrelid %u, blkdiridxid %u",
				 relationId,
				 aoEntry->blocksize,
				 aoEntry->safefswritesize,
				 aoEntry->compresslevel,
				 aoEntry->majorversion,
				 aoEntry->minorversion,
				 (aoEntry->checksum ? "true" : "false"),
				 (aoEntry->compresstype ? aoEntry->compresstype : "NULL"),
				 (aoEntry->columnstore ? "true" : "false"),
				 aoEntry->segrelid,
				 aoEntry->segidxid,
				 aoEntry->blkdirrelid,
				 aoEntry->blkdiridxid);

		DatabaseInfo_AddPgAppendOnly(
								pgAppendOnlyHashTable,
								relationId,
								aoEntry);
	}
	heap_endscan(scan);

	DirectOpen_PgAppendOnlyClose(pg_appendonly_rel);
}
/*
 * Per-tuple callback for a scan of the persistent filespace table.
 *
 * Free tuples are only traced.  For any other tuple a filespace directory
 * entry is created (ERROR if that fails) and filled with the tuple's blank
 * padded location, state and serial number, plus the TID passed in.
 *
 * Always returns true, i.e. asks the caller to continue.
 */
static bool
PersistentFilespace_ScanTupleCallback(
	ItemPointer 			persistentTid,
	int64					persistentSerialNum,
	Datum					*values)
{
	FilespaceDirEntry dirEntry;

	/* Fields unpacked from the tuple. */
	Oid filespaceOid;
	int16 dbId1;
	char locationBlankPadded1[FilespaceLocationBlankPaddedWithNullTermLen];
	PersistentFileSysState state;
	int32 reserved;
	TransactionId parentXid;
	int64 serialNum;
	ItemPointerData previousFreeTid;
	bool sharedStorage;

	GpPersistentFilespaceNode_GetValues(
									values,
									&filespaceOid,
									&dbId1,
									locationBlankPadded1,
									&state,
									&reserved,
									&parentXid,
									&serialNum,
									&previousFreeTid,
									&sharedStorage);

	if (state == PersistentFileSysState_Free)
	{
		/* Free tuples get no directory entry. */
		if (Debug_persistent_print)
		{
			elog(Persistent_DebugPrintLevel(),
				 "PersistentFilespace_ScanTupleCallback: TID %s, serial number " INT64_FORMAT " is free",
				 ItemPointerToString2(persistentTid),
				 persistentSerialNum);
		}
		return true;
	}

	dirEntry = PersistentFilespace_CreateDirUnderLock(filespaceOid);
	if (dirEntry == NULL)
	{
		elog(ERROR, "Out of shared-memory for persistent filespaces");
	}

	memcpy(dirEntry->locationBlankPadded1,
		   locationBlankPadded1,
		   FilespaceLocationBlankPaddedWithNullTermLen);

	dirEntry->state = state;
	/* The serial number stored is the one read from the tuple values. */
	dirEntry->persistentSerialNum = serialNum;
	dirEntry->persistentTid = *persistentTid;

	if (Debug_persistent_print)
	{
		elog(Persistent_DebugPrintLevel(),
			 "PersistentFilespace_ScanTupleCallback: filespace %u, dbId1 %d, state '%s', TID %s, serial number " INT64_FORMAT,
			 filespaceOid,
			 dbId1,
			 PersistentFileSysObjState_Name(state),
			 ItemPointerToString2(persistentTid),
			 persistentSerialNum);
	}

	return true;
}
/*
 * Walk every relation in dbInfoRelHashTable and, for append-only relations
 * (row or column oriented), record the catalog's per-segment-file EOF values
 * on the relation via DatabaseInfo_AddAppendOnlyCatalogSegmentInfo.
 *
 * For each such relation the pg_appendonly entry is looked up, its segrelid
 * is translated to a relfilenode through relationIdHashTable, and the
 * ao[cs]seg relation is opened directly and read with SnapshotNow.
 *
 * Row-oriented: one record per segment file (segno, eof).
 * Column-oriented: one record per (segment file, column), numbered
 *   columnNum * AOTupleId_MultiplierSegmentFileNum + segno.
 */
static void
DatabaseInfo_HandleAppendOnly(
	DatabaseInfo		*info,
	HTAB				*dbInfoRelHashTable,
	HTAB				*relationIdHashTable,
	HTAB				*pgAppendOnlyHashTable)
{
	HASH_SEQ_STATUS seqStatus;
	DbInfoRel *dbInfoRel;

	hash_seq_init(&seqStatus, dbInfoRelHashTable);

	while ((dbInfoRel = (DbInfoRel *) hash_seq_search(&seqStatus)) != NULL)
	{
		AppendOnlyEntry *aoEntry;
		DbInfoRel *segRelInfo;
		int segIdx;

		/* Only append-only relations carry segment-file catalog info. */
		if (dbInfoRel->relstorage != RELSTORAGE_AOROWS &&
			dbInfoRel->relstorage != RELSTORAGE_AOCOLS)
		{
			continue;
		}

		aoEntry = DatabaseInfo_FindPgAppendOnly(
										pgAppendOnlyHashTable,
										dbInfoRel->relationOid);

		if (Debug_persistent_print)
		{
			elog(Persistent_DebugPrintLevel(),
				 "DatabaseInfo_AddPgClassStoredRelation: Append-Only entry for relation id %u, relation name %s, "
				 "blocksize %d, safefswritesize %d, compresslevel %d, major version %d, minor version %d, "
				 " checksum %s, compresstype %s, columnstore %s, segrelid %u, segidxid %u, blkdirrelid %u, blkdiridxid %u, "
				 " visimaprelid %u, visimapidxid %u",
				 dbInfoRel->relationOid,
				 dbInfoRel->relname,
				 aoEntry->blocksize,
				 aoEntry->safefswritesize,
				 aoEntry->compresslevel,
				 aoEntry->majorversion,
				 aoEntry->minorversion,
				 (aoEntry->checksum ? "true" : "false"),
				 (aoEntry->compresstype ? aoEntry->compresstype : "NULL"),
				 (aoEntry->columnstore ? "true" : "false"),
				 aoEntry->segrelid,
				 aoEntry->segidxid,
				 aoEntry->blkdirrelid,
				 aoEntry->blkdiridxid,
				 aoEntry->visimaprelid,
				 aoEntry->visimapidxid);
		}

		/*
		 * Translate the ao[cs]seg relation id to relfilenode.
		 */
		segRelInfo = DatabaseInfo_FindRelationId(
										relationIdHashTable,
										aoEntry->segrelid);
		Assert(segRelInfo != NULL);

		if (dbInfoRel->relstorage == RELSTORAGE_AOROWS)
		{
			Relation aosegRel;
			FileSegInfo **segInfos;
			int segInfoCount;

			aosegRel = DirectOpen_PgAoSegOpenDynamic(
										aoEntry->segrelid,
										dbInfoRel->reltablespace,
										info->database,
										segRelInfo->relfilenodeOid);

			segInfos = GetAllFileSegInfo_pg_aoseg_rel(
										dbInfoRel->relname,
										aoEntry,
										aosegRel,
										SnapshotNow,
										&segInfoCount);

			for (segIdx = 0; segIdx < segInfoCount; segIdx++)
			{
				DatabaseInfo_AddAppendOnlyCatalogSegmentInfo(
													dbInfoRel,
													segInfos[segIdx]->segno,
													segInfos[segIdx]->eof);
			}

			DirectOpen_PgAoSegClose(aosegRel);
		}
		else
		{
			/* RELSTORAGE_AOCOLS: the only other storage kind that reaches here. */
			Relation aocssegRel;
			struct AOCSFileSegInfo **segInfos;
			int segInfoCount;

			aocssegRel = DirectOpen_PgAoCsSegOpenDynamic(
										aoEntry->segrelid,
										dbInfoRel->reltablespace,
										info->database,
										segRelInfo->relfilenodeOid);

			segInfos = GetAllAOCSFileSegInfo_pg_aocsseg_rel(
										dbInfoRel->relnatts,
										dbInfoRel->relname,
										aoEntry,
										aocssegRel,
										SnapshotNow,
										&segInfoCount);

			for (segIdx = 0; segIdx < segInfoCount; segIdx++)
			{
				int32 segno = segInfos[segIdx]->segno;
				int col;

				/* One catalog record per column of this segment file. */
				for (col = 0; col < dbInfoRel->relnatts; col++)
				{
					AOCSVPInfoEntry *vpEntry =
								getAOCSVPEntry(segInfos[segIdx], col);

					DatabaseInfo_AddAppendOnlyCatalogSegmentInfo(
														dbInfoRel,
														col * AOTupleId_MultiplierSegmentFileNum + segno,
														vpEntry->eof);
				}
			}

			DirectOpen_PgAoCsSegClose(aocssegRel);
		}
	}
}
/*
 * Indicate we intend to create a filespace file as part of the current transaction.
 *
 * An XLOG IntentToCreate record is generated that will guard the subsequent file-system
 * create in case the transaction aborts.
 *
 * After 1 or more calls to this routine to mark intention about filespace files that are going
 * to be created, call ~_DoPendingCreates to do the actual file-system creates.  (See its
 * note on XLOG flushing).
 *
 * filespaceLocation is blank-pad copied into the directory entry.  On return
 * *persistentTid and *persistentSerialNum hold the values stored in that entry.
 *
 * When Persistent_BeforePersistenceWork() is true the function returns without
 * recording anything and without writing the two outputs.
 *
 * Raises ERROR (after releasing the persistent write-lock) when no filespace
 * directory entry can be obtained.
 */
void PersistentFilespace_MarkCreatePending(
	Oid 		filespaceOid,
	char		*filespaceLocation,
	ItemPointer	persistentTid,
	int64		*persistentSerialNum,
	bool		flushToXLog)
{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	PersistentFileSysObjName fsObjName;
	FilespaceDirEntry filespaceDirEntry;

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent filespace %u because we are before persistence work",
				 filespaceOid);

		/* The initdb process will load the persistent table once we out of bootstrap mode. */
		return;
	}

	PersistentFilespace_VerifyInitScan();

	PersistentFileSysObjName_SetFilespaceDir(&fsObjName, filespaceOid, is_filespace_shared);

	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	filespaceDirEntry =
		PersistentFilespace_CreateDirUnderLock(filespaceOid);
	if (filespaceDirEntry == NULL)
	{
		/* If out of shared memory, no need to promote to PANIC. */
		WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("Out of shared-memory for persistent filespaces"),
				 errhint("You may need to increase the gp_max_filespaces value"),
				 errOmitLocation(true)));
	}

	PersistentFilespace_BlankPadCopyLocation(
										filespaceDirEntry->locationBlankPadded1,
										filespaceLocation);

	filespaceDirEntry->state = PersistentFileSysState_CreatePending;

	PersistentFilespace_AddTuple(
							filespaceDirEntry,
							/* createMirrorDataLossTrackingSessionNum */ 0,
							/* reserved */ 0,
							/* parentXid */ GetTopTransactionId(),
							flushToXLog);

	*persistentTid = filespaceDirEntry->persistentTid;
	*persistentSerialNum = filespaceDirEntry->persistentSerialNum;

	/*
	 * This XLOG must be generated under the persistent write-lock.
	 */
#ifdef MASTER_MIRROR_SYNC
	mmxlog_log_create_filespace(filespaceOid);
#endif

#ifdef FAULT_INJECTOR
	FaultInjector_InjectFaultIfSet(
								   FaultBeforePendingDeleteFilespaceEntry,
								   DDLNotSpecified,
								   "",	/* databaseName */
								   "");	/* tableName */
#endif

	/*
	 * MPP-18228
	 * To make adding 'Create Pending' entry to persistent table and adding
	 * to the PendingDelete list atomic
	 */
	PendingDelete_AddCreatePendingEntryWrapper(
					&fsObjName,
					persistentTid,
					*persistentSerialNum);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

	/* The entry was recorded as CreatePending, so report that state. */
	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "Persistent filespace directory: Add '%s' in state 'Create Pending', serial number " INT64_FORMAT " at TID %s",
			 PersistentFileSysObjName_ObjectName(&fsObjName),
			 *persistentSerialNum,
			 ItemPointerToString(persistentTid));
}
/* * Open a relation for mirrored write. */ static void MirroredBufferPool_DoOpen( MirroredBufferPoolOpen *open, /* The resulting open struct. */ RelFileNode *relFileNode, /* The tablespace, database, and relation OIDs for the open. */ uint32 segmentFileNum, /* Which segment file. */ char *relationName, /* For tracing only. Can be NULL in some execution paths. */ MirrorDataLossTrackingState mirrorDataLossTrackingState, int64 mirrorDataLossTrackingSessionNum, bool create, bool mirrorOnly, bool copyToMirror, int *primaryError, bool *mirrorDataLossOccurred) { int fileFlags = O_RDWR | PG_BINARY; int fileMode = 0600; /* * File mode is S_IRUSR 00400 user has read permission * + S_IWUSR 00200 user has write permission */ char *primaryFilespaceLocation = NULL; char *mirrorFilespaceLocation = NULL; Assert(open != NULL); *primaryError = 0; *mirrorDataLossOccurred = false; if (create) fileFlags = O_CREAT | O_RDWR | PG_BINARY; PersistentTablespace_GetPrimaryAndMirrorFilespaces( relFileNode->spcNode, &primaryFilespaceLocation, &mirrorFilespaceLocation); if (Debug_persistent_print && (create || mirrorOnly || copyToMirror)) { SUPPRESS_ERRCONTEXT_DECLARE; SUPPRESS_ERRCONTEXT_PUSH(); elog(Persistent_DebugPrintLevel(), "MirroredBufferPool_DoOpen: Special open %u/%u/%u --> create %s, mirrorOnly %s, copyToMirror %s, " "primary filespace location %s " "mirror filespace location %s ", relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, (create ? "true" : "false"), (mirrorOnly ? "true" : "false"), (copyToMirror ? "true" : "false"), (primaryFilespaceLocation == NULL) ? "<null>" : primaryFilespaceLocation, (mirrorFilespaceLocation == NULL) ? 
"<null>" : mirrorFilespaceLocation); SUPPRESS_ERRCONTEXT_POP(); } MemSet(open, 0, sizeof(MirroredBufferPoolOpen)); open->primaryFile = -1; if (mirrorFilespaceLocation == NULL) sprintf(open->mirrorFilespaceLocation, "%s", ""); else sprintf(open->mirrorFilespaceLocation, "%s", mirrorFilespaceLocation); open->relFileNode = *relFileNode; open->segmentFileNum = segmentFileNum; open->create = create; open->mirrorOnly = mirrorOnly; open->copyToMirror = copyToMirror; MirroredBufferPool_SetUpMirrorAccess( relFileNode, segmentFileNum, relationName, mirrorDataLossTrackingState, mirrorDataLossTrackingSessionNum, /* primaryOnly */ false, mirrorOnly, &open->mirrorMode, &open->mirrorDataLossOccurred); if (StorageManagerMirrorMode_DoPrimaryWork(open->mirrorMode)) { char *dbPath; char *path; dbPath = (char*)palloc(MAXPGPATH + 1); path = (char*)palloc(MAXPGPATH + 1); /* * Do the primary work first so we don't leave files on the mirror or have an * open to clean up. */ FormDatabasePath( dbPath, primaryFilespaceLocation, relFileNode->spcNode, relFileNode->dbNode); if (segmentFileNum == 0) sprintf(path, "%s/%u", dbPath, relFileNode->relNode); else sprintf(path, "%s/%u.%u", dbPath, relFileNode->relNode, segmentFileNum); errno = 0; open->primaryFile = PathNameOpenFile(path, fileFlags, fileMode); if (open->primaryFile < 0) { *primaryError = errno; } pfree(dbPath); pfree(path); } if (StorageManagerMirrorMode_SendToMirror(open->mirrorMode) && *primaryError == 0 && !open->mirrorDataLossOccurred) { if (FileRepPrimary_MirrorOpen( FileRep_GetRelationIdentifier( open->mirrorFilespaceLocation, open->relFileNode, open->segmentFileNum), FileRepRelationTypeBufferPool, FILEREP_OFFSET_UNDEFINED, fileFlags, fileMode, TRUE /* supressError */) != 0) { if (Debug_filerep_print) ereport(LOG, (errmsg("could not sent file open request to mirror "), FileRep_ReportRelationPath( open->mirrorFilespaceLocation, open->relFileNode, open->segmentFileNum))); } open->mirrorDataLossOccurred = 
FileRepPrimary_IsMirrorDataLossOccurred(); } if (*primaryError != 0) { open->isActive = false; } else if (StorageManagerMirrorMode_DoPrimaryWork(open->mirrorMode)) { open->isActive = true; } else if (StorageManagerMirrorMode_SendToMirror(open->mirrorMode) && !open->mirrorDataLossOccurred) { open->isActive = true; } *mirrorDataLossOccurred = open->mirrorDataLossOccurred; if (primaryFilespaceLocation != NULL) pfree(primaryFilespaceLocation); if (mirrorFilespaceLocation != NULL) pfree(mirrorFilespaceLocation); }
// ----------------------------------------------------------------------------- // Rebuild filespace persistent table 'gp_persistent_filespace_node' // ----------------------------------------------------------------------------- void PersistentFilespace_AddCreated( Oid filespaceOid, /* The filespace OID to be added. */ bool flushToXLog) /* When true, the XLOG record for this change will be flushed to disk. */ { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; ItemPointerData persistentTid; int64 persistentSerialNum; FilespaceDirEntry filespaceDirEntry; /*if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Skipping persistent filespace %u because we are before persistence work", filespaceOid); return; // The initdb process will load the persistent table once we out of bootstrap mode. }*/ PersistentFilespace_VerifyInitScan(); PersistentFileSysObjName_SetFilespaceDir(&fsObjName,filespaceOid,is_filespace_shared); WRITE_PERSISTENT_STATE_ORDERED_LOCK; filespaceDirEntry = PersistentFilespace_CreateDirUnderLock(filespaceOid); if (filespaceDirEntry == NULL) { /* If out of shared memory, no need to promote to PANIC. 
*/ WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("Out of shared-memory for persistent filespaces"), errhint("You may need to increase the gp_max_filespaces value"), errOmitLocation(true))); } // if it is a new generated one, we need to set info from pg_filespace_entry if(filespaceDirEntry->persistentSerialNum==0 || strlen(filespaceDirEntry->locationBlankPadded1)==0) { Relation pg_fs_entry_rel; HeapScanDesc scandesc; HeapTuple tuple; ScanKeyData entry[1]; bool isNull; Datum locDatum; char *loc; /* Lookup the information for the current pg_filespace_entry */ pg_fs_entry_rel = heap_open(FileSpaceEntryRelationId, AccessShareLock); ScanKeyInit(&entry[0], Anum_pg_filespace_entry_fsefsoid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(filespaceOid)); scandesc = heap_beginscan(pg_fs_entry_rel, SnapshotNow, 1, entry); tuple = heap_getnext(scandesc, ForwardScanDirection); /* We assume that there can be at most one matching tuple */ if (!HeapTupleIsValid(tuple)) { elog(ERROR, "filespace %u could not be found in pg_filespace_entry", filespaceOid); } locDatum = heap_getattr(tuple, Anum_pg_filespace_entry_fselocation, pg_fs_entry_rel->rd_att, &isNull); loc = TextDatumGetCString(locDatum); //convert location with blank padded memset(filespaceDirEntry->locationBlankPadded1, ' ', FilespaceLocationBlankPaddedWithNullTermLen); filespaceDirEntry->locationBlankPadded1[FilespaceLocationBlankPaddedWithNullTermLen-1]='\0'; memcpy(filespaceDirEntry->locationBlankPadded1, loc, strlen(loc)); if(isNull) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("internal error: filespace '%u' has no name defined", filespaceOid))); heap_endscan(scandesc); heap_close(pg_fs_entry_rel, AccessShareLock); } filespaceDirEntry->state = PersistentFileSysState_Created; PersistentFilespace_AddTuple( filespaceDirEntry, /* createMirrorDataLossTrackingSessionNum */ 0, /* reserved */ 0, /* parentXid */ InvalidTransactionId, flushToXLog); persistentTid = 
filespaceDirEntry->persistentTid; persistentSerialNum = filespaceDirEntry->persistentSerialNum; WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent filespace directory: Add '%s' in state 'Created', serial number " INT64_FORMAT " at TID '%s' ", PersistentFileSysObjName_ObjectName(&fsObjName), persistentSerialNum, ItemPointerToString(&persistentTid)); }
/*
 * Decide whether a buffer pool bulk load can be declared finished.
 *
 * The current mirror data loss tracking state and session number are fetched
 * under the MirroredLock and compared with the values saved in bulkLoadInfo.
 *
 * Returns true when the bulk load is finished; in that case
 * PersistentRelation_FinishBufferPoolBulkLoad has been called.  Returns false
 * when it is not; in that case bulkLoadInfo has been updated with the current
 * tracking state and session number so the caller can copy to the mirror and
 * evaluate again.
 */
bool MirroredBufferPool_EvaluateBulkLoadFinish(
	MirroredBufferPoolBulkLoadInfo *bulkLoadInfo)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	MirrorDataLossTrackingState currentState;
	int64						currentSessionNum;

	bool	sameSession;
	bool	mirrorCameUp;
	bool	bulkLoadFinished;

	/* -------- MirroredLock ---------- */
	MIRROREDLOCK_BUFMGR_LOCK;

	/*
	 * Make this call while under the MirroredLock (unless we are a resync worker).
	 */
	currentState =
		FileRepPrimary_GetMirrorDataLossTrackingSessionNum(&currentSessionNum);

	sameSession =
		(currentSessionNum == bulkLoadInfo->mirrorDataLossTrackingSessionNum);

	/*
	 * We started with the mirror down and ended with the mirror up in either
	 * Resynchronized or Synchronized state (or not configured).
	 *
	 * So, there was mirror data loss.
	 */
	mirrorCameUp =
		(bulkLoadInfo->mirrorDataLossTrackingState == MirrorDataLossTrackingState_MirrorDown &&
		 currentState != bulkLoadInfo->mirrorDataLossTrackingState);

	bulkLoadFinished = (sameSession && !mirrorCameUp);

	if (!bulkLoadFinished)
	{
		if (Debug_persistent_print)
		{
			SUPPRESS_ERRCONTEXT_DECLARE;

			SUPPRESS_ERRCONTEXT_PUSH();

			elog(Persistent_DebugPrintLevel(),
				 "MirroredBufferPool_EvaluateBulkLoadFinish %u/%u/%u: change. Original mirror data loss tracking (state '%s', session num " INT64_FORMAT "), new mirror data loss tracking (state %d, session num " INT64_FORMAT "), persistent serial num " INT64_FORMAT ", TID %s",
				 bulkLoadInfo->relFileNode.spcNode,
				 bulkLoadInfo->relFileNode.dbNode,
				 bulkLoadInfo->relFileNode.relNode,
				 MirrorDataLossTrackingState_Name(bulkLoadInfo->mirrorDataLossTrackingState),
				 bulkLoadInfo->mirrorDataLossTrackingSessionNum,
				 currentState,
				 currentSessionNum,
				 bulkLoadInfo->persistentSerialNum,
				 ItemPointerToString(&bulkLoadInfo->persistentTid));

			SUPPRESS_ERRCONTEXT_POP();
		}

		/* Something changed: the verdict now depends only on the current state. */
		switch (currentState)
		{
			case MirrorDataLossTrackingState_MirrorNotConfigured:
				bulkLoadFinished = true;
				break;

			case MirrorDataLossTrackingState_MirrorCurrentlyUpInSync:
			case MirrorDataLossTrackingState_MirrorCurrentlyUpInResync:
				/* We lost data along the way. */
				bulkLoadFinished = false;
				break;

			case MirrorDataLossTrackingState_MirrorDown:
				/* Give the problem to resync. */
				bulkLoadFinished = true;
				break;

			default:
				elog(ERROR, "Unexpected mirror data loss tracking state: %d",
					 currentState);
				/* Assignment after the error call keeps the optimizer quiet. */
				bulkLoadFinished = false;
		}
	}
	else
	{
		if (Debug_persistent_print)
		{
			SUPPRESS_ERRCONTEXT_DECLARE;

			SUPPRESS_ERRCONTEXT_PUSH();

			elog(Persistent_DebugPrintLevel(),
				 "MirroredBufferPool_EvaluateBulkLoadFinish %u/%u/%u: no change -- mirror stayed up whole time. Mirror data loss tracking (state '%s', session num " INT64_FORMAT "), persistent serial num " INT64_FORMAT ", TID %s",
				 bulkLoadInfo->relFileNode.spcNode,
				 bulkLoadInfo->relFileNode.dbNode,
				 bulkLoadInfo->relFileNode.relNode,
				 MirrorDataLossTrackingState_Name(bulkLoadInfo->mirrorDataLossTrackingState),
				 bulkLoadInfo->mirrorDataLossTrackingSessionNum,
				 bulkLoadInfo->persistentSerialNum,
				 ItemPointerToString(&bulkLoadInfo->persistentTid));

			SUPPRESS_ERRCONTEXT_POP();
		}
	}

	if (!bulkLoadFinished)
	{
		/*
		 * Save new information so caller can copy to mirror and reevaluate again.
		 */
		bulkLoadInfo->mirrorDataLossTrackingState = currentState;
		bulkLoadInfo->mirrorDataLossTrackingSessionNum = currentSessionNum;
	}
	else
	{
		PersistentRelation_FinishBufferPoolBulkLoad(
										&bulkLoadInfo->relFileNode,
										&bulkLoadInfo->persistentTid,
										bulkLoadInfo->persistentSerialNum);
	}

	MIRROREDLOCK_BUFMGR_UNLOCK;
	/* -------- MirroredLock ---------- */

	return bulkLoadFinished;
}
/*
 * Indicate we are aborting the create of a filespace file.
 *
 * This state will make sure the filespace gets dropped after a system crash.
 *
 * Parameters:
 *   fsObjName           - object name carrying the filespace OID for the aborting create.
 *   persistentTid       - TID of the persistent tuple for the filespace.
 *   persistentSerialNum - serial number for the filespace; distinguishes the uses of the tuple.
 *   retryPossible       - passed through to PersistentFileSysObj_StateChange.
 *
 * Returns the result of PersistentFileSysObj_StateChange.  When
 * Persistent_BeforePersistenceWork() is true, nothing is changed and `false`
 * (i.e. 0) is returned.  NOTE(review): which enumerator 0 corresponds to is
 * not visible here -- confirm against the enum declaration.
 *
 * Raises ERROR if the filespace has no directory entry or the entry is not in
 * state 'Create Pending'.
 */
PersistentFileSysObjStateChangeResult PersistentFilespace_MarkAbortingCreate(
	PersistentFileSysObjName	*fsObjName,
	ItemPointer					persistentTid,
	int64						persistentSerialNum,
	bool						retryPossible)
{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	Oid		filespaceOid = fsObjName->variant.filespaceOid;

	FilespaceDirEntry						dirEntry;
	PersistentFileSysObjStateChangeResult	result;

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
		{
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent filespace %u because we are before persistence work",
				 filespaceOid);
		}

		/* The initdb process will load the persistent table once we out of bootstrap mode. */
		return false;
	}

	PersistentFilespace_VerifyInitScan();

	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	dirEntry = PersistentFilespace_FindDirUnderLock(filespaceOid);

	if (dirEntry == NULL)
	{
		elog(ERROR, "Did not find persistent filespace entry %u",
			 filespaceOid);
	}

	if (dirEntry->state != PersistentFileSysState_CreatePending)
	{
		elog(ERROR, "Persistent filespace entry %u expected to be in 'Create Pending' (actual state '%s')",
			 filespaceOid,
			 PersistentFileSysObjState_Name(dirEntry->state));
	}

	/* Change the persistent tuple first, then mirror the new state in the shared entry. */
	result = PersistentFileSysObj_StateChange(
								fsObjName,
								persistentTid,
								persistentSerialNum,
								PersistentFileSysState_AbortingCreate,
								retryPossible,
								/* flushToXlog */ false,
								/* oldState */ NULL,
								/* verifiedActionCallback */ NULL);

	dirEntry->state = PersistentFileSysState_AbortingCreate;

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

	if (Debug_persistent_print)
	{
		elog(Persistent_DebugPrintLevel(),
			 "Persistent filespace directory: '%s' changed state from 'Create Pending' to 'Aborting Create', serial number " INT64_FORMAT " at TID %s (State-Change result '%s')",
			 PersistentFileSysObjName_ObjectName(fsObjName),
			 persistentSerialNum,
			 ItemPointerToString(persistentTid),
			 PersistentFileSysObjStateChangeResult_Name(result));
	}

	return result;
}
/*
 * Scan callback for one persistent filespace tuple.
 *
 * Decodes the tuple's column values and, unless the tuple is in the Free
 * state, records the filespace in the shared directory entry obtained from
 * PersistentFilespace_CreateDirUnderLock.
 *
 * Parameters:
 *   persistentTid       - TID of the tuple being visited; stored in the entry.
 *   persistentSerialNum - serial number supplied by the scan; used only in
 *                         debug output (the entry stores the serial number
 *                         decoded from `values`).
 *   values              - the tuple's column values.
 *
 * Always returns true ("continue").  Raises ERROR if no directory entry can be
 * obtained from shared memory.
 */
static bool PersistentFilespace_ScanTupleCallback(
	ItemPointer				persistentTid,
	int64					persistentSerialNum,
	Datum					*values)
{
	Oid		filespaceOid;

	int16	dbId1;
	char	locationBlankPadded1[FilespaceLocationBlankPaddedWithNullTermLen];

	int16	dbId2;
	char	locationBlankPadded2[FilespaceLocationBlankPaddedWithNullTermLen];

	PersistentFileSysState	state;

	int64					createMirrorDataLossTrackingSessionNum;

	MirroredObjectExistenceState	mirrorExistenceState;

	int32					reserved;
	TransactionId			parentXid;
	int64					serialNum;
	ItemPointerData			previousFreeTid;

	FilespaceDirEntry filespaceDirEntry;

	GpPersistentFilespaceNode_GetValues(
									values,
									&filespaceOid,
									&dbId1,
									locationBlankPadded1,
									&dbId2,
									locationBlankPadded2,
									&state,
									&createMirrorDataLossTrackingSessionNum,
									&mirrorExistenceState,
									&reserved,
									&parentXid,
									&serialNum,
									&previousFreeTid);

	if (state == PersistentFileSysState_Free)
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "PersistentFilespace_ScanTupleCallback: TID %s, serial number " INT64_FORMAT " is free",
				 ItemPointerToString2(persistentTid),
				 persistentSerialNum);
		return true;	/* Continue. */
	}

	filespaceDirEntry =
		PersistentFilespace_CreateDirUnderLock(
										filespaceOid);

	/*
	 * Other callers of PersistentFilespace_CreateDirUnderLock in this file
	 * treat a NULL result as "out of shared memory"; report it the same way
	 * here instead of dereferencing a NULL entry.
	 */
	if (filespaceDirEntry == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("Out of shared-memory for persistent filespaces"),
				 errhint("You may need to increase the gp_max_filespaces value"),
				 errOmitLocation(true)));

	filespaceDirEntry->dbId1 = dbId1;
	memcpy(filespaceDirEntry->locationBlankPadded1, locationBlankPadded1, FilespaceLocationBlankPaddedWithNullTermLen);

	filespaceDirEntry->dbId2 = dbId2;
	memcpy(filespaceDirEntry->locationBlankPadded2, locationBlankPadded2, FilespaceLocationBlankPaddedWithNullTermLen);

	filespaceDirEntry->state = state;
	filespaceDirEntry->persistentSerialNum = serialNum;
	filespaceDirEntry->persistentTid = *persistentTid;

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "PersistentFilespace_ScanTupleCallback: filespace %u, dbId1 %d, dbId2 %d, state '%s', mirror existence state '%s', TID %s, serial number " INT64_FORMAT,
			 filespaceOid,
			 dbId1,
			 dbId2,
			 PersistentFileSysObjState_Name(state),
			 MirroredObjectExistenceState_Name(mirrorExistenceState),
			 ItemPointerToString2(persistentTid),
			 persistentSerialNum);

	return true;	/* Continue. */
}
/*
 * Scan callback for one persistent filespace tuple.
 *
 * Decodes the tuple's column values and copies them into the shared directory
 * entry for the filespace while holding FilespaceHashLock exclusively.
 *
 * Parameters:
 *   persistentTid       - TID of the tuple being visited; stored in the entry.
 *   persistentSerialNum - serial number supplied by the scan; used only in
 *                         debug output (the entry stores the serial number
 *                         decoded from `values`).
 *   values              - the tuple's column values.
 *
 * Always returns true ("continue").
 */
static bool PersistentFilespace_ScanTupleCallback(
	ItemPointer				persistentTid,
	int64					persistentSerialNum,
	Datum					*values)
{
	/* Decoded tuple columns. */
	Oid								filespaceOid;
	int16							dbId1;
	int16							dbId2;
	char							locationBlankPadded1[FilespaceLocationBlankPaddedWithNullTermLen];
	char							locationBlankPadded2[FilespaceLocationBlankPaddedWithNullTermLen];
	PersistentFileSysState			state;
	int64							createMirrorDataLossTrackingSessionNum;
	MirroredObjectExistenceState	mirrorExistenceState;
	int32							reserved;
	TransactionId					parentXid;
	int64							serialNum;

	/* Shared entry that receives the decoded values. */
	FilespaceDirEntry				dirEntry;

	GpPersistentFilespaceNode_GetValues(values,
										&filespaceOid,
										&dbId1,
										locationBlankPadded1,
										&dbId2,
										locationBlankPadded2,
										&state,
										&createMirrorDataLossTrackingSessionNum,
										&mirrorExistenceState,
										&reserved,
										&parentXid,
										&serialNum);

	/*
	 * Normally we would acquire this lock with the WRITE_FILESPACE_HASH_LOCK
	 * macro, however, this particular function can be called during startup.
	 * During startup, which executes in a single threaded context, no
	 * PersistentObjLock exists and we cannot assert that we're holding it.
	 */
	LWLockAcquire(FilespaceHashLock, LW_EXCLUSIVE);

	dirEntry = PersistentFilespace_CreateDirUnderLock(filespaceOid);

	/* Identity of the persistent tuple backing this entry. */
	dirEntry->persistentTid = *persistentTid;
	dirEntry->persistentSerialNum = serialNum;
	dirEntry->state = state;

	/* First dbId/location pair. */
	dirEntry->dbId1 = dbId1;
	memcpy(dirEntry->locationBlankPadded1,
		   locationBlankPadded1,
		   FilespaceLocationBlankPaddedWithNullTermLen);

	/* Second dbId/location pair. */
	dirEntry->dbId2 = dbId2;
	memcpy(dirEntry->locationBlankPadded2,
		   locationBlankPadded2,
		   FilespaceLocationBlankPaddedWithNullTermLen);

	LWLockRelease(FilespaceHashLock);

	if (Debug_persistent_print)
	{
		elog(Persistent_DebugPrintLevel(),
			 "PersistentFilespace_ScanTupleCallback: filespace %u, dbId1 %d, dbId2 %d, state '%s', mirror existence state '%s', TID %s, serial number " INT64_FORMAT,
			 filespaceOid,
			 dbId1,
			 dbId2,
			 PersistentFileSysObjState_Name(state),
			 MirroredObjectExistenceState_Name(mirrorExistenceState),
			 ItemPointerToString2(persistentTid),
			 persistentSerialNum);
	}

	/* Continue. */
	return true;
}