void PersistentRelation_AddCreated( RelFileNode *relFileNode, /* The tablespace, database, and relation OIDs for the create. */ int32 segmentFileNum, PersistentFileSysRelStorageMgr relStorageMgr, PersistentFileSysRelBufpoolKind relBufpoolKind, MirroredObjectExistenceState mirrorExistenceState, MirroredRelDataSynchronizationState relDataSynchronizationState, int64 mirrorAppendOnlyLossEof, int64 mirrorAppendOnlyNewEof, char *relationName, ItemPointer persistentTid, /* Resulting TID of the gp_persistent_rel_files tuple for the relation. */ int64 *persistentSerialNum, /* Resulting serial number for the relation. Distinquishes the uses of the tuple. */ bool flushToXLog) /* When true, the XLOG record for this change will be flushed to disk. */ { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; XLogRecPtr mirrorBufpoolResyncCkptLoc; ItemPointerData previousFreeTid; Datum values[Natts_gp_persistent_relation_node]; if(RelFileNode_IsEmpty(relFileNode)) elog(ERROR, "Invalid RelFileNode (0,0,0)"); MemSet(&previousFreeTid, 0, sizeof(ItemPointerData)); MemSet(&mirrorBufpoolResyncCkptLoc, 0, sizeof(XLogRecPtr)); if (!Persistent_BeforePersistenceWork()) elog(ERROR, "We can only add to persistent meta-data when special states"); // Verify PersistentFileSysObj_BuildInitScan has been called. PersistentRelation_VerifyInitScan(); PersistentFileSysObjName_SetRelationFile( &fsObjName, relFileNode, segmentFileNum); WRITE_PERSISTENT_STATE_ORDERED_LOCK; GpPersistentRelationNode_SetDatumValues( values, relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum, relStorageMgr, PersistentFileSysState_Created, /* createMirrorDataLossTrackingSessionNum */ 0, mirrorExistenceState, relDataSynchronizationState, /* mirrorBufpoolMarkedForScanIncrementalResync */ false, /* mirrorBufpoolResyncChangedPageCount */ 0, &mirrorBufpoolResyncCkptLoc, /* mirrorBufpoolResyncCkptBlockNum */ 0, mirrorAppendOnlyLossEof, mirrorAppendOnlyNewEof, relBufpoolKind, InvalidTransactionId, /* persistentSerialNum */ 0, // This will be set by PersistentFileSysObj_AddTuple. &previousFreeTid); PersistentFileSysObj_AddTuple( PersistentFsObjType_RelationFile, values, flushToXLog, persistentTid, persistentSerialNum); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent relation: Add '%s', relation name '%s', in state 'Created', relation storage manager '%s', mirror existence state '%s', relation data resynchronization state '%s', serial number " INT64_FORMAT " at TID %s", PersistentFileSysObjName_ObjectName(&fsObjName), relationName, PersistentFileSysRelStorageMgr_Name(relStorageMgr), MirroredObjectExistenceState_Name(mirrorExistenceState), MirroredRelDataSynchronizationState_Name(relDataSynchronizationState), *persistentSerialNum, ItemPointerToString(persistentTid)); }
static int FileRepPrimary_ResyncWrite(FileRepResyncHashEntry_s *entry) { int status = STATUS_OK; Page page; Buffer buf; BlockNumber numBlocks; BlockNumber blkno; SMgrRelation smgr_relation; char relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1]; XLogRecPtr loc; int count = 0; int thresholdCount = 0; bool mirrorDataLossOccurred = FALSE; switch (entry->relStorageMgr) { case PersistentFileSysRelStorageMgr_BufferPool: switch (entry->mirrorDataSynchronizationState) { case MirroredRelDataSynchronizationState_BufferPoolScanIncremental: case MirroredRelDataSynchronizationState_FullCopy: smgr_relation = smgropen(entry->relFileNode); numBlocks = smgrnblocks(smgr_relation); snprintf(relidstr, sizeof(relidstr), "%u/%u/%u", smgr_relation->smgr_rnode.spcNode, smgr_relation->smgr_rnode.dbNode, smgr_relation->smgr_rnode.relNode); if (Debug_filerep_print) elog(LOG, "resync buffer pool relation '%s' number of blocks '%d' ", relidstr, numBlocks); thresholdCount = Min(numBlocks, 1024); /* * required in order to report how many blocks were synchronized * if gp_persistent_relation_node does not return that information */ if (entry->mirrorBufpoolResyncChangedPageCount == 0) { entry->mirrorBufpoolResyncChangedPageCount = numBlocks - entry->mirrorBufpoolResyncCkptBlockNum; } for (blkno = entry->mirrorBufpoolResyncCkptBlockNum; blkno < numBlocks; blkno++) { XLogRecPtr endResyncLSN = (isFullResync() ? FileRepResync_GetEndFullResyncLSN() : FileRepResync_GetEndIncrResyncLSN()); #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( FileRepResyncWorkerRead, DDLNotSpecified, "", //databaseName ""); // tableName #endif FileRepResync_SetReadBufferRequest(); buf = ReadBuffer_Resync(smgr_relation, blkno, relidstr); FileRepResync_ResetReadBufferRequest(); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(buf); loc = PageGetLSN(page); if (Debug_filerep_print) { elog(LOG, "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' " "lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ", relidstr, numBlocks, blkno, XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc), entry->mirrorBufpoolResyncCkptLoc.xlogid, entry->mirrorBufpoolResyncCkptLoc.xrecoff, XLogLocationToString(&loc), loc.xlogid, loc.xrecoff, XLogLocationToString(&endResyncLSN), endResyncLSN.xlogid, endResyncLSN.xrecoff); } else { char tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN]; snprintf(tmpBuf, sizeof(tmpBuf), "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' ", relidstr, numBlocks, blkno, XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc), entry->mirrorBufpoolResyncCkptLoc.xlogid, entry->mirrorBufpoolResyncCkptLoc.xrecoff); FileRep_InsertConfigLogEntry(tmpBuf); snprintf(tmpBuf, sizeof(tmpBuf), "full resync buffer pool identifier '%s' lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ", relidstr, XLogLocationToString(&loc), loc.xlogid, loc.xrecoff, XLogLocationToString(&endResyncLSN), endResyncLSN.xlogid, endResyncLSN.xrecoff); FileRep_InsertConfigLogEntry(tmpBuf); } if (XLByteLE(PageGetLSN(page), endResyncLSN) && XLByteLE(entry->mirrorBufpoolResyncCkptLoc, PageGetLSN(page))) { smgrwrite(smgr_relation, blkno, (char *)BufferGetBlock(buf), FALSE); } #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( FileRepResyncWorker, DDLNotSpecified, "", // databaseName ""); // tableName #endif UnlockReleaseBuffer(buf); if (count > thresholdCount) { count = 0; FileRepSubProcess_ProcessSignals(); if (! (FileRepSubProcess_GetState() == FileRepStateReady && dataState == DataStateInResync)) { mirrorDataLossOccurred = TRUE; break; } } else count++; } if (mirrorDataLossOccurred) break; if (entry->mirrorDataSynchronizationState != MirroredRelDataSynchronizationState_FullCopy) { LockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock); numBlocks = smgrnblocks(smgr_relation); smgrtruncate(smgr_relation, numBlocks, TRUE /* isTemp, TRUE means to not record in XLOG */, FALSE /* isLocalBuf */, &entry->persistentTid, entry->persistentSerialNum); UnlockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock); } smgrimmedsync(smgr_relation); smgrclose(smgr_relation); smgr_relation = NULL; break; case MirroredRelDataSynchronizationState_None: case MirroredRelDataSynchronizationState_DataSynchronized: break; default: ereport(LOG, (errmsg("could not resynchronize relation '%u/%u/%u' " "mirror synchronization state:'%s(%d)' ", entry->relFileNode.relNode, entry->relFileNode.spcNode, entry->relFileNode.dbNode, MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState), entry->mirrorDataSynchronizationState))); break; } break; case PersistentFileSysRelStorageMgr_AppendOnly: { MirroredAppendOnlyOpen mirroredOpen; int primaryError; bool mirrorDataLossOccurred; char *buffer = NULL; int64 endOffset = entry->mirrorAppendOnlyNewEof; int64 startOffset = entry->mirrorAppendOnlyLossEof; int32 bufferLen = 0; int retval = 0; switch (entry->mirrorDataSynchronizationState) { case MirroredRelDataSynchronizationState_AppendOnlyCatchup: case MirroredRelDataSynchronizationState_FullCopy: /* * required in order to report how many blocks were synchronized * if gp_persistent_relation_node does not return that information */ if (entry->mirrorBufpoolResyncChangedPageCount == 0) { entry->mirrorBufpoolResyncChangedPageCount = (endOffset - startOffset) / BLCKSZ; } /* * The MirroredAppendOnly_OpenResynchonize routine knows we are a resynch worker and * will open BOTH, but write only the MIRROR!!! */ MirroredAppendOnly_OpenResynchonize( &mirroredOpen, &entry->relFileNode, entry->segmentFileNum, startOffset, &primaryError, &mirrorDataLossOccurred); if (primaryError != 0) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not open file %u/%u/%u.%u : %s", entry->relFileNode.dbNode, entry->relFileNode.spcNode, entry->relFileNode.relNode, entry->segmentFileNum, strerror(primaryError)))); break; } if (mirrorDataLossOccurred) break; /* AO and CO Data Store writes 64k size by default */ bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset); buffer = (char*) palloc(bufferLen); if (buffer == NULL) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), (errmsg("not enough memory for resynchronization")))); MemSet(buffer, 0, bufferLen); while (startOffset < endOffset) { retval = MirroredAppendOnly_Read( &mirroredOpen, buffer, bufferLen); if (retval != bufferLen) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not read from position:" INT64_FORMAT " in file %u/%u/%u.%u : %m", startOffset, entry->relFileNode.dbNode, entry->relFileNode.spcNode, entry->relFileNode.relNode, entry->segmentFileNum))); break; } MirroredAppendOnly_Append( &mirroredOpen, buffer, bufferLen, &primaryError, &mirrorDataLossOccurred); if (mirrorDataLossOccurred) break; Assert(primaryError == 0); // No primary writes as resync worker. startOffset += bufferLen; /* AO and CO Data Store writes 64k size by default */ bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset); } if (buffer) { pfree(buffer); buffer = NULL; } if (mirrorDataLossOccurred) break; /* Flush written data on Mirror */ MirroredAppendOnly_Flush( &mirroredOpen, &primaryError, &mirrorDataLossOccurred); if (mirrorDataLossOccurred) break; Assert(primaryError == 0); // Not flushed on primary as resync worker. /* Close Primary and Mirror */ MirroredAppendOnly_Close( &mirroredOpen, &mirrorDataLossOccurred); break; case MirroredRelDataSynchronizationState_None: case MirroredRelDataSynchronizationState_DataSynchronized: break; default: ereport(LOG, (errmsg("could not resynchronize relation '%u/%u/%u' " "mirror synchronization state:'%s(%d)' ", entry->relFileNode.relNode, entry->relFileNode.spcNode, entry->relFileNode.dbNode, MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState), entry->mirrorDataSynchronizationState))); break; } break; } //case default: Assert(0); break; } //switch if (mirrorDataLossOccurred) status = STATUS_ERROR; return status; }
/* * Indicate we intend to create a relation file as part of the current transaction. * * This function adds an entry in 'gp_persistent_relation_node' for either a new table (segment file * # 0) or a new segment file under AO table (segment file # > 0 for row/column-oriented AO) with a state * 'Create Pending'. An XLOG IntentToCreate record is generated that will guard the subsequent file-system * create in case the transaction aborts. * * Paramaters * ----------- * relFileNode = The tablespace, database, and relation OIDs for the create * segmentFileNum = As the name implies ( 0 for heap * >= 0 for RO/CO AO as applicable) * relStorageMgr = Persistent Relation storage Manager * relBufpoolKind = Buffer pool type beneath corrosponding relation * TODO bufferPollBulkLoad = ??? * TODO mirrorExistenceState = ??? * TODO relDataSynchronizationState = ??? * flushToXlog = If true, the XLOG record for this change will be flushed to disk. * TODO isLocalBuf = ??? * * Return * ------ * relationName = Name of the relation used for either debugging or to store in PendingDelete LL. * persistentTid = Resulting TID of the gp_persistent_rel_files tuple for the relation * serialNum = Resulting serial number for the relation. Distinquishes the uses of the tuple */ void PersistentRelation_AddCreatePending( RelFileNode *relFileNode, int32 segmentFileNum, PersistentFileSysRelStorageMgr relStorageMgr, PersistentFileSysRelBufpoolKind relBufpoolKind, bool bufferPoolBulkLoad, MirroredObjectExistenceState mirrorExistenceState, MirroredRelDataSynchronizationState relDataSynchronizationState, char *relationName, ItemPointer persistentTid, int64 *serialNum, bool flushToXLog, bool isLocalBuf) { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; XLogRecPtr mirrorBufpoolResyncCkptLoc; ItemPointerData previousFreeTid; Datum values[Natts_gp_persistent_relation_node]; if(RelFileNode_IsEmpty(relFileNode)) elog(ERROR, "Invalid RelFileNode (0,0,0)"); MemSet(&previousFreeTid, 0, sizeof(ItemPointerData)); MemSet(&mirrorBufpoolResyncCkptLoc, 0, sizeof(XLogRecPtr)); if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Skipping persistent relation '%s' because we are before persistence work", relpath(*relFileNode)); MemSet(persistentTid, 0, sizeof(ItemPointerData)); *serialNum = 0; return; // The initdb process will load the persistent table once we out of bootstrap mode. } /* Verify if the needed shared mem data structures for persistent tables are setup and inited */ PersistentRelation_VerifyInitScan(); /* Setup the file system object name */ PersistentFileSysObjName_SetRelationFile( &fsObjName, relFileNode, segmentFileNum); WRITE_PERSISTENT_STATE_ORDERED_LOCK; /* Create a values array which will be used to create a 'gp_persistent_relation_node' tuple */ GpPersistentRelationNode_SetDatumValues( values, relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum, relStorageMgr, (bufferPoolBulkLoad ? PersistentFileSysState_BulkLoadCreatePending : PersistentFileSysState_CreatePending), /* createMirrorDataLossTrackingSessionNum */ 0, mirrorExistenceState, relDataSynchronizationState, /* mirrorBufpoolMarkedForScanIncrementalResync */ false, /* mirrorBufpoolResyncChangedPageCount */ 0, &mirrorBufpoolResyncCkptLoc, /* mirrorBufpoolResyncCkptBlockNum */ 0, /* mirrorAppendOnlyLossEof */ 0, /* mirrorAppendOnlyNewEof */ 0, relBufpoolKind, GetTopTransactionId(), /* persistentSerialNum */ 0, // This will be set by PersistentFileSysObj_AddTuple. &previousFreeTid); /* Add a new tuple to 'gp_persistent_relation_node' table for the new relation/segment file * we intend to create. This will also create and apply a new persistent serial number. */ PersistentFileSysObj_AddTuple( PersistentFsObjType_RelationFile, values, flushToXLog, persistentTid, serialNum); /* * This XLOG must be generated under the persistent write-lock. */ #ifdef MASTER_MIRROR_SYNC mmxlog_log_create_relfilenode( relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum); #endif #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( FaultBeforePendingDeleteRelationEntry, DDLNotSpecified, "", // databaseName ""); // tableName #endif /* We'll add an entry to the PendingDelete LinkedList (LL) to remeber what we * created in this transaction (or sub-transaction). If the transaction * aborts then we can search for all such entries in this LL and get rid of (delete) * such relations or segment files on the disk. * * MPP-18228 * To make adding 'Create Pending' entry to persistent table and adding * to the PendingDelete list atomic */ PendingDelete_AddCreatePendingRelationEntry( &fsObjName, persistentTid, serialNum, relStorageMgr, relationName, isLocalBuf, bufferPoolBulkLoad); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent relation: Add '%s', relation name '%s' in state 'Create Pending', relation storage manager '%s', mirror existence state '%s', relation data resynchronization state '%s', serial number " INT64_FORMAT " at TID %s", PersistentFileSysObjName_ObjectName(&fsObjName), relationName, PersistentFileSysRelStorageMgr_Name(relStorageMgr), MirroredObjectExistenceState_Name(mirrorExistenceState), MirroredRelDataSynchronizationState_Name(relDataSynchronizationState), *serialNum, ItemPointerToString(persistentTid)); }