Beispiel #1
0
/*
 * Add a persistent relation-node entry for a relation file directly in the
 * 'Created' state.
 *
 * The call raises ERROR when the RelFileNode is empty, or when
 * Persistent_BeforePersistenceWork() reports false.
 *
 * Parameters
 * ----------
 * relFileNode     = The tablespace, database, and relation OIDs for the create.
 * segmentFileNum  = Segment file number stored in the tuple.
 * relStorageMgr   = Storage manager kind stored in the tuple.
 * relBufpoolKind  = Buffer pool kind stored in the tuple.
 * mirrorExistenceState        = Mirror existence state stored in the tuple.
 * relDataSynchronizationState = Mirror data synchronization state stored in
 *                               the tuple.
 * mirrorAppendOnlyLossEof     = Append-only loss EOF stored in the tuple.
 * mirrorAppendOnlyNewEof      = Append-only new EOF stored in the tuple.
 * relationName    = Only used in the debug message.
 * flushToXLog     = When true, the XLOG record for this change will be
 *                   flushed to disk.
 *
 * Results
 * -------
 * persistentTid       = Resulting TID of the persistent tuple for the relation.
 * persistentSerialNum = Resulting serial number for the relation.
 *                       Distinguishes the uses of the tuple.
 */
void PersistentRelation_AddCreated(
    RelFileNode 		*relFileNode,
    int32				segmentFileNum,
    PersistentFileSysRelStorageMgr relStorageMgr,
    PersistentFileSysRelBufpoolKind relBufpoolKind,
    MirroredObjectExistenceState mirrorExistenceState,
    MirroredRelDataSynchronizationState relDataSynchronizationState,
    int64				mirrorAppendOnlyLossEof,
    int64				mirrorAppendOnlyNewEof,
    char				*relationName,
    ItemPointer			persistentTid,
    int64				*persistentSerialNum,
    bool 				flushToXLog)
{
    WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

    PersistentFileSysObjName relFileObjName;
    Datum tupleValues[Natts_gp_persistent_relation_node];
    XLogRecPtr zeroResyncCkptLoc;
    ItemPointerData zeroFreeTid;

    /* Reject an all-zero relation file node up front. */
    if (RelFileNode_IsEmpty(relFileNode))
    {
        elog(ERROR, "Invalid RelFileNode (0,0,0)");
    }

    /* This entry point is only legal while we are "before persistence work". */
    if (!Persistent_BeforePersistenceWork())
    {
        elog(ERROR, "We can only add to persistent meta-data when special states");
    }

    /* A fresh entry carries a zeroed resync checkpoint location and free TID. */
    MemSet(&zeroResyncCkptLoc, 0, sizeof(XLogRecPtr));
    MemSet(&zeroFreeTid, 0, sizeof(ItemPointerData));

    /* Verify PersistentFileSysObj_BuildInitScan has been called. */
    PersistentRelation_VerifyInitScan();

    /* Object name is only needed for the debug message at the end. */
    PersistentFileSysObjName_SetRelationFile(&relFileObjName,
                                             relFileNode,
                                             segmentFileNum);

    WRITE_PERSISTENT_STATE_ORDERED_LOCK;

    GpPersistentRelationNode_SetDatumValues(
        tupleValues,
        relFileNode->spcNode,
        relFileNode->dbNode,
        relFileNode->relNode,
        segmentFileNum,
        relStorageMgr,
        PersistentFileSysState_Created,
        0,                      /* createMirrorDataLossTrackingSessionNum */
        mirrorExistenceState,
        relDataSynchronizationState,
        false,                  /* mirrorBufpoolMarkedForScanIncrementalResync */
        0,                      /* mirrorBufpoolResyncChangedPageCount */
        &zeroResyncCkptLoc,
        0,                      /* mirrorBufpoolResyncCkptBlockNum */
        mirrorAppendOnlyLossEof,
        mirrorAppendOnlyNewEof,
        relBufpoolKind,
        InvalidTransactionId,
        0,                      /* persistentSerialNum: set by PersistentFileSysObj_AddTuple */
        &zeroFreeTid);

    PersistentFileSysObj_AddTuple(PersistentFsObjType_RelationFile,
                                  tupleValues,
                                  flushToXLog,
                                  persistentTid,
                                  persistentSerialNum);

    WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

    if (Debug_persistent_print)
    {
        elog(Persistent_DebugPrintLevel(),
             "Persistent relation: Add '%s', relation name '%s', in state 'Created', relation storage manager '%s', mirror existence state '%s', relation data resynchronization state '%s', serial number " INT64_FORMAT " at TID %s",
             PersistentFileSysObjName_ObjectName(&relFileObjName),
             relationName,
             PersistentFileSysRelStorageMgr_Name(relStorageMgr),
             MirroredObjectExistenceState_Name(mirrorExistenceState),
             MirroredRelDataSynchronizationState_Name(relDataSynchronizationState),
             *persistentSerialNum,
             ItemPointerToString(persistentTid));
    }
}
Beispiel #2
0
/*
 * FileRepPrimary_ResyncWrite
 *
 * Resynchronize the relation file described by 'entry'.
 *
 * Buffer pool relations (states BufferPoolScanIncremental and FullCopy):
 *		every block from entry->mirrorBufpoolResyncCkptBlockNum up to the
 *		current number of blocks is read with ReadBuffer_Resync, and the page
 *		is handed to smgrwrite when its LSN lies within
 *		[entry->mirrorBufpoolResyncCkptLoc, end-of-resync LSN].  For states
 *		other than FullCopy the relation is then passed to smgrtruncate at its
 *		current length; finally it is smgrimmedsync'ed and closed.
 *
 * Append-only relations (states AppendOnlyCatchup and FullCopy):
 *		the byte range [mirrorAppendOnlyLossEof, mirrorAppendOnlyNewEof) is
 *		read in chunks of at most 2*BLCKSZ and appended through the
 *		MirroredAppendOnly_* routines, then flushed and closed.
 *
 * States None and DataSynchronized are no-ops; any other state is logged.
 *
 * Returns STATUS_OK, or STATUS_ERROR when mirror data loss was detected in
 * either path.  Primary-side I/O failures are raised with ereport(ERROR).
 */
static int
FileRepPrimary_ResyncWrite(FileRepResyncHashEntry_s	*entry)
{

	int				status = STATUS_OK;
	Page			page;
	Buffer			buf; 
	BlockNumber		numBlocks;
	BlockNumber		blkno;
	SMgrRelation	smgr_relation;
	char			relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1];
	XLogRecPtr		loc;
	int				count = 0;
	int				thresholdCount = 0;
	/* Shared by both storage-manager paths; drives the return status. */
	bool			mirrorDataLossOccurred = FALSE;
		
	switch (entry->relStorageMgr)
	{

		case PersistentFileSysRelStorageMgr_BufferPool:
			
			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_BufferPoolScanIncremental:
				case MirroredRelDataSynchronizationState_FullCopy:

					smgr_relation = smgropen(entry->relFileNode);
					
					numBlocks = smgrnblocks(smgr_relation);

					/* Identifier is formatted as tablespace/database/relation. */
					snprintf(relidstr, sizeof(relidstr), "%u/%u/%u",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode);

					if (Debug_filerep_print)
						elog(LOG, "resync buffer pool relation '%s' number of blocks '%d' ",
							 relidstr, numBlocks);

					/* Signals are polled once 'count' exceeds this threshold. */
					thresholdCount = Min(numBlocks, 1024);
					
					/* 
					 * required in order to report how many blocks were synchronized 
					 * if gp_persistent_relation_node does not return that information 
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = numBlocks - entry->mirrorBufpoolResyncCkptBlockNum;
					}
					
					for (blkno = entry->mirrorBufpoolResyncCkptBlockNum; blkno < numBlocks; blkno++) 
					{
						XLogRecPtr	endResyncLSN = (isFullResync() ? 
													FileRepResync_GetEndFullResyncLSN() :
													FileRepResync_GetEndIncrResyncLSN());
#ifdef FAULT_INJECTOR
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorkerRead,
													   DDLNotSpecified,
													   "",	//databaseName
													   ""); // tableName
#endif				
						
						FileRepResync_SetReadBufferRequest();
						buf = ReadBuffer_Resync(smgr_relation, blkno, relidstr);
						FileRepResync_ResetReadBufferRequest();
						
						LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
						page = BufferGetPage(buf);
						
						loc = PageGetLSN(page);
						
						/*
						 * NOTE(review): the messages below say "full resync" for
						 * both the incremental and the full-copy state.
						 */
						if (Debug_filerep_print)
						{
							elog(LOG, 
									 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' "
									 "lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
									 relidstr,
									 numBlocks,
									 blkno,
									 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
									 entry->mirrorBufpoolResyncCkptLoc.xlogid,
									 entry->mirrorBufpoolResyncCkptLoc.xrecoff,
									 XLogLocationToString(&loc),
									 loc.xlogid,
									 loc.xrecoff,
									 XLogLocationToString(&endResyncLSN),
									 endResyncLSN.xlogid,
									 endResyncLSN.xrecoff);
						}
						else
						{
							char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];
							
							snprintf(tmpBuf, sizeof(tmpBuf), 
									 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' ",
									 relidstr,
									 numBlocks,
									 blkno,
									 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
									 entry->mirrorBufpoolResyncCkptLoc.xlogid,
									 entry->mirrorBufpoolResyncCkptLoc.xrecoff);
														
							FileRep_InsertConfigLogEntry(tmpBuf);
							
							snprintf(tmpBuf, sizeof(tmpBuf), 
									 "full resync buffer pool identifier '%s' lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
									 relidstr,
									 XLogLocationToString(&loc),
									 loc.xlogid,
									 loc.xrecoff,
									 XLogLocationToString(&endResyncLSN),
									 endResyncLSN.xlogid,
									 endResyncLSN.xrecoff);
							
							FileRep_InsertConfigLogEntry(tmpBuf);
							
						}
						
						/*
						 * Only pages whose LSN falls inside the resync window
						 * [mirrorBufpoolResyncCkptLoc, endResyncLSN] are written.
						 */
						if (XLByteLE(PageGetLSN(page), endResyncLSN) &&
							XLByteLE(entry->mirrorBufpoolResyncCkptLoc, PageGetLSN(page))) 
						{
							smgrwrite(smgr_relation, 
									  blkno,
									  (char *)BufferGetBlock(buf),
									  FALSE);
						}
						
#ifdef FAULT_INJECTOR	
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorker, 
													   DDLNotSpecified,
													   "",	// databaseName
													   ""); // tableName
#endif				
						
						UnlockReleaseBuffer(buf);
						
						/*
						 * Periodically process signals and stop as soon as we
						 * are no longer Ready / InResync.
						 */
						if (count > thresholdCount)
						{
							count = 0;
							FileRepSubProcess_ProcessSignals();
							
							if (! (FileRepSubProcess_GetState() == FileRepStateReady && 
								   dataState == DataStateInResync))
							{
								mirrorDataLossOccurred = TRUE;
								break;
							}
						}
						else
							count++;
					}
						
					/*
					 * NOTE(review): smgr_relation is not closed on this path --
					 * confirm that is intended.
					 */
					if (mirrorDataLossOccurred)
						break;

					if (entry->mirrorDataSynchronizationState != MirroredRelDataSynchronizationState_FullCopy)
					{
						LockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);
					
						numBlocks = smgrnblocks(smgr_relation);
					
						smgrtruncate(smgr_relation,
								 numBlocks,
								 TRUE /* isTemp, TRUE means to not record in XLOG */,
								 FALSE /* isLocalBuf */,
								 &entry->persistentTid,
								 entry->persistentSerialNum);
								 
						UnlockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);
					}
					
					smgrimmedsync(smgr_relation);
					smgrclose(smgr_relation);
					
					smgr_relation = NULL;
					break;
					
				case MirroredRelDataSynchronizationState_None:										
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;
					
				default:
					/* Logged as tablespace/database/relation, matching relidstr. */
					ereport(LOG, 
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									entry->relFileNode.relNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}
			break;
			
		case PersistentFileSysRelStorageMgr_AppendOnly:
		{
			/*
			 * Deliberately NO local 'mirrorDataLossOccurred' here: the
			 * function-level flag must be updated so that mirror data loss
			 * in this path is reflected in the returned status.
			 */
			MirroredAppendOnlyOpen	mirroredOpen;
			int						primaryError = 0;
			char					*buffer = NULL;
			int64					endOffset = entry->mirrorAppendOnlyNewEof;
			int64					startOffset = entry->mirrorAppendOnlyLossEof;
			int32					bufferLen = 0;
			int						retval = 0;
			
			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_AppendOnlyCatchup:
				case MirroredRelDataSynchronizationState_FullCopy:
					
					/* 
					 * required in order to report how many blocks were synchronized 
					 * if gp_persistent_relation_node does not return that information 
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = (endOffset - startOffset) / BLCKSZ;
					}					
					
					/*
					 * The MirroredAppendOnly_OpenResynchonize routine knows we are a resynch worker and
					 * will open BOTH, but write only the MIRROR!!!
					 */
					MirroredAppendOnly_OpenResynchonize(
											&mirroredOpen, 
											&entry->relFileNode,
											entry->segmentFileNum,
											startOffset,
											&primaryError,
											&mirrorDataLossOccurred);
					if (primaryError != 0)
					{
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not open file %u/%u/%u.%u : %s",
										entry->relFileNode.spcNode,
										entry->relFileNode.dbNode,
										entry->relFileNode.relNode,
										entry->segmentFileNum,
										strerror(primaryError))));
						
						break;
					}

					/*
					 * NOTE(review): mirroredOpen is not closed on the
					 * mirror-data-loss exits below -- confirm whether
					 * MirroredAppendOnly_Close is required there.
					 */
					if (mirrorDataLossOccurred)
						break;
					
					/* AO and CO Data Store writes 64k size by default */
					bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);
					buffer = (char*) palloc(bufferLen);
					if (buffer == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_OUT_OF_MEMORY),
								 (errmsg("not enough memory for resynchronization"))));
					
					MemSet(buffer, 0, bufferLen);
					
					/* Copy [startOffset, endOffset) chunk by chunk. */
					while (startOffset < endOffset)
					{
						retval = MirroredAppendOnly_Read(
												&mirroredOpen,
												buffer,
												bufferLen);
						
						if (retval != bufferLen) 
						{
							ereport(ERROR,
									(errcode_for_file_access(),
									 errmsg("could not read from position:" INT64_FORMAT " in file %u/%u/%u.%u : %m",
											startOffset, 
											entry->relFileNode.spcNode,
											entry->relFileNode.dbNode,
											entry->relFileNode.relNode,
											entry->segmentFileNum)));
							
							break;
						}						
						
						MirroredAppendOnly_Append(
											  &mirroredOpen,
											  buffer,
											  bufferLen,
											  &primaryError,
											  &mirrorDataLossOccurred);
						
						if (mirrorDataLossOccurred)
							break;

						Assert(primaryError == 0);	// No primary writes as resync worker.
						
						startOffset += bufferLen;
						/* AO and CO Data Store writes 64k size by default */
						bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);						
					}
					
					if (buffer) 
					{
						pfree(buffer);
						buffer = NULL;
					}
					
					if (mirrorDataLossOccurred)
						break;
					
					/* Flush written data on Mirror */
					MirroredAppendOnly_Flush(
										&mirroredOpen,
										&primaryError,
										&mirrorDataLossOccurred);
					if (mirrorDataLossOccurred)
						break;
					
					Assert(primaryError == 0);	// Not flushed on primary as resync worker.
					
					/* Close Primary and Mirror */
					MirroredAppendOnly_Close(
										&mirroredOpen,
										&mirrorDataLossOccurred);
								
					break;
					
				case MirroredRelDataSynchronizationState_None:										
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;					
					
				default:
					/* Logged as tablespace/database/relation, matching relidstr. */
					ereport(LOG, 
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									entry->relFileNode.relNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}
			
			break;
		}	//case
		default:
			Assert(0);
			break;
	} //switch
	
	/* Mirror data loss in either path turns into an error status. */
	if (mirrorDataLossOccurred)
		status = STATUS_ERROR;
	
	return status;
}
Beispiel #3
0
/*
 * Indicate we intend to create a relation file as part of the current
 * transaction.
 *
 * Adds a 'gp_persistent_relation_node' entry in state 'Create Pending' (or
 * 'Bulk Load Create Pending' when bufferPoolBulkLoad is set) for either a new
 * table (segment file # 0) or a new segment file of an append-only table
 * (segment file # > 0 for row/column-oriented AO), and registers the object
 * in the PendingDelete list while still holding the persistent write-lock.
 * According to the original author's notes, an XLOG IntentToCreate record
 * guards the subsequent file-system create in case the transaction aborts.
 *
 * When Persistent_BeforePersistenceWork() is true nothing is added:
 * persistentTid is zeroed, *serialNum is set to 0, and the function returns.
 *
 * Parameters
 * ----------
 * relFileNode    = The tablespace, database, and relation OIDs for the create
 * segmentFileNum = As the name implies (   0 for heap
 *                                       >= 0 for RO/CO AO as applicable)
 * relStorageMgr  = Persistent relation storage manager
 * relBufpoolKind = Buffer pool type beneath the corresponding relation
 * bufferPoolBulkLoad = Selects the 'Bulk Load Create Pending' state instead
 *                      of 'Create Pending' (TODO: document further)
 * mirrorExistenceState        = Stored in the tuple (TODO: document further)
 * relDataSynchronizationState = Stored in the tuple (TODO: document further)
 * relationName   = Name of the relation; used in the debug message and passed
 *                  on to the PendingDelete list entry
 * flushToXLog    = If true, the XLOG record for this change will be flushed
 *                  to disk
 * isLocalBuf     = Passed on to the PendingDelete list entry
 *                  (TODO: document further)
 *
 * Results
 * -------
 * persistentTid = Resulting TID of the persistent tuple for the relation
 * serialNum     = Resulting serial number for the relation.  Distinguishes
 *                 the uses of the tuple
 */
void PersistentRelation_AddCreatePending(
    RelFileNode 		*relFileNode,
    int32				segmentFileNum,
    PersistentFileSysRelStorageMgr relStorageMgr,
    PersistentFileSysRelBufpoolKind relBufpoolKind,
    bool				bufferPoolBulkLoad,
    MirroredObjectExistenceState mirrorExistenceState,
    MirroredRelDataSynchronizationState relDataSynchronizationState,
    char				*relationName,
    ItemPointer			persistentTid,
    int64				*serialNum,
    bool 				flushToXLog,
    bool				isLocalBuf)
{
    WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

    PersistentFileSysObjName relFileObjName;
    Datum tupleValues[Natts_gp_persistent_relation_node];
    XLogRecPtr zeroResyncCkptLoc;
    ItemPointerData zeroFreeTid;

    /* Reject an all-zero relation file node up front. */
    if (RelFileNode_IsEmpty(relFileNode))
    {
        elog(ERROR, "Invalid RelFileNode (0,0,0)");
    }

    if (Persistent_BeforePersistenceWork())
    {
        if (Debug_persistent_print)
        {
            elog(Persistent_DebugPrintLevel(),
                 "Skipping persistent relation '%s' because we are before persistence work",
                 relpath(*relFileNode));
        }

        /* Hand back an empty TID / serial number to the caller. */
        MemSet(persistentTid, 0, sizeof(ItemPointerData));
        *serialNum = 0;

        /*
         * The initdb process will load the persistent table once we are out
         * of bootstrap mode.
         */
        return;
    }

    /* A fresh entry carries a zeroed resync checkpoint location and free TID. */
    MemSet(&zeroResyncCkptLoc, 0, sizeof(XLogRecPtr));
    MemSet(&zeroFreeTid, 0, sizeof(ItemPointerData));

    /* Make sure the shared-memory structures for persistent tables are set up. */
    PersistentRelation_VerifyInitScan();

    /* Build the file-system object name for this relation file. */
    PersistentFileSysObjName_SetRelationFile(&relFileObjName,
                                             relFileNode,
                                             segmentFileNum);

    WRITE_PERSISTENT_STATE_ORDERED_LOCK;

    /* Fill in the values array for the new 'gp_persistent_relation_node' tuple. */
    GpPersistentRelationNode_SetDatumValues(
        tupleValues,
        relFileNode->spcNode,
        relFileNode->dbNode,
        relFileNode->relNode,
        segmentFileNum,
        relStorageMgr,
        (bufferPoolBulkLoad ?
         PersistentFileSysState_BulkLoadCreatePending :
         PersistentFileSysState_CreatePending),
        0,                      /* createMirrorDataLossTrackingSessionNum */
        mirrorExistenceState,
        relDataSynchronizationState,
        false,                  /* mirrorBufpoolMarkedForScanIncrementalResync */
        0,                      /* mirrorBufpoolResyncChangedPageCount */
        &zeroResyncCkptLoc,
        0,                      /* mirrorBufpoolResyncCkptBlockNum */
        0,                      /* mirrorAppendOnlyLossEof */
        0,                      /* mirrorAppendOnlyNewEof */
        relBufpoolKind,
        GetTopTransactionId(),
        0,                      /* persistentSerialNum: set by PersistentFileSysObj_AddTuple */
        &zeroFreeTid);

    /*
     * Insert the tuple for the relation/segment file we intend to create.
     * This also creates and applies a new persistent serial number.
     */
    PersistentFileSysObj_AddTuple(PersistentFsObjType_RelationFile,
                                  tupleValues,
                                  flushToXLog,
                                  persistentTid,
                                  serialNum);

    /*
     * This XLOG must be generated under the persistent write-lock.
     */
#ifdef MASTER_MIRROR_SYNC
    mmxlog_log_create_relfilenode(relFileNode->spcNode,
                                  relFileNode->dbNode,
                                  relFileNode->relNode,
                                  segmentFileNum);
#endif

#ifdef FAULT_INJECTOR
    FaultInjector_InjectFaultIfSet(FaultBeforePendingDeleteRelationEntry,
                                   DDLNotSpecified,
                                   "",  /* databaseName */
                                   ""); /* tableName */
#endif

    /*
     * Remember what this transaction (or sub-transaction) created by adding
     * an entry to the PendingDelete linked list.  If the transaction aborts,
     * the entries in that list identify the relations or segment files that
     * have to be removed from disk.
     *
     * MPP-18228
     * Done while the write-lock is still held so that adding the
     * 'Create Pending' persistent entry and adding to the PendingDelete list
     * are atomic.
     */
    PendingDelete_AddCreatePendingRelationEntry(&relFileObjName,
                                                persistentTid,
                                                serialNum,
                                                relStorageMgr,
                                                relationName,
                                                isLocalBuf,
                                                bufferPoolBulkLoad);

    WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

    if (Debug_persistent_print)
    {
        elog(Persistent_DebugPrintLevel(),
             "Persistent relation: Add '%s', relation name '%s' in state 'Create Pending', relation storage manager '%s', mirror existence state '%s', relation data resynchronization state '%s', serial number " INT64_FORMAT " at TID %s",
             PersistentFileSysObjName_ObjectName(&relFileObjName),
             relationName,
             PersistentFileSysRelStorageMgr_Name(relStorageMgr),
             MirroredObjectExistenceState_Name(mirrorExistenceState),
             MirroredRelDataSynchronizationState_Name(relDataSynchronizationState),
             *serialNum,
             ItemPointerToString(persistentTid));
    }
}