/*
 * FileRepPrimary_RunHeartBeat()
 *
 * Main loop of the primary-side heartbeat. Must be entered with
 * fileRepRole == FileRepPrimaryRole and dataState either InSync or
 * InResync (both are enforced with Insist).
 *
 * The loop polls every 50 ms. A tick is counted only when
 * FileRepSubProcess_ProcessSignals() did not return true and the
 * sub-process is not in Fault state. On ticks 1200, 1201 and 1202
 * (1200 * 50 ms = 60 sec) one heartbeat is issued for the XLog, Writer
 * and AO01 message types respectively; after the third one the tick
 * counter restarts from zero.
 *
 * Returns when the sub-process state becomes Shutdown or ShutdownBackends.
 */
static void 
FileRepPrimary_RunHeartBeat(void)
{
	int		ticks = 0;

	Insist(fileRepRole == FileRepPrimaryRole);

	Insist(dataState == DataStateInSync ||
		   dataState == DataStateInResync);

	for (;;)
	{
		FileRepSubProcess_ProcessSignals();

		/*
		 * Stay parked while in Fault state, or while shared memory is not
		 * yet initialized and no shutdown has been requested.
		 */
		while (FileRepSubProcess_GetState() == FileRepStateFault ||
			   (fileRepShmemArray[0]->state == FileRepStateNotInitialized &&
				FileRepSubProcess_GetState() != FileRepStateShutdownBackends &&
				FileRepSubProcess_GetState() != FileRepStateShutdown))
		{
			FileRepSubProcess_ProcessSignals();
			pg_usleep(50000L);	/* 50 ms */
		}

		if (FileRepSubProcess_GetState() == FileRepStateShutdown ||
			FileRepSubProcess_GetState() == FileRepStateShutdownBackends)
		{
			return;
		}

		/* one polling tick; the primary->mirror->primary flow is verified once per minute */
		pg_usleep(50000L);	/* 50 ms */

		if (FileRepSubProcess_ProcessSignals() == true ||
			FileRepSubProcess_GetState() == FileRepStateFault)
		{
			/* this tick is not counted */
			continue;
		}

		switch (++ticks)
		{
			case 1200:			/* 1200 * 50 ms = 60 sec */
				FileRepPrimary_MirrorHeartBeat(FileRepMessageTypeXLog);
				break;

			case 1201:			/* the tick right after the XLog heartbeat */
				FileRepPrimary_MirrorHeartBeat(FileRepMessageTypeWriter);
				break;

			case 1202:			/* last heartbeat of the round; restart counting */
				FileRepPrimary_MirrorHeartBeat(FileRepMessageTypeAO01);
				ticks = 0;
				break;

			default:
				break;
		}
	}
}
/*
 * FileRepPrimary_RunResyncWorker()
 *
 * Work loop of a resync worker. While the sub-process is Ready and
 * dataState is InResync, it repeatedly asks
 * FileRepPrimary_GetResyncEntry() for work, which hands back either a
 * resync hash entry or a change tracking request:
 *
 *	- an entry is written with FileRepPrimary_ResyncWrite() and, on
 *	  success, recorded with FileRepResync_UpdateEntry();
 *	- a request is handled by
 *	  FileRepPrimary_ResyncBufferPoolIncrementalWrite().
 *
 * When neither is available the worker sleeps 100 ms and retries.
 *
 * Returns the status of the last operation; the loop stops on the first
 * status other than STATUS_OK or when the state check above fails.
 */
static int
FileRepPrimary_RunResyncWorker(void)
{
	FileRepResyncHashEntry_s	*entry = NULL;
	ChangeTrackingRequest		*request = NULL;
	int							status = STATUS_OK;

	FileRep_InsertConfigLogEntry("run resync worker");

	for (;;)
	{
		FileRepSubProcess_ProcessSignals();

		if (FileRepSubProcess_GetState() != FileRepStateReady ||
			dataState != DataStateInResync)
		{
			break;
		}

		entry = FileRepPrimary_GetResyncEntry(&request);

		if (entry == NULL && request == NULL)
		{
			/* nothing to do right now */
			pg_usleep(100000L);	/* 100 ms */
			continue;
		}

		/* at most one kind of work is expected per call */
		Assert(! (entry != NULL && request != NULL));

		if (entry != NULL)
		{
			status = FileRepPrimary_ResyncWrite(entry);

			if (status == STATUS_OK)
			{
				/* fill in the changed page count from the EOF delta if still unset */
				if (entry->mirrorBufpoolResyncChangedPageCount == 0)
				{
					entry->mirrorBufpoolResyncChangedPageCount =
						(entry->mirrorAppendOnlyNewEof - entry->mirrorAppendOnlyLossEof) / BLCKSZ;
				}

				status = FileRepResync_UpdateEntry(entry);
			}
		}

		if (request != NULL)
		{
			status = FileRepPrimary_ResyncBufferPoolIncrementalWrite(request);
			request = NULL;
		}

		if (status != STATUS_OK)
		{
			break;
		}
	}

	return status;
}
Exemple #3
0
/*
 * FileRepPrimary_StartResyncWorker()
 *
 * Entry point of a resync worker on the primary (role enforced with
 * Insist). Waits until the worker may run, initializes heap access and
 * runs FileRepPrimary_RunResyncWorker().
 *
 * If the worker run returns a status other than STATUS_OK, the segment
 * is marked SegmentStateFault/FaultTypeMirror, the sub-process state is
 * set to Fault, and the wait/run cycle is repeated. The function returns
 * after a successful run or when Shutdown / ShutdownBackends is observed.
 */
void 
FileRepPrimary_StartResyncWorker(void)
{
	int	rc = STATUS_OK;

	FileRep_InsertConfigLogEntry("start resync worker");

	Insist(fileRepRole == FileRepPrimaryRole);

	for (;;)
	{
		if (rc != STATUS_OK)
		{
			/* previous worker run failed */
			FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
			FileRepSubProcess_SetState(FileRepStateFault);
		}

		/*
		 * Hold here until the database is running and, only when
		 * dataState is InResync, until the sub-process has reached
		 * Ready state. In any other dataState there is no wait on the
		 * sub-process state.
		 */
		while (!isDatabaseRunning() ||
			   (dataState == DataStateInResync &&
				FileRepSubProcess_GetState() != FileRepStateReady))
		{
			FileRepSubProcess_ProcessSignals();

			if (FileRepSubProcess_GetState() == FileRepStateShutdown ||
				FileRepSubProcess_GetState() == FileRepStateShutdownBackends)
			{
				break;
			}

			pg_usleep(50000L);	/* 50 ms */
		}

		if (FileRepSubProcess_GetState() == FileRepStateShutdown ||
			FileRepSubProcess_GetState() == FileRepStateShutdownBackends)
		{
			return;
		}

		FileRepSubProcess_InitHeapAccess();

		rc = FileRepPrimary_RunResyncWorker();

		if (rc == STATUS_OK)
		{
			return;
		}
	}
}
Exemple #4
0
/*
 * FileRepAckPrimary_StartConsumer()
 *
 * Entry point of the ack consumer on the primary. Repeatedly runs
 * FileRepAckPrimary_RunConsumer(); when a run returns a status other than
 * STATUS_OK the segment is marked SegmentStateFault/FaultTypeMirror and
 * the sub-process state is set to Fault before waiting again.
 *
 * Note that, unlike the other start routines in this file, the wait loop
 * and the exit test look at FileRepStateShutdown only (not
 * ShutdownBackends).
 *
 * On exit, the semaphore semP registered for the ack hash shared memory
 * is signalled while FileRepAckHashShmemLock is held exclusively.
 */
void 
FileRepAckPrimary_StartConsumer(void)
{
	int rc = STATUS_OK;

	FileRep_InsertConfigLogEntry("run consumer");

	for (;;)
	{
		if (rc != STATUS_OK)
		{
			FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
			FileRepSubProcess_SetState(FileRepStateFault);
		}

		/*
		 * Wait while in Fault state, or while shared memory is not
		 * initialized and shutdown has not been requested.
		 */
		while (FileRepSubProcess_GetState() == FileRepStateFault ||
			   (fileRepShmemArray[0]->state == FileRepStateNotInitialized &&
				FileRepSubProcess_GetState() != FileRepStateShutdown))
		{
			FileRepSubProcess_ProcessSignals();
			pg_usleep(50000L);	/* 50 ms */
		}

		if (FileRepSubProcess_GetState() == FileRepStateShutdown)
		{
			break;
		}

		rc = FileRepAckPrimary_RunConsumer();
	}

	if (FileRepSubProcess_GetState() == FileRepStateShutdown)
	{
		/* perform graceful shutdown */
	}

	LWLockAcquire(FileRepAckHashShmemLock, LW_EXCLUSIVE);

	FileRep_IpcSignal(
		fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->semP,
		&fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->refCountSemP);

	LWLockRelease(FileRepAckHashShmemLock);

	/* NOTE free memory (if any) */
}
/*
 * FileRepPrimary_StartRecoveryInSync()
 *
 * Drives recovery on the primary while dataState is InSync. Each pass
 * waits until the sub-process is out of Fault state and shared memory is
 * initialized (or a shutdown is requested), then runs
 * FileRepPrimary_RunRecoveryInSync().
 *
 * A recovery run that returns a status other than STATUS_OK marks the
 * segment SegmentStateFault/FaultTypeMirror and moves the sub-process to
 * Fault state before the next pass.
 *
 * Returns when the sub-process state is Shutdown, ShutdownBackends or
 * Ready.
 */
static void 
FileRepPrimary_StartRecoveryInSync(void)
{
	int	rc = STATUS_OK;

	FileRep_InsertConfigLogEntry("run recovery");

	for (;;)
	{
		if (rc != STATUS_OK)
		{
			FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
			FileRepSubProcess_SetState(FileRepStateFault);
		}

		while (FileRepSubProcess_GetState() == FileRepStateFault ||
			   (fileRepShmemArray[0]->state == FileRepStateNotInitialized &&
				FileRepSubProcess_GetState() != FileRepStateShutdownBackends &&
				FileRepSubProcess_GetState() != FileRepStateShutdown))
		{
			FileRepSubProcess_ProcessSignals();
			pg_usleep(50000L);	/* 50 ms */
		}

		if (FileRepSubProcess_GetState() == FileRepStateShutdown ||
			FileRepSubProcess_GetState() == FileRepStateShutdownBackends)
		{
			return;
		}

		/* recovery already completed */
		if (FileRepSubProcess_GetState() == FileRepStateReady)
		{
			return;
		}

		Insist(fileRepRole == FileRepPrimaryRole);

		Insist(dataState == DataStateInSync);

		rc = FileRepPrimary_RunRecoveryInSync();
	}
}
/*
 * FileRepPrimary_StartResyncWorker()
 *
 * Entry point of a resync worker on the primary (role enforced with
 * Insist). Waits until the sub-process is Ready with dataState InResync,
 * or until a shutdown is requested, then runs
 * FileRepPrimary_RunResyncWorker().
 *
 * A worker run that returns a status other than STATUS_OK marks the
 * segment SegmentStateFault/FaultTypeMirror, sets the sub-process state
 * to Fault and goes back to waiting. The function returns after a
 * successful run or once Shutdown / ShutdownBackends is observed.
 */
void 
FileRepPrimary_StartResyncWorker(void)
{
	int	rc = STATUS_OK;

	FileRep_InsertConfigLogEntry("start resync worker");

	Insist(fileRepRole == FileRepPrimaryRole);

	for (;;)
	{
		if (rc != STATUS_OK)
		{
			/* previous worker run failed */
			FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
			FileRepSubProcess_SetState(FileRepStateFault);
		}

		/* poll until shutdown is requested or resync may proceed */
		while (FileRepSubProcess_GetState() != FileRepStateShutdown &&
			   FileRepSubProcess_GetState() != FileRepStateShutdownBackends &&
			   (FileRepSubProcess_GetState() != FileRepStateReady ||
				dataState != DataStateInResync))
		{
			FileRepSubProcess_ProcessSignals();
			pg_usleep(50000L);	/* 50 ms */
		}

		if (FileRepSubProcess_GetState() == FileRepStateShutdown ||
			FileRepSubProcess_GetState() == FileRepStateShutdownBackends)
		{
			return;
		}

		rc = FileRepPrimary_RunResyncWorker();

		if (rc == STATUS_OK)
		{
			return;
		}
	}
}
/*
 * FileRepPrimary_ResyncBufferPoolIncrementalWrite()
 *
 * Incremental resynchronization of buffer pool relations driven by the
 * change tracking log.
 *
 * Changes are fetched with ChangeTracking_GetChanges(request), possibly in
 * several batches (result->ask_for_more). For every returned entry the
 * block is read with ReadBuffer_Resync(), locked exclusively and, when the
 * change tracking end LSN is <= the page LSN, written with smgrwrite().
 * Blocks at or beyond the current relation size are logged and skipped.
 * When the last entry of a relation has been handled (and no more batches
 * are pending) the relation is synced, closed and recorded with
 * FileRepResync_UpdateEntry().
 *
 * Roughly every Min(numBlocks, 1024) entries signals are processed; if the
 * sub-process is no longer Ready or dataState is no longer InResync the
 * scan stops and the function reports an error.
 *
 * request	change tracking request; it is freed with
 *			ChangeTracking_FreeRequest() before returning.
 *
 * Returns STATUS_OK on success, STATUS_ERROR when the scan was abandoned
 * because of the state check, or the failing status of
 * FileRepResync_UpdateEntry().
 */
static int
FileRepPrimary_ResyncBufferPoolIncrementalWrite(ChangeTrackingRequest *request)
{
	int				status = STATUS_OK;
	Page			page;
	Buffer			buf; 
	BlockNumber		numBlocks = 0;
	SMgrRelation	smgr_relation = NULL;
	char			relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1];
	int				ii;
	XLogRecPtr		loc;	/* LSN of the page in the buffer pool */
	XLogRecPtr		loc1;	/* end LSN recorded by change tracking */
	int				count = 0;
	int				thresholdCount = 0;
	bool			mirrorDataLossOccurred = FALSE;
	int				NumberOfRelations = request->count;
	
	FileRepResyncHashEntry_s	entry;
	ChangeTrackingResult		*result = NULL;	

	while (1)
	{
		/* allow flushing buffers from buffer pool during scan */
		FileRepResync_SetReadBufferRequest();
		if ((result = ChangeTracking_GetChanges(request)) != NULL) 
		{
			FileRepResync_ResetReadBufferRequest();
					
			for (ii = 0; ii < result->count; ii++)
			{
				
				/* first entry of a new relation: open it and cache its size */
				if (smgr_relation == NULL)
				{
					NumberOfRelations--;
					
					smgr_relation = smgropen(result->entries[ii].relFileNode);
					
					snprintf(relidstr, sizeof(relidstr), "%u/%u/%u",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode);

					numBlocks = smgrnblocks(smgr_relation);
					
					if (Debug_filerep_print)
						elog(LOG, "resynchronize buffer pool relation '%u/%u/%u' "
							 "number of blocks:'%u' ",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode,
							 numBlocks);
					
					thresholdCount = Min(numBlocks, 1024);
				}
				
				loc1 =  result->entries[ii].lsn_end;
				
				/*
				 * if relation was truncated then block_num from change tracking can be beyond numBlocks 
				 */
				if (result->entries[ii].block_num >=  numBlocks)
				{
					ereport(LOG,	
							(errmsg("could not resynchonize buffer pool relation '%s' block '%d' (maybe due to truncate), "
									"lsn change tracking '%s(%u/%u)' "
									"number of blocks '%d' ",
									relidstr,
									result->entries[ii].block_num,
									XLogLocationToString(&loc1),
									loc1.xlogid,
									loc1.xrecoff,
									numBlocks),						
							 FileRep_errcontext()));						
					
					goto flush_check;
				}
				
				/* allow flushing buffers from buffer pool during scan */
				FileRepResync_SetReadBufferRequest();
				buf = ReadBuffer_Resync(smgr_relation,
										result->entries[ii].block_num,
										relidstr);
				FileRepResync_ResetReadBufferRequest();
				
				Assert(result->entries[ii].block_num < numBlocks);
				
				LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
				page = BufferGetPage(buf);
				
				loc = PageGetLSN(page); 
				
				if(Debug_filerep_print)
				{
					elog(LOG,	
							"incremental resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn page '%s(%u/%u)' "
							"lsn end change tracking '%s(%u/%u)' ",
							relidstr,
							numBlocks,
							result->entries[ii].block_num,
							XLogLocationToString(&loc),
							loc.xlogid,
							loc.xrecoff,
							XLogLocationToString(&loc1),
							result->entries[ii].lsn_end.xlogid,
							result->entries[ii].lsn_end.xrecoff);					
				}
				else
				{
					char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];
					
					snprintf(tmpBuf, sizeof(tmpBuf), 
							 "incremental resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn page '%s(%u/%u)' ",
							 relidstr,
							 numBlocks,
							 result->entries[ii].block_num,
							 XLogLocationToString(&loc),
							 loc.xlogid,
							 loc.xrecoff);
					
					FileRep_InsertConfigLogEntry(tmpBuf);
					
					snprintf(tmpBuf, sizeof(tmpBuf), 
							 "incremental resync buffer pool identifier '%s' lsn end change tracking '%s(%u/%u)' ",
							 relidstr,
							 XLogLocationToString(&loc1),
							 result->entries[ii].lsn_end.xlogid,
							 result->entries[ii].lsn_end.xrecoff);
					
					FileRep_InsertConfigLogEntry(tmpBuf);
					
				}
								
				/* change tracking end LSN <= page LSN */
				if (XLByteLE(result->entries[ii].lsn_end, PageGetLSN(page)))
				{
					if (! XLByteEQ(PageGetLSN(page), result->entries[ii].lsn_end))
					{
						/*
						 * Here the page LSN is strictly greater than the change
						 * tracking LSN. The arguments are passed in the order
						 * the format string labels them: change tracking LSN
						 * (loc1) first, page LSN (loc) second.
						 */
						ereport(LOG,
							(errmsg("Resynchonize buffer pool relation '%s' block '%d' has page lsn greater than CT lsn, "
								"lsn end change tracking '%s(%u/%u)' lsn page '%s(%u/%u)' "
								"number of blocks '%d'",
								relidstr,
								result->entries[ii].block_num,
								XLogLocationToString(&loc1),
								loc1.xlogid,
								loc1.xrecoff,
								XLogLocationToString(&loc),
								loc.xlogid,
								loc.xrecoff,
								numBlocks),
							 FileRep_errcontext()));

					}

					/*
					 * It's safe and better to perform write of the page to mirror,
					 * for this case, as primary and mirror data pages should always
					 * be same. So, we might do some extra work but definitely won't
					 * loose out blocks, or error out and need to perform full recovery.
					 * Need to cover for this case as there are some known scenarios where
					 * CT file can have extra records which should have been discarded,
					 * but as we loose out information of xlog LSN cannot be discarded.
					 * One such case is when CT_TRANSIENT being compacted to CT_COMPACT
					 * with specific xlog LSN (to discard extra records) in CT mode gets
					 * interrupted by resync. Compaction during Resync collects all the
					 * CT records and doesn't have xlog LSN information to discard any
					 * extra records from CT_TRANSIENT.
					 *
					 * NOTE(review): pages whose LSN is below the change tracking
					 * LSN are not written by this branch; confirm that this is
					 * the intended handling of the scenario described above.
					 */

					smgrwrite(smgr_relation,
							  result->entries[ii].block_num,
							  (char *)BufferGetBlock(buf),
							  FALSE);
				}

#ifdef FAULT_INJECTOR	
				FaultInjector_InjectFaultIfSet(
											   FileRepResyncWorker, 
											   DDLNotSpecified,
											   "",	// databaseName
											   ""); // tableName
#endif				
				
				UnlockReleaseBuffer(buf);
				
#ifdef FAULT_INJECTOR	
				FaultInjector_InjectFaultIfSet(
											   FileRepResyncWorker, 
											   DDLNotSpecified,
											   "",	// databaseName
											   ""); // tableName
#endif				
		
	flush_check:			
				/* last entry of the batch, or the next entry belongs to another relation */
				if (((ii + 1) == result->count) ||
					! (result->entries[ii].relFileNode.spcNode == result->entries[ii+1].relFileNode.spcNode &&
					   result->entries[ii].relFileNode.dbNode == result->entries[ii+1].relFileNode.dbNode &&
					   result->entries[ii].relFileNode.relNode == result->entries[ii+1].relFileNode.relNode))
				{
					/* the relation is finished only when no further batch is pending */
					if (result->ask_for_more == false)
					{
								
						smgrimmedsync(smgr_relation);
						
						smgrclose(smgr_relation);
								 
						smgr_relation = NULL;
							
						FileRep_GetRelationPath(
												 entry.fileName, 
												 result->entries[ii].relFileNode, 
												 0 /* segment file number is always 0 for Buffer Pool */);							 
								 
						status = FileRepResync_UpdateEntry(&entry);
						if (status != STATUS_OK)
						{
							 break;
						}
					}
								 
				}			
							
				/* periodically process signals and re-check that resync may continue */
				if (count > thresholdCount)
				{
					count = 0;
					FileRepSubProcess_ProcessSignals();
					
					if (! (FileRepSubProcess_GetState() == FileRepStateReady && 
						   dataState == DataStateInResync))
					{
						mirrorDataLossOccurred = TRUE;
						break;
					}
				}
				else
					count++;
			}  // for (ii = 0; ii < result->count; ii++)
			
		} // if ((result = ChangeTracking_GetChanges(request)) != NULL) 
		
		FileRepResync_ResetReadBufferRequest();
			
		/*
		 * Fetch the next batch only when the scan of this one completed
		 * normally. Once the inner loop was abandoned (state check failed or
		 * an entry update failed) asking for more changes would just keep
		 * resynchronizing although the function is going to report an error.
		 */
		if (result != NULL &&
			result->ask_for_more == true &&
			status == STATUS_OK &&
			! mirrorDataLossOccurred)
		{
			Assert(request->count == 1);
			request->entries[0].lsn_start = result->next_start_lsn;

			/*
			 * This batch is fully consumed; release it before the pointer is
			 * overwritten by the next ChangeTracking_GetChanges() call. Only
			 * the final batch is released after the loop.
			 */
			ChangeTracking_FreeResult(result);
			result = NULL;
		}
		else
		{
			break;
		}

	} // while(1) 
		
	ChangeTracking_FreeRequest(request);
	ChangeTracking_FreeResult(result);
	
	Insist(NumberOfRelations == 0);
	
	if (mirrorDataLossOccurred)
		status = STATUS_ERROR;
	
	return status;	
}
/*
 * FileRepPrimary_ResyncWrite()
 *
 * Resynchronize one relation (segment file) described by 'entry' to the
 * mirror.
 *
 * Buffer pool relations (BufferPoolScanIncremental / FullCopy): every block
 * from entry->mirrorBufpoolResyncCkptBlockNum up to the current relation
 * size is read with ReadBuffer_Resync(), and written with smgrwrite() when
 * its page LSN lies between entry->mirrorBufpoolResyncCkptLoc and the end
 * resync LSN (both inclusive). Roughly every Min(numBlocks, 1024) blocks
 * signals are processed and the scan is abandoned if the sub-process is no
 * longer Ready or dataState is no longer InResync.
 *
 * Append-only relations (AppendOnlyCatchup / FullCopy): the byte range
 * [mirrorAppendOnlyLossEof, mirrorAppendOnlyNewEof) is read in chunks of at
 * most 2*BLCKSZ and appended through the MirroredAppendOnly_* routines,
 * then flushed and closed.
 *
 * The None and DataSynchronized states need no work; any other state is
 * only logged.
 *
 * entry	resync hash entry; mirrorBufpoolResyncChangedPageCount is filled
 *			in when it is still zero.
 *
 * Returns STATUS_OK, or STATUS_ERROR when mirror data loss was detected on
 * either the buffer pool or the append-only path.
 */
static int
FileRepPrimary_ResyncWrite(FileRepResyncHashEntry_s	*entry)
{

	int				status = STATUS_OK;
	Page			page;
	Buffer			buf; 
	BlockNumber		numBlocks;
	BlockNumber		blkno;
	SMgrRelation	smgr_relation;
	char			relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1];
	XLogRecPtr		loc;
	int				count = 0;
	int				thresholdCount = 0;
	bool			mirrorDataLossOccurred = FALSE;
		
	switch (entry->relStorageMgr)
	{

		case PersistentFileSysRelStorageMgr_BufferPool:
			
			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_BufferPoolScanIncremental:
				case MirroredRelDataSynchronizationState_FullCopy:

					smgr_relation = smgropen(entry->relFileNode);
					
					numBlocks = smgrnblocks(smgr_relation);

					snprintf(relidstr, sizeof(relidstr), "%u/%u/%u",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode);

					if (Debug_filerep_print)
						elog(LOG, "resync buffer pool relation '%s' number of blocks '%d' ",
							 relidstr, numBlocks);

					thresholdCount = Min(numBlocks, 1024);
					
					/* 
					 * required in order to report how many blocks were synchronized 
					 * if gp_persistent_relation_node does not return that information 
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = numBlocks - entry->mirrorBufpoolResyncCkptBlockNum;
					}
					
					for (blkno = entry->mirrorBufpoolResyncCkptBlockNum; blkno < numBlocks; blkno++) 
					{
						XLogRecPtr	endResyncLSN = (isFullResync() ? 
													FileRepResync_GetEndFullResyncLSN() :
													FileRepResync_GetEndIncrResyncLSN());
#ifdef FAULT_INJECTOR
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorkerRead,
													   DDLNotSpecified,
													   "",	//databaseName
													   ""); // tableName
#endif				
						
						FileRepResync_SetReadBufferRequest();
						buf = ReadBuffer_Resync(smgr_relation, blkno, relidstr);
						FileRepResync_ResetReadBufferRequest();
						
						LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
						page = BufferGetPage(buf);
						
						loc = PageGetLSN(page);
						
						if (Debug_filerep_print)
						{
							elog(LOG, 
									 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' "
									 "lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
									 relidstr,
									 numBlocks,
									 blkno,
									 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
									 entry->mirrorBufpoolResyncCkptLoc.xlogid,
									 entry->mirrorBufpoolResyncCkptLoc.xrecoff,
									 XLogLocationToString(&loc),
									 loc.xlogid,
									 loc.xrecoff,
									 XLogLocationToString(&endResyncLSN),
									 endResyncLSN.xlogid,
									 endResyncLSN.xrecoff);
						}
						else
						{
							char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];
							
							snprintf(tmpBuf, sizeof(tmpBuf), 
									 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' ",
									 relidstr,
									 numBlocks,
									 blkno,
									 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
									 entry->mirrorBufpoolResyncCkptLoc.xlogid,
									 entry->mirrorBufpoolResyncCkptLoc.xrecoff);
														
							FileRep_InsertConfigLogEntry(tmpBuf);
							
							snprintf(tmpBuf, sizeof(tmpBuf), 
									 "full resync buffer pool identifier '%s' lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
									 relidstr,
									 XLogLocationToString(&loc),
									 loc.xlogid,
									 loc.xrecoff,
									 XLogLocationToString(&endResyncLSN),
									 endResyncLSN.xlogid,
									 endResyncLSN.xrecoff);
							
							FileRep_InsertConfigLogEntry(tmpBuf);
							
						}
						
						/* write only pages with ckpt LSN <= page LSN <= end resync LSN */
						if (XLByteLE(PageGetLSN(page), endResyncLSN) &&
							XLByteLE(entry->mirrorBufpoolResyncCkptLoc, PageGetLSN(page))) 
						{
							smgrwrite(smgr_relation, 
									  blkno,
									  (char *)BufferGetBlock(buf),
									  FALSE);
						}
						
#ifdef FAULT_INJECTOR	
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorker, 
													   DDLNotSpecified,
													   "",	// databaseName
													   ""); // tableName
#endif				
						
						UnlockReleaseBuffer(buf);
						
						/* periodically process signals and re-check that resync may continue */
						if (count > thresholdCount)
						{
							count = 0;
							FileRepSubProcess_ProcessSignals();
							
							if (! (FileRepSubProcess_GetState() == FileRepStateReady && 
								   dataState == DataStateInResync))
							{
								mirrorDataLossOccurred = TRUE;
								break;
							}
						}
						else
							count++;
					}
						
					if (mirrorDataLossOccurred)
						break;

					if (entry->mirrorDataSynchronizationState != MirroredRelDataSynchronizationState_FullCopy)
					{
						LockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);
					
						numBlocks = smgrnblocks(smgr_relation);
					
						smgrtruncate(smgr_relation,
								 numBlocks,
								 TRUE /* isTemp, TRUE means to not record in XLOG */,
								 FALSE /* isLocalBuf */,
								 &entry->persistentTid,
								 entry->persistentSerialNum);
								 
						UnlockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);
					}
					
					smgrimmedsync(smgr_relation);
					smgrclose(smgr_relation);
					
					smgr_relation = NULL;
					break;
					
				case MirroredRelDataSynchronizationState_None:										
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;
					
				default:
					/* identifiers are printed as spc/db/rel, the same order as relidstr */
					ereport(LOG, 
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									entry->relFileNode.relNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}
			break;
			
		case PersistentFileSysRelStorageMgr_AppendOnly:
		{
			/*
			 * This case deliberately uses the function-level
			 * mirrorDataLossOccurred flag. A second declaration here would
			 * shadow it, and the check at the end of the function would then
			 * never see a mirror data loss reported by the
			 * MirroredAppendOnly_* calls below.
			 */
			MirroredAppendOnlyOpen	mirroredOpen;
			int						primaryError;
			char					*buffer = NULL;
			int64					endOffset = entry->mirrorAppendOnlyNewEof;
			int64					startOffset = entry->mirrorAppendOnlyLossEof;
			int32					bufferLen = 0;
			int						retval = 0;
			
			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_AppendOnlyCatchup:
				case MirroredRelDataSynchronizationState_FullCopy:
					
					/* 
					 * required in order to report how many blocks were synchronized 
					 * if gp_persistent_relation_node does not return that information 
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = (endOffset - startOffset) / BLCKSZ;
					}					
					
					/*
					 * The MirroredAppendOnly_OpenResynchonize routine knows we are a resynch worker and
					 * will open BOTH, but write only the MIRROR!!!
					 */
					MirroredAppendOnly_OpenResynchonize(
											&mirroredOpen, 
											&entry->relFileNode,
											entry->segmentFileNum,
											startOffset,
											&primaryError,
											&mirrorDataLossOccurred);
					if (primaryError != 0)
					{
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not open file %u/%u/%u.%u : %s",
										entry->relFileNode.spcNode,
										entry->relFileNode.dbNode,
										entry->relFileNode.relNode,
										entry->segmentFileNum,
										strerror(primaryError))));
						
						break;
					}

					/*
					 * NOTE(review): on the mirror-data-loss exits below
					 * mirroredOpen is not passed to MirroredAppendOnly_Close();
					 * confirm whether the open handle needs to be released.
					 */
					if (mirrorDataLossOccurred)
						break;
					
					/* AO and CO Data Store writes 64k size by default */
					bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);
					buffer = (char*) palloc(bufferLen);
					if (buffer == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_OUT_OF_MEMORY),
								 (errmsg("not enough memory for resynchronization"))));
					
					MemSet(buffer, 0, bufferLen);
					
					while (startOffset < endOffset)
					{
						retval = MirroredAppendOnly_Read(
												&mirroredOpen,
												buffer,
												bufferLen);
						
						if (retval != bufferLen) 
						{
							ereport(ERROR,
									(errcode_for_file_access(),
									 errmsg("could not read from position:" INT64_FORMAT " in file %u/%u/%u.%u : %m",
											startOffset, 
											entry->relFileNode.spcNode,
											entry->relFileNode.dbNode,
											entry->relFileNode.relNode,
											entry->segmentFileNum)));
							
							break;
						}						
						
						MirroredAppendOnly_Append(
											  &mirroredOpen,
											  buffer,
											  bufferLen,
											  &primaryError,
											  &mirrorDataLossOccurred);
						
						if (mirrorDataLossOccurred)
							break;

						Assert(primaryError == 0);	// No primary writes as resync worker.
						
						startOffset += bufferLen;
						/* AO and CO Data Store writes 64k size by default */
						bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);						
					}
					
					if (buffer) 
					{
						pfree(buffer);
						buffer = NULL;
					}
					
					if (mirrorDataLossOccurred)
						break;
					
					/* Flush written data on Mirror */
					MirroredAppendOnly_Flush(
										&mirroredOpen,
										&primaryError,
										&mirrorDataLossOccurred);
					if (mirrorDataLossOccurred)
						break;
					
					Assert(primaryError == 0);	// Not flushed on primary as resync worker.
					
					/* Close Primary and Mirror */
					MirroredAppendOnly_Close(
										&mirroredOpen,
										&mirrorDataLossOccurred);
								
					break;
					
				case MirroredRelDataSynchronizationState_None:										
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;					
					
				default:
					/* identifiers are printed as spc/db/rel, the same order as relidstr */
					ereport(LOG, 
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									entry->relFileNode.relNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}
			
			break;
		}	//case
		default:
			Assert(0);
			break;
	} //switch
	
	/* covers both the buffer pool and the append-only path */
	if (mirrorDataLossOccurred)
		status = STATUS_ERROR;
	
	return status;
}
Exemple #9
0
static int
FileRepAckPrimary_RunReceiver(void)
{
	uint32_t				msgLength = 0;
	FileRepConsumerProcIndex_e	msgType;
	int						status = STATUS_OK;
	char					*msgPositionInsert;
	FileRepShmemMessageDescr_s  *fileRepShmemMessageDescr;
	uint32					spareField;
	
	FileRep_InsertConfigLogEntry("run receiver");
	
	while (1) {
		
		FileRepSubProcess_ProcessSignals();
		if (FileRepSubProcess_GetState() != FileRepStateReady &&
			FileRepSubProcess_GetState() != FileRepStateInitialization) {
			break;
		}
		
		if ( ! FileRepConnServer_AwaitMessageBegin()) {
			/* call was interrupted ... go back to beginning to process signals */
			continue;
		}

		status = FileRepConnServer_ReceiveMessageType(&msgType);
		
		if (status != STATUS_OK) {
			break;
		}
				
		/* DATA MESSAGE TYPE */
		status = FileRepConnServer_ReceiveMessageLength(&msgLength);
		
		if (status != STATUS_OK) {
			break;
		}

		msgPositionInsert = FileRep_ReserveShmem(fileRepAckShmemArray[msgType], 
												 msgLength, 
												 /* not used */ &spareField, 
												 FileRepOperationNotSpecified, 
												 FileRepAckShmemLock);
		
		if (msgPositionInsert == NULL) {
			
			status = STATUS_ERROR;
			ereport(WARNING,
					(errmsg("mirror failure, "
							"could not queue received ack message to be processed, "
							"failover requested"), 
					 errhint("run gprecoverseg to re-establish mirror connectivity"),
					 FileRep_errdetail_Shmem(),
					 FileRep_errdetail_ShmemAck(),
					 FileRep_errcontext()));													
			break;
		}
		
		status = FileRepConnServer_ReceiveMessageData(
						msgPositionInsert + sizeof(FileRepShmemMessageDescr_s),
						msgLength);
		
		if (status != STATUS_OK) {
			break;
		}		
		
		SIMPLE_FAULT_INJECTOR(FileRepReceiver);
		
		fileRepShmemMessageDescr = 
		(FileRepShmemMessageDescr_s*) msgPositionInsert;	
		
		/* it is not in use */
		fileRepShmemMessageDescr->messageSync = FALSE;
		
		fileRepShmemMessageDescr->messageState = FileRepShmemMessageStateReady; 
		
		LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);
		
		FileRep_IpcSignal(fileRepIpcArray[fileRepAckShmemArray[msgType]->ipcArrayIndex]->semC, 
						  &fileRepIpcArray[fileRepAckShmemArray[msgType]->ipcArrayIndex]->refCountSemC);
		
		LWLockRelease(FileRepAckShmemLock);
		
		FileRep_InsertLogEntry(
							   "P_RunReceiver",
							   FileRep_GetFlatFileIdentifier("", ""),
							   FileRepRelationTypeNotSpecified,
							   FileRepOperationNotSpecified,
							   FILEREP_UNDEFINED,
							   FILEREP_UNDEFINED,
							   FileRepAckStateNotInitialized,
							   spareField,
							   FILEREP_UNDEFINED);			
		
	} // while(1)
	
	FileRepConnServer_CloseConnection();
	
	return status;
}
/*
 * FileRepPrimary_RunChangeTrackingCompacting()
 *
 * Compacts the change tracking log while the primary is in change
 * tracking.
 *
 *	1) Waits (50 ms polls) until the database is running, then calls
 *	   ChangeTracking_DoFullCompactingRoundIfNeeded(). If the wait ends
 *	   because of Shutdown / ShutdownBackends while the database is still
 *	   not running, the function returns without compacting.
 *
 *	2) While the sub-process is Ready and dataState is InChangeTracking,
 *	   calls ChangeTracking_CompactLogsIfPossible() once per 1200 polls of
 *	   50 ms (60 sec). The loop is paused while the sub-process is in Fault
 *	   state or segmentState is SegmentStateChangeTrackingDisabled.
 */
static void 
FileRepPrimary_RunChangeTrackingCompacting(void)
{
	int		retry = 0;
	
	FileRep_InsertConfigLogEntry("run change tracking compacting if records has to be discarded");
	
	/*
	 * We have to check if any records have to be discarded from Change Tracking log file.
	 * Due to crash it can happen that the highest change tracking log lsn > the highest xlog lsn.
	 *
	 * Records from change tracking log file can be discarded only after database is started. 
	 * Full environment has to be set up in order to run queries over SPI.
	 */
	while (FileRepSubProcess_GetState() != FileRepStateShutdown &&
		   FileRepSubProcess_GetState() != FileRepStateShutdownBackends &&
		   isDatabaseRunning() == FALSE) 
	{
		
		FileRepSubProcess_ProcessSignals();
		
		pg_usleep(50000L); /* 50 ms */	
	}		

	/*
	 * The wait above also ends when a shutdown is requested. In that case
	 * the database may never have come up, so the precondition stated above
	 * does not hold and the compacting round must not be attempted. The
	 * periodic loop below would exit immediately anyway because the
	 * sub-process is not in Ready state.
	 */
	if (isDatabaseRunning() == FALSE)
	{
		return;
	}

	ChangeTracking_DoFullCompactingRoundIfNeeded();

	
	/*
	 * Periodically check if compacting is required. 
	 * Periodic compacting is required in order to
	 *		a) reduce space for change tracking log file
	 *		b) reduce time for transition from Change Tracking to Resync
	 */
	FileRep_InsertConfigLogEntry("run change tracking compacting");
	while (1) {
		
		FileRepSubProcess_ProcessSignals();
		
		/* pause while faulted or while change tracking is disabled */
		while (FileRepSubProcess_GetState() == FileRepStateFault ||
			   segmentState == SegmentStateChangeTrackingDisabled) 
		{			
			FileRepSubProcess_ProcessSignals();
			pg_usleep(50000L); /* 50 ms */	
		}
		
		if (! (FileRepSubProcess_GetState() == FileRepStateReady &&
			   dataState == DataStateInChangeTracking))
		{
			break;
		}				
				
		Insist(fileRepRole == FileRepPrimaryRole);
		Insist(dataState == DataStateInChangeTracking);		
		Insist(FileRepSubProcess_GetState() == FileRepStateReady);
		
		/* retry compacting of change tracking log files once per minute */
		pg_usleep(50000L); /* 50 ms */
		
		if (++retry == 1200)	/* 1200 * 50 ms = 60 sec */
		{
			ChangeTracking_CompactLogsIfPossible(); 
			retry=0;
		}
	} 
}
/*
 * FileRepPrimary_StartRecoveryInChangeTracking()
 *
 * One-shot recovery step executed on the primary while dataState is
 * InChangeTracking.
 *
 * After waiting for the sub-process to leave Fault state (and returning
 * right away on Shutdown / ShutdownBackends), one of three things
 * happens:
 *
 *	- transition to InSync pending: all change tracking data is dropped;
 *	- no transition to Resync pending and a full resync is requested:
 *	  change tracking is marked for full resync, role/state are re-read,
 *	  IO is resumed and the function returns without moving the
 *	  sub-process to Ready;
 *	- otherwise: change tracking is marked for incremental resync.
 *
 * In the first and the last case XLogInChangeTrackingTransition() is
 * called, the sub-process state is set to Ready and IO is resumed.
 */
static void 
FileRepPrimary_StartRecoveryInChangeTracking(void)
{
	FileRep_InsertConfigLogEntry("run recovery");

	while (FileRepSubProcess_GetState() == FileRepStateFault)
	{
		FileRepSubProcess_ProcessSignals();
		pg_usleep(50000L);	/* 50 ms */
	}

	if (FileRepSubProcess_GetState() == FileRepStateShutdown ||
		FileRepSubProcess_GetState() == FileRepStateShutdownBackends)
	{
		return;
	}

	Insist(fileRepRole == FileRepPrimaryRole);
	Insist(dataState == DataStateInChangeTracking);		
	Insist(FileRepSubProcess_GetState() != FileRepStateReady);

	if (ChangeTracking_RetrieveIsTransitionToInsync())
	{
		ChangeTracking_DropAll();
	}
	else if (ChangeTracking_RetrieveIsTransitionToResync() == FALSE &&
			 isFullResync())
	{
		ChangeTracking_MarkFullResync();

		/* segmentState == SegmentStateChangeTrackingDisabled */
		getFileRepRoleAndState(&fileRepRole, &segmentState, &dataState, NULL, NULL);
		Assert(segmentState == SegmentStateChangeTrackingDisabled);

		/* database is resumed */
		primaryMirrorSetIOSuspended(FALSE);

		FileRep_InsertConfigLogEntry("change tracking recovery completed");

		return;
	}
	else
	{
		ChangeTracking_MarkIncrResync();
	}

	XLogInChangeTrackingTransition();

	/* NOTE: Any error during change tracking will result in disabling Change Tracking */
	FileRepSubProcess_SetState(FileRepStateReady);

	/* database is resumed */
	primaryMirrorSetIOSuspended(FALSE);

	FileRep_InsertConfigLogEntry("change tracking recovery completed");
}
/*
 * FileRepPrimary_RunRecoveryInSync()
 *
 *		Runs the recovery steps below in order. After every step signals
 *		are processed, and the sequence is abandoned (returning the
 *		status obtained so far) as soon as the sub-process is no longer
 *		in Initialization state. A step that returns a status other than
 *		STATUS_OK ends the sequence with that status.
 *
 *		1) XLogRecoverMirrorControlFile()
 *		2) XLogReconcileEofPrimary()
 *		3) MirroredFlatFile_DropTemporaryFiles()
 *		4) MirroredFlatFile_MirrorDropTemporaryFiles()
 *		5) FlatFilesRecoverMirror()
 *		6) TwoPhaseRecoverMirror()
 *		7) SlruRecoverMirror()
 *
 *		When all steps complete, the sub-process state is set to Ready.
 */
static int 
FileRepPrimary_RunRecoveryInSync(void)
{	
	int rc = STATUS_OK;

	FileRep_InsertConfigLogEntry("run recovery of flat files");

	rc = XLogRecoverMirrorControlFile();
	if (rc != STATUS_OK)
		return rc;

	FileRepSubProcess_ProcessSignals();
	if (FileRepSubProcess_GetState() != FileRepStateInitialization)
		return rc;

	rc = XLogReconcileEofPrimary();
	if (rc != STATUS_OK)
		return rc;

	FileRepSubProcess_ProcessSignals();
	if (FileRepSubProcess_GetState() != FileRepStateInitialization)
		return rc;

	MirroredFlatFile_DropTemporaryFiles();

	FileRepSubProcess_ProcessSignals();
	if (FileRepSubProcess_GetState() != FileRepStateInitialization)
		return rc;

	MirroredFlatFile_MirrorDropTemporaryFiles();

	FileRepSubProcess_ProcessSignals();
	if (FileRepSubProcess_GetState() != FileRepStateInitialization)
		return rc;

	rc = FlatFilesRecoverMirror();
	if (rc != STATUS_OK)
		return rc;

	FileRepSubProcess_ProcessSignals();
	if (FileRepSubProcess_GetState() != FileRepStateInitialization)
		return rc;

	rc = TwoPhaseRecoverMirror();
	if (rc != STATUS_OK)
		return rc;

	FileRepSubProcess_ProcessSignals();
	if (FileRepSubProcess_GetState() != FileRepStateInitialization)
		return rc;

	rc = SlruRecoverMirror();
	if (rc != STATUS_OK)
		return rc;

	FileRepSubProcess_ProcessSignals();
	if (FileRepSubProcess_GetState() != FileRepStateInitialization)
		return rc;

	/* every step succeeded while still in Initialization state */
	FileRepSubProcess_SetState(FileRepStateReady);

	return rc;
}
/*
 * FileRepAckMirror_RunSender()
 *
 *		Sender loop for acknowledgement messages.  Messages are taken from
 *		the ack shared-memory queue
 *		fileRepAckShmemArray[FILEREP_OUTGOING_MESSAGE_QUEUE] and passed, one
 *		at a time, to FileRepConnClient_SendMessage().
 *
 *		The loop ends when the sub-process state is no longer
 *		FileRepStateReady (status stays STATUS_OK) or when a send fails
 *		(status becomes STATUS_ERROR).  The client connection is closed
 *		before returning.
 *
 *		Returns STATUS_OK or STATUS_ERROR.
 */
static int
FileRepAckMirror_RunSender(void)
{
	FileRepShmemMessageDescr_s	*fileRepShmemMessageDescr=NULL;
	char						*fileRepMessage;
	int							status = STATUS_OK;
	/* TRUE once a message has been sent; the next pass then advances positionConsume */
	bool						movePositionConsume = FALSE;
	FileRepConsumerProcIndex_e  messageType;
	/* NOTE(review): assigned below but never read in this function */
	FileRepMessageHeader_s		*fileRepMessageHeader;
	FileRepShmem_s              *fileRepAckShmem = NULL;
	
	FileRep_InsertConfigLogEntry("run sender ack");
	
	fileRepAckShmem = fileRepAckShmemArray[FILEREP_OUTGOING_MESSAGE_QUEUE];

	while (1) {

		LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);
		
		if (movePositionConsume) {
			
			/* Skip over the message sent on the previous pass: descriptor plus payload. */
			fileRepAckShmem->positionConsume = 
					fileRepAckShmem->positionConsume +
					fileRepShmemMessageDescr->messageLength + 
					sizeof(FileRepShmemMessageDescr_s);
			
			/*
			 * Consume position reached the wraparound mark while the insert
			 * position is elsewhere: restart from the beginning of the
			 * buffer and reset the wraparound mark to the end.
			 */
			if (fileRepAckShmem->positionConsume == fileRepAckShmem->positionWraparound &&
				fileRepAckShmem->positionInsert != fileRepAckShmem->positionWraparound) {
				
				fileRepAckShmem->positionConsume = fileRepAckShmem->positionBegin;
				fileRepAckShmem->positionWraparound = fileRepAckShmem->positionEnd;
			}
			
			/* NOTE(review): semP presumably wakes the inserting side now that space was freed - confirm */
			FileRep_IpcSignal(fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->semP, 
							  &fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemP);
		}
		
		fileRepShmemMessageDescr = 
		(FileRepShmemMessageDescr_s*) fileRepAckShmem->positionConsume;	
		
		/*
		 * Wait while the queue is empty (consume == insert) or the message
		 * at the consume position is not yet in state Ready.
		 */
		while ((fileRepAckShmem->positionConsume == fileRepAckShmem->positionInsert) ||
			   ((fileRepAckShmem->positionConsume != fileRepAckShmem->positionInsert) &&
				(fileRepShmemMessageDescr->messageState != FileRepShmemMessageStateReady))) {
			
			fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemC++;
			
			LWLockRelease(FileRepAckShmemLock);
			
			FileRepSubProcess_ProcessSignals();
			if (FileRepSubProcess_GetState() != FileRepStateReady) {

				/* Re-take the lock so the release after this loop stays balanced. */
				LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);
				break;
			}
			
			/* NOTE(review): the lock is passed although it was released above - confirm what FileRep_IpcWait does with it */
			FileRep_IpcWait(fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->semC, &fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemC, FileRepAckShmemLock);
						
			LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE); 
			
			/* Same wraparound handling as above, re-checked after waking up. */
			if (fileRepAckShmem->positionConsume == fileRepAckShmem->positionWraparound &&
				fileRepAckShmem->positionInsert != fileRepAckShmem->positionWraparound) {
				
				fileRepAckShmem->positionConsume = fileRepAckShmem->positionBegin;
				fileRepAckShmem->positionWraparound = fileRepAckShmem->positionEnd;
			}			
			
			/* Re-assign to find if messageState is changed */
			fileRepShmemMessageDescr = 
			(FileRepShmemMessageDescr_s*) fileRepAckShmem->positionConsume;				
		} // while internal
		/* Incremented on every exit of the wait loop, including the state-change exit. */
		fileRepAckShmem->consumeCount++;
		
		LWLockRelease(FileRepAckShmemLock); 

		/* Leave the sender loop once the sub-process is no longer Ready. */
		FileRepSubProcess_ProcessSignals();
		if (FileRepSubProcess_GetState() != FileRepStateReady) {
			break;
		}
	
		FileRep_InsertLogEntry(
							   "M_RunSenderAck",
							   FileRep_GetFlatFileIdentifier("", ""),
							   FileRepRelationTypeNotSpecified,
							   FileRepOperationNotSpecified,
							   FILEREP_UNDEFINED,
							   FILEREP_UNDEFINED,
							   FileRepAckStateNotInitialized,
							   FILEREP_UNDEFINED,
							   FILEREP_UNDEFINED);		
				
#ifdef FAULT_INJECTOR
		FaultInjector_InjectFaultIfSet(
									   FileRepSender,
									   DDLNotSpecified,
									   "",	//databaseName
									   ""); // tableName
#endif						
		
		/* The payload starts right after the shared-memory message descriptor. */
		fileRepMessage = (char*) (fileRepAckShmem->positionConsume + 
								  sizeof(FileRepShmemMessageDescr_s));
		
		fileRepMessageHeader = (FileRepMessageHeader_s*) (fileRepAckShmem->positionConsume + 
														  sizeof(FileRepShmemMessageDescr_s));

		/* Every ack is sent with message type FileRepMessageTypeXLog. */
		messageType = FileRepMessageTypeXLog;
		
		if (! FileRepConnClient_SendMessage(
						messageType,
						fileRepShmemMessageDescr->messageSync,
						fileRepMessage,
						fileRepShmemMessageDescr->messageLength)) 
		{

			ereport(WARNING, 
					(errcode_for_socket_access(),
					 errmsg("mirror failure, "
							"could not sent ack message to primary : %m, "
							"failover requested"),
					 errhint("run gprecoverseg to re-establish mirror connectivity"),
					 FileRep_errdetail_ShmemAck(),
					 FileRep_errcontext()));		
			
			status = STATUS_ERROR;
			break;
		}

		/* Message sent; the next pass moves positionConsume past it. */
		movePositionConsume = TRUE;
	} // while(1)
	
	FileRepConnClient_CloseConnection();

	return status;
}
/*
 * 
 * FileRepPrimary_StartSender
 */
void 
FileRepAckMirror_StartSender(void)
{
	int				status = STATUS_OK;
	int				retry = 0; 
	struct timeval	currentTime;
	pg_time_t		beginTime = 0;
	pg_time_t		endTime = 0;
	
	FileRep_InsertConfigLogEntry("start sender ack");

	while (1) {
		
		if (status != STATUS_OK) {
			FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
			FileRepSubProcess_SetState(FileRepStateFault);
		}
		
		while (FileRepSubProcess_GetState() == FileRepStateInitialization ||
			   FileRepSubProcess_GetState() == FileRepStateFault ||

			   (fileRepShmemArray[0]->state == FileRepStateNotInitialized &&
			    FileRepSubProcess_GetState() != FileRepStateShutdown )) {
			
			FileRepSubProcess_ProcessSignals();
			pg_usleep(50000L); /* 50 ms */	
		}
		
		if (FileRepSubProcess_GetState() == FileRepStateShutdown) {
			
			break;
		}

		{
			char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];
			
			snprintf(tmpBuf, sizeof(tmpBuf), "primary address(port) '%s(%d)' mirror address(port) '%s(%d)' ",
					 fileRepPrimaryHostAddress, 
					 fileRepPrimaryPort,
					 fileRepMirrorHostAddress, 
					 fileRepMirrorPort);
			
			FileRep_InsertConfigLogEntry(tmpBuf);
		}
		
		Insist(fileRepRole == FileRepMirrorRole);

		status = FileRepConnClient_EstablishConnection(
									   fileRepPrimaryHostAddress,
									   fileRepPrimaryPort,
									   FALSE /* reportError */);		

		if (status != STATUS_OK)
		{
			gettimeofday(&currentTime, NULL);
			beginTime = (pg_time_t) currentTime.tv_sec;
		}
		
		while (status != STATUS_OK && 
			   FileRep_IsRetry(retry) &&
			   (endTime - beginTime) < gp_segment_connect_timeout)  
		{
			FileRep_Sleep10ms(retry);
			
			FileRep_IncrementRetry(retry);
			
			gettimeofday(&currentTime, NULL);
			endTime = (pg_time_t) currentTime.tv_sec;			
			
			status = FileRepConnClient_EstablishConnection(
														   fileRepPrimaryHostAddress,
														   fileRepPrimaryPort,
														   (retry == file_rep_retry && file_rep_retry != 0) ||
														   ((endTime - beginTime) > gp_segment_connect_timeout) ? TRUE : FALSE);

			if (FileRepSubProcess_IsStateTransitionRequested())
			{
				break;
			}			
		}
 
		if (status != STATUS_OK) {
			continue;
		}
		
		FileRep_SetFileRepRetry();
		
		status = FileRepAckMirror_RunSender();
		
	} // while(1)
		
	FileRepConnClient_CloseConnection();
	
	return;
}
Exemple #15
0
/*
 * FileRepAckPrimary_RunConsumer()
 *
 *		Consumer loop for ack messages queued in
 *		fileRepAckShmemArray[FILEREP_ACKSHMEM_MESSAGE_SLOT_PRIMARY_ACK].
 *		For every ready message it
 *			- verifies the CRC of the message header,
 *			- stores the operation-specific result (xLogEof or
 *			  mirrorStatus) carried by the header,
 *			- updates the ack hash table through
 *			  FileRepAckPrimary_UpdateHashEntry().
 *
 *		The loop ends when the sub-process state is neither
 *		FileRepStateReady nor FileRepStateInitialization (status stays
 *		STATUS_OK), or on the first error: header CRC mismatch, an ack
 *		state other than FileRepAckStateCompleted, or a failed hash table
 *		update.
 *
 *		Returns STATUS_OK or STATUS_ERROR.
 */
static int
FileRepAckPrimary_RunConsumer(void)
{
	FileRepShmemMessageDescr_s	*fileRepShmemMessageDescr = NULL;
	FileRepMessageHeader_s		*fileRepMessageHeader = NULL;
	/* points at the CRC stored in shared memory right after the header */
	pg_crc32					*fileRepMessageHeaderCrc;
	/* CRC computed locally over the received header */
	pg_crc32					messageHeaderCrcLocal = 0;
	int							status = STATUS_OK;
	/* TRUE once a message has been processed; the next pass then advances positionConsume */
	bool						movePositionConsume = FALSE;
	FileRepShmem_s              *fileRepAckShmem = NULL;
		
	FileRep_InsertConfigLogEntry("run consumer");
	
	fileRepAckShmem = fileRepAckShmemArray[FILEREP_ACKSHMEM_MESSAGE_SLOT_PRIMARY_ACK];
	
	while (1) {	
		
		LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);
		
		if (movePositionConsume) {
			
			/* Skip over the message handled on the previous pass: descriptor plus payload. */
			fileRepAckShmem->positionConsume = 
				fileRepAckShmem->positionConsume +
				fileRepShmemMessageDescr->messageLength + 
				sizeof(FileRepShmemMessageDescr_s);
			
			/*
			 * Consume position reached the wraparound mark while the insert
			 * position is elsewhere: restart from the beginning of the
			 * buffer and reset the wraparound mark to the end.
			 */
			if (fileRepAckShmem->positionConsume == fileRepAckShmem->positionWraparound &&
				fileRepAckShmem->positionInsert != fileRepAckShmem->positionWraparound) {
				
				fileRepAckShmem->positionConsume = fileRepAckShmem->positionBegin;
				fileRepAckShmem->positionWraparound = fileRepAckShmem->positionEnd;
			}
			/* NOTE(review): semP presumably wakes the inserting side now that space was freed - confirm */
			FileRep_IpcSignal(fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->semP, 
							  &fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemP);
		}
				
		fileRepShmemMessageDescr = 
			(FileRepShmemMessageDescr_s*) fileRepAckShmem->positionConsume;	

		/*
		 * Wait while the queue is empty (consume == insert) or the message
		 * at the consume position is not yet in state Ready.
		 */
		while ((fileRepAckShmem->positionConsume == fileRepAckShmem->positionInsert) ||
			   ((fileRepAckShmem->positionConsume != fileRepAckShmem->positionInsert) &&
				(fileRepShmemMessageDescr->messageState != FileRepShmemMessageStateReady))) {
			
			fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemC++;

			LWLockRelease(FileRepAckShmemLock);
						
			FileRepSubProcess_ProcessSignals();
			if (FileRepSubProcess_GetState() != FileRepStateReady &&
				FileRepSubProcess_GetState() != FileRepStateInitialization) {
				/* Re-take the lock so the release after this loop stays balanced. */
				LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);
				break;
			}
			
			/* NOTE(review): the lock is passed although it was released above - confirm what FileRep_IpcWait does with it */
			FileRep_IpcWait(fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->semC, &fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemC, FileRepAckShmemLock);
			
			LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);
			
			/* Same wraparound handling as above, re-checked after waking up. */
			if (fileRepAckShmem->positionConsume == fileRepAckShmem->positionWraparound &&
				fileRepAckShmem->positionInsert != fileRepAckShmem->positionWraparound) {
				
				fileRepAckShmem->positionConsume = fileRepAckShmem->positionBegin;
				fileRepAckShmem->positionWraparound = fileRepAckShmem->positionEnd;
			}
			
			/* Re-assign to find if messageState is changed */
			fileRepShmemMessageDescr = 
				(FileRepShmemMessageDescr_s*) fileRepAckShmem->positionConsume;	
				
		} // internal while
		/* Incremented on every exit of the wait loop, including the state-change exit. */
		fileRepAckShmem->consumeCount++;
		LWLockRelease(FileRepAckShmemLock);
		
		/* Leave the consumer loop once the state is neither Ready nor Initialization. */
		FileRepSubProcess_ProcessSignals();
		if (FileRepSubProcess_GetState() != FileRepStateReady && 
			FileRepSubProcess_GetState() != FileRepStateInitialization) {
			break;
		}
		
		SIMPLE_FAULT_INJECTOR(FileRepConsumer);
		
		/* Calculate and compare FileRepMessageHeader_s Crc */
		fileRepMessageHeader = (FileRepMessageHeader_s*) (fileRepAckShmem->positionConsume + 
														  sizeof(FileRepShmemMessageDescr_s));
		
		FileRep_CalculateCrc((char *) fileRepMessageHeader,
							 sizeof(FileRepMessageHeader_s),
							 &messageHeaderCrcLocal);	
		
		/* The transmitted CRC sits immediately behind the header in shared memory. */
		fileRepMessageHeaderCrc =
			(pg_crc32 *) (fileRepAckShmem->positionConsume + 
						  sizeof(FileRepMessageHeader_s) + 
						  sizeof(FileRepShmemMessageDescr_s));
		
		if (*fileRepMessageHeaderCrc != messageHeaderCrcLocal) 
		{
			/* Header CRC mismatch: report and leave without touching the hash table. */
			status = STATUS_ERROR;
			ereport(WARNING,
					(errmsg("mirror failure, "
							"could not match ack message header checksum between primary '%u' and mirror '%u', "
							"failover requested", 
							*fileRepMessageHeaderCrc, 
							messageHeaderCrcLocal),
					 errhint("run gprecoverseg to re-establish mirror connectivity"),
					 FileRep_errdetail(fileRepMessageHeader->fileRepIdentifier,
									   fileRepMessageHeader->fileRepRelationType,
									   fileRepMessageHeader->fileRepOperation,
									   fileRepMessageHeader->messageCount),
					 FileRep_errdetail_ShmemAck(),
					 FileRep_errcontext()));		
						
			break;
		}
				
		/*
		 * Write operation is never acknowledged.
		 * That means message should never have body.
		 * CRC of body should be always 0.
		 */
		Assert(fileRepMessageHeader->fileRepOperation != FileRepOperationWrite);
		Assert(fileRepMessageHeader->fileRepMessageBodyCrc == 0);
		
		/*
		 * Copy the operation-specific result out of the header into the
		 * file-level variables xLogEof / mirrorStatus; operations not listed
		 * here carry nothing to copy.
		 */
		switch (fileRepMessageHeader->fileRepOperation)
		{
			case FileRepOperationReconcileXLogEof:			
				xLogEof = fileRepMessageHeader->fileRepOperationDescription.reconcile.xLogEof;

				if (Debug_filerep_print)
					ereport(LOG,
						(errmsg("ack reconcile xlogid '%d' xrecoff '%d' ",
							xLogEof.xlogid, 
							xLogEof.xrecoff)));	

				break;
		
			case FileRepOperationValidation:
				mirrorStatus = fileRepMessageHeader->fileRepOperationDescription.validation.mirrorStatus;

				if (Debug_filerep_print)
					ereport(LOG,
						(errmsg("ack validation status '%s' ",
							FileRepStatusToString[mirrorStatus])));	

				break;
				
			case FileRepOperationCreate:
				mirrorStatus = fileRepMessageHeader->fileRepOperationDescription.create.mirrorStatus;

				if (Debug_filerep_print)
					ereport(LOG,
						(errmsg("ack create status '%s' ",
								FileRepStatusToString[mirrorStatus])));	

				break;

			case FileRepOperationStartSlruChecksum:
				mirrorStatus =
					fileRepMessageHeader->fileRepOperationDescription.startChecksum.mirrorStatus;

				if (Debug_filerep_print)
				{
					ereport(LOG,
						(errmsg("ack start SLRU checksum: status = '%s', directory = '%s' ",
								FileRepStatusToString[mirrorStatus],
								fileRepMessageHeader->fileRepIdentifier.fileRepFlatFileIdentifier.directorySimpleName)));
				}

				break;

			case FileRepOperationVerifySlruDirectoryChecksum:
				mirrorStatus =
					fileRepMessageHeader->fileRepOperationDescription.verifyDirectoryChecksum.mirrorStatus;

				if (Debug_filerep_print)
				{
					ereport(LOG,
						(errmsg("ack verify SLRU directory checksum: status = '%s', directory = '%s' ",
								FileRepStatusToString[mirrorStatus],
								fileRepMessageHeader->fileRepIdentifier.fileRepFlatFileIdentifier.directorySimpleName)));
				}

				break;
				
			default:
				break;
		}
		
		if (fileRepMessageHeader->fileRepAckState != FileRepAckStateCompleted) {

			status = STATUS_ERROR;
			
			ereport(WARNING,
					(errmsg("mirror failure, "
							"could not complete operation on mirror ack state '%s', "
							"failover requested", 
							FileRepAckStateToString[fileRepMessageHeader->fileRepAckState]),
					 errhint("run gprecoverseg to re-establish mirror connectivity"),
					 errSendAlert(true),
					 FileRep_errdetail(fileRepMessageHeader->fileRepIdentifier,
									   fileRepMessageHeader->fileRepRelationType,
									   fileRepMessageHeader->fileRepOperation,
									   fileRepMessageHeader->messageCount),
					 FileRep_errdetail_Shmem(),
					 FileRep_errdetail_ShmemAck(),
					 FileRep_errcontext()));	
			
			/* 
			 * FAULT has to be set before entry is updated in ack hash table
			 * in order to suspend backend process.
			 */	
			FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
			FileRepSubProcess_ProcessSignals();
		}
				
		/* The hash entry is updated with the received ack state even when that state is not Completed. */
		if (FileRepAckPrimary_UpdateHashEntry(
				fileRepMessageHeader->fileRepIdentifier,
				fileRepMessageHeader->fileRepRelationType,
				fileRepMessageHeader->fileRepAckState) != STATUS_OK) {
			
			status = STATUS_ERROR;
			ereport(WARNING,
					(errmsg("mirror failure, "
							"could not update ack state '%s' in ack hash table, "
							"failover requested", 
							FileRepAckStateToString[fileRepMessageHeader->fileRepAckState]),
					 errhint("run gprecoverseg to re-establish mirror connectivity"),
					 errSendAlert(true),
					 FileRep_errdetail(fileRepMessageHeader->fileRepIdentifier,
									   fileRepMessageHeader->fileRepRelationType,
									   fileRepMessageHeader->fileRepOperation,
									   fileRepMessageHeader->messageCount),
					 FileRep_errdetail_Shmem(),
					 FileRep_errdetail_ShmemAck(),
					 FileRep_errcontext()));					
		}
	
		/* The log entry is written for failed messages too; the loop exits right after. */
		FileRep_InsertLogEntry(
							   "P_RunConsumer",
							   fileRepMessageHeader->fileRepIdentifier,
							   fileRepMessageHeader->fileRepRelationType,
							   fileRepMessageHeader->fileRepOperation,
							   messageHeaderCrcLocal,
							   fileRepMessageHeader->fileRepMessageBodyCrc,
							   fileRepMessageHeader->fileRepAckState,
							   FILEREP_UNDEFINED,
							   fileRepMessageHeader->messageCount);				
		
		if (status != STATUS_OK) {
			break;
		}
				
		/* Message handled; the next pass moves positionConsume past it. */
		movePositionConsume = TRUE;		
	} // while(1)	
	
	return status;
}
/*
 * FileRepSubProcess_IsStateTransitionRequested()
 *
 *		Refreshes the cached role / segment state / data state and tells
 *		the caller whether the current process should stop what it is
 *		doing because a state transition was requested.
 *
 *		- FileRepProcessTypeMain: only SegmentStateImmediateShutdown
 *		  counts.
 *		- Every other process type: DataStateInChangeTracking, or a
 *		  segment state of Fault, ImmediateShutdown or Shutdown counts.
 *		  Initialized process types process their pending signals first;
 *		  FileRepProcessTypeNotInitialized instead logs a message when a
 *		  fault is seen during backend shutdown.
 *
 *		A config log entry is written whenever TRUE is returned.
 */
bool
FileRepSubProcess_IsStateTransitionRequested(void)
{
	bool		transitionRequested = FALSE;

	getFileRepRoleAndState(&fileRepRole, &segmentState, &dataState, NULL, NULL);

	if (fileRepProcessType == FileRepProcessTypeMain)
	{
		/* Handle Shutdown request */
		if (segmentState == SegmentStateImmediateShutdown)
		{
			transitionRequested = TRUE;
		}
	}
	else
	{
		if (fileRepProcessType == FileRepProcessTypeNotInitialized)
		{
			if (segmentState == SegmentStateShutdownFilerepBackends &&
				fileRepShmemArray[0]->state == FileRepStateFault)
			{
				FileRep_InsertConfigLogEntry("failure is detected in segment mirroring during backend shutdown, abort requested");
			}
		}
		else
		{
			FileRepSubProcess_ProcessSignals();
		}

		/* Evaluated after signal processing, exactly as the checks below. */
		if (dataState == DataStateInChangeTracking)
		{
			transitionRequested = TRUE;
		}

		if (segmentState == SegmentStateFault ||
			segmentState == SegmentStateImmediateShutdown ||
			segmentState == SegmentStateShutdown)
		{
			transitionRequested = TRUE;
		}
	}

	if (transitionRequested)
	{
		FileRep_InsertConfigLogEntry("state transition requested ");
	}

	return transitionRequested;
}
/*
 * FileRepSubProcess_Main()
 *
 *		Main routine of a FileRep sub-process.  It initializes the process
 *		(auxiliary PGPROC, buffer pool backend state, signal handlers),
 *		installs the error recovery point, switches to a dedicated memory
 *		context and then runs the routine matching fileRepProcessType.
 *
 *		The routine does not return: once the dispatched routine comes back
 *		the process exits with code 0 when the sub-process state is
 *		FileRepStateShutdown or FileRepStateReady, and with code 2
 *		otherwise.  When an error is caught at the sigsetjmp() recovery
 *		point, the process cleans up and exits with code 0.
 */
void
FileRepSubProcess_Main()
{
	/* ps display / log name of this process type */
	const char *statmsg;

	MemoryContext fileRepSubProcessMemoryContext;

	/* recovery point used for error handling in this process */
	sigjmp_buf	local_sigjmp_buf;

	MyProcPid = getpid();

	MyStartTime = time(NULL);

	/*
	 * Create a PGPROC so we can use LWLocks in FileRep sub-processes.  The
	 * routine also register clean up at process exit
	 */
	InitAuxiliaryProcess();

	InitBufferPoolBackend();

	FileRepSubProcess_ConfigureSignals();

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		LWLockReleaseAll();

		/* Only resync manager/worker processes release regular locks here. */
		if (FileRepPrimary_IsResyncManagerOrWorker())
		{
			LockReleaseAll(DEFAULT_LOCKMETHOD, false);
		}

		/* Buffer related cleanup applies to backend-type sub-processes only. */
		if (FileRepIsBackendSubProcess(fileRepProcessType))
		{
			AbortBufferIO();
			UnlockBuffers();

			/* buffer pins are released here: */
			ResourceOwnerRelease(CurrentResourceOwner,
								 RESOURCE_RELEASE_BEFORE_LOCKS,
								 false, true);
		}

		/*
		 * We can now go away.	Note that because we'll call InitProcess, a
		 * callback will be registered to do ProcKill, which will clean up
		 * necessary state.
		 */
		proc_exit(0);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	PG_SETMASK(&UnBlockSig);

	/*
	 * Identify myself via ps
	 */

	statmsg = FileRepProcessTypeToString[fileRepProcessType];

	init_ps_display(statmsg, "", "", "");

	/* Create the memory context where cross-transaction state is stored */
	fileRepSubProcessMemoryContext = AllocSetContextCreate(TopMemoryContext,
														   "filerep subprocess memory context",
														   ALLOCSET_DEFAULT_MINSIZE,
														   ALLOCSET_DEFAULT_INITSIZE,
														   ALLOCSET_DEFAULT_MAXSIZE);

	MemoryContextSwitchTo(fileRepSubProcessMemoryContext);

	/*
	 * NOTE(review): presumably bumping the counter makes the
	 * FileRepSubProcess_ProcessSignals() call below pick up the current
	 * state as if a change had been requested - confirm against
	 * FileRepSubProcess_ProcessSignals().
	 */
	stateChangeRequestCounter++;

	FileRepSubProcess_ProcessSignals();

	/* Run the routine that belongs to this process type. */
	switch (fileRepProcessType)
	{
		case FileRepProcessTypePrimarySender:
			FileRepPrimary_StartSender();
			break;

		case FileRepProcessTypeMirrorReceiver:
			FileRepMirror_StartReceiver();
			break;

		case FileRepProcessTypeMirrorConsumer:
		case FileRepProcessTypeMirrorConsumerWriter:
		case FileRepProcessTypeMirrorConsumerAppendOnly1:
			FileRepMirror_StartConsumer();
			break;

		case FileRepProcessTypeMirrorSenderAck:
			FileRepAckMirror_StartSender();
			break;

		case FileRepProcessTypePrimaryReceiverAck:
			FileRepAckPrimary_StartReceiver();
			break;

		case FileRepProcessTypePrimaryConsumerAck:
			FileRepAckPrimary_StartConsumer();
			break;

		case FileRepProcessTypePrimaryRecovery:
			FileRepSubProcess_InitProcess();

			/*
			 * At this point, database is starting up and xlog is not yet
			 * replayed.  Initializing relcache now is dangerous, a sequential
			 * scan of catalog tables may end up with incorrect hint bits.
			 * E.g. a committed transaction's dirty heap pages made it to disk
			 * but pg_clog update was still in memory and we crashed.  If a
			 * tuple inserted by this transaction is read during relcache
			 * initialization, status of the tuple's xmin will be incorrectly
			 * determined as "not commited" from pg_clog. And
			 * HEAP_XMIN_INVALID hint bit will be set, rendering the tuple
			 * perpetually invisible.  Relcache initialization must be
			 * deferred to only after all of xlog has been replayed.
			 */
			FileRepPrimary_StartRecovery();

			ResourceOwnerRelease(CurrentResourceOwner,
								 RESOURCE_RELEASE_BEFORE_LOCKS,
								 false, true);
			break;

		case FileRepProcessTypeResyncManager:
			FileRepSubProcess_InitProcess();
			FileRepPrimary_StartResyncManager();

			ResourceOwnerRelease(CurrentResourceOwner,
								 RESOURCE_RELEASE_BEFORE_LOCKS,
								 false, true);
			break;

		case FileRepProcessTypeResyncWorker1:
		case FileRepProcessTypeResyncWorker2:
		case FileRepProcessTypeResyncWorker3:
		case FileRepProcessTypeResyncWorker4:
			FileRepSubProcess_InitProcess();
			FileRepPrimary_StartResyncWorker();

			ResourceOwnerRelease(CurrentResourceOwner,
								 RESOURCE_RELEASE_BEFORE_LOCKS,
								 false, true);
			break;

		default:
			elog(PANIC, "unrecognized process type: %s(%d)",
				 statmsg, fileRepProcessType);
			break;
	}

	/* Exit code 0 for Shutdown or Ready, 2 for any other final state. */
	switch (FileRepSubProcess_GetState())
	{
		case FileRepStateShutdown:
		case FileRepStateReady:
			proc_exit(0);
			break;

		default:
			proc_exit(2);
			break;
	}
}
Exemple #18
0
/*
 * 
 * FileRepAckPrimary_StartReceiver
 */
void 
FileRepAckPrimary_StartReceiver(void)
{	
	int				status = STATUS_OK;
	struct timeval	currentTime;
	pg_time_t		beginTime = 0;
	pg_time_t		endTime = 0;	
	int				retval = 0;
	
	FileRep_InsertConfigLogEntry("start receiver ack");

	{
		char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];
		
		snprintf(tmpBuf, sizeof(tmpBuf), "primary address(port) '%s(%d)' mirror address(port) '%s(%d)' ",
				 fileRepPrimaryHostAddress, 
				 fileRepPrimaryPort,
				 fileRepMirrorHostAddress, 
				 fileRepMirrorPort);
		
		FileRep_InsertConfigLogEntry(tmpBuf);
	}
		
	FileRepAckPrimary_ShmemReInit();
	
	Insist(fileRepRole == FileRepPrimaryRole);
	
	if (filerep_inject_listener_fault)
	{
		status = STATUS_ERROR;
		ereport(WARNING,
				(errmsg("mirror failure, "
						"injected fault by guc filerep_inject_listener_fault, "
						"failover requested"), 
				 FileRep_errcontext()));												
		
		FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
		FileRepSubProcess_SetState(FileRepStateFault);
		FileRepSubProcess_ProcessSignals();
		return;
	}
	
	status = FileRepConnServer_StartListener(
								 fileRepPrimaryHostAddress,
								 fileRepPrimaryPort);
	
	gettimeofday(&currentTime, NULL);
	beginTime = (pg_time_t) currentTime.tv_sec;
	
	while (1) {
		
		if (status != STATUS_OK) 
		{
			FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
			FileRepSubProcess_SetState(FileRepStateFault);
		}
		
		while (FileRepSubProcess_GetState() == FileRepStateFault) {
			
			FileRepSubProcess_ProcessSignals();
			pg_usleep(50000L); /* 50 ms */	
		}
		
		if (FileRepSubProcess_GetState() == FileRepStateShutdown) {
			
			break;
		}

		PG_SETMASK(&BlockSig);
		retval = FileRepConnServer_Select();	
		PG_SETMASK(&UnBlockSig);
		
		gettimeofday(&currentTime, NULL);
		endTime = (pg_time_t) currentTime.tv_sec;

		if ((endTime - beginTime) > gp_segment_connect_timeout) 
		{
			ereport(WARNING, 
					(errmsg("mirror failure, "
							"no connection was established from client from mirror, "
							"primary address(port) '%s(%d)' mirror address(port) '%s(%d)' timeout reached '%d' "
							"failover requested",
							fileRepPrimaryHostAddress, 
							fileRepPrimaryPort,
							fileRepMirrorHostAddress, 
							fileRepMirrorPort,
							gp_segment_connect_timeout),
					 errSendAlert(true),
					 FileRep_errcontext()));
			
			status = STATUS_ERROR;
			continue;
		}

		/* 
		 * check and process any signals received 
		 * The routine returns TRUE if the received signal requests
		 * process shutdown.
		 */
		if (FileRepSubProcess_ProcessSignals()) {
			continue;
		}
		
		if (retval < 0) {
			status = STATUS_ERROR;
			continue;
		}
		
		if (retval == 0) {
			continue;
		}
		
		Assert(retval > 0);
		
		status = FileRepConnServer_CreateConnection();
		
		if (status != STATUS_OK) {
			continue;
		}				
		
		status = FileRepConnServer_ReceiveStartupPacket();
		if (status != STATUS_OK) {
			continue;
		} 
		
		fileRepShmemArray[0]->state = FileRepStateInitialization;
		
		status = FileRepAckPrimary_RunReceiver();
		
	} // while(1)
			
	FileRepConnServer_CloseConnection();
	
	return;
}