예제 #1
0
static void
cdb_position_to_end(void)
{
	XLogRecPtr redoCheckpointLoc;
	CheckPoint redoCheckpoint;
	XLogRecPtr endLocation;

	// Throw in extra new line to make log more readable.
	elog(LOG,"--------------------------");
	
	XLogGetRecoveryStart("QDSYNC", "to get initial restart location", &redoCheckpointLoc, &redoCheckpoint);
	syncRedoLoc = redoCheckpoint.redo;

	// UNDONE: Minimum of redoCheckpointLoc and redoCheckpoint.redo?
	
	XLogScanForStandbyEndLocation(&syncRedoLoc, &endLocation);

	ereport(LOG,
	 (errmsg("QDSYNC: reporting recovery start location %s and scanned end location %s",
		 XLogLocationToString(&syncRedoLoc),
		 XLogLocationToString2(&endLocation))));

	// Throw in extra new line to make log more readable.
	elog(LOG,"--------------------------");
	
	/*
	 * Open up end location segment and set offset to end.
	 */
	openXlogEnd(&endLocation);

	/*
	 * Extra reply information that gives our standby master XLOG end location
	 * to the primary.
	 */
	putEndLocationReply(&endLocation);
}
예제 #2
0
static int
FileRepPrimary_ResyncBufferPoolIncrementalWrite(ChangeTrackingRequest *request)
{
	int				status = STATUS_OK;
	Page			page;
	Buffer			buf; 
	BlockNumber		numBlocks = 0;
	SMgrRelation	smgr_relation = NULL;
	char			relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1];
	int				ii;
	XLogRecPtr		loc;
	XLogRecPtr		loc1;
	int				count = 0;
	int				thresholdCount = 0;
	bool			mirrorDataLossOccurred = FALSE;
	int				NumberOfRelations = request->count;
	
	FileRepResyncHashEntry_s	entry;
	ChangeTrackingResult		*result = NULL;	

	while (1)
	{
		/* allow flushing buffers from buffer pool during scan */
		FileRepResync_SetReadBufferRequest();
		if ((result = ChangeTracking_GetChanges(request)) != NULL) 
		{
			FileRepResync_ResetReadBufferRequest();
					
			for (ii = 0; ii < result->count; ii++)
			{
				
				if (smgr_relation == NULL)
				{
					NumberOfRelations--;
					
					smgr_relation = smgropen(result->entries[ii].relFileNode);
					
					snprintf(relidstr, sizeof(relidstr), "%u/%u/%u",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode);

					numBlocks = smgrnblocks(smgr_relation);
					
					if (Debug_filerep_print)
						elog(LOG, "resynchronize buffer pool relation '%u/%u/%u' "
							 "number of blocks:'%u' ",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode,
							 numBlocks);
					
					thresholdCount = Min(numBlocks, 1024);
				}
				
				loc1 =  result->entries[ii].lsn_end;
				
				/*
				 * if relation was truncated then block_num from change tracking can be beyond numBlocks 
				 */
				if (result->entries[ii].block_num >=  numBlocks)
				{
					ereport(LOG,	
							(errmsg("could not resynchonize buffer pool relation '%s' block '%d' (maybe due to truncate), "
									"lsn change tracking '%s(%u/%u)' "
									"number of blocks '%d' ",
									relidstr,
									result->entries[ii].block_num,
									XLogLocationToString(&loc1),
									loc1.xlogid,
									loc1.xrecoff,
									numBlocks),						
							 FileRep_errcontext()));						
					
					goto flush_check;
				}
				
				/* allow flushing buffers from buffer pool during scan */
				FileRepResync_SetReadBufferRequest();
				buf = ReadBuffer_Resync(smgr_relation,
										result->entries[ii].block_num,
										relidstr);
				FileRepResync_ResetReadBufferRequest();
				
				Assert(result->entries[ii].block_num < numBlocks);
				
				LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
				page = BufferGetPage(buf);
				
				loc = PageGetLSN(page); 
				
				if(Debug_filerep_print)
				{
					elog(LOG,	
							"incremental resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn page '%s(%u/%u)' "
							"lsn end change tracking '%s(%u/%u)' ",
							relidstr,
							numBlocks,
							result->entries[ii].block_num,
							XLogLocationToString(&loc),
							loc.xlogid,
							loc.xrecoff,
							XLogLocationToString(&loc1),
							result->entries[ii].lsn_end.xlogid,
							result->entries[ii].lsn_end.xrecoff);					
				}
				else
				{
					char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];
					
					snprintf(tmpBuf, sizeof(tmpBuf), 
							 "incremental resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn page '%s(%u/%u)' ",
							 relidstr,
							 numBlocks,
							 result->entries[ii].block_num,
							 XLogLocationToString(&loc),
							 loc.xlogid,
							 loc.xrecoff);
					
					FileRep_InsertConfigLogEntry(tmpBuf);
					
					snprintf(tmpBuf, sizeof(tmpBuf), 
							 "incremental resync buffer pool identifier '%s' lsn end change tracking '%s(%u/%u)' ",
							 relidstr,
							 XLogLocationToString(&loc1),
							 result->entries[ii].lsn_end.xlogid,
							 result->entries[ii].lsn_end.xrecoff);
					
					FileRep_InsertConfigLogEntry(tmpBuf);
					
				}
								
				if (XLByteLE(result->entries[ii].lsn_end, PageGetLSN(page)))
				{
					if (! XLByteEQ(PageGetLSN(page), result->entries[ii].lsn_end))
					{
						ereport(LOG,
							(errmsg("Resynchonize buffer pool relation '%s' block '%d' has page lsn less than CT lsn, "
								"lsn end change tracking '%s(%u/%u)' lsn page '%s(%u/%u)' "
								"number of blocks '%d'",
								relidstr,
								result->entries[ii].block_num,
								XLogLocationToString(&loc),
								loc.xlogid,
								loc.xrecoff,
								XLogLocationToString(&loc1),
								loc1.xlogid,
								loc1.xrecoff,
								numBlocks),
							 FileRep_errcontext()));

					}

					/*
					 * It's safe and better to perform write of the page to mirror,
					 * for this case, as primary and mirror data pages should always
					 * be same. So, we might do some extra work but definitely won't
					 * loose out blocks, or error out and need to perform full recovery.
					 * Need to cover for this case as there are some known scenarios where
					 * CT file can have extra records which should have been discarded,
					 * but as we loose out information of xlog LSN cannot be discarded.
					 * One such case is when CT_TRANSIENT being compacted to CT_COMPACT
					 * with specific xlog LSN (to discard extra records) in CT mode gets
					 * interrupted by resync. Compaction during Resync collects all the
					 * CT records and doesn't have xlog LSN information to discard any
					 * extra records from CT_TRANSIENT.
					 */

					smgrwrite(smgr_relation,
							  result->entries[ii].block_num,
							  (char *)BufferGetBlock(buf),
							  FALSE);
				}

#ifdef FAULT_INJECTOR	
				FaultInjector_InjectFaultIfSet(
											   FileRepResyncWorker, 
											   DDLNotSpecified,
											   "",	// databaseName
											   ""); // tableName
#endif				
				
				UnlockReleaseBuffer(buf);
				
#ifdef FAULT_INJECTOR	
				FaultInjector_InjectFaultIfSet(
											   FileRepResyncWorker, 
											   DDLNotSpecified,
											   "",	// databaseName
											   ""); // tableName
#endif				
		
	flush_check:			
				if (((ii + 1) == result->count) ||
					! (result->entries[ii].relFileNode.spcNode == result->entries[ii+1].relFileNode.spcNode &&
					   result->entries[ii].relFileNode.dbNode == result->entries[ii+1].relFileNode.dbNode &&
					   result->entries[ii].relFileNode.relNode == result->entries[ii+1].relFileNode.relNode))
				{
					if (result->ask_for_more == false)
					{
								
						smgrimmedsync(smgr_relation);
						
						smgrclose(smgr_relation);
								 
						smgr_relation = NULL;
							
						FileRep_GetRelationPath(
												 entry.fileName, 
												 result->entries[ii].relFileNode, 
												 0 /* segment file number is always 0 for Buffer Pool */);							 
								 
						status = FileRepResync_UpdateEntry(&entry);
						if (status != STATUS_OK)
						{
							 break;
						}
					}
								 
				}			
							
				if (count > thresholdCount)
				{
					count = 0;
					FileRepSubProcess_ProcessSignals();
					
					if (! (FileRepSubProcess_GetState() == FileRepStateReady && 
						   dataState == DataStateInResync))
					{
						mirrorDataLossOccurred = TRUE;
						break;
					}
				}
				else
					count++;
			}  // for (ii = 0; ii < result->count; ii++)
			
		} // if ((result = ChangeTracking_GetChanges(request)) != NULL) 
		
		FileRepResync_ResetReadBufferRequest();
			
		if (result != NULL && result->ask_for_more == true)
		{
			Assert(request->count == 1);
			request->entries[0].lsn_start = result->next_start_lsn;
		}
		else
		{
			break;
		}

	} // while(1) 
		
	ChangeTracking_FreeRequest(request);
	ChangeTracking_FreeResult(result);
	
	Insist(NumberOfRelations == 0);
	
	if (mirrorDataLossOccurred)
		status = STATUS_ERROR;
	
	return status;	
}
예제 #3
0
static int
FileRepPrimary_ResyncWrite(FileRepResyncHashEntry_s	*entry)
{

	int				status = STATUS_OK;
	Page			page;
	Buffer			buf; 
	BlockNumber		numBlocks;
	BlockNumber		blkno;
	SMgrRelation	smgr_relation;
	char			relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1];
	XLogRecPtr		loc;
	int				count = 0;
	int				thresholdCount = 0;
	bool			mirrorDataLossOccurred = FALSE;
		
	switch (entry->relStorageMgr)
	{

		case PersistentFileSysRelStorageMgr_BufferPool:
			
			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_BufferPoolScanIncremental:
				case MirroredRelDataSynchronizationState_FullCopy:

					smgr_relation = smgropen(entry->relFileNode);
					
					numBlocks = smgrnblocks(smgr_relation);

					snprintf(relidstr, sizeof(relidstr), "%u/%u/%u",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode);

					if (Debug_filerep_print)
						elog(LOG, "resync buffer pool relation '%s' number of blocks '%d' ",
							 relidstr, numBlocks);

					thresholdCount = Min(numBlocks, 1024);
					
					/* 
					 * required in order to report how many blocks were synchronized 
					 * if gp_persistent_relation_node does not return that information 
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = numBlocks - entry->mirrorBufpoolResyncCkptBlockNum;
					}
					
					for (blkno = entry->mirrorBufpoolResyncCkptBlockNum; blkno < numBlocks; blkno++) 
					{
						XLogRecPtr	endResyncLSN = (isFullResync() ? 
													FileRepResync_GetEndFullResyncLSN() :
													FileRepResync_GetEndIncrResyncLSN());
#ifdef FAULT_INJECTOR
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorkerRead,
													   DDLNotSpecified,
													   "",	//databaseName
													   ""); // tableName
#endif				
						
						FileRepResync_SetReadBufferRequest();
						buf = ReadBuffer_Resync(smgr_relation, blkno, relidstr);
						FileRepResync_ResetReadBufferRequest();
						
						LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
						page = BufferGetPage(buf);
						
						loc = PageGetLSN(page);
						
						if (Debug_filerep_print)
						{
							elog(LOG, 
									 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' "
									 "lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
									 relidstr,
									 numBlocks,
									 blkno,
									 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
									 entry->mirrorBufpoolResyncCkptLoc.xlogid,
									 entry->mirrorBufpoolResyncCkptLoc.xrecoff,
									 XLogLocationToString(&loc),
									 loc.xlogid,
									 loc.xrecoff,
									 XLogLocationToString(&endResyncLSN),
									 endResyncLSN.xlogid,
									 endResyncLSN.xrecoff);
						}
						else
						{
							char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];
							
							snprintf(tmpBuf, sizeof(tmpBuf), 
									 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' ",
									 relidstr,
									 numBlocks,
									 blkno,
									 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
									 entry->mirrorBufpoolResyncCkptLoc.xlogid,
									 entry->mirrorBufpoolResyncCkptLoc.xrecoff);
														
							FileRep_InsertConfigLogEntry(tmpBuf);
							
							snprintf(tmpBuf, sizeof(tmpBuf), 
									 "full resync buffer pool identifier '%s' lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
									 relidstr,
									 XLogLocationToString(&loc),
									 loc.xlogid,
									 loc.xrecoff,
									 XLogLocationToString(&endResyncLSN),
									 endResyncLSN.xlogid,
									 endResyncLSN.xrecoff);
							
							FileRep_InsertConfigLogEntry(tmpBuf);
							
						}
						
						if (XLByteLE(PageGetLSN(page), endResyncLSN) &&
							XLByteLE(entry->mirrorBufpoolResyncCkptLoc, PageGetLSN(page))) 
						{
							smgrwrite(smgr_relation, 
									  blkno,
									  (char *)BufferGetBlock(buf),
									  FALSE);
						}
						
#ifdef FAULT_INJECTOR	
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorker, 
													   DDLNotSpecified,
													   "",	// databaseName
													   ""); // tableName
#endif				
						
						UnlockReleaseBuffer(buf);
						
						if (count > thresholdCount)
						{
							count = 0;
							FileRepSubProcess_ProcessSignals();
							
							if (! (FileRepSubProcess_GetState() == FileRepStateReady && 
								   dataState == DataStateInResync))
							{
								mirrorDataLossOccurred = TRUE;
								break;
							}
						}
						else
							count++;
					}
						
					if (mirrorDataLossOccurred)
						break;

					if (entry->mirrorDataSynchronizationState != MirroredRelDataSynchronizationState_FullCopy)
					{
						LockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);
					
						numBlocks = smgrnblocks(smgr_relation);
					
						smgrtruncate(smgr_relation,
								 numBlocks,
								 TRUE /* isTemp, TRUE means to not record in XLOG */,
								 FALSE /* isLocalBuf */,
								 &entry->persistentTid,
								 entry->persistentSerialNum);
								 
						UnlockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);
					}
					
					smgrimmedsync(smgr_relation);
					smgrclose(smgr_relation);
					
					smgr_relation = NULL;
					break;
					
				case MirroredRelDataSynchronizationState_None:										
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;
					
				default:
					ereport(LOG, 
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.relNode,
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}
			break;
			
		case PersistentFileSysRelStorageMgr_AppendOnly:
		{
			MirroredAppendOnlyOpen	mirroredOpen;
			int						primaryError;
			bool					mirrorDataLossOccurred;
			char					*buffer = NULL;
			int64					endOffset = entry->mirrorAppendOnlyNewEof;
			int64					startOffset = entry->mirrorAppendOnlyLossEof;
			int32					bufferLen = 0;
			int						retval = 0;
			
			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_AppendOnlyCatchup:
				case MirroredRelDataSynchronizationState_FullCopy:
					
					/* 
					 * required in order to report how many blocks were synchronized 
					 * if gp_persistent_relation_node does not return that information 
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = (endOffset - startOffset) / BLCKSZ;
					}					
					
					/*
					 * The MirroredAppendOnly_OpenResynchonize routine knows we are a resynch worker and
					 * will open BOTH, but write only the MIRROR!!!
					 */
					MirroredAppendOnly_OpenResynchonize(
											&mirroredOpen, 
											&entry->relFileNode,
											entry->segmentFileNum,
											startOffset,
											&primaryError,
											&mirrorDataLossOccurred);
					if (primaryError != 0)
					{
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not open file %u/%u/%u.%u : %s",
										entry->relFileNode.dbNode,
										entry->relFileNode.spcNode,
										entry->relFileNode.relNode,
										entry->segmentFileNum,
										strerror(primaryError))));
						
						break;
					}

					if (mirrorDataLossOccurred)
						break;
					
					/* AO and CO Data Store writes 64k size by default */
					bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);
					buffer = (char*) palloc(bufferLen);
					if (buffer == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_OUT_OF_MEMORY),
								 (errmsg("not enough memory for resynchronization"))));
					
					MemSet(buffer, 0, bufferLen);
					
					while (startOffset < endOffset)
					{
						retval = MirroredAppendOnly_Read(
												&mirroredOpen,
												buffer,
												bufferLen);
						
						if (retval != bufferLen) 
						{
							ereport(ERROR,
									(errcode_for_file_access(),
									 errmsg("could not read from position:" INT64_FORMAT " in file %u/%u/%u.%u : %m",
											startOffset, 
											entry->relFileNode.dbNode,
											entry->relFileNode.spcNode,
											entry->relFileNode.relNode,
											entry->segmentFileNum)));
							
							break;
						}						
						
						MirroredAppendOnly_Append(
											  &mirroredOpen,
											  buffer,
											  bufferLen,
											  &primaryError,
											  &mirrorDataLossOccurred);
						
						if (mirrorDataLossOccurred)
							break;

						Assert(primaryError == 0);	// No primary writes as resync worker.
						
						startOffset += bufferLen;
						/* AO and CO Data Store writes 64k size by default */
						bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);						
					}
					
					if (buffer) 
					{
						pfree(buffer);
						buffer = NULL;
					}
					
					if (mirrorDataLossOccurred)
						break;
					
					/* Flush written data on Mirror */
					MirroredAppendOnly_Flush(
										&mirroredOpen,
										&primaryError,
										&mirrorDataLossOccurred);
					if (mirrorDataLossOccurred)
						break;
					
					Assert(primaryError == 0);	// Not flushed on primary as resync worker.
					
					/* Close Primary and Mirror */
					MirroredAppendOnly_Close(
										&mirroredOpen,
										&mirrorDataLossOccurred);
								
					break;
					
				case MirroredRelDataSynchronizationState_None:										
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;					
					
				default:
					ereport(LOG, 
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.relNode,
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}
			
			break;
		}	//case
		default:
			Assert(0);
			break;
	} //switch
	
	if (mirrorDataLossOccurred)
		status = STATUS_ERROR;
	
	return status;
}
예제 #4
0
static void
WalSendServerDoRequest(WalSendRequest *walSendRequest)
{
	bool successful;
	struct timeval standbyTimeout;

	WalSendServerGetStandbyTimeout(&standbyTimeout);
	
	switch (walSendRequest->command)
	{
	case PositionToEnd:
		elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "PositionToEnd");

		successful = write_position_to_end(&originalEndLocation,
			                               NULL, &walsend_shutdown_requested);
		if (successful)
			elog(LOG,"Standby master returned transaction log end location %s",
				 XLogLocationToString(&originalEndLocation));
		else
		{
			disableQDMirroring_ConnectionError(
				"Unable to connect to standby master and determine transaction log end location",
				GetStandbyErrorString());
			disconnectMirrorQD_SendClose();
		}
		break;
		
	case Catchup:
		elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Catchup");

        if (isQDMirroringCatchingUp())
    	{
    		bool tooFarBehind = false;

			elog(LOG,"Current master transaction log is flushed through location %s",
				 XLogLocationToString(&walSendRequest->flushedLocation));
			
			if (XLByteLT(originalEndLocation, walSendRequest->flushedLocation))
			{
				/*
				 * Standby master is behind the primary.  Send catchup WAL.
				 */
				 
				/* 
				 * Use a TRY block to catch errors from our attempt to read
				 * the primary's WAL.  Errors from sending to the standby
				 * come up as a boolean return (successful).
				 */
				PG_TRY();
				{
					successful = XLogCatchupQDMirror(
									&originalEndLocation, 
									&walSendRequest->flushedLocation,
									&standbyTimeout,
									&walsend_shutdown_requested);
				}
				PG_CATCH();
				{
					/* 
					 * Report the error related to reading the primary's WAL
					 * to the server log 
					 */
					 
					/* 
					 * But first demote the error to something much less
					 * scary.
					 */
				    if (!elog_demote(WARNING))
			    	{
			    		elog(LOG,"unable to demote error");
			        	PG_RE_THROW();
			    	}
					
					EmitErrorReport();
					FlushErrorState();

					successful = false;
					tooFarBehind = true;
				}
				PG_END_TRY();
				
				if (successful)
				{
					elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
						 "catchup send from standby end %s through primary flushed location %s",
						 XLogLocationToString(&originalEndLocation),
						 XLogLocationToString2(&walSendRequest->flushedLocation));
				}

			}
			else if (XLByteEQ(originalEndLocation, walSendRequest->flushedLocation))
			{
				elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"Mirror was already caught up");
				successful = true;
			}
			else
			{
				elog(WARNING,"Standby master transaction log location %s is beyond the current master end location %s",
				     XLogLocationToString(&originalEndLocation),
				     XLogLocationToString2(&walSendRequest->flushedLocation));
				successful = false;
			}
			
			if (successful)
			{
				char detail[200];
				int count;
				
				count = snprintf(
							 detail, sizeof(detail),
							 "Transaction log copied from locations %s through %s to the standby master",
						     XLogLocationToString(&originalEndLocation),
						     XLogLocationToString2(&walSendRequest->flushedLocation));
				if (count >= sizeof(detail))
				{
					ereport(ERROR,
							(errcode(ERRCODE_INTERNAL_ERROR),
							 errmsg("format command string failure")));
				}

				enableQDMirroring("Master mirroring is now synchronized", detail);

				currentEndLocation = walSendRequest->flushedLocation;

				periodicLen = 0;
				periodicLocation = currentEndLocation;
			}
			else
			{
				if (tooFarBehind)
				{
					disableQDMirroring_TooFarBehind(
						"The current master was unable to synchronize the standby master "
						"because the transaction logs on the current master were recycled.  "
						"A gpinitstandby (at an appropriate time) will be necessary to copy "
						"over the whole master database to the standby master so it may be synchronized");
				}
				else
				{
					disableQDMirroring_ConnectionError(
						"Connection to the standby master was lost during transaction log catchup",
						GetStandbyErrorString());
				}
				disconnectMirrorQD_SendClose();
			}
		}
		else if (isQDMirroringDisabled())
		{
			elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master Mirror Send: Master mirroring not catching-up (state is disabled)");
		}
		else
		{
			elog(ERROR,"unexpected master mirroring state %s",
				 QDMirroringStateString());
		}
		
		break;
		
	case WriteWalPages:
		if (Debug_print_qd_mirroring)
			elog(LOG, "WriteWalPages");
		
        if (isQDMirroringEnabled())
        {
			char	   *from;
			Size		nbytes;
			bool		more= false;

			/*
			 * For now, save copy of data until flush.  This could be
			 * optimized.
			 */
			if (saveBuffer == NULL)
			{
				uint32 totalBufferLen = XLOGbuffers * XLOG_BLCKSZ;
				
				saveBuffer = malloc(totalBufferLen);
				if (saveBuffer == NULL)
					elog(ERROR,"Could not allocate buffer for xlog data (%d bytes)",
					     totalBufferLen);
				
				saveBufferLen = 0;
			}

			XLogGetBuffer(walSendRequest->startidx, walSendRequest->npages,
				          &from, &nbytes);

			if (saveBufferLen == 0)
			{
				more = false;
				writeLogId = walSendRequest->logId;
				writeLogSeg = walSendRequest->logSeg;
				writeLogOff = walSendRequest->logOff;

				memcpy(saveBuffer, from, nbytes);
				saveBufferLen = nbytes;
			}
			else
			{
				more = true;
				memcpy(&saveBuffer[saveBufferLen], from, nbytes);
				saveBufferLen += nbytes;
			}
			
			if (Debug_print_qd_mirroring)
				elog(LOG,
					 "Master Mirror Send: WriteWalPages (%s) startidx %d, npages %d, timeLineID %d, logId %u, logSeg %u, logOff 0x%X, nbytes 0x%X",
					 (more ? "more" : "new"),
					 walSendRequest->startidx,
					 walSendRequest->npages,
					 walSendRequest->timeLineID,
					 walSendRequest->logId,
					 walSendRequest->logSeg,
					 walSendRequest->logOff,
					 (int)nbytes);
    	}

	case FlushWalPages:
		if (Debug_print_qd_mirroring)
			elog(LOG, "FlushWalPages");
		
        if (isQDMirroringEnabled())
        {
			char 		cmd[MAXFNAMELEN + 50];

			if (saveBufferLen == 0)
				successful = true;
			else
			{
				if (snprintf(cmd, sizeof(cmd),"xlog %d %d %d %d", 
							 writeLogId, writeLogSeg, writeLogOff, 
							 (int)saveBufferLen) >= sizeof(cmd))
					elog(ERROR,"could not create cmd for qd mirror logid %d seg %d", 
					     writeLogId, writeLogSeg);
				
				successful = write_qd_sync(cmd, saveBuffer, saveBufferLen, 
							               &standbyTimeout,
							               &walsend_shutdown_requested);
				if (successful)
				{
					XLogRecPtr oldEndLocation;
					
					oldEndLocation = currentEndLocation;

					currentEndLocation.xlogid = writeLogId;
					currentEndLocation.xrecoff = writeLogSeg * XLogSegSize + writeLogOff;
					if (currentEndLocation.xrecoff >= XLogFileSize)
					{
						(currentEndLocation.xlogid)++;
						currentEndLocation.xrecoff = 0;
					}

					if (XLByteLT(oldEndLocation,currentEndLocation))
					{
						periodicLen += saveBufferLen;
						if (periodicLen > periodicReportLen)
						{
							elog(LOG,
								 "Master mirroring periodic report: %d bytes successfully send to standby master for locations %s through %s",
								 periodicLen,
								 XLogLocationToString(&periodicLocation),
								 XLogLocationToString2(&currentEndLocation));

							periodicLen = 0;
							periodicLocation = currentEndLocation;
						}
					}
					else
					{
						if (Debug_print_qd_mirroring)
							elog(LOG,
							     "Send to Master mirror successful.  New end location %s (old %s)",
							     XLogLocationToString(&currentEndLocation),
							     XLogLocationToString2(&oldEndLocation));
					}
				}
				else
				{
					disableQDMirroring_ConnectionError(
						"Connection to the standby master was lost attempting to send new transaction log",
						GetStandbyErrorString());
					disconnectMirrorQD_SendClose();
				}

				/*
				 * Reset so WriteWalPages can fill the buffer again.
				 */
				saveBufferLen = 0;
				writeLogId = 0;
				writeLogSeg = 0;
				writeLogOff = 0;
			}
			
			if (successful && walSendRequest->haveNewCheckpointLocation)
			{
				uint32 logid;
				uint32 seg;
				uint32 offset;
				
	        	elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"New previous checkpoint location %s",
				     XLogLocationToString(&walSendRequest->newCheckpointLocation));
				XLByteToSeg(walSendRequest->newCheckpointLocation, logid, seg);
				offset = walSendRequest->newCheckpointLocation.xrecoff % XLogSegSize;
				
				if (snprintf(cmd, sizeof(cmd),"new_checkpoint_location %d %d %d", 
							 logid, seg, offset) >= sizeof(cmd))
					elog(ERROR,"could not create cmd for qd mirror logid %d seg %d offset %d", 
					     logid, seg, offset);
				
				successful = write_qd_sync(cmd, NULL, 0, 
					                       NULL, &walsend_shutdown_requested);
				if (successful)
				{
					elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"Send of new checkpoint location to master mirror successful");
				}
				else
				{
					disableQDMirroring_ConnectionError(
						"Connection to the standby master was lost attempting to send new checkpoint location",
						GetStandbyErrorString());
					disconnectMirrorQD_SendClose();
				}
			}
			
        }
		else if (isQDMirroringDisabled())
		{
			elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master Mirror Send: Master mirroring not enabled");
		}
		else
		{
			elog(ERROR,"unexpected master mirroring state %s",
				 QDMirroringStateString());
		}
		
		break;

	case CloseForShutdown:
		if (Debug_print_qd_mirroring)
			elog(LOG, "CloseForShutdown");

		/*
		 * Do the work we would normally do when signaled to stop.
		 */
		WalSendServer_ServiceShutdown();
		break;

	default:
		elog(ERROR, "Unknown WalSendRequestCommand %d", walSendRequest->command);
	}

}
bool ChangeTracking_PrintRelationChangeInfo(
									  RmgrId	xl_rmid,
									  uint8		xl_info,
									  void		*data, 
									  XLogRecPtr *loc,
									  bool		weAreGeneratingXLogNow,
									  bool		printSkipIssuesOnly)
{
	bool				atLeastOneSkipIssue = false;
	int					relationChangeInfoArrayCount;
	int					i;
	int					arrlen = ChangeTracking_GetInfoArrayDesiredMaxLength(xl_rmid, xl_info);
	RelationChangeInfo 	relationChangeInfoArray[arrlen];
	
	ChangeTracking_GetRelationChangeInfoFromXlog(
										  xl_rmid,
										  xl_info,
										  data,
										  relationChangeInfoArray,
										  &relationChangeInfoArrayCount,
										  arrlen);

	for (i = 0; i < relationChangeInfoArrayCount; i++)
	{
		RelationChangeInfo	*relationChangeInfo;
		int64				maxPersistentSerialNum;
		bool				skip;
		bool				zeroTid = false;
		bool				invalidTid = false;
		bool				zeroSerialNum = false;
		bool				invalidSerialNum = false;
		bool				skipIssue = false;

		relationChangeInfo = &relationChangeInfoArray[i];

		if (weAreGeneratingXLogNow)
			maxPersistentSerialNum = PersistentRelfile_MyHighestSerialNum();
		else
			maxPersistentSerialNum = PersistentRelfile_CurrentMaxSerialNum();

		skip = GpPersistent_SkipXLogInfo(relationChangeInfo->relFileNode.relNode);
		if (!skip)
		{
			zeroTid = PersistentStore_IsZeroTid(&relationChangeInfo->persistentTid);
			if (!zeroTid)
				invalidTid = !ItemPointerIsValid(&relationChangeInfo->persistentTid);
			zeroSerialNum = (relationChangeInfo->persistentSerialNum == 0);
			if (!zeroSerialNum)
			{
				invalidSerialNum = (relationChangeInfo->persistentSerialNum < 0);

				/*
				 * If we have'nt done the scan yet... do not do upper range check.
				 */
				if (maxPersistentSerialNum != 0 &&
					relationChangeInfo->persistentSerialNum > maxPersistentSerialNum)
					invalidSerialNum = true;
			}
			skipIssue = (zeroTid || invalidTid || zeroSerialNum || invalidSerialNum);
		}

		if (!printSkipIssuesOnly || skipIssue)
			elog(LOG, 
				 "ChangeTracking_PrintRelationChangeInfo: [%d] xl_rmid %d, xl_info 0x%X, %u/%u/%u, block number %u, LSN %s, persistent serial num " INT64_FORMAT ", TID %s, maxPersistentSerialNum " INT64_FORMAT ", skip %s, zeroTid %s, invalidTid %s, zeroSerialNum %s, invalidSerialNum %s, skipIssue %s",
				i,
				xl_rmid,
				xl_info,
				relationChangeInfo->relFileNode.spcNode,
				relationChangeInfo->relFileNode.dbNode,
				relationChangeInfo->relFileNode.relNode,
				relationChangeInfo->blockNumber,
				XLogLocationToString(loc),
				relationChangeInfo->persistentSerialNum,
				ItemPointerToString(&relationChangeInfo->persistentTid),
				maxPersistentSerialNum,
				(skip ? "true" : "false"),
				(zeroTid ? "true" : "false"),
				(invalidTid ? "true" : "false"),
				(zeroSerialNum ? "true" : "false"),
				(invalidSerialNum ? "true" : "false"),
				(skipIssue ? "true" : "false"));

		if (skipIssue)
			atLeastOneSkipIssue = true;
	}

	return atLeastOneSkipIssue;
}
예제 #6
0
/*
 * cdb_sync_xlog - process xlog sync
 */
static void
cdb_sync_xlog(void)
{
	uint32 currentBlockOffset;
	XLogRecPtr writeLoc;

	elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "QDSYNC: write logid %d seg %d woffset 0x%X, wlen 0x%X",
		 wlogid, wseg, woffset, wlen);
	if (woffset % XLOG_BLCKSZ != 0)
	{
		elog(ERROR,"QDSYNC: not on block boundaries 0x%X",
			 woffset);
	}

	if (wlogid != xlogid || wseg != xseg)
	{
		elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "QDSYNC: closing previous file %s",
		     xlogfilename);
		if (xlogfilefd >= 0)
		{
			close(xlogfilefd);
			xlogfilefd = -1;
			xlogfileoffset = -1;
		}
		xlogid = wlogid;
		xseg = wseg;
		openXlogNextFile();

		/*
		 * Assume caller knows where to write.
		 */
		xlogfileoffset = woffset;
	}
	
	/* 
	 * Validate we are appending or overwritting previous block
	 */
	currentBlockOffset = (xlogfileoffset / XLOG_BLCKSZ) * XLOG_BLCKSZ;
	if (woffset != currentBlockOffset && 
		woffset + XLOG_BLCKSZ != currentBlockOffset)
	{
		elog(ERROR,"QDSYNC: not appending to end (primary: 0x%X, standby: 0x%X)",
			   woffset, xlogfileoffset);
	}
	
	/*
	 * no validation checking on xlog. xlog sync is by block and may repeat
	 * the same block, so we do not have any way to check it now. we will rely
	 * on tmlog checking for now.
	 */
	ensureBufferSize();
	readLogMessage(buf, wlen);
	if ((wlen / XLOG_BLCKSZ) * XLOG_BLCKSZ != wlen)
	{	
		int roundedUp;
		int padLen;
		
		/*
		 * Pad buffer out with zeros.
		 */
		roundedUp = ((wlen + XLOG_BLCKSZ - 1) / XLOG_BLCKSZ) * XLOG_BLCKSZ;
		Assert(buflen >= roundedUp);
		padLen = roundedUp - wlen;
		
		elog((Debug_print_qd_mirroring ? LOG : DEBUG5), 
			 "QDSYNC: padding buffer with %d zeros (wlen %d, roundedUp %d)",
			 padLen, wlen, roundedUp);
		memset(&((char*) buf)[wlen], 0, padLen); 
		
		wlen = roundedUp;
	}

	writeLoc.xlogid = wlogid;
	writeLoc.xrecoff = wseg * XLogSegSize + 
					   woffset;
	syncWriteLog(xlogfilefd, buf, woffset, wlen);
	elog((Debug_print_qd_mirroring ? LOG : DEBUG5), 
		 "QDSYNC: wrote location %s len 0x%X", 
		 XLogLocationToString(&writeLoc),
		 wlen);

	xlogfileoffset = woffset + wlen;
}
예제 #7
0
void
cdb_perform_redo(XLogRecPtr *redoCheckPointLoc, CheckPoint *redoCheckPoint, XLogRecPtr *newCheckpointLoc)
{
	CheckPoint oldRedoCheckpoint;
	uint32 logid;
	uint32 seg;
	int nsegsremoved;
	
	if (redoCheckPointLoc->xlogid == 0 && redoCheckPointLoc->xrecoff == 0)
	{
		XLogGetRecoveryStart("QDSYNC", "for redo apply", redoCheckPointLoc, redoCheckPoint);
	}
	
	XLogStandbyRecoverRange(redoCheckPointLoc, redoCheckPoint, newCheckpointLoc);

	/*
	 * Sample the recovery start location now to see if appling redo
	 * processed checkpoint records and moved the restart location forward.
	 */
	oldRedoCheckpoint = *redoCheckPoint;

	XLogGetRecoveryStart("QDSYNC", "for redo progress check", redoCheckPointLoc, redoCheckPoint);

	if (XLByteLT(oldRedoCheckpoint.redo,redoCheckPoint->redo))
	{
		ereport(LOG,
		 (errmsg("QDSYNC: transaction redo moved the restart location from %s to %s",
			     XLogLocationToString(&oldRedoCheckpoint.redo),
			     XLogLocationToString2(&redoCheckPoint->redo))));
	}
	else
	{
		Assert(XLByteEQ(oldRedoCheckpoint.redo,redoCheckPoint->redo));
		ereport(LOG,
		 (errmsg("QDSYNC: transaction redo did not move the restart location %s forward this pass",
			     XLogLocationToString(&oldRedoCheckpoint.redo))));
		return;
	}

	XLByteToSeg(redoCheckPoint->redo, logid, seg);
	
	/*
	 * Delete offline log files (those no longer needed even for previous
	 * checkpoint).
	 */
	elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
	     "QDSYNC: keep log files as far back as (logid %d, seg %d)",
		 logid, seg);

	if (logid || seg)
	{
		PrevLogSeg(logid, seg);
		elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
			 "QDSYNC: delete offline log files up to (logid %d, seg %d)",
			 logid, seg);
		
		XLogRemoveStandbyLogs(logid, seg, &nsegsremoved);

		if (nsegsremoved > 0)
		{
		// Throw in extra new line to make log more readable.
			ereport(LOG,
			 (errmsg("QDSYNC: %d logs removed through logid %d, seg %d\n",
				     nsegsremoved,
				     logid, seg)));
		}

	}
	// Throw in extra new line to make log more readable.
	elog(LOG,"--------------------------");
}
예제 #8
0
static void PersistentStore_DiagnoseDumpTable(
	PersistentStoreData 		*storeData,
	PersistentStoreSharedData 	*storeSharedData)
{

	if (disable_persistent_diagnostic_dump)
	{
		return;
	}

	MIRROREDLOCK_BUFMGR_DECLARE;

	PersistentStoreScan storeScan;
	ItemPointerData			persistentTid;
	int64					persistentSerialNum;
	Datum					*values;
	BlockNumber				lastDisplayedBlockNum;
	bool					displayedOne;
	BlockNumber				currentBlockNum;

	elog(LOG, 
		 "Diagnostic dump of persistent table ('%s'): maximum in-use serial number " INT64_FORMAT ", maximum free order number " INT64_FORMAT ", free TID %s, maximum known TID %s",
		 storeData->tableName,
		 storeSharedData->maxInUseSerialNum, 
		 storeSharedData->maxFreeOrderNum, 
		 ItemPointerToString(&storeSharedData->freeTid),
		 ItemPointerToString2(&storeSharedData->maxTid));

	values = (Datum*)palloc(storeData->numAttributes * sizeof(Datum));

	PersistentStore_BeginScan(
						storeData,
						storeSharedData,
						&storeScan);

	lastDisplayedBlockNum = 0;
	displayedOne = false;
	while (PersistentStore_GetNext(
							&storeScan,
							values,
							&persistentTid,
							&persistentSerialNum))
	{
		/*
		 * Use the BlockIdGetBlockNumber routine because ItemPointerGetBlockNumber 
		 * asserts for valid TID.
		 */
		currentBlockNum = BlockIdGetBlockNumber(&persistentTid.ip_blkid);
		if (!displayedOne || currentBlockNum != lastDisplayedBlockNum)
		{
			Buffer		buffer;
			PageHeader	page;
			XLogRecPtr	lsn;

			/*
			 * Fetch the block and display the LSN.
			 */
			
			// -------- MirroredLock ----------
			MIRROREDLOCK_BUFMGR_LOCK;
			
			buffer = ReadBuffer(
							storeScan.persistentRel,
							currentBlockNum);

			page = (PageHeader) BufferGetPage(buffer);
			lsn = PageGetLSN(page);
			ReleaseBuffer(buffer);

			MIRROREDLOCK_BUFMGR_UNLOCK;
			// -------- MirroredLock ----------

			elog(LOG, "Diagnostic LSN %s of page %u",
				 XLogLocationToString(&lsn),
				 currentBlockNum);

			lastDisplayedBlockNum = currentBlockNum;
			displayedOne = true;
		}

		/*
		 * Display the persistent tuple.
		 */
		(*storeData->printTupleCallback)(
									LOG,
									"DIAGNOSE",
									&persistentTid,
									values);
	}
	
	PersistentStore_EndScan(&storeScan);

	pfree(values);
}