Exemple #1
0
Fichier : aomd.c Projet : 50wu/gpdb
/*
 * Close an Append Only relation file segment
 */
void
CloseAOSegmentFile(MirroredAppendOnlyOpen *mirroredOpen)
{
	bool mirrorDataLossOccurred;	// UNDONE: We need to do something now...

	Assert(mirroredOpen->primaryFile > 0);
	
	MirroredAppendOnly_Close(
						mirroredOpen,
						&mirrorDataLossOccurred);
}
static int
FileRepPrimary_ResyncWrite(FileRepResyncHashEntry_s	*entry)
{

	int				status = STATUS_OK;
	Page			page;
	Buffer			buf; 
	BlockNumber		numBlocks;
	BlockNumber		blkno;
	SMgrRelation	smgr_relation;
	char			relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1];
	XLogRecPtr		loc;
	int				count = 0;
	int				thresholdCount = 0;
	bool			mirrorDataLossOccurred = FALSE;
		
	switch (entry->relStorageMgr)
	{

		case PersistentFileSysRelStorageMgr_BufferPool:
			
			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_BufferPoolScanIncremental:
				case MirroredRelDataSynchronizationState_FullCopy:

					smgr_relation = smgropen(entry->relFileNode);
					
					numBlocks = smgrnblocks(smgr_relation);

					snprintf(relidstr, sizeof(relidstr), "%u/%u/%u",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode);

					if (Debug_filerep_print)
						elog(LOG, "resync buffer pool relation '%s' number of blocks '%d' ",
							 relidstr, numBlocks);

					thresholdCount = Min(numBlocks, 1024);
					
					/* 
					 * required in order to report how many blocks were synchronized 
					 * if gp_persistent_relation_node does not return that information 
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = numBlocks - entry->mirrorBufpoolResyncCkptBlockNum;
					}
					
					for (blkno = entry->mirrorBufpoolResyncCkptBlockNum; blkno < numBlocks; blkno++) 
					{
						XLogRecPtr	endResyncLSN = (isFullResync() ? 
													FileRepResync_GetEndFullResyncLSN() :
													FileRepResync_GetEndIncrResyncLSN());
#ifdef FAULT_INJECTOR
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorkerRead,
													   DDLNotSpecified,
													   "",	//databaseName
													   ""); // tableName
#endif				
						
						FileRepResync_SetReadBufferRequest();
						buf = ReadBuffer_Resync(smgr_relation, blkno, relidstr);
						FileRepResync_ResetReadBufferRequest();
						
						LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
						page = BufferGetPage(buf);
						
						loc = PageGetLSN(page);
						
						if (Debug_filerep_print)
						{
							elog(LOG, 
									 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' "
									 "lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
									 relidstr,
									 numBlocks,
									 blkno,
									 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
									 entry->mirrorBufpoolResyncCkptLoc.xlogid,
									 entry->mirrorBufpoolResyncCkptLoc.xrecoff,
									 XLogLocationToString(&loc),
									 loc.xlogid,
									 loc.xrecoff,
									 XLogLocationToString(&endResyncLSN),
									 endResyncLSN.xlogid,
									 endResyncLSN.xrecoff);
						}
						else
						{
							char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];
							
							snprintf(tmpBuf, sizeof(tmpBuf), 
									 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' ",
									 relidstr,
									 numBlocks,
									 blkno,
									 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
									 entry->mirrorBufpoolResyncCkptLoc.xlogid,
									 entry->mirrorBufpoolResyncCkptLoc.xrecoff);
														
							FileRep_InsertConfigLogEntry(tmpBuf);
							
							snprintf(tmpBuf, sizeof(tmpBuf), 
									 "full resync buffer pool identifier '%s' lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
									 relidstr,
									 XLogLocationToString(&loc),
									 loc.xlogid,
									 loc.xrecoff,
									 XLogLocationToString(&endResyncLSN),
									 endResyncLSN.xlogid,
									 endResyncLSN.xrecoff);
							
							FileRep_InsertConfigLogEntry(tmpBuf);
							
						}
						
						if (XLByteLE(PageGetLSN(page), endResyncLSN) &&
							XLByteLE(entry->mirrorBufpoolResyncCkptLoc, PageGetLSN(page))) 
						{
							smgrwrite(smgr_relation, 
									  blkno,
									  (char *)BufferGetBlock(buf),
									  FALSE);
						}
						
#ifdef FAULT_INJECTOR	
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorker, 
													   DDLNotSpecified,
													   "",	// databaseName
													   ""); // tableName
#endif				
						
						UnlockReleaseBuffer(buf);
						
						if (count > thresholdCount)
						{
							count = 0;
							FileRepSubProcess_ProcessSignals();
							
							if (! (FileRepSubProcess_GetState() == FileRepStateReady && 
								   dataState == DataStateInResync))
							{
								mirrorDataLossOccurred = TRUE;
								break;
							}
						}
						else
							count++;
					}
						
					if (mirrorDataLossOccurred)
						break;

					if (entry->mirrorDataSynchronizationState != MirroredRelDataSynchronizationState_FullCopy)
					{
						LockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);
					
						numBlocks = smgrnblocks(smgr_relation);
					
						smgrtruncate(smgr_relation,
								 numBlocks,
								 TRUE /* isTemp, TRUE means to not record in XLOG */,
								 FALSE /* isLocalBuf */,
								 &entry->persistentTid,
								 entry->persistentSerialNum);
								 
						UnlockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);
					}
					
					smgrimmedsync(smgr_relation);
					smgrclose(smgr_relation);
					
					smgr_relation = NULL;
					break;
					
				case MirroredRelDataSynchronizationState_None:										
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;
					
				default:
					ereport(LOG, 
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.relNode,
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}
			break;
			
		case PersistentFileSysRelStorageMgr_AppendOnly:
		{
			MirroredAppendOnlyOpen	mirroredOpen;
			int						primaryError;
			bool					mirrorDataLossOccurred;
			char					*buffer = NULL;
			int64					endOffset = entry->mirrorAppendOnlyNewEof;
			int64					startOffset = entry->mirrorAppendOnlyLossEof;
			int32					bufferLen = 0;
			int						retval = 0;
			
			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_AppendOnlyCatchup:
				case MirroredRelDataSynchronizationState_FullCopy:
					
					/* 
					 * required in order to report how many blocks were synchronized 
					 * if gp_persistent_relation_node does not return that information 
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = (endOffset - startOffset) / BLCKSZ;
					}					
					
					/*
					 * The MirroredAppendOnly_OpenResynchonize routine knows we are a resynch worker and
					 * will open BOTH, but write only the MIRROR!!!
					 */
					MirroredAppendOnly_OpenResynchonize(
											&mirroredOpen, 
											&entry->relFileNode,
											entry->segmentFileNum,
											startOffset,
											&primaryError,
											&mirrorDataLossOccurred);
					if (primaryError != 0)
					{
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not open file %u/%u/%u.%u : %s",
										entry->relFileNode.dbNode,
										entry->relFileNode.spcNode,
										entry->relFileNode.relNode,
										entry->segmentFileNum,
										strerror(primaryError))));
						
						break;
					}

					if (mirrorDataLossOccurred)
						break;
					
					/* AO and CO Data Store writes 64k size by default */
					bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);
					buffer = (char*) palloc(bufferLen);
					if (buffer == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_OUT_OF_MEMORY),
								 (errmsg("not enough memory for resynchronization"))));
					
					MemSet(buffer, 0, bufferLen);
					
					while (startOffset < endOffset)
					{
						retval = MirroredAppendOnly_Read(
												&mirroredOpen,
												buffer,
												bufferLen);
						
						if (retval != bufferLen) 
						{
							ereport(ERROR,
									(errcode_for_file_access(),
									 errmsg("could not read from position:" INT64_FORMAT " in file %u/%u/%u.%u : %m",
											startOffset, 
											entry->relFileNode.dbNode,
											entry->relFileNode.spcNode,
											entry->relFileNode.relNode,
											entry->segmentFileNum)));
							
							break;
						}						
						
						MirroredAppendOnly_Append(
											  &mirroredOpen,
											  buffer,
											  bufferLen,
											  &primaryError,
											  &mirrorDataLossOccurred);
						
						if (mirrorDataLossOccurred)
							break;

						Assert(primaryError == 0);	// No primary writes as resync worker.
						
						startOffset += bufferLen;
						/* AO and CO Data Store writes 64k size by default */
						bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);						
					}
					
					if (buffer) 
					{
						pfree(buffer);
						buffer = NULL;
					}
					
					if (mirrorDataLossOccurred)
						break;
					
					/* Flush written data on Mirror */
					MirroredAppendOnly_Flush(
										&mirroredOpen,
										&primaryError,
										&mirrorDataLossOccurred);
					if (mirrorDataLossOccurred)
						break;
					
					Assert(primaryError == 0);	// Not flushed on primary as resync worker.
					
					/* Close Primary and Mirror */
					MirroredAppendOnly_Close(
										&mirroredOpen,
										&mirrorDataLossOccurred);
								
					break;
					
				case MirroredRelDataSynchronizationState_None:										
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;					
					
				default:
					ereport(LOG, 
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.relNode,
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}
			
			break;
		}	//case
		default:
			Assert(0);
			break;
	} //switch
	
	if (mirrorDataLossOccurred)
		status = STATUS_ERROR;
	
	return status;
}
Exemple #3
0
/*
 * Opens the next segment file to write.  The file must already exist.
 * This routine is responsible for seeking to the proper write location
 * given the logical EOF.
 *
 * @filePathName:	The name of the segment file to open.
 * @logicalEof:		The last committed write transaction's EOF
 * 					value to use as the end of the segment file.
 * @parquet_file		The file handler of segment file
 */
static void OpenSegmentFile(
		MirroredAppendOnlyOpen *mirroredOpen,
		char *filePathName,
		int64 logicalEof,
		RelFileNode *relFileNode,
		int32 segmentFileNum,
		char *relname,
		File *parquet_file,
		File *parquet_file_previous,
		CompactProtocol **protocol_read,
		TupleDesc tableAttrs,
		ParquetMetadata *parquetMetadata,
		int64 *fileLen,
		int64 *fileLen_uncompressed,
		int *previous_rowgroupcnt) {
	int primaryError;

	File file;

	int64 seekResult;

	Assert(filePathName != NULL);

	bool metadataExist = false;

	/*
	 * Open the file for metadata reading.
	 */
	MirroredAppendOnly_OpenReadWrite(mirroredOpen, relFileNode, segmentFileNum,
			relname, logicalEof, true, &primaryError);
	if (primaryError != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
						errmsg("file open error when opening file "
								"'%s' for relation '%s': %s", filePathName, relname,
								strerror(primaryError))));

	*parquet_file_previous = mirroredOpen->primaryFile;

	int64 fileSize = FileSeek(*parquet_file_previous, 0, SEEK_END);
	if (fileSize < 0){
		ereport(ERROR,
				(errcode_for_file_access(),
						errmsg("file seek error in file '%s' for relation "
								"'%s'", filePathName, relname)));
	}
	if (logicalEof > fileSize) {
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
						errmsg("logical eof exceeds file size in file '%s' for relation '%s'",
								filePathName,
								relname)));
	}

	/*read parquet footer, get metadata information before rowgroup metadata*/
	metadataExist = readParquetFooter(*parquet_file_previous, parquetMetadata, protocol_read,
			logicalEof, filePathName);

	*previous_rowgroupcnt = (*parquetMetadata)->blockCount;

	/*
	 * Open the file for writing.
	 */
	MirroredAppendOnly_OpenReadWrite(mirroredOpen, relFileNode, segmentFileNum,
			relname, logicalEof, false, &primaryError);
	if (primaryError != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
						errmsg("file open error when opening file '%s' "
								"for relation '%s': %s", filePathName, relname,
								strerror(primaryError))));

	file = mirroredOpen->primaryFile;

	seekResult = FileNonVirtualTell(file);
	if (seekResult != logicalEof) {
		/* previous transaction is aborted truncate file*/
		if (FileTruncate(file, logicalEof)) {

			MirroredAppendOnly_Close(mirroredOpen);
			ereport(ERROR,
					(errcode_for_file_access(),
						errmsg("file truncate error in file '%s' for relation "
								"'%s' to position " INT64_FORMAT ": %s",
								filePathName, relname, logicalEof,
								strerror(errno))));
		}
	}

	*parquet_file = file;

	/*if metadata not exist, should initialize the metadata, and write out file header*/
	if (metadataExist == false) {
		/* init parquet metadata information, init schema information using table attributes,
		 * and may get existing information from data file*/
		initparquetMetadata(*parquetMetadata, tableAttrs, *parquet_file);

		/*should judge whether file already exists, if a new file, should write header out*/
		writeParquetHeader(*parquet_file, filePathName, fileLen, fileLen_uncompressed);
	}
	else {
		if (!checkAndSyncMetadata(*parquetMetadata, tableAttrs))
		{
			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERNAL_ERROR),
						errmsg("parquet storage write file's metadata incompatible "
								"with table's schema for relation '%s'.",
						relname)));
		}
	}
}