void TestGetBlock(CuTest* tc) { buffer_t *buf = BufferCreate(); uint8_t data1[4] = { 1, 2, 3, 4 }; BufferAppend(buf, data1, sizeof(data1)); uint8_t *block; // Test case when not enough data in the buffer. CuAssertIntEquals(tc, 0, BufferGetBlock(buf, &block, 0, 8)); // Add more data to the buffer. uint8_t data2[4] = { 5, 6, 7, 8 }; BufferAppend(buf, data2, sizeof(data2)); // Test case when enough data in the buffer. CuAssertIntEquals(tc, 1, BufferGetBlock(buf, &block, 0, 8)); uint8_t check_data[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; CuAssertIntEquals(tc, 0, memcmp(block, check_data, 8)); BufferDestroy(&buf); }
/* * Write a backup block if needed when we are setting a hint. Note that * this may be called for a variety of page types, not just heaps. * * Callable while holding just share lock on the buffer content. * * We can't use the plain backup block mechanism since that relies on the * Buffer being exclusively locked. Since some modifications (setting LSN, hint * bits) are allowed in a sharelocked buffer that can lead to wal checksum * failures. So instead we copy the page and insert the copied data as normal * record data. * * We only need to do something if page has not yet been full page written in * this checkpoint round. The LSN of the inserted wal record is returned if we * had to write, InvalidXLogRecPtr otherwise. * * It is possible that multiple concurrent backends could attempt to write WAL * records. In that case, multiple copies of the same block would be recorded * in separate WAL records by different backends, though that is still OK from * a correctness perspective. */ XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std) { XLogRecPtr recptr = InvalidXLogRecPtr; XLogRecPtr lsn; XLogRecPtr RedoRecPtr; /* * Ensure no checkpoint can change our view of RedoRecPtr. */ Assert(MyPgXact->delayChkpt); /* * Update RedoRecPtr so that we can make the right decision */ RedoRecPtr = GetRedoRecPtr(); /* * We assume page LSN is first data on *every* page that can be passed to * XLogInsert, whether it has the standard page layout or not. Since we're * only holding a share-lock on the page, we must take the buffer header * lock when we look at the LSN. */ lsn = BufferGetLSNAtomic(buffer); if (lsn <= RedoRecPtr) { int flags; char copied_buffer[BLCKSZ]; char *origdata = (char *) BufferGetBlock(buffer); RelFileNode rnode; ForkNumber forkno; BlockNumber blkno; /* * Copy buffer so we don't have to worry about concurrent hint bit or * lsn updates. We assume pd_lower/upper cannot be changed without an * exclusive lock, so the contents bkp are not racy. */ if (buffer_std) { /* Assume we can omit data between pd_lower and pd_upper */ Page page = BufferGetPage(buffer); uint16 lower = ((PageHeader) page)->pd_lower; uint16 upper = ((PageHeader) page)->pd_upper; memcpy(copied_buffer, origdata, lower); memcpy(copied_buffer + upper, origdata + upper, BLCKSZ - upper); } else memcpy(copied_buffer, origdata, BLCKSZ); XLogBeginInsert(); flags = REGBUF_FORCE_IMAGE; if (buffer_std) flags |= REGBUF_STANDARD; BufferGetTag(buffer, &rnode, &forkno, &blkno); XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags); recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT); } return recptr; }
static int FileRepPrimary_ResyncBufferPoolIncrementalWrite(ChangeTrackingRequest *request) { int status = STATUS_OK; Page page; Buffer buf; BlockNumber numBlocks = 0; SMgrRelation smgr_relation = NULL; char relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1]; int ii; XLogRecPtr loc; XLogRecPtr loc1; int count = 0; int thresholdCount = 0; bool mirrorDataLossOccurred = FALSE; int NumberOfRelations = request->count; FileRepResyncHashEntry_s entry; ChangeTrackingResult *result = NULL; while (1) { /* allow flushing buffers from buffer pool during scan */ FileRepResync_SetReadBufferRequest(); if ((result = ChangeTracking_GetChanges(request)) != NULL) { FileRepResync_ResetReadBufferRequest(); for (ii = 0; ii < result->count; ii++) { if (smgr_relation == NULL) { NumberOfRelations--; smgr_relation = smgropen(result->entries[ii].relFileNode); snprintf(relidstr, sizeof(relidstr), "%u/%u/%u", smgr_relation->smgr_rnode.spcNode, smgr_relation->smgr_rnode.dbNode, smgr_relation->smgr_rnode.relNode); numBlocks = smgrnblocks(smgr_relation); if (Debug_filerep_print) elog(LOG, "resynchronize buffer pool relation '%u/%u/%u' " "number of blocks:'%u' ", smgr_relation->smgr_rnode.spcNode, smgr_relation->smgr_rnode.dbNode, smgr_relation->smgr_rnode.relNode, numBlocks); thresholdCount = Min(numBlocks, 1024); } loc1 = result->entries[ii].lsn_end; /* * if relation was truncated then block_num from change tracking can be beyond numBlocks */ if (result->entries[ii].block_num >= numBlocks) { ereport(LOG, (errmsg("could not resynchonize buffer pool relation '%s' block '%d' (maybe due to truncate), " "lsn change tracking '%s(%u/%u)' " "number of blocks '%d' ", relidstr, result->entries[ii].block_num, XLogLocationToString(&loc1), loc1.xlogid, loc1.xrecoff, numBlocks), FileRep_errcontext())); goto flush_check; } /* allow flushing buffers from buffer pool during scan */ FileRepResync_SetReadBufferRequest(); buf = ReadBuffer_Resync(smgr_relation, result->entries[ii].block_num, relidstr); FileRepResync_ResetReadBufferRequest(); Assert(result->entries[ii].block_num < numBlocks); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(buf); loc = PageGetLSN(page); if(Debug_filerep_print) { elog(LOG, "incremental resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn page '%s(%u/%u)' " "lsn end change tracking '%s(%u/%u)' ", relidstr, numBlocks, result->entries[ii].block_num, XLogLocationToString(&loc), loc.xlogid, loc.xrecoff, XLogLocationToString(&loc1), result->entries[ii].lsn_end.xlogid, result->entries[ii].lsn_end.xrecoff); } else { char tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN]; snprintf(tmpBuf, sizeof(tmpBuf), "incremental resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn page '%s(%u/%u)' ", relidstr, numBlocks, result->entries[ii].block_num, XLogLocationToString(&loc), loc.xlogid, loc.xrecoff); FileRep_InsertConfigLogEntry(tmpBuf); snprintf(tmpBuf, sizeof(tmpBuf), "incremental resync buffer pool identifier '%s' lsn end change tracking '%s(%u/%u)' ", relidstr, XLogLocationToString(&loc1), result->entries[ii].lsn_end.xlogid, result->entries[ii].lsn_end.xrecoff); FileRep_InsertConfigLogEntry(tmpBuf); } if (XLByteLE(result->entries[ii].lsn_end, PageGetLSN(page))) { if (! XLByteEQ(PageGetLSN(page), result->entries[ii].lsn_end)) { ereport(LOG, (errmsg("Resynchonize buffer pool relation '%s' block '%d' has page lsn less than CT lsn, " "lsn end change tracking '%s(%u/%u)' lsn page '%s(%u/%u)' " "number of blocks '%d'", relidstr, result->entries[ii].block_num, XLogLocationToString(&loc), loc.xlogid, loc.xrecoff, XLogLocationToString(&loc1), loc1.xlogid, loc1.xrecoff, numBlocks), FileRep_errcontext())); } /* * It's safe and better to perform write of the page to mirror, * for this case, as primary and mirror data pages should always * be same. So, we might do some extra work but definitely won't * loose out blocks, or error out and need to perform full recovery. * Need to cover for this case as there are some known scenarios where * CT file can have extra records which should have been discarded, * but as we loose out information of xlog LSN cannot be discarded. * One such case is when CT_TRANSIENT being compacted to CT_COMPACT * with specific xlog LSN (to discard extra records) in CT mode gets * interrupted by resync. Compaction during Resync collects all the * CT records and doesn't have xlog LSN information to discard any * extra records from CT_TRANSIENT. */ smgrwrite(smgr_relation, result->entries[ii].block_num, (char *)BufferGetBlock(buf), FALSE); } #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( FileRepResyncWorker, DDLNotSpecified, "", // databaseName ""); // tableName #endif UnlockReleaseBuffer(buf); #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( FileRepResyncWorker, DDLNotSpecified, "", // databaseName ""); // tableName #endif flush_check: if (((ii + 1) == result->count) || ! (result->entries[ii].relFileNode.spcNode == result->entries[ii+1].relFileNode.spcNode && result->entries[ii].relFileNode.dbNode == result->entries[ii+1].relFileNode.dbNode && result->entries[ii].relFileNode.relNode == result->entries[ii+1].relFileNode.relNode)) { if (result->ask_for_more == false) { smgrimmedsync(smgr_relation); smgrclose(smgr_relation); smgr_relation = NULL; FileRep_GetRelationPath( entry.fileName, result->entries[ii].relFileNode, 0 /* segment file number is always 0 for Buffer Pool */); status = FileRepResync_UpdateEntry(&entry); if (status != STATUS_OK) { break; } } } if (count > thresholdCount) { count = 0; FileRepSubProcess_ProcessSignals(); if (! (FileRepSubProcess_GetState() == FileRepStateReady && dataState == DataStateInResync)) { mirrorDataLossOccurred = TRUE; break; } } else count++; } // for (ii = 0; ii < result->count; ii++) } // if ((result = ChangeTracking_GetChanges(request)) != NULL) FileRepResync_ResetReadBufferRequest(); if (result != NULL && result->ask_for_more == true) { Assert(request->count == 1); request->entries[0].lsn_start = result->next_start_lsn; } else { break; } } // while(1) ChangeTracking_FreeRequest(request); ChangeTracking_FreeResult(result); Insist(NumberOfRelations == 0); if (mirrorDataLossOccurred) status = STATUS_ERROR; return status; }
static int FileRepPrimary_ResyncWrite(FileRepResyncHashEntry_s *entry) { int status = STATUS_OK; Page page; Buffer buf; BlockNumber numBlocks; BlockNumber blkno; SMgrRelation smgr_relation; char relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1]; XLogRecPtr loc; int count = 0; int thresholdCount = 0; bool mirrorDataLossOccurred = FALSE; switch (entry->relStorageMgr) { case PersistentFileSysRelStorageMgr_BufferPool: switch (entry->mirrorDataSynchronizationState) { case MirroredRelDataSynchronizationState_BufferPoolScanIncremental: case MirroredRelDataSynchronizationState_FullCopy: smgr_relation = smgropen(entry->relFileNode); numBlocks = smgrnblocks(smgr_relation); snprintf(relidstr, sizeof(relidstr), "%u/%u/%u", smgr_relation->smgr_rnode.spcNode, smgr_relation->smgr_rnode.dbNode, smgr_relation->smgr_rnode.relNode); if (Debug_filerep_print) elog(LOG, "resync buffer pool relation '%s' number of blocks '%d' ", relidstr, numBlocks); thresholdCount = Min(numBlocks, 1024); /* * required in order to report how many blocks were synchronized * if gp_persistent_relation_node does not return that information */ if (entry->mirrorBufpoolResyncChangedPageCount == 0) { entry->mirrorBufpoolResyncChangedPageCount = numBlocks - entry->mirrorBufpoolResyncCkptBlockNum; } for (blkno = entry->mirrorBufpoolResyncCkptBlockNum; blkno < numBlocks; blkno++) { XLogRecPtr endResyncLSN = (isFullResync() ? FileRepResync_GetEndFullResyncLSN() : FileRepResync_GetEndIncrResyncLSN()); #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( FileRepResyncWorkerRead, DDLNotSpecified, "", //databaseName ""); // tableName #endif FileRepResync_SetReadBufferRequest(); buf = ReadBuffer_Resync(smgr_relation, blkno, relidstr); FileRepResync_ResetReadBufferRequest(); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(buf); loc = PageGetLSN(page); if (Debug_filerep_print) { elog(LOG, "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' " "lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ", relidstr, numBlocks, blkno, XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc), entry->mirrorBufpoolResyncCkptLoc.xlogid, entry->mirrorBufpoolResyncCkptLoc.xrecoff, XLogLocationToString(&loc), loc.xlogid, loc.xrecoff, XLogLocationToString(&endResyncLSN), endResyncLSN.xlogid, endResyncLSN.xrecoff); } else { char tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN]; snprintf(tmpBuf, sizeof(tmpBuf), "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' ", relidstr, numBlocks, blkno, XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc), entry->mirrorBufpoolResyncCkptLoc.xlogid, entry->mirrorBufpoolResyncCkptLoc.xrecoff); FileRep_InsertConfigLogEntry(tmpBuf); snprintf(tmpBuf, sizeof(tmpBuf), "full resync buffer pool identifier '%s' lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ", relidstr, XLogLocationToString(&loc), loc.xlogid, loc.xrecoff, XLogLocationToString(&endResyncLSN), endResyncLSN.xlogid, endResyncLSN.xrecoff); FileRep_InsertConfigLogEntry(tmpBuf); } if (XLByteLE(PageGetLSN(page), endResyncLSN) && XLByteLE(entry->mirrorBufpoolResyncCkptLoc, PageGetLSN(page))) { smgrwrite(smgr_relation, blkno, (char *)BufferGetBlock(buf), FALSE); } #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( FileRepResyncWorker, DDLNotSpecified, "", // databaseName ""); // tableName #endif UnlockReleaseBuffer(buf); if (count > thresholdCount) { count = 0; FileRepSubProcess_ProcessSignals(); if (! (FileRepSubProcess_GetState() == FileRepStateReady && dataState == DataStateInResync)) { mirrorDataLossOccurred = TRUE; break; } } else count++; } if (mirrorDataLossOccurred) break; if (entry->mirrorDataSynchronizationState != MirroredRelDataSynchronizationState_FullCopy) { LockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock); numBlocks = smgrnblocks(smgr_relation); smgrtruncate(smgr_relation, numBlocks, TRUE /* isTemp, TRUE means to not record in XLOG */, FALSE /* isLocalBuf */, &entry->persistentTid, entry->persistentSerialNum); UnlockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock); } smgrimmedsync(smgr_relation); smgrclose(smgr_relation); smgr_relation = NULL; break; case MirroredRelDataSynchronizationState_None: case MirroredRelDataSynchronizationState_DataSynchronized: break; default: ereport(LOG, (errmsg("could not resynchronize relation '%u/%u/%u' " "mirror synchronization state:'%s(%d)' ", entry->relFileNode.relNode, entry->relFileNode.spcNode, entry->relFileNode.dbNode, MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState), entry->mirrorDataSynchronizationState))); break; } break; case PersistentFileSysRelStorageMgr_AppendOnly: { MirroredAppendOnlyOpen mirroredOpen; int primaryError; bool mirrorDataLossOccurred; char *buffer = NULL; int64 endOffset = entry->mirrorAppendOnlyNewEof; int64 startOffset = entry->mirrorAppendOnlyLossEof; int32 bufferLen = 0; int retval = 0; switch (entry->mirrorDataSynchronizationState) { case MirroredRelDataSynchronizationState_AppendOnlyCatchup: case MirroredRelDataSynchronizationState_FullCopy: /* * required in order to report how many blocks were synchronized * if gp_persistent_relation_node does not return that information */ if (entry->mirrorBufpoolResyncChangedPageCount == 0) { entry->mirrorBufpoolResyncChangedPageCount = (endOffset - startOffset) / BLCKSZ; } /* * The MirroredAppendOnly_OpenResynchonize routine knows we are a resynch worker and * will open BOTH, but write only the MIRROR!!! */ MirroredAppendOnly_OpenResynchonize( &mirroredOpen, &entry->relFileNode, entry->segmentFileNum, startOffset, &primaryError, &mirrorDataLossOccurred); if (primaryError != 0) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not open file %u/%u/%u.%u : %s", entry->relFileNode.dbNode, entry->relFileNode.spcNode, entry->relFileNode.relNode, entry->segmentFileNum, strerror(primaryError)))); break; } if (mirrorDataLossOccurred) break; /* AO and CO Data Store writes 64k size by default */ bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset); buffer = (char*) palloc(bufferLen); if (buffer == NULL) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), (errmsg("not enough memory for resynchronization")))); MemSet(buffer, 0, bufferLen); while (startOffset < endOffset) { retval = MirroredAppendOnly_Read( &mirroredOpen, buffer, bufferLen); if (retval != bufferLen) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not read from position:" INT64_FORMAT " in file %u/%u/%u.%u : %m", startOffset, entry->relFileNode.dbNode, entry->relFileNode.spcNode, entry->relFileNode.relNode, entry->segmentFileNum))); break; } MirroredAppendOnly_Append( &mirroredOpen, buffer, bufferLen, &primaryError, &mirrorDataLossOccurred); if (mirrorDataLossOccurred) break; Assert(primaryError == 0); // No primary writes as resync worker. startOffset += bufferLen; /* AO and CO Data Store writes 64k size by default */ bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset); } if (buffer) { pfree(buffer); buffer = NULL; } if (mirrorDataLossOccurred) break; /* Flush written data on Mirror */ MirroredAppendOnly_Flush( &mirroredOpen, &primaryError, &mirrorDataLossOccurred); if (mirrorDataLossOccurred) break; Assert(primaryError == 0); // Not flushed on primary as resync worker. /* Close Primary and Mirror */ MirroredAppendOnly_Close( &mirroredOpen, &mirrorDataLossOccurred); break; case MirroredRelDataSynchronizationState_None: case MirroredRelDataSynchronizationState_DataSynchronized: break; default: ereport(LOG, (errmsg("could not resynchronize relation '%u/%u/%u' " "mirror synchronization state:'%s(%d)' ", entry->relFileNode.relNode, entry->relFileNode.spcNode, entry->relFileNode.dbNode, MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState), entry->mirrorDataSynchronizationState))); break; } break; } //case default: Assert(0); break; } //switch if (mirrorDataLossOccurred) status = STATUS_ERROR; return status; }