static void cdb_position_to_end(void) { XLogRecPtr redoCheckpointLoc; CheckPoint redoCheckpoint; XLogRecPtr endLocation; // Throw in extra new line to make log more readable. elog(LOG,"--------------------------"); XLogGetRecoveryStart("QDSYNC", "to get initial restart location", &redoCheckpointLoc, &redoCheckpoint); syncRedoLoc = redoCheckpoint.redo; // UNDONE: Minimum of redoCheckpointLoc and redoCheckpoint.redo? XLogScanForStandbyEndLocation(&syncRedoLoc, &endLocation); ereport(LOG, (errmsg("QDSYNC: reporting recovery start location %s and scanned end location %s", XLogLocationToString(&syncRedoLoc), XLogLocationToString2(&endLocation)))); // Throw in extra new line to make log more readable. elog(LOG,"--------------------------"); /* * Open up end location segment and set offset to end. */ openXlogEnd(&endLocation); /* * Extra reply information that gives our standby master XLOG end location * to the primary. */ putEndLocationReply(&endLocation); }
/*
 * FileRepPrimary_ResyncBufferPoolIncrementalWrite
 *
 * Incremental resynchronization of buffer pool relations driven by change
 * tracking (CT) records.
 *
 * Repeatedly calls ChangeTracking_GetChanges(request).  For every returned
 * entry the relation is opened (once per run of entries with the same
 * relFileNode), the block is read through ReadBuffer_Resync, locked
 * exclusively, and written with smgrwrite() when the page LSN is less than
 * or equal to the entry's CT end LSN.  When the last entry of a relation has
 * been handled and the result does not ask for more, the relation is
 * smgrimmedsync'ed and closed and FileRepResync_UpdateEntry() is called for
 * it.
 *
 * Every more-than-thresholdCount entries, signals are processed and the
 * loop is abandoned with mirrorDataLossOccurred set if the process is no
 * longer in (FileRepStateReady, DataStateInResync).
 *
 * request: change tracking request; freed by this function before return.
 *
 * Returns STATUS_OK on success, the failing status of
 * FileRepResync_UpdateEntry(), or STATUS_ERROR when mirror data loss was
 * detected.
 *
 * NOTE(review): when the for-loop is left early (update failure or data
 * loss) the relation may still be open and NumberOfRelations may be
 * non-zero when the Insist below runs -- confirm that is intended.
 */
static int
FileRepPrimary_ResyncBufferPoolIncrementalWrite(ChangeTrackingRequest *request)
{
	int				status = STATUS_OK;
	Page			page;
	Buffer			buf;
	BlockNumber		numBlocks = 0;
	SMgrRelation	smgr_relation = NULL;
	char			relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1];
	int				ii;
	XLogRecPtr		loc;		/* LSN of the page in the buffer pool */
	XLogRecPtr		loc1;		/* end LSN recorded by change tracking */
	int				count = 0;
	int				thresholdCount = 0;
	bool			mirrorDataLossOccurred = FALSE;
	int				NumberOfRelations = request->count;

	FileRepResyncHashEntry_s	entry;
	ChangeTrackingResult	   *result = NULL;

	while (1)
	{
		/* allow flushing buffers from buffer pool during scan */
		FileRepResync_SetReadBufferRequest();
		if ((result = ChangeTracking_GetChanges(request)) != NULL)
		{
			FileRepResync_ResetReadBufferRequest();

			for (ii = 0; ii < result->count; ii++)
			{
				/* First entry of a new relation: open it and cache its size. */
				if (smgr_relation == NULL)
				{
					NumberOfRelations--;

					smgr_relation = smgropen(result->entries[ii].relFileNode);

					snprintf(relidstr, sizeof(relidstr), "%u/%u/%u",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode);

					numBlocks = smgrnblocks(smgr_relation);

					if (Debug_filerep_print)
						elog(LOG, "resynchronize buffer pool relation '%u/%u/%u' "
							 "number of blocks:'%u' ",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode,
							 numBlocks);

					thresholdCount = Min(numBlocks, 1024);
				}

				loc1 = result->entries[ii].lsn_end;

				/*
				 * if relation was truncated then block_num from change tracking can be beyond numBlocks
				 */
				if (result->entries[ii].block_num >= numBlocks)
				{
					ereport(LOG,
							(errmsg("could not resynchonize buffer pool relation '%s' block '%d' (maybe due to truncate), "
									"lsn change tracking '%s(%u/%u)' "
									"number of blocks '%d' ",
									relidstr,
									result->entries[ii].block_num,
									XLogLocationToString(&loc1),
									loc1.xlogid,
									loc1.xrecoff,
									numBlocks),
							 FileRep_errcontext()));

					goto flush_check;
				}

				/* allow flushing buffers from buffer pool during scan */
				FileRepResync_SetReadBufferRequest();
				buf = ReadBuffer_Resync(smgr_relation,
										result->entries[ii].block_num,
										relidstr);
				FileRepResync_ResetReadBufferRequest();

				Assert(result->entries[ii].block_num < numBlocks);

				LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
				page = BufferGetPage(buf);

				loc = PageGetLSN(page);

				if(Debug_filerep_print)
				{
					elog(LOG,
						 "incremental resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn page '%s(%u/%u)' "
						 "lsn end change tracking '%s(%u/%u)' ",
						 relidstr,
						 numBlocks,
						 result->entries[ii].block_num,
						 XLogLocationToString(&loc),
						 loc.xlogid,
						 loc.xrecoff,
						 XLogLocationToString(&loc1),
						 result->entries[ii].lsn_end.xlogid,
						 result->entries[ii].lsn_end.xrecoff);
				}
				else
				{
					char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];

					snprintf(tmpBuf, sizeof(tmpBuf),
							 "incremental resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn page '%s(%u/%u)' ",
							 relidstr,
							 numBlocks,
							 result->entries[ii].block_num,
							 XLogLocationToString(&loc),
							 loc.xlogid,
							 loc.xrecoff);

					FileRep_InsertConfigLogEntry(tmpBuf);

					snprintf(tmpBuf, sizeof(tmpBuf),
							 "incremental resync buffer pool identifier '%s' lsn end change tracking '%s(%u/%u)' ",
							 relidstr,
							 XLogLocationToString(&loc1),
							 result->entries[ii].lsn_end.xlogid,
							 result->entries[ii].lsn_end.xrecoff);

					FileRep_InsertConfigLogEntry(tmpBuf);
				}

				/*
				 * Write the page when its LSN is at or before the CT end
				 * LSN.  (The operands were previously reversed, which
				 * selected pages *newer* than the CT record and contradicted
				 * both the message and the comment below.)
				 */
				if (XLByteLE(PageGetLSN(page), result->entries[ii].lsn_end))
				{
					if (! XLByteEQ(PageGetLSN(page), result->entries[ii].lsn_end))
					{
						/*
						 * Arguments are ordered to match the labels in the
						 * message: CT end LSN first, then the page LSN.
						 */
						ereport(LOG,
								(errmsg("Resynchonize buffer pool relation '%s' block '%d' has page lsn less than CT lsn, "
										"lsn end change tracking '%s(%u/%u)' lsn page '%s(%u/%u)' "
										"number of blocks '%d'",
										relidstr,
										result->entries[ii].block_num,
										XLogLocationToString(&loc1),
										loc1.xlogid,
										loc1.xrecoff,
										XLogLocationToString(&loc),
										loc.xlogid,
										loc.xrecoff,
										numBlocks),
								 FileRep_errcontext()));
					}

					/*
					 * It's safe and better to perform write of the page to mirror,
					 * for this case, as primary and mirror data pages should always
					 * be same. So, we might do some extra work but definitely won't
					 * lose blocks, or error out and need to perform full recovery.
					 * Need to cover for this case as there are some known scenarios where
					 * CT file can have extra records which should have been discarded,
					 * but as we lose the xlog LSN information they cannot be discarded.
					 * One such case is when CT_TRANSIENT being compacted to CT_COMPACT
					 * with specific xlog LSN (to discard extra records) in CT mode gets
					 * interrupted by resync. Compaction during Resync collects all the
					 * CT records and doesn't have xlog LSN information to discard any
					 * extra records from CT_TRANSIENT.
					 */
					smgrwrite(smgr_relation,
							  result->entries[ii].block_num,
							  (char *)BufferGetBlock(buf),
							  FALSE);
				}

#ifdef FAULT_INJECTOR
				FaultInjector_InjectFaultIfSet(
											   FileRepResyncWorker,
											   DDLNotSpecified,
											   "",	// databaseName
											   ""); // tableName
#endif

				UnlockReleaseBuffer(buf);

#ifdef FAULT_INJECTOR
				FaultInjector_InjectFaultIfSet(
											   FileRepResyncWorker,
											   DDLNotSpecified,
											   "",	// databaseName
											   ""); // tableName
#endif

		flush_check:
				/*
				 * Last entry of the result, or the next entry belongs to a
				 * different relation: finish the current relation.
				 */
				if (((ii + 1) == result->count) ||
					! (result->entries[ii].relFileNode.spcNode == result->entries[ii+1].relFileNode.spcNode &&
					   result->entries[ii].relFileNode.dbNode == result->entries[ii+1].relFileNode.dbNode &&
					   result->entries[ii].relFileNode.relNode == result->entries[ii+1].relFileNode.relNode))
				{
					if (result->ask_for_more == false)
					{
						smgrimmedsync(smgr_relation);
						smgrclose(smgr_relation);
						smgr_relation = NULL;

						FileRep_GetRelationPath(
												entry.fileName,
												result->entries[ii].relFileNode,
												0 /* segment file number is always 0 for Buffer Pool */);

						status = FileRepResync_UpdateEntry(&entry);
						if (status != STATUS_OK)
						{
							break;
						}
					}
				}

				/* Periodically process signals and re-check the resync state. */
				if (count > thresholdCount)
				{
					count = 0;
					FileRepSubProcess_ProcessSignals();

					if (! (FileRepSubProcess_GetState() == FileRepStateReady &&
						   dataState == DataStateInResync))
					{
						mirrorDataLossOccurred = TRUE;
						break;
					}
				}
				else
					count++;
			} // for (ii = 0; ii < result->count; ii++)
		} // if ((result = ChangeTracking_GetChanges(request)) != NULL)

		FileRepResync_ResetReadBufferRequest();

		if (result != NULL && result->ask_for_more == true)
		{
			/* Continue the same (single) relation from where CT stopped. */
			Assert(request->count == 1);
			request->entries[0].lsn_start = result->next_start_lsn;
		}
		else
		{
			break;
		}
	} // while(1)

	ChangeTracking_FreeRequest(request);
	ChangeTracking_FreeResult(result);

	Insist(NumberOfRelations == 0);

	if (mirrorDataLossOccurred)
		status = STATUS_ERROR;

	return status;
}
/*
 * FileRepPrimary_ResyncWrite
 *
 * Resynchronize one relation file described by 'entry' to the mirror.
 *
 * Buffer pool relations (BufferPoolScanIncremental / FullCopy): every block
 * from entry->mirrorBufpoolResyncCkptBlockNum up to the relation's current
 * size is read via ReadBuffer_Resync and written with smgrwrite() when its
 * page LSN lies in [mirrorBufpoolResyncCkptLoc, end-resync LSN].  For
 * non-FullCopy states the relation is then smgrtruncate'd to its current
 * block count under the resync extension lock, and finally synced and
 * closed.
 *
 * Append-only relations (AppendOnlyCatchup / FullCopy): the byte range
 * [mirrorAppendOnlyLossEof, mirrorAppendOnlyNewEof) is read in chunks of at
 * most 2*BLCKSZ and appended, then flushed and closed.
 *
 * States None and DataSynchronized are no-ops; any other state is logged.
 *
 * Returns STATUS_OK, or STATUS_ERROR when mirror data loss was detected.
 * The append-only path used to declare its own 'mirrorDataLossOccurred',
 * shadowing the function-level flag, so data loss detected there was never
 * reflected in the return value; it now shares the single flag.
 *
 * Relfilenode components in log messages are printed as spc/db/rel, the
 * same order used for relidstr.
 *
 * NOTE(review): on the data-loss early exits the smgr relation / mirrored
 * append-only file is left open -- confirm whether cleanup is handled
 * elsewhere.
 */
static int
FileRepPrimary_ResyncWrite(FileRepResyncHashEntry_s *entry)
{
	int				status = STATUS_OK;
	Page			page;
	Buffer			buf;
	BlockNumber		numBlocks;
	BlockNumber		blkno;
	SMgrRelation	smgr_relation;
	char			relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1];
	XLogRecPtr		loc;
	int				count = 0;
	int				thresholdCount = 0;
	bool			mirrorDataLossOccurred = FALSE;

	switch (entry->relStorageMgr)
	{
		case PersistentFileSysRelStorageMgr_BufferPool:

			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_BufferPoolScanIncremental:
				case MirroredRelDataSynchronizationState_FullCopy:

					smgr_relation = smgropen(entry->relFileNode);

					numBlocks = smgrnblocks(smgr_relation);

					snprintf(relidstr, sizeof(relidstr), "%u/%u/%u",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode);

					if (Debug_filerep_print)
						elog(LOG, "resync buffer pool relation '%s' number of blocks '%d' ",
							 relidstr, numBlocks);

					thresholdCount = Min(numBlocks, 1024);

					/*
					 * required in order to report how many blocks were synchronized
					 * if gp_persistent_relation_node does not return that information
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = numBlocks - entry->mirrorBufpoolResyncCkptBlockNum;
					}

					for (blkno = entry->mirrorBufpoolResyncCkptBlockNum; blkno < numBlocks; blkno++)
					{
						XLogRecPtr	endResyncLSN = (isFullResync() ?
													FileRepResync_GetEndFullResyncLSN() :
													FileRepResync_GetEndIncrResyncLSN());

#ifdef FAULT_INJECTOR
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorkerRead,
													   DDLNotSpecified,
													   "",	//databaseName
													   ""); // tableName
#endif

						FileRepResync_SetReadBufferRequest();
						buf = ReadBuffer_Resync(smgr_relation, blkno, relidstr);
						FileRepResync_ResetReadBufferRequest();

						LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
						page = BufferGetPage(buf);

						loc = PageGetLSN(page);

						if (Debug_filerep_print)
						{
							elog(LOG,
								 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' "
								 "lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
								 relidstr,
								 numBlocks,
								 blkno,
								 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
								 entry->mirrorBufpoolResyncCkptLoc.xlogid,
								 entry->mirrorBufpoolResyncCkptLoc.xrecoff,
								 XLogLocationToString(&loc),
								 loc.xlogid,
								 loc.xrecoff,
								 XLogLocationToString(&endResyncLSN),
								 endResyncLSN.xlogid,
								 endResyncLSN.xrecoff);
						}
						else
						{
							char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];

							snprintf(tmpBuf, sizeof(tmpBuf),
									 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' ",
									 relidstr,
									 numBlocks,
									 blkno,
									 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
									 entry->mirrorBufpoolResyncCkptLoc.xlogid,
									 entry->mirrorBufpoolResyncCkptLoc.xrecoff);

							FileRep_InsertConfigLogEntry(tmpBuf);

							snprintf(tmpBuf, sizeof(tmpBuf),
									 "full resync buffer pool identifier '%s' lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
									 relidstr,
									 XLogLocationToString(&loc),
									 loc.xlogid,
									 loc.xrecoff,
									 XLogLocationToString(&endResyncLSN),
									 endResyncLSN.xlogid,
									 endResyncLSN.xrecoff);

							FileRep_InsertConfigLogEntry(tmpBuf);
						}

						/* Only pages whose LSN is inside the resync window are written. */
						if (XLByteLE(PageGetLSN(page), endResyncLSN) &&
							XLByteLE(entry->mirrorBufpoolResyncCkptLoc, PageGetLSN(page)))
						{
							smgrwrite(smgr_relation,
									  blkno,
									  (char *)BufferGetBlock(buf),
									  FALSE);
						}

#ifdef FAULT_INJECTOR
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorker,
													   DDLNotSpecified,
													   "",	// databaseName
													   ""); // tableName
#endif

						UnlockReleaseBuffer(buf);

						/* Periodically process signals and re-check the resync state. */
						if (count > thresholdCount)
						{
							count = 0;
							FileRepSubProcess_ProcessSignals();

							if (! (FileRepSubProcess_GetState() == FileRepStateReady &&
								   dataState == DataStateInResync))
							{
								mirrorDataLossOccurred = TRUE;
								break;
							}
						}
						else
							count++;
					}

					if (mirrorDataLossOccurred)
						break;

					if (entry->mirrorDataSynchronizationState != MirroredRelDataSynchronizationState_FullCopy)
					{
						LockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);

						numBlocks = smgrnblocks(smgr_relation);

						smgrtruncate(smgr_relation,
									 numBlocks,
									 TRUE /* isTemp, TRUE means to not record in XLOG */,
									 FALSE /* isLocalBuf */,
									 &entry->persistentTid,
									 entry->persistentSerialNum);

						UnlockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);
					}

					smgrimmedsync(smgr_relation);
					smgrclose(smgr_relation);

					smgr_relation = NULL;
					break;

				case MirroredRelDataSynchronizationState_None:
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;

				default:
					ereport(LOG,
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									entry->relFileNode.relNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}
			break;

		case PersistentFileSysRelStorageMgr_AppendOnly:
		{
			MirroredAppendOnlyOpen	mirroredOpen;
			int						primaryError;
			char				   *buffer = NULL;
			int64					endOffset = entry->mirrorAppendOnlyNewEof;
			int64					startOffset = entry->mirrorAppendOnlyLossEof;
			int32					bufferLen = 0;
			int						retval = 0;

			/*
			 * No local 'mirrorDataLossOccurred' here on purpose: the
			 * function-level flag must be the one updated so the result is
			 * visible in the returned status.
			 */

			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_AppendOnlyCatchup:
				case MirroredRelDataSynchronizationState_FullCopy:

					/*
					 * required in order to report how many blocks were synchronized
					 * if gp_persistent_relation_node does not return that information
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = (endOffset - startOffset) / BLCKSZ;
					}

					/*
					 * The MirroredAppendOnly_OpenResynchonize routine knows we are a resynch worker and
					 * will open BOTH, but write only the MIRROR!!!
					 */
					MirroredAppendOnly_OpenResynchonize(
														&mirroredOpen,
														&entry->relFileNode,
														entry->segmentFileNum,
														startOffset,
														&primaryError,
														&mirrorDataLossOccurred);
					if (primaryError != 0)
					{
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not open file %u/%u/%u.%u : %s",
										entry->relFileNode.spcNode,
										entry->relFileNode.dbNode,
										entry->relFileNode.relNode,
										entry->segmentFileNum,
										strerror(primaryError))));
						break;
					}

					if (mirrorDataLossOccurred)
						break;

					/* AO and CO Data Store writes 64k size by default */
					bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);
					buffer = (char*) palloc(bufferLen);
					if (buffer == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_OUT_OF_MEMORY),
								 (errmsg("not enough memory for resynchronization"))));

					MemSet(buffer, 0, bufferLen);

					while (startOffset < endOffset)
					{
						retval = MirroredAppendOnly_Read(
														 &mirroredOpen,
														 buffer,
														 bufferLen);

						if (retval != bufferLen)
						{
							ereport(ERROR,
									(errcode_for_file_access(),
									 errmsg("could not read from position:" INT64_FORMAT " in file %u/%u/%u.%u : %m",
											startOffset,
											entry->relFileNode.spcNode,
											entry->relFileNode.dbNode,
											entry->relFileNode.relNode,
											entry->segmentFileNum)));

							break;
						}

						MirroredAppendOnly_Append(
												  &mirroredOpen,
												  buffer,
												  bufferLen,
												  &primaryError,
												  &mirrorDataLossOccurred);

						if (mirrorDataLossOccurred)
							break;

						Assert(primaryError == 0);	// No primary writes as resync worker.

						startOffset += bufferLen;
						/* AO and CO Data Store writes 64k size by default */
						bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);
					}

					if (buffer)
					{
						pfree(buffer);
						buffer = NULL;
					}

					if (mirrorDataLossOccurred)
						break;

					/* Flush written data on Mirror */
					MirroredAppendOnly_Flush(
											 &mirroredOpen,
											 &primaryError,
											 &mirrorDataLossOccurred);
					if (mirrorDataLossOccurred)
						break;

					Assert(primaryError == 0);	// Not flushed on primary as resync worker.

					/* Close Primary and Mirror */
					MirroredAppendOnly_Close(
											 &mirroredOpen,
											 &mirrorDataLossOccurred);

					break;

				case MirroredRelDataSynchronizationState_None:
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;

				default:
					ereport(LOG,
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									entry->relFileNode.relNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}

			break;
		}
		//case default: Assert(0); break;
	} //switch

	if (mirrorDataLossOccurred)
		status = STATUS_ERROR;

	return status;
}
/*
 * WalSendServerDoRequest
 *
 * Execute a single WalSendRequest according to walSendRequest->command:
 *
 *   PositionToEnd    - ask the standby for its transaction log end location
 *                      (stored in originalEndLocation).
 *   Catchup          - while mirroring is in the catching-up state, send the
 *                      WAL between originalEndLocation and the request's
 *                      flushedLocation, then enable mirroring on success or
 *                      disable it on failure.
 *   WriteWalPages    - copy the requested XLOG buffer pages into saveBuffer
 *                      so they can be sent on the next flush.
 *   FlushWalPages    - send the saved pages (if any) to the standby and,
 *                      when the request carries one, the new checkpoint
 *                      location.
 *   CloseForShutdown - run the normal shutdown service routine.
 *
 * Any other command raises ERROR.
 *
 * Fixes relative to the previous version:
 *   - WriteWalPages now ends with 'break'.  It used to fall through into
 *     FlushWalPages, flushing on every write and defeating the
 *     "save copy of data until flush" buffering.
 *   - The two references to the current end location in the flush path are
 *     spelled '&currentEndLocation' (they had been corrupted into a
 *     non-compiling token).
 *
 * NOTE(review): the memcpy into saveBuffer is not bounded against the
 * XLOGbuffers * XLOG_BLCKSZ allocation -- confirm callers cannot accumulate
 * more than that between flushes.
 */
static void
WalSendServerDoRequest(WalSendRequest *walSendRequest)
{
	bool			successful;
	struct timeval	standbyTimeout;

	WalSendServerGetStandbyTimeout(&standbyTimeout);

	switch (walSendRequest->command)
	{
		case PositionToEnd:
			elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "PositionToEnd");
			successful = write_position_to_end(&originalEndLocation,
											   NULL, &walsend_shutdown_requested);
			if (successful)
				elog(LOG,"Standby master returned transaction log end location %s",
					 XLogLocationToString(&originalEndLocation));
			else
			{
				disableQDMirroring_ConnectionError(
					"Unable to connect to standby master and determine transaction log end location",
					GetStandbyErrorString());
				disconnectMirrorQD_SendClose();
			}
			break;

		case Catchup:
			elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Catchup");
			if (isQDMirroringCatchingUp())
			{
				bool	tooFarBehind = false;

				elog(LOG,"Current master transaction log is flushed through location %s",
					 XLogLocationToString(&walSendRequest->flushedLocation));

				if (XLByteLT(originalEndLocation, walSendRequest->flushedLocation))
				{
					/*
					 * Standby master is behind the primary.  Send catchup WAL.
					 */

					/*
					 * Use a TRY block to catch errors from our attempt to read
					 * the primary's WAL.  Errors from sending to the standby
					 * come up as a boolean return (successful).
					 */
					PG_TRY();
					{
						successful = XLogCatchupQDMirror(
													 &originalEndLocation,
													 &walSendRequest->flushedLocation,
													 &standbyTimeout,
													 &walsend_shutdown_requested);
					}
					PG_CATCH();
					{
						/*
						 * Report the error related to reading the primary's WAL
						 * to the server log
						 */

						/*
						 * But first demote the error to something much less
						 * scary.
						 */
						if (!elog_demote(WARNING))
						{
							elog(LOG,"unable to demote error");
							PG_RE_THROW();
						}

						EmitErrorReport();
						FlushErrorState();

						successful = false;
						tooFarBehind = true;
					}
					PG_END_TRY();

					if (successful)
					{
						elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
							 "catchup send from standby end %s through primary flushed location %s",
							 XLogLocationToString(&originalEndLocation),
							 XLogLocationToString2(&walSendRequest->flushedLocation));
					}
				}
				else if (XLByteEQ(originalEndLocation, walSendRequest->flushedLocation))
				{
					elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"Mirror was already caught up");
					successful = true;
				}
				else
				{
					elog(WARNING,"Standby master transaction log location %s is beyond the current master end location %s",
						 XLogLocationToString(&originalEndLocation),
						 XLogLocationToString2(&walSendRequest->flushedLocation));
					successful = false;
				}

				if (successful)
				{
					char	detail[200];
					int		count;

					count = snprintf(
								 detail, sizeof(detail),
								 "Transaction log copied from locations %s through %s to the standby master",
								 XLogLocationToString(&originalEndLocation),
								 XLogLocationToString2(&walSendRequest->flushedLocation));
					if (count >= sizeof(detail))
					{
						ereport(ERROR,
								(errcode(ERRCODE_INTERNAL_ERROR),
								 errmsg("format command string failure")));
					}

					enableQDMirroring("Master mirroring is now synchronized", detail);

					currentEndLocation = walSendRequest->flushedLocation;

					periodicLen = 0;
					periodicLocation = currentEndLocation;
				}
				else
				{
					if (tooFarBehind)
					{
						disableQDMirroring_TooFarBehind(
							"The current master was unable to synchronize the standby master "
							"because the transaction logs on the current master were recycled. "
							"A gpinitstandby (at an appropriate time) will be necessary to copy "
							"over the whole master database to the standby master so it may be synchronized");
					}
					else
					{
						disableQDMirroring_ConnectionError(
							"Connection to the standby master was lost during transaction log catchup",
							GetStandbyErrorString());
					}
					disconnectMirrorQD_SendClose();
				}
			}
			else if (isQDMirroringDisabled())
			{
				elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master Mirror Send: Master mirroring not catching-up (state is disabled)");
			}
			else
			{
				elog(ERROR,"unexpected master mirroring state %s",
					 QDMirroringStateString());
			}
			break;

		case WriteWalPages:
			if (Debug_print_qd_mirroring)
				elog(LOG, "WriteWalPages");

			if (isQDMirroringEnabled())
			{
				char   *from;
				Size	nbytes;
				bool	more= false;

				/*
				 * For now, save copy of data until flush.  This could be
				 * optimized.
				 */
				if (saveBuffer == NULL)
				{
					uint32	totalBufferLen = XLOGbuffers * XLOG_BLCKSZ;

					saveBuffer = malloc(totalBufferLen);
					if (saveBuffer == NULL)
						elog(ERROR,"Could not allocate buffer for xlog data (%d bytes)",
							 totalBufferLen);

					saveBufferLen = 0;
				}

				XLogGetBuffer(walSendRequest->startidx, walSendRequest->npages,
							  &from, &nbytes);

				if (saveBufferLen == 0)
				{
					/* First pages since the last flush: remember where they start. */
					more = false;
					writeLogId = walSendRequest->logId;
					writeLogSeg = walSendRequest->logSeg;
					writeLogOff = walSendRequest->logOff;

					memcpy(saveBuffer, from, nbytes);
					saveBufferLen = nbytes;
				}
				else
				{
					/* Append to the pages already waiting for a flush. */
					more = true;
					memcpy(&saveBuffer[saveBufferLen], from, nbytes);
					saveBufferLen += nbytes;
				}

				if (Debug_print_qd_mirroring)
					elog(LOG,
						 "Master Mirror Send: WriteWalPages (%s) startidx %d, npages %d, timeLineID %d, logId %u, logSeg %u, logOff 0x%X, nbytes 0x%X",
						 (more ? "more" : "new"),
						 walSendRequest->startidx,
						 walSendRequest->npages,
						 walSendRequest->timeLineID,
						 walSendRequest->logId,
						 walSendRequest->logSeg,
						 walSendRequest->logOff,
						 (int)nbytes);
			}
			/* Data stays in saveBuffer until an explicit FlushWalPages. */
			break;

		case FlushWalPages:
			if (Debug_print_qd_mirroring)
				elog(LOG, "FlushWalPages");

			if (isQDMirroringEnabled())
			{
				char	cmd[MAXFNAMELEN + 50];

				if (saveBufferLen == 0)
					successful = true;
				else
				{
					if (snprintf(cmd, sizeof(cmd),"xlog %d %d %d %d",
								 writeLogId, writeLogSeg, writeLogOff,
								 (int)saveBufferLen) >= sizeof(cmd))
						elog(ERROR,"could not create cmd for qd mirror logid %d seg %d",
							 writeLogId, writeLogSeg);

					successful = write_qd_sync(cmd, saveBuffer, saveBufferLen,
											   &standbyTimeout,
											   &walsend_shutdown_requested);
					if (successful)
					{
						XLogRecPtr	oldEndLocation;

						oldEndLocation = currentEndLocation;

						currentEndLocation.xlogid = writeLogId;
						currentEndLocation.xrecoff = writeLogSeg * XLogSegSize + writeLogOff;
						if (currentEndLocation.xrecoff >= XLogFileSize)
						{
							(currentEndLocation.xlogid)++;
							currentEndLocation.xrecoff = 0;
						}

						if (XLByteLT(oldEndLocation,currentEndLocation))
						{
							periodicLen += saveBufferLen;
							if (periodicLen > periodicReportLen)
							{
								elog(LOG,
									 "Master mirroring periodic report: %d bytes successfully send to standby master for locations %s through %s",
									 periodicLen,
									 XLogLocationToString(&periodicLocation),
									 XLogLocationToString2(&currentEndLocation));

								periodicLen = 0;
								periodicLocation = currentEndLocation;
							}
						}
						else
						{
							if (Debug_print_qd_mirroring)
								elog(LOG,
									 "Send to Master mirror successful. New end location %s (old %s)",
									 XLogLocationToString(&currentEndLocation),
									 XLogLocationToString2(&oldEndLocation));
						}
					}
					else
					{
						disableQDMirroring_ConnectionError(
							"Connection to the standby master was lost attempting to send new transaction log",
							GetStandbyErrorString());
						disconnectMirrorQD_SendClose();
					}

					/*
					 * Reset so WriteWalPages can fill the buffer again.
					 */
					saveBufferLen = 0;
					writeLogId = 0;
					writeLogSeg = 0;
					writeLogOff = 0;
				}

				if (successful && walSendRequest->haveNewCheckpointLocation)
				{
					uint32	logid;
					uint32	seg;
					uint32	offset;

					elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"New previous checkpoint location %s",
						 XLogLocationToString(&walSendRequest->newCheckpointLocation));
					XLByteToSeg(walSendRequest->newCheckpointLocation, logid, seg);
					offset = walSendRequest->newCheckpointLocation.xrecoff % XLogSegSize;

					if (snprintf(cmd, sizeof(cmd),"new_checkpoint_location %d %d %d",
								 logid, seg, offset) >= sizeof(cmd))
						elog(ERROR,"could not create cmd for qd mirror logid %d seg %d offset %d",
							 logid, seg, offset);

					successful = write_qd_sync(cmd, NULL, 0, NULL, &walsend_shutdown_requested);
					if (successful)
					{
						elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"Send of new checkpoint location to master mirror successful");
					}
					else
					{
						disableQDMirroring_ConnectionError(
							"Connection to the standby master was lost attempting to send new checkpoint location",
							GetStandbyErrorString());
						disconnectMirrorQD_SendClose();
					}
				}
			}
			else if (isQDMirroringDisabled())
			{
				elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master Mirror Send: Master mirroring not enabled");
			}
			else
			{
				elog(ERROR,"unexpected master mirroring state %s",
					 QDMirroringStateString());
			}
			break;

		case CloseForShutdown:
			if (Debug_print_qd_mirroring)
				elog(LOG, "CloseForShutdown");

			/*
			 * Do the work we would normally do when signaled to stop.
			 */
			WalSendServer_ServiceShutdown();
			break;

		default:
			elog(ERROR, "Unknown WalSendRequestCommand %d", walSendRequest->command);
	}
}
/*
 * ChangeTracking_PrintRelationChangeInfo
 *
 * Extract the relation change entries from one xlog record and write a LOG
 * line for each of them (or only for the problematic ones).
 *
 * For every entry that GpPersistent_SkipXLogInfo() does not skip, the
 * persistent TID and serial number are checked: a zero or invalid TID, a
 * zero or negative serial number, or a serial number above the known
 * maximum (when that maximum is non-zero) counts as a "skip issue".
 *
 * xl_rmid, xl_info, data : identify and carry the xlog record.
 * loc                    : location printed as the record's LSN.
 * weAreGeneratingXLogNow : selects which routine supplies the maximum
 *                          persistent serial number.
 * printSkipIssuesOnly    : when true, only entries with a skip issue are
 *                          logged.
 *
 * Returns true when at least one entry had a skip issue.
 */
bool
ChangeTracking_PrintRelationChangeInfo(
	RmgrId		xl_rmid,
	uint8		xl_info,
	void	   *data,
	XLogRecPtr *loc,
	bool		weAreGeneratingXLogNow,
	bool		printSkipIssuesOnly)
{
	int			maxEntries = ChangeTracking_GetInfoArrayDesiredMaxLength(xl_rmid, xl_info);
	RelationChangeInfo changeInfos[maxEntries];
	int			numEntries;
	int			idx;
	bool		foundSkipIssue = false;

	ChangeTracking_GetRelationChangeInfoFromXlog(
												 xl_rmid,
												 xl_info,
												 data,
												 changeInfos,
												 &numEntries,
												 maxEntries);

	for (idx = 0; idx < numEntries; idx++)
	{
		RelationChangeInfo *info = &changeInfos[idx];
		int64		maxSerialNum;
		bool		skipped;
		bool		tidIsZero = false;
		bool		tidIsInvalid = false;
		bool		serialIsZero = false;
		bool		serialIsInvalid = false;
		bool		hasIssue = false;

		if (!weAreGeneratingXLogNow)
			maxSerialNum = PersistentRelfile_CurrentMaxSerialNum();
		else
			maxSerialNum = PersistentRelfile_MyHighestSerialNum();

		skipped = GpPersistent_SkipXLogInfo(info->relFileNode.relNode);
		if (!skipped)
		{
			/* The validity check is only meaningful for a non-zero TID. */
			tidIsZero = PersistentStore_IsZeroTid(&info->persistentTid);
			tidIsInvalid = (!tidIsZero &&
							!ItemPointerIsValid(&info->persistentTid));

			/*
			 * A non-zero serial number is invalid when negative, or when it
			 * exceeds the known maximum.  A maximum of zero means the scan
			 * has not been done yet, so the upper range check is skipped.
			 */
			serialIsZero = (info->persistentSerialNum == 0);
			serialIsInvalid = (!serialIsZero &&
							   (info->persistentSerialNum < 0 ||
								(maxSerialNum != 0 &&
								 info->persistentSerialNum > maxSerialNum)));

			hasIssue = (tidIsZero || tidIsInvalid || serialIsZero || serialIsInvalid);
		}

		if (hasIssue || !printSkipIssuesOnly)
			elog(LOG,
				 "ChangeTracking_PrintRelationChangeInfo: [%d] xl_rmid %d, xl_info 0x%X, %u/%u/%u, block number %u, LSN %s, persistent serial num " INT64_FORMAT ", TID %s, maxPersistentSerialNum " INT64_FORMAT ", skip %s, zeroTid %s, invalidTid %s, zeroSerialNum %s, invalidSerialNum %s, skipIssue %s",
				 idx,
				 xl_rmid,
				 xl_info,
				 info->relFileNode.spcNode,
				 info->relFileNode.dbNode,
				 info->relFileNode.relNode,
				 info->blockNumber,
				 XLogLocationToString(loc),
				 info->persistentSerialNum,
				 ItemPointerToString(&info->persistentTid),
				 maxSerialNum,
				 (skipped ? "true" : "false"),
				 (tidIsZero ? "true" : "false"),
				 (tidIsInvalid ? "true" : "false"),
				 (serialIsZero ? "true" : "false"),
				 (serialIsInvalid ? "true" : "false"),
				 (hasIssue ? "true" : "false"));

		if (hasIssue)
			foundSkipIssue = true;
	}

	return foundSkipIssue;
}
/* * cdb_sync_xlog - process xlog sync */ static void cdb_sync_xlog(void) { uint32 currentBlockOffset; XLogRecPtr writeLoc; elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "QDSYNC: write logid %d seg %d woffset 0x%X, wlen 0x%X", wlogid, wseg, woffset, wlen); if (woffset % XLOG_BLCKSZ != 0) { elog(ERROR,"QDSYNC: not on block boundaries 0x%X", woffset); } if (wlogid != xlogid || wseg != xseg) { elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "QDSYNC: closing previous file %s", xlogfilename); if (xlogfilefd >= 0) { close(xlogfilefd); xlogfilefd = -1; xlogfileoffset = -1; } xlogid = wlogid; xseg = wseg; openXlogNextFile(); /* * Assume caller knows where to write. */ xlogfileoffset = woffset; } /* * Validate we are appending or overwritting previous block */ currentBlockOffset = (xlogfileoffset / XLOG_BLCKSZ) * XLOG_BLCKSZ; if (woffset != currentBlockOffset && woffset + XLOG_BLCKSZ != currentBlockOffset) { elog(ERROR,"QDSYNC: not appending to end (primary: 0x%X, standby: 0x%X)", woffset, xlogfileoffset); } /* * no validation checking on xlog. xlog sync is by block and may repeat * the same block, so we do not have any way to check it now. we will rely * on tmlog checking for now. */ ensureBufferSize(); readLogMessage(buf, wlen); if ((wlen / XLOG_BLCKSZ) * XLOG_BLCKSZ != wlen) { int roundedUp; int padLen; /* * Pad buffer out with zeros. */ roundedUp = ((wlen + XLOG_BLCKSZ - 1) / XLOG_BLCKSZ) * XLOG_BLCKSZ; Assert(buflen >= roundedUp); padLen = roundedUp - wlen; elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "QDSYNC: padding buffer with %d zeros (wlen %d, roundedUp %d)", padLen, wlen, roundedUp); memset(&((char*) buf)[wlen], 0, padLen); wlen = roundedUp; } writeLoc.xlogid = wlogid; writeLoc.xrecoff = wseg * XLogSegSize + woffset; syncWriteLog(xlogfilefd, buf, woffset, wlen); elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "QDSYNC: wrote location %s len 0x%X", XLogLocationToString(&writeLoc), wlen); xlogfileoffset = woffset + wlen; }
/*
 * cdb_perform_redo
 *
 * Apply redo on the standby and, when that moved the restart location
 * forward, remove log files that precede the new restart segment.
 *
 * redoCheckPointLoc / redoCheckPoint : in/out.  When redoCheckPointLoc is
 *     all zeros they are first filled in by XLogGetRecoveryStart(); they are
 *     refreshed again after the redo pass.
 * newCheckpointLoc : passed through to XLogStandbyRecoverRange().
 *
 * If the restart location did not move, the function logs that and returns
 * without removing anything.
 */
void
cdb_perform_redo(XLogRecPtr *redoCheckPointLoc, CheckPoint *redoCheckPoint, XLogRecPtr *newCheckpointLoc)
{
	CheckPoint	checkpointBefore;
	uint32		keepLogId;
	uint32		keepSeg;
	int			removedCount;

	if (redoCheckPointLoc->xlogid == 0 &&
		redoCheckPointLoc->xrecoff == 0)
	{
		XLogGetRecoveryStart("QDSYNC",
							 "for redo apply",
							 redoCheckPointLoc,
							 redoCheckPoint);
	}

	XLogStandbyRecoverRange(redoCheckPointLoc, redoCheckPoint, newCheckpointLoc);

	/*
	 * Sample the recovery start location now to see whether applying redo
	 * processed checkpoint records and moved the restart location forward.
	 */
	checkpointBefore = *redoCheckPoint;
	XLogGetRecoveryStart("QDSYNC",
						 "for redo progress check",
						 redoCheckPointLoc,
						 redoCheckPoint);

	/* Nothing to clean up when the restart location stayed where it was. */
	if (!XLByteLT(checkpointBefore.redo,redoCheckPoint->redo))
	{
		Assert(XLByteEQ(checkpointBefore.redo,redoCheckPoint->redo));
		ereport(LOG,
				(errmsg("QDSYNC: transaction redo did not move the restart location %s forward this pass",
						XLogLocationToString(&checkpointBefore.redo))));
		return;
	}

	ereport(LOG,
			(errmsg("QDSYNC: transaction redo moved the restart location from %s to %s",
					XLogLocationToString(&checkpointBefore.redo),
					XLogLocationToString2(&redoCheckPoint->redo))));

	XLByteToSeg(redoCheckPoint->redo, keepLogId, keepSeg);

	/*
	 * Delete offline log files (those no longer needed even for previous
	 * checkpoint).
	 */
	elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
		 "QDSYNC: keep log files as far back as (logid %d, seg %d)",
		 keepLogId, keepSeg);

	if (keepLogId != 0 || keepSeg != 0)
	{
		/* Step back one segment: removal goes up to the previous segment. */
		PrevLogSeg(keepLogId, keepSeg);

		elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
			 "QDSYNC: delete offline log files up to (logid %d, seg %d)",
			 keepLogId, keepSeg);

		XLogRemoveStandbyLogs(keepLogId, keepSeg, &removedCount);

		if (removedCount > 0)
		{
			/* The message ends with a newline to make the log more readable. */
			ereport(LOG,
					(errmsg("QDSYNC: %d logs removed through logid %d, seg %d\n",
							removedCount, keepLogId, keepSeg)));
		}
	}

	/* Separator line to make the log more readable. */
	elog(LOG,"--------------------------");
}
/*
 * PersistentStore_DiagnoseDumpTable
 *
 * Write a diagnostic dump of a persistent table to the server log: first a
 * summary line built from the shared data, then, while scanning every tuple
 * of the store, the LSN of each page the first time a tuple from that page
 * is encountered, followed by the tuple itself via the store's
 * printTupleCallback.
 *
 * Does nothing when disable_persistent_diagnostic_dump is set.
 */
static void
PersistentStore_DiagnoseDumpTable(
	PersistentStoreData			*storeData,
	PersistentStoreSharedData	*storeSharedData)
{
	if (disable_persistent_diagnostic_dump)
	{
		return;
	}

	MIRROREDLOCK_BUFMGR_DECLARE;

	PersistentStoreScan scan;
	ItemPointerData tupleTid;
	int64		tupleSerialNum;
	Datum	   *tupleValues;
	BlockNumber prevBlock = 0;
	bool		anyPageShown = false;

	elog(LOG,
		 "Diagnostic dump of persistent table ('%s'): maximum in-use serial number " INT64_FORMAT ", maximum free order number " INT64_FORMAT ", free TID %s, maximum known TID %s",
		 storeData->tableName,
		 storeSharedData->maxInUseSerialNum,
		 storeSharedData->maxFreeOrderNum,
		 ItemPointerToString(&storeSharedData->freeTid),
		 ItemPointerToString2(&storeSharedData->maxTid));

	tupleValues = (Datum*)palloc(storeData->numAttributes * sizeof(Datum));

	PersistentStore_BeginScan(
							  storeData,
							  storeSharedData,
							  &scan);

	for (;;)
	{
		BlockNumber thisBlock;
		bool		showPageLsn;

		if (!PersistentStore_GetNext(
									 &scan,
									 tupleValues,
									 &tupleTid,
									 &tupleSerialNum))
			break;

		/*
		 * Use the BlockIdGetBlockNumber routine because
		 * ItemPointerGetBlockNumber asserts for valid TID.
		 */
		thisBlock = BlockIdGetBlockNumber(&tupleTid.ip_blkid);

		/* Show the page LSN once per run of tuples from the same block. */
		showPageLsn = (!anyPageShown || thisBlock != prevBlock);
		if (showPageLsn)
		{
			Buffer		pageBuffer;
			PageHeader	pageHeader;
			XLogRecPtr	pageLsn;

			/*
			 * Fetch the block and display the LSN.
			 */

			// -------- MirroredLock ----------
			MIRROREDLOCK_BUFMGR_LOCK;

			pageBuffer = ReadBuffer(
									scan.persistentRel,
									thisBlock);

			pageHeader = (PageHeader) BufferGetPage(pageBuffer);
			pageLsn = PageGetLSN(pageHeader);
			ReleaseBuffer(pageBuffer);

			MIRROREDLOCK_BUFMGR_UNLOCK;
			// -------- MirroredLock ----------

			elog(LOG, "Diagnostic LSN %s of page %u",
				 XLogLocationToString(&pageLsn),
				 thisBlock);

			prevBlock = thisBlock;
			anyPageShown = true;
		}

		/*
		 * Display the persistent tuple.
		 */
		(*storeData->printTupleCallback)(
										 LOG,
										 "DIAGNOSE",
										 &tupleTid,
										 tupleValues);
	}

	PersistentStore_EndScan(&scan);

	pfree(tupleValues);
}