/* * FileRepPrimary_StartResyncWorker() */ void FileRepPrimary_StartResyncWorker(void) { int status = STATUS_OK; FileRep_InsertConfigLogEntry("start resync worker"); Insist(fileRepRole == FileRepPrimaryRole); while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } /* * We are waiting for following conditions to move forward: * * Database is running * And * if dataState is InResync, we wait for FileRepSubProcess to Ready state * else don't wait */ while (!isDatabaseRunning() || !(dataState == DataStateInResync ? FileRepSubProcess_GetState() == FileRepStateReady : true)) { FileRepSubProcess_ProcessSignals(); if (FileRepSubProcess_GetState() == FileRepStateShutdown || FileRepSubProcess_GetState() == FileRepStateShutdownBackends) { break; } pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown || FileRepSubProcess_GetState() == FileRepStateShutdownBackends) { break; } FileRepSubProcess_InitHeapAccess(); status = FileRepPrimary_RunResyncWorker(); if (status != STATUS_OK) { continue; } break; } // while(1) }
/* * * FileRepAckPrimary_StartConsumer */ void FileRepAckPrimary_StartConsumer(void) { int status = STATUS_OK; FileRep_InsertConfigLogEntry("run consumer"); while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } while (FileRepSubProcess_GetState() == FileRepStateFault || (fileRepShmemArray[0]->state == FileRepStateNotInitialized && FileRepSubProcess_GetState() != FileRepStateShutdown)) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown) { break; } status = FileRepAckPrimary_RunConsumer(); } // while(1) if (FileRepSubProcess_GetState() == FileRepStateShutdown) { /* perform graceful shutdown */ } LWLockAcquire(FileRepAckHashShmemLock, LW_EXCLUSIVE); FileRep_IpcSignal(fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->semP, &fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->refCountSemP); LWLockRelease(FileRepAckHashShmemLock); /* NOTE free memory (if any) */ return; }
/* * FileRepPrimary_RunResyncWorker() * */ static int FileRepPrimary_RunResyncWorker(void) { int status = STATUS_OK; FileRepResyncHashEntry_s *entry = NULL; ChangeTrackingRequest *request = NULL; FileRep_InsertConfigLogEntry("run resync worker"); while (1) { FileRepSubProcess_ProcessSignals(); if (! (FileRepSubProcess_GetState() == FileRepStateReady && dataState == DataStateInResync)) { break; } entry = FileRepPrimary_GetResyncEntry(&request); if (entry == NULL && request == NULL) { pg_usleep(100000L); /* 100 ms */ continue; } Assert(! (entry != NULL && request != NULL)); if (entry != NULL) { status = FileRepPrimary_ResyncWrite(entry); if (status == STATUS_OK) { if (entry->mirrorBufpoolResyncChangedPageCount == 0) { entry->mirrorBufpoolResyncChangedPageCount = (entry->mirrorAppendOnlyNewEof - entry->mirrorAppendOnlyLossEof) / BLCKSZ; } status = FileRepResync_UpdateEntry(entry); } } if (request != NULL) { status = FileRepPrimary_ResyncBufferPoolIncrementalWrite(request); request = NULL; } if (status != STATUS_OK) { break; } } return status; }
/* * FileRepPrimary_StartResyncWorker() */ void FileRepPrimary_StartResyncWorker(void) { int status = STATUS_OK; FileRep_InsertConfigLogEntry("start resync worker"); Insist(fileRepRole == FileRepPrimaryRole); while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } while (FileRepSubProcess_GetState() != FileRepStateShutdown && FileRepSubProcess_GetState() != FileRepStateShutdownBackends && ! (FileRepSubProcess_GetState() == FileRepStateReady && dataState == DataStateInResync)) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown || FileRepSubProcess_GetState() == FileRepStateShutdownBackends) { break; } status = FileRepPrimary_RunResyncWorker(); if (status != STATUS_OK) { continue; } break; } // while(1) }
/* * * FileRepPrimary_RunHeartBeat() * * */ static void FileRepPrimary_RunHeartBeat(void) { int retry = 0; Insist(fileRepRole == FileRepPrimaryRole); Insist(dataState == DataStateInSync || dataState == DataStateInResync); while (1) { FileRepSubProcess_ProcessSignals(); while (FileRepSubProcess_GetState() == FileRepStateFault || (fileRepShmemArray[0]->state == FileRepStateNotInitialized && FileRepSubProcess_GetState() != FileRepStateShutdownBackends && FileRepSubProcess_GetState() != FileRepStateShutdown)) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown || FileRepSubProcess_GetState() == FileRepStateShutdownBackends) { break; } /* verify if flow from primary to mirror and back is alive once per minute */ pg_usleep(50000L); /* 50 ms */ if (FileRepSubProcess_ProcessSignals() == true || FileRepSubProcess_GetState() == FileRepStateFault) { continue; } retry++; if (retry == 1200) /* 1200 * 50 ms = 60 sec */ { FileRepPrimary_MirrorHeartBeat(FileRepMessageTypeXLog); continue; } if (retry == 1201) /* 1200 * 50 ms = 60 sec */ { FileRepPrimary_MirrorHeartBeat(FileRepMessageTypeWriter); continue; } if (retry == 1202) /* 1200 * 50 ms = 60 sec */ { FileRepPrimary_MirrorHeartBeat(FileRepMessageTypeAO01); retry = 0; } } // while(1) }
/* * * FileRepPrimary_StartRecoveryInSync() * * */ static void FileRepPrimary_StartRecoveryInSync(void) { int status = STATUS_OK; FileRep_InsertConfigLogEntry("run recovery"); while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } while (FileRepSubProcess_GetState() == FileRepStateFault || (fileRepShmemArray[0]->state == FileRepStateNotInitialized && FileRepSubProcess_GetState() != FileRepStateShutdownBackends && FileRepSubProcess_GetState() != FileRepStateShutdown)) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown || FileRepSubProcess_GetState() == FileRepStateShutdownBackends) { break; } if (FileRepSubProcess_GetState() == FileRepStateReady) { break; } Insist(fileRepRole == FileRepPrimaryRole); Insist(dataState == DataStateInSync); status = FileRepPrimary_RunRecoveryInSync(); } // while(1) }
/*
 * FileRepPrimary_ResyncBufferPoolIncrementalWrite()
 *
 * Incrementally resynchronizes buffer pool relations to the mirror, driven
 * by change tracking (CT) records.
 *
 * 'request' describes the relations to process; it is freed here with
 * ChangeTracking_FreeRequest() before returning.  Changes are fetched with
 * ChangeTracking_GetChanges(); when the result sets ask_for_more, the
 * request's lsn_start is advanced to result->next_start_lsn and another
 * batch is fetched.
 *
 * For every CT record the block is read via ReadBuffer_Resync(), locked
 * exclusively, and written with smgrwrite() when its page LSN is not newer
 * than the CT record's lsn_end.  When the last record of a relation has been
 * processed (and no more changes are pending) the relation is synced, closed
 * and reported through FileRepResync_UpdateEntry().
 *
 * Returns STATUS_OK on success, the status of FileRepResync_UpdateEntry() if
 * that fails, or STATUS_ERROR if the sub-process left the Ready state or
 * dataState left DataStateInResync while working.
 */
static int
FileRepPrimary_ResyncBufferPoolIncrementalWrite(ChangeTrackingRequest *request)
{
	int				status = STATUS_OK;
	Page			page;
	Buffer			buf;
	BlockNumber		numBlocks = 0;
	SMgrRelation	smgr_relation = NULL;
	char			relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1];
	int				ii;
	XLogRecPtr		loc;		/* LSN currently on the page */
	XLogRecPtr		loc1;		/* lsn_end of the change tracking record */
	int				count = 0;
	int				thresholdCount = 0;
	bool			mirrorDataLossOccurred = FALSE;
	int				NumberOfRelations = request->count;

	FileRepResyncHashEntry_s	entry;
	ChangeTrackingResult	   *result = NULL;

	while (1)
	{
		/* allow flushing buffers from buffer pool during scan */
		FileRepResync_SetReadBufferRequest();
		if ((result = ChangeTracking_GetChanges(request)) != NULL)
		{
			FileRepResync_ResetReadBufferRequest();

			for (ii = 0; ii < result->count; ii++)
			{
				/* first record of a relation: open it and cache its size */
				if (smgr_relation == NULL)
				{
					NumberOfRelations--;

					smgr_relation = smgropen(result->entries[ii].relFileNode);

					snprintf(relidstr, sizeof(relidstr), "%u/%u/%u",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode);

					numBlocks = smgrnblocks(smgr_relation);

					if (Debug_filerep_print)
						elog(LOG, "resynchronize buffer pool relation '%u/%u/%u' "
							 "number of blocks:'%u' ",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode,
							 numBlocks);

					/* how many blocks to process between signal checks */
					thresholdCount = Min(numBlocks, 1024);
				}

				loc1 = result->entries[ii].lsn_end;

				/*
				 * if relation was truncated then block_num from change tracking can be beyond numBlocks
				 */
				if (result->entries[ii].block_num >= numBlocks)
				{
					ereport(LOG,
							(errmsg("could not resynchonize buffer pool relation '%s' block '%d' (maybe due to truncate), "
									"lsn change tracking '%s(%u/%u)' "
									"number of blocks '%d' ",
									relidstr,
									result->entries[ii].block_num,
									XLogLocationToString(&loc1),
									loc1.xlogid,
									loc1.xrecoff,
									numBlocks),
							 FileRep_errcontext()));

					goto flush_check;
				}

				/* allow flushing buffers from buffer pool during scan */
				FileRepResync_SetReadBufferRequest();
				buf = ReadBuffer_Resync(smgr_relation,
										result->entries[ii].block_num,
										relidstr);
				FileRepResync_ResetReadBufferRequest();

				Assert(result->entries[ii].block_num < numBlocks);

				LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
				page = BufferGetPage(buf);

				loc = PageGetLSN(page);

				if (Debug_filerep_print)
				{
					elog(LOG,
						 "incremental resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn page '%s(%u/%u)' "
						 "lsn end change tracking '%s(%u/%u)' ",
						 relidstr,
						 numBlocks,
						 result->entries[ii].block_num,
						 XLogLocationToString(&loc),
						 loc.xlogid,
						 loc.xrecoff,
						 XLogLocationToString(&loc1),
						 result->entries[ii].lsn_end.xlogid,
						 result->entries[ii].lsn_end.xrecoff);
				}
				else
				{
					char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];

					snprintf(tmpBuf, sizeof(tmpBuf),
							 "incremental resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn page '%s(%u/%u)' ",
							 relidstr,
							 numBlocks,
							 result->entries[ii].block_num,
							 XLogLocationToString(&loc),
							 loc.xlogid,
							 loc.xrecoff);

					FileRep_InsertConfigLogEntry(tmpBuf);

					snprintf(tmpBuf, sizeof(tmpBuf),
							 "incremental resync buffer pool identifier '%s' lsn end change tracking '%s(%u/%u)' ",
							 relidstr,
							 XLogLocationToString(&loc1),
							 result->entries[ii].lsn_end.xlogid,
							 result->entries[ii].lsn_end.xrecoff);

					FileRep_InsertConfigLogEntry(tmpBuf);
				}

				/*
				 * Write the page when its LSN is not newer than the CT
				 * record.  The "page LSN < CT LSN" case is the one the
				 * comment below describes (extra CT records), so the page
				 * LSN must be the left-hand operand of the <= test.
				 */
				if (XLByteLE(PageGetLSN(page), result->entries[ii].lsn_end))
				{
					if (! XLByteEQ(PageGetLSN(page), result->entries[ii].lsn_end))
					{
						/* arguments follow the message: CT lsn first, then page lsn */
						ereport(LOG,
								(errmsg("Resynchonize buffer pool relation '%s' block '%d' has page lsn less than CT lsn, "
										"lsn end change tracking '%s(%u/%u)' lsn page '%s(%u/%u)' "
										"number of blocks '%d'",
										relidstr,
										result->entries[ii].block_num,
										XLogLocationToString(&loc1),
										loc1.xlogid,
										loc1.xrecoff,
										XLogLocationToString(&loc),
										loc.xlogid,
										loc.xrecoff,
										numBlocks),
								 FileRep_errcontext()));
					}

					/*
					 * It's safe and better to perform write of the page to mirror,
					 * for this case, as primary and mirror data pages should always
					 * be same. So, we might do some extra work but definitely won't
					 * lose out blocks, or error out and need to perform full recovery.
					 * Need to cover for this case as there are some known scenarios where
					 * CT file can have extra records which should have been discarded,
					 * but as we lose out information of xlog LSN cannot be discarded.
					 * One such case is when CT_TRANSIENT being compacted to CT_COMPACT
					 * with specific xlog LSN (to discard extra records) in CT mode gets
					 * interrupted by resync. Compaction during Resync collects all the
					 * CT records and doesn't have xlog LSN information to discard any
					 * extra records from CT_TRANSIENT.
					 */
					smgrwrite(smgr_relation,
							  result->entries[ii].block_num,
							  (char *)BufferGetBlock(buf),
							  FALSE);
				}

#ifdef FAULT_INJECTOR
				FaultInjector_InjectFaultIfSet(
											   FileRepResyncWorker,
											   DDLNotSpecified,
											   "",	// databaseName
											   ""); // tableName
#endif

				UnlockReleaseBuffer(buf);

#ifdef FAULT_INJECTOR
				FaultInjector_InjectFaultIfSet(
											   FileRepResyncWorker,
											   DDLNotSpecified,
											   "",	// databaseName
											   ""); // tableName
#endif

	flush_check:
				/* last record of this relation within the current batch? */
				if (((ii + 1) == result->count) ||
					! (result->entries[ii].relFileNode.spcNode == result->entries[ii+1].relFileNode.spcNode &&
					   result->entries[ii].relFileNode.dbNode == result->entries[ii+1].relFileNode.dbNode &&
					   result->entries[ii].relFileNode.relNode == result->entries[ii+1].relFileNode.relNode))
				{
					/* finish the relation only when no further batch is pending */
					if (result->ask_for_more == false)
					{
						smgrimmedsync(smgr_relation);

						smgrclose(smgr_relation);

						smgr_relation = NULL;

						FileRep_GetRelationPath(
												entry.fileName,
												result->entries[ii].relFileNode,
												0 /* segment file number is always 0 for Buffer Pool */);

						status = FileRepResync_UpdateEntry(&entry);
						if (status != STATUS_OK)
						{
							break;
						}
					}
				}

				/* every thresholdCount blocks, check that resync is still on */
				if (count > thresholdCount)
				{
					count = 0;
					FileRepSubProcess_ProcessSignals();

					if (! (FileRepSubProcess_GetState() == FileRepStateReady &&
						   dataState == DataStateInResync))
					{
						mirrorDataLossOccurred = TRUE;
						break;
					}
				}
				else
					count++;
			}	// for (ii = 0; ii < result->count; ii++)
		}	// if ((result = ChangeTracking_GetChanges(request)) != NULL)

		FileRepResync_ResetReadBufferRequest();

		/*
		 * Once the scan was aborted do not fetch further batches, even if
		 * change tracking reported that more changes are pending.
		 */
		if (mirrorDataLossOccurred || status != STATUS_OK)
		{
			break;
		}

		if (result != NULL && result->ask_for_more == true)
		{
			Assert(request->count == 1);
			request->entries[0].lsn_start = result->next_start_lsn;

			/* this batch is done; release it before fetching the next one */
			ChangeTracking_FreeResult(result);
			result = NULL;
		}
		else
		{
			break;
		}
	}	// while(1)

	ChangeTracking_FreeRequest(request);
	ChangeTracking_FreeResult(result);

	Insist(NumberOfRelations == 0);

	if (mirrorDataLossOccurred)
		status = STATUS_ERROR;

	return status;
}
/*
 * FileRepPrimary_ResyncWrite()
 *
 * Resynchronizes one relation file, described by 'entry', to the mirror.
 *
 * Buffer pool relations (incremental scan or full copy): every block from
 * entry->mirrorBufpoolResyncCkptBlockNum up to the current relation size is
 * read through ReadBuffer_Resync(), and written with smgrwrite() when its
 * page LSN lies between entry->mirrorBufpoolResyncCkptLoc and the end
 * resync LSN.  Afterwards the relation is truncated to its current size
 * (not for full copy), synced and closed.
 *
 * Append-only relations (catchup or full copy): the byte range
 * [mirrorAppendOnlyLossEof, mirrorAppendOnlyNewEof) is read and appended
 * through the MirroredAppendOnly_* routines, then flushed and closed.
 *
 * entry->mirrorBufpoolResyncChangedPageCount is filled in when it is 0.
 *
 * Returns STATUS_OK, or STATUS_ERROR when mirror data loss was detected
 * (reported by the MirroredAppendOnly_* routines, or because the sub-process
 * left Ready / dataState left DataStateInResync during the buffer pool scan).
 */
static int
FileRepPrimary_ResyncWrite(FileRepResyncHashEntry_s *entry)
{
	int				status = STATUS_OK;
	Page			page;
	Buffer			buf;
	BlockNumber		numBlocks;
	BlockNumber		blkno;
	SMgrRelation	smgr_relation;
	char			relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1];
	XLogRecPtr		loc;
	int				count = 0;
	int				thresholdCount = 0;
	bool			mirrorDataLossOccurred = FALSE;

	switch (entry->relStorageMgr)
	{
		case PersistentFileSysRelStorageMgr_BufferPool:

			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_BufferPoolScanIncremental:
				case MirroredRelDataSynchronizationState_FullCopy:

					smgr_relation = smgropen(entry->relFileNode);

					numBlocks = smgrnblocks(smgr_relation);

					snprintf(relidstr, sizeof(relidstr), "%u/%u/%u",
							 smgr_relation->smgr_rnode.spcNode,
							 smgr_relation->smgr_rnode.dbNode,
							 smgr_relation->smgr_rnode.relNode);

					if (Debug_filerep_print)
						elog(LOG, "resync buffer pool relation '%s' number of blocks '%d' ",
							 relidstr, numBlocks);

					/* how many blocks to process between signal checks */
					thresholdCount = Min(numBlocks, 1024);

					/*
					 * required in order to report how many blocks were synchronized
					 * if gp_persistent_relation_node does not return that information
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = numBlocks - entry->mirrorBufpoolResyncCkptBlockNum;
					}

					for (blkno = entry->mirrorBufpoolResyncCkptBlockNum; blkno < numBlocks; blkno++)
					{
						XLogRecPtr	endResyncLSN = (isFullResync() ?
													FileRepResync_GetEndFullResyncLSN() :
													FileRepResync_GetEndIncrResyncLSN());

#ifdef FAULT_INJECTOR
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorkerRead,
													   DDLNotSpecified,
													   "",	//databaseName
													   ""); // tableName
#endif

						FileRepResync_SetReadBufferRequest();
						buf = ReadBuffer_Resync(smgr_relation, blkno, relidstr);
						FileRepResync_ResetReadBufferRequest();

						LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
						page = BufferGetPage(buf);

						loc = PageGetLSN(page);

						if (Debug_filerep_print)
						{
							elog(LOG,
								 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' "
								 "lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
								 relidstr,
								 numBlocks,
								 blkno,
								 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
								 entry->mirrorBufpoolResyncCkptLoc.xlogid,
								 entry->mirrorBufpoolResyncCkptLoc.xrecoff,
								 XLogLocationToString(&loc),
								 loc.xlogid,
								 loc.xrecoff,
								 XLogLocationToString(&endResyncLSN),
								 endResyncLSN.xlogid,
								 endResyncLSN.xrecoff);
						}
						else
						{
							char	tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];

							snprintf(tmpBuf, sizeof(tmpBuf),
									 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' ",
									 relidstr,
									 numBlocks,
									 blkno,
									 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
									 entry->mirrorBufpoolResyncCkptLoc.xlogid,
									 entry->mirrorBufpoolResyncCkptLoc.xrecoff);

							FileRep_InsertConfigLogEntry(tmpBuf);

							snprintf(tmpBuf, sizeof(tmpBuf),
									 "full resync buffer pool identifier '%s' lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
									 relidstr,
									 XLogLocationToString(&loc),
									 loc.xlogid,
									 loc.xrecoff,
									 XLogLocationToString(&endResyncLSN),
									 endResyncLSN.xlogid,
									 endResyncLSN.xrecoff);

							FileRep_InsertConfigLogEntry(tmpBuf);
						}

						/* ship only pages whose LSN is inside the resync window */
						if (XLByteLE(PageGetLSN(page), endResyncLSN) &&
							XLByteLE(entry->mirrorBufpoolResyncCkptLoc, PageGetLSN(page)))
						{
							smgrwrite(smgr_relation,
									  blkno,
									  (char *)BufferGetBlock(buf),
									  FALSE);
						}

#ifdef FAULT_INJECTOR
						FaultInjector_InjectFaultIfSet(
													   FileRepResyncWorker,
													   DDLNotSpecified,
													   "",	// databaseName
													   ""); // tableName
#endif

						UnlockReleaseBuffer(buf);

						/* every thresholdCount blocks, check that resync is still on */
						if (count > thresholdCount)
						{
							count = 0;
							FileRepSubProcess_ProcessSignals();

							if (! (FileRepSubProcess_GetState() == FileRepStateReady &&
								   dataState == DataStateInResync))
							{
								mirrorDataLossOccurred = TRUE;
								break;
							}
						}
						else
							count++;
					}

					if (mirrorDataLossOccurred)
						break;

					if (entry->mirrorDataSynchronizationState != MirroredRelDataSynchronizationState_FullCopy)
					{
						LockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);

						numBlocks = smgrnblocks(smgr_relation);

						smgrtruncate(smgr_relation,
									 numBlocks,
									 TRUE /* isTemp, TRUE means to not record in XLOG */,
									 FALSE /* isLocalBuf */,
									 &entry->persistentTid,
									 entry->persistentSerialNum);

						UnlockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);
					}

					smgrimmedsync(smgr_relation);
					smgrclose(smgr_relation);

					smgr_relation = NULL;
					break;

				case MirroredRelDataSynchronizationState_None:
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;

				default:
					/* identifier printed as spcNode/dbNode/relNode, like relidstr */
					ereport(LOG,
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									entry->relFileNode.relNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}
			break;

		case PersistentFileSysRelStorageMgr_AppendOnly:
		{
			/*
			 * NOTE: this branch deliberately uses the function-level
			 * mirrorDataLossOccurred flag.  A second declaration here would
			 * shadow it and hide append-only data loss from the final status
			 * check at the bottom of the function.
			 */
			MirroredAppendOnlyOpen	mirroredOpen;
			int						primaryError = 0;
			char				   *buffer = NULL;
			int64					endOffset = entry->mirrorAppendOnlyNewEof;
			int64					startOffset = entry->mirrorAppendOnlyLossEof;
			int32					bufferLen = 0;
			int						retval = 0;

			switch (entry->mirrorDataSynchronizationState)
			{
				case MirroredRelDataSynchronizationState_AppendOnlyCatchup:
				case MirroredRelDataSynchronizationState_FullCopy:

					/*
					 * required in order to report how many blocks were synchronized
					 * if gp_persistent_relation_node does not return that information
					 */
					if (entry->mirrorBufpoolResyncChangedPageCount == 0)
					{
						entry->mirrorBufpoolResyncChangedPageCount = (endOffset - startOffset) / BLCKSZ;
					}

					/*
					 * The MirroredAppendOnly_OpenResynchonize routine knows we are a resynch worker and
					 * will open BOTH, but write only the MIRROR!!!
					 */
					MirroredAppendOnly_OpenResynchonize(
														&mirroredOpen,
														&entry->relFileNode,
														entry->segmentFileNum,
														startOffset,
														&primaryError,
														&mirrorDataLossOccurred);
					if (primaryError != 0)
					{
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not open file %u/%u/%u.%u : %s",
										entry->relFileNode.spcNode,
										entry->relFileNode.dbNode,
										entry->relFileNode.relNode,
										entry->segmentFileNum,
										strerror(primaryError))));
						break;
					}

					if (mirrorDataLossOccurred)
						break;

					/* AO and CO Data Store writes 64k size by default */
					bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);
					buffer = (char*) palloc(bufferLen);
					if (buffer == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_OUT_OF_MEMORY),
								 (errmsg("not enough memory for resynchronization"))));

					MemSet(buffer, 0, bufferLen);

					/* copy the lost range in chunks of at most 2*BLCKSZ bytes */
					while (startOffset < endOffset)
					{
						retval = MirroredAppendOnly_Read(
														 &mirroredOpen,
														 buffer,
														 bufferLen);

						if (retval != bufferLen)
						{
							ereport(ERROR,
									(errcode_for_file_access(),
									 errmsg("could not read from position:" INT64_FORMAT " in file %u/%u/%u.%u : %m",
											startOffset,
											entry->relFileNode.spcNode,
											entry->relFileNode.dbNode,
											entry->relFileNode.relNode,
											entry->segmentFileNum)));
							break;
						}

						MirroredAppendOnly_Append(
												  &mirroredOpen,
												  buffer,
												  bufferLen,
												  &primaryError,
												  &mirrorDataLossOccurred);

						if (mirrorDataLossOccurred)
							break;

						Assert(primaryError == 0);	// No primary writes as resync worker.

						startOffset += bufferLen;
						/* AO and CO Data Store writes 64k size by default */
						bufferLen = (Size) Min(2*BLCKSZ, endOffset - startOffset);
					}

					if (buffer)
					{
						pfree(buffer);
						buffer = NULL;
					}

					if (mirrorDataLossOccurred)
						break;

					/* Flush written data on Mirror */
					MirroredAppendOnly_Flush(
											 &mirroredOpen,
											 &primaryError,
											 &mirrorDataLossOccurred);
					if (mirrorDataLossOccurred)
						break;

					Assert(primaryError == 0);	// Not flushed on primary as resync worker.

					/* Close Primary and Mirror */
					MirroredAppendOnly_Close(
											 &mirroredOpen,
											 &mirrorDataLossOccurred);

					break;

				case MirroredRelDataSynchronizationState_None:
				case MirroredRelDataSynchronizationState_DataSynchronized:
					break;

				default:
					/* identifier printed as spcNode/dbNode/relNode */
					ereport(LOG,
							(errmsg("could not resynchronize relation '%u/%u/%u' "
									"mirror synchronization state:'%s(%d)' ",
									entry->relFileNode.spcNode,
									entry->relFileNode.dbNode,
									entry->relFileNode.relNode,
									MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
									entry->mirrorDataSynchronizationState)));
					break;
			}
			break;
		}	//case

		default:
			Assert(0);
			break;
	}	//switch

	if (mirrorDataLossOccurred)
		status = STATUS_ERROR;

	return status;
}
static int FileRepAckPrimary_RunReceiver(void) { uint32_t msgLength = 0; FileRepConsumerProcIndex_e msgType; int status = STATUS_OK; char *msgPositionInsert; FileRepShmemMessageDescr_s *fileRepShmemMessageDescr; uint32 spareField; FileRep_InsertConfigLogEntry("run receiver"); while (1) { FileRepSubProcess_ProcessSignals(); if (FileRepSubProcess_GetState() != FileRepStateReady && FileRepSubProcess_GetState() != FileRepStateInitialization) { break; } if ( ! FileRepConnServer_AwaitMessageBegin()) { /* call was interrupted ... go back to beginning to process signals */ continue; } status = FileRepConnServer_ReceiveMessageType(&msgType); if (status != STATUS_OK) { break; } /* DATA MESSAGE TYPE */ status = FileRepConnServer_ReceiveMessageLength(&msgLength); if (status != STATUS_OK) { break; } msgPositionInsert = FileRep_ReserveShmem(fileRepAckShmemArray[msgType], msgLength, /* not used */ &spareField, FileRepOperationNotSpecified, FileRepAckShmemLock); if (msgPositionInsert == NULL) { status = STATUS_ERROR; ereport(WARNING, (errmsg("mirror failure, " "could not queue received ack message to be processed, " "failover requested"), errhint("run gprecoverseg to re-establish mirror connectivity"), FileRep_errdetail_Shmem(), FileRep_errdetail_ShmemAck(), FileRep_errcontext())); break; } status = FileRepConnServer_ReceiveMessageData( msgPositionInsert + sizeof(FileRepShmemMessageDescr_s), msgLength); if (status != STATUS_OK) { break; } SIMPLE_FAULT_INJECTOR(FileRepReceiver); fileRepShmemMessageDescr = (FileRepShmemMessageDescr_s*) msgPositionInsert; /* it is not in use */ fileRepShmemMessageDescr->messageSync = FALSE; fileRepShmemMessageDescr->messageState = FileRepShmemMessageStateReady; LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE); FileRep_IpcSignal(fileRepIpcArray[fileRepAckShmemArray[msgType]->ipcArrayIndex]->semC, &fileRepIpcArray[fileRepAckShmemArray[msgType]->ipcArrayIndex]->refCountSemC); LWLockRelease(FileRepAckShmemLock); FileRep_InsertLogEntry( "P_RunReceiver", 
FileRep_GetFlatFileIdentifier("", ""), FileRepRelationTypeNotSpecified, FileRepOperationNotSpecified, FILEREP_UNDEFINED, FILEREP_UNDEFINED, FileRepAckStateNotInitialized, spareField, FILEREP_UNDEFINED); } // while(1) FileRepConnServer_CloseConnection(); return status; }
/* * * FileRepPrimary_StartRecoveryInChangeTracking() * */ static void FileRepPrimary_StartRecoveryInChangeTracking(void) { FileRep_InsertConfigLogEntry("run recovery"); while (1) { while (FileRepSubProcess_GetState() == FileRepStateFault) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown || FileRepSubProcess_GetState() == FileRepStateShutdownBackends) { break; } Insist(fileRepRole == FileRepPrimaryRole); Insist(dataState == DataStateInChangeTracking); Insist(FileRepSubProcess_GetState() != FileRepStateReady); if (ChangeTracking_RetrieveIsTransitionToInsync()) { ChangeTracking_DropAll(); } else { if (ChangeTracking_RetrieveIsTransitionToResync() == FALSE && isFullResync()) { ChangeTracking_MarkFullResync(); /* segmentState == SegmentStateChangeTrackingDisabled */ getFileRepRoleAndState(&fileRepRole, &segmentState, &dataState, NULL, NULL); Assert(segmentState == SegmentStateChangeTrackingDisabled); /* database is resumed */ primaryMirrorSetIOSuspended(FALSE); FileRep_InsertConfigLogEntry("change tracking recovery completed"); break; } else { ChangeTracking_MarkIncrResync(); } } XLogInChangeTrackingTransition(); /* NOTE: Any error during change tracking will result in disabling Change Tracking */ FileRepSubProcess_SetState(FileRepStateReady); /* database is resumed */ primaryMirrorSetIOSuspended(FALSE); FileRep_InsertConfigLogEntry("change tracking recovery completed"); break; } // while(1) }
/* * * FileRepPrimary_RunRecoveryInSync() * * 1) Recover Flat Files * a) pg_control file * b) pg_database file * c) pg_auth file * d) pg_twophase directory * e) Slru directories * *) pg_clog * *) pg_multixact * *) pg_distributedlog * *) pg_distributedxidmap * *) pg_subtrans * * 2) Reconcile xlog EOF * */ static int FileRepPrimary_RunRecoveryInSync(void) { int status = STATUS_OK; FileRep_InsertConfigLogEntry("run recovery of flat files"); while (1) { status = XLogRecoverMirrorControlFile(); if (status != STATUS_OK) { break; } FileRepSubProcess_ProcessSignals(); if (FileRepSubProcess_GetState() != FileRepStateInitialization) { break; } status = XLogReconcileEofPrimary(); if (status != STATUS_OK) { break; } FileRepSubProcess_ProcessSignals(); if (FileRepSubProcess_GetState() != FileRepStateInitialization) { break; } MirroredFlatFile_DropTemporaryFiles(); FileRepSubProcess_ProcessSignals(); if (FileRepSubProcess_GetState() != FileRepStateInitialization) { break; } MirroredFlatFile_MirrorDropTemporaryFiles(); FileRepSubProcess_ProcessSignals(); if (FileRepSubProcess_GetState() != FileRepStateInitialization) { break; } status = FlatFilesRecoverMirror(); if (status != STATUS_OK) { break; } FileRepSubProcess_ProcessSignals(); if (FileRepSubProcess_GetState() != FileRepStateInitialization) { break; } status = TwoPhaseRecoverMirror(); if (status != STATUS_OK) { break; } FileRepSubProcess_ProcessSignals(); if (FileRepSubProcess_GetState() != FileRepStateInitialization) { break; } status = SlruRecoverMirror(); if (status != STATUS_OK) { break; } FileRepSubProcess_ProcessSignals(); if (FileRepSubProcess_GetState() != FileRepStateInitialization) { break; } FileRepSubProcess_SetState(FileRepStateReady); break; } return status; }
/*
 * SenderLoop
 *
 * FileRepAckMirror_RunSender()
 *
 * Consumes ack messages from the outgoing ack shared-memory queue
 * (fileRepAckShmemArray[FILEREP_OUTGOING_MESSAGE_QUEUE]) and sends them
 * with FileRepConnClient_SendMessage().
 *
 * Each iteration first (under FileRepAckShmemLock) advances positionConsume
 * past the message sent in the previous iteration, then waits until a
 * message in state FileRepShmemMessageStateReady is available at
 * positionConsume, and finally sends it.
 *
 * The loop ends when the sub-process leaves the Ready state or when a send
 * fails.  The client connection is closed before returning.
 *
 * Returns STATUS_OK, or STATUS_ERROR when a message could not be sent.
 */
static int
FileRepAckMirror_RunSender(void)
{
	FileRepShmemMessageDescr_s	*fileRepShmemMessageDescr=NULL;
	char						*fileRepMessage;
	int							status = STATUS_OK;
	/* TRUE once a message was sent and positionConsume must be advanced */
	bool						movePositionConsume = FALSE;
	FileRepConsumerProcIndex_e  messageType;
	FileRepMessageHeader_s		*fileRepMessageHeader;
	FileRepShmem_s				*fileRepAckShmem = NULL;

	FileRep_InsertConfigLogEntry("run sender ack");

	fileRepAckShmem = fileRepAckShmemArray[FILEREP_OUTGOING_MESSAGE_QUEUE];

	while (1) {

		LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);

		if (movePositionConsume) {

			/* skip the descriptor and body of the message just sent */
			fileRepAckShmem->positionConsume =
				fileRepAckShmem->positionConsume +
				fileRepShmemMessageDescr->messageLength +
				sizeof(FileRepShmemMessageDescr_s);

			/*
			 * Reached the wraparound mark while the inserter is elsewhere:
			 * continue from the beginning and reset the mark to the end.
			 */
			if (fileRepAckShmem->positionConsume == fileRepAckShmem->positionWraparound &&
				fileRepAckShmem->positionInsert != fileRepAckShmem->positionWraparound) {

				fileRepAckShmem->positionConsume = fileRepAckShmem->positionBegin;
				fileRepAckShmem->positionWraparound = fileRepAckShmem->positionEnd;
			}
			/* signal the producer-side semaphore after consuming a message */
			FileRep_IpcSignal(fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->semP,
							  &fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemP);
		}

		fileRepShmemMessageDescr =
			(FileRepShmemMessageDescr_s*) fileRepAckShmem->positionConsume;

		/*
		 * Wait while the queue is empty (consume == insert) or the message
		 * at positionConsume is not yet marked Ready.
		 */
		while ((fileRepAckShmem->positionConsume == fileRepAckShmem->positionInsert) ||
			   ((fileRepAckShmem->positionConsume != fileRepAckShmem->positionInsert) &&
				(fileRepShmemMessageDescr->messageState != FileRepShmemMessageStateReady))) {

			fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemC++;

			/* the lock is not held while processing signals and waiting */
			LWLockRelease(FileRepAckShmemLock);

			FileRepSubProcess_ProcessSignals();
			if (FileRepSubProcess_GetState() != FileRepStateReady) {
				/* re-take the lock: the code after this loop releases it */
				LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);
				break;
			}

			FileRep_IpcWait(fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->semC, &fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemC, FileRepAckShmemLock);

			LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);

			/* same wraparound handling as above, after having slept */
			if (fileRepAckShmem->positionConsume == fileRepAckShmem->positionWraparound &&
				fileRepAckShmem->positionInsert != fileRepAckShmem->positionWraparound) {

				fileRepAckShmem->positionConsume = fileRepAckShmem->positionBegin;
				fileRepAckShmem->positionWraparound = fileRepAckShmem->positionEnd;
			}

			/* Re-assign to find if messageState is changed */
			fileRepShmemMessageDescr =
				(FileRepShmemMessageDescr_s*) fileRepAckShmem->positionConsume;

		} // while internal

		fileRepAckShmem->consumeCount++;

		LWLockRelease(FileRepAckShmemLock);

		FileRepSubProcess_ProcessSignals();
		if (FileRepSubProcess_GetState() != FileRepStateReady) {
			break;
		}

		FileRep_InsertLogEntry(
							   "M_RunSenderAck",
							   FileRep_GetFlatFileIdentifier("", ""),
							   FileRepRelationTypeNotSpecified,
							   FileRepOperationNotSpecified,
							   FILEREP_UNDEFINED,
							   FILEREP_UNDEFINED,
							   FileRepAckStateNotInitialized,
							   FILEREP_UNDEFINED,
							   FILEREP_UNDEFINED);

#ifdef FAULT_INJECTOR
		FaultInjector_InjectFaultIfSet(
									   FileRepSender,
									   DDLNotSpecified,
									   "",	//databaseName
									   ""); // tableName
#endif

		/* the message (starting with its header) follows the descriptor */
		fileRepMessage = (char*) (fileRepAckShmem->positionConsume +
								  sizeof(FileRepShmemMessageDescr_s));

		/* assigned but not read anywhere else in this function */
		fileRepMessageHeader = (FileRepMessageHeader_s*) (fileRepAckShmem->positionConsume +
														  sizeof(FileRepShmemMessageDescr_s));

		/* every ack is sent with the XLog message type */
		messageType = FileRepMessageTypeXLog;

		if (! FileRepConnClient_SendMessage(
											messageType,
											fileRepShmemMessageDescr->messageSync,
											fileRepMessage,
											fileRepShmemMessageDescr->messageLength))
		{
			ereport(WARNING,
					(errcode_for_socket_access(),
					 errmsg("mirror failure, "
							"could not sent ack message to primary : %m, "
							"failover requested"),
					 errhint("run gprecoverseg to re-establish mirror connectivity"),
					 FileRep_errdetail_ShmemAck(),
					 FileRep_errcontext()));

			status = STATUS_ERROR;
			break;
		}

		/* sent successfully: the next iteration advances positionConsume */
		movePositionConsume = TRUE;
	} // while(1)

	FileRepConnClient_CloseConnection();

	return status;
}
/* * * FileRepPrimary_StartSender */ void FileRepAckMirror_StartSender(void) { int status = STATUS_OK; int retry = 0; struct timeval currentTime; pg_time_t beginTime = 0; pg_time_t endTime = 0; FileRep_InsertConfigLogEntry("start sender ack"); while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } while (FileRepSubProcess_GetState() == FileRepStateInitialization || FileRepSubProcess_GetState() == FileRepStateFault || (fileRepShmemArray[0]->state == FileRepStateNotInitialized && FileRepSubProcess_GetState() != FileRepStateShutdown )) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown) { break; } { char tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN]; snprintf(tmpBuf, sizeof(tmpBuf), "primary address(port) '%s(%d)' mirror address(port) '%s(%d)' ", fileRepPrimaryHostAddress, fileRepPrimaryPort, fileRepMirrorHostAddress, fileRepMirrorPort); FileRep_InsertConfigLogEntry(tmpBuf); } Insist(fileRepRole == FileRepMirrorRole); status = FileRepConnClient_EstablishConnection( fileRepPrimaryHostAddress, fileRepPrimaryPort, FALSE /* reportError */); if (status != STATUS_OK) { gettimeofday(¤tTime, NULL); beginTime = (pg_time_t) currentTime.tv_sec; } while (status != STATUS_OK && FileRep_IsRetry(retry) && (endTime - beginTime) < gp_segment_connect_timeout) { FileRep_Sleep10ms(retry); FileRep_IncrementRetry(retry); gettimeofday(¤tTime, NULL); endTime = (pg_time_t) currentTime.tv_sec; status = FileRepConnClient_EstablishConnection( fileRepPrimaryHostAddress, fileRepPrimaryPort, (retry == file_rep_retry && file_rep_retry != 0) || ((endTime - beginTime) > gp_segment_connect_timeout) ? TRUE : FALSE); if (FileRepSubProcess_IsStateTransitionRequested()) { break; } } if (status != STATUS_OK) { continue; } FileRep_SetFileRepRetry(); status = FileRepAckMirror_RunSender(); } // while(1) FileRepConnClient_CloseConnection(); return; }
/*
 * FileRepAckPrimary_RunConsumer()
 *
 * Consumes acknowledgement messages from the ack shared memory slot
 * FILEREP_ACKSHMEM_MESSAGE_SLOT_PRIMARY_ACK.
 *
 * For every message the routine
 *   - waits (under FileRepAckShmemLock) until the message at positionConsume
 *     is in state FileRepShmemMessageStateReady,
 *   - verifies the CRC stored right after the message header,
 *   - records operation-specific results (xLogEof / mirrorStatus),
 *   - updates the ack hash table with the received ack state,
 *   - inserts a log entry.
 *
 * Returns STATUS_OK when the loop is left because the sub-process state is
 * neither FileRepStateReady nor FileRepStateInitialization, and STATUS_ERROR
 * when the header CRC does not match, the ack state is not
 * FileRepAckStateCompleted, or the hash entry could not be updated.
 */
static int
FileRepAckPrimary_RunConsumer(void)
{
	FileRepShmemMessageDescr_s *fileRepShmemMessageDescr = NULL;
	FileRepMessageHeader_s *fileRepMessageHeader = NULL;
	pg_crc32   *fileRepMessageHeaderCrc;
	pg_crc32	messageHeaderCrcLocal = 0;
	int			status = STATUS_OK;
	/* TRUE once a message was fully processed and its space can be released */
	bool		movePositionConsume = FALSE;
	FileRepShmem_s *fileRepAckShmem = NULL;

	FileRep_InsertConfigLogEntry("run consumer");

	fileRepAckShmem = fileRepAckShmemArray[FILEREP_ACKSHMEM_MESSAGE_SLOT_PRIMARY_ACK];

	while (1) {

		LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);

		if (movePositionConsume) {

			/*
			 * Step over the message handled in the previous iteration:
			 * its descriptor plus its payload.
			 */
			fileRepAckShmem->positionConsume =
				fileRepAckShmem->positionConsume +
				fileRepShmemMessageDescr->messageLength +
				sizeof(FileRepShmemMessageDescr_s);

			/*
			 * The consume position reached the wraparound mark and the
			 * insert position is elsewhere: restart from the beginning and
			 * move the wraparound mark to the end.
			 */
			if (fileRepAckShmem->positionConsume == fileRepAckShmem->positionWraparound &&
				fileRepAckShmem->positionInsert != fileRepAckShmem->positionWraparound) {

				fileRepAckShmem->positionConsume = fileRepAckShmem->positionBegin;
				fileRepAckShmem->positionWraparound = fileRepAckShmem->positionEnd;
			}

			/* Signal semP -- presumably wakes a producer waiting for space; confirm against FileRep_IpcSignal */
			FileRep_IpcSignal(fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->semP,
							  &fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemP);
		}

		fileRepShmemMessageDescr =
			(FileRepShmemMessageDescr_s*) fileRepAckShmem->positionConsume;

		/*
		 * Wait while nothing has been inserted (consume == insert) or the
		 * message at the consume position is not yet marked Ready.
		 */
		while ((fileRepAckShmem->positionConsume == fileRepAckShmem->positionInsert) ||
			   ((fileRepAckShmem->positionConsume != fileRepAckShmem->positionInsert) &&
				(fileRepShmemMessageDescr->messageState != FileRepShmemMessageStateReady))) {

			/* Incremented while the lock is still held, before waiting on semC */
			fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemC++;

			LWLockRelease(FileRepAckShmemLock);

			FileRepSubProcess_ProcessSignals();
			if (FileRepSubProcess_GetState() != FileRepStateReady &&
				FileRepSubProcess_GetState() != FileRepStateInitialization) {

				/* Re-take the lock: the code after the loop releases it */
				LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);
				break;
			}

			FileRep_IpcWait(fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->semC,
							&fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemC,
							FileRepAckShmemLock);

			LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);

			/* Same wraparound handling as above, re-checked after the wait */
			if (fileRepAckShmem->positionConsume == fileRepAckShmem->positionWraparound &&
				fileRepAckShmem->positionInsert != fileRepAckShmem->positionWraparound) {

				fileRepAckShmem->positionConsume = fileRepAckShmem->positionBegin;
				fileRepAckShmem->positionWraparound = fileRepAckShmem->positionEnd;
			}

			/* Re-assign to find if messageState is changed */
			fileRepShmemMessageDescr =
				(FileRepShmemMessageDescr_s*) fileRepAckShmem->positionConsume;

		} // internal while

		/* Lock is held here on both exits of the inner loop */
		fileRepAckShmem->consumeCount++;
		LWLockRelease(FileRepAckShmemLock);

		/* Leave (status unchanged) when the sub-process is no longer Ready/Initialization */
		FileRepSubProcess_ProcessSignals();
		if (FileRepSubProcess_GetState() != FileRepStateReady &&
			FileRepSubProcess_GetState() != FileRepStateInitialization) {
			break;
		}

		SIMPLE_FAULT_INJECTOR(FileRepConsumer);

		/* Calculate and compare FileRepMessageHeader_s Crc */
		/* The header follows the shared memory descriptor ... */
		fileRepMessageHeader = (FileRepMessageHeader_s*) (fileRepAckShmem->positionConsume +
														  sizeof(FileRepShmemMessageDescr_s));

		FileRep_CalculateCrc((char *) fileRepMessageHeader,
							 sizeof(FileRepMessageHeader_s),
							 &messageHeaderCrcLocal);

		/* ... and the CRC that was stored with the message follows the header */
		fileRepMessageHeaderCrc =
			(pg_crc32 *) (fileRepAckShmem->positionConsume +
						  sizeof(FileRepMessageHeader_s) +
						  sizeof(FileRepShmemMessageDescr_s));

		if (*fileRepMessageHeaderCrc != messageHeaderCrcLocal) {
			status = STATUS_ERROR;
			ereport(WARNING,
					(errmsg("mirror failure, "
							"could not match ack message header checksum between primary '%u' and mirror '%u', "
							"failover requested",
							*fileRepMessageHeaderCrc,
							messageHeaderCrcLocal),
					 errhint("run gprecoverseg to re-establish mirror connectivity"),
					 FileRep_errdetail(fileRepMessageHeader->fileRepIdentifier,
									   fileRepMessageHeader->fileRepRelationType,
									   fileRepMessageHeader->fileRepOperation,
									   fileRepMessageHeader->messageCount),
					 FileRep_errdetail_ShmemAck(),
					 FileRep_errcontext()));

			break;
		}

		/* Write operation is never acknowledged.
		 * That means message should never have body.
		 * CRC of body should be always 0.
		 */
		Assert(fileRepMessageHeader->fileRepOperation != FileRepOperationWrite);
		Assert(fileRepMessageHeader->fileRepMessageBodyCrc == 0);

		/*
		 * Copy the operation-specific result carried by the ack into the
		 * file-level variables xLogEof / mirrorStatus; other operations
		 * carry nothing that is recorded here.
		 */
		switch (fileRepMessageHeader->fileRepOperation)
		{
			case FileRepOperationReconcileXLogEof:
				xLogEof = fileRepMessageHeader->fileRepOperationDescription.reconcile.xLogEof;

				if (Debug_filerep_print)
					ereport(LOG,
							(errmsg("ack reconcile xlogid '%d' xrecoff '%d' ",
									xLogEof.xlogid,
									xLogEof.xrecoff)));

				break;

			case FileRepOperationValidation:
				mirrorStatus = fileRepMessageHeader->fileRepOperationDescription.validation.mirrorStatus;

				if (Debug_filerep_print)
					ereport(LOG,
							(errmsg("ack validation status '%s' ",
									FileRepStatusToString[mirrorStatus])));

				break;

			case FileRepOperationCreate:
				mirrorStatus = fileRepMessageHeader->fileRepOperationDescription.create.mirrorStatus;

				if (Debug_filerep_print)
					ereport(LOG,
							(errmsg("ack create status '%s' ",
									FileRepStatusToString[mirrorStatus])));

				break;

			case FileRepOperationStartSlruChecksum:
				mirrorStatus = fileRepMessageHeader->fileRepOperationDescription.startChecksum.mirrorStatus;

				if (Debug_filerep_print)
				{
					ereport(LOG,
							(errmsg("ack start SLRU checksum: status = '%s', directory = '%s' ",
									FileRepStatusToString[mirrorStatus],
									fileRepMessageHeader->fileRepIdentifier.fileRepFlatFileIdentifier.directorySimpleName)));
				}

				break;

			case FileRepOperationVerifySlruDirectoryChecksum:
				mirrorStatus = fileRepMessageHeader->fileRepOperationDescription.verifyDirectoryChecksum.mirrorStatus;

				if (Debug_filerep_print)
				{
					ereport(LOG,
							(errmsg("ack verify SLRU directory checksum: status = '%s', directory = '%s' ",
									FileRepStatusToString[mirrorStatus],
									fileRepMessageHeader->fileRepIdentifier.fileRepFlatFileIdentifier.directorySimpleName)));
				}

				break;

			default:
				break;
		}

		/*
		 * Any ack state other than Completed is an error. The loop does not
		 * break here: the hash entry below is still updated with that state.
		 */
		if (fileRepMessageHeader->fileRepAckState != FileRepAckStateCompleted) {

			status = STATUS_ERROR;

			ereport(WARNING,
					(errmsg("mirror failure, "
							"could not complete operation on mirror ack state '%s', "
							"failover requested",
							FileRepAckStateToString[fileRepMessageHeader->fileRepAckState]),
					 errhint("run gprecoverseg to re-establish mirror connectivity"),
					 errSendAlert(true),
					 FileRep_errdetail(fileRepMessageHeader->fileRepIdentifier,
									   fileRepMessageHeader->fileRepRelationType,
									   fileRepMessageHeader->fileRepOperation,
									   fileRepMessageHeader->messageCount),
					 FileRep_errdetail_Shmem(),
					 FileRep_errdetail_ShmemAck(),
					 FileRep_errcontext()));

			/*
			 * FAULT has to be set before entry is updated in ack hash table
			 * in order to suspend backend process.
			 */
			FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
			FileRepSubProcess_ProcessSignals();
		}

		/* Publish the received ack state in the ack hash table */
		if (FileRepAckPrimary_UpdateHashEntry(
											  fileRepMessageHeader->fileRepIdentifier,
											  fileRepMessageHeader->fileRepRelationType,
											  fileRepMessageHeader->fileRepAckState) != STATUS_OK) {

			status = STATUS_ERROR;
			ereport(WARNING,
					(errmsg("mirror failure, "
							"could not update ack state '%s' in ack hash table, "
							"failover requested",
							FileRepAckStateToString[fileRepMessageHeader->fileRepAckState]),
					 errhint("run gprecoverseg to re-establish mirror connectivity"),
					 errSendAlert(true),
					 FileRep_errdetail(fileRepMessageHeader->fileRepIdentifier,
									   fileRepMessageHeader->fileRepRelationType,
									   fileRepMessageHeader->fileRepOperation,
									   fileRepMessageHeader->messageCount),
					 FileRep_errdetail_Shmem(),
					 FileRep_errdetail_ShmemAck(),
					 FileRep_errcontext()));
		}

		/* The log entry is written for failed messages as well */
		FileRep_InsertLogEntry(
							   "P_RunConsumer",
							   fileRepMessageHeader->fileRepIdentifier,
							   fileRepMessageHeader->fileRepRelationType,
							   fileRepMessageHeader->fileRepOperation,
							   messageHeaderCrcLocal,
							   fileRepMessageHeader->fileRepMessageBodyCrc,
							   fileRepMessageHeader->fileRepAckState,
							   FILEREP_UNDEFINED,
							   fileRepMessageHeader->messageCount);

		if (status != STATUS_OK) {
			break;
		}

		/* Message handled: the next iteration advances positionConsume */
		movePositionConsume = TRUE;
	} // while(1)

	return status;
}
/* * * FileRepPrimary_RunChangeTrackingCompacting() * */ static void FileRepPrimary_RunChangeTrackingCompacting(void) { int retry = 0; FileRep_InsertConfigLogEntry("run change tracking compacting if records has to be discarded"); /* * We have to check if any records have to be discarded from Change Tracking log file. * Due to crash it can happen that the highest change tracking log lsn > the highest xlog lsn. * * Records from change tracking log file can be discarded only after database is started. * Full environhment has to be set up in order to run queries over SPI. */ while (FileRepSubProcess_GetState() != FileRepStateShutdown && FileRepSubProcess_GetState() != FileRepStateShutdownBackends && isDatabaseRunning() == FALSE) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } ChangeTracking_DoFullCompactingRoundIfNeeded(); /* * Periodically check if compacting is required. * Periodic compacting is required in order to * a) reduce space for change tracking log file * b) reduce time for transition from Change Tracking to Resync */ FileRep_InsertConfigLogEntry("run change tracking compacting"); while (1) { FileRepSubProcess_ProcessSignals(); while (FileRepSubProcess_GetState() == FileRepStateFault || segmentState == SegmentStateChangeTrackingDisabled) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (! (FileRepSubProcess_GetState() == FileRepStateReady && dataState == DataStateInChangeTracking)) { break; } Insist(fileRepRole == FileRepPrimaryRole); Insist(dataState == DataStateInChangeTracking); Insist(FileRepSubProcess_GetState() == FileRepStateReady); /* retry compacting of change tracking log files once per minute */ pg_usleep(50000L); /* 50 ms */ if (++retry == 1200) { ChangeTracking_CompactLogsIfPossible(); retry=0; } } }
void FileRepSubProcess_Main() { const char *statmsg; MemoryContext fileRepSubProcessMemoryContext; sigjmp_buf local_sigjmp_buf; MyProcPid = getpid(); MyStartTime = time(NULL); /* * Create a PGPROC so we can use LWLocks in FileRep sub-processes. The * routine also register clean up at process exit */ InitAuxiliaryProcess(); InitBufferPoolBackend(); FileRepSubProcess_ConfigureSignals(); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Prevents interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); LWLockReleaseAll(); if (FileRepPrimary_IsResyncManagerOrWorker()) { LockReleaseAll(DEFAULT_LOCKMETHOD, false); } if (FileRepIsBackendSubProcess(fileRepProcessType)) { AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); } /* * We can now go away. Note that because we'll call InitProcess, a * callback will be registered to do ProcKill, which will clean up * necessary state. 
*/ proc_exit(0); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; PG_SETMASK(&UnBlockSig); /* * Identify myself via ps */ statmsg = FileRepProcessTypeToString[fileRepProcessType]; init_ps_display(statmsg, "", "", ""); /* Create the memory context where cross-transaction state is stored */ fileRepSubProcessMemoryContext = AllocSetContextCreate(TopMemoryContext, "filerep subprocess memory context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(fileRepSubProcessMemoryContext); stateChangeRequestCounter++; FileRepSubProcess_ProcessSignals(); switch (fileRepProcessType) { case FileRepProcessTypePrimarySender: FileRepPrimary_StartSender(); break; case FileRepProcessTypeMirrorReceiver: FileRepMirror_StartReceiver(); break; case FileRepProcessTypeMirrorConsumer: case FileRepProcessTypeMirrorConsumerWriter: case FileRepProcessTypeMirrorConsumerAppendOnly1: FileRepMirror_StartConsumer(); break; case FileRepProcessTypeMirrorSenderAck: FileRepAckMirror_StartSender(); break; case FileRepProcessTypePrimaryReceiverAck: FileRepAckPrimary_StartReceiver(); break; case FileRepProcessTypePrimaryConsumerAck: FileRepAckPrimary_StartConsumer(); break; case FileRepProcessTypePrimaryRecovery: FileRepSubProcess_InitProcess(); /* * At this point, database is starting up and xlog is not yet * replayed. Initializing relcache now is dangerous, a sequential * scan of catalog tables may end up with incorrect hint bits. * E.g. a committed transaction's dirty heap pages made it to disk * but pg_clog update was still in memory and we crashed. If a * tuple inserted by this transaction is read during relcache * initialization, status of the tuple's xmin will be incorrectly * determined as "not commited" from pg_clog. And * HEAP_XMIN_INVALID hint bit will be set, rendering the tuple * perpetually invisible. Relcache initialization must be * deferred to only after all of xlog has been replayed. 
*/ FileRepPrimary_StartRecovery(); ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); break; case FileRepProcessTypeResyncManager: FileRepSubProcess_InitProcess(); FileRepPrimary_StartResyncManager(); ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); break; case FileRepProcessTypeResyncWorker1: case FileRepProcessTypeResyncWorker2: case FileRepProcessTypeResyncWorker3: case FileRepProcessTypeResyncWorker4: FileRepSubProcess_InitProcess(); FileRepPrimary_StartResyncWorker(); ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); break; default: elog(PANIC, "unrecognized process type: %s(%d)", statmsg, fileRepProcessType); break; } switch (FileRepSubProcess_GetState()) { case FileRepStateShutdown: case FileRepStateReady: proc_exit(0); break; default: proc_exit(2); break; } }
/* * * FileRepAckPrimary_StartReceiver */ void FileRepAckPrimary_StartReceiver(void) { int status = STATUS_OK; struct timeval currentTime; pg_time_t beginTime = 0; pg_time_t endTime = 0; int retval = 0; FileRep_InsertConfigLogEntry("start receiver ack"); { char tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN]; snprintf(tmpBuf, sizeof(tmpBuf), "primary address(port) '%s(%d)' mirror address(port) '%s(%d)' ", fileRepPrimaryHostAddress, fileRepPrimaryPort, fileRepMirrorHostAddress, fileRepMirrorPort); FileRep_InsertConfigLogEntry(tmpBuf); } FileRepAckPrimary_ShmemReInit(); Insist(fileRepRole == FileRepPrimaryRole); if (filerep_inject_listener_fault) { status = STATUS_ERROR; ereport(WARNING, (errmsg("mirror failure, " "injected fault by guc filerep_inject_listener_fault, " "failover requested"), FileRep_errcontext())); FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); FileRepSubProcess_ProcessSignals(); return; } status = FileRepConnServer_StartListener( fileRepPrimaryHostAddress, fileRepPrimaryPort); gettimeofday(¤tTime, NULL); beginTime = (pg_time_t) currentTime.tv_sec; while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } while (FileRepSubProcess_GetState() == FileRepStateFault) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown) { break; } PG_SETMASK(&BlockSig); retval = FileRepConnServer_Select(); PG_SETMASK(&UnBlockSig); gettimeofday(¤tTime, NULL); endTime = (pg_time_t) currentTime.tv_sec; if ((endTime - beginTime) > gp_segment_connect_timeout) { ereport(WARNING, (errmsg("mirror failure, " "no connection was established from client from mirror, " "primary address(port) '%s(%d)' mirror address(port) '%s(%d)' timeout reached '%d' " "failover requested", fileRepPrimaryHostAddress, fileRepPrimaryPort, fileRepMirrorHostAddress, fileRepMirrorPort, 
gp_segment_connect_timeout), errSendAlert(true), FileRep_errcontext())); status = STATUS_ERROR; continue; } /* * check and process any signals received * The routine returns TRUE if the received signal requests * process shutdown. */ if (FileRepSubProcess_ProcessSignals()) { continue; } if (retval < 0) { status = STATUS_ERROR; continue; } if (retval == 0) { continue; } Assert(retval > 0); status = FileRepConnServer_CreateConnection(); if (status != STATUS_OK) { continue; } status = FileRepConnServer_ReceiveStartupPacket(); if (status != STATUS_OK) { continue; } fileRepShmemArray[0]->state = FileRepStateInitialization; status = FileRepAckPrimary_RunReceiver(); } // while(1) FileRepConnServer_CloseConnection(); return; }