/* * FileRepPrimary_StartResyncWorker() */ void FileRepPrimary_StartResyncWorker(void) { int status = STATUS_OK; FileRep_InsertConfigLogEntry("start resync worker"); Insist(fileRepRole == FileRepPrimaryRole); while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } /* * We are waiting for following conditions to move forward: * * Database is running * And * if dataState is InResync, we wait for FileRepSubProcess to Ready state * else don't wait */ while (!isDatabaseRunning() || !(dataState == DataStateInResync ? FileRepSubProcess_GetState() == FileRepStateReady : true)) { FileRepSubProcess_ProcessSignals(); if (FileRepSubProcess_GetState() == FileRepStateShutdown || FileRepSubProcess_GetState() == FileRepStateShutdownBackends) { break; } pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown || FileRepSubProcess_GetState() == FileRepStateShutdownBackends) { break; } FileRepSubProcess_InitHeapAccess(); status = FileRepPrimary_RunResyncWorker(); if (status != STATUS_OK) { continue; } break; } // while(1) }
/* * * FileRepAckPrimary_StartConsumer */ void FileRepAckPrimary_StartConsumer(void) { int status = STATUS_OK; FileRep_InsertConfigLogEntry("run consumer"); while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } while (FileRepSubProcess_GetState() == FileRepStateFault || (fileRepShmemArray[0]->state == FileRepStateNotInitialized && FileRepSubProcess_GetState() != FileRepStateShutdown)) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown) { break; } status = FileRepAckPrimary_RunConsumer(); } // while(1) if (FileRepSubProcess_GetState() == FileRepStateShutdown) { /* perform graceful shutdown */ } LWLockAcquire(FileRepAckHashShmemLock, LW_EXCLUSIVE); FileRep_IpcSignal(fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->semP, &fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->refCountSemP); LWLockRelease(FileRepAckHashShmemLock); /* NOTE free memory (if any) */ return; }
/* * * FileRepPrimary_StartRecoveryInSync() * * */ static void FileRepPrimary_StartRecoveryInSync(void) { int status = STATUS_OK; FileRep_InsertConfigLogEntry("run recovery"); while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } while (FileRepSubProcess_GetState() == FileRepStateFault || (fileRepShmemArray[0]->state == FileRepStateNotInitialized && FileRepSubProcess_GetState() != FileRepStateShutdownBackends && FileRepSubProcess_GetState() != FileRepStateShutdown)) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown || FileRepSubProcess_GetState() == FileRepStateShutdownBackends) { break; } if (FileRepSubProcess_GetState() == FileRepStateReady) { break; } Insist(fileRepRole == FileRepPrimaryRole); Insist(dataState == DataStateInSync); status = FileRepPrimary_RunRecoveryInSync(); } // while(1) }
/* * FileRepPrimary_StartResyncWorker() */ void FileRepPrimary_StartResyncWorker(void) { int status = STATUS_OK; FileRep_InsertConfigLogEntry("start resync worker"); Insist(fileRepRole == FileRepPrimaryRole); while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } while (FileRepSubProcess_GetState() != FileRepStateShutdown && FileRepSubProcess_GetState() != FileRepStateShutdownBackends && ! (FileRepSubProcess_GetState() == FileRepStateReady && dataState == DataStateInResync)) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown || FileRepSubProcess_GetState() == FileRepStateShutdownBackends) { break; } status = FileRepPrimary_RunResyncWorker(); if (status != STATUS_OK) { continue; } break; } // while(1) }
/*
 * FaultInjector_InjectFaultIfSet()
 *
 * Look up the fault registered under 'identifier' and, if it matches the
 * given DDL statement, database name and table name and has not fired yet,
 * perform the action selected by the entry's faultInjectorType.
 *
 * Parameters:
 *   identifier   - key used to look up the entry in the fault hash table
 *   ddlStatement - must equal the entry's ddlStatement for the fault to fire
 *   databaseName - compared with the entry's databaseName after being copied
 *                  into a NAMEDATALEN-sized buffer (so it may be truncated)
 *   tableName    - compared with the entry's tableName in the same way
 *
 * Returns FALSE when nothing was injected (no slots in use, no entry, a
 * mismatch, the entry is already Triggered/Completed/Failed, or the
 * occurrence countdown is still above 1); otherwise returns the entry's
 * faultInjectorType.  The types reported at ERROR, FATAL or PANIC level are
 * not expected to reach the end of the function, which is why they mark the
 * entry Completed beforehand.
 *
 * Side effects visible here: refreshes the globals fileRepRole,
 * segmentState and dataState, and writes the local copy of the entry back
 * with FaultInjector_UpdateHashEntry().
 *
 * NOTE(review): databaseName and tableName are passed straight to a "%s"
 * conversion, so they are assumed to be non-NULL -- confirm against callers.
 */
FaultInjectorType_e
FaultInjector_InjectFaultIfSet(
                               FaultInjectorIdentifier_e identifier,
                               DDLStatement_e ddlStatement,
                               char* databaseName,
                               char* tableName)
{
    FaultInjectorEntry_s *entryShared, localEntry,
                         *entryLocal = &localEntry;     /* private snapshot of the shared entry */
    char databaseNameLocal[NAMEDATALEN];
    char tableNameLocal[NAMEDATALEN];
    int ii = 0;
    int cnt = 3600;     /* default number of 1-second iterations for the infinite-loop fault */

    /*
     * Return immediately if no fault has been injected ever.  It is
     * important to not touch the spinlock, especially if this is the
     * postmaster process.  If one of the backend processes dies while
     * holding the spin lock, and postmaster comes here before resetting
     * the shared memory, it waits without holder process and eventually
     * goes into PANIC.  Also this saves a few cycles to acquire the spin
     * lock and look into the shared hash table.
     *
     * Although this is a race condition without lock, a false negative is
     * ok given this framework is purely for dev/testing.
     */
    if (faultInjectorShmem->faultInjectorSlots == 0)
        return FALSE;

    getFileRepRoleAndState(&fileRepRole, &segmentState, &dataState, NULL, NULL);

    /* Copy the shared entry while holding the lock; work on the copy afterwards. */
    FiLockAcquire();

    entryShared = FaultInjector_LookupHashEntry(identifier);

    if (entryShared != NULL)
        memcpy(entryLocal, entryShared, sizeof(FaultInjectorEntry_s));

    FiLockRelease();

    /* Verify if fault injection is set */

    if (entryShared == NULL)
        /* fault injection is not set */
        return FALSE;

    if (entryLocal->ddlStatement != ddlStatement)
        /* fault injection is not set for the specified DDL */
        return FALSE;

    /* Bound the caller's name to NAMEDATALEN before comparing. */
    snprintf(databaseNameLocal, sizeof(databaseNameLocal), "%s", databaseName);

    if (strcmp(entryLocal->databaseName, databaseNameLocal) != 0)
        /* fault injection is not set for the specified database name */
        return FALSE;

    snprintf(tableNameLocal, sizeof(tableNameLocal), "%s", tableName);

    if (strcmp(entryLocal->tableName, tableNameLocal) != 0)
        /* fault injection is not set for the specified table name */
        return FALSE;

    if (entryLocal->faultInjectorState == FaultInjectorStateTriggered ||
        entryLocal->faultInjectorState == FaultInjectorStateCompleted ||
        entryLocal->faultInjectorState == FaultInjectorStateFailed) {
        /* fault injection was already executed */
        return FALSE;
    }

    /* Update the injection fault entry in hash table */
    if (entryLocal->occurrence != FILEREP_UNDEFINED)
    {
        /* Still counting down: consume one occurrence and do not fire yet. */
        if (entryLocal->occurrence > 1)
        {
            entryLocal->occurrence--;
            FaultInjector_UpdateHashEntry(entryLocal);
            return FALSE;
        }

        entryLocal->faultInjectorState = FaultInjectorStateTriggered;
        FaultInjector_UpdateHashEntry(entryLocal);
    }

    /* Inject fault */

    switch (entryLocal->faultInjectorType) {
        case FaultInjectorTypeNotSpecified:
            break;

        case FaultInjectorTypeSleep:
            ereport(LOG,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));

            /* sleepTime is scaled by 1000000 for pg_usleep() */
            pg_usleep(entryLocal->sleepTime * 1000000L);
            break;

        case FaultInjectorTypeFault:

            /*
             * The listed filerep identifiers raise a mirror fault,
             * FileRepTransitionToChangeTracking requests a postmaster
             * reset, and every other identifier raises an IO fault.
             */
            switch (entryLocal->faultInjectorIdentifier)
            {
                case FileRepConsumer:
                case FileRepConsumerVerification:
                case FileRepSender:
                case FileRepReceiver:
                case FileRepResync:
                case FileRepResyncInProgress:
                case FileRepResyncWorker:
                case FileRepResyncWorkerRead:
                case FileRepTransitionToInResyncMirrorReCreate:
                case FileRepTransitionToInResyncMarkReCreated:
                case FileRepTransitionToInResyncMarkCompleted:
                case FileRepTransitionToInSyncBegin:
                case FileRepTransitionToInSync:
                case FileRepTransitionToInSyncMarkCompleted:
                case FileRepTransitionToInSyncBeforeCheckpoint:
                case FileRepIsOperationCompleted:

                    FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
                    break;

                case FileRepTransitionToChangeTracking:

                    FileRep_SetPostmasterReset();
                    break;

                default:

                    FileRep_SetSegmentState(SegmentStateFault, FaultTypeIO);
                    break;
            }
            ereport(LOG,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));

            break;

        case FaultInjectorTypeFatal:
            /*
             * If it's one time occurrence then disable the fault before it's
             * actually triggered because this fault errors out the transaction
             * and hence we wont get a chance to disable it or put it in completed
             * state.
             */
            if (entryLocal->occurrence != FILEREP_UNDEFINED)
            {
                entryLocal->faultInjectorState = FaultInjectorStateCompleted;
                FaultInjector_UpdateHashEntry(entryLocal);
            }

            ereport(FATAL,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));

            break;

        case FaultInjectorTypePanic:
            /*
             * If it's one time occurrence then disable the fault before it's
             * actually triggered because this fault errors out the transaction
             * and hence we wont get a chance to disable it or put it in completed
             * state. For PANIC it may be unnecessary though.
             */
            if (entryLocal->occurrence != FILEREP_UNDEFINED)
            {
                entryLocal->faultInjectorState = FaultInjectorStateCompleted;
                FaultInjector_UpdateHashEntry(entryLocal);
            }

            ereport(PANIC,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));

            break;

        case FaultInjectorTypeError:
            /*
             * If it's one time occurrence then disable the fault before it's
             * actually triggered because this fault errors out the transaction
             * and hence we wont get a chance to disable it or put it in completed
             * state.
             */
            if (entryLocal->occurrence != FILEREP_UNDEFINED)
            {
                entryLocal->faultInjectorState = FaultInjectorStateCompleted;
                FaultInjector_UpdateHashEntry(entryLocal);
            }

            ereport(ERROR,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));
            break;

        case FaultInjectorTypeInfiniteLoop:
            ereport(LOG,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));

            /* For the immediate-shutdown fault the entry's sleepTime is the iteration count. */
            if (entryLocal->faultInjectorIdentifier == FileRepImmediateShutdownRequested)
                cnt = entryLocal->sleepTime;

            for (ii=0; ii < cnt; ii++)
            {
                pg_usleep(1000000L); /* sleep for 1 sec (1 sec * 3600 = 1 hour) */

                getFileRepRoleAndState(NULL, &segmentState, NULL, NULL, NULL);

                /*
                 * Leave the loop early once a shutdown is observed, except
                 * for the immediate-shutdown fault, which always runs all
                 * of its iterations.
                 */
                if ((entryLocal->faultInjectorIdentifier != FileRepImmediateShutdownRequested) &&
                    (segmentState == SegmentStateShutdownFilerepBackends ||
                     segmentState == SegmentStateImmediateShutdown ||
                     segmentState == SegmentStateShutdown ||
                     IsFtsShudownRequested()))
                {
                    break;
                }
            }
            break;

        case FaultInjectorTypeDataCorruption:
            ereport(LOG,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));
            break;

        case FaultInjectorTypeSuspend:
        {
            FaultInjectorEntry_s *entry;

            ereport(LOG,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));

            /*
             * Poll once a second until the entry disappears or its type
             * becomes Resume.  NOTE(review): this lookup is done without
             * FiLockAcquire(), unlike the lookup at the top of the function.
             */
            while ((entry = FaultInjector_LookupHashEntry(entryLocal->faultInjectorIdentifier)) != NULL &&
                   entry->faultInjectorType != FaultInjectorTypeResume)
            {
                pg_usleep(1000000L); /* 1 sec */
            }

            if (entry != NULL)
            {
                ereport(LOG,
                        (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                                FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                                FaultInjectorTypeEnumToString[entry->faultInjectorType])));
            }
            else
            {
                ereport(LOG,
                        (errmsg("fault 'NULL', fault name:'%s' ",
                                FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier])));

                /*
                 * Since the entry is gone already, we should NOT update
                 * the entry below.  (There could be other places in this
                 * function that are under the same situation; they have
                 * not been audited.)
                 */
                return entryLocal->faultInjectorType;
            }
            break;
        }

        case FaultInjectorTypeSkip:
            ereport(LOG,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));
            break;

        case FaultInjectorTypeMemoryFull:
        {
            char *buffer = NULL;

            ereport(LOG,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));

            /* Deliberately keep allocating BLCKSZ chunks without freeing them. */
            buffer = (char*) palloc(BLCKSZ);

            while (buffer != NULL)
            {
                buffer = (char*) palloc(BLCKSZ);
            }

            break;
        }

        case FaultInjectorTypeReset:
        case FaultInjectorTypeStatus:

            /* These types are not expected to reach the injection point. */
            ereport(LOG,
                    (errmsg("unexpected error, fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));

            Assert(0);
            break;

        case FaultInjectorTypeResume:
            break;

        case FaultInjectorTypeSegv:
        {
            /* Deliberate write through a null pointer to crash the process. */
            *(int *) 0 = 1234;
            break;
        }

        case FaultInjectorTypeInterrupt:
        {
            /*
             * The place where this type of fault is injected must have
             * HOLD_INTERRUPTS() .. RESUME_INTERRUPTS() around it, otherwise
             * the interrupt could be handled inside the fault injector itself
             */
            ereport(LOG,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));

            InterruptPending = true;
            QueryCancelPending = true;
            break;
        }

        case FaultInjectorTypeCheckpointAndPanic:
        {
            /* Mark a one-time fault Completed first; the PANIC below is not expected to return. */
            if (entryLocal->occurrence != FILEREP_UNDEFINED)
            {
                entryLocal->faultInjectorState = FaultInjectorStateCompleted;
                FaultInjector_UpdateHashEntry(entryLocal);
            }

            RequestCheckpoint(true, false);
            ereport(PANIC,
                    (errmsg("fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));
            break;
        }

        default:

            ereport(LOG,
                    (errmsg("unexpected error, fault triggered, fault name:'%s' fault type:'%s' ",
                            FaultInjectorIdentifierEnumToString[entryLocal->faultInjectorIdentifier],
                            FaultInjectorTypeEnumToString[entryLocal->faultInjectorType])));

            Assert(0);
            break;
    }

    /* A fault with a defined occurrence count is now done; write the final state back. */
    if (entryLocal->occurrence != FILEREP_UNDEFINED)
    {
        entryLocal->faultInjectorState = FaultInjectorStateCompleted;
    }

    FaultInjector_UpdateHashEntry(entryLocal);

    return (entryLocal->faultInjectorType);
}
/*
 * FileRepSubProcess_SetState()
 *
 * Apply a requested state change to this process's filerep state
 * (fileRepState) and then propagate the resulting state to the segment
 * state where needed.
 *
 * Parameter:
 *   fileRepStateLocal - the requested new state.  FileRepStateShutdownBackends
 *                       is translated to FileRepStateShutdown for backend
 *                       sub processes and ignored by every other process.
 *
 * Transition rules (current state -> request):
 *   NotInitialized -> anything        : accepted
 *   Initialization -> NotInitialized  : rejected, state becomes Fault
 *   Initialization -> anything else   : accepted
 *   Ready          -> Fault, Shutdown : accepted
 *   Ready          -> NotInitialized  : rejected, state becomes Fault
 *   Ready          -> Initialization  : ignored (function returns)
 *   Ready          -> Ready           : no change
 *   Fault          -> Fault, Shutdown : accepted
 *   Fault          -> other           : rejected, state stays Fault
 *   Shutdown       -> Shutdown        : accepted
 *   Shutdown       -> Fault           : ignored, state stays Shutdown
 *   Shutdown       -> other           : rejected, state becomes Fault
 *
 * Every rejected transition is reported with a WARNING.
 */
void
FileRepSubProcess_SetState(FileRepState_e fileRepStateLocal)
{
    if (fileRepStateLocal == FileRepStateShutdownBackends)
    {
        if (!FileRepIsBackendSubProcess(fileRepProcessType))
        {
            /*
             * This process is not a backend sub process, so a request to
             * shut down backends does not apply to it: leave the state
             * untouched.
             */
            return;
        }

        /* the current process must shutdown! */
        fileRepStateLocal = FileRepStateShutdown;
    }

    switch (fileRepState)
    {
        case FileRepStateNotInitialized:
            fileRepState = fileRepStateLocal;
            break;

        case FileRepStateInitialization:
            switch (fileRepStateLocal)
            {
                case FileRepStateNotInitialized:
                    ereport(WARNING,
                            (errmsg("mirror failure, "
                                    "unexpected filerep state transition from '%s' to '%s' "
                                    "failover requested",
                                    FileRepStateToString[fileRepState],
                                    FileRepStateToString[fileRepStateLocal]),
                             errhint("run gprecoverseg to re-establish mirror connectivity")));

                    fileRepState = FileRepStateFault;
                    break;

                default:
                    fileRepState = fileRepStateLocal;
                    break;
            }
            break;

        case FileRepStateReady:
            switch (fileRepStateLocal)
            {
                case FileRepStateFault:
                case FileRepStateShutdown:
                    fileRepState = fileRepStateLocal;
                    break;

                case FileRepStateNotInitialized:
                    ereport(WARNING,
                            (errmsg("mirror failure, "
                                    "unexpected filerep state transition from '%s' to '%s' "
                                    "failover requested",
                                    FileRepStateToString[fileRepState],
                                    FileRepStateToString[fileRepStateLocal]),
                             errhint("run gprecoverseg to re-establish mirror connectivity")));

                    fileRepState = FileRepStateFault;
                    break;

                case FileRepStateInitialization:

                    /*
                     * Nothing to assign and nothing to propagate -- this can
                     * happen when going from segmentState Ready to
                     * InSyncTransition.
                     */
                    return;

                case FileRepStateReady:
                    break;

                default:
                    Assert(0);
                    break;
            }
            break;

        case FileRepStateFault:
            switch (fileRepStateLocal)
            {
                case FileRepStateFault:
                case FileRepStateShutdown:
                    fileRepState = fileRepStateLocal;
                    break;

                case FileRepStateNotInitialized:
                case FileRepStateInitialization:
                case FileRepStateReady:
                    ereport(WARNING,
                            (errmsg("mirror failure, "
                                    "unexpected filerep state transition from '%s' to '%s' "
                                    "failover requested",
                                    FileRepStateToString[fileRepState],
                                    FileRepStateToString[fileRepStateLocal]),
                             errhint("run gprecoverseg to re-establish mirror connectivity")));

                    fileRepState = FileRepStateFault;
                    break;

                default:
                    Assert(0);
                    break;
            }
            break;

        case FileRepStateShutdownBackends:
            Assert(!"process filerep state should never be in ShutdownBackends");
            break;

        case FileRepStateShutdown:
            switch (fileRepStateLocal)
            {
                case FileRepStateShutdown:
                    fileRepState = fileRepStateLocal;
                    break;

                case FileRepStateNotInitialized:
                case FileRepStateInitialization:
                case FileRepStateReady:
                    ereport(WARNING,
                            (errmsg("mirror failure, "
                                    "unexpected filerep state transition from '%s' to '%s' "
                                    "failover requested",
                                    FileRepStateToString[fileRepState],
                                    FileRepStateToString[fileRepStateLocal]),
                             errhint("run gprecoverseg to re-establish mirror connectivity")));

                    fileRepState = FileRepStateFault;
                    /* explicit break instead of silently falling into the next case */
                    break;

                case FileRepStateFault:
                    /* a fault request while shutting down is ignored */
                    break;

                default:
                    Assert(0);
                    break;
            }
            break;

        default:
            Assert(0);
            break;
    }

    /* now update in shared memory if needed */
    switch (fileRepState)
    {
        case FileRepStateReady:
            if (segmentState != SegmentStateChangeTrackingDisabled)
            {
                FileRep_SetSegmentState(SegmentStateReady, FaultTypeNotInitialized);
            }
            break;

        case FileRepStateFault:
            /* propagate the fault to the segment state as a mirror fault */
            FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
            break;

        case FileRepStateInitialization:
        case FileRepStateShutdown:
        case FileRepStateNotInitialized:
            /* No operation */
            break;

        case FileRepStateShutdownBackends:
            Assert(0);
            break;

        default:
            Assert(0);
            break;
    }

    /* report the change */
    if (fileRepState != FileRepStateShutdown)
    {
        FileRep_InsertConfigLogEntry("set filerep state");
    }
}
/* * * FileRepPrimary_StartSender */ void FileRepAckMirror_StartSender(void) { int status = STATUS_OK; int retry = 0; struct timeval currentTime; pg_time_t beginTime = 0; pg_time_t endTime = 0; FileRep_InsertConfigLogEntry("start sender ack"); while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } while (FileRepSubProcess_GetState() == FileRepStateInitialization || FileRepSubProcess_GetState() == FileRepStateFault || (fileRepShmemArray[0]->state == FileRepStateNotInitialized && FileRepSubProcess_GetState() != FileRepStateShutdown )) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown) { break; } { char tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN]; snprintf(tmpBuf, sizeof(tmpBuf), "primary address(port) '%s(%d)' mirror address(port) '%s(%d)' ", fileRepPrimaryHostAddress, fileRepPrimaryPort, fileRepMirrorHostAddress, fileRepMirrorPort); FileRep_InsertConfigLogEntry(tmpBuf); } Insist(fileRepRole == FileRepMirrorRole); status = FileRepConnClient_EstablishConnection( fileRepPrimaryHostAddress, fileRepPrimaryPort, FALSE /* reportError */); if (status != STATUS_OK) { gettimeofday(¤tTime, NULL); beginTime = (pg_time_t) currentTime.tv_sec; } while (status != STATUS_OK && FileRep_IsRetry(retry) && (endTime - beginTime) < gp_segment_connect_timeout) { FileRep_Sleep10ms(retry); FileRep_IncrementRetry(retry); gettimeofday(¤tTime, NULL); endTime = (pg_time_t) currentTime.tv_sec; status = FileRepConnClient_EstablishConnection( fileRepPrimaryHostAddress, fileRepPrimaryPort, (retry == file_rep_retry && file_rep_retry != 0) || ((endTime - beginTime) > gp_segment_connect_timeout) ? TRUE : FALSE); if (FileRepSubProcess_IsStateTransitionRequested()) { break; } } if (status != STATUS_OK) { continue; } FileRep_SetFileRepRetry(); status = FileRepAckMirror_RunSender(); } // while(1) FileRepConnClient_CloseConnection(); return; }
/*
 * FileRepAckPrimary_RunConsumer()
 *
 * Consume acknowledgement messages from the primary-ack shared memory slot
 * (fileRepAckShmemArray[FILEREP_ACKSHMEM_MESSAGE_SLOT_PRIMARY_ACK]).
 *
 * For every message that becomes Ready the function verifies the header
 * CRC, records operation-specific results (xLogEof or mirrorStatus), passes
 * the ack state to FileRepAckPrimary_UpdateHashEntry() and writes a log
 * entry.  The consume position is advanced at the start of the following
 * iteration, under FileRepAckShmemLock.
 *
 * Returns STATUS_OK when the loop ends because the process state is no
 * longer Ready or Initialization, and STATUS_ERROR after a CRC mismatch, an
 * ack state other than Completed, or a failed hash entry update.
 */
static int
FileRepAckPrimary_RunConsumer(void)
{
    FileRepShmemMessageDescr_s  *fileRepShmemMessageDescr = NULL;
    FileRepMessageHeader_s      *fileRepMessageHeader = NULL;
    pg_crc32                    *fileRepMessageHeaderCrc;
    pg_crc32                    messageHeaderCrcLocal = 0;
    int                         status = STATUS_OK;
    bool                        movePositionConsume = FALSE;    /* TRUE once a message has been fully processed */
    FileRepShmem_s              *fileRepAckShmem = NULL;

    FileRep_InsertConfigLogEntry("run consumer");

    fileRepAckShmem = fileRepAckShmemArray[FILEREP_ACKSHMEM_MESSAGE_SLOT_PRIMARY_ACK];

    while (1) {

        LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);

        if (movePositionConsume) {

            /* Step over the message handled in the previous iteration: descriptor plus payload. */
            fileRepAckShmem->positionConsume =
                fileRepAckShmem->positionConsume +
                fileRepShmemMessageDescr->messageLength +
                sizeof(FileRepShmemMessageDescr_s);

            /*
             * The consumer reached the wraparound mark and the inserter is
             * not sitting on it: restart from the beginning of the buffer
             * and reset the mark to the end.
             */
            if (fileRepAckShmem->positionConsume == fileRepAckShmem->positionWraparound &&
                fileRepAckShmem->positionInsert != fileRepAckShmem->positionWraparound) {

                fileRepAckShmem->positionConsume = fileRepAckShmem->positionBegin;
                fileRepAckShmem->positionWraparound = fileRepAckShmem->positionEnd;
            }

            /* NOTE(review): presumably wakes a producer waiting on semP -- confirm. */
            FileRep_IpcSignal(fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->semP,
                              &fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemP);
        }

        fileRepShmemMessageDescr =
            (FileRepShmemMessageDescr_s*) fileRepAckShmem->positionConsume;

        /* Wait while the buffer is empty or the next message is not Ready yet. */
        while ((fileRepAckShmem->positionConsume == fileRepAckShmem->positionInsert) ||
               ((fileRepAckShmem->positionConsume != fileRepAckShmem->positionInsert) &&
                (fileRepShmemMessageDescr->messageState != FileRepShmemMessageStateReady))) {

            fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemC++;

            LWLockRelease(FileRepAckShmemLock);

            FileRepSubProcess_ProcessSignals();
            if (FileRepSubProcess_GetState() != FileRepStateReady &&
                FileRepSubProcess_GetState() != FileRepStateInitialization) {
                /* Re-take the lock so the code after the loop can release it unconditionally. */
                LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);
                break;
            }

            FileRep_IpcWait(fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->semC,
                            &fileRepIpcArray[fileRepAckShmem->ipcArrayIndex]->refCountSemC,
                            FileRepAckShmemLock);

            LWLockAcquire(FileRepAckShmemLock, LW_EXCLUSIVE);

            /* Same wraparound handling as above, now that positions may have moved. */
            if (fileRepAckShmem->positionConsume == fileRepAckShmem->positionWraparound &&
                fileRepAckShmem->positionInsert != fileRepAckShmem->positionWraparound) {

                fileRepAckShmem->positionConsume = fileRepAckShmem->positionBegin;
                fileRepAckShmem->positionWraparound = fileRepAckShmem->positionEnd;
            }

            /* Re-assign to find if messageState is changed */
            fileRepShmemMessageDescr =
                (FileRepShmemMessageDescr_s*) fileRepAckShmem->positionConsume;

        } /* internal while */

        fileRepAckShmem->consumeCount++;
        LWLockRelease(FileRepAckShmemLock);

        /* Leave with the current status once the process is neither Ready nor Initialization. */
        FileRepSubProcess_ProcessSignals();
        if (FileRepSubProcess_GetState() != FileRepStateReady &&
            FileRepSubProcess_GetState() != FileRepStateInitialization) {
            break;
        }

        SIMPLE_FAULT_INJECTOR(FileRepConsumer);

        /* Calculate and compare FileRepMessageHeader_s Crc */
        /* The message header follows the shared memory descriptor. */
        fileRepMessageHeader = (FileRepMessageHeader_s*) (fileRepAckShmem->positionConsume +
                                                          sizeof(FileRepShmemMessageDescr_s));

        FileRep_CalculateCrc((char *) fileRepMessageHeader,
                             sizeof(FileRepMessageHeader_s),
                             &messageHeaderCrcLocal);

        /* The stored header CRC follows the descriptor and the header. */
        fileRepMessageHeaderCrc =
            (pg_crc32 *) (fileRepAckShmem->positionConsume +
                          sizeof(FileRepMessageHeader_s) +
                          sizeof(FileRepShmemMessageDescr_s));

        if (*fileRepMessageHeaderCrc != messageHeaderCrcLocal)
        {
            status = STATUS_ERROR;
            ereport(WARNING,
                    (errmsg("mirror failure, "
                            "could not match ack message header checksum between primary '%u' and mirror '%u', "
                            "failover requested",
                            *fileRepMessageHeaderCrc,
                            messageHeaderCrcLocal),
                     errhint("run gprecoverseg to re-establish mirror connectivity"),
                     FileRep_errdetail(fileRepMessageHeader->fileRepIdentifier,
                                       fileRepMessageHeader->fileRepRelationType,
                                       fileRepMessageHeader->fileRepOperation,
                                       fileRepMessageHeader->messageCount),
                     FileRep_errdetail_ShmemAck(),
                     FileRep_errcontext()));

            break;
        }

        /* Write operation is never acknowledged.
         * That means message should never have body.
         * CRC of body should be always 0.
         */
        Assert(fileRepMessageHeader->fileRepOperation != FileRepOperationWrite);
        Assert(fileRepMessageHeader->fileRepMessageBodyCrc == 0);

        /* Capture the operation-specific result carried by the ack. */
        switch (fileRepMessageHeader->fileRepOperation)
        {
            case FileRepOperationReconcileXLogEof:
                xLogEof = fileRepMessageHeader->fileRepOperationDescription.reconcile.xLogEof;

                if (Debug_filerep_print)
                    ereport(LOG,
                            (errmsg("ack reconcile xlogid '%d' xrecoff '%d' ",
                                    xLogEof.xlogid,
                                    xLogEof.xrecoff)));

                break;

            case FileRepOperationValidation:
                mirrorStatus = fileRepMessageHeader->fileRepOperationDescription.validation.mirrorStatus;

                if (Debug_filerep_print)
                    ereport(LOG,
                            (errmsg("ack validation status '%s' ",
                                    FileRepStatusToString[mirrorStatus])));

                break;

            case FileRepOperationCreate:
                mirrorStatus = fileRepMessageHeader->fileRepOperationDescription.create.mirrorStatus;

                if (Debug_filerep_print)
                    ereport(LOG,
                            (errmsg("ack create status '%s' ",
                                    FileRepStatusToString[mirrorStatus])));

                break;

            case FileRepOperationStartSlruChecksum:
                mirrorStatus = fileRepMessageHeader->fileRepOperationDescription.startChecksum.mirrorStatus;

                if (Debug_filerep_print)
                {
                    ereport(LOG,
                            (errmsg("ack start SLRU checksum: status = '%s', directory = '%s' ",
                                    FileRepStatusToString[mirrorStatus],
                                    fileRepMessageHeader->fileRepIdentifier.fileRepFlatFileIdentifier.directorySimpleName)));
                }

                break;

            case FileRepOperationVerifySlruDirectoryChecksum:
                mirrorStatus = fileRepMessageHeader->fileRepOperationDescription.verifyDirectoryChecksum.mirrorStatus;

                if (Debug_filerep_print)
                {
                    ereport(LOG,
                            (errmsg("ack verify SLRU directory checksum: status = '%s', directory = '%s' ",
                                    FileRepStatusToString[mirrorStatus],
                                    fileRepMessageHeader->fileRepIdentifier.fileRepFlatFileIdentifier.directorySimpleName)));
                }

                break;

            default:
                break;
        }

        if (fileRepMessageHeader->fileRepAckState != FileRepAckStateCompleted) {

            status = STATUS_ERROR;

            ereport(WARNING,
                    (errmsg("mirror failure, "
                            "could not complete operation on mirror ack state '%s', "
                            "failover requested",
                            FileRepAckStateToString[fileRepMessageHeader->fileRepAckState]),
                     errhint("run gprecoverseg to re-establish mirror connectivity"),
                     errSendAlert(true),
                     FileRep_errdetail(fileRepMessageHeader->fileRepIdentifier,
                                       fileRepMessageHeader->fileRepRelationType,
                                       fileRepMessageHeader->fileRepOperation,
                                       fileRepMessageHeader->messageCount),
                     FileRep_errdetail_Shmem(),
                     FileRep_errdetail_ShmemAck(),
                     FileRep_errcontext()));

            /*
             * FAULT has to be set before entry is updated in ack hash table
             * in order to suspend backend process.
             */
            FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror);
            FileRepSubProcess_ProcessSignals();
        }

        /* The hash entry is updated even for a failed ack (see the ordering note above). */
        if (FileRepAckPrimary_UpdateHashEntry(
                                              fileRepMessageHeader->fileRepIdentifier,
                                              fileRepMessageHeader->fileRepRelationType,
                                              fileRepMessageHeader->fileRepAckState) != STATUS_OK) {

            status = STATUS_ERROR;
            ereport(WARNING,
                    (errmsg("mirror failure, "
                            "could not update ack state '%s' in ack hash table, "
                            "failover requested",
                            FileRepAckStateToString[fileRepMessageHeader->fileRepAckState]),
                     errhint("run gprecoverseg to re-establish mirror connectivity"),
                     errSendAlert(true),
                     FileRep_errdetail(fileRepMessageHeader->fileRepIdentifier,
                                       fileRepMessageHeader->fileRepRelationType,
                                       fileRepMessageHeader->fileRepOperation,
                                       fileRepMessageHeader->messageCount),
                     FileRep_errdetail_Shmem(),
                     FileRep_errdetail_ShmemAck(),
                     FileRep_errcontext()));
        }

        FileRep_InsertLogEntry(
                               "P_RunConsumer",
                               fileRepMessageHeader->fileRepIdentifier,
                               fileRepMessageHeader->fileRepRelationType,
                               fileRepMessageHeader->fileRepOperation,
                               messageHeaderCrcLocal,
                               fileRepMessageHeader->fileRepMessageBodyCrc,
                               fileRepMessageHeader->fileRepAckState,
                               FILEREP_UNDEFINED,
                               fileRepMessageHeader->messageCount);

        /* Any failure recorded above ends the consumer after the message was logged. */
        if (status != STATUS_OK) {
            break;
        }

        movePositionConsume = TRUE;
    } /* while(1) */

    return status;
}
/* * * FileRepAckPrimary_StartReceiver */ void FileRepAckPrimary_StartReceiver(void) { int status = STATUS_OK; struct timeval currentTime; pg_time_t beginTime = 0; pg_time_t endTime = 0; int retval = 0; FileRep_InsertConfigLogEntry("start receiver ack"); { char tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN]; snprintf(tmpBuf, sizeof(tmpBuf), "primary address(port) '%s(%d)' mirror address(port) '%s(%d)' ", fileRepPrimaryHostAddress, fileRepPrimaryPort, fileRepMirrorHostAddress, fileRepMirrorPort); FileRep_InsertConfigLogEntry(tmpBuf); } FileRepAckPrimary_ShmemReInit(); Insist(fileRepRole == FileRepPrimaryRole); if (filerep_inject_listener_fault) { status = STATUS_ERROR; ereport(WARNING, (errmsg("mirror failure, " "injected fault by guc filerep_inject_listener_fault, " "failover requested"), FileRep_errcontext())); FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); FileRepSubProcess_ProcessSignals(); return; } status = FileRepConnServer_StartListener( fileRepPrimaryHostAddress, fileRepPrimaryPort); gettimeofday(¤tTime, NULL); beginTime = (pg_time_t) currentTime.tv_sec; while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } while (FileRepSubProcess_GetState() == FileRepStateFault) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown) { break; } PG_SETMASK(&BlockSig); retval = FileRepConnServer_Select(); PG_SETMASK(&UnBlockSig); gettimeofday(¤tTime, NULL); endTime = (pg_time_t) currentTime.tv_sec; if ((endTime - beginTime) > gp_segment_connect_timeout) { ereport(WARNING, (errmsg("mirror failure, " "no connection was established from client from mirror, " "primary address(port) '%s(%d)' mirror address(port) '%s(%d)' timeout reached '%d' " "failover requested", fileRepPrimaryHostAddress, fileRepPrimaryPort, fileRepMirrorHostAddress, fileRepMirrorPort, 
gp_segment_connect_timeout), errSendAlert(true), FileRep_errcontext())); status = STATUS_ERROR; continue; } /* * check and process any signals received * The routine returns TRUE if the received signal requests * process shutdown. */ if (FileRepSubProcess_ProcessSignals()) { continue; } if (retval < 0) { status = STATUS_ERROR; continue; } if (retval == 0) { continue; } Assert(retval > 0); status = FileRepConnServer_CreateConnection(); if (status != STATUS_OK) { continue; } status = FileRepConnServer_ReceiveStartupPacket(); if (status != STATUS_OK) { continue; } fileRepShmemArray[0]->state = FileRepStateInitialization; status = FileRepAckPrimary_RunReceiver(); } // while(1) FileRepConnServer_CloseConnection(); return; }