/* -------------------------------- * pq_recvbuf - load some bytes into the input buffer * * returns 0 if OK, EOF if trouble * -------------------------------- */ static int pq_recvbuf(void) { if (PqRecvPointer > 0) { if (PqRecvLength > PqRecvPointer) { /* still some unread data, left-justify it in the buffer */ memmove(PqRecvBuffer, PqRecvBuffer + PqRecvPointer, PqRecvLength - PqRecvPointer); PqRecvLength -= PqRecvPointer; PqRecvPointer = 0; } else PqRecvLength = PqRecvPointer = 0; } /* Ensure that we're in blocking mode */ pq_set_nonblocking(false); /* Can fill buffer from PqRecvLength and upwards */ for (;;) { int r; r = secure_read(MyProcPort, PqRecvBuffer + PqRecvLength, PQ_RECV_BUFFER_SIZE - PqRecvLength); if (r < 0) { if (errno == EINTR || errno == EAGAIN) { /* change tracking */ if (FileRepSubProcess_IsStateTransitionRequested()) { elog(WARNING, "segment state transition requested while waiting to read data from socket"); return EOF; } else continue; /* Ok if interrupted or timeout expired */ } /* * Careful: an ereport() that tries to write to the client would * cause recursion to here, leading to stack overflow and core * dump! This message must go *only* to the postmaster log. */ ereport(COMMERROR, (errcode_for_socket_access(), errmsg("could not receive data from client: %m"))); return EOF; } if (r == 0) { /* * EOF detected. We used to write a log message here, but it's * better to expect the ultimate caller to do that. */ return EOF; } /* r contains number of bytes read, so just incr length */ PqRecvLength += r; return 0; } }
/* * * FileRepPrimary_StartSender */ void FileRepAckMirror_StartSender(void) { int status = STATUS_OK; int retry = 0; struct timeval currentTime; pg_time_t beginTime = 0; pg_time_t endTime = 0; FileRep_InsertConfigLogEntry("start sender ack"); while (1) { if (status != STATUS_OK) { FileRep_SetSegmentState(SegmentStateFault, FaultTypeMirror); FileRepSubProcess_SetState(FileRepStateFault); } while (FileRepSubProcess_GetState() == FileRepStateInitialization || FileRepSubProcess_GetState() == FileRepStateFault || (fileRepShmemArray[0]->state == FileRepStateNotInitialized && FileRepSubProcess_GetState() != FileRepStateShutdown )) { FileRepSubProcess_ProcessSignals(); pg_usleep(50000L); /* 50 ms */ } if (FileRepSubProcess_GetState() == FileRepStateShutdown) { break; } { char tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN]; snprintf(tmpBuf, sizeof(tmpBuf), "primary address(port) '%s(%d)' mirror address(port) '%s(%d)' ", fileRepPrimaryHostAddress, fileRepPrimaryPort, fileRepMirrorHostAddress, fileRepMirrorPort); FileRep_InsertConfigLogEntry(tmpBuf); } Insist(fileRepRole == FileRepMirrorRole); status = FileRepConnClient_EstablishConnection( fileRepPrimaryHostAddress, fileRepPrimaryPort, FALSE /* reportError */); if (status != STATUS_OK) { gettimeofday(¤tTime, NULL); beginTime = (pg_time_t) currentTime.tv_sec; } while (status != STATUS_OK && FileRep_IsRetry(retry) && (endTime - beginTime) < gp_segment_connect_timeout) { FileRep_Sleep10ms(retry); FileRep_IncrementRetry(retry); gettimeofday(¤tTime, NULL); endTime = (pg_time_t) currentTime.tv_sec; status = FileRepConnClient_EstablishConnection( fileRepPrimaryHostAddress, fileRepPrimaryPort, (retry == file_rep_retry && file_rep_retry != 0) || ((endTime - beginTime) > gp_segment_connect_timeout) ? TRUE : FALSE); if (FileRepSubProcess_IsStateTransitionRequested()) { break; } } if (status != STATUS_OK) { continue; } FileRep_SetFileRepRetry(); status = FileRepAckMirror_RunSender(); } // while(1) FileRepConnClient_CloseConnection(); return; }
/*
 * Backend checks if acknowledgement that its operation is completed
 * is received from mirror.
 *
 * The ack hash entry for the file is looked up under
 * FileRepAckHashShmemLock until one of the following happens:
 *	 a) state is FileRepAckStateCompleted: xLogEof and mirrorStatus are
 *		copied from the entry, the entry is removed from the hash and
 *		TRUE is returned;
 *	 b) state is FileRepAckStateMirrorInFault: the entry is removed and
 *		FALSE is returned;
 *	 c) the entry is not found, a segment state transition is requested,
 *		the retry budget is exhausted, or a shutdown operation has been
 *		retried 50 times: FALSE is returned.
 *
 * Whenever FALSE is returned mirrorStatus is set to
 * FileRepStatusMirrorLossOccurred and, unless primary/mirror I/O is
 * suspended, a WARNING is logged.
 *
 * Everything needed from the hash entry is snapshotted while the lock is
 * held: the entry must not be dereferenced after the lock is released or
 * after it has been removed from the hash.
 */
bool
FileRepAckPrimary_IsOperationCompleted(
									   FileRepIdentifier_u fileRepIdentifier,
									   FileRepRelationType_e fileRepRelationType)
{
	FileRepAckHashEntry_s *entry = NULL;
	bool		isCompleted = FALSE;
	bool		isRemoved;
	FileName	fileName = NULL;
	int			retry = 0;
	bool		retval = FALSE;

	/* snapshot of the entry, taken under FileRepAckHashShmemLock */
	bool		ipcSleep = FALSE;
	bool		isShutdownOp = FALSE;
	const char *ackStateName = NULL;

	fileName = FileRep_GetFileName(fileRepIdentifier, fileRepRelationType);

	while ((isCompleted == FALSE) && FileRep_IsRetry(retry))
	{
		LWLockAcquire(FileRepAckHashShmemLock, LW_EXCLUSIVE);

		entry = FileRepAckPrimary_LookupHashEntry(fileName);

		if (entry == NULL)
		{
			LWLockRelease(FileRepAckHashShmemLock);
			break;
		}

		/* Copy what is needed later; 'entry' is only valid under the lock. */
		ipcSleep = FileRep_IsIpcSleep(entry->fileRepOperation);
		isShutdownOp = (entry->fileRepOperation == FileRepOperationShutdown);
		ackStateName = FileRepAckStateToString[entry->fileRepAckState];

		switch (entry->fileRepAckState)
		{
			case FileRepAckStateWaiting:
				/* No Operation */
				break;

			case FileRepAckStateCompleted:
				retval = TRUE;
				xLogEof = entry->xLogEof;
				mirrorStatus = entry->mirrorStatus;
				/* fallthrough: the entry is removed in both cases */

			case FileRepAckStateMirrorInFault:
				isCompleted = TRUE;

				isRemoved = FileRepAckPrimary_RemoveHashEntry(fileName);
				Assert(isRemoved == TRUE);
				break;

			default:
				break;
		}

		/*
		 * About to block on the semaphore: register as a waiter while the
		 * lock is still held.
		 */
		if (isCompleted == FALSE && !ipcSleep)
		{
			fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->refCountSemP++;
		}

		LWLockRelease(FileRepAckHashShmemLock);

		if (isCompleted == FALSE)
		{
			if (FileRepSubProcess_IsStateTransitionRequested())
			{
				break;
			}

			if (ipcSleep)
			{
				FileRep_Sleep1ms(retry);

				if (retry == (3 * file_rep_retry / 4))
					ereport(WARNING,
							(errmsg("threshold '75' percent of 'gp_segment_connect_timeout=%d' is reached, "
									"mirror may not be able to keep up with primary, "
									"primary may transition to change tracking",
									gp_segment_connect_timeout),
							 errhint("increase guc 'gp_segment_connect_timeout' by 'gpconfig' and 'gpstop -u' "),
							 errSendAlert(true)));

				FileRep_IncrementRetry(retry);
			}
			else
			{
				FileRep_IpcWait(fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->semP,
								&fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->refCountSemP,
								FileRepAckHashShmemLock);
			}

			/*
			 * if the message was from the main filerep process then it is a
			 * graceful shutdown message to the mirror.  We don't want to
			 * stall shutdown if the mirror is unavailable so we wait a
			 * smaller amount of time
			 */
			if (isShutdownOp && retry == 50)
			{
				/*
				 * Take the lock for the removal, as the other removal in
				 * this function does.
				 */
				LWLockAcquire(FileRepAckHashShmemLock, LW_EXCLUSIVE);
				FileRepAckPrimary_RemoveHashEntry(fileName);
				LWLockRelease(FileRepAckHashShmemLock);
				break;
			}
		}
	}

	if (retval == FALSE)
	{
		mirrorStatus = FileRepStatusMirrorLossOccurred;

		if (!primaryMirrorIsIOSuspended())
		{
			/*
			 * 'entry' is only compared against NULL here; the state name was
			 * captured while the entry was still valid.
			 */
			ereport(WARNING,
					(errmsg("mirror failure, "
							"could not complete mirrored request identifier '%s' ack state '%s', "
							"failover requested",
							(fileName == NULL) ? "<null>" : fileName,
							(entry == NULL) ? "<entry not found>" : ackStateName),
					 errhint("run gprecoverseg to re-establish mirror connectivity"),
					 FileRep_errdetail_ShmemAck(),
					 FileRep_errcontext()));
		}
	}

	if (fileName)
	{
		pfree(fileName);
		fileName = NULL;
	}

	return retval;
}
/*
 * FileRepConnClient_SendMessage
 *
 * Queue one message on filerep_conn and, when required, flush it.
 *
 * The wire message type is derived from messageType:
 *		FileRepMessageTypeXLog		-> '1'
 *		FileRepMessageTypeAO01		-> '2'
 *		FileRepMessageTypeWriter	-> '3'
 *		FileRepMessageTypeShutdown	-> 'S'
 * Any other value is rejected.
 *
 * The connection is flushed when messageSynchronous is set or when the
 * pending output has reached file_rep_min_data_before_flush bytes.
 *
 * Returns true when the message was queued (and flushed, if a flush was
 * needed); false on an unknown message type, a buffering failure, a socket
 * failure, or a segment state transition requested while waiting to write.
 */
bool
FileRepConnClient_SendMessage(
							  FileRepConsumerProcIndex_e messageType,
							  bool messageSynchronous,
							  char *message,
							  uint32 messageLength)
{
	char		wireType = 0;
	int			flushStatus = STATUS_OK;

#ifdef USE_ASSERT_CHECKING
	int			prevOutCount = filerep_conn->outCount;
#endif   /* USE_ASSERT_CHECKING */

	if (messageType == FileRepMessageTypeXLog)
		wireType = '1';
	else if (messageType == FileRepMessageTypeAO01)
		wireType = '2';
	else if (messageType == FileRepMessageTypeWriter)
		wireType = '3';
	else if (messageType == FileRepMessageTypeShutdown)
		wireType = 'S';
	else
		return false;

	/*
	 * pqPutMsgStart and pqPutnchar may both grow the connection's internal
	 * buffer; neither of them flushes data.
	 */
	if (pqPutMsgStart(wireType, true, filerep_conn) < 0)
		return false;

	if (pqPutnchar(message, messageLength, filerep_conn) < 0)
		return false;

	/*
	 * Server side needs complete messages for mode-transitions, so finish
	 * the message without auto-flush (auto-flush may send partial messages).
	 */
	pqPutMsgEndNoAutoFlush(filerep_conn);

	/*
	 * No flush may have happened so far: the output count must have grown by
	 * exactly the payload plus the 5 bytes added by pqPutMsgStart.
	 */
	Assert(prevOutCount + messageLength + 5 == filerep_conn->outCount);

	/*
	 * note also that we could do a flush beforehand to avoid having
	 * pqPutMsgStart and pqPutnchar growing the buffer
	 */

	/* Nothing more to do unless a flush is due. */
	if (!messageSynchronous &&
		filerep_conn->outCount < file_rep_min_data_before_flush)
		return true;

	/* Flush until nothing is pending; waits and timeouts via pqWaitTimeout. */
	for (;;)
	{
		int			waitResult;

		flushStatus = pqFlushNonBlocking(filerep_conn);
		if (flushStatus <= 0)
			break;

		/* Data still pending: wait for the socket, retrying on timeout. */
		for (;;)
		{
			waitResult = pqWaitTimeout(FALSE, TRUE, filerep_conn,
									   time(NULL) + file_rep_socket_timeout);
			if (waitResult != 0)
				break;

			if (FileRepSubProcess_IsStateTransitionRequested())
			{
				elog(WARNING, "segment state transition requested while waiting to write data to socket");
				return false;
			}
		}

		if (waitResult < 0)
		{
			ereport(WARNING,
					(errcode_for_socket_access(),
					 errmsg("could not write data to socket, failure detected : %m")));
			return false;
		}
	}

	if (flushStatus < 0)
		return false;

	Assert(flushStatus == 0);
	return true;
}
/*
 * FileRepAckPrimary_NewHashEntry
 *
 * Called by a backend process to insert a new ack entry for the given file
 * into the ack hash table.
 *
 * While an entry for the same file already exists, the backend either
 * sleeps and consumes a retry (operations for which FileRep_IsIpcSleep()
 * is true) or blocks in FileRep_IpcWait() on the semC semaphore, and then
 * tries again.  Once inserted, the new entry is initialized to
 * FileRepAckStateWaiting with status FileRepStatusSuccess and the current
 * xLogEof.
 *
 * Returns STATUS_OK on success.  Returns STATUS_ERROR (after logging a
 * WARNING) when the insert returns NULL, or when the entry still exists
 * because a segment state transition was requested or the retries ran out.
 */
int
FileRepAckPrimary_NewHashEntry(
							   FileRepIdentifier_u fileRepIdentifier,
							   FileRepOperation_e fileRepOperation,
							   FileRepRelationType_e fileRepRelationType)
{
	FileRepAckHashEntry_s *newEntry = NULL;
	FileName	fileName = NULL;
	bool		exists = FALSE;
	bool		semWaitPending = FALSE;
	int			retry = 0;
	int			status = STATUS_OK;

	fileName = FileRep_GetFileName(fileRepIdentifier, fileRepRelationType);

	while (FileRep_IsRetry(retry))
	{
		LWLockAcquire(FileRepAckHashShmemLock, LW_EXCLUSIVE);

		/* back from a semaphore wait: clear the pending marker */
		if (!FileRep_IsIpcSleep(fileRepOperation) && semWaitPending == TRUE)
			semWaitPending = FALSE;

		newEntry = FileRepAckPrimary_InsertHashEntry(fileName, &exists);

		if (newEntry == NULL)
		{
			LWLockRelease(FileRepAckHashShmemLock);
			status = STATUS_ERROR;
			ereport(WARNING,
					(errmsg("mirror failure, "
							"could not insert ack entry into ack table, no memory "
							"failover requested"),
					 errhint("run gprecoverseg to re-establish mirror connectivity"),
					 FileRep_errdetail(fileRepIdentifier,
									   fileRepRelationType,
									   fileRepOperation,
									   FILEREP_UNDEFINED),
					 FileRep_errcontext()));

			goto cleanup;
		}

		if (!exists)
		{
			/* fresh entry: fill it in and we are done */
			newEntry->fileRepOperation = fileRepOperation;
			newEntry->fileRepRelationType = fileRepRelationType;
			newEntry->fileRepAckState = FileRepAckStateWaiting;
			newEntry->xLogEof = xLogEof;
			newEntry->mirrorStatus = FileRepStatusSuccess;

			LWLockRelease(FileRepAckHashShmemLock);
			break;
		}

		/*
		 * An entry for this file is already present.  Register as a
		 * semaphore waiter (while still holding the lock) unless this
		 * operation uses the sleep path.
		 */
		if (!FileRep_IsIpcSleep(fileRepOperation))
		{
			fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->refCountSemC++;
			semWaitPending = TRUE;
		}

		LWLockRelease(FileRepAckHashShmemLock);

		if (FileRepSubProcess_IsStateTransitionRequested())
		{
			status = STATUS_ERROR;
			break;
		}

		if (FileRep_IsIpcSleep(fileRepOperation))
		{
			FileRep_Sleep1ms(retry);
			FileRep_IncrementRetry(retry);
		}
		else
		{
			FileRep_IpcWait(fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->semC,
							&fileRepIpcArray[fileRepAckHashShmem->ipcArrayIndex]->refCountSemC,
							FileRepAckHashShmemLock);
		}
	}

	/* Left the loop while the old entry was still in the way. */
	if (exists)
	{
		status = STATUS_ERROR;
		ereport(WARNING,
				(errmsg("mirror failure, "
						"could not insert ack entry into ack table, entry exists "
						"failover requested"),
				 errhint("run gprecoverseg to re-establish mirror connectivity"),
				 FileRep_errdetail(fileRepIdentifier,
								   fileRepRelationType,
								   fileRepOperation,
								   FILEREP_UNDEFINED),
				 FileRep_errcontext()));
	}

cleanup:

	if (fileName)
	{
		pfree(fileName);
		fileName = NULL;
	}

	return status;
}