/* * shareinput_writer_waitdone * * Called by the writer (producer) to wait for the "done" notfication from * all readers (consumers). * * This is a blocking operation. */ void shareinput_writer_waitdone(void *ctxt, int share_id, int nsharer_xslice) { ShareInput_Lk_Context *pctxt = (ShareInput_Lk_Context *) ctxt; mpp_fd_set rset; struct timeval tval; int numReady; char z; int ack_needed = nsharer_xslice - pctxt->zcnt; elog(DEBUG1, "SISC WRITER (shareid=%d, slice=%d): waiting for DONE message from %d readers", share_id, currentSliceId, ack_needed); while(ack_needed > 0) { CHECK_FOR_INTERRUPTS(); MPP_FD_ZERO(&rset); MPP_FD_SET(pctxt->donefd, &rset); tval.tv_sec = 1; tval.tv_usec = 0; numReady = select(pctxt->donefd+1, (fd_set *) &rset, NULL, NULL, &tval); if(numReady==1) { #if USE_ASSERT_CHECKING int rwsize = #endif retry_read(pctxt->donefd, &z, 1); Assert(rwsize == 1 && z == 'z'); elog(DEBUG1, "SISC WRITER (shareid=%d, slice=%d): wait done get 1 notification", share_id, currentSliceId); --ack_needed; } else if(numReady==0) { elog(DEBUG1, "SISC WRITER (shareid=%d, slice=%d): wait done timeout once", share_id, currentSliceId); } else { int save_errno = errno; elog(LOG, "SISC WRITER (shareid=%d, slice=%d): wait done time out once, errno %d", share_id, currentSliceId, save_errno); } } elog(DEBUG1, "SISC WRITER (shareid=%d, slice=%d): Writer received all %d reader done notifications", share_id, currentSliceId, nsharer_xslice - pctxt->zcnt); UnregisterXactCallbackOnce(XCallBack_ShareInput_FIFO, (void *) ctxt); shareinput_clean_lk_ctxt(ctxt); }
/*
 * writer_wait_for_acks
 *
 * Writer side: read single bytes from the done FIFO until `xslice`
 * acknowledgement bytes ('b') have been received.
 *
 * A 'z' byte read here does not count as an acknowledgement; it is tallied
 * in pctxt->zcnt instead.
 *
 * pctxt     - lock context holding the done FIFO descriptor and zcnt
 * share_id  - share identifier; used here only in log messages
 * xslice    - number of acknowledgements to wait for
 *
 * This blocks, polling with a one second select() timeout and checking for
 * interrupts on every pass.
 */
static void
writer_wait_for_acks(ShareInput_Lk_Context *pctxt, int share_id, int xslice)
{
	int			pending = xslice;
	int			nready;
	mpp_fd_set	readfds;
	struct timeval poll_interval;
	char		msg;

	while (pending > 0)
	{
		CHECK_FOR_INTERRUPTS();

		/* select() may modify both the fd set and the timeout: rebuild them */
		MPP_FD_ZERO(&readfds);
		MPP_FD_SET(pctxt->donefd, &readfds);
		poll_interval.tv_sec = 1;
		poll_interval.tv_usec = 0;

		nready = select(pctxt->donefd + 1, (fd_set *) &readfds, NULL, NULL, &poll_interval);

		if (nready == 0)
		{
			/* Timed out without data */
			elog(DEBUG1, "SISC WRITER (shareid=%d, slice=%d): Notify ready time out once ... ",
				 share_id, currentSliceId);
		}
		else if (nready != 1)
		{
			/* Unexpected select() result; log errno and try again */
			int			save_errno = errno;

			elog(LOG, "SISC WRITER (shareid=%d, slice=%d): notify still wait for an answer, errno %d",
				 share_id, currentSliceId, save_errno);
		}
		else
		{
#if USE_ASSERT_CHECKING
			int			nread =
#endif
			retry_read(pctxt->donefd, &msg, 1);

			Assert(nread == 1);

			if (msg != 'z')
			{
				/* Anything that is not 'z' must be an acknowledgement */
				Assert(msg == 'b');
				--pending;
				elog(DEBUG1, "SISC WRITER (shareid=%d, slice=%d): notify ready succeed 1, xslice remaining %d",
					 share_id, currentSliceId, pending);
			}
			else
			{
				/* A 'z' byte: count it rather than treating it as an ack */
				++pctxt->zcnt;
			}
		}
	}
}
/*
 * shareinput_reader_waitready
 *
 * Reader (consumer) side: allocate and initialise a lock context, open the
 * "ready" and "done" FIFOs for this share_id, and block until the writer's
 * handshake byte ('a') arrives on the ready FIFO.
 *
 * share_id  - share identifier; used to derive the FIFO names and in logs
 * planGen   - when PLANGEN_PLANNER, an acknowledgement byte "b" is written
 *             to the done FIFO after the handshake has been received
 *
 * Returns the newly allocated ShareInput_Lk_Context as a void pointer.
 * Raises ERROR when the allocation fails or a FIFO cannot be opened.
 *
 * The context is registered with XCallBack_ShareInput_FIFO before any FIFO
 * is created.  The wait polls with a one second select() timeout and checks
 * for interrupts on every pass.
 */
void *
shareinput_reader_waitready(int share_id, PlanGenerator planGen)
{
	ShareInput_Lk_Context *lkctx;
	mpp_fd_set	readfds;
	struct timeval poll_interval;
	int			nready;
	char		msg;

	lkctx = gp_malloc(sizeof(ShareInput_Lk_Context));
	if (lkctx == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("Share input reader failed: out of memory")));

	/* Start from a fully defined state before registering the callback */
	lkctx->readyfd = -1;
	lkctx->donefd = -1;
	lkctx->zcnt = 0;
	lkctx->del_ready = false;
	lkctx->del_done = false;
	lkctx->lkname_ready[0] = '\0';
	lkctx->lkname_done[0] = '\0';

	RegisterXactCallbackOnce(XCallBack_ShareInput_FIFO, lkctx);

	/* "ready" FIFO */
	sisc_lockname(lkctx->lkname_ready, MAXPGPATH, share_id, "ready");
	create_tmp_fifo(lkctx->lkname_ready);
	lkctx->readyfd = open(lkctx->lkname_ready, O_RDWR, 0600);
	if (lkctx->readyfd < 0)
		elog(ERROR, "could not open fifo \"%s\": %m", lkctx->lkname_ready);

	/* "done" FIFO */
	sisc_lockname(lkctx->lkname_done, MAXPGPATH, share_id, "done");
	create_tmp_fifo(lkctx->lkname_done);
	lkctx->donefd = open(lkctx->lkname_done, O_RDWR, 0600);
	if (lkctx->donefd < 0)
		elog(ERROR, "could not open fifo \"%s\": %m", lkctx->lkname_done);

	for (;;)
	{
		CHECK_FOR_INTERRUPTS();

		/* select() may modify both the fd set and the timeout: rebuild them */
		MPP_FD_ZERO(&readfds);
		MPP_FD_SET(lkctx->readyfd, &readfds);
		poll_interval.tv_sec = 1;
		poll_interval.tv_usec = 0;

		nready = select(lkctx->readyfd + 1, (fd_set *) &readfds, NULL, NULL, &poll_interval);

		if (nready == 0)
		{
			/* Timed out without a handshake */
			elog(DEBUG1, "SISC READER (shareid=%d, slice=%d): Wait ready time out once",
				 share_id, currentSliceId);
		}
		else if (nready != 1)
		{
			/* Unexpected select() result; log errno and try again */
			int			save_errno = errno;

			elog(LOG, "SISC READER (shareid=%d, slice=%d): Wait ready try again, errno %d ... ",
				 share_id, currentSliceId, save_errno);
		}
		else
		{
#if USE_ASSERT_CHECKING
			int			nbytes =
#endif
			retry_read(lkctx->readyfd, &msg, 1);

			Assert(nbytes == 1 && msg == 'a');
			elog(DEBUG1, "SISC READER (shareid=%d, slice=%d): Wait ready got writer's handshake",
				 share_id, currentSliceId);

			if (planGen == PLANGEN_PLANNER)
			{
				/* For planner-generated plans, ack the handshake to the writer */
				elog(DEBUG1, "SISC READER (shareid=%d, slice=%d): Wait ready writing ack back to writer",
					 share_id, currentSliceId);
#if USE_ASSERT_CHECKING
				nbytes =
#endif
				retry_write(lkctx->donefd, "b", 1);

				Assert(nbytes == 1);
			}

			/* Handshake complete */
			break;
		}
	}

	return (void *) lkctx;
}
/* * Check a file descriptor for read and/or write data, possibly waiting. * If neither forRead nor forWrite are set, immediately return a timeout * condition (without waiting). Return >0 if condition is met, 0 * if a timeout occurred, -1 if an error or interrupt occurred. * * Timeout is infinite if end_time is -1. Timeout is immediate (no blocking) * if end_time is 0 (or indeed, any time before now). */ static int pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time) { /* We use poll(2) if available, otherwise select(2) */ #ifdef HAVE_POLL struct pollfd input_fd; int timeout_ms; if (!forRead && !forWrite) return 0; input_fd.fd = sock; input_fd.events = POLLERR; input_fd.revents = 0; if (forRead) input_fd.events |= POLLIN; if (forWrite) input_fd.events |= POLLOUT; /* Compute appropriate timeout interval */ if (end_time == ((time_t) -1)) timeout_ms = -1; else { time_t now = time(NULL); if (end_time > now) timeout_ms = (end_time - now) * 1000; else timeout_ms = 0; } return poll(&input_fd, 1, timeout_ms); #else /* !HAVE_POLL */ mpp_fd_set input_mask; mpp_fd_set output_mask; mpp_fd_set except_mask; struct timeval timeout; struct timeval *ptr_timeout; if (!forRead && !forWrite) return 0; MPP_FD_ZERO(&input_mask); MPP_FD_ZERO(&output_mask); MPP_FD_ZERO(&except_mask); if (forRead) MPP_FD_SET(sock, &input_mask); if (forWrite) MPP_FD_SET(sock, &output_mask); MPP_FD_SET(sock, &except_mask); /* Compute appropriate timeout interval */ if (end_time == ((time_t) -1)) ptr_timeout = NULL; else { time_t now = time(NULL); if (end_time > now) timeout.tv_sec = end_time - now; else timeout.tv_sec = 0; timeout.tv_usec = 0; ptr_timeout = &timeout; } return select(sock + 1, (fd_set *)&input_mask, (fd_set *)&output_mask, (fd_set *)&except_mask, ptr_timeout); #endif /* HAVE_POLL */ }
/*
 * ServiceListenLoop
 *
 * Main loop of a service process.  It waits (with a one second select()
 * timeout) on the listener socket and on every accepted client socket, and
 * hands each readable socket to ServiceProcessRequest().
 *
 * On every pass the loop also:
 *   - checks for interrupts;
 *   - leaves the loop if serviceConfig->ServiceShutdownRequested() is true,
 *     after calling the optional ServiceShutdown callback;
 *   - checks that the postmaster is alive; if not, the optional
 *     ServicePostmasterDied callback is called, or, when no callback is
 *     configured, the process logs a message and exits with status 1.
 *
 * A socket for which ServiceProcessRequest() returns false is removed from
 * the select set, shut down for writing and closed.
 *
 * This function does not return: after the loop ends it logs
 * "normal shutdown" and calls proc_exit(0).  A select() failure other than
 * EINTR is raised as ERROR.
 */
static void
ServiceListenLoop(ServiceCtrl *serviceCtrl)
{
	ServiceConfig *serviceConfig = (ServiceConfig*)serviceCtrl->serviceConfig;
	uint8	   *inputBuff;
	int			n,
				highsock = 0,
				newsockfd;
	mpp_fd_set	rset,
				rrset;
	struct sockaddr_in addr;
	socklen_t	addrlen;
	List	   *connectedSockets = NIL;
	ListCell   *cell;

	Assert(TopMemoryContext != NULL);
	MemoryContextSwitchTo(TopMemoryContext);
	Assert(CurrentMemoryContext == TopMemoryContext);

	/*
	 * Setup scratch buffer.
	 */
	inputBuff = palloc(serviceConfig->requestLen);

	/*
	 * rset is the persistent set of descriptors to watch; highsock is kept
	 * as an upper bound on the descriptor numbers it contains.
	 */
	MPP_FD_ZERO(&rset);
	MPP_FD_SET(serviceCtrl->listenerFd, &rset);
	highsock = serviceCtrl->listenerFd + 1;

	/* we'll handle many incoming sockets; messages are expected to be very small. */
	while (true)
	{
		/*
		 * 1 second.  Use a local variable since select modifies the timeout
		 * parameter with remaining time.
		 */
		struct timeval shutdownTimeout = {1,0};
		int			select_errno;

		CHECK_FOR_INTERRUPTS();

		if (serviceConfig->ServiceShutdownRequested())
		{
			if (serviceConfig->ServiceShutdown != NULL)
			{
				serviceConfig->ServiceShutdown();
			}
			break;
		}

		/* no need to live on if postmaster has died */
		if (!PostmasterIsAlive(true))
		{
			if (serviceConfig->ServicePostmasterDied != NULL)
			{
				serviceConfig->ServicePostmasterDied();
			}
			else
			{
				ereport(LOG,
						(errmsg("exiting because postmaster has died")));
				proc_exit(1);
			}
		}

		/* select() overwrites its fd set, so work on a copy */
		memcpy(&rrset, &rset, sizeof(mpp_fd_set));

		n = select(highsock + 1, (fd_set *)&rrset, NULL, NULL, &shutdownTimeout);
		/* capture errno before anything else can overwrite it */
		select_errno = errno;

		if (n == 0 || (n < 0 && select_errno == EINTR))
		{
			/* intr or timeout: Have we been here too long ? */
			continue;
		}

		if (n < 0)
		{
			/*
			 * this may be a little severe, but if we error on select()
			 * we'll just go ahead and blow up.  This will result in the
			 * postmaster re-spawning a new process.
			 */
			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("'%s': error during select() call (error:%d).",
							serviceConfig->title, select_errno)));
			break;				/* not reached: ereport(ERROR) does not return */
		}

		/* is it someone tickling our listener port? */
		if (MPP_FD_ISSET(serviceCtrl->listenerFd, &rrset))
		{
			addrlen = sizeof(addr);
			newsockfd = accept(serviceCtrl->listenerFd,
							   (struct sockaddr *) &addr,
							   &addrlen);

			if (newsockfd < 0)
			{
				int			accept_errno = errno;

				/*
				 * There is no new socket.  Report the failure and do NOT
				 * touch newsockfd any further: using a negative descriptor
				 * with the fd-set macros is out of range, and every
				 * subsequent call on it could only fail.
				 */
				ereport(NOTICE,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("'%s': error from client connection: %s)",
								serviceConfig->title,
								strerror(accept_errno))));
			}
			else
			{
				/* switch the new socket to non-blocking mode before using it */
				if (!pg_set_noblock(newsockfd))
				{
					int			noblock_errno = errno;

					ereport(NOTICE,
							(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
							 errmsg("'%s': could not set outbound socket to non-blocking mode: %s",
									serviceConfig->title,
									strerror(noblock_errno))));
				}

				if (newsockfd > highsock)
					highsock = newsockfd + 1;

				MPP_FD_SET(newsockfd, &rset);

				/*
				 * Read connection message.
				 *
				 * UNDONE: temporarily turn off new connection flag...
				 */
				if (!ServiceProcessRequest(serviceCtrl, newsockfd, inputBuff, false))
				{
					/* close it down */
					MPP_FD_CLR(newsockfd, &rset);
					shutdown(newsockfd, SHUT_WR);
					close(newsockfd);
				}
				else
				{
					connectedSockets = lappend_int(connectedSockets, newsockfd);
				}
			}
		}

		/* loop through all of our established sockets */
		cell = list_head(connectedSockets);
		while (cell != NULL)
		{
			int			fd = lfirst_int(cell);

			/* get the next cell ready before we delete */
			cell = lnext(cell);

			if (MPP_FD_ISSET(fd, &rrset))
			{
				if (!ServiceProcessRequest(serviceCtrl, fd, inputBuff, false))
				{
					/* close it down */
					MPP_FD_CLR(fd, &rset);
					connectedSockets = list_delete_int(connectedSockets, fd);
					shutdown(fd, SHUT_WR);
					close(fd);
				}
			}
		}
	}

	ereport(LOG,
			(errmsg("normal shutdown")));
	proc_exit(0);
}
/*
 * ServiceClientRead
 *
 * Read exactly responseLen bytes from serviceClient->sockfd into response.
 *
 * serviceClient - client connection; its sockfd and serviceConfig are used
 * response      - destination buffer (must not be NULL)
 * responseLen   - number of bytes to read
 * timeout       - limit for waiting in select() when read() would block, or
 *                 NULL to wait without a time limit
 *
 * Returns true once all bytes have been read.  Any error raised while
 * reading (connection closed, read/select failure, timeout) is caught here:
 * it is demoted to WARNING, its message is copied into ClientErrorString
 * (or an empty string if there is no message or it does not fit), the error
 * is emitted and flushed, and false is returned.  If the error cannot be
 * demoted it is re-thrown.
 */
static bool
ServiceClientRead(ServiceClient *serviceClient, void* response, int responseLen, struct timeval *timeout)
{
	ServiceConfig *serviceConfig;
	int n;
	int bytesRead = 0;
	int saved_err;
	char *message;
	bool result = false;
	mpp_fd_set rset;
	/*
	 * Use local variable since select modifies the timeout parameter with
	 * remaining time.
	 * NOTE(review): where select() does update the timeout, repeated waits
	 * would draw on the same remaining budget -- confirm this is intended.
	 */
	struct timeval rundownTimeout = {0,0};
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	serviceConfig = serviceClient->serviceConfig;
	Assert(serviceConfig != NULL);
	Assert(response != NULL);

	/* Work on a private copy so the caller's timeval is never modified */
	if (timeout != NULL)
		rundownTimeout = *timeout;

	PG_TRY();
	{
		SUPPRESS_PANIC();

		/*
		 * read the response
		 */
		while (bytesRead < responseLen)
		{
			n = read(serviceClient->sockfd,
					 ((char *)response) + bytesRead,
					 responseLen - bytesRead);
			/* remember errno before any other call can change it */
			saved_err = errno;

			/* read() returning 0 is reported as a closed connection */
			if (n == 0)
			{
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Connection to '%s' is closed (%d)",
								serviceConfig->title, serviceClient->sockfd)));
			}

			if (n < 0)
			{
				/* anything other than "interrupted" or "would block" is fatal */
				if (saved_err != EINTR && saved_err != EWOULDBLOCK)
				{
					ereport(ERROR,
							(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
							 errmsg("Read error from '%s': %s (%d)",
									serviceConfig->title,
									strerror(saved_err),
									serviceClient->sockfd)));
				}

				if (saved_err == EWOULDBLOCK)
				{
					/* we shouldn't really get here since we are dealing with
					 * small messages, but once we've read a bit of data we
					 * need to finish out reading till we get the message (or error)
					 */
					do
					{
						/* the fd set is rebuilt before every select() call */
						MPP_FD_ZERO(&rset);
						MPP_FD_SET(serviceClient->sockfd, &rset);
						n = select(serviceClient->sockfd + 1, (fd_set *)&rset, NULL, NULL, (timeout == NULL ? NULL : &rundownTimeout));
						if (n == 0)
						{
							/* the message reports the caller's original timeout value */
							if (timeout != NULL)
							{
								ereport(ERROR,
										(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
										 errmsg("Read from '%s' timed out after %d.%03d seconds",
												serviceConfig->title,
												(int)timeout->tv_sec,
												(int)timeout->tv_usec / 1000)));
							}
						}
						else if (n < 0 && errno == EINTR)
							continue;	/* interrupted: n < 1, so select() is retried */
						else if (n < 0)
						{
							saved_err = errno;
							ereport(ERROR,
									(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
									 errmsg("Read error from '%s': %s (%d)",
											serviceConfig->title,
											strerror(saved_err),
											serviceClient->sockfd)));
						}
					} while (n < 1);
				}
				/* else saved_err == EINTR */

				/* go back and retry the read() */
				continue;
			}
			else
				bytesRead += n;
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* keep the message only if it fits, terminator included */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
/*
 * ServiceProcessRespond
 *
 * Write exactly responseLen bytes from response to sockfd.
 *
 * serviceCtrl  - service control block; its serviceConfig supplies the
 *                title used in messages and the client timeout
 * sockfd       - socket to write to
 * response     - bytes to send
 * responseLen  - number of bytes to send
 *
 * Returns true once every byte has been written.  Returns false, after
 * emitting a NOTICE, if write() returns 0, if write() or select() fails with
 * anything other than EINTR (or EWOULDBLOCK for write()), or if waiting for
 * the socket to become writable times out.
 *
 * When write() would block, select() is used to wait for writability with
 * the timeout obtained from ServiceGetClientTimeout().
 */
bool
ServiceProcessRespond(ServiceCtrl *serviceCtrl, int sockfd, uint8 *response, int responseLen)
{
	ServiceConfig *serviceConfig = (ServiceConfig*)serviceCtrl->serviceConfig;
	int			sent = 0;
	int			rc;
	int			saved_err;
	mpp_fd_set	writefds;

	/*
	 * Use a local variable since select modifies the timeout parameter with
	 * remaining time.
	 */
	struct timeval rundownTimeout;

	ServiceGetClientTimeout(serviceConfig, &rundownTimeout);

	/*
	 * Write the response
	 */
	while (sent < responseLen)
	{
		CHECK_FOR_INTERRUPTS();

		rc = write(sockfd, response + sent, responseLen - sent);
		saved_err = errno;

		/* Progress: account for it and go around again */
		if (rc > 0)
		{
			sent += rc;
			continue;
		}

		if (rc == 0)
		{
			ereport(NOTICE,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("'%s': connection to backend process is closed",
							serviceConfig->title)));
			return false;
		}

		/* rc < 0 from here on */
		if (saved_err == EINTR)
			continue;			/* interrupted: simply retry the write */

		if (saved_err != EWOULDBLOCK)
		{
			ereport(NOTICE,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("'%s': write error to backend process: %s",
							serviceConfig->title,
							strerror(saved_err))));
			return false;
		}

		/*
		 * The socket would block.  We shouldn't really get here since we are
		 * dealing with small messages, but having started we must finish:
		 * wait until the socket is writable, then retry the write.
		 */
		for (;;)
		{
			CHECK_FOR_INTERRUPTS();

			MPP_FD_ZERO(&writefds);
			MPP_FD_SET(sockfd, &writefds);

			rc = select(sockfd + 1, NULL, (fd_set *)&writefds, NULL, &rundownTimeout);

			if (rc > 0)
				break;			/* writable */

			if (rc == 0)
			{
				/* Report the configured timeout, not what is left of it */
				struct timeval wholeTimeout;

				ServiceGetClientTimeout(serviceConfig, &wholeTimeout);

				ereport(NOTICE,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("'%s': write to backend process timed out after %d.%03d seconds)",
								serviceConfig->title,
								(int)wholeTimeout.tv_sec,
								(int)wholeTimeout.tv_usec / 1000)));
				return false;
			}

			/* rc < 0 */
			if (errno == EINTR)
				continue;

			saved_err = errno;
			ereport(NOTICE,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("'%s': write error to backend process: %s)",
							serviceConfig->title,
							strerror(saved_err))));
			return false;
		}
	}

	return true;
}
/*
 * ServiceProcessRequest
 *
 * Used by ServiceListenLoop to process an incoming request: read one
 * complete request message from sockfd and dispatch it.
 *
 * serviceCtrl    - service control block; its serviceConfig supplies the
 *                  request length, timeout and callbacks
 * sockfd         - socket to read the request from
 * inputBuff      - scratch buffer that receives an ordinary request
 * newConnection  - when true, a ServiceConnectionRequestMsg is read into a
 *                  local buffer and passed to ServiceNewConnectionMsg();
 *                  otherwise serviceConfig->requestLen bytes are read into
 *                  inputBuff and passed to serviceConfig->ServiceRequest()
 *
 * Returns the handler's result once a full message has been read.  Returns
 * false if read() returns 0, if read() or select() fails (a NOTICE is
 * emitted), if waiting for more data times out (a NOTICE is emitted), or if
 * a service shutdown is requested while waiting.
 */
static bool
ServiceProcessRequest(ServiceCtrl *serviceCtrl, int sockfd, uint8 *inputBuff, bool newConnection)
{
	ServiceConfig *serviceConfig = (ServiceConfig*)serviceCtrl->serviceConfig;
	ServiceConnectionRequestMsg newConnectionRequestMsg;
	uint8	   *request = newConnection ? (uint8*)&newConnectionRequestMsg : inputBuff;
	int			reqlen = newConnection ? (int) sizeof(newConnectionRequestMsg) : serviceConfig->requestLen;
	int			received = 0;
	int			rc;
	int			saved_err;
	mpp_fd_set	readfds;

	/*
	 * Use a local variable since select modifies the timeout parameter with
	 * remaining time.
	 */
	struct timeval rundownTimeout;

	ServiceGetClientTimeout(serviceConfig, &rundownTimeout);

	/*
	 * Read in the incoming request message.
	 */
	while (received < reqlen)
	{
		CHECK_FOR_INTERRUPTS();

		rc = read(sockfd, request + received, reqlen - received);
		saved_err = errno;

		/* Progress: account for it and go around again */
		if (rc > 0)
		{
			received += rc;
			continue;
		}

		/* The client socket is closed; this is not reported */
		if (rc == 0)
			return false;

		/* rc < 0 from here on */
		if (saved_err == EINTR)
			continue;			/* interrupted: simply retry the read */

		if (saved_err != EWOULDBLOCK)
		{
			ereport(NOTICE,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("'%s': read error from backend process: %s",
							serviceConfig->title,
							strerror(saved_err))));
			return false;
		}

		/*
		 * No data available yet.  We shouldn't really get here since we are
		 * dealing with small messages, but once we've read a bit of data we
		 * need to finish out reading till we get the message (or error).
		 */
		for (;;)
		{
			CHECK_FOR_INTERRUPTS();

			if (serviceConfig->ServiceShutdownRequested())
				return false;

			MPP_FD_ZERO(&readfds);
			MPP_FD_SET(sockfd, &readfds);

			rc = select(sockfd + 1, (fd_set *)&readfds, NULL, NULL, &rundownTimeout);

			if (rc > 0)
				break;			/* readable */

			if (rc == 0)
			{
				/* Report the configured timeout, not what is left of it */
				struct timeval wholeTimeout;

				ServiceGetClientTimeout(serviceConfig, &wholeTimeout);

				ereport(NOTICE,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("'%s': read from backend process timed out after %d.%03d seconds",
								serviceConfig->title,
								(int)wholeTimeout.tv_sec,
								(int)wholeTimeout.tv_usec / 1000)));
				return false;
			}

			/* rc < 0 */
			if (errno == EINTR)
				continue;

			saved_err = errno;
			ereport(NOTICE,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("'%s': read error from backend process: %s",
							serviceConfig->title,
							strerror(saved_err))));
			return false;
		}
	}

	/* A complete message is in hand: dispatch it */
	if (newConnection)
		return ServiceNewConnectionMsg(serviceCtrl, sockfd, &newConnectionRequestMsg);

	return serviceConfig->ServiceRequest(serviceCtrl, sockfd, request);
}