/**
 * Report whether this endpoint's listening descriptor is marked as readable
 * in an already-executed selector.
 *
 * On WIN32 the descriptor consulted is the one belonging to
 * wake_select_dest; if nothing has been registered there the process aborts
 * through EXCEPT.  On every other platform the descriptor of
 * m_listener_sock is consulted.
 *
 * @param selector  Selector whose results are queried.
 * @return whatever selector.fd_ready() reports for the listening descriptor
 *         with Selector::IO_READ.
 */
bool
SharedPortEndpoint::CheckListenerReady(Selector &selector)
{
#ifdef WIN32
    if( !wake_select_dest ) {
        EXCEPT("SharedPortEndpoint: CheckListenerReady: Nothing registered.");
    }
    return selector.fd_ready(
        wake_select_dest->get_file_desc(),
        Selector::IO_READ);
#else
    return selector.fd_ready(
        m_listener_sock.get_file_desc(),
        Selector::IO_READ);
#endif
}
bool NamedPipeWriter::write_data(void* buffer, int len) { assert(m_initialized); // if we're writing to a pipe that has multiple writers, // we need to make sure our messages are no larger than // PIPE_BUF to guarantee atomic writes // assert(len <= PIPE_BUF); // if we have a watchdog, we don't go right into a blocking // write. instead, we select with both the real pipe and the // watchdog pipe, which will close if our peer shuts down or // crashes // if (m_watchdog != NULL) { int watchdog_pipe = m_watchdog->get_file_descriptor(); Selector selector; selector.add_fd( m_pipe, Selector::IO_WRITE ); selector.add_fd( watchdog_pipe, Selector::IO_READ ); selector.execute(); if ( selector.failed() || selector.signalled() ) { dprintf(D_ALWAYS, "select error: %s (%d)\n", strerror(selector.select_errno()), selector.select_errno()); return false; } if ( selector.fd_ready( watchdog_pipe, Selector::IO_READ ) ) { dprintf(D_ALWAYS, "error writing to named pipe: " "watchdog pipe has closed\n"); return false; } } // do the write // int bytes = write(m_pipe, buffer, len); if (bytes != len) { if (bytes == -1) { dprintf(D_ALWAYS, "write error: %s (%d)\n", strerror(errno), errno); } else { dprintf(D_ALWAYS, "error: wrote %d of %d bytes\n", bytes, len); } return false; } return true; }
bool VanillaProc::JobReaper(int pid, int status) { dprintf(D_FULLDEBUG,"Inside VanillaProc::JobReaper()\n"); // // Run all the reapers first, since some of them change the exit status. // if( m_pid_ns_status_filename.length() > 0 ) { status = pidNameSpaceReaper( status ); } bool jobExited = OsProc::JobReaper( pid, status ); if( pid != JobPid ) { return jobExited; } #if defined(LINUX) // On newer kernels if memory.use_hierarchy==1, then we cannot disable // the OOM killer. Hence, we have to be ready for a SIGKILL to be delivered // by the kernel at the same time we get the notification. Hence, if we // see an exit signal, we must also check the event file descriptor. // // outOfMemoryEvent() is aware of checkpointing and will mention that // the OOM event happened during a checkpoint. int efd = -1; if( (m_oom_efd >= 0) && daemonCore->Get_Pipe_FD(m_oom_efd, &efd) && (efd != -1) ) { Selector selector; selector.add_fd(efd, Selector::IO_READ); selector.set_timeout(0); selector.execute(); if( !selector.failed() && !selector.timed_out() && selector.has_ready() && selector.fd_ready(efd, Selector::IO_READ) ) { outOfMemoryEvent( m_oom_efd ); } } #endif // // We have three cases to consider: // * if we're checkpointing; or // * if we see a special checkpoint exit code; or // * there's no special case to consider. 
// bool wantsFileTransferOnCheckpointExit = false; JobAd->LookupBool( ATTR_WANT_FT_ON_CHECKPOINT, wantsFileTransferOnCheckpointExit ); int checkpointExitCode = 0; JobAd->LookupInteger( ATTR_CHECKPOINT_EXIT_CODE, checkpointExitCode ); int checkpointExitSignal = 0; JobAd->LookupInteger( ATTR_CHECKPOINT_EXIT_SIGNAL, checkpointExitSignal ); bool checkpointExitBySignal = 0; JobAd->LookupBool( ATTR_CHECKPOINT_EXIT_BY_SIGNAL, checkpointExitBySignal ); int successfulCheckpointStatus = 0; if( checkpointExitBySignal ) { successfulCheckpointStatus = checkpointExitSignal; } else if( checkpointExitCode != 0 ) { successfulCheckpointStatus = checkpointExitCode << 8; #if defined( WINDOWS ) successfulCheckpointStatus = checkpointExitCode; #endif } if( isCheckpointing ) { dprintf( D_FULLDEBUG, "Inside VanillaProc::JobReaper() during a checkpoint\n" ); if( exit_status == successfulCheckpointStatus ) { if( isSoftKilling ) { notifySuccessfulEvictionCheckpoint(); return true; } restartCheckpointedJob(); isCheckpointing = false; return false; } else { // The job exited without taking a checkpoint. If we don't do // anything, it will be reported as if the error code or signal // had happened naturally (and the job will usually exit the // queue). This could confuse the users. // // Instead, we'll put the job on hold, figuring that if the job // requested that we (periodically) send it a signal, and we // did, that it's not our fault that the job failed. This has // the convenient side-effect of not overwriting the job's // previous checkpoint(s), if any (since file transfer doesn't // occur when the job goes on hold). killFamilyIfWarranted(); recordFinalUsage(); std::string holdMessage; formatstr( holdMessage, "Job did not exit as promised when sent its checkpoint signal. " "Promised exit was %s %u, actual exit status was %s %u.", checkpointExitBySignal ? "on signal" : "with exit code", checkpointExitBySignal ? checkpointExitSignal : checkpointExitCode, WIFSIGNALED( exit_status ) ? 
"on signal" : "with exit code", WIFSIGNALED( exit_status ) ? WTERMSIG( exit_status ) : WEXITSTATUS( exit_status ) ); Starter->jic->holdJob( holdMessage.c_str(), CONDOR_HOLD_CODE_FailedToCheckpoint, exit_status ); Starter->Hold(); return true; } } else if( wantsFileTransferOnCheckpointExit && exit_status == successfulCheckpointStatus ) { dprintf( D_FULLDEBUG, "Inside VanillaProc::JobReaper() and the job self-checkpointed.\n" ); if( isSoftKilling ) { notifySuccessfulEvictionCheckpoint(); return true; } else { restartCheckpointedJob(); return false; } } else { // If the parent job process died, clean up all of the job's processes. killFamilyIfWarranted(); // Record final usage stats for this process family, since // once the reaper returns, the family is no longer // registered with DaemonCore and we'll never be able to // get this information again. recordFinalUsage(); return jobExited; } }
void SocketProxy::execute() { std::list<SocketProxyPair>::iterator it; Selector selector; while( true ) { selector.reset(); bool has_active_sockets = false; for ( it=m_socket_pairs.begin(); it != m_socket_pairs.end(); ++it ) { if( it->shutdown ) { continue; } has_active_sockets = true; if( it->buf_end > 0 ) { // drain the buffer before reading more selector.add_fd(it->to_socket, Selector::IO_WRITE); } else { selector.add_fd(it->from_socket, Selector::IO_READ); } } if( !has_active_sockets ) { break; } selector.execute(); for ( it=m_socket_pairs.begin(); it != m_socket_pairs.end(); ++it ) { if( it->shutdown ) { continue; } if( it->buf_end > 0 ) { // attempt to drain the buffer if( selector.fd_ready(it->to_socket, Selector::IO_WRITE) ) { int n = write(it->to_socket,&it->buf[it->buf_begin],it->buf_end-it->buf_begin); if( n > 0 ) { it->buf_begin += n; if( it->buf_begin >= it->buf_end ) { it->buf_begin = 0; it->buf_end = 0; } } } } else if( selector.fd_ready(it->from_socket, Selector::IO_READ) ) { int n = read(it->from_socket,it->buf,SOCKET_PROXY_BUFSIZE); if( n > 0 ) { it->buf_end = n; } else if( n == 0 ) { // the socket has closed // WIN32 lacks SHUT_RD=0 and SHUT_WR=1 shutdown(it->from_socket,0); close(it->from_socket); shutdown(it->to_socket,1); close(it->to_socket); it->shutdown = true; } else if( n < 0 ) { MyString error_msg; error_msg.sprintf("Error reading from socket %d: %s\n", it->from_socket, strerror(errno)); setErrorMsg(error_msg.Value()); break; } } } } }
int condor_write( char const *peer_description, SOCKET fd, const char *buf, int sz, int timeout, int flags, bool non_blocking ) { int nw = 0, nwo = 0; unsigned int start_time = 0, cur_time = 0; char tmpbuf[1]; int nro; bool select_for_read = true; bool needs_select = true; char sinbuf[SINFUL_STRING_BUF_SIZE]; if( IsDebugLevel( D_NETWORK ) ) { dprintf(D_NETWORK, "condor_write(fd=%d %s,,size=%d,timeout=%d,flags=%d,non_blocking=%d)\n", fd, not_null_peer_description(peer_description,fd,sinbuf), sz, timeout, flags, non_blocking); } /* Pre-conditions. */ ASSERT(sz > 0); /* Can't write buffers that are have no data */ ASSERT(fd >= 0); /* Need valid file descriptor */ ASSERT(buf != NULL); /* Need valid buffer to write */ if (non_blocking) { #ifdef WIN32 unsigned long mode = 1; // nonblocking mode if (ioctlsocket(fd, FIONBIO, &mode) < 0) return -1; #else int fcntl_flags; if ( (fcntl_flags=fcntl(fd, F_GETFL)) < 0 ) return -1; // set nonblocking mode if ( ((fcntl_flags & O_NONBLOCK) == 0) && fcntl(fd, F_SETFL, fcntl_flags | O_NONBLOCK) == -1 ) return -1; #endif nw = -2; while (nw == -2 || (nw == -1 && errno == EINTR)) { nw = send(fd, buf, sz, flags); } if ( nw <= 0 ) { int the_error; char const *the_errorstr; #ifdef WIN32 the_error = WSAGetLastError(); the_errorstr = ""; #else the_error = errno; the_errorstr = strerror(the_error); #endif if ( !errno_is_temporary(the_error) ) { dprintf( D_ALWAYS, "condor_write() failed: send() %d bytes to %s " "returned %d, " "timeout=%d, errno=%d %s.\n", sz, not_null_peer_description(peer_description,fd,sinbuf), nw, timeout, the_error, the_errorstr ); } else { nw = 0; } } if (nw < 0) { dprintf(D_NETWORK, "condor_write (non-blocking) wrote %d bytes.\n", nw); } #ifdef WIN32 mode = 0; // reset blocking mode if (ioctlsocket(fd, FIONBIO, &mode) < 0) return -1; #else // reset flags to prior value if ( ((fcntl_flags & O_NONBLOCK) == 0) && fcntl(fd, F_SETFL, fcntl_flags) == -1 ) return -1; #endif return nw; } Selector selector; selector.add_fd( fd, 
Selector::IO_READ ); selector.add_fd( fd, Selector::IO_WRITE ); selector.add_fd( fd, Selector::IO_EXCEPT ); if(timeout > 0) { start_time = time(NULL); cur_time = start_time; } while( nw < sz ) { needs_select = true; if( timeout > 0 ) { while( needs_select ) { if( cur_time == 0 ) { cur_time = time(NULL); } if( start_time + timeout > cur_time ) { selector.set_timeout( (start_time + timeout) - cur_time ); } else { dprintf( D_ALWAYS, "condor_write(): " "timed out writing %d bytes to %s\n", sz, not_null_peer_description(peer_description,fd,sinbuf) ); return -1; } cur_time = 0; // The write and except sets are added at the top of // this function, since we always want to select on // them. if( select_for_read ) { // Also, put it in the read fds, so we'll wake // up if the socket is closed selector.add_fd( fd, Selector::IO_READ ); } else { selector.delete_fd( fd, Selector::IO_READ ); } selector.execute(); // unless we decide we need to select() again, we // want to break out of our while() loop now that // we've actually performed a select() needs_select = false; if ( selector.timed_out() ) { dprintf( D_ALWAYS, "condor_write(): " "timed out writing %d bytes to %s\n", sz, not_null_peer_description(peer_description,fd,sinbuf) ); return -1; } else if ( selector.signalled() ) { needs_select = true; continue; } else if ( selector.has_ready() ) { if ( selector.fd_ready( fd, Selector::IO_READ ) ) { dprintf(D_NETWORK, "condor_write(): socket %d is readable\n", fd); // see if the socket was closed nro = recv(fd, tmpbuf, 1, MSG_PEEK); if( nro == -1 ) { int the_error; char const *the_errorstr; #ifdef WIN32 the_error = WSAGetLastError(); the_errorstr = ""; #else the_error = errno; the_errorstr = strerror(the_error); #endif if(errno_is_temporary( the_error )) { continue; } dprintf( D_ALWAYS, "condor_write(): " "Socket closed when trying " "to write %d bytes to %s, fd is %d, " "errno=%d %s\n", sz, not_null_peer_description(peer_description,fd,sinbuf), fd, the_error, the_errorstr ); 
return -1; } if( ! nro ) { dprintf( D_ALWAYS, "condor_write(): " "Socket closed when trying " "to write %d bytes to %s, fd is %d\n", sz, not_null_peer_description(peer_description,fd,sinbuf), fd ); return -1; } /* otherwise, there's real data to consume on the read side, and we don't want to put our fd in the readfds anymore or select() will never block. also, we need to re-do the select() */ needs_select = true; select_for_read = false; } } else { dprintf( D_ALWAYS, "condor_write() failed: select() " "returns %d, " "writing %d bytes to %s.\n", selector.select_retval(), sz, not_null_peer_description(peer_description,fd,sinbuf) ); return -1; } } } start_thread_safe("send"); nwo = send(fd, &buf[nw], sz - nw, flags); // Save the error value before stop_thread_safe(), as that may // overwrite it. int the_error; #ifdef WIN32 the_error = WSAGetLastError(); #else the_error = errno; #endif stop_thread_safe("send"); if( nwo <= 0 ) { char const *the_errorstr; #ifdef WIN32 the_errorstr = ""; #else the_errorstr = strerror(the_error); #endif if ( errno_is_temporary(the_error) ) { dprintf( D_FULLDEBUG, "condor_write(): " "send() returned temporary error %d %s," "still trying to write %d bytes to %s\n", the_error, the_errorstr, sz, not_null_peer_description(peer_description,fd,sinbuf) ); continue; } dprintf( D_ALWAYS, "condor_write() failed: send() %d bytes to %s " "returned %d, " "timeout=%d, errno=%d %s.\n", sz, not_null_peer_description(peer_description,fd,sinbuf), nwo, timeout, the_error, the_errorstr ); return -1; } nw += nwo; } /* POST conditions. */ ASSERT( nw == sz ); /* Make sure that we wrote everything */ return nw; }