// Ask `selector` whether the endpoint's listening descriptor is flagged as
// readable.
//
// On WIN32 the descriptor that is checked belongs to wake_select_dest, and
// calling this while wake_select_dest is unset is fatal (EXCEPT).  On every
// other platform the descriptor of m_listener_sock is checked.
bool
SharedPortEndpoint::CheckListenerReady(Selector &selector)
{
	bool listener_readable = false;

#ifdef WIN32
	if( !wake_select_dest ) {
		EXCEPT("SharedPortEndpoint: CheckListenerReady: Nothing registered.");
	}
	listener_readable =
		selector.fd_ready( wake_select_dest->get_file_desc(), Selector::IO_READ );
#else
	listener_readable =
		selector.fd_ready( m_listener_sock.get_file_desc(), Selector::IO_READ );
#endif

	return listener_readable;
}
bool
NamedPipeWriter::write_data(void* buffer, int len)
{
	assert(m_initialized);

	// if we're writing to a pipe that has multiple writers,
	// we need to make sure our messages are no larger than
	// PIPE_BUF to guarantee atomic writes
	//
	assert(len <= PIPE_BUF);

	// if we have a watchdog, we don't go right into a blocking
	// write. instead, we select with both the real pipe and the
	// watchdog pipe, which will close if our peer shuts down or
	// crashes
	//
	if (m_watchdog != NULL) {
		int watchdog_pipe = m_watchdog->get_file_descriptor();
		Selector selector;
		selector.add_fd( m_pipe, Selector::IO_WRITE );
		selector.add_fd( watchdog_pipe, Selector::IO_READ );
		selector.execute();
		if ( selector.failed() || selector.signalled() ) {
			dprintf(D_ALWAYS,
			        "select error: %s (%d)\n",
			        strerror(selector.select_errno()),
			        selector.select_errno());
			return false;
		}
		if ( selector.fd_ready( watchdog_pipe, Selector::IO_READ ) ) {
			dprintf(D_ALWAYS,
			        "error writing to named pipe: "
			            "watchdog pipe has closed\n");
			return false;
		}
	}

	// do the write
	//
	int bytes = write(m_pipe, buffer, len);
	if (bytes != len) {
		if (bytes == -1) {
			dprintf(D_ALWAYS,
			        "write error: %s (%d)\n",
			        strerror(errno),
			        errno);
		}
		else {
			dprintf(D_ALWAYS,
			        "error: wrote %d of %d bytes\n",
			        bytes,
			        len);
		}
		return false;
	}

	return true;
}
Example #3
0
// Reaper for processes belonging to a vanilla job.
//
// `status` is first passed through pidNameSpaceReaper() when a PID-namespace
// status file name is set, then the exit is handed to OsProc::JobReaper().
// For any pid other than JobPid the OsProc result is returned unchanged.
//
// For the job's own pid the exit status (exit_status) is compared with the
// status the job promised to exit with after a checkpoint:
//   - while checkpointing, a matching exit either reports a successful
//     eviction checkpoint (returns true, when soft-killing) or restarts the
//     job (returns false); a non-matching exit puts the job on hold and
//     returns true;
//   - when not checkpointing, a matching exit of a job that wants file
//     transfer on checkpoint exit is handled the same way as a successful
//     checkpoint;
//   - otherwise the family is cleaned up, final usage is recorded and the
//     OsProc::JobReaper() result is returned.
bool
VanillaProc::JobReaper(int pid, int status)
{
	dprintf(D_FULLDEBUG,"Inside VanillaProc::JobReaper()\n");

	//
	// The other reapers go first, because some of them rewrite the exit
	// status.
	//
	if( m_pid_ns_status_filename.length() > 0 ) {
		status = pidNameSpaceReaper( status );
	}
	const bool jobExited = OsProc::JobReaper( pid, status );
	if( pid != JobPid ) {
		return jobExited;
	}

#if defined(LINUX)
	// On newer kernels with memory.use_hierarchy==1 the OOM killer cannot
	// be disabled, so the kernel may deliver SIGKILL at the same moment the
	// OOM notification arrives.  Whenever the job exits we therefore also
	// poll the event file descriptor (zero timeout, never blocking).
	//
	// outOfMemoryEvent() is aware of checkpointing and will mention that
	// the OOM event happened during a checkpoint.
	int oomFd = -1;
	const bool haveOomFd =
		(m_oom_efd >= 0) &&
		daemonCore->Get_Pipe_FD(m_oom_efd, &oomFd) &&
		(oomFd != -1);
	if( haveOomFd ) {
		Selector oomPoll;
		oomPoll.add_fd(oomFd, Selector::IO_READ);
		oomPoll.set_timeout(0);
		oomPoll.execute();

		const bool oomFired =
			!oomPoll.failed() &&
			!oomPoll.timed_out() &&
			oomPoll.has_ready() &&
			oomPoll.fd_ready(oomFd, Selector::IO_READ);
		if( oomFired ) {
			outOfMemoryEvent( m_oom_efd );
		}
	}
#endif

	//
	// Checkpoint-related job attributes, each initialized to the value
	// used when the job ad does not override it.
	//
	bool transferOnCheckpointExit = false;
	JobAd->LookupBool( ATTR_WANT_FT_ON_CHECKPOINT, transferOnCheckpointExit );

	int promisedExitCode = 0;
	JobAd->LookupInteger( ATTR_CHECKPOINT_EXIT_CODE, promisedExitCode );
	int promisedExitSignal = 0;
	JobAd->LookupInteger( ATTR_CHECKPOINT_EXIT_SIGNAL, promisedExitSignal );
	bool promisedExitBySignal = false;
	JobAd->LookupBool( ATTR_CHECKPOINT_EXIT_BY_SIGNAL, promisedExitBySignal );

	// The raw status value that counts as "checkpointed successfully":
	// the signal number itself, or the exit code (shifted into the
	// exit-code bits everywhere except on Windows).
	int checkpointStatus = 0;
	if( promisedExitBySignal ) {
		checkpointStatus = promisedExitSignal;
	} else if( promisedExitCode != 0 ) {
#if defined( WINDOWS )
		checkpointStatus = promisedExitCode;
#else
		checkpointStatus = promisedExitCode << 8;
#endif
	}

	const bool exitedAsPromised = ( exit_status == checkpointStatus );

	//
	// Case 1: we asked the job to checkpoint.
	//
	if( isCheckpointing ) {
		dprintf( D_FULLDEBUG, "Inside VanillaProc::JobReaper() during a checkpoint\n" );

		if( exitedAsPromised ) {
			if( isSoftKilling ) {
				notifySuccessfulEvictionCheckpoint();
				return true;
			}

			restartCheckpointedJob();
			isCheckpointing = false;
			return false;
		}

		// The job exited without taking a checkpoint.  Left alone, the
		// exit code or signal would be reported as if it had occurred
		// naturally (and the job would usually leave the queue), which
		// could confuse users.
		//
		// So the job goes on hold instead: it asked to be sent a
		// (periodic) signal, we sent it, and the failure is therefore not
		// ours.  As a convenient side-effect the job's previous
		// checkpoint(s), if any, are not overwritten, since no file
		// transfer happens when a job goes on hold.
		killFamilyIfWarranted();
		recordFinalUsage();

		const char *promisedHow = promisedExitBySignal ? "on signal" : "with exit code";
		const char *actualHow = WIFSIGNALED( exit_status ) ? "on signal" : "with exit code";

		std::string holdMessage;
		formatstr( holdMessage, "Job did not exit as promised when sent its checkpoint signal.  "
			"Promised exit was %s %u, actual exit status was %s %u.",
			promisedHow,
			promisedExitBySignal ? promisedExitSignal : promisedExitCode,
			actualHow,
			WIFSIGNALED( exit_status ) ? WTERMSIG( exit_status ) : WEXITSTATUS( exit_status ) );
		Starter->jic->holdJob( holdMessage.c_str(), CONDOR_HOLD_CODE_FailedToCheckpoint, exit_status );
		Starter->Hold();
		return true;
	}

	//
	// Case 2: the job exited with the special checkpoint status on its own.
	//
	if( transferOnCheckpointExit && exitedAsPromised ) {
		dprintf( D_FULLDEBUG, "Inside VanillaProc::JobReaper() and the job self-checkpointed.\n" );

		if( isSoftKilling ) {
			notifySuccessfulEvictionCheckpoint();
			return true;
		}

		restartCheckpointedJob();
		return false;
	}

	//
	// Case 3: an ordinary exit.
	//

	// If the parent job process died, clean up all of the job's processes.
	killFamilyIfWarranted();

	// Final usage must be recorded now: once the reaper returns, the
	// family is no longer registered with DaemonCore and the information
	// can never be retrieved again.
	recordFinalUsage();

	return jobExited;
}
Example #4
0
void SocketProxy::execute()
{
	std::list<SocketProxyPair>::iterator it;
	Selector selector;

	while( true ) {
		selector.reset();

		bool has_active_sockets = false;
		for	( it=m_socket_pairs.begin(); it != m_socket_pairs.end(); ++it ) {
			if( it->shutdown ) {
				continue;
			}
			has_active_sockets = true;
			if( it->buf_end > 0 ) {
					// drain the buffer before reading more
				selector.add_fd(it->to_socket, Selector::IO_WRITE);
			}
			else {
				selector.add_fd(it->from_socket, Selector::IO_READ);
			}
		}

		if( !has_active_sockets ) {
			break;
		}

		selector.execute();

		for	( it=m_socket_pairs.begin(); it != m_socket_pairs.end(); ++it ) {
			if( it->shutdown ) {
				continue;
			}
			if( it->buf_end > 0 ) {
					// attempt to drain the buffer
				if( selector.fd_ready(it->to_socket, Selector::IO_WRITE) ) {
					int n = write(it->to_socket,&it->buf[it->buf_begin],it->buf_end-it->buf_begin);
					if( n > 0 ) {
						it->buf_begin += n;
						if( it->buf_begin >= it->buf_end ) {
							it->buf_begin = 0;
							it->buf_end = 0;
						}
					}
				}
			}
			else if( selector.fd_ready(it->from_socket, Selector::IO_READ) ) {
				int n = read(it->from_socket,it->buf,SOCKET_PROXY_BUFSIZE);
				if( n > 0 ) {
					it->buf_end = n;
				}
				else if( n == 0 ) {
						// the socket has closed
						// WIN32 lacks SHUT_RD=0 and SHUT_WR=1
					shutdown(it->from_socket,0);
					close(it->from_socket);
					shutdown(it->to_socket,1);
					close(it->to_socket);
					it->shutdown = true;
				}
				else if( n < 0 ) {
					MyString error_msg;
					error_msg.sprintf("Error reading from socket %d: %s\n",
									  it->from_socket, strerror(errno));
					setErrorMsg(error_msg.Value());
					break;
				}
			}
		}
	}
}
Example #5
0
// Write `sz` bytes from `buf` to the socket `fd` using send().
//
// Parameters:
//   peer_description  text identifying the peer; only used (through
//                     not_null_peer_description()) when building log messages
//   fd                socket to write to (asserted >= 0)
//   buf               data to send (asserted non-NULL)
//   sz                number of bytes to send (asserted > 0)
//   timeout           in seconds (compared against time(NULL)); only values
//                     > 0 enable the select()-with-deadline logic of the
//                     blocking path
//   flags             passed unchanged to send()
//   non_blocking      selects between the two modes described below
//
// Non-blocking mode (non_blocking == true):
//   The socket is temporarily switched to non-blocking mode and send() is
//   attempted once (repeated only while it fails with EINTR).  The return
//   value is what send() returned, except that a failure with a temporary
//   error (per errno_is_temporary()) is reported as 0.  -1 is also returned
//   when the socket mode cannot be queried, changed or restored.
//
// Blocking mode (non_blocking == false):
//   Loops until all `sz` bytes have been sent and then returns `sz`.
//   When timeout > 0, each send() is preceded by a select() bounded by the
//   time remaining until start + timeout; the socket is also watched for
//   readability so that a closed connection is noticed.  Returns -1 on
//   timeout, on a detected close, on select() failure, or when send() fails
//   with a non-temporary error.
int
condor_write( char const *peer_description, SOCKET fd, const char *buf, int sz, int timeout, int flags, bool non_blocking )
{
	int nw = 0, nwo = 0;
	unsigned int start_time = 0, cur_time = 0;
	char tmpbuf[1];
	int nro;
	bool select_for_read = true;
	bool needs_select = true;
	char sinbuf[SINFUL_STRING_BUF_SIZE];

	if( IsDebugLevel( D_NETWORK ) ) {
		dprintf(D_NETWORK,
				"condor_write(fd=%d %s,,size=%d,timeout=%d,flags=%d,non_blocking=%d)\n",
				fd,
				not_null_peer_description(peer_description,fd,sinbuf),
				sz,
				timeout,
				flags,
				non_blocking);
	}

	/* Pre-conditions. */
	ASSERT(sz > 0);      /* Can't write buffers that have no data */
	ASSERT(fd >= 0);     /* Need valid file descriptor */
	ASSERT(buf != NULL); /* Need valid buffer to write */

	if (non_blocking) {
#ifdef WIN32
		unsigned long mode = 1; // nonblocking mode
		if (ioctlsocket(fd, FIONBIO, &mode) < 0)
			return -1;
#else
		int fcntl_flags;
		if ( (fcntl_flags=fcntl(fd, F_GETFL)) < 0 )
			return -1;
			// set nonblocking mode (only if it is not already set)
		if ( ((fcntl_flags & O_NONBLOCK) == 0) && fcntl(fd, F_SETFL, fcntl_flags | O_NONBLOCK) == -1 )
			return -1;
#endif
			// -2 is a sentinel that forces the first pass through the
			// loop; afterwards send() is repeated only on EINTR
		nw = -2;
		while (nw == -2 || (nw == -1 && errno == EINTR)) {
			nw = send(fd, buf, sz, flags);
		}

		if ( nw <= 0 ) {
			int the_error;
			char const *the_errorstr;
#ifdef WIN32
			the_error = WSAGetLastError();
			the_errorstr = "";
#else
			the_error = errno;
			the_errorstr = strerror(the_error);
#endif
			if ( !errno_is_temporary(the_error) ) {
				dprintf( D_ALWAYS, "condor_write() failed: send() %d bytes to %s "
					"returned %d, "     
					"timeout=%d, errno=%d %s.\n",
					sz,                 
					not_null_peer_description(peer_description,fd,sinbuf),
					nw, timeout, the_error, the_errorstr );
			}
			else
			{
					// a temporary error is reported to the caller as
					// "zero bytes written" rather than as a failure
				nw = 0;
			}
		}
			// NOTE(review): this logs "wrote N bytes" only when nw is
			// negative, i.e. never for a successful send; the condition
			// may have been meant to cover successful writes -- confirm.
		if (nw < 0) {
			dprintf(D_NETWORK, "condor_write (non-blocking) wrote %d bytes.\n", nw);
		}       

#ifdef WIN32
		mode = 0; // reset blocking mode
		if (ioctlsocket(fd, FIONBIO, &mode) < 0)
			return -1;
#else
			// reset flags to prior value (only if we changed them above)
		if ( ((fcntl_flags & O_NONBLOCK) == 0) && fcntl(fd, F_SETFL, fcntl_flags) == -1 )
			return -1;
#endif
		return nw;
	}

		// Blocking path.  The selector is only executed when timeout > 0.
	Selector selector;
	selector.add_fd( fd, Selector::IO_READ );
	selector.add_fd( fd, Selector::IO_WRITE );
	selector.add_fd( fd, Selector::IO_EXCEPT );
	
	if(timeout > 0) {
		start_time = time(NULL);
		cur_time = start_time;
	}

		// keep sending until every byte of the buffer has gone out
	while( nw < sz ) {

		needs_select = true;

		if( timeout > 0 ) {
			while( needs_select ) {
					// cur_time == 0 means "stale"; it is zeroed after
					// each use so the clock is re-read on every pass
					// except the very first one
				if( cur_time == 0 ) {
					cur_time = time(NULL);
				}

					// select for whatever remains of the overall
					// deadline, or give up if the deadline has passed
				if( start_time + timeout > cur_time ) {
					selector.set_timeout( (start_time + timeout) - cur_time );
				} else {
					dprintf( D_ALWAYS, "condor_write(): "
							 "timed out writing %d bytes to %s\n",
							 sz,
							 not_null_peer_description(peer_description,fd,sinbuf) );
					return -1;
				}
			
				cur_time = 0;

					// The write and except sets are added at the top of
					// this function, since we always want to select on
					// them.
				if( select_for_read ) {
						// Also, put it in the read fds, so we'll wake
						// up if the socket is closed
					selector.add_fd( fd, Selector::IO_READ );
				} else {
					selector.delete_fd( fd, Selector::IO_READ );
				}
				selector.execute();

					// unless we decide we need to select() again, we
					// want to break out of our while() loop now that
					// we've actually performed a select()
				needs_select = false;

				if ( selector.timed_out() ) {
					dprintf( D_ALWAYS, "condor_write(): "
							 "timed out writing %d bytes to %s\n",
							 sz,
							 not_null_peer_description(peer_description,fd,sinbuf) );
					return -1;
				
				} else if ( selector.signalled() ) {
						// interrupted by a signal: select again
					needs_select = true;
					continue;
				} else if ( selector.has_ready() ) {
					if ( selector.fd_ready( fd, Selector::IO_READ ) ) {
						dprintf(D_NETWORK, "condor_write(): socket %d is readable\n", fd);
							// see if the socket was closed; MSG_PEEK
							// leaves any pending data in place
						nro = recv(fd, tmpbuf, 1, MSG_PEEK);
						if( nro == -1 ) {
							int the_error;
                            char const *the_errorstr;
#ifdef WIN32
							the_error = WSAGetLastError();
                            the_errorstr = "";
#else
							the_error = errno;
                            the_errorstr = strerror(the_error);
#endif
								// needs_select is already false here, so
								// this continue leaves the select loop
								// and falls through to send()
							if(errno_is_temporary( the_error )) {
								continue;
							}

							dprintf( D_ALWAYS, "condor_write(): "
									 "Socket closed when trying "
									 "to write %d bytes to %s, fd is %d, "
									 "errno=%d %s\n",
									 sz,
									 not_null_peer_description(peer_description,fd,sinbuf),
									 fd, the_error, the_errorstr );
							return -1;
						}

							// recv() returning 0 is treated as the peer
							// having closed the connection
						if( ! nro ) {
							dprintf( D_ALWAYS, "condor_write(): "
									 "Socket closed when trying "
									 "to write %d bytes to %s, fd is %d\n",
									 sz,
									 not_null_peer_description(peer_description,fd,sinbuf),
									 fd );
							return -1;
						}

							/*
							  otherwise, there's real data to consume
							  on the read side, and we don't want to
							  put our fd in the readfds anymore or
							  select() will never block.  also, we
							  need to re-do the select()
							*/
						needs_select = true;
						select_for_read = false;
					}
				} else {
						// neither timed out, signalled, nor ready:
						// treated as a select() failure
					dprintf( D_ALWAYS, "condor_write() failed: select() "
							 "returns %d, "
							 "writing %d bytes to %s.\n",
							 selector.select_retval(),
							 sz,
							 not_null_peer_description(peer_description,fd,sinbuf) );
					return -1;
				}
			}
		}
		start_thread_safe("send");

			// send whatever part of the buffer has not gone out yet
		nwo = send(fd, &buf[nw], sz - nw, flags);
		// Save the error value before stop_thread_safe(), as that may
		// overwrite it.
		int the_error;
#ifdef WIN32
		the_error = WSAGetLastError();
#else
		the_error = errno;
#endif

		stop_thread_safe("send");		

		if( nwo <= 0 ) {
            char const *the_errorstr;
#ifdef WIN32
            the_errorstr = "";
#else
            the_errorstr = strerror(the_error);
#endif
				// a temporary error restarts the outer loop (and, when
				// timeout > 0, the select with the remaining time)
			if ( errno_is_temporary(the_error) ) {
				dprintf( D_FULLDEBUG, "condor_write(): "
				         "send() returned temporary error %d %s,"
						 "still trying to write %d bytes to %s\n", the_error,
						 the_errorstr, sz,
						 not_null_peer_description(peer_description,fd,sinbuf) );
				continue;
			}

			dprintf( D_ALWAYS, "condor_write() failed: send() %d bytes to %s "
					 "returned %d, "
					 "timeout=%d, errno=%d %s.\n",
					 sz,
					 not_null_peer_description(peer_description,fd,sinbuf),
					 nwo, timeout, the_error, the_errorstr );

			return -1;
		}
		
		nw += nwo;
	}

	/* POST conditions. */
	ASSERT( nw == sz ); /* Make sure that we wrote everything */
	return nw;
}