예제 #1
0
파일: schedd.cpp 프로젝트: blueskyll/condor
bool
putClassAdAndEOM(Sock & sock, classad::ClassAd &ad)
{
        if (sock.type() != Stream::reli_sock)
	{
		return putClassAd(&sock, ad) && sock.end_of_message();
	}
	ReliSock & rsock = static_cast<ReliSock&>(sock);

	Selector selector;
	selector.add_fd(sock.get_file_desc(), Selector::IO_WRITE);
	int timeout = sock.timeout(0); sock.timeout(timeout);
	timeout = timeout ? timeout : 20;
	selector.set_timeout(timeout);
	if (!putClassAd(&sock, ad, PUT_CLASSAD_NON_BLOCKING))
	{
		return false;
	}
	int retval = rsock.end_of_message_nonblocking();
	while (true) {
		if (rsock.clear_backlog_flag()) {
			Py_BEGIN_ALLOW_THREADS
			selector.execute();
			Py_END_ALLOW_THREADS
			if (selector.timed_out()) {THROW_EX(RuntimeError, "Timeout when trying to write to remote host");}
		} else if (retval == 1) {
예제 #2
0
bool
DCTransferQueue::CheckTransferQueueSlot()
{
	if( !m_xfer_queue_sock ) {
		return false;
	}
	if( m_xfer_queue_pending ) {
			// If connection closes while our status is still pending,
			// we will find out in PollForTransferQueueSlot(), so no
			// need to do anything here.
		return false;
	}

	Selector selector;
	selector.add_fd( m_xfer_queue_sock->get_file_desc(), Selector::IO_READ );
	selector.set_timeout( 0 );
	selector.execute();

	if( selector.has_ready() ) {
			// If the socket ever selects true for read, this means the
			// transfer queue manager has either died or taken away our
			// transfer slot.

		formatstr(m_xfer_rejected_reason,
			"Connection to transfer queue manager %s for %s has gone bad.",
			m_xfer_queue_sock->peer_description(), m_xfer_fname.c_str());
		dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str());

		m_xfer_queue_go_ahead = false;
		return false;
	}

		// All is quiet on our connection to the transfer queue manager.
	return true;
}
예제 #3
0
bool
NamedPipeWriter::write_data(void* buffer, int len)
{
	assert(m_initialized);

	// if we're writing to a pipe that has multiple writers,
	// we need to make sure our messages are no larger than
	// PIPE_BUF to guarantee atomic writes
	//
	assert(len <= PIPE_BUF);

	// if we have a watchdog, we don't go right into a blocking
	// write. instead, we select with both the real pipe and the
	// watchdog pipe, which will close if our peer shuts down or
	// crashes
	//
	if (m_watchdog != NULL) {
		int watchdog_pipe = m_watchdog->get_file_descriptor();
		Selector selector;
		selector.add_fd( m_pipe, Selector::IO_WRITE );
		selector.add_fd( watchdog_pipe, Selector::IO_READ );
		selector.execute();
		if ( selector.failed() || selector.signalled() ) {
			dprintf(D_ALWAYS,
			        "select error: %s (%d)\n",
			        strerror(selector.select_errno()),
			        selector.select_errno());
			return false;
		}
		if ( selector.fd_ready( watchdog_pipe, Selector::IO_READ ) ) {
			dprintf(D_ALWAYS,
			        "error writing to named pipe: "
			            "watchdog pipe has closed\n");
			return false;
		}
	}

	// do the write
	//
	int bytes = write(m_pipe, buffer, len);
	if (bytes != len) {
		if (bytes == -1) {
			dprintf(D_ALWAYS,
			        "write error: %s (%d)\n",
			        strerror(errno),
			        errno);
		}
		else {
			dprintf(D_ALWAYS,
			        "error: wrote %d of %d bytes\n",
			        bytes,
			        len);
		}
		return false;
	}

	return true;
}
예제 #4
0
/*
 * Accept one incoming connection on this listening socket and bind it to
 * the (so far unused) ReliSock c.
 *
 * Returns TRUE on success.  Returns FALSE if this socket is not in the
 * listen state, c is not a virgin socket, the wait for a connection
 * timed out or failed, or the underlying accept failed.
 */
int 
ReliSock::accept( ReliSock	&c )
{
	const bool is_listening =
		(_state == sock_special) && (_special_state == relisock_listen);
	if ( !is_listening || c._state != sock_virgin ) {
		return FALSE;
	}

		// With a timeout configured, wait for the listen socket to become
		// readable before calling accept.
	if (_timeout > 0) {
		Selector waiter;
		waiter.add_fd( _sock, Selector::IO_READ );
		waiter.set_timeout( _timeout );
		waiter.execute();

		if( waiter.timed_out() ) {
			return FALSE;
		}
		if( !waiter.has_ready() ) {
			dprintf( D_ALWAYS, "select returns %d, connect failed\n",
				waiter.select_retval() );
			return FALSE;
		}
	}

#ifndef WIN32 /* Unix */
		// Clear errno so the EMFILE test below reflects this call only.
	errno = 0;
#endif
	int accepted_fd = condor_accept(_sock, c._who);
	if (accepted_fd < 0) {
#ifndef WIN32 /* Unix */
		if ( errno == EMFILE ) {
			_condor_fd_panic ( __LINE__, __FILE__ ); /* This calls dprintf_exit! */
		}
#endif
		return FALSE;
	}

	c.assign(accepted_fd);
	c.enter_connected_state("ACCEPT");
	c.decode();

	int enable = 1;
	c.setsockopt(SOL_SOCKET, SO_KEEPALIVE, reinterpret_cast<char*>(&enable), sizeof(enable));

		/* Set no delay to disable Nagle, since we buffer all our
		   relisock output and it degrades performance of our
		   various chatty protocols. -Todd T, 9/05
		*/
	c.setsockopt(IPPROTO_TCP, TCP_NODELAY, reinterpret_cast<char*>(&enable), sizeof(enable));

	return TRUE;
}
/*
 * Handler for activity on the listener socket: accept a connection, then
 * keep accepting for as long as the listener is immediately readable,
 * up to m_max_accepts connections per call (no limit when
 * m_max_accepts <= 0).
 *
 * Always returns KEEP_STREAM.
 */
int
SharedPortEndpoint::HandleListenerAccept( Stream * stream )
{
#ifndef WIN32
	ASSERT( stream == &m_listener_sock );
#endif
		// Zero timeout: the selector is only used to poll for further
		// pending connections, never to wait for them.
	Selector more_pending;
	more_pending.set_timeout( 0, 0 );
	more_pending.add_fd( static_cast<Sock*>(stream)->get_file_desc(), Selector::IO_READ );

	int rounds_done = 0;
	while( (rounds_done < m_max_accepts) || (m_max_accepts <= 0) ) {
		DoListenerAccept(NULL);

		more_pending.execute();
		if( !more_pending.has_ready() ) {
				// nothing else is waiting on the listener
			break;
		}
		rounds_done++;
	}

	return KEEP_STREAM;
}
예제 #6
0
//
// Reaper for a vanilla job process.
//
// pid    - process id being reaped.
// status - exit status reported for that process; may be replaced by
//          pidNameSpaceReaper() before being handed to OsProc::JobReaper().
//
// Flow: run the other reapers, (on Linux) poll the OOM event descriptor,
// then decide whether this exit is a successful checkpoint (restart the
// job or report the eviction checkpoint), a failed checkpoint (put the job
// on hold), or an ordinary exit (clean up the family and record usage).
//
// Return values visible here: the result of OsProc::JobReaper() for
// unrelated pids and ordinary exits; true after notifying a successful
// eviction checkpoint or after holding the job; false after restarting a
// checkpointed job.
// NOTE(review): presumably true means "the job is finished" and false
// means "the job is still running" -- confirm against the callers.
//
bool
VanillaProc::JobReaper(int pid, int status)
{
	dprintf(D_FULLDEBUG,"Inside VanillaProc::JobReaper()\n");

	//
	// Run all the reapers first, since some of them change the exit status.
	//
	if( m_pid_ns_status_filename.length() > 0 ) {
		status = pidNameSpaceReaper( status );
	}
	bool jobExited = OsProc::JobReaper( pid, status );
	// Not our job process: nothing more to do beyond what OsProc did.
	if( pid != JobPid ) { return jobExited; }

#if defined(LINUX)
	// On newer kernels if memory.use_hierarchy==1, then we cannot disable
	// the OOM killer.  Hence, we have to be ready for a SIGKILL to be delivered
	// by the kernel at the same time we get the notification.  Hence, if we
	// see an exit signal, we must also check the event file descriptor.
	//
	// outOfMemoryEvent() is aware of checkpointing and will mention that
	// the OOM event happened during a checkpoint.
	int efd = -1;
	if( (m_oom_efd >= 0) && daemonCore->Get_Pipe_FD(m_oom_efd, &efd) && (efd != -1) ) {
		// Zero-timeout poll: only report an OOM event that is already
		// pending on the descriptor.
		Selector selector;
		selector.add_fd(efd, Selector::IO_READ);
		selector.set_timeout(0);
		selector.execute();
		if( !selector.failed() && !selector.timed_out() && selector.has_ready() && selector.fd_ready(efd, Selector::IO_READ) ) {
			outOfMemoryEvent( m_oom_efd );
		}
	}
#endif

	//
	// We have three cases to consider:
	//   * if we're checkpointing; or
	//   * if we see a special checkpoint exit code; or
	//   * there's no special case to consider.
	//

	// Each lookup leaves its default in place if the attribute is absent
	// from the job ad (the return values are not checked here).
	bool wantsFileTransferOnCheckpointExit = false;
	JobAd->LookupBool( ATTR_WANT_FT_ON_CHECKPOINT, wantsFileTransferOnCheckpointExit );

	int checkpointExitCode = 0;
	JobAd->LookupInteger( ATTR_CHECKPOINT_EXIT_CODE, checkpointExitCode );
	int checkpointExitSignal = 0;
	JobAd->LookupInteger( ATTR_CHECKPOINT_EXIT_SIGNAL, checkpointExitSignal );
	bool checkpointExitBySignal = 0;
	JobAd->LookupBool( ATTR_CHECKPOINT_EXIT_BY_SIGNAL, checkpointExitBySignal );

	// Build the raw status value that a successful checkpoint exit would
	// produce, so it can be compared directly against exit_status below.
	int successfulCheckpointStatus = 0;
	if( checkpointExitBySignal ) {
		successfulCheckpointStatus = checkpointExitSignal;
	} else if( checkpointExitCode != 0 ) {
		// Exit code is shifted into the high byte of the status here;
		// on Windows the unshifted exit code is used instead.
		successfulCheckpointStatus = checkpointExitCode << 8;
#if defined( WINDOWS )
		successfulCheckpointStatus = checkpointExitCode;
#endif
	}

	// NOTE(review): exit_status is not declared in this function; it is
	// presumably a member updated by OsProc::JobReaper() above -- confirm.
	if( isCheckpointing ) {
		dprintf( D_FULLDEBUG, "Inside VanillaProc::JobReaper() during a checkpoint\n" );

		if( exit_status == successfulCheckpointStatus ) {
			if( isSoftKilling ) {
				notifySuccessfulEvictionCheckpoint();
				return true;
			}

			restartCheckpointedJob();
			isCheckpointing = false;
			return false;
		} else {
			// The job exited without taking a checkpoint.  If we don't do
			// anything, it will be reported as if the error code or signal
			// had happened naturally (and the job will usually exit the
			// queue).  This could confuse the users.
			//
			// Instead, we'll put the job on hold, figuring that if the job
			// requested that we (periodically) send it a signal, and we
			// did, that it's not our fault that the job failed.  This has
			// the convenient side-effect of not overwriting the job's
			// previous checkpoint(s), if any (since file transfer doesn't
			// occur when the job goes on hold).
			killFamilyIfWarranted();
			recordFinalUsage();

			std::string holdMessage;
			formatstr( holdMessage, "Job did not exit as promised when sent its checkpoint signal.  "
				"Promised exit was %s %u, actual exit status was %s %u.",
				checkpointExitBySignal ? "on signal" : "with exit code",
				checkpointExitBySignal ? checkpointExitSignal : checkpointExitCode,
				WIFSIGNALED( exit_status ) ? "on signal" : "with exit code",
				WIFSIGNALED( exit_status ) ? WTERMSIG( exit_status ) : WEXITSTATUS( exit_status ) );
			Starter->jic->holdJob( holdMessage.c_str(), CONDOR_HOLD_CODE_FailedToCheckpoint, exit_status );
			Starter->Hold();
			return true;
		}
	} else if( wantsFileTransferOnCheckpointExit && exit_status == successfulCheckpointStatus ) {
		// We were not checkpointing, but the job exited with the agreed
		// checkpoint status on its own.
		dprintf( D_FULLDEBUG, "Inside VanillaProc::JobReaper() and the job self-checkpointed.\n" );

		if( isSoftKilling ) {
			notifySuccessfulEvictionCheckpoint();
			return true;
		} else {
			restartCheckpointedJob();
			return false;
		}
	} else {
		// If the parent job process died, clean up all of the job's processes.
		killFamilyIfWarranted();

		// Record final usage stats for this process family, since
		// once the reaper returns, the family is no longer
		// registered with DaemonCore and we'll never be able to
		// get this information again.
		recordFinalUsage();

		return jobExited;
	}
}
예제 #7
0
void SocketProxy::execute()
{
	std::list<SocketProxyPair>::iterator it;
	Selector selector;

	while( true ) {
		selector.reset();

		bool has_active_sockets = false;
		for	( it=m_socket_pairs.begin(); it != m_socket_pairs.end(); ++it ) {
			if( it->shutdown ) {
				continue;
			}
			has_active_sockets = true;
			if( it->buf_end > 0 ) {
					// drain the buffer before reading more
				selector.add_fd(it->to_socket, Selector::IO_WRITE);
			}
			else {
				selector.add_fd(it->from_socket, Selector::IO_READ);
			}
		}

		if( !has_active_sockets ) {
			break;
		}

		selector.execute();

		for	( it=m_socket_pairs.begin(); it != m_socket_pairs.end(); ++it ) {
			if( it->shutdown ) {
				continue;
			}
			if( it->buf_end > 0 ) {
					// attempt to drain the buffer
				if( selector.fd_ready(it->to_socket, Selector::IO_WRITE) ) {
					int n = write(it->to_socket,&it->buf[it->buf_begin],it->buf_end-it->buf_begin);
					if( n > 0 ) {
						it->buf_begin += n;
						if( it->buf_begin >= it->buf_end ) {
							it->buf_begin = 0;
							it->buf_end = 0;
						}
					}
				}
			}
			else if( selector.fd_ready(it->from_socket, Selector::IO_READ) ) {
				int n = read(it->from_socket,it->buf,SOCKET_PROXY_BUFSIZE);
				if( n > 0 ) {
					it->buf_end = n;
				}
				else if( n == 0 ) {
						// the socket has closed
						// WIN32 lacks SHUT_RD=0 and SHUT_WR=1
					shutdown(it->from_socket,0);
					close(it->from_socket);
					shutdown(it->to_socket,1);
					close(it->to_socket);
					it->shutdown = true;
				}
				else if( n < 0 ) {
					MyString error_msg;
					error_msg.sprintf("Error reading from socket %d: %s\n",
									  it->from_socket, strerror(errno));
					setErrorMsg(error_msg.Value());
					break;
				}
			}
		}
	}
}
예제 #8
0
/* Generic read/write wrappers for condor.  These functions loosely emulate
 * the read/write system calls under unix except that they are portable, use
 * a timeout, and make sure that all data is read or written.
 *
 * A few notes on the behavior differing from POSIX:
 * - These will never fail due to EINTR.
 * - If in non_blocking mode, there may be a short read or write returned.
 * - The corresponding POSIX function returns 0 bytes read when the peer closed
 *   the socket; these return -2.
 * - If zero bytes were read/written in non-blocking mode, this will return 0.  This differs
 *   from POSIX.
 * - Providing a zero-sized argument to this function will cause the program to abort().
 *
 * Returns < 0 on failure.  -1 = general error, -2 = peer closed socket
 *
 * condor_read() parameters:
 *   peer_description - text used to identify the peer in log messages (it is
 *                      run through not_null_peer_description() before printing).
 *   fd               - socket to read from; asserted to be >= 0.
 *   buf              - destination buffer; asserted to be non-NULL.
 *   sz               - number of bytes wanted; asserted to be > 0.
 *   timeout          - when > 0, the overall number of seconds allowed for the
 *                      blocking read, enforced with select(); when <= 0 the
 *                      blocking path calls recv() without selecting first.
 *   flags            - passed unchanged to recv().
 *   non_blocking     - when true, the socket is temporarily switched to
 *                      non-blocking mode, one recv() is attempted (repeated
 *                      only on EINTR), and the result is returned at once.
 *
 * In blocking mode the function returns sz on success (all bytes read).
 */
int
condor_read( char const *peer_description, SOCKET fd, char *buf, int sz, int timeout, int flags, bool non_blocking )
{
	Selector selector;
	int nr = 0, nro;
	unsigned int start_time=0, cur_time=0;
	char sinbuf[SINFUL_STRING_BUF_SIZE];

	if( IsDebugLevel(D_NETWORK) ) {
		dprintf(D_NETWORK,
				"condor_read(fd=%d %s,,size=%d,timeout=%d,flags=%d,non_blocking=%d)\n",
				fd,
				not_null_peer_description(peer_description,fd,sinbuf),
				sz,
				timeout,
				flags,
				non_blocking);
	}

	/* PRE Conditions. */
	ASSERT(fd >= 0);     /* Need valid file descriptor */
	ASSERT(buf != NULL); /* Need real memory to put data into */
	ASSERT(sz > 0);      /* Need legit size on buffer */

	if (non_blocking) {
			// Non-blocking path: switch the socket to non-blocking mode,
			// try one recv(), then restore the previous mode.
#ifdef WIN32
		unsigned long mode = 1; // nonblocking mode
		if (ioctlsocket(fd, FIONBIO, &mode) < 0)
			return -1;
#else
		int fcntl_flags;
		if ( (fcntl_flags=fcntl(fd, F_GETFL)) < 0 )
			return -1;
			// set nonblocking mode
		if ( ((fcntl_flags & O_NONBLOCK) == 0) && fcntl(fd, F_SETFL, fcntl_flags | O_NONBLOCK) == -1 )
			return -1;
#endif
			// -2 is only a sentinel that forces the first pass through the
			// loop; afterwards the loop repeats solely on EINTR.
		nr = -2;
		while (nr == -2 || (nr == -1 && errno == EINTR)) {
			nr = recv(fd, buf, sz, flags);
		}

		if ( nr <= 0 ) {
			int the_error;
			char const *the_errorstr;
#ifdef WIN32
			the_error = WSAGetLastError();
			the_errorstr = "";
#else
			the_error = errno;
			the_errorstr = strerror(the_error);
#endif
				// A zero-byte result means the peer closed the socket,
				// unless we were only peeking.
			if ( nr == 0 && !(flags & MSG_PEEK)) {
				nr = -2;
				dprintf( D_FULLDEBUG, "condor_read(): "
					"Socket closed when trying to read %d bytes from %s in non-blocking mode\n",
					sz,
					not_null_peer_description(peer_description,fd,sinbuf) );
			} else if ( !errno_is_temporary(the_error) ) {
					// Real failure: log it and return nr unchanged.
				dprintf( D_ALWAYS, "condor_read() failed: recv() %d bytes from %s "
					"returned %d, "     
					"timeout=%d, errno=%d %s.\n",
					sz,                 
					not_null_peer_description(peer_description,fd,sinbuf),
					nr, timeout, the_error, the_errorstr );
			}
			else
			{
					// Temporary error: report "zero bytes read" to the caller.
				nr = 0;
			}
		}

#ifdef WIN32
		mode = 0; // reset blocking mode
		if (ioctlsocket(fd, FIONBIO, &mode) < 0)
			return -1;
#else
			// reset flags to prior value
		if ( ((fcntl_flags & O_NONBLOCK) == 0) && (fcntl(fd, F_SETFL, fcntl_flags) == -1) )
			return -1;
#endif
		return nr;
	}

		// Blocking path: loop until all sz bytes have been read.
	selector.add_fd( fd, Selector::IO_READ );
	
	if ( timeout > 0 ) {
		start_time = time(NULL);
		cur_time = start_time;
	}

	while( nr < sz ) {

		if( timeout > 0 ) {

				// cur_time == 0 means "stale"; refresh it from the clock.
			if( cur_time == 0 ) {
				cur_time = time(NULL);
			}

			// If it hasn't yet been longer than we said we would wait...
			if( start_time + timeout > cur_time ) {
					// ...wait only for the time remaining of the overall timeout.
				selector.set_timeout( (start_time + timeout) - cur_time );
			} else {
				dprintf( D_ALWAYS, 
						 "condor_read(): timeout reading %d bytes from %s.\n",
						 sz,
						 not_null_peer_description(peer_description,fd,sinbuf) );
				return -1;
			}
			
				// Mark the cached time stale so the next pass re-reads the clock.
			cur_time = 0;

			if( IsDebugVerbose( D_NETWORK ) ) {
				dprintf(D_NETWORK, "condor_read(): fd=%d\n", fd);
			}
			selector.execute();
			if( IsDebugVerbose( D_NETWORK ) ) {
				dprintf(D_NETWORK, "condor_read(): select returned %d\n", 
						selector.select_retval());
			}

			if ( selector.timed_out() ) {
				dprintf( D_ALWAYS, 
						 "condor_read(): timeout reading %d bytes from %s.\n",
						 sz,
						 not_null_peer_description(peer_description,fd,sinbuf) );
				return -1;

			} else if ( selector.signalled() ) {
					// Interrupted by a signal: retry with the remaining time.
				continue;
			} else if ( !selector.has_ready() ) {
					// Neither timeout, signal, nor ready fd: treat as select() failure.
				int the_error;
                char const *the_errorstr;
#ifdef WIN32
				the_error = WSAGetLastError();
                the_errorstr = "";
#else
				the_error = errno;
                the_errorstr = strerror(the_error);
#endif

				dprintf( D_ALWAYS, "condor_read() failed: select() "
						 "returns %d, reading %d bytes from %s (errno=%d %s).\n",
						 selector.select_retval(),
						 sz,
						 not_null_peer_description(peer_description,fd,sinbuf),
						 the_error, the_errorstr );
				return -1;
			}
		}
		
		start_thread_safe("recv");

			// Read into the unfilled tail of the buffer.
		nro = recv(fd, &buf[nr], sz - nr, flags);
		// Save the error value before stop_thread_safe(), as that may
		// overwrite it.
		int the_error;
#ifdef WIN32
		the_error = WSAGetLastError();
#else
		the_error = errno;
#endif

		stop_thread_safe("recv");

		if( nro <= 0 ) {

				// If timeout > 0, and we made it here, then
				// we know we were woken by select().  Now, if
				// select() wakes up on a read fd, and then recv() 
				// subsequently returns 0, that means that the
				// socket has been closed by our peer.
				// If timeout == 0, then recv() should have 
				// blocked until 1 or more bytes arrived.
				// Thus no matter what, if nro==0, then the
				// socket must be closed.
			if ( nro == 0 ) {
				dprintf( D_FULLDEBUG, "condor_read(): "
						 "Socket closed when trying to read %d bytes from %s\n",
						 sz,
						 not_null_peer_description(peer_description,fd,sinbuf) );
				return -2;
			}

            char const *the_errorstr;
#ifdef WIN32
            the_errorstr = "";
#else
            the_errorstr = strerror(the_error);
#endif
				// Temporary errors are retried (going back through the
				// timeout check above when a timeout is in effect).
			if ( errno_is_temporary(the_error) ) {
				dprintf( D_FULLDEBUG, "condor_read(): "
				         "recv() returned temporary error %d %s,"
				         "still trying to read from %s\n",
				         the_error,the_errorstr,
				         not_null_peer_description(peer_description,fd,sinbuf) );
				continue;
			}

			dprintf( D_ALWAYS, "condor_read() failed: recv(fd=%d) returned %d, "
					 "errno = %d %s, reading %d bytes from %s.\n",
					 fd, nro, the_error, the_errorstr, sz,
					 not_null_peer_description(peer_description,fd,sinbuf) );

				// Extra diagnostics when recv() itself reported ETIMEDOUT.
			if( the_error == ETIMEDOUT ) {
				if( timeout <= 0 ) {
					dprintf( D_ALWAYS,
							 "condor_read(): read timeout during blocking read from %s\n",
							 not_null_peer_description(peer_description,fd,sinbuf));
				}
				else {
					int lapse = (int)(time(NULL)-start_time);
					dprintf( D_ALWAYS,
							 "condor_read(): UNEXPECTED read timeout after %ds during non-blocking read from %s (desired timeout=%ds)\n",
							 lapse,
							 not_null_peer_description(peer_description,fd,sinbuf),
							 timeout);
				}
			}
			return -1;
		}
		nr += nro;
	}
	
/* Post Conditions */
	ASSERT( nr == sz );  // we should have read *ALL* the data
	return nr;
}
예제 #9
0
/*
 * Write sz bytes from buf to the socket fd.
 *
 *   peer_description - text used to identify the peer in log messages (it is
 *                      run through not_null_peer_description() before printing).
 *   fd               - socket to write to; asserted to be >= 0.
 *   buf              - data to send; asserted to be non-NULL.
 *   sz               - number of bytes to send; asserted to be > 0.
 *   timeout          - when > 0, the overall number of seconds allowed for the
 *                      blocking write, enforced with select(); when <= 0 the
 *                      blocking path calls send() without selecting first.
 *   flags            - passed unchanged to send().
 *   non_blocking     - when true, the socket is temporarily switched to
 *                      non-blocking mode, one send() is attempted (repeated
 *                      only on EINTR), and the result is returned at once.
 *
 * Blocking mode returns sz on success and -1 on any failure (timeout, peer
 * closed the socket, select() or send() error).  Non-blocking mode returns
 * the send() result, with temporary errors mapped to 0, or -1 if changing
 * or restoring the socket mode fails.
 */
int
condor_write( char const *peer_description, SOCKET fd, const char *buf, int sz, int timeout, int flags, bool non_blocking )
{
	int nw = 0, nwo = 0;
	unsigned int start_time = 0, cur_time = 0;
	char tmpbuf[1];
	int nro;
	bool select_for_read = true;
	bool needs_select = true;
	char sinbuf[SINFUL_STRING_BUF_SIZE];

	if( IsDebugLevel( D_NETWORK ) ) {
		dprintf(D_NETWORK,
				"condor_write(fd=%d %s,,size=%d,timeout=%d,flags=%d,non_blocking=%d)\n",
				fd,
				not_null_peer_description(peer_description,fd,sinbuf),
				sz,
				timeout,
				flags,
				non_blocking);
	}

	/* Pre-conditions. */
	ASSERT(sz > 0);      /* Can't write buffers that have no data */
	ASSERT(fd >= 0);     /* Need valid file descriptor */
	ASSERT(buf != NULL); /* Need valid buffer to write */

	if (non_blocking) {
			// Non-blocking path: switch the socket to non-blocking mode,
			// try one send(), then restore the previous mode.
#ifdef WIN32
		unsigned long mode = 1; // nonblocking mode
		if (ioctlsocket(fd, FIONBIO, &mode) < 0)
			return -1;
#else
		int fcntl_flags;
		if ( (fcntl_flags=fcntl(fd, F_GETFL)) < 0 )
			return -1;
			// set nonblocking mode
		if ( ((fcntl_flags & O_NONBLOCK) == 0) && fcntl(fd, F_SETFL, fcntl_flags | O_NONBLOCK) == -1 )
			return -1;
#endif
			// -2 is only a sentinel that forces the first pass through the
			// loop; afterwards the loop repeats solely on EINTR.
		nw = -2;
		while (nw == -2 || (nw == -1 && errno == EINTR)) {
			nw = send(fd, buf, sz, flags);
		}

		if ( nw <= 0 ) {
			int the_error;
			char const *the_errorstr;
#ifdef WIN32
			the_error = WSAGetLastError();
			the_errorstr = "";
#else
			the_error = errno;
			the_errorstr = strerror(the_error);
#endif
			if ( !errno_is_temporary(the_error) ) {
					// Real failure: log it and return nw unchanged.
				dprintf( D_ALWAYS, "condor_write() failed: send() %d bytes to %s "
					"returned %d, "     
					"timeout=%d, errno=%d %s.\n",
					sz,                 
					not_null_peer_description(peer_description,fd,sinbuf),
					nw, timeout, the_error, the_errorstr );
			}
			else
			{
					// Temporary error: report "zero bytes written" to the caller.
				nw = 0;
			}
		}
			// NOTE(review): this "wrote %d bytes" message is only emitted when
			// nw is negative, i.e. after a failure; the guard may have been
			// meant to be nw > 0 -- confirm intent before changing.
		if (nw < 0) {
			dprintf(D_NETWORK, "condor_write (non-blocking) wrote %d bytes.\n", nw);
		}       

#ifdef WIN32
		mode = 0; // reset blocking mode
		if (ioctlsocket(fd, FIONBIO, &mode) < 0)
			return -1;
#else
			// reset flags to prior value
		if ( ((fcntl_flags & O_NONBLOCK) == 0) && fcntl(fd, F_SETFL, fcntl_flags) == -1 )
			return -1;
#endif
		return nw;
	}

		// Blocking path.  Watch the socket for writability and exceptions,
		// and initially also for readability so a closed connection is noticed.
	Selector selector;
	selector.add_fd( fd, Selector::IO_READ );
	selector.add_fd( fd, Selector::IO_WRITE );
	selector.add_fd( fd, Selector::IO_EXCEPT );
	
	if(timeout > 0) {
		start_time = time(NULL);
		cur_time = start_time;
	}

		// Loop until all sz bytes have been sent.
	while( nw < sz ) {

		needs_select = true;

		if( timeout > 0 ) {
			while( needs_select ) {
					// cur_time == 0 means "stale"; refresh it from the clock.
				if( cur_time == 0 ) {
					cur_time = time(NULL);
				}

					// Wait only for what remains of the overall timeout.
				if( start_time + timeout > cur_time ) {
					selector.set_timeout( (start_time + timeout) - cur_time );
				} else {
					dprintf( D_ALWAYS, "condor_write(): "
							 "timed out writing %d bytes to %s\n",
							 sz,
							 not_null_peer_description(peer_description,fd,sinbuf) );
					return -1;
				}
			
					// Mark the cached time stale so the next pass re-reads the clock.
				cur_time = 0;

					// The write and except sets are added at the top of
					// this function, since we always want to select on
					// them.
				if( select_for_read ) {
						// Also, put it in the read fds, so we'll wake
						// up if the socket is closed
					selector.add_fd( fd, Selector::IO_READ );
				} else {
					selector.delete_fd( fd, Selector::IO_READ );
				}
				selector.execute();

					// unless we decide we need to select() again, we
					// want to break out of our while() loop now that
					// we've actually performed a select()
				needs_select = false;

				if ( selector.timed_out() ) {
					dprintf( D_ALWAYS, "condor_write(): "
							 "timed out writing %d bytes to %s\n",
							 sz,
							 not_null_peer_description(peer_description,fd,sinbuf) );
					return -1;
				
				} else if ( selector.signalled() ) {
						// Interrupted by a signal: select again with the remaining time.
					needs_select = true;
					continue;
				} else if ( selector.has_ready() ) {
					if ( selector.fd_ready( fd, Selector::IO_READ ) ) {
						dprintf(D_NETWORK, "condor_write(): socket %d is readable\n", fd);
							// see if the socket was closed
							// (peek one byte so no incoming data is consumed)
						nro = recv(fd, tmpbuf, 1, MSG_PEEK);
						if( nro == -1 ) {
							int the_error;
                            char const *the_errorstr;
#ifdef WIN32
							the_error = WSAGetLastError();
                            the_errorstr = "";
#else
							the_error = errno;
                            the_errorstr = strerror(the_error);
#endif
								// needs_select was cleared above, so this
								// continue ends the select loop and falls
								// through to the send() below.
							if(errno_is_temporary( the_error )) {
								continue;
							}

							dprintf( D_ALWAYS, "condor_write(): "
									 "Socket closed when trying "
									 "to write %d bytes to %s, fd is %d, "
									 "errno=%d %s\n",
									 sz,
									 not_null_peer_description(peer_description,fd,sinbuf),
									 fd, the_error, the_errorstr );
							return -1;
						}

							// Readable with zero bytes available: peer closed.
						if( ! nro ) {
							dprintf( D_ALWAYS, "condor_write(): "
									 "Socket closed when trying "
									 "to write %d bytes to %s, fd is %d\n",
									 sz,
									 not_null_peer_description(peer_description,fd,sinbuf),
									 fd );
							return -1;
						}

							/*
							  otherwise, there's real data to consume
							  on the read side, and we don't want to
							  put our fd in the readfds anymore or
							  select() will never block.  also, we
							  need to re-do the select()
							*/
						needs_select = true;
						select_for_read = false;
					}
				} else {
						// Neither timeout, signal, nor ready fd: treat as select() failure.
					dprintf( D_ALWAYS, "condor_write() failed: select() "
							 "returns %d, "
							 "writing %d bytes to %s.\n",
							 selector.select_retval(),
							 sz,
							 not_null_peer_description(peer_description,fd,sinbuf) );
					return -1;
				}
			}
		}
		start_thread_safe("send");

			// Send the not-yet-written tail of the buffer.
		nwo = send(fd, &buf[nw], sz - nw, flags);
		// Save the error value before stop_thread_safe(), as that may
		// overwrite it.
		int the_error;
#ifdef WIN32
		the_error = WSAGetLastError();
#else
		the_error = errno;
#endif

		stop_thread_safe("send");		

		if( nwo <= 0 ) {
            char const *the_errorstr;
#ifdef WIN32
            the_errorstr = "";
#else
            the_errorstr = strerror(the_error);
#endif
				// Temporary errors are retried (going back through the
				// timeout check above when a timeout is in effect).
			if ( errno_is_temporary(the_error) ) {
				dprintf( D_FULLDEBUG, "condor_write(): "
				         "send() returned temporary error %d %s,"
						 "still trying to write %d bytes to %s\n", the_error,
						 the_errorstr, sz,
						 not_null_peer_description(peer_description,fd,sinbuf) );
				continue;
			}

			dprintf( D_ALWAYS, "condor_write() failed: send() %d bytes to %s "
					 "returned %d, "
					 "timeout=%d, errno=%d %s.\n",
					 sz,
					 not_null_peer_description(peer_description,fd,sinbuf),
					 nwo, timeout, the_error, the_errorstr );

			return -1;
		}
		
		nw += nwo;
	}

	/* POST conditions. */
	ASSERT( nw == sz ); /* Make sure that we wrote everything */
	return nw;
}
예제 #10
0
/*
 * Wait up to timeout seconds for the transfer queue manager to answer our
 * pending request for a transfer slot.
 *
 *   timeout    - maximum number of seconds to wait for a response.
 *   pending    - set to true if no answer has arrived yet (the caller is
 *                expected to call again later), false once the outcome is
 *                known.
 *   error_desc - receives the rejection/failure reason when the request is
 *                known to have been refused or has failed.
 *
 * Returns true when we have the go-ahead, false otherwise.
 */
bool
DCTransferQueue::PollForTransferQueueSlot(int timeout,bool &pending,MyString &error_desc)
{
	if( GoAheadAlways( m_xfer_downloading ) ) {
		return true;
	}
	CheckTransferQueueSlot();

	if( !m_xfer_queue_pending ) {
			// The outcome of the request is already known.
		pending = false;
		if( !m_xfer_queue_go_ahead ) {
			error_desc = m_xfer_rejected_reason;
		}
		return m_xfer_queue_go_ahead;
	}

		// Wait for a response, restarting the wait (with whatever time is
		// left) whenever select is interrupted by a signal.
	Selector selector;
	selector.add_fd( m_xfer_queue_sock->get_file_desc(), Selector::IO_READ );
	time_t wait_began = time(NULL);
	for(;;) {
		int remaining = timeout - (time(NULL) - wait_began);
		if( remaining < 0 ) {
			remaining = 0;
		}
		selector.set_timeout( remaining );
		selector.execute();
		if( !selector.signalled() ) {
			break;
		}
	}

	if( selector.timed_out() ) {
			// Timing out while waiting for a response is expected; the
			// caller should keep calling this function periodically
			// until we get a result.
		pending = true;
		return false;
	}

	m_xfer_queue_sock->decode();
	ClassAd msg;
	bool granted = false;

	if( !msg.initFromStream(*m_xfer_queue_sock) ||
		!m_xfer_queue_sock->end_of_message() )
	{
			// Could not even read the reply.
		formatstr(m_xfer_rejected_reason,
			"Failed to receive transfer queue response from %s for job %s "
			"(initial file %s).",
			m_xfer_queue_sock->peer_description(),
			m_xfer_jobid.c_str(),
			m_xfer_fname.c_str());
	}
	else {
		int result; // this should be one of the values in XFER_QUEUE_ENUM
		if( !msg.LookupInteger(ATTR_RESULT,result) ) {
				// The reply carries no result attribute.
			std::string msg_str;
			msg.sPrint(msg_str);
			formatstr(m_xfer_rejected_reason,
				"Invalid transfer queue response from %s for job %s (%s): %s",
				m_xfer_queue_sock->peer_description(),
				m_xfer_jobid.c_str(),
				m_xfer_fname.c_str(),
				msg_str.c_str());
		}
		else if( result == XFER_QUEUE_GO_AHEAD ) {
			m_xfer_queue_go_ahead = true;
			granted = true;
		}
		else {
				// Explicit rejection from the queue manager.
			m_xfer_queue_go_ahead = false;
			std::string reason;
			msg.LookupString(ATTR_ERROR_STRING,reason);
			formatstr(m_xfer_rejected_reason,
				"Request to transfer files for %s (%s) was rejected by %s: %s",
				m_xfer_jobid.c_str(), m_xfer_fname.c_str(),
				m_xfer_queue_sock->peer_description(),
				reason.c_str());
		}
	}

	if( granted ) {
		m_xfer_queue_pending = false;
		pending = m_xfer_queue_pending;
		return true;
	}

		// Common failure path: publish and log the reason, and mark the
		// request as resolved without a go-ahead.
	error_desc = m_xfer_rejected_reason;
	dprintf(D_ALWAYS, "%s\n", m_xfer_rejected_reason.c_str());
	m_xfer_queue_pending = false;
	m_xfer_queue_go_ahead = false;
	pending = m_xfer_queue_pending;
	return false;
}