/*
  Write this object's idle-time and load values to the debug log.

  When IS_UPDATE(how_much) is true the idle times are logged at
  D_KEYBOARD and the loads at D_LOAD.  Otherwise each line is logged at
  D_FULLDEBUG, and only if IsDebugLevel() reports the matching category
  (D_LOAD / D_KEYBOARD) as enabled.
*/
void
CpuAttributes::display( amask_t how_much )
{
	if( ! IS_UPDATE(how_much) ) {
		// Not an update: loads first, then idle times, both gated on
		// their own debug category.
		if( IsDebugLevel( D_LOAD ) ) {
			dprintf( D_FULLDEBUG,
					 "%s %.2f %s %.2f %s %.2f\n",
					 "SystemLoad:", c_condor_load + c_owner_load,
					 "CondorLoad:", c_condor_load,
					 "OwnerLoad:", c_owner_load );
		}
		if( IsDebugLevel( D_KEYBOARD ) ) {
			dprintf( D_FULLDEBUG,
					 "Idle time: %s %-8d %s %d\n",
					 "Keyboard:", (int)c_idle,
					 "Console:", (int)c_console_idle );
		}
		return;
	}

	// Update: idle times first, then loads.
	dprintf( D_KEYBOARD,
			 "Idle time: %s %-8d %s %d\n",
			 "Keyboard:", (int)c_idle,
			 "Console:", (int)c_console_idle );
	dprintf( D_LOAD,
			 "%s %.2f %s %.2f %s %.2f\n",
			 "SystemLoad:", c_condor_load + c_owner_load,
			 "CondorLoad:", c_condor_load,
			 "OwnerLoad:", c_owner_load );
}
void JobInfoCommunicator::checkForStarterDebugging( void ) { if( ! job_ad ) { EXCEPT( "checkForStarterDebugging() called with no job ad!" ); } // For debugging, see if there's a special attribute in the // job ad that sends us into an infinite loop, waiting for // someone to attach with a debugger int starter_should_wait = 0; job_ad->LookupInteger( ATTR_STARTER_WAIT_FOR_DEBUG, starter_should_wait ); if( starter_should_wait ) { dprintf( D_ALWAYS, "Job requested starter should wait for " "debugger with %s=%d, going into infinite loop\n", ATTR_STARTER_WAIT_FOR_DEBUG, starter_should_wait ); while( 1 ) { if ( !starter_should_wait ) { break; } } } // Also, if the starter has D_JOB turned on, we want to dump // out the job ad to the log file... if( IsDebugLevel( D_JOB ) ) { dprintf( D_JOB, "*** Job ClassAd ***\n" ); job_ad->dPrint( D_JOB ); dprintf( D_JOB, "--- End of ClassAd ---\n" ); } }
void Selector::delete_fd( int fd, IO_FUNC interest ) { #if !defined(WIN32) if ( fd < 0 || fd >= fd_select_size() ) { EXCEPT( "Selector::delete_fd(): fd %d outside valid range 0-%d", fd, _fd_select_size-1 ); } #endif m_single_shot = SINGLE_SHOT_SKIP; if (IsDebugLevel(D_DAEMONCORE)) { dprintf(D_DAEMONCORE | D_VERBOSE, "selector %p deleting fd %d\n", this, fd); } switch( interest ) { case IO_READ: FD_CLR( fd, save_read_fds ); break; case IO_WRITE: FD_CLR( fd, save_write_fds ); break; case IO_EXCEPT: FD_CLR( fd, save_except_fds ); break; } }
void Selector::reset() { _select_retval = -2; _select_errno = 0; state = VIRGIN; timeout_wanted = false; timeout.tv_sec = timeout.tv_usec = 0; max_fd = -1; #if defined(WIN32) FD_ZERO( save_read_fds ); FD_ZERO( save_write_fds ); FD_ZERO( save_except_fds ); #else memset( save_read_fds, 0, fd_set_size * sizeof(fd_set) ); memset( save_write_fds, 0, fd_set_size * sizeof(fd_set) ); memset( save_except_fds, 0, fd_set_size * sizeof(fd_set) ); #endif #ifdef SELECTOR_USE_POLL m_single_shot = SINGLE_SHOT_VIRGIN; #else m_single_shot = SINGLE_SHOT_SKIP; #endif memset(&m_poll, '\0', sizeof(m_poll)); if (IsDebugLevel(D_DAEMONCORE)) { dprintf(D_DAEMONCORE | D_VERBOSE, "selector %p resetting\n", this); } }
void Selector::add_fd( int fd, IO_FUNC interest ) { // update max_fd (the highest valid index in fd_set's array) and also // make sure we're not overflowing our fd_set // On Windows, we have to check the individual fd_set to see if it's // full. if( fd > max_fd ) { max_fd = fd; } #if !defined(WIN32) if ( fd < 0 || fd >= fd_select_size() ) { EXCEPT( "Selector::add_fd(): fd %d outside valid range 0-%d", fd, _fd_select_size-1 ); } #endif if(IsDebugLevel(D_DAEMONCORE)) { char *fd_description = describe_fd(fd); dprintf(D_FULLDEBUG, "selector %p adding fd %d (%s)\n", this, fd, fd_description); free(fd_description); } switch( interest ) { case IO_READ: #if defined(WIN32) if ( save_read_fds->fd_count >= fd_select_size() ) { EXCEPT( "Selector::add_fd(): read fd_set is full" ); } #endif FD_SET( fd, save_read_fds ); break; case IO_WRITE: #if defined(WIN32) if ( save_write_fds->fd_count >= fd_select_size() ) { EXCEPT( "Selector::add_fd(): write fd_set is full" ); } #endif FD_SET( fd, save_write_fds ); break; case IO_EXCEPT: #if defined(WIN32) if ( save_except_fds->fd_count >= fd_select_size() ) { EXCEPT( "Selector::add_fd(): except fd_set is full" ); } #endif FD_SET( fd, save_except_fds ); break; } }
bool DCStartd::_suspendClaim( ) { setCmdStr( "suspendClaim" ); if( ! checkClaimId() ) { return false; } if( ! checkAddr() ) { return false; } // if this claim is associated with a security session ClaimIdParser cidp(claim_id); char const *sec_session = cidp.secSessionId(); if (IsDebugLevel(D_COMMAND)) { int cmd = SUSPEND_CLAIM; dprintf (D_COMMAND, "DCStartd::_suspendClaim(%s,...) making connection to %s\n", getCommandStringSafe(cmd), _addr ? _addr : "NULL"); } bool result; ReliSock reli_sock; reli_sock.timeout(20); // years of research... :) if( ! reli_sock.connect(_addr) ) { std::string err = "DCStartd::_suspendClaim: "; err += "Failed to connect to startd ("; err += _addr ? _addr : "NULL"; err += ')'; newError( CA_CONNECT_FAILED, err.c_str() ); return false; } int cmd = SUSPEND_CLAIM; result = startCommand( cmd, (Sock*)&reli_sock, 20, NULL, NULL, false, sec_session ); if( ! result ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::_suspendClaim: Failed to send command " ); return false; } // Now, send the ClaimId if( ! reli_sock.put_secret(claim_id) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::_suspendClaim: Failed to send ClaimId to the startd" ); return false; } if( ! reli_sock.end_of_message() ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::_suspendClaim: Failed to send EOM to the startd" ); return false; } return true; }
bool DCStarter::createJobOwnerSecSession(int timeout,char const *job_claim_id,char const *starter_sec_session,char const *session_info,MyString &owner_claim_id,MyString &error_msg,MyString &starter_version,MyString &starter_addr) { ReliSock sock; if (IsDebugLevel(D_COMMAND)) { dprintf (D_COMMAND, "DCStarter::createJobOwnerSecSession(%s,...) making connection to %s\n", getCommandStringSafe(CREATE_JOB_OWNER_SEC_SESSION), _addr ? _addr : "NULL"); } if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(CREATE_JOB_OWNER_SEC_SESSION, &sock,timeout,NULL,NULL,false,starter_sec_session) ) { error_msg = "Failed to send CREATE_JOB_OWNER_SEC_SESSION to starter"; return false; } ClassAd input; input.Assign(ATTR_CLAIM_ID,job_claim_id); input.Assign(ATTR_SESSION_INFO,session_info); sock.encode(); if( !putClassAd(&sock, input) || !sock.end_of_message() ) { error_msg = "Failed to compose CREATE_JOB_OWNER_SEC_SESSION to starter"; return false; } sock.decode(); ClassAd reply; if( !getClassAd(&sock, reply) || !sock.end_of_message() ) { error_msg = "Failed to get response to CREATE_JOB_OWNER_SEC_SESSION from starter"; return false; } bool success = false; reply.LookupBool(ATTR_RESULT,success); if( !success ) { reply.LookupString(ATTR_ERROR_STRING,error_msg); return false; } reply.LookupString(ATTR_CLAIM_ID,owner_claim_id); reply.LookupString(ATTR_VERSION,starter_version); // get the full starter address from the starter in case it contains // extra CCB info that we don't already know about reply.LookupString(ATTR_STARTER_IP_ADDR,starter_addr); return true; }
bool DCStartd::checkpointJob( const char* name_ckpt ) { dprintf( D_FULLDEBUG, "Entering DCStartd::checkpointJob(%s)\n", name_ckpt ); setCmdStr( "checkpointJob" ); if (IsDebugLevel(D_COMMAND)) { int cmd = PCKPT_JOB; dprintf (D_COMMAND, "DCStartd::checkpointJob(%s,...) making connection to %s\n", getCommandStringSafe(cmd), _addr ? _addr : "NULL"); } bool result; ReliSock reli_sock; reli_sock.timeout(20); // years of research... :) if( ! reli_sock.connect(_addr) ) { std::string err = "DCStartd::checkpointJob: "; err += "Failed to connect to startd ("; err += _addr ? _addr : "NULL"; err += ')'; newError( CA_CONNECT_FAILED, err.c_str() ); return false; } int cmd = PCKPT_JOB; result = startCommand( cmd, (Sock*)&reli_sock ); if( ! result ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::checkpointJob: Failed to send command PCKPT_JOB to the startd" ); return false; } // Now, send the name if( ! reli_sock.put(name_ckpt) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::checkpointJob: Failed to send Name to the startd" ); return false; } if( ! reli_sock.end_of_message() ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::checkpointJob: Failed to send EOM to the startd" ); return false; } // we're done dprintf( D_FULLDEBUG, "DCStartd::checkpointJob: " "successfully sent command\n" ); return true; }
bool DCStartd::vacateClaim( const char* name_vacate ) { setCmdStr( "vacateClaim" ); if (IsDebugLevel(D_COMMAND)) { int cmd = VACATE_CLAIM; dprintf (D_COMMAND, "DCStartd::vacateClaim(%s,...) making connection to %s\n", getCommandStringSafe(cmd), _addr ? _addr : "NULL"); } bool result; ReliSock reli_sock; reli_sock.timeout(20); // years of research... :) if( ! reli_sock.connect(_addr) ) { std::string err = "DCStartd::vacateClaim: "; err += "Failed to connect to startd ("; err += _addr ? _addr : "NULL"; err += ')'; newError( CA_CONNECT_FAILED, err.c_str() ); return false; } int cmd = VACATE_CLAIM; result = startCommand( cmd, (Sock*)&reli_sock ); if( ! result ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::vacateClaim: Failed to send command PCKPT_JOB to the startd" ); return false; } if( ! reli_sock.put(name_vacate) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::vacateClaim: Failed to send Name to the startd" ); return false; } if( ! reli_sock.end_of_message() ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::vacateClaim: Failed to send EOM to the startd" ); return false; } return true; }
int IpVerify::add_hash_entry(const struct in6_addr & sin6_addr, const char * user, perm_mask_t new_mask) { UserPerm_t * perm = NULL; perm_mask_t old_mask = 0; // must init old_mask to zero!!! MyString user_key = user; // assert(PermHashTable); if ( PermHashTable->lookup(sin6_addr, perm) != -1 ) { // found an existing entry. if (has_user(perm, user, old_mask)) { // remove it because we are going to edit the mask below // and re-insert it. perm->remove(user_key); } } else { perm = new UserPerm_t(42, compute_host_hash); if (PermHashTable->insert(sin6_addr, perm) != 0) { delete perm; return FALSE; } } perm->insert(user_key, old_mask | new_mask); if( IsFulldebug(D_FULLDEBUG) || IsDebugLevel(D_SECURITY) ) { MyString auth_str; AuthEntryToString(sin6_addr,user,new_mask, auth_str); dprintf(D_FULLDEBUG|D_SECURITY, "Adding to resolved authorization table: %s\n", auth_str.Value()); } return TRUE; }
void Selector::reset() { _select_retval = -2; _select_errno = 0; state = VIRGIN; timeout_wanted = FALSE; timeout.tv_sec = timeout.tv_usec = 0; max_fd = -1; #if defined(WIN32) FD_ZERO( save_read_fds ); FD_ZERO( save_write_fds ); FD_ZERO( save_except_fds ); #else memset( save_read_fds, 0, fd_set_size * sizeof(fd_set) ); memset( save_write_fds, 0, fd_set_size * sizeof(fd_set) ); memset( save_except_fds, 0, fd_set_size * sizeof(fd_set) ); #endif if (IsDebugLevel(D_DAEMONCORE)) { dprintf(D_FULLDEBUG, "selector %p resetting\n", this); } }
// process ads from the collector, handing each to the callback // callback will return 'false' if it took ownership of the ad. QueryResult CondorQuery:: processAds (bool (*callback)(void*, ClassAd *), void* pv, const char * poolName, CondorError* errstack /*= NULL*/) { Sock* sock; QueryResult result; ClassAd queryAd(extraAttrs); if ( !poolName ) { return Q_NO_COLLECTOR_HOST; } // contact collector Daemon my_collector( DT_COLLECTOR, poolName, NULL ); if( !my_collector.locate() ) { // We were passed a bogus poolName, abort gracefully return Q_NO_COLLECTOR_HOST; } // make the query ad result = getQueryAd (queryAd); if (result != Q_OK) return result; if (IsDebugLevel(D_HOSTNAME)) { dprintf( D_HOSTNAME, "Querying collector %s (%s) with classad:\n", my_collector.addr(), my_collector.fullHostname() ); dPrintAd( D_HOSTNAME, queryAd ); dprintf( D_HOSTNAME, " --- End of Query ClassAd ---\n" ); } int mytimeout = param_integer ("QUERY_TIMEOUT",60); if (!(sock = my_collector.startCommand(command, Stream::reli_sock, mytimeout, errstack)) || !putClassAd (sock, queryAd) || !sock->end_of_message()) { if (sock) { delete sock; } return Q_COMMUNICATION_ERROR; } // get result sock->decode (); int more = 1; while (more) { if (!sock->code (more)) { sock->end_of_message(); delete sock; return Q_COMMUNICATION_ERROR; } if (more) { ClassAd * ad = new ClassAd; if( !getClassAd(sock, *ad) ) { sock->end_of_message(); delete ad; delete sock; return Q_COMMUNICATION_ERROR; } if (callback(pv, ad)) { delete ad; } } } sock->end_of_message(); // finalize sock->close(); delete sock; return (Q_OK); }
bool DCStartd::deactivateClaim( bool graceful, bool *claim_is_closing ) { dprintf( D_FULLDEBUG, "Entering DCStartd::deactivateClaim(%s)\n", graceful ? "graceful" : "forceful" ); if( claim_is_closing ) { *claim_is_closing = false; } setCmdStr( "deactivateClaim" ); if( ! checkClaimId() ) { return false; } if( ! checkAddr() ) { return false; } // if this claim is associated with a security session ClaimIdParser cidp(claim_id); char const *sec_session = cidp.secSessionId(); if (IsDebugLevel(D_COMMAND)) { int cmd = graceful ? DEACTIVATE_CLAIM : DEACTIVATE_CLAIM_FORCIBLY; dprintf (D_COMMAND, "DCStartd::deactivateClaim(%s,...) making connection to %s\n", getCommandStringSafe(cmd), _addr ? _addr : "NULL"); } bool result; ReliSock reli_sock; reli_sock.timeout(20); // years of research... :) if( ! reli_sock.connect(_addr) ) { std::string err = "DCStartd::deactivateClaim: "; err += "Failed to connect to startd ("; err += _addr ? _addr : "NULL"; err += ')'; newError( CA_CONNECT_FAILED, err.c_str() ); return false; } int cmd; if( graceful ) { cmd = DEACTIVATE_CLAIM; } else { cmd = DEACTIVATE_CLAIM_FORCIBLY; } result = startCommand( cmd, (Sock*)&reli_sock, 20, NULL, NULL, false, sec_session ); if( ! result ) { std::string err = "DCStartd::deactivateClaim: "; err += "Failed to send command "; if( graceful ) { err += "DEACTIVATE_CLAIM"; } else { err += "DEACTIVATE_CLAIM_FORCIBLY"; } err += " to the startd"; newError( CA_COMMUNICATION_ERROR, err.c_str() ); return false; } // Now, send the ClaimId if( ! reli_sock.put_secret(claim_id) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::deactivateClaim: Failed to send ClaimId to the startd" ); return false; } if( ! 
reli_sock.end_of_message() ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::deactivateClaim: Failed to send EOM to the startd" ); return false; } reli_sock.decode(); ClassAd response_ad; if( !getClassAd(&reli_sock, response_ad) || !reli_sock.end_of_message() ) { dprintf( D_FULLDEBUG, "DCStartd::deactivateClaim: failed to read response ad.\n"); // The response ad is not critical and is expected to be missing // if the startd is from before 7.0.5. } else { bool start = true; response_ad.LookupBool(ATTR_START,start); if( claim_is_closing ) { *claim_is_closing = !start; } } // we're done dprintf( D_FULLDEBUG, "DCStartd::deactivateClaim: " "successfully sent command\n" ); return true; }
// fetch all ads from the collector that satisfy the constraints QueryResult CondorQuery:: fetchAds (ClassAdList &adList, const char *poolName, CondorError* errstack) { Sock* sock; int more; QueryResult result; ClassAd queryAd(extraAttrs), *ad; if ( !poolName ) { return Q_NO_COLLECTOR_HOST; } // contact collector Daemon my_collector( DT_COLLECTOR, poolName, NULL ); if( !my_collector.locate() ) { // We were passed a bogus poolName, abort gracefully return Q_NO_COLLECTOR_HOST; } // make the query ad result = getQueryAd (queryAd); if (result != Q_OK) return result; if( IsDebugLevel( D_HOSTNAME ) ) { dprintf( D_HOSTNAME, "Querying collector %s (%s) with classad:\n", my_collector.addr(), my_collector.fullHostname() ); queryAd.dPrint( D_HOSTNAME ); dprintf( D_HOSTNAME, " --- End of Query ClassAd ---\n" ); } int mytimeout = param_integer ("QUERY_TIMEOUT",60); if (!(sock = my_collector.startCommand(command, Stream::reli_sock, mytimeout, errstack)) || !queryAd.put (*sock) || !sock->end_of_message()) { if (sock) { delete sock; } return Q_COMMUNICATION_ERROR; } // get result sock->decode (); more = 1; while (more) { if (!sock->code (more)) { sock->end_of_message(); delete sock; return Q_COMMUNICATION_ERROR; } if (more) { ad = new ClassAd; if( !ad->initFromStream(*sock) ) { sock->end_of_message(); delete ad; delete sock; return Q_COMMUNICATION_ERROR; } adList.Insert (ad); } } sock->end_of_message(); // finalize sock->close(); delete sock; return (Q_OK); }
bool DCTransferQueue::RequestTransferQueueSlot(bool downloading,filesize_t sandbox_size,char const *fname,char const *jobid,char const *queue_user,int timeout,MyString &error_desc) { ASSERT(fname); ASSERT(jobid); if( GoAheadAlways( downloading ) ) { m_xfer_downloading = downloading; m_xfer_fname = fname; m_xfer_jobid = jobid; return true; } CheckTransferQueueSlot(); if( m_xfer_queue_sock ) { // A request has already been made. // Currently, this is a no-op, because any upload/download slot // is as good as any other. In the future, there may be // different queues for different paths. ASSERT( m_xfer_downloading == downloading ); m_xfer_fname = fname; m_xfer_jobid = jobid; return true; } time_t started = time(NULL); CondorError errstack; // Our caller has to finish this operation in the specified // amount of time or risk not responding to the file transfer // peer in time, so ignore the timeout multiplier and set the // timeout exactly as specified. m_xfer_queue_sock = reliSock( timeout, 0, &errstack, false, true ); if( !m_xfer_queue_sock ) { formatstr(m_xfer_rejected_reason, "Failed to connect to transfer queue manager for job %s (%s): %s.", jobid, fname, errstack.getFullText().c_str() ); error_desc = m_xfer_rejected_reason; dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str()); return false; } if( timeout ) { timeout -= time(NULL)-started; if( timeout <= 0 ) { timeout = 1; } } if (IsDebugLevel(D_COMMAND)) { int cmd = TRANSFER_QUEUE_REQUEST; dprintf (D_COMMAND, "DCTransferQueue::RequestTransferQueueSlot(%s,...) making connection to %s\n", getCommandStringSafe(cmd), _addr ? 
_addr : "NULL"); } bool connected = startCommand( TRANSFER_QUEUE_REQUEST, m_xfer_queue_sock, timeout, &errstack ); if( !connected ) { delete m_xfer_queue_sock; m_xfer_queue_sock = NULL; formatstr(m_xfer_rejected_reason, "Failed to initiate transfer queue request for job %s (%s): %s.", jobid, fname, errstack.getFullText().c_str() ); error_desc = m_xfer_rejected_reason; dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str()); return false; } m_xfer_downloading = downloading; m_xfer_fname = fname; m_xfer_jobid = jobid; ClassAd msg; msg.Assign(ATTR_DOWNLOADING,downloading); msg.Assign(ATTR_FILE_NAME,fname); msg.Assign(ATTR_JOB_ID,jobid); msg.Assign(ATTR_USER,queue_user); msg.Assign(ATTR_SANDBOX_SIZE,sandbox_size); m_xfer_queue_sock->encode(); if( !putClassAd(m_xfer_queue_sock, msg) || !m_xfer_queue_sock->end_of_message() ) { formatstr(m_xfer_rejected_reason, "Failed to write transfer request to %s for job %s " "(initial file %s).", m_xfer_queue_sock->peer_description(), m_xfer_jobid.c_str(), m_xfer_fname.c_str()); error_desc = m_xfer_rejected_reason; dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str()); return false; } m_xfer_queue_sock->decode(); // Request has been initiated. Now sender should call // PollForTransferQueueSlot() to get response. m_xfer_queue_pending = true; return true; }
void Selector::add_fd( int fd, IO_FUNC interest ) { // update max_fd (the highest valid index in fd_set's array) and also // make sure we're not overflowing our fd_set // On Windows, we have to check the individual fd_set to see if it's // full. if( fd > max_fd ) { max_fd = fd; } #if !defined(WIN32) if ( fd < 0 || fd >= fd_select_size() ) { EXCEPT( "Selector::add_fd(): fd %d outside valid range 0-%d", fd, _fd_select_size-1 ); } #endif if(IsDebugLevel(D_DAEMONCORE)) { char *fd_description = describe_fd(fd); dprintf(D_DAEMONCORE | D_VERBOSE, "selector %p adding fd %d (%s)\n", this, fd, fd_description); free(fd_description); } bool new_fd = false; if ((m_single_shot == SINGLE_SHOT_OK) && (m_poll.fd != fd)) { new_fd = true; } m_poll.fd = fd; switch( interest ) { case IO_READ: #if defined(WIN32) if ( save_read_fds->fd_count >= fd_select_size() ) { EXCEPT( "Selector::add_fd(): read fd_set is full" ); } #endif m_poll.events |= POLLIN; FD_SET( fd, save_read_fds ); break; case IO_WRITE: #if defined(WIN32) if ( save_write_fds->fd_count >= fd_select_size() ) { EXCEPT( "Selector::add_fd(): write fd_set is full" ); } #endif m_poll.events |= POLLOUT; FD_SET( fd, save_write_fds ); break; case IO_EXCEPT: #if defined(WIN32) if ( save_except_fds->fd_count >= fd_select_size() ) { EXCEPT( "Selector::add_fd(): except fd_set is full" ); } #endif m_poll.events |= POLLERR; FD_SET( fd, save_except_fds ); break; } if ((m_single_shot == SINGLE_SHOT_VIRGIN) || ((m_single_shot == SINGLE_SHOT_OK) && (new_fd == false))) { m_single_shot = SINGLE_SHOT_OK; } else { m_single_shot = SINGLE_SHOT_SKIP; } }
/*
  Begin non-blocking delivery of `msg`.

  The message is failed right away if it was cancelled or its deadline
  has already passed.  If daemonCore reports too many registered
  sockets, delivery is retried after a one second delay.  Otherwise the
  message becomes this messenger's single pending operation, a socket
  is obtained (the existing m_sock, or a new non-blocking connection),
  and the command is started with connectCallback as the completion
  handler.
*/
void
DCMessenger::startCommand( classy_counted_ptr<DCMsg> msg )
{
	MyString error;
	msg->setMessenger( this );

	if( msg->deliveryStatus() == DCMsg::DELIVERY_CANCELED ) {
		msg->callMessageSendFailed( this );
		return;
	}

	time_t deadline = msg->getDeadline();
	const bool deadline_passed = deadline && deadline < time(NULL);
	if( deadline_passed ) {
		msg->addError(CEDAR_ERR_DEADLINE_EXPIRED,
					  "deadline for delivery of this message expired");
		msg->callMessageSendFailed( this );
		return;
	}

		// For a UDP message, we may need to register two sockets, one for
		// the SafeSock and another for a ReliSock to establish the
		// security session.
	Stream::stream_type st = msg->getStreamType();
	const int sockets_needed = ( st == Stream::safe_sock ) ? 2 : 1;
	if( daemonCore->TooManyRegisteredSockets(-1,&error,sockets_needed) ) {
			// Try again in a sec
			// Eventually, it would be better to queue this centrally
			// (i.e. in DaemonCore) rather than having an independent
			// timer for each case.  Then it would be possible to control
			// priority of different messages etc.
		dprintf(D_FULLDEBUG, "Delaying delivery of %s to %s, because %s\n",
				msg->name(),peerDescription(),error.Value());
		startCommandAfterDelay( 1, msg );
		return;
	}

		// Currently, there may be only one pending operation per messenger.
	ASSERT(!m_callback_msg.get());
	ASSERT(!m_callback_sock);
	ASSERT(m_pending_operation == NOTHING_PENDING);

	m_pending_operation = START_COMMAND_PENDING;
	m_callback_msg = msg;
	m_callback_sock = m_sock.get();

	if( !m_callback_sock ) {
			// No socket yet: open a non-blocking connection.
		if (IsDebugLevel(D_COMMAND)) {
			const char * addr = m_daemon->addr();
			const int cmd = msg->m_cmd;
			dprintf (D_COMMAND, "DCMessenger::startCommand(%s,...) making non-blocking connection to %s\n", getCommandStringSafe(cmd), addr ? addr : "NULL");
		}

		const bool nonblocking = true;
		m_callback_sock = m_daemon->makeConnectedSocket(
			st,msg->getTimeout(),msg->getDeadline(),&msg->m_errstack,nonblocking);
		if( !m_callback_sock ) {
			msg->callMessageSendFailed( this );
			return;
		}
	}

		// Take a reference on ourselves before handing `this` to the
		// non-blocking command as callback data.
	incRefCount();
	m_daemon->startCommand_nonblocking (
		msg->m_cmd,
		m_callback_sock,
		msg->getTimeout(),
		&msg->m_errstack,
		&DCMessenger::connectCallback,
		this,
		msg->name(),
		msg->getRawProtocol(),
		msg->getSecSessionId());
}
bool DCStarter::peek(bool transfer_stdout, ssize_t &stdout_offset, bool transfer_stderr, ssize_t &stderr_offset, const std::vector<std::string> &filenames, std::vector<ssize_t> &offsets, size_t max_bytes, bool &retry_sensible, PeekGetFD &next, std::string &error_msg, unsigned timeout, const std::string &sec_session_id, DCTransferQueue *xfer_q) { compat_classad::ClassAd ad; ad.InsertAttr(ATTR_JOB_OUTPUT, transfer_stdout); ad.InsertAttr("OutOffset", stdout_offset); ad.InsertAttr(ATTR_JOB_ERROR, transfer_stderr); ad.InsertAttr("ErrOffset", stderr_offset); ad.InsertAttr(ATTR_VERSION, CondorVersion()); size_t total_files = 0; total_files += transfer_stdout ? 1 : 0; total_files += transfer_stderr ? 1 : 0; if (filenames.size()) { total_files += filenames.size(); std::vector<classad::ExprTree *> filelist; filelist.reserve(filenames.size()); std::vector<classad::ExprTree *> offsetlist; offsetlist.reserve(filenames.size()); std::vector<ssize_t>::const_iterator it2 = offsets.begin(); for (std::vector<std::string>::const_iterator it = filenames.begin(); it != filenames.end() && it2 != offsets.end(); it++, it2++) { classad::Value value; value.SetStringValue(*it); filelist.push_back(classad::Literal::MakeLiteral(value)); value.SetIntegerValue(*it2); offsetlist.push_back(classad::Literal::MakeLiteral(value)); } classad::ExprTree *list(classad::ExprList::MakeExprList(filelist)); ad.Insert("TransferFiles", list); list = classad::ExprList::MakeExprList(offsetlist); ad.Insert("TransferOffsets", list); } ad.InsertAttr(ATTR_MAX_TRANSFER_BYTES, static_cast<long long>(max_bytes)); ReliSock sock; if (IsDebugLevel(D_COMMAND)) { dprintf (D_COMMAND, "DCStarter::peek(%s,...) making connection to %s\n", getCommandStringSafe(STARTER_PEEK), _addr ? 
_addr : "NULL"); } if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(STARTER_PEEK, &sock, timeout, NULL, NULL, false, sec_session_id.c_str()) ) { error_msg = "Failed to send START_PEEK to starter"; return false; } sock.encode(); if (!putClassAd(&sock, ad) || !sock.end_of_message()) { error_msg = "Failed to send request to starter"; return false; } compat_classad::ClassAd response; sock.decode(); if (!getClassAd(&sock, response) || !sock.end_of_message()) { error_msg = "Failed to read response for peeking at logs."; return false; } dPrintAd(D_FULLDEBUG, response); bool success = false; if (!response.EvaluateAttrBool(ATTR_RESULT, success) || !success) { response.EvaluateAttrBool(ATTR_RETRY, retry_sensible); error_msg = "Remote operation failed."; response.EvaluateAttrString(ATTR_ERROR_STRING, error_msg); return false; } classad::Value valueX; classad_shared_ptr<classad::ExprList> list; if (!response.EvaluateAttr("TransferFiles", valueX) || !valueX.IsSListValue(list)) { error_msg = "Unable to evaluate starter response"; return false; } classad_shared_ptr<classad::ExprList> offlist; if (!response.EvaluateAttr("TransferOffsets", valueX) || !valueX.IsSListValue(offlist)) { error_msg = "Unable to evaluate starter response (missing offsets)"; return false; } size_t remaining = max_bytes; size_t file_count = 0; classad::ExprList::const_iterator it2 = offlist->begin(); for (classad::ExprList::const_iterator it = list->begin(); it != list->end() && it2 != offlist->end(); it++, it2++) { classad::Value value; (*it2)->Evaluate(value); off_t off = -1; value.IsIntegerValue(off); (*it)->Evaluate(value); std::string filename; int64_t xfer_fd = -1; if (!value.IsStringValue(filename) && value.IsIntegerValue(xfer_fd)) { if (xfer_fd == 0) filename = "_condor_stdout"; if (xfer_fd == 1) filename = "_condor_stderr"; } int fd = next.getNextFD(filename); filesize_t size = -1; int retval; if ((retval = sock.get_file(&size, 
fd, false, false, remaining, xfer_q)) && (retval != GET_FILE_MAX_BYTES_EXCEEDED)) { error_msg = "Internal error when transferring file " + filename; } else if (size >= 0) { remaining -= max_bytes; file_count++; off += size; } else { error_msg = "Failed to transfer file " + filename; } if (xfer_fd == 0) { stdout_offset = off; //dprintf(D_FULLDEBUG, "New stdout offset: %ld\n", stdout_offset); } else if (xfer_fd == 1) { stderr_offset = off; } else { std::vector<ssize_t>::iterator it4 = offsets.begin(); for (std::vector<std::string>::const_iterator it3 = filenames.begin(); it3 != filenames.end() && it4 != offsets.end(); it3++, it4++) { if (*it3 == filename) *it4 = off; } } } size_t remote_file_count; if (!sock.get(remote_file_count) || !sock.end_of_message()) { error_msg = "Unable to get remote file count."; return false; } if (file_count != remote_file_count) { formatstr(error_msg, "Received %ld files, but remote side thought it sent %ld files\n", file_count, remote_file_count); return false; } if ((total_files != file_count) && !error_msg.size()) { error_msg = "At least one file transfer failed."; return false; } return true; }
bool DCStarter::startSSHD(char const *known_hosts_file,char const *private_client_key_file,char const *preferred_shells,char const *slot_name,char const *ssh_keygen_args,ReliSock &sock,int timeout,char const *sec_session_id,MyString &remote_user,MyString &error_msg,bool &retry_is_sensible) { retry_is_sensible = false; #ifndef HAVE_SSH_TO_JOB error_msg = "This version of Condor does not support ssh key exchange."; return false; #else if (IsDebugLevel(D_COMMAND)) { dprintf (D_COMMAND, "DCStarter::startSSHD(%s,...) making connection to %s\n", getCommandStringSafe(START_SSHD), _addr ? _addr : "NULL"); } if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(START_SSHD, &sock,timeout,NULL,NULL,false,sec_session_id) ) { error_msg = "Failed to send START_SSHD to starter"; return false; } ClassAd input; if( preferred_shells && *preferred_shells ) { input.Assign(ATTR_SHELL,preferred_shells); } if( slot_name && *slot_name ) { // This is a little silly. // We are telling the remote side the name of the slot so // that it can put it in the welcome message. 
input.Assign(ATTR_NAME,slot_name); } if( ssh_keygen_args && *ssh_keygen_args ) { input.Assign(ATTR_SSH_KEYGEN_ARGS,ssh_keygen_args); } sock.encode(); if( !putClassAd(&sock, input) || !sock.end_of_message() ) { error_msg = "Failed to send START_SSHD request to starter"; return false; } ClassAd result; sock.decode(); if( !getClassAd(&sock, result) || !sock.end_of_message() ) { error_msg = "Failed to read response to START_SSHD from starter"; return false; } bool success = false; result.LookupBool(ATTR_RESULT,success); if( !success ) { std::string remote_error_msg; result.LookupString(ATTR_ERROR_STRING,remote_error_msg); error_msg.formatstr("%s: %s",slot_name,remote_error_msg.c_str()); retry_is_sensible = false; result.LookupBool(ATTR_RETRY,retry_is_sensible); return false; } result.LookupString(ATTR_REMOTE_USER,remote_user); std::string public_server_key; if( !result.LookupString(ATTR_SSH_PUBLIC_SERVER_KEY,public_server_key) ) { error_msg = "No public ssh server key received in reply to START_SSHD"; return false; } std::string private_client_key; if( !result.LookupString(ATTR_SSH_PRIVATE_CLIENT_KEY,private_client_key) ) { error_msg = "No ssh client key received in reply to START_SSHD"; return false; } // store the private client key unsigned char *decode_buf = NULL; int length = -1; condor_base64_decode(private_client_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh client key."; return false; } FILE *fp = safe_fcreate_fail_if_exists(private_client_key_file,"a",0400); if( !fp ) { error_msg.formatstr("Failed to create %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.formatstr("Failed to write to %s: %s", private_client_key_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.formatstr("Failed to close %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } fp = 
NULL; free( decode_buf ); decode_buf = NULL; // store the public server key in the known_hosts file length = -1; condor_base64_decode(public_server_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh server key."; return false; } fp = safe_fcreate_fail_if_exists(known_hosts_file,"a",0600); if( !fp ) { error_msg.formatstr("Failed to create %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } // prepend a host name pattern (*) to the public key to make a valid // record in the known_hosts file fprintf(fp,"* "); if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.formatstr("Failed to write to %s: %s", known_hosts_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.formatstr("Failed to close %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } fp = NULL; free( decode_buf ); decode_buf = NULL; return true; #endif }
int do_Q_request(ReliSock *syscall_sock,bool &may_fork) { int request_num = -1; int rval; syscall_sock->decode(); assert( syscall_sock->code(request_num) ); dprintf(D_SYSCALLS, "Got request #%d\n", request_num); switch( request_num ) { case CONDOR_InitializeConnection: { // dprintf( D_ALWAYS, "InitializeConnection()\n" ); bool authenticated = true; // Authenticate socket, if not already done by daemonCore if( !syscall_sock->triedAuthentication() ) { if( IsDebugLevel(D_SECURITY) ) { MyString methods; SecMan::getAuthenticationMethods( WRITE, &methods ); dprintf(D_SECURITY,"Calling authenticate(%s) in qmgmt_receivers\n", methods.Value()); } CondorError errstack; if( ! SecMan::authenticate_sock(syscall_sock, WRITE, &errstack) ) { // Failed to authenticate dprintf( D_ALWAYS, "SCHEDD: authentication failed: %s\n", errstack.getFullText().c_str() ); authenticated = false; } } if ( authenticated ) { InitializeConnection( syscall_sock->getOwner(), syscall_sock->getDomain() ); } else { InitializeConnection( NULL, NULL ); } return 0; } case CONDOR_InitializeReadOnlyConnection: { // dprintf( D_ALWAYS, "InitializeReadOnlyConnection()\n" ); // Since InitializeConnection() does nothing, and we need // to record the fact that this is a read-only connection, // but we have to do it in the socket (since we don't have // any other persistent data structure, and it's probably // the right place anyway), set the FQU. // // We need to record if this is a read-only connection so that // we can avoid expanding $$ in GetJobAd; simply checking if the // connection is authenticated isn't sufficient, because the // security session cache means that read-only connection could // be authenticated by a previous authenticated connection from // the same address (when using host-based security) less than // the expiration period ago. 
syscall_sock->setFullyQualifiedUser( "read-only" ); // same as InitializeConnection but no authenticate() InitializeConnection( NULL, NULL ); may_fork = true; return 0; } case CONDOR_SetEffectiveOwner: { MyString owner; int terrno; assert( syscall_sock->get(owner) ); assert( syscall_sock->end_of_message() ); rval = QmgmtSetEffectiveOwner( owner.Value() ); terrno = errno; syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } assert( syscall_sock->end_of_message() ); char const *fqu = syscall_sock->getFullyQualifiedUser(); dprintf(D_SYSCALLS, "\tSetEffectiveOwner\n"); dprintf(D_SYSCALLS, "\tauthenticated user = '******'\n", fqu ? fqu : ""); dprintf(D_SYSCALLS, "\trequested owner = '%s'\n", owner.Value()); dprintf(D_SYSCALLS, "\trval %d, errno %d\n", rval, terrno); return 0; } case CONDOR_NewCluster: { int terrno; assert( syscall_sock->end_of_message() );; errno = 0; rval = NewCluster( ); terrno = errno; dprintf(D_SYSCALLS, "\tNewCluster: rval = %d, errno = %d\n",rval,terrno ); if ( rval > 0 ) { dprintf( D_AUDIT, *syscall_sock, "Submitting new job %d.0\n", rval ); } syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } assert( syscall_sock->end_of_message() );; dprintf(D_FULLDEBUG,"schedd: NewCluster rval %d errno %d\n",rval,terrno); return 0; } case CONDOR_NewProc: { int cluster_id = -1; int terrno; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->end_of_message() );; errno = 0; rval = NewProc( cluster_id ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); if ( rval > 0 ) { dprintf( D_AUDIT, *syscall_sock, "Submitting new job %d.%d\n", cluster_id, rval ); } syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } assert( syscall_sock->end_of_message() );; 
dprintf(D_FULLDEBUG,"schedd: NewProc rval %d errno %d\n",rval,terrno); return 0; } case CONDOR_DestroyProc: { int cluster_id = -1; int proc_id = -1; int terrno; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->code(proc_id) ); dprintf( D_SYSCALLS, " proc_id = %d\n", proc_id ); assert( syscall_sock->end_of_message() );; errno = 0; rval = DestroyProc( cluster_id, proc_id ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } assert( syscall_sock->end_of_message() );; dprintf(D_FULLDEBUG,"schedd: DestroyProc cluster %d proc %d rval %d errno %d\n",cluster_id,proc_id,rval,terrno); return 0; } case CONDOR_DestroyCluster: { int cluster_id = -1; int terrno; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->end_of_message() );; errno = 0; rval = DestroyCluster( cluster_id ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } assert( syscall_sock->end_of_message() );; return 0; } #if 0 case CONDOR_DestroyClusterByConstraint: { char *constraint=NULL; int terrno; assert( syscall_sock->code(constraint) ); assert( syscall_sock->end_of_message() );; errno = 0; rval = DestroyClusterByConstraint( constraint ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } free( (char *)constraint ); assert( syscall_sock->end_of_message() );; return 0; } #endif case CONDOR_SetAttributeByConstraint: case CONDOR_SetAttributeByConstraint2: { char *attr_name=NULL; char *attr_value=NULL; char *constraint=NULL; int terrno; 
SetAttributeFlags_t flags = 0; assert( syscall_sock->code(constraint) ); dprintf( D_SYSCALLS, " constraint = %s\n",constraint); assert( syscall_sock->code(attr_value) ); assert( syscall_sock->code(attr_name) ); if( request_num == CONDOR_SetAttributeByConstraint2 ) { assert( syscall_sock->code( flags ) ); } assert( syscall_sock->end_of_message() );; if (strcmp (attr_name, ATTR_MYPROXY_PASSWORD) == 0) { errno = 0; dprintf( D_SYSCALLS, "SetAttributeByConstraint (MyProxyPassword) not supported...\n"); rval = 0; terrno = errno; } else { errno = 0; rval = SetAttributeByConstraint( constraint, attr_name, attr_value, flags ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); if ( rval == 0 ) { dprintf( D_AUDIT, *syscall_sock, "Set Attribute By Constraint %s, " "%s = %s\n", constraint, attr_name, attr_value); } } syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } free( (char *)constraint ); free( (char *)attr_value ); free( (char *)attr_name ); assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_SetAttribute: case CONDOR_SetAttribute2: { int cluster_id = -1; int proc_id = -1; char *attr_name=NULL; char *attr_value=NULL; int terrno; SetAttributeFlags_t flags = 0; const char *users_username; const char *condor_username; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->code(proc_id) ); dprintf( D_SYSCALLS, " proc_id = %d\n", proc_id ); assert( syscall_sock->code(attr_value) ); assert( syscall_sock->code(attr_name) ); if( request_num == CONDOR_SetAttribute2 ) { assert( syscall_sock->code( flags ) ); } users_username = syscall_sock->getOwner(); condor_username = get_condor_username(); if (attr_name) dprintf(D_SYSCALLS,"\tattr_name = %s\n",attr_name); if (attr_value) dprintf(D_SYSCALLS,"\tattr_value = %s\n",attr_value); assert( syscall_sock->end_of_message() );; // ckireyev: // We do NOT want to 
include MyProxy password in the ClassAd (since it's a secret) // I'm not sure if this is the best place to do this, but.... if (attr_name && attr_value && strcmp (attr_name, ATTR_MYPROXY_PASSWORD) == 0) { errno = 0; dprintf( D_SYSCALLS, "Got MyProxyPassword, stashing...\n"); rval = SetMyProxyPassword (cluster_id, proc_id, attr_value); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); } else { errno = 0; rval = SetAttribute( cluster_id, proc_id, attr_name, attr_value, flags ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); // If we're modifying a previously-submitted job AND either // the client's username is not HTCondor's (i.e. not a // daemon) OR the client says we should log... if( (cluster_id != active_cluster_num) && (rval == 0) && ( strcmp(users_username, condor_username) || (flags & SHOULDLOG) ) ) { dprintf( D_AUDIT, *syscall_sock, "Set Attribute for job %d.%d, " "%s = %s\n", cluster_id, proc_id, attr_name, attr_value); } } free( (char *)attr_value ); free( (char *)attr_name ); if( flags & SetAttribute_NoAck ) { if( rval < 0 ) { return -1; } } else { syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } assert( syscall_sock->end_of_message() ); } return 0; } case CONDOR_SetTimerAttribute: { int cluster_id = -1; int proc_id = -1; char *attr_name=NULL; int duration = 0; int terrno; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->code(proc_id) ); dprintf( D_SYSCALLS, " proc_id = %d\n", proc_id ); assert( syscall_sock->code(attr_name) ); if (attr_name) dprintf(D_SYSCALLS,"\tattr_name = %s\n",attr_name); assert( syscall_sock->code(duration) ); dprintf(D_SYSCALLS,"\tduration = %d\n",duration); assert( syscall_sock->end_of_message() );; errno = 0; rval = SetTimerAttribute( cluster_id, proc_id, attr_name, duration ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, 
errno = %d\n", rval, terrno ); dprintf( D_AUDIT, *syscall_sock, "Set Timer Attribute for job %d.%d, " "attr_name = %s, duration = %d\n", cluster_id, proc_id, attr_name, duration); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } free( (char *)attr_name ); assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_BeginTransaction: { int terrno; assert( syscall_sock->end_of_message() );; errno = 0; rval = 0; // BeginTransaction returns void (sigh), so always success BeginTransaction( ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_AbortTransaction: { int terrno; assert( syscall_sock->end_of_message() );; errno = 0; rval = 0; // AbortTransaction returns void (sigh), so always success AbortTransaction( ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_CommitTransactionNoFlags: case CONDOR_CommitTransaction: { int terrno; int flags; if( request_num == CONDOR_CommitTransaction ) { assert( syscall_sock->code(flags) ); } else { flags = 0; } assert( syscall_sock->end_of_message() );; errno = 0; CondorError errstack; rval = CheckTransaction( flags, & errstack ); terrno = errno; dprintf( D_SYSCALLS, "\tflags = %d, rval = %d, errno = %d\n", flags, rval, terrno ); if( rval >= 0 ) { errno = 0; CommitTransaction( flags ); // CommitTransaction() never returns on failure rval = 0; terrno = errno; dprintf( D_SYSCALLS, "\tflags = %d, rval = %d, errno = %d\n", flags, rval, terrno ); } syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( 
syscall_sock->code(terrno) ); const CondorVersionInfo *vers = syscall_sock->get_peer_version(); if (vers && vers->built_since_version(8, 3, 4)) { // Send a classad, for less backwards-incompatibility. int code = 1; const char * reason = "QMGMT rejected job submission."; if( errstack.subsys() ) { code = 2; reason = errstack.message(); } ClassAd reply; reply.Assign( "ErrorCode", code ); reply.Assign( "ErrorReason", reason ); assert( putClassAd( syscall_sock, reply ) ); } } assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_GetAttributeFloat: { int cluster_id = -1; int proc_id = -1; char *attr_name=NULL; float value = 0.0; int terrno; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->code(proc_id) ); dprintf( D_SYSCALLS, " proc_id = %d\n", proc_id ); assert( syscall_sock->code(attr_name) ); assert( syscall_sock->end_of_message() );; errno = 0; if( QmgmtMayAccessAttribute( attr_name ) ) { rval = GetAttributeFloat( cluster_id, proc_id, attr_name, &value ); } else { rval = -1; } terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } if( rval >= 0 ) { assert( syscall_sock->code(value) ); } free( (char *)attr_name ); assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_GetAttributeInt: { int cluster_id = -1; int proc_id = -1; char *attr_name=NULL; int value = 0; int terrno; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->code(proc_id) ); dprintf( D_SYSCALLS, " proc_id = %d\n", proc_id ); assert( syscall_sock->code(attr_name) ); dprintf( D_SYSCALLS, " attr_name = %s\n", attr_name ); assert( syscall_sock->end_of_message() );; errno = 0; if( QmgmtMayAccessAttribute( attr_name ) ) { rval = GetAttributeInt( cluster_id, proc_id, attr_name, &value ); } else { rval = 
-1; } terrno = errno; if (rval < 0) { dprintf( D_SYSCALLS, "GetAttributeInt(%d, %d, %s) not found.\n", cluster_id, proc_id, attr_name); } else { dprintf( D_SYSCALLS, " value: %d\n", value ); dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); } syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } if( rval >= 0 ) { assert( syscall_sock->code(value) ); } free( (char *)attr_name ); assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_GetAttributeString: { int cluster_id = -1; int proc_id = -1; char *attr_name=NULL; char *value = NULL; int terrno; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->code(proc_id) ); dprintf( D_SYSCALLS, " proc_id = %d\n", proc_id ); assert( syscall_sock->code(attr_name) ); assert( syscall_sock->end_of_message() );; errno = 0; if( QmgmtMayAccessAttribute( attr_name ) ) { rval = GetAttributeStringNew( cluster_id, proc_id, attr_name, &value ); } else { rval = -1; } terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } if( rval >= 0 ) { assert( syscall_sock->code(value) ); } free( (char *)value ); free( (char *)attr_name ); assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_GetAttributeExpr: { int cluster_id = -1; int proc_id = -1; char *attr_name=NULL; int terrno; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->code(proc_id) ); dprintf( D_SYSCALLS, " proc_id = %d\n", proc_id ); assert( syscall_sock->code(attr_name) ); assert( syscall_sock->end_of_message() );; char *value = NULL; errno = 0; if( QmgmtMayAccessAttribute( attr_name ) ) { rval = GetAttributeExprNew( cluster_id, proc_id, attr_name, &value ); } else { rval = -1; } terrno = errno; dprintf( 
D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); if ( !syscall_sock->code(rval) ) { free(value); return -1; } if( rval < 0 ) { if ( !syscall_sock->code(terrno) ) { free(value); return -1; } } if( rval >= 0 ) { if ( !syscall_sock->code(value) ) { free(value); return -1; } } free( (char *)value ); free( (char *)attr_name ); assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_GetDirtyAttributes: { int cluster_id = -1; int proc_id = -1; ClassAd updates; int terrno; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->code(proc_id) ); dprintf( D_SYSCALLS, " proc_id = %d\n", proc_id ); assert( syscall_sock->end_of_message() );; errno = 0; rval = GetDirtyAttributes( cluster_id, proc_id, &updates ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); if ( !syscall_sock->code(rval) ) { return -1; } if( rval < 0 ) { if ( !syscall_sock->code(terrno) ) { return -1; } } if( rval >= 0 ) { assert( putClassAd(syscall_sock, updates) ); } assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_DeleteAttribute: { int cluster_id = -1; int proc_id = -1; char *attr_name=NULL; int terrno; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->code(proc_id) ); dprintf( D_SYSCALLS, " proc_id = %d\n", proc_id ); assert( syscall_sock->code(attr_name) ); assert( syscall_sock->end_of_message() );; errno = 0; rval = DeleteAttribute( cluster_id, proc_id, attr_name ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } free( (char *)attr_name ); assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_GetJobAd: { int cluster_id = -1; int proc_id = -1; ClassAd *ad = NULL; int terrno; bool delete_ad = 
false; assert( syscall_sock->code(cluster_id) ); dprintf( D_SYSCALLS, " cluster_id = %d\n", cluster_id ); assert( syscall_sock->code(proc_id) ); dprintf( D_SYSCALLS, " proc_id = %d\n", proc_id ); assert( syscall_sock->end_of_message() );; // dprintf( D_ALWAYS, "(%d.%d) isAuthenticated() = %d\n", cluster_id, proc_id, syscall_sock->isAuthenticated() ); // dprintf( D_ALWAYS, "(%d.%d) getOwner() = %s\n", cluster_id, proc_id, syscall_sock->getOwner() ); errno = 0; // Only fetch the jobad for legal values of cluster/proc if( cluster_id >= 1 ) { if( proc_id >= 0 ) { const char * fqu = syscall_sock->getFullyQualifiedUser(); if( fqu != NULL && strcmp( fqu, "read-only" ) != 0 ) { // expand $$() macros in the jobad as required by GridManager. // The GridManager depends on the fact that the following call // expands $$ and saves the expansions to disk in case of // restart. ad = GetJobAd_as_ClassAd( cluster_id, proc_id, true, true ); delete_ad = true; // note : since we expanded the ad, ad is now a deep // copy of the ad in memory, so we must delete it below. } else { ad = GetJobAd_as_ClassAd( cluster_id, proc_id, false, false ); } } else if( proc_id == -1 ) { // allow cluster ad to be queried as required by preen, but // do NOT ask to expand $$() macros in a cluster ad! ad = GetJobAd_as_ClassAd( cluster_id, proc_id, false, false ); } } terrno = errno; rval = ad ? 0 : -1; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } if( rval >= 0 ) { assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE) ); } // If we called GetJobAd() with the third bool argument set // to True (expandedAd), it does a deep copy of the ad in the // queue in order to expand the $$() attributes. So we must // delete it. 
if (delete_ad) delete ad; assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_GetJobByConstraint: { char *constraint=NULL; ClassAd *ad; int terrno; assert( syscall_sock->code(constraint) ); assert( syscall_sock->end_of_message() );; errno = 0; ad = GetJobByConstraint_as_ClassAd( constraint ); terrno = errno; rval = ad ? 0 : -1; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } if( rval >= 0 ) { assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE) ); } FreeJobAd(ad); free( (char *)constraint ); assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_GetNextJob: { ClassAd *ad; int initScan = 0; int terrno; assert( syscall_sock->code(initScan) ); dprintf( D_SYSCALLS, " initScan = %d\n", initScan ); assert( syscall_sock->end_of_message() );; errno = 0; ad = GetNextJob( initScan ); terrno = errno; rval = ad ? 0 : -1; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } if( rval >= 0 ) { assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE) ); } FreeJobAd(ad); assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_GetNextJobByConstraint: { char *constraint=NULL; ClassAd *ad; int initScan = 0; int terrno; assert( syscall_sock->code(initScan) ); dprintf( D_SYSCALLS, " initScan = %d\n", initScan ); if ( !(syscall_sock->code(constraint)) ) { if (constraint != NULL) { free(constraint); constraint = NULL; } return -1; } assert( syscall_sock->end_of_message() );; errno = 0; ad = GetNextJobByConstraint( constraint, initScan ); terrno = errno; rval = ad ? 
0 : -1; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } if( rval >= 0 ) { assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE) ); } FreeJobAd(ad); free( (char *)constraint ); assert( syscall_sock->end_of_message() );; return 0; } case CONDOR_GetNextDirtyJobByConstraint: { char *constraint=NULL; ClassAd *ad; int initScan = 0; int terrno; assert( syscall_sock->code(initScan) ); dprintf( D_SYSCALLS, " initScan = %d\n", initScan ); if ( !(syscall_sock->code(constraint)) ) { if (constraint != NULL) { free(constraint); constraint = NULL; } return -1; } assert( syscall_sock->end_of_message() ); errno = 0; ad = GetNextDirtyJobByConstraint( constraint, initScan ); terrno = errno; rval = ad ? 0 : -1; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } if( rval >= 0 ) { assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE) ); } FreeJobAd(ad); free( (char *)constraint ); assert( syscall_sock->end_of_message() ); return 0; } case CONDOR_SendSpoolFile: { char *filename=NULL; int terrno; assert( syscall_sock->code(filename) ); assert( syscall_sock->end_of_message() );; errno = 0; rval = SendSpoolFile( filename ); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); #if 0 syscall_sock->encode(); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } assert( syscall_sock->end_of_message() );; #endif free( (char *)filename ); return 0; } case CONDOR_SendSpoolFileIfNeeded: { int terrno; ClassAd ad; assert( getClassAd(syscall_sock, ad) ); assert( syscall_sock->end_of_message() );; errno = 0; rval = SendSpoolFileIfNeeded(ad); terrno = errno; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); return 0; } case 
CONDOR_GetAllJobsByConstraint: { char *constraint=NULL; char *projection=NULL; ClassAd *ad; int terrno; int initScan = 1; classad::References proj; if ( !(syscall_sock->code(constraint)) ) { if (constraint != NULL) { free(constraint); constraint = NULL; } return -1; } if ( !(syscall_sock->code(projection)) ) { if (projection != NULL) { free(constraint); free(projection); projection = NULL; } return -1; } dprintf( D_SYSCALLS, " constraint = %s\n", constraint ); dprintf( D_SYSCALLS, " projection = %s\n", projection ? projection : ""); assert( syscall_sock->end_of_message() );; // if there is a projection, convert it into a set of attribute names if (projection) { StringTokenIterator list(projection); const std::string * attr; while ((attr = list.next_string())) { proj.insert(*attr); } } syscall_sock->encode(); do { errno = 0; ad = GetNextJobByConstraint( constraint, initScan ); initScan=0; // one first time through, otherwise 0 terrno = errno; rval = ad ? 0 : -1; dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno ); assert( syscall_sock->code(rval) ); if( rval < 0 ) { assert( syscall_sock->code(terrno) ); } if( rval >= 0 ) { assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE, proj.empty() ? NULL : &proj) ); FreeJobAd(ad); } } while (rval >= 0); assert( syscall_sock->end_of_message() );; free( (char *)constraint ); free( (char *)projection ); return 0; } case CONDOR_CloseSocket: { assert( syscall_sock->end_of_message() );; return -1; } } /* End of switch */ return -1; } /* End of function */
/* Generic read/write wrappers for condor. These function emulate-ish the * read/write system calls under unix except that they are portable, use * a timeout, and make sure that all data is read or written. * * A few notes on the behavior differing from POSIX: * - These will never fail due to EINTR. * - If in non_blocking mode, there may be a short read or write returned. * - The corresponding POSIX functon returns 0 bytes read when the peer closed * the socket; these return -2. * - If zero bytes were read/written in non-blocking mode, this will return 0. This differs * from POSIX. * - Providing a zero-sized argument to this function will cause the program to abort(). * * Returns < 0 on failure. -1 = general error, -2 = peer closed socket */ int condor_read( char const *peer_description, SOCKET fd, char *buf, int sz, int timeout, int flags, bool non_blocking ) { Selector selector; int nr = 0, nro; unsigned int start_time=0, cur_time=0; char sinbuf[SINFUL_STRING_BUF_SIZE]; if( IsDebugLevel(D_NETWORK) ) { dprintf(D_NETWORK, "condor_read(fd=%d %s,,size=%d,timeout=%d,flags=%d,non_blocking=%d)\n", fd, not_null_peer_description(peer_description,fd,sinbuf), sz, timeout, flags, non_blocking); } /* PRE Conditions. 
*/ ASSERT(fd >= 0); /* Need valid file descriptor */ ASSERT(buf != NULL); /* Need real memory to put data into */ ASSERT(sz > 0); /* Need legit size on buffer */ if (non_blocking) { #ifdef WIN32 unsigned long mode = 1; // nonblocking mode if (ioctlsocket(fd, FIONBIO, &mode) < 0) return -1; #else int fcntl_flags; if ( (fcntl_flags=fcntl(fd, F_GETFL)) < 0 ) return -1; // set nonblocking mode if ( ((fcntl_flags & O_NONBLOCK) == 0) && fcntl(fd, F_SETFL, fcntl_flags | O_NONBLOCK) == -1 ) return -1; #endif nr = -2; while (nr == -2 || (nr == -1 && errno == EINTR)) { nr = recv(fd, buf, sz, flags); } if ( nr <= 0 ) { int the_error; char const *the_errorstr; #ifdef WIN32 the_error = WSAGetLastError(); the_errorstr = ""; #else the_error = errno; the_errorstr = strerror(the_error); #endif if ( nr == 0 && !(flags & MSG_PEEK)) { nr = -2; dprintf( D_FULLDEBUG, "condor_read(): " "Socket closed when trying to read %d bytes from %s in non-blocking mode\n", sz, not_null_peer_description(peer_description,fd,sinbuf) ); } else if ( !errno_is_temporary(the_error) ) { dprintf( D_ALWAYS, "condor_read() failed: recv() %d bytes from %s " "returned %d, " "timeout=%d, errno=%d %s.\n", sz, not_null_peer_description(peer_description,fd,sinbuf), nr, timeout, the_error, the_errorstr ); } else { nr = 0; } } #ifdef WIN32 mode = 0; // reset blocking mode if (ioctlsocket(fd, FIONBIO, &mode) < 0) return -1; #else // reset flags to prior value if ( ((fcntl_flags & O_NONBLOCK) == 0) && (fcntl(fd, F_SETFL, fcntl_flags) == -1) ) return -1; #endif return nr; } selector.add_fd( fd, Selector::IO_READ ); if ( timeout > 0 ) { start_time = time(NULL); cur_time = start_time; } while( nr < sz ) { if( timeout > 0 ) { if( cur_time == 0 ) { cur_time = time(NULL); } // If it hasn't yet been longer then we said we would wait... 
if( start_time + timeout > cur_time ) { selector.set_timeout( (start_time + timeout) - cur_time ); } else { dprintf( D_ALWAYS, "condor_read(): timeout reading %d bytes from %s.\n", sz, not_null_peer_description(peer_description,fd,sinbuf) ); return -1; } cur_time = 0; if( IsDebugVerbose( D_NETWORK ) ) { dprintf(D_NETWORK, "condor_read(): fd=%d\n", fd); } selector.execute(); if( IsDebugVerbose( D_NETWORK ) ) { dprintf(D_NETWORK, "condor_read(): select returned %d\n", selector.select_retval()); } if ( selector.timed_out() ) { dprintf( D_ALWAYS, "condor_read(): timeout reading %d bytes from %s.\n", sz, not_null_peer_description(peer_description,fd,sinbuf) ); return -1; } else if ( selector.signalled() ) { continue; } else if ( !selector.has_ready() ) { int the_error; char const *the_errorstr; #ifdef WIN32 the_error = WSAGetLastError(); the_errorstr = ""; #else the_error = errno; the_errorstr = strerror(the_error); #endif dprintf( D_ALWAYS, "condor_read() failed: select() " "returns %d, reading %d bytes from %s (errno=%d %s).\n", selector.select_retval(), sz, not_null_peer_description(peer_description,fd,sinbuf), the_error, the_errorstr ); return -1; } } start_thread_safe("recv"); nro = recv(fd, &buf[nr], sz - nr, flags); // Save the error value before stop_thread_safe(), as that may // overwrite it. int the_error; #ifdef WIN32 the_error = WSAGetLastError(); #else the_error = errno; #endif stop_thread_safe("recv"); if( nro <= 0 ) { // If timeout > 0, and we made it here, then // we know we were woken by select(). Now, if // select() wakes up on a read fd, and then recv() // subsequently returns 0, that means that the // socket has been closed by our peer. // If timeout == 0, then recv() should have // blocked until 1 or more bytes arrived. // Thus no matter what, if nro==0, then the // socket must be closed. 
if ( nro == 0 ) { dprintf( D_FULLDEBUG, "condor_read(): " "Socket closed when trying to read %d bytes from %s\n", sz, not_null_peer_description(peer_description,fd,sinbuf) ); return -2; } char const *the_errorstr; #ifdef WIN32 the_errorstr = ""; #else the_errorstr = strerror(the_error); #endif if ( errno_is_temporary(the_error) ) { dprintf( D_FULLDEBUG, "condor_read(): " "recv() returned temporary error %d %s," "still trying to read from %s\n", the_error,the_errorstr, not_null_peer_description(peer_description,fd,sinbuf) ); continue; } dprintf( D_ALWAYS, "condor_read() failed: recv(fd=%d) returned %d, " "errno = %d %s, reading %d bytes from %s.\n", fd, nro, the_error, the_errorstr, sz, not_null_peer_description(peer_description,fd,sinbuf) ); if( the_error == ETIMEDOUT ) { if( timeout <= 0 ) { dprintf( D_ALWAYS, "condor_read(): read timeout during blocking read from %s\n", not_null_peer_description(peer_description,fd,sinbuf)); } else { int lapse = (int)(time(NULL)-start_time); dprintf( D_ALWAYS, "condor_read(): UNEXPECTED read timeout after %ds during non-blocking read from %s (desired timeout=%ds)\n", lapse, not_null_peer_description(peer_description,fd,sinbuf), timeout); } } return -1; } nr += nro; } /* Post Conditions */ ASSERT( nr == sz ); // we should have read *ALL* the data return nr; }
int condor_write( char const *peer_description, SOCKET fd, const char *buf, int sz, int timeout, int flags, bool non_blocking ) { int nw = 0, nwo = 0; unsigned int start_time = 0, cur_time = 0; char tmpbuf[1]; int nro; bool select_for_read = true; bool needs_select = true; char sinbuf[SINFUL_STRING_BUF_SIZE]; if( IsDebugLevel( D_NETWORK ) ) { dprintf(D_NETWORK, "condor_write(fd=%d %s,,size=%d,timeout=%d,flags=%d,non_blocking=%d)\n", fd, not_null_peer_description(peer_description,fd,sinbuf), sz, timeout, flags, non_blocking); } /* Pre-conditions. */ ASSERT(sz > 0); /* Can't write buffers that are have no data */ ASSERT(fd >= 0); /* Need valid file descriptor */ ASSERT(buf != NULL); /* Need valid buffer to write */ if (non_blocking) { #ifdef WIN32 unsigned long mode = 1; // nonblocking mode if (ioctlsocket(fd, FIONBIO, &mode) < 0) return -1; #else int fcntl_flags; if ( (fcntl_flags=fcntl(fd, F_GETFL)) < 0 ) return -1; // set nonblocking mode if ( ((fcntl_flags & O_NONBLOCK) == 0) && fcntl(fd, F_SETFL, fcntl_flags | O_NONBLOCK) == -1 ) return -1; #endif nw = -2; while (nw == -2 || (nw == -1 && errno == EINTR)) { nw = send(fd, buf, sz, flags); } if ( nw <= 0 ) { int the_error; char const *the_errorstr; #ifdef WIN32 the_error = WSAGetLastError(); the_errorstr = ""; #else the_error = errno; the_errorstr = strerror(the_error); #endif if ( !errno_is_temporary(the_error) ) { dprintf( D_ALWAYS, "condor_write() failed: send() %d bytes to %s " "returned %d, " "timeout=%d, errno=%d %s.\n", sz, not_null_peer_description(peer_description,fd,sinbuf), nw, timeout, the_error, the_errorstr ); } else { nw = 0; } } if (nw < 0) { dprintf(D_NETWORK, "condor_write (non-blocking) wrote %d bytes.\n", nw); } #ifdef WIN32 mode = 0; // reset blocking mode if (ioctlsocket(fd, FIONBIO, &mode) < 0) return -1; #else // reset flags to prior value if ( ((fcntl_flags & O_NONBLOCK) == 0) && fcntl(fd, F_SETFL, fcntl_flags) == -1 ) return -1; #endif return nw; } Selector selector; selector.add_fd( fd, 
Selector::IO_READ ); selector.add_fd( fd, Selector::IO_WRITE ); selector.add_fd( fd, Selector::IO_EXCEPT ); if(timeout > 0) { start_time = time(NULL); cur_time = start_time; } while( nw < sz ) { needs_select = true; if( timeout > 0 ) { while( needs_select ) { if( cur_time == 0 ) { cur_time = time(NULL); } if( start_time + timeout > cur_time ) { selector.set_timeout( (start_time + timeout) - cur_time ); } else { dprintf( D_ALWAYS, "condor_write(): " "timed out writing %d bytes to %s\n", sz, not_null_peer_description(peer_description,fd,sinbuf) ); return -1; } cur_time = 0; // The write and except sets are added at the top of // this function, since we always want to select on // them. if( select_for_read ) { // Also, put it in the read fds, so we'll wake // up if the socket is closed selector.add_fd( fd, Selector::IO_READ ); } else { selector.delete_fd( fd, Selector::IO_READ ); } selector.execute(); // unless we decide we need to select() again, we // want to break out of our while() loop now that // we've actually performed a select() needs_select = false; if ( selector.timed_out() ) { dprintf( D_ALWAYS, "condor_write(): " "timed out writing %d bytes to %s\n", sz, not_null_peer_description(peer_description,fd,sinbuf) ); return -1; } else if ( selector.signalled() ) { needs_select = true; continue; } else if ( selector.has_ready() ) { if ( selector.fd_ready( fd, Selector::IO_READ ) ) { dprintf(D_NETWORK, "condor_write(): socket %d is readable\n", fd); // see if the socket was closed nro = recv(fd, tmpbuf, 1, MSG_PEEK); if( nro == -1 ) { int the_error; char const *the_errorstr; #ifdef WIN32 the_error = WSAGetLastError(); the_errorstr = ""; #else the_error = errno; the_errorstr = strerror(the_error); #endif if(errno_is_temporary( the_error )) { continue; } dprintf( D_ALWAYS, "condor_write(): " "Socket closed when trying " "to write %d bytes to %s, fd is %d, " "errno=%d %s\n", sz, not_null_peer_description(peer_description,fd,sinbuf), fd, the_error, the_errorstr ); 
return -1; } if( ! nro ) { dprintf( D_ALWAYS, "condor_write(): " "Socket closed when trying " "to write %d bytes to %s, fd is %d\n", sz, not_null_peer_description(peer_description,fd,sinbuf), fd ); return -1; } /* otherwise, there's real data to consume on the read side, and we don't want to put our fd in the readfds anymore or select() will never block. also, we need to re-do the select() */ needs_select = true; select_for_read = false; } } else { dprintf( D_ALWAYS, "condor_write() failed: select() " "returns %d, " "writing %d bytes to %s.\n", selector.select_retval(), sz, not_null_peer_description(peer_description,fd,sinbuf) ); return -1; } } } start_thread_safe("send"); nwo = send(fd, &buf[nw], sz - nw, flags); // Save the error value before stop_thread_safe(), as that may // overwrite it. int the_error; #ifdef WIN32 the_error = WSAGetLastError(); #else the_error = errno; #endif stop_thread_safe("send"); if( nwo <= 0 ) { char const *the_errorstr; #ifdef WIN32 the_errorstr = ""; #else the_errorstr = strerror(the_error); #endif if ( errno_is_temporary(the_error) ) { dprintf( D_FULLDEBUG, "condor_write(): " "send() returned temporary error %d %s," "still trying to write %d bytes to %s\n", the_error, the_errorstr, sz, not_null_peer_description(peer_description,fd,sinbuf) ); continue; } dprintf( D_ALWAYS, "condor_write() failed: send() %d bytes to %s " "returned %d, " "timeout=%d, errno=%d %s.\n", sz, not_null_peer_description(peer_description,fd,sinbuf), nwo, timeout, the_error, the_errorstr ); return -1; } nw += nwo; } /* POST conditions. */ ASSERT( nw == sz ); /* Make sure that we wrote everything */ return nw; }