/*
 * Reads an agent (subsystem) name from the socket and dispatches to the
 * matching handler.  The only name recognized here is "fetch_log"
 * (compared case-insensitively).
 *
 * Returns whatever the agent handler returns, or FALSE when the name cannot
 * be read or is not recognized.
 */
int agent_starter( ReliSock * s )
{
    ReliSock *sock = s;
    char *agent_name = NULL;

    sock->decode();
    const bool have_name = sock->code(agent_name) && sock->end_of_message();
    if( !have_name ) {
        dprintf( D_ALWAYS, "Can't read subsystem name\n" );
        free( agent_name );
        return FALSE;
    }

    dprintf ( D_ALWAYS, "Starting agent '%s'\n", agent_name );

    // The name is only needed to pick the handler, so decide first and
    // release the buffer before handing control to anyone else.
    const bool wants_fetch_log = ( strcasecmp(agent_name, "fetch_log") == 0 );
    free( agent_name );

    if( wants_fetch_log ) {
        return handle_agent_fetch_log( sock );
    }

    // default: nothing else is supported
    dprintf( D_ALWAYS, "WARNING: unrecognized agent name\n" );
    return FALSE;
}
/*
 * Connects to daemon `d`, starts the DC_SEC_QUERY command for sub-command
 * `num`, reads the authorization ClassAd the daemon sends back and hands
 * everything to print_info() for reporting.
 *
 *   d            daemon to query
 *   name         label passed through to print_info()
 *   num          sub-command number being queried
 *   output_mode  passed through to print_info()
 *
 * Returns true only when the reply ad and its end-of-message were received.
 */
bool do_item(Daemon* d, MyString name, int num, int output_mode)
{
    CondorError errstack;
    ClassAd authz_ad;
    bool fn_success = false;

    ReliSock *sock = (ReliSock*) d->makeConnectedSocket( Stream::reli_sock, 0, 0, &errstack );
    if (sock) {
        bool sc_success = d->startSubCommand(DC_SEC_QUERY, num, sock, 0, &errstack);
        if (sc_success) {
            sock->decode();
            if (getClassAd(sock, authz_ad) && sock->end_of_message()) {
                fn_success = true;
            }
        }
    }

    // As before, print_info() is called even when the connection failed
    // (sock is NULL in that case).
    print_info(fn_success, d->addr(), sock, name, num, &authz_ad, &errstack, output_mode);

    // Nothing in this function hands the socket to another owner, so it has
    // to be released here; previously it was leaked on every call.
    // NOTE(review): assumes print_info() does not keep the pointer - confirm.
    delete sock;

    return fn_success;
}
/*
 * Connects to daemon `d`, starts the DC_SEC_QUERY command for sub-command
 * `num`, reads the authorization ClassAd the daemon sends back and reports
 * the outcome through print_info().  When no connection can be made an
 * error is written to stderr instead.
 *
 *   d            daemon to query
 *   name         label passed through to print_info()
 *   num          sub-command number being queried
 *   output_mode  passed through to print_info()
 *
 * Returns true only when the reply ad and its end-of-message were received.
 */
bool do_item(Daemon* d, MyString name, int num, int output_mode)
{
    CondorError errstack;
    ClassAd authz_ad;
    bool fn_success = false;

    ReliSock *sock = (ReliSock*) d->makeConnectedSocket( Stream::reli_sock, 0, 0, &errstack );
    if (sock) {
        bool sc_success = d->startSubCommand(DC_SEC_QUERY, num, sock, 0, &errstack);
        if (sc_success) {
            sock->decode();
            if (getClassAd(sock, authz_ad) && sock->end_of_message()) {
                fn_success = true;
            }
        }
        print_info(fn_success, sock->get_connect_addr(), sock, name, num, &authz_ad, &errstack, output_mode);

        // Nothing in this function hands the socket to another owner, so
        // release it here; previously it was leaked on every call.
        // NOTE(review): assumes print_info() does not keep the pointer - confirm.
        delete sock;
    } else {
        // we know that d->addr() is not null because we checked before
        // calling do_item.  but i'll be paranoid and check again.
        fprintf(stderr, "ERROR: failed to make connection to %s\n", d->addr()?d->addr():"(null)");
    }

    return fn_success;
}
/* Takes sinful address of startd and sends it the given cmd, along with the capability and an end_of_message. */ int send_cmd_to_startd(char *sin_host, char *capability, int cmd) { ReliSock* sock = NULL; Daemon startd (DT_STARTD, sin_host, NULL); if (!(sock = (ReliSock*)startd.startCommand(cmd, Stream::reli_sock, 20))) { dprintf( D_ALWAYS, "Can't connect to startd at %s\n", sin_host ); return -1; } // send the capability ClaimIdParser idp( capability ); dprintf(D_FULLDEBUG, "send capability %s\n", idp.publicClaimId() ); if(!sock->code(capability)){ dprintf( D_ALWAYS, "sock->code(%s) failed.\n", idp.publicClaimId() ); delete sock; return -3; } // send end of message if( !sock->end_of_message() ) { dprintf( D_ALWAYS, "end_of_message failed\n" ); delete sock; return -4; } dprintf( D_FULLDEBUG, "Sent command %d to startd at %s with cap %s\n", cmd, sin_host, idp.publicClaimId() ); delete sock; return 0; }
// when a transferd registers itself, it identifies who it is. The connection // is then held open and the schedd periodically might send more transfer // requests to the transferd. Also, if the transferd dies, the schedd is // informed quickly and reliably due to the closed connection. bool DCTransferD::setup_treq_channel(ReliSock **treq_sock_ptr, int timeout, CondorError *errstack) { ReliSock *rsock; if (treq_sock_ptr != NULL) { // Our caller wants a pointer to the socket we used to succesfully // register the claim. The NULL pointer will represent failure and // this will only be set to something real if everything was ok. *treq_sock_ptr = NULL; } ///////////////////////////////////////////////////////////////////////// // Connect to the transfer daemon ///////////////////////////////////////////////////////////////////////// // This call with automatically connect to _addr, which was set in the // constructor of this object to be the transferd in question. rsock = (ReliSock*)startCommand(TRANSFERD_CONTROL_CHANNEL, Stream::reli_sock, timeout, errstack); if( ! rsock ) { dprintf( D_ALWAYS, "DCTransferD::setup_treq_channel: " "Failed to send command (TRANSFERD_CONTROL_CHANNEL) " "to the schedd\n" ); errstack->push("DC_TRANSFERD", 1, "Failed to start a TRANSFERD_CONTROL_CHANNEL command."); return false; } ///////////////////////////////////////////////////////////////////////// // Make sure we are authenticated. ///////////////////////////////////////////////////////////////////////// // First, if we're not already authenticated, force that now. 
if (!forceAuthentication( rsock, errstack )) { dprintf( D_ALWAYS, "DCTransferD::setup_treq_channel() authentication " "failure: %s\n", errstack->getFullText().c_str() ); errstack->push("DC_TRANSFERD", 1, "Failed to authenticate properly."); return false; } rsock->encode(); ///////////////////////////////////////////////////////////////////////// // At this point, the socket passed all of the authentication protocols // so it is ready for use. ///////////////////////////////////////////////////////////////////////// if (treq_sock_ptr) *treq_sock_ptr = rsock; return true; }
int ReliSock::accept( ReliSock &c ) { int c_sock; if (_state != sock_special || _special_state != relisock_listen || c._state != sock_virgin) { return FALSE; } if (_timeout > 0) { Selector selector; selector.set_timeout( _timeout ); selector.add_fd( _sock, Selector::IO_READ ); selector.execute(); if( selector.timed_out() ) { return FALSE; } else if ( !selector.has_ready() ) { dprintf( D_ALWAYS, "select returns %d, connect failed\n", selector.select_retval() ); return FALSE; } } #ifndef WIN32 /* Unix */ errno = 0; #endif if ((c_sock = condor_accept(_sock, c._who)) < 0) { #ifndef WIN32 /* Unix */ if ( errno == EMFILE ) { _condor_fd_panic ( __LINE__, __FILE__ ); /* This calls dprintf_exit! */ } #endif return FALSE; } c.assign(c_sock); c.enter_connected_state("ACCEPT"); c.decode(); int on = 1; c.setsockopt(SOL_SOCKET, SO_KEEPALIVE, (char*)&on, sizeof(on)); /* Set no delay to disable Nagle, since we buffer all our relisock output and it degrades performance of our various chatty protocols. -Todd T, 9/05 */ c.setsockopt(IPPROTO_TCP, TCP_NODELAY, (char*)&on, sizeof(on)); return TRUE; }
int TransferQueueManager::HandleRequest(int cmd,Stream *stream) { ReliSock *sock = (ReliSock *)stream; ASSERT( cmd == TRANSFER_QUEUE_REQUEST ); ClassAd msg; sock->decode(); if( !getClassAd( sock, msg ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "TransferQueueManager: failed to receive transfer request " "from %s.\n", sock->peer_description() ); return FALSE; } bool downloading = false; MyString fname; MyString jobid; MyString queue_user; filesize_t sandbox_size; if( !msg.LookupBool(ATTR_DOWNLOADING,downloading) || !msg.LookupString(ATTR_FILE_NAME,fname) || !msg.LookupString(ATTR_JOB_ID,jobid) || !msg.LookupString(ATTR_USER,queue_user) || !msg.LookupInteger(ATTR_SANDBOX_SIZE,sandbox_size)) { MyString msg_str; sPrintAd(msg_str, msg); dprintf(D_ALWAYS,"TransferQueueManager: invalid request from %s: %s\n", sock->peer_description(), msg_str.Value()); return FALSE; } // Currently, we just create the client with the default max queue // age. If it becomes necessary to customize the maximum age // on a case-by-case basis, it should be easy to adjust. TransferQueueRequest *client = new TransferQueueRequest( sock, sandbox_size, fname.Value(), jobid.Value(), queue_user.Value(), downloading, m_default_max_queue_age); if( !AddRequest( client ) ) { delete client; return KEEP_STREAM; // we have already closed this socket } return KEEP_STREAM; }
// Called when the schedd initially connects to the transferd to finish // the registration process. int TransferD::setup_transfer_request_handler(int /*cmd*/, Stream *sock) { ReliSock *rsock = (ReliSock*)sock; MyString sock_id; dprintf(D_ALWAYS, "Got TRANSFER_CONTROL_CHANNEL!\n"); rsock->decode(); /////////////////////////////////////////////////////////////// // make sure we are authenticated /////////////////////////////////////////////////////////////// if( ! rsock->triedAuthentication() ) { CondorError errstack; if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) { // we failed to authenticate, we should bail out now // since we don't know what user is trying to perform // this action. // TODO: it'd be nice to print out what failed, but we // need better error propagation for that... errstack.push( "TransferD::setup_transfer_request_handler()", 42, "Failure to register transferd - Authentication failed" ); dprintf( D_ALWAYS, "setup_transfer_request_handler() " "aborting: %s\n", errstack.getFullText().c_str() ); refuse(rsock); return CLOSE_STREAM; } } rsock->decode(); /////////////////////////////////////////////////////////////// // Register this socket with a socket handler to handle incoming requests /////////////////////////////////////////////////////////////// sock_id += "<TreqChannel-Socket>"; char* _sock_id = strdup( sock_id.Value() ); //de-const // register the handler for any future transfer requests on this socket. daemonCore->Register_Socket((Sock*)rsock, _sock_id, (SocketHandlercpp)&TransferD::accept_transfer_request_handler, "TransferD::accept_transfer_request_handler", this, ALLOW); free( _sock_id ); dprintf(D_ALWAYS, "Treq channel established.\n"); dprintf(D_ALWAYS, "Accepting Transfer Requests.\n"); return KEEP_STREAM; }
bool DCStarter::createJobOwnerSecSession(int timeout,char const *job_claim_id,char const *starter_sec_session,char const *session_info,MyString &owner_claim_id,MyString &error_msg,MyString &starter_version,MyString &starter_addr) { ReliSock sock; if (IsDebugLevel(D_COMMAND)) { dprintf (D_COMMAND, "DCStarter::createJobOwnerSecSession(%s,...) making connection to %s\n", getCommandStringSafe(CREATE_JOB_OWNER_SEC_SESSION), _addr ? _addr : "NULL"); } if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(CREATE_JOB_OWNER_SEC_SESSION, &sock,timeout,NULL,NULL,false,starter_sec_session) ) { error_msg = "Failed to send CREATE_JOB_OWNER_SEC_SESSION to starter"; return false; } ClassAd input; input.Assign(ATTR_CLAIM_ID,job_claim_id); input.Assign(ATTR_SESSION_INFO,session_info); sock.encode(); if( !putClassAd(&sock, input) || !sock.end_of_message() ) { error_msg = "Failed to compose CREATE_JOB_OWNER_SEC_SESSION to starter"; return false; } sock.decode(); ClassAd reply; if( !getClassAd(&sock, reply) || !sock.end_of_message() ) { error_msg = "Failed to get response to CREATE_JOB_OWNER_SEC_SESSION from starter"; return false; } bool success = false; reply.LookupBool(ATTR_RESULT,success); if( !success ) { reply.LookupString(ATTR_ERROR_STRING,error_msg); return false; } reply.LookupString(ATTR_CLAIM_ID,owner_claim_id); reply.LookupString(ATTR_VERSION,starter_version); // get the full starter address from the starter in case it contains // extra CCB info that we don't already know about reply.LookupString(ATTR_STARTER_IP_ADDR,starter_addr); return true; }
void initialize() { char *tmp; string collName; dprintf(D_FULLDEBUG, "AviaryLocatorPlugin: Initializing...\n"); tmp = param("COLLECTOR_NAME"); if (NULL == tmp) { collName = getPoolName(); } else { collName = tmp; free(tmp); tmp = NULL; } string log_name; formatstr(log_name,"aviary_locator.log"); provider = AviaryProviderFactory::create(log_name, getPoolName(),CUSTOM,LOCATOR, "services/locator/"); if (!provider) { EXCEPT("Unable to configure AviaryProvider. Exiting..."); } ReliSock *sock = new ReliSock; if (!sock) { EXCEPT("Failed to allocate transport socket"); } if (!sock->assign(provider->getListenerSocket())) { EXCEPT("Failed to bind transport socket"); } int index; if (-1 == (index = daemonCore->Register_Socket((Stream *) sock, "Aviary Method Socket", (SocketHandlercpp) ( &AviaryLocatorPlugin::handleTransportSocket ), "Handler for Aviary Methods.", this))) { EXCEPT("Failed to register transport socket"); } int pruning_interval = param_integer("AVIARY_LOCATOR_PRUNE_INTERVAL",20); if (-1 == (index = daemonCore->Register_Timer( pruning_interval,pruning_interval*2, (TimerHandlercpp)(&AviaryLocatorPlugin::handleTimerCallback), "Timer for pruning unresponsive endpoints", this))) { EXCEPT("Failed to register pruning timer"); } }
/*
 * Socket callback: reads one raw line from the client and dispatches it.
 * Until a cookie has been accepted the line is treated as a cookie request;
 * afterwards it is treated as a standard request.
 *
 * Returns KEEP_STREAM while lines can be read.  When no line can be read
 * the handler deletes itself and returns ~KEEP_STREAM.
 */
int IOProxyHandler::handle_request( Stream *s )
{
    ReliSock *client = (ReliSock *) s;
    char request[CHIRP_LINE_MAX];

    if( client->get_line_raw(request,CHIRP_LINE_MAX) <= 0 ) {
        dprintf(D_FULLDEBUG,"IOProxyHandler: closing connection to %s\n",client->peer_ip_str());
        // No member may be touched after this point.
        delete this;
        return ~KEEP_STREAM;
    }

    if( got_cookie ) {
        handle_standard_request(client,request);
    } else {
        handle_cookie_request(client,request);
    }
    return KEEP_STREAM;
}
void initialize() { char *tmp; string collName; dprintf(D_FULLDEBUG, "AviaryCollectorPlugin: Initializing...\n"); tmp = param("COLLECTOR_NAME"); if (NULL == tmp) { collName = getPoolName(); } else { collName = tmp; free(tmp); tmp = NULL; } string log_name("aviary_collector.log"); string id_name("collector"); id_name+=SEPARATOR; id_name+=getPoolName(); provider = AviaryProviderFactory::create(log_name, id_name,"COLLECTOR","POOL","services/collector/"); if (!provider) { EXCEPT("Unable to configure AviaryProvider. Exiting..."); } collector = CollectorObject::getInstance(); ReliSock *sock = new ReliSock; if (!sock) { EXCEPT("Failed to allocate transport socket"); } if (!sock->assign(provider->getListenerSocket())) { EXCEPT("Failed to bind transport socket"); } int index; if (-1 == (index = daemonCore->Register_Socket((Stream *) sock, "Aviary Method Socket", (SocketHandlercpp) ( &AviaryCollectorPlugin::handleTransportSocket ), "Handler for Aviary Methods.", this))) { EXCEPT("Failed to register transport socket"); } collector->setMyAddress(daemonCore->publicNetworkIpAddr()); }
void AviaryScheddPlugin::earlyInitialize() { // Since this plugin is registered with multiple // PluginManagers it may be initialized more than once, // and we don't want that static bool skip = false; if (skip) return; skip = true; string log_name("aviary_job.log"); string id_name("job"); id_name+=SEPARATOR; id_name+=getScheddName(); provider = AviaryProviderFactory::create(log_name,id_name, "SCHEDULER","JOB","services/job/"); if (!provider) { EXCEPT("Unable to configure AviaryProvider. Exiting..."); } schedulerObj = SchedulerObject::getInstance(); dirtyJobs = new DirtyJobsType(); isHandlerRegistered = false; ReliSock *sock = new ReliSock; if (!sock) { EXCEPT("Failed to allocate transport socket"); } if (!sock->assign(provider->getListenerSocket())) { EXCEPT("Failed to bind transport socket"); } int index; if (-1 == (index = daemonCore->Register_Socket((Stream *) sock, "Aviary Method Socket", (SocketHandlercpp) ( &AviaryScheddPlugin::handleTransportSocket ), "Handler for Aviary Methods.", this))) { EXCEPT("Failed to register transport socket"); } m_initialized = false; }
/*
 * Copy constructor.  After the Sock base is copied and init() has run, the
 * remaining cedar state is transferred by serializing `orig` and feeding
 * the resulting buffer to this object's serialize().
 */
ReliSock::ReliSock(const ReliSock & orig) : Sock(orig)
{
    init();

    // Snapshot the original's state; serialize() hands back a buffer that
    // is released here with delete [].
    char *state = orig.serialize();
    ASSERT(state);

    // Load that state into the new socket.
    serialize(state);

    delete [] state;
}
/*
 * First step of passing a socket: tells the target daemon that an fd is
 * about to be sent by writing SHARED_PORT_PASS_SOCK plus end-of-message.
 *
 * On success advances the state machine to SEND_FD and returns CONTINUE;
 * on a send failure logs the error and returns FAILED.
 */
SharedPortState::HandlerResult SharedPortState::HandleHeader(Stream *&s)
{
    ReliSock *target = static_cast<ReliSock*>(s);
    target->encode();

    const bool header_sent =
        target->put((int)SHARED_PORT_PASS_SOCK) && target->end_of_message();
    if( header_sent ) {
        m_state = SEND_FD;
        return CONTINUE;
    }

    dprintf(D_ALWAYS,"SharedPortClient: failed to send SHARED_PORT_PASS_FD to %s%s: %s\n",
            m_sock_name.c_str(),
            m_requested_by.c_str(),
            strerror(errno));
    return FAILED;
}
/*
 * Listener callback: accepts one incoming connection on `server`.
 *
 * A connection is kept only when its peer address equals the local IP
 * address; in that case a new IOProxyHandler is created and initialized
 * with the client socket and the cookie.  In every other case (accept
 * failure, foreign address, handler init failure) the client socket is
 * closed and deleted.
 *
 * Always returns KEEP_STREAM.
 */
int IOProxy::connect_callback( Stream * /*stream*/ )
{
    ReliSock *client = new ReliSock;
    bool accept_client = false;

    int success = server->accept(*client);
    if(success) {
        if(get_local_ipaddr().compare_address(client->peer_addr())) {
            dprintf(D_ALWAYS,"IOProxy: accepting connection from %s\n",client->peer_ip_str());
            accept_client = true;
        } else {
            dprintf(D_ALWAYS,"IOProxy: rejecting connection from %s: invalid ip addr\n",client->peer_ip_str());
        }
    } else {
        dprintf(D_ALWAYS,"IOProxy: Couldn't accept connection: %s\n",strerror(errno));
    }

    if(accept_client) {
        IOProxyHandler *handler = new IOProxyHandler();
        if(handler->init(client,cookie)) {
            // The handler now services this client; nothing to clean up here.
            return KEEP_STREAM;
        }

        dprintf(D_ALWAYS,"IOProxy: couldn't register request callback!\n");
        // The handler never went into service, so nobody else will free it
        // (previously it was leaked on this path).
        // NOTE(review): assumes a failed init() leaves nothing registered
        // that still points at the handler - confirm.
        delete handler;
    }

    // Rejected, failed to accept, or failed to initialize: drop the client.
    client->close();
    delete client;

    return KEEP_STREAM;
}
// specific command function - sends local daemon's version and state over // the socket bool AbstractReplicatorStateMachine::versionAndStateCommand(ReliSock& socket) { if( ! versionCommand( socket ) ) { return false; } int stateAsInteger = int( m_state ); if( ! socket.code( stateAsInteger ) /*|| ! socket.end_of_message( )*/ ) { dprintf( D_NETWORK, "AbstractReplicatorStateMachine::versionAndStateCommand " "unable to code the state or eom%d\n", m_state ); return false; } dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::versionAndStateCommand " "sent command successfully\n" ); return true; }
/*
 * Delegates the X509 proxy in `filename` to the starter at _addr with the
 * DELEGATE_GSI_CRED_STARTER command, then reads the starter's reply code.
 *
 * Reply mapping: 1 -> XUS_Okay, 2 -> XUS_Declined, 0 -> XUS_Error.  Any
 * other value is logged and treated as XUS_Error.  Connection, command or
 * delegation failures also yield XUS_Error.
 */
DCStarter::X509UpdateStatus DCStarter::delegateX509Proxy( const char * filename, time_t expiration_time, char const *sec_session_id, time_t *result_expiration_time)
{
    ReliSock starter_sock;
    starter_sock.timeout(60);
    if( ! starter_sock.connect(_addr) ) {
        dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
            "Failed to connect to starter %s\n", _addr);
        return XUS_Error;
    }

    CondorError errstack;
    if( ! startCommand(DELEGATE_GSI_CRED_STARTER, &starter_sock, 0, &errstack, NULL, false, sec_session_id) ) {
        dprintf( D_ALWAYS, "DCStarter::delegateX509Proxy: "
                 "Failed send command to the starter: %s\n",
                 errstack.getFullText().c_str());
        return XUS_Error;
    }

    // Send the gsi proxy
    filesize_t file_size = 0;   // will receive the size of the file
    const int delegation_rc =
        starter_sock.put_x509_delegation(&file_size,filename,expiration_time,result_expiration_time);
    if ( delegation_rc < 0 ) {
        dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy "
            "failed to delegate proxy file %s (size=%ld)\n",
            filename, (long int)file_size);
        return XUS_Error;
    }

    // Fetch the result.  The read results are not checked: if nothing
    // arrives, reply keeps its initial 0, which maps to XUS_Error.
    starter_sock.decode();
    int reply = 0;
    starter_sock.code(reply);
    starter_sock.end_of_message();

    if( reply == 1 ) {
        return XUS_Okay;
    }
    if( reply == 2 ) {
        return XUS_Declined;
    }
    if( reply != 0 ) {
        dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
            "remote side returned unknown code %d. Treating "
            "as an error.\n", reply);
    }
    return XUS_Error;
}
int DCStartd::delegateX509Proxy( const char* proxy, time_t expiration_time, time_t *result_expiration_time ) { dprintf( D_FULLDEBUG, "Entering DCStartd::delegateX509Proxy()\n" ); setCmdStr( "delegateX509Proxy" ); if( ! claim_id ) { newError( CA_INVALID_REQUEST, "DCStartd::delegateX509Proxy: Called with NULL claim_id" ); return CONDOR_ERROR; } // if this claim is associated with a security session ClaimIdParser cidp(claim_id); // // 1) begin the DELEGATE_GSI_CRED_STARTD command // ReliSock* tmp = (ReliSock*)startCommand( DELEGATE_GSI_CRED_STARTD, Stream::reli_sock, 20, NULL, NULL, false, cidp.secSessionId() ); if( ! tmp ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: Failed to send command DELEGATE_GSI_CRED_STARTD to the startd" ); return CONDOR_ERROR; } // // 2) get reply from startd - OK means continue, NOT_OK means // don't bother (the startd doesn't require a delegated // proxy // tmp->decode(); int reply; if( !tmp->code(reply) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: failed to receive reply from startd (1)" ); delete tmp; return CONDOR_ERROR; } if ( !tmp->end_of_message() ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: end of message error from startd (1)" ); delete tmp; return CONDOR_ERROR; } if( reply == NOT_OK ) { delete tmp; return NOT_OK; } // // 3) send over the claim id and delegate (or copy) the given proxy // tmp->encode(); int use_delegation = param_boolean( "DELEGATE_JOB_GSI_CREDENTIALS", true ) ? 
1 : 0; if( !tmp->code( claim_id ) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: Failed to send claim id to the startd" ); delete tmp; return CONDOR_ERROR; } if ( !tmp->code( use_delegation ) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: Failed to send use_delegation flag to the startd" ); delete tmp; return CONDOR_ERROR; } int rv; filesize_t dont_care; if( use_delegation ) { rv = tmp->put_x509_delegation( &dont_care, proxy, expiration_time, result_expiration_time ); } else { dprintf( D_FULLDEBUG, "DELEGATE_JOB_GSI_CREDENTIALS is False; using direct copy\n"); if( ! tmp->get_encryption() ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: Cannot copy: channel does not have encryption enabled" ); delete tmp; return CONDOR_ERROR; } rv = tmp->put_file( &dont_care, proxy ); } if( rv == -1 ) { newError( CA_FAILURE, "DCStartd::delegateX509Proxy: Failed to delegate proxy" ); delete tmp; return CONDOR_ERROR; } if ( !tmp->end_of_message() ) { newError( CA_FAILURE, "DCStartd::delegateX509Proxy: end of message error to startd" ); delete tmp; return CONDOR_ERROR; } // command successfully sent; now get the reply tmp->decode(); if( !tmp->code(reply) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: failed to receive reply from startd (2)" ); delete tmp; return CONDOR_ERROR; } if ( !tmp->end_of_message() ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: end of message error from startd (2)" ); delete tmp; return CONDOR_ERROR; } delete tmp; dprintf( D_FULLDEBUG, "DCStartd::delegateX509Proxy: successfully sent command, reply is: %d\n", reply ); return reply; }
int CCBServer::HandleRequest(int cmd,Stream *stream) { ReliSock *sock = (ReliSock *)stream; ASSERT( cmd == CCB_REQUEST ); // Avoid lengthy blocking on communication with our peer. // This command-handler should not get called until data // is ready to read. sock->timeout(1); ClassAd msg; sock->decode(); if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "CCB: failed to receive request " "from %s.\n", sock->peer_description() ); return FALSE; } MyString name; if( msg.LookupString(ATTR_NAME,name) ) { // client name is purely for debugging purposes name.formatstr_cat(" on %s",sock->peer_description()); sock->set_peer_description(name.Value()); } MyString target_ccbid_str; MyString return_addr; MyString connect_id; // id target daemon should present to requester CCBID target_ccbid; // NOTE: using ATTR_CLAIM_ID for connect id so that it is // automatically treated as a secret over the network. // It must be presented by the target daemon when connecting // to the requesting client, so the client can confirm that // the connection is in response to its request. 
if( !msg.LookupString(ATTR_CCBID,target_ccbid_str) || !msg.LookupString(ATTR_MY_ADDRESS,return_addr) || !msg.LookupString(ATTR_CLAIM_ID,connect_id) ) { MyString ad_str; msg.sPrint(ad_str); dprintf(D_ALWAYS, "CCB: invalid request from %s: %s\n", sock->peer_description(), ad_str.Value() ); return FALSE; } if( !CCBIDFromString(target_ccbid,target_ccbid_str.Value()) ) { dprintf(D_ALWAYS, "CCB: request from %s contains invalid CCBID %s\n", sock->peer_description(), target_ccbid_str.Value() ); return FALSE; } CCBTarget *target = GetTarget( target_ccbid ); if( !target ) { dprintf(D_ALWAYS, "CCB: rejecting request from %s for ccbid %s because no daemon is " "currently registered with that id " "(perhaps it recently disconnected).\n", sock->peer_description(), target_ccbid_str.Value()); MyString error_msg; error_msg.formatstr( "CCB server rejecting request for ccbid %s because no daemon is " "currently registered with that id " "(perhaps it recently disconnected).", target_ccbid_str.Value()); RequestReply( sock, false, error_msg.Value(), 0, target_ccbid ); return FALSE; } SetSmallBuffers(sock); CCBServerRequest *request = new CCBServerRequest( sock, target_ccbid, return_addr.Value(), connect_id.Value() ); AddRequest( request, target ); dprintf(D_FULLDEBUG, "CCB: received request id %lu from %s for target ccbid %s " "(registered as %s)\n", request->getRequestID(), request->getSock()->peer_description(), target_ccbid_str.Value(), target->getSock()->peer_description()); ForwardRequestToTarget( request, target ); return KEEP_STREAM; }
int CCBServer::HandleRegistration(int cmd,Stream *stream) { ReliSock *sock = (ReliSock *)stream; ASSERT( cmd == CCB_REGISTER ); // Avoid lengthy blocking on communication with our peer. // This command-handler should not get called until data // is ready to read. sock->timeout(1); ClassAd msg; sock->decode(); if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "CCB: failed to receive registration " "from %s.\n", sock->peer_description() ); return FALSE; } SetSmallBuffers(sock); MyString name; if( msg.LookupString(ATTR_NAME,name) ) { // target daemon name is purely for debugging purposes name.formatstr_cat(" on %s",sock->peer_description()); sock->set_peer_description(name.Value()); } CCBTarget *target = new CCBTarget(sock); MyString reconnect_cookie_str,reconnect_ccbid_str; CCBID reconnect_cookie,reconnect_ccbid; bool reconnected = false; if( msg.LookupString(ATTR_CLAIM_ID,reconnect_cookie_str) && CCBIDFromString(reconnect_cookie,reconnect_cookie_str.Value()) && msg.LookupString( ATTR_CCBID,reconnect_ccbid_str) && CCBIDFromContactString(reconnect_ccbid,reconnect_ccbid_str.Value()) ) { target->setCCBID( reconnect_ccbid ); reconnected = ReconnectTarget( target, reconnect_cookie ); } if( !reconnected ) { AddTarget( target ); } CCBReconnectInfo *reconnect_info = GetReconnectInfo( target->getCCBID() ); ASSERT( reconnect_info ); sock->encode(); ClassAd reply_msg; MyString ccb_contact; CCBIDToString( reconnect_info->getReconnectCookie(),reconnect_cookie_str ); // We send our address as part of the CCB contact string, rather // than letting the target daemon fill it in. This is to give us // potential flexibility on the CCB server side to do things like // assign different targets to different CCB server sub-processes, // each with their own command port. 
CCBIDToContactString( m_address.Value(), target->getCCBID(), ccb_contact ); reply_msg.Assign(ATTR_CCBID,ccb_contact.Value()); reply_msg.Assign(ATTR_COMMAND,CCB_REGISTER); reply_msg.Assign(ATTR_CLAIM_ID,reconnect_cookie_str.Value()); if( !reply_msg.put( *sock ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "CCB: failed to send registration response " "to %s.\n", sock->peer_description() ); RemoveTarget( target ); return KEEP_STREAM; // we have already closed this socket } return KEEP_STREAM; }
// download the files associated with the jobads to the sandbox at td_sinful // with the supplied capability. // The work_ad should contain: // ATTR_TREQ_CAPABILITY // ATTR_TREQ_FTP // ATTR_TREQ_JOBID_ALLOW_LIST bool DCTransferD::download_job_files(ClassAd *work_ad, CondorError * errstack) { ReliSock *rsock = NULL; int timeout = 60 * 60 * 8; // transfers take a long time... int i; ClassAd reqad, respad; std::string cap; int ftp; int invalid; int protocol; std::string reason; int num_transfers; ClassAd jad; const char *lhstr = NULL; ExprTree *tree = NULL; ////////////////////////////////////////////////////////////////////////// // Connect to the transferd and authenticate ////////////////////////////////////////////////////////////////////////// // This call with automatically connect to _addr, which was set in the // constructor of this object to be the transferd in question. rsock = (ReliSock*)startCommand(TRANSFERD_READ_FILES, Stream::reli_sock, timeout, errstack); if( ! rsock ) { dprintf( D_ALWAYS, "DCTransferD::download_job_files: " "Failed to send command (TRANSFERD_READ_FILES) " "to the schedd\n" ); errstack->push("DC_TRANSFERD", 1, "Failed to start a TRANSFERD_READ_FILES command."); return false; } // First, if we're not already authenticated, force that now. if (!forceAuthentication( rsock, errstack )) { dprintf( D_ALWAYS, "DCTransferD::download_job_files() authentication " "failure: %s\n", errstack->getFullText().c_str() ); errstack->push("DC_TRANSFERD", 1, "Failed to authenticate properly."); return false; } rsock->encode(); ////////////////////////////////////////////////////////////////////////// // Query the transferd about the capability/protocol and see if I can // download my files. It will respond with a classad saying good or bad. 
////////////////////////////////////////////////////////////////////////// work_ad->LookupString(ATTR_TREQ_CAPABILITY, cap); work_ad->LookupInteger(ATTR_TREQ_FTP, ftp); reqad.Assign(ATTR_TREQ_CAPABILITY, cap); reqad.Assign(ATTR_TREQ_FTP, ftp); // This request ad to the transferd should contain: // ATTR_TREQ_CAPABILITY // ATTR_TREQ_FTP reqad.put(*rsock); rsock->end_of_message(); rsock->decode(); // This response ad from the transferd should contain: // ATTR_TREQ_INVALID_REQUEST (set to true) // ATTR_TREQ_INVALID_REASON // // OR // // ATTR_TREQ_INVALID_REQUEST (set to false) // ATTR_TREQ_NUM_TRANSFERS // respad.initFromStream(*rsock); rsock->end_of_message(); respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if (invalid == TRUE) { // The transferd rejected my attempt to upload the fileset delete rsock; respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->push("DC_TRANSFERD", 1, reason.c_str()); return false; } respad.LookupInteger(ATTR_TREQ_NUM_TRANSFERS, num_transfers); ////////////////////////////////////////////////////////////////////////// // Based upon the protocol I've chosen, use that method to download the // files. When using the FileTrans protocol, a child process on the // transferd side will be sending me individual job ads and then // instantiating a filetransfer object for that ad. ////////////////////////////////////////////////////////////////////////// dprintf(D_ALWAYS, "Receiving fileset"); work_ad->LookupInteger(ATTR_TREQ_FTP, protocol); switch(protocol) { case FTP_CFTP: // download the files using the FileTransfer Object for (i = 0; i < num_transfers; i++) { // Grab a job ad the server is sending us so we know what // to receive. jad.initFromStream(*rsock); rsock->end_of_message(); // translate the job ad by replacing the // saved SUBMIT_ attributes so the download goes into the // correct place. 
jad.ResetExpr(); while( jad.NextExpr(lhstr, tree) ) { if ( lhstr && strncasecmp("SUBMIT_",lhstr,7)==0 ) { // this attr name starts with SUBMIT_ // compute new lhs (strip off the SUBMIT_) const char *new_attr_name = strchr(lhstr,'_'); ExprTree * pTree; ASSERT(new_attr_name); new_attr_name++; // insert attribute pTree = tree->Copy(); jad.Insert(new_attr_name, pTree, false); } } // while next expr // instantiate a filetransfer object and have it accept the // files. FileTransfer ftrans; if ( !ftrans.SimpleInit(&jad, false, false, rsock) ) { delete rsock; errstack->push("DC_TRANSFERD", 1, "Failed to initate uploading of files."); return false; } // We want files to be copied to their final places, so apply // any filename remaps when downloading. if ( !ftrans.InitDownloadFilenameRemaps(&jad) ) { return false; } ftrans.setPeerVersion( version() ); if ( !ftrans.DownloadFiles() ) { delete rsock; errstack->push("DC_TRANSFERD", 1, "Failed to download files."); return false; } dprintf(D_ALWAYS | D_NOHEADER, "."); } rsock->end_of_message(); dprintf(D_ALWAYS | D_NOHEADER, "\n"); break; default: // Bail due to user error. This client doesn't support the unknown // protocol. delete rsock; errstack->push("DC_TRANSFERD", 1, "Unknown file transfer protocol selected."); return false; break; } ////////////////////////////////////////////////////////////////////////// // Get the response from the transferd once it sees a completed // movement of files to the child process. ////////////////////////////////////////////////////////////////////////// rsock->decode(); respad.initFromStream(*rsock); rsock->end_of_message(); // close up shop delete rsock; respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if ( invalid == TRUE ) { respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->push("DC_TRANSFERD", 1, reason.c_str()); return false; } return true; }
// upload the files associated with the jobads to the sandbox at td_sinful // with the supplied capability. // The work_ad should contain: // ATTR_TREQ_CAPABILITY // ATTR_TREQ_FTP // ATTR_TREQ_JOBID_ALLOW_LIST bool DCTransferD::upload_job_files(int JobAdsArrayLen, ClassAd* JobAdsArray[], ClassAd *work_ad, CondorError * errstack) { ReliSock *rsock = NULL; int timeout = 60 * 60 * 8; // transfers take a long time... int i; ClassAd reqad, respad; std::string cap; int ftp; int invalid; int protocol; std::string reason; ////////////////////////////////////////////////////////////////////////// // Connect to the transferd and authenticate ////////////////////////////////////////////////////////////////////////// // This call with automatically connect to _addr, which was set in the // constructor of this object to be the transferd in question. rsock = (ReliSock*)startCommand(TRANSFERD_WRITE_FILES, Stream::reli_sock, timeout, errstack); if( ! rsock ) { dprintf( D_ALWAYS, "DCTransferD::upload_job_files: " "Failed to send command (TRANSFERD_WRITE_FILES) " "to the schedd\n" ); errstack->push("DC_TRANSFERD", 1, "Failed to start a TRANSFERD_WRITE_FILES command."); return false; } // First, if we're not already authenticated, force that now. if (!forceAuthentication( rsock, errstack )) { dprintf( D_ALWAYS, "DCTransferD::upload_job_files() authentication " "failure: %s\n", errstack->getFullText().c_str() ); errstack->push("DC_TRANSFERD", 1, "Failed to authenticate properly."); return false; } rsock->encode(); ////////////////////////////////////////////////////////////////////////// // Query the transferd about the capability/protocol and see if I can // upload my files. It will respond with a classad saying good or bad. 
////////////////////////////////////////////////////////////////////////// work_ad->LookupString(ATTR_TREQ_CAPABILITY, cap); work_ad->LookupInteger(ATTR_TREQ_FTP, ftp); reqad.Assign(ATTR_TREQ_CAPABILITY, cap); reqad.Assign(ATTR_TREQ_FTP, ftp); // This request ad to the transferd should contain: // ATTR_TREQ_CAPABILITY // ATTR_TREQ_FTP reqad.put(*rsock); rsock->end_of_message(); rsock->decode(); // This response ad to the transferd should contain: // ATTR_TREQ_INVALID_REQUEST (set to true) // ATTR_TREQ_INVALID_REASON // // OR // // ATTR_TREQ_INVALID_REQUEST (set to false) respad.initFromStream(*rsock); rsock->end_of_message(); respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if (invalid == TRUE) { // The transferd rejected my attempt to upload the fileset delete rsock; respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->push("DC_TRANSFERD", 1, reason.c_str()); return false; } ////////////////////////////////////////////////////////////////////////// // Sort the job ads array into numerically sorted order. The transferd // must do the same. ////////////////////////////////////////////////////////////////////////// // TODO ////////////////////////////////////////////////////////////////////////// // Based upon the protocol I've chosen, use that method to upload the // files. When using the FileTrans protocol, a child process on the // transferd side will be inheriting a socket and accepting the // FileTransfer object's protocol. 
////////////////////////////////////////////////////////////////////////// // XXX Fix to only send jobads for allowed jobs dprintf(D_ALWAYS, "Sending fileset"); work_ad->LookupInteger(ATTR_TREQ_FTP, protocol); switch(protocol) { case FTP_CFTP: // upload the files using the FileTransfer Object for (i=0; i<JobAdsArrayLen; i++) { FileTransfer ftrans; if ( !ftrans.SimpleInit(JobAdsArray[i], false, false, rsock) ) { delete rsock; errstack->push("DC_TRANSFERD", 1, "Failed to initate uploading of files."); return false; } ftrans.setPeerVersion( version() ); if ( !ftrans.UploadFiles(true,false) ) { delete rsock; errstack->push("DC_TRANSFERD", 1, "Failed to upload files."); return false; } dprintf(D_ALWAYS | D_NOHEADER, "."); } rsock->end_of_message(); dprintf(D_ALWAYS | D_NOHEADER, "\n"); break; default: // Bail due to user error. This client doesn't support the uknown // protocol. delete rsock; errstack->push("DC_TRANSFERD", 1, "Unknown file transfer protocol selected."); return false; break; } ////////////////////////////////////////////////////////////////////////// // Get the response from the transferd once it sees a completed // movement of files from the child process. ////////////////////////////////////////////////////////////////////////// rsock->decode(); respad.initFromStream(*rsock); rsock->end_of_message(); // close up shop delete rsock; respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if ( invalid == TRUE ) { respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->push("DC_TRANSFERD", 1, reason.c_str()); return false; } return true; }
// The function occurs in a seperate thread or process int TransferD::write_files_thread(void *targ, Stream *sock) { ThreadArg *thread_arg = (ThreadArg*)targ; ReliSock *rsock = (ReliSock*)sock; TransferRequest *treq = NULL; // int protocol; SimpleList<ClassAd*> *jad_list = NULL; ClassAd *jad = NULL; int cluster, proc; int old_timeout; int result; ClassAd respad; // XXX This is a damn dirty hack whose solution resides in implementing // a checksum for the files. // Now we sleep here for one second. Why? So we are certain // to transfer back output files even if the job ran for less // than one second. This is because: // stat() can't tell the difference between: // 1) A job starts up, touches a file, and exits all in one second // 2) A job starts up, doesn't touch the file, and exits all in one // second // So if we force the start time of the job to be one second later than // the time we know the files were written, stat() should be able // to perceive what happened, if anything. sleep(1); // even though I'm in a new process, I got here either through forking // or through a thread, so this memory is a copy. // protocol = thread_arg->protocol; treq = thread_arg->treq; delete thread_arg; // XXX deal with protocol value. //////////////////////////////////////////////////////////////////////// // Sort the classads (XXX maybe put at a higher level in the protocol) //////////////////////////////////////////////////////////////////////// // XXX TODO //////////////////////////////////////////////////////////////////////// // Do the transfer. //////////////////////////////////////////////////////////////////////// // file transfers can take a long time.... 
old_timeout = rsock->timeout(60 * 60 * 8); jad_list = treq->todo_tasks(); while(jad_list->Next(jad)) { FileTransfer ftrans; jad->LookupInteger(ATTR_CLUSTER_ID, cluster); jad->LookupInteger(ATTR_PROC_ID, proc); dprintf( D_ALWAYS, "TransferD::write_files_thread(): " "Transferring fileset for job %d.%d\n", cluster, proc); result = ftrans.SimpleInit(jad, true, true, rsock); if ( !result ) { dprintf( D_ALWAYS, "TransferD::write_files_thread(): " "failed to init file transfer for job %d.%d \n", cluster, proc ); respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "FileTransfer Object failed to SimpleInit."); respad.put(*rsock); rsock->end_of_message(); rsock->timeout(old_timeout); return EXIT_FAILURE; } ftrans.setPeerVersion(treq->get_peer_version().Value()); // We're "downloading" from the client to here. result = ftrans.DownloadFiles(); if ( !result ) { dprintf( D_ALWAYS, "TransferD::write_files_thread(): " "failed to transfer files for job %d.%d \n", cluster, proc ); respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "FileTransfer Object failed to download."); respad.put(*rsock); rsock->end_of_message(); rsock->timeout(old_timeout); return EXIT_FAILURE; } } rsock->end_of_message(); ////////////////////////////////////////////////////////////////////////// // Now that the file transfer is done, tell the client everything is ok. ////////////////////////////////////////////////////////////////////////// dprintf(D_ALWAYS, "Informing client of finished transfer.\n"); rsock->encode(); respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE); // This response ad to the client will contain: // // ATTR_TREQ_INVALID_REQUEST (set to false) // respad.put(*rsock); rsock->end_of_message(); delete rsock; return EXIT_SUCCESS; }
// This handler is called when a client wishes to write files from the // transferd's storage. int TransferD::write_files_handler(int cmd, Stream *sock) { ReliSock *rsock = (ReliSock*)sock; MyString capability; int protocol = FTP_UNKNOWN; TransferRequest *treq = NULL; MyString fquser; static int transfer_reaper_id = -1; ThreadArg *thread_arg; int tid; ClassAd reqad; ClassAd respad; cmd = cmd; // quiet the compiler. dprintf(D_ALWAYS, "Got TRANSFERD_WRITE_FILES!\n"); ///////////////////////////////////////////////////////////////////////// // make sure we are authenticated ///////////////////////////////////////////////////////////////////////// if( ! rsock->triedAuthentication() ) { CondorError errstack; if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) { // we failed to authenticate, we should bail out now // since we don't know what user is trying to perform // this action. // TODO: it'd be nice to print out what failed, but we // need better error propagation for that... errstack.push( "TransferD::setup_transfer_request_handler()", 42, "Failure to register transferd - Authentication failed" ); dprintf( D_ALWAYS, "setup_transfer_request_handler() " "aborting: %s\n", errstack.getFullText() ); refuse( rsock ); return CLOSE_STREAM; } } fquser = rsock->getFullyQualifiedUser(); ///////////////////////////////////////////////////////////////////////// // Check to see if the capability the client tells us is something that // we have knowledge of. We ONLY check the capability and not the // identity of the person in question. This allows people of different // identities to write files here as long as they had the right // capability. While this might not sound secure, they STILL had to have // authenticated as someone this daemon trusts. // Similarly, check the protocol it wants to use as well as ensure that // the direction the transfer request was supposed to be is being honored. 
///////////////////////////////////////////////////////////////////////// rsock->decode(); // soak the request ad from the client about what it wants to transfer reqad.initFromStream(*rsock); rsock->end_of_message(); reqad.LookupString(ATTR_TREQ_CAPABILITY, capability); rsock->encode(); // do I know of such a capability? if (m_treqs.lookup(capability, treq) != 0) { // didn't find it. Log it and tell them to leave and close up shop respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid capability!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "using capability '%s', but there was no such capability. " "Access denied.\n", fquser.Value(), capability.Value()); return CLOSE_STREAM; } reqad.LookupInteger(ATTR_TREQ_FTP, protocol); // am I willing to use this protocol? switch(protocol) { case FTP_CFTP: // FileTrans protocol, I'm happy. break; default: respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid file transfer protocol!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "using protocol '%d', but I don't support that protocol. " "Access denied.\n", fquser.Value(), protocol); return CLOSE_STREAM; } // nsure that this transfer request was of the uploading variety if (treq->get_direction() != FTPD_UPLOAD) { respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Transfer Request was not an uploading request!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "to a transfer request that wasn't expecting to be written. " "Access denied.\n", fquser.Value()); } ///////////////////////////////////////////////////////////////////////// // Tell the client everything was ok. 
///////////////////////////////////////////////////////////////////////// respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE); respad.put(*rsock); rsock->end_of_message(); ///////////////////////////////////////////////////////////////////////// // Set up a thread (a process under unix) to read ALL of the job files // for all of the ads in the TransferRequest. ///////////////////////////////////////////////////////////////////////// // now create a thread, passing in the sock, which uses the file transfer // object to accept the files. if (transfer_reaper_id == -1) { // only set this up ONCE so each and every thread gets one. transfer_reaper_id = daemonCore->Register_Reaper( "write_files_reaper", (ReaperHandlercpp) &TransferD::write_files_reaper, "write_files_reaper", this ); } thread_arg = new ThreadArg(protocol, treq); // Start a new thread (process on Unix) to do the work tid = daemonCore->Create_Thread( (ThreadStartFunc)&TransferD::write_files_thread, (void *)thread_arg, rsock, transfer_reaper_id ); if (tid == FALSE) { // XXX How do I handle this failure? } // associate the tid with the request so I can deal with it propery in // the reaper m_client_to_transferd_threads.insert(tid, treq); // The stream is inherited to the thread, who does the transfer and // finishes the protocol, but in the parent, I'm closing it. return CLOSE_STREAM; }
void Triggerd::init() { std::string trigger_log; ClassAd* ad; HashKey key; uint32_t key_value; ReliSock* sock; int index; char* host; char* tmp; char* dataDir = NULL; char* username; char* password; char* mechanism; int port, interval; std::string storefile; std::string error_text; std::stringstream int_str; qpid::management::ConnectionSettings settings; bool enable_console = true; dprintf(D_FULLDEBUG, "Triggerd::init called\n"); char* name = param("TRIGGERD_NAME"); if (name) { char* valid_name = build_valid_daemon_name(name); daemonName = valid_name; delete[] name; delete[] valid_name; } else { char* default_name = build_valid_daemon_name("triggerd"); if(default_name) { daemonName = default_name; delete[] default_name; } } port = param_integer("QMF_BROKER_PORT", 5672); if (NULL == (host = param("QMF_BROKER_HOST"))) { host = strdup("localhost"); } if (NULL == (username = param("QMF_BROKER_USERNAME"))) { username = strdup(""); } if (NULL == (mechanism = param("QMF_BROKER_AUTH_MECH"))) { mechanism = strdup("ANONYMOUS"); } tmp = param("QMF_STOREFILE"); if (NULL == tmp) { storefile = ".triggerd_storefile"; } else { storefile = tmp; free(tmp); tmp = NULL; } interval = param_integer("QMF_UPDATE_INTERVAL", 10); password = getBrokerPassword(); dataDir = param("DATA"); ASSERT(dataDir); trigger_log = dataDir; trigger_log += "/triggers.log"; triggerCollection = new ClassAdCollection(NULL, trigger_log.c_str()); free(dataDir); settings.host = std::string(host); settings.port = port; settings.username = std::string(username); settings.password = std::string(password); settings.mechanism = std::string(mechanism); // Initialize the QMF agent singleton = new ManagementAgent::Singleton(); ManagementAgent* agent = singleton->getInstance(); CondorTriggerService::registerSelf(agent); CondorTrigger::registerSelf(agent); EventCondorTriggerNotify::registerSelf(agent); agent->setName("com.redhat.grid","condortriggerservice", daemonName.c_str()); agent->init(settings, interval, true, 
storefile); mgmtObject = new CondorTriggerService(agent, this); // Initialize the QMF console, if desired enable_console = param_boolean("ENABLE_ABSENT_NODES_DETECTION", false); if (true == enable_console) { console = new TriggerConsole(); console->config(host, port, username, password, mechanism); } free(host); free(username); free(password); free(mechanism); // Initialize the triggers if any already exist triggerCollection->StartIterateAllClassAds(); while(true == triggerCollection->IterateAllClassAds(ad, key)) { key_value = atoll(key.value()); if (triggers.end() == triggers.find(key_value)) { if (STATUS_OK != AddTriggerToCollection(key_value, ad, error_text)) { dprintf(D_ALWAYS, "Triggerd Error: '%s'. Removing trigger\n", error_text.c_str()); int_str << key_value; triggerCollection->DestroyClassAd(int_str.str().c_str()); } } } bool lifetime = param_boolean("QMF_IS_PERSISTENT", true); agent->addObject(mgmtObject, daemonName.c_str(), lifetime); // Create a socket to handle management method calls sock = new ReliSock; if (NULL == sock) { EXCEPT("Failed to create Managment socket"); } if (0 == sock->assign(agent->getSignalFd())) { EXCEPT("Failed to bind Management socket"); } if (-1 == (index = daemonCore->Register_Socket((Stream *) sock, "Management Method Socket", (SocketHandlercpp) &Triggerd::HandleMgmtSocket, "Handler for Management Methods", this))) { EXCEPT("Failed to register Management socket"); } config(); }
bool DCStarter::startSSHD(char const *known_hosts_file,char const *private_client_key_file,char const *preferred_shells,char const *slot_name,char const *ssh_keygen_args,ReliSock &sock,int timeout,char const *sec_session_id,MyString &remote_user,MyString &error_msg,bool &retry_is_sensible) { retry_is_sensible = false; #ifndef HAVE_SSH_TO_JOB error_msg = "This version of Condor does not support ssh key exchange."; return false; #else if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(START_SSHD, &sock,timeout,NULL,NULL,false,sec_session_id) ) { error_msg = "Failed to send START_SSHD to starter"; return false; } ClassAd input; if( preferred_shells && *preferred_shells ) { input.Assign(ATTR_SHELL,preferred_shells); } if( slot_name && *slot_name ) { // This is a little silly. // We are telling the remote side the name of the slot so // that it can put it in the welcome message. input.Assign(ATTR_NAME,slot_name); } if( ssh_keygen_args && *ssh_keygen_args ) { input.Assign(ATTR_SSH_KEYGEN_ARGS,ssh_keygen_args); } sock.encode(); if( !putClassAd(&sock, input) || !sock.end_of_message() ) { error_msg = "Failed to send START_SSHD request to starter"; return false; } ClassAd result; sock.decode(); if( !getClassAd(&sock, result) || !sock.end_of_message() ) { error_msg = "Failed to read response to START_SSHD from starter"; return false; } bool success = false; result.LookupBool(ATTR_RESULT,success); if( !success ) { std::string remote_error_msg; result.LookupString(ATTR_ERROR_STRING,remote_error_msg); error_msg.formatstr("%s: %s",slot_name,remote_error_msg.c_str()); retry_is_sensible = false; result.LookupBool(ATTR_RETRY,retry_is_sensible); return false; } result.LookupString(ATTR_REMOTE_USER,remote_user); std::string public_server_key; if( !result.LookupString(ATTR_SSH_PUBLIC_SERVER_KEY,public_server_key) ) { error_msg = "No public ssh server key received in reply to START_SSHD"; return false; } std::string 
private_client_key; if( !result.LookupString(ATTR_SSH_PRIVATE_CLIENT_KEY,private_client_key) ) { error_msg = "No ssh client key received in reply to START_SSHD"; return false; } // store the private client key unsigned char *decode_buf = NULL; int length = -1; condor_base64_decode(private_client_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh client key."; return false; } FILE *fp = safe_fcreate_fail_if_exists(private_client_key_file,"a",0400); if( !fp ) { error_msg.formatstr("Failed to create %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.formatstr("Failed to write to %s: %s", private_client_key_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.formatstr("Failed to close %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } fp = NULL; free( decode_buf ); decode_buf = NULL; // store the public server key in the known_hosts file length = -1; condor_base64_decode(public_server_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh server key."; return false; } fp = safe_fcreate_fail_if_exists(known_hosts_file,"a",0600); if( !fp ) { error_msg.formatstr("Failed to create %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } // prepend a host name pattern (*) to the public key to make a valid // record in the known_hosts file fprintf(fp,"* "); if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.formatstr("Failed to write to %s: %s", known_hosts_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.formatstr("Failed to close %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } fp = NULL; free( decode_buf ); decode_buf = NULL; return true; #endif }
// sending command to remote replication daemon; specified command function // allows to specify which data is to be sent to the remote daemon void AbstractReplicatorStateMachine::sendCommand( int command, char* daemonSinfulString, CommandFunction function ) { dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand %s to %s\n", utilToString( command ), daemonSinfulString ); Daemon daemon( DT_ANY, daemonSinfulString ); ReliSock socket; // no retries after 'm_connectionTimeout' seconds of unsuccessful connection socket.timeout( m_connectionTimeout ); socket.doNotEnforceMinimalCONNECT_TIMEOUT( ); if( ! socket.connect( daemonSinfulString, 0, false ) ) { dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand " "unable to connect to %s\n", daemonSinfulString ); socket.close( ); return ; } // General actions for any command sending if( ! daemon.startCommand( command, &socket, m_connectionTimeout ) ) { dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand " "cannot start command %s to %s\n", utilToString( command ), daemonSinfulString ); socket.close( ); return ; } char const* sinfulString = daemonCore->InfoCommandSinfulString(); if(! socket.put( sinfulString )/* || ! socket.end_of_message( )*/) { dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand " "unable to code the local sinful string or eom%s\n", sinfulString ); socket.close( ); return ; } else { dprintf( D_FULLDEBUG, "AbstractReplicatorStateMachine::sendCommand " "local sinful string coded successfully\n" ); } // End of General actions for any command sending // Command-specific actions if( ! ((*this).*(function))( socket ) ) { socket.close( ); return ; } // End of Command-specific actions if( ! 
socket.end_of_message( ) ) { socket.close( ); dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand " "unable to code the end of message\n" ); return ; } socket.close( ); dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand " "%s command sent to %s successfully\n", utilToString( command ), daemonSinfulString ); }
bool DCStarter::peek(bool transfer_stdout, ssize_t &stdout_offset, bool transfer_stderr, ssize_t &stderr_offset, const std::vector<std::string> &filenames, std::vector<ssize_t> &offsets, size_t max_bytes, bool &retry_sensible, PeekGetFD &next, std::string &error_msg, unsigned timeout, const std::string &sec_session_id, DCTransferQueue *xfer_q) { compat_classad::ClassAd ad; ad.InsertAttr(ATTR_JOB_OUTPUT, transfer_stdout); ad.InsertAttr("OutOffset", stdout_offset); ad.InsertAttr(ATTR_JOB_ERROR, transfer_stderr); ad.InsertAttr("ErrOffset", stderr_offset); ad.InsertAttr(ATTR_VERSION, CondorVersion()); size_t total_files = 0; total_files += transfer_stdout ? 1 : 0; total_files += transfer_stderr ? 1 : 0; if (filenames.size()) { total_files += filenames.size(); std::vector<classad::ExprTree *> filelist; filelist.reserve(filenames.size()); std::vector<classad::ExprTree *> offsetlist; offsetlist.reserve(filenames.size()); std::vector<ssize_t>::const_iterator it2 = offsets.begin(); for (std::vector<std::string>::const_iterator it = filenames.begin(); it != filenames.end() && it2 != offsets.end(); it++, it2++) { classad::Value value; value.SetStringValue(*it); filelist.push_back(classad::Literal::MakeLiteral(value)); value.SetIntegerValue(*it2); offsetlist.push_back(classad::Literal::MakeLiteral(value)); } classad::ExprTree *list(classad::ExprList::MakeExprList(filelist)); ad.Insert("TransferFiles", list); list = classad::ExprList::MakeExprList(offsetlist); ad.Insert("TransferOffsets", list); } ad.InsertAttr(ATTR_MAX_TRANSFER_BYTES, static_cast<long long>(max_bytes)); ReliSock sock; if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(STARTER_PEEK, &sock, timeout, NULL, NULL, false, sec_session_id.c_str()) ) { error_msg = "Failed to send START_PEEK to starter"; return false; } sock.encode(); if (!putClassAd(&sock, ad) || !sock.end_of_message()) { error_msg = "Failed to send request to starter"; return false; 
} compat_classad::ClassAd response; sock.decode(); if (!getClassAd(&sock, response) || !sock.end_of_message()) { error_msg = "Failed to read response for peeking at logs."; return false; } dPrintAd(D_FULLDEBUG, response); bool success = false; if (!response.EvaluateAttrBool(ATTR_RESULT, success) || !success) { response.EvaluateAttrBool(ATTR_RETRY, retry_sensible); error_msg = "Remote operation failed."; response.EvaluateAttrString(ATTR_ERROR_STRING, error_msg); return false; } classad::Value valueX; classad_shared_ptr<classad::ExprList> list; if (!response.EvaluateAttr("TransferFiles", valueX) || !valueX.IsSListValue(list)) { error_msg = "Unable to evaluate starter response"; return false; } classad_shared_ptr<classad::ExprList> offlist; if (!response.EvaluateAttr("TransferOffsets", valueX) || !valueX.IsSListValue(offlist)) { error_msg = "Unable to evaluate starter response (missing offsets)"; return false; } size_t remaining = max_bytes; size_t file_count = 0; classad::ExprList::const_iterator it2 = offlist->begin(); for (classad::ExprList::const_iterator it = list->begin(); it != list->end() && it2 != offlist->end(); it++, it2++) { classad::Value value; (*it2)->Evaluate(value); off_t off = -1; value.IsIntegerValue(off); (*it)->Evaluate(value); std::string filename; int64_t xfer_fd = -1; if (!value.IsStringValue(filename) && value.IsIntegerValue(xfer_fd)) { if (xfer_fd == 0) filename = "_condor_stdout"; if (xfer_fd == 1) filename = "_condor_stderr"; } int fd = next.getNextFD(filename); filesize_t size = -1; int retval; if ((retval = sock.get_file(&size, fd, false, false, remaining, xfer_q)) && (retval != GET_FILE_MAX_BYTES_EXCEEDED)) { error_msg = "Internal error when transferring file " + filename; } else if (size >= 0) { remaining -= max_bytes; file_count++; off += size; } else { error_msg = "Failed to transfer file " + filename; } if (xfer_fd == 0) { stdout_offset = off; //dprintf(D_FULLDEBUG, "New stdout offset: %ld\n", stdout_offset); } else if (xfer_fd == 1) 
{ stderr_offset = off; } else { std::vector<ssize_t>::iterator it4 = offsets.begin(); for (std::vector<std::string>::const_iterator it3 = filenames.begin(); it3 != filenames.end() && it4 != offsets.end(); it3++, it4++) { if (*it3 == filename) *it4 = off; } } } size_t remote_file_count; if (!sock.get(remote_file_count) || !sock.end_of_message()) { error_msg = "Unable to get remote file count."; return false; } if (file_count != remote_file_count) { std::stringstream ss; ss << "Recieved " << file_count << " files, but remote side thought it sent " << remote_file_count << " files"; error_msg = ss.str(); return false; } if ((total_files != file_count) && !error_msg.size()) { error_msg = "At least one file transfer failed."; return false; } return true; }
int part_send_job( int test_starter, char *host, int &reason, char *capability, char * /*schedd*/, PROC *proc, int &sd1, int &sd2, char **name) { int reply; ReliSock *sock = NULL; StartdRec stRec; PORTS ports; bool done = false; int retry_delay = 3; int num_retries = 0; // make sure we have the job classad InitJobAd(proc->id.cluster, proc->id.proc); while( !done ) { Daemon startd(DT_STARTD, host, NULL); if (!(sock = (ReliSock*)startd.startCommand ( ACTIVATE_CLAIM, Stream::reli_sock, 90))) { dprintf( D_ALWAYS, "startCommand(ACTIVATE_CLAIM) to startd failed\n"); goto returnfailure; } // Send the capability ClaimIdParser idp( capability ); dprintf(D_FULLDEBUG, "send capability %s\n", idp.publicClaimId() ); if( !sock->put_secret(capability) ) { dprintf( D_ALWAYS, "sock->put(\"%s\") failed\n",idp.publicClaimId()); goto returnfailure; } // Send the starter number if( test_starter ) { dprintf( D_ALWAYS, "Requesting Alternate Starter %d\n", test_starter ); } else { dprintf( D_ALWAYS, "Requesting Primary Starter\n" ); } if( !sock->code(test_starter) ) { dprintf( D_ALWAYS, "sock->code(%d) failed\n", test_starter ); goto returnfailure; } // Send the job info if( !JobAd->put(*sock) ) { dprintf( D_ALWAYS, "failed to send job ad\n" ); goto returnfailure; } if( !sock->end_of_message() ) { dprintf( D_ALWAYS, "failed to send message to startd\n" ); goto returnfailure; } // We're done sending. Now, get the reply. 
sock->decode(); if( !sock->code(reply) || !sock->end_of_message() ) { dprintf( D_ALWAYS, "failed to receive reply from startd\n" ); goto returnfailure; } switch( reply ) { case OK: dprintf( D_ALWAYS, "Shadow: Request to run a job was ACCEPTED\n" ); done = true; break; case NOT_OK: dprintf( D_ALWAYS, "Shadow: Request to run a job was REFUSED\n"); goto returnfailure; break; case CONDOR_TRY_AGAIN: num_retries++; dprintf( D_ALWAYS, "Shadow: Request to run a job was TEMPORARILY REFUSED\n" ); if( num_retries > 20 ) { dprintf( D_ALWAYS, "Shadow: Too many retries, giving up.\n" ); goto returnfailure; } delete sock; dprintf( D_ALWAYS, "Shadow: will try again in %d seconds\n", retry_delay ); sleep( retry_delay ); break; default: dprintf(D_ALWAYS,"Unknown reply from startd for command ACTIVATE_CLAIM\n"); dprintf(D_ALWAYS,"Shadow: Request to run a job was REFUSED\n"); goto returnfailure; break; } } /* start flock : dhruba */ sock->decode(); memset( &stRec, '\0', sizeof(stRec) ); if( !sock->code(stRec) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "Can't read reply from startd.\n"); goto returnfailure; } ports = stRec.ports; if( stRec.ip_addr ) { host = stRec.server_name; if(name) { *name = strdup(stRec.server_name); } dprintf(D_FULLDEBUG, "host = %s inet_addr = 0x%x port1 = %d port2 = %d\n", host, stRec.ip_addr,ports.port1, ports.port2 ); } else { dprintf(D_FULLDEBUG, "host = %s port1 = %d port2 = %d\n", host, ports.port1, ports.port2 ); } if( ports.port1 == 0 ) { dprintf( D_ALWAYS, "Shadow: Request to run a job on %s was REFUSED\n", host ); goto returnfailure; } /* end flock ; dhruba */ // We don't use the server_name in the StartdRec, because our // DNS query may fail or may give us the wrong IP address // (either because it's stale or because we're talking to a // machine with multiple network interfaces). Sadly, we can't // use the ip_addr either, because the startd doesn't send it in // the correct byte ordering on little-endian machines. 
So, we // grab the IP address from the ReliSock, since we konw the // startd always uses the same IP address for all of its // communication. char sinfulstring[SINFUL_STRING_BUF_SIZE]; generate_sinful(sinfulstring, SINFUL_STRING_BUF_SIZE, sock->peer_ip_str(), ports.port1); if( (sd1 = do_connect(sinfulstring, (char *)0, (u_short)ports.port1)) < 0 ) { dprintf( D_ALWAYS, "failed to connect to scheduler on %s\n", sinfulstring ); goto returnfailure; } generate_sinful(sinfulstring, SINFUL_STRING_BUF_SIZE, sock->peer_ip_str(), ports.port2); if( (sd2 = do_connect(sinfulstring, (char *)0, (u_short)ports.port2)) < 0 ) { dprintf( D_ALWAYS, "failed to connect to scheduler on %s\n", sinfulstring ); close(sd1); goto returnfailure; } delete sock; sock = NULL; if ( stRec.server_name ) { free( stRec.server_name ); } return 0; returnfailure: reason = JOB_NOT_STARTED; delete sock; return -1; }