Example #1
0
bool cp_supports_policy(ClassAd& resource, bool strict) {
    // currently, only p-slots can support a functional consumption policy
    if (strict) {
        bool part = false;
        if (!resource.LookupBool(ATTR_SLOT_PARTITIONABLE, part)) part = false;
        if (!part) return false;
    }

    // must support MachineResources attribute
    string mrv;
    if (!resource.LookupString(ATTR_MACHINE_RESOURCES, mrv)) return false;

    // must define ConsumptionXxx for all resources Xxx (including extensible resources)
    StringList alist(mrv.c_str());
    alist.rewind();
    while (char* asset = alist.next()) {
        if (MATCH == strcasecmp(asset, "swap")) continue;
        string ca;
        formatstr(ca, "%s%s", ATTR_CONSUMPTION_PREFIX, asset);
        ClassAd::iterator f(resource.find(ca));
        if (f == resource.end()) return false;
    }

    return true;
}
Example #2
0
//
// JobExit() is called after file transfer.
//
bool DockerProc::JobExit() {
	dprintf( D_ALWAYS, "DockerProc::JobExit()\n" );

	{
	TemporaryPrivSentry sentry(PRIV_ROOT);
	ClassAd dockerAd;
	CondorError error;
	int rv = DockerAPI::inspect( containerName, & dockerAd, error );
	if( rv < 0 ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to inspect (for removal) container '%s'.\n", containerName.c_str() );
		return VanillaProc::JobExit();
	}

	bool running;
	if( ! dockerAd.LookupBool( "Running", running ) ) {
		dprintf( D_ALWAYS | D_FAILURE, "Inspection of container '%s' failed to reveal its running state.\n", containerName.c_str() );
		return VanillaProc::JobExit();
	}
	if( running ) {
		dprintf( D_ALWAYS | D_FAILURE, "Inspection reveals that container '%s' is still running.\n", containerName.c_str() );
		return VanillaProc::JobExit();
	}

	rv = DockerAPI::rm( containerName, error );
	if( rv < 0 ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to remove container '%s'.\n", containerName.c_str() );
	}
	}

	return VanillaProc::JobExit();
}
Example #3
0
bool
BaseShadow::jobWantsGracefulRemoval()
{
	bool job_wants_graceful_removal = false;
	ClassAd *thejobAd = getJobAd();
	if( thejobAd ) {
		thejobAd->LookupBool( ATTR_WANT_GRACEFUL_REMOVAL, job_wants_graceful_removal );
	}
	return job_wants_graceful_removal;
}
Example #4
0
Starter*
StarterMgr::makeStarter( const char* path )
{
	Starter* new_starter;
	FILE* fp;
	char *args[] = { const_cast<char*>(path),
					 const_cast<char*>("-classad"),
					 NULL };
	char buf[1024];

		// first, try to execute the given path with a "-classad"
		// option, and grab the output as a ClassAd
		// note we run the starter here as root if possible,
		// since that is how the starter will be invoked for real,
		// and the real uid of the starter may influence the
		// list of capabilities the "-classad" option returns.
	{
		TemporaryPrivSentry sentry(PRIV_ROOT);
		fp = my_popenv( args, "r", FALSE );
	}

	if( ! fp ) {
		dprintf( D_ALWAYS, "Failed to execute %s, ignoring\n", path );
		return NULL;
	}
	ClassAd* ad = new ClassAd;
	bool read_something = false;
	while( fgets(buf, 1024, fp) ) {
		read_something = true;
		if( ! ad->Insert(buf) ) {
			dprintf( D_ALWAYS, "Failed to insert \"%s\" into ClassAd, "
					 "ignoring invalid starter\n", buf );
			delete( ad );
			pclose( fp );
			return NULL;
		}
	}
	my_pclose( fp );
	if( ! read_something ) {
		dprintf( D_ALWAYS, 
				 "\"%s -classad\" did not produce any output, ignoring\n", 
				 path ); 
		delete( ad );
		return NULL;
	}

	new_starter = new Starter();
	new_starter->setAd( ad );
	new_starter->setPath( path );
	int is_dc = 0;
	ad->LookupBool( ATTR_IS_DAEMON_CORE, is_dc );
	new_starter->setIsDC( (bool)is_dc );

	return new_starter;
}
Example #5
0
int TransferQueueManager::HandleRequest(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == TRANSFER_QUEUE_REQUEST );

	ClassAd msg;
	sock->decode();
	if( !getClassAd( sock, msg ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"TransferQueueManager: failed to receive transfer request "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

	bool downloading = false;
	MyString fname;
	MyString jobid;
	MyString queue_user;
	filesize_t sandbox_size;
	if( !msg.LookupBool(ATTR_DOWNLOADING,downloading) ||
		!msg.LookupString(ATTR_FILE_NAME,fname) ||
		!msg.LookupString(ATTR_JOB_ID,jobid) ||
		!msg.LookupString(ATTR_USER,queue_user) ||
		!msg.LookupInteger(ATTR_SANDBOX_SIZE,sandbox_size))
	{
		MyString msg_str;
		sPrintAd(msg_str, msg);
		dprintf(D_ALWAYS,"TransferQueueManager: invalid request from %s: %s\n",
				sock->peer_description(), msg_str.Value());
		return FALSE;
	}

		// Currently, we just create the client with the default max queue
		// age.  If it becomes necessary to customize the maximum age
		// on a case-by-case basis, it should be easy to adjust.

	TransferQueueRequest *client =
		new TransferQueueRequest(
			sock,
			sandbox_size,
			fname.Value(),
			jobid.Value(),
			queue_user.Value(),
			downloading,
			m_default_max_queue_age);

	if( !AddRequest( client ) ) {
		delete client;
		return KEEP_STREAM; // we have already closed this socket
	}

	return KEEP_STREAM;
}
Example #6
0
bool
DCStarter::createJobOwnerSecSession(int timeout,char const *job_claim_id,char const *starter_sec_session,char const *session_info,MyString &owner_claim_id,MyString &error_msg,MyString &starter_version,MyString &starter_addr)
{
	ReliSock sock;

	if (IsDebugLevel(D_COMMAND)) {
		dprintf (D_COMMAND, "DCStarter::createJobOwnerSecSession(%s,...) making connection to %s\n",
			getCommandStringSafe(CREATE_JOB_OWNER_SEC_SESSION), _addr ? _addr : "NULL");
	}

	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(CREATE_JOB_OWNER_SEC_SESSION, &sock,timeout,NULL,NULL,false,starter_sec_session) ) {
		error_msg = "Failed to send CREATE_JOB_OWNER_SEC_SESSION to starter";
		return false;
	}

	ClassAd input;
	input.Assign(ATTR_CLAIM_ID,job_claim_id);
	input.Assign(ATTR_SESSION_INFO,session_info);

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to compose CREATE_JOB_OWNER_SEC_SESSION to starter";
		return false;
	}

	sock.decode();

	ClassAd reply;
	if( !getClassAd(&sock, reply) || !sock.end_of_message() ) {
		error_msg = "Failed to get response to CREATE_JOB_OWNER_SEC_SESSION from starter";
		return false;
	}

	bool success = false;
	reply.LookupBool(ATTR_RESULT,success);
	if( !success ) {
		reply.LookupString(ATTR_ERROR_STRING,error_msg);
		return false;
	}

	reply.LookupString(ATTR_CLAIM_ID,owner_claim_id);
	reply.LookupString(ATTR_VERSION,starter_version);
		// get the full starter address from the starter in case it contains
		// extra CCB info that we don't already know about
	reply.LookupString(ATTR_STARTER_IP_ADDR,starter_addr);
	return true;
}
Example #7
0
Starter*
StarterMgr::makeStarter( const char* path )
{
	Starter* new_starter;
	FILE* fp;
	char *args[] = { const_cast<char*>(path),
					 const_cast<char*>("-classad"),
					 NULL };
	char buf[1024];

		// first, try to execute the given path with a "-classad"
		// option, and grab the output as a ClassAd
	fp = my_popenv( args, "r", FALSE );

	if( ! fp ) {
		dprintf( D_ALWAYS, "Failed to execute %s, ignoring\n", path );
		return NULL;
	}
	ClassAd* ad = new ClassAd;
	bool read_something = false;
	while( fgets(buf, 1024, fp) ) {
		read_something = true;
		if( ! ad->Insert(buf) ) {
			dprintf( D_ALWAYS, "Failed to insert \"%s\" into ClassAd, "
					 "ignoring invalid starter\n", buf );
			delete( ad );
			pclose( fp );
			return NULL;
		}
	}
	my_pclose( fp );
	if( ! read_something ) {
		dprintf( D_ALWAYS, 
				 "\"%s -classad\" did not produce any output, ignoring\n", 
				 path ); 
		delete( ad );
		return NULL;
	}

	new_starter = new Starter();
	new_starter->setAd( ad );
	new_starter->setPath( path );
	int is_dc = 0;
	ad->LookupBool( ATTR_IS_DAEMON_CORE, is_dc );
	new_starter->setIsDC( (bool)is_dc );

	return new_starter;
}
Example #8
0
bool
BaseShadow::jobWantsGracefulRemoval()
{
	if ( m_force_fast_starter_shutdown ) {
		return false;
	}
	bool job_wants_graceful_removal = param_boolean("GRACEFULLY_REMOVE_JOBS", true);
	bool job_request;
	ClassAd *thejobAd = getJobAd();
	if( thejobAd ) {
		if( thejobAd->LookupBool( ATTR_WANT_GRACEFUL_REMOVAL, job_request ) ) {
			job_wants_graceful_removal = job_request;
		}
	}
	return job_wants_graceful_removal;
}
Example #9
0
void Slot::update(const ClassAd& ad)
{
    MGMT_DECLARATIONS;
    DaemonCollectable::update(ad);
    Slot& m_stats = *this;
    ad.LookupBool(ATTR_SLOT_DYNAMIC,m_stats.DynamicSlot);
    STRING(SlotType);
    upper_case(SlotType);
    STRING(Arch);
    STRING(OpSys);
    STRING(Activity);
    STRING(State);
    INTEGER(Cpus);
    INTEGER(Disk);
    INTEGER(Memory);
    INTEGER(Swap);
    INTEGER(Mips);
    DOUBLE(LoadAvg);
    STRING(Start);
    STRING(FileSystemDomain);
}
Example #10
0
bool DockerProc::JobReaper( int pid, int status ) {
	TemporaryPrivSentry sentry(PRIV_ROOT);
	dprintf( D_ALWAYS, "DockerProc::JobReaper()\n" );

	//
	// This should mean that the container has terminated.
	//
	if( pid == JobPid ) {
		//
		// Even running Docker in attached mode, we have a race condition
		// is exiting when the container exits, not when the docker daemon
		// notices that the container has exited.
		//

		int rv = -1;
		bool running = false;
		ClassAd dockerAd;
		CondorError error;
		// Years of careful research.
		for( int i = 0; i < 20; ++i ) {
			rv = DockerAPI::inspect( containerName, & dockerAd, error );
			if( rv < 0 ) {
				dprintf( D_FULLDEBUG, "Failed to inspect (for removal) container '%s'; sleeping a second (%d already slept) to give Docker a chance to catch up.\n", containerName.c_str(), i );
				sleep( 1 );
				continue;
			}

			if( ! dockerAd.LookupBool( "Running", running ) ) {
				dprintf( D_FULLDEBUG, "Inspection of container '%s' failed to reveal its running state; sleeping a second (%d already slept) to give Docke a chance to catch up.\n", containerName.c_str(), i );
				sleep( 1 );
				continue;
			}

			if( running ) {
				dprintf( D_FULLDEBUG, "Inspection reveals that container '%s' is still running; sleeping a second (%d already slept) to give Docker a chance to catch up.\n", containerName.c_str(), i );
				sleep( 1 );
				continue;
			}

			break;
		}

// FIXME: Move all this shared conditional-checking into a function.

		if( rv < 0 ) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to inspect (for removal) container '%s'.\n", containerName.c_str() );
			std::string imageName;
			if( ! JobAd->LookupString( ATTR_DOCKER_IMAGE, imageName ) ) {
				dprintf( D_ALWAYS | D_FAILURE, "%s not defined in job ad.\n", ATTR_DOCKER_IMAGE );
				imageName = "Unknown"; // shouldn't ever happen
			}

			std::string message;
			formatstr(message, "Cannot start container: invalid image name: %s", imageName.c_str());

			Starter->jic->holdJob(message.c_str(), CONDOR_HOLD_CODE_InvalidDockerImage, 0);
			return VanillaProc::JobReaper( pid, status );
		}

		if( ! dockerAd.LookupBool( "Running", running ) ) {
			dprintf( D_ALWAYS | D_FAILURE, "Inspection of container '%s' failed to reveal its running state.\n", containerName.c_str() );
			return VanillaProc::JobReaper( pid, status );
		}

		if( running ) {
			dprintf( D_ALWAYS | D_FAILURE, "Inspection reveals that container '%s' is still running.\n", containerName.c_str() );
			return VanillaProc::JobReaper( pid, status );
		}

		// FIXME: Rethink returning a classad.  Having to check for missing
		// attributes blows.

		// TODO: Set status appropriately (as if it were from waitpid()).
		std::string oomkilled;
		if (! dockerAd.LookupString( "OOMKilled", oomkilled)) {
			dprintf( D_ALWAYS | D_FAILURE, "Inspection of container '%s' failed to reveal whether it was OOM killed. Assuming it was not.\n", containerName.c_str() );
		}
		
		if (oomkilled.find("true") == 0) {
			ClassAd *machineAd = Starter->jic->machClassAd();
			int memory;
			machineAd->LookupInteger(ATTR_MEMORY, memory);
			std::string message;
			formatstr(message, "Docker job exhaused %d Mb memory", memory);
			dprintf(D_ALWAYS, "%s, going on hold\n", message.c_str());

			
			Starter->jic->holdJob(message.c_str(), CONDOR_HOLD_CODE_JobOutOfResources, 0);
			DockerAPI::rm( containerName, error );

			if ( Starter->Hold( ) ) {
				Starter->allJobsDone();
				this->JobExit();
			}

			Starter->ShutdownFast();
			return 0;
		}

			// See if docker could not run the job
			// most likely invalid executable
		std::string dockerError;
		if (! dockerAd.LookupString( "DockerError", dockerError)) {
			dprintf( D_ALWAYS | D_FAILURE, "Inspection of container '%s' failed to reveal whether there was an internal docker error.\n", containerName.c_str() );
		}

		if (dockerError.length() > 0) {
			std::string message;
			formatstr(message, "Error running docker job: %s", dockerError.c_str());
			dprintf(D_ALWAYS, "%s, going on hold\n", message.c_str());

			
			Starter->jic->holdJob(message.c_str(), CONDOR_HOLD_CODE_FailedToCreateProcess, 0);
			DockerAPI::rm( containerName, error );

			if ( Starter->Hold( ) ) {
				Starter->allJobsDone();
				this->JobExit();
			}

			Starter->ShutdownFast();
			return 0;
		} 
		
		int dockerStatus;
		if( ! dockerAd.LookupInteger( "ExitCode", dockerStatus ) ) {
			dprintf( D_ALWAYS | D_FAILURE, "Inspection of container '%s' failed to reveal its exit code.\n", containerName.c_str() );
			return VanillaProc::JobReaper( pid, status );
		}
		dprintf( D_FULLDEBUG, "Setting status of Docker job to %d.\n", dockerStatus );
		status = dockerStatus;

		// TODO: Record final job usage.

		// We don't have to do any process clean-up, because container.
		// We'll do the disk clean-up after we've transferred files.
	}

	// This helps to make ssh-to-job more plausible.
	return VanillaProc::JobReaper( pid, status );
}
Example #11
0
bool DCStarter::startSSHD(char const *known_hosts_file,char const *private_client_key_file,char const *preferred_shells,char const *slot_name,char const *ssh_keygen_args,ReliSock &sock,int timeout,char const *sec_session_id,MyString &remote_user,MyString &error_msg,bool &retry_is_sensible)
{

	retry_is_sensible = false;

#ifndef HAVE_SSH_TO_JOB
	error_msg = "This version of Condor does not support ssh key exchange.";
	return false;
#else
	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(START_SSHD, &sock,timeout,NULL,NULL,false,sec_session_id) ) {
		error_msg = "Failed to send START_SSHD to starter";
		return false;
	}

	ClassAd input;

	if( preferred_shells && *preferred_shells ) {
		input.Assign(ATTR_SHELL,preferred_shells);
	}

	if( slot_name && *slot_name ) {
			// This is a little silly.
			// We are telling the remote side the name of the slot so
			// that it can put it in the welcome message.
		input.Assign(ATTR_NAME,slot_name);
	}

	if( ssh_keygen_args && *ssh_keygen_args ) {
		input.Assign(ATTR_SSH_KEYGEN_ARGS,ssh_keygen_args);
	}

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to send START_SSHD request to starter";
		return false;
	}

	ClassAd result;
	sock.decode();
	if( !getClassAd(&sock, result) || !sock.end_of_message() ) {
		error_msg = "Failed to read response to START_SSHD from starter";
		return false;
	}

	bool success = false;
	result.LookupBool(ATTR_RESULT,success);
	if( !success ) {
		std::string remote_error_msg;
		result.LookupString(ATTR_ERROR_STRING,remote_error_msg);
		error_msg.formatstr("%s: %s",slot_name,remote_error_msg.c_str());
		retry_is_sensible = false;
		result.LookupBool(ATTR_RETRY,retry_is_sensible);
		return false;
	}

	result.LookupString(ATTR_REMOTE_USER,remote_user);

	std::string public_server_key;
	if( !result.LookupString(ATTR_SSH_PUBLIC_SERVER_KEY,public_server_key) ) {
		error_msg = "No public ssh server key received in reply to START_SSHD";
		return false;
	}
	std::string private_client_key;
	if( !result.LookupString(ATTR_SSH_PRIVATE_CLIENT_KEY,private_client_key) ) {
		error_msg = "No ssh client key received in reply to START_SSHD";
		return false;
	}


		// store the private client key
	unsigned char *decode_buf = NULL;
	int length = -1;
	condor_base64_decode(private_client_key.c_str(),&decode_buf,&length);
	if( !decode_buf ) {
		error_msg = "Error decoding ssh client key.";
		return false;
	}
	FILE *fp = safe_fcreate_fail_if_exists(private_client_key_file,"a",0400);
	if( !fp ) {
		error_msg.formatstr("Failed to create %s: %s",
						  private_client_key_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	if( fwrite(decode_buf,length,1,fp)!=1 ) {
		error_msg.formatstr("Failed to write to %s: %s",
						  private_client_key_file,strerror(errno));
		fclose( fp );
		free( decode_buf );
		return false;
	}
	if( fclose(fp)!=0 ) {
		error_msg.formatstr("Failed to close %s: %s",
						  private_client_key_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	fp = NULL;
	free( decode_buf );
	decode_buf = NULL;


		// store the public server key in the known_hosts file
	length = -1;
	condor_base64_decode(public_server_key.c_str(),&decode_buf,&length);
	if( !decode_buf ) {
		error_msg = "Error decoding ssh server key.";
		return false;
	}
	fp = safe_fcreate_fail_if_exists(known_hosts_file,"a",0600);
	if( !fp ) {
		error_msg.formatstr("Failed to create %s: %s",
						  known_hosts_file,strerror(errno));
		free( decode_buf );
		return false;
	}

		// prepend a host name pattern (*) to the public key to make a valid
		// record in the known_hosts file
	fprintf(fp,"* ");

	if( fwrite(decode_buf,length,1,fp)!=1 ) {
		error_msg.formatstr("Failed to write to %s: %s",
						  known_hosts_file,strerror(errno));
		fclose( fp );
		free( decode_buf );
		return false;
	}

	if( fclose(fp)!=0 ) {
		error_msg.formatstr("Failed to close %s: %s",
						  known_hosts_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	fp = NULL;
	free( decode_buf );
	decode_buf = NULL;

	return true;
#endif
}
Example #12
0
bool 
OfflineCollectorPlugin::expire ( 
	ClassAd &ad )
{
	classad::Value result;
	bool val;

	dprintf (
		D_FULLDEBUG,
		"In OfflineCollectorPlugin::expire()\n" );

	/* bail out if the plug-in is not enabled, or if no ABSENT_REQUIREMENTS
	   have been defined */
	if ( !enabled() || !AbsentReq ) {
		return false;	// return false tells collector to delete this ad
	}

	/* for now, if the ad is of any type other than a startd ad, bail out. currently
	   absent ads only supported for ads of type Machine, because our offline storage
	   assumes that. */
	if ( strcmp(GetMyTypeName(ad),STARTD_ADTYPE) ) {
		return false;	// return false tells collector to delete this ad
	}
	/*	The ad may be a STARTD_PVT_ADTYPE, even though GetMyTypeName() claims 
		it is a STARTD_ADTYPE. Sigh. This is because the startd sends private 
		ads w/ the wrong type, because the query object queries private ads w/ the 
		wrong type. If I were to fix the startd to label private ads with the proper
		type, an incompatibility between startd/negotiator would have to be dealt with.
		So here we try to distinguish if this ad is really a STARTD_PVT_ADTYPE by seeing
		if a Capability attr is present and a State attr is not present. */
	if ( ad.Lookup(ATTR_CAPABILITY) && !ad.Lookup(ATTR_STATE) ) {
		// looks like a private ad, we don't want to store these
		return false;	// return false tells collector to delete this ad
	}


	/* If the ad is alraedy has ABSENT=True and it is expiring, then
	   let it be deleted as in this case it already sat around absent
	   for the absent lifetime. */
	bool already_absent = false;
	ad.LookupBool(ATTR_ABSENT,already_absent);
	if (already_absent) {
		MyString s;
		const char *key = makeOfflineKey(ad,s);
		if (key) {
			persistentRemoveAd(key);
		}
		return false; // return false tells collector to delete this ad
	}

	/* Test is ad against the absent requirements expression, and
	   mark the ad absent if true */
	if (EvalExprTree(AbsentReq,&ad,NULL,result) &&
		result.IsBooleanValue(val) && val) 
	{
		int lifetime, timestamp;

		lifetime = param_integer ( 
			"ABSENT_EXPIRE_ADS_AFTER",
			60 * 60 * 24 * 30 );	// default expire absent ads in a month		
		if ( lifetime == 0 ) lifetime = INT_MAX; // 0 means forever

		ad.Assign ( ATTR_ABSENT, true );
		ad.Assign ( ATTR_CLASSAD_LIFETIME, lifetime );
		timestamp = time(NULL);
		ad.Assign(ATTR_LAST_HEARD_FROM, timestamp);
		ad.Assign ( ATTR_MY_CURRENT_TIME, timestamp );
		persistentStoreAd(NULL,ad);
		// if we marked this ad as absent, we want to keep it in the collector
		return true;	// return true tells the collector to KEEP this ad
	}

	return false;	// return false tells collector to delete this ad
}
Example #13
0
void
CCBServer::HandleRequestResultsMsg( CCBTarget *target )
{
		// Reply from target daemon about whether it succeeded in
		// connecting to the requested client.

	Sock *sock = target->getSock();

	ClassAd msg;
	sock->decode();
	if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) {
			// disconnect
		dprintf(D_FULLDEBUG,
				"CCB: received disconnect from target daemon %s "
				"with ccbid %lu.\n",
				sock->peer_description(), target->getCCBID() );
		RemoveTarget( target );
		return;
	}

	int command = 0;
	if( msg.LookupInteger( ATTR_COMMAND, command ) && command == ALIVE ) {
		SendHeartbeatResponse( target );
		return;
	}

	target->decPendingRequestResults();

	bool success = false;
	MyString error_msg;
	MyString reqid_str;
	CCBID reqid;
	MyString connect_id;
	msg.LookupBool( ATTR_RESULT, success );
	msg.LookupString( ATTR_ERROR_STRING, error_msg );
	msg.LookupString( ATTR_REQUEST_ID, reqid_str );
	msg.LookupString( ATTR_CLAIM_ID, connect_id );

	if( !CCBIDFromString( reqid, reqid_str.Value() ) ) {
		MyString msg_str;
		msg.sPrint(msg_str);
		dprintf(D_ALWAYS,
				"CCB: received reply from target daemon %s with ccbid %lu "
				"without a valid request id: %s\n",
				sock->peer_description(),
				target->getCCBID(),
				msg_str.Value());
		RemoveTarget( target );
		return;
	}

	CCBServerRequest *request = GetRequest( reqid );
	if( request && request->getSock()->readReady() ) {
		// Request socket must have just closed.  To avoid noise in
		// logs when we fail to write to it, delete the request now.
		RemoveRequest( request );
		request = NULL;
	}

	char const *request_desc = "(client which has gone away)";
	if( request ) {
		request_desc = request->getSock()->peer_description();
	}

	if( success ) {
		dprintf(D_FULLDEBUG,"CCB: received 'success' from target daemon %s "
				"with ccbid %lu for "
				"request %s from %s.\n",
				sock->peer_description(),
				target->getCCBID(),
				reqid_str.Value(),
				request_desc);
	}
	else {
		dprintf(D_FULLDEBUG,"CCB: received error from target daemon %s "
				"with ccbid %lu for "
				"request %s from %s: %s\n",
				sock->peer_description(),
				target->getCCBID(),
				reqid_str.Value(),
				request_desc,
				error_msg.Value());
	}

	if( !request ) {
		if( success ) {
				// expected: the client has gone away; it got what it wanted
			return;
		}
		dprintf( D_FULLDEBUG,
				 "CCB: client for request %s to target daemon %s with ccbid "
				 "%lu disappeared before receiving error details.\n",
				 reqid_str.Value(),
				 sock->peer_description(),
				 target->getCCBID());
		return;
	}
	if( connect_id != request->getConnectID() ) {
		MyString msg_str;
		msg.sPrint(msg_str);
		dprintf( D_FULLDEBUG,
				 "CCB: received wrong connect id (%s) from target daemon %s "
				 "with ccbid %lu for "
				 "request %s\n",
				 connect_id.Value(),
				 sock->peer_description(),
				 target->getCCBID(),
				 reqid_str.Value());
		RemoveTarget( target );
		return;
	}

	RequestFinished( request, success, error_msg.Value() );
}
Example #14
0
bool DCSchedd::getJobConnectInfo(
	PROC_ID jobid,
	int subproc,
	char const *session_info,
	int timeout,
	CondorError *errstack,
	MyString &starter_addr,
	MyString &starter_claim_id,
	MyString &starter_version,
	MyString &slot_name,
	MyString &error_msg,
	bool &retry_is_sensible,
	int &job_status,
	MyString &hold_reason)
{
	ClassAd input;
	ClassAd output;

	input.Assign(ATTR_CLUSTER_ID,jobid.cluster);
	input.Assign(ATTR_PROC_ID,jobid.proc);
	if( subproc != -1 ) {
		input.Assign(ATTR_SUB_PROC_ID,subproc);
	}
	input.Assign(ATTR_SESSION_INFO,session_info);

	ReliSock sock;
	if( !connectSock(&sock,timeout,errstack) ) {
		error_msg = "Failed to connect to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( !startCommand(GET_JOB_CONNECT_INFO, &sock, timeout, errstack) ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( !forceAuthentication(&sock, errstack) ) {
		error_msg = "Failed to authenticate";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	sock.decode();
	if( !getClassAd(&sock, output) || !sock.end_of_message() ) {
		error_msg = "Failed to get response from schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( IsFulldebug(D_FULLDEBUG) ) {
		std::string adstr;
		sPrintAd(adstr, output, true);
		dprintf(D_FULLDEBUG,"Response for GET_JOB_CONNECT_INFO:\n%s\n",
				adstr.c_str());
	}

	bool result=false;
	output.LookupBool(ATTR_RESULT,result);

	if( !result ) {
		output.LookupString(ATTR_HOLD_REASON,hold_reason);
		output.LookupString(ATTR_ERROR_STRING,error_msg);
		retry_is_sensible = false;
		output.LookupBool(ATTR_RETRY,retry_is_sensible);
		output.LookupInteger(ATTR_JOB_STATUS,job_status);
	}
	else {
		output.LookupString(ATTR_STARTER_IP_ADDR,starter_addr);
		output.LookupString(ATTR_CLAIM_ID,starter_claim_id);
		output.LookupString(ATTR_VERSION,starter_version);
		output.LookupString(ATTR_REMOTE_HOST,slot_name);
	}

	return result;
}