Exemple #1
0
// when a transferd registers itself, it identifies who it is. The connection
// is then held open and the schedd periodically might send more transfer
// requests to the transferd. Also, if the transferd dies, the schedd is 
// informed quickly and reliably due to the closed connection.
bool
DCTransferD::setup_treq_channel(ReliSock **treq_sock_ptr, 
	int timeout, CondorError *errstack) 
{
	ReliSock *rsock;

	if (treq_sock_ptr != NULL) {
		// Our caller wants a pointer to the socket we used to succesfully
		// register the claim. The NULL pointer will represent failure and
		// this will only be set to something real if everything was ok.
		*treq_sock_ptr = NULL;
	}

	
	/////////////////////////////////////////////////////////////////////////
	// Connect to the transfer daemon
	/////////////////////////////////////////////////////////////////////////

	// This call with automatically connect to _addr, which was set in the
	// constructor of this object to be the transferd in question.
	rsock = (ReliSock*)startCommand(TRANSFERD_CONTROL_CHANNEL,
		Stream::reli_sock, timeout, errstack);

	if( ! rsock ) {
		dprintf( D_ALWAYS, "DCTransferD::setup_treq_channel: "
				 "Failed to send command (TRANSFERD_CONTROL_CHANNEL) "
				 "to the schedd\n" );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to start a TRANSFERD_CONTROL_CHANNEL command.");
		return false;
	}

	/////////////////////////////////////////////////////////////////////////
	// Make sure we are authenticated.
	/////////////////////////////////////////////////////////////////////////

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( rsock, errstack )) {
		dprintf( D_ALWAYS, "DCTransferD::setup_treq_channel() authentication "
				"failure: %s\n", errstack->getFullText().c_str() );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to authenticate properly.");
		return false;
	}

	rsock->encode();

	/////////////////////////////////////////////////////////////////////////
	// At this point, the socket passed all of the authentication protocols
	// so it is ready for use.
	/////////////////////////////////////////////////////////////////////////

	if (treq_sock_ptr)
		*treq_sock_ptr = rsock;

	return true;
}
Exemple #2
0
// download the files associated with the jobads to the sandbox at td_sinful
// with the supplied capability.
// The work_ad should contain:
//	ATTR_TREQ_CAPABILITY
//	ATTR_TREQ_FTP
//	ATTR_TREQ_JOBID_ALLOW_LIST
bool 
DCTransferD::download_job_files(ClassAd *work_ad, CondorError * errstack)
{
	ReliSock *rsock = NULL;
	int timeout = 60 * 60 * 8; // transfers take a long time...
	int i;
	ClassAd reqad, respad;
	std::string cap;
	int ftp;
	int invalid;
	int protocol;
	std::string reason;
	int num_transfers;
	ClassAd jad;
	const char *lhstr = NULL;
	ExprTree *tree = NULL;

	//////////////////////////////////////////////////////////////////////////
	// Connect to the transferd and authenticate
	//////////////////////////////////////////////////////////////////////////

	// This call with automatically connect to _addr, which was set in the
	// constructor of this object to be the transferd in question.
	rsock = (ReliSock*)startCommand(TRANSFERD_READ_FILES, Stream::reli_sock,
		timeout, errstack);
	if( ! rsock ) {
		dprintf( D_ALWAYS, "DCTransferD::download_job_files: "
				 "Failed to send command (TRANSFERD_READ_FILES) "
				 "to the schedd\n" );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to start a TRANSFERD_READ_FILES command.");
		return false;
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( rsock, errstack )) {
		dprintf( D_ALWAYS, "DCTransferD::download_job_files() authentication "
				"failure: %s\n", errstack->getFullText().c_str() );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to authenticate properly.");
		return false;
	}

	rsock->encode();

	//////////////////////////////////////////////////////////////////////////
	// Query the transferd about the capability/protocol and see if I can 
	// download my files. It will respond with a classad saying good or bad.
	//////////////////////////////////////////////////////////////////////////

	work_ad->LookupString(ATTR_TREQ_CAPABILITY, cap);
	work_ad->LookupInteger(ATTR_TREQ_FTP, ftp);

	reqad.Assign(ATTR_TREQ_CAPABILITY, cap);
	reqad.Assign(ATTR_TREQ_FTP, ftp);

	// This request ad to the transferd should contain:
	//	ATTR_TREQ_CAPABILITY
	//	ATTR_TREQ_FTP
	reqad.put(*rsock);
	rsock->end_of_message();

	rsock->decode();

	// This response ad from the transferd should contain:
	// ATTR_TREQ_INVALID_REQUEST (set to true)
	// ATTR_TREQ_INVALID_REASON
	//
	// OR
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	//	ATTR_TREQ_NUM_TRANSFERS
	//
	respad.initFromStream(*rsock);
	rsock->end_of_message();

	respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid);
	
	if (invalid == TRUE) {
		// The transferd rejected my attempt to upload the fileset
		delete rsock;
		respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
		errstack->push("DC_TRANSFERD", 1, reason.c_str());
		return false;
	}

	respad.LookupInteger(ATTR_TREQ_NUM_TRANSFERS, num_transfers);

	//////////////////////////////////////////////////////////////////////////
	// Based upon the protocol I've chosen, use that method to download the
	// files. When using the FileTrans protocol, a child process on the
	// transferd side will be sending me individual job ads and then 
	// instantiating a filetransfer object for that ad.
	//////////////////////////////////////////////////////////////////////////

	dprintf(D_ALWAYS, "Receiving fileset");
	work_ad->LookupInteger(ATTR_TREQ_FTP, protocol);
	switch(protocol) {
		case FTP_CFTP: // download the files using the FileTransfer Object
			for (i = 0; i < num_transfers; i++) {

				// Grab a job ad the server is sending us so we know what
				// to receive.
				jad.initFromStream(*rsock);
				rsock->end_of_message();

				// translate the job ad by replacing the 
				// saved SUBMIT_ attributes so the download goes into the
				// correct place.
				jad.ResetExpr();
				while( jad.NextExpr(lhstr, tree) ) {
					if ( lhstr && strncasecmp("SUBMIT_",lhstr,7)==0 ) {
							// this attr name starts with SUBMIT_
							// compute new lhs (strip off the SUBMIT_)
						const char *new_attr_name = strchr(lhstr,'_');
						ExprTree * pTree;
						ASSERT(new_attr_name);
						new_attr_name++;
							// insert attribute
						pTree = tree->Copy();
						jad.Insert(new_attr_name, pTree, false);
					}
				}	// while next expr
		
				// instantiate a filetransfer object and have it accept the
				// files.
				FileTransfer ftrans;
				if ( !ftrans.SimpleInit(&jad, false, false, rsock) )
				{
					delete rsock;
					errstack->push("DC_TRANSFERD", 1, 
						"Failed to initate uploading of files.");
					return false;
				}

				// We want files to be copied to their final places, so apply
				// any filename remaps when downloading.
				if ( !ftrans.InitDownloadFilenameRemaps(&jad) ) {
					return false;
				}

				ftrans.setPeerVersion( version() );

				if ( !ftrans.DownloadFiles() ) {
					delete rsock;
					errstack->push("DC_TRANSFERD", 1, 
						"Failed to download files.");
					return false;
				}

				dprintf(D_ALWAYS | D_NOHEADER, ".");
			}	
			rsock->end_of_message();

			dprintf(D_ALWAYS | D_NOHEADER, "\n");

			break;

		default:
			// Bail due to user error. This client doesn't support the unknown
			// protocol.

			delete rsock;
			errstack->push("DC_TRANSFERD", 1, 
				"Unknown file transfer protocol selected.");
			return false;
			break;
	}

	//////////////////////////////////////////////////////////////////////////
	// Get the response from the transferd once it sees a completed 
	// movement of files to the child process.
	//////////////////////////////////////////////////////////////////////////

	rsock->decode();
	respad.initFromStream(*rsock);
	rsock->end_of_message();

	// close up shop
	delete rsock;

	respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid);
	if ( invalid == TRUE ) {
		respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
		errstack->push("DC_TRANSFERD", 1, reason.c_str());
		return false;
	}

	return true;
}
Exemple #3
0
bool 
DCSchedd::spoolJobFiles(int JobAdsArrayLen, ClassAd* JobAdsArray[], CondorError * errstack)
{
	int reply;
	int i;
	ReliSock rsock;
	bool use_new_command = true;

	if ( version() ) {
		CondorVersionInfo vi( version() );
		if ( vi.built_since_version(6,7,7) ) {
			use_new_command = true;
		} else {
			use_new_command = false;
		}
	}

		// // // // // // // //
		// On the wire protocol
		// // // // // // // //

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		std::string errmsg;
		formatstr(errmsg, "Failed to connect to schedd (%s)", _addr);

		dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: %s\n", errmsg.c_str() );

		if( errstack ) {
			errstack->push(
				"DCSchedd::spoolJobFiles",CEDAR_ERR_CONNECT_FAILED,
				errmsg.c_str() );
		}
		return false;
	}
	if ( use_new_command ) {
		if( ! startCommand(SPOOL_JOB_FILES_WITH_PERMS, (Sock*)&rsock, 0,
						   errstack) ) {

			dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: "
				"Failed to send command (SPOOL_JOB_FILES_WITH_PERMS) "
				"to the schedd (%s)\n", _addr );

			return false;
		}
	} else {
		if( ! startCommand(SPOOL_JOB_FILES, (Sock*)&rsock, 0, errstack) ) {
			dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: "
					 "Failed to send command (SPOOL_JOB_FILES) "
					 "to the schedd (%s)\n", _addr );

			return false;
		}
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n",
				 errstack ? errstack->getFullText().c_str() : "" );
		return false;
	}

	rsock.encode();

		// Send our version if using the new command
	if ( use_new_command ) {
			// Need to use a named variable, else the wrong version of	
			// code() is called.
		char *my_version = strdup( CondorVersion() );
		if ( !rsock.code(my_version) ) {
			dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: "
					"Can't send version string to the schedd\n");
			free( my_version );
			return false;
		}
		free( my_version );
	}

		// Send the number of jobs
	if ( !rsock.code(JobAdsArrayLen) ) {
		dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: "
				"Can't send JobAdsArrayLen to the schedd\n");
		return false;
	}

	if( !rsock.end_of_message() ) {
		std::string errmsg;
		formatstr(errmsg,
				"Can't send initial message (version + count) to schedd (%s)",
				_addr);

		dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: %s\n", errmsg.c_str());

		if( errstack ) {
			errstack->push(
				"DCSchedd::spoolJobFiles",
				CEDAR_ERR_EOM_FAILED,
				errmsg.c_str());
		}
		return false;
	}

		// Now, put the job ids onto the wire
	PROC_ID jobid;
	for (i=0; i<JobAdsArrayLen; i++) {
		if (!JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,jobid.cluster)) {
			dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: "
					"Job ad %d did not have a cluster id\n",i);
			return false;
		}
		if (!JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,jobid.proc)) {
			dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: "
					"Job ad %d did not have a proc id\n",i);
			return false;
		}
		rsock.code(jobid);
	}

	if( !rsock.end_of_message() ) {
		std::string errmsg;
		formatstr(errmsg, "Failed while sending job ids to schedd (%s)", _addr);

		dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: %s\n", errmsg.c_str());

		if( errstack ) {
			errstack->push(
				"DCSchedd::spoolJobFiles",
				CEDAR_ERR_EOM_FAILED,
				errmsg.c_str());
		}
		return false;
	}

		// Now send all the files via the file transfer object
	for (i=0; i<JobAdsArrayLen; i++) {
		FileTransfer ftrans;
		if ( !ftrans.SimpleInit(JobAdsArray[i], false, false, &rsock, PRIV_UNKNOWN, false, true) ) {
			if( errstack ) {
				int cluster = -1, proc = -1;
				if(JobAdsArray[i]) {
					JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,cluster);
					JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,proc);
				}
				errstack->pushf(
					"DCSchedd::spoolJobFiles",
					FILETRANSFER_INIT_FAILED,
					"File transfer initialization failed for target job %d.%d",
					cluster, proc );
			}
			return false;
		}
		if ( use_new_command ) {
			ftrans.setPeerVersion( version() );
		}
		if ( !ftrans.UploadFiles(true,false) ) {
			if( errstack ) {
				FileTransfer::FileTransferInfo ft_info = ftrans.GetInfo();

				int cluster = -1, proc = -1;
				if(JobAdsArray[i]) {
					JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,cluster);
					JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,proc);
				}
				errstack->pushf(
					"DCSchedd::spoolJobFiles",
					FILETRANSFER_UPLOAD_FAILED,
					"File transfer failed for target job %d.%d: %s",
					cluster, proc, ft_info.error_desc.Value() );
			}
			return false;
		}
	}	
		
		
	rsock.end_of_message();

	rsock.decode();

	reply = 0;
	rsock.code(reply);
	rsock.end_of_message();

	if ( reply == 1 ) 
		return true;
	else
		return false;
}
Exemple #4
0
// upload the files associated with the jobads to the sandbox at td_sinful
// with the supplied capability.
// The work_ad should contain:
//	ATTR_TREQ_CAPABILITY
//	ATTR_TREQ_FTP
//	ATTR_TREQ_JOBID_ALLOW_LIST
bool 
DCTransferD::upload_job_files(int JobAdsArrayLen, ClassAd* JobAdsArray[], 
		ClassAd *work_ad, CondorError * errstack)
{
	ReliSock *rsock = NULL;
	int timeout = 60 * 60 * 8; // transfers take a long time...
	int i;
	ClassAd reqad, respad;
	std::string cap;
	int ftp;
	int invalid;
	int protocol;
	std::string reason;

	//////////////////////////////////////////////////////////////////////////
	// Connect to the transferd and authenticate
	//////////////////////////////////////////////////////////////////////////

	// This call with automatically connect to _addr, which was set in the
	// constructor of this object to be the transferd in question.
	rsock = (ReliSock*)startCommand(TRANSFERD_WRITE_FILES, Stream::reli_sock,
		timeout, errstack);
	if( ! rsock ) {
		dprintf( D_ALWAYS, "DCTransferD::upload_job_files: "
				 "Failed to send command (TRANSFERD_WRITE_FILES) "
				 "to the schedd\n" );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to start a TRANSFERD_WRITE_FILES command.");
		return false;
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( rsock, errstack )) {
		dprintf( D_ALWAYS, "DCTransferD::upload_job_files() authentication "
				"failure: %s\n", errstack->getFullText().c_str() );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to authenticate properly.");
		return false;
	}

	rsock->encode();

	//////////////////////////////////////////////////////////////////////////
	// Query the transferd about the capability/protocol and see if I can 
	// upload my files. It will respond with a classad saying good or bad.
	//////////////////////////////////////////////////////////////////////////

	work_ad->LookupString(ATTR_TREQ_CAPABILITY, cap);
	work_ad->LookupInteger(ATTR_TREQ_FTP, ftp);

	reqad.Assign(ATTR_TREQ_CAPABILITY, cap);
	reqad.Assign(ATTR_TREQ_FTP, ftp);

	// This request ad to the transferd should contain:
	//	ATTR_TREQ_CAPABILITY
	//	ATTR_TREQ_FTP
	reqad.put(*rsock);
	rsock->end_of_message();

	rsock->decode();

	// This response ad to the transferd should contain:
	// ATTR_TREQ_INVALID_REQUEST (set to true)
	// ATTR_TREQ_INVALID_REASON
	//
	// OR
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	respad.initFromStream(*rsock);
	rsock->end_of_message();

	respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid);
	
	if (invalid == TRUE) {
		// The transferd rejected my attempt to upload the fileset
		delete rsock;
		respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
		errstack->push("DC_TRANSFERD", 1, reason.c_str());
		return false;
	}

	//////////////////////////////////////////////////////////////////////////
	// Sort the job ads array into numerically sorted order. The transferd
	// must do the same.
	//////////////////////////////////////////////////////////////////////////

	// TODO

	//////////////////////////////////////////////////////////////////////////
	// Based upon the protocol I've chosen, use that method to upload the
	// files. When using the FileTrans protocol, a child process on the
	// transferd side will be inheriting a socket and accepting the
	// FileTransfer object's protocol.
	//////////////////////////////////////////////////////////////////////////

	// XXX Fix to only send jobads for allowed jobs

	dprintf(D_ALWAYS, "Sending fileset");
	work_ad->LookupInteger(ATTR_TREQ_FTP, protocol);
	switch(protocol) {
		case FTP_CFTP:
			// upload the files using the FileTransfer Object

			for (i=0; i<JobAdsArrayLen; i++) {
				FileTransfer ftrans;
				if ( !ftrans.SimpleInit(JobAdsArray[i], false, false, rsock) )
				{
					delete rsock;
					errstack->push("DC_TRANSFERD", 1, 
						"Failed to initate uploading of files.");
					return false;
				}

				ftrans.setPeerVersion( version() );

				if ( !ftrans.UploadFiles(true,false) ) {
					delete rsock;
					errstack->push("DC_TRANSFERD", 1, 
						"Failed to upload files.");
					return false;
				}

				dprintf(D_ALWAYS | D_NOHEADER, ".");
			}	
			rsock->end_of_message();
			dprintf(D_ALWAYS | D_NOHEADER, "\n");
			break;

		default:
			// Bail due to user error. This client doesn't support the uknown
			// protocol.

			delete rsock;
			errstack->push("DC_TRANSFERD", 1, 
				"Unknown file transfer protocol selected.");
			return false;
			break;
	}

	//////////////////////////////////////////////////////////////////////////
	// Get the response from the transferd once it sees a completed 
	// movement of files from the child process.
	//////////////////////////////////////////////////////////////////////////

	rsock->decode();
	respad.initFromStream(*rsock);
	rsock->end_of_message();

	// close up shop
	delete rsock;

	respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid);
	if ( invalid == TRUE ) {
		respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
		errstack->push("DC_TRANSFERD", 1, reason.c_str());
		return false;
	}

	return true;
}
Exemple #5
0
// when a transferd registers itself, it identifies who it is. The connection
// is then held open and the schedd periodically might send more transfer
// requests to the transferd. Also, if the transferd dies, the schedd is 
// informed quickly and reliably due to the closed connection.
bool
DCSchedd::register_transferd(MyString sinful, MyString id, int timeout, 
		ReliSock **regsock_ptr, CondorError *errstack) 
{
	ReliSock *rsock;
	int invalid_request = 0;
	ClassAd regad;
	ClassAd respad;
	std::string errstr;
	std::string reason;

	if (regsock_ptr != NULL) {
		// Our caller wants a pointer to the socket we used to succesfully
		// register the claim. The NULL pointer will represent failure and
		// this will only be set to something real if everything was ok.
		*regsock_ptr = NULL;
	}

	// This call with automatically connect to _addr, which was set in the
	// constructor of this object to be the schedd in question.
	rsock = (ReliSock*)startCommand(TRANSFERD_REGISTER, Stream::reli_sock,
		timeout, errstack);

	if( ! rsock ) {
		dprintf( D_ALWAYS, "DCSchedd::register_transferd: "
				 "Failed to send command (TRANSFERD_REGISTER) "
				 "to the schedd\n" );
		errstack->push("DC_SCHEDD", 1, 
			"Failed to start a TRANSFERD_REGISTER command.");
		return false;
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd::register_transferd authentication "
				"failure: %s\n", errstack->getFullText().c_str() );
		errstack->push("DC_SCHEDD", 1, 
			"Failed to authenticate properly.");
		return false;
	}

	rsock->encode();

	// set up my registration request.
	regad.Assign(ATTR_TREQ_TD_SINFUL, sinful);
	regad.Assign(ATTR_TREQ_TD_ID, id);

	// This is the initial registration identification ad to the schedd
	// It contains:
	//	ATTR_TREQ_TD_SINFUL
	//	ATTR_TREQ_TD_ID
	putClassAd(rsock, regad);
	rsock->end_of_message();

	// Get the response from the schedd.
	rsock->decode();

	// This is the response ad from the schedd:
	// It contains:
	//	ATTR_TREQ_INVALID_REQUEST
	//
	// OR
	// 
	//	ATTR_TREQ_INVALID_REQUEST
	//	ATTR_TREQ_INVALID_REASON
	getClassAd(rsock, respad);
	rsock->end_of_message();

	respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid_request);

	if (invalid_request == FALSE) {
		// not an invalid request
		if (regsock_ptr)
			*regsock_ptr = rsock;
		return true;
	}

	respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
	errstack->pushf("DC_SCHEDD", 1, "Schedd refused registration: %s", reason.c_str());

	return false;
}
Exemple #6
0
// I'm going to ask the schedd for where I can put the files for the jobs I've
// specified. The schedd is going to respond with A) a message telling me it
// has the answer right away, or B) an answer telling me I have to wait 
// an unknown length of time for the schedd to schedule me a place to put it.
bool 
DCSchedd::requestSandboxLocation(ClassAd *reqad, ClassAd *respad, 
	CondorError * errstack)
{
	ReliSock rsock;
	int will_block;
	ClassAd status_ad;

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): "
				 "Failed to connect to schedd (%s)\n", _addr );
		return false;
	}
	if( ! startCommand(REQUEST_SANDBOX_LOCATION, (Sock*)&rsock, 0,
						   errstack) ) {
		dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): "
				 "Failed to send command (REQUEST_SANDBOX_LOCATION) "
				 "to schedd (%s)\n", _addr );
		return false;
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n",
				 errstack->getFullText().c_str() );
		return false;
	}

	rsock.encode();

	///////////////////////////////////////////////////////////////////////
	// Send my sandbox location request packet to the schedd.
	///////////////////////////////////////////////////////////////////////

	// This request ad will either contain:
	//	ATTR_TREQ_PEER_VERSION
	//	ATTR_TREQ_HAS_CONSTRAINT
	//	ATTR_TREQ_JOBID_LIST
	//	ATTR_TREQ_FTP
	// 
	// OR
	//
	//	ATTR_TREQ_DIRECTION
	//	ATTR_TREQ_PEER_VERSION
	//	ATTR_TREQ_HAS_CONSTRAINT
	//	ATTR_TREQ_CONSTRAINT
	//	ATTR_TREQ_FTP
	dprintf(D_ALWAYS, "Sending request ad.\n");
	if (putClassAd(&rsock, *reqad) != 1) {
		dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): "
				"Can't send reqad to the schedd\n");
		return false;
	}
	rsock.end_of_message();

	rsock.decode();

	///////////////////////////////////////////////////////////////////////
	// Read back a response ad which will tell me which jobs the schedd
	// said I could modify and whether or not I'm am going to have to block
	// before getting the payload of the transferd location/capability ad.
	///////////////////////////////////////////////////////////////////////

	// This status ad will contain
	//	ATTR_TREQ_INVALID_REQUEST (set to true)
	//	ATTR_TREQ_INVALID_REASON
	//
	// OR
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	//	ATTR_TREQ_JOBID_ALLOW_LIST
	//	ATTR_TREQ_JOBID_DENY_LIST
	//	ATTR_TREQ_WILL_BLOCK

	dprintf(D_ALWAYS, "Receiving status ad.\n");
	if (getClassAd(&rsock, status_ad) == false) {
		dprintf(D_ALWAYS, "Schedd closed connection to me. Aborting sandbox "
			"submission.\n");
		return false;
	}
	rsock.end_of_message();

	status_ad.LookupInteger(ATTR_TREQ_WILL_BLOCK, will_block);

	dprintf(D_ALWAYS, "Client will %s\n", will_block==1?"block":"not block");

	if (will_block == 1) {
		// set to 20 minutes.
		rsock.timeout(60*20);
	}

	///////////////////////////////////////////////////////////////////////
	// Read back the payload ad from the schedd about the transferd location
	// and capability string I can use for the fileset I wish to transfer.
	///////////////////////////////////////////////////////////////////////

	// read back the response ad from the schedd which contains a 
	// td sinful string, and a capability. These represent my ability to
	// read/write a certain fileset somewhere.

	// This response ad from the schedd will contain:
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to true)
	//	ATTR_TREQ_INVALID_REASON
	//
	// OR
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	//	ATTR_TREQ_CAPABILITY
	//	ATTR_TREQ_TD_SINFUL
	//	ATTR_TREQ_JOBID_ALLOW_LIST

	dprintf(D_ALWAYS, "Receiving response ad.\n");
	if (getClassAd(&rsock, *respad) != true) {
		dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): "
				"Can't receive respond ad from the schedd\n");
		return false;
	}
	rsock.end_of_message();

	return true;
}
Exemple #7
0
bool 
DCSchedd::receiveJobSandbox(const char* constraint, CondorError * errstack, int * numdone /*=0*/)
{
	if(numdone) { *numdone = 0; }
	ExprTree *tree = NULL;
	const char *lhstr;
	int reply;
	int i;
	ReliSock rsock;
	int JobAdsArrayLen;
	bool use_new_command = true;

	if ( version() ) {
		CondorVersionInfo vi( version() );
		if ( vi.built_since_version(6,7,7) ) {
			use_new_command = true;
		} else {
			use_new_command = false;
		}
	}

		// // // // // // // //
		// On the wire protocol
		// // // // // // // //

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
				 "Failed to connect to schedd (%s)\n", _addr );
		return false;
	}
	if ( use_new_command ) {
		if( ! startCommand(TRANSFER_DATA_WITH_PERMS, (Sock*)&rsock, 0,
						   errstack) ) {
			dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
					 "Failed to send command (TRANSFER_DATA_WITH_PERMS) "
					 "to the schedd\n" );
			return false;
		}
	} else {
		if( ! startCommand(TRANSFER_DATA, (Sock*)&rsock, 0, errstack) ) {
			dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
					 "Failed to send command (TRANSFER_DATA) "
					 "to the schedd\n" );
			return false;
		}
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, 
			"DCSchedd::receiveJobSandbox: authentication failure: %s\n",
			errstack ? errstack->getFullText().c_str() : "" );
		return false;
	}

	rsock.encode();

		// Send our version if using the new command
	if ( use_new_command ) {
			// Need to use a named variable, else the wrong version of	
			// code() is called.
		char *my_version = strdup( CondorVersion() );
		if ( !rsock.code(my_version) ) {
			dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: "
					"Can't send version string to the schedd\n");
			free( my_version );
			return false;
		}
		free( my_version );
	}

		// Send the constraint
	char * nc_constraint = strdup( constraint );	// de-const
	if ( !rsock.code(nc_constraint) ) {
		free( nc_constraint );
		dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: "
				"Can't send JobAdsArrayLen to the schedd\n");
		return false;
	}
	free( nc_constraint );

	if ( !rsock.end_of_message() ) {
		std::string errmsg;
		formatstr(errmsg,
				"Can't send initial message (version + constraint) to schedd (%s)",
				_addr);

		dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());

		if( errstack ) {
			errstack->push(
				"DCSchedd::receiveJobSandbox",
				CEDAR_ERR_EOM_FAILED,
				errmsg.c_str());
		}
		return false;
	}

		// Now, read how many jobs matched the constraint.
	rsock.decode();
	if ( !rsock.code(JobAdsArrayLen) ) {
		std::string errmsg;
		formatstr(errmsg,
				"Can't receive JobAdsArrayLen from the schedd (%s)",
				_addr);

		dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());

		if( errstack ) {
			errstack->push(
				"DCSchedd::receiveJobSandbox",
				CEDAR_ERR_GET_FAILED,
				errmsg.c_str());
		}
		return false;
	}

	rsock.end_of_message();

	dprintf(D_FULLDEBUG,"DCSchedd:receiveJobSandbox: "
		"%d jobs matched my constraint (%s)\n",
		JobAdsArrayLen, constraint);

		// Now read all the files via the file transfer object
	for (i=0; i<JobAdsArrayLen; i++) {
		FileTransfer ftrans;
		ClassAd job;

			// grab job ClassAd
		if ( !getClassAd(&rsock, job) ) {
			std::string errmsg;
			formatstr(errmsg, "Can't receive job ad %d from the schedd", i);

			dprintf(D_ALWAYS, "DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());

			if( errstack ) {
				errstack->push(
							   "DCSchedd::receiveJobSandbox",
							   CEDAR_ERR_GET_FAILED,
							   errmsg.c_str());
			}
			return false;
		}

		rsock.end_of_message();

			// translate the job ad by replacing the 
			// saved SUBMIT_ attributes
		job.ResetExpr();
		while( job.NextExpr(lhstr, tree) ) {
			if ( lhstr && strncasecmp("SUBMIT_",lhstr,7)==0 ) {
					// this attr name starts with SUBMIT_
					// compute new lhs (strip off the SUBMIT_)
				const char *new_attr_name = strchr(lhstr,'_');
				ExprTree * pTree;
				ASSERT(new_attr_name);
				new_attr_name++;
					// insert attribute
				pTree = tree->Copy();
				job.Insert(new_attr_name, pTree, false);
			}
		}	// while next expr

		if ( !ftrans.SimpleInit(&job,false,false,&rsock) ) {
			if( errstack ) {
				int cluster = -1, proc = -1;
				job.LookupInteger(ATTR_CLUSTER_ID,cluster);
				job.LookupInteger(ATTR_PROC_ID,proc);
				errstack->pushf(
					"DCSchedd::receiveJobSandbox",
					FILETRANSFER_INIT_FAILED,
					"File transfer initialization failed for target job %d.%d",
					cluster, proc );
			}
			return false;
		}
		// We want files to be copied to their final places, so apply
		// any filename remaps when downloading.
		if ( !ftrans.InitDownloadFilenameRemaps(&job) ) {
			return false;
		}
		if ( use_new_command ) {
			ftrans.setPeerVersion( version() );
		}
		if ( !ftrans.DownloadFiles() ) {
			if( errstack ) {
				FileTransfer::FileTransferInfo ft_info = ftrans.GetInfo();

				int cluster = -1, proc = -1;
				job.LookupInteger(ATTR_CLUSTER_ID,cluster);
				job.LookupInteger(ATTR_PROC_ID,proc);
				errstack->pushf(
					"DCSchedd::receiveJobSandbox",
					FILETRANSFER_DOWNLOAD_FAILED,
					"File transfer failed for target job %d.%d: %s",
					cluster, proc, ft_info.error_desc.Value() );
			}
			return false;
		}
	}	
		
	rsock.end_of_message();

	rsock.encode();

	reply = OK;
	rsock.code(reply);
	rsock.end_of_message();

	if(numdone) { *numdone = JobAdsArrayLen; }

	return true;
}
Exemple #8
0
bool DCSchedd::recycleShadow( int previous_job_exit_reason, ClassAd **new_job_ad, MyString &error_msg )
{
	int timeout = 300;
	CondorError errstack;

	ReliSock sock;
	if( !connectSock(&sock,timeout,&errstack) ) {
		error_msg.formatstr("Failed to connect to schedd: %s",
						  errstack.getFullText().c_str());
		return false;
	}

	if( !startCommand(RECYCLE_SHADOW, &sock, timeout, &errstack) ) {
		error_msg.formatstr("Failed to send RECYCLE_SHADOW to schedd: %s",
						  errstack.getFullText().c_str());
		return false;
	}

	if( !forceAuthentication(&sock, &errstack) ) {
		error_msg.formatstr("Failed to authenticate: %s",
						  errstack.getFullText().c_str());
		return false;
	}

	sock.encode();
	int mypid = getpid();
	if( !sock.put( mypid ) ||
		!sock.put( previous_job_exit_reason ) ||
		!sock.end_of_message() )
	{
		error_msg = "Failed to send job exit reason";
		return false;
	}

	sock.decode();

	int found_new_job = 0;
	sock.get( found_new_job );

	if( found_new_job ) {
		*new_job_ad = new ClassAd();
		if( !getClassAd( &sock, *(*new_job_ad) ) ) {
			error_msg = "Failed to receive new job ClassAd";
			delete *new_job_ad;
			*new_job_ad = NULL;
			return false;
		}
	}

	if( !sock.end_of_message() ) {
		error_msg = "Failed to receive end of message";
		delete *new_job_ad;
		*new_job_ad = NULL;
		return false;
	}

	if( *new_job_ad ) {
		sock.encode();
		int ok=1;
		if( !sock.put(ok) ||
			!sock.end_of_message() )
		{
			error_msg = "Failed to send ok";
			delete *new_job_ad;
			*new_job_ad = NULL;
			return false;
		}
	}

	return true;
}
Exemple #9
0
bool DCSchedd::getJobConnectInfo(
	PROC_ID jobid,
	int subproc,
	char const *session_info,
	int timeout,
	CondorError *errstack,
	MyString &starter_addr,
	MyString &starter_claim_id,
	MyString &starter_version,
	MyString &slot_name,
	MyString &error_msg,
	bool &retry_is_sensible,
	int &job_status,
	MyString &hold_reason)
{
	ClassAd input;
	ClassAd output;

	input.Assign(ATTR_CLUSTER_ID,jobid.cluster);
	input.Assign(ATTR_PROC_ID,jobid.proc);
	if( subproc != -1 ) {
		input.Assign(ATTR_SUB_PROC_ID,subproc);
	}
	input.Assign(ATTR_SESSION_INFO,session_info);

	ReliSock sock;
	if( !connectSock(&sock,timeout,errstack) ) {
		error_msg = "Failed to connect to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( !startCommand(GET_JOB_CONNECT_INFO, &sock, timeout, errstack) ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( !forceAuthentication(&sock, errstack) ) {
		error_msg = "Failed to authenticate";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	sock.decode();
	if( !getClassAd(&sock, output) || !sock.end_of_message() ) {
		error_msg = "Failed to get response from schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( IsFulldebug(D_FULLDEBUG) ) {
		std::string adstr;
		sPrintAd(adstr, output, true);
		dprintf(D_FULLDEBUG,"Response for GET_JOB_CONNECT_INFO:\n%s\n",
				adstr.c_str());
	}

	bool result=false;
	output.LookupBool(ATTR_RESULT,result);

	if( !result ) {
		output.LookupString(ATTR_HOLD_REASON,hold_reason);
		output.LookupString(ATTR_ERROR_STRING,error_msg);
		retry_is_sensible = false;
		output.LookupBool(ATTR_RETRY,retry_is_sensible);
		output.LookupInteger(ATTR_JOB_STATUS,job_status);
	}
	else {
		output.LookupString(ATTR_STARTER_IP_ADDR,starter_addr);
		output.LookupString(ATTR_CLAIM_ID,starter_claim_id);
		output.LookupString(ATTR_VERSION,starter_version);
		output.LookupString(ATTR_REMOTE_HOST,slot_name);
	}

	return result;
}
Exemple #10
0
ClassAd*
DCSchedd::actOnJobs( JobAction action,
					 const char* constraint, StringList* ids, 
					 const char* reason, const char* reason_attr,
					 const char* reason_code, const char* reason_code_attr,
					 action_result_type_t result_type,
					 bool notify_scheduler,
					 CondorError * errstack )
{
	char* tmp = NULL;
	char buf[512];
	int size, reply;
	ReliSock rsock;

		// // // // // // // //
		// Construct the ad we want to send
		// // // // // // // //

	ClassAd cmd_ad;

	sprintf( buf, "%s = %d", ATTR_JOB_ACTION, action );
	cmd_ad.Insert( buf );
	
	sprintf( buf, "%s = %d", ATTR_ACTION_RESULT_TYPE, 
			 (int)result_type );
	cmd_ad.Insert( buf );

	sprintf( buf, "%s = %s", ATTR_NOTIFY_JOB_SCHEDULER, 
			 notify_scheduler ? "True" : "False" );
	cmd_ad.Insert( buf );

	if( constraint ) {
		if( ids ) {
				// This is a programming error, not a run-time one
			EXCEPT( "DCSchedd::actOnJobs has both constraint and ids!" );
		}
		size = strlen(constraint) + strlen(ATTR_ACTION_CONSTRAINT) + 4;  
		tmp = (char*) malloc( size*sizeof(char) );
		if( !tmp ) {
			EXCEPT( "Out of memory!" );
		}
		sprintf( tmp, "%s = %s", ATTR_ACTION_CONSTRAINT, constraint ); 
		if( ! cmd_ad.Insert(tmp) ) {
			dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
					 "Can't insert constraint (%s) into ClassAd!\n",
					 constraint );
			free( tmp );
			return NULL;
		}			
		free( tmp );
		tmp = NULL;
	} else if( ids ) {
		char* action_ids = ids->print_to_string();
		if ( action_ids ) {
			size = strlen(action_ids) + strlen(ATTR_ACTION_IDS) + 7;
			tmp = (char*) malloc( size*sizeof(char) );
			if( !tmp ) {
				EXCEPT( "Out of memory!" );
			}
			sprintf( tmp, "%s = \"%s\"", ATTR_ACTION_IDS, action_ids );
			cmd_ad.Insert( tmp );
			free( tmp );
			tmp = NULL;
			free(action_ids);
			action_ids = NULL;
		}
	} else {
		EXCEPT( "DCSchedd::actOnJobs called without constraint or ids" );
	}

	if( reason_attr && reason ) {
		size = strlen(reason_attr) + strlen(reason) + 7;
		tmp = (char*) malloc( size*sizeof(char) );
		if( !tmp ) {
			EXCEPT( "Out of memory!" );
		}
		sprintf( tmp, "%s = \"%s\"", reason_attr, reason );
		cmd_ad.Insert( tmp );
		free( tmp );
		tmp = NULL;
	}

	if( reason_code_attr && reason_code ) {
		cmd_ad.AssignExpr(reason_code_attr,reason_code);
	}

		// // // // // // // //
		// On the wire protocol
		// // // // // // // //

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
				 "Failed to connect to schedd (%s)\n", _addr );
		return NULL;
	}
	if( ! startCommand(ACT_ON_JOBS, (Sock*)&rsock, 0, errstack) ) {
		dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
				 "Failed to send command (ACT_ON_JOBS) to the schedd\n" );
		return NULL;
	}
		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n",
				 errstack->getFullText().c_str() );
		return NULL;
	}

		// Now, put the command classad on the wire
	if( ! (putClassAd(&rsock, cmd_ad) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send classad\n" );
		return NULL;
	}

		// Next, we need to read the reply from the schedd if things
		// are ok and it's going to go forward.  If the schedd can't
		// read our reply to this ClassAd, it assumes we got killed
		// and it should abort its transaction
	rsock.decode();
	ClassAd* result_ad = new ClassAd();
	if( ! (getClassAd(&rsock, *result_ad) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
				 "Can't read response ad from %s\n", _addr );
		delete( result_ad );
		return NULL;
	}

		// If the action totally failed, the schedd will already have
		// aborted the transaction and closed up shop, so there's no
		// reason trying to continue.  However, we still want to
		// return the result ad we got back so that our caller can
		// figure out what went wrong.
	reply = FALSE;
	result_ad->LookupInteger( ATTR_ACTION_RESULT, reply );
	if( reply != OK ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Action failed\n" );
		return result_ad;
	}

		// Tell the schedd we're still here and ready to go
	rsock.encode();
	int answer = OK;
	if( ! (rsock.code(answer) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send reply\n" );
		delete( result_ad );
		return NULL;
	}
	
		// finally, make sure the schedd didn't blow up trying to
		// commit these changes to the job queue...
	rsock.decode();
	if( ! (rsock.code(reply) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
				 "Can't read confirmation from %s\n", _addr );
		delete( result_ad );
		return NULL;
	}

	return result_ad;
}
Exemple #11
0
bool 
DCSchedd::delegateGSIcredential(const int cluster, const int proc, 
								const char* path_to_proxy_file,
								time_t expiration_time,
								time_t *result_expiration_time,
								CondorError * errstack)
{
	int reply;
	ReliSock rsock;

		// check the parameters
	if ( cluster < 1 || proc < 0 || !path_to_proxy_file || !errstack ) {
		dprintf(D_FULLDEBUG,"DCSchedd::delegateGSIcredential: bad parameters\n");
		return false;
	}

		// connect to the schedd, send the DELEGATE_GSI_CRED_SCHEDD command
	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::delegateGSIcredential: "
				 "Failed to connect to schedd (%s)\n", _addr );
		return false;
	}
	if( ! startCommand(DELEGATE_GSI_CRED_SCHEDD, (Sock*)&rsock, 0, errstack) ) {
		dprintf( D_ALWAYS, "DCSchedd::delegateGSIcredential: "
				 "Failed send command to the schedd: %s\n",
				 errstack->getFullText().c_str());
		return false;
	}


		// If we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, 
				"DCSchedd::delegateGSIcredential authentication failure: %s\n",
				 errstack->getFullText().c_str() );
		return false;
	}

		// Send the job id
	rsock.encode();
	PROC_ID jobid;
	jobid.cluster = cluster;
	jobid.proc = proc;	
	if ( !rsock.code(jobid) || !rsock.end_of_message() ) {
		dprintf(D_ALWAYS,"DCSchedd::delegateGSIcredential: "
				"Can't send jobid to the schedd\n");
		return false;
	}

		// Delegate the gsi proxy
	filesize_t file_size = 0;	// will receive the size of the file
	if ( rsock.put_x509_delegation(&file_size,path_to_proxy_file,expiration_time,result_expiration_time) < 0 ) {
		dprintf(D_ALWAYS,
			"DCSchedd::delegateGSIcredential "
			"failed to send proxy file %s\n",
			path_to_proxy_file);
		return false;
	}
		
		// Fetch the result
	rsock.decode();
	reply = 0;
	rsock.code(reply);
	rsock.end_of_message();

	if ( reply == 1 ) 
		return true;
	else
		return false;
}