示例#1
0
文件: vmgahp.cpp 项目: pragmagrid/pcc
/*
 * Best-effort "fast kill" of the virtual machine that belongs to this gahp.
 *
 * Does nothing when no job ad is set.  Otherwise the configured VM type
 * (m_gahp_config->m_vm_type, compared case-insensitively) selects the
 * type-specific killVMFast() routine.  Xen and KVM are handled under root
 * privilege and only when the build has libvirt support and is not
 * VMWARE_ONLY; VMware is handled under user privilege.  In every branch the
 * previous privilege state is restored before the function returns.
 */
void
VMGahp::killAllProcess()
{
	if( !m_jobAd ) {
		// Without a job ad the virtual machine was never created.
		return;
	}

#if defined (HAVE_EXT_LIBVIRT) && !defined(VMWARE_ONLY)
	if( strcasecmp(m_gahp_config->m_vm_type.Value(),
				CONDOR_VM_UNIVERSE_XEN ) == 0 ) {
		// Xen: the kill is issued with root privilege.
		priv_state priv = set_root_priv();
		// m_jobAd was already verified above; the re-check here is redundant.
		if( m_jobAd && XenType::checkXenParams(m_gahp_config) ) {
			MyString vmname;
			if( VMType::createVMName(m_jobAd, vmname) ) {
				XenType::killVMFast(vmname.Value());
				vmprintf( D_FULLDEBUG, "killVMFast is called\n");
			}
		}
		set_priv(priv);
	} else if(strcasecmp(m_gahp_config->m_vm_type.Value(),
			     CONDOR_VM_UNIVERSE_KVM ) == 0 ) {
		// KVM: same sequence as Xen, using the KVMType entry points.
		priv_state priv = set_root_priv();
		if( m_jobAd && KVMType::checkXenParams(m_gahp_config) ) {
			MyString vmname;
			if( VMType::createVMName(m_jobAd, vmname) ) {
				KVMType::killVMFast(vmname.Value());
				vmprintf( D_FULLDEBUG, "killVMFast is called\n");
			}
		}
		set_priv(priv);

	} else
#endif
	// When the libvirt section above is compiled in, the dangling "else"
	// chains into this "if"; otherwise this is the first test.
	if( strcasecmp(m_gahp_config->m_vm_type.Value(),
				CONDOR_VM_UNIVERSE_VMWARE ) == 0 ) {
		// VMware: the kill script is run with user privilege.
		priv_state priv = set_user_priv();
		if( VMwareType::checkVMwareParams(m_gahp_config) ) {
			VMwareType::killVMFast(m_gahp_config->m_prog_for_script.Value(),
					m_gahp_config->m_vm_script.Value(), m_workingdir.Value());
			vmprintf( D_FULLDEBUG, "killVMFast is called\n");
		}
		set_priv(priv);
	}
	// [PRAGMA] Kill Rocks VM Process
	// NOTE(review): this condition tests CONDOR_VM_UNIVERSE_VMWARE again,
	// exactly like the branch directly above, so as written this branch can
	// never run.  Presumably a Rocks-specific VM type constant was intended
	// here -- confirm and fix.
	else if( strcasecmp(m_gahp_config->m_vm_type.Value(),
					CONDOR_VM_UNIVERSE_VMWARE ) == 0 ) {
			priv_state priv = set_user_priv();
			if( RocksType::checkRocksParams(m_gahp_config) ) {
				RocksType::killVMFast(m_gahp_config->m_prog_for_script.Value(),
						m_gahp_config->m_vm_script.Value(), m_workingdir.Value());
				vmprintf( D_FULLDEBUG, "killVMFast is called\n");
			}
			set_priv(priv);
		}
}
示例#2
0
/*
 * Run the handler that corresponds to a request's command name.
 *
 * The command name is the first argument of the request (argv[0]) and is
 * matched case-insensitively against the known VMGAHP_COMMAND_VM_* names.
 * The handler runs with user privilege; the privilege state that was active
 * on entry is restored afterwards.  An unrecognized command is only logged.
 */
void
VMGahp::executeCommand(VMRequest *req)
{
	char *cmd_name = req->m_args.argv[0];

	priv_state saved_priv = set_user_priv();

	if( !strcasecmp(cmd_name, VMGAHP_COMMAND_VM_START) ) {
		executeStart(req);
	}
	else if( !strcasecmp(cmd_name, VMGAHP_COMMAND_VM_STOP) ) {
		executeStop(req);
	}
	else if( !strcasecmp(cmd_name, VMGAHP_COMMAND_VM_SUSPEND) ) {
		executeSuspend(req);
	}
	else if( !strcasecmp(cmd_name, VMGAHP_COMMAND_VM_SOFT_SUSPEND) ) {
		executeSoftSuspend(req);
	}
	else if( !strcasecmp(cmd_name, VMGAHP_COMMAND_VM_RESUME) ) {
		executeResume(req);
	}
	else if( !strcasecmp(cmd_name, VMGAHP_COMMAND_VM_CHECKPOINT) ) {
		executeCheckpoint(req);
	}
	else if( !strcasecmp(cmd_name, VMGAHP_COMMAND_VM_STATUS) ) {
		executeStatus(req);
	}
	else if( !strcasecmp(cmd_name, VMGAHP_COMMAND_VM_GETPID) ) {
		executeGetpid(req);
	}
	else {
		// No handler is registered under this name.
		vmprintf(D_ALWAYS, "Unknown command(%s)\n", cmd_name);
	}

	set_priv(saved_priv);
}
示例#3
0
bool
LocalUserLog::init( const std::vector<const char*>& filename, bool is_xml, 
					int cluster, int proc, int subproc )
{
	if( ! jic->userPrivInitialized() ) { 
		EXCEPT( "LocalUserLog::init() "
				"called before user priv is initialized!" );
	}
	priv_state priv;
	priv = set_user_priv();

	bool ret = u_log.initialize(filename, cluster, proc, subproc, NULL);
	if( ! ret ) {
		dprintf( D_ALWAYS, 
				 "Failed to initialize Starter's UserLog, aborting\n" );
		set_priv( priv );
		return false;
	}

	set_priv( priv );
	u_log.setUseXML( is_xml );
	for(std::vector<const char*>::const_iterator p = filename.begin();
			p != filename.end(); ++p) {
		dprintf( D_FULLDEBUG, "Starter's UserLog: %s\n", *p );
	}
	is_initialized = true;
	should_log = true;
	return true;
}
示例#4
0
/*
 * Write an execution visa for the job, if the job asked for one.
 *
 * Nothing happens unless ATTR_WANT_STARTER_EXECUTION_VISA evaluates to a
 * true value in the job ad.  The visa is written with user privilege via
 * classad_visa_write(), which receives the job's IWD; on success the
 * resulting file name is added to the job's output files.
 */
void
JobInfoCommunicator::writeExecutionVisa( ClassAd& visa_ad )
{
    int want_visa = 0;
    if (!job_ad->EvalBool(ATTR_WANT_STARTER_EXECUTION_VISA, NULL, want_visa)) {
        return;
    }
    if (!want_visa) {
        return;
    }

    MyString iwd;
    if (!job_ad->LookupString(ATTR_JOB_IWD, iwd)) {
        dprintf(D_ALWAYS,
                "writeExecutionVisa error: no IWD in job ad!\n");
        return;
    }

    MyString visa_file;
    priv_state saved_priv = set_user_priv();
    const bool written = classad_visa_write(&visa_ad,
                                            get_mySubSystem()->getName(),
                                            daemonCore->InfoCommandSinfulString(),
                                            iwd.Value(),
                                            &visa_file);
    set_priv(saved_priv);

    if (!written) {
        return;
    }
    addToOutputFiles(visa_file.Value());
}
/* proxy_valid_right_now()

   this function is used in this object to determine if glexec should actually
   be invoked.  glexec will always fail with an expired proxy, and there is
   overhead in invoking it.
*/
int
GLExecPrivSepHelper::proxy_valid_right_now()
{

	int result = TRUE;
		/* Note that set_user_priv is a no-op if condor is running as
		   non-root (the "usual" mode for invoking glexec) */
	priv_state priv_saved = set_user_priv();
	if (!m_proxy) {
		dprintf(D_FULLDEBUG, "GLExecPrivSepHelper::proxy_valid_right_now: no proxy defined\n");
		result = FALSE;
	} else {

		time_t expiration_time = x509_proxy_expiration_time(m_proxy);
		time_t now = time(NULL);

		if (expiration_time == -1) {
			dprintf(D_FULLDEBUG, "GLExecPrivSepHelper::proxy_valid_right_now: Globus error when getting proxy %s expiration: %s.\n", m_proxy, x509_error_string());
			result = FALSE;
		} else if (expiration_time < now) {
			dprintf(D_FULLDEBUG, "GLExecPrivSepHelper::proxy_valid_right_now: proxy %s expired %ld seconds ago!\n", m_proxy, now - expiration_time);
			result = FALSE;
		}
	}

	set_priv(priv_saved);

	return result;
}
/*
 * Run a helper script (via my_popen) and collect its output.
 *
 * args       - command line to run; args.GetArg(0) is used in messages
 * error_desc - appended with a human-readable description on failure
 *
 * Returns INVALID_PROXY_RC without running anything when the proxy is not
 * currently valid, -1 when the process cannot be started, and otherwise the
 * value returned by my_pclose().  When that value is non-zero the script's
 * output is logged and appended to error_desc (newlines become "; ").
 */
int
GLExecPrivSepHelper::run_script(ArgList& args,MyString &error_desc)
{
	if (!proxy_valid_right_now()) {
		dprintf(D_ALWAYS, "GLExecPrivSepHelper::run_script: not invoking glexec since the proxy is not valid!\n");
		error_desc += "The job proxy is not valid.";
		return INVALID_PROXY_RC;
	}

		/* Note that set_user_priv is a no-op if condor is running as
		   non-root (the "usual" mode for invoking glexec) */
	priv_state priv_saved = set_user_priv();
	FILE* fp = my_popen(args, "r", TRUE);
		// Grab errno right away: the privilege switch below may change it
		// before the failure is reported.
	int popen_errno = errno;
	set_priv(priv_saved);
	if (fp == NULL) {
		dprintf(D_ALWAYS,
		        "GLExecPrivSepHelper::run_script: "
		            "my_popen failure on %s: errno=%d (%s)\n",
		        args.GetArg(0),
			popen_errno,
			strerror(popen_errno));
		return -1;
	}

		// Drain the script's output; each line is appended to str.
	MyString str;
	while (str.readLine(fp, true)) {
	}

	priv_saved = set_user_priv();
	int ret = my_pclose(fp);
	set_priv(priv_saved);

	if (ret != 0) {
		str.trim();
		dprintf(D_ALWAYS,
		        "GLExecPrivSepHelper::run_script: %s exited "
		            "with status %d and following output:\n%s\n",
		        args.GetArg(0),
		        ret,
		        str.Value());
		error_desc.formatstr_cat("%s exited with status %d and the following output: %s",
				       condor_basename(args.GetArg(0)),
				       ret,
				       str.Value());
		error_desc.replaceString("\n","; ");
	}
	return ret;
}
示例#7
0
/*
 * Called when the job has exited: work out the exit reason and report it.
 *
 * The reason is JOB_KILLED or JOB_NOT_CKPTED for a requested exit (depending
 * on whether the job was held or removed), JOB_COREDUMPED if a core file was
 * seen, JOB_NOT_STARTED if the job never started, and JOB_EXITED otherwise.
 * On Windows the owner's profile is released first and, if configured, the
 * user's access to the visible desktop is revoked.
 *
 * Returns whatever the job info communicator's notifyJobExit() returns.
 */
bool
OsProc::JobExit( void )
{
	dprintf( D_FULLDEBUG, "Inside OsProc::JobExit()\n" );

	int reason = JOB_EXITED;
	if( requested_exit == true ) {
		const bool held_or_removed =
			Starter->jic->hadHold() || Starter->jic->hadRemove();
		reason = held_or_removed ? JOB_KILLED : JOB_NOT_CKPTED;
	}
	else if( dumped_core ) {
		reason = JOB_COREDUMPED;
	}
	else if( job_not_started ) {
		reason = JOB_NOT_STARTED;
	}

#if defined ( WIN32 )

	/* A loaded user profile is dumped now */
	if ( owner_profile_.loaded () ) {
		owner_profile_.unload ();

		/* !!!! DO NOT DO THIS IN THE FUTURE !!!! */
		owner_profile_.destroy ();
		/* !!!! DO NOT DO THIS IN THE FUTURE !!!! */
	}

	priv_state saved_priv = set_user_priv ();
	HANDLE user_token = priv_state_get_handle ();
	ASSERT ( user_token );

	// USE_VISIBLE_DESKTOP in condor_config: when TRUE, the user's
	// privileges on the visible desktop are removed at this point.
	if (param_boolean_crufty("USE_VISIBLE_DESKTOP", false)) {
		RevokeDesktopAccess ( user_token );
	}

	set_priv ( saved_priv );

#endif

	return Starter->jic->notifyJobExit( exit_status, reason, this );
}
示例#8
0
/*
 * Send signal `sig` to the user job (process `pid`) with user privilege.
 *
 * Unless `sig` is SIGCONT itself, a SIGCONT is sent first so the process is
 * able to receive the signal.  If the process no longer exists (ESRCH) the
 * fact is logged and the function returns; any other kill() failure is
 * fatal (EXCEPT).  The caller's privilege state is restored on every path.
 *
 * errno from kill() is captured before the privilege state is restored,
 * because restoring it may itself change errno.
 */
void
UserProc::send_sig_no_privsep( int sig )
{
	priv_state	priv;

		// We don't want to be root going around killing things or we
		// might do something we'll regret in the morning. -Derek 8/29/97
	priv = set_user_priv();  

	// removed some vanilla-specific code here
	//
	ASSERT(job_class != CONDOR_UNIVERSE_VANILLA);

	if ( job_class != CONDOR_UNIVERSE_VANILLA )
	{
		// Make sure the process can receive the signal. So let's send it a
		// SIGCONT first if applicable.
		if( sig != SIGCONT ) {
			if( kill(pid,SIGCONT) < 0 ) {
				int kill_errno = errno;	// grab errno right away
				set_priv(priv);
				if( kill_errno == ESRCH ) {	// User proc already exited
					dprintf( D_ALWAYS, "UserProc::send_sig_no_privsep(): "
						"Tried to send signal SIGCONT to user "
						"job %d, but that process doesn't exist.\n", pid);
					return;
				}
					// report the error that kill() actually produced
				errno = kill_errno;
				perror("kill");
				errno = kill_errno;
				EXCEPT( "kill(%d,SIGCONT)", pid  );
			}
			/* standard jobs can't fork, so.... */
			pids_suspended = 1;
			dprintf( D_ALWAYS, "UserProc::send_sig_no_privsep(): "
				"Sent signal SIGCONT to user job %d\n", pid);
		}

		if( kill(pid,sig) < 0 ) {
			int kill_errno = errno;	// grab errno right away
			set_priv(priv);
			if( kill_errno == ESRCH ) {	// User proc already exited
				dprintf( D_ALWAYS, "UserProc::send_sig_no_privsep(): "
					"Tried to send signal %d to user job "
				 	"%d, but that process doesn't exist.\n", sig, pid);
				return;
			}
				// report the error that kill() actually produced
			errno = kill_errno;
			perror("kill");
			errno = kill_errno;
			EXCEPT( "kill(%d,%d)", pid, sig );
		}
	}

	set_priv(priv);
}
示例#9
0
/*
 * Early start-up hook: handle the "-o <user>[@domain]" option so that the
 * process can switch to that user before daemoncore does most of its
 * initialization work.
 *
 * With -o, the user ids are initialized for the given owner/domain, user
 * privilege is entered, and myUserName is set to the owner part.  Without
 * -o, the process exits if it is running as root, and otherwise uses the
 * current user name.
 */
void
main_pre_dc_init( int argc, char* argv[] )
{
	// Look for "-o"; the argument after it names the user we run jobs
	// on behalf of.  If the schedd starts us as root, we need to switch
	// to this uid for most of our life.
	for ( int idx = 1; idx < argc; ++idx ) {
		if ( strcmp( argv[idx], "-o" ) != 0 ) {
			continue;
		}
		if ( idx + 1 >= argc ) {
			usage( argv[0] );
		}
		myUserName = strdup( argv[idx + 1] );
		break;
	}

	if ( !myUserName ) {
		if ( is_root() ) {
			dprintf( D_ALWAYS, "Don't know what user to run as!\n" );
			DC_Exit( 1 );
		} else {
			myUserName = my_username();
		}
		return;
	}

	// Split "owner@domain" in a private copy of the name.
	char *owner = strdup( myUserName );
	char *domain = NULL;
	char *at_sign = strchr( owner, '@' );
	if ( at_sign ) {
		*at_sign = '\0';
		domain = at_sign + 1;
	}

	if ( !init_user_ids(owner, domain)) {
		dprintf(D_ALWAYS, "init_user_ids() failed!\n");
		// uids.C will EXCEPT when we set_user_priv() now
		// so there's not much we can do at this point
	}
	set_user_priv();
	// We can't call daemonCore->Register_Priv_State() here because
	// there's no daemonCore object yet. We'll call it in main_init().

	// From here on myUserName holds just the owner part.
	free( myUserName );
	myUserName = owner;
}
示例#10
0
/* Refreshes the cached user information (token, login name, domain) and
the profile template/cache directory settings read from the configuration.
Documented to return TRUE if the internals were correctly initialized and
FALSE otherwise; as written, `ok` is initialized to TRUE and never changed,
so TRUE is always returned. This can be called multiple times, in case
of a reconfig. */
BOOL OwnerProfile::update () {

    dprintf ( D_FULLDEBUG, "In OwnerProfile::update()\n" );

    priv_state  priv    = PRIV_UNKNOWN;
    BOOL        ok      = TRUE;

    __try {
        
        /* do this as the user, so we get their information. */
        priv = set_user_priv ();

        /* start from a zeroed profile structure */
        ZeroMemory ( 
            &user_profile_, 
            sizeof ( PROFILEINFO ) );

        user_token_  = priv_state_get_handle ();
        user_name_   = get_user_loginname ();
        domain_name_ = ".";

        /* release the values left over from a previous call */
        if ( NULL != profile_template_ ) {
            free ( profile_template_ );
            profile_template_ = NULL;
        }
        if ( NULL != profile_cache_ ) {
            free ( profile_cache_ );
            profile_cache_ = NULL;
        }

        /* we always assume there are fresh directories in the 
        configuration file(s) */
        profile_template_ = param ( PARAM_PROFILE_TEMPLATE );
        profile_cache_    = param ( PARAM_PROFILE_CACHE );

    }
    __finally {

        /* return to previous privilege level */
        set_priv ( priv );

    }

    return ok;

}
示例#11
0
bool
OsProc::renameCoreFile( const char* old_name, const char* new_name )
{
	bool rval = false;
	int t_errno = 0;

	MyString old_full;
	MyString new_full;
	const char* job_iwd = Starter->jic->jobIWD();
	old_full.formatstr( "%s%c%s", job_iwd, DIR_DELIM_CHAR, old_name );
	new_full.formatstr( "%s%c%s", job_iwd, DIR_DELIM_CHAR, new_name );

	priv_state old_priv;

		// we need to do this rename as the user...
	errno = 0;
	old_priv = set_user_priv();
	int ret = rename(old_full.Value(), new_full.Value());
	if( ret != 0 ) {
			// rename failed
		t_errno = errno; // grab errno right away
		rval = false;
	} else { 
			// rename succeeded
		rval = true;
   	}
	set_priv( old_priv );

	if( rval ) {
		dprintf( D_FULLDEBUG, "Found core file '%s', renamed to '%s'\n",
				 old_name, new_name );
		if( dumped_core ) {
			EXCEPT( "IMPOSSIBLE: inside OsProc::renameCoreFile and "
					"dumped_core is already TRUE" );
		}
		dumped_core = true;
			// make sure it'll get transfered back, too.
		Starter->jic->addToOutputFiles( new_name );
	} else if( t_errno != ENOENT ) {
		dprintf( D_ALWAYS, "Failed to rename(%s,%s): errno %d (%s)\n",
				 old_full.Value(), new_full.Value(), t_errno,
				 strerror(t_errno) );
	}
	return rval;
}
示例#12
0
/*
 * Destructor: shut the VM down with user privilege, kill it if it is still
 * not reported as stopped, mark it stopped, and free every disk entry.
 */
VirshType::~VirshType()
{
	{
		priv_state saved_priv = set_user_priv();
		Shutdown();
		set_priv( saved_priv );
	}

		// If the shutdown did not stop the VM, make sure it exits.
	if( getVMStatus() != VM_STOPPED ) {
		killVM();
	}
	setVMStatus(VM_STOPPED);

		// Drop each entry from the list and free the disk it referred to.
	m_disk_list.Rewind();
	for( XenDisk *entry = NULL; m_disk_list.Next(entry); ) {
		m_disk_list.DeleteCurrent();
		delete entry;
	}
}
示例#13
0
/*
 * Point XAUTHORITY at the given user's ~/.Xauthority file and switch to
 * that user's privilege.
 *
 * user - login name to look up in the passwd database
 *
 * Returns false if the user cannot be found or if the resulting XAUTHORITY
 * setting would not fit in the environment buffer; true otherwise.  For
 * "root" the process switches to root privilege; for any other user the
 * user ids are (re)initialized and user privilege is entered.
 */
bool
XInterface::TryUser(const char *user)
{
		// putenv() keeps pointing at this buffer, so it must be static.
	static char env[1024];
	static bool need_uninit = false;
	passwd *passwd_entry;

	passwd_entry = getpwnam(user);
	if(passwd_entry == NULL) {
		// We couldn't find the current user in the passwd file?
		dprintf( D_FULLDEBUG, 
		 	"Current user cannot be found in passwd file.\n" );
		return false;
	} else {
			// Format into a scratch buffer first: the home directory
			// length is not under our control, and the live environment
			// entry must not be left half-written on failure.
		char candidate[sizeof(env)];
		int needed = snprintf(candidate, sizeof(candidate),
							  "XAUTHORITY=%s/.Xauthority",
							  passwd_entry->pw_dir);
		if(needed < 0 || static_cast<size_t>(needed) >= sizeof(candidate)) {
			dprintf( D_FULLDEBUG,
				"Home directory of user %s is too long for XAUTHORITY.\n",
				user );
			return false;
		}
		strcpy(env, candidate);
		if(putenv(env) != 0) {
			EXCEPT("Putenv failed!.");
		}
	}

	if ( need_uninit ) {
		uninit_user_ids();
		need_uninit = false;
	} 

		// passing "root" to init_user_ids is fatal
	if (strcmp(user, "root") == 0) {
		set_root_priv();
	} else {
		init_user_ids( user, NULL );
		set_user_priv();
		need_uninit = true;
	}

		// getpwnam() returns a pointer to static storage that later passwd
		// lookups may overwrite, and the calls above may perform such
		// lookups -- so log the name we were given rather than
		// dereferencing passwd_entry here.
	dprintf( D_FULLDEBUG, "Using %s's .Xauthority: \n", user );
	return true;
}
示例#14
0
void
CheckCredentials () {
  CredentialWrapper * pCred;
  credentials.Rewind();  
  dprintf (D_FULLDEBUG, "In CheckCredentials()\n");

  // Get current time
  time_t now = time(NULL);

  while (credentials.Next(pCred)) {
    
    init_user_id_from_FQN (pCred->cred->GetOwner());
    priv_state priv = set_user_priv();

    time_t time = pCred->cred->GetRealExpirationTime();
    dprintf (D_FULLDEBUG, "Checking %s:%s = %ld\n",
	       pCred->cred->GetOwner(),
               pCred->cred->GetName(),
	       time);

    if (time - now < 0) {
      dprintf (D_FULLDEBUG, "Credential %s:%s expired!\n",
	       pCred->cred->GetOwner(),
	       pCred->cred->GetName());
    }
    else if (time - now < default_cred_expire_threshold) {
      dprintf (D_FULLDEBUG, "Credential %s:%s about to expire\n",
	       pCred->cred->GetOwner(),
	       pCred->cred->GetName());
      if (pCred->cred->GetType() == X509_CREDENTIAL_TYPE) {
	RefreshProxyThruMyProxy ((X509CredentialWrapper*)pCred);
      }
    }
    
    set_priv (priv); // restore old priv
  }
}
示例#15
0
/*
 * Reaper for condor_gridmanager processes.
 *
 * pid         - process id of the process that exited
 * exit_status - its exit status as inspected with WIFEXITED/WEXITSTATUS
 *
 * Logs the exit.  If the process exited with code DPRINTF_ERROR, logs a
 * hint about GRIDMANAGER_LOG permissions and emails the admin (at most once
 * every six hours).  If the pid belongs to a known gridmanager node, the
 * node's timers are cancelled, the node is removed from the table, its
 * scratch directory is removed with the owner's privilege, and the node is
 * deleted.  Always returns 0.
 */
int 
GridUniverseLogic::GManagerReaper(Service *,int pid, int exit_status)
{
	gman_node_t* gman_node = NULL;
	MyString owner;

	// Iterate through our table to find the node w/ this pid
	// Someday we should perhaps also hash on the pid, but we
	// don't expect gridmanagers to exit very often, and there
	// are not that many of them.

	if (gman_pid_table) {
		gman_node_t* tmpnode;
		gman_pid_table->startIterations();
		// Each iteration step stores the current key in `owner`; on a
		// match we break out, so `owner` then holds the matching key.
		while ( gman_pid_table->iterate(owner,tmpnode) ) {
			if (tmpnode->pid == pid ) {
				// found it!
				gman_node = tmpnode;
				break;
			}
		}
	}

	// `owner` is only meaningful when a node was found; use a placeholder
	// for logging otherwise.
	MyString owner_safe;
	MyString exit_reason;
	if(gman_node) { owner_safe = owner; }
	else { owner_safe = "Unknown"; }
	if ( WIFEXITED( exit_status ) ) {
		exit_reason.formatstr( "with return code %d",
							 WEXITSTATUS( exit_status ) );
	} else {
		exit_reason.formatstr( "due to %s",
							 daemonCore->GetExceptionString( exit_status ) );
	}
	dprintf(D_ALWAYS, "condor_gridmanager (PID %d, owner %s) exited %s.\n",
			pid, owner_safe.Value(), exit_reason.Value() );
	if(WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == DPRINTF_ERROR) {
		const char *condorUserName = get_condor_username();

		dprintf(D_ALWAYS, 
			"The gridmanager had a problem writing its log. "
			"Check the permissions of the file specified by GRIDMANAGER_LOG; "
			"it needs to be writable by Condor.\n");

			/* send email to the admin about this, but only
			 * every six hours - enough to not be ignored, but
			 * not enough to be a pest.  If only my children were
			 * so helpful and polite.  Ah, well, we can always dream...
			 */
		// Function-local static: remembers the time of the last email
		// across calls.
		static time_t last_email_re_gridmanlog = 0;
		if ( time(NULL) - last_email_re_gridmanlog > 6 * 60 * 60 ) {
			last_email_re_gridmanlog = time(NULL);
			FILE *email = email_admin_open("Unable to launch grid universe jobs.");
			if ( email ) {
				fprintf(email,
					"The condor_gridmanager had an error writing its log file.  Check the  \n"
					"permissions/ownership of the file specified by the GRIDMANAGER_LOG setting in \n"
					"the condor_config file.  This file needs to be writable as user %s to enable\n"
					"the condor_gridmanager daemon to write to it. \n\n"
					"Until this problem is fixed, grid universe jobs submitted from this machine cannot "
					"be launched.\n", condorUserName ? condorUserName : "******" );
				email_close(email);
			} else {
					// Error sending an email message
				dprintf(D_ALWAYS,"ERROR: Cannot send email to the admin\n");
			}
		}	
	}	// end if(WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == DPRINTF_ERROR)

	if (!gman_node) {
		// nothing more to do, so return
		return 0;
	}

	// Cancel any timers before removing the node!!
	if (gman_node->add_timer_id != -1) {
		daemonCore->Cancel_Timer(gman_node->add_timer_id);
	}
	if (gman_node->remove_timer_id != -1) {
		daemonCore->Cancel_Timer(gman_node->remove_timer_id);
	}
	// Remove node from our hash table (`owner` is the key found above)
	gman_pid_table->remove(owner);
	// Remove any scratch directory used by this gridmanager
	char *scratchdir = scratchFilePath(gman_node);
	ASSERT(scratchdir);
	// The directory is only touched when it exists and the owner's ids
	// could be initialized; removal then happens with user privilege.
	if ( IsDirectory(scratchdir) && 
		 init_user_ids(gman_node->owner, gman_node->domain) ) 
	{
		priv_state saved_priv = set_user_priv();
			// Must put this in braces so the Directory object
			// destructor is called, which will free the iterator
			// handle.  If we didn't do this, the below rmdir 
			// would fail.
		{
			Directory tmp( scratchdir );
			tmp.Remove_Entire_Directory();
		}
		if ( rmdir(scratchdir) == 0 ) {
			dprintf(D_FULLDEBUG,"Removed scratch dir %s\n",scratchdir);
		} else {
			dprintf(D_FULLDEBUG,"Failed to remove scratch dir %s\n",
					scratchdir);
		}
		set_priv(saved_priv);
		uninit_user_ids();
	}
	// scratchdir is released with delete [], so scratchFilePath()
	// presumably allocates it with new [] -- confirm.
	delete [] scratchdir;

	// Reclaim memory from the node itself
	delete gman_node;

	return 0;
}
示例#16
0
/*
 * Service loop of the shadow: field remote syscall requests and log
 * traffic from the running job until the job is done.
 *
 * Switches to the job owner's privilege and changes into the job's initial
 * working directory (on failure ErrBuf/HadErr are set and the function
 * returns).  It then repeatedly select()s on RSC_SOCK and CLIENT_LOG,
 * waking up at least every periodic_interval_len seconds to evaluate the
 * periodic policy.  The loop ends when do_REMOTE_syscall() reports < 0 or
 * periodic_policy() returns true.  Afterwards the remaining log data is
 * flushed, condor privilege is restored, the executing host is told to quit
 * (unless pipes are used), and the job's exit status is logged.
 */
void
HandleSyscalls()
{
	int				cnt;
	fd_set 			readfds;

	time_t			periodic_interval_len = 20; /* secs, empirically found :) */

		/* select() wants one more than the highest descriptor we watch */
	const int		nfds = (RSC_SOCK > CLIENT_LOG ) ? (RSC_SOCK + 1) : (CLIENT_LOG + 1);

	init_user_ids(Proc->owner, NULL);
	set_user_priv();

	dprintf(D_FULLDEBUG, "HandleSyscalls: about to chdir(%s)\n", Proc->iwd);
	if( chdir(Proc->iwd) < 0 ) {
		sprintf( ErrBuf,  "Can't chdir() to \"%s\"! [%s(%d)]", Proc->iwd, 
			strerror(errno), errno );
		HadErr = TRUE;
		return;
	}

	dprintf(D_SYSCALLS, "Shadow: Starting to field syscall requests\n");
	errno = 0;

	time_t current_time = time(0);
	time_t next_periodic_update = current_time + periodic_interval_len;
	
	for(;;) {	/* get a request and fulfill it */

		FD_ZERO(&readfds);
		FD_SET(RSC_SOCK, &readfds);
		FD_SET(CLIENT_LOG, &readfds);

		struct timeval timer;
		timer.tv_sec = next_periodic_update - current_time;
		timer.tv_usec = 0;
		/* if the current timer is set for a time longer than the periodic
			interval, truncate it to the periodic limit. After 
			inspection of the bandwidth timer, it seems that it will recorrect
			itself if select comes out of the loop before the timer goes off
			anyway to handle syscalls */
		if ( timer.tv_sec > periodic_interval_len) {
			timer.tv_sec = periodic_interval_len;
		}

		unblock_signal(SIGCHLD);
		unblock_signal(SIGUSR1);
#if defined(LINUX) || defined(Solaris)
		cnt = select(nfds, &readfds, (fd_set *)0, (fd_set *)0, &timer);
#else
		cnt = select(nfds, &readfds, 0, 0, &timer);
#endif
		block_signal(SIGCHLD);
		block_signal(SIGUSR1);

		if( cnt < 0 && errno != EINTR ) {
			EXCEPT("HandleSyscalls: select: errno=%d, rsc_sock=%d, client_log=%d",errno,RSC_SOCK,CLIENT_LOG);
		}

			/* interrupted by a signal: just try again */
		if( cnt < 0 && errno == EINTR ) {
			continue;
		}

		if( FD_ISSET(CLIENT_LOG, &readfds) ) {
			if( HandleLog() < 0 ) {
				EXCEPT( "Peer went away" );
			}
		}

		if( FD_ISSET(RSC_SOCK, &readfds) ) {
			if( do_REMOTE_syscall() < 0 ) {
				dprintf(D_SYSCALLS,
						"Shadow: do_REMOTE_syscall returned < 0\n");
				break;
			}
		}

			/* NOTE(review): UMBILICAL is never added to readfds in this
			   function, so this test can only fire if UMBILICAL has the
			   same value as one of the descriptors above -- confirm
			   whether it is still needed. */
		if( FD_ISSET(UMBILICAL, &readfds) ) {
			dprintf(D_ALWAYS,
				"Shadow: Local scheduler apparently died, so I die too\n");
			exit(1);
		}

		current_time = time(0);

		/* if this is true, then do the periodic_interval_len events */
		if (current_time >= next_periodic_update) {
			next_periodic_update = current_time + periodic_interval_len;

			/* evaluate some attributes for policies like determining what to
			do if a job suspends weirdly or some such thing. This function
			has the possibility of making the shadow exit with JOB_SHOULD_HOLD
			or futzing up some global variables about how the job could've
			exited and letting Wraup take care of it. */
			if (periodic_policy() == true)
			{
				break;
			}
		}

#if defined(SYSCALL_DEBUG)
		strcpy( SyscallLabel, "shadow" );
#endif
	}

	/*
	The user job might exit while there is still unread data in the log.
	So, select with a timeout of zero, and flush everything from the log.
	*/
		/* 
		   NOTE: Since HandleLog does its own loop to make sure it's
		   read everything, we don't need a loop here, and should only
		   call HandleLog once.  In fact, if there's a problem w/
		   select(), a loop here can cause an infinite loop.  
		   -Derek Wright and Jim Basney, 2/17/99.
		*/
	HandleLog();
	
		/* Take back normal condor privileges */
	set_condor_priv();

		/* If we are debugging with named pipes as our communications medium,
		   won't have a condor_startd running - don't try to send to it.
		*/
	if( !UsePipes ) {
		send_quit( ExecutingHost, GlobalCap );
	}

	dprintf(D_ALWAYS,
		"Shadow: Job %d.%d exited, termsig = %d, coredump = %d, retcode = %d\n",
			Proc->id.cluster, Proc->id.proc, WTERMSIG(JobStatus),
			WCOREDUMP(JobStatus), WEXITSTATUS(JobStatus));
}
示例#17
0
/*
 * Return the gridmanager node responsible for the given owner (and
 * optional attribute value / job id), starting a new condor_gridmanager
 * process if none is known yet.
 *
 * owner, domain  - user the gridmanager works for
 * attr_value     - optional attribute value; an empty string is treated
 *                  like NULL (and attr_name is then ignored as well)
 * attr_name      - name of the attribute attr_value belongs to
 * cluster, proc  - job id; appended to the table key when cluster is
 *                  non-zero
 *
 * Returns the existing or newly created node, or NULL when the table is
 * gone, the owner is missing (or is root on non-Windows), configuration or
 * argument parsing fails, the user ids cannot be initialized, the scratch
 * directory cannot be created, or the process cannot be started.
 */
GridUniverseLogic::gman_node_t *
GridUniverseLogic::StartOrFindGManager(const char* owner, const char* domain,
	   	const char* attr_value, const char* attr_name, int cluster, int proc)
{
	gman_node_t* gman_node;
	int pid;

		// If attr_value is an empty string, convert to NULL since code
		// after this point expects that.
	if ( attr_value && strlen(attr_value)==0 ) {
		attr_value = NULL;
		attr_name = NULL;
	}

	if ( (gman_node=lookupGmanByOwner(owner, attr_value, cluster, proc)) ) {
		// found it
		return gman_node;
	}

	// not found.  fire one up!  we want to run the GManager as the user.

	// but first, make certain we are not shutting down...
	if (!gman_pid_table) {
		// destructor has already been called; we are probably
		// closing down.
		return NULL;
	}


#ifndef WIN32
	if (owner && strcasecmp(owner, "root") == 0 ) {
		dprintf(D_ALWAYS, "Tried to start condor_gmanager as root.\n");
		return NULL;
	}
#endif

	// NOTE(review): owner is printed with %s here (and was passed to
	// lookupGmanByOwner above) before the NULL check further down.
	dprintf( D_FULLDEBUG, "Starting condor_gmanager for owner %s (%d.%d)\n",
			owner, cluster, proc);

	// gman_binary comes from param() and is released with free() on every
	// exit path below.
	char *gman_binary;
	gman_binary = param("GRIDMANAGER");
	if ( !gman_binary ) {
		dprintf(D_ALWAYS,"ERROR - GRIDMANAGER not defined in config file\n");
		return NULL;
	}

	ArgList args;
	MyString error_msg;

	args.AppendArg("condor_gridmanager");
	args.AppendArg("-f");

	// Extra arguments from the configuration, if any.
	char *gman_args = param("GRIDMANAGER_ARGS");

	if(!args.AppendArgsV1RawOrV2Quoted(gman_args,&error_msg)) {
		dprintf( D_ALWAYS, "ERROR: failed to parse gridmanager args: %s\n",
				 error_msg.Value());
		free(gman_binary);
		free(gman_args);
		return NULL;
	}
	free(gman_args);

	// build a constraint
	if ( !owner ) {
		dprintf(D_ALWAYS,"ERROR - missing owner field\n");
		free(gman_binary);
		return NULL;
	}
	// The constraint selects this owner's grid-universe jobs, optionally
	// narrowed to jobs whose attr_name equals attr_value.
	MyString constraint;
	if ( !attr_name  ) {
		constraint.formatstr("(%s=?=\"%s\"&&%s==%d)",
						   ATTR_OWNER,owner,
						   ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);
	} else {
		constraint.formatstr("(%s=?=\"%s\"&&%s=?=\"%s\"&&%s==%d)",
						   ATTR_OWNER,owner,
						   attr_name,attr_value,
						   ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);

		args.AppendArg("-A");
		args.AppendArg(attr_value);
	}
	args.AppendArg("-C");
	args.AppendArg(constraint.Value());

	// "-o" receives "owner" or "owner@domain".
	MyString full_owner_name(owner);
	if ( domain && *domain ) {
		full_owner_name.formatstr_cat( "@%s", domain );
	}
	args.AppendArg("-o");
	args.AppendArg(full_owner_name.Value());

	if (!init_user_ids(owner, domain)) {
		dprintf(D_ALWAYS,"ERROR - init_user_ids() failed in GRIDMANAGER\n");
		free(gman_binary);
		return NULL;
	}

	static bool first_time_through = true;
	if ( first_time_through ) {
		// Note: Because first_time_through is static, this block runs only 
		// once per schedd invocation.
		first_time_through = false;

		// Clean up any old / abandoned scratch dirs.
		dprintf(D_FULLDEBUG,"Checking for old gridmanager scratch dirs\n");
		char *prefix = temp_dir_path();
		ASSERT(prefix);
		Directory tmp( prefix, PRIV_USER );
		const char *f;
		char const *dot;
		int fname_pid;
		int mypid = daemonCore->getpid();
		int scratch_pre_len = strlen(scratch_prefix);
		while ( (f=tmp.Next()) ) {
				// skip regular files -- we only need to inspect subdirs
			if ( !tmp.IsDirectory() ) {
				continue;
			}
				// skip if it does not start with our prefix
			if ( strncmp(scratch_prefix,f,scratch_pre_len) ) {
				continue;
			}
				// skip if does not end w/ a pid
			dot = strrchr(f,'.');
			if ( !dot ) {
				continue;
			}
				// skip if this pid is still alive and not ours
			dot++;	// skip over period
			fname_pid = atoi(dot);
			if ( fname_pid != mypid && daemonCore->Is_Pid_Alive(fname_pid) ) {
					continue;
			}
				// if we made it here, blow away this subdir
			if ( tmp.Remove_Current_File() ) {
				dprintf(D_ALWAYS,"Removed old scratch dir %s\n",
				tmp.GetFullPath());
			}
		}	// end of while for cleanup of old scratch dirs

		dprintf(D_FULLDEBUG,"Done checking for old scratch dirs\n");			

		if (prefix != NULL) {
			free(prefix);
			prefix = NULL;
		}

	}	// end of once-per-schedd invocation block

	// Create a temp dir for the gridmanager and append proper
	// command-line arguments to tell where it is.
	// The directory is created with user privilege; afterwards the
	// previous privilege is restored and the user ids are un-initialized.
	bool failed = false;
	gman_node = new gman_node_t;
	char *finalpath = scratchFilePath(gman_node);
	priv_state saved_priv = set_user_priv();
	if ( (mkdir(finalpath,0700)) < 0 ) {
		// mkdir failed.  
		dprintf(D_ALWAYS,"ERROR - mkdir(%s,0700) failed in GRIDMANAGER, errno=%d (%s)\n",
				finalpath, errno, strerror(errno));
		failed = true;
	}
	set_priv(saved_priv);
	uninit_user_ids();
	args.AppendArg("-S");	// -S = "ScratchDir" argument
	args.AppendArg(finalpath);
	delete [] finalpath;
	if ( failed ) {
		// we already did dprintf reason to the log...
		free(gman_binary);
		delete gman_node;
		return NULL;
	}

	if(IsFulldebug(D_FULLDEBUG)) {
		MyString args_string;
		args.GetArgsStringForDisplay(&args_string);
		dprintf(D_FULLDEBUG,"Really Execing %s\n",args_string.Value());
	}

	pid = daemonCore->Create_Process( 
		gman_binary,			// Program to exec
		args,					// Command-line args
		PRIV_ROOT,				// Run as root, so it can switch to
		                        //   PRIV_CONDOR
		rid						// Reaper ID
		);

	free(gman_binary);

	// NOTE(review): on this failure path the scratch directory created
	// above is not removed here.
	if ( pid <= 0 ) {
		dprintf ( D_ALWAYS, "StartOrFindGManager: Create_Process problems!\n" );
		if (gman_node) delete gman_node;
		return NULL;
	}

	// If we made it here, we happily started up a new gridmanager process

	dprintf( D_ALWAYS, "Started condor_gmanager for owner %s pid=%d\n",
			owner,pid);

	// Make a new gman_node entry for our hashtable & insert it
	// NOTE(review): gman_node was already allocated above and has not been
	// reset since, so this check appears to be dead code.
	if ( !gman_node ) {
		gman_node = new gman_node_t;
	}
	gman_node->pid = pid;
	gman_node->owner[0] = '\0';
	gman_node->domain[0] = '\0';
	// NOTE(review): unbounded strcpy; assumes owner/domain fit the
	// (presumably fixed-size) buffers in gman_node_t -- confirm sizes.
	if ( owner ) {
		strcpy(gman_node->owner,owner);
	}
	if ( domain ) {
		strcpy(gman_node->domain,domain);
	}
	// Table key: owner, then attr_value (if any), then "-cluster.proc"
	// when cluster is non-zero.
	MyString owner_key(owner);
	if(attr_value){
		owner_key += attr_value;
	}
	if (cluster) {
		owner_key.formatstr_cat( "-%d.%d", cluster, proc );
	}

	ASSERT( gman_pid_table->insert(owner_key,gman_node) == 0 );

	// start timer to signal gridmanager if we haven't already
	// (presumably gman_node_t's constructor initializes add_timer_id
	// to -1 -- confirm)
	if ( gman_node->add_timer_id == -1 ) {  // == -1 means no timer set
		gman_node->add_timer_id = daemonCore->Register_Timer(job_added_delay,
			GridUniverseLogic::SendAddSignal,
			"GridUniverseLogic::SendAddSignal");
		daemonCore->Register_DataPtr(gman_node);
	}

	// All done
	return gman_node;
}
示例#18
0
int
OsProc::StartJob(FamilyInfo* family_info, FilesystemRemap* fs_remap=NULL)
{
	int nice_inc = 0;
	bool has_wrapper = false;

	dprintf(D_FULLDEBUG,"in OsProc::StartJob()\n");

	if ( !JobAd ) {
		dprintf ( D_ALWAYS, "No JobAd in OsProc::StartJob()!\n" );
		return 0;
	}

	MyString JobName;
	if ( JobAd->LookupString( ATTR_JOB_CMD, JobName ) != 1 ) {
		dprintf( D_ALWAYS, "%s not found in JobAd.  Aborting StartJob.\n", 
				 ATTR_JOB_CMD );
		return 0;
	}

	const char* job_iwd = Starter->jic->jobRemoteIWD();
	dprintf( D_ALWAYS, "IWD: %s\n", job_iwd );

		// some operations below will require a PrivSepHelper if
		// PrivSep is enabled (if it's not, privsep_helper will be
		// NULL)
	PrivSepHelper* privsep_helper = Starter->privSepHelper();

		// // // // // // 
		// Arguments
		// // // // // // 

		// prepend the full path to this name so that we
		// don't have to rely on the PATH inside the
		// USER_JOB_WRAPPER or for exec().

    bool transfer_exe = false;
    if (!JobAd->LookupBool(ATTR_TRANSFER_EXECUTABLE, transfer_exe)) {
        transfer_exe = false;
    }

    bool preserve_rel = false;
    if (!JobAd->LookupBool(ATTR_PRESERVE_RELATIVE_EXECUTABLE, preserve_rel)) {
        preserve_rel = false;
    }

    bool relative_exe = is_relative_to_cwd(JobName.Value());

    if (relative_exe && preserve_rel && !transfer_exe) {
        dprintf(D_ALWAYS, "Preserving relative executable path: %s\n", JobName.Value());
    }
	else if ( strcmp(CONDOR_EXEC,JobName.Value()) == 0 ) {
		JobName.formatstr( "%s%c%s",
		                 Starter->GetWorkingDir(),
		                 DIR_DELIM_CHAR,
		                 CONDOR_EXEC );
    }
	else if (relative_exe && job_iwd && *job_iwd) {
		MyString full_name;
		full_name.formatstr("%s%c%s",
		                  job_iwd,
		                  DIR_DELIM_CHAR,
		                  JobName.Value());
		JobName = full_name;

	}

	if( Starter->isGridshell() ) {
			// if we're a gridshell, just try to chmod our job, since
			// globus probably transfered it for us and left it with
			// bad permissions...
		priv_state old_priv = set_user_priv();
		int retval = chmod( JobName.Value(), S_IRWXU | S_IRWXO | S_IRWXG );
		set_priv( old_priv );
		if( retval < 0 ) {
			dprintf ( D_ALWAYS, "Failed to chmod %s!\n", JobName.Value() );
			return 0;
		}
	} 

	ArgList args;

		// Since we may be adding to the argument list, we may need to deal
		// with platform-specific arg syntax in the user's args in order
		// to successfully merge them with the additional wrapper args.
	args.SetArgV1SyntaxToCurrentPlatform();

		// First, put "condor_exec" or whatever at the front of Args,
		// since that will become argv[0] of what we exec(), either
		// the wrapper or the actual job.

	if( !getArgv0() ) {
		args.AppendArg(JobName.Value());
	} else {
		args.AppendArg(getArgv0());
	}
	
		// Support USER_JOB_WRAPPER parameter...
	char *wrapper = NULL;
	if( (wrapper=param("USER_JOB_WRAPPER")) ) {

			// make certain this wrapper program exists and is executable
		if( access(wrapper,X_OK) < 0 ) {
			dprintf( D_ALWAYS, 
					 "Cannot find/execute USER_JOB_WRAPPER file %s\n",
					 wrapper );
			free( wrapper );
			return 0;
		}
		has_wrapper = true;
			// Now, we've got a valid wrapper.  We want that to become
			// "JobName" so we exec it directly, and we want to put
			// what was the JobName (with the full path) as the first
			// argument to the wrapper
		args.AppendArg(JobName.Value());
		JobName = wrapper;
		free(wrapper);
	}
	
		// Support USE_PARROT 
	bool use_parrot = false;
	if( JobAd->LookupBool( ATTR_USE_PARROT, use_parrot) ) {
			// Check for parrot executable
		char *parrot = NULL;
		if( (parrot=param("PARROT")) ) {
			if( access(parrot,X_OK) < 0 ) {
				dprintf( D_ALWAYS, "Unable to use parrot(Cannot find/execute "
					"at %s(%s)).\n", parrot, strerror(errno) );
				free( parrot );
				return 0;
			} else {
				args.AppendArg(JobName.Value());
				JobName = parrot;
				free( parrot );
			}
		} else {
			dprintf( D_ALWAYS, "Unable to use parrot(Undefined path in config"
			" file)" );
			return 0;
		}
	}

		// Either way, we now have to add the user-specified args as
		// the rest of the Args string.
	MyString args_error;
	if(!args.AppendArgsFromClassAd(JobAd,&args_error)) {
		dprintf(D_ALWAYS, "Failed to read job arguments from JobAd.  "
				"Aborting OsProc::StartJob: %s\n",args_error.Value());
		return 0;
	}

		// // // // // // 
		// Environment 
		// // // // // // 

		// Now, instantiate an Env object so we can manipulate the
		// environment as needed.
	Env job_env;

	MyString env_errors;
	if( !Starter->GetJobEnv(JobAd,&job_env,&env_errors) ) {
		dprintf( D_ALWAYS, "Aborting OSProc::StartJob: %s\n",
				 env_errors.Value());
		return 0;
	}


		// // // // // // 
		// Standard Files
		// // // // // // 

	// handle stdin, stdout, and stderr redirection
	int fds[3];
		// initialize these to -2 to mean they're not specified.
		// -1 will be treated as an error.
	fds[0] = -2; fds[1] = -2; fds[2] = -2;

		// in order to open these files we must have the user's privs:
	priv_state priv;
	priv = set_user_priv();

		// if we're in PrivSep mode, we won't necessarily be able to
		// open the files for the job. getStdFile will return us an
		// open FD in some situations, but otherwise will give us
		// a filename that we'll pass to the PrivSep Switchboard
		//
	bool stdin_ok;
	bool stdout_ok;
	bool stderr_ok;
	MyString privsep_stdin_name;
	MyString privsep_stdout_name;
	MyString privsep_stderr_name;
	if (privsep_helper != NULL) {
		stdin_ok = getStdFile(SFT_IN,
		                      NULL,
		                      true,
		                      "Input file",
		                      &fds[0],
		                      &privsep_stdin_name);
		stdout_ok = getStdFile(SFT_OUT,
		                       NULL,
		                       true,
		                       "Output file",
		                       &fds[1],
		                       &privsep_stdout_name);
		stderr_ok = getStdFile(SFT_ERR,
		                       NULL,
		                       true,
		                       "Error file",
		                       &fds[2],
		                       &privsep_stderr_name);
	}
	else {
		fds[0] = openStdFile( SFT_IN,
		                      NULL,
		                      true,
		                      "Input file");
		stdin_ok = (fds[0] != -1);
		fds[1] = openStdFile( SFT_OUT,
		                      NULL,
		                      true,
		                      "Output file");
		stdout_ok = (fds[1] != -1);
		fds[2] = openStdFile( SFT_ERR,
		                      NULL,
		                      true,
		                      "Error file");
		stderr_ok = (fds[2] != -1);
	}

	/* Bail out if we couldn't open the std files correctly */
	if( !stdin_ok || !stdout_ok || !stderr_ok ) {
		/* only close ones that had been opened correctly */
		for ( int i = 0; i <= 2; i++ ) {
			if ( fds[i] >= 0 ) {
				daemonCore->Close_FD ( fds[i] );
			}
		}
		dprintf(D_ALWAYS, "Failed to open some/all of the std files...\n");
		dprintf(D_ALWAYS, "Aborting OsProc::StartJob.\n");
		set_priv(priv); /* go back to original priv state before leaving */
		return 0;
	}

		// // // // // // 
		// Misc + Exec
		// // // // // // 

	if( !ThisProcRunsAlongsideMainProc() ) {
		Starter->jic->notifyJobPreSpawn();
	}

	// compute job's renice value by evaluating the machine's
	// JOB_RENICE_INCREMENT in the context of the job ad...

    char* ptmp = param( "JOB_RENICE_INCREMENT" );
	if( ptmp ) {
			// insert renice expr into our copy of the job ad
		MyString reniceAttr = "Renice = ";
		reniceAttr += ptmp;
		if( !JobAd->Insert( reniceAttr.Value() ) ) {
			dprintf( D_ALWAYS, "ERROR: failed to insert JOB_RENICE_INCREMENT "
				"into job ad, Aborting OsProc::StartJob...\n" );
			free( ptmp );
			return 0;
		}
			// evaluate
		if( JobAd->EvalInteger( "Renice", NULL, nice_inc ) ) {
			dprintf( D_ALWAYS, "Renice expr \"%s\" evaluated to %d\n",
					 ptmp, nice_inc );
		} else {
			dprintf( D_ALWAYS, "WARNING: job renice expr (\"%s\") doesn't "
					 "eval to int!  Using default of 10...\n", ptmp );
			nice_inc = 10;
		}

			// enforce valid ranges for nice_inc
		if( nice_inc < 0 ) {
			dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too "
					 "low: adjusted to 0\n", nice_inc );
			nice_inc = 0;
		}
		else if( nice_inc > 19 ) {
			dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too "
					 "high: adjusted to 19\n", nice_inc );
			nice_inc = 19;
		}

		ASSERT( ptmp );
		free( ptmp );
		ptmp = NULL;
	} else {
			// if JOB_RENICE_INCREMENT is undefined, default to 0
		nice_inc = 0;
	}

		// in the below dprintfs, we want to skip past argv[0], which
		// is sometimes condor_exec, in the Args string. 

	MyString args_string;
	args.GetArgsStringForDisplay(&args_string, 1);
	if( has_wrapper ) { 
			// print out exactly what we're doing so folks can debug
			// it, if they need to.
		dprintf( D_ALWAYS, "Using wrapper %s to exec %s\n", JobName.Value(), 
				 args_string.Value() );
	} else {
		dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(),
				 args_string.Value() );
	}

		// Grab the full environment back out of the Env object 
	if(IsFulldebug(D_FULLDEBUG)) {
		MyString env_string;
		job_env.getDelimitedStringForDisplay(&env_string);
		dprintf(D_FULLDEBUG, "Env = %s\n", env_string.Value());
	}

	// Check to see if we need to start this process paused, and if
	// so, pass the right flag to DC::Create_Process().
	int job_opt_mask = DCJOBOPT_NO_CONDOR_ENV_INHERIT;
	if (!param_boolean("JOB_INHERITS_STARTER_ENVIRONMENT",false)) {
		job_opt_mask |= DCJOBOPT_NO_ENV_INHERIT;
	}
	int suspend_job_at_exec = 0;
	JobAd->LookupBool( ATTR_SUSPEND_JOB_AT_EXEC, suspend_job_at_exec);
	if( suspend_job_at_exec ) {
		dprintf( D_FULLDEBUG, "OsProc::StartJob(): "
				 "Job wants to be suspended at exec\n" );
		job_opt_mask |= DCJOBOPT_SUSPEND_ON_EXEC;
	}

	// If there is a requested coresize for this job, enforce it.
	// Convert negative and very large values to RLIM_INFINITY, meaning
	// no size limit.
	// RLIM_INFINITY is unsigned, but its value and type size vary.
	long long core_size_ad;
	size_t core_size;
	size_t *core_size_ptr = NULL;
#if !defined(WIN32)
	if ( JobAd->LookupInteger( ATTR_CORE_SIZE, core_size_ad ) ) {
		if ( core_size_ad < 0 || (unsigned long long)core_size_ad > RLIM_INFINITY ) {
			core_size = RLIM_INFINITY;
		} else {
			core_size = (size_t)core_size_ad;
		}
		core_size_ptr = &core_size;
	}
#endif // !defined(WIN32)

	long rlimit_as_hard_limit = 0;
	char *rlimit_expr = param("STARTER_RLIMIT_AS");
	if (rlimit_expr) {
		classad::ClassAdParser parser;

		classad::ExprTree *tree = parser.ParseExpression(rlimit_expr);
		if (tree) {
			classad::Value val;
			long long result;

			if (EvalExprTree(tree, Starter->jic->machClassAd(), JobAd, val) && 
				val.IsIntegerValue(result)) {
					result *= 1024 * 1024; // convert to megabytes
					rlimit_as_hard_limit = (long)result; // truncate for Create_Process
					if (result > rlimit_as_hard_limit) {
						// if truncation to long results in a change in the value, then
						// the requested limit must be > 2 GB and we are on a 32 bit platform
						// in that case, the requested limit is > than what the process can get anyway
						// so just don't set a limit.
						rlimit_as_hard_limit = 0;
					}
					if (rlimit_as_hard_limit > 0) {
						dprintf(D_ALWAYS, "Setting job's virtual memory rlimit to %ld megabytes\n", rlimit_as_hard_limit);
					}
			} else {
				dprintf(D_ALWAYS, "Can't evaluate STARTER_RLIMIT_AS expression %s\n", rlimit_expr);
			}
		} else {
			dprintf(D_ALWAYS, "Can't parse STARTER_RLIMIT_AS expression: %s\n", rlimit_expr);
		}
	}

	int *affinity_mask = makeCpuAffinityMask(Starter->getMySlotNumber());

#if defined ( WIN32 )
    owner_profile_.update ();
    /*************************************************************
    NOTE: We currently *ONLY* support loading slot-user profiles.
    This limitation will be addressed shortly, by allowing regular 
    users to load their registry hive - Ben [2008-09-31]
    **************************************************************/
    bool load_profile = false,
         run_as_owner = false;
    JobAd->LookupBool ( ATTR_JOB_LOAD_PROFILE, load_profile );
    JobAd->LookupBool ( ATTR_JOB_RUNAS_OWNER,  run_as_owner );
    if ( load_profile && !run_as_owner ) {
        if ( owner_profile_.load () ) {
            /* publish the users environment into that of the main 

            job's environment */
            if ( !owner_profile_.environment ( job_env ) ) {
                dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to "
                    "export owner's environment.\n" );
            }            
        } else {
            dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to load "
                "owner's profile.\n" );
        }
    }
#endif

		// While we are still in user priv, print out the username
#if defined(LINUX)
	if( Starter->glexecPrivSepHelper() ) {
			// TODO: if there is some way to figure out the final username,
			// print it out here or after starting the job.
		dprintf(D_ALWAYS,"Running job via glexec\n");
	}
#else
	if( false ) {
	}
#endif
	else {
		char const *username = NULL;
		char const *how = "";
		CondorPrivSepHelper* cpsh = Starter->condorPrivSepHelper();
		if( cpsh ) {
			username = cpsh->get_user_name();
			how = "via privsep switchboard ";
		}
		else {
			username = get_user_loginname();
		}
		if( !username ) {
			username = "******";
		}
		dprintf(D_ALWAYS,"Running job %sas user %s\n",how,username);
	}

	set_priv ( priv );

    // use this to return more detailed and reliable error message info
    // from create-process operation.
    MyString create_process_err_msg;

	if (privsep_helper != NULL) {
		const char* std_file_names[3] = {
			privsep_stdin_name.Value(),
			privsep_stdout_name.Value(),
			privsep_stderr_name.Value()
		};
		JobPid = privsep_helper->create_process(JobName.Value(),
		                                        args,
		                                        job_env,
		                                        job_iwd,
		                                        fds,
		                                        std_file_names,
		                                        nice_inc,
		                                        core_size_ptr,
		                                        1,
		                                        job_opt_mask,
		                                        family_info,
												affinity_mask,
												&create_process_err_msg);
	}
	else {
		JobPid = daemonCore->Create_Process( JobName.Value(),
		                                     args,
		                                     PRIV_USER_FINAL,
		                                     1,
		                                     FALSE,
		                                     FALSE,
		                                     &job_env,
		                                     job_iwd,
		                                     family_info,
		                                     NULL,
		                                     fds,
		                                     NULL,
		                                     nice_inc,
		                                     NULL,
		                                     job_opt_mask, 
		                                     core_size_ptr,
                                             affinity_mask,
											 NULL,
                                             &create_process_err_msg,
                                             fs_remap,
											 rlimit_as_hard_limit);
	}

	// Create_Process() saves the errno for us if it is an "interesting" error.
	int create_process_errno = errno;

    // errno is 0 in the privsep case.  This executes for the daemon core create-process logic
    if ((FALSE == JobPid) && (0 != create_process_errno)) {
        if (create_process_err_msg != "") create_process_err_msg += " ";
        MyString errbuf;
        errbuf.formatstr("(errno=%d: '%s')", create_process_errno, strerror(create_process_errno));
        create_process_err_msg += errbuf;
    }

	// now close the descriptors in fds array.  our child has inherited
	// them already, so we should close them so we do not leak descriptors.
	// NOTE, we want to use a special method to close the starter's
	// versions, if that's what we're using, so we don't think we've
	// still got those available in other parts of the code for any
	// reason.
	for ( int i = 0; i <= 2; i++ ) {
		if ( fds[i] >= 0 ) {
			daemonCore->Close_FD ( fds[i] );
		}
	}

	if ( JobPid == FALSE ) {
		JobPid = -1;

		if(!create_process_err_msg.IsEmpty()) {

			// if the reason Create_Process failed was that registering
			// a family with the ProcD failed, it is indicative of a
			// problem regarding this execute machine, not the job. in
			// this case, we'll want to EXCEPT instead of telling the
			// Shadow to put the job on hold. there are probably other
			// error conditions where EXCEPTing would be more appropriate
			// as well...
			//
			if (create_process_errno == DaemonCore::ERRNO_REGISTRATION_FAILED) {
				EXCEPT("Create_Process failed to register the job with the ProcD");
			}

			MyString err_msg = "Failed to execute '";
			err_msg += JobName;
			err_msg += "'";
			if(!args_string.IsEmpty()) {
				err_msg += " with arguments ";
				err_msg += args_string.Value();
			}
			err_msg += ": ";
			err_msg += create_process_err_msg;
			if( !ThisProcRunsAlongsideMainProc() ) {
				Starter->jic->notifyStarterError( err_msg.Value(),
			    	                              true,
			        	                          CONDOR_HOLD_CODE_FailedToCreateProcess,
			            	                      create_process_errno );
			}
		}

		dprintf(D_ALWAYS,"Create_Process(%s,%s, ...) failed: %s\n",
			JobName.Value(), args_string.Value(), create_process_err_msg.Value());
		return 0;
	}

	num_pids++;

	dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid);

	job_start_time.getTime();

	return 1;
}
示例#19
0
/* Copies the owner's profile directory into a per-process back-up
directory located beneath the profile cache. Returns TRUE if the 
back-up copy was created; otherwise, FALSE. */
BOOL
OwnerProfile::backup () {

    dprintf ( D_FULLDEBUG, "In OwnerProfile::backup()\n" );

    BOOL        copied      = FALSE,
                succeeded   = FALSE;
    int         name_size   = 0;
    priv_state  saved_priv  = PRIV_UNKNOWN;

    __try {

        /* a profile that is still loaded cannot be copied: per the
        original note, many of its core files will be locked and 
        the copy would hit access-denied errors */
        if ( loaded () ) {

            dprintf ( 
                D_FULLDEBUG, 
                "OwnerProfile::backup: Cannot backup the profile "
                "while it is in use.\n");

            __leave;

        }

        /* NOTE(review): the original comment states the copy is done 
        "as the Condor" because the copy mechanism preserves the 
        directory's ACLs, yet the call made here is set_user_priv() 
        -- confirm which privilege state is intended */
        saved_priv = set_user_priv ();

        /* the back-up directory is named "<cache>\<user>-<pid>": 
        size the buffer for both strings, one path separator and 
        20 more characters for the dash and the process id */
        name_size = strlen ( profile_cache_ ) 
            + strlen ( user_name_ ) + 1
            + 20;
        profile_backup_ = new CHAR[name_size + 1];
        ASSERT ( profile_backup_ );

        DWORD const this_pid = GetCurrentProcessId ();
        sprintf ( 
            profile_backup_, 
            "%s\\%s-%d", 
            profile_cache_, 
            user_name_,
            this_pid );

        /* duplicate the profile directory into the back-up location */
        copied = CondorCopyDirectory ( 
            profile_directory_, 
            profile_backup_ );

        /* the error code is only meaningful when the copy failed */
        DWORD const copy_error = copied ? 0 : GetLastError ();

        dprintf ( 
            D_FULLDEBUG, 
            "OwnerProfile::backup: Copying '%s' to '%s' %s. "
            "(last-error = %u)\n", 
            profile_directory_,
            profile_backup_,
            copied ? "succeeded" : "failed", 
            copy_error );

        /* success is defined solely by the copy having worked */
        if ( copied ) {
            succeeded = TRUE;
        }

    }
    __finally {

        /* undo the privilege switch, but only if we got far enough 
        to have made one */
        if ( PRIV_UNKNOWN != saved_priv ) {
            set_priv ( saved_priv );
        }

    }

    return succeeded;

}
示例#20
0
void
VMGahpServer::killVM(void)
{
    if( m_vm_type.IsEmpty() || m_vmgahp_server.IsEmpty() ) {
        return;
    }

    if( m_workingdir.IsEmpty() ) {
        dprintf(D_ALWAYS, "VMGahpServer::killVM() : no workingdir\n");
        return;
    }

    MyString matchstring;
    if( (strcasecmp(m_vm_type.Value(), CONDOR_VM_UNIVERSE_XEN ) == MATCH) || (strcasecmp(m_vm_type.Value(), CONDOR_VM_UNIVERSE_KVM ) == MATCH) ) {
        if( create_name_for_VM(m_job_ad, matchstring) == false ) {
            dprintf(D_ALWAYS, "VMGahpServer::killVM() : "
                    "cannot make the name of VM\n");
            return;
        }
    } else {
        // Except Xen, we need the path of working directory of Starter
        // in order to destroy a VM.
        matchstring = m_workingdir;
    }

    if( matchstring.IsEmpty() ) {
        dprintf(D_ALWAYS, "VMGahpServer::killVM() : empty matchstring\n");
        return;
    }

    // vmgahp is daemonCore, so we need to add -f -t options of daemonCore.
    // Then, try to execute vmgahp with
    // vmtype <vmtype> match <string>"
    ArgList systemcmd;
    systemcmd.AppendArg(m_vmgahp_server);
    systemcmd.AppendArg("-f");
    if( m_include_gahp_log ) {
        systemcmd.AppendArg("-t");
    }
    systemcmd.AppendArg("-M");
    systemcmd.AppendArg(VMGAHP_KILL_MODE);
    systemcmd.AppendArg("vmtype");
    systemcmd.AppendArg(m_vm_type);
    systemcmd.AppendArg("match");
    systemcmd.AppendArg(matchstring);

#if !defined(WIN32)
    if( can_switch_ids() ) {
        MyString tmp_str;
        tmp_str.sprintf("%d", (int)get_condor_uid());
        SetEnv("VMGAHP_USER_UID", tmp_str.Value());
    }
    else if (Starter->condorPrivSepHelper() != NULL) {
        MyString tmp_str;
        tmp_str.sprintf("%d", (int)Starter->condorPrivSepHelper()->get_uid());
        SetEnv("VMGAHP_USER_UID", tmp_str.Value());
    }
#endif

    priv_state oldpriv;
    if( (strcasecmp(m_vm_type.Value(), CONDOR_VM_UNIVERSE_XEN ) == MATCH) || (strcasecmp(m_vm_type.Value(), CONDOR_VM_UNIVERSE_KVM ) == MATCH) ) {
        oldpriv = set_root_priv();
    } else {
        oldpriv = set_user_priv();
    }
    int ret = my_system(systemcmd);
    set_priv(oldpriv);

    if( ret == 0 ) {
        dprintf( D_FULLDEBUG, "VMGahpServer::killVM() is called with "
                 "'%s'\n", matchstring.Value());
    } else {
        dprintf( D_FULLDEBUG, "VMGahpServer::killVM() failed!\n");
    }

    return;
}
示例#21
0
int
ScriptProc::StartJob()
{
	dprintf(D_FULLDEBUG,"in ScriptProc::StartJob()\n");

	if ( !JobAd ) {
		dprintf ( D_ALWAYS, "No JobAd in ScriptProc::StartJob()!\n" );
		return 0;
	}

	MyString attr;

	attr = name;
	attr += ATTR_JOB_CMD;
	char* tmp = NULL;
	if( ! JobAd->LookupString( attr.Value(), &tmp ) ) {
		dprintf( D_ALWAYS, "%s not found in JobAd.  Aborting StartJob.\n", 
				 attr.Value() );
		return 0;
	}

		// // // // // // 
		// executable
		// // // // // // 

		// TODO: make it smart in cases we're not the gridshell and/or
		// didn't transfer files so that we don't prepend the wrong
		// path to the binary, and don't try to chmod it.
	MyString exe_path = Starter->GetWorkingDir();
	exe_path += DIR_DELIM_CHAR;
	exe_path += tmp;
	free( tmp ); 
	tmp = NULL;

	if( Starter->isGridshell() ) {
			// if we're a gridshell, chmod() the binary, since globus
			// probably transfered it for us and left it with bad
			// permissions...
		priv_state old_priv = set_user_priv();
		int retval = chmod( exe_path.Value(), 0755 );
		set_priv( old_priv );
		if( retval < 0 ) {
			dprintf( D_ALWAYS, "Failed to chmod %s: %s (errno %d)\n", 
					 exe_path.Value(), strerror(errno), errno );
			return 0;
		}
	} 


		// // // // // // 
		// Args
		// // // // // // 

	char *args1 = NULL;
	char *args2 = NULL;
	MyString args1_attr;
	MyString args2_attr;
	args1_attr = name;
	args1_attr += ATTR_JOB_ARGUMENTS1;
	args2_attr = name;
	args2_attr += ATTR_JOB_ARGUMENTS2;

	JobAd->LookupString(args1_attr.Value(), &args1);
	JobAd->LookupString(args2_attr.Value(), &args2);

	ArgList args;

		// Since we are adding to the argument list, we may need to deal
		// with platform-specific arg syntax in the user's args in order
		// to successfully merge them with the additional args.
	args.SetArgV1SyntaxToCurrentPlatform();

		// First, put "condor_<name>script" at the front of Args,
		// since that will become argv[0] of what we exec(), either
		// the wrapper or the actual job.
	MyString arg0;
	arg0 = "condor_";
	arg0 += name;
	arg0 += "script";
	args.AppendArg(arg0.Value());

	MyString args_error;
	if(args2 && *args2) {
		args.AppendArgsV2Raw(args2,&args_error);
	}
	else if(args1 && *args1) {
		args.AppendArgsV1Raw(args1,&args_error);
	}
	else {
		dprintf( D_FULLDEBUG, "neither %s nor %s could be found in JobAd\n",
				 args1_attr.Value(), args2_attr.Value());
	}

	free( args1 );
	free( args2 );

		// // // // // // 
		// Environment 
		// // // // // // 

	char *env1 = NULL;
	char *env2 = NULL;
	MyString env1_attr;
	MyString env2_attr;
	env1_attr = name;
	env1_attr += ATTR_JOB_ENVIRONMENT1;
	env2_attr = name;
	env2_attr += ATTR_JOB_ENVIRONMENT2;
	JobAd->LookupString( env1_attr.Value(), &env1 );
	JobAd->LookupString( env2_attr.Value(), &env2 );
			// TODO do we want to use the regular ATTR_JOB_ENVIRONMENT
			// if there's nothing specific for this script?

		// Now, instantiate an Env object so we can manipulate the
		// environment as needed.
	Env job_env;
	MyString env_errors;
	if( env2 && *env2 ) { 
		if( ! job_env.MergeFromV2Raw(env2,&env_errors) ) {
			dprintf( D_ALWAYS, "Invalid %s found in JobAd (%s).  "
					 "Aborting ScriptProc::StartJob.\n",
					 env2_attr.Value(),env_errors.Value() );  
			free( env1 );
			free( env2 );
			return 0;
		}
	}
	else if( env1 && *env1 ) { 
		if( ! job_env.MergeFromV1Raw(env1,&env_errors) ) {
			dprintf( D_ALWAYS, "Invalid %s found in JobAd (%s).  "
					 "Aborting ScriptProc::StartJob.\n",
					 env1_attr.Value(),env_errors.Value() );  
			free( env1 );
			free( env2 );
			return 0;
		}
	}

	free(env1);
	free(env2);

		// Now, let the starter publish any env vars it wants to add
	Starter->PublishToEnv( &job_env );


		// TODO: Deal with port regulation stuff?

		// Grab the full environment back out of the Env object 
	if(IsFulldebug(D_FULLDEBUG)) {
		MyString env_str;
		job_env.getDelimitedStringForDisplay(&env_str);
		dprintf(D_FULLDEBUG, "%sEnv = %s\n", name, env_str.Value() );
	}



		// // // // // // 
		// Standard Files
		// // // // // // 

		// TODO???


		// // // // // // 
		// Misc + Exec
		// // // // // // 

		// TODO?
		// Starter->jic->notifyJobPreSpawn( name );

		// compute job's renice value by evaluating the machine's
		// JOB_RENICE_INCREMENT in the context of the job ad...
		// TODO?
	int nice_inc = 10;


		// in the below dprintfs, we want to skip past argv[0], which
		// is sometimes condor_exec, in the Args string. 

	MyString args_string;
	args.GetArgsStringForDisplay(&args_string,1);
	dprintf( D_ALWAYS, "About to exec %s script: %s %s\n", 
			 name, exe_path.Value(), 
			 args_string.Value() );
		
	// If there is a requested coresize for this job, enforce it.
	// It is truncated because you can't put an unsigned integer
	// into a classad. I could rewrite condor's use of ATTR_CORE_SIZE to
	// be a float, but then when that attribute is read/written to the
	// job queue log by/or shared between versions of Condor which view the
	// type of that attribute differently, calamity would arise.
	int core_size_truncated;
	size_t core_size;
	size_t *core_size_ptr = NULL;
	if ( JobAd->LookupInteger(ATTR_CORE_SIZE, core_size_truncated) ) {
		core_size = (size_t)core_size_truncated;
		core_size_ptr = &core_size;
	}

	JobPid = daemonCore->Create_Process(exe_path.Value(), 
	                                    args,
	                                    PRIV_USER_FINAL,
	                                    1,
	                                    FALSE,
	                                    FALSE,
	                                    &job_env,
	                                    Starter->jic->jobIWD(),
	                                    NULL,
	                                    NULL,
	                                    NULL,
	                                    NULL,
	                                    nice_inc,
	                                    NULL,
	                                    DCJOBOPT_NO_ENV_INHERIT,
	                                    core_size_ptr );

	//NOTE: Create_Process() saves the errno for us if it is an
	//"interesting" error.
	char const *create_process_error = NULL;
	int create_process_errno = errno;
	if( JobPid == FALSE && errno ) {
		create_process_error = strerror( errno );
	}

	if( JobPid == FALSE ) {
		JobPid = -1;

		if( create_process_error ) {
			MyString err_msg = "Failed to execute '";
			err_msg += exe_path.Value();
			err_msg += "'";
			if(!args_string.IsEmpty()) {
				err_msg += " with arguments ";
				err_msg += args_string.Value();
			}
			err_msg += ": ";
			err_msg += create_process_error;
			Starter->jic->notifyStarterError( err_msg.Value(), true, CONDOR_HOLD_CODE_FailedToCreateProcess, create_process_errno );
		}

		EXCEPT( "Create_Process(%s,%s, ...) failed",
				exe_path.Value(), args_string.Value() );
		return 0;
	}

	dprintf( D_ALWAYS, "Create_Process succeeded, pid=%d\n", JobPid );

	job_start_time.getTime();

	return 1;
}
示例#22
0
// GridManager start-up hook: installs the dprintf header callback,
// parses the command line, switches to the job owner's privileges and
// then runs Init() and Register().
//
// Recognised options, each of which requires a value:
//   -A  SelectionValue
//   -C  ScheddJobConstraint
//   -s  ScheddAddr (use this schedd address instead of asking the parent)
//   -S  GridmanagerScratchDir
//   -o  already consumed by main_pre_dc_init(); only its value is skipped
// Anything else is handed to usage().
void
main_init( int argc, char ** const argv )
{

	// Install our dprintf header callback (per the original note,
	// this makes dprintf display the pid)
	DebugId = display_dprintf_header;

	dprintf(D_FULLDEBUG,
		"Welcome to the all-singing, all dancing, \"amazing\" GridManager!\n");

	// handle specific command line args
	for ( int idx = 1; idx < argc; idx++ ) {
		if ( argv[idx][0] != '-' ) {
			usage( argv[0] );
		}

		// Where the option's value gets stored; stays NULL for
		// options whose value is merely skipped over.
		char **dest = NULL;
		bool wants_value = true;

		switch( argv[idx][1] ) {
		case 'A':
			dest = &SelectionValue;
			break;
		case 'C':
			dest = &ScheddJobConstraint;
			break;
		case 's':
			// don't check parent for schedd addr. use this one instead
			dest = &ScheddAddr;
			break;
		case 'S':
			dest = &GridmanagerScratchDir;
			break;
		case 'o':
			// We handled this in main_pre_dc_init(), so just verify that
			// it has an argument.
			break;
		default:
			wants_value = false;
			usage( argv[0] );
			break;
		}

		if ( !wants_value ) {
			continue;
		}

		// every recognised option must be followed by its value
		if ( argc <= idx + 1 ) {
			usage( argv[0] );
		}

		if ( dest != NULL ) {
			// a repeated option replaces the earlier value
			if ( *dest ) {
				free( *dest );
			}
			*dest = strdup( argv[idx + 1] );
		}

		// step over the value we just consumed
		idx++;
	}

	// Tell DaemonCore that we want to spend all our time as the job owner,
	// not as user condor.
	daemonCore->Register_Priv_State( PRIV_USER );
	set_user_priv();

	Init();
	Register();
}
示例#23
0
/* Rolls the owner's profile directory back to the copy saved by 
backup(): the modified profile directory is removed, the back-up is 
copied into its place, and the back-up directory is then deleted.
Returns TRUE if all three steps succeeded; otherwise, FALSE.  On 
success the stored back-up directory name is released and reset. */
BOOL
OwnerProfile::restore () {
    
    dprintf ( D_FULLDEBUG, "In OwnerProfile::restore()\n" );

    priv_state  priv            = PRIV_UNKNOWN;
    BOOL        profile_deleted = FALSE,
                backup_restored = FALSE,
                backup_deleted  = FALSE,
                ok              = FALSE;

    __try {

        /* can't restore while the profile is loaded */
        if ( loaded () ) {

            dprintf ( 
                D_FULLDEBUG, 
                "OwnerProfile::restore: Cannot restore the profile "
                "while it is in use.\n");

            __leave;

        }

        /* without a back-up to copy from we must not touch the 
        profile: removing it first would leave nothing to put back */
        if ( NULL == profile_backup_ ) {

            dprintf ( 
                D_FULLDEBUG, 
                "OwnerProfile::restore: No profile backup to "
                "restore from.\n");

            __leave;

        }

        /* NOTE(review): the original comment states this is done 
        "as the Condor" because the copy mechanism preserves the 
        directory's ACLs, yet the call made here is set_user_priv() 
        -- confirm which privilege state is intended */
        priv = set_user_priv ();

        /* use the directory created by the backup() call to 
        roll-back the changes made during the job execution */        
        profile_deleted = 
            CondorRemoveDirectory ( profile_directory_ );
        
        dprintf ( 
            D_FULLDEBUG, 
            "OwnerProfile::restore: Deleting the "
            "modified profile %s. (last-error = %u)\n", 
            profile_deleted ? "succeeded" : "failed", 
            profile_deleted ? 0 : GetLastError () );

        if ( !profile_deleted ) {
            __leave;
        }

        /* having removed the modified profile directory, 
        restore the back-up we made of the profile template */
        backup_restored = CondorCopyDirectory ( 
            profile_backup_,
            profile_directory_ );

        /* this step copies the back-up into place, so the message 
        says "Restoring" rather than "Deleting" */
        dprintf ( 
            D_FULLDEBUG, 
            "OwnerProfile::restore: Restoring the "
            "profile backup %s. (last-error = %u)\n", 
            backup_restored ? "succeeded" : "failed", 
            backup_restored ? 0 : GetLastError () );

        if ( !backup_restored ) {
            __leave;
        }

        /* finally, remove the back-up directory: this ensures
        that each new job receives a fresh copy of the template */        
        backup_deleted = 
            CondorRemoveDirectory ( profile_backup_ );

        dprintf ( 
            D_FULLDEBUG, 
            "OwnerProfile::restore: Deleting the "
            "back-up directory %s. (last-error = %u)\n", 
            backup_deleted ? "succeeded" : "failed", 
            backup_deleted ? 0 : GetLastError () );

        if ( !backup_deleted ) {
            __leave;
        }

        /* if we've arrived here, then all is well */
        ok = TRUE;

    }
    __finally {

        /* return to previous privilege level */
        if ( PRIV_UNKNOWN != priv ) {
            set_priv ( priv );
        }

        /* only if we were successful can we release the name of 
        this session's profile backup directory; reset the pointer 
        so that it is never used or deleted a second time */
        if ( ok ) { 
            delete [] profile_backup_;
            profile_backup_ = NULL;
        }

    }

    return ok;

}
示例#24
0
void
BaseShadow::baseInit( ClassAd *job_ad, const char* schedd_addr, const char *xfer_queue_contact_info )
{
	int pending = FALSE;

	if( ! job_ad ) {
		EXCEPT("baseInit() called with NULL job_ad!");
	}
	jobAd = job_ad;

	if (sendUpdatesToSchedd && ! is_valid_sinful(schedd_addr)) {
		EXCEPT("schedd_addr not specified with valid address");
	}
	scheddAddr = sendUpdatesToSchedd ? strdup( schedd_addr ) : strdup("noschedd");

	m_xfer_queue_contact_info = xfer_queue_contact_info;

	if ( !jobAd->LookupString(ATTR_OWNER, owner)) {
		EXCEPT("Job ad doesn't contain an %s attribute.", ATTR_OWNER);
	}

	if( !jobAd->LookupInteger(ATTR_CLUSTER_ID, cluster)) {
		EXCEPT("Job ad doesn't contain a %s attribute.", ATTR_CLUSTER_ID);
	}

	if( !jobAd->LookupInteger(ATTR_PROC_ID, proc)) {
		EXCEPT("Job ad doesn't contain a %s attribute.", ATTR_PROC_ID);
	}


		// Grab the GlobalJobId if we've got it.
	if( ! jobAd->LookupString(ATTR_GLOBAL_JOB_ID, &gjid) ) {
		gjid = NULL;
	}

	// grab the NT domain if we've got it
	jobAd->LookupString(ATTR_NT_DOMAIN, domain);
	if ( !jobAd->LookupString(ATTR_JOB_IWD, iwd)) {
		EXCEPT("Job ad doesn't contain an %s attribute.", ATTR_JOB_IWD);
	}

	if( !jobAd->LookupFloat(ATTR_BYTES_SENT, prev_run_bytes_sent) ) {
		prev_run_bytes_sent = 0;
	}
	if( !jobAd->LookupFloat(ATTR_BYTES_RECVD, prev_run_bytes_recvd) ) {
		prev_run_bytes_recvd = 0;
	}

		// construct the core file name we'd get if we had one.
	MyString tmp_name = iwd;
	tmp_name += DIR_DELIM_CHAR;
	tmp_name += "core.";
	tmp_name += cluster;
	tmp_name += '.';
	tmp_name += proc;
	core_file_name = strdup( tmp_name.Value() );

        // put the shadow's sinful string into the jobAd.  Helpful for
        // the mpi shadow, at least...and a good idea in general.
	MyString tmp_addr = ATTR_MY_ADDRESS;
	tmp_addr += "=\"";
	tmp_addr += daemonCore->InfoCommandSinfulString();
	tmp_addr += '"';
    if ( !jobAd->Insert( tmp_addr.Value() )) {
        EXCEPT( "Failed to insert %s!", ATTR_MY_ADDRESS );
    }

	DebugId = display_dprintf_header;
	
	config();

		// Make sure we've got enough swap space to run
	checkSwap();

	// handle system calls with Owner's privilege
// XXX this belong here?  We'll see...
	// Calling init_user_ids() while in user priv causes badness.
	// Make sure we're in another priv state.
	set_condor_priv();
	if ( !init_user_ids(owner.Value(), domain.Value())) {
		dprintf(D_ALWAYS, "init_user_ids() failed as user %s\n",owner.Value() );
		// uids.C will EXCEPT when we set_user_priv() now
		// so there's not much we can do at this point
		
#if ! defined(WIN32)
		if ( param_boolean( "SHADOW_RUN_UNKNOWN_USER_JOBS", false ) )
		{
			dprintf(D_ALWAYS, "trying init_user_ids() as user nobody\n" );
			
			owner="nobody";
			domain=NULL;
			if (!init_user_ids(owner.Value(), domain.Value()))
			{
				dprintf(D_ALWAYS, "init_user_ids() failed!\n");
			}
			else
			{
				jobAd->Assign( ATTR_JOB_RUNAS_OWNER, "FALSE" );
				m_RunAsNobody=true;
				dprintf(D_ALWAYS, "init_user_ids() now running as user nobody\n");
			}
		}
#endif

	}
	set_user_priv();
	daemonCore->Register_Priv_State( PRIV_USER );

	dumpClassad( "BaseShadow::baseInit()", this->jobAd, D_JOB );

		// initialize the UserPolicy object
	shadow_user_policy.init( jobAd, this );

		// setup an object to keep our job ad updated to the schedd's
		// permanent job queue.  this clears all the dirty bits on our
		// copy of the classad, so anything we touch after this will
		// be updated to the schedd when appropriate.

		// Unless we got a command line arg asking us not to
	if (sendUpdatesToSchedd) {
		// the usual case
		job_updater = new QmgrJobUpdater( jobAd, scheddAddr, CondorVersion() );
	} else {
		job_updater = new NullQmgrJobUpdater( jobAd, scheddAddr, CondorVersion() );
	}

		// init user log; hold on failure
		// NOTE: job_updater must be initialized _before_ initUserLog(),
		// in order to handle the case of the job going on hold as a
		// result of failure in initUserLog().
	initUserLog();

		// change directory; hold on failure
	if ( cdToIwd() == -1 ) {
		EXCEPT("Could not cd to initial working directory");
	}

		// check to see if this invocation of the shadow is just to write
		// a terminate event and exit since this job had been recorded as
		// pending termination, but somehow the job didn't leave the queue
		// and the schedd is trying to restart it again..
	if( jobAd->LookupInteger(ATTR_TERMINATION_PENDING, pending)) {
		if (pending == TRUE) {
			// If the classad of this job "thinks" that this job should be
			// finished already, let's enact that belief.
			// This function does not return.
			this->terminateJob(US_TERMINATE_PENDING);
		}
	}

		// If we need to claim the startd before activating the claim
	int wantClaiming = 0;
	jobAd->LookupBool(ATTR_CLAIM_STARTD, wantClaiming);
	if (wantClaiming) {
		MyString startdSinful;
		MyString claimid;

			// Pull startd addr and claimid out of the jobad
		jobAd->LookupString(ATTR_STARTD_IP_ADDR, startdSinful);
		jobAd->LookupString(ATTR_CLAIM_ID, claimid);

		dprintf(D_ALWAYS, "%s is true, trying to claim startd %s\n", ATTR_CLAIM_STARTD, startdSinful.Value());

		classy_counted_ptr<DCStartd> startd = new DCStartd("description", NULL, startdSinful.Value(), claimid.Value());
	
		classy_counted_ptr<DCMsgCallback> cb = 
			new DCMsgCallback((DCMsgCallback::CppFunction)&BaseShadow::startdClaimedCB,
			this, jobAd);
																 
			// this can't fail, will always call the callback
		startd->asyncRequestOpportunisticClaim(jobAd, 
											   "description", 
											   daemonCore->InfoCommandSinfulString(), 
											   1200 /*alive interval*/, 
											   20 /* net timeout*/, 
											   100 /*total timeout*/, 
											   cb);
	}
}
示例#25
0
/**
 * merge_stderr_with_stdout is intended for clients of this function
 * that wish to have the old behavior, where stderr and stdout were
 * both added to the same StringList.
 */
int systemCommand( ArgList &args, priv_state priv, StringList *cmd_out, StringList * cmd_in,
		   StringList *cmd_err, bool merge_stderr_with_stdout)
{
	int result = 0;
	FILE *fp = NULL;
	FILE * fp_for_stdin = NULL;
	FILE * childerr = NULL;
	MyString line;
	char buff[1024];
	StringList *my_cmd_out = cmd_out;

	priv_state prev = PRIV_UNKNOWN;

	int stdout_pipes[2];
	int stdin_pipes[2];
	int pid;
	bool use_privsep = false;
	switch ( priv ) {
	case PRIV_ROOT:
		prev = set_root_priv();
		break;
	case PRIV_USER:
	case PRIV_USER_FINAL:
		prev = set_user_priv();
#if !defined(WIN32)
		if ( privsep_enabled() && (job_user_uid != get_condor_uid()) ) {
			use_privsep = true;
		}
#endif
		break;
	default:
		// Stay as Condor user
		;
	}
#if defined(WIN32)
	if((cmd_in != NULL) || (cmd_err != NULL))
	  {
	    vmprintf(D_ALWAYS, "Invalid use of systemCommand() in Windows.\n");
	    return -1;
	  }
	//if ( use_privsep ) {
	//	fp = privsep_popen(args, "r", want_stderr, job_user_uid);
	//}
	//else {
	fp = my_popen( args, "r", merge_stderr_with_stdout );
	//}
#else
	// The old way of doing things (and the Win32 way of doing
	//	things)
	// fp = my_popen( args, "r", want_stderr );
	if((cmd_err != NULL) && merge_stderr_with_stdout)
	  {
	    vmprintf(D_ALWAYS, "Invalid use of systemCommand().\n");
	    return -1;
	  }

	PrivSepForkExec psforkexec;
	char ** args_array = args.GetStringArray();
	int error_pipe[2];
		// AIX 5.2, Solaris 5.9, HPUX 11 don't have AF_LOCAL

	if(pipe(stdin_pipes) < 0)
	  {
	    vmprintf(D_ALWAYS, "Error creating pipe: %s\n", strerror(errno));
		deleteStringArray( args_array );
	    return -1;
	  }
	if(pipe(stdout_pipes) < 0)
	  {
	    vmprintf(D_ALWAYS, "Error creating pipe: %s\n", strerror(errno));
	    close(stdin_pipes[0]);
	    close(stdin_pipes[1]);
		deleteStringArray( args_array );
	    return -1;
	  }

	if ( use_privsep ) {
	  if(!psforkexec.init())
	    {
	      vmprintf(D_ALWAYS,
		       "my_popenv failure on %s\n",
		       args_array[0]);
	      close(stdin_pipes[0]);
	      close(stdin_pipes[1]);
	      close(stdout_pipes[0]);
	      close(stdout_pipes[1]);
		  deleteStringArray( args_array );
	      return -1;
	    }
	}

	if(cmd_err != NULL)
	  {
	    if(pipe(error_pipe) < 0)
	      {
		vmprintf(D_ALWAYS, "Could not open pipe for error output: %s\n", strerror(errno));
		close(stdin_pipes[0]);
		close(stdin_pipes[1]);
		close(stdout_pipes[0]);
		close(stdout_pipes[1]);
		deleteStringArray( args_array );
		return -1;
	      }
	  }
	// Now fork and do what my_popen used to do
	pid = fork();
	if(pid < 0)
	  {
	    vmprintf(D_ALWAYS, "Error forking: %s\n", strerror(errno));
		close(stdin_pipes[0]);
		close(stdin_pipes[1]);
		close(stdout_pipes[0]);
		close(stdout_pipes[1]);
		if(cmd_err != NULL) {
			close(error_pipe[0]);
			close(error_pipe[1]);
		}
		deleteStringArray( args_array );
	    return -1;
	  }
	if(pid == 0)
	  {
	    close(stdout_pipes[0]);
	    close(stdin_pipes[1]);
	    dup2(stdout_pipes[1], STDOUT_FILENO);
	    dup2(stdin_pipes[0], STDIN_FILENO);

	    if(merge_stderr_with_stdout) dup2(stdout_pipes[1], STDERR_FILENO);
	    else if(cmd_err != NULL) 
	      {
		close(error_pipe[0]);
		dup2(error_pipe[1], STDERR_FILENO);
	      }


	    uid_t euid = geteuid();
	    gid_t egid = getegid();
	    seteuid( 0 );
	    setgroups( 1, &egid );
	    setgid( egid );
	    setuid( euid );
	    
	    install_sig_handler(SIGPIPE, SIG_DFL);
	    sigset_t sigs;
	    sigfillset(&sigs);
	    sigprocmask(SIG_UNBLOCK, &sigs, NULL);


	    MyString cmd = args_array[0];

	    if ( use_privsep ) {
	    
	      ArgList al;
	      psforkexec.in_child(cmd, al);
          deleteStringArray( args_array );
	      args_array = al.GetStringArray();
	    }


	    execvp(cmd.Value(), args_array);
	    vmprintf(D_ALWAYS, "Could not execute %s: %s\n", args_array[0], strerror(errno));
	    exit(-1);
	  }
	close(stdin_pipes[0]);
	close(stdout_pipes[1]);
	fp_for_stdin = fdopen(stdin_pipes[1], "w");
	fp = fdopen(stdout_pipes[0], "r");
	if(cmd_err != NULL)
	  {
	    close(error_pipe[1]);
	    childerr = fdopen(error_pipe[0],"r");
	    if(childerr == 0)
	      {
		vmprintf(D_ALWAYS, "Could not open pipe for reading child error output: %s\n", strerror(errno));
		close(error_pipe[0]);
		close(stdin_pipes[1]);
		close(stdout_pipes[0]);
	    fclose(fp);
		fclose(fp_for_stdin);
		deleteStringArray( args_array );
		return -1;
	      }
	  }

	if ( use_privsep ) {
	  FILE* _fp = psforkexec.parent_begin();
	  privsep_exec_set_uid(_fp, job_user_uid);
	  privsep_exec_set_path(_fp, args_array[0]);
	  privsep_exec_set_args(_fp, args);
	  Env env;
	  env.MergeFrom(environ);
	  privsep_exec_set_env(_fp, env);
	  privsep_exec_set_iwd(_fp, ".");

	  privsep_exec_set_inherit_fd(_fp, 1);
	  privsep_exec_set_inherit_fd(_fp, 2);
	  privsep_exec_set_inherit_fd(_fp, 0);
	
	  if (!psforkexec.parent_end()) {
	    vmprintf(D_ALWAYS,
		     "my_popenv failure on %s\n",
		     args_array[0]);
	    fclose(fp);
		fclose(fp_for_stdin);
		if (childerr) {
			fclose(childerr);
		}
		deleteStringArray( args_array );
	    return -1;
	  }
	}

	deleteStringArray( args_array );
#endif
	set_priv( prev );
	if ( fp == NULL ) {
		MyString args_string;
		args.GetArgsStringForDisplay( &args_string, 0 );
		vmprintf( D_ALWAYS, "Failed to execute command: %s\n",
				  args_string.Value() );
		if (childerr)
			fclose(childerr);
		return -1;
	}

	if(cmd_in != NULL) {
	  cmd_in->rewind();
	  char * tmp;
	  while((tmp = cmd_in->next()) != NULL)
	    {
	      fprintf(fp_for_stdin, "%s\n", tmp);
	      fflush(fp_for_stdin);
	    }
	}
	if (fp_for_stdin) {
	  // So that we will not be waiting for output while the
	  // script waits for stdin to be closed.
	  fclose(fp_for_stdin);
	}

	if ( my_cmd_out == NULL ) {
		my_cmd_out = new StringList();
	}

	while ( fgets( buff, sizeof(buff), fp ) != NULL ) {
		line += buff;
		if ( line.chomp() ) {
			my_cmd_out->append( line.Value() );
			line = "";
		}
	}

	if(cmd_err != NULL)
	  {
	    while(fgets(buff, sizeof(buff), childerr) != NULL)
	      {
		line += buff;
		if(line.chomp())
		  {
		    cmd_err->append(line.Value());
		    line = "";
		  }
	      }
	    fclose(childerr);
	  }
#if defined(WIN32)
	result = my_pclose( fp );
#else
	// Why close first?  Just in case the child process is waiting
	// on a read, and we have nothing more to send it.  It will
	// now receive a SIGPIPE.
	fclose(fp);
	if(waitpid(pid, &result, 0) < 0)
	  {
	    vmprintf(D_ALWAYS, "Unable to wait: %s\n", strerror(errno));
		if ( cmd_out == NULL ) {
			delete my_cmd_out;
		}
	   
	    return -1;
	  }
#endif
	if( result != 0 ) {
		MyString args_string;
		args.GetArgsStringForDisplay(&args_string,0);
		vmprintf(D_ALWAYS,
		         "Command returned non-zero: %s\n",
		         args_string.Value());
		my_cmd_out->rewind();
		const char *next_line;
		while ( (next_line = my_cmd_out->next()) ) {
			vmprintf( D_ALWAYS, "  %s\n", next_line );
		}
	}
	if ( cmd_out == NULL ) {
		delete my_cmd_out;
	}
	return result;
}