Example #1
bool
XInterface::TryUser(const char *user)
{
	static char env[1024];
	static bool need_uninit = false;
	passwd *passwd_entry;

	passwd_entry = getpwnam(user);
	if(passwd_entry == NULL) {
		// The requested user isn't in the passwd file.
		dprintf( D_FULLDEBUG,
		 	"User %s cannot be found in passwd file.\n", user );
		return false;
	} else {
		snprintf(env, sizeof(env), "XAUTHORITY=%s/.Xauthority", passwd_entry->pw_dir);
		if(putenv(env) != 0) {
			EXCEPT("putenv failed!");
		}
	}

	if ( need_uninit ) {
		uninit_user_ids();
		need_uninit = false;
	} 

		// passing "root" to init_user_ids is fatal
	if (strcmp(user, "root") == 0) {
		set_root_priv();
	} else {
		init_user_ids( user, NULL );
		set_user_priv();
		need_uninit = true;
	}

	dprintf( D_FULLDEBUG, "Using %s's .Xauthority: \n", passwd_entry->pw_name );
	return true;
}
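
The set_user_priv()/uninit_user_ids() calls above come from HTCondor's uid-switching layer: init_user_ids() loads the target user's identity, set_user_priv() switches the process to it, and uninit_user_ids() releases it. As a rough, self-contained orientation, here is a minimal POSIX sketch of that switch-and-restore idea; run_as_user is a hypothetical name, the error handling is simplified, and seteuid() only succeeds for a privileged (root/CAP_SETUID) process.

#include <pwd.h>
#include <unistd.h>
#include <cstdio>

// Hypothetical analogue of the set_user_priv()/uninit_user_ids() pair:
// temporarily switch the effective uid to 'user', run some work, then
// restore the previous effective uid.
template <typename Fn>
bool run_as_user(const char *user, Fn work)
{
	struct passwd *pw = getpwnam(user);
	if (pw == NULL) {
		fprintf(stderr, "user %s not found in passwd file\n", user);
		return false;
	}
	uid_t saved = geteuid();          // remember the current priv state
	if (seteuid(pw->pw_uid) != 0) {   // drop to the target user
		perror("seteuid");
		return false;
	}
	work();                           // do the user-level work
	if (seteuid(saved) != 0) {        // restore, like set_priv(saved_priv)
		perror("seteuid restore");
		return false;
	}
	return true;
}
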
Example #2
int
JobRouterHookMgr::hookJobCleanup(RoutedJob* r_job)
{
	ClassAd temp_ad;
	char* hook_cleanup = getHookPath(HOOK_JOB_CLEANUP, r_job->src_ad);

	if (NULL == hook_cleanup)
	{
		// hook not defined
		dprintf(D_FULLDEBUG, "HOOK_JOB_CLEANUP not configured.\n");
		return 0;
	}

	if (0 >= r_job->dest_ad.size())
	{
		// No destination ad to work with, so there is nothing
		// for the cleanup hook to do.
		return 0;
	}

	// Verify the cleanup hook hasn't already been spawned and that
	// we're not waiting for it to return.
	std::string key = r_job->dest_key;
	if (true == JobRouterHookMgr::checkHookKnown(key.c_str(), HOOK_JOB_CLEANUP))
	{
		dprintf(D_FULLDEBUG, "JobRouterHookMgr::hookJobCleanup "
			"retried while still waiting for cleanup hook to "
			"return for job with key %s - ignoring\n", key.c_str());
		return 1;
	}


	temp_ad = r_job->dest_ad;

	MyString hook_stdin;
	temp_ad.sPrint(hook_stdin);

	CleanupClient* cleanup_client = new CleanupClient(hook_cleanup, r_job);
	if (NULL == cleanup_client)
	{
		dprintf(D_ALWAYS|D_FAILURE,
			"ERROR in JobRouterHookMgr::hookJobCleanup: "
			"failed to create cleanup client\n");
		return -1;
	}

	set_user_from_ad(r_job->src_ad);
	if (0 == spawn(cleanup_client, NULL, &hook_stdin, PRIV_USER_FINAL))
	{
		dprintf(D_ALWAYS|D_FAILURE,
				"ERROR in JobRouterHookMgr::hookJobCleanup: "
				"failed to spawn HOOK_JOB_CLEANUP (%s)\n", hook_cleanup);
		// Keep the user-id switch balanced on the error path as well.
		uninit_user_ids();
		delete cleanup_client;
		return -1;
	}
	uninit_user_ids();

	// Add our info to the list of hooks currently running for this job.
	if (false == JobRouterHookMgr::addKnownHook(key.c_str(), HOOK_JOB_CLEANUP))
	{
		dprintf(D_ALWAYS, "ERROR in JobRouterHookMgr::hookJobCleanup: "
				"failed to add HOOK_JOB_CLEANUP to list of "
				"hooks running for job key %s\n", key.c_str());
	}

	dprintf(D_FULLDEBUG, "HOOK_JOB_CLEANUP (%s) invoked.\n",
			hook_cleanup);
	return 1;
}
Example #3
int
JobRouterHookMgr::hookJobExit(RoutedJob* r_job)
{
	ClassAd temp_ad;
	char* hook_job_exit = getHookPath(HOOK_JOB_EXIT, r_job->src_ad);

	if (NULL == hook_job_exit)
	{
		// hook not defined
		dprintf(D_FULLDEBUG, "HOOK_JOB_EXIT not configured.\n");
		return 0;
	}

	// Verify the exit hook hasn't already been spawned and that
	// we're not waiting for it to return.
	std::string key = r_job->dest_key;
	if (true == JobRouterHookMgr::checkHookKnown(key.c_str(),HOOK_JOB_EXIT))
	{
		dprintf(D_FULLDEBUG, "JobRouterHookMgr::hookJobExit "
			"retried while still waiting for exit hook to return "
			"for job with key %s - ignoring\n", key.c_str());
		return 1;
	}

	temp_ad = r_job->src_ad;

	MyString hook_stdin;
	temp_ad.sPrint(hook_stdin);
	hook_stdin += "\n------\n";

	temp_ad = r_job->dest_ad;
	temp_ad.sPrint(hook_stdin);

	ExitClient *exit_client = new ExitClient(hook_job_exit, r_job);
	if (NULL == exit_client)
	{
		dprintf(D_ALWAYS|D_FAILURE, 
			"ERROR in JobRouterHookMgr::hookJobExit: "
			"failed to create exit client\n");
		return -1;
	}

	set_user_from_ad(r_job->src_ad);
	if (0 == spawn(exit_client, NULL, &hook_stdin, PRIV_USER_FINAL))
	{
		dprintf(D_ALWAYS|D_FAILURE,
				"ERROR in JobRouterHookMgr::hookJobExit: "
				"failed to spawn HOOK_JOB_EXIT (%s)\n", hook_job_exit);
		// Keep the user-id switch balanced on the error path as well.
		uninit_user_ids();
		delete exit_client;
		return -1;
	}
	uninit_user_ids();

	// Add our info to the list of hooks currently running for this job.
	if (false == JobRouterHookMgr::addKnownHook(key.c_str(), HOOK_JOB_EXIT))
	{
		dprintf(D_ALWAYS, "ERROR in JobRouterHookMgr::hookJobExit: "
				"failed to add HOOK_JOB_EXIT to list of "
				"hooks running for job key %s\n", key.c_str());
	}

	dprintf(D_FULLDEBUG, "HOOK_JOB_EXIT (%s) invoked.\n", hook_job_exit);
	return 1;
}
Example #4
int
JobRouterHookMgr::hookTranslateJob(RoutedJob* r_job, std::string &route_info)
{
	ClassAd temp_ad;
	char* hook_translate = getHookPath(HOOK_TRANSLATE_JOB, r_job->src_ad);

	if (NULL == hook_translate)
	{
		// hook not defined, which is ok
		dprintf(D_FULLDEBUG, "HOOK_TRANSLATE_JOB not configured.\n");
		return 0;
	}

	// Verify the translate hook hasn't already been spawned and that
	// we're not waiting for it to return.
	std::string key = r_job->src_key;
	if (true == JobRouterHookMgr::checkHookKnown(key.c_str(), HOOK_TRANSLATE_JOB))
	{
		dprintf(D_FULLDEBUG, "JobRouterHookMgr::hookTranslateJob "
			"retried while still waiting for translate hook to "
			"return for job with key %s - ignoring\n", key.c_str());
		return 1;
	}

	temp_ad = r_job->src_ad;

	MyString hook_stdin(route_info.c_str());
	hook_stdin += "\n------\n";
	temp_ad.sPrint(hook_stdin);

	TranslateClient* translate_client = new TranslateClient(hook_translate, r_job);
	if (NULL == translate_client)
	{
		dprintf(D_ALWAYS|D_FAILURE, 
			"ERROR in JobRouterHookMgr::hookTranslateJob: "
			"failed to create translation client\n");
		return -1;
	}

	set_user_from_ad(r_job->src_ad);
	if (0 == spawn(translate_client, NULL, &hook_stdin, PRIV_USER_FINAL))
	{
		dprintf(D_ALWAYS|D_FAILURE,
				"ERROR in JobRouterHookMgr::hookTranslateJob: "
				"failed to spawn HOOK_TRANSLATE_JOB (%s)\n", hook_translate);
		// Keep the user-id switch balanced on the error path as well.
		uninit_user_ids();
		delete translate_client;
		return -1;
	}
	uninit_user_ids();

	// Add our info to the list of hooks currently running for this job.
	if (false == JobRouterHookMgr::addKnownHook(key.c_str(), HOOK_TRANSLATE_JOB))
	{
		dprintf(D_ALWAYS, "ERROR in JobRouterHookMgr::hookTranslateJob: "
				"failed to add HOOK_TRANSLATE_JOB to list of "
				"hooks running for job key %s\n", key.c_str());
	}

	dprintf(D_FULLDEBUG, "HOOK_TRANSLATE_JOB (%s) invoked.\n",
			hook_translate);
	return 1;
}
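
Examples #2-#4 (and #7 below) repeat the same pairing: switch to the job owner's ids before spawn(), then call uninit_user_ids() on every path afterwards, including the failure branches. A scope guard makes that pairing automatic. The following is a hypothetical sketch, not HTCondor code; ScopedUninit is an invented name that simply runs whatever cleanup action it is given when the scope ends.

#include <functional>
#include <utility>

// Hypothetical RAII guard: runs a cleanup action when the scope ends,
// so every return path performs the matching "uninit" exactly once.
class ScopedUninit {
public:
	explicit ScopedUninit(std::function<void()> uninit)
		: m_uninit(std::move(uninit)) {}
	~ScopedUninit() { if (m_uninit) m_uninit(); }
	ScopedUninit(const ScopedUninit &) = delete;
	ScopedUninit &operator=(const ScopedUninit &) = delete;
private:
	std::function<void()> m_uninit;
};

// Sketch of use in a hook method (names as in the examples above):
//   set_user_from_ad(r_job->src_ad);
//   ScopedUninit guard(uninit_user_ids);
//   if (0 == spawn(...)) { ...; return -1; }  // guard uninits here too

With a guard like this, the explicit uninit_user_ids() calls in the error branches would become unnecessary.
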
Example #5
GridUniverseLogic::gman_node_t *
GridUniverseLogic::StartOrFindGManager(const char* owner, const char* domain,
	   	const char* attr_value, const char* attr_name, int cluster, int proc)
{
	gman_node_t* gman_node;
	int pid;

		// If attr_value is an empty string, convert to NULL since code
		// after this point expects that.
	if ( attr_value && strlen(attr_value)==0 ) {
		attr_value = NULL;
		attr_name = NULL;
	}

	if ( (gman_node=lookupGmanByOwner(owner, attr_value, cluster, proc)) ) {
		// found it
		return gman_node;
	}

	// not found.  fire one up!  we want to run the GManager as the user.

	// but first, make certain we are not shutting down...
	if (!gman_pid_table) {
		// destructor has already been called; we are probably
		// closing down.
		return NULL;
	}


#ifndef WIN32
	if (owner && strcasecmp(owner, "root") == 0 ) {
		dprintf(D_ALWAYS, "Tried to start condor_gmanager as root.\n");
		return NULL;
	}
#endif

	dprintf( D_FULLDEBUG, "Starting condor_gmanager for owner %s (%d.%d)\n",
			owner, cluster, proc);

	char *gman_binary = param("GRIDMANAGER");
	if ( !gman_binary ) {
		dprintf(D_ALWAYS,"ERROR - GRIDMANAGER not defined in config file\n");
		return NULL;
	}

	ArgList args;
	MyString error_msg;

	args.AppendArg("condor_gridmanager");
	args.AppendArg("-f");

	char *gman_args = param("GRIDMANAGER_ARGS");

	if(!args.AppendArgsV1RawOrV2Quoted(gman_args,&error_msg)) {
		dprintf( D_ALWAYS, "ERROR: failed to parse gridmanager args: %s\n",
				 error_msg.Value());
		free(gman_binary);
		free(gman_args);
		return NULL;
	}
	free(gman_args);

	// build a constraint
	if ( !owner ) {
		dprintf(D_ALWAYS,"ERROR - missing owner field\n");
		free(gman_binary);
		return NULL;
	}
	MyString constraint;
	if ( !attr_name  ) {
		constraint.formatstr("(%s=?=\"%s\"&&%s==%d)",
						   ATTR_OWNER,owner,
						   ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);
	} else {
		constraint.formatstr("(%s=?=\"%s\"&&%s=?=\"%s\"&&%s==%d)",
						   ATTR_OWNER,owner,
						   attr_name,attr_value,
						   ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);

		args.AppendArg("-A");
		args.AppendArg(attr_value);
	}
	args.AppendArg("-C");
	args.AppendArg(constraint.Value());

	MyString full_owner_name(owner);
	if ( domain && *domain ) {
		full_owner_name.formatstr_cat( "@%s", domain );
	}
	args.AppendArg("-o");
	args.AppendArg(full_owner_name.Value());

	if (!init_user_ids(owner, domain)) {
		dprintf(D_ALWAYS,"ERROR - init_user_ids() failed in GRIDMANAGER\n");
		free(gman_binary);
		return NULL;
	}

	static bool first_time_through = true;
	if ( first_time_through ) {
		// Note: Because first_time_through is static, this block runs only 
		// once per schedd invocation.
		first_time_through = false;

		// Clean up any old / abandoned scratch dirs.
		dprintf(D_FULLDEBUG,"Checking for old gridmanager scratch dirs\n");
		char *prefix = temp_dir_path();
		ASSERT(prefix);
		Directory tmp( prefix, PRIV_USER );
		const char *f;
		char const *dot;
		int fname_pid;
		int mypid = daemonCore->getpid();
		int scratch_pre_len = strlen(scratch_prefix);
		while ( (f=tmp.Next()) ) {
				// skip regular files -- we only need to inspect subdirs
			if ( !tmp.IsDirectory() ) {
				continue;
			}
				// skip if it does not start with our prefix
			if ( strncmp(scratch_prefix,f,scratch_pre_len) ) {
				continue;
			}
				// skip if does not end w/ a pid
			dot = strrchr(f,'.');
			if ( !dot ) {
				continue;
			}
				// skip if this pid is still alive and not ours
			dot++;	// skip over period
			fname_pid = atoi(dot);
			if ( fname_pid != mypid && daemonCore->Is_Pid_Alive(fname_pid) ) {
					continue;
			}
				// if we made it here, blow away this subdir
			if ( tmp.Remove_Current_File() ) {
				dprintf(D_ALWAYS,"Removed old scratch dir %s\n",
				tmp.GetFullPath());
			}
		}	// end of while for cleanup of old scratch dirs

		dprintf(D_FULLDEBUG,"Done checking for old scratch dirs\n");			

		if (prefix != NULL) {
			free(prefix);
			prefix = NULL;
		}

	}	// end of once-per-schedd invocation block

	// Create a temp dir for the gridmanager and append proper
	// command-line arguments to tell where it is.
	bool failed = false;
	gman_node = new gman_node_t;
	char *finalpath = scratchFilePath(gman_node);
	priv_state saved_priv = set_user_priv();
	if ( mkdir(finalpath,0700) < 0 ) {
		// mkdir failed.  
		dprintf(D_ALWAYS,"ERROR - mkdir(%s,0700) failed in GRIDMANAGER, errno=%d (%s)\n",
				finalpath, errno, strerror(errno));
		failed = true;
	}
	set_priv(saved_priv);
	uninit_user_ids();
	args.AppendArg("-S");	// -S = "ScratchDir" argument
	args.AppendArg(finalpath);
	delete [] finalpath;
	if ( failed ) {
		// we already did dprintf reason to the log...
		free(gman_binary);
		delete gman_node;
		return NULL;
	}

	if(IsFulldebug(D_FULLDEBUG)) {
		MyString args_string;
		args.GetArgsStringForDisplay(&args_string);
		dprintf(D_FULLDEBUG,"Really Execing %s\n",args_string.Value());
	}

	pid = daemonCore->Create_Process( 
		gman_binary,			// Program to exec
		args,					// Command-line args
		PRIV_ROOT,				// Run as root, so it can switch to
		                        //   PRIV_CONDOR
		rid						// Reaper ID
		);

	free(gman_binary);

	if ( pid <= 0 ) {
		dprintf ( D_ALWAYS, "StartOrFindGManager: Create_Process problems!\n" );
		if (gman_node) delete gman_node;
		return NULL;
	}

	// If we made it here, we happily started up a new gridmanager process

	dprintf( D_ALWAYS, "Started condor_gmanager for owner %s pid=%d\n",
			owner,pid);

	// Fill in the gman_node entry and insert it into our hashtable.
	// (gman_node was already allocated above; this check is purely defensive.)
	if ( !gman_node ) {
		gman_node = new gman_node_t;
	}
	gman_node->pid = pid;
	gman_node->owner[0] = '\0';
	gman_node->domain[0] = '\0';
	if ( owner ) {
		strcpy(gman_node->owner,owner);
	}
	if ( domain ) {
		strcpy(gman_node->domain,domain);
	}
	MyString owner_key(owner);
	if(attr_value){
		owner_key += attr_value;
	}
	if (cluster) {
		owner_key.formatstr_cat( "-%d.%d", cluster, proc );
	}

	ASSERT( gman_pid_table->insert(owner_key,gman_node) == 0 );

	// start timer to signal gridmanager if we haven't already
	if ( gman_node->add_timer_id == -1 ) {  // == -1 means no timer set
		gman_node->add_timer_id = daemonCore->Register_Timer(job_added_delay,
			GridUniverseLogic::SendAddSignal,
			"GridUniverseLogic::SendAddSignal");
		daemonCore->Register_DataPtr(gman_node);
	}

	// All done
	return gman_node;
}
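
The stale-scratch-dir sweep in the first_time_through block relies on daemonCore->Is_Pid_Alive() to decide whether a directory's trailing .pid belongs to a live process. Outside daemonCore, the classic POSIX probe for the same question is kill() with signal 0; pid_alive below is a hypothetical helper name.

#include <signal.h>
#include <errno.h>
#include <sys/types.h>

// Probe whether a pid exists without delivering a real signal.
// kill(pid, 0) performs only the existence/permission checks:
//   returns 0      -> the process exists and we may signal it
//   errno == EPERM -> the process exists but belongs to someone else
//   errno == ESRCH -> no such process
bool pid_alive(pid_t pid)
{
	if (kill(pid, 0) == 0) {
		return true;
	}
	return errno == EPERM;
}
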
Example #6
int 
GridUniverseLogic::GManagerReaper(Service *,int pid, int exit_status)
{
	gman_node_t* gman_node = NULL;
	MyString owner;

	// Iterate through our table to find the node w/ this pid
	// Someday we should perhaps also hash on the pid, but we
	// don't expect gridmanagers to exit very often, and there
	// are not that many of them.

	if (gman_pid_table) {
		gman_node_t* tmpnode;
		gman_pid_table->startIterations();
		while ( gman_pid_table->iterate(owner,tmpnode) ) {
			if (tmpnode->pid == pid ) {
				// found it!
				gman_node = tmpnode;
				break;
			}
		}
	}

	MyString owner_safe;
	MyString exit_reason;
	if(gman_node) { owner_safe = owner; }
	else { owner_safe = "Unknown"; }
	if ( WIFEXITED( exit_status ) ) {
		exit_reason.formatstr( "with return code %d",
							 WEXITSTATUS( exit_status ) );
	} else {
		exit_reason.formatstr( "due to %s",
							 daemonCore->GetExceptionString( exit_status ) );
	}
	dprintf(D_ALWAYS, "condor_gridmanager (PID %d, owner %s) exited %s.\n",
			pid, owner_safe.Value(), exit_reason.Value() );
	if(WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == DPRINTF_ERROR) {
		const char *condorUserName = get_condor_username();

		dprintf(D_ALWAYS, 
			"The gridmanager had a problem writing its log. "
			"Check the permissions of the file specified by GRIDMANAGER_LOG; "
			"it needs to be writable by Condor.\n");

			/* send email to the admin about this, but only
			 * every six hours - enough to not be ignored, but
			 * not enough to be a pest.  If only my children were
			 * so helpful and polite.  Ah, well, we can always dream...
			 */
		static time_t last_email_re_gridmanlog = 0;
		if ( time(NULL) - last_email_re_gridmanlog > 6 * 60 * 60 ) {
			last_email_re_gridmanlog = time(NULL);
			FILE *email = email_admin_open("Unable to launch grid universe jobs.");
			if ( email ) {
				fprintf(email,
					"The condor_gridmanager had an error writing its log file.  Check the  \n"
					"permissions/ownership of the file specified by the GRIDMANAGER_LOG setting in \n"
					"the condor_config file.  This file needs to be writable as user %s to enable\n"
					"the condor_gridmanager daemon to write to it. \n\n"
					"Until this problem is fixed, grid universe jobs submitted from this machine cannot "
					"be launched.\n", condorUserName ? condorUserName : "******" );
				email_close(email);
			} else {
					// Error sending an email message
				dprintf(D_ALWAYS,"ERROR: Cannot send email to the admin\n");
			}
		}	
	}	// end if(WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == DPRINTF_ERROR)

	if (!gman_node) {
		// nothing more to do, so return
		return 0;
	}

	// Cancel any timers before removing the node!!
	if (gman_node->add_timer_id != -1) {
		daemonCore->Cancel_Timer(gman_node->add_timer_id);
	}
	if (gman_node->remove_timer_id != -1) {
		daemonCore->Cancel_Timer(gman_node->remove_timer_id);
	}
	// Remove node from our hash table
	gman_pid_table->remove(owner);
	// Remove any scratch directory used by this gridmanager
	char *scratchdir = scratchFilePath(gman_node);
	ASSERT(scratchdir);
	if ( IsDirectory(scratchdir) && 
		 init_user_ids(gman_node->owner, gman_node->domain) ) 
	{
		priv_state saved_priv = set_user_priv();
			// Must put this in braces so the Directory object
			// destructor is called, which will free the iterator
			// handle.  If we didn't do this, the below rmdir 
			// would fail.
		{
			Directory tmp( scratchdir );
			tmp.Remove_Entire_Directory();
		}
		if ( rmdir(scratchdir) == 0 ) {
			dprintf(D_FULLDEBUG,"Removed scratch dir %s\n",scratchdir);
		} else {
			dprintf(D_FULLDEBUG,"Failed to remove scratch dir %s\n",
					scratchdir);
		}
		set_priv(saved_priv);
		uninit_user_ids();
	}
	delete [] scratchdir;

	// Reclaim memory from the node itself
	delete gman_node;

	return 0;
}
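
Example #6 deliberately brace-scopes the Directory object so its handle is released before rmdir() runs. The same close-before-remove sequence in plain POSIX, restricted to a flat directory of regular files (remove_flat_dir is a hypothetical helper; HTCondor's Remove_Entire_Directory also recurses into subdirectories):

#include <dirent.h>
#include <unistd.h>
#include <string>
#include <cstdio>

// Empty a directory of plain files, close the handle, then remove the
// directory itself. Mirrors the brace-scoping in Example #6: the handle
// is closed before rmdir() so nothing still holds the directory open.
bool remove_flat_dir(const char *path)
{
	DIR *dir = opendir(path);
	if (dir == NULL) {
		return false;
	}
	struct dirent *ent;
	while ((ent = readdir(dir)) != NULL) {
		std::string name = ent->d_name;
		if (name == "." || name == "..") {
			continue;
		}
		std::string full = std::string(path) + "/" + name;
		if (unlink(full.c_str()) != 0) {
			perror(full.c_str());
		}
	}
	closedir(dir);              // release the handle first...
	return rmdir(path) == 0;    // ...then remove the directory
}
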
Example #7
int
JobRouterHookMgr::hookUpdateJobInfo(RoutedJob* r_job)
{
	ClassAd temp_ad;
	char* hook_update_job_info = getHookPath(HOOK_UPDATE_JOB_INFO, r_job->src_ad);

	if (NULL == hook_update_job_info)
	{
		// hook not defined
		dprintf(D_FULLDEBUG, "HOOK_UPDATE_JOB_INFO not configured.\n");
		return 0;
	}

	// Verify the status hook hasn't already been spawned and that
	// we're not waiting for it to return.
	std::string key = r_job->dest_key;
	if (true == JobRouterHookMgr::checkHookKnown(key.c_str(), HOOK_UPDATE_JOB_INFO))
	{
		dprintf(D_FULLDEBUG, "JobRouterHookMgr::hookUpdateJobInfo "
			"retried while still waiting for status hook to return "
			"for job with key %s - ignoring\n", key.c_str());
		return 1;
	}


	temp_ad = r_job->dest_ad;

	MyString hook_stdin;
	sPrintAd(hook_stdin, temp_ad);

	StatusClient* status_client = new StatusClient(hook_update_job_info, r_job);
	if (NULL == status_client)
	{
		dprintf(D_ALWAYS|D_FAILURE, 
			"ERROR in JobRouterHookMgr::hookUpdateJobInfo: "
			"failed to create status update client\n");
		return -1;
	}

	set_user_priv_from_ad(r_job->src_ad);
	if (0 == spawn(status_client, NULL, &hook_stdin, PRIV_USER_FINAL))
	{
		dprintf(D_ALWAYS|D_FAILURE,
				"ERROR in JobRouterHookMgr::hookUpdateJobInfo: "
				"failed to spawn HOOK_UPDATE_JOB_INFO (%s)\n", hook_update_job_info);
		// Keep the user-id switch balanced on the error path as well.
		uninit_user_ids();
		delete status_client;
		return -1;
	}
	uninit_user_ids();

	// Add our info to the list of hooks currently running for this job.
	if (false == JobRouterHookMgr::addKnownHook(key.c_str(), HOOK_UPDATE_JOB_INFO))
	{
		dprintf(D_ALWAYS, "ERROR in JobRouterHookMgr::hookUpdateJobInfo: "
				"failed to add HOOK_UPDATE_JOB_INFO to list of "
				"hooks running for job key %s\n", key.c_str());
	}

	dprintf(D_FULLDEBUG, "HOOK_UPDATE_JOB_INFO (%s) invoked.\n",
			hook_update_job_info);
	return 1;
}