Example #1
0
/**
 * Handle a failed reconnect attempt: log the failure, write the
 * reconnect-failed event, and exit the shadow.
 *
 * The exit status depends on attemptingReconnectAtStartup:
 *   - set:   JOB_RECONNECT_FAILED
 *   - unset: JOB_SHOULD_REQUEUE
 *
 * @param reason  text included in the log line and handed to
 *                logReconnectFailedEvent()
 */
void
BaseShadow::reconnectFailed( const char* reason )
{
		// write a UserLog event about the failure, and exit with a
		// special status.
	dprintf( D_ALWAYS, "Reconnect FAILED: %s\n", reason );

	logReconnectFailedEvent( reason );

		// if the shadow was born disconnected, exit with
		// JOB_RECONNECT_FAILED so the schedd can make
		// an accurate restart report.  otherwise just
		// exit with JOB_SHOULD_REQUEUE.
	if ( attemptingReconnectAtStartup ) {
		dprintf(D_ALWAYS,"Exiting with JOB_RECONNECT_FAILED\n");
		// does not return
		DC_Exit( JOB_RECONNECT_FAILED );
	} else {
		dprintf(D_ALWAYS,"Exiting with JOB_SHOULD_REQUEUE\n");
		// does not return
		DC_Exit( JOB_SHOULD_REQUEUE );
	}

	// Should never get here.  ASSERT(true) could never fire, so use a
	// condition that actually fails if control somehow reaches this point.
	ASSERT(false);
}
Example #2
0
/**
 * Abort the DAG: optionally write a rescue DAG, remove running jobs and
 * scripts, start the final node if there is one, and otherwise clean up
 * and exit.
 *
 * A static flag makes re-entrant calls return immediately.  If a final
 * node is started the function returns without exiting, so the caller
 * can wait for that node to finish.
 *
 * @param exitVal    exit status handed to DC_Exit(); a rescue DAG is only
 *                   written when this is non-zero
 * @param dagStatus  status recorded on the DAG object (when one exists)
 */
void main_shutdown_rescue( int exitVal, Dag::dag_status dagStatus ) {
		// Avoid possible infinite recursion if you hit a fatal error
		// while writing a rescue DAG.
	static bool inShutdownRescue = false;
	if ( inShutdownRescue ) {
		return;
	}
	inShutdownRescue = true;

		// Only touch the DAG object if it exists; the rest of this
		// function already treats a NULL dagman.dag as a legal state.
	if ( dagman.dag ) {
		dagman.dag->_dagStatus = dagStatus;
	}
	debug_printf( DEBUG_QUIET, "Aborting DAG...\n" );
		// Avoid writing two different rescue DAGs if the "main" DAG and
		// the final node (if any) both fail.
	static bool wroteRescue = false;
	if( dagman.dag ) {
			// We write the rescue DAG *before* removing jobs because
			// otherwise if we crashed, failed, or were killed while
			// removing them, we would leave the DAG in an
			// unrecoverable state...
		if( exitVal != 0 ) {
			if ( dagman.maxRescueDagNum > 0 ) {
				dagman.dag->Rescue( dagman.primaryDagFile.Value(),
							dagman.multiDags, dagman.maxRescueDagNum,
							wroteRescue, false,
							dagman._writePartialRescueDag );
				wroteRescue = true;
			} else {
				debug_printf( DEBUG_QUIET, "No rescue DAG written because "
							"DAGMAN_MAX_RESCUE_NUM is 0\n" );
			}
		}

		debug_printf( DEBUG_DEBUG_1, "We have %d running jobs to remove\n",
					dagman.dag->NumJobsSubmitted() );
		if( dagman.dag->NumJobsSubmitted() > 0 ) {
			debug_printf( DEBUG_NORMAL, "Removing submitted jobs...\n" );
			dagman.dag->RemoveRunningJobs(dagman);
		}
		if ( dagman.dag->NumScriptsRunning() > 0 ) {
			debug_printf( DEBUG_NORMAL, "Removing running scripts...\n" );
			dagman.dag->RemoveRunningScripts();
		}
		dagman.dag->PrintDeferrals( DEBUG_NORMAL, true );

			// Start the final node if we have one.
		if ( dagman.dag->StartFinalNode() ) {
				// We started a final node; return here so we wait for the
				// final node to finish, instead of exiting immediately.
			inShutdownRescue = false;
			return;
		}
		dagman.dag->DumpNodeStatus( false, true );
		dagman.dag->GetJobstateLog().WriteDagmanFinished( exitVal );
	}
	MSC_SUPPRESS_WARNING_FIXME(6031) // return value of unlink ignored.
	unlink( lockFileName ); 
	dagman.CleanUp();
	inShutdownRescue = false;
	DC_Exit( exitVal );
}
Example #3
0
// Graceful-shutdown entry point.  May be invoked by other functions, or
// by DC when the schedd is shut down gracefully.  Prints status, records
// the final node status and the "finished" entry, cleans up, and exits
// with EXIT_RESTART.
void main_shutdown_graceful() {
	const int restartStatus = EXIT_RESTART;

	print_status();

	dagman.dag->DumpNodeStatus( true, false );
	dagman.dag->GetJobstateLog().WriteDagmanFinished( restartStatus );

	dagman.CleanUp();
	DC_Exit( restartStatus );
}
// Drain every complete line currently held in request_buffer, parse and
// dispatch each as a GAHP command, and exit the process if the request
// pipe reports an error or EOF.  Returns TRUE otherwise.
int
request_pipe_handler(Service*, int) {

	for (std::string* request = request_buffer.GetNextLine();
		 request != NULL;
		 request = request_buffer.GetNextLine()) {

		dprintf (D_FULLDEBUG, "got work request: %s\n", request->c_str());

		Gahp_Args parsed;

			// Parse first; only dispatch when parsing succeeded.
		bool handled = false;
		if (parse_gahp_command (request->c_str(), &parsed)) {
			if (handle_gahp_command (parsed.argv, parsed.argc)) {
				handled = true;
			}
		}
		if (!handled) {
			dprintf (D_ALWAYS, "ERROR processing %s\n", request->c_str());
		}

			// Each line is released here once it has been processed.
		delete request;
	}

	// GetNextLine() returned NULL: find out whether that was an error/EOF
	const bool pipe_done = request_buffer.IsError() || request_buffer.IsEOF();
	if (pipe_done) {
		dprintf (D_ALWAYS, "Request pipe closed. Exiting...\n");
		DC_Exit (1);
	}

	return TRUE;
}
Example #5
0
int
VMGahp::quitFast()
{
	cleanUp();
	DC_Exit(0);
	return TRUE;
}
Example #6
0
int
VMGahp::waitForCommand(int   /*pipe_end*/)
{
	MyString *line = NULL;

	while((line = m_request_buffer.GetNextLine()) != NULL) {

		const char *command = line->Value();

		Gahp_Args args;
		VMRequest *new_req = NULL;

		if( m_inClassAd )  {
			if( strcasecmp(command, VMGAHP_COMMAND_CLASSAD_END) == 0 ) {
				m_inClassAd = false;

				// Everything is Ok. Now we got vmClassAd
				returnOutputSuccess();
			}else {
				if( !m_jobAd->Insert(command) ) {
					vmprintf(D_ALWAYS, "Failed to insert \"%s\" into classAd, "
							"ignoring this attribute\n", command);
				}
			}
		}else {
			if(parse_vmgahp_command(command, args) &&
					verifyCommand(args.argv, args.argc)) {
				new_req = preExecuteCommand(command, &args);

				if( new_req != NULL ) {
					// Execute the new request
					executeCommand(new_req);
					if(new_req->m_has_result) {
						movePendingReqToResultList(new_req);
						if (m_async_mode) {
							if (!m_new_results_signaled) {
								write_to_daemoncore_pipe("R\n");
							}
							// So that we only do it once
							m_new_results_signaled = true;
						}
					}
				}
			}else {
				returnOutputError();
			}
		}

		delete line;
		line = NULL;
	}

	// check if GetNextLine() returned NULL because of an error or EOF
	if(m_request_buffer.IsError() || m_request_buffer.IsEOF()) {
		vmprintf(D_ALWAYS, "Request buffer closed, exiting\n");
		cleanUp();
		DC_Exit(0);
	}
	return true;
}
Example #7
0
void
VMGahp::executeQuit(void)
{
	m_need_output_for_quit = true;
	cleanUp();
	DC_Exit(0);
}
Example #8
0
/**
 * Put the job on hold: clean up the shadow, record the hold reason and
 * codes in the job ad, email the hold event, and push the update to the
 * job queue.
 *
 * If there is no job ad, exits immediately with JOB_SHOULD_HOLD.
 *
 * @param reason               hold reason text (logged, stored in the ad,
 *                             and passed to emailHoldEvent())
 * @param hold_reason_code     stored as ATTR_HOLD_REASON_CODE
 * @param hold_reason_subcode  stored as ATTR_HOLD_REASON_SUBCODE
 */
void
BaseShadow::holdJob( const char* reason, int hold_reason_code, int hold_reason_subcode )
{
	dprintf( D_ALWAYS, "Job %d.%d going into Hold state (code %d,%d): %s\n", 
			 getCluster(), getProc(), hold_reason_code, hold_reason_subcode,reason );

	if( ! jobAd ) {
			// Terminate the line so the next log record starts on its own line.
		dprintf( D_ALWAYS, "In HoldJob() w/ NULL JobAd!\n" );
		DC_Exit( JOB_SHOULD_HOLD );
	}

		// cleanup this shadow (kill starters, etc)
	cleanUp( jobWantsGracefulRemoval() );

		// Put the reason in our job ad.
	jobAd->Assign( ATTR_HOLD_REASON, reason );
	jobAd->Assign( ATTR_HOLD_REASON_CODE, hold_reason_code );
	jobAd->Assign( ATTR_HOLD_REASON_SUBCODE, hold_reason_subcode );

		// try to send email (if the user wants it)
	emailHoldEvent( reason );

		// update the job queue for the attributes we care about
	if( !updateJobInQueue(U_HOLD) ) {
			// trouble!  TODO: should we do anything else?
		dprintf( D_ALWAYS, "Failed to update job queue!\n" );
	}

}
Example #9
0
//---------------------------------------------------------------------------
// Print the condor_dagman command-line synopsis at DEBUG_SILENT and exit
// with EXIT_ERROR.
static void Usage() {
		// The single %d is filled in with the default debug level.
	static const char usageFormat[] =
		"\nUsage: condor_dagman -f -t -l .\n"
		"\t\t-Lockfile <NAME.dag.lock>\n"
		"\t\t-Dag <NAME.dag>\n"
		"\t\t-CsdVersion <version string>\n"
		"\t\t[-Debug <level>]\n"
		"\t\t[-MaxIdle <int N>]\n"
		"\t\t[-MaxJobs <int N>]\n"
		"\t\t[-MaxPre <int N>]\n"
		"\t\t[-MaxPost <int N>]\n"
		"\t\t[-DontAlwaysRunPost]\n"
		"\t\t[-WaitForDebug]\n"
		"\t\t[-NoEventChecks]\n"
		"\t\t[-AllowLogError]\n"
		"\t\t[-UseDagDir]\n"
		"\t\t[-AutoRescue <0|1>]\n"
		"\t\t[-DoRescueFrom <int N>]\n"
		"\t\t[-Priority <int N>]\n"
		"\t\t[-AllowVersionMismatch]\n"
		"\t\t[-DumpRescue]\n"
		"\t\t[-Verbose]\n"
		"\t\t[-Force]\n"
		"\t\t[-Notification <never|always|complete|error>]\n"
		"\t\t[-Dagman <dagman_executable>]\n"
		"\t\t[-Outfile_dir <directory>]\n"
		"\t\t[-Update_submit]\n"
		"\t\t[-Import_env]\n"
		"\twhere NAME is the name of your DAG.\n"
		"\tdefault -Debug is -Debug %d\n";

	debug_printf( DEBUG_SILENT, usageFormat, DEBUG_NORMAL );
	DC_Exit( EXIT_ERROR );
}
Example #10
0
// Shut the shadow down for the given reason.
//
//   - No job ad:              exit right away with `reason`.
//   - reason == JOB_EXCEPTION: return without doing anything further.
//   - JOB_EXITED / JOB_COREDUMPED: evaluate the user job policy, unless
//     we are waiting to update the schedd.
//   - anything else:          evict the job.
void
BaseShadow::shutDown( int reason ) 
{
		// Without a job ad there is nothing more to do here.
	if ( !getJobAd() ) {
		DC_Exit( reason );
	}

		// Called from the exception handler: bail out so that an EXCEPT
		// further down cannot send us into an infinite loop.
	if ( reason == JOB_EXCEPTION ) {
		return;
	}

		// The user job policy is only evaluated when the job is trying
		// to leave the queue; in every other case we simply evict.
	const bool leaving_queue =
		( reason == JOB_EXITED || reason == JOB_COREDUMPED );
	if ( !leaving_queue ) {
		evictJob( reason );
		return;
	}

	if ( !waitingToUpdateSchedd() ) {
		shadow_user_policy.checkAtExit();
			// WARNING: 'this' may have been deleted by the time we get
			// here, so nothing may touch members after this call.
	}
}
Example #11
0
void BaseShadow::removeJob( const char* reason )
{
	this->removeJobPre(reason);
	
	// does not return.
	DC_Exit( JOB_SHOULD_REMOVE );
}
Example #12
0
int
master_exit(int retval)
{
	cleanup_memory();

#ifdef WIN32
	if ( NT_ServiceFlag == TRUE ) {
		terminate(retval);
	}
#endif

#if defined(WANT_CONTRIB) && defined(WITH_MANAGEMENT)
#if defined(HAVE_DLOPEN) || defined(WIN32)
	MasterPluginManager::Shutdown();
#endif
#endif

		// If we're positive that we are going to shut down,
		// we should clean out the shared port directory if
		// we created it.
	std::string dirname;
	if ( SharedPortEndpoint::CreatedSharedPortDirectory() &&
		 SharedPortEndpoint::GetDaemonSocketDir(dirname) ) {

		TemporaryPrivSentry tps(PRIV_CONDOR);
		Directory d(dirname.c_str());
		d.Remove_Entire_Directory();
		if (-1 == rmdir(dirname.c_str())) {
			dprintf(D_ALWAYS, "ERROR: failed to remove shared port temporary directory: %s (errno=%d).\n", strerror(errno), errno);
		}
	}

	DC_Exit(retval, shutdown_program );
	return 1;	// just to satisfy vc++
}
Example #13
0
// Graceful shutdown: log that shutdown has begun, destroy the state
// machine, and exit with status 0.
void
main_shutdown_graceful( )
{
	const int exit_status = 0;

	dprintf( D_ALWAYS, "main_shutdown_graceful started\n" );

	delete stateMachine;

	DC_Exit( exit_status );
}
Example #14
0
void
BaseShadow::holdJobAndExit( const char* reason, int hold_reason_code, int hold_reason_subcode )
{
	holdJob(reason,hold_reason_code,hold_reason_subcode);

	// finally, exit and tell the schedd what to do
	DC_Exit( JOB_SHOULD_HOLD );
}
Example #15
0
// Graceful shutdown: on non-Windows builds destroy xinter first, then
// exit with EXIT_SUCCESS.
void 
main_shutdown_graceful()
{
	const int exit_status = EXIT_SUCCESS;

#ifndef WIN32
	delete xinter;
#endif

	DC_Exit(exit_status);
}
Example #16
0
// Log the one-line usage message and exit with status 1.
void
usage()
{
	const int exit_status = 1;

	dprintf( D_ALWAYS, "Usage: condor_ft-gahp\n" );

	DC_Exit( exit_status );
}
Example #17
0
/**
 * Daemon initialization hook.
 *
 * Scans the command line for "-withfile <path>".  When given, the file is
 * read as a ClassAd, handed to process_request(), the resulting ad is
 * printed to the log, and the process exits (this option exists for
 * testing).  Without "-withfile" the function simply returns.
 *
 * @param argc  number of entries in argv
 * @param argv  command-line arguments; argv[0] is skipped
 */
void main_init(int  argc , char *  argv  [])
{
	char *testfile = NULL;
	ClassAd *inputAd = NULL;

	dprintf(D_ALWAYS, "main_init() called\n");

	for (int i=1; i<argc; i++ ) {
	
		if (match_prefix(argv[i],"-withfile")) {
			i++;
			if (argc <= i) {
				fprintf(stderr,
						"ERROR: Argument -withfile requires a parameter\n ");
					// Use DC_Exit like every other error path in this function.
				DC_Exit(1);
			}
			testfile = argv[i];
		}
	
	}	// end of parsing command line options

	if ( testfile ) {
		FILE* fp = safe_fopen_wrapper(testfile,"r");
		if (!fp) {
			fprintf(stderr,"ERROR: Unable to open test file %s\n",
					testfile);
			DC_Exit(1);
		}
		int EndFlag=0, ErrorFlag=0, EmptyFlag=0;
		if( !( inputAd=new ClassAd(fp,"***", EndFlag, ErrorFlag, EmptyFlag) ) ){
			fprintf( stderr, "ERROR:  Out of memory\n" );
			DC_Exit( 1 );
		}
		fclose(fp);
		if ( ErrorFlag || EmptyFlag ) {
			fprintf( stderr, "ERROR - file %s does not contain a parseable ClassAd\n",
					 testfile);
			DC_Exit(1);
		}
		// since this option is for testing, process then exit
		ClassAd * resultAd =  process_request(inputAd);
		if ( !resultAd ) {
				// Do not dereference a missing result; report and fail instead.
			fprintf( stderr, "ERROR: no result ClassAd produced for file %s\n",
					 testfile);
			DC_Exit( 1 );
		}
		dPrintAd(D_ALWAYS, *resultAd);
		DC_Exit( 0 );
	}
}
Example #18
0
// Successful-completion exit path: print status, record the final node
// status and the "finished" entry, remove the lock file, clean up, and
// exit with EXIT_OKAY.
void ExitSuccess() {
	const int okStatus = EXIT_OKAY;

	print_status();

	dagman.dag->DumpNodeStatus( false, false );
	dagman.dag->GetJobstateLog().WriteDagmanFinished( okStatus );

	tolerant_unlink( lockFileName );

	dagman.CleanUp();
	DC_Exit( okStatus );
}
Example #19
0
// Successful-completion exit path: record the final node status and the
// "finished" entry, remove the lock file, clean up, and exit with
// EXIT_OKAY.
void ExitSuccess() {
	const int okStatus = EXIT_OKAY;

	dagman.dag->DumpNodeStatus( false, false );
	dagman.dag->GetJobstateLog().WriteDagmanFinished( okStatus );

	MSC_SUPPRESS_WARNING_FIXME(6031) // return value of unlink ignored.
	unlink( lockFileName );

	dagman.CleanUp();
	DC_Exit( okStatus );
}
Example #20
0
/**
 * Log the command-line synopsis and exit with status 1.
 *
 * @param name  program path; only its basename is shown in the message
 */
void
usage( char *name )
{
		// Note: the -o placeholder previously read "owern@uid-domain".
	dprintf( D_ALWAYS, 
		"Usage: %s [-f] [-b] [-t] [-p <port>] [-s <schedd addr>] [-o <owner@uid-domain>] [-C <job constraint>] [-S <scratch dir>] [-A <aux id>]\n",
		condor_basename( name ) );
	DC_Exit( 1 );
}
// Fast shutdown: on non-Windows builds SIGKILL the I/O loop process if
// one is recorded, then exit with status 0.
void
main_shutdown_fast()
{
#ifndef WIN32
	const bool have_io_loop = (io_loop_pid != -1);
	if (have_io_loop) {
		kill(io_loop_pid, SIGKILL);
	}
#endif

	DC_Exit(0);
}
// Graceful shutdown: on non-Windows builds SIGTERM the I/O loop process
// if one is recorded, then exit with status 0.
void
main_shutdown_graceful()
{
#ifndef WIN32
	const bool have_io_loop = (io_loop_pid != -1);
	if (have_io_loop) {
		kill(io_loop_pid, SIGTERM);
	}
#endif

	DC_Exit(0);
}
Example #23
0
// Write the usage text to stderr and exit with status 1.
void
usage( char* MyName)
{
	FILE* const out = stderr;

	fprintf( out, "Usage: %s [option]\n", MyName );
	fprintf( out, "  where [option] is one of:\n" );
	fprintf( out, "     [-skip-benchmarks]\t(now a no-op)\n" );

	DC_Exit( 1 );
}
Example #24
0
void
BaseShadow::holdJobAndExit( const char* reason, int hold_reason_code, int hold_reason_subcode )
{
	m_force_fast_starter_shutdown = true;
	holdJob(reason,hold_reason_code,hold_reason_subcode);

	// finally, exit and tell the schedd what to do
	DC_Exit( JOB_SHOULD_HOLD );
}
Example #25
0
void
BaseShadow::reconnectFailed( const char* reason )
{
		// try one last time to release the claim, write a UserLog event
		// about it, and exit with a special status. 
	dprintf( D_ALWAYS, "Reconnect FAILED: %s\n", reason );
	
	logReconnectFailedEvent( reason );

		// does not return
	DC_Exit( JOB_SHOULD_REQUEUE );
}
Example #26
0
// Log the command-line help text and exit with status 0.
void usage(void)
{
	const int exit_status = 0;

	dprintf(D_ALWAYS,
		"Usage info:\n"
		"--schedd <sinful>: Address of the schedd the transferd will contact\n"
		"--stdin:           Accept a transfer request on stdin\n"
		"--id <ascii>:      Used by the schedd to pair transferds to requests\n"
		"--shadow <upload|download>:\n"
		"                   Used with --stdin, transferd connects to shadow.\n"
		"                   This is demo mode with the starter.\n");

	DC_Exit(exit_status);
}
Example #27
0
/**
 * Evict the job: clean up the shadow, write the evict event, record the
 * vacate time in the job ad, push the update to the job queue, and exit.
 *
 * If there is no job ad, exits immediately with `reason`.
 *
 * @param reason  passed to logEvictEvent() and used as the exit status
 */
void
BaseShadow::evictJob( int reason )
{
	MyString from_where;
	MyString machine;
	if( getMachineName(machine) ) {
		from_where.formatstr(" from %s",machine.Value());
	}
	dprintf( D_ALWAYS, "Job %d.%d is being evicted%s\n",
			 getCluster(), getProc(), from_where.Value() );

	if( ! jobAd ) {
			// Terminate the line so the next log record starts on its own line.
		dprintf( D_ALWAYS, "In evictJob() w/ NULL JobAd!\n" );
		DC_Exit( reason );
	}

		// cleanup this shadow (kill starters, etc)
	cleanUp( jobWantsGracefulRemoval() );

		// write stuff to user log:
	logEvictEvent( reason );

		// record the time we were vacated into the job ad.  Assign the
		// integer directly rather than formatting "attr = value" into a
		// fixed-size buffer with sprintf and re-parsing it.
	jobAd->Assign( ATTR_LAST_VACATE_TIME, (int)time(0) );

		// update the job ad in the queue with some important final
		// attributes so we know what happened to the job when using
		// condor_history...
	if( !updateJobInQueue(U_EVICT) ) {
			// trouble!  TODO: should we do anything else?
		dprintf( D_ALWAYS, "Failed to update job queue!\n" );
	}

		// does not return.
	DC_Exit( reason );
}
Example #28
0
// Log the arguments we were started with, followed by the accepted
// command-line forms, then exit with status 1.
static void PREFAST_NORETURN
usage()
{
		// Dump what we actually received.
	dprintf(D_ALWAYS, "argc = %d\n", my_argc);
	int arg_index = 0;
	while( arg_index < my_argc ) {
		dprintf( D_ALWAYS, "argv[%d] = %s\n", arg_index, my_argv[arg_index] );
		arg_index++;
	}

		// Then the accepted invocations.
	dprintf(D_ALWAYS, "usage: condor_starter initiating_host\n");
	dprintf(D_ALWAYS, "   or: condor_starter -job-keyword keyword\n");
	dprintf(D_ALWAYS, "                      -job-input-ad path\n");
	dprintf(D_ALWAYS, "                      -job-cluster number\n");
	dprintf(D_ALWAYS, "                      -job-proc    number\n");
	dprintf(D_ALWAYS, "                      -job-subproc number\n");

	DC_Exit(1);
}
Example #29
0
// Pre-DaemonCore initialization: handle "-o <owner[@domain]>" early so we
// can switch euid to the user before daemoncore does most of its
// initialization work.
//
// With -o:            initialize user ids for that owner/domain, switch
//                     to user priv, and keep the owner part in myUserName.
// Without -o, root:   log an error and exit with status 1.
// Without -o, other:  take myUserName from my_username().
void
main_pre_dc_init( int argc, char* argv[] )
{
		// Look for the first "-o"; its argument names the user we are
		// running jobs on behalf of.  If the schedd starts us as root, we
		// need to switch to this uid for most of our life.
	for ( int idx = 1; idx < argc; idx++ ) {
		if ( strcmp( argv[idx], "-o" ) != 0 ) {
			continue;
		}
		if ( idx + 1 >= argc ) {
			usage( argv[0] );
		}
		myUserName = strdup( argv[idx + 1] );
		break;
	}

	if ( !myUserName ) {
		if ( is_root() ) {
			dprintf( D_ALWAYS, "Don't know what user to run as!\n" );
			DC_Exit( 1 );
		} else {
			myUserName = my_username();
		}
		return;
	}

		// Split "owner@domain" in a private copy; domain stays NULL when
		// there is no '@'.
	char *owner = strdup( myUserName );
	char *domain = NULL;
	char *at_sign = strchr( owner, '@' );
	if ( at_sign ) {
		*at_sign = '\0';
		domain = at_sign + 1;
	}

	if ( !init_user_ids(owner, domain)) {
		dprintf(D_ALWAYS, "init_user_ids() failed!\n");
		// uids.C will EXCEPT when we set_user_priv() now
		// so there's not much we can do at this point
	}
	set_user_priv();
	// We can't call daemonCore->Register_Priv_State() here because
	// there's no daemonCore object yet. We'll call it in main_init().

		// From here on myUserName holds just the owner part.
	free( myUserName );
	myUserName = owner;
}
Example #30
0
/**
 * Record a (mock) job termination in the job ad and push it to the job
 * queue, without exiting the shadow.
 *
 * If there is no job ad, exits immediately with JOB_SHOULD_HOLD.
 *
 * @param exit_reason       stored as ATTR_EXIT_REASON; an empty string is
 *                          replaced by "Exited normally"
 * @param exited_by_signal  stored as ATTR_ON_EXIT_BY_SIGNAL; also selects
 *                          which of the next two values is recorded
 * @param exit_code         stored as ATTR_ON_EXIT_CODE when not signaled
 * @param exit_signal       stored as ATTR_ON_EXIT_SIGNAL when signaled
 * @param core_dumped       stored as ATTR_JOB_CORE_DUMPED
 */
void
BaseShadow::mockTerminateJob( MyString exit_reason, 
		bool exited_by_signal, int exit_code, int exit_signal, 
		bool core_dumped )
{
	if (exit_reason == "") {
		exit_reason = "Exited normally";
	}
	
	dprintf( D_ALWAYS, "Mock terminating job %d.%d: "
			"exited_by_signal=%s, exit_code=%d OR exit_signal=%d, "
			"core_dumped=%s, exit_reason=\"%s\"\n", 
			 getCluster(),
			 getProc(), 
			 exited_by_signal ? "TRUE" : "FALSE",
			 exit_code,
			 exit_signal,
			 core_dumped ? "TRUE" : "FALSE",
			 exit_reason.Value());

	if( ! jobAd ) {
			// Terminate the line so the next log record starts on its own line.
		dprintf(D_ALWAYS, "BaseShadow::mockTerminateJob(): NULL JobAd! "
			"Holding Job!\n");
		DC_Exit( JOB_SHOULD_HOLD );
	}

	// Insert the various exit attributes into our job ad.
	jobAd->Assign( ATTR_JOB_CORE_DUMPED, core_dumped );
	jobAd->Assign( ATTR_ON_EXIT_BY_SIGNAL, exited_by_signal );

		// Only one of signal / code is recorded, depending on how the
		// job exited.
	if (exited_by_signal) {
		jobAd->Assign( ATTR_ON_EXIT_SIGNAL, exit_signal );
	} else {
		jobAd->Assign( ATTR_ON_EXIT_CODE, exit_code );
	}

	jobAd->Assign( ATTR_EXIT_REASON, exit_reason );

		// update the job queue for the attributes we care about
	if( !updateJobInQueue(U_TERMINATE) ) {
			// trouble!  TODO: should we do anything else?
		dprintf( D_ALWAYS, "Failed to update job queue!\n" );
	}
}