Ejemplo n.º 1
0
/*
 * Inspect the concatenation of s1 and s2 for the starter's magic
 * suspend/unsuspend messages.  On a match, write the corresponding
 * user-log event through ULog and record the state change via
 * record_suspension_hack().  Any other message is ignored.
 *
 * s1, s2: the two pieces of the message; a NULL piece is treated as "".
 */
extern "C" void
log_old_starter_shadow_suspend_event_hack (char *s1, char *s2)
{
	const char *magic_suspend = "TISABH Starter: Suspended user job: ";
	const char *magic_unsuspend = "TISABH Starter: Unsuspended user job.";

	/* This should be big enough to hold the two string params; anything
		longer is truncated by snprintf() rather than overrunning the
		buffer. */
	char buffer[BUFSIZ * 2 + 2];

	const size_t size_suspend = strlen(magic_suspend);
	const size_t size_unsuspend = strlen(magic_unsuspend);

	/* bounded, and tolerant of NULL pieces */
	snprintf(buffer, sizeof(buffer), "%s%s", s1 ? s1 : "", s2 ? s2 : "");

	/* depending on if it is a suspend or unsuspend event, do something
		about it. */

	if (strncmp(buffer, magic_suspend, size_suspend) == 0)
	{
		/* matched a suspend event; the pid count follows the prefix */
		JobSuspendedEvent event;
		sscanf(buffer,"TISABH Starter: Suspended user job: %d",&event.num_pids);

		if (!ULog.writeEvent (&event))
		{
			dprintf (D_ALWAYS, "Unable to log ULOG_JOB_SUSPENDED event\n");
		}

		record_suspension_hack(ULOG_JOB_SUSPENDED);
		return;
	}

	if (strncmp(buffer, magic_unsuspend, size_unsuspend) == 0)
	{
		/* matched an unsuspend event */

		JobUnsuspendedEvent event;

		if (!ULog.writeEvent (&event))
		{
			dprintf (D_ALWAYS, "Unable to log ULOG_JOB_UNSUSPENDED event\n");
		}
		record_suspension_hack(ULOG_JOB_UNSUSPENDED);
		return;
	}

	/* otherwise, do nothing */
}
Ejemplo n.º 2
0
/**
 * Write a JobStatusKnownEvent for the given job ad to its user log.
 *
 * @param job_ad  Job ad used to initialize the log and passed to the writer.
 * @return true if the event was written or InitializeUserLog() returned
 *         NULL (no log wanted); false if writing the event failed.
 */
bool
WriteJobStatusKnownEventToUserLog( ClassAd *job_ad )
{
    // Initialized so the messages below never print indeterminate values
    // should a lookup fail to set them.
    int cluster = -1;
    int proc = -1;

    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing job status known record to user logfile\n",
             cluster, proc );

    JobStatusKnownEvent event;

    const bool failed = !ulog->writeEvent( &event, job_ad );
    delete ulog;

    if ( failed ) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_JOB_STATUS_KNOWN event\n",
                 cluster, proc );
        return false;
    }

    return true;
}
Ejemplo n.º 3
0
/*
 * Write a ShadowExceptionEvent carrying msg (or "" when msg is NULL)
 * and the current byte counters to the user log.
 */
extern "C" void
log_except (const char *msg)
{
	check_execute_event();

	ShadowExceptionEvent event;

	// copy the text, always leaving the buffer NUL-terminated
	const char *text = msg ? msg : "";
	snprintf(event.message, sizeof(event.message), "%s", text);
	event.message[sizeof(event.message)-1] = '\0';

	// Events are logged from the user job's point of view: bytes the
	// shadow *sent* are bytes the job *received*, and vice versa.
	event.recvd_bytes = BytesSent;
	event.sent_bytes = BytesRecvd;
	if (syscall_sock) {
		event.recvd_bytes += syscall_sock->get_bytes_sent();
		event.sent_bytes += syscall_sock->get_bytes_recvd();
	}

	const bool failed = !ULog.writeEvent (&event);
	if (failed)
	{
		dprintf (D_ALWAYS, "Unable to log ULOG_SHADOW_EXCEPTION event\n");
	}
}
Ejemplo n.º 4
0
/**
 * Write a JobHeldEvent, populated from the job ad, to the job's user log.
 *
 * @param job_ad  Job ad used to initialize the log and the event.
 * @return true if the event was written or InitializeUserLog() returned
 *         NULL (no log wanted); false if writing the event failed.
 */
bool
WriteHoldEventToUserLog( ClassAd *job_ad )
{
    // Initialized so the messages below never print indeterminate values
    // should a lookup fail to set them.
    int cluster = -1;
    int proc = -1;

    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing hold record to user logfile\n",
             cluster, proc );

    JobHeldEvent event;

    event.initFromClassAd(job_ad);

    const bool failed = !ulog->writeEvent(&event,job_ad);
    delete ulog;

    if ( failed ) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_JOB_HELD event\n",
                 cluster, proc );
        return false;
    }

    return true;
}
Ejemplo n.º 5
0
int writeCheckpointedEvent()
{
	CheckpointedEvent checkpoint;
	if ( !logFile.writeEvent(&checkpoint) ) {
		printf("Complain about bad checkpoint write\n");
		exit(1);
	}
	return(0);
}
Ejemplo n.º 6
0
int writeJobAbortedEvent()
{
	JobAbortedEvent jobabort;
	jobabort.setReason("cause I said so!");
	if ( !logFile.writeEvent(&jobabort) ) {
	        printf("Complain about bad jobabort write\n");
			exit(1);
	}
	return(0);
}
Ejemplo n.º 7
0
int writeJobReleasedEvent() 
{
	JobReleasedEvent jobreleasedevent;
	jobreleasedevent.setReason("MessinWithYou");
	if ( !logFile.writeEvent(&jobreleasedevent) ) {
		printf("Complain about bad jobreleasedevent write\n");
		exit(1);
	}
	return(0);
}
Ejemplo n.º 8
0
int writeExecuteEvent()
{
	ExecuteEvent execute;
	execute.setExecuteHost("<128.105.165.12:32779>");
	if ( !logFile.writeEvent(&execute) ) {
		printf("Complain about bad execute write\n");
		exit(1);
	}
	return(0);
}
Ejemplo n.º 9
0
int writeExecutableErrorEvent()
{
	ExecutableErrorEvent executeerror;
	executeerror.errType = CONDOR_EVENT_BAD_LINK;
	if ( !logFile.writeEvent(&executeerror) ) {
		printf("Complain about bad executeerror write\n");
		exit(1);
	}
	return(0);
}
Ejemplo n.º 10
0
int writeJobUnsuspendedEvent()
{
	JobUnsuspendedEvent jobunsuspendevent;
	//jobunsuspendevent.num_pids = 99;
	if ( !logFile.writeEvent(&jobunsuspendevent) ) {
		printf("Complain about bad jobunsuspendevent write\n");
		exit(1);
	}
	return(0);
}
Ejemplo n.º 11
0
int writeJobImageSizeEvent()
{
	JobImageSizeEvent jobimagesizeevent;
	jobimagesizeevent.image_size_kb = 128;
	if ( !logFile.writeEvent(&jobimagesizeevent) ) {
		printf("Complain about bad jobimagesizeevent write\n");
		exit(1);
	}
	return(0);
}
Ejemplo n.º 12
0
int writeGlobusResourceDownEvent()
{
	GlobusResourceDownEvent globusresourcedownevent;
	globusresourcedownevent.rmContact = strdup("ResourceDown");;
	if ( !logFile.writeEvent(&globusresourcedownevent) ) {
	        printf("Complain about bad globusresourcedownevent write\n");
			exit(1);
	}
	return(0);
}
Ejemplo n.º 13
0
int writeGlobusSubmitFailedEvent()
{
	GlobusSubmitFailedEvent globussubmitfailedevent;
	globussubmitfailedevent.reason = strdup("Cause it could");;
	if ( !logFile.writeEvent(&globussubmitfailedevent) ) {
	        printf("Complain about bad globussubmitfailedevent write\n");
			exit(1);
	}
	return(0);
}
Ejemplo n.º 14
0
int writeNodeExecuteEvent()
{
	NodeExecuteEvent nodeexecuteevent;
	nodeexecuteevent.node = 49;
	nodeexecuteevent.setExecuteHost("<128.105.165.12:32779>");
	if ( !logFile.writeEvent(&nodeexecuteevent) ) {
		printf("Complain about bad nodeexecuteevent write\n");
		exit(1);
	}
	return(0);
}
Ejemplo n.º 15
0
int writeJobEvictedEvent()
{
	JobEvictedEvent jobevicted;
	jobevicted.setReason("It misbehaved!");
	jobevicted.setCoreFile("corefile");
	if ( !logFile.writeEvent(&jobevicted) ) {
	        printf("Complain about bad jobevicted write\n");
			exit(1);
	}
	return(0);
}
Ejemplo n.º 16
0
int writeSubmitEvent()
{
	SubmitEvent submit;
	submit.setSubmitHost("<128.105.165.12:32779>");
	submit.submitEventLogNotes = strdup("DAGMan info");
	submit.submitEventUserNotes = strdup("User info");
	if ( !logFile.writeEvent(&submit) ) {
		printf("Complain about bad submit write\n");
		exit(1);
	}
	return(0);
}
Ejemplo n.º 17
0
int writeJobHeldEvent() 
{
	JobHeldEvent jobheldevent;
	jobheldevent.setReason("CauseWeCan");
	jobheldevent.setReasonCode(404);
	jobheldevent.setReasonSubCode(0xff);
	if ( !logFile.writeEvent(&jobheldevent) ) {
		printf("Complain about bad jobheldevent write\n");
		exit(1);
	}
	return(0);
}
Ejemplo n.º 18
0
int writePostScriptTerminatedEvent()
{
	PostScriptTerminatedEvent postscriptterminated;
	postscriptterminated.normal = false;
	postscriptterminated.signalNumber = 9;
	postscriptterminated.returnValue = 4;
	if ( !logFile.writeEvent(&postscriptterminated) ) {
	        printf("Complain about bad postscriptterminated write\n");
			exit(1);
	}
	return(0);
}
Ejemplo n.º 19
0
int writeGlobusSubmitEvent()
{
	GlobusSubmitEvent globussubmitevent;
	globussubmitevent.rmContact = strdup("ResourceManager");;
	globussubmitevent.jmContact = strdup("JobManager");;
	globussubmitevent.restartableJM = true;
	if ( !logFile.writeEvent(&globussubmitevent) ) {
	        printf("Complain about bad globussubmitevent write\n");
			exit(1);
	}
	return(0);
}
Ejemplo n.º 20
0
int writeRemoteErrorEvent()
{
	RemoteErrorEvent remoteerror;
	remoteerror.setExecuteHost("<128.105.165.12:32779>");
	remoteerror.setDaemonName("<write job log test>");
	remoteerror.setErrorText("this is the write test error string");
	remoteerror.setCriticalError(true);
	if ( !logFile.writeEvent(&remoteerror) ) {
	        printf("Complain about bad remoteerror write\n");
			exit(1);
	}
	return(0);
}
Ejemplo n.º 21
0
extern "C" void 
log_image_size (int size)
{
	check_execute_event();

	// log the event
	JobImageSizeEvent event;
	event.image_size_kb = size;
	if (!ULog.writeEvent (&event))
	{
		dprintf (D_ALWAYS, "Unable to log ULOG_IMAGE_SIZE event\n");
	}
}
Ejemplo n.º 22
0
int writeShadowExceptionEvent() 
{
	ShadowExceptionEvent shadowexceptionevent;
	shadowexceptionevent.sent_bytes = 4096;
	shadowexceptionevent.recvd_bytes = 4096;
	shadowexceptionevent.message[0] = '\0';
	strncat(shadowexceptionevent.message,"shadow message", 15);
	if ( !logFile.writeEvent(&shadowexceptionevent) ) {
		printf("Complain about bad shadowexceptionevent write\n");
		exit(1);
	}
	return(0);
}
Ejemplo n.º 23
0
extern "C" void 
log_checkpoint (struct rusage *localr, struct rusage *remoter)
{
	check_execute_event();

	CheckpointedEvent event;
	event.run_local_rusage = *localr;
	event.run_remote_rusage = *remoter;
	if (!ULog.writeEvent (&event))
	{	
		dprintf (D_ALWAYS, "Could not log ULOG_CHECKPOINTED event\n");
	}
}
Ejemplo n.º 24
0
extern "C" void
log_execute (char *host)
{
	if( WroteExecuteEvent ) {
		return;
	}
	// log execute event
	ExecuteEvent event;
	event.setExecuteHost(host);
	if( !ULog.writeEvent(&event) ) {
		dprintf (D_ALWAYS, "Unable to log ULOG_EXECUTE event\n");
	} else {
		WroteExecuteEvent = 1;
	}
}
Ejemplo n.º 25
0
// Writes a PreSkipEvent for a DAG node to the given log file.
//
// condorID    - overwritten with cluster 0, proc Job::NOOP_NODE_PROCID and
//               a fresh subproc taken from the fake-ID counter
// job         - if non-NULL, receives the new condorID
// DAGNodeName - appended to "DAG Node: " to form the event's notes
// directory   - directory to change into (via TmpDir) before writing
// logFile     - path of the user log to write
//
// Returns false if the directory change fails, true once the event has
// been written; a failed write goes through EXCEPT.
bool writePreSkipEvent( CondorID& condorID, Job* job, const char* DAGNodeName, 
			   const char* directory, const char *logFile )
{
	TmpDir nodeDir;
	MyString	cdError;
	if ( !nodeDir.Cd2TmpDir( directory, cdError ) ) {
		debug_printf( DEBUG_QUIET,
				"Could not change to node directory %s: %s\n",
				directory, cdError.Value() );
		return false;
	}

		// Special HTCondorID for NOOP jobs -- actually indexed by
		// otherwise-unused subprocID.
	condorID._cluster = 0;
	condorID._proc = Job::NOOP_NODE_PROCID;

		// Take the next fake ID and store it back as the new counter.
	condorID._subproc = 1+get_fake_condorID();
	set_fake_condorID(condorID._subproc);

	if( job ) {
		job->SetCondorID( condorID );
	}

	WriteUserLog nodeLog;
	nodeLog.setEnableGlobalLog( false );
	nodeLog.setUseXML( false );
	nodeLog.initialize( std::vector<const char*>(1,logFile), condorID._cluster,
		condorID._proc, condorID._subproc, NULL );

	PreSkipEvent skipEvent;
	skipEvent.cluster = condorID._cluster;
	skipEvent.proc = condorID._proc;
	skipEvent.subproc = condorID._subproc;

	MyString notes("DAG Node: " );
	notes += DAGNodeName;
		// skipEventLogNotes gets deleted in PreSkipEvent destructor.
	skipEvent.skipEventLogNotes = strnewp( notes.Value() );

	const bool failed = !nodeLog.writeEvent( &skipEvent );
	if ( failed ) {
		EXCEPT( "Error: writing PRESKIP event failed!" );
		return false;
	}
	return true;
}
Ejemplo n.º 26
0
// The GlobusSubmitEvent is now deprecated and should be removed at
// some point in the future (6.9?).
/**
 * Write a GlobusSubmitEvent for the given job ad to its user log.
 * rmContact is the second space-separated token of ATTR_GRID_RESOURCE;
 * jmContact is taken from ATTR_GRID_JOB_ID (the third token when the
 * first is "gt2", otherwise the second).
 *
 * @param job_ad  Job ad used to initialize the log and fill the event.
 * @return true if the event was written or InitializeUserLog() returned
 *         NULL (no log wanted); false if writing the event failed.
 */
bool
WriteGlobusSubmitEventToUserLog( ClassAd *job_ad )
{
    // Initialized so the messages below never print indeterminate values
    // should a lookup fail to set them.
    int cluster = -1;
    int proc = -1;
    std::string contact;

    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing globus submit record to user logfile\n",
             cluster, proc );

    GlobusSubmitEvent event;

    job_ad->LookupString( ATTR_GRID_RESOURCE, contact );
    Tokenize( contact );
    GetNextToken( " ", false );
    event.rmContact = strnewp(GetNextToken( " ", false ));

    job_ad->LookupString( ATTR_GRID_JOB_ID, contact );
    Tokenize( contact );
    // Guard the comparison: the first token may be NULL when the job-id
    // string has no tokens, and strcasecmp() must not be handed NULL.
    const char *job_id_type = GetNextToken( " ", false );
    if ( job_id_type != NULL && strcasecmp( job_id_type, "gt2" ) == 0 ) {
        GetNextToken( " ", false );
    }
    event.jmContact = strnewp(GetNextToken( " ", false ));

    event.restartableJM = true;

    const bool failed = !ulog->writeEvent(&event,job_ad);
    delete ulog;

    if ( failed ) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_GLOBUS_SUBMIT event\n",
                 cluster, proc );
        return false;
    }

    return true;
}
Ejemplo n.º 27
0
// The GlobusResourceDownEvent is now deprecated and should be removed at
// some point in the future (6.9?).
/**
 * Write a GlobusResourceDownEvent for the given job ad to its user log.
 * rmContact is the second space-separated token of ATTR_GRID_RESOURCE.
 *
 * @param job_ad  Job ad used to initialize the log and fill the event.
 * @return true if the event was written, if InitializeUserLog() returned
 *         NULL (no log wanted), or if ATTR_GRID_RESOURCE is empty (nothing
 *         is logged); false if writing the event failed.
 */
bool
WriteGlobusResourceDownEventToUserLog( ClassAd *job_ad )
{
    // Initialized so the messages below never print indeterminate values
    // should a lookup fail to set them.
    int cluster = -1;
    int proc = -1;
    std::string contact;

    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing globus down record to user logfile\n",
             cluster, proc );

    GlobusResourceDownEvent event;

    job_ad->LookupString( ATTR_GRID_RESOURCE, contact );
    if ( contact.empty() ) {
        // Not a Globus job, don't log the event
        delete ulog;
        return true;
    }
    Tokenize( contact );
    GetNextToken( " ", false );
    event.rmContact =  strnewp(GetNextToken( " ", false ));

    const bool failed = !ulog->writeEvent(&event,job_ad);
    delete ulog;

    if ( failed ) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_GLOBUS_RESOURCE_DOWN event\n",
                 cluster, proc );
        return false;
    }

    return true;
}
Ejemplo n.º 28
0
/**
 * Write a JobEvictedEvent with zeroed usage and byte counters and
 * checkpointed = false to the job's user log.
 *
 * @param job_ad  Job ad used to initialize the log and passed to the writer.
 * @return true if the event was written or InitializeUserLog() returned
 *         NULL (no log wanted); false if writing the event failed.
 */
bool
WriteEvictEventToUserLog( ClassAd *job_ad )
{
    // Initialized so the messages below never print indeterminate values
    // should a lookup fail to set them.
    int cluster = -1;
    int proc = -1;

    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing evict record to user logfile\n",
             cluster, proc );

    JobEvictedEvent event;
    struct rusage r;
    memset( &r, 0, sizeof( struct rusage ) );

#if !defined(WIN32)
    event.run_local_rusage = r;
    event.run_remote_rusage = r;
#endif /* WIN32 */
    event.sent_bytes = 0;
    event.recvd_bytes = 0;

    event.checkpointed = false;

    const bool failed = !ulog->writeEvent(&event,job_ad);
    delete ulog;

    if ( failed ) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_JOB_EVICTED event\n",
                 cluster, proc );
        return false;
    }

    return true;
}
Ejemplo n.º 29
0
// Writes a JobTerminatedEvent (abnormal exit, signal 9, return value 4,
// fixed byte counters, core file "badfilecore") to logFile.
// Returns 0 on success; a failed write prints a complaint and exits(1).
int writeJobTerminatedEvent()
{
	// Zero-initialized: the struct is copied into the event below, so it
	// must not hold indeterminate values.
	struct rusage ru = {};

	JobTerminatedEvent jobterminated;
	jobterminated.normal = false;
	jobterminated.signalNumber = 9;
	jobterminated.returnValue = 4;
	jobterminated.run_remote_rusage = ru;
	jobterminated.total_remote_rusage = ru;
	jobterminated.recvd_bytes = 200000;
	jobterminated.sent_bytes = 400000;
	jobterminated.total_recvd_bytes = 800000;
	jobterminated.total_sent_bytes = 900000;
	jobterminated.setCoreFile( "badfilecore" );
	if ( !logFile.writeEvent(&jobterminated) ) {
		printf("Complain about bad jobterminated write\n");
		exit(1);
	}
	return(0);
}
Ejemplo n.º 30
0
/**
 * Write a GridResourceDownEvent for the given job ad to its user log.
 * resourceName is a copy of ATTR_GRID_RESOURCE; if that string is empty
 * a message is logged and the event is still written.
 *
 * @param job_ad  Job ad used to initialize the log and fill the event.
 * @return true if the event was written or InitializeUserLog() returned
 *         NULL (no log wanted); false if writing the event failed.
 */
bool
WriteGridResourceDownEventToUserLog( ClassAd *job_ad )
{
    // Initialized so the messages below never print indeterminate values
    // should a lookup fail to set them.
    int cluster = -1;
    int proc = -1;
    std::string contact;

    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing grid source down record to user logfile\n",
             cluster, proc );

    GridResourceDownEvent event;

    job_ad->LookupString( ATTR_GRID_RESOURCE, contact );
    if ( contact.empty() ) {
        dprintf( D_ALWAYS,
                 "(%d.%d) %s attribute missing in job ad\n",
                 cluster, proc, ATTR_GRID_RESOURCE );
    }
    event.resourceName =  strnewp( contact.c_str() );

    const bool failed = !ulog->writeEvent(&event,job_ad);
    delete ulog;

    if ( failed ) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_GRID_RESOURCE_DOWN event\n",
                 cluster, proc );
        return false;
    }

    return true;
}