Exemplo n.º 1
0
// Write a JobStatusKnownEvent for the given job to its user log(s).
//
// job_ad: ClassAd of the job. It is handed to InitializeUserLog() to
//         create the log writer and is also the source of the
//         cluster/proc ids printed in the debug messages.
//
// Returns true if the event was written, or if InitializeUserLog()
// returned NULL (no log to write to).  Returns false if writeEvent()
// reported failure.
bool
WriteJobStatusKnownEventToUserLog( ClassAd *job_ad )
{
    // Give the ids a defined value up front: they are printed below, and
    // must not be read uninitialized in case a lookup does not set them.
    int cluster = -1;
    int proc = -1;

    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing job status known record to user logfile\n",
             cluster, proc );

    JobStatusKnownEvent event;

    int rc = ulog->writeEvent( &event, job_ad );
    // The writer was allocated by InitializeUserLog(); we own it.
    delete ulog;

    if ( !rc ) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_JOB_STATUS_KNOWN event\n",
                 cluster, proc );
        return false;
    }

    return true;
}
Exemplo n.º 2
0
// Initialize a UserLog object for a given job and return a pointer to
// the UserLog object created.  This object can then be used to write
// events and must be deleted by the caller when done.
//
// job_ad: ClassAd of the job; supplies the log file path(s), the
//         cluster/proc ids, the global job id and the XML flag.
//
// Returns NULL if neither a user log nor a DAGMan workflow log path
// could be obtained for the job (the user didn't want a UserLog), so
// callers must check for NULL before using the pointer they get back.
WriteUserLog*
InitializeUserLog( ClassAd *job_ad )
{
    // Defined defaults so that initialize() never receives an
    // indeterminate value in case a lookup below does not set them.
    int cluster = -1;
    int proc = -1;
    MyString userLogFile, dagmanNodeLog;
    std::string gjid;
    bool use_xml = false;
    // Holds the pointers returned by Value() on the two MyStrings above;
    // those strings must stay in scope until initialize() has been called.
    std::vector<const char*> logfiles;

    if( getPathToUserLog(job_ad, userLogFile) ) {
        logfiles.push_back(userLogFile.Value());
    }
    if( getPathToUserLog(job_ad, dagmanNodeLog, ATTR_DAGMAN_WORKFLOW_LOG) ) {
        logfiles.push_back(dagmanNodeLog.Value());
    }
    if(logfiles.empty()) {
        return NULL;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );
    job_ad->LookupString( ATTR_GLOBAL_JOB_ID, gjid );
    job_ad->LookupBool( ATTR_ULOG_USE_XML, use_xml );

    WriteUserLog *ULog = new WriteUserLog();
    ULog->initialize(logfiles, cluster, proc, 0, gjid.c_str());
    ULog->setUseXML( use_xml );
    return ULog;
}
Exemplo n.º 3
0
// Write a JobHeldEvent for the given job to its user log(s).
//
// job_ad: ClassAd of the job. It is used to create the log writer, to
//         fill in the event via initFromClassAd(), and to obtain the
//         cluster/proc ids printed in the debug messages.
//
// Returns true if the event was written, or if InitializeUserLog()
// returned NULL (no log to write to).  Returns false if writeEvent()
// reported failure.
bool
WriteHoldEventToUserLog( ClassAd *job_ad )
{
    // Give the ids a defined value up front: they are printed below, and
    // must not be read uninitialized in case a lookup does not set them.
    int cluster = -1;
    int proc = -1;

    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing hold record to user logfile\n",
             cluster, proc );

    JobHeldEvent event;

    // Let the event populate its own fields from the job ad.
    event.initFromClassAd(job_ad);

    int rc = ulog->writeEvent(&event,job_ad);
    // The writer was allocated by InitializeUserLog(); we own it.
    delete ulog;

    if (!rc) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_JOB_HELD event\n",
                 cluster, proc );
        return false;
    }

    return true;
}
Exemplo n.º 4
0
// Initialize the ULog object from JobAd and Proc (all three are defined
// outside this block).
//
// Up to two log paths are collected: the user log and the DAGMan workflow
// log.  If neither is available, only a debug message is emitted and ULog
// is left alone.  Otherwise ULog is initialized with the collected paths
// and the owner/cluster/proc taken from Proc; failure to initialize is
// reported through EXCEPT.
extern "C" void 
initializeUserLog ()
{
	// log_paths stores c_str() pointers of the two strings below, so the
	// strings are declared in the same scope and are not modified after
	// being pushed.
	std::vector<const char*> log_paths;
	std::string user_log_path;
	std::string dagman_log_path;

	if ( getPathToUserLog(JobAd, user_log_path) ) {
		log_paths.push_back(user_log_path.c_str());
		dprintf(D_FULLDEBUG, "%s = %s\n", ATTR_ULOG_FILE, user_log_path.c_str());
	}
	if ( getPathToUserLog(JobAd, dagman_log_path, ATTR_DAGMAN_WORKFLOW_LOG) ) {
		log_paths.push_back(dagman_log_path.c_str());
		dprintf(D_FULLDEBUG, "%s = %s\n", ATTR_DAGMAN_WORKFLOW_LOG,
			dagman_log_path.c_str());
	}

	// Nothing to log to: say so and leave.
	if ( log_paths.empty() ) {
		dprintf(D_FULLDEBUG, "no %s found and no %s found\n", ATTR_ULOG_FILE,
			ATTR_DAGMAN_WORKFLOW_LOG);
		return;
	}

	if ( !ULog.initialize (Proc->owner, NULL, log_paths,
			Proc->id.cluster, Proc->id.proc, 0) ) {
		EXCEPT("Failed to initialize user log!");
	} else {
		// XML output is enabled only when the attribute is present
		// and evaluates to a non-zero value.
		int xml_attr = 0;
		const bool want_xml =
			JobAd->LookupBool(ATTR_ULOG_USE_XML, xml_attr) && xml_attr;
		ULog.setUseXML(want_xml);
	}
}
Exemplo n.º 5
0
// Recognize the magic "TISABH Starter" suspend/unsuspend messages and turn
// them into user log events.
//
// s1, s2: the two halves of the message; they are concatenated before being
//         examined.  A NULL pointer is treated as an empty string.
//
// If the concatenation starts with the suspend marker, a JobSuspendedEvent
// (with num_pids parsed from the text) is written to ULog and the
// suspension is recorded; if it starts with the unsuspend marker, a
// JobUnsuspendedEvent is written and recorded.  Anything else is ignored.
extern "C" void
log_old_starter_shadow_suspend_event_hack (char *s1, char *s2)
{
	const char *magic_suspend = "TISABH Starter: Suspended user job: ";
	const char *magic_unsuspend = "TISABH Starter: Unsuspended user job.";

	/* This should be big enough to hold the two string params; longer
		input is truncated by snprintf() instead of overrunning it. */
	char buffer[BUFSIZ * 2 + 2];

	/* Passing NULL to a %s conversion is undefined, so substitute "". */
	const char *first = s1 ? s1 : "";
	const char *second = s2 ? s2 : "";

	const size_t size_suspend = strlen(magic_suspend);
	const size_t size_unsuspend = strlen(magic_unsuspend);
	snprintf(buffer, sizeof(buffer), "%s%s", first, second);

	/* depending on if it is a suspend or unsuspend event, do something
		about it. */

	if (strncmp(buffer, magic_suspend, size_suspend) == 0)
	{
		/* matched a suspend event */
		JobSuspendedEvent event;
		/* If no number follows the marker, num_pids keeps whatever value
			the event was constructed with. */
		sscanf(buffer,"TISABH Starter: Suspended user job: %d",&event.num_pids);

		if (!ULog.writeEvent (&event))
		{
			dprintf (D_ALWAYS, "Unable to log ULOG_JOB_SUSPENDED event\n");
		}

		record_suspension_hack(ULOG_JOB_SUSPENDED);
		return;
	}

	if (strncmp(buffer, magic_unsuspend, size_unsuspend) == 0)
	{
		/* matched an unsuspend event */

		JobUnsuspendedEvent event;

		if (!ULog.writeEvent (&event))
		{
			dprintf (D_ALWAYS, "Unable to log ULOG_JOB_UNSUSPENDED event\n");
		}
		record_suspension_hack(ULOG_JOB_UNSUSPENDED);
		return;
	}

	/* otherwise, do nothing */
}
Exemplo n.º 6
0
// Write a ShadowExceptionEvent carrying the given message to ULog.
//
// msg: text to store in the event; NULL is logged as an empty string.
//      Text that does not fit in event.message is truncated.
//
// A failed write is only reported with dprintf; nothing is returned.
extern "C" void
log_except (const char *msg)
{
	check_execute_event();

	ShadowExceptionEvent event;

	// Copy the (possibly absent) message into the fixed-size field and
	// make sure it is terminated.
	const char *text = msg ? msg : "";
	snprintf(event.message, sizeof(event.message), "%s", text);
	event.message[sizeof(event.message)-1] = '\0';

	// Byte counts are reported from the user job's point of view: what
	// the shadow *sent* is what the job *received*, and vice versa.
	event.recvd_bytes = BytesSent;
	event.sent_bytes = BytesRecvd;
	if (syscall_sock) {
		// Include the traffic counted by the syscall socket as well.
		event.recvd_bytes += syscall_sock->get_bytes_sent();
		event.sent_bytes += syscall_sock->get_bytes_recvd();
	}

	if (ULog.writeEvent (&event)) {
		return;
	}
	dprintf (D_ALWAYS, "Unable to log ULOG_SHADOW_EXCEPTION event\n");
}
Exemplo n.º 7
0
// Write a PreSkipEvent for a node to the given log file.
//
// condorID:    overwritten with a fake id: cluster 0, proc
//              Job::NOOP_NODE_PROCID, and a subproc one greater than the
//              current fake id counter (the counter is updated too).
// job:         if non-NULL, receives the new condorID via SetCondorID().
// DAGNodeName: appended to "DAG Node: " to form the event's log notes.
// directory:   directory to change into before writing.
// logFile:     path of the log file the event is written to.
//
// Returns false if the directory change fails; returns true once the event
// has been written.  A failed write is reported through EXCEPT.
bool writePreSkipEvent( CondorID& condorID, Job* job, const char* DAGNodeName, 
			   const char* directory, const char *logFile )
{
	TmpDir nodeDir;
	MyString	cdError;
	const bool inNodeDir = nodeDir.Cd2TmpDir( directory, cdError );
	if ( !inNodeDir ) {
		debug_printf( DEBUG_QUIET,
				"Could not change to node directory %s: %s\n",
				directory, cdError.Value() );
		return false;
	}

		// Special HTCondorID for NOOP jobs -- actually indexed by
		// otherwise-unused subprocID.
	condorID._cluster = 0;
	condorID._proc = Job::NOOP_NODE_PROCID;

		// Take the next fake id and store it back as the new counter value.
	condorID._subproc = 1+get_fake_condorID();
	set_fake_condorID(condorID._subproc);

	if( job ) {
		job->SetCondorID( condorID );
	}

		// Writer configured with the global log and XML output disabled,
		// targeting only logFile.
	const std::vector<const char*> targetLogs( 1, logFile );
	WriteUserLog skipLog;
	skipLog.setEnableGlobalLog( false );
	skipLog.setUseXML( false );
	skipLog.initialize( targetLogs, condorID._cluster,
		condorID._proc, condorID._subproc, NULL );

	PreSkipEvent skipEvent;
	skipEvent.cluster = condorID._cluster;
	skipEvent.proc = condorID._proc;
	skipEvent.subproc = condorID._subproc;

	MyString notes("DAG Node: " );
	notes += DAGNodeName;
		// skipEventLogNotes gets deleted in PreSkipEvent destructor.
	skipEvent.skipEventLogNotes = strnewp( notes.Value() );

	if ( skipLog.writeEvent( &skipEvent ) ) {
		return true;
	}
	EXCEPT( "Error: writing PRESKIP event failed!" );
	return false;
}
Exemplo n.º 8
0
// The GlobusSubmitEvent is now deprecated and should be removed at
// some point in the future (6.9?).
//
// Write a GlobusSubmitEvent for the given job to its user log(s).
//
// job_ad: ClassAd of the job.  The second space-separated token of
//         ATTR_GRID_RESOURCE becomes the event's rmContact.  For
//         ATTR_GRID_JOB_ID the second token becomes jmContact, or the
//         third token when the first one is "gt2" (case-insensitive).
//
// Returns true if the event was written, or if InitializeUserLog()
// returned NULL (no log to write to).  Returns false if writeEvent()
// reported failure.
bool
WriteGlobusSubmitEventToUserLog( ClassAd *job_ad )
{
    // Give the ids a defined value up front: they are printed below, and
    // must not be read uninitialized in case a lookup does not set them.
    int cluster = -1;
    int proc = -1;
    std::string contact;
    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing globus submit record to user logfile\n",
             cluster, proc );

    GlobusSubmitEvent event;

    job_ad->LookupString( ATTR_GRID_RESOURCE, contact );
    Tokenize( contact );
    // Skip the first token; the second one is the resource manager contact.
    GetNextToken( " ", false );
    event.rmContact = strnewp(GetNextToken( " ", false ));

    job_ad->LookupString( ATTR_GRID_JOB_ID, contact );
    Tokenize( contact );
    // Guard against a NULL token before comparing: calling strcasecmp()
    // with a null pointer is undefined behavior.
    const char *first_token = GetNextToken( " ", false );
    if ( first_token != NULL && strcasecmp( first_token, "gt2" ) == 0 ) {
        GetNextToken( " ", false );
    }
    event.jmContact = strnewp(GetNextToken( " ", false ));

    event.restartableJM = true;

    int rc = ulog->writeEvent(&event,job_ad);
    // The writer was allocated by InitializeUserLog(); we own it.
    delete ulog;

    if (!rc) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_GLOBUS_SUBMIT event\n",
                 cluster, proc );
        return false;
    }

    return true;
}
Exemplo n.º 9
0
int writeCheckpointedEvent()
{
	CheckpointedEvent checkpoint;
	if ( !logFile.writeEvent(&checkpoint) ) {
		printf("Complain about bad checkpoint write\n");
		exit(1);
	}
	return(0);
}
Exemplo n.º 10
0
int writeExecutableErrorEvent()
{
	ExecutableErrorEvent executeerror;
	executeerror.errType = CONDOR_EVENT_BAD_LINK;
	if ( !logFile.writeEvent(&executeerror) ) {
		printf("Complain about bad executeerror write\n");
		exit(1);
	}
	return(0);
}
Exemplo n.º 11
0
int writeJobReleasedEvent() 
{
	JobReleasedEvent jobreleasedevent;
	jobreleasedevent.setReason("MessinWithYou");
	if ( !logFile.writeEvent(&jobreleasedevent) ) {
		printf("Complain about bad jobreleasedevent write\n");
		exit(1);
	}
	return(0);
}
Exemplo n.º 12
0
int writeJobUnsuspendedEvent()
{
	JobUnsuspendedEvent jobunsuspendevent;
	//jobunsuspendevent.num_pids = 99;
	if ( !logFile.writeEvent(&jobunsuspendevent) ) {
		printf("Complain about bad jobunsuspendevent write\n");
		exit(1);
	}
	return(0);
}
Exemplo n.º 13
0
int writeJobImageSizeEvent()
{
	JobImageSizeEvent jobimagesizeevent;
	jobimagesizeevent.image_size_kb = 128;
	if ( !logFile.writeEvent(&jobimagesizeevent) ) {
		printf("Complain about bad jobimagesizeevent write\n");
		exit(1);
	}
	return(0);
}
Exemplo n.º 14
0
int writeGlobusResourceDownEvent()
{
	GlobusResourceDownEvent globusresourcedownevent;
	globusresourcedownevent.rmContact = strdup("ResourceDown");;
	if ( !logFile.writeEvent(&globusresourcedownevent) ) {
	        printf("Complain about bad globusresourcedownevent write\n");
			exit(1);
	}
	return(0);
}
Exemplo n.º 15
0
int writeGlobusSubmitFailedEvent()
{
	GlobusSubmitFailedEvent globussubmitfailedevent;
	globussubmitfailedevent.reason = strdup("Cause it could");;
	if ( !logFile.writeEvent(&globussubmitfailedevent) ) {
	        printf("Complain about bad globussubmitfailedevent write\n");
			exit(1);
	}
	return(0);
}
Exemplo n.º 16
0
int writeJobAbortedEvent()
{
	JobAbortedEvent jobabort;
	jobabort.setReason("cause I said so!");
	if ( !logFile.writeEvent(&jobabort) ) {
	        printf("Complain about bad jobabort write\n");
			exit(1);
	}
	return(0);
}
Exemplo n.º 17
0
int writeExecuteEvent()
{
	ExecuteEvent execute;
	execute.setExecuteHost("<128.105.165.12:32779>");
	if ( !logFile.writeEvent(&execute) ) {
		printf("Complain about bad execute write\n");
		exit(1);
	}
	return(0);
}
Exemplo n.º 18
0
// The GlobusResourceDownEvent is now deprecated and should be removed at
// some point in the future (6.9?).
//
// Write a GlobusResourceDownEvent for the given job to its user log(s).
//
// job_ad: ClassAd of the job.  The second space-separated token of
//         ATTR_GRID_RESOURCE becomes the event's rmContact.
//
// Returns true if the event was written, if InitializeUserLog() returned
// NULL (no log to write to), or if ATTR_GRID_RESOURCE is empty (nothing
// is logged in that case).  Returns false if writeEvent() reported
// failure.
bool
WriteGlobusResourceDownEventToUserLog( ClassAd *job_ad )
{
    // Give the ids a defined value up front: they are printed below, and
    // must not be read uninitialized in case a lookup does not set them.
    int cluster = -1;
    int proc = -1;
    std::string contact;
    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing globus down record to user logfile\n",
             cluster, proc );

    GlobusResourceDownEvent event;

    job_ad->LookupString( ATTR_GRID_RESOURCE, contact );
    if ( contact.empty() ) {
        // Not a Globus job, don't log the event
        delete ulog;
        return true;
    }
    Tokenize( contact );
    // Skip the first token; the second one is the resource manager contact.
    GetNextToken( " ", false );
    event.rmContact =  strnewp(GetNextToken( " ", false ));

    int rc = ulog->writeEvent(&event,job_ad);
    // The writer was allocated by InitializeUserLog(); we own it.
    delete ulog;

    if (!rc) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_GLOBUS_RESOURCE_DOWN event\n",
                 cluster, proc );
        return false;
    }

    return true;
}
Exemplo n.º 19
0
// Write a JobEvictedEvent for the given job to its user log(s).
//
// The event is written with checkpointed = false, zero sent/received
// byte counts and, on non-WIN32 builds, zeroed local and remote rusage.
//
// job_ad: ClassAd of the job. It is used to create the log writer and to
//         obtain the cluster/proc ids printed in the debug messages.
//
// Returns true if the event was written, or if InitializeUserLog()
// returned NULL (no log to write to).  Returns false if writeEvent()
// reported failure.
bool
WriteEvictEventToUserLog( ClassAd *job_ad )
{
    // Give the ids a defined value up front: they are printed below, and
    // must not be read uninitialized in case a lookup does not set them.
    int cluster = -1;
    int proc = -1;
    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing evict record to user logfile\n",
             cluster, proc );

    JobEvictedEvent event;
    // All-zero resource usage to report in the event.
    struct rusage r;
    memset( &r, 0, sizeof( struct rusage ) );

#if !defined(WIN32)
    event.run_local_rusage = r;
    event.run_remote_rusage = r;
#endif /* WIN32 */
    event.sent_bytes = 0;
    event.recvd_bytes = 0;

    event.checkpointed = false;

    int rc = ulog->writeEvent(&event,job_ad);
    // The writer was allocated by InitializeUserLog(); we own it.
    delete ulog;

    if (!rc) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_JOB_EVICTED event\n",
                 cluster, proc );
        return false;
    }

    return true;
}
Exemplo n.º 20
0
int writeNodeExecuteEvent()
{
	NodeExecuteEvent nodeexecuteevent;
	nodeexecuteevent.node = 49;
	nodeexecuteevent.setExecuteHost("<128.105.165.12:32779>");
	if ( !logFile.writeEvent(&nodeexecuteevent) ) {
		printf("Complain about bad nodeexecuteevent write\n");
		exit(1);
	}
	return(0);
}
Exemplo n.º 21
0
int writeJobEvictedEvent()
{
	JobEvictedEvent jobevicted;
	jobevicted.setReason("It misbehaved!");
	jobevicted.setCoreFile("corefile");
	if ( !logFile.writeEvent(&jobevicted) ) {
	        printf("Complain about bad jobevicted write\n");
			exit(1);
	}
	return(0);
}
Exemplo n.º 22
0
int writePostScriptTerminatedEvent()
{
	PostScriptTerminatedEvent postscriptterminated;
	postscriptterminated.normal = false;
	postscriptterminated.signalNumber = 9;
	postscriptterminated.returnValue = 4;
	if ( !logFile.writeEvent(&postscriptterminated) ) {
	        printf("Complain about bad postscriptterminated write\n");
			exit(1);
	}
	return(0);
}
Exemplo n.º 23
0
int writeGlobusSubmitEvent()
{
	GlobusSubmitEvent globussubmitevent;
	globussubmitevent.rmContact = strdup("ResourceManager");;
	globussubmitevent.jmContact = strdup("JobManager");;
	globussubmitevent.restartableJM = true;
	if ( !logFile.writeEvent(&globussubmitevent) ) {
	        printf("Complain about bad globussubmitevent write\n");
			exit(1);
	}
	return(0);
}
Exemplo n.º 24
0
int writeJobHeldEvent() 
{
	JobHeldEvent jobheldevent;
	jobheldevent.setReason("CauseWeCan");
	jobheldevent.setReasonCode(404);
	jobheldevent.setReasonSubCode(0xff);
	if ( !logFile.writeEvent(&jobheldevent) ) {
		printf("Complain about bad jobheldevent write\n");
		exit(1);
	}
	return(0);
}
Exemplo n.º 25
0
int writeSubmitEvent()
{
	SubmitEvent submit;
	submit.setSubmitHost("<128.105.165.12:32779>");
	submit.submitEventLogNotes = strdup("DAGMan info");
	submit.submitEventUserNotes = strdup("User info");
	if ( !logFile.writeEvent(&submit) ) {
		printf("Complain about bad submit write\n");
		exit(1);
	}
	return(0);
}
Exemplo n.º 26
0
// Write a GridResourceDownEvent for the given job to its user log(s).
//
// job_ad: ClassAd of the job.  The value of ATTR_GRID_RESOURCE becomes
//         the event's resourceName; if it is empty a message is logged
//         and the event is still written with an empty name.
//
// Returns true if the event was written, or if InitializeUserLog()
// returned NULL (no log to write to).  Returns false if writeEvent()
// reported failure.
bool
WriteGridResourceDownEventToUserLog( ClassAd *job_ad )
{
    // Give the ids a defined value up front: they are printed below, and
    // must not be read uninitialized in case a lookup does not set them.
    int cluster = -1;
    int proc = -1;
    std::string contact;
    WriteUserLog *ulog = InitializeUserLog( job_ad );
    if ( ulog == NULL ) {
        // User doesn't want a log
        return true;
    }

    job_ad->LookupInteger( ATTR_CLUSTER_ID, cluster );
    job_ad->LookupInteger( ATTR_PROC_ID, proc );

    dprintf( D_FULLDEBUG,
             "(%d.%d) Writing grid source down record to user logfile\n",
             cluster, proc );

    GridResourceDownEvent event;

    job_ad->LookupString( ATTR_GRID_RESOURCE, contact );
    if ( contact.empty() ) {
        // Report the missing attribute, but carry on and log the event.
        dprintf( D_ALWAYS,
                 "(%d.%d) %s attribute missing in job ad\n",
                 cluster, proc, ATTR_GRID_RESOURCE );
    }
    event.resourceName =  strnewp( contact.c_str() );

    int rc = ulog->writeEvent(&event,job_ad);
    // The writer was allocated by InitializeUserLog(); we own it.
    delete ulog;

    if (!rc) {
        dprintf( D_ALWAYS,
                 "(%d.%d) Unable to log ULOG_GRID_RESOURCE_DOWN event\n",
                 cluster, proc );
        return false;
    }

    return true;
}
Exemplo n.º 27
0
int writeRemoteErrorEvent()
{
	RemoteErrorEvent remoteerror;
	remoteerror.setExecuteHost("<128.105.165.12:32779>");
	remoteerror.setDaemonName("<write job log test>");
	remoteerror.setErrorText("this is the write test error string");
	remoteerror.setCriticalError(true);
	if ( !logFile.writeEvent(&remoteerror) ) {
	        printf("Complain about bad remoteerror write\n");
			exit(1);
	}
	return(0);
}
Exemplo n.º 28
0
extern "C" void 
log_image_size (int size)
{
	check_execute_event();

	// log the event
	JobImageSizeEvent event;
	event.image_size_kb = size;
	if (!ULog.writeEvent (&event))
	{
		dprintf (D_ALWAYS, "Unable to log ULOG_IMAGE_SIZE event\n");
	}
}
Exemplo n.º 29
0
extern "C" void 
log_checkpoint (struct rusage *localr, struct rusage *remoter)
{
	check_execute_event();

	CheckpointedEvent event;
	event.run_local_rusage = *localr;
	event.run_remote_rusage = *remoter;
	if (!ULog.writeEvent (&event))
	{	
		dprintf (D_ALWAYS, "Could not log ULOG_CHECKPOINTED event\n");
	}
}
Exemplo n.º 30
0
int writeShadowExceptionEvent() 
{
	ShadowExceptionEvent shadowexceptionevent;
	shadowexceptionevent.sent_bytes = 4096;
	shadowexceptionevent.recvd_bytes = 4096;
	shadowexceptionevent.message[0] = '\0';
	strncat(shadowexceptionevent.message,"shadow message", 15);
	if ( !logFile.writeEvent(&shadowexceptionevent) ) {
		printf("Complain about bad shadowexceptionevent write\n");
		exit(1);
	}
	return(0);
}