void HoldJob( const char* long_reason, const char* short_reason, int reason_code, int reason_subcode ) { char subject[ BUFSIZ ]; FILE *mailer; sprintf( subject, "Condor Job %d.%d put on hold\n", Proc->id.cluster, Proc->id.proc ); if( ! JobAd ) { dprintf( D_ALWAYS, "In HoldJob() w/ NULL JobAd!\n" ); exit( JOB_SHOULD_HOLD ); } ExitReason = JOB_SHOULD_HOLD; if ( !ConnectQ(schedd, SHADOW_QMGMT_TIMEOUT) ) { dprintf( D_ALWAYS, "Failed to connect to schedd!\n" ); } SetAttributeString( Proc->id.cluster, Proc->id.proc, ATTR_HOLD_REASON, short_reason ); SetAttributeInt( Proc->id.cluster, Proc->id.proc, ATTR_HOLD_REASON_CODE, reason_code ); SetAttributeInt( Proc->id.cluster, Proc->id.proc, ATTR_HOLD_REASON_SUBCODE, reason_subcode ); if ( !DisconnectQ(0) ) { dprintf( D_ALWAYS, "Failed to commit updated job queue status!\n" ); } mailer = email_user_open(JobAd, subject); if( ! mailer ) { // User didn't want email, so just exit now with the right // value so the schedd actually puts the job on hold. dprintf( D_ALWAYS, "Job going into Hold state.\n"); dprintf( D_ALWAYS, "********** Shadow Exiting(%d) **********\n", JOB_SHOULD_HOLD); exit( JOB_SHOULD_HOLD ); } fprintf( mailer, "Your condor job " ); if( Proc->args_v1or2[0] ) { ArgList args; MyString args_string; args.AppendArgsV1or2Raw(Proc->args_v1or2[0],NULL); args.GetArgsStringForDisplay(&args_string); fprintf( mailer, "%s %s ", Proc->cmd[0], args_string.Value() ); } else { fprintf( mailer, "%s ", Proc->cmd[0] ); } fprintf( mailer, "\nis being put on hold.\n\n" ); fprintf( mailer, "%s", long_reason ); email_close(mailer); // Now that the user knows why, exit with the right code. dprintf( D_ALWAYS, "Job going into Hold state.\n"); dprintf( D_ALWAYS, "********** Shadow Exiting(%d) **********\n", JOB_SHOULD_HOLD); exit( JOB_SHOULD_HOLD ); }
void publishNotifyEmail( FILE* mailer, char* buf, PROC* proc ) { double rutime, rstime, lutime, lstime; /* remote/local user/sys times */ double trtime, tltime; /* Total remote/local time */ double real_time; float run_time = 0.0; time_t arch_time=0; /* time_t is 8 bytes some archs and 4 bytes on other archs, and this means that doing a (time_t*) cast on & of a 4 byte int makes my life hell. So we fix it by assigning the time we want to a real time_t variable, then using ctime() to convert it to a string */ fprintf(mailer, "Your condor job " ); #if defined(NEW_PROC) if ( proc->args_v1or2[0] ) { ArgList args; MyString args_string; args.AppendArgsV1or2Raw(proc->args_v1or2[0],NULL); args.GetArgsStringForDisplay(&args_string); fprintf(mailer, "%s %s ", proc->cmd[0], args_string.Value() ); } else fprintf(mailer, "%s ", proc->cmd[0] ); #else ArgList args; MyString args_string; args.AppendArgsV1or2(proc->args_v1or2,NULL); args.GetArgsStringForDisplay(&args_string); fprintf(mailer, "%s %s ", proc->cmd, args_string.Value() ); #endif fprintf(mailer, "%s\n\n", buf ); job_report_display_errors( mailer ); char* user = NULL; char* full_user = NULL; if( ! JobAd->LookupString(ATTR_USER, &user) ) { JobAd->LookupString(ATTR_OWNER, &user); } if( user ) { full_user = email_check_domain( user, JobAd ); fprintf( mailer, "\nSubmitted by: %s\n", full_user ); free( full_user ); full_user = NULL; free( user ); user = NULL; } arch_time = proc->q_date; fprintf(mailer, "\nTime:\n"); fprintf(mailer, "\tSubmitted at: %s", ctime(&arch_time)); if( proc->completion_date ) { real_time = proc->completion_date - proc->q_date; arch_time = proc->completion_date; fprintf(mailer, "\tCompleted at: %s\n", ctime(&arch_time)); fprintf(mailer, "\tReal Time: %s\n", d_format_time(real_time)); if (JobAd) { JobAd->LookupFloat(ATTR_JOB_REMOTE_WALL_CLOCK, run_time); } run_time += proc->completion_date - ShadowBDate; fprintf(mailer, "\tRun Time: %s\n", d_format_time(run_time)); if (CommittedTime > 0) { fprintf(mailer, "\tCommitted Time: %s\n", d_format_time(CommittedTime)); } } fprintf( mailer, "\n" ); #if defined(NEW_PROC) rutime = proc->remote_usage[0].ru_utime.tv_sec; rstime = proc->remote_usage[0].ru_stime.tv_sec; trtime = rutime + rstime; #else rutime = proc->remote_usage.ru_utime.tv_sec; rstime = proc->remote_usage.ru_stime.tv_sec; trtime = rutime + rstime; #endif lutime = proc->local_usage.ru_utime.tv_sec; lstime = proc->local_usage.ru_stime.tv_sec; tltime = lutime + lstime; fprintf(mailer, "\tRemote User Time: %s\n", d_format_time(rutime) ); fprintf(mailer, "\tRemote System Time: %s\n", d_format_time(rstime) ); fprintf(mailer, "\tTotal Remote Time: %s\n\n", d_format_time(trtime)); fprintf(mailer, "\tLocal User Time: %s\n", d_format_time(lutime) ); fprintf(mailer, "\tLocal System Time: %s\n", d_format_time(lstime) ); fprintf(mailer, "\tTotal Local Time: %s\n\n", d_format_time(tltime)); if( tltime >= 1.0 ) { fprintf(mailer, "\tLeveraging Factor: %2.1f\n", trtime / tltime); } fprintf(mailer, "\tVirtual Image Size: %d Kilobytes\n", proc->image_size); if (NumCkpts > 0) { fprintf(mailer, "\nCheckpoints written: %d\n", NumCkpts); fprintf(mailer, "Checkpoint restarts: %d\n", NumRestarts); } // TotalBytesSent and TotalBytesRecvd are from the shadow's // perspective, and we want to display the stats from the job's // perspective. Note also that TotalBytesSent and TotalBytesRecvd // don't include our current run, so we need to include the // stats from our syscall_sock (if we have one) and the BytesSent // and BytesRecvd variables. This is ugly and confusing, which // explains why I keep getting it wrong. :-( float network_bytes = TotalBytesSent + BytesSent; if (syscall_sock) { network_bytes += syscall_sock->get_bytes_sent(); } if (network_bytes > 0.0) { fprintf(mailer,"\nNetwork:\n"); fprintf(mailer,"\t%s read\n",metric_units(network_bytes)); network_bytes = TotalBytesRecvd + BytesRecvd; if (syscall_sock) { network_bytes += syscall_sock->get_bytes_recvd(); } fprintf(mailer,"\t%s written\n",metric_units(network_bytes)); } if (JobIsStandard()) { job_report_display_file_info( mailer, (int) run_time ); job_report_display_calls( mailer ); } email_custom_attributes( mailer, JobAd ); }