/**
 * Terminates a job.
 *
 * Connects to the PBS server named by the file-level `server`, asks it to
 * delete the job, records the PBS error state via updateErrorNo() and
 * disconnects again.
 *
 * @param s      SOAP context; not used by this function
 * @param jobid  identifier of the job, as assigned by the queue
 * @param user   not used by this function
 * @return BESE_OK if the job was deleted, BESE_BACKEND if the connection
 *         could not be opened or the delete request failed
 */
int rm_terminateJob(struct soap* s, char* jobid, char* user)
{
  int connectionIdentifier;
  int rc;

  (void)s;
  (void)user;

  connectionIdentifier = pbs_connect(server);
  if (connectionIdentifier < 1)
    return BESE_BACKEND;

  rc = pbs_deljob(connectionIdentifier, jobid, NULL);

  /* Capture the PBS error state before the disconnect can disturb it. */
  updateErrorNo();
  pbs_disconnect(connectionIdentifier);

  /* A failed delete used to be reported as success; propagate it instead. */
  return (rc != 0) ? BESE_BACKEND : BESE_OK;
}
int drmaa_control(const char *job_id, int action, char *errmsg, size_t errlen) { drmaa_session_t *c = NULL; int rc = 0; DEBUG(("-> drmaa_control(job_id=%s,action=%d)", job_id, action)); GET_DRMAA_SESSION(c); pthread_mutex_lock(&c->conn_mutex); switch (action) { /* * We cannot know whether we did suspend job * in other way than remembering this inside DRMAA session. */ case DRMAA_CONTROL_SUSPEND: drmaa_find_job(c, job_id, NULL, DRMAA_JOB_SUSPENDED); rc = pbs_sigjob(c->pbs_conn, (char*)job_id, "SIGSTOP", NULL); break; case DRMAA_CONTROL_RESUME: drmaa_find_job(c, job_id, NULL, DRMAA_JOB_RESUMED); rc = pbs_sigjob(c->pbs_conn, (char*)job_id, "SIGCONT", NULL); break; case DRMAA_CONTROL_HOLD: rc = pbs_holdjob(c->pbs_conn, (char*)job_id, USER_HOLD, NULL); break; case DRMAA_CONTROL_RELEASE: rc = pbs_rlsjob(c->pbs_conn, (char*)job_id, USER_HOLD, NULL); break; case DRMAA_CONTROL_TERMINATE: rc = pbs_deljob(c->pbs_conn, (char*)job_id, NULL); /* deldelay=N -- delay between SIGTERM and SIGKILL (default 0)*/ break; } pthread_mutex_unlock(&c->conn_mutex); RELEASE_DRMAA_SESSION(c); DEBUG(("<- drmaa_control() =%d", rc)); if (rc) RAISE_PBS(); else return DRMAA_ERRNO_SUCCESS; }
int scheduling_cycle( int sd) { server_info *sinfo; /* ptr to the server/queue/job/node info */ job_info *jinfo; /* ptr to the job to see if it can run */ int ret = SUCCESS; /* return code from is_ok_to_run_job() */ char log_msg[MAX_LOG_SIZE]; /* used to log an message about job */ char comment[MAX_COMMENT_SIZE]; /* used to update comment of job */ sched_log(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", "Entering Schedule"); update_cycle_status(); /* create the server / queue / job / node structures */ if ((sinfo = query_server(sd)) == NULL) { fprintf(stderr, "Problem with creating server data strucutre\n"); return(0); } if (init_scheduling_cycle(sinfo) == 0) { sched_log( PBSEVENT_DEBUG, PBS_EVENTCLASS_SERVER, sinfo -> name, "init_scheduling_cycle failed."); free_server(sinfo, 1); return(0); } /* main scheduling loop */ while ((jinfo = next_job(sinfo, 0))) { sched_log( PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB, jinfo->name, "Considering job to run"); if ((ret = is_ok_to_run_job(sd, sinfo, jinfo->queue, jinfo)) == SUCCESS) { run_update_job(sd, sinfo, jinfo->queue, jinfo); } else { if (jinfo->can_never_run) { sched_log( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, jinfo->name, "Job Deleted because it would never run"); pbs_deljob(sd, jinfo->name, "Job could never run"); } jinfo->can_not_run = 1; if (translate_job_fail_code(ret, comment, log_msg)) { /* if the comment doesn't get changed, its because it hasn't changed. * if the reason for the job has not changed, we do not need to log it */ if (update_job_comment(sd, jinfo, comment) == 0) { sched_log( PBSEVENT_SCHED, PBS_EVENTCLASS_JOB, jinfo->name, log_msg); } } if ((ret != NOT_QUEUED) && cstat.strict_fifo) { update_jobs_cant_run( sd, jinfo->queue->jobs, jinfo, COMMENT_STRICT_FIFO, START_AFTER_JOB); } } } if (cstat.fair_share) update_last_running(sinfo); free_server(sinfo, 1); /* free server and queues and jobs */ sched_log(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", "Leaving schedule\n"); return 0; }
/* if a user requested deleting 'all' then this routine will get the list of * jobs from the server and try to delete all jobs that are not in a * 'C'omplete or 'E'xiting state */ void qdel_all( char *extend) /* I */ { char *jobid; char *state = 0; int connect; int stat; int retries; struct batch_status *p_status; struct batch_status *p; struct attropl *p_atropl = 0; struct attrl *a; connect = cnt2server('\0'); if (connect <= 0) { fprintf(stderr, "qdel: cannot connect to default server (errno=%d) %s\n", pbs_errno, pbs_strerror(pbs_errno)); return; } p_status = pbs_selstat(connect, p_atropl, NULL); if (p_status == NULL) { fprintf(stderr, "qdel: cannot find any jobs to delete\n"); } for (p = p_status;p != NULL;p = p->next) { jobid = p->name; a = p->attribs; while (a != NULL) { if ((a->name != NULL) && (!strcmp(a->name, ATTR_state))) { state = a->value; break; } a = a->next; } /* * Don't bother deleting jobs that are 'C'omplete or 'E'xiting * Unless we are Purging, then try 'C'ompleted jobs as well */ if (((strstr(extend,DELPURGE) != NULL) && (*state != 'E')) || ((*state != 'E') && (*state != 'C'))) { retries = 0; redo: stat = pbs_deljob(connect, jobid, extend); /* * if MOM is too slow to respond, we will retry a few times before * before giving up */ if (stat && (pbs_errno == PBSE_NORELYMOM) && (retries < 3)) { sleep(1); retries++; goto redo; } if (stat && (pbs_errno != PBSE_UNKJOBID) && (pbs_errno != PBSE_BADSTATE)) { printf("Deletion Error: %d (%s)\n", pbs_errno, pbs_strerror(pbs_errno)); prt_job_err("qdel", connect, jobid); } } } pbs_disconnect(connect); return; }
int main( int argc, char **argv) { int c; int errflg = 0; int any_failed = 0; int purge_completed = FALSE; int located = FALSE; char *pc; char job_id[PBS_MAXCLTJOBID]; /* from the command line */ char job_id_out[PBS_MAXCLTJOBID]; char server_out[MAXSERVERNAME]; char rmt_server[MAXSERVERNAME]; char extend[1024]; #define GETOPT_ARGS "acm:pW:t:" extend[0] = '\0'; while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF) { switch (c) { case 'a': /* Async job deletion */ if (extend[0] != '\0') { errflg++; break; } strcpy(extend, DELASYNC); break; case 'c': if (extend[0] != '\0') { errflg++; break; } snprintf(extend,sizeof(extend),"%s%ld",PURGECOMP,(long)(time(NULL))); purge_completed = TRUE; break; case 'm': /* add delete message */ if (extend[0] != '\0') { /* extension option already specified */ errflg++; break; } strncpy(extend, optarg, sizeof(extend)); break; case 'p': if (extend[0] != '\0') { errflg++; break; } strcpy(extend, DELPURGE); strcat(extend, "1"); break; case 't': if (extend[0] != '\0') { errflg++; break; } pc = optarg; if (strlen(pc) == 0) { fprintf(stderr, "qdel: illegal -t value (array range cannot be zero length)\n"); errflg++; break; } snprintf(extend,sizeof(extend),"%s%s", ARRAY_RANGE, pc); break; case 'W': if (extend[0] != '\0') { errflg++; break; } pc = optarg; if (strlen(pc) == 0) { fprintf(stderr, "qdel: illegal -W value\n"); errflg++; break; } while (*pc != '\0') { if (!isdigit(*pc)) { fprintf(stderr, "qdel: illegal -W value\n"); errflg++; break; } pc++; } strcpy(extend, DELDELAY); strcat(extend, optarg); break; default: errflg++; break; } } /* END while (c) */ if (purge_completed) { strcpy(server_out,pbs_default()); goto cnt; } if ((errflg != 0) || (optind >= argc)) { static char usage[] = "usage: qdel [{ -a | -c | -p | -t | -W delay | -m message}] [<JOBID>[<JOBID>]|'all'|'ALL']...\n"; fprintf(stderr, "%s", usage); fprintf(stderr, " -a -c, -m, -p, -t, and -W are mutually exclusive\n"); exit(2); } for (;optind < argc;optind++) { int connect; int 
stat = 0; /* check to see if user specified 'all' to delete all jobs */ strcpy(job_id, argv[optind]); if ((strcmp("all", job_id) == 0) || (strcmp("ALL", job_id) == 0)) { qdel_all(extend); continue; } else if (get_server(job_id, job_id_out, server_out)) { fprintf(stderr, "qdel: illegally formed job identifier: %s\n", job_id); any_failed = 1; continue; } cnt: connect = cnt2server(server_out); if (connect <= 0) { fprintf(stderr, "qdel: cannot connect to server %s (errno=%d) %s\n", pbs_server, pbs_errno, pbs_strerror(pbs_errno)); any_failed = pbs_errno; continue; } stat = pbs_deljob(connect, job_id_out, extend); if (stat && (pbs_errno != PBSE_UNKJOBID)) { prt_job_err("qdel", connect, job_id_out); any_failed = pbs_errno; } else if (stat && (pbs_errno == PBSE_UNKJOBID) && !located) { located = TRUE; if (locate_job(job_id_out, server_out, rmt_server)) { pbs_disconnect(connect); strcpy(server_out, rmt_server); goto cnt; } prt_job_err("qdel", connect, job_id_out); any_failed = pbs_errno; } pbs_disconnect(connect); } exit(any_failed); } /* END main() */
/*
 * Reject a job: ask PBS to delete it with an explanatory message and then
 * detach it from its queue's job list.
 *
 * job    - the job to reject; job->jobid is used in the message and request
 * reason - text inserted into the message sent with the delete request
 *
 * When schd_TEST_ONLY is set nothing is deleted; the intended action is
 * only written to the debug output.
 *
 * Returns 0 on success (or in test-only mode), 1 if pbs_deljob() failed,
 * and -1 if the message buffer could not be allocated.
 */
int schd_reject_job(Job *job, char *reason)
{
  char *id = "schd_reject_job";
  /* Allocated on first use and kept for the lifetime of the process. */
  static char *message = NULL;
  int rc = 0;

  if (message == NULL)
    {
    message = malloc(MSG_BUFFER_SIZE);

    if (message == NULL)
      {
      (void)sprintf(log_buffer, "cannot malloc %d bytes\n", MSG_BUFFER_SIZE);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      return (-1);
      }
    }

  DBPRT((
    "*************************************************************************\n"));

  if (schd_TEST_ONLY)
    {
    DBPRT(("JOB %s WOULD HAVE BEEN DELETED!!!\n", job->jobid));
    DBPRT(("Message: %s\n", reason));
    }
  else
    {
    /*
     * `reason` comes from the caller and has no known length limit, so the
     * message is bounded to the buffer; an over-long reason is truncated
     * rather than overrunning the allocation.
     */
    (void)snprintf(message, MSG_BUFFER_SIZE,
                   "\n"
                   "PBS job '%s' was rejected by all execution queues.\n"
                   "\n"
                   "The reason given for this action was :\n"
                   "\n"
                   " %s\n"
                   "\n"
                   "Please correct the problem and resubmit your job, or contact the PBS\n"
                   "administrator for assistance.\n"
                   "\n"
                   "Thank you.\n"
                   "\n",
                   job->jobid, reason);

    /*
     * Ask PBS to delete the job from the queue, which should deliver the
     * message to the user.
     */
    rc = pbs_deljob(connector, job->jobid, message);

    if (rc)
      {
      (void)sprintf(log_buffer, "pbs_deljob failed: error %d", rc);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
      DBPRT(("%s: %s\n", id, log_buffer));
      return 1;
      }

    /*
     * Delete this job from the queue's list (move to a NULL queue)
     */
    schd_move_job_to(job, NULL);

    DBPRT(("JOB %s DELETED!!!\n", job->jobid));
    DBPRT(("Message: %s\n", reason));
    }

  DBPRT((
    "*************************************************************************\n"));

  return 0;
}