int64_t getIntState() const { int lsfr = lsb_openjobinfo(lsfid,0/*jobname*/,0/*user*/,0/*queue*/,0/*host*/,ALL_JOB); if ( lsfr < 0 ) { return 0; } else if ( lsfr == 0 ) { lsb_closejobinfo(); return 0; } else { jobInfoEnt const * jie = lsb_readjobinfo(&lsfr); if ( ! jie ) { lsb_closejobinfo(); return 0; } int64_t const status = jie->status; lsb_closejobinfo(); return status; } }
/*
 * Refresh a job's state from LSF.
 *
 * While holding the session's DRM connection mutex, opens a job-info
 * query for the job's integer id, and either
 *   - hands the first record to lsf_self->read_job_info(), or
 *   - when no record is returned: raises FSD_DRMAA_ERRNO_INVALID_JOB for
 *     jobs that are not flagged FSD_JOB_CURRENT_SESSION, otherwise calls
 *     self->on_missing().
 * The query is repeated for as long as no record is found.
 *
 * NOTE(review): TRY / FINALLY / END_TRY and fsd_exc_raise_* are defined
 * elsewhere; the FINALLY block is presumably executed on both normal and
 * exceptional exit - confirm against the fsd exception macros.
 */
static void
lsfdrmaa_job_update_status( fsd_job_t *self )
{
	lsfdrmaa_job_t *lsf_self = (lsfdrmaa_job_t*)self;
	struct jobInfoEnt *volatile job_info = NULL;
	bool job_in_queue;

	/* time() returns time_t, so the difference is at least that wide;
	 * cast it so the argument really is the int that %d expects. */
	fsd_log_enter(( "({job_id=%s, time_delta=%d})", self->job_id,
				(int)( time(NULL) - self->submit_time ) ));
	do {
		fsd_mutex_lock( &self->session->drm_connection_mutex );
		TRY
		 {
			int n_records;
			int more;
			/* a concrete job id is queried with user "all";
			 * otherwise no user filter is passed */
			char * username = (lsf_self->int_job_id>0)?"all":NULL;

			fsd_log_debug(( "drm connection locked" ));

			n_records = lsb_openjobinfo( lsf_self->int_job_id,
						NULL, username, NULL, NULL, ALL_JOB );
			fsd_log_debug((
						"lsb_openjobinfo( %d[%d], NULL, %s, NULL, NULL, ALL_JOB ) =%d",
						LSB_ARRAY_JOBID(lsf_self->int_job_id),
						LSB_ARRAY_IDX(lsf_self->int_job_id),
						username?username:"******",
						n_records ));

			job_in_queue = n_records > 0;

			if(!job_in_queue){
				if(!(self->flags & FSD_JOB_CURRENT_SESSION)){
					fsd_exc_raise_code( FSD_DRMAA_ERRNO_INVALID_JOB );
				}else{
					/* handling missing job */
					self->on_missing(self);
				}
			}else{
				job_info = lsb_readjobinfo( &more );
				fsd_log_debug(( "lsb_readjobinfo(...) =%p: more=%d",
							(void*)job_info, more ));
				if( job_info == NULL )
					fsd_exc_raise_lsf( "lsb_readjobinfo" );
				lsf_self->read_job_info( self, job_info );
			}
		 }
		FINALLY
		 {
			/* job_info is not freed here: the original call to
			 * lsfdrmaa_free_job_info( job_info ) is disabled. */
			lsb_closejobinfo();
			fsd_log_debug(( "lsb_closejobinfo()" ));
			fsd_mutex_unlock( &self->session->drm_connection_mutex );
		 }
		END_TRY
	} while( !job_in_queue );

	fsd_log_return(( "" ));
}
int main(int argc, char* argv[]) { int options = PEND_JOB | RUN_JOB | UGRP_INFO; char *user = ALL_USERS; /* match jobs for all users */ struct jobInfoEnt *job; FILE* fp; int more; if (lsb_init(argv[0]) < 0) { lsb_perror("lsb_init"); exit(-1); } if (lsb_openjobinfo(0, NULL, user, NULL, NULL, options) < 0) { lsb_perror("lsb_openjobinfo"); exit(-1); } #if 0 printf("All pending/running jobs submitted by all users:\n"); printf(" JOBID USER STAT QUEUE FROM_HOST EXEC_HOST USER_GROUP JOB_NAME SUBMIT_TIME\n"); #endif fp = fopen("/tmp/group_info.txt", "w"); if (fp == NULL) { perror("Error while opening the file.\n"); exit(EXIT_FAILURE); } for (;;) { job = lsb_readjobinfo(&more); if (job == NULL) { lsb_perror("lsb_readjobinfo"); exit(-1); } if ((job->submit.options & SUB_USER_GROUP)) { /* display job information */ char *host = ""; if (job->status == 4) host = job->exHosts[0]; fprintf(fp, "%ld %s %d %s\n", job->jobId, job->user, job->status, job->submit.userGroup); } if (! more) break; } fclose(fp); lsb_closejobinfo(); exit(0); }
int islsfuser(char *user,char *hostname) { struct clusterInfo *cluster; int i,more; struct jobInfoEnt *job; setenv("LSF_ENVDIR","/lsf/conf",1); setenv("LSF_LIBDIR","/lsf/7.0/linux2.6-glibc2.3-x86_64/lib",1); setenv("LSF_BINDIR","/lsf/7.0/linux2.6-glibc2.3-x86_64/bin",1); setenv("LSF_SERVERDIR","/lsf/7.0/linux2.6-glibc2.3-x86_64/etc",1); syslog(LOG_AUTHPRIV|LOG_DEBUG,"pamlsfauth checking lsf"); /* Open an LSF session. If we can't see the lsf shared directory, this will fail.\ * We're going to fail open here rather than closed. */ if(lsb_init(NULL)<0) { syslog(LOG_AUTHPRIV|LOG_ERR,"pamlsfauth LSF connection failed"); return JOBUSER; } #ifndef WITHOUTADMINS # Optionally exclude cluster admins from login access. For the case where # pam_access is being used. This aligns with the torque approach. # /* allow anyone who is a cluster administrator (listed in the * 'Administrators' line in ${LSFBASE}/conf/lsf.cluster.${CLUSTERNAME} * Again, if we're unable to retrieve the information from LSF we're * going to fail open rather than closed. We're not doing pam * authenticate (is this a real user), only pam account (is it ok * for this user to login right now), so failing open * isn't a security problem */ cluster=ls_clusterinfo(NULL,NULL,NULL,0,0); if (cluster!=NULL) { for (i=0; i<cluster->nAdmins; i++) { syslog(LOG_AUTHPRIV|LOG_DEBUG,"pamlsfauth comparing cluster admin %s",cluster->admins[i]); if (strcmp(user,cluster->admins[i])==0) { syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth allowing access for admin (%s)",cluster->admins[i]); return JOBUSER; } } } else { syslog(LOG_AUTHPRIV|LOG_ERR,"pamlsfauth unable to retrieve cluster info"); return JOBUSER; } #endif -- WITHOUTADMINS # /* retrieve list of jobs for user hostname combination, null result * means we can fail the attempt immediately * */ if (lsb_openjobinfo(0,NULL,user,NULL,hostname,CUR_JOB)<0) { syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth denying access for %s. 
No current job on host (%s)",user,hostname); return NOTJOBUSER; } /* we already know tha the user is scheduled onto the node. However, * we're going to iterate through the results so that we can log * a specific job number that enables access. * Again, we're going * to err on the side of not disallowing an authenticated user if we * lose communication with LSF. */ for (;;) { job=lsb_readjobinfo(&more); if (job == NULL) { syslog(LOG_AUTHPRIV|LOG_ERR,"pamlsfauth unable to get job info. allowing access.",job->jobId,hostname,user); return JOBUSER; } if (strcmp(user,job->user)==0) { syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth matched running job (%i) on %s for user %s. allowing access.",job->jobId,hostname,user); return JOBUSER; } if (!more) break; } lsb_closejobinfo(); syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth denying access for %s. No current job on host (%s)",user,hostname); return NOTJOBUSER; }
int main (int argc, char **argv, char **environ) { char *queue = NULL, *host = NULL, *jobName = NULL, *user = NULL; LS_LONG_INT jobId; int options; struct jobInfoEnt *jInfo; char *outFile; char fflag = FALSE; int cc; int rc; rc = _i18n_init (I18N_CAT_MIN); if (lsb_init (argv[0]) < 0) { lsb_perror ("lsb_init"); exit (-1); } while ((cc = getopt (argc, argv, "Vhfq:m:J:")) != EOF) { switch (cc) { case 'q': if (queue || host || jobName) oneOf (argv[0]); queue = optarg; break; case 'm': if (queue || host || jobName) oneOf (argv[0]); host = optarg; break; case 'J': if (queue || host || jobName) oneOf (argv[0]); jobName = optarg; break; case 'V': fputs (_LS_VERSION_, stderr); exit (0); case 'f': fflag = TRUE; break; case 'h': default: usage (argv[0]); } } jobId = 0; options = LAST_JOB; if (argc >= optind + 1) { if (queue || host || jobName) { oneOf (argv[0]); } else if ((argc > 2 && !fflag) || (argc > 3 && fflag)) usage (argv[0]); if (getOneJobId (argv[optind], &jobId, 0)) { usage (argv[0]); } options = 0; } if (lsb_openjobinfo (jobId, jobName, NULL, queue, host, options) < 0 || (jInfo = lsb_readjobinfo (NULL)) == NULL) { if (jobId != 0 || jobName != NULL) { user = ALL_USERS; if (lsb_openjobinfo (jobId, jobName, user, queue, host, options) < 0 || (jInfo = lsb_readjobinfo (NULL)) == NULL) { jobInfoErr (jobId, jobName, NULL, queue, host, options); exit (-1); } } else { jobInfoErr (jobId, jobName, NULL, queue, host, options); exit (-1); } } lsb_closejobinfo (); if (jobId && jInfo->jobId != jobId) { lsberrno = LSBE_JOB_ARRAY; lsb_perror ("bpeek"); exit (-1); } if ((jInfo->submit.options & SUB_INTERACTIVE) && !(jInfo->submit.options & (SUB_OUT_FILE | SUB_ERR_FILE))) { fprintf (stderr, _i18n_msg_get (ls_catd, NL_SETN, 2456, "Job <%s> : Cannot bpeek an interactive job.\n"), /* catgets 2456 */ lsb_jobid2str (jInfo->jobId)); exit (-1); } if (IS_PEND (jInfo->status) || jInfo->execUsername[0] == '\0') { fprintf (stderr, _i18n_msg_get (ls_catd, NL_SETN, 2454, "Job <%s> : Not yet 
started.\n"), /* catgets 2454 */ lsb_jobid2str (jInfo->jobId)); exit (-1); } if (IS_FINISH (jInfo->status)) { fprintf (stderr, _i18n_msg_get (ls_catd, NL_SETN, 2455, "Job <%s> : Already finished.\n"), /* catgets 2455 */ lsb_jobid2str (jInfo->jobId)); exit (-1); } if ((outFile = lsb_peekjob (jInfo->jobId)) == NULL) { char msg[50]; sprintf (msg, "%s <%s>", I18N_Job, lsb_jobid2str (jInfo->jobId)); lsb_perror (msg); exit (-1); } displayOutput (outFile, jInfo, fflag, environ); _i18n_end (ls_catd); exit (0); }
/* Main routine. */ int main(int argc, char **argv) { struct numq nq; /* Struct for passing information between routines */ int c; /* Used for holding information passed in from the command line */ char *user = "******", *queue=NULL, *hosts=NULL; /* Pointers to the information we are querying */ int i, def; /* counters */ /* Initialize Default Flag and structs*/ def=1; nq.cores=0; nq.nodes=0; /* Read in and interpret command line options */ while (1) { static struct option long_options[] = { /* These options set a flag. */ {"help", no_argument,0, 'h'}, {"q", required_argument, 0, 'q'}, {"u", required_argument, 0, 'u'}, {"n", required_argument, 0, 'n'}, {0, 0, 0, 0} }; /* Get information from command line */ int option_index = 0; c = getopt_long (argc, argv, "u:q:n:help",long_options, &option_index); /* Detect the end of the options. */ if (c == -1) break; /* Check what options we are using */ /* Default Flag set to false, we are actually passing options so the default is not used */ def=0; switch (c) { case 0: /* If this option set a flag, do nothing else now. */ if (long_options[option_index].flag != 0) break; printf ("option %s", long_options[option_index].name); if (optarg) printf (" with arg %s", optarg); printf ("\n"); break; case 'q': queue = optarg; break; case 'u': user = optarg; break; case 'n': hosts = optarg; break; case '?': /* This section handles when getopt_long is confused about what you are passing it */ /* getopt_long already printed an error message. 
*/ exit(-1); case 'h': /* Displays this help page then exits */ printf("Welcome to the Help Section.\n"); printf("Showq provides an overview of the cluster and LSF queues in a similar way to showq in PBS.Torque.\n"); printf("By default showq will show you your running, pending and suspended jobs plus an overview of the cluster.\n"); printf("Depending on cluster activity this command could take a few minutes to complete.\n"); printf("Each job has several fields which are listed below.\n"); printf("\n"); printf("JOBID The LSF job id.\n"); printf("\n"); printf("USER The name of the user.\n"); printf("\n"); printf("STAT The current job state. RUN if it is running, PEND if it is pending,\n"); printf(" PSUSP if suspended by the owner or admin,\n"); printf(" SSUSP if suspended due to host being overloaded or queue run window closure.\n"); printf("\n"); printf("QUEUE The LSF queue the job was submitted from.\n"); printf("\n"); printf("CORES/NODES This lists the number of cores used by the process followed by the number of nodes after the /.\n"); printf(" Additionally if the job is running in Exclusive mode a X appears next to the node count.\n"); printf("\n"); printf("TIME REMAINING The amount of time remaining on this run. This is based off of the queue run time limit\n"); printf(" or if the user defines a run time using BSUB -W it will use that. 
Jobs that overrun\n"); printf(" the a time limit show up with negative time equal to the amount they have overrun.\n"); printf(" Jobs from queues with no time limit and no user defined limit show up as negative as well.\n"); printf(" The value in that case is just the negative value of the time the job has run for.\n"); printf("\n"); printf("SUBMIT TIME The time at which the job was submitted to the queue.\n"); printf("\n"); printf("START TIME The time at which the job started running.\n"); printf("\n"); printf("Additional Usage options are as below.\n"); printf("\n"); printf("-help Gets you to this convenient help page.\n"); printf("\n"); printf("-q queue_name Shows you what is going on in that queue currently.\n"); printf("\n"); printf("-u user_name Shows the jobs current associated with that user.\n"); printf(" By default this is set to all if -n or -q is used other wise it is set to the current user.\n"); printf(" If -u is used in tandem with -q it will show only the jobs running on that queue for that user.\n"); printf("\n"); printf("-n host_name Shows what is going on for this host or group of hosts. 
Does not show pending jobs for these hosts.\n"); exit(-1); default: abort (); } } /* Sets what will happen when the default flag is set */ /* In this case it will simply return what jobs belong to the user and the node summary */ if (def) user = NULL; /* initialize LSBLIB and get the configuration environment */ if (lsb_init(argv[0]) < 0) { lsb_perror("simbjobs: lsb_init() failed"); exit(-1); } /* Query about the following information */ printf("ACTIVE JOBS-------------\n"); nq = print_section(RUN_JOB, queue, user, hosts, nq.cores, nq.nodes); printf("\nSUSPENDED JOBS-------------\n"); nq = print_section(SUSP_JOB, queue, user, hosts, nq.cores, nq.nodes); printf("\n"); print_summary(queue, user, hosts, nq.cores, nq.nodes); printf("\n"); printf("PENDING JOBS-------------\n"); nq = print_section(PEND_JOB, queue, user, hosts, nq.cores, nq.nodes); /* when finished to display the job info, close the connection to the mbatchd */ lsb_closejobinfo(); exit(0); }
/*
 * Resolve the job ids a command should operate on.
 *
 * Job ids given explicitly on the command line (as parsed by
 * getSpecJobIds()) win: when there are any, their count is returned and
 * *jobIds0 is whatever getSpecJobIds() stored there.
 *
 * Otherwise the ids are obtained from lsb_openjobinfo_a() using the
 * jobName / user / queue / host filters and an option mask built from
 * `extOption` (LAST_JOB when extOption is 0) and the program name in
 * argv[0]:
 *   - "bmig"     : CUR_JOB is cleared, RUN_JOB | SUSP_JOB are set
 *   - "brequeue" : the mask is reset to extOption
 *   - JOBID_ONLY is added unless DONE_JOB is requested
 *
 * On a failed query jobInfoErr() is called and the process exits with -1.
 *
 * Returns the number of job ids; *jobIds0 points at the id array of the
 * jobInfoHead returned by the library.
 */
int
getJobIds (int argc, char **argv, char *jobName, char *user, char *queue,
           char *host, LS_LONG_INT ** jobIds0, int extOption)
{
  struct jobInfoHead *head;
  const char *progName = argv[0];
  int options = extOption ? extOption : LAST_JOB;
  int explicitIds;

  /* getSpecJobIds() may adjust the option mask through the pointer. */
  explicitIds = getSpecJobIds (argc, argv, jobIds0, &options);
  if (explicitIds != 0)
    return explicitIds;

  if (extOption & ZOMBIE_JOB)
    options |= ZOMBIE_JOB;

  if (strstr (progName, "bmig") != NULL)
    options = (options & ~CUR_JOB) | (RUN_JOB | SUSP_JOB);

  if (strstr (progName, "brequeue") != NULL)
    options = extOption;

  /* Ask for ids only, except when finished jobs are requested. */
  options |= JOBID_ONLY;
  if ((options & DONE_JOB) != 0)
    options &= ~JOBID_ONLY;

  TIMEIT (0,
          (head = lsb_openjobinfo_a ((LS_LONG_INT) 0, jobName, user, queue,
                                     host, options)),
          "lsb_openjobinfo");
  if (head == NULL)
    {
      jobInfoErr (0, jobName, user, queue, host, options);
      exit (-1);
    }

  TIMEIT (0, lsb_closejobinfo (), "lsb_closejobinfo");

  /* The header is still read after the query has been closed, exactly as
   * before; its storage is owned by the library. */
  *jobIds0 = head->jobIds;
  return head->numJobs;
}
/*
 * Return the state of an LSF job as a length-1 R character vector.
 *
 * sexp_jobid: R integer vector; its first element is the job id.
 *
 * The job's status word is translated to one of "NULL", "PEND", "PSUSP",
 * "RUN", "WAIT", "SSUSP", "USUSP", "ZOMBI", "EXIT", "DONE" or "UNKWN".
 * An unrecognised status word is reported with Rprintf() and yields
 * "ERROR".
 *
 * Returns R_NilValue (after printing the LSF error message) when the job
 * query cannot be opened or no record can be read.
 */
SEXP lsf_job_status(SEXP sexp_jobid)
{
    struct jobInfoEnt *entry;
    const char *label;
    SEXP status;
    int more;
    int jobid = INTEGER(sexp_jobid)[0];

    more = lsb_openjobinfo(jobid, NULL, NULL, NULL, NULL, ALL_JOB);
    if (more < 0) {
        Rprintf("lsf_job_status: lsb_openjobinfo: %s\n", lsb_sysmsg());
        return R_NilValue;
    }

    entry = lsb_readjobinfo(&more);
    if (entry == NULL) {
        Rprintf("lsf_job_status: lsb_readjobinfo: %s\n", lsb_sysmsg());
        lsb_closejobinfo();
        return R_NilValue;
    }

    /* The record is still read after the query has been closed, exactly
     * as before. */
    lsb_closejobinfo();

    PROTECT(status = allocVector(STRSXP, 1));

    /* Pick the label first; it is stored in the vector once, below. */
    switch (entry->status) {
    case JOB_STAT_NULL:
        label = "NULL";
        break;
    case JOB_STAT_PEND:
        label = "PEND";
        break;
    case JOB_STAT_PSUSP:
        label = "PSUSP";
        break;
    case JOB_STAT_RUN:
        label = "RUN";
        break;
    case JOB_STAT_RUN|JOB_STAT_WAIT:
        label = "WAIT";
        break;
    case JOB_STAT_SSUSP:
        label = "SSUSP";
        break;
    case JOB_STAT_USUSP:
        label = "USUSP";
        break;
    case JOB_STAT_EXIT:
        /* an exited job flagged EXIT_ZOMBIE is reported as a zombie */
        label = (entry->reasons & EXIT_ZOMBIE) ? "ZOMBI" : "EXIT";
        break;
    case JOB_STAT_DONE:
    case JOB_STAT_DONE|JOB_STAT_PDONE:
    case JOB_STAT_DONE|JOB_STAT_PERR:
    case JOB_STAT_DONE|JOB_STAT_WAIT:
        label = "DONE";
        break;
    case JOB_STAT_UNKWN:
        label = "UNKWN";
        break;
    default:
        Rprintf("lsf_job_status: job state <%d> is unknown.\n", entry->status);
        label = "ERROR";
        break;
    }

    SET_STRING_ELT(status, 0, mkChar(label));
    UNPROTECT(1);

    return status;
}