int64_t getIntState() const { int lsfr = lsb_openjobinfo(lsfid,0/*jobname*/,0/*user*/,0/*queue*/,0/*host*/,ALL_JOB); if ( lsfr < 0 ) { return 0; } else if ( lsfr == 0 ) { lsb_closejobinfo(); return 0; } else { jobInfoEnt const * jie = lsb_readjobinfo(&lsfr); if ( ! jie ) { lsb_closejobinfo(); return 0; } int64_t const status = jie->status; lsb_closejobinfo(); return status; } }
/*
 * Query LSF for the job's current record and pass it to the job's
 * read_job_info handler.
 *
 * Every attempt runs with the session's drm_connection_mutex held. The
 * FINALLY section always closes the LSF job-info stream and releases the
 * mutex.
 *
 * When lsb_openjobinfo() reports no records:
 *   - a job that is not flagged FSD_JOB_CURRENT_SESSION raises
 *     FSD_DRMAA_ERRNO_INVALID_JOB;
 *   - otherwise self->on_missing(self) is invoked and the query is retried,
 *     because the do/while loop only ends once a record was found.
 *     NOTE(review): termination in that case depends on on_missing()
 *     raising or on the job becoming visible again -- confirm.
 *
 * A NULL result from lsb_readjobinfo() is raised as an LSF error.
 */
static void
lsfdrmaa_job_update_status( fsd_job_t *self )
{
	lsfdrmaa_job_t *lsf_self = (lsfdrmaa_job_t*)self;
	struct jobInfoEnt *volatile job_info = NULL;
	bool job_in_queue;

	/* A time_t difference has no portable printf conversion, so it is cast
	 * to long and printed with %ld instead of being passed to %d. */
	fsd_log_enter(( "({job_id=%s, time_delta=%ld})",
			self->job_id, (long)( time(NULL) - self->submit_time ) ));

	do {
		fsd_mutex_lock( &self->session->drm_connection_mutex );
		TRY
		 {
			int n_records;
			int more;
			/* Positive job ids are looked up across "all" users; otherwise
			 * no user filter is passed. */
			char * username = (lsf_self->int_job_id>0)?"all":NULL;

			fsd_log_debug(( "drm connection locked" ));

			n_records = lsb_openjobinfo( lsf_self->int_job_id,
					NULL, username, NULL, NULL, ALL_JOB );
			fsd_log_debug((
						"lsb_openjobinfo( %d[%d], NULL, %s, NULL, NULL, ALL_JOB ) =%d",
						LSB_ARRAY_JOBID(lsf_self->int_job_id),
						LSB_ARRAY_IDX(lsf_self->int_job_id),
						username?username:"******",
						n_records ));

			job_in_queue = n_records > 0;

			if(!job_in_queue){
				if(!(self->flags & FSD_JOB_CURRENT_SESSION)){
					fsd_exc_raise_code( FSD_DRMAA_ERRNO_INVALID_JOB );
				}else{/*handling missing job*/
					self->on_missing(self);
				}
			}else{
				job_info = lsb_readjobinfo( &more );
				fsd_log_debug(( "lsb_readjobinfo(...) =%p: more=%d",
							(void*)job_info, more ));
				if( job_info == NULL )
					fsd_exc_raise_lsf( "lsb_readjobinfo" );
				lsf_self->read_job_info( self, job_info );
			}
		 }
		FINALLY
		 {
			/* job_info is not released here; only the stream is closed. */
			lsb_closejobinfo();
			fsd_log_debug(( "lsb_closejobinfo()" ));
			fsd_mutex_unlock( &self->session->drm_connection_mutex );
		 }
		END_TRY
	} while( !job_in_queue );

	fsd_log_return(( "" ));
}
int main(int argc, char* argv[]) { int options = PEND_JOB | RUN_JOB | UGRP_INFO; char *user = ALL_USERS; /* match jobs for all users */ struct jobInfoEnt *job; FILE* fp; int more; if (lsb_init(argv[0]) < 0) { lsb_perror("lsb_init"); exit(-1); } if (lsb_openjobinfo(0, NULL, user, NULL, NULL, options) < 0) { lsb_perror("lsb_openjobinfo"); exit(-1); } #if 0 printf("All pending/running jobs submitted by all users:\n"); printf(" JOBID USER STAT QUEUE FROM_HOST EXEC_HOST USER_GROUP JOB_NAME SUBMIT_TIME\n"); #endif fp = fopen("/tmp/group_info.txt", "w"); if (fp == NULL) { perror("Error while opening the file.\n"); exit(EXIT_FAILURE); } for (;;) { job = lsb_readjobinfo(&more); if (job == NULL) { lsb_perror("lsb_readjobinfo"); exit(-1); } if ((job->submit.options & SUB_USER_GROUP)) { /* display job information */ char *host = ""; if (job->status == 4) host = job->exHosts[0]; fprintf(fp, "%ld %s %d %s\n", job->jobId, job->user, job->status, job->submit.userGroup); } if (! more) break; } fclose(fp); lsb_closejobinfo(); exit(0); }
int islsfuser(char *user,char *hostname) { struct clusterInfo *cluster; int i,more; struct jobInfoEnt *job; setenv("LSF_ENVDIR","/lsf/conf",1); setenv("LSF_LIBDIR","/lsf/7.0/linux2.6-glibc2.3-x86_64/lib",1); setenv("LSF_BINDIR","/lsf/7.0/linux2.6-glibc2.3-x86_64/bin",1); setenv("LSF_SERVERDIR","/lsf/7.0/linux2.6-glibc2.3-x86_64/etc",1); syslog(LOG_AUTHPRIV|LOG_DEBUG,"pamlsfauth checking lsf"); /* Open an LSF session. If we can't see the lsf shared directory, this will fail.\ * We're going to fail open here rather than closed. */ if(lsb_init(NULL)<0) { syslog(LOG_AUTHPRIV|LOG_ERR,"pamlsfauth LSF connection failed"); return JOBUSER; } #ifndef WITHOUTADMINS # Optionally exclude cluster admins from login access. For the case where # pam_access is being used. This aligns with the torque approach. # /* allow anyone who is a cluster administrator (listed in the * 'Administrators' line in ${LSFBASE}/conf/lsf.cluster.${CLUSTERNAME} * Again, if we're unable to retrieve the information from LSF we're * going to fail open rather than closed. We're not doing pam * authenticate (is this a real user), only pam account (is it ok * for this user to login right now), so failing open * isn't a security problem */ cluster=ls_clusterinfo(NULL,NULL,NULL,0,0); if (cluster!=NULL) { for (i=0; i<cluster->nAdmins; i++) { syslog(LOG_AUTHPRIV|LOG_DEBUG,"pamlsfauth comparing cluster admin %s",cluster->admins[i]); if (strcmp(user,cluster->admins[i])==0) { syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth allowing access for admin (%s)",cluster->admins[i]); return JOBUSER; } } } else { syslog(LOG_AUTHPRIV|LOG_ERR,"pamlsfauth unable to retrieve cluster info"); return JOBUSER; } #endif -- WITHOUTADMINS # /* retrieve list of jobs for user hostname combination, null result * means we can fail the attempt immediately * */ if (lsb_openjobinfo(0,NULL,user,NULL,hostname,CUR_JOB)<0) { syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth denying access for %s. 
No current job on host (%s)",user,hostname); return NOTJOBUSER; } /* we already know tha the user is scheduled onto the node. However, * we're going to iterate through the results so that we can log * a specific job number that enables access. * Again, we're going * to err on the side of not disallowing an authenticated user if we * lose communication with LSF. */ for (;;) { job=lsb_readjobinfo(&more); if (job == NULL) { syslog(LOG_AUTHPRIV|LOG_ERR,"pamlsfauth unable to get job info. allowing access.",job->jobId,hostname,user); return JOBUSER; } if (strcmp(user,job->user)==0) { syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth matched running job (%i) on %s for user %s. allowing access.",job->jobId,hostname,user); return JOBUSER; } if (!more) break; } lsb_closejobinfo(); syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth denying access for %s. No current job on host (%s)",user,hostname); return NOTJOBUSER; }
int main (int argc, char **argv, char **environ) { char *queue = NULL, *host = NULL, *jobName = NULL, *user = NULL; LS_LONG_INT jobId; int options; struct jobInfoEnt *jInfo; char *outFile; char fflag = FALSE; int cc; int rc; rc = _i18n_init (I18N_CAT_MIN); if (lsb_init (argv[0]) < 0) { lsb_perror ("lsb_init"); exit (-1); } while ((cc = getopt (argc, argv, "Vhfq:m:J:")) != EOF) { switch (cc) { case 'q': if (queue || host || jobName) oneOf (argv[0]); queue = optarg; break; case 'm': if (queue || host || jobName) oneOf (argv[0]); host = optarg; break; case 'J': if (queue || host || jobName) oneOf (argv[0]); jobName = optarg; break; case 'V': fputs (_LS_VERSION_, stderr); exit (0); case 'f': fflag = TRUE; break; case 'h': default: usage (argv[0]); } } jobId = 0; options = LAST_JOB; if (argc >= optind + 1) { if (queue || host || jobName) { oneOf (argv[0]); } else if ((argc > 2 && !fflag) || (argc > 3 && fflag)) usage (argv[0]); if (getOneJobId (argv[optind], &jobId, 0)) { usage (argv[0]); } options = 0; } if (lsb_openjobinfo (jobId, jobName, NULL, queue, host, options) < 0 || (jInfo = lsb_readjobinfo (NULL)) == NULL) { if (jobId != 0 || jobName != NULL) { user = ALL_USERS; if (lsb_openjobinfo (jobId, jobName, user, queue, host, options) < 0 || (jInfo = lsb_readjobinfo (NULL)) == NULL) { jobInfoErr (jobId, jobName, NULL, queue, host, options); exit (-1); } } else { jobInfoErr (jobId, jobName, NULL, queue, host, options); exit (-1); } } lsb_closejobinfo (); if (jobId && jInfo->jobId != jobId) { lsberrno = LSBE_JOB_ARRAY; lsb_perror ("bpeek"); exit (-1); } if ((jInfo->submit.options & SUB_INTERACTIVE) && !(jInfo->submit.options & (SUB_OUT_FILE | SUB_ERR_FILE))) { fprintf (stderr, _i18n_msg_get (ls_catd, NL_SETN, 2456, "Job <%s> : Cannot bpeek an interactive job.\n"), /* catgets 2456 */ lsb_jobid2str (jInfo->jobId)); exit (-1); } if (IS_PEND (jInfo->status) || jInfo->execUsername[0] == '\0') { fprintf (stderr, _i18n_msg_get (ls_catd, NL_SETN, 2454, "Job <%s> : Not yet 
started.\n"), /* catgets 2454 */ lsb_jobid2str (jInfo->jobId)); exit (-1); } if (IS_FINISH (jInfo->status)) { fprintf (stderr, _i18n_msg_get (ls_catd, NL_SETN, 2455, "Job <%s> : Already finished.\n"), /* catgets 2455 */ lsb_jobid2str (jInfo->jobId)); exit (-1); } if ((outFile = lsb_peekjob (jInfo->jobId)) == NULL) { char msg[50]; sprintf (msg, "%s <%s>", I18N_Job, lsb_jobid2str (jInfo->jobId)); lsb_perror (msg); exit (-1); } displayOutput (outFile, jInfo, fflag, environ); _i18n_end (ls_catd); exit (0); }
/* Prints out information about the jobs running that fit the defined parameters */ struct numq print_section( int options, char *inp_queue, char *user, char *inp_hosts, int numcoresq, int numnodesq) { /* variables for simulating bjobs command */ struct jobInfoEnt *job; /* detailed job info */ struct queueInfoEnt *que; /* detailed queue info */ struct numq nq; /* information about the queue being polled by the user that we wish to pass on to other routines */ int tot_jobs; /* total jobs */ int more; /* number of remaining jobs unread */ int nodenum; /* Node number counter */ int i,j; /* counter*/ int alreadycounted; /* Logical flag used to figure out if the quantity in question has already been counted*/ int numQueues = 1; /* number of Queues to poll queue info about */ int t; /* Time remaining */ int tdays; /* Time remaining in days */ int thrs; /* Time remaining in hours */ int tmin; /* Time remaining in minutes */ int tsec; /* Time remaining in seconds */ char *targetqueue; /* array for the queue names to poll queue info about */ char startstr[14]; /* string of date when job started */ char submitstr[14]; /* string of data when job was submitted */ char statstr[5]; /* string of status */ char exclus; /* exclusive flag */ char nodelistcheck[10000][20]; /* a list of node names that we will compare against to test uniqueness*/ /* Initialize struct values */ nq.cores=numcoresq; nq.nodes=numnodesq; /* gets the total number of jobs. 
Exits if failure */ tot_jobs = lsb_openjobinfo(0, NULL, user, inp_queue, inp_hosts, options); /* Sanity Checks */ if (tot_jobs < 0) { printf("No matching jobs found\n"); return nq; } if (tot_jobs<0){ lsb_perror("lsb_openjobinfo"); exit(-1); } /* Print header for section */ printf("%-12s %-8.8s %-6.6s %-7.7s %-4s %-14s %-14s %-14s\n", \ "JOBID", "USER", "STAT", "QUEUE", "CORES/NODES", "TIME REMAINING", \ "SUBMIT TIME", "START TIME"); /* Loop over jobs until complete */ for (;;) { job = lsb_readjobinfo(&more); /* get the job details */ /* Sanity Check */ if (job == NULL) { lsb_perror("lsb_readjobinfo"); exit(-1); } /* Store our current target queue */ targetqueue=job->submit.queue; /* Grab information about that queue */ que = lsb_queueinfo(&targetqueue,&numQueues,NULL,NULL,0); /* Sanity check */ if (que == NULL){ lsb_perror("lsb_queueinfo"); exit(-1); } /* Detects if the job is running in exclusive mode */ exclus=' '; if (job->submit.options & SUB_EXCLUSIVE) { exclus='X'; } /* Counts the Number of nodes */ nodenum=1; if (job->numExHosts > 0) { for(i=0;i < job->numExHosts-1; i++) { if(strcmp(job->exHosts[i],job->exHosts[i+1]) != 0){ nodenum++; } } } /* Finds the jobs status */ if (job->status == JOB_STAT_RUN){ strcpy(statstr,"RUN"); } else if(job->status == JOB_STAT_PEND){ strcpy(statstr,"PEND"); } else if(job->status == JOB_STAT_PSUSP){ strcpy(statstr,"PSUSP"); } else if(job->status == JOB_STAT_SSUSP){ strcpy(statstr,"SSUSP"); } /* Test if there is a queue that the user is interested in */ if (inp_queue != NULL) { /* Stores the number of processors and nodes used by this queue for later use */ if (job->status == JOB_STAT_RUN && strcmp(inp_queue,targetqueue) == 0) { numcoresq=job->submit.numProcessors+numcoresq; /* Count the number of unique nodes are used by this queue */ for (i=0;i < job->numExHosts;i++){ /* We need to check that we aren't double counting nodes */ alreadycounted=0; for(j=0;j<numnodesq;j++){ if (strcmp(job->exHosts[i],nodelistcheck[j]) == 0) { 
alreadycounted=1; break; } } /* If we've already been counted break out of this loop if not counted then store that data */ if (alreadycounted == 0) { strcpy(nodelistcheck[numnodesq],job->exHosts[i]); numnodesq++; } } } } /* Figures out what time the job was submitted at and when it started running*/ strftime(submitstr,14,"%b %d %R",&(*localtime(&job->submitTime))); strftime(startstr,14,"%b %d %R",&(*localtime(&job->startTime))); /* Calculates Time Remaining */ t=que->rLimits[LSF_RLIMIT_RUN]-job->runTime; /* Checks to see if ther termination time is other than the queue limit */ if (job->submit.rLimits[LSF_RLIMIT_RUN] > 0) { t=job->submit.rLimits[LSF_RLIMIT_RUN]-job->runTime; } /* Calculates the Time remaining in Days, Hours, Minutes and Seconds */ tdays=t/24/60/60; thrs=t/60/60-24*tdays; tmin=t/60-24*60*tdays-60*thrs; tsec=t-24*60*60*tdays-60*60*thrs-60*tmin; /* Prints out job data */ printf("%-12s %-8.8s %-6.6s %-8.8s %4i%1s%2i%1c %2i%1s%-2.2i%1s%2.2i%1s%2.2i %-14s", \ lsb_jobid2str(job->jobId), job->user, statstr, job->submit.queue, \ job->submit.numProcessors, "/",nodenum,exclus, tdays,":", thrs,":",tmin,":",tsec ,submitstr); /* Prints out start time if the job is not pending */ if (options != PEND_JOB) { printf("%-14s",startstr); } printf ("\n"); /* Checks to see if there are more jobs to do. */ if (!more) break; } /* Prints the total number of jobs in the section */ printf("%-i total jobs\n",tot_jobs); /* Store number of cores and nodes used by the queue. These will have nonzero values if the queue is the one being polled by the user. */ nq.cores=numcoresq; nq.nodes=numnodesq; /* Transmit that data out of the function */ return nq; }
/*
 * R entry point: report the LSF state of one job as a string.
 *
 * sexp_jobid  R integer vector; its first element is the job id.
 *
 * Returns a length-1 character vector holding one of "NULL", "PEND",
 * "PSUSP", "RUN", "WAIT", "SSUSP", "USUSP", "ZOMBI", "EXIT", "DONE",
 * "UNKWN", or "ERROR" for a status value not covered by the switch below.
 * Returns R_NilValue, after printing the LSF system message, when the job
 * query cannot be opened or its first record cannot be read.
 */
SEXP lsf_job_status(SEXP sexp_jobid)
{
  struct jobInfoEnt *info;
  const char *label;
  SEXP status;
  int remaining;
  int id = INTEGER(sexp_jobid)[0];

  remaining = lsb_openjobinfo(id, NULL, NULL, NULL, NULL, ALL_JOB);
  if (remaining < 0) {
    Rprintf("lsf_job_status: lsb_openjobinfo: %s\n", lsb_sysmsg());
    return R_NilValue;
  }

  info = lsb_readjobinfo(&remaining);
  if (info == NULL) {
    Rprintf("lsf_job_status: lsb_readjobinfo: %s\n", lsb_sysmsg());
    lsb_closejobinfo();
    return R_NilValue;
  }
  lsb_closejobinfo();

  /* Allocate the result first, then pick the label and store it once. */
  PROTECT(status = allocVector(STRSXP, 1));

  switch (info->status) {
  case JOB_STAT_NULL:
    label = "NULL";
    break;
  case JOB_STAT_PEND:
    label = "PEND";
    break;
  case JOB_STAT_PSUSP:
    label = "PSUSP";
    break;
  case JOB_STAT_RUN:
    label = "RUN";
    break;
  case JOB_STAT_RUN|JOB_STAT_WAIT:
    label = "WAIT";
    break;
  case JOB_STAT_SSUSP:
    label = "SSUSP";
    break;
  case JOB_STAT_USUSP:
    label = "USUSP";
    break;
  case JOB_STAT_EXIT:
    /* An exited job flagged EXIT_ZOMBIE is reported separately. */
    label = (info->reasons & EXIT_ZOMBIE) ? "ZOMBI" : "EXIT";
    break;
  case JOB_STAT_DONE:
  case JOB_STAT_DONE|JOB_STAT_PDONE:
  case JOB_STAT_DONE|JOB_STAT_PERR:
  case JOB_STAT_DONE|JOB_STAT_WAIT:
    label = "DONE";
    break;
  case JOB_STAT_UNKWN:
    label = "UNKWN";
    break;
  default:
    Rprintf("lsf_job_status: job state <%d> is unknown.\n", info->status);
    label = "ERROR";
    break;
  }

  SET_STRING_ELT(status, 0, mkChar(label));
  UNPROTECT(1);

  return status;
}