예제 #1
0
			int64_t getIntState() const
			{
				int lsfr = lsb_openjobinfo(lsfid,0/*jobname*/,0/*user*/,0/*queue*/,0/*host*/,ALL_JOB);
				
				if ( lsfr < 0 )
				{
					return 0;
				}
				else if ( lsfr == 0 )
				{
					lsb_closejobinfo();					
					return 0;
				}
				else
				{
					jobInfoEnt const * jie = lsb_readjobinfo(&lsfr);
					
					if ( ! jie )
					{
						lsb_closejobinfo();					
						return 0;				
					}

					int64_t const status = jie->status;
					lsb_closejobinfo();
			
					return status;		
				}
			}
예제 #2
0
/*
 * Refresh the state of a job by querying LSF for its job record.
 *
 * While holding the session's drm_connection_mutex, the function opens a
 * job-info query for lsf_self->int_job_id, reads one record and hands it
 * to lsf_self->read_job_info().  If the query reports no records:
 *   - a job that does not carry FSD_JOB_CURRENT_SESSION raises
 *     FSD_DRMAA_ERRNO_INVALID_JOB;
 *   - otherwise self->on_missing() is invoked and the whole query is
 *     repeated (the do/while loops until a record count > 0 is seen).
 *
 * The query is closed and the mutex released in the FINALLY section of
 * every iteration.
 */
static void
lsfdrmaa_job_update_status( fsd_job_t *self )
{
	lsfdrmaa_job_t *lsf_self = (lsfdrmaa_job_t*)self;
	/* NOTE(review): volatile presumably because TRY/FINALLY are
	 * setjmp-based -- confirm against the macro definitions. */
	struct jobInfoEnt *volatile job_info = NULL;
	bool job_in_queue;	
	
	/* NOTE(review): the time difference is printed with %d -- confirm the
	 * type of submit_time matches that conversion. */
	fsd_log_enter(( "({job_id=%s, time_delta=%d})", self->job_id, time(NULL) - self->submit_time ));
	do {
		/* The connection mutex is held for the whole open/read/close sequence. */
		fsd_mutex_lock( &self->session->drm_connection_mutex );
		TRY
		 {
			int n_records;
			int more;
			/* Query as user "all" when a concrete job id is known,
			 * otherwise pass NULL for the user argument. */
			char * username = (lsf_self->int_job_id>0)?"all":NULL;

			fsd_log_debug(( "drm connection locked" ));

			n_records = lsb_openjobinfo( lsf_self->int_job_id,
						NULL, username, NULL, NULL, ALL_JOB );
			fsd_log_debug((
						"lsb_openjobinfo( %d[%d], NULL, %s, NULL, NULL, ALL_JOB ) =%d",
						LSB_ARRAY_JOBID(lsf_self->int_job_id),
						LSB_ARRAY_IDX(lsf_self->int_job_id),
						username?username:"******",
						n_records ));
			
						/* A non-positive count (including an error return) is
						 * treated as "job not found". */
						job_in_queue = n_records > 0;
			
						if(!job_in_queue){
				/* Jobs not flagged as belonging to the current session are
				 * reported as invalid. */
				if(!(self->flags & FSD_JOB_CURRENT_SESSION)){
									fsd_exc_raise_code( FSD_DRMAA_ERRNO_INVALID_JOB );
								 }else{/*handling missing job*/
									 /* NOTE(review): the loop repeats after this call;
									  * presumably on_missing either waits or raises --
									  * verify, otherwise this can spin. */
									 self->on_missing(self);
								 }
						}else{
								job_info = lsb_readjobinfo( &more );
				fsd_log_debug(( "lsb_readjobinfo(...) =%p: more=%d",
							(void*)job_info, more ));
				if( job_info == NULL )
					fsd_exc_raise_lsf( "lsb_readjobinfo" );
				/* Let the job object copy what it needs from the record. */
				lsf_self->read_job_info( self, job_info );
						}
		 }
		FINALLY
		 {
			/* Runs on both normal and exceptional exit from the TRY block:
			 * close the query, then release the connection mutex. */
			/* lsfdrmaa_free_job_info( job_info ); */
			lsb_closejobinfo();
			fsd_log_debug(( "lsb_closejobinfo()" ));
			fsd_mutex_unlock( &self->session->drm_connection_mutex );
		 }
		END_TRY

	} while( !job_in_queue );
	fsd_log_return(( "" ));
}
예제 #3
0
파일: test.c 프로젝트: subirsarkar/monitor
/*
 * Write one line per pending/running job that was submitted with a user
 * group to /tmp/group_info.txt, in the form
 *     <jobId> <user> <status> <userGroup>
 *
 * Jobs of all users are queried.  Exits with 0 on success; exits with a
 * failure status when LSF cannot be initialised or queried, or when the
 * output file cannot be opened.
 */
int main(int argc, char* argv[]) {
  int  options = PEND_JOB | RUN_JOB | UGRP_INFO;
  char *user = ALL_USERS;             /* match jobs for all users */
  struct jobInfoEnt *job;
  FILE* fp;
  int more;

  (void)argc;                         /* only argv[0] is used */

  if (lsb_init(argv[0]) < 0) {
    lsb_perror("lsb_init");
    exit(-1);
  }

  if (lsb_openjobinfo(0, NULL, user, NULL, NULL, options) < 0) {
    lsb_perror("lsb_openjobinfo");
    exit(-1);
  }

#if 0
  printf("All pending/running jobs submitted by all users:\n");
  printf("    JOBID      USER    STAT  QUEUE      FROM_HOST   EXEC_HOST   USER_GROUP JOB_NAME   SUBMIT_TIME\n");
#endif
  fp = fopen("/tmp/group_info.txt", "w");
  if (fp == NULL) {
     /* perror appends ": <reason>\n" itself, so the message carries no newline */
     perror("Error while opening the file.");
     lsb_closejobinfo();
     exit(EXIT_FAILURE);
  }
  for (;;) {
    job = lsb_readjobinfo(&more);
    if (job == NULL) {
      lsb_perror("lsb_readjobinfo");
      /* release the output file and the job query before bailing out */
      fclose(fp);
      lsb_closejobinfo();
      exit(-1);
    }
    if ((job->submit.options & SUB_USER_GROUP)) {
      /* record job information; only jobs carrying a user group are listed */
      fprintf(fp, "%ld %s %d %s\n",
         job->jobId, 
              job->user, 
              job->status, 
              job->submit.userGroup);
    }
    if (! more) 
      break;
  }

  /* fclose flushes buffered output, so a failure here means lost data */
  if (fclose(fp) != 0) {
    perror("Error while closing the file.");
    lsb_closejobinfo();
    exit(EXIT_FAILURE);
  }

  lsb_closejobinfo();
  exit(0);
}
예제 #4
0
/*
 * Decide whether `user` may log in to `hostname` according to LSF.
 *
 * Returns JOBUSER when access should be allowed:
 *   - the LSF connection or cluster lookup failed (deliberate fail-open),
 *   - the user is a cluster administrator (unless built with
 *     WITHOUTADMINS),
 *   - reading a job record failed (fail-open), or
 *   - a current job on the host belongs to the user.
 * Returns NOTJOBUSER when the job query fails/returns nothing or no
 * returned job belongs to the user.
 *
 * Side effects: overwrites the LSF_* environment variables with fixed
 * installation paths and writes progress to syslog (LOG_AUTHPRIV).
 */
int islsfuser(char *user,char *hostname)
{
    struct clusterInfo *cluster;
    int i,more;
    struct jobInfoEnt *job;

    setenv("LSF_ENVDIR","/lsf/conf",1);
    setenv("LSF_LIBDIR","/lsf/7.0/linux2.6-glibc2.3-x86_64/lib",1);
    setenv("LSF_BINDIR","/lsf/7.0/linux2.6-glibc2.3-x86_64/bin",1);
    setenv("LSF_SERVERDIR","/lsf/7.0/linux2.6-glibc2.3-x86_64/etc",1);

    syslog(LOG_AUTHPRIV|LOG_DEBUG,"pamlsfauth checking lsf");

    /* Open an LSF session. If we can't see the lsf shared directory, this
     * will fail. We're going to fail open here rather than closed.
     */
    if(lsb_init(NULL)<0)
    {
        syslog(LOG_AUTHPRIV|LOG_ERR,"pamlsfauth LSF connection failed");
        return JOBUSER;
    }

#ifndef WITHOUTADMINS
    /* Building with WITHOUTADMINS optionally excludes cluster admins from
     * login access, for the case where pam_access is being used.  This
     * aligns with the torque approach.
     */

    /*  allow anyone who is a cluster administrator (listed in the
     *  'Administrators' line in ${LSFBASE}/conf/lsf.cluster.${CLUSTERNAME}
     *  Again, if we're unable to retrieve the information from LSF we're
     *  going to fail open rather than closed. We're not doing pam
     *  authenticate (is this a real user), only pam account (is it ok
     *  for this user to login right now), so failing open
     *  isn't a security problem
     */
    cluster=ls_clusterinfo(NULL,NULL,NULL,0,0);
    if (cluster!=NULL)
    {
        for (i=0; i<cluster->nAdmins; i++)
        {
            syslog(LOG_AUTHPRIV|LOG_DEBUG,"pamlsfauth comparing cluster admin %s",cluster->admins[i]);
            if (strcmp(user,cluster->admins[i])==0)
            {
                syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth allowing access for admin (%s)",cluster->admins[i]);
                return JOBUSER;
            }
        }
    }
    else
    {
        syslog(LOG_AUTHPRIV|LOG_ERR,"pamlsfauth unable to retrieve cluster info");
        return JOBUSER;
    }

#endif /* WITHOUTADMINS */

    /* retrieve list of jobs for user hostname combination, null result
     * means we can fail the attempt immediately
     * */

    if (lsb_openjobinfo(0,NULL,user,NULL,hostname,CUR_JOB)<0)
    {
        syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth denying access for %s.  No current job on host (%s)",user,hostname);
        return NOTJOBUSER;
    }

    /* we already know that the user is scheduled onto the node. However,
     * we're going to iterate through the results so that we can log
     * a specific job number that enables access.  Again, we're going
     * to err on the side of not disallowing an authenticated user if we
     * lose communication with LSF.
     */

    for (;;)
    {
        job=lsb_readjobinfo(&more);
        if (job == NULL)
        {
            /* job is NULL here, so nothing from it may be logged */
            syslog(LOG_AUTHPRIV|LOG_ERR,"pamlsfauth unable to get job info.  allowing access.");
            lsb_closejobinfo();
            return JOBUSER;
        }
        if (strcmp(user,job->user)==0)
        {
            /* the job id is wider than int, so print it as long long */
            syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth matched running job (%lld) on %s for user %s.  allowing access.",(long long)job->jobId,hostname,user);
            /* log first, then close: the record may not outlive the query */
            lsb_closejobinfo();
            return JOBUSER;
        }
        if (!more) break;
    }
    lsb_closejobinfo();
    syslog(LOG_AUTHPRIV|LOG_NOTICE,"pamlsfauth denying access for %s.  No current job on host (%s)",user,hostname);
    return NOTJOBUSER;
}
예제 #5
0
/*
 * bpeek entry point: locate one job and display its output so far.
 *
 * Options:
 *   -q queue | -m host | -J jobName   select the job (mutually exclusive,
 *                                     and exclusive with a job id argument)
 *   -f                                passed through to displayOutput()
 *   -V                                print the version and exit
 *   -h                                print usage
 * With no selector the most recent job (LAST_JOB) is used.
 *
 * The job is first looked up for the calling user; when a job id or job
 * name was given and that lookup fails, it is retried for ALL_USERS.
 * Interactive jobs without output/error files, jobs that have not started
 * and jobs that have finished are rejected with a message on stderr.
 */
int
main (int argc, char **argv, char **environ)
{
  char *queue = NULL, *host = NULL, *jobName = NULL, *user = NULL;
  LS_LONG_INT jobId;
  int options;
  struct jobInfoEnt *jInfo;
  char *outFile;
  char fflag = FALSE;
  int cc;

  /* the result was never inspected; make the discard explicit */
  (void) _i18n_init (I18N_CAT_MIN);

  if (lsb_init (argv[0]) < 0)
    {
      lsb_perror ("lsb_init");
      exit (-1);
    }

  while ((cc = getopt (argc, argv, "Vhfq:m:J:")) != EOF)
    {
      switch (cc)
	{
	case 'q':
	  if (queue || host || jobName)
	    oneOf (argv[0]);
	  queue = optarg;
	  break;
	case 'm':
	  if (queue || host || jobName)
	    oneOf (argv[0]);
	  host = optarg;
	  break;
	case 'J':
	  if (queue || host || jobName)
	    oneOf (argv[0]);
	  jobName = optarg;
	  break;
	case 'V':
	  fputs (_LS_VERSION_, stderr);
	  exit (0);
	case 'f':
	  fflag = TRUE;
	  break;
	case 'h':
	default:
	  usage (argv[0]);
	}
    }

  /* default: no explicit job id, peek at the most recent job */
  jobId = 0;
  options = LAST_JOB;
  if (argc >= optind + 1)
    {
      /* a positional job id excludes -q/-m/-J and any further arguments */
      if (queue || host || jobName)
	{
	  oneOf (argv[0]);
	}
      else if ((argc > 2 && !fflag) || (argc > 3 && fflag))
	usage (argv[0]);

      if (getOneJobId (argv[optind], &jobId, 0))
	{
	  usage (argv[0]);
	}

      options = 0;
    }



  /* first attempt: jobs of the calling user (user argument NULL) */
  if (lsb_openjobinfo (jobId, jobName, NULL, queue, host, options) < 0
      || (jInfo = lsb_readjobinfo (NULL)) == NULL)
    {

      /* an explicitly named job may belong to someone else: retry for all users */
      if (jobId != 0 || jobName != NULL)
	{
	  user = ALL_USERS;
	  if (lsb_openjobinfo (jobId, jobName, user, queue, host, options) < 0
	      || (jInfo = lsb_readjobinfo (NULL)) == NULL)
	    {
	      jobInfoErr (jobId, jobName, NULL, queue, host, options);
	      exit (-1);
	    }
	}
      else
	{
	  jobInfoErr (jobId, jobName, NULL, queue, host, options);
	  exit (-1);
	}
    }
  /* NOTE(review): jInfo is still used below; this relies on the record
     staying valid after the query is closed -- confirm with the LSF API. */
  lsb_closejobinfo ();


  /* a different id than requested is reported as a job-array error */
  if (jobId && jInfo->jobId != jobId)
    {
      lsberrno = LSBE_JOB_ARRAY;
      lsb_perror ("bpeek");
      exit (-1);
    }


  if ((jInfo->submit.options & SUB_INTERACTIVE) &&
      !(jInfo->submit.options & (SUB_OUT_FILE | SUB_ERR_FILE)))
    {
      fprintf (stderr, _i18n_msg_get (ls_catd, NL_SETN, 2456, "Job <%s> : Cannot bpeek an interactive job.\n"),	/* catgets  2456 */
	       lsb_jobid2str (jInfo->jobId));
      exit (-1);
    }

  if (IS_PEND (jInfo->status) || jInfo->execUsername[0] == '\0')
    {
      fprintf (stderr, _i18n_msg_get (ls_catd, NL_SETN, 2454, "Job <%s> : Not yet started.\n"),	/* catgets  2454 */
	       lsb_jobid2str (jInfo->jobId));

      exit (-1);
    }
  if (IS_FINISH (jInfo->status))
    {
      fprintf (stderr, _i18n_msg_get (ls_catd, NL_SETN, 2455, "Job <%s> : Already finished.\n"),	/* catgets  2455  */
	       lsb_jobid2str (jInfo->jobId));
      exit (-1);
    }

  if ((outFile = lsb_peekjob (jInfo->jobId)) == NULL)
    {
      char msg[128];
      /* bounded: the translated job label has no fixed length */
      snprintf (msg, sizeof msg, "%s <%s>", I18N_Job,
		lsb_jobid2str (jInfo->jobId));
      lsb_perror (msg);
      exit (-1);
    }
  displayOutput (outFile, jInfo, fflag, environ);
  _i18n_end (ls_catd);
  exit (0);

}
예제 #6
0
파일: showq.c 프로젝트: fasrc/showq
/*
 * Prints one table section listing the jobs that match the given filters
 * and accumulates core/node usage for the queue of interest.
 *
 * options    job-state selector passed to lsb_openjobinfo (e.g. PEND_JOB)
 * inp_queue  queue filter, or NULL for all queues; when non-NULL, running
 *            jobs in that queue are added to the core and node counts
 * user       user filter passed to lsb_openjobinfo
 * inp_hosts  host filter passed to lsb_openjobinfo
 * numcoresq  core count accumulated so far
 * numnodesq  node count accumulated so far
 *
 * Returns the updated core/node counts.  When no job matches, prints
 * "No matching jobs found" and returns the counts unchanged.  Exits the
 * process when a job record or the queue information cannot be read.
 */
struct numq print_section( int options, char *inp_queue, char *user, char *inp_hosts, int numcoresq, int numnodesq) {

	/* capacity of the node-name table used for uniqueness checks */
	enum { MAX_NODES = 10000, NODE_NAME_LEN = 20 };

	/* variables for simulating bjobs command */
	struct jobInfoEnt *job;     /* detailed job info */
	struct queueInfoEnt *que;   /* detailed queue info */
	struct numq nq;             /* counts handed back to the caller */
	int tot_jobs;               /* total jobs */
	int more;                   /* number of remaining jobs unread */
	int nodenum;                /* node number counter */
	int i,j;                    /* counters */
	int alreadycounted;         /* set when a host is already in the node table */
	int known;                  /* number of table entries that may be compared */
	int numQueues;              /* in/out queue count for lsb_queueinfo */
	int t;                      /* time remaining */
	int tdays;                  /* time remaining in days */
	int thrs;                   /* time remaining in hours */
	int tmin;                   /* time remaining in minutes */
	int tsec;                   /* time remaining in seconds */
	char *targetqueue;          /* name of the queue to poll queue info about */
	char startstr[14];          /* string of date when job started */
	char submitstr[14];         /* string of date when job was submitted */
	const char *statstr;        /* status label */
	char exclus;                /* exclusive flag */
	/* node names seen so far; zero-filled so that entries below an incoming
	 * numnodesq compare as empty strings instead of uninitialised memory */
	char nodelistcheck[MAX_NODES][NODE_NAME_LEN] = {{0}};

	/* Initialize struct values */
	nq.cores=numcoresq;
	nq.nodes=numnodesq;

	/* gets the total number of jobs */
	tot_jobs = lsb_openjobinfo(0, NULL, user, inp_queue, inp_hosts, options);

	/* Nothing to list: a negative result is a failed query (including "no
	 * matching job"); zero records would make the first read below fail. */
	if (tot_jobs <= 0) {
		if (tot_jobs == 0) {
			lsb_closejobinfo();   /* the query was opened successfully */
		}
		printf("No matching jobs found\n");
		return nq;
	}

	/* Print header for section */
	printf("%-12s %-8.8s %-6.6s %-7.7s %-4s  %-14s   %-14s %-14s\n",	\
		 "JOBID", "USER", "STAT", "QUEUE", "CORES/NODES", "TIME REMAINING",		\
		 "SUBMIT TIME", "START TIME");

	/* Loop over jobs until complete */
	for (;;) {
		job = lsb_readjobinfo(&more);   /* get the job details */

		/* Sanity Check */
		if (job == NULL) {
			lsb_perror("lsb_readjobinfo");
			exit(-1);
		}

		/* Store our current target queue */
		targetqueue=job->submit.queue;

		/* Grab information about that queue; the count argument is in/out,
		 * so it is reset to one queue name for every call */
		numQueues = 1;
		que = lsb_queueinfo(&targetqueue,&numQueues,NULL,NULL,0);

		/* Sanity check */
		if (que == NULL){
			lsb_perror("lsb_queueinfo");
			exit(-1);
		}

		/* Detects if the job is running in exclusive mode */
		exclus=' ';
		if (job->submit.options & SUB_EXCLUSIVE) {
			exclus='X';
		}

		/* Counts the number of nodes: one plus every change of host name
		 * between neighbouring entries of the execution host list */
		nodenum=1;
		if (job->numExHosts > 0) {
			for(i=0;i < job->numExHosts-1; i++) {
				if(strcmp(job->exHosts[i],job->exHosts[i+1]) != 0){
					nodenum++;
				}
			}
		}

		/* Finds the job's status label; pointing at literals avoids copying
		 * six-byte strings such as "PSUSP" into a too-small buffer */
		if (job->status == JOB_STAT_RUN){
			statstr = "RUN";
		}
		else if(job->status == JOB_STAT_PEND){
			statstr = "PEND";
		}
		else if(job->status == JOB_STAT_PSUSP){
			statstr = "PSUSP";
		}
		else if(job->status == JOB_STAT_SSUSP){
			statstr = "SSUSP";
		}
		else {
			/* any other state gets a defined label instead of stale data */
			statstr = "OTHER";
		}

		/* Test if there is a queue that the user is interested in */
		if (inp_queue != NULL) {

			/* Stores the number of processors and nodes used by this queue for later use */
			if (job->status == JOB_STAT_RUN && strcmp(inp_queue,targetqueue) == 0) {
				numcoresq=job->submit.numProcessors+numcoresq;

				/* Count the number of unique nodes used by this queue */
				for (i=0;i < job->numExHosts;i++){
					/* We need to check that we aren't double counting nodes */
					alreadycounted=0;

					/* never look past the end of the table */
					known = numnodesq < MAX_NODES ? numnodesq : MAX_NODES;

					for(j=0;j<known;j++){
						/* names are stored truncated to NODE_NAME_LEN-1
						 * characters, so compare over the same length */
						if (strncmp(job->exHosts[i],nodelistcheck[j],NODE_NAME_LEN-1) == 0) {
							alreadycounted=1;
							break;
						}
					}

					/* A host not seen before is counted, and remembered
					 * while the table still has room */
					if (alreadycounted == 0) {
						if (numnodesq >= 0 && numnodesq < MAX_NODES) {
							snprintf(nodelistcheck[numnodesq],NODE_NAME_LEN,"%s",job->exHosts[i]);
						}
						numnodesq++;
					}
				}
			}
		}

		/* Figures out what time the job was submitted at and when it started running */
		strftime(submitstr,sizeof submitstr,"%b %d %R",localtime(&job->submitTime));
		strftime(startstr,sizeof startstr,"%b %d %R",localtime(&job->startTime));

		/* Calculates time remaining from the queue's run limit */
		t=que->rLimits[LSF_RLIMIT_RUN]-job->runTime;

		/* A run limit set on the job itself overrides the queue limit */
		if (job->submit.rLimits[LSF_RLIMIT_RUN] > 0) {
			t=job->submit.rLimits[LSF_RLIMIT_RUN]-job->runTime;
		}

		/* Calculates the time remaining in days, hours, minutes and seconds */
		tdays=t/24/60/60;

		thrs=t/60/60-24*tdays;

		tmin=t/60-24*60*tdays-60*thrs;

		tsec=t-24*60*60*tdays-60*60*thrs-60*tmin;

		/* Prints out job data */
		printf("%-12s %-8.8s %-6.6s %-8.8s %4i%1s%2i%1c       %2i%1s%-2.2i%1s%2.2i%1s%2.2i  %-14s",	\
			lsb_jobid2str(job->jobId), job->user, statstr, job->submit.queue, \
			job->submit.numProcessors, "/",nodenum,exclus, tdays,":", thrs,":",tmin,":",tsec ,submitstr);

		/* Prints out start time if the job is not pending */
		if (options != PEND_JOB) {
			printf("%-14s",startstr);
		}

		printf ("\n");

		/* Checks to see if there are more jobs to do. */
		if (!more) break;
	}

	/* Done reading: release the job query */
	lsb_closejobinfo();

	/* Prints the total number of jobs in the section */
	printf("%-i total jobs\n",tot_jobs);

	/* Store number of cores and nodes used by the queue.  These will have nonzero values if the queue is the one being polled by the user. */
	nq.cores=numcoresq;
	nq.nodes=numnodesq;

	/* Transmit that data out of the function */
	return nq;

}
예제 #7
0
파일: Rlsf.c 프로젝트: cran/Rlsf
/*
 * Look up a single LSF job and return its state as a one-element R
 * character vector ("PEND", "RUN", "DONE", ...).
 *
 * sexp_jobid: R integer vector; its first element is the job id.
 *
 * Returns R_NilValue (after printing a diagnostic) when the job query
 * cannot be opened or no record can be read.  A status value without a
 * known label is reported as "ERROR" after printing the raw value.
 */
SEXP
lsf_job_status(SEXP sexp_jobid)
{
  struct jobInfoEnt *record;
  const char *label;
  SEXP result;
  int count;
  int id = INTEGER(sexp_jobid)[0];

  count = lsb_openjobinfo(id, NULL, NULL, NULL, NULL, ALL_JOB);
  if (count < 0) {
    Rprintf("lsf_job_status: lsb_openjobinfo: %s\n", lsb_sysmsg());
    return R_NilValue;
  }

  record = lsb_readjobinfo(&count);
  if (record == NULL) {
    Rprintf("lsf_job_status: lsb_readjobinfo: %s\n", lsb_sysmsg());
    lsb_closejobinfo();
    return R_NilValue;
  }

  lsb_closejobinfo();

  PROTECT(result = allocVector(STRSXP, 1));

  /* Translate the status bits into the label handed back to R. */
  switch (record->status) {
  case JOB_STAT_NULL:
    label = "NULL";
    break;
  case JOB_STAT_PEND:
    label = "PEND";
    break;
  case JOB_STAT_PSUSP:
    label = "PSUSP";
    break;
  case JOB_STAT_RUN:
    label = "RUN";
    break;
  case JOB_STAT_RUN|JOB_STAT_WAIT:
    label = "WAIT";
    break;
  case JOB_STAT_SSUSP:
    label = "SSUSP";
    break;
  case JOB_STAT_USUSP:
    label = "USUSP";
    break;
  case JOB_STAT_EXIT:
    /* an exited job flagged EXIT_ZOMBIE gets its own label */
    label = (record->reasons & EXIT_ZOMBIE) ? "ZOMBI" : "EXIT";
    break;
  case JOB_STAT_DONE:
  case JOB_STAT_DONE|JOB_STAT_PDONE:
  case JOB_STAT_DONE|JOB_STAT_PERR:
  case JOB_STAT_DONE|JOB_STAT_WAIT:
    /* every DONE variant collapses to the same label */
    label = "DONE";
    break;
  case JOB_STAT_UNKWN:
    label = "UNKWN";
    break;
  default:
    Rprintf("lsf_job_status: job state <%d> is unknown.\n", record->status);
    label = "ERROR";
    break;
  }

  SET_STRING_ELT(result, 0, mkChar(label));
  UNPROTECT(1);

  return result;
}