int bhc(int argc, char *argv[], int opCode) { struct hostInfoEnt *hostInfo ; char **hostPoint ; char **hosts=NULL; char *optName; char message[MAXLINELEN]; int i; int fFlag = FALSE; int all = FALSE, numHosts = 0; int inquerFlag = FALSE; while ((optName = myGetOpt(argc, argv, "C:f|")) != NULL) { switch (optName[0]) { case 'f': fFlag = TRUE; break; case 'C': if (strlen(optarg) > MAXLINELEN-1) { printf("Message too long, truncated to %d char.\n", MAXLINELEN-1); strncpy(message, optarg, MAXLINELEN-1); message[MAXLINELEN-1]='\0'; } else strcpy(message, optarg); break; default: return -2; } } switch (opCode) { case HOST_OPEN : opStr = (_i18n_msg_get(ls_catd,NL_SETN,901, "Open")); /* catgets 901 */ break; case HOST_CLOSE : opStr = (_i18n_msg_get(ls_catd,NL_SETN,902, "Close")); /* catgets 902 */ break; case HOST_REBOOT : opStr = (_i18n_msg_get(ls_catd,NL_SETN,903, "Restart slave batch daemon on")); /* catgets 903 */ break; case HOST_SHUTDOWN : opStr = (_i18n_msg_get(ls_catd,NL_SETN,904, "Shut down slave batch daemon on")); /* catgets 904 */ break; default : fprintf(stderr, (_i18n_msg_get(ls_catd,NL_SETN,905, "Unknown operation code\n"))); /* catgets 905 */ exit(-1); } exitrc = 0; numHosts = getNames (argc, argv, optind, &hosts, &all, "hostC"); hostPoint = NULL; if (!numHosts && !all) numHosts = 1; else if (numHosts) hostPoint = hosts; if ((opCode == HOST_REBOOT || opCode == HOST_SHUTDOWN) && !(numHosts == 0 && all)) { if ((hostInfo = getHostList(&numHosts, hostPoint)) == NULL) return -1; } else { if ((hostInfo = lsb_hostinfo (hostPoint, &numHosts)) == NULL) { lsb_perror(NULL); return -1; } } if (!fFlag && all && (opCode == HOST_REBOOT || opCode == HOST_SHUTDOWN)) inquerFlag = !doConfirm (opCode, NULL); for (i = 0; i < numHosts; i++) { if (strcmp(hostInfo[i].host, "lost_and_found") == 0 && (opCode == HOST_REBOOT || opCode == HOST_SHUTDOWN)) { if (!all) fprintf(stderr, (_i18n_msg_get(ls_catd,NL_SETN,906, "<lost_and_found> is not a real host, ignored\n"))); /* catgets 906 */ 
continue; } if (inquerFlag && !(doConfirm (opCode, hostInfo[i].host))) continue; fprintf(stderr, "%s <%s> ...... ", opStr, hostInfo[i].host); fflush(stderr); ctrlHost (hostInfo[i].host, hostInfo[i].hStatus, opCode, message); } return exitrc; }
/*
 * Tells whether node_name appears as a complete, space-delimited word in
 * member_list. Every occurrence is examined, so "node1" is still found in
 * "node10 node1" and is not matched inside "xnode1".
 */
static int
node_in_member_list(const char* member_list, const char* node_name)
{
    size_t name_length;
    const char* hit;

    if(member_list==NULL || node_name==NULL || node_name[0]=='\0')
        return 0;

    name_length=strlen(node_name);
    for(hit=strstr(member_list,node_name); hit!=NULL; hit=strstr(hit+1,node_name)) {
        int starts_word=(hit==member_list || hit[-1]==' ');
        int ends_word=(hit[name_length]=='\0' || hit[name_length]==' ');
        if(starts_word && ends_word)
            return 1;
    }
    return 0;
}

/*
  Get batch nodes informations
  -------------------------------
  Fills an array of bridge_batch_node_t from the host list returned by
  lsb_hostinfo().

  if batch_node_name equals NULL, every host is reported, otherwise only
  the host whose name is exactly batch_node_name.

  if *p_p_batch_nodes==NULL :
  - an array large enough for every host is allocated and stored in
    *p_p_batch_nodes
  else :
  - *p_batch_nodes_nb is read as the capacity of the supplied array and at
    most that many entries are filled

  In both cases the array is shrunk to the number of entries actually
  stored (it is freed and *p_p_batch_nodes set to NULL when nothing was
  stored) and that number is written to *p_batch_nodes_nb.

  Returns :
  0 on success
  1 when the cluster name cannot be obtained (batch system presumably not
    running)
  -1 on error (invalid arguments, host information unavailable, allocation
    failure)

  Each stored node owns heap strings (name, description, grouplist); the
  original notes say to release them with bridge_rmi_clean_node(...) before
  freeing the array -- TODO confirm the exact cleanup routine.
*/
int
get_batch_nodes(bridge_batch_manager_t* p_batch_manager,
                bridge_batch_node_t** p_p_batch_nodes,
                int* p_batch_nodes_nb,
                char* batch_node_name)
{
    int i,k;

    struct hostInfoEnt* p_nodeInfo=NULL;
    struct groupInfoEnt* p_grpInfo=NULL;

    int node_nb=0;
    int grp_nb=0;
    int capacity=0;
    int stored_node_nb=0;

    size_t node_grouplist_default_length=128;
    size_t node_grouplist_length;

    bridge_batch_node_t* p_node;
    bridge_batch_node_t* p_shrunk;

    if(p_p_batch_nodes==NULL || p_batch_nodes_nb==NULL)
        return -1;

    /*
     * Check that batch system is running or exit with error 1
     */
    if(!ls_getclustername()) {
        DEBUG3_LOGGER("unable to get cluster informations\n");
        return 1;
    }

    p_nodeInfo=lsb_hostinfo(NULL,&node_nb);
    p_grpInfo=lsb_hostgrpinfo(NULL,&grp_nb,GRP_ALL);
    if(p_grpInfo==NULL)
        grp_nb=0;   /* no group data: nodes are reported without a grouplist */

    if(p_nodeInfo==NULL) {
        DEBUG3_LOGGER("unable to get nodes informations\n");
        return -1;
    }

    if(*p_p_batch_nodes!=NULL) {
        /* caller supplied storage : never write past its capacity */
        capacity=(*p_batch_nodes_nb<node_nb)?*p_batch_nodes_nb:node_nb;
    }
    else if(node_nb>0) {
        *p_p_batch_nodes=(bridge_batch_node_t*)malloc((size_t)node_nb*sizeof(bridge_batch_node_t));
        if(*p_p_batch_nodes==NULL) {
            DEBUG3_LOGGER("unable to allocate memory for nodes informations\n");
            *p_batch_nodes_nb=0;
            return -1;
        }
        capacity=node_nb;
    }

    /*
     * Scan every host (not just the first 'capacity' ones) so that a name
     * filter still finds its host when the caller's array is small.
     */
    for(i=0; i<node_nb && stored_node_nb<capacity; i++) {

        if(batch_node_name!=NULL && strcmp(batch_node_name,p_nodeInfo[i].host)!=0)
            continue;

        p_node=(*p_p_batch_nodes)+stored_node_nb;
        init_batch_node(p_batch_manager,p_node);

        /* Node Name */
        p_node->name=strdup(p_nodeInfo[i].host);
        /* Node description */
        p_node->description=strdup("Batch node");

        /* Node groups : space separated list of the groups containing this node */
        node_grouplist_length=node_grouplist_default_length;
        p_node->grouplist=(char*)malloc(node_grouplist_length);
        if(p_node->grouplist!=NULL) {
            p_node->grouplist[0]='\0';
            for(k=0; k<grp_nb; k++) {
                if(node_in_member_list(p_grpInfo[k].memberList,p_node->name)) {
                    bridge_common_string_appends_and_extends(&(p_node->grouplist),
                                                             &node_grouplist_length,
                                                             node_grouplist_default_length,
                                                             p_grpInfo[k].group," ");
                }
            }
            /* an empty list is reported as NULL */
            if(p_node->grouplist!=NULL && p_node->grouplist[0]=='\0') {
                free(p_node->grouplist);
                p_node->grouplist=NULL;
            }
        }

        /* Node state */
        if(p_nodeInfo[i].hStatus==HOST_STAT_OK ||
           (p_nodeInfo[i].hStatus & HOST_STAT_LOCKED)) {
            p_node->state=BRIDGE_BATCH_NODE_STATE_OPENED;
        }
        else if((p_nodeInfo[i].hStatus & HOST_STAT_DISABLED) ||
                (p_nodeInfo[i].hStatus & HOST_STAT_WIND)) {
            p_node->state=BRIDGE_BATCH_NODE_STATE_CLOSED;
        }
        else if((p_nodeInfo[i].hStatus & HOST_STAT_BUSY) ||
                (p_nodeInfo[i].hStatus & HOST_STAT_FULL)) {
            p_node->state=BRIDGE_BATCH_NODE_STATE_BUSY;
        }
        else if((p_nodeInfo[i].hStatus & HOST_STAT_UNAVAIL) ||
                (p_nodeInfo[i].hStatus & HOST_STAT_NO_LIM)) {
            p_node->state=BRIDGE_BATCH_NODE_STATE_UNAVAILABLE;
        }
        else if(p_nodeInfo[i].hStatus & HOST_STAT_UNREACH) {
            p_node->state=BRIDGE_BATCH_NODE_STATE_UNREACHABLE;
        }
        else if(p_nodeInfo[i].hStatus & HOST_STAT_UNLICENSED) {
            p_node->state=BRIDGE_BATCH_NODE_STATE_UNLICENSED;
        }
        else {
            p_node->state=BRIDGE_BATCH_NODE_STATE_UNKNOWN;
        }

        /* Get informations only if host is open or closed or busy */
        if(p_node->state==BRIDGE_BATCH_NODE_STATE_OPENED ||
           p_node->state==BRIDGE_BATCH_NODE_STATE_CLOSED ||
           p_node->state==BRIDGE_BATCH_NODE_STATE_BUSY) {
            /* running jobs number */
            p_node->running_jobs_nb=p_nodeInfo[i].numRUN;
            /* user suspended jobs number */
            p_node->usersuspended_jobs_nb=p_nodeInfo[i].numUSUSP;
            /* system suspended jobs number */
            p_node->syssuspended_jobs_nb=p_nodeInfo[i].numSSUSP;
            /* total jobs number */
            p_node->jobs_nb=p_nodeInfo[i].numJobs;
            /* max jobs number */
            p_node->jobs_nb_limit=(p_nodeInfo[i].maxJobs==INT_MAX)?NO_LIMIT:p_nodeInfo[i].maxJobs;
            /* max jobs number per user */
            p_node->perUser_jobs_nb_limit=(p_nodeInfo[i].userJobLimit==INT_MAX)?NO_LIMIT:p_nodeInfo[i].userJobLimit;
            /* free swap space (in Mo) */
            p_node->free_swap=p_nodeInfo[i].realLoad[SWP];
            /* free tmp space (in Mo) */
            p_node->free_tmp=p_nodeInfo[i].realLoad[TMP];
            /* free mem space (in Mo) */
            p_node->free_mem=p_nodeInfo[i].realLoad[MEM];
            /* one minute cpu load */
            p_node->one_min_cpu_load=p_nodeInfo[i].realLoad[R1M]*100;
        }

        stored_node_nb++;
    }

    /* Give back the unused tail of the array */
    if(stored_node_nb<capacity) {
        if(stored_node_nb==0) {
            /* avoid realloc(ptr,0), whose behaviour is not portable */
            free(*p_p_batch_nodes);
            *p_p_batch_nodes=NULL;
        }
        else {
            p_shrunk=(bridge_batch_node_t*)realloc(*p_p_batch_nodes,
                                                   (size_t)stored_node_nb*sizeof(bridge_batch_node_t));
            /* on failure the larger block is still valid : keep it */
            if(p_shrunk!=NULL)
                *p_p_batch_nodes=p_shrunk;
        }
    }
    *p_batch_nodes_nb=stored_node_nb;

    return 0;
}
static int badminDebug (int nargc, char *nargv[], int opCode) { struct hostInfoEnt *hostInfo; char opt[10]; char **hostPoint; char *word; char **hosts = NULL; int i, c; int send; int retCode = 0; int all = FALSE, numHosts = 0; struct debugReq debug; debug.opCode = opCode; debug.logClass = 0; debug.level = 0; debug.hostName = NULL; debug.logFileName[0] = '\0'; debug.options = 0; if (opCode == MBD_DEBUG || opCode == SBD_DEBUG) strcpy (opt, "oc:l:f:"); else if (opCode == MBD_TIMING || opCode == SBD_TIMING) strcpy (opt, "ol:f:"); else return (-2); linux_optind = 1; linux_opterr = 1; if (strstr (nargv[0], "badmin")) { linux_optind++; } while ((c = getopt (nargc, nargv, opt)) != EOF) { switch (c) { case 'c': while (optarg != NULL && (word = getNextWord_ (&optarg))) { if (strcmp (word, "LC_SCHED") == 0) debug.logClass |= LC_SCHED; if (strcmp (word, "LC_EXEC") == 0) debug.logClass |= LC_EXEC; if (strcmp (word, "LC_TRACE") == 0) debug.logClass |= LC_TRACE; if (strcmp (word, "LC_COMM") == 0) debug.logClass |= LC_COMM; if (strcmp (word, "LC_XDR") == 0) debug.logClass |= LC_XDR; if (strcmp (word, "LC_CHKPNT") == 0) debug.logClass |= LC_CHKPNT; if (strcmp (word, "LC_FILE") == 0) debug.logClass |= LC_FILE; if (strcmp (word, "LC_AUTH") == 0) debug.logClass |= LC_AUTH; if (strcmp (word, "LC_HANG") == 0) debug.logClass |= LC_HANG; if (strcmp (word, "LC_SIGNAL") == 0) debug.logClass |= LC_SIGNAL; if (strcmp (word, "LC_PIM") == 0) debug.logClass |= LC_PIM; if (strcmp (word, "LC_SYS") == 0) debug.logClass |= LC_SYS; if (strcmp (word, "LC_JLIMIT") == 0) debug.logClass |= LC_JLIMIT; if (strcmp (word, "LC_PEND") == 0) debug.logClass |= LC_PEND; if (strcmp (word, "LC_LOADINDX") == 0) debug.logClass |= LC_LOADINDX; if (strcmp (word, "LC_M_LOG") == 0) { debug.logClass |= LC_M_LOG; } if (strcmp (word, "LC_PERFM") == 0) { debug.logClass |= LC_PERFM; } if (strcmp (word, "LC_MPI") == 0) { debug.logClass |= LC_MPI; } if (strcmp (word, "LC_JGRP") == 0) { debug.logClass |= LC_JGRP; } } if 
(debug.logClass == 0) { fprintf (stderr, I18N (2572, "Command denied.Invalid class name\n")); /* catgets 2572 */ return (-1); } break; case 'l': for (i = 0; i < strlen (optarg); i++) { if (!isdigit (optarg[i])) { fprintf (stderr, I18N (2573, "Command denied. Invalid level value\n")); /* catgets 2573 */ return (-1); } } debug.level = atoi (optarg); if (opCode == MBD_DEBUG || opCode == SBD_DEBUG) { if (debug.level < 0 || debug.level > 3) { fprintf (stderr, I18N (2574, "Command denied. Valid debug level is [0-3] \n")); /* catgets 2574 */ return (-1); } } else if (debug.level < 1 || debug.level > 5) { fprintf (stderr, I18N (2575, "Command denied. Valid timing level is [1-5]\n")); /* catgets 2575 */ return (-1); } break; case 'f': if (strstr (optarg, "/") && strstr (optarg, "\\")) { fprintf (stderr, I18N (2576, "Command denied. Invalid file name\n")); /* catgets 2576 */ return (-1); } memset (debug.logFileName, 0, sizeof (debug.logFileName)); ls_strcat (debug.logFileName, sizeof (debug.logFileName), optarg); if (debug.logFileName[strlen (debug.logFileName) - 1] == '/' || debug.logFileName[strlen (debug.logFileName) - 1] == '\\') { fprintf (stderr, I18N (2577, "Command denied. 
File name is needed after the path\n")); /* catgets 2577 */ return (-1); } break; case 'o': debug.options = 1; break; default: return (-2); } } if (opCode == SBD_DEBUG || opCode == SBD_TIMING) { numHosts = getNames (nargc, nargv, optind, &hosts, &all, "hostC"); hostPoint = NULL; if (!numHosts && !all) numHosts = 1; else if (numHosts) hostPoint = hosts; if ((hostInfo = lsb_hostinfo (hostPoint, &numHosts)) == NULL) { lsb_perror (NULL); return (-1); } for (i = 0; i < numHosts; i++) { if (strcmp (hostInfo[i].host, "lost_and_found") == 0) { if (!all) fprintf (stderr, "%s.\n", _i18n_msg_get (ls_catd, NL_SETN, 2568, "<lost_and_found> is not a real host, ignored")); /* catgets 2568 */ continue; } fflush (stderr); if (hostInfo[i].hStatus & (HOST_STAT_UNAVAIL | HOST_STAT_UNREACH)) { if (hostInfo[i].hStatus & HOST_STAT_UNAVAIL) fprintf (stderr, I18N (2578, "failed : LSF daemon (LIM) is unavailable on host %s\n"), /* catgets 2578 */ hostInfo[i].host); else fprintf (stderr, I18N (2579, "failed : Slave batch daemon (sbatchd) is unreachable now on host %s\n"), /* catgets 2579 */ hostInfo[i].host); continue; } if ((send = lsb_debugReq (&debug, hostInfo[i].host)) < 0) { char msg[100]; sprintf (msg, I18N (2580, "Operation denied by SBD on <%s>"), /* catgets 2580 */ hostInfo[i].host); lsb_perror (msg); retCode = -1; } } return (retCode); } else { numHosts = getNames (nargc, nargv, optind, &hosts, &all, "hostC"); if (numHosts > 0) { fprintf (stderr, I18N (2581, "Host name does not need to be specified, set debug to the host which runs MBD\n")); /* catgets 2581 */ } if ((send = lsb_debugReq (&debug, NULL)) < 0) { char msg[100]; sprintf (msg, I18N (2582, "Operation denied by MBD")); /* catgets 2582 */ lsb_perror (msg); return (-1); } } return (0); }
/* Summarizes aspect of cluster currently being polled for jobs */ void print_summary(char *inp_queue, char *user, char *inp_hosts, int numcoresq, int numnodesq) { struct jobInfoEnt *job; /* detailed job info */ struct queueInfoEnt *que; /* detailed queue info */ struct queueInfoEnt *que2; /* detailed queue info */ struct hostInfoEnt *hos; /* detailed host info */ int numQueues, numQueues2; /* number of Queues to query about */ int numHosts; /* number of Hosts */ int i,j,k,ii; /* counters */ int totHosts=0, totClosedAdm=0, totUnavail=0, totUsed=0; /* counters for total number of nodes, total closed by the Admin, total Unavailable and total used*/ int totCores=0, totCoresUsed=0; /* counters for total number of cores and number of cores used. */ float perutil; /* work variable */ char *hostlist; /* Array of Hosts */ char quehostlist[1000]; /* list of hosts for a specific queue */ char *token; /* Used for parsing */ char *delimiters = " "; /* Used for parsing */ char *slash = "/"; /* Used for parsing */ char *saveptr1; /* Used for parsing */ char queuelistcheck[100][20]; /* Stores what queues we have already checked */ char hostlistcheck[10000][20]; /* Stores what hosts we have already checked */ char hostscheck[100000][20]; /* Stores what hostlists we have alredy checked */ int numhlc,numhc,numoqc, alreadycounted; /* Number of entires for the above arrays plus a logical for whether or not the variable has already be checked. 
*/ /* If there is a queue we are pooling use this summary */ if (inp_queue != NULL) { printf("Queue Summary\n"); /* First let's get information on the queue in question */ numQueues=1; que = lsb_queueinfo(&inp_queue,&numQueues,NULL,NULL,0); /* Copy the hostlist to the a temporary file */ strcpy(quehostlist,que->hostList); /* Iterate through the host list which is seperated by spaces */ numoqc=0; i=0; for(;;) { /* Parses the hostlist based on the defined delimiters */ if (i == 0){ token = strtok(quehostlist,delimiters); } else{ token = strtok(NULL,delimiters); } if (token == NULL) break; /* We need to trim off slashes */ saveptr1=strpbrk(token,slash); if (saveptr1 != NULL) { token[strlen(token)-1]='\0'; } /* We will first poll the hosts available to this queue and see what is going on */ hostlist=token; numHosts=1; hos = lsb_hostinfo(&hostlist, &numHosts); totHosts=totHosts+numHosts; /* Now we are going to check each host for what state it is in */ /* We will also collect statistics regarding the usage of the host at the same time */ for (j=0;j<numHosts;j++){ if (hos[j].hStatus & HOST_STAT_UNLICENSED) { }/* unlicensed */ else if (hos[j].hStatus & HOST_STAT_UNAVAIL) { totUnavail++; /* LIM and sbatchd are unavailable */ } else if (hos[j].hStatus & HOST_STAT_UNREACH) { totUnavail++; /* sbatchd is unreachable */ } else if (hos[j].hStatus & HOST_CLOSED_BY_ADMIN) { totClosedAdm++; /* host closed by administrator */ } else if (hos[j].hStatus & ( HOST_STAT_WIND | HOST_STAT_DISABLED | HOST_STAT_NO_LIM)) { } else if (hos[j].hStatus & ( HOST_STAT_BUSY | HOST_STAT_FULL | HOST_STAT_LOCKED )) { /* Host is full */ totUsed++; totCoresUsed=hos[j].numRUN+totCoresUsed; totCores=hos[j].maxJobs+totCores; } else { /* Host is partially used */ if (hos[j].numRUN > 0) { totUsed++; } totCoresUsed=hos[j].numRUN+totCoresUsed; totCores=hos[j].maxJobs+totCores; } } /* Check what other queues are pointing to this host */ numQueues2=0; que2 = lsb_queueinfo(NULL,&numQueues2,hos->host,NULL,0); for 
(k=0;k<numQueues2;k++){ /* Check and see that the queue is not the one we are currently interested in */ if (strcmp(inp_queue,que2[k].queue) != 0) { /* We need to check that we aren't double counting queues */ alreadycounted=0; for(ii=0;ii<numoqc;ii++){ if (strcmp(que2[k].queue,queuelistcheck[ii]) == 0) { alreadycounted=1; break; } } /* If we've already been counted break out of this loop if not counted then store that data */ if (alreadycounted == 0) { strcpy(queuelistcheck[numoqc],que2[k].queue); numoqc++; } } } i=i+1; } /* Now to print what we found */ /* First Data on this Specific Queue */ /* Calculate the percentage of cores used */ perutil=(float)numcoresq/(float)totCores*100; printf("%4i of %4i Cores Used (%4.2f%%)\n", numcoresq, totCores, perutil); /* Calculate the percentage of nodes used */ perutil=(float)numnodesq/(float)totHosts*100; printf("%4i of %4i Nodes Used (%4.2f%%)\n", numnodesq, totHosts, perutil); /* Now for all the nodes accessible by the queue */ printf("\n"); printf("Overall Statistics for Nodes Available to this Queue\n"); /* Calculate the percentage of cores used */ perutil=(float)totCoresUsed/(float)totCores*100; printf("%4i of %4i Cores Used (%4.2f%%)\n", totCoresUsed, totCores, perutil); /* Calculate the percentage of nodess used */ perutil=(float)totUsed/(float)totHosts*100; printf("%4i of %4i Nodes Used (%4.2f%%), %2i Nodes Closed by Admin, %2i Nodes Unavailable\n", totUsed, totHosts, perutil, totClosedAdm, totUnavail); printf("\n"); /* Print out which other queues can submit to these hosts */ printf("Other Queues Which Submit To The Hosts For This Queue: "); j=0; for (k=0;k<numoqc;k++) { if (k == numoqc-1) { printf("%4s", queuelistcheck[k]); } else { printf("%4s, ", queuelistcheck[k]); } j++; /* Word wraps if line is too long */ if (j == 4) { printf("\n"); j=0; } } printf("\n"); } else if (inp_hosts != NULL) { printf("Host Summary\n"); /* First let's get information on the host in question */ hostlist=inp_hosts; numHosts=1; hos = 
lsb_hostinfo(&hostlist, &numHosts); totHosts=totHosts+numHosts; /* Now we are going to check each host for what state it is in */ /* We will also collect statistics regarding the usage of the host at the same time */ for (j=0;j<numHosts;j++){ if (hos[j].hStatus & HOST_STAT_UNLICENSED) { }/* unlicensed */ else if (hos[j].hStatus & HOST_STAT_UNAVAIL) { totUnavail++; /* LIM and sbatchd are unavailable */ } else if (hos[j].hStatus & HOST_STAT_UNREACH) { totUnavail++; /* sbatchd is unreachable */ } else if (hos[j].hStatus & HOST_CLOSED_BY_ADMIN) { totClosedAdm++; /* host closed by administrator */ } else if (hos[j].hStatus & ( HOST_STAT_WIND | HOST_STAT_DISABLED | HOST_STAT_NO_LIM)) { } else if (hos[j].hStatus & ( HOST_STAT_BUSY | HOST_STAT_FULL | HOST_STAT_LOCKED )) { /* Host is full */ totUsed++; totCoresUsed=hos[j].numRUN+totCoresUsed; totCores=hos[j].maxJobs+totCores; } else { /* Host is partially used */ if (hos[j].numRUN > 0) { totUsed++; } totCoresUsed=hos[j].numRUN+totCoresUsed; totCores=hos[j].maxJobs+totCores; } } /* Check what queues are pointing to this host */ numoqc=0; numQueues2=0; que2 = lsb_queueinfo(NULL,&numQueues2,hos->host,NULL,0); for (k=0;k<numQueues2;k++){ /* We need to check that we aren't double counting queues */ alreadycounted=0; for(ii=0;ii<numoqc;ii++){ if (strcmp(que2[k].queue,queuelistcheck[ii]) == 0) { alreadycounted=1; break; } } /* If we've already been counted break out of this loop if not counted then store that data */ if (alreadycounted == 0) { strcpy(queuelistcheck[numoqc],que2[k].queue); numoqc++; } } /* Now to print what we found */ /* Calculate the percentage of cores used */ perutil=(float)totCoresUsed/(float)totCores*100; printf("%4i of %4i Cores Used (%4.2f%%)\n", totCoresUsed, totCores, perutil); /* Calculate the percentage of nodes used */ perutil=(float)totUsed/(float)totHosts*100; printf("%4i of %4i Nodes Used (%4.2f%%), %2i Nodes Closed by Admin, %2i Nodes Unavailable\n", totUsed, totHosts, perutil, totClosedAdm, 
totUnavail); printf("\n"); /* List the queues that access these hosts */ printf("Queues That Run On These Hosts: "); j=0; for (k=0;k<numoqc;k++) { if (k == numoqc-1) { printf("%4s", queuelistcheck[k]); } else { printf("%4s, ", queuelistcheck[k]); } /* Word wrap if list is too long */ j++; if (j == 8) { printf("\n"); j=0; } } printf("\n"); } else if (inp_queue == NULL && inp_hosts == NULL) { printf("User Summary\n"); /* First lets get information on the Queues the User has access to */ numQueues=0; que = lsb_queueinfo(NULL,&numQueues,NULL,user,0); numhc=0; numhlc=0; /* Iterate through the queue list */ for(k=0;k<numQueues;k++) { /* Iterate through the host list which is seperated by spaces */ i=0; for(;;) { /* Parse the host list based on predefined delimiters */ if (i == 0){ token = strtok(que[k].hostList,delimiters); } else{ token = strtok(NULL,delimiters); } if (token == NULL) break; /* We need to trim off slashes */ saveptr1=strpbrk(token,slash); if (saveptr1 != NULL) { token[strlen(token)-1]='\0'; } /* We will first poll the hosts available to this queue and see what is going on */ hostlist=token; /* We need to check that we aren't double counting host groups */ alreadycounted=0; for(j=0;j<numhlc;j++){ if (strcmp(hostlist,hostlistcheck[j]) == 0) { alreadycounted=1; break; } } /* If we've already been counted break out of this loop if not counted then store that data */ if (alreadycounted == 0) { strcpy(hostlistcheck[numhlc],hostlist); numhlc++; } else { break; } /* Query for hostlist information */ numHosts=1; hos = lsb_hostinfo(&hostlist, &numHosts); /* Now we are going to check each host for what state it is in */ /* We will also collect statistics regarding the usage of the host at the same time */ for (j=0;j<numHosts;j++){ /* Some hosts lists contain redundant hosts so to make sure we aren't double counting we will check again */ alreadycounted=0; for(ii=0;ii<numhc;ii++){ if (strcmp(hos[j].host,hostscheck[ii]) == 0) { alreadycounted=1; break; } } /* If 
we've already been counted break out of this loop if not counted then store that data */ if (alreadycounted == 0) { strcpy(hostscheck[numhc],hos[j].host); numhc++; } else { break; } totHosts++; if (hos[j].hStatus & HOST_STAT_UNLICENSED) { } /* unlicensed */ else if (hos[j].hStatus & HOST_STAT_UNAVAIL) { totUnavail++; /* LIM and sbatchd are unavailable */ } else if (hos[j].hStatus & HOST_STAT_UNREACH) { totUnavail++; /* sbatchd is unreachable */ } else if (hos[j].hStatus & HOST_CLOSED_BY_ADMIN) { totClosedAdm++; /* host closed by administrator */ } else if (hos[j].hStatus & ( HOST_STAT_WIND | HOST_STAT_DISABLED | HOST_STAT_NO_LIM)) { } else if (hos[j].hStatus & ( HOST_STAT_BUSY | HOST_STAT_FULL | HOST_STAT_LOCKED )) { /* Host is full */ totUsed++; totCoresUsed=hos[j].numRUN+totCoresUsed; totCores=hos[j].maxJobs+totCores; } else { /* Host is partially used */ if (hos[j].numRUN > 0) { totUsed++; } totCoresUsed=hos[j].numRUN+totCoresUsed; totCores=hos[j].maxJobs+totCores; } } i=i+1; } } printf("Overall Statistics for Available Queues\n"); /* Now to print what we found */ /* Calculate the percentage of cores used */ perutil=(float)totCoresUsed/(float)totCores*100; printf("%4i of %4i Cores Used (%4.2f%%)\n", totCoresUsed, totCores, perutil); /* Calculate the percentage of nodes used */ perutil=(float)totUsed/(float)totHosts*100; printf("%4i of %4i Nodes Used (%4.2f%%), %2i Nodes Closed by Admin, %2i Nodes Unavailable\n", totUsed, totHosts, perutil, totClosedAdm, totUnavail); printf("\n"); /* List what queues are available to the user */ printf("Available Queues: "); j=0; for (k=0;k<numQueues;k++) { if (k == numQueues-1) { printf("%4s", que[k].queue); } else { printf("%4s, ", que[k].queue); } j++; /* Word wrap if list is too long */ if (j == 8) { printf("\n"); j=0; } } printf("\n"); } }