int IntStateQuery() { /* qstat -f Job Id: 11.cream-12.pd.infn.it Job_Name = cream_579184706 job_state = R ctime = Wed Apr 23 11:39:55 2008 exec_host = cream-wn-029.pn.pd.infn.it/0 */ /* Filled entries: batch_id wn_addr status udate Filled by submit script: blah_id Unfilled entries: exitreason */ FILE *fp; char *line=NULL; char **token; int maxtok_t=0; job_registry_entry en; int ret; char *timestamp; time_t tmstampepoch; char *batch_str=NULL; char *wn_str=NULL; char *twn_str=NULL; char *status_str=NULL; char *ex_str=NULL; int ex_code=0; char *cp=NULL; char *command_string=NULL; job_registry_entry *ren=NULL; int first=TRUE; time_t now; char *string_now=NULL; command_string=make_message("%s%s/qstat -f",batch_command,pbs_binpath); fp = popen(command_string,"r"); en.status=UNDEFINED; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0"); en.exitcode=-1; bupdater_free_active_jobs(&bact); if(fp!=NULL){ while(!feof(fp) && (line=get_line(fp))){ if(line && strlen(line)==0){ free(line); continue; } if ((cp = strrchr (line, '\n')) != NULL){ *cp = '\0'; } do_log(debuglogfile, debug, 3, "%s: line in IntStateQuery is:%s\n",argv0,line); now=time(0); string_now=make_message("%d",now); if(line && strstr(line,"Job Id: ")){ if(!first && en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){ if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum, JOB_REGISTRY_UPDATE_WN_ADDR| JOB_REGISTRY_UPDATE_STATUS| JOB_REGISTRY_UPDATE_UDATE| JOB_REGISTRY_UPDATE_UPDATER_INFO| JOB_REGISTRY_UPDATE_EXITCODE| JOB_REGISTRY_UPDATE_EXITREASON)) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record returns %d: ",ret); perror(""); } } else { if(ret==JOB_REGISTRY_SUCCESS){ if (en.status == REMOVED || en.status == COMPLETED) { do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.wn_addr,en.status,en.exitcode); job_registry_unlink_proxy(rha, &en); }else{ do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.wn_addr,en.status); } } if (remupd_conf != NULL){ if (ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL)<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0); } } } en.status = UNDEFINED; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0"); en.exitcode=-1; } maxtok_t = strtoken(line, ':', &token); batch_str=strdel(token[1]," "); JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batch_str); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); en.exitcode=-1; bupdater_push_active_job(&bact, en.batch_id); free(batch_str); freetoken(&token,maxtok_t); if(!first) free(ren); if ((ren=job_registry_get(rha, en.batch_id)) == NULL){ fprintf(stderr,"Get of record returns error for %s ",en.batch_id); perror(""); } first=FALSE; }else if(line && strstr(line,"job_state = ")){ maxtok_t = strtoken(line, '=', &token); status_str=strdel(token[1]," "); if(status_str && strcmp(status_str,"Q")==0){ en.status=IDLE; en.exitcode=-1; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); }else if(status_str && strcmp(status_str,"W")==0){ en.status=IDLE; en.exitcode=-1; }else if(status_str && strcmp(status_str,"R")==0){ en.status=RUNNING; en.exitcode=-1; }else if(status_str && strcmp(status_str,"C")==0){ en.status=COMPLETED; JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); }else if(status_str && strcmp(status_str,"H")==0){ en.status=HELD; en.exitcode=-1; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); } free(status_str); freetoken(&token,maxtok_t); }else if(line && strstr(line,"unable to run job")){ en.status=IDLE; en.exitcode=-1; }else if(line && strstr(line,"exit_status = ")){ maxtok_t = strtoken(line, '=', &token); ex_str=strdel(token[1]," "); ex_code=atoi(ex_str); if(ex_code==0){ en.exitcode=0; }else if(ex_code==271){ en.status=REMOVED; en.exitcode=-999; }else{ en.exitcode=ex_code; } free(ex_str); freetoken(&token,maxtok_t); }else if(line && strstr(line,"exec_host = ")){ maxtok_t = strtoken(line, '=', &token); twn_str=strdup(token[1]); if(twn_str == NULL){ sysfatal("strdup failed for twn_str in IntStateQuery: %r"); } freetoken(&token,maxtok_t); maxtok_t = strtoken(twn_str, '/', &token); wn_str=strdel(token[0]," "); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,wn_str); free(twn_str); free(wn_str); freetoken(&token,maxtok_t); }else if(line && strstr(line,"mtime = ")){ maxtok_t = strtoken(line, ' ', &token); timestamp=make_message("%s %s %s %s %s",token[2],token[3],token[4],token[5],token[6]); tmstampepoch=str2epoch(timestamp,"L"); free(timestamp); en.udate=tmstampepoch; freetoken(&token,maxtok_t); } free(line); free(string_now); } pclose(fp); } if(en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){ if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum, JOB_REGISTRY_UPDATE_WN_ADDR| JOB_REGISTRY_UPDATE_STATUS| JOB_REGISTRY_UPDATE_UDATE| JOB_REGISTRY_UPDATE_UPDATER_INFO| JOB_REGISTRY_UPDATE_EXITCODE| JOB_REGISTRY_UPDATE_EXITREASON)) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record returns %d: ",ret); perror(""); } } else { if(ret==JOB_REGISTRY_SUCCESS){ if (en.status == REMOVED || en.status == COMPLETED) { do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.wn_addr,en.status,en.exitcode); job_registry_unlink_proxy(rha, &en); }else{ do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.wn_addr,en.status); } } if (remupd_conf != NULL){ if (ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL)<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0); } } } } free(ren); free(command_string); return 0; }
int IntStateQuery() { FILE *fp; char *line=NULL; char **token; char **token_l; char **token_e; int maxtok_t=0; int maxtok_l=0; int maxtok_e=0; job_registry_entry en; int ret; time_t tmstampepoch; char *cp=NULL; char *batch_str=NULL; char *command_string=NULL; job_registry_entry *ren=NULL; int isresumed=FALSE; int first=TRUE; time_t now; char *string_now=NULL; command_string=make_message("%s/scontrol -a show jobid",slurm_binpath); fp = popen(command_string,"r"); en.status=UNDEFINED; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0"); en.exitcode=-1; bupdater_free_active_jobs(&bact); if(fp!=NULL){ while(!feof(fp) && (line=get_line(fp))){ if(line && strlen(line)==0){ free(line); continue; } if ((cp = strrchr (line, '\n')) != NULL){ *cp = '\0'; } do_log(debuglogfile, debug, 3, "%s: line in IntStateQuery is:%s\n",argv0,line); now=time(0); string_now=make_message("%d",now); maxtok_t = strtoken(line, ' ', &token); if(line && strstr(line,"JobId=")){ isresumed=FALSE; if(!first && en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){ if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum, JOB_REGISTRY_UPDATE_WN_ADDR| JOB_REGISTRY_UPDATE_STATUS| JOB_REGISTRY_UPDATE_UDATE| JOB_REGISTRY_UPDATE_UPDATER_INFO| JOB_REGISTRY_UPDATE_EXITCODE| JOB_REGISTRY_UPDATE_EXITREASON)) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record returns %d: ",ret); perror(""); } } else { if(ret==JOB_REGISTRY_SUCCESS){ if (en.status == REMOVED || en.status == COMPLETED) { do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status,en.exitcode); job_registry_unlink_proxy(rha, &en); }else{ do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status); } if (remupd_conf != NULL){ if ((ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL))<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0); } } } } en.status = UNDEFINED; JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); en.exitcode=-1; } en.status = UNDEFINED; maxtok_l = strtoken(token[0], '=', &token_l); batch_str=strdup(token_l[1]); JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batch_str); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); en.exitcode=-1; bupdater_push_active_job(&bact, en.batch_id); do_log(debuglogfile, debug, 4, "%s: bupdater_push_active_job done for %s\n",argv0,en.batch_id); free(batch_str); freetoken(&token_l,maxtok_l); if(!first) free(ren); if ((ren=job_registry_get(rha, en.batch_id)) == NULL){ fprintf(stderr,"Get of record returns error "); perror(""); } if(ren){ if(strlen(ren->updater_info)>0){ en.udate=ren->udate; }else{ en.udate=time(0); } } first=FALSE; }else if(line && strstr(line," JobState=")){ if(token[0] && strstr(line,"JobState=")){ maxtok_l = strtoken(token[0], '=', &token_l); if(token_l[1] && strstr(token_l[1],"PENDING")){ en.status=IDLE; en.exitcode=-1; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"RUNNING")){ en.status=RUNNING; en.exitcode=-1; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"COMPLETED")){ en.status=COMPLETED; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"CANCELLED")){ en.status=REMOVED; en.exitcode=-999; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"FAILED")){ en.status=COMPLETED; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"SUSPENDED")){ en.status=HELD; en.exitcode=-1; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"COMPLETING")){ bupdater_remove_active_job(&bact, en.batch_id); } freetoken(&token_l,maxtok_l); } }else if(line && strstr(line," BatchHost=")){ if(token[0] && strstr(line,"BatchHost=")){ maxtok_l = strtoken(token[0], '=', &token_l); if(en.status!=IDLE){ JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,token_l[1]); } freetoken(&token_l,maxtok_l); } }else if(line && strstr(line," ExitCode=")){ if(token[3] && strstr(line,"ExitCode=")){ maxtok_l = strtoken(token[3], '=', &token_l); maxtok_e = strtoken(token_l[1], ':', &token_e); if(en.status==COMPLETED){ en.exitcode=atoi(token_e[0]); } freetoken(&token_l,maxtok_l); freetoken(&token_e,maxtok_e); } }else if(line && strstr(line," SubmitTime=")){ if(en.status==IDLE){ if(token[0] && strstr(line,"SubmitTime=")){ maxtok_l = strtoken(token[0], '=', &token_l); tmstampepoch=str2epoch(token_l[1],"N"); en.udate=tmstampepoch; freetoken(&token_l,maxtok_l); } } }else if(line && strstr(line," StartTime=")){ if(en.status==RUNNING){ if(token[0] && strstr(line,"StartTime=")){ maxtok_l = strtoken(token[0], '=', &token_l); tmstampepoch=str2epoch(token_l[1],"N"); en.udate=tmstampepoch; freetoken(&token_l,maxtok_l); } } if(en.status==COMPLETED || en.status==REMOVED){ if(token[1] && strstr(line,"EndTime=")){ maxtok_l = strtoken(token[1], '=', &token_l); tmstampepoch=str2epoch(token_l[1],"N"); en.udate=tmstampepoch; freetoken(&token_l,maxtok_l); } } }else if(line && strstr(line," SuspendTime=")){ if(en.status==HELD){ if(token[1] && strstr(line,"SuspendTime=")){ maxtok_l = strtoken(token[1], '=', &token_l); tmstampepoch=str2epoch(token_l[1],"N"); en.udate=tmstampepoch; freetoken(&token_l,maxtok_l); } } } free(line); free(string_now); freetoken(&token,maxtok_t); } pclose(fp); } if(en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){ if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum, JOB_REGISTRY_UPDATE_WN_ADDR| JOB_REGISTRY_UPDATE_STATUS| JOB_REGISTRY_UPDATE_UDATE| JOB_REGISTRY_UPDATE_UPDATER_INFO| JOB_REGISTRY_UPDATE_EXITCODE| JOB_REGISTRY_UPDATE_EXITREASON)) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record returns %d: ",ret); perror(""); } } else { if(ret==JOB_REGISTRY_SUCCESS){ if (en.status == REMOVED || en.status == COMPLETED) { do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status,en.exitcode); job_registry_unlink_proxy(rha, &en); }else{ do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status); } if (remupd_conf != NULL){ if ((ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL))<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0); } } } } } free(ren); free(command_string); return 0; }