int AssignFinalState(char *batchid){ job_registry_entry en; int ret,i; time_t now; now=time(0); JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batchid); en.status=COMPLETED; en.exitcode=999; en.udate=now; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); if ((ret=job_registry_update(rha, &en)) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record %d returns %d: ",i,ret); perror(""); } } else { do_log(debuglogfile, debug, 2, "%s: registry update in AssignStateQuery for: jobid=%s creamjobid=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.status); job_registry_unlink_proxy(rha, &en); if (remupd_conf != NULL){ if (ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL)<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in AssignFinalState\n",argv0); } } } return 0; }
int AssignState (char *element, char *status, char *exit, char *reason, char *wn, char *udate){ char **id_element; job_registry_entry en; time_t now; char *string_now=NULL; int i=0; int n=strtoken(element, '.', &id_element); int iret; if(id_element[0]){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,id_element[0]); en.status=atoi(status); en.exitcode=atoi(exit); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,wn); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,reason); now=time(0); string_now=make_message("%d",now); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; free(string_now); }else{ if((element=calloc(STR_CHARS,1)) == 0){ sysfatal("can't malloc cmd in GetAndSend: %r"); } } if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else{ if (en.status == REMOVED || en.status == COMPLETED){ job_registry_unlink_proxy(rha, &en); } } freetoken(&id_element,n); return 0; }
int FinalStateQuery(char *input_string, int logs_to_read) { /* tracejob -m -l -a <jobid> In line: 04/23/2008 11:50:43 S Exit_status=0 resources_used.cput=00:00:01 resources_used.mem=11372kb resources_used.vmem=52804kb resources_used.walltime=00:10:15 there are: udate for the final state (04/23/2008 11:50:43): exitcode Exit_status= */ /* Filled entries: batch_id (a list of jobid is given, one for each tracejob call) status (always a final state 3 or 4) exitcode udate Filled by submit script: blah_id Unfilled entries: exitreason */ /* [root@cream-12 server_logs]# tracejob -m -l -a 13 Job: 13.cream-12.pd.infn.it 04/23/2008 11:40:27 S enqueuing into cream_1, state 1 hop 1 04/23/2008 11:40:27 S Job Queued at request of [email protected], owner = [email protected], job name = cream_365713239, queue = cream_1 04/23/2008 11:40:28 S Job Modified at request of [email protected] 04/23/2008 11:40:28 S Job Run at request of [email protected] 04/23/2008 11:50:43 S Exit_status=0 resources_used.cput=00:00:01 resources_used.mem=11372kb resources_used.vmem=52804kb resources_used.walltime=00:10:15 04/23/2008 11:50:44 S dequeuing from cream_1, state COMPLETE */ FILE *fp; char *line=NULL; char **token; char **jobid; int maxtok_t=0,maxtok_j=0,k; job_registry_entry en; int ret; char *timestamp; time_t tmstampepoch; char *exit_str=NULL; int failed_count=0; int time_to_add=0; time_t now; char *cp=NULL; char *command_string=NULL; char *pbs_spool=NULL; char *string_now=NULL; int tracejob_line_counter=0; do_log(debuglogfile, debug, 3, "%s: input_string in FinalStateQuery is:%s\n",argv0,input_string); maxtok_j = strtoken(input_string, ':', &jobid); for(k=0;k<maxtok_j;k++){ if(jobid[k] && strlen(jobid[k])==0) continue; pbs_spool=(pbs_spoolpath?make_message("-p %s ",pbs_spoolpath):make_message("")); command_string=make_message("%s%s/tracejob %s-m -l -a -n %d %s",batch_command,pbs_binpath,pbs_spool,logs_to_read,jobid[k]); free(pbs_spool); fp = popen(command_string,"r"); do_log(debuglogfile, debug, 3, "%s: command_string in FinalStateQuery is:%s\n",argv0,command_string); /* en.status is set =0 (UNDEFINED) here and it is tested if it is !=0 before the registry update: the update is done only if en.status is !=0*/ en.status=UNDEFINED; JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,jobid[k]); tracejob_line_counter=0; if(fp!=NULL){ while(!feof(fp) && (line=get_line(fp))){ if(line && strlen(line)==0){ free(line); continue; } if(tracejob_line_counter>tracejob_max_output){ do_log(debuglogfile, debug, 2, "%s: Tracejob output limit of %d lines reached. Skipping command.\n",argv0,tracejob_max_output); free(line); break; } if ((cp = strrchr (line, '\n')) != NULL){ *cp = '\0'; tracejob_line_counter++; } do_log(debuglogfile, debug, 3, "%s: line in FinalStateQuery is:%s\n",argv0,line); now=time(0); string_now=make_message("%d",now); if(line && (strstr(line,"Job deleted") || (strstr(line,"dequeuing from") && strstr(line,"state RUNNING")))){ maxtok_t = strtoken(line, ' ', &token); timestamp=make_message("%s %s",token[0],token[1]); tmstampepoch=str2epoch(timestamp,"A"); free(timestamp); freetoken(&token,maxtok_t); en.udate=tmstampepoch; en.status=REMOVED; en.exitcode=-999; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); }else if(line && strstr(line," Exit_status=") && en.status != REMOVED){ maxtok_t = strtoken(line, ' ', &token); timestamp=make_message("%s %s",token[0],token[1]); tmstampepoch=str2epoch(timestamp,"A"); exit_str=strdup(token[3]); if(exit_str == NULL){ sysfatal("strdup failed for exit_str in FinalStateQuery: %r"); } free(timestamp); freetoken(&token,maxtok_t); if(strstr(exit_str,"Exit_status=")){ maxtok_t = strtoken(exit_str, '=', &token); if(maxtok_t == 2){ en.exitcode=atoi(token[1]); freetoken(&token,maxtok_t); }else{ en.exitcode=-1; } }else{ en.exitcode=-1; } free(exit_str); en.udate=tmstampepoch; en.status=COMPLETED; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); } free(string_now); free(line); } pclose(fp); } if(en.status !=UNDEFINED && en.status!=IDLE){ if ((ret=job_registry_update_select(rha, &en, JOB_REGISTRY_UPDATE_UDATE | JOB_REGISTRY_UPDATE_STATUS | JOB_REGISTRY_UPDATE_UPDATER_INFO | JOB_REGISTRY_UPDATE_EXITCODE | JOB_REGISTRY_UPDATE_EXITREASON )) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record returns %d: ",ret); perror(""); } } else { do_log(debuglogfile, debug, 2, "%s: registry update in FinalStateQuery for: jobid=%s exitcode=%d status=%d\n",argv0,en.batch_id,en.exitcode,en.status); if (en.status == REMOVED || en.status == COMPLETED){ job_registry_unlink_proxy(rha, &en); } if (remupd_conf != NULL){ if (ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL)<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in FinalStateQuery\n",argv0); } } } }else{ failed_count++; } free(command_string); } now=time(0); if(failed_count>10){ failed_count=10; } time_to_add=pow(failed_count,1.5); next_finalstatequery=now+time_to_add; do_log(debuglogfile, debug, 3, "%s: next FinalStatequery will be in %d seconds\n",argv0,time_to_add); freetoken(&jobid,maxtok_j); return failed_count; }
int IntStateQuery() { /* qstat -f Job Id: 11.cream-12.pd.infn.it Job_Name = cream_579184706 job_state = R ctime = Wed Apr 23 11:39:55 2008 exec_host = cream-wn-029.pn.pd.infn.it/0 */ /* Filled entries: batch_id wn_addr status udate Filled by submit script: blah_id Unfilled entries: exitreason */ FILE *fp; char *line=NULL; char **token; int maxtok_t=0; job_registry_entry en; int ret; char *timestamp; time_t tmstampepoch; char *batch_str=NULL; char *wn_str=NULL; char *twn_str=NULL; char *status_str=NULL; char *ex_str=NULL; int ex_code=0; char *cp=NULL; char *command_string=NULL; job_registry_entry *ren=NULL; int first=TRUE; time_t now; char *string_now=NULL; command_string=make_message("%s%s/qstat -f",batch_command,pbs_binpath); fp = popen(command_string,"r"); en.status=UNDEFINED; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0"); en.exitcode=-1; bupdater_free_active_jobs(&bact); if(fp!=NULL){ while(!feof(fp) && (line=get_line(fp))){ if(line && strlen(line)==0){ free(line); continue; } if ((cp = strrchr (line, '\n')) != NULL){ *cp = '\0'; } do_log(debuglogfile, debug, 3, "%s: line in IntStateQuery is:%s\n",argv0,line); now=time(0); string_now=make_message("%d",now); if(line && strstr(line,"Job Id: ")){ if(!first && en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){ if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum, JOB_REGISTRY_UPDATE_WN_ADDR| JOB_REGISTRY_UPDATE_STATUS| JOB_REGISTRY_UPDATE_UDATE| JOB_REGISTRY_UPDATE_UPDATER_INFO| JOB_REGISTRY_UPDATE_EXITCODE| JOB_REGISTRY_UPDATE_EXITREASON)) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record returns %d: ",ret); perror(""); } } else { if(ret==JOB_REGISTRY_SUCCESS){ if (en.status == REMOVED || en.status == COMPLETED) { do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.wn_addr,en.status,en.exitcode); job_registry_unlink_proxy(rha, &en); }else{ do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.wn_addr,en.status); } } if (remupd_conf != NULL){ if (ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL)<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0); } } } en.status = UNDEFINED; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0"); en.exitcode=-1; } maxtok_t = strtoken(line, ':', &token); batch_str=strdel(token[1]," "); JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batch_str); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); en.exitcode=-1; bupdater_push_active_job(&bact, en.batch_id); free(batch_str); freetoken(&token,maxtok_t); if(!first) free(ren); if ((ren=job_registry_get(rha, en.batch_id)) == NULL){ fprintf(stderr,"Get of record returns error for %s ",en.batch_id); perror(""); } first=FALSE; }else if(line && strstr(line,"job_state = ")){ maxtok_t = strtoken(line, '=', &token); status_str=strdel(token[1]," "); if(status_str && strcmp(status_str,"Q")==0){ en.status=IDLE; en.exitcode=-1; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); }else if(status_str && strcmp(status_str,"W")==0){ en.status=IDLE; en.exitcode=-1; }else if(status_str && strcmp(status_str,"R")==0){ en.status=RUNNING; en.exitcode=-1; }else if(status_str && strcmp(status_str,"C")==0){ en.status=COMPLETED; JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); }else if(status_str && strcmp(status_str,"H")==0){ en.status=HELD; en.exitcode=-1; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); } free(status_str); freetoken(&token,maxtok_t); }else if(line && strstr(line,"unable to run job")){ en.status=IDLE; en.exitcode=-1; }else if(line && strstr(line,"exit_status = ")){ maxtok_t = strtoken(line, '=', &token); ex_str=strdel(token[1]," "); ex_code=atoi(ex_str); if(ex_code==0){ en.exitcode=0; }else if(ex_code==271){ en.status=REMOVED; en.exitcode=-999; }else{ en.exitcode=ex_code; } free(ex_str); freetoken(&token,maxtok_t); }else if(line && strstr(line,"exec_host = ")){ maxtok_t = strtoken(line, '=', &token); twn_str=strdup(token[1]); if(twn_str == NULL){ sysfatal("strdup failed for twn_str in IntStateQuery: %r"); } freetoken(&token,maxtok_t); maxtok_t = strtoken(twn_str, '/', &token); wn_str=strdel(token[0]," "); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,wn_str); free(twn_str); free(wn_str); freetoken(&token,maxtok_t); }else if(line && strstr(line,"mtime = ")){ maxtok_t = strtoken(line, ' ', &token); timestamp=make_message("%s %s %s %s %s",token[2],token[3],token[4],token[5],token[6]); tmstampepoch=str2epoch(timestamp,"L"); free(timestamp); en.udate=tmstampepoch; freetoken(&token,maxtok_t); } free(line); free(string_now); } pclose(fp); } if(en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){ if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum, JOB_REGISTRY_UPDATE_WN_ADDR| JOB_REGISTRY_UPDATE_STATUS| JOB_REGISTRY_UPDATE_UDATE| JOB_REGISTRY_UPDATE_UPDATER_INFO| JOB_REGISTRY_UPDATE_EXITCODE| JOB_REGISTRY_UPDATE_EXITREASON)) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record returns %d: ",ret); perror(""); } } else { if(ret==JOB_REGISTRY_SUCCESS){ if (en.status == REMOVED || en.status == COMPLETED) { do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.wn_addr,en.status,en.exitcode); job_registry_unlink_proxy(rha, &en); }else{ do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.wn_addr,en.status); } } if (remupd_conf != NULL){ if (ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL)<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0); } } } } free(ren); free(command_string); return 0; }
int ReceiveUpdateFromNetwork() { char *proxy_path, *proxy_subject; int timeout_ms = 0; int ent, ret, prret, rhret; job_registry_entry *nen; job_registry_entry *ren; proxy_path = NULL; proxy_subject = NULL; while (nen = job_registry_receive_update(remupd_pollset, remupd_nfds,timeout_ms, &proxy_subject, &proxy_path)){ JOB_REGISTRY_ASSIGN_ENTRY(nen->subject_hash,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(nen->proxy_link,"\0"); if ((ren=job_registry_get(rha, nen->batch_id)) == NULL){ if ((ret=job_registry_append(rha, nen)) < 0){ fprintf(stderr,"%s: Warning: job_registry_append returns %d: ",argv0,ret); perror(""); } }else{ if(ren->subject_hash!=NULL && strlen(ren->subject_hash) && ren->proxy_link!=NULL && strlen(ren->proxy_link)){ JOB_REGISTRY_ASSIGN_ENTRY(nen->subject_hash,ren->subject_hash); JOB_REGISTRY_ASSIGN_ENTRY(nen->proxy_link,ren->proxy_link); }else{ if (proxy_path != NULL && strlen(proxy_path) > 0){ prret = job_registry_set_proxy(rha, nen, proxy_path); if (prret < 0){ do_log(debuglogfile, debug, 1, "%s: warning: setting proxy to %s\n",argv0,proxy_path); fprintf(stderr,"%s: warning: setting proxy to %s: ",argv0,proxy_path); perror(""); /* Make sure we don't renew non-existing proxies */ nen->renew_proxy = 0; } free(proxy_path); nen->subject_hash[0] = '\000'; if (proxy_subject != NULL && strlen(proxy_subject) > 0){ job_registry_compute_subject_hash(nen, proxy_subject); rhret = job_registry_record_subject_hash(rha, nen->subject_hash, proxy_subject, TRUE); if (rhret < 0){ do_log(debuglogfile, debug, 1, "%s: warning: recording proxy subject %s (hash %s)\n",argv0, proxy_subject, nen->subject_hash); fprintf(stderr,"%s: warning: recording proxy subject %s (hash %s): ",argv0, proxy_subject, nen->subject_hash); perror(""); } } free(proxy_subject); } } if(job_registry_need_update(ren,nen,JOB_REGISTRY_UPDATE_ALL)){ if ((ret=job_registry_update(rha, nen)) < 0){ fprintf(stderr,"%s: Warning: job_registry_update returns %d: ",argv0,ret); perror(""); } } } free(nen); } return 0; }
int PollDB() { FILE *fd; job_registry_entry *en; job_registry_handle *rha; job_registry_handle *rhc; char *buffer=NULL; char *cdate=NULL; time_t now; int maxtok,i,maxtokl,j; char **tbuf; char **lbuf; int len=0,flen=0; struct stat sbuf; int rc; char *regfile; char *cp=NULL; int to_sleep=FALSE; int skip_reg_open=FALSE; int ret; rha=job_registry_init(registry_file, BY_BATCH_ID); if (rha == NULL){ do_log(debuglogfile, debug, 1, "%s: Error initialising job registry %s\n",argv0,registry_file); fprintf(stderr,"%s: Error initialising job registry %s :",argv0,registry_file); perror(""); } for(;;){ now=time(NULL); to_sleep=TRUE; /* cycle over connections: sleep if startnotify, startnotifyjob and sentendonce are not set. If startnotifyjob is set the conn is served. */ for(i=0; i<MAX_CONNECTIONS; i++){ if(!connections[i].startnotify && !connections[i].startnotifyjob && !(connections[i].firstnotify && connections[i].sentendonce)) continue; if(connections[i].startnotify) to_sleep=FALSE; if(connections[i].startnotifyjob){ to_sleep=FALSE; rhc=job_registry_init(registry_file, BY_USER_PREFIX); if (rhc == NULL){ do_log(debuglogfile, debug, 1, "%s: Error initialising job registry %s\n",argv0,registry_file); fprintf(stderr,"%s: Error initialising job registry %s :",argv0,registry_file); perror(""); } do_log(debuglogfile, debug, 2, "%s:Job list for notification:%s\n",argv0,connections[i].joblist_string); maxtok=strtoken(connections[i].joblist_string,',',&tbuf); for(j=0;j<maxtok;j++){ if ((en=job_registry_get(rhc, tbuf[j])) != NULL){ buffer=ComposeClassad(en); }else{ if(remupd_conf == NULL){ cdate=iepoch2str(now); maxtokl=strtoken(tbuf[j],'_',&lbuf); if(lbuf[1]){ if ((cp = strrchr (lbuf[1], '\n')) != NULL){ *cp = '\0'; } if ((cp = strrchr (lbuf[1], '\r')) != NULL){ *cp = '\0'; } buffer=make_message("[BlahJobName=\"%s\"; ClientJobId=\"%s\"; JobStatus=4; JwExitCode=999; ExitReason=\"BUpdater is not able to find the job anymore\"; Reason=\"BUpdater is not able to find the job anymore\"; ChangeTime=\"%s\"; ]\n",tbuf[j],lbuf[1],cdate); } freetoken(&lbuf,maxtokl); free(cdate); }else{ maxtokl=strtoken(tbuf[j],':',&lbuf); JOB_REGISTRY_ASSIGN_ENTRY(en->batch_id,lbuf[0]); JOB_REGISTRY_ASSIGN_ENTRY(en->blah_id,lbuf[1]); freetoken(&lbuf,maxtokl); en->status = 0; if ((ret=job_registry_append(rhc, en))<0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record returns %d: ",ret); perror(""); } }else{ if(ret==JOB_REGISTRY_SUCCESS){ do_log(debuglogfile, debug, 2, "%s: registry append in PollDB for: jobid=%s blahjobid=%s\n",argv0,en->batch_id,en->blah_id); } } } } free(en); len=strlen(buffer); if(connections[i].finalbuffer != NULL){ flen=strlen(connections[i].finalbuffer); }else{ flen=0; } connections[i].finalbuffer = realloc(connections[i].finalbuffer,flen+len+2); if (connections[i].finalbuffer == NULL){ sysfatal("can't realloc finalbuffer in PollDB: %r"); } if(flen==0){ connections[i].finalbuffer[0]='\000'; } strcat(connections[i].finalbuffer,buffer); free(buffer); } freetoken(&tbuf,maxtok); if(connections[i].finalbuffer != NULL){ if(NotifyCream(connections[i].finalbuffer,&connections[i])!=-1){ /* change last notification time */ connections[i].lastnotiftime=now; connections[i].startnotifyjob=FALSE; } free(connections[i].finalbuffer); connections[i].finalbuffer=NULL; } job_registry_destroy(rhc); } if(connections[i].firstnotify && connections[i].sentendonce){ to_sleep=FALSE; if(NotifyCream("NTFDATE/END\n",&connections[i])!=-1){ connections[i].startnotify=TRUE; connections[i].sentendonce=FALSE; connections[i].firstnotify=FALSE; connections[i].startnotifyjob=FALSE; } } } if(to_sleep){ sleep(loop_interval); continue; } regfile=make_message("%s/registry",registry_file); rc=stat(regfile,&sbuf); free(regfile); skip_reg_open=TRUE; for(i=0; i<MAX_CONNECTIONS; i++){ if(sbuf.st_mtime>=connections[i].lastnotiftime){ skip_reg_open=FALSE; break; } } if(skip_reg_open){ do_log(debuglogfile, debug, 3, "Skip registry opening: mtime:%d lastn:%d\n",sbuf.st_mtime,connections[i].lastnotiftime); sleep(loop_interval); continue; } do_log(debuglogfile, debug, 3, "Normal registry opening\n"); fd = job_registry_open(rha, "r"); if (fd == NULL) { do_log(debuglogfile, debug, 1, "%s: Error opening job registry %s\n",argv0,registry_file); fprintf(stderr,"%s: Error opening job registry %s :",argv0,registry_file); perror(""); sleep(loop_interval); continue; } if (job_registry_rdlock(rha, fd) < 0) { do_log(debuglogfile, debug, 1, "%s: Error read locking registry %s\n",argv0,registry_file); fprintf(stderr,"%s: Error read locking registry %s :",argv0,registry_file); perror(""); sleep(loop_interval); continue; } while ((en = job_registry_get_next(rha, fd)) != NULL) { for(i=0; i<MAX_CONNECTIONS; i++){ if(connections[i].creamfilter==NULL) continue; if(en->mdate >= connections[i].lastnotiftime && en->mdate < now && en->user_prefix && strstr(en->user_prefix,connections[i].creamfilter)!=NULL && strlen(en->updater_info)>0) { buffer=ComposeClassad(en); len=strlen(buffer); if(connections[i].finalbuffer != NULL){ flen=strlen(connections[i].finalbuffer); }else{ flen=0; } connections[i].finalbuffer = realloc(connections[i].finalbuffer,flen+len+2); if (connections[i].finalbuffer == NULL){ sysfatal("can't realloc finalbuffer in PollDB: %r"); } if(flen==0){ connections[i].finalbuffer[0]='\000'; } strcat(connections[i].finalbuffer,buffer); free(buffer); } } free(en); } for(i=0; i<MAX_CONNECTIONS; i++){ if(connections[i].finalbuffer != NULL){ if(NotifyCream(connections[i].finalbuffer,&connections[i])!=-1){ /* change last notification time */ connections[i].lastnotiftime=now; } free(connections[i].finalbuffer); connections[i].finalbuffer=NULL; } } fclose(fd); sleep(loop_interval); } job_registry_destroy(rha); return 0; }
int FinalStateQuery(char *query,char *queryStates, char *query_err){ char line[STR_CHARS],fail[6],qExit[10],qFailed[10],qHostname[100],qStatus[2],command_string[100]; char **saveptr1,**saveptr2,**list_query,**list_queryStates; FILE *file_output; int numQuery=0,numQueryStates=0,j=0,l=0,cont=0,cont2=0, nq=0; time_t now; char string_now[11]; job_registry_entry en; int iret; numQuery=strtoken(query,' ',&list_query); nq=numQuery; numQueryStates=strtoken(queryStates,' ',&list_queryStates); if (numQuery!=numQueryStates) return 1; sprintf(command_string,"%s/qstat -u '*'",sge_binpath); if (debug) do_log(debuglogfile, debug, 1, "+-+line 433, command_string:%s\n",command_string); //load in qstatJob list of jobids from qstat command exec file_output = popen(command_string,"r"); if (file_output == NULL) return 0; while (fgets(line,sizeof(line), file_output) != NULL){ cont=strtoken(line, ' ', &saveptr1); if ((strcmp(saveptr1[0],"job-ID")!=0)&&(strncmp(saveptr1[0],"-",1)!=0)){ for (l=0;l<nq;l++){ if (strcmp(list_query[l],saveptr1[0])==0){ if (strcmp(list_queryStates[l],saveptr1[4])!=0){ now=time(0); sprintf(string_now,"%d",now); if (strcmp(saveptr1[4],"u")==0){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=0; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,""); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); } } if (strcmp(saveptr1[4],"q")==0){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=1; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,""); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); } } if (strcmp(saveptr1[4],"r")==0){ cont2=strtoken(saveptr1[7], '@', &saveptr2); JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=2; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,saveptr2[1]); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); } freetoken(&saveptr2,cont2); } if ((strcmp(saveptr1[4],"hr")==0)||strcmp(saveptr1[4],"hqw")==0){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=5; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,""); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); } } } //i must put out element from query for (j=l;j<nq;j++) if (list_query[j+1]!=NULL) strcpy(list_query[j],list_query[j+1]); for (j=l;j<nq;j++) if (list_queryStates[j+1]!=NULL) strcpy(list_queryStates[j],list_queryStates[j+1]); nq--; break; } } } line[0]='\0'; freetoken(&saveptr1,cont); } pclose( file_output ); sprintf(query_err,"\0"); //now we have check in list_query only states that not change status //because they're not in qstat result for (l=0; l<nq; l++){ sprintf(command_string,"%s/qacct -j '%s'",sge_binpath,list_query[l]); if (debug) do_log(debuglogfile, debug, 1, "+-+line 520,command_string:%s\n",command_string); file_output = popen(command_string,"r"); if (file_output == NULL) return 1; //if a job number is here means that job was in query previously and //if now it's not in query and not finished (NULL qstat) it was deleted //or it's on transition time if (fgets( line,sizeof(line), file_output )==NULL){ strcat(query_err,list_query[l]); strcat(query_err," "); pclose( file_output ); continue; } //there is no problem to lost first line with previous fgets, because //it's only a line of ============================================= while (fgets( line,sizeof(line), file_output )!=NULL){ cont=strtoken(line, ' ', &saveptr1); if (strcmp(saveptr1[0],"hostname")==0) strcpy(qHostname,saveptr1[1]);; if (strcmp(saveptr1[0],"failed")==0) strcpy(qFailed,saveptr1[1]); if (strcmp(saveptr1[0],"exit_status")==0) strcpy(qExit,saveptr1[1]); freetoken(&saveptr1,cont); } pclose( file_output ); now=time(0); sprintf(string_now,"%d",now); if ((strcmp(qExit,"137")==0)||(strcmp(qExit,"143")==0)){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=3; en.exitcode=atoi(qExit); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,""); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else job_registry_unlink_proxy(rha, &en); }else{ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=4; en.exitcode=atoi(qExit); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,qFailed); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else job_registry_unlink_proxy(rha, &en); } } freetoken(&list_query,numQuery); freetoken(&list_queryStates,numQueryStates); if (debug) do_log(debuglogfile, debug, 1, "+-+query_err:%s\n",query_err); //now check acumulated error jobids to verify if they are an error or not if (strcmp(query_err,"\0")!=0){ sleep(60); cont=0; int n=0; char cmd[10]="\0"; cont=strtoken(query_err, ' ', &list_query); while (n < cont){ if(list_query[n]) strcpy(cmd,list_query[n]); else return 1; sprintf(command_string,"%s/qacct -j '%s'",sge_binpath,cmd); if (debug) do_log(debuglogfile, debug, 1, "+-+line 587 error, command_string:%s\n",command_string); file_output = popen(command_string,"r"); if (file_output == NULL) return 1; //if a job number is here means that job was in query previously and //if now it's not in query and not finished (NULL qstat) it was deleted if (fgets( line,sizeof(line), file_output )==NULL){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,cmd); en.status=3; en.exitcode=3; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,""); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"reason=3"); now=time(0); sprintf(string_now,"%d",now); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else job_registry_unlink_proxy(rha, &en); pclose( file_output ); n++; continue; } //there is no problem to lost first line with previous fgets, because //it's only a line of ============================================= while (fgets( line,sizeof(line), file_output )!=NULL){ cont=strtoken(line, ' ', &saveptr1); if (strcmp(saveptr1[0],"hostname")==0) strcpy(qHostname,saveptr1[1]); if (strcmp(saveptr1[0],"failed")==0) strcpy(qFailed,saveptr1[1]); if (strcmp(saveptr1[0],"exit_status")==0) strcpy(qExit,saveptr1[1]); freetoken(&saveptr1,cont); } pclose( file_output ); now=time(0); sprintf(string_now,"%d",now); if ((strcmp(qExit,"137")==0)||(strcmp(qExit,"143")==0)){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,cmd); en.status=3; en.exitcode=atoi(qExit); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,""); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else job_registry_unlink_proxy(rha, &en); }else{ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,cmd); en.status=4; en.exitcode=atoi(qExit); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,qFailed); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else job_registry_unlink_proxy(rha, &en); } n++; } freetoken(&list_query,cont); } return 0; }
int FinalStateQuery(time_t start_date, int logs_to_read) { FILE *fp; char *line=NULL; char **token; char **token_l; int maxtok_t=0; int maxtok_l=0; job_registry_entry en; int ret; time_t tmstampepoch; char *cp=NULL; char *command_string=NULL; time_t now; char *string_now=NULL; job_registry_entry *ren=NULL; command_string=make_message("%s/sacct -nap -o JobID,JobName,State,ExitCode,submit,start,end 2>/dev/null",slurm_binpath); fp = popen(command_string,"r"); do_log(debuglogfile, debug, 3, "%s: command_string in FinalStateQuery is:%s\n",argv0,command_string); en.status=UNDEFINED; JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); if(fp!=NULL){ while(!feof(fp) && (line=get_line(fp))){ if(line && strlen(line)==0){ free(line); continue; } if ((cp = strrchr (line, '\n')) != NULL){ *cp = '\0'; } en.status=UNDEFINED; do_log(debuglogfile, debug, 3, "%s: line in FinalStateQuery is:%s\n",argv0,line); now=time(0); string_now=make_message("%d",now); maxtok_t = strtoken(line, '|', &token); JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,token[0]); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); if(token[2] && strstr(token[2],"COMPLETED")){ en.status=COMPLETED; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token[2] && strstr(token[2],"CANCELLED")){ en.status=REMOVED; en.exitcode=-999; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token[2] && strstr(token[2],"FAILED")){ en.status=COMPLETED; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); } if(!(token[6] && strstr(token[6],"Unknown"))){ tmstampepoch=str2epoch(token[6],"N"); en.udate=tmstampepoch; } if(en.status==COMPLETED){ maxtok_l = strtoken(token[3], ':', &token_l); en.exitcode=atoi(token_l[0]); freetoken(&token_l,maxtok_l); } if ((ren=job_registry_get(rha, en.batch_id)) == NULL){ fprintf(stderr,"Get of record returns error "); perror(""); } if(en.status!=UNDEFINED && en.status!=IDLE && ren && ren->status!=REMOVED && ren->status!=COMPLETED){ if ((ret=job_registry_update_select(rha, &en, JOB_REGISTRY_UPDATE_UDATE | JOB_REGISTRY_UPDATE_STATUS | JOB_REGISTRY_UPDATE_UPDATER_INFO | JOB_REGISTRY_UPDATE_EXITCODE | JOB_REGISTRY_UPDATE_EXITREASON )) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record returns %d: ",ret); perror(""); } } else { do_log(debuglogfile, debug, 2, "%s: f registry update in FinalStateQuery for: jobid=%s creamjobid=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.status); if (en.status == REMOVED || en.status == COMPLETED){ job_registry_unlink_proxy(rha, &en); } if (remupd_conf != NULL){ if ((ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL))<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in FinalStateQuery\n",argv0); } } } } free(string_now); free(line); freetoken(&token,maxtok_t); free(ren); } pclose(fp); } free(command_string); return 0; }
int IntStateQuery() { FILE *fp; char *line=NULL; char **token; char **token_l; char **token_e; int maxtok_t=0; int maxtok_l=0; int maxtok_e=0; job_registry_entry en; int ret; time_t tmstampepoch; char *cp=NULL; char *batch_str=NULL; char *command_string=NULL; job_registry_entry *ren=NULL; int isresumed=FALSE; int first=TRUE; time_t now; char *string_now=NULL; command_string=make_message("%s/scontrol -a show jobid",slurm_binpath); fp = popen(command_string,"r"); en.status=UNDEFINED; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0"); en.exitcode=-1; bupdater_free_active_jobs(&bact); if(fp!=NULL){ while(!feof(fp) && (line=get_line(fp))){ if(line && strlen(line)==0){ free(line); continue; } if ((cp = strrchr (line, '\n')) != NULL){ *cp = '\0'; } do_log(debuglogfile, debug, 3, "%s: line in IntStateQuery is:%s\n",argv0,line); now=time(0); string_now=make_message("%d",now); maxtok_t = strtoken(line, ' ', &token); if(line && strstr(line,"JobId=")){ isresumed=FALSE; if(!first && en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){ if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum, JOB_REGISTRY_UPDATE_WN_ADDR| JOB_REGISTRY_UPDATE_STATUS| JOB_REGISTRY_UPDATE_UDATE| JOB_REGISTRY_UPDATE_UPDATER_INFO| JOB_REGISTRY_UPDATE_EXITCODE| JOB_REGISTRY_UPDATE_EXITREASON)) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record returns %d: ",ret); perror(""); } } else { if(ret==JOB_REGISTRY_SUCCESS){ if (en.status == REMOVED || en.status == COMPLETED) { do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status,en.exitcode); job_registry_unlink_proxy(rha, &en); }else{ do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status); } if (remupd_conf != NULL){ if ((ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL))<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0); } } } } en.status = UNDEFINED; JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); en.exitcode=-1; } en.status = UNDEFINED; maxtok_l = strtoken(token[0], '=', &token_l); batch_str=strdup(token_l[1]); JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batch_str); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); en.exitcode=-1; bupdater_push_active_job(&bact, en.batch_id); do_log(debuglogfile, debug, 4, "%s: bupdater_push_active_job done for %s\n",argv0,en.batch_id); free(batch_str); freetoken(&token_l,maxtok_l); if(!first) free(ren); if ((ren=job_registry_get(rha, en.batch_id)) == NULL){ fprintf(stderr,"Get of record returns error "); perror(""); } if(ren){ if(strlen(ren->updater_info)>0){ en.udate=ren->udate; }else{ en.udate=time(0); } } first=FALSE; }else if(line && strstr(line," JobState=")){ if(token[0] && strstr(line,"JobState=")){ maxtok_l = strtoken(token[0], '=', &token_l); if(token_l[1] && strstr(token_l[1],"PENDING")){ en.status=IDLE; en.exitcode=-1; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"RUNNING")){ en.status=RUNNING; en.exitcode=-1; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"COMPLETED")){ en.status=COMPLETED; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"CANCELLED")){ en.status=REMOVED; en.exitcode=-999; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"FAILED")){ en.status=COMPLETED; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"SUSPENDED")){ en.status=HELD; en.exitcode=-1; JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now); }else if(token_l[1] && strstr(token_l[1],"COMPLETING")){ bupdater_remove_active_job(&bact, en.batch_id); } freetoken(&token_l,maxtok_l); } }else if(line && strstr(line," BatchHost=")){ if(token[0] && strstr(line,"BatchHost=")){ maxtok_l = strtoken(token[0], '=', &token_l); if(en.status!=IDLE){ JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,token_l[1]); } freetoken(&token_l,maxtok_l); } }else if(line && strstr(line," ExitCode=")){ if(token[3] && strstr(line,"ExitCode=")){ maxtok_l = strtoken(token[3], '=', &token_l); maxtok_e = strtoken(token_l[1], ':', &token_e); if(en.status==COMPLETED){ en.exitcode=atoi(token_e[0]); } freetoken(&token_l,maxtok_l); freetoken(&token_e,maxtok_e); } }else if(line && strstr(line," SubmitTime=")){ if(en.status==IDLE){ if(token[0] && strstr(line,"SubmitTime=")){ maxtok_l = strtoken(token[0], '=', &token_l); tmstampepoch=str2epoch(token_l[1],"N"); en.udate=tmstampepoch; freetoken(&token_l,maxtok_l); } } }else if(line && strstr(line," StartTime=")){ if(en.status==RUNNING){ if(token[0] && strstr(line,"StartTime=")){ maxtok_l = strtoken(token[0], '=', &token_l); tmstampepoch=str2epoch(token_l[1],"N"); en.udate=tmstampepoch; freetoken(&token_l,maxtok_l); } } if(en.status==COMPLETED || en.status==REMOVED){ if(token[1] && strstr(line,"EndTime=")){ maxtok_l = strtoken(token[1], '=', &token_l); tmstampepoch=str2epoch(token_l[1],"N"); en.udate=tmstampepoch; freetoken(&token_l,maxtok_l); } } }else if(line && strstr(line," SuspendTime=")){ if(en.status==HELD){ if(token[1] && strstr(line,"SuspendTime=")){ maxtok_l = strtoken(token[1], '=', &token_l); tmstampepoch=str2epoch(token_l[1],"N"); en.udate=tmstampepoch; freetoken(&token_l,maxtok_l); } } } free(line); free(string_now); freetoken(&token,maxtok_t); } pclose(fp); } if(en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){ if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum, JOB_REGISTRY_UPDATE_WN_ADDR| JOB_REGISTRY_UPDATE_STATUS| JOB_REGISTRY_UPDATE_UDATE| JOB_REGISTRY_UPDATE_UPDATER_INFO| JOB_REGISTRY_UPDATE_EXITCODE| JOB_REGISTRY_UPDATE_EXITREASON)) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record returns %d: ",ret); perror(""); } } else { if(ret==JOB_REGISTRY_SUCCESS){ if (en.status == REMOVED || en.status == COMPLETED) { do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status,en.exitcode); job_registry_unlink_proxy(rha, &en); }else{ do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status); } if (remupd_conf != NULL){ if ((ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL))<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0); } } } } } free(ren); free(command_string); return 0; }
int main(int argc, char *argv[]) { char *registry_file = NULL, *registry_file_env = NULL; int need_to_free_registry_file = FALSE; const char *default_registry_file = "blah_job_registry.bjr"; char *my_home; job_registry_entry en; job_status_t status=IDLE; int exitcode = -1; char *exitreason = ""; char *user_prefix = ""; char *user_proxy = ""; char *proxy_subject = ""; int renew_proxy = 0; char *wn_addr = ""; time_t udate=0; char *blah_id, *batch_id; int ent, ret, prret, rhret; config_handle *cha; config_entry *rge, *remupd_conf; job_registry_handle *rha, *rhano; job_registry_index_mode rgin_mode = NO_INDEX; job_registry_updater_endpoint *remupd_head = NULL; if (argc > 1 && (strcmp(argv[1], "-u") == 0)) { /* Obtain an index to the registry so that we can */ /* check that the record is unique. */ rgin_mode = BY_BLAH_ID; argv[1] = argv[0]; argc--; argv++; } if (argc < 3) { fprintf(stderr,"Usage: %s [-u] <BLAH id> <batch id> [job status] [udate] [user prefix] [user proxy] [renew proxy] [proxy subject] [worker node] [exit code] [exit reason]\n",argv[0]); return 1; } blah_id = argv[1]; batch_id = argv[2]; if (argc > 3) status = atoi(argv[3]); if (argc > 4) udate = atol(argv[4]); if (argc > 5) user_prefix = argv[5]; if (argc > 6) user_proxy = argv[6]; if (argc > 7) renew_proxy = atoi(argv[7]); if (argc > 8) proxy_subject = argv[8]; if (argc > 9) wn_addr = argv[9]; if (argc > 10) exitcode = atoi(argv[10]); if (argc > 11) exitreason = argv[11]; cha = config_read(NULL); /* Read config from default locations. */ if (cha != NULL) { rge = config_get("job_registry", cha); remupd_conf = config_get("job_registry_add_remote", cha); if (remupd_conf != NULL) { if (job_registry_updater_setup_sender(remupd_conf->values, remupd_conf->n_values, 2, &remupd_head) < 0) { fprintf(stderr,"%s: warning: cannot set network sender(s) up for remote update to:\n",argv[0]); for (ent = 0; ent < remupd_conf->n_values; ent++) { fprintf(stderr," - %s\n", remupd_conf->values[ent]); } } } if (rge != NULL) registry_file = rge->value; } /* Env variable takes precedence */ registry_file_env = getenv("BLAH_JOB_REGISTRY_FILE"); if (registry_file_env != NULL) registry_file = registry_file_env; if (registry_file == NULL) { my_home = getenv("HOME"); if (my_home == NULL) my_home = "."; registry_file = (char *)malloc(strlen(default_registry_file)+strlen(my_home)+2); if (registry_file != NULL) { sprintf(registry_file,"%s/%s",my_home,default_registry_file); need_to_free_registry_file = TRUE; } else { if (cha != NULL) config_free(cha); if (remupd_head != NULL) job_registry_updater_free_endpoints(remupd_head); return 1; } } JOB_REGISTRY_ASSIGN_ENTRY(en.blah_id,blah_id); JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batch_id); en.status = status; en.exitcode = exitcode; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,wn_addr); en.udate = udate; JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,exitreason); en.submitter = geteuid(); JOB_REGISTRY_ASSIGN_ENTRY(en.user_prefix,user_prefix); en.proxy_link[0] = '\000'; /* Start with a valid string */ en.updater_info[0] = '\000'; en.renew_proxy = 0; /* Enable renewal only if a proxy is found */ rha=job_registry_init(registry_file, rgin_mode); if (rha == NULL) { if (errno == EACCES) { /* Try nonpriv update. It may work. */ rhano = job_registry_init(registry_file, NAMES_ONLY); if (cha != NULL) config_free(cha); if (need_to_free_registry_file) free(registry_file); if (rhano != NULL) { if (strlen(user_proxy) > 0) { prret = job_registry_set_proxy(rhano, &en, user_proxy); if (prret < 0) { fprintf(stderr,"%s: warning: setting proxy to %s: ",argv[0],user_proxy); perror(""); } else en.renew_proxy = renew_proxy; } ret=job_registry_append_nonpriv(rhano, &en); job_registry_destroy(rhano); if (ret < 0) { fprintf(stderr,"%s: job_registry_append_nonpriv returns %d: ",argv[0],ret); perror(""); if (remupd_head != NULL) job_registry_updater_free_endpoints(remupd_head); return 4; } else goto happy_ending; } } else { fprintf(stderr,"%s: error initialising job registry: ",argv[0]); perror(""); } if (remupd_head != NULL) job_registry_updater_free_endpoints(remupd_head); return 2; } /* Filename is stored in job registry handle. - Don't need these anymore */ if (cha != NULL) config_free(cha); if (need_to_free_registry_file) free(registry_file); if (strlen(user_proxy) > 0) { prret = job_registry_set_proxy(rha, &en, user_proxy); if (prret < 0) { fprintf(stderr,"%s: warning: setting proxy to %s: ",argv[0],user_proxy); perror(""); } else en.renew_proxy = renew_proxy; } en.subject_hash[0] = '\000'; if (strlen(proxy_subject) > 0) { job_registry_compute_subject_hash(&en, proxy_subject); rhret = job_registry_record_subject_hash(rha, en.subject_hash, proxy_subject, TRUE); if (rhret < 0) { fprintf(stderr,"%s: warning: recording proxy subject %s (hash %s): ", argv[0], proxy_subject, en.subject_hash); perror(""); } } if ((ret=job_registry_append(rha, &en)) < 0) { if (errno == EACCES) { /* Try nonpriv update. It may work. */ ret=job_registry_append_nonpriv(rha, &en); job_registry_destroy(rha); if (ret < 0) { fprintf(stderr,"%s: job_registry_append_nonpriv returns %d: ",argv[0],ret); perror(""); if (remupd_head != NULL) job_registry_updater_free_endpoints(remupd_head); return 5; } else goto happy_ending; } fprintf(stderr,"%s: job_registry_append returns %d: ",argv[0],ret); perror(""); job_registry_destroy(rha); if (remupd_head != NULL) job_registry_updater_free_endpoints(remupd_head); return 3; } job_registry_destroy(rha); happy_ending: if (remupd_head != NULL) { if (job_registry_send_update(remupd_head, &en, (strlen(proxy_subject) > 0 ? proxy_subject : NULL), (strlen(user_proxy) > 0 ? user_proxy : NULL)) < 0) { fprintf(stderr,"%s: warning: sending network update: ",argv[0]); perror(""); } job_registry_updater_free_endpoints(remupd_head); } return 0; }