int AssignFinalState(char *batchid){ job_registry_entry en; int ret,i; time_t now; now=time(0); JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batchid); en.status=COMPLETED; en.exitcode=999; en.udate=now; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0"); if ((ret=job_registry_update(rha, &en)) < 0){ if(ret != JOB_REGISTRY_NOT_FOUND){ fprintf(stderr,"Update of record %d returns %d: ",i,ret); perror(""); } } else { do_log(debuglogfile, debug, 2, "%s: registry update in AssignStateQuery for: jobid=%s creamjobid=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.status); job_registry_unlink_proxy(rha, &en); if (remupd_conf != NULL){ if (ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL)<=0){ do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in AssignFinalState\n",argv0); } } } return 0; }
int AssignState (char *element, char *status, char *exit, char *reason, char *wn, char *udate){ char **id_element; job_registry_entry en; time_t now; char *string_now=NULL; int i=0; int n=strtoken(element, '.', &id_element); int iret; if(id_element[0]){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,id_element[0]); en.status=atoi(status); en.exitcode=atoi(exit); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,wn); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,reason); now=time(0); string_now=make_message("%d",now); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; free(string_now); }else{ if((element=calloc(STR_CHARS,1)) == 0){ sysfatal("can't malloc cmd in GetAndSend: %r"); } } if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else{ if (en.status == REMOVED || en.status == COMPLETED){ job_registry_unlink_proxy(rha, &en); } } freetoken(&id_element,n); return 0; }
int ReceiveUpdateFromNetwork() { char *proxy_path, *proxy_subject; int timeout_ms = 0; int ent, ret, prret, rhret; job_registry_entry *nen; job_registry_entry *ren; proxy_path = NULL; proxy_subject = NULL; while (nen = job_registry_receive_update(remupd_pollset, remupd_nfds,timeout_ms, &proxy_subject, &proxy_path)){ JOB_REGISTRY_ASSIGN_ENTRY(nen->subject_hash,"\0"); JOB_REGISTRY_ASSIGN_ENTRY(nen->proxy_link,"\0"); if ((ren=job_registry_get(rha, nen->batch_id)) == NULL){ if ((ret=job_registry_append(rha, nen)) < 0){ fprintf(stderr,"%s: Warning: job_registry_append returns %d: ",argv0,ret); perror(""); } }else{ if(ren->subject_hash!=NULL && strlen(ren->subject_hash) && ren->proxy_link!=NULL && strlen(ren->proxy_link)){ JOB_REGISTRY_ASSIGN_ENTRY(nen->subject_hash,ren->subject_hash); JOB_REGISTRY_ASSIGN_ENTRY(nen->proxy_link,ren->proxy_link); }else{ if (proxy_path != NULL && strlen(proxy_path) > 0){ prret = job_registry_set_proxy(rha, nen, proxy_path); if (prret < 0){ do_log(debuglogfile, debug, 1, "%s: warning: setting proxy to %s\n",argv0,proxy_path); fprintf(stderr,"%s: warning: setting proxy to %s: ",argv0,proxy_path); perror(""); /* Make sure we don't renew non-existing proxies */ nen->renew_proxy = 0; } free(proxy_path); nen->subject_hash[0] = '\000'; if (proxy_subject != NULL && strlen(proxy_subject) > 0){ job_registry_compute_subject_hash(nen, proxy_subject); rhret = job_registry_record_subject_hash(rha, nen->subject_hash, proxy_subject, TRUE); if (rhret < 0){ do_log(debuglogfile, debug, 1, "%s: warning: recording proxy subject %s (hash %s)\n",argv0, proxy_subject, nen->subject_hash); fprintf(stderr,"%s: warning: recording proxy subject %s (hash %s): ",argv0, proxy_subject, nen->subject_hash); perror(""); } } free(proxy_subject); } } if(job_registry_need_update(ren,nen,JOB_REGISTRY_UPDATE_ALL)){ if ((ret=job_registry_update(rha, nen)) < 0){ fprintf(stderr,"%s: Warning: job_registry_update returns %d: ",argv0,ret); perror(""); } } } free(nen); } return 0; }
int FinalStateQuery(char *query,char *queryStates, char *query_err){ char line[STR_CHARS],fail[6],qExit[10],qFailed[10],qHostname[100],qStatus[2],command_string[100]; char **saveptr1,**saveptr2,**list_query,**list_queryStates; FILE *file_output; int numQuery=0,numQueryStates=0,j=0,l=0,cont=0,cont2=0, nq=0; time_t now; char string_now[11]; job_registry_entry en; int iret; numQuery=strtoken(query,' ',&list_query); nq=numQuery; numQueryStates=strtoken(queryStates,' ',&list_queryStates); if (numQuery!=numQueryStates) return 1; sprintf(command_string,"%s/qstat -u '*'",sge_binpath); if (debug) do_log(debuglogfile, debug, 1, "+-+line 433, command_string:%s\n",command_string); //load in qstatJob list of jobids from qstat command exec file_output = popen(command_string,"r"); if (file_output == NULL) return 0; while (fgets(line,sizeof(line), file_output) != NULL){ cont=strtoken(line, ' ', &saveptr1); if ((strcmp(saveptr1[0],"job-ID")!=0)&&(strncmp(saveptr1[0],"-",1)!=0)){ for (l=0;l<nq;l++){ if (strcmp(list_query[l],saveptr1[0])==0){ if (strcmp(list_queryStates[l],saveptr1[4])!=0){ now=time(0); sprintf(string_now,"%d",now); if (strcmp(saveptr1[4],"u")==0){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=0; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,""); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); } } if (strcmp(saveptr1[4],"q")==0){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=1; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,""); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); } } if (strcmp(saveptr1[4],"r")==0){ cont2=strtoken(saveptr1[7], '@', &saveptr2); JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=2; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,saveptr2[1]); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); } freetoken(&saveptr2,cont2); } if ((strcmp(saveptr1[4],"hr")==0)||strcmp(saveptr1[4],"hqw")==0){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=5; en.exitcode=0; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,""); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0"); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); } } } //i must put out element from query for (j=l;j<nq;j++) if (list_query[j+1]!=NULL) strcpy(list_query[j],list_query[j+1]); for (j=l;j<nq;j++) if (list_queryStates[j+1]!=NULL) strcpy(list_queryStates[j],list_queryStates[j+1]); nq--; break; } } } line[0]='\0'; freetoken(&saveptr1,cont); } pclose( file_output ); sprintf(query_err,"\0"); //now we have check in list_query only states that not change status //because they're not in qstat result for (l=0; l<nq; l++){ sprintf(command_string,"%s/qacct -j '%s'",sge_binpath,list_query[l]); if (debug) do_log(debuglogfile, debug, 1, "+-+line 520,command_string:%s\n",command_string); file_output = popen(command_string,"r"); if (file_output == NULL) return 1; //if a job number is here means that job was in query previously and //if now it's not in query and not finished (NULL qstat) it was deleted //or it's on transition time if (fgets( line,sizeof(line), file_output )==NULL){ strcat(query_err,list_query[l]); strcat(query_err," "); pclose( file_output ); continue; } //there is no problem to lost first line with previous fgets, because //it's only a line of ============================================= while (fgets( line,sizeof(line), file_output )!=NULL){ cont=strtoken(line, ' ', &saveptr1); if (strcmp(saveptr1[0],"hostname")==0) strcpy(qHostname,saveptr1[1]);; if (strcmp(saveptr1[0],"failed")==0) strcpy(qFailed,saveptr1[1]); if (strcmp(saveptr1[0],"exit_status")==0) strcpy(qExit,saveptr1[1]); freetoken(&saveptr1,cont); } pclose( file_output ); now=time(0); sprintf(string_now,"%d",now); if ((strcmp(qExit,"137")==0)||(strcmp(qExit,"143")==0)){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=3; en.exitcode=atoi(qExit); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,""); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else job_registry_unlink_proxy(rha, &en); }else{ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]); en.status=4; en.exitcode=atoi(qExit); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,qFailed); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else job_registry_unlink_proxy(rha, &en); } } freetoken(&list_query,numQuery); freetoken(&list_queryStates,numQueryStates); if (debug) do_log(debuglogfile, debug, 1, "+-+query_err:%s\n",query_err); //now check acumulated error jobids to verify if they are an error or not if (strcmp(query_err,"\0")!=0){ sleep(60); cont=0; int n=0; char cmd[10]="\0"; cont=strtoken(query_err, ' ', &list_query); while (n < cont){ if(list_query[n]) strcpy(cmd,list_query[n]); else return 1; sprintf(command_string,"%s/qacct -j '%s'",sge_binpath,cmd); if (debug) do_log(debuglogfile, debug, 1, "+-+line 587 error, command_string:%s\n",command_string); file_output = popen(command_string,"r"); if (file_output == NULL) return 1; //if a job number is here means that job was in query previously and //if now it's not in query and not finished (NULL qstat) it was deleted if (fgets( line,sizeof(line), file_output )==NULL){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,cmd); en.status=3; en.exitcode=3; JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,""); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"reason=3"); now=time(0); sprintf(string_now,"%d",now); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else job_registry_unlink_proxy(rha, &en); pclose( file_output ); n++; continue; } //there is no problem to lost first line with previous fgets, because //it's only a line of ============================================= while (fgets( line,sizeof(line), file_output )!=NULL){ cont=strtoken(line, ' ', &saveptr1); if (strcmp(saveptr1[0],"hostname")==0) strcpy(qHostname,saveptr1[1]); if (strcmp(saveptr1[0],"failed")==0) strcpy(qFailed,saveptr1[1]); if (strcmp(saveptr1[0],"exit_status")==0) strcpy(qExit,saveptr1[1]); freetoken(&saveptr1,cont); } pclose( file_output ); now=time(0); sprintf(string_now,"%d",now); if ((strcmp(qExit,"137")==0)||(strcmp(qExit,"143")==0)){ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,cmd); en.status=3; en.exitcode=atoi(qExit); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,""); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else job_registry_unlink_proxy(rha, &en); }else{ JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,cmd); en.status=4; en.exitcode=atoi(qExit); JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname); JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,qFailed); JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now) en.udate=now; if ((iret=job_registry_update(rha, &en)) < 0){ fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id); perror(""); }else job_registry_unlink_proxy(rha, &en); } n++; } freetoken(&list_query,cont); } return 0; }