Beispiel #1
0
/*
 * Force the registry record identified by batchid into a final state:
 * status COMPLETED, exit code 999, update time "now", empty worker node
 * and empty exit reason.
 *
 * On a successful registry update job_registry_unlink_proxy() is called
 * for the entry and, when remote updates are configured
 * (remupd_conf != NULL), the entry is passed to job_registry_send_update().
 *
 * batchid: batch system job id of the record to update.
 *
 * Returns 0 in every case; failures are only reported on stderr or in
 * the debug log.
 */
int AssignFinalState(char *batchid){

	/* Zero-filled so that members not assigned below (e.g. user_prefix,
	 * which is logged further down) never hold indeterminate bytes.
	 * NOTE(review): job_registry_update() may refresh the entry from the
	 * registry on success - confirm; the zero fill is harmless either way. */
	job_registry_entry en = {0};
	int ret;
	time_t now;

	now=time(0);

	JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batchid);
	en.status=COMPLETED;
	en.exitcode=999;
	en.udate=now;
	JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0");
	JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0");

	if ((ret=job_registry_update(rha, &en)) < 0){
		/* A record that is simply not there is not reported. */
		if(ret != JOB_REGISTRY_NOT_FOUND){
			fprintf(stderr,"Update of record returns %d: ",ret);
			perror("");
		}
	} else {
		do_log(debuglogfile, debug, 2, "%s: registry update in AssignStateQuery for: jobid=%s creamjobid=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.status);
		job_registry_unlink_proxy(rha, &en);
		if (remupd_conf != NULL){
			/* Parenthesised so that ret holds the call result, not the
			 * outcome of the comparison. */
			if ((ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL))<=0){
				do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in AssignFinalState\n",argv0);
			}
		}
	}

	return 0;
}
Beispiel #2
0
int
FinalStateQuery(char *input_string, int logs_to_read)
{
/*
tracejob -m -l -a <jobid>
In line:

04/23/2008 11:50:43  S    Exit_status=0 resources_used.cput=00:00:01 resources_used.mem=11372kb resources_used.vmem=52804kb
                          resources_used.walltime=00:10:15

there are:
udate for the final state (04/23/2008 11:50:43):
exitcode Exit_status=

*/

/*
 Filled entries:
 batch_id (a list of jobid is given, one for each tracejob call)
 status (always a final state 3 or 4)
 exitcode
 udate
 
 Filled by submit script:
 blah_id 
 
 Unfilled entries:
 exitreason
*/
/*
[root@cream-12 server_logs]# tracejob -m -l -a 13

Job: 13.cream-12.pd.infn.it

04/23/2008 11:40:27  S    enqueuing into cream_1, state 1 hop 1
04/23/2008 11:40:27  S    Job Queued at request of [email protected], owner = [email protected], job name =
                          cream_365713239, queue = cream_1
04/23/2008 11:40:28  S    Job Modified at request of [email protected]
04/23/2008 11:40:28  S    Job Run at request of [email protected]
04/23/2008 11:50:43  S    Exit_status=0 resources_used.cput=00:00:01 resources_used.mem=11372kb resources_used.vmem=52804kb
                          resources_used.walltime=00:10:15
04/23/2008 11:50:44  S    dequeuing from cream_1, state COMPLETE
*/

        FILE *fp;
	char *line=NULL;
	char **token;
	char **jobid;
	int maxtok_t=0,maxtok_j=0,k;
	job_registry_entry en;
	int ret;
	char *timestamp;
	time_t tmstampepoch;
	char *exit_str=NULL;
	int failed_count=0;
	int time_to_add=0;
	time_t now;
	char *cp=NULL;
	char *command_string=NULL;
	char *pbs_spool=NULL;
	char *string_now=NULL;
	int tracejob_line_counter=0;

	do_log(debuglogfile, debug, 3, "%s: input_string in FinalStateQuery is:%s\n",argv0,input_string);
	
	maxtok_j = strtoken(input_string, ':', &jobid);
	
	for(k=0;k<maxtok_j;k++){
	
		if(jobid[k] && strlen(jobid[k])==0) continue;

		pbs_spool=(pbs_spoolpath?make_message("-p %s ",pbs_spoolpath):make_message(""));
		command_string=make_message("%s%s/tracejob %s-m -l -a -n %d %s",batch_command,pbs_binpath,pbs_spool,logs_to_read,jobid[k]);
		free(pbs_spool);
		
		fp = popen(command_string,"r");
		
		do_log(debuglogfile, debug, 3, "%s: command_string in FinalStateQuery is:%s\n",argv0,command_string);

		/* en.status is set =0 (UNDEFINED) here and it is tested if it is !=0 before the registry update: the update is done only if en.status is !=0*/
		en.status=UNDEFINED;
		
		JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,jobid[k]);

		tracejob_line_counter=0;
		
		if(fp!=NULL){
			while(!feof(fp) && (line=get_line(fp))){
				if(line && strlen(line)==0){
					free(line);
					continue;
				}
				if(tracejob_line_counter>tracejob_max_output){
					do_log(debuglogfile, debug, 2, "%s: Tracejob output limit of %d lines reached. Skipping command.\n",argv0,tracejob_max_output);
					free(line);
					break;
				}
				if ((cp = strrchr (line, '\n')) != NULL){
					*cp = '\0';
					tracejob_line_counter++;
					
				}
                        	do_log(debuglogfile, debug, 3, "%s: line in FinalStateQuery is:%s\n",argv0,line);
				now=time(0);
				string_now=make_message("%d",now);
				if(line && (strstr(line,"Job deleted") || (strstr(line,"dequeuing from") && strstr(line,"state RUNNING")))){	
					maxtok_t = strtoken(line, ' ', &token);
					timestamp=make_message("%s %s",token[0],token[1]);
					tmstampepoch=str2epoch(timestamp,"A");
					free(timestamp);
					freetoken(&token,maxtok_t);
					en.udate=tmstampepoch;
					en.status=REMOVED;
                        		en.exitcode=-999;
					JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
					JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0");
				}else if(line && strstr(line," Exit_status=") && en.status != REMOVED){	
					maxtok_t = strtoken(line, ' ', &token);
					timestamp=make_message("%s %s",token[0],token[1]);
					tmstampepoch=str2epoch(timestamp,"A");
					exit_str=strdup(token[3]);
                			if(exit_str == NULL){
                        			sysfatal("strdup failed for exit_str in FinalStateQuery: %r");
                			}
					free(timestamp);
					freetoken(&token,maxtok_t);
					if(strstr(exit_str,"Exit_status=")){
						maxtok_t = strtoken(exit_str, '=', &token);
						if(maxtok_t == 2){
                        				en.exitcode=atoi(token[1]);
							freetoken(&token,maxtok_t);
						}else{
							en.exitcode=-1;
						}
					}else{
						en.exitcode=-1;
					}
					free(exit_str);
					en.udate=tmstampepoch;
					en.status=COMPLETED;
					JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
					JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0");
				}
				free(string_now);
				free(line);
			}
			pclose(fp);
		}
		
		if(en.status !=UNDEFINED && en.status!=IDLE){
			if ((ret=job_registry_update_select(rha, &en,
			JOB_REGISTRY_UPDATE_UDATE |
			JOB_REGISTRY_UPDATE_STATUS |
			JOB_REGISTRY_UPDATE_UPDATER_INFO |
			JOB_REGISTRY_UPDATE_EXITCODE |
			JOB_REGISTRY_UPDATE_EXITREASON )) < 0){
				if(ret != JOB_REGISTRY_NOT_FOUND){
					fprintf(stderr,"Update of record returns %d: ",ret);
					perror("");
				}
			} else {
				do_log(debuglogfile, debug, 2, "%s: registry update in FinalStateQuery for: jobid=%s exitcode=%d status=%d\n",argv0,en.batch_id,en.exitcode,en.status);
				if (en.status == REMOVED || en.status == COMPLETED){
					job_registry_unlink_proxy(rha, &en);
				}
				if (remupd_conf != NULL){
					if (ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL)<=0){
						do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in FinalStateQuery\n",argv0);
					}
				}
			}
		}else{
			failed_count++;
		}		
		free(command_string);
	}
	
	now=time(0);
	if(failed_count>10){
		failed_count=10;
	}
	time_to_add=pow(failed_count,1.5);
	next_finalstatequery=now+time_to_add;
	do_log(debuglogfile, debug, 3, "%s: next FinalStatequery will be in %d seconds\n",argv0,time_to_add);
	
	freetoken(&jobid,maxtok_j);
	return failed_count;
}
Beispiel #3
0
int
IntStateQuery()
{
/*
qstat -f

Job Id: 11.cream-12.pd.infn.it
    Job_Name = cream_579184706
    job_state = R
    ctime = Wed Apr 23 11:39:55 2008
    exec_host = cream-wn-029.pn.pd.infn.it/0
*/

/*
 Filled entries:
 batch_id
 wn_addr
 status
 udate
 
 Filled by submit script:
 blah_id 
 
 Unfilled entries:
 exitreason
*/


        FILE *fp;
	char *line=NULL;
	char **token;
	int maxtok_t=0;
	job_registry_entry en;
	int ret;
	char *timestamp;
	time_t tmstampepoch;
	char *batch_str=NULL;
	char *wn_str=NULL; 
        char *twn_str=NULL;
        char *status_str=NULL;
	char *ex_str=NULL;
	int  ex_code=0; 
	char *cp=NULL;
	char *command_string=NULL;
	job_registry_entry *ren=NULL;
	int first=TRUE;
	time_t now;
	char *string_now=NULL;

	command_string=make_message("%s%s/qstat -f",batch_command,pbs_binpath);
	fp = popen(command_string,"r");

	en.status=UNDEFINED;
	JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0");
	JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0");
	JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0");
	en.exitcode=-1;
	bupdater_free_active_jobs(&bact);

	if(fp!=NULL){
		while(!feof(fp) && (line=get_line(fp))){
			if(line && strlen(line)==0){
				free(line);
				continue;
			}
			if ((cp = strrchr (line, '\n')) != NULL){
				*cp = '\0';
			}
			do_log(debuglogfile, debug, 3, "%s: line in IntStateQuery is:%s\n",argv0,line);
			now=time(0);
			string_now=make_message("%d",now);
			if(line && strstr(line,"Job Id: ")){
				if(!first && en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){
                        		if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum,
					JOB_REGISTRY_UPDATE_WN_ADDR|
					JOB_REGISTRY_UPDATE_STATUS|
					JOB_REGISTRY_UPDATE_UDATE|
					JOB_REGISTRY_UPDATE_UPDATER_INFO|
					JOB_REGISTRY_UPDATE_EXITCODE|
					JOB_REGISTRY_UPDATE_EXITREASON)) < 0){
						if(ret != JOB_REGISTRY_NOT_FOUND){
                	                		fprintf(stderr,"Update of record returns %d: ",ret);
							perror("");
						}
					} else {
						if(ret==JOB_REGISTRY_SUCCESS){
							if (en.status == REMOVED || en.status == COMPLETED) {
								do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.wn_addr,en.status,en.exitcode);
								job_registry_unlink_proxy(rha, &en);
							}else{
								do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.wn_addr,en.status);
							}
						}
						if (remupd_conf != NULL){
							if (ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL)<=0){
								do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0);
							}
						}
					}
					en.status = UNDEFINED;
					JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0");
					JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0");
					JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0");
					en.exitcode=-1;
				}				
                        	maxtok_t = strtoken(line, ':', &token);
				batch_str=strdel(token[1]," ");
				JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batch_str);
				JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
				en.exitcode=-1;
				bupdater_push_active_job(&bact, en.batch_id);
				free(batch_str);
				freetoken(&token,maxtok_t);
				if(!first) free(ren);
				if ((ren=job_registry_get(rha, en.batch_id)) == NULL){
						fprintf(stderr,"Get of record returns error for %s ",en.batch_id);
						perror("");
				}
				first=FALSE;				
			}else if(line && strstr(line,"job_state = ")){	
				maxtok_t = strtoken(line, '=', &token);
				status_str=strdel(token[1]," ");
				if(status_str && strcmp(status_str,"Q")==0){ 
					en.status=IDLE;
					en.exitcode=-1;
					JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0");
				}else if(status_str && strcmp(status_str,"W")==0){ 
					en.status=IDLE;
					en.exitcode=-1;
				}else if(status_str && strcmp(status_str,"R")==0){ 
					en.status=RUNNING;
					en.exitcode=-1;
				}else if(status_str && strcmp(status_str,"C")==0){ 
					en.status=COMPLETED;
					JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0");
				}else if(status_str && strcmp(status_str,"H")==0){ 
					en.status=HELD;
					en.exitcode=-1;
					JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0");
				}
				free(status_str);
				freetoken(&token,maxtok_t);
			}else if(line && strstr(line,"unable to run job")){
				en.status=IDLE;	
				en.exitcode=-1;
			}else if(line && strstr(line,"exit_status = ")){
				maxtok_t = strtoken(line, '=', &token);
				ex_str=strdel(token[1]," ");
				ex_code=atoi(ex_str);
				if(ex_code==0){
					en.exitcode=0;
				}else if(ex_code==271){
					en.status=REMOVED;
                        		en.exitcode=-999;
				}else{
					en.exitcode=ex_code;
				}
				free(ex_str);
				freetoken(&token,maxtok_t);
			}else if(line && strstr(line,"exec_host = ")){	
				maxtok_t = strtoken(line, '=', &token);
				twn_str=strdup(token[1]);
                		if(twn_str == NULL){
                        		sysfatal("strdup failed for twn_str in IntStateQuery: %r");
                		}
				freetoken(&token,maxtok_t);
				maxtok_t = strtoken(twn_str, '/', &token);
				wn_str=strdel(token[0]," ");
				JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,wn_str);
				free(twn_str);
 				free(wn_str);
				freetoken(&token,maxtok_t);
			}else if(line && strstr(line,"mtime = ")){	
                        	maxtok_t = strtoken(line, ' ', &token);
				timestamp=make_message("%s %s %s %s %s",token[2],token[3],token[4],token[5],token[6]);
                        	tmstampepoch=str2epoch(timestamp,"L");
				free(timestamp);
				en.udate=tmstampepoch;
				freetoken(&token,maxtok_t);
			}
			free(line);
			free(string_now);

		}
		pclose(fp);
	}
	
	if(en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){
		if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum,
		JOB_REGISTRY_UPDATE_WN_ADDR|
		JOB_REGISTRY_UPDATE_STATUS|
		JOB_REGISTRY_UPDATE_UDATE|
		JOB_REGISTRY_UPDATE_UPDATER_INFO|
		JOB_REGISTRY_UPDATE_EXITCODE|
		JOB_REGISTRY_UPDATE_EXITREASON)) < 0){
			if(ret != JOB_REGISTRY_NOT_FOUND){
				fprintf(stderr,"Update of record returns %d: ",ret);
				perror("");
			}
		} else {
			if(ret==JOB_REGISTRY_SUCCESS){
				if (en.status == REMOVED || en.status == COMPLETED) {
					do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.wn_addr,en.status,en.exitcode);
					job_registry_unlink_proxy(rha, &en);
				}else{
					do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.wn_addr,en.status);
				}
			}
			if (remupd_conf != NULL){
				if (ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL)<=0){
					do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0);
				}
			}
		}
	}				

	free(ren);
	free(command_string);
	return 0;
}
Beispiel #4
0
int
FinalStateQuery(time_t start_date, int logs_to_read)
{

        FILE *fp;
	char *line=NULL;
	char **token;
	char **token_l;
	int maxtok_t=0;
	int maxtok_l=0;
	job_registry_entry en;
	int ret;
	time_t tmstampepoch;
	char *cp=NULL; 
	char *command_string=NULL;
	time_t now;
	char *string_now=NULL;
	job_registry_entry *ren=NULL;

	
	command_string=make_message("%s/sacct -nap -o JobID,JobName,State,ExitCode,submit,start,end 2>/dev/null",slurm_binpath);
	
	fp = popen(command_string,"r");
	
	do_log(debuglogfile, debug, 3, "%s: command_string in FinalStateQuery is:%s\n",argv0,command_string);

	en.status=UNDEFINED;
	JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0");

	if(fp!=NULL){
		while(!feof(fp) && (line=get_line(fp))){
			if(line && strlen(line)==0){
				free(line);
				continue;
			}
			if ((cp = strrchr (line, '\n')) != NULL){
				*cp = '\0';
			}
			en.status=UNDEFINED;
			do_log(debuglogfile, debug, 3, "%s: line in FinalStateQuery is:%s\n",argv0,line);
			now=time(0);
			string_now=make_message("%d",now);
			maxtok_t = strtoken(line, '|', &token);
			JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,token[0]);
			JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
			if(token[2] && strstr(token[2],"COMPLETED")){
				en.status=COMPLETED;
				JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
			}else if(token[2] && strstr(token[2],"CANCELLED")){
				en.status=REMOVED;
				en.exitcode=-999;
				JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
			}else if(token[2] && strstr(token[2],"FAILED")){
				en.status=COMPLETED;
				JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
			}
			
			if(!(token[6] && strstr(token[6],"Unknown"))){
				tmstampepoch=str2epoch(token[6],"N");
				en.udate=tmstampepoch;
			}
			if(en.status==COMPLETED){
				maxtok_l = strtoken(token[3], ':', &token_l);
				en.exitcode=atoi(token_l[0]);
				freetoken(&token_l,maxtok_l);
			}
			
			if ((ren=job_registry_get(rha, en.batch_id)) == NULL){
					fprintf(stderr,"Get of record returns error ");
					perror("");
			}
			if(en.status!=UNDEFINED && en.status!=IDLE && ren && ren->status!=REMOVED && ren->status!=COMPLETED){	
				if ((ret=job_registry_update_select(rha, &en,
				JOB_REGISTRY_UPDATE_UDATE |
				JOB_REGISTRY_UPDATE_STATUS |
				JOB_REGISTRY_UPDATE_UPDATER_INFO |
				JOB_REGISTRY_UPDATE_EXITCODE |
				JOB_REGISTRY_UPDATE_EXITREASON )) < 0){
					if(ret != JOB_REGISTRY_NOT_FOUND){
						fprintf(stderr,"Update of record returns %d: ",ret);
						perror("");
					}
				} else {
					do_log(debuglogfile, debug, 2, "%s: f registry update in FinalStateQuery for: jobid=%s creamjobid=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.status);
					if (en.status == REMOVED || en.status == COMPLETED){
						job_registry_unlink_proxy(rha, &en);
					}
					if (remupd_conf != NULL){
						if ((ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL))<=0){
							do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in FinalStateQuery\n",argv0);
						}
					}
				}
			}
			free(string_now);
			free(line);
			freetoken(&token,maxtok_t);
			free(ren);
		}
		pclose(fp);
	}
	
	free(command_string);
	return 0;
}
Beispiel #5
0
int
IntStateQuery()
{

        FILE *fp;
	char *line=NULL;
	char **token;
	char **token_l;
	char **token_e;
	int maxtok_t=0;
	int maxtok_l=0;
	int maxtok_e=0;
	job_registry_entry en;
	int ret;
	time_t tmstampepoch;
	char *cp=NULL; 
	char *batch_str=NULL;
	char *command_string=NULL;
	job_registry_entry *ren=NULL;
	int isresumed=FALSE;
	int first=TRUE;
	time_t now;
	char *string_now=NULL;

	command_string=make_message("%s/scontrol -a show jobid",slurm_binpath);
	fp = popen(command_string,"r");

	en.status=UNDEFINED;
	JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0");
	JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0");
	JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0");
	en.exitcode=-1;
	bupdater_free_active_jobs(&bact);

	if(fp!=NULL){
		while(!feof(fp) && (line=get_line(fp))){
			if(line && strlen(line)==0){
				free(line);
				continue;
			}
			if ((cp = strrchr (line, '\n')) != NULL){
				*cp = '\0';
			}
			do_log(debuglogfile, debug, 3, "%s: line in IntStateQuery is:%s\n",argv0,line);
			now=time(0);
			string_now=make_message("%d",now);
			maxtok_t = strtoken(line, ' ', &token);
			if(line && strstr(line,"JobId=")){
				isresumed=FALSE;
				if(!first && en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){	
					if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum,
					JOB_REGISTRY_UPDATE_WN_ADDR|
					JOB_REGISTRY_UPDATE_STATUS|
					JOB_REGISTRY_UPDATE_UDATE|
					JOB_REGISTRY_UPDATE_UPDATER_INFO|
					JOB_REGISTRY_UPDATE_EXITCODE|
					JOB_REGISTRY_UPDATE_EXITREASON)) < 0){
						if(ret != JOB_REGISTRY_NOT_FOUND){
							fprintf(stderr,"Update of record returns %d: ",ret);
							perror("");
						}
					} else {
						if(ret==JOB_REGISTRY_SUCCESS){
							if (en.status == REMOVED || en.status == COMPLETED) {
								do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status,en.exitcode);
								job_registry_unlink_proxy(rha, &en);
							}else{
								do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status);
							}
							if (remupd_conf != NULL){
								if ((ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL))<=0){
									do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0);
								}
							}
						}
					}
					en.status = UNDEFINED;
					JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0");
					JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,"\0");
					JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0");
					en.exitcode=-1;
				}
				en.status = UNDEFINED;
				maxtok_l = strtoken(token[0], '=', &token_l);
				batch_str=strdup(token_l[1]);
				JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batch_str);
				JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
				en.exitcode=-1;
				bupdater_push_active_job(&bact, en.batch_id);
				do_log(debuglogfile, debug, 4, "%s: bupdater_push_active_job done for %s\n",argv0,en.batch_id);
				free(batch_str);
				freetoken(&token_l,maxtok_l);
				if(!first) free(ren);
				if ((ren=job_registry_get(rha, en.batch_id)) == NULL){
						fprintf(stderr,"Get of record returns error ");
						perror("");
				}
				if(ren){
					if(strlen(ren->updater_info)>0){
						en.udate=ren->udate;
					}else{
						en.udate=time(0);
					}
				}
				first=FALSE;
				
			}else if(line && strstr(line," JobState=")){
				if(token[0] && strstr(line,"JobState=")){
					maxtok_l = strtoken(token[0], '=', &token_l);
					if(token_l[1] && strstr(token_l[1],"PENDING")){
						en.status=IDLE;
						en.exitcode=-1;
						JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
					}else if(token_l[1] && strstr(token_l[1],"RUNNING")){
						en.status=RUNNING;
						en.exitcode=-1;
						JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
					}else if(token_l[1] && strstr(token_l[1],"COMPLETED")){
						en.status=COMPLETED;
						en.exitcode=0;
						JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
					}else if(token_l[1] && strstr(token_l[1],"CANCELLED")){
						en.status=REMOVED;
						en.exitcode=-999;
						JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
					}else if(token_l[1] && strstr(token_l[1],"FAILED")){
						en.status=COMPLETED;
						en.exitcode=0;
						JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
					}else if(token_l[1] && strstr(token_l[1],"SUSPENDED")){
						en.status=HELD;
						en.exitcode=-1;
						JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now);
					}else if(token_l[1] && strstr(token_l[1],"COMPLETING")){
						bupdater_remove_active_job(&bact, en.batch_id);
					}
					freetoken(&token_l,maxtok_l);
				}
			}else if(line && strstr(line," BatchHost=")){
				if(token[0] && strstr(line,"BatchHost=")){
					maxtok_l = strtoken(token[0], '=', &token_l);
					if(en.status!=IDLE){
						JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,token_l[1]);
					}
					freetoken(&token_l,maxtok_l);
				}
			}else if(line && strstr(line," ExitCode=")){
				if(token[3] && strstr(line,"ExitCode=")){
					maxtok_l = strtoken(token[3], '=', &token_l);
					maxtok_e = strtoken(token_l[1], ':', &token_e);
					if(en.status==COMPLETED){
						en.exitcode=atoi(token_e[0]);
					}
					freetoken(&token_l,maxtok_l);
					freetoken(&token_e,maxtok_e);
				}
			}else if(line && strstr(line," SubmitTime=")){
				if(en.status==IDLE){
					if(token[0] && strstr(line,"SubmitTime=")){
						maxtok_l = strtoken(token[0], '=', &token_l);
						tmstampepoch=str2epoch(token_l[1],"N");
						en.udate=tmstampepoch;
						freetoken(&token_l,maxtok_l);
					}
				}
			}else if(line && strstr(line," StartTime=")){
				if(en.status==RUNNING){
					if(token[0] && strstr(line,"StartTime=")){
						maxtok_l = strtoken(token[0], '=', &token_l);
						tmstampepoch=str2epoch(token_l[1],"N");
						en.udate=tmstampepoch;
						freetoken(&token_l,maxtok_l);
					}
				}
				if(en.status==COMPLETED || en.status==REMOVED){
					if(token[1] && strstr(line,"EndTime=")){
						maxtok_l = strtoken(token[1], '=', &token_l);
						tmstampepoch=str2epoch(token_l[1],"N");
						en.udate=tmstampepoch;
						freetoken(&token_l,maxtok_l);
					}
				}
			}else if(line && strstr(line," SuspendTime=")){
				if(en.status==HELD){
					if(token[1] && strstr(line,"SuspendTime=")){
						maxtok_l = strtoken(token[1], '=', &token_l);
						tmstampepoch=str2epoch(token_l[1],"N");
						en.udate=tmstampepoch;
						freetoken(&token_l,maxtok_l);
					}
				}
			}
			
			free(line);
			free(string_now);
			freetoken(&token,maxtok_t);
		}
		pclose(fp);
	}
		
	if(en.status!=UNDEFINED && ren && ren->status!=REMOVED && ren->status!=COMPLETED){	
		if ((ret=job_registry_update_recn_select(rha, &en, ren->recnum,
		JOB_REGISTRY_UPDATE_WN_ADDR|
		JOB_REGISTRY_UPDATE_STATUS|
		JOB_REGISTRY_UPDATE_UDATE|
		JOB_REGISTRY_UPDATE_UPDATER_INFO|
		JOB_REGISTRY_UPDATE_EXITCODE|
		JOB_REGISTRY_UPDATE_EXITREASON)) < 0){
			if(ret != JOB_REGISTRY_NOT_FOUND){
				fprintf(stderr,"Update of record returns %d: ",ret);
				perror("");
			}
		} else {
			if(ret==JOB_REGISTRY_SUCCESS){
				if (en.status == REMOVED || en.status == COMPLETED) {
					do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d exitcode=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status,en.exitcode);
					job_registry_unlink_proxy(rha, &en);
				}else{
					do_log(debuglogfile, debug, 2, "%s: registry update in IntStateQuery for: jobid=%s creamjobid=%s wn=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.wn_addr,en.status);
				}
				if (remupd_conf != NULL){
					if ((ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL))<=0){
						do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in IntStateQuery\n",argv0);
					}
				}
			}
		}
	}				

	free(ren);
	free(command_string);
	return 0;
}
Beispiel #6
0
int
main(int argc, char *argv[])
{
  char *registry_file = NULL, *registry_file_env = NULL;
  int need_to_free_registry_file = FALSE;
  const char *default_registry_file = "blah_job_registry.bjr";
  char *my_home;
  job_registry_entry en;
  job_status_t status=IDLE;
  int exitcode = -1; 
  char *exitreason = "";
  char *user_prefix = "";
  char *user_proxy = "";
  char *proxy_subject = "";
  int  renew_proxy = 0;
  char *wn_addr = "";
  time_t udate=0;
  char *blah_id, *batch_id;
  int ent, ret, prret, rhret;
  config_handle *cha;
  config_entry *rge, *remupd_conf;
  job_registry_handle *rha, *rhano;
  job_registry_index_mode rgin_mode = NO_INDEX;
  job_registry_updater_endpoint *remupd_head = NULL;

  if (argc > 1 && (strcmp(argv[1], "-u") == 0))
   {
    /* Obtain an index to the registry so that we can */
    /* check that the record is unique. */
    rgin_mode = BY_BLAH_ID;
    argv[1] = argv[0];
    argc--;
    argv++;
   }

  if (argc < 3)
   {
    fprintf(stderr,"Usage: %s [-u] <BLAH id> <batch id> [job status] [udate] [user prefix] [user proxy] [renew proxy] [proxy subject] [worker node] [exit code] [exit reason]\n",argv[0]);
    return 1;
   }

  blah_id  = argv[1]; 
  batch_id = argv[2]; 

  if (argc > 3) status = atoi(argv[3]);
  if (argc > 4) udate = atol(argv[4]);
  if (argc > 5) user_prefix = argv[5];
  if (argc > 6) user_proxy = argv[6];
  if (argc > 7) renew_proxy = atoi(argv[7]);
  if (argc > 8) proxy_subject = argv[8];
  if (argc > 9) wn_addr = argv[9];
  if (argc > 10) exitcode = atoi(argv[10]);
  if (argc > 11) exitreason = argv[11];
   
  cha = config_read(NULL); /* Read config from default locations. */
  if (cha != NULL)
   {
    rge = config_get("job_registry", cha);
    remupd_conf = config_get("job_registry_add_remote", cha);
    if (remupd_conf != NULL)
     {
      if (job_registry_updater_setup_sender(remupd_conf->values,
                                            remupd_conf->n_values, 2,
                                           &remupd_head) < 0)
       {
         fprintf(stderr,"%s: warning: cannot set network sender(s) up for remote update to:\n",argv[0]);
         for (ent = 0; ent < remupd_conf->n_values; ent++)
          {
           fprintf(stderr," - %s\n", remupd_conf->values[ent]);
          }
       }
     }
    if (rge != NULL) registry_file = rge->value;
   }

  /* Env variable takes precedence */
  registry_file_env = getenv("BLAH_JOB_REGISTRY_FILE");
  if (registry_file_env != NULL) registry_file = registry_file_env;

  if (registry_file == NULL)
   {
    my_home = getenv("HOME");
    if (my_home == NULL) my_home = ".";
    registry_file = (char *)malloc(strlen(default_registry_file)+strlen(my_home)+2);
    if (registry_file != NULL)
     {
      sprintf(registry_file,"%s/%s",my_home,default_registry_file);
      need_to_free_registry_file = TRUE;
     }
    else 
     {
      if (cha != NULL) config_free(cha);
      if (remupd_head != NULL) job_registry_updater_free_endpoints(remupd_head);
      return 1;
     }
   }

  JOB_REGISTRY_ASSIGN_ENTRY(en.blah_id,blah_id); 
  JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batch_id); 
  en.status = status;
  en.exitcode = exitcode;
  JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,wn_addr); 
  en.udate = udate;
  JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,exitreason); 
  en.submitter = geteuid();
  JOB_REGISTRY_ASSIGN_ENTRY(en.user_prefix,user_prefix); 
  en.proxy_link[0] = '\000'; /* Start with a valid string */
  en.updater_info[0] = '\000'; 
  en.renew_proxy = 0; /* Enable renewal only if a proxy is found */

  rha=job_registry_init(registry_file, rgin_mode);

  if (rha == NULL)
   {
    if (errno == EACCES)
     {
      /* Try nonpriv update. It may work. */
      rhano = job_registry_init(registry_file, NAMES_ONLY);
      if (cha != NULL) config_free(cha);
      if (need_to_free_registry_file) free(registry_file);
      if (rhano != NULL)
       {
        if (strlen(user_proxy) > 0)
         {
          prret = job_registry_set_proxy(rhano, &en, user_proxy);
          if (prret < 0)
           {
            fprintf(stderr,"%s: warning: setting proxy to %s: ",argv[0],user_proxy);
            perror("");
           }
          else en.renew_proxy = renew_proxy;
         }
        ret=job_registry_append_nonpriv(rhano, &en);
        job_registry_destroy(rhano);
        if (ret < 0)
         {
          fprintf(stderr,"%s: job_registry_append_nonpriv returns %d: ",argv[0],ret);
          perror("");
          if (remupd_head != NULL) job_registry_updater_free_endpoints(remupd_head);
          return 4;
         } 
        else goto happy_ending;
       }
     }
    else
     {
      fprintf(stderr,"%s: error initialising job registry: ",argv[0]);
      perror("");
     }
    if (remupd_head != NULL) job_registry_updater_free_endpoints(remupd_head);
    return 2;
   }

  /* Filename is stored in job registry handle. - Don't need these anymore */
  if (cha != NULL) config_free(cha);
  if (need_to_free_registry_file) free(registry_file);

  if (strlen(user_proxy) > 0)
   {
    prret = job_registry_set_proxy(rha, &en, user_proxy);
    if (prret < 0)
     {
      fprintf(stderr,"%s: warning: setting proxy to %s: ",argv[0],user_proxy);
      perror("");
     }
    else en.renew_proxy = renew_proxy;
   }

  en.subject_hash[0] = '\000';
  if (strlen(proxy_subject) > 0)
   {
    job_registry_compute_subject_hash(&en, proxy_subject);
    rhret = job_registry_record_subject_hash(rha, en.subject_hash, 
                                             proxy_subject, TRUE);  
    if (rhret < 0)
     {
      fprintf(stderr,"%s: warning: recording proxy subject %s (hash %s): ", argv[0], proxy_subject, en.subject_hash);
      perror("");
     }
   }

  if ((ret=job_registry_append(rha, &en)) < 0)
   {
    if (errno == EACCES)
     {
      /* Try nonpriv update. It may work. */
      ret=job_registry_append_nonpriv(rha, &en);
      job_registry_destroy(rha);
      if (ret < 0)
       {
        fprintf(stderr,"%s: job_registry_append_nonpriv returns %d: ",argv[0],ret);
        perror("");
        if (remupd_head != NULL) job_registry_updater_free_endpoints(remupd_head);
        return 5;
       } 
      else goto happy_ending;
     }

    fprintf(stderr,"%s: job_registry_append returns %d: ",argv[0],ret);
    perror("");
    job_registry_destroy(rha);
    if (remupd_head != NULL) job_registry_updater_free_endpoints(remupd_head);
    return 3;
   } 

  job_registry_destroy(rha);

 happy_ending:
  if (remupd_head != NULL)
   {
    if (job_registry_send_update(remupd_head, &en,
              (strlen(proxy_subject) > 0 ? proxy_subject : NULL),
              (strlen(user_proxy) > 0 ? user_proxy : NULL)) < 0)
     {
      fprintf(stderr,"%s: warning: sending network update: ",argv[0]);
      perror("");
     }
    job_registry_updater_free_endpoints(remupd_head);
   }
  return 0;
}