Exemple #1
0
/*
 * AssignFinalState
 *
 * Builds a registry entry for batchid with status COMPLETED, exit code 999,
 * empty worker-node address and exit reason, and the current time as update
 * date, then writes it with job_registry_update().
 *
 * On a successful update the change is logged, the proxy attached to the
 * entry is unlinked and, when remupd_conf is set, the entry is forwarded
 * with job_registry_send_update().
 *
 * @param batchid  Batch-system job id of the entry to finalise.
 * @return Always 0; failures are only reported on stderr / in the debug log.
 */
int AssignFinalState(char *batchid){

	job_registry_entry en;
	int ret;
	time_t now;

	now=time(0);
	
	JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,batchid);
	en.status=COMPLETED;
	en.exitcode=999;
	en.udate=now;
	JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"\0");
	JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"\0");
		
	if ((ret=job_registry_update(rha, &en)) < 0){
		/* A missing record is tolerated silently; anything else is reported. */
		if(ret != JOB_REGISTRY_NOT_FOUND){
			/* Report the job id: the previous code printed an uninitialised int here. */
			fprintf(stderr,"Update of record %s returns %d: ",batchid,ret);
			perror("");
		}
	} else {
		/* NOTE(review): en.user_prefix is never assigned in this function, so it
		 * is only meaningful if job_registry_update() fills it in - confirm. */
		do_log(debuglogfile, debug, 2, "%s: registry update in AssignStateQuery for: jobid=%s creamjobid=%s status=%d\n",argv0,en.batch_id,en.user_prefix,en.status);
		job_registry_unlink_proxy(rha, &en);
		if (remupd_conf != NULL){
			/* Parenthesised so that ret holds the call's return value and not
			 * the result of the comparison. */
			if ((ret=job_registry_send_update(remupd_head_send,&en,NULL,NULL))<=0){
				do_log(debuglogfile, debug, 2, "%s: Error creating endpoint in AssignFinalState\n",argv0);
			}
		}
	}

	return 0;
}
Exemple #2
0
int AssignState (char *element, char *status, char *exit, char *reason, char *wn, char *udate){
    char **id_element;
    job_registry_entry en;
    time_t now;
    char *string_now=NULL;
    int i=0;
    int n=strtoken(element, '.', &id_element);
    int iret;
    
    if(id_element[0]){
	JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,id_element[0]);
	en.status=atoi(status);
	en.exitcode=atoi(exit);
	JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,wn);
	JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,reason);
	now=time(0);
	string_now=make_message("%d",now);
	JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now)
	en.udate=now;
	free(string_now);
    }else{
	if((element=calloc(STR_CHARS,1)) == 0){
	    sysfatal("can't malloc cmd in GetAndSend: %r");
	}
    }
    if ((iret=job_registry_update(rha, &en)) < 0){
	fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id);
	perror("");
    }else{
	if (en.status == REMOVED || en.status == COMPLETED){
	    job_registry_unlink_proxy(rha, &en);
	}
    }
    freetoken(&id_element,n);
    return 0;
}
Exemple #3
0
/*
 * ReceiveUpdateFromNetwork
 *
 * Drains all pending updates returned by job_registry_receive_update() and
 * applies each one to the local registry:
 *  - an entry whose batch id is not yet in the registry is appended;
 *  - for a known entry the subject hash and proxy link already stored in the
 *    registry are kept when both are non-empty; otherwise, when a proxy path
 *    was received, the proxy is attached and the proxy subject (if any) is
 *    hashed and recorded. The entry is then written back if
 *    job_registry_need_update() reports a difference.
 *
 * @return Always 0; failures are only reported on stderr / in the debug log.
 */
int 
ReceiveUpdateFromNetwork()
{
	char *proxy_path, *proxy_subject;
	int timeout_ms = 0;
	int ret, prret, rhret;
	job_registry_entry *nen;
	job_registry_entry *ren;
  
	proxy_path = NULL;
	proxy_subject = NULL;
	
	while ((nen = job_registry_receive_update(remupd_pollset, remupd_nfds,timeout_ms, &proxy_subject, &proxy_path)) != NULL){
	
		/* Never trust the hash / link sent by the peer: start from empty. */
		JOB_REGISTRY_ASSIGN_ENTRY(nen->subject_hash,"\0");
		JOB_REGISTRY_ASSIGN_ENTRY(nen->proxy_link,"\0");
		
		if ((ren=job_registry_get(rha, nen->batch_id)) == NULL){
			if ((ret=job_registry_append(rha, nen)) < 0){
				fprintf(stderr,"%s: Warning: job_registry_append returns %d: ",argv0,ret);
				perror("");
			} 
		}else{
		
			if(ren->subject_hash!=NULL && strlen(ren->subject_hash) && ren->proxy_link!=NULL && strlen(ren->proxy_link)){
				JOB_REGISTRY_ASSIGN_ENTRY(nen->subject_hash,ren->subject_hash);
				JOB_REGISTRY_ASSIGN_ENTRY(nen->proxy_link,ren->proxy_link);
			}else{
				if (proxy_path != NULL && strlen(proxy_path) > 0){
					prret = job_registry_set_proxy(rha, nen, proxy_path);
					if (prret < 0){
						do_log(debuglogfile, debug, 1, "%s: warning: setting proxy to %s\n",argv0,proxy_path);
						fprintf(stderr,"%s: warning: setting proxy to %s: ",argv0,proxy_path);
						perror("");
						/* Make sure we don't renew non-existing proxies */
						nen->renew_proxy = 0;
					}
  
					nen->subject_hash[0] = '\000';
					if (proxy_subject != NULL && strlen(proxy_subject) > 0){
						job_registry_compute_subject_hash(nen, proxy_subject);
						rhret = job_registry_record_subject_hash(rha, nen->subject_hash, proxy_subject, TRUE);  
						if (rhret < 0){
							do_log(debuglogfile, debug, 1, "%s: warning: recording proxy subject %s (hash %s)\n",argv0, proxy_subject, nen->subject_hash);
							fprintf(stderr,"%s: warning: recording proxy subject %s (hash %s): ",argv0, proxy_subject, nen->subject_hash);
							perror("");
						}
					}
				}
			}
			if(job_registry_need_update(ren,nen,JOB_REGISTRY_UPDATE_ALL)){
				if ((ret=job_registry_update(rha, nen)) < 0){
					fprintf(stderr,"%s: Warning: job_registry_update returns %d: ",argv0,ret);
					perror("");
				}
			} 
			/* NOTE(review): ren is never released here. If job_registry_get()
			 * returns memory owned by the caller this leaks one entry per
			 * update - confirm against its documentation and free it. */
		}

		/* Release the proxy strings on every path and forget the pointers.
		 * Previously they were freed on one branch only (leaking elsewhere)
		 * and kept dangling, so a later iteration could read or free them
		 * again. */
		free(proxy_path);
		proxy_path = NULL;
		free(proxy_subject);
		proxy_subject = NULL;

		free(nen);
	}

	/* The final (NULL-returning) call may still have handed out strings. */
	free(proxy_path);
	free(proxy_subject);
  
	return 0;
}
Exemple #4
0
int FinalStateQuery(char *query,char *queryStates, char *query_err){

    char line[STR_CHARS],fail[6],qExit[10],qFailed[10],qHostname[100],qStatus[2],command_string[100];
    char **saveptr1,**saveptr2,**list_query,**list_queryStates;
    FILE *file_output;
    int numQuery=0,numQueryStates=0,j=0,l=0,cont=0,cont2=0, nq=0;
    time_t now;
    char string_now[11];
    job_registry_entry en;
    int iret;
    
    numQuery=strtoken(query,' ',&list_query);
    nq=numQuery;
    numQueryStates=strtoken(queryStates,' ',&list_queryStates);
    if (numQuery!=numQueryStates) return 1;
    
    sprintf(command_string,"%s/qstat -u '*'",sge_binpath);
    if (debug) do_log(debuglogfile, debug, 1, "+-+line 433, command_string:%s\n",command_string);
    
    //load in qstatJob list of jobids from qstat command exec
    file_output = popen(command_string,"r");
    if (file_output == NULL) return 0;
    while (fgets(line,sizeof(line), file_output) != NULL){
	cont=strtoken(line, ' ', &saveptr1);
	if ((strcmp(saveptr1[0],"job-ID")!=0)&&(strncmp(saveptr1[0],"-",1)!=0)){
	    for (l=0;l<nq;l++){
		if (strcmp(list_query[l],saveptr1[0])==0){
		    if (strcmp(list_queryStates[l],saveptr1[4])!=0){
			now=time(0);
			sprintf(string_now,"%d",now);
			if (strcmp(saveptr1[4],"u")==0){
			    JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]);
			    en.status=0;
			    en.exitcode=0;
			    JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"");
			    JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0");
			    JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now)
			    en.udate=now;
			    if ((iret=job_registry_update(rha, &en)) < 0){
				fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id);
				perror("");
			    }
			}
			if (strcmp(saveptr1[4],"q")==0){
			    JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]);
			    en.status=1;
			    en.exitcode=0;
			    JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"");
			    JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0");
			    JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now)
			    en.udate=now;
			    if ((iret=job_registry_update(rha, &en)) < 0){
				fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id);
				perror("");
			    }
			}
			if (strcmp(saveptr1[4],"r")==0){
			    cont2=strtoken(saveptr1[7], '@', &saveptr2);
			    JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]);
			    en.status=2;
			    en.exitcode=0;
			    JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,saveptr2[1]);
			    JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0");
			    JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now)
			    en.udate=now;
			    if ((iret=job_registry_update(rha, &en)) < 0){
				fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id);
				perror("");
			    }
			    freetoken(&saveptr2,cont2);
			}
			if ((strcmp(saveptr1[4],"hr")==0)||strcmp(saveptr1[4],"hqw")==0){
			    JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]);
			    en.status=5;
			    en.exitcode=0;
			    JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"");
			    JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"0");
			    JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now)
			    en.udate=now;
			    if ((iret=job_registry_update(rha, &en)) < 0){
				fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id);
				perror("");
			    }
			}
		    }
		    //i must put out element from query
		    for (j=l;j<nq;j++)
			if (list_query[j+1]!=NULL) strcpy(list_query[j],list_query[j+1]);
		    for (j=l;j<nq;j++)
			if (list_queryStates[j+1]!=NULL) strcpy(list_queryStates[j],list_queryStates[j+1]);
		    nq--;
		    break;
		}
	    }
	}
	line[0]='\0';
	freetoken(&saveptr1,cont);
    }
    pclose( file_output );
    sprintf(query_err,"\0");
    //now we have check in list_query only states that not change status 
    //because they're not in qstat result
    for (l=0; l<nq; l++){
	sprintf(command_string,"%s/qacct -j '%s'",sge_binpath,list_query[l]);
	if (debug) do_log(debuglogfile, debug, 1, "+-+line 520,command_string:%s\n",command_string);
	file_output = popen(command_string,"r");
	if (file_output == NULL) return 1;
	//if a job number is here means that job was in query previously and
	//if now it's not in query and not finished (NULL qstat) it was deleted 
	//or it's on transition time
	if (fgets( line,sizeof(line), file_output )==NULL){
	    strcat(query_err,list_query[l]);
	    strcat(query_err," ");
	    pclose( file_output );
	    continue;
	}

	//there is no problem to lost first line with previous fgets, because 
	//it's only a line of =============================================
	while (fgets( line,sizeof(line), file_output )!=NULL){
	    cont=strtoken(line, ' ', &saveptr1);
	    if (strcmp(saveptr1[0],"hostname")==0) strcpy(qHostname,saveptr1[1]);;
	    if (strcmp(saveptr1[0],"failed")==0) strcpy(qFailed,saveptr1[1]);
	    if (strcmp(saveptr1[0],"exit_status")==0) strcpy(qExit,saveptr1[1]);
	    freetoken(&saveptr1,cont);
	}
	pclose( file_output );
	now=time(0);
	sprintf(string_now,"%d",now);
	if ((strcmp(qExit,"137")==0)||(strcmp(qExit,"143")==0)){
	    JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]);
	    en.status=3;
	    en.exitcode=atoi(qExit);
	    JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname);
	    JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"");
	    JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now)
	    en.udate=now;
	    if ((iret=job_registry_update(rha, &en)) < 0){
		fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id);
		perror("");
	    }else job_registry_unlink_proxy(rha, &en);
	}else{
	    JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,list_query[l]);
	    en.status=4;
	    en.exitcode=atoi(qExit);
	    JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname);
	    JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,qFailed);
	    JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now)
	    en.udate=now;
	    if ((iret=job_registry_update(rha, &en)) < 0){
		fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id);
		perror("");
	    }else job_registry_unlink_proxy(rha, &en);
	}
    }
    freetoken(&list_query,numQuery);
    freetoken(&list_queryStates,numQueryStates);
    if (debug) do_log(debuglogfile, debug, 1, "+-+query_err:%s\n",query_err);
    //now check acumulated error jobids to verify if they are an error or not
    if (strcmp(query_err,"\0")!=0){
	sleep(60);
	cont=0;
	int n=0;
	char cmd[10]="\0";
	
	cont=strtoken(query_err, ' ', &list_query);
	
	while (n < cont){
	    if(list_query[n]) strcpy(cmd,list_query[n]);
	    else return 1;
	    sprintf(command_string,"%s/qacct -j '%s'",sge_binpath,cmd);
	    if (debug) do_log(debuglogfile, debug, 1, "+-+line 587 error, command_string:%s\n",command_string);
	    file_output = popen(command_string,"r");
	    if (file_output == NULL) return 1;

	    //if a job number is here means that job was in query previously and
	    //if now it's not in query and not finished (NULL qstat) it was deleted 
	    if (fgets( line,sizeof(line), file_output )==NULL){
		JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,cmd);
		en.status=3;
		en.exitcode=3;
		JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,"");
		JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"reason=3");
		now=time(0);
		sprintf(string_now,"%d",now);
		JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now)
		en.udate=now;
		if ((iret=job_registry_update(rha, &en)) < 0){
		    fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id);
		    perror("");
		}else job_registry_unlink_proxy(rha, &en);
		pclose( file_output );
		n++;
		continue;
	    }
	    //there is no problem to lost first line with previous fgets, because 
	    //it's only a line of =============================================
	    while (fgets( line,sizeof(line), file_output )!=NULL){
		cont=strtoken(line, ' ', &saveptr1);
		if (strcmp(saveptr1[0],"hostname")==0) strcpy(qHostname,saveptr1[1]);
		if (strcmp(saveptr1[0],"failed")==0) strcpy(qFailed,saveptr1[1]);
		if (strcmp(saveptr1[0],"exit_status")==0) strcpy(qExit,saveptr1[1]);
		freetoken(&saveptr1,cont);
	    }
	    pclose( file_output );
	    now=time(0);
	    sprintf(string_now,"%d",now);
	    if ((strcmp(qExit,"137")==0)||(strcmp(qExit,"143")==0)){
		JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,cmd);
		en.status=3;
		en.exitcode=atoi(qExit);
		JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname);
		JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,"");
		JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now)
		en.udate=now;
		if ((iret=job_registry_update(rha, &en)) < 0){
		    fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id);
		    perror("");
		}else job_registry_unlink_proxy(rha, &en);
	    }else{
		JOB_REGISTRY_ASSIGN_ENTRY(en.batch_id,cmd);
		en.status=4;
		en.exitcode=atoi(qExit);
		JOB_REGISTRY_ASSIGN_ENTRY(en.wn_addr,qHostname);
		JOB_REGISTRY_ASSIGN_ENTRY(en.exitreason,qFailed);
		JOB_REGISTRY_ASSIGN_ENTRY(en.updater_info,string_now)
		en.udate=now;
		if ((iret=job_registry_update(rha, &en)) < 0){
		    fprintf(stderr,"Update of record returns %d: \nJobId: %d", iret,en.batch_id);
		    perror("");
		}else job_registry_unlink_proxy(rha, &en);
	    }
	    n++;
	}
	freetoken(&list_query,cont);
    }
    return 0;
}