示例#1
0
/*
 * Handles a cancel request for a job on the execution manager side.
 *
 * _job_id : pointer to a heap-allocated int holding the job id. The buffer
 *           is freed here as soon as the id has been copied out. A NULL
 *           pointer makes the call a no-op.
 *
 * If the job's execution state satisfies issubmitted(), the cancel request
 * is forwarded to the job's execution MAD; otherwise the request is only
 * logged. The job mutex is released on every path that obtained the job
 * (presumably gw_job_pool_get(..., GW_TRUE) returns the job locked).
 */
void gw_em_cancel(void *_job_id)
{
    int           job_id;
    gw_job_t      *job;
    gw_em_mad_t   *mad;
    gw_em_state_t current_em_state;

    if ( _job_id == NULL )
        return;

    job_id = *( (int *) _job_id );
    free(_job_id);

    job = gw_job_pool_get(job_id, GW_TRUE);

    if ( job == NULL )
    {
        /* job_id is an int: it has to be formatted with %i, not %s */
        gw_log_print("EM",'E',"Job %i no longer exists (CANCEL).\n", job_id);
        return;
    }

    /* -------------------------------------------------------------------- */

    current_em_state = job->em_state;

    if ( !issubmitted(current_em_state) )
    {
        gw_log_print ("EM",'W',"Ignoring cancel request for job %i, will re-try.\n",
                job_id);

        pthread_mutex_unlock(&(job->mutex));
        return;
    }

    gw_job_print (job,"EM",'I',"Cancelling job.\n");

    mad = job->history->em_mad;

    /* Warning! When in Migration Cancel, the previous MAD should be used */
    if (job->job_state == GW_JOB_STATE_MIGR_CANCEL)
    {
        if (job->history->next == NULL)
        {
            gw_log_print("EM",'E',"Previous history record of job %i no longer exists\n", job_id);
            pthread_mutex_unlock(&(job->mutex));
            return;
        }

        mad = job->history->next->em_mad;
    }

    gw_em_mad_cancel(mad, job_id);

    gw_log_print ("EM",'I',"Cancelling job %i.\n", job_id);

    /* -------------------------------------------------------------------- */

    pthread_mutex_unlock(&(job->mutex));
}
示例#2
0
void gw_dm_kill_hard (void *_job_id)
{
    gw_job_t *   job;
    int          job_id;
    int          rt;
    int          array_id;
    int          task_id;
    gw_array_t * array;
	char   		 conf_filename[2048];
	    
	/* ----------------------------------------------------------- */  
    /* 0.- Get job pointer                                         */
    /* ----------------------------------------------------------- */  
    
	if ( _job_id != NULL )
	{
		job_id = *( (int *) _job_id );

		job = gw_job_pool_get(job_id, GW_TRUE);

		if ( job == NULL )
		{
			gw_log_print("DM",'E',"Job %i does not exist (KILL_HARD).\n",job_id);

            gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_FAILED",_job_id);
			return;
		}
	}
	else
		return;
   
	/* ----------------------------------------------------------- */  
    /* 1.- Hard Kill the job                                       */
    /* ----------------------------------------------------------- */  
	
    switch (job->job_state)
    {
    	
		case GW_JOB_STATE_MIGR_PROLOG:
		case GW_JOB_STATE_MIGR_EPILOG:		

			pthread_mutex_lock(&(job->history->next->host->mutex));		
			job->history->next->host->running_jobs--;    		
			pthread_mutex_unlock(&(job->history->next->host->mutex));
			
			job->history->next->stats[EXIT_TIME] = time(NULL);
						    	
    	case GW_JOB_STATE_PROLOG:
			
			pthread_mutex_lock(&(job->history->host->mutex));    	
    		job->history->host->used_slots--;    		    			
			pthread_mutex_unlock(&(job->history->host->mutex));
			      
		case GW_JOB_STATE_EPILOG:
		case GW_JOB_STATE_EPILOG_STD:
		case GW_JOB_STATE_EPILOG_RESTART:
		case GW_JOB_STATE_EPILOG_FAIL:
		
			job->history->reason = GW_REASON_KILL;
			
		case GW_JOB_STATE_STOP_EPILOG:
		case GW_JOB_STATE_KILL_EPILOG:
			
			pthread_mutex_lock(&(job->history->host->mutex));    	
			job->history->host->running_jobs--;									
			pthread_mutex_unlock(&(job->history->host->mutex));
						
			job->exit_time = time(NULL);
			job->history->stats[EXIT_TIME] = time(NULL);
						
	    	job->tm_state = GW_TM_STATE_HARD_KILL;

			if (job->history != NULL) 
			{
            	gw_log_print("DM",'I',"Cancelling prolog/epilog transfers of job %i.\n", job_id);
            	
				gw_tm_mad_end(job->history->tm_mad, job->id);
			}				    	
	    break;
    	
		case GW_JOB_STATE_PRE_WRAPPER:
		case GW_JOB_STATE_WRAPPER:

			job->history->reason = GW_REASON_KILL;
			
			pthread_mutex_lock(&(job->history->host->mutex));
			job->history->host->used_slots--;
			job->history->host->running_jobs--;
			pthread_mutex_unlock(&(job->history->host->mutex));
						
			job->exit_time = time(NULL);		
			job->history->stats[EXIT_TIME] = time(NULL);
						
			job->em_state = GW_EM_STATE_HARD_KILL;
		
			if (job->history != NULL) 
			{
            	gw_log_print("DM",'I',"Cancelling execution of job %i.\n", job_id);
            	
				gw_em_mad_cancel(job->history->em_mad, job_id);
			}			
		break;

		case GW_JOB_STATE_MIGR_CANCEL:
		
			pthread_mutex_lock(&(job->history->next->host->mutex));		
			job->history->next->host->used_slots--;
			job->history->next->host->running_jobs--;
			pthread_mutex_unlock(&(job->history->next->host->mutex));		

			job->history->next->stats[EXIT_TIME] = time(NULL);
			
			job->history->reason = GW_REASON_KILL;
			
   		case GW_JOB_STATE_STOP_CANCEL:
		case GW_JOB_STATE_KILL_CANCEL:
						
			pthread_mutex_lock(&(job->history->host->mutex));
			job->history->host->used_slots--;
			job->history->host->running_jobs--;
			pthread_mutex_unlock(&(job->history->host->mutex));
		
			job->exit_time = time(NULL);		
			job->history->stats[EXIT_TIME] = time(NULL);
						
			job->em_state = GW_EM_STATE_HARD_KILL;
		break;
		
		case GW_JOB_STATE_INIT:
		case GW_JOB_STATE_PENDING:
		case GW_JOB_STATE_HOLD:
		case GW_JOB_STATE_STOPPED:
		
	        job->exit_time = time(NULL);
	     break;
            
		
		case GW_JOB_STATE_FAILED:
		case GW_JOB_STATE_ZOMBIE:
		
		break;
			
        default:
            
            gw_log_print("DM",'W',"Job %i can not be killed in current state.\n", job_id);
            
            gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_FAILED",  _job_id);
            
            pthread_mutex_unlock(&(job->mutex));            
            
        return;
    }
    
	array_id = job->array_id;
	task_id  = job->task_id;

	sprintf(conf_filename, "%s/job.conf", job->directory);	
	unlink(conf_filename);
							
	pthread_mutex_unlock(&(job->mutex));
    gw_job_pool_free(job_id);
            
    gw_log_print("DM",'I',"Job %i killed (hard) and freed.\n", job_id);		

    if (array_id != -1)
    {
    	array = gw_array_pool_get_array(array_id, GW_TRUE);
            
        if ( array != NULL )
        {                        
        	rt = gw_array_del_task(array,task_id);
            pthread_mutex_unlock(&(array->mutex));
                
            if (rt == 0)
            {
                gw_array_pool_array_free(array_id);
                gw_log_print("DM",'I',"Array %i freed.\n",array_id);
            }
         }
         else
             gw_log_print("DM",'E',"Array %i does not exisit (KILL - task %i).\n",
                          array_id, task_id);
     }
            
     gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_SUCCESS", _job_id);
}
示例#3
0
/*
 * Entry point of the execution MAD driver.
 *
 * Reads commands from standard input, one per line, in the form
 *
 *     ACTION JID CONTACT RSL_FILE
 *
 * and dispatches them to the matching gw_em_mad_* function (INIT, SUBMIT,
 * RECOVER, CANCEL, POLL, FINALIZE). A non-zero status from the handler is
 * reported on standard output as "ACTION JID FAILURE info".
 *
 * Between commands the loop waits on stdin with a 1 ms select() timeout and
 * calls globus_poll() once the accumulated wait exceeds 999 microseconds.
 * Every timer_interval seconds, when mad.initialized is 1, the credentials
 * are checked through gw_em_mad_check_credentials().
 *
 * Returns 0 after FINALIZE, -1 if the Globus common module cannot be
 * activated; the process exits with -1 on select()/read() failure or EOF.
 */
int main (int argc, char **argv )
{
    int  rc;
    char action[20] = "";    /* initialised: may be printed by the timer   */
    char jid_s[20];
    int  jid = 0;
    char contact[500];
    char rsl_file[1024];
    int  status = -1;
    char info[500] = "";     /* initialised: printed on any FAILURE report */
    int end = 0;
    fd_set in_pipes;
    int j;
    char c;
    char str[4096];
    struct timeval tv;
    int timer_interval = 300;
    time_t last_timer = 0;
    time_t the_time;

    struct timeval t1,t2;
    double waited;

    setbuf(stdout,NULL);

    rc = globus_module_activate(GLOBUS_COMMON_MODULE);

    if ( rc != GLOBUS_SUCCESS )
        return -1;

    waited = 0;

    while (!end)
    {
        FD_ZERO(&in_pipes);
        FD_SET (0,&in_pipes);

        tv.tv_sec  = 0;
        tv.tv_usec = 1000;

        gettimeofday(&t1, NULL);

        rc = select(1, &in_pipes, NULL, NULL, &tv);

        gettimeofday(&t2, NULL);

        /* Accumulated time spent in select(), in microseconds */
        waited += ((t2.tv_sec - t1.tv_sec)*1000000) + (t2.tv_usec - t1.tv_usec);

        if ( waited > 999 )
        {
            globus_poll();
            waited = 0;
        }

        if (rc == -1)
        {
            exit(-1);
        }
        else if (rc == 1)
        {
            j = 0;

            /* Read one line byte by byte. Bytes beyond the buffer capacity
               are consumed but dropped so that str can never overflow, and
               nothing is stored when read() did not deliver a byte. */
            do
            {
                rc = read(0, (void *) &c, sizeof(char));

                if (rc > 0 && j < (int) sizeof(str) - 1)
                    str[j++] = c;
            }
            while ( rc > 0 && c != '\n' );

            str[j] = '\0';

            if (rc <= 0)
                exit(-1);

            /* Field widths are one less than each destination buffer so
               over-long tokens cannot overrun action/jid_s/contact/rsl_file */
            rc = sscanf(str, "%19s %19s %499s %1023[^\n]", action, jid_s,
                        contact, rsl_file);

            if (rc != 4 )
            {
                printf("FAILURE Not all four arguments defined\n");
                continue;
            }

            jid = atoi(jid_s);

            if (strcmp(action, "INIT") == 0 )
            {
                status = gw_em_mad_init(jid, info);
            }
            else if (strcmp(action, "SUBMIT") == 0 )
            {
                status = gw_em_mad_submit(jid, contact, rsl_file, info);
            }
            else if (strcmp(action, "RECOVER") == 0 )
            {
                status = gw_em_mad_recover(jid, contact, info);
            }
            else if (strcmp(action, "CANCEL") == 0 )
            {
                status = gw_em_mad_cancel(jid, info);
            }
            else if (strcmp(action, "POLL") == 0 )
            {
                status = gw_em_mad_poll(jid, info);
            }
            else if (strcmp(action, "FINALIZE") == 0 )
            {
                status = gw_em_mad_finalize(info);
                end = 1;

                return 0;
            }

            if (status != 0)
                printf("%s %d FAILURE %s\n", action, jid, info);
        }

        the_time = time(NULL);

        /* Periodic credential check */
        if (the_time - last_timer >=  timer_interval)
        {
            last_timer = the_time;

            if (mad.initialized == 1)
            {
                status = gw_em_mad_check_credentials(info);

                /* Reported against the last command's action and job id */
                if (status != 0)
                    printf("%s %d FAILURE %s\n", action, jid, info);
            }
        }
    }

    return 0;
}