Esempio n. 1
0
/*
 * Dispatch-manager callback for a hard kill request.
 *
 * _job_id: pointer to an int holding the job id. A NULL pointer is ignored.
 *
 * The job is looked up with gw_job_pool_get(job_id, GW_TRUE). If it does not
 * exist, an error is logged and GW_RM_KILL_FAILED is triggered on
 * gw_dm.rm_am with the same _job_id pointer. Otherwise the action taken
 * depends on the current job->job_state.
 */
void gw_dm_kill_hard (void *_job_id)
{
    gw_job_t *   job;
    int          job_id;
    int          rt;
    int          array_id;
    int          task_id;
    gw_array_t * array;
	char   		 conf_filename[GW_MSG_STRING_LONG];
	    
	/* ----------------------------------------------------------- */  
    /* 0.- Get job pointer                                         */
    /* ----------------------------------------------------------- */  
    
	if ( _job_id != NULL )
	{
		job_id = *( (int *) _job_id );

		/* NOTE(review): GW_TRUE presumably asks for the job to be returned
		 * locked -- confirm against gw_job_pool_get. */
		job = gw_job_pool_get(job_id, GW_TRUE);

		if ( job == NULL )
		{
			gw_log_print("DM",'E',"Job %i does not exist (KILL_HARD).\n",job_id);

            /* _job_id is handed to the request manager, not freed here. */
            gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_FAILED",_job_id);
			return;
		}
	}
	else
		return;
   
	/* ----------------------------------------------------------- */  
    /* 1.- Hard Kill the job                                       */
    /* ----------------------------------------------------------- */  
	
    switch (job->job_state)
    {
    	
		/* Migration states: also account for the next history record
		 * (running-jobs counter and exit timestamp) before the common
		 * PROLOG handling below. */
		case GW_JOB_STATE_MIGR_PROLOG:
		case GW_JOB_STATE_MIGR_EPILOG:				
                        
            gw_host_dec_rjobs(job->history->next->host);
                        			
			job->history->next->stats[EXIT_TIME] = time(NULL);
						    	
		/* fall through */
    	case GW_JOB_STATE_PROLOG:
            
            /* Release the slots the job holds on its current history host. */
            gw_host_dec_uslots(job->history->host, job->template.np);
Esempio n. 2
0
/*
 * Dispatch-manager callback: a job has reached the STOPPED state.
 *
 * _job_id: pointer to an int holding the job id. A NULL pointer is ignored.
 *          If the job cannot be found the pointer is freed here; otherwise
 *          it is passed on with the GW_RM_STOP_SUCCESS trigger.
 */
void gw_dm_stopped ( void *_job_id )
{
    gw_job_t * stopped_job;
    int        id;

    /* ---- 0.- Resolve the job from its id ---- */

    if ( _job_id == NULL )
        return;

    id          = *( (int *) _job_id );
    stopped_job = gw_job_pool_get(id, GW_TRUE);

    if ( stopped_job == NULL )
    {
        gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_STOPPED).\n",id);

        free(_job_id);
        return;
    }

    /* ---- 1.- Move the job to STOPPED ---- */

    gw_job_set_state(stopped_job, GW_JOB_STATE_STOPPED, GW_FALSE);

    /* The job no longer counts as running for its user or its host. */
    gw_user_pool_dec_running_jobs(stopped_job->user_id);
    gw_host_dec_rjobs(stopped_job->history->host);

    /* ---- 2.- Tell the request manager the stop succeeded ---- */

    gw_am_trigger(gw_dm.rm_am,"GW_RM_STOP_SUCCESS", _job_id);

    pthread_mutex_unlock(&(stopped_job->mutex));
}
Esempio n. 3
0
/*
 * Dispatch-manager callback: put a job back into the PENDING state so that
 * it gets rescheduled.
 *
 * _job_id: pointer to an int holding the job id. A NULL pointer is ignored;
 *          otherwise the pointer is freed by this function on every path.
 *
 * The job's user running-jobs counter is decremented and the restart counter
 * is incremented. When the job has a history record, the host running-jobs
 * counter is decremented too, the scheduler is told about the failure (for
 * execution-error / performance reasons) and asked to schedule the job
 * again. When there is no history record only an error is logged.
 */
void gw_dm_pending( void *_job_id )
{
    gw_job_t *job;
    int job_id;
    gw_boolean_t failed;
    gw_migration_reason_t reason;
    
    /* ----------------------------------------------------------- */  
    /* 0.- Get job pointer                                         */
    /* ----------------------------------------------------------- */  
    
    if ( _job_id == NULL )
        return;

    job_id = *( (int *) _job_id );

    job = gw_job_pool_get(job_id, GW_TRUE);

    if ( job == NULL )
    {
        gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_PENDING).\n",job_id);

        free(_job_id);
        return;
    }

    /* ----------------------------------------------------------- */  
    /* 1.- Set state                                               */
    /* ----------------------------------------------------------- */  

    gw_job_print(job,"DM",'I',"Rescheduling job.\n");
    gw_log_print("DM",'I',"Rescheduling job %d.\n", job->id);

    gw_job_set_state (job, GW_JOB_STATE_PENDING, GW_FALSE);
    
    /* -------- Update User running jobs -------- */
                        
    gw_user_pool_dec_running_jobs(job->user_id);
    
    /* ------------- Restart counter --------------- */
    
    job->restarted++;

    /* ------ Update Host running jobs & notify the Scheduler ------ */
   
    if (job->history != NULL)
    {
        /* The host counter lives behind job->history, so it may only be
         * touched once the history record is known to exist. */
        gw_host_dec_rjobs(job->history->host);

        reason = GW_REASON_NONE;
            
        failed = (job->history->reason == GW_REASON_EXECUTION_ERROR)
                || (job->history->reason == GW_REASON_PERFORMANCE);
 
        if (failed)
        {
            gw_dm_mad_job_failed(&gw_dm.dm_mad[0], job->history->host->host_id,
                job->user_id, job->history->reason);
        }
                                                                
        gw_dm_mad_job_schedule(&gw_dm.dm_mad[0], job_id, job->array_id,
                job->user_id, reason);
    }
    else
    {
        gw_log_print("DM",'E',"Rescheduling job %d, but no history records found.\n", 
                job->id);
    }
                           
    /* ------------------------------------------------- */
                        
    free(_job_id);
            
    pthread_mutex_unlock(&(job->mutex));
}
Esempio n. 4
0
/*
 * Dispatch-manager callback: a job has finished its epilog and becomes a
 * zombie.
 *
 * _job_id: pointer to an int holding the job id. A NULL pointer is ignored.
 *          The pointer is either passed on with an action-manager trigger
 *          or freed here, depending on the path taken.
 *
 * Behaviour depends on the state the job is in when the callback fires:
 *   - GW_JOB_STATE_EPILOG:      normal termination. The job is kept, usage
 *                               is sent, the scheduler is notified of the
 *                               success and job dependencies are re-checked.
 *   - GW_JOB_STATE_KILL_EPILOG: termination after a kill. The job.conf file
 *                               is removed, the job is freed from the pool
 *                               (and from its array, if any) and
 *                               GW_RM_KILL_SUCCESS is triggered.
 *   - any other state:          an error is logged and _job_id is freed.
 */
void gw_dm_zombie ( void *_job_id )
{
    gw_job_t *   job;
    gw_array_t * array;
    int          job_id;
    int          task_id;
    int          array_id;
    int          rt;
    int          len;
    char         conf_filename[2048];
    time_t       prolog, epilog;

    /* ----------------------------------------------------------- */  
    /* 0.- Get job pointer                                         */
    /* ----------------------------------------------------------- */  
    
    if ( _job_id == NULL )
        return;

    job_id = *( (int *) _job_id );

    job = gw_job_pool_get(job_id, GW_TRUE);

    if ( job == NULL )
    {
        gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_ZOMBIE).\n",job_id);

        free(_job_id);
        return;
    }

    /* ----------------------------------------------------------- */  
    /* 1.- Update Job state                                        */
    /* ----------------------------------------------------------- */  
    
    switch (job->job_state)
    {
        case GW_JOB_STATE_EPILOG:
        
            gw_job_set_state(job, GW_JOB_STATE_ZOMBIE, GW_FALSE);
       
            gw_log_print("DM",'I',"Job %i done, with exit code %i.\n",job->id, job->exit_code);

            job->history->reason = GW_REASON_NONE;
            job->exit_time       = time(NULL);

            /* ------------- Print job history and send usage ------------ */

            gw_job_print(job,"DM",'I',"Job done, history:\n");            
            gw_job_print_history(job);
            gw_job_send_usage(job);

            /* _job_id is passed on to a waiting client or to the
             * automatic dispose (kill) action; otherwise it is freed here.
             * From this point on only the local job_id copy is used. */
            if ( job->client_waiting > 0 )
                gw_am_trigger(gw_dm.rm_am,"GW_RM_WAIT_SUCCESS", _job_id);
            else
            {
                if (gw_conf.dispose == GW_TRUE)
                    gw_am_trigger(&(gw_dm.am), "GW_DM_KILL", _job_id);
                else
                    free(_job_id);
            }

            /* -------- Update User & Host running jobs -------- */
            
            gw_user_pool_dec_running_jobs(job->user_id);
            
            gw_host_dec_rjobs(job->history->host);
            
            /* --------       Notify the scheduler      -------- */
                                  
            prolog = gw_job_history_get_prolog_time(job->history);
            epilog = gw_job_history_get_epilog_time(job->history);

            gw_dm_mad_job_success(&gw_dm.dm_mad[0],
                    job->history->host->host_id,
                    job->user_id,
                    (prolog + epilog),
                    job->history->stats[SUSPENSION_TIME],
                    job->history->stats[ACTIVE_TIME]);
                           
            pthread_mutex_unlock(&(job->mutex));

            /* -------- Update other jobs dependencies -------- */
            gw_job_pool_dep_check(job_id);

            break;
                    
        case GW_JOB_STATE_KILL_EPILOG:
            
            gw_job_set_state(job, GW_JOB_STATE_ZOMBIE, GW_FALSE);            

            job->exit_time = time(NULL);

            /* ------------- Print job history and send usage ------------ */
            
            gw_job_print(job,"DM",'I',"Job killed, history:\n");
            gw_job_print_history(job);
            gw_job_send_usage(job);

            /* ---------------- Free job & Notify RM ---------------- */
            
            /* Saved now: the job is freed before the array is updated. */
            array_id = job->array_id;
            task_id  = job->task_id;            

            /* -------- Update User & Host running jobs -------- */
           
            gw_user_pool_dec_running_jobs(job->user_id);

            gw_host_dec_rjobs(job->history->host);
            
            /* Bounded formatting: a long job directory must not overflow
             * conf_filename, and a truncated path must never be unlinked. */
            len = snprintf(conf_filename, sizeof(conf_filename),
                           "%s/job.conf", job->directory);

            if ( len >= 0 && (size_t) len < sizeof(conf_filename) )
                unlink(conf_filename);
            else
                gw_log_print("DM",'E',"Could not build job.conf path for job %i.\n",
                        job_id);

            pthread_mutex_unlock(&(job->mutex));

            /* ------------------------------------------------- */            
            
            gw_job_pool_free(job_id);
            
            gw_log_print("DM",'I',"Job %i killed and freed.\n", job_id);        

            if (array_id != -1)
            {
                array = gw_array_pool_get_array(array_id,GW_TRUE);
            
                if ( array != NULL )
                {                        
                    rt = gw_array_del_task(array,task_id);
                    pthread_mutex_unlock(&(array->mutex));

                    /* A zero return frees the whole array.
                     * NOTE(review): presumably 0 means no tasks are
                     * left -- confirm against gw_array_del_task. */
                    if (rt == 0)
                    {
                        gw_array_pool_array_free(array_id);
                        gw_log_print("DM",'I',"Array %i freed\n",array_id);
                    }
                }
                else
                    gw_log_print("DM",'E',"Could not delete task %i from array %i.\n",
                            task_id, array_id);
            }
            
            gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_SUCCESS", _job_id);
            break;

        default:
            gw_log_print("DM",'E',"Zombie callback in wrong job (%i) state.\n", job_id);

            free(_job_id);
            
            pthread_mutex_unlock(&(job->mutex));
            break;
    }
}
Esempio n. 5
0
/*
 * Dispatch-manager callback: a job has reached the FAILED state.
 *
 * _job_id: pointer to an int holding the job id. A NULL pointer is ignored.
 *          The pointer is passed on with GW_RM_WAIT_SUCCESS when a client
 *          is waiting for the job; otherwise it is freed here.
 *
 * The scheduler is notified through gw_dm_mad_job_failed only when the
 * reason in the job's current history record is an execution error or a
 * performance problem.
 */
void gw_dm_failed ( void *_job_id )
{
    gw_job_t * failed_job;
    int        id;

    /* ---- 0.- Resolve the job from its id ---- */

    if ( _job_id == NULL )
        return;

    id         = *( (int *) _job_id );
    failed_job = gw_job_pool_get(id, GW_TRUE);

    if ( failed_job == NULL )
    {
        gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_FAILED).\n",id);

        free(_job_id);
        return;
    }

    /* ---- 1.- Mark the job as failed and dump its history ---- */

    gw_log_print("DM",'I',"Job %i failed.\n",failed_job->id);

    gw_job_set_state(failed_job, GW_JOB_STATE_FAILED, GW_FALSE);
    gw_job_print(failed_job,"DM",'I',"Job failed, history:\n");

    gw_job_print_history(failed_job);

    failed_job->exit_time = time(NULL);

    /* Without a waiting client, the id buffer is released here. */
    if ( !(failed_job->client_waiting > 0) )
        free(_job_id);
    else
        gw_am_trigger(gw_dm.rm_am,"GW_RM_WAIT_SUCCESS", _job_id);

    /* ---- 2.- The job no longer counts as running ---- */

    gw_user_pool_dec_running_jobs(failed_job->user_id);

    gw_host_dec_rjobs(failed_job->history->host);

    /* ---- 3.- Report host-related failures to the scheduler ---- */

    switch (failed_job->history->reason)
    {
        case GW_REASON_EXECUTION_ERROR:
        case GW_REASON_PERFORMANCE:
            gw_dm_mad_job_failed(&gw_dm.dm_mad[0],
                                 failed_job->history->host->host_id,
                                 failed_job->user_id,
                                 failed_job->history->reason);
            break;

        default:
            break;
    }

    pthread_mutex_unlock(&(failed_job->mutex));
}