Example #1
/* _proc_cluster - process job cancellation on a specific cluster */
static int
_proc_cluster(void)
{
	int filter_cnt = 0;
	int rc;

	if (has_default_opt() && !has_job_steps()) {
		rc = _signal_job_by_str();
		return rc;
	}

	_load_job_records();
	rc = _verify_job_ids();
	if ((opt.account) ||
	    (opt.job_name) ||
	    (opt.nodelist) ||
	    (opt.partition) ||
	    (opt.qos) ||
	    (opt.reservation) ||
	    (opt.state != JOB_END) ||
	    (opt.user_name) ||
	    (opt.wckey)) {
		filter_cnt = _filter_job_records();
	}
	rc = MAX(_cancel_jobs(filter_cnt), rc);
	slurm_free_job_info_msg(job_buffer_ptr);

	return rc;
}
Example #2
/*
 * slurm_xlate_job_id - Translate a Slurm job ID string into a slurm job ID
 *	number. If this job ID contains an array index, map this to the
 *	equivalent Slurm job ID number (e.g. "123_2" to 124)
 *
 * IN job_id_str - String containing a single job ID number
 * RET - equivalent job ID number or 0 on error
 */
extern uint32_t slurm_xlate_job_id(char *job_id_str)
{
	char *next_str;
	uint32_t i, job_id;
	uint16_t array_id;
	job_info_msg_t *resp;
	slurm_job_info_t *job_ptr;

	job_id = (uint32_t) strtol(job_id_str, &next_str, 10);
	if (next_str[0] == '\0')
		return job_id;
	if (next_str[0] != '_')
		return (uint32_t) 0;
	array_id = (uint16_t) strtol(next_str + 1, &next_str, 10);
	if (next_str[0] != '\0')
		return (uint32_t) 0;
	if (slurm_load_job(&resp, job_id, SHOW_ALL) != 0)
		return (uint32_t) 0;
	job_id = 0;
	for (i = 0, job_ptr = resp->job_array; i < resp->record_count;
	     i++, job_ptr++) {
		if (job_ptr->array_task_id == array_id) {
			job_id = job_ptr->job_id;
			break;
		}	
	}
	slurm_free_job_info_msg(resp);
	return job_id;
}
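
A minimal usage sketch for the translator above (hedged: it assumes slurm_xlate_job_id() is exported by your Slurm's <slurm/slurm.h>; older releases may need a local declaration, and the driver itself is purely illustrative):

#include <stdio.h>
#include <slurm/slurm.h>

int main(int argc, char **argv)
{
	int i;

	/* Translate each "jobid" or "jobid_taskid" argument to a plain job ID */
	for (i = 1; i < argc; i++) {
		uint32_t job_id = slurm_xlate_job_id(argv[i]);
		if (job_id == 0)
			fprintf(stderr, "cannot translate %s\n", argv[i]);
		else
			printf("%s -> %u\n", argv[i], job_id);
	}
	return 0;
}
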
Example #3
static int _get_job_size(uint32_t job_id)
{
	job_info_msg_t *job_buffer_ptr;
	job_info_t * job_ptr;
	int i, size = 1;
	hostlist_t hl;

	if (slurm_load_jobs((time_t) 0, &job_buffer_ptr, SHOW_ALL)) {
		slurm_perror("slurm_load_jobs");
		return 1;
	}

	for (i = 0; i < job_buffer_ptr->record_count; i++) {
		job_ptr = &job_buffer_ptr->job_array[i];
		if (job_ptr->job_id != job_id)
			continue;
		hl = hostlist_create(job_ptr->nodes);
		if (hl) {
			size = hostlist_count(hl);
			hostlist_destroy(hl);
		}
		break;
	}
	slurm_free_job_info_msg (job_buffer_ptr);

#if _DEBUG
	printf("Size is %d\n", size);
#endif
	return size;
}
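
The loop above scans the whole job table just to locate one job ID. A variant built on slurm_load_job() can ask the controller for that single record instead; a sketch under that assumption (same headers and hostlist API as the example above; _get_job_size2 is an illustrative name):

static int _get_job_size2(uint32_t job_id)
{
	job_info_msg_t *resp;
	hostlist_t hl;
	int size = 1;

	/* Fetch only the requested job rather than the full job table */
	if (slurm_load_job(&resp, job_id, SHOW_ALL)) {
		slurm_perror("slurm_load_job");
		return 1;	/* same fallback value as _get_job_size() */
	}
	if (resp->record_count > 0) {
		hl = hostlist_create(resp->job_array[0].nodes);
		if (hl) {
			size = hostlist_count(hl);
			hostlist_destroy(hl);
		}
	}
	slurm_free_job_info_msg(resp);
	return size;
}
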
Example #4
// Get bar summaries for cluster nodes
void ClusterMenu::get_lines() {
    // First we set the time of this update
    last_update = std::chrono::steady_clock::now();

    // Call SLURM API to write node information to pointer
    // Free pointer memory first if it has been previously set
    if (node_info_buffer_ptr != NULL) {
        slurm_free_node_info_msg(node_info_buffer_ptr);
    }
    slurm_load_node ((time_t) NULL, &node_info_buffer_ptr, SHOW_ALL);

    // Create a NodeContainer struct and populate with node information
    node_container.populate_nodes_from_slurm(node_info_buffer_ptr);

    // Call API function, pass job_info_ptr as reference (double pointer); flags must be SHOW_DETAIL to get job allocations
    // Free pointer memory first if it has been previously set
    if (job_info_buffer_ptr != NULL) {
        slurm_free_job_info_msg(job_info_buffer_ptr);
    }
    slurm_load_jobs((time_t) NULL, &job_info_buffer_ptr, SHOW_DETAIL);

    // Populate nodes with job allocations
    node_container.populate_job_allocations_from_slurm(job_info_buffer_ptr);

    // Get line content
    lines = node_container.get_node_bar_summary(32);

    // Record largest line for later use in horizontal scrolling
    get_longest_line();
}
Example #5
/* Load current job table information into *job_buffer_pptr */
extern int
scontrol_load_job(job_info_msg_t ** job_buffer_pptr, uint32_t job_id)
{
	int error_code;
	static uint16_t last_show_flags = 0xffff;
	uint16_t show_flags = 0;
	job_info_msg_t * job_info_ptr = NULL;

	if (all_flag)
		show_flags |= SHOW_ALL;

	if (detail_flag) {
		show_flags |= SHOW_DETAIL;
		if (detail_flag > 1)
			show_flags |= SHOW_DETAIL2;
	}
	if (federation_flag)
		show_flags |= SHOW_FEDERATION;
	if (local_flag)
		show_flags |= SHOW_LOCAL;
	if (sibling_flag)
		show_flags |= SHOW_FEDERATION | SHOW_SIBLING;

	if (old_job_info_ptr) {
		if (last_show_flags != show_flags)
			old_job_info_ptr->last_update = (time_t) 0;
		if (job_id) {
			error_code = slurm_load_job(&job_info_ptr, job_id,
						    show_flags);
		} else {
			error_code = slurm_load_jobs(
				old_job_info_ptr->last_update,
				&job_info_ptr, show_flags);
		}
		if (error_code == SLURM_SUCCESS)
			slurm_free_job_info_msg (old_job_info_ptr);
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			job_info_ptr = old_job_info_ptr;
			error_code = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf ("slurm_load_jobs no change in data\n");
		}
	} else if (job_id) {
		error_code = slurm_load_job(&job_info_ptr, job_id, show_flags);
	} else {
		error_code = slurm_load_jobs((time_t) NULL, &job_info_ptr,
					     show_flags);
	}

	if (error_code == SLURM_SUCCESS) {
		old_job_info_ptr = job_info_ptr;
		if (job_id)
			old_job_info_ptr->last_update = (time_t) 0;
		last_show_flags  = show_flags;
		*job_buffer_pptr = job_info_ptr;
	}

	return error_code;
}
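
The old_job_info_ptr/last_update handling above is the standard incremental-reload idiom: passing the previous message's last_update lets slurmctld answer SLURM_NO_CHANGE_IN_DATA instead of resending the whole job table, in which case the cached buffer is kept. A stripped-down sketch of just that idiom (illustrative names, error reporting omitted):

static job_info_msg_t *cached_jobs = NULL;

static int _refresh_jobs(uint16_t show_flags)
{
	job_info_msg_t *fresh = NULL;
	time_t since = cached_jobs ? cached_jobs->last_update : (time_t) 0;
	int rc = slurm_load_jobs(since, &fresh, show_flags);

	if (rc == SLURM_SUCCESS) {
		if (cached_jobs)
			slurm_free_job_info_msg(cached_jobs);
		cached_jobs = fresh;	/* adopt the fresh table */
	} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
		rc = SLURM_SUCCESS;	/* keep using cached_jobs */
	}
	return rc;
}
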
Example #6
static bool _is_single_job(char *job_id_str)
{
	uint32_t job_id, task_id;
	char *next_str = NULL;
	int rc;
	job_info_msg_t *resp;
	bool is_single = false;

	job_id = (uint32_t)strtol(job_id_str, &next_str, 10);
	if (next_str[0] == '_') {
		task_id = (uint32_t)strtol(next_str+1, &next_str, 10);
		if (next_str[0] != '\0') {
			error("Invalid job ID %s", job_id_str);
			return is_single;
		}
	} else if (next_str[0] != '\0') {
		error("Invalid job ID %s", job_id_str);
		return is_single;
	} else {
		task_id = NO_VAL;
	}

	rc = slurm_load_job(&resp, job_id, SHOW_ALL);
	if (rc == SLURM_SUCCESS) {
		if (resp->record_count == 0) {
			error("Job ID %s not found", job_id_str);
			slurm_free_job_info_msg(resp);
			return is_single;
		}
		if ((resp->record_count > 1) && (task_id == NO_VAL)) {
			error("Job resizing not supported for job arrays");
			slurm_free_job_info_msg(resp);
			return is_single;
		}
		is_single = true;	/* Do not bother to validate */
		slurm_free_job_info_msg(resp);
	} else {
		error("Could not load state information for job %s: %m",
		      job_id_str);
	}

	return is_single;
}
Example #7
static hostlist_t _slurm_wcoll (List joblist)
{
    int i;
    hostlist_t hl = NULL;
    job_info_msg_t * msg;
    int32_t envjobid = 0;
    int alljobids = 0;

    if ((joblist == NULL) && (envjobid = _slurm_jobid()) < 0)
        return (NULL);

    if (slurm_load_jobs((time_t) NULL, &msg, 1) < 0) 
        errx ("Unable to contact slurm controller: %s\n", 
              slurm_strerror (errno));

    /*
     *  Check for "all" in joblist
     */
    alljobids = _alljobids_requested (joblist);

    for (i = 0; i < msg->record_count; i++) {
        job_info_t *j = &msg->job_array[i];

        if (alljobids && j->job_state == JOB_RUNNING)
            hl = _hl_append (hl, j->nodes);
        else if (!joblist && (j->job_id == envjobid)) {
            /*
             *  Only use SLURM_JOBID environment variable if user
             *   didn't override with -j option
             */
            hl = hostlist_create (j->nodes);
            break;
        }
        else if (_jobid_requested (joblist, j->job_id)) {
            hl = _hl_append (hl, j->nodes);
            /* 
             * Exit when there is no more jobids to search
             */
            if (list_count (joblist) == 0)
                break;
        }
    }
    
    slurm_free_job_info_msg (msg);

    if (hl)
        hostlist_uniq (hl);

    return (hl);
}
Example #8
/* Translate a job name to relevant job IDs
 * NOTE: xfree the return value to avoid memory leak */
static char *_job_name2id(char *job_name, uint32_t job_uid)
{
	int i, rc;
	job_info_msg_t *resp;
	slurm_job_info_t *job_ptr;
	char *job_id_str = NULL, *sep = "";

	xassert(job_name);

	rc = scontrol_load_job(&resp, 0);
	if (rc == SLURM_SUCCESS) {
		if (resp->record_count == 0) {
			error("JobName %s not found", job_name);
			slurm_free_job_info_msg(resp);
			return job_id_str;
		}
		for (i = 0, job_ptr = resp->job_array; i < resp->record_count;
		     i++, job_ptr++) {
			if ((job_uid != NO_VAL) &&
			    (job_uid != job_ptr->user_id))
				continue;
			if (!job_ptr->name || xstrcmp(job_name, job_ptr->name))
				continue;
			if (job_ptr->array_task_id != NO_VAL) {
				xstrfmtcat(job_id_str, "%s%u_%u", sep,
					   job_ptr->array_job_id,
					   job_ptr->array_task_id);
			} else {
				xstrfmtcat(job_id_str, "%s%u", sep,
					   job_ptr->job_id);
			}
			sep = ",";
		}
		if (!job_id_str) {
			if (job_uid == NO_VAL) {
				error("No jobs with name \'%s\'", job_name);
			} else {
				error("No jobs with user ID %u and name \'%s\'",
				      job_uid, job_name);
			}
		}
	} else {
		error("Could not load state information: %m");
	}

	return job_id_str;
}
Example #9
/* main is used here for testing purposes only */
int 
main (int argc, char *argv[]) 
{
	static time_t last_update_time = (time_t) NULL;
	int error_code;
	job_info_msg_t * job_info_msg_ptr = NULL;

	error_code = slurm_load_jobs (last_update_time, &job_info_msg_ptr, 1);
	if (error_code) {
		slurm_perror ("slurm_load_jobs");
		return (error_code);
	}

	slurm_print_job_info_msg ( stdout, job_info_msg_ptr, 1 ) ;

	slurm_free_job_info_msg ( job_info_msg_ptr ) ;
	return (0);
}
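
For what it's worth, a stand-alone test program like this one only needs the Slurm client library at link time; with the development headers installed, something along the lines of cc test.c -lslurm is typically enough (the exact package and library names vary by distribution and Slurm version).
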
Example #10
/* Return the current time limit of the specified job_id or NO_VAL if the
 * information is not available */
static uint32_t _get_job_time(uint32_t job_id)
{
	uint32_t time_limit = NO_VAL;
	int i, rc;
	job_info_msg_t *resp;

	rc = slurm_load_job(&resp, job_id, SHOW_ALL);
	if (rc == SLURM_SUCCESS) {
		for (i = 0; i < resp->record_count; i++) {
			if (resp->job_array[i].job_id != job_id)
				continue;	/* should not happen */
			time_limit = resp->job_array[i].time_limit;
			break;
		}
		slurm_free_job_info_msg(resp);
	} else {
		error("Could not load state information for job %u: %m",
		      job_id);
	}

	return time_limit;
}
Example #11
int main() {
    // Initialise container for all node information
    NodeContainer node_container;


    // Declare a pointer to which the SLURM API writes node information
    node_info_msg_t * node_info_buffer_ptr = NULL;
    // Call SLURM API to write node information to pointer
    slurm_load_node((time_t) NULL, &node_info_buffer_ptr, SHOW_ALL);

    // Create a NodeContainer struct and populate with node information
    node_container.populate_nodes_from_slurm(node_info_buffer_ptr);


    // Declare a pointer to which the SLURM API writes job information
    job_info_msg_t * job_info_buffer_ptr = NULL;
    // Call API function, pass job_info_ptr as reference (double pointer); flags must be SHOW_DETAIL to get job allocations
    slurm_load_jobs((time_t) NULL, &job_info_buffer_ptr, SHOW_DETAIL);

    // Populate nodes with job allocations
    node_container.populate_job_allocations_from_slurm(job_info_buffer_ptr);


    // Get lines for output
    std::vector<std::string> lines = node_container.get_node_bar_summary(32);

    // Print output
    for (std::vector<std::string>::iterator it = lines.begin(); it != lines.end(); ++it) {
        printf("%s\n", it->c_str());
    }


    // Clean up: deallocate the memory behind the SLURM pointers
    slurm_free_node_info_msg(node_info_buffer_ptr);
    slurm_free_job_info_msg(job_info_buffer_ptr);
}
Example #12
File: squeue.c Project: VURM/slurm
/* _print_job - print the specified job's information */
static int
_print_job ( bool clear_old )
{
	static job_info_msg_t * old_job_ptr = NULL, * new_job_ptr;
	int error_code;
	uint16_t show_flags = 0;
	uint32_t job_id = 0;

	if (params.all_flag || (params.job_list && list_count(params.job_list)))
		show_flags |= SHOW_ALL;

	/* We require detail data when CPUs are requested */
	if (params.format && strstr(params.format, "C"))
		show_flags |= SHOW_DETAIL;

	if (params.job_list && (list_count(params.job_list) == 1)) {
		ListIterator iterator;
		uint32_t *job_id_ptr;
		iterator = list_iterator_create(params.job_list);
		job_id_ptr = list_next(iterator);
		job_id = *job_id_ptr;
		list_iterator_destroy(iterator);
	}

	if (old_job_ptr) {
		if (clear_old)
			old_job_ptr->last_update = 0;
		if (job_id) {
			error_code = slurm_load_job(
				&new_job_ptr, job_id,
				show_flags);
		} else {
			error_code = slurm_load_jobs(
				old_job_ptr->last_update,
				&new_job_ptr, show_flags);
		}
		if (error_code ==  SLURM_SUCCESS)
			slurm_free_job_info_msg( old_job_ptr );
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_job_ptr = old_job_ptr;
		}
	} else if (job_id) {
		error_code = slurm_load_job(&new_job_ptr, job_id, show_flags);
	} else {
		error_code = slurm_load_jobs((time_t) NULL, &new_job_ptr,
					     show_flags);
	}

	if (error_code) {
		slurm_perror ("slurm_load_jobs error");
		return SLURM_ERROR;
	}
	old_job_ptr = new_job_ptr;
	if (job_id)
		old_job_ptr->last_update = (time_t) 0;

	if (params.verbose) {
		printf ("last_update_time=%ld\n",
		        (long) new_job_ptr->last_update);
	}

	if (params.format == NULL) {
		if (params.long_list)
			params.format = "%.7i %.9P %.8j %.8u %.8T %.10M %.9l "
				"%.6D %R";
		else
			params.format = "%.7i %.9P %.8j %.8u  %.2t %.10M %.6D %R";
	}
	if (params.format_list == NULL)
		parse_format(params.format);

	print_jobs_array( new_job_ptr->job_array, new_job_ptr->record_count ,
			  params.format_list ) ;
	return SLURM_SUCCESS;
}
Example #13
int
get_batch_queues(bridge_batch_manager_t* p_batch_manager,
                 bridge_batch_queue_t** p_p_batch_queues,
                 int* p_batch_queues_nb, char* batch_queue_name)
{
    int fstatus=-1;

    int i,j;

    int queue_nb=0;
    int stored_queue_nb=0;

    bridge_batch_queue_t* bn;

    partition_info_msg_t* ppim;
    partition_info_t* ppi;

    job_info_msg_t* pjim;
    job_info_t* pji;

    node_info_msg_t* pnim;
    node_info_t* pni;

    /* get slurm partition infos */
    if (slurm_load_partitions(0,&ppim,SHOW_ALL) != 0) {
        DEBUG3_LOGGER("unable to get slurm partitions infos");
        ppim=NULL;
        goto exit;
    }

    /* get nodes status */
    if(slurm_load_node(0,&pnim,SHOW_ALL)) {
        DEBUG3_LOGGER("unable to get nodes informations");
        slurm_free_partition_info_msg(ppim);
        pnim=NULL;
        goto exit;
    }

    /* get slurm job infos */
    if (slurm_load_jobs(0,&pjim,SHOW_ALL) != 0) {
        DEBUG3_LOGGER("unable to get allocations informations");
        slurm_free_partition_info_msg(ppim);
        slurm_free_node_info_msg(pnim);
        goto exit;
    }

    /* build/initialize storage structures */
    queue_nb = ppim->record_count;
    if (*p_p_batch_queues != NULL) {
        if (*p_batch_queues_nb < queue_nb)
            queue_nb=*p_batch_queues_nb;
    }
    else {
        *p_p_batch_queues = (bridge_batch_queue_t*)
                            malloc(queue_nb*(sizeof(bridge_batch_queue_t)+1));
        if (*p_p_batch_queues == NULL) {
            *p_batch_queues_nb = 0;
            queue_nb = *p_batch_queues_nb;
        }
        else {
            *p_batch_queues_nb = queue_nb;
        }
    }
    stored_queue_nb=0;

    /* fill queue structures */
    for (i=0; i<ppim->record_count && stored_queue_nb<queue_nb; i++) {

        /* get partition pointer */
        ppi=ppim->partition_array+i;

        if (ppi->name == NULL)
            continue;

        /* queue name filter */
        if (batch_queue_name != NULL &&
                strcmp(batch_queue_name,ppi->name) != 0)
            continue;

        bn = &(*p_p_batch_queues)[stored_queue_nb];

        /* put default values */
        init_batch_queue(p_batch_manager,bn);

        /* queue Name */
        bn->name=strdup(ppi->name);

        bn->default_queue = (uint32_t) ( ppi->flags | PART_FLAG_DEFAULT);
        bn->priority = (uint32_t) ppi->priority;

        /* queue activity */
        if(ppi->state_up == PARTITION_UP) {
            bn->activity = BRIDGE_BATCH_QUEUE_ACTIVITY_ACTIVE ;
            bn->state = BRIDGE_BATCH_QUEUE_STATE_OPENED ;
        } else if (ppi->state_up == PARTITION_DRAIN) {
            bn->activity = BRIDGE_BATCH_QUEUE_ACTIVITY_ACTIVE ;
            bn->state = BRIDGE_BATCH_QUEUE_STATE_CLOSED ;
        } else if (ppi->state_up == PARTITION_DOWN) {
            bn->activity = BRIDGE_BATCH_QUEUE_ACTIVITY_INACTIVE ;
            bn->state = BRIDGE_BATCH_QUEUE_STATE_OPENED ;
        } else if (ppi->state_up == PARTITION_INACTIVE) {
            bn->activity = BRIDGE_BATCH_QUEUE_ACTIVITY_INACTIVE ;
            bn->state = BRIDGE_BATCH_QUEUE_STATE_CLOSED ;
        } else {
            bn->activity = BRIDGE_BATCH_QUEUE_ACTIVITY_UNKNOWN ;
            bn->state = BRIDGE_BATCH_QUEUE_STATE_UNKNOWN ;
        }

        /* max times */
        if ( ppi->max_time != INFINITE )
            bn->seq_time_max = (uint32_t) ppi->max_time * 60 ;
        else
            bn->seq_time_max = NO_LIMIT;
        bn->par_time_max = bn->seq_time_max ;

        /* slurm */
        for ( j=0 ; j < pjim->record_count ; j++ ) {

            pji=pjim->job_array+j;

            if ( strcmp(pji->partition,ppi->name) != 0 )
                continue;

            switch ( pji->job_state & JOB_STATE_BASE ) {
            case JOB_PENDING :
                bn->jobs_nb++;
                bn->pending_jobs_nb++;
                break;
            case JOB_RUNNING :
                bn->jobs_nb++;
                bn->running_jobs_nb++;
                break;
            case JOB_SUSPENDED :
                bn->jobs_nb++;
                bn->syssuspended_jobs_nb++;
                break;
            }

        }

        /* Slurm does not provide information about min and max cpus per
         * partition, so we use the following method:
         *
         * if partition->name ~= /.*_seq/ then min=max=1
         * otherwise, calculate it using MinNodes, MaxNodes and per-node
         * information
         */

        int done = 0 ;
        char * p;
        p = rindex(ppi->name,'_');
        if ( p != NULL ) {
            if ( strcmp(p+1,"seq") == 0 ) {
                done = 1;
                bn->par_cores_nb_min = 1;
                bn->par_cores_nb_max = 1;
            }
        }

        if ( ! done ) {
            /* use partition nodes information to build the min and max */
            /* number of cores (only min and max nodes number are provided */
            /* by slurm so we have to build this information) */
            uint32_t max_cpus_per_node=0;
            uint32_t min_cpus_per_node=-1;
            bridge_nodelist_t list1,list2;
            bridge_nodelist_init(&list1,NULL,0);
            bridge_nodelist_add_nodes(&list1,ppi->nodes);
            for ( j=0 ; j < pnim->record_count ; j++ ) {
                pni=pnim->node_array+j;
                bridge_nodelist_init(&list2,NULL,0);
                bridge_nodelist_add_nodes(&list2,pni->name);
                if(bridge_nodelist_intersects(&list1,&list2)==0) {
                    bridge_nodelist_free_contents(&list2);
                    continue;
                }
                if ( pni->cpus > max_cpus_per_node )
                    max_cpus_per_node = pni->cpus ;
                if ( pni->cpus < min_cpus_per_node )
                    min_cpus_per_node = pni->cpus ;
                bridge_nodelist_free_contents(&list2);
            }
            bridge_nodelist_free_contents(&list1);

            if ( max_cpus_per_node > 0 && ppi->max_nodes != INFINITE )
                bn->par_cores_nb_max = max_cpus_per_node * ppi->max_nodes ;
            if ( min_cpus_per_node < (uint32_t) -1 && ppi->min_nodes > 1 )
                bn->par_cores_nb_min = min_cpus_per_node * ppi->min_nodes ;
        }

        stored_queue_nb++;
    }

    fstatus=0;

    /* free slurm informations */
    slurm_free_job_info_msg(pjim);
    slurm_free_node_info_msg(pnim);
    slurm_free_partition_info_msg(ppim);


    if(stored_queue_nb<queue_nb) {
        *p_p_batch_queues=(bridge_batch_queue_t*)
                          realloc(*p_p_batch_queues,
                                  stored_queue_nb*(sizeof(bridge_batch_queue_t)+1));
        if(*p_p_batch_queues==NULL)
            *p_batch_queues_nb=0;
        else
            *p_batch_queues_nb=stored_queue_nb;
    }

exit:

    return fstatus;
}
Example #14
File: sstat.c Project: Cray/slurm
int main(int argc, char **argv)
{
	ListIterator itr = NULL;
	uint32_t req_cpufreq = NO_VAL;
	uint32_t stepid = NO_VAL;
	slurmdb_selected_step_t *selected_step = NULL;

#ifdef HAVE_ALPS_CRAY
	error("The sstat command is not supported on Cray systems");
	return 1;
#endif
#ifdef HAVE_BG
	error("The sstat command is not supported on IBM BlueGene systems");
	return 1;
#endif

	slurm_conf_init(NULL);
	print_fields_list = list_create(NULL);
	print_fields_itr = list_iterator_create(print_fields_list);

	parse_command_line(argc, argv);
	if (!params.opt_job_list || !list_count(params.opt_job_list)) {
		error("You didn't give me any jobs to stat.");
		return 1;
	}

	print_fields_header(print_fields_list);
	itr = list_iterator_create(params.opt_job_list);
	while ((selected_step = list_next(itr))) {
		char *nodelist = NULL;
		bool free_nodelist = false;
		if (selected_step->stepid == INFINITE) {
			/* get the batch step info */
			job_info_msg_t *job_ptr = NULL;
			hostlist_t hl;

			if (slurm_load_job(
				    &job_ptr, selected_step->jobid, SHOW_ALL)) {
				error("couldn't get info for job %u",
				      selected_step->jobid);
				continue;
			}

			stepid = NO_VAL;
			hl = hostlist_create(job_ptr->job_array[0].nodes);
			nodelist = hostlist_pop(hl);
			free_nodelist = true;
			hostlist_destroy(hl);
			slurm_free_job_info_msg(job_ptr);
		} else if (selected_step->stepid != NO_VAL) {
			stepid = selected_step->stepid;
		} else if (params.opt_all_steps) {
			job_step_info_response_msg_t *step_ptr = NULL;
			int i = 0;
			if (slurm_get_job_steps(
				    0, selected_step->jobid, NO_VAL,
				    &step_ptr, SHOW_ALL)) {
				error("couldn't get steps for job %u",
				      selected_step->jobid);
				continue;
			}

			for (i = 0; i < step_ptr->job_step_count; i++) {
				_do_stat(selected_step->jobid,
					 step_ptr->job_steps[i].step_id,
					 step_ptr->job_steps[i].nodes,
					 step_ptr->job_steps[i].cpu_freq);
			}
			slurm_free_job_step_info_response_msg(step_ptr);
			continue;
		} else {
			/* get the first running step to query against. */
			job_step_info_response_msg_t *step_ptr = NULL;
			if (slurm_get_job_steps(
				    0, selected_step->jobid, NO_VAL,
				    &step_ptr, SHOW_ALL)) {
				error("couldn't get steps for job %u",
				      selected_step->jobid);
				continue;
			}
			if (!step_ptr->job_step_count) {
				error("no steps running for job %u",
				      selected_step->jobid);
				continue;
			}
			stepid = step_ptr->job_steps[0].step_id;
			nodelist = step_ptr->job_steps[0].nodes;
			req_cpufreq = step_ptr->job_steps[0].cpu_freq;
		}
		_do_stat(selected_step->jobid, stepid, nodelist, req_cpufreq);
		if (free_nodelist && nodelist)
			free(nodelist);
	}
	list_iterator_destroy(itr);

	xfree(params.opt_field_list);
	if (params.opt_job_list)
		list_destroy(params.opt_job_list);

	if (print_fields_itr)
		list_iterator_destroy(print_fields_itr);
	if (print_fields_list)
		list_destroy(print_fields_list);

	return 0;
}
Example #15
/* _print_job - print the specified job's information */
static int
_print_job ( bool clear_old )
{
	static job_info_msg_t * old_job_ptr = NULL, * new_job_ptr;
	int error_code;
	uint16_t show_flags = 0;

	if (params.all_flag || (params.job_list && list_count(params.job_list)))
		show_flags |= SHOW_ALL;

	/* We require detail data when CPUs are requested */
	if (params.format && strstr(params.format, "C"))
		show_flags |= SHOW_DETAIL;

	if (old_job_ptr) {
		if (clear_old)
			old_job_ptr->last_update = 0;
		if (params.job_id) {
			error_code = slurm_load_job(
				&new_job_ptr, params.job_id,
				show_flags);
		} else if (params.user_id) {
			error_code = slurm_load_job_user(&new_job_ptr,
							 params.user_id,
							 show_flags);
		} else {
			error_code = slurm_load_jobs(
				old_job_ptr->last_update,
				&new_job_ptr, show_flags);
		}
		if (error_code ==  SLURM_SUCCESS)
			slurm_free_job_info_msg( old_job_ptr );
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_job_ptr = old_job_ptr;
		}
	} else if (params.job_id) {
		error_code = slurm_load_job(&new_job_ptr, params.job_id,
					    show_flags);
	} else if (params.user_id) {
		error_code = slurm_load_job_user(&new_job_ptr, params.user_id,
						 show_flags);
	} else {
		error_code = slurm_load_jobs((time_t) NULL, &new_job_ptr,
					     show_flags);
	}

	if (error_code) {
		slurm_perror ("slurm_load_jobs error");
		return SLURM_ERROR;
	}
	old_job_ptr = new_job_ptr;
	if (params.job_id || params.user_id)
		old_job_ptr->last_update = (time_t) 0;

	if (params.verbose) {
		printf ("last_update_time=%ld records=%u\n",
			(long) new_job_ptr->last_update,
			new_job_ptr->record_count);
	}

	if (!params.format && !params.format_long) {
		if (params.long_list) {
			xstrcat(params.format,
				"%.18i %.9P %.8j %.8u %.8T %.10M %.9l %.6D %R");
		} else {
			xstrcat(params.format,
				"%.18i %.9P %.8j %.8u %.2t %.10M %.6D %R");
		}
	}

	if (!params.format_list) {
		if (params.format)
			parse_format(params.format);
		else if (params.format_long)
			parse_long_format(params.format_long);
	}

	print_jobs_array(new_job_ptr->job_array, new_job_ptr->record_count,
			 params.format_list) ;
	return SLURM_SUCCESS;
}
Example #16
// Destructor: free the last-fetched SLURM messages so they are not leaked
ClusterMenu::~ClusterMenu() {
    // Free pointer memory
    slurm_free_node_info_msg(node_info_buffer_ptr);
    slurm_free_job_info_msg(job_info_buffer_ptr);
}
Example #17
static void
slurmdrmaa_job_update_status( fsd_job_t *self )
{
	job_info_msg_t *job_info = NULL;
	slurmdrmaa_job_t * slurm_self = (slurmdrmaa_job_t *) self;
	fsd_log_enter(( "({job_id=%s})", self->job_id ));

	fsd_mutex_lock( &self->session->drm_connection_mutex );
	TRY
	{
		if ( slurm_load_job( &job_info, fsd_atoi(self->job_id), SHOW_ALL) ) {
			int _slurm_errno = slurm_get_errno();

			if (_slurm_errno == ESLURM_INVALID_JOB_ID) {
				self->on_missing(self);
			} else {
				fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR,"slurm_load_jobs error: %s,job_id: %s", slurm_strerror(slurm_get_errno()), self->job_id);
			}
		}
		if (job_info) {
			fsd_log_debug(("state = %d, state_reason = %d", job_info->job_array[0].job_state, job_info->job_array[0].state_reason));
			
			switch(job_info->job_array[0].job_state & JOB_STATE_BASE)
			{

				case JOB_PENDING:
					switch(job_info->job_array[0].state_reason)
					{
						#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(2,2,0)
						case WAIT_HELD_USER:   /* job is held by user */
							fsd_log_debug(("interpreting as DRMAA_PS_USER_ON_HOLD"));
							self->state = DRMAA_PS_USER_ON_HOLD;
							break;
						#endif
						case WAIT_HELD:  /* job is held by administrator */
							fsd_log_debug(("interpreting as DRMAA_PS_SYSTEM_ON_HOLD"));
							self->state = DRMAA_PS_SYSTEM_ON_HOLD;
							break;
						default:
							fsd_log_debug(("interpreting as DRMAA_PS_QUEUED_ACTIVE"));
							self->state = DRMAA_PS_QUEUED_ACTIVE;
					}
					break;
				case JOB_RUNNING:
					fsd_log_debug(("interpreting as DRMAA_PS_RUNNING"));
					self->state = DRMAA_PS_RUNNING;
					break;
				case JOB_SUSPENDED:
					if(slurm_self->user_suspended == true) {
						fsd_log_debug(("interpreting as DRMAA_PS_USER_SUSPENDED"));
						self->state = DRMAA_PS_USER_SUSPENDED;
					} else {
						fsd_log_debug(("interpreting as DRMAA_PS_SYSTEM_SUSPENDED"));
						self->state = DRMAA_PS_SYSTEM_SUSPENDED;
					}
					break;
				case JOB_COMPLETE:
					fsd_log_debug(("interpreting as DRMAA_PS_DONE"));
					self->state = DRMAA_PS_DONE;
					self->exit_status = job_info->job_array[0].exit_code;
					fsd_log_debug(("exit_status = %d -> %d",self->exit_status, WEXITSTATUS(self->exit_status)));
					break;
				case JOB_CANCELLED:
					fsd_log_debug(("interpreting as DRMAA_PS_FAILED (aborted)"));
					self->state = DRMAA_PS_FAILED;
					self->exit_status = -1;
				case JOB_FAILED:
				case JOB_TIMEOUT:
				case JOB_NODE_FAIL:
				#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(2,3,0)
				case JOB_PREEMPTED:
				#endif
					fsd_log_debug(("interpreting as DRMAA_PS_FAILED"));
					self->state = DRMAA_PS_FAILED;
					self->exit_status = job_info->job_array[0].exit_code;
					fsd_log_debug(("exit_status = %d -> %d",self->exit_status, WEXITSTATUS(self->exit_status)));
					break;
				default: /*unknown state */
					fsd_log_error(("Unknown job state: %d. Please send bug report: http://apps.man.poznan.pl/trac/slurm-drmaa", job_info->job_array[0].job_state));
			}

			if (job_info->job_array[0].job_state & JOB_STATE_FLAGS & JOB_COMPLETING) {
				fsd_log_debug(("Epilog completing"));
			}

			if (job_info->job_array[0].job_state & JOB_STATE_FLAGS & JOB_CONFIGURING) {
				fsd_log_debug(("Nodes booting"));
			}

			if (self->exit_status == -1) /* input,output,error path failure etc*/
				self->state = DRMAA_PS_FAILED;

			self->last_update_time = time(NULL);
		
			if( self->state >= DRMAA_PS_DONE ) {
				fsd_log_debug(("exit_status = %d, WEXITSTATUS(exit_status) = %d", self->exit_status, WEXITSTATUS(self->exit_status)));
				fsd_cond_broadcast( &self->status_cond );
			}
		}
	}
	FINALLY
	{
		if(job_info != NULL)
			slurm_free_job_info_msg (job_info);

		fsd_mutex_unlock( &self->session->drm_connection_mutex );
	}
	END_TRY
	
	fsd_log_return(( "" ));
}
Example #18
extern void _change_cluster_main(GtkComboBox *combo, gpointer extra)
{
	GtkTreeModel *model;
	display_data_t *display_data;
	GtkTreeIter iter;
	slurmdb_cluster_rec_t *cluster_rec = NULL;
	char *tmp, *ui_description;
	GError *error = NULL;
	GtkWidget *node_tab = NULL;
	int rc;
	bool got_grid = 0;

	if (!gtk_combo_box_get_active_iter(combo, &iter)) {
		g_print("nothing selected\n");
		return;
	}
	model = gtk_combo_box_get_model(combo);
	if (!model) {
		g_print("nothing selected\n");
		return;
	}

	gtk_tree_model_get(model, &iter, 1, &cluster_rec, -1);
	if (!cluster_rec) {
		g_print("no cluster_rec pointer here!");
		return;
	}

	/* From testing it doesn't appear you can get here without a
	   legitimate change, so there isn't a need to check if we are
	   going back to the same cluster we were just at.
	*/
	/* if (working_cluster_rec) { */
	/*	if (!xstrcmp(cluster_rec->name, working_cluster_rec->name)) */
	/*		return; */
	/* } */

	/* free old info under last cluster */
	slurm_free_block_info_msg(g_block_info_ptr);
	g_block_info_ptr = NULL;
	slurm_free_front_end_info_msg(g_front_end_info_ptr);
	g_front_end_info_ptr = NULL;
	slurm_free_burst_buffer_info_msg(g_bb_info_ptr);
	g_bb_info_ptr = NULL;
	slurm_free_job_info_msg(g_job_info_ptr);
	g_job_info_ptr = NULL;
	slurm_free_node_info_msg(g_node_info_ptr);
	g_node_info_ptr = NULL;
	slurm_free_partition_info_msg(g_part_info_ptr);
	g_part_info_ptr = NULL;
	slurm_free_reservation_info_msg(g_resv_info_ptr);
	g_resv_info_ptr = NULL;
	slurm_free_ctl_conf(g_ctl_info_ptr);
	g_ctl_info_ptr = NULL;
	slurm_free_job_step_info_response_msg(g_step_info_ptr);
	g_step_info_ptr = NULL;
	slurm_free_topo_info_msg(g_topo_info_msg_ptr);
	g_topo_info_msg_ptr = NULL;

	/* set up working_cluster_rec */
	if (cluster_dims > 1) {
		/* reset from a multi-dim cluster */
		working_sview_config.grid_x_width =
			default_sview_config.grid_x_width;
		working_sview_config.grid_hori = default_sview_config.grid_hori;
		working_sview_config.grid_vert = default_sview_config.grid_vert;
	}
	gtk_table_set_col_spacings(main_grid_table, 0);
	gtk_table_set_row_spacings(main_grid_table, 0);

	if (!orig_cluster_name)
		orig_cluster_name = slurm_get_cluster_name();
	if (!xstrcmp(cluster_rec->name, orig_cluster_name))
		working_cluster_rec = NULL;
	else
		working_cluster_rec = cluster_rec;
	cluster_dims = slurmdb_setup_cluster_dims();
	cluster_flags = slurmdb_setup_cluster_flags();

	display_data = main_display_data;
	while (display_data++) {
		if (display_data->id == -1)
			break;
		if (cluster_flags & CLUSTER_FLAG_BG) {
			switch(display_data->id) {
			case BLOCK_PAGE:
				display_data->show = true;
				break;
			case NODE_PAGE:
				display_data->name = "Midplanes";
				break;
			default:
				break;
			}
		} else {
			switch(display_data->id) {
			case BLOCK_PAGE:
				display_data->show = false;
				break;
			case NODE_PAGE:
				display_data->name = "Nodes";
				break;
			default:
				break;
			}
		}
	}

	/* set up menu */
	ui_description = _get_ui_description();
	gtk_ui_manager_remove_ui(g_ui_manager, g_menu_id);
	if (!(g_menu_id = gtk_ui_manager_add_ui_from_string(
		      g_ui_manager, ui_description, -1, &error))) {
		xfree(ui_description);
		g_error("building menus failed: %s", error->message);
		g_error_free (error);
		exit (0);
	}
	xfree(ui_description);

	/* make changes for each object */
	cluster_change_block();
	cluster_change_front_end();
	cluster_change_resv();
	cluster_change_part();
	cluster_change_job();
	cluster_change_node();
	cluster_change_bb();

	/* destroy old stuff */
	if (grid_button_list) {
		FREE_NULL_LIST(grid_button_list);
		got_grid = 1;
	}

	select_g_ba_fini();

	/* sorry popups can't survive a cluster change */
	if (popup_list)
		list_flush(popup_list);
	if (signal_params_list)
		list_flush(signal_params_list);
	if (g_switch_nodes_maps)
		free_switch_nodes_maps(g_switch_nodes_maps);

	/* change the node tab name if needed */
	node_tab = gtk_notebook_get_nth_page(
		GTK_NOTEBOOK(main_notebook), NODE_PAGE);
	node_tab = gtk_notebook_get_tab_label(GTK_NOTEBOOK(main_notebook),
					      node_tab);
#ifdef GTK2_USE_GET_FOCUS

	/* ok, now we have a table which we have set up to contain an
	 * event_box which contains the label we are interested.  We
	 * setup this label to be the focus child of the table, so all
	 * we have to do is grab that and we are set. */
	node_tab = gtk_container_get_focus_child(GTK_CONTAINER(node_tab));
#else
	/* See above comment.  Since gtk_container_get_focus_child
	 * doesn't exist yet we will just traverse the children until
	 * we find the label widget and then break.
	 */
	{
		int i = 0;
		GList *children = gtk_container_get_children(
			GTK_CONTAINER(node_tab));
		while ((node_tab = g_list_nth_data(children, i++))) {
			int j = 0;
			GList *children2 = gtk_container_get_children(
				GTK_CONTAINER(node_tab));
			while ((node_tab = g_list_nth_data(children2, j++))) {
				if (GTK_IS_LABEL(node_tab))
					break;
			}
			g_list_free(children2);
			if (node_tab)
				break;
		}
		g_list_free(children);
	}
#endif
	if (node_tab)
		gtk_label_set_text(GTK_LABEL(node_tab),
				   main_display_data[NODE_PAGE].name);

	/* The name in the visible tabs is easier since it is really
	   just a button with a label on it.
	*/
	if (default_sview_config.page_check_widget[NODE_PAGE]) {
		gtk_button_set_label(GTK_BUTTON(default_sview_config.
						page_check_widget[NODE_PAGE]),
				     main_display_data[NODE_PAGE].name);
	}

	/* reinit */
	rc = get_system_stats(main_grid_table);

	if (rc == SLURM_SUCCESS) {
		/* It turns out if we didn't have the grid (cluster
		   not responding) before the
		   new grid doesn't get set up correctly.  Redoing the
		   system_stats fixes it.  There is probably a better
		   way of doing this, but it doesn't happen very often
		   and isn't that bad to handle every once in a while.
		*/
		if (!got_grid) {
			/* I know we just did this before, but it
			   needs to be done again here.
			*/
			FREE_NULL_LIST(grid_button_list);
			get_system_stats(main_grid_table);
		}

		refresh_main(NULL, NULL);
	}

	tmp = g_strdup_printf("Cluster changed to %s", cluster_rec->name);
	display_edit_note(tmp);
	g_free(tmp);
}
Example #19
extern void get_job(void)
{
	int error_code = -1, i, recs;
	static int printed_jobs = 0;
	static int count = 0;
	static job_info_msg_t *job_info_ptr = NULL, *new_job_ptr = NULL;
	job_info_t *job_ptr = NULL;
	uint16_t show_flags = 0;
	bitstr_t *nodes_req = NULL;
	static uint16_t last_flags = 0;

	if (params.all_flag)
		show_flags |= SHOW_ALL;
	if (job_info_ptr) {
		if (show_flags != last_flags)
			job_info_ptr->last_update = 0;
		error_code = slurm_load_jobs(job_info_ptr->last_update,
					     &new_job_ptr, show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_job_info_msg(job_info_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_job_ptr = job_info_ptr;
		}
	} else
		error_code = slurm_load_jobs((time_t) NULL, &new_job_ptr,
					     show_flags);

	last_flags = show_flags;
	if (error_code) {
		if (quiet_flag != 1) {
			if (!params.commandline) {
				mvwprintw(text_win,
					  main_ycord, 1,
					  "slurm_load_jobs: %s",
					  slurm_strerror(slurm_get_errno()));
				main_ycord++;
			} else {
				printf("slurm_load_jobs: %s\n",
				       slurm_strerror(slurm_get_errno()));
			}
		}
	}

	if (!params.no_header)
		_print_header_job();

	if (new_job_ptr)
		recs = new_job_ptr->record_count;
	else
		recs = 0;

	if (!params.commandline)
		if ((text_line_cnt+printed_jobs) > count)
			text_line_cnt--;
	printed_jobs = 0;
	count = 0;

	if (params.hl)
		nodes_req = get_requested_node_bitmap();
	for (i = 0; i < recs; i++) {
		job_ptr = &(new_job_ptr->job_array[i]);
		if (!IS_JOB_PENDING(job_ptr)   && !IS_JOB_RUNNING(job_ptr) &&
		    !IS_JOB_SUSPENDED(job_ptr) && !IS_JOB_COMPLETING(job_ptr))
			continue;	/* job has completed */
		if (nodes_req) {
			int overlap = 0;
			bitstr_t *loc_bitmap = bit_alloc(bit_size(nodes_req));
			inx2bitstr(loc_bitmap, job_ptr->node_inx);
			overlap = bit_overlap(loc_bitmap, nodes_req);
			FREE_NULL_BITMAP(loc_bitmap);
			if (!overlap)
				continue;
		}

		if (job_ptr->node_inx[0] != -1) {
			int j = 0;
			job_ptr->num_nodes = 0;
			while (job_ptr->node_inx[j] >= 0) {
				job_ptr->num_nodes +=
					(job_ptr->node_inx[j + 1] + 1) -
					 job_ptr->node_inx[j];
				set_grid_inx(job_ptr->node_inx[j],
					     job_ptr->node_inx[j + 1], count);
				j += 2;
			}

			if (!params.commandline) {
				if ((count >= text_line_cnt) &&
				    (printed_jobs < (getmaxy(text_win) - 4))) {
					job_ptr->num_cpus =
						(int)letters[count%62];
					wattron(text_win,
						COLOR_PAIR(colors[count%6]));
					_print_text_job(job_ptr);
					wattroff(text_win,
						 COLOR_PAIR(colors[count%6]));
					printed_jobs++;
				}
			} else {
				job_ptr->num_cpus = (int)letters[count%62];
				_print_text_job(job_ptr);
			}
			count++;
		}
		if (count == 128)
			count = 0;
	}

	for (i = 0; i < recs; i++) {
		job_ptr = &(new_job_ptr->job_array[i]);

		if (!IS_JOB_PENDING(job_ptr))
			continue;	/* job has completed */

		if (!params.commandline) {
			if ((count>=text_line_cnt) &&
			    (printed_jobs < (getmaxy(text_win) - 4))) {
				xfree(job_ptr->nodes);
				job_ptr->nodes = xstrdup("waiting...");
				job_ptr->num_cpus = (int) letters[count%62];
				wattron(text_win,
					COLOR_PAIR(colors[count%6]));
				_print_text_job(job_ptr);
				wattroff(text_win,
					 COLOR_PAIR(colors[count%6]));
				printed_jobs++;
			}
		} else {
			xfree(job_ptr->nodes);
			job_ptr->nodes = xstrdup("waiting...");
			job_ptr->num_cpus = (int) letters[count%62];
			_print_text_job(job_ptr);
			printed_jobs++;
		}
		count++;

		if (count == 128)
			count = 0;
	}

	if (params.commandline && params.iterate)
		printf("\n");

	if (!params.commandline)
		main_ycord++;

	job_info_ptr = new_job_ptr;
	return;
}
Example #20
/*
 * srun call: the client node connects to the allocated node(s)
 */
int slurm_spank_local_user_init (spank_t sp, int ac, char **av)
{
	int status;

	uint32_t jobid;
	uint32_t stepid;
	job_info_msg_t * job_buffer_ptr;
	job_info_t* job_ptr;

	/* only handle interactive usage */
	if ( x11_mode == X11_MODE_NONE || 
	     x11_mode == X11_MODE_BATCH )
		return 0;

	/* check DISPLAY value */
	if ( getenv("DISPLAY") == NULL ) {
		ERROR("x11: no local DISPLAY defined, skipping",jobid);
		return 0;
	}

	/* get job id */
	if ( spank_get_item (sp, S_JOB_ID, &jobid)
	     != ESPANK_SUCCESS ) {
		status = -1;
		goto exit;
	}
	
	/* get job step id */
	if ( spank_get_item (sp, S_JOB_STEPID, &stepid)
	     != ESPANK_SUCCESS ) {
		status = -1;
		goto exit;
	}
	
	/* get job infos */
	status = slurm_load_job(&job_buffer_ptr,jobid,SHOW_ALL);
	if ( status != 0 ) {
		ERROR("x11: unable to get job infos");
		status = -3;
		goto exit;
	}

	/* check infos validity  */
	if ( job_buffer_ptr->record_count != 1 ) {
		ERROR("x11: job infos are invalid");
		status = -4;
		goto clean_exit;
	}
	job_ptr = job_buffer_ptr->job_array;

	/* check allocated nodes var */
	if ( job_ptr->nodes == NULL ) {
		ERROR("x11: job has no allocated nodes defined");
		status = -5;
		goto clean_exit;
	}

	/* connect required nodes */
	status = _x11_connect_nodes(job_ptr->nodes,jobid,stepid);

clean_exit:
	slurm_free_job_info_msg(job_buffer_ptr);

exit:
	return status;
}
Example #21
int _x11_init_remote_batch(spank_t sp,uint32_t jobid,uint32_t stepid)
{
	int status;

	FILE* f;
	char localhost[256];
	char* cmd_pattern= X11_LIBEXEC_PROG " -u %s -s \"%s\" -o \"%s\" -f %s -d %s -t %s -i %u.%u -cwg %s &";
	char* cmd;
	size_t cmd_length;
	char display[256];
	
        struct passwd user_pwent;
        struct passwd *p_pwent;
        size_t pwent_buffer_length = sysconf(_SC_GETPW_R_SIZE_MAX);
        char pwent_buffer[pwent_buffer_length];
        
	job_info_msg_t * job_buffer_ptr;
	job_info_t* job_ptr;
	
	/*
	 * get current hostname 
	 */
	if ( gethostname(localhost,256) != 0 ) {
		status = -20;
		goto exit;
	}
	
	/*
	 * the batch script inherits the DISPLAY value of the 
	 * submission command. We will use it on the allocation node
	 * for proper establishment of a working X11 ssh tunnel
	 */
	if ( spank_getenv(sp,"DISPLAY",display,256) != ESPANK_SUCCESS ) {
		ERROR("x11: unable to read batch step "
			    "inherited DISPLAY value");
		status = -1;
		goto exit;
	}
	
	/* get job infos */
	status = slurm_load_job(&job_buffer_ptr,jobid,SHOW_ALL);
	if ( status != 0 ) {
		ERROR("x11: unable to get job infos");
		status = -3;
		goto exit;
	}
	
	/* check infos validity  */
	if ( job_buffer_ptr->record_count != 1 ) {
		ERROR("x11: job infos are invalid");
		status = -4;
		goto clean_exit;
	}
	job_ptr = job_buffer_ptr->job_array;
	
	/* get user name */
	status = getpwuid_r(job_ptr->user_id,&user_pwent,pwent_buffer,
			    pwent_buffer_length,&p_pwent) ;
        if (status) {
                error("x11: unable to get username for uid=%u : %s",job_ptr->user_id,
		      strerror(status)) ;
		status = -10;
		goto clean_exit;
        }
	
	/* 
	 * build the command line that will be used to forward the 
	 * alloc node X11 tunnel
	 */
	cmd_length = strlen(cmd_pattern) + 128 ;
	cmd = (char*) malloc(cmd_length*sizeof(char));
	if ( cmd == NULL ||
	     snprintf(cmd,cmd_length,cmd_pattern,user_pwent.pw_name,
		      (ssh_cmd == NULL) ? DEFAULT_SSH_CMD : ssh_cmd,
		      (ssh_args == NULL) ? DEFAULT_SSH_ARGS : ssh_args,
		      job_ptr->alloc_node,display,localhost,jobid,stepid,
		      (helpertask_args == NULL) ? DEFAULT_HELPERTASK_ARGS : helpertask_args) >= cmd_length ) {
		ERROR("x11: error while building cmd");
		status = -2;
	}
	else {
		INFO("x11: batch mode : executing %s",cmd);
	        /* execute the command to retrieve the DISPLAY value to use */
		f = popen(cmd,"r");
		if ( f != NULL ) {
			if ( fscanf(f,"%255s",display) == 1 ) {
				if ( spank_setenv(sp,"DISPLAY",display,1)
				     != ESPANK_SUCCESS ) {
					ERROR("x11: unable to set DISPLAY"
						    " in job env");
					status = -5;
				}
				else {
					INFO("x11: now using DISPLAY=%s",
						   display);
					status=0;
				}
			}
			else {
				ERROR("x11: unable to get a DISPLAY value");
				status = -6;
			}
			pclose(f);
		}
		else {
		        ERROR("x11: unable to exec get cmd '%s'",cmd);
			status = -3;
		}
	}

	if ( cmd != NULL )
		free(cmd);

clean_exit:
	slurm_free_job_info_msg(job_buffer_ptr);
	
exit:
	return status;
}
Example #22
/* Return the current time limit of the specified job_id or NO_VAL if the
 * information is not available */
static uint32_t _get_job_time(const char *job_id_str)
{
	uint32_t job_id, task_id;
	char *next_str = NULL;
	uint32_t time_limit = NO_VAL;
	int i, rc;
	job_info_msg_t *resp;
	bitstr_t *array_bitmap;

	job_id = (uint32_t)strtol(job_id_str, &next_str, 10);
	if (next_str[0] == '_') {
		task_id = (uint32_t)strtol(next_str+1, &next_str, 10);
		if (next_str[0] != '\0') {
			error("Invalid job ID %s", job_id_str);
			return time_limit;
		}
	} else if (next_str[0] != '\0') {
		error("Invalid job ID %s", job_id_str);
		return time_limit;
	} else {
		task_id = NO_VAL;
	}

	rc = slurm_load_job(&resp, job_id, SHOW_ALL);
	if (rc == SLURM_SUCCESS) {
		if (resp->record_count == 0) {
			error("Job ID %s not found", job_id_str);
			slurm_free_job_info_msg(resp);
			return time_limit;
		}
		if ((resp->record_count > 1) && (task_id == NO_VAL)) {
			error("TimeLimit increment/decrement not supported "
			      "for job arrays");
			slurm_free_job_info_msg(resp);
			return time_limit;
		}
		for (i = 0; i < resp->record_count; i++) {
			if ((resp->job_array[i].job_id == job_id) &&
			    (resp->job_array[i].array_task_id == NO_VAL) &&
			    (resp->job_array[i].array_bitmap == NULL)) {
				/* Regular job match */
				time_limit = resp->job_array[i].time_limit;
				break;
			}
			if (resp->job_array[i].array_job_id != job_id)
				continue;
			array_bitmap = (bitstr_t *)
				       resp->job_array[i].array_bitmap;
			if ((task_id == NO_VAL) ||
			    (resp->job_array[i].array_task_id == task_id) ||
			    (array_bitmap &&
			     (task_id < bit_size(array_bitmap)) &&
			     bit_test(array_bitmap, task_id))) {
				/* Array job with task_id match */
				time_limit = resp->job_array[i].time_limit;
				break;
			}
		}
		slurm_free_job_info_msg(resp);
	} else {
		error("Could not load state information for job %s: %m",
		      job_id_str);
	}

	return time_limit;
}
Example #23
/* _get_job_ids()
 */
static uint32_t *
_get_job_ids(const char *jobid, uint32_t *num_ids)
{
	job_info_msg_t *job_info;
	uint32_t *job_ids;
	uint32_t task_id;
	int i;
	int cc;

	task_id = 0;
	job_info = _get_job_info(jobid, &task_id);
	if (job_info == NULL)
		return NULL;

	if (_is_array_task_id(jobid)) {

		job_ids = xmalloc(sizeof(uint32_t));
		*num_ids = 1;

		/* Search for the job_id of the specified
		 * task.
		 */
		for (cc = 0; cc < job_info->record_count; cc++) {
			if (task_id == job_info->job_array[cc].array_task_id) {
				job_ids[0] = job_info->job_array[cc].job_id;
				break;
			}
		}

		slurm_free_job_info_msg(job_info);
		return job_ids;
	}

	if (job_info->record_count == 1) {
		/* No task elements beside the
		 * job itself so it cannot be
		 * a job array.
		 */
		job_ids = xmalloc(sizeof(uint32_t));
		*num_ids = 1;
		job_ids[0] = job_info->job_array[0].job_id;
		slurm_free_job_info_msg(job_info);

		return job_ids;
	}

	*num_ids = job_info->record_count;
	job_ids = xmalloc((*num_ids) * sizeof(uint32_t));
	/* First save the pending jobs
	 */
	i = 0;
	for (cc = 0; cc < job_info->record_count; cc++) {
		if (job_info->job_array[cc].job_state == JOB_PENDING) {
			job_ids[i] = job_info->job_array[cc].job_id;
			++i;
		}
	}
	/* then the rest of the states
	 */
	for (cc = 0; cc < job_info->record_count; cc++) {
		if (job_info->job_array[cc].job_state != JOB_PENDING) {
			job_ids[i] = job_info->job_array[cc].job_id;
			++i;
		}
	}

	xassert(i == *num_ids);
	slurm_free_job_info_msg(job_info);

	return job_ids;
}
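
As with _job_name2id() above, the ID array returned by _get_job_ids() is allocated with xmalloc(), so the caller is expected to release it with xfree() when done.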