Ejemplo n.º 1
0
/* Create a reservation for a job in the future */
static void _add_reservation(uint32_t start_time, uint32_t end_reserve,
                             bitstr_t *res_bitmap,
                             node_space_map_t *node_space,
                             int *node_space_recs)
{
    bool placed = false;
    int i, j;

    for (j=0; ; ) {
        if (node_space[j].end_time > start_time) {
            /* insert start entry record */
            i = *node_space_recs;
            node_space[i].begin_time = start_time;
            node_space[i].end_time = node_space[j].end_time;
            node_space[j].end_time = start_time;
            node_space[i].avail_bitmap =
                bit_copy(node_space[j].avail_bitmap);
            node_space[i].next = node_space[j].next;
            node_space[j].next = i;
            (*node_space_recs)++;
            placed = true;
        }
        if (node_space[j].end_time == start_time) {
            /* no need to insert new start entry record */
            placed = true;
        }
        if (placed == true) {
            j = node_space[j].next;
            if (j && (end_reserve < node_space[j].end_time)) {
                /* insert end entry record */
                i = *node_space_recs;
                node_space[i].begin_time = end_reserve;
                node_space[i].end_time = node_space[j].
                                         end_time;
                node_space[j].end_time = end_reserve;
                node_space[i].avail_bitmap =
                    bit_copy(node_space[j].avail_bitmap);
                node_space[i].next = node_space[j].next;
                node_space[j].next = i;
                (*node_space_recs)++;
            }
            break;
        }
        if ((j = node_space[j].next) == 0)
            break;
    }

    for (j=0; ; ) {
        if ((node_space[j].begin_time >= start_time) &&
                (node_space[j].end_time <= end_reserve))
            bit_and(node_space[j].avail_bitmap, res_bitmap);
        if ((node_space[j].begin_time >= end_reserve) ||
                ((j = node_space[j].next) == 0))
            break;
    }
}
Ejemplo n.º 2
0
/* Try to start the job on any non-reserved nodes */
static int _start_job(struct job_record *job_ptr, bitstr_t *resv_bitmap)
{
	int rc;
	bitstr_t *orig_exc_nodes = NULL;
	static uint32_t fail_jobid = 0;

	if (job_ptr->details->exc_node_bitmap) {
		orig_exc_nodes = bit_copy(job_ptr->details->exc_node_bitmap);
		bit_or(job_ptr->details->exc_node_bitmap, resv_bitmap);
	} else
		job_ptr->details->exc_node_bitmap = bit_copy(resv_bitmap);

	rc = select_nodes(job_ptr, false, NULL);
	if (job_ptr->details) { /* select_nodes() might cancel the job! */
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		job_ptr->details->exc_node_bitmap = orig_exc_nodes;
	} else
		FREE_NULL_BITMAP(orig_exc_nodes);
	if (rc == SLURM_SUCCESS) {
		/* job initiated */
		last_job_update = time(NULL);
		info("backfill: Started JobId=%u on %s",
		     job_ptr->job_id, job_ptr->nodes);
		if (job_ptr->batch_flag == 0)
			srun_allocate(job_ptr->job_id);
		else if ((job_ptr->details == NULL) ||
			 (job_ptr->details->prolog_running == 0))
			launch_job(job_ptr);
		slurmctld_diag_stats.backfilled_jobs++;
		slurmctld_diag_stats.last_backfilled_jobs++;
		if (debug_flags & DEBUG_FLAG_BACKFILL) {
			info("backfill: Jobs backfilled since boot: %u",
			     slurmctld_diag_stats.backfilled_jobs);
		}
	} else if ((job_ptr->job_id != fail_jobid) &&
		   (rc != ESLURM_ACCOUNTING_POLICY)) {
		char *node_list;
		bit_not(resv_bitmap);
		node_list = bitmap2node_name(resv_bitmap);
		/* This happens when a job has sharing disabled and
		 * a selected node is still completing some job,
		 * which should be a temporary situation. */
		verbose("backfill: Failed to start JobId=%u on %s: %s",
			job_ptr->job_id, node_list, slurm_strerror(rc));
		xfree(node_list);
		fail_jobid = job_ptr->job_id;
	} else {
		debug3("backfill: Failed to start JobId=%u: %s",
		       job_ptr->job_id, slurm_strerror(rc));
	}

	return rc;
}
Ejemplo n.º 3
0
/* Distribute MPS Count to records on original list */
static void _distribute_count(List gres_conf_list, List gpu_conf_list,
			      uint64_t count)
{
	ListIterator gpu_itr;
	gres_slurmd_conf_t *gpu_record, *mps_record;
	int rem_gpus = list_count(gpu_conf_list);

	gpu_itr = list_iterator_create(gpu_conf_list);
	while ((gpu_record = list_next(gpu_itr))) {
		mps_record = xmalloc(sizeof(gres_slurmd_conf_t));
		mps_record->config_flags = gpu_record->config_flags;
		mps_record->count = count / rem_gpus;
		count -= mps_record->count;
		rem_gpus--;
		mps_record->cpu_cnt = gpu_record->cpu_cnt;
		mps_record->cpus = xstrdup(gpu_record->cpus);
		if (gpu_record->cpus_bitmap) {
			mps_record->cpus_bitmap =
				bit_copy(gpu_record->cpus_bitmap);
		}
		mps_record->file = xstrdup(gpu_record->file);
		mps_record->name = xstrdup("mps");
		mps_record->plugin_id = gres_plugin_build_id("mps");
		mps_record->type_name = xstrdup(gpu_record->type_name);
		list_append(gres_conf_list, mps_record);

		list_append(gres_conf_list, gpu_record);
		(void) list_remove(gpu_itr);
	}
	list_iterator_destroy(gpu_itr);
}
Ejemplo n.º 4
0
Archivo: gang.c Proyecto: corburn/slurm
/* Return 1 if job fits in this row, else return 0 */
static int _job_fits_in_active_row(struct job_record *job_ptr,
				   struct gs_part *p_ptr)
{
	job_resources_t *job_res = job_ptr->job_resrcs;
	int count;
	bitstr_t *job_map;
	uint16_t job_gr_type;

	if ((p_ptr->active_resmap == NULL) || (p_ptr->jobs_active == 0))
		return 1;

	job_gr_type = _get_part_gr_type(job_ptr->part_ptr);
	if ((job_gr_type == GS_CPU2) || (job_gr_type == GS_CORE) ||
	    (job_gr_type == GS_SOCKET)) {
		return job_fits_into_cores(job_res, p_ptr->active_resmap,
					   gs_bits_per_node);
	}

	/* job_gr_type == GS_NODE || job_gr_type == GS_CPU */
	job_map = bit_copy(job_res->node_bitmap);
	bit_and(job_map, p_ptr->active_resmap);
	/* any set bits indicate contention for the same resource */
	count = bit_set_count(job_map);
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: _job_fits_in_active_row: %d bits conflict", count);
	FREE_NULL_BITMAP(job_map);
	if (count == 0)
		return 1;
	if (job_gr_type == GS_CPU) {
		/* For GS_CPU we check the CPU arrays */
		return _can_cpus_fit(job_ptr, p_ptr);
	}

	return 0;
}
Ejemplo n.º 5
0
int powercap_get_job_optimal_cpufreq(uint32_t powercap, int *allowed_freqs)
{
	uint32_t cur_max_watts = 0, *tmp_max_watts_dvfs = NULL;
	int k = 1;
	bitstr_t *tmp_bitmap = NULL;

	if (!_powercap_enabled())
		return 0;

	tmp_max_watts_dvfs = xmalloc(sizeof(uint32_t) * (allowed_freqs[0]+1));
	tmp_bitmap = bit_copy(idle_node_bitmap);
	bit_not(tmp_bitmap);

	cur_max_watts = powercap_get_node_bitmap_maxwatts_dvfs(tmp_bitmap,
			  idle_node_bitmap,tmp_max_watts_dvfs,allowed_freqs,0);

	if (cur_max_watts > powercap) {
		while (tmp_max_watts_dvfs[k] > powercap &&
		      k < allowed_freqs[0] + 1) {
			k++;
		}
		if (k == allowed_freqs[0] + 1)
			k--;
	} else
		k = 1;

	return k;
}
Ejemplo n.º 6
0
/*
 * Build record used to set environment variables as appropriate for a job's
 * prolog or epilog based GRES allocated to the job.
 */
extern gres_epilog_info_t *epilog_build_env(gres_job_state_t *gres_job_ptr)
{
	int i;
	gres_epilog_info_t *epilog_info;

	epilog_info = xmalloc(sizeof(gres_epilog_info_t));
	epilog_info->node_cnt = gres_job_ptr->node_cnt;
	epilog_info->gres_bit_alloc = xcalloc(epilog_info->node_cnt,
					      sizeof(bitstr_t *));
	epilog_info->gres_cnt_node_alloc = xcalloc(epilog_info->node_cnt,
					      sizeof(uint64_t));
	for (i = 0; i < epilog_info->node_cnt; i++) {
		if (gres_job_ptr->gres_bit_alloc &&
		    gres_job_ptr->gres_bit_alloc[i]) {
			epilog_info->gres_bit_alloc[i] =
				bit_copy(gres_job_ptr->gres_bit_alloc[i]);
		}
		if (gres_job_ptr->gres_bit_alloc &&
		    gres_job_ptr->gres_bit_alloc[i]) {
			epilog_info->gres_cnt_node_alloc[i] =
				gres_job_ptr->gres_cnt_node_alloc[i];
		}
	}

	return epilog_info;
}
Ejemplo n.º 7
0
static void _add_config_feature(char *feature, bitstr_t *node_bitmap)
{
	struct features_record *feature_ptr;
	ListIterator feature_iter;
	bool match = false;

	/* If feature already exists in feature_list, just update the bitmap */
	feature_iter = list_iterator_create(feature_list);
	while ((feature_ptr = (struct features_record *)
			list_next(feature_iter))) {
		if (strcmp(feature, feature_ptr->name))
			continue;
		bit_or(feature_ptr->node_bitmap, node_bitmap);
		match = true;
		break;
	}
	list_iterator_destroy(feature_iter);

	if (!match) {	/* Need to create new feature_list record */
		feature_ptr = xmalloc(sizeof(struct features_record));
		feature_ptr->magic = FEATURE_MAGIC;
		feature_ptr->name = xstrdup(feature);
		feature_ptr->node_bitmap = bit_copy(node_bitmap);
		list_append(feature_list, feature_ptr);
	}
}
Ejemplo n.º 8
0
static unsigned buffer_write(struct mpsse_ctx *ctx, const uint8_t *out, unsigned out_offset,
	unsigned bit_count)
{
	DEBUG_IO("%d bits", bit_count);
	assert(ctx->write_count + DIV_ROUND_UP(bit_count, 8) <= ctx->write_size);
	bit_copy(ctx->write_buffer + ctx->write_count, 0, out, out_offset, bit_count);
	ctx->write_count += DIV_ROUND_UP(bit_count, 8);
	return bit_count;
}
Ejemplo n.º 9
0
/*
 * Convert all GPU records to a new entries in a list where each File is a
 * unique device (i.e. convert a record with "File=nvidia[0-3]" into 4 separate
 * records).
 */
static List _build_gpu_list(List gres_list)
{
	ListIterator itr;
	gres_slurmd_conf_t *gres_record, *gpu_record;
	List gpu_list;
	hostlist_t hl;
	char *f_name;
	bool log_fname = true;

	if (gres_list == NULL)
		return NULL;

	gpu_list = list_create(_delete_gres_list);
	itr = list_iterator_create(gres_list);
	while ((gres_record = list_next(itr))) {
		if (xstrcmp(gres_record->name, "gpu"))
			continue;
		if (!gres_record->file) {
			if (log_fname) {
				error("%s: GPU configuration lacks \"File\" specification",
				      plugin_name);
				log_fname = false;
			}
			continue;
		}
		hl = hostlist_create(gres_record->file);
		while ((f_name = hostlist_shift(hl))) {
			gpu_record = xmalloc(sizeof(gres_slurmd_conf_t));
			gpu_record->config_flags = gres_record->config_flags;
			if (gres_record->type_name) {
				gpu_record->config_flags |=
					GRES_CONF_HAS_TYPE;
			}
			gpu_record->count = 1;
			gpu_record->cpu_cnt = gres_record->cpu_cnt;
			gpu_record->cpus = xstrdup(gres_record->cpus);
			if (gres_record->cpus_bitmap) {
				gpu_record->cpus_bitmap =
					bit_copy(gres_record->cpus_bitmap);
			}
			gpu_record->file = xstrdup(f_name);
			gpu_record->links = xstrdup(gres_record->links);
			gpu_record->name = xstrdup(gres_record->name);
			gpu_record->plugin_id = gres_record->plugin_id;
			gpu_record->type_name = xstrdup(gres_record->type_name);
			list_append(gpu_list, gpu_record);
			free(f_name);
		}
		hostlist_destroy(hl);
		(void) list_delete_item(itr);
	}
	list_iterator_destroy(itr);

	return gpu_list;
}
Ejemplo n.º 10
0
uint32_t powercap_get_node_bitmap_maxwatts(bitstr_t *idle_bitmap)
{
	uint32_t max_watts = 0, val;
	struct node_record *node_ptr;
	int i;
	bitstr_t *tmp_bitmap = NULL;

	if (!_powercap_enabled())
		return 0;
	if (!power_layout_ready())
		return 0;

	/* if no input bitmap, consider the current idle nodes 
	 * bitmap as the input bitmap tagging nodes to consider 
	 * as idle while computing the max watts of the cluster */
	if (idle_bitmap == NULL) {
		tmp_bitmap = bit_copy(idle_node_bitmap);
		idle_bitmap = tmp_bitmap;
	}

	for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count;
	     i++, node_ptr++) {
		/* non reserved node, evaluate the different cases */
		if (bit_test(idle_bitmap, i)) {
			 /* idle nodes, 2 cases : power save or not */
			if (bit_test(power_node_bitmap, i)) {
				layouts_entity_pullget_kv(L_NAME,
						node_ptr->name, L_NODE_SAVE,
						&val, L_T_UINT32);
			} else {
				layouts_entity_pullget_kv(L_NAME,
						node_ptr->name, L_NODE_IDLE,
						&val, L_T_UINT32);
			}
		} else {
			/* non idle nodes, 2 cases : down or not */
			if (!bit_test(up_node_bitmap, i)) {
				layouts_entity_pullget_kv(L_NAME, 
						node_ptr->name, L_NODE_DOWN,
						&val, L_T_UINT32);
			} else {
				layouts_entity_pullget_kv(L_NAME,
						node_ptr->name, L_NODE_MAX,
						&val, L_T_UINT32);
			}
		}
		max_watts += val;	
	}

	if (tmp_bitmap)
		bit_free(tmp_bitmap);

	return max_watts;
}
Ejemplo n.º 11
0
/* copy a select job credential
 * IN jobinfo - the select job credential to be copied
 * RET        - the copy or NULL on failure
 * NOTE: returned value must be freed using free_jobinfo
 */
extern select_jobinfo_t *copy_select_jobinfo(select_jobinfo_t *jobinfo)
{
	struct select_jobinfo *rc = NULL;

	if (jobinfo == NULL)
		;
	else if (jobinfo->magic != JOBINFO_MAGIC)
		error("copy_jobinfo: jobinfo magic bad");
	else {
		rc = xmalloc(sizeof(struct select_jobinfo));
		rc->dim_cnt = jobinfo->dim_cnt;
		memcpy(rc->geometry, jobinfo->geometry, sizeof(rc->geometry));
		memcpy(rc->conn_type, jobinfo->conn_type,
		       sizeof(rc->conn_type));
		memcpy(rc->start_loc, jobinfo->start_loc,
		       sizeof(rc->start_loc));
		rc->reboot = jobinfo->reboot;
		rc->rotate = jobinfo->rotate;
		rc->bg_record = jobinfo->bg_record;
		rc->bg_block_id = xstrdup(jobinfo->bg_block_id);
		rc->magic = JOBINFO_MAGIC;
		rc->mp_str = xstrdup(jobinfo->mp_str);
		rc->ionode_str = xstrdup(jobinfo->ionode_str);
		rc->block_cnode_cnt = jobinfo->block_cnode_cnt;
		rc->cleaning = jobinfo->cleaning;
		rc->cnode_cnt = jobinfo->cnode_cnt;
		rc->altered = jobinfo->altered;
		rc->blrtsimage = xstrdup(jobinfo->blrtsimage);
		rc->linuximage = xstrdup(jobinfo->linuximage);
		rc->mloaderimage = xstrdup(jobinfo->mloaderimage);
		rc->ramdiskimage = xstrdup(jobinfo->ramdiskimage);
		if (jobinfo->units_avail)
			rc->units_avail = bit_copy(jobinfo->units_avail);
		if (jobinfo->units_used)
			rc->units_used = bit_copy(jobinfo->units_used);
		rc->user_name = xstrdup(jobinfo->user_name);
	}

	return rc;
}
Ejemplo n.º 12
0
/* Clear active_feature_list,
 * then copy avail_feature_list into active_feature_list */
static void _copy_feature_list(void)
{
	node_feature_t *active_feature_ptr, *avail_feature_ptr;
	ListIterator feature_iter;

	(void) list_delete_all(active_feature_list, &_list_find_feature, NULL);

	feature_iter = list_iterator_create(avail_feature_list);
	while ((avail_feature_ptr = (node_feature_t *)list_next(feature_iter))){
		active_feature_ptr = xmalloc(sizeof(node_feature_t));
		active_feature_ptr->magic = FEATURE_MAGIC;
		active_feature_ptr->name = xstrdup(avail_feature_ptr->name);
		active_feature_ptr->node_bitmap =
			bit_copy(avail_feature_ptr->node_bitmap);
		list_append(active_feature_list, active_feature_ptr);
	}
	list_iterator_destroy(feature_iter);
}
Ejemplo n.º 13
0
static void _build_select_struct(struct job_record *job_ptr,
				 bitstr_t *bitmap, uint32_t node_cnt)
{
	int i;
	uint32_t total_cpus = 0;
	job_resources_t *job_resrcs_ptr;

	xassert(job_ptr);

	if (job_ptr->job_resrcs) {
		error("select_p_job_test: already have select_job");
		free_job_resources(&job_ptr->job_resrcs);
	}

	job_ptr->job_resrcs = job_resrcs_ptr = create_job_resources();
	job_resrcs_ptr->cpu_array_reps = xmalloc(sizeof(uint32_t));
	job_resrcs_ptr->cpu_array_value = xmalloc(sizeof(uint16_t));
	job_resrcs_ptr->cpus = xmalloc(sizeof(uint16_t) * node_cnt);
	job_resrcs_ptr->cpus_used = xmalloc(sizeof(uint16_t) * node_cnt);
/* 	job_resrcs_ptr->nhosts = node_cnt; */
	job_resrcs_ptr->nhosts = bit_set_count(bitmap);
	job_resrcs_ptr->ncpus = job_ptr->details->min_cpus;
	job_resrcs_ptr->node_bitmap = bit_copy(bitmap);
	job_resrcs_ptr->nodes = bitmap2node_name(bitmap);
	if (job_resrcs_ptr->node_bitmap == NULL)
		fatal("bit_copy malloc failure");

	job_resrcs_ptr->cpu_array_cnt = 1;
	if (job_ptr->details->min_cpus < bg_conf->cpus_per_mp)
		job_resrcs_ptr->cpu_array_value[0] = job_ptr->details->min_cpus;
	else
		job_resrcs_ptr->cpu_array_value[0] = bg_conf->cpus_per_mp;
	job_resrcs_ptr->cpu_array_reps[0] = node_cnt;
	total_cpus = bg_conf->cpu_ratio * node_cnt;

	for (i=0; i<node_cnt; i++)
		job_resrcs_ptr->cpus[i] = bg_conf->cpu_ratio;

	if (job_resrcs_ptr->ncpus != total_cpus) {
		error("select_p_job_test: ncpus mismatch %u != %u",
		      job_resrcs_ptr->ncpus, total_cpus);
	}
}
Ejemplo n.º 14
0
static int _attempt_backfill(void)
{
	DEF_TIMERS;
	bool filter_root = false;
	List job_queue;
	job_queue_rec_t *job_queue_rec;
	slurmdb_qos_rec_t *qos_ptr = NULL;
	int i, j, node_space_recs;
	struct job_record *job_ptr;
	struct part_record *part_ptr;
	uint32_t end_time, end_reserve;
	uint32_t time_limit, comp_time_limit, orig_time_limit;
	uint32_t min_nodes, max_nodes, req_nodes;
	bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
	bitstr_t *exc_core_bitmap = NULL;
	time_t now, sched_start, later_start, start_res, resv_end;
	node_space_map_t *node_space;
	struct timeval bf_time1, bf_time2;
	int sched_timeout = 2, yield_sleep = 1;
	int rc = 0;
	int job_test_count = 0;
	uint32_t *uid = NULL, nuser = 0;
	uint16_t *njobs = NULL;
	bool already_counted;
	uint32_t reject_array_job_id = 0;

#ifdef HAVE_CRAY
	/*
	 * Run a Basil Inventory immediately before setting up the schedule
	 * plan, to avoid race conditions caused by ALPS node state change.
	 * Needs to be done with the node-state lock taken.
	 */
	START_TIMER;
	if (select_g_reconfigure()) {
		debug4("backfill: not scheduling due to ALPS");
		return SLURM_SUCCESS;
	}
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		info("backfill: ALPS inventory completed, %s", TIME_STR);

	/* The Basil inventory can take a long time to complete. Process
	 * pending RPCs before starting the backfill scheduling logic */
	_yield_locks(1);
#endif

	START_TIMER;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		info("backfill: beginning");
	sched_start = now = time(NULL);

	if (slurm_get_root_filter())
		filter_root = true;

	job_queue = build_job_queue(true);
	if (list_count(job_queue) == 0) {
		debug("backfill: no jobs to backfill");
		list_destroy(job_queue);
		return 0;
	}

	gettimeofday(&bf_time1, NULL);

	slurmctld_diag_stats.bf_queue_len = list_count(job_queue);
	slurmctld_diag_stats.bf_queue_len_sum += slurmctld_diag_stats.
						 bf_queue_len;
	slurmctld_diag_stats.bf_last_depth = 0;
	slurmctld_diag_stats.bf_last_depth_try = 0;
	slurmctld_diag_stats.bf_when_last_cycle = now;
	bf_last_yields = 0;
	slurmctld_diag_stats.bf_active = 1;

	node_space = xmalloc(sizeof(node_space_map_t) *
			     (max_backfill_job_cnt + 3));
	node_space[0].begin_time = sched_start;
	node_space[0].end_time = sched_start + backfill_window;
	node_space[0].avail_bitmap = bit_copy(avail_node_bitmap);
	node_space[0].next = 0;
	node_space_recs = 1;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		_dump_node_space_table(node_space);

	if (max_backfill_job_per_user) {
		uid = xmalloc(BF_MAX_USERS * sizeof(uint32_t));
		njobs = xmalloc(BF_MAX_USERS * sizeof(uint16_t));
	}
	while ((job_queue_rec = (job_queue_rec_t *)
				list_pop_bottom(job_queue, sort_job_queue2))) {
		job_ptr  = job_queue_rec->job_ptr;
		orig_time_limit = job_ptr->time_limit;

		if ((time(NULL) - sched_start) >= sched_timeout) {
			uint32_t save_time_limit = job_ptr->time_limit;
			job_ptr->time_limit = orig_time_limit;
			if (debug_flags & DEBUG_FLAG_BACKFILL) {
				END_TIMER;
				info("backfill: completed yielding locks "
				     "after testing %d jobs, %s",
				     job_test_count, TIME_STR);
			}
			if (_yield_locks(yield_sleep) && !backfill_continue) {
				if (debug_flags & DEBUG_FLAG_BACKFILL) {
					info("backfill: system state changed, "
					     "breaking out after testing %d "
					     "jobs", job_test_count);
				}
				rc = 1;
				break;
			}
			job_ptr->time_limit = save_time_limit;
			/* Reset backfill scheduling timers, resume testing */
			sched_start = time(NULL);
			job_test_count = 0;
			START_TIMER;
		}

		part_ptr = job_queue_rec->part_ptr;
		job_test_count++;

		xfree(job_queue_rec);
		if (!IS_JOB_PENDING(job_ptr))
			continue;	/* started in other partition */
		if (!avail_front_end(job_ptr))
			continue;	/* No available frontend for this job */
		if (job_ptr->array_task_id != (uint16_t) NO_VAL) {
			if (reject_array_job_id == job_ptr->array_job_id)
				continue;  /* already rejected array element */
			/* assume reject whole array for now, clear if OK */
			reject_array_job_id = job_ptr->array_job_id;
		}
		job_ptr->part_ptr = part_ptr;

		if (debug_flags & DEBUG_FLAG_BACKFILL)
			info("backfill test for job %u", job_ptr->job_id);

		slurmctld_diag_stats.bf_last_depth++;
		already_counted = false;

		if (max_backfill_job_per_user) {
			for (j = 0; j < nuser; j++) {
				if (job_ptr->user_id == uid[j]) {
					njobs[j]++;
					if (debug_flags & DEBUG_FLAG_BACKFILL)
						debug("backfill: user %u: "
						      "#jobs %u",
						      uid[j], njobs[j]);
					break;
				}
			}
			if (j == nuser) { /* user not found */
				if (nuser < BF_MAX_USERS) {
					uid[j] = job_ptr->user_id;
					njobs[j] = 1;
					nuser++;
				} else {
					error("backfill: too many users in "
					      "queue. Consider increasing "
					      "BF_MAX_USERS");
				}
				if (debug_flags & DEBUG_FLAG_BACKFILL)
					debug2("backfill: found new user %u. "
					       "Total #users now %u",
					       job_ptr->user_id, nuser);
			} else {
				if (njobs[j] > max_backfill_job_per_user) {
					/* skip job */
					if (debug_flags & DEBUG_FLAG_BACKFILL)
						debug("backfill: have already "
						      "checked %u jobs for "
						      "user %u; skipping "
						      "job %u",
						      max_backfill_job_per_user,
						      job_ptr->user_id,
						      job_ptr->job_id);
					continue;
				}
			}
		}

		if (((part_ptr->state_up & PARTITION_SCHED) == 0) ||
		    (part_ptr->node_bitmap == NULL))
		 	continue;
		if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) && filter_root)
			continue;

		if ((!job_independent(job_ptr, 0)) ||
		    (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS))
			continue;

		/* Determine minimum and maximum node counts */
		min_nodes = MAX(job_ptr->details->min_nodes,
				part_ptr->min_nodes);
		if (job_ptr->details->max_nodes == 0)
			max_nodes = part_ptr->max_nodes;
		else
			max_nodes = MIN(job_ptr->details->max_nodes,
					part_ptr->max_nodes);
		max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */
		if (job_ptr->details->max_nodes)
			req_nodes = max_nodes;
		else
			req_nodes = min_nodes;
		if (min_nodes > max_nodes) {
			/* job's min_nodes exceeds partition's max_nodes */
			continue;
		}

		/* Determine job's expected completion time */
		if (job_ptr->time_limit == NO_VAL) {
			if (part_ptr->max_time == INFINITE)
				time_limit = 365 * 24 * 60; /* one year */
			else
				time_limit = part_ptr->max_time;
		} else {
			if (part_ptr->max_time == INFINITE)
				time_limit = job_ptr->time_limit;
			else
				time_limit = MIN(job_ptr->time_limit,
						 part_ptr->max_time);
		}
		comp_time_limit = time_limit;
		qos_ptr = job_ptr->qos_ptr;
		if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE) &&
		    slurm_get_preempt_mode())
			time_limit = job_ptr->time_limit = 1;
		else if (job_ptr->time_min && (job_ptr->time_min < time_limit))
			time_limit = job_ptr->time_limit = job_ptr->time_min;

		/* Determine impact of any resource reservations */
		later_start = now;
 TRY_LATER:
		if ((time(NULL) - sched_start) >= sched_timeout) {
			uint32_t save_time_limit = job_ptr->time_limit;
			job_ptr->time_limit = orig_time_limit;
			if (debug_flags & DEBUG_FLAG_BACKFILL) {
				END_TIMER;
				info("backfill: completed yielding locks 2"
				     "after testing %d jobs, %s",
				     job_test_count, TIME_STR);
			}
			if (_yield_locks(yield_sleep) && !backfill_continue) {
				if (debug_flags & DEBUG_FLAG_BACKFILL) {
					info("backfill: system state changed, "
					     "breaking out after testing %d "
					     "jobs", job_test_count);
				}
				rc = 1;
				break;
			}
			job_ptr->time_limit = save_time_limit;
			/* Reset backfill scheduling timers, resume testing */
			sched_start = time(NULL);
			job_test_count = 1;
			START_TIMER;
		}

		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		start_res   = later_start;
		later_start = 0;
		j = job_test_resv(job_ptr, &start_res, true, &avail_bitmap,
				  &exc_core_bitmap);
		if (j != SLURM_SUCCESS) {
			job_ptr->time_limit = orig_time_limit;
			continue;
		}
		if (start_res > now)
			end_time = (time_limit * 60) + start_res;
		else
			end_time = (time_limit * 60) + now;
		resv_end = find_resv_end(start_res);
		/* Identify usable nodes for this job */
		bit_and(avail_bitmap, part_ptr->node_bitmap);
		bit_and(avail_bitmap, up_node_bitmap);
		for (j=0; ; ) {
			if ((node_space[j].end_time > start_res) &&
			     node_space[j].next && (later_start == 0))
				later_start = node_space[j].end_time;
			if (node_space[j].end_time <= start_res)
				;
			else if (node_space[j].begin_time <= end_time) {
				bit_and(avail_bitmap,
					node_space[j].avail_bitmap);
			} else
				break;
			if ((j = node_space[j].next) == 0)
				break;
		}
		if ((resv_end++) &&
		    ((later_start == 0) || (resv_end < later_start))) {
			later_start = resv_end;
		}

		if (job_ptr->details->exc_node_bitmap) {
			bit_not(job_ptr->details->exc_node_bitmap);
			bit_and(avail_bitmap,
				job_ptr->details->exc_node_bitmap);
			bit_not(job_ptr->details->exc_node_bitmap);
		}

		/* Test if insufficient nodes remain OR
		 *	required nodes missing OR
		 *	nodes lack features */
		if ((bit_set_count(avail_bitmap) < min_nodes) ||
		    ((job_ptr->details->req_node_bitmap) &&
		     (!bit_super_set(job_ptr->details->req_node_bitmap,
				     avail_bitmap))) ||
		    (job_req_node_filter(job_ptr, avail_bitmap))) {
			if (later_start) {
				job_ptr->start_time = 0;
				goto TRY_LATER;
			}
			/* Job can not start until too far in the future */
			job_ptr->time_limit = orig_time_limit;
			job_ptr->start_time = sched_start + backfill_window;
			continue;
		}

		/* Identify nodes which are definitely off limits */
		FREE_NULL_BITMAP(resv_bitmap);
		resv_bitmap = bit_copy(avail_bitmap);
		bit_not(resv_bitmap);

		/* this is the time consuming operation */
		debug2("backfill: entering _try_sched for job %u.",
		       job_ptr->job_id);

		if (!already_counted) {
			slurmctld_diag_stats.bf_last_depth_try++;
			already_counted = true;
		}

		j = _try_sched(job_ptr, &avail_bitmap, min_nodes, max_nodes,
			       req_nodes, exc_core_bitmap);

		now = time(NULL);
		if (j != SLURM_SUCCESS) {
			job_ptr->time_limit = orig_time_limit;
			job_ptr->start_time = 0;
			continue;	/* not runable */
		}

		if (start_res > job_ptr->start_time) {
			job_ptr->start_time = start_res;
			last_job_update = now;
		}
		if (job_ptr->start_time <= now) {
			uint32_t save_time_limit = job_ptr->time_limit;
			int rc = _start_job(job_ptr, resv_bitmap);
			if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) {
				if (orig_time_limit == NO_VAL)
					job_ptr->time_limit = comp_time_limit;
				else
					job_ptr->time_limit = orig_time_limit;
				job_ptr->end_time = job_ptr->start_time +
						    (job_ptr->time_limit * 60);
			} else if ((rc == SLURM_SUCCESS) && job_ptr->time_min) {
				/* Set time limit as high as possible */
				job_ptr->time_limit = comp_time_limit;
				job_ptr->end_time = job_ptr->start_time +
						    (comp_time_limit * 60);
				_reset_job_time_limit(job_ptr, now,
						      node_space);
				time_limit = job_ptr->time_limit;
			} else {
				job_ptr->time_limit = orig_time_limit;
			}
			if (rc == ESLURM_ACCOUNTING_POLICY) {
				/* Unknown future start time, just skip job */
				job_ptr->start_time = 0;
				continue;
			} else if (rc != SLURM_SUCCESS) {
				/* Planned to start job, but something bad
				 * happended. */
				job_ptr->start_time = 0;
				break;
			} else {
				/* Started this job, move to next one */
				reject_array_job_id = 0;

				/* Update the database if job time limit
				 * changed and move to next job */
				if (save_time_limit != job_ptr->time_limit)
					jobacct_storage_g_job_start(acct_db_conn,
								    job_ptr);
				continue;
			}
		} else
			job_ptr->time_limit = orig_time_limit;

		if (later_start && (job_ptr->start_time > later_start)) {
			/* Try later when some nodes currently reserved for
			 * pending jobs are free */
			job_ptr->start_time = 0;
			goto TRY_LATER;
		}

		if (job_ptr->start_time > (sched_start + backfill_window)) {
			/* Starts too far in the future to worry about */
			continue;
		}

		if (node_space_recs >= max_backfill_job_cnt) {
			/* Already have too many jobs to deal with */
			break;
		}

		end_reserve = job_ptr->start_time + (time_limit * 60);
		if (_test_resv_overlap(node_space, avail_bitmap,
				       job_ptr->start_time, end_reserve)) {
			/* This job overlaps with an existing reservation for
			 * job to be backfill scheduled, which the sched
			 * plugin does not know about. Try again later. */
			later_start = job_ptr->start_time;
			job_ptr->start_time = 0;
			goto TRY_LATER;
		}

		/*
		 * Add reservation to scheduling table if appropriate
		 */
		if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
			continue;
		reject_array_job_id = 0;
		bit_not(avail_bitmap);
		_add_reservation(job_ptr->start_time, end_reserve,
				 avail_bitmap, node_space, &node_space_recs);
		if (debug_flags & DEBUG_FLAG_BACKFILL)
			_dump_node_space_table(node_space);
	}
	xfree(uid);
	xfree(njobs);
	FREE_NULL_BITMAP(avail_bitmap);
	FREE_NULL_BITMAP(exc_core_bitmap);
	FREE_NULL_BITMAP(resv_bitmap);

	for (i=0; ; ) {
		FREE_NULL_BITMAP(node_space[i].avail_bitmap);
		if ((i = node_space[i].next) == 0)
			break;
	}
	xfree(node_space);
	list_destroy(job_queue);
	gettimeofday(&bf_time2, NULL);
	_do_diag_stats(&bf_time1, &bf_time2, yield_sleep);
	if (debug_flags & DEBUG_FLAG_BACKFILL) {
		END_TIMER;
		info("backfill: completed testing %d jobs, %s",
		     job_test_count, TIME_STR);
	}
	return rc;
}
Ejemplo n.º 15
0
Archivo: gang.c Proyecto: corburn/slurm
/* Add the given job to the "active" structures of
 * the given partition and increment the run count */
static void _add_job_to_active(struct job_record *job_ptr,
			       struct gs_part *p_ptr)
{
	job_resources_t *job_res = job_ptr->job_resrcs;
	uint16_t job_gr_type;

	/* add job to active_resmap */
	job_gr_type = _get_part_gr_type(job_ptr->part_ptr);
	if ((job_gr_type == GS_CPU2) || (job_gr_type == GS_CORE) ||
	    (job_gr_type == GS_SOCKET)) {
		if (p_ptr->jobs_active == 0 && p_ptr->active_resmap) {
			uint32_t size = bit_size(p_ptr->active_resmap);
			bit_nclear(p_ptr->active_resmap, 0, size-1);
		}
		add_job_to_cores(job_res, &(p_ptr->active_resmap),
				 gs_bits_per_node);
		if (job_gr_type == GS_SOCKET)
			_fill_sockets(job_res->node_bitmap, p_ptr);
	} else { /* GS_NODE or GS_CPU */
		if (!p_ptr->active_resmap) {
			if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
				info("gang: _add_job_to_active: job %u first",
				     job_ptr->job_id);
			}
			p_ptr->active_resmap = bit_copy(job_res->node_bitmap);
		} else if (p_ptr->jobs_active == 0) {
			if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
				info("gang: _add_job_to_active: job %u copied",
				     job_ptr->job_id);
			}
			bit_copybits(p_ptr->active_resmap,
				     job_res->node_bitmap);
		} else {
			if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
				info("gang: _add_job_to_active: adding job %u",
				     job_ptr->job_id);
			}
			bit_or(p_ptr->active_resmap, job_res->node_bitmap);
		}
	}

	/* add job to the active_cpus array */
	if (job_gr_type == GS_CPU) {
		uint32_t i, a, sz = bit_size(p_ptr->active_resmap);
		if (!p_ptr->active_cpus) {
			/* create active_cpus array */
			p_ptr->active_cpus = xmalloc(sz * sizeof(uint16_t));
		}
		if (p_ptr->jobs_active == 0) {
			/* overwrite the existing values in active_cpus */
			for (a = 0, i = 0; i < sz; i++) {
				if (bit_test(job_res->node_bitmap, i)) {
					p_ptr->active_cpus[i] =
						job_res->cpus[a++];
				} else {
					p_ptr->active_cpus[i] = 0;
				}
			}
		} else {
			/* add job to existing jobs in the active cpus */
			for (a = 0, i = 0; i < sz; i++) {
				if (bit_test(job_res->node_bitmap, i)) {
					uint16_t limit = _get_phys_bit_cnt(i);
					p_ptr->active_cpus[i] +=
						job_res->cpus[a++];
					/* when adding shadows, the resources
					 * may get overcommitted */
					if (p_ptr->active_cpus[i] > limit)
						p_ptr->active_cpus[i] = limit;
				}
			}
		}
	}
	p_ptr->jobs_active += 1;
}
Ejemplo n.º 16
0
/* Create a reservation for a job in the future */
static void _add_reservation(uint32_t start_time, uint32_t end_reserve,
			     bitstr_t *res_bitmap,
			     node_space_map_t *node_space,
			     int *node_space_recs)
{
	bool placed = false;
	int i, j;

	/* If we decrease the resolution of our timing information, this can
	 * decrease the number of records managed and increase performance */
	start_time = (start_time / backfill_resolution) * backfill_resolution;
	end_reserve = (end_reserve / backfill_resolution) * backfill_resolution;

	for (j=0; ; ) {
		if (node_space[j].end_time > start_time) {
			/* insert start entry record */
			i = *node_space_recs;
			node_space[i].begin_time = start_time;
			node_space[i].end_time = node_space[j].end_time;
			node_space[j].end_time = start_time;
			node_space[i].avail_bitmap =
				bit_copy(node_space[j].avail_bitmap);
			node_space[i].next = node_space[j].next;
			node_space[j].next = i;
			(*node_space_recs)++;
			placed = true;
		}
		if (node_space[j].end_time == start_time) {
			/* no need to insert new start entry record */
			placed = true;
		}
		if (placed == true) {
			j = node_space[j].next;
			if (j && (end_reserve < node_space[j].end_time)) {
				/* insert end entry record */
				i = *node_space_recs;
				node_space[i].begin_time = end_reserve;
				node_space[i].end_time = node_space[j].
							 end_time;
				node_space[j].end_time = end_reserve;
				node_space[i].avail_bitmap =
					bit_copy(node_space[j].avail_bitmap);
				node_space[i].next = node_space[j].next;
				node_space[j].next = i;
				(*node_space_recs)++;
			}
			break;
		}
		if ((j = node_space[j].next) == 0)
			break;
	}

	for (j=0; ; ) {
		if ((node_space[j].begin_time >= start_time) &&
		    (node_space[j].end_time <= end_reserve))
			bit_and(node_space[j].avail_bitmap, res_bitmap);
		if ((node_space[j].begin_time >= end_reserve) ||
		    ((j = node_space[j].next) == 0))
			break;
	}
}
Ejemplo n.º 17
0
/* Attempt to schedule a specific job on specific available nodes
 * IN job_ptr - job to schedule
 * IN/OUT avail_bitmap - nodes available/selected to use
 * IN exc_core_bitmap - cores which can not be used
 * RET SLURM_SUCCESS on success, otherwise an error code
 */
static int  _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap,
		       uint32_t min_nodes, uint32_t max_nodes,
		       uint32_t req_nodes, bitstr_t *exc_core_bitmap)
{
	bitstr_t *tmp_bitmap;
	int rc = SLURM_SUCCESS;
	int feat_cnt = _num_feature_count(job_ptr);
	List preemptee_candidates = NULL;

	if (feat_cnt) {
		/* Ideally schedule the job feature by feature,
		 * but I don't want to add that complexity here
		 * right now, so clear the feature counts and try
		 * to schedule. This will work if there is only
		 * one feature count. It should work fairly well
		 * in cases where there are multiple feature
		 * counts. */
		struct job_details *detail_ptr = job_ptr->details;
		ListIterator feat_iter;
		struct feature_record *feat_ptr;
		int i = 0, list_size;
		uint16_t *feat_cnt_orig = NULL, high_cnt = 0;

		/* Clear the feature counts */
		list_size = list_count(detail_ptr->feature_list);
		feat_cnt_orig = xmalloc(sizeof(uint16_t) * list_size);
		feat_iter = list_iterator_create(detail_ptr->feature_list);
		while ((feat_ptr =
			(struct feature_record *) list_next(feat_iter))) {
			high_cnt = MAX(high_cnt, feat_ptr->count);
			feat_cnt_orig[i++] = feat_ptr->count;
			feat_ptr->count = 0;
		}
		list_iterator_destroy(feat_iter);

		if ((job_req_node_filter(job_ptr, *avail_bitmap) !=
		     SLURM_SUCCESS) ||
		    (bit_set_count(*avail_bitmap) < high_cnt)) {
			rc = ESLURM_NODES_BUSY;
		} else {
			preemptee_candidates =
					slurm_find_preemptable_jobs(job_ptr);
			rc = select_g_job_test(job_ptr, *avail_bitmap,
					       high_cnt, max_nodes, req_nodes,
					       SELECT_MODE_WILL_RUN,
					       preemptee_candidates, NULL,
					       exc_core_bitmap);
		}

		/* Restore the feature counts */
		i = 0;
		feat_iter = list_iterator_create(detail_ptr->feature_list);
		while ((feat_ptr =
			(struct feature_record *) list_next(feat_iter))) {
			feat_ptr->count = feat_cnt_orig[i++];
		}
		list_iterator_destroy(feat_iter);
		xfree(feat_cnt_orig);
	} else {
		/* Try to schedule the job. First on dedicated nodes
		 * then on shared nodes (if so configured). */
		uint16_t orig_shared;
		time_t now = time(NULL);
		char str[100];

		preemptee_candidates = slurm_find_preemptable_jobs(job_ptr);
		orig_shared = job_ptr->details->shared;
		job_ptr->details->shared = 0;
		tmp_bitmap = bit_copy(*avail_bitmap);

		if (exc_core_bitmap) {
			bit_fmt(str, (sizeof(str) - 1), exc_core_bitmap);
			debug2(" _try_sched with exclude core bitmap: %s",str);
		}

		rc = select_g_job_test(job_ptr, *avail_bitmap, min_nodes,
				       max_nodes, req_nodes,
				       SELECT_MODE_WILL_RUN,
				       preemptee_candidates, NULL,
				       exc_core_bitmap);

		job_ptr->details->shared = orig_shared;

		if (((rc != SLURM_SUCCESS) || (job_ptr->start_time > now)) &&
		    (orig_shared != 0)) {
			FREE_NULL_BITMAP(*avail_bitmap);
			*avail_bitmap= tmp_bitmap;
			rc = select_g_job_test(job_ptr, *avail_bitmap,
					       min_nodes, max_nodes, req_nodes,
					       SELECT_MODE_WILL_RUN,
					       preemptee_candidates, NULL,
					       exc_core_bitmap);
		} else
			FREE_NULL_BITMAP(tmp_bitmap);
	}

	if (preemptee_candidates)
		list_destroy(preemptee_candidates);
	return rc;

}
Ejemplo n.º 18
0
/*
 * _build_part_bitmap - update the total_cpus, total_nodes, and node_bitmap
 *	for the specified partition, also reset the partition pointers in
 *	the node back to this partition.
 * IN part_ptr - pointer to the partition
 * RET 0 if no error, errno otherwise
 * global: node_record_table_ptr - pointer to global node table
 * NOTE: this does not report nodes defined in more than one partition. this
 *	is checked only upon reading the configuration file, not on an update
 */
static int _build_part_bitmap(struct part_record *part_ptr)
{
	char *this_node_name;
	bitstr_t *old_bitmap;
	struct node_record *node_ptr;	/* pointer to node_record */
	hostlist_t host_list;

	part_ptr->total_cpus = 0;
	part_ptr->total_nodes = 0;

	if (part_ptr->node_bitmap == NULL) {
		part_ptr->node_bitmap = bit_alloc(node_record_count);
		old_bitmap = NULL;
	} else {
		old_bitmap = bit_copy(part_ptr->node_bitmap);
		bit_nclear(part_ptr->node_bitmap, 0,
			   node_record_count - 1);
	}

	if (part_ptr->nodes == NULL) {	/* no nodes in partition */
		_unlink_free_nodes(old_bitmap, part_ptr);
		FREE_NULL_BITMAP(old_bitmap);
		return 0;
	}

	if ((host_list = hostlist_create(part_ptr->nodes)) == NULL) {
		FREE_NULL_BITMAP(old_bitmap);
		error("hostlist_create error on %s, %m",
		      part_ptr->nodes);
		return ESLURM_INVALID_NODE_NAME;
	}

	while ((this_node_name = hostlist_shift(host_list))) {
		node_ptr = find_node_record(this_node_name);
		if (node_ptr == NULL) {
			error("_build_part_bitmap: invalid node name %s",
				this_node_name);
			free(this_node_name);
			FREE_NULL_BITMAP(old_bitmap);
			hostlist_destroy(host_list);
			return ESLURM_INVALID_NODE_NAME;
		}
		part_ptr->total_nodes++;
		if (slurmctld_conf.fast_schedule)
			part_ptr->total_cpus += node_ptr->config_ptr->cpus;
		else
			part_ptr->total_cpus += node_ptr->cpus;
		node_ptr->part_cnt++;
		xrealloc(node_ptr->part_pptr, (node_ptr->part_cnt *
			sizeof(struct part_record *)));
		node_ptr->part_pptr[node_ptr->part_cnt-1] = part_ptr;
		if (old_bitmap)
			bit_clear(old_bitmap,
				  (int) (node_ptr -
					 node_record_table_ptr));
		bit_set(part_ptr->node_bitmap,
			(int) (node_ptr - node_record_table_ptr));
		free(this_node_name);
	}
	hostlist_destroy(host_list);

	_unlink_free_nodes(old_bitmap, part_ptr);
	last_node_update = time(NULL);
	FREE_NULL_BITMAP(old_bitmap);
	return 0;
}
Ejemplo n.º 19
0
/*
 * Attempt to start a job
 * jobid     (IN) - job id
 * task_cnt  (IN) - total count of tasks to start
 * hostlist  (IN) - SLURM hostlist expression with no repeated hostnames
 * tasklist  (IN/OUT) - comma separated list of hosts with tasks to be started,
 *                  list hostname once per task to start
 * comment_ptr (IN) - new comment field for the job or NULL for no change
 * err_code (OUT) - Moab error code
 * err_msg  (OUT) - Moab error message
 */
static int	_start_job(uint32_t jobid, int task_cnt, char *hostlist,
			char *tasklist, char *comment_ptr,
			int *err_code, char **err_msg)
{
	int rc = 0, old_task_cnt = 1;
	struct job_record *job_ptr;
	/* Write lock on job info, read lock on node info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };
	char *new_node_list = NULL;
	static char tmp_msg[128];
	bitstr_t *new_bitmap = (bitstr_t *) NULL;
	bitstr_t *save_req_bitmap = (bitstr_t *) NULL;
	bitoff_t i, bsize;
	int ll; /* layout info index */
	char *node_name, *node_idx, *node_cur, *save_req_nodes = NULL;
	size_t node_name_len;
	static uint32_t cr_test = 0, cr_enabled = 0;

	if (cr_test == 0) {
		select_g_get_info_from_plugin(SELECT_CR_PLUGIN, NULL,
						&cr_enabled);
		cr_test = 1;
	}

	lock_slurmctld(job_write_lock);
	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		*err_code = -700;
		*err_msg = "No such job";
		error("wiki: Failed to find job %u", jobid);
		rc = -1;
		goto fini;
	}

	if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) {
		*err_code = -700;
		*err_msg = "Job not pending, can't start";
		error("wiki: Attempt to start job %u in state %s",
			jobid, job_state_string(job_ptr->job_state));
		rc = -1;
		goto fini;
	}

	if (comment_ptr) {
		char *reserved = strstr(comment_ptr, "RESERVED:");
		if (reserved) {
			reserved += 9;
			job_ptr->details->reserved_resources =
				strtol(reserved, NULL, 10);
		}
		xfree(job_ptr->comment);
		job_ptr->comment = xstrdup(comment_ptr);
	}

	if (task_cnt) {
		new_node_list = xstrdup(hostlist);
		if (node_name2bitmap(new_node_list, false, &new_bitmap) != 0) {
			*err_code = -700;
			*err_msg = "Invalid TASKLIST";
			error("wiki: Attempt to set invalid node list for "
				"job %u, %s",
				jobid, hostlist);
			xfree(new_node_list);
			rc = -1;
			goto fini;
		}

		if (!bit_super_set(new_bitmap, avail_node_bitmap)) {
			/* Selected node is UP and not responding
			 * or it just went DOWN */
			*err_code = -700;
			*err_msg = "TASKLIST includes non-responsive node";
			error("wiki: Attempt to use non-responsive nodes for "
				"job %u, %s",
				jobid, hostlist);
			xfree(new_node_list);
			FREE_NULL_BITMAP(new_bitmap);
			rc = -1;
			goto fini;
		}

		/* User excluded node list incompatible with Wiki
		 * Exclude all nodes not explicitly requested */
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		job_ptr->details->exc_node_bitmap = bit_copy(new_bitmap);
		bit_not(job_ptr->details->exc_node_bitmap);
	}

	/* Build layout information from tasklist (assuming that Moab
	 * sends a non-bracketed list of nodes, repeated as many times
	 * as cpus should be used per node); at this point, node names
	 * are comma-separated. This is _not_ a fast algorithm as it
	 * performs many string compares. */
	xfree(job_ptr->details->req_node_layout);
	if (task_cnt && cr_enabled) {
		uint16_t cpus_per_task = MAX(1, job_ptr->details->cpus_per_task);
		job_ptr->details->req_node_layout = (uint16_t *)
			xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t));
		bsize = bit_size(new_bitmap);
		for (i = 0, ll = -1; i < bsize; i++) {
			if (!bit_test(new_bitmap, i))
				continue;
			ll++;
			node_name = node_record_table_ptr[i].name;
			node_name_len  = strlen(node_name);
			if (node_name_len == 0)
				continue;
			node_cur = tasklist;
			while (*node_cur) {
				if ((node_idx = strstr(node_cur, node_name))) {
					if ((node_idx[node_name_len] == ',') ||
				 	    (node_idx[node_name_len] == '\0')) {
						job_ptr->details->
							req_node_layout[ll] +=
							cpus_per_task;
					}
					node_cur = strchr(node_idx, ',');
					if (node_cur)
						continue;
				}
				break;
			}
		}
	}

	/* save and update job state to start now */
	save_req_nodes = job_ptr->details->req_nodes;
	job_ptr->details->req_nodes = new_node_list;
	save_req_bitmap = job_ptr->details->req_node_bitmap;
	job_ptr->details->req_node_bitmap = new_bitmap;
	old_task_cnt = job_ptr->details->min_cpus;
	job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt);
	job_ptr->priority = 100000000;

 fini:	unlock_slurmctld(job_write_lock);
	if (rc)
		return rc;

	/* No errors so far */
	(void) schedule(INFINITE);	/* provides own locking */

	/* Check to insure the job was actually started */
	lock_slurmctld(job_write_lock);
	if (job_ptr->job_id != jobid)
		job_ptr = find_job_record(jobid);

	if (job_ptr && (job_ptr->job_id == jobid) &&
	    (!IS_JOB_RUNNING(job_ptr))) {
		uint16_t wait_reason = 0;
		char *wait_string;

		if (IS_JOB_FAILED(job_ptr))
			wait_string = "Invalid request, job aborted";
		else {
			wait_reason = job_ptr->state_reason;
			if (wait_reason == WAIT_HELD) {
				/* some job is completing, slurmctld did
				 * not even try to schedule this job */
				wait_reason = WAIT_RESOURCES;
			}
			wait_string = job_reason_string(wait_reason);
			job_ptr->state_reason = WAIT_HELD;
			xfree(job_ptr->state_desc);
		}
		*err_code = -910 - wait_reason;
		snprintf(tmp_msg, sizeof(tmp_msg),
			"Could not start job %u(%s): %s",
			jobid, new_node_list, wait_string);
		*err_msg = tmp_msg;
		error("wiki: %s", tmp_msg);

		/* restore some of job state */
		job_ptr->priority = 0;
		job_ptr->details->min_cpus = old_task_cnt;
		rc = -1;
	}

	if (job_ptr && (job_ptr->job_id == jobid) && job_ptr->details) {
		/* Restore required node list in case job requeued */
		xfree(job_ptr->details->req_nodes);
		job_ptr->details->req_nodes = save_req_nodes;
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		job_ptr->details->req_node_bitmap = save_req_bitmap;
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		xfree(job_ptr->details->req_node_layout);
	} else {
		error("wiki: start_job(%u) job missing", jobid);
		xfree(save_req_nodes);
		FREE_NULL_BITMAP(save_req_bitmap);
	}

	unlock_slurmctld(job_write_lock);
	schedule_node_save();	/* provides own locking */
	schedule_job_save();	/* provides own locking */
	return rc;
}
Ejemplo n.º 20
0
extern int select_nodeinfo_get(select_nodeinfo_t *nodeinfo,
			       enum select_nodedata_type dinfo,
			       enum node_states state,
			       void *data)
{
	int rc = SLURM_SUCCESS;
	uint16_t *uint16 = (uint16_t *) data;
	uint32_t *uint32 = (uint32_t *) data;
	bitstr_t **bitmap = (bitstr_t **) data;
	char **tmp_char = (char **) data;
	ListIterator itr = NULL;
	node_subgrp_t *subgrp = NULL;

	if (nodeinfo == NULL) {
		error("get_nodeinfo: nodeinfo not set");
		return SLURM_ERROR;
	}

	if (nodeinfo->magic != NODEINFO_MAGIC) {
		error("get_nodeinfo: nodeinfo magic bad");
		return SLURM_ERROR;
	}

	switch (dinfo) {
	case SELECT_NODEDATA_BITMAP_SIZE:
		*uint16 = nodeinfo->bitmap_size;
		break;
	case SELECT_NODEDATA_SUBGRP_SIZE:
		*uint16 = 0;
		if (!nodeinfo->subgrp_list)
			return SLURM_ERROR;
		*uint16 = list_count(nodeinfo->subgrp_list);
		break;
	case SELECT_NODEDATA_SUBCNT:
		*uint16 = 0;
		if (!nodeinfo->subgrp_list)
			return SLURM_ERROR;
		itr = list_iterator_create(nodeinfo->subgrp_list);
		while ((subgrp = list_next(itr))) {
			if (subgrp->state == state) {
				*uint16 = subgrp->cnode_cnt;
				break;
			}
		}
		list_iterator_destroy(itr);
		break;
	case SELECT_NODEDATA_BITMAP:
		*bitmap = NULL;
		if (!nodeinfo->subgrp_list)
			return SLURM_ERROR;
		itr = list_iterator_create(nodeinfo->subgrp_list);
		while ((subgrp = list_next(itr))) {
			if (subgrp->state == state) {
				*bitmap = bit_copy(subgrp->bitmap);
				break;
			}
		}
		list_iterator_destroy(itr);
		break;
	case SELECT_NODEDATA_RACK_MP:
		if (nodeinfo->ba_mp)
			*tmp_char = xstrdup(nodeinfo->ba_mp->loc);
		else if (nodeinfo->rack_mp)
			*tmp_char = xstrdup(nodeinfo->rack_mp);
		break;
	case SELECT_NODEDATA_STR:
		*tmp_char = NULL;
		if (!nodeinfo->subgrp_list)
			return SLURM_ERROR;
		itr = list_iterator_create(nodeinfo->subgrp_list);
		while ((subgrp = list_next(itr))) {
			if (subgrp->state == state) {
				*tmp_char = xstrdup(subgrp->str);
				break;
			}
		}
		list_iterator_destroy(itr);
		break;
	case SELECT_NODEDATA_EXTRA_INFO:
		if (nodeinfo->extra_info)
			*tmp_char = xstrdup(nodeinfo->extra_info);
		if (nodeinfo->failed_cnodes)
			xstrfmtcat(*tmp_char, "Failed cnodes=%s",
				   nodeinfo->failed_cnodes);
		break;
	case SELECT_NODEDATA_MEM_ALLOC:
		*uint32 = 0;
		break;
	default:
		error("Unsupported option %d for get_nodeinfo.", dinfo);
		rc = SLURM_ERROR;
		break;
	}
	return rc;
}
Ejemplo n.º 21
0
extern bg_record_t *create_small_record(bg_record_t *bg_record,
					bitstr_t *ionodes, int size)
{
	bg_record_t *found_record = NULL;
	ba_mp_t *new_ba_mp = NULL;
	ba_mp_t *ba_mp = NULL;

	found_record = (bg_record_t*) xmalloc(sizeof(bg_record_t));
	found_record->magic = BLOCK_MAGIC;

	/* This will be a list containing jobs running on this
	   block */
	if (bg_conf->sub_blocks)
		found_record->job_list = list_create(NULL);
	found_record->job_running = NO_JOB_RUNNING;

#ifdef HAVE_BGL
	found_record->node_use = SELECT_COPROCESSOR_MODE;
	found_record->blrtsimage = xstrdup(bg_record->blrtsimage);
#endif
#ifdef HAVE_BG_L_P
	found_record->linuximage = xstrdup(bg_record->linuximage);
	found_record->ramdiskimage = xstrdup(bg_record->ramdiskimage);
#endif
	found_record->mloaderimage = xstrdup(bg_record->mloaderimage);

	if (bg_record->conn_type[0] >= SELECT_SMALL)
		found_record->conn_type[0] = bg_record->conn_type[0];
	else
		found_record->conn_type[0] = SELECT_SMALL;

	xassert(bg_conf->cpu_ratio);
	found_record->cpu_cnt = bg_conf->cpu_ratio * size;
	found_record->cnode_cnt = size;

	found_record->ionode_bitmap = bit_copy(ionodes);
	ba_set_ionode_str(found_record);

	found_record->ba_mp_list = list_create(destroy_ba_mp);

	slurm_mutex_lock(&ba_system_mutex);
	if (bg_record->ba_mp_list)
		ba_mp = list_peek(bg_record->ba_mp_list);
	if (!ba_mp) {
		if (bg_record->mp_str) {
			int j = 0, dim;
			char *nodes = bg_record->mp_str;
			uint16_t coords[SYSTEM_DIMENSIONS];
			while (nodes[j] != '\0') {
				if ((nodes[j] >= '0' && nodes[j] <= '9')
				    || (nodes[j] >= 'A' && nodes[j] <= 'Z')) {
					break;
				}
				j++;
			}
			if (nodes[j] && ((strlen(nodes)
					  - (j + SYSTEM_DIMENSIONS)) >= 0)) {
				for (dim = 0; dim < SYSTEM_DIMENSIONS;
				     dim++, j++)
					coords[dim] = select_char2coord(
						nodes[j]);
				ba_mp = coord2ba_mp(coords);
			}
			error("you gave me a list with no ba_mps using %s",
			      ba_mp->coord_str);
		} else {
			ba_mp = coord2ba_mp(found_record->start);
			error("you gave me a record with no ba_mps "
			      "and no nodes either using %s",
			      ba_mp->coord_str);
		}
	}

	xassert(ba_mp);

	new_ba_mp = ba_copy_mp(ba_mp);
	slurm_mutex_unlock(&ba_system_mutex);
	/* We need to have this node wrapped in Q to handle
	   wires correctly when creating around the midplane.
	*/
	ba_setup_mp(new_ba_mp, false, true);

	new_ba_mp->used = BA_MP_USED_TRUE;

	/* Create these now so we can deal with error cnodes if/when
	   they happen.  Since this is the easiest place to figure it
	   out for blocks that don't use the entire block */
	if ((new_ba_mp->cnode_bitmap =
	     ba_create_ba_mp_cnode_bitmap(found_record))) {
		new_ba_mp->cnode_err_bitmap = bit_alloc(bg_conf->mp_cnode_cnt);
		new_ba_mp->cnode_usable_bitmap =
			bit_copy(new_ba_mp->cnode_bitmap);
	}

	list_append(found_record->ba_mp_list, new_ba_mp);
	found_record->mp_count = 1;
	found_record->mp_str = xstrdup_printf(
		"%s%s",
		bg_conf->slurm_node_prefix, new_ba_mp->coord_str);

	process_nodes(found_record, false);

	/* Force small blocks to always be non-full system blocks.
	 * This really only plays a part on sub-midplane systems. */
	found_record->full_block = 0;

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
		info("made small block of %s[%s]",
		     found_record->mp_str, found_record->ionode_str);

	return found_record;
}
Ejemplo n.º 22
0
/*
 * route_p_split_hostlist - logic to split an input hostlist into
 *                           a set of hostlists to forward to.
 *
 * IN: hl        - hostlist_t   - list of every node to send message to
 *                                will be empty on return;
 * OUT: sp_hl    - hostlist_t** - the array of hostlists that will be malloced
 * OUT: count    - int*         - the count of created hostlists
 * RET: SLURM_SUCCESS - int
 *
 * Note: created hostlist will have to be freed independently using
 *       hostlist_destroy by the caller.
 * Note: the hostlist_t array will have to be xfree.
 */
extern int route_p_split_hostlist(hostlist_t hl,
				  hostlist_t** sp_hl,
				  int* count)
{
	int i, j, k, hl_ndx, msg_count, sw_count, lst_count;
	char  *buf;
	bitstr_t *nodes_bitmap = NULL;		/* nodes in message list */
	bitstr_t *fwd_bitmap = NULL;		/* nodes in forward list */

	msg_count = hostlist_count(hl);
	if (switch_record_cnt == 0) {
		/* configs have not already been processed */
		slurm_conf_init(NULL);
		if (init_node_conf()) {
			fatal("ROUTE: Failed to init slurm config");
		}
		if (build_all_nodeline_info(false)) {
			fatal("ROUTE: Failed to build node config");
		}
		rehash_node();

		if (slurm_topo_build_config() != SLURM_SUCCESS) {
			fatal("ROUTE: Failed to build topology config");
		}
	}
	*sp_hl = (hostlist_t*) xmalloc(switch_record_cnt * sizeof(hostlist_t));
	/* create bitmap of nodes to send message too */
	if (hostlist2bitmap (hl, false, &nodes_bitmap) != SLURM_SUCCESS) {
		buf = hostlist_ranged_string_xmalloc(hl);
		fatal("ROUTE: Failed to make bitmap from hostlist=%s.", buf);
	}

	/* Find lowest level switch containing all the nodes in the list */
	j = 0;
	for (i = 0; i <= switch_levels; i++) {
		for (j=0; j<switch_record_cnt; j++) {
			if (switch_record_table[j].level == i) {
				if (bit_super_set(nodes_bitmap,
						  switch_record_table[j].
						  node_bitmap)) {
					/* All nodes in message list are in
					 * this switch */
					break;
				}
			}
		}
		if (j < switch_record_cnt) {
			/* Got here via break after bit_super_set */
			break; // 'j' is our switch
		} /* else, no switches at this level reach all nodes */
	}
	if (i > switch_levels) {
		/* This can only happen if trying to schedule multiple physical
		 * clusters as a single logical cluster under the control of a
		 * single slurmctld daemon, and sending something like a
		 * node_registation request to all nodes.
		 * Revert to default behavior*/
		if (debug_flags & DEBUG_FLAG_ROUTE) {
			buf = hostlist_ranged_string_xmalloc(hl);
			debug("ROUTE: didn't find switch containing nodes=%s",
			      buf);
			xfree(buf);
		}
		FREE_NULL_BITMAP(nodes_bitmap);
		xfree(*sp_hl);
		return route_split_hostlist_treewidth(hl, sp_hl, count);
	}
	if (switch_record_table[j].level == 0) {
		/* This is a leaf switch. Construct list based on TreeWidth */
		FREE_NULL_BITMAP(nodes_bitmap);
		xfree(*sp_hl);
		return route_split_hostlist_treewidth(hl, sp_hl, count);
	}
	/* loop through children, construction a hostlist for each child switch
	 * with nodes in the message list */
	hl_ndx = 0;
	lst_count = 0;
	for (i=0; i < switch_record_table[j].num_switches; i++) {
		k = switch_record_table[j].switch_index[i];
		fwd_bitmap = bit_copy(switch_record_table[k].node_bitmap);
		bit_and(fwd_bitmap, nodes_bitmap);
		sw_count = bit_set_count(fwd_bitmap);
		if (sw_count == 0) {
			continue; /* no nodes on this switch in message list */
		}
		(*sp_hl)[hl_ndx] = bitmap2hostlist(fwd_bitmap);
		/* Now remove nodes from this switch from message list */
		bit_not(fwd_bitmap);
		bit_and(nodes_bitmap, fwd_bitmap);
		FREE_NULL_BITMAP(fwd_bitmap);
		if (debug_flags & DEBUG_FLAG_ROUTE) {
			buf = hostlist_ranged_string_xmalloc((*sp_hl)[hl_ndx]);
			debug("ROUTE: ... sublist[%d] switch=%s :: %s",
			      i, switch_record_table[i].name, buf);
			xfree(buf);
		}
		hl_ndx++;
		lst_count += sw_count;
		if (lst_count == msg_count)
			break; /* all nodes in message are in a child list */
	}
	FREE_NULL_BITMAP(nodes_bitmap);

	*count = hl_ndx;
	return SLURM_SUCCESS;

}
Ejemplo n.º 23
0
int
main(int argc, char *argv[])
{
	note("Testing static decl");
	{
		bitstr_t bit_decl(bs, 65);
		/*bitstr_t *bsp = bs;*/

		bit_set(bs,9);
		bit_set(bs,14);
		TEST(bit_test(bs,9), "bit 9 set"); 
		TEST(!bit_test(bs,12), "bit 12 not set");
		TEST(bit_test(bs,14), "bit 14 set" );
		/*bit_free(bsp);*/	/* triggers TEST in bit_free - OK */
	}
	note("Testing basic vixie functions");
	{
		bitstr_t *bs = bit_alloc(16), *bs2;


		/*bit_set(bs, 42);*/ 	/* triggers TEST in bit_set - OK */
		bit_set(bs,9);
		bit_set(bs,14);
		TEST(bit_test(bs,9), "bit 9 set"); 
		TEST(!bit_test(bs,12), "bit 12 not set" );
		TEST(bit_test(bs,14), "bit 14 set");

		bs2 = bit_copy(bs);
		bit_fill_gaps(bs2);
		TEST(bit_ffs(bs2) == 9, "first bit set = 9 ");
		TEST(bit_fls(bs2) == 14, "last bit set = 14");
		TEST(bit_set_count(bs2) == 6, "bitstring");
		TEST(bit_test(bs2,12), "bitstring");
		TEST(bit_super_set(bs,bs2) == 1, "bitstring");
		TEST(bit_super_set(bs2,bs) == 0, "bitstring");

		bit_clear(bs,14);
		TEST(!bit_test(bs,14), "bitstring");

		bit_nclear(bs,9,14);
		TEST(!bit_test(bs,9), "bitstring");
		TEST(!bit_test(bs,12), "bitstring");
		TEST(!bit_test(bs,14), "bitstring");

		bit_nset(bs,9,14);
		TEST(bit_test(bs,9), "bitstring");
		TEST(bit_test(bs,12), "bitstring");
		TEST(bit_test(bs,14), "bitstring");

		TEST(bit_ffs(bs) == 9, "ffs");
		TEST(bit_ffc(bs) == 0, "ffc");
		bit_nset(bs,0,8);
		TEST(bit_ffc(bs) == 15, "ffc");

		bit_free(bs);
		/*bit_set(bs,9); */	/* triggers TEST in bit_set - OK */
	}
	note("Testing and/or/not");
	{
		bitstr_t *bs1 = bit_alloc(128);
		bitstr_t *bs2 = bit_alloc(128);

		bit_set(bs1, 100);
		bit_set(bs1, 104);
		bit_set(bs2, 100);

		bit_and(bs1, bs2);
		TEST(bit_test(bs1, 100), "and");
		TEST(!bit_test(bs1, 104), "and");

		bit_set(bs2, 110);
		bit_set(bs2, 111);
		bit_set(bs2, 112);
		bit_or(bs1, bs2);
		TEST(bit_test(bs1, 100), "or");
		TEST(bit_test(bs1, 110), "or");
		TEST(bit_test(bs1, 111), "or");
		TEST(bit_test(bs1, 112), "or");

		bit_not(bs1);
		TEST(!bit_test(bs1, 100), "not");
		TEST(bit_test(bs1, 12), "not");

		bit_free(bs1);
		bit_free(bs2);
	}

	note("testing bit selection");
	{
		bitstr_t *bs1 = bit_alloc(128), *bs2;
		bit_set(bs1, 21);
		bit_set(bs1, 100);
		bit_fill_gaps(bs1);
		bs2 = bit_pick_cnt(bs1,20);

		if (bs2) {
			TEST(bit_set_count(bs2) == 20, "pick");
			TEST(bit_ffs(bs2) == 21, "pick");
			TEST(bit_fls(bs2) == 40, "pick");
			bit_free(bs2);
		}
		else
			TEST(0, "alloc fail");

		bit_free(bs1);
	}
	note("Testing realloc");
	{
		bitstr_t *bs = bit_alloc(1);

		TEST(bit_ffs(bs) == -1, "bitstring");
		bit_set(bs,0);
		/*bit_set(bs, 1000);*/	/* triggers TEST in bit_set - OK */
		bs = bit_realloc(bs,1048576);
		bit_set(bs,1000);
		bit_set(bs,1048575);
		TEST(bit_test(bs, 0), "bitstring");
		TEST(bit_test(bs, 1000), "bitstring");
		TEST(bit_test(bs, 1048575), "bitstring");
		TEST(bit_set_count(bs) == 3, "bitstring");
		bit_clear(bs,0);
		bit_clear(bs,1000);
		TEST(bit_set_count(bs) == 1, "bitstring");
		TEST(bit_ffs(bs) == 1048575, "bitstring");
		bit_free(bs);
	}
	note("Testing bit_fmt");
	{
		char tmpstr[1024];
		bitstr_t *bs = bit_alloc(1024);

		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), ""), "bitstring");
		bit_set(bs,42);
		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), "42"), "bitstring");
		bit_set(bs,102);
		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), "42,102"), "bitstring");
		bit_nset(bs,9,14);
		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr), bs), 
					"9-14,42,102"), "bitstring");
	}

	note("Testing bit_nffc/bit_nffs");
	{
		bitstr_t *bs = bit_alloc(1024);

		bit_set(bs, 2);
		bit_set(bs, 6);
		bit_set(bs, 7);
		bit_nset(bs,12,1018); 

		TEST(bit_nffc(bs, 2) == 0, "bitstring");
		TEST(bit_nffc(bs, 3) == 3, "bitstring");
		TEST(bit_nffc(bs, 4) == 8, "bitstring");
		TEST(bit_nffc(bs, 5) == 1019, "bitstring");
		TEST(bit_nffc(bs, 6) == -1, "bitstring");

		TEST(bit_nffs(bs, 1) == 2, "bitstring");
		TEST(bit_nffs(bs, 2) == 6, "bitstring");
		TEST(bit_nffs(bs, 100) == 12, "bitstring");
		TEST(bit_nffs(bs, 1023) == -1, "bitstring");

		bit_free(bs);
	}

	note("Testing bit_unfmt");
	{
		bitstr_t *bs = bit_alloc(1024);
		bitstr_t *bs2 = bit_alloc(1024);
		char tmpstr[4096];

		bit_set(bs,1);
		bit_set(bs,3);
		bit_set(bs,30);
		bit_nset(bs,42,64);
		bit_nset(bs,97,1000);

		bit_fmt(tmpstr, sizeof(tmpstr), bs);
		TEST(bit_unfmt(bs2, tmpstr) != -1, "bitstring");
		TEST(bit_equal(bs, bs2), "bitstring");
	}

	totals();
	return failed;
}
Ejemplo n.º 24
0
extern bg_record_t *create_small_record(bg_record_t *bg_record,
					bitstr_t *ionodes, int size)
{
	bg_record_t *found_record = NULL;
	ba_mp_t *new_ba_mp = NULL;
	ba_mp_t *ba_mp = NULL;
	char bitstring[BITSIZE];

	found_record = (bg_record_t*) xmalloc(sizeof(bg_record_t));
	found_record->magic = BLOCK_MAGIC;

	found_record->job_running = NO_JOB_RUNNING;
	found_record->user_name = xstrdup(bg_record->user_name);
	found_record->user_uid = bg_record->user_uid;
	found_record->ba_mp_list = list_create(destroy_ba_mp);
	if (bg_record->ba_mp_list)
		ba_mp = list_peek(bg_record->ba_mp_list);
	if (!ba_mp) {
		if (bg_record->mp_str) {
			hostlist_t hl = hostlist_create(bg_record->mp_str);
			char *host = hostlist_shift(hl);
			hostlist_destroy(hl);
			found_record->mp_str = xstrdup(host);
			free(host);
			error("you gave me a list with no ba_mps using %s",
			      found_record->mp_str);
		} else {
			char tmp_char[SYSTEM_DIMENSIONS+1];
			int dim;
			for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
				tmp_char[dim] =
					alpha_num[found_record->start[dim]];
			tmp_char[dim] = '\0';
			found_record->mp_str = xstrdup_printf(
				"%s%s",
				bg_conf->slurm_node_prefix,
				tmp_char);
			error("you gave me a record with no ba_mps "
			      "and no nodes either using %s",
			      found_record->mp_str);
		}
	} else {
		new_ba_mp = ba_copy_mp(ba_mp);
		/* We need to have this node wrapped in Q to handle
		   wires correctly when creating around the midplane.
		*/
		ba_setup_mp(new_ba_mp, false, true);

		new_ba_mp->used = BA_MP_USED_TRUE;
		list_append(found_record->ba_mp_list, new_ba_mp);
		found_record->mp_count = 1;
		found_record->mp_str = xstrdup_printf(
			"%s%s",
			bg_conf->slurm_node_prefix, new_ba_mp->coord_str);
	}

#ifdef HAVE_BGL
	found_record->node_use = SELECT_COPROCESSOR_MODE;
	found_record->blrtsimage = xstrdup(bg_record->blrtsimage);
#endif
#ifdef HAVE_BG_L_P
	found_record->linuximage = xstrdup(bg_record->linuximage);
	found_record->ramdiskimage = xstrdup(bg_record->ramdiskimage);
#endif
	found_record->mloaderimage = xstrdup(bg_record->mloaderimage);

	process_nodes(found_record, false);

	found_record->conn_type[0] = SELECT_SMALL;

	xassert(bg_conf->cpu_ratio);
	found_record->cpu_cnt = bg_conf->cpu_ratio * size;
	found_record->cnode_cnt = size;

	found_record->ionode_bitmap = bit_copy(ionodes);
	bit_fmt(bitstring, BITSIZE, found_record->ionode_bitmap);
	found_record->ionode_str = xstrdup(bitstring);
	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
		info("made small block of %s[%s]",
		     found_record->mp_str, found_record->ionode_str);
	return found_record;
}
Ejemplo n.º 25
0
/* Initialize power_save module parameters.
 * Return 0 on valid configuration to run power saving,
 * otherwise log the problem and return -1 */
static int _init_power_config(void)
{
	slurm_ctl_conf_t *conf = slurm_conf_lock();

	last_config     = slurmctld_conf.last_update;
	idle_time       = conf->suspend_time - 1;
	suspend_rate    = conf->suspend_rate;
	resume_timeout  = conf->resume_timeout;
	resume_rate     = conf->resume_rate;
	slurmd_timeout  = conf->slurmd_timeout;
	suspend_timeout = conf->suspend_timeout;
	_clear_power_config();
	if (conf->suspend_program)
		suspend_prog = xstrdup(conf->suspend_program);
	if (conf->resume_program)
		resume_prog = xstrdup(conf->resume_program);
	if (conf->suspend_exc_nodes)
		exc_nodes = xstrdup(conf->suspend_exc_nodes);
	if (conf->suspend_exc_parts)
		exc_parts = xstrdup(conf->suspend_exc_parts);
	slurm_conf_unlock();

	if (idle_time < 0) {	/* not an error */
		debug("power_save module disabled, SuspendTime < 0");
		return -1;
	}
	if (suspend_rate < 0) {
		error("power_save module disabled, SuspendRate < 0");
		return -1;
	}
	if (resume_rate < 0) {
		error("power_save module disabled, ResumeRate < 0");
		return -1;
	}
	if (suspend_prog == NULL) {
		error("power_save module disabled, NULL SuspendProgram");
		return -1;
	} else if (!_valid_prog(suspend_prog)) {
		error("power_save module disabled, invalid SuspendProgram %s",
		      suspend_prog);
		return -1;
	}
	if (resume_prog == NULL) {
		error("power_save module disabled, NULL ResumeProgram");
		return -1;
	} else if (!_valid_prog(resume_prog)) {
		error("power_save module disabled, invalid ResumeProgram %s",
		      resume_prog);
		return -1;
	}

	if (exc_nodes &&
	    (node_name2bitmap(exc_nodes, false, &exc_node_bitmap))) {
		error("power_save module disabled, "
		      "invalid SuspendExcNodes %s", exc_nodes);
		return -1;
	}

	if (exc_parts) {
		char *tmp = NULL, *one_part = NULL, *part_list = NULL;
		struct part_record *part_ptr = NULL;
		int rc = 0;

		part_list = xstrdup(exc_parts);
		one_part = strtok_r(part_list, ",", &tmp);
		while (one_part != NULL) {
			part_ptr = find_part_record(one_part);
			if (!part_ptr) {
				error("power_save module disabled, "
					"invalid SuspendExcPart %s",
					one_part);
				rc = -1;
				break;
			}
			if (exc_node_bitmap)
				bit_or(exc_node_bitmap, part_ptr->node_bitmap);
			else
				exc_node_bitmap = bit_copy(part_ptr->
							   node_bitmap);
			one_part = strtok_r(NULL, ",", &tmp);
		}
		xfree(part_list);
		if (rc)
			return rc;
	}

	if (exc_node_bitmap) {
		char *tmp = bitmap2node_name(exc_node_bitmap);
		debug("power_save module, excluded nodes %s", tmp);
		xfree(tmp);
	}

	return 0;
}
Ejemplo n.º 26
0
static char *	_will_run_test(uint32_t jobid, time_t start_time,
			       char *node_list, int *err_code, char **err_msg)
{
	struct job_record *job_ptr = NULL;
	struct part_record *part_ptr;
	bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
	bitstr_t *exc_core_bitmap = NULL;
	char *hostlist, *reply_msg = NULL;
	uint32_t min_nodes, max_nodes, req_nodes;
	int rc;
	time_t start_res, orig_start_time;
	List preemptee_candidates;
	bool resv_overlap = false;

	debug2("wiki2: will_run job_id=%u start_time=%u node_list=%s",
		jobid, (uint32_t)start_time, node_list);

	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		*err_code = -700;
		*err_msg = "No such job";
		error("wiki: Failed to find job %u", jobid);
		return NULL;
	}
	if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) {
		*err_code = -700;
		*err_msg = "WillRun not applicable to non-pending job";
		error("wiki: WillRun on non-pending job %u", jobid);
		return NULL;
	}

	part_ptr = job_ptr->part_ptr;
	if (part_ptr == NULL) {
		*err_code = -700;
		*err_msg = "Job lacks a partition";
		error("wiki: Job %u lacks a partition", jobid);
		return NULL;
	}

	if ((node_list == NULL) || (node_list[0] == '\0')) {
		/* assume all nodes available to job for testing */
		avail_bitmap = bit_copy(avail_node_bitmap);
	} else if (node_name2bitmap(node_list, false, &avail_bitmap) != 0) {
		*err_code = -700;
		*err_msg = "Invalid available nodes value";
		error("wiki: Attempt to set invalid available node "
		      "list for job %u, %s", jobid, node_list);
		return NULL;
	}

	/* Enforce reservation: access control, time and nodes */
	start_res = start_time;
	rc = job_test_resv(job_ptr, &start_res, true, &resv_bitmap,
			   &exc_core_bitmap, &resv_overlap);
	if (rc != SLURM_SUCCESS) {
		*err_code = -730;
		*err_msg = "Job denied access to reservation";
		error("wiki: reservation access denied for job %u", jobid);
		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		return NULL;
	}
	bit_and(avail_bitmap, resv_bitmap);
	FREE_NULL_BITMAP(resv_bitmap);

	/* Only consider nodes that are not DOWN or DRAINED */
	bit_and(avail_bitmap, avail_node_bitmap);

	/* Consider only nodes in this job's partition */
	if (part_ptr->node_bitmap)
		bit_and(avail_bitmap, part_ptr->node_bitmap);
	else {
		*err_code = -730;
		*err_msg = "Job's partition has no nodes";
		error("wiki: no nodes in partition %s for job %u",
			part_ptr->name, jobid);
		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		return NULL;
	}

	if (job_req_node_filter(job_ptr, avail_bitmap) != SLURM_SUCCESS) {
		/* Job probably has invalid feature list */
		*err_code = -730;
		*err_msg = "Job's required features not available "
			   "on selected nodes";
		error("wiki: job %u not runnable on hosts=%s",
		      jobid, node_list);
		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		return NULL;
	}
	if (job_ptr->details->exc_node_bitmap) {
		bit_not(job_ptr->details->exc_node_bitmap);
		bit_and(avail_bitmap, job_ptr->details->exc_node_bitmap);
		bit_not(job_ptr->details->exc_node_bitmap);
	}
	if ((job_ptr->details->req_node_bitmap) &&
	    (!bit_super_set(job_ptr->details->req_node_bitmap,
			    avail_bitmap))) {
		*err_code = -730;
		*err_msg = "Job's required nodes not available";
		error("wiki: job %u not runnable on hosts=%s",
		      jobid, node_list);
		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		return NULL;
	}

	min_nodes = MAX(job_ptr->details->min_nodes, part_ptr->min_nodes);
	if (job_ptr->details->max_nodes == 0)
		max_nodes = part_ptr->max_nodes;
	else
		max_nodes = MIN(job_ptr->details->max_nodes,
				part_ptr->max_nodes);
	max_nodes = MIN(max_nodes, 500000); /* prevent overflows */
	if (job_ptr->details->max_nodes)
		req_nodes = max_nodes;
	else
		req_nodes = min_nodes;
	if (min_nodes > max_nodes) {
		/* job's min_nodes exceeds partitions max_nodes */
		*err_code = -730;
		*err_msg = "Job's min_nodes > max_nodes";
		error("wiki: job %u not runnable on hosts=%s",
		      jobid, node_list);
		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		return NULL;
	}

	preemptee_candidates = slurm_find_preemptable_jobs(job_ptr);

	orig_start_time = job_ptr->start_time;
	rc = select_g_job_test(job_ptr, avail_bitmap,
			       min_nodes, max_nodes, req_nodes,
			       SELECT_MODE_WILL_RUN,
			       preemptee_candidates, NULL, exc_core_bitmap);
	FREE_NULL_LIST(preemptee_candidates);

	if (rc == SLURM_SUCCESS) {
		char tmp_str[128];
		*err_code = 0;
		uint32_t proc_cnt = 0;

		xstrcat(reply_msg, "STARTINFO=");
#ifdef HAVE_BG
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
                             		    SELECT_JOBDATA_NODE_CNT,
					    &proc_cnt);

#else
		proc_cnt = job_ptr->total_cpus;
#endif
		snprintf(tmp_str, sizeof(tmp_str), "%u:%u@%u,",
			 jobid, proc_cnt, (uint32_t) job_ptr->start_time);
		xstrcat(reply_msg, tmp_str);
		hostlist = bitmap2node_name(avail_bitmap);
		xstrcat(reply_msg, hostlist);
		xfree(hostlist);
	} else {
		xstrcat(reply_msg, "Jobs not runable on selected nodes");
		error("wiki: jobs not runnable on nodes");
	}

	/* Restore pending job's expected start time */
	job_ptr->start_time = orig_start_time;
	FREE_NULL_BITMAP(avail_bitmap);
	FREE_NULL_BITMAP(exc_core_bitmap);
	return reply_msg;
}
Ejemplo n.º 27
0
uint32_t powercap_get_node_bitmap_maxwatts_dvfs(bitstr_t *idle_bitmap,
			  bitstr_t *select_bitmap, uint32_t *max_watts_dvfs,
			  int* allowed_freqs, uint32_t num_cpus)
{
	uint32_t max_watts = 0, tmp_max_watts = 0, val = 0;
	uint32_t *tmp_max_watts_dvfs = NULL;
	struct node_record *node_ptr;
	int i, p;
	char ename[128], keyname[128];
	bitstr_t *tmp_bitmap = NULL;
	uint32_t data[5], core_data[4];

	if (!_powercap_enabled())
		return 0;

	if (max_watts_dvfs != NULL) {
		tmp_max_watts_dvfs = 
			  xmalloc(sizeof(uint32_t)*(allowed_freqs[0]+1));
	}

	/* if no input bitmap, consider the current idle nodes 
	 * bitmap as the input bitmap tagging nodes to consider 
	 * as idle while computing the max watts of the cluster */
	if (idle_bitmap == NULL && select_bitmap == NULL) {
		tmp_bitmap = bit_copy(idle_node_bitmap);
		idle_bitmap = tmp_bitmap;
		select_bitmap = tmp_bitmap;
	}
	
	for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count;
	     i++, node_ptr++) {
		if (bit_test(idle_bitmap, i)) {
			/* idle nodes, 2 cases : power save or not */
			if (bit_test(power_node_bitmap, i)) {
				layouts_entity_pullget_kv(L_NAME,
						  node_ptr->name, L_NODE_SAVE,
						  &val, L_T_UINT32);
			} else {
				layouts_entity_pullget_kv(L_NAME,
						  node_ptr->name, L_NODE_IDLE,
						  &val, L_T_UINT32);
			}
		
		} else if (bit_test(select_bitmap, i)) {
			layouts_entity_get_mkv(L_NAME, node_ptr->name,
				"IdleWatts,MaxWatts,CoresCount,LastCore,CurrentPower",
				data, (sizeof(uint32_t) * 5), L_T_UINT32);

			/* tmp_max_watts = IdleWatts - cpus*IdleCoreWatts
			 * + cpus*MaxCoreWatts */
			sprintf(ename, "virtualcore%u", data[3]);
			if (num_cpus == 0)
				num_cpus = data[2];
			layouts_entity_get_mkv(L_NAME, ename,
					       "IdleCoreWatts,MaxCoreWatts",
					       core_data,
					       (sizeof(uint32_t) * 2),
					       L_T_UINT32);
			if (data[4] == 0) {
				tmp_max_watts += data[0] -
					  num_cpus*core_data[0] +
					  num_cpus*core_data[1];
			} else if (data[4] > 0) {
				tmp_max_watts += data[4] -
					  num_cpus*core_data[0] +
					  num_cpus*core_data[1];
			} else if (num_cpus == data[2])
				tmp_max_watts += data[1];

			if (!tmp_max_watts_dvfs)
				goto skip_dvfs;
			for (p = 1; p < (allowed_freqs[0] + 1); p++) {
				sprintf(keyname, 
					"IdleCoreWatts,MaxCoreWatts,"
					"Cpufreq%dWatts,CurrentCorePower",
					allowed_freqs[p]);
				layouts_entity_get_mkv(L_NAME, ename, keyname,
					  core_data, (sizeof(uint32_t) * 4),
					  L_T_UINT32);
				if (num_cpus == data[2]) {
					tmp_max_watts_dvfs[p] +=
						  num_cpus*core_data[2];
				} else {
					if (data[4] == 0) {
						tmp_max_watts_dvfs[p] +=
						 	data[0] -
							num_cpus*core_data[0] +
							num_cpus*core_data[2];
					} else {
						tmp_max_watts_dvfs[p] +=
							data[4] -
							num_cpus*core_data[0] +
							num_cpus*core_data[2];
					}
				}
			}
  skip_dvfs:		;
		} else {
			/* non-idle nodes, 2 cases : down or not */
			if (!bit_test(up_node_bitmap, i)) {
				layouts_entity_pullget_kv(L_NAME,
						  node_ptr->name, L_NODE_DOWN,
						  &val, L_T_UINT32);
			} else {
				layouts_entity_pullget_kv(L_NAME,
						  node_ptr->name, L_NODE_CUR,
						  &val, L_T_UINT32);
			}
		}
		max_watts += val;
		val = 0;
	}
	if (max_watts_dvfs) {	
		for (p = 1; p < allowed_freqs[0] + 1; p++) {
			max_watts_dvfs[p] = max_watts + tmp_max_watts_dvfs[p];
		}
		xfree(tmp_max_watts_dvfs);
	}
	max_watts += tmp_max_watts;

	if (tmp_bitmap)
		bit_free(tmp_bitmap);

	return max_watts;
}
Ejemplo n.º 28
0
static void jlink_execute_scan(struct jtag_command *cmd)
{
	DEBUG_JTAG_IO("%s type:%d", cmd->cmd.scan->ir_scan ? "IRSCAN" : "DRSCAN",
		jtag_scan_type(cmd->cmd.scan));

	/* Make sure there are no trailing fields with num_bits == 0, or the logic below will fail. */
	while (cmd->cmd.scan->num_fields > 0
			&& cmd->cmd.scan->fields[cmd->cmd.scan->num_fields - 1].num_bits == 0) {
		cmd->cmd.scan->num_fields--;
		LOG_DEBUG("discarding trailing empty field");
	}

	if (cmd->cmd.scan->num_fields == 0) {
		LOG_DEBUG("empty scan, doing nothing");
		return;
	}

	if (cmd->cmd.scan->ir_scan) {
		if (tap_get_state() != TAP_IRSHIFT) {
			jlink_end_state(TAP_IRSHIFT);
			jlink_state_move();
		}
	} else {
		if (tap_get_state() != TAP_DRSHIFT) {
			jlink_end_state(TAP_DRSHIFT);
			jlink_state_move();
		}
	}

	jlink_end_state(cmd->cmd.scan->end_state);

	struct scan_field *field = cmd->cmd.scan->fields;
	unsigned scan_size = 0;

	for (int i = 0; i < cmd->cmd.scan->num_fields; i++, field++) {
		scan_size += field->num_bits;
		DEBUG_JTAG_IO("%s%s field %d/%d %d bits",
			field->in_value ? "in" : "",
			field->out_value ? "out" : "",
			i,
			cmd->cmd.scan->num_fields,
			field->num_bits);

		if (i == cmd->cmd.scan->num_fields - 1 && tap_get_state() != tap_get_end_state()) {
			/* Last field, and we're leaving IRSHIFT/DRSHIFT. Clock last bit during tap
			 * movement. This last field can't have length zero, it was checked above. */
			jlink_clock_data(field->out_value,
					 0,
					 NULL,
					 0,
					 field->in_value,
					 0,
					 field->num_bits - 1);
			uint8_t last_bit = 0;
			if (field->out_value)
				bit_copy(&last_bit, 0, field->out_value, field->num_bits - 1, 1);
			uint8_t tms_bits = 0x01;
			jlink_clock_data(&last_bit,
					 0,
					 &tms_bits,
					 0,
					 field->in_value,
					 field->num_bits - 1,
					 1);
			tap_set_state(tap_state_transition(tap_get_state(), 1));
			jlink_clock_data(&last_bit,
					 0,
					 &tms_bits,
					 1,
					 NULL,
					 0,
					 1);
			tap_set_state(tap_state_transition(tap_get_state(), 0));
		} else
			jlink_clock_data(field->out_value,
					 0,
					 NULL,
					 0,
					 field->in_value,
					 0,
					 field->num_bits);
	}

	if (tap_get_state() != tap_get_end_state()) {
		jlink_end_state(tap_get_end_state());
		jlink_state_move();
	}

	DEBUG_JTAG_IO("%s scan, %i bits, end in %s",
		(cmd->cmd.scan->ir_scan) ? "IR" : "DR", scan_size,
		tap_state_name(tap_get_end_state()));
}
Ejemplo n.º 29
0
extern job_resources_t *copy_job_resources(job_resources_t *job_resrcs_ptr)
{
	int i, sock_inx = 0;
	job_resources_t *new_layout = xmalloc(sizeof(struct job_resources));

	xassert(job_resrcs_ptr);
	new_layout->nhosts = job_resrcs_ptr->nhosts;
	new_layout->ncpus = job_resrcs_ptr->ncpus;
	new_layout->node_req = job_resrcs_ptr->node_req;
	if (job_resrcs_ptr->core_bitmap) {
		new_layout->core_bitmap = bit_copy(job_resrcs_ptr->
						   core_bitmap);
	}
	if (job_resrcs_ptr->core_bitmap_used) {
		new_layout->core_bitmap_used = bit_copy(job_resrcs_ptr->
							core_bitmap_used);
	}
	if (job_resrcs_ptr->node_bitmap) {
		new_layout->node_bitmap = bit_copy(job_resrcs_ptr->
						   node_bitmap);
	}

	new_layout->cpu_array_cnt = job_resrcs_ptr->cpu_array_cnt;
	if (job_resrcs_ptr->cpu_array_reps &&
	    job_resrcs_ptr->cpu_array_cnt) {
		new_layout->cpu_array_reps =
			xmalloc(sizeof(uint32_t) *
				job_resrcs_ptr->cpu_array_cnt);
		memcpy(new_layout->cpu_array_reps,
		       job_resrcs_ptr->cpu_array_reps,
		       (sizeof(uint32_t) * job_resrcs_ptr->cpu_array_cnt));
	}
	if (job_resrcs_ptr->cpu_array_value &&
	    job_resrcs_ptr->cpu_array_cnt) {
		new_layout->cpu_array_value =
			xmalloc(sizeof(uint16_t) *
				job_resrcs_ptr->cpu_array_cnt);
		memcpy(new_layout->cpu_array_value,
		       job_resrcs_ptr->cpu_array_value,
		       (sizeof(uint16_t) * job_resrcs_ptr->cpu_array_cnt));
	}

	if (job_resrcs_ptr->cpus) {
		new_layout->cpus = xmalloc(sizeof(uint16_t) *
					   job_resrcs_ptr->nhosts);
		memcpy(new_layout->cpus, job_resrcs_ptr->cpus,
		       (sizeof(uint16_t) * job_resrcs_ptr->nhosts));
	}
	if (job_resrcs_ptr->cpus_used) {
		new_layout->cpus_used = xmalloc(sizeof(uint16_t) *
						job_resrcs_ptr->nhosts);
		memcpy(new_layout->cpus_used, job_resrcs_ptr->cpus_used,
		       (sizeof(uint16_t) * job_resrcs_ptr->nhosts));
	}

	if (job_resrcs_ptr->memory_allocated) {
		new_layout->memory_allocated = xmalloc(sizeof(uint32_t) *
						       new_layout->nhosts);
		memcpy(new_layout->memory_allocated,
		       job_resrcs_ptr->memory_allocated,
		       (sizeof(uint32_t) * job_resrcs_ptr->nhosts));
	}
	if (job_resrcs_ptr->memory_used) {
		new_layout->memory_used = xmalloc(sizeof(uint32_t) *
						  new_layout->nhosts);
		memcpy(new_layout->memory_used,
		       job_resrcs_ptr->memory_used,
		       (sizeof(uint32_t) * job_resrcs_ptr->nhosts));
	}

	/* Copy sockets_per_node, cores_per_socket and core_sock_rep_count */
	new_layout->sockets_per_node = xmalloc(sizeof(uint16_t) *
					       new_layout->nhosts);
	new_layout->cores_per_socket = xmalloc(sizeof(uint16_t) *
					       new_layout->nhosts);
	new_layout->sock_core_rep_count = xmalloc(sizeof(uint32_t) *
						  new_layout->nhosts);
	for (i=0; i<new_layout->nhosts; i++) {
		if (job_resrcs_ptr->sock_core_rep_count[i] ==  0) {
			error("copy_job_resources: sock_core_rep_count=0");
			break;
		}
		sock_inx += job_resrcs_ptr->sock_core_rep_count[i];
		if (sock_inx >= job_resrcs_ptr->nhosts) {
			i++;
			break;
		}
	}
	memcpy(new_layout->sockets_per_node,
	       job_resrcs_ptr->sockets_per_node, (sizeof(uint16_t) * i));
	memcpy(new_layout->cores_per_socket,
	       job_resrcs_ptr->cores_per_socket, (sizeof(uint16_t) * i));
	memcpy(new_layout->sock_core_rep_count,
	       job_resrcs_ptr->sock_core_rep_count,
	       (sizeof(uint32_t) * i));

	return new_layout;
}
Ejemplo n.º 30
0
/* Merge MPS records back to original list, updating and reordering as needed */
static void _merge_lists(List gres_conf_list, List gpu_conf_list,
			 List mps_conf_list)
{
	ListIterator gpu_itr, mps_itr;
	gres_slurmd_conf_t *gpu_record, *mps_record;

	/*
	 * If gres/mps has Count, but no File specification and there is more
	 * than one gres/gpu record, then evenly distribute gres/mps Count
	 * evenly over all gres/gpu file records
	 */
	if ((list_count(mps_conf_list) == 1) &&
	    (list_count(gpu_conf_list) >  1)) {
		mps_record = list_peek(mps_conf_list);
		if (!mps_record->file) {
			_distribute_count(gres_conf_list, gpu_conf_list,
					  mps_record->count);
			list_flush(mps_conf_list);
			return;
		}
	}

	/* Add MPS records, matching File ordering to that of GPU records */
	gpu_itr = list_iterator_create(gpu_conf_list);
	while ((gpu_record = list_next(gpu_itr))) {
		mps_itr = list_iterator_create(mps_conf_list);
		while ((mps_record = list_next(mps_itr))) {
			if (!xstrcmp(gpu_record->file, mps_record->file)) {
				/* Copy gres/gpu Type & CPU info to gres/mps */
				if (gpu_record->type_name) {
					mps_record->config_flags |=
						GRES_CONF_HAS_TYPE;
				}
				if (gpu_record->cpus) {
					xfree(mps_record->cpus);
					mps_record->cpus =
						xstrdup(gpu_record->cpus);
				}
				if (gpu_record->cpus_bitmap) {
					mps_record->cpu_cnt =
						gpu_record->cpu_cnt;
					FREE_NULL_BITMAP(
						mps_record->cpus_bitmap);
					mps_record->cpus_bitmap =
					      bit_copy(gpu_record->cpus_bitmap);
				}
				xfree(mps_record->type_name);
				mps_record->type_name =
					xstrdup(gpu_record->type_name);
				list_append(gres_conf_list, mps_record);
				(void) list_remove(mps_itr);
				break;
			}
		}
		list_iterator_destroy(mps_itr);
		if (!mps_record) {
			/* Add gres/mps record to match gres/gps record */
			mps_record = xmalloc(sizeof(gres_slurmd_conf_t));
			mps_record->config_flags = gpu_record->config_flags;
			mps_record->count = 0;
			mps_record->cpu_cnt = gpu_record->cpu_cnt;
			mps_record->cpus = xstrdup(gpu_record->cpus);
			if (gpu_record->cpus_bitmap) {
				mps_record->cpus_bitmap =
					bit_copy(gpu_record->cpus_bitmap);
			}
			mps_record->file = xstrdup(gpu_record->file);
			mps_record->name = xstrdup("mps");
			mps_record->plugin_id = gres_plugin_build_id("mps");
			mps_record->type_name = xstrdup(gpu_record->type_name);
			list_append(gres_conf_list, mps_record);
		}
		list_append(gres_conf_list, gpu_record);
		(void) list_remove(gpu_itr);
	}
	list_iterator_destroy(gpu_itr);

	/* Remove any remaining MPS records (no matching File) */
	mps_itr = list_iterator_create(mps_conf_list);
	while ((mps_record = list_next(mps_itr))) {
		error("%s: Discarding gres/mps configuration (File=%s) without matching gres/gpu record",
		      plugin_name, mps_record->file);
		(void) list_delete_item(mps_itr);
	}
	list_iterator_destroy(mps_itr);
}