Пример #1
0
static void _compute_start_times(void)
{
	int j, rc = SLURM_SUCCESS, job_cnt = 0;
	List job_queue;
	job_queue_rec_t *job_queue_rec;
	List preemptee_candidates = NULL;
	struct job_record *job_ptr;
	struct part_record *part_ptr;
	bitstr_t *alloc_bitmap = NULL, *avail_bitmap = NULL;
	bitstr_t *exc_core_bitmap = NULL;
	uint32_t max_nodes, min_nodes, req_nodes, time_limit;
	time_t now = time(NULL), sched_start, last_job_alloc;
	bool resv_overlap = false;

	sched_start = now;
	last_job_alloc = now - 1;
	alloc_bitmap = bit_alloc(node_record_count);
	job_queue = build_job_queue(true, false);
	sort_job_queue(job_queue);
	while ((job_queue_rec = (job_queue_rec_t *) list_pop(job_queue))) {
		job_ptr  = job_queue_rec->job_ptr;
		part_ptr = job_queue_rec->part_ptr;
		xfree(job_queue_rec);
		if (part_ptr != job_ptr->part_ptr)
			continue;	/* Only test one partition */

		if (job_cnt++ > max_sched_job_cnt) {
			debug2("scheduling loop exiting after %d jobs",
			       max_sched_job_cnt);
			break;
		}

		/* Determine minimum and maximum node counts */
		/* On BlueGene systems don't adjust the min/max node limits
		   here.  We are working on midplane values. */
		min_nodes = MAX(job_ptr->details->min_nodes,
				part_ptr->min_nodes);

		if (job_ptr->details->max_nodes == 0)
			max_nodes = part_ptr->max_nodes;
		else
			max_nodes = MIN(job_ptr->details->max_nodes,
					part_ptr->max_nodes);

		max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */

		if (job_ptr->details->max_nodes)
			req_nodes = max_nodes;
		else
			req_nodes = min_nodes;

		if (min_nodes > max_nodes) {
			/* job's min_nodes exceeds partition's max_nodes */
			continue;
		}

		j = job_test_resv(job_ptr, &now, true, &avail_bitmap,
				  &exc_core_bitmap, &resv_overlap, false);
		if (j != SLURM_SUCCESS) {
			FREE_NULL_BITMAP(avail_bitmap);
			FREE_NULL_BITMAP(exc_core_bitmap);
			continue;
		}

		rc = select_g_job_test(job_ptr, avail_bitmap,
				       min_nodes, max_nodes, req_nodes,
				       SELECT_MODE_WILL_RUN,
				       preemptee_candidates, NULL,
				       exc_core_bitmap);
		if (rc == SLURM_SUCCESS) {
			last_job_update = now;
			if (job_ptr->time_limit == INFINITE)
				time_limit = 365 * 24 * 60 * 60;
			else if (job_ptr->time_limit != NO_VAL)
				time_limit = job_ptr->time_limit * 60;
			else if (job_ptr->part_ptr &&
				 (job_ptr->part_ptr->max_time != INFINITE))
				time_limit = job_ptr->part_ptr->max_time * 60;
			else
				time_limit = 365 * 24 * 60 * 60;
			if (bit_overlap(alloc_bitmap, avail_bitmap) &&
			    (job_ptr->start_time <= last_job_alloc)) {
				job_ptr->start_time = last_job_alloc;
			}
			bit_or(alloc_bitmap, avail_bitmap);
			last_job_alloc = job_ptr->start_time + time_limit;
		}
		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);

		if ((time(NULL) - sched_start) >= sched_timeout) {
			debug2("scheduling loop exiting after %d jobs",
			       max_sched_job_cnt);
			break;
		}
	}
	FREE_NULL_LIST(job_queue);
	FREE_NULL_BITMAP(alloc_bitmap);
}
Пример #2
0
static int _breakup_blocks(List block_list, List new_blocks,
			   select_ba_request_t *request, List my_block_list,
			   bool only_free, bool only_small)
{
	int rc = SLURM_ERROR;
	bg_record_t *bg_record = NULL;
	ListIterator itr = NULL, bit_itr = NULL;
	int total_cnode_cnt=0;
	char start_char[SYSTEM_DIMENSIONS+1];
	bitstr_t *ionodes = bit_alloc(bg_conf->ionodes_per_mp);
	int cnodes = request->procs / bg_conf->cpu_ratio;
	int curr_mp_bit = -1;
	int dim;

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
		info("cpu_count=%d cnodes=%d o_free=%d o_small=%d",
		     request->procs, cnodes, only_free, only_small);

	switch(cnodes) {
	case 16:
		/* a 16 can go anywhere */
		break;
	case 32:
		bit_itr = list_iterator_create(bg_lists->valid_small32);
		break;
	case 64:
		bit_itr = list_iterator_create(bg_lists->valid_small64);
		break;
	case 128:
		bit_itr = list_iterator_create(bg_lists->valid_small128);
		break;
	case 256:
		bit_itr = list_iterator_create(bg_lists->valid_small256);
		break;
	default:
		error("We shouldn't be here with this size %d", cnodes);
		goto finished;
		break;
	}

	/* First try with free blocks a midplane or less.  Then try with the
	 * smallest blocks.
	 */
	itr = list_iterator_create(block_list);
	while ((bg_record = list_next(itr))) {
		if (bg_record->free_cnt) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
				info("%s being freed by other job(s), skipping",
				     bg_record->bg_block_id);
			continue;
		}
		/* never look at a block if a job is running */
		if (bg_record->job_running != NO_JOB_RUNNING)
			continue;
		/* on the third time through look for just a block
		 * that isn't used */

		/* check for free blocks on the first and second time */
		if (only_free && (bg_record->state != BG_BLOCK_FREE))
			continue;

		/* check small blocks first */
		if (only_small
		    && (bg_record->cnode_cnt > bg_conf->mp_cnode_cnt))
			continue;

		if (request->avail_mp_bitmap &&
		    !bit_super_set(bg_record->bitmap,
				   request->avail_mp_bitmap)) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
				info("bg block %s has nodes not usable "
				     "by this job",
				     bg_record->bg_block_id);
			continue;
		}

		if (bg_record->cnode_cnt == cnodes) {
			ba_mp_t *ba_mp = NULL;
			if (bg_record->ba_mp_list)
				ba_mp = list_peek(bg_record->ba_mp_list);
			if (!ba_mp) {
				for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
					start_char[dim] = alpha_num[
						bg_record->start[dim]];
				start_char[dim] = '\0';
				request->save_name = xstrdup(start_char);
			} else
				request->save_name = xstrdup(ba_mp->coord_str);

			rc = SLURM_SUCCESS;
			goto finished;
		}
		/* lets see if we can combine some small ones */
		if (bg_record->cnode_cnt < cnodes) {
			char bitstring[BITSIZE];
			bitstr_t *bitstr = NULL;
			int num_over = 0;
			int num_cnodes = bg_record->cnode_cnt;
			int rec_mp_bit = bit_ffs(bg_record->bitmap);

			if (curr_mp_bit != rec_mp_bit) {
				/* Got a different node than
				 * previously, since the list should
				 * be in order of nodes for small blocks
				 * just clear here since the last node
				 * doesn't have any more. */
				curr_mp_bit = rec_mp_bit;
				bit_nclear(ionodes, 0,
					   (bg_conf->ionodes_per_mp-1));
				total_cnode_cnt = 0;
			}

			/* On really busy systems we can get
			   overlapping blocks here.  If that is the
			   case only add that which doesn't overlap.
			*/
			if ((num_over = bit_overlap(
				     ionodes, bg_record->ionode_bitmap))) {
				/* Since the smallest block size is
				   the number of cnodes in an io node,
				   just multiply the num_over by that to
				   get the number of cnodes to remove.
				*/
				if ((num_cnodes -=
				     num_over * bg_conf->smallest_block) <= 0)
					continue;
			}
			bit_or(ionodes, bg_record->ionode_bitmap);

			/* check and see if the bits set are a valid
			   combo */
			if (bit_itr) {
				while ((bitstr = list_next(bit_itr))) {
					if (bit_super_set(ionodes, bitstr))
						break;
				}
				list_iterator_reset(bit_itr);
			}
			if (!bitstr) {
				bit_nclear(ionodes, 0,
					   (bg_conf->ionodes_per_mp-1));
				bit_or(ionodes, bg_record->ionode_bitmap);
				total_cnode_cnt = num_cnodes =
					bg_record->cnode_cnt;
			} else
				total_cnode_cnt += num_cnodes;

			bit_fmt(bitstring, BITSIZE, ionodes);
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
				info("combine adding %s %s %d got %d set "
				     "ionodes %s total is %s",
				     bg_record->bg_block_id, bg_record->mp_str,
				     num_cnodes, total_cnode_cnt,
				     bg_record->ionode_str, bitstring);
			if (total_cnode_cnt == cnodes) {
				ba_mp_t *ba_mp = NULL;
				if (bg_record->ba_mp_list)
					ba_mp = list_peek(
						bg_record->ba_mp_list);
				if (!ba_mp) {
					for (dim=0; dim<SYSTEM_DIMENSIONS;
					     dim++)
						start_char[dim] = alpha_num[
							bg_record->start[dim]];
					start_char[dim] = '\0';
					request->save_name =
						xstrdup(start_char);
				} else
					request->save_name =
						xstrdup(ba_mp->coord_str);

				if (!my_block_list) {
					rc = SLURM_SUCCESS;
					goto finished;
				}

				bg_record = create_small_record(bg_record,
								ionodes,
								cnodes);
				list_append(new_blocks, bg_record);

				rc = SLURM_SUCCESS;
				goto finished;
			}
			continue;
		}
		/* we found a block that is bigger than requested */
		break;
	}

	if (bg_record) {
		ba_mp_t *ba_mp = NULL;
		if (bg_record->ba_mp_list)
			ba_mp = list_peek(bg_record->ba_mp_list);
		if (!ba_mp) {
			for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
				start_char[dim] = alpha_num[
					bg_record->start[dim]];
			start_char[dim] = '\0';
			request->save_name = xstrdup(start_char);
		} else
			request->save_name = xstrdup(ba_mp->coord_str);
		/* It appears we don't need this original record
		 * anymore, just work off the copy if indeed it is a copy. */

		/* bg_record_t *found_record = NULL; */

		/* if (bg_record->original) { */
		/* 	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) */
		/* 		info("1 This was a copy %s", */
		/* 		     bg_record->bg_block_id); */
		/* 	found_record = bg_record->original; */
		/* } else { */
		/* 	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) */
		/* 		info("looking for original"); */
		/* 	found_record = find_org_in_bg_list( */
		/* 		bg_lists->main, bg_record); */
		/* } */
		if ((bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
		    && bg_record->original
		    && (bg_record->original->magic != BLOCK_MAGIC)) {
			info("This record %s has bad magic, it must be "
			     "getting freed.  No worries it will all be "
			     "figured out later.",
			     bg_record->bg_block_id);
		}

		/* if (!found_record || found_record->magic != BLOCK_MAGIC) { */
		/* 	error("this record wasn't found in the list!"); */
		/* 	rc = SLURM_ERROR; */
		/* 	goto finished; */
		/* } */

		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) {
			char tmp_char[256];
			format_node_name(bg_record, tmp_char,
					 sizeof(tmp_char));
			info("going to split %s, %s",
			     bg_record->bg_block_id,
			     tmp_char);
		}

		if (!my_block_list) {
			rc = SLURM_SUCCESS;
			goto finished;
		}
		_split_block(block_list, new_blocks, bg_record, cnodes);
		rc = SLURM_SUCCESS;
		goto finished;
	}

finished:
	if (bit_itr)
		list_iterator_destroy(bit_itr);

	FREE_NULL_BITMAP(ionodes);
	if (itr)
		list_iterator_destroy(itr);

	return rc;
}
Пример #3
0
BitMap& BitMap::operator |=(const BitMap& b){
    return bit_or(b);
}
Пример #4
0
/*
 * Perform any setup required to initiate a job
 * job_ptr IN - pointer to the job being initiated
 * RET - SLURM_SUCCESS or an error code
 *
 * NOTE: This happens in parallel with srun and slurmd spawning
 * the job. A prolog script is expected to defer initiation of
 * the job script until the BG block is available for use.
 */
extern int start_job(struct job_record *job_ptr)
{
	int rc = SLURM_SUCCESS;
	bg_record_t *bg_record = NULL;
	bg_action_t *bg_action_ptr = NULL;
	select_jobinfo_t *jobinfo = job_ptr->select_jobinfo->data;

	slurm_mutex_lock(&block_state_mutex);
	bg_record = jobinfo->bg_record;

	if (!bg_record || !block_ptr_exist_in_list(bg_lists->main, bg_record)) {
		slurm_mutex_unlock(&block_state_mutex);
		error("bg_record %s doesn't exist, requested for job (%d)",
		      jobinfo->bg_block_id, job_ptr->job_id);
		return SLURM_ERROR;
	}

	if ((jobinfo->conn_type[0] != SELECT_NAV)
	    && (jobinfo->conn_type[0] < SELECT_SMALL)) {
		int dim;
		for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
			jobinfo->conn_type[dim] = bg_record->conn_type[dim];
	}

	/* If it isn't 0 then it was setup previous (sub-block)
	*/
	if (jobinfo->geometry[SYSTEM_DIMENSIONS] == 0)
		memcpy(jobinfo->geometry, bg_record->geo,
		       sizeof(bg_record->geo));

	if (bg_record->job_list) {
		/* Mark the ba_mp cnodes as used now. */
		ba_mp_t *ba_mp = list_peek(bg_record->ba_mp_list);
		xassert(ba_mp);
		xassert(ba_mp->cnode_bitmap);
		bit_or(ba_mp->cnode_bitmap, jobinfo->units_avail);
		if (!find_job_in_bg_record(bg_record, job_ptr->job_id))
			list_append(bg_record->job_list, job_ptr);
	} else {
		bg_record->job_running = job_ptr->job_id;
		bg_record->job_ptr = job_ptr;
	}

	job_ptr->job_state |= JOB_CONFIGURING;

	bg_action_ptr = xmalloc(sizeof(bg_action_t));
	bg_action_ptr->op = START_OP;
	bg_action_ptr->job_ptr = job_ptr;

	/* FIXME: The below get_select_jobinfo calls could be avoided
	 * by just using the jobinfo as we do above.
	 */
	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_BLOCK_ID,
			   &(bg_action_ptr->bg_block_id));
	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_REBOOT,
			   &(bg_action_ptr->reboot));
	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_CONN_TYPE,
			   &(bg_action_ptr->conn_type));
	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_MLOADER_IMAGE,
			   &(bg_action_ptr->mloaderimage));
#ifdef HAVE_BG_L_P
# ifdef HAVE_BGL
	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_BLRTS_IMAGE,
			   &(bg_action_ptr->blrtsimage));
	if (!bg_action_ptr->blrtsimage) {
		bg_action_ptr->blrtsimage =
			xstrdup(bg_conf->default_blrtsimage);
		set_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_BLRTS_IMAGE,
				   bg_action_ptr->blrtsimage);
	}
# elif defined HAVE_BGP
	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_CONN_TYPE,
			   &(bg_action_ptr->conn_type));
# endif
	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_LINUX_IMAGE,
			   &(bg_action_ptr->linuximage));
	if (!bg_action_ptr->linuximage) {
		bg_action_ptr->linuximage =
			xstrdup(bg_conf->default_linuximage);
		set_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_LINUX_IMAGE,
				   bg_action_ptr->linuximage);
	}

	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_RAMDISK_IMAGE,
			   &(bg_action_ptr->ramdiskimage));
	if (!bg_action_ptr->ramdiskimage) {
		bg_action_ptr->ramdiskimage =
			xstrdup(bg_conf->default_ramdiskimage);
		set_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_RAMDISK_IMAGE,
				   bg_action_ptr->ramdiskimage);
	}

#endif
	if (!bg_action_ptr->mloaderimage) {
		bg_action_ptr->mloaderimage =
			xstrdup(bg_conf->default_mloaderimage);
		set_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_MLOADER_IMAGE,
				   bg_action_ptr->mloaderimage);
	}

	num_unused_cpus -= job_ptr->total_cpus;

	if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record))
		list_push(bg_lists->job_running, bg_record);

	if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
		list_push(bg_lists->booted, bg_record);
	/* Just in case something happens to free this block before we
	   start the job we will make it so this job doesn't get blown
	   away.
	*/
	bg_record->modifying = 1;
	last_bg_update = time(NULL);

	slurm_mutex_unlock(&block_state_mutex);

	info("Queue start of job %u in BG block %s",
	     job_ptr->job_id,
	     bg_action_ptr->bg_block_id);
	_block_op(bg_action_ptr);
	return rc;
}
Пример #5
0
/*
 * create_dynamic_block - create new block(s) to be used for a new
 * job allocation.
 * RET - a list of created block(s) or NULL on failure errno is set.
 */
extern List create_dynamic_block(List block_list,
				 select_ba_request_t *request,
				 List my_block_list,
				 bool track_down_nodes)
{
	int rc = SLURM_SUCCESS;

	ListIterator itr, itr2;
	bg_record_t *bg_record = NULL, *found_record = NULL;
	List results = NULL;
	List new_blocks = NULL;
	bitstr_t *my_bitmap = NULL;
	select_ba_request_t blockreq;
	int cnodes = request->procs / bg_conf->cpu_ratio;
	uint16_t start_geo[SYSTEM_DIMENSIONS];

	if (cnodes < bg_conf->smallest_block) {
		error("Can't create this size %d "
		      "on this system ionodes_per_mp is %d",
		      request->procs,
		      bg_conf->ionodes_per_mp);
		goto finished;
	}
	memset(&blockreq, 0, sizeof(select_ba_request_t));
	memcpy(start_geo, request->geometry, sizeof(start_geo));

	/* We need to lock this just incase a blocks_overlap is called
	   which will in turn reset and set the system as it sees fit.
	*/
	slurm_mutex_lock(&block_state_mutex);
	if (my_block_list) {
		reset_ba_system(track_down_nodes);
		itr = list_iterator_create(my_block_list);
		while ((bg_record = list_next(itr))) {
			if (bg_record->magic != BLOCK_MAGIC) {
				/* This should never happen since we
				   only call this on copies of blocks
				   and we check on this during the
				   copy.
				*/
				error("create_dynamic_block: "
				      "got a block with bad magic?");
				continue;
			}
			if (bg_record->free_cnt) {
				if (bg_conf->slurm_debug_flags
				    & DEBUG_FLAG_BG_PICK) {
					int dim;
					char start_geo[SYSTEM_DIMENSIONS+1];
					char geo[SYSTEM_DIMENSIONS+1];
					for (dim=0; dim<SYSTEM_DIMENSIONS;
					     dim++) {
						start_geo[dim] = alpha_num[
							bg_record->start[dim]];
						geo[dim] = alpha_num[
							bg_record->geo[dim]];
					}
					start_geo[dim] = '\0';
					geo[dim] = '\0';
					info("not adding %s(%s) %s %s %s %u "
					     "(free_cnt)",
					     bg_record->bg_block_id,
					     bg_record->mp_str,
					     bg_block_state_string(
						     bg_record->state),
					     start_geo,
					     geo,
					     bg_record->cnode_cnt);
				}
				continue;
			}

			if (!my_bitmap) {
				my_bitmap =
					bit_alloc(bit_size(bg_record->bitmap));
			}

			if (!bit_super_set(bg_record->bitmap, my_bitmap)) {
				bit_or(my_bitmap, bg_record->bitmap);

				if (bg_conf->slurm_debug_flags
				    & DEBUG_FLAG_BG_PICK) {
					int dim;
					char start_geo[SYSTEM_DIMENSIONS+1];
					char geo[SYSTEM_DIMENSIONS+1];
					for (dim=0; dim<SYSTEM_DIMENSIONS;
					     dim++) {
						start_geo[dim] = alpha_num[
							bg_record->start[dim]];
						geo[dim] = alpha_num[
							bg_record->geo[dim]];
					}
					start_geo[dim] = '\0';
					geo[dim] = '\0';
					info("adding %s(%s) %s %s %s %u",
					     bg_record->bg_block_id,
					     bg_record->mp_str,
					     bg_block_state_string(
						     bg_record->state),
					     start_geo, geo,
					     bg_record->cnode_cnt);
				}
				if (check_and_set_mp_list(
					    bg_record->ba_mp_list)
				    == SLURM_ERROR) {
					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_BG_PICK)
						info("something happened in "
						     "the load of %s",
						     bg_record->bg_block_id);
					list_iterator_destroy(itr);
					FREE_NULL_BITMAP(my_bitmap);
					rc = SLURM_ERROR;
					goto finished;
				}
			} else {
				if (bg_conf->slurm_debug_flags
				    & DEBUG_FLAG_BG_PICK) {
					int dim;
					char start_geo[SYSTEM_DIMENSIONS+1];
					char geo[SYSTEM_DIMENSIONS+1];
					for (dim=0; dim<SYSTEM_DIMENSIONS;
					     dim++) {
						start_geo[dim] = alpha_num[
							bg_record->start[dim]];
						geo[dim] = alpha_num[
							bg_record->geo[dim]];
					}
					start_geo[dim] = '\0';
					geo[dim] = '\0';
					info("not adding %s(%s) %s %s %s %u ",
					     bg_record->bg_block_id,
					     bg_record->mp_str,
					     bg_block_state_string(
						     bg_record->state),
					     start_geo,
					     geo,
					     bg_record->cnode_cnt);
				}
				/* just so we don't look at it later */
				bg_record->free_cnt = -1;
			}
		}
		list_iterator_destroy(itr);
		FREE_NULL_BITMAP(my_bitmap);
	} else {
		reset_ba_system(false);
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("No list was given");
	}

	if (request->avail_mp_bitmap)
		ba_set_removable_mps(request->avail_mp_bitmap, 1);

	if (request->size==1 && cnodes < bg_conf->mp_cnode_cnt) {
		switch(cnodes) {
#ifdef HAVE_BGL
		case 32:
			blockreq.small32 = 4;
			blockreq.small128 = 3;
			break;
		case 128:
			blockreq.small128 = 4;
			break;
#else
		case 16:
			blockreq.small16 = 2;
			blockreq.small32 = 1;
			blockreq.small64 = 1;
			blockreq.small128 = 1;
			blockreq.small256 = 1;
			break;
		case 32:
			blockreq.small32 = 2;
			blockreq.small64 = 1;
			blockreq.small128 = 1;
			blockreq.small256 = 1;
			break;
		case 64:
			blockreq.small64 = 2;
			blockreq.small128 = 1;
			blockreq.small256 = 1;
			break;
		case 128:
			blockreq.small128 = 2;
			blockreq.small256 = 1;
			break;
		case 256:
			blockreq.small256 = 2;
			break;
#endif
		default:
			error("This size %d is unknown on this system", cnodes);
			goto finished;
			break;
		}

		/* Sort the list so the small blocks are in the order
		 * of ionodes. */
		list_sort(block_list, (ListCmpF)bg_record_cmpf_inc);
		request->conn_type[0] = SELECT_SMALL;
		new_blocks = list_create(destroy_bg_record);
		/* check only blocks that are free and small */
		if (_breakup_blocks(block_list, new_blocks,
				    request, my_block_list,
				    true, true)
		    == SLURM_SUCCESS)
			goto finished;

		/* check only blocks that are free and any size */
		if (_breakup_blocks(block_list, new_blocks,
				    request, my_block_list,
				    true, false)
		    == SLURM_SUCCESS)
			goto finished;

		/* check usable blocks that are small with any state */
		if (_breakup_blocks(block_list, new_blocks,
				    request, my_block_list,
				    false, true)
		    == SLURM_SUCCESS)
			goto finished;

		/* check all usable blocks */
		if (_breakup_blocks(block_list, new_blocks,
				    request, my_block_list,
				    false, false)
		    == SLURM_SUCCESS)
			goto finished;

		/* Re-sort the list back to the original order. */
		list_sort(block_list, (ListCmpF)bg_record_sort_aval_inc);
		list_destroy(new_blocks);
		new_blocks = NULL;
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("small block not able to be placed inside others");
	}

	if (request->conn_type[0] == SELECT_NAV)
		request->conn_type[0] = SELECT_TORUS;

	//debug("going to create %d", request->size);
	if (!new_ba_request(request)) {
		if (request->geometry[0] != (uint16_t)NO_VAL) {
			char *geo = give_geo(request->geometry);
			error("Problems with request for size %d geo %s",
			      request->size, geo);
			xfree(geo);
		} else {
			error("Problems with request for size %d.  "
			      "No geo given.",
			      request->size);
		}
		rc = ESLURM_INTERCONNECT_FAILURE;
		goto finished;
	}

	/* try on free midplanes */
	rc = SLURM_SUCCESS;
	if (results)
		list_flush(results);
	else {
#ifdef HAVE_BGQ
		results = list_create(destroy_ba_mp);
#else
		results = list_create(NULL);
#endif
	}

	rc = allocate_block(request, results);
	/* This could be changed in allocate_block so set it back up */
	memcpy(request->geometry, start_geo, sizeof(start_geo));

	if (rc) {
		rc = SLURM_SUCCESS;
		goto setup_records;
	}

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
		info("allocate failure for size %d base "
		     "partitions of free midplanes",
		     request->size);
	rc = SLURM_ERROR;

	if (!list_count(my_block_list) || !my_block_list)
		goto finished;

	/*Try to put block starting in the smallest of the exisiting blocks*/
	itr = list_iterator_create(my_block_list);
	itr2 = list_iterator_create(my_block_list);
	while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) {
		bool is_small = 0;
		/* never check a block with a job running */
		if (bg_record->free_cnt
		    || bg_record->job_running != NO_JOB_RUNNING)
			continue;

		/* Here we are only looking for the first
		   block on the midplane.  So either the count
		   is greater or equal than
		   bg_conf->mp_cnode_cnt or the first bit is
		   set in the ionode_bitmap.
		*/
		if (bg_record->cnode_cnt < bg_conf->mp_cnode_cnt) {
			bool found = 0;
			if (bit_ffs(bg_record->ionode_bitmap) != 0)
				continue;
			/* Check to see if we have other blocks in
			   this midplane that have jobs running.
			*/
			while ((found_record = list_next(itr2))) {
				if (!found_record->free_cnt
				    && (found_record->job_running
					!= NO_JOB_RUNNING)
				    && bit_overlap(bg_record->bitmap,
						   found_record->bitmap)) {
					found = 1;
					break;
				}
			}
			list_iterator_reset(itr2);
			if (found)
				continue;
			is_small = 1;
		}

		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("removing %s(%s) for request %d",
			     bg_record->bg_block_id,
			     bg_record->mp_str, request->size);

		remove_block(bg_record->ba_mp_list, is_small);
		rc = SLURM_SUCCESS;
		if (results)
			list_flush(results);
		else {
#ifdef HAVE_BGQ
			results = list_create(destroy_ba_mp);
#else
			results = list_create(NULL);
#endif
		}

		rc = allocate_block(request, results);
		/* This could be changed in allocate_block so set it back up */
		memcpy(request->geometry, start_geo, sizeof(start_geo));
		if (rc) {
			rc = SLURM_SUCCESS;
			break;
		}

		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("allocate failure for size %d base partitions",
			     request->size);
		rc = SLURM_ERROR;
	}
	list_iterator_destroy(itr);
	list_iterator_destroy(itr2);

setup_records:
	if (rc == SLURM_SUCCESS) {
		/*set up bg_record(s) here */
		new_blocks = list_create(destroy_bg_record);

		blockreq.save_name = request->save_name;
#ifdef HAVE_BGL
		blockreq.blrtsimage = request->blrtsimage;
#endif
		blockreq.linuximage = request->linuximage;
		blockreq.mloaderimage = request->mloaderimage;
		blockreq.ramdiskimage = request->ramdiskimage;
		memcpy(blockreq.conn_type, request->conn_type,
		       sizeof(blockreq.conn_type));

		add_bg_record(new_blocks, &results, &blockreq, 0, 0);
	}

finished:
	if (request->avail_mp_bitmap
	    && (bit_ffc(request->avail_mp_bitmap) == -1))
		ba_reset_all_removed_mps();
	slurm_mutex_unlock(&block_state_mutex);

	/* reset the ones we mucked with */
	itr = list_iterator_create(my_block_list);
	while ((bg_record = (bg_record_t *) list_next(itr))) {
		if (bg_record->free_cnt == -1)
			bg_record->free_cnt = 0;
	}
	list_iterator_destroy(itr);


	xfree(request->save_name);

	if (results)
		list_destroy(results);
	errno = rc;
	return new_blocks;
}
Пример #6
0
extern int select_nodeinfo_set_all(void)
{
	ListIterator itr = NULL;
	struct node_record *node_ptr = NULL;
	int i=0;
	bg_record_t *bg_record = NULL;
	static time_t last_set_all = 0;
	ba_mp_t *ba_mp;
	node_subgrp_t *subgrp = NULL;
	int bit_count;

	//uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	if (!blocks_are_created)
		return SLURM_NO_CHANGE_IN_DATA;

	if (!g_bitmap_size) {
		/* if (cluster_flags & CLUSTER_FLAG_BGQ) */
		/* 	g_bitmap_size = bg_conf->mp_cnode_cnt; */
		/* else */
			g_bitmap_size = bg_conf->ionodes_per_mp;
	}

	/* only set this once when the last_bg_update is newer than
	   the last time we set things up. */
	if (last_set_all && (last_bg_update-1 < last_set_all)) {
		debug2("Node select info for set all hasn't "
		       "changed since %ld",
		       last_set_all);
		return SLURM_NO_CHANGE_IN_DATA;
	}
	last_set_all = last_bg_update;

	/* set this here so we know things have changed */
	last_node_update = time(NULL);

	slurm_mutex_lock(&block_state_mutex);
	for (i=0; i<node_record_count; i++) {
		select_nodeinfo_t *nodeinfo;

		node_ptr = &(node_record_table_ptr[i]);
		xassert(node_ptr->select_nodeinfo);
		nodeinfo = node_ptr->select_nodeinfo->data;
		xassert(nodeinfo);
		xassert(nodeinfo->subgrp_list);
		list_flush(nodeinfo->subgrp_list);
		if (nodeinfo->bitmap_size != g_bitmap_size)
			nodeinfo->bitmap_size = g_bitmap_size;
	}

	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		enum node_states state = NODE_STATE_UNKNOWN;
		select_nodeinfo_t *nodeinfo;
		bitstr_t *bitmap;
		ListIterator itr2 = NULL;

		/* Only mark unidle blocks */
		if (bg_record->job_list && list_count(bg_record->job_list)) {
			struct job_record *job_ptr;
			select_jobinfo_t *jobinfo;
			ListIterator itr =
				list_iterator_create(bg_record->job_list);
			ba_mp = list_peek(bg_record->ba_mp_list);
			node_ptr = &(node_record_table_ptr[ba_mp->index]);
			xassert(node_ptr->select_nodeinfo);
			nodeinfo = node_ptr->select_nodeinfo->data;
			xassert(nodeinfo);
			xassert(nodeinfo->subgrp_list);
			if (ba_mp->cnode_err_bitmap
			    && (bit_count =
				bit_set_count(ba_mp->cnode_err_bitmap))) {
				subgrp = _find_subgrp(nodeinfo->subgrp_list,
						      NODE_STATE_ERROR,
						      g_bitmap_size);
				/* FIXME: the subgrp->bitmap isn't set here. */
				subgrp->cnode_cnt += bit_count;
			}

			subgrp = _find_subgrp(nodeinfo->subgrp_list,
					      NODE_STATE_ALLOCATED,
					      g_bitmap_size);
			while ((job_ptr = list_next(itr))) {
				jobinfo = job_ptr->select_jobinfo->data;
				/* FIXME: the subgrp->bitmap isn't set here. */
				subgrp->cnode_cnt += jobinfo->cnode_cnt;
			}
			list_iterator_destroy(itr);
			continue;
		} else if (bg_record->job_running == NO_JOB_RUNNING)
			continue;

		if (bg_record->state & BG_BLOCK_ERROR_FLAG)
			state = NODE_STATE_ERROR;
		else if (bg_record->job_running > NO_JOB_RUNNING) {
			/* we don't need to set the allocated here
			 * since the whole midplane is allocated */
			if (bg_record->conn_type[0] < SELECT_SMALL)
				continue;
			state = NODE_STATE_ALLOCATED;
		} else {
			error("not sure why we got here with block %s %s",
			      bg_record->bg_block_id,
			      bg_block_state_string(bg_record->state));
			continue;
		}
		/* if ((cluster_flags & CLUSTER_FLAG_BGQ) */
		/*     && (state != NODE_STATE_ERROR)) */
		/* 	bitmap = bg_record->cnodes_used_bitmap; */
		/* else */
		bitmap = bg_record->ionode_bitmap;

		itr2 = list_iterator_create(bg_record->ba_mp_list);
		while ((ba_mp = list_next(itr2))) {
			if (!ba_mp->used)
				continue;

			node_ptr = &(node_record_table_ptr[ba_mp->index]);

			xassert(node_ptr->select_nodeinfo);
			nodeinfo = node_ptr->select_nodeinfo->data;
			xassert(nodeinfo);
			xassert(nodeinfo->subgrp_list);

			if (ba_mp->cnode_err_bitmap
			    && (state == NODE_STATE_ALLOCATED)
			    && (bit_count =
				bit_set_count(ba_mp->cnode_err_bitmap))) {
				subgrp = _find_subgrp(nodeinfo->subgrp_list,
						      NODE_STATE_ERROR,
						      g_bitmap_size);
				/* FIXME: the subgrp->bitmap isn't set here. */
				subgrp->cnode_cnt += bit_count;
			}

			subgrp = _find_subgrp(nodeinfo->subgrp_list,
					      state, g_bitmap_size);

			if (subgrp->cnode_cnt < bg_conf->mp_cnode_cnt) {
				/* if (cluster_flags & CLUSTER_FLAG_BGQ) { */
				/* 	bit_or(subgrp->bitmap, bitmap); */
				/* 	subgrp->cnode_cnt += */
				/* 		bit_set_count(bitmap); */
				/* } else */ if (bg_record->cnode_cnt
					   < bg_conf->mp_cnode_cnt) {
					bit_or(subgrp->bitmap, bitmap);
					subgrp->cnode_cnt +=
						bg_record->cnode_cnt;
				} else {
					bit_nset(subgrp->bitmap,
						 0,
						 (g_bitmap_size-1));
					subgrp->cnode_cnt =
						bg_conf->mp_cnode_cnt;
				}
			}
		}
		list_iterator_destroy(itr2);
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&block_state_mutex);

	return SLURM_SUCCESS;
}
Пример #7
0
static void _validate_switches(void)
{
	slurm_conf_switches_t *ptr, **ptr_array;
	int depth, i, j;
	struct switch_record *switch_ptr, *prior_ptr;
	hostlist_t hl, invalid_hl = NULL;
	char *child, *buf;
	bool  have_root = false;
	bitstr_t *multi_homed_bitmap = NULL;	/* nodes on >1 leaf switch */
	bitstr_t *switches_bitmap = NULL;	/* nodes on any leaf switch */
	bitstr_t *tmp_bitmap = NULL;

	_free_switch_record_table();

	switch_record_cnt = _read_topo_file(&ptr_array);
	if (switch_record_cnt == 0) {
		error("No switches configured");
		s_p_hashtbl_destroy(conf_hashtbl);
		return;
	}

	switch_record_table = xmalloc(sizeof(struct switch_record) *
				      switch_record_cnt);
	multi_homed_bitmap = bit_alloc(node_record_count);
	switch_ptr = switch_record_table;
	for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
		ptr = ptr_array[i];
		switch_ptr->name = xstrdup(ptr->switch_name);
		/* See if switch name has already been defined. */
		prior_ptr = switch_record_table;
		for (j=0; j<i; j++, prior_ptr++) {
			if (strcmp(switch_ptr->name, prior_ptr->name) == 0) {
				fatal("Switch (%s) has already been defined",
				      prior_ptr->name);
			}
		}
		switch_ptr->link_speed = ptr->link_speed;
		if (ptr->nodes) {
			switch_ptr->level = 0;	/* leaf switch */
			switch_ptr->nodes = xstrdup(ptr->nodes);
			if (_node_name2bitmap(ptr->nodes, 
					      &switch_ptr->node_bitmap, 
					      &invalid_hl)) {
				fatal("Invalid node name (%s) in switch "
				      "config (%s)",
				      ptr->nodes, ptr->switch_name);
			}
			if (switches_bitmap) {
				tmp_bitmap = bit_copy(switch_ptr->node_bitmap);
				bit_and(tmp_bitmap, switches_bitmap);
				bit_or(multi_homed_bitmap, tmp_bitmap);
				FREE_NULL_BITMAP(tmp_bitmap);
				bit_or(switches_bitmap,
				       switch_ptr->node_bitmap);
			} else {
				switches_bitmap = bit_copy(switch_ptr->
							   node_bitmap);
			}
		} else if (ptr->switches) {
			switch_ptr->level = -1;	/* determine later */
			switch_ptr->switches = xstrdup(ptr->switches);
		} else {
			fatal("Switch configuration (%s) lacks children",
			      ptr->switch_name);
		}
	}

	for (depth=1; ; depth++) {
		bool resolved = true;
		switch_ptr = switch_record_table;
		for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
			if (switch_ptr->level != -1)
				continue;
			hl = hostlist_create(switch_ptr->switches);
			if (!hl) {
				fatal("Invalid switches: %s",
				      switch_ptr->switches);
			}
			while ((child = hostlist_pop(hl))) {
				j = _get_switch_inx(child);
				if ((j < 0) || (j == i)) {
					fatal("Switch configuration %s has "
					      "invalid child (%s)",
					      switch_ptr->name, child);
				}
				if (switch_record_table[j].level == -1) {
					/* Children not resolved */
					resolved = false;
					switch_ptr->level = -1;
					FREE_NULL_BITMAP(switch_ptr->
							 node_bitmap);
					free(child);
					break;
				}
				if (switch_ptr->level == -1) {
					switch_ptr->level = 1 +
						switch_record_table[j].level;
					switch_ptr->node_bitmap =
						bit_copy(switch_record_table[j].
							 node_bitmap);
				} else {
					switch_ptr->level =
						MAX(switch_ptr->level,
						     (switch_record_table[j].
						      level + 1));
					bit_or(switch_ptr->node_bitmap,
					       switch_record_table[j].
					       node_bitmap);
				}
				free(child);
			}
			hostlist_destroy(hl);
		}
		if (resolved)
			break;
		if (depth > 20)	/* Prevent infinite loop */
			fatal("Switch configuration is not a tree");
	}

	switch_levels = 0;
	switch_ptr = switch_record_table;
	for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
		switch_levels = MAX(switch_levels, switch_ptr->level);
		if (switch_ptr->node_bitmap == NULL)
			error("switch %s has no nodes", switch_ptr->name);
	}
	if (switches_bitmap) {
		bit_not(switches_bitmap);
		i = bit_set_count(switches_bitmap);
		if (i > 0) {
			child = bitmap2node_name(switches_bitmap);
			error("WARNING: switches lack access to %d nodes: %s",
			      i, child);
			xfree(child);
		}
		FREE_NULL_BITMAP(switches_bitmap);
	} else
		fatal("switches contain no nodes");

	if (invalid_hl) {
		buf = hostlist_ranged_string_xmalloc(invalid_hl);
		error("WARNING: Invalid hostnames in switch configuration: %s",
		      buf);
		xfree(buf);
		hostlist_destroy(invalid_hl);
	}

	/* Report nodes on multiple leaf switches,
	 * possibly due to bad configuration file */
	i = bit_set_count(multi_homed_bitmap);
	if (i > 0) {
		child = bitmap2node_name(multi_homed_bitmap);
		error("WARNING: Multiple leaf switches contain nodes: %s",
		      child);
		xfree(child);
	}
	FREE_NULL_BITMAP(multi_homed_bitmap);

	/* Create array of indexes of children of each switch,
	 * and see if any switch can reach all nodes */
	for (i = 0; i < switch_record_cnt; i++) {
		if (switch_record_table[i].level != 0) {
			_find_child_switches (i);
		}
		if (node_record_count ==
			bit_set_count(switch_record_table[i].node_bitmap)) {
			have_root = true;
		}
	}
	if (!have_root) {
		info("TOPOLOGY: warning -- no switch can reach all nodes"
				" through its descendants."
				"Do not use route/topology");
	}
	s_p_hashtbl_destroy(conf_hashtbl);
	_log_switches();
}
Пример #8
0
extern int create_full_system_block(List bg_found_block_list)
{
	int rc = SLURM_SUCCESS;
	ListIterator itr;
	bg_record_t *bg_record = NULL;
	char *name = NULL;
	List records = NULL;
	uint16_t geo[SYSTEM_DIMENSIONS];
	int i;
	select_ba_request_t blockreq;
	List results = NULL;
	struct part_record *part_ptr = NULL;
	bitstr_t *bitmap = bit_alloc(node_record_count);
	static int *dims = NULL;
	bool larger = 0;
	char start_char[SYSTEM_DIMENSIONS+1];
	char geo_char[SYSTEM_DIMENSIONS+1];

	if (!dims) {
		dims = select_g_ba_get_dims();
		memset(start_char, 0, sizeof(start_char));
		memset(geo_char, 0, sizeof(geo_char));
	}

	/* Locks are already in place to protect part_list here */
	itr = list_iterator_create(part_list);
	while ((part_ptr = list_next(itr))) {
		/* we only want to use mps that are in
		 * partitions
		 */
		if (!part_ptr->node_bitmap) {
			debug4("Partition %s doesn't have any nodes in it.",
			       part_ptr->name);
			continue;
		}
		bit_or(bitmap, part_ptr->node_bitmap);
	}
	list_iterator_destroy(itr);

	bit_not(bitmap);
	if (bit_ffs(bitmap) != -1) {
		error("We don't have the entire system covered by partitions, "
		      "can't create full system block");
		FREE_NULL_BITMAP(bitmap);
		return SLURM_ERROR;
	}
	FREE_NULL_BITMAP(bitmap);

	/* Here we are adding a block that in for the entire machine
	   just in case it isn't in the bluegene.conf file.
	*/
	slurm_mutex_lock(&block_state_mutex);

	for (i=0; i<SYSTEM_DIMENSIONS; i++) {
		geo[i] = dims[i] - 1;
		if (geo[i] > 0)
			larger = 1;
		geo_char[i] = alpha_num[geo[i]];
		start_char[i] = alpha_num[0];
	}

	i = (10+strlen(bg_conf->slurm_node_prefix));
	name = xmalloc(i);

	if (!larger)
		snprintf(name, i, "%s%s",
			 bg_conf->slurm_node_prefix, start_char);
	else
		snprintf(name, i, "%s[%sx%s]",
			 bg_conf->slurm_node_prefix, start_char, geo_char);


	if (bg_found_block_list) {
		itr = list_iterator_create(bg_found_block_list);
		while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) {
			if (!strcmp(name, bg_record->mp_str)) {
				xfree(name);
				list_iterator_destroy(itr);
				/* don't create total already there */
				goto no_total;
			}
		}
		list_iterator_destroy(itr);
	} else {
		error("create_full_system_block: no bg_found_block_list 2");
	}

	if (bg_lists->main) {
		itr = list_iterator_create(bg_lists->main);
		while ((bg_record = (bg_record_t *) list_next(itr))
		       != NULL) {
			if (!strcmp(name, bg_record->mp_str)) {
				xfree(name);
				list_iterator_destroy(itr);
				/* don't create total already there */
				goto no_total;
			}
		}
		list_iterator_destroy(itr);
	} else {
		xfree(name);
		error("create_overlapped_blocks: no bg_lists->main 3");
		rc = SLURM_ERROR;
		goto no_total;
	}

	records = list_create(destroy_bg_record);

	memset(&blockreq, 0, sizeof(select_ba_request_t));
	blockreq.save_name = name;
	for (i=0; i<SYSTEM_DIMENSIONS; i++)
		blockreq.conn_type[i] = SELECT_TORUS;

	add_bg_record(records, NULL, &blockreq, 0 , 0);
	xfree(name);

	bg_record = (bg_record_t *) list_pop(records);
	if (!bg_record) {
		error("Nothing was returned from full system create");
		rc = SLURM_ERROR;
		goto no_total;
	}
	reset_ba_system(false);
	for(i=0; i<SYSTEM_DIMENSIONS; i++) {
		geo_char[i] = alpha_num[bg_record->geo[i]];
		start_char[i] = alpha_num[bg_record->start[i]];
	}
	debug2("adding %s %s %s",  bg_record->mp_str, start_char, geo_char);
	if (bg_record->ba_mp_list)
		list_flush(bg_record->ba_mp_list);
	else
		bg_record->ba_mp_list = list_create(destroy_ba_mp);
#ifdef HAVE_BGQ
	results = list_create(destroy_ba_mp);
#else
	results = list_create(NULL);
#endif
	name = set_bg_block(results,
			    bg_record->start,
			    bg_record->geo,
			    bg_record->conn_type);
	if (!name) {
		error("I was unable to make the full system block.");
		list_destroy(results);
		list_iterator_destroy(itr);
		slurm_mutex_unlock(&block_state_mutex);
		return SLURM_ERROR;
	}
	xfree(name);
	if (bg_record->ba_mp_list)
		list_destroy(bg_record->ba_mp_list);
#ifdef HAVE_BGQ
	bg_record->ba_mp_list = results;
	results = NULL;
#else
	bg_record->ba_mp_list = list_create(destroy_ba_mp);
	copy_node_path(results, &bg_record->ba_mp_list);
	list_destroy(results);
#endif
	if ((rc = bridge_block_create(bg_record)) == SLURM_ERROR) {
		error("create_full_system_block: "
		      "unable to configure block in api");
		destroy_bg_record(bg_record);
		goto no_total;
	}

	print_bg_record(bg_record);
	list_append(bg_lists->main, bg_record);

no_total:
	if (records)
		list_destroy(records);
	slurm_mutex_unlock(&block_state_mutex);
	return rc;
}
Пример #9
0
/*
 * create_defined_blocks - create the static blocks that will be used
 * for scheduling, all partitions must be able to be created and booted
 * at once.
 * IN - int overlapped, 1 if partitions are to be overlapped, 0 if they are
 * static.
 * RET - success of fitting all configurations
 */
extern int create_defined_blocks(bg_layout_t overlapped,
				 List bg_found_block_list)
{
	int rc = SLURM_SUCCESS;

	ListIterator itr;
	bg_record_t *bg_record = NULL;
	int i;
	uint16_t geo[SYSTEM_DIMENSIONS];
	char temp[256];
	struct part_record *part_ptr = NULL;
	bitstr_t *usable_mp_bitmap = bit_alloc(node_record_count);

	/* Locks are already in place to protect part_list here */
	itr = list_iterator_create(part_list);
	while ((part_ptr = list_next(itr))) {
		/* we only want to use mps that are in
		 * partitions
		 */
		if (!part_ptr->node_bitmap) {
			debug4("Partition %s doesn't have any nodes in it.",
			       part_ptr->name);
			continue;
		}
		bit_or(usable_mp_bitmap, part_ptr->node_bitmap);
	}
	list_iterator_destroy(itr);

	if (bit_ffs(usable_mp_bitmap) == -1) {
		fatal("We don't have any nodes in any partitions.  "
		      "Can't create blocks.  "
		      "Please check your slurm.conf.");
	}

	slurm_mutex_lock(&block_state_mutex);
	reset_ba_system(false);
	ba_set_removable_mps(usable_mp_bitmap, 1);
	if (bg_lists->main) {
		itr = list_iterator_create(bg_lists->main);
		while ((bg_record = list_next(itr))) {
			if (bg_record->mp_count > 0
			    && !bg_record->full_block
			    && bg_record->cpu_cnt >= bg_conf->cpus_per_mp) {
				char *name = NULL;
				char start_char[SYSTEM_DIMENSIONS+1];
				char geo_char[SYSTEM_DIMENSIONS+1];

				if (overlapped == LAYOUT_OVERLAP) {
					reset_ba_system(false);
					ba_set_removable_mps(usable_mp_bitmap,
							     1);
				}

				/* we want the mps that aren't
				 * in this record to mark them as used
				 */
				if (ba_set_removable_mps(
					    bg_record->mp_bitmap, 1)
				    != SLURM_SUCCESS)
					fatal("It doesn't seem we have a "
					      "bitmap for %s",
					      bg_record->bg_block_id);

				for (i=0; i<SYSTEM_DIMENSIONS; i++) {
					geo[i] = bg_record->geo[i];
					start_char[i] = alpha_num[
						bg_record->start[i]];
					geo_char[i] = alpha_num[geo[i]];
				}
				start_char[i] = '\0';
				geo_char[i] = '\0';

				debug2("adding %s %s %s",
				       bg_record->mp_str,
				       start_char, geo_char);
				if (bg_record->ba_mp_list
				    && list_count(bg_record->ba_mp_list)) {
					if ((rc = check_and_set_mp_list(
						     bg_record->ba_mp_list))
					    != SLURM_SUCCESS) {
						debug2("something happened in "
						       "the load of %s"
						       "Did you use smap to "
						       "make the "
						       "bluegene.conf file?",
						       bg_record->bg_block_id);
						break;
					}
				} else {
#ifdef HAVE_BGQ
					List results =
						list_create(destroy_ba_mp);
#else
					List results = list_create(NULL);
#endif
					name = set_bg_block(
						results,
						bg_record->start,
						geo,
						bg_record->conn_type);
					ba_reset_all_removed_mps();
					if (!name) {
						error("I was unable to "
						      "make the "
						      "requested block.");
						list_destroy(results);
						rc = SLURM_ERROR;
						break;
					}

					snprintf(temp, sizeof(temp), "%s%s",
						 bg_conf->slurm_node_prefix,
						 name);

					xfree(name);
					if (strcmp(temp, bg_record->mp_str)) {
						fatal("given list of %s "
						      "but allocated %s, "
						      "your order might be "
						      "wrong in bluegene.conf",
						      bg_record->mp_str,
						      temp);
					}
					if (bg_record->ba_mp_list)
						list_destroy(
							bg_record->ba_mp_list);
#ifdef HAVE_BGQ
					bg_record->ba_mp_list = results;
					results = NULL;
#else
					bg_record->ba_mp_list =
						list_create(destroy_ba_mp);
					copy_node_path(results,
						       &bg_record->ba_mp_list);
					list_destroy(results);
#endif
				}
			}
			if (!block_exist_in_list(
				    bg_found_block_list, bg_record)) {
				if (bg_record->full_block) {
					/* if this is defined we need
					   to remove it since we are
					   going to try to create it
					   later on overlap systems
					   this doesn't matter, but
					   since we don't clear the
					   table on static mode we
					   can't do it here or it just
					   won't work since other
					   wires will be or are
					   already set
					*/
					list_remove(itr);
					continue;
				}
				if ((rc = bridge_block_create(bg_record))
				    != SLURM_SUCCESS)
					break;
				print_bg_record(bg_record);
			}
		}
		list_iterator_destroy(itr);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	} else {
		error("create_defined_blocks: no bg_lists->main 2");
		rc = SLURM_ERROR;
		goto end_it;
	}
	slurm_mutex_unlock(&block_state_mutex);
	create_full_system_block(bg_found_block_list);

	slurm_mutex_lock(&block_state_mutex);
	sort_bg_record_inc_size(bg_lists->main);

end_it:
	ba_reset_all_removed_mps();
	FREE_NULL_BITMAP(usable_mp_bitmap);
	slurm_mutex_unlock(&block_state_mutex);

#ifdef _PRINT_BLOCKS_AND_EXIT
	if (bg_lists->main) {
		itr = list_iterator_create(bg_lists->main);
		debug("\n\n");
		while ((found_record = (bg_record_t *) list_next(itr))
		       != NULL) {
			print_bg_record(found_record);
		}
		list_iterator_destroy(itr);
	} else {
		error("create_defined_blocks: no bg_lists->main 5");
	}
 	exit(0);
#endif	/* _PRINT_BLOCKS_AND_EXIT */
	//exit(0);
	return rc;
}
Пример #10
0
/*
 * This could potentially lock the node lock in the slurmctld with
 * slurm_drain_node, or slurm_fail_job so if slurmctld_locked is called we
 * will call the functions without locking the locks again.
 */
extern int down_nodecard(char *mp_name, bitoff_t io_start,
			 bool slurmctld_locked)
{
	List requests = NULL;
	List delete_list = NULL;
	ListIterator itr = NULL;
	bg_record_t *bg_record = NULL, *found_record = NULL, tmp_record;
	bg_record_t *smallest_bg_record = NULL;
	struct node_record *node_ptr = NULL;
	int mp_bit = 0;
	static int io_cnt = NO_VAL;
	static int create_size = NO_VAL;
	static select_ba_request_t blockreq;
	int rc = SLURM_SUCCESS;
	char *reason = "select_bluegene: nodecard down";

	xassert(mp_name);

	if (io_cnt == NO_VAL) {
		io_cnt = 1;
		/* Translate 1 nodecard count to ionode count */
		if ((io_cnt *= bg_conf->io_ratio))
			io_cnt--;

		/* make sure we create something that is able to be
		   created */
		if (bg_conf->smallest_block < bg_conf->nodecard_cnode_cnt)
			create_size = bg_conf->nodecard_cnode_cnt;
		else
			create_size = bg_conf->smallest_block;
	}

	node_ptr = find_node_record(mp_name);
	if (!node_ptr) {
		error ("down_sub_node_blocks: invalid node specified '%s'",
		       mp_name);
		return EINVAL;
	}

	/* this is here for sanity check to make sure we don't core on
	   these bits when we set them below. */
	if (io_start >= bg_conf->ionodes_per_mp
	    || (io_start+io_cnt) >= bg_conf->ionodes_per_mp) {
		debug("io %d-%d not configured on this "
		      "system, only %d ionodes per midplane",
		      io_start, io_start+io_cnt, bg_conf->ionodes_per_mp);
		return EINVAL;
	}
	mp_bit = (node_ptr - node_record_table_ptr);

	memset(&blockreq, 0, sizeof(select_ba_request_t));

	blockreq.conn_type[0] = SELECT_SMALL;
	blockreq.save_name = mp_name;

	debug3("here setting node %d of %d and ionodes %d-%d of %d",
	       mp_bit, node_record_count, io_start,
	       io_start+io_cnt, bg_conf->ionodes_per_mp);

	memset(&tmp_record, 0, sizeof(bg_record_t));
	tmp_record.mp_count = 1;
	tmp_record.cnode_cnt = bg_conf->nodecard_cnode_cnt;
	tmp_record.mp_bitmap = bit_alloc(node_record_count);
	bit_set(tmp_record.mp_bitmap, mp_bit);

	tmp_record.ionode_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
	bit_nset(tmp_record.ionode_bitmap, io_start, io_start+io_cnt);

	slurm_mutex_lock(&block_state_mutex);
	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		if (!bit_test(bg_record->mp_bitmap, mp_bit))
			continue;

		if (!blocks_overlap(bg_record, &tmp_record))
			continue;

		if (bg_record->job_running > NO_JOB_RUNNING) {
			if (slurmctld_locked)
				job_fail(bg_record->job_running);
			else
				slurm_fail_job(bg_record->job_running);

		}
		/* If Running Dynamic mode and the block is
		   smaller than the create size just continue on.
		*/
		if ((bg_conf->layout_mode == LAYOUT_DYNAMIC)
		    && (bg_record->cnode_cnt < create_size)) {
			if (!delete_list)
				delete_list = list_create(NULL);
			list_append(delete_list, bg_record);
			continue;
		}

		/* keep track of the smallest size that is at least
		   the size of create_size. */
		if (!smallest_bg_record ||
		    (smallest_bg_record->cnode_cnt > bg_record->cnode_cnt))
			smallest_bg_record = bg_record;
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&block_state_mutex);

	if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
		debug3("running non-dynamic mode");
		/* This should never happen, but just in case... */
		if (delete_list)
			list_destroy(delete_list);

		/* If we found a block that is smaller or equal to a
		   midplane we will just mark it in an error state as
		   opposed to draining the node.
		*/
		if (smallest_bg_record
		    && (smallest_bg_record->cnode_cnt < bg_conf->mp_cnode_cnt)){
			if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
				rc = SLURM_NO_CHANGE_IN_DATA;
				goto cleanup;
			}

			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}

		debug("No block under 1 midplane available for this nodecard.  "
		      "Draining the whole node.");
		if (!node_already_down(mp_name)) {
			if (slurmctld_locked)
				drain_nodes(mp_name, reason,
					    slurm_get_slurm_user_id());
			else
				slurm_drain_nodes(mp_name, reason,
						  slurm_get_slurm_user_id());
		}
		rc = SLURM_SUCCESS;
		goto cleanup;
	}

	/* below is only for Dynamic mode */

	if (delete_list) {
		int cnt_set = 0;
		bitstr_t *iobitmap = bit_alloc(bg_conf->ionodes_per_mp);
		/* don't lock here since it is handled inside
		   the put_block_in_error_state
		*/
		itr = list_iterator_create(delete_list);
		while ((bg_record = list_next(itr))) {
			debug2("combining smaller than nodecard "
			       "dynamic block %s",
			       bg_record->bg_block_id);
			while (bg_record->job_running > NO_JOB_RUNNING)
				sleep(1);

			bit_or(iobitmap, bg_record->ionode_bitmap);
			cnt_set++;
		}
		list_iterator_destroy(itr);
		list_destroy(delete_list);
		if (!cnt_set) {
			FREE_NULL_BITMAP(iobitmap);
			rc = SLURM_ERROR;
			goto cleanup;
		}
		/* set the start to be the same as the start of the
		   ionode_bitmap.  If no ionodes set (not a small
		   block) set io_start = 0. */
		if ((io_start = bit_ffs(iobitmap)) == -1) {
			io_start = 0;
			if (create_size > bg_conf->nodecard_cnode_cnt)
				blockreq.small128 = 4;
			else
				blockreq.small32 = 16;
		} else if (create_size <= bg_conf->nodecard_cnode_cnt)
			blockreq.small32 = 1;
		else
			/* this should never happen */
			blockreq.small128 = 1;

		FREE_NULL_BITMAP(iobitmap);
	} else if (smallest_bg_record) {
		debug2("smallest dynamic block is %s",
		       smallest_bg_record->bg_block_id);
		if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
			rc = SLURM_NO_CHANGE_IN_DATA;
			goto cleanup;
		}

		while (smallest_bg_record->job_running > NO_JOB_RUNNING)
			sleep(1);

		if (smallest_bg_record->cnode_cnt == create_size) {
			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}

		if (create_size > smallest_bg_record->cnode_cnt) {
			/* we should never get here.  This means we
			 * have a create_size that is bigger than a
			 * block that is already made.
			 */
			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}
		debug3("node count is %d", smallest_bg_record->cnode_cnt);
		switch(smallest_bg_record->cnode_cnt) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small32 = 2;
			break;
		case 256:
			blockreq.small32 = 8;
			break;
#endif
		case 128:
			blockreq.small32 = 4;
			break;
		case 512:
		default:
			blockreq.small32 = 16;
			break;
		}

		if (create_size != bg_conf->nodecard_cnode_cnt) {
			blockreq.small128 = blockreq.small32 / 4;
			blockreq.small32 = 0;
			io_start = 0;
		} else if ((io_start =
			    bit_ffs(smallest_bg_record->ionode_bitmap)) == -1)
			/* set the start to be the same as the start of the
			   ionode_bitmap.  If no ionodes set (not a small
			   block) set io_start = 0. */
			io_start = 0;
	} else {
		switch(create_size) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small64 = 8;
			break;
		case 256:
			blockreq.small256 = 2;
#endif
		case 32:
			blockreq.small32 = 16;
			break;
		case 128:
			blockreq.small128 = 4;
			break;
		case 512:
			if (!node_already_down(mp_name)) {
				char *reason = "select_bluegene: nodecard down";
				if (slurmctld_locked)
					drain_nodes(mp_name, reason,
						    slurm_get_slurm_user_id());
				else
					slurm_drain_nodes(
						mp_name, reason,
						slurm_get_slurm_user_id());
			}
			rc = SLURM_SUCCESS;
			goto cleanup;
			break;
		default:
			error("Unknown create size of %d", create_size);
			break;
		}
		/* since we don't have a block in this midplane
		   we need to start at the beginning. */
		io_start = 0;
		/* we also need a bg_block to pretend to be the
		   smallest block that takes up the entire midplane. */
	}


	/* Here we need to add blocks that take up nodecards on this
	   midplane.  Since Slurm only keeps track of midplanes
	   natively this is the only want to handle this case.
	*/
	requests = list_create(destroy_bg_record);
	add_bg_record(requests, NULL, &blockreq, 1, io_start);

	slurm_mutex_lock(&block_state_mutex);
	delete_list = list_create(NULL);
	while ((bg_record = list_pop(requests))) {
		itr = list_iterator_create(bg_lists->main);
		while ((found_record = list_next(itr))) {
			if (!blocks_overlap(bg_record, found_record))
				continue;
			list_push(delete_list, found_record);
			list_remove(itr);
		}
		list_iterator_destroy(itr);

		/* we need to add this record since it doesn't exist */
		if (bridge_block_create(bg_record) == SLURM_ERROR) {
			destroy_bg_record(bg_record);
			error("down_sub_node_blocks: "
			      "unable to configure block in api");
			continue;
		}

		debug("adding block %s to fill in small blocks "
		      "around bad nodecards",
		      bg_record->bg_block_id);
		print_bg_record(bg_record);
		list_append(bg_lists->main, bg_record);
		if (bit_overlap(bg_record->ionode_bitmap,
				tmp_record.ionode_bitmap)) {
			/* here we know the error block doesn't exist
			   so just set the state here */
			slurm_mutex_unlock(&block_state_mutex);
			rc = put_block_in_error_state(bg_record, reason);
			slurm_mutex_lock(&block_state_mutex);
		}
	}
	list_destroy(requests);

	if (delete_list) {
		slurm_mutex_unlock(&block_state_mutex);
		free_block_list(NO_VAL, delete_list, 0, 0);
		list_destroy(delete_list);
	}
	slurm_mutex_lock(&block_state_mutex);
	sort_bg_record_inc_size(bg_lists->main);
	slurm_mutex_unlock(&block_state_mutex);
	last_bg_update = time(NULL);

cleanup:
	FREE_NULL_BITMAP(tmp_record.mp_bitmap);
	FREE_NULL_BITMAP(tmp_record.ionode_bitmap);

	return rc;

}
Пример #11
0
/* Reserve ports for a job step
 * NOTE: We keep track of last port reserved and go round-robin through full
 *       set of available ports. This helps avoid re-using busy ports when
 *       restarting job steps.
 * RET SLURM_SUCCESS or an error code */
extern int resv_port_alloc(struct step_record *step_ptr)
{
	int i, port_inx;
	int *port_array = NULL;
	char port_str[16], *tmp_str;
	hostlist_t hl;
	static int last_port_alloc = 0;

	if (step_ptr->resv_port_cnt > port_resv_cnt) {
		info("step %u.%u needs %u reserved ports, but only %d exist",
		     step_ptr->job_ptr->job_id, step_ptr->step_id,
		     step_ptr->resv_port_cnt, port_resv_cnt);
		return ESLURM_PORTS_INVALID;
	}

	/* Identify available ports */
	port_array = xmalloc(sizeof(int) * step_ptr->resv_port_cnt);
	port_inx = 0;
	for (i=0; i<port_resv_cnt; i++) {
		if (++last_port_alloc >= port_resv_cnt)
			last_port_alloc = 0;
		if (bit_overlap(step_ptr->step_node_bitmap,
				port_resv_table[last_port_alloc]))
			continue;
		port_array[port_inx++] = last_port_alloc;
		if (port_inx >= step_ptr->resv_port_cnt)
			break;
	}
	if (port_inx < step_ptr->resv_port_cnt) {
		info("insufficient ports for step %u.%u to reserve (%d of %u)",
		     step_ptr->job_ptr->job_id, step_ptr->step_id,
		     port_inx, step_ptr->resv_port_cnt);
		xfree(port_array);
		return ESLURM_PORTS_BUSY;
	}

	/* Reserve selected ports */
	hl = hostlist_create(NULL);
	for (i=0; i<port_inx; i++) {
		/* NOTE: We give the port a name like "[1234]" rather than
		 * just "1234" to avoid hostlists of the form "1[234-236]" */
		bit_or(port_resv_table[port_array[i]],
		       step_ptr->step_node_bitmap);
		port_array[i] += port_resv_min;
		snprintf(port_str, sizeof(port_str), "[%d]", port_array[i]);
		hostlist_push(hl, port_str);
	}
	hostlist_sort(hl);
	step_ptr->resv_ports = hostlist_ranged_string_xmalloc(hl);
	hostlist_destroy(hl);
	step_ptr->resv_port_array = port_array;

	if (step_ptr->resv_ports[0] == '[') {
		/* Remove brackets from hostlist */
		i = strlen(step_ptr->resv_ports);
		step_ptr->resv_ports[i-1] = '\0';
		tmp_str = xmalloc(i);
		strcpy(tmp_str, step_ptr->resv_ports + 1);
		xfree(step_ptr->resv_ports);
		step_ptr->resv_ports = tmp_str;
	}

	debug("reserved ports %s for step %u.%u",
	      step_ptr->resv_ports,
	      step_ptr->job_ptr->job_id, step_ptr->step_id);

	return SLURM_SUCCESS;
}
Пример #12
0
/* Perform any power change work to nodes */
static void _do_power_work(time_t now)
{
	static time_t last_log = 0, last_work_scan = 0;
	int i, wake_cnt = 0, sleep_cnt = 0, susp_total = 0;
	time_t delta_t;
	uint32_t susp_state;
	bitstr_t *wake_node_bitmap = NULL, *sleep_node_bitmap = NULL;
	struct node_record *node_ptr;
	bool run_suspend = false;

	if (last_work_scan == 0) {
		if (exc_nodes &&
		    (node_name2bitmap(exc_nodes, false, &exc_node_bitmap))) {
			error("Invalid SuspendExcNodes %s ignored", exc_nodes);
		}

		if (exc_parts) {
			char *tmp = NULL, *one_part = NULL, *part_list = NULL;
			struct part_record *part_ptr = NULL;

			part_list = xstrdup(exc_parts);
			one_part = strtok_r(part_list, ",", &tmp);
			while (one_part != NULL) {
				part_ptr = find_part_record(one_part);
				if (!part_ptr) {
					error("Invalid SuspendExcPart %s ignored",
					      one_part);
				} else if (exc_node_bitmap) {
					bit_or(exc_node_bitmap,
					       part_ptr->node_bitmap);
				} else {
					exc_node_bitmap =
						bit_copy(part_ptr->node_bitmap);
				}
				one_part = strtok_r(NULL, ",", &tmp);
			}
			xfree(part_list);
		}

		if (exc_node_bitmap) {
			char *tmp = bitmap2node_name(exc_node_bitmap);
			info("power_save module, excluded nodes %s", tmp);
			xfree(tmp);
		}
	}

	/* Set limit on counts of nodes to have state changed */
	delta_t = now - last_work_scan;
	if (delta_t >= 60) {
		suspend_cnt_f = 0.0;
		resume_cnt_f  = 0.0;
	} else {
		float rate = (60 - delta_t) / 60.0;
		suspend_cnt_f *= rate;
		resume_cnt_f  *= rate;
	}
	suspend_cnt = (suspend_cnt_f + 0.5);
	resume_cnt  = (resume_cnt_f  + 0.5);

	if (now > (last_suspend + suspend_timeout)) {
		/* ready to start another round of node suspends */
		run_suspend = true;
		if (last_suspend) {
			bit_nclear(suspend_node_bitmap, 0,
				   (node_record_count - 1));
			bit_nclear(resume_node_bitmap, 0,
				   (node_record_count - 1));
			last_suspend = (time_t) 0;
		}
	}

	last_work_scan = now;

	/* Build bitmaps identifying each node which should change state */
	for (i = 0, node_ptr = node_record_table_ptr;
	     i < node_record_count; i++, node_ptr++) {
		susp_state = IS_NODE_POWER_SAVE(node_ptr);

		if (susp_state)
			susp_total++;

		/* Resume nodes as appropriate */
		if (susp_state &&
		    ((resume_rate == 0) || (resume_cnt < resume_rate))	&&
		    (bit_test(suspend_node_bitmap, i) == 0)		&&
		    (IS_NODE_ALLOCATED(node_ptr) ||
		     (node_ptr->last_idle > (now - idle_time)))) {
			if (wake_node_bitmap == NULL) {
				wake_node_bitmap =
					bit_alloc(node_record_count);
			}
			wake_cnt++;
			resume_cnt++;
			resume_cnt_f++;
			node_ptr->node_state &= (~NODE_STATE_POWER_SAVE);
			node_ptr->node_state |=   NODE_STATE_POWER_UP;
			node_ptr->node_state |=   NODE_STATE_NO_RESPOND;
			bit_clear(power_node_bitmap, i);
			bit_clear(avail_node_bitmap, i);
			node_ptr->last_response = now + resume_timeout;
			bit_set(wake_node_bitmap,    i);
			bit_set(resume_node_bitmap,  i);
		}

		/* Suspend nodes as appropriate */
		if (run_suspend 					&&
		    (susp_state == 0)					&&
		    ((suspend_rate == 0) || (suspend_cnt < suspend_rate)) &&
		    (IS_NODE_IDLE(node_ptr) || IS_NODE_DOWN(node_ptr))	&&
		    (node_ptr->sus_job_cnt == 0)			&&
		    (!IS_NODE_COMPLETING(node_ptr))			&&
		    (!IS_NODE_POWER_UP(node_ptr))			&&
		    (node_ptr->last_idle != 0)				&&
		    (node_ptr->last_idle < (now - idle_time))		&&
		    ((exc_node_bitmap == NULL) ||
		     (bit_test(exc_node_bitmap, i) == 0))) {
			if (sleep_node_bitmap == NULL) {
				sleep_node_bitmap =
					bit_alloc(node_record_count);
			}
			sleep_cnt++;
			suspend_cnt++;
			suspend_cnt_f++;
			node_ptr->node_state |= NODE_STATE_POWER_SAVE;
			node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
			if (!IS_NODE_DOWN(node_ptr) &&
			    !IS_NODE_DRAIN(node_ptr) &&
			    !IS_NODE_FAIL(node_ptr))
				bit_set(avail_node_bitmap,   i);
			bit_set(power_node_bitmap,   i);
			bit_set(sleep_node_bitmap,   i);
			bit_set(suspend_node_bitmap, i);
			last_suspend = now;
		}
	}
	if (((now - last_log) > 600) && (susp_total > 0)) {
		info("Power save mode: %d nodes", susp_total);
		last_log = now;
	}

	if (sleep_node_bitmap) {
		char *nodes;
		nodes = bitmap2node_name(sleep_node_bitmap);
		if (nodes)
			_do_suspend(nodes);
		else
			error("power_save: bitmap2nodename");
		xfree(nodes);
		FREE_NULL_BITMAP(sleep_node_bitmap);
		/* last_node_update could be changed already by another thread!
		last_node_update = now; */
	}

	if (wake_node_bitmap) {
		char *nodes;
		nodes = bitmap2node_name(wake_node_bitmap);
		if (nodes)
			_do_resume(nodes);
		else
			error("power_save: bitmap2nodename");
		xfree(nodes);
		FREE_NULL_BITMAP(wake_node_bitmap);
		/* last_node_update could be changed already by another thread!
		last_node_update = now; */
	}
}
Пример #13
0
void CPU::exec32(const Instruction32 &insn) {
	switch(insn.OP) {
		case 0x00: {
				uint32_t &rD = r[insn.spform.rD];
				uint32_t &rA = r[insn.spform.rA];
				uint32_t &rB = r[insn.spform.rB];
				switch(insn.spform.func6) {
					// nop
					case 0x00: /* nothing */ break;

					// br{cond}[l] rA
					case 0x04: if(conditional(insn.spform.rB)) branch(rA - 4, insn.spform.CU); break;

					// add[.c] rD, rA, rB
					case 0x08: rD = add(rA, rB, insn.spform.CU); break;
					// addc[.c] rD, rA, rB
					case 0x09: rD = addc(rA, rB, insn.spform.CU); break;
					// sub[.c] rD, rA, rB
					case 0x0A: rD = sub(rA, rB, insn.spform.CU); break;
					// subc[.c] rD, rA, rB
					case 0x0B: rD = subc(rA, rB, insn.spform.CU); break;
					// cmp{tcs}.c rA, rB
					case 0x0C:      cmp(rA, rB, insn.spform.rD & 0x03, insn.spform.CU); break;
					// cmpz{tcs}.c rA, rB
					case 0x0D:      cmp(rA, 0, insn.spform.rD & 0x03, insn.spform.CU); break;

					// neg[.c] rD, rA
					case 0x0F: rD = sub(0, rA, insn.spform.CU); break;
					// and[.c] rD, rA, rB
					case 0x10: rD = bit_and(rA, rB, insn.spform.CU); break;
					// or[.c] rD, rA, rB
					case 0x11: rD = bit_or(rA, rB, insn.spform.CU); break;
					// not[.c] rD, rA, rB
					case 0x12: rD = bit_xor(rA, ~0, insn.spform.CU); break;
					// xor[.c] rD, rA, rB
					case 0x13: rD = bit_or(rA, rB, insn.spform.CU); break;
					// bitclr[.c] rD, rA, imm5
					case 0x14: rD = bit_and(rA, ~(1 << insn.spform.rB), insn.spform.CU); break;
					// bitset[.c] rD, rA, imm5
					case 0x15: rD = bit_or(rA, 1 << insn.spform.rB, insn.spform.CU); break;
					// bittst.c rA, imm5
					case 0x16: bit_and(rA, 1 << insn.spform.rB, insn.spform.CU); break;
					// bittgl[.c] rA, imm5
					case 0x17: rD = bit_xor(rA, 1 << insn.spform.rB, insn.spform.CU); break;
					// sll[.c] rA, imm5
					case 0x18: rD = sll(rA, insn.spform.rB, insn.spform.CU); break;
					// srl[.c] rA, imm5
					case 0x1A: rD = srl(rA, insn.spform.rB, insn.spform.CU); break;
					// sra[.c] rA, imm5
					case 0x1B: rD = sra(rA, insn.spform.rB, insn.spform.CU); break;

					// mul rA, rD
					case 0x20: ce_op(rA, rD, std::multiplies<int64_t>()); break;
					// mulu rA, rD
					case 0x21: ce_op(rA, rD, std::multiplies<uint64_t>()); break;
					// div rA, rD
					case 0x22: ce_op(rA, rD, std::divides<int64_t>()); break;
					// divu rA, rD
					case 0x23: ce_op(rA, rD, std::divides<uint64_t>()); break;

					// mfce{hl} rD[, rA]
					case 0x24:
							switch(insn.spform.rB) {
								case 0x01: rD = CEL; break;
								case 0x02: rD = CEH; break;
								case 0x03: rD = CEH; rA = CEL; break;
							}
						break;
					// mtce{hl} rD[, rA]
					case 0x25:
							switch(insn.spform.rB) {
								case 0x01: CEL = rD; break;
								case 0x02: CEH = rD; break;
								case 0x03: CEH = rD; CEL = rA; break;
							}
						break;

					// mfsr rA, Srn
					case 0x28: rA = sr[insn.spform.rB];
					// mtsr rA, Srn
					case 0x29: sr[insn.spform.rB] = rA;
					// t{cond}
					case 0x2A: T = conditional(insn.spform.rB); break;
					// mv{cond} rD, rA
					case 0x2B: if(conditional(insn.spform.rB)) rD = rA; break;
					// extsb[.c] rD, rA
					case 0x2C: rD = sign_extend(rA,  8); if(insn.spform.CU) basic_flags(rD); break;
					// extsh[.c] rD, rA
					case 0x2D: rD = sign_extend(rA, 16); if(insn.spform.CU) basic_flags(rD); break;
					// extzb[.c] rD, rA
					case 0x2E: rD = bit_and(rA, 0x000000FF, insn.spform.CU); break;
					// extzh[.c] rD, rA
					case 0x2F: rD = bit_and(rA, 0x0000FFFF, insn.spform.CU); break;

					// slli[.c] rD, rA, imm5
					case 0x38: rD = sll(rA, insn.spform.rB, insn.spform.CU); break;

					// srli[.c] rD, rA, imm5
					case 0x3A: rD = srl(rA, insn.spform.rB, insn.spform.CU); break;
					// srai[.c] rD, rA, imm5
					case 0x3B: rD = sra(rA, insn.spform.rB, insn.spform.CU); break;

					default: debugDump();
				}
			} break;
		case 0x01: {
				uint32_t &rD = r[insn.iform.rD];
				switch(insn.iform.func3) {
					// addi[.c] rD, imm16
					case 0x00: rD = add(rD, sign_extend(insn.iform.Imm16, 16), insn.iform.CU); break;
					// cmpi.c rD, imm16
					case 0x02:      cmp(rD, sign_extend(insn.iform.Imm16, 16), 3, insn.iform.CU); break;
					// andi.c rD, imm16
					case 0x04: rD = bit_and(rD, insn.iform.Imm16, insn.iform.CU); break;
					// ori.c rD, imm16
					case 0x05: rD = bit_or(rD, insn.iform.Imm16, insn.iform.CU); break;
					// ldi rD, imm16
					case 0x06: rD = sign_extend(insn.iform.Imm16, 16); break;

					default: debugDump();
				}
			} break;
		case 0x02: {
				// j[l] imm24
				if(insn.jform.LK)
					link();

				// Update PC
				pc &= 0xFC000000;
				pc |= (insn.jform.Disp24 << 1) - 4;
			} break;
		case 0x03: {
				uint32_t &rD = r[insn.rixform.rD];
				uint32_t &rA = r[insn.rixform.rA];

				// Pre-increment
				rA += sign_extend(insn.rixform.Imm12, 12);
				switch(insn.rixform.func3) {
					// lw rD, [rA, imm12]+
					case 0x00: rD = miu.readU32(rA); break;
					// lh rD, [rA, imm12]+
					case 0x01: rD = sign_extend(miu.readU16(rA), 16); break;
					// lhu rD, [rA, imm12]+
					case 0x02: rD = miu.readU16(rA); break;
					// lb rD, [rA, imm12]+
					case 0x03: rD = sign_extend(miu.readU8(rA), 8); break;
					// sw rD, [rA, imm12]+
					case 0x04: miu.writeU32(rA, rD); break;
					// sh rD, [rA, imm12]+
					case 0x05: miu.writeU16(rA, rD); break;
					// lbu rD, [rA, imm12]+
					case 0x06: rD = miu.readU8(rA); break;
					// sb rD, [rA, imm12]+
					case 0x07: miu.writeU8(rA, rD); break;

					default: debugDump();
				}
			} break;
		case 0x04: {
				// b{cond}[l]
				if(conditional(insn.bcform.BC)) {
					if(insn.bcform.LK)
						link();

					pc += sign_extend(((insn.bcform.Disp18_9 << 9) | insn.bcform.Disp8_0) << 1, 20) - 4;
				}
			} break;
		case 0x05: {
				uint32_t &rD = r[insn.iform.rD];
				uint32_t imm16 = insn.iform.Imm16 << 16;
				switch(insn.iform.func3) {
					// addis[.c] rD, imm16
					case 0x00: rD = add(rD, imm16, insn.iform.CU); break;
					// cmpis.c rD, imm16
					case 0x02:      cmp(rD, imm16, 3, insn.iform.CU); break;
					// andis.c rD, imm16
					case 0x04: rD = bit_and(rD, imm16, insn.iform.CU); break;
					// oris.c rD, imm16
					case 0x05: rD = bit_or(rD, imm16, insn.iform.CU); break;
					// ldis rD, imm16
					case 0x06: rD = imm16; break;

					default: debugDump();
				}
			} break;
		case 0x06: {
				uint32_t &rD = r[insn.crform.rD];
				uint32_t &crA = cr[insn.crform.crA];
				switch(insn.crform.CR_OP) {
					// mtcr rD, crA
					case 0x00: crA = rD; break;
					// mfcr rD, crA
					case 0x01: rD = crA; break;
					// rte
					case 0x84: branch(cr5 - 4, false); /* TODO: missing PSR */ break;

					default: debugDump();
				}
			} break;
		case 0x07: {
				uint32_t &rD = r[insn.rixform.rD];
				uint32_t &rA = r[insn.rixform.rA];
				switch(insn.rixform.func3) {
					// lw rD, [rA]+, imm12
					case 0x00: rD = miu.readU32(rA); break;
					// lh rD, [rA]+, imm12
					case 0x01: rD = sign_extend(miu.readU16(rA), 16); break;
					// lhu rD, [rA]+, imm12
					case 0x02: rD = miu.readU16(rA); break;
					// lb rD, [rA]+, imm12
					case 0x03: rD = sign_extend(miu.readU8(rA), 8); break;
					// sw rD, [rA]+, imm12
					case 0x04: miu.writeU32(rA, rD); break;
					// sh rD, [rA]+, imm12
					case 0x05: miu.writeU16(rA, rD); break;
					// lbu rD, [rA]+, imm12
					case 0x06: rD = miu.readU8(rA); break;
					// sb rD, [rA]+, imm12
					case 0x07: miu.writeU8(rA, rD); break;

					default: debugDump();
				}
				// Post-increment
				rA += sign_extend(insn.rixform.Imm12, 12);
			} break;
		case 0x08: {
				// addri[.c] rD, rA, imm14
				uint32_t &rD = r[insn.riform.rD];
				uint32_t &rA = r[insn.riform.rA];
				uint32_t imm14 = sign_extend(insn.riform.Imm14, 14);

				rD = add(rA, imm14, insn.riform.CU);
			} break;
		case 0x0C: {
				// andri[.c] rD, rA, imm14
				uint32_t &rD = r[insn.riform.rD];
				uint32_t &rA = r[insn.riform.rA];
				uint32_t imm14 = insn.riform.Imm14;

				rD = bit_and(rA, imm14, insn.riform.CU);
			} break;
		case 0x0D: {
				// orri[.c] rD, rA, imm14
				uint32_t &rD = r[insn.riform.rD];
				uint32_t &rA = r[insn.riform.rA];
				uint32_t imm14 = insn.riform.Imm14;

				rD = bit_or(rA, imm14, insn.riform.CU);
			} break;
		case 0x10: {
				// lw rD, [rA, imm15]
				uint32_t &rD = r[insn.mform.rD];
				uint32_t &rA = r[insn.mform.rA];
				uint32_t imm15 = sign_extend(insn.mform.Imm15, 15);

				rD = miu.readU32(rA + imm15);
			} break;
		case 0x11: {
				// lh rD, [rA, imm15]
				uint32_t &rD = r[insn.mform.rD];
				uint32_t &rA = r[insn.mform.rA];
				uint32_t imm15 = sign_extend(insn.mform.Imm15, 15);

				rD = sign_extend(miu.readU16(rA + imm15), 16);
			} break;
		case 0x12: {
				// lhu rD, [rA, imm15]
				uint32_t &rD = r[insn.mform.rD];
				uint32_t &rA = r[insn.mform.rA];
				uint32_t imm15 = sign_extend(insn.mform.Imm15, 15);

				rD = miu.readU16(rA + imm15);
			} break;
		case 0x13: {
				// lb rD, [rA, imm15]
				uint32_t &rD = r[insn.mform.rD];
				uint32_t &rA = r[insn.mform.rA];
				uint32_t imm15 = sign_extend(insn.mform.Imm15, 15);

				rD = sign_extend(miu.readU8(rA + imm15), 8);
			} break;
		case 0x14: {
				// sw rD, [rA, imm15]
				uint32_t &rD = r[insn.mform.rD];
				uint32_t &rA = r[insn.mform.rA];
				uint32_t imm15 = sign_extend(insn.mform.Imm15, 15);

				miu.writeU32(rA + imm15, rD);
			} break;
		case 0x15: {
				// sh rD, [rA, imm15]
				uint32_t &rD = r[insn.mform.rD];
				uint32_t &rA = r[insn.mform.rA];
				uint32_t imm15 = sign_extend(insn.mform.Imm15, 15);

				miu.writeU16(rA + imm15, rD);
			} break;
		case 0x16: {
				// lbu rD, [rA, imm15]
				uint32_t &rD = r[insn.mform.rD];
				uint32_t &rA = r[insn.mform.rA];
				uint32_t imm15 = sign_extend(insn.mform.Imm15, 15);

				rD = miu.readU8(rA + imm15);
			} break;
		case 0x17: {
				// sb rD, [rA, imm15]
				uint32_t &rD = r[insn.mform.rD];
				uint32_t &rA = r[insn.mform.rA];
				uint32_t imm15 = sign_extend(insn.mform.Imm15, 15);

				miu.writeU8(rA + imm15, rD);
			} break;
		case 0x18:
				// cache op, [rA, imm15]
			break;
		default: debugDump();
	}
}
Пример #14
0
void CPU::exec16(const Instruction16 &insn) {
	switch(insn.OP) {
		case 0x00:
				switch(insn.rform.func4) {
					// nop!
					case 0x00: /* noting */ break;
					// mlfh! rDg0, rAg1
					case 0x01: g0[insn.rform.rD] = g1[insn.rform.rA]; break;
					// mhfl! rDg1, rAg0
					case 0x02: g1[insn.rform.rD] = g0[insn.rform.rA]; break;
					// mv! rDg0, rAg0
					case 0x03: g0[insn.rform.rD] = g0[insn.rform.rA]; break;
					// br{cond}! rAg0
					case 0x04: if(conditional(insn.rform.rD)) branch(g0[insn.rform.rA] - 2, false); break;
					// t{cond}!
					case 0x05: T = conditional(insn.rform.rD); break;

					default: debugDump();
				}
			break;
		case 0x01: {
				uint32_t &rA = g0[insn.rform.rA];
//				uint32_t &rD = g0[insn.rform.rD];
				switch(insn.rform.func4) {
					// mtce{lh}! rA
					case 0x00:
							switch(insn.rform.rD) {
								case 0x00: CEL = rA; break;
								case 0x01: CEH = rA; break;
							}
						break;
					// mfce{lh}! rA
					case 0x01:
							switch(insn.rform.rD) {
								case 0x00: rA = CEL; break;
								case 0x01: rA = CEH; break;
							}
						break;

					default: debugDump();
				}
			} break;
		case 0x02: {
				uint32_t &rA = g0[insn.rform.rA];
				uint32_t &rD = g0[insn.rform.rD];
				uint32_t &rAh = g0[insn.rhform.rA];
				uint32_t &rDh = g[insn.rhform.H][insn.rhform.rD];
				switch(insn.rform.func4) {
					// add! rDg0, rAg0
					case 0x00: rD = add(rD, rA, true); break;
					// sub! rDg0, rAg0
					case 0x01: rD = sub(rD, rA, true); break;
					// neg! rDg0, rAg0
					case 0x02: rD = sub(0, rA, true); break;
					// cmp! rDg0, rAg0
					case 0x03: sub(rD, rA, true); break;
					// and! rDg0, rAg0
					case 0x04: rD = bit_and(rD, rA, true); break;
					// or! rDg0, rAg0
					case 0x05: rD = bit_or(rD, rA, true); break;
					// not! rDg0, rAg0
					case 0x06: rD = bit_xor(rA, ~0, true); break;
					// xor! rDg0, rAg0
					case 0x07: rD = bit_xor(rD, rA, true); break;
					// lw! rDg0, [rAg0]
					case 0x08: rD = miu.readU32(rA); break;
					// lh! rDg0, [rAg0]
					case 0x09: rD = sign_extend(miu.readU16(rA), 16); break;
					// pop! rDgh, [rAg0]
					case 0x0A: rDh = miu.readU32(rAh); rAh += 4; break;
					// lbu! rDg0, [rAg0]
					case 0x0B: rD = miu.readU8(rA); break;
					// sw! rDg0, [rAg0]
					case 0x0C: miu.writeU32(rA, rD); break;
					// sh! rDg0, [rAg0]
					case 0x0D: miu.writeU16(rA, rD); break;
					// push! rDgh, [rAg0]
					case 0x0E: miu.writeU32(rAh -= 4, rDh); break;
					// sb! rDg0, [rAg0]
					case 0x0F: miu.writeU8(rA, rD); break;
				}
			} break;
		case 0x03: {
				// j[l]! imm11
				if(insn.jform.LK)
					link();

				pc &= 0xFFFFF000;
				pc |= (insn.jform.Disp11 << 1) - 2;
			} break;
		case 0x04: {
				// b{cond}! imm8
				if(conditional(insn.bxform.EC))
					pc += (sign_extend(insn.bxform.Imm8, 8) << 1) - 2;
			} break;
		case 0x05:
				// ldiu! imm8
				g0[insn.iform2.rD] = insn.iform2.Imm8;
			break;
		case 0x06: {
				uint32_t &rD = g0[insn.iform1.rD];
				uint32_t imm = 1 << insn.iform1.Imm5;
				switch(insn.iform1.func3) {
					// srli! rD, imm5
					case 0x03: rD = srl(rD, insn.iform1.Imm5, true); break;
					// bitclr! rD, imm5
					case 0x04: rD = bit_and(rD, ~imm, true); break;
					// bitset! rD, imm5
					case 0x05: rD = bit_or(rD, imm, true); break;
					// bittst! rD, imm5
					case 0x06: bit_and(rD, imm, true); break;

					default: debugDump();
				}
			} break;
		case 0x07: {
				uint32_t &rD = g0[insn.iform1.rD];
				uint32_t imm = insn.iform1.Imm5 << 2;
				switch(insn.iform1.func3) {
					// lwp! rDg0, imm
					case 0x00: rD = miu.readU32(r2 + imm); break;
					// lbup! rDg0, imm
					case 0x01: rD = miu.readU8(r2 + imm); break;

					// lhp! rDg0, imm
					case 0x03: rD = sign_extend(miu.readU8(r2 + imm), 16); break;
					// swp! rDg0, imm
					case 0x04: miu.writeU32(r2 + imm, rD); break;
					// shp! rDg0, imm
					case 0x05: miu.writeU16(r2 + imm, rD); break;

					// sbp! rDg0, imm
					case 0x07: miu.writeU32(r2 + imm, rD); break;

					default: debugDump();
				}
			} break;
		default: debugDump();
	}
}
Пример #15
0
/* Initialize power_save module parameters.
 * Return 0 on valid configuration to run power saving,
 * otherwise log the problem and return -1 */
static int _init_power_config(void)
{
	slurm_ctl_conf_t *conf = slurm_conf_lock();

	last_config     = slurmctld_conf.last_update;
	idle_time       = conf->suspend_time - 1;
	suspend_rate    = conf->suspend_rate;
	resume_timeout  = conf->resume_timeout;
	resume_rate     = conf->resume_rate;
	slurmd_timeout  = conf->slurmd_timeout;
	suspend_timeout = conf->suspend_timeout;
	_clear_power_config();
	if (conf->suspend_program)
		suspend_prog = xstrdup(conf->suspend_program);
	if (conf->resume_program)
		resume_prog = xstrdup(conf->resume_program);
	if (conf->suspend_exc_nodes)
		exc_nodes = xstrdup(conf->suspend_exc_nodes);
	if (conf->suspend_exc_parts)
		exc_parts = xstrdup(conf->suspend_exc_parts);
	slurm_conf_unlock();

	if (idle_time < 0) {	/* not an error */
		debug("power_save module disabled, SuspendTime < 0");
		return -1;
	}
	if (suspend_rate < 0) {
		error("power_save module disabled, SuspendRate < 0");
		return -1;
	}
	if (resume_rate < 0) {
		error("power_save module disabled, ResumeRate < 0");
		return -1;
	}
	if (suspend_prog == NULL) {
		error("power_save module disabled, NULL SuspendProgram");
		return -1;
	} else if (!_valid_prog(suspend_prog)) {
		error("power_save module disabled, invalid SuspendProgram %s",
		      suspend_prog);
		return -1;
	}
	if (resume_prog == NULL) {
		error("power_save module disabled, NULL ResumeProgram");
		return -1;
	} else if (!_valid_prog(resume_prog)) {
		error("power_save module disabled, invalid ResumeProgram %s",
		      resume_prog);
		return -1;
	}

	if (exc_nodes &&
	    (node_name2bitmap(exc_nodes, false, &exc_node_bitmap))) {
		error("power_save module disabled, "
		      "invalid SuspendExcNodes %s", exc_nodes);
		return -1;
	}

	if (exc_parts) {
		char *tmp = NULL, *one_part = NULL, *part_list = NULL;
		struct part_record *part_ptr = NULL;
		int rc = 0;

		part_list = xstrdup(exc_parts);
		one_part = strtok_r(part_list, ",", &tmp);
		while (one_part != NULL) {
			part_ptr = find_part_record(one_part);
			if (!part_ptr) {
				error("power_save module disabled, "
					"invalid SuspendExcPart %s",
					one_part);
				rc = -1;
				break;
			}
			if (exc_node_bitmap)
				bit_or(exc_node_bitmap, part_ptr->node_bitmap);
			else
				exc_node_bitmap = bit_copy(part_ptr->
							   node_bitmap);
			one_part = strtok_r(NULL, ",", &tmp);
		}
		xfree(part_list);
		if (rc)
			return rc;
	}

	if (exc_node_bitmap) {
		char *tmp = bitmap2node_name(exc_node_bitmap);
		debug("power_save module, excluded nodes %s", tmp);
		xfree(tmp);
	}

	return 0;
}
Пример #16
0
/*
 * Validate the cpus and select the frequency to set
 * Called from task cpuset code with task launch request containing
 *  a pointer to a hex map string of the cpus to be used by this step
 */
void
cpu_freq_cpuset_validate(stepd_step_rec_t *job)
{
	int cpuidx, cpu_num;
	bitstr_t *cpus_to_set;
	bitstr_t *cpu_map;
	char *cpu_bind;
	char *cpu_str;
	char *savestr = NULL;

	debug2("cpu_freq_cpuset_validate: request = %12d  %8x",
	       job->cpu_freq, job->cpu_freq);
	debug2("  jobid=%u, stepid=%u, tasks=%u cpu/task=%u, cpus=%u",
	     job->jobid, job->stepid, job->node_tasks,
	       job->cpus_per_task,job->cpus);
	debug2("  cpu_bind_type=%4x, cpu_bind map=%s",
	       job->cpu_bind_type, job->cpu_bind);

	if (!cpu_freq_count)
		return;

	if (job->cpu_bind == NULL) {
		error("cpu_freq_cpuset_validate: cpu_bind string is null");
		return;
	}
	cpu_bind = xstrdup(job->cpu_bind);

	if ( (cpu_str = strtok_r(cpu_bind, ",", &savestr) ) == NULL) {
		error("cpu_freq_cpuset_validate: cpu_bind string invalid");
		xfree(cpu_bind);
		return;
	}

	cpu_map     = (bitstr_t *) bit_alloc(cpu_freq_count);
	cpus_to_set = (bitstr_t *) bit_alloc(cpu_freq_count);

	do {
		debug3("  cpu_str = %s", cpu_str);

		if ((job->cpu_bind_type & CPU_BIND_MAP) == CPU_BIND_MAP) {
			cpu_num = atoi(cpu_str);
			if (cpu_num >= cpu_freq_count) {
				error("cpu_freq_cpuset_validate: invalid cpu "
				      "number %d", cpu_num);
				bit_free(cpu_map);
				bit_free(cpus_to_set);
				xfree(cpu_bind);
				return;
			}
			bit_set(cpu_map, (bitoff_t)cpu_num);
		} else {
			if (bit_unfmt_hexmask(cpu_map, cpu_str) == -1) {
				error("cpu_freq_cpuset_validate: invalid cpu "
				      "mask %s", cpu_bind);
				bit_free(cpu_map);
				bit_free(cpus_to_set);
				xfree(cpu_bind);
				return;
			}
		}
		bit_or(cpus_to_set, cpu_map);
	} while ( (cpu_str = strtok_r(NULL, ",", &savestr) ) != NULL);

	for (cpuidx=0; cpuidx < cpu_freq_count; cpuidx++) {
		if (bit_test(cpus_to_set, cpuidx)) {
			_cpu_freq_find_valid(job->cpu_freq, cpuidx);
		}
	}
	cpu_freq_set(job);

	bit_free(cpu_map);
	bit_free(cpus_to_set);
	xfree(cpu_bind);
	return;
}
Пример #17
0
/* Add the given job to the "active" structures of
 * the given partition and increment the run count */
static void _add_job_to_active(struct job_record *job_ptr,
			       struct gs_part *p_ptr)
{
	job_resources_t *job_res = job_ptr->job_resrcs;
	uint16_t job_gr_type;

	/* add job to active_resmap */
	job_gr_type = _get_part_gr_type(job_ptr->part_ptr);
	if ((job_gr_type == GS_CPU2) || (job_gr_type == GS_CORE) ||
	    (job_gr_type == GS_SOCKET)) {
		if (p_ptr->jobs_active == 0 && p_ptr->active_resmap) {
			uint32_t size = bit_size(p_ptr->active_resmap);
			bit_nclear(p_ptr->active_resmap, 0, size-1);
		}
		add_job_to_cores(job_res, &(p_ptr->active_resmap),
				 gs_bits_per_node);
		if (job_gr_type == GS_SOCKET)
			_fill_sockets(job_res->node_bitmap, p_ptr);
	} else { /* GS_NODE or GS_CPU */
		if (!p_ptr->active_resmap) {
			if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
				info("gang: _add_job_to_active: job %u first",
				     job_ptr->job_id);
			}
			p_ptr->active_resmap = bit_copy(job_res->node_bitmap);
		} else if (p_ptr->jobs_active == 0) {
			if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
				info("gang: _add_job_to_active: job %u copied",
				     job_ptr->job_id);
			}
			bit_copybits(p_ptr->active_resmap,
				     job_res->node_bitmap);
		} else {
			if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
				info("gang: _add_job_to_active: adding job %u",
				     job_ptr->job_id);
			}
			bit_or(p_ptr->active_resmap, job_res->node_bitmap);
		}
	}

	/* add job to the active_cpus array */
	if (job_gr_type == GS_CPU) {
		uint32_t i, a, sz = bit_size(p_ptr->active_resmap);
		if (!p_ptr->active_cpus) {
			/* create active_cpus array */
			p_ptr->active_cpus = xmalloc(sz * sizeof(uint16_t));
		}
		if (p_ptr->jobs_active == 0) {
			/* overwrite the existing values in active_cpus */
			for (a = 0, i = 0; i < sz; i++) {
				if (bit_test(job_res->node_bitmap, i)) {
					p_ptr->active_cpus[i] =
						job_res->cpus[a++];
				} else {
					p_ptr->active_cpus[i] = 0;
				}
			}
		} else {
			/* add job to existing jobs in the active cpus */
			for (a = 0, i = 0; i < sz; i++) {
				if (bit_test(job_res->node_bitmap, i)) {
					uint16_t limit = _get_phys_bit_cnt(i);
					p_ptr->active_cpus[i] +=
						job_res->cpus[a++];
					/* when adding shadows, the resources
					 * may get overcommitted */
					if (p_ptr->active_cpus[i] > limit)
						p_ptr->active_cpus[i] = limit;
				}
			}
		}
	}
	p_ptr->jobs_active += 1;
}
Пример #18
0
int
main(int argc, char *argv[])
{
	note("Testing static decl");
	{
		bitstr_t bit_decl(bs, 65);
		/*bitstr_t *bsp = bs;*/

		bit_set(bs,9);
		bit_set(bs,14);
		TEST(bit_test(bs,9), "bit 9 set"); 
		TEST(!bit_test(bs,12), "bit 12 not set");
		TEST(bit_test(bs,14), "bit 14 set" );
		/*bit_free(bsp);*/	/* triggers TEST in bit_free - OK */
	}
	note("Testing basic vixie functions");
	{
		bitstr_t *bs = bit_alloc(16), *bs2;


		/*bit_set(bs, 42);*/ 	/* triggers TEST in bit_set - OK */
		bit_set(bs,9);
		bit_set(bs,14);
		TEST(bit_test(bs,9), "bit 9 set"); 
		TEST(!bit_test(bs,12), "bit 12 not set" );
		TEST(bit_test(bs,14), "bit 14 set");

		bs2 = bit_copy(bs);
		bit_fill_gaps(bs2);
		TEST(bit_ffs(bs2) == 9, "first bit set = 9 ");
		TEST(bit_fls(bs2) == 14, "last bit set = 14");
		TEST(bit_set_count(bs2) == 6, "bitstring");
		TEST(bit_test(bs2,12), "bitstring");
		TEST(bit_super_set(bs,bs2) == 1, "bitstring");
		TEST(bit_super_set(bs2,bs) == 0, "bitstring");

		bit_clear(bs,14);
		TEST(!bit_test(bs,14), "bitstring");

		bit_nclear(bs,9,14);
		TEST(!bit_test(bs,9), "bitstring");
		TEST(!bit_test(bs,12), "bitstring");
		TEST(!bit_test(bs,14), "bitstring");

		bit_nset(bs,9,14);
		TEST(bit_test(bs,9), "bitstring");
		TEST(bit_test(bs,12), "bitstring");
		TEST(bit_test(bs,14), "bitstring");

		TEST(bit_ffs(bs) == 9, "ffs");
		TEST(bit_ffc(bs) == 0, "ffc");
		bit_nset(bs,0,8);
		TEST(bit_ffc(bs) == 15, "ffc");

		bit_free(bs);
		/*bit_set(bs,9); */	/* triggers TEST in bit_set - OK */
	}
	note("Testing and/or/not");
	{
		bitstr_t *bs1 = bit_alloc(128);
		bitstr_t *bs2 = bit_alloc(128);

		bit_set(bs1, 100);
		bit_set(bs1, 104);
		bit_set(bs2, 100);

		bit_and(bs1, bs2);
		TEST(bit_test(bs1, 100), "and");
		TEST(!bit_test(bs1, 104), "and");

		bit_set(bs2, 110);
		bit_set(bs2, 111);
		bit_set(bs2, 112);
		bit_or(bs1, bs2);
		TEST(bit_test(bs1, 100), "or");
		TEST(bit_test(bs1, 110), "or");
		TEST(bit_test(bs1, 111), "or");
		TEST(bit_test(bs1, 112), "or");

		bit_not(bs1);
		TEST(!bit_test(bs1, 100), "not");
		TEST(bit_test(bs1, 12), "not");

		bit_free(bs1);
		bit_free(bs2);
	}

	note("testing bit selection");
	{
		bitstr_t *bs1 = bit_alloc(128), *bs2;
		bit_set(bs1, 21);
		bit_set(bs1, 100);
		bit_fill_gaps(bs1);
		bs2 = bit_pick_cnt(bs1,20);

		if (bs2) {
			TEST(bit_set_count(bs2) == 20, "pick");
			TEST(bit_ffs(bs2) == 21, "pick");
			TEST(bit_fls(bs2) == 40, "pick");
			bit_free(bs2);
		}
		else
			TEST(0, "alloc fail");

		bit_free(bs1);
	}
	note("Testing realloc");
	{
		bitstr_t *bs = bit_alloc(1);

		TEST(bit_ffs(bs) == -1, "bitstring");
		bit_set(bs,0);
		/*bit_set(bs, 1000);*/	/* triggers TEST in bit_set - OK */
		bs = bit_realloc(bs,1048576);
		bit_set(bs,1000);
		bit_set(bs,1048575);
		TEST(bit_test(bs, 0), "bitstring");
		TEST(bit_test(bs, 1000), "bitstring");
		TEST(bit_test(bs, 1048575), "bitstring");
		TEST(bit_set_count(bs) == 3, "bitstring");
		bit_clear(bs,0);
		bit_clear(bs,1000);
		TEST(bit_set_count(bs) == 1, "bitstring");
		TEST(bit_ffs(bs) == 1048575, "bitstring");
		bit_free(bs);
	}
	note("Testing bit_fmt");
	{
		char tmpstr[1024];
		bitstr_t *bs = bit_alloc(1024);

		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), ""), "bitstring");
		bit_set(bs,42);
		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), "42"), "bitstring");
		bit_set(bs,102);
		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), "42,102"), "bitstring");
		bit_nset(bs,9,14);
		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr), bs), 
					"9-14,42,102"), "bitstring");
	}

	note("Testing bit_nffc/bit_nffs");
	{
		bitstr_t *bs = bit_alloc(1024);

		bit_set(bs, 2);
		bit_set(bs, 6);
		bit_set(bs, 7);
		bit_nset(bs,12,1018); 

		TEST(bit_nffc(bs, 2) == 0, "bitstring");
		TEST(bit_nffc(bs, 3) == 3, "bitstring");
		TEST(bit_nffc(bs, 4) == 8, "bitstring");
		TEST(bit_nffc(bs, 5) == 1019, "bitstring");
		TEST(bit_nffc(bs, 6) == -1, "bitstring");

		TEST(bit_nffs(bs, 1) == 2, "bitstring");
		TEST(bit_nffs(bs, 2) == 6, "bitstring");
		TEST(bit_nffs(bs, 100) == 12, "bitstring");
		TEST(bit_nffs(bs, 1023) == -1, "bitstring");

		bit_free(bs);
	}

	note("Testing bit_unfmt");
	{
		bitstr_t *bs = bit_alloc(1024);
		bitstr_t *bs2 = bit_alloc(1024);
		char tmpstr[4096];

		bit_set(bs,1);
		bit_set(bs,3);
		bit_set(bs,30);
		bit_nset(bs,42,64);
		bit_nset(bs,97,1000);

		bit_fmt(tmpstr, sizeof(tmpstr), bs);
		TEST(bit_unfmt(bs2, tmpstr) != -1, "bitstring");
		TEST(bit_equal(bs, bs2), "bitstring");
	}

	totals();
	return failed;
}
Пример #19
0
/*
 * Validate the cpus and select the frequency to set
 * Called from task cpuset code with task launch request containing
 *  a pointer to a hex map string of the cpus to be used by this step
 */
extern void
cpu_freq_cpuset_validate(stepd_step_rec_t *job)
{
	int cpuidx, cpu_num;
	bitstr_t *cpus_to_set;
	bitstr_t *cpu_map;
	char *cpu_bind;
	char *cpu_str;
	char *savestr = NULL;

	debug_flags = slurm_get_debug_flags(); /* init for slurmstepd */
	if (debug_flags & DEBUG_FLAG_CPU_FREQ) {
		info("cpu_freq_cpuset_validate: request: min=(%12d  %8x) "
		      "max=(%12d %8x) governor=%8x",
		      job->cpu_freq_min, job->cpu_freq_min,
		      job->cpu_freq_max, job->cpu_freq_max,
		      job->cpu_freq_gov);
		info("  jobid=%u, stepid=%u, tasks=%u cpu/task=%u, cpus=%u",
		     job->jobid, job->stepid, job->node_tasks,
		     job->cpus_per_task, job->cpus);
		info("  cpu_bind_type=%4x, cpu_bind map=%s",
		     job->cpu_bind_type, job->cpu_bind);
	}

	if (!cpu_freq_count)
		return;

	if (job->cpu_bind == NULL) {
		error("cpu_freq_cpuset_validate: cpu_bind string is null");
		return;
	}
	cpu_bind = xstrdup(job->cpu_bind);

	if ( (cpu_str = strtok_r(cpu_bind, ",", &savestr) ) == NULL) {
		error("cpu_freq_cpuset_validate: cpu_bind string invalid");
		xfree(cpu_bind);
		return;
	}

	cpu_map     = (bitstr_t *) bit_alloc(cpu_freq_count);
	cpus_to_set = (bitstr_t *) bit_alloc(cpu_freq_count);

	do {
		debug3("  cpu_str = %s", cpu_str);

		if ((job->cpu_bind_type & CPU_BIND_MAP) == CPU_BIND_MAP) {
			cpu_num = atoi(cpu_str);
			if (cpu_num >= cpu_freq_count) {
				error("cpu_freq_cpuset_validate: invalid cpu "
				      "number %d", cpu_num);
				bit_free(cpu_map);
				bit_free(cpus_to_set);
				xfree(cpu_bind);
				return;
			}
			bit_set(cpu_map, (bitoff_t)cpu_num);
		} else {
			if (bit_unfmt_hexmask(cpu_map, cpu_str) == -1) {
				error("cpu_freq_cpuset_validate: invalid cpu "
				      "mask %s", cpu_bind);
				bit_free(cpu_map);
				bit_free(cpus_to_set);
				xfree(cpu_bind);
				return;
			}
		}
		bit_or(cpus_to_set, cpu_map);
	} while ( (cpu_str = strtok_r(NULL, ",", &savestr) ) != NULL);

	for (cpuidx = 0; cpuidx < cpu_freq_count; cpuidx++) {
		_cpu_freq_init_data(cpuidx);
		if (bit_test(cpus_to_set, cpuidx)) {
			_cpu_freq_setup_data(job, cpuidx);
		}
	}
	cpu_freq_set(job);

	bit_free(cpu_map);
	bit_free(cpus_to_set);
	xfree(cpu_bind);
	return;
}
Пример #20
0
static void _compute_start_times(void)
{
	int j, rc = SLURM_SUCCESS, job_cnt = 0;
	List job_queue;
	job_queue_rec_t *job_queue_rec;
	List preemptee_candidates = NULL;
	struct job_record *job_ptr;
	struct part_record *part_ptr;
	bitstr_t *alloc_bitmap = NULL, *avail_bitmap = NULL;
	uint32_t max_nodes, min_nodes, req_nodes, time_limit;
	time_t now = time(NULL), sched_start, last_job_alloc;

	sched_start = now;
	last_job_alloc = now - 1;
	alloc_bitmap = bit_alloc(node_record_count);
	if (alloc_bitmap == NULL)
		fatal("bit_alloc: malloc failure");
	job_queue = build_job_queue(true);
	while ((job_queue_rec = (job_queue_rec_t *) 
				list_pop_bottom(job_queue, sort_job_queue2))) {
		job_ptr  = job_queue_rec->job_ptr;
		part_ptr = job_queue_rec->part_ptr;
		xfree(job_queue_rec);
		if (part_ptr != job_ptr->part_ptr)
			continue;	/* Only test one partition */

		if (job_cnt++ > max_backfill_job_cnt) {
			debug("backfill: loop taking to long, breaking out");
			break;
		}

		/* Determine minimum and maximum node counts */
		min_nodes = MAX(job_ptr->details->min_nodes,
				part_ptr->min_nodes);

		if (job_ptr->details->max_nodes == 0)
			max_nodes = part_ptr->max_nodes;
		else
			max_nodes = MIN(job_ptr->details->max_nodes,
					part_ptr->max_nodes);

		max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */

		if (job_ptr->details->max_nodes)
			req_nodes = max_nodes;
		else
			req_nodes = min_nodes;

		if (min_nodes > max_nodes) {
			/* job's min_nodes exceeds partition's max_nodes */
			continue;
		}

		j = job_test_resv(job_ptr, &now, true, &avail_bitmap);
		if (j != SLURM_SUCCESS)
			continue;

		rc = select_g_job_test(job_ptr, avail_bitmap,
				       min_nodes, max_nodes, req_nodes,
				       SELECT_MODE_WILL_RUN,
				       preemptee_candidates, NULL);
		last_job_update = now;

		if (job_ptr->time_limit == INFINITE)
			time_limit = 365 * 24 * 60 * 60;
		else if (job_ptr->time_limit != NO_VAL)
			time_limit = job_ptr->time_limit * 60;
		else if (job_ptr->part_ptr &&
			 (job_ptr->part_ptr->max_time != INFINITE))
			time_limit = job_ptr->part_ptr->max_time * 60;
		else
			time_limit = 365 * 24 * 60 * 60;
		if (bit_overlap(alloc_bitmap, avail_bitmap) &&
		    (job_ptr->start_time <= last_job_alloc)) {
			job_ptr->start_time = last_job_alloc;
		}
		bit_or(alloc_bitmap, avail_bitmap);
		last_job_alloc = job_ptr->start_time + time_limit;
		FREE_NULL_BITMAP(avail_bitmap);

		if ((time(NULL) - sched_start) >= sched_timeout) {
			debug("backfill: loop taking to long, breaking out");
			break;
		}
	}
	list_destroy(job_queue);
	FREE_NULL_BITMAP(alloc_bitmap);
}