Example #1
/* build maps for task layout on nodes */
static int _init_task_layout(slurm_step_layout_t *step_layout,
			     const char *arbitrary_nodes,
			     uint16_t *cpus_per_node, uint32_t *cpu_count_reps,
			     uint16_t cpus_per_task,
			     uint16_t task_dist, uint16_t plane_size)
{
	int cpu_cnt = 0, cpu_inx = 0, i;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

/* 	char *name = NULL; */
	uint16_t cpus[step_layout->node_cnt];

	if (step_layout->node_cnt == 0)
		return SLURM_ERROR;
	if (step_layout->tasks)	/* layout already completed */
		return SLURM_SUCCESS;

	if ((int)cpus_per_task < 1 || cpus_per_task == (uint16_t)NO_VAL)
		cpus_per_task = 1;

	step_layout->plane_size = plane_size;

	step_layout->tasks = xmalloc(sizeof(uint16_t)
				     * step_layout->node_cnt);
	step_layout->tids  = xmalloc(sizeof(uint32_t *)
				     * step_layout->node_cnt);
	if (!(cluster_flags & CLUSTER_FLAG_BG)) {
		hostlist_t hl = hostlist_create(step_layout->node_list);
		/* make sure the number of nodes we think we have
		 * is the correct number */
		i = hostlist_count(hl);
		if (step_layout->node_cnt > i)
			step_layout->node_cnt = i;
		hostlist_destroy(hl);
	}
	debug("laying out the %u tasks on %u hosts %s dist %u",
	      step_layout->task_cnt, step_layout->node_cnt,
	      step_layout->node_list, task_dist);
	if (step_layout->node_cnt < 1) {
		error("no hostlist given can't layout tasks");
		return SLURM_ERROR;
	}

	for (i=0; i<step_layout->node_cnt; i++) {
/* 		name = hostlist_shift(hl); */
/* 		if (!name) { */
/* 			error("hostlist incomplete for this job request"); */
/* 			hostlist_destroy(hl); */
/* 			return SLURM_ERROR; */
/* 		} */
/* 		debug2("host %d = %s", i, name); */
/* 		free(name); */
		cpus[i] = (cpus_per_node[cpu_inx] / cpus_per_task);
		if (cpus[i] == 0) {
			/* this can be a result of a heterogeneous allocation
			 * (e.g. 4 cpus on one node and 2 on the second with
			 *  cpus_per_task=3)  */
			cpus[i] = 1;
		}
		//info("got %d cpus", cpus[i]);
		if ((++cpu_cnt) >= cpu_count_reps[cpu_inx]) {
			/* move to next record */
			cpu_inx++;
			cpu_cnt = 0;
		}
	}

	if ((task_dist == SLURM_DIST_CYCLIC) ||
	    (task_dist == SLURM_DIST_CYCLIC_CYCLIC) ||
	    (task_dist == SLURM_DIST_CYCLIC_BLOCK))
		return _task_layout_cyclic(step_layout, cpus);
	else if (task_dist == SLURM_DIST_ARBITRARY
		&& !(cluster_flags & CLUSTER_FLAG_FE))
		return _task_layout_hostfile(step_layout, arbitrary_nodes);
	else if (task_dist == SLURM_DIST_PLANE)
		return _task_layout_plane(step_layout, cpus);
	else
		return _task_layout_block(step_layout, cpus);
}
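
The loop over nodes above expands the run-length-encoded pair (cpus_per_node, cpu_count_reps) into a per-node value, dividing by cpus_per_task to get the number of task slots on each node. Below is a minimal, self-contained sketch of just that expansion, using hypothetical input values that are not taken from the original source:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical run-length-encoded allocation (not from the original
 * source): 2 nodes with 8 CPUs each, then 1 node with 4 CPUs. */
static uint16_t cpus_per_node[]  = { 8, 4 };
static uint32_t cpu_count_reps[] = { 2, 1 };

int main(void)
{
	int node_cnt = 3, cpus_per_task = 3;
	int cpu_cnt = 0, cpu_inx = 0;

	for (int i = 0; i < node_cnt; i++) {
		uint16_t cpus = cpus_per_node[cpu_inx] / cpus_per_task;
		if (cpus == 0)	/* heterogeneous-allocation fallback, as above */
			cpus = 1;
		printf("node %d: %u task slots\n", i, (unsigned)cpus);
		if ((uint32_t)(++cpu_cnt) >= cpu_count_reps[cpu_inx]) {
			cpu_inx++;	/* move to the next RLE record */
			cpu_cnt = 0;
		}
	}
	return 0;
}

The run-length encoding keeps the allocation description compact when many nodes share the same CPU count, which is the common case.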
Example #2
/* build maps for task layout on nodes */
static int _init_task_layout(slurm_step_layout_req_t *step_layout_req,
			     slurm_step_layout_t *step_layout,
			     const char *arbitrary_nodes)
{
	int cpu_cnt = 0, cpu_inx = 0, cpu_task_cnt = 0, cpu_task_inx = 0, i;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	uint16_t cpus[step_layout->node_cnt];
	uint16_t cpus_per_task[1];
	uint32_t cpus_task_reps[1];

	if (step_layout->node_cnt == 0)
		return SLURM_ERROR;
	if (step_layout->tasks)	/* layout already completed */
		return SLURM_SUCCESS;

	if (!step_layout_req->cpus_per_task) {
		cpus_per_task[0] = 1;
		cpus_task_reps[0] = step_layout_req->num_hosts;
		step_layout_req->cpus_per_task = cpus_per_task;
		step_layout_req->cpus_task_reps = cpus_task_reps;
	}

	if (((int)step_layout_req->cpus_per_task[0] < 1) ||
	    (step_layout_req->cpus_per_task[0] == NO_VAL16)) {
		step_layout_req->cpus_per_task[0] = 1;
		step_layout_req->cpus_task_reps[0] = step_layout_req->num_hosts;
	}

	step_layout->plane_size = step_layout_req->plane_size;

	step_layout->tasks = xmalloc(sizeof(uint16_t)
				     * step_layout->node_cnt);
	step_layout->tids  = xmalloc(sizeof(uint32_t *)
				     * step_layout->node_cnt);
	if (!(cluster_flags & CLUSTER_FLAG_BG)) {
		hostlist_t hl = hostlist_create(step_layout->node_list);
		/* make sure the number of nodes we think we have
		 * is the correct number */
		i = hostlist_count(hl);
		if (step_layout->node_cnt > i)
			step_layout->node_cnt = i;
		hostlist_destroy(hl);
	}
	debug("laying out the %u tasks on %u hosts %s dist %u",
	      step_layout->task_cnt, step_layout->node_cnt,
	      step_layout->node_list, step_layout->task_dist);
	if (step_layout->node_cnt < 1) {
		error("no hostlist given can't layout tasks");
		return SLURM_ERROR;
	}

	/* hostlist_t hl = hostlist_create(step_layout->node_list); */
	for (i=0; i<step_layout->node_cnt; i++) {
		/* char *name = hostlist_shift(hl); */
		/* if (!name) { */
		/* 	error("hostlist incomplete for this job request"); */
		/* 	hostlist_destroy(hl); */
		/* 	return SLURM_ERROR; */
		/* } */
		/* debug2("host %d = %s", i, name); */
		/* free(name); */
		cpus[i] = (step_layout_req->cpus_per_node[cpu_inx] /
			   step_layout_req->cpus_per_task[cpu_task_inx]);
		if (cpus[i] == 0) {
			/* this can be a result of a heterogeneous allocation
			 * (e.g. 4 cpus on one node and 2 on the second with
			 *  step_layout_req->cpus_per_task=3)  */
			cpus[i] = 1;
		}

		if (step_layout->plane_size &&
		    (step_layout->plane_size != NO_VAL16) &&
		    ((step_layout->task_dist & SLURM_DIST_STATE_BASE)
		     != SLURM_DIST_PLANE)) {
			/* plane_size when dist != plane is used to
			   convey ntasks_per_node. Adjust the number
			   of cpus to reflect that.
			*/
			uint16_t cpus_per_node =
				step_layout->plane_size *
				step_layout_req->cpus_per_task[cpu_task_inx];
			if (cpus[i] > cpus_per_node)
				cpus[i] = cpus_per_node;
		}

		/* info("got %d cpus", cpus[i]); */
		if ((++cpu_cnt) >=
		    step_layout_req->cpu_count_reps[cpu_inx]) {
			/* move to next record */
			cpu_inx++;
			cpu_cnt = 0;
		}

		if ((++cpu_task_cnt) >=
		    step_layout_req->cpus_task_reps[cpu_task_inx]) {
			/* move to next record */
			cpu_task_inx++;
			cpu_task_cnt = 0;
		}
	}

	if ((step_layout->task_dist & SLURM_DIST_NODEMASK)
	    == SLURM_DIST_NODECYCLIC)
		return _task_layout_cyclic(step_layout, cpus);
	else if (((step_layout->task_dist & SLURM_DIST_STATE_BASE)
		  == SLURM_DIST_ARBITRARY)
		 && !(cluster_flags & CLUSTER_FLAG_FE))
		return _task_layout_hostfile(step_layout, arbitrary_nodes);
	else if ((step_layout->task_dist & SLURM_DIST_STATE_BASE)
		 == SLURM_DIST_PLANE)
		return _task_layout_plane(step_layout, cpus);
	else
		return _task_layout_block(step_layout, cpus);
}
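
Example #2 extends the same expansion so that cpus_per_task is itself run-length encoded (cpus_per_task/cpus_task_reps) and walked with its own index pair (cpu_task_inx/cpu_task_cnt). Below is a minimal, self-contained sketch of that dual expansion, again with hypothetical input values that are not taken from the original source:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical dual run-length encodings (not from the original source):
 * per-node CPU counts, and per-node cpus_per_task values. */
static uint16_t cpus_per_node[]  = { 8, 4 };	/* 2 nodes of 8, 1 node of 4 */
static uint32_t cpu_count_reps[] = { 2, 1 };
static uint16_t cpus_per_task[]  = { 2, 1 };	/* 1 node at 2, 2 nodes at 1 */
static uint32_t cpus_task_reps[] = { 1, 2 };

int main(void)
{
	int node_cnt = 3;
	int cpu_cnt = 0, cpu_inx = 0, cpu_task_cnt = 0, cpu_task_inx = 0;

	for (int i = 0; i < node_cnt; i++) {
		uint16_t cpus = cpus_per_node[cpu_inx] /
				cpus_per_task[cpu_task_inx];
		if (cpus == 0)	/* heterogeneous-allocation fallback, as above */
			cpus = 1;
		printf("node %d: %u task slots\n", i, (unsigned)cpus);

		/* advance each run-length encoding independently */
		if ((uint32_t)(++cpu_cnt) >= cpu_count_reps[cpu_inx]) {
			cpu_inx++;
			cpu_cnt = 0;
		}
		if ((uint32_t)(++cpu_task_cnt) >= cpus_task_reps[cpu_task_inx]) {
			cpu_task_inx++;
			cpu_task_cnt = 0;
		}
	}
	return 0;
}

Carrying cpus_per_task per record rather than as a single scalar lets the newer layout code describe steps whose tasks need different CPU counts on different nodes, while still reusing the compact run-length representation.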