Example #1
0
/**
 * do_basil_reserve - create a BASIL reservation.
 * IN job_ptr - pointer to job which has just been allocated resources
 * RET 0 or error code, job will abort or be requeued on failure
 */
extern int do_basil_reserve(struct job_record *job_ptr)
{
	struct nodespec *ns_head = NULL;
	uint16_t mppwidth = 0, mppdepth, mppnppn;
	uint32_t mppmem = 0, node_min_mem = 0;
	uint32_t resv_id;
	int i, first_bit, last_bit;
	hostlist_t hl;
	long rc;
	char *user, batch_id[16];

	if (!job_ptr->job_resrcs || job_ptr->job_resrcs->nhosts == 0)
		return SLURM_SUCCESS;

	debug3("job #%u: %u nodes = %s, cpus=%u" , job_ptr->job_id,
		job_ptr->job_resrcs->nhosts,
		job_ptr->job_resrcs->nodes,
		job_ptr->job_resrcs->ncpus
	);

	if (job_ptr->job_resrcs->node_bitmap == NULL) {
		error("job %u node_bitmap not set", job_ptr->job_id);
		return SLURM_SUCCESS;
	}

	first_bit = bit_ffs(job_ptr->job_resrcs->node_bitmap);
	last_bit  = bit_fls(job_ptr->job_resrcs->node_bitmap);
	if (first_bit == -1 || last_bit == -1)
		return SLURM_SUCCESS;		/* no nodes allocated */

	mppdepth = MAX(1, job_ptr->details->cpus_per_task);
	mppnppn  = job_ptr->details->ntasks_per_node;

	/* mppmem */
	if (job_ptr->details->pn_min_memory & MEM_PER_CPU) {
		/* Only honour --mem-per-cpu if --ntasks has been given */
		if (job_ptr->details->num_tasks)
			mppmem = job_ptr->details->pn_min_memory & ~MEM_PER_CPU;
	} else if (job_ptr->details->pn_min_memory) {
		node_min_mem = job_ptr->details->pn_min_memory;
	}

	hl = hostlist_create("");
	if (hl == NULL)
		fatal("hostlist_create: malloc error");

	for (i = first_bit; i <= last_bit; i++) {
		struct node_record *node_ptr = node_record_table_ptr + i;
		uint32_t basil_node_id;

		if (!bit_test(job_ptr->job_resrcs->node_bitmap, i))
			continue;

		if (!node_ptr->name || node_ptr->name[0] == '\0')
			continue;	/* bad node */

		if (sscanf(node_ptr->name, "nid%05u", &basil_node_id) != 1)
			fatal("can not read basil_node_id from %s", node_ptr->name);

		if (ns_add_node(&ns_head, basil_node_id) != 0) {
			error("can not add node %s (nid%05u)", node_ptr->name,
			      basil_node_id);
			free_nodespec(ns_head);
			return SLURM_ERROR;
		}

		if (node_min_mem) {
			uint32_t node_cpus, node_mem;

			if (slurmctld_conf.fast_schedule) {
				node_cpus = node_ptr->config_ptr->cpus;
				node_mem  = node_ptr->config_ptr->real_memory;
			} else {
				node_cpus = node_ptr->cpus;
				node_mem  = node_ptr->real_memory;
			}
			/*
			 * ALPS 'Processing Elements per Node' value (aprun -N),
			 * which in slurm is --ntasks-per-node and 'mppnppn' in
			 * PBS: if --ntasks is specified, default to the number
			 * of cores per node (also the default for 'aprun -N').
			 */
			node_mem /= mppnppn ? mppnppn : node_cpus;

			mppmem = node_min_mem = MIN(node_mem, node_min_mem);
		}
	}

	/* mppwidth */
	for (i = 0; i < job_ptr->job_resrcs->nhosts; i++) {
		uint16_t node_tasks = job_ptr->job_resrcs->cpus[i] / mppdepth;

		if (mppnppn && mppnppn < node_tasks)
			node_tasks = mppnppn;
		mppwidth += node_tasks;
	}

	snprintf(batch_id, sizeof(batch_id), "%u", job_ptr->job_id);
	user = uid_to_string(job_ptr->user_id);
	rc   = basil_reserve(user, batch_id, mppwidth,
			     mppdepth, mppnppn, mppmem, ns_head);
	xfree(user);
	if (rc <= 0) {
		/* errno value will be resolved by select_g_job_begin() */
		errno = is_transient_error(rc) ? EAGAIN : ECONNABORTED;
		return SLURM_ERROR;
	}

	resv_id	= rc;
	if (_set_select_jobinfo(job_ptr->select_jobinfo->data,
			SELECT_JOBDATA_RESV_ID, &resv_id) != SLURM_SUCCESS) {
		/*
		 * This is a fatal error since it means we will not be able to
		 * confirm the reservation; no step will be able to run in it.
		 */
		error("job %u: can not set resId %u", job_ptr->job_id, resv_id);
		basil_release(resv_id);
		return SLURM_ERROR;
	}

	info("ALPS RESERVATION #%u, JobId %u: BASIL -n %d -N %d -d %d -m %d",
	     resv_id, job_ptr->job_id, mppwidth, mppnppn, mppdepth, mppmem);

	return SLURM_SUCCESS;
}
Example #2
0
/**
 * do_basil_reserve - create a BASIL reservation.
 * IN job_ptr - pointer to job which has just been allocated resources
 * RET 0 or error code, job will abort or be requeued on failure
 *
 * Derives the reservation parameters from the job's allocation:
 *   mppwidth - sum over allocated hosts of (cpus / mppdepth), capped per
 *              host at mppnppn when that is set
 *   mppdepth - cpus_per_task, at least 1
 *   mppnppn  - ntasks_per_node (0 if unset)
 *   mppmem   - --mem-per-cpu value (only if --ntasks was given), or the
 *              smallest per-PE share of node memory (at least 1) when a
 *              per-node memory limit was requested
 * and passes them, the list of allocated node ids and (if the job has a
 * gres_list) the accelerator parameters to basil_reserve(). On success the
 * reservation id is stored in the job's select_jobinfo and, if mppmem is
 * non-zero, the job's pn_min_memory is rewritten as a per-CPU limit.
 *
 * SLURM_SUCCESS is returned without making a reservation when the job has
 * no job_resrcs, no hosts, no node bitmap or an empty node bitmap.
 * When basil_reserve() fails, errno is set to EAGAIN (transient error) or
 * ECONNABORTED and SLURM_ERROR is returned.
 */
extern int do_basil_reserve(struct job_record *job_ptr)
{
    struct nodespec *ns_head = NULL;
    uint16_t mppwidth = 0, mppdepth, mppnppn;
    /* mppmem must be at least 1 for gang scheduling to work so
     * if you are wondering why gang scheduling isn't working you
     * should check your slurm.conf for DefMemPerNode */
    uint32_t mppmem = 0, node_min_mem = 0;
    uint32_t resv_id;
    int i, first_bit, last_bit;
    long rc;
    char *user, batch_id[16];
    struct basil_accel_param *bap = NULL;

    if (!job_ptr->job_resrcs || job_ptr->job_resrcs->nhosts == 0)
        return SLURM_SUCCESS;

    debug3("job #%u: %u nodes = %s, cpus=%u" , job_ptr->job_id,
           job_ptr->job_resrcs->nhosts,
           job_ptr->job_resrcs->nodes,
           job_ptr->job_resrcs->ncpus
          );

    if (job_ptr->job_resrcs->node_bitmap == NULL) {
        error("job %u node_bitmap not set", job_ptr->job_id);
        return SLURM_SUCCESS;
    }

    first_bit = bit_ffs(job_ptr->job_resrcs->node_bitmap);
    last_bit  = bit_fls(job_ptr->job_resrcs->node_bitmap);
    if (first_bit == -1 || last_bit == -1)
        return SLURM_SUCCESS;		/* no nodes allocated */

    mppdepth = MAX(1, job_ptr->details->cpus_per_task);
    mppnppn  = job_ptr->details->ntasks_per_node;

    /* mppmem */
    if (job_ptr->details->pn_min_memory & MEM_PER_CPU) {
        /* Only honour --mem-per-cpu if --ntasks has been given */
        if (job_ptr->details->num_tasks)
            mppmem = job_ptr->details->pn_min_memory & ~MEM_PER_CPU;
    } else if (job_ptr->details->pn_min_memory) {
        node_min_mem = job_ptr->details->pn_min_memory;
    }

    /* Collect the ALPS node id of every node set in the job's bitmap */
    for (i = first_bit; i <= last_bit; i++) {
        struct node_record *node_ptr = node_record_table_ptr + i;
        uint32_t basil_node_id;

        if (!bit_test(job_ptr->job_resrcs->node_bitmap, i))
            continue;

        if (!node_ptr->name || node_ptr->name[0] == '\0')
            continue;	/* bad node */

        /* Node names are expected to have the form "nidNNNNN" */
        if (sscanf(node_ptr->name, "nid%05u", &basil_node_id) != 1)
            fatal("can not read basil_node_id from %s",
                  node_ptr->name);

        if (ns_add_node(&ns_head, basil_node_id, false) != 0) {
            error("can not add node %s (nid%05u)", node_ptr->name,
                  basil_node_id);
            free_nodespec(ns_head);
            return SLURM_ERROR;
        }

        if (node_min_mem) {
            uint32_t node_cpus, node_mem, pes_per_node;
            uint32_t tmp_mppmem;

            if (slurmctld_conf.fast_schedule) {
                node_cpus = node_ptr->config_ptr->cpus;
                node_mem  = node_ptr->config_ptr->real_memory;
            } else {
                node_cpus = node_ptr->cpus;
                node_mem  = node_ptr->real_memory;
            }
            /*
             * ALPS 'Processing Elements per Node' value (aprun -N),
             * which in slurm is --ntasks-per-node and 'mppnppn' in
             * PBS: if --ntasks is specified, default to the number
             * of cores per node (also the default for 'aprun -N').
             * On a heterogeneous system the nodes aren't
             * always the same so keep track of the lowest
             * mppmem and use it as the level for all
             * nodes (mppmem is 0 when coming in).
             */
            pes_per_node = mppnppn ? mppnppn : node_cpus;
            /*
             * A node reporting 0 CPUs must not trigger a division
             * by zero; leave node_mem undivided in that case.
             */
            if (pes_per_node)
                node_mem /= pes_per_node;

            node_min_mem = MIN(node_mem, node_min_mem);

            /* Make sure you have 1 at least since 0 means give
               all the memory to the job. Kept unsigned so the
               comparisons below do not mix signedness.
            */
            tmp_mppmem = node_min_mem ? node_min_mem : 1;

            if (mppmem)
                mppmem = MIN(mppmem, tmp_mppmem);
            else
                mppmem = tmp_mppmem;
        }
    }

    /* mppwidth */
    for (i = 0; i < job_ptr->job_resrcs->nhosts; i++) {
        uint16_t node_tasks = job_ptr->job_resrcs->cpus[i] / mppdepth;

        if (mppnppn && mppnppn < node_tasks)
            node_tasks = mppnppn;
        mppwidth += node_tasks;
    }

    snprintf(batch_id, sizeof(batch_id), "%u", job_ptr->job_id);
    user = uid_to_string(job_ptr->user_id);

    /* Accelerator parameters are only built for jobs with a gres_list */
    if (job_ptr->gres_list)
        bap = build_accel_param(job_ptr);

    /*
     * NOTE(review): neither ns_head nor bap is released here after the
     * call, so basil_reserve() presumably takes ownership - confirm.
     */
    rc   = basil_reserve(user, batch_id, mppwidth, mppdepth, mppnppn,
                         mppmem, ns_head, bap);
    xfree(user);
    if (rc <= 0) {
        /* errno value will be resolved by select_g_job_begin() */
        errno = is_transient_error(rc) ? EAGAIN : ECONNABORTED;
        return SLURM_ERROR;
    }

    /* A positive return value is the reservation id */
    resv_id	= rc;
    if (_set_select_jobinfo(job_ptr->select_jobinfo->data,
                            SELECT_JOBDATA_RESV_ID, &resv_id) != SLURM_SUCCESS) {
        /*
         * This is a fatal error since it means we will not be able to
         * confirm the reservation; no step will be able to run in it.
         */
        error("job %u: can not set resId %u", job_ptr->job_id, resv_id);
        basil_release(resv_id);
        return SLURM_ERROR;
    }
    /* Record the memory level actually reserved as a per-CPU limit */
    if (mppmem)
        job_ptr->details->pn_min_memory = mppmem | MEM_PER_CPU;

    /* mppmem is uint32_t, hence %u; the uint16_t values promote to int */
    info("ALPS RESERVATION #%u, JobId %u: BASIL -n %d -N %d -d %d -m %u",
         resv_id, job_ptr->job_id, mppwidth, mppnppn, mppdepth, mppmem);

    return SLURM_SUCCESS;
}