Example no. 1
/* create a slurmd job structure from a launch tasks message */
extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg,
					       uint16_t protocol_version)
{
	stepd_step_rec_t  *job = NULL;
	srun_info_t   *srun = NULL;
	slurm_addr_t     resp_addr;
	slurm_addr_t     io_addr;
	int            i, nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering stepd_step_rec_create");

	if (acct_gather_check_acct_freq_task(msg->job_mem_lim, msg->acctg_freq))
		return NULL;

	job = xmalloc(sizeof(stepd_step_rec_t));
	job->msg = msg;
#ifndef HAVE_FRONT_END
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		stepd_step_rec_destroy(job);
		return NULL;
	}

	job->state = SLURMSTEPD_STEP_STARTING;
	slurm_cond_init(&job->state_cond, NULL);
	slurm_mutex_init(&job->state_mutex);
	job->node_tasks	= msg->tasks_to_launch[nodeid];
	i = sizeof(uint16_t) * msg->nnodes;
	job->task_cnts  = xmalloc(i);
	memcpy(job->task_cnts, msg->tasks_to_launch, i);
	job->ntasks	= msg->ntasks;
	job->jobid	= msg->job_id;
	job->stepid	= msg->job_step_id;

	job->uid	= (uid_t) msg->uid;
	job->gid	= (gid_t) msg->gid;
	job->user_name	= xstrdup(msg->user_name);
	job->ngids = (int) msg->ngids;
	job->gids = copy_gids(msg->ngids, msg->gids);

	job->cwd	= xstrdup(msg->cwd);
	job->task_dist	= msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq_min = msg->cpu_freq_min;
	job->cpu_freq_max = msg->cpu_freq_max;
	job->cpu_freq_gov = msg->cpu_freq_gov;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env     = _array_copy(msg->envc, msg->env);
	job->array_job_id  = msg->job_id;
	job->array_task_id = NO_VAL;
	job->node_offset = msg->node_offset;	/* Used for env vars */
	job->pack_jobid  = msg->pack_jobid;	/* Used for env vars */
	job->pack_nnodes = msg->pack_nnodes;	/* Used for env vars */
	if (msg->pack_nnodes && msg->pack_ntasks && msg->pack_task_cnts) {
		job->pack_ntasks = msg->pack_ntasks;	/* Used for env vars */
		i = sizeof(uint16_t) * msg->pack_nnodes;
		job->pack_task_cnts = xmalloc(i);
		memcpy(job->pack_task_cnts, msg->pack_task_cnts, i);
	}
	job->pack_offset = msg->pack_offset;	/* Used for env vars & labels */
	job->pack_task_offset = msg->pack_task_offset;	/* Used for env vars & labels */
	job->pack_node_list = xstrdup(msg->pack_node_list);
	for (i = 0; i < msg->envc; i++) {
		/*                         1234567890123456789 */
		if (!xstrncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
			job->array_job_id = atoi(msg->env[i] + 19);
		/*                         12345678901234567890 */
		if (!xstrncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20))
			job->array_task_id = atoi(msg->env[i] + 20);
	}

	job->eio     = eio_handle_create(0);
	job->sruns   = list_create((ListDelF) _srun_info_destructor);

	/*
	 * Based on testing, the next three lists could use
	 * eio_obj_destroy as their destructor, but doing so can
	 * trigger an invalid read. Since these lists persist until
	 * the end of the job, the leak is not a big deal.
	 */
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	job->envtp   = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
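	/*
	 * Guard against missing port lists: with no response ports
	 * there is no srun address to answer to, and with no I/O
	 * ports the step falls back to user-managed I/O.
	 */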
	if (!msg->resp_port)
		msg->num_resp_port = 0;
	if (msg->num_resp_port) {
		job->envtp->comm_port =
			msg->resp_port[nodeid % msg->num_resp_port];
		memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&resp_addr,
			       msg->resp_port[nodeid % msg->num_resp_port],
			       NULL);
	} else {
		memset(&resp_addr, 0, sizeof(slurm_addr_t));
	}
	if (!msg->io_port)
		msg->flags |= LAUNCH_USER_MANAGED_IO;
	if ((msg->flags & LAUNCH_USER_MANAGED_IO) == 0) {
		memcpy(&io_addr,   &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	} else {
		memset(&io_addr, 0, sizeof(slurm_addr_t));
	}

	srun = srun_info_create(msg->cred, &resp_addr, &io_addr,
				protocol_version);

	job->profile     = msg->profile;
	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc    = msg->argc;
	job->argv    = _array_copy(job->argc, msg->argv);

	job->nnodes  = msg->nnodes;
	job->nodeid  = nodeid;
	job->debug   = msg->slurmd_debug;
	job->cpus    = msg->node_cpus;
	job->job_core_spec = msg->job_core_spec;

	/*
	 * This needs to happen before acct_gather_profile_startpoll()
	 * and only really looks at the profile in the job.
	 */
	acct_gather_profile_g_node_step_start(job);

	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->timelimit   = (time_t) -1;
	job->flags       = msg->flags;
	job->switch_job  = msg->switch_job;
	job->open_mode   = msg->open_mode;
	job->options     = msg->options;
	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);

	/*
	 * If MemLimitEnforce=no is configured in slurm.conf, skip
	 * setting a memory limit and keep going.
	 */
	if (job->step_mem && conf->mem_limit_enforce) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem && conf->mem_limit_enforce) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_ALPS_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM. SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	/* only need these values on the extern step, don't copy otherwise */
	if ((msg->job_step_id == SLURM_EXTERN_CONT) && msg->x11) {
		job->x11 = msg->x11;
		job->x11_magic_cookie = xstrdup(msg->x11_magic_cookie);
		job->x11_target_host = xstrdup(msg->x11_target_host);
		job->x11_target_port = msg->x11_target_port;
	}

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids,
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}
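
A minimal sketch of how a caller might drive this creator. The handler name and its plumbing are assumptions for illustration; only stepd_step_rec_create() and stepd_step_rec_destroy() come from the listing above.

/* Hypothetical launch-tasks handler (a sketch, not Slurm's actual RPC code) */
static int _handle_launch(launch_tasks_request_msg_t *msg,
			  uint16_t protocol_version)
{
	stepd_step_rec_t *job;

	/* NULL means validation failed: bad accounting frequency,
	 * or this node is missing from the step's node list */
	job = stepd_step_rec_create(msg, protocol_version);
	if (!job)
		return SLURM_ERROR;

	/* ... set up I/O and launch job->node_tasks tasks here ... */

	stepd_step_rec_destroy(job);
	return SLURM_SUCCESS;
}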
Example no. 2
/* create a slurmd job structure from a launch tasks message */
extern stepd_step_rec_t *
stepd_step_rec_create(launch_tasks_request_msg_t *msg)
{
	stepd_step_rec_t  *job = NULL;
	srun_info_t   *srun = NULL;
	slurm_addr_t     resp_addr;
	slurm_addr_t     io_addr;
	int            i, nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering stepd_step_rec_create");

	if (!_valid_uid_gid((uid_t)msg->uid, &(msg->gid), &(msg->user_name)))
		return NULL;

	if (_check_acct_freq_task(msg->job_mem_lim, msg->acctg_freq))
		return NULL;

	job = xmalloc(sizeof(stepd_step_rec_t));
#ifndef HAVE_FRONT_END
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		stepd_step_rec_destroy(job);
		return NULL;
	}

	job->state	= SLURMSTEPD_STEP_STARTING;
	job->node_tasks	= msg->tasks_to_launch[nodeid];
	job->ntasks	= msg->ntasks;
	job->jobid	= msg->job_id;
	job->stepid	= msg->job_step_id;

	job->uid	= (uid_t) msg->uid;
	job->user_name  = xstrdup(msg->user_name);
	job->gid	= (gid_t) msg->gid;
	job->cwd	= xstrdup(msg->cwd);
	job->task_dist	= msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq = msg->cpu_freq;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env     = _array_copy(msg->envc, msg->env);
	job->array_job_id  = msg->job_id;
	job->array_task_id = (uint16_t) NO_VAL;
	for (i = 0; i < msg->envc; i++) {
		/*                         1234567890123456789 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
			job->array_job_id = atoi(msg->env[i] + 19);
		/*                         12345678901234567890 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20))
			job->array_task_id = atoi(msg->env[i] + 20);
	}

	job->eio     = eio_handle_create();
	job->sruns   = list_create((ListDelF) _srun_info_destructor);
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	job->envtp   = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	job->envtp->comm_port = msg->resp_port[nodeid % msg->num_resp_port];

	memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
	slurm_set_addr(&resp_addr,
		       msg->resp_port[nodeid % msg->num_resp_port],
		       NULL);
	job->user_managed_io = msg->user_managed_io;
	if (!msg->user_managed_io) {
		memcpy(&io_addr,   &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	} else {
		/* zero the address so srun_info_create() below does
		 * not read an uninitialized stack variable */
		memset(&io_addr, 0, sizeof(slurm_addr_t));
	}

	srun = srun_info_create(msg->cred, &resp_addr, &io_addr);

	job->buffered_stdio = msg->buffered_stdio;
	job->labelio = msg->labelio;

	job->profile     = msg->profile;
	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc    = msg->argc;
	job->argv    = _array_copy(job->argc, msg->argv);

	job->nnodes  = msg->nnodes;
	job->nodeid  = nodeid;
	job->debug   = msg->slurmd_debug;
	job->cpus    = msg->cpus_allocated[nodeid];

	/*
	 * This needs to happen before acct_gather_profile_startpoll()
	 * and only really looks at the profile in the job.
	 */
	acct_gather_profile_g_node_step_start(job);

	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->multi_prog  = msg->multi_prog;
	job->timelimit   = (time_t) -1;
	job->task_flags  = msg->task_flags;
	job->switch_job  = msg->switch_job;
	job->pty         = msg->pty;
	job->open_mode   = msg->open_mode;
	job->options     = msg->options;
	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);
	if (job->step_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_ALPS_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM. SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids[nodeid],
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}
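
Both of the first two listings deep-copy argv- and env-style arrays through a private _array_copy() helper whose body is not shown. A sketch consistent with how it is called (element count known up front, NULL-terminated result), assuming the xmalloc/xstrdup allocators used throughout:

static char **_array_copy(int n, char **src)
{
	char **dst = xmalloc((n + 1) * sizeof(char *));
	int i;

	for (i = 0; i < n; i++)
		dst[i] = xstrdup(src[i]);
	dst[n] = NULL;	/* keep the copy NULL-terminated, argv-style */
	return dst;
}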
Example no. 3
/* create a slurmd job structure from a launch tasks message */
slurmd_job_t *
job_create(launch_tasks_request_msg_t *msg)
{
    struct passwd *pwd = NULL;
    slurmd_job_t  *job = NULL;
    srun_info_t   *srun = NULL;
    slurm_addr_t     resp_addr;
    slurm_addr_t     io_addr;
    int            nodeid = NO_VAL;

    xassert(msg != NULL);
    xassert(msg->complete_nodelist != NULL);
    debug3("entering job_create");
    if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
        error("uid %ld not found on system", (long) msg->uid);
        slurm_seterrno (ESLURMD_UID_NOT_FOUND);
        return NULL;
    }
    if (!_valid_gid(pwd, &(msg->gid))) {
        slurm_seterrno (ESLURMD_GID_NOT_FOUND);
        _pwd_destroy(pwd);
        return NULL;
    }

    if (msg->job_mem_lim && (msg->acctg_freq != (uint16_t) NO_VAL)
            && (msg->acctg_freq > conf->job_acct_gather_freq)) {
        error("Can't set frequency to %u, it is higher than %u.  "
              "We need it to be at least at this level to "
              "monitor memory usage.",
              msg->acctg_freq, conf->job_acct_gather_freq);
        slurm_seterrno (ESLURMD_INVALID_ACCT_FREQ);
        _pwd_destroy(pwd);
        return NULL;
    }

    job = xmalloc(sizeof(slurmd_job_t));
#ifndef HAVE_FRONT_END
    nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
    job->node_name = xstrdup(conf->node_name);
#else
    nodeid = 0;
    job->node_name = xstrdup(msg->complete_nodelist);
#endif
    if (nodeid < 0) {
        error("couldn't find node %s in %s",
              job->node_name, msg->complete_nodelist);
        job_destroy(job);
        return NULL;
    }

    job->state	= SLURMSTEPD_STEP_STARTING;
    job->pwd	= pwd;
    job->node_tasks	= msg->tasks_to_launch[nodeid];
    job->ntasks	= msg->ntasks;
    job->jobid	= msg->job_id;
    job->stepid	= msg->job_step_id;

    job->uid	= (uid_t) msg->uid;
    job->gid	= (gid_t) msg->gid;
    job->cwd	= xstrdup(msg->cwd);
    job->task_dist	= msg->task_dist;

    job->cpu_bind_type = msg->cpu_bind_type;
    job->cpu_bind = xstrdup(msg->cpu_bind);
    job->mem_bind_type = msg->mem_bind_type;
    job->mem_bind = xstrdup(msg->mem_bind);
    job->cpu_freq = msg->cpu_freq;
    job->ckpt_dir = xstrdup(msg->ckpt_dir);
    job->restart_dir = xstrdup(msg->restart_dir);
    job->cpus_per_task = msg->cpus_per_task;

    job->env     = _array_copy(msg->envc, msg->env);
    job->eio     = eio_handle_create();
    job->sruns   = list_create((ListDelF) _srun_info_destructor);
    job->clients = list_create(NULL); /* FIXME! Needs destructor */
    job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
    job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
    job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
    job->incoming_count = 0;
    job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
    job->outgoing_count = 0;
    job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

    job->envtp   = xmalloc(sizeof(env_t));
    job->envtp->jobid = -1;
    job->envtp->stepid = -1;
    job->envtp->procid = -1;
    job->envtp->localid = -1;
    job->envtp->nodeid = -1;

    job->envtp->distribution = 0;
    job->envtp->cpu_bind_type = 0;
    job->envtp->cpu_bind = NULL;
    job->envtp->mem_bind_type = 0;
    job->envtp->mem_bind = NULL;
    job->envtp->ckpt_dir = NULL;
    /*
     * The srun response/IO address setup is intentionally disabled in
     * this variant: resp_addr and io_addr are never filled in, and the
     * srun record is created without a credential or addresses.
     */
    /*
    job->envtp->comm_port = msg->resp_port[nodeid % msg->num_resp_port];

    memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
    slurm_set_addr(&resp_addr,
                   msg->resp_port[nodeid % msg->num_resp_port],
                   NULL);
    job->user_managed_io = msg->user_managed_io;
    if (!msg->user_managed_io) {
        memcpy(&io_addr, &msg->orig_addr, sizeof(slurm_addr_t));
        slurm_set_addr(&io_addr,
                       msg->io_port[nodeid % msg->num_io_port],
                       NULL);
    }

    srun = srun_info_create(msg->cred, &resp_addr, &io_addr);
    */
    srun = srun_info_create(NULL, NULL, NULL);
    job->buffered_stdio = msg->buffered_stdio;
    job->labelio = msg->labelio;

    job->task_prolog = xstrdup(msg->task_prolog);
    job->task_epilog = xstrdup(msg->task_epilog);

    job->argc    = msg->argc;
    job->argv    = _array_copy(job->argc, msg->argv);

    job->nnodes  = msg->nnodes;
    job->nodeid  = nodeid;
    job->debug   = msg->slurmd_debug;
    job->cpus    = msg->cpus_allocated[nodeid];
    if (msg->acctg_freq != (uint16_t) NO_VAL)
        jobacct_gather_change_poll(msg->acctg_freq);
    job->multi_prog  = msg->multi_prog;
    job->timelimit   = (time_t) -1;
    job->task_flags  = msg->task_flags;
    job->switch_job  = msg->switch_job;
    job->pty         = msg->pty;
    job->open_mode   = msg->open_mode;
    job->options     = msg->options;
    format_core_allocs(msg->cred, conf->node_name,
                       &job->job_alloc_cores, &job->step_alloc_cores,
                       &job->job_mem, &job->step_mem);
    if (job->step_mem) {
        jobacct_gather_set_mem_limit(job->jobid, job->stepid,
                                     job->step_mem);
    } else if (job->job_mem) {
        jobacct_gather_set_mem_limit(job->jobid, job->stepid,
                                     job->job_mem);
    }

#ifdef HAVE_CRAY
    /* This is only used for Cray emulation mode where slurmd is used to
     * launch job steps. On a real Cray system, ALPS is used to launch
     * the tasks instead of SLURM. SLURM's task launch RPC does NOT
     * contain the reservation ID, so just use some non-zero value here
     * for testing purposes. */
    job->resv_id = 1;
    select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
                                &job->resv_id);
#endif

    get_cred_gres(msg->cred, conf->node_name,
                  &job->job_gres_list, &job->step_gres_list);

    list_append(job->sruns, (void *) srun);
    _job_init_task_info(job, msg->global_task_ids[nodeid],
                        msg->ifname, msg->ofname, msg->efname);

    return job;
}
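
The third listing validates the requesting uid through _pwd_create() and _pwd_destroy(), whose bodies are not shown. A sketch of what such a lookup typically does, assuming the standard getpwuid_r() pattern and the xmalloc/xstrdup allocators used throughout (ERANGE retry handling omitted):

#include <pwd.h>

static struct passwd *_pwd_create(uid_t uid)
{
    struct passwd pwd, *result = NULL;
    char buf[4096];
    struct passwd *copy;

    if (getpwuid_r(uid, &pwd, buf, sizeof(buf), &result) || !result)
        return NULL;        /* uid not found on this system */

    /* deep-copy the fields the job structure keeps alive */
    copy = xmalloc(sizeof(*copy));
    copy->pw_uid   = result->pw_uid;
    copy->pw_gid   = result->pw_gid;
    copy->pw_name  = xstrdup(result->pw_name);
    copy->pw_dir   = xstrdup(result->pw_dir);
    copy->pw_shell = xstrdup(result->pw_shell);
    return copy;
}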