Esempio n. 1
0
resource_allocation_response_msg_t *
existing_allocation(void)
{
	uint32_t old_job_id;
        resource_allocation_response_msg_t *resp = NULL;

	if (opt.jobid != NO_VAL)
		old_job_id = (uint32_t)opt.jobid;
	else
                return NULL;

        if (slurm_allocation_lookup_lite(old_job_id, &resp) < 0) {
                if (opt.parallel_debug || opt.jobid_set)
                        return NULL;    /* create new allocation as needed */
                if (errno == ESLURM_ALREADY_DONE)
                        error ("SLURM job %u has expired.", old_job_id);
                else
                        error ("Unable to confirm allocation for job %u: %m",
			       old_job_id);
                info ("Check SLURM_JOB_ID environment variable "
                      "for expired or invalid job.");
                exit(error_exit);
        }

        return resp;
}
Esempio n. 2
0
static resource_allocation_response_msg_t *
_wait_for_allocation_response(uint32_t job_id, const listen_t *listen,
			      int timeout)
{
	resource_allocation_response_msg_t *resp = NULL;
	int errnum;

	info("job %u queued and waiting for resources", job_id);
	if (_wait_for_alloc_rpc(listen, timeout, &resp) <= 0) {
		errnum = errno;
		/* Maybe the resource allocation response RPC got lost
		 * in the mail; surely it should have arrived by now.
		 * Let's see if the controller thinks that the allocation
		 * has been granted.
		 */
		if (slurm_allocation_lookup_lite(job_id, &resp) >= 0) {
			return resp;
		}
		if (slurm_get_errno() == ESLURM_JOB_PENDING) {
			debug3("Still waiting for allocation");
			errno = errnum;
			return NULL;
		} else {
			debug3("Unable to confirm allocation for job %u: %m",
			       job_id);
			return NULL;
		}
	}
	info("job %u has been allocated resources", job_id);
	return resp;
}
Esempio n. 3
0
/*
 * Wait until a job is ready to execute or enters some failed state
 * RET 1: job ready to run
 *     0: job can't run (cancelled, failure state, timeout, etc.)
 */
extern int scontrol_job_ready(char *job_id_str)
{
	int rc;
	uint32_t job_id;

	job_id = atoi(job_id_str);
	if (job_id <= 0) {
		fprintf(stderr, "Invalid job_id %s", job_id_str);
		return SLURM_ERROR;
	}

	if (cluster_flags & CLUSTER_FLAG_BG) {
		resource_allocation_response_msg_t *alloc;
		rc = slurm_allocation_lookup_lite(job_id, &alloc);
		if (rc == SLURM_SUCCESS) {
			rc = _wait_bluegene_block_ready(alloc);
			slurm_free_resource_allocation_response_msg(alloc);
		} else {
			error("slurm_allocation_lookup_lite: %m");
			rc = SLURM_ERROR;
		}
	} else
		rc = _wait_nodes_ready(job_id);

	return rc;
}
Esempio n. 4
0
/*
 * slurm_signal_job - send the specified signal to all steps of an existing job
 * IN job_id     - the job's id
 * IN signal     - signal number
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
extern int
slurm_signal_job (uint32_t job_id, uint16_t signal)
{
	int rc = SLURM_SUCCESS;
	resource_allocation_response_msg_t *alloc_info = NULL;
	signal_job_msg_t rpc;

	if (slurm_allocation_lookup_lite(job_id, &alloc_info)) {
		rc = slurm_get_errno();
		goto fail1;
	}

	/* same remote procedure call for each node */
	rpc.job_id = job_id;
	rpc.signal = (uint32_t)signal;

	rc = _local_send_recv_rc_msgs(alloc_info->node_list,
				      REQUEST_SIGNAL_JOB, &rpc);
	slurm_free_resource_allocation_response_msg(alloc_info);
fail1:
	if (rc) {
		slurm_seterrno_ret(rc);
	} else {
		return SLURM_SUCCESS;
	}
}
Esempio n. 5
0
/*
 * slurm_terminate_job_step - terminates a job step by sending a
 * 	REQUEST_TERMINATE_TASKS rpc to all slurmd of a job step.
 * IN job_id  - the job's id
 * IN step_id - the job step's id - use SLURM_BATCH_SCRIPT as the step_id
 *              to terminate a job's batch script
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
extern int
slurm_terminate_job_step (uint32_t job_id, uint32_t step_id)
{
	resource_allocation_response_msg_t *alloc_info = NULL;
	job_step_info_response_msg_t *step_info = NULL;
	int rc = 0;
	int i;
	int save_errno = 0;

	if (slurm_allocation_lookup_lite(job_id, &alloc_info)) {
		return -1;
	}

	/*
	 * The controller won't give us info about the batch script job step,
	 * so we need to handle that seperately.
	 */
	if (step_id == SLURM_BATCH_SCRIPT) {
		rc = _terminate_batch_script_step(alloc_info);
		slurm_free_resource_allocation_response_msg(alloc_info);
		errno = rc;
		return rc ? -1 : 0;
	}

	/*
	 * Otherwise, look through the list of job step info and find
	 * the one matching step_id.  Terminate that step.
	 */
	rc = slurm_get_job_steps((time_t)0, job_id, step_id,
				 &step_info, SHOW_ALL);
	if (rc != 0) {
		save_errno = errno;
		goto fail;
	}
	for (i = 0; i < step_info->job_step_count; i++) {
		if ((step_info->job_steps[i].job_id == job_id) &&
		    (step_info->job_steps[i].step_id == step_id)) {
			rc = _terminate_job_step(&step_info->job_steps[i],
						 alloc_info);
			save_errno = errno;
			break;
		}
	}
	slurm_free_job_step_info_response_msg(step_info);
fail:
	slurm_free_resource_allocation_response_msg(alloc_info);
	errno = save_errno;
	return rc ? -1 : 0;
}
Esempio n. 6
0
static void _update_job_size(uint32_t job_id)
{
	resource_allocation_response_msg_t *alloc_info;
	char *fname_csh = NULL, *fname_sh = NULL;
	FILE *resize_csh = NULL, *resize_sh = NULL;

	if (!getenv("SLURM_JOBID"))
		return;		/*No job environment here to update */

	if (slurm_allocation_lookup_lite(job_id, &alloc_info) !=
	    SLURM_SUCCESS) {
		slurm_perror("slurm_allocation_lookup_lite");
		return;
	}

	xstrfmtcat(fname_csh, "slurm_job_%u_resize.csh", job_id);
	xstrfmtcat(fname_sh,  "slurm_job_%u_resize.sh", job_id);
	(void) unlink(fname_csh);
	(void) unlink(fname_sh);
 	if (!(resize_csh = fopen(fname_csh, "w"))) {
		fprintf(stderr, "Could not create file %s: %s\n", fname_csh, 
			strerror(errno));
		goto fini;
	}
 	if (!(resize_sh = fopen(fname_sh, "w"))) {
		fprintf(stderr, "Could not create file %s: %s\n", fname_sh, 
			strerror(errno));
		goto fini;
	}
	chmod(fname_csh, 0700);	/* Make file executable */
	chmod(fname_sh,  0700);

	if (getenv("SLURM_NODELIST")) {
		fprintf(resize_sh, "export SLURM_NODELIST=\"%s\"\n",
			alloc_info->node_list);
		fprintf(resize_csh, "setenv SLURM_NODELIST \"%s\"\n",
			alloc_info->node_list);
	}
	if (getenv("SLURM_JOB_NODELIST")) {
		fprintf(resize_sh, "export SLURM_JOB_NODELIST=\"%s\"\n", 
			alloc_info->node_list);
		fprintf(resize_csh, "setenv SLURM_JOB_NODELIST \"%s\"\n", 
			alloc_info->node_list);
	}
	if (getenv("SLURM_NNODES")) {
		fprintf(resize_sh, "export SLURM_NNODES=%u\n",
			alloc_info->node_cnt);
		fprintf(resize_csh, "setenv SLURM_NNODES %u\n",
			alloc_info->node_cnt);
	}
	if (getenv("SLURM_JOB_NUM_NODES")) {
		fprintf(resize_sh, "export SLURM_JOB_NUM_NODES=%u\n",
			alloc_info->node_cnt);
		fprintf(resize_csh, "setenv SLURM_JOB_NUM_NODES %u\n",
			alloc_info->node_cnt);
	}
	if (getenv("SLURM_JOB_CPUS_PER_NODE")) {
		char *tmp;
		tmp = uint32_compressed_to_str(alloc_info->num_cpu_groups,
					       alloc_info->cpus_per_node,
					       alloc_info->cpu_count_reps);
		fprintf(resize_sh, "export SLURM_JOB_CPUS_PER_NODE=\"%s\"\n",
			tmp);
		fprintf(resize_csh, "setenv SLURM_JOB_CPUS_PER_NODE \"%s\"\n",
			tmp);
		xfree(tmp);
	}
	if (getenv("SLURM_TASKS_PER_NODE")) {
		/* We don't have sufficient information to recreate this */
		fprintf(resize_sh, "unset SLURM_TASKS_PER_NODE\n");
		fprintf(resize_csh, "unsetenv SLURM_TASKS_PER_NODE\n");
	}

	printf("To reset SLURM environment variables, execute\n");
	printf("  For bash or sh shells:  . ./%s\n", fname_sh);
	printf("  For csh shells:         source ./%s\n", fname_csh);

fini:	slurm_free_resource_allocation_response_msg(alloc_info);
	xfree(fname_csh);
	xfree(fname_sh);
	if (resize_csh)
		fclose(resize_csh);
	if (resize_sh)
		fclose(resize_sh);
}
Esempio n. 7
0
/* returns 1 if job and nodes are ready for job to begin, 0 otherwise */
static int _wait_nodes_ready(resource_allocation_response_msg_t *alloc)
{
	int is_ready = 0, i, rc;
	int cur_delay = 0;
	int suspend_time, resume_time, max_delay;

	suspend_time = slurm_get_suspend_timeout();
	resume_time  = slurm_get_resume_timeout();
	if ((suspend_time == 0) || (resume_time == 0))
		return 1;	/* Power save mode disabled */
	max_delay = suspend_time + resume_time;
	max_delay *= 5;		/* Allow for ResumeRate support */

	pending_job_id = alloc->job_id;

	for (i = 0; (cur_delay < max_delay); i++) {
		if (i) {
			if (i == 1)
				verbose("Waiting for nodes to boot");
			else
				debug("still waiting");
			sleep(POLL_SLEEP);
			cur_delay += POLL_SLEEP;
		}

		rc = slurm_job_node_ready(alloc->job_id);

		if (rc == READY_JOB_FATAL)
			break;				/* fatal error */
		if ((rc == READY_JOB_ERROR) || (rc == EAGAIN))
			continue;			/* retry */
		if ((rc & READY_JOB_STATE) == 0)	/* job killed */
			break;
		if (rc & READY_NODE_STATE) {		/* job and node ready */
			is_ready = 1;
			break;
		}
		if (destroy_job)
			break;
	}
	if (is_ready) {
		resource_allocation_response_msg_t *resp;
		char *tmp_str;
		if (i > 0)
     			verbose("Nodes %s are ready for job", alloc->node_list);
		if (alloc->alias_list && !strcmp(alloc->alias_list, "TBD") &&
		    (slurm_allocation_lookup_lite(pending_job_id, &resp)
		     == SLURM_SUCCESS)) {
			tmp_str = alloc->alias_list;
			alloc->alias_list = resp->alias_list;
			resp->alias_list = tmp_str;
			slurm_free_resource_allocation_response_msg(resp);
		}
	} else if (!destroy_job)
		error("Nodes %s are still not ready", alloc->node_list);
	else	/* allocation_interrupted and slurmctld not responing */
		is_ready = 0;

	pending_job_id = 0;

	return is_ready;
}
Esempio n. 8
0
int main (int argc, char *argv[])
{
	int i, min_nodes = 1, max_nodes = 1, nodes, tasks = 0, rc = 0;
	job_desc_msg_t job_req;
	resource_allocation_response_msg_t *job_resp;
	slurm_step_ctx_params_t step_params[1];
	slurm_step_ctx_t *ctx = NULL;
	slurm_step_launch_params_t launch[1];
	char *task_argv[3];
	int *fd_array = NULL;
	int num_fd;

	if (argc > 1) {
		i = atoi(argv[1]);
		if (i > 0)
			min_nodes = i;
	}
	if (argc > 2) {
		i = atoi(argv[2]);
		if (i > 0)
			max_nodes = i;
	}
	if (max_nodes < min_nodes)
		max_nodes = min_nodes;

	/* Create a job allocation */
	slurm_init_job_desc_msg( &job_req );
	job_req.min_nodes  = min_nodes;
	job_req.max_nodes  = max_nodes;
	job_req.user_id    = getuid();
	job_req.group_id   = getgid();
	job_req.time_limit = 1;
	if (slurm_allocate_resources(&job_req, &job_resp)) {
		slurm_perror ("slurm_allocate_resources");
		printf("INFO: min_nodes=%u max_nodes=%u user_id=%u group_id=%u",
		       job_req.min_nodes, job_req.max_nodes,
		       job_req.user_id, job_req.group_id);
		exit(0);
	}
	printf("job_id %u\n", job_resp->job_id);
	fflush(stdout);

	/* Wait for allocation request to be satisfied */
	if ((job_resp->node_list == NULL) ||
	    (strlen(job_resp->node_list) == 0)) {
		printf("Waiting for resource allocation\n");
		fflush(stdout);
		while ((job_resp->node_list == NULL) ||
		       (strlen(job_resp->node_list) == 0)) {
			sleep(5);
			if (slurm_allocation_lookup_lite(job_resp->job_id,
							 &job_resp) &&
			    (slurm_get_errno() != ESLURM_JOB_PENDING)) {
				slurm_perror("slurm_confirm_allocation");
				exit(0);
			}
		}
	}
	nodes = job_resp->node_cnt;
	if (argc > 3)
		tasks = atoi(argv[3]);
	if (tasks < 1)
		tasks = nodes * TASKS_PER_NODE;
	if (tasks < nodes) {
		fprintf(stderr, "Invalid task count argument\n");
		exit(1);
	}
	printf("Starting %d tasks on %d nodes\n", tasks, nodes);
	fflush(stdout);

	/*
	 * Create a job step context.
	 */
	slurm_step_ctx_params_t_init(step_params);
	step_params->job_id = job_resp->job_id;
	step_params->min_nodes = nodes;
	step_params->task_count = tasks;

	ctx = slurm_step_ctx_create(step_params);
	if ((ctx == NULL) &&
	    (slurm_get_errno() == ESLURM_PROLOG_RUNNING)) {
		printf("SlurmctldProlog is still running, "
		       "sleep and try again\n");
		sleep(10);
		ctx = slurm_step_ctx_create(step_params);
	}
	if (ctx == NULL) {
		slurm_perror("slurm_step_ctx_create");
		rc = 1;
		goto done;
	}

	/*
	 * Hack to run one task per node, regardless of what we set up
	 * when we created the job step context.
	 */
	if (slurm_step_ctx_daemon_per_node_hack(ctx) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_ctx_daemon_per_node_hack");
		rc = 1;
		goto done;
	}

	/*
	 * Launch the tasks using "user managed" IO.
	 * "user managed" IO means a TCP stream for each task, directly
         * connected to the stdin, stdout, and stderr the task.
	 */
	slurm_step_launch_params_t_init(launch);
	task_argv[0] = "./test7.3.io";
	launch->argv = task_argv;
	launch->argc = 1;
	launch->user_managed_io = true; /* This is the key to using
					  "user managed" IO */

	if (slurm_step_launch(ctx, launch, NULL) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_launch");
		rc = 1;
		goto done;
	}

	if (slurm_step_launch_wait_start(ctx) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_launch_wait_start");
		rc =1;
		goto done;
	}

	slurm_step_ctx_get(ctx, SLURM_STEP_CTX_USER_MANAGED_SOCKETS,
			   &num_fd, &fd_array);

	/* Interact with launched tasks as desired */
	_do_task_work(fd_array, tasks);

	for (i = 0; i < tasks; i++) {
		close(fd_array[i]);
	}

	slurm_step_launch_wait_finish(ctx);

	/* Terminate the job killing all tasks */
done:	slurm_kill_job(job_resp->job_id, SIGKILL, 0);

	/* clean up storage */
	slurm_free_resource_allocation_response_msg(job_resp);
	if (ctx)
		slurm_step_ctx_destroy(ctx);
	exit(0);
}