Example #1
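This function builds a slurm_step_launch_params_t from srun's parsed options, installs task start/finish callbacks, and then either launches a new job step or adds tasks to a step that is already running. It appears to be the step-launch entry point of an srun launch plugin.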
extern int launch_p_step_launch(
	srun_job_t *job, slurm_step_io_fds_t *cio_fds, uint32_t *global_rc,
	slurm_step_launch_callbacks_t *step_callbacks)
{
	slurm_step_launch_params_t launch_params;
	slurm_step_launch_callbacks_t callbacks;
	int rc = 0;
	bool first_launch = false;

	slurm_step_launch_params_t_init(&launch_params);
	memcpy(&callbacks, step_callbacks, sizeof(callbacks));

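	/* The first call creates the global task state; subsequent calls
	 * resize it to cover the newly added tasks. */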
	if (!task_state) {
		task_state = task_state_create(job->ntasks);
		local_srun_job = job;
		local_global_rc = global_rc;
		first_launch = true;
	} else
		task_state_alter(task_state, job->ntasks);

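	/* Populate the launch parameters from srun's parsed options. */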
	launch_params.gid = opt.gid;
	launch_params.alias_list = job->alias_list;
	launch_params.argc = opt.argc;
	launch_params.argv = opt.argv;
	launch_params.multi_prog = opt.multi_prog ? true : false;
	launch_params.cwd = opt.cwd;
	launch_params.slurmd_debug = opt.slurmd_debug;
	launch_params.buffered_stdio = !opt.unbuffered;
	launch_params.labelio = opt.labelio ? true : false;
	launch_params.remote_output_filename = fname_remote_string(job->ofname);
	launch_params.remote_input_filename = fname_remote_string(job->ifname);
	launch_params.remote_error_filename = fname_remote_string(job->efname);
	launch_params.task_prolog = opt.task_prolog;
	launch_params.task_epilog = opt.task_epilog;
	launch_params.cpu_bind = opt.cpu_bind;
	launch_params.cpu_bind_type = opt.cpu_bind_type;
	launch_params.mem_bind = opt.mem_bind;
	launch_params.mem_bind_type = opt.mem_bind_type;
	launch_params.open_mode = opt.open_mode;
	if (opt.acctg_freq >= 0)
		launch_params.acctg_freq = opt.acctg_freq;
	launch_params.pty = opt.pty;
	if (opt.cpus_set)
		launch_params.cpus_per_task	= opt.cpus_per_task;
	else
		launch_params.cpus_per_task	= 1;
	launch_params.cpu_freq          = opt.cpu_freq;
	launch_params.task_dist         = opt.distribution;
	launch_params.ckpt_dir		= opt.ckpt_dir;
	launch_params.restart_dir       = opt.restart_dir;
	launch_params.preserve_env      = opt.preserve_env;
	launch_params.spank_job_env     = opt.spank_job_env;
	launch_params.spank_job_env_size = opt.spank_job_env_size;
	launch_params.user_managed_io   = opt.user_managed_io;

	memcpy(&launch_params.local_fds, cio_fds, sizeof(slurm_step_io_fds_t));

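	/* Under a parallel debugger, flag the launch accordingly and,
	 * presumably to keep startup deterministic for the debugger,
	 * limit the PMI server to a single thread. */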
	if (MPIR_being_debugged) {
		launch_params.parallel_debug = true;
		pmi_server_max_threads(1);
	} else {
		launch_params.parallel_debug = false;
	}
	/* Normally this isn't used, but if an outside process other
	   than srun (e.g. poe) is using this logic to launch tasks,
	   then we can use this to signal the step.
	*/
	callbacks.task_start = _task_start;
	/* If poe is using this code with multi-prog, it always returns
	   1 for each task, which could be confusing since no real
	   error happened.
	*/
	if (!launch_params.multi_prog
	    || (!callbacks.step_signal
		|| (callbacks.step_signal == launch_g_fwd_signal))) {
		callbacks.task_finish = _task_finish;
	}

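	/* Size the MPIR process table for the debugger, one entry per task. */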
	mpir_init(job->ctx_params.task_count);

	update_job_state(job, SRUN_JOB_LAUNCHING);
	launch_start_time = time(NULL);
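	/* The first call launches the step; later calls add tasks to the
	 * already-running step. */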
	if (first_launch) {
		if (slurm_step_launch(job->step_ctx, &launch_params,
				      &callbacks) != SLURM_SUCCESS) {
			rc = errno;
			*local_global_rc = errno;
			error("Application launch failed: %m");
			slurm_step_launch_abort(job->step_ctx);
			slurm_step_launch_wait_finish(job->step_ctx);
			goto cleanup;
		}
	} else {
		if (slurm_step_launch_add(job->step_ctx, &launch_params,
					  job->nodelist, job->fir_nodeid)
		    != SLURM_SUCCESS) {
			rc = errno;
			*local_global_rc = errno;
			error("Application launch add failed: %m");
			slurm_step_launch_abort(job->step_ctx);
			slurm_step_launch_wait_finish(job->step_ctx);
			goto cleanup;
		}
	}

	update_job_state(job, SRUN_JOB_STARTING);
	if (slurm_step_launch_wait_start(job->step_ctx) == SLURM_SUCCESS) {
		update_job_state(job, SRUN_JOB_RUNNING);
		/* Only set up MPIR structures if the step launched
		 * correctly. */
		if (opt.multi_prog)
			mpir_set_multi_name(job->ctx_params.task_count,
					    launch_params.argv[0]);
		else
			mpir_set_executable_names(launch_params.argv[0]);
		MPIR_debug_state = MPIR_DEBUG_SPAWNED;
		if (opt.debugger_test)
			mpir_dump_proctable();
		else
			MPIR_Breakpoint(job);
	} else {
		info("Job step %u.%u aborted before step completely launched.",
		     job->jobid, job->stepid);
	}

cleanup:

	return rc;
}
Example #2
File: test7.3.prog.c Project: VURM/slurm
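This test program exercises the same launch API directly: it requests a node allocation, creates a job step context, applies the daemon-per-node hack so one task runs per node, launches the tasks with "user managed" I/O, and then communicates with each task over its own TCP socket before killing the job.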
int main (int argc, char *argv[])
{
	int i, min_nodes = 1, max_nodes = 1, nodes, tasks = 0, rc = 0;
	job_desc_msg_t job_req;
	resource_allocation_response_msg_t *job_resp;
	slurm_step_ctx_params_t step_params[1];
	slurm_step_ctx_t *ctx = NULL;
	slurm_step_launch_params_t launch[1];
	char *task_argv[3];
	int *fd_array = NULL;
	int num_fd;

	if (argc > 1) {
		i = atoi(argv[1]);
		if (i > 0)
			min_nodes = i;
	}
	if (argc > 2) {
		i = atoi(argv[2]);
		if (i > 0)
			max_nodes = i;
	}
	if (max_nodes < min_nodes)
		max_nodes = min_nodes;

	/* Create a job allocation */
	slurm_init_job_desc_msg( &job_req );
	job_req.min_nodes  = min_nodes;
	job_req.max_nodes  = max_nodes;
	job_req.user_id    = getuid();
	job_req.group_id   = getgid();
	job_req.time_limit = 1;
	if (slurm_allocate_resources(&job_req, &job_resp)) {
		slurm_perror ("slurm_allocate_resources");
		printf("INFO: min_nodes=%u max_nodes=%u user_id=%u group_id=%u",
		       job_req.min_nodes, job_req.max_nodes,
		       job_req.user_id, job_req.group_id);
		exit(0);
	}
	printf("job_id %u\n", job_resp->job_id);
	fflush(stdout);

	/* Wait for allocation request to be satisfied */
	if ((job_resp->node_list == NULL) ||
	    (strlen(job_resp->node_list) == 0)) {
		printf("Waiting for resource allocation\n");
		fflush(stdout);
		while ((job_resp->node_list == NULL) ||
		       (strlen(job_resp->node_list) == 0)) {
			sleep(5);
			if (slurm_allocation_lookup_lite(job_resp->job_id,
							 &job_resp) &&
			    (slurm_get_errno() != ESLURM_JOB_PENDING)) {
				slurm_perror("slurm_confirm_allocation");
				exit(0);
			}
		}
	}
	nodes = job_resp->node_cnt;
	if (argc > 3)
		tasks = atoi(argv[3]);
	if (tasks < 1)
		tasks = nodes * TASKS_PER_NODE;
	if (tasks < nodes) {
		fprintf(stderr, "Invalid task count argument\n");
		exit(1);
	}
	printf("Starting %d tasks on %d nodes\n", tasks, nodes);
	fflush(stdout);

	/*
	 * Create a job step context.
	 */
	slurm_step_ctx_params_t_init(step_params);
	step_params->job_id = job_resp->job_id;
	step_params->min_nodes = nodes;
	step_params->task_count = tasks;

	ctx = slurm_step_ctx_create(step_params);
	if ((ctx == NULL) &&
	    (slurm_get_errno() == ESLURM_PROLOG_RUNNING)) {
		printf("SlurmctldProlog is still running, "
		       "sleep and try again\n");
		sleep(10);
		ctx = slurm_step_ctx_create(step_params);
	}
	if (ctx == NULL) {
		slurm_perror("slurm_step_ctx_create");
		rc = 1;
		goto done;
	}

	/*
	 * Hack to run one task per node, regardless of what we set up
	 * when we created the job step context.
	 */
	if (slurm_step_ctx_daemon_per_node_hack(ctx) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_ctx_daemon_per_node_hack");
		rc = 1;
		goto done;
	}

	/*
	 * Launch the tasks using "user managed" IO.
	 * "user managed" IO means a TCP stream for each task, directly
	 * connected to the stdin, stdout, and stderr of the task.
	 */
	slurm_step_launch_params_t_init(launch);
	task_argv[0] = "./test7.3.io";
	launch->argv = task_argv;
	launch->argc = 1;
	launch->user_managed_io = true; /* This is the key to using
					  "user managed" IO */

	if (slurm_step_launch(ctx, launch, NULL) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_launch");
		rc = 1;
		goto done;
	}

	if (slurm_step_launch_wait_start(ctx) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_launch_wait_start");
		rc = 1;
		goto done;
	}

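	/* Fetch the per-task sockets created by "user managed" IO. */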
	slurm_step_ctx_get(ctx, SLURM_STEP_CTX_USER_MANAGED_SOCKETS,
			   &num_fd, &fd_array);

	/* Interact with launched tasks as desired */
	_do_task_work(fd_array, tasks);

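	/* Close the per-task sockets once the interaction is done. */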
	for (i = 0; i < tasks; i++) {
		close(fd_array[i]);
	}

	slurm_step_launch_wait_finish(ctx);

	/* Terminate the job, killing all tasks */
done:	slurm_kill_job(job_resp->job_id, SIGKILL, 0);

	/* clean up storage */
	slurm_free_resource_allocation_response_msg(job_resp);
	if (ctx)
		slurm_step_ctx_destroy(ctx);
	exit(rc);
}