/*
 * If the user specified a job id (opt.jobid), look up and return that
 * existing allocation; otherwise return NULL so a new allocation can be
 * created.
 */
resource_allocation_response_msg_t *existing_allocation(void)
{
    uint32_t old_job_id;
    resource_allocation_response_msg_t *resp = NULL;

    if (opt.jobid != NO_VAL)
        old_job_id = (uint32_t) opt.jobid;
    else
        return NULL;

    if (slurm_allocation_lookup_lite(old_job_id, &resp) < 0) {
        if (opt.parallel_debug || opt.jobid_set)
            return NULL;    /* create new allocation as needed */
        if (errno == ESLURM_ALREADY_DONE)
            error("SLURM job %u has expired.", old_job_id);
        else
            error("Unable to confirm allocation for job %u: %m",
                  old_job_id);
        info("Check SLURM_JOB_ID environment variable "
             "for expired or invalid job.");
        exit(error_exit);
    }

    return resp;
}
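/*
 * Usage sketch (illustrative, not part of the original source): a caller
 * such as srun might reuse an existing allocation like this. The helper
 * below is hypothetical and assumes existing_allocation() and SLURM's
 * info() logging are available in the same translation unit.
 */
static void _reuse_allocation_example(void)
{
    resource_allocation_response_msg_t *resp = existing_allocation();

    if (resp) {
        info("reusing job %u on nodes %s", resp->job_id, resp->node_list);
        /* ... launch job steps within the existing allocation ... */
        slurm_free_resource_allocation_response_msg(resp);
    } else {
        info("no existing allocation; a new one would be created");
    }
}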
static resource_allocation_response_msg_t *
_wait_for_allocation_response(uint32_t job_id, const listen_t *listen,
                              int timeout)
{
    resource_allocation_response_msg_t *resp = NULL;
    int errnum;

    info("job %u queued and waiting for resources", job_id);
    if (_wait_for_alloc_rpc(listen, timeout, &resp) <= 0) {
        errnum = errno;
        /* Maybe the resource allocation response RPC got lost
         * in the mail; surely it should have arrived by now.
         * Let's see if the controller thinks that the allocation
         * has been granted. */
        if (slurm_allocation_lookup_lite(job_id, &resp) >= 0) {
            return resp;
        }
        if (slurm_get_errno() == ESLURM_JOB_PENDING) {
            debug3("Still waiting for allocation");
            errno = errnum;
            return NULL;
        } else {
            debug3("Unable to confirm allocation for job %u: %m",
                   job_id);
            return NULL;
        }
    }
    info("job %u has been allocated resources", job_id);
    return resp;
}
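/*
 * Caller sketch (illustrative): after submitting an allocation request, an
 * srun-style client could poll _wait_for_allocation_response() until either
 * a response arrives or the job is no longer pending. The listen handle,
 * timeout value, and retry policy here are assumptions, not the original
 * call-site logic.
 */
static resource_allocation_response_msg_t *
_poll_for_allocation_example(uint32_t job_id, const listen_t *listen)
{
    resource_allocation_response_msg_t *resp = NULL;

    while (resp == NULL) {
        resp = _wait_for_allocation_response(job_id, listen, 60);
        if (resp)
            break;
        if (slurm_get_errno() != ESLURM_JOB_PENDING)
            break;    /* give up on hard errors */
    }
    return resp;
}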
/*
 * Wait until a job is ready to execute or enters some failed state
 * RET 1: job ready to run
 *     0: job can't run (cancelled, failure state, timeout, etc.)
 */
extern int scontrol_job_ready(char *job_id_str)
{
    int rc, id;
    uint32_t job_id;

    id = atoi(job_id_str);
    if (id <= 0) {
        fprintf(stderr, "Invalid job_id %s\n", job_id_str);
        return SLURM_ERROR;
    }
    job_id = (uint32_t) id;

    if (cluster_flags & CLUSTER_FLAG_BG) {
        resource_allocation_response_msg_t *alloc;
        rc = slurm_allocation_lookup_lite(job_id, &alloc);
        if (rc == SLURM_SUCCESS) {
            rc = _wait_bluegene_block_ready(alloc);
            slurm_free_resource_allocation_response_msg(alloc);
        } else {
            error("slurm_allocation_lookup_lite: %m");
            rc = SLURM_ERROR;
        }
    } else
        rc = _wait_nodes_ready(job_id);

    return rc;
}
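/*
 * Usage sketch (illustrative): a minimal "wait for job" command built on
 * scontrol_job_ready(). The function name and exit-code mapping are
 * assumptions for this example only.
 */
#include <stdio.h>

int wait_job_main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "Usage: wait_job <job_id>\n");
        return 1;
    }
    /* Returns 1 when the job is ready to run, 0 or SLURM_ERROR otherwise */
    return (scontrol_job_ready(argv[1]) == 1) ? 0 : 1;
}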
/*
 * slurm_signal_job - send the specified signal to all steps of an existing job
 * IN job_id - the job's id
 * IN signal - signal number
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
extern int slurm_signal_job(uint32_t job_id, uint16_t signal)
{
    int rc = SLURM_SUCCESS;
    resource_allocation_response_msg_t *alloc_info = NULL;
    signal_job_msg_t rpc;

    if (slurm_allocation_lookup_lite(job_id, &alloc_info)) {
        rc = slurm_get_errno();
        goto fail1;
    }

    /* same remote procedure call for each node */
    rpc.job_id = job_id;
    rpc.signal = (uint32_t) signal;

    rc = _local_send_recv_rc_msgs(alloc_info->node_list,
                                  REQUEST_SIGNAL_JOB, &rpc);
    slurm_free_resource_allocation_response_msg(alloc_info);
fail1:
    if (rc) {
        slurm_seterrno_ret(rc);
    } else {
        return SLURM_SUCCESS;
    }
}
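/*
 * Usage sketch (illustrative): signaling every step of a running job from a
 * client program. Assumes <slurm/slurm.h> and <signal.h>; the job id passed
 * in is a placeholder.
 */
#include <signal.h>
#include <slurm/slurm.h>

static int _signal_job_example(uint32_t job_id)
{
    if (slurm_signal_job(job_id, SIGUSR1) < 0) {
        slurm_perror("slurm_signal_job");
        return -1;
    }
    return 0;
}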
/*
 * slurm_terminate_job_step - terminates a job step by sending a
 *	REQUEST_TERMINATE_TASKS rpc to all slurmd of a job step.
 * IN job_id  - the job's id
 * IN step_id - the job step's id - use SLURM_BATCH_SCRIPT as the step_id
 *	to terminate a job's batch script
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
extern int slurm_terminate_job_step(uint32_t job_id, uint32_t step_id)
{
    resource_allocation_response_msg_t *alloc_info = NULL;
    job_step_info_response_msg_t *step_info = NULL;
    int rc = 0;
    int i;
    int save_errno = 0;

    if (slurm_allocation_lookup_lite(job_id, &alloc_info)) {
        return -1;
    }

    /*
     * The controller won't give us info about the batch script job step,
     * so we need to handle that separately.
     */
    if (step_id == SLURM_BATCH_SCRIPT) {
        rc = _terminate_batch_script_step(alloc_info);
        slurm_free_resource_allocation_response_msg(alloc_info);
        errno = rc;
        return rc ? -1 : 0;
    }

    /*
     * Otherwise, look through the list of job step info and find
     * the one matching step_id.  Terminate that step.
     */
    rc = slurm_get_job_steps((time_t) 0, job_id, step_id,
                             &step_info, SHOW_ALL);
    if (rc != 0) {
        save_errno = errno;
        goto fail;
    }
    for (i = 0; i < step_info->job_step_count; i++) {
        if ((step_info->job_steps[i].job_id == job_id) &&
            (step_info->job_steps[i].step_id == step_id)) {
            rc = _terminate_job_step(&step_info->job_steps[i],
                                     alloc_info);
            save_errno = errno;
            break;
        }
    }
    slurm_free_job_step_info_response_msg(step_info);
fail:
    slurm_free_resource_allocation_response_msg(alloc_info);
    errno = save_errno;
    return rc ? -1 : 0;
}
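/*
 * Usage sketch (illustrative): terminating a job's batch script versus a
 * regular step. Assumes <slurm/slurm.h>; the job and step ids are
 * placeholders supplied by the caller.
 */
#include <slurm/slurm.h>

static void _terminate_steps_example(uint32_t job_id, uint32_t step_id)
{
    /* Kill only the batch script step */
    if (slurm_terminate_job_step(job_id, SLURM_BATCH_SCRIPT) < 0)
        slurm_perror("slurm_terminate_job_step (batch script)");

    /* Kill a specific step */
    if (slurm_terminate_job_step(job_id, step_id) < 0)
        slurm_perror("slurm_terminate_job_step");
}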
/*
 * Write shell scripts (slurm_job_<id>_resize.sh and .csh) that a user can
 * source to update the SLURM_* environment variables after a job resize.
 */
static void _update_job_size(uint32_t job_id)
{
    resource_allocation_response_msg_t *alloc_info;
    char *fname_csh = NULL, *fname_sh = NULL;
    FILE *resize_csh = NULL, *resize_sh = NULL;

    if (!getenv("SLURM_JOBID"))
        return;    /* No job environment here to update */

    if (slurm_allocation_lookup_lite(job_id, &alloc_info) != SLURM_SUCCESS) {
        slurm_perror("slurm_allocation_lookup_lite");
        return;
    }

    xstrfmtcat(fname_csh, "slurm_job_%u_resize.csh", job_id);
    xstrfmtcat(fname_sh,  "slurm_job_%u_resize.sh",  job_id);
    (void) unlink(fname_csh);
    (void) unlink(fname_sh);
    if (!(resize_csh = fopen(fname_csh, "w"))) {
        fprintf(stderr, "Could not create file %s: %s\n",
                fname_csh, strerror(errno));
        goto fini;
    }
    if (!(resize_sh = fopen(fname_sh, "w"))) {
        fprintf(stderr, "Could not create file %s: %s\n",
                fname_sh, strerror(errno));
        goto fini;
    }
    chmod(fname_csh, 0700);    /* Make file executable */
    chmod(fname_sh,  0700);

    if (getenv("SLURM_NODELIST")) {
        fprintf(resize_sh, "export SLURM_NODELIST=\"%s\"\n",
                alloc_info->node_list);
        fprintf(resize_csh, "setenv SLURM_NODELIST \"%s\"\n",
                alloc_info->node_list);
    }
    if (getenv("SLURM_JOB_NODELIST")) {
        fprintf(resize_sh, "export SLURM_JOB_NODELIST=\"%s\"\n",
                alloc_info->node_list);
        fprintf(resize_csh, "setenv SLURM_JOB_NODELIST \"%s\"\n",
                alloc_info->node_list);
    }
    if (getenv("SLURM_NNODES")) {
        fprintf(resize_sh, "export SLURM_NNODES=%u\n",
                alloc_info->node_cnt);
        fprintf(resize_csh, "setenv SLURM_NNODES %u\n",
                alloc_info->node_cnt);
    }
    if (getenv("SLURM_JOB_NUM_NODES")) {
        fprintf(resize_sh, "export SLURM_JOB_NUM_NODES=%u\n",
                alloc_info->node_cnt);
        fprintf(resize_csh, "setenv SLURM_JOB_NUM_NODES %u\n",
                alloc_info->node_cnt);
    }
    if (getenv("SLURM_JOB_CPUS_PER_NODE")) {
        char *tmp;
        tmp = uint32_compressed_to_str(alloc_info->num_cpu_groups,
                                       alloc_info->cpus_per_node,
                                       alloc_info->cpu_count_reps);
        fprintf(resize_sh, "export SLURM_JOB_CPUS_PER_NODE=\"%s\"\n", tmp);
        fprintf(resize_csh, "setenv SLURM_JOB_CPUS_PER_NODE \"%s\"\n", tmp);
        xfree(tmp);
    }
    if (getenv("SLURM_TASKS_PER_NODE")) {
        /* We don't have sufficient information to recreate this */
        fprintf(resize_sh, "unset SLURM_TASKS_PER_NODE\n");
        fprintf(resize_csh, "unsetenv SLURM_TASKS_PER_NODE\n");
    }

    printf("To reset SLURM environment variables, execute\n");
    printf("  For bash or sh shells:  . ./%s\n", fname_sh);
    printf("  For csh shells:         source ./%s\n", fname_csh);

fini:
    slurm_free_resource_allocation_response_msg(alloc_info);
    xfree(fname_csh);
    xfree(fname_sh);
    if (resize_csh)
        fclose(resize_csh);
    if (resize_sh)
        fclose(resize_sh);
}
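/*
 * Caller sketch (illustrative): _update_job_size() is most useful right after
 * a job has been resized. The shrink request below via slurm_update_job() is
 * an assumption about how a caller might reach this point, not the original
 * call site.
 */
static void _shrink_job_example(uint32_t job_id, uint32_t new_node_cnt)
{
    job_desc_msg_t job_update;

    slurm_init_job_desc_msg(&job_update);
    job_update.job_id    = job_id;
    job_update.min_nodes = new_node_cnt;
    if (slurm_update_job(&job_update) != SLURM_SUCCESS) {
        slurm_perror("slurm_update_job");
        return;
    }
    /* Write slurm_job_<id>_resize.{sh,csh} reflecting the new allocation */
    _update_job_size(job_id);
}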
/* Returns 1 if the job and its nodes are ready for the job to begin,
 * 0 otherwise */
static int _wait_nodes_ready(resource_allocation_response_msg_t *alloc)
{
    int is_ready = 0, i, rc;
    int cur_delay = 0;
    int suspend_time, resume_time, max_delay;

    suspend_time = slurm_get_suspend_timeout();
    resume_time  = slurm_get_resume_timeout();
    if ((suspend_time == 0) || (resume_time == 0))
        return 1;    /* Power save mode disabled */
    max_delay = suspend_time + resume_time;
    max_delay *= 5;    /* Allow for ResumeRate support */

    pending_job_id = alloc->job_id;

    for (i = 0; (cur_delay < max_delay); i++) {
        if (i) {
            if (i == 1)
                verbose("Waiting for nodes to boot");
            else
                debug("still waiting");
            sleep(POLL_SLEEP);
            cur_delay += POLL_SLEEP;
        }

        rc = slurm_job_node_ready(alloc->job_id);

        if (rc == READY_JOB_FATAL)
            break;                          /* fatal error */
        if ((rc == READY_JOB_ERROR) || (rc == EAGAIN))
            continue;                       /* retry */
        if ((rc & READY_JOB_STATE) == 0)    /* job killed */
            break;
        if (rc & READY_NODE_STATE) {        /* job and node ready */
            is_ready = 1;
            break;
        }
        if (destroy_job)
            break;
    }
    if (is_ready) {
        resource_allocation_response_msg_t *resp;
        char *tmp_str;
        if (i > 0)
            verbose("Nodes %s are ready for job", alloc->node_list);
        if (alloc->alias_list && !strcmp(alloc->alias_list, "TBD") &&
            (slurm_allocation_lookup_lite(pending_job_id, &resp)
             == SLURM_SUCCESS)) {
            tmp_str = alloc->alias_list;
            alloc->alias_list = resp->alias_list;
            resp->alias_list = tmp_str;
            slurm_free_resource_allocation_response_msg(resp);
        }
    } else if (!destroy_job)
        error("Nodes %s are still not ready", alloc->node_list);
    else    /* allocation_interrupted and slurmctld not responding */
        is_ready = 0;

    pending_job_id = 0;

    return is_ready;
}
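/*
 * Caller sketch (illustrative): after slurm_allocate_resources() returns, a
 * salloc/srun-style client could block until powered-down nodes have booted
 * before launching any steps. Error handling here is simplified and assumed.
 */
static int _allocate_and_wait_example(job_desc_msg_t *req)
{
    resource_allocation_response_msg_t *alloc = NULL;
    int ready;

    if (slurm_allocate_resources(req, &alloc) != SLURM_SUCCESS) {
        slurm_perror("slurm_allocate_resources");
        return -1;
    }
    ready = _wait_nodes_ready(alloc);    /* 1 = job and nodes ready */
    slurm_free_resource_allocation_response_msg(alloc);
    return ready ? 0 : -1;
}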
int main(int argc, char *argv[])
{
    int i, min_nodes = 1, max_nodes = 1, nodes, tasks = 0, rc = 0;
    job_desc_msg_t job_req;
    resource_allocation_response_msg_t *job_resp;
    slurm_step_ctx_params_t step_params[1];
    slurm_step_ctx_t *ctx = NULL;
    slurm_step_launch_params_t launch[1];
    char *task_argv[3];
    int *fd_array = NULL;
    int num_fd;

    if (argc > 1) {
        i = atoi(argv[1]);
        if (i > 0)
            min_nodes = i;
    }
    if (argc > 2) {
        i = atoi(argv[2]);
        if (i > 0)
            max_nodes = i;
    }
    if (max_nodes < min_nodes)
        max_nodes = min_nodes;

    /* Create a job allocation */
    slurm_init_job_desc_msg(&job_req);
    job_req.min_nodes  = min_nodes;
    job_req.max_nodes  = max_nodes;
    job_req.user_id    = getuid();
    job_req.group_id   = getgid();
    job_req.time_limit = 1;
    if (slurm_allocate_resources(&job_req, &job_resp)) {
        slurm_perror("slurm_allocate_resources");
        printf("INFO: min_nodes=%u max_nodes=%u user_id=%u group_id=%u\n",
               job_req.min_nodes, job_req.max_nodes,
               job_req.user_id, job_req.group_id);
        exit(0);
    }
    printf("job_id %u\n", job_resp->job_id);
    fflush(stdout);

    /* Wait for allocation request to be satisfied */
    if ((job_resp->node_list == NULL) ||
        (strlen(job_resp->node_list) == 0)) {
        printf("Waiting for resource allocation\n");
        fflush(stdout);
        while ((job_resp->node_list == NULL) ||
               (strlen(job_resp->node_list) == 0)) {
            sleep(5);
            if (slurm_allocation_lookup_lite(job_resp->job_id, &job_resp) &&
                (slurm_get_errno() != ESLURM_JOB_PENDING)) {
                slurm_perror("slurm_allocation_lookup_lite");
                exit(0);
            }
        }
    }
    nodes = job_resp->node_cnt;
    if (argc > 3)
        tasks = atoi(argv[3]);
    if (tasks < 1)
        tasks = nodes * TASKS_PER_NODE;
    if (tasks < nodes) {
        fprintf(stderr, "Invalid task count argument\n");
        exit(1);
    }
    printf("Starting %d tasks on %d nodes\n", tasks, nodes);
    fflush(stdout);

    /*
     * Create a job step context.
     */
    slurm_step_ctx_params_t_init(step_params);
    step_params->job_id = job_resp->job_id;
    step_params->min_nodes = nodes;
    step_params->task_count = tasks;

    ctx = slurm_step_ctx_create(step_params);
    if ((ctx == NULL) && (slurm_get_errno() == ESLURM_PROLOG_RUNNING)) {
        printf("SlurmctldProlog is still running, sleep and try again\n");
        sleep(10);
        ctx = slurm_step_ctx_create(step_params);
    }
    if (ctx == NULL) {
        slurm_perror("slurm_step_ctx_create");
        rc = 1;
        goto done;
    }

    /*
     * Hack to run one task per node, regardless of what we set up
     * when we created the job step context.
     */
    if (slurm_step_ctx_daemon_per_node_hack(ctx) != SLURM_SUCCESS) {
        slurm_perror("slurm_step_ctx_daemon_per_node_hack");
        rc = 1;
        goto done;
    }

    /*
     * Launch the tasks using "user managed" IO.
     * "User managed" IO means a TCP stream for each task, directly
     * connected to the stdin, stdout, and stderr of the task.
     */
    slurm_step_launch_params_t_init(launch);
    task_argv[0] = "./test7.3.io";
    launch->argv = task_argv;
    launch->argc = 1;
    launch->user_managed_io = true;    /* This is the key to using
                                        * "user managed" IO */

    if (slurm_step_launch(ctx, launch, NULL) != SLURM_SUCCESS) {
        slurm_perror("slurm_step_launch");
        rc = 1;
        goto done;
    }

    if (slurm_step_launch_wait_start(ctx) != SLURM_SUCCESS) {
        slurm_perror("slurm_step_launch_wait_start");
        rc = 1;
        goto done;
    }

    slurm_step_ctx_get(ctx, SLURM_STEP_CTX_USER_MANAGED_SOCKETS,
                       &num_fd, &fd_array);

    /* Interact with launched tasks as desired */
    _do_task_work(fd_array, tasks);

    for (i = 0; i < tasks; i++) {
        close(fd_array[i]);
    }

    slurm_step_launch_wait_finish(ctx);

    /* Terminate the job, killing all tasks */
done:
    slurm_kill_job(job_resp->job_id, SIGKILL, 0);

    /* clean up storage */
    slurm_free_resource_allocation_response_msg(job_resp);
    if (ctx)
        slurm_step_ctx_destroy(ctx);
    exit(0);
}