/*
 * stepd_cleanup - tear down a step daemon and hand back its exit status.
 *
 * msg      - launch message; passed to _step_cleanup() (MEMORY_LEAK_DEBUG only)
 * job      - step record
 * cli      - address buffer, xfree()'d here (MEMORY_LEAK_DEBUG only)
 * self     - address buffer, xfree()'d here (MEMORY_LEAK_DEBUG only)
 * rc       - status forwarded to batch_finish() and _step_cleanup()
 * only_mem - when true, skip the batch-completion message and the message
 *            thread shutdown; only the remaining cleanup is performed
 *
 * Returns rc unchanged.
 *
 * NOTE(review): job is dereferenced unconditionally when only_mem is false;
 * confirm that no caller passes a NULL job on that path.
 */
extern int stepd_cleanup(slurm_msg_t *msg, stepd_step_rec_t *job,
			 slurm_addr_t *cli, slurm_addr_t *self,
			 int rc, bool only_mem)
{
	if (!only_mem) {
		if (job->batch) {
			/* sends batch complete message */
			batch_finish(job, rc);
		}

		/* Ask the message thread to shut down, then wait for it. */
		eio_signal_shutdown(job->msg_handle);
		pthread_join(job->msgid, NULL);
	}

	/* Remove stale PMI2 sockets */
	mpi_fini();

#ifdef MEMORY_LEAK_DEBUG
	/*
	 * Only built for leak hunting: release plugin state, the step record
	 * and every string owned by the global conf, then conf itself.
	 */
	acct_gather_conf_destroy();
	(void) core_spec_g_fini();
	_step_cleanup(job, msg, rc);

	fini_setproctitle();

	xfree(cli);
	xfree(self);

	xfree(conf->block_map);
	xfree(conf->block_map_inv);
	xfree(conf->hostname);
	xfree(conf->job_acct_gather_freq);
	xfree(conf->job_acct_gather_type);
	xfree(conf->logfile);
	xfree(conf->node_name);
	xfree(conf->node_topo_addr);
	xfree(conf->node_topo_pattern);
	xfree(conf->spooldir);
	xfree(conf->task_epilog);
	xfree(conf->task_prolog);
	xfree(conf);
#endif

	info("done with job");
	return rc;
}
int main (int argc, char *argv[]) { slurm_addr_t *cli; slurm_addr_t *self; slurm_msg_t *msg; slurmd_job_t *job; int ngids; gid_t *gids; int rc = 0; if (process_cmdline (argc, argv) < 0) fatal ("Error in slurmstepd command line"); xsignal_block(slurmstepd_blocked_signals); conf = xmalloc(sizeof(*conf)); conf->argv = &argv; conf->argc = &argc; init_setproctitle(argc, argv); if (slurm_select_init(1) != SLURM_SUCCESS ) fatal( "failed to initialize node selection plugin" ); /* Receive job parameters from the slurmd */ _init_from_slurmd(STDIN_FILENO, argv, &cli, &self, &msg, &ngids, &gids); /* Fancy way of closing stdin that keeps STDIN_FILENO from being * allocated to any random file. The slurmd already opened /dev/null * on STDERR_FILENO for us. */ dup2(STDERR_FILENO, STDIN_FILENO); /* Create the slurmd_job_t, mostly from info in a launch_tasks_request_msg_t or a batch_job_launch_msg_t */ if(!(job = _step_setup(cli, self, msg))) { _send_fail_to_slurmd(STDOUT_FILENO); rc = SLURM_FAILURE; goto ending; } job->ngids = ngids; job->gids = gids; /* fork handlers cause mutexes on some global data structures to be re-initialized after the fork. */ list_install_fork_handlers(); slurm_conf_install_fork_handlers(); /* sets job->msg_handle and job->msgid */ if (msg_thr_create(job) == SLURM_ERROR) { _send_fail_to_slurmd(STDOUT_FILENO); rc = SLURM_FAILURE; goto ending; } _send_ok_to_slurmd(STDOUT_FILENO); /* Fancy way of closing stdout that keeps STDOUT_FILENO from being * allocated to any random file. The slurmd already opened /dev/null * on STDERR_FILENO for us. 
*/ dup2(STDERR_FILENO, STDOUT_FILENO); /* This does most of the stdio setup, then launches all the tasks, and blocks until the step is complete */ rc = job_manager(job); /* signal the message thread to shutdown, and wait for it */ eio_signal_shutdown(job->msg_handle); pthread_join(job->msgid, NULL); if (job->batch) batch_finish(job, rc); /* sends batch complete message */ ending: #ifdef MEMORY_LEAK_DEBUG _step_cleanup(job, msg, rc); xfree(cli); xfree(self); xfree(conf->hostname); xfree(conf->block_map); xfree(conf->block_map_inv); xfree(conf->spooldir); xfree(conf->node_name); xfree(conf->node_topo_addr); xfree(conf->node_topo_pattern); xfree(conf->logfile); xfree(conf); #endif info("done with job"); return rc; }