/*
 * jobacct_gather_endpoll - stop job accounting polling.
 *
 * Flags the gatherer as shut down, then, with task_list_lock held, applies
 * FREE_NULL_LIST() to task_list and calls the plugin's endpoll operation.
 *
 * RET SLURM_ERROR if jobacct_gather_init() fails, otherwise the value
 *	returned by the plugin's endpoll operation.
 */
extern int jobacct_gather_endpoll(void)
{
	int rc;

	if (jobacct_gather_init() < 0)
		return SLURM_ERROR;

	jobacct_shutdown = true;

	/* The list teardown and the plugin hook share one critical section. */
	slurm_mutex_lock(&task_list_lock);
	FREE_NULL_LIST(task_list);
	rc = ops.endpoll();
	slurm_mutex_unlock(&task_list_lock);

	return rc;
}
/*
 * jobacct_gather_add_task - start tracking a task's process.
 *
 * IN pid - process id of the task; must be > 0
 * IN jobacct_id - identifier copied into the new accounting record
 * IN poll - when 1, _poll_data(1) is called once the task has been added
 * RET SLURM_SUCCESS if the task was queued (or if plugin polling is off, in
 *	which case nothing is done); SLURM_ERROR if initialization fails, the
 *	gatherer is shut down, pid is invalid, or no task list exists.
 */
extern int jobacct_gather_add_task(pid_t pid, jobacct_id_t *jobacct_id,
				   int poll)
{
	struct jobacctinfo *acct;

	if (jobacct_gather_init() < 0)
		return SLURM_ERROR;

	if (!plugin_polling)
		return SLURM_SUCCESS;

	if (_jobacct_shutdown_test())
		return SLURM_ERROR;

	/* The record is created before the lock is taken and the arguments
	 * are validated, so both failure paths below must destroy it. */
	acct = jobacctinfo_create(jobacct_id);

	slurm_mutex_lock(&task_list_lock);
	if (pid <= 0) {
		error("invalid pid given (%d) for task acct", pid);
	} else if (!task_list) {
		error("no task list created!");
	} else {
		acct->pid = pid;
		memcpy(&acct->id, jobacct_id, sizeof(jobacct_id_t));
		acct->min_cpu = 0;
		debug2("adding task %u pid %d on node %u to jobacct",
		       jobacct_id->taskid, pid, jobacct_id->nodeid);
		list_push(task_list, acct);
		slurm_mutex_unlock(&task_list_lock);

		/* The plugin hook and optional poll run without the lock. */
		ops.add_task(pid, jobacct_id);

		if (poll == 1)
			_poll_data(1);

		return SLURM_SUCCESS;
	}

	/* Validation failed: release the lock and drop the unused record. */
	slurm_mutex_unlock(&task_list_lock);
	jobacctinfo_destroy(acct);

	return SLURM_ERROR;
}
extern int jobacct_gather_startpoll(uint16_t frequency) { int retval = SLURM_SUCCESS; pthread_attr_t attr; pthread_t _watch_tasks_thread_id; if (!plugin_polling) return SLURM_SUCCESS; if (jobacct_gather_init() < 0) return SLURM_ERROR; if (!jobacct_shutdown) { error("jobacct_gather_startpoll: poll already started!"); return retval; } jobacct_shutdown = false; freq = frequency; task_list = list_create(jobacctinfo_destroy); if (frequency == 0) { /* don't want dynamic monitoring? */ debug2("jobacct_gather dynamic logging disabled"); return retval; } /* create polling thread */ slurm_attr_init(&attr); if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) error("pthread_attr_setdetachstate error %m"); if (pthread_create(&_watch_tasks_thread_id, &attr, &_watch_tasks, NULL)) { debug("jobacct_gather failed to create _watch_tasks " "thread: %m"); frequency = 0; } else debug3("jobacct_gather dynamic logging enabled"); slurm_attr_destroy(&attr); return retval; }
extern int jobacct_gather_startpoll(uint16_t frequency) { int retval = SLURM_SUCCESS; pthread_attr_t attr; if (!plugin_polling) return SLURM_SUCCESS; if (jobacct_gather_init() < 0) return SLURM_ERROR; if (!_jobacct_shutdown_test()) { error("jobacct_gather_startpoll: poll already started!"); return retval; } slurm_mutex_lock(&jobacct_shutdown_mutex); jobacct_shutdown = false; slurm_mutex_unlock(&jobacct_shutdown_mutex); freq = frequency; task_list = list_create(jobacctinfo_destroy); if (frequency == 0) { /* don't want dynamic monitoring? */ debug2("jobacct_gather dynamic logging disabled"); return retval; } /* create polling thread */ slurm_attr_init(&attr); if (pthread_create(&watch_tasks_thread_id, &attr, &_watch_tasks, NULL)) { debug("jobacct_gather failed to create _watch_tasks " "thread: %m"); } else debug3("jobacct_gather dynamic logging enabled"); slurm_attr_destroy(&attr); return retval; }
/*
 * jobacctinfo_unpack - decode a jobacctinfo record from a buffer.
 *
 * IN/OUT jobacct - record to fill; when alloc is set a new record is
 *	xmalloc'ed and stored here before any field is decoded
 * IN rpc_version - protocol version the buffer was packed with
 * IN protocol_type - not referenced by this implementation
 * IN buffer - buffer to read from
 * IN alloc - true to allocate *jobacct here; on an unpack error the record
 *	is then released with xfree()
 * RET SLURM_SUCCESS, or SLURM_ERROR if jobacct_gather_init() fails, if
 *	rpc_version is below SLURM_MIN_PROTOCOL_VERSION, or if unpacking fails.
 *
 * The data starts with a one-byte flag. When it is 0 the function returns
 * SLURM_SUCCESS immediately without allocating or touching *jobacct.
 */
extern int jobacctinfo_unpack(jobacctinfo_t **jobacct, uint16_t rpc_version,
			      uint16_t protocol_type, Buf buffer, bool alloc)
{
	jobacctinfo_t *acct;
	uint32_t word;
	uint8_t flag;

	if (jobacct_gather_init() < 0)
		return SLURM_ERROR;

	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION) {
		info("jobacctinfo_unpack version %u not supported",
		     rpc_version);
		return SLURM_ERROR;
	}

	safe_unpack8(&flag, buffer);
	if (flag == (uint8_t) 0)
		return SLURM_SUCCESS;

	if (alloc)
		*jobacct = xmalloc(sizeof(struct jobacctinfo));
	acct = *jobacct;

	/* CPU times are decoded as 32-bit values and then assigned. */
	safe_unpack32(&word, buffer);
	acct->user_cpu_sec = word;
	safe_unpack32(&word, buffer);
	acct->user_cpu_usec = word;
	safe_unpack32(&word, buffer);
	acct->sys_cpu_sec = word;
	safe_unpack32(&word, buffer);
	acct->sys_cpu_usec = word;

	safe_unpack64(&acct->max_vsize, buffer);
	safe_unpack64(&acct->tot_vsize, buffer);
	safe_unpack64(&acct->max_rss, buffer);
	safe_unpack64(&acct->tot_rss, buffer);
	safe_unpack64(&acct->max_pages, buffer);
	safe_unpack64(&acct->tot_pages, buffer);

	safe_unpack32(&acct->min_cpu, buffer);
	safe_unpackdouble(&acct->tot_cpu, buffer);
	safe_unpack32(&acct->act_cpufreq, buffer);
	safe_unpack64(&acct->energy.consumed_energy, buffer);

	safe_unpackdouble(&acct->max_disk_read, buffer);
	safe_unpackdouble(&acct->tot_disk_read, buffer);
	safe_unpackdouble(&acct->max_disk_write, buffer);
	safe_unpackdouble(&acct->tot_disk_write, buffer);

	/* The ids are decoded in order; the first failure stops the chain. */
	if ((_unpack_jobacct_id(&acct->max_vsize_id, rpc_version, buffer)
	     != SLURM_SUCCESS) ||
	    (_unpack_jobacct_id(&acct->max_rss_id, rpc_version, buffer)
	     != SLURM_SUCCESS) ||
	    (_unpack_jobacct_id(&acct->max_pages_id, rpc_version, buffer)
	     != SLURM_SUCCESS) ||
	    (_unpack_jobacct_id(&acct->min_cpu_id, rpc_version, buffer)
	     != SLURM_SUCCESS) ||
	    (_unpack_jobacct_id(&acct->max_disk_read_id, rpc_version, buffer)
	     != SLURM_SUCCESS) ||
	    (_unpack_jobacct_id(&acct->max_disk_write_id, rpc_version, buffer)
	     != SLURM_SUCCESS))
		goto unpack_error;

	return SLURM_SUCCESS;

unpack_error:
	debug2("jobacctinfo_unpack: unpack_error: size_buf(buffer) %u",
	       size_buf(buffer));
	if (alloc)
		xfree(*jobacct);
	return SLURM_ERROR;
}
int main (int argc, char *argv[]) { int i, pidfd; int blocked_signals[] = {SIGPIPE, 0}; int cc; char *oom_value; uint32_t slurmd_uid = 0; uint32_t curr_uid = 0; char time_stamp[256]; log_options_t lopts = LOG_OPTS_INITIALIZER; /* NOTE: logfile is NULL at this point */ log_init(argv[0], lopts, LOG_DAEMON, NULL); /* * Make sure we have no extra open files which * would be propagated to spawned tasks. */ cc = sysconf(_SC_OPEN_MAX); for (i = 3; i < cc; i++) close(i); /* * Drop supplementary groups. */ if (geteuid() == 0) { if (setgroups(0, NULL) != 0) { fatal("Failed to drop supplementary groups, " "setgroups: %m"); } } else { debug("Not running as root. Can't drop supplementary groups"); } /* * Create and set default values for the slurmd global * config variable "conf" */ conf = xmalloc(sizeof(slurmd_conf_t)); _init_conf(); conf->argv = &argv; conf->argc = &argc; if (_slurmd_init() < 0) { error( "slurmd initialization failed" ); fflush( NULL ); exit(1); } slurmd_uid = slurm_get_slurmd_user_id(); curr_uid = getuid(); if (curr_uid != slurmd_uid) { struct passwd *pw = NULL; char *slurmd_user = NULL; char *curr_user = NULL; /* since when you do a getpwuid you get a pointer to a * structure you have to do a xstrdup on the first * call or your information will just get over * written. This is a memory leak, but a fatal is * called right after so it isn't that big of a deal. */ if ((pw=getpwuid(slurmd_uid))) slurmd_user = xstrdup(pw->pw_name); if ((pw=getpwuid(curr_uid))) curr_user = pw->pw_name; fatal("You are running slurmd as something " "other than user %s(%d). If you want to " "run as this user add SlurmdUser=%s " "to the slurm.conf file.", slurmd_user, slurmd_uid, curr_user); } init_setproctitle(argc, argv); xsignal(SIGTERM, &_term_handler); xsignal(SIGINT, &_term_handler); xsignal(SIGHUP, &_hup_handler ); xsignal_block(blocked_signals); debug3("slurmd initialization successful"); /* * Become a daemon if desired. 
* Do not chdir("/") or close all fd's */ if (conf->daemonize) { if (daemon(1,1) == -1) error("Couldn't daemonize slurmd: %m"); } test_core_limit(); info("slurmd version %s started", SLURM_VERSION_STRING); debug3("finished daemonize"); if ((oom_value = getenv("SLURMD_OOM_ADJ"))) { i = atoi(oom_value); debug("Setting slurmd oom_adj to %d", i); set_oom_adj(i); } _kill_old_slurmd(); if (conf->mlock_pages) { /* * Call mlockall() if available to ensure slurmd * doesn't get swapped out */ #ifdef _POSIX_MEMLOCK if (mlockall (MCL_FUTURE | MCL_CURRENT) < 0) error ("failed to mlock() slurmd pages: %m"); #else error ("mlockall() system call does not appear to be available"); #endif /* _POSIX_MEMLOCK */ } /* * Restore any saved revoked credential information */ if (!conf->cleanstart && (_restore_cred_state(conf->vctx) < 0)) return SLURM_FAILURE; if (jobacct_gather_init() != SLURM_SUCCESS) fatal("Unable to initialize jobacct_gather"); if (job_container_init() < 0) fatal("Unable to initialize job_container plugin."); if (container_g_restore(conf->spooldir, !conf->cleanstart)) error("Unable to restore job_container state."); if (switch_g_node_init() < 0) fatal("Unable to initialize interconnect."); if (conf->cleanstart && switch_g_clear_node_state()) fatal("Unable to clear interconnect state."); switch_g_slurmd_init(); _create_msg_socket(); conf->pid = getpid(); /* This has to happen after daemon(), which closes all fd's, so we keep the write lock of the pidfile. */ pidfd = create_pidfile(conf->pidfile, 0); rfc2822_timestamp(time_stamp, sizeof(time_stamp)); info("%s started on %s", slurm_prog_name, time_stamp); _install_fork_handlers(); list_install_fork_handlers(); slurm_conf_install_fork_handlers(); /* * Initialize any plugins */ if (slurmd_plugstack_init()) fatal("failed to initialize slurmd_plugstack"); _spawn_registration_engine(); _msg_engine(); /* * Close fd here, otherwise we'll deadlock since create_pidfile() * flocks the pidfile. 
*/ if (pidfd >= 0) /* valid pidfd, non-error */ (void) close(pidfd); /* Ignore errors */ if (unlink(conf->pidfile) < 0) error("Unable to remove pidfile `%s': %m", conf->pidfile); _wait_for_all_threads(120); _slurmd_fini(); _destroy_conf(); slurm_crypto_fini(); /* must be after _destroy_conf() */ info("Slurmd shutdown completing"); log_fini(); return 0; }
/*
 * jobacctinfo_unpack - decode a jobacctinfo record from a buffer.
 *
 * IN/OUT jobacct - record to fill; when alloc is set a new record is
 *	xmalloc'ed and stored here before any field is decoded
 * IN rpc_version - version the buffer was packed with; a DBD version is
 *	first translated with slurmdbd_translate_rpc()
 * IN protocol_type - PROTOCOL_TYPE_DBD or another protocol type
 * IN buffer - buffer to read from
 * IN alloc - true to allocate *jobacct here; on an unpack error the record
 *	is then released with xfree()
 * RET SLURM_SUCCESS, or SLURM_ERROR if the (translated) version is older
 *	than SLURM_2_6_PROTOCOL_VERSION or unpacking fails.
 *
 * Two layouts are handled:
 *  - SLURM_14_03_PROTOCOL_VERSION and later: the data starts with a one-byte
 *    flag; a flag of 0 returns SLURM_SUCCESS without touching *jobacct. The
 *    memory and page counters are 64-bit.
 *  - SLURM_2_6_PROTOCOL_VERSION up to 14.03: there is no flag byte. Nothing
 *    is decoded when plugin polling is off and the data is not from the DBD.
 *    The memory and page counters are 32-bit on the wire.
 */
extern int jobacctinfo_unpack(jobacctinfo_t **jobacct, uint16_t rpc_version,
			      uint16_t protocol_type, Buf buffer, bool alloc)
{
	uint32_t uint32_tmp;
	uint8_t uint8_tmp;
	bool no_pack;

	jobacct_gather_init();
	no_pack = (!plugin_polling && (protocol_type != PROTOCOL_TYPE_DBD));

	/* The function can take calls from both DBD and from regular
	 * SLURM functions.  We choose to standardize on using the
	 * SLURM_PROTOCOL_VERSION here so if PROTOCOL_TYPE_DBD comes
	 * in we need to translate the DBD rpc_version to use the
	 * SLURM protocol_version.
	 *
	 * If this function ever changes make sure the
	 * slurmdbd_translate_rpc function has been updated with the
	 * new protocol version.
	 */
	if (protocol_type == PROTOCOL_TYPE_DBD)
		rpc_version = slurmdbd_translate_rpc(rpc_version);

	if (rpc_version >= SLURM_14_03_PROTOCOL_VERSION) {
		safe_unpack8(&uint8_tmp, buffer);
		if (uint8_tmp == (uint8_t) 0)
			return SLURM_SUCCESS;
		if (alloc)
			*jobacct = xmalloc(sizeof(struct jobacctinfo));
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->user_cpu_sec = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->user_cpu_usec = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->sys_cpu_sec = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->sys_cpu_usec = uint32_tmp;
		safe_unpack64(&(*jobacct)->max_vsize, buffer);
		safe_unpack64(&(*jobacct)->tot_vsize, buffer);
		safe_unpack64(&(*jobacct)->max_rss, buffer);
		safe_unpack64(&(*jobacct)->tot_rss, buffer);
		safe_unpack64(&(*jobacct)->max_pages, buffer);
		safe_unpack64(&(*jobacct)->tot_pages, buffer);
		safe_unpack32(&(*jobacct)->min_cpu, buffer);
		safe_unpack32(&(*jobacct)->tot_cpu, buffer);
		safe_unpack32(&(*jobacct)->act_cpufreq, buffer);
		safe_unpack32(&(*jobacct)->energy.consumed_energy, buffer);

		safe_unpackdouble(&(*jobacct)->max_disk_read, buffer);
		safe_unpackdouble(&(*jobacct)->tot_disk_read, buffer);
		safe_unpackdouble(&(*jobacct)->max_disk_write, buffer);
		safe_unpackdouble(&(*jobacct)->tot_disk_write, buffer);

		if (_unpack_jobacct_id(&(*jobacct)->max_vsize_id, rpc_version,
				       buffer) != SLURM_SUCCESS)
			goto unpack_error;
		if (_unpack_jobacct_id(&(*jobacct)->max_rss_id, rpc_version,
				       buffer) != SLURM_SUCCESS)
			goto unpack_error;
		if (_unpack_jobacct_id(&(*jobacct)->max_pages_id, rpc_version,
				       buffer) != SLURM_SUCCESS)
			goto unpack_error;
		if (_unpack_jobacct_id(&(*jobacct)->min_cpu_id, rpc_version,
				       buffer) != SLURM_SUCCESS)
			goto unpack_error;
		if (_unpack_jobacct_id(&(*jobacct)->max_disk_read_id,
				       rpc_version, buffer) != SLURM_SUCCESS)
			goto unpack_error;
		if (_unpack_jobacct_id(&(*jobacct)->max_disk_write_id,
				       rpc_version, buffer) != SLURM_SUCCESS)
			goto unpack_error;
	} else if (rpc_version >= SLURM_2_6_PROTOCOL_VERSION) {
		if (no_pack)
			return SLURM_SUCCESS;
		if (alloc)
			*jobacct = xmalloc(sizeof(struct jobacctinfo));
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->user_cpu_sec = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->user_cpu_usec = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->sys_cpu_sec = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->sys_cpu_usec = uint32_tmp;

		/*
		 * These six counters are 32-bit in this layout but 64-bit in
		 * the record. Decode into a 32-bit temporary and assign, so
		 * the whole field is written. Unpacking through a
		 * (uint32_t *) cast would fill only half of the field, and
		 * on a big-endian host it would be the wrong half.
		 */
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->max_vsize = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->tot_vsize = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->max_rss = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->tot_rss = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->max_pages = uint32_tmp;
		safe_unpack32(&uint32_tmp, buffer);
		(*jobacct)->tot_pages = uint32_tmp;

		safe_unpack32(&(*jobacct)->min_cpu, buffer);
		safe_unpack32(&(*jobacct)->tot_cpu, buffer);
		safe_unpack32(&(*jobacct)->act_cpufreq, buffer);
		safe_unpack32(&(*jobacct)->energy.consumed_energy, buffer);

		safe_unpackdouble(&(*jobacct)->max_disk_read, buffer);
		safe_unpackdouble(&(*jobacct)->tot_disk_read, buffer);
		safe_unpackdouble(&(*jobacct)->max_disk_write, buffer);
		safe_unpackdouble(&(*jobacct)->tot_disk_write, buffer);

		if (_unpack_jobacct_id(&(*jobacct)->max_vsize_id, rpc_version,
				       buffer) != SLURM_SUCCESS)
			goto unpack_error;
		if (_unpack_jobacct_id(&(*jobacct)->max_rss_id, rpc_version,
				       buffer) != SLURM_SUCCESS)
			goto unpack_error;
		if (_unpack_jobacct_id(&(*jobacct)->max_pages_id, rpc_version,
				       buffer) != SLURM_SUCCESS)
			goto unpack_error;
		if (_unpack_jobacct_id(&(*jobacct)->min_cpu_id, rpc_version,
				       buffer) != SLURM_SUCCESS)
			goto unpack_error;
		if (_unpack_jobacct_id(&(*jobacct)->max_disk_read_id,
				       rpc_version, buffer) != SLURM_SUCCESS)
			goto unpack_error;
		if (_unpack_jobacct_id(&(*jobacct)->max_disk_write_id,
				       rpc_version, buffer) != SLURM_SUCCESS)
			goto unpack_error;
	} else {
		info("jobacctinfo_unpack version %u not supported",
		     rpc_version);
		return SLURM_ERROR;
	}

	return SLURM_SUCCESS;

unpack_error:
	debug2("jobacctinfo_unpack: unpack_error: size_buf(buffer) %u",
	       size_buf(buffer));
	if (alloc)
		xfree(*jobacct);
	return SLURM_ERROR;
}