/*
 * _launch_one_app - Thread entry point that launches one application
 * (one component of a pack job) and blocks until its step completes.
 *
 * data IN - heap-allocated _launch_app_data_t describing the app;
 *	xfree()'d here before returning, so the caller must not touch it
 *	after spawning this thread.
 * RET NULL always.
 *
 * The function-scope statics below form a one-shot barrier shared by all
 * _launch_one_app threads in this process: exactly one thread runs
 * pre_launch_srun_job(), the rest block on launch_cond until launch_fini
 * is set.
 */
static void *_launch_one_app(void *data)
{
	static pthread_mutex_t launch_mutex = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t launch_cond = PTHREAD_COND_INITIALIZER;
	static bool launch_begin = false;
	static bool launch_fini = false;
	_launch_app_data_t *opts = (_launch_app_data_t *) data;
	opt_t *opt_local = opts->opt_local;
	srun_job_t *job = opts->job;
	bool got_alloc = opts->got_alloc;
	slurm_step_io_fds_t cio_fds = SLURM_STEP_IO_FDS_INITIALIZER;
	slurm_step_launch_callbacks_t step_callbacks;

	memset(&step_callbacks, 0, sizeof(step_callbacks));
	step_callbacks.step_signal = launch_g_fwd_signal;

	/*
	 * Run pre-launch once for entire pack job
	 */
	slurm_mutex_lock(&launch_mutex);
	if (!launch_begin) {
		launch_begin = true;
		/* Drop the mutex while pre-launch runs so the other
		 * threads can reach the wait loop below */
		slurm_mutex_unlock(&launch_mutex);

		pre_launch_srun_job(job, 0, 1, opt_local);

		slurm_mutex_lock(&launch_mutex);
		launch_fini = true;
		slurm_cond_broadcast(&launch_cond);
	} else {
		/* Another thread is (or was) running pre-launch; wait
		 * until it reports completion */
		while (!launch_fini)
			slurm_cond_wait(&launch_cond, &launch_mutex);
	}
	slurm_mutex_unlock(&launch_mutex);

relaunch:
	launch_common_set_stdio_fds(job, &cio_fds, opt_local);

	if (!launch_g_step_launch(job, &cio_fds, &global_rc,
				  &step_callbacks, opt_local)) {
		/* A -1 return from launch_g_step_wait() requests that the
		 * step be relaunched from scratch */
		if (launch_g_step_wait(job, got_alloc, opt_local) == -1)
			goto relaunch;
	}

	/* Notify the coordinating thread that this step has finished */
	if (opts->step_mutex) {
		slurm_mutex_lock(opts->step_mutex);
		(*opts->step_cnt)--;
		slurm_cond_broadcast(opts->step_cond);
		slurm_mutex_unlock(opts->step_mutex);
	}
	xfree(data);
	return NULL;
}
/* _wr_wrunlock - Issue a write unlock on the specified data type */
static void _wr_wrunlock(lock_datatype_t datatype)
{
	slurm_mutex_lock(&locks_mutex);
	/* Release our write lock, then wake every waiter so blocked
	 * readers and writers can re-test the lock counts */
	slurmctld_locks.entity[write_lock(datatype)]--;
	slurm_cond_broadcast(&locks_cond);
	slurm_mutex_unlock(&locks_mutex);
}
/*
 * _shutdown_agent - Stop the slurmdbd agent thread. Gives the agent up
 * to ~5 seconds to exit on its own, then cancels it. Always joins the
 * thread before returning. No-op if no agent was started (agent_tid == 0).
 */
static void _shutdown_agent(void)
{
	int i;

	if (agent_tid) {
		/* Record shutdown time; the agent's loop is expected to
		 * observe this and terminate */
		slurmdbd_shutdown = time(NULL);
		for (i=0; i<50; i++) {	/* up to 5 secs total */
			/* Wake the agent if it is blocked on its condvar */
			slurm_cond_broadcast(&agent_cond);
			usleep(100000);	/* 0.1 sec per try */
			/* pthread_kill() fails once the thread has exited,
			 * so a non-zero return ends the polling early */
			if (pthread_kill(agent_tid, SIGUSR1))
				break;
		}
		/* On rare occasions agent thread may not end quickly,
		 * perhaps due to communication problems with slurmdbd.
		 * Cancel it and join before returning or we could remove
		 * and leave the agent without valid data */
		if (pthread_kill(agent_tid, 0) == 0) {
			error("slurmdbd: agent failed to shutdown gracefully");
			error("slurmdbd: unable to save pending requests");
			pthread_cancel(agent_tid);
		}
		pthread_join(agent_tid, NULL);
		agent_tid = 0;
	}
}
/*
 * cr_callback - Checkpoint/restart callback registered with BLCR.
 * Invoked when a checkpoint of this srun process is requested:
 * checkpoints the launched step's tasks (if any), then dumps this
 * process via cr_checkpoint(). On restart it re-forks srun and clears
 * the step-launched state.
 * unused IN - callback cookie, ignored.
 * RET 0 always (errors are reported via error()/fatal()).
 */
static int cr_callback(void *unused)
{
	int rc;
	char *step_image_dir = NULL;

	rc = CR_CHECKPOINT_READY;
	if (step_launched) {
		/* Checkpoint the step's tasks first, so the step image
		 * matches the srun image about to be dumped */
		step_image_dir = get_step_image_dir(1);
		if (step_image_dir == NULL) {
			error ("failed to get step image directory");
			rc = CR_CHECKPOINT_PERM_FAILURE;
		} else if (slurm_checkpoint_tasks(jobid,
						  stepid,
						  time(NULL), /* timestamp */
						  step_image_dir,
						  60, /* wait */
						  nodelist) != SLURM_SUCCESS) {
			error ("failed to checkpoint step tasks");
			rc = CR_CHECKPOINT_PERM_FAILURE;
		}
		xfree(step_image_dir);
	}

	rc = cr_checkpoint(rc);	/* dump */

	if (rc < 0) {
		fatal("checkpoint failed: %s", cr_strerror(errno));
	} else if (rc == 0) {
		/* continue, nothing to do */
	} else {
		/* restarted */
		if (srun_pid) {	/* srun forked */
			if (step_launched) {
				/* Point the re-exec'd srun at the step's
				 * restart image directory */
				step_image_dir = get_step_image_dir(0);
				if (step_image_dir == NULL) {
					fatal("failed to get step image directory");
				}
				update_env("SLURM_RESTART_DIR",
					   step_image_dir);
				xfree(step_image_dir);
			}
			if (fork_exec_srun()) {
				fatal("failed fork/exec srun");
			}
		}

		/* XXX: step_launched => listen_fd valid */
		step_launched = 0;
		debug2("step not launched.");

		/* Wake anyone waiting on the step-launch state change */
		slurm_cond_broadcast(&step_launch_cond);
	}
	return 0;
}
static int _wait_for_thread (pthread_t thread_id) { int i; for (i=0; i<20; i++) { slurm_cond_broadcast(&comp_list_cond); usleep(1000 * i); if (pthread_kill(thread_id, 0)) return SLURM_SUCCESS; } error("Could not kill jobcomp script pthread"); return SLURM_ERROR; }
int slurm_jobcomp_log_record (struct job_record *record) { struct jobcomp_info * job; debug3("Entering slurm_jobcomp_log_record"); if (!(job = _jobcomp_info_create (record))) return error ("jobcomp/script: Failed to create job info!"); slurm_mutex_lock(&comp_list_mutex); list_append(comp_list, job); slurm_cond_broadcast(&comp_list_cond); slurm_mutex_unlock(&comp_list_mutex); return SLURM_SUCCESS; }
/*
 * slurm_persist_conn_free_thread_loc - Release the persistent-connection
 * service slot at the given index and decrement the active thread count.
 * thread_loc IN - index into persist_service_conn[] previously assigned
 *	to this connection's thread.
 * NOTE(review): the old header comment described a "my_tid" thread-ID
 * parameter, which did not match this function's actual argument.
 */
extern void slurm_persist_conn_free_thread_loc(int thread_loc)
{
	/* we will handle this in the fini */
	if (shutdown_time)
		return;

	slurm_mutex_lock(&thread_count_lock);
	if (thread_count > 0)
		thread_count--;
	else
		error("thread_count underflow");

	/* Free the slot so a future connection can claim this index */
	_destroy_persist_service(persist_service_conn[thread_loc]);
	persist_service_conn[thread_loc] = NULL;

	/* Wake any thread waiting for a service slot to free up */
	slurm_cond_broadcast(&thread_count_cond);
	slurm_mutex_unlock(&thread_count_lock);
}
/* _wr_rdlock - Issue a read lock on the specified data type
 *	Wait until there are no write locks AND
 *	no pending write locks (write_wait_lock == 0)
 *
 * NOTE: Always favoring write locks can result in starvation for
 * read locks. To prevent this, read locks were permitted to be satisfied
 * after 10 consecutive write locks. This prevented starvation, but
 * deadlock has been observed with some values for the count.
 *
 * datatype IN  - which slurmctld entity's lock to acquire
 * wait_lock IN - true to block until available, false for a
 *	non-blocking attempt
 * RET true on success, false only when wait_lock is false and the
 *	lock was unavailable
 */
static bool _wr_rdlock(lock_datatype_t datatype, bool wait_lock)
{
	bool success = true;

	slurm_mutex_lock(&locks_mutex);
	while (1) {
#if 1
		/* Writers (active or waiting) always win; the anti-starvation
		 * variant below is disabled — see NOTE above */
		if ((slurmctld_locks.entity[write_lock(datatype)] == 0) &&
		    (slurmctld_locks.entity[write_wait_lock(datatype)] == 0)) {
#else
		/* SEE NOTE ABOVE */
		if ((slurmctld_locks.entity[write_lock(datatype)] == 0) &&
		    ((slurmctld_locks.entity[write_wait_lock(datatype)] == 0) ||
		     (slurmctld_locks.entity[write_cnt_lock(datatype)] > 10))) {
#endif
			/* Lock granted: bump reader count and reset the
			 * consecutive-writer counter */
			slurmctld_locks.entity[read_lock(datatype)]++;
			slurmctld_locks.entity[write_cnt_lock(datatype)] = 0;
			break;
		} else if (!wait_lock) {
			/* Non-blocking attempt failed */
			success = false;
			break;
		} else {	/* wait for state change and retry */
			slurm_cond_wait(&locks_cond, &locks_mutex);
			/* Exit immediately on shutdown (see
			 * kill_locked_threads) */
			if (kill_thread)
				pthread_exit(NULL);
		}
	}
	slurm_mutex_unlock(&locks_mutex);
	return success;
}

/* _wr_rdunlock - Issue a read unlock on the specified data type */
static void _wr_rdunlock(lock_datatype_t datatype)
{
	slurm_mutex_lock(&locks_mutex);
	/* Drop our reader count, then wake waiters so a pending writer
	 * can re-test the counts */
	slurmctld_locks.entity[read_lock(datatype)]--;
	slurm_cond_broadcast(&locks_cond);
	slurm_mutex_unlock(&locks_mutex);
}
/*
 * _msg_aggregation_sender()
 *
 * Start and terminate message collection windows.
 * Send collected msgs to next collector node or final destination
 * at window expiration.
 */
static void * _msg_aggregation_sender(void *arg)
{
	struct timeval now;
	struct timespec timeout;
	slurm_msg_t msg;
	composite_msg_t cmp;

	/* NOTE(review): running is set before msg_collection.mutex is
	 * acquired; assumes no reader needs this store synchronized —
	 * confirm against the code that starts/stops this thread */
	msg_collection.running = 1;

	slurm_mutex_lock(&msg_collection.mutex);
	while (msg_collection.running) {
		/* Wait for a new msg to be collected */
		slurm_cond_wait(&msg_collection.cond, &msg_collection.mutex);

		/* Shutting down with nothing queued: exit the loop */
		if (!msg_collection.running &&
		    !list_count(msg_collection.msg_list))
			break;

		/* A msg has been collected; start new window */
		gettimeofday(&now, NULL);
		/* msg_collection.window is in milliseconds; build an
		 * absolute timespec deadline, then normalize tv_nsec
		 * into [0, 1e9) */
		timeout.tv_sec = now.tv_sec + (msg_collection.window / 1000);
		timeout.tv_nsec = (now.tv_usec * 1000) +
			(1000000 * (msg_collection.window % 1000));
		timeout.tv_sec += timeout.tv_nsec / 1000000000;
		timeout.tv_nsec %= 1000000000;

		slurm_cond_timedwait(&msg_collection.cond,
				     &msg_collection.mutex, &timeout);

		if (!msg_collection.running &&
		    !list_count(msg_collection.msg_list))
			break;

		/* Suspend collection while we drain the current batch */
		msg_collection.max_msgs = true;

		/* Msg collection window has expired and message collection
		 * is suspended; now build and send composite msg */
		memset(&msg, 0, sizeof(slurm_msg_t));
		memset(&cmp, 0, sizeof(composite_msg_t));

		memcpy(&cmp.sender, &msg_collection.node_addr,
		       sizeof(slurm_addr_t));
		/* Take ownership of the collected list; replace it with a
		 * fresh empty list for the next window */
		cmp.msg_list = msg_collection.msg_list;
		msg_collection.msg_list =
			list_create(slurm_free_comp_msg_list);
		msg_collection.max_msgs = false;

		slurm_msg_t_init(&msg);
		msg.msg_type = MESSAGE_COMPOSITE;
		msg.protocol_version = SLURM_PROTOCOL_VERSION;
		msg.data = &cmp;

		if (_send_to_next_collector(&msg) != SLURM_SUCCESS) {
			error("_msg_aggregation_engine: Unable to send "
			      "composite msg: %m");
		}
		FREE_NULL_LIST(cmp.msg_list);

		/* Resume message collection */
		slurm_cond_broadcast(&msg_collection.cond);
	}
	slurm_mutex_unlock(&msg_collection.mutex);
	return NULL;
}
/* kill_locked_threads - Kill all threads waiting on semaphores */ extern void kill_locked_threads(void) { kill_thread = 1; slurm_cond_broadcast(&locks_cond); }
/* Send an RPC to the SlurmDBD. Do not wait for the reply. The RPC
 * will be queued and processed later if the SlurmDBD is not responding.
 * NOTE: slurm_open_slurmdbd_conn() must have been called with callbacks set
 *
 * rpc_version IN - protocol version (NOTE(review): not referenced in this
 *	body; presumably consumed elsewhere — confirm)
 * req IN - the message to pack and enqueue for the agent
 *
 * Returns SLURM_SUCCESS or an error code */
extern int send_slurmdbd_msg(uint16_t rpc_version, slurmdbd_msg_t *req)
{
	Buf buffer;
	int cnt, rc = SLURM_SUCCESS;
	static time_t syslog_time = 0;
	static int max_agent_queue = 0;

	/*
	 * Whatever our max job count is multiplied by 2 plus node count
	 * multiplied by 4 or MAX_AGENT_QUEUE which ever is bigger.
	 * Computed once and cached in the static above.
	 */
	if (!max_agent_queue)
		max_agent_queue =
			MAX(MAX_AGENT_QUEUE,
			    ((slurmctld_conf.max_job_cnt * 2) +
			     (node_record_count * 4)));

	/* Pack before taking the agent lock; the packed buffer is what
	 * gets queued (ownership passes to agent_list on success) */
	buffer = slurm_persist_msg_pack(
		slurmdbd_conn, (persist_msg_t *)req);
	if (!buffer)	/* pack error */
		return SLURM_ERROR;

	slurm_mutex_lock(&agent_lock);
	if ((agent_tid == 0) || (agent_list == NULL)) {
		/* Agent not running (or exited); try to start it now */
		_create_agent();
		if ((agent_tid == 0) || (agent_list == NULL)) {
			slurm_mutex_unlock(&agent_lock);
			free_buf(buffer);
			return SLURM_ERROR;
		}
	}
	cnt = list_count(agent_list);
	if ((cnt >= (max_agent_queue / 2)) &&
	    (difftime(time(NULL), syslog_time) > 120)) {
		/* Record critical error every 120 seconds */
		syslog_time = time(NULL);
		error("slurmdbd: agent queue filling (%d), RESTART SLURMDBD NOW",
		      cnt);
		syslog(LOG_CRIT, "*** RESTART SLURMDBD NOW ***");
		if (slurmdbd_conn->trigger_callbacks.dbd_fail)
			(slurmdbd_conn->trigger_callbacks.dbd_fail)();
	}
	/* One slot from full: try to reclaim space by purging queued
	 * step and job-start records.
	 * NOTE(review): these tests use == (max_agent_queue - 1), so a
	 * count already at or past the limit is not purged — confirm
	 * this is intended */
	if (cnt == (max_agent_queue - 1))
		cnt -= _purge_step_req();
	if (cnt == (max_agent_queue - 1))
		cnt -= _purge_job_start_req();
	if (cnt < max_agent_queue) {
		if (list_enqueue(agent_list, buffer) == NULL)
			fatal("list_enqueue: memory allocation failure");
	} else {
		/* Queue full: drop this request and fire the acct_full
		 * trigger */
		error("slurmdbd: agent queue is full (%u), discarding %s:%u request",
		      cnt, slurmdbd_msg_type_2_str(req->msg_type, 1),
		      req->msg_type);
		if (slurmdbd_conn->trigger_callbacks.acct_full)
			(slurmdbd_conn->trigger_callbacks.acct_full)();
		free_buf(buffer);
		rc = SLURM_ERROR;
	}
	/* Wake the agent so it processes the new entry promptly */
	slurm_cond_broadcast(&agent_cond);

	slurm_mutex_unlock(&agent_lock);
	return rc;
}
/* Checkpoint processing pthread
 * Never returns, but is cancelled on plugin termination
 *
 * arg IN - heap-allocated struct ckpt_req describing the checkpoint
 *	request; freed via _ckpt_req_free() before this thread exits.
 */
static void *_ckpt_agent_thr(void *arg)
{
	struct ckpt_req *req = (struct ckpt_req *)arg;
	int rc;
	/* Locks: write job */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
	struct job_record *job_ptr;
	struct step_record *step_ptr;
	struct check_job_info *check_ptr;

	/* only perform ckpt operation of ONE JOB: wait here until either
	 * no job is being checkpointed or it is the same job as ours */
	slurm_mutex_lock(&ckpt_agent_mutex);
	while (ckpt_agent_jobid && ckpt_agent_jobid != req->job_id) {
		slurm_cond_wait(&ckpt_agent_cond, &ckpt_agent_mutex);
	}
	ckpt_agent_jobid = req->job_id;
	ckpt_agent_count ++;
	slurm_mutex_unlock(&ckpt_agent_mutex);

	debug3("checkpoint/blcr: sending checkpoint tasks request %u to %u.%u",
	       req->op, req->job_id, req->step_id);

	rc = checkpoint_tasks(req->job_id, req->step_id, req->begin_time,
			      req->image_dir, req->wait, req->nodelist);
	if (rc != SLURM_SUCCESS) {
		error("checkpoint/blcr: error on checkpoint request %u to "
		      "%u.%u: %s", req->op, req->job_id, req->step_id,
		      slurm_strerror(rc));
	}
	if (req->op == CHECK_REQUEUE)
		_requeue_when_finished(req->job_id);

	/* Record the checkpoint result in the job/step's check_job info;
	 * requires the slurmctld job write lock */
	lock_slurmctld(job_write_lock);
	job_ptr = find_job_record(req->job_id);
	if (!job_ptr) {
		error("_ckpt_agent_thr: job finished");
		goto out;
	}
	if (req->step_id == SLURM_BATCH_SCRIPT) {	/* batch job */
		check_ptr = (struct check_job_info *)job_ptr->check_job;
	} else {
		step_ptr = find_step_record(job_ptr, req->step_id);
		if (! step_ptr) {
			error("_ckpt_agent_thr: step finished");
			goto out;
		}
		check_ptr = (struct check_job_info *)step_ptr->check_job;
	}
	check_ptr->time_stamp = 0;
	check_ptr->error_code = rc;
	if (check_ptr->error_code != SLURM_SUCCESS)
		check_ptr->error_msg = xstrdup(slurm_strerror(rc));

 out:
	/* Optionally signal the job/step that the checkpoint finished */
	if (req->sig_done) {
		_send_sig(req->job_id, req->step_id, req->sig_done,
			  req->nodelist);
	}

	unlock_slurmctld(job_write_lock);

	_on_ckpt_complete(req->gid, req->uid, req->job_id, req->step_id,
			  req->image_dir, rc);

	/* Release the one-job-at-a-time gate; the last thread for this
	 * job clears the jobid and wakes any waiting checkpoint threads */
	slurm_mutex_lock(&ckpt_agent_mutex);
	ckpt_agent_count --;
	if (ckpt_agent_count == 0) {
		ckpt_agent_jobid = 0;
		slurm_cond_broadcast(&ckpt_agent_cond);
	}
	slurm_mutex_unlock(&ckpt_agent_mutex);

	_ckpt_req_free(req);
	return NULL;
}