/*
 * Build a PERSIST_RC response and pack it into a buffer.
 *
 * IN persist_conn - connection passed through to slurm_persist_msg_pack()
 * IN rc           - return code stored in the response
 * IN comment      - comment pointer stored in the response; the pointer
 *                   itself is stored, no copy is made in this function
 * IN ret_info     - value stored in the response's ret_info field
 * RET the result of slurm_persist_msg_pack() for the response
 */
extern Buf slurm_persist_make_rc_msg(slurm_persist_conn_t *persist_conn,
				     uint32_t rc, char *comment,
				     uint16_t ret_info)
{
	persist_rc_msg_t rc_msg;
	persist_msg_t reply;

	/* Payload: start from all-zero, then fill in the caller's values. */
	memset(&rc_msg, 0, sizeof(rc_msg));
	rc_msg.rc = rc;
	rc_msg.comment = comment;
	rc_msg.ret_info = ret_info;

	/*
	 * Envelope: points at the stack payload above, which stays valid
	 * for the duration of the pack call below.
	 */
	memset(&reply, 0, sizeof(reply));
	reply.msg_type = PERSIST_RC;
	reply.data = &rc_msg;

	return slurm_persist_msg_pack(persist_conn, &reply);
}
/*
 * Send an RPC to the SlurmDBD. Do not wait for the reply. The RPC
 * will be queued and processed later if the SlurmDBD is not responding.
 * NOTE: slurm_open_slurmdbd_conn() must have been called with callbacks set
 *
 * IN rpc_version - not referenced by this function
 * IN req         - message to pack and append to the agent queue
 *
 * Returns SLURM_SUCCESS or an error code (SLURM_ERROR when the message
 * cannot be packed, the agent cannot be created, or the queue is full).
 */
extern int send_slurmdbd_msg(uint16_t rpc_version, slurmdbd_msg_t *req)
{
	Buf buffer;
	int cnt, rc = SLURM_SUCCESS;
	static time_t syslog_time = 0;
	static int max_agent_queue = 0;

	/*
	 * Whatever our max job count is multiplied by 2 plus node count
	 * multiplied by 4 or MAX_AGENT_QUEUE which ever is bigger.
	 * Computed on first use and cached in the static above.
	 */
	if (!max_agent_queue)
		max_agent_queue =
			MAX(MAX_AGENT_QUEUE,
			    ((slurmctld_conf.max_job_cnt * 2) +
			     (node_record_count * 4)));

	buffer = slurm_persist_msg_pack(slurmdbd_conn, (persist_msg_t *)req);
	if (!buffer)	/* pack error */
		return SLURM_ERROR;

	slurm_mutex_lock(&agent_lock);

	/* Create the agent on demand; give up if that does not succeed. */
	if ((agent_tid == 0) || (agent_list == NULL)) {
		_create_agent();
		if ((agent_tid == 0) || (agent_list == NULL)) {
			slurm_mutex_unlock(&agent_lock);
			free_buf(buffer);
			return SLURM_ERROR;
		}
	}

	cnt = list_count(agent_list);
	if ((cnt >= (max_agent_queue / 2)) &&
	    (difftime(time(NULL), syslog_time) > 120)) {
		/* Record critical error every 120 seconds */
		syslog_time = time(NULL);
		error("slurmdbd: agent queue filling (%d), RESTART SLURMDBD NOW",
		      cnt);
		syslog(LOG_CRIT, "*** RESTART SLURMDBD NOW ***");
		if (slurmdbd_conn->trigger_callbacks.dbd_fail)
			(slurmdbd_conn->trigger_callbacks.dbd_fail)();
	}

	/*
	 * Only one slot left: try to make room, first via step requests,
	 * then (if the count is unchanged) via job start requests.
	 * NOTE(review): assumes each purge helper returns the number of
	 * entries it removed from agent_list -- confirm against the helpers.
	 */
	if (cnt == (max_agent_queue - 1))
		cnt -= _purge_step_req();
	if (cnt == (max_agent_queue - 1))
		cnt -= _purge_job_start_req();

	if (cnt < max_agent_queue) {
		if (list_enqueue(agent_list, buffer) == NULL)
			fatal("list_enqueue: memory allocation failure");
	} else {
		/* cnt is an int, so print it with %d as the message above does */
		error("slurmdbd: agent queue is full (%d), discarding %s:%u request",
		      cnt,
		      slurmdbd_msg_type_2_str(req->msg_type, 1),
		      req->msg_type);
		if (slurmdbd_conn->trigger_callbacks.acct_full)
			(slurmdbd_conn->trigger_callbacks.acct_full)();
		free_buf(buffer);
		rc = SLURM_ERROR;
	}

	/* Signalled on both the enqueue and the discard path. */
	slurm_cond_broadcast(&agent_cond);
	slurm_mutex_unlock(&agent_lock);
	return rc;
}