/*
 * check_header_version - decide whether the protocol version carried by a
 *	received message header is acceptable to this node
 * IN header - the message header received
 * RET - SLURM_PROTOCOL_SUCCESS when the header is accepted; a rejection is
 *	reported through slurm_seterrno_ret(SLURM_PROTOCOL_VERSION_ERROR)
 */
int check_header_version(header_t * header)
{
	uint16_t expected = SLURM_PROTOCOL_VERSION;
	int known_version;
	int step_rpc;

	/* When a working cluster record is set, its RPC version is the
	 * one an exact match is tested against. */
	if (working_cluster_rec)
		expected = working_cluster_rec->rpc_version;

	/* True when the header matches SLURM_PROTOCOL_VERSION,
	 * SLURM_ONE_BACK_PROTOCOL_VERSION or SLURM_MIN_PROTOCOL_VERSION. */
	known_version =
		((header->version == SLURM_PROTOCOL_VERSION) ||
		 (header->version == SLURM_ONE_BACK_PROTOCOL_VERSION) ||
		 (header->version == SLURM_MIN_PROTOCOL_VERSION));

	if (slurmdbd_conf) {
		/* With slurmdbd_conf set only the version list is checked;
		 * the message type plays no part in the decision. */
		if (!known_version) {
			debug("unsupported RPC version %hu msg type %s(%u)",
			      header->version,
			      rpc_num2string(header->msg_type),
			      header->msg_type);
			slurm_seterrno_ret(SLURM_PROTOCOL_VERSION_ERROR);
		}
		return SLURM_PROTOCOL_SUCCESS;
	}

	/* Exact match with the expected version: nothing more to check. */
	if (header->version == expected)
		return SLURM_PROTOCOL_SUCCESS;

	step_rpc = ((header->msg_type == REQUEST_LAUNCH_TASKS) ||
		    (header->msg_type == REQUEST_RUN_JOB_STEP) ||
		    (header->msg_type == RESPONSE_LAUNCH_TASKS) ||
		    (header->msg_type == RESPONSE_RUN_JOB_STEP));

	if (step_rpc && working_cluster_rec) {
		/* Disable job step creation/launch between major
		 * releases. Other RPCs should all be supported. */
		debug("unsupported RPC type %hu", header->msg_type);
		slurm_seterrno_ret(SLURM_PROTOCOL_VERSION_ERROR);
	} else if (!known_version) {
		/* Every other case (including step RPCs when no working
		 * cluster record is set) is judged on the version list. */
		debug("Unsupported RPC version %hu "
		      "msg type %s(%u)",
		      header->version,
		      rpc_num2string(header->msg_type),
		      header->msg_type);
		slurm_seterrno_ret(SLURM_PROTOCOL_VERSION_ERROR);
	}

	return SLURM_PROTOCOL_SUCCESS;
}
static int _print_stats(void) { int i; if (!buf) { printf("No data available. Probably slurmctld is not working\n"); return -1; } printf("*******************************************************\n"); printf("sdiag output at %s", slurm_ctime(&buf->req_time)); printf("Data since %s", slurm_ctime(&buf->req_time_start)); printf("*******************************************************\n"); printf("Server thread count: %d\n", buf->server_thread_count); printf("Agent queue size: %d\n\n", buf->agent_queue_size); printf("Jobs submitted: %d\n", buf->jobs_submitted); printf("Jobs started: %d\n", buf->jobs_started); printf("Jobs completed: %d\n", buf->jobs_completed); printf("Jobs canceled: %d\n", buf->jobs_canceled); printf("Jobs failed: %d\n", buf->jobs_failed); printf("\nMain schedule statistics (microseconds):\n"); printf("\tLast cycle: %u\n", buf->schedule_cycle_last); printf("\tMax cycle: %u\n", buf->schedule_cycle_max); printf("\tTotal cycles: %u\n", buf->schedule_cycle_counter); if (buf->schedule_cycle_counter > 0) { printf("\tMean cycle: %u\n", buf->schedule_cycle_sum / buf->schedule_cycle_counter); printf("\tMean depth cycle: %u\n", buf->schedule_cycle_depth / buf->schedule_cycle_counter); } if ((buf->req_time - buf->req_time_start) > 60) { printf("\tCycles per minute: %u\n", (uint32_t) (buf->schedule_cycle_counter / ((buf->req_time - buf->req_time_start) / 60))); } printf("\tLast queue length: %u\n", buf->schedule_queue_len); if (buf->bf_active) { printf("\nBackfilling stats (WARNING: data obtained" " in the middle of backfilling execution.)\n"); } else printf("\nBackfilling stats\n"); printf("\tTotal backfilled jobs (since last slurm start): %u\n", buf->bf_backfilled_jobs); printf("\tTotal backfilled jobs (since last stats cycle start): %u\n", buf->bf_last_backfilled_jobs); printf("\tTotal cycles: %u\n", buf->bf_cycle_counter); printf("\tLast cycle when: %s", slurm_ctime(&buf->bf_when_last_cycle)); printf("\tLast cycle: %u\n", buf->bf_cycle_last); printf("\tMax 
cycle: %u\n", buf->bf_cycle_max); if (buf->bf_cycle_counter > 0) { printf("\tMean cycle: %"PRIu64"\n", buf->bf_cycle_sum / buf->bf_cycle_counter); } printf("\tLast depth cycle: %u\n", buf->bf_last_depth); printf("\tLast depth cycle (try sched): %u\n", buf->bf_last_depth_try); if (buf->bf_cycle_counter > 0) { printf("\tDepth Mean: %u\n", buf->bf_depth_sum / buf->bf_cycle_counter); printf("\tDepth Mean (try depth): %u\n", buf->bf_depth_try_sum / buf->bf_cycle_counter); } printf("\tLast queue length: %u\n", buf->bf_queue_len); if (buf->bf_cycle_counter > 0) { printf("\tQueue length mean: %u\n", buf->bf_queue_len_sum / buf->bf_cycle_counter); } printf("\nRemote Procedure Call statistics by message type\n"); for (i = 0; i < buf->rpc_type_size; i++) { printf("\t%-40s(%5u) count:%-6u " "ave_time:%-6u total_time:%"PRIu64"\n", rpc_num2string(buf->rpc_type_id[i]), buf->rpc_type_id[i], buf->rpc_type_cnt[i], rpc_type_ave_time[i], buf->rpc_type_time[i]); } printf("\nRemote Procedure Call statistics by user\n"); for (i = 0; i < buf->rpc_user_size; i++) { printf("\t%-16s(%8u) count:%-6u " "ave_time:%-6u total_time:%"PRIu64"\n", uid_to_string_cached((uid_t)buf->rpc_user_id[i]), buf->rpc_user_id[i], buf->rpc_user_cnt[i], rpc_user_ave_time[i], buf->rpc_user_time[i]); } return 0; }
/*
 * _send_recv_locked - one request/response exchange with the SlurmDBD.
 * Called by send_recv_slurmdbd_msg() while it holds slurmdbd_lock.
 * IN rpc_version - version used to pack the request and unpack the reply
 * IN req - message to send
 * OUT resp - filled in by unpack_slurmdbd_msg()
 * RET - SLURM_SUCCESS or an error code
 */
static int _send_recv_locked(uint16_t rpc_version, slurmdbd_msg_t *req,
			     slurmdbd_msg_t *resp)
{
	Buf msg_buf;
	int rc;

	if (!slurmdbd_conn || (slurmdbd_conn->fd < 0)) {
		/* Either slurm_open_slurmdbd_conn() was not executed or
		 * the connection to Slurm DBD has been closed */
		if (req->msg_type != DBD_GET_CONFIG)
			_open_slurmdbd_conn(1);
		else
			_open_slurmdbd_conn(0);

		/* Still no usable connection after the open attempt */
		if (!slurmdbd_conn || (slurmdbd_conn->fd < 0))
			return SLURM_ERROR;
	}

	msg_buf = pack_slurmdbd_msg(req, rpc_version);
	if (!msg_buf)
		return SLURM_ERROR;

	/* The packed request is released whether or not the send worked */
	rc = slurm_persist_send_msg(slurmdbd_conn, msg_buf);
	free_buf(msg_buf);
	if (rc != SLURM_SUCCESS) {
		error("slurmdbd: Sending message type %s: %d: %m",
		      rpc_num2string(req->msg_type), rc);
		return rc;
	}

	msg_buf = slurm_persist_recv_msg(slurmdbd_conn);
	if (msg_buf == NULL) {
		error("slurmdbd: Getting response to message type %u",
		      req->msg_type);
		return SLURM_ERROR;
	}

	rc = unpack_slurmdbd_msg(resp, rpc_version, msg_buf);

	/* check for the rc of the start job message */
	if ((rc == SLURM_SUCCESS) && (resp->msg_type == DBD_ID_RC))
		rc = ((dbd_id_rc_msg_t *)resp->data)->return_code;

	free_buf(msg_buf);

	return rc;
}

/*
 * Send an RPC to the SlurmDBD and wait for an arbitrary reply message.
 * The RPC will not be queued if an error occurs.
 * The "resp" message must be freed by the caller.
 * Returns SLURM_SUCCESS or an error code
 */
extern int send_recv_slurmdbd_msg(uint16_t rpc_version, slurmdbd_msg_t *req,
				  slurmdbd_msg_t *resp)
{
	int rc;

	xassert(req);
	xassert(resp);

	/* halt_agent is raised while we wait for the mutex so this
	 * request gets sent instead of whatever the agent would send,
	 * and is cleared again as soon as the mutex is ours. */
	halt_agent = 1;
	slurm_mutex_lock(&slurmdbd_lock);
	halt_agent = 0;

	rc = _send_recv_locked(rpc_version, req, resp);

	/* Signal the condition and drop the lock on every path, success
	 * or failure. */
	slurm_cond_signal(&slurmdbd_cond);
	slurm_mutex_unlock(&slurmdbd_lock);

	return rc;
}