/*
 * slurm_ckpt_unpack_job - unpack checkpoint job information from a buffer
 * IN/OUT jobinfo - record (cast to struct check_job_info) to fill in
 * IN buffer - buffer to read from
 * IN protocol_version - version used when the data was packed
 * RET SLURM_SUCCESS, or SLURM_ERROR if the buffer is truncated or malformed
 *
 * For protocol_version >= SLURM_MIN_PROTOCOL_VERSION the data starts with a
 * 16-bit id and a 32-bit payload size.  A payload whose id is not CHECK_POE
 * is skipped; otherwise the individual fields are unpacked.  Older versions
 * read nothing and report success.
 */
extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer,
				 uint16_t protocol_version)
{
	uint32_t uint32_tmp;
	struct check_job_info *check_ptr = (struct check_job_info *)jobinfo;

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		uint16_t id;
		uint32_t size;

		safe_unpack16(&id, buffer);
		safe_unpack32(&size, buffer);
		if (id != CHECK_POE) {
			/*
			 * The size comes from the packed data itself, so
			 * never move the offset past the end of the buffer.
			 */
			if (size > remaining_buf(buffer))
				goto unpack_error;
			set_buf_offset(buffer, get_buf_offset(buffer) + size);
		} else {
			safe_unpack16(&check_ptr->disabled, buffer);
			safe_unpack16(&check_ptr->node_cnt, buffer);
			safe_unpack16(&check_ptr->reply_cnt, buffer);
			safe_unpack16(&check_ptr->wait_time, buffer);
			safe_unpack32(&check_ptr->error_code, buffer);
			safe_unpackstr_xmalloc(&check_ptr->error_msg,
					       &uint32_tmp, buffer);
			safe_unpack_time(&check_ptr->time_stamp, buffer);
		}
	}

	return SLURM_SUCCESS;

unpack_error:
	xfree(check_ptr->error_msg);
	return SLURM_ERROR;
}
/*
 * slurm_ckpt_unpack_job - unpack checkpoint job information from a buffer
 * IN/OUT jobinfo - record (cast to struct check_job_info) to fill in
 * IN buffer - buffer to read from
 * IN protocol_version - version used when the data was packed
 * RET SLURM_SUCCESS or SLURM_ERROR
 * NOTE: versions older than SLURM_2_3_PROTOCOL_VERSION read nothing from
 *	the buffer and still report success.
 */
extern int slurm_ckpt_unpack_job(check_jobinfo_t jobinfo, Buf buffer,
				 uint16_t protocol_version)
{
	struct check_job_info *ckpt = (struct check_job_info *)jobinfo;
	uint32_t msg_len;

	if (protocol_version < SLURM_2_3_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpack16(&ckpt->disabled, buffer);
	safe_unpack16(&ckpt->node_cnt, buffer);
	safe_unpack16(&ckpt->reply_cnt, buffer);
	safe_unpack16(&ckpt->wait_time, buffer);
	safe_unpack32(&ckpt->error_code, buffer);
	safe_unpackstr_xmalloc(&ckpt->error_msg, &msg_len, buffer);
	safe_unpack_time(&ckpt->time_stamp, buffer);

	return SLURM_SUCCESS;

unpack_error:
	xfree(ckpt->error_msg);
	return SLURM_ERROR;
}
/*
 * _unpack_node_state_msg - allocate a dbd_node_state_msg_t and fill it
 *	from buffer
 * OUT msg - set to the new message, or to NULL on unpack failure
 * IN rpc_version - version used when the data was packed
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 * NOTE: reason_uid is preset to NO_VAL.  For rpc_version below
 *	SLURM_MIN_PROTOCOL_VERSION nothing is read and success is returned.
 */
static int _unpack_node_state_msg(dbd_node_state_msg_t **msg,
				  uint16_t rpc_version, Buf buffer)
{
	uint32_t str_len;
	dbd_node_state_msg_t *node_msg = xmalloc(sizeof(dbd_node_state_msg_t));

	*msg = node_msg;
	node_msg->reason_uid = NO_VAL;

	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpackstr_xmalloc(&node_msg->hostlist, &str_len, buffer);
	safe_unpackstr_xmalloc(&node_msg->reason, &str_len, buffer);
	safe_unpack32(&node_msg->reason_uid, buffer);
	safe_unpack16(&node_msg->new_state, buffer);
	safe_unpack_time(&node_msg->event_time, buffer);
	safe_unpack32(&node_msg->state, buffer);
	safe_unpackstr_xmalloc(&node_msg->tres_str, &str_len, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_node_state_msg(node_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
/*
 * acct_gather_energy_unpack - allocate an energy record and fill it from
 *	buffer
 * OUT energy - set to the new record, or to NULL on unpack failure
 * IN buffer - buffer to read from
 * IN protocol_version - version used when the data was packed
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer,
				     uint16_t protocol_version)
{
	acct_gather_energy_t *rec = acct_gather_energy_alloc();

	*energy = rec;

	/* These four fields lead the record in every supported version */
	safe_unpack32(&rec->base_consumed_energy, buffer);
	safe_unpack32(&rec->base_watts, buffer);
	safe_unpack32(&rec->consumed_energy, buffer);
	safe_unpack32(&rec->current_watts, buffer);

	/* Two trailing fields exist only from SLURM_2_6_PROTOCOL_VERSION on */
	if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) {
		safe_unpack32(&rec->previous_consumed_energy, buffer);
		safe_unpack_time(&rec->poll_time, buffer);
	}

	return SLURM_SUCCESS;

unpack_error:
	acct_gather_energy_destroy(rec);
	*energy = NULL;
	return SLURM_ERROR;
}
/*
 * slurmdbd_unpack_usage_msg - allocate a dbd_usage_msg_t and fill it from
 *	buffer
 * OUT msg - set to the new message, or to NULL on unpack failure
 * IN rpc_version - version used when the data was packed
 * IN type - message type; selects the routine used to unpack the record
 *	(association, cluster or wckey)
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 * NOTE: an unrecognized type is reported through fatal().
 */
extern int slurmdbd_unpack_usage_msg(dbd_usage_msg_t **msg,
				     uint16_t rpc_version,
				     slurmdbd_msg_type_t type, Buf buffer)
{
	int (*unpack_rec) (void **object, uint16_t rpc_version, Buf buffer);
	dbd_usage_msg_t *usage_msg = xmalloc(sizeof(dbd_usage_msg_t));

	*msg = usage_msg;

	switch (type) {
	case DBD_GET_ASSOC_USAGE:
	case DBD_GOT_ASSOC_USAGE:
		unpack_rec = slurmdb_unpack_assoc_rec;
		break;
	case DBD_GET_CLUSTER_USAGE:
	case DBD_GOT_CLUSTER_USAGE:
		unpack_rec = slurmdb_unpack_cluster_rec;
		break;
	case DBD_GET_WCKEY_USAGE:
	case DBD_GOT_WCKEY_USAGE:
		unpack_rec = slurmdb_unpack_wckey_rec;
		break;
	default:
		fatal("Unknown pack type");
		return SLURM_ERROR;
	}

	/* The record comes first, followed by the start/end of the period */
	if (unpack_rec(&usage_msg->rec, rpc_version, buffer) == SLURM_ERROR)
		goto unpack_error;
	safe_unpack_time(&usage_msg->start, buffer);
	safe_unpack_time(&usage_msg->end, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_usage_msg(usage_msg, type);
	*msg = NULL;
	return SLURM_ERROR;
}
/*
 * acct_gather_energy_unpack - fill an energy record from buffer
 * IN/OUT energy - if need_alloc is set, receives a newly allocated record
 *	(or NULL on failure); otherwise must already point at the record to
 *	fill, which is zeroed on failure
 * IN buffer - buffer to read from
 * IN protocol_version - version used when the data was packed
 * IN need_alloc - true to allocate the record here
 * RET SLURM_SUCCESS or SLURM_ERROR
 * NOTE: before SLURM_15_08_PROTOCOL_VERSION the three consumed-energy
 *	counters were packed as 32-bit values; they are widened on read.
 */
extern int acct_gather_energy_unpack(acct_gather_energy_t **energy, Buf buffer,
				     uint16_t protocol_version,
				     bool need_alloc)
{
	acct_gather_energy_t *rec;
	uint32_t narrow;

	if (!need_alloc) {
		rec = *energy;
	} else {
		rec = acct_gather_energy_alloc(1);
		*energy = rec;
	}

	if (protocol_version >= SLURM_15_08_PROTOCOL_VERSION) {
		safe_unpack64(&rec->base_consumed_energy, buffer);
		safe_unpack32(&rec->base_watts, buffer);
		safe_unpack64(&rec->consumed_energy, buffer);
		safe_unpack32(&rec->current_watts, buffer);
		safe_unpack64(&rec->previous_consumed_energy, buffer);
		safe_unpack_time(&rec->poll_time, buffer);
		return SLURM_SUCCESS;
	}

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_unpack32(&narrow, buffer);
		rec->base_consumed_energy = (uint64_t) narrow;
		safe_unpack32(&rec->base_watts, buffer);
		safe_unpack32(&narrow, buffer);
		rec->consumed_energy = (uint64_t) narrow;
		safe_unpack32(&rec->current_watts, buffer);
		safe_unpack32(&narrow, buffer);
		rec->previous_consumed_energy = (uint64_t) narrow;
		safe_unpack_time(&rec->poll_time, buffer);
	}

	return SLURM_SUCCESS;

unpack_error:
	if (!need_alloc) {
		/* Caller owns the record: just wipe the partial contents */
		memset(rec, 0, sizeof(acct_gather_energy_t));
	} else {
		acct_gather_energy_destroy(rec);
		*energy = NULL;
	}
	return SLURM_ERROR;
}
/*
 * _unpack_roll_usage_msg - allocate a dbd_roll_usage_msg_t and fill it
 *	from buffer
 * OUT msg - set to the new message, or to NULL on unpack failure
 * IN rpc_version - version used when the data was packed
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 * NOTE: for rpc_version below SLURM_MIN_PROTOCOL_VERSION nothing is read
 *	and success is returned.
 */
static int _unpack_roll_usage_msg(dbd_roll_usage_msg_t **msg,
				  uint16_t rpc_version, Buf buffer)
{
	dbd_roll_usage_msg_t *roll_msg;

	roll_msg = xmalloc(sizeof(dbd_roll_usage_msg_t));
	*msg = roll_msg;

	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpack16(&roll_msg->archive_data, buffer);
	safe_unpack_time(&roll_msg->end, buffer);
	safe_unpack_time(&roll_msg->start, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_roll_usage_msg(roll_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
/*
 * _unpack_job_suspend_msg - allocate a dbd_job_suspend_msg_t and fill it
 *	from buffer
 * OUT msg - set to the new message, or to NULL on unpack failure
 * IN rpc_version - version used when the data was packed
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 * NOTE: for rpc_version below SLURM_MIN_PROTOCOL_VERSION nothing is read
 *	and success is returned.
 */
static int _unpack_job_suspend_msg(dbd_job_suspend_msg_t **msg,
				   uint16_t rpc_version, Buf buffer)
{
	dbd_job_suspend_msg_t *suspend_msg;

	suspend_msg = xmalloc(sizeof(dbd_job_suspend_msg_t));
	*msg = suspend_msg;

	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpack32(&suspend_msg->assoc_id, buffer);
	safe_unpack64(&suspend_msg->db_index, buffer);
	safe_unpack32(&suspend_msg->job_id, buffer);
	safe_unpack32(&suspend_msg->job_state, buffer);
	safe_unpack_time(&suspend_msg->submit_time, buffer);
	safe_unpack_time(&suspend_msg->suspend_time, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_job_suspend_msg(suspend_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
/*
 * _unpack_step_start_msg - allocate a dbd_step_start_msg_t and fill it
 *	from buffer
 * OUT msg - set to the new message, or to NULL on failure
 * IN rpc_version - version used when the data was packed
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS, or SLURM_ERROR on an unpack failure or when
 *	rpc_version is below SLURM_MIN_PROTOCOL_VERSION
 */
static int _unpack_step_start_msg(dbd_step_start_msg_t **msg,
				  uint16_t rpc_version, Buf buffer)
{
	dbd_step_start_msg_t *step_msg = xmalloc(sizeof(dbd_step_start_msg_t));
	uint32_t str_len = 0;

	*msg = step_msg;

	/* Versions that are too old are handled like a malformed message */
	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		goto unpack_error;

	safe_unpack32(&step_msg->assoc_id, buffer);
	safe_unpack64(&step_msg->db_index, buffer);
	safe_unpack32(&step_msg->job_id, buffer);
	safe_unpackstr_xmalloc(&step_msg->name, &str_len, buffer);
	safe_unpackstr_xmalloc(&step_msg->nodes, &str_len, buffer);
	safe_unpackstr_xmalloc(&step_msg->node_inx, &str_len, buffer);
	safe_unpack32(&step_msg->node_cnt, buffer);
	safe_unpack_time(&step_msg->start_time, buffer);
	safe_unpack_time(&step_msg->job_submit_time, buffer);
	safe_unpack32(&step_msg->req_cpufreq_min, buffer);
	safe_unpack32(&step_msg->req_cpufreq_max, buffer);
	safe_unpack32(&step_msg->req_cpufreq_gov, buffer);
	safe_unpack32(&step_msg->step_id, buffer);
	safe_unpack32(&step_msg->task_dist, buffer);
	safe_unpack32(&step_msg->total_tasks, buffer);
	safe_unpackstr_xmalloc(&step_msg->tres_alloc_str, &str_len, buffer);

	return SLURM_SUCCESS;

unpack_error:
	debug2("slurmdbd_unpack_step_start_msg:"
	       "unpack_error: size_buf(buffer) %u",
	       size_buf(buffer));
	slurmdbd_free_step_start_msg(step_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
/*
 * _unpack_job_complete_msg - allocate a dbd_job_comp_msg_t and fill it
 *	from buffer
 * OUT msg - set to the new message, or to NULL on failure
 * IN rpc_version - version used when the data was packed
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS, or SLURM_ERROR on an unpack failure or when
 *	rpc_version is below SLURM_MIN_PROTOCOL_VERSION
 */
static int _unpack_job_complete_msg(dbd_job_comp_msg_t **msg,
				    uint16_t rpc_version, Buf buffer)
{
	dbd_job_comp_msg_t *comp_msg = xmalloc(sizeof(dbd_job_comp_msg_t));
	uint32_t str_len;

	*msg = comp_msg;

	/* Versions that are too old are handled like a malformed message */
	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		goto unpack_error;

	safe_unpackstr_xmalloc(&comp_msg->admin_comment, &str_len, buffer);
	safe_unpack32(&comp_msg->assoc_id, buffer);
	safe_unpackstr_xmalloc(&comp_msg->comment, &str_len, buffer);
	safe_unpack64(&comp_msg->db_index, buffer);
	safe_unpack32(&comp_msg->derived_ec, buffer);
	safe_unpack_time(&comp_msg->end_time, buffer);
	safe_unpack32(&comp_msg->exit_code, buffer);
	safe_unpack32(&comp_msg->job_id, buffer);
	safe_unpack32(&comp_msg->job_state, buffer);
	safe_unpackstr_xmalloc(&comp_msg->nodes, &str_len, buffer);
	safe_unpack32(&comp_msg->req_uid, buffer);
	safe_unpack_time(&comp_msg->start_time, buffer);
	safe_unpack_time(&comp_msg->submit_time, buffer);
	safe_unpackstr_xmalloc(&comp_msg->system_comment, &str_len, buffer);
	safe_unpackstr_xmalloc(&comp_msg->tres_alloc_str, &str_len, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_job_complete_msg(comp_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
/*
 * _unpack_step_complete_msg - allocate a dbd_step_comp_msg_t and fill it
 *	from buffer
 * OUT msg - set to the new message, or to NULL on failure
 * IN rpc_version - version used when the data was packed
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS, or SLURM_ERROR on an unpack failure or when
 *	rpc_version is below SLURM_MIN_PROTOCOL_VERSION
 */
static int _unpack_step_complete_msg(dbd_step_comp_msg_t **msg,
				     uint16_t rpc_version, Buf buffer)
{
	uint32_t uint32_tmp;
	dbd_step_comp_msg_t *msg_ptr = xmalloc(sizeof(dbd_step_comp_msg_t));

	*msg = msg_ptr;

	if (rpc_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_unpack32(&msg_ptr->assoc_id, buffer);
		safe_unpack64(&msg_ptr->db_index, buffer);
		safe_unpack_time(&msg_ptr->end_time, buffer);
		safe_unpack32(&msg_ptr->exit_code, buffer);
		/*
		 * A failed accounting unpack leaves the read position
		 * unreliable, so stop here instead of interpreting the
		 * following bytes as the remaining fields.
		 */
		if (jobacctinfo_unpack((struct jobacctinfo **)&msg_ptr->jobacct,
				       rpc_version, PROTOCOL_TYPE_DBD,
				       buffer, 1) != SLURM_SUCCESS)
			goto unpack_error;
		safe_unpack32(&msg_ptr->job_id, buffer);
		safe_unpack_time(&msg_ptr->job_submit_time, buffer);
		safe_unpackstr_xmalloc(&msg_ptr->job_tres_alloc_str,
				       &uint32_tmp, buffer);
		safe_unpack32(&msg_ptr->req_uid, buffer);
		safe_unpack_time(&msg_ptr->start_time, buffer);
		safe_unpack16(&msg_ptr->state, buffer);
		safe_unpack32(&msg_ptr->step_id, buffer);
		safe_unpack32(&msg_ptr->total_tasks, buffer);
	} else
		goto unpack_error;

	return SLURM_SUCCESS;

unpack_error:
	debug2("slurmdbd_unpack_step_complete_msg:"
	       "unpack_error: size_buf(buffer) %u",
	       size_buf(buffer));
	slurmdbd_free_step_complete_msg(msg_ptr);
	*msg = NULL;
	return SLURM_ERROR;
}
/*
 * ext_sensors_data_unpack - allocate an external sensors record and fill
 *	it from buffer
 * OUT ext_sensors - set to the new record, or to NULL on failure
 * IN buffer - buffer to read from
 * IN protocol_version - not consulted by this routine
 * RET SLURM_SUCCESS, or SLURM_ERROR if allocation or unpacking fails
 */
extern int ext_sensors_data_unpack(ext_sensors_data_t **ext_sensors,
				   Buf buffer, uint16_t protocol_version)
{
	ext_sensors_data_t *sensors = ext_sensors_alloc();

	*ext_sensors = sensors;
	if (!sensors)
		return SLURM_ERROR;

	safe_unpack32(&sensors->consumed_energy, buffer);
	safe_unpack32(&sensors->temperature, buffer);
	safe_unpack_time(&sensors->energy_update_time, buffer);
	safe_unpack32(&sensors->current_watts, buffer);

	return SLURM_SUCCESS;

unpack_error:
	ext_sensors_destroy(sensors);
	*ext_sensors = NULL;
	return SLURM_ERROR;
}
/*
 * _unpack_cluster_tres_msg - allocate a dbd_cluster_tres_msg_t and fill it
 *	from buffer
 * OUT msg - set to the new message, or to NULL on unpack failure
 * IN rpc_version - version used when the data was packed
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS or SLURM_ERROR
 * NOTE: for rpc_version below SLURM_MIN_PROTOCOL_VERSION nothing is read
 *	and success is returned.
 */
static int _unpack_cluster_tres_msg(dbd_cluster_tres_msg_t **msg,
				    uint16_t rpc_version, Buf buffer)
{
	uint32_t str_len;
	dbd_cluster_tres_msg_t *tres_msg;

	tres_msg = xmalloc(sizeof(dbd_cluster_tres_msg_t));
	*msg = tres_msg;

	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		return SLURM_SUCCESS;

	safe_unpackstr_xmalloc(&tres_msg->cluster_nodes, &str_len, buffer);
	safe_unpack_time(&tres_msg->event_time, buffer);
	safe_unpackstr_xmalloc(&tres_msg->tres_str, &str_len, buffer);

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_cluster_tres_msg(tres_msg);
	*msg = NULL;
	return SLURM_ERROR;
}
/* * load_all_part_state - load the partition state from file, recover on * slurmctld restart. execute this after loading the configuration * file data. * NOTE: READ lock_slurmctld config before entry */ int load_all_part_state(void) { char *part_name = NULL, *allow_groups = NULL, *nodes = NULL; char *state_file, *data = NULL; uint32_t max_time, default_time, max_nodes, min_nodes; uint32_t max_cpus_per_node = INFINITE, grace_time = 0; time_t time; uint16_t flags; uint16_t max_share, preempt_mode, priority, state_up, cr_type; struct part_record *part_ptr; uint32_t data_size = 0, name_len; int data_allocated, data_read = 0, error_code = 0, part_cnt = 0; int state_fd; Buf buffer; char *ver_str = NULL; char* allow_alloc_nodes = NULL; uint16_t protocol_version = (uint16_t)NO_VAL; char* alternate = NULL; /* read the file */ lock_state_files(); state_fd = _open_part_state_file(&state_file); if (state_fd < 0) { info("No partition state file (%s) to recover", state_file); error_code = ENOENT; } else { data_allocated = BUF_SIZE; data = xmalloc(data_allocated); while (1) { data_read = read(state_fd, &data[data_size], BUF_SIZE); if (data_read < 0) { if (errno == EINTR) continue; else { error("Read error on %s: %m", state_file); break; } } else if (data_read == 0) /* eof */ break; data_size += data_read; data_allocated += data_read; xrealloc(data, data_allocated); } close(state_fd); } xfree(state_file); unlock_state_files(); buffer = create_buf(data, data_size); safe_unpackstr_xmalloc( &ver_str, &name_len, buffer); debug3("Version string in part_state header is %s", ver_str); if (ver_str) { if (!strcmp(ver_str, PART_STATE_VERSION)) { protocol_version = SLURM_PROTOCOL_VERSION; } else if (!strcmp(ver_str, PART_2_5_STATE_VERSION)) { protocol_version = SLURM_2_5_PROTOCOL_VERSION; } } if (protocol_version == (uint16_t)NO_VAL) { error("**********************************************************"); error("Can not recover partition state, data version incompatible"); 
error("**********************************************************"); xfree(ver_str); free_buf(buffer); return EFAULT; } xfree(ver_str); safe_unpack_time(&time, buffer); while (remaining_buf(buffer) > 0) { if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&part_name, &name_len, buffer); safe_unpack32(&grace_time, buffer); safe_unpack32(&max_time, buffer); safe_unpack32(&default_time, buffer); safe_unpack32(&max_cpus_per_node, buffer); safe_unpack32(&max_nodes, buffer); safe_unpack32(&min_nodes, buffer); safe_unpack16(&flags, buffer); safe_unpack16(&max_share, buffer); safe_unpack16(&preempt_mode, buffer); safe_unpack16(&priority, buffer); if (priority > part_max_priority) part_max_priority = priority; safe_unpack16(&state_up, buffer); safe_unpack16(&cr_type, buffer); safe_unpackstr_xmalloc(&allow_groups, &name_len, buffer); safe_unpackstr_xmalloc(&allow_alloc_nodes, &name_len, buffer); safe_unpackstr_xmalloc(&alternate, &name_len, buffer); safe_unpackstr_xmalloc(&nodes, &name_len, buffer); if ((flags & PART_FLAG_DEFAULT_CLR) || (flags & PART_FLAG_HIDDEN_CLR) || (flags & PART_FLAG_NO_ROOT_CLR) || (flags & PART_FLAG_ROOT_ONLY_CLR) || (flags & PART_FLAG_REQ_RESV_CLR)) { error("Invalid data for partition %s: flags=%u", part_name, flags); error_code = EINVAL; } } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&part_name, &name_len, buffer); safe_unpack32(&grace_time, buffer); safe_unpack32(&max_time, buffer); safe_unpack32(&default_time, buffer); safe_unpack32(&max_nodes, buffer); safe_unpack32(&min_nodes, buffer); safe_unpack16(&flags, buffer); safe_unpack16(&max_share, buffer); safe_unpack16(&preempt_mode, buffer); safe_unpack16(&priority, buffer); if (priority > part_max_priority) part_max_priority = priority; cr_type = 0; /* Default value */ safe_unpack16(&state_up, buffer); safe_unpackstr_xmalloc(&allow_groups, &name_len, buffer); safe_unpackstr_xmalloc(&allow_alloc_nodes, &name_len, buffer); 
safe_unpackstr_xmalloc(&alternate, &name_len, buffer); safe_unpackstr_xmalloc(&nodes, &name_len, buffer); if ((flags & PART_FLAG_DEFAULT_CLR) || (flags & PART_FLAG_HIDDEN_CLR) || (flags & PART_FLAG_NO_ROOT_CLR) || (flags & PART_FLAG_ROOT_ONLY_CLR) || (flags & PART_FLAG_REQ_RESV_CLR)) { error("Invalid data for partition %s: flags=%u", part_name, flags); error_code = EINVAL; } } else { error("load_all_part_state: protocol_version " "%hu not supported", protocol_version); goto unpack_error; } /* validity test as possible */ if (state_up > PARTITION_UP) { error("Invalid data for partition %s: state_up=%u", part_name, state_up); error_code = EINVAL; } if (error_code) { error("No more partition data will be processed from " "the checkpoint file"); xfree(allow_groups); xfree(allow_alloc_nodes); xfree(alternate); xfree(part_name); xfree(nodes); error_code = EINVAL; break; } /* find record and perform update */ part_ptr = list_find_first(part_list, &list_find_part, part_name); part_cnt++; if (part_ptr == NULL) { info("load_all_part_state: partition %s missing from " "configuration file", part_name); part_ptr = create_part_record(); xfree(part_ptr->name); part_ptr->name = xstrdup(part_name); } part_ptr->flags = flags; if (part_ptr->flags & PART_FLAG_DEFAULT) { xfree(default_part_name); default_part_name = xstrdup(part_name); default_part_loc = part_ptr; } part_ptr->max_time = max_time; part_ptr->default_time = default_time; part_ptr->max_cpus_per_node = max_cpus_per_node; part_ptr->max_nodes = max_nodes; part_ptr->max_nodes_orig = max_nodes; part_ptr->min_nodes = min_nodes; part_ptr->min_nodes_orig = min_nodes; part_ptr->max_share = max_share; part_ptr->grace_time = grace_time; if (preempt_mode != (uint16_t) NO_VAL) part_ptr->preempt_mode = preempt_mode; part_ptr->priority = priority; part_ptr->state_up = state_up; part_ptr->cr_type = cr_type; xfree(part_ptr->allow_groups); part_ptr->allow_groups = allow_groups; xfree(part_ptr->allow_alloc_nodes); part_ptr->allow_alloc_nodes 
= allow_alloc_nodes; xfree(part_ptr->alternate); part_ptr->alternate = alternate; xfree(part_ptr->nodes); part_ptr->nodes = nodes; xfree(part_name); } info("Recovered state of %d partitions", part_cnt); free_buf(buffer); return error_code; unpack_error: error("Incomplete partition data checkpoint file"); info("Recovered state of %d partitions", part_cnt); free_buf(buffer); return EFAULT; }
/*
 * load_all_front_end_state - Load the front_end node state from file, recover
 *	on slurmctld restart. Execute this after loading the configuration
 *	file data. Data goes into common storage.
 * IN state_only - if true, overwrite only front_end node state and reason
 *	Use this to overwrite the "UNKNOWN" state typically used in slurm.conf
 * RET 0 or error code
 * NOTE: READ lock_slurmctld config before entry
 * NOTE: without HAVE_FRONT_END this is a no-op returning 0
 */
extern int load_all_front_end_state(bool state_only)
{
#ifdef HAVE_FRONT_END
	char *node_name = NULL, *reason = NULL, *data = NULL, *state_file;
	int data_allocated, data_read = 0, error_code = 0, node_cnt = 0;
	uint16_t node_state;
	uint32_t data_size = 0, name_len;
	uint32_t reason_uid = NO_VAL;
	time_t reason_time = 0;
	front_end_record_t *front_end_ptr;
	int state_fd;
	time_t time_stamp;
	Buf buffer;
	char *ver_str = NULL;
	uint16_t protocol_version = (uint16_t) NO_VAL;

	/* Pull the whole state file into memory */
	lock_state_files();
	state_fd = _open_front_end_state_file(&state_file);
	if (state_fd >= 0) {
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		for (;;) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read == 0)	/* eof */
				break;
			if (data_read < 0) {
				if (errno == EINTR)
					continue;
				error ("Read error on %s: %m", state_file);
				break;
			}
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	} else {
		info ("No node state file (%s) to recover", state_file);
		error_code = ENOENT;
	}
	xfree(state_file);
	unlock_state_files();

	buffer = create_buf(data, data_size);

	/* Header: version string, then the time stamp */
	safe_unpackstr_xmalloc(&ver_str, &name_len, buffer);
	debug3("Version string in front_end_state header is %s", ver_str);
	if (ver_str && !strcmp(ver_str, FRONT_END_STATE_VERSION))
		protocol_version = SLURM_PROTOCOL_VERSION;

	if (protocol_version == (uint16_t) NO_VAL) {
		error("*****************************************************");
		error("Can not recover front_end state, version incompatible");
		error("*****************************************************");
		xfree(ver_str);
		free_buf(buffer);
		return EFAULT;
	}
	xfree(ver_str);

	safe_unpack_time(&time_stamp, buffer);

	/* One record per front_end node until the buffer is exhausted */
	while (remaining_buf(buffer) > 0) {
		uint16_t base_state;

		if (protocol_version < SLURM_2_5_PROTOCOL_VERSION)
			goto unpack_error;

		safe_unpackstr_xmalloc(&node_name, &name_len, buffer);
		safe_unpack16(&node_state, buffer);
		safe_unpackstr_xmalloc(&reason, &name_len, buffer);
		safe_unpack_time(&reason_time, buffer);
		safe_unpack32(&reason_uid, buffer);
		base_state = node_state & NODE_STATE_BASE;

		/* find record and perform update */
		front_end_ptr = find_front_end_record(node_name);
		if (!front_end_ptr) {
			error("Front_end node %s has vanished from "
			      "configuration", node_name);
		} else if (!state_only) {
			/* Full restore: saved state replaces current state */
			node_cnt++;
			front_end_ptr->node_state = node_state;
			xfree(front_end_ptr->reason);
			front_end_ptr->reason      = reason;
			reason = NULL;	/* Nothing to free */
			front_end_ptr->reason_time = reason_time;
			front_end_ptr->reason_uid  = reason_uid;
			front_end_ptr->last_response = (time_t) 0;
		} else {
			uint16_t orig_flags =
				front_end_ptr->node_state & NODE_STATE_FLAGS;

			node_cnt++;
			/* Saved state is applied only to UNKNOWN nodes */
			if (IS_NODE_UNKNOWN(front_end_ptr)) {
				if (base_state == NODE_STATE_DOWN) {
					orig_flags &= (~NODE_STATE_COMPLETING);
					front_end_ptr->node_state =
						NODE_STATE_DOWN | orig_flags;
				}
				if (node_state & NODE_STATE_DRAIN) {
					front_end_ptr->node_state |=
						NODE_STATE_DRAIN;
				}
				if (node_state & NODE_STATE_FAIL) {
					front_end_ptr->node_state |=
						NODE_STATE_FAIL;
				}
			}
			/* Keep an already configured reason */
			if (!front_end_ptr->reason) {
				front_end_ptr->reason      = reason;
				reason = NULL;	/* Nothing to free */
				front_end_ptr->reason_time = reason_time;
				front_end_ptr->reason_uid  = reason_uid;
			}
		}

		xfree(node_name);
		xfree(reason);
	}
	goto fini;

unpack_error:
	error("Incomplete front_end node data checkpoint file");
	error_code = EFAULT;
	xfree(node_name);
	xfree(reason);

fini:
	info("Recovered state of %d front_end nodes", node_cnt);
	free_buf(buffer);
	return error_code;
#else
	return 0;
#endif
}
extern slurmdb_federation_rec_t *fed_mgr_state_load(char *state_save_location) { Buf buffer = NULL; char *data = NULL, *state_file; time_t buf_time; uint16_t ver = 0; uint32_t data_size = 0; int state_fd; int data_allocated, data_read = 0, error_code = SLURM_SUCCESS; slurmdb_federation_rec_t *ret_fed = NULL; state_file = xstrdup_printf("%s/%s", state_save_location, FED_MGR_STATE_FILE); state_fd = open(state_file, O_RDONLY); if (state_fd < 0) { error("No fed_mgr state file (%s) to recover", state_file); xfree(state_file); return NULL; } else { data_allocated = BUF_SIZE; data = xmalloc(data_allocated); while (1) { data_read = read(state_fd, &data[data_size], BUF_SIZE); if (data_read < 0) { if (errno == EINTR) continue; else { error("Read error on %s: %m", state_file); break; } } else if (data_read == 0) /* eof */ break; data_size += data_read; data_allocated += data_read; xrealloc(data, data_allocated); } close(state_fd); } xfree(state_file); buffer = create_buf(data, data_size); safe_unpack16(&ver, buffer); debug3("Version in fed_mgr_state header is %u", ver); if (ver > SLURM_PROTOCOL_VERSION || ver < SLURM_MIN_PROTOCOL_VERSION) { error("***********************************************"); error("Can not recover fed_mgr state, incompatible version, " "got %u need > %u <= %u", ver, SLURM_MIN_PROTOCOL_VERSION, SLURM_PROTOCOL_VERSION); error("***********************************************"); free_buf(buffer); return NULL; } safe_unpack_time(&buf_time, buffer); error_code = slurmdb_unpack_federation_rec((void **)&ret_fed, ver, buffer); if (error_code != SLURM_SUCCESS) goto unpack_error; else if (!ret_fed || !ret_fed->name || !list_count(ret_fed->cluster_list)) { slurmdb_destroy_federation_rec(ret_fed); ret_fed = NULL; error("No feds retrieved"); } else { /* We want to free the connections here since they don't exist * anymore, but they were packed when state was saved. 
*/ slurmdb_cluster_rec_t *cluster; ListIterator itr = list_iterator_create( ret_fed->cluster_list); while ((cluster = list_next(itr))) { slurm_persist_conn_destroy(cluster->fed.recv); cluster->fed.recv = NULL; slurm_persist_conn_destroy(cluster->fed.send); cluster->fed.send = NULL; } list_iterator_destroy(itr); } free_buf(buffer); return ret_fed; unpack_error: free_buf(buffer); return NULL; }
static void _read_last_decay_ran(time_t *last_ran, time_t *last_reset) { int data_allocated, data_read = 0; uint32_t data_size = 0; int state_fd; char *data = NULL, *state_file; Buf buffer; xassert(last_ran); xassert(last_reset); (*last_ran) = 0; (*last_reset) = 0; /* read the file */ state_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(state_file, "/priority_last_decay_ran"); lock_state_files(); state_fd = open(state_file, O_RDONLY); if (state_fd < 0) { info("No last decay (%s) to recover", state_file); unlock_state_files(); return; } else { data_allocated = BUF_SIZE; data = xmalloc(data_allocated); while (1) { data_read = read(state_fd, &data[data_size], BUF_SIZE); if (data_read < 0) { if (errno == EINTR) continue; else { error("Read error on %s: %m", state_file); break; } } else if (data_read == 0) /* eof */ break; data_size += data_read; data_allocated += data_read; xrealloc(data, data_allocated); } close(state_fd); } xfree(state_file); unlock_state_files(); buffer = create_buf(data, data_size); safe_unpack_time(last_ran, buffer); safe_unpack_time(last_reset, buffer); free_buf(buffer); if (priority_debug) info("Last ran decay on jobs at %ld", (long)*last_ran); return; unpack_error: error("Incomplete priority last decay file returning"); free_buf(buffer); return; }
static void _load_sicp_state(void) { int data_allocated, data_read = 0; uint32_t data_size = 0; int state_fd, sicp_cnt = 0; char *data = NULL, *state_file; struct stat stat_buf; Buf buffer; char *ver_str = NULL; uint32_t ver_str_len; uint16_t protocol_version = (uint16_t)NO_VAL; uint32_t job_id = 0; uint32_t job_state = 0; sicp_job_t *sicp_ptr; time_t buf_time, now; /* read the file */ lock_state_files(); state_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(state_file, "/sicp_state"); state_fd = open(state_file, O_RDONLY); if (state_fd < 0) { error("Could not open job state file %s: %m", state_file); unlock_state_files(); xfree(state_file); return; } else if (fstat(state_fd, &stat_buf) < 0) { error("Could not stat job state file %s: %m", state_file); unlock_state_files(); (void) close(state_fd); xfree(state_file); return; } else if (stat_buf.st_size < 10) { error("Job state file %s too small", state_file); unlock_state_files(); (void) close(state_fd); xfree(state_file); return; } data_allocated = BUF_SIZE; data = xmalloc(data_allocated); while (1) { data_read = read(state_fd, &data[data_size], BUF_SIZE); if (data_read < 0) { if (errno == EINTR) continue; else { error("Read error on %s: %m", state_file); break; } } else if (data_read == 0) /* eof */ break; data_size += data_read; data_allocated += data_read; xrealloc(data, data_allocated); } close(state_fd); xfree(state_file); unlock_state_files(); buffer = create_buf(data, data_size); safe_unpackstr_xmalloc(&ver_str, &ver_str_len, buffer); debug3("Version string in sicp_state header is %s", ver_str); if (ver_str && !strcmp(ver_str, "PROTOCOL_VERSION")) safe_unpack16(&protocol_version, buffer); xfree(ver_str); if (protocol_version == (uint16_t)NO_VAL) { error("************************************************"); error("Can not recover SICP state, incompatible version"); error("************************************************"); xfree(ver_str); free_buf(buffer); return; } safe_unpack_time(&buf_time, 
buffer); now = time(NULL); while (remaining_buf(buffer) > 0) { safe_unpack32(&job_id, buffer); safe_unpack32(&job_state, buffer); sicp_ptr = xmalloc(sizeof(sicp_job_t)); sicp_ptr->job_id = job_id; sicp_ptr->job_state = job_state; sicp_ptr->update_time = now; list_append(sicp_job_list, sicp_ptr); _add_job_hash(sicp_ptr); sicp_cnt++; } free_buf(buffer); info("Recovered information about %d sicp jobs", sicp_cnt); if (slurm_get_debug_flags() & DEBUG_FLAG_SICP) _log_sicp_recs(); return; unpack_error: error("Incomplete sicp data checkpoint file"); info("Recovered information about %d sicp jobs", sicp_cnt); free_buf(buffer); return; }
/*
 * _unpack_job_start_msg - allocate a dbd_job_start_msg_t and fill it from
 *	buffer
 * OUT msg - set to the new message, or to NULL on failure
 * IN rpc_version - version used when the data was packed
 * IN buffer - buffer to read from
 * RET SLURM_SUCCESS, or SLURM_ERROR on an unpack failure or when
 *	rpc_version is below SLURM_MIN_PROTOCOL_VERSION
 * NOTE: two layouts are handled.  From SLURM_19_05_PROTOCOL_VERSION on the
 *	message carries constraints, db_flags and state_reason_prev; older
 *	versions carry a string in place of constraints/db_flags that is
 *	read and discarded.
 */
static int _unpack_job_start_msg(void **msg, uint16_t rpc_version, Buf buffer)
{
	dbd_job_start_msg_t *start_msg = xmalloc(sizeof(dbd_job_start_msg_t));
	uint32_t str_len;

	*msg = start_msg;

	start_msg->array_job_id = 0;
	start_msg->array_task_id = NO_VAL;

	/* Versions that are too old are handled like a malformed message */
	if (rpc_version < SLURM_MIN_PROTOCOL_VERSION)
		goto unpack_error;

	if (rpc_version >= SLURM_19_05_PROTOCOL_VERSION) {
		safe_unpackstr_xmalloc(&start_msg->account, &str_len, buffer);
		safe_unpack32(&start_msg->alloc_nodes, buffer);
		safe_unpack32(&start_msg->array_job_id, buffer);
		safe_unpack32(&start_msg->array_max_tasks, buffer);
		safe_unpack32(&start_msg->array_task_id, buffer);
		safe_unpackstr_xmalloc(&start_msg->array_task_str,
				       &str_len, buffer);
		safe_unpack32(&start_msg->array_task_pending, buffer);
		safe_unpack32(&start_msg->assoc_id, buffer);
		safe_unpackstr_xmalloc(&start_msg->constraints,
				       &str_len, buffer);
		safe_unpack32(&start_msg->db_flags, buffer);
		safe_unpack64(&start_msg->db_index, buffer);
		safe_unpack_time(&start_msg->eligible_time, buffer);
		safe_unpack32(&start_msg->gid, buffer);
		safe_unpackstr_xmalloc(&start_msg->gres_alloc,
				       &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->gres_req,
				       &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->gres_used,
				       &str_len, buffer);
		safe_unpack32(&start_msg->job_id, buffer);
		safe_unpack32(&start_msg->job_state, buffer);
		safe_unpack32(&start_msg->state_reason_prev, buffer);
		safe_unpackstr_xmalloc(&start_msg->mcs_label,
				       &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->name, &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->nodes, &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->node_inx,
				       &str_len, buffer);
		safe_unpack32(&start_msg->pack_job_id, buffer);
		safe_unpack32(&start_msg->pack_job_offset, buffer);
		safe_unpackstr_xmalloc(&start_msg->partition,
				       &str_len, buffer);
		safe_unpack32(&start_msg->priority, buffer);
		safe_unpack32(&start_msg->qos_id, buffer);
		safe_unpack32(&start_msg->req_cpus, buffer);
		safe_unpack64(&start_msg->req_mem, buffer);
		safe_unpack32(&start_msg->resv_id, buffer);
		safe_unpack_time(&start_msg->start_time, buffer);
		safe_unpack_time(&start_msg->submit_time, buffer);
		safe_unpack32(&start_msg->timelimit, buffer);
		safe_unpackstr_xmalloc(&start_msg->tres_alloc_str,
				       &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->tres_req_str,
				       &str_len, buffer);
		safe_unpack32(&start_msg->uid, buffer);
		safe_unpackstr_xmalloc(&start_msg->wckey, &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->work_dir,
				       &str_len, buffer);
	} else {
		char *unused_str = NULL;

		safe_unpackstr_xmalloc(&start_msg->account, &str_len, buffer);
		safe_unpack32(&start_msg->alloc_nodes, buffer);
		safe_unpack32(&start_msg->array_job_id, buffer);
		safe_unpack32(&start_msg->array_max_tasks, buffer);
		safe_unpack32(&start_msg->array_task_id, buffer);
		safe_unpackstr_xmalloc(&start_msg->array_task_str,
				       &str_len, buffer);
		safe_unpack32(&start_msg->array_task_pending, buffer);
		safe_unpack32(&start_msg->assoc_id, buffer);
		/* block_id: consumed to stay aligned, then dropped */
		safe_unpackstr_xmalloc(&unused_str, &str_len, buffer);
		xfree(unused_str);
		safe_unpack64(&start_msg->db_index, buffer);
		safe_unpack_time(&start_msg->eligible_time, buffer);
		safe_unpack32(&start_msg->gid, buffer);
		safe_unpackstr_xmalloc(&start_msg->gres_alloc,
				       &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->gres_req,
				       &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->gres_used,
				       &str_len, buffer);
		safe_unpack32(&start_msg->job_id, buffer);
		safe_unpack32(&start_msg->job_state, buffer);
		safe_unpackstr_xmalloc(&start_msg->mcs_label,
				       &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->name, &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->nodes, &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->node_inx,
				       &str_len, buffer);
		safe_unpack32(&start_msg->pack_job_id, buffer);
		safe_unpack32(&start_msg->pack_job_offset, buffer);
		safe_unpackstr_xmalloc(&start_msg->partition,
				       &str_len, buffer);
		safe_unpack32(&start_msg->priority, buffer);
		safe_unpack32(&start_msg->qos_id, buffer);
		safe_unpack32(&start_msg->req_cpus, buffer);
		safe_unpack64(&start_msg->req_mem, buffer);
		safe_unpack32(&start_msg->resv_id, buffer);
		safe_unpack_time(&start_msg->start_time, buffer);
		safe_unpack_time(&start_msg->submit_time, buffer);
		safe_unpack32(&start_msg->timelimit, buffer);
		safe_unpackstr_xmalloc(&start_msg->tres_alloc_str,
				       &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->tres_req_str,
				       &str_len, buffer);
		safe_unpack32(&start_msg->uid, buffer);
		safe_unpackstr_xmalloc(&start_msg->wckey, &str_len, buffer);
		safe_unpackstr_xmalloc(&start_msg->work_dir,
				       &str_len, buffer);
	}

	return SLURM_SUCCESS;

unpack_error:
	slurmdbd_free_job_start_msg(start_msg);
	*msg = NULL;
	return SLURM_ERROR;
}