/*
 * Unpack one message received on a persistent connection and check that its
 * type is legal at this point of the conversation.
 *
 * IN persist_conn - connection the message arrived on
 * OUT persist_msg - zeroed, then filled in by slurm_persist_msg_unpack()
 * IN msg_char - raw message bytes; only the temporary Buf wrapper around
 *	them is released here (via xfer_buf_data()), not the bytes themselves
 * IN msg_size - number of bytes in msg_char
 * OUT out_buffer - set to a return-code message whenever the request is
 *	rejected; left untouched on success
 * IN first - true when this is the first message seen on the connection
 * RET SLURM_SUCCESS, the unpack error code, or EINVAL when
 *	REQUEST_PERSIST_INIT is missing from / repeated after the first message
 */
extern int slurm_persist_conn_process_msg(slurm_persist_conn_t *persist_conn,
					  persist_msg_t *persist_msg,
					  char *msg_char, uint32_t msg_size,
					  Buf *out_buffer, bool first)
{
	int rc;
	bool is_init;
	Buf recv_buffer = NULL;
	char *comment = NULL;

	/* wrap the raw bytes so that the unpack routines can consume them */
	recv_buffer = create_buf(msg_char, msg_size);
	memset(persist_msg, 0, sizeof(persist_msg_t));
	rc = slurm_persist_msg_unpack(persist_conn, persist_msg, recv_buffer);
	/* drop the wrapper only; msg_char is not xfree'd by this call */
	xfer_buf_data(recv_buffer);

	if (rc != SLURM_SUCCESS) {
		comment = xstrdup_printf("Failed to unpack %s message",
					 slurmdbd_msg_type_2_str(
						 persist_msg->msg_type, true));
		error("CONN:%u %s", persist_conn->fd, comment);
		*out_buffer = slurm_persist_make_rc_msg(
			persist_conn, rc, comment, persist_msg->msg_type);
		xfree(comment);
		return rc;
	}

	/*
	 * The INIT request must be the first message and only the first
	 * message: anything else is a sequencing error.
	 */
	is_init = (persist_msg->msg_type == REQUEST_PERSIST_INIT);
	if (first == is_init)
		return rc;

	if (first) {
		comment = "Initial RPC not REQUEST_PERSIST_INIT";
		error("CONN:%u %s type (%d)",
		      persist_conn->fd, comment, persist_msg->msg_type);
	} else {
		comment = "REQUEST_PERSIST_INIT sent after connection established";
		error("CONN:%u %s", persist_conn->fd, comment);
	}
	rc = EINVAL;
	*out_buffer = slurm_persist_make_rc_msg(
		persist_conn, rc, comment, REQUEST_PERSIST_INIT);

	return rc;
}
/* * Pack message header. * Returns packed size * Note: asymmetric to _recv_unpack_hdr because of additional SLURM header */ static int _send_pack_hdr(void *host, void *net) { send_header_t *ptr = (send_header_t *)host; Buf packbuf = create_buf(net, sizeof(send_header_t)); int size = 0; pack32(ptr->magic, packbuf); pack32(ptr->type, packbuf); pack32(ptr->seq, packbuf); pack32(ptr->nodeid, packbuf); pack32(ptr->msgsize, packbuf); size = get_buf_offset(packbuf); xassert(size == SEND_HDR_SIZE); /* free the Buf packbuf, but not the memory to which it points */ packbuf->head = NULL; free_buf(packbuf); return size; }
/*
 * snd_make_follow -- allocate and initialize the suspension behind a
 * "follow" sound and wrap it with sound_create().
 *
 * sndin     - input sound; its sample rate and start time are inherited
 * floor     - initial value of the follower; also used to fill the delay
 *             buffer (via create_buf) and, through log(floor), to derive
 *             the rise and fall factors
 * risetime  - used together with the sample rate to compute rise_factor
 * falltime  - used together with the sample rate to compute fall_factor
 * lookahead - delay buffer length minus one (one extra slot is added here)
 */
sound_type snd_make_follow(sound_type sndin, double floor, double risetime,
                           double falltime, long lookahead)
{
    follow_susp_type susp;
    rate_type sr = sndin->sr;
    time_type t0 = sndin->t0;
    sample_type scale_factor = 1.0F;
    time_type t0_min = t0;
    double log_floor;

    falloc_generic(susp, follow_susp_node, "snd_make_follow");

    /* delay line: lookahead + 1 samples, with prevptr on the last slot */
    lookahead = lookahead + 1;
    susp->lookahead = lookahead;
    susp->delaybuf = create_buf(floor, lookahead);
    susp->delayptr = susp->delaybuf;
    susp->prevptr = susp->delaybuf + lookahead - 1;
    *(susp->prevptr) = (sample_type) floor;
    susp->endptr = susp->delaybuf + lookahead;

    /* follower state and per-sample rise/fall multipliers */
    susp->floor = floor;
    log_floor = log(floor);
    susp->rise_factor = exp(- log_floor / (sndin->sr * risetime + 0.5));
    susp->fall_factor = exp(log_floor / (sndin->sr * falltime + 0.5));
    susp->value = susp->floor;

    susp->susp.fetch = follow_s_fetch;
    susp->terminate_cnt = UNKNOWN;

    /* handle unequal start times, if any */
    if (t0 < sndin->t0) {
        sound_prepend_zeros(sndin, t0);
    }
    /* minimum start time over all inputs: */
    t0_min = min(sndin->t0, t0);
    /* how many samples to toss before t0: */
    susp->susp.toss_cnt = (long) ((t0 - t0_min) * sr + 0.5);
    if (susp->susp.toss_cnt > 0) {
        /* remember the real fetch routine and discard samples first */
        susp->susp.keep_fetch = susp->susp.fetch;
        susp->susp.fetch = follow_toss_fetch;
    }

    /* initialize susp state */
    susp->susp.free = follow_free;
    susp->susp.sr = sr;
    susp->susp.t0 = t0;
    susp->susp.mark = follow_mark;
    susp->susp.print_tree = follow_print_tree;
    susp->susp.name = "follow";
    susp->susp.log_stop_cnt = UNKNOWN;
    susp->susp.current = 0;
    susp->sndin = sndin;
    susp->sndin_cnt = 0;

    return sound_create((snd_susp_type)susp, t0, sr, scale_factor);
}
// Position the file at the trailer and skip forward to the source-file list.
//
// Reads, in order: the number of source directories followed by that many
// lines, the number of include directories followed by that many lines, the
// number of files, and the string size required. The last value is passed to
// create_buf().
//
// Returns resOK on success, or resFILE_ACCESS_ERR if the seek fails or the
// trailer is truncated / not in the expected numeric format. (Previously a
// failed fscanf left `num` uninitialised and the function still returned
// resOK.)
csdbparser::enResult csdbparser::setup_srcfil_read(void)
{
    CSDBP_GENERAL_CHK();

    long int num = 0;

    if (fseek(m_fp, m_trailer_start, SEEK_SET) != 0)
        return resFILE_ACCESS_ERR;

    // Section 0: source directories; section 1: include directories.
    // Each is a count line followed by `count` lines that are skipped.
    // NOTE(review): a directory line longer than CSDBP_MINIM_BUFSIZE is
    // consumed by more than one fgets() call and would throw the count off;
    // this matches the original behaviour.
    for (int section = 0; section < 2; ++section)
    {
        if (fscanf(m_fp, "%ld\n", &num) != 1)
            return resFILE_ACCESS_ERR;
        while (num-- > 0)
        {
            if (fgets(m_buf, CSDBP_MINIM_BUFSIZE, m_fp) == NULL)
                return resFILE_ACCESS_ERR;
        }
    }

    // number of files (value is read but not used here)
    if (fscanf(m_fp, "%ld\n", &num) != 1)
        return resFILE_ACCESS_ERR;

    // string size required
    if (fscanf(m_fp, "%ld\n", &num) != 1)
        return resFILE_ACCESS_ERR;

    create_buf(num);
    return resOK;
}
void test_copy_constructor() { BuffAggr b1(5); // test for simple non-overlapping additions uint32_t off = 10; for ( uint32_t i=1; i<5; i++) { uint8_t *buf = create_buf(i * 3); b1.set_buf(buf, off, (i*3)); off += i*3; } // test for copy constructor BuffAggr b2(b1); std::cout << "B1 State: " << b1.ToString() << std::endl; std::cout << "B2 State: " << b2.ToString() << std::endl; // assignment operator test BuffAggr b3 = b1; std::cout << "B1 State: " << b1.ToString() << std::endl; std::cout << "B3 State: " << b3.ToString() << std::endl; }
static void _respond_with_error(int seq_num, char *sender_host, char *sender_ns, int status) { Buf buf = create_buf(NULL, 0); char *addr; int rc; /* rank doesn't matter here, don't send it */ _setup_header(buf, DMDX_RESPONSE, pmixp_info_namespace(), -1, status); /* generate namespace usocket name */ addr = pmixp_info_nspace_usock(sender_ns); /* send response */ rc = pmixp_server_send(sender_host, PMIXP_MSG_DMDX, seq_num, addr, get_buf_data(buf), get_buf_offset(buf), 1); if (SLURM_SUCCESS != rc) { PMIXP_ERROR("Cannot send direct modex error" " response to %s", sender_host); } xfree(addr); free_buf(buf); }
/*
 * Unpack a length-prefixed memory blob from `buffer` and hand it back
 * wrapped in a newly created Buf.
 *
 * OUT out - receives the new Buf on success, NULL on failure
 * IN rpc_version - not used by this function
 * IN buffer - source buffer to unpack from
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
static int _unpack_buffer(void **out, uint16_t rpc_version, Buf buffer)
{
	char *payload = NULL;
	uint32_t payload_len;
	Buf wrapped = NULL;

	/* jumps to unpack_error on failure */
	safe_unpackmem_xmalloc(&payload, &payload_len, buffer);

	wrapped = create_buf(payload, payload_len);
	if (wrapped) {
		*out = wrapped;
		return SLURM_SUCCESS;
	}

unpack_error:
	xfree(payload);
	slurmdbd_free_buffer(wrapped);
	*out = NULL;
	return SLURM_ERROR;
}
/*
 * Restore credential state from "<spooldir>/cred_state", if that file exists.
 *
 * The spool directory is created first when missing (fatal on failure other
 * than EEXIST). The whole file is read into memory, wrapped in a Buf and
 * passed to slurm_cred_ctx_unpack().
 *
 * Restoration is best effort: a missing file or a read error leaves `ctx`
 * untouched and the function still returns SLURM_SUCCESS, as before.
 *
 * IN ctx - credential context to restore into
 * RET SLURM_SUCCESS (SLURM_ERROR only follows fatal())
 */
static int _restore_cred_state(slurm_cred_ctx_t ctx)
{
	enum { CRED_READ_CHUNK = 1024 };
	char *file_name = NULL, *data = NULL;
	uint32_t data_size = 0;
	int cred_fd, data_allocated, data_read = 0;
	Buf buffer = NULL;

	if ((mkdir(conf->spooldir, 0755) < 0) && (errno != EEXIST)) {
		fatal("mkdir(%s): %m", conf->spooldir);
		return SLURM_ERROR;
	}

	file_name = xstrdup(conf->spooldir);
	xstrcat(file_name, "/cred_state");
	cred_fd = open(file_name, O_RDONLY);
	if (cred_fd < 0)
		goto cleanup;

	/*
	 * Read until EOF. The buffer always has CRED_READ_CHUNK bytes of
	 * room past data_size, so each read() may fill a whole chunk.
	 * A negative return is never added to the (unsigned) data_size, and
	 * EINTR / short reads no longer end the loop prematurely.
	 */
	data_allocated = CRED_READ_CHUNK;
	data = xmalloc(sizeof(char) * data_allocated);
	while (1) {
		data_read = read(cred_fd, &data[data_size], CRED_READ_CHUNK);
		if (data_read < 0) {
			if (errno == EINTR)
				continue;
			error("Read error on %s: %m", file_name);
			close(cred_fd);
			goto cleanup;	/* data is released below */
		} else if (data_read == 0) {	/* eof */
			break;
		}
		data_size += data_read;
		data_allocated += data_read;
		xrealloc(data, data_allocated);
	}
	close(cred_fd);

	buffer = create_buf(data, data_size);
	if (buffer) {
		data = NULL;	/* now in buffer, don't xfree() */
		slurm_cred_ctx_unpack(ctx, buffer);
	}

cleanup:
	xfree(file_name);
	xfree(data);
	if (buffer)
		free_buf(buffer);
	return SLURM_SUCCESS;
}
// Parse the first line (the header) of the open database file.
//
// On success this sets m_trailer_start from the last 10 characters of the
// header and m_base_path from the text that follows
// "cscope <CSDBP_SUPPORTED_VER> ", with the trailing parameter section and
// an optional closing double quote removed. The per-file state strings are
// cleared first.
//
// Returns:
//   resFILE_NOT_OPEN    - no file is open
//   resFILE_ACCESS_ERR  - the header could not be read, or it is too short
//                         to contain the fixed-width trailing fields
//                         (previously such a header caused an out-of-range
//                         exception or an out-of-bounds write to m_buf)
//   resOK               - header parsed
csdbparser::enResult csdbparser::parse_headers(void)
{
    // Fixed-width tail of the header line, counted from its end.
    const size_t TRAILER_DIGITS  = 10; // trailer offset field
    const size_t TAIL_PLAIN      = 14; // tail length without CSDBP_SUP_PARAM
    const size_t TAIL_WITH_PARAM = 28; // tail length with CSDBP_SUP_PARAM

    if (m_fp == NULL)
    {
        return resFILE_NOT_OPEN;
    }

    create_buf();
    m_base_path.clear();
    m_calling_func.clear();
    m_calling_macro.clear();
    m_current_srcfile.clear();

    // Read out the first line i.e. the header
    if (fgets(m_buf, CSDBP_MINIM_BUFSIZE, m_fp) == NULL)
    {
        return resFILE_ACCESS_ERR;
    }

    const size_t slen = strlen(chomp(m_buf));
    if (slen < TAIL_PLAIN)
    {
        return resFILE_ACCESS_ERR;
    }

    std::string s(static_cast<const char*>(m_buf));
    m_trailer_start = atol(s.substr(slen - TRAILER_DIGITS).c_str());

    // Remove the part of the string after the base path. The longer tail
    // applies only when the supported parameter string sits at that spot.
    const bool has_param =
        (slen >= TAIL_WITH_PARAM) &&
        (s.compare(slen - TAIL_WITH_PARAM, strlen(CSDBP_SUP_PARAM),
                   CSDBP_SUP_PARAM) == 0);
    m_buf[slen - (has_param ? TAIL_WITH_PARAM : TAIL_PLAIN)] = 0;

    s = static_cast<const char*>(m_buf);
    const size_t prefix_len =
        strlen("cscope ") + strlen(CSDBP_SUPPORTED_VER) + 1;
    if (s.size() < prefix_len)
    {
        return resFILE_ACCESS_ERR;
    }
    m_base_path = s.substr(prefix_len);

    // Drop a closing double quote, if the path was quoted
    if (!m_base_path.empty() && *(m_base_path.rbegin()) == '\"')
    {
        m_base_path.erase(m_base_path.end() - 1);
    }

    return resOK;
}
/*
 * Write a base header into `net` field by field with WRITE_HDR_FIELD and,
 * when hdr->ext_flag is set, append the direct-connection endpoint data.
 *
 * Side effect: when ext_flag is set, hdr->msgsize is increased in place by
 * PMIXP_BASE_HDR_EXT_SIZE(pmixp_dconn_ep_len()) before it is written.
 *
 * IN/OUT hdr - header to serialize
 * OUT net - destination memory
 * RET number of bytes written to `net`
 */
static size_t _base_hdr_pack_full_samearch(pmixp_base_hdr_t *hdr, void *net)
{
	int written = 0;
	Buf ep_buf;

	if (hdr->ext_flag) {
		hdr->msgsize += PMIXP_BASE_HDR_EXT_SIZE(pmixp_dconn_ep_len());
	}

	WRITE_HDR_FIELD(net, written, hdr->magic);
	WRITE_HDR_FIELD(net, written, hdr->type);
	WRITE_HDR_FIELD(net, written, hdr->seq);
	WRITE_HDR_FIELD(net, written, hdr->nodeid);
	WRITE_HDR_FIELD(net, written, hdr->msgsize);
	WRITE_HDR_FIELD(net, written, hdr->ext_flag);

	if (!hdr->ext_flag)
		return written;

	/* pack the endpoint data right behind the fixed fields */
	ep_buf = create_buf(net + written, PMIXP_BASE_HDR_MAX);
	packmem(pmixp_dconn_ep_data(), pmixp_dconn_ep_len(), ep_buf);
	written += get_buf_offset(ep_buf);

	/* detach `net` so that only the Buf wrapper is released */
	ep_buf->head = NULL;
	free_buf(ep_buf);

	return written;
}
/*
 * New-message handler for a direct connection: wraps the payload in a Buf
 * sized by the header's msgsize and passes both on to
 * _process_server_request().
 *
 * See process_handler_t prototype description
 * on the details of this function output values
 */
static void _direct_new_msg_conn(pmixp_conn_t *conn, void *_hdr, void *msg)
{
	pmixp_base_hdr_t *base_hdr = _hdr;
	Buf payload = create_buf(msg, base_hdr->msgsize);

	_process_server_request(base_hdr, payload);
}
/*
 * Handle a step-completion message arriving on `fd`.
 *
 * Reads the protocol version, the first/last values of the completed range,
 * the step return code and a packed jobacctinfo record; then, under
 * step_complete.lock, marks the range in step_complete.bits, folds the
 * return code and accounting data into step_complete and writes back a
 * (rc, errno) pair.
 *
 * IN fd - descriptor the request is read from and the reply written to
 * IN job - step record (used for log messages)
 * IN uid - uid of the requester; must pass _slurm_authorized_user()
 * RET SLURM_SUCCESS once a reply has been written (including the
 *	"not authorized" and "timed out" replies), SLURM_FAILURE when a
 *	read or write on `fd` fails
 */
static int _handle_completion(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int first;
	int last;
	jobacctinfo_t *jobacct = NULL;
	int step_rc;
	char *buf = NULL;	/* NULL unless we still own the allocation */
	int len;
	Buf buffer;
	int version;	/* For future use */
	bool lock_set = false;

	debug("_handle_completion for job %u.%u",
	      job->jobid, job->stepid);

	debug3(" uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("step completion message from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		/* Send the return code and errno */
		safe_write(fd, &rc, sizeof(int));
		safe_write(fd, &errnum, sizeof(int));
		return SLURM_SUCCESS;
	}

	safe_read(fd, &version, sizeof(int));
	safe_read(fd, &first, sizeof(int));
	safe_read(fd, &last, sizeof(int));
	safe_read(fd, &step_rc, sizeof(int));

	/*
	 * We must not use getinfo over a pipe with slurmd here
	 * Indeed, slurmstepd does a large use of setinfo over a pipe
	 * with slurmd and doing the reverse can result in a deadlock
	 * scenario with slurmd :
	 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
	 * Do pack/unpack instead to be sure of independence of
	 * slurmd and slurmstepd
	 */
	safe_read(fd, &len, sizeof(int));
	buf = xmalloc(len);
	/* on failure this jumps to rwfail, which releases buf */
	safe_read(fd, buf, len);
	buffer = create_buf(buf, len);
	buf = NULL;	/* now in buffer, released by free_buf() */
	jobacctinfo_unpack(&jobacct, SLURM_PROTOCOL_VERSION,
			   PROTOCOL_TYPE_SLURM, buffer, 1);
	free_buf(buffer);

	/*
	 * Record the completed nodes
	 */
	pthread_mutex_lock(&step_complete.lock);
	lock_set = true;
	if (! step_complete.wait_children) {
		rc = -1;
		errnum = ETIMEDOUT; /* not used anyway */
		goto timeout;
	}

	/* SlurmUser or root can craft a launch without a valid credential
	 * ("srun --no-alloc ...") and no tree information can be built
	 * without the hostlist from the credential.
	 */
	if (step_complete.rank >= 0) {
#if 0
		char bits_string[128];
		debug2("Setting range %d (bit %d) through %d(bit %d)",
		       first, first-(step_complete.rank+1),
		       last, last-(step_complete.rank+1));
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2(" before bits: %s", bits_string);
#endif
		bit_nset(step_complete.bits,
			 first - (step_complete.rank+1),
			 last - (step_complete.rank+1));
#if 0
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2(" after bits: %s", bits_string);
#endif
	}
	step_complete.step_rc = MAX(step_complete.step_rc, step_rc);

	/************* acct stuff ********************/
	jobacctinfo_aggregate(step_complete.jobacct, jobacct);
timeout:
	jobacctinfo_destroy(jobacct);
	/*********************************************/

	/* Send the return code and errno, we do this within the locked
	 * region to ensure that the stepd doesn't exit before we can
	 * perform this send. */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	pthread_cond_signal(&step_complete.cond);
	pthread_mutex_unlock(&step_complete.lock);

	return SLURM_SUCCESS;

rwfail:
	/* no-op unless the payload read failed before the Buf took over */
	xfree(buf);
	if (lock_set) {
		pthread_cond_signal(&step_complete.cond);
		pthread_mutex_unlock(&step_complete.lock);
	}
	return SLURM_FAILURE;
}
/* * Restore global nodeinfo from a file. * * NOTE: switch_p_libstate_restore is only called by slurmctld, and only * once at start-up. We exploit this fact to spawn a pthread to * periodically call _switch_p_libstate_save(). */ extern int switch_p_libstate_restore ( char * dir_name, bool recover ) { char *data = NULL, *file_name; Buf buffer = NULL; int error_code = SLURM_SUCCESS; int state_fd, data_allocated = 0, data_read = 0, data_size = 0; DEF_TIMERS; xassert(dir_name != NULL); if (debug_flags & DEBUG_FLAG_SWITCH) { START_TIMER; info("switch_p_libstate_restore() starting"); } _spawn_state_save_thread(xstrdup(dir_name)); if (!recover) /* clean start, no recovery */ return nrt_init(); file_name = xstrdup(dir_name); xstrcat(file_name, "/nrt_state"); state_fd = open (file_name, O_RDONLY); if (state_fd >= 0) { data_allocated = NRT_BUF_SIZE; data = xmalloc(data_allocated); while (1) { data_read = read (state_fd, &data[data_size], NRT_BUF_SIZE); if ((data_read < 0) && (errno == EINTR)) continue; if (data_read < 0) { error ("Read error on %s, %m", file_name); error_code = SLURM_ERROR; break; } else if (data_read == 0) break; data_size += data_read; data_allocated += data_read; xrealloc(data, data_allocated); } close (state_fd); xfree(file_name); } else { error("No %s file for switch/nrt state recovery", file_name); error("Starting switch/nrt with clean state"); xfree(file_name); return nrt_init(); } if (error_code == SLURM_SUCCESS) { buffer = create_buf (data, data_size); data = NULL; /* now in buffer, don't xfree() */ if (nrt_libstate_restore(buffer) < 0) error_code = SLURM_ERROR; } if (buffer) free_buf(buffer); xfree(data); if (debug_flags & DEBUG_FLAG_SWITCH) { END_TIMER; info("switch_p_libstate_restore() ending %s", TIME_STR); } return error_code; }
// create list of blocks based on map strings struct block * get_mapbuf_str (char * str) { struct block * buffer = create_buf(); unsigned short l = strlen(str); unsigned short i, j; unsigned short is_specialkey = 0; char sk[MAXSC+1]; sk[0] = '\0'; for (i=0; i<l; i++) { // Agrego special keys if (str[i] == '<') { is_specialkey = 1; } else if (str[i] == '>') { is_specialkey = 0; if (! strcmp(sk, "CR")) // CR - ENTER key addto_buf(buffer, OKEY_ENTER); else if (! strcmp(sk, "TAB")) // TAB addto_buf(buffer, OKEY_TAB); else if (! strcmp(sk, "LEFT")) // LEFT addto_buf(buffer, OKEY_LEFT); else if (! strcmp(sk, "RIGHT")) // RIGHT addto_buf(buffer, OKEY_RIGHT); else if (! strcmp(sk, "DOWN")) // DOWN addto_buf(buffer, OKEY_DOWN); else if (! strcmp(sk, "UP")) // UP addto_buf(buffer, OKEY_UP); else if (! strcmp(sk, "DEL")) // DEL addto_buf(buffer, OKEY_DEL); else if (! strcmp(sk, "BS")) // BS addto_buf(buffer, OKEY_BS); else if (! strcmp(sk, "HOME")) // HOME addto_buf(buffer, OKEY_HOME); else if (! strcmp(sk, "END")) // END addto_buf(buffer, OKEY_END); else if (! strcmp(sk, "PGDOWN")) // PGDOWN addto_buf(buffer, OKEY_PGDOWN); else if (! strcmp(sk, "PGUP")) // PGUP addto_buf(buffer, OKEY_PGUP); else if (! strncmp(sk, "C-", 2) && strlen(sk) == 3 // C-X && ( (sk[2] > 64 && sk[2] < 91) || (sk[2] > 96 && sk[2] < 123)) ) addto_buf(buffer, ctl(tolower(sk[2]))); sk[0]='\0'; } else if (is_specialkey && strlen(sk) < MAXSC-1) { add_char(sk, str[i], strlen(sk)); // Agrego otros caracteres } else { addto_buf(buffer, (int) str[i]); } } // en caso de que se tenga en el buffer un string del tipo "<algo", sin la terminación ">", se inserta en el buffer if (is_specialkey && i == l) { j = strlen(sk); addto_buf(buffer, '<'); for (i=0; i<j; i++) addto_buf(buffer, (int) str[l-j+i]); } return buffer; }
/*
 * Copy one item of accounting information out of (or, for
 * JOBACCT_DATA_PIPE, into) a jobacctinfo record.
 *
 * IN/OUT jobacct - record to read from; must already be allocated
 * IN type - selects the item and thereby how `data` is interpreted:
 *	JOBACCT_DATA_TOTAL       struct jobacctinfo * (whole record copied)
 *	JOBACCT_DATA_PIPE        int * (fd to read a packed record from)
 *	JOBACCT_DATA_RUSAGE      struct rusage *
 *	..._ID types             jobacct_id_t *
 *	MIN_CPU, ACT_CPUFREQ     uint32_t *
 *	TOT_CPU, *_DISK_* totals/maxima   double *
 *	remaining types          uint64_t *
 * OUT data - destination (or fd pointer) as described above
 * IN protocol_version - only used for JOBACCT_DATA_PIPE
 * RET SLURM_SUCCESS (also when polling is disabled or the type is unknown),
 *	SLURM_ERROR when reading from the pipe fails
 */
extern int jobacctinfo_getinfo(
	jobacctinfo_t *jobacct, enum jobacct_data_type type, void *data,
	uint16_t protocol_version)
{
	int rc = SLURM_SUCCESS;
	char *buf = NULL;

	if (!plugin_polling)
		return SLURM_SUCCESS;

	/* jobacct needs to be allocated before this is called.   */
	xassert(jobacct);

	switch (type) {
	case JOBACCT_DATA_TOTAL:
		memcpy((struct jobacctinfo *) data, jobacct,
		       sizeof(struct jobacctinfo));
		break;
	case JOBACCT_DATA_PIPE:
		if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
			int *fd = (int *) data;
			int len;
			Buf buffer;

			/* length-prefixed packed record */
			safe_read(*fd, &len, sizeof(int));
			buf = xmalloc(len);
			safe_read(*fd, buf, len);
			buffer = create_buf(buf, len);
			jobacctinfo_unpack(&jobacct, protocol_version,
					   PROTOCOL_TYPE_SLURM, buffer, 0);
			free_buf(buffer);
		}
		break;
	case JOBACCT_DATA_RUSAGE:
	{
		struct rusage *usage = (struct rusage *) data;

		memset(usage, 0, sizeof(struct rusage));
		usage->ru_utime.tv_sec = jobacct->user_cpu_sec;
		usage->ru_utime.tv_usec = jobacct->user_cpu_usec;
		usage->ru_stime.tv_sec = jobacct->sys_cpu_sec;
		usage->ru_stime.tv_usec = jobacct->sys_cpu_usec;
		break;
	}
	case JOBACCT_DATA_MAX_RSS:
		*(uint64_t *) data = jobacct->max_rss;
		break;
	case JOBACCT_DATA_MAX_RSS_ID:
		*(jobacct_id_t *) data = jobacct->max_rss_id;
		break;
	case JOBACCT_DATA_TOT_RSS:
		*(uint64_t *) data = jobacct->tot_rss;
		break;
	case JOBACCT_DATA_MAX_VSIZE:
		*(uint64_t *) data = jobacct->max_vsize;
		break;
	case JOBACCT_DATA_MAX_VSIZE_ID:
		*(jobacct_id_t *) data = jobacct->max_vsize_id;
		break;
	case JOBACCT_DATA_TOT_VSIZE:
		*(uint64_t *) data = jobacct->tot_vsize;
		break;
	case JOBACCT_DATA_MAX_PAGES:
		*(uint64_t *) data = jobacct->max_pages;
		break;
	case JOBACCT_DATA_MAX_PAGES_ID:
		*(jobacct_id_t *) data = jobacct->max_pages_id;
		break;
	case JOBACCT_DATA_TOT_PAGES:
		*(uint64_t *) data = jobacct->tot_pages;
		break;
	case JOBACCT_DATA_MIN_CPU:
		*(uint32_t *) data = jobacct->min_cpu;
		break;
	case JOBACCT_DATA_MIN_CPU_ID:
		*(jobacct_id_t *) data = jobacct->min_cpu_id;
		break;
	case JOBACCT_DATA_TOT_CPU:
		*(double *) data = jobacct->tot_cpu;
		break;
	case JOBACCT_DATA_ACT_CPUFREQ:
		*(uint32_t *) data = jobacct->act_cpufreq;
		break;
	case JOBACCT_DATA_CONSUMED_ENERGY:
		*(uint64_t *) data = jobacct->energy.consumed_energy;
		break;
	case JOBACCT_DATA_MAX_DISK_READ:
		*(double *) data = jobacct->max_disk_read;
		break;
	case JOBACCT_DATA_MAX_DISK_READ_ID:
		*(jobacct_id_t *) data = jobacct->max_disk_read_id;
		break;
	case JOBACCT_DATA_TOT_DISK_READ:
		*(double *) data = jobacct->tot_disk_read;
		break;
	case JOBACCT_DATA_MAX_DISK_WRITE:
		*(double *) data = jobacct->max_disk_write;
		break;
	case JOBACCT_DATA_MAX_DISK_WRITE_ID:
		*(jobacct_id_t *) data = jobacct->max_disk_write_id;
		break;
	case JOBACCT_DATA_TOT_DISK_WRITE:
		*(double *) data = jobacct->tot_disk_write;
		break;
	default:
		debug("jobacct_g_set_getinfo data_type %d invalid", type);
	}
	return rc;

rwfail:
	/* reached from safe_read() when the pipe read fails */
	xfree(buf);
	return SLURM_ERROR;
}
static void _load_sicp_state(void) { int data_allocated, data_read = 0; uint32_t data_size = 0; int state_fd, sicp_cnt = 0; char *data = NULL, *state_file; struct stat stat_buf; Buf buffer; char *ver_str = NULL; uint32_t ver_str_len; uint16_t protocol_version = (uint16_t)NO_VAL; uint32_t job_id = 0; uint32_t job_state = 0; sicp_job_t *sicp_ptr; time_t buf_time, now; /* read the file */ lock_state_files(); state_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(state_file, "/sicp_state"); state_fd = open(state_file, O_RDONLY); if (state_fd < 0) { error("Could not open job state file %s: %m", state_file); unlock_state_files(); xfree(state_file); return; } else if (fstat(state_fd, &stat_buf) < 0) { error("Could not stat job state file %s: %m", state_file); unlock_state_files(); (void) close(state_fd); xfree(state_file); return; } else if (stat_buf.st_size < 10) { error("Job state file %s too small", state_file); unlock_state_files(); (void) close(state_fd); xfree(state_file); return; } data_allocated = BUF_SIZE; data = xmalloc(data_allocated); while (1) { data_read = read(state_fd, &data[data_size], BUF_SIZE); if (data_read < 0) { if (errno == EINTR) continue; else { error("Read error on %s: %m", state_file); break; } } else if (data_read == 0) /* eof */ break; data_size += data_read; data_allocated += data_read; xrealloc(data, data_allocated); } close(state_fd); xfree(state_file); unlock_state_files(); buffer = create_buf(data, data_size); safe_unpackstr_xmalloc(&ver_str, &ver_str_len, buffer); debug3("Version string in sicp_state header is %s", ver_str); if (ver_str && !strcmp(ver_str, "PROTOCOL_VERSION")) safe_unpack16(&protocol_version, buffer); xfree(ver_str); if (protocol_version == (uint16_t)NO_VAL) { error("************************************************"); error("Can not recover SICP state, incompatible version"); error("************************************************"); xfree(ver_str); free_buf(buffer); return; } safe_unpack_time(&buf_time, 
buffer); now = time(NULL); while (remaining_buf(buffer) > 0) { safe_unpack32(&job_id, buffer); safe_unpack32(&job_state, buffer); sicp_ptr = xmalloc(sizeof(sicp_job_t)); sicp_ptr->job_id = job_id; sicp_ptr->job_state = job_state; sicp_ptr->update_time = now; list_append(sicp_job_list, sicp_ptr); _add_job_hash(sicp_ptr); sicp_cnt++; } free_buf(buffer); info("Recovered information about %d sicp jobs", sicp_cnt); if (slurm_get_debug_flags() & DEBUG_FLAG_SICP) _log_sicp_recs(); return; unpack_error: error("Incomplete sicp data checkpoint file"); info("Recovered information about %d sicp jobs", sicp_cnt); free_buf(buffer); return; }
static void _read_last_decay_ran(time_t *last_ran, time_t *last_reset) { int data_allocated, data_read = 0; uint32_t data_size = 0; int state_fd; char *data = NULL, *state_file; Buf buffer; xassert(last_ran); xassert(last_reset); (*last_ran) = 0; (*last_reset) = 0; /* read the file */ state_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(state_file, "/priority_last_decay_ran"); lock_state_files(); state_fd = open(state_file, O_RDONLY); if (state_fd < 0) { info("No last decay (%s) to recover", state_file); unlock_state_files(); return; } else { data_allocated = BUF_SIZE; data = xmalloc(data_allocated); while (1) { data_read = read(state_fd, &data[data_size], BUF_SIZE); if (data_read < 0) { if (errno == EINTR) continue; else { error("Read error on %s: %m", state_file); break; } } else if (data_read == 0) /* eof */ break; data_size += data_read; data_allocated += data_read; xrealloc(data, data_allocated); } close(state_fd); } xfree(state_file); unlock_state_files(); buffer = create_buf(data, data_size); safe_unpack_time(last_ran, buffer); safe_unpack_time(last_reset, buffer); free_buf(buffer); if (priority_debug) info("Last ran decay on jobs at %ld", (long)*last_ran); return; unpack_error: error("Incomplete priority last decay file returning"); free_buf(buffer); return; }
void init_interconnect (char* config_file, unsigned int n_shader, unsigned int n_mem ) { _n_shader = n_shader; _n_mem = n_mem; if (! config_file ) { cout << "Interconnect Requires a configfile" << endl; exit (-1); } icnt_config.Parse( config_file ); net_c = icnt_config.GetInt( "network_count" ); if (net_c==2) { doub_net = true; } else if (net_c<1 || net_c>2) { cout <<net_c<<" Network_count less than 1 or more than 2 not supported."<<endl; abort(); } g_num_vcs = icnt_config.GetInt( "num_vcs" ); InitializeRoutingMap( ); InitializeTrafficMap( ); InitializeInjectionMap( ); RandomSeed( icnt_config.GetInt("seed") ); Network_gpgpu **net; traffic = new TrafficManager *[net_c]; net = new Network_gpgpu *[net_c]; for (unsigned i=0;i<net_c;i++) { string topo; icnt_config.GetStr( "topology", topo ); if ( topo == "torus" ) { net[i] = new KNCube( icnt_config, true ); } else if ( topo =="mesh" ) { net[i] = new KNCube( icnt_config, false ); } else if ( topo == "fly" ) { net[i] = new KNFly( icnt_config ); } else if ( topo == "single" ) { net[i] = new SingleNet( icnt_config ); } else { cerr << "Unknown topology " << topo << endl; exit(-1); } if ( icnt_config.GetInt( "link_failures" ) ) { net[i]->InsertRandomFaults( icnt_config ); } traffic[i] = new TrafficManager ( icnt_config, net[i], i/*id*/ ); } fixed_lat_icnt = icnt_config.GetInt( "fixed_lat_per_hop" ); if (icnt_config.GetInt( "perfect_icnt" )) { perfect_icnt = true; fixed_lat_icnt = 1; } _flit_size = icnt_config.GetInt( "flit_size" ); if (icnt_config.GetInt("ejection_buf_size")) { ejection_buffer_capacity = icnt_config.GetInt( "ejection_buf_size" ) ; } else { ejection_buffer_capacity = icnt_config.GetInt( "vc_buf_size" ); } boundary_buf_capacity = icnt_config.GetInt( "boundary_buf_size" ) ; if (icnt_config.GetInt("input_buf_size")) { input_buffer_capacity = icnt_config.GetInt("input_buf_size"); } else { input_buffer_capacity = 9; } create_buf(traffic[0]->_dests,input_buffer_capacity,icnt_config.GetInt( "num_vcs" )); 
MATLAB_OUTPUT = icnt_config.GetInt("MATLAB_OUTPUT"); DISPLAY_LAT_DIST = icnt_config.GetInt("DISPLAY_LAT_DIST"); DISPLAY_HOP_DIST = icnt_config.GetInt("DISPLAY_HOP_DIST"); DISPLAY_PAIR_LATENCY = icnt_config.GetInt("DISPLAY_PAIR_LATENCY"); create_node_map(n_shader,n_mem,traffic[0]->_dests, icnt_config.GetInt("use_map")); for (unsigned i=0;i<net_c;i++) { traffic[i]->_FirstStep(); } }
int switch_p_libstate_restore(char *dir_name, bool recover) { #ifdef HAVE_NATIVE_CRAY char *data = NULL, *file_name; Buf buffer = NULL; int error_code = SLURM_SUCCESS; int state_fd, data_allocated = 0, data_read = 0, data_size = 0; xassert(dir_name != NULL); if (debug_flags & DEBUG_FLAG_SWITCH) { CRAY_INFO("restore from %s, recover %d", dir_name, (int) recover); } if (!recover) /* clean start, no recovery */ return SLURM_SUCCESS; file_name = xstrdup(dir_name); xstrcat(file_name, "/switch_cray_state"); state_fd = open (file_name, O_RDONLY); if (state_fd >= 0) { data_allocated = SWITCH_BUF_SIZE; data = xmalloc(data_allocated); while (1) { data_read = read (state_fd, &data[data_size], SWITCH_BUF_SIZE); if ((data_read < 0) && (errno == EINTR)) continue; if (data_read < 0) { CRAY_ERR("Read error on %s, %m", file_name); error_code = SLURM_ERROR; break; } else if (data_read == 0) break; data_size += data_read; data_allocated += data_read; xrealloc(data, data_allocated); } close (state_fd); (void) unlink(file_name); /* One chance to recover */ xfree(file_name); } else { CRAY_ERR("No %s file for switch/cray state recovery", file_name); CRAY_ERR("Starting switch/cray with clean state"); xfree(file_name); return SLURM_SUCCESS; } if (error_code == SLURM_SUCCESS) { buffer = create_buf (data, data_size); data = NULL; /* now in buffer, don't xfree() */ _state_read_buf(buffer); } if (buffer) free_buf(buffer); xfree(data); #endif return SLURM_SUCCESS; }
/*
 * Dispatch one server-to-server message according to hdr->type.
 *
 * IN hdr - base header of the message (type, seq and nodeid are used)
 * IN buf - message payload; whatever `buf` refers to when the switch is
 *	left is passed to free_buf() at the end of this function
 */
static void _process_server_request(pmixp_base_hdr_t *hdr, Buf buf)
{
	int rc;

	switch (hdr->type) {
	case PMIXP_MSG_FAN_IN:
	case PMIXP_MSG_FAN_OUT:
	{
		pmixp_coll_t *coll;
		pmixp_proc_t *procs = NULL;
		size_t nprocs = 0;
		pmixp_coll_type_t type = 0;
		int c_nodeid;
		int is_fan_in = (PMIXP_MSG_FAN_IN == hdr->type);

		rc = pmixp_coll_unpack_info(buf, &type, &c_nodeid,
					    &procs, &nprocs);
		if (rc != SLURM_SUCCESS) {
			char *nodename = pmixp_info_job_host(hdr->nodeid);
			PMIXP_ERROR("Bad message header from node %s",
				    nodename);
			xfree(nodename);
			goto exit;
		}
		coll = pmixp_state_coll_get(type, procs, nprocs);
		xfree(procs);

		PMIXP_DEBUG("FENCE collective message from nodeid = %u, "
			    "type = %s, seq = %d",
			    hdr->nodeid,
			    (is_fan_in ? "fan-in" : "fan-out"),
			    hdr->seq);

		rc = pmixp_coll_check_seq(coll, hdr->seq);
		if (rc == PMIXP_COLL_REQ_FAILURE) {
			/* This must never happen: either something went
			 * really wrong or the state machine is incorrect.
			 * The application is guaranteed to hang, so the
			 * job step is killed.
			 */
			char *nodename = pmixp_info_job_host(hdr->nodeid);
			PMIXP_ERROR("Bad collective seq. #%d from %s, current"
				    " is %d",
				    hdr->seq, nodename, coll->seq);
			pmixp_debug_hang(0); /* enable hang to debug this! */
			slurm_kill_job_step(pmixp_info_jobid(),
					    pmixp_info_stepid(), SIGKILL);
			xfree(nodename);
			break;
		}
		if (rc == PMIXP_COLL_REQ_SKIP) {
			PMIXP_DEBUG("Wrong collective seq. #%d from"
				    " nodeid %u, current is %d, skip "
				    "this message",
				    hdr->seq, hdr->nodeid, coll->seq);
			goto exit;
		}

		if (is_fan_in) {
			pmixp_coll_contrib_child(coll, hdr->nodeid,
						 hdr->seq, buf);
		} else {
			pmixp_coll_contrib_parent(coll, hdr->nodeid,
						  hdr->seq, buf);
		}
		break;
	}
	case PMIXP_MSG_DMDX:
	{
		pmixp_dmdx_process(buf, hdr->nodeid, hdr->seq);
		/* buf will be free'd by the PMIx callback, so protect the
		 * data by replacing it with an empty buffer before the
		 * common free_buf() below.
		 * An empty buffer is used instead of (buf = NULL) to keep
		 * encapsulation - in general `buf` is not a pointer but an
		 * opaque type.
		 */
		buf = create_buf(NULL, 0);
		break;
	}
	case PMIXP_MSG_INIT_DIRECT:
		PMIXP_DEBUG("Direct connection init from %d", hdr->nodeid);
		break;
#ifndef NDEBUG
	case PMIXP_MSG_PINGPONG:
	{
		/* In ping-pong mode node 0 sends the ping requests and the
		 * receiver is expected to respond back to node 0.
		 */
		int msize = remaining_buf(buf);

		if (pmixp_info_nodeid()) {
			pmixp_server_pp_send(0, msize);
		} else if (pmixp_server_pp_same_thread()) {
			if (pmixp_server_pp_count() ==
			    pmixp_server_pp_warmups()) {
				pmixp_server_pp_start();
			}
			if (!pmixp_server_pp_check_fini(msize)) {
				pmixp_server_pp_send(1, msize);
			}
		}
		pmixp_server_pp_inc();
		break;
	}
#endif
	default:
		PMIXP_ERROR("Unknown message type %d", hdr->type);
		break;
	}

exit:
	free_buf(buf);
}
/*
 * Dispatch one message received from another PMIx server according to
 * the message type stored in its header.
 *
 * IN _hdr    - receive header; only the embedded send header is used
 *		(type, nodeid, seq, msgsize)
 * IN payload - message body of hdr->msgsize bytes; it is wrapped into a
 *		Buf with create_buf(), and that Buf is either passed on
 *		(PMIXP_MSG_FAN_OUT, PMIXP_MSG_DMDX) or released here with
 *		free_buf().
 *
 * The host name looked up for the sender is released on every path,
 * including the error paths.
 */
static void _process_server_request(recv_header_t *_hdr, void *payload)
{
	send_header_t *hdr = &_hdr->send_hdr;
	char *nodename = pmixp_info_job_host(hdr->nodeid);
	Buf buf;
	int rc;

	buf = create_buf(payload, hdr->msgsize);

	switch (hdr->type) {
	case PMIXP_MSG_FAN_IN:
	case PMIXP_MSG_FAN_OUT: {
		pmixp_coll_t *coll;
		pmix_proc_t *procs = NULL;
		size_t nprocs = 0;
		pmixp_coll_type_t type = 0;

		rc = pmixp_coll_unpack_ranges(buf, &type, &procs, &nprocs);
		if (SLURM_SUCCESS != rc) {
			PMIXP_ERROR("Bad message header from node %s",
				    nodename);
			/*
			 * Leave through the common exit so that nodename
			 * is released too; nobody else will use buf.
			 */
			free_buf(buf);
			break;
		}
		coll = pmixp_state_coll_get(type, procs, nprocs);
		xfree(procs);

		PMIXP_DEBUG("FENCE collective message from node \"%s\", type = %s, seq = %d",
			    nodename,
			    (PMIXP_MSG_FAN_IN == hdr->type) ?
			    "fan-in" : "fan-out",
			    hdr->seq);
		rc = pmixp_coll_check_seq(coll, hdr->seq, nodename);
		if (PMIXP_COLL_REQ_FAILURE == rc) {
			/*
			 * This event is unacceptable: either something went
			 * really wrong or the state machine is incorrect.
			 * It will 100% lead to an application hang, so the
			 * job step is killed.
			 */
			PMIXP_ERROR("Bad collective seq. #%d from %s, current is %d",
				    hdr->seq, nodename, coll->seq);
			pmixp_debug_hang(0); /* enable hang to debug this! */
			slurm_kill_job_step(pmixp_info_jobid(),
					    pmixp_info_stepid(), SIGKILL);
			/* the message is dropped, so drop its buffer too */
			free_buf(buf);
			break;
		} else if (PMIXP_COLL_REQ_SKIP == rc) {
			PMIXP_DEBUG("Wrong collective seq. #%d from %s, current is %d, skip this message",
				    hdr->seq, nodename, coll->seq);
			free_buf(buf);
			break;
		}

		if (PMIXP_MSG_FAN_IN == hdr->type) {
			pmixp_coll_contrib_node(coll, nodename, buf);
			/* we don't need this buffer anymore */
			free_buf(buf);
		} else {
			pmixp_coll_bcast(coll, buf);
			/* buf will be free'd by the PMIx callback */
		}
		break;
	}
	case PMIXP_MSG_DMDX: {
		/*
		 * NOTE(review): buf is not released here; presumably
		 * pmixp_dmdx_process() takes ownership of it - confirm.
		 */
		pmixp_dmdx_process(buf, nodename, hdr->seq);
		break;
	}
	case PMIXP_MSG_HEALTH_CHK: {
		/* this is just health ping.
		 * TODO: can we do something more sophisticated?
		 */
		free_buf(buf);
		break;
	}
	default:
		PMIXP_ERROR("Unknown message type %d", hdr->type);
		/* nobody consumes an unknown message: release it */
		free_buf(buf);
		break;
	}
	xfree(nodename);
}
extern slurmdb_federation_rec_t *fed_mgr_state_load(char *state_save_location) { Buf buffer = NULL; char *data = NULL, *state_file; time_t buf_time; uint16_t ver = 0; uint32_t data_size = 0; int state_fd; int data_allocated, data_read = 0, error_code = SLURM_SUCCESS; slurmdb_federation_rec_t *ret_fed = NULL; state_file = xstrdup_printf("%s/%s", state_save_location, FED_MGR_STATE_FILE); state_fd = open(state_file, O_RDONLY); if (state_fd < 0) { error("No fed_mgr state file (%s) to recover", state_file); xfree(state_file); return NULL; } else { data_allocated = BUF_SIZE; data = xmalloc(data_allocated); while (1) { data_read = read(state_fd, &data[data_size], BUF_SIZE); if (data_read < 0) { if (errno == EINTR) continue; else { error("Read error on %s: %m", state_file); break; } } else if (data_read == 0) /* eof */ break; data_size += data_read; data_allocated += data_read; xrealloc(data, data_allocated); } close(state_fd); } xfree(state_file); buffer = create_buf(data, data_size); safe_unpack16(&ver, buffer); debug3("Version in fed_mgr_state header is %u", ver); if (ver > SLURM_PROTOCOL_VERSION || ver < SLURM_MIN_PROTOCOL_VERSION) { error("***********************************************"); error("Can not recover fed_mgr state, incompatible version, " "got %u need > %u <= %u", ver, SLURM_MIN_PROTOCOL_VERSION, SLURM_PROTOCOL_VERSION); error("***********************************************"); free_buf(buffer); return NULL; } safe_unpack_time(&buf_time, buffer); error_code = slurmdb_unpack_federation_rec((void **)&ret_fed, ver, buffer); if (error_code != SLURM_SUCCESS) goto unpack_error; else if (!ret_fed || !ret_fed->name || !list_count(ret_fed->cluster_list)) { slurmdb_destroy_federation_rec(ret_fed); ret_fed = NULL; error("No feds retrieved"); } else { /* We want to free the connections here since they don't exist * anymore, but they were packed when state was saved. 
*/ slurmdb_cluster_rec_t *cluster; ListIterator itr = list_iterator_create( ret_fed->cluster_list); while ((cluster = list_next(itr))) { slurm_persist_conn_destroy(cluster->fed.recv); cluster->fed.recv = NULL; slurm_persist_conn_destroy(cluster->fed.send); cluster->fed.send = NULL; } list_iterator_destroy(itr); } free_buf(buffer); return ret_fed; unpack_error: free_buf(buffer); return NULL; }
/*
 * load_all_front_end_state - Load the front_end node state from file,
 *	recover on slurmctld restart. Execute this after loading the
 *	configuration file data. Data goes into common storage.
 * IN state_only - if true, overwrite only front_end node state and reason.
 *	Use this to overwrite the "UNKNOWN" state typically used in
 *	slurm.conf
 * RET 0 or error code (always 0 when built without HAVE_FRONT_END)
 * NOTE: READ lock_slurmctld config before entry
 */
extern int load_all_front_end_state(bool state_only)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *fe_rec;
	Buf buffer;
	char *node_name = NULL, *reason = NULL;
	char *contents = NULL, *state_file, *ver_str = NULL;
	uint32_t contents_len = 0, str_len;
	uint32_t reason_uid = NO_VAL;
	uint16_t protocol_version = (uint16_t) NO_VAL;
	uint16_t node_state;
	time_t reason_time = 0, time_stamp;
	int capacity, nread = 0, state_fd;
	int error_code = 0, node_cnt = 0;

	/* read the whole state file while holding the state file lock */
	lock_state_files();
	state_fd = _open_front_end_state_file(&state_file);
	if (state_fd < 0) {
		info ("No node state file (%s) to recover", state_file);
		error_code = ENOENT;
	} else {
		capacity = BUF_SIZE;
		contents = xmalloc(capacity);
		for (;;) {
			nread = read(state_fd, &contents[contents_len],
				     BUF_SIZE);
			if (nread == 0)		/* eof */
				break;
			if (nread < 0) {
				if (errno == EINTR)
					continue;
				error ("Read error on %s: %m", state_file);
				break;
			}
			contents_len += nread;
			capacity += nread;
			xrealloc(contents, capacity);
		}
		close(state_fd);
	}
	xfree(state_file);
	unlock_state_files();

	buffer = create_buf(contents, contents_len);

	/* header: version string followed by the save time */
	safe_unpackstr_xmalloc(&ver_str, &str_len, buffer);
	debug3("Version string in front_end_state header is %s", ver_str);
	if (ver_str && !strcmp(ver_str, FRONT_END_STATE_VERSION))
		protocol_version = SLURM_PROTOCOL_VERSION;

	if (protocol_version == (uint16_t) NO_VAL) {
		error("*****************************************************");
		error("Can not recover front_end state, version incompatible");
		error("*****************************************************");
		xfree(ver_str);
		free_buf(buffer);
		return EFAULT;
	}
	xfree(ver_str);

	safe_unpack_time(&time_stamp, buffer);

	/* one record per front_end node until the buffer is exhausted */
	while (remaining_buf(buffer) > 0) {
		uint16_t base_state;

		if (protocol_version < SLURM_2_5_PROTOCOL_VERSION)
			goto unpack_error;

		safe_unpackstr_xmalloc(&node_name, &str_len, buffer);
		safe_unpack16(&node_state, buffer);
		safe_unpackstr_xmalloc(&reason, &str_len, buffer);
		safe_unpack_time(&reason_time, buffer);
		safe_unpack32(&reason_uid, buffer);
		base_state = node_state & NODE_STATE_BASE;

		/* find record and perform update */
		fe_rec = find_front_end_record(node_name);
		if (!fe_rec) {
			error("Front_end node %s has vanished from "
			      "configuration", node_name);
		} else if (!state_only) {
			/* full recovery: take everything from the file */
			node_cnt++;
			fe_rec->node_state = node_state;
			xfree(fe_rec->reason);
			fe_rec->reason = reason;
			reason = NULL;	/* Nothing to free */
			fe_rec->reason_time = reason_time;
			fe_rec->reason_uid = reason_uid;
			fe_rec->last_response = (time_t) 0;
		} else {
			uint16_t orig_flags =
				fe_rec->node_state & NODE_STATE_FLAGS;

			node_cnt++;
			if (IS_NODE_UNKNOWN(fe_rec)) {
				if (base_state == NODE_STATE_DOWN) {
					orig_flags &= (~NODE_STATE_COMPLETING);
					fe_rec->node_state =
						NODE_STATE_DOWN | orig_flags;
				}
				if (node_state & NODE_STATE_DRAIN)
					fe_rec->node_state |= NODE_STATE_DRAIN;
				if (node_state & NODE_STATE_FAIL)
					fe_rec->node_state |= NODE_STATE_FAIL;
			}
			/* keep an already configured reason untouched */
			if (fe_rec->reason == NULL) {
				fe_rec->reason = reason;
				reason = NULL;	/* Nothing to free */
				fe_rec->reason_time = reason_time;
				fe_rec->reason_uid = reason_uid;
			}
		}

		xfree(node_name);
		xfree(reason);
	}
	goto fini;

unpack_error:
	error("Incomplete front_end node data checkpoint file");
	error_code = EFAULT;
	xfree (node_name);
	xfree(reason);

fini:
	info("Recovered state of %d front_end nodes", node_cnt);
	free_buf(buffer);
	return error_code;
#else
	return 0;
#endif
}
/*
 * slurm_persist_recv_msg - read one message from a persistent connection.
 *
 * A 32-bit size in network byte order is read first, followed by that
 * many bytes of message data.
 *
 * IN persist_conn - connection to read from
 * RET Buf created over the received message data, or NULL if the
 *	connection has no valid fd, is not readable, or the message could
 *	not be read completely. Except for the invalid fd case, a failure
 *	also reopens the connection when it is not shutting down and has
 *	PERSIST_FLAG_RECONNECT set.
 */
extern Buf slurm_persist_recv_msg(slurm_persist_conn_t *persist_conn)
{
	uint32_t msg_size, nw_size;
	char *msg;
	ssize_t msg_read, offset;
	Buf buffer;

	xassert(persist_conn);

	if (persist_conn->fd < 0)
		return NULL;

	if (!_conn_readable(persist_conn))
		goto endit;

	/* message size header */
	msg_read = read(persist_conn->fd, &nw_size, sizeof(nw_size));
	if (msg_read != sizeof(nw_size))
		goto endit;
	msg_size = ntohl(nw_size);
	/* We don't error check for an upper limit here
	 * since size could possibly be massive */
	if (msg_size < 2) {
		error("Persistent Conn: Invalid msg_size (%u)", msg_size);
		goto endit;
	}

	/* message data; read() may return it in several pieces */
	msg = xmalloc(msg_size);
	offset = 0;
	while (msg_size > offset) {
		if (!_conn_readable(persist_conn))
			break;		/* problem with this socket */
		msg_read = read(persist_conn->fd, (msg + offset),
				(msg_size - offset));
		if (msg_read <= 0) {
			error("Persistent Conn: read: %m");
			break;
		}
		offset += msg_read;
	}
	if (msg_size != offset) {
		if (!(*persist_conn->shutdown)) {
			/* msg_size is unsigned: print it with %u */
			error("Persistent Conn: only read %zd of %u bytes",
			      offset, msg_size);
		}	/* else in shutdown mode */
		xfree(msg);
		goto endit;
	}

	buffer = create_buf(msg, msg_size);
	return buffer;

endit:
	/* Close it since we abandoned it.  If the connection does still exist
	 * on the other end we can't rely on it after this point since we didn't
	 * listen long enough for this response.
	 */
	if (!(*persist_conn->shutdown) &&
	    persist_conn->flags & PERSIST_FLAG_RECONNECT)
		slurm_persist_conn_reopen(persist_conn, true);

	return NULL;
}
/*
 * This function handles the initialization information from slurmd
 * sent by _send_slurmstepd_init() in src/slurmd/slurmd/req.c.
 *
 * Data is read from sock in a fixed order: step type, reverse-tree
 * info, slurmd configuration, packed client address, packed self
 * address (optional), GRES information, packed launch request and the
 * cached group id array.
 *
 * IN sock     - socket connected to slurmd
 * IN argv     - argv[0] is used to initialize logging
 * OUT _cli    - client address
 * OUT _self   - self address, NULL if slurmd sent none
 * OUT _msg    - launch request message
 * OUT _ngids  - number of entries in _gids
 * OUT _gids   - group id array, NULL if none were sent
 * RET 1; any failure ends in fatal()
 */
static int _init_from_slurmd(int sock, char **argv, slurm_addr_t **_cli, slurm_addr_t **_self, slurm_msg_t **_msg, int *_ngids, gid_t **_gids)
{
	log_options_t lopts = LOG_OPTS_INITIALIZER;
	slurm_addr_t *cli_addr = NULL;
	slurm_addr_t *self_addr = NULL;
	slurm_msg_t *req = NULL;
	gid_t *gid_list = NULL;
	char *packed = NULL;
	char addr_str[16];
	Buf unpack_buf;
	uint16_t port;
	int step_type;
	int len;
	int gid_cnt = 0;

	log_init(argv[0], lopts, LOG_DAEMON, NULL);

	/* receive job type from slurmd */
	safe_read(sock, &step_type, sizeof(int));
	debug3("step_type = %d", step_type);

	/* receive reverse-tree info from slurmd */
	pthread_mutex_lock(&step_complete.lock);
	safe_read(sock, &step_complete.rank, sizeof(int));
	safe_read(sock, &step_complete.parent_rank, sizeof(int));
	safe_read(sock, &step_complete.children, sizeof(int));
	safe_read(sock, &step_complete.depth, sizeof(int));
	safe_read(sock, &step_complete.max_depth, sizeof(int));
	safe_read(sock, &step_complete.parent_addr, sizeof(slurm_addr_t));
	step_complete.bits = bit_alloc(step_complete.children);
	step_complete.jobacct = jobacct_gather_g_create(NULL);
	pthread_mutex_unlock(&step_complete.lock);

	/* receive conf from slurmd */
	conf = read_slurmd_conf_lite (sock);
	if (conf == NULL)
		fatal("Failed to read conf from slurmd");
	log_alter(conf->log_opts, 0, conf->logfile);

	debug2("debug level is %d.", conf->debug_level);
	/* acct info */
	jobacct_gather_g_startpoll(conf->job_acct_gather_freq);

	switch_g_slurmd_step_init();

	slurm_get_ip_str(&step_complete.parent_addr, &port, addr_str,
			 sizeof(addr_str));
	debug3("slurmstepd rank %d, parent address = %s, port = %u",
	       step_complete.rank, addr_str, port);

	/* receive cli from slurmd */
	safe_read(sock, &len, sizeof(int));
	packed = xmalloc(sizeof(char) * len);
	safe_read(sock, packed, len);
	unpack_buf = create_buf(packed, len);
	cli_addr = xmalloc(sizeof(slurm_addr_t));
	if (slurm_unpack_slurm_addr_no_alloc(cli_addr, unpack_buf)
	    == SLURM_ERROR)
		fatal("slurmstepd: problem with unpack of slurmd_conf");
	free_buf(unpack_buf);

	/* receive self from slurmd; a length of 0 means "not sent" */
	safe_read(sock, &len, sizeof(int));
	if (len > 0) {
		/* receive packed self from main slurmd */
		packed = xmalloc(sizeof(char) * len);
		safe_read(sock, packed, len);
		unpack_buf = create_buf(packed, len);
		self_addr = xmalloc(sizeof(slurm_addr_t));
		if (slurm_unpack_slurm_addr_no_alloc(self_addr, unpack_buf)
		    == SLURM_ERROR) {
			fatal("slurmstepd: problem with unpack of "
			      "slurmd_conf");
		}
		free_buf(unpack_buf);
	}

	/* Receive GRES information from slurmd */
	gres_plugin_recv_stepd(sock);

	/* receive req from slurmd */
	safe_read(sock, &len, sizeof(int));
	packed = xmalloc(sizeof(char) * len);
	safe_read(sock, packed, len);
	unpack_buf = create_buf(packed, len);

	req = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(req);

	/* the step type selects how the request is to be unpacked */
	if (step_type == LAUNCH_BATCH_JOB)
		req->msg_type = REQUEST_BATCH_JOB_LAUNCH;
	else if (step_type == LAUNCH_TASKS)
		req->msg_type = REQUEST_LAUNCH_TASKS;
	else
		fatal("Unrecognized launch RPC");

	if (unpack_msg(req, unpack_buf) == SLURM_ERROR)
		fatal("slurmstepd: we didn't unpack the request correctly");
	free_buf(unpack_buf);

	/* receive cached group ids array for the relevant uid */
	safe_read(sock, &gid_cnt, sizeof(int));
	if (gid_cnt > 0) {
		uint32_t wire_gid;
		int idx;

		gid_list = (gid_t *)xmalloc(sizeof(gid_t) * gid_cnt);
		for (idx = 0; idx < gid_cnt; idx++) {
			safe_read(sock, &wire_gid, sizeof(uint32_t));
			gid_list[idx] = (gid_t)wire_gid;
			debug2("got gid %d", gid_list[idx]);
		}
	}

	*_cli = cli_addr;
	*_self = self_addr;
	*_msg = req;
	*_ngids = gid_cnt;
	*_gids = gid_list;

	return 1;

rwfail:
	fatal("Error reading initialization data from slurmd");
	exit(1);
}
/* * load_all_part_state - load the partition state from file, recover on * slurmctld restart. execute this after loading the configuration * file data. * NOTE: READ lock_slurmctld config before entry */ int load_all_part_state(void) { char *part_name = NULL, *allow_groups = NULL, *nodes = NULL; char *state_file, *data = NULL; uint32_t max_time, default_time, max_nodes, min_nodes; uint32_t max_cpus_per_node = INFINITE, grace_time = 0; time_t time; uint16_t flags; uint16_t max_share, preempt_mode, priority, state_up, cr_type; struct part_record *part_ptr; uint32_t data_size = 0, name_len; int data_allocated, data_read = 0, error_code = 0, part_cnt = 0; int state_fd; Buf buffer; char *ver_str = NULL; char* allow_alloc_nodes = NULL; uint16_t protocol_version = (uint16_t)NO_VAL; char* alternate = NULL; /* read the file */ lock_state_files(); state_fd = _open_part_state_file(&state_file); if (state_fd < 0) { info("No partition state file (%s) to recover", state_file); error_code = ENOENT; } else { data_allocated = BUF_SIZE; data = xmalloc(data_allocated); while (1) { data_read = read(state_fd, &data[data_size], BUF_SIZE); if (data_read < 0) { if (errno == EINTR) continue; else { error("Read error on %s: %m", state_file); break; } } else if (data_read == 0) /* eof */ break; data_size += data_read; data_allocated += data_read; xrealloc(data, data_allocated); } close(state_fd); } xfree(state_file); unlock_state_files(); buffer = create_buf(data, data_size); safe_unpackstr_xmalloc( &ver_str, &name_len, buffer); debug3("Version string in part_state header is %s", ver_str); if (ver_str) { if (!strcmp(ver_str, PART_STATE_VERSION)) { protocol_version = SLURM_PROTOCOL_VERSION; } else if (!strcmp(ver_str, PART_2_5_STATE_VERSION)) { protocol_version = SLURM_2_5_PROTOCOL_VERSION; } } if (protocol_version == (uint16_t)NO_VAL) { error("**********************************************************"); error("Can not recover partition state, data version incompatible"); 
error("**********************************************************"); xfree(ver_str); free_buf(buffer); return EFAULT; } xfree(ver_str); safe_unpack_time(&time, buffer); while (remaining_buf(buffer) > 0) { if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&part_name, &name_len, buffer); safe_unpack32(&grace_time, buffer); safe_unpack32(&max_time, buffer); safe_unpack32(&default_time, buffer); safe_unpack32(&max_cpus_per_node, buffer); safe_unpack32(&max_nodes, buffer); safe_unpack32(&min_nodes, buffer); safe_unpack16(&flags, buffer); safe_unpack16(&max_share, buffer); safe_unpack16(&preempt_mode, buffer); safe_unpack16(&priority, buffer); if (priority > part_max_priority) part_max_priority = priority; safe_unpack16(&state_up, buffer); safe_unpack16(&cr_type, buffer); safe_unpackstr_xmalloc(&allow_groups, &name_len, buffer); safe_unpackstr_xmalloc(&allow_alloc_nodes, &name_len, buffer); safe_unpackstr_xmalloc(&alternate, &name_len, buffer); safe_unpackstr_xmalloc(&nodes, &name_len, buffer); if ((flags & PART_FLAG_DEFAULT_CLR) || (flags & PART_FLAG_HIDDEN_CLR) || (flags & PART_FLAG_NO_ROOT_CLR) || (flags & PART_FLAG_ROOT_ONLY_CLR) || (flags & PART_FLAG_REQ_RESV_CLR)) { error("Invalid data for partition %s: flags=%u", part_name, flags); error_code = EINVAL; } } else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { safe_unpackstr_xmalloc(&part_name, &name_len, buffer); safe_unpack32(&grace_time, buffer); safe_unpack32(&max_time, buffer); safe_unpack32(&default_time, buffer); safe_unpack32(&max_nodes, buffer); safe_unpack32(&min_nodes, buffer); safe_unpack16(&flags, buffer); safe_unpack16(&max_share, buffer); safe_unpack16(&preempt_mode, buffer); safe_unpack16(&priority, buffer); if (priority > part_max_priority) part_max_priority = priority; cr_type = 0; /* Default value */ safe_unpack16(&state_up, buffer); safe_unpackstr_xmalloc(&allow_groups, &name_len, buffer); safe_unpackstr_xmalloc(&allow_alloc_nodes, &name_len, buffer); 
safe_unpackstr_xmalloc(&alternate, &name_len, buffer); safe_unpackstr_xmalloc(&nodes, &name_len, buffer); if ((flags & PART_FLAG_DEFAULT_CLR) || (flags & PART_FLAG_HIDDEN_CLR) || (flags & PART_FLAG_NO_ROOT_CLR) || (flags & PART_FLAG_ROOT_ONLY_CLR) || (flags & PART_FLAG_REQ_RESV_CLR)) { error("Invalid data for partition %s: flags=%u", part_name, flags); error_code = EINVAL; } } else { error("load_all_part_state: protocol_version " "%hu not supported", protocol_version); goto unpack_error; } /* validity test as possible */ if (state_up > PARTITION_UP) { error("Invalid data for partition %s: state_up=%u", part_name, state_up); error_code = EINVAL; } if (error_code) { error("No more partition data will be processed from " "the checkpoint file"); xfree(allow_groups); xfree(allow_alloc_nodes); xfree(alternate); xfree(part_name); xfree(nodes); error_code = EINVAL; break; } /* find record and perform update */ part_ptr = list_find_first(part_list, &list_find_part, part_name); part_cnt++; if (part_ptr == NULL) { info("load_all_part_state: partition %s missing from " "configuration file", part_name); part_ptr = create_part_record(); xfree(part_ptr->name); part_ptr->name = xstrdup(part_name); } part_ptr->flags = flags; if (part_ptr->flags & PART_FLAG_DEFAULT) { xfree(default_part_name); default_part_name = xstrdup(part_name); default_part_loc = part_ptr; } part_ptr->max_time = max_time; part_ptr->default_time = default_time; part_ptr->max_cpus_per_node = max_cpus_per_node; part_ptr->max_nodes = max_nodes; part_ptr->max_nodes_orig = max_nodes; part_ptr->min_nodes = min_nodes; part_ptr->min_nodes_orig = min_nodes; part_ptr->max_share = max_share; part_ptr->grace_time = grace_time; if (preempt_mode != (uint16_t) NO_VAL) part_ptr->preempt_mode = preempt_mode; part_ptr->priority = priority; part_ptr->state_up = state_up; part_ptr->cr_type = cr_type; xfree(part_ptr->allow_groups); part_ptr->allow_groups = allow_groups; xfree(part_ptr->allow_alloc_nodes); part_ptr->allow_alloc_nodes 
= allow_alloc_nodes; xfree(part_ptr->alternate); part_ptr->alternate = alternate; xfree(part_ptr->nodes); part_ptr->nodes = nodes; xfree(part_name); } info("Recovered state of %d partitions", part_cnt); free_buf(buffer); return error_code; unpack_error: error("Incomplete partition data checkpoint file"); info("Recovered state of %d partitions", part_cnt); free_buf(buffer); return EFAULT; }
int main (int argc, char *argv[]) { Buf buffer; uint16_t test16 = 1234, out16; uint32_t test32 = 5678, out32, byte_cnt; char testbytes[] = "TEST BYTES", *outbytes; char teststring[] = "TEST STRING", *outstring = NULL; char *nullstr = NULL; char *data; int data_size; long double test_double = 1340664754944.2132312, test_double2; uint64_t test64; buffer = init_buf (0); pack16(test16, buffer); pack32(test32, buffer); pack64((uint64_t)test_double, buffer); packstr(testbytes, buffer); packstr(teststring, buffer); packstr(nullstr, buffer); packstr("literal", buffer); packstr("", buffer); data_size = get_buf_offset(buffer); printf("wrote %d bytes\n", data_size); /* Pull data off old buffer, destroy it, and create a new one */ data = xfer_buf_data(buffer); buffer = create_buf(data, data_size); unpack16(&out16, buffer); TEST(out16 != test16, "un/pack16"); unpack32(&out32, buffer); TEST(out32 != test32, "un/pack32"); unpack64(&test64, buffer); test_double2 = (long double)test64; TEST((uint64_t)test_double2 != (uint64_t)test_double, "un/pack double as a uint64"); /* info("Original\t %Lf", test_double); */ /* info("uint64\t %ld", test64); */ /* info("converted LD\t %Lf", test_double2); */ unpackstr_ptr(&outbytes, &byte_cnt, buffer); TEST( ( strcmp(testbytes, outbytes) != 0 ) , "un/packstr_ptr"); unpackstr_xmalloc(&outstring, &byte_cnt, buffer); TEST(strcmp(teststring, outstring) != 0, "un/packstr_xmalloc"); xfree(outstring); unpackstr_xmalloc(&nullstr, &byte_cnt, buffer); TEST(nullstr != NULL, "un/packstr of null string."); unpackstr_xmalloc(&outstring, &byte_cnt, buffer); TEST(strcmp("literal", outstring) != 0, "un/packstr of string literal"); xfree(outstring); unpackstr_xmalloc(&outstring, &byte_cnt, buffer); TEST(strcmp("", outstring) != 0, "un/packstr of string \"\" "); xfree(outstring); free_buf(buffer); totals(); return failed; }