/* Interleaved 2 of 5 (ITF) decoder step: called once per newly sampled
 * element width.  Accumulates decoded digit pairs into dcode->buf.
 * Returns ZBAR_PARTIAL when the symbol lock is taken or the first digit
 * pair lands, ZBAR_NONE while accumulating or on abort, or the result of
 * i25_decode_end() when the end-pattern position is reached. */
zbar_symbol_type_t _zbar_decode_i25 (zbar_decoder_t *dcode)
{
    i25_decoder_t *dcode25 = &dcode->i25;

    /* update latest character width */
    dcode25->s10 -= get_width(dcode, 10);
    dcode25->s10 += get_width(dcode, 0);

    /* not inside a symbol yet: look for a start pattern */
    if(dcode25->character < 0 && !i25_decode_start(dcode))
        return(ZBAR_NONE);

    /* count down the elements of the current character pair; at
     * offset (6 - direction) an end pattern may occur */
    if(--dcode25->element == 6 - dcode25->direction)
        return(i25_decode_end(dcode));
    else if(dcode25->element)
        return(ZBAR_NONE);

    /* FIXME check current character width against previous */
    dcode25->width = dcode25->s10;

    dprintf(2, " i25[%c%02d+%x]",
            (dcode25->direction) ? '<' : '>',
            dcode25->character, dcode25->element);

    /* lock shared resources */
    if(!dcode25->character && get_lock(dcode, ZBAR_I25)) {
        dcode25->character = -1;
        dprintf(2, " [locked %d]\n", dcode->lock);
        return(ZBAR_PARTIAL);
    }

    /* decode first digit of the interleaved pair */
    unsigned char c = i25_decode10(dcode, 1);
    dprintf(2, " c=%x", c);

    /* abort on an invalid digit (> 9), or when the output buffer
     * cannot hold both digits of this pair */
    if(c > 9 ||
       ((dcode25->character >= BUFFER_MIN) &&
        size_buf(dcode, dcode25->character + 2))) {
        dprintf(2, (c > 9) ? " [aborted]\n" : " [overflow]\n");
        dcode->lock = 0;
        dcode25->character = -1;
        return(ZBAR_NONE);
    }
    dcode->buf[dcode25->character++] = c + '0';

    /* decode second digit of the interleaved pair */
    c = i25_decode10(dcode, 0);
    dprintf(2, " c=%x", c);
    if(c > 9) {
        dprintf(2, " [aborted]\n");
        dcode->lock = 0;
        dcode25->character = -1;
        return(ZBAR_NONE);
    }
    else {
        dprintf(2, "\n");
    }
    dcode->buf[dcode25->character++] = c + '0';

    /* rearm the 10-element countdown for the next character pair */
    dcode25->element = 10;
    return((dcode25->character == 2) ? ZBAR_PARTIAL : ZBAR_NONE);
}
/* Append one ring-collective contribution into the context's ring buffer
 * and, unless this contribution closes the ring, forward it to the next
 * node.  Returns SLURM_SUCCESS, or SLURM_ERROR if forwarding failed. */
inline static int _pmixp_coll_contrib(pmixp_coll_ring_ctx_t *coll_ctx,
                                      int contrib_id, uint32_t hop,
                                      char *data, size_t size)
{
    pmixp_coll_t *coll = _ctx_get_coll(coll_ctx);
    char *data_ptr = NULL;
    int ret;

    /* change the state */
    coll->ts = time(NULL);

    /* save contribution */
    if (!size_buf(coll_ctx->ring_buf)) {
        /* empty buffer: pre-reserve room for one contribution of this
         * size from every peer */
        grow_buf(coll_ctx->ring_buf, size * coll->peers_cnt);
    } else if(remaining_buf(coll_ctx->ring_buf) < size) {
        /* not enough room: extend by the expected size of the
         * still-outstanding contributions */
        uint32_t new_size = size_buf(coll_ctx->ring_buf) + size *
            _ring_remain_contrib(coll_ctx);
        grow_buf(coll_ctx->ring_buf, new_size);
    }
    /* ensure space for this contribution itself, then append it at the
     * current write offset */
    grow_buf(coll_ctx->ring_buf, size);
    data_ptr = get_buf_data(coll_ctx->ring_buf) +
        get_buf_offset(coll_ctx->ring_buf);
    memcpy(data_ptr, data, size);
    set_buf_offset(coll_ctx->ring_buf,
                   get_buf_offset(coll_ctx->ring_buf) + size);

    /* check for ring is complete: a contribution originating from our
     * next neighbor has traveled the whole ring and must not be
     * forwarded again */
    if (contrib_id != _ring_next_id(coll)) {
        /* forward data to the next node */
        ret = _ring_forward_data(coll_ctx, contrib_id, hop,
                                 data_ptr, size);
        if (ret) {
            PMIXP_ERROR("Cannot forward ring data");
            return SLURM_ERROR;
        }
    }

    return SLURM_SUCCESS;
}
/* get_next_line helper: append the pending chunk of the static read
 * buffer onto *line (reallocating it), then compact the buffer.
 *
 * buf        - static buffer of GNL_BUFF_SIZE bytes; size_buf(buf) is the
 *              length of the chunk to consume (presumably up to a stop
 *              byte such as '\n' — TODO confirm size_buf semantics)
 * line       - growing output line, reallocated to fit the extra bytes
 * is_reading - nonzero while the caller still expects more input
 *
 * Returns 1 if more line data remains to process, 0 when exhausted,
 * -1 on allocation/copy failure.
 */
static int copy_buf(char *buf, char **line, int is_reading)
{
    char endbuf;
    int buffsize;
    int linesize;

    buffsize = size_buf(buf);
    linesize = ft_strlen(*line);
    /* grow *line (plus NUL room) and append the consumed chunk */
    if (!(*line = ft_memrealloc(*line, linesize, linesize + buffsize + 1)))
        return (-1);
    if (!ft_memcpy(*line + linesize, buf, buffsize))
        return (-1);
    if (buffsize != GNL_BUFF_SIZE)
    {
        /* partial consume: remember the byte that stopped the copy,
         * shift the unconsumed tail to the front of buf, and zero the
         * vacated region */
        endbuf = buf[buffsize];
        if (!ft_memmove(buf, buf + buffsize + 1, GNL_BUFF_SIZE - buffsize - 1)
            || !ft_memset(buf + GNL_BUFF_SIZE - buffsize - 1, 0, buffsize + 1))
            return (-1);
        return ((is_reading || *buf || endbuf == '\n') ? 1 : 0);
    }
    /* whole buffer consumed: clear it for the next read */
    return ((!ft_memset(buf, 0, buffsize)) ? -1 : 0);
}
/* Unpack a dbd_step_start_msg_t from buffer.  Field order must mirror the
 * matching pack routine exactly.  On success *msg owns a freshly
 * xmalloc'd message; on any unpack failure (safe_unpack* macros jump to
 * unpack_error) the message is freed, *msg is NULL, SLURM_ERROR returned. */
static int _unpack_step_start_msg(dbd_step_start_msg_t **msg,
                                  uint16_t rpc_version, Buf buffer)
{
    uint32_t uint32_tmp = 0;  /* receives (ignored) string lengths */
    dbd_step_start_msg_t *msg_ptr = xmalloc(sizeof(dbd_step_start_msg_t));

    *msg = msg_ptr;

    if (rpc_version >= SLURM_MIN_PROTOCOL_VERSION) {
        safe_unpack32(&msg_ptr->assoc_id, buffer);
        safe_unpack64(&msg_ptr->db_index, buffer);
        safe_unpack32(&msg_ptr->job_id, buffer);
        safe_unpackstr_xmalloc(&msg_ptr->name, &uint32_tmp, buffer);
        safe_unpackstr_xmalloc(&msg_ptr->nodes, &uint32_tmp, buffer);
        safe_unpackstr_xmalloc(&msg_ptr->node_inx, &uint32_tmp, buffer);
        safe_unpack32(&msg_ptr->node_cnt, buffer);
        safe_unpack_time(&msg_ptr->start_time, buffer);
        safe_unpack_time(&msg_ptr->job_submit_time, buffer);
        safe_unpack32(&msg_ptr->req_cpufreq_min, buffer);
        safe_unpack32(&msg_ptr->req_cpufreq_max, buffer);
        safe_unpack32(&msg_ptr->req_cpufreq_gov, buffer);
        safe_unpack32(&msg_ptr->step_id, buffer);
        safe_unpack32(&msg_ptr->task_dist, buffer);
        safe_unpack32(&msg_ptr->total_tasks, buffer);
        safe_unpackstr_xmalloc(&msg_ptr->tres_alloc_str,
                               &uint32_tmp, buffer);
    } else
        goto unpack_error;

    return SLURM_SUCCESS;

unpack_error:
    debug2("slurmdbd_unpack_step_start_msg:"
           "unpack_error: size_buf(buffer) %u",
           size_buf(buffer));
    slurmdbd_free_step_start_msg(msg_ptr);
    *msg = NULL;
    return SLURM_ERROR;
}
/* Unpack a dbd_step_comp_msg_t from buffer.  Field order must mirror the
 * matching pack routine exactly.  On success *msg owns a freshly
 * xmalloc'd message; on failure it is freed, *msg is NULL and
 * SLURM_ERROR is returned. */
static int _unpack_step_complete_msg(dbd_step_comp_msg_t **msg,
                                     uint16_t rpc_version, Buf buffer)
{
    uint32_t uint32_tmp;  /* receives (ignored) string lengths */
    dbd_step_comp_msg_t *msg_ptr = xmalloc(sizeof(dbd_step_comp_msg_t));

    *msg = msg_ptr;

    if (rpc_version >= SLURM_MIN_PROTOCOL_VERSION) {
        safe_unpack32(&msg_ptr->assoc_id, buffer);
        safe_unpack64(&msg_ptr->db_index, buffer);
        safe_unpack_time(&msg_ptr->end_time, buffer);
        safe_unpack32(&msg_ptr->exit_code, buffer);
        /* embedded accounting data; final arg 1 requests allocation of
         * the jobacct structure */
        jobacctinfo_unpack((struct jobacctinfo **)&msg_ptr->jobacct,
                           rpc_version, PROTOCOL_TYPE_DBD, buffer, 1);
        safe_unpack32(&msg_ptr->job_id, buffer);
        safe_unpack_time(&msg_ptr->job_submit_time, buffer);
        safe_unpackstr_xmalloc(&msg_ptr->job_tres_alloc_str,
                               &uint32_tmp, buffer);
        safe_unpack32(&msg_ptr->req_uid, buffer);
        safe_unpack_time(&msg_ptr->start_time, buffer);
        safe_unpack16(&msg_ptr->state, buffer);
        safe_unpack32(&msg_ptr->step_id, buffer);
        safe_unpack32(&msg_ptr->total_tasks, buffer);
    } else
        goto unpack_error;

    return SLURM_SUCCESS;

unpack_error:
    debug2("slurmdbd_unpack_step_complete_msg:"
           "unpack_error: size_buf(buffer) %u",
           size_buf(buffer));
    slurmdbd_free_step_complete_msg(msg_ptr);
    *msg = NULL;
    return SLURM_ERROR;
}
/* Unpack a jobacctinfo_t from buffer.  A leading uint8 flag of 0 means
 * "no accounting data present" and returns SLURM_SUCCESS immediately
 * (NOTE(review): *jobacct is left untouched in that case — callers must
 * pre-initialize it).  When alloc is true the structure is xmalloc'd
 * here; otherwise *jobacct must already point at valid storage.
 * Field order must mirror the matching pack routine exactly. */
extern int jobacctinfo_unpack(jobacctinfo_t **jobacct, uint16_t rpc_version,
                              uint16_t protocol_type, Buf buffer, bool alloc)
{
    uint32_t uint32_tmp;
    uint8_t uint8_tmp;

    if (jobacct_gather_init() < 0)
        return SLURM_ERROR;

    if (rpc_version >= SLURM_MIN_PROTOCOL_VERSION) {
        /* presence flag: 0 == no jobacct data follows */
        safe_unpack8(&uint8_tmp, buffer);
        if (uint8_tmp == (uint8_t) 0)
            return SLURM_SUCCESS;
        if (alloc)
            *jobacct = xmalloc(sizeof(struct jobacctinfo));
        /* CPU times travel as 32-bit on the wire; widen locally */
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->user_cpu_sec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->user_cpu_usec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->sys_cpu_sec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->sys_cpu_usec = uint32_tmp;
        safe_unpack64(&(*jobacct)->max_vsize, buffer);
        safe_unpack64(&(*jobacct)->tot_vsize, buffer);
        safe_unpack64(&(*jobacct)->max_rss, buffer);
        safe_unpack64(&(*jobacct)->tot_rss, buffer);
        safe_unpack64(&(*jobacct)->max_pages, buffer);
        safe_unpack64(&(*jobacct)->tot_pages, buffer);
        safe_unpack32(&(*jobacct)->min_cpu, buffer);
        safe_unpackdouble(&(*jobacct)->tot_cpu, buffer);
        safe_unpack32(&(*jobacct)->act_cpufreq, buffer);
        safe_unpack64(&(*jobacct)->energy.consumed_energy, buffer);
        safe_unpackdouble(&(*jobacct)->max_disk_read, buffer);
        safe_unpackdouble(&(*jobacct)->tot_disk_read, buffer);
        safe_unpackdouble(&(*jobacct)->max_disk_write, buffer);
        safe_unpackdouble(&(*jobacct)->tot_disk_write, buffer);
        /* task ids associated with each per-task maximum */
        if (_unpack_jobacct_id(&(*jobacct)->max_vsize_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_rss_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_pages_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->min_cpu_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_disk_read_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_disk_write_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
    } else {
        info("jobacctinfo_unpack version %u not supported", rpc_version);
        return SLURM_ERROR;
    }
    return SLURM_SUCCESS;

unpack_error:
    debug2("jobacctinfo_unpack: unpack_error: size_buf(buffer) %u",
           size_buf(buffer));
    /* only free what we allocated ourselves */
    if (alloc)
        xfree(*jobacct);
    return SLURM_ERROR;
}
/* Unpack a jobacctinfo_t from buffer, dispatching on protocol version.
 * *jobacct is always xmalloc'd here (and freed again on unpack_error).
 * DBD rpc versions are first translated to SLURM protocol versions.
 * Field order in each branch must mirror the matching pack routine. */
extern int jobacctinfo_unpack(jobacctinfo_t **jobacct, uint16_t rpc_version,
                              uint16_t protocol_type, Buf buffer)
{
    uint32_t uint32_tmp;

    /* without a polling plugin, non-DBD traffic carries no jobacct data */
    if (!plugin_polling && (protocol_type != PROTOCOL_TYPE_DBD))
        return SLURM_SUCCESS;

    /* The function can take calls from both DBD and from regular
     * SLURM functions.  We choose to standardize on using the
     * SLURM_PROTOCOL_VERSION here so if PROTOCOL_TYPE_DBD comes
     * in we need to translate the DBD rpc_version to use the
     * SLURM protocol_version.
     *
     * If this function ever changes make sure the
     * slurmdbd_translate_rpc function has been updated with the
     * new protocol version. */
    if (protocol_type == PROTOCOL_TYPE_DBD)
        rpc_version = slurmdbd_translate_rpc(rpc_version);

    if (rpc_version >= SLURM_2_6_PROTOCOL_VERSION) {
        *jobacct = xmalloc(sizeof(struct jobacctinfo));
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->user_cpu_sec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->user_cpu_usec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->sys_cpu_sec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->sys_cpu_usec = uint32_tmp;
        safe_unpack32(&(*jobacct)->max_vsize, buffer);
        safe_unpack32(&(*jobacct)->tot_vsize, buffer);
        safe_unpack32(&(*jobacct)->max_rss, buffer);
        safe_unpack32(&(*jobacct)->tot_rss, buffer);
        safe_unpack32(&(*jobacct)->max_pages, buffer);
        safe_unpack32(&(*jobacct)->tot_pages, buffer);
        safe_unpack32(&(*jobacct)->min_cpu, buffer);
        safe_unpack32(&(*jobacct)->tot_cpu, buffer);
        safe_unpack32(&(*jobacct)->act_cpufreq, buffer);
        safe_unpack32(&(*jobacct)->energy.consumed_energy, buffer);
        /* 2.6 added disk I/O statistics */
        safe_unpackdouble(&(*jobacct)->max_disk_read, buffer);
        safe_unpackdouble(&(*jobacct)->tot_disk_read, buffer);
        safe_unpackdouble(&(*jobacct)->max_disk_write, buffer);
        safe_unpackdouble(&(*jobacct)->tot_disk_write, buffer);
        if (_unpack_jobacct_id(&(*jobacct)->max_vsize_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_rss_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_pages_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->min_cpu_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_disk_read_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_disk_write_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
    } else if (rpc_version >= SLURM_2_5_PROTOCOL_VERSION) {
        *jobacct = xmalloc(sizeof(struct jobacctinfo));
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->user_cpu_sec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->user_cpu_usec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->sys_cpu_sec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->sys_cpu_usec = uint32_tmp;
        safe_unpack32(&(*jobacct)->max_vsize, buffer);
        safe_unpack32(&(*jobacct)->tot_vsize, buffer);
        safe_unpack32(&(*jobacct)->max_rss, buffer);
        safe_unpack32(&(*jobacct)->tot_rss, buffer);
        safe_unpack32(&(*jobacct)->max_pages, buffer);
        safe_unpack32(&(*jobacct)->tot_pages, buffer);
        safe_unpack32(&(*jobacct)->min_cpu, buffer);
        safe_unpack32(&(*jobacct)->tot_cpu, buffer);
        safe_unpack32(&(*jobacct)->act_cpufreq, buffer);
        safe_unpack32(&(*jobacct)->energy.consumed_energy, buffer);
        if (_unpack_jobacct_id(&(*jobacct)->max_vsize_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_rss_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_pages_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->min_cpu_id, rpc_version,
                               buffer) != SLURM_SUCCESS)
            goto unpack_error;
    } else {
        /* oldest supported format: no act_cpufreq/energy fields */
        *jobacct = xmalloc(sizeof(struct jobacctinfo));
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->user_cpu_sec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->user_cpu_usec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->sys_cpu_sec = uint32_tmp;
        safe_unpack32(&uint32_tmp, buffer);
        (*jobacct)->sys_cpu_usec = uint32_tmp;
        safe_unpack32(&(*jobacct)->max_vsize, buffer);
        safe_unpack32(&(*jobacct)->tot_vsize, buffer);
        safe_unpack32(&(*jobacct)->max_rss, buffer);
        safe_unpack32(&(*jobacct)->tot_rss, buffer);
        safe_unpack32(&(*jobacct)->max_pages, buffer);
        safe_unpack32(&(*jobacct)->tot_pages, buffer);
        safe_unpack32(&(*jobacct)->min_cpu, buffer);
        safe_unpack32(&(*jobacct)->tot_cpu, buffer);
        if (_unpack_jobacct_id(&(*jobacct)->max_vsize_id, rpc_version,
                               buffer)!= SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_rss_id, rpc_version,
                               buffer)!= SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->max_pages_id, rpc_version,
                               buffer)!= SLURM_SUCCESS)
            goto unpack_error;
        if (_unpack_jobacct_id(&(*jobacct)->min_cpu_id, rpc_version,
                               buffer)!= SLURM_SUCCESS)
            goto unpack_error;
    }

    return SLURM_SUCCESS;

unpack_error:
    debug2("jobacctinfo_unpack: unpack_error: size_buf(buffer) %u",
           size_buf(buffer));
    xfree(*jobacct);
    return SLURM_ERROR;
}
/* we get a ring_in message from each child (stepd and application tasks),
 * once we've gotten a message from each child, we send a ring_in message
 * to our parent
 *   ring_id - index of child (all app procs first, followed by stepds)
 *   count   - count value from child
 *   left    - left value from child
 *   right   - right value from child
 *
 * upon receiving ring_in messages from all children, we send a ring_in
 * message to our parent consisting of:
 *   rank  = our rank in stepd tree (so parent knows which child msg is from)
 *   count = sum of counts from all children
 *   left  = left value from leftmost child
 *   right = right value from rightmost child
 */
int pmix_ring_in(int ring_id, int count, char* left, char* right)
{
    int i;
    int rc = SLURM_SUCCESS;

    debug3("mpi/pmi2: in pmix_ring_in rank=%d ring_id=%d count=%d left=%s right=%s",
           pmix_stepd_rank, ring_id, count, left, right);

    /* record values from child's ring_in message; the strings are
     * duplicated and owned here until pmix_ring_out() frees them */
    pmix_ring_msg* msg = &pmix_ring_msgs[ring_id];
    msg->count = count;
    msg->left = xstrdup(left);
    msg->right = xstrdup(right);

    /* update our running count of received ring_in messages */
    pmix_ring_count++;

    /* if we have received a ring_in message from each app process
     * and each stepd child, forward a ring_in message to our
     * parent in the stepd tree */
    if (pmix_ring_count == pmix_ring_children) {
        /* each stepd has at least one application process
         * so each has at least one child */

        /* lookup leftmost value from all children,
         * take left value from leftmost process */
        char* leftmost = pmix_ring_msgs[0].left;

        /* lookup rightmost value from all children,
         * take right value from rightmost process */
        int right_id = pmix_ring_children - 1;
        char* rightmost = pmix_ring_msgs[right_id].right;

        /* total count values across all children */
        uint32_t sum = 0;
        for (i = 0; i < pmix_ring_children; i++) {
            sum += (uint32_t) pmix_ring_msgs[i].count;
        }

        /* send to parent if we have one, otherwise create ring output
         * message and start the broadcast */
        if (pmix_stepd_rank > 0) {
            /* include our global rank in message so parent can
             * determine which child we are */
            uint32_t my_rank = (uint32_t) pmix_stepd_rank;

            /* TODO: do we need hton translation? */

            /* construct message */
            Buf buf = init_buf(1024);
            pack16(TREE_CMD_RING, buf);  /* specify message type (RING_IN) */
            pack32(my_rank, buf);        /* send our rank */
            pack32(sum, buf);            /* send count value */
            packstr(leftmost, buf);      /* send left value */
            packstr(rightmost, buf);     /* send right value */

            /* get global rank of our parent stepd */
            int rank = pmix_stepd_rank_parent();

            /* NOTE(review): this log prints the last child's `count`,
             * while the message packs `sum` — likely intended sum */
            debug3("mpi/pmi2: rank=%d sending RING_IN to rank=%d count=%d left=%s right=%s",
                   my_rank, rank, count, leftmost, rightmost);

            /* send message to parent */
            rc = pmix_stepd_send(get_buf_data(buf), (uint32_t) size_buf(buf), rank);

            /* TODO: use tmp_rc here to catch any failure */

            /* free message */
            free_buf(buf);
        } else {
            /* we're the root of the tree, send values back down */

            /* at the top level, we wrap the ends to create a ring,
             * setting the rightmost process to be the left neighbor
             * of the leftmost process */

            /* we start the top of the tree at offset 0 */

            /* simulate reception of a ring output msg */
            pmix_ring_out(0, rightmost, leftmost);
        }
    }

    debug3("mpi/pmi2: out pmix_ring_in");
    return rc;
}
/* ring_out messages come in from our parent,
 * we process this and send ring_out messages to each of our children:
 *   count - starting rank for our leftmost application process
 *   left  - left value for leftmost application process in our subtree
 *   right - right value for rightmost application process in our subtree
 */
int pmix_ring_out(int count, char* left, char* right)
{
    int rc = SLURM_SUCCESS;

    debug3("mpi/pmi2: in pmix_ring_out rank=%d count=%d left=%s right=%s",
           pmix_stepd_rank, count, left, right);

    /* our parent will send us a pmix_ring_out message, the count value
     * contained in this message will be the rank of the first process
     * in our subtree, the left value will be the left value for the
     * first process in the subtree, and the right value will be the
     * right value for the last process in our subtree */

    /* allocate a structure to compute values to send to each child */
    pmix_ring_msg* outmsgs = (pmix_ring_msg*) xmalloc(pmix_ring_children * sizeof(pmix_ring_msg));

    /* initialize messages to all children */
    int i;
    for (i = 0; i < pmix_ring_children; i++) {
        outmsgs[i].count = 0;
        outmsgs[i].left = NULL;
        outmsgs[i].right = NULL;
    }

    /* iterate over all msgs and set count and left neighbor
     * (forward pass: counts accumulate left-to-right) */
    for (i = 0; i < pmix_ring_children; i++) {
        /* store current count in output message */
        outmsgs[i].count = count;

        /* add count for this child to our running total */
        count += pmix_ring_msgs[i].count;

        /* set left value for this child */
        outmsgs[i].left = left;

        /* get right value from child, if it exists,
         * it will be the left neighbor of the next child,
         * otherwise, reuse the current left value */
        char* next = pmix_ring_msgs[i].right;
        if (next != NULL) {
            left = next;
        }
    }

    /* now set all right values (iterate backwards through children) */
    for (i = (pmix_ring_children - 1); i >= 0; i--) {
        /* set right value for this child */
        outmsgs[i].right = right;

        /* get left value from child, if it exists,
         * it will be the right neighbor of the next child,
         * otherwise, reuse the current right value */
        char* next = pmix_ring_msgs[i].left;
        if (next != NULL) {
            right = next;
        }
    }

    /* send messages to children in stepd tree,
     * we do this first to get the message down the tree quickly */
    for (i = 0; i < pmix_stepd_children; i++) {
        /* get pointer to message data for this child
         * (stepd children follow the app children in the array) */
        int ring_id = pmix_app_children + i;
        pmix_ring_msg* msg = &outmsgs[ring_id];

        /* TODO: do we need hton translation? */

        /* construct message */
        Buf buf = init_buf(1024);
        pack16(TREE_CMD_RING_RESP, buf);     /* specify message type (RING_OUT) */
        pack32((uint32_t) msg->count, buf);  /* send count value */
        packstr(msg->left, buf);             /* send left value */
        packstr(msg->right, buf);            /* send right value */

        /* get global rank of our i-th child stepd */
        int rank = pmix_stepd_rank_child(i);

        debug3("mpi/pmi2: rank=%d sending RING_OUT to rank=%d count=%d left=%s right=%s",
               pmix_stepd_rank, rank, msg->count, msg->left, msg->right);

        /* send message to child */
        rc = pmix_stepd_send(get_buf_data(buf), (uint32_t) size_buf(buf), rank);

        /* TODO: use tmp_rc here to catch any failure */

        /* free message */
        free_buf(buf);
    }

    /* now send messages to children app procs,
     * and set their state back to normal */
    for (i = 0; i < pmix_app_children; i++) {
        /* get pointer to message data for this child */
        pmix_ring_msg* msg = &outmsgs[i];

        /* TODO: want to catch send failure here? */

        /* construct message and send to client */
        client_resp_t *resp = client_resp_new();
        client_resp_append(resp, "%s=%s;%s=%d;%s=%d;%s=%s;%s=%s;",
                           CMD_KEY, RINGRESP_CMD,
                           RC_KEY, 0,
                           RING_COUNT_KEY, msg->count,
                           RING_LEFT_KEY, msg->left,
                           RING_RIGHT_KEY, msg->right);
        client_resp_send(resp, STEPD_PMI_SOCK(i));
        client_resp_free(resp);
    }

    /* delete messages, note that we don't need to free
     * left and right strings in each message since they
     * are pointers to strings allocated in pmix_ring_msgs */
    xfree(outmsgs);

    /* clear the pmix_ring_in messages for next ring operation */
    for (i = 0; i < pmix_ring_children; i++) {
        pmix_ring_msg* msg = &pmix_ring_msgs[i];
        msg->count = 0;
        if (msg->left != NULL) {
            xfree(msg->left);
            msg->left = NULL;
        }
        if (msg->right != NULL) {
            xfree(msg->right);
            msg->right = NULL;
        }
    }

    /* reset our ring count */
    pmix_ring_count = 0;

    debug3("mpi/pmi2: out pmix_ring_out");
    return rc;
}
/* Dump the full state of a ring collective to the error log: our peer id,
 * ring neighbors, and per-context contribution/buffer status.  Purely
 * diagnostic; does not modify collective state. */
void pmixp_coll_ring_log(pmixp_coll_t *coll)
{
    int i;
    pmixp_coll_ring_t *ring = &coll->state.ring;
    char *nodename, *next, *prev;
    char *out_str = NULL;

    PMIXP_ERROR("%p: %s state seq=%d", coll,
                pmixp_coll_type2str(coll->type), coll->seq);
    nodename = pmixp_info_job_host(coll->my_peerid);
    PMIXP_ERROR("my peerid: %d:%s", coll->my_peerid, nodename);
    xfree(nodename);
    next = pmixp_info_job_host(_ring_next_id(coll));
    prev = pmixp_info_job_host(_ring_prev_id(coll));
    xstrfmtcat(out_str,"neighbor id: next %d:%s, prev %d:%s",
               _ring_next_id(coll), next, _ring_prev_id(coll), prev);
    PMIXP_ERROR("%s", out_str);
    xfree(next);
    xfree(prev);
    xfree(out_str);
    for (i = 0; i < PMIXP_COLL_RING_CTX_NUM; i++) {
        pmixp_coll_ring_ctx_t *coll_ctx = &ring->ctx_array[i];

        PMIXP_ERROR("Context ptr=%p, #%d, in-use=%d",
                    coll_ctx, i, coll_ctx->in_use);

        if (coll_ctx->in_use) {
            int id;
            char *done_contrib, *wait_contrib;
            hostlist_t hl_done_contrib, hl_wait_contrib;

            /* start both lists with every participating host, then
             * remove each host from the list it does NOT belong to */
            pmixp_hostset_from_ranges(coll->pset.procs,
                                      coll->pset.nprocs,
                                      &hl_done_contrib);
            hl_wait_contrib = hostlist_copy(hl_done_contrib);

            PMIXP_ERROR("\t seq=%d contribs: loc=%d/prev=%d/fwd=%d",
                        coll_ctx->seq, coll_ctx->contrib_local,
                        coll_ctx->contrib_prev,
                        coll_ctx->forward_cnt);
            PMIXP_ERROR("\t neighbor contribs [%d]:", coll->peers_cnt);

            for (id = 0; id < coll->peers_cnt; id++) {
                /* NOTE: intentionally shadows the outer nodename */
                char *nodename = pmixp_info_job_host(id);

                if(coll_ctx->contrib_map[id]) {
                    hostlist_delete_host(hl_wait_contrib, nodename);
                } else {
                    hostlist_delete_host(hl_done_contrib, nodename);
                }
                xfree(nodename);
            }
            done_contrib = slurm_hostlist_ranged_string_xmalloc(
                hl_done_contrib);
            wait_contrib = slurm_hostlist_ranged_string_xmalloc(
                hl_wait_contrib);
            PMIXP_ERROR("\t done contrib: %s",
                        strlen(done_contrib) ? done_contrib : "-");
            PMIXP_ERROR("\t wait contrib: %s",
                        strlen(wait_contrib) ? wait_contrib : "-");
            PMIXP_ERROR("\t status=%s",
                        pmixp_coll_ring_state2str(coll_ctx->state));
            PMIXP_ERROR("\t buf size=%u, remain=%u",
                        size_buf(coll_ctx->ring_buf),
                        remaining_buf(coll_ctx->ring_buf));
            xfree(done_contrib);
            xfree(wait_contrib);
            hostlist_destroy(hl_done_contrib);
            hostlist_destroy(hl_wait_contrib);
        }
    }
}
/*
 * Connect to a slurmstepd process by way of its unix domain socket.
 *
 * Both "directory" and "nodename" may be null, in which case stepd_connect
 * will attempt to determine them on its own.  If you are using multiple
 * slurmd on one node (unusual outside of development environments), you
 * will get one of the local NodeNames more-or-less at random.
 *
 * Returns a socket descriptor for the opened socket on success,
 * and -1 on error.
 */
int stepd_connect(const char *directory, const char *nodename,
                  uint32_t jobid, uint32_t stepid)
{
    int req = REQUEST_CONNECT;
    int fd = -1;
    int rc;
    void *auth_cred;
    Buf buffer;
    int len;

    if (nodename == NULL) {
        if (!(nodename = _guess_nodename()))
            return -1;
    }
    if (directory == NULL) {
        slurm_ctl_conf_t *cf;

        cf = slurm_conf_lock();
        directory = slurm_conf_expand_slurmd_path(
            cf->slurmd_spooldir, nodename);
        slurm_conf_unlock();
    }

    buffer = init_buf(0);

    /* Create an auth credential */
    auth_cred = g_slurm_auth_create(NULL, 2, NULL);
    if (auth_cred == NULL) {
        error("Creating authentication credential: %s",
              g_slurm_auth_errstr(g_slurm_auth_errno(NULL)));
        slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
        goto fail1;
    }

    /* Pack the auth credential */
    rc = g_slurm_auth_pack(auth_cred, buffer);
    if (rc) {
        /* BUG FIX: query the error while auth_cred is still valid.
         * The previous code destroyed the credential before the
         * error check and then passed the freed pointer to
         * g_slurm_auth_errno() — a use-after-free. */
        error("Packing authentication credential: %s",
              g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
        (void) g_slurm_auth_destroy(auth_cred);
        slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
        goto fail1;
    }
    (void) g_slurm_auth_destroy(auth_cred);

    /* Connect to the step */
    fd = _step_connect(directory, nodename, jobid, stepid);
    if (fd == -1)
        goto fail1;

    /* handshake: request type, credential length, credential bytes */
    safe_write(fd, &req, sizeof(int));
    len = size_buf(buffer);
    safe_write(fd, &len, sizeof(int));
    safe_write(fd, get_buf_data(buffer), len);

    safe_read(fd, &rc, sizeof(int));
    if (rc < 0) {
        error("slurmstepd refused authentication: %m");
        slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
        goto rwfail;
    }

    free_buf(buffer);
    return fd;

rwfail:
    close(fd);
fail1:
    free_buf(buffer);
    return -1;
}
/*
 * Connect to a slurmstepd process by way of its unix domain socket.
 *
 * Both "directory" and "nodename" may be null, in which case stepd_connect
 * will attempt to determine them on its own.  If you are using multiple
 * slurmd on one node (unusual outside of development environments), you
 * will get one of the local NodeNames more-or-less at random.
 *
 * On success *protocol_version is set to the peer stepd's protocol version.
 *
 * Returns a socket descriptor for the opened socket on success,
 * and -1 on error.
 */
int stepd_connect(const char *directory, const char *nodename,
                  uint32_t jobid, uint32_t stepid,
                  uint16_t *protocol_version)
{
    int req = REQUEST_CONNECT;
    int fd = -1;
    int rc;
    void *auth_cred;
    Buf buffer;
    int len;

    *protocol_version = 0;

    if (nodename == NULL) {
        if (!(nodename = _guess_nodename()))
            return -1;
    }
    if (directory == NULL) {
        slurm_ctl_conf_t *cf;

        cf = slurm_conf_lock();
        directory = slurm_conf_expand_slurmd_path(
            cf->slurmd_spooldir, nodename);
        slurm_conf_unlock();
    }

    buffer = init_buf(0);

    /* Create an auth credential */
    auth_cred = g_slurm_auth_create(NULL, 2, NULL);
    if (auth_cred == NULL) {
        error("Creating authentication credential: %s",
              g_slurm_auth_errstr(g_slurm_auth_errno(NULL)));
        slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
        goto fail1;
    }

    /* Pack the auth credential */
    rc = g_slurm_auth_pack(auth_cred, buffer);
    if (rc) {
        /* BUG FIX: query the error while auth_cred is still valid.
         * The previous code destroyed the credential before the
         * error check and then passed the freed pointer to
         * g_slurm_auth_errno() — a use-after-free. */
        error("Packing authentication credential: %s",
              g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
        (void) g_slurm_auth_destroy(auth_cred);
        slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
        goto fail1;
    }
    (void) g_slurm_auth_destroy(auth_cred);

    /* Connect to the step */
    fd = _step_connect(directory, nodename, jobid, stepid);
    if (fd == -1)
        goto fail1;

    /* handshake: request type, credential length, credential bytes */
    safe_write(fd, &req, sizeof(int));
    len = size_buf(buffer);
    safe_write(fd, &len, sizeof(int));
    safe_write(fd, get_buf_data(buffer), len);

    safe_read(fd, &rc, sizeof(int));
    if (rc < 0) {
        error("slurmstepd refused authentication: %m");
        slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
        goto rwfail;
    } else if (rc)
        *protocol_version = rc;
    else {
        /* On older versions of Slurm < 14.11 SLURM_SUCCESS
         * was returned here instead of the protocol version.
         * This can be removed when we are 2 versions past
         * 14.11. */
        slurmstepd_info_t *stepd_info = stepd_get_info(fd);
        /* BUG FIX: guard against a failed info query before
         * dereferencing (previous code dereferenced
         * unconditionally — TODO confirm stepd_get_info() can
         * return NULL) */
        if (!stepd_info)
            goto rwfail;
        *protocol_version = stepd_info->protocol_version;
        xfree(stepd_info);
    }
    free_buf(buffer);
    return fd;

rwfail:
    close(fd);
fail1:
    free_buf(buffer);
    return -1;
}