/**
 * Initialize a node LP: record the global burst-buffer capacity, work out
 * which cluster (client / server / burst-buffer) this LP belongs to based
 * on its position in the combined logical id space, and send a self
 * kickoff event to start this node's simulation.
 * @param ns node state
 * @param lp LP
 */
void node_lp_init(node_state * ns, tw_lp * lp)
{
    /* convert configured capacity (GB) to bytes; assumes 64-bit long --
     * TODO(review): confirm target platform, this overflows 32-bit long */
    burst_buffer_capacity = ((long) (burst_buffer_max_capacity)) * 1000000000;
    //burst_buffer_capacity = ((long) (burst_buffer_max_capacity))*10;
    //printf("Burst Buffer Capacity:%li\n",burst_buffer_capacity);
    printf("In node_lp_init\n");
    ns->num_processed = 0;
    // nodes are addressed in their logical id space (0...num_client_nodes-1 and
    // 0...num_svr_nodes-1, respectively). LPs are computed upon use with
    // model-net, other events
    ns->id_clust = codes_mapping_get_lp_relative_id(lp->gid, 1, 0);
    int id_all = codes_mapping_get_lp_relative_id(lp->gid, 0, 0);
    // track which cluster we're in: the combined id space is partitioned as
    // [clients | servers | burst-buffer nodes], in that order
    ns->is_in_client = (id_all < num_client_nodes);
    ns->is_in_server = (id_all < (num_svr_nodes + num_client_nodes) && (id_all >= num_client_nodes));
    ns->is_in_bb = (id_all < (num_svr_nodes + num_client_nodes + num_burst_buffer_nodes) && (id_all >= num_svr_nodes + num_client_nodes));
    /* debug output -- prints once per node LP at init */
    printf("is_in_client=%d\nis_in_svr=%d\nis_in_bb=%d\n", ns->is_in_client, ns->is_in_server, ns->is_in_bb);
    printf("id_all= %d\nnum_client_nodes= %d\n", id_all, num_client_nodes);
    // send a self kickoff event
    tw_event *e = codes_event_new(lp->gid, codes_local_latency(lp), lp);
    node_msg *m = tw_event_data(e);
    msg_set_header(node_magic, NODE_KICKOFF, lp->gid, &m->h);
    tw_event_send(e);
}
/**
 * Initialize a test server LP. Servers are addressed as gid/2; request
 * servers (idx 0 and 2) schedule a self kickoff event, the middle server
 * (idx 1) only clears its request statistics.
 * @param ns server state (req_stat zeroed; idx, debug stream set here)
 * @param lp LP
 */
void testsvr_lp_init(
        testsvr_state * ns,
        tw_lp * lp){
    /* for test, just use dummy way (assume 1 svr / 1 modelnet) */
    ns->idx = lp->gid / 2;

    /* expect exactly three servers */
    assert(ns->idx <= 2);

    memset(ns->req_stat, 0x0, NUM_REQS*sizeof(int));

    /* create kickoff event only if we're a request server */
    if (ns->idx == 0 || ns->idx == 2){
        tw_event *e = codes_event_new(lp->gid, codes_local_latency(lp), lp);
        testsvr_msg *m_local = tw_event_data(e);
        m_local->magic = testsvr_magic;
        m_local->event_type = KICKOFF;
        /* dummy values for kickoff */
        m_local->idx_src = INT_MAX;
        m_local->lp_src = INT_MAX;
        m_local->req_num = INT_MAX;
        tw_event_send(e);
    }
#if TEST_DEBUG
    char name[32];
    /* snprintf bounds the write to the fixed-size name buffer */
    snprintf(name, sizeof(name), "testsvr.%d.%lu", ns->idx, lp->gid);
    ns->fdebug = fopen(name, "w");
    /* check fopen's result BEFORE using the stream -- the original called
     * setvbuf on a possibly-NULL FILE*, which is undefined behavior */
    assert(ns->fdebug != NULL);
    /* unbuffered so debug output survives a crash */
    setvbuf(ns->fdebug, NULL, _IONBF, 0);
    ns->event_ctr = 0;
#endif
}
/**
 * Forward event handler for the resource test LP.
 *
 * S_KICKOFF   -> request the first block from the resource LP.
 * S_ALLOC_ACK -> on success (ret == 0): grow the memory count, track the
 *                high-water mark, and request another block; on failure
 *                fall through to S_FREE to start releasing.
 * S_FREE      -> release one block and, while memory remains, self-schedule
 *                the next free.
 *
 * @param ns resource-test LP state (mem / mem_max updated here)
 * @param bf bitfield (unused in forward handler)
 * @param m  incoming message; mem_max_prev is stashed for reverse handling
 * @param lp LP
 */
static void s_event(s_state *ns, tw_bf *bf, s_msg *m, tw_lp *lp){
    assert(m->h.magic == s_magic);
    switch(m->h.event_type){
        case S_KICKOFF: ;
            msg_header h;
            msg_set_header(s_magic, S_ALLOC_ACK, lp->gid, &h);
            resource_lp_get(bsize, 0, lp, CODES_MCTX_DEFAULT, 0, &h, &ns->cb);
            break;
        case S_ALLOC_ACK:
            if (m->c.ret == 0){
                /* allocation succeeded: record high-water mark (previous
                 * value saved in the message for reverse computation) and
                 * immediately ask for another block */
                ns->mem += bsize;
                m->mem_max_prev = ns->mem_max;
                ns->mem_max = maxu64(ns->mem, ns->mem_max);
                msg_header h;
                msg_set_header(s_magic, S_ALLOC_ACK, lp->gid, &h);
                resource_lp_get(bsize, 0, lp, CODES_MCTX_DEFAULT, 0, &h, &ns->cb);
                break;
            }
            /* allocation failed -- fall through into the free stmt */
        case S_FREE:
            resource_lp_free(bsize, lp, CODES_MCTX_DEFAULT);
            ns->mem -= bsize;
            if (ns->mem > 0){
                tw_event *e = codes_event_new(lp->gid, codes_local_latency(lp), lp);
                /* renamed from "m" -- the original declaration shadowed the
                 * function's m parameter */
                s_msg *m_free = tw_event_data(e);
                msg_set_header(s_magic, S_FREE, lp->gid, &m_free->h);
                tw_event_send(e);
            }
            break;
        default:
            /* unknown event types were silently ignored by the original
             * switch; keep that behavior but make the case explicit */
            break;
    }
}
/* collective operation for the torus network: hands a T_COLLECTIVE_INIT
 * message to the NIC LP co-located with the sender.
 *
 * @param category          traffic category name copied into the message
 * @param message_size      payload size recorded in the message
 * @param remote_event_size size of the caller's event appended after the header
 * @param remote_event      caller event data to deliver at the destination
 * @param sender            originating LP
 */
void torus_collective(char* category, int message_size, int remote_event_size, const void* remote_event, tw_lp* sender)
{
    tw_event * e_new;
    tw_stime xfer_to_nic_time;
    nodes_message * msg;
    tw_lpid local_nic_id;
    char* tmp_ptr;

    // TODO: be annotation-aware
    codes_mapping_get_lp_info(sender->gid, grp_name, &mapping_grp_id, NULL, &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset);
    codes_mapping_get_lp_id(grp_name, LP_CONFIG_NM, NULL, 1, mapping_rep_id, mapping_offset, &local_nic_id);

    /* lookahead keeps the event timestamp safely in the future for
     * conservative/optimistic scheduling */
    xfer_to_nic_time = g_tw_lookahead + codes_local_latency(sender);
    e_new = model_net_method_event_new(local_nic_id, xfer_to_nic_time, sender, TORUS, (void**)&msg, (void**)&tmp_ptr);

    /* NOTE(review): remote_event_size_bytes is first set to message_size and
     * then overwritten below when remote_event_size > 0 -- looks suspicious;
     * confirm intended semantics against the torus model's GENERATE path */
    msg->remote_event_size_bytes = message_size;
    /* NOTE(review): unbounded strcpy -- assumes category fits the message's
     * category field (CATEGORY_NAME_MAX by CODES convention); verify callers */
    strcpy(msg->category, category);
    msg->sender_svr=sender->gid;
    msg->type = T_COLLECTIVE_INIT;

    /* NOTE(review): this recomputes the payload pointer from the message
     * header instead of using the tmp_ptr returned by
     * model_net_method_event_new -- confirm both point at the same offset */
    tmp_ptr = (char*)msg;
    tmp_ptr += torus_get_msg_sz();
    if(remote_event_size > 0)
    {
        msg->remote_event_size_bytes = remote_event_size;
        memcpy(tmp_ptr, remote_event, remote_event_size);
        tmp_ptr += remote_event_size;
    }

    tw_event_send(e_new);
    return;
}
/* Initialize a server LP. All servers except the last one build a random
 * permutation of the priority values and schedule a self kickoff event;
 * the final (receiver) server only zeroes its per-sender receive counts. */
static void svr_init(
        svr_state * ns,
        tw_lp * lp)
{
    ns->server_idx = lp->gid / 2;

    if (ns->server_idx >= NUM_SERVERS-1){
        /* receiver server: clear the receive counters and we're done */
        memset(ns->num_recv, 0, NUM_SERVERS*sizeof(*ns->num_recv));
        return;
    }

    /* mark every permutation slot as empty */
    for (int slot = 0; slot < NUM_PRIOS; slot++)
        ns->random_order[slot] = -1;

    /* place each priority into a uniformly random empty slot via
     * rejection sampling: redraw whenever the chosen slot is taken */
    for (int prio = 0; prio < NUM_PRIOS; prio++){
        int slot;
        do {
            slot = tw_rand_integer(lp->rng, 0, NUM_PRIOS-1);
            // not sure whether rand_integer is inclusive or not...
            assert(slot < NUM_PRIOS);
        } while (ns->random_order[slot] != -1);
        ns->random_order[slot] = prio;
    }

    /* self kickoff to begin issuing sends */
    tw_event *kickoff_ev = codes_event_new(lp->gid, codes_local_latency(lp), lp);
    svr_msg *kickoff_msg = tw_event_data(kickoff_ev);
    msg_set_header(666, KICKOFF, lp->gid, &kickoff_msg->h);
    tw_event_send(kickoff_ev);
}
/**
 * Issue an IO request to the local storage model (LSM) LP.
 *
 * Builds an lsm_message_t carrying the IO descriptor plus the caller's
 * completion-callback info and sends it to the LSM LP resolved through
 * map_ctx. When lsm_in_sequence is set, requests are serialized by
 * accumulating each delay into lsm_msg_offset.
 *
 * Priority comes from the module-level temp_prio (set by a prior call by
 * convention -- TODO confirm the setter API) and is consumed/reset here.
 *
 * @param lp_io_category  traffic category (length-checked against
 *                        CATEGORY_NAME_MAX before the strcpy below)
 * @param io_object       object id for the IO
 * @param io_offset       byte offset within the object
 * @param io_size_bytes   request size in bytes
 * @param io_type         lsm_event_t operation code
 * @param delay           extra delay added before the request fires
 * @param sender          originating LP
 * @param map_ctx         mapping context used to locate the LSM LP
 * @param return_tag      caller tag echoed back in the completion
 * @param return_header   caller header echoed back in the completion
 * @param cb              caller callback descriptor (sanity-checked)
 */
void lsm_io_event( const char * lp_io_category, uint64_t io_object, int64_t io_offset, uint64_t io_size_bytes, int io_type, tw_stime delay, tw_lp *sender, struct codes_mctx const * map_ctx, int return_tag, msg_header const * return_header, struct codes_cb_info const * cb)
{
    /* category must be non-empty and fit the fixed-size field */
    assert(strlen(lp_io_category) < CATEGORY_NAME_MAX-1);
    assert(strlen(lp_io_category) > 0);
    SANITY_CHECK_CB(cb, lsm_return_t);

    tw_lpid lsm_id = codes_mctx_to_lpid(map_ctx, LSM_NAME, sender->gid);

    tw_stime delta = delay + codes_local_latency(sender);
    if (lsm_in_sequence) {
        /* serialize requests: push this one after all previously issued
         * ones and grow the shared offset for the next caller */
        tw_stime tmp = lsm_msg_offset;
        lsm_msg_offset += delta;
        delta += tmp;
    }

    tw_event *e = tw_event_new(lsm_id, delta, sender);
    lsm_message_t *m = tw_event_data(e);
    m->magic = lsm_magic;
    m->event = (lsm_event_t) io_type;
    m->data.object = io_object;
    m->data.offset = io_offset;
    m->data.size = io_size_bytes;
    strcpy(m->data.category, lp_io_category);

    // get the priority count for checking
    int num_prios = lsm_get_num_priorities(map_ctx, sender->gid);
    // prio checks and sets
    if (num_prios <= 0) // disabled scheduler - ignore
        m->data.prio = 0;
    else if (temp_prio < 0) // unprovided priority - defer to max possible
        m->data.prio = num_prios-1;
    else if (temp_prio < num_prios) // valid priority
        m->data.prio = temp_prio;
    else
        tw_error(TW_LOC, "LP %lu, LSM LP %lu: Bad priority (%d supplied, %d lanes)\n", sender->gid, lsm_id, temp_prio, num_prios);
    // reset temp_prio so it cannot leak into the next request
    temp_prio = -1;

    /* stash the caller's callback so the completion can be routed back */
    m->cb.info = *cb;
    m->cb.h = *return_header;
    m->cb.tag = return_tag;

    tw_event_send(e);
}
/* Initialize the resource test LP: look up our relative id, zero the
 * memory counters, wire up the callback descriptor, and self-schedule a
 * kickoff event to start the allocation loop. */
static void s_init(s_state *ns, tw_lp *lp){
    ns->id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0);
    ns->mem = 0;
    ns->mem_max = 0;
    INIT_CODES_CB_INFO(&ns->cb, s_msg, h, tag, c);

    /* kickoff: deliver S_KICKOFF to ourselves after local latency */
    tw_event *kickoff = codes_event_new(lp->gid, codes_local_latency(lp), lp);
    s_msg *kickoff_msg = tw_event_data(kickoff);
    msg_set_header(s_magic, S_KICKOFF, lp->gid, &kickoff_msg->h);
    tw_event_send(kickoff);
}
/* torus packet event: generates a torus GENERATE packet on the compute
 * node NIC and returns the time offset used for the transfer.
 *
 * When is_last_pckt is set, the caller's remote and/or self event payloads
 * are appended after the message header so the last packet carries the
 * completion information. */
static tw_stime torus_packet_event(char* category, tw_lpid final_dest_lp, uint64_t packet_size, int is_pull, uint64_t pull_size, tw_stime offset, const mn_sched_params *sched_params, int remote_event_size, const void* remote_event, int self_event_size, const void* self_event, tw_lpid src_lp, tw_lp *sender, int is_last_pckt)
{
    tw_event * e_new;
    tw_stime xfer_to_nic_time;
    nodes_message * msg;
    char* tmp_ptr;

    /* lookahead keeps the timestamp safely beyond the last KP time;
     * throws an error if found last KP time > current event time otherwise */
    xfer_to_nic_time = g_tw_lookahead + codes_local_latency(sender);
    /* tmp_ptr receives the start of the payload area after the header */
    e_new = model_net_method_event_new(sender->gid, xfer_to_nic_time+offset, sender, TORUS, (void**)&msg, (void**)&tmp_ptr);
    strcpy(msg->category, category);
    msg->final_dest_gid = final_dest_lp;
    msg->sender_svr= src_lp;
    msg->packet_size = packet_size;
    msg->remote_event_size_bytes = 0;
    msg->local_event_size_bytes = 0;
    msg->type = GENERATE;
    msg->is_pull = is_pull;
    msg->pull_size = pull_size;

    if(is_last_pckt) /* Its the last packet so pass in remote event information*/
    {
        /* remote event first, then self event, packed back to back */
        if(remote_event_size > 0)
        {
            msg->remote_event_size_bytes = remote_event_size;
            memcpy(tmp_ptr, remote_event, remote_event_size);
            tmp_ptr += remote_event_size;
        }
        if(self_event_size > 0)
        {
            msg->local_event_size_bytes = self_event_size;
            memcpy(tmp_ptr, self_event, self_event_size);
            tmp_ptr += self_event_size;
        }
    }
    tw_event_send(e_new);
    return xfer_to_nic_time;
}
/* Handle an incoming request on the middle server (idx 1): clone the
 * request into a LOCAL self event, delayed by the codes local latency,
 * so it is processed after a short in-node delay. */
void handle_testsvr_req(
        testsvr_state * ns,
        testsvr_msg * m,
        tw_lp * lp){
    /* only server 1 processes requests */
    assert(ns->idx == 1);

    /* copy the request wholesale and re-tag it as LOCAL */
    tw_event *local_ev = codes_event_new(lp->gid, codes_local_latency(lp), lp);
    testsvr_msg *local_msg = tw_event_data(local_ev);
    *local_msg = *m;
    local_msg->event_type = LOCAL;
#if TEST_DEBUG
    local_msg->src_event_ctr = ns->event_ctr;
#endif
    tw_event_send(local_ev);
#if TEST_DEBUG
    ns->event_ctr++;
#endif
}
/*
 * handle_io_completion
 * - notify the original requester that its IO has finished by sending
 *   its stored completion callback back to it
 * - then resume the IO scheduler loop if scheduling is enabled
 */
static void handle_io_completion (lsm_state_t *ns, tw_bf *b, lsm_message_t *m_in, tw_lp *lp)
{
    SANITY_CHECK_CB(&m_in->cb.info, lsm_return_t);

    /* build the caller's completion event from the callback header it
     * supplied with the request */
    tw_event *ack = tw_event_new(m_in->cb.h.src, codes_local_latency(lp), lp);
    void *ack_data = tw_event_data(ack);
    GET_INIT_CB_PTRS(&m_in->cb, ack_data, lp->gid, h, tag, rc, lsm_return_t);
    rc->rc = 0; /* no failures to speak of yet */
    tw_event_send(ack);

    /* continue the scheduler loop */
    if (ns->use_sched)
        handle_io_sched_compl(ns, b, m_in, lp);
}