void Router::stageTN() { for (int in_pc=0; in_pc<m_num_pc; in_pc++) { int tunnel_in_vc = m_tunnel_info_vec[in_pc].m_in_vc; int tunnel_out_pc = m_tunnel_info_vec[in_pc].m_out_pc; int tunnel_out_vc = m_tunnel_info_vec[in_pc].m_out_vc; if (tunnel_in_vc == INVALID_VC) // support tunneling ? continue; if (m_flitQ->isEmpty(in_pc, tunnel_in_vc)) // has a flit ? continue; // peek one flit from the input buffer Flit* p_flit = m_flitQ->peek(in_pc, tunnel_in_vc); assert(p_flit); switch (p_flit->type()) { case HEAD_FLIT: case ATOM_FLIT: { FlitHead* p_head_flit = (FlitHead*) p_flit; switch(g_cfg.router_tunnel_type) { case TUNNELING_PER_FLOW: if (m_tunnel_info_vec[in_pc].m_flow != make_pair(p_head_flit->src_router_id(), p_head_flit->dest_router_id())) goto NO_TUNNEL; break; case TUNNELING_PER_DEST: if (m_tunnel_info_vec[in_pc].m_dest_router_id != p_head_flit->dest_router_id()) goto NO_TUNNEL; break; case TUNNELING_PER_OUTPORT: break; default: assert(0); } // check input module status if (m_in_mod_vec[in_pc][tunnel_in_vc].m_state != IN_MOD_I) goto NO_TUNNEL; // Step 1.1: do routing int out_pc = g_Routing->selectOutPC(this, tunnel_in_vc, (FlitHead*) p_flit); assert(out_pc < (int) m_connNextRouter_vec.size()); int next_router_id = m_connNextRouter_vec[out_pc].first; int next_in_pc = m_connNextRouter_vec[out_pc].second; if (tunnel_out_pc != out_pc) goto NO_TUNNEL; // Step 2.1: reserve VC if (! isEjectChannel(out_pc)) { if (m_out_mod_vec[out_pc][tunnel_out_vc].m_state != OUT_MOD_I ) // reserved ? goto NO_TUNNEL; } // Step 3.1: check no request in SA for designated input and output ports // FIXME: do we need this? // if (! m_sw_alloc->hasNoReq(in_pc, out_pc)) // goto NO_TUNNEL; // 03/14/08 credit-based flow control if (! hasCredit(out_pc, tunnel_out_vc) ) // no credit? goto NO_TUNNEL; // Step 4.1: check no flit in xbar for designated input and output ports if (m_xbar.m_waiting_in_vc_vec[in_pc] != INVALID_VC || ! m_xbar.m_outport_free_vec[out_pc]) goto NO_TUNNEL; // Step 5.1: check no flit in a link if (! isEjectChannel(out_pc)) { if (((int) m_link_vec[out_pc].m_w_pipeline.size()) >= m_link_vec[out_pc].m_delay_factor*g_cfg.link_latency) goto NO_TUNNEL; } // Now checking is done for tunneling. // Step 2.2: reserve VC if (! isEjectChannel(out_pc)) { m_out_mod_vec[out_pc][tunnel_out_vc].m_state = OUT_MOD_V; } // create a credit and send it to upstream router int prev_router_id = m_connPrevRouter_vec[in_pc].first; int prev_out_pc = m_connPrevRouter_vec[in_pc].second; if (prev_router_id != INVALID_ROUTER_ID && prev_out_pc != INVALID_PC) { Credit* p_credit = g_CreditPool.alloc(); p_credit->m_out_pc = prev_out_pc; p_credit->m_out_vc = tunnel_in_vc; p_credit->m_num_credits = 1; p_credit->m_clk_deposit = simtime() + g_Router_vec[prev_router_id]->getLink(prev_out_pc).m_delay_factor * g_cfg.link_latency; g_Router_vec[prev_router_id]->depositCredit(p_credit); } // 11/05/05: no stall at ST stage if (! isEjectChannel(out_pc)) { assert(next_router_id != INVALID_PC); assert(next_in_pc != INVALID_PC); } // pipeline stage latency m_pipe_lat_ST_tab->tabulate(simtime() - p_flit->m_clk_enter_stage); p_flit->m_clk_enter_stage = simtime(); // assert(m_xbar.m_waiting_in_vc_vec[in_pc] == INVALID_VC); // m_xbar.m_waiting_in_vc_vec[in_pc] = tunnel_in_vc; // assert(m_xbar.m_outport_free_vec[out_pc] == true); // m_xbar.m_outport_free_vec[out_pc] = false; // Step 4.2: traverse a crossbar // move flit to the link if (isEjectChannel(out_pc)) { // select ejection port int epc = p_flit->getPkt()->m_NI_out_pos; NIOutput* p_ni_output = getNIOutput(epc); assert(p_ni_output); assert(m_id == p_flit->getPkt()->getDestRouterID()); p_ni_output->writeFlit(p_flit); // 03/15/06 fast simulation m_num_flits_inside--; } else { m_link_vec[out_pc].m_w_pipeline.push_back(make_pair(p_flit, make_pair(tunnel_out_vc, simtime()))); // decrease credit decCredit(out_pc, tunnel_out_vc, 1); } // read a flit m_flitQ->read(in_pc, tunnel_in_vc); // bypass pipeline m_num_tunnel_flit_vec[in_pc][tunnel_in_vc]++; printf("here1\n"); #ifdef _DEBUG_CHECK_BUFFER_INTEGRITY assert(checkBufferIntegrity(in_pc, tunnel_in_vc, p_flit)); #endif // change input module status for only multi-flit packets assert(m_in_mod_vec[in_pc][tunnel_in_vc].m_state == IN_MOD_I); if (p_flit->type() != ATOM_FLIT) { m_in_mod_vec[in_pc][tunnel_in_vc].m_state = IN_MOD_S; m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc = out_pc; m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc = tunnel_out_vc; } // printf("TN3 router=%d m_in_mod_vec[%d][%d]: state=%d, out_pc=%d, out_vc=%d\n", id(), in_pc, tunnel_in_vc, m_in_mod_vec[in_pc][tunnel_in_vc].m_state, m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc, m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc); #ifdef _DEBUG_ROUTER debugTN(p_flit, in_pc, tunnel_in_vc, out_pc, tunnel_out_vc); #endif } break; case MIDL_FLIT: case TAIL_FLIT: { assert(m_in_mod_vec[in_pc][tunnel_in_vc].m_state == IN_MOD_S); int out_pc = m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc; int out_vc = m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc; if (tunnel_out_pc != out_pc) goto NO_TUNNEL; if (tunnel_out_vc != out_vc) // output VC for tunneling is determined at previous packet delivery. goto NO_TUNNEL; // Step 3.1: check no request in SA for designated input and output ports // FIXME: do we need this? // if (! m_sw_alloc->hasNoReq(in_pc, out_pc)) // goto NO_TUNNEL; // check no flit in xbar for designated input and output ports if (m_xbar.m_waiting_in_vc_vec[in_pc] != INVALID_VC || ! m_xbar.m_outport_free_vec[out_pc]) goto NO_TUNNEL; // 03/14/08 credit-based flow control if (! hasCredit(out_pc, out_vc) ) // no credit? goto NO_TUNNEL; // check no flit in a link if (! isEjectChannel(out_pc)) { if (((int) m_link_vec[out_pc].m_w_pipeline.size()) >= m_link_vec[out_pc].m_delay_factor*g_cfg.link_latency) goto NO_TUNNEL; } // Now checking is done for tunneling. // delete switch allocation status if reserved m_sw_arb->del(in_pc, tunnel_in_vc); // create a credit and send it to upstream router int prev_router_id = m_connPrevRouter_vec[in_pc].first; int prev_out_pc = m_connPrevRouter_vec[in_pc].second; if (prev_router_id != INVALID_ROUTER_ID && prev_out_pc != INVALID_PC) { Credit* p_credit = g_CreditPool.alloc(); p_credit->m_out_pc = prev_out_pc; p_credit->m_out_vc = tunnel_in_vc; p_credit->m_num_credits = 1; p_credit->m_clk_deposit = simtime() + g_Router_vec[prev_router_id]->getLink(prev_out_pc).m_delay_factor * g_cfg.link_latency; g_Router_vec[prev_router_id]->depositCredit(p_credit); } // Step 4: traverse a crossbar // move flit to the link if (isEjectChannel(out_pc)) { // select ejection port int epc = p_flit->getPkt()->m_NI_out_pos; NIOutput* p_ni_output = getNIOutput(epc); assert(p_ni_output); assert(m_id == p_flit->getPkt()->getDestRouterID()); p_ni_output->writeFlit(p_flit); // 03/15/06 fast simulation m_num_flits_inside--; } else { m_link_vec[out_pc].m_w_pipeline.push_back(make_pair(p_flit, make_pair(out_vc, simtime()))); // decrease credit decCredit(out_pc, out_vc, 1); } // clear status of input module if (p_flit->isTail()) { // assert(eject_vc_sts[epc][out_vc] == false); // eject_vc_sts[epc][out_vc] = true; m_in_mod_vec[in_pc][tunnel_in_vc].m_state = IN_MOD_I; m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc = INVALID_PC; m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc = INVALID_VC; } // read a flit m_flitQ->read(in_pc, tunnel_in_vc); // bypass pipeline m_num_tunnel_flit_vec[in_pc][tunnel_in_vc]++; printf("here2\n"); #ifdef _DEBUG_CHECK_BUFFER_INTEGRITY assert(checkBufferIntegrity(in_pc, tunnel_in_vc, p_flit)); #endif #ifdef _DEBUG_ROUTER debugTN(p_flit, in_pc, tunnel_in_vc, out_pc, out_vc); #endif } break; default: assert(0); } // switch (p_flit->type()) { NO_TUNNEL: ; } // for (int in_pc=0; in_pc<m_num_pc; in_pc++) { return; }
void Router::stageST() { #ifdef _DEBUG_ROUTER_ST int _st_debug_router = 3; double _st_debug_clk = 141000012.0; if (m_id==_st_debug_router && simtime() > _st_debug_clk) { printf("ST_status router=%d, clk=%.0lf\n ", m_id, simtime()); for (int out_pc=0; out_pc<m_num_pc; out_pc++) { if (isEjectChannel(out_pc)) { } else { if (m_link_vec[out_pc].m_w_pipeline.size() == 0) { printf("out_pc=%d(free) ", out_pc); } else { printf("out_pc=%d", out_pc); for (unsigned int i=0; i<m_link_vec[out_pc].m_w_pipeline.size(); i++) printf("(fid=%lld) ", m_link_vec[out_pc].m_w_pipeline[i].first->id()); } } } printf("\n"); } #endif int num_xbar_passes = 0; for (int in_pc=0; in_pc<m_num_pc; in_pc++) { int in_vc = m_xbar.m_waiting_in_vc_vec[in_pc]; if (in_vc == INVALID_VC) continue; Flit* read_flit = m_flitQ->read(in_pc, in_vc); assert(read_flit); #ifdef _DEBUG_CHECK_BUFFER_INTEGRITY assert(checkBufferIntegrity(in_pc, in_vc, read_flit)); #endif // create a credit and send it to upstream router int prev_router_id = m_connPrevRouter_vec[in_pc].first; int prev_out_pc = m_connPrevRouter_vec[in_pc].second; if (prev_router_id != INVALID_ROUTER_ID && prev_out_pc != INVALID_PC) { Credit* p_credit = g_CreditPool.alloc(); p_credit->m_out_pc = prev_out_pc; p_credit->m_out_vc = in_vc; p_credit->m_num_credits = 1; p_credit->m_clk_deposit = simtime() + g_Router_vec[prev_router_id]->getLink(prev_out_pc).m_delay_factor * g_cfg.link_latency; g_Router_vec[prev_router_id]->depositCredit(p_credit); } assert(m_in_mod_vec[in_pc][in_vc].m_state == IN_MOD_S); int out_pc = m_in_mod_vec[in_pc][in_vc].m_out_pc; int out_vc = m_in_mod_vec[in_pc][in_vc].m_out_vc; int next_router_id = m_connNextRouter_vec[out_pc].first; int next_in_pc = m_connNextRouter_vec[out_pc].second; if (isEjectChannel(out_pc)) { // select ejection port int epc = out_pc - num_internal_pc(); // printf("epc=%d out_pc=%d\n", epc, out_pc); NIOutput* p_ni_output = getNIOutput(epc); assert(p_ni_output); // FIXME: The following assert() is not valid for DMesh topology, // because destination is encoded at injection function. // assert(m_id == read_flit->getPkt()->getDestRouterID()); p_ni_output->writeFlit(read_flit); // 03/15/06 fast simulation m_num_flits_inside--; #ifdef _DEBUG_ROUTER debugST(read_flit, in_pc, out_pc, INVALID_ROUTER_ID, INVALID_PC, out_vc, true); #endif } else { // 11/05/05: no stall at ST stage assert(next_router_id != INVALID_PC); assert(next_in_pc != INVALID_PC); #ifdef _DEBUG_CREDIT if(g_Router_vec[next_router_id]->flitQ()->isFull(next_in_pc, out_vc)) { printf("router=%d out_pc=%d next_router=%d next_in_pc=%d\n", m_id, out_pc, next_router_id, next_in_pc); for (int x_vc=0; x_vc<m_num_vc; x_vc++) printf(" vc=%d: credit=%d credit_rsv=%d\n", x_vc, m_out_mod_vec[out_pc][x_vc].m_num_credit, m_out_mod_vec[out_pc][x_vc].m_num_credit_rsv); for (int x_vc=0; x_vc<m_num_vc; x_vc++) printf(" vc=%d: Q_sz=%d\n", x_vc, g_Router_vec[next_router_id]->flitQ()->size(next_in_pc, x_vc)); } #endif // assert(! g_Router_vec[next_router_id]->flitQ()->isFull(next_in_pc, out_vc) ); // move flit to the link // NOTE: For wire pipelining, // # of traversing flits on the link must be less than link latency. assert(((int) m_link_vec[out_pc].m_w_pipeline.size()) < m_link_vec[out_pc].m_delay_factor*g_cfg.link_latency); m_link_vec[out_pc].m_w_pipeline.push_back(make_pair(read_flit, make_pair(out_vc, simtime()))); // pipeline stage latency m_pipe_lat_ST_tab->tabulate(simtime() - read_flit->m_clk_enter_stage); read_flit->m_clk_enter_stage = simtime(); #ifdef _DEBUG_ROUTER debugST(read_flit, in_pc, out_pc, next_router_id, next_in_pc, out_vc, false); #endif } // if (isEjectChannel(out_pc)) { // remove a flit from xbar m_xbar.m_waiting_in_vc_vec[in_pc] = INVALID_VC; // update xbar outport status assert(m_xbar.m_outport_free_vec[out_pc] == false); m_xbar.m_outport_free_vec[out_pc] = true; if (read_flit->isTail()) { // change input module status (S->I) m_in_mod_vec[in_pc][in_vc].m_state = IN_MOD_I; m_in_mod_vec[in_pc][in_vc].m_out_pc = INVALID_PC; m_in_mod_vec[in_pc][in_vc].m_out_vc = INVALID_VC; // change output module status (V->I) assert(m_out_mod_vec[out_pc][out_vc].m_state == OUT_MOD_V); m_out_mod_vec[out_pc][out_vc].m_state = OUT_MOD_I; } num_xbar_passes++; // intra-router flit latency m_flit_lat_router_tab->tabulate(simtime() - read_flit->m_clk_enter_router); // record power if (!g_sim.m_warmup_phase) { m_power_tmpl->record_buffer_read(read_flit, in_pc); m_power_tmpl->record_xbar_trav(read_flit, in_pc, out_pc); if (g_cfg.profile_power) { m_power_tmpl_profile->record_buffer_read(read_flit, in_pc); m_power_tmpl_profile->record_xbar_trav(read_flit, in_pc, out_pc); } } } // record power if (!g_sim.m_warmup_phase) { if (num_xbar_passes > 0) { m_power_tmpl->record_xbar_trav_num(num_xbar_passes); if (g_cfg.profile_power) m_power_tmpl_profile->record_xbar_trav_num(num_xbar_passes); } } }