void Router::stageSSA() { // speculative-SW allocator does grant. vector< pair< pair< int, int >, pair< int, int > > > grant_vec = m_sw_arb->grantSpec(); // move granted requests to xbar for (unsigned int n=0; n<grant_vec.size(); n++) { int in_pc = grant_vec[n].first.first; int in_vc = grant_vec[n].first.second; int out_pc = grant_vec[n].second.first; int out_vc = grant_vec[n].second.second; assert(in_pc != INVALID_PC); assert(in_vc != INVALID_VC); Flit* peek_flit = m_flitQ->peek(in_pc, in_vc); assert(peek_flit->isHead()); #ifdef _DEBUG_ROUTER debugSA(peek_flit, in_pc, in_vc, out_pc, out_vc, true, false); #endif assert(m_xbar.m_waiting_in_vc_vec[in_pc] == INVALID_VC); m_xbar.m_waiting_in_vc_vec[in_pc] = in_vc; assert(m_xbar.m_outport_free_vec[out_pc] == true); m_xbar.m_outport_free_vec[out_pc] = false; // pipeline stage latency m_pipe_lat_SSA_tab->tabulate(simtime() - peek_flit->m_clk_enter_stage); peek_flit->m_clk_enter_stage = simtime(); // change input module status: a header flit completes SA. assert(m_in_mod_vec[in_pc][in_vc].m_state == IN_MOD_V); m_in_mod_vec[in_pc][in_vc].m_state = IN_MOD_S; // V->S } }
double WorkloadSynthetic::temporalSelfSimilar(int src_core_id) { double off_time = 0.0; if (m_SS_OnMode_vec[src_core_id]) { if (simtime() > m_SS_last_OnTimeStamp_vec[src_core_id]) { double x = m_stream_temporal_vec[src_core_id]->uniform(0.0, 1.0); double off_time = pow((1.0 - x), -1.0/1.25); // printf("src_core_id=%d off_time=%lf\n", src_core_id, off_time); if (off_time < 1.0) off_time = 1.0; hold(off_time); m_SS_OnMode_vec[src_core_id] = false; } } else { double x = m_stream_temporal_vec[src_core_id]->uniform(0.0, 1.0); double on_time = pow((1.0 - x), (-1.0)/1.9); // printf("src_core_id=%d on_time=%lf\n", src_core_id, on_time); m_SS_last_OnTimeStamp_vec[src_core_id] = simtime() + on_time; m_SS_OnMode_vec[src_core_id] = true; } return (off_time + temporalPoisson(src_core_id)); }
void process_parse_trace() { char proc_name[MAX_PROCESS_NAME_STR_LEN]; sprintf(proc_name, "trace proc"); create(proc_name); g_sim.m_num_CSIM_process++; // stream net_stream; WorkloadTrace* wkldTrace = (WorkloadTrace*) g_Workload; ((WorkloadTrace*) g_Workload)->skipTraceFile(); hold(g_cfg.wkld_trace_skip_cycles); fprintf(stderr, "skipped %.0lf cycles (trace_file_id=%d).\n", g_cfg.wkld_trace_skip_cycles, wkldTrace->trace_file_id()); double last_pkt_inject_clk = simtime(); while (!g_EOS) { vector< Packet* > pkt_vec = wkldTrace->readTrace(); if (pkt_vec.size() == 0) continue; if (pkt_vec.size() == 1 && pkt_vec[0] == 0) // no more trace? break; if (pkt_vec[0]->m_clk_gen > last_pkt_inject_clk) { double hold_tm = pkt_vec[0]->m_clk_gen - last_pkt_inject_clk; if (hold_tm > 0.0) hold(hold_tm); last_pkt_inject_clk = simtime(); } for (unsigned int n=0; n<pkt_vec.size(); n++) { Packet* p_pkt = pkt_vec[n]; #ifdef _DEBUG_ROUTER printf("clk=%0.lf GEN p=%lld C:%d->%d R:%d/%d->%d/%d #flits=%d gen_clk=%.0lf\n", simtime(), p_pkt->id(), p_pkt->getSrcCoreID(), p_pkt->getDestCoreID(), p_pkt->getSrcRouterID(), p_pkt->m_NI_in_pos, p_pkt->getDestRouterID(), p_pkt->m_NI_out_pos, p_pkt->m_num_flits, p_pkt->m_clk_gen); #endif // choose one network if multiple networks exist. if (g_cfg.net_networks > 1) select_network(p_pkt); assert(p_pkt->m_NI_in_pos < g_Core_vec[p_pkt->getSrcCoreID()]->num_NIInput()); g_Core_vec[p_pkt->getSrcCoreID()]->forwardPkt2NI(p_pkt->m_NI_in_pos, p_pkt); g_sim.m_num_pkt_inj++; } } // After the last trace is processed, terminate simulation. if (!g_EOS) { g_EOS = true; g_ev_sim_done->set(); g_sim.m_clk_sim_end = simtime(); } #ifdef _DEBUG_ROUTER_PROCESS printf("PROCESS COMPLETE: process_parse_trace\n"); #endif }
/**
 * Find the intermediate block size that minimises the estimated cost of going
 * from BKZ-sbeta to BKZ-ebeta.
 *
 * The cost of a candidate is const_lll * t_lll + const_enum * t_enum, where
 * t_lll and t_enum are produced by simtime(). Candidates are scanned upwards
 * from ebeta+1 and the scan stops once the current candidate is at least
 * 3 above the best one found so far.
 *
 * @param t_lll     out: LLL time for the selected block size
 * @param t_enum    out: enumeration time for the selected block size
 * @param usebeta   out: the selected block size
 * @param dim       exclusive upper bound of the candidate block sizes
 * @param sbeta     starting block size
 * @param ebeta     target block size
 * @param const_lll weight of t_lll in the cost
 * @param const_enum weight of t_enum in the cost
 *
 * If sbeta == ebeta both times are set to 0 and usebeta is left untouched.
 */
void simtimeopt(bkzfloat& t_lll,bkzfloat& t_enum,int& usebeta,int dim,int sbeta,int ebeta,bkzfloat const_lll,bkzfloat const_enum) {

    if (sbeta==ebeta) {
        t_lll = 0;
        t_enum = 0;
        return;
    }

    //find the optimal beta to minimize total cost from BKZ-sbeta to BKZ-ebeta
    //Time is estimated by const_lll*t_lll + const_enum*t_enum
    bkzfloat cost,mincost;

    // Start at the first candidate. When the loop runs, its first iteration
    // overwrites this with the same value; when ebeta+1 >= dim the loop never
    // runs and minbeta would otherwise be read uninitialised below.
    int minbeta = ebeta+1;
    mincost = -1;   // negative means "no candidate evaluated yet"

    cout << "Simulating BKZ Time: " << sbeta << "->" << ebeta << " \r";
    cout.flush();

    double loop;
    for (usebeta = ebeta+1;usebeta<dim;usebeta++) {
        simtime(t_lll,t_enum,loop,dim,sbeta,ebeta,usebeta);
        cost = t_lll * const_lll + t_enum * const_enum;
        if ((mincost<0) || (cost < mincost)) {
            mincost = cost;
            minbeta = usebeta;
        }
        // no improvement for a while: stop scanning
        if (usebeta >= minbeta+3) {
            break;
        }
    }

    // recompute the times for the winner so the out-parameters match it
    simtime(t_lll,t_enum,loop,dim,sbeta,ebeta,minbeta);
    usebeta = minbeta;

    cout << "Simulating BKZ Time: " << sbeta << "->" << ebeta << " ...finished \r";
    cout.flush();
}
/**
 * Dynamic CAM control driven by the contention delay observed for a packet.
 *
 * The contention delay is the time the packet spent in the network minus an
 * estimate computed from the minimal hop count (3 cycles per hop, minus 1)
 * plus (num_flits - 1). The delay is tabulated per source router; if the
 * running mean exceeds 2.0 the CAM entries for this destination are enabled
 * on every input NI of the source router, otherwise they are disabled.
 * The control counters are incremented once per change, on the first NI only.
 *
 * @param p_pkt      packet that arrived at this NI
 * @param decode_lat not used by this function
 */
void NIOutputDecompr::controlCompression(Packet* p_pkt, const int decode_lat)
{
    const int src_router_id = p_pkt->getSrcRouterID();
    const int this_router_id = m_attached_router->id();

    // network contention delay for packet (not including contention at NI)
    int hop_count = g_Topology->getMinHopCount(src_router_id, this_router_id);
    const int per_hop_delay = 3;
    int pkt_0_load_delay = (hop_count*per_hop_delay - 1) + (p_pkt->m_num_flits - 1);
    int net_cont_delay = ((int) (simtime() - p_pkt->m_clk_enter_net)) - pkt_0_load_delay;
    assert(net_cont_delay >= 0);

#ifdef _DEBUG_CAM_DYN_CONTROL
    int pkt_delay = (int) (simtime() - p_pkt->m_clk_gen);
    printf("NIOutDecompr: clk=%0.lf NI-%d %-2d->%-2d pid=%lld cont=%d (mean=%.1lf cnt=%ld) pkt=%d zero=%d\n", simtime(), m_NI_id, p_pkt->getSrcRouterID(), p_pkt->getDestRouterID(), p_pkt->id(), net_cont_delay, m_cont_delay_src_tab_vec[src_router_id]->mean(), m_cont_delay_src_tab_vec[src_router_id]->cnt(), pkt_delay, pkt_0_load_delay);
#endif

    // accounting
    m_cont_delay_src_tab_vec[src_router_id]->tabulate((double) net_cont_delay);

    // control actions: enable above the threshold, disable otherwise
    const bool want_cam_enabled = (m_cont_delay_src_tab_vec[src_router_id]->mean() > 2.0);

    Router* srcRouter = g_Router_vec[src_router_id];
    for (unsigned int ni=0; ni<srcRouter->getNIInputVec().size(); ni++) {
        NIInputCompr* p_NI_comp = dynamic_cast<NIInputCompr*> (srcRouter->getNIInputVec()[ni]);
        // dynamic_cast yields a null pointer on a type mismatch; fail loudly
        // instead of dereferencing it.
        assert(p_NI_comp);

        const bool cam_enabled = p_NI_comp->CAMsts(this_router_id);
        if (cam_enabled == want_cam_enabled)
            continue;   // already in the requested state

        if (want_cam_enabled)
            p_NI_comp->enableCAM(this_router_id, 0);
        else
            p_NI_comp->disableCAM(this_router_id, 0);

        // account the control action once, not once per NI
        if (ni==0) {
            g_sim.m_num_pkt_spurious++;
            g_CamManager->m_pkt_dyn_control++;
        }
    }
}
/**
 * CSIM process that drives one router, one simulation step per cycle.
 *
 * Each iteration puts the router to sleep when it holds neither flits nor
 * pending credit deposits, then runs router_sim() and holds for ONE_CYCLE.
 * The loop never terminates on its own.
 *
 * @param p_router router simulated by this process
 */
void process_router(Router* p_router)
{
    char proc_name[MAX_PROCESS_NAME_STR_LEN];
    // bounded write: a large router id must not overrun proc_name
    snprintf(proc_name, sizeof(proc_name), "r_%d proc", p_router->id());
    create(proc_name);
    g_sim.m_num_CSIM_process++;

    while (1) {
#ifdef _DEBUG_ROUTER_SNAPSHOT
        if (simtime() >= _DEBUG_ROUTER_SNAPSHOT_CLK)
            take_network_snapshot(stdout);
#endif

        // 03/15/06 fast simulation
        if (p_router->hasNoFlitsInside() && p_router->hasNoCreditDepositsInside()) {
            p_router->sleep();
        }

        p_router->router_sim();
        hold(ONE_CYCLE);
    }

    // not reached: the loop above has no exit
#ifdef _DEBUG_ROUTER_PROCESS
    printf("PROCESS COMPLETE: process_router(router=%d)\n", p_router->id());
#endif
}
//////////////////////////////////////////////////////////////////////// // print simulation progress void print_sim_progress() { char buf[256]; // format: // clock, // packe: #injected pkts(I), #ejected pkts(E), #injected pkts - #ejected pkts(D), // #in-transit pkts(N) // flit: #in-transit flits(N), // trace: #injected traces (if workload uses traces) // latency: avg pkt latency(l), avg queuing latency(q), avg contention latency(c) sprintf(buf, "clk=%.0lf\tp:I=%lld E=%lld D=%lld N=%lld f:N=%lld", simtime(), g_sim.m_num_pkt_inj, g_sim.m_num_pkt_ejt, g_sim.m_num_pkt_inj-g_sim.m_num_pkt_ejt, g_sim.m_num_pkt_in_network, g_sim.m_num_flit_in_network); if (! g_Workload->isSynthetic()) { sprintf(buf+strlen(buf), " t:%lld", ((WorkloadTrace*) g_Workload)->getProcessedTraceCount()); } sprintf(buf+strlen(buf), "\tl=%.2lf q=%.1lf c=%.1lf\n", g_sim.m_pkt_T_t_tab->mean(), g_sim.m_pkt_T_q_tab->mean(), g_sim.m_pkt_T_t_tab->mean() - g_sim.m_pkt_T_h_tab->mean() - g_sim.m_pkt_T_w_tab->mean() - g_sim.m_pkt_T_s_tab->mean() + 1.0); // FIXME 1.0+ cycle fprintf(stderr, "%s", buf); fflush(stderr); }
void sim(){ create("sim"); /* Initialize simulation. */ init(); /* For the duration of the simulation generate customers with exponential inter-arrival times with mean IATM. */ while(simtime() < 1000){ hold(exponential(IATM)); cust(); } wait(done); /* Print reports. */ printf("Server 1, expected average delay in queue of a customer: %f\n", table_mean(box_time_table(queue_box))); printf("Server 1, expected time-average number of customers in queue: %f\n", qtable_mean(box_number_qtable(queue_box))); printf("Server 1, expected utilization: %f\n\n", qtable_mean(box_number_qtable(service_box))); printf("Server 2, expected average delay in queue of a customer: %f\n", table_mean(box_time_table(queue_box2))); printf("Server 2, expected time-average number of customers in queue: %f\n", qtable_mean(box_number_qtable(queue_box2))); printf("Server 2, expected utilization: %f\n\n", qtable_mean(box_number_qtable(service_box2))); }
/** * \brief Activate the guider port outputs */ void SimGuidePort::activate(float raplus, float raminus, float decplus, float decminus) { debug(LOG_DEBUG, DEBUG_LOG, 0, "activate(raplus = %.3f, raminus = %.3f," " decplus = %.3f, decminus = %.3f)", raplus, raminus, decplus, decminus); if ((raplus < 0) || (raminus < 0) || (decminus < 0) || (decplus < 0)) { throw BadParameter("activation times must be nonegative"); } // update the offset update(); // perform this new activation lastactivation = simtime(); if (raplus > 0) { ra = raplus; } else { ra = -raminus; } if (decplus > 0) { dec = decplus; } else { dec = -decminus; } debug(LOG_DEBUG, DEBUG_LOG, 0, "new activations: ra = %f, dec = %f", ra, dec); }
/**
 * \brief Set a new target position
 *
 * current() is called first for its effect on the focuser state. A request
 * for the position that is already the target changes nothing; otherwise
 * the time of the request and the new target are recorded.
 *
 * \param value	new target position
 */
void	SimFocuser::set(unsigned short value) {
	current();
	if (value != target) {
		lastset = simtime();
		target = value;
	}
}
void process_link_dvs_set() { char proc_name[MAX_PROCESS_NAME_STR_LEN]; double last_profile_clk = 0.0; sprintf(proc_name, "link-dvs proc"); create(proc_name); g_sim.m_num_CSIM_process++; switch (g_cfg.link_dvs_method) { case LINK_DVS_NODVS: return; case LINK_DVS_HISTORY: break; case LINK_DVS_FLIT_RATE_PREDICT: g_LinkDVSer.open_flit_rate_predict_file(); g_LinkDVSer.skip_flit_rate_predict_file((int) (g_cfg.wkld_trace_skip_cycles/UNIT_MEGA) + 1); break; } double hold_time = g_cfg.sim_clk_start + g_cfg.link_dvs_interval/2.0; assert(hold_time > 0.0); hold(hold_time); while (!g_EOS) { switch (g_cfg.link_dvs_method) { case LINK_DVS_HISTORY: g_LinkDVSer.link_dvs_select_vf(); break; case LINK_DVS_FLIT_RATE_PREDICT: g_LinkDVSer.read_flit_rate_predict_interval(); g_LinkDVSer.estimate_link_utilz_from_flit_rate_predict(); g_LinkDVSer.link_dvs_select_vf_predicted_flit_rate(); #ifdef LINK_DVS_DEBUG printf("read (%.0lf~%.0lf) rate info for DVS at clk=%.0lf\n", (g_LinkDVSer.predict_line_num()-1)*g_cfg.link_dvs_interval, (g_LinkDVSer.predict_line_num())*g_cfg.link_dvs_interval, simtime()); #endif break; default: assert(0); } hold(g_cfg.link_dvs_interval); } switch (g_cfg.link_dvs_method) { case LINK_DVS_FLIT_RATE_PREDICT: g_LinkDVSer.close_flit_rate_predict_file(); break; } }
/** * \brief Update the offset to the current time * * The update method rolls the position changes forward. Each time it is * called, it compues the offset that guider port activations may have * cased since the last activation, and applies them to the offset. * It then computes the remaining activation that has not been applied * yet. * * The corrections applied by the update method amount to one pixel per * second. The CcdInfo publishes a pixel size of 10um, which means that * 10um corresponds to 15 arc seconds. */ void SimGuidePort::update() { // if this is the first if ((ra == 0) && (dec == 0)) { debug(LOG_DEBUG, DEBUG_LOG, 0, "no update"); return; } debug(LOG_DEBUG, DEBUG_LOG, 0, "update: current offset: %s", _offset.toString().c_str()); // advance the offset according to last activation double now = simtime(); // activetime is the time since the last activation call. Since only // part of the activations may have been executed, we compute that // part, and subtract it from current ra/dec values double activetime = now - lastactivation; // update the ra variable. 
This depends on the time since the last // call to update double rachange = 0; if (fabs(ra) < activetime) { // there was enough time to execute the complete activation rachange = ra; } else { // the activation could only partially be executed, so we // have to compute this partial activation rachange = sign(ra) * activetime; } debug(LOG_DEBUG, DEBUG_LOG, 0, "update: advance RA by %f", rachange); ra -= rachange; _offset = _offset + rachange * pixelspeed * _ravector; // update the dec variable, again this depends on the time since the // last call to update double decchange = 0; if (fabs(dec) < activetime) { // last call was a long time ago, full activation can be // executed decchange = dec; } else { // not enough time to execute activation decchange = sign(dec) * activetime; } debug(LOG_DEBUG, DEBUG_LOG, 0, "update: advance DEC by %f", decchange); dec -= decchange; _offset = _offset + decchange * pixelspeed * _decvector; debug(LOG_DEBUG, DEBUG_LOG, 0, "update: new offset: %s", _offset.toString().c_str()); // we must now remember that the activation time has changed lastactivation = now; }
/**
 * Print the decoder id, the number of sets, the total access count and the
 * access count divided by the current simulation time, followed by two
 * empty lines.
 *
 * @param out stream the statistics are written to
 */
void CAMDataDePrivate::printStats(ostream& out) const
{
    const double accesses_per_cycle = ((double) m_num_access) / simtime();

    out << "CAMDataDePrivate:" << " id=" << m_decoder_id << " entries=" << m_num_sets << endl;
    out << "total accesses: " << m_num_access << endl;
    out << "access rate (access/cycle): " << accesses_per_cycle << endl;

    // two trailing blank lines
    for (int blank=0; blank<2; blank++)
        out << endl;
}
void process_link_dvs_link_slowdown() { char proc_name[MAX_PROCESS_NAME_STR_LEN]; double last_profile_clk = 0.0; sprintf(proc_name, "link-dvs slowdown proc"); create(proc_name); g_sim.m_num_CSIM_process++; if (g_cfg.link_dvs_method == LINK_DVS_NODVS) return; double hold_time = g_cfg.sim_clk_start + g_cfg.link_dvs_interval - g_cfg.link_dvs_voltage_transit_delay - g_cfg.link_dvs_freq_transit_delay; assert(hold_time > 0.0); hold(hold_time); while (!g_EOS) { // frequency first, voltage second hold(g_cfg.link_dvs_freq_transit_delay); g_LinkDVSer.link_dvs_update_freq_slowdown(); #ifdef LINK_DVS_DEBUG printf("link freq slowdown clk=%.0lf\n", simtime()); #endif hold(g_cfg.link_dvs_voltage_transit_delay); g_LinkDVSer.link_dvs_update_voltage_slowdown(); #ifdef LINK_DVS_DEBUG printf("link voltage slowdown clk=%.0lf\n", simtime()); #endif hold(g_cfg.link_dvs_interval - g_cfg.link_dvs_voltage_transit_delay - g_cfg.link_dvs_freq_transit_delay); } }
/**
 * \brief Get the current focuser position
 *
 * When no move is in progress (lastset == 0) the stored position is
 * returned unchanged. Otherwise the position is advanced according to the
 * time elapsed since lastset: if the remaining distance divided by 1000
 * still exceeds the elapsed time the move is only partially executed,
 * else the target is reached and the move is marked as finished.
 *
 * \return the updated position
 */
unsigned short	SimFocuser::current() {
	if (0 != lastset) {
		double	now = simtime();
		double	elapsed = now - lastset;
		double	remaining = (double)_value - (double)target;
		if (fabs(remaining / 1000.) > elapsed) {
			// partial move
			// NOTE(review): the step is elapsed * remaining, not
			// 1000 * elapsed; for large distances this looks like
			// it can step past the target -- confirm intent
			_value -= elapsed * remaining;
			lastset = now;
		} else {
			// enough time has passed, the target is reached
			lastset = 0;
			_value = target;
		}
	}
	return _value;
}
/**
 * \brief Create a simulated GuidePort
 *
 * The default settings of the guider port have a coordinate system rotated
 * by 30 degrees with respect to the ccd axes. Also the vector in the right
 * ascension direction is shorter (scaled by sqrt(0.5)), approximately as if
 * declination was 45 degrees.
 *
 * \param locator	locator this guide port belongs to
 */
SimGuidePort::SimGuidePort(SimLocator& locator)
	: GuidePort("guideport:simulator/guideport"), _locator(locator) {
	starttime = simtime();
	debug(LOG_DEBUG, DEBUG_LOG, 0, "SimGuidePort created at %f", starttime);

	// no rotation rate and no pending activation initially
	_omega = 0;
	ra = 0;
	dec = 0;

	// the initial mount axis directions are not parallel to the
	// coordinate axes of the image
	_ravector = sqrt(0.5) * Point(sqrt(3) / 2, 0.5);
	_decvector = Point(-0.5, sqrt(3) / 2);

	// compute the speed at which a star image would move over the
	// CCD at standard guide rate. We assume a focal length of 0.6m
	const double	guiderate = (15. / 3600.) * (M_PI / 180.); // 15"/sec in radians
	const double	pixelangle = 0.000010 / 0.6; // radians per pixel
	pixelspeed = guiderate / pixelangle;
	debug(LOG_DEBUG, DEBUG_LOG, 0, "pixelspeed = %f", pixelspeed);
}
/**
 * \brief Retrieve the current offset
 *
 * The result is the stored offset plus a drift term proportional to the
 * time since creation. Once more than 360 time units have passed, a
 * circular component of radius 5 with angle 0.01 * time is added as well.
 *
 * \return stored offset plus drift and (if active) circular component
 */
Point	SimGuidePort::offset() {
	const double	elapsed = simtime() - starttime;

	// linear drift
	Point	displacement = elapsed * _drift;

	// Fourier component, only active after the first 360 time units
	if (elapsed > 360) {
		const double	phase = 0.01 * elapsed;
		displacement = displacement + 5. * Point(sin(phase), cos(phase));
	}

	// combine with the accumulated guide port offset
	Point	total = _offset + displacement;
	debug(LOG_DEBUG, DEBUG_LOG, 0, "complete offset: %s",
		total.toString().c_str());
	return total;
}
void Router::stageRC() { for (int in_pc=0; in_pc<m_num_pc; in_pc++) for (int in_vc=0; in_vc<m_num_vc; in_vc++) { #ifdef _DEBUG_ROUTER_RC int _rc_debug_router_id = 20; double _rc_debug_clk = 0.0; int _rc_in_pc = 4; int _rc_in_vc = 0; if (m_id == _rc_debug_router_id && simtime() > _rc_debug_clk && in_pc == _rc_in_pc && in_vc == _rc_in_vc ) { printf("buf_Status: clk=%.0lf router=%d pc=%d vc=%d sz=%d\n", simtime(), m_id, in_pc, in_vc, m_flitQ->size(in_pc, in_vc)); if (! m_flitQ->isEmpty(in_pc, in_vc)) { printf(" "); m_flitQ->print(stdout, in_pc, in_vc); } } #endif if (m_in_mod_vec[in_pc][in_vc].m_state != IN_MOD_I) continue; if (m_flitQ->isEmpty(in_pc, in_vc)) // has a flit ? continue; // peek one flit in the input buffer Flit* peek_flit = m_flitQ->peek(in_pc, in_vc); assert(peek_flit); // flit type must be HEAD. if (! peek_flit->isHead()) continue; // CSIM process synchronization problem: // The order of process creation may pre-determine the priority of processes. // If process for router X is created earlier than process for router Y // (i.e. X has higher priority than Y) in sim_process.C, // router Y does not wait for one cycle when router X sends a flit to router Y. if (peek_flit->m_clk_enter_router == simtime()) continue; // do routing (decide out_pc) int out_pc = g_Routing->selectOutPC(this, in_vc, (FlitHead*) peek_flit); assert(out_pc < (int) m_connNextRouter_vec.size()); int next_router_id = m_connNextRouter_vec[out_pc].first; int next_in_pc = m_connNextRouter_vec[out_pc].second; assert(isEjectChannel(out_pc) || (! isEjectChannel(out_pc) && next_router_id != INVALID_ROUTER_ID)); assert(isEjectChannel(out_pc) || (! 
isEjectChannel(out_pc) && next_in_pc != INVALID_PC)); // change input module status assert(m_in_mod_vec[in_pc][in_vc].m_state == IN_MOD_I); m_in_mod_vec[in_pc][in_vc].m_state = IN_MOD_R; // I->R m_in_mod_vec[in_pc][in_vc].m_out_pc = out_pc; // make VC arbiter request for this packet m_vc_arb->add(in_pc, in_vc, out_pc); #ifdef _DEBUG_ROUTER debugRC(peek_flit, next_router_id, in_pc, in_vc, out_pc, next_in_pc); #endif // pipeline stage latency m_pipe_lat_RC_tab->tabulate(simtime() - peek_flit->m_clk_enter_stage); peek_flit->m_clk_enter_stage = simtime(); // increase hop count for this packet peek_flit->getPkt()->m_hops++; } }
/**
 * \brief Time since creation of the guide port multiplied by _omega
 */
double	SimGuidePort::alpha() {
	const double	elapsed = simtime() - starttime;
	return elapsed * _omega;
}
bool Router::hasCredit(int out_pc, int out_vc, int num_credits) { switch (g_cfg.router_buffer_type) { case ROUTER_BUFFER_SAMQ: return (m_out_mod_vec[out_pc][out_vc].m_num_credit >= num_credits) ? true: false; case ROUTER_BUFFER_DAMQ_P: { int num_out_vc = g_Router_vec[m_connNextRouter_vec[out_pc].first]->num_vc(); int num_shared_credits = 0; // total shared credits for out_pc int num_total_credits = 0; for (int vc=0; vc<num_out_vc; vc++) { num_shared_credits += m_out_mod_vec[out_pc][vc].m_num_credit; num_total_credits += m_out_mod_vec[out_pc][vc].m_num_credit + m_out_mod_vec[out_pc][vc].m_num_credit_rsv; } assert(num_shared_credits >= 0); #ifdef _DEBUG_CREDIT printf("hasCredit DAMQ_P router=%d clk=%.0lf out_pc=%d out_vc=%d num_credits=%d\n", id(), simtime(), out_pc, out_vc, num_credits); for (int vc=0; vc<num_out_vc; vc++) printf(" VC=%d credits=%d %d\n", vc, m_out_mod_vec[out_pc][vc].m_num_credit, m_out_mod_vec[out_pc][vc].m_num_credit_rsv); printf(" num_shared_credits=%d num_total_credits=%d\n", num_shared_credits, num_total_credits); #endif if (num_shared_credits >= num_credits) { // has enough shared credits? return true; } else { // check reserved credits return (m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv >= num_credits) ? true: false; } } break; case ROUTER_BUFFER_DAMQ_R: { int num_shared_credits = 0; int num_total_credits = 0; for (int pc=0; pc<num_internal_pc(); pc++) { int next_router_id = m_connNextRouter_vec[pc].first; int num_out_vc = next_router_id == INVALID_ROUTER_ID ? 
0 : g_Router_vec[next_router_id]->num_vc(); for (int vc=0; vc<num_out_vc; vc++) { num_shared_credits += m_out_mod_vec[pc][vc].m_num_credit; num_total_credits += m_out_mod_vec[pc][vc].m_num_credit + m_out_mod_vec[pc][vc].m_num_credit_rsv; } } assert(num_shared_credits >= 0); #ifdef _DEBUG_CREDIT printf("hasCredit DAMQ_R router=%d clk=%.0lf out_pc=%d out_vc=%d num_credits=%d\n", id(), simtime(), out_pc, out_vc, num_credits); for (int pc=0; pc<num_internal_pc(); pc++) { int next_router_id = m_connNextRouter_vec[pc].first; int num_out_vc = next_router_id == INVALID_ROUTER_ID ? 0 : g_Router_vec[next_router_id]->num_vc(); for (int vc=0; vc<num_out_vc; vc++) printf(" PC=%d VC=%d credits=%d %d\n", pc, vc, m_out_mod_vec[pc][vc].m_num_credit, m_out_mod_vec[pc][vc].m_num_credit_rsv); } printf(" num_shared_credits=%d num_total_credits=%d\n", num_shared_credits, num_total_credits); #endif if (num_shared_credits >= num_credits) { // has enough shared credits? return true; } else { // check reserved credits return (m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv >= num_credits) ? true: false; } } break; default: ; } assert(0); // never reached return false; }
void process_main() { create("start_proc"); g_sim.m_num_CSIM_process++; fprintf(stderr, "started simulation.\n"); // simulation progress verbose if (g_cfg.sim_show_progress) process_sim_progress(); // router for (unsigned int i=0; i<g_Router_vec.size(); i++) { process_router(g_Router_vec[i]); } // input/output NI for (unsigned int n=0; n<g_NIInput_vec.size(); n++) { switch (g_cfg.NIin_type) { case NI_INPUT_TYPE_PER_PC: process_NI_input(g_NIInput_vec[n], 0); break; case NI_INPUT_TYPE_PER_VC: for (int NI_vc=0; NI_vc<g_cfg.router_num_vc; NI_vc++) process_NI_input(g_NIInput_vec[n], NI_vc); break; default: assert(0); } } for (unsigned int n=0; n<g_NIOutput_vec.size(); n++) { process_NI_output(g_NIOutput_vec[n]); } // profile if (g_cfg.profile_perf || g_cfg.profile_power) { if (g_cfg.profile_interval_cycle) process_profile_cycle(); else process_profile_instr(); } #ifdef LINK_DVS // link-dvs process_link_dvs_link_speedup(); process_link_dvs_link_slowdown(); process_link_dvs_set(); #endif // injection switch (g_cfg.wkld_type) { case WORKLOAD_TRIPS_TRACE: case WORKLOAD_TILED_CMP_TRACE: case WORKLOAD_TILED_CMP_VALUE_TRACE: case WORKLOAD_SNUCA_CMP_VALUE_TRACE: process_parse_trace(); break; case WORKLOAD_SYNTH_SPATIAL: case WORKLOAD_SYNTH_TRAFFIC_MATRIX: for (unsigned int c=0; c<g_Core_vec.size(); c++) process_gen_synth_traffic(c); break; default: assert(0); } // control simulation for warmup and finalize process_control_sim(); g_ev_sim_done->wait(); // Now the simulation is done. fprintf(stderr, "finished simulation at clk=%.0lf.\n", simtime()); // Find the simulation end time g_sim.m_end_time = time((time_t *)NULL); g_sim.m_elapsed_time = _MAX(g_sim.m_end_time - g_sim.m_start_time, 1); #ifdef _DEBUG_ROUTER_PROCESS printf("PROCESS COMPLETE: process_main()\n"); #endif }
void Router::decCredit(int out_pc, int out_vc, int num_credits) { if (isEjectChannel(out_pc)) return; // no credit management switch (g_cfg.router_buffer_type) { case ROUTER_BUFFER_SAMQ: m_out_mod_vec[out_pc][out_vc].m_num_credit -= num_credits; assert(m_out_mod_vec[out_pc][out_vc].m_num_credit >= 0); #ifdef _DEBUG_CREDIT printf("decCredit SAMQ: router=%d clk=%.0lf m_num_credit[out_pc=%d][out_vc=%d]=%d\n", id(), simtime(), out_pc, out_vc, m_out_mod_vec[out_pc][out_vc].m_num_credit); #endif break; case ROUTER_BUFFER_DAMQ_P: { int num_out_vc = g_Router_vec[m_connNextRouter_vec[out_pc].first]->num_vc(); int num_shared_credits = 0; // total available credits for out_pc for (int vc=0; vc<num_out_vc; vc++) num_shared_credits += m_out_mod_vec[out_pc][vc].m_num_credit; if (num_shared_credits >= num_credits) { // can decrease shared credit? m_out_mod_vec[out_pc][out_vc].m_num_credit -= num_credits; } else { assert(m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv >= num_credits); m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv -= num_credits; } #ifdef _DEBUG_CREDIT printf("decCredit DAMQ_P: router=%d clk=%.0lf out_pc=%d out_vc=%d\n", id(), simtime(), out_pc, out_vc); printf(" m_num_credit=%d m_num_credit_rsv=%d num_credits=%d num_shared_credits=%d\n", m_out_mod_vec[out_pc][out_vc].m_num_credit, m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv, num_credits, num_shared_credits); #endif } break; case ROUTER_BUFFER_DAMQ_R: { int num_shared_credits = 0; // total available credits for router for (int pc=0; pc<num_internal_pc(); pc++) { int next_router_id = m_connNextRouter_vec[pc].first; int num_out_vc = (next_router_id == INVALID_ROUTER_ID) ? 0 : g_Router_vec[next_router_id]->num_vc(); for (int vc=0; vc<num_out_vc; vc++) num_shared_credits += m_out_mod_vec[pc][vc].m_num_credit; } if (num_shared_credits >= num_credits) { // can decrease shared credit? 
m_out_mod_vec[out_pc][out_vc].m_num_credit -= num_credits; } else { assert(m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv >= num_credits); m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv -= num_credits; } #ifdef _DEBUG_CREDIT printf("decCredit DAMQ_R: router=%d clk=%.0lf out_pc=%d out_vc=%d\n", id(), simtime(), out_pc, out_vc); printf(" m_num_credit=%d m_num_credit_rsv=%d num_credits=%d num_shared_credits=%d\n", m_out_mod_vec[out_pc][out_vc].m_num_credit, m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv, num_credits, num_shared_credits); #endif } break; default: assert(0); } }
void Router::incCredit() { if (m_credit_deposit_vec.size() == 0) // no credits to deposit? return; int num_org_credit_reqs = m_credit_deposit_vec.size(); // for sanity check int num_complete_credit_reqs = 0; int num_deposited_credits = 0; // total deposited credits // deposit a credit to the corresponding output module for (vector< Credit* >::iterator pos=m_credit_deposit_vec.begin(); pos != m_credit_deposit_vec.end(); ++pos) { Credit* p_credit = *pos; if (p_credit->m_clk_deposit > simtime()) break; // all other deposited credits must be increased later. switch (g_cfg.router_buffer_type) { case ROUTER_BUFFER_SAMQ: m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit += p_credit->m_num_credits; assert(m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit <= m_inbuf_depth); break; case ROUTER_BUFFER_DAMQ_P: case ROUTER_BUFFER_DAMQ_R: { int num_shared_credits = p_credit->m_num_credits; if (m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit_rsv < g_cfg.router_num_rsv_credit) { int num_rsv_credits = _MIN(g_cfg.router_num_rsv_credit - m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit_rsv, p_credit->m_num_credits); num_shared_credits = p_credit->m_num_credits - num_rsv_credits; assert(num_shared_credits >= 0); m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit_rsv += num_rsv_credits; } m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit += num_shared_credits; } break; default: assert(0); } #ifdef _DEBUG_CREDIT printf("incCredit router=%d deposit_clk=%.0lf clk=%.0lf out_pc=%d out_vc=%d credits=%d\n", id(), p_credit->m_clk_deposit, simtime(), p_credit->m_out_pc, p_credit->m_out_vc, m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit); #endif num_complete_credit_reqs++; num_deposited_credits += p_credit->m_num_credits; g_CreditPool.reclaim(p_credit); } // delete successfully deposited credits if (num_complete_credit_reqs > 0) 
m_credit_deposit_vec.erase(m_credit_deposit_vec.begin(), m_credit_deposit_vec.begin()+num_complete_credit_reqs); #ifdef _DEBUG_CREDIT printf("incCredit router=%d clk=%.0lf\n", m_id, simtime()); printf(" num_org_credit_reqs=%d\n", num_org_credit_reqs); printf(" num_complete_credit_reqs=%d\n", num_complete_credit_reqs); printf(" m_credit_deposit_vec.size()=%d\n", m_credit_deposit_vec.size()); printf(" num_deposited_credits=%d\n", num_deposited_credits); #endif assert((num_org_credit_reqs - num_complete_credit_reqs) == ((int) m_credit_deposit_vec.size())); }
/**
 * Read one trace record and turn it into a packet.
 *
 * When the record is empty (cycle or size is 0) or the current file is at its
 * end, the next trace file is opened and one record is read from it.
 *
 * @return a vector holding the single packet built from the record, or a
 *         vector holding one null pointer when no further trace file can be
 *         opened.
 */
vector< Packet* > WorkloadTiledCMPValue::readTrace()
{
    PktTraceValue tr;
    vector< Packet* > pkt_vec;

    // read one line trace
    assert(m_trace_fp > 0);
    tr.cycle = 0;
    tr.sz_bytes = 0;
    igzstream_read(m_trace_fp, (char*) &tr, sizeof(PktTraceValue));

    if (tr.cycle==0 || tr.sz_bytes==0 || igzstream_feof(m_trace_fp)) {
        // close the current file
        closeTraceFile();

        // open the next file
        if (! openTraceFile() ) {   // no trace file ?
            pkt_vec.push_back(0);
            return pkt_vec;
        }

        fprintf(stderr, "benchmark=%s trace_file_id=%d successfully open.\n", m_benchmark_name.c_str(), m_trace_file_id-1);
        igzstream_read(m_trace_fp, (char*) &tr, sizeof(PktTraceValue));
    }

    // post processing
    int src_tile_id = (int) tr.src_mach_num;
    int dest_tile_id = (int) tr.dest_mach_num;
    g_sim.m_num_instr_executed = 0;
    int num_flits = (int) ceil( tr.sz_bytes * BITS_IN_BYTE / ( (double) g_cfg.link_width ));

#ifdef _DEBUG_TRACE_TILED_CMP_VALUE
    printf("DEBUG_TILED_CMP_VALUE: src_tile=%d dest_tile=%d bytes=%d #flits=%d cycle=%lld cur_cycle=%.0lf\n", src_tile_id, dest_tile_id, tr.sz_bytes, num_flits, tr.cycle, simtime());
#endif

    // make one packet
    Packet* p_pkt = g_PacketPool.alloc();
    assert(p_pkt);
    p_pkt->setID(g_sim.m_next_pkt_id++);
    p_pkt->m_start_flit_id = g_sim.m_next_flit_id;
    p_pkt->m_num_flits = num_flits;
    g_sim.m_next_flit_id += num_flits;
    p_pkt->setSrcRouterID(src_tile_id);
    p_pkt->addDestRouterID(dest_tile_id);
    p_pkt->setSrcCoreID(src_tile_id);
    p_pkt->addDestCoreID(dest_tile_id);
    p_pkt->m_clk_gen = (double) tr.cycle;

    // NI positions are selected by the machine type of each endpoint
    switch(tr.src_mach_type) {
    case MachineType_L1Cache: p_pkt->m_NI_in_pos = 0; break;
    case MachineType_L2Cache: p_pkt->m_NI_in_pos = 1; break;
    case MachineType_Directory: p_pkt->m_NI_in_pos = 2; break;
    default: assert(0);
    }

    switch (tr.dest_mach_type) {
    case MachineType_L1Cache: p_pkt->m_NI_out_pos = 0; break;
    case MachineType_L2Cache: p_pkt->m_NI_out_pos = 1; break;
    case MachineType_Directory: p_pkt->m_NI_out_pos = 2; break;
    default: assert(0);
    }

    // port multiplexing: override the positions round-robin over the NIs
    if (g_cfg.NI_port_mux) {
        p_pkt->m_NI_in_pos = g_NI_in_last_pos;
        p_pkt->m_NI_out_pos = g_NI_out_last_pos;

        g_NI_in_last_pos = (g_NI_in_last_pos + 1) % g_cfg.core_num_NIs;
        g_NI_out_last_pos = (g_NI_out_last_pos + 1) % g_cfg.core_num_NIs;
    }

    // packet type
    if (tr.sz_bytes == (int) CONTROL_MESSAGE_SIZE) {
        p_pkt->m_packet_type = PACKET_TYPE_UNICAST_SHORT;
    } else {
        assert(tr.sz_bytes == (int) DATA_MESSAGE_SIZE);
        p_pkt->m_packet_type = PACKET_TYPE_UNICAST_LONG;
    }

    // set packet data: word 0 is the address, the remaining 64-bit words are
    // packed from the record's data bytes, first byte most significant
    assert(((int) tr.sz_bytes)%8 == 0);
    int pkt_64bitdata_sz = tr.sz_bytes/8;
    assert(pkt_64bitdata_sz >= 1);   // slot 0 is written unconditionally below
    p_pkt->m_packetData_vec.resize(pkt_64bitdata_sz);

    p_pkt->m_packetData_vec[0] = tr.addr_value;
#ifdef _DEBUG_TRACE_TILED_CMP_VALUE
    printf(" addr[flit0]=%016llX\n", tr.addr_value);
#endif

    unsigned int data_value_pos = 0;
    for (int i=1; i<pkt_64bitdata_sz; i++) {   // runs zero times for address-only packets
        unsigned long long data64bit = 0x0;
#ifdef _DEBUG_TRACE_TILED_CMP_VALUE
        printf(" ");
#endif
        for (int j=0; j<8; j++) {
            // check the index before ANY access, including the debug print
            assert(data_value_pos < MAX_PKT_TRACE_DATA_VALUE_SZ);
#ifdef _DEBUG_TRACE_TILED_CMP_VALUE
            printf("%02X ", tr.data_value[data_value_pos]);
#endif
            data64bit <<= 8;
            data64bit |= tr.data_value[data_value_pos];
            ++data_value_pos;
        }

        p_pkt->m_packetData_vec[i] = data64bit;
#ifdef _DEBUG_TRACE_TILED_CMP_VALUE
        printf("data[flit%d]=%016llX\n", i, data64bit);
#endif
    }

    // spatial distribution
    m_spat_pattern_pkt_vec[src_tile_id][dest_tile_id]++;
    m_spat_pattern_flit_vec[src_tile_id][dest_tile_id] += num_flits;

    // throughput profile
    if (g_cfg.profile_perf) {
        g_sim.m_periodic_inj_pkt++;
        g_sim.m_periodic_inj_flit += num_flits;
    }

    m_num_proc_traces++;

    pkt_vec.push_back(p_pkt);

    return pkt_vec;
}
// control simulation for warmup and finalize void process_control_sim() { create("sim-control-proc"); g_sim.m_num_CSIM_process++; double num_cycles_skip_trace = 0.0; double num_cycles_warmup_sim = 0.0; double num_cycles_end_sim = 0.0; #if 0 printf("g_cfg.wkld_trace_skip_cycles=%.1lf\n", g_cfg.wkld_trace_skip_cycles); printf("g_cfg.clk_start= %.1lf\n", g_cfg.sim_clk_start); printf("g_cfg.clk_end= %.1lf\n", g_cfg.sim_clk_end); fflush(stdout); #endif num_cycles_skip_trace = g_cfg.wkld_trace_skip_cycles; num_cycles_warmup_sim = (g_cfg.sim_clk_start == 0.0) ? 0.0 : (g_cfg.sim_clk_start - num_cycles_skip_trace); if (num_cycles_warmup_sim < 0.0) { fprintf(stderr, "process_control_sim(): negative num_cycles_warmup_sim=%lf\n", num_cycles_warmup_sim); assert(0); } num_cycles_end_sim = g_cfg.sim_clk_end - num_cycles_skip_trace - num_cycles_warmup_sim; if (num_cycles_end_sim < 0.0) { fprintf(stderr, "process_control_sim(): negative num_cycles_end_sim\n"); fprintf(stderr, " g_cfg.sim_clk_end= %.1lf\n", g_cfg.sim_clk_end); fprintf(stderr, " num_cycles_skip_trace= %.1lf\n", num_cycles_skip_trace); fprintf(stderr, " num_cycles_warmup_sim= %.1lf\n", num_cycles_warmup_sim); fprintf(stderr, " num_cycles_end_sim= %.1lf\n", num_cycles_end_sim); assert(0); } // skip cycles for trace if (num_cycles_skip_trace != 0.0) hold(num_cycles_skip_trace); // printf("SKIP TRACE END: %lf\n", simtime()); const double cycles_check = 4.0; // start warm-up switch (g_cfg.sim_end_cond) { case SIM_END_BY_INJ_PKT: case SIM_END_BY_EJT_PKT: while (g_sim.m_num_pkt_ejt < g_cfg.sim_num_ejt_pkt_4warmup) hold (cycles_check); break; case SIM_END_BY_CYCLE: if (num_cycles_warmup_sim != 0.0) hold(num_cycles_warmup_sim); break; default: assert(0); } // finished warm-up g_sim.m_warmup_phase = false; g_sim.m_clk_warmup_end = simtime(); g_sim.m_num_pkt_inj_warmup = g_sim.m_num_pkt_inj; g_sim.m_num_pkt_ejt_warmup = g_sim.m_num_pkt_ejt; g_sim.m_num_flit_inj_warmup = g_sim.m_num_flit_inj; g_sim.m_num_flit_ejt_warmup = 
g_sim.m_num_flit_ejt; reset_stats(); fprintf(stderr, "finished warm-up at clk=%.0lf.\n", simtime()); // start simulation switch (g_cfg.sim_end_cond) { case SIM_END_BY_INJ_PKT: while (g_sim.m_num_pkt_ejt <= g_cfg.sim_num_inj_pkt) hold(cycles_check); break; case SIM_END_BY_EJT_PKT: while (g_sim.m_num_pkt_ejt <= g_cfg.sim_num_ejt_pkt) hold(cycles_check); break; case SIM_END_BY_CYCLE: if (num_cycles_end_sim != 0.0) { if (g_cfg.profile_perf || g_cfg.profile_power) num_cycles_end_sim += 0.5; // for the last profile hold(num_cycles_end_sim); } break; } // finished simulation if (!g_EOS) { g_ev_sim_done->set(); g_sim.m_clk_sim_end = simtime(); g_EOS = true; } #ifdef _DEBUG_ROUTER_PROCESS printf("PROCESS COMPLETE: process_control_sim() clk=%.0lf\n", simtime()); #endif }
void Router::stageVA() { if (m_vc_arb->hasNoReq()) return; vector< pair< pair< int, int >, int > > grant_vec = m_vc_arb->grant(); // return value: <in_pc, in_vc>, out_vc> for (unsigned int n=0; n<grant_vec.size(); ++n) { int in_pc = grant_vec[n].first.first; int in_vc = grant_vec[n].first.second; int out_pc = m_in_mod_vec[in_pc][in_vc].m_out_pc; int out_vc = grant_vec[n].second; Flit* peek_flit = m_flitQ->peek(in_pc, in_vc); // add a request to SW arbiter m_sw_arb->add(in_pc, in_vc, out_pc, out_vc); // delete granted request m_vc_arb->del(in_pc, in_vc); #ifdef _DEBUG_ROUTER debugVA(peek_flit, in_pc, in_vc, out_pc, out_vc, false); #endif // pipeline stage latency m_pipe_lat_VA_tab->tabulate((simtime() - peek_flit->m_clk_enter_stage)); peek_flit->m_clk_enter_stage = simtime(); // tunneling: bypass pipeline if (g_cfg.router_tunnel) { FlitHead* p_head_flit = (FlitHead*) m_flitQ->peek(in_pc, in_vc); switch(g_cfg.router_tunnel_type) { case TUNNELING_PER_FLOW: if (m_tunnel_info_vec[in_pc].m_flow != make_pair(p_head_flit->src_router_id(), p_head_flit->dest_router_id())) { m_tunnel_info_vec[in_pc].m_flow = make_pair(p_head_flit->src_router_id(), p_head_flit->dest_router_id()); } break; case TUNNELING_PER_DEST: if (m_tunnel_info_vec[in_pc].m_dest_router_id != p_head_flit->dest_router_id()) { m_tunnel_info_vec[in_pc].m_dest_router_id = p_head_flit->dest_router_id(); } break; case TUNNELING_PER_OUTPORT: break; default: assert(0); } // common properties m_tunnel_info_vec[in_pc].m_in_vc = in_vc; m_tunnel_info_vec[in_pc].m_out_pc = out_pc; m_tunnel_info_vec[in_pc].m_out_vc = out_vc; } } // VA power consumption in Orion - assumption: per output PC organization // record power if (!g_sim.m_warmup_phase) { vector< bitset< max_sz_vc_arb > > vc_req_orion_vec; vector< bitset< max_sz_vc_arb > > vc_grant_orion_vec; vc_req_orion_vec.resize(m_num_pc, 0); vc_grant_orion_vec.resize(m_num_pc, 0); for (unsigned int n=0; n<grant_vec.size(); ++n) { int in_pc = grant_vec[n].first.first; int in_vc 
= grant_vec[n].first.second; int out_pc = m_in_mod_vec[in_pc][in_vc].m_out_pc; // NOTE: Orion limitation int arb_bit_pos = in_pc*m_num_vc + in_vc; if (arb_bit_pos > max_sz_vc_arb) arb_bit_pos %= max_sz_vc_arb; vc_req_orion_vec[out_pc][arb_bit_pos] = true; vc_grant_orion_vec[out_pc][arb_bit_pos] = true; } for (int out_pc=0; out_pc<m_num_pc; ++out_pc) { vc_req_orion_vec[out_pc] |= m_vc_arb->getReqBitVector(out_pc); if (vc_req_orion_vec[out_pc].any()) { m_power_tmpl->record_vc_arb(out_pc, (unsigned int) vc_req_orion_vec[out_pc].to_ulong() , (unsigned int) vc_grant_orion_vec[out_pc].to_ulong()); if (g_cfg.profile_power) { m_power_tmpl_profile->record_vc_arb(out_pc, (unsigned int) vc_req_orion_vec[out_pc].to_ulong(), (unsigned int) vc_grant_orion_vec[out_pc].to_ulong()); } } } } }
void Router::stageSA() { // make switch arbiter requests for middle/tail flits if head flit completed VA. // FIXME: can we do this better? vector< pair< int, int > > free_in_port_vec = m_sw_arb->getFreeInPorts(); for (unsigned int n=0; n<free_in_port_vec.size(); n++) { int in_pc = free_in_port_vec[n].first; int in_vc = free_in_port_vec[n].second; if (m_flitQ->isEmpty(in_pc, in_vc)) continue; // flit type should be MIDL or TAIL. Flit* peek_flit = m_flitQ->peek(in_pc, in_vc); if (peek_flit->isHead()) continue; // head flit that belongs to the same packet for peek_flit // should complete switch allocation. if (m_in_mod_vec[in_pc][in_vc].m_state != IN_MOD_S) continue; // get output PC/VC int out_pc = m_in_mod_vec[in_pc][in_vc].m_out_pc; int out_vc = m_in_mod_vec[in_pc][in_vc].m_out_vc; // add a request to SW arbiter m_sw_arb->add(in_pc, in_vc, out_pc, out_vc); } // get granted requests vector< pair< pair< int, int >, pair< int, int > > > grant_vec = m_sw_arb->grantRegular(); // move granted requests to xbar for (unsigned int n=0; n<grant_vec.size(); n++) { int in_pc = grant_vec[n].first.first; int in_vc = grant_vec[n].first.second; int out_pc = grant_vec[n].second.first; assert(in_pc != INVALID_PC); assert(in_vc != INVALID_VC); Flit* peek_flit = m_flitQ->peek(in_pc, in_vc); assert(peek_flit); #ifdef _DEBUG_ROUTER int out_vc = grant_vec[n].second.second; debugSA(peek_flit, in_pc, in_vc, out_pc, out_vc, false, false); #endif assert(m_xbar.m_waiting_in_vc_vec[in_pc] == INVALID_VC); m_xbar.m_waiting_in_vc_vec[in_pc] = in_vc; assert(m_xbar.m_outport_free_vec[out_pc] == true); m_xbar.m_outport_free_vec[out_pc] = false; // pipeline stage latency m_pipe_lat_SA_tab->tabulate(simtime() - peek_flit->m_clk_enter_stage); peek_flit->m_clk_enter_stage = simtime(); // change input module status (V->S) if a head flit completes SW allocation. if (peek_flit->isHead()) { assert(m_in_mod_vec[in_pc][in_vc].m_state == IN_MOD_V); m_in_mod_vec[in_pc][in_vc].m_state = IN_MOD_S; } } }
void Router::stageST() { #ifdef _DEBUG_ROUTER_ST int _st_debug_router = 3; double _st_debug_clk = 141000012.0; if (m_id==_st_debug_router && simtime() > _st_debug_clk) { printf("ST_status router=%d, clk=%.0lf\n ", m_id, simtime()); for (int out_pc=0; out_pc<m_num_pc; out_pc++) { if (isEjectChannel(out_pc)) { } else { if (m_link_vec[out_pc].m_w_pipeline.size() == 0) { printf("out_pc=%d(free) ", out_pc); } else { printf("out_pc=%d", out_pc); for (unsigned int i=0; i<m_link_vec[out_pc].m_w_pipeline.size(); i++) printf("(fid=%lld) ", m_link_vec[out_pc].m_w_pipeline[i].first->id()); } } } printf("\n"); } #endif int num_xbar_passes = 0; for (int in_pc=0; in_pc<m_num_pc; in_pc++) { int in_vc = m_xbar.m_waiting_in_vc_vec[in_pc]; if (in_vc == INVALID_VC) continue; Flit* read_flit = m_flitQ->read(in_pc, in_vc); assert(read_flit); #ifdef _DEBUG_CHECK_BUFFER_INTEGRITY assert(checkBufferIntegrity(in_pc, in_vc, read_flit)); #endif // create a credit and send it to upstream router int prev_router_id = m_connPrevRouter_vec[in_pc].first; int prev_out_pc = m_connPrevRouter_vec[in_pc].second; if (prev_router_id != INVALID_ROUTER_ID && prev_out_pc != INVALID_PC) { Credit* p_credit = g_CreditPool.alloc(); p_credit->m_out_pc = prev_out_pc; p_credit->m_out_vc = in_vc; p_credit->m_num_credits = 1; p_credit->m_clk_deposit = simtime() + g_Router_vec[prev_router_id]->getLink(prev_out_pc).m_delay_factor * g_cfg.link_latency; g_Router_vec[prev_router_id]->depositCredit(p_credit); } assert(m_in_mod_vec[in_pc][in_vc].m_state == IN_MOD_S); int out_pc = m_in_mod_vec[in_pc][in_vc].m_out_pc; int out_vc = m_in_mod_vec[in_pc][in_vc].m_out_vc; int next_router_id = m_connNextRouter_vec[out_pc].first; int next_in_pc = m_connNextRouter_vec[out_pc].second; if (isEjectChannel(out_pc)) { // select ejection port int epc = out_pc - num_internal_pc(); // printf("epc=%d out_pc=%d\n", epc, out_pc); NIOutput* p_ni_output = getNIOutput(epc); assert(p_ni_output); // FIXME: The following assert() is not valid for 
DMesh topology, // because destination is encoded at injection function. // assert(m_id == read_flit->getPkt()->getDestRouterID()); p_ni_output->writeFlit(read_flit); // 03/15/06 fast simulation m_num_flits_inside--; #ifdef _DEBUG_ROUTER debugST(read_flit, in_pc, out_pc, INVALID_ROUTER_ID, INVALID_PC, out_vc, true); #endif } else { // 11/05/05: no stall at ST stage assert(next_router_id != INVALID_PC); assert(next_in_pc != INVALID_PC); #ifdef _DEBUG_CREDIT if(g_Router_vec[next_router_id]->flitQ()->isFull(next_in_pc, out_vc)) { printf("router=%d out_pc=%d next_router=%d next_in_pc=%d\n", m_id, out_pc, next_router_id, next_in_pc); for (int x_vc=0; x_vc<m_num_vc; x_vc++) printf(" vc=%d: credit=%d credit_rsv=%d\n", x_vc, m_out_mod_vec[out_pc][x_vc].m_num_credit, m_out_mod_vec[out_pc][x_vc].m_num_credit_rsv); for (int x_vc=0; x_vc<m_num_vc; x_vc++) printf(" vc=%d: Q_sz=%d\n", x_vc, g_Router_vec[next_router_id]->flitQ()->size(next_in_pc, x_vc)); } #endif // assert(! g_Router_vec[next_router_id]->flitQ()->isFull(next_in_pc, out_vc) ); // move flit to the link // NOTE: For wire pipelining, // # of traversing flits on the link must be less than link latency. 
assert(((int) m_link_vec[out_pc].m_w_pipeline.size()) < m_link_vec[out_pc].m_delay_factor*g_cfg.link_latency); m_link_vec[out_pc].m_w_pipeline.push_back(make_pair(read_flit, make_pair(out_vc, simtime()))); // pipeline stage latency m_pipe_lat_ST_tab->tabulate(simtime() - read_flit->m_clk_enter_stage); read_flit->m_clk_enter_stage = simtime(); #ifdef _DEBUG_ROUTER debugST(read_flit, in_pc, out_pc, next_router_id, next_in_pc, out_vc, false); #endif } // if (isEjectChannel(out_pc)) { // remove a flit from xbar m_xbar.m_waiting_in_vc_vec[in_pc] = INVALID_VC; // update xbar outport status assert(m_xbar.m_outport_free_vec[out_pc] == false); m_xbar.m_outport_free_vec[out_pc] = true; if (read_flit->isTail()) { // change input module status (S->I) m_in_mod_vec[in_pc][in_vc].m_state = IN_MOD_I; m_in_mod_vec[in_pc][in_vc].m_out_pc = INVALID_PC; m_in_mod_vec[in_pc][in_vc].m_out_vc = INVALID_VC; // change output module status (V->I) assert(m_out_mod_vec[out_pc][out_vc].m_state == OUT_MOD_V); m_out_mod_vec[out_pc][out_vc].m_state = OUT_MOD_I; } num_xbar_passes++; // intra-router flit latency m_flit_lat_router_tab->tabulate(simtime() - read_flit->m_clk_enter_router); // record power if (!g_sim.m_warmup_phase) { m_power_tmpl->record_buffer_read(read_flit, in_pc); m_power_tmpl->record_xbar_trav(read_flit, in_pc, out_pc); if (g_cfg.profile_power) { m_power_tmpl_profile->record_buffer_read(read_flit, in_pc); m_power_tmpl_profile->record_xbar_trav(read_flit, in_pc, out_pc); } } } // record power if (!g_sim.m_warmup_phase) { if (num_xbar_passes > 0) { m_power_tmpl->record_xbar_trav_num(num_xbar_passes); if (g_cfg.profile_power) m_power_tmpl_profile->record_xbar_trav_num(num_xbar_passes); } } }
void Router::stageTN() { for (int in_pc=0; in_pc<m_num_pc; in_pc++) { int tunnel_in_vc = m_tunnel_info_vec[in_pc].m_in_vc; int tunnel_out_pc = m_tunnel_info_vec[in_pc].m_out_pc; int tunnel_out_vc = m_tunnel_info_vec[in_pc].m_out_vc; if (tunnel_in_vc == INVALID_VC) // support tunneling ? continue; if (m_flitQ->isEmpty(in_pc, tunnel_in_vc)) // has a flit ? continue; // peek one flit from the input buffer Flit* p_flit = m_flitQ->peek(in_pc, tunnel_in_vc); assert(p_flit); switch (p_flit->type()) { case HEAD_FLIT: case ATOM_FLIT: { FlitHead* p_head_flit = (FlitHead*) p_flit; switch(g_cfg.router_tunnel_type) { case TUNNELING_PER_FLOW: if (m_tunnel_info_vec[in_pc].m_flow != make_pair(p_head_flit->src_router_id(), p_head_flit->dest_router_id())) goto NO_TUNNEL; break; case TUNNELING_PER_DEST: if (m_tunnel_info_vec[in_pc].m_dest_router_id != p_head_flit->dest_router_id()) goto NO_TUNNEL; break; case TUNNELING_PER_OUTPORT: break; default: assert(0); } // check input module status if (m_in_mod_vec[in_pc][tunnel_in_vc].m_state != IN_MOD_I) goto NO_TUNNEL; // Step 1.1: do routing int out_pc = g_Routing->selectOutPC(this, tunnel_in_vc, (FlitHead*) p_flit); assert(out_pc < (int) m_connNextRouter_vec.size()); int next_router_id = m_connNextRouter_vec[out_pc].first; int next_in_pc = m_connNextRouter_vec[out_pc].second; if (tunnel_out_pc != out_pc) goto NO_TUNNEL; // Step 2.1: reserve VC if (! isEjectChannel(out_pc)) { if (m_out_mod_vec[out_pc][tunnel_out_vc].m_state != OUT_MOD_I ) // reserved ? goto NO_TUNNEL; } // Step 3.1: check no request in SA for designated input and output ports // FIXME: do we need this? // if (! m_sw_alloc->hasNoReq(in_pc, out_pc)) // goto NO_TUNNEL; // 03/14/08 credit-based flow control if (! hasCredit(out_pc, tunnel_out_vc) ) // no credit? goto NO_TUNNEL; // Step 4.1: check no flit in xbar for designated input and output ports if (m_xbar.m_waiting_in_vc_vec[in_pc] != INVALID_VC || ! 
m_xbar.m_outport_free_vec[out_pc]) goto NO_TUNNEL; // Step 5.1: check no flit in a link if (! isEjectChannel(out_pc)) { if (((int) m_link_vec[out_pc].m_w_pipeline.size()) >= m_link_vec[out_pc].m_delay_factor*g_cfg.link_latency) goto NO_TUNNEL; } // Now checking is done for tunneling. // Step 2.2: reserve VC if (! isEjectChannel(out_pc)) { m_out_mod_vec[out_pc][tunnel_out_vc].m_state = OUT_MOD_V; } // create a credit and send it to upstream router int prev_router_id = m_connPrevRouter_vec[in_pc].first; int prev_out_pc = m_connPrevRouter_vec[in_pc].second; if (prev_router_id != INVALID_ROUTER_ID && prev_out_pc != INVALID_PC) { Credit* p_credit = g_CreditPool.alloc(); p_credit->m_out_pc = prev_out_pc; p_credit->m_out_vc = tunnel_in_vc; p_credit->m_num_credits = 1; p_credit->m_clk_deposit = simtime() + g_Router_vec[prev_router_id]->getLink(prev_out_pc).m_delay_factor * g_cfg.link_latency; g_Router_vec[prev_router_id]->depositCredit(p_credit); } // 11/05/05: no stall at ST stage if (! isEjectChannel(out_pc)) { assert(next_router_id != INVALID_PC); assert(next_in_pc != INVALID_PC); } // pipeline stage latency m_pipe_lat_ST_tab->tabulate(simtime() - p_flit->m_clk_enter_stage); p_flit->m_clk_enter_stage = simtime(); // assert(m_xbar.m_waiting_in_vc_vec[in_pc] == INVALID_VC); // m_xbar.m_waiting_in_vc_vec[in_pc] = tunnel_in_vc; // assert(m_xbar.m_outport_free_vec[out_pc] == true); // m_xbar.m_outport_free_vec[out_pc] = false; // Step 4.2: traverse a crossbar // move flit to the link if (isEjectChannel(out_pc)) { // select ejection port int epc = p_flit->getPkt()->m_NI_out_pos; NIOutput* p_ni_output = getNIOutput(epc); assert(p_ni_output); assert(m_id == p_flit->getPkt()->getDestRouterID()); p_ni_output->writeFlit(p_flit); // 03/15/06 fast simulation m_num_flits_inside--; } else { m_link_vec[out_pc].m_w_pipeline.push_back(make_pair(p_flit, make_pair(tunnel_out_vc, simtime()))); // decrease credit decCredit(out_pc, tunnel_out_vc, 1); } // read a flit m_flitQ->read(in_pc, 
tunnel_in_vc); // bypass pipeline m_num_tunnel_flit_vec[in_pc][tunnel_in_vc]++; printf("here1\n"); #ifdef _DEBUG_CHECK_BUFFER_INTEGRITY assert(checkBufferIntegrity(in_pc, tunnel_in_vc, p_flit)); #endif // change input module status for only multi-flit packets assert(m_in_mod_vec[in_pc][tunnel_in_vc].m_state == IN_MOD_I); if (p_flit->type() != ATOM_FLIT) { m_in_mod_vec[in_pc][tunnel_in_vc].m_state = IN_MOD_S; m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc = out_pc; m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc = tunnel_out_vc; } // printf("TN3 router=%d m_in_mod_vec[%d][%d]: state=%d, out_pc=%d, out_vc=%d\n", id(), in_pc, tunnel_in_vc, m_in_mod_vec[in_pc][tunnel_in_vc].m_state, m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc, m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc); #ifdef _DEBUG_ROUTER debugTN(p_flit, in_pc, tunnel_in_vc, out_pc, tunnel_out_vc); #endif } break; case MIDL_FLIT: case TAIL_FLIT: { assert(m_in_mod_vec[in_pc][tunnel_in_vc].m_state == IN_MOD_S); int out_pc = m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc; int out_vc = m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc; if (tunnel_out_pc != out_pc) goto NO_TUNNEL; if (tunnel_out_vc != out_vc) // output VC for tunneling is determined at previous packet delivery. goto NO_TUNNEL; // Step 3.1: check no request in SA for designated input and output ports // FIXME: do we need this? // if (! m_sw_alloc->hasNoReq(in_pc, out_pc)) // goto NO_TUNNEL; // check no flit in xbar for designated input and output ports if (m_xbar.m_waiting_in_vc_vec[in_pc] != INVALID_VC || ! m_xbar.m_outport_free_vec[out_pc]) goto NO_TUNNEL; // 03/14/08 credit-based flow control if (! hasCredit(out_pc, out_vc) ) // no credit? goto NO_TUNNEL; // check no flit in a link if (! isEjectChannel(out_pc)) { if (((int) m_link_vec[out_pc].m_w_pipeline.size()) >= m_link_vec[out_pc].m_delay_factor*g_cfg.link_latency) goto NO_TUNNEL; } // Now checking is done for tunneling. 
// delete switch allocation status if reserved m_sw_arb->del(in_pc, tunnel_in_vc); // create a credit and send it to upstream router int prev_router_id = m_connPrevRouter_vec[in_pc].first; int prev_out_pc = m_connPrevRouter_vec[in_pc].second; if (prev_router_id != INVALID_ROUTER_ID && prev_out_pc != INVALID_PC) { Credit* p_credit = g_CreditPool.alloc(); p_credit->m_out_pc = prev_out_pc; p_credit->m_out_vc = tunnel_in_vc; p_credit->m_num_credits = 1; p_credit->m_clk_deposit = simtime() + g_Router_vec[prev_router_id]->getLink(prev_out_pc).m_delay_factor * g_cfg.link_latency; g_Router_vec[prev_router_id]->depositCredit(p_credit); } // Step 4: traverse a crossbar // move flit to the link if (isEjectChannel(out_pc)) { // select ejection port int epc = p_flit->getPkt()->m_NI_out_pos; NIOutput* p_ni_output = getNIOutput(epc); assert(p_ni_output); assert(m_id == p_flit->getPkt()->getDestRouterID()); p_ni_output->writeFlit(p_flit); // 03/15/06 fast simulation m_num_flits_inside--; } else { m_link_vec[out_pc].m_w_pipeline.push_back(make_pair(p_flit, make_pair(out_vc, simtime()))); // decrease credit decCredit(out_pc, out_vc, 1); } // clear status of input module if (p_flit->isTail()) { // assert(eject_vc_sts[epc][out_vc] == false); // eject_vc_sts[epc][out_vc] = true; m_in_mod_vec[in_pc][tunnel_in_vc].m_state = IN_MOD_I; m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc = INVALID_PC; m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc = INVALID_VC; } // read a flit m_flitQ->read(in_pc, tunnel_in_vc); // bypass pipeline m_num_tunnel_flit_vec[in_pc][tunnel_in_vc]++; printf("here2\n"); #ifdef _DEBUG_CHECK_BUFFER_INTEGRITY assert(checkBufferIntegrity(in_pc, tunnel_in_vc, p_flit)); #endif #ifdef _DEBUG_ROUTER debugTN(p_flit, in_pc, tunnel_in_vc, out_pc, out_vc); #endif } break; default: assert(0); } // switch (p_flit->type()) { NO_TUNNEL: ; } // for (int in_pc=0; in_pc<m_num_pc; in_pc++) { return; }
void Router::stageLT() { for (int out_pc=0; out_pc<m_num_pc; out_pc++) { if (m_link_vec[out_pc].m_w_pipeline.size() == 0) // no flits to traverse a link? continue; Link & link = m_link_vec[out_pc]; pair< Flit*, pair< int, double > > & front_flit = link.m_w_pipeline.front(); Flit* p_flit = front_flit.first; int next_in_vc = front_flit.second.first; double store_clk = front_flit.second.second; assert(next_in_vc != INVALID_VC); // 03/06/06: support for multi-cycle link int link_lat = link.m_delay_factor * g_cfg.link_latency; if (link_lat > 1) { int link_traverse_time = (int) (simtime() - store_clk); if (link_traverse_time < link_lat) { continue; } } #ifdef LINK_DVS // When link frequency is changed, link prevents flit traversal // for g_cfg.link_dvs_freq_transit_delay time. if (simtime() >= link.dvs_freq_set_clk && simtime() < link.dvs_freq_set_clk + g_cfg.link_dvs_freq_transit_delay) { // printf("router-%d link-%d at clk=%.0lf stall\n", m_id, out_pc, simtime()); continue; } double DVS_lat = g_cfg.chip_freq / link.dvs_freq; assert(DVS_lat >= 1.0); /* if (m_id == 5 && out_pc == 0 && simtime() > 21000001.0) { printf("router=%d out_pc=%d DVS_lat=%.1lf utilz=%lg freq=%lg voltage=%lg\n", m_id, out_pc, DVS_lat, link.link_expected_utilz, link.dvs_freq, link.dvs_voltage); } */ // FIXME: support for multi-cycle link double DVS_ready_clk; if (link.last_sent_clk > link.store_clk) { DVS_ready_clk = link.last_sent_clk; } else { DVS_ready_clk = link.store_clk; } DVS_ready_clk += DVS_lat; if (DVS_ready_clk > simtime()) continue; #endif // get router ID and input PC of the downstream router for out_pc int next_router_id = m_connNextRouter_vec[out_pc].first; int next_in_pc = m_connNextRouter_vec[out_pc].second; assert(next_router_id != INVALID_ROUTER_ID); assert(next_in_pc != INVALID_PC); Router* p_next_router = g_Router_vec[next_router_id]; FlitQ* p_next_flitQ = p_next_router->flitQ(); assert(! 
p_next_flitQ->isFull(next_in_pc, next_in_vc) ); // not full in downstream router's buffer // pop flit from link link.m_w_pipeline.pop_front(); // write flit to the downstream router's buffer p_next_flitQ->write(next_in_pc, next_in_vc, p_flit); p_next_router->m_num_flit_inj_from_router++; if (p_flit->isHead()) { // per-packet accounting p_next_router->m_num_pkt_inj_from_router++; p_flit->getPkt()->m_wire_delay += link_lat; // wire delay (T_w) for this packet } // intra-router flit latency p_flit->m_clk_enter_router = simtime(); // pipeline stage latency m_pipe_lat_LT_tab->tabulate(simtime() - p_flit->m_clk_enter_stage); p_flit->m_clk_enter_stage = simtime(); // 03/15/06 fast simulation m_num_flits_inside--; if (p_next_router->hasNoFlitsInside()) { p_next_router->wakeup(); // printf("WAKEUP r_%d process at clk=%.0lf\n", p_next_router->id, simtime()); } p_next_router->incFlitsInside(); // record power if (!g_sim.m_warmup_phase) { m_power_tmpl->record_link_trav(p_flit, out_pc); p_next_router->m_power_tmpl->record_buffer_write(p_flit, next_in_pc); if (g_cfg.profile_power) { m_power_tmpl_profile->record_link_trav(p_flit, out_pc); p_next_router->m_power_tmpl_profile->record_buffer_write(p_flit, next_in_pc); } } #ifdef _DEBUG_ROUTER debugLT(p_flit, out_pc); p_next_router->debugIB(p_flit, next_in_pc, next_in_vc); #endif #ifdef LINK_DVS if (!g_sim.m_warmup_phase) m_sim_pc_dvs_link_op_vec[out_pc]++; if (link.m_store_clk > link.m_last_sent_clk) link.m_last_sent_clk = link.m_store_clk; link.m_last_sent_clk += DVS_lat; #endif } }