Exemplo n.º 1
0
void Router::stageSSA()
{
    // speculative-SW allocator does grant.
    vector< pair< pair< int, int >, pair< int, int > > > grant_vec = m_sw_arb->grantSpec();

    // move granted requests to xbar
    for (unsigned int n=0; n<grant_vec.size(); n++) {
        int in_pc = grant_vec[n].first.first;
        int in_vc = grant_vec[n].first.second;
        int out_pc = grant_vec[n].second.first;
        int out_vc = grant_vec[n].second.second;
        assert(in_pc != INVALID_PC);
        assert(in_vc != INVALID_VC);
        Flit* peek_flit = m_flitQ->peek(in_pc, in_vc);
        assert(peek_flit->isHead());

#ifdef _DEBUG_ROUTER
        debugSA(peek_flit, in_pc, in_vc, out_pc, out_vc, true, false);
#endif

        assert(m_xbar.m_waiting_in_vc_vec[in_pc] == INVALID_VC);
        m_xbar.m_waiting_in_vc_vec[in_pc] = in_vc;

        assert(m_xbar.m_outport_free_vec[out_pc] == true);
        m_xbar.m_outport_free_vec[out_pc] = false;

        // pipeline stage latency
        m_pipe_lat_SSA_tab->tabulate(simtime() - peek_flit->m_clk_enter_stage);
        peek_flit->m_clk_enter_stage = simtime();

        // change input module status: a header flit completes SA.
        assert(m_in_mod_vec[in_pc][in_vc].m_state == IN_MOD_V);
        m_in_mod_vec[in_pc][in_vc].m_state = IN_MOD_S;	// V->S
    }
}
Exemplo n.º 2
0
// Self-similar ON/OFF temporal model for one source core.
//
// While the core is in ON mode and its ON period has expired, an OFF period
// is drawn, the calling process is held for that long, and the core switches
// to OFF mode. While in OFF mode, a new ON period is drawn, its end time is
// recorded, and the core switches to ON mode.
//
// Returns 0.0 plus the value of temporalPoisson(src_core_id).
//
// NOTE(review): the original code declared a second `off_time` inside the ON
// branch that shadowed the function-level one, so the value added to the
// return was always 0.0. The OFF period is already consumed via hold(), so
// this may be intentional -- confirm. Behaviour is kept as-is here.
double WorkloadSynthetic::temporalSelfSimilar(int src_core_id)
{
    const double returned_off_time = 0.0;

    if (! m_SS_OnMode_vec[src_core_id]) {
        // OFF -> ON: draw the length of the next ON period.
        double u = m_stream_temporal_vec[src_core_id]->uniform(0.0, 1.0);
        double on_period = pow((1.0 - u), (-1.0)/1.9);
// printf("src_core_id=%d on_time=%lf\n", src_core_id, on_time);
        m_SS_last_OnTimeStamp_vec[src_core_id] = simtime() + on_period;
        m_SS_OnMode_vec[src_core_id] = true;
    } else if (simtime() > m_SS_last_OnTimeStamp_vec[src_core_id]) {
        // ON period expired: draw an OFF period of at least 1.0 and wait it out.
        double u = m_stream_temporal_vec[src_core_id]->uniform(0.0, 1.0);
        double off_period = pow((1.0 - u), -1.0/1.25);
// printf("src_core_id=%d off_time=%lf\n", src_core_id, off_time);
        off_period = (off_period < 1.0) ? 1.0 : off_period;

        hold(off_period);
        m_SS_OnMode_vec[src_core_id] = false;
    }

    return (returned_off_time + temporalPoisson(src_core_id));
}
Exemplo n.º 3
0
void process_parse_trace()
{
    char proc_name[MAX_PROCESS_NAME_STR_LEN];
    sprintf(proc_name, "trace proc");
    create(proc_name);
    g_sim.m_num_CSIM_process++;

    // stream net_stream;

    WorkloadTrace* wkldTrace = (WorkloadTrace*) g_Workload;

    ((WorkloadTrace*) g_Workload)->skipTraceFile();
    hold(g_cfg.wkld_trace_skip_cycles);
    fprintf(stderr, "skipped %.0lf cycles (trace_file_id=%d).\n", g_cfg.wkld_trace_skip_cycles, wkldTrace->trace_file_id());

    double last_pkt_inject_clk = simtime();

    while (!g_EOS) {
        vector< Packet* > pkt_vec = wkldTrace->readTrace();

        if (pkt_vec.size() == 0)	
            continue;

        if (pkt_vec.size() == 1 && pkt_vec[0] == 0) // no more trace?
            break;

        if (pkt_vec[0]->m_clk_gen > last_pkt_inject_clk) {
            double hold_tm = pkt_vec[0]->m_clk_gen - last_pkt_inject_clk;
            if (hold_tm > 0.0)
                hold(hold_tm);
            last_pkt_inject_clk = simtime();
        }

        for (unsigned int n=0; n<pkt_vec.size(); n++) {
            Packet* p_pkt = pkt_vec[n];
#ifdef _DEBUG_ROUTER
printf("clk=%0.lf GEN p=%lld C:%d->%d R:%d/%d->%d/%d #flits=%d gen_clk=%.0lf\n", simtime(), p_pkt->id(), p_pkt->getSrcCoreID(), p_pkt->getDestCoreID(), p_pkt->getSrcRouterID(), p_pkt->m_NI_in_pos, p_pkt->getDestRouterID(), p_pkt->m_NI_out_pos, p_pkt->m_num_flits, p_pkt->m_clk_gen);
#endif

            // choose one network if multiple networks exist.
            if (g_cfg.net_networks > 1)
                select_network(p_pkt);

            assert(p_pkt->m_NI_in_pos < g_Core_vec[p_pkt->getSrcCoreID()]->num_NIInput());
            g_Core_vec[p_pkt->getSrcCoreID()]->forwardPkt2NI(p_pkt->m_NI_in_pos, p_pkt);
            g_sim.m_num_pkt_inj++;
        }
    }

    // After the last trace is processed, terminate simulation.
    if (!g_EOS) {
        g_EOS = true;
        g_ev_sim_done->set();
        g_sim.m_clk_sim_end = simtime();
    }

#ifdef _DEBUG_ROUTER_PROCESS
    printf("PROCESS COMPLETE: process_parse_trace\n");
#endif
}
Exemplo n.º 4
0
    // Searches for the block size `usebeta` that minimizes the estimated cost
    // of going from BKZ-sbeta to BKZ-ebeta, where the cost is
    //     const_lll * t_lll + const_enum * t_enum
    // and t_lll / t_enum are produced by simtime() for each candidate.
    //
    // Candidates are scanned upward from ebeta+1 (exclusive upper bound dim);
    // the scan stops once the candidate is at least 3 above the best one
    // found so far. On return, t_lll, t_enum and usebeta describe the best
    // candidate.
    //
    // If sbeta == ebeta, t_lll and t_enum are set to 0 and usebeta is left
    // untouched.
    void simtimeopt(bkzfloat& t_lll,bkzfloat& t_enum,int& usebeta,int dim,int sbeta,int ebeta,bkzfloat const_lll,bkzfloat const_enum) {
        if (sbeta==ebeta) {
            t_lll = 0;
            t_enum = 0;
            return;
        }
        
        //find the optimal beta to minimize total cost from BKZ-sbeta to BKZ-ebeta
        //Time is estimated by const_lll*t_lll + const_enum*t_enum
        bkzfloat cost,mincost;
        // Start from the first candidate so that minbeta is well defined even
        // when the loop below does not execute (ebeta+1 >= dim). Previously it
        // was read uninitialized in that case. When the loop does run, its
        // first iteration assigns this same value anyway.
        // NOTE(review): whether simtime() accepts usebeta >= dim is not
        // visible here -- confirm the intended handling of that case.
        int minbeta = ebeta+1;
        mincost = -1;   // negative means "no cost recorded yet"
        
        cout << "Simulating BKZ Time: " << sbeta << "->" << ebeta << "                  \r";
        cout.flush();
        double loop;
        for (usebeta = ebeta+1;usebeta<dim;usebeta++) {
            simtime(t_lll,t_enum,loop,dim,sbeta,ebeta,usebeta);
            cost = t_lll * const_lll + t_enum * const_enum;
            if ((mincost<0) || (cost < mincost)) {
                mincost = cost;
                minbeta = usebeta;
            }

            // give up after three candidates without improvement
            if (usebeta >= minbeta+3) {
                break;
            }
        }
        // re-run the simulation for the winner so the outputs describe it
        simtime(t_lll,t_enum,loop,dim,sbeta,ebeta,minbeta);
        usebeta = minbeta;
        cout << "Simulating BKZ Time: " << sbeta << "->" << ebeta << "  ...finished \r";
        cout.flush();
    }
Exemplo n.º 5
0
// Dynamic compression control, driven by the contention delay observed for
// packets arriving from a given source router.
//
// The contention delay of `p_pkt` (time in the network minus a zero-load
// estimate) is tabulated per source router. If the running mean for that
// source exceeds 2.0, the CAM towards this NI's attached router is enabled on
// every input NI of the source router; otherwise it is disabled. Each actual
// state change seen on NI 0 is counted as one spurious / dynamic-control
// packet.
//
// `decode_lat` is not used by this function.
void NIOutputDecompr::controlCompression(Packet* p_pkt, const int decode_lat)
{
    int src_router_id = p_pkt->getSrcRouterID();
    const int this_router_id = m_attached_router->id();

    // network contention delay for packet (not including contention at NI)
    int hop_count = g_Topology->getMinHopCount(src_router_id, this_router_id);
    const int per_hop_delay = 3;
    int pkt_0_load_delay = (hop_count*per_hop_delay - 1) + (p_pkt->m_num_flits - 1);
    int net_cont_delay = ((int) (simtime() - p_pkt->m_clk_enter_net)) - pkt_0_load_delay;
    assert(net_cont_delay >= 0);

#ifdef _DEBUG_CAM_DYN_CONTROL
    int pkt_delay = (int) (simtime() - p_pkt->m_clk_gen);
printf("NIOutDecompr: clk=%0.lf NI-%d %-2d->%-2d pid=%lld cont=%d (mean=%.1lf cnt=%ld) pkt=%d zero=%d\n", simtime(), m_NI_id, p_pkt->getSrcRouterID(), p_pkt->getDestRouterID(), p_pkt->id(),
net_cont_delay, 
m_cont_delay_src_tab_vec[src_router_id]->mean(),
m_cont_delay_src_tab_vec[src_router_id]->cnt(),
pkt_delay, pkt_0_load_delay);
#endif

    // accounting
    m_cont_delay_src_tab_vec[src_router_id]->tabulate((double) net_cont_delay);

    // control action: desired CAM state for this (source, destination) pair
    const bool want_enabled = (m_cont_delay_src_tab_vec[src_router_id]->mean() > 2.0);

    Router* srcRouter = g_Router_vec[src_router_id];
    for (unsigned int ni=0; ni<srcRouter->getNIInputVec().size(); ni++) {
        NIInputCompr* p_NI_comp = (dynamic_cast<NIInputCompr*> (srcRouter->getNIInputVec()[ni]));
        // dynamic_cast yields a null pointer if the NI is not an NIInputCompr;
        // catch that here instead of dereferencing it blindly.
        assert(p_NI_comp);

        const bool is_enabled = p_NI_comp->CAMsts(this_router_id);
        if (is_enabled == want_enabled)
            continue;   // already in the desired state

        if (want_enabled)
            p_NI_comp->enableCAM(this_router_id, 0);
        else
            p_NI_comp->disableCAM(this_router_id, 0);

        // count one control packet per state change, not one per NI
        if (ni==0) {
            g_sim.m_num_pkt_spurious++;
            g_CamManager->m_pkt_dyn_control++;
        }
    }
}
Exemplo n.º 6
0
// CSIM process driving one router: every cycle it runs the router's
// simulation step, first putting the router to sleep when it holds neither
// flits nor pending credit deposits.
void process_router(Router* p_router)
{
    char proc_name[MAX_PROCESS_NAME_STR_LEN];

    sprintf(proc_name, "r_%d proc", p_router->id());
    create(proc_name);
    ++g_sim.m_num_CSIM_process;

    for (;;) {
#ifdef _DEBUG_ROUTER_SNAPSHOT
        if (simtime() >= _DEBUG_ROUTER_SNAPSHOT_CLK)
            take_network_snapshot(stdout);
#endif

        // 03/15/06 fast simulation: an idle router sleeps instead of spinning.
        const bool router_idle = p_router->hasNoFlitsInside()
                              && p_router->hasNoCreditDepositsInside();
        if (router_idle)
            p_router->sleep();

        p_router->router_sim();
        hold(ONE_CYCLE);
    }

    // Not reached: the loop above never terminates.
#ifdef _DEBUG_ROUTER_PROCESS
    printf("PROCESS COMPLETE: process_router(router=%d)\n", p_router->id());
#endif
}
Exemplo n.º 7
0
////////////////////////////////////////////////////////////////////////
// print simulation progress
//
// Builds one progress line and writes it to stderr. The line is assembled in
// a fixed-size buffer with bounded writes, so an unexpectedly long value
// truncates the line instead of overrunning the stack buffer.
void print_sim_progress()
{
    char buf[256];
    buf[0] = '\0';   // keep strlen() well defined even if formatting fails

    // format:
    //   clock,
    //   packet:  #injected pkts(I), #ejected pkts(E), #injected pkts - #ejected pkts(D),
    //            #in-transit pkts(N)
    //   flit:    #in-transit flits(N),
    //   trace:   #injected traces (if workload uses traces)
    //   latency: avg pkt latency(l), avg queuing latency(q), avg contention latency(c)

    snprintf(buf, sizeof(buf), "clk=%.0lf\tp:I=%lld E=%lld D=%lld N=%lld f:N=%lld",
             simtime(),
             g_sim.m_num_pkt_inj, g_sim.m_num_pkt_ejt, g_sim.m_num_pkt_inj-g_sim.m_num_pkt_ejt,
             g_sim.m_num_pkt_in_network, g_sim.m_num_flit_in_network);

    if (! g_Workload->isSynthetic()) {
        // snprintf always NUL-terminates, so strlen(buf) < sizeof(buf) here
        const size_t used = strlen(buf);
        snprintf(buf+used, sizeof(buf)-used, " t:%lld",
                 ((WorkloadTrace*) g_Workload)->getProcessedTraceCount());
    }

    const size_t used = strlen(buf);
    snprintf(buf+used, sizeof(buf)-used, "\tl=%.2lf q=%.1lf c=%.1lf\n",
             g_sim.m_pkt_T_t_tab->mean(),
             g_sim.m_pkt_T_q_tab->mean(),
             g_sim.m_pkt_T_t_tab->mean() - g_sim.m_pkt_T_h_tab->mean() - g_sim.m_pkt_T_w_tab->mean() - g_sim.m_pkt_T_s_tab->mean() + 1.0); // FIXME 1.0+ cycle

    fprintf(stderr, "%s", buf);
    fflush(stderr);
}
Exemplo n.º 8
0
Arquivo: a1c.c Projeto: llevar/uoft
/* Top-level CSIM process: initializes the model, generates customers with
   exponential inter-arrival times (mean IATM) while the simulated clock is
   below 1000, waits for the `done` event and prints per-server statistics. */
void sim(){
	double stat;	/* statistic about to be printed */

	create("sim");

	/* Initialize simulation. */
	init();

	/* Customer generation: wait one inter-arrival time, then start a
	   customer, for as long as the clock is below 1000. */
	for (; simtime() < 1000; ) {
		hold(exponential(IATM));
		cust();
	}
	wait(done);

	/* Report for server 1. */
	stat = table_mean(box_time_table(queue_box));
	printf("Server 1, expected average delay in queue of a customer: %f\n", stat);
	stat = qtable_mean(box_number_qtable(queue_box));
	printf("Server 1, expected time-average number of customers in queue: %f\n", stat);
	stat = qtable_mean(box_number_qtable(service_box));
	printf("Server 1, expected utilization: %f\n\n", stat);

	/* Report for server 2. */
	stat = table_mean(box_time_table(queue_box2));
	printf("Server 2, expected average delay in queue of a customer: %f\n", stat);
	stat = qtable_mean(box_number_qtable(queue_box2));
	printf("Server 2, expected time-average number of customers in queue: %f\n", stat);
	stat = qtable_mean(box_number_qtable(service_box2));
	printf("Server 2, expected utilization: %f\n\n", stat);

}
/**
 * \brief Activate the guider port outputs
 *
 * Rolls the offset forward to the present via update(), then records the new
 * activation. For each axis the "plus" time wins when it is positive;
 * otherwise the negated "minus" time is stored.
 *
 * \param raplus	activation time for RA+
 * \param raminus	activation time for RA-
 * \param decplus	activation time for DEC+
 * \param decminus	activation time for DEC-
 * \throws BadParameter if any of the activation times is negative
 */
void	SimGuidePort::activate(float raplus, float raminus,
		float decplus, float decminus) {
	debug(LOG_DEBUG, DEBUG_LOG, 0, "activate(raplus = %.3f, raminus = %.3f,"
		" decplus = %.3f, decminus = %.3f)",
		raplus, raminus, decplus, decminus);

	// reject negative activation times
	const bool	negative_ra = (raplus < 0) || (raminus < 0);
	const bool	negative_dec = (decminus < 0) || (decplus < 0);
	if (negative_ra || negative_dec) {
		throw BadParameter("activation times must be nonegative");
	}

	// bring the offset up to date before replacing the activation
	update();

	// store the new activation, signed by direction
	lastactivation = simtime();
	ra = (raplus > 0) ? raplus : -raminus;
	dec = (decplus > 0) ? decplus : -decminus;
	debug(LOG_DEBUG, DEBUG_LOG, 0, "new activations: ra = %f, dec = %f",
		ra, dec);
}
/**
 * \brief Set a new target position
 *
 * Calls current() first, then, if the requested value differs from the
 * present target, records the time of the change and the new target.
 */
void	SimFocuser::set(unsigned short value) {
	current();
	if (value != target) {
		lastset = simtime();
		target = value;
	}
}
Exemplo n.º 11
0
// CSIM process that periodically selects link voltage/frequency settings
// according to the configured link-DVS method. It exits immediately when DVS
// is disabled; otherwise it waits until half an interval past the simulation
// start clock and then acts once per interval until end-of-simulation.
void process_link_dvs_set()
{
    char proc_name[MAX_PROCESS_NAME_STR_LEN];

    sprintf(proc_name, "link-dvs proc");
    create(proc_name);
    ++g_sim.m_num_CSIM_process;

    // Method-specific setup.
    if (g_cfg.link_dvs_method == LINK_DVS_NODVS)
        return;
    if (g_cfg.link_dvs_method == LINK_DVS_FLIT_RATE_PREDICT) {
        g_LinkDVSer.open_flit_rate_predict_file();
        g_LinkDVSer.skip_flit_rate_predict_file((int) (g_cfg.wkld_trace_skip_cycles/UNIT_MEGA) + 1);
    }

    // First decision point: the middle of the first interval.
    double first_wait = g_cfg.sim_clk_start + g_cfg.link_dvs_interval/2.0;
    assert(first_wait > 0.0);
    hold(first_wait);

    for (; !g_EOS; hold(g_cfg.link_dvs_interval)) {
        switch (g_cfg.link_dvs_method) {
        case LINK_DVS_HISTORY:
            g_LinkDVSer.link_dvs_select_vf();
            break;

        case LINK_DVS_FLIT_RATE_PREDICT:
            g_LinkDVSer.read_flit_rate_predict_interval();
            g_LinkDVSer.estimate_link_utilz_from_flit_rate_predict();
            g_LinkDVSer.link_dvs_select_vf_predicted_flit_rate();

#ifdef LINK_DVS_DEBUG
            printf("read (%.0lf~%.0lf) rate info for DVS at clk=%.0lf\n",
            (g_LinkDVSer.predict_line_num()-1)*g_cfg.link_dvs_interval,
            (g_LinkDVSer.predict_line_num())*g_cfg.link_dvs_interval,
            simtime());
#endif

            break;

        default:
            assert(0);
        }
    }

    // Method-specific teardown.
    if (g_cfg.link_dvs_method == LINK_DVS_FLIT_RATE_PREDICT)
        g_LinkDVSer.close_flit_rate_predict_file();
}
/**
 * \brief Update the offset to the current time
 *
 * The update method rolls the position changes forward. Each time it is
 * called, it computes the offset that guider port activations may have
 * caused since the last activation, and applies them to the offset.
 * It then computes the remaining activation that has not been applied
 * yet.
 *
 * The corrections applied by the update method amount to one pixel per
 * second. The CcdInfo publishes a pixel size of 10um, which means that
 * 10um corresponds to 15 arc seconds. 
 */
void	SimGuidePort::update() {
	// nothing to do when no activation is pending on either axis
	if ((ra == 0) && (dec == 0)) {
		debug(LOG_DEBUG, DEBUG_LOG, 0, "no update");
		return;
	}

	debug(LOG_DEBUG, DEBUG_LOG, 0, "update: current offset: %s",
		_offset.toString().c_str());

	// time elapsed since the last activation/update; at most this much
	// of each pending activation can have been executed
	double	now = simtime();
	double	elapsed = now - lastactivation;

	// RA axis: consume the whole pending activation if there was enough
	// time, otherwise only the part that fits into the elapsed time
	double	rachange = (fabs(ra) < elapsed) ? ra : sign(ra) * elapsed;
	debug(LOG_DEBUG, DEBUG_LOG, 0, "update: advance RA by %f", rachange);
	ra -= rachange;
	_offset = _offset + rachange * pixelspeed * _ravector;

	// DEC axis: same rule as for RA
	double	decchange = (fabs(dec) < elapsed) ? dec : sign(dec) * elapsed;
	debug(LOG_DEBUG, DEBUG_LOG, 0, "update: advance DEC by %f", decchange);
	dec -= decchange;
	_offset = _offset + decchange * pixelspeed * _decvector;

	debug(LOG_DEBUG, DEBUG_LOG, 0, "update: new offset: %s",
		_offset.toString().c_str());

	// the remaining activation now counts from this moment
	lastactivation = now;
}
Exemplo n.º 13
0
// Writes this decoder's id, number of sets, total access count and accesses
// per unit of simulated time to `out`, followed by two blank lines.
void CAMDataDePrivate::printStats(ostream& out) const
{
    // accesses divided by the current simulation clock
    const double accesses_per_cycle = ((double) m_num_access) / simtime();

    out << "CAMDataDePrivate:" << " id=" << m_decoder_id << " entries=" << m_num_sets << endl;
    out << "total accesses: " << m_num_access << endl;
    out << "access rate (access/cycle): " << accesses_per_cycle << endl;
    out << endl << endl;
}
Exemplo n.º 14
0
// CSIM process applying link slow-down transitions. It exits immediately
// when DVS is disabled. Otherwise, once per DVS interval, it waits the
// frequency transit delay and updates the frequency, then waits the voltage
// transit delay and updates the voltage, until end-of-simulation.
void process_link_dvs_link_slowdown()
{
    char proc_name[MAX_PROCESS_NAME_STR_LEN];

    sprintf(proc_name, "link-dvs slowdown proc");
    create(proc_name);
    ++g_sim.m_num_CSIM_process;

    if (g_cfg.link_dvs_method == LINK_DVS_NODVS)
        return;

    // Start early enough that both transitions finish at the interval boundary.
    double initial_wait = g_cfg.sim_clk_start
                        + g_cfg.link_dvs_interval
                        - g_cfg.link_dvs_voltage_transit_delay 
                        - g_cfg.link_dvs_freq_transit_delay;
    assert(initial_wait > 0.0);
    hold(initial_wait);

    for (; !g_EOS; ) {
        // Slowing down: frequency first, voltage second.
        hold(g_cfg.link_dvs_freq_transit_delay);
        g_LinkDVSer.link_dvs_update_freq_slowdown();
#ifdef LINK_DVS_DEBUG
        printf("link freq slowdown clk=%.0lf\n", simtime());
#endif

        hold(g_cfg.link_dvs_voltage_transit_delay);
        g_LinkDVSer.link_dvs_update_voltage_slowdown();
#ifdef LINK_DVS_DEBUG
        printf("link voltage slowdown clk=%.0lf\n", simtime());
#endif

        // Idle for the remainder of the interval.
        hold(g_cfg.link_dvs_interval - g_cfg.link_dvs_voltage_transit_delay - g_cfg.link_dvs_freq_transit_delay);
    }
}
/**
 * \brief Get the current (simulated) focuser position
 *
 * While a move is in progress (lastset != 0) the position advances towards
 * the target at 1000 units per unit of simtime(); this is the rate implied
 * by the "is there enough time to arrive" test below. Once enough time has
 * passed, the position snaps to the target and the move is marked finished.
 *
 * The previous implementation stepped by timepast * delta, i.e. scaled the
 * step by the remaining distance instead of by the speed. That overshoots
 * the target as soon as timepast > 1 and can push the result outside the
 * range of unsigned short.
 *
 * \return the updated position
 */
unsigned short	SimFocuser::current() {
	// no move in progress
	if (0 == lastset) {
		return _value;
	}
	double	now = simtime();
	double	timepast = now - lastset;
	// signed remaining distance: positive means we are above the target
	double	delta = (double)_value - (double)target;
//debug(LOG_DEBUG, DEBUG_LOG, 0, "delta: %f, timepast: %f", delta, timepast);
	if (fabs(delta / 1000.) > timepast) {
		// not enough time to arrive: advance by speed * time towards
		// the target. The condition above guarantees that
		// 1000 * timepast < |delta|, so this can never overshoot.
		double	step = 1000. * timepast;
		_value -= (delta > 0) ? step : -step;
		lastset = now;
	} else {
		// enough time has passed: the move is complete
		lastset = 0;
		_value = target;
	}
	return _value;
}
/**
 * \brief Create a simulated GuidePort
 *
 * The default settings of the guider port have a coordinate system rotated
 * by 30 degrees with respect to the ccd axes. Also the vector in the right
 * ascension direction is shorter, approximately as if declination was
 * 45 degrees.
 */
SimGuidePort::SimGuidePort(SimLocator& locator)
	: GuidePort("guideport:simulator/guideport"), _locator(locator) {
	starttime = simtime();
	debug(LOG_DEBUG, DEBUG_LOG, 0, "SimGuidePort created at %f",
		starttime);

	// start with zero _omega and no pending activation
	_omega = 0;
	ra = 0;
	dec = 0;

	// the initial mount axis directions are not parallel to the coordinate
	// axes of the image: both are rotated by 30 degrees, and the RA
	// vector is additionally scaled by sqrt(0.5)
	const double	cos30 = sqrt(3) / 2;
	const double	sin30 = 0.5;
	_ravector = sqrt(0.5) * Point(cos30, sin30);
	_decvector = Point(-sin30, cos30);

	// compute the speed at which a star image would move over the
	// CCD at standard guide rate. We assume a focal length of 0.6m
	const double	guiderate = (15. / 3600.) * (M_PI / 180.); // 15"/sec in radians
	const double	pixelangle = 0.000010 / 0.6;	// radians per pixel
	pixelspeed = guiderate / pixelangle;
	debug(LOG_DEBUG, DEBUG_LOG, 0, "pixelspeed = %f", pixelspeed);
}
/**
 * \brief Retrieve the current offset
 *
 * The result is the stored offset plus a drift term proportional to the time
 * since construction and, once more than 360 time units have passed, a
 * circular term of radius 5.
 */
Point	SimGuidePort::offset() {
	const double	elapsed = simtime() - starttime;

	// linear drift since the port was created
	Point	displacement = elapsed * _drift;

	// Fourier component, only active after 360 time units
	if (elapsed > 360) {
		const double	phase = 0.01 * elapsed;
		displacement = displacement + 5. * Point(sin(phase), cos(phase));
	}

	// combine with the accumulated offset, log and return
	Point	total = _offset + displacement;
	debug(LOG_DEBUG, DEBUG_LOG, 0, "complete offset: %s",
		total.toString().c_str());
	return total;
}
Exemplo n.º 18
0
void Router::stageRC()
{
    for (int in_pc=0; in_pc<m_num_pc; in_pc++)
        for (int in_vc=0; in_vc<m_num_vc; in_vc++) {

#ifdef _DEBUG_ROUTER_RC
            int _rc_debug_router_id = 20;
            double _rc_debug_clk = 0.0;
            int _rc_in_pc = 4;
            int _rc_in_vc = 0;

            if (m_id == _rc_debug_router_id && simtime() > _rc_debug_clk && in_pc == _rc_in_pc && in_vc == _rc_in_vc ) {
                printf("buf_Status: clk=%.0lf router=%d pc=%d vc=%d sz=%d\n", simtime(), m_id, in_pc, in_vc, m_flitQ->size(in_pc, in_vc));
                if (! m_flitQ->isEmpty(in_pc, in_vc))  {
                    printf("  ");
                    m_flitQ->print(stdout, in_pc, in_vc);
                }
            }
#endif

            if (m_in_mod_vec[in_pc][in_vc].m_state != IN_MOD_I)
                continue;

            if (m_flitQ->isEmpty(in_pc, in_vc)) // has a flit ?
                continue;

            // peek one flit in the input buffer
            Flit* peek_flit = m_flitQ->peek(in_pc, in_vc);
            assert(peek_flit);

            // flit type must be HEAD.
            if (! peek_flit->isHead())
                continue;

            // CSIM process synchronization problem:
            //   The order of process creation may pre-determine the priority of processes.
            //   If process for router X is created earlier than process for router Y
            //   (i.e. X has higher priority than Y) in sim_process.C,
            //   router Y does not wait for one cycle when router X sends a flit to router Y.
            if (peek_flit->m_clk_enter_router == simtime())
                continue;

            // do routing (decide out_pc)
            int out_pc = g_Routing->selectOutPC(this, in_vc, (FlitHead*) peek_flit);
            assert(out_pc < (int) m_connNextRouter_vec.size());
            int next_router_id = m_connNextRouter_vec[out_pc].first;
            int next_in_pc = m_connNextRouter_vec[out_pc].second;

            assert(isEjectChannel(out_pc) || (! isEjectChannel(out_pc) && next_router_id != INVALID_ROUTER_ID));
            assert(isEjectChannel(out_pc) || (! isEjectChannel(out_pc) && next_in_pc != INVALID_PC));

            // change input module status
            assert(m_in_mod_vec[in_pc][in_vc].m_state == IN_MOD_I);
            m_in_mod_vec[in_pc][in_vc].m_state = IN_MOD_R; // I->R
            m_in_mod_vec[in_pc][in_vc].m_out_pc = out_pc;

            // make VC arbiter request for this packet
            m_vc_arb->add(in_pc, in_vc, out_pc);

#ifdef _DEBUG_ROUTER
            debugRC(peek_flit, next_router_id, in_pc, in_vc, out_pc, next_in_pc);
#endif

            // pipeline stage latency
            m_pipe_lat_RC_tab->tabulate(simtime() - peek_flit->m_clk_enter_stage);
            peek_flit->m_clk_enter_stage = simtime();

            // increase hop count for this packet
            peek_flit->getPkt()->m_hops++;
        }
}
/**
 * \brief Time since construction multiplied by _omega
 */
double	SimGuidePort::alpha() {
	const double	elapsed = simtime() - starttime;
	return elapsed * _omega;
}
Exemplo n.º 20
0
// Tells whether `num_credits` credits are available for (out_pc, out_vc).
//
//  SAMQ   : the VC's own credit count must cover the request.
//  DAMQ_P : the credits summed over all VCs of out_pc must cover the
//           request, or else the VC's reserved credits must.
//  DAMQ_R : like DAMQ_P, but the sum runs over all VCs of all internal PCs.
//
// Any other buffer type trips an assertion.
bool Router::hasCredit(int out_pc, int out_vc, int num_credits)
{
    switch (g_cfg.router_buffer_type) {
    case ROUTER_BUFFER_SAMQ:
        return m_out_mod_vec[out_pc][out_vc].m_num_credit >= num_credits;

    case ROUTER_BUFFER_DAMQ_P:
    {
        const int vc_count = g_Router_vec[m_connNextRouter_vec[out_pc].first]->num_vc();
        int shared_sum = 0;	// shared credits over all VCs of out_pc
        int total_sum = 0;	// shared + reserved, reported by the debug output
        for (int vc=0; vc<vc_count; vc++) {
            shared_sum += m_out_mod_vec[out_pc][vc].m_num_credit;
            total_sum += m_out_mod_vec[out_pc][vc].m_num_credit + m_out_mod_vec[out_pc][vc].m_num_credit_rsv;
        }
        assert(shared_sum >= 0);

#ifdef _DEBUG_CREDIT
        printf("hasCredit DAMQ_P router=%d clk=%.0lf out_pc=%d out_vc=%d num_credits=%d\n", id(), simtime(), out_pc, out_vc, num_credits);
        for (int vc=0; vc<vc_count; vc++)
            printf("  VC=%d credits=%d %d\n", vc, m_out_mod_vec[out_pc][vc].m_num_credit, m_out_mod_vec[out_pc][vc].m_num_credit_rsv);
        printf("  num_shared_credits=%d num_total_credits=%d\n", shared_sum, total_sum);
#endif

        // Enough shared credits, or else fall back to this VC's reserved ones.
        return shared_sum >= num_credits
            || m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv >= num_credits;
    }

    case ROUTER_BUFFER_DAMQ_R:
    {
        int shared_sum = 0;	// shared credits over all internal PCs
        int total_sum = 0;	// shared + reserved, reported by the debug output
        for (int pc=0; pc<num_internal_pc(); pc++) {
            const int neighbor_id = m_connNextRouter_vec[pc].first;
            // A port without a neighbor contributes no VCs.
            const int vc_count = (neighbor_id == INVALID_ROUTER_ID) ? 0 : g_Router_vec[neighbor_id]->num_vc();
            for (int vc=0; vc<vc_count; vc++) {
                shared_sum += m_out_mod_vec[pc][vc].m_num_credit;
                total_sum += m_out_mod_vec[pc][vc].m_num_credit + m_out_mod_vec[pc][vc].m_num_credit_rsv;
            }
        }
        assert(shared_sum >= 0);

#ifdef _DEBUG_CREDIT
        printf("hasCredit DAMQ_R router=%d clk=%.0lf out_pc=%d out_vc=%d num_credits=%d\n", id(), simtime(), out_pc, out_vc, num_credits);
        for (int pc=0; pc<num_internal_pc(); pc++) {
            const int neighbor_id = m_connNextRouter_vec[pc].first;
            const int vc_count = (neighbor_id == INVALID_ROUTER_ID) ? 0 : g_Router_vec[neighbor_id]->num_vc();
            for (int vc=0; vc<vc_count; vc++)
                printf("  PC=%d VC=%d credits=%d %d\n", pc, vc, m_out_mod_vec[pc][vc].m_num_credit, m_out_mod_vec[pc][vc].m_num_credit_rsv);
        }
        printf("  num_shared_credits=%d num_total_credits=%d\n", shared_sum, total_sum);
#endif

        // Enough shared credits, or else fall back to this VC's reserved ones.
        return shared_sum >= num_credits
            || m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv >= num_credits;
    }

    default:
        break;
    }

    assert(0);	// never reached
    return false;
}
Exemplo n.º 21
0
// Root CSIM process: starts all other simulation processes (progress
// reporting, routers, NIs, profiling, optional link-DVS, traffic injection,
// simulation control), waits for the simulation-done event and records the
// wall-clock end and elapsed time.
void process_main()
{
    create("start_proc");
    ++g_sim.m_num_CSIM_process;

    fprintf(stderr, "started simulation.\n"); 

    // optional progress reporting
    if (g_cfg.sim_show_progress)
        process_sim_progress();

    // one process per router
    for (unsigned int r=0; r<g_Router_vec.size(); r++)
        process_router(g_Router_vec[r]);

    // input NIs: one process per NI, or one per (NI, VC)
    for (unsigned int ni=0; ni<g_NIInput_vec.size(); ni++) {
        if (g_cfg.NIin_type == NI_INPUT_TYPE_PER_PC) {
            process_NI_input(g_NIInput_vec[ni], 0);
        } else if (g_cfg.NIin_type == NI_INPUT_TYPE_PER_VC) {
            for (int NI_vc=0; NI_vc<g_cfg.router_num_vc; NI_vc++)
                process_NI_input(g_NIInput_vec[ni], NI_vc);
        } else {
            assert(0);
        }
    }

    // output NIs
    for (unsigned int no=0; no<g_NIOutput_vec.size(); no++)
        process_NI_output(g_NIOutput_vec[no]);

    // profiling, by cycle interval or by instruction
    if (g_cfg.profile_perf || g_cfg.profile_power) {
        if (g_cfg.profile_interval_cycle)
            process_profile_cycle();
        else
            process_profile_instr();
    }

#ifdef LINK_DVS
    // link-dvs
    process_link_dvs_link_speedup();
    process_link_dvs_link_slowdown();
    process_link_dvs_set();
#endif

    // traffic injection: trace replay or synthetic generators (one per core)
    switch (g_cfg.wkld_type) {
    case WORKLOAD_TRIPS_TRACE:
    case WORKLOAD_TILED_CMP_TRACE:
    case WORKLOAD_TILED_CMP_VALUE_TRACE:
    case WORKLOAD_SNUCA_CMP_VALUE_TRACE:
        process_parse_trace();
        break;

    case WORKLOAD_SYNTH_SPATIAL:
    case WORKLOAD_SYNTH_TRAFFIC_MATRIX:
        for (unsigned int core=0; core<g_Core_vec.size(); core++)
            process_gen_synth_traffic(core);
        break;

    default:
        assert(0);
    }

    // control simulation for warmup and finalize
    process_control_sim();

    // block until the simulation is declared done
    g_ev_sim_done->wait();

    fprintf(stderr, "finished simulation at clk=%.0lf.\n", simtime());

    // wall-clock bookkeeping; the elapsed time is at least 1
    g_sim.m_end_time = time(NULL);
    g_sim.m_elapsed_time = _MAX(g_sim.m_end_time - g_sim.m_start_time, 1);

#ifdef _DEBUG_ROUTER_PROCESS
    printf("PROCESS COMPLETE: process_main()\n");
#endif
}
Exemplo n.º 22
0
// Consumes `num_credits` credits for (out_pc, out_vc). Eject channels are
// not credit-managed and are ignored.
//
//  SAMQ            : the VC's own credit count is decremented.
//  DAMQ_P / DAMQ_R : if the shared credits (summed over out_pc's VCs for
//                    DAMQ_P, over all internal PCs for DAMQ_R) cover the
//                    request, the VC's shared count is decremented;
//                    otherwise its reserved count is.
void Router::decCredit(int out_pc, int out_vc, int num_credits)
{
    // no credit management on eject channels
    if (isEjectChannel(out_pc))
        return;

    switch (g_cfg.router_buffer_type) {
    case ROUTER_BUFFER_SAMQ:
    {
        m_out_mod_vec[out_pc][out_vc].m_num_credit -= num_credits;
        assert(m_out_mod_vec[out_pc][out_vc].m_num_credit >= 0);

#ifdef _DEBUG_CREDIT
        printf("decCredit SAMQ: router=%d clk=%.0lf m_num_credit[out_pc=%d][out_vc=%d]=%d\n", id(), simtime(), out_pc, out_vc, m_out_mod_vec[out_pc][out_vc].m_num_credit);
#endif
        break;
    }

    case ROUTER_BUFFER_DAMQ_P:
    {
        // shared credits currently available on out_pc
        const int vc_count = g_Router_vec[m_connNextRouter_vec[out_pc].first]->num_vc();
        int shared_sum = 0;
        for (int vc=0; vc<vc_count; vc++) {
            shared_sum += m_out_mod_vec[out_pc][vc].m_num_credit;
        }

        if (shared_sum < num_credits) {
            // not enough shared credits: take them from the reserved ones
            assert(m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv >= num_credits);
            m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv -= num_credits;
        } else {
            m_out_mod_vec[out_pc][out_vc].m_num_credit -= num_credits;
        }

#ifdef _DEBUG_CREDIT
        printf("decCredit DAMQ_P: router=%d clk=%.0lf out_pc=%d out_vc=%d\n", id(), simtime(), out_pc, out_vc);
        printf("    m_num_credit=%d m_num_credit_rsv=%d num_credits=%d num_shared_credits=%d\n", m_out_mod_vec[out_pc][out_vc].m_num_credit, m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv, num_credits, shared_sum);
#endif
        break;
    }

    case ROUTER_BUFFER_DAMQ_R:
    {
        // shared credits currently available over all internal PCs
        int shared_sum = 0;
        for (int pc=0; pc<num_internal_pc(); pc++) {
            const int neighbor_id = m_connNextRouter_vec[pc].first;
            // a port without a neighbor contributes no VCs
            const int vc_count = (neighbor_id == INVALID_ROUTER_ID) ? 0 : g_Router_vec[neighbor_id]->num_vc();
            for (int vc=0; vc<vc_count; vc++) {
                shared_sum += m_out_mod_vec[pc][vc].m_num_credit;
            }
        }

        if (shared_sum < num_credits) {
            // not enough shared credits: take them from the reserved ones
            assert(m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv >= num_credits);
            m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv -= num_credits;
        } else {
            m_out_mod_vec[out_pc][out_vc].m_num_credit -= num_credits;
        }

#ifdef _DEBUG_CREDIT
        printf("decCredit DAMQ_R: router=%d clk=%.0lf out_pc=%d out_vc=%d\n", id(), simtime(), out_pc, out_vc);
        printf("    m_num_credit=%d m_num_credit_rsv=%d num_credits=%d num_shared_credits=%d\n", m_out_mod_vec[out_pc][out_vc].m_num_credit, m_out_mod_vec[out_pc][out_vc].m_num_credit_rsv, num_credits, shared_sum);
#endif
        break;
    }

    default:
        assert(0);
    }

}
Exemplo n.º 23
0
// Deposits all pending credits whose deposit clock has been reached.
//
// Pending credits are processed front to back; processing stops at the first
// credit whose deposit clock lies in the future, so everything behind it is
// left for a later call.
// NOTE(review): this presumes m_credit_deposit_vec is ordered by
// m_clk_deposit -- confirm against the code that fills it.
//
//  SAMQ            : the credits go to the VC's credit count.
//  DAMQ_P / DAMQ_R : the VC's reserved credits are refilled first, up to
//                    g_cfg.router_num_rsv_credit; the rest goes to the
//                    shared credit count.
//
// Deposited Credit objects are returned to g_CreditPool and removed from the
// pending list.
void Router::incCredit()
{
    if (m_credit_deposit_vec.size() == 0) // no credits to deposit?
        return;

    int num_org_credit_reqs = m_credit_deposit_vec.size(); // for sanity check
    int num_complete_credit_reqs = 0;
    int num_deposited_credits = 0;	// total deposited credits

    // deposit a credit to the corresponding output module
    for (vector< Credit* >::iterator pos=m_credit_deposit_vec.begin(); pos != m_credit_deposit_vec.end(); ++pos) {
        Credit* p_credit = *pos;

        if (p_credit->m_clk_deposit > simtime())
            break;	// all other deposited credits must be increased later.

        switch (g_cfg.router_buffer_type) {
        case ROUTER_BUFFER_SAMQ:
            m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit += p_credit->m_num_credits;
            assert(m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit <= m_inbuf_depth);
            break;
        case ROUTER_BUFFER_DAMQ_P:
        case ROUTER_BUFFER_DAMQ_R:
        {
            int num_shared_credits = p_credit->m_num_credits;
            // refill the reserved credits first, as far as they are below their quota
            if (m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit_rsv < g_cfg.router_num_rsv_credit) {
                int num_rsv_credits = _MIN(g_cfg.router_num_rsv_credit - m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit_rsv, p_credit->m_num_credits);
                num_shared_credits = p_credit->m_num_credits - num_rsv_credits;
                assert(num_shared_credits >= 0);

                m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit_rsv += num_rsv_credits;
            }

            // whatever is left goes to the shared credits
            m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit += num_shared_credits;
        }
        break;
        default:
            assert(0);
        }

#ifdef _DEBUG_CREDIT
        printf("incCredit router=%d deposit_clk=%.0lf clk=%.0lf out_pc=%d out_vc=%d credits=%d\n", id(), p_credit->m_clk_deposit, simtime(), p_credit->m_out_pc, p_credit->m_out_vc, m_out_mod_vec[p_credit->m_out_pc][p_credit->m_out_vc].m_num_credit);
#endif

        num_complete_credit_reqs++;
        num_deposited_credits += p_credit->m_num_credits;
        g_CreditPool.reclaim(p_credit);
    }

    // delete successfully deposited credits (always a prefix of the list)
    if (num_complete_credit_reqs > 0)
        m_credit_deposit_vec.erase(m_credit_deposit_vec.begin(), m_credit_deposit_vec.begin()+num_complete_credit_reqs);

#ifdef _DEBUG_CREDIT
    printf("incCredit router=%d clk=%.0lf\n", m_id, simtime());
    printf("  num_org_credit_reqs=%d\n", num_org_credit_reqs);
    printf("  num_complete_credit_reqs=%d\n", num_complete_credit_reqs);
    // size() is a size_t; cast so the argument matches %d
    printf("  m_credit_deposit_vec.size()=%d\n", (int) m_credit_deposit_vec.size());
    printf("  num_deposited_credits=%d\n", num_deposited_credits);
#endif

    assert((num_org_credit_reqs - num_complete_credit_reqs) == ((int) m_credit_deposit_vec.size()));
}
Exemplo n.º 24
0
/**
 * Reads one record from the current trace file and turns it into a packet.
 *
 * If the record looks empty (zero cycle, zero size) or the file is at EOF,
 * the current file is closed and the next one is opened; when no further
 * file can be opened the returned vector holds a single null pointer.
 * Otherwise the returned vector holds exactly one newly allocated packet
 * whose source/destination, NI positions, type and 64-bit payload words are
 * filled in from the record.
 *
 * NOTE(review): the record read right after switching files is not
 * re-validated before use -- confirm that this is intended.
 */
vector< Packet* > WorkloadTiledCMPValue::readTrace()
{
    PktTraceValue rec;
    vector< Packet* > result_vec;

    // a trace file must be open at this point
    assert(m_trace_fp > 0);

    // clear the two fields that are used below to detect an unusable read
    rec.cycle = 0;
    rec.sz_bytes = 0;
    igzstream_read(m_trace_fp, (char*) &rec, sizeof(PktTraceValue));

    const bool record_unusable = (rec.cycle==0 || rec.sz_bytes==0 || igzstream_feof(m_trace_fp));
    if (record_unusable) {
        // current file is exhausted: move on to the next one
        closeTraceFile();

        if (! openTraceFile() ) {
            // no trace file left: signal this with a null entry
            result_vec.push_back(0);
            return result_vec;
        }
        fprintf(stderr, "benchmark=%s trace_file_id=%d successfully open.\n", m_benchmark_name.c_str(), m_trace_file_id-1);

        igzstream_read(m_trace_fp, (char*) &rec, sizeof(PktTraceValue));
    }

    // derive endpoints and packet length from the record
    int src_tile_id = (int) rec.src_mach_num;
    int dest_tile_id = (int) rec.dest_mach_num;
    g_sim.m_num_instr_executed = 0;

    int num_flits = (int) ceil( rec.sz_bytes * BITS_IN_BYTE
                                / ( (double) g_cfg.link_width ));
#ifdef _DEBUG_TRACE_TILED_CMP_VALUE
    printf("DEBUG_TILED_CMP_VALUE: src_tile=%d dest_tile=%d bytes=%d #flits=%d cycle=%lld cur_cycle=%.0lf\n", src_tile_id, dest_tile_id, rec.sz_bytes, num_flits, rec.cycle, simtime());
#endif

    // allocate the packet and assign packet/flit identifiers
    Packet* p_pkt = g_PacketPool.alloc();
    assert(p_pkt);
    p_pkt->setID(g_sim.m_next_pkt_id++);
    p_pkt->m_start_flit_id = g_sim.m_next_flit_id;
    p_pkt->m_num_flits = num_flits;
    g_sim.m_next_flit_id += num_flits;

    // source and destination (router and core use the same tile id)
    p_pkt->setSrcRouterID(src_tile_id);
    p_pkt->addDestRouterID(dest_tile_id);
    p_pkt->setSrcCoreID(src_tile_id);
    p_pkt->addDestCoreID(dest_tile_id);
    p_pkt->m_clk_gen = (double) rec.cycle;

    // NI input position follows the source machine type
    switch (rec.src_mach_type) {
    case MachineType_L1Cache:
        p_pkt->m_NI_in_pos = 0;
        break;
    case MachineType_L2Cache:
        p_pkt->m_NI_in_pos = 1;
        break;
    case MachineType_Directory:
        p_pkt->m_NI_in_pos = 2;
        break;
    default:
        assert(0);
    }

    // NI output position follows the destination machine type
    switch (rec.dest_mach_type) {
    case MachineType_L1Cache:
        p_pkt->m_NI_out_pos = 0;
        break;
    case MachineType_L2Cache:
        p_pkt->m_NI_out_pos = 1;
        break;
    case MachineType_Directory:
        p_pkt->m_NI_out_pos = 2;
        break;
    default:
        assert(0);
    }

    // port multiplexing: override both positions with rotating counters
    if (g_cfg.NI_port_mux) {
        p_pkt->m_NI_in_pos = g_NI_in_last_pos;
        g_NI_in_last_pos = (g_NI_in_last_pos + 1) % g_cfg.core_num_NIs;

        p_pkt->m_NI_out_pos = g_NI_out_last_pos;
        g_NI_out_last_pos = (g_NI_out_last_pos + 1) % g_cfg.core_num_NIs;
    }

    // packet type: anything that is not control-sized must be data-sized
    if (rec.sz_bytes != (int) CONTROL_MESSAGE_SIZE) {
        assert(rec.sz_bytes == (int) DATA_MESSAGE_SIZE);
        p_pkt->m_packet_type = PACKET_TYPE_UNICAST_LONG;
    } else {
        p_pkt->m_packet_type = PACKET_TYPE_UNICAST_SHORT;
    }

    // payload: one 64-bit word per 8 bytes of message
    assert(((int) rec.sz_bytes)%8 == 0);
    int num_words = rec.sz_bytes/8;
    p_pkt->m_packetData_vec.resize(num_words);

    // word 0 always carries the address
    p_pkt->m_packetData_vec[0] = rec.addr_value;
#ifdef _DEBUG_TRACE_TILED_CMP_VALUE
    printf("  addr[flit0]=%016llX\n", rec.addr_value);
#endif

    if (num_words != 1) {
        // remaining words: pack 8 consecutive data bytes each, first byte
        // ending up in the most significant position
        unsigned int byte_pos = 0;
        for (int word=1; word<num_words; word++) {
            unsigned long long packed = 0x0;
#ifdef _DEBUG_TRACE_TILED_CMP_VALUE
            printf("  ");
#endif
            for (int k=0; k<8; k++) {
#ifdef _DEBUG_TRACE_TILED_CMP_VALUE
                printf("%02X ", rec.data_value[byte_pos]);
#endif
                assert(byte_pos < MAX_PKT_TRACE_DATA_VALUE_SZ);
                packed = (packed << 8) | rec.data_value[byte_pos];
                ++byte_pos;
            }

            p_pkt->m_packetData_vec[word] = packed;
#ifdef _DEBUG_TRACE_TILED_CMP_VALUE
            printf("data[flit%d]=%016llX\n", word, packed);
#endif
        }
    }

    // spatial distribution
    m_spat_pattern_pkt_vec[src_tile_id][dest_tile_id]++;
    m_spat_pattern_flit_vec[src_tile_id][dest_tile_id] += num_flits;

    // throughput profile
    if (g_cfg.profile_perf) {
        g_sim.m_periodic_inj_pkt++;
        g_sim.m_periodic_inj_flit += num_flits;
    }

    m_num_proc_traces++;

    result_vec.push_back(p_pkt);
    return result_vec;
}
Exemplo n.º 25
0
/**
 * Simulation control process: skips the trace prefix, waits for the warm-up
 * phase to finish, snapshots the warm-up counters, then waits for the end
 * condition and signals end of simulation.
 *
 * The three phases are driven by g_cfg.wkld_trace_skip_cycles,
 * g_cfg.sim_clk_start / g_cfg.sim_clk_end, and g_cfg.sim_end_cond.
 */
void process_control_sim()
{
    create("sim-control-proc");
    g_sim.m_num_CSIM_process++;

#if 0
    printf("g_cfg.wkld_trace_skip_cycles=%.1lf\n", g_cfg.wkld_trace_skip_cycles);
    printf("g_cfg.clk_start=             %.1lf\n", g_cfg.sim_clk_start);
    printf("g_cfg.clk_end=               %.1lf\n", g_cfg.sim_clk_end);
    fflush(stdout);
#endif

    // length of each phase, in cycles
    double skip_cycles = g_cfg.wkld_trace_skip_cycles;

    double warmup_cycles = 0.0;
    if (g_cfg.sim_clk_start != 0.0)
        warmup_cycles = g_cfg.sim_clk_start - skip_cycles;
    if (warmup_cycles < 0.0) {
        fprintf(stderr, "process_control_sim(): negative num_cycles_warmup_sim=%lf\n", warmup_cycles);
        assert(0);
    }

    double run_cycles = g_cfg.sim_clk_end - skip_cycles - warmup_cycles;
    if (run_cycles < 0.0) {
        fprintf(stderr, "process_control_sim(): negative num_cycles_end_sim\n");
        fprintf(stderr, "  g_cfg.sim_clk_end=      %.1lf\n", g_cfg.sim_clk_end);
        fprintf(stderr, "  num_cycles_skip_trace=  %.1lf\n", skip_cycles);
        fprintf(stderr, "  num_cycles_warmup_sim=  %.1lf\n", warmup_cycles);
        fprintf(stderr, "  num_cycles_end_sim=     %.1lf\n", run_cycles);
        assert(0);
    }

    // phase 1: skip cycles for trace
    if (skip_cycles != 0.0)
        hold(skip_cycles);

    // polling interval used while waiting on packet counters
    const double poll_interval = 4.0;

    // phase 2: warm-up
    switch (g_cfg.sim_end_cond) {
    case SIM_END_BY_INJ_PKT:
    case SIM_END_BY_EJT_PKT:
        // packet-count modes: wait until enough packets have been ejected
        while (g_sim.m_num_pkt_ejt < g_cfg.sim_num_ejt_pkt_4warmup)
            hold(poll_interval);
        break;
    case SIM_END_BY_CYCLE:
        if (warmup_cycles != 0.0)
            hold(warmup_cycles);
        break;
    default:
        assert(0);
    }

    // warm-up is over: remember when, snapshot the counters, clear statistics
    g_sim.m_warmup_phase = false;
    g_sim.m_clk_warmup_end = simtime();
    g_sim.m_num_pkt_inj_warmup = g_sim.m_num_pkt_inj;
    g_sim.m_num_pkt_ejt_warmup = g_sim.m_num_pkt_ejt;
    g_sim.m_num_flit_inj_warmup = g_sim.m_num_flit_inj;
    g_sim.m_num_flit_ejt_warmup = g_sim.m_num_flit_ejt;
    reset_stats();

    fprintf(stderr, "finished warm-up at clk=%.0lf.\n", simtime());

    // phase 3: measured simulation
    // NOTE(review): unlike the warm-up switch there is no default branch here.
    switch (g_cfg.sim_end_cond) {
    case SIM_END_BY_INJ_PKT:
        // NOTE(review): this polls the ejected-packet counter against the
        // injected-packet limit -- confirm that this is intended.
        while (g_sim.m_num_pkt_ejt <= g_cfg.sim_num_inj_pkt)
            hold(poll_interval);
        break;
    case SIM_END_BY_EJT_PKT:
        while (g_sim.m_num_pkt_ejt <= g_cfg.sim_num_ejt_pkt)
            hold(poll_interval);
        break;
    case SIM_END_BY_CYCLE:
        if (run_cycles != 0.0) {
            if (g_cfg.profile_perf || g_cfg.profile_power)
                run_cycles += 0.5;	// for the last profile
            hold(run_cycles);
        }
        break;
    }

    // simulation finished: raise the done event once
    if (!g_EOS) {
        g_ev_sim_done->set();
        g_sim.m_clk_sim_end = simtime();
        g_EOS = true;
    }

#ifdef _DEBUG_ROUTER_PROCESS
    printf("PROCESS COMPLETE: process_control_sim() clk=%.0lf\n", simtime());
#endif
}
Exemplo n.º 26
0
/**
 * VC-allocation stage.
 *
 * Takes the grants produced by the VC arbiter and, for each granted
 * (in_pc, in_vc): files a request with the switch arbiter, removes the
 * request from the VC arbiter, records the VA stage latency of the flit at
 * the head of that input queue and, when g_cfg.router_tunnel is set,
 * refreshes the tunnel information kept for the input port.
 * Outside the warm-up phase the request/grant bit vectors are also reported
 * to the power model, one pair per output PC.
 */
void Router::stageVA()
{
    if (m_vc_arb->hasNoReq())
        return;

    vector< pair< pair< int, int >, int > > grant_vec = m_vc_arb->grant();  // return value: <in_pc, in_vc>, out_vc>

    for (unsigned int n=0; n<grant_vec.size(); ++n) {
        int in_pc = grant_vec[n].first.first;
        int in_vc = grant_vec[n].first.second;
        int out_pc = m_in_mod_vec[in_pc][in_vc].m_out_pc;
        int out_vc = grant_vec[n].second;
        Flit* peek_flit = m_flitQ->peek(in_pc, in_vc);

        // add a request to SW arbiter
        m_sw_arb->add(in_pc, in_vc, out_pc, out_vc);

        // delete granted request
        m_vc_arb->del(in_pc, in_vc);

#ifdef _DEBUG_ROUTER
        debugVA(peek_flit, in_pc, in_vc, out_pc, out_vc, false);
#endif

        // pipeline stage latency
        m_pipe_lat_VA_tab->tabulate((simtime() - peek_flit->m_clk_enter_stage));
        peek_flit->m_clk_enter_stage = simtime();

        // tunneling: bypass pipeline
        if (g_cfg.router_tunnel) {
            FlitHead* p_head_flit = (FlitHead*) m_flitQ->peek(in_pc, in_vc);
            switch(g_cfg.router_tunnel_type) {
            case TUNNELING_PER_FLOW:
                // remember the (source, destination) pair of the latest granted head flit
                if (m_tunnel_info_vec[in_pc].m_flow != make_pair(p_head_flit->src_router_id(), p_head_flit->dest_router_id())) {
                    m_tunnel_info_vec[in_pc].m_flow = make_pair(p_head_flit->src_router_id(), p_head_flit->dest_router_id());
                }
                break;
            case TUNNELING_PER_DEST:
                // remember the destination of the latest granted head flit
                if (m_tunnel_info_vec[in_pc].m_dest_router_id != p_head_flit->dest_router_id()) {
                    m_tunnel_info_vec[in_pc].m_dest_router_id = p_head_flit->dest_router_id();
                }
                break;
            case TUNNELING_PER_OUTPORT:
                break;
            default:
                assert(0);
            }

            // common properties
            m_tunnel_info_vec[in_pc].m_in_vc = in_vc;
            m_tunnel_info_vec[in_pc].m_out_pc = out_pc;
            m_tunnel_info_vec[in_pc].m_out_vc = out_vc;
        }
    }

    // VA power consumption in Orion - assumption: per output PC organization
    // record power
    if (!g_sim.m_warmup_phase) {
        vector< bitset< max_sz_vc_arb > > vc_req_orion_vec;
        vector< bitset< max_sz_vc_arb > > vc_grant_orion_vec;
        vc_req_orion_vec.resize(m_num_pc, 0);
        vc_grant_orion_vec.resize(m_num_pc, 0);

        for (unsigned int n=0; n<grant_vec.size(); ++n) {
            int in_pc = grant_vec[n].first.first;
            int in_vc = grant_vec[n].first.second;
            int out_pc = m_in_mod_vec[in_pc][in_vc].m_out_pc;

            // NOTE: Orion limitation
            // Valid bit positions are 0 .. max_sz_vc_arb-1, so a position
            // equal to max_sz_vc_arb must be folded back as well.
            int arb_bit_pos = in_pc*m_num_vc + in_vc;
            if (arb_bit_pos >= max_sz_vc_arb)
                arb_bit_pos %= max_sz_vc_arb;

            vc_req_orion_vec[out_pc][arb_bit_pos] = true;
            vc_grant_orion_vec[out_pc][arb_bit_pos] = true;
        }

        for (int out_pc=0; out_pc<m_num_pc; ++out_pc) {
            // requests still held by the arbiter count as requests too
            vc_req_orion_vec[out_pc] |= m_vc_arb->getReqBitVector(out_pc);

            if (vc_req_orion_vec[out_pc].any()) {
                m_power_tmpl->record_vc_arb(out_pc, (unsigned int) vc_req_orion_vec[out_pc].to_ulong() , (unsigned int) vc_grant_orion_vec[out_pc].to_ulong());

                if (g_cfg.profile_power) {
                    m_power_tmpl_profile->record_vc_arb(out_pc, (unsigned int) vc_req_orion_vec[out_pc].to_ulong(), (unsigned int) vc_grant_orion_vec[out_pc].to_ulong());
                }
            }
        }
    }
}
Exemplo n.º 27
0
void Router::stageSA()
{
    // make switch arbiter requests for middle/tail flits if head flit completed VA.
    // FIXME: can we do this better?
    vector< pair< int, int > > free_in_port_vec = m_sw_arb->getFreeInPorts();
    for (unsigned int n=0; n<free_in_port_vec.size(); n++) {
        int in_pc = free_in_port_vec[n].first;
        int in_vc = free_in_port_vec[n].second;

        if (m_flitQ->isEmpty(in_pc, in_vc))
            continue;

        // flit type should be MIDL or TAIL.
        Flit* peek_flit = m_flitQ->peek(in_pc, in_vc);
        if (peek_flit->isHead())
            continue;

        // head flit that belongs to the same packet for peek_flit
        // should complete switch allocation.
        if (m_in_mod_vec[in_pc][in_vc].m_state != IN_MOD_S)
            continue;

        // get output PC/VC
        int out_pc = m_in_mod_vec[in_pc][in_vc].m_out_pc;
        int out_vc = m_in_mod_vec[in_pc][in_vc].m_out_vc;

        // add a request to SW arbiter
        m_sw_arb->add(in_pc, in_vc, out_pc, out_vc);
    }

    // get granted requests
    vector< pair< pair< int, int >, pair< int, int > > > grant_vec = m_sw_arb->grantRegular();

    // move granted requests to xbar
    for (unsigned int n=0; n<grant_vec.size(); n++) {
        int in_pc = grant_vec[n].first.first;
        int in_vc = grant_vec[n].first.second;
        int out_pc = grant_vec[n].second.first;
        assert(in_pc != INVALID_PC);
        assert(in_vc != INVALID_VC);

        Flit* peek_flit = m_flitQ->peek(in_pc, in_vc);
        assert(peek_flit);

#ifdef _DEBUG_ROUTER
        int out_vc = grant_vec[n].second.second;
        debugSA(peek_flit, in_pc, in_vc, out_pc, out_vc, false, false);
#endif

        assert(m_xbar.m_waiting_in_vc_vec[in_pc] == INVALID_VC);
        m_xbar.m_waiting_in_vc_vec[in_pc] = in_vc;

        assert(m_xbar.m_outport_free_vec[out_pc] == true);
        m_xbar.m_outport_free_vec[out_pc] = false;

        // pipeline stage latency
        m_pipe_lat_SA_tab->tabulate(simtime() - peek_flit->m_clk_enter_stage);
        peek_flit->m_clk_enter_stage = simtime();

        // change input module status (V->S) if a head flit completes SW allocation.
        if (peek_flit->isHead()) {
            assert(m_in_mod_vec[in_pc][in_vc].m_state == IN_MOD_V);
            m_in_mod_vec[in_pc][in_vc].m_state = IN_MOD_S;
        }
    }
}
Exemplo n.º 28
0
/**
 * Switch-traversal stage.
 *
 * For every input PC that has a VC waiting in the crossbar, the flit is read
 * from the input queue, a credit is sent back to the upstream router (if
 * there is one), and the flit is either written to the NI output (ejection
 * channel) or appended to the outgoing link pipeline. The crossbar input and
 * output are then released, and a tail flit returns the input module to
 * IN_MOD_I and the output module to OUT_MOD_I. Latency statistics and,
 * outside the warm-up phase, power events are recorded along the way.
 */
void Router::stageST()
{
#ifdef _DEBUG_ROUTER_ST
    int _st_debug_router = 3;
    double _st_debug_clk = 141000012.0;
    if (m_id==_st_debug_router && simtime() > _st_debug_clk) {
        printf("ST_status router=%d, clk=%.0lf\n  ", m_id, simtime());
        for (int out_pc=0; out_pc<m_num_pc; out_pc++) {
            if (isEjectChannel(out_pc)) {
            } else {
                if (m_link_vec[out_pc].m_w_pipeline.size() == 0) {
                    printf("out_pc=%d(free) ", out_pc);
                } else {
                    printf("out_pc=%d", out_pc);
                    for (unsigned int i=0; i<m_link_vec[out_pc].m_w_pipeline.size(); i++)
                        printf("(fid=%lld) ",  m_link_vec[out_pc].m_w_pipeline[i].first->id());
                }
            }
        }
        printf("\n");
    }
#endif

    int num_xbar_passes = 0;

    for (int in_pc=0; in_pc<m_num_pc; in_pc++) {
        int in_vc = m_xbar.m_waiting_in_vc_vec[in_pc];
        if (in_vc == INVALID_VC)	// nothing waiting in the crossbar for this input
            continue;

        Flit* read_flit = m_flitQ->read(in_pc, in_vc);
        assert(read_flit);

#ifdef _DEBUG_CHECK_BUFFER_INTEGRITY
        assert(checkBufferIntegrity(in_pc, in_vc, read_flit));
#endif

        // create a credit and send it to upstream router
        int prev_router_id = m_connPrevRouter_vec[in_pc].first;
        int prev_out_pc = m_connPrevRouter_vec[in_pc].second;
        if (prev_router_id != INVALID_ROUTER_ID && prev_out_pc != INVALID_PC) {
            Credit* p_credit = g_CreditPool.alloc();

            p_credit->m_out_pc = prev_out_pc;
            p_credit->m_out_vc = in_vc;
            p_credit->m_num_credits = 1;
            // the credit becomes usable upstream after the link delay
            p_credit->m_clk_deposit = simtime() + g_Router_vec[prev_router_id]->getLink(prev_out_pc).m_delay_factor * g_cfg.link_latency;

            g_Router_vec[prev_router_id]->depositCredit(p_credit);
        }

        assert(m_in_mod_vec[in_pc][in_vc].m_state == IN_MOD_S);
        int out_pc = m_in_mod_vec[in_pc][in_vc].m_out_pc;
        int out_vc = m_in_mod_vec[in_pc][in_vc].m_out_vc;
        int next_router_id = m_connNextRouter_vec[out_pc].first;
        int next_in_pc = m_connNextRouter_vec[out_pc].second;

        if (isEjectChannel(out_pc)) {
            // select ejection port
            int epc = out_pc - num_internal_pc();
// printf("epc=%d out_pc=%d\n", epc, out_pc);
            NIOutput* p_ni_output = getNIOutput(epc);
            assert(p_ni_output);
            // FIXME: The following assert() is not valid for DMesh topology,
            //        because destination is encoded at injection function.
            // assert(m_id == read_flit->getPkt()->getDestRouterID());

            p_ni_output->writeFlit(read_flit);

            // 03/15/06 fast simulation
            m_num_flits_inside--;

#ifdef _DEBUG_ROUTER
            debugST(read_flit, in_pc, out_pc, INVALID_ROUTER_ID, INVALID_PC, out_vc, true);
#endif
        } else {
            // 11/05/05: no stall at ST stage
            // a router id is checked against the router sentinel, as
            // done for the upstream router above
            assert(next_router_id != INVALID_ROUTER_ID);
            assert(next_in_pc != INVALID_PC);

#ifdef _DEBUG_CREDIT
            if(g_Router_vec[next_router_id]->flitQ()->isFull(next_in_pc, out_vc)) {
                printf("router=%d out_pc=%d next_router=%d next_in_pc=%d\n", m_id, out_pc, next_router_id, next_in_pc);
                for (int x_vc=0; x_vc<m_num_vc; x_vc++)
                    printf("  vc=%d: credit=%d credit_rsv=%d\n", x_vc, m_out_mod_vec[out_pc][x_vc].m_num_credit, m_out_mod_vec[out_pc][x_vc].m_num_credit_rsv);
                for (int x_vc=0; x_vc<m_num_vc; x_vc++)
                    printf("  vc=%d: Q_sz=%d\n", x_vc, g_Router_vec[next_router_id]->flitQ()->size(next_in_pc, x_vc));
            }
#endif

            // assert(! g_Router_vec[next_router_id]->flitQ()->isFull(next_in_pc, out_vc) );

            // move flit to the link
            // NOTE: For wire pipelining,
            //       # of traversing flits on the link must be less than link latency.
            assert(((int) m_link_vec[out_pc].m_w_pipeline.size()) < m_link_vec[out_pc].m_delay_factor*g_cfg.link_latency);
            m_link_vec[out_pc].m_w_pipeline.push_back(make_pair(read_flit, make_pair(out_vc, simtime())));

            // pipeline stage latency
            m_pipe_lat_ST_tab->tabulate(simtime() - read_flit->m_clk_enter_stage);
            read_flit->m_clk_enter_stage = simtime();

#ifdef _DEBUG_ROUTER
            debugST(read_flit, in_pc, out_pc, next_router_id, next_in_pc, out_vc, false);
#endif
        } // if (isEjectChannel(out_pc)) {

        // remove a flit from xbar
        m_xbar.m_waiting_in_vc_vec[in_pc] = INVALID_VC;

        // update xbar outport status
        assert(m_xbar.m_outport_free_vec[out_pc] == false);
        m_xbar.m_outport_free_vec[out_pc] = true;

        if (read_flit->isTail()) {
            // change input module status (S->I)
            m_in_mod_vec[in_pc][in_vc].m_state = IN_MOD_I;
            m_in_mod_vec[in_pc][in_vc].m_out_pc = INVALID_PC;
            m_in_mod_vec[in_pc][in_vc].m_out_vc = INVALID_VC;

            // change output module status (V->I)
            assert(m_out_mod_vec[out_pc][out_vc].m_state == OUT_MOD_V);
            m_out_mod_vec[out_pc][out_vc].m_state = OUT_MOD_I;
        }

        num_xbar_passes++;

        // intra-router flit latency
        m_flit_lat_router_tab->tabulate(simtime() - read_flit->m_clk_enter_router);

        // record power
        if (!g_sim.m_warmup_phase) {
            m_power_tmpl->record_buffer_read(read_flit, in_pc);
            m_power_tmpl->record_xbar_trav(read_flit, in_pc, out_pc);

            if (g_cfg.profile_power) {
                m_power_tmpl_profile->record_buffer_read(read_flit, in_pc);
                m_power_tmpl_profile->record_xbar_trav(read_flit, in_pc, out_pc);
            }
        }
    }

    // record power: number of crossbar traversals performed in this call
    if (!g_sim.m_warmup_phase) {
        if (num_xbar_passes > 0) {
            m_power_tmpl->record_xbar_trav_num(num_xbar_passes);

            if (g_cfg.profile_power)
                m_power_tmpl_profile->record_xbar_trav_num(num_xbar_passes);
        }
    }
}
Exemplo n.º 29
0
/**
 * Tunneling stage: lets a flit bypass the regular pipeline.
 *
 * For each input PC with tunnel information recorded (m_in_vc is valid) and a
 * flit waiting on that VC, the flit is checked against the tunnel: matching
 * flow/destination (per g_cfg.router_tunnel_type), matching output PC/VC,
 * available credit, idle crossbar input/output and room on the link. If any
 * check fails the input PC is skipped (goto NO_TUNNEL) and nothing is
 * changed. Otherwise a credit is returned upstream, the flit is written
 * either to the NI output or to the link pipeline, it is removed from the
 * input queue, and the input-module state is updated.
 *
 * NOTE(review): unlike stageST(), the tail-flit path here does not set the
 * output module back to OUT_MOD_I, and an ATOM flit leaves it at OUT_MOD_V --
 * confirm whether that state is released elsewhere.
 * NOTE(review): the ejection port is taken from the packet's m_NI_out_pos
 * here, whereas stageST() derives it from out_pc -- confirm this is intended.
 */
void Router::stageTN()
{
    for (int in_pc=0; in_pc<m_num_pc; in_pc++) {
        int tunnel_in_vc = m_tunnel_info_vec[in_pc].m_in_vc;
        int tunnel_out_pc = m_tunnel_info_vec[in_pc].m_out_pc;
        int tunnel_out_vc = m_tunnel_info_vec[in_pc].m_out_vc;

        if (tunnel_in_vc == INVALID_VC)	// support tunneling ?
            continue;

        if (m_flitQ->isEmpty(in_pc, tunnel_in_vc)) // has a flit ?
            continue;

        // peek one flit from the input buffer
        Flit* p_flit = m_flitQ->peek(in_pc, tunnel_in_vc);
        assert(p_flit);

        switch (p_flit->type()) {
        case HEAD_FLIT:
        case ATOM_FLIT:
          {
            // the head flit must belong to the flow/destination the tunnel was set up for
            FlitHead* p_head_flit = (FlitHead*) p_flit;
            switch(g_cfg.router_tunnel_type) {
            case TUNNELING_PER_FLOW:
                if (m_tunnel_info_vec[in_pc].m_flow != make_pair(p_head_flit->src_router_id(), p_head_flit->dest_router_id()))
                    goto NO_TUNNEL;
                break;
            case TUNNELING_PER_DEST:
                if (m_tunnel_info_vec[in_pc].m_dest_router_id != p_head_flit->dest_router_id())
                    goto NO_TUNNEL;
                break;
            case TUNNELING_PER_OUTPORT:
                break;
            default:
                assert(0);
            }

            // check input module status
            if (m_in_mod_vec[in_pc][tunnel_in_vc].m_state != IN_MOD_I)
                goto NO_TUNNEL;

            // Step 1.1: do routing
            int out_pc = g_Routing->selectOutPC(this, tunnel_in_vc, (FlitHead*) p_flit);
            assert(out_pc < (int) m_connNextRouter_vec.size());
            int next_router_id = m_connNextRouter_vec[out_pc].first;
            int next_in_pc = m_connNextRouter_vec[out_pc].second;

            // the routed output port must be the tunnel's output port
            if (tunnel_out_pc != out_pc)
                goto NO_TUNNEL;

            // Step 2.1: reserve VC
            if (! isEjectChannel(out_pc)) {
                if (m_out_mod_vec[out_pc][tunnel_out_vc].m_state != OUT_MOD_I ) // reserved ?
                    goto NO_TUNNEL;
            }

            // Step 3.1: check no request in SA for designated input and output ports
            // FIXME: do we need this?
            // if (! m_sw_alloc->hasNoReq(in_pc, out_pc))
            //    goto NO_TUNNEL;

            // 03/14/08 credit-based flow control
            if (! hasCredit(out_pc, tunnel_out_vc) ) // no credit?
                goto NO_TUNNEL;

            // Step 4.1: check no flit in xbar for designated input and output ports
            if (m_xbar.m_waiting_in_vc_vec[in_pc] != INVALID_VC || ! m_xbar.m_outport_free_vec[out_pc])
                goto NO_TUNNEL;

            // Step 5.1: check no flit in a link
            if (! isEjectChannel(out_pc)) {
                if (((int) m_link_vec[out_pc].m_w_pipeline.size()) >= m_link_vec[out_pc].m_delay_factor*g_cfg.link_latency)
                    goto NO_TUNNEL;
            }

            // Now checking is done for tunneling.

            // Step 2.2: reserve VC
            if (! isEjectChannel(out_pc)) {
                m_out_mod_vec[out_pc][tunnel_out_vc].m_state = OUT_MOD_V;
            }

            // create a credit and send it to upstream router
            int prev_router_id = m_connPrevRouter_vec[in_pc].first;
            int prev_out_pc = m_connPrevRouter_vec[in_pc].second;
            if (prev_router_id != INVALID_ROUTER_ID && prev_out_pc != INVALID_PC) {
                Credit* p_credit = g_CreditPool.alloc();

                p_credit->m_out_pc = prev_out_pc;
                p_credit->m_out_vc = tunnel_in_vc;
                p_credit->m_num_credits = 1;
                p_credit->m_clk_deposit = simtime() + g_Router_vec[prev_router_id]->getLink(prev_out_pc).m_delay_factor * g_cfg.link_latency;

                g_Router_vec[prev_router_id]->depositCredit(p_credit);
            }

            // 11/05/05: no stall at ST stage
            if (! isEjectChannel(out_pc)) {
                // a router id is checked against the router sentinel, as
                // done for the upstream router above
                assert(next_router_id != INVALID_ROUTER_ID);
                assert(next_in_pc != INVALID_PC);
            }
            // pipeline stage latency
            m_pipe_lat_ST_tab->tabulate(simtime() - p_flit->m_clk_enter_stage);
            p_flit->m_clk_enter_stage = simtime();

            // assert(m_xbar.m_waiting_in_vc_vec[in_pc] == INVALID_VC);
            // m_xbar.m_waiting_in_vc_vec[in_pc] = tunnel_in_vc;

            // assert(m_xbar.m_outport_free_vec[out_pc] == true);
            // m_xbar.m_outport_free_vec[out_pc] = false;

            // Step 4.2: traverse a crossbar
            // move flit to the link
            if (isEjectChannel(out_pc)) {
                // select ejection port
                int epc = p_flit->getPkt()->m_NI_out_pos;
                NIOutput* p_ni_output = getNIOutput(epc);
                assert(p_ni_output);
                assert(m_id == p_flit->getPkt()->getDestRouterID());

                p_ni_output->writeFlit(p_flit);

                // 03/15/06 fast simulation
                m_num_flits_inside--;
            } else {
                m_link_vec[out_pc].m_w_pipeline.push_back(make_pair(p_flit, make_pair(tunnel_out_vc, simtime())));
                // decrease credit
                decCredit(out_pc, tunnel_out_vc, 1);
            }

            // read a flit
            m_flitQ->read(in_pc, tunnel_in_vc);

            // bypass pipeline
            m_num_tunnel_flit_vec[in_pc][tunnel_in_vc]++;
#ifdef _DEBUG_ROUTER
            // trace marker: only emitted in router-debug builds
            printf("here1\n");
#endif

#ifdef _DEBUG_CHECK_BUFFER_INTEGRITY
            assert(checkBufferIntegrity(in_pc, tunnel_in_vc, p_flit));
#endif
            // change input module status for only multi-flit packets
            assert(m_in_mod_vec[in_pc][tunnel_in_vc].m_state == IN_MOD_I);
            if (p_flit->type() != ATOM_FLIT) {
                m_in_mod_vec[in_pc][tunnel_in_vc].m_state = IN_MOD_S;
                m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc = out_pc;
                m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc = tunnel_out_vc;
            }
// printf("TN3 router=%d m_in_mod_vec[%d][%d]: state=%d, out_pc=%d, out_vc=%d\n", id(), in_pc, tunnel_in_vc, m_in_mod_vec[in_pc][tunnel_in_vc].m_state, m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc, m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc);


#ifdef _DEBUG_ROUTER
            debugTN(p_flit, in_pc, tunnel_in_vc, out_pc, tunnel_out_vc);
#endif

          }
            break;
        case MIDL_FLIT:
        case TAIL_FLIT:
          {
            // body flits follow the output PC/VC recorded in the input module
            assert(m_in_mod_vec[in_pc][tunnel_in_vc].m_state == IN_MOD_S);
            int out_pc = m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc;
            int out_vc = m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc;

            if (tunnel_out_pc != out_pc)
                goto NO_TUNNEL;

            if (tunnel_out_vc != out_vc)	// output VC for tunneling is determined at previous packet delivery.
                goto NO_TUNNEL;

            // Step 3.1: check no request in SA for designated input and output ports
            // FIXME: do we need this?
            // if (! m_sw_alloc->hasNoReq(in_pc, out_pc))
            //    goto NO_TUNNEL;

            // check no flit in xbar for designated input and output ports
            if (m_xbar.m_waiting_in_vc_vec[in_pc] != INVALID_VC || ! m_xbar.m_outport_free_vec[out_pc])
                goto NO_TUNNEL;

            // 03/14/08 credit-based flow control
            if (! hasCredit(out_pc, out_vc) ) // no credit?
                goto NO_TUNNEL;

            // check no flit in a link
            if (! isEjectChannel(out_pc)) {
                if (((int) m_link_vec[out_pc].m_w_pipeline.size()) >= m_link_vec[out_pc].m_delay_factor*g_cfg.link_latency)
                    goto NO_TUNNEL;
            }

            // Now checking is done for tunneling.

            // delete switch allocation status if reserved
            m_sw_arb->del(in_pc, tunnel_in_vc);

            // create a credit and send it to upstream router
            int prev_router_id = m_connPrevRouter_vec[in_pc].first;
            int prev_out_pc = m_connPrevRouter_vec[in_pc].second;
            if (prev_router_id != INVALID_ROUTER_ID && prev_out_pc != INVALID_PC) {
                Credit* p_credit = g_CreditPool.alloc();

                p_credit->m_out_pc = prev_out_pc;
                p_credit->m_out_vc = tunnel_in_vc;
                p_credit->m_num_credits = 1;
                p_credit->m_clk_deposit = simtime() + g_Router_vec[prev_router_id]->getLink(prev_out_pc).m_delay_factor * g_cfg.link_latency;

                g_Router_vec[prev_router_id]->depositCredit(p_credit);
            }

            // Step 4: traverse a crossbar
            // move flit to the link
            if (isEjectChannel(out_pc)) {
                // select ejection port
                int epc = p_flit->getPkt()->m_NI_out_pos;
                NIOutput* p_ni_output = getNIOutput(epc);
                assert(p_ni_output);
                assert(m_id == p_flit->getPkt()->getDestRouterID());

                p_ni_output->writeFlit(p_flit);

                // 03/15/06 fast simulation
                m_num_flits_inside--;
            } else {
                m_link_vec[out_pc].m_w_pipeline.push_back(make_pair(p_flit, make_pair(out_vc, simtime())));
                // decrease credit
                decCredit(out_pc, out_vc, 1);
            }

            // clear status of input module
            if (p_flit->isTail()) {
//              assert(eject_vc_sts[epc][out_vc] == false);
//              eject_vc_sts[epc][out_vc] = true;
                m_in_mod_vec[in_pc][tunnel_in_vc].m_state = IN_MOD_I;
                m_in_mod_vec[in_pc][tunnel_in_vc].m_out_pc = INVALID_PC;
                m_in_mod_vec[in_pc][tunnel_in_vc].m_out_vc = INVALID_VC;
            }

            // read a flit
            m_flitQ->read(in_pc, tunnel_in_vc);

            // bypass pipeline
            m_num_tunnel_flit_vec[in_pc][tunnel_in_vc]++;
#ifdef _DEBUG_ROUTER
            // trace marker: only emitted in router-debug builds
            printf("here2\n");
#endif

#ifdef _DEBUG_CHECK_BUFFER_INTEGRITY
            assert(checkBufferIntegrity(in_pc, tunnel_in_vc, p_flit));
#endif

#ifdef _DEBUG_ROUTER
            debugTN(p_flit, in_pc, tunnel_in_vc, out_pc, out_vc);
#endif
          }
            break;
        default:
            assert(0);
        } // switch (p_flit->type()) {

NO_TUNNEL:
        ;
    } // for (int in_pc=0; in_pc<m_num_pc; in_pc++) {

    return;
}
Exemplo n.º 30
0
void Router::stageLT()
{
    for (int out_pc=0; out_pc<m_num_pc; out_pc++) {
        if (m_link_vec[out_pc].m_w_pipeline.size() == 0) // no flits to traverse a link?
            continue;

        Link & link = m_link_vec[out_pc];
        pair< Flit*, pair< int, double > > & front_flit = link.m_w_pipeline.front();
        Flit* p_flit = front_flit.first;
        int next_in_vc = front_flit.second.first;
        double store_clk = front_flit.second.second;
        assert(next_in_vc != INVALID_VC);

        // 03/06/06: support for multi-cycle link
        int link_lat = link.m_delay_factor * g_cfg.link_latency;
        if (link_lat > 1) {
            int link_traverse_time = (int) (simtime() - store_clk);
            if (link_traverse_time < link_lat) {
                continue;
            }
        }

#ifdef LINK_DVS
        // When link frequency is changed, link prevents flit traversal
        // for g_cfg.link_dvs_freq_transit_delay time.
        if (simtime() >= link.dvs_freq_set_clk &&
                simtime() < link.dvs_freq_set_clk + g_cfg.link_dvs_freq_transit_delay) {
// printf("router-%d link-%d at clk=%.0lf stall\n", m_id, out_pc, simtime());
            continue;
        }

        double DVS_lat = g_cfg.chip_freq / link.dvs_freq;
        assert(DVS_lat >= 1.0);

        /*
        if (m_id == 5 && out_pc == 0 && simtime() > 21000001.0) {
        printf("router=%d out_pc=%d DVS_lat=%.1lf utilz=%lg freq=%lg voltage=%lg\n",
        m_id, out_pc, DVS_lat, link.link_expected_utilz, link.dvs_freq, link.dvs_voltage);
        }
        */

        // FIXME: support for multi-cycle link
        double DVS_ready_clk;
        if (link.last_sent_clk > link.store_clk) {
            DVS_ready_clk = link.last_sent_clk;
        } else {
            DVS_ready_clk = link.store_clk;
        }
        DVS_ready_clk += DVS_lat;

        if (DVS_ready_clk > simtime())
            continue;
#endif

        // get router ID and input PC of the downstream router for out_pc
        int next_router_id = m_connNextRouter_vec[out_pc].first;
        int next_in_pc = m_connNextRouter_vec[out_pc].second;
        assert(next_router_id != INVALID_ROUTER_ID);
        assert(next_in_pc != INVALID_PC);
        Router* p_next_router = g_Router_vec[next_router_id];
        FlitQ* p_next_flitQ = p_next_router->flitQ();
        assert(! p_next_flitQ->isFull(next_in_pc, next_in_vc) ); // not full in downstream router's buffer

        // pop flit from link
        link.m_w_pipeline.pop_front();

        // write flit to the downstream router's buffer
        p_next_flitQ->write(next_in_pc, next_in_vc, p_flit);

        p_next_router->m_num_flit_inj_from_router++;

        if (p_flit->isHead()) { // per-packet accounting
            p_next_router->m_num_pkt_inj_from_router++;

            p_flit->getPkt()->m_wire_delay += link_lat;	// wire delay (T_w) for this packet
        }

        // intra-router flit latency
        p_flit->m_clk_enter_router = simtime();

        // pipeline stage latency
        m_pipe_lat_LT_tab->tabulate(simtime() - p_flit->m_clk_enter_stage);
        p_flit->m_clk_enter_stage = simtime();

        // 03/15/06 fast simulation
        m_num_flits_inside--;
        if (p_next_router->hasNoFlitsInside()) {
            p_next_router->wakeup();
// printf("WAKEUP r_%d process at clk=%.0lf\n", p_next_router->id, simtime());
        }
        p_next_router->incFlitsInside();

        // record power
        if (!g_sim.m_warmup_phase) {
            m_power_tmpl->record_link_trav(p_flit, out_pc);
            p_next_router->m_power_tmpl->record_buffer_write(p_flit, next_in_pc);

            if (g_cfg.profile_power) {
                m_power_tmpl_profile->record_link_trav(p_flit, out_pc);
                p_next_router->m_power_tmpl_profile->record_buffer_write(p_flit, next_in_pc);
            }
        }

#ifdef _DEBUG_ROUTER
        debugLT(p_flit, out_pc);
        p_next_router->debugIB(p_flit, next_in_pc, next_in_vc);
#endif

#ifdef LINK_DVS
        if (!g_sim.m_warmup_phase)
            m_sim_pc_dvs_link_op_vec[out_pc]++;

        if (link.m_store_clk > link.m_last_sent_clk)
            link.m_last_sent_clk = link.m_store_clk;
        link.m_last_sent_clk += DVS_lat;
#endif
    }
}