Exemplo n.º 1
0
/*
 * time unit:	1 cycle
 * e_fin:	average # of flits received by one input port during unit time
 * 		  (at most 0.5 for InfiniBand router)
 * e_buf_wrt:	average # of input buffer writes of all ports during unit time
 * 		e_buf_wrt = e_fin * n_buf_in
 * e_buf_rd:	average # of input buffer reads of all ports during unit time
 * 		e_buf_rd = e_buf_wrt
 * 		  (split across the different input ports in the program)
 * e_cbuf_fin:	average # of flits passing through the switch during unit time
 * 		e_cbuf_fin = e_fin * n_total_in
 * e_cbuf_wrt:	average # of central buffer writes during unit time
 * 		e_cbuf_wrt = e_cbuf_fin / (pipe_depth * pipe_width)
 * e_cbuf_rd:	average # of central buffer reads during unit time
 * 		e_cbuf_rd = e_cbuf_wrt
 * e_arb:	average # of arbitrations per arbiter during unit time
 * 		assume e_arb = 1
 *
 * NOTES: (1) negative print_depth means infinite print depth
 *
 * FIXME: (1) hack: SIM_reg_stat_energy cannot be used for shared buffer,
 *            we use it now anyway
 */
/*
 * Walk every router component enabled in `info`, add up its average
 * per-cycle energy, print the total through SIM_print_stat_energy(), and
 * return a buffer / switch / arbiter / total breakdown.
 *
 * info        - router configuration; selects which components are counted
 * router      - per-component power models handed to the stat helpers
 * print_depth - depth passed to the final SIM_print_stat_energy() call;
 *               sub-components are printed with NEXT_DEPTH(print_depth)
 * path        - label buffer; each component name is appended for the
 *               duration of one helper call and the buffer is then restored
 *               to its incoming length
 * max_avg     - forwarded unchanged to the per-component stat helpers
 * e_fin       - average number of flits received per input port per cycle
 * plot_flag   - only referenced by the disabled (#if 0) debug print
 * freq        - multiplied with the energy sums to obtain the power terms
 *
 * Each returned field is the matching power term divided by (freq / 1e9).
 */
struct orResult SIM_router_stat_energy(SIM_power_router_info_t *info, SIM_power_router_t *router, int print_depth, char *path, int max_avg, double e_fin, int plot_flag, double freq)
{
  // SESC changes
  struct orResult orr;
  double fac;
  // end of changes

  double e_sum = 0;

  /* expected number of accesses per cycle for each structure */
  const double rw_in       = e_fin * info->n_in;
  const double rw_cache_in = e_fin * info->n_cache_in;
  const double rw_mc_in    = e_fin * info->n_mc_in;
  const double rw_io_in    = e_fin * info->n_io_in;
  const double flits_sw    = e_fin * info->n_total_in;
  const double rw_out      = flits_sw / info->n_total_out * info->n_out;
  const double rw_cbuf     = flits_sw * info->flit_width / info->central_buf_info.blk_bits;

  const int sub_depth = NEXT_DEPTH(print_depth);
  /* remembered so the label buffer can be truncated back after each append */
  const u_int base_len = SIM_power_strlen(path);

  /* ---- buffers: one block per input-port class, then the output side ---- */
  if (info->in_buf) {
    e_sum += SIM_reg_stat_energy(&info->in_buf_info, &router->in_buf,
                                 rw_in, rw_in, sub_depth,
                                 SIM_power_strcat(path, "input buffer"), max_avg);
    SIM_power_res_path(path, base_len);
  }
  if (info->cache_in_buf) {
    e_sum += SIM_reg_stat_energy(&info->cache_in_buf_info, &router->cache_in_buf,
                                 rw_cache_in, rw_cache_in, sub_depth,
                                 SIM_power_strcat(path, "cache input buffer"), max_avg);
    SIM_power_res_path(path, base_len);
  }
  if (info->mc_in_buf) {
    e_sum += SIM_reg_stat_energy(&info->mc_in_buf_info, &router->mc_in_buf,
                                 rw_mc_in, rw_mc_in, sub_depth,
                                 SIM_power_strcat(path, "memory controller input buffer"), max_avg);
    SIM_power_res_path(path, base_len);
  }
  if (info->io_in_buf) {
    e_sum += SIM_reg_stat_energy(&info->io_in_buf_info, &router->io_in_buf,
                                 rw_io_in, rw_io_in, sub_depth,
                                 SIM_power_strcat(path, "I/O input buffer"), max_avg);
    SIM_power_res_path(path, base_len);
  }
  if (info->out_buf) {
    /* local output ports don't use router buffers */
    e_sum += SIM_reg_stat_energy(&info->out_buf_info, &router->out_buf,
                                 rw_out, rw_out, sub_depth,
                                 SIM_power_strcat(path, "output buffer"), max_avg);
    SIM_power_res_path(path, base_len);
  }

  /* everything accumulated so far is attributed to the buffers */
  const double p_buf = e_sum * freq;

  /* ---- switch fabric: main crossbar plus the central-buffer datapath ---- */
  if (info->crossbar_model) {
    e_sum += SIM_crossbar_stat_energy(&router->crossbar, sub_depth,
                                      SIM_power_strcat(path, "crossbar"),
                                      max_avg, flits_sw);
    SIM_power_res_path(path, base_len);
  }

  if (info->central_buf) {
    /* FIXME (carried over): the register-file model is used for a shared buffer */
    e_sum += SIM_reg_stat_energy(&info->central_buf_info, &router->central_buf,
                                 rw_cbuf, rw_cbuf, sub_depth,
                                 SIM_power_strcat(path, "central buffer"), max_avg);
    SIM_power_res_path(path, base_len);

    e_sum += SIM_crossbar_stat_energy(&router->in_cbuf_crsbar, sub_depth,
                                      SIM_power_strcat(path, "central buffer input crossbar"),
                                      max_avg, flits_sw);
    SIM_power_res_path(path, base_len);

    e_sum += SIM_crossbar_stat_energy(&router->out_cbuf_crsbar, sub_depth,
                                      SIM_power_strcat(path, "central buffer output crossbar"),
                                      max_avg, flits_sw);
    SIM_power_res_path(path, base_len);

    /* dirty hack kept from the original: pipeline registers are summed by hand */
    double e_pipe_regs = 0;
    double e_part;
    const u_int n_regs = info->central_buf_info.n_set * (info->central_buf_info.read_ports + info->central_buf_info.write_ports);

    /* e_switch is ignored for now because the wordline driver cap is overestimated */

    e_part = router->cbuf_ff.e_keep_0 * (info->pipe_depth - 1) * (n_regs - 2 * (rw_cbuf + rw_cbuf));
    SIM_print_stat_energy(SIM_power_strcat(path, "central buffer pipeline registers/keep 0"),
                          e_part, NEXT_DEPTH(sub_depth));
    SIM_power_res_path(path, base_len);
    e_pipe_regs += e_part;

    e_part = router->cbuf_ff.e_clock * (info->pipe_depth - 1) * n_regs;
    SIM_print_stat_energy(SIM_power_strcat(path, "central buffer pipeline registers/clock"),
                          e_part, NEXT_DEPTH(sub_depth));
    SIM_power_res_path(path, base_len);
    e_pipe_regs += e_part;

    SIM_print_stat_energy(SIM_power_strcat(path, "central buffer pipeline registers"),
                          e_pipe_regs, sub_depth);
    SIM_power_res_path(path, base_len);
    e_sum += e_pipe_regs;
  }

  /* whatever was added since the buffer snapshot belongs to the switch */
  const double p_switch = e_sum * freq - p_buf;

  /* ---- arbiters: per-class input (local) arbiters, then output (global) ---- */
  if (info->in_arb_model) {
    e_sum += SIM_arbiter_stat_energy(&router->in_arb, &info->in_arb_queue_info,
                                     e_fin / info->in_n_switch, sub_depth,
                                     SIM_power_strcat(path, "input arbiter"), max_avg)
             * info->in_n_switch * info->n_in;
    SIM_power_res_path(path, base_len);

    if (info->n_cache_in) {
      e_sum += SIM_arbiter_stat_energy(&router->cache_in_arb, &info->cache_in_arb_queue_info,
                                       e_fin / info->cache_n_switch, sub_depth,
                                       SIM_power_strcat(path, "cache input arbiter"), max_avg)
               * info->cache_n_switch * info->n_cache_in;
      SIM_power_res_path(path, base_len);
    }

    if (info->n_mc_in) {
      e_sum += SIM_arbiter_stat_energy(&router->mc_in_arb, &info->mc_in_arb_queue_info,
                                       e_fin / info->mc_n_switch, sub_depth,
                                       SIM_power_strcat(path, "memory controller input arbiter"), max_avg)
               * info->mc_n_switch * info->n_mc_in;
      SIM_power_res_path(path, base_len);
    }

    if (info->n_io_in) {
      e_sum += SIM_arbiter_stat_energy(&router->io_in_arb, &info->io_in_arb_queue_info,
                                       e_fin / info->io_n_switch, sub_depth,
                                       SIM_power_strcat(path, "I/O input arbiter"), max_avg)
               * info->io_n_switch * info->n_io_in;
      SIM_power_res_path(path, base_len);
    }
  }

  if (info->out_arb_model) {
    e_sum += SIM_arbiter_stat_energy(&router->out_arb, &info->out_arb_queue_info,
                                     flits_sw / info->n_switch_out, sub_depth,
                                     SIM_power_strcat(path, "output arbiter"), max_avg)
             * info->n_switch_out;
    SIM_power_res_path(path, base_len);
  }

  const double p_total   = e_sum * freq;
  /* the arbiter share is the remainder after buffers and switch */
  const double p_arbiter = p_total - p_buf - p_switch;

  SIM_print_stat_energy(path, e_sum, print_depth);

#if 0
  if (plot_flag)
    fprintf(stderr, "pbuf=%g\n pswitch=%g\n parbiter=%g \n ptotal=%g\n", p_buf, p_switch, p_arbiter, p_total);
#endif

  /* SESC: scale each power term by (freq / 1e9) before handing it back */
  fac = freq/1e9;
  orr.totEnergy    = p_total/fac;
  orr.bufEnergy    = p_buf/fac;
  orr.switchEnergy = p_switch/fac;
  orr.arbEnergy    = p_arbiter/fac;

  return orr;
}
Exemplo n.º 2
0
/*
 * time unit:	1 cycle
 * e_fin:	average # of flits received by one input port during unit time
 * 		  (at most 0.5 for InfiniBand router)
 * e_buf_wrt:	average # of input buffer writes of all ports during unit time
 * 		e_buf_wrt = e_fin * n_buf_in
 * e_buf_rd:	average # of input buffer reads of all ports during unit time
 * 		e_buf_rd = e_buf_wrt
 * 		  (split across the different input ports in the program)
 * e_cbuf_fin:	average # of flits passing through the switch during unit time
 * 		e_cbuf_fin = e_fin * n_total_in
 * e_cbuf_wrt:	average # of central buffer writes during unit time
 * 		e_cbuf_wrt = e_cbuf_fin / (pipe_depth * pipe_width)
 * e_cbuf_rd:	average # of central buffer reads during unit time
 * 		e_cbuf_rd = e_cbuf_wrt
 * e_arb:	average # of arbitrations per arbiter during unit time
 * 		assume e_arb = 1
 *
 * NOTES: (1) negative print_depth means infinite print depth
 *
 * FIXME: (1) hack: SIM_array_stat_energy cannot be used for shared buffer,
 *            we use it now anyway
 */
/*
 * Sum the average per-cycle energy of every router component enabled in
 * `info`, add the static energy term, print the result through
 * SIM_print_stat_energy(), and return the total energy.
 *
 * info        - router configuration; selects which components are counted
 * router      - per-component power models and static-current figures
 * print_depth - depth passed to the final SIM_print_stat_energy() call;
 *               sub-components are printed with NEXT_DEPTH(print_depth)
 * path        - label buffer; each component name is appended for the
 *               duration of one helper call and the buffer is then restored
 *               to its incoming length
 * max_avg     - forwarded unchanged to the per-component stat helpers
 * e_fin       - average number of flits received per input port per cycle
 * plot_flag   - when non-zero, a one-line power breakdown (buffer, crossbar,
 *               VC allocator, switch allocator, clock, total) goes to stdout
 * freq        - multiplied with the energy sums to obtain the dynamic powers
 *
 * The per-component dynamic powers are obtained by differencing the running
 * energy total, so the order of the sections below matters: each P*_dyn is
 * "everything accumulated so far" minus the components already attributed.
 *
 * Returns the accumulated energy (dynamic components plus Estatic).
 */
double SIM_router_stat_energy(SIM_router_info_t *info, SIM_router_power_t *router, int print_depth, char *path, int max_avg, double e_fin, int plot_flag, double freq)
{
	double Eavg = 0, Eatomic, Estruct, Estatic = 0;
	double Pbuf = 0, Pxbar = 0, Pvc_arbiter = 0, Psw_arbiter = 0, Pclock = 0, Ptotal = 0;
	double Pbuf_static = 0, Pxbar_static = 0, Pvc_arbiter_static = 0, Psw_arbiter_static = 0, Pclock_static = 0;
	double Pbuf_dyn = 0, Pxbar_dyn = 0, Pvc_arbiter_dyn = 0, Psw_arbiter_dyn = 0, Pclock_dyn = 0;
	double e_in_buf_rw, e_cache_in_buf_rw, e_mc_in_buf_rw, e_io_in_buf_rw;
	double e_cbuf_fin, e_cbuf_rw, e_out_buf_rw;
	int next_depth;
	u_int path_len, n_regs;
	int vc_allocator_enabled = 1;

	/* expected value computation */
	e_in_buf_rw       = e_fin * info->n_in;
	e_cache_in_buf_rw = e_fin * info->n_cache_in;
	e_mc_in_buf_rw    = e_fin * info->n_mc_in;
	e_io_in_buf_rw    = e_fin * info->n_io_in;
	e_cbuf_fin        = e_fin * info->n_total_in;
	e_out_buf_rw      = e_cbuf_fin / info->n_total_out * info->n_out;
	e_cbuf_rw         = e_cbuf_fin * info->flit_width / info->central_buf_info.blk_bits;

	next_depth = NEXT_DEPTH(print_depth);
	/* remembered so the label buffer can be truncated back after each append */
	path_len = SIM_strlen(path);

	/* input buffers */
	if (info->in_buf) {
		Eavg += SIM_array_stat_energy(&info->in_buf_info, &router->in_buf, e_in_buf_rw, e_in_buf_rw, next_depth, SIM_strcat(path, "input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}
	if (info->cache_in_buf) {
		Eavg += SIM_array_stat_energy(&info->cache_in_buf_info, &router->cache_in_buf, e_cache_in_buf_rw, e_cache_in_buf_rw, next_depth, SIM_strcat(path, "cache input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}
	if (info->mc_in_buf) {
		Eavg += SIM_array_stat_energy(&info->mc_in_buf_info, &router->mc_in_buf, e_mc_in_buf_rw, e_mc_in_buf_rw, next_depth, SIM_strcat(path, "memory controller input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}
	if (info->io_in_buf) {
		Eavg += SIM_array_stat_energy(&info->io_in_buf_info, &router->io_in_buf, e_io_in_buf_rw, e_io_in_buf_rw, next_depth, SIM_strcat(path, "I/O input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}

	/* output buffers */
	if (info->out_buf) {
		/* local output ports don't use router buffers */
		Eavg += SIM_array_stat_energy(&info->out_buf_info, &router->out_buf, e_out_buf_rw, e_out_buf_rw, next_depth, SIM_strcat(path, "output buffer"), max_avg);
		SIM_res_path(path, path_len);
	}

	/* central buffer (counted as part of the buffer power below) */
	if (info->central_buf) {
		Eavg += SIM_array_stat_energy(&info->central_buf_info, &router->central_buf, e_cbuf_rw, e_cbuf_rw, next_depth, SIM_strcat(path, "central buffer"), max_avg);
		SIM_res_path(path, path_len);

		Eavg += SIM_crossbar_stat_energy(&router->in_cbuf_crsbar, next_depth, SIM_strcat(path, "central buffer input crossbar"), max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);

		Eavg += SIM_crossbar_stat_energy(&router->out_cbuf_crsbar, next_depth, SIM_strcat(path, "central buffer output crossbar"), max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);

		/* dirty hack, REMEMBER to REMOVE Estruct and Eatomic */
		Estruct = 0;
		n_regs = info->central_buf_info.n_set * (info->central_buf_info.read_ports + info->central_buf_info.write_ports);

		/* ignore e_switch for now because we overestimate wordline driver cap */

		Eatomic = router->cbuf_ff.e_keep_0 * (info->pipe_depth - 1) * (n_regs - 2 * (e_cbuf_rw + e_cbuf_rw));
		SIM_print_stat_energy(SIM_strcat(path, "central buffer pipeline registers/keep 0"), Eatomic, NEXT_DEPTH(next_depth));
		SIM_res_path(path, path_len);
		Estruct += Eatomic;

		Eatomic = router->cbuf_ff.e_clock * (info->pipe_depth - 1) * n_regs;
		SIM_print_stat_energy(SIM_strcat(path, "central buffer pipeline registers/clock"), Eatomic, NEXT_DEPTH(next_depth));
		SIM_res_path(path, path_len);
		Estruct += Eatomic;

		SIM_print_stat_energy(SIM_strcat(path, "central buffer pipeline registers"), Estruct, next_depth);
		SIM_res_path(path, path_len);
		Eavg += Estruct;
	}

	/* everything accumulated so far is attributed to the buffers */
	Pbuf_dyn = Eavg * freq;
	Pbuf_static = router->I_buf_static * Vdd * SCALE_S;
	Pbuf = Pbuf_dyn + Pbuf_static;

	/* main crossbar */
	if (info->crossbar_model) {
		Eavg += SIM_crossbar_stat_energy(&router->crossbar, next_depth, SIM_strcat(path, "crossbar"), max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);
	}

	Pxbar_dyn = (Eavg * freq - Pbuf_dyn);
	Pxbar_static = router->I_crossbar_static * Vdd * SCALE_S;
	Pxbar = Pxbar_dyn + Pxbar_static;

	/* switch allocation (arbiter energy only) */
	/* input (local) arbiter for switch allocation */
	if (info->sw_in_arb_model) {
		/* assume # of active input arbiters is (info->in_n_switch * info->n_in * e_fin)
		 * assume (info->n_v_channel*info->n_v_class)/2 vcs are making request at each arbiter
		 * NOTE(review): if n_v_channel/n_v_class are integers, the "/2" truncates - confirm intended */

		Eavg += SIM_arbiter_stat_energy(&router->sw_in_arb, &info->sw_in_arb_queue_info, (info->n_v_channel*info->n_v_class)/2, next_depth, SIM_strcat(path, "switch allocator input arbiter"), max_avg) * info->in_n_switch * info->n_in * e_fin;
		SIM_res_path(path, path_len);

		if (info->n_cache_in) {
			Eavg += SIM_arbiter_stat_energy(&router->cache_in_arb, &info->cache_in_arb_queue_info, e_fin / info->cache_n_switch, next_depth, SIM_strcat(path, "cache input arbiter"), max_avg) * info->cache_n_switch * info->n_cache_in;
			SIM_res_path(path, path_len);
		}

		if (info->n_mc_in) {
			Eavg += SIM_arbiter_stat_energy(&router->mc_in_arb, &info->mc_in_arb_queue_info, e_fin / info->mc_n_switch, next_depth, SIM_strcat(path, "memory controller input arbiter"), max_avg) * info->mc_n_switch * info->n_mc_in;
			SIM_res_path(path, path_len);
		}

		if (info->n_io_in) {
			Eavg += SIM_arbiter_stat_energy(&router->io_in_arb, &info->io_in_arb_queue_info, e_fin / info->io_n_switch, next_depth, SIM_strcat(path, "I/O input arbiter"), max_avg) * info->io_n_switch * info->n_io_in;
			SIM_res_path(path, path_len);
		}
	}

	/* output (global) arbiter for switch allocation */
	if (info->sw_out_arb_model) {
		/* assume # of active output arbiters is (info->n_switch_out * (e_cbuf_fin/info->n_switch_out))
		 * assume (info->n_in)/2 request at each output arbiter */

		Eavg += SIM_arbiter_stat_energy(&router->sw_out_arb, &info->sw_out_arb_queue_info, info->n_in / 2, next_depth, SIM_strcat(path, "switch allocator output arbiter"), max_avg) * info->n_switch_out * (e_cbuf_fin / info->n_switch_out);

		SIM_res_path(path, path_len);
	}

	/* Attribute the switch-allocator energy whenever EITHER arbiter stage was
	 * modelled.  The previous test checked sw_out_arb_model twice, so with
	 * only the input arbiter enabled its energy was added to Eavg above but
	 * left out of Psw_arbiter and later folded into the VC-allocator or
	 * clock figure instead. */
	if (info->sw_in_arb_model || info->sw_out_arb_model) {
		Psw_arbiter_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn;
		Psw_arbiter_static = router->I_sw_arbiter_static * Vdd * SCALE_S;
		Psw_arbiter = Psw_arbiter_dyn + Psw_arbiter_static;
	}

	/* virtual channel allocation (arbiter energy only) */
	/* HACKs:
	 *   - assume 1 header flit in every 5 flits for now, hence * 0.2  */

	if (info->vc_allocator_type == ONE_STAGE_ARB && info->vc_out_arb_model) {
		/* one stage arbitration (vc allocation) */
		/* # of active arbiters: flit_rate * n_in * 0.2 / 2 */
		double nActiveArbs = e_fin * info->n_in * 0.2 / 2;

		/* NOTE(review): the original comment assumed 2 requests per active
		 * arbiter on average, but the request count passed here is 1 (the
		 * two-stage path below passes 2) - confirm which is intended. */
		Eavg += SIM_arbiter_stat_energy(&router->vc_out_arb, &info->vc_out_arb_queue_info,
				1, next_depth,
				SIM_strcat(path, "vc allocation arbiter"),
				max_avg) * nActiveArbs;

		SIM_res_path(path, path_len);
	}
	else if (info->vc_allocator_type == TWO_STAGE_ARB && info->vc_in_arb_model && info->vc_out_arb_model) {
		/* Both stage models are guaranteed present by the branch condition,
		 * so no per-stage re-check is needed. */

		/* # of active stage-1 arbiters (# of new header flits) */
		double nStage1Arbs = e_fin * info->n_in * 0.2;
		/* # of active stage-2 arbiters: flit_rate * n_in * 0.2 / 2 */
		double nStage2Arbs = e_fin * info->n_in * 0.2 / 2;

		/* first stage arbitration (vc allocation):
		 * assume an active arbiter has n_v_channel/2 requests on average
		 * (should use expected value from simulation) */
		Eavg += SIM_arbiter_stat_energy(&router->vc_in_arb, &info->vc_in_arb_queue_info, info->n_v_channel/2, next_depth,
				SIM_strcat(path, "vc allocation arbiter (stage 1)"),
				max_avg) * nStage1Arbs;

		SIM_res_path(path, path_len);

		/* second stage arbitration (vc allocation):
		 * assume for each active arbiter, there is 2 requests on average
		 * (should use expected value from simulation) */
		Eavg += SIM_arbiter_stat_energy(&router->vc_out_arb, &info->vc_out_arb_queue_info,
				2, next_depth,
				SIM_strcat(path, "vc allocation arbiter (stage 2)"),
				max_avg) * nStage2Arbs;

		SIM_res_path(path, path_len);
	}
	else if (info->vc_allocator_type == VC_SELECT && info->n_v_channel > 1 && info->n_in > 1) {
		/* VC selection is modelled as an array with one read and one write
		 * per header flit */
		double n_read = e_fin * info->n_in * 0.2;
		double n_write = e_fin * info->n_in * 0.2;
		Eavg += SIM_array_stat_energy(&info->vc_select_buf_info, &router->vc_select_buf, n_read, n_write, next_depth, SIM_strcat(path, "vc selection"), max_avg);
		SIM_res_path(path, path_len);
	}
	else {
		vc_allocator_enabled = 0; /* set to 0 means no vc allocator is used */
	}

	if (info->n_v_channel > 1 && vc_allocator_enabled) {
		Pvc_arbiter_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn - Psw_arbiter_dyn;
		Pvc_arbiter_static = router->I_vc_arbiter_static * Vdd * SCALE_S;
		Pvc_arbiter = Pvc_arbiter_dyn + Pvc_arbiter_static;
	}

	/* router clock power (supported for 90nm and below) */
	if (PARM(TECH_POINT) <= 90) {
		Eavg += SIM_total_clockEnergy(info, router);
		Pclock_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn - Pvc_arbiter_dyn - Psw_arbiter_dyn;
		Pclock_static = router->I_clock_static * Vdd * SCALE_S;
		Pclock = Pclock_dyn + Pclock_static;
	}

	/* static power: folded into the returned energy and into Ptotal */
	Estatic = router->I_static * Vdd * Period * SCALE_S;
	SIM_print_stat_energy(SIM_strcat(path, "static energy"), Estatic, next_depth);
	SIM_res_path(path, path_len);
	Eavg += Estatic;
	Ptotal = Eavg * freq;

	SIM_print_stat_energy(path, Eavg, print_depth);

	if (plot_flag)
		fprintf(stdout, "Buffer:%g\tCrossbar:%g\tVC_allocator:%g\tSW_allocator:%g\tClock:%g\tTotal:%g\n", Pbuf, Pxbar, Pvc_arbiter, Psw_arbiter, Pclock, Ptotal);

	return Eavg;
}