/*
 * SIM_router_stat_energy - expected per-cycle router energy plus a
 * buffer / switch / arbiter power breakdown (SESC variant).
 *
 * time unit: 1 cycle
 *   e_fin:      average # of flits received by one input port during unit
 *               time (at most 0.5 for InfiniBand router)
 *   e_buf_wrt:  average # of input buffer writes of all ports during unit time
 *               e_buf_wrt = e_fin * n_buf_in
 *   e_buf_rd:   average # of input buffer reads of all ports during unit time
 *               e_buf_rd = e_buf_wrt
 *               (split into the different input port classes in the code)
 *   e_cbuf_fin: average # of flits passing through the switch during unit time
 *               e_cbuf_fin = e_fin * n_total_in
 *   e_cbuf_wrt: average # of central buffer writes during unit time
 *               e_cbuf_wrt = e_cbuf_fin / (pipe_depth * pipe_width)
 *               (the code evaluates this as e_cbuf_fin * flit_width / blk_bits)
 *   e_cbuf_rd:  average # of central buffer reads during unit time
 *               e_cbuf_rd = e_cbuf_wrt
 *   e_arb:      average # of arbitrations per arbiter during unit time
 *               assume e_arb = 1
 *
 * Parameters:
 *   info        router configuration (port counts, enabled models, ...)
 *   router      per-component energy models
 *   print_depth depth limit forwarded to SIM_print_stat_energy
 *   path        mutable label buffer; each component name is appended with
 *               SIM_power_strcat and the buffer is restored to its incoming
 *               length with SIM_power_res_path afterwards
 *   max_avg     forwarded unchanged to the component models
 *   e_fin       see above
 *   plot_flag   only referenced by the disabled (#if 0) debug print
 *   freq        multiplier turning the accumulated energy into power
 *
 * Returns a struct orResult whose totEnergy, bufEnergy, switchEnergy and
 * arbEnergy fields hold the respective power figures divided by (freq / 1e9).
 *
 * NOTES: (1) negative print_depth means infinite print depth
 *
 * FIXME: (1) hack: SIM_reg_stat_energy cannot be used for shared buffer,
 *            we use it now anyway
 */
struct orResult SIM_router_stat_energy(SIM_power_router_info_t *info, SIM_power_router_t *router, int print_depth, char *path, int max_avg, double e_fin, int plot_flag, double freq)
{
	/* SESC addition: breakdown handed back to the caller */
	struct orResult result;
	double ghz;

	double energy = 0;
	double e_part, e_pipe_regs;
	double p_buf, p_switch, p_arb, p_total;
	u_int reg_count;

	/* expected accesses per cycle for every structure */
	const double rw_in = e_fin * info->n_in;
	const double rw_cache_in = e_fin * info->n_cache_in;
	const double rw_mc_in = e_fin * info->n_mc_in;
	const double rw_io_in = e_fin * info->n_io_in;
	const double flits_switched = e_fin * info->n_total_in;
	const double rw_out = flits_switched / info->n_total_out * info->n_out;
	const double rw_cbuf = flits_switched * info->flit_width / info->central_buf_info.blk_bits;

	const int child_depth = NEXT_DEPTH(print_depth);
	const u_int base_len = SIM_power_strlen(path);

	/* ---- buffers ---- */
	if (info->in_buf) {
		energy += SIM_reg_stat_energy(&info->in_buf_info, &router->in_buf,
				rw_in, rw_in, child_depth,
				SIM_power_strcat(path, "input buffer"), max_avg);
		SIM_power_res_path(path, base_len);
	}

	if (info->cache_in_buf) {
		energy += SIM_reg_stat_energy(&info->cache_in_buf_info, &router->cache_in_buf,
				rw_cache_in, rw_cache_in, child_depth,
				SIM_power_strcat(path, "cache input buffer"), max_avg);
		SIM_power_res_path(path, base_len);
	}

	if (info->mc_in_buf) {
		energy += SIM_reg_stat_energy(&info->mc_in_buf_info, &router->mc_in_buf,
				rw_mc_in, rw_mc_in, child_depth,
				SIM_power_strcat(path, "memory controller input buffer"), max_avg);
		SIM_power_res_path(path, base_len);
	}

	if (info->io_in_buf) {
		energy += SIM_reg_stat_energy(&info->io_in_buf_info, &router->io_in_buf,
				rw_io_in, rw_io_in, child_depth,
				SIM_power_strcat(path, "I/O input buffer"), max_avg);
		SIM_power_res_path(path, base_len);
	}

	/* local output ports don't use router buffers */
	if (info->out_buf) {
		energy += SIM_reg_stat_energy(&info->out_buf_info, &router->out_buf,
				rw_out, rw_out, child_depth,
				SIM_power_strcat(path, "output buffer"), max_avg);
		SIM_power_res_path(path, base_len);
	}

	/* everything accumulated so far is buffer energy */
	p_buf = energy * freq;

	/* ---- switch fabric: main crossbar and central buffer ---- */
	if (info->crossbar_model) {
		energy += SIM_crossbar_stat_energy(&router->crossbar, child_depth,
				SIM_power_strcat(path, "crossbar"), max_avg, flits_switched);
		SIM_power_res_path(path, base_len);
	}

	if (info->central_buf) {
		energy += SIM_reg_stat_energy(&info->central_buf_info, &router->central_buf,
				rw_cbuf, rw_cbuf, child_depth,
				SIM_power_strcat(path, "central buffer"), max_avg);
		SIM_power_res_path(path, base_len);

		energy += SIM_crossbar_stat_energy(&router->in_cbuf_crsbar, child_depth,
				SIM_power_strcat(path, "central buffer input crossbar"),
				max_avg, flits_switched);
		SIM_power_res_path(path, base_len);

		energy += SIM_crossbar_stat_energy(&router->out_cbuf_crsbar, child_depth,
				SIM_power_strcat(path, "central buffer output crossbar"),
				max_avg, flits_switched);
		SIM_power_res_path(path, base_len);

		/* dirty hack, REMEMBER to REMOVE e_pipe_regs and e_part */
		e_pipe_regs = 0;
		reg_count = info->central_buf_info.n_set
			* (info->central_buf_info.read_ports + info->central_buf_info.write_ports);

		/* ignore e_switch for now because we overestimate wordline driver cap */
		e_part = router->cbuf_ff.e_keep_0 * (info->pipe_depth - 1)
			* (reg_count - 2 * (rw_cbuf + rw_cbuf));
		SIM_print_stat_energy(
				SIM_power_strcat(path, "central buffer pipeline registers/keep 0"),
				e_part, NEXT_DEPTH(child_depth));
		SIM_power_res_path(path, base_len);
		e_pipe_regs += e_part;

		e_part = router->cbuf_ff.e_clock * (info->pipe_depth - 1) * reg_count;
		SIM_print_stat_energy(
				SIM_power_strcat(path, "central buffer pipeline registers/clock"),
				e_part, NEXT_DEPTH(child_depth));
		SIM_power_res_path(path, base_len);
		e_pipe_regs += e_part;

		SIM_print_stat_energy(
				SIM_power_strcat(path, "central buffer pipeline registers"),
				e_pipe_regs, child_depth);
		SIM_power_res_path(path, base_len);

		energy += e_pipe_regs;
	}

	/* energy added since the buffer snapshot belongs to the switch */
	p_switch = energy * freq - p_buf;

	/* ---- input (local) arbiters ---- */
	if (info->in_arb_model) {
		energy += SIM_arbiter_stat_energy(&router->in_arb, &info->in_arb_queue_info,
				e_fin / info->in_n_switch, child_depth,
				SIM_power_strcat(path, "input arbiter"), max_avg)
			* info->in_n_switch * info->n_in;
		SIM_power_res_path(path, base_len);

		if (info->n_cache_in) {
			energy += SIM_arbiter_stat_energy(&router->cache_in_arb,
					&info->cache_in_arb_queue_info,
					e_fin / info->cache_n_switch, child_depth,
					SIM_power_strcat(path, "cache input arbiter"), max_avg)
				* info->cache_n_switch * info->n_cache_in;
			SIM_power_res_path(path, base_len);
		}

		if (info->n_mc_in) {
			energy += SIM_arbiter_stat_energy(&router->mc_in_arb,
					&info->mc_in_arb_queue_info,
					e_fin / info->mc_n_switch, child_depth,
					SIM_power_strcat(path, "memory controller input arbiter"), max_avg)
				* info->mc_n_switch * info->n_mc_in;
			SIM_power_res_path(path, base_len);
		}

		if (info->n_io_in) {
			energy += SIM_arbiter_stat_energy(&router->io_in_arb,
					&info->io_in_arb_queue_info,
					e_fin / info->io_n_switch, child_depth,
					SIM_power_strcat(path, "I/O input arbiter"), max_avg)
				* info->io_n_switch * info->n_io_in;
			SIM_power_res_path(path, base_len);
		}
	}

	/* ---- output (global) arbiter ---- */
	if (info->out_arb_model) {
		energy += SIM_arbiter_stat_energy(&router->out_arb, &info->out_arb_queue_info,
				flits_switched / info->n_switch_out, child_depth,
				SIM_power_strcat(path, "output arbiter"), max_avg)
			* info->n_switch_out;
		SIM_power_res_path(path, base_len);
	}

	p_total = energy * freq;
	/* whatever is neither buffer nor switch is arbitration */
	p_arb = p_total - p_buf - p_switch;

	SIM_print_stat_energy(path, energy, print_depth);

#if 0
	if (plot_flag)
		fprintf(stderr, "pbuf=%g\n pswitch=%g\n parbiter=%g \n ptotal=%g\n", p_buf, p_switch, p_arb, p_total);
#endif

	/* SESC addition: report each power figure divided by (freq / 1e9) */
	ghz = freq / 1e9;
	result.totEnergy = p_total / ghz;
	result.bufEnergy = p_buf / ghz;
	result.switchEnergy = p_switch / ghz;
	result.arbEnergy = p_arb / ghz;

	return result;
}
/*
 * SIM_router_stat_energy - expected per-cycle router energy (dynamic plus
 * static), with an optional per-component power breakdown on stdout.
 *
 * time unit: 1 cycle
 *   e_fin:      average # of flits received by one input port during unit
 *               time (at most 0.5 for InfiniBand router)
 *   e_buf_wrt:  average # of input buffer writes of all ports during unit time
 *               e_buf_wrt = e_fin * n_buf_in
 *   e_buf_rd:   average # of input buffer reads of all ports during unit time
 *               e_buf_rd = e_buf_wrt
 *               (split into the different input port classes in the code)
 *   e_cbuf_fin: average # of flits passing through the switch during unit time
 *               e_cbuf_fin = e_fin * n_total_in
 *   e_cbuf_wrt: average # of central buffer writes during unit time
 *               e_cbuf_wrt = e_cbuf_fin / (pipe_depth * pipe_width)
 *               (the code evaluates this as e_cbuf_fin * flit_width / blk_bits)
 *   e_cbuf_rd:  average # of central buffer reads during unit time
 *               e_cbuf_rd = e_cbuf_wrt
 *   e_arb:      average # of arbitrations per arbiter during unit time
 *               assume e_arb = 1
 *
 * Parameters:
 *   info        router configuration (port counts, enabled models, ...)
 *   router      per-component energy models and static currents
 *   print_depth depth limit forwarded to SIM_print_stat_energy
 *   path        mutable label buffer; each component name is appended with
 *               SIM_strcat and the buffer is restored to its incoming length
 *               with SIM_res_path afterwards
 *   max_avg     forwarded unchanged to the component models
 *   e_fin       see above
 *   plot_flag   when non-zero, print the Buffer / Crossbar / VC_allocator /
 *               SW_allocator / Clock / Total power line to stdout
 *   freq        multiplier turning accumulated energy into dynamic power
 *
 * Returns the accumulated energy Eavg, including the static term
 * I_static * Vdd * Period * SCALE_S.
 *
 * The dynamic power of each component is derived from the growth of Eavg
 * between snapshots, so the order of the sections below matters.
 *
 * NOTES: (1) negative print_depth means infinite print depth
 *
 * FIXME: (1) hack: SIM_array_stat_energy cannot be used for shared buffer,
 *            we use it now anyway
 */
double SIM_router_stat_energy(SIM_router_info_t *info, SIM_router_power_t *router, int print_depth, char *path, int max_avg, double e_fin, int plot_flag, double freq)
{
	double Eavg = 0, Eatomic, Estruct, Estatic = 0;
	double Pbuf = 0, Pxbar = 0, Pvc_arbiter = 0, Psw_arbiter = 0, Pclock = 0, Ptotal = 0;
	double Pbuf_static = 0, Pxbar_static = 0, Pvc_arbiter_static = 0, Psw_arbiter_static = 0, Pclock_static = 0;
	double Pbuf_dyn = 0, Pxbar_dyn = 0, Pvc_arbiter_dyn = 0, Psw_arbiter_dyn = 0, Pclock_dyn = 0;

	double e_in_buf_rw, e_cache_in_buf_rw, e_mc_in_buf_rw, e_io_in_buf_rw;
	double e_cbuf_fin, e_cbuf_rw, e_out_buf_rw;
	int next_depth;
	u_int path_len, n_regs;
	int vc_allocator_enabled = 1;

	/* expected value computation */
	e_in_buf_rw = e_fin * info->n_in;
	e_cache_in_buf_rw = e_fin * info->n_cache_in;
	e_mc_in_buf_rw = e_fin * info->n_mc_in;
	e_io_in_buf_rw = e_fin * info->n_io_in;
	e_cbuf_fin = e_fin * info->n_total_in;
	e_out_buf_rw = e_cbuf_fin / info->n_total_out * info->n_out;
	e_cbuf_rw = e_cbuf_fin * info->flit_width / info->central_buf_info.blk_bits;

	next_depth = NEXT_DEPTH(print_depth);
	path_len = SIM_strlen(path);

	/* input buffers */
	if (info->in_buf) {
		Eavg += SIM_array_stat_energy(&info->in_buf_info, &router->in_buf,
				e_in_buf_rw, e_in_buf_rw, next_depth,
				SIM_strcat(path, "input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}

	if (info->cache_in_buf) {
		Eavg += SIM_array_stat_energy(&info->cache_in_buf_info, &router->cache_in_buf,
				e_cache_in_buf_rw, e_cache_in_buf_rw, next_depth,
				SIM_strcat(path, "cache input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}

	if (info->mc_in_buf) {
		Eavg += SIM_array_stat_energy(&info->mc_in_buf_info, &router->mc_in_buf,
				e_mc_in_buf_rw, e_mc_in_buf_rw, next_depth,
				SIM_strcat(path, "memory controller input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}

	if (info->io_in_buf) {
		Eavg += SIM_array_stat_energy(&info->io_in_buf_info, &router->io_in_buf,
				e_io_in_buf_rw, e_io_in_buf_rw, next_depth,
				SIM_strcat(path, "I/O input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}

	/* output buffers */
	if (info->out_buf) {
		/* local output ports don't use router buffers */
		Eavg += SIM_array_stat_energy(&info->out_buf_info, &router->out_buf,
				e_out_buf_rw, e_out_buf_rw, next_depth,
				SIM_strcat(path, "output buffer"), max_avg);
		SIM_res_path(path, path_len);
	}

	/* central buffer (counted as part of the buffer power below) */
	if (info->central_buf) {
		Eavg += SIM_array_stat_energy(&info->central_buf_info, &router->central_buf,
				e_cbuf_rw, e_cbuf_rw, next_depth,
				SIM_strcat(path, "central buffer"), max_avg);
		SIM_res_path(path, path_len);

		Eavg += SIM_crossbar_stat_energy(&router->in_cbuf_crsbar, next_depth,
				SIM_strcat(path, "central buffer input crossbar"),
				max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);

		Eavg += SIM_crossbar_stat_energy(&router->out_cbuf_crsbar, next_depth,
				SIM_strcat(path, "central buffer output crossbar"),
				max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);

		/* dirty hack, REMEMBER to REMOVE Estruct and Eatomic */
		Estruct = 0;
		n_regs = info->central_buf_info.n_set
			* (info->central_buf_info.read_ports + info->central_buf_info.write_ports);

		/* ignore e_switch for now because we overestimate wordline driver cap */
		Eatomic = router->cbuf_ff.e_keep_0 * (info->pipe_depth - 1)
			* (n_regs - 2 * (e_cbuf_rw + e_cbuf_rw));
		SIM_print_stat_energy(
				SIM_strcat(path, "central buffer pipeline registers/keep 0"),
				Eatomic, NEXT_DEPTH(next_depth));
		SIM_res_path(path, path_len);
		Estruct += Eatomic;

		Eatomic = router->cbuf_ff.e_clock * (info->pipe_depth - 1) * n_regs;
		SIM_print_stat_energy(
				SIM_strcat(path, "central buffer pipeline registers/clock"),
				Eatomic, NEXT_DEPTH(next_depth));
		SIM_res_path(path, path_len);
		Estruct += Eatomic;

		SIM_print_stat_energy(
				SIM_strcat(path, "central buffer pipeline registers"),
				Estruct, next_depth);
		SIM_res_path(path, path_len);

		Eavg += Estruct;
	}

	/* snapshot: everything accumulated so far is buffer energy */
	Pbuf_dyn = Eavg * freq;
	Pbuf_static = router->I_buf_static * Vdd * SCALE_S;
	Pbuf = Pbuf_dyn + Pbuf_static;

	/* main crossbar */
	if (info->crossbar_model) {
		Eavg += SIM_crossbar_stat_energy(&router->crossbar, next_depth,
				SIM_strcat(path, "crossbar"), max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);
	}

	Pxbar_dyn = (Eavg * freq - Pbuf_dyn);
	Pxbar_static = router->I_crossbar_static * Vdd * SCALE_S;
	Pxbar = Pxbar_dyn + Pxbar_static;

	/* switch allocation (arbiter energy only) */
	/* input (local) arbiter for switch allocation */
	if (info->sw_in_arb_model) {
		/* assume # of active input arbiters is (info->in_n_switch * info->n_in * e_fin)
		 * assume (info->n_v_channel*info->n_v_class)/2 vcs are making request at each arbiter
		 * NOTE(review): if these fields are integers the "/ 2" truncates
		 * (here and in the other request estimates below) - confirm intended */
		Eavg += SIM_arbiter_stat_energy(&router->sw_in_arb, &info->sw_in_arb_queue_info,
				(info->n_v_channel*info->n_v_class)/2, next_depth,
				SIM_strcat(path, "switch allocator input arbiter"), max_avg)
			* info->in_n_switch * info->n_in * e_fin;
		SIM_res_path(path, path_len);

		if (info->n_cache_in) {
			Eavg += SIM_arbiter_stat_energy(&router->cache_in_arb,
					&info->cache_in_arb_queue_info,
					e_fin / info->cache_n_switch, next_depth,
					SIM_strcat(path, "cache input arbiter"), max_avg)
				* info->cache_n_switch * info->n_cache_in;
			SIM_res_path(path, path_len);
		}

		if (info->n_mc_in) {
			Eavg += SIM_arbiter_stat_energy(&router->mc_in_arb,
					&info->mc_in_arb_queue_info,
					e_fin / info->mc_n_switch, next_depth,
					SIM_strcat(path, "memory controller input arbiter"), max_avg)
				* info->mc_n_switch * info->n_mc_in;
			SIM_res_path(path, path_len);
		}

		if (info->n_io_in) {
			Eavg += SIM_arbiter_stat_energy(&router->io_in_arb,
					&info->io_in_arb_queue_info,
					e_fin / info->io_n_switch, next_depth,
					SIM_strcat(path, "I/O input arbiter"), max_avg)
				* info->io_n_switch * info->n_io_in;
			SIM_res_path(path, path_len);
		}
	}

	/* output (global) arbiter for switch allocation */
	if (info->sw_out_arb_model) {
		/* assume # of active output arbiters is (info->n_switch_out * (e_cbuf_fin/info->n_switch_out))
		 * assume (info->n_in)/2 request at each output arbiter */
		Eavg += SIM_arbiter_stat_energy(&router->sw_out_arb, &info->sw_out_arb_queue_info,
				info->n_in / 2, next_depth,
				SIM_strcat(path, "switch allocator output arbiter"), max_avg)
			* info->n_switch_out * (e_cbuf_fin / info->n_switch_out);
		SIM_res_path(path, path_len);
	}

	/*
	 * Either arbiter stage contributes switch-allocator energy, so both
	 * model flags must be consulted; otherwise an input-only configuration
	 * has its arbitration energy booked under a later component.
	 */
	if (info->sw_in_arb_model || info->sw_out_arb_model) {
		Psw_arbiter_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn;
		Psw_arbiter_static = router->I_sw_arbiter_static * Vdd * SCALE_S;
		Psw_arbiter = Psw_arbiter_dyn + Psw_arbiter_static;
	}

	/* virtual channel allocation (arbiter energy only) */
	/* HACKs:
	 *   - assume 1 header flit in every 5 flits for now, hence * 0.2 */
	if (info->vc_allocator_type == ONE_STAGE_ARB && info->vc_out_arb_model) {
		/* one stage arbitration (vc allocation) */
		/* # of active arbiters: flit_rate * n_in * 0.2 / 2 */
		double nActiveArbs = e_fin * info->n_in * 0.2 / 2;

		/* NOTE(review): the original comment assumed 2 requests per active
		 * arbiter on average, but the request count passed here is 1 -
		 * confirm which is intended (should use expected value from
		 * simulation) */
		Eavg += SIM_arbiter_stat_energy(&router->vc_out_arb, &info->vc_out_arb_queue_info,
				1, next_depth,
				SIM_strcat(path, "vc allocation arbiter"), max_avg)
			* nActiveArbs;
		SIM_res_path(path, path_len);
	}
	else if (info->vc_allocator_type == TWO_STAGE_ARB && info->vc_in_arb_model && info->vc_out_arb_model) {
		/* both stage models are known to be enabled inside this branch */

		/* first stage arbitration (vc allocation):
		 * # of active stage-1 arbiters (# of new header flits) */
		double nStage1Arbs = e_fin * info->n_in * 0.2;
		/* # of active stage-2 arbiters: flit_rate * n_in * 0.2 / 2 */
		double nStage2Arbs = e_fin * info->n_in * 0.2 / 2;

		/* assume an active arbiter has n_v_channel/2 requests on average
		 * (should use expected value from simulation) */
		Eavg += SIM_arbiter_stat_energy(&router->vc_in_arb, &info->vc_in_arb_queue_info,
				info->n_v_channel/2, next_depth,
				SIM_strcat(path, "vc allocation arbiter (stage 1)"), max_avg)
			* nStage1Arbs;
		SIM_res_path(path, path_len);

		/* second stage arbitration (vc allocation):
		 * assume for each active arbiter, there is 2 requests on average
		 * (should use expected value from simulation) */
		Eavg += SIM_arbiter_stat_energy(&router->vc_out_arb, &info->vc_out_arb_queue_info,
				2, next_depth,
				SIM_strcat(path, "vc allocation arbiter (stage 2)"), max_avg)
			* nStage2Arbs;
		SIM_res_path(path, path_len);
	}
	else if (info->vc_allocator_type == VC_SELECT && info->n_v_channel > 1 && info->n_in > 1) {
		double n_read = e_fin * info->n_in * 0.2;
		double n_write = e_fin * info->n_in * 0.2;

		Eavg += SIM_array_stat_energy(&info->vc_select_buf_info, &router->vc_select_buf,
				n_read, n_write, next_depth,
				SIM_strcat(path, "vc selection"), max_avg);
		SIM_res_path(path, path_len);
	}
	else {
		vc_allocator_enabled = 0; /* no vc allocator is used */
	}

	if (info->n_v_channel > 1 && vc_allocator_enabled) {
		Pvc_arbiter_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn - Psw_arbiter_dyn;
		Pvc_arbiter_static = router->I_vc_arbiter_static * Vdd * SCALE_S;
		Pvc_arbiter = Pvc_arbiter_dyn + Pvc_arbiter_static;
	}

	/* router clock power (supported for 90nm and below) */
	if (PARM(TECH_POINT) <= 90) {
		Eavg += SIM_total_clockEnergy(info, router);
		Pclock_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn - Pvc_arbiter_dyn - Psw_arbiter_dyn;
		Pclock_static = router->I_clock_static * Vdd * SCALE_S;
		Pclock = Pclock_dyn + Pclock_static;
	}

	/* static power */
	Estatic = router->I_static * Vdd * Period * SCALE_S;
	SIM_print_stat_energy(SIM_strcat(path, "static energy"), Estatic, next_depth);
	SIM_res_path(path, path_len);
	Eavg += Estatic;
	Ptotal = Eavg * freq;

	SIM_print_stat_energy(path, Eavg, print_depth);

	if (plot_flag)
		fprintf(stdout, "Buffer:%g\tCrossbar:%g\tVC_allocator:%g\tSW_allocator:%g\tClock:%g\tTotal:%g\n", Pbuf, Pxbar, Pvc_arbiter, Psw_arbiter, Pclock, Ptotal);

	return Eavg;
}