/*
 * Report the average energy a crossbar consumes during one unit of time
 * (one cycle) while transporting n_data flits, print a per-component
 * breakdown under `path`, and return the total.
 *
 * crsbar      - crossbar power model (matrix or multi-level tree)
 * print_depth - remaining levels of the report to print (negative = all)
 * max_avg     - nonzero selects worst-case switching instead of 0.5 average
 * n_data      - expected # of flits through the crossbar per unit time;
 *               clamped to the number of output ports
 *
 * NOTE: `path` is temporarily extended for each sub-component and restored
 * before returning, so the caller sees it unchanged.
 */
static double SIM_crossbar_stat_energy(SIM_power_crossbar_t *crsbar, int print_depth, char *path, int max_avg, double n_data)
{
	double e_total = 0, e_part;
	int child_depth;
	u_int base_len;
	/* assume 0.5 data switch probability unless maximum is requested */
	double sw_prob = max_avg ? 1 : 0.5;

	/* cannot deliver more flits than there are output ports */
	if (n_data > crsbar->n_out) {
		fprintf(stderr, "%s: overflow\n", path);
		n_data = crsbar->n_out;
	}

	child_depth = NEXT_DEPTH(print_depth);
	base_len = SIM_power_strlen(path);

	switch (crsbar->model) {
	case MATRIX_CROSSBAR:
	case MULTREE_CROSSBAR:
		/* input port wires */
		e_part = crsbar->e_chg_in * crsbar->data_width * sw_prob * n_data;
		SIM_print_stat_energy(SIM_power_strcat(path, "input"), e_part, child_depth);
		SIM_power_res_path(path, base_len);
		e_total += e_part;

		/* output port wires */
		e_part = crsbar->e_chg_out * crsbar->data_width * sw_prob * n_data;
		SIM_print_stat_energy(SIM_power_strcat(path, "output"), e_part, child_depth);
		SIM_power_res_path(path, base_len);
		e_total += e_part;

		/* control wires: one toggle per transported flit */
		e_part = crsbar->e_chg_ctr * n_data;
		SIM_print_stat_energy(SIM_power_strcat(path, "control"), e_part, child_depth);
		SIM_power_res_path(path, base_len);
		e_total += e_part;

		/* internal nodes only exist in multi-level trees deeper than 1 */
		if (crsbar->model == MULTREE_CROSSBAR && crsbar->depth > 1) {
			e_part = crsbar->e_chg_int * crsbar->data_width * (crsbar->depth - 1) * sw_prob * n_data;
			SIM_print_stat_energy(SIM_power_strcat(path, "internal node"), e_part, child_depth);
			SIM_power_res_path(path, base_len);
			e_total += e_part;
		}
		break;
	}

	SIM_print_stat_energy(path, e_total, print_depth);
	return e_total;
}
/*
 * time unit: 1 cycle
 * e_fin: average # of flits received by one input port during unit time
 *        (at most 0.5 for InfiniBand router)
 * e_buf_wrt: average # of input buffer writes of all ports during unit time
 *            e_buf_wrt = e_fin * n_buf_in
 * e_buf_rd: average # of input buffer reads of all ports during unit time
 *           e_buf_rd = e_buf_wrt
 *           (splitted into different input ports in program)
 * e_cbuf_fin: average # of flits passing through the switch during unit time
 *             e_cbuf_fin = e_fin * n_total_in
 * e_cbuf_wrt: average # of central buffer writes during unit time
 *             e_cbuf_wrt = e_cbuf_fin / (pipe_depth * pipe_width)
 * e_cbuf_rd: average # of central buffer reads during unit time
 *            e_cbuf_rd = e_cbuf_wrt
 * e_arb: average # of arbitrations per arbiter during unit time
 *        assume e_arb = 1
 *
 * NOTES: (1) negative print_depth means infinite print depth
 *
 * FIXME: (1) hack: SIM_reg_stat_energy cannot be used for shared buffer,
 *            we use it now anyway
 */
/*
 * Compute one-cycle average energy of a whole router (buffers, crossbar,
 * arbiters), print the breakdown, and return the totals split by component
 * in a struct orResult (SESC integration).  Power is derived as E * freq,
 * then divided back by freq/1e9 to report energy-per-cycle figures.
 */
struct orResult SIM_router_stat_energy(SIM_power_router_info_t *info, SIM_power_router_t *router, int print_depth, char *path, int max_avg, double e_fin, int plot_flag, double freq)
{
	// SESC changes
	struct orResult orr ;
	double fac ;
	// end of changes
	double Eavg = 0, Eatomic, Estruct;
	double Pbuf, Pswitch, Parbiter, Ptotal;
	double e_in_buf_rw, e_cache_in_buf_rw, e_mc_in_buf_rw, e_io_in_buf_rw;
	double e_cbuf_fin, e_cbuf_rw, e_out_buf_rw;
	int next_depth;
	u_int path_len, n_regs;

	/* expected value computation (see formulas in the header comment) */
	e_in_buf_rw = e_fin * info->n_in;
	e_cache_in_buf_rw = e_fin * info->n_cache_in;
	e_mc_in_buf_rw = e_fin * info->n_mc_in;
	e_io_in_buf_rw = e_fin * info->n_io_in;
	e_cbuf_fin = e_fin * info->n_total_in;
	e_out_buf_rw = e_cbuf_fin / info->n_total_out * info->n_out;
	e_cbuf_rw = e_cbuf_fin * info->flit_width / info->central_buf_info.blk_bits;

	next_depth = NEXT_DEPTH(print_depth);
	path_len = SIM_power_strlen(path);

	/* input buffers: each SIM_power_strcat temporarily extends `path` and
	 * SIM_power_res_path restores it afterwards */
	if (info->in_buf) {
		Eavg += SIM_reg_stat_energy(&info->in_buf_info, &router->in_buf, e_in_buf_rw, e_in_buf_rw, next_depth, SIM_power_strcat(path, "input buffer"), max_avg);
		SIM_power_res_path(path, path_len);
	}
	if (info->cache_in_buf) {
		Eavg += SIM_reg_stat_energy(&info->cache_in_buf_info, &router->cache_in_buf, e_cache_in_buf_rw, e_cache_in_buf_rw, next_depth, SIM_power_strcat(path, "cache input buffer"), max_avg);
		SIM_power_res_path(path, path_len);
	}
	if (info->mc_in_buf) {
		Eavg += SIM_reg_stat_energy(&info->mc_in_buf_info, &router->mc_in_buf, e_mc_in_buf_rw, e_mc_in_buf_rw, next_depth, SIM_power_strcat(path, "memory controller input buffer"), max_avg);
		SIM_power_res_path(path, path_len);
	}
	if (info->io_in_buf) {
		Eavg += SIM_reg_stat_energy(&info->io_in_buf_info, &router->io_in_buf, e_io_in_buf_rw, e_io_in_buf_rw, next_depth, SIM_power_strcat(path, "I/O input buffer"), max_avg);
		SIM_power_res_path(path, path_len);
	}

	/* output buffers */
	if (info->out_buf) {
		/* local output ports don't use router buffers */
		Eavg += SIM_reg_stat_energy(&info->out_buf_info, &router->out_buf, e_out_buf_rw, e_out_buf_rw, next_depth, SIM_power_strcat(path, "output buffer"), max_avg);
		SIM_power_res_path(path, path_len);
	}

	/* everything accumulated so far is buffer energy */
	Pbuf = Eavg * freq;

	/* main crossbar */
	if (info->crossbar_model) {
		Eavg += SIM_crossbar_stat_energy(&router->crossbar, next_depth, SIM_power_strcat(path, "crossbar"), max_avg, e_cbuf_fin);
		SIM_power_res_path(path, path_len);
	}

	/* central buffer */
	if (info->central_buf) {
		Eavg += SIM_reg_stat_energy(&info->central_buf_info, &router->central_buf, e_cbuf_rw, e_cbuf_rw, next_depth, SIM_power_strcat(path, "central buffer"), max_avg);
		SIM_power_res_path(path, path_len);

		Eavg += SIM_crossbar_stat_energy(&router->in_cbuf_crsbar, next_depth, SIM_power_strcat(path, "central buffer input crossbar"), max_avg, e_cbuf_fin);
		SIM_power_res_path(path, path_len);

		Eavg += SIM_crossbar_stat_energy(&router->out_cbuf_crsbar, next_depth, SIM_power_strcat(path, "central buffer output crossbar"), max_avg, e_cbuf_fin);
		SIM_power_res_path(path, path_len);

		/* dirty hack, REMEMBER to REMOVE Estruct and Eatomic */
		Estruct = 0;
		n_regs = info->central_buf_info.n_set * (info->central_buf_info.read_ports + info->central_buf_info.write_ports);

		/* ignore e_switch for now because we overestimate wordline driver cap */
		Eatomic = router->cbuf_ff.e_keep_0 * (info->pipe_depth - 1) * (n_regs - 2 * (e_cbuf_rw + e_cbuf_rw));
		SIM_print_stat_energy(SIM_power_strcat(path, "central buffer pipeline registers/keep 0"), Eatomic, NEXT_DEPTH(next_depth));
		SIM_power_res_path(path, path_len);
		Estruct += Eatomic;

		Eatomic = router->cbuf_ff.e_clock * (info->pipe_depth - 1) * n_regs;
		SIM_print_stat_energy(SIM_power_strcat(path, "central buffer pipeline registers/clock"), Eatomic, NEXT_DEPTH(next_depth));
		SIM_power_res_path(path, path_len);
		Estruct += Eatomic;

		SIM_print_stat_energy(SIM_power_strcat(path, "central buffer pipeline registers"), Estruct, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Estruct;
	}

	/* switch (crossbar + central buffer) power = delta since Pbuf */
	Pswitch = Eavg * freq - Pbuf;

	/* input (local) arbiter */
	if (info->in_arb_model) {
		/* per-arbiter energy scaled by # of switches and ports */
		Eavg += SIM_arbiter_stat_energy(&router->in_arb, &info->in_arb_queue_info, e_fin / info->in_n_switch, next_depth, SIM_power_strcat(path, "input arbiter"), max_avg) * info->in_n_switch * info->n_in;
		SIM_power_res_path(path, path_len);

		if (info->n_cache_in) {
			Eavg += SIM_arbiter_stat_energy(&router->cache_in_arb, &info->cache_in_arb_queue_info, e_fin / info->cache_n_switch, next_depth, SIM_power_strcat(path, "cache input arbiter"), max_avg) * info->cache_n_switch * info->n_cache_in;
			SIM_power_res_path(path, path_len);
		}
		if (info->n_mc_in) {
			Eavg += SIM_arbiter_stat_energy(&router->mc_in_arb, &info->mc_in_arb_queue_info, e_fin / info->mc_n_switch, next_depth, SIM_power_strcat(path, "memory controller input arbiter"), max_avg) * info->mc_n_switch * info->n_mc_in;
			SIM_power_res_path(path, path_len);
		}
		if (info->n_io_in) {
			Eavg += SIM_arbiter_stat_energy(&router->io_in_arb, &info->io_in_arb_queue_info, e_fin / info->io_n_switch, next_depth, SIM_power_strcat(path, "I/O input arbiter"), max_avg) * info->io_n_switch * info->n_io_in;
			SIM_power_res_path(path, path_len);
		}
	}

	/* output (global) arbiter */
	if (info->out_arb_model) {
		Eavg += SIM_arbiter_stat_energy(&router->out_arb, &info->out_arb_queue_info, e_cbuf_fin / info->n_switch_out, next_depth, SIM_power_strcat(path, "output arbiter"), max_avg) * info->n_switch_out;
		SIM_power_res_path(path, path_len);
	}

	Ptotal = Eavg * freq;
	/* arbiter power is whatever remains after buffers and switch */
	Parbiter = Ptotal - Pbuf - Pswitch;

	SIM_print_stat_energy(path, Eavg, print_depth);

#if 0
	if (plot_flag)
		fprintf(stderr, "pbuf=%g\n pswitch=%g\n parbiter=%g \n ptotal=%g\n", Pbuf, Pswitch, Parbiter, Ptotal);
#endif
	/* convert power (W at `freq`) back to per-cycle energy, in nJ units:
	 * fac = freq / 1e9, so X/fac = X * 1e9 / freq */
	fac = freq/1e9 ;
	orr.totEnergy = Ptotal/fac ;
	orr.bufEnergy = Pbuf/fac ;
	orr.switchEnergy = Pswitch/fac ;
	orr.arbEnergy = Parbiter/fac ;
	return orr ;
}
/* info is only used by queuing arbiter */
/*
 * Average energy of one arbiter during one unit of time (one cycle).
 *
 * arb         - arbiter power model (round-robin, matrix, or queuing)
 * info        - array info, consumed only by the QUEUE_ARBITER case
 * n_req       - expected # of requests per cycle; clamped to req_width
 * print_depth - remaining report depth (negative = infinite)
 * max_avg     - nonzero selects worst-case switching instead of 0.5 average
 *
 * `path` is extended/restored around every printed sub-component, so the
 * caller's string is unchanged on return.
 *
 * NOTE(review): if n_req == 0, 1.0/n_req is +inf and n_grant becomes 0
 * under IEEE arithmetic — presumably intended, but confirm callers never
 * rely on a different convention.
 */
static double SIM_arbiter_stat_energy(SIM_power_arbiter_t *arb, SIM_power_array_info_t *info, double n_req, int print_depth, char *path, int max_avg)
{
	double Eavg = 0, Estruct, Eatomic;
	int next_depth, next_next_depth;
	double total_pri, n_chg_pri, n_grant;
	u_int path_len, next_path_len;

	next_depth = NEXT_DEPTH(print_depth);
	next_next_depth = NEXT_DEPTH(next_depth);
	path_len = SIM_power_strlen(path);

	/* energy cycle distribution */
	if (n_req > arb->req_width) {
		fprintf(stderr, "arbiter overflow\n");
		n_req = arb->req_width;
	}
	/* at most one grant per cycle; sparse requests grant once every
	 * ceil(1/n_req) cycles */
	if (n_req >= 1) n_grant = 1;
	else n_grant = 1.0 / ceil(1.0 / n_req);

	switch (arb->model) {
	case RR_ARBITER:
		/* FIXME: we may overestimate request switch */
		Eatomic = arb->e_chg_req * n_req;
		SIM_print_stat_energy(SIM_power_strcat(path, "request"), Eatomic, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Eatomic;

		Eatomic = arb->e_chg_grant * n_grant;
		SIM_print_stat_energy(SIM_power_strcat(path, "grant"), Eatomic, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Eatomic;

		/* assume carry signal propagates half length in average case */
		/* carry does not propagate in maximum case, i.e. all carrys go down */
		Eatomic = arb->e_chg_carry * arb->req_width * (max_avg ? 1 : 0.5) * n_grant;
		SIM_print_stat_energy(SIM_power_strcat(path, "carry"), Eatomic, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Eatomic;

		Eatomic = arb->e_chg_carry_in * (arb->req_width * (max_avg ? 1 : 0.5) - 1) * n_grant;
		SIM_print_stat_energy(SIM_power_strcat(path, "internal carry"), Eatomic, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Eatomic;

		/* priority registers: grouped under the "priority" sub-path */
		Estruct = 0;
		SIM_power_strcat(path, "priority");
		next_path_len = SIM_power_strlen(path);

		/* granting flips exactly two priority bits */
		Eatomic = arb->pri_ff.e_switch * 2 * n_grant;
		SIM_print_stat_energy(SIM_power_strcat(path, "switch"), Eatomic, next_next_depth);
		SIM_power_res_path(path, next_path_len);
		Estruct += Eatomic;

		Eatomic = arb->pri_ff.e_keep_0 * (arb->req_width - 2 * n_grant);
		SIM_print_stat_energy(SIM_power_strcat(path, "keep 0"), Eatomic, next_next_depth);
		SIM_power_res_path(path, next_path_len);
		Estruct += Eatomic;

		Eatomic = arb->pri_ff.e_clock * arb->req_width;
		SIM_print_stat_energy(SIM_power_strcat(path, "clock"), Eatomic, next_next_depth);
		SIM_power_res_path(path, next_path_len);
		Estruct += Eatomic;

		SIM_print_stat_energy(path, Estruct, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Estruct;
		break;

	case MATRIX_ARBITER:
		/* n*(n-1)/2 priority flip-flops in the triangular matrix */
		total_pri = arb->req_width * (arb->req_width - 1) * 0.5;
		/* assume switch probability 0.5 for priorities */
		n_chg_pri = (arb->req_width - 1) * (max_avg ? 1 : 0.5);

		/* FIXME: we may overestimate request switch */
		Eatomic = arb->e_chg_req * n_req;
		SIM_print_stat_energy(SIM_power_strcat(path, "request"), Eatomic, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Eatomic;

		Eatomic = arb->e_chg_grant * n_grant;
		SIM_print_stat_energy(SIM_power_strcat(path, "grant"), Eatomic, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Eatomic;

		/* priority registers */
		Estruct = 0;
		SIM_power_strcat(path, "priority");
		next_path_len = SIM_power_strlen(path);

		Eatomic = arb->pri_ff.e_switch * n_chg_pri * n_grant;
		SIM_print_stat_energy(SIM_power_strcat(path, "switch"), Eatomic, next_next_depth);
		SIM_power_res_path(path, next_path_len);
		Estruct += Eatomic;

		/* assume 1 and 0 are uniformly distributed; in max mode only the
		 * costlier of keep-0/keep-1 is charged */
		if (arb->pri_ff.e_keep_0 >= arb->pri_ff.e_keep_1 || !max_avg) {
			Eatomic = arb->pri_ff.e_keep_0 * (total_pri - n_chg_pri * n_grant) * (max_avg ? 1 : 0.5);
			SIM_print_stat_energy(SIM_power_strcat(path, "keep 0"), Eatomic, next_next_depth);
			SIM_power_res_path(path, next_path_len);
			Estruct += Eatomic;
		}
		if (arb->pri_ff.e_keep_0 < arb->pri_ff.e_keep_1 || !max_avg) {
			Eatomic = arb->pri_ff.e_keep_1 * (total_pri - n_chg_pri * n_grant) * (max_avg ? 1 : 0.5);
			SIM_print_stat_energy(SIM_power_strcat(path, "keep 1"), Eatomic, next_next_depth);
			SIM_power_res_path(path, next_path_len);
			Estruct += Eatomic;
		}

		Eatomic = arb->pri_ff.e_clock * total_pri;
		SIM_print_stat_energy(SIM_power_strcat(path, "clock"), Eatomic, next_next_depth);
		SIM_power_res_path(path, next_path_len);
		Estruct += Eatomic;

		SIM_print_stat_energy(path, Estruct, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Estruct;

		/* based on above assumptions */
		if (max_avg)
			/* min(p,n/2)(n-1) + 2(n-1) */
			Eatomic = arb->e_chg_mint * (MIN(n_req, arb->req_width * 0.5) + 2) * (arb->req_width - 1);
		else
			/* p(n-1)/2 + (n-1)/2 */
			Eatomic = arb->e_chg_mint * (n_req + 1) * (arb->req_width - 1) * 0.5;
		SIM_print_stat_energy(SIM_power_strcat(path, "internal node"), Eatomic, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Eatomic;
		break;

	case QUEUE_ARBITER:
		/* FIXME: what if n_req > 1? */
		/* queuing arbiter is modeled as a register array: n_req writes,
		 * n_grant reads */
		Eavg = SIM_reg_stat_energy(info, &arb->queue, n_req, n_grant, next_depth, SIM_power_strcat(path, "queue"), max_avg);
		SIM_power_res_path(path, path_len);
		break;
	}

	SIM_print_stat_energy(path, Eavg, print_depth);
	return Eavg;
}
/*
 * time unit: 1 cycle
 * e_fin: average # of flits received by one input port during unit time
 *        (at most 0.5 for InfiniBand router)
 * e_buf_wrt: average # of input buffer writes of all ports during unit time
 *            e_buf_wrt = e_fin * n_buf_in
 * e_buf_rd: average # of input buffer reads of all ports during unit time
 *           e_buf_rd = e_buf_wrt
 *           (splitted into different input ports in program)
 * e_cbuf_fin: average # of flits passing through the switch during unit time
 *             e_cbuf_fin = e_fin * n_total_in
 * e_cbuf_wrt: average # of central buffer writes during unit time
 *             e_cbuf_wrt = e_cbuf_fin / (pipe_depth * pipe_width)
 * e_cbuf_rd: average # of central buffer reads during unit time
 *            e_cbuf_rd = e_cbuf_wrt
 * e_arb: average # of arbitrations per arbiter during unit time
 *        assume e_arb = 1
 *
 * NOTES: (1) negative print_depth means infinite print depth
 *
 * FIXME: (1) hack: SIM_array_stat_energy cannot be used for shared buffer,
 *            we use it now anyway
 */
/*
 * Compute the one-cycle average energy of a whole router (buffers, crossbar,
 * switch/VC allocators, clock tree, leakage), print the component breakdown,
 * and return total energy.  Per-component dynamic power is recovered by
 * differencing the running Eavg * freq after each section; static power adds
 * I_*_static * Vdd * SCALE_S.
 */
double SIM_router_stat_energy(SIM_router_info_t *info, SIM_router_power_t *router, int print_depth, char *path, int max_avg, double e_fin, int plot_flag, double freq)
{
	double Eavg = 0, Eatomic, Estruct, Estatic = 0;
	double Pbuf = 0, Pxbar = 0, Pvc_arbiter = 0, Psw_arbiter = 0, Pclock = 0, Ptotal = 0;
	double Pbuf_static = 0, Pxbar_static = 0, Pvc_arbiter_static = 0, Psw_arbiter_static = 0, Pclock_static = 0;
	double Pbuf_dyn = 0, Pxbar_dyn = 0, Pvc_arbiter_dyn = 0, Psw_arbiter_dyn = 0, Pclock_dyn = 0;
	double e_in_buf_rw, e_cache_in_buf_rw, e_mc_in_buf_rw, e_io_in_buf_rw;
	double e_cbuf_fin, e_cbuf_rw, e_out_buf_rw;
	int next_depth;
	u_int path_len, n_regs;
	int vc_allocator_enabled = 1;

	/* expected value computation (see formulas in the header comment) */
	e_in_buf_rw = e_fin * info->n_in;
	e_cache_in_buf_rw = e_fin * info->n_cache_in;
	e_mc_in_buf_rw = e_fin * info->n_mc_in;
	e_io_in_buf_rw = e_fin * info->n_io_in;
	e_cbuf_fin = e_fin * info->n_total_in;
	e_out_buf_rw = e_cbuf_fin / info->n_total_out * info->n_out;
	e_cbuf_rw = e_cbuf_fin * info->flit_width / info->central_buf_info.blk_bits;

	next_depth = NEXT_DEPTH(print_depth);
	path_len = SIM_strlen(path);

	/* input buffers: `path` is extended for each component and restored by
	 * SIM_res_path, so the caller's string is unchanged */
	if (info->in_buf) {
		Eavg += SIM_array_stat_energy(&info->in_buf_info, &router->in_buf, e_in_buf_rw, e_in_buf_rw, next_depth, SIM_strcat(path, "input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}
	if (info->cache_in_buf) {
		Eavg += SIM_array_stat_energy(&info->cache_in_buf_info, &router->cache_in_buf, e_cache_in_buf_rw, e_cache_in_buf_rw, next_depth, SIM_strcat(path, "cache input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}
	if (info->mc_in_buf) {
		Eavg += SIM_array_stat_energy(&info->mc_in_buf_info, &router->mc_in_buf, e_mc_in_buf_rw, e_mc_in_buf_rw, next_depth, SIM_strcat(path, "memory controller input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}
	if (info->io_in_buf) {
		Eavg += SIM_array_stat_energy(&info->io_in_buf_info, &router->io_in_buf, e_io_in_buf_rw, e_io_in_buf_rw, next_depth, SIM_strcat(path, "I/O input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}

	/* output buffers */
	if (info->out_buf) {
		/* local output ports don't use router buffers */
		Eavg += SIM_array_stat_energy(&info->out_buf_info, &router->out_buf, e_out_buf_rw, e_out_buf_rw, next_depth, SIM_strcat(path, "output buffer"), max_avg);
		SIM_res_path(path, path_len);
	}

	/* central buffer */
	if (info->central_buf) {
		Eavg += SIM_array_stat_energy(&info->central_buf_info, &router->central_buf, e_cbuf_rw, e_cbuf_rw, next_depth, SIM_strcat(path, "central buffer"), max_avg);
		SIM_res_path(path, path_len);

		Eavg += SIM_crossbar_stat_energy(&router->in_cbuf_crsbar, next_depth, SIM_strcat(path, "central buffer input crossbar"), max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);

		Eavg += SIM_crossbar_stat_energy(&router->out_cbuf_crsbar, next_depth, SIM_strcat(path, "central buffer output crossbar"), max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);

		/* dirty hack, REMEMBER to REMOVE Estruct and Eatomic */
		Estruct = 0;
		n_regs = info->central_buf_info.n_set * (info->central_buf_info.read_ports + info->central_buf_info.write_ports);

		/* ignore e_switch for now because we overestimate wordline driver cap */
		Eatomic = router->cbuf_ff.e_keep_0 * (info->pipe_depth - 1) * (n_regs - 2 * (e_cbuf_rw + e_cbuf_rw));
		SIM_print_stat_energy(SIM_strcat(path, "central buffer pipeline registers/keep 0"), Eatomic, NEXT_DEPTH(next_depth));
		SIM_res_path(path, path_len);
		Estruct += Eatomic;

		Eatomic = router->cbuf_ff.e_clock * (info->pipe_depth - 1) * n_regs;
		SIM_print_stat_energy(SIM_strcat(path, "central buffer pipeline registers/clock"), Eatomic, NEXT_DEPTH(next_depth));
		SIM_res_path(path, path_len);
		Estruct += Eatomic;

		SIM_print_stat_energy(SIM_strcat(path, "central buffer pipeline registers"), Estruct, next_depth);
		SIM_res_path(path, path_len);
		Eavg += Estruct;
	}

	Pbuf_dyn = Eavg * freq;
	Pbuf_static = router->I_buf_static * Vdd * SCALE_S;
	Pbuf = Pbuf_dyn + Pbuf_static;

	/* main crossbar */
	if (info->crossbar_model) {
		Eavg += SIM_crossbar_stat_energy(&router->crossbar, next_depth, SIM_strcat(path, "crossbar"), max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);
	}

	Pxbar_dyn = (Eavg * freq - Pbuf_dyn);
	Pxbar_static = router->I_crossbar_static * Vdd * SCALE_S;
	Pxbar = Pxbar_dyn + Pxbar_static;

	/* switch allocation (arbiter energy only) */
	/* input (local) arbiter for switch allocation*/
	if (info->sw_in_arb_model) {
		/* assume # of active input arbiters is (info->in_n_switch * info->n_in * e_fin)
		 * assume (info->n_v_channel*info->n_v_class)/2 vcs are making request at each arbiter */
		Eavg += SIM_arbiter_stat_energy(&router->sw_in_arb, &info->sw_in_arb_queue_info, (info->n_v_channel*info->n_v_class)/2, next_depth, SIM_strcat(path, "switch allocator input arbiter"), max_avg) * info->in_n_switch * info->n_in * e_fin;
		SIM_res_path(path, path_len);

		if (info->n_cache_in) {
			Eavg += SIM_arbiter_stat_energy(&router->cache_in_arb, &info->cache_in_arb_queue_info, e_fin / info->cache_n_switch, next_depth, SIM_strcat(path, "cache input arbiter"), max_avg) * info->cache_n_switch * info->n_cache_in;
			SIM_res_path(path, path_len);
		}
		if (info->n_mc_in) {
			Eavg += SIM_arbiter_stat_energy(&router->mc_in_arb, &info->mc_in_arb_queue_info, e_fin / info->mc_n_switch, next_depth, SIM_strcat(path, "memory controller input arbiter"), max_avg) * info->mc_n_switch * info->n_mc_in;
			SIM_res_path(path, path_len);
		}
		if (info->n_io_in) {
			Eavg += SIM_arbiter_stat_energy(&router->io_in_arb, &info->io_in_arb_queue_info, e_fin / info->io_n_switch, next_depth, SIM_strcat(path, "I/O input arbiter"), max_avg) * info->io_n_switch * info->n_io_in;
			SIM_res_path(path, path_len);
		}
	}

	/* output (global) arbiter for switch allocation*/
	if (info->sw_out_arb_model) {
		/* assume # of active output arbiters is (info->n_switch_out * (e_cbuf_fin/info->n_switch_out))
		 * assume (info->n_in)/2 request at each output arbiter */
		Eavg += SIM_arbiter_stat_energy(&router->sw_out_arb, &info->sw_out_arb_queue_info, info->n_in / 2, next_depth, SIM_strcat(path, "switch allocator output arbiter"), max_avg) * info->n_switch_out * (e_cbuf_fin / info->n_switch_out);
		SIM_res_path(path, path_len);
	}

	/* BUG FIX: the original tested `sw_out_arb_model || sw_out_arb_model`
	 * (same flag twice), so an input-arbiter-only configuration never got
	 * its switch-allocator power accounted.  Test both flags, matching the
	 * two sections above. */
	if (info->sw_in_arb_model || info->sw_out_arb_model) {
		Psw_arbiter_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn;
		Psw_arbiter_static = router->I_sw_arbiter_static * Vdd * SCALE_S;
		Psw_arbiter = Psw_arbiter_dyn + Psw_arbiter_static;
	}

	/* virtual channel allocation (arbiter energy only) */
	/* HACKs:
	 * - assume 1 header flit in every 5 flits for now, hence * 0.2 */
	if (info->vc_allocator_type == ONE_STAGE_ARB && info->vc_out_arb_model) {
		/* one stage arbitration (vc allocation)*/
		/* # of active arbiters */
		double nActiveArbs = e_fin * info->n_in * 0.2 / 2; //flit_rate * n_in * 0.2 / 2

		/* assume for each active arbiter, there is 2 requests on average
		 * (should use expected value from simulation) */
		Eavg += SIM_arbiter_stat_energy(&router->vc_out_arb, &info->vc_out_arb_queue_info, 1, next_depth, SIM_strcat(path, "vc allocation arbiter"), max_avg) * nActiveArbs;
		SIM_res_path(path, path_len);
	}
	else if (info->vc_allocator_type == TWO_STAGE_ARB && info->vc_in_arb_model && info->vc_out_arb_model) {
		/* first stage arbitration (vc allocation)*/
		if (info->vc_in_arb_model) {
			// # of active stage-1 arbiters (# of new header flits)
			double nActiveArbs = e_fin * info->n_in * 0.2;

			/* assume an active arbiter has n_v_channel/2 requests on average
			 * (should use expected value from simulation) */
			Eavg += SIM_arbiter_stat_energy(&router->vc_in_arb, &info->vc_in_arb_queue_info, info->n_v_channel/2, next_depth, SIM_strcat(path, "vc allocation arbiter (stage 1)"), max_avg) * nActiveArbs;
			SIM_res_path(path, path_len);
		}

		/* second stage arbitration (vc allocation)*/
		if (info->vc_out_arb_model) {
			/* # of active stage-2 arbiters */
			double nActiveArbs = e_fin * info->n_in * 0.2 / 2; //flit_rate * n_in * 0.2 / 2

			/* assume for each active arbiter, there is 2 requests on average
			 * (should use expected value from simulation) */
			Eavg += SIM_arbiter_stat_energy(&router->vc_out_arb, &info->vc_out_arb_queue_info, 2, next_depth, SIM_strcat(path, "vc allocation arbiter (stage 2)"), max_avg) * nActiveArbs;
			SIM_res_path(path, path_len);
		}
	}
	else if (info->vc_allocator_type == VC_SELECT && info->n_v_channel > 1 && info->n_in > 1) {
		/* vc selection: modeled as a small buffer read/written once per
		 * header flit */
		double n_read = e_fin * info->n_in * 0.2;
		double n_write = e_fin * info->n_in * 0.2;
		Eavg += SIM_array_stat_energy(&info->vc_select_buf_info, &router->vc_select_buf, n_read, n_write, next_depth, SIM_strcat(path, "vc selection"), max_avg);
		SIM_res_path(path, path_len);
	}
	else {
		vc_allocator_enabled = 0; //set to 0 means no vc allocator is used
	}

	if (info->n_v_channel > 1 && vc_allocator_enabled) {
		Pvc_arbiter_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn - Psw_arbiter_dyn;
		Pvc_arbiter_static = router->I_vc_arbiter_static * Vdd * SCALE_S;
		Pvc_arbiter = Pvc_arbiter_dyn + Pvc_arbiter_static;
	}

	/* router clock power (supported for 90nm and below) */
	if (PARM(TECH_POINT) <= 90) {
		Eavg += SIM_total_clockEnergy(info, router);
		Pclock_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn - Pvc_arbiter_dyn - Psw_arbiter_dyn;
		Pclock_static = router->I_clock_static * Vdd * SCALE_S;
		Pclock = Pclock_dyn + Pclock_static;
	}

	/* static power */
	Estatic = router->I_static * Vdd * Period * SCALE_S;
	SIM_print_stat_energy(SIM_strcat(path, "static energy"), Estatic, next_depth);
	SIM_res_path(path, path_len);
	Eavg += Estatic;

	Ptotal = Eavg * freq;

	SIM_print_stat_energy(path, Eavg, print_depth);

	if (plot_flag)
		fprintf(stdout, "Buffer:%g\tCrossbar:%g\tVC_allocator:%g\tSW_allocator:%g\tClock:%g\tTotal:%g\n", Pbuf, Pxbar, Pvc_arbiter, Psw_arbiter, Pclock, Ptotal);

	return Eavg;
}
/*
 * Average energy of a register/memory array during one unit of time:
 * decoder, wordlines, bitlines, cells, sense amps, and output drivers,
 * for n_read reads and n_write writes.  Prints the breakdown under `path`
 * (extended/restored per component) and returns the total.
 *
 * Shared-buffer hack: when `path` contains "central buffer", extra columns
 * for next-packet (NP) / next-chunk (NC) link pointers and a multicast
 * counter are charged on top of the data columns — see the share_flag
 * branches below.
 */
double SIM_reg_stat_energy(SIM_power_array_info_t *info, SIM_power_array_t *arr, double n_read, double n_write, int print_depth, char *path, int max_avg)
{
	double Eavg = 0, Eatomic, Estruct;
	int next_depth, next_next_depth;
	u_int path_len, next_path_len;
	/* hack to mimic central buffer */
	/* packet header probability */
	/* NP/NC/cnt widths are only assigned (and only read) when share_flag
	 * is set, so they are never used uninitialized */
	u_int NP_width, NC_width, cnt_width;
	int share_flag = 0;

	if (path && strstr(path, "central buffer")) {
		share_flag = 1;
		NP_width = NC_width = SIM_power_logtwo(info->n_set);
		/* assume no multicasting */
		cnt_width = 0;
	}

	next_depth = NEXT_DEPTH(print_depth);
	next_next_depth = NEXT_DEPTH(next_depth);
	path_len = SIM_power_strlen(path);

	/* decoder */
	if (info->row_dec_model) {
		Estruct = 0;
		SIM_power_strcat(path, "row decoder");
		next_path_len = SIM_power_strlen(path);

		/* assume switch probability 0.5 for address bits */
		Eatomic = arr->row_dec.e_chg_addr * arr->row_dec.n_bits * (max_avg ? 1 : 0.5) * (n_read + n_write);
		SIM_print_stat_energy(SIM_power_strcat(path, "input"), Eatomic, next_next_depth);
		SIM_power_res_path(path, next_path_len);
		Estruct += Eatomic;

		Eatomic = arr->row_dec.e_chg_output * (n_read + n_write);
		SIM_print_stat_energy(SIM_power_strcat(path, "output"), Eatomic, next_next_depth);
		SIM_power_res_path(path, next_path_len);
		Estruct += Eatomic;

		/* assume all 1st-level decoders change output */
		Eatomic = arr->row_dec.e_chg_l1 * arr->row_dec.n_in_2nd * (n_read + n_write);
		SIM_print_stat_energy(SIM_power_strcat(path, "internal node"), Eatomic, next_next_depth);
		SIM_power_res_path(path, next_path_len);
		Estruct += Eatomic;

		SIM_print_stat_energy(path, Estruct, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Estruct;
	}

	/* wordline */
	Estruct = 0;
	SIM_power_strcat(path, "wordline");
	next_path_len = SIM_power_strlen(path);

	Eatomic = arr->data_wordline.e_read * n_read;
	SIM_print_stat_energy(SIM_power_strcat(path, "read"), Eatomic, next_next_depth);
	SIM_power_res_path(path, next_path_len);
	Estruct += Eatomic;

	Eatomic = arr->data_wordline.e_write * n_write;
	SIM_print_stat_energy(SIM_power_strcat(path, "write"), Eatomic, next_next_depth);
	SIM_power_res_path(path, next_path_len);
	Estruct += Eatomic;

	SIM_print_stat_energy(path, Estruct, next_depth);
	SIM_power_res_path(path, path_len);
	Eavg += Estruct;

	/* bitlines */
	Estruct = 0;
	SIM_power_strcat(path, "bitline");
	next_path_len = SIM_power_strlen(path);

	/* differential (2-ended) bitlines always swing; single-ended ones
	 * switch with probability 0.5 in the average case */
	if (arr->data_bitline.end == 2) {
		Eatomic = arr->data_bitline.e_col_read * info->eff_data_cols * n_read;
		/* dirty hack */
		if (share_flag) {
			Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width + cnt_width) * n_read;
			/* read free list */
			Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width + cnt_width) * n_write;
		}
	}
	else {
		/* assume switch probability 0.5 for single-ended bitlines */
		Eatomic = arr->data_bitline.e_col_read * info->eff_data_cols * (max_avg ? 1 : 0.5) * n_read;
		/* dirty hack */
		if (share_flag) {
			/* assume no multicasting, cnt is always 0 */
			Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_read;
			/* read free list */
			Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_write;
		}
	}
	SIM_print_stat_energy(SIM_power_strcat(path, "read"), Eatomic, next_next_depth);
	SIM_power_res_path(path, next_path_len);
	Estruct += Eatomic;

	/* assume switch probability 0.5 for write bitlines */
	Eatomic = arr->data_bitline.e_col_write * info->data_width * (max_avg ? 1 : 0.5) * n_write;
	/* dirty hack */
	if (share_flag) {
		/* current NP and NC */
		Eatomic += arr->data_bitline.e_col_write * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_write;
		/* previous NP or NC */
		Eatomic += arr->data_bitline.e_col_write * NP_width * (max_avg ? 1 : 0.5) * n_write;
		/* update free list */
		Eatomic += arr->data_bitline.e_col_write * NC_width * (max_avg ? 1 : 0.5) * n_read;
	}
	SIM_print_stat_energy(SIM_power_strcat(path, "write"), Eatomic, next_next_depth);
	SIM_power_res_path(path, next_path_len);
	Estruct += Eatomic;

	Eatomic = arr->data_bitline_pre.e_charge * info->eff_data_cols * n_read;
	/* dirty hack */
	if (share_flag) {
		Eatomic += arr->data_bitline_pre.e_charge * (NP_width + NC_width + cnt_width) * n_read;
		/* read free list */
		Eatomic += arr->data_bitline_pre.e_charge * (NP_width + NC_width + cnt_width) * n_write;
	}
	SIM_print_stat_energy(SIM_power_strcat(path, "precharge"), Eatomic, next_next_depth);
	SIM_power_res_path(path, next_path_len);
	Estruct += Eatomic;

	SIM_print_stat_energy(path, Estruct, next_depth);
	SIM_power_res_path(path, path_len);
	Eavg += Estruct;

	/* memory cells */
	Estruct = 0;

	/* assume switch probability 0.5 for memory cells */
	Eatomic = arr->data_mem.e_switch * info->data_width * (max_avg ? 1 : 0.5) * n_write;
	/* dirty hack */
	if (share_flag) {
		/* current NP and NC */
		Eatomic += arr->data_mem.e_switch * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_write;
		/* previous NP or NC */
		Eatomic += arr->data_mem.e_switch * NP_width * (max_avg ? 1 : 0.5) * n_write;
		/* update free list */
		Eatomic += arr->data_mem.e_switch * NC_width * (max_avg ? 1 : 0.5) * n_read;
	}
	Estruct += Eatomic;

	SIM_print_stat_energy(SIM_power_strcat(path, "memory cell"), Estruct, next_depth);
	SIM_power_res_path(path, path_len);
	Eavg += Estruct;

	/* sense amplifier: only present for 2-ended (differential) arrays */
	if (info->data_end == 2) {
		Estruct = 0;

		Eatomic = arr->data_amp.e_access * info->eff_data_cols * n_read;
		/* dirty hack */
		if (share_flag) {
			Eatomic += arr->data_amp.e_access * (NP_width + NC_width + cnt_width) * n_read;
			/* read free list */
			Eatomic += arr->data_amp.e_access * (NP_width + NC_width + cnt_width) * n_write;
		}
		Estruct += Eatomic;

		SIM_print_stat_energy(SIM_power_strcat(path, "sense amplifier"), Estruct, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Estruct;
	}

	/* output driver */
	if (info->outdrv_model) {
		Estruct = 0;
		SIM_power_strcat(path, "output driver");
		next_path_len = SIM_power_strlen(path);

		Eatomic = arr->outdrv.e_select * n_read;
		SIM_print_stat_energy(SIM_power_strcat(path, "enable"), Eatomic, next_next_depth);
		SIM_power_res_path(path, next_path_len);
		Estruct += Eatomic;

		/* same switch probability as bitlines */
		Eatomic = arr->outdrv.e_chg_data * arr->outdrv.item_width * (max_avg ? 1 : 0.5) * info->n_item * info->assoc * n_read;
		SIM_print_stat_energy(SIM_power_strcat(path, "data"), Eatomic, next_next_depth);
		SIM_power_res_path(path, next_path_len);
		Estruct += Eatomic;

		/* assume 1 and 0 are uniformly distributed; in max mode only the
		 * costlier of the two output transitions is charged */
		if (arr->outdrv.e_out_1 >= arr->outdrv.e_out_0 || !max_avg) {
			Eatomic = arr->outdrv.e_out_1 * arr->outdrv.item_width * (max_avg ? 1 : 0.5) * n_read;
			SIM_print_stat_energy(SIM_power_strcat(path, "output 1"), Eatomic, next_next_depth);
			SIM_power_res_path(path, next_path_len);
			Estruct += Eatomic;
		}
		if (arr->outdrv.e_out_1 < arr->outdrv.e_out_0 || !max_avg) {
			Eatomic = arr->outdrv.e_out_0 * arr->outdrv.item_width * (max_avg ? 1 : 0.5) * n_read;
			SIM_print_stat_energy(SIM_power_strcat(path, "output 0"), Eatomic, next_next_depth);
			SIM_power_res_path(path, next_path_len);
			Estruct += Eatomic;
		}

		SIM_print_stat_energy(path, Estruct, next_depth);
		SIM_power_res_path(path, path_len);
		Eavg += Estruct;
	}

	SIM_print_stat_energy(path, Eavg, print_depth);
	return Eavg;
}