Beispiel #1
0
/*
 * Report and return the average dynamic energy of one crossbar per cycle.
 *
 * crsbar:      crossbar power model (matrix, cut-through, or multi-tree)
 * print_depth: hierarchy depth for the energy report
 * path:        report path prefix; restored to its original length before use
 * max_avg:     non-zero selects worst-case switching instead of average
 * n_data:      average # of flits traversing the crossbar per cycle
 *
 * Static (leakage) energy is reported but not folded into the return value.
 */
static double SIM_crossbar_stat_energy(SIM_power_crossbar_t *crsbar, int print_depth, char *path, int max_avg, double n_data)
{
  double e_total = 0, e_item, e_static;
  double sw_prob;
  int child_depth;
  u_int base_len;

  /* clamp the traffic estimate to the number of output ports */
  if (n_data > crsbar->n_out) {
    fprintf(stderr, "%s: overflow\n", path);
    n_data = crsbar->n_out;
  }

  child_depth = NEXT_DEPTH(print_depth);
  base_len = SIM_power_strlen(path);

  /* data bits toggle with probability 0.5 in the average case */
  sw_prob = (max_avg ? 1 : 0.5);

  switch (crsbar->model) {
    case MATRIX_CROSSBAR:
    case CUT_THRU_CROSSBAR:
    case MULTREE_CROSSBAR:
      /* input port charging */
      e_item = crsbar->e_chg_in * crsbar->data_width * sw_prob * n_data;
      SIM_print_stat_energy(SIM_power_strcat(path, "input"), e_item, child_depth);
      SIM_power_res_path(path, base_len);
      e_total += e_item;

      /* output port charging */
      e_item = crsbar->e_chg_out * crsbar->data_width * sw_prob * n_data;
      SIM_print_stat_energy(SIM_power_strcat(path, "output"), e_item, child_depth);
      SIM_power_res_path(path, base_len);
      e_total += e_item;

      /* one control toggle per flit */
      e_item = crsbar->e_chg_ctr * n_data;
      SIM_print_stat_energy(SIM_power_strcat(path, "control"), e_item, child_depth);
      SIM_power_res_path(path, base_len);
      e_total += e_item;

      /* multi-stage tree crossbars also charge internal tree nodes */
      if (crsbar->model == MULTREE_CROSSBAR && crsbar->depth > 1) {
        e_item = crsbar->e_chg_int * crsbar->data_width * (crsbar->depth - 1) * sw_prob * n_data;
        SIM_print_stat_energy(SIM_power_strcat(path, "internal node"), e_item, child_depth);
        SIM_power_res_path(path, base_len);
        e_total += e_item;
      }

      /* leakage over one cycle; reported separately, not added to e_total */
      e_static = crsbar->I_static * Vdd * Period * SCALE_S;
      SIM_print_stat_energy(SIM_power_strcat(path, "static energy"), e_static, child_depth);
      SIM_power_res_path(path, base_len);

      break;

    default:	/* some error handler */
      break;
  }

  SIM_print_stat_energy(path, e_total, print_depth);

  return e_total;
}
Beispiel #2
0
/*
 * Compute and return the average dynamic access energy of one array
 * (buffer) structure per cycle.  Supports two buffer models:
 *   - SRAM:     decoder + wordline + bitline + cell + sense amp + out driver
 *   - REGISTER: flip-flop based buffer
 *
 * info:    array organization (decoder/bitline/output driver models, widths)
 * arr:     per-component power models of the array
 * n_read:  average # of reads per cycle
 * n_write: average # of writes per cycle
 * print_depth/max_avg: report depth and worst-case/average selector
 *   (all hierarchical report calls are currently commented out)
 *
 * NOTE(review): Estatic is computed in both branches but never reported
 * (prints commented out) and never added to the returned Eavg — confirm
 * whether the caller accounts for static energy separately.
 */
double SIM_array_stat_energy(SIM_array_info_t *info, SIM_array_t *arr, double n_read, double n_write, int print_depth,int max_avg)
{
    double Eavg = 0, Eatomic, Estruct, Estatic;
    int next_depth, next_next_depth;
    /* NOTE(review): path_len/next_path_len are only referenced by the
     * commented-out report calls below and are currently never assigned. */
    u_int path_len, next_path_len;
    double avg_read, avg_write;

    /* hack to mimic central buffer */
    /* packet header probability */
    u_int NP_width = 0, NC_width = 0, cnt_width = 0;
    int share_flag = 0;

    /*if (path && strstr(path, "central buffer")) {
    	share_flag = 1;
    	NP_width = NC_width = SIM_logtwo(info->n_set);
    	// assume no multicasting
    	cnt_width = 0;
    }*/

    next_depth = NEXT_DEPTH(print_depth);
    next_next_depth = NEXT_DEPTH(next_depth);
    //path_len = SIM_strlen(path);

    if (info->arr_buf_type == SRAM)
    {
        /* decoder */
        if (info->row_dec_model)
        {
            Estruct = 0;
            //SIM_strcat(path, "row decoder");
            //next_path_len = SIM_strlen(path);

            /* assume switch probability 0.5 for address bits */
            Eatomic = arr->row_dec.e_chg_addr * arr->row_dec.n_bits * (max_avg ? 1 : 0.5) * (n_read + n_write);
            //SIM_print_stat_energy(SIM_strcat(path, "input"), Eatomic, next_next_depth);
            //SIM_res_path(path, next_path_len);
            Estruct += Eatomic;
            /* one decoder output fires per access */
            Eatomic = arr->row_dec.e_chg_output * (n_read + n_write);
            //SIM_print_stat_energy(SIM_strcat(path, "output"), Eatomic, next_next_depth);
            //SIM_res_path(path, next_path_len);
            Estruct += Eatomic;

            /* assume all 1st-level decoders change output */
            Eatomic = arr->row_dec.e_chg_l1 * arr->row_dec.n_in_2nd * (n_read + n_write);
            //SIM_print_stat_energy(SIM_strcat(path, "internal node"), Eatomic, next_next_depth);
            //SIM_res_path(path, next_path_len);
            Estruct += Eatomic;

            //SIM_print_stat_energy(path, Estruct, next_depth);
            //SIM_res_path(path, path_len);
            Eavg += Estruct;
        }

        /* wordline */
        Estruct = 0;
        //SIM_strcat(path, "wordline");
        //next_path_len = SIM_strlen(path);

        Eatomic = arr->data_wordline.e_read * n_read;

        //SIM_print_stat_energy(SIM_strcat(path, "read"), Eatomic, next_next_depth);
        //SIM_res_path(path, next_path_len);
        Estruct += Eatomic;

        Eatomic = arr->data_wordline.e_write * n_write;
        //SIM_print_stat_energy(SIM_strcat(path, "write"), Eatomic, next_next_depth);
        //SIM_res_path(path, next_path_len);
        Estruct += Eatomic;

        //SIM_print_stat_energy(path, Estruct, next_depth);
        //SIM_res_path(path, path_len);
        Eavg += Estruct;

        /* bitlines */
        Estruct = 0;
        //SIM_strcat(path, "bitline");
        //next_path_len = SIM_strlen(path);

        /* two-ended (differential) bitlines: every read discharges a column;
         * the single-ended case below only switches with probability 0.5 */
        if (arr->data_bitline.end == 2)
        {
            Eatomic = arr->data_bitline.e_col_read * info->eff_data_cols * n_read;
            /* dirty hack */
            if (share_flag)
            {
                Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width + cnt_width) * n_read;
                /* read free list */
                Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width + cnt_width) * n_write;
            }
        }
        else
        {
            /* assume switch probability 0.5 for single-ended bitlines */
            Eatomic = arr->data_bitline.e_col_read * info->eff_data_cols * (max_avg ? 1 : 0.5) * n_read;
            /* dirty hack */
            if (share_flag)
            {
                /* assume no multicasting, cnt is always 0 */
                Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_read;
                /* read free list */
                Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_write;
            }
        }
        //SIM_print_stat_energy(SIM_strcat(path, "read"), Eatomic, next_next_depth);
        //SIM_res_path(path, next_path_len);
        Estruct += Eatomic;

        /* assume switch probability 0.5 for write bitlines */
        Eatomic = arr->data_bitline.e_col_write * info->data_width * (max_avg ? 1 : 0.5) * n_write;
        /* dirty hack */
        if (share_flag)
        {
            /* current NP and NC */
            Eatomic += arr->data_bitline.e_col_write * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_write;
            /* previous NP or NC */
            Eatomic += arr->data_bitline.e_col_write * NP_width * (max_avg ? 1 : 0.5) * n_write;
            /* update free list */
            Eatomic += arr->data_bitline.e_col_write * NC_width * (max_avg ? 1 : 0.5) * n_read;
        }
        //SIM_print_stat_energy(SIM_strcat(path, "write"), Eatomic, next_next_depth);
        //SIM_res_path(path, next_path_len);
        Estruct += Eatomic;

        /* precharge: one per read on every effective column */
        Eatomic = arr->data_bitline_pre.e_charge * info->eff_data_cols * n_read;
        /* dirty hack */
        if (share_flag)
        {
            Eatomic += arr->data_bitline_pre.e_charge * (NP_width + NC_width + cnt_width) * n_read;
            /* read free list */
            Eatomic += arr->data_bitline_pre.e_charge * (NP_width + NC_width + cnt_width) * n_write;
        }
        //SIM_print_stat_energy(SIM_strcat(path, "precharge"), Eatomic, next_next_depth);
        //SIM_res_path(path, next_path_len);
        Estruct += Eatomic;

        //SIM_print_stat_energy(path, Estruct, next_depth);
        //SIM_res_path(path, path_len);
        Eavg += Estruct;

        /* memory cells */
        Estruct = 0;

        /* assume switch probability 0.5 for memory cells */
        Eatomic = arr->data_mem.e_switch * info->data_width * (max_avg ? 1 : 0.5) * n_write;
        /* dirty hack */
        if (share_flag)
        {
            /* current NP and NC */
            Eatomic += arr->data_mem.e_switch * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_write;
            /* previous NP or NC */
            Eatomic += arr->data_mem.e_switch * NP_width * (max_avg ? 1 : 0.5) * n_write;
            /* update free list */
            Eatomic += arr->data_mem.e_switch * NC_width * (max_avg ? 1 : 0.5) * n_read;
        }
        Estruct += Eatomic;

        //SIM_print_stat_energy(SIM_strcat(path, "memory cell"), Estruct, next_depth);
        //SIM_res_path(path, path_len);
        Eavg += Estruct;

        /* sense amplifier (only present with differential bitlines) */
        if (info->data_end == 2)
        {
            Estruct = 0;

            Eatomic = arr->data_amp.e_access * info->eff_data_cols * n_read;
            /* dirty hack */
            if (share_flag)
            {
                Eatomic += arr->data_amp.e_access * (NP_width + NC_width + cnt_width) * n_read;
                /* read free list */
                Eatomic += arr->data_amp.e_access * (NP_width + NC_width + cnt_width) * n_write;
            }
            Estruct += Eatomic;

            //SIM_print_stat_energy(SIM_strcat(path, "sense amplifier"), Estruct, next_depth);
            //SIM_res_path(path, path_len);
            Eavg += Estruct;
        }

        /* output driver */
        if (info->outdrv_model)
        {
            Estruct = 0;
            //SIM_strcat(path, "output driver");
            //next_path_len = SIM_strlen(path);

            Eatomic = arr->outdrv.e_select * n_read;
            //SIM_print_stat_energy(SIM_strcat(path, "enable"), Eatomic, next_next_depth);
            //SIM_res_path(path, next_path_len);
            Estruct += Eatomic;

            /* same switch probability as bitlines */
            Eatomic = arr->outdrv.e_chg_data * arr->outdrv.item_width * (max_avg ? 1 : 0.5) * info->n_item * info->assoc * n_read;
            //SIM_print_stat_energy(SIM_strcat(path, "data"), Eatomic, next_next_depth);
            //SIM_res_path(path, next_path_len);
            Estruct += Eatomic;

            /* assume 1 and 0 are uniformly distributed; in max_avg mode only
             * the more expensive of the two drive directions is counted */
            if (arr->outdrv.e_out_1 >= arr->outdrv.e_out_0 || !max_avg)
            {
                Eatomic = arr->outdrv.e_out_1 * arr->outdrv.item_width * (max_avg ? 1 : 0.5) * n_read;
                //SIM_print_stat_energy(SIM_strcat(path, "output 1"), Eatomic, next_next_depth);
                //SIM_res_path(path, next_path_len);
                Estruct += Eatomic;
            }

            if (arr->outdrv.e_out_1 < arr->outdrv.e_out_0 || !max_avg)
            {
                Eatomic = arr->outdrv.e_out_0 * arr->outdrv.item_width * (max_avg ? 1 : 0.5) * n_read;
                //SIM_print_stat_energy(SIM_strcat(path, "output 0"), Eatomic, next_next_depth);
                //SIM_res_path(path, next_path_len);
                Estruct += Eatomic;
            }

            //SIM_print_stat_energy(path, Estruct, next_depth);
            //SIM_res_path(path, path_len);
            Eavg += Estruct;
        }

        /* static power (computed but currently neither reported nor returned) */
        Estatic = arr->I_static * Vdd * Period * SCALE_S;

        //SIM_print_stat_energy(SIM_strcat(path, "static energy"), Estatic, next_depth);
        //SIM_res_path(path, path_len);

        //SIM_print_stat_energy(path, Eavg, print_depth);
    }
    else if (info->arr_buf_type == REGISTER)
    {
        Estruct = 0;

        /*average read energy for one buffer entry*/
        arr->ff.n_clock = info->data_width;

        avg_read = arr->ff.e_clock * arr->ff.n_clock * 0.5;

        /*average write energy for one buffer entry*/
        arr->ff.n_clock = info->data_width;
        arr->ff.n_switch = info->data_width* 0.5;

        avg_write = arr->ff.e_switch * arr->ff.n_switch + arr->ff.e_clock * arr->ff.n_clock ;

        /* For each read operation, the energy consists of one read operation
         * and n write operations, where n is the number of flits in the buffer
         * before the read.  Assume n is info->n_entry * 0.25.
         * NOTE(review): the assumption names n_entry but the code below uses
         * info->n_set — confirm these refer to the same quantity.
         */
        if (info->n_set > 1)
        {
            Eatomic = (avg_read + info->n_set * 0.25 * avg_write )* n_read;
        }
        else
        {
            Eatomic = avg_read * n_read;
        }
        //SIM_print_stat_energy(SIM_strcat(path, "read energy"), Eatomic, next_depth);
        //SIM_res_path(path, path_len);
        Estruct += Eatomic;

        /* write energy */
        Eatomic = avg_write * n_write;
        //SIM_print_stat_energy(SIM_strcat(path, "write energy"), Eatomic, next_depth);
        //SIM_res_path(path, path_len);
        Estruct += Eatomic;

        Eavg = Estruct;

        /* static power (computed but currently neither reported nor returned) */
        Estatic = arr->ff.I_static * Vdd * Period * SCALE_S;

        //SIM_print_stat_energy(SIM_strcat(path, "static energy"), Estatic, next_depth);
        //SIM_res_path(path, path_len);

        //SIM_print_stat_energy(path, Eavg, print_depth);
    }


    return Eavg;
}
Beispiel #3
0
/*
 * time unit:	1 cycle
 * e_fin:	average # of flits received by one input port during unit time
 * 		  (at most 0.5 for InfiniBand router)
 * e_buf_wrt:	average # of input buffer writes of all ports during unit time
 * 		e_buf_wrt = e_fin * n_buf_in
 * e_buf_rd:	average # of input buffer reads of all ports during unit time
 * 		e_buf_rd = e_buf_wrt
 * 		  (split into different input ports in program)
 * e_cbuf_fin:	average # of flits passing through the switch during unit time
 * 		e_cbuf_fin = e_fin * n_total_in
 * e_cbuf_wrt:	average # of central buffer writes during unit time
 * 		e_cbuf_wrt = e_cbuf_fin / (pipe_depth * pipe_width)
 * e_cbuf_rd:	average # of central buffer reads during unit time
 * 		e_cbuf_rd = e_cbuf_wrt
 * e_arb:	average # of arbitrations per arbiter during unit time
 * 		assume e_arb = 1
 *
 * NOTES: (1) negative print_depth means infinite print depth
 *
 * FIXME: (1) hack: SIM_reg_stat_energy cannot be used for shared buffer,
 *            we use it now anyway
 */
struct orResult SIM_router_stat_energy(SIM_power_router_info_t *info, SIM_power_router_t *router, int print_depth, char *path, int max_avg, double e_fin, int plot_flag, double freq)
{
  // SESC changes
  struct orResult orr ;
  double fac ;
  // end of changes 
  
  double Eavg = 0, Eatomic, Estruct;
  double Pbuf, Pswitch, Parbiter, Ptotal;
  double e_in_buf_rw, e_cache_in_buf_rw, e_mc_in_buf_rw, e_io_in_buf_rw;
  double e_cbuf_fin, e_cbuf_rw, e_out_buf_rw;
  int next_depth;
  u_int path_len, n_regs;

  /* expected value computation (see the per-variable notes in the
   * comment block preceding this function) */
  e_in_buf_rw       = e_fin * info->n_in;
  e_cache_in_buf_rw = e_fin * info->n_cache_in;
  e_mc_in_buf_rw    = e_fin * info->n_mc_in;
  e_io_in_buf_rw    = e_fin * info->n_io_in;
  e_cbuf_fin        = e_fin * info->n_total_in;
  e_out_buf_rw      = e_cbuf_fin / info->n_total_out * info->n_out;
  e_cbuf_rw         = e_cbuf_fin * info->flit_width / info->central_buf_info.blk_bits;

  next_depth = NEXT_DEPTH(print_depth);
  path_len = SIM_power_strlen(path);

  /* input buffers */
  if (info->in_buf) {
    Eavg += SIM_reg_stat_energy(&info->in_buf_info, &router->in_buf, e_in_buf_rw, e_in_buf_rw, next_depth, SIM_power_strcat(path, "input buffer"), max_avg);
    SIM_power_res_path(path, path_len);
  }
  if (info->cache_in_buf) {
    Eavg += SIM_reg_stat_energy(&info->cache_in_buf_info, &router->cache_in_buf, e_cache_in_buf_rw, e_cache_in_buf_rw, next_depth, SIM_power_strcat(path, "cache input buffer"), max_avg);
    SIM_power_res_path(path, path_len);
  }
  if (info->mc_in_buf) {
    Eavg += SIM_reg_stat_energy(&info->mc_in_buf_info, &router->mc_in_buf, e_mc_in_buf_rw, e_mc_in_buf_rw, next_depth, SIM_power_strcat(path, "memory controller input buffer"), max_avg);
    SIM_power_res_path(path, path_len);
  }
  if (info->io_in_buf) {
    Eavg += SIM_reg_stat_energy(&info->io_in_buf_info, &router->io_in_buf, e_io_in_buf_rw, e_io_in_buf_rw, next_depth, SIM_power_strcat(path, "I/O input buffer"), max_avg);
    SIM_power_res_path(path, path_len);
  }

  /* output buffers */
  if (info->out_buf) {
    /* local output ports don't use router buffers */
    Eavg += SIM_reg_stat_energy(&info->out_buf_info, &router->out_buf, e_out_buf_rw, e_out_buf_rw, next_depth, SIM_power_strcat(path, "output buffer"), max_avg);
    SIM_power_res_path(path, path_len);
  }

  /* everything accumulated so far came from buffers */
  Pbuf = Eavg * freq;

  /* main crossbar */
  if (info->crossbar_model) {
    Eavg += SIM_crossbar_stat_energy(&router->crossbar, next_depth, SIM_power_strcat(path, "crossbar"), max_avg, e_cbuf_fin);
    SIM_power_res_path(path, path_len);
  }

  /* central buffer */
  if (info->central_buf) {
    Eavg += SIM_reg_stat_energy(&info->central_buf_info, &router->central_buf, e_cbuf_rw, e_cbuf_rw, next_depth, SIM_power_strcat(path, "central buffer"), max_avg);
    SIM_power_res_path(path, path_len);
    
    Eavg += SIM_crossbar_stat_energy(&router->in_cbuf_crsbar, next_depth, SIM_power_strcat(path, "central buffer input crossbar"), max_avg, e_cbuf_fin);
    SIM_power_res_path(path, path_len);

    Eavg += SIM_crossbar_stat_energy(&router->out_cbuf_crsbar, next_depth, SIM_power_strcat(path, "central buffer output crossbar"), max_avg, e_cbuf_fin);
    SIM_power_res_path(path, path_len);

    /* dirty hack, REMEMBER to REMOVE Estruct and Eatomic */
    Estruct = 0;
    n_regs = info->central_buf_info.n_set * (info->central_buf_info.read_ports + info->central_buf_info.write_ports);

    /* ignore e_switch for now because we overestimate wordline driver cap */
    
    /* NOTE(review): (e_cbuf_rw + e_cbuf_rw) doubles e_cbuf_rw — presumably
     * standing in for read + write counts, which are equal here; confirm. */
    Eatomic = router->cbuf_ff.e_keep_0 * (info->pipe_depth - 1) * (n_regs - 2 * (e_cbuf_rw + e_cbuf_rw));
    SIM_print_stat_energy(SIM_power_strcat(path, "central buffer pipeline registers/keep 0"), Eatomic, NEXT_DEPTH(next_depth));
    SIM_power_res_path(path, path_len);
    Estruct += Eatomic;

    Eatomic = router->cbuf_ff.e_clock * (info->pipe_depth - 1) * n_regs;
    SIM_print_stat_energy(SIM_power_strcat(path, "central buffer pipeline registers/clock"), Eatomic, NEXT_DEPTH(next_depth));
    SIM_power_res_path(path, path_len);
    Estruct += Eatomic;

    SIM_print_stat_energy(SIM_power_strcat(path, "central buffer pipeline registers"), Estruct, next_depth);
    SIM_power_res_path(path, path_len);
    Eavg += Estruct;
  }

  /* energy added since Pbuf: crossbar + central buffer */
  Pswitch = Eavg * freq - Pbuf;

  /* input (local) arbiter */
  if (info->in_arb_model) {
    Eavg += SIM_arbiter_stat_energy(&router->in_arb, &info->in_arb_queue_info, e_fin / info->in_n_switch, next_depth, SIM_power_strcat(path, "input arbiter"), max_avg) * info->in_n_switch * info->n_in;
    SIM_power_res_path(path, path_len);

    if (info->n_cache_in) {
      Eavg += SIM_arbiter_stat_energy(&router->cache_in_arb, &info->cache_in_arb_queue_info, e_fin / info->cache_n_switch, next_depth, SIM_power_strcat(path, "cache input arbiter"), max_avg) * info->cache_n_switch * info->n_cache_in;
      SIM_power_res_path(path, path_len);
    }

    if (info->n_mc_in) {
      Eavg += SIM_arbiter_stat_energy(&router->mc_in_arb, &info->mc_in_arb_queue_info, e_fin / info->mc_n_switch, next_depth, SIM_power_strcat(path, "memory controller input arbiter"), max_avg) * info->mc_n_switch * info->n_mc_in;
      SIM_power_res_path(path, path_len);
    }

    if (info->n_io_in) {
      Eavg += SIM_arbiter_stat_energy(&router->io_in_arb, &info->io_in_arb_queue_info, e_fin / info->io_n_switch, next_depth, SIM_power_strcat(path, "I/O input arbiter"), max_avg) * info->io_n_switch * info->n_io_in;
      SIM_power_res_path(path, path_len);
    }
  }

  /* output (global) arbiter */
  if (info->out_arb_model) {
    Eavg += SIM_arbiter_stat_energy(&router->out_arb, &info->out_arb_queue_info, e_cbuf_fin / info->n_switch_out, next_depth, SIM_power_strcat(path, "output arbiter"), max_avg) * info->n_switch_out;
    SIM_power_res_path(path, path_len);
  }

  /* decompose total power into buffer / switch / arbiter shares */
  Ptotal = Eavg * freq;
  Parbiter = Ptotal - Pbuf - Pswitch;
  
  SIM_print_stat_energy(path, Eavg, print_depth);

  /* plot_flag is only consulted in this disabled diagnostic block */
#if 0
  if (plot_flag)
    fprintf(stderr, "pbuf=%g\n pswitch=%g\n parbiter=%g \n ptotal=%g\n", Pbuf, Pswitch, Parbiter, Ptotal);
#endif

  /* P = E * freq, so P/fac = E * 1e9: energies returned in nano-scale
   * units (presumably nJ — confirm the unit of the underlying E values) */
  fac = freq/1e9 ; 
  orr.totEnergy = Ptotal/fac ;
  orr.bufEnergy = Pbuf/fac ;
  orr.switchEnergy = Pswitch/fac ;
  orr.arbEnergy = Parbiter/fac ;

  return orr ;
}
Beispiel #4
0
/*
 * Report and return the average arbitration energy per cycle of one arbiter.
 *
 * arb:         arbiter power model (round-robin, matrix, or queuing)
 * info:        backing array info; only used by the queuing arbiter
 * n_req:       average # of requests per cycle (clamped to arb->req_width)
 * print_depth: hierarchy depth for the energy report
 * path:        report path prefix; restored to its original length before use
 * max_avg:     non-zero selects worst-case switching instead of average
 *
 * Returns the average dynamic energy per cycle.
 */
static double SIM_arbiter_stat_energy(SIM_power_arbiter_t *arb, SIM_power_array_info_t *info, double n_req, int print_depth, char *path, int max_avg)
{
  double Eavg = 0, Estruct, Eatomic;
  int next_depth, next_next_depth;
  double total_pri, n_chg_pri, n_grant;
  u_int path_len, next_path_len;

  next_depth = NEXT_DEPTH(print_depth);
  next_next_depth = NEXT_DEPTH(next_depth);
  path_len = SIM_power_strlen(path);

  /* energy cycle distribution: clamp the request rate to the arbiter width */
  if (n_req > arb->req_width) {
    fprintf(stderr, "arbiter overflow\n");
    n_req = arb->req_width;
  }

  /* At most one grant per cycle; with sparse requests a grant occurs once
   * every ceil(1/n_req) cycles on average.  Guard n_req == 0 explicitly
   * instead of relying on IEEE-754 semantics of 1.0/0.0 (floating division
   * by zero is undefined behavior outside Annex F implementations). */
  if (n_req >= 1)
    n_grant = 1;
  else if (n_req > 0)
    n_grant = 1.0 / ceil(1.0 / n_req);
  else
    n_grant = 0;

  switch (arb->model) {
    case RR_ARBITER:
      /* FIXME: we may overestimate request switch */
      Eatomic = arb->e_chg_req * n_req;
      SIM_print_stat_energy(SIM_power_strcat(path, "request"), Eatomic, next_depth);
      SIM_power_res_path(path, path_len);
      Eavg += Eatomic;

      Eatomic = arb->e_chg_grant * n_grant;
      SIM_print_stat_energy(SIM_power_strcat(path, "grant"), Eatomic, next_depth);
      SIM_power_res_path(path, path_len);
      Eavg += Eatomic;

      /* assume carry signal propagates half length in average case */
      /* carry does not propagate in maximum case, i.e. all carrys go down */
      Eatomic = arb->e_chg_carry * arb->req_width * (max_avg ? 1 : 0.5) * n_grant;
      SIM_print_stat_energy(SIM_power_strcat(path, "carry"), Eatomic, next_depth);
      SIM_power_res_path(path, path_len);
      Eavg += Eatomic;

      Eatomic = arb->e_chg_carry_in * (arb->req_width * (max_avg ? 1 : 0.5) - 1) * n_grant;
      SIM_print_stat_energy(SIM_power_strcat(path, "internal carry"), Eatomic, next_depth);
      SIM_power_res_path(path, path_len);
      Eavg += Eatomic;

      /* priority registers: one set/clear pair toggles per grant */
      Estruct = 0;
      SIM_power_strcat(path, "priority");
      next_path_len = SIM_power_strlen(path);

      Eatomic = arb->pri_ff.e_switch * 2 * n_grant;
      SIM_print_stat_energy(SIM_power_strcat(path, "switch"), Eatomic, next_next_depth);
      SIM_power_res_path(path, next_path_len);
      Estruct += Eatomic;

      Eatomic = arb->pri_ff.e_keep_0 * (arb->req_width - 2 * n_grant);
      SIM_print_stat_energy(SIM_power_strcat(path, "keep 0"), Eatomic, next_next_depth);
      SIM_power_res_path(path, next_path_len);
      Estruct += Eatomic;

      Eatomic = arb->pri_ff.e_clock * arb->req_width;
      SIM_print_stat_energy(SIM_power_strcat(path, "clock"), Eatomic, next_next_depth);
      SIM_power_res_path(path, next_path_len);
      Estruct += Eatomic;

      SIM_print_stat_energy(path, Estruct, next_depth);
      SIM_power_res_path(path, path_len);
      Eavg += Estruct;
      break;

    case MATRIX_ARBITER:
      /* n*(n-1)/2 priority bits in the matrix */
      total_pri = arb->req_width * (arb->req_width - 1) * 0.5;
      /* assume switch probability 0.5 for priorities */
      n_chg_pri = (arb->req_width - 1) * (max_avg ? 1 : 0.5);

      /* FIXME: we may overestimate request switch */
      Eatomic = arb->e_chg_req * n_req;
      SIM_print_stat_energy(SIM_power_strcat(path, "request"), Eatomic, next_depth);
      SIM_power_res_path(path, path_len);
      Eavg += Eatomic;

      Eatomic = arb->e_chg_grant * n_grant;
      SIM_print_stat_energy(SIM_power_strcat(path, "grant"), Eatomic, next_depth);
      SIM_power_res_path(path, path_len);
      Eavg += Eatomic;

      /* priority registers */
      Estruct = 0;
      SIM_power_strcat(path, "priority");
      next_path_len = SIM_power_strlen(path);

      Eatomic = arb->pri_ff.e_switch * n_chg_pri * n_grant;
      SIM_print_stat_energy(SIM_power_strcat(path, "switch"), Eatomic, next_next_depth);
      SIM_power_res_path(path, next_path_len);
      Estruct += Eatomic;

      /* assume 1 and 0 are uniformly distributed; in max_avg mode only
       * the more expensive keep state is counted */
      if (arb->pri_ff.e_keep_0 >= arb->pri_ff.e_keep_1 || !max_avg) {
        Eatomic = arb->pri_ff.e_keep_0 * (total_pri - n_chg_pri * n_grant) * (max_avg ? 1 : 0.5);
        SIM_print_stat_energy(SIM_power_strcat(path, "keep 0"), Eatomic, next_next_depth);
        SIM_power_res_path(path, next_path_len);
        Estruct += Eatomic;
      }

      if (arb->pri_ff.e_keep_0 < arb->pri_ff.e_keep_1 || !max_avg) {
        Eatomic = arb->pri_ff.e_keep_1 * (total_pri - n_chg_pri * n_grant) * (max_avg ? 1 : 0.5);
        SIM_print_stat_energy(SIM_power_strcat(path, "keep 1"), Eatomic, next_next_depth);
        SIM_power_res_path(path, next_path_len);
        Estruct += Eatomic;
      }

      Eatomic = arb->pri_ff.e_clock * total_pri;
      SIM_print_stat_energy(SIM_power_strcat(path, "clock"), Eatomic, next_next_depth);
      SIM_power_res_path(path, next_path_len);
      Estruct += Eatomic;

      SIM_print_stat_energy(path, Estruct, next_depth);
      SIM_power_res_path(path, path_len);
      Eavg += Estruct;

      /* based on above assumptions */
      if (max_avg)
        /* min(p,n/2)(n-1) + 2(n-1) */
        Eatomic = arb->e_chg_mint * (MIN(n_req, arb->req_width * 0.5) + 2) * (arb->req_width - 1);
      else
        /* p(n-1)/2 + (n-1)/2 */
        Eatomic = arb->e_chg_mint * (n_req + 1) * (arb->req_width - 1) * 0.5;
      SIM_print_stat_energy(SIM_power_strcat(path, "internal node"), Eatomic, next_depth);
      SIM_power_res_path(path, path_len);
      Eavg += Eatomic;
      break;

    case QUEUE_ARBITER:
      /* FIXME: what if n_req > 1? */
      Eavg = SIM_reg_stat_energy(info, &arb->queue, n_req, n_grant, next_depth, SIM_power_strcat(path, "queue"), max_avg);
      SIM_power_res_path(path, path_len);
      break;

    default:
      /* unknown arbiter model: contribute no energy */
      break;
  }

  SIM_print_stat_energy(path, Eavg, print_depth);

  return Eavg;
}
Beispiel #5
0
/*
 * time unit:	1 cycle
 * e_fin:	average # of flits received by one input port during unit time
 * 		  (at most 0.5 for InfiniBand router)
 * e_buf_wrt:	average # of input buffer writes of all ports during unit time
 * 		e_buf_wrt = e_fin * n_buf_in
 * e_buf_rd:	average # of input buffer reads of all ports during unit time
 * 		e_buf_rd = e_buf_wrt
 * 		  (split into different input ports in program)
 * e_cbuf_fin:	average # of flits passing through the switch during unit time
 * 		e_cbuf_fin = e_fin * n_total_in
 * e_cbuf_wrt:	average # of central buffer writes during unit time
 * 		e_cbuf_wrt = e_cbuf_fin / (pipe_depth * pipe_width)
 * e_cbuf_rd:	average # of central buffer reads during unit time
 * 		e_cbuf_rd = e_cbuf_wrt
 * e_arb:	average # of arbitrations per arbiter during unit time
 * 		assume e_arb = 1
 *
 * NOTES: (1) negative print_depth means infinite print depth
 *
 * FIXME: (1) hack: SIM_array_stat_energy cannot be used for shared buffer,
 *            we use it now anyway
 */
double SIM_router_stat_energy(SIM_router_info_t *info, SIM_router_power_t *router, int print_depth, char *path, int max_avg, double e_fin, int plot_flag, double freq)
{
	double Eavg = 0, Eatomic, Estruct, Estatic = 0;
	double Pbuf = 0, Pxbar = 0, Pvc_arbiter = 0, Psw_arbiter = 0, Pclock = 0, Ptotal = 0;
	double Pbuf_static = 0, Pxbar_static = 0, Pvc_arbiter_static = 0, Psw_arbiter_static = 0, Pclock_static = 0;
	double Pbuf_dyn = 0, Pxbar_dyn = 0, Pvc_arbiter_dyn = 0, Psw_arbiter_dyn = 0, Pclock_dyn = 0;
	double e_in_buf_rw, e_cache_in_buf_rw, e_mc_in_buf_rw, e_io_in_buf_rw;
	double e_cbuf_fin, e_cbuf_rw, e_out_buf_rw;
	int next_depth;
	u_int path_len, n_regs;
	int vc_allocator_enabled = 1;

	/* expected value computation */
	e_in_buf_rw       = e_fin * info->n_in;
	e_cache_in_buf_rw = e_fin * info->n_cache_in;
	e_mc_in_buf_rw    = e_fin * info->n_mc_in;
	e_io_in_buf_rw    = e_fin * info->n_io_in;
	e_cbuf_fin        = e_fin * info->n_total_in;
	e_out_buf_rw      = e_cbuf_fin / info->n_total_out * info->n_out;
	e_cbuf_rw         = e_cbuf_fin * info->flit_width / info->central_buf_info.blk_bits;

	next_depth = NEXT_DEPTH(print_depth);
	path_len = SIM_strlen(path);

	/* input buffers */
	if (info->in_buf) {
		Eavg += SIM_array_stat_energy(&info->in_buf_info, &router->in_buf, e_in_buf_rw, e_in_buf_rw, next_depth, SIM_strcat(path, "input buffer"), max_avg); 
		SIM_res_path(path, path_len);
	}
	if (info->cache_in_buf) {
		Eavg += SIM_array_stat_energy(&info->cache_in_buf_info, &router->cache_in_buf, e_cache_in_buf_rw, e_cache_in_buf_rw, next_depth, SIM_strcat(path, "cache input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}
	if (info->mc_in_buf) {
		Eavg += SIM_array_stat_energy(&info->mc_in_buf_info, &router->mc_in_buf, e_mc_in_buf_rw, e_mc_in_buf_rw, next_depth, SIM_strcat(path, "memory controller input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}
	if (info->io_in_buf) {
		Eavg += SIM_array_stat_energy(&info->io_in_buf_info, &router->io_in_buf, e_io_in_buf_rw, e_io_in_buf_rw, next_depth, SIM_strcat(path, "I/O input buffer"), max_avg);
		SIM_res_path(path, path_len);
	}

	/* output buffers */
	if (info->out_buf) {
		/* local output ports don't use router buffers */
		Eavg += SIM_array_stat_energy(&info->out_buf_info, &router->out_buf, e_out_buf_rw, e_out_buf_rw, next_depth, SIM_strcat(path, "output buffer"), max_avg); 
		SIM_res_path(path, path_len);
	}

	/* central buffer */
	if (info->central_buf) {
		Eavg += SIM_array_stat_energy(&info->central_buf_info, &router->central_buf, e_cbuf_rw, e_cbuf_rw, next_depth, SIM_strcat(path, "central buffer"), max_avg);
		SIM_res_path(path, path_len);

		Eavg += SIM_crossbar_stat_energy(&router->in_cbuf_crsbar, next_depth, SIM_strcat(path, "central buffer input crossbar"), max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);

		Eavg += SIM_crossbar_stat_energy(&router->out_cbuf_crsbar, next_depth, SIM_strcat(path, "central buffer output crossbar"), max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);

		/* dirty hack, REMEMBER to REMOVE Estruct and Eatomic */
		Estruct = 0;
		n_regs = info->central_buf_info.n_set * (info->central_buf_info.read_ports + info->central_buf_info.write_ports);

		/* ignore e_switch for now because we overestimate wordline driver cap */

		Eatomic = router->cbuf_ff.e_keep_0 * (info->pipe_depth - 1) * (n_regs - 2 * (e_cbuf_rw + e_cbuf_rw));
		SIM_print_stat_energy(SIM_strcat(path, "central buffer pipeline registers/keep 0"), Eatomic, NEXT_DEPTH(next_depth));
		SIM_res_path(path, path_len);
		Estruct += Eatomic;

		Eatomic = router->cbuf_ff.e_clock * (info->pipe_depth - 1) * n_regs;
		SIM_print_stat_energy(SIM_strcat(path, "central buffer pipeline registers/clock"), Eatomic, NEXT_DEPTH(next_depth));
		SIM_res_path(path, path_len);
		Estruct += Eatomic;

		SIM_print_stat_energy(SIM_strcat(path, "central buffer pipeline registers"), Estruct, next_depth);
		SIM_res_path(path, path_len);
		Eavg += Estruct;
	}

	Pbuf_dyn = Eavg * freq;
	Pbuf_static = router->I_buf_static * Vdd * SCALE_S;
	Pbuf = Pbuf_dyn + Pbuf_static;

	/* main crossbar */
	if (info->crossbar_model) {
		Eavg += SIM_crossbar_stat_energy(&router->crossbar, next_depth, SIM_strcat(path, "crossbar"), max_avg, e_cbuf_fin);
		SIM_res_path(path, path_len);
	}

	Pxbar_dyn = (Eavg * freq - Pbuf_dyn);
	Pxbar_static = router->I_crossbar_static * Vdd * SCALE_S;
	Pxbar = Pxbar_dyn + Pxbar_static;

	/* switch allocation (arbiter energy only) */
	/* input (local) arbiter for switch allocation*/
	if (info->sw_in_arb_model) {
		/* assume # of active input arbiters is (info->in_n_switch * info->n_in * e_fin) 
		 * assume (info->n_v_channel*info->n_v_class)/2 vcs are making request at each arbiter */

		Eavg += SIM_arbiter_stat_energy(&router->sw_in_arb, &info->sw_in_arb_queue_info, (info->n_v_channel*info->n_v_class)/2, next_depth, SIM_strcat(path, "switch allocator input arbiter"), max_avg) * info->in_n_switch * info->n_in * e_fin;
		SIM_res_path(path, path_len);

		if (info->n_cache_in) {
			Eavg += SIM_arbiter_stat_energy(&router->cache_in_arb, &info->cache_in_arb_queue_info, e_fin / info->cache_n_switch, next_depth, SIM_strcat(path, "cache input arbiter"), max_avg) * info->cache_n_switch * info->n_cache_in;
			SIM_res_path(path, path_len);
		}

		if (info->n_mc_in) {
			Eavg += SIM_arbiter_stat_energy(&router->mc_in_arb, &info->mc_in_arb_queue_info, e_fin / info->mc_n_switch, next_depth, SIM_strcat(path, "memory controller input arbiter"), max_avg) * info->mc_n_switch * info->n_mc_in;
			SIM_res_path(path, path_len);
		}

		if (info->n_io_in) {
			Eavg += SIM_arbiter_stat_energy(&router->io_in_arb, &info->io_in_arb_queue_info, e_fin / info->io_n_switch, next_depth, SIM_strcat(path, "I/O input arbiter"), max_avg) * info->io_n_switch * info->n_io_in;
			SIM_res_path(path, path_len);
		}
	}

	/* output (global) arbiter for switch allocation*/
	if (info->sw_out_arb_model) {
		/* assume # of active output arbiters is (info->n_switch_out * (e_cbuf_fin/info->n_switch_out)) 
		 * assume (info->n_in)/2 request at each output arbiter */

		Eavg += SIM_arbiter_stat_energy(&router->sw_out_arb, &info->sw_out_arb_queue_info, info->n_in / 2, next_depth, SIM_strcat(path, "switch allocator output arbiter"), max_avg) * info->n_switch_out * (e_cbuf_fin / info->n_switch_out);

		SIM_res_path(path, path_len); 
	}

	if(info->sw_out_arb_model || info->sw_out_arb_model){
		Psw_arbiter_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn;
		Psw_arbiter_static = router->I_sw_arbiter_static * Vdd * SCALE_S;
		Psw_arbiter = Psw_arbiter_dyn + Psw_arbiter_static;
	}

	/* virtual channel allocation (arbiter energy only) */
	/* HACKs:
	 *   - assume 1 header flit in every 5 flits for now, hence * 0.2  */

	if(info->vc_allocator_type == ONE_STAGE_ARB && info->vc_out_arb_model  ){
		/* one stage arbitration (vc allocation)*/
		/* # of active arbiters */
		double nActiveArbs = e_fin * info->n_in * 0.2 / 2; //flit_rate * n_in * 0.2 / 2

		/* assume for each active arbiter, there is 2 requests on average (should use expected value from simulation) */	
		Eavg += SIM_arbiter_stat_energy(&router->vc_out_arb, &info->vc_out_arb_queue_info,
				1, next_depth,
				SIM_strcat(path, "vc allocation arbiter"),
				max_avg) * nActiveArbs;

		SIM_res_path(path, path_len);
	}
	else if(info->vc_allocator_type == TWO_STAGE_ARB && info->vc_in_arb_model && info->vc_out_arb_model){
		/* first stage arbitration (vc allocation)*/
		if (info->vc_in_arb_model) {
			// # of active stage-1 arbiters (# of new header flits)
			double nActiveArbs = e_fin * info->n_in * 0.2;


			/* assume an active arbiter has n_v_channel/2 requests on average (should use expected value from simulation) */
			Eavg += SIM_arbiter_stat_energy(&router->vc_in_arb, &info->vc_in_arb_queue_info, info->n_v_channel/2, next_depth, 
					SIM_strcat(path, "vc allocation arbiter (stage 1)"),
					max_avg) * nActiveArbs; 

			SIM_res_path(path, path_len);
		}

		/* second stage arbitration (vc allocation)*/
		if (info->vc_out_arb_model) {
			/* # of active stage-2 arbiters */
			double nActiveArbs = e_fin * info->n_in * 0.2 / 2; //flit_rate * n_in * 0.2 / 2

			/* assume for each active arbiter, there is 2 requests on average (should use expected value from simulation) */
			Eavg += SIM_arbiter_stat_energy(&router->vc_out_arb, &info->vc_out_arb_queue_info,
					2, next_depth, 
					SIM_strcat(path, "vc allocation arbiter (stage 2)"),
					max_avg) * nActiveArbs;

			SIM_res_path(path, path_len);
		}
	}
	else if(info->vc_allocator_type == VC_SELECT && info->n_v_channel > 1 && info->n_in > 1){
		double n_read = e_fin * info->n_in * 0.2;
		double n_write = e_fin * info->n_in * 0.2;
		Eavg += SIM_array_stat_energy(&info->vc_select_buf_info, &router->vc_select_buf, n_read , n_write, next_depth, SIM_strcat(path, "vc selection"), max_avg);
		SIM_res_path(path, path_len);

	}
	else{
		vc_allocator_enabled = 0; //set to 0 means no vc allocator is used
	}

	if(info->n_v_channel > 1 && vc_allocator_enabled){
		Pvc_arbiter_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn - Psw_arbiter_dyn; 
		Pvc_arbiter_static = router->I_vc_arbiter_static * Vdd * SCALE_S;
		Pvc_arbiter = Pvc_arbiter_dyn + Pvc_arbiter_static;
	}

	/*router clock power (supported for 90nm and below) */
	if(PARM(TECH_POINT) <=90){
		Eavg += SIM_total_clockEnergy(info, router);
		Pclock_dyn = Eavg * freq - Pbuf_dyn - Pxbar_dyn - Pvc_arbiter_dyn - Psw_arbiter_dyn;
		Pclock_static = router->I_clock_static * Vdd * SCALE_S;
		Pclock = Pclock_dyn + Pclock_static;
	}

	/* static power */
	Estatic = router->I_static * Vdd * Period * SCALE_S;
	SIM_print_stat_energy(SIM_strcat(path, "static energy"), Estatic, next_depth);
	SIM_res_path(path, path_len);
	Eavg += Estatic;
	Ptotal = Eavg * freq;

	SIM_print_stat_energy(path, Eavg, print_depth);

	if (plot_flag)
		fprintf(stdout, "Buffer:%g\tCrossbar:%g\tVC_allocator:%g\tSW_allocator:%g\tClock:%g\tTotal:%g\n", Pbuf, Pxbar, Pvc_arbiter, Psw_arbiter, Pclock, Ptotal); 

	return Eavg;
}
Beispiel #6
0
/*
 * Compute the average energy of a register/array structure for n_read read
 * accesses and n_write write accesses, printing a per-component breakdown
 * (row decoder, wordline, bitline, memory cell, sense amplifier, output
 * driver) as it goes.
 *
 * Parameters:
 *   info        - array configuration: which component models are enabled
 *                 (row_dec_model, outdrv_model, ...), widths and column counts
 *   arr         - per-component energy coefficients for this array instance
 *   n_read      - expected number of reads over the measured interval
 *   n_write     - expected number of writes over the measured interval
 *   print_depth - indentation depth for the statistics printout
 *   path        - mutable component-name buffer; sub-component labels are
 *                 appended with SIM_power_strcat before each print and the
 *                 buffer is trimmed back with SIM_power_res_path afterwards
 *   max_avg     - nonzero selects worst-case switching (factor 1) instead of
 *                 the assumed 0.5 average switching probability
 *
 * Returns the total average energy (Eavg) summed over all modeled components.
 */
double SIM_reg_stat_energy(SIM_power_array_info_t *info, SIM_power_array_t *arr, double n_read, double n_write, int print_depth, char *path, int max_avg)
{
  double Eavg = 0, Eatomic, Estruct;
  int next_depth, next_next_depth;
  u_int path_len, next_path_len;

  /* hack to mimic central buffer */
  /* packet header probability */
  /* NP/NC are extra per-entry bookkeeping fields charged alongside the data
   * columns when this array serves as the shared central buffer; each is wide
   * enough to index the n_set entries (presumably next/previous-pointer
   * fields of the shared-buffer linked list — TODO confirm against caller). */
  u_int NP_width, NC_width, cnt_width;
  int share_flag = 0;

  /* only arrays whose path label mentions "central buffer" get the extra
   * NP/NC/cnt column energy added below (the "dirty hack" branches) */
  if (path && strstr(path, "central buffer")) {
    share_flag = 1;
    NP_width = NC_width = SIM_power_logtwo(info->n_set);
    /* assume no multicasting */
    cnt_width = 0;
  }

  next_depth = NEXT_DEPTH(print_depth);
  next_next_depth = NEXT_DEPTH(next_depth);
  path_len = SIM_power_strlen(path);

  /* decoder */
  if (info->row_dec_model) {
    Estruct = 0;
    SIM_power_strcat(path, "row decoder");
    /* remember length with "row decoder" appended so sub-labels below can be
     * trimmed back to it after each print */
    next_path_len = SIM_power_strlen(path);

    /* assume switch probability 0.5 for address bits */
    Eatomic = arr->row_dec.e_chg_addr * arr->row_dec.n_bits * (max_avg ? 1 : 0.5) * (n_read + n_write);
    SIM_print_stat_energy(SIM_power_strcat(path, "input"), Eatomic, next_next_depth);
    SIM_power_res_path(path, next_path_len);
    Estruct += Eatomic;

    Eatomic = arr->row_dec.e_chg_output * (n_read + n_write);
    SIM_print_stat_energy(SIM_power_strcat(path, "output"), Eatomic, next_next_depth);
    SIM_power_res_path(path, next_path_len);
    Estruct += Eatomic;

    /* assume all 1st-level decoders change output */
    Eatomic = arr->row_dec.e_chg_l1 * arr->row_dec.n_in_2nd * (n_read + n_write);
    SIM_print_stat_energy(SIM_power_strcat(path, "internal node"), Eatomic, next_next_depth);
    SIM_power_res_path(path, next_path_len);
    Estruct += Eatomic;

    SIM_print_stat_energy(path, Estruct, next_depth);
    SIM_power_res_path(path, path_len);
    Eavg += Estruct;
  }

  /* wordline: driven once per access, read and write costs modeled separately */
  Estruct = 0;
  SIM_power_strcat(path, "wordline");
  next_path_len = SIM_power_strlen(path);

  Eatomic = arr->data_wordline.e_read * n_read;
  SIM_print_stat_energy(SIM_power_strcat(path, "read"), Eatomic, next_next_depth);
  SIM_power_res_path(path, next_path_len);
  Estruct += Eatomic;

  Eatomic = arr->data_wordline.e_write * n_write;
  SIM_print_stat_energy(SIM_power_strcat(path, "write"), Eatomic, next_next_depth);
  SIM_power_res_path(path, next_path_len);
  Estruct += Eatomic;

  SIM_print_stat_energy(path, Estruct, next_depth);
  SIM_power_res_path(path, path_len);
  Eavg += Estruct;

  /* bitlines */
  Estruct = 0;
  SIM_power_strcat(path, "bitline");
  next_path_len = SIM_power_strlen(path);

  /* end == 2 presumably means differential (two-ended) bitlines: every read
   * discharges one of the pair, so no 0.5 switching factor applies — TODO
   * confirm against the bitline model */
  if (arr->data_bitline.end == 2) {
    Eatomic = arr->data_bitline.e_col_read * info->eff_data_cols * n_read;
    /* dirty hack */
    if (share_flag) {
      Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width + cnt_width) * n_read;
      /* read free list */
      Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width + cnt_width) * n_write;
    }
  }
  else {
    /* assume switch probability 0.5 for single-ended bitlines */
    Eatomic = arr->data_bitline.e_col_read * info->eff_data_cols * (max_avg ? 1 : 0.5) * n_read;
    /* dirty hack */
    if (share_flag) {
      /* assume no multicasting, cnt is always 0 */
      Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_read;
      /* read free list */
      Eatomic += arr->data_bitline.e_col_read * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_write;
    }
  }
  SIM_print_stat_energy(SIM_power_strcat(path, "read"), Eatomic, next_next_depth);
  SIM_power_res_path(path, next_path_len);
  Estruct += Eatomic;

  /* assume switch probability 0.5 for write bitlines */
  Eatomic = arr->data_bitline.e_col_write * info->data_width * (max_avg ? 1 : 0.5) * n_write;
  /* dirty hack */
  if (share_flag) {
    /* current NP and NC */
    Eatomic += arr->data_bitline.e_col_write * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_write;
    /* previous NP or NC */
    Eatomic += arr->data_bitline.e_col_write * NP_width * (max_avg ? 1 : 0.5) * n_write;
    /* update free list */
    Eatomic += arr->data_bitline.e_col_write * NC_width * (max_avg ? 1 : 0.5) * n_read;
  }
  SIM_print_stat_energy(SIM_power_strcat(path, "write"), Eatomic, next_next_depth);
  SIM_power_res_path(path, next_path_len);
  Estruct += Eatomic;

  /* precharge energy is paid once per read across all effective data columns */
  Eatomic = arr->data_bitline_pre.e_charge * info->eff_data_cols * n_read;
  /* dirty hack */
  if (share_flag) {
    Eatomic += arr->data_bitline_pre.e_charge * (NP_width + NC_width + cnt_width) * n_read;
    /* read free list */
    Eatomic += arr->data_bitline_pre.e_charge * (NP_width + NC_width + cnt_width) * n_write;
  }
  SIM_print_stat_energy(SIM_power_strcat(path, "precharge"), Eatomic, next_next_depth);
  SIM_power_res_path(path, next_path_len);
  Estruct += Eatomic;

  SIM_print_stat_energy(path, Estruct, next_depth);
  SIM_power_res_path(path, path_len);
  Eavg += Estruct;

  /* memory cells: only writes can flip stored bits, so reads contribute
   * nothing here (except the free-list update under the central-buffer hack) */
  Estruct = 0;

  /* assume switch probability 0.5 for memory cells */
  Eatomic = arr->data_mem.e_switch * info->data_width * (max_avg ? 1 : 0.5) * n_write;
  /* dirty hack */
  if (share_flag) {
    /* current NP and NC */
    Eatomic += arr->data_mem.e_switch * (NP_width + NC_width) * (max_avg ? 1 : 0.5) * n_write;
    /* previous NP or NC */
    Eatomic += arr->data_mem.e_switch * NP_width * (max_avg ? 1 : 0.5) * n_write;
    /* update free list */
    Eatomic += arr->data_mem.e_switch * NC_width * (max_avg ? 1 : 0.5) * n_read;
  }
  Estruct += Eatomic;

  SIM_print_stat_energy(SIM_power_strcat(path, "memory cell"), Estruct, next_depth);
  SIM_power_res_path(path, path_len);
  Eavg += Estruct;

  /* sense amplifier: present only with two-ended (data_end == 2) bitlines,
   * fires once per column per read */
  if (info->data_end == 2) {
    Estruct = 0;

    Eatomic = arr->data_amp.e_access * info->eff_data_cols * n_read;
    /* dirty hack */
    if (share_flag) {
      Eatomic += arr->data_amp.e_access * (NP_width + NC_width + cnt_width) * n_read;
      /* read free list */
      Eatomic += arr->data_amp.e_access * (NP_width + NC_width + cnt_width) * n_write;
    }
    Estruct += Eatomic;

    SIM_print_stat_energy(SIM_power_strcat(path, "sense amplifier"), Estruct, next_depth);
    SIM_power_res_path(path, path_len);
    Eavg += Estruct;
  }

  /* output driver */
  if (info->outdrv_model) {
    Estruct = 0;
    SIM_power_strcat(path, "output driver");
    next_path_len = SIM_power_strlen(path);

    Eatomic = arr->outdrv.e_select * n_read;
    SIM_print_stat_energy(SIM_power_strcat(path, "enable"), Eatomic, next_next_depth);
    SIM_power_res_path(path, next_path_len);
    Estruct += Eatomic;

    /* same switch probability as bitlines */
    Eatomic = arr->outdrv.e_chg_data * arr->outdrv.item_width * (max_avg ? 1 : 0.5) * info->n_item * info->assoc * n_read;
    SIM_print_stat_energy(SIM_power_strcat(path, "data"), Eatomic, next_next_depth);
    SIM_power_res_path(path, next_path_len);
    Estruct += Eatomic;

    /* assume 1 and 0 are uniformly distributed */
    /* in max_avg mode only the more expensive of driving-1/driving-0 is
     * counted (worst case); otherwise both are counted at probability 0.5 */
    if (arr->outdrv.e_out_1 >= arr->outdrv.e_out_0 || !max_avg) {
      Eatomic = arr->outdrv.e_out_1 * arr->outdrv.item_width * (max_avg ? 1 : 0.5) * n_read;
      SIM_print_stat_energy(SIM_power_strcat(path, "output 1"), Eatomic, next_next_depth);
      SIM_power_res_path(path, next_path_len);
      Estruct += Eatomic;
    }

    if (arr->outdrv.e_out_1 < arr->outdrv.e_out_0 || !max_avg) {
      Eatomic = arr->outdrv.e_out_0 * arr->outdrv.item_width * (max_avg ? 1 : 0.5) * n_read;
      SIM_print_stat_energy(SIM_power_strcat(path, "output 0"), Eatomic, next_next_depth);
      SIM_power_res_path(path, next_path_len);
      Estruct += Eatomic;
    }

    SIM_print_stat_energy(path, Estruct, next_depth);
    SIM_power_res_path(path, path_len);
    Eavg += Estruct;
  }

  SIM_print_stat_energy(path, Eavg, print_depth);

  return Eavg;
}