/* Calls get_cblock_trans and stores its result in a local variable.
 *
 * NOTE(review): num_inputs_to_cblock, max_inputs_to_cblock and
 * trans_cblock_to_lblock_buf are handed to get_cblock_trans without ever
 * being assigned in this body, so their values are indeterminate at the
 * call.  The three parameters and the locals iswitch, i, j and iseg are
 * never used, and the computed value is not returned or printed.  This
 * looks like a reduced copy of a larger routine -- confirm whether it is
 * meant to be compiled at all.
 *
 * NOTE(review): another definition with this same name follows in the
 * file; both cannot live in one translation unit -- confirm which one is
 * intended. */
void count_routing_transistors (int num_switch, float R_minW_nmos,
        float R_minW_pmos) {

    const float trans_sram_bit = 6.;

    int *num_inputs_to_cblock;          /* Never assigned before use. */
    int max_inputs_to_cblock;           /* Never assigned before use. */
    float trans_cblock_to_lblock_buf;   /* Never assigned before use. */

    int iswitch, i, j, iseg;            /* Unused in this body. */
    float input_cblock_trans;

    input_cblock_trans = get_cblock_trans (num_inputs_to_cblock,
                             max_inputs_to_cblock,
                             trans_cblock_to_lblock_buf,
                             trans_sram_bit);
}
void count_routing_transistors (int num_switch, float R_minW_nmos, float R_minW_pmos) { /* Counts how many transistors are needed to implement the FPGA routing * * resources. Call this only when an rr_graph exists. It does not count * * the transistors used in logic blocks, but it counts the transistors in * * the input connection block multiplexers and in the output pin drivers and * * pass transistors. NB: this routine assumes pass transistors always * * generate two edges (one forward, one backward) between two nodes. * * Physically, this is what happens -- make sure your rr_graph does it. * * * * I assume a minimum width transistor takes 1 unit of area. A double-width * * transistor takes the twice the diffusion width, but the same spacing, so * * I assume it takes 1.5x the area of a minimum-width transitor. I always * * design tri-state buffers as a buffer followed by a pass transistor. * * I make Rbuffer = Rpass_transitor = 1/2 Rtri-state_buffer. * * I make the pull-up and pull-down sides of the buffer the same strength -- * * i.e. I make the p transistor R_minW_pmos / R_minW_nmos wider than the n * * transistor. * * * * I generate two area numbers in this routine: ntrans_sharing and * * ntrans_no_sharing. ntrans_sharing exactly reflects what the timing * * analyzer, etc. works with -- each switch is a completely self contained * * pass transistor or tri-state buffer. In the case of tri-state buffers * * this is rather pessimisitic. The inverter chain part of the buffer (as * * opposed to the pass transistor + SRAM output part) can be shared by * * several switches in the same location. Obviously all the switches from * * an OPIN can share one buffer. Also, CHANX and CHANY switches at the same * * spot (i,j) on a single segment can share a buffer. For a more realistic * * area number I assume all buffered switches from a node that are at the * * *same (i,j) location* can share one buffer. Only the lowest resistance * * (largest) buffer is implemented. 
In practice, you might want to build * * something that is 1.5x or 2x the largest buffer, so this may be a bit * * optimistic (but I still think it's pretty reasonable). */ int *num_inputs_to_cblock; /* [0..num_rr_nodes-1], but all entries not */ /* corresponding to IPINs will be 0. */ boolean *cblock_counted; /* [0..max(nx,ny)] -- 0th element unused. */ float *shared_buffer_trans; /* [0..max_nx,ny)] */ float *unsharable_switch_trans, *sharable_switch_trans; /* [0..num_switch-1] */ t_rr_type from_rr_type, to_rr_type; int from_node, to_node, iedge, num_edges, maxlen; int iswitch, i, j, iseg, max_inputs_to_cblock; float input_cblock_trans, shared_opin_buffer_trans; const float trans_sram_bit = 6.; /* Two variables below are the accumulator variables that add up all the * * transistors in the routing. Make doubles so that they don't stop * * incrementing once adding a switch makes a change of less than 1 part in * * 10^7 to the total. If this still isn't good enough (adding 1 part in * * 10^15 will still be thrown away), compute the transistor count in * * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing * * the partial sums together. */ double ntrans_sharing, ntrans_no_sharing; /* Buffers from the routing to the ipin cblock inputs, and from the ipin * * cblock outputs to the logic block, respectively. Assume minimum size n * * transistors, and ptransistors sized to make the pull-up R = pull-down R. */ float trans_track_to_cblock_buf; float trans_cblock_to_lblock_buf; ntrans_sharing = 0.; ntrans_no_sharing = 0.; max_inputs_to_cblock = 0; /* Assume the two buffers below are 4x minimum drive strength (enough to * * drive a fanout of up to 16 pretty nicely -- should cover a reasonable * * wiring C plus the fanout. */ trans_track_to_cblock_buf = trans_per_buf (R_minW_nmos/4., R_minW_nmos, R_minW_pmos); trans_cblock_to_lblock_buf = trans_per_buf (R_minW_nmos/4., R_minW_nmos, R_minW_pmos); /* trans_track_to_cblock_buf = 1. 
+ trans_per_R (R_minW_nmos, R_minW_pmos); trans_cblock_to_lblock_buf = 1. + trans_per_R (R_minW_nmos, R_minW_pmos); */ num_inputs_to_cblock = (int *) my_calloc (num_rr_nodes, sizeof (int)); maxlen = max (nx, ny) + 1; cblock_counted = (boolean *) my_calloc (maxlen, sizeof (boolean)); shared_buffer_trans = (float *) my_calloc (maxlen, sizeof (float)); unsharable_switch_trans = alloc_and_load_unsharable_switch_trans (num_switch, trans_sram_bit, R_minW_nmos); sharable_switch_trans = alloc_and_load_sharable_switch_trans (num_switch, trans_sram_bit, R_minW_nmos, R_minW_pmos); for (from_node=0; from_node<num_rr_nodes; from_node++) { from_rr_type = rr_node[from_node].type; switch (from_rr_type) { case CHANX: case CHANY: num_edges = rr_node[from_node].num_edges; for (iedge=0; iedge<num_edges; iedge++) { to_node = rr_node[from_node].edges[iedge]; to_rr_type = rr_node[to_node].type; switch (to_rr_type) { case CHANX: case CHANY: iswitch = rr_node[from_node].switches[iedge]; if (switch_inf[iswitch].buffered) { iseg = seg_index_of_sblock (from_node, to_node); shared_buffer_trans[iseg] = max (shared_buffer_trans[iseg], sharable_switch_trans[iswitch]); ntrans_no_sharing += unsharable_switch_trans[iswitch] + sharable_switch_trans[iswitch]; ntrans_sharing += unsharable_switch_trans[iswitch]; } else if (from_node < to_node) { /* Pass transistor shared by two edges -- only count once. * * Also, no part of a pass transistor is sharable. 
*/ ntrans_no_sharing += unsharable_switch_trans[iswitch]; ntrans_sharing += unsharable_switch_trans[iswitch]; } break; case IPIN: num_inputs_to_cblock[to_node]++; max_inputs_to_cblock = max (max_inputs_to_cblock, num_inputs_to_cblock[to_node]); iseg = seg_index_of_cblock (from_rr_type, to_node); if (cblock_counted[iseg] == FALSE) { cblock_counted[iseg] = TRUE; ntrans_sharing += trans_track_to_cblock_buf; ntrans_no_sharing += trans_track_to_cblock_buf; } break; default: printf ("Error in count_routing_transistors: Unexpected \n" "connection from node %d (type %d) to node %d (type %d).\n", from_node, from_rr_type, to_node, to_rr_type); exit (1); break; } /* End switch on to_rr_type. */ } /* End for each edge. */ /* Now add in the shared buffer transistors, and reset some flags. */ if (from_rr_type == CHANX) { for (i=rr_node[from_node].xlow-1; i<=rr_node[from_node].xhigh; i++) { ntrans_sharing += shared_buffer_trans[i]; shared_buffer_trans[i] = 0.; } for (i=rr_node[from_node].xlow; i<=rr_node[from_node].xhigh; i++) cblock_counted[i] = FALSE; } else { /* CHANY */ for (j=rr_node[from_node].ylow-1; j<=rr_node[from_node].yhigh; j++) { ntrans_sharing += shared_buffer_trans[j]; shared_buffer_trans[j] = 0.; } for (j=rr_node[from_node].ylow; j<=rr_node[from_node].yhigh; j++) cblock_counted[j] = FALSE; } break; case OPIN: num_edges = rr_node[from_node].num_edges; shared_opin_buffer_trans = 0.; for (iedge=0; iedge<num_edges; iedge++) { iswitch = rr_node[from_node].switches[iedge]; ntrans_no_sharing += unsharable_switch_trans[iswitch] + sharable_switch_trans[iswitch]; ntrans_sharing += unsharable_switch_trans[iswitch]; shared_opin_buffer_trans = max (shared_opin_buffer_trans, sharable_switch_trans[iswitch]); } ntrans_sharing += shared_opin_buffer_trans; break; default: break; } /* End switch on from_rr_type */ } /* End for all nodes */ free (cblock_counted); free (shared_buffer_trans); free (unsharable_switch_trans); free (sharable_switch_trans); /* Now add in the input 
connection block transistors. */ input_cblock_trans = get_cblock_trans (num_inputs_to_cblock, max_inputs_to_cblock, trans_cblock_to_lblock_buf, trans_sram_bit); free (num_inputs_to_cblock); ntrans_sharing += input_cblock_trans; ntrans_no_sharing += input_cblock_trans; printf ("\nRouting area (in minimum width transistor areas):\n"); printf ("Assuming no buffer sharing (pessimistic). Total: %#g Per clb: " "%#g\n", ntrans_no_sharing, ntrans_no_sharing / (float) (nx * ny)); printf ("Assuming buffer sharing (slightly optimistic). Total: %#g Per clb: " "%#g\n\n", ntrans_sharing, ntrans_sharing / (float) (nx * ny)); }