Ejemplo n.º 1
0
/*
 * Print summary statistics about a floorplan on stdout: total and
 * core area, dimensions, area utilization, core aspect ratio, the
 * wire length metric and the steady-state peak/average temperature.
 *
 * flp            floorplan to report on
 * model          thermal model used for the temperature figures; its
 *                R model is (re)populated for flp by this call
 * l2_label       label forwarded to the get_core_*() helpers
 * power_file     file handed to read_power() to fill the power vector
 * connects_file  file with wire connectivity information. Pass NULL
 *                if wire information is already populated in flp.
 *
 * If connects_file is given but cannot be opened, fatal() is called
 * with an error message (presumably it does not return -- confirm).
 */
void print_flp_stats(flp_t *flp, RC_model_t *model, 
					 char *l2_label, char *power_file, 
					 char *connects_file)
{
	double core, total, occupied;	/* area	*/
	double width, height, aspect, total_w, total_h;
	double wire_metric;
	double peak, avg;		/* temperature	*/
	double *power, *temp;
	FILE *fp = NULL;
	char str[STR_SIZE];

	if (connects_file) {
		fp = fopen(connects_file, "r");
		if (!fp) {
			/*
			 * connects_file is caller supplied and may be longer
			 * than STR_SIZE, so bound the write instead of using
			 * sprintf (the message is truncated if it does not fit).
			 */
			snprintf(str, sizeof(str), "error opening file %s\n", connects_file);
			fatal(str);
		}
		flp_populate_connects(flp, fp);
	}

	/* scratch vectors for the thermal solve; released at the end */
	power = hotspot_vector(model);
	temp = hotspot_vector(model);
	read_power(model, power, power_file);

	/* geometry	*/
	core = get_core_area(flp, l2_label);
	total = get_total_area(flp);
	total_w = get_total_width(flp);
	total_h = get_total_height(flp);
	occupied = get_core_occupied_area(flp, l2_label);
	width = get_core_width(flp, l2_label);
	height = get_core_height(flp, l2_label);
	/* aspect ratio is reported as (longer side / shorter side)	*/
	aspect = (height > width) ? (height/width) : (width/height);
	wire_metric = get_wire_metric(flp);

	/* steady-state temperatures for this floorplan	*/
	populate_R_model(model, flp);
	steady_state_temp(model, power, temp);
	peak = find_max_temp(model, temp);
	avg = find_avg_temp(model, temp);

	fprintf(stdout, "printing summary statistics about the floorplan\n");
	fprintf(stdout, "total area:\t%g\n", total);
	fprintf(stdout, "total width:\t%g\n", total_w);
	fprintf(stdout, "total height:\t%g\n", total_h);
	fprintf(stdout, "core area:\t%g\n", core);
	fprintf(stdout, "occupied area:\t%g\n", occupied);
	/* utilization is printed as a percentage of the core area	*/
	fprintf(stdout, "area utilization:\t%.3f\n", occupied / core * 100.0);
	fprintf(stdout, "core width:\t%g\n", width);
	fprintf(stdout, "core height:\t%g\n", height);
	fprintf(stdout, "core aspect ratio:\t%.3f\n", aspect);
	fprintf(stdout, "wire length metric:\t%.3f\n", wire_metric);
	fprintf(stdout, "peak temperature:\t%.3f\n", peak);
	fprintf(stdout, "avg temperature:\t%.3f\n", avg);

	free_dvector(power);
	free_dvector(temp);
	if (fp)
		fclose(fp);
}
Ejemplo n.º 2
0
/*
 * Cost function used by the floorplanner.
 *
 * The value returned is a linear combination of the total area (A),
 * the steady-state peak temperature (T) and the wire length metric (W):
 *
 *     lambdaA * A + lambdaT * T + lambdaW * W
 *
 * To experiment with a different cost, edit only the return
 * expression at the bottom. The thermal model and the power vector
 * are parameters because the temperature term needs a steady-state
 * solve for the given floorplan.
 */
double flp_evaluate_metric(flp_t *flp, RC_model_t *model, double *power,
						   double lambdaA, double lambdaT, double lambdaW)
{
	double *steady = hotspot_vector(model);
	double peak_temp, chip_area, wires, chip_w, chip_h, aspect;

	/* temperature term: rebuild the R model and solve for this floorplan	*/
	populate_R_model(model, flp);
	steady_state_temp(model, power, steady);
	peak_temp = find_max_temp(model, steady);
	/* only the peak is needed from here on	*/
	free_dvector(steady);

	/* geometric terms	*/
	chip_area = get_total_area(flp);
	wires = get_wire_metric(flp);
	chip_w = get_total_width(flp);
	chip_h = get_total_height(flp);

	/*
	 * longer side over shorter side. not part of the current
	 * metric; computed so that alternative metrics can use it
	 */
	aspect = (chip_w > chip_h) ? (chip_w / chip_h) : (chip_h / chip_w);
	(void) aspect;

	/* can return any arbitrary function of area, tmax and wire_length	*/
	return (lambdaA * chip_area + lambdaT * peak_temp + lambdaW * wires);
}
/*
 * Average heat sink temperature for the natural convection package
 * model: a weighted sum of the heat sink node temperatures in temp,
 * divided by the square of config->s_sink. fatal() is called if any
 * of the temperatures involved is negative.
 */
double calc_sink_temp_block(block_model_t *model, double *temp, thermal_config_t *config)
{
	flp_t *flp = model->flp;
	int node;
	double t;
	double sum = 0.0;
	double width = get_total_width(flp);
	double height = get_total_height(flp);
	double spr_size = config->s_spreader*config->s_spreader;
	double sink_size = config->s_sink*config->s_sink;

	/* heat sink nodes above the blocks: weight each by its block area	*/
	for (node = 0; node < flp->n_units; node++) {
		t = temp[HSINK*flp->n_units+node];
		if (t < 0) {
			fatal("negative temperature!\n");
		} else {
			sum += t*(flp->units[node].width*flp->units[node].height);
		}
	}

	/* package nodes SINK_C_W .. SINK_C_E	*/
	for (node = SINK_C_W; node <= SINK_C_E; node++) {
		t = temp[node+NL*flp->n_units];
		if (t < 0) {
			fatal("negative temperature!\n");
		} else {
			sum += t*0.25*(config->s_spreader+height)*(config->s_spreader-width);
		}
	}

	/* package nodes SINK_C_N .. SINK_C_S	*/
	for (node = SINK_C_N; node <= SINK_C_S; node++) {
		t = temp[node+NL*flp->n_units];
		if (t < 0) {
			fatal("negative temperature!\n");
		} else {
			sum += t*0.25*(config->s_spreader+width)*(config->s_spreader-height);
		}
	}

	/* package nodes SINK_W .. SINK_S: a quarter of the sink area beyond the spreader each	*/
	for (node = SINK_W; node <= SINK_S; node++) {
		t = temp[node+NL*flp->n_units];
		if (t < 0) {
			fatal("negative temperature!\n");
		} else {
			sum += t*0.25*(sink_size-spr_size);
		}
	}

	return (sum / sink_size);
}
Ejemplo n.º 4
0
/*
 * Build the thermal RC network for floorplan flp and derive the
 * matrices used by the solver.
 *
 * flp           floorplan whose units become the block nodes
 * omit_lateral  when non-zero, the lateral conductances of the
 *               silicon layer (gx, gy) are set to 0
 *
 * Results are written to b, c, inva and invb, which are allocated
 * here but not declared or freed in this function (presumably
 * file-scope state owned by the caller -- confirm). factor_chip,
 * factor_int and max_slope are likewise assigned here but declared
 * elsewhere. All other matrices/vectors allocated below are
 * temporaries and are released before returning.
 *
 * NOTE(review): the code only ever sets border[][] entries to 1,
 * accumulates into b[i][i] with += and tests g[][] against 0.0
 * without clearing them first, so it appears to rely on imatrix()
 * and matrix() returning zero-initialised storage -- confirm.
 */
void create_RC_matrices(flp_t *flp, int omit_lateral)
{
    int i, j, k = 0, n = flp->n_units;
    int **border;
    double **len, *gx, *gy, **g, *c_ver, **t, *gx_sp, *gy_sp;
    double r_sp1, r_sp2, r_hs;	/* lateral resistances to spreader and heatsink	*/

    /* NOTE: *_mid - the vertical R/C from CENTER nodes of spreader
     * and heatsink. *_per - the vertical R/C from PERIPHERAL (n,s,e,w) nodes
     */
    double r_sp_per, r_hs_mid, r_hs_per, c_sp_per, c_hs_mid, c_hs_per;
    /* summed spreader-layer conductances of the blocks on each chip edge	*/
    double gn_sp=0, gs_sp=0, ge_sp=0, gw_sp=0;

    double w_chip = get_total_width (flp);	/* x-axis	*/
    double l_chip = get_total_height (flp);	/* y-axis	*/

    /* border[i][0..3] = 1 if block i touches the west, east, north, south chip edge	*/
    border = imatrix(n, 4);
    len = matrix(n, n);		/* len[i][j] = length of shared edge bet. i & j	*/
    gx = vector(n);			/* lumped conductances in x direction	*/
    gy = vector(n);			/* lumped conductances in y direction	*/
    gx_sp = vector(n);		/* lateral conductances in the spreader	layer */
    gy_sp = vector(n);
    g = matrix(NL*n+EXTRA, NL*n+EXTRA);	/* g[i][j] = conductance bet. nodes i & j */
    c_ver = vector(NL*n+EXTRA);	/* vertical capacitance	*/

    b = matrix(NL*n+EXTRA, NL*n+EXTRA);	/* B, C, INVA  and INVB are (NL*n+EXTRA)x(NL*n+EXTRA) matrices	*/
    c = matrix(NL*n+EXTRA, NL*n+EXTRA);
    inva = matrix(NL*n+EXTRA, NL*n+EXTRA);
    invb = matrix(NL*n+EXTRA, NL*n+EXTRA);
    t = matrix (NL*n+EXTRA, NL*n+EXTRA);	/* copy of B	*/

    /* compute the silicon fitting factor - see pg 10 of the UVA CS tech report - CS-TR-2003-08	*/
    factor_chip = C_FACTOR * ((SPEC_HEAT_INT / SPEC_HEAT_SI) * (w_chip + 0.88 * t_interface) \
                              * (l_chip + 0.88 * t_interface) * t_interface / ( w_chip * l_chip * t_chip) + 1);

    /* fitting factor for interface	 - same rationale as above */
    factor_int = C_FACTOR * ((SPEC_HEAT_CU / SPEC_HEAT_INT) * (w_chip + 0.88 * t_spreader) \
                             * (l_chip + 0.88 * t_spreader) * t_spreader / ( w_chip * l_chip * t_interface) + 1);

    /*printf("fitting factors : %lf, %lf\n", factor_chip, factor_int);	*/

    /* gx's and gy's of blocks	*/
    for (i = 0; i < n; i++) {
        /* at the silicon layer	*/
        if (omit_lateral) {
            gx[i] = gy[i] = 0;
        }
        else {
            gx[i] = 1.0/getr(K_SI, flp->units[i].height, flp->units[i].width, l_chip, t_chip);
            gy[i] = 1.0/getr(K_SI, flp->units[i].width, flp->units[i].height, w_chip, t_chip);
        }

        /* at the spreader layer (always computed, regardless of omit_lateral)	*/
        gx_sp[i] = 1.0/getr(K_CU, flp->units[i].height, flp->units[i].width, l_chip, t_spreader);
        gy_sp[i] = 1.0/getr(K_CU, flp->units[i].width, flp->units[i].height, w_chip, t_spreader);
    }

    /* shared lengths between blocks (filled symmetrically)	*/
    for (i = 0; i < n; i++)
        for (j = i; j < n; j++)
            len[i][j] = len[j][i] = get_shared_len(flp, i, j);

    /* lateral R's of spreader and sink.
     * NOTE(review): only w_chip (not l_chip) enters r_sp1 and r_sp2 --
     * confirm this is intended for non-square chips
     */
    r_sp1 = getr(K_CU, (s_spreader+3*w_chip)/4.0, (s_spreader-w_chip)/4.0, w_chip, t_spreader);
    r_sp2 = getr(K_CU, (3*s_spreader+w_chip)/4.0, (s_spreader-w_chip)/4.0, (s_spreader+3*w_chip)/4.0, t_spreader);
    r_hs = getr(K_CU, (s_sink+3*s_spreader)/4.0, (s_sink-s_spreader)/4.0, s_spreader, t_sink);

    /* vertical R's and C's of spreader and sink. the peripheral
     * values use one quarter of the area outside the inner region
     */
    r_sp_per = RHO_CU * t_spreader * 4.0 / (s_spreader * s_spreader - w_chip*l_chip);
    c_sp_per = factor_pack * SPEC_HEAT_CU * t_spreader * (s_spreader * s_spreader - w_chip*l_chip) / 4.0;
    r_hs_mid = RHO_CU * t_sink / (s_spreader*s_spreader);
    c_hs_mid = factor_pack * SPEC_HEAT_CU * t_sink * (s_spreader * s_spreader);
    r_hs_per = RHO_CU * t_sink * 4.0 / (s_sink * s_sink - s_spreader*s_spreader);
    c_hs_per = factor_pack * SPEC_HEAT_CU * t_sink * (s_sink * s_sink - s_spreader*s_spreader) / 4.0;

    /* short the R's from block centers to a particular chip edge:
     * mark which edges each block touches and sum the spreader
     * conductances of all blocks on the same edge
     */
    for (i = 0; i < n; i++) {
        if (eq(flp->units[i].bottomy + flp->units[i].height, l_chip)) {
            gn_sp += gy_sp[i];
            border[i][2] = 1;	/* block is on northern border 	*/
        }
        if (eq(flp->units[i].bottomy, 0)) {
            gs_sp += gy_sp[i];
            border[i][3] = 1;	/* block is on southern border	*/
        }
        if (eq(flp->units[i].leftx + flp->units[i].width, w_chip)) {
            ge_sp += gx_sp[i];
            border[i][1] = 1;	/* block is on eastern border	*/
        }
        if (eq(flp->units[i].leftx, 0)) {
            gw_sp += gx_sp[i];
            border[i][0] = 1;	/* block is on western border	*/
        }
    }

    /* overall R and C between nodes */
    for (i = 0; i < n; i++) {
        double area = (flp->units[i].height * flp->units[i].width);
        /*
         * amongst functional units	in the various layers
         * resistances in the interface layer are assumed
         * to be infinite
         */
        for (j = 0; j < n; j++) {
            /* conductance per unit of shared edge length; stays 0 for non-adjacent blocks	*/
            double part = 0, part_sp = 0;
            if (is_horiz_adj(flp, i, j)) {
                part = gx[i] / flp->units[i].height;
                part_sp = gx_sp[i] / flp->units[i].height;
            }
            else if (is_vert_adj(flp, i,j))  {
                part = gy[i] / flp->units[i].width;
                part_sp = gy_sp[i] / flp->units[i].width;
            }
            g[i][j] = part * len[i][j];
            g[HSP*n+i][HSP*n+j] = part_sp * len[i][j];
        }

        /* vertical g's in the silicon layer	*/
        g[i][IFACE*n+i]=g[IFACE*n+i][i]=2.0/(RHO_SI * t_chip / area);
        /* vertical g's in the interface layer	*/
        g[IFACE*n+i][HSP*n+i]=g[HSP*n+i][IFACE*n+i]=2.0/(RHO_INT * t_interface / area);
        /* vertical g's in the spreader layer	*/
        g[HSP*n+i][NL*n+SP_B]=g[NL*n+SP_B][HSP*n+i]=2.0/(RHO_CU * t_spreader / area);

        /* C's from functional units to ground	*/
        c_ver[i] = factor_chip * SPEC_HEAT_SI * t_chip * area;
        /* C's from interface portion of the functional units to ground	*/
        c_ver[IFACE*n+i] = factor_int * SPEC_HEAT_INT * t_interface * area;
        /* C's from spreader portion of the functional units to ground	*/
        c_ver[HSP*n+i] = factor_pack * SPEC_HEAT_CU * t_spreader * area;

        /* lateral g's from block center (spreader layer) to peripheral (n,s,e,w) spreader nodes.
         * the border[i][*] factor makes the conductance 0 for blocks not on that edge
         */
        g[HSP*n+i][NL*n+SP_N]=g[NL*n+SP_N][HSP*n+i]=2.0*border[i][2]/((1.0/gy_sp[i])+r_sp1*gn_sp/gy_sp[i]);
        g[HSP*n+i][NL*n+SP_S]=g[NL*n+SP_S][HSP*n+i]=2.0*border[i][3]/((1.0/gy_sp[i])+r_sp1*gs_sp/gy_sp[i]);
        g[HSP*n+i][NL*n+SP_E]=g[NL*n+SP_E][HSP*n+i]=2.0*border[i][1]/((1.0/gx_sp[i])+r_sp1*ge_sp/gx_sp[i]);
        g[HSP*n+i][NL*n+SP_W]=g[NL*n+SP_W][HSP*n+i]=2.0*border[i][0]/((1.0/gx_sp[i])+r_sp1*gw_sp/gx_sp[i]);
    }

    /* max slope (max_power * max_vertical_R / vertical RC time constant) for silicon	*/
    max_slope = MAX_PD / (factor_chip * t_chip * SPEC_HEAT_SI);

    /* vertical g's and C's between central nodes	*/
    /* between spreader bottom and sink bottom	*/
    g[NL*n+SINK_B][NL*n+SP_B]=g[NL*n+SP_B][NL*n+SINK_B]=2.0/r_hs_mid;
    /* from spreader bottom to ground	*/
    c_ver[NL*n+SP_B]=c_hs_mid;
    /* from sink bottom to ground	*/
    c_ver[NL*n+SINK_B] = factor_pack * c_convec;

    /* g's and C's from peripheral(n,s,e,w) nodes. the four peripheral
     * nodes of each layer are addressed as the indices SP_B-1..SP_B-4
     * and SINK_B-1..SINK_B-4
     */
    for (i = 1; i <= 4; i++) {
        /* vertical g's between peripheral spreader nodes and spreader bottom */
        g[NL*n+SP_B-i][NL*n+SP_B]=g[NL*n+SP_B][NL*n+SP_B-i]=2.0/r_sp_per;
        /* lateral g's between peripheral spreader nodes and peripheral sink nodes	*/
        g[NL*n+SP_B-i][NL*n+SINK_B-i]=g[NL*n+SINK_B-i][NL*n+SP_B-i]=2.0/(r_hs + r_sp2);
        /* vertical g's between peripheral sink nodes and sink bottom	*/
        g[NL*n+SINK_B-i][NL*n+SINK_B]=g[NL*n+SINK_B][NL*n+SINK_B-i]=2.0/r_hs_per;
        /* from peripheral spreader nodes to ground	*/
        c_ver[NL*n+SP_B-i]=c_sp_per;
        /* from peripheral sink nodes to ground	*/
        c_ver[NL*n+SINK_B-i]=c_hs_per;
    }

    /* calculate matrices A, B such that A(dT) + BT = POWER */

    for (i = 0; i < NL*n+EXTRA; i++) {
        for (j = 0; j < NL*n+EXTRA; j++) {
            if (i==j) {
                /* diagonal: inverse capacitance, and the sum of all
                 * conductances attached to node i
                 */
                inva[i][j] = 1.0/c_ver[i];
                if (i == NL*n+SINK_B)	/* sink bottom */
                    b[i][j] += 1.0 / r_convec;
                for (k = 0; k < NL*n+EXTRA; k++) {
                    /* nodes i and k are connected only if both directions are non-zero	*/
                    if ((g[i][k]==0.0)||(g[k][i])==0.0)
                        continue;
                    else
                        /* here is why the 2.0 factor comes when calculating g[][]	*/
                        b[i][j] += 1.0/((1.0/g[i][k])+(1.0/g[k][i]));
                }
            } else {
                /* off-diagonal: negated series combination of the two half conductances	*/
                inva[i][j]=0.0;
                if ((g[i][j]==0.0)||(g[j][i])==0.0)
                    b[i][j]=0.0;
                else
                    b[i][j]=-1.0/((1.0/g[i][j])+(1.0/g[j][i]));
            }
        }
    }

    /* we are always going to use the eqn dT + A^-1 * B T = A^-1 * POWER. so, store  C = A^-1 * B	*/
    matmult(c, inva, b, NL*n+EXTRA);
    /* we will also be needing INVB so store it too. the inversion is
     * done on the copy t rather than on b itself
     */
    copy_matrix(t, b, NL*n+EXTRA, NL*n+EXTRA);
    matinv(invb, t, NL*n+EXTRA);
    /*	dump_vector(c_ver, NL*n+EXTRA);	*/
    /*	dump_matrix(g, NL*n+EXTRA, NL*n+EXTRA);	*/
    /*	dump_matrix(c, NL*n+EXTRA, NL*n+EXTRA);	*/

    /* cleanup: release the temporaries; b, c, inva and invb are kept	*/
    free_matrix(t, NL*n+EXTRA);
    free_matrix(g, NL*n+EXTRA);
    free_matrix(len, n);
    free_imatrix(border, n);
    free_vector(c_ver);
    free_vector(gx);
    free_vector(gy);
    free_vector(gx_sp);
    free_vector(gy_sp);
}
Ejemplo n.º 5
0
/*
 * Surround the floorplan with a rim of the given thickness.
 *
 * The existing blocks are first shifted by (rim_thickness,
 * rim_thickness). Then, for every original block that touches a chip
 * edge, one rim block is appended after the original blocks on that
 * edge, named "<RIM_*_STR>_<block name>". At the four corners the rim
 * blocks are extended by the rim thickness in a clockwise fashion, so
 * that the corners are covered exactly once.
 *
 * The storage for the new entries of flp->units and flp->wire_density
 * must already exist; this function only fills it in. The wire
 * densities between the rim blocks and all other blocks are set to 0.
 *
 * Returns the number of rim blocks added (flp->n_units grows by the
 * same amount).
 */
int flp_wrap_rim(flp_t *flp, double rim_thickness)
{
	const int n_core = flp->n_units;	/* blocks present before wrapping	*/
	int n_rim = 0;						/* rim blocks appended so far	*/
	int b, row, col;
	double outer_w, outer_h;			/* chip dimensions including the rim	*/
	unit_t *blk, *rim;

	outer_w = get_total_width(flp) + 2 * rim_thickness;
	outer_h = get_total_height(flp) + 2 * rim_thickness;
	flp_translate(flp, rim_thickness, rim_thickness);

	for (b = 0; b < n_core; b++) {
		blk = &flp->units[b];

		/* western edge: strip to the left of the block	*/
		if (eq(blk->leftx, rim_thickness)) {
			rim = &flp->units[n_core + n_rim];
			sprintf(rim->name, "%s_%s", RIM_LEFT_STR, blk->name);
			rim->width = rim_thickness;
			rim->height = blk->height;
			rim->leftx = 0;
			rim->bottomy = blk->bottomy;
			/* northwest corner: grow upwards	*/
			if (eq(blk->bottomy + blk->height, outer_h-rim_thickness))
				rim->height += rim_thickness;
			n_rim++;
		}

		/* eastern edge: strip to the right of the block	*/
		if (eq(blk->leftx + blk->width, outer_w-rim_thickness)) {
			rim = &flp->units[n_core + n_rim];
			sprintf(rim->name, "%s_%s", RIM_RIGHT_STR, blk->name);
			rim->width = rim_thickness;
			rim->height = blk->height;
			rim->leftx = blk->leftx + blk->width;
			rim->bottomy = blk->bottomy;
			/* southeast corner: grow downwards	*/
			if (eq(blk->bottomy, rim_thickness)) {
				rim->height += rim_thickness;
				rim->bottomy = 0;
			}
			n_rim++;
		}

		/* northern edge: strip above the block	*/
		if (eq(blk->bottomy + blk->height, outer_h-rim_thickness)) {
			rim = &flp->units[n_core + n_rim];
			sprintf(rim->name, "%s_%s", RIM_TOP_STR, blk->name);
			rim->width = blk->width;
			rim->height = rim_thickness;
			rim->leftx = blk->leftx;
			rim->bottomy = blk->bottomy + blk->height;
			/* northeast corner: grow to the right	*/
			if (eq(blk->leftx + blk->width, outer_w-rim_thickness))
				rim->width += rim_thickness;
			n_rim++;
		}

		/* southern edge: strip below the block	*/
		if (eq(blk->bottomy, rim_thickness)) {
			rim = &flp->units[n_core + n_rim];
			sprintf(rim->name, "%s_%s", RIM_BOTTOM_STR, blk->name);
			rim->width = blk->width;
			rim->height = rim_thickness;
			rim->leftx = blk->leftx;
			rim->bottomy = 0;
			/* southwest corner: grow to the left	*/
			if (eq(blk->leftx, rim_thickness)) {
				rim->width += rim_thickness;
				rim->leftx = 0;
			}
			n_rim++;
		}
	}

	flp->n_units += n_rim;

	/* rim blocks carry no wires: clear their rows and columns	*/
	for (row = n_core; row < n_core + n_rim; row++) {
		for (col = 0; col <= row; col++) {
			flp->wire_density[col][row] = 0;
			flp->wire_density[row][col] = 0;
		}
	}

	return n_rim;
}
Ejemplo n.º 6
0
/*
 * Wrap the L2 around the core floorplan as a bottom block plus a
 * left and a right arm.
 *
 * The L2 area is read from flp_desc. On entry both flp and flp_desc
 * have the L2 hidden beyond their n_units boundary, and the memory
 * for the L2 block and its arms has already been allocated in flp.
 * On return the core blocks have been translated to make room,
 * flp->n_units includes the L2 block and its L2_ARMS arms, and their
 * positions and sizes are set.
 */
void flp_wrap_l2(flp_t *flp, flp_desc_t *flp_desc)
{
	/*
	 * arm_w  - width of each L2 arm
	 * base_h - height of the bottom L2 portion
	 */
	double arm_w, base_h;
	double core_area, core_width, core_height, l2_area, total_side;
	unit_t *bottom, *left_arm, *right_arm;

	core_area = get_total_area(flp);
	core_width = get_total_width(flp);
	core_height = get_total_height(flp);
	/* the hidden L2 entry sits right after the visible units of flp_desc	*/
	l2_area = flp_desc->units[flp_desc->n_units].area;

	/*
	 * first attempt: make the whole chip a square. its side then
	 * equals the core width plus the two arms, and the core height
	 * plus the bottom portion
	 */
	total_side = sqrt(core_area + l2_area);
	arm_w = (total_side - core_width) / 2.0;
	base_h = total_side - core_height;

	/*
	 * the equation being solved is
	 *   (2*arm_w + core_width) * (base_h + core_height)
	 *   = l2_area + core_area
	 * with the square constraint, arm_w or base_h can come out
	 * non-positive. in that case drop the square constraint and
	 * instead tie the two unknowns together by k = WRAP_L2_RATIO
	 * (k*arm_w = base_h for a wide core, k*base_h = arm_w for a tall
	 * one) to get a reasonable aspect ratio. substituting gives a
	 * quadratic whose positive root is computed below
	 */
	if ( arm_w <= 0 || base_h <= 0.0) {
		double sum, root;
		int wide_core = (core_width >= core_height);

		if (wide_core)
			sum = WRAP_L2_RATIO * core_width + 2 * core_height;
		else
			sum = core_width + 2 * WRAP_L2_RATIO * core_height;

		root = (sqrt(sum*sum + 8*WRAP_L2_RATIO*l2_area) - sum) / (4*WRAP_L2_RATIO);

		if (wide_core) {
			arm_w = root;
			base_h = WRAP_L2_RATIO * arm_w;
		} else {
			base_h = root;
			arm_w = WRAP_L2_RATIO * base_h;
		}
		total_side = 2 * arm_w + core_width;
	}

	/* move the core blocks up and to the right to make room	*/
	flp_translate(flp, arm_w, base_h);

	/* bring the L2 blocks back inside the boundary	*/
	flp->n_units += (L2_ARMS+1);
	/* copy L2 info again from flp_desc but from beyond the boundary	*/
	copy_l2_info(flp, flp->n_units-L2_ARMS-1, flp_desc, 
				 flp_desc->n_units, flp_desc->n_units);

	/*
	 * only the geometry needs fixing here; connectivity information
	 * has already been fixed (in flp_placeholder)
	 */

	/* bottom L2 block spans the full chip width at the origin	*/
	bottom = &flp->units[flp->n_units-1-L2_ARMS];
	bottom->width = total_side;
	bottom->height = base_h;
	bottom->leftx = bottom->bottomy = 0;

	/* left arm sits on top of the bottom block, left of the core	*/
	left_arm = &flp->units[flp->n_units-L2_ARMS+L2_LEFT];
	left_arm->width = arm_w;
	left_arm->height = core_height;
	left_arm->leftx = 0;
	left_arm->bottomy = base_h;

	/* right arm mirrors it on the other side of the core	*/
	right_arm = &flp->units[flp->n_units-L2_ARMS+L2_RIGHT];
	right_arm->width = arm_w;
	right_arm->height = core_height;
	right_arm->leftx = arm_w + core_width;
	right_arm->bottomy = base_h;
}
/*
 * Build the steady-state resistance model for the block thermal model.
 *
 * Fills model->b with the matrix B such that B*T = Power in the steady
 * state, and stores the LUP decomposition of B in model->lu / model->p
 * (note: the decomposition is stored, not an explicit inverse of B).
 * The intermediate conductances (gx*, gy*, g, g_amb), the shared edge
 * lengths (len) and the border flags are written to the arrays already
 * held by the model. On success model->flp is set to flp and
 * model->r_ready to TRUE.
 *
 * fatal() is called if flp does not match the floorplan, the unit
 * count or the node count the model was set up for.
 *
 * NOTE: EXTRA nodes: 4 heat spreader peripheral nodes, 4 heat
 * sink inner peripheral nodes, 4 heat sink outer peripheral
 * nodes(north, south, east and west) and 1 ambient node.
 */
void populate_R_model_block(block_model_t *model, flp_t *flp)
{
	/*	shortcuts	*/
	double **b = model->b;
	double *gx = model->gx, *gy = model->gy;
	double *gx_int = model->gx_int, *gy_int = model->gy_int;
	double *gx_sp = model->gx_sp, *gy_sp = model->gy_sp;
	double *gx_hs = model->gx_hs, *gy_hs = model->gy_hs;
	double *g_amb = model->g_amb;
	double **len = model->len, **g = model->g, **lu = model->lu;
	int **border = model->border;
	int *p = model->p;
	double t_chip = model->config.t_chip;
	double r_convec = model->config.r_convec;
	double s_sink = model->config.s_sink;
	double t_sink = model->config.t_sink;
	double s_spreader = model->config.s_spreader;
	double t_spreader = model->config.t_spreader;
	double t_interface = model->config.t_interface;
	double k_chip = model->config.k_chip;
	double k_sink = model->config.k_sink;
	double k_spreader = model->config.k_spreader;
	double k_interface = model->config.k_interface;

	int i, j, n = flp->n_units;
	/* summed spreader / heatsink conductances of the blocks on each chip edge	*/
	double gn_sp=0, gs_sp=0, ge_sp=0, gw_sp=0;
	double gn_hs=0, gs_hs=0, ge_hs=0, gw_hs=0;
	double r_amb;

	double w_chip = get_total_width (flp);	/* x-axis	*/
	double l_chip = get_total_height (flp);	/* y-axis	*/

	/* sanity check on floorplan sizes.
	 * NOTE(review): the body is entirely commented out, so a chip
	 * larger than the spreader or the sink is currently accepted
	 * silently -- confirm this is intended
	 */
	if (w_chip > s_sink || l_chip > s_sink ||
		w_chip > s_spreader || l_chip > s_spreader) {
		//print_flp(flp);
		//print_flp_fig(flp);
		//fatal("inordinate floorplan size!\n");
	}
	/* the model must have been set up for exactly this floorplan	*/
	if(model->flp != flp || model->n_units != flp->n_units ||
	   model->n_nodes != NL * flp->n_units + EXTRA)
	   fatal("mismatch between the floorplan and the thermal model\n");

	/* gx's and gy's of blocks: conductance from the block center to
	 * its edge (half the block extent) in each layer
	 */
	for (i = 0; i < n; i++) {
		/* at the silicon layer	*/
		if (model->config.block_omit_lateral) {
			gx[i] = gy[i] = 0;
		}
		else {
			gx[i] = 1.0/getr(k_chip, flp->units[i].width / 2.0, flp->units[i].height * t_chip);
			gy[i] = 1.0/getr(k_chip, flp->units[i].height / 2.0, flp->units[i].width * t_chip);
		}

		/* at the interface layer	*/
		gx_int[i] = 1.0/getr(k_interface, flp->units[i].width / 2.0, flp->units[i].height * t_interface);
		gy_int[i] = 1.0/getr(k_interface, flp->units[i].height / 2.0, flp->units[i].width * t_interface);

		/* at the spreader layer	*/
		gx_sp[i] = 1.0/getr(k_spreader, flp->units[i].width / 2.0, flp->units[i].height * t_spreader);
		gy_sp[i] = 1.0/getr(k_spreader, flp->units[i].height / 2.0, flp->units[i].width * t_spreader);

		/* at the heatsink layer	*/
		gx_hs[i] = 1.0/getr(k_sink, flp->units[i].width / 2.0, flp->units[i].height * t_sink);
		gy_hs[i] = 1.0/getr(k_sink, flp->units[i].height / 2.0, flp->units[i].width * t_sink);
	}

	/* shared lengths between blocks (filled symmetrically)	*/
	for (i = 0; i < n; i++)
		for (j = i; j < n; j++)
			len[i][j] = len[j][i] = get_shared_len(flp, i, j);

	/* package R's	*/
	populate_package_R(&model->pack, &model->config, w_chip, l_chip);

	/* short the R's from block centers to a particular chip edge:
	 * set border[i][0..3] (west, east, north, south) for every block
	 * and sum the conductances of all blocks on the same edge
	 */
	for (i = 0; i < n; i++) {
		if (eq(flp->units[i].bottomy + flp->units[i].height, l_chip)) {
			gn_sp += gy_sp[i];
			gn_hs += gy_hs[i];
			border[i][2] = 1;	/* block is on northern border 	*/
		} else
			border[i][2] = 0;

		if (eq(flp->units[i].bottomy, 0)) {
			gs_sp += gy_sp[i];
			gs_hs += gy_hs[i];
			border[i][3] = 1;	/* block is on southern border	*/
		} else
			border[i][3] = 0;

		if (eq(flp->units[i].leftx + flp->units[i].width, w_chip)) {
			ge_sp += gx_sp[i];
			ge_hs += gx_hs[i];
			border[i][1] = 1;	/* block is on eastern border	*/
		} else
			border[i][1] = 0;

		if (eq(flp->units[i].leftx, 0)) {
			gw_sp += gx_sp[i];
			gw_hs += gx_hs[i];
			border[i][0] = 1;	/* block is on western border	*/
		} else
			border[i][0] = 0;
	}

	/* initialize g	*/
	zero_dmatrix(g, NL*n+EXTRA, NL*n+EXTRA);
	zero_dvector(g_amb, n+EXTRA);

	/* overall Rs between nodes */
	for (i = 0; i < n; i++) {
		double area = (flp->units[i].height * flp->units[i].width);
		/* amongst functional units	in the various layers	*/
		for (j = 0; j < n; j++) {
			/* conductance per unit of shared edge length; stays 0 for non-adjacent blocks	*/
			double part = 0, part_int = 0, part_sp = 0, part_hs = 0;
			if (is_horiz_adj(flp, i, j)) {
				part = gx[i] / flp->units[i].height;
				part_int = gx_int[i] / flp->units[i].height;
				part_sp = gx_sp[i] / flp->units[i].height;
				part_hs = gx_hs[i] / flp->units[i].height;
			}
			else if (is_vert_adj(flp, i,j))  {
				part = gy[i] / flp->units[i].width;
				part_int = gy_int[i] / flp->units[i].width;
				part_sp = gy_sp[i] / flp->units[i].width;
				part_hs = gy_hs[i] / flp->units[i].width;
			}
			g[i][j] = part * len[i][j];
			g[IFACE*n+i][IFACE*n+j] = part_int * len[i][j];
			g[HSP*n+i][HSP*n+j] = part_sp * len[i][j];
			g[HSINK*n+i][HSINK*n+j] = part_hs * len[i][j];
		}
		/* the 2.0 factor in the following equations is
		 * explained during the calculation of the B matrix
		 */
		/* vertical g's in the silicon layer	*/
		g[i][IFACE*n+i]=g[IFACE*n+i][i]=2.0/getr(k_chip, t_chip, area);
		/* vertical g's in the interface layer	*/
		g[IFACE*n+i][HSP*n+i]=g[HSP*n+i][IFACE*n+i]=2.0/getr(k_interface, t_interface, area);
		/* vertical g's in the spreader layer	*/
		g[HSP*n+i][HSINK*n+i]=g[HSINK*n+i][HSP*n+i]=2.0/getr(k_spreader, t_spreader, area);
		/* vertical g's in the heatsink core layer	*/
		/* vertical R to ambient: divide r_convec proportional to area	*/
		r_amb = r_convec * (s_sink * s_sink) / area;
		g_amb[i] = 1.0 / (getr(k_sink, t_sink, area) + r_amb);

		/* lateral g's from block center (spreader layer) to peripheral (n,s,e,w) spreader nodes.
		 * the border[i][*] factor makes the conductance 0 for blocks not on that edge
		 */
		g[HSP*n+i][NL*n+SP_N]=g[NL*n+SP_N][HSP*n+i]=2.0*border[i][2] /
							  ((1.0/gy_sp[i])+model->pack.r_sp1_y*gn_sp/gy_sp[i]);
		g[HSP*n+i][NL*n+SP_S]=g[NL*n+SP_S][HSP*n+i]=2.0*border[i][3] /
							  ((1.0/gy_sp[i])+model->pack.r_sp1_y*gs_sp/gy_sp[i]);
		g[HSP*n+i][NL*n+SP_E]=g[NL*n+SP_E][HSP*n+i]=2.0*border[i][1] /
							  ((1.0/gx_sp[i])+model->pack.r_sp1_x*ge_sp/gx_sp[i]);
		g[HSP*n+i][NL*n+SP_W]=g[NL*n+SP_W][HSP*n+i]=2.0*border[i][0] /
							  ((1.0/gx_sp[i])+model->pack.r_sp1_x*gw_sp/gx_sp[i]);

		/* lateral g's from block center (heatsink layer) to peripheral (n,s,e,w) heatsink nodes	*/
		g[HSINK*n+i][NL*n+SINK_C_N]=g[NL*n+SINK_C_N][HSINK*n+i]=2.0*border[i][2] /
									((1.0/gy_hs[i])+model->pack.r_hs1_y*gn_hs/gy_hs[i]);
		g[HSINK*n+i][NL*n+SINK_C_S]=g[NL*n+SINK_C_S][HSINK*n+i]=2.0*border[i][3] /
									((1.0/gy_hs[i])+model->pack.r_hs1_y*gs_hs/gy_hs[i]);
		g[HSINK*n+i][NL*n+SINK_C_E]=g[NL*n+SINK_C_E][HSINK*n+i]=2.0*border[i][1] /
									((1.0/gx_hs[i])+model->pack.r_hs1_x*ge_hs/gx_hs[i]);
		g[HSINK*n+i][NL*n+SINK_C_W]=g[NL*n+SINK_C_W][HSINK*n+i]=2.0*border[i][0] /
									((1.0/gx_hs[i])+model->pack.r_hs1_x*gw_hs/gx_hs[i]);
	}

	/* g's from peripheral(n,s,e,w) nodes	*/
	/* vertical g's between peripheral spreader nodes and center peripheral heatsink nodes */
	g[NL*n+SP_N][NL*n+SINK_C_N]=g[NL*n+SINK_C_N][NL*n+SP_N]=2.0/model->pack.r_sp_per_y;
	g[NL*n+SP_S][NL*n+SINK_C_S]=g[NL*n+SINK_C_S][NL*n+SP_S]=2.0/model->pack.r_sp_per_y;
	g[NL*n+SP_E][NL*n+SINK_C_E]=g[NL*n+SINK_C_E][NL*n+SP_E]=2.0/model->pack.r_sp_per_x;
	g[NL*n+SP_W][NL*n+SINK_C_W]=g[NL*n+SINK_C_W][NL*n+SP_W]=2.0/model->pack.r_sp_per_x;
	/* lateral g's between peripheral outer sink nodes and center peripheral sink nodes	*/
	g[NL*n+SINK_C_N][NL*n+SINK_N]=g[NL*n+SINK_N][NL*n+SINK_C_N]=2.0/(model->pack.r_hs + model->pack.r_hs2_y);
	g[NL*n+SINK_C_S][NL*n+SINK_S]=g[NL*n+SINK_S][NL*n+SINK_C_S]=2.0/(model->pack.r_hs + model->pack.r_hs2_y);
	g[NL*n+SINK_C_E][NL*n+SINK_E]=g[NL*n+SINK_E][NL*n+SINK_C_E]=2.0/(model->pack.r_hs + model->pack.r_hs2_x);
	g[NL*n+SINK_C_W][NL*n+SINK_W]=g[NL*n+SINK_W][NL*n+SINK_C_W]=2.0/(model->pack.r_hs + model->pack.r_hs2_x);
	/* vertical g's between inner peripheral sink nodes and ambient	*/
	g_amb[n+SINK_C_N] = g_amb[n+SINK_C_S] = 1.0 / (model->pack.r_hs_c_per_y+model->pack.r_amb_c_per_y);
	g_amb[n+SINK_C_E] = g_amb[n+SINK_C_W] = 1.0 / (model->pack.r_hs_c_per_x+model->pack.r_amb_c_per_x);
	/* vertical g's between outer peripheral sink nodes and ambient	*/
	g_amb[n+SINK_N] = g_amb[n+SINK_S] = g_amb[n+SINK_E] =
					  g_amb[n+SINK_W] = 1.0 / (model->pack.r_hs_per+model->pack.r_amb_per);

	/* calculate matrix B such that BT = POWER in steady state */
	/* non-diagonal elements: only the lower triangle is visited and
	 * each value is mirrored, so B is symmetric by construction
	 */
	for (i = 0; i < NL*n+EXTRA; i++)
		for (j = 0; j < i; j++)
			if ((g[i][j] == 0.0) || (g[j][i] == 0.0))
				b[i][j] = b[j][i] = 0.0;
			else
				/* here is why the 2.0 factor comes when calculating g[][]	*/
				b[i][j] = b[j][i] = -1.0/((1.0/g[i][j])+(1.0/g[j][i]));
	/* diagonal elements: conductance to ambient (if any) plus the
	 * sum of the conductances to all other nodes
	 */
	for (i = 0; i < NL*n+EXTRA; i++) {
		/* functional blocks in the heat sink layer	*/
		if (i >= HSINK*n && i < NL*n)
			b[i][i] = g_amb[i%n];
		/* heat sink peripheral nodes	*/
		else if (i >= NL*n+SINK_C_W)
			b[i][i] = g_amb[n+i-NL*n];
		/* all other nodes that are not connected to the ambient	*/
		else
			b[i][i] = 0.0;
		/* sum up the conductances (off-diagonal entries are negative, hence the -=)	*/
		for(j=0; j < NL*n+EXTRA; j++)
			if (i != j)
				b[i][i] -= b[i][j];
	}

	/* compute the LUP decomposition of B and store it too	*/
	copy_dmatrix(lu, b, NL*n+EXTRA, NL*n+EXTRA);
	/*
	 * B is a symmetric positive definite matrix. It is
	 * symmetric because if a node A is connected to B,
	 * then B is also connected to A with the same R value.
	 * It is positive definite because of the following
	 * informal argument from Professor Lieven Vandenberghe's
	 * lecture slides for the spring 2004-2005 EE 103 class
	 * at UCLA: http://www.ee.ucla.edu/~vandenbe/103/chol.pdf
	 * x^T*B*x = voltage^T * (B*x) = voltage^T * current
	 * = total power dissipated in the resistors > 0
	 * for x != 0.
	 */
	lupdcmp(lu, NL*n+EXTRA, p, 1);

	/* done	*/
	model->flp = flp;
	model->r_ready = TRUE;
}
/*
 * Build the capacitance side of the block thermal model.
 *
 * The transient equation is A(dT) + BT = Power, i.e.
 * dT + A^-1*BT = A^-1*Power. A is diagonal (there are no lateral
 * capacitances; every capacitance is to ground) and is kept as the
 * 1-d vector model->a, so its inverse is simply inva[i] = 1/a[i].
 * This function fills model->a and model->inva and stores
 * C = A^-1 * B in model->c.
 *
 * The R model must already have been populated for the same
 * floorplan; fatal() is called otherwise. On success model->c_ready
 * is set to TRUE.
 */
void populate_C_model_block(block_model_t *model, flp_t *flp)
{
	/* arrays owned by the model	*/
	double *a = model->a;
	double *inva = model->inva;
	double **b = model->b;
	double **c = model->c;
	/* configuration values used below	*/
	double c_convec = model->config.c_convec;
	double s_sink = model->config.s_sink;
	double t_chip = model->config.t_chip;
	double t_interface = model->config.t_interface;
	double t_spreader = model->config.t_spreader;
	double t_sink = model->config.t_sink;
	double p_chip = model->config.p_chip;
	double p_interface = model->config.p_interface;
	double p_spreader = model->config.p_spreader;
	double p_sink = model->config.p_sink;
	double w_chip, l_chip;
	double cap;

	int n = flp->n_units;
	int blk, node;

	if (!model->r_ready) {
		fatal("R model not ready\n");
	}
	if (model->flp != flp || model->n_units != flp->n_units ||
		model->n_nodes != NL * flp->n_units + EXTRA) {
		fatal("different floorplans for R and C models!\n");
	}

	w_chip = get_total_width (flp);	/* x-axis	*/
	l_chip = get_total_height (flp);	/* y-axis	*/

	/* package C's	*/
	populate_package_C(&model->pack, &model->config, w_chip, l_chip);

	/* one capacitance to ground per block and layer	*/
	for (blk = 0; blk < n; blk++) {
		double area = (flp->units[blk].height * flp->units[blk].width);
		/* share of c_convec proportional to the block area	*/
		double c_amb = C_FACTOR * c_convec / (s_sink * s_sink) * area;

		/* silicon	*/
		a[blk] = getcap(p_chip, t_chip, area);
		/* interface material	*/
		a[IFACE*n+blk] = getcap(p_interface, t_interface, area);
		/* spreader	*/
		a[HSP*n+blk] = getcap(p_spreader, t_spreader, area);
		/* heatsink, together with the convection share	*/
		a[HSINK*n+blk] = getcap(p_sink, t_sink, area) + c_amb;
	}

	/* C's from peripheral(n,s,e,w) nodes	*/
	/* peripheral spreader nodes	*/
	cap = model->pack.c_sp_per_y;
	a[NL*n+SP_S] = cap;
	a[NL*n+SP_N] = cap;
	cap = model->pack.c_sp_per_x;
	a[NL*n+SP_W] = cap;
	a[NL*n+SP_E] = cap;

	/* center peripheral sink nodes.
	 * NOTE: this treatment of capacitances (and
	 * the corresponding treatment of resistances
	 * in populate_R_model) as parallel (series)
	 * is only approximate and is done in order
	 * to avoid creating an extra layer of nodes
	 */
	cap = model->pack.c_hs_c_per_y + model->pack.c_amb_c_per_y;
	a[NL*n+SINK_C_S] = cap;
	a[NL*n+SINK_C_N] = cap;
	cap = model->pack.c_hs_c_per_x + model->pack.c_amb_c_per_x;
	a[NL*n+SINK_C_W] = cap;
	a[NL*n+SINK_C_E] = cap;

	/* outer peripheral sink nodes	*/
	cap = model->pack.c_hs_per + model->pack.c_amb_per;
	a[NL*n+SINK_W] = cap;
	a[NL*n+SINK_E] = cap;
	a[NL*n+SINK_S] = cap;
	a[NL*n+SINK_N] = cap;

	/* A is diagonal, so inverting it is element-wise	*/
	for (node = 0; node < NL*n+EXTRA; node++) {
		inva[node] = 1.0/a[node];
	}

	/* store C = A^-1 * B, the matrix used by dT + A^-1 * B T = A^-1 * POWER	*/
	diagmatmult(c, inva, b, NL*n+EXTRA);

	/*	done	*/
	model->c_ready = TRUE;
}
Ejemplo n.º 9
0
/* write the dim x dim matrix m to the text file at path, one row per
 * line, each entry printed as "%f" followed by two spaces. a file that
 * cannot be opened is reported on stderr and skipped, so that an
 * unwritable working directory does not crash the caller.
 */
static void dump_matrix_to_file(const char *path, double **m, int dim)
{
	int i, j;
	FILE *fp = fopen(path, "w");

	if (!fp) {
		fprintf(stderr, "warning: cannot open %s for writing, skipping dump\n", path);
		return;
	}
	for (i = 0; i < dim; i++) {
		for (j = 0; j < dim; j++)
			fprintf(fp, "%f  ", m[i][j]);
		fprintf(fp, "\n");
	}
	if (fclose(fp) != 0)
		fprintf(stderr, "warning: error while closing %s\n", path);
}

/* creates 3 matrices: invA, B, C: dT + A^-1*BT = A^-1*Power, 
 * C = A^-1 * B. note that A is a diagonal matrix (no lateral
 * capacitances. all capacitances are to ground). so, inva[i][i]
 * (= 1/a[i][i]) is just enough.
 *
 * NOTE: EXTRA nodes: 1 chip bottom, 5 spreader and 5 heat sink nodes
 * (north, south, east, west and bottom).
 *
 * flp          - floorplan whose n_units blocks become the first n nodes
 * omit_lateral - if non-zero, the lateral conductances between
 *                adjacent functional units are left at zero
 *
 * results are stored in b, c, inva and invb, which are not declared
 * in this function. factor_chip and max_slope are also assigned here.
 * the four result matrices are additionally dumped to the text files
 * "invA", "invB", "C" and "B" in the current directory.
 *
 * NOTE(review): the code accumulates into b[i][i] and reads entries of
 * g and border that are never written here, so it assumes matrix() and
 * imatrix() return zero-filled storage - confirm against their
 * definitions.
 */
void create_RC_matrices(flp_t *flp, int omit_lateral)
{
	int i, j, k = 0, n = flp->n_units;
	int **border;
	double **len, *gx, *gy, **g, *c_ver, **t;
	double r_sp1, r_sp2, r_hs;	/* lateral resistances to spreader and heatsink	*/

	/* NOTE: *_mid - the vertical R/C from center nodes of spreader 
	 * and heatsink. *_ver - the vertical R/C from peripheral (n,s,e,w) nodes
	 */
	double r_sp_mid, r_sp_ver, r_hs_mid, r_hs_ver, c_sp_mid, c_sp_ver, c_hs_mid, c_hs_ver;
	double gn=0, gs=0, ge=0, gw=0;
	double w_chip = get_total_width (flp);	/* x-axis	*/
	double l_chip = get_total_height (flp);	/* y-axis	*/

	border = imatrix(n, 4);
	len = matrix(n, n);		/* len[i][j] = length of shared edge bet. i & j	*/
	gx = vector(n);			/* lumped conductances in x direction	*/
	gy = vector(n);			/* lumped conductances in y direction	*/
	g = matrix(n+EXTRA, n+EXTRA);	/* g[i][j] = conductance bet. nodes i & j */
	c_ver = vector(n+EXTRA);	/* vertical capacitance	*/

	b = matrix(n+EXTRA, n+EXTRA);	/* B, C, INVA  and INVB are (n+EXTRA)x(n+EXTRA) matrices	*/
	c = matrix(n+EXTRA, n+EXTRA);
	inva = matrix(n+EXTRA, n+EXTRA);
	invb = matrix(n+EXTRA, n+EXTRA);
	t = matrix (n+EXTRA, n+EXTRA);	/* copy of B	*/

	/* compute the silicon fitting factor - see pg 10 of the UVA CS tech report - CS-TR-2003-08	*/
	factor_chip = C_FACTOR * ((SPEC_HEAT_CU / SPEC_HEAT_SI) * (w_chip + 0.88 * t_spreader)
				* (l_chip + 0.88 * t_spreader) * t_spreader / ( w_chip * l_chip * t_chip) + 1);

	/* gx's and gy's of blocks	*/
	for (i = 0; i < n; i++) {
		gx[i] = 1.0/getr(K_SI, flp->units[i].height, flp->units[i].width, l_chip);
		gy[i] = 1.0/getr(K_SI, flp->units[i].width, flp->units[i].height, w_chip);
	}

	/* shared lengths between blocks (symmetric, so fill both halves)	*/
	for (i = 0; i < n; i++) 
		for (j = i; j < n; j++) 
			len[i][j] = len[j][i] = get_shared_len(flp, i, j);

	/* lateral R's of spreader and sink */
	r_sp1 = getr(K_CU, (s_spreader+3*w_chip)/4.0, (s_spreader-w_chip)/4.0, w_chip);
	r_sp2 = getr(K_CU, (3*s_spreader+w_chip)/4.0, (s_spreader-w_chip)/4.0, (s_spreader+3*w_chip)/4.0);
	r_hs = getr(K_CU, (s_sink+3*s_spreader)/4.0, (s_sink-s_spreader)/4.0, s_spreader);

	/* vertical R's and C's of spreader and sink */
	r_sp_mid = RHO_CU * t_spreader / (w_chip * l_chip);
	c_sp_mid = factor_pack * SPEC_HEAT_CU * t_spreader * (w_chip * l_chip);
	r_sp_ver = RHO_CU * t_spreader * 4.0 / (s_spreader * s_spreader - w_chip*l_chip);
	c_sp_ver = factor_pack * SPEC_HEAT_CU * t_spreader * (s_spreader * s_spreader - w_chip*l_chip) / 4.0;
	r_hs_mid = RHO_CU * t_sink / (s_spreader*s_spreader);
	c_hs_mid = factor_pack * SPEC_HEAT_CU * t_sink * (s_spreader * s_spreader);
	r_hs_ver = RHO_CU * t_sink * 4.0 / (s_sink * s_sink - s_spreader*s_spreader);
	c_hs_ver = factor_pack * SPEC_HEAT_CU * t_sink * (s_sink * s_sink - s_spreader*s_spreader) / 4.0;

	/* short the R's from block centers to a particular chip edge.
	 * border[i][0..3] flags whether block i touches the west, east,
	 * north and south edge respectively; gn/gs/ge/gw sum the
	 * conductances of all blocks on that edge.
	 */
	for (i = 0; i < n; i++) {
		if (eq(flp->units[i].bottomy + flp->units[i].height, l_chip)) {
			gn += gy[i];
			border[i][2] = 1;	/* block is on northern border 	*/
		}	
		if (eq(flp->units[i].bottomy, 0)) {
			gs += gy[i];
			border[i][3] = 1;	/* block is on southern border	*/
		}	
		if (eq(flp->units[i].leftx + flp->units[i].width, w_chip)) {
			ge += gx[i];
			border[i][1] = 1;	/* block is on eastern border	*/
		}	
		if (eq(flp->units[i].leftx, 0)) {
			gw += gx[i];
			border[i][0] = 1;	/* block is on western border	*/
		}	
	}

	/* overall R and C between nodes */
	for (i = 0; i < n; i++) {

		/* amongst functional units: conductance per unit edge length
		 * of block i, scaled by the edge length shared with block j
		 */
		for (j = 0; j < n; j++) {
			double part = 0;
			if (!omit_lateral) {
				if (is_horiz_adj(flp, i, j)){ 
					part = gx[i] / flp->units[i].height;
					printf("%d %d horiz adj\n",i,j);
				}
				else if (is_vert_adj(flp, i,j)) {
					part = gy[i] / flp->units[i].width;
					printf("%d %d vert adj\n",i,j);
				}
			}
			g[i][j] = part * len[i][j];
		}

		/* C's from functional units to ground	*/
		c_ver[i] = factor_chip * SPEC_HEAT_SI * t_chip * flp->units[i].height * flp->units[i].width;

		/* lateral g's from block center to peripheral (n,s,e,w) spreader nodes.
		 * the border flag in the numerator zeroes the entry for blocks
		 * that do not touch the corresponding edge.
		 */
		g[i][n+SP_N]=g[n+SP_N][i]=2.0*border[i][2]/((1.0/gy[i])+r_sp1*gn/gy[i]);
		g[i][n+SP_S]=g[n+SP_S][i]=2.0*border[i][3]/((1.0/gy[i])+r_sp1*gs/gy[i]);
		g[i][n+SP_E]=g[n+SP_E][i]=2.0*border[i][1]/((1.0/gx[i])+r_sp1*ge/gx[i]);
		g[i][n+SP_W]=g[n+SP_W][i]=2.0*border[i][0]/((1.0/gx[i])+r_sp1*gw/gx[i]);

		/* vertical g's from block center to chip bottom */
		g[i][n+CHIP_B]=g[n+CHIP_B][i]=2.0/(RHO_SI * t_chip / (flp->units[i].height * flp->units[i].width));

	}

	/* max slope (1/vertical RC time constant) for silicon	*/
	max_slope = 1.0 / (factor_chip * t_chip * t_chip * RHO_SI * SPEC_HEAT_SI);

	/* vertical g's and C's between central nodes	*/
	/* between chip bottom and spreader bottom */
	g[n+CHIP_B][n+SP_B]=g[n+SP_B][n+CHIP_B]=2.0/r_sp_mid;
	/* from chip bottom to ground	*/
	c_ver[n+CHIP_B]=c_sp_mid;
	/* between spreader bottom and sink bottom	*/
	g[n+SINK_B][n+SP_B]=g[n+SP_B][n+SINK_B]=2.0/r_hs_mid;
	/* from spreader bottom to ground	*/
	c_ver[n+SP_B]=c_hs_mid;
	/* from sink bottom to ground	*/
	c_ver[n+SINK_B]=c_convec;

	/* g's and C's from peripheral(n,s,e,w) nodes.
	 * NOTE(review): this addresses the four node indices immediately
	 * below SP_B and SINK_B, presumably the n,s,e,w peripherals -
	 * confirm against the node index definitions.
	 */
	for (i = 1; i <= 4; i++) {
		/* vertical g's between peripheral spreader nodes and spreader bottom */
		g[n+SP_B-i][n+SP_B]=g[n+SP_B][n+SP_B-i]=2.0/r_sp_ver;
		/* lateral g's between peripheral spreader nodes and peripheral sink nodes	*/
		g[n+SP_B-i][n+SINK_B-i]=g[n+SINK_B-i][n+SP_B-i]=2.0/(r_hs + r_sp2);
		/* vertical g's between peripheral sink nodes and sink bottom	*/
		g[n+SINK_B-i][n+SINK_B]=g[n+SINK_B][n+SINK_B-i]=2.0/r_hs_ver;
		/* from peripheral spreader nodes to ground	*/
		c_ver[n+SP_B-i]=c_sp_ver;
		/* from peripheral sink nodes to ground	*/
		c_ver[n+SINK_B-i]=c_hs_ver;
	}

	/* calculate matrices A, B such that A(dT) + BT = POWER */

	for (i = 0; i < n+EXTRA; i++) {
		for (j = 0; j < n+EXTRA; j++) {
			if (i==j) {
				/* diagonal: 1/C for invA, sum of all conductances
				 * leaving node i (plus convection at the sink
				 * bottom) for B
				 */
				inva[i][j] = 1.0/c_ver[i];
				if (i == n+SINK_B)	/* sink bottom */
					b[i][j] += 1.0 / r_convec;
				for (k = 0; k < n+EXTRA; k++) {
					/* unconnected node pair: nothing to add */
					if ((g[i][k]==0.0)||(g[k][i])==0.0) 
						continue;
					else 
					/* here is why the 2.0 factor comes when calculating g[][]	*/
						b[i][j] += 1.0/((1.0/g[i][k])+(1.0/g[k][i]));
				}
			} else {
				/* off-diagonal: negated series combination of the
				 * two half-conductances, zero if unconnected
				 */
				inva[i][j]=0.0;
				if ((g[i][j]==0.0)||(g[j][i])==0.0)
					b[i][j]=0.0;
				else
				b[i][j]=-1.0/((1.0/g[i][j])+(1.0/g[j][i]));
			}
		}
	}

	/* we are always going to use the eqn dT + A^-1 * B T = A^-1 * POWER. so, store  C = A^-1 * B	*/
	matmult(c, inva, b, n+EXTRA);
	/* we will also be needing INVB so store it too. the inversion is
	 * run on the scratch copy t, so b itself is passed only to
	 * copy_matrix.
	 */
	copy_matrix(t, b, n+EXTRA, n+EXTRA);
	matinv(invb, t, n+EXTRA);

	/* dump the result matrices; a file that cannot be opened is
	 * skipped with a warning instead of dereferencing a NULL FILE *
	 */
	dump_matrix_to_file("invA", inva, n+EXTRA);
	dump_matrix_to_file("invB", invb, n+EXTRA);
	dump_matrix_to_file("C", c, n+EXTRA);
	dump_matrix_to_file("B", b, n+EXTRA);

/*	dump_vector(c_ver, n+EXTRA);	*/
/*	dump_matrix(invb, n+EXTRA, n+EXTRA);	*/
/*	dump_matrix(c, n+EXTRA, n+EXTRA);	*/

	/* cleanup: only the temporaries are freed, b/c/inva/invb are kept */
	free_matrix(t, n+EXTRA);
	free_matrix(g, n+EXTRA);
	free_matrix(len, n);
	free_imatrix(border, n);
	free_vector(c_ver);
	free_vector(gx);
	free_vector(gy);
}