/* Tag array wordline delay (see section 6.3 of tech report) */ double SIM_wordline_tag_delay(int C, int A, int Ntspd, int Ntwl, double inrisetime, double *outrisetime) { double tf,m,a,b,c; double Cline,Rline,Ceq,nextinputtime; int tagbits; double Tworddrivedel,Twordchargedel; /* number of tag bits */ tagbits = PARM(ADDRESS_BITS)+2-(int)logtwo((double)C)+(int)logtwo((double)A); /* first stage */ Ceq = SIM_draincap(Wdecinvn,NCH,1) + SIM_draincap(Wdecinvp,PCH,1) + SIM_gatecap(Wdecinvn+Wdecinvp,20.0); tf = SIM_transreson(Wdecinvn,NCH,1)*Ceq; Tworddrivedel = SIM_horowitz(inrisetime,tf,PARM(VSINV),PARM(VSINV),RISE); nextinputtime = Tworddrivedel/(1.0-PARM(VSINV)); /* second stage */ Cline = (SIM_gatecappass(Wmemcella,(BitWidth-2*Wmemcella)/2.0)+ SIM_gatecappass(Wmemcella,(BitWidth-2*Wmemcella)/2.0)+ Cwordmetal)*tagbits*A*Ntspd/Ntwl+ SIM_draincap(Wdecinvn,NCH,1) + SIM_draincap(Wdecinvp,PCH,1); Rline = Rwordmetal*tagbits*A*Ntspd/(2*Ntwl); tf = (SIM_transreson(Wdecinvp,PCH,1)+Rline)*Cline; Twordchargedel = SIM_horowitz(nextinputtime,tf,PARM(VSINV),PARM(VSINV),FALL); *outrisetime = Twordchargedel/PARM(VSINV); return(Tworddrivedel+Twordchargedel); }
/*
 * switch cap of grant signal (round robin arbiter)
 *
 * The only contribution is the drain cap of a NOR gate, sized with the
 * decoder NOR widths.
 */
static double SIM_rr_arbiter_grant_cap()
{
  /* FIXME: need actual size */
  const double nor_drain_n = SIM_draincap(WdecNORn, NCH, 1);
  const double nor_drain_p = SIM_draincap(WdecNORp, PCH, 2);

  return 2 * nor_drain_n + nor_drain_p;
}
/* Valid driver (see section 6.9 of tech report) Note that this will only be called for a direct mapped cache */ double SIM_valid_driver_delay(int C, int A, int Ntbl, int Ntspd, double inputtime) { double Ceq,Tst1,tf; Ceq = SIM_draincap(Wmuxdrv12n,NCH,1)+SIM_draincap(Wmuxdrv12p,PCH,1)+Cout; tf = Ceq*SIM_transreson(Wmuxdrv12p,PCH,1); Tst1 = SIM_horowitz(inputtime,tf,PARM(VTHMUXDRV1),0.5,FALL); return(Tst1); }
/* switch cap of internal node (matrix arbiter) */
static double SIM_matrix_arbiter_int_cap()
{
  /* part 1: drain cap of NOR gate */
  const double nor_drain = 2 * SIM_draincap(WdecNORn, NCH, 1) + SIM_draincap(WdecNORp, PCH, 2);

  /* part 2: gate cap of the "huge" NOR gate */
  const double huge_nor_gate = SIM_gatecap(WdecNORn + WdecNORp, 0);

  return nor_drain + huge_nor_gate;
}
/* Sel inverter delay (part of the output driver) see section 6.8 */ double SIM_selb_delay_tag_path(double inrisetime, double *outrisetime) { double Ceq,Tst1,tf; Ceq = SIM_draincap(Woutdrvseln,NCH,1)+SIM_draincap(Woutdrvselp,PCH,1)+ SIM_gatecap(Woutdrvnandn+Woutdrvnandp,10.0); tf = Ceq*SIM_transreson(Woutdrvseln,NCH,1); Tst1 = SIM_horowitz(inrisetime,tf,PARM(VTHOUTDRINV),PARM(VTHOUTDRNAND),RISE); *outrisetime = Tst1/(1.0-PARM(VTHOUTDRNAND)); return(Tst1); }
/* switch cap of carry signal (round robin arbiter) */
static double SIM_rr_arbiter_carry_cap()
{
  /* part 1: drain cap of NOR gate (this block) */
  /* FIXME: need actual size */
  const double this_block_drain = 2 * SIM_draincap(WdecNORn, NCH, 1) + SIM_draincap(WdecNORp, PCH, 2);

  /* part 2: gate cap of NOR gate (next block) */
  /* FIXME: need actual size */
  const double next_block_gate = SIM_gatecap(WdecNORn + WdecNORp, 0);

  return this_block_drain + next_block_gate;
}
/* This routine calculates the extra time required after an access before * the next access can occur [ie. it returns (cycle time-access time)]. */ double SIM_precharge_delay(double worddata) { double Ceq,tf,pretime; /* as discussed in the tech report, the delay is the delay of 4 inverter delays (each with fanout of 4) plus the delay of the wordline */ Ceq = SIM_draincap(Wdecinvn,NCH,1)+SIM_draincap(Wdecinvp,PCH,1)+ 4*SIM_gatecap(Wdecinvn+Wdecinvp,0.0); tf = Ceq*SIM_transreson(Wdecinvn,NCH,1); pretime = 4*SIM_horowitz(0.0,tf,0.5,0.5,RISE) + worddata; return(pretime); }
/* Tag array bitline: (see section 6.4 in tech report) */ double SIM_bitline_tag_delay(int C, int A, int B, int Ntwl, int Ntbl, int Ntspd, double inrisetime, double *outrisetime) { double Tbit,Cline,Ccolmux,Rlineb,r1,r2,c1,c2,a,b,c; double m,tstep; double Cbitrow; /* bitline capacitance due to access transistor */ int rows,cols; Cbitrow = SIM_draincap(Wmemcella,NCH,1)/2.0; /* due to shared contact */ rows = C/(B*A*Ntbl*Ntspd); cols = 8*B*A*Ntspd/Ntwl; if (Ntbl*Ntspd == 1) { Cline = rows*(Cbitrow+Cbitmetal)+2*SIM_draincap(Wbitpreequ,PCH,1); Ccolmux = 2*SIM_gatecap(WsenseQ1to4,10.0); Rlineb = Rbitmetal*rows/2.0; r1 = Rlineb; } else { Cline = rows*(Cbitrow+Cbitmetal) + 2*SIM_draincap(Wbitpreequ,PCH,1) + SIM_draincap(Wbitmuxn,NCH,1); Ccolmux = Ntspd*Ntbl*(SIM_draincap(Wbitmuxn,NCH,1))+2*SIM_gatecap(WsenseQ1to4,10.0); Rlineb = Rbitmetal*rows/2.0; r1 = Rlineb + SIM_transreson(Wbitmuxn,NCH,1); } r2 = SIM_transreson(Wmemcella,NCH,1) + SIM_transreson(Wmemcella*Wmemcellbscale,NCH,1); c1 = Ccolmux; c2 = Cline; tstep = (r2*c2+(r1+r2)*c1)*log((Vbitpre)/(Vbitpre-Vbitsense)); /* take into account input rise time */ m = Vdd/inrisetime; if (tstep <= (0.5*(Vdd-Vt)/m)) { a = m; b = 2*((Vdd*0.5)-Vt); c = -2*tstep*(Vdd-Vt)+1/m*((Vdd*0.5)-Vt)* ((Vdd*0.5)-Vt); Tbit = (-b+sqrt(b*b-4*a*c))/(2*a); } else { Tbit = tstep + (Vdd+Vt)/(2*m) - (Vdd*0.5)/m; } *outrisetime = Tbit/(log((Vbitpre-Vbitsense)/Vdd)); return(Tbit); }
/* Data array wordline delay (see section 6.2 of tech report) */ double SIM_wordline_delay(int B, int A, int Ndwl, int Nspd, double inrisetime, double *outrisetime) { double Rpdrive,nextrisetime; double desiredrisetime,psize,nsize; double tf,nextinputtime,Ceq,Req,Rline,Cline; int cols; double Tworddrivedel,Twordchargedel; cols = 8*B*A*Nspd/Ndwl; /* Choose a transistor size that makes sense */ /* Use a first-order approx */ desiredrisetime = krise*log((double)(cols))/2.0; Cline = (SIM_gatecappass(Wmemcella,0.0)+ SIM_gatecappass(Wmemcella,0.0)+ Cwordmetal)*cols; Rpdrive = desiredrisetime/(Cline*log(PARM(VSINV))*-1.0); psize = SIM_restowidth(Rpdrive,PCH); if (psize > Wworddrivemax) { psize = Wworddrivemax; } /* Now that we have a reasonable psize, do the rest as before */ /* If we keep the ratio the same as the tag wordline driver, the threshold voltage will be close to VSINV */ nsize = psize * Wdecinvn/Wdecinvp; Ceq = SIM_draincap(Wdecinvn,NCH,1) + SIM_draincap(Wdecinvp,PCH,1) + SIM_gatecap(nsize+psize,20.0); tf = SIM_transreson(Wdecinvn,NCH,1)*Ceq; Tworddrivedel = SIM_horowitz(inrisetime,tf,PARM(VSINV),PARM(VSINV),RISE); nextinputtime = Tworddrivedel/(1.0-PARM(VSINV)); Cline = (SIM_gatecappass(Wmemcella,(BitWidth-2*Wmemcella)/2.0)+ SIM_gatecappass(Wmemcella,(BitWidth-2*Wmemcella)/2.0)+ Cwordmetal)*cols+ SIM_draincap(nsize,NCH,1) + SIM_draincap(psize,PCH,1); Rline = Rwordmetal*cols/2; tf = (SIM_transreson(psize,PCH,1)+Rline)*Cline; Twordchargedel = SIM_horowitz(nextinputtime,tf,PARM(VSINV),PARM(VSINV),FALL); *outrisetime = Twordchargedel/PARM(VSINV); return(Tworddrivedel+Twordchargedel); }
/* switch cap of request signal (round robin arbiter) */ static double SIM_rr_arbiter_req_cap(double length) { double Ctotal = 0; /* part 1: gate cap of 2 NOR gates */ /* FIXME: need actual size */ Ctotal += 2 * SIM_gatecap(WdecNORn + WdecNORp, 0); /* part 2: inverter */ /* FIXME: need actual size */ Ctotal += SIM_draincap(Wdecinvn, NCH, 1) + SIM_draincap(Wdecinvp, PCH, 1) + SIM_gatecap(Wdecinvn + Wdecinvp, 0); /* part 3: wire cap */ Ctotal += length * Cmetal; return Ctotal; }
/*
 * switch cap of request signal (matrix arbiter)
 *
 *   req_width  number of requesters; (req_width - 1) NOR gate loads are counted
 *   length     multiplied by Cmetal to give the wire contribution
 *
 * NOTE(review): u_int is presumably unsigned, so req_width == 0 would wrap
 * in (req_width - 1) -- confirm callers never pass 0.
 */
static double SIM_matrix_arbiter_req_cap(u_int req_width, double length)
{
  /* FIXME: all need actual sizes */

  /* part 1: gate cap of NOR gates */
  const double nor_gates = (req_width - 1) * SIM_gatecap(WdecNORn + WdecNORp, 0);

  /* part 2: inverter */
  const double inverter = SIM_draincap(Wdecinvn, NCH, 1) + SIM_draincap(Wdecinvp, PCH, 1) +
                          SIM_gatecap(Wdecinvn + Wdecinvp, 0);

  /* part 3: gate cap of the "huge" NOR gate */
  const double huge_nor_gate = SIM_gatecap(WdecNORn + WdecNORp, 0);

  /* part 4: wire cap */
  const double wire = length * Cmetal;

  return nor_gates + inverter + huge_nor_gate + wire;
}
/* Data output delay (data side) -- see section 6.8 This is the time through the NAND/NOR gate and the final inverter assuming sel is already present */ double SIM_dataoutput_delay(int C, int B, int A, int Ndbl, int Nspd, int Ndwl, double inrisetime, double *outrisetime) { double Ceq,Rwire,Rline; double aspectRatio; /* as height over width */ double ramBlocks; /* number of RAM blocks */ double tf; double nordel,outdel,nextinputtime; double hstack,vstack; /* calculate some layout info */ aspectRatio = (2.0*C)/(8.0*B*B*A*A*Ndbl*Ndbl*Nspd*Nspd); hstack = (aspectRatio > 1.0) ? aspectRatio : 1.0/aspectRatio; ramBlocks = Ndwl*Ndbl; hstack = hstack * sqrt(ramBlocks/ hstack); vstack = ramBlocks/ hstack; /* Delay of NOR gate */ Ceq = 2*SIM_draincap(Woutdrvnorn,NCH,1)+SIM_draincap(Woutdrvnorp,PCH,2)+ SIM_gatecap(Woutdrivern,10.0); tf = Ceq*SIM_transreson(Woutdrvnorp,PCH,2); nordel = SIM_horowitz(inrisetime,tf,PARM(VTHOUTDRNOR),PARM(VTHOUTDRIVE),FALL); nextinputtime = nordel/(PARM(VTHOUTDRIVE)); /* Delay of final output driver */ Ceq = (SIM_draincap(Woutdrivern,NCH,1)+SIM_draincap(Woutdriverp,PCH,1))* ((8*B*A)/PARM(BITOUT)) + Cwordmetal*(8*B*A*Nspd* (vstack)) + Cout; Rwire = Rwordmetal*(8*B*A*Nspd* (vstack))/2; tf = Ceq*(SIM_transreson(Woutdriverp,PCH,1)+Rwire); outdel = SIM_horowitz(nextinputtime,tf,PARM(VTHOUTDRIVE),0.5,RISE); *outrisetime = outdel/0.5; return(outdel+nordel); }
/* Delay of the multiplexor Driver (see section 6.7) */ double SIM_mux_driver_delay(int C, int B, int A, int Ndbl, int Nspd, int Ndwl, int Ntbl, int Ntspd, double inputtime, double *outputtime) { double Ceq,Req,tf,nextinputtime; double Tst1,Tst2,Tst3; /* first driver stage - Inverte "match" to produce "matchb" */ /* the critical path is the DESELECTED case, so consider what happens when the address bit is true, but match goes low */ Ceq = SIM_gatecap(WmuxdrvNORn+WmuxdrvNORp,15.0)*(8*B/PARM(BITOUT)) + SIM_draincap(Wmuxdrv12n,NCH,1) + SIM_draincap(Wmuxdrv12p,PCH,1); Req = SIM_transreson(Wmuxdrv12p,PCH,1); tf = Ceq*Req; Tst1 = SIM_horowitz(inputtime,tf,PARM(VTHMUXDRV1),PARM(VTHMUXDRV2),FALL); nextinputtime = Tst1/PARM(VTHMUXDRV2); /* second driver stage - NOR "matchb" with address bits to produce sel */ Ceq = SIM_gatecap(Wmuxdrv3n+Wmuxdrv3p,15.0) + 2*SIM_draincap(WmuxdrvNORn,NCH,1) + SIM_draincap(WmuxdrvNORp,PCH,2); Req = SIM_transreson(WmuxdrvNORn,NCH,1); tf = Ceq*Req; Tst2 = SIM_horowitz(nextinputtime,tf,PARM(VTHMUXDRV2),PARM(VTHMUXDRV3),RISE); nextinputtime = Tst2/(1-PARM(VTHMUXDRV3)); /* third driver stage - invert "select" to produce "select bar" */ Ceq = PARM(BITOUT)*SIM_gatecap(Woutdrvseln+Woutdrvselp+Woutdrvnorn+Woutdrvnorp,20.0)+ SIM_draincap(Wmuxdrv3p,PCH,1) + SIM_draincap(Wmuxdrv3n,NCH,1) + Cwordmetal*8*B*A*Nspd*Ndbl/2.0; Req = (Rwordmetal*8*B*A*Nspd*Ndbl/2)/2 + SIM_transreson(Wmuxdrv3p,PCH,1); tf = Ceq*Req; Tst3 = SIM_horowitz(nextinputtime,tf,PARM(VTHMUXDRV3),PARM(VTHOUTDRINV),FALL); *outputtime = Tst3/(PARM(VTHOUTDRINV)); return(Tst1 + Tst2 + Tst3); }
/*
 * switch cap of grant signal (matrix arbiter)
 *
 * req_width scales the NCH drain term and is passed as the third argument
 * of the PCH drain-cap call.
 */
static double SIM_matrix_arbiter_grant_cap(u_int req_width)
{
  /* drain cap of the "huge" NOR gate */
  const double n_drain = req_width * SIM_draincap(WdecNORn, NCH, 1);
  const double p_drain = SIM_draincap(WdecNORp, PCH, req_width);

  return n_drain + p_drain;
}
/* Comparator Delay (see section 6.6) */ double SIM_compare_time(int C, int A, int Ntbl, int Ntspd, double inputtime, double *outputtime) { double Req,Ceq,tf,st1del,st2del,st3del,nextinputtime,m; double c1,c2,r1,r2,tstep,a,b,c; double Tcomparatorni; int cols,tagbits; /* First Inverter */ Ceq = SIM_gatecap(Wcompinvn2+Wcompinvp2,10.0) + SIM_draincap(Wcompinvp1,PCH,1) + SIM_draincap(Wcompinvn1,NCH,1); Req = SIM_transreson(Wcompinvp1,PCH,1); tf = Req*Ceq; st1del = SIM_horowitz(inputtime,tf,PARM(VTHCOMPINV),PARM(VTHCOMPINV),FALL); nextinputtime = st1del/PARM(VTHCOMPINV); /* Second Inverter */ Ceq = SIM_gatecap(Wcompinvn3+Wcompinvp3,10.0) + SIM_draincap(Wcompinvp2,PCH,1) + SIM_draincap(Wcompinvn2,NCH,1); Req = SIM_transreson(Wcompinvn2,NCH,1); tf = Req*Ceq; st2del = SIM_horowitz(inputtime,tf,PARM(VTHCOMPINV),PARM(VTHCOMPINV),RISE); nextinputtime = st1del/(1.0-PARM(VTHCOMPINV)); /* Third Inverter */ Ceq = SIM_gatecap(Wevalinvn+Wevalinvp,10.0) + SIM_draincap(Wcompinvp3,PCH,1) + SIM_draincap(Wcompinvn3,NCH,1); Req = SIM_transreson(Wcompinvp3,PCH,1); tf = Req*Ceq; st3del = SIM_horowitz(nextinputtime,tf,PARM(VTHCOMPINV),PARM(VTHEVALINV),FALL); nextinputtime = st1del/(PARM(VTHEVALINV)); /* Final Inverter (virtual ground driver) discharging compare part */ tagbits = PARM(ADDRESS_BITS) - (int)logtwo((double)C) + (int)logtwo((double)A); cols = tagbits*Ntbl*Ntspd; r1 = SIM_transreson(Wcompn,NCH,2); r2 = SIM_transresswitch(Wevalinvn,NCH,1); c2 = (tagbits)*(SIM_draincap(Wcompn,NCH,1)+SIM_draincap(Wcompn,NCH,2))+ SIM_draincap(Wevalinvp,PCH,1) + SIM_draincap(Wevalinvn,NCH,1); c1 = (tagbits)*(SIM_draincap(Wcompn,NCH,1)+SIM_draincap(Wcompn,NCH,2)) +SIM_draincap(Wcompp,PCH,1) + SIM_gatecap(Wmuxdrv12n+Wmuxdrv12p,20.0) + cols*Cwordmetal; /* time to go to threshold of mux driver */ tstep = (r2*c2+(r1+r2)*c1)*log(1.0/PARM(VTHMUXDRV1)); /* take into account non-zero input rise time */ m = Vdd/nextinputtime; if ((tstep) <= (0.5*(Vdd-Vt)/m)) { a = m; b = 2*((Vdd*PARM(VTHEVALINV))-Vt); c = 
-2*(tstep)*(Vdd-Vt)+1/m*((Vdd*PARM(VTHEVALINV))-Vt)*((Vdd*PARM(VTHEVALINV))-Vt); Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a); } else { Tcomparatorni = (tstep) + (Vdd+Vt)/(2*m) - (Vdd*PARM(VTHEVALINV))/m; } *outputtime = Tcomparatorni/(1.0-PARM(VTHMUXDRV1)); return(Tcomparatorni+st1del+st2del+st3del); }
/* Decoder delay in the tag array (see section 6.1 of tech report) */ double SIM_decoder_tag_delay(int C, int B, int A, int Ndwl, int Ndbl, int Nspd, int Ntwl, int Ntbl, int Ntspd, double *Tdecdrive, double *Tdecoder1, double *Tdecoder2, double *outrisetime) { double Ceq,Req,Rwire,rows,tf,nextinputtime,vth = 0,tstep,m,a,b,c; int numstack; /* Calculate rise time. Consider two inverters */ Ceq = SIM_draincap(Wdecdrivep,PCH,1)+SIM_draincap(Wdecdriven,NCH,1) + SIM_gatecap(Wdecdrivep+Wdecdriven,0.0); tf = Ceq*SIM_transreson(Wdecdriven,NCH,1); nextinputtime = SIM_horowitz(0.0,tf,PARM(VTHINV100x60),PARM(VTHINV100x60),FALL)/ (PARM(VTHINV100x60)); Ceq = SIM_draincap(Wdecdrivep,PCH,1)+SIM_draincap(Wdecdriven,NCH,1) + SIM_gatecap(Wdecdrivep+Wdecdriven,0.0); tf = Ceq*SIM_transreson(Wdecdriven,NCH,1); nextinputtime = SIM_horowitz(nextinputtime,tf,PARM(VTHINV100x60),PARM(VTHINV100x60), RISE)/ (1.0-PARM(VTHINV100x60)); /* First stage: driving the decoders */ rows = C/(8*B*A*Ntbl*Ntspd); Ceq = SIM_draincap(Wdecdrivep,PCH,1)+SIM_draincap(Wdecdriven,NCH,1) + 4*SIM_gatecap(Wdec3to8n+Wdec3to8p,10.0)*(Ntwl*Ntbl)+ Cwordmetal*0.25*8*B*A*Ntbl*Ntspd; Rwire = Rwordmetal*0.125*8*B*A*Ntbl*Ntspd; tf = (Rwire + SIM_transreson(Wdecdrivep,PCH,1))*Ceq; *Tdecdrive = SIM_horowitz(nextinputtime,tf,PARM(VTHINV100x60),PARM(VTHNAND60x90), FALL); nextinputtime = *Tdecdrive/PARM(VTHNAND60x90); /* second stage: driving a bunch of nor gates with a nand */ numstack = (int)(ceil((1.0/3.0)*logtwo( (double)((double)C/(double)(B*A*Ntbl*Ntspd))))); if (numstack==0) numstack = 1; if (numstack>5) numstack = 5; Ceq = 3*SIM_draincap(Wdec3to8p,PCH,1) +SIM_draincap(Wdec3to8n,NCH,3) + SIM_gatecap(WdecNORn+WdecNORp,((numstack*40)+20.0))*rows + Cbitmetal*rows*8; Rwire = Rbitmetal*rows*8/2; tf = Ceq*(Rwire+SIM_transreson(Wdec3to8n,NCH,3)); /* we only want to charge the output to the threshold of the nor gate. But the threshold depends on the number of inputs to the nor. 
*/ switch(numstack) { case 1: vth = PARM(VTHNOR12x4x1); break; case 2: vth = PARM(VTHNOR12x4x2); break; case 3: vth = PARM(VTHNOR12x4x3); break; case 4: vth = PARM(VTHNOR12x4x4); break; case 5: vth = PARM(VTHNOR12x4x4); break; case 6: vth = PARM(VTHNOR12x4x4); break; default: printf("error:numstack=%d\n",numstack); } *Tdecoder1 = SIM_horowitz(nextinputtime,tf,PARM(VTHNAND60x90),vth,RISE); nextinputtime = *Tdecoder1/(1.0-vth); /* Final stage: driving an inverter with the nor */ Req = SIM_transreson(WdecNORp,PCH,numstack); Ceq = (SIM_gatecap(Wdecinvn+Wdecinvp,20.0)+ numstack*SIM_draincap(WdecNORn,NCH,1)+ SIM_draincap(WdecNORp,PCH,numstack)); tf = Req*Ceq; *Tdecoder2 = SIM_horowitz(nextinputtime,tf,vth,PARM(VSINV),FALL); *outrisetime = *Tdecoder2/(PARM(VSINV)); return(*Tdecdrive+*Tdecoder1+*Tdecoder2); }