/**
 * Generates the VHDL of a floating-point "X >= Y" comparator.
 *
 * The comparison is carried out by a subtraction: Y is copied into nY with
 * bit wE+wF inverted (presumably the sign bit of the FP input format -- TODO
 * confirm), X and nY are fed to an FPAdderSinglePath, and the 1-bit output R
 * is driven to '1' when bit wE+wF of the adder result is '0' or when the two
 * topmost bits of the result are "00".
 *
 * @param _target  target the operator is generated for
 * @param wE       exponent width of both inputs
 * @param wF       fraction width of both inputs
 */
FPge_expr::FPge_expr(Target* _target, int wE, int wF) : Operator(_target)
{
   // Index of the bit that gets inverted in Y and tested in the difference.
   const int flipBit = wE + wF;

   ostringstream opName;
   opName << "FPge_expr_" << wE << "_" << wF;
   setName(opName.str());
   setCopyrightString("Fabrizio Ferrandi (2011-2017)");

   /* Interface: two FP operands in, one result bit out */
   addFPInput("X", wE, wF);
   addFPInput("Y", wE, wF);
   addOutput("R", 1);

   /* nY = Y with bit wE+wF inverted, so that X + nY computes X - Y */
   manageCriticalPath(_target->localWireDelay() + _target->lutDelay());
   vhdl << tab << declare("nY", flipBit + 3) << " <= Y" << range(flipBit + 2, flipBit + 1) << " & not(Y" << of(flipBit) << ") & Y" << range(flipBit - 1, 0) << ";" << endl;

   /* The subtraction itself, delegated to a single-path FP adder */
   FPAdderSinglePath* subtractor = new FPAdderSinglePath(_target, wE, wF, wE, wF, wE, wF);
   subtractor->changeName(getName() + "value_difference");
   oplist.push_back(subtractor);
   inPortMap(subtractor, "X", "X");
   inPortMap(subtractor, "Y", "nY");
   outPortMap(subtractor, "R", "valueDiff");
   vhdl << instance(subtractor, "value_difference");

   /* Resume timing bookkeeping from the adder output */
   syncCycleFromSignal("valueDiff");
   setCriticalPath(subtractor->getOutputDelay("R"));

   /* Decode the result: bit wE+wF clear, or top two bits equal to "00" */
   manageCriticalPath(_target->localWireDelay() + _target->lutDelay());
   vhdl << tab << "R(0) <= '1' when (valueDiff" << of(flipBit) << "='0' or (valueDiff" << range(flipBit + 2, flipBit + 1) << " = \"00\")) else '0';" << endl;
}
/**
 * Generates the VHDL of an LNS multiplier.
 *
 * Both operands and the result are (wE + wF + 3)-bit words. As read from the
 * emitted VHDL: the low wE+wF bits of nA and nB are extended by one copy of
 * their top bit and summed by an IntAdder, bit wE+wF of the two operands is
 * XORed into sRn, and the two topmost bits of the operands select the two
 * topmost bits of the result through a "with ... select" statement.
 *
 * @param target  target the operator is generated for
 * @param wE      width of the integral part of the exponent
 * @param wF      width of the fractional part of the exponent
 */
LNSMul::LNSMul(Target * target, int wE, int wF) :
   Operator(target), wE(wE), wF(wF)
{
   // Operator name has the form LNSMul_<wE>_<wF>.
   ostringstream opName;
   opName << "LNSMul_" << wE << "_" << wF;
   setName(opName.str());

   setCopyrightString("Jérémie Detrey, Florent de Dinechin (2003-2004), Sylvain Collange (2008)");

   // Two operands and one result, all of the same width.
   const int wordWidth = wE + wF + 3;
   addInput ("nA", wordWidth);
   addInput ("nB", wordWidth);
   addOutput("nR", wordWidth);

   // The emitted VHDL refers to wE and wF symbolically, so export them as constants.
   addConstant("wE", "positive", wE);
   addConstant("wF", "positive", wF);

   // The sum is computed by an IntAdder component rather than by an inline
   // VHDL "+" (an earlier inline formulation was left commented out here).
   IntAdder *expAdder = new IntAdder(target, wE+wF+1);
   oplist.push_back(expAdder);

   // Adder operands: low wE+wF bits of each input, prefixed by a copy of their top bit.
   vhdl << tab << declare("X", wE+wF+1) << "<= nA(wE+wF-1) & nA(wE+wF-1 downto 0);\n";
   vhdl << tab << declare("Y", wE+wF+1) << "<= nB(wE+wF-1) & nB(wE+wF-1 downto 0);\n";

   inPortMap   (expAdder, "X", "X");
   inPortMap   (expAdder, "Y", "Y");
   inPortMapCst(expAdder, "Cin", "'0'");
   outPortMap  (expAdder, "R", "eRn");
   vhdl << instance(expAdder, "my_add");

   // Bit wE+wF of the result is the XOR of the same bit of both operands.
   vhdl << tab << declare("sRn") << " <= nA(wE+wF) xor nB(wE+wF);\n";

   // Two-bit code derived from the two topmost bits of the sum.
   vhdl << tab << declare("xRn", 2) << " <= \"00\" when eRn(wE+wF downto wE+wF-1) = \"10\" else\n"
        << tab << " \"10\" when eRn(wE+wF downto wE+wF-1) = \"01\" else\n"
        << tab << " \"01\";\n";

   vhdl << tab << declare("nRn", wE+wF+3) << " <= xRn & sRn & eRn(wE+wF-1 downto 0);\n";

   // Top two bits of each operand, concatenated larger-first so the select
   // below needs fewer choices.
   vhdl << tab << declare("xA", 2) << " <= nA(wE+wF+2 downto wE+wF+1);\n";
   vhdl << tab << declare("xB", 2) << " <= nB(wE+wF+2 downto wE+wF+1);\n";
   vhdl << tab << declare("xAB", 4) << " <= xA & xB when xA >= xB else\n"
        << tab << " xB & xA;\n";

   // Top two bits of the result as a function of the operand codes.
   vhdl << tab << "with xAB select\n";
   vhdl << tab << tab << "nR(wE+wF+2 downto wE+wF+1) <= xRn when \"0101\",\n";
   vhdl << tab << " \"00\" when \"0000\" | \"0100\",\n";
   vhdl << tab << " \"10\" when \"1001\" | \"1010\",\n";
   vhdl << tab << " \"11\" when others;\n";
   vhdl << tab << "\n";

   // Remaining result bits come straight from nRn.
   vhdl << tab << "nR(wE+wF downto 0) <= nRn(wE+wF downto 0);\n";
}
LongIntAdderMuxNetwork::LongIntAdderMuxNetwork(Target* target, int wIn, map<string, double> inputDelays, int regular): Operator(target), wIn_(wIn), inputDelays_(inputDelays) { srcFileName="LongIntAdderMuxNetwork"; setName(join("LongIntAdderMuxNetwork_", wIn_)); // Set up the IO signals for (int i=0; i<2; i++) addInput ( join("X",i) , wIn_, true); addInput("Cin"); addOutput("R" , wIn_, true, 1); //compute the maximum input delay maxInputDelay = getMaxInputDelays(inputDelays); if (false){ if (verbose) cout << "The maximum input delay is "<< maxInputDelay<<endl; cSize = new int[2000]; REPORT(3, "-- The new version: direct mapping without 0/1 padding, IntAdders instantiated"); double objectivePeriod = double(1) / target->frequency(); REPORT(2, "Objective period is "<< objectivePeriod <<" at an objective frequency of "<<target->frequency()); target->suggestSubaddSize(chunkSize_ ,wIn_); REPORT(2, "The chunkSize for first two chunks is: " << chunkSize_ ); if (2*chunkSize_ >= wIn_){ cerr << "ERROR FOR NOW -- instantiate int adder, dimmension too small for LongIntAdderMuxNetwork" << endl; exit(0); } cSize[0] = chunkSize_; cSize[1] = chunkSize_; bool finished = false; /* detect when finished the first the first phase of the chunk selection algo */ int width = wIn_ - 2*chunkSize_; /* remaining size to split into chunks */ int propagationSize = 0; /* carry addition size */ int chunkIndex = 2; /* the index of the chunk for which the size is to be determined at the current step */ bool invalid = false; /* the result of the first phase of the algo */ /* FIRST PHASE */ REPORT(3, "FIRST PHASE chunk splitting"); while (not (finished)) { REPORT(2, "The width is " << width); propagationSize+=2; double delay = objectivePeriod - target->adderDelay(width)- target->adderDelay(propagationSize); //2*target->localWireDelay() - REPORT(2, "The value of the delay at step " << chunkIndex << " is " << delay); if ((delay > 0) || (width < 4)) { REPORT(2, "finished -> found last chunk of size: " << 
width); cSize[chunkIndex] = width; finished = true; }else{ REPORT(2, "Found regular chunk "); int cs; double slack = target->adderDelay(propagationSize) ; //+ 2*target->localWireDelay() REPORT(2, "slack is: " << slack); REPORT(2, "adderDelay of " << propagationSize << " is " << target->adderDelay(propagationSize) ); target->suggestSlackSubaddSize( cs, width, slack); REPORT(2, "size of the regular chunk is : " << cs); width = width - cs; cSize[chunkIndex] = cs; if ( (cSize[chunkIndex-1]<=2) && (cSize[chunkIndex-1]<=2) && ( invalid == false) ){ REPORT(1, "[WARNING] Register level inserted after carry-propagation chain"); invalid = true; /* invalidate the current splitting */ } chunkIndex++; /* as this is not the last pair of chunks, pass to the next pair */ } } REPORT(2, "First phase return valid result: " << invalid); /* SECOND PHASE: only if first phase is cannot return a valid chunk size decomposition */ if (invalid){ REPORT(2,"SECOND PHASE chunk splitting ..."); target->suggestSubaddSize(chunkSize_ ,wIn_); lastChunkSize_ = (wIn_% chunkSize_ ==0 ? chunkSize_ :wIn_% chunkSize_); /* the index of the last chunk pair */ chunkIndex = (wIn_% chunkSize_ ==0 ? ( wIn_ / chunkSize_) - 1 : (wIn_-lastChunkSize_) / chunkSize_ ); for (int i=0; i < chunkIndex; i++) cSize[i] = chunkSize_; /* last chunk is handled separately */ cSize[chunkIndex] = lastChunkSize_; } /* VERIFICATION PHASE: check if decomposition is correct */ REPORT(2, "found " << chunkIndex + 1 << " chunks "); nbOfChunks = chunkIndex + 1; int sum = 0; ostringstream chunks; for (int i=chunkIndex; i>=0; i--){ chunks << cSize[i] << " "; sum+=cSize[i]; } chunks << endl; REPORT(2, "Chunks are: " << chunks.str()); REPORT(2, "The chunk size sum is " << sum << " and initial width was " << wIn_); if (sum != wIn_){ cerr << "ERROR: check the algo" << endl; /*should never get here ... 
*/ exit(0); } } int ll,l0; // double xordelay; // double dcarry; // double muxcystoo; // double fdcq; double muxcystooOut; int fanOutWeight; if (target->getID()=="Virtex5"){ // fdcq = 0.396e-9; // xordelay = 0.300e-9; // dcarry = 0.023e-9; // muxcystoo = 0.305e-9; muxcystooOut = 0.504e-9; fanOutWeight = 45; }else{ if (target->getID()=="Virtex6"){ // fdcq = 0.280e-9; // xordelay = 0.180e-9; // dcarry = 0.015e-9; // muxcystoo = 0.219e-9; muxcystooOut = 0.373e-9; fanOutWeight = 51; }else{ if (target->getID()=="Virtex4"){ // fdcq = 0.272e-9; // xordelay = 0.273e-9; // dcarry = 0.034e-9; // muxcystoo = 0.278e-9; muxcystooOut = 0.524e-9; fanOutWeight = 60; } } } int lkm1; double iDelay = getMaxInputDelays(inputDelays); #ifdef MAXSIZE for (int aa=25; aa<=500; aa+=5){ target->setFrequency(double(aa)*1000000.0); #endif bool nogo = false; double t = 1.0 / target->frequency(); if (!target->suggestSlackSubaddSize(lkm1, wIn, iDelay /*fdcq + target->localWireDelay()*/ + target->localWireDelay() + target->lutDelay())){ // cerr << "Impossible 1" << endl; nogo = true; } // cout << "lkm1 = " << lkm1 << endl; double z = iDelay + /*fdcq + target->localWireDelay() +*/ target->lutDelay() + //xordelay + muxcystooOut + // the select to output line of the carry chain multiplexer. // usually this delay for the 1-bit addition which is not overlapping target->localWireDelay() + target->localWireDelay(fanOutWeight) + //final multiplexer delay. 
Fan-out of the CGC bits is accounted for target->lutDelay(); #ifdef DEBUGN cerr << "lut delay = " << target->lutDelay() << endl; cerr << "muxcystooOut delay = " << muxcystooOut << endl; cerr << "localWireDelay delay = " << target->localWireDelay() << endl; cerr << "localWireDelay2 delay = " << target->localWireDelay(fanOutWeight) << endl; cerr << "z slack = " << z << endl; #endif nogo = nogo | (!target->suggestSlackSubaddSize(ll, wIn, z)); #ifdef DEBUGN cerr << "ll is = "<<ll << endl; #endif /*nogo = nogo | (!*/target->suggestSlackSubaddSize(l0, wIn, t - (2*target->lutDelay()+ muxcystooOut/* xordelay*/)); //); REPORT(INFO, "l0="<<l0); int maxAdderSize = lkm1 + ll*(ll+1)/2 + l0; if (nogo) maxAdderSize = -1; REPORT(INFO, "ll="<<ll); REPORT(INFO, "max adder size is="<< maxAdderSize); #ifdef MAXSIZE cout << " f="<<aa<<" s="<<maxAdderSize<<endl; } exit(1); #endif cSize = new int[100]; if (regular>0) { int c = regular; cout << "c="<<c<<endl; int s = wIn_; int j=0; while (s>0){ if (s-c>0){ cSize[j]=c; s-=c; }else{ cSize[j]=s; s=0; } j++; } nbOfChunks = j; }else{ int td = wIn; cSize[0] = l0; cSize[1] = 1; td -= (l0+1); nbOfChunks = 2; while (td > 0){ int nc = cSize[nbOfChunks-1] + 1; int nnc = lkm1; REPORT(INFO,"nc="<<nc); REPORT(INFO,"nnc="<<nnc); if (nc + nnc >= td){ REPORT(INFO, "Finish"); //we can finish it now; if (nc>=td) nc = td-1; cSize[nbOfChunks] = nc; nbOfChunks++; td-=nc; cSize[nbOfChunks] = td; nbOfChunks++; td=0; }else{ REPORT(INFO, "run"); //not possible to finish chunk splitting now cSize[nbOfChunks] = nc; nbOfChunks++; td-=nc; } } } for (int i=0; i<nbOfChunks; i++) REPORT(INFO, "cSize["<<i<<"]="<<cSize[i]); //#define test512 #ifdef test512 nbOfChunks = 16; for (int i=1;i<=16;i++) cSize[i-1]=32; #endif //================================================= //split the inputs ( this should be reusable ) vhdl << tab << "--split the inputs into chunks of bits depending on the frequency" << endl; for (int i=0;i<2;i++) for (int j=0; j<nbOfChunks; j++){ 
ostringstream name; //the naming standard: sX j _ i _ l //j=the chunk index i is the input index and l is the current level name << "sX"<<j<<"_"<<i<<"_l"<<0; int low=0, high=0; for (int k=0;k<=j;k++) high+=cSize[k]; for (int k=0;k<=j-1;k++) low+=cSize[k]; vhdl << tab << declare (name.str(),cSize[j],true) << " <= X"<<i<<range(high-1,low)<<";"<<endl; } int l=1; for (int j=0; j<nbOfChunks; j++){ //code for adder instantiation to stop ise from "optimizing" IntAdderSpecific *adder = new IntAdderSpecific(target, cSize[j]); oplist.push_back(adder); if (j>0){ //for all chunks greater than zero we perform this additions inPortMap(adder, "X", join("sX",j,"_0_l",l-1) ); inPortMap(adder, "Y", join("sX",j,"_1_l",l-1) ); inPortMapCst(adder, "Cin", "'0'"); outPortMap(adder, "R", join("sX",j,"_0_l",l,"_Zero") ); outPortMap(adder, "Cout", join("coutX",j,"_0_l",l,"_Zero") ); vhdl << instance(adder, join("adderZ",j) ); inPortMapCst(adder, "Cin", "'1'"); outPortMap(adder, "R", join("sX",j,"_0_l",l,"_One")); outPortMap(adder, "Cout", join("coutX",j,"_0_l",l,"_One")); vhdl << instance( adder, join("adderO",j) ); }else{ vhdl << tab << "-- the carry resulting from the addition of the chunk + Cin is obtained directly" << endl; inPortMap(adder, "X", join("sX",j,"_0_l",l-1) ); inPortMap(adder, "Y", join("sX",j,"_1_l",l-1) ); inPortMapCst(adder, "Cin", "Cin"); outPortMap(adder, "R", join("sX",j,"_0_l",l,"_Cin") ); outPortMap(adder, "Cout", join("coutX",j,"_0_l",l,"_Cin") ); vhdl << instance(adder, join("adderCin",j) ); } } vhdl << tab <<"--form the two carry string"<<endl; vhdl << tab << declare("carryStringZero",nbOfChunks-2) << " <= "; for (int i=nbOfChunks-3; i>=0; i--) { vhdl << "coutX"<<i+1<<"_0_l"<<l<<"_Zero"<< (i>0?" & ":";") ; } vhdl << endl; vhdl << tab << declare("carryStringOne", nbOfChunks-2) << " <= "; for (int i=nbOfChunks-3; i>=0; i--) { vhdl << "coutX"<<i+1<<"_0_l"<<l<<"_One" << " " << (i>0?" 
& ":";"); } vhdl << endl; //multiplexer network for (int i=0; i<=nbOfChunks-3; i++){ if (i==0) vhdl << tab << declare( join("c",i+1) ) << " <= carryStringOne"<<of(i)<<" when Cin='1' else carryStringZero"<<of(i)<<";"<<endl; else vhdl << tab << declare( join("c",i+1) ) << " <= carryStringOne"<<of(i)<<" when "<<join("c",i)<<"='1' else carryStringZero"<<of(i)<<";"<<endl; } for (int i=0; i< nbOfChunks; i++){ if (i==0) vhdl << tab << declare( join("res",i), cSize[i],true) << " <= " << join("sX",i,"_0_l",1,"_Cin") << ";" << endl; else if (i==1) vhdl << tab << declare( join("res",i), cSize[i],true) << " <= " << join("sX",i,"_0_l",1,"_Zero") << " when "<<join("coutX",0,"_0_l",1,"_Cin")<<"='0' else "<< join("sX",i,"_0_l",1,"_One") << ";" << endl; else vhdl << tab << declare( join("res",i), cSize[i],true) << " <= " << join("sX",i,"_0_l",1,"_Zero") << " when "<<join("c",i-1)<<"='0' else "<< join("sX",i,"_0_l",1,"_One") << ";" << endl; } // if (target->getVendor()== "Xilinx"){ // ////////////////////////////////////////////////////// // vhdl << tab << "--perform the short carry additions" << endl; // CarryGenerationCircuit *cgc = new CarryGenerationCircuit(target,nbOfChunks-2); // oplist.push_back(cgc); // // inPortMap(cgc, "X", "carryStringZero" ); // inPortMap(cgc, "Y", "carryStringOne" ); // inPortMapCst(cgc, "Cin", join("coutX",0,"_0_l",1,"_Cin")); // outPortMap(cgc, "R", "rawCarrySum" ); // vhdl << instance(cgc, "cgc"); // // vhdl << tab <<"--get the final pipe results"<<endl; // for ( int i=0; i<nbOfChunks; i++){ // if (i==0) // vhdl << tab << declare(join("res",i),cSize[i],true) << " <= sX0_0_l1_Cin;" << endl; // else { // if (i==1) vhdl << tab << declare(join("res",i),cSize[i],true) << " <= " << join("sX",i,"_0_l",l,"_Zero") << " when " << join("coutX",0,"_0_l",l,"_Cin")<<"='0' else "<<join("sX",i,"_0_l",l,"_One")<<";"<<endl; // else vhdl << tab << declare(join("res",i),cSize[i],true) << " <= " << join("sX",i,"_0_l",l,"_Zero") << " when rawCarrySum"<<of(i-2)<<"='0' else 
"<<join("sX",i,"_0_l",l,"_One")<<";"<<endl; // } // } // // }else{ //Altera ///////////////////////////////////////////////////////////////////// // vhdl << tab << "--perform the short carry additions" << endl; // IntAdderSpecific *cgc = new IntAdderSpecific(target,nbOfChunks-2); // oplist.push_back(cgc); // // inPortMap(cgc, "X", "carryStringZero" ); // inPortMap(cgc, "Y", "carryStringOne" ); // inPortMapCst(cgc, "Cin", join("coutX",0,"_0_l",1,"_Cin")); // outPortMap(cgc, "R", "rawCarrySum" ); // outPortMap(cgc, "Cout", "cgcCout"); // vhdl << instance(cgc, "cgc"); // vhdl << tab <<"--get the final pipe results"<<endl; // for ( int i=0; i<nbOfChunks; i++){ // if (i==0) // vhdl << tab << declare(join("res",i),cSize[i],true) << " <= sX0_0_l1_Cin;" << endl; // else { // if (i==1) vhdl << tab << declare(join("res",i),cSize[i],true) << " <= " << join("sX",i,"_0_l",l,"_Zero") << " when " << join("coutX",0,"_0_l",l,"_Cin")<<"='0' else "<<join("sX",i,"_0_l",l,"_One")<<";"<<endl; // else vhdl << tab << declare(join("res",i),cSize[i],true) << " <= " << join("sX",i,"_0_l",l,"_One") << " when ((not(rawCarrySum"<<of(i-2)<<") and carryStringOne"<<of(i-2)<<") or carryStringZero"<<of(i-2)<<")='1' else "<<join("sX",i,"_0_l",l,"_Zero")<<";"<<endl; // } // } // } vhdl << tab << "R <= "; int k=0; for (int i=nbOfChunks-1; i>=0; i--){ vhdl << join("res",i); if (i > 0) vhdl << " & "; k++; } vhdl << ";" <<endl; /////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// }
/**
 * Generates the VHDL of an operator computing X^2 + Y^2 + Z^2 on three
 * floating-point inputs.
 *
 * Two architectures are available:
 *  - optimize == 0: a plain assembly of one FP multiplier (instantiated three
 *    times) and one FP adder (instantiated twice); "_FP" is appended to the
 *    operator name;
 *  - optimize != 0: a fused datapath that squares the three significands,
 *    aligns the two smaller squares on the largest exponent, sums them in a
 *    3-input integer adder, then normalises and rounds.
 *
 * @param target    target the operator is generated for
 * @param wE        exponent width
 * @param wF        fraction width
 * @param optimize  selects the fused datapath when non-zero
 */
FPSumOf3Squares::FPSumOf3Squares(Target* target, int wE, int wF, int optimize) :
   Operator(target), wE(wE), wF(wF)
{
   setCopyrightString("F. de Dinechin, Bogdan Pasca (2011)");
   srcFileName="FPSumOf3Squares";

   ostringstream opName;
   opName << "FPSumOf3Squares_" << wE << "_" << wF;
   if (!optimize)
      opName << "_FP";
   setName(opName.str());

   // Names shared by both architectures.
   static const char* const operand[3]    = { "X", "Y", "Z" };
   static const char* const squareInst[3] = { "multx", "multy", "multz" };

   for (int i = 0; i < 3; i++)
      addFPInput(operand[i], wE, wF);
   // The trailing 2 allows two acceptable output values (faithful rounding).
   addFPOutput("R", wE, wF, 2);

   if (!optimize) {
      /////////////////////////////////////////////////////////////////////
      // Architecture 1: standard FP operators glued together
      /////////////////////////////////////////////////////////////////////
      static const char* const squareSig[3] = { "X2", "Y2", "Z2" };

      FPMult* fpMult = new FPMult(target, wE, wF, wE, wF, wE, wF, 1);
      oplist.push_back(fpMult);
      FPAddSinglePath* fpAdd = new FPAddSinglePath(target, wE, wF, wE, wF, wE, wF);
      oplist.push_back(fpAdd);

      // Three squarings: each operand drives both multiplier inputs.
      for (int i = 0; i < 3; i++) {
         inPortMap (fpMult, "X", operand[i]);
         inPortMap (fpMult, "Y", operand[i]);
         outPortMap(fpMult, "R", squareSig[i]);
         vhdl << instance(fpMult, squareInst[i]);
      }

      syncCycleFromSignal("Z2", false);
      nextCycle();

      // X^2 + Y^2
      inPortMap (fpAdd, "X", "X2");
      inPortMap (fpAdd, "Y", "Y2");
      outPortMap(fpAdd, "R", "X2PY2");
      vhdl << instance(fpAdd, "add1");

      syncCycleFromSignal("X2PY2", false);
      nextCycle();

      // (X^2 + Y^2) + Z^2
      inPortMap (fpAdd, "X", "X2PY2");
      inPortMap (fpAdd, "Y", "Z2");
      outPortMap(fpAdd, "R", "X2PY2PZ2");
      vhdl << instance(fpAdd, "add2");

      syncCycleFromSignal("X2PY2PZ2", false);
      setCriticalPath(fpAdd->getOutputDelay("R"));

      vhdl << tab << "R <= X2PY2PZ2;"<<endl;
      outDelayMap["R"]=getCriticalPath();
   }
   else {
      /////////////////////////////////////////////////////////////////////
      // Architecture 2: fused datapath
      /////////////////////////////////////////////////////////////////////
      // Error budget, in ulps of weight wF+guard:
      //   3 from the truncation of the multiplier outputs,
      //   2 more from the truncation of the shifter outputs,
      //   normalisation truncation: either 0 (total 5), or 1 ulp but with the
      //   previous errors halved (total 3.5).
      // The worst case is 5 ulps, which 3 guard bits cover for a faithful result.
      const int guard = 3;
      const int wFG = wF + guard;

      // ---------------- exponent datapath ----------------
      // (an input-delay based starting point, getMaxInputDelays(inputDelays)
      //  + localWireDelay, was tried here and left disabled)
      setCriticalPath(0);
      manageCriticalPath( target->adderDelay(wE+1)      // subtractions
                          + target->localWireDelay(wE)  // fanout of XltY etc
                          + target->lutDelay()          // & and mux
                        );

      // Extract the three biased exponents EX, EY, EZ.
      for (int i = 0; i < 3; i++)
         vhdl << tab << declare(std::string("E") + operand[i], wE) << " <= " << operand[i] << range(wE+wF-1, wF) << ";" << endl;

      // Pairwise differences; their top bit tells which exponent is smaller.
      vhdl << tab << declare("DEXY", wE+1) << " <= ('0' & EX) - ('0' & EY);" << endl;
      vhdl << tab << declare("DEYZ", wE+1) << " <= ('0' & EY) - ('0' & EZ);" << endl;
      vhdl << tab << declare("DEXZ", wE+1) << " <= ('0' & EX) - ('0' & EZ);" << endl;
      vhdl << tab << declare("XltY") << " <= DEXY("<< wE<<");" << endl;
      vhdl << tab << declare("YltZ") << " <= DEYZ("<< wE<<");" << endl;
      vhdl << tab << declare("XltZ") << " <= DEXZ("<< wE<<");" << endl;

      // Rename the exponents to A, B, C with A >= (B, C).
      vhdl << tab << declare("EA", wE) << " <= " << endl
           << tab << tab << "EZ when (XltZ='1') and (YltZ='1') else " << endl
           << tab << tab << "EY when (XltY='1') and (YltZ='0') else " << endl
           << tab << tab << "EX; " << endl;
      vhdl << tab << declare("EB", wE) << " <= " << endl
           << tab << tab << "EX when (XltZ='1') and (YltZ='1') else " << endl
           << tab << tab << "EZ when (XltY='1') and (YltZ='0') else " << endl
           << tab << tab << "EY; " << endl;
      vhdl << tab << declare("EC", wE) << " <= " << endl
           << tab << tab << "EY when (XltZ='1') and (YltZ='1') else " << endl
           << tab << tab << "EX when (XltY='1') and (YltZ='0') else " << endl
           << tab << tab << "EZ; " << endl;

      // Recompute the two shift amounts from the renamed exponents: this is
      // cheaper than registering, negating and muxing the differences above.
      manageCriticalPath( target->adderDelay(wE-1) );
      vhdl << tab << declare("fullShiftValB", wE) << " <= (EA" << range(wE-2,0) << " - EB" << range(wE-2,0) << ") & '0' ; -- positive result, no overflow " << endl;
      vhdl << tab << declare("fullShiftValC", wE) << " <= (EA" << range(wE-2,0) << " - EC" << range(wE-2,0) << ") & '0' ; -- positive result, no overflow " << endl;

      const double cpAfterFullShift = getCriticalPath();

      // Throw-away shifter, built only to learn the width of its shift input.
      // NOTE(review): this instance is neither pushed to oplist nor deleted
      // within this constructor.
      Shifter* probeShifter = new Shifter(target, wFG+2, wFG+2, Shifter::Right);
      const int shiftInWidth = probeShifter->getShiftInWidth();

      //-- shift amount for the significand of B --------
      manageCriticalPath( target->localWireDelay() + target->lutDelay());
      vhdl<<tab<<declare("shiftedOutB") << " <= ";
      if (wE>shiftInWidth){
         // OR of all the bits that do not fit in the shifter's shift input
         for (int i=wE-1; i>=shiftInWidth; i--) {
            vhdl<< "fullShiftValB("<<i<<")";
            if (i>shiftInWidth)
               vhdl<< " or ";
         }
         vhdl<<";"<<endl;
      }
      else
         vhdl<<tab<<"'0';"<<endl;

      if (wE>shiftInWidth) {
         manageCriticalPath( target->localWireDelay() + target->lutDelay());
         vhdl<<tab<<declare("shiftValB",shiftInWidth) << " <= fullShiftValB("<< shiftInWidth-1<<" downto 0)"
             << " when shiftedOutB='0'"<<endl
             <<tab << tab << " else CONV_STD_LOGIC_VECTOR("<<wFG+1<<","<<shiftInWidth<<") ;" << endl;
      }
      else if (wE==shiftInWidth) {
         vhdl<<tab<<declare("shiftValB",shiftInWidth) << " <= fullShiftValB;" << endl ;
      }
      else { // wE < shiftInWidth: zero-extend
         vhdl<<tab<<declare("shiftValB",shiftInWidth) << " <= CONV_STD_LOGIC_VECTOR(0,"<<shiftInWidth-wE <<") & fullShiftValB;" << endl;
      }

      const double cpAfterShiftValB = getCriticalPath();

      //-- shift amount for the significand of C --------
      manageCriticalPath( target->localWireDelay() + target->lutDelay()); //FIXME possible fixme needed when or does not fit on lut
      vhdl<<tab<<declare("shiftedOutC") << " <= ";
      if (wE>shiftInWidth){
         for (int i=wE-1; i>=shiftInWidth; i--) {
            vhdl<< "fullShiftValC("<<i<<")";
            if (i>shiftInWidth)
               vhdl<< " or ";
         }
         vhdl<<";"<<endl;
      }
      else
         vhdl<<tab<<"'0';"<<endl;

      // Restart the timing bookkeeping from fullShiftValC before the mux.
      setCycleFromSignal("fullShiftValC",cpAfterFullShift);

      if (wE>shiftInWidth) {
         manageCriticalPath( target->localWireDelay() + target->lutDelay());//the mux delay
         vhdl<<tab<<declare("shiftValC",shiftInWidth) << " <= fullShiftValC("<< shiftInWidth-1<<" downto 0)"
             << " when shiftedOutC='0'"<<endl
             <<tab << tab << " else CONV_STD_LOGIC_VECTOR("<<wFG+1<<","<<shiftInWidth<<") ;" << endl;
      }
      else if (wE==shiftInWidth) {
         vhdl<<tab<<declare("shiftValC",shiftInWidth) << " <= fullShiftValC;" << endl ;
      }
      else { // wE < shiftInWidth: zero-extend
         vhdl<<tab<<declare("shiftValC",shiftInWidth) << " <= CONV_STD_LOGIC_VECTOR(0,"<<shiftInWidth-wE <<") & fullShiftValC;" << endl;
      }

      // ---------------- significand datapath ----------------
      // Back to cycle 0.
      setCycle(0);
      //FIXME add inDelayMap for use within hierarchies of components

      // Square the significands (implicit leading 1 made explicit).
#define USE_SQUARER 1
#if USE_SQUARER
      IntSquarer* squarer = new IntSquarer(target, 1+ wF);
#else
      IntMultiplier* squarer = new IntMultiplier(target, 1+ wF, 1+ wF);
#endif
      oplist.push_back(squarer);

      for (int i = 0; i < 3; i++) {
         const std::string mant = std::string("m") + operand[i];   // mX, mY, mZ
         vhdl << tab << declare(mant, wF+1) << " <= '1' & " << operand[i] << range(wF-1, 0) << "; " << endl;
         inPortMap (squarer, "X", mant);
#if !USE_SQUARER
         inPortMap (squarer, "Y", mant);
#endif
         outPortMap(squarer, "R", mant + "2");                     // mX2, mY2, mZ2
         vhdl << instance(squarer, squareInst[i]);
      }

      syncCycleFromSignal("mZ2", false);
      setCriticalPath(squarer->getOutputDelay("R"));

      // Keep only the wF+guard+2 most significant bits of each square.
      const int prodWidth = 2+2*wF;
      for (int i = 0; i < 3; i++)
         vhdl << tab << declare(std::string(operand[i]) + "2t", wFG+2) << " <= m" << operand[i] << "2" << range(prodWidth-1, prodWidth - wFG - 2) << "; " << endl;

      // Rename the three squares to A, B, C with A >= (B, C), using the same
      // selects as for the exponents: three 3-input muxes.
      manageCriticalPath(target->localWireDelay(wF) + target->lutDelay());
      vhdl << tab << declare("MA", wFG+2) << " <= " << endl
           << tab << tab << "Z2t when (XltZ='1') and (YltZ='1') else " << endl
           << tab << tab << "Y2t when (XltY='1') and (YltZ='0') else " << endl
           << tab << tab << "X2t; " << endl;
      vhdl << tab << declare("MB", wFG+2) << " <= " << endl
           << tab << tab << "X2t when (XltZ='1') and (YltZ='1') else " << endl
           << tab << tab << "Z2t when (XltY='1') and (YltZ='0') else " << endl
           << tab << tab << "Y2t; " << endl;
      vhdl << tab << declare("MC", wFG+2) << " <= " << endl
           << tab << tab << "Y2t when (XltZ='1') and (YltZ='1') else " << endl
           << tab << tab << "X2t when (XltY='1') and (YltZ='0') else " << endl
           << tab << tab << "Z2t; " << endl;

      // Bring the exponent and significand datapaths back together.
      syncCycleFromSignal("shiftValB", cpAfterShiftValB, false);

      // One shifter description serves both B and C.
      Shifter* alignShifter = new Shifter(target, wFG+2, wFG+2, Shifter::Right, inDelayMap("X",target->localWireDelay()+getCriticalPath()));
      oplist.push_back(alignShifter);

      inPortMap  (alignShifter, "X", "MB");
      inPortMap  (alignShifter, "S", "shiftValB");
      outPortMap (alignShifter, "R","shiftedB");
      vhdl << instance(alignShifter, "ShifterForB");

      inPortMap  (alignShifter, "X", "MC");
      inPortMap  (alignShifter, "S", "shiftValC");
      outPortMap (alignShifter, "R","shiftedC");
      vhdl << instance(alignShifter, "ShifterForC");

      // The bits shifted out are simply dropped.
      syncCycleFromSignal("shiftedB", false);
      setCriticalPath( alignShifter->getOutputDelay("R"));

      const int shifterOutWidth = getSignalByName("shiftedB")->width();
      vhdl << tab << declare("alignedB", wFG+2) << " <= shiftedB" << range(shifterOutWidth-1, shifterOutWidth -(wFG+2)) << "; " << endl;
      vhdl << tab << declare("alignedC", wFG+2) << " <= shiftedC" << range(shifterOutWidth-1, shifterOutWidth -(wFG+2)) << "; " << endl;

      // Two extra leading bits leave room for the growth of a 3-operand sum.
      vhdl << tab << declare("paddedA", wFG+4) << " <= \"00\" & MA; " << endl;
      vhdl << tab << declare("paddedB", wFG+4) << " <= \"00\" & alignedB; " << endl;
      vhdl << tab << declare("paddedC", wFG+4) << " <= \"00\" & alignedC; " << endl;

      IntMultiAdder* sumAdder = new IntMultiAdder(target, wFG+4, 3, inDelayMap("X0", target->localWireDelay() + getCriticalPath() ));
      oplist.push_back(sumAdder);

      inPortMap   (sumAdder, "X0", "paddedA");
      inPortMap   (sumAdder, "X1", "paddedB");
      inPortMap   (sumAdder, "X2", "paddedC");
      inPortMapCst(sumAdder, "Cin", "'0'"); // a 1 would compensate the two truncations in the worst case -- to explore
      outPortMap  (sumAdder, "R","sum");
      vhdl << instance(sumAdder, "adder1");

      syncCycleFromSignal("sum", false);
      setCriticalPath(sumAdder->getOutputDelay("R"));

      // Normalisation by up to 3 bit positions, with truncation.
      manageCriticalPath(target->localWireDelay() + target->lutDelay());
      vhdl << tab << declare("finalFraction", wFG) << " <= " << endl
           << tab << tab << "sum" << range(wFG+2,3) << " when sum(" << wFG+3 << ")='1' else " << endl
           << tab << tab << "sum" << range(wFG+1, 2) << " when (sum" << range(wFG+3, wFG+2) << "=\"01\") else " << endl
           << tab << tab << "sum" << range(wFG, 1) << " when (sum" << range(wFG+3, wFG+1) << "=\"001\") else " << endl
           << tab << tab << "sum" << range(wFG-1, 0) << "; " << endl;

      // Result exponent: 2*EA - bias, corrected by the normalisation amount.
      // Since (1.m)*(1.m) = xx.xxxxxx, the sum has the form xxxx.xxxxxx.
      // Overflows, infinities, zeroes, etc. are ignored for simplicity.
      manageCriticalPath(target->localWireDelay() + target->lutDelay());
      const int expBias = (1<<(wE-1))-1;
      vhdl << tab << declare("exponentUpdate", wE+1) << " <= " << endl
           << tab << tab << "CONV_STD_LOGIC_VECTOR(" << expBias-3 << ", "<< wE+1 <<") when sum(" << wFG+3 << ")='1' else " << endl
           << tab << tab << "CONV_STD_LOGIC_VECTOR(" << expBias-2 << ", "<< wE+1 <<") when (sum" << range(wFG+3, wFG+2) << "=\"01\") else " << endl
           << tab << tab << "CONV_STD_LOGIC_VECTOR(" << expBias-1 << ", "<< wE+1 <<") when (sum" << range(wFG+3, wFG+1) << "=\"001\") else " << endl
           << tab << tab << "CONV_STD_LOGIC_VECTOR(" << expBias << ", "<< wE+1 <<") ; " << endl;

      manageCriticalPath( target->localWireDelay() + target->adderDelay(wE+1));
      vhdl << tab << declare("finalExp", wE+1) << " <= (EA & '0') - exponentUpdate ; " << endl;

      // Rounding: add 0 with a carry-in of 1 to the exponent & fraction word,
      // after the guard bits have been dropped from the fraction.
      IntAdder *roundingAdder = new IntAdder(target, wE +1 + wF);
      oplist.push_back(roundingAdder);

      vhdl << tab << declare("roundingOp",wE+1 + wF) << "<= finalExp & finalFraction"<<range(wFG-1,guard)<<";"<<endl;

      inPortMap    ( roundingAdder, "X", "roundingOp");
      inPortMapCst ( roundingAdder, "Y", zg(wE+1+wF));
      inPortMapCst ( roundingAdder, "Cin", "'1'");
      outPortMap   ( roundingAdder, "R", "expFrac");
      vhdl << tab << instance( roundingAdder, "RoundingAdder");

      syncCycleFromSignal("expFrac");
      setCriticalPath( roundingAdder->getOutputDelay("R"));

      //TODO
      // Exception bits: "01" unless the top bit of expFrac is set, then "10".
      vhdl << tab << declare("rExc",2) << " <= \"01\" when expFrac"<<of(wE+wF)<<"='0' else \"10\";"<<endl;
      // Assemble the output: exception bits, a '0' sign bit, exponent and fraction.
      vhdl << tab << "R <= rExc & '0' & expFrac"<<range(wE+1 + wF-2,0)<<";"<<endl;
   }
}