Ejemplo n.º 1
0
/*
 * r2cfII_20: straight-line arithmetic kernel applied to `v` consecutive
 * data sets.
 *
 * Each loop iteration loads R0[WS(rs, k)] and R1[WS(rs, k)] for k = 0..9
 * (20 input values) and stores Cr[WS(csr, k)] and Ci[WS(csi, k)] for
 * k = 0..9 (20 output values).  After every iteration R0 and R1 advance by
 * `ivs` elements and Cr and Ci advance by `ovs` elements.
 *
 * Parameters:
 *   R0, R1       input arrays (only read here)
 *   Cr, Ci       output arrays (only written here)
 *   rs           stride handed to WS() for indexing R0 and R1
 *   csr, csi     strides handed to WS() for indexing Cr and Ci
 *   v            number of iterations; nothing happens when v <= 0
 *   ivs, ovs     per-iteration pointer increments for inputs / outputs
 *
 * DK, E, R, INT, stride, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are declared
 * outside this block.
 * NOTE(review): FMA(a, b, c) is presumably a * b + c and FNMS(a, b, c)
 * presumably c - a * b -- confirm against their definitions.
 * NOTE(review): the name and layout suggest a generated real-to-complex
 * (type II, size 20) FFT codelet -- confirm; if so, prefer regenerating it
 * over editing it by hand.
 */
static void r2cfII_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
{
     /* Numeric constants; each name spells the leading digits of its value. */
     DK(KP572061402, +0.572061402817684297600072783580302076536153377);
     DK(KP218508012, +0.218508012224410535399650602527877556893735408);
     DK(KP309016994, +0.309016994374947424102293417182819058860154590);
     DK(KP809016994, +0.809016994374947424102293417182819058860154590);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP176776695, +0.176776695296636881100211090526212259821208984);
     DK(KP395284707, +0.395284707521047416499861693054089816714944392);
     DK(KP672498511, +0.672498511963957326960058968885748755876783111);
     DK(KP415626937, +0.415626937777453428589967464113135184222253485);
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
	  INT i;
	  /* Counts down from v; all four pointers are stepped in the loop header. */
	  for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
	       E T8, TD, Tm, TN, T9, TC, TY, TE, Te, TF, Tl, TK, T12, TL, Tk;
	       E TM, T1, T6, Tq, T1l, T1c, Tp, T1f, T1e, T1d, Ty, TW, T1g, T1m, Tx;
	       E Tu;
	       /* R1 elements 2 and 7 are used both raw and pre-scaled by KP707106781. */
	       T8 = R1[WS(rs, 2)];
	       TD = KP707106781 * T8;
	       Tm = R1[WS(rs, 7)];
	       TN = KP707106781 * Tm;
	       /* First group of R1 inputs: elements 6, 8, 0 and 4. */
	       {
		    E Ta, TA, Td, TB, Tb, Tc;
		    T9 = R1[WS(rs, 6)];
		    Ta = R1[WS(rs, 8)];
		    TA = T9 + Ta;
		    Tb = R1[0];
		    Tc = R1[WS(rs, 4)];
		    Td = Tb + Tc;
		    TB = Tb - Tc;
		    TC = FMA(KP415626937, TA, KP672498511 * TB);
		    TY = FNMS(KP415626937, TB, KP672498511 * TA);
		    TE = KP395284707 * (Ta - Td);
		    Te = Ta + Td;
		    TF = KP176776695 * Te;
	       }
	       /* Second group of R1 inputs: elements 1, 3, 5 and 9. */
	       {
		    E Tg, TJ, Tj, TI, Th, Ti;
		    Tg = R1[WS(rs, 1)];
		    Tl = R1[WS(rs, 3)];
		    TJ = Tg + Tl;
		    Th = R1[WS(rs, 5)];
		    Ti = R1[WS(rs, 9)];
		    Tj = Th + Ti;
		    TI = Th - Ti;
		    TK = FNMS(KP415626937, TJ, KP672498511 * TI);
		    T12 = FMA(KP415626937, TI, KP672498511 * TJ);
		    TL = KP395284707 * (Tg - Tj);
		    Tk = Tg + Tj;
		    TM = KP176776695 * Tk;
	       }
	       /* Even-indexed R0 inputs: elements 0, 2, 4, 6 and 8. */
	       {
		    E T2, T5, T3, T4, T1a, T1b;
		    T1 = R0[0];
		    T2 = R0[WS(rs, 6)];
		    T5 = R0[WS(rs, 8)];
		    T3 = R0[WS(rs, 2)];
		    T4 = R0[WS(rs, 4)];
		    T1a = T4 + T2;
		    T1b = T5 + T3;
		    T6 = T2 + T3 - (T4 + T5);
		    Tq = FMA(KP250000000, T6, T1);
		    T1l = FNMS(KP951056516, T1b, KP587785252 * T1a);
		    T1c = FMA(KP951056516, T1a, KP587785252 * T1b);
		    Tp = KP559016994 * (T5 + T2 - (T4 + T3));
	       }
	       /* Odd-indexed R0 inputs: element 5, then 9, 1, 3 and 7. */
	       T1f = R0[WS(rs, 5)];
	       {
		    E Tv, Tw, Ts, Tt;
		    Tv = R0[WS(rs, 9)];
		    Tw = R0[WS(rs, 1)];
		    Tx = Tv - Tw;
		    T1e = Tv + Tw;
		    Ts = R0[WS(rs, 3)];
		    Tt = R0[WS(rs, 7)];
		    Tu = Ts - Tt;
		    T1d = Ts + Tt;
	       }
	       Ty = FMA(KP951056516, Tu, KP587785252 * Tx);
	       TW = FNMS(KP951056516, Tx, KP587785252 * Tu);
	       T1g = FMA(KP809016994, T1d, KP309016994 * T1e) + T1f;
	       T1m = FNMS(KP809016994, T1e, T1f) - (KP309016994 * T1d);
	       /* Outputs 2 and 7 of Cr and Ci. */
	       {
		    E T7, T1r, To, T1q, Tf, Tn;
		    T7 = T1 - T6;
		    T1r = T1e + T1f - T1d;
		    Tf = T8 + (T9 - Te);
		    Tn = (Tk - Tl) - Tm;
		    To = KP707106781 * (Tf + Tn);
		    T1q = KP707106781 * (Tf - Tn);
		    Cr[WS(csr, 2)] = T7 - To;
		    Ci[WS(csi, 2)] = T1q - T1r;
		    Cr[WS(csr, 7)] = T7 + To;
		    Ci[WS(csi, 7)] = T1q + T1r;
	       }
	       /* Outputs 0, 4, 5 and 9 of Cr and Ci. */
	       {
		    E T1h, T1j, TX, T15, T10, T16, T13, T17, TV, TZ, T11;
		    T1h = T1c - T1g;
		    T1j = T1c + T1g;
		    TV = Tq - Tp;
		    TX = TV - TW;
		    T15 = TV + TW;
		    TZ = FMA(KP218508012, T9, TD) + TF - TE;
		    T10 = TY + TZ;
		    T16 = TZ - TY;
		    T11 = FNMS(KP218508012, Tl, TL) - (TM + TN);
		    T13 = T11 - T12;
		    T17 = T11 + T12;
		    {
			 E T14, T19, T18, T1i;
			 T14 = T10 + T13;
			 Cr[WS(csr, 5)] = TX - T14;
			 Cr[WS(csr, 4)] = TX + T14;
			 T19 = T17 - T16;
			 Ci[WS(csi, 5)] = T19 - T1h;
			 Ci[WS(csi, 4)] = T19 + T1h;
			 T18 = T16 + T17;
			 Cr[WS(csr, 9)] = T15 - T18;
			 Cr[0] = T15 + T18;
			 T1i = T13 - T10;
			 Ci[0] = T1i - T1j;
			 Ci[WS(csi, 9)] = T1i + T1j;
		    }
	       }
	       /* Outputs 1, 3, 6 and 8 of Cr and Ci. */
	       {
		    E T1n, T1p, Tz, TR, TH, TS, TP, TT, Tr, TG, TO;
		    T1n = T1l + T1m;
		    T1p = T1m - T1l;
		    Tr = Tp + Tq;
		    Tz = Tr + Ty;
		    TR = Tr - Ty;
		    TG = TD + TE + FNMS(KP572061402, T9, TF);
		    TH = TC + TG;
		    TS = TC - TG;
		    TO = TL + TM + FNMS(KP572061402, Tl, TN);
		    TP = TK - TO;
		    TT = TK + TO;
		    {
			 E TQ, T1o, TU, T1k;
			 TQ = TH + TP;
			 Cr[WS(csr, 6)] = Tz - TQ;
			 Cr[WS(csr, 3)] = Tz + TQ;
			 T1o = TT - TS;
			 Ci[WS(csi, 6)] = T1o - T1p;
			 Ci[WS(csi, 3)] = T1o + T1p;
			 TU = TS + TT;
			 Cr[WS(csr, 8)] = TR - TU;
			 Cr[WS(csr, 1)] = TR + TU;
			 T1k = TP - TH;
			 Ci[WS(csi, 8)] = T1k - T1n;
			 Ci[WS(csi, 1)] = T1k + T1n;
		    }
	       }
	  }
     }
}
/*
 * hc2cfdft_8: in-place straight-line kernel over four arrays, run once for
 * every m in [mb, me).
 *
 * Each iteration loads Rp, Ip, Rm and Im at WS(rs, k) for k = 0..3
 * (16 values) together with W[0..13], and stores 16 results back into the
 * same four arrays at the same set of offsets.  Every load in an iteration
 * happens before the first store of that iteration.
 *
 * Parameters:
 *   Rp, Ip     arrays read and overwritten; advanced by +ms per iteration
 *   Rm, Im     arrays read and overwritten; advanced by -ms per iteration
 *   W          multiplier table, 14 entries consumed per iteration; the
 *              pointer is first offset by (mb - 1) * 14
 *   rs         stride handed to WS() for indexing all four arrays
 *   mb, me     half-open iteration range; nothing happens when mb >= me
 *   ms         per-iteration pointer step
 *
 * DK, E, R, INT, stride, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are declared
 * outside this block.
 * NOTE(review): FMA(a, b, c) is presumably a * b + c and FNMS(a, b, c)
 * presumably c - a * b -- confirm against their definitions.
 * NOTE(review): the name suggests a generated half-complex to complex
 * forward DFT codelet of size 8 -- confirm; if so, prefer regenerating it
 * over editing it by hand.
 */
static void hc2cfdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* Numeric constants; each name spells the leading digits of its value. */
     DK(KP353553390, +0.353553390593273762200422181052424519642417969);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
	  INT m;
	  /* Rp/Ip walk forward while Rm/Im walk backward; W moves 14 entries per step. */
	  for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) {
	       E Tv, TX, Ts, TY, TE, T1a, TJ, T19, T1l, T1m, T9, T10, Ti, T11, TP;
	       E T16, TU, T17, T1i, T1j;
	       /* Elements 0 and 2 of each array, combined with W[0], W[1] and W[6..9]. */
	       {
		    E Tt, Tu, TD, Tz, TA, TB, Tn, TI, Tr, TG, Tk, To;
		    Tt = Ip[0];
		    Tu = Im[0];
		    TD = Tt + Tu;
		    Tz = Rm[0];
		    TA = Rp[0];
		    TB = Tz - TA;
		    {
			 E Tl, Tm, Tp, Tq;
			 Tl = Ip[WS(rs, 2)];
			 Tm = Im[WS(rs, 2)];
			 Tn = Tl - Tm;
			 TI = Tl + Tm;
			 Tp = Rp[WS(rs, 2)];
			 Tq = Rm[WS(rs, 2)];
			 Tr = Tp + Tq;
			 TG = Tp - Tq;
		    }
		    Tv = Tt - Tu;
		    TX = TA + Tz;
		    Tk = W[6];
		    To = W[7];
		    Ts = FNMS(To, Tr, Tk * Tn);
		    TY = FMA(Tk, Tr, To * Tn);
		    {
			 E Ty, TC, TF, TH;
			 Ty = W[0];
			 TC = W[1];
			 TE = FNMS(TC, TD, Ty * TB);
			 T1a = FMA(TC, TB, Ty * TD);
			 TF = W[8];
			 TH = W[9];
			 TJ = FMA(TF, TG, TH * TI);
			 T19 = FNMS(TH, TG, TF * TI);
		    }
		    T1l = TJ + TE;
		    T1m = T1a - T19;
	       }
	       /* Elements 1 and 3 of each array, combined with W[2..5] and W[10..13]. */
	       {
		    E T4, TO, T8, TM, Td, TT, Th, TR;
		    {
			 E T2, T3, T6, T7;
			 T2 = Ip[WS(rs, 1)];
			 T3 = Im[WS(rs, 1)];
			 T4 = T2 - T3;
			 TO = T2 + T3;
			 T6 = Rp[WS(rs, 1)];
			 T7 = Rm[WS(rs, 1)];
			 T8 = T6 + T7;
			 TM = T6 - T7;
		    }
		    {
			 E Tb, Tc, Tf, Tg;
			 Tb = Ip[WS(rs, 3)];
			 Tc = Im[WS(rs, 3)];
			 Td = Tb - Tc;
			 TT = Tb + Tc;
			 Tf = Rp[WS(rs, 3)];
			 Tg = Rm[WS(rs, 3)];
			 Th = Tf + Tg;
			 TR = Tf - Tg;
		    }
		    {
			 E T1, T5, Ta, Te;
			 T1 = W[2];
			 T5 = W[3];
			 T9 = FNMS(T5, T8, T1 * T4);
			 T10 = FMA(T1, T8, T5 * T4);
			 Ta = W[10];
			 Te = W[11];
			 Ti = FNMS(Te, Th, Ta * Td);
			 T11 = FMA(Ta, Th, Te * Td);
			 {
			      E TL, TN, TQ, TS;
			      TL = W[4];
			      TN = W[5];
			      TP = FMA(TL, TM, TN * TO);
			      T16 = FNMS(TN, TM, TL * TO);
			      TQ = W[12];
			      TS = W[13];
			      TU = FMA(TQ, TR, TS * TT);
			      T17 = FNMS(TS, TR, TQ * TT);
			 }
			 T1i = T17 - T16;
			 T1j = TP - TU;
		    }
	       }
	       /* First half of the stores: results scaled by KP500000000 / KP353553390. */
	       {
		    E T1h, T1t, T1w, T1y, T1o, T1s, T1r, T1x;
		    {
			 E T1f, T1g, T1u, T1v;
			 T1f = Tv - Ts;
			 T1g = T10 - T11;
			 T1h = KP500000000 * (T1f - T1g);
			 T1t = KP500000000 * (T1g + T1f);
			 T1u = T1i - T1j;
			 T1v = T1l + T1m;
			 T1w = KP353553390 * (T1u - T1v);
			 T1y = KP353553390 * (T1u + T1v);
		    }
		    {
			 E T1k, T1n, T1p, T1q;
			 T1k = T1i + T1j;
			 T1n = T1l - T1m;
			 T1o = KP353553390 * (T1k + T1n);
			 T1s = KP353553390 * (T1n - T1k);
			 T1p = TX - TY;
			 T1q = T9 - Ti;
			 T1r = KP500000000 * (T1p - T1q);
			 T1x = KP500000000 * (T1p + T1q);
		    }
		    Ip[WS(rs, 1)] = T1h + T1o;
		    Rp[WS(rs, 1)] = T1x + T1y;
		    Im[WS(rs, 2)] = T1o - T1h;
		    Rm[WS(rs, 2)] = T1x - T1y;
		    Rm[0] = T1r - T1s;
		    Im[0] = T1w - T1t;
		    Rp[WS(rs, 3)] = T1r + T1s;
		    Ip[WS(rs, 3)] = T1t + T1w;
	       }
	       /* Second half of the stores: every result is halved (KP500000000). */
	       {
		    E Tx, T15, T1c, T1e, TW, T14, T13, T1d;
		    {
			 E Tj, Tw, T18, T1b;
			 Tj = T9 + Ti;
			 Tw = Ts + Tv;
			 Tx = Tj + Tw;
			 T15 = Tw - Tj;
			 T18 = T16 + T17;
			 T1b = T19 + T1a;
			 T1c = T18 - T1b;
			 T1e = T18 + T1b;
		    }
		    {
			 E TK, TV, TZ, T12;
			 TK = TE - TJ;
			 TV = TP + TU;
			 TW = TK - TV;
			 T14 = TV + TK;
			 TZ = TX + TY;
			 T12 = T10 + T11;
			 T13 = TZ - T12;
			 T1d = TZ + T12;
		    }
		    Ip[0] = KP500000000 * (Tx + TW);
		    Rp[0] = KP500000000 * (T1d + T1e);
		    Im[WS(rs, 3)] = KP500000000 * (TW - Tx);
		    Rm[WS(rs, 3)] = KP500000000 * (T1d - T1e);
		    Rm[WS(rs, 1)] = KP500000000 * (T13 - T14);
		    Im[WS(rs, 1)] = KP500000000 * (T1c - T15);
		    Rp[WS(rs, 2)] = KP500000000 * (T13 + T14);
		    Ip[WS(rs, 2)] = KP500000000 * (T15 + T1c);
	       }
	  }
     }
}
Ejemplo n.º 3
0
/*
 * hc2r_9: straight-line arithmetic kernel applied to `v` consecutive data
 * sets.
 *
 * Each loop iteration loads ri[WS(ris, k)] for k = 0..4 and ii[WS(iis, k)]
 * for k = 1..4 (9 input values; ii[0] is never read) and stores
 * O[WS(os, k)] for k = 0..8 (9 output values).  After every iteration ri
 * and ii advance by `ivs` elements and O advances by `ovs` elements.
 *
 * Parameters:
 *   ri, ii       input arrays (only read here)
 *   O            output array (only written here)
 *   ris, iis     strides handed to WS() for indexing ri and ii
 *   os           stride handed to WS() for indexing O
 *   v            number of iterations; nothing happens when v <= 0
 *   ivs, ovs     per-iteration pointer increments for inputs / output
 *
 * DK, E, R, stride, WS, FMA and FNMS are declared outside this block.
 * NOTE(review): FMA(a, b, c) is presumably a * b + c and FNMS(a, b, c)
 * presumably c - a * b -- confirm against their definitions.
 * NOTE(review): the name suggests a generated half-complex to real DFT
 * codelet of size 9 -- confirm; if so, prefer regenerating it over editing
 * it by hand.
 */
static void hc2r_9(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
{
     /* Numeric constants; each name spells the leading digits of its value. */
     DK(KP984807753, +0.984807753012208059366743024589523013670643252);
     DK(KP173648177, +0.173648177666930348851716626769314796000375677);
     DK(KP300767466, +0.300767466360870593278543795225003852144476517);
     DK(KP1_705737063, +1.705737063904886419256501927880148143872040591);
     DK(KP642787609, +0.642787609686539326322643409907263432907559884);
     DK(KP766044443, +0.766044443118978035202392650555416673935832457);
     DK(KP1_326827896, +1.326827896337876792410842639271782594433726619);
     DK(KP1_113340798, +1.113340798452838732905825904094046265936583811);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
     int i;
     /* Counts down from v; the three pointers are stepped in the loop header. */
     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) {
	  E T3, Tq, Tc, Tk, Tj, T8, Tm, Ts, Th, Tr, Tw, Tx;
	  /* Combine ri[0], ri[3] and ii[3]. */
	  {
	       E Tb, T1, T2, T9, Ta;
	       Ta = ii[WS(iis, 3)];
	       Tb = KP1_732050807 * Ta;
	       T1 = ri[0];
	       T2 = ri[WS(ris, 3)];
	       T9 = T1 - T2;
	       T3 = FMA(KP2_000000000, T2, T1);
	       Tq = T9 + Tb;
	       Tc = T9 - Tb;
	  }
	  /* Combine elements 1, 2 and 4 of ri and ii. */
	  {
	       E T4, T7, Ti, Tg, Tl, Td;
	       T4 = ri[WS(ris, 1)];
	       Tk = ii[WS(iis, 1)];
	       {
		    E T5, T6, Te, Tf;
		    T5 = ri[WS(ris, 4)];
		    T6 = ri[WS(ris, 2)];
		    T7 = T5 + T6;
		    Ti = KP866025403 * (T5 - T6);
		    Te = ii[WS(iis, 4)];
		    Tf = ii[WS(iis, 2)];
		    Tg = KP866025403 * (Te + Tf);
		    Tj = Tf - Te;
	       }
	       T8 = T4 + T7;
	       Tl = FMA(KP500000000, Tj, Tk);
	       Tm = Ti + Tl;
	       Ts = Tl - Ti;
	       Td = FNMS(KP500000000, T7, T4);
	       Th = Td - Tg;
	       Tr = Td + Tg;
	  }
	  /* Outputs 0, 3 and 6. */
	  O[0] = FMA(KP2_000000000, T8, T3);
	  Tw = T3 - T8;
	  Tx = KP1_732050807 * (Tk - Tj);
	  O[WS(os, 3)] = Tw - Tx;
	  O[WS(os, 6)] = Tw + Tx;
	  /* Outputs 1, 4, 7 (from Tc, Th, Tm) and 2, 5, 8 (from Tq, Tr, Ts). */
	  {
	       E Tp, Tn, To, Tv, Tt, Tu;
	       Tp = FMA(KP1_113340798, Th, KP1_326827896 * Tm);
	       Tn = FNMS(KP642787609, Tm, KP766044443 * Th);
	       To = Tc - Tn;
	       O[WS(os, 1)] = FMA(KP2_000000000, Tn, Tc);
	       O[WS(os, 7)] = To + Tp;
	       O[WS(os, 4)] = To - Tp;
	       Tv = FMA(KP1_705737063, Tr, KP300767466 * Ts);
	       Tt = FNMS(KP984807753, Ts, KP173648177 * Tr);
	       Tu = Tq - Tt;
	       O[WS(os, 2)] = FMA(KP2_000000000, Tt, Tq);
	       O[WS(os, 8)] = Tu + Tv;
	       O[WS(os, 5)] = Tu - Tv;
	  }
     }
}
Ejemplo n.º 4
0
/*
 * hb2_16: in-place straight-line kernel over two arrays, run once for every
 * m in [mb, me).
 *
 * Each iteration loads W[0..7], forms further multipliers from pairwise
 * products of those entries, loads cr[WS(rs, k)] and ci[WS(rs, k)] for
 * k = 0..15 (32 values) and stores 32 results back to the same offsets.
 * Every data load in an iteration happens before the first store of that
 * iteration.  The results stored to cr[0] and ci[0] are plain sums; every
 * other output pair is a two-term combination weighted by a pair of
 * multipliers.
 *
 * Parameters:
 *   cr         array read and overwritten; advanced by +ms per iteration
 *   ci         array read and overwritten; advanced by -ms per iteration
 *   W          multiplier table, 8 entries consumed per iteration; the
 *              pointer is first offset by (mb - 1) * 8
 *   rs         stride handed to WS() for indexing cr and ci
 *   mb, me     half-open iteration range; nothing happens when mb >= me
 *   ms         per-iteration pointer step
 *
 * DK, E, R, INT, stride, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are declared
 * outside this block.
 * NOTE(review): FMA(a, b, c) is presumably a * b + c and FNMS(a, b, c)
 * presumably c - a * b -- confirm against their definitions.
 * NOTE(review): the name suggests a generated half-complex backward
 * twiddle codelet of size 16 -- confirm; if so, prefer regenerating it over
 * editing it by hand.
 */
static void hb2_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* Numeric constants; each name spells the leading digits of its value. */
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
	  INT m;
	  /* cr walks forward while ci walks backward; W moves 8 entries per step. */
	  for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(32, rs)) {
	       E Tv, Ty, T1l, T1n, T1p, T1t, T27, T25, Tz, Tw, TB, T21, T1P, T1H, T1X;
	       E T17, T1L, T1N, T1v, T1w, T1x, T1B, T2F, T2T, T2b, T2R, T3j, T3x, T35, T3t;
	       /* Load W[0..7] and build the derived multiplier pairs from their products. */
	       {
		    E TA, T1J, T15, T1G, Tx, T1K, T16, T1F;
		    {
			 E T1m, T1s, T1o, T1r;
			 Tv = W[0];
			 Ty = W[1];
			 T1l = W[2];
			 T1n = W[3];
			 T1m = Tv * T1l;
			 T1s = Ty * T1l;
			 T1o = Ty * T1n;
			 T1r = Tv * T1n;
			 T1p = T1m + T1o;
			 T1t = T1r - T1s;
			 T27 = T1r + T1s;
			 T25 = T1m - T1o;
			 Tz = W[5];
			 TA = Ty * Tz;
			 T1J = T1l * Tz;
			 T15 = Tv * Tz;
			 T1G = T1n * Tz;
			 Tw = W[4];
			 Tx = Tv * Tw;
			 T1K = T1n * Tw;
			 T16 = Ty * Tw;
			 T1F = T1l * Tw;
		    }
		    TB = Tx - TA;
		    T21 = T1J + T1K;
		    T1P = T15 - T16;
		    T1H = T1F + T1G;
		    T1X = T1F - T1G;
		    T17 = T15 + T16;
		    T1L = T1J - T1K;
		    T1N = Tx + TA;
		    T1v = W[6];
		    T1w = W[7];
		    T1x = FMA(Tv, T1v, Ty * T1w);
		    T1B = FNMS(Ty, T1v, Tv * T1w);
		    {
			 E T2D, T2E, T29, T2a;
			 T2D = T25 * Tz;
			 T2E = T27 * Tw;
			 T2F = T2D + T2E;
			 T2T = T2D - T2E;
			 T29 = T25 * Tw;
			 T2a = T27 * Tz;
			 T2b = T29 - T2a;
			 T2R = T29 + T2a;
		    }
		    {
			 E T3h, T3i, T33, T34;
			 T3h = T1p * Tz;
			 T3i = T1t * Tw;
			 T3j = T3h + T3i;
			 T3x = T3h - T3i;
			 T33 = T1p * Tw;
			 T34 = T1t * Tz;
			 T35 = T33 - T34;
			 T3t = T33 + T34;
		    }
	       }
	       {
		    E T7, T36, T3k, TC, T1f, T2e, T2I, T1Q, Te, TJ, T1R, T18, T2L, T37, T2l;
		    E T3l, Tm, T1T, TT, T1h, T2A, T2N, T3b, T3n, Tt, T1U, T12, T1i, T2t, T2O;
		    E T3e, T3o;
		    /* Loads: cr elements 0, 4, 8, 12 and ci elements 3, 7, 11, 15. */
		    {
			 E T3, T2c, T1e, T2d, T6, T2G, T1b, T2H;
			 {
			      E T1, T2, T1c, T1d;
			      T1 = cr[0];
			      T2 = ci[WS(rs, 7)];
			      T3 = T1 + T2;
			      T2c = T1 - T2;
			      T1c = ci[WS(rs, 11)];
			      T1d = cr[WS(rs, 12)];
			      T1e = T1c - T1d;
			      T2d = T1c + T1d;
			 }
			 {
			      E T4, T5, T19, T1a;
			      T4 = cr[WS(rs, 4)];
			      T5 = ci[WS(rs, 3)];
			      T6 = T4 + T5;
			      T2G = T4 - T5;
			      T19 = ci[WS(rs, 15)];
			      T1a = cr[WS(rs, 8)];
			      T1b = T19 - T1a;
			      T2H = T19 + T1a;
			 }
			 T7 = T3 + T6;
			 T36 = T2c + T2d;
			 T3k = T2H - T2G;
			 TC = T3 - T6;
			 T1f = T1b - T1e;
			 T2e = T2c - T2d;
			 T2I = T2G + T2H;
			 T1Q = T1b + T1e;
		    }
		    /* Loads: cr elements 2, 6, 10, 14 and ci elements 1, 5, 9, 13. */
		    {
			 E Ta, T2f, TI, T2g, Td, T2i, TF, T2j;
			 {
			      E T8, T9, TG, TH;
			      T8 = cr[WS(rs, 2)];
			      T9 = ci[WS(rs, 5)];
			      Ta = T8 + T9;
			      T2f = T8 - T9;
			      TG = ci[WS(rs, 13)];
			      TH = cr[WS(rs, 10)];
			      TI = TG - TH;
			      T2g = TG + TH;
			 }
			 {
			      E Tb, Tc, TD, TE;
			      Tb = ci[WS(rs, 1)];
			      Tc = cr[WS(rs, 6)];
			      Td = Tb + Tc;
			      T2i = Tb - Tc;
			      TD = ci[WS(rs, 9)];
			      TE = cr[WS(rs, 14)];
			      TF = TD - TE;
			      T2j = TD + TE;
			 }
			 Te = Ta + Td;
			 TJ = TF - TI;
			 T1R = TI + TF;
			 T18 = Ta - Td;
			 {
			      E T2J, T2K, T2h, T2k;
			      T2J = T2f + T2g;
			      T2K = T2i + T2j;
			      T2L = KP707106781 * (T2J - T2K);
			      T37 = KP707106781 * (T2J + T2K);
			      T2h = T2f - T2g;
			      T2k = T2i - T2j;
			      T2l = KP707106781 * (T2h + T2k);
			      T3l = KP707106781 * (T2h - T2k);
			 }
		    }
		    /* Loads: cr elements 1, 5, 9, 13 and ci elements 2, 6, 10, 14. */
		    {
			 E Ti, T2x, TR, T2y, Tl, T2u, TO, T2v, TL, TS;
			 {
			      E Tg, Th, TP, TQ;
			      Tg = cr[WS(rs, 1)];
			      Th = ci[WS(rs, 6)];
			      Ti = Tg + Th;
			      T2x = Tg - Th;
			      TP = ci[WS(rs, 10)];
			      TQ = cr[WS(rs, 13)];
			      TR = TP - TQ;
			      T2y = TP + TQ;
			 }
			 {
			      E Tj, Tk, TM, TN;
			      Tj = cr[WS(rs, 5)];
			      Tk = ci[WS(rs, 2)];
			      Tl = Tj + Tk;
			      T2u = Tj - Tk;
			      TM = ci[WS(rs, 14)];
			      TN = cr[WS(rs, 9)];
			      TO = TM - TN;
			      T2v = TM + TN;
			 }
			 Tm = Ti + Tl;
			 T1T = TO + TR;
			 TL = Ti - Tl;
			 TS = TO - TR;
			 TT = TL - TS;
			 T1h = TL + TS;
			 {
			      E T2w, T2z, T39, T3a;
			      T2w = T2u + T2v;
			      T2z = T2x - T2y;
			      T2A = FMA(KP923879532, T2w, KP382683432 * T2z);
			      T2N = FNMS(KP382683432, T2w, KP923879532 * T2z);
			      T39 = T2x + T2y;
			      T3a = T2v - T2u;
			      T3b = FNMS(KP923879532, T3a, KP382683432 * T39);
			      T3n = FMA(KP382683432, T3a, KP923879532 * T39);
			 }
		    }
		    /* Loads: cr elements 3, 7, 11, 15 and ci elements 0, 4, 8, 12. */
		    {
			 E Tp, T2q, T10, T2r, Ts, T2n, TX, T2o, TU, T11;
			 {
			      E Tn, To, TY, TZ;
			      Tn = ci[0];
			      To = cr[WS(rs, 7)];
			      Tp = Tn + To;
			      T2q = Tn - To;
			      TY = ci[WS(rs, 12)];
			      TZ = cr[WS(rs, 11)];
			      T10 = TY - TZ;
			      T2r = TY + TZ;
			 }
			 {
			      E Tq, Tr, TV, TW;
			      Tq = cr[WS(rs, 3)];
			      Tr = ci[WS(rs, 4)];
			      Ts = Tq + Tr;
			      T2n = Tq - Tr;
			      TV = ci[WS(rs, 8)];
			      TW = cr[WS(rs, 15)];
			      TX = TV - TW;
			      T2o = TV + TW;
			 }
			 Tt = Tp + Ts;
			 T1U = TX + T10;
			 TU = Tp - Ts;
			 T11 = TX - T10;
			 T12 = TU + T11;
			 T1i = T11 - TU;
			 {
			      E T2p, T2s, T3c, T3d;
			      T2p = T2n - T2o;
			      T2s = T2q - T2r;
			      T2t = FNMS(KP382683432, T2s, KP923879532 * T2p);
			      T2O = FMA(KP382683432, T2p, KP923879532 * T2s);
			      T3c = T2q + T2r;
			      T3d = T2n + T2o;
			      T3e = FNMS(KP923879532, T3d, KP382683432 * T3c);
			      T3o = FMA(KP382683432, T3d, KP923879532 * T3c);
			 }
		    }
		    /* Stores to elements 0 (no multiplier) and 8 (multipliers T1N, T1P). */
		    {
			 E Tf, Tu, T1O, T1S, T1V, T1W;
			 Tf = T7 + Te;
			 Tu = Tm + Tt;
			 T1O = Tf - Tu;
			 T1S = T1Q + T1R;
			 T1V = T1T + T1U;
			 T1W = T1S - T1V;
			 cr[0] = Tf + Tu;
			 ci[0] = T1S + T1V;
			 cr[WS(rs, 8)] = FNMS(T1P, T1W, T1N * T1O);
			 ci[WS(rs, 8)] = FMA(T1P, T1O, T1N * T1W);
		    }
		    /* Stores to elements 11 and 3. */
		    {
			 E T3g, T3r, T3q, T3s;
			 {
			      E T38, T3f, T3m, T3p;
			      T38 = T36 - T37;
			      T3f = T3b + T3e;
			      T3g = T38 - T3f;
			      T3r = T38 + T3f;
			      T3m = T3k + T3l;
			      T3p = T3n - T3o;
			      T3q = T3m - T3p;
			      T3s = T3m + T3p;
			 }
			 cr[WS(rs, 11)] = FNMS(T3j, T3q, T35 * T3g);
			 ci[WS(rs, 11)] = FMA(T3j, T3g, T35 * T3q);
			 cr[WS(rs, 3)] = FNMS(T1n, T3s, T1l * T3r);
			 ci[WS(rs, 3)] = FMA(T1n, T3r, T1l * T3s);
		    }
		    /* Stores to elements 7 and 15. */
		    {
			 E T3w, T3B, T3A, T3C;
			 {
			      E T3u, T3v, T3y, T3z;
			      T3u = T36 + T37;
			      T3v = T3n + T3o;
			      T3w = T3u - T3v;
			      T3B = T3u + T3v;
			      T3y = T3k - T3l;
			      T3z = T3b - T3e;
			      T3A = T3y + T3z;
			      T3C = T3y - T3z;
			 }
			 cr[WS(rs, 7)] = FNMS(T3x, T3A, T3t * T3w);
			 ci[WS(rs, 7)] = FMA(T3t, T3A, T3x * T3w);
			 cr[WS(rs, 15)] = FNMS(T1w, T3C, T1v * T3B);
			 ci[WS(rs, 15)] = FMA(T1v, T3C, T1w * T3B);
		    }
		    /* Stores to elements 10 and 2. */
		    {
			 E T14, T1q, T1k, T1u;
			 {
			      E TK, T13, T1g, T1j;
			      TK = TC + TJ;
			      T13 = KP707106781 * (TT + T12);
			      T14 = TK - T13;
			      T1q = TK + T13;
			      T1g = T18 + T1f;
			      T1j = KP707106781 * (T1h + T1i);
			      T1k = T1g - T1j;
			      T1u = T1g + T1j;
			 }
			 cr[WS(rs, 10)] = FNMS(T17, T1k, TB * T14);
			 ci[WS(rs, 10)] = FMA(T17, T14, TB * T1k);
			 cr[WS(rs, 2)] = FNMS(T1t, T1u, T1p * T1q);
			 ci[WS(rs, 2)] = FMA(T1t, T1q, T1p * T1u);
		    }
		    /* Stores to elements 14 and 6. */
		    {
			 E T1A, T1I, T1E, T1M;
			 {
			      E T1y, T1z, T1C, T1D;
			      T1y = TC - TJ;
			      T1z = KP707106781 * (T1i - T1h);
			      T1A = T1y - T1z;
			      T1I = T1y + T1z;
			      T1C = T1f - T18;
			      T1D = KP707106781 * (TT - T12);
			      T1E = T1C - T1D;
			      T1M = T1C + T1D;
			 }
			 cr[WS(rs, 14)] = FNMS(T1B, T1E, T1x * T1A);
			 ci[WS(rs, 14)] = FMA(T1x, T1E, T1B * T1A);
			 cr[WS(rs, 6)] = FNMS(T1L, T1M, T1H * T1I);
			 ci[WS(rs, 6)] = FMA(T1H, T1M, T1L * T1I);
		    }
		    /* Stores to elements 13 and 5. */
		    {
			 E T2C, T2S, T2Q, T2U;
			 {
			      E T2m, T2B, T2M, T2P;
			      T2m = T2e - T2l;
			      T2B = T2t - T2A;
			      T2C = T2m - T2B;
			      T2S = T2m + T2B;
			      T2M = T2I - T2L;
			      T2P = T2N - T2O;
			      T2Q = T2M - T2P;
			      T2U = T2M + T2P;
			 }
			 cr[WS(rs, 13)] = FNMS(T2F, T2Q, T2b * T2C);
			 ci[WS(rs, 13)] = FMA(T2F, T2C, T2b * T2Q);
			 cr[WS(rs, 5)] = FNMS(T2T, T2U, T2R * T2S);
			 ci[WS(rs, 5)] = FMA(T2T, T2S, T2R * T2U);
		    }
		    /* Stores to elements 9 and 1 (multipliers taken directly from W). */
		    {
			 E T2X, T31, T30, T32;
			 {
			      E T2V, T2W, T2Y, T2Z;
			      T2V = T2e + T2l;
			      T2W = T2N + T2O;
			      T2X = T2V - T2W;
			      T31 = T2V + T2W;
			      T2Y = T2I + T2L;
			      T2Z = T2A + T2t;
			      T30 = T2Y - T2Z;
			      T32 = T2Y + T2Z;
			 }
			 cr[WS(rs, 9)] = FNMS(Tz, T30, Tw * T2X);
			 ci[WS(rs, 9)] = FMA(Tw, T30, Tz * T2X);
			 cr[WS(rs, 1)] = FNMS(Ty, T32, Tv * T31);
			 ci[WS(rs, 1)] = FMA(Tv, T32, Ty * T31);
		    }
		    /* Stores to elements 12 and 4. */
		    {
			 E T20, T26, T24, T28;
			 {
			      E T1Y, T1Z, T22, T23;
			      T1Y = T7 - Te;
			      T1Z = T1U - T1T;
			      T20 = T1Y - T1Z;
			      T26 = T1Y + T1Z;
			      T22 = T1Q - T1R;
			      T23 = Tm - Tt;
			      T24 = T22 - T23;
			      T28 = T23 + T22;
			 }
			 cr[WS(rs, 12)] = FNMS(T21, T24, T1X * T20);
			 ci[WS(rs, 12)] = FMA(T1X, T24, T21 * T20);
			 cr[WS(rs, 4)] = FNMS(T27, T28, T25 * T26);
			 ci[WS(rs, 4)] = FMA(T25, T28, T27 * T26);
		    }
	       }
	  }
     }
}
Ejemplo n.º 5
0
/*
 * q1_3: in-place straight-line kernel over a 3 x 3 tile of two arrays, run
 * once for every m in [mb, me).
 *
 * Each iteration loads rio[WS(vs, j) + WS(rs, k)] and
 * iio[WS(vs, j) + WS(rs, k)] for j, k = 0..2 (18 values) plus W[0..3], and
 * stores 18 results back into the same tile.  Every load in an iteration
 * happens before the first store of that iteration.
 *
 * The three values read at a fixed vs index j are combined into three
 * results which are stored at rs index j, i.e. the roles of the two
 * indices are swapped between input and output:
 *   - output vs index 0 holds the plain sum of the three inputs,
 *   - output vs index 1 is weighted by W[0] and W[1],
 *   - output vs index 2 is weighted by W[2] and W[3].
 *
 * Parameters:
 *   rio, iio   arrays read and overwritten; advanced by +ms per iteration
 *   W          multiplier table, 4 entries consumed per iteration; the
 *              pointer is first offset by mb * 4
 *   rs, vs     strides handed to WS() for the two tile dimensions
 *   mb, me     half-open iteration range; nothing happens when mb >= me
 *   ms         per-iteration pointer step
 *
 * DK, E, INT, stride, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are declared
 * outside this block.
 * NOTE(review): FMA(a, b, c) is presumably a * b + c and FNMS(a, b, c)
 * presumably c - a * b -- confirm against their definitions.
 * NOTE(review): the name suggests a generated size-3 "q1" (transposing
 * twiddle) FFT codelet -- confirm; if so, prefer regenerating it over
 * editing it by hand.
 */
static void q1_3(float *rio, float *iio, const float *W, stride rs, stride vs, INT mb, INT me, INT ms)
{
     /* Numeric constants; each name spells the leading digits of its value. */
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT m;
     for (m = mb, W = W + (mb * 4); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 4, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(vs)) {
	  E T1, T4, T6, Tc, Td, Te, T9, Tf, Tl, To, Tq, Tw, Tx, Ty, Tt;
	  E Tz, TR, TS, TN, TT, TF, TI, TK, TQ;
	  /* Load the whole 3 x 3 tile and form the per-row sums and differences. */
	  {
	       E T2, T3, Tr, Ts;
	       /* Row vs = 0 of rio. */
	       T1 = rio[0];
	       T2 = rio[WS(rs, 1)];
	       T3 = rio[WS(rs, 2)];
	       T4 = T2 + T3;
	       T6 = FNMS(KP500000000, T4, T1);
	       Tc = KP866025403 * (T3 - T2);
	       {
		    E T7, T8, Tm, Tn;
		    /* Row vs = 0 of iio, then row vs = 1 of rio. */
		    Td = iio[0];
		    T7 = iio[WS(rs, 1)];
		    T8 = iio[WS(rs, 2)];
		    Te = T7 + T8;
		    T9 = KP866025403 * (T7 - T8);
		    Tf = FNMS(KP500000000, Te, Td);
		    Tl = rio[WS(vs, 1)];
		    Tm = rio[WS(vs, 1) + WS(rs, 1)];
		    Tn = rio[WS(vs, 1) + WS(rs, 2)];
		    To = Tm + Tn;
		    Tq = FNMS(KP500000000, To, Tl);
		    Tw = KP866025403 * (Tn - Tm);
	       }
	       /* Row vs = 1 of iio. */
	       Tx = iio[WS(vs, 1)];
	       Tr = iio[WS(vs, 1) + WS(rs, 1)];
	       Ts = iio[WS(vs, 1) + WS(rs, 2)];
	       Ty = Tr + Ts;
	       Tt = KP866025403 * (Tr - Ts);
	       Tz = FNMS(KP500000000, Ty, Tx);
	       {
		    E TL, TM, TG, TH;
		    /* Row vs = 2 of iio, then of rio. */
		    TR = iio[WS(vs, 2)];
		    TL = iio[WS(vs, 2) + WS(rs, 1)];
		    TM = iio[WS(vs, 2) + WS(rs, 2)];
		    TS = TL + TM;
		    TN = KP866025403 * (TL - TM);
		    TT = FNMS(KP500000000, TS, TR);
		    TF = rio[WS(vs, 2)];
		    TG = rio[WS(vs, 2) + WS(rs, 1)];
		    TH = rio[WS(vs, 2) + WS(rs, 2)];
		    TI = TG + TH;
		    TK = FNMS(KP500000000, TI, TF);
		    TQ = KP866025403 * (TH - TG);
	       }
	  }
	  /* Output row vs = 0: plain sums, no W factor. */
	  rio[0] = T1 + T4;
	  iio[0] = Td + Te;
	  rio[WS(rs, 1)] = Tl + To;
	  iio[WS(rs, 1)] = Tx + Ty;
	  iio[WS(rs, 2)] = TR + TS;
	  rio[WS(rs, 2)] = TF + TI;
	  /* Output (vs = 1, rs = 0), from input row 0, weighted by W[0], W[1]. */
	  {
	       E Ta, Tg, T5, Tb;
	       Ta = T6 + T9;
	       Tg = Tc + Tf;
	       T5 = W[0];
	       Tb = W[1];
	       rio[WS(vs, 1)] = FMA(T5, Ta, Tb * Tg);
	       iio[WS(vs, 1)] = FNMS(Tb, Ta, T5 * Tg);
	  }
	  /* Output (vs = 2, rs = 2), from input row 2, weighted by W[2], W[3]. */
	  {
	       E TW, TY, TV, TX;
	       TW = TK - TN;
	       TY = TT - TQ;
	       TV = W[2];
	       TX = W[3];
	       rio[WS(vs, 2) + WS(rs, 2)] = FMA(TV, TW, TX * TY);
	       iio[WS(vs, 2) + WS(rs, 2)] = FNMS(TX, TW, TV * TY);
	  }
	  /* Output (vs = 2, rs = 1), from input row 1, weighted by W[2], W[3]. */
	  {
	       E TC, TE, TB, TD;
	       TC = Tq - Tt;
	       TE = Tz - Tw;
	       TB = W[2];
	       TD = W[3];
	       rio[WS(vs, 2) + WS(rs, 1)] = FMA(TB, TC, TD * TE);
	       iio[WS(vs, 2) + WS(rs, 1)] = FNMS(TD, TC, TB * TE);
	  }
	  /* Output (vs = 1, rs = 1), from input row 1, weighted by W[0], W[1]. */
	  {
	       E Tu, TA, Tp, Tv;
	       Tu = Tq + Tt;
	       TA = Tw + Tz;
	       Tp = W[0];
	       Tv = W[1];
	       rio[WS(vs, 1) + WS(rs, 1)] = FMA(Tp, Tu, Tv * TA);
	       iio[WS(vs, 1) + WS(rs, 1)] = FNMS(Tv, Tu, Tp * TA);
	  }
	  /* Output (vs = 1, rs = 2), from input row 2, weighted by W[0], W[1]. */
	  {
	       E TO, TU, TJ, TP;
	       TO = TK + TN;
	       TU = TQ + TT;
	       TJ = W[0];
	       TP = W[1];
	       rio[WS(vs, 1) + WS(rs, 2)] = FMA(TJ, TO, TP * TU);
	       iio[WS(vs, 1) + WS(rs, 2)] = FNMS(TP, TO, TJ * TU);
	  }
	  /* Output (vs = 2, rs = 0), from input row 0, weighted by W[2], W[3]. */
	  {
	       E Ti, Tk, Th, Tj;
	       Ti = T6 - T9;
	       Tk = Tf - Tc;
	       Th = W[2];
	       Tj = W[3];
	       rio[WS(vs, 2)] = FMA(Th, Ti, Tj * Tk);
	       iio[WS(vs, 2)] = FNMS(Tj, Ti, Th * Tk);
	  }
     }
}
Ejemplo n.º 6
0
/*
 * hc2r_15: straight-line arithmetic kernel applied to `v` consecutive data
 * sets.
 *
 * Each loop iteration loads ri[WS(ris, k)] for k = 0..7 and
 * ii[WS(iis, k)] for k = 1..7 (15 input values; ii[0] is never read) and
 * stores O[WS(os, k)] for k = 0..14 (15 output values).  After every
 * iteration ri and ii advance by `ivs` elements and O advances by `ovs`
 * elements.
 *
 * Parameters:
 *   ri, ii       input arrays (only read here)
 *   O            output array (only written here)
 *   ris, iis     strides handed to WS() for indexing ri and ii
 *   os           stride handed to WS() for indexing O
 *   v            number of iterations; nothing happens when v <= 0
 *   ivs, ovs     per-iteration pointer increments for inputs / output
 *
 * DK, E, R, INT, stride, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are declared
 * outside this block.
 * NOTE(review): FMA(a, b, c) is presumably a * b + c and FNMS(a, b, c)
 * presumably c - a * b -- confirm against their definitions.
 * NOTE(review): the name suggests a generated half-complex to real DFT
 * codelet of size 15 -- confirm; if so, prefer regenerating it over editing
 * it by hand.
 */
static void hc2r_15(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, INT v, INT ivs, INT ovs)
{
     /* Numeric constants; each name spells the leading digits of its value. */
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
     INT i;
     /* Counts down from v; the three pointers are stepped in the loop header. */
     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(ris), MAKE_VOLATILE_STRIDE(iis), MAKE_VOLATILE_STRIDE(os)) {
	  /* These four survive the inner scope and feed outputs 1, 4, 7 and 13. */
	  E TL, Tz, TM, TK;
	  {
	       E T3, Th, Tt, TD, TI, TH, TY, TC, TZ, Tu, Tm, Tv, Tr, Te, TW;
	       E Tg, T1, T2, T12, T10, TV;
	       Tg = ii[WS(iis, 5)];
	       T1 = ri[0];
	       T2 = ri[WS(ris, 5)];
	       /* Remaining loads (ri 1..4, 6, 7 and ii 1..4, 6, 7) and first-level sums. */
	       {
		    E T4, TA, T9, TF, T7, Tj, Tc, Tk, TG, Tq, Tf, Tl, TB;
		    T4 = ri[WS(ris, 3)];
		    TA = ii[WS(iis, 3)];
		    T9 = ri[WS(ris, 6)];
		    Tf = T1 - T2;
		    T3 = FMA(KP2_000000000, T2, T1);
		    TF = ii[WS(iis, 6)];
		    {
			 E Ta, Tb, T5, T6, To, Tp;
			 T5 = ri[WS(ris, 7)];
			 T6 = ri[WS(ris, 2)];
			 Th = FMA(KP1_732050807, Tg, Tf);
			 Tt = FNMS(KP1_732050807, Tg, Tf);
			 Ta = ri[WS(ris, 4)];
			 TD = T5 - T6;
			 T7 = T5 + T6;
			 Tb = ri[WS(ris, 1)];
			 To = ii[WS(iis, 4)];
			 Tp = ii[WS(iis, 1)];
			 Tj = ii[WS(iis, 7)];
			 Tc = Ta + Tb;
			 TI = Ta - Tb;
			 Tk = ii[WS(iis, 2)];
			 TG = Tp - To;
			 Tq = To + Tp;
		    }
		    Tl = Tj - Tk;
		    TB = Tj + Tk;
		    TH = FNMS(KP500000000, TG, TF);
		    TY = TG + TF;
		    TC = FMA(KP500000000, TB, TA);
		    TZ = TA - TB;
		    {
			 E Ti, T8, Td, Tn;
			 Ti = FNMS(KP2_000000000, T4, T7);
			 T8 = T4 + T7;
			 Td = T9 + Tc;
			 Tn = FNMS(KP2_000000000, T9, Tc);
			 Tu = FNMS(KP1_732050807, Tl, Ti);
			 Tm = FMA(KP1_732050807, Tl, Ti);
			 Tv = FNMS(KP1_732050807, Tq, Tn);
			 Tr = FMA(KP1_732050807, Tq, Tn);
			 Te = T8 + Td;
			 TW = T8 - Td;
		    }
	       }
	       T12 = FMA(KP618033988, TY, TZ);
	       T10 = FNMS(KP618033988, TZ, TY);
	       TV = FNMS(KP500000000, Te, T3);
	       O[0] = FMA(KP2_000000000, Te, T3);
	       {
		    E TJ, TE, TT, TP, TU, TS, Ty, Tw, Tx;
		    /* Outputs 3, 6, 9, 12 and 5, plus temporaries for 2, 8, 11, 14. */
		    {
			 E TO, Ts, TQ, TN, TR, T11, TX;
			 TO = Tr - Tm;
			 Ts = Tm + Tr;
			 T11 = FMA(KP1_118033988, TW, TV);
			 TX = FNMS(KP1_118033988, TW, TV);
			 TQ = FNMS(KP866025403, TI, TH);
			 TJ = FMA(KP866025403, TI, TH);
			 TN = FMA(KP250000000, Ts, Th);
			 O[WS(os, 6)] = FNMS(KP1_902113032, T12, T11);
			 O[WS(os, 9)] = FMA(KP1_902113032, T12, T11);
			 O[WS(os, 12)] = FMA(KP1_902113032, T10, TX);
			 O[WS(os, 3)] = FNMS(KP1_902113032, T10, TX);
			 TR = FNMS(KP866025403, TD, TC);
			 TE = FMA(KP866025403, TD, TC);
			 O[WS(os, 5)] = Th - Ts;
			 TT = FMA(KP559016994, TO, TN);
			 TP = FNMS(KP559016994, TO, TN);
			 TU = FMA(KP618033988, TQ, TR);
			 TS = FNMS(KP618033988, TR, TQ);
		    }
		    Ty = Tv - Tu;
		    Tw = Tu + Tv;
		    O[WS(os, 14)] = FMA(KP1_902113032, TU, TT);
		    O[WS(os, 11)] = FNMS(KP1_902113032, TU, TT);
		    O[WS(os, 2)] = FMA(KP1_902113032, TS, TP);
		    O[WS(os, 8)] = FNMS(KP1_902113032, TS, TP);
		    Tx = FMA(KP250000000, Tw, Tt);
		    O[WS(os, 10)] = Tt - Tw;
		    TL = FNMS(KP559016994, Ty, Tx);
		    Tz = FMA(KP559016994, Ty, Tx);
		    TM = FNMS(KP618033988, TE, TJ);
		    TK = FMA(KP618033988, TJ, TE);
	       }
	  }
	  /* Final four outputs, built from the values kept in the outer scope. */
	  O[WS(os, 7)] = FMA(KP1_902113032, TM, TL);
	  O[WS(os, 13)] = FNMS(KP1_902113032, TM, TL);
	  O[WS(os, 4)] = FMA(KP1_902113032, TK, Tz);
	  O[WS(os, 1)] = FNMS(KP1_902113032, TK, Tz);
     }
}
/*
 * r2cbIII_32 -- fully unrolled, straight-line numeric kernel.
 *
 * Per loop iteration it reads 16 values from Cr (indices 0..15, stride csr)
 * and 16 values from Ci (indices 0..15, stride csi), and writes 16 values to
 * R0 and 16 values to R1 (indices 0..15, stride rs): 32 real outputs in all.
 *
 * NOTE(review): the name suggests a size-32 complex-to-real (backward)
 * transform of the "III" flavour -- confirm against the generator's docs.
 *
 * Parameters:
 *   R0, R1        output arrays, indexed through WS(rs, k)
 *   Cr, Ci        input arrays, indexed through WS(csr, k) / WS(csi, k)
 *   rs, csr, csi  strides handed to WS()
 *   v             number of loop iterations
 *   ivs           per-iteration increment applied to Cr and Ci
 *   ovs           per-iteration increment applied to R0 and R1
 *
 * E, R, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are declared
 * elsewhere.  FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c and
 * c - a*b -- TODO confirm against the macro definitions.
 */
static void r2cbIII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
{
     /* Numeric constants; the digits in each name echo the value
        (e.g. KP707106781 ~ sqrt(2)/2, KP1_414213562 ~ sqrt(2)). */
     DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
     DK(KP580569354, +0.580569354508924735272384751634790549382952557);
     DK(KP942793473, +0.942793473651995297112775251810508755314920638);
     DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
     DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
     DK(KP1_268786568, +1.268786568327290996430343226450986741351374190);
     DK(KP196034280, +0.196034280659121203988391127777283691722273346);
     DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
     DK(KP765366864, +0.765366864730179543456919968060797733522689125);
     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
     DK(KP390180644, +0.390180644032256535696569736954044481855383236);
     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
	  INT i;
	  /* One pass per i: outputs (R0, R1) step by ovs, inputs (Cr, Ci) step by ivs. */
	  for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
	       /* Temporaries that survive from the load stages into the store stages. */
	       E T7, T2i, T2F, Tz, T1k, T1I, T1Z, T1x, Te, T22, T2E, T2j, T1f, T1y, TK;
	       E T1J, Tm, T2B, TW, T1a, T1C, T1L, T28, T2l, Tt, T2A, T17, T1b, T1F, T1M;
	       E T2d, T2m;
	       /* Load stage 1: inputs 0/15 and 8/7 -> sums and differences.
	          (Every load stage pairs index k with index 15 - k.) */
	       {
		    E T3, Tv, T1j, T2h, T6, T1g, Ty, T2g;
		    {
			 E T1, T2, T1h, T1i;
			 T1 = Cr[0];
			 T2 = Cr[WS(csr, 15)];
			 T3 = T1 + T2;
			 Tv = T1 - T2;
			 T1h = Ci[0];
			 T1i = Ci[WS(csi, 15)];
			 T1j = T1h + T1i;
			 T2h = T1i - T1h;
		    }
		    {
			 E T4, T5, Tw, Tx;
			 T4 = Cr[WS(csr, 8)];
			 T5 = Cr[WS(csr, 7)];
			 T6 = T4 + T5;
			 T1g = T4 - T5;
			 Tw = Ci[WS(csi, 8)];
			 Tx = Ci[WS(csi, 7)];
			 Ty = Tw + Tx;
			 T2g = Tw - Tx;
		    }
		    T7 = T3 + T6;
		    T2i = T2g + T2h;
		    T2F = T2h - T2g;
		    Tz = Tv - Ty;
		    T1k = T1g + T1j;
		    T1I = T1g - T1j;
		    T1Z = T3 - T6;
		    T1x = Tv + Ty;
	       }
	       /* Load stage 2: inputs 4/11 and 3/12; part of the result is scaled by KP707106781. */
	       {
		    E Ta, TA, TD, T21, Td, TF, TI, T20;
		    {
			 E T8, T9, TB, TC;
			 T8 = Cr[WS(csr, 4)];
			 T9 = Cr[WS(csr, 11)];
			 Ta = T8 + T9;
			 TA = T8 - T9;
			 TB = Ci[WS(csi, 4)];
			 TC = Ci[WS(csi, 11)];
			 TD = TB + TC;
			 T21 = TB - TC;
		    }
		    {
			 E Tb, Tc, TG, TH;
			 Tb = Cr[WS(csr, 3)];
			 Tc = Cr[WS(csr, 12)];
			 Td = Tb + Tc;
			 TF = Tb - Tc;
			 TG = Ci[WS(csi, 3)];
			 TH = Ci[WS(csi, 12)];
			 TI = TG + TH;
			 T20 = TH - TG;
		    }
		    Te = Ta + Td;
		    T22 = T20 - T21;
		    T2E = T21 + T20;
		    T2j = Ta - Td;
		    {
			 E T1d, T1e, TE, TJ;
			 T1d = TA + TD;
			 T1e = TF + TI;
			 T1f = KP707106781 * (T1d - T1e);
			 T1y = KP707106781 * (T1d + T1e);
			 TE = TA - TD;
			 TJ = TF - TI;
			 TK = KP707106781 * (TE + TJ);
			 T1J = KP707106781 * (TE - TJ);
		    }
	       }
	       /* Load stage 3: inputs 2/13 and 10/5; combined with the
	          KP923879532 / KP382683432 constant pair. */
	       {
		    E Ti, TM, TU, T25, Tl, TR, TP, T26, TQ, TV;
		    {
			 E Tg, Th, TS, TT;
			 Tg = Cr[WS(csr, 2)];
			 Th = Cr[WS(csr, 13)];
			 Ti = Tg + Th;
			 TM = Tg - Th;
			 TS = Ci[WS(csi, 2)];
			 TT = Ci[WS(csi, 13)];
			 TU = TS + TT;
			 T25 = TS - TT;
		    }
		    {
			 E Tj, Tk, TN, TO;
			 Tj = Cr[WS(csr, 10)];
			 Tk = Cr[WS(csr, 5)];
			 Tl = Tj + Tk;
			 TR = Tj - Tk;
			 TN = Ci[WS(csi, 10)];
			 TO = Ci[WS(csi, 5)];
			 TP = TN + TO;
			 T26 = TN - TO;
		    }
		    Tm = Ti + Tl;
		    T2B = T26 + T25;
		    TQ = TM - TP;
		    TV = TR + TU;
		    TW = FNMS(KP382683432, TV, KP923879532 * TQ);
		    T1a = FMA(KP382683432, TQ, KP923879532 * TV);
		    {
			 E T1A, T1B, T24, T27;
			 T1A = TM + TP;
			 T1B = TU - TR;
			 T1C = FNMS(KP923879532, T1B, KP382683432 * T1A);
			 T1L = FMA(KP923879532, T1A, KP382683432 * T1B);
			 T24 = Ti - Tl;
			 T27 = T25 - T26;
			 T28 = T24 - T27;
			 T2l = T24 + T27;
		    }
	       }
	       /* Load stage 4: inputs 1/14 and 6/9; same constant pair as stage 3. */
	       {
		    E Tp, TX, T15, T2a, Ts, T12, T10, T2b, T11, T16;
		    {
			 E Tn, To, T13, T14;
			 Tn = Cr[WS(csr, 1)];
			 To = Cr[WS(csr, 14)];
			 Tp = Tn + To;
			 TX = Tn - To;
			 T13 = Ci[WS(csi, 1)];
			 T14 = Ci[WS(csi, 14)];
			 T15 = T13 + T14;
			 T2a = T14 - T13;
		    }
		    {
			 E Tq, Tr, TY, TZ;
			 Tq = Cr[WS(csr, 6)];
			 Tr = Cr[WS(csr, 9)];
			 Ts = Tq + Tr;
			 T12 = Tq - Tr;
			 TY = Ci[WS(csi, 6)];
			 TZ = Ci[WS(csi, 9)];
			 T10 = TY + TZ;
			 T2b = TY - TZ;
		    }
		    Tt = Tp + Ts;
		    T2A = T2b + T2a;
		    T11 = TX - T10;
		    T16 = T12 - T15;
		    T17 = FMA(KP923879532, T11, KP382683432 * T16);
		    T1b = FNMS(KP382683432, T11, KP923879532 * T16);
		    {
			 E T1D, T1E, T29, T2c;
			 T1D = TX + T10;
			 T1E = T12 + T15;
			 T1F = FNMS(KP923879532, T1E, KP382683432 * T1D);
			 T1M = FMA(KP923879532, T1D, KP382683432 * T1E);
			 T29 = Tp - Ts;
			 T2c = T2a - T2b;
			 T2d = T29 + T2c;
			 T2m = T2c - T29;
		    }
	       }
	       /* Store stages: each block below emits four outputs. */
	       /* R0[0], R0[8], R0[4], R0[12] */
	       {
		    E Tf, Tu, T2L, T2M, T2N, T2O;
		    Tf = T7 + Te;
		    Tu = Tm + Tt;
		    T2L = Tf - Tu;
		    T2M = T2B + T2A;
		    T2N = T2F - T2E;
		    T2O = T2M + T2N;
		    R0[0] = KP2_000000000 * (Tf + Tu);
		    R0[WS(rs, 8)] = KP2_000000000 * (T2N - T2M);
		    R0[WS(rs, 4)] = KP1_414213562 * (T2L + T2O);
		    R0[WS(rs, 12)] = KP1_414213562 * (T2O - T2L);
	       }
	       /* R0[3], R0[15], R0[11], R0[7] */
	       {
		    E T2t, T2x, T2w, T2y;
		    {
			 E T2r, T2s, T2u, T2v;
			 T2r = T1Z - T22;
			 T2s = KP707106781 * (T2m - T2l);
			 T2t = T2r + T2s;
			 T2x = T2r - T2s;
			 T2u = T2j + T2i;
			 T2v = KP707106781 * (T28 - T2d);
			 T2w = T2u - T2v;
			 T2y = T2v + T2u;
		    }
		    R0[WS(rs, 3)] = FMA(KP1_662939224, T2t, KP1_111140466 * T2w);
		    R0[WS(rs, 15)] = FNMS(KP1_961570560, T2x, KP390180644 * T2y);
		    R0[WS(rs, 11)] = FNMS(KP1_111140466, T2t, KP1_662939224 * T2w);
		    R0[WS(rs, 7)] = FMA(KP390180644, T2x, KP1_961570560 * T2y);
	       }
	       /* R0[2], R0[14], R0[10], R0[6] */
	       {
		    E T2D, T2J, T2I, T2K;
		    {
			 E T2z, T2C, T2G, T2H;
			 T2z = T7 - Te;
			 T2C = T2A - T2B;
			 T2D = T2z + T2C;
			 T2J = T2z - T2C;
			 T2G = T2E + T2F;
			 T2H = Tm - Tt;
			 T2I = T2G - T2H;
			 T2K = T2H + T2G;
		    }
		    R0[WS(rs, 2)] = FMA(KP1_847759065, T2D, KP765366864 * T2I);
		    R0[WS(rs, 14)] = FNMS(KP1_847759065, T2J, KP765366864 * T2K);
		    R0[WS(rs, 10)] = FNMS(KP765366864, T2D, KP1_847759065 * T2I);
		    R0[WS(rs, 6)] = FMA(KP765366864, T2J, KP1_847759065 * T2K);
	       }
	       /* R1[0], R1[12], R1[8], R1[4] */
	       {
		    E T19, T1n, T1m, T1o;
		    {
			 E TL, T18, T1c, T1l;
			 TL = Tz + TK;
			 T18 = TW + T17;
			 T19 = TL + T18;
			 T1n = TL - T18;
			 T1c = T1a + T1b;
			 T1l = T1f + T1k;
			 T1m = T1c + T1l;
			 T1o = T1c - T1l;
		    }
		    R1[0] = FNMS(KP196034280, T1m, KP1_990369453 * T19);
		    R1[WS(rs, 12)] = FNMS(KP1_546020906, T1n, KP1_268786568 * T1o);
		    R1[WS(rs, 8)] = -(FMA(KP196034280, T19, KP1_990369453 * T1m));
		    R1[WS(rs, 4)] = FMA(KP1_268786568, T1n, KP1_546020906 * T1o);
	       }
	       /* R1[2], R1[14], R1[10], R1[6] */
	       {
		    E T1r, T1v, T1u, T1w;
		    {
			 E T1p, T1q, T1s, T1t;
			 T1p = Tz - TK;
			 T1q = T1b - T1a;
			 T1r = T1p + T1q;
			 T1v = T1p - T1q;
			 T1s = T1f - T1k;
			 T1t = TW - T17;
			 T1u = T1s - T1t;
			 T1w = T1t + T1s;
		    }
		    R1[WS(rs, 2)] = FMA(KP1_763842528, T1r, KP942793473 * T1u);
		    R1[WS(rs, 14)] = FNMS(KP1_913880671, T1v, KP580569354 * T1w);
		    R1[WS(rs, 10)] = FNMS(KP942793473, T1r, KP1_763842528 * T1u);
		    R1[WS(rs, 6)] = FMA(KP580569354, T1v, KP1_913880671 * T1w);
	       }
	       /* R1[3], R1[15], R1[11], R1[7] */
	       {
		    E T1T, T1X, T1W, T1Y;
		    {
			 E T1R, T1S, T1U, T1V;
			 T1R = T1x + T1y;
			 T1S = T1L + T1M;
			 T1T = T1R - T1S;
			 T1X = T1R + T1S;
			 T1U = T1J + T1I;
			 T1V = T1C - T1F;
			 T1W = T1U - T1V;
			 T1Y = T1V + T1U;
		    }
		    R1[WS(rs, 3)] = FMA(KP1_546020906, T1T, KP1_268786568 * T1W);
		    R1[WS(rs, 15)] = FNMS(KP1_990369453, T1X, KP196034280 * T1Y);
		    R1[WS(rs, 11)] = FNMS(KP1_268786568, T1T, KP1_546020906 * T1W);
		    R1[WS(rs, 7)] = FMA(KP196034280, T1X, KP1_990369453 * T1Y);
	       }
	       /* R0[1], R0[13], R0[9], R0[5] */
	       {
		    E T2f, T2p, T2o, T2q;
		    {
			 E T23, T2e, T2k, T2n;
			 T23 = T1Z + T22;
			 T2e = KP707106781 * (T28 + T2d);
			 T2f = T23 + T2e;
			 T2p = T23 - T2e;
			 T2k = T2i - T2j;
			 T2n = KP707106781 * (T2l + T2m);
			 T2o = T2k - T2n;
			 T2q = T2n + T2k;
		    }
		    R0[WS(rs, 1)] = FMA(KP1_961570560, T2f, KP390180644 * T2o);
		    R0[WS(rs, 13)] = FNMS(KP1_662939224, T2p, KP1_111140466 * T2q);
		    R0[WS(rs, 9)] = FNMS(KP390180644, T2f, KP1_961570560 * T2o);
		    R0[WS(rs, 5)] = FMA(KP1_111140466, T2p, KP1_662939224 * T2q);
	       }
	       /* R1[1], R1[13], R1[9], R1[5] */
	       {
		    E T1H, T1P, T1O, T1Q;
		    {
			 E T1z, T1G, T1K, T1N;
			 T1z = T1x - T1y;
			 T1G = T1C + T1F;
			 T1H = T1z + T1G;
			 T1P = T1z - T1G;
			 T1K = T1I - T1J;
			 T1N = T1L - T1M;
			 T1O = T1K - T1N;
			 T1Q = T1N + T1K;
		    }
		    R1[WS(rs, 1)] = FMA(KP1_913880671, T1H, KP580569354 * T1O);
		    R1[WS(rs, 13)] = FNMS(KP1_763842528, T1P, KP942793473 * T1Q);
		    R1[WS(rs, 9)] = FNMS(KP580569354, T1H, KP1_913880671 * T1O);
		    R1[WS(rs, 5)] = FMA(KP942793473, T1P, KP1_763842528 * T1Q);
	       }
	  }
     }
}
Ejemplo n.º 8
0
/*
 * r2cb_20 -- fully unrolled, straight-line numeric kernel (multiply/add form).
 *
 * Per loop iteration it reads Cr[0..10] (stride csr) and Ci[1..9] (stride
 * csi; Ci[0] and Ci[10] are never read) and writes R0[0..9] and R1[0..9]
 * (stride rs): 20 real outputs in all.
 *
 * NOTE(review): the name suggests a size-20 complex-to-real (backward)
 * transform -- confirm against the generator's docs.
 *
 * Parameters:
 *   R0, R1        output arrays, indexed through WS(rs, k)
 *   Cr, Ci        input arrays, indexed through WS(csr, k) / WS(csi, k)
 *   rs, csr, csi  strides handed to WS()
 *   v             number of loop iterations
 *   ivs           per-iteration increment applied to Cr and Ci
 *   ovs           per-iteration increment applied to R0 and R1
 *
 * E, R, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are declared
 * elsewhere.  FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c and
 * c - a*b -- TODO confirm against the macro definitions.
 */
static void r2cb_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
{
     /* Numeric constants; the digits in each name echo the value. */
     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
     DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
     INT i;
     /* One pass per i: outputs (R0, R1) step by ovs, inputs (Cr, Ci) step by ivs. */
     for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
	  /* Temporaries shared between the load stages and the store stages. */
	  E T6, TF, Tm, Tt, TQ, T1n, T1f, T12, T1m, TV, T13, T1c, Td, Tk, Tl;
	  E Ty, TD, TE, Tn, To, Tp, TG, TH, TI;
	  /* Inputs 0, 5 and 10: index-5 values are doubled, then combined with
	     the sum and difference of Cr[0] and Cr[10]. */
	  {
	       E T5, Ts, T3, Tq;
	       {
		    E T4, Tr, T1, T2;
		    T4 = Cr[WS(csr, 5)];
		    T5 = KP2_000000000 * T4;
		    Tr = Ci[WS(csi, 5)];
		    Ts = KP2_000000000 * Tr;
		    T1 = Cr[0];
		    T2 = Cr[WS(csr, 10)];
		    T3 = T1 + T2;
		    Tq = T1 - T2;
	       }
	       T6 = T3 - T5;
	       TF = Tq - Ts;
	       Tm = T3 + T5;
	       Tt = Tq + Ts;
	  }
	  /* Remaining inputs, loaded in pairs 4/6, 9/1, 8/2 and 7/3, each
	     reduced to a sum and a difference before being combined further. */
	  {
	       E T9, Tu, TO, T1b, Tc, T1a, Tx, TP, Tg, Tz, TT, T1e, Tj, T1d, TC;
	       E TU;
	       {
		    E T7, T8, TM, TN;
		    T7 = Cr[WS(csr, 4)];
		    T8 = Cr[WS(csr, 6)];
		    T9 = T7 + T8;
		    Tu = T7 - T8;
		    TM = Ci[WS(csi, 4)];
		    TN = Ci[WS(csi, 6)];
		    TO = TM - TN;
		    T1b = TM + TN;
	       }
	       {
		    E Ta, Tb, Tv, Tw;
		    Ta = Cr[WS(csr, 9)];
		    Tb = Cr[WS(csr, 1)];
		    Tc = Ta + Tb;
		    T1a = Ta - Tb;
		    Tv = Ci[WS(csi, 9)];
		    Tw = Ci[WS(csi, 1)];
		    Tx = Tv + Tw;
		    TP = Tv - Tw;
	       }
	       {
		    E Te, Tf, TR, TS;
		    Te = Cr[WS(csr, 8)];
		    Tf = Cr[WS(csr, 2)];
		    Tg = Te + Tf;
		    Tz = Te - Tf;
		    TR = Ci[WS(csi, 8)];
		    TS = Ci[WS(csi, 2)];
		    TT = TR - TS;
		    T1e = TR + TS;
	       }
	       {
		    E Th, Ti, TA, TB;
		    Th = Cr[WS(csr, 7)];
		    Ti = Cr[WS(csr, 3)];
		    Tj = Th + Ti;
		    T1d = Th - Ti;
		    TA = Ci[WS(csi, 7)];
		    TB = Ci[WS(csi, 3)];
		    TC = TA + TB;
		    /* Note the operand order: index 3 minus index 7. */
		    TU = TB - TA;
	       }
	       TQ = TO - TP;
	       T1n = T1e - T1d;
	       T1f = T1d + T1e;
	       T12 = TP + TO;
	       T1m = T1b - T1a;
	       TV = TT - TU;
	       T13 = TU + TT;
	       T1c = T1a + T1b;
	       Td = T9 - Tc;
	       Tk = Tg - Tj;
	       Tl = Td + Tk;
	       Ty = Tu + Tx;
	       TD = Tz - TC;
	       TE = Ty + TD;
	       Tn = T9 + Tc;
	       To = Tg + Tj;
	       Tp = Tn + To;
	       TG = Tu - Tx;
	       TH = Tz + TC;
	       TI = TG + TH;
	  }
	  /* Four outputs that need only a single fused step. */
	  R0[WS(rs, 5)] = FMA(KP2_000000000, Tl, T6);
	  R1[WS(rs, 7)] = FMA(KP2_000000000, TE, Tt);
	  R1[WS(rs, 2)] = FMA(KP2_000000000, TI, TF);
	  R0[0] = FMA(KP2_000000000, Tp, Tm);
	  /* Each block below emits four outputs using the same pattern:
	     two rotated terms (KP1_175570504 / KP1_902113032) added to or
	     subtracted from two base terms (KP500000000 / KP1_118033988). */
	  /* R0[1], R0[7], R0[9], R0[3] */
	  {
	       E TW, TY, TL, TX, TJ, TK;
	       TW = FNMS(KP1_902113032, TV, KP1_175570504 * TQ);
	       TY = FMA(KP1_902113032, TQ, KP1_175570504 * TV);
	       TJ = FNMS(KP500000000, Tl, T6);
	       TK = KP1_118033988 * (Td - Tk);
	       TL = TJ - TK;
	       TX = TK + TJ;
	       R0[WS(rs, 1)] = TL - TW;
	       R0[WS(rs, 7)] = TX + TY;
	       R0[WS(rs, 9)] = TL + TW;
	       R0[WS(rs, 3)] = TX - TY;
	  }
	  /* R1[8], R1[4], R1[6], R1[0] */
	  {
	       E T1g, T1i, T19, T1h, T17, T18;
	       T1g = FNMS(KP1_902113032, T1f, KP1_175570504 * T1c);
	       T1i = FMA(KP1_902113032, T1c, KP1_175570504 * T1f);
	       T17 = FNMS(KP500000000, TI, TF);
	       T18 = KP1_118033988 * (TG - TH);
	       T19 = T17 - T18;
	       T1h = T18 + T17;
	       R1[WS(rs, 8)] = T19 - T1g;
	       R1[WS(rs, 4)] = T1h + T1i;
	       R1[WS(rs, 6)] = T19 + T1g;
	       R1[0] = T1h - T1i;
	  }
	  /* R1[3], R1[9], R1[1], R1[5] */
	  {
	       E T1o, T1q, T1l, T1p, T1j, T1k;
	       T1o = FNMS(KP1_902113032, T1n, KP1_175570504 * T1m);
	       T1q = FMA(KP1_902113032, T1m, KP1_175570504 * T1n);
	       T1j = FNMS(KP500000000, TE, Tt);
	       T1k = KP1_118033988 * (Ty - TD);
	       T1l = T1j - T1k;
	       T1p = T1k + T1j;
	       R1[WS(rs, 3)] = T1l - T1o;
	       R1[WS(rs, 9)] = T1p + T1q;
	       R1[WS(rs, 1)] = T1l + T1o;
	       R1[WS(rs, 5)] = T1p - T1q;
	  }
	  /* R0[6], R0[2], R0[4], R0[8] */
	  {
	       E T14, T16, T11, T15, TZ, T10;
	       T14 = FNMS(KP1_902113032, T13, KP1_175570504 * T12);
	       T16 = FMA(KP1_902113032, T12, KP1_175570504 * T13);
	       TZ = FNMS(KP500000000, Tp, Tm);
	       T10 = KP1_118033988 * (Tn - To);
	       T11 = TZ - T10;
	       T15 = T10 + TZ;
	       R0[WS(rs, 6)] = T11 - T14;
	       R0[WS(rs, 2)] = T15 + T16;
	       R0[WS(rs, 4)] = T11 + T14;
	       R0[WS(rs, 8)] = T15 - T16;
	  }
     }
}
Ejemplo n.º 9
0
/*
 * r2cb_20 -- fully unrolled, straight-line numeric kernel written almost
 * entirely in terms of the FMA / FNMS macros.
 *
 * Per loop iteration it reads Cr[0..10] (stride csr) and Ci[1..9] (stride
 * csi; Ci[0] and Ci[10] are never read) and writes R0[0..9] and R1[0..9]
 * (stride rs): 20 real outputs in all.
 *
 * NOTE(review): the name suggests a size-20 complex-to-real (backward)
 * transform -- confirm against the generator's docs.
 *
 * Parameters:
 *   R0, R1        output arrays, indexed through WS(rs, k)
 *   Cr, Ci        input arrays, indexed through WS(csr, k) / WS(csi, k)
 *   rs, csr, csi  strides handed to WS()
 *   v             number of loop iterations
 *   ivs           per-iteration increment applied to Cr and Ci
 *   ovs           per-iteration increment applied to R0 and R1
 *
 * E, R, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are declared
 * elsewhere.  FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c and
 * c - a*b -- TODO confirm against the macro definitions.
 */
static void r2cb_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
{
     /* Numeric constants; the digits in each name echo the value. */
     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
     INT i;
     /* One pass per i: outputs (R0, R1) step by ovs, inputs (Cr, Ci) step by ivs. */
     for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
	  /* Temporaries consumed by the final eight stores after the nested scopes close. */
	  E TY, T1o, T1m, T14, T12, TX, T1n, T1j, TZ, T13;
	  {
	       E Tr, TD, Tl, T5, T1a, T1l, T1d, T1k, TT, T10, TO, T11, TE, TF, Tk;
	       E TI, TC, T1i, To, TG, T16;
	       /* Load stage: loads are interleaved with the first sums and
	          differences.  Inputs are paired 0/10, 4/6, 9/1, 8/2 and 7/3;
	          index 5 is folded in with a factor of KP2_000000000. */
	       {
		    E T4, Tq, T1, T2;
		    T4 = Cr[WS(csr, 5)];
		    Tq = Ci[WS(csi, 5)];
		    T1 = Cr[0];
		    T2 = Cr[WS(csr, 10)];
		    {
			 E Ts, T8, T19, TR, T18, Tb, TS, Tv, Tx, Tf, Ty, T1c, TM, T1b, Ti;
			 E Tz, Tt, Tu, TN, TA;
			 {
			      E TP, TQ, T9, Ta;
			      {
				   E T6, T7, Tp, T3;
				   T6 = Cr[WS(csr, 4)];
				   T7 = Cr[WS(csr, 6)];
				   TP = Ci[WS(csi, 4)];
				   Tp = T1 - T2;
				   T3 = T1 + T2;
				   Ts = T6 - T7;
				   T8 = T6 + T7;
				   Tr = FMA(KP2_000000000, Tq, Tp);
				   TD = FNMS(KP2_000000000, Tq, Tp);
				   Tl = FMA(KP2_000000000, T4, T3);
				   T5 = FNMS(KP2_000000000, T4, T3);
				   TQ = Ci[WS(csi, 6)];
			      }
			      T9 = Cr[WS(csr, 9)];
			      Ta = Cr[WS(csr, 1)];
			      Tt = Ci[WS(csi, 9)];
			      T19 = TP + TQ;
			      TR = TP - TQ;
			      T18 = T9 - Ta;
			      Tb = T9 + Ta;
			      Tu = Ci[WS(csi, 1)];
			 }
			 {
			      E TK, TL, Td, Te, Tg, Th;
			      Td = Cr[WS(csr, 8)];
			      Te = Cr[WS(csr, 2)];
			      TK = Ci[WS(csi, 8)];
			      TS = Tt - Tu;
			      Tv = Tt + Tu;
			      Tx = Td - Te;
			      Tf = Td + Te;
			      TL = Ci[WS(csi, 2)];
			      Tg = Cr[WS(csr, 7)];
			      Th = Cr[WS(csr, 3)];
			      Ty = Ci[WS(csi, 7)];
			      T1c = TK + TL;
			      TM = TK - TL;
			      T1b = Tg - Th;
			      Ti = Tg + Th;
			      Tz = Ci[WS(csi, 3)];
			 }
			 T1a = T18 + T19;
			 T1l = T19 - T18;
			 T1d = T1b + T1c;
			 T1k = T1c - T1b;
			 TT = TR - TS;
			 T10 = TS + TR;
			 /* Note the operand order: index 3 minus index 7. */
			 TN = Tz - Ty;
			 TA = Ty + Tz;
			 TO = TM - TN;
			 T11 = TN + TM;
			 {
			      E Tm, Tc, Tj, Tn, Tw, TB;
			      Tm = T8 + Tb;
			      Tc = T8 - Tb;
			      Tj = Tf - Ti;
			      Tn = Tf + Ti;
			      TE = Ts - Tv;
			      Tw = Ts + Tv;
			      TB = Tx - TA;
			      TF = Tx + TA;
			      Tk = Tc + Tj;
			      TI = Tc - Tj;
			      TC = Tw + TB;
			      T1i = Tw - TB;
			      TY = Tm - Tn;
			      To = Tm + Tn;
			 }
		    }
	       }
	       /* Outputs that need only a single fused step. */
	       R0[WS(rs, 5)] = FMA(KP2_000000000, Tk, T5);
	       R1[WS(rs, 7)] = FMA(KP2_000000000, TC, Tr);
	       TG = TE + TF;
	       T16 = TE - TF;
	       R0[0] = FMA(KP2_000000000, To, Tl);
	       /* Remaining outputs: a KP618033988-weighted pair is scaled by
	          KP1_902113032 and added to / subtracted from a base term
	          built with KP500000000 and KP1_118033988. */
	       {
		    E TU, TW, T1g, T1e, T15, TV, TJ, TH, T1h, T1f, T17;
		    TU = FNMS(KP618033988, TT, TO);
		    TW = FMA(KP618033988, TO, TT);
		    R1[WS(rs, 2)] = FMA(KP2_000000000, TG, TD);
		    TH = FNMS(KP500000000, Tk, T5);
		    T1g = FNMS(KP618033988, T1a, T1d);
		    T1e = FMA(KP618033988, T1d, T1a);
		    T15 = FNMS(KP500000000, TG, TD);
		    TV = FMA(KP1_118033988, TI, TH);
		    TJ = FNMS(KP1_118033988, TI, TH);
		    T1o = FMA(KP618033988, T1k, T1l);
		    T1m = FNMS(KP618033988, T1l, T1k);
		    R0[WS(rs, 3)] = FNMS(KP1_902113032, TW, TV);
		    R0[WS(rs, 7)] = FMA(KP1_902113032, TW, TV);
		    R0[WS(rs, 1)] = FMA(KP1_902113032, TU, TJ);
		    R0[WS(rs, 9)] = FNMS(KP1_902113032, TU, TJ);
		    T1f = FNMS(KP1_118033988, T16, T15);
		    T17 = FMA(KP1_118033988, T16, T15);
		    T1h = FNMS(KP500000000, TC, Tr);
		    R1[WS(rs, 6)] = FNMS(KP1_902113032, T1g, T1f);
		    R1[WS(rs, 8)] = FMA(KP1_902113032, T1g, T1f);
		    R1[WS(rs, 4)] = FMA(KP1_902113032, T1e, T17);
		    R1[0] = FNMS(KP1_902113032, T1e, T17);
		    T14 = FNMS(KP618033988, T10, T11);
		    T12 = FMA(KP618033988, T11, T10);
		    TX = FNMS(KP500000000, To, Tl);
		    T1n = FMA(KP1_118033988, T1i, T1h);
		    T1j = FNMS(KP1_118033988, T1i, T1h);
	       }
	  }
	  /* Final eight stores, using the temporaries declared at loop scope. */
	  R1[WS(rs, 5)] = FNMS(KP1_902113032, T1o, T1n);
	  R1[WS(rs, 9)] = FMA(KP1_902113032, T1o, T1n);
	  R1[WS(rs, 3)] = FMA(KP1_902113032, T1m, T1j);
	  R1[WS(rs, 1)] = FNMS(KP1_902113032, T1m, T1j);
	  TZ = FMA(KP1_118033988, TY, TX);
	  T13 = FNMS(KP1_118033988, TY, TX);
	  R0[WS(rs, 4)] = FNMS(KP1_902113032, T14, T13);
	  R0[WS(rs, 6)] = FMA(KP1_902113032, T14, T13);
	  R0[WS(rs, 2)] = FMA(KP1_902113032, T12, TZ);
	  R0[WS(rs, 8)] = FNMS(KP1_902113032, T12, TZ);
     }
}
Ejemplo n.º 10
0
/*
 * hc2cbdft_8 -- fully unrolled in-place kernel with twiddle multiplication.
 *
 * Per loop iteration it reads Rp[0..3], Ip[0..3], Rm[0..3] and Im[0..3]
 * (all at stride rs) plus 14 factors W[0..13], and writes results back to
 * the same 16 array slots.  All array loads are done before the first store.
 *
 * NOTE(review): the name suggests a size-8 halfcomplex-to-complex backward
 * DFT step -- confirm against the generator's docs.
 *
 * Parameters:
 *   Rp, Ip   arrays that advance by +ms each iteration
 *   Rm, Im   arrays that advance by -ms each iteration
 *   W        factor table; starts at W + (mb - 1) * 14 and advances by 14
 *            per iteration
 *   rs       stride handed to WS()
 *   mb, me   loop runs for m = mb .. me - 1
 *   ms       per-iteration pointer step (see above)
 *
 * E, R, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are declared
 * elsewhere.  FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c and
 * c - a*b -- TODO confirm against the macro definitions.
 */
static void hc2cbdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* KP707106781 ~ sqrt(2)/2 */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     INT m;
     for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs)) {
	  /* Temporaries shared between the load stages and the store stages. */
	  E T7, T1d, T1h, Tl, TG, T14, T19, TO, Te, TL, T18, T15, TB, T1e, Tw;
	  E T1i;
	  /* Load stage 1: slots p[0]/m[3] and p[2]/m[1]. */
	  {
	       E T3, TC, Tk, TM, T6, Th, TF, TN;
	       {
		    E T1, T2, Ti, Tj;
		    T1 = Rp[0];
		    T2 = Rm[WS(rs, 3)];
		    T3 = T1 + T2;
		    TC = T1 - T2;
		    Ti = Ip[0];
		    Tj = Im[WS(rs, 3)];
		    Tk = Ti + Tj;
		    TM = Ti - Tj;
	       }
	       {
		    E T4, T5, TD, TE;
		    T4 = Rp[WS(rs, 2)];
		    T5 = Rm[WS(rs, 1)];
		    T6 = T4 + T5;
		    Th = T4 - T5;
		    TD = Ip[WS(rs, 2)];
		    TE = Im[WS(rs, 1)];
		    TF = TD + TE;
		    TN = TD - TE;
	       }
	       T7 = T3 + T6;
	       T1d = Tk - Th;
	       T1h = TC + TF;
	       Tl = Th + Tk;
	       TG = TC - TF;
	       T14 = T3 - T6;
	       T19 = TM - TN;
	       TO = TM + TN;
	  }
	  /* Load stage 2: slots p[1]/m[2] and m[0]/p[3]; four of the results
	     are scaled by KP707106781. */
	  {
	       E Ta, Tm, Tp, TJ, Td, Tr, Tu, TK;
	       {
		    E T8, T9, Tn, To;
		    T8 = Rp[WS(rs, 1)];
		    T9 = Rm[WS(rs, 2)];
		    Ta = T8 + T9;
		    Tm = T8 - T9;
		    Tn = Ip[WS(rs, 1)];
		    To = Im[WS(rs, 2)];
		    Tp = Tn + To;
		    TJ = Tn - To;
	       }
	       {
		    E Tb, Tc, Ts, Tt;
		    Tb = Rm[0];
		    Tc = Rp[WS(rs, 3)];
		    Td = Tb + Tc;
		    Tr = Tb - Tc;
		    Ts = Im[0];
		    Tt = Ip[WS(rs, 3)];
		    Tu = Ts + Tt;
		    TK = Tt - Ts;
	       }
	       Te = Ta + Td;
	       TL = TJ + TK;
	       T18 = Ta - Td;
	       T15 = TK - TJ;
	       {
		    E Tz, TA, Tq, Tv;
		    Tz = Tm - Tp;
		    TA = Tr - Tu;
		    TB = KP707106781 * (Tz + TA);
		    T1e = KP707106781 * (Tz - TA);
		    Tq = Tm + Tp;
		    Tv = Tr + Tu;
		    Tw = KP707106781 * (Tq - Tv);
		    T1i = KP707106781 * (Tq + Tv);
	       }
	  }
	  /* Slot 0 outputs: one (W[0], W[1]) rotation combined with the
	     unrotated sums Tf and TP. */
	  {
	       E Tf, TP, TI, TQ;
	       Tf = T7 + Te;
	       TP = TL + TO;
	       {
		    E Tx, TH, Tg, Ty;
		    Tx = Tl + Tw;
		    TH = TB + TG;
		    Tg = W[0];
		    Ty = W[1];
		    TI = FMA(Tg, Tx, Ty * TH);
		    TQ = FNMS(Ty, Tx, Tg * TH);
	       }
	       Rp[0] = Tf - TI;
	       Ip[0] = TP + TQ;
	       Rm[0] = Tf + TI;
	       Im[0] = TQ - TP;
	  }
	  /* Slot 3 outputs: rotations by (W[10], W[11]) and (W[12], W[13]). */
	  {
	       E T1r, T1x, T1w, T1y;
	       {
		    E T1o, T1q, T1n, T1p;
		    T1o = T14 - T15;
		    T1q = T19 - T18;
		    T1n = W[10];
		    T1p = W[11];
		    T1r = FNMS(T1p, T1q, T1n * T1o);
		    T1x = FMA(T1p, T1o, T1n * T1q);
	       }
	       {
		    E T1t, T1v, T1s, T1u;
		    T1t = T1d - T1e;
		    T1v = T1i + T1h;
		    T1s = W[12];
		    T1u = W[13];
		    T1w = FMA(T1s, T1t, T1u * T1v);
		    T1y = FNMS(T1u, T1t, T1s * T1v);
	       }
	       Rp[WS(rs, 3)] = T1r - T1w;
	       Ip[WS(rs, 3)] = T1x + T1y;
	       Rm[WS(rs, 3)] = T1r + T1w;
	       Im[WS(rs, 3)] = T1y - T1x;
	  }
	  /* Slot 2 outputs: rotations by (W[6], W[7]) and (W[8], W[9]). */
	  {
	       E TV, T11, T10, T12;
	       {
		    E TS, TU, TR, TT;
		    TS = T7 - Te;
		    TU = TO - TL;
		    TR = W[6];
		    TT = W[7];
		    TV = FNMS(TT, TU, TR * TS);
		    T11 = FMA(TT, TS, TR * TU);
	       }
	       {
		    E TX, TZ, TW, TY;
		    TX = Tl - Tw;
		    TZ = TG - TB;
		    TW = W[8];
		    TY = W[9];
		    T10 = FMA(TW, TX, TY * TZ);
		    T12 = FNMS(TY, TX, TW * TZ);
	       }
	       Rp[WS(rs, 2)] = TV - T10;
	       Ip[WS(rs, 2)] = T11 + T12;
	       Rm[WS(rs, 2)] = TV + T10;
	       Im[WS(rs, 2)] = T12 - T11;
	  }
	  /* Slot 1 outputs: rotations by (W[2], W[3]) and (W[4], W[5]). */
	  {
	       E T1b, T1l, T1k, T1m;
	       {
		    E T16, T1a, T13, T17;
		    T16 = T14 + T15;
		    T1a = T18 + T19;
		    T13 = W[2];
		    T17 = W[3];
		    T1b = FNMS(T17, T1a, T13 * T16);
		    T1l = FMA(T17, T16, T13 * T1a);
	       }
	       {
		    E T1f, T1j, T1c, T1g;
		    T1f = T1d + T1e;
		    T1j = T1h - T1i;
		    T1c = W[4];
		    T1g = W[5];
		    T1k = FMA(T1c, T1f, T1g * T1j);
		    T1m = FNMS(T1g, T1f, T1c * T1j);
	       }
	       Rp[WS(rs, 1)] = T1b - T1k;
	       Ip[WS(rs, 1)] = T1l + T1m;
	       Rm[WS(rs, 1)] = T1b + T1k;
	       Im[WS(rs, 1)] = T1m - T1l;
	  }
     }
}
Ejemplo n.º 11
0
/*
 * hc2cbdft_8 -- fully unrolled in-place kernel with twiddle multiplication,
 * written almost entirely in terms of the FMA / FNMS macros.
 *
 * Per loop iteration it reads Rp[0..3], Ip[0..3], Rm[0..3] and Im[0..3]
 * (all at stride rs) plus 14 factors W[0..13], and writes results back to
 * the same 16 array slots.  All array loads are done before the first store.
 *
 * NOTE(review): the name suggests a size-8 halfcomplex-to-complex backward
 * DFT step -- confirm against the generator's docs.
 *
 * Parameters:
 *   Rp, Ip   arrays that advance by +ms each iteration
 *   Rm, Im   arrays that advance by -ms each iteration
 *   W        factor table; starts at W + (mb - 1) * 14 and advances by 14
 *            per iteration
 *   rs       stride handed to WS()
 *   mb, me   loop runs for m = mb .. me - 1
 *   ms       per-iteration pointer step (see above)
 *
 * E, R, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are declared
 * elsewhere.  FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c and
 * c - a*b -- TODO confirm against the macro definitions.
 */
static void hc2cbdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* KP707106781 ~ sqrt(2)/2 */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     INT m;
     for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs)) {
	  /* Temporaries consumed by the slot-1 stores after the nested scopes close. */
	  E T1m, T1r, T1i, T1u, T1o, T1v, T1n, T1w, T1s;
	  {
	       E T1k, Tl, T1p, TE, TP, T1g, TM, T1b, T1f, T1a, TU, Tf, T1l, TH, Tw;
	       E T1q;
	       /* Load stage: all 16 array reads, interleaved with the first
	          sums and differences (pairs p[0]/m[3], p[2]/m[1], p[1]/m[2],
	          m[0]/p[3]). */
	       {
		    E TA, T3, TN, Tk, Th, T6, TO, TD, Tb, Tm, Ta, TK, Tp, Tc, Ts;
		    E Tt;
		    {
			 E T4, T5, TB, TC;
			 {
			      E T1, T2, Ti, Tj;
			      T1 = Rp[0];
			      T2 = Rm[WS(rs, 3)];
			      Ti = Ip[0];
			      Tj = Im[WS(rs, 3)];
			      T4 = Rp[WS(rs, 2)];
			      TA = T1 - T2;
			      T3 = T1 + T2;
			      TN = Ti - Tj;
			      Tk = Ti + Tj;
			      T5 = Rm[WS(rs, 1)];
			      TB = Ip[WS(rs, 2)];
			      TC = Im[WS(rs, 1)];
			 }
			 {
			      E T8, T9, Tn, To;
			      T8 = Rp[WS(rs, 1)];
			      Th = T4 - T5;
			      T6 = T4 + T5;
			      TO = TB - TC;
			      TD = TB + TC;
			      T9 = Rm[WS(rs, 2)];
			      Tn = Ip[WS(rs, 1)];
			      To = Im[WS(rs, 2)];
			      Tb = Rm[0];
			      Tm = T8 - T9;
			      Ta = T8 + T9;
			      TK = Tn - To;
			      Tp = Tn + To;
			      Tc = Rp[WS(rs, 3)];
			      Ts = Im[0];
			      Tt = Ip[WS(rs, 3)];
			 }
		    }
		    {
			 E Tr, Td, Tu, TL, Te, T7;
			 T1k = Tk - Th;
			 Tl = Th + Tk;
			 Tr = Tb - Tc;
			 Td = Tb + Tc;
			 TL = Tt - Ts;
			 Tu = Ts + Tt;
			 T1p = TA + TD;
			 TE = TA - TD;
			 TP = TN + TO;
			 T1g = TN - TO;
			 TM = TK + TL;
			 T1b = TL - TK;
			 T1f = Ta - Td;
			 Te = Ta + Td;
			 T1a = T3 - T6;
			 T7 = T3 + T6;
			 {
			      E Tq, TF, TG, Tv;
			      Tq = Tm + Tp;
			      TF = Tm - Tp;
			      TG = Tr - Tu;
			      Tv = Tr + Tu;
			      TU = T7 - Te;
			      Tf = T7 + Te;
			      T1l = TF - TG;
			      TH = TF + TG;
			      Tw = Tq - Tv;
			      T1q = Tq + Tv;
			 }
		    }
	       }
	       /* Rotation and store stages.  Each rotation is split into a
	          plain product followed by an FMA/FNMS that completes it. */
	       {
		    E TX, T10, T1c, T13, T1h, T1E, T1H, T1C, T1K, T1G, T1L, T1F;
		    {
			 E TQ, Tx, T1y, TI, Tg, Tz;
			 TX = TP - TM;
			 TQ = TM + TP;
			 Tx = FMA(KP707106781, Tw, Tl);
			 T10 = FNMS(KP707106781, Tw, Tl);
			 T1c = T1a + T1b;
			 T1y = T1a - T1b;
			 T13 = FNMS(KP707106781, TH, TE);
			 TI = FMA(KP707106781, TH, TE);
			 Tg = W[0];
			 Tz = W[1];
			 {
			      E T1B, T1A, T1x, T1J, T1z, T1D;
			      {
				   E TR, Ty, TS, TJ;
				   T1B = T1g - T1f;
				   T1h = T1f + T1g;
				   T1A = W[11];
				   TR = Tg * TI;
				   Ty = Tg * Tx;
				   T1x = W[10];
				   T1J = T1A * T1y;
				   TS = FNMS(Tz, Tx, TR);
				   TJ = FMA(Tz, TI, Ty);
				   T1z = T1x * T1y;
				   T1m = FMA(KP707106781, T1l, T1k);
				   T1E = FNMS(KP707106781, T1l, T1k);
				   /* Slot 0 outputs: rotation by (W[0], W[1]). */
				   Im[0] = TS - TQ;
				   Ip[0] = TQ + TS;
				   Rm[0] = Tf + TJ;
				   Rp[0] = Tf - TJ;
				   T1H = FMA(KP707106781, T1q, T1p);
				   T1r = FNMS(KP707106781, T1q, T1p);
				   T1D = W[12];
			      }
			      T1C = FNMS(T1A, T1B, T1z);
			      T1K = FMA(T1x, T1B, T1J);
			      T1G = W[13];
			      T1L = T1D * T1H;
			      T1F = T1D * T1E;
			 }
		    }
		    {
			 E TY, T16, T12, T17, T11;
			 {
			      E TW, TT, T15, TV, TZ, T1M, T1I;
			      TW = W[7];
			      T1M = FNMS(T1G, T1E, T1L);
			      T1I = FMA(T1G, T1H, T1F);
			      TT = W[6];
			      T15 = TW * TU;
			      /* Slot 3 outputs: rotations by W[10..13]. */
			      Im[WS(rs, 3)] = T1M - T1K;
			      Ip[WS(rs, 3)] = T1K + T1M;
			      Rm[WS(rs, 3)] = T1C + T1I;
			      Rp[WS(rs, 3)] = T1C - T1I;
			      TV = TT * TU;
			      TZ = W[8];
			      TY = FNMS(TW, TX, TV);
			      T16 = FMA(TT, TX, T15);
			      T12 = W[9];
			      T17 = TZ * T13;
			      T11 = TZ * T10;
			 }
			 {
			      E T1e, T19, T1t, T1d, T1j, T18, T14;
			      T1e = W[3];
			      T18 = FNMS(T12, T10, T17);
			      T14 = FMA(T12, T13, T11);
			      T19 = W[2];
			      T1t = T1e * T1c;
			      /* Slot 2 outputs: rotations by W[6..9]. */
			      Im[WS(rs, 2)] = T18 - T16;
			      Ip[WS(rs, 2)] = T16 + T18;
			      Rm[WS(rs, 2)] = TY + T14;
			      Rp[WS(rs, 2)] = TY - T14;
			      T1d = T19 * T1c;
			      T1j = W[4];
			      T1i = FNMS(T1e, T1h, T1d);
			      T1u = FMA(T19, T1h, T1t);
			      T1o = W[5];
			      T1v = T1j * T1r;
			      T1n = T1j * T1m;
			 }
		    }
	       }
	  }
	  /* Slot 1 outputs: rotations by W[2..5], finished at loop scope. */
	  T1w = FNMS(T1o, T1m, T1v);
	  T1s = FMA(T1o, T1r, T1n);
	  Im[WS(rs, 1)] = T1w - T1u;
	  Ip[WS(rs, 1)] = T1u + T1w;
	  Rm[WS(rs, 1)] = T1i + T1s;
	  Rp[WS(rs, 1)] = T1i - T1s;
     }
}
Ejemplo n.º 12
0
Archivo: t2_8.c Proyecto: Aegisub/fftw3
/*
 * t2_8 -- fully unrolled in-place kernel over eight (ri, ii) pairs.
 *
 * Per loop iteration it reads six stored factors W[0..5], derives further
 * factor pairs from them by multiplication, applies a factor pair to every
 * input pair except index 0, and writes eight results back to
 * ri[0..7] / ii[0..7] (stride rs).  All array loads precede the first store.
 *
 * Factor pair applied to each input index (as visible in the body):
 *   1: (W[0], W[1])   3: (W[2], W[3])   7: (W[4], W[5])
 *   4: (T7, Tb)       2: (Tf, Ti)       6: (To, Ts)      5: (TC, TG)
 *
 * NOTE(review): the name suggests a size-8 twiddle (decimation-in-time) DFT
 * step with compressed twiddle storage -- confirm against the generator's docs.
 *
 * Parameters:
 *   ri, ii   arrays updated in place; both advance by ms each iteration
 *   W        factor table; starts at W + mb * 6 and advances by 6 per iteration
 *   rs       stride handed to WS()
 *   mb, me   loop runs for m = mb .. me - 1
 *   ms       per-iteration pointer step for ri and ii
 *
 * E, R, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are declared
 * elsewhere.  FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c and
 * c - a*b -- TODO confirm against the macro definitions.
 */
static void t2_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* KP707106781 ~ sqrt(2)/2 */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
	  INT m;
	  for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) {
	       /* Temporaries consumed by the last eight stores after the nested scopes close. */
	       E TS, T1m, TJ, T1l, T1k, Tw, T1w, T1u;
	       {
		    E T2, T3, Tl, Tn, T5, T4, Tm, Tr, T6;
		    /* Stored factors W[0..5] and the first products used to
		       derive the remaining factor pairs. */
		    T2 = W[0];
		    T3 = W[2];
		    Tl = W[4];
		    Tn = W[5];
		    T5 = W[1];
		    T4 = T2 * T3;
		    Tm = T2 * Tl;
		    Tr = T2 * Tn;
		    T6 = W[3];
		    {
			 E T1, T1s, TG, Td, T1r, Tu, TY, Tk, TW, T18, T1d, TD, TH, TA, T13;
			 E TE, T14;
			 /* Load stage: read every ri/ii input and start applying
			    its factor pair. */
			 {
			      E To, Ts, Tf, T7, T8, Ti, Tb, T9, Tc, TC, Ta, TF, TB, Tg, Th;
			      E Tj;
			      T1 = ri[0];
			      To = FMA(T5, Tn, Tm);
			      Ts = FNMS(T5, Tl, Tr);
			      Tf = FMA(T5, T6, T4);
			      T7 = FNMS(T5, T6, T4);
			      Ta = T2 * T6;
			      T1s = ii[0];
			      T8 = ri[WS(rs, 4)];
			      TF = Tf * Tn;
			      TB = Tf * Tl;
			      Ti = FNMS(T5, T3, Ta);
			      Tb = FMA(T5, T3, Ta);
			      T9 = T7 * T8;
			      Tc = ii[WS(rs, 4)];
			      TG = FNMS(Ti, Tl, TF);
			      TC = FMA(Ti, Tn, TB);
			      {
				   E Tp, T1q, Tt, Tq, TX;
				   Tp = ri[WS(rs, 6)];
				   Td = FMA(Tb, Tc, T9);
				   T1q = T7 * Tc;
				   Tt = ii[WS(rs, 6)];
				   Tq = To * Tp;
				   Tg = ri[WS(rs, 2)];
				   T1r = FNMS(Tb, T8, T1q);
				   TX = To * Tt;
				   Tu = FMA(Ts, Tt, Tq);
				   Th = Tf * Tg;
				   Tj = ii[WS(rs, 2)];
				   TY = FNMS(Ts, Tp, TX);
			      }
			      {
				   E TO, TQ, TN, TP, T1a, T1b;
				   {
					E TK, TM, TL, T19, TV;
					TK = ri[WS(rs, 7)];
					TM = ii[WS(rs, 7)];
					Tk = FMA(Ti, Tj, Th);
					TV = Tf * Tj;
					TL = Tl * TK;
					T19 = Tl * TM;
					TO = ri[WS(rs, 3)];
					TW = FNMS(Ti, Tg, TV);
					TQ = ii[WS(rs, 3)];
					TN = FMA(Tn, TM, TL);
					TP = T3 * TO;
					T1a = FNMS(Tn, TK, T19);
					T1b = T3 * TQ;
				   }
				   {
					E Tx, Tz, Ty, T12, T1c, TR;
					Tx = ri[WS(rs, 1)];
					TR = FMA(T6, TQ, TP);
					Tz = ii[WS(rs, 1)];
					T1c = FNMS(T6, TO, T1b);
					Ty = T2 * Tx;
					T18 = TN - TR;
					TS = TN + TR;
					T12 = T2 * Tz;
					T1d = T1a - T1c;
					T1m = T1a + T1c;
					TD = ri[WS(rs, 5)];
					TH = ii[WS(rs, 5)];
					TA = FMA(T5, Tz, Ty);
					T13 = FNMS(T5, Tx, T12);
					TE = TC * TD;
					T14 = TC * TH;
				   }
			      }
			 }
			 /* Combine stage: sums/differences of the rotated inputs,
			    then the odd-indexed outputs (1, 3, 5, 7), each of
			    which carries a KP707106781 factor. */
			 {
			      E Te, T1p, T1t, Tv;
			      {
				   E T1g, T10, T1z, T1B, T1A, T1j, T1C, T1f;
				   {
					E T1x, T11, T16, T1y;
					{
					     E TU, TZ, TI, T15;
					     Te = T1 + Td;
					     TU = T1 - Td;
					     TZ = TW - TY;
					     T1p = TW + TY;
					     TI = FMA(TG, TH, TE);
					     T15 = FNMS(TG, TD, T14);
					     T1t = T1r + T1s;
					     T1x = T1s - T1r;
					     T1g = TU - TZ;
					     T10 = TU + TZ;
					     T11 = TA - TI;
					     TJ = TA + TI;
					     T1l = T13 + T15;
					     T16 = T13 - T15;
					     T1y = Tk - Tu;
					     Tv = Tk + Tu;
					}
					{
					     E T1i, T1e, T17, T1h;
					     T1i = T18 + T1d;
					     T1e = T18 - T1d;
					     T17 = T11 + T16;
					     T1h = T16 - T11;
					     T1z = T1x - T1y;
					     T1B = T1y + T1x;
					     T1A = T1h + T1i;
					     T1j = T1h - T1i;
					     T1C = T1e - T17;
					     T1f = T17 + T1e;
					}
				   }
				   ri[WS(rs, 7)] = FNMS(KP707106781, T1j, T1g);
				   ii[WS(rs, 7)] = FNMS(KP707106781, T1C, T1B);
				   ri[WS(rs, 1)] = FMA(KP707106781, T1f, T10);
				   ri[WS(rs, 5)] = FNMS(KP707106781, T1f, T10);
				   ii[WS(rs, 1)] = FMA(KP707106781, T1A, T1z);
				   ii[WS(rs, 5)] = FNMS(KP707106781, T1A, T1z);
				   ri[WS(rs, 3)] = FMA(KP707106781, T1j, T1g);
				   ii[WS(rs, 3)] = FMA(KP707106781, T1C, T1B);
			      }
			      T1k = Te - Tv;
			      Tw = Te + Tv;
			      T1w = T1t - T1p;
			      T1u = T1p + T1t;
			 }
		    }
	       }
	       /* Even-indexed outputs (0, 2, 4, 6): plain sums and differences. */
	       {
		    E TT, T1v, T1n, T1o;
		    TT = TJ + TS;
		    T1v = TS - TJ;
		    T1n = T1l - T1m;
		    T1o = T1l + T1m;
		    ii[WS(rs, 2)] = T1v + T1w;
		    ii[WS(rs, 6)] = T1w - T1v;
		    ri[0] = Tw + TT;
		    ri[WS(rs, 4)] = Tw - TT;
		    ii[0] = T1o + T1u;
		    ii[WS(rs, 4)] = T1u - T1o;
		    ri[WS(rs, 2)] = T1k + T1n;
		    ri[WS(rs, 6)] = T1k - T1n;
	       }
	  }
     }
}
Ejemplo n.º 13
0
Archivo: t2_8.c Proyecto: Aegisub/fftw3
/*
 * t2_8: in-place 8-point twiddle butterfly on split real/imaginary data.
 *
 * ri, ii  - real and imaginary arrays.  The eight points of one butterfly
 *           are ri[WS(rs, k)] / ii[WS(rs, k)] for k = 0..7; every one of
 *           them is read and then overwritten with a result.
 * W       - twiddle table.  Six values (three complex factors) are consumed
 *           per iteration; the pointer starts at W + mb * 6.
 * rs      - stride between the eight points (always used through WS()).
 * mb, me  - half-open iteration range [mb, me).
 * ms      - increment applied to ri and ii after each iteration.
 *
 * Only the factors for points 1, 3 and 7 are loaded from W; the factors for
 * points 2, 4, 5 and 6 are rebuilt from them by complex products, which is
 * why this variant needs 6 table entries per iteration instead of 14.
 *
 * NOTE(review): comments below assume FMA(a, b, c) = a * b + c and
 * FNMS(a, b, c) = c - a * b, as in FFTW's headers -- the macros are not
 * defined in this file.
 */
static void t2_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
	  INT m;
	  for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) {
	       E T2, T5, T3, T6, T8, Tc, Tg, Ti, Tl, Tm, Tn, Tz, Tp, Tx;
	       /* Load the three stored factors and derive the remaining four:
		  (T2,T5) -> point 1, (T3,T6) -> point 3, (Tl,Tm) -> point 7,
		  (Tg,Ti) -> point 2, (T8,Tc) -> point 4, (Tx,Tz) -> point 5,
		  (Tn,Tp) -> point 6. */
	       {
		    E T4, Tb, T7, Ta;
		    T2 = W[0];
		    T5 = W[1];
		    T3 = W[2];
		    T6 = W[3];
		    T4 = T2 * T3;
		    Tb = T5 * T3;
		    T7 = T5 * T6;
		    Ta = T2 * T6;
		    T8 = T4 - T7;
		    Tc = Ta + Tb;
		    Tg = T4 + T7;
		    Ti = Ta - Tb;
		    Tl = W[4];
		    Tm = W[5];
		    Tn = FMA(T2, Tl, T5 * Tm);
		    Tz = FNMS(Ti, Tl, Tg * Tm);
		    Tp = FNMS(T5, Tl, T2 * Tm);
		    Tx = FMA(Tg, Tl, Ti * Tm);
	       }
	       {
		    E Tf, T1i, TL, T1d, TJ, T17, TV, TY, Ts, T1j, TO, T1a, TC, T16, TQ;
		    E TT;
		    /* Points 0 and 4: point 0 is used as is, point 4 is scaled
		       by (T8, Tc); keep their sum and difference. */
		    {
			 E T1, T1c, Te, T1b, T9, Td;
			 T1 = ri[0];
			 T1c = ii[0];
			 T9 = ri[WS(rs, 4)];
			 Td = ii[WS(rs, 4)];
			 Te = FMA(T8, T9, Tc * Td);
			 T1b = FNMS(Tc, T9, T8 * Td);
			 Tf = T1 + Te;
			 T1i = T1c - T1b;
			 TL = T1 - Te;
			 T1d = T1b + T1c;
		    }
		    /* Points 7 and 3. */
		    {
			 E TF, TW, TI, TX;
			 {
			      E TD, TE, TG, TH;
			      TD = ri[WS(rs, 7)];
			      TE = ii[WS(rs, 7)];
			      TF = FMA(Tl, TD, Tm * TE);
			      TW = FNMS(Tm, TD, Tl * TE);
			      TG = ri[WS(rs, 3)];
			      TH = ii[WS(rs, 3)];
			      TI = FMA(T3, TG, T6 * TH);
			      TX = FNMS(T6, TG, T3 * TH);
			 }
			 TJ = TF + TI;
			 T17 = TW + TX;
			 TV = TF - TI;
			 TY = TW - TX;
		    }
		    /* Points 2 and 6. */
		    {
			 E Tk, TM, Tr, TN;
			 {
			      E Th, Tj, To, Tq;
			      Th = ri[WS(rs, 2)];
			      Tj = ii[WS(rs, 2)];
			      Tk = FMA(Tg, Th, Ti * Tj);
			      TM = FNMS(Ti, Th, Tg * Tj);
			      To = ri[WS(rs, 6)];
			      Tq = ii[WS(rs, 6)];
			      Tr = FMA(Tn, To, Tp * Tq);
			      TN = FNMS(Tp, To, Tn * Tq);
			 }
			 Ts = Tk + Tr;
			 T1j = Tk - Tr;
			 TO = TM - TN;
			 T1a = TM + TN;
		    }
		    /* Points 1 and 5. */
		    {
			 E Tw, TR, TB, TS;
			 {
			      E Tu, Tv, Ty, TA;
			      Tu = ri[WS(rs, 1)];
			      Tv = ii[WS(rs, 1)];
			      Tw = FMA(T2, Tu, T5 * Tv);
			      TR = FNMS(T5, Tu, T2 * Tv);
			      Ty = ri[WS(rs, 5)];
			      TA = ii[WS(rs, 5)];
			      TB = FMA(Tx, Ty, Tz * TA);
			      TS = FNMS(Tz, Ty, Tx * TA);
			 }
			 TC = Tw + TB;
			 T16 = TR + TS;
			 TQ = Tw - TB;
			 TT = TR - TS;
		    }
		    /* Combine the pair sums/differences and store.  Even outputs
		       (0, 2, 4, 6) need only additions; odd outputs (1, 3, 5, 7)
		       also need the 1/sqrt(2) constant KP707106781. */
		    {
			 E Tt, TK, T1f, T1g;
			 Tt = Tf + Ts;
			 TK = TC + TJ;
			 ri[WS(rs, 4)] = Tt - TK;
			 ri[0] = Tt + TK;
			 {
			      E T19, T1e, T15, T18;
			      T19 = T16 + T17;
			      T1e = T1a + T1d;
			      ii[0] = T19 + T1e;
			      ii[WS(rs, 4)] = T1e - T19;
			      T15 = Tf - Ts;
			      T18 = T16 - T17;
			      ri[WS(rs, 6)] = T15 - T18;
			      ri[WS(rs, 2)] = T15 + T18;
			 }
			 T1f = TJ - TC;
			 T1g = T1d - T1a;
			 ii[WS(rs, 2)] = T1f + T1g;
			 ii[WS(rs, 6)] = T1g - T1f;
			 {
			      E T11, T1k, T14, T1h, T12, T13;
			      T11 = TL - TO;
			      T1k = T1i - T1j;
			      T12 = TT - TQ;
			      T13 = TV + TY;
			      T14 = KP707106781 * (T12 - T13);
			      T1h = KP707106781 * (T12 + T13);
			      ri[WS(rs, 7)] = T11 - T14;
			      ii[WS(rs, 5)] = T1k - T1h;
			      ri[WS(rs, 3)] = T11 + T14;
			      ii[WS(rs, 1)] = T1h + T1k;
			 }
			 {
			      E TP, T1m, T10, T1l, TU, TZ;
			      TP = TL + TO;
			      T1m = T1j + T1i;
			      TU = TQ + TT;
			      TZ = TV - TY;
			      T10 = KP707106781 * (TU + TZ);
			      T1l = KP707106781 * (TZ - TU);
			      ri[WS(rs, 5)] = TP - T10;
			      ii[WS(rs, 7)] = T1m - T1l;
			      ri[WS(rs, 1)] = TP + T10;
			      ii[WS(rs, 3)] = T1l + T1m;
			 }
		    }
	       }
	  }
     }
}
Ejemplo n.º 14
0
/*
 * r2cfII_20: straight-line kernel that turns 20 real inputs into 10 real
 * and 10 imaginary outputs.
 *
 * R0, R1    - input arrays; ten values are read from each per iteration,
 *             at R0[WS(rs, k)] and R1[WS(rs, k)] for k = 0..9.
 * Cr, Ci    - output arrays; Cr[WS(csr, k)] and Ci[WS(csi, k)] are each
 *             written exactly once for k = 0..9.
 * rs        - input stride; csr / csi - output strides for Cr / Ci.
 * v         - number of iterations (the loop counts i from v down to 1).
 * ivs, ovs  - increments applied after each iteration to the input
 *             pointers (R0, R1) and the output pointers (Cr, Ci).
 *
 * NOTE(review): judging by the name this is presumably the size-20 member
 * of FFTW's "r2cfII" real-to-complex codelet family; the transform
 * definition itself is not stated anywhere in this file -- confirm against
 * the FFTW sources before relying on it.
 *
 * The statement order is a generated schedule; temporaries are shared
 * between distant blocks, so statements must not be reordered casually.
 */
static void r2cfII_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
{
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DK(KP690983005, +0.690983005625052575897706582817180941139845410);
     DK(KP552786404, +0.552786404500042060718165266253744752911876328);
     DK(KP447213595, +0.447213595499957939281834733746255247088123672);
     DK(KP809016994, +0.809016994374947424102293417182819058860154590);
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
     DK(KP381966011, +0.381966011250105151795413165634361882279690820);
     {
	  INT i;
	  for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
	       /* These eight temporaries outlive the first block and feed the
		  final stores (Cr/Ci indices 0, 4, 5, 9) at the bottom. */
	       E Tv, TK, TN, Th, T1l, T1n, Ts, TH;
	       {
		    E Ti, T1d, T1f, T1e, T1g, T1p, TS, Tg, To, T8, T7, T19, T1r, T1k, Tx;
		    E Tp, TX, Ty, TF, Tr, TV, Tz, TA, TI;
		    /* Load phase: all ten R0 values and all ten R1 values are
		       read here, interleaved with the first partial sums. */
		    {
			 E Ta, Tb, Td, Te;
			 Ti = R1[WS(rs, 2)];
			 T1d = R0[WS(rs, 5)];
			 Ta = R0[WS(rs, 9)];
			 Tb = R0[WS(rs, 1)];
			 Td = R0[WS(rs, 3)];
			 Te = R0[WS(rs, 7)];
			 {
			      E T1, T2, T5, T3, T4, T1i, Tc, Tf;
			      T1 = R0[0];
			      T1f = Ta + Tb;
			      Tc = Ta - Tb;
			      T1e = Td + Te;
			      Tf = Td - Te;
			      T2 = R0[WS(rs, 4)];
			      T5 = R0[WS(rs, 6)];
			      T1g = FMA(KP381966011, T1f, T1e);
			      T1p = FMA(KP381966011, T1e, T1f);
			      TS = FMA(KP618033988, Tc, Tf);
			      Tg = FNMS(KP618033988, Tf, Tc);
			      T3 = R0[WS(rs, 8)];
			      T4 = R0[WS(rs, 2)];
			      T1i = T2 + T5;
			      {
				   E Tj, Tu, Tm, Tt, Tn, Tq, TU;
				   Tj = R1[WS(rs, 8)];
				   To = R1[WS(rs, 6)];
				   {
					E T6, T1j, Tk, Tl;
					T6 = T2 + T3 - T4 - T5;
					T8 = (T3 + T5 - T2) - T4;
					T1j = T3 + T4;
					Tk = R1[0];
					Tl = R1[WS(rs, 4)];
					T7 = FNMS(KP250000000, T6, T1);
					T19 = T1 + T6;
					T1r = FNMS(KP618033988, T1i, T1j);
					T1k = FMA(KP618033988, T1j, T1i);
					Tu = Tk - Tl;
					Tm = Tk + Tl;
				   }
				   Tt = To + Tj;
				   Tx = R1[WS(rs, 7)];
				   Tn = Tj - Tm;
				   Tp = Tj + Tm;
				   Tv = FNMS(KP618033988, Tu, Tt);
				   TX = FMA(KP618033988, Tt, Tu);
				   Tq = FMA(KP809016994, Tp, To);
				   TU = FMA(KP447213595, Tp, Tn);
				   Ty = R1[WS(rs, 1)];
				   TF = R1[WS(rs, 3)];
				   Tr = FNMS(KP552786404, Tq, Tn);
				   TV = FNMS(KP690983005, TU, To);
				   Tz = R1[WS(rs, 5)];
				   TA = R1[WS(rs, 9)];
				   TI = TF + Ty;
			      }
			 }
		    }
		    /* Combine phase: stores Cr/Ci indices 1, 2, 3, 6, 7, 8. */
		    {
			 E T1w, TJ, TB, T1a;
			 T1w = T1f + T1d - T1e;
			 TJ = Tz - TA;
			 TB = Tz + TA;
			 T1a = Ti + To - Tp;
			 {
			      E T9, T12, TT, T15, TG, TD, T1s, T1u, TW, T11, T10, T1h;
			      {
				   E TE, TC, TR, T1b;
				   T9 = FNMS(KP559016994, T8, T7);
				   TR = FMA(KP559016994, T8, T7);
				   TK = FMA(KP618033988, TJ, TI);
				   T12 = FNMS(KP618033988, TI, TJ);
				   TE = Ty - TB;
				   TC = Ty + TB;
				   TT = FMA(KP951056516, TS, TR);
				   T15 = FNMS(KP951056516, TS, TR);
				   TG = FNMS(KP552786404, TF, TE);
				   T1b = TC - TF - Tx;
				   {
					E TZ, T1q, T1c, T1x;
					TZ = FMA(KP447213595, TC, TE);
					TD = FMA(KP250000000, TC, Tx);
					T1q = FNMS(KP809016994, T1p, T1d);
					T1c = T1a + T1b;
					T1x = T1a - T1b;
					T10 = FNMS(KP690983005, TZ, TF);
					T1s = FNMS(KP951056516, T1r, T1q);
					T1u = FMA(KP951056516, T1r, T1q);
					Ci[WS(csi, 7)] = FMA(KP707106781, T1x, T1w);
					Ci[WS(csi, 2)] = FMS(KP707106781, T1x, T1w);
					Cr[WS(csr, 7)] = FMA(KP707106781, T1c, T19);
					Cr[WS(csr, 2)] = FNMS(KP707106781, T1c, T19);
				   }
			      }
			      TW = FNMS(KP809016994, TV, Ti);
			      T11 = FNMS(KP809016994, T10, Tx);
			      T1h = FMA(KP809016994, T1g, T1d);
			      {
				   E T17, TY, T16, T13;
				   T17 = FNMS(KP951056516, TX, TW);
				   TY = FMA(KP951056516, TX, TW);
				   T16 = FMA(KP951056516, T12, T11);
				   T13 = FNMS(KP951056516, T12, T11);
				   TN = FMA(KP951056516, Tg, T9);
				   Th = FNMS(KP951056516, Tg, T9);
				   {
					E T18, T1v, T1t, T14;
					T18 = T16 - T17;
					T1v = T17 + T16;
					T1t = TY + T13;
					T14 = TY - T13;
					Cr[WS(csr, 1)] = FMA(KP707106781, T18, T15);
					Cr[WS(csr, 8)] = FNMS(KP707106781, T18, T15);
					Ci[WS(csi, 3)] = FMA(KP707106781, T1v, T1u);
					Ci[WS(csi, 6)] = FMS(KP707106781, T1v, T1u);
					Ci[WS(csi, 1)] = FNMS(KP707106781, T1t, T1s);
					Ci[WS(csi, 8)] = -(FMA(KP707106781, T1t, T1s));
					Cr[WS(csr, 3)] = FMA(KP707106781, T14, TT);
					Cr[WS(csr, 6)] = FNMS(KP707106781, T14, TT);
					T1l = FMA(KP951056516, T1k, T1h);
					T1n = FNMS(KP951056516, T1k, T1h);
				   }
			      }
			      Ts = FNMS(KP559016994, Tr, Ti);
			      TH = FNMS(KP559016994, TG, TD);
			 }
		    }
	       }
	       /* Final phase: stores the remaining Cr/Ci indices 0, 4, 5, 9. */
	       {
		    E TO, Tw, TP, TL;
		    TO = FMA(KP951056516, Tv, Ts);
		    Tw = FNMS(KP951056516, Tv, Ts);
		    TP = FMA(KP951056516, TK, TH);
		    TL = FNMS(KP951056516, TK, TH);
		    {
			 E TQ, T1m, T1o, TM;
			 TQ = TO - TP;
			 T1m = TO + TP;
			 T1o = Tw + TL;
			 TM = Tw - TL;
			 Cr[WS(csr, 4)] = FMA(KP707106781, TQ, TN);
			 Cr[WS(csr, 5)] = FNMS(KP707106781, TQ, TN);
			 Ci[WS(csi, 9)] = FNMS(KP707106781, T1m, T1l);
			 Ci[0] = -(FMA(KP707106781, T1m, T1l));
			 Ci[WS(csi, 5)] = FNMS(KP707106781, T1o, T1n);
			 Ci[WS(csi, 4)] = -(FMA(KP707106781, T1o, T1n));
			 Cr[0] = FMA(KP707106781, TM, Th);
			 Cr[WS(csr, 9)] = FNMS(KP707106781, TM, Th);
		    }
	       }
	  }
     }
}
Ejemplo n.º 15
0
/*
 * t1_8: in-place 8-point twiddle butterfly on split real/imaginary data,
 * with every twiddle factor read directly from the table.
 *
 * ri, ii  - real and imaginary arrays.  The eight points of one butterfly
 *           are ri[WS(rs, k)] / ii[WS(rs, k)] for k = 0..7; each is read
 *           and then overwritten with a result.
 * W       - twiddle table.  Fourteen values are consumed per iteration:
 *           point k (k = 1..7) uses the pair W[2k-2], W[2k-1]; point 0 is
 *           used unscaled.  The pointer starts at W + mb * 14.
 * rs      - stride between the eight points (always used through WS()).
 * mb, me  - half-open iteration range [mb, me).
 * ms      - increment applied to ri and ii after each iteration.
 *
 * Unlike the neighbouring codelets, this copy spells the pointer types as
 * float instead of R.
 *
 * NOTE(review): loads, multiplies and adds are deliberately interleaved by
 * the generator; the products started in the first block (e.g. TW, Tp) are
 * only finished in the second block, so the blocks cannot be reordered.
 */
static void t1_8(float *ri, float *ii, const float *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     INT m;
     for (m = mb, W = W + (mb * 14); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs)) {
	  /* These survive the inner block and feed the even-index stores
	     (0, 2, 4, 6) at the end of the loop body. */
	  E T1g, T1f, T1e, Tm, T1q, T1o, T1p, TN, T1h, T1i;
	  {
	       E T1, T1m, T1l, T7, TS, Tk, TQ, Te, To, Tr, T17, TM, T12, Tu, TW;
	       E Tp, Tx, Tt, Tq, Tw;
	       /* Load all eight points and their twiddles; scale points 4, 6,
		  2, 7 and 3 here and begin the products for points 1 and 5. */
	       {
		    E T3, T6, T2, T5;
		    T1 = ri[0];
		    T1m = ii[0];
		    T3 = ri[WS(rs, 4)];
		    T6 = ii[WS(rs, 4)];
		    T2 = W[6];
		    T5 = W[7];
		    {
			 E Ta, Td, T9, Tc;
			 {
			      E Tg, Tj, Ti, TR, Th, T1k, T4, Tf;
			      Tg = ri[WS(rs, 6)];
			      Tj = ii[WS(rs, 6)];
			      T1k = T2 * T6;
			      T4 = T2 * T3;
			      Tf = W[10];
			      Ti = W[11];
			      T1l = FNMS(T5, T3, T1k);
			      T7 = FMA(T5, T6, T4);
			      TR = Tf * Tj;
			      Th = Tf * Tg;
			      Ta = ri[WS(rs, 2)];
			      Td = ii[WS(rs, 2)];
			      TS = FNMS(Ti, Tg, TR);
			      Tk = FMA(Ti, Tj, Th);
			      T9 = W[2];
			      Tc = W[3];
			 }
			 {
			      E TB, TE, TH, T13, TC, TK, TG, TD, TJ, TP, Tb, TA, Tn;
			      TB = ri[WS(rs, 7)];
			      TE = ii[WS(rs, 7)];
			      TP = T9 * Td;
			      Tb = T9 * Ta;
			      TA = W[12];
			      TH = ri[WS(rs, 3)];
			      TQ = FNMS(Tc, Ta, TP);
			      Te = FMA(Tc, Td, Tb);
			      T13 = TA * TE;
			      TC = TA * TB;
			      TK = ii[WS(rs, 3)];
			      TG = W[4];
			      TD = W[13];
			      TJ = W[5];
			      {
				   E T14, TF, T16, TL, T15, TI;
				   To = ri[WS(rs, 1)];
				   T15 = TG * TK;
				   TI = TG * TH;
				   T14 = FNMS(TD, TB, T13);
				   TF = FMA(TD, TE, TC);
				   T16 = FNMS(TJ, TH, T15);
				   TL = FMA(TJ, TK, TI);
				   Tr = ii[WS(rs, 1)];
				   Tn = W[0];
				   T17 = T14 - T16;
				   T1g = T14 + T16;
				   TM = TF + TL;
				   T12 = TF - TL;
			      }
			      Tu = ri[WS(rs, 5)];
			      TW = Tn * Tr;
			      Tp = Tn * To;
			      Tx = ii[WS(rs, 5)];
			      Tt = W[8];
			      Tq = W[1];
			      Tw = W[9];
			 }
		    }
	       }
	       /* Finish scaling points 1 and 5, form the pair sums and
		  differences, and store the odd outputs (1, 3, 5, 7), which
		  need the 1/sqrt(2) constant KP707106781. */
	       {
		    E T8, T1j, T1n, Tz, T1a, TU, Tl, T1b, T1c, T1v, T1t, T1w, T19, T1u, T1d;
		    {
			 E T1r, T10, TV, T1s, T11, T18;
			 {
			      E TO, TX, Ts, TZ, Ty, TT, TY, Tv;
			      T8 = T1 + T7;
			      TO = T1 - T7;
			      TY = Tt * Tx;
			      Tv = Tt * Tu;
			      TX = FNMS(Tq, To, TW);
			      Ts = FMA(Tq, Tr, Tp);
			      TZ = FNMS(Tw, Tu, TY);
			      Ty = FMA(Tw, Tx, Tv);
			      TT = TQ - TS;
			      T1j = TQ + TS;
			      T1n = T1l + T1m;
			      T1r = T1m - T1l;
			      T10 = TX - TZ;
			      T1f = TX + TZ;
			      Tz = Ts + Ty;
			      TV = Ts - Ty;
			      T1a = TO - TT;
			      TU = TO + TT;
			      T1s = Te - Tk;
			      Tl = Te + Tk;
			 }
			 T1b = T10 - TV;
			 T11 = TV + T10;
			 T18 = T12 - T17;
			 T1c = T12 + T17;
			 T1v = T1s + T1r;
			 T1t = T1r - T1s;
			 T1w = T18 - T11;
			 T19 = T11 + T18;
		    }
		    ii[WS(rs, 3)] = FMA(KP707106781, T1w, T1v);
		    ii[WS(rs, 7)] = FNMS(KP707106781, T1w, T1v);
		    ri[WS(rs, 1)] = FMA(KP707106781, T19, TU);
		    ri[WS(rs, 5)] = FNMS(KP707106781, T19, TU);
		    T1u = T1b + T1c;
		    T1d = T1b - T1c;
		    ii[WS(rs, 1)] = FMA(KP707106781, T1u, T1t);
		    ii[WS(rs, 5)] = FNMS(KP707106781, T1u, T1t);
		    ri[WS(rs, 3)] = FMA(KP707106781, T1d, T1a);
		    ri[WS(rs, 7)] = FNMS(KP707106781, T1d, T1a);
		    T1e = T8 - Tl;
		    Tm = T8 + Tl;
		    T1q = T1n - T1j;
		    T1o = T1j + T1n;
		    T1p = TM - Tz;
		    TN = Tz + TM;
	       }
	  }
	  /* Even outputs (0, 2, 4, 6): additions and subtractions only. */
	  ii[WS(rs, 2)] = T1p + T1q;
	  ii[WS(rs, 6)] = T1q - T1p;
	  ri[0] = Tm + TN;
	  ri[WS(rs, 4)] = Tm - TN;
	  T1h = T1f - T1g;
	  T1i = T1f + T1g;
	  ii[0] = T1i + T1o;
	  ii[WS(rs, 4)] = T1o - T1i;
	  ri[WS(rs, 2)] = T1e + T1h;
	  ri[WS(rs, 6)] = T1e - T1h;
     }
}
/*
 * hc2cfdft2_16: in-place kernel operating on four arrays (Rp, Ip, Rm, Im),
 * eight strided slots each, with a compressed twiddle table.
 *
 * Rp, Ip, Rm, Im - data arrays.  Slots X[WS(rs, k)], k = 0..7, of all four
 *                  arrays are read and every one of the 32 slots is then
 *                  overwritten with a result.
 * W              - twiddle table.  Eight values (W[0..7]) are consumed per
 *                  iteration; the pointer starts at W + (mb - 1) * 8.
 * rs             - stride between slots (always used through WS()).
 * mb, me         - half-open iteration range [mb, me).
 * ms             - per-iteration step: Rp and Ip advance by +ms while Rm
 *                  and Im move by -ms, so the "p" and "m" pointers walk
 *                  towards each other.
 *
 * Every stored result is scaled by KP500000000 (0.5).
 *
 * Only four complex factors are loaded from W -- (T1,T4), (T2,T5), (Th,Tj)
 * and (Tw,Ty); all other factors used below are products of those.
 *
 * NOTE(review): judging by the name this is presumably FFTW's size-16
 * "hc2cfdft2" codelet (half-complex to complex, forward, twiddle-table
 * variant 2); the transform definition is not stated in this file.  The
 * statement order is a generated schedule and should not be rearranged.
 */
static void hc2cfdft2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
	  INT m;
	  for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) {
	       /* These four feed the last group of stores (slots Rp[3], Rm[4],
		  Im[4], Ip[3]) after the nested blocks close. */
	       E T4p, T4o, T4n, T4s;
	       {
		    E T1, T2, Tw, Ty, Th, T3, Tx, TE, Ti, TK, Tj, T4, T5;
		    /* Load the eight table entries and start the twiddle
		       products that are completed further down. */
		    T1 = W[0];
		    T2 = W[2];
		    Tw = W[6];
		    Ty = W[7];
		    Th = W[4];
		    T3 = T1 * T2;
		    Tx = T1 * Tw;
		    TE = T1 * Ty;
		    Ti = T1 * Th;
		    TK = T2 * Th;
		    Tj = W[5];
		    T4 = W[1];
		    T5 = W[3];
		    {
			 E T1v, T2q, T1s, T2s, T38, T3T, T1Y, T3P, T17, T1h, T2x, T2v, T33, T3Q, T3S;
			 E T1N, Tv, T3A, T2E, T3B, T3L, T2c, T3I, T2S, TW, T3E, T3J, T2n, T3D, T2J;
			 E T3M, T2X;
			 /* Input phase: read slots 0..7 of all four arrays, form
			    p+m / p-m combinations and apply the twiddles. */
			 {
			      E TF, Tk, Tz, TL, T6, TR, Tq, Tc, T2h, T25, T2k, T29, T1G, T1M, T2P;
			      E T2R;
			      {
				   E T18, TY, T1d, T13, T1H, T1A, T1K, T1E, T37, T1R, T35, T1X;
				   {
					E T1j, T1o, T1W, T1p, T1m, T1Q, T1U, T1q;
					{
					     E T1k, T1l, T1S, T1T;
					     {
						  E T1t, T28, T24, T1D, T1z, T1u, TQ, Tp, Tb;
						  T1t = Ip[0];
						  TQ = T2 * Tj;
						  Tp = T1 * Tj;
						  TF = FNMS(T4, Tw, TE);
						  T1j = FMA(T4, Tj, Ti);
						  Tk = FNMS(T4, Tj, Ti);
						  Tz = FMA(T4, Ty, Tx);
						  T18 = FNMS(T5, Tj, TK);
						  TL = FMA(T5, Tj, TK);
						  TY = FNMS(T4, T5, T3);
						  T6 = FMA(T4, T5, T3);
						  Tb = T1 * T5;
						  TR = FNMS(T5, Th, TQ);
						  T1d = FMA(T5, Th, TQ);
						  Tq = FMA(T4, Th, Tp);
						  T1o = FNMS(T4, Th, Tp);
						  T28 = T6 * Tj;
						  T24 = T6 * Th;
						  T1D = TY * Tj;
						  T1z = TY * Th;
						  Tc = FNMS(T4, T2, Tb);
						  T13 = FMA(T4, T2, Tb);
						  T1u = Im[0];
						  T1k = Ip[WS(rs, 4)];
						  T2h = FMA(Tc, Tj, T24);
						  T25 = FNMS(Tc, Tj, T24);
						  T2k = FNMS(Tc, Th, T28);
						  T29 = FMA(Tc, Th, T28);
						  T1H = FNMS(T13, Tj, T1z);
						  T1A = FMA(T13, Tj, T1z);
						  T1K = FMA(T13, Th, T1D);
						  T1E = FNMS(T13, Th, T1D);
						  T1W = T1t + T1u;
						  T1v = T1t - T1u;
						  T1l = Im[WS(rs, 4)];
					     }
					     T1S = Rm[0];
					     T1T = Rp[0];
					     T1p = Rp[WS(rs, 4)];
					     T1m = T1k - T1l;
					     T1Q = T1k + T1l;
					     T2q = T1T + T1S;
					     T1U = T1S - T1T;
					     T1q = Rm[WS(rs, 4)];
					}
					{
					     E T36, T1V, T1O, T1r, T1n, T1P, T34, T2r;
					     T36 = T4 * T1U;
					     T1V = T1 * T1U;
					     T1O = T1q - T1p;
					     T1r = T1p + T1q;
					     T1n = T1j * T1m;
					     T37 = FMA(T1, T1W, T36);
					     T2r = T1j * T1r;
					     T1P = Th * T1O;
					     T34 = Tj * T1O;
					     T1s = FNMS(T1o, T1r, T1n);
					     T2s = FMA(T1o, T1m, T2r);
					     T1R = FNMS(Tj, T1Q, T1P);
					     T35 = FMA(Th, T1Q, T34);
					     T1X = FNMS(T4, T1W, T1V);
					}
				   }
				   {
					E T1F, T11, T1e, T16, T1L, T1b, T1f, T1C, T2Z;
					{
					     E T14, T15, TZ, T10, T19, T1a, T1B;
					     TZ = Ip[WS(rs, 2)];
					     T10 = Im[WS(rs, 2)];
					     T38 = T35 + T37;
					     T3T = T37 - T35;
					     T1Y = T1R + T1X;
					     T3P = T1X - T1R;
					     T1F = TZ + T10;
					     T11 = TZ - T10;
					     T14 = Rp[WS(rs, 2)];
					     T15 = Rm[WS(rs, 2)];
					     T19 = Ip[WS(rs, 6)];
					     T1a = Im[WS(rs, 6)];
					     T1e = Rp[WS(rs, 6)];
					     T16 = T14 + T15;
					     T1B = T15 - T14;
					     T1L = T19 + T1a;
					     T1b = T19 - T1a;
					     T1f = Rm[WS(rs, 6)];
					     T1C = T1A * T1B;
					     T2Z = T1E * T1B;
					}
					{
					     E T1J, T31, T2u, T30, T32;
					     {
						  E T12, T1g, T1I, T1c, T2w;
						  T12 = TY * T11;
						  T1g = T1e + T1f;
						  T1I = T1f - T1e;
						  T1c = T18 * T1b;
						  T17 = FNMS(T13, T16, T12);
						  T2w = T18 * T1g;
						  T1J = T1H * T1I;
						  T31 = T1K * T1I;
						  T1h = FNMS(T1d, T1g, T1c);
						  T2x = FMA(T1d, T1b, T2w);
					     }
					     T2u = TY * T16;
					     T30 = FMA(T1A, T1F, T2Z);
					     T32 = FMA(T1H, T1L, T31);
					     T1G = FNMS(T1E, T1F, T1C);
					     T2v = FMA(T13, T11, T2u);
					     T1M = FNMS(T1K, T1L, T1J);
					     T33 = T30 + T32;
					     T3Q = T30 - T32;
					}
				   }
			      }
			      {
				   E Tl, T22, T9, T20, Tf, T2O, Ta, T21, T2A, Tm, Tr, Ts;
				   {
					E T7, T8, Td, Te;
					T7 = Ip[WS(rs, 1)];
					T3S = T1G - T1M;
					T1N = T1G + T1M;
					T8 = Im[WS(rs, 1)];
					Td = Rp[WS(rs, 1)];
					Te = Rm[WS(rs, 1)];
					Tl = Ip[WS(rs, 5)];
					T22 = T7 + T8;
					T9 = T7 - T8;
					T20 = Td - Te;
					Tf = Td + Te;
					T2O = T2 * T22;
					Ta = T6 * T9;
					T21 = T2 * T20;
					T2A = T6 * Tf;
					Tm = Im[WS(rs, 5)];
					Tr = Rp[WS(rs, 5)];
					Ts = Rm[WS(rs, 5)];
				   }
				   {
					E Tg, T2a, Tn, T26, T2Q, T27, T2C, T2B, Tu, Tt, To, T23, T2D, T2b;
					Tg = FNMS(Tc, Tf, Ta);
					T2a = Tl + Tm;
					Tn = Tl - Tm;
					T26 = Tr - Ts;
					Tt = Tr + Ts;
					T2Q = T25 * T2a;
					To = Tk * Tn;
					T27 = T25 * T26;
					T2C = Tk * Tt;
					T2B = FMA(Tc, T9, T2A);
					Tu = FNMS(Tq, Tt, To);
					T23 = FMA(T5, T22, T21);
					T2D = FMA(Tq, Tn, T2C);
					T2b = FMA(T29, T2a, T27);
					Tv = Tg + Tu;
					T3A = Tg - Tu;
					T2P = FNMS(T5, T20, T2O);
					T2E = T2B + T2D;
					T3B = T2B - T2D;
					T3L = T2b - T23;
					T2c = T23 + T2b;
					T2R = FNMS(T29, T26, T2Q);
				   }
			      }
			      {
				   E T2f, TC, T2T, TD, T2d, TI, TS, T2e, T2F, T2l, TO, TT;
				   {
					E TG, TH, TA, TB, TM, TN;
					TA = Ip[WS(rs, 7)];
					TB = Im[WS(rs, 7)];
					TG = Rp[WS(rs, 7)];
					T3I = T2R - T2P;
					T2S = T2P + T2R;
					T2f = TA + TB;
					TC = TA - TB;
					TH = Rm[WS(rs, 7)];
					TM = Ip[WS(rs, 3)];
					T2T = Tw * T2f;
					TD = Tz * TC;
					T2d = TG - TH;
					TI = TG + TH;
					TN = Im[WS(rs, 3)];
					TS = Rp[WS(rs, 3)];
					T2e = Tw * T2d;
					T2F = Tz * TI;
					T2l = TM + TN;
					TO = TM - TN;
					TT = Rm[WS(rs, 3)];
				   }
				   {
					E TJ, T2V, TP, T2i, TU, T2G;
					TJ = FNMS(TF, TI, TD);
					T2V = T2h * T2l;
					TP = TL * TO;
					T2i = TS - TT;
					TU = TS + TT;
					T2G = FMA(TF, TC, T2F);
					{
					     E T2g, T2j, TV, T2H;
					     T2g = FMA(Ty, T2f, T2e);
					     T2j = T2h * T2i;
					     TV = FNMS(TR, TU, TP);
					     T2H = TL * TU;
					     {
						  E T2U, T2m, T2I, T2W;
						  T2U = FNMS(Ty, T2d, T2T);
						  T2m = FMA(T2k, T2l, T2j);
						  TW = TJ + TV;
						  T3E = TJ - TV;
						  T2I = FMA(TR, TO, T2H);
						  T2W = FNMS(T2k, T2i, T2V);
						  T3J = T2m - T2g;
						  T2n = T2g + T2m;
						  T3D = T2G - T2I;
						  T2J = T2G + T2I;
						  T3M = T2U - T2W;
						  T2X = T2U + T2W;
					     }
					}
				   }
			      }
			 }
			 /* Output phase: all inputs have been consumed, so the
			    stores below may overwrite the arrays in place. */
			 {
			      E T3Y, T3x, T3X, T3y, T3r, T3q, T3p, T3u;
			      {
				   E T2Y, T3o, TX, T3s, T3i, T39, T3t, T3l, T3e, T1x, T2M, T2p, T3d, T2K, T2t;
				   E T2y;
				   {
					E T2o, T1Z, T3j, T3k, T1i, T1w, T3g, T3h;
					T2Y = T2S + T2X;
					T3g = T2X - T2S;
					T3h = T2c - T2n;
					T2o = T2c + T2n;
					T1Z = T1N + T1Y;
					T3j = T1Y - T1N;
					T3o = Tv - TW;
					TX = Tv + TW;
					T3s = T3g - T3h;
					T3i = T3g + T3h;
					T3k = T38 - T33;
					T39 = T33 + T38;
					T3Y = T17 - T1h;
					T1i = T17 + T1h;
					T1w = T1s + T1v;
					T3x = T1v - T1s;
					T3t = T3j + T3k;
					T3l = T3j - T3k;
					T3e = T1w - T1i;
					T1x = T1i + T1w;
					T2M = T2o + T1Z;
					T2p = T1Z - T2o;
					T3d = T2J - T2E;
					T2K = T2E + T2J;
					T3X = T2q - T2s;
					T2t = T2q + T2s;
					T2y = T2v + T2x;
					T3y = T2v - T2x;
				   }
				   {
					E T2N, T3c, T3a, T3n, T3b, T2L, T2z, T1y;
					T2N = T1x - TX;
					T1y = TX + T1x;
					T3c = T2Y + T39;
					T3a = T2Y - T39;
					T3n = T2t - T2y;
					T2z = T2t + T2y;
					Ip[0] = KP500000000 * (T1y + T2p);
					Im[WS(rs, 7)] = KP500000000 * (T2p - T1y);
					T3b = T2z + T2K;
					T2L = T2z - T2K;
					{
					     E T3f, T3m, T3v, T3w;
					     T3r = T3e - T3d;
					     T3f = T3d + T3e;
					     Im[WS(rs, 3)] = KP500000000 * (T3a - T2N);
					     Ip[WS(rs, 4)] = KP500000000 * (T2N + T3a);
					     Rp[WS(rs, 4)] = KP500000000 * (T2L + T2M);
					     Rm[WS(rs, 3)] = KP500000000 * (T2L - T2M);
					     Rp[0] = KP500000000 * (T3b + T3c);
					     Rm[WS(rs, 7)] = KP500000000 * (T3b - T3c);
					     T3m = T3i + T3l;
					     T3q = T3l - T3i;
					     T3p = T3n - T3o;
					     T3v = T3n + T3o;
					     T3w = T3s + T3t;
					     T3u = T3s - T3t;
					     Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP707106781, T3m, T3f)));
					     Ip[WS(rs, 2)] = KP500000000 * (FMA(KP707106781, T3m, T3f));
					     Rp[WS(rs, 2)] = KP500000000 * (FMA(KP707106781, T3w, T3v));
					     Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP707106781, T3w, T3v));
					}
				   }
			      }
			      {
				   E T3R, T4b, T3z, T4q, T4g, T3U, T40, T41, T4r, T4j, T4m, T3G, T46, T3O, T4l;
				   E T3Z, T4c;
				   {
					E T3K, T3N, T4h, T4i, T3C, T3F, T4e, T4f;
					Rp[WS(rs, 6)] = KP500000000 * (FMA(KP707106781, T3q, T3p));
					Rm[WS(rs, 1)] = KP500000000 * (FNMS(KP707106781, T3q, T3p));
					Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP707106781, T3u, T3r)));
					Ip[WS(rs, 6)] = KP500000000 * (FMA(KP707106781, T3u, T3r));
					T3K = T3I + T3J;
					T4e = T3I - T3J;
					T4f = T3M - T3L;
					T3N = T3L + T3M;
					T3R = T3P - T3Q;
					T4h = T3Q + T3P;
					T4b = T3y + T3x;
					T3z = T3x - T3y;
					T4q = FNMS(KP414213562, T4e, T4f);
					T4g = FMA(KP414213562, T4f, T4e);
					T4i = T3T - T3S;
					T3U = T3S + T3T;
					T40 = T3B + T3A;
					T3C = T3A - T3B;
					T3F = T3D + T3E;
					T41 = T3D - T3E;
					T4r = FNMS(KP414213562, T4h, T4i);
					T4j = FMA(KP414213562, T4i, T4h);
					T4m = T3C - T3F;
					T3G = T3C + T3F;
					T46 = FNMS(KP414213562, T3K, T3N);
					T3O = FMA(KP414213562, T3N, T3K);
					T4l = T3X - T3Y;
					T3Z = T3X + T3Y;
				   }
				   {
					E T45, T3H, T42, T47, T3V;
					T45 = FNMS(KP707106781, T3G, T3z);
					T3H = FMA(KP707106781, T3G, T3z);
					T4c = T41 - T40;
					T42 = T40 + T41;
					T47 = FMA(KP414213562, T3R, T3U);
					T3V = FNMS(KP414213562, T3U, T3R);
					{
					     E T49, T43, T48, T4a, T44, T3W;
					     T49 = FMA(KP707106781, T42, T3Z);
					     T43 = FNMS(KP707106781, T42, T3Z);
					     T48 = T46 - T47;
					     T4a = T46 + T47;
					     T44 = T3V - T3O;
					     T3W = T3O + T3V;
					     Rp[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T4a, T49));
					     Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP923879532, T4a, T49));
					     Rp[WS(rs, 5)] = KP500000000 * (FMA(KP923879532, T44, T43));
					     Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP923879532, T44, T43));
					     Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP923879532, T3W, T3H)));
					     Ip[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T3W, T3H));
					     Ip[WS(rs, 5)] = KP500000000 * (FMA(KP923879532, T48, T45));
					     Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP923879532, T48, T45)));
					}
				   }
				   {
					E T4d, T4k, T4t, T4u;
					T4p = FMA(KP707106781, T4c, T4b);
					T4d = FNMS(KP707106781, T4c, T4b);
					T4k = T4g - T4j;
					T4o = T4g + T4j;
					T4n = FMA(KP707106781, T4m, T4l);
					T4t = FNMS(KP707106781, T4m, T4l);
					T4u = T4q + T4r;
					T4s = T4q - T4r;
					Im[0] = -(KP500000000 * (FNMS(KP923879532, T4k, T4d)));
					Ip[WS(rs, 7)] = KP500000000 * (FMA(KP923879532, T4k, T4d));
					Rm[0] = KP500000000 * (FMA(KP923879532, T4u, T4t));
					Rp[WS(rs, 7)] = KP500000000 * (FNMS(KP923879532, T4u, T4t));
				   }
			      }
			 }
		    }
	       }
	       Rp[WS(rs, 3)] = KP500000000 * (FMA(KP923879532, T4o, T4n));
	       Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP923879532, T4o, T4n));
	       Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP923879532, T4s, T4p)));
	       Ip[WS(rs, 3)] = KP500000000 * (FMA(KP923879532, T4s, T4p));
	  }
     }
}
Ejemplo n.º 17
0
/*
 * hc2r_15: rebuilds 15 real outputs from 8 real parts and 7 imaginary
 * parts of a half-complex spectrum.
 *
 * ri        - real parts; ri[WS(ris, k)] is read for k = 0..7.
 * ii        - imaginary parts; ii[WS(iis, k)] is read for k = 1..7
 *             (ii[0] is never read).
 * O         - output; O[WS(os, n)] is written exactly once for n = 0..14.
 * ris, iis  - strides of ri and ii; os - stride of O.
 * v         - number of iterations (the loop counts i from v down to 1).
 * ivs, ovs  - increments applied after each iteration to the input
 *             pointers (ri, ii) and to the output pointer O.
 *
 * No normalisation is applied: O[0] = ri[0] + 2 * (ri[1] + ... + ri[7]).
 *
 * The outputs are produced in three groups that share the 5-point
 * constants (KP1_118033988, KP1_175570504, KP1_902113032):
 *   n = 0, 3, 6, 9, 12    built from T3, T8, Td, TZ, T10
 *   n = 5, 2, 8, 11, 14   built from Ti, Tn, Ts, TE - TB, TJ - TG
 *   n = 10, 1, 4, 7, 13   built from Tu, Tv, Tw, TB + TE, TG + TJ
 *
 * NOTE(review): comments assume FMA(a, b, c) = a * b + c and
 * FNMS(a, b, c) = c - a * b, as in FFTW's headers -- the macros are not
 * defined in this file.
 */
static void hc2r_15(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, INT v, INT ivs, INT ovs)
{
     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
     DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
     INT i;
     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(ris), MAKE_VOLATILE_STRIDE(iis), MAKE_VOLATILE_STRIDE(os)) {
	  E T3, Tu, Ti, TB, TZ, T10, TE, TG, TJ, Tn, Tv, Ts, Tw, T8, Td;
	  E Te;
	  /* Bins 0 and 5: the three base terms (T3, Tu, Ti), one per
	     output group. */
	  {
	       E Th, T1, T2, Tf, Tg;
	       Tg = ii[WS(iis, 5)];
	       Th = KP1_732050807 * Tg;
	       T1 = ri[0];
	       T2 = ri[WS(ris, 5)];
	       Tf = T1 - T2;
	       T3 = FMA(KP2_000000000, T2, T1);
	       Tu = Tf - Th;
	       Ti = Tf + Th;
	  }
	  /* Bins {3, 7, 2} and {6, 4, 1}: per-triple sums and the
	     KP866025403-weighted differences. */
	  {
	       E T4, TD, T9, TI, T5, T6, T7, Ta, Tb, Tc, Tr, TH, Tm, TC, Tj;
	       E To;
	       T4 = ri[WS(ris, 3)];
	       TD = ii[WS(iis, 3)];
	       T9 = ri[WS(ris, 6)];
	       TI = ii[WS(iis, 6)];
	       T5 = ri[WS(ris, 7)];
	       T6 = ri[WS(ris, 2)];
	       T7 = T5 + T6;
	       Ta = ri[WS(ris, 4)];
	       Tb = ri[WS(ris, 1)];
	       Tc = Ta + Tb;
	       {
		    E Tp, Tq, Tk, Tl;
		    Tp = ii[WS(iis, 4)];
		    Tq = ii[WS(iis, 1)];
		    Tr = KP866025403 * (Tp + Tq);
		    TH = Tp - Tq;
		    Tk = ii[WS(iis, 7)];
		    Tl = ii[WS(iis, 2)];
		    Tm = KP866025403 * (Tk - Tl);
		    TC = Tk + Tl;
	       }
	       TB = KP866025403 * (T5 - T6);
	       TZ = TD - TC;
	       T10 = TI - TH;
	       TE = FMA(KP500000000, TC, TD);
	       TG = KP866025403 * (Ta - Tb);
	       TJ = FMA(KP500000000, TH, TI);
	       Tj = FNMS(KP500000000, T7, T4);
	       Tn = Tj - Tm;
	       Tv = Tj + Tm;
	       To = FNMS(KP500000000, Tc, T9);
	       Ts = To - Tr;
	       Tw = To + Tr;
	       T8 = T4 + T7;
	       Td = T9 + Tc;
	       Te = T8 + Td;
	  }
	  /* Group 1: outputs 0, 3, 6, 9, 12. */
	  O[0] = FMA(KP2_000000000, Te, T3);
	  {
	       E T11, T13, TY, T12, TW, TX;
	       T11 = FNMS(KP1_902113032, T10, KP1_175570504 * TZ);
	       T13 = FMA(KP1_902113032, TZ, KP1_175570504 * T10);
	       TW = FNMS(KP500000000, Te, T3);
	       TX = KP1_118033988 * (T8 - Td);
	       TY = TW - TX;
	       T12 = TX + TW;
	       O[WS(os, 12)] = TY - T11;
	       O[WS(os, 9)] = T12 + T13;
	       O[WS(os, 3)] = TY + T11;
	       O[WS(os, 6)] = T12 - T13;
	  }
	  /* Group 2: outputs 5, 2, 8, 11, 14. */
	  {
	       E TP, Tt, TO, TT, TV, TR, TS, TU, TQ;
	       TP = KP1_118033988 * (Tn - Ts);
	       Tt = Tn + Ts;
	       TO = FNMS(KP500000000, Tt, Ti);
	       TR = TE - TB;
	       TS = TJ - TG;
	       TT = FNMS(KP1_902113032, TS, KP1_175570504 * TR);
	       TV = FMA(KP1_902113032, TR, KP1_175570504 * TS);
	       O[WS(os, 5)] = FMA(KP2_000000000, Tt, Ti);
	       TU = TP + TO;
	       O[WS(os, 11)] = TU - TV;
	       O[WS(os, 14)] = TU + TV;
	       TQ = TO - TP;
	       O[WS(os, 2)] = TQ - TT;
	       O[WS(os, 8)] = TQ + TT;
	  }
	  /* Group 3: outputs 10, 1, 4, 7, 13. */
	  {
	       E Tz, Tx, Ty, TL, TN, TF, TK, TM, TA;
	       Tz = KP1_118033988 * (Tv - Tw);
	       Tx = Tv + Tw;
	       Ty = FNMS(KP500000000, Tx, Tu);
	       TF = TB + TE;
	       TK = TG + TJ;
	       TL = FNMS(KP1_902113032, TK, KP1_175570504 * TF);
	       TN = FMA(KP1_902113032, TF, KP1_175570504 * TK);
	       O[WS(os, 10)] = FMA(KP2_000000000, Tx, Tu);
	       TM = Tz + Ty;
	       O[WS(os, 1)] = TM - TN;
	       O[WS(os, 4)] = TM + TN;
	       TA = Ty - Tz;
	       O[WS(os, 7)] = TA - TL;
	       O[WS(os, 13)] = TA + TL;
	  }
     }
}
/*
 * hc2cfdft2_16: straight-line kernel that transforms, in place, the values
 * held in the four arrays Rp, Ip, Rm and Im.
 *
 * Per loop iteration the code
 *   - reads W[0] .. W[7] and forms additional multipliers as products/sums
 *     of those eight values,
 *   - reads Rp, Ip, Rm and Im at WS(rs, 0) .. WS(rs, 7) (32 loads in total),
 *   - writes 32 results back to the same index set of the same four arrays.
 * Within one iteration every load precedes the first store.
 *
 * Parameters (as used by the visible code):
 *   Rp, Ip  - read/written; advanced by +ms after each iteration.
 *   Rm, Im  - read/written; advanced by -ms after each iteration.
 *   W       - read-only multiplier table; offset by (mb - 1) * 8 before the
 *             first iteration and advanced by 8 after each one.
 *   rs      - stride argument handed to WS() to form element offsets.
 *   mb, me  - the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms      - per-iteration pointer step (see above).
 *
 * NOTE(review): R, E, stride, INT, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are declared outside this block.  FMA(a, b, c) is
 * presumably a * b + c and FNMS(a, b, c) presumably c - a * b -- confirm
 * against the project headers.  The naming and layout look like a generated
 * size-16 DFT codelet; treat that as an assumption, and do not hand-edit the
 * arithmetic without regenerating/validating it.
 */
static void hc2cfdft2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP461939766, +0.461939766255643378064091594698394143411208313);
     DK(KP191341716, +0.191341716182544885864229992015199433380672281);
     DK(KP353553390, +0.353553390593273762200422181052424519642417969);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
	  INT m;
	  for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) {
	       E T1, T4, T2, T5, T7, Td, T12, TY, Tk, Ti, Tm, T1l, T1b, TL, T1h;
	       E Ts, TR, T17, Ty, Tz, TA, TE, T1L, T1Q, T1H, T1O, T24, T2d, T20, T2b;
	       /* Stage 1: load W[0..7] and build the derived multipliers
		  (pairwise products and their sums/differences) used below. */
	       {
		    E Tl, TP, Tq, TK, Tj, TQ, Tr, TJ;
		    {
			 E T3, Tc, T6, Tb;
			 T1 = W[0];
			 T4 = W[1];
			 T2 = W[2];
			 T5 = W[3];
			 T3 = T1 * T2;
			 Tc = T4 * T2;
			 T6 = T4 * T5;
			 Tb = T1 * T5;
			 T7 = T3 + T6;
			 Td = Tb - Tc;
			 T12 = Tb + Tc;
			 TY = T3 - T6;
			 Tk = W[5];
			 Tl = T4 * Tk;
			 TP = T2 * Tk;
			 Tq = T1 * Tk;
			 TK = T5 * Tk;
			 Ti = W[4];
			 Tj = T1 * Ti;
			 TQ = T5 * Ti;
			 Tr = T4 * Ti;
			 TJ = T2 * Ti;
		    }
		    Tm = Tj - Tl;
		    T1l = Tq - Tr;
		    T1b = TP + TQ;
		    TL = TJ + TK;
		    T1h = Tj + Tl;
		    Ts = Tq + Tr;
		    TR = TP - TQ;
		    T17 = TJ - TK;
		    Ty = W[6];
		    Tz = W[7];
		    TA = FMA(T1, Ty, T4 * Tz);
		    TE = FNMS(T4, Ty, T1 * Tz);
		    {
			 E T1J, T1K, T1F, T1G;
			 T1J = TY * Tk;
			 T1K = T12 * Ti;
			 T1L = T1J - T1K;
			 T1Q = T1J + T1K;
			 T1F = TY * Ti;
			 T1G = T12 * Tk;
			 T1H = T1F + T1G;
			 T1O = T1F - T1G;
		    }
		    {
			 E T22, T23, T1Y, T1Z;
			 T22 = T7 * Tk;
			 T23 = Td * Ti;
			 T24 = T22 + T23;
			 T2d = T22 - T23;
			 T1Y = T7 * Ti;
			 T1Z = Td * Tk;
			 T20 = T1Y - T1Z;
			 T2b = T1Y + T1Z;
		    }
	       }
	       /* Stage 2: load all inputs, multiply them by the factors from
		  stage 1, then combine and store the 32 outputs. */
	       {
		    E T1t, T3i, T2l, T3B, T1E, T3t, T2M, T3x, T1g, T3C, T2J, T3u, T1T, T3w, T2o;
		    E T3j, Tx, T3b, T2C, T3q, T27, T3m, T2s, T3c, TW, T3f, T2F, T3n, T2g, T3p;
		    E T2v, T3e;
		    /* Inputs at index 4 and index 0. */
		    {
			 E T1k, T1C, T1o, T1B, T1s, T1z, T1y, T2j, T1p, T2k;
			 {
			      E T1i, T1j, T1m, T1n;
			      T1i = Ip[WS(rs, 4)];
			      T1j = Im[WS(rs, 4)];
			      T1k = T1i - T1j;
			      T1C = T1i + T1j;
			      T1m = Rp[WS(rs, 4)];
			      T1n = Rm[WS(rs, 4)];
			      T1o = T1m + T1n;
			      T1B = T1m - T1n;
			 }
			 {
			      E T1q, T1r, T1w, T1x;
			      T1q = Ip[0];
			      T1r = Im[0];
			      T1s = T1q - T1r;
			      T1z = T1q + T1r;
			      T1w = Rm[0];
			      T1x = Rp[0];
			      T1y = T1w - T1x;
			      T2j = T1x + T1w;
			 }
			 T1p = FNMS(T1l, T1o, T1h * T1k);
			 T1t = T1p + T1s;
			 T3i = T1s - T1p;
			 T2k = FMA(T1h, T1o, T1l * T1k);
			 T2l = T2j + T2k;
			 T3B = T2j - T2k;
			 {
			      E T1A, T1D, T2K, T2L;
			      T1A = FNMS(T4, T1z, T1 * T1y);
			      T1D = FMA(Ti, T1B, Tk * T1C);
			      T1E = T1A - T1D;
			      T3t = T1D + T1A;
			      T2K = FNMS(Tk, T1B, Ti * T1C);
			      T2L = FMA(T4, T1y, T1 * T1z);
			      T2M = T2K + T2L;
			      T3x = T2L - T2K;
			 }
		    }
		    /* Inputs at index 2 and index 6. */
		    {
			 E T11, T1M, T15, T1I, T1a, T1R, T1e, T1P;
			 {
			      E TZ, T10, T13, T14;
			      TZ = Ip[WS(rs, 2)];
			      T10 = Im[WS(rs, 2)];
			      T11 = TZ - T10;
			      T1M = TZ + T10;
			      T13 = Rp[WS(rs, 2)];
			      T14 = Rm[WS(rs, 2)];
			      T15 = T13 + T14;
			      T1I = T13 - T14;
			 }
			 {
			      E T18, T19, T1c, T1d;
			      T18 = Ip[WS(rs, 6)];
			      T19 = Im[WS(rs, 6)];
			      T1a = T18 - T19;
			      T1R = T18 + T19;
			      T1c = Rp[WS(rs, 6)];
			      T1d = Rm[WS(rs, 6)];
			      T1e = T1c + T1d;
			      T1P = T1c - T1d;
			 }
			 {
			      E T16, T1f, T2H, T2I;
			      T16 = FNMS(T12, T15, TY * T11);
			      T1f = FNMS(T1b, T1e, T17 * T1a);
			      T1g = T16 + T1f;
			      T3C = T16 - T1f;
			      T2H = FNMS(T1L, T1I, T1H * T1M);
			      T2I = FNMS(T1Q, T1P, T1O * T1R);
			      T2J = T2H + T2I;
			      T3u = T2H - T2I;
			 }
			 {
			      E T1N, T1S, T2m, T2n;
			      T1N = FMA(T1H, T1I, T1L * T1M);
			      T1S = FMA(T1O, T1P, T1Q * T1R);
			      T1T = T1N + T1S;
			      T3w = T1S - T1N;
			      T2m = FMA(TY, T15, T12 * T11);
			      T2n = FMA(T17, T1e, T1b * T1a);
			      T2o = T2m + T2n;
			      T3j = T2m - T2n;
			 }
		    }
		    /* Inputs at index 1 and index 5. */
		    {
			 E Ta, T1W, Tg, T1V, Tp, T25, Tv, T21;
			 {
			      E T8, T9, Te, Tf;
			      T8 = Ip[WS(rs, 1)];
			      T9 = Im[WS(rs, 1)];
			      Ta = T8 - T9;
			      T1W = T8 + T9;
			      Te = Rp[WS(rs, 1)];
			      Tf = Rm[WS(rs, 1)];
			      Tg = Te + Tf;
			      T1V = Te - Tf;
			 }
			 {
			      E Tn, To, Tt, Tu;
			      Tn = Ip[WS(rs, 5)];
			      To = Im[WS(rs, 5)];
			      Tp = Tn - To;
			      T25 = Tn + To;
			      Tt = Rp[WS(rs, 5)];
			      Tu = Rm[WS(rs, 5)];
			      Tv = Tt + Tu;
			      T21 = Tt - Tu;
			 }
			 {
			      E Th, Tw, T2A, T2B;
			      Th = FNMS(Td, Tg, T7 * Ta);
			      Tw = FNMS(Ts, Tv, Tm * Tp);
			      Tx = Th + Tw;
			      T3b = Th - Tw;
			      T2A = FNMS(T5, T1V, T2 * T1W);
			      T2B = FNMS(T24, T21, T20 * T25);
			      T2C = T2A + T2B;
			      T3q = T2A - T2B;
			 }
			 {
			      E T1X, T26, T2q, T2r;
			      T1X = FMA(T2, T1V, T5 * T1W);
			      T26 = FMA(T20, T21, T24 * T25);
			      T27 = T1X + T26;
			      T3m = T26 - T1X;
			      T2q = FMA(T7, Tg, Td * Ta);
			      T2r = FMA(Tm, Tv, Ts * Tp);
			      T2s = T2q + T2r;
			      T3c = T2q - T2r;
			 }
		    }
		    /* Inputs at index 7 and index 3. */
		    {
			 E TD, T29, TH, T28, TO, T2e, TU, T2c;
			 {
			      E TB, TC, TF, TG;
			      TB = Ip[WS(rs, 7)];
			      TC = Im[WS(rs, 7)];
			      TD = TB - TC;
			      T29 = TB + TC;
			      TF = Rp[WS(rs, 7)];
			      TG = Rm[WS(rs, 7)];
			      TH = TF + TG;
			      T28 = TF - TG;
			 }
			 {
			      E TM, TN, TS, TT;
			      TM = Ip[WS(rs, 3)];
			      TN = Im[WS(rs, 3)];
			      TO = TM - TN;
			      T2e = TM + TN;
			      TS = Rp[WS(rs, 3)];
			      TT = Rm[WS(rs, 3)];
			      TU = TS + TT;
			      T2c = TS - TT;
			 }
			 {
			      E TI, TV, T2D, T2E;
			      TI = FNMS(TE, TH, TA * TD);
			      TV = FNMS(TR, TU, TL * TO);
			      TW = TI + TV;
			      T3f = TI - TV;
			      T2D = FNMS(Tz, T28, Ty * T29);
			      T2E = FNMS(T2d, T2c, T2b * T2e);
			      T2F = T2D + T2E;
			      T3n = T2D - T2E;
			 }
			 {
			      E T2a, T2f, T2t, T2u;
			      T2a = FMA(Ty, T28, Tz * T29);
			      T2f = FMA(T2b, T2c, T2d * T2e);
			      T2g = T2a + T2f;
			      T3p = T2f - T2a;
			      T2t = FMA(TA, TH, TE * TD);
			      T2u = FMA(TL, TU, TR * TO);
			      T2v = T2t + T2u;
			      T3e = T2t - T2u;
			 }
		    }
		    /* Outputs Rp/Ip at indices 0 and 4, Rm/Im at indices 7 and 3;
		       each is a sum/difference scaled by KP500000000. */
		    {
			 E T1v, T2z, T2O, T2Q, T2i, T2y, T2x, T2P;
			 {
			      E TX, T1u, T2G, T2N;
			      TX = Tx + TW;
			      T1u = T1g + T1t;
			      T1v = TX + T1u;
			      T2z = T1u - TX;
			      T2G = T2C + T2F;
			      T2N = T2J + T2M;
			      T2O = T2G - T2N;
			      T2Q = T2G + T2N;
			 }
			 {
			      E T1U, T2h, T2p, T2w;
			      T1U = T1E - T1T;
			      T2h = T27 + T2g;
			      T2i = T1U - T2h;
			      T2y = T2h + T1U;
			      T2p = T2l + T2o;
			      T2w = T2s + T2v;
			      T2x = T2p - T2w;
			      T2P = T2p + T2w;
			 }
			 Ip[0] = KP500000000 * (T1v + T2i);
			 Rp[0] = KP500000000 * (T2P + T2Q);
			 Im[WS(rs, 7)] = KP500000000 * (T2i - T1v);
			 Rm[WS(rs, 7)] = KP500000000 * (T2P - T2Q);
			 Rm[WS(rs, 3)] = KP500000000 * (T2x - T2y);
			 Im[WS(rs, 3)] = KP500000000 * (T2O - T2z);
			 Rp[WS(rs, 4)] = KP500000000 * (T2x + T2y);
			 Ip[WS(rs, 4)] = KP500000000 * (T2z + T2O);
		    }
		    /* Outputs Rp/Ip at indices 2 and 6, Rm/Im at indices 5 and 1;
		       these mix KP500000000- and KP353553390-scaled terms. */
		    {
			 E T2T, T35, T33, T39, T2W, T36, T2Z, T37;
			 {
			      E T2R, T2S, T31, T32;
			      T2R = T2v - T2s;
			      T2S = T1t - T1g;
			      T2T = KP500000000 * (T2R + T2S);
			      T35 = KP500000000 * (T2S - T2R);
			      T31 = T2l - T2o;
			      T32 = Tx - TW;
			      T33 = KP500000000 * (T31 - T32);
			      T39 = KP500000000 * (T31 + T32);
			 }
			 {
			      E T2U, T2V, T2X, T2Y;
			      T2U = T2F - T2C;
			      T2V = T27 - T2g;
			      T2W = T2U + T2V;
			      T36 = T2U - T2V;
			      T2X = T1T + T1E;
			      T2Y = T2M - T2J;
			      T2Z = T2X - T2Y;
			      T37 = T2X + T2Y;
			 }
			 {
			      E T30, T3a, T34, T38;
			      T30 = KP353553390 * (T2W + T2Z);
			      Ip[WS(rs, 2)] = T2T + T30;
			      Im[WS(rs, 5)] = T30 - T2T;
			      T3a = KP353553390 * (T36 + T37);
			      Rm[WS(rs, 5)] = T39 - T3a;
			      Rp[WS(rs, 2)] = T39 + T3a;
			      T34 = KP353553390 * (T2Z - T2W);
			      Rm[WS(rs, 1)] = T33 - T34;
			      Rp[WS(rs, 6)] = T33 + T34;
			      T38 = KP353553390 * (T36 - T37);
			      Ip[WS(rs, 6)] = T35 + T38;
			      Im[WS(rs, 1)] = T38 - T35;
			 }
		    }
		    /* Remaining outputs: Rp/Ip at indices 1, 3, 5, 7 and Rm/Im at
		       indices 0, 2, 4, 6; these additionally use KP461939766 and
		       KP191341716. */
		    {
			 E T3k, T3Q, T3Z, T3D, T3h, T40, T3X, T45, T3G, T3P, T3s, T3K, T3U, T44, T3z;
			 E T3L;
			 {
			      E T3d, T3g, T3o, T3r;
			      T3k = KP500000000 * (T3i - T3j);
			      T3Q = KP500000000 * (T3j + T3i);
			      T3Z = KP500000000 * (T3B - T3C);
			      T3D = KP500000000 * (T3B + T3C);
			      T3d = T3b - T3c;
			      T3g = T3e + T3f;
			      T3h = KP353553390 * (T3d + T3g);
			      T40 = KP353553390 * (T3d - T3g);
			      {
				   E T3V, T3W, T3E, T3F;
				   T3V = T3u + T3t;
				   T3W = T3x - T3w;
				   T3X = FNMS(KP461939766, T3W, KP191341716 * T3V);
				   T45 = FMA(KP461939766, T3V, KP191341716 * T3W);
				   T3E = T3c + T3b;
				   T3F = T3e - T3f;
				   T3G = KP353553390 * (T3E + T3F);
				   T3P = KP353553390 * (T3F - T3E);
			      }
			      T3o = T3m + T3n;
			      T3r = T3p - T3q;
			      T3s = FMA(KP191341716, T3o, KP461939766 * T3r);
			      T3K = FNMS(KP191341716, T3r, KP461939766 * T3o);
			      {
				   E T3S, T3T, T3v, T3y;
				   T3S = T3n - T3m;
				   T3T = T3q + T3p;
				   T3U = FMA(KP461939766, T3S, KP191341716 * T3T);
				   T44 = FNMS(KP461939766, T3T, KP191341716 * T3S);
				   T3v = T3t - T3u;
				   T3y = T3w + T3x;
				   T3z = FNMS(KP191341716, T3y, KP461939766 * T3v);
				   T3L = FMA(KP191341716, T3v, KP461939766 * T3y);
			      }
			 }
			 {
			      E T3l, T3A, T3N, T3O;
			      T3l = T3h + T3k;
			      T3A = T3s + T3z;
			      Ip[WS(rs, 1)] = T3l + T3A;
			      Im[WS(rs, 6)] = T3A - T3l;
			      T3N = T3D + T3G;
			      T3O = T3K + T3L;
			      Rm[WS(rs, 6)] = T3N - T3O;
			      Rp[WS(rs, 1)] = T3N + T3O;
			 }
			 {
			      E T3H, T3I, T3J, T3M;
			      T3H = T3D - T3G;
			      T3I = T3z - T3s;
			      Rm[WS(rs, 2)] = T3H - T3I;
			      Rp[WS(rs, 5)] = T3H + T3I;
			      T3J = T3k - T3h;
			      T3M = T3K - T3L;
			      Ip[WS(rs, 5)] = T3J + T3M;
			      Im[WS(rs, 2)] = T3M - T3J;
			 }
			 {
			      E T3R, T3Y, T47, T48;
			      T3R = T3P + T3Q;
			      T3Y = T3U + T3X;
			      Ip[WS(rs, 3)] = T3R + T3Y;
			      Im[WS(rs, 4)] = T3Y - T3R;
			      T47 = T3Z + T40;
			      T48 = T44 + T45;
			      Rm[WS(rs, 4)] = T47 - T48;
			      Rp[WS(rs, 3)] = T47 + T48;
			 }
			 {
			      E T41, T42, T43, T46;
			      T41 = T3Z - T40;
			      T42 = T3X - T3U;
			      Rm[0] = T41 - T42;
			      Rp[WS(rs, 7)] = T41 + T42;
			      T43 = T3Q - T3P;
			      T46 = T44 - T45;
			      Ip[WS(rs, 7)] = T43 + T46;
			      Im[0] = T46 - T43;
			 }
		    }
	       }
	  }
     }
}
/*
 * r2cbIII_32: straight-line kernel that reads 16 values from each of Cr and
 * Ci and writes 16 values to each of R0 and R1.
 *
 * Per loop iteration the code
 *   - loads Cr[WS(csr, k)] and Ci[WS(csi, k)] for k = 0 .. 15,
 *   - stores R0[WS(rs, k)] and R1[WS(rs, k)] for k = 0 .. 15.
 * Cr and Ci are only read; R0 and R1 are only written.  All loads of an
 * iteration are issued before its first store.
 *
 * Parameters (as used by the visible code):
 *   R0, R1    - output arrays; advanced by ovs after each iteration.
 *   Cr, Ci    - input arrays; advanced by ivs after each iteration.
 *   rs        - stride passed to WS() for R0/R1 offsets.
 *   csr, csi  - strides passed to WS() for Cr and Ci offsets respectively.
 *   v         - iteration count; the loop runs while the counter, starting
 *               at v and decremented by one each pass, is > 0.
 *   ivs, ovs  - per-iteration pointer steps (see above).
 *
 * NOTE(review): R, E, stride, INT, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are declared outside this block.  FMA(a, b, c) is
 * presumably a * b + c and FNMS(a, b, c) presumably c - a * b -- confirm
 * against the project headers.  The naming and layout look like a generated
 * size-32 real-output DFT codelet; treat that as an assumption, and do not
 * hand-edit the arithmetic without regenerating/validating it.
 */
static void r2cbIII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
{
     DK(KP534511135, +0.534511135950791641089685961295362908582039528);
     DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
     DK(KP303346683, +0.303346683607342391675883946941299872384187453);
     DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
     DK(KP098491403, +0.098491403357164253077197521291327432293052451);
     DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
     DK(KP820678790, +0.820678790828660330972281985331011598767386482);
     DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
     DK(KP668178637, +0.668178637919298919997757686523080761552472251);
     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
     {
	  INT i;
	  for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) {
	       /* These six survive the nested scope below and feed the final
		  four R1 stores at the end of the loop body. */
	       E T1N, T1K, T1Q, T1H, T1O, T1P;
	       {
		    E T1I, T1e, T1Z, T7, T2E, T2i, T1x, Tz, Te, T2j, T22, T2F, T1h, T1y, TK;
		    E T1J, Tm, T2B, TX, Tp, T2m, T28, T1M, T1C, T1k, TW, TY, T2a, T14, T15;
		    E Ts, TZ;
		    /* Load phase: every Cr/Ci element is read here, interleaved
		       with the first level of sums and differences. */
		    {
			 E TE, T1g, TJ, T1f;
			 {
			      E T4, Tv, T3, T2g, T1d, T5, Tw, Tx;
			      {
				   E T1, T2, T1b, T1c;
				   T1 = Cr[0];
				   T2 = Cr[WS(csr, 15)];
				   T1b = Ci[0];
				   T1c = Ci[WS(csi, 15)];
				   T4 = Cr[WS(csr, 8)];
				   Tv = T1 - T2;
				   T3 = T1 + T2;
				   T2g = T1c - T1b;
				   T1d = T1b + T1c;
				   T5 = Cr[WS(csr, 7)];
				   Tw = Ci[WS(csi, 8)];
				   Tx = Ci[WS(csi, 7)];
			      }
			      {
				   E Tb, TA, Ta, T20, TD, Tc, TG, TH;
				   {
					E T8, T9, TB, TC;
					T8 = Cr[WS(csr, 4)];
					{
					     E T1a, T6, T2h, Ty;
					     T1a = T4 - T5;
					     T6 = T4 + T5;
					     T2h = Tx - Tw;
					     Ty = Tw + Tx;
					     T1I = T1a - T1d;
					     T1e = T1a + T1d;
					     T1Z = T3 - T6;
					     T7 = T3 + T6;
					     T2E = T2h + T2g;
					     T2i = T2g - T2h;
					     T1x = Tv + Ty;
					     Tz = Tv - Ty;
					     T9 = Cr[WS(csr, 11)];
					}
					TB = Ci[WS(csi, 4)];
					TC = Ci[WS(csi, 11)];
					Tb = Cr[WS(csr, 3)];
					TA = T8 - T9;
					Ta = T8 + T9;
					T20 = TC - TB;
					TD = TB + TC;
					Tc = Cr[WS(csr, 12)];
					TG = Ci[WS(csi, 3)];
					TH = Ci[WS(csi, 12)];
				   }
				   {
					E TF, Td, T21, TI;
					TE = TA - TD;
					T1g = TA + TD;
					TF = Tb - Tc;
					Td = Tb + Tc;
					T21 = TG - TH;
					TI = TG + TH;
					Te = Ta + Td;
					T2j = Ta - Td;
					T22 = T20 - T21;
					T2F = T20 + T21;
					TJ = TF - TI;
					T1f = TF + TI;
				   }
			      }
			 }
			 {
			      E TM, Ti, TN, T25, TU, TR, Tl, TO;
			      {
				   E TS, TT, Tg, Th, Tj, Tk;
				   Tg = Cr[WS(csr, 2)];
				   Th = Cr[WS(csr, 13)];
				   T1h = T1f - T1g;
				   T1y = T1g + T1f;
				   TK = TE + TJ;
				   T1J = TE - TJ;
				   TM = Tg - Th;
				   Ti = Tg + Th;
				   TS = Ci[WS(csi, 2)];
				   TT = Ci[WS(csi, 13)];
				   Tj = Cr[WS(csr, 10)];
				   Tk = Cr[WS(csr, 5)];
				   TN = Ci[WS(csi, 10)];
				   T25 = TS - TT;
				   TU = TS + TT;
				   TR = Tj - Tk;
				   Tl = Tj + Tk;
				   TO = Ci[WS(csi, 5)];
			      }
			      {
				   E T12, T13, Tq, Tr;
				   {
					E Tn, T1A, TV, T24, T26, TP, To, T27, T1B, TQ;
					Tn = Cr[WS(csr, 1)];
					T1A = TR - TU;
					TV = TR + TU;
					T24 = Ti - Tl;
					Tm = Ti + Tl;
					T26 = TN - TO;
					TP = TN + TO;
					To = Cr[WS(csr, 14)];
					T12 = Ci[WS(csi, 1)];
					T27 = T25 - T26;
					T2B = T26 + T25;
					T1B = TM + TP;
					TQ = TM - TP;
					TX = Tn - To;
					Tp = Tn + To;
					T2m = T24 + T27;
					T28 = T24 - T27;
					T1M = FNMS(KP414213562, T1A, T1B);
					T1C = FMA(KP414213562, T1B, T1A);
					T1k = FMA(KP414213562, TQ, TV);
					TW = FNMS(KP414213562, TV, TQ);
					T13 = Ci[WS(csi, 14)];
				   }
				   Tq = Cr[WS(csr, 6)];
				   Tr = Cr[WS(csr, 9)];
				   TY = Ci[WS(csi, 6)];
				   T2a = T13 - T12;
				   T14 = T12 + T13;
				   T15 = Tq - Tr;
				   Ts = Tq + Tr;
				   TZ = Ci[WS(csi, 9)];
			      }
			 }
		    }
		    /* Combine/store phase: no further Cr/Ci reads below; results
		       are written to R0 and R1 as soon as they are complete. */
		    {
			 E T1L, T1F, T23, T2n, T2k, T2e, T1p, T1t, T1s, T1i, T1o, T19, T1l, T1q;
			 {
			      E T2z, T2G, T2H, T2C, T1j, T17, T2r, T2s, T2u, T2v, T2K, T2D;
			      {
				   E T2L, T2d, T2l, T2O;
				   {
					E Tf, T2N, Tu, T2M;
					{
					     E T1D, T16, T29, Tt, T2b, T10;
					     T2z = T7 - Te;
					     Tf = T7 + Te;
					     T1D = T15 + T14;
					     T16 = T14 - T15;
					     T29 = Tp - Ts;
					     Tt = Tp + Ts;
					     T2b = TY - TZ;
					     T10 = TY + TZ;
					     T2N = T2F + T2E;
					     T2G = T2E - T2F;
					     T2H = Tm - Tt;
					     Tu = Tm + Tt;
					     {
						  E T2c, T2A, T1E, T11;
						  T2c = T2a - T2b;
						  T2A = T2b + T2a;
						  T1E = TX + T10;
						  T11 = TX - T10;
						  T2L = Tf - Tu;
						  T2d = T29 + T2c;
						  T2l = T29 - T2c;
						  T2C = T2A - T2B;
						  T2M = T2B + T2A;
						  T1L = FMA(KP414213562, T1D, T1E);
						  T1F = FNMS(KP414213562, T1E, T1D);
						  T1j = FMA(KP414213562, T11, T16);
						  T17 = FNMS(KP414213562, T16, T11);
						  T2O = T2M + T2N;
					     }
					}
					R0[0] = KP2_000000000 * (Tf + Tu);
					R0[WS(rs, 8)] = KP2_000000000 * (T2N - T2M);
				   }
				   T23 = T1Z + T22;
				   T2r = T1Z - T22;
				   R0[WS(rs, 12)] = KP1_414213562 * (T2O - T2L);
				   R0[WS(rs, 4)] = KP1_414213562 * (T2L + T2O);
				   T2s = T2m + T2l;
				   T2n = T2l - T2m;
				   T2k = T2i - T2j;
				   T2u = T2j + T2i;
				   T2v = T28 - T2d;
				   T2e = T28 + T2d;
			      }
			      {
				   E T2y, T2t, T2x, T2w;
				   T2y = FMA(KP707106781, T2s, T2r);
				   T2t = FNMS(KP707106781, T2s, T2r);
				   T2x = FMA(KP707106781, T2v, T2u);
				   T2w = FNMS(KP707106781, T2v, T2u);
				   R0[WS(rs, 7)] = KP1_961570560 * (FMA(KP198912367, T2y, T2x));
				   R0[WS(rs, 15)] = -(KP1_961570560 * (FNMS(KP198912367, T2x, T2y)));
				   R0[WS(rs, 11)] = KP1_662939224 * (FNMS(KP668178637, T2t, T2w));
				   R0[WS(rs, 3)] = KP1_662939224 * (FMA(KP668178637, T2w, T2t));
				   T2K = T2z - T2C;
				   T2D = T2z + T2C;
			      }
			      {
				   E TL, T18, T2J, T2I;
				   T1p = FNMS(KP707106781, TK, Tz);
				   TL = FMA(KP707106781, TK, Tz);
				   T18 = TW + T17;
				   T1t = TW - T17;
				   T1s = FMA(KP707106781, T1h, T1e);
				   T1i = FNMS(KP707106781, T1h, T1e);
				   T2J = T2H + T2G;
				   T2I = T2G - T2H;
				   T1o = FNMS(KP923879532, T18, TL);
				   T19 = FMA(KP923879532, T18, TL);
				   R0[WS(rs, 6)] = KP1_847759065 * (FMA(KP414213562, T2K, T2J));
				   R0[WS(rs, 14)] = -(KP1_847759065 * (FNMS(KP414213562, T2J, T2K)));
				   R0[WS(rs, 10)] = KP1_847759065 * (FNMS(KP414213562, T2D, T2I));
				   R0[WS(rs, 2)] = KP1_847759065 * (FMA(KP414213562, T2I, T2D));
				   T1l = T1j - T1k;
				   T1q = T1k + T1j;
			      }
			 }
			 {
			      E T1z, T1U, T1Y, T1T, T1V, T1G;
			      {
				   E T1w, T1r, T1n, T1m;
				   T1n = FMA(KP923879532, T1l, T1i);
				   T1m = FNMS(KP923879532, T1l, T1i);
				   T1w = FMA(KP923879532, T1q, T1p);
				   T1r = FNMS(KP923879532, T1q, T1p);
				   R1[WS(rs, 4)] = -(KP1_546020906 * (FNMS(KP820678790, T1o, T1n)));
				   R1[WS(rs, 12)] = -(KP1_546020906 * (FMA(KP820678790, T1n, T1o)));
				   R1[WS(rs, 8)] = -(KP1_990369453 * (FMA(KP098491403, T19, T1m)));
				   R1[0] = KP1_990369453 * (FNMS(KP098491403, T1m, T19));
				   {
					E T1R, T1S, T1v, T1u;
					T1z = FNMS(KP707106781, T1y, T1x);
					T1R = FMA(KP707106781, T1y, T1x);
					T1S = T1M + T1L;
					T1N = T1L - T1M;
					T1K = FNMS(KP707106781, T1J, T1I);
					T1U = FMA(KP707106781, T1J, T1I);
					T1v = FNMS(KP923879532, T1t, T1s);
					T1u = FMA(KP923879532, T1t, T1s);
					T1Y = FMA(KP923879532, T1S, T1R);
					T1T = FNMS(KP923879532, T1S, T1R);
					R1[WS(rs, 6)] = -(KP1_913880671 * (FNMS(KP303346683, T1w, T1v)));
					R1[WS(rs, 14)] = -(KP1_913880671 * (FMA(KP303346683, T1v, T1w)));
					R1[WS(rs, 10)] = -(KP1_763842528 * (FMA(KP534511135, T1r, T1u)));
					R1[WS(rs, 2)] = KP1_763842528 * (FNMS(KP534511135, T1u, T1r));
					T1V = T1C + T1F;
					T1G = T1C - T1F;
				   }
			      }
			      {
				   E T2q, T2f, T1X, T1W, T2p, T2o;
				   T1X = FMA(KP923879532, T1V, T1U);
				   T1W = FNMS(KP923879532, T1V, T1U);
				   T2q = FNMS(KP707106781, T2e, T23);
				   T2f = FMA(KP707106781, T2e, T23);
				   R1[WS(rs, 7)] = KP1_990369453 * (FMA(KP098491403, T1Y, T1X));
				   R1[WS(rs, 15)] = -(KP1_990369453 * (FNMS(KP098491403, T1X, T1Y)));
				   R1[WS(rs, 11)] = KP1_546020906 * (FNMS(KP820678790, T1T, T1W));
				   R1[WS(rs, 3)] = KP1_546020906 * (FMA(KP820678790, T1W, T1T));
				   T2p = FNMS(KP707106781, T2n, T2k);
				   T2o = FMA(KP707106781, T2n, T2k);
				   T1Q = FNMS(KP923879532, T1G, T1z);
				   T1H = FMA(KP923879532, T1G, T1z);
				   R0[WS(rs, 5)] = KP1_662939224 * (FMA(KP668178637, T2q, T2p));
				   R0[WS(rs, 13)] = -(KP1_662939224 * (FNMS(KP668178637, T2p, T2q)));
				   R0[WS(rs, 9)] = KP1_961570560 * (FNMS(KP198912367, T2f, T2o));
				   R0[WS(rs, 1)] = KP1_961570560 * (FMA(KP198912367, T2o, T2f));
			      }
			 }
		    }
	       }
	       /* Final four R1 outputs, built from the values carried out of
		  the nested scope above. */
	       T1O = FMA(KP923879532, T1N, T1K);
	       T1P = FNMS(KP923879532, T1N, T1K);
	       R1[WS(rs, 5)] = KP1_763842528 * (FMA(KP534511135, T1Q, T1P));
	       R1[WS(rs, 13)] = -(KP1_763842528 * (FNMS(KP534511135, T1P, T1Q)));
	       R1[WS(rs, 9)] = KP1_913880671 * (FNMS(KP303346683, T1H, T1O));
	       R1[WS(rs, 1)] = KP1_913880671 * (FMA(KP303346683, T1O, T1H));
	  }
     }
}
Ejemplo n.º 20
0
/*
 * hc2cfdft_4: in-place straight-line kernel over the arrays Rp, Ip, Rm, Im.
 *
 * Each iteration reads the four arrays at offsets 0 and WS(rs, 1), combines
 * them with the six multipliers W[0] .. W[5], and writes eight results
 * (each scaled by KP500000000) back to the same eight locations.  All loads
 * happen before the first store.
 *
 * The loop runs for m in [mb, me).  After each pass Rp and Ip move by +ms,
 * Rm and Im by -ms and W by 6; W is first offset by (mb - 1) * 6.
 *
 * NOTE(review): FMA(a, b, c) is presumably a * b + c and FNMS(a, b, c)
 * presumably c - a * b; both are defined outside this block -- confirm.
 */
static void hc2cfdft_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
	  INT m;
	  for (m = mb, W += (mb - 1) * 6; m < me; ++m, Rp += ms, Ip += ms, Rm -= ms, Im -= ms, W += 6, MAKE_VOLATILE_STRIDE(16, rs)) {
	       /* Offset-0 inputs with their sums and differences. */
	       E ip0 = Ip[0];
	       E im0 = Im[0];
	       E isum0 = ip0 + im0;
	       E rm0 = Rm[0];
	       E rp0 = Rp[0];
	       E rdif0 = rm0 - rp0;
	       E idif0 = ip0 - im0;
	       E rsum0 = rp0 + rm0;

	       /* First multiplier pair, applied to the offset-0 sum/difference. */
	       E w0 = W[0];
	       E w1 = W[1];
	       E f0n = FNMS(w1, isum0, w0 * rdif0);
	       E f0p = FMA(w1, rdif0, w0 * isum0);

	       /* Offset-1 inputs with their sums and differences. */
	       E ip1 = Ip[WS(rs, 1)];
	       E im1 = Im[WS(rs, 1)];
	       E idif1 = ip1 - im1;
	       E isum1 = ip1 + im1;
	       E rp1 = Rp[WS(rs, 1)];
	       E rm1 = Rm[WS(rs, 1)];
	       E rsum1 = rp1 + rm1;
	       E rdif1 = rp1 - rm1;

	       /* Second and third multiplier pairs, applied to offset-1 data. */
	       E w2 = W[2];
	       E w3 = W[3];
	       E f1n = FNMS(w3, rsum1, w2 * idif1);
	       E f1p = FMA(w2, rsum1, w3 * idif1);
	       E w4 = W[4];
	       E w5 = W[5];
	       E f2p = FMA(w4, rdif1, w5 * isum1);
	       E f2n = FNMS(w5, rdif1, w4 * isum1);

	       /* Last level of sums/differences feeding the stores. */
	       E a_i = f1n + idif0;
	       E b_i = f0n - f2p;
	       E a_r = rsum0 + f1p;
	       E b_r = f2n + f0p;
	       E c_r = rsum0 - f1p;
	       E d_r = f2p + f0n;
	       E c_i = idif0 - f1n;
	       E d_i = f2n - f0p;

	       /* Stores, in the same order as the generated original. */
	       Ip[0] = KP500000000 * (a_i + b_i);
	       Im[WS(rs, 1)] = KP500000000 * (b_i - a_i);
	       Rm[WS(rs, 1)] = KP500000000 * (a_r - b_r);
	       Rp[0] = KP500000000 * (a_r + b_r);
	       Rm[0] = KP500000000 * (c_r - d_r);
	       Rp[WS(rs, 1)] = KP500000000 * (c_r + d_r);
	       Ip[WS(rs, 1)] = KP500000000 * (c_i + d_i);
	       Im[0] = KP500000000 * (d_i - c_i);
	  }
     }
}
Ejemplo n.º 21
0
/*
 * hb2_16: straight-line kernel that transforms, in place, the values held in
 * the two arrays cr and ci.
 *
 * Per loop iteration the code
 *   - reads W[0] .. W[7] and forms additional multipliers as products/sums
 *     of those eight values,
 *   - reads cr[WS(rs, k)] and ci[WS(rs, k)] for k = 0 .. 15 (32 loads),
 *   - writes 32 results back to the same locations.  cr[0] and ci[0] are
 *     stored as plain sums; every other output is a pair of combined values
 *     multiplied by one of the multiplier pairs.
 * Within one iteration every load precedes the first store.
 *
 * Parameters (as used by the visible code):
 *   cr      - read/written; advanced by +ms after each iteration.
 *   ci      - read/written; advanced by -ms after each iteration.
 *   W       - read-only multiplier table; offset by (mb - 1) * 8 before the
 *             first iteration and advanced by 8 after each one.
 *   rs      - stride argument handed to WS() to form element offsets.
 *   mb, me  - the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms      - per-iteration pointer step (see above).
 *
 * NOTE(review): R, E, stride, INT, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are declared outside this block.  FMA(a, b, c) is
 * presumably a * b + c and FNMS(a, b, c) presumably c - a * b -- confirm
 * against the project headers.  The naming and layout look like a generated
 * size-16 DFT codelet; treat that as an assumption, and do not hand-edit the
 * arithmetic without regenerating/validating it.
 */
static void hb2_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
     {
	  INT m;
	  for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(32, rs)) {
	       E Tv, Tw, T2z, T2C, TB, TF, Ty, Tz, T1V, TA, T2G, T3Q, T3C, T3g, T3L;
	       E T30, T3m, T3z, T3w, T3s, T1X, T1Y, T2u, T2c, T2p, TE, TG, T1G, T1o, T1D;
	       /* Stage 1: load W[0..7] and build the derived multipliers that
		  scale the outputs in stage 3. */
	       {
		    E T3f, T3l, T2F, T3r, T2Z, T3v, TD, Tx;
		    Tv = W[0];
		    Tw = W[2];
		    Tx = Tv * Tw;
		    T2z = W[6];
		    T3f = Tv * T2z;
		    T2C = W[7];
		    T3l = Tv * T2C;
		    TB = W[4];
		    T2F = Tv * TB;
		    T3r = Tw * TB;
		    TF = W[5];
		    T2Z = Tv * TF;
		    T3v = Tw * TF;
		    Ty = W[1];
		    Tz = W[3];
		    TD = Tv * Tz;
		    T1V = FMA(Ty, Tz, Tx);
		    TA = FNMS(Ty, Tz, Tx);
		    T2G = FNMS(Ty, TF, T2F);
		    T3Q = FMA(Tz, TB, T3v);
		    T3C = FNMS(Ty, TB, T2Z);
		    T3g = FMA(Ty, T2C, T3f);
		    T3L = FNMS(Tz, TF, T3r);
		    T30 = FMA(Ty, TB, T2Z);
		    T3m = FNMS(Ty, T2z, T3l);
		    T3z = FMA(Ty, TF, T2F);
		    T3w = FNMS(Tz, TB, T3v);
		    T3s = FMA(Tz, TF, T3r);
		    {
			 E T1W, T2b, TC, T1n;
			 T1W = T1V * TB;
			 T2b = T1V * TF;
			 T1X = FNMS(Ty, Tw, TD);
			 T1Y = FNMS(T1X, TF, T1W);
			 T2u = FNMS(T1X, TB, T2b);
			 T2c = FMA(T1X, TB, T2b);
			 T2p = FMA(T1X, TF, T1W);
			 TC = TA * TB;
			 T1n = TA * TF;
			 TE = FMA(Ty, Tw, TD);
			 TG = FNMS(TE, TF, TC);
			 T1G = FNMS(TE, TB, T1n);
			 T1o = FMA(TE, TB, T1n);
			 T1D = FMA(TE, TF, TC);
		    }
	       }
	       {
		    E TL, T1Z, T2d, T1t, T31, T34, T3n, T3D, T3E, T3R, T1w, T20, Tf, T3M, T2L;
		    E T3h, TW, T2e, T3G, T3H, T3N, T2Q, T36, T2V, T37, Tu, T3S, T18, T1z, T24;
		    E T2g, T27, T2h, T1j, T1y;
		    /* Stage 2a: load the even-indexed cr elements and the
		       odd-indexed ci elements, with first-level sums/differences. */
		    {
			 E T3, TH, TU, T2I, T1s, T32, T6, T1p, Ta, TM, TK, T33, TP, T2J, Td;
			 E TR;
			 {
			      E T1, T2, TS, TT;
			      T1 = cr[0];
			      T2 = ci[WS(rs, 7)];
			      T3 = T1 + T2;
			      TH = T1 - T2;
			      TS = ci[WS(rs, 9)];
			      TT = cr[WS(rs, 14)];
			      TU = TS + TT;
			      T2I = TS - TT;
			 }
			 {
			      E T1q, T1r, T4, T5;
			      T1q = ci[WS(rs, 15)];
			      T1r = cr[WS(rs, 8)];
			      T1s = T1q + T1r;
			      T32 = T1q - T1r;
			      T4 = cr[WS(rs, 4)];
			      T5 = ci[WS(rs, 3)];
			      T6 = T4 + T5;
			      T1p = T4 - T5;
			 }
			 {
			      E T8, T9, TI, TJ;
			      T8 = cr[WS(rs, 2)];
			      T9 = ci[WS(rs, 5)];
			      Ta = T8 + T9;
			      TM = T8 - T9;
			      TI = ci[WS(rs, 11)];
			      TJ = cr[WS(rs, 12)];
			      TK = TI + TJ;
			      T33 = TI - TJ;
			 }
			 {
			      E TN, TO, Tb, Tc;
			      TN = ci[WS(rs, 13)];
			      TO = cr[WS(rs, 10)];
			      TP = TN + TO;
			      T2J = TN - TO;
			      Tb = ci[WS(rs, 1)];
			      Tc = cr[WS(rs, 6)];
			      Td = Tb + Tc;
			      TR = Tb - Tc;
			 }
			 TL = TH - TK;
			 T1Z = TH + TK;
			 T2d = T1s - T1p;
			 T1t = T1p + T1s;
			 T31 = Ta - Td;
			 T34 = T32 - T33;
			 T3n = T34 - T31;
			 {
			      E T1u, T1v, T7, Te;
			      T3D = T32 + T33;
			      T3E = T2J + T2I;
			      T3R = T3D - T3E;
			      T1u = TM + TP;
			      T1v = TR + TU;
			      T1w = T1u - T1v;
			      T20 = T1u + T1v;
			      T7 = T3 + T6;
			      Te = Ta + Td;
			      Tf = T7 + Te;
			      T3M = T7 - Te;
			      {
				   E T2H, T2K, TQ, TV;
				   T2H = T3 - T6;
				   T2K = T2I - T2J;
				   T2L = T2H + T2K;
				   T3h = T2H - T2K;
				   TQ = TM - TP;
				   TV = TR - TU;
				   TW = TQ + TV;
				   T2e = TQ - TV;
			      }
			 }
		    }
		    /* Stage 2b: load the odd-indexed cr elements and the
		       even-indexed ci elements, with first-level sums/differences. */
		    {
			 E Ti, T1e, T1c, T2N, T1h, T2O, Tl, T19, Tp, T13, T11, T2S, T16, T2T, Ts;
			 E TY, T2M, T2P;
			 {
			      E Tg, Th, T1a, T1b;
			      Tg = cr[WS(rs, 1)];
			      Th = ci[WS(rs, 6)];
			      Ti = Tg + Th;
			      T1e = Tg - Th;
			      T1a = ci[WS(rs, 14)];
			      T1b = cr[WS(rs, 9)];
			      T1c = T1a + T1b;
			      T2N = T1a - T1b;
			 }
			 {
			      E T1f, T1g, Tj, Tk;
			      T1f = ci[WS(rs, 10)];
			      T1g = cr[WS(rs, 13)];
			      T1h = T1f + T1g;
			      T2O = T1f - T1g;
			      Tj = cr[WS(rs, 5)];
			      Tk = ci[WS(rs, 2)];
			      Tl = Tj + Tk;
			      T19 = Tj - Tk;
			 }
			 {
			      E Tn, To, TZ, T10;
			      Tn = ci[0];
			      To = cr[WS(rs, 7)];
			      Tp = Tn + To;
			      T13 = Tn - To;
			      TZ = ci[WS(rs, 8)];
			      T10 = cr[WS(rs, 15)];
			      T11 = TZ + T10;
			      T2S = TZ - T10;
			 }
			 {
			      E T14, T15, Tq, Tr;
			      T14 = ci[WS(rs, 12)];
			      T15 = cr[WS(rs, 11)];
			      T16 = T14 + T15;
			      T2T = T14 - T15;
			      Tq = cr[WS(rs, 3)];
			      Tr = ci[WS(rs, 4)];
			      Ts = Tq + Tr;
			      TY = Tq - Tr;
			 }
			 T3G = T2N + T2O;
			 T3H = T2S + T2T;
			 T3N = T3H - T3G;
			 T2M = Ti - Tl;
			 T2P = T2N - T2O;
			 T2Q = T2M - T2P;
			 T36 = T2M + T2P;
			 {
			      E T2R, T2U, Tm, Tt;
			      T2R = Tp - Ts;
			      T2U = T2S - T2T;
			      T2V = T2R + T2U;
			      T37 = T2U - T2R;
			      Tm = Ti + Tl;
			      Tt = Tp + Ts;
			      Tu = Tm + Tt;
			      T3S = Tm - Tt;
			 }
			 {
			      E T12, T17, T22, T23;
			      T12 = TY - T11;
			      T17 = T13 - T16;
			      T18 = FNMS(KP414213562, T17, T12);
			      T1z = FMA(KP414213562, T12, T17);
			      T22 = T1c - T19;
			      T23 = T1e + T1h;
			      T24 = FNMS(KP414213562, T23, T22);
			      T2g = FMA(KP414213562, T22, T23);
			 }
			 {
			      E T25, T26, T1d, T1i;
			      T25 = TY + T11;
			      T26 = T13 + T16;
			      T27 = FNMS(KP414213562, T26, T25);
			      T2h = FMA(KP414213562, T25, T26);
			      T1d = T19 + T1c;
			      T1i = T1e - T1h;
			      T1j = FMA(KP414213562, T1i, T1d);
			      T1y = FNMS(KP414213562, T1d, T1i);
			 }
		    }
		    /* Stage 3: stores.  From here on cr/ci are only written. */
		    cr[0] = Tf + Tu;
		    /* ci[0] plus outputs at index 8. */
		    {
			 E T3B, T3K, T3F, T3I, T3J, T3A;
			 T3A = Tf - Tu;
			 T3B = T3z * T3A;
			 T3K = T3C * T3A;
			 T3F = T3D + T3E;
			 T3I = T3G + T3H;
			 T3J = T3F - T3I;
			 ci[0] = T3F + T3I;
			 ci[WS(rs, 8)] = FMA(T3z, T3J, T3K);
			 cr[WS(rs, 8)] = FNMS(T3C, T3J, T3B);
		    }
		    /* Outputs at index 12. */
		    {
			 E T3O, T3P, T3T, T3U;
			 T3O = T3M - T3N;
			 T3P = T3L * T3O;
			 T3T = T3R - T3S;
			 T3U = T3L * T3T;
			 cr[WS(rs, 12)] = FNMS(T3Q, T3T, T3P);
			 ci[WS(rs, 12)] = FMA(T3Q, T3O, T3U);
		    }
		    /* Outputs at index 4. */
		    {
			 E T3V, T3W, T3X, T3Y;
			 T3V = T3M + T3N;
			 T3W = TA * T3V;
			 T3X = T3S + T3R;
			 T3Y = TA * T3X;
			 cr[WS(rs, 4)] = FNMS(TE, T3X, T3W);
			 ci[WS(rs, 4)] = FMA(TE, T3V, T3Y);
		    }
		    /* Outputs at indices 14 and 6. */
		    {
			 E T3j, T3t, T3p, T3x, T3i, T3o;
			 T3i = T37 - T36;
			 T3j = FNMS(KP707106781, T3i, T3h);
			 T3t = FMA(KP707106781, T3i, T3h);
			 T3o = T2Q - T2V;
			 T3p = FNMS(KP707106781, T3o, T3n);
			 T3x = FMA(KP707106781, T3o, T3n);
			 {
			      E T3k, T3q, T3u, T3y;
			      T3k = T3g * T3j;
			      cr[WS(rs, 14)] = FNMS(T3m, T3p, T3k);
			      T3q = T3g * T3p;
			      ci[WS(rs, 14)] = FMA(T3m, T3j, T3q);
			      T3u = T3s * T3t;
			      cr[WS(rs, 6)] = FNMS(T3w, T3x, T3u);
			      T3y = T3s * T3x;
			      ci[WS(rs, 6)] = FMA(T3w, T3t, T3y);
			 }
		    }
		    /* Outputs at indices 10 and 2. */
		    {
			 E T2X, T3b, T39, T3d, T2W, T35, T38;
			 T2W = T2Q + T2V;
			 T2X = FNMS(KP707106781, T2W, T2L);
			 T3b = FMA(KP707106781, T2W, T2L);
			 T35 = T31 + T34;
			 T38 = T36 + T37;
			 T39 = FNMS(KP707106781, T38, T35);
			 T3d = FMA(KP707106781, T38, T35);
			 {
			      E T2Y, T3a, T3c, T3e;
			      T2Y = T2G * T2X;
			      cr[WS(rs, 10)] = FNMS(T30, T39, T2Y);
			      T3a = T30 * T2X;
			      ci[WS(rs, 10)] = FMA(T2G, T39, T3a);
			      T3c = T1V * T3b;
			      cr[WS(rs, 2)] = FNMS(T1X, T3d, T3c);
			      T3e = T1X * T3b;
			      ci[WS(rs, 2)] = FMA(T1V, T3d, T3e);
			 }
		    }
		    /* Outputs at indices 11 and 3. */
		    {
			 E T29, T2l, T2j, T2n;
			 {
			      E T21, T28, T2f, T2i;
			      T21 = FNMS(KP707106781, T20, T1Z);
			      T28 = T24 + T27;
			      T29 = FMA(KP923879532, T28, T21);
			      T2l = FNMS(KP923879532, T28, T21);
			      T2f = FMA(KP707106781, T2e, T2d);
			      T2i = T2g - T2h;
			      T2j = FNMS(KP923879532, T2i, T2f);
			      T2n = FMA(KP923879532, T2i, T2f);
			 }
			 {
			      E T2a, T2k, T2m, T2o;
			      T2a = T1Y * T29;
			      cr[WS(rs, 11)] = FNMS(T2c, T2j, T2a);
			      T2k = T2c * T29;
			      ci[WS(rs, 11)] = FMA(T1Y, T2j, T2k);
			      T2m = Tw * T2l;
			      cr[WS(rs, 3)] = FNMS(Tz, T2n, T2m);
			      T2o = Tz * T2l;
			      ci[WS(rs, 3)] = FMA(Tw, T2n, T2o);
			 }
		    }
		    /* Outputs at indices 13 and 5. */
		    {
			 E T1l, T1E, T1B, T1H;
			 {
			      E TX, T1k, T1x, T1A;
			      TX = FNMS(KP707106781, TW, TL);
			      T1k = T18 - T1j;
			      T1l = FNMS(KP923879532, T1k, TX);
			      T1E = FMA(KP923879532, T1k, TX);
			      T1x = FNMS(KP707106781, T1w, T1t);
			      T1A = T1y - T1z;
			      T1B = FNMS(KP923879532, T1A, T1x);
			      T1H = FMA(KP923879532, T1A, T1x);
			 }
			 {
			      E T1m, T1C, T1F, T1I;
			      T1m = TG * T1l;
			      cr[WS(rs, 13)] = FNMS(T1o, T1B, T1m);
			      T1C = T1o * T1l;
			      ci[WS(rs, 13)] = FMA(TG, T1B, T1C);
			      T1F = T1D * T1E;
			      cr[WS(rs, 5)] = FNMS(T1G, T1H, T1F);
			      T1I = T1G * T1E;
			      ci[WS(rs, 5)] = FMA(T1D, T1H, T1I);
			 }
		    }
		    /* Outputs at indices 7 and 15. */
		    {
			 E T2s, T2A, T2x, T2D;
			 {
			      E T2q, T2r, T2v, T2w;
			      T2q = FMA(KP707106781, T20, T1Z);
			      T2r = T2g + T2h;
			      T2s = FNMS(KP923879532, T2r, T2q);
			      T2A = FMA(KP923879532, T2r, T2q);
			      T2v = FNMS(KP707106781, T2e, T2d);
			      T2w = T27 - T24;
			      T2x = FMA(KP923879532, T2w, T2v);
			      T2D = FNMS(KP923879532, T2w, T2v);
			 }
			 {
			      E T2t, T2y, T2B, T2E;
			      T2t = T2p * T2s;
			      cr[WS(rs, 7)] = FNMS(T2u, T2x, T2t);
			      T2y = T2p * T2x;
			      ci[WS(rs, 7)] = FMA(T2u, T2s, T2y);
			      T2B = T2z * T2A;
			      cr[WS(rs, 15)] = FNMS(T2C, T2D, T2B);
			      T2E = T2z * T2D;
			      ci[WS(rs, 15)] = FMA(T2C, T2A, T2E);
			 }
		    }
		    /* Outputs at indices 9 and 1. */
		    {
			 E T1L, T1R, T1P, T1T;
			 {
			      E T1J, T1K, T1N, T1O;
			      T1J = FMA(KP707106781, TW, TL);
			      T1K = T1y + T1z;
			      T1L = FNMS(KP923879532, T1K, T1J);
			      T1R = FMA(KP923879532, T1K, T1J);
			      T1N = FMA(KP707106781, T1w, T1t);
			      T1O = T1j + T18;
			      T1P = FNMS(KP923879532, T1O, T1N);
			      T1T = FMA(KP923879532, T1O, T1N);
			 }
			 {
			      E T1M, T1Q, T1S, T1U;
			      T1M = TB * T1L;
			      cr[WS(rs, 9)] = FNMS(TF, T1P, T1M);
			      T1Q = TB * T1P;
			      ci[WS(rs, 9)] = FMA(TF, T1L, T1Q);
			      T1S = Tv * T1R;
			      cr[WS(rs, 1)] = FNMS(Ty, T1T, T1S);
			      T1U = Tv * T1T;
			      ci[WS(rs, 1)] = FMA(Ty, T1R, T1U);
			 }
		    }
	       }
	  }
     }
}
Ejemplo n.º 22
0
/*
 * hc2cfdft_4
 *
 * For every m in [mb, me) this routine reads two samples from each of the
 * four arrays Rp, Ip, Rm and Im (offsets 0 and WS(rs, 1)), combines them with
 * the six twiddle values W[0..5] belonging to that m, and writes the eight
 * results, each scaled by 1/2, back to the same eight locations.
 *
 * Rp and Ip advance by +ms per iteration while Rm and Im advance by -ms.
 * W is first moved to entry (mb - 1) * 6 and then advances six entries per
 * iteration.
 *
 * NOTE(review): the name suggests a size-4 halfcomplex-to-complex forward DFT
 * codelet; confirm against the generator that produced it.
 */
static void hc2cfdft_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
	  INT m;
	  for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) {
	       /* All eight inputs are fetched before the first store because
	        * the results overwrite the very same locations. */
	       E rp0 = Rp[0];
	       E rm0 = Rm[0];
	       E ip0 = Ip[0];
	       E im0 = Im[0];
	       E rp1 = Rp[WS(rs, 1)];
	       E rm1 = Rm[WS(rs, 1)];
	       E ip1 = Ip[WS(rs, 1)];
	       E im1 = Im[WS(rs, 1)];

	       /* The six twiddle values of this iteration. */
	       E w0 = W[0];
	       E w1 = W[1];
	       E w2 = W[2];
	       E w3 = W[3];
	       E w4 = W[4];
	       E w5 = W[5];

	       /* Sums and differences of the paired inputs. */
	       E sum_i0 = ip0 + im0;
	       E dif_i0 = ip0 - im0;
	       E sum_r0 = rp0 + rm0;
	       E dif_r0 = rm0 - rp0;
	       E sum_i1 = ip1 + im1;
	       E dif_i1 = ip1 - im1;
	       E dif_r1 = rp1 - rm1;
	       E sum_r1 = rp1 + rm1;

	       /* First halves of the twiddle products ... */
	       E w1_dif_r0 = w1 * dif_r0;
	       E w0_dif_r0 = w0 * dif_r0;
	       E w2_dif_i1 = w2 * dif_i1;
	       E w4_sum_i1 = w4 * sum_i1;
	       E w4_dif_r1 = w4 * dif_r1;
	       E w2_sum_r1 = w2 * sum_r1;

	       /* ... completed with one multiply-add each. */
	       E a_re = FNMS(w1, sum_i0, w0_dif_r0);
	       E a_im = FMA(w0, sum_i0, w1_dif_r0);
	       E b_re = FMA(w3, dif_i1, w2_sum_r1);
	       E b_im = FNMS(w3, sum_r1, w2_dif_i1);
	       E c_im = FNMS(w5, dif_r1, w4_sum_i1);
	       E c_re = FMA(w5, sum_i1, w4_dif_r1);

	       /* Final add/subtract layer. */
	       E r_add = sum_r0 + b_re;
	       E r_sub = sum_r0 - b_re;
	       E i_add = b_im + dif_i0;
	       E i_sub = dif_i0 - b_im;
	       E tw_i_add = c_im + a_im;
	       E tw_i_sub = c_im - a_im;
	       E tw_r_sub = a_re - c_re;
	       E tw_r_add = c_re + a_re;

	       /* Stores, in the same order as the generated original. */
	       Im[0] = KP500000000 * (tw_i_sub - i_sub);
	       Ip[WS(rs, 1)] = KP500000000 * (i_sub + tw_i_sub);
	       Rp[0] = KP500000000 * (r_add + tw_i_add);
	       Rm[WS(rs, 1)] = KP500000000 * (r_add - tw_i_add);
	       Rp[WS(rs, 1)] = KP500000000 * (r_sub + tw_r_add);
	       Rm[0] = KP500000000 * (r_sub - tw_r_add);
	       Im[WS(rs, 1)] = KP500000000 * (tw_r_sub - i_add);
	       Ip[0] = KP500000000 * (i_add + tw_r_sub);
	  }
     }
}
Ejemplo n.º 23
0
/*
 * r2hc_11
 *
 * Runs v independent 11-point transforms.  Each pass reads the eleven values
 * I[0], I[WS(is, 1)], ..., I[WS(is, 10)] and writes
 *
 *   ro[WS(ros, k)]  for k = 0..5   (cosine-weighted sums)
 *   io[WS(ios, k)]  for k = 1..5   (sine-weighted sums; io[0] is not written)
 *
 * Every output is evaluated as a nested chain of multiply-adds.  Multiplying
 * the chained constants out gives the weights cos(2*pi*n*k/11) for ro and
 * -sin(2*pi*n*k/11) for io, i.e. the real-input DFT of length 11.
 *
 * Between passes I advances by ivs while ro and io advance by ovs.
 */
static void r2hc_11(const R *I, R *ro, R *io, stride is, stride ros, stride ios, INT v, INT ivs, INT ovs)
{
     DK(KP959492973, +0.959492973614497389890368057066327699062454848);
     DK(KP876768831, +0.876768831002589333891339807079336796764054852);
     DK(KP918985947, +0.918985947228994779780736114132655398124909697);
     DK(KP989821441, +0.989821441880932732376092037776718787376519372);
     DK(KP778434453, +0.778434453334651800608337670740821884709317477);
     DK(KP830830026, +0.830830026003772851058548298459246407048009821);
     DK(KP715370323, +0.715370323453429719112414662767260662417897278);
     DK(KP634356270, +0.634356270682424498893150776899916060542806975);
     DK(KP342584725, +0.342584725681637509502641509861112333758894680);
     DK(KP521108558, +0.521108558113202722944698153526659300680427422);
     INT i;
     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(ros), MAKE_VOLATILE_STRIDE(ios)) {
	  /* Fetch all eleven inputs before anything is stored. */
	  E x0 = I[0];
	  E x1 = I[WS(is, 1)];
	  E x2 = I[WS(is, 2)];
	  E x3 = I[WS(is, 3)];
	  E x4 = I[WS(is, 4)];
	  E x5 = I[WS(is, 5)];
	  E x6 = I[WS(is, 6)];
	  E x7 = I[WS(is, 7)];
	  E x8 = I[WS(is, 8)];
	  E x9 = I[WS(is, 9)];
	  E x10 = I[WS(is, 10)];

	  /* Sums of the mirrored pairs (n, 11 - n) feed the ro outputs. */
	  E s1 = x1 + x10;
	  E s2 = x2 + x9;
	  E s3 = x3 + x8;
	  E s4 = x4 + x7;
	  E s5 = x5 + x6;

	  /* Differences of the same pairs feed the io outputs; the operand
	   * order of each subtraction is part of the sign bookkeeping. */
	  E d1 = x10 - x1;
	  E d2 = x2 - x9;
	  E d3 = x8 - x3;
	  E d4 = x4 - x7;
	  E d5 = x6 - x5;

	  /* Nested chain for ro[5]. */
	  E re5_a = FNMS(KP342584725, s5, s4);
	  E re5_b = FNMS(KP634356270, re5_a, s3);
	  E re5_c = FNMS(KP778434453, re5_b, s2);
	  E re5_d = FNMS(KP876768831, re5_c, s1);

	  /* Nested chain for ro[4]. */
	  E re4_a = FNMS(KP342584725, s2, s5);
	  E re4_b = FNMS(KP634356270, re4_a, s1);
	  E re4_c = FNMS(KP778434453, re4_b, s3);
	  E re4_d = FNMS(KP876768831, re4_c, s4);

	  /* Nested chain for ro[3]. */
	  E re3_a = FNMS(KP342584725, s1, s3);
	  E re3_b = FNMS(KP634356270, re3_a, s5);
	  E re3_c = FNMS(KP778434453, re3_b, s4);
	  E re3_d = FNMS(KP876768831, re3_c, s2);

	  /* Nested chain for ro[2]. */
	  E re2_a = FNMS(KP342584725, s4, s1);
	  E re2_b = FNMS(KP634356270, re2_a, s2);
	  E re2_c = FNMS(KP778434453, re2_b, s5);
	  E re2_d = FNMS(KP876768831, re2_c, s3);

	  /* Nested chain for ro[1]. */
	  E re1_a = FNMS(KP342584725, s3, s2);
	  E re1_b = FNMS(KP634356270, re1_a, s4);
	  E re1_c = FNMS(KP778434453, re1_b, s1);
	  E re1_d = FNMS(KP876768831, re1_c, s5);

	  /* Nested chain for io[5]. */
	  E im5_a = FMA(KP521108558, d1, d2);
	  E im5_b = FMA(KP715370323, im5_a, d3);
	  E im5_c = FMA(KP830830026, im5_b, d4);

	  /* Nested chain for io[4]. */
	  E im4_a = FNMS(KP521108558, d4, d3);
	  E im4_b = FMA(KP715370323, im4_a, d1);
	  E im4_c = FNMS(KP830830026, im4_b, d5);

	  /* Nested chain for io[3]. */
	  E im3_a = FNMS(KP521108558, d2, d4);
	  E im3_b = FNMS(KP715370323, im3_a, d5);
	  E im3_c = FNMS(KP830830026, im3_b, d3);

	  /* Nested chain for io[2]. */
	  E im2_a = FMA(KP521108558, d3, d5);
	  E im2_b = FMA(KP715370323, im2_a, d2);
	  E im2_c = FNMS(KP830830026, im2_b, d1);

	  /* Nested chain for io[1]. */
	  E im1_a = FMA(KP521108558, d5, d1);
	  E im1_b = FNMS(KP715370323, im1_a, d4);
	  E im1_c = FMA(KP830830026, im1_b, d2);

	  /* Stores are issued in the same order as the generated original. */
	  io[WS(ios, 5)] = KP989821441 * (FMA(KP918985947, im5_c, d5));
	  ro[WS(ros, 5)] = FNMS(KP959492973, re5_d, x0);
	  io[WS(ios, 4)] = KP989821441 * (FNMS(KP918985947, im4_c, d2));
	  ro[WS(ros, 4)] = FNMS(KP959492973, re4_d, x0);
	  ro[0] = x0 + s1 + s2 + s3 + s4 + s5;
	  ro[WS(ros, 3)] = FNMS(KP959492973, re3_d, x0);
	  io[WS(ios, 3)] = KP989821441 * (FNMS(KP918985947, im3_c, d1));
	  ro[WS(ros, 2)] = FNMS(KP959492973, re2_d, x0);
	  io[WS(ios, 2)] = KP989821441 * (FMA(KP918985947, im2_c, d4));
	  io[WS(ios, 1)] = KP989821441 * (FNMS(KP918985947, im1_c, d3));
	  ro[WS(ros, 1)] = FNMS(KP959492973, re1_d, x0);
     }
}
Ejemplo n.º 24
0
/*
 * hf_25
 *
 * In-place pass over the index range m in [mb, me).  Each iteration:
 *
 *   1. reads the 25 pairs cr[WS(rs, k)], ci[WS(rs, k)], k = 0..24;
 *   2. for k = 1..24 rotates pair k with the twiddle pair W[2k-2], W[2k-1]:
 *        re = W[2k-2] * cr + W[2k-1] * ci
 *        im = W[2k-2] * ci - W[2k-1] * cr
 *      (pair 0 is used as loaded);
 *   3. combines the rotated pairs in five groups of five (k = 0,5,10,15,20;
 *      k = 3,8,13,18,23; k = 1,6,11,16,21; k = 4,9,14,19,24;
 *      k = 2,7,12,17,22) and then across the groups;
 *   4. writes every one of the 25 cr slots and 25 ci slots exactly once.
 *
 * cr advances by +ms and ci by -ms per iteration; W is first moved to entry
 * (mb - 1) * 48 and then advances 48 entries (24 twiddle pairs) per iteration.
 *
 * NOTE(review): the name and structure suggest a radix-25 halfcomplex forward
 * twiddle codelet; confirm against the generator that emitted it.
 */
static void hf_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* Numeric constants of the butterflies.  Among them:
      * KP618033988 = (sqrt(5) - 1) / 2, KP559016994 = sqrt(5) / 4,
      * KP951056516 = sin(2*pi/5), KP250000000 = 1/4. */
     DK(KP949179823, +0.949179823508441261575555465843363271711583843);
     DK(KP860541664, +0.860541664367944677098261680920518816412804187);
     DK(KP621716863, +0.621716863012209892444754556304102309693593202);
     DK(KP614372930, +0.614372930789563808870829930444362096004872855);
     DK(KP557913902, +0.557913902031834264187699648465567037992437152);
     DK(KP249506682, +0.249506682107067890488084201715862638334226305);
     DK(KP560319534, +0.560319534973832390111614715371676131169633784);
     DK(KP681693190, +0.681693190061530575150324149145440022633095390);
     DK(KP906616052, +0.906616052148196230441134447086066874408359177);
     DK(KP968479752, +0.968479752739016373193524836781420152702090879);
     DK(KP845997307, +0.845997307939530944175097360758058292389769300);
     DK(KP998026728, +0.998026728428271561952336806863450553336905220);
     DK(KP994076283, +0.994076283785401014123185814696322018529298887);
     DK(KP734762448, +0.734762448793050413546343770063151342619912334);
     DK(KP772036680, +0.772036680810363904029489473607579825330539880);
     DK(KP062914667, +0.062914667253649757225485955897349402364686947);
     DK(KP833417178, +0.833417178328688677408962550243238843138996060);
     DK(KP921177326, +0.921177326965143320250447435415066029359282231);
     DK(KP541454447, +0.541454447536312777046285590082819509052033189);
     DK(KP803003575, +0.803003575438660414833440593570376004635464850);
     DK(KP943557151, +0.943557151597354104399655195398983005179443399);
     DK(KP554608978, +0.554608978404018097464974850792216217022558774);
     DK(KP242145790, +0.242145790282157779872542093866183953459003101);
     DK(KP559154169, +0.559154169276087864842202529084232643714075927);
     DK(KP683113946, +0.683113946453479238701949862233725244439656928);
     DK(KP248028675, +0.248028675328619457762448260696444630363259177);
     DK(KP968583161, +0.968583161128631119490168375464735813836012403);
     DK(KP525970792, +0.525970792408939708442463226536226366643874659);
     DK(KP726211448, +0.726211448929902658173535992263577167607493062);
     DK(KP904730450, +0.904730450839922351881287709692877908104763647);
     DK(KP831864738, +0.831864738706457140726048799369896829771167132);
     DK(KP871714437, +0.871714437527667770979999223229522602943903653);
     DK(KP549754652, +0.549754652192770074288023275540779861653779767);
     DK(KP992114701, +0.992114701314477831049793042785778521453036709);
     DK(KP939062505, +0.939062505817492352556001843133229685779824606);
     DK(KP256756360, +0.256756360367726783319498520922669048172391148);
     DK(KP851038619, +0.851038619207379630836264138867114231259902550);
     DK(KP912575812, +0.912575812670962425556968549836277086778922727);
     DK(KP912018591, +0.912018591466481957908415381764119056233607330);
     DK(KP634619297, +0.634619297544148100711287640319130485732531031);
     DK(KP470564281, +0.470564281212251493087595091036643380879947982);
     DK(KP827271945, +0.827271945972475634034355757144307982555673741);
     DK(KP126329378, +0.126329378446108174786050455341811215027378105);
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
     {
	  INT m;
	  /* One transform per m; cr walks forward, ci walks backward, W moves to the next 48 twiddle values. */
	  for (m = mb, W = W + ((mb - 1) * 48); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) {
	       /* These ten temporaries outlive the nested scopes: they feed the last stores at the end of the loop body. */
	       E T7i, T6o, T6m, T7o, T7m, T7h, T6n, T6f, T7j, T7n;
	       {
		    /* Results of the first stage (per-group sums and differences) that the second stage consumes. */
		    E T6W, T5G, T3Y, T3M, T7q, T70, T6V, T7P, Tt, T3L, T5T, T45, T5Q, T4c, T3G;
		    E T2G, T5P, T49, T5S, T42, T65, T4H, T68, T4A, T2Z, T11, T67, T4x, T64, T4E;
		    E T5Y, T4W, T61, T4P, T3d, T1z, T60, T4M, T5X, T4T, T3g, T1G, T3q, T4q, T4j;
		    E T26, T3i, T1M, T3k, T1S;
		    /* ---- First stage: load, apply twiddles, combine within each group of five. ---- */
		    {
			 E T3u, T2e, T3E, T44, T4b, T2E, T3w, T2k, T3y, T2q;
			 {
			      E T1, T6R, T3P, T7, T3W, Tq, T9, Tc, Tb, T3U, Tk, T3Q, Ta;
			      /* Group k = 0, 5, 10, 15, 20: pair 0 is untwiddled; pairs 5/15/10/20 use W[8..9], W[28..29], W[18..19], W[38..39]. */
			      {
				   E T3, T6, T2, T5;
				   T1 = cr[0];
				   T6R = ci[0];
				   T3 = cr[WS(rs, 5)];
				   T6 = ci[WS(rs, 5)];
				   T2 = W[8];
				   T5 = W[9];
				   {
					E Tm, Tp, To, T3V, Tn, T3O, T4, Tl;
					Tm = cr[WS(rs, 15)];
					Tp = ci[WS(rs, 15)];
					T3O = T2 * T6;
					T4 = T2 * T3;
					Tl = W[28];
					To = W[29];
					T3P = FNMS(T5, T3, T3O);
					T7 = FMA(T5, T6, T4);
					T3V = Tl * Tp;
					Tn = Tl * Tm;
					{
					     E Tg, Tj, Tf, Ti, T3T, Th, T8;
					     Tg = cr[WS(rs, 10)];
					     Tj = ci[WS(rs, 10)];
					     T3W = FNMS(To, Tm, T3V);
					     Tq = FMA(To, Tp, Tn);
					     Tf = W[18];
					     Ti = W[19];
					     T9 = cr[WS(rs, 20)];
					     Tc = ci[WS(rs, 20)];
					     T3T = Tf * Tj;
					     Th = Tf * Tg;
					     T8 = W[38];
					     Tb = W[39];
					     T3U = FNMS(Ti, Tg, T3T);
					     Tk = FMA(Ti, Tj, Th);
					     T3Q = T8 * Tc;
					     Ta = T8 * T9;
					}
				   }
			      }
			      /* Sums and differences of that group, mixed with the 1/4, sqrt(5)/4 and (sqrt(5)-1)/2 constants. */
			      {
				   E T6T, T3X, T6Y, Tr, T3R, Td;
				   T6T = T3U + T3W;
				   T3X = T3U - T3W;
				   T6Y = Tk - Tq;
				   Tr = Tk + Tq;
				   T3R = FNMS(Tb, T9, T3Q);
				   Td = FMA(Tb, Tc, Ta);
				   {
					E T3S, T6Z, Te, T6U, T6S, Ts;
					T3S = T3P - T3R;
					T6S = T3P + T3R;
					T6Z = T7 - Td;
					Te = T7 + Td;
					T6W = T6S - T6T;
					T6U = T6S + T6T;
					T5G = FNMS(KP618033988, T3S, T3X);
					T3Y = FMA(KP618033988, T3X, T3S);
					T3M = Te - Tr;
					Ts = Te + Tr;
					T7q = FMA(KP618033988, T6Y, T6Z);
					T70 = FNMS(KP618033988, T6Z, T6Y);
					T6V = FNMS(KP250000000, T6U, T6R);
					T7P = T6U + T6R;
					Tt = T1 + Ts;
					T3L = FNMS(KP250000000, Ts, T1);
				   }
			      }
			 }
			 /* Group k = 3, 8, 13, 18, 23: twiddles W[4..5], W[14..15], W[24..25], W[34..35], W[44..45]. */
			 {
			      E T2g, T2j, T2m, T3v, T2h, T2p, T2l, T2i, T2o, T3x, T2n;
			      {
				   E T2a, T2d, T29, T2c;
				   T2a = cr[WS(rs, 3)];
				   T2d = ci[WS(rs, 3)];
				   T29 = W[4];
				   T2c = W[5];
				   {
					E T2t, T2w, T2z, T3A, T2u, T2C, T2y, T2v, T2B, T3t, T2b, T2s, T2f;
					T2t = cr[WS(rs, 13)];
					T2w = ci[WS(rs, 13)];
					T3t = T29 * T2d;
					T2b = T29 * T2a;
					T2s = W[24];
					T2z = cr[WS(rs, 18)];
					T3u = FNMS(T2c, T2a, T3t);
					T2e = FMA(T2c, T2d, T2b);
					T3A = T2s * T2w;
					T2u = T2s * T2t;
					T2C = ci[WS(rs, 18)];
					T2y = W[34];
					T2v = W[25];
					T2B = W[35];
					{
					     E T3B, T2x, T3D, T2D, T3C, T2A;
					     T2g = cr[WS(rs, 8)];
					     T3C = T2y * T2C;
					     T2A = T2y * T2z;
					     T3B = FNMS(T2v, T2t, T3A);
					     T2x = FMA(T2v, T2w, T2u);
					     T3D = FNMS(T2B, T2z, T3C);
					     T2D = FMA(T2B, T2C, T2A);
					     T2j = ci[WS(rs, 8)];
					     T2f = W[14];
					     T3E = T3B + T3D;
					     T44 = T3D - T3B;
					     T4b = T2x - T2D;
					     T2E = T2x + T2D;
					}
					T2m = cr[WS(rs, 23)];
					T3v = T2f * T2j;
					T2h = T2f * T2g;
					T2p = ci[WS(rs, 23)];
					T2l = W[44];
					T2i = W[15];
					T2o = W[45];
				   }
			      }
			      T3x = T2l * T2p;
			      T2n = T2l * T2m;
			      T3w = FNMS(T2i, T2g, T3v);
			      T2k = FMA(T2i, T2j, T2h);
			      T3y = FNMS(T2o, T2m, T3x);
			      T2q = FMA(T2o, T2p, T2n);
			 }
			 /* Group k = 1, 6, 11, 16, 21 (twiddles W[0..1], W[10..11], W[20..21], W[30..31], W[40..41]);
			  * the combine step of the previous group is interleaved with these loads. */
			 {
			      E T2N, Tz, T2X, T4G, T4z, TZ, T2P, TF, T2R, TL;
			      {
				   E TB, TE, TH, T2O, TC, TK, TG, TD, TJ, T2Q, TI;
				   {
					E Tv, Ty, Tu, Tx;
					{
					     E T48, T41, T47, T40, T43, T3z;
					     Tv = cr[WS(rs, 1)];
					     T43 = T3y - T3w;
					     T3z = T3w + T3y;
					     {
						  E T4a, T2r, T3F, T2F;
						  T4a = T2k - T2q;
						  T2r = T2k + T2q;
						  T5T = FNMS(KP618033988, T43, T44);
						  T45 = FMA(KP618033988, T44, T43);
						  T3F = T3z + T3E;
						  T48 = T3E - T3z;
						  T5Q = FNMS(KP618033988, T4a, T4b);
						  T4c = FMA(KP618033988, T4b, T4a);
						  T2F = T2r + T2E;
						  T41 = T2E - T2r;
						  T3G = T3u + T3F;
						  T47 = FNMS(KP250000000, T3F, T3u);
						  T2G = T2e + T2F;
						  T40 = FNMS(KP250000000, T2F, T2e);
						  Ty = ci[WS(rs, 1)];
					     }
					     T5P = FMA(KP559016994, T48, T47);
					     T49 = FNMS(KP559016994, T48, T47);
					     T5S = FMA(KP559016994, T41, T40);
					     T42 = FNMS(KP559016994, T41, T40);
					     Tu = W[0];
					}
					Tx = W[1];
					{
					     E TO, TR, TU, T2T, TP, TX, TT, TQ, TW, T2M, Tw, TN, TA;
					     TO = cr[WS(rs, 11)];
					     TR = ci[WS(rs, 11)];
					     T2M = Tu * Ty;
					     Tw = Tu * Tv;
					     TN = W[20];
					     TU = cr[WS(rs, 16)];
					     T2N = FNMS(Tx, Tv, T2M);
					     Tz = FMA(Tx, Ty, Tw);
					     T2T = TN * TR;
					     TP = TN * TO;
					     TX = ci[WS(rs, 16)];
					     TT = W[30];
					     TQ = W[21];
					     TW = W[31];
					     {
						  E T2U, TS, T2W, TY, T2V, TV;
						  TB = cr[WS(rs, 6)];
						  T2V = TT * TX;
						  TV = TT * TU;
						  T2U = FNMS(TQ, TO, T2T);
						  TS = FMA(TQ, TR, TP);
						  T2W = FNMS(TW, TU, T2V);
						  TY = FMA(TW, TX, TV);
						  TE = ci[WS(rs, 6)];
						  TA = W[10];
						  T2X = T2U + T2W;
						  T4G = T2W - T2U;
						  T4z = TY - TS;
						  TZ = TS + TY;
					     }
					     TH = cr[WS(rs, 21)];
					     T2O = TA * TE;
					     TC = TA * TB;
					     TK = ci[WS(rs, 21)];
					     TG = W[40];
					     TD = W[11];
					     TJ = W[41];
					}
				   }
				   T2Q = TG * TK;
				   TI = TG * TH;
				   T2P = FNMS(TD, TB, T2O);
				   TF = FMA(TD, TE, TC);
				   T2R = FNMS(TJ, TH, T2Q);
				   TL = FMA(TJ, TK, TI);
			      }
			      /* Group k = 4, 9, 14, 19, 24 (twiddles W[6..7], W[16..17], W[26..27], W[36..37], W[46..47]);
			       * again interleaved with the combine step of the group loaded just before. */
			      {
				   E T31, T17, T3b, T4V, T4O, T1x, T33, T1d, T35, T1j;
				   {
					E T19, T1c, T1f, T32, T1a, T1i, T1e, T1b, T1h, T34, T1g;
					{
					     E T13, T16, T12, T15;
					     {
						  E T4w, T4D, T4v, T4C, T4F, T2S;
						  T13 = cr[WS(rs, 4)];
						  T4F = T2P - T2R;
						  T2S = T2P + T2R;
						  {
						       E T4y, TM, T2Y, T10;
						       T4y = TL - TF;
						       TM = TF + TL;
						       T65 = FMA(KP618033988, T4F, T4G);
						       T4H = FNMS(KP618033988, T4G, T4F);
						       T2Y = T2S + T2X;
						       T4w = T2S - T2X;
						       T68 = FNMS(KP618033988, T4y, T4z);
						       T4A = FMA(KP618033988, T4z, T4y);
						       T10 = TM + TZ;
						       T4D = TM - TZ;
						       T2Z = T2N + T2Y;
						       T4v = FNMS(KP250000000, T2Y, T2N);
						       T11 = Tz + T10;
						       T4C = FNMS(KP250000000, T10, Tz);
						       T16 = ci[WS(rs, 4)];
						  }
						  T67 = FNMS(KP559016994, T4w, T4v);
						  T4x = FMA(KP559016994, T4w, T4v);
						  T64 = FNMS(KP559016994, T4D, T4C);
						  T4E = FMA(KP559016994, T4D, T4C);
						  T12 = W[6];
					     }
					     T15 = W[7];
					     {
						  E T1m, T1p, T1s, T37, T1n, T1v, T1r, T1o, T1u, T30, T14, T1l, T18;
						  T1m = cr[WS(rs, 14)];
						  T1p = ci[WS(rs, 14)];
						  T30 = T12 * T16;
						  T14 = T12 * T13;
						  T1l = W[26];
						  T1s = cr[WS(rs, 19)];
						  T31 = FNMS(T15, T13, T30);
						  T17 = FMA(T15, T16, T14);
						  T37 = T1l * T1p;
						  T1n = T1l * T1m;
						  T1v = ci[WS(rs, 19)];
						  T1r = W[36];
						  T1o = W[27];
						  T1u = W[37];
						  {
						       E T38, T1q, T3a, T1w, T39, T1t;
						       T19 = cr[WS(rs, 9)];
						       T39 = T1r * T1v;
						       T1t = T1r * T1s;
						       T38 = FNMS(T1o, T1m, T37);
						       T1q = FMA(T1o, T1p, T1n);
						       T3a = FNMS(T1u, T1s, T39);
						       T1w = FMA(T1u, T1v, T1t);
						       T1c = ci[WS(rs, 9)];
						       T18 = W[16];
						       T3b = T38 + T3a;
						       T4V = T3a - T38;
						       T4O = T1w - T1q;
						       T1x = T1q + T1w;
						  }
						  T1f = cr[WS(rs, 24)];
						  T32 = T18 * T1c;
						  T1a = T18 * T19;
						  T1i = ci[WS(rs, 24)];
						  T1e = W[46];
						  T1b = W[17];
						  T1h = W[47];
					     }
					}
					T34 = T1e * T1i;
					T1g = T1e * T1f;
					T33 = FNMS(T1b, T19, T32);
					T1d = FMA(T1b, T1c, T1a);
					T35 = FNMS(T1h, T1f, T34);
					T1j = FMA(T1h, T1i, T1g);
				   }
				   /* Group k = 2, 7, 12, 17, 22 (twiddles W[2..3], W[12..13], W[22..23], W[32..33], W[42..43]);
				    * its own combine step is deferred to the start of the second stage. */
				   {
					E T1I, T1L, T1O, T3h, T1J, T1R, T1N, T1K, T1Q, T3j, T1P;
					{
					     E T1C, T1F, T1B, T1E;
					     {
						  E T4L, T4S, T4K, T4R, T4U, T36;
						  T1C = cr[WS(rs, 2)];
						  T4U = T35 - T33;
						  T36 = T33 + T35;
						  {
						       E T4N, T1k, T3c, T1y;
						       T4N = T1j - T1d;
						       T1k = T1d + T1j;
						       T5Y = FNMS(KP618033988, T4U, T4V);
						       T4W = FMA(KP618033988, T4V, T4U);
						       T3c = T36 + T3b;
						       T4L = T3b - T36;
						       T61 = FNMS(KP618033988, T4N, T4O);
						       T4P = FMA(KP618033988, T4O, T4N);
						       T1y = T1k + T1x;
						       T4S = T1k - T1x;
						       T3d = T31 + T3c;
						       T4K = FNMS(KP250000000, T3c, T31);
						       T1z = T17 + T1y;
						       T4R = FNMS(KP250000000, T1y, T17);
						       T1F = ci[WS(rs, 2)];
						  }
						  T60 = FMA(KP559016994, T4L, T4K);
						  T4M = FNMS(KP559016994, T4L, T4K);
						  T5X = FNMS(KP559016994, T4S, T4R);
						  T4T = FMA(KP559016994, T4S, T4R);
						  T1B = W[2];
					     }
					     T1E = W[3];
					     {
						  E T1V, T1Y, T21, T3m, T1W, T24, T20, T1X, T23, T3f, T1D, T1U, T1H;
						  T1V = cr[WS(rs, 12)];
						  T1Y = ci[WS(rs, 12)];
						  T3f = T1B * T1F;
						  T1D = T1B * T1C;
						  T1U = W[22];
						  T21 = cr[WS(rs, 17)];
						  T3g = FNMS(T1E, T1C, T3f);
						  T1G = FMA(T1E, T1F, T1D);
						  T3m = T1U * T1Y;
						  T1W = T1U * T1V;
						  T24 = ci[WS(rs, 17)];
						  T20 = W[32];
						  T1X = W[23];
						  T23 = W[33];
						  {
						       E T3n, T1Z, T3p, T25, T3o, T22;
						       T1I = cr[WS(rs, 7)];
						       T3o = T20 * T24;
						       T22 = T20 * T21;
						       T3n = FNMS(T1X, T1V, T3m);
						       T1Z = FMA(T1X, T1Y, T1W);
						       T3p = FNMS(T23, T21, T3o);
						       T25 = FMA(T23, T24, T22);
						       T1L = ci[WS(rs, 7)];
						       T1H = W[12];
						       T3q = T3n + T3p;
						       T4q = T3n - T3p;
						       T4j = T25 - T1Z;
						       T26 = T1Z + T25;
						  }
						  T1O = cr[WS(rs, 22)];
						  T3h = T1H * T1L;
						  T1J = T1H * T1I;
						  T1R = ci[WS(rs, 22)];
						  T1N = W[42];
						  T1K = W[13];
						  T1Q = W[43];
					     }
					}
					T3j = T1N * T1R;
					T1P = T1N * T1O;
					T3i = FNMS(T1K, T1I, T3h);
					T1M = FMA(T1K, T1L, T1J);
					T3k = FNMS(T1Q, T1O, T3j);
					T1S = FMA(T1Q, T1R, T1P);
				   }
			      }
			 }
		    }
		    /* ---- Second stage: every input has been read by now; combine across the five groups and store. ---- */
		    {
			 E T7Q, T5M, T5J, T7R, T5I, T5L, T7X, T7W, T5F, T6X, T5u, T7M, T7O, T5C, T5E;
			 E T5t, T7J, T7N;
			 {
			      E T4r, T4k, T4h, T4o, T3K, T3I, T1A, T2H, T28;
			      /* Combine step of the last-loaded group (k = 2, 7, 12, 17, 22). */
			      {
				   E T3e, T4g, T4n, T4f, T4m, T3H, T4p, T3l;
				   T7Q = T2Z + T3d;
				   T3e = T2Z - T3d;
				   T4p = T3k - T3i;
				   T3l = T3i + T3k;
				   {
					E T4i, T1T, T3r, T27, T3s;
					T4i = T1S - T1M;
					T1T = T1M + T1S;
					T5M = FMA(KP618033988, T4p, T4q);
					T4r = FNMS(KP618033988, T4q, T4p);
					T3r = T3l + T3q;
					T4g = T3q - T3l;
					T5J = FNMS(KP618033988, T4i, T4j);
					T4k = FMA(KP618033988, T4j, T4i);
					T27 = T1T + T26;
					T4n = T26 - T1T;
					T3s = T3g + T3r;
					T4f = FNMS(KP250000000, T3r, T3g);
					T28 = T1G + T27;
					T4m = FNMS(KP250000000, T27, T1G);
					T3H = T3s - T3G;
					T7R = T3s + T3G;
				   }
				   T5I = FMA(KP559016994, T4g, T4f);
				   T4h = FNMS(KP559016994, T4g, T4f);
				   T5L = FMA(KP559016994, T4n, T4m);
				   T4o = FNMS(KP559016994, T4n, T4m);
				   T3K = FNMS(KP618033988, T3e, T3H);
				   T3I = FMA(KP618033988, T3H, T3e);
			      }
			      T1A = T11 + T1z;
			      T7X = T1z - T11;
			      T7W = T28 - T2G;
			      T2H = T28 + T2G;
			      {
				   E T3Z, T5d, T7r, T7D, T5h, T5i, T5m, T5l, T59, T7K, T56, T7L, T7I, T7G, T52;
				   E T50, T5w, T5g, T5q, T5A, T3N, T7p;
				   T3N = FMA(KP559016994, T3M, T3L);
				   T5F = FNMS(KP559016994, T3M, T3L);
				   T6X = FNMS(KP559016994, T6W, T6V);
				   T7p = FMA(KP559016994, T6W, T6V);
				   {
					E T5o, T5p, T57, T4e, T4Y, T55, T4l, T4s, T4B, T5f, T5e, T4I;
					{
					     E T46, T2K, T2J, T4d, T2I;
					     T46 = FMA(KP951056516, T45, T42);
					     T5o = FNMS(KP951056516, T45, T42);
					     T2I = T1A + T2H;
					     T2K = T1A - T2H;
					     T3Z = FNMS(KP951056516, T3Y, T3N);
					     T5d = FMA(KP951056516, T3Y, T3N);
					     T7r = FNMS(KP951056516, T7q, T7p);
					     T7D = FMA(KP951056516, T7q, T7p);
					     /* First store of the iteration: cr[0] receives the sum of the five per-group cr-side totals (Tt, T11, T1z, T28, T2G). */
					     cr[0] = Tt + T2I;
					     T2J = FNMS(KP250000000, T2I, Tt);
					     T5p = FNMS(KP951056516, T4c, T49);
					     T4d = FMA(KP951056516, T4c, T49);
					     {
						  E T4Q, T4X, T2L, T3J;
						  T4Q = FNMS(KP951056516, T4P, T4M);
						  T5h = FMA(KP951056516, T4P, T4M);
						  T5i = FNMS(KP951056516, T4W, T4T);
						  T4X = FMA(KP951056516, T4W, T4T);
						  T2L = FMA(KP559016994, T2K, T2J);
						  T3J = FNMS(KP559016994, T2K, T2J);
						  T57 = FMA(KP126329378, T46, T4d);
						  T4e = FNMS(KP126329378, T4d, T46);
						  cr[WS(rs, 5)] = FMA(KP951056516, T3I, T2L);
						  ci[WS(rs, 4)] = FNMS(KP951056516, T3I, T2L);
						  ci[WS(rs, 9)] = FMA(KP951056516, T3K, T3J);
						  cr[WS(rs, 10)] = FNMS(KP951056516, T3K, T3J);
						  T4Y = FMA(KP827271945, T4X, T4Q);
						  T55 = FNMS(KP827271945, T4Q, T4X);
					     }
					}
					T4l = FNMS(KP951056516, T4k, T4h);
					T5m = FMA(KP951056516, T4k, T4h);
					T5l = FNMS(KP951056516, T4r, T4o);
					T4s = FMA(KP951056516, T4r, T4o);
					T4B = FNMS(KP951056516, T4A, T4x);
					T5f = FMA(KP951056516, T4A, T4x);
					T5e = FMA(KP951056516, T4H, T4E);
					T4I = FNMS(KP951056516, T4H, T4E);
					{
					     E T4u, T4Z, T4t, T58;
					     T4t = FNMS(KP470564281, T4s, T4l);
					     T58 = FMA(KP470564281, T4l, T4s);
					     {
						  E T4J, T54, T7E, T7F;
						  T4J = FMA(KP634619297, T4I, T4B);
						  T54 = FNMS(KP634619297, T4B, T4I);
						  T59 = FNMS(KP912018591, T58, T57);
						  T7E = FMA(KP912018591, T58, T57);
						  T7K = FMA(KP912018591, T4t, T4e);
						  T4u = FNMS(KP912018591, T4t, T4e);
						  T56 = FMA(KP912575812, T55, T54);
						  T7F = FNMS(KP912575812, T55, T54);
						  T7L = FMA(KP912575812, T4Y, T4J);
						  T4Z = FNMS(KP912575812, T4Y, T4J);
						  T7I = FNMS(KP851038619, T7F, T7E);
						  T7G = FMA(KP851038619, T7F, T7E);
					     }
					     T52 = FMA(KP851038619, T4Z, T4u);
					     T50 = FNMS(KP851038619, T4Z, T4u);
					}
					T5w = FNMS(KP256756360, T5e, T5f);
					T5g = FMA(KP256756360, T5f, T5e);
					T5q = FMA(KP939062505, T5p, T5o);
					T5A = FNMS(KP939062505, T5o, T5p);
				   }
				   {
					E T5y, T7z, T5B, T7y, T7w, T7u, T5s;
					{
					     E T5k, T5r, T5j, T5x;
					     cr[WS(rs, 4)] = FNMS(KP992114701, T50, T3Z);
					     T5j = FMA(KP634619297, T5i, T5h);
					     T5x = FNMS(KP634619297, T5h, T5i);
					     {
						  E T5n, T5z, T7s, T7t;
						  T5n = FMA(KP549754652, T5m, T5l);
						  T5z = FNMS(KP549754652, T5l, T5m);
						  T5y = FMA(KP871714437, T5x, T5w);
						  T7s = FNMS(KP871714437, T5x, T5w);
						  T7z = FNMS(KP871714437, T5j, T5g);
						  T5k = FMA(KP871714437, T5j, T5g);
						  T5B = FNMS(KP831864738, T5A, T5z);
						  T7t = FMA(KP831864738, T5A, T5z);
						  T7y = FNMS(KP831864738, T5q, T5n);
						  T5r = FMA(KP831864738, T5q, T5n);
						  T7w = FNMS(KP904730450, T7t, T7s);
						  T7u = FMA(KP904730450, T7t, T7s);
					     }
					     ci[WS(rs, 20)] = FNMS(KP992114701, T7G, T7D);
					     T5u = FNMS(KP904730450, T5r, T5k);
					     T5s = FMA(KP904730450, T5r, T5k);
					}
					{
					     E T5a, T5c, T7A, T7C, T7v, T53, T5b, T51, T7H, T7x, T7B;
					     T5a = FNMS(KP726211448, T59, T56);
					     T5c = FMA(KP525970792, T56, T59);
					     ci[WS(rs, 23)] = FMA(KP968583161, T7u, T7r);
					     cr[WS(rs, 1)] = FMA(KP968583161, T5s, T5d);
					     T51 = FMA(KP248028675, T50, T3Z);
					     T7A = FNMS(KP683113946, T7z, T7y);
					     T7C = FMA(KP559154169, T7y, T7z);
					     T7v = FNMS(KP242145790, T7u, T7r);
					     T53 = FMA(KP554608978, T52, T51);
					     T5b = FNMS(KP554608978, T52, T51);
					     T7M = FNMS(KP525970792, T7L, T7K);
					     T7O = FMA(KP726211448, T7K, T7L);
					     ci[WS(rs, 10)] = FNMS(KP943557151, T5c, T5b);
					     ci[WS(rs, 5)] = FMA(KP943557151, T5c, T5b);
					     ci[0] = FMA(KP803003575, T5a, T53);
					     cr[WS(rs, 9)] = FNMS(KP803003575, T5a, T53);
					     T7x = FNMS(KP541454447, T7w, T7v);
					     T7B = FMA(KP541454447, T7w, T7v);
					     T7H = FMA(KP248028675, T7G, T7D);
					     cr[WS(rs, 21)] = -(FMA(KP921177326, T7C, T7B));
					     ci[WS(rs, 18)] = FNMS(KP921177326, T7C, T7B);
					     ci[WS(rs, 13)] = FMA(KP833417178, T7A, T7x);
					     cr[WS(rs, 16)] = FMS(KP833417178, T7A, T7x);
					     T5C = FMA(KP559154169, T5B, T5y);
					     T5E = FNMS(KP683113946, T5y, T5B);
					     T5t = FNMS(KP242145790, T5s, T5d);
					     T7J = FNMS(KP554608978, T7I, T7H);
					     T7N = FMA(KP554608978, T7I, T7H);
					}
				   }
			      }
			 }
			 {
			      E T7Y, T80, T5v, T5D;
			      cr[WS(rs, 24)] = -(FMA(KP803003575, T7O, T7N));
			      ci[WS(rs, 15)] = FNMS(KP803003575, T7O, T7N);
			      cr[WS(rs, 19)] = FMS(KP943557151, T7M, T7J);
			      cr[WS(rs, 14)] = -(FMA(KP943557151, T7M, T7J));
			      T5v = FMA(KP541454447, T5u, T5t);
			      T5D = FNMS(KP541454447, T5u, T5t);
			      cr[WS(rs, 11)] = FNMS(KP833417178, T5E, T5D);
			      ci[WS(rs, 8)] = FMA(KP833417178, T5E, T5D);
			      cr[WS(rs, 6)] = FMA(KP921177326, T5C, T5v);
			      ci[WS(rs, 3)] = FNMS(KP921177326, T5C, T5v);
			      T7Y = FMA(KP618033988, T7X, T7W);
			      T80 = FNMS(KP618033988, T7W, T7X);
			      {
				   E T6t, T6p, T5H, T7d, T71, T6u, T6y, T6x, T6l, T7k, T6i, T7l, T7g, T6c, T6e;
				   E T6s, T6L, T6J, T6C;
				   {
					E T6A, T6B, T5O, T6j, T6h, T6a, T6q, T5R, T5U, T6r, T5Z, T62;
					{
					     E T5K, T7U, T7T, T5N, T7S;
					     T6t = FNMS(KP951056516, T5J, T5I);
					     T5K = FMA(KP951056516, T5J, T5I);
					     T7U = T7Q - T7R;
					     T7S = T7Q + T7R;
					     T6p = FNMS(KP951056516, T5G, T5F);
					     T5H = FMA(KP951056516, T5G, T5F);
					     T7d = FNMS(KP951056516, T70, T6X);
					     T71 = FMA(KP951056516, T70, T6X);
					     /* ci[24] receives the sum of the five per-group ci-side totals (T7P plus T2Z, T3d, T3s, T3G). */
					     ci[WS(rs, 24)] = T7S + T7P;
					     T7T = FNMS(KP250000000, T7S, T7P);
					     T5N = FMA(KP951056516, T5M, T5L);
					     T6u = FNMS(KP951056516, T5M, T5L);
					     {
						  E T66, T69, T7Z, T7V;
						  T6A = FMA(KP951056516, T65, T64);
						  T66 = FNMS(KP951056516, T65, T64);
						  T69 = FMA(KP951056516, T68, T67);
						  T6B = FNMS(KP951056516, T68, T67);
						  T7Z = FMA(KP559016994, T7U, T7T);
						  T7V = FNMS(KP559016994, T7U, T7T);
						  T5O = FMA(KP062914667, T5N, T5K);
						  T6j = FNMS(KP062914667, T5K, T5N);
						  ci[WS(rs, 14)] = FMA(KP951056516, T7Y, T7V);
						  cr[WS(rs, 15)] = FMS(KP951056516, T7Y, T7V);
						  ci[WS(rs, 19)] = FMA(KP951056516, T80, T7Z);
						  cr[WS(rs, 20)] = FMS(KP951056516, T80, T7Z);
						  T6h = FNMS(KP939062505, T66, T69);
						  T6a = FMA(KP939062505, T69, T66);
					     }
					}
					T6q = FMA(KP951056516, T5Q, T5P);
					T5R = FNMS(KP951056516, T5Q, T5P);
					T5U = FNMS(KP951056516, T5T, T5S);
					T6r = FMA(KP951056516, T5T, T5S);
					T6y = FMA(KP951056516, T5Y, T5X);
					T5Z = FNMS(KP951056516, T5Y, T5X);
					T62 = FMA(KP951056516, T61, T60);
					T6x = FNMS(KP951056516, T61, T60);
					{
					     E T5W, T6b, T6k, T5V;
					     T6k = FMA(KP827271945, T5R, T5U);
					     T5V = FNMS(KP827271945, T5U, T5R);
					     {
						  E T6g, T63, T7e, T7f;
						  T6g = FMA(KP126329378, T5Z, T62);
						  T63 = FNMS(KP126329378, T62, T5Z);
						  T7e = FMA(KP772036680, T6k, T6j);
						  T6l = FNMS(KP772036680, T6k, T6j);
						  T5W = FMA(KP772036680, T5V, T5O);
						  T7k = FNMS(KP772036680, T5V, T5O);
						  T7f = FNMS(KP734762448, T6h, T6g);
						  T6i = FMA(KP734762448, T6h, T6g);
						  T6b = FNMS(KP734762448, T6a, T63);
						  T7l = FMA(KP734762448, T6a, T63);
						  T7g = FMA(KP994076283, T7f, T7e);
						  T7i = FNMS(KP994076283, T7f, T7e);
					     }
					     T6c = FNMS(KP994076283, T6b, T5W);
					     T6e = FMA(KP994076283, T6b, T5W);
					}
					T6s = FMA(KP062914667, T6r, T6q);
					T6L = FNMS(KP062914667, T6q, T6r);
					T6J = FNMS(KP549754652, T6A, T6B);
					T6C = FMA(KP549754652, T6B, T6A);
				   }
				   {
					E T6N, T78, T6K, T79, T74, T76, T6E, T6G;
					{
					     E T6w, T6D, T6M, T6v;
					     cr[WS(rs, 3)] = FMA(KP998026728, T6c, T5H);
					     T6M = FNMS(KP634619297, T6t, T6u);
					     T6v = FMA(KP634619297, T6u, T6t);
					     {
						  E T6I, T6z, T72, T73;
						  T6I = FMA(KP470564281, T6x, T6y);
						  T6z = FNMS(KP470564281, T6y, T6x);
						  T72 = FMA(KP845997307, T6M, T6L);
						  T6N = FNMS(KP845997307, T6M, T6L);
						  T6w = FMA(KP845997307, T6v, T6s);
						  T78 = FNMS(KP845997307, T6v, T6s);
						  T73 = FNMS(KP968479752, T6J, T6I);
						  T6K = FMA(KP968479752, T6J, T6I);
						  T6D = FMA(KP968479752, T6C, T6z);
						  T79 = FNMS(KP968479752, T6C, T6z);
						  T74 = FMA(KP906616052, T73, T72);
						  T76 = FNMS(KP906616052, T73, T72);
					     }
					     ci[WS(rs, 21)] = FNMS(KP998026728, T7g, T7d);
					     T6E = FMA(KP906616052, T6D, T6w);
					     T6G = FNMS(KP906616052, T6D, T6w);
					}
					{
					     E T7c, T7a, T6Q, T6O, T6F, T7b, T77, T75, T6d, T6P, T6H;
					     T7c = FMA(KP681693190, T78, T79);
					     T7a = FNMS(KP560319534, T79, T78);
					     ci[WS(rs, 22)] = FNMS(KP998026728, T74, T71);
					     cr[WS(rs, 2)] = FMA(KP998026728, T6E, T6p);
					     T75 = FMA(KP249506682, T74, T71);
					     T6Q = FNMS(KP560319534, T6K, T6N);
					     T6O = FMA(KP681693190, T6N, T6K);
					     T6F = FNMS(KP249506682, T6E, T6p);
					     T7b = FMA(KP557913902, T76, T75);
					     T77 = FNMS(KP557913902, T76, T75);
					     T6o = FMA(KP614372930, T6i, T6l);
					     T6m = FNMS(KP621716863, T6l, T6i);
					     cr[WS(rs, 22)] = FMS(KP860541664, T7c, T7b);
					     ci[WS(rs, 17)] = FMA(KP860541664, T7c, T7b);
					     ci[WS(rs, 12)] = FNMS(KP949179823, T7a, T77);
					     cr[WS(rs, 17)] = -(FMA(KP949179823, T7a, T77));
					     T6P = FMA(KP557913902, T6G, T6F);
					     T6H = FNMS(KP557913902, T6G, T6F);
					     T6d = FNMS(KP249506682, T6c, T5H);
					     ci[WS(rs, 7)] = FMA(KP949179823, T6Q, T6P);
					     cr[WS(rs, 12)] = FNMS(KP949179823, T6Q, T6P);
					     cr[WS(rs, 7)] = FMA(KP860541664, T6O, T6H);
					     ci[WS(rs, 2)] = FNMS(KP860541664, T6O, T6H);
					     T7o = FMA(KP621716863, T7k, T7l);
					     T7m = FNMS(KP614372930, T7l, T7k);
					     T7h = FMA(KP249506682, T7g, T7d);
					     T6n = FMA(KP557913902, T6e, T6d);
					     T6f = FNMS(KP557913902, T6e, T6d);
					}
				   }
			      }
			 }
		    }
	       }
	       /* Last eight stores, built from the temporaries declared at the top of the loop body. */
	       ci[WS(rs, 6)] = FNMS(KP949179823, T6o, T6n);
	       ci[WS(rs, 11)] = FMA(KP949179823, T6o, T6n);
	       cr[WS(rs, 8)] = FMA(KP943557151, T6m, T6f);
	       ci[WS(rs, 1)] = FNMS(KP943557151, T6m, T6f);
	       T7j = FNMS(KP557913902, T7i, T7h);
	       T7n = FMA(KP557913902, T7i, T7h);
	       cr[WS(rs, 23)] = -(FMA(KP943557151, T7o, T7n));
	       ci[WS(rs, 16)] = FNMS(KP943557151, T7o, T7n);
	       cr[WS(rs, 18)] = FMS(KP949179823, T7m, T7j);
	       cr[WS(rs, 13)] = -(FMA(KP949179823, T7m, T7j));
	  }
     }
}
Ejemplo n.º 25
0
/*
 * q1_3 -- for every m in [mb, me) process a 3 x 3 tile of complex values whose
 * real parts live in rio and whose imaginary parts live in iio.
 *
 * Tile element (row v, column j) is read from offset WS(vs, v) + WS(rs, j).
 * For each row the three columns x0, x1, x2 are combined into three results:
 *   result 0 = x0 + x1 + x2
 *   result 1 = x0 - (x1 + x2)/2 + KP866025403 * (im(x1 - x2), re(x2 - x1))
 *   result 2 = x0 - (x1 + x2)/2 - KP866025403 * (im(x1 - x2), re(x2 - x1))
 * Result 0 is stored as is.  Result 1 is multiplied by the pair (W[0], W[1])
 * and result 2 by (W[2], W[3]) using (wr*re + wi*im, wr*im - wi*re).
 * Result k of row v is written to offset WS(vs, k) + WS(rs, v), i.e. at the
 * transposed position relative to where the row was read.
 *
 * All 18 inputs are loaded before the first store, so the tile is updated in
 * place.  After each iteration rio and iio advance by ms and W by 4 entries;
 * W initially skips mb * 4 entries.
 */
static void q1_3(float *rio, float *iio, const float *W, stride rs, stride vs, INT mb, INT me, INT ms)
{
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT m;

     W = W + (mb * 4);
     for (m = mb; m < me; m = m + 1, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(vs)) {
	  /* Inputs, named <part><row><column>. */
	  E re00, re01, re02, im00, im01, im02;
	  E re10, re11, re12, im10, im11, im12;
	  E re20, re21, re22, im20, im21, im22;
	  /* Per-row sums/differences of columns 1 and 2, and x0 - sum/2. */
	  E resum0, redif0, imsum0, imdif0, rebase0, imbase0;
	  E resum1, redif1, imsum1, imdif1, rebase1, imbase1;
	  E resum2, redif2, imsum2, imdif2, rebase2, imbase2;
	  /* Per-row result 1 ("up") and result 2 ("dn") before the W product. */
	  E up_re0, up_im0, dn_re0, dn_im0;
	  E up_re1, up_im1, dn_re1, dn_im1;
	  E up_re2, up_im2, dn_re2, dn_im2;

	  /* ---- load the whole tile ---- */
	  re00 = rio[0];
	  re01 = rio[WS(rs, 1)];
	  re02 = rio[WS(rs, 2)];
	  im00 = iio[0];
	  im01 = iio[WS(rs, 1)];
	  im02 = iio[WS(rs, 2)];

	  re10 = rio[WS(vs, 1)];
	  re11 = rio[WS(vs, 1) + WS(rs, 1)];
	  re12 = rio[WS(vs, 1) + WS(rs, 2)];
	  im10 = iio[WS(vs, 1)];
	  im11 = iio[WS(vs, 1) + WS(rs, 1)];
	  im12 = iio[WS(vs, 1) + WS(rs, 2)];

	  re20 = rio[WS(vs, 2)];
	  re21 = rio[WS(vs, 2) + WS(rs, 1)];
	  re22 = rio[WS(vs, 2) + WS(rs, 2)];
	  im20 = iio[WS(vs, 2)];
	  im21 = iio[WS(vs, 2) + WS(rs, 1)];
	  im22 = iio[WS(vs, 2) + WS(rs, 2)];

	  /* ---- row 0 ---- */
	  resum0 = re01 + re02;
	  redif0 = re02 - re01;
	  imdif0 = im01 - im02;
	  imsum0 = im01 + im02;
	  rebase0 = FNMS(KP500000000, resum0, re00);
	  imbase0 = FNMS(KP500000000, imsum0, im00);
	  up_re0 = FMA(KP866025403, imdif0, rebase0);
	  up_im0 = FMA(KP866025403, redif0, imbase0);
	  dn_re0 = FNMS(KP866025403, imdif0, rebase0);
	  dn_im0 = FNMS(KP866025403, redif0, imbase0);

	  /* ---- row 1 ---- */
	  resum1 = re11 + re12;
	  redif1 = re12 - re11;
	  imdif1 = im11 - im12;
	  imsum1 = im11 + im12;
	  rebase1 = FNMS(KP500000000, resum1, re10);
	  imbase1 = FNMS(KP500000000, imsum1, im10);
	  up_re1 = FMA(KP866025403, imdif1, rebase1);
	  up_im1 = FMA(KP866025403, redif1, imbase1);
	  dn_re1 = FNMS(KP866025403, imdif1, rebase1);
	  dn_im1 = FNMS(KP866025403, redif1, imbase1);

	  /* ---- row 2 ---- */
	  resum2 = re21 + re22;
	  redif2 = re22 - re21;
	  imdif2 = im21 - im22;
	  imsum2 = im21 + im22;
	  rebase2 = FNMS(KP500000000, resum2, re20);
	  imbase2 = FNMS(KP500000000, imsum2, im20);
	  up_re2 = FMA(KP866025403, imdif2, rebase2);
	  up_im2 = FMA(KP866025403, redif2, imbase2);
	  dn_re2 = FNMS(KP866025403, imdif2, rebase2);
	  dn_im2 = FNMS(KP866025403, redif2, imbase2);

	  /* ---- result 0 of every row: plain sums, stored along rs ---- */
	  rio[0] = re00 + resum0;
	  iio[0] = im00 + imsum0;
	  rio[WS(rs, 1)] = re10 + resum1;
	  iio[WS(rs, 1)] = im10 + imsum1;
	  iio[WS(rs, 2)] = im20 + imsum2;
	  rio[WS(rs, 2)] = re20 + resum2;

	  /* ---- row 0 result 1, then row 2 result 2 ---- */
	  {
	       E w0 = W[0], w1 = W[1], w2 = W[2], w3 = W[3];
	       E im_part, re_part;

	       im_part = w0 * up_im0;
	       re_part = w0 * up_re0;
	       iio[WS(vs, 1)] = FNMS(w1, up_re0, im_part);
	       rio[WS(vs, 1)] = FMA(w1, up_im0, re_part);

	       im_part = w2 * dn_im2;
	       re_part = w2 * dn_re2;
	       iio[WS(vs, 2) + WS(rs, 2)] = FNMS(w3, dn_re2, im_part);
	       rio[WS(vs, 2) + WS(rs, 2)] = FMA(w3, dn_im2, re_part);
	  }

	  /* ---- row 1 result 2, then row 1 result 1 ---- */
	  {
	       E w2 = W[2], w3 = W[3], w0 = W[0], w1 = W[1];
	       E im_part, re_part;

	       im_part = w2 * dn_im1;
	       re_part = w2 * dn_re1;
	       iio[WS(vs, 2) + WS(rs, 1)] = FNMS(w3, dn_re1, im_part);
	       rio[WS(vs, 2) + WS(rs, 1)] = FMA(w3, dn_im1, re_part);

	       im_part = w0 * up_im1;
	       re_part = w0 * up_re1;
	       iio[WS(vs, 1) + WS(rs, 1)] = FNMS(w1, up_re1, im_part);
	       rio[WS(vs, 1) + WS(rs, 1)] = FMA(w1, up_im1, re_part);
	  }

	  /* ---- row 2 result 1, then row 0 result 2 ---- */
	  {
	       E w0 = W[0], w1 = W[1], w2 = W[2], w3 = W[3];
	       E im_part, re_part;

	       im_part = w0 * up_im2;
	       re_part = w0 * up_re2;
	       iio[WS(vs, 1) + WS(rs, 2)] = FNMS(w1, up_re2, im_part);
	       rio[WS(vs, 1) + WS(rs, 2)] = FMA(w1, up_im2, re_part);

	       im_part = w2 * dn_im0;
	       re_part = w2 * dn_re0;
	       iio[WS(vs, 2)] = FNMS(w3, dn_re0, im_part);
	       rio[WS(vs, 2)] = FMA(w3, dn_im0, re_part);
	  }

	  /* Step to the next tile and the next set of four coefficients. */
	  rio = rio + ms;
	  iio = iio + ms;
	  W = W + 4;
     }
}
Ejemplo n.º 26
0
/*
 * hf_25: for each m in [mb, me), combines the 25 value pairs
 * (cr[WS(rs, k)], ci[WS(rs, k)]), k = 0..24, and overwrites those same 50
 * locations with the results.
 *
 * Per iteration:
 *   - For k >= 1 the pair is first multiplied by the coefficients
 *     w0 = W[2k-2], w1 = W[2k-1] as (w0*cr + w1*ci, w0*ci - w1*cr); the pair
 *     for k = 0 is used unchanged.
 *   - The 25 values are processed in five groups of five whose indices differ
 *     by 5 (k = 0,5,..,20; 1,6,..,21; 2,7,..,22; 3,8,..,23; 4,9,..,24).  Each
 *     group is reduced with the constants KP559016994, KP250000000,
 *     KP951056516 and KP587785252.
 *   - The group results are scaled by pairs of the remaining KP* constants
 *     and combined across groups with the same four constants.
 *   - All loads from cr, ci and W happen before the first store, so the
 *     update is in place.
 *
 * cr advances by ms and ci retreats by ms after every iteration; W advances
 * by 48 entries and initially skips (mb - 1) * 48 entries.
 *
 * NOTE(review): the layout matches a machine-generated radix-25 half-complex
 * twiddle codelet; the statement order looks schedule-driven, so avoid
 * hand-editing individual lines -- confirm against the generator output.
 */
static void hf_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP998026728, +0.998026728428271561952336806863450553336905220);
     DK(KP062790519, +0.062790519529313376076178224565631133122484832);
     DK(KP684547105, +0.684547105928688673732283357621209269889519233);
     DK(KP728968627, +0.728968627421411523146730319055259111372571664);
     DK(KP481753674, +0.481753674101715274987191502872129653528542010);
     DK(KP876306680, +0.876306680043863587308115903922062583399064238);
     DK(KP248689887, +0.248689887164854788242283746006447968417567406);
     DK(KP968583161, +0.968583161128631119490168375464735813836012403);
     DK(KP992114701, +0.992114701314477831049793042785778521453036709);
     DK(KP125333233, +0.125333233564304245373118759816508793942918247);
     DK(KP425779291, +0.425779291565072648862502445744251703979973042);
     DK(KP904827052, +0.904827052466019527713668647932697593970413911);
     DK(KP637423989, +0.637423989748689710176712811676016195434917298);
     DK(KP770513242, +0.770513242775789230803009636396177847271667672);
     DK(KP844327925, +0.844327925502015078548558063966681505381659241);
     DK(KP535826794, +0.535826794978996618271308767867639978063575346);
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     {
	  INT m;
	  for (m = mb, W = W + ((mb - 1) * 48); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) {
	       /* Values that survive from the five input groups into the output stages. */
	       E T1, T6b, T2l, T6g, To, T2m, T6e, T6f, T6a, T6H, T2u, T4I, T2i, T60, T3S;
	       E T5D, T4r, T58, T3Z, T5C, T4q, T5b, TS, T5W, T2G, T5s, T4g, T4M, T2R, T5t;
	       E T4h, T4P, T1l, T5X, T37, T5v, T4k, T4T, T3e, T5w, T4j, T4W, T1P, T5Z, T3v;
	       E T5A, T4o, T54, T3C, T5z, T4n, T51;
	       /* Group k = 0, 5, 10, 15, 20: k = 0 is read as is, the others are multiplied by their W pair. */
	       {
		    E T6, T2o, Tb, T2p, Tc, T6c, Th, T2r, Tm, T2s, Tn, T6d;
		    T1 = cr[0];
		    T6b = ci[0];
		    {
			 E T3, T5, T2, T4;
			 T3 = cr[WS(rs, 5)];
			 T5 = ci[WS(rs, 5)];
			 T2 = W[8];
			 T4 = W[9];
			 T6 = FMA(T2, T3, T4 * T5);
			 T2o = FNMS(T4, T3, T2 * T5);
		    }
		    {
			 E T8, Ta, T7, T9;
			 T8 = cr[WS(rs, 20)];
			 Ta = ci[WS(rs, 20)];
			 T7 = W[38];
			 T9 = W[39];
			 Tb = FMA(T7, T8, T9 * Ta);
			 T2p = FNMS(T9, T8, T7 * Ta);
		    }
		    Tc = T6 + Tb;
		    T6c = T2o + T2p;
		    {
			 E Te, Tg, Td, Tf;
			 Te = cr[WS(rs, 10)];
			 Tg = ci[WS(rs, 10)];
			 Td = W[18];
			 Tf = W[19];
			 Th = FMA(Td, Te, Tf * Tg);
			 T2r = FNMS(Tf, Te, Td * Tg);
		    }
		    {
			 E Tj, Tl, Ti, Tk;
			 Tj = cr[WS(rs, 15)];
			 Tl = ci[WS(rs, 15)];
			 Ti = W[28];
			 Tk = W[29];
			 Tm = FMA(Ti, Tj, Tk * Tl);
			 T2s = FNMS(Tk, Tj, Ti * Tl);
		    }
		    Tn = Th + Tm;
		    T6d = T2r + T2s;
		    T2l = KP559016994 * (Tc - Tn);
		    T6g = KP559016994 * (T6c - T6d);
		    To = Tc + Tn;
		    T2m = FNMS(KP250000000, To, T1);
		    T6e = T6c + T6d;
		    T6f = FNMS(KP250000000, T6e, T6b);
		    {
			 E T68, T69, T2q, T2t;
			 T68 = Th - Tm;
			 T69 = T6 - Tb;
			 T6a = FNMS(KP587785252, T69, KP951056516 * T68);
			 T6H = FMA(KP951056516, T69, KP587785252 * T68);
			 T2q = T2o - T2p;
			 T2t = T2r - T2s;
			 T2u = FMA(KP951056516, T2q, KP587785252 * T2t);
			 T4I = FNMS(KP587785252, T2q, KP951056516 * T2t);
		    }
	       }
	       /* Group k = 3, 8, 13, 18, 23. */
	       {
		    E T1U, T3O, T3E, T3F, T3X, T3W, T3J, T3M, T3P, T25, T2g, T2h;
		    {
			 E T1R, T1T, T1Q, T1S;
			 T1R = cr[WS(rs, 3)];
			 T1T = ci[WS(rs, 3)];
			 T1Q = W[4];
			 T1S = W[5];
			 T1U = FMA(T1Q, T1R, T1S * T1T);
			 T3O = FNMS(T1S, T1R, T1Q * T1T);
		    }
		    {
			 E T1Z, T3H, T2f, T3L, T24, T3I, T2a, T3K;
			 {
			      E T1W, T1Y, T1V, T1X;
			      T1W = cr[WS(rs, 8)];
			      T1Y = ci[WS(rs, 8)];
			      T1V = W[14];
			      T1X = W[15];
			      T1Z = FMA(T1V, T1W, T1X * T1Y);
			      T3H = FNMS(T1X, T1W, T1V * T1Y);
			 }
			 {
			      E T2c, T2e, T2b, T2d;
			      T2c = cr[WS(rs, 18)];
			      T2e = ci[WS(rs, 18)];
			      T2b = W[34];
			      T2d = W[35];
			      T2f = FMA(T2b, T2c, T2d * T2e);
			      T3L = FNMS(T2d, T2c, T2b * T2e);
			 }
			 {
			      E T21, T23, T20, T22;
			      T21 = cr[WS(rs, 23)];
			      T23 = ci[WS(rs, 23)];
			      T20 = W[44];
			      T22 = W[45];
			      T24 = FMA(T20, T21, T22 * T23);
			      T3I = FNMS(T22, T21, T20 * T23);
			 }
			 {
			      E T27, T29, T26, T28;
			      T27 = cr[WS(rs, 13)];
			      T29 = ci[WS(rs, 13)];
			      T26 = W[24];
			      T28 = W[25];
			      T2a = FMA(T26, T27, T28 * T29);
			      T3K = FNMS(T28, T27, T26 * T29);
			 }
			 T3E = T1Z - T24;
			 T3F = T2a - T2f;
			 T3X = T3K - T3L;
			 T3W = T3H - T3I;
			 T3J = T3H + T3I;
			 T3M = T3K + T3L;
			 T3P = T3J + T3M;
			 T25 = T1Z + T24;
			 T2g = T2a + T2f;
			 T2h = T25 + T2g;
		    }
		    T2i = T1U + T2h;
		    T60 = T3O + T3P;
		    {
			 E T3G, T57, T3R, T56, T3N, T3Q;
			 T3G = FMA(KP951056516, T3E, KP587785252 * T3F);
			 T57 = FNMS(KP587785252, T3E, KP951056516 * T3F);
			 T3N = KP559016994 * (T3J - T3M);
			 T3Q = FNMS(KP250000000, T3P, T3O);
			 T3R = T3N + T3Q;
			 T56 = T3Q - T3N;
			 T3S = T3G + T3R;
			 T5D = T57 + T56;
			 T4r = T3R - T3G;
			 T58 = T56 - T57;
		    }
		    {
			 E T3Y, T5a, T3V, T59, T3T, T3U;
			 T3Y = FMA(KP951056516, T3W, KP587785252 * T3X);
			 T5a = FNMS(KP587785252, T3W, KP951056516 * T3X);
			 T3T = KP559016994 * (T25 - T2g);
			 T3U = FNMS(KP250000000, T2h, T1U);
			 T3V = T3T + T3U;
			 T59 = T3U - T3T;
			 T3Z = T3V - T3Y;
			 T5C = T59 - T5a;
			 T4q = T3V + T3Y;
			 T5b = T59 + T5a;
		    }
	       }
	       /* Group k = 1, 6, 11, 16, 21. */
	       {
		    E Tu, T2N, T2B, T2E, T2I, T2H, T2K, T2L, T2O, TF, TQ, TR;
		    {
			 E Tr, Tt, Tq, Ts;
			 Tr = cr[WS(rs, 1)];
			 Tt = ci[WS(rs, 1)];
			 Tq = W[0];
			 Ts = W[1];
			 Tu = FMA(Tq, Tr, Ts * Tt);
			 T2N = FNMS(Ts, Tr, Tq * Tt);
		    }
		    {
			 E Tz, T2z, TP, T2D, TE, T2A, TK, T2C;
			 {
			      E Tw, Ty, Tv, Tx;
			      Tw = cr[WS(rs, 6)];
			      Ty = ci[WS(rs, 6)];
			      Tv = W[10];
			      Tx = W[11];
			      Tz = FMA(Tv, Tw, Tx * Ty);
			      T2z = FNMS(Tx, Tw, Tv * Ty);
			 }
			 {
			      E TM, TO, TL, TN;
			      TM = cr[WS(rs, 16)];
			      TO = ci[WS(rs, 16)];
			      TL = W[30];
			      TN = W[31];
			      TP = FMA(TL, TM, TN * TO);
			      T2D = FNMS(TN, TM, TL * TO);
			 }
			 {
			      E TB, TD, TA, TC;
			      TB = cr[WS(rs, 21)];
			      TD = ci[WS(rs, 21)];
			      TA = W[40];
			      TC = W[41];
			      TE = FMA(TA, TB, TC * TD);
			      T2A = FNMS(TC, TB, TA * TD);
			 }
			 {
			      E TH, TJ, TG, TI;
			      TH = cr[WS(rs, 11)];
			      TJ = ci[WS(rs, 11)];
			      TG = W[20];
			      TI = W[21];
			      TK = FMA(TG, TH, TI * TJ);
			      T2C = FNMS(TI, TH, TG * TJ);
			 }
			 T2B = T2z - T2A;
			 T2E = T2C - T2D;
			 T2I = TK - TP;
			 T2H = Tz - TE;
			 T2K = T2z + T2A;
			 T2L = T2C + T2D;
			 T2O = T2K + T2L;
			 TF = Tz + TE;
			 TQ = TK + TP;
			 TR = TF + TQ;
		    }
		    TS = Tu + TR;
		    T5W = T2N + T2O;
		    {
			 E T2F, T4L, T2y, T4K, T2w, T2x;
			 T2F = FMA(KP951056516, T2B, KP587785252 * T2E);
			 T4L = FNMS(KP587785252, T2B, KP951056516 * T2E);
			 T2w = KP559016994 * (TF - TQ);
			 T2x = FNMS(KP250000000, TR, Tu);
			 T2y = T2w + T2x;
			 T4K = T2x - T2w;
			 T2G = T2y - T2F;
			 T5s = T4K - T4L;
			 T4g = T2y + T2F;
			 T4M = T4K + T4L;
		    }
		    {
			 E T2J, T4O, T2Q, T4N, T2M, T2P;
			 T2J = FMA(KP951056516, T2H, KP587785252 * T2I);
			 T4O = FNMS(KP587785252, T2H, KP951056516 * T2I);
			 T2M = KP559016994 * (T2K - T2L);
			 T2P = FNMS(KP250000000, T2O, T2N);
			 T2Q = T2M + T2P;
			 T4N = T2P - T2M;
			 T2R = T2J + T2Q;
			 T5t = T4O + T4N;
			 T4h = T2Q - T2J;
			 T4P = T4N - T4O;
		    }
	       }
	       /* Group k = 4, 9, 14, 19, 24 (note T2T is formed as T17 - T12, the reverse of the other groups). */
	       {
		    E TX, T33, T2T, T2U, T3c, T3b, T2Y, T31, T34, T18, T1j, T1k;
		    {
			 E TU, TW, TT, TV;
			 TU = cr[WS(rs, 4)];
			 TW = ci[WS(rs, 4)];
			 TT = W[6];
			 TV = W[7];
			 TX = FMA(TT, TU, TV * TW);
			 T33 = FNMS(TV, TU, TT * TW);
		    }
		    {
			 E T12, T2W, T1i, T30, T17, T2X, T1d, T2Z;
			 {
			      E TZ, T11, TY, T10;
			      TZ = cr[WS(rs, 9)];
			      T11 = ci[WS(rs, 9)];
			      TY = W[16];
			      T10 = W[17];
			      T12 = FMA(TY, TZ, T10 * T11);
			      T2W = FNMS(T10, TZ, TY * T11);
			 }
			 {
			      E T1f, T1h, T1e, T1g;
			      T1f = cr[WS(rs, 19)];
			      T1h = ci[WS(rs, 19)];
			      T1e = W[36];
			      T1g = W[37];
			      T1i = FMA(T1e, T1f, T1g * T1h);
			      T30 = FNMS(T1g, T1f, T1e * T1h);
			 }
			 {
			      E T14, T16, T13, T15;
			      T14 = cr[WS(rs, 24)];
			      T16 = ci[WS(rs, 24)];
			      T13 = W[46];
			      T15 = W[47];
			      T17 = FMA(T13, T14, T15 * T16);
			      T2X = FNMS(T15, T14, T13 * T16);
			 }
			 {
			      E T1a, T1c, T19, T1b;
			      T1a = cr[WS(rs, 14)];
			      T1c = ci[WS(rs, 14)];
			      T19 = W[26];
			      T1b = W[27];
			      T1d = FMA(T19, T1a, T1b * T1c);
			      T2Z = FNMS(T1b, T1a, T19 * T1c);
			 }
			 T2T = T17 - T12;
			 T2U = T1d - T1i;
			 T3c = T2Z - T30;
			 T3b = T2W - T2X;
			 T2Y = T2W + T2X;
			 T31 = T2Z + T30;
			 T34 = T2Y + T31;
			 T18 = T12 + T17;
			 T1j = T1d + T1i;
			 T1k = T18 + T1j;
		    }
		    T1l = TX + T1k;
		    T5X = T33 + T34;
		    {
			 E T2V, T4S, T36, T4R, T32, T35;
			 T2V = FNMS(KP587785252, T2U, KP951056516 * T2T);
			 T4S = FMA(KP587785252, T2T, KP951056516 * T2U);
			 T32 = KP559016994 * (T2Y - T31);
			 T35 = FNMS(KP250000000, T34, T33);
			 T36 = T32 + T35;
			 T4R = T35 - T32;
			 T37 = T2V - T36;
			 T5v = T4S + T4R;
			 T4k = T2V + T36;
			 T4T = T4R - T4S;
		    }
		    {
			 E T3d, T4V, T3a, T4U, T38, T39;
			 T3d = FMA(KP951056516, T3b, KP587785252 * T3c);
			 T4V = FNMS(KP587785252, T3b, KP951056516 * T3c);
			 T38 = KP559016994 * (T18 - T1j);
			 T39 = FNMS(KP250000000, T1k, TX);
			 T3a = T38 + T39;
			 T4U = T39 - T38;
			 T3e = T3a - T3d;
			 T5w = T4U - T4V;
			 T4j = T3a + T3d;
			 T4W = T4U + T4V;
		    }
	       }
	       /* Group k = 2, 7, 12, 17, 22. */
	       {
		    E T1r, T3r, T3h, T3i, T3A, T3z, T3m, T3p, T3s, T1C, T1N, T1O;
		    {
			 E T1o, T1q, T1n, T1p;
			 T1o = cr[WS(rs, 2)];
			 T1q = ci[WS(rs, 2)];
			 T1n = W[2];
			 T1p = W[3];
			 T1r = FMA(T1n, T1o, T1p * T1q);
			 T3r = FNMS(T1p, T1o, T1n * T1q);
		    }
		    {
			 E T1w, T3k, T1M, T3o, T1B, T3l, T1H, T3n;
			 {
			      E T1t, T1v, T1s, T1u;
			      T1t = cr[WS(rs, 7)];
			      T1v = ci[WS(rs, 7)];
			      T1s = W[12];
			      T1u = W[13];
			      T1w = FMA(T1s, T1t, T1u * T1v);
			      T3k = FNMS(T1u, T1t, T1s * T1v);
			 }
			 {
			      E T1J, T1L, T1I, T1K;
			      T1J = cr[WS(rs, 17)];
			      T1L = ci[WS(rs, 17)];
			      T1I = W[32];
			      T1K = W[33];
			      T1M = FMA(T1I, T1J, T1K * T1L);
			      T3o = FNMS(T1K, T1J, T1I * T1L);
			 }
			 {
			      E T1y, T1A, T1x, T1z;
			      T1y = cr[WS(rs, 22)];
			      T1A = ci[WS(rs, 22)];
			      T1x = W[42];
			      T1z = W[43];
			      T1B = FMA(T1x, T1y, T1z * T1A);
			      T3l = FNMS(T1z, T1y, T1x * T1A);
			 }
			 {
			      E T1E, T1G, T1D, T1F;
			      T1E = cr[WS(rs, 12)];
			      T1G = ci[WS(rs, 12)];
			      T1D = W[22];
			      T1F = W[23];
			      T1H = FMA(T1D, T1E, T1F * T1G);
			      T3n = FNMS(T1F, T1E, T1D * T1G);
			 }
			 T3h = T1w - T1B;
			 T3i = T1H - T1M;
			 T3A = T3n - T3o;
			 T3z = T3k - T3l;
			 T3m = T3k + T3l;
			 T3p = T3n + T3o;
			 T3s = T3m + T3p;
			 T1C = T1w + T1B;
			 T1N = T1H + T1M;
			 T1O = T1C + T1N;
		    }
		    T1P = T1r + T1O;
		    T5Z = T3r + T3s;
		    {
			 E T3j, T53, T3u, T52, T3q, T3t;
			 T3j = FMA(KP951056516, T3h, KP587785252 * T3i);
			 T53 = FNMS(KP587785252, T3h, KP951056516 * T3i);
			 T3q = KP559016994 * (T3m - T3p);
			 T3t = FNMS(KP250000000, T3s, T3r);
			 T3u = T3q + T3t;
			 T52 = T3t - T3q;
			 T3v = T3j + T3u;
			 T5A = T53 + T52;
			 T4o = T3u - T3j;
			 T54 = T52 - T53;
		    }
		    {
			 E T3B, T50, T3y, T4Z, T3w, T3x;
			 T3B = FMA(KP951056516, T3z, KP587785252 * T3A);
			 T50 = FNMS(KP587785252, T3z, KP951056516 * T3A);
			 T3w = KP559016994 * (T1C - T1N);
			 T3x = FNMS(KP250000000, T1O, T1r);
			 T3y = T3w + T3x;
			 T4Z = T3x - T3w;
			 T3C = T3y - T3B;
			 T5z = T4Z - T50;
			 T4n = T3y + T3B;
			 T51 = T4Z + T50;
		    }
	       }
	       /* First stores: cr[0], cr[5], cr[10], ci[4], ci[9], built from the five group totals. */
	       {
		    E T62, T64, Tp, T2k, T5T, T5U, T63, T5V;
		    {
			 E T5Y, T61, T1m, T2j;
			 T5Y = T5W - T5X;
			 T61 = T5Z - T60;
			 T62 = FMA(KP951056516, T5Y, KP587785252 * T61);
			 T64 = FNMS(KP587785252, T5Y, KP951056516 * T61);
			 Tp = T1 + To;
			 T1m = TS + T1l;
			 T2j = T1P + T2i;
			 T2k = T1m + T2j;
			 T5T = KP559016994 * (T1m - T2j);
			 T5U = FNMS(KP250000000, T2k, Tp);
		    }
		    cr[0] = Tp + T2k;
		    T63 = T5U - T5T;
		    cr[WS(rs, 10)] = T63 - T64;
		    ci[WS(rs, 9)] = T63 + T64;
		    T5V = T5T + T5U;
		    ci[WS(rs, 4)] = T5V - T62;
		    cr[WS(rs, 5)] = T5V + T62;
	       }
	       /*
	        * Stores to cr[1,4,6,9,11,14,16,19,21,24] and
	        * ci[0,3,5,8,10,13,15,18,20,23], starting from T2l + T2m and T6g + T6f.
	        */
	       {
		    E T2v, T4f, T6I, T6U, T42, T6Z, T43, T6Y, T4A, T6N, T4D, T6L, T4u, T6E, T4v;
		    E T6D, T48, T6V, T4b, T6T, T2n, T6G;
		    T2n = T2l + T2m;
		    T2v = T2n - T2u;
		    T4f = T2n + T2u;
		    T6G = T6g + T6f;
		    T6I = T6G - T6H;
		    T6U = T6H + T6G;
		    {
			 E T2S, T3f, T3g, T3D, T40, T41;
			 T2S = FMA(KP535826794, T2G, KP844327925 * T2R);
			 T3f = FNMS(KP637423989, T3e, KP770513242 * T37);
			 T3g = T2S + T3f;
			 T3D = FNMS(KP425779291, T3C, KP904827052 * T3v);
			 T40 = FNMS(KP992114701, T3Z, KP125333233 * T3S);
			 T41 = T3D + T40;
			 T42 = T3g + T41;
			 T6Z = T3D - T40;
			 T43 = KP559016994 * (T3g - T41);
			 T6Y = T3f - T2S;
		    }
		    {
			 E T4y, T4z, T6J, T4B, T4C, T6K;
			 T4y = FNMS(KP248689887, T4g, KP968583161 * T4h);
			 T4z = FNMS(KP844327925, T4j, KP535826794 * T4k);
			 T6J = T4y + T4z;
			 T4B = FNMS(KP481753674, T4n, KP876306680 * T4o);
			 T4C = FNMS(KP684547105, T4q, KP728968627 * T4r);
			 T6K = T4B + T4C;
			 T4A = T4y - T4z;
			 T6N = KP559016994 * (T6J - T6K);
			 T4D = T4B - T4C;
			 T6L = T6J + T6K;
		    }
		    {
			 E T4i, T4l, T4m, T4p, T4s, T4t;
			 T4i = FMA(KP968583161, T4g, KP248689887 * T4h);
			 T4l = FMA(KP535826794, T4j, KP844327925 * T4k);
			 T4m = T4i + T4l;
			 T4p = FMA(KP876306680, T4n, KP481753674 * T4o);
			 T4s = FMA(KP728968627, T4q, KP684547105 * T4r);
			 T4t = T4p + T4s;
			 T4u = T4m + T4t;
			 T6E = T4p - T4s;
			 T4v = KP559016994 * (T4m - T4t);
			 T6D = T4l - T4i;
		    }
		    {
			 E T46, T47, T6R, T49, T4a, T6S;
			 T46 = FNMS(KP844327925, T2G, KP535826794 * T2R);
			 T47 = FMA(KP770513242, T3e, KP637423989 * T37);
			 T6R = T46 + T47;
			 T49 = FMA(KP125333233, T3Z, KP992114701 * T3S);
			 T4a = FMA(KP904827052, T3C, KP425779291 * T3v);
			 T6S = T4a + T49;
			 T48 = T46 - T47;
			 T6V = T6R - T6S;
			 T4b = T49 - T4a;
			 T6T = KP559016994 * (T6R + T6S);
		    }
		    cr[WS(rs, 4)] = T2v + T42;
		    ci[WS(rs, 23)] = T6L + T6I;
		    ci[WS(rs, 20)] = T6V + T6U;
		    cr[WS(rs, 1)] = T4f + T4u;
		    {
			 E T4c, T4e, T45, T4d, T44;
			 T4c = FMA(KP951056516, T48, KP587785252 * T4b);
			 T4e = FNMS(KP587785252, T48, KP951056516 * T4b);
			 T44 = FNMS(KP250000000, T42, T2v);
			 T45 = T43 + T44;
			 T4d = T44 - T43;
			 ci[0] = T45 - T4c;
			 ci[WS(rs, 5)] = T4d + T4e;
			 cr[WS(rs, 9)] = T45 + T4c;
			 ci[WS(rs, 10)] = T4d - T4e;
		    }
		    {
			 E T6F, T6P, T6O, T6Q, T6M;
			 T6F = FMA(KP587785252, T6D, KP951056516 * T6E);
			 T6P = FNMS(KP587785252, T6E, KP951056516 * T6D);
			 T6M = FNMS(KP250000000, T6L, T6I);
			 T6O = T6M - T6N;
			 T6Q = T6N + T6M;
			 cr[WS(rs, 16)] = T6F - T6O;
			 ci[WS(rs, 18)] = T6P + T6Q;
			 ci[WS(rs, 13)] = T6F + T6O;
			 cr[WS(rs, 21)] = T6P - T6Q;
		    }
		    {
			 E T70, T71, T6X, T72, T6W;
			 T70 = FMA(KP587785252, T6Y, KP951056516 * T6Z);
			 T71 = FNMS(KP587785252, T6Z, KP951056516 * T6Y);
			 T6W = FNMS(KP250000000, T6V, T6U);
			 T6X = T6T - T6W;
			 T72 = T6T + T6W;
			 cr[WS(rs, 14)] = T6X - T70;
			 ci[WS(rs, 15)] = T71 + T72;
			 cr[WS(rs, 19)] = T70 + T6X;
			 cr[WS(rs, 24)] = T71 - T72;
		    }
		    {
			 E T4E, T4G, T4x, T4F, T4w;
			 T4E = FMA(KP951056516, T4A, KP587785252 * T4D);
			 T4G = FNMS(KP587785252, T4A, KP951056516 * T4D);
			 T4w = FNMS(KP250000000, T4u, T4f);
			 T4x = T4v + T4w;
			 T4F = T4w - T4v;
			 ci[WS(rs, 3)] = T4x - T4E;
			 ci[WS(rs, 8)] = T4F + T4G;
			 cr[WS(rs, 6)] = T4x + T4E;
			 cr[WS(rs, 11)] = T4F - T4G;
		    }
	       }
	       /* Stores to ci[24], ci[19], ci[14], cr[15], cr[20], again built from the five group totals. */
	       {
		    E T75, T7d, T76, T79, T7a, T7b, T7e, T7c;
		    {
			 E T73, T74, T77, T78;
			 T73 = T1l - TS;
			 T74 = T1P - T2i;
			 T75 = FMA(KP587785252, T73, KP951056516 * T74);
			 T7d = FNMS(KP587785252, T74, KP951056516 * T73);
			 T76 = T6e + T6b;
			 T77 = T5W + T5X;
			 T78 = T5Z + T60;
			 T79 = T77 + T78;
			 T7a = FNMS(KP250000000, T79, T76);
			 T7b = KP559016994 * (T77 - T78);
		    }
		    ci[WS(rs, 24)] = T79 + T76;
		    T7e = T7b + T7a;
		    cr[WS(rs, 20)] = T7d - T7e;
		    ci[WS(rs, 19)] = T7d + T7e;
		    T7c = T7a - T7b;
		    cr[WS(rs, 15)] = T75 - T7c;
		    ci[WS(rs, 14)] = T75 + T7c;
	       }
	       /*
	        * Remaining stores: cr[2,3,7,8,12,13,17,18,22,23] and
	        * ci[1,2,6,7,11,12,16,17,21,22], starting from T2m - T2l and T6f - T6g.
	        */
	       {
		    E T4J, T5r, T6i, T6u, T5e, T6z, T5f, T6y, T5M, T6n, T5P, T6l, T5G, T66, T5H;
		    E T65, T5k, T6v, T5n, T6t, T4H, T6h;
		    T4H = T2m - T2l;
		    T4J = T4H + T4I;
		    T5r = T4H - T4I;
		    T6h = T6f - T6g;
		    T6i = T6a + T6h;
		    T6u = T6h - T6a;
		    {
			 E T4Q, T4X, T4Y, T55, T5c, T5d;
			 T4Q = FMA(KP728968627, T4M, KP684547105 * T4P);
			 T4X = FNMS(KP992114701, T4W, KP125333233 * T4T);
			 T4Y = T4Q + T4X;
			 T55 = FMA(KP062790519, T51, KP998026728 * T54);
			 T5c = FNMS(KP637423989, T5b, KP770513242 * T58);
			 T5d = T55 + T5c;
			 T5e = T4Y + T5d;
			 T6z = T55 - T5c;
			 T5f = KP559016994 * (T4Y - T5d);
			 T6y = T4X - T4Q;
		    }
		    {
			 E T5K, T5L, T6j, T5N, T5O, T6k;
			 T5K = FNMS(KP481753674, T5s, KP876306680 * T5t);
			 T5L = FMA(KP904827052, T5w, KP425779291 * T5v);
			 T6j = T5K - T5L;
			 T5N = FNMS(KP844327925, T5z, KP535826794 * T5A);
			 T5O = FNMS(KP998026728, T5C, KP062790519 * T5D);
			 T6k = T5N + T5O;
			 T5M = T5K + T5L;
			 T6n = KP559016994 * (T6j - T6k);
			 T5P = T5N - T5O;
			 T6l = T6j + T6k;
		    }
		    {
			 E T5u, T5x, T5y, T5B, T5E, T5F;
			 T5u = FMA(KP876306680, T5s, KP481753674 * T5t);
			 T5x = FNMS(KP425779291, T5w, KP904827052 * T5v);
			 T5y = T5u + T5x;
			 T5B = FMA(KP535826794, T5z, KP844327925 * T5A);
			 T5E = FMA(KP062790519, T5C, KP998026728 * T5D);
			 T5F = T5B + T5E;
			 T5G = T5y + T5F;
			 T66 = T5B - T5E;
			 T5H = KP559016994 * (T5y - T5F);
			 T65 = T5x - T5u;
		    }
		    {
			 E T5i, T5j, T6r, T5l, T5m, T6s;
			 T5i = FNMS(KP684547105, T4M, KP728968627 * T4P);
			 T5j = FMA(KP125333233, T4W, KP992114701 * T4T);
			 T6r = T5i - T5j;
			 T5l = FNMS(KP998026728, T51, KP062790519 * T54);
			 T5m = FMA(KP770513242, T5b, KP637423989 * T58);
			 T6s = T5l - T5m;
			 T5k = T5i + T5j;
			 T6v = T6r + T6s;
			 T5n = T5l + T5m;
			 T6t = KP559016994 * (T6r - T6s);
		    }
		    cr[WS(rs, 3)] = T4J + T5e;
		    ci[WS(rs, 22)] = T6l + T6i;
		    ci[WS(rs, 21)] = T6v + T6u;
		    cr[WS(rs, 2)] = T5r + T5G;
		    {
			 E T67, T6p, T6o, T6q, T6m;
			 T67 = FMA(KP587785252, T65, KP951056516 * T66);
			 T6p = FNMS(KP587785252, T66, KP951056516 * T65);
			 T6m = FNMS(KP250000000, T6l, T6i);
			 T6o = T6m - T6n;
			 T6q = T6n + T6m;
			 cr[WS(rs, 17)] = T67 - T6o;
			 ci[WS(rs, 17)] = T6p + T6q;
			 ci[WS(rs, 12)] = T67 + T6o;
			 cr[WS(rs, 22)] = T6p - T6q;
		    }
		    {
			 E T5Q, T5S, T5J, T5R, T5I;
			 T5Q = FMA(KP951056516, T5M, KP587785252 * T5P);
			 T5S = FNMS(KP587785252, T5M, KP951056516 * T5P);
			 T5I = FNMS(KP250000000, T5G, T5r);
			 T5J = T5H + T5I;
			 T5R = T5I - T5H;
			 ci[WS(rs, 2)] = T5J - T5Q;
			 ci[WS(rs, 7)] = T5R + T5S;
			 cr[WS(rs, 7)] = T5J + T5Q;
			 cr[WS(rs, 12)] = T5R - T5S;
		    }
		    {
			 E T5o, T5q, T5h, T5p, T5g;
			 T5o = FMA(KP951056516, T5k, KP587785252 * T5n);
			 T5q = FNMS(KP587785252, T5k, KP951056516 * T5n);
			 T5g = FNMS(KP250000000, T5e, T4J);
			 T5h = T5f + T5g;
			 T5p = T5g - T5f;
			 ci[WS(rs, 1)] = T5h - T5o;
			 ci[WS(rs, 6)] = T5p + T5q;
			 cr[WS(rs, 8)] = T5h + T5o;
			 ci[WS(rs, 11)] = T5p - T5q;
		    }
		    {
			 E T6A, T6B, T6x, T6C, T6w;
			 T6A = FMA(KP587785252, T6y, KP951056516 * T6z);
			 T6B = FNMS(KP587785252, T6z, KP951056516 * T6y);
			 T6w = FNMS(KP250000000, T6v, T6u);
			 T6x = T6t - T6w;
			 T6C = T6t + T6w;
			 cr[WS(rs, 13)] = T6x - T6A;
			 ci[WS(rs, 16)] = T6B + T6C;
			 cr[WS(rs, 18)] = T6A + T6x;
			 cr[WS(rs, 23)] = T6B - T6C;
		    }
	       }
	  }
     }
}
/*
 * hc2cfdft_8: for each m in [mb, me), combines four elements from each of the
 * arrays Rp, Ip, Rm and Im (offsets 0 and WS(rs, 1..3)) with the 14
 * coefficients W[0..13] and overwrites those same 16 locations.
 *
 * Per iteration:
 *   - For every offset j the code forms Ip[j] - Im[j], Ip[j] + Im[j],
 *     Rp[j] + Rm[j] and the difference of Rp[j] and Rm[j].
 *   - Most of these are multiplied by coefficient pairs from W: offset 0 uses
 *     W[0..1], offset 1 uses W[2..5], offset 2 uses W[6..9] and offset 3 uses
 *     W[10..13].  Ip[0] - Im[0] and Rp[0] + Rm[0] are used without a
 *     coefficient.
 *   - Every stored value carries the factor KP500000000 (0.5); the last
 *     eight stores additionally involve KP707106781.
 *   - All loads happen before the first store, so the update is in place.
 *
 * Rp and Ip advance by ms per iteration while Rm and Im retreat by ms; W
 * advances by 14 entries and initially skips (mb - 1) * 14 entries.
 *
 * NOTE(review): loads, multiplies and stores are deliberately interleaved
 * (this looks like generator-scheduled code); keep the statement order when
 * editing -- confirm against the generator output.
 */
static void hc2cfdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
	  INT m;
	  for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) {
	       /* Values needed by the final eight stores, which sit outside the first block. */
	       E T1A, T1w, T1z, T1x, T1H, T1v, T1L, T1F;
	       {
		    E Ty, T14, TO, T1o, Tv, TG, T16, T1m, Ta, T19, T1h, TV, T10, TX, TZ;
		    E Tk, T1i, TY, T1b, TF, TB, T1l;
		    /* Offsets 0 and 2: loads and products with W[0..1] and W[6..9]. */
		    {
			 E TH, TN, TK, TM;
			 {
			      E Tw, Tx, TI, TJ;
			      Tw = Ip[0];
			      Tx = Im[0];
			      TI = Rm[0];
			      TJ = Rp[0];
			      TH = W[0];
			      Ty = Tw - Tx;
			      TN = Tw + Tx;
			      T14 = TJ + TI;
			      TK = TI - TJ;
			      TM = W[1];
			 }
			 {
			      E Ts, Tp, Tt, Tm, Tr;
			      {
				   E Tn, To, TL, T1n;
				   Tn = Ip[WS(rs, 2)];
				   To = Im[WS(rs, 2)];
				   TL = TH * TK;
				   T1n = TM * TK;
				   Ts = Rp[WS(rs, 2)];
				   TF = Tn + To;
				   Tp = Tn - To;
				   TO = FNMS(TM, TN, TL);
				   T1o = FMA(TH, TN, T1n);
				   Tt = Rm[WS(rs, 2)];
			      }
			      Tm = W[6];
			      Tr = W[7];
			      {
				   E TE, TD, T15, TC, Tu, Tq;
				   TB = W[8];
				   TC = Tt - Ts;
				   Tu = Ts + Tt;
				   Tq = Tm * Tp;
				   TE = W[9];
				   TD = TB * TC;
				   T15 = Tm * Tu;
				   Tv = FNMS(Tr, Tu, Tq);
				   T1l = TE * TC;
				   TG = FNMS(TE, TF, TD);
				   T16 = FMA(Tr, Tp, T15);
			      }
			 }
		    }
		    /* Offsets 1 and 3: loads and products with W[2..5] and W[10..13]. */
		    {
			 E TU, TR, TT, T1g, TS;
			 {
			      E T2, T3, T7, T8;
			      T2 = Ip[WS(rs, 1)];
			      T1m = FMA(TB, TF, T1l);
			      T3 = Im[WS(rs, 1)];
			      T7 = Rp[WS(rs, 1)];
			      T8 = Rm[WS(rs, 1)];
			      {
				   E T1, T4, T9, T6, T5, TQ, T18;
				   T1 = W[2];
				   TU = T2 + T3;
				   T4 = T2 - T3;
				   TR = T7 - T8;
				   T9 = T7 + T8;
				   T6 = W[3];
				   T5 = T1 * T4;
				   TQ = W[4];
				   T18 = T1 * T9;
				   TT = W[5];
				   Ta = FNMS(T6, T9, T5);
				   T1g = TQ * TU;
				   TS = TQ * TR;
				   T19 = FMA(T6, T4, T18);
			      }
			 }
			 {
			      E Tc, Td, Th, Ti;
			      Tc = Ip[WS(rs, 3)];
			      T1h = FNMS(TT, TR, T1g);
			      TV = FMA(TT, TU, TS);
			      Td = Im[WS(rs, 3)];
			      Th = Rp[WS(rs, 3)];
			      Ti = Rm[WS(rs, 3)];
			      {
				   E Tb, Te, Tj, Tg, Tf, TW, T1a;
				   Tb = W[10];
				   T10 = Tc + Td;
				   Te = Tc - Td;
				   TX = Th - Ti;
				   Tj = Th + Ti;
				   Tg = W[11];
				   Tf = Tb * Te;
				   TW = W[12];
				   T1a = Tb * Tj;
				   TZ = W[13];
				   Tk = FNMS(Tg, Tj, Tf);
				   T1i = TW * T10;
				   TY = TW * TX;
				   T1b = FMA(Tg, Te, T1a);
			      }
			 }
		    }
		    /* Sums and differences of the products, then the eight stores that need only the 0.5 factor. */
		    {
			 E T1E, T1t, TA, T1s, T1D, T1u, T1e, T13, T1r, T1d;
			 {
			      E TP, T1f, T1q, T12, T17, T1c;
			      {
				   E Tl, T11, Tz, T1p, T1k, T1j;
				   T1E = Ta - Tk;
				   Tl = Ta + Tk;
				   T1j = FNMS(TZ, TX, T1i);
				   T11 = FMA(TZ, T10, TY);
				   Tz = Tv + Ty;
				   T1t = Ty - Tv;
				   T1A = T1o - T1m;
				   T1p = T1m + T1o;
				   T1k = T1h + T1j;
				   T1w = T1j - T1h;
				   T1z = TO - TG;
				   TP = TG + TO;
				   T1f = Tz - Tl;
				   TA = Tl + Tz;
				   T1s = T1k + T1p;
				   T1q = T1k - T1p;
				   T12 = TV + T11;
				   T1x = TV - T11;
				   T1D = T14 - T16;
				   T17 = T14 + T16;
				   T1c = T19 + T1b;
				   T1u = T19 - T1b;
			      }
			      Im[WS(rs, 1)] = KP500000000 * (T1q - T1f);
			      T1e = T12 + TP;
			      T13 = TP - T12;
			      T1r = T17 + T1c;
			      T1d = T17 - T1c;
			      Ip[WS(rs, 2)] = KP500000000 * (T1f + T1q);
			 }
			 Im[WS(rs, 3)] = KP500000000 * (T13 - TA);
			 Ip[0] = KP500000000 * (TA + T13);
			 Rm[WS(rs, 3)] = KP500000000 * (T1r - T1s);
			 Rp[0] = KP500000000 * (T1r + T1s);
			 Rp[WS(rs, 2)] = KP500000000 * (T1d + T1e);
			 Rm[WS(rs, 1)] = KP500000000 * (T1d - T1e);
			 T1H = T1u + T1t;
			 T1v = T1t - T1u;
			 T1L = T1D + T1E;
			 T1F = T1D - T1E;
		    }
	       }
	       /* Remaining eight stores: each is 0.5 * (a +/- KP707106781 * b); the two Im stores are also negated. */
	       {
		    E T1y, T1I, T1B, T1J;
		    T1y = T1w + T1x;
		    T1I = T1w - T1x;
		    T1B = T1z - T1A;
		    T1J = T1z + T1A;
		    {
			 E T1M, T1K, T1C, T1G;
			 T1M = T1I + T1J;
			 T1K = T1I - T1J;
			 T1C = T1y + T1B;
			 T1G = T1B - T1y;
			 Im[0] = -(KP500000000 * (FNMS(KP707106781, T1K, T1H)));
			 Ip[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1K, T1H));
			 Rp[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1M, T1L));
			 Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP707106781, T1M, T1L));
			 Rp[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1G, T1F));
			 Rm[0] = KP500000000 * (FNMS(KP707106781, T1G, T1F));
			 Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP707106781, T1C, T1v)));
			 Ip[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1C, T1v));
		    }
	       }
	  }
     }
}
Ejemplo n.º 28
0
/*
 * t1_8 -- for every m in [mb, me) combine the eight complex values
 * x[n] = ri[WS(rs, n)] + i * ii[WS(rs, n)], n = 0..7, in place.
 *
 * Inputs n = 1..7 are first multiplied by the coefficient pair
 * (wr, wi) = (W[2n-2], W[2n-1]) as (wr*re + wi*im, wr*im - wi*re); input 0
 * is used unchanged.  The eight values are then combined by sums and
 * differences of the pairs (0,4), (2,6), (1,5) and (7,3); the odd-numbered
 * results additionally scale the (1,5)/(7,3) cross terms by KP707106781.
 * Result n overwrites ri[WS(rs, n)] / ii[WS(rs, n)].
 *
 * Every load from ri, ii and W precedes the first store.  After each
 * iteration ri and ii advance by ms and W by 14 entries; W initially skips
 * mb * 14 entries.
 */
static void t1_8(float *ri, float *ii, const float *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     INT m;

     W = W + (mb * 14);
     for (m = mb; m < me; m = m + 1, MAKE_VOLATILE_STRIDE(rs)) {
	  /* Inputs after the W product (x0 is taken as is). */
	  E x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
	  E x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i;
	  /* Sums and differences of the four input pairs. */
	  E sum04r, sum04i, dif04r, dif04i;
	  E sum26r, sum26i, dif26r, dif26i;
	  E sum15r, sum15i, dif15r, dif15i;
	  E sum73r, sum73i, dif73r, dif73i;

	  /* ---- load and multiply by the W pairs ---- */
	  x0r = ri[0];
	  x0i = ii[0];
	  {
	       E re = ri[WS(rs, 1)], im = ii[WS(rs, 1)];
	       E wr = W[0], wi = W[1];
	       x1r = FMA(wr, re, wi * im);
	       x1i = FNMS(wi, re, wr * im);
	  }
	  {
	       E re = ri[WS(rs, 2)], im = ii[WS(rs, 2)];
	       E wr = W[2], wi = W[3];
	       x2r = FMA(wr, re, wi * im);
	       x2i = FNMS(wi, re, wr * im);
	  }
	  {
	       E re = ri[WS(rs, 3)], im = ii[WS(rs, 3)];
	       E wr = W[4], wi = W[5];
	       x3r = FMA(wr, re, wi * im);
	       x3i = FNMS(wi, re, wr * im);
	  }
	  {
	       E re = ri[WS(rs, 4)], im = ii[WS(rs, 4)];
	       E wr = W[6], wi = W[7];
	       x4r = FMA(wr, re, wi * im);
	       x4i = FNMS(wi, re, wr * im);
	  }
	  {
	       E re = ri[WS(rs, 5)], im = ii[WS(rs, 5)];
	       E wr = W[8], wi = W[9];
	       x5r = FMA(wr, re, wi * im);
	       x5i = FNMS(wi, re, wr * im);
	  }
	  {
	       E re = ri[WS(rs, 6)], im = ii[WS(rs, 6)];
	       E wr = W[10], wi = W[11];
	       x6r = FMA(wr, re, wi * im);
	       x6i = FNMS(wi, re, wr * im);
	  }
	  {
	       E re = ri[WS(rs, 7)], im = ii[WS(rs, 7)];
	       E wr = W[12], wi = W[13];
	       x7r = FMA(wr, re, wi * im);
	       x7i = FNMS(wi, re, wr * im);
	  }

	  /* ---- pairwise sums and differences ---- */
	  sum04r = x0r + x4r;
	  dif04i = x0i - x4i;
	  dif04r = x0r - x4r;
	  sum04i = x4i + x0i;

	  sum73r = x7r + x3r;
	  sum73i = x7i + x3i;
	  dif73r = x7r - x3r;
	  dif73i = x7i - x3i;

	  sum26r = x2r + x6r;
	  dif26r = x2r - x6r;
	  dif26i = x2i - x6i;
	  sum26i = x2i + x6i;

	  sum15r = x1r + x5r;
	  sum15i = x1i + x5i;
	  dif15r = x1r - x5r;
	  dif15i = x1i - x5i;

	  /* ---- results 0 and 4 (real parts) ---- */
	  {
	       E even_r = sum04r + sum26r;
	       E odd_r = sum15r + sum73r;
	       ri[WS(rs, 4)] = even_r - odd_r;
	       ri[0] = even_r + odd_r;
	  }
	  /* ---- results 0 and 4 (imaginary parts), results 2 and 6 (real parts) ---- */
	  {
	       E odd_i = sum15i + sum73i;
	       E even_i = sum26i + sum04i;
	       E half_r, cross_i;
	       ii[0] = odd_i + even_i;
	       ii[WS(rs, 4)] = even_i - odd_i;
	       half_r = sum04r - sum26r;
	       cross_i = sum15i - sum73i;
	       ri[WS(rs, 6)] = half_r - cross_i;
	       ri[WS(rs, 2)] = half_r + cross_i;
	  }
	  /* ---- results 2 and 6 (imaginary parts) ---- */
	  {
	       E cross_r = sum73r - sum15r;
	       E half_i = sum04i - sum26i;
	       ii[WS(rs, 2)] = cross_r + half_i;
	       ii[WS(rs, 6)] = half_i - cross_r;
	  }
	  /* ---- results 3 and 7 (real parts), 1 and 5 (imaginary parts) ---- */
	  {
	       E base_r = dif04r - dif26i;
	       E base_i = dif04i - dif26r;
	       E p = dif15i - dif15r;
	       E q = dif73r + dif73i;
	       E rot_r = KP707106781 * (p - q);
	       E rot_i = KP707106781 * (p + q);
	       ri[WS(rs, 7)] = base_r - rot_r;
	       ii[WS(rs, 5)] = base_i - rot_i;
	       ri[WS(rs, 3)] = base_r + rot_r;
	       ii[WS(rs, 1)] = rot_i + base_i;
	  }
	  /* ---- results 1 and 5 (real parts), 3 and 7 (imaginary parts) ---- */
	  {
	       E base_r = dif04r + dif26i;
	       E base_i = dif26r + dif04i;
	       E p = dif15r + dif15i;
	       E q = dif73r - dif73i;
	       E rot_r = KP707106781 * (p + q);
	       E rot_i = KP707106781 * (q - p);
	       ri[WS(rs, 5)] = base_r - rot_r;
	       ii[WS(rs, 7)] = base_i - rot_i;
	       ri[WS(rs, 1)] = base_r + rot_r;
	       ii[WS(rs, 3)] = rot_i + base_i;
	  }

	  /* Step to the next set of eight values and the next 14 coefficients. */
	  ri = ri + ms;
	  ii = ii + ms;
	  W = W + 14;
     }
}
Ejemplo n.º 29
0
/*
 * hb_5 -- in-place five-pair butterfly over the strided arrays cr and ci.
 *
 * For each m in [mb, me) the body loads cr[WS(rs, 0..4)] and
 * ci[WS(rs, 0..4)] (all ten loads happen before the first store), combines
 * them with the four DK constants, and stores ten results back over the same
 * locations.  The pair (cr[0], ci[0]) is stored as plain totals; each of the
 * other four pairs is first combined with one coefficient pair
 * (W[2j], W[2j+1]), j = 0..3.
 *
 * Between iterations cr moves forward by ms, ci moves backward by ms and W
 * moves forward by 8 entries; W is pre-offset by (mb - 1) * 8 before the
 * first iteration.
 *
 * DK, E, R, INT, stride, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come from
 * outside this chunk.  Comments assume FMA(a, b, c) is a*b + c and
 * FNMS(a, b, c) is c - a*b -- TODO confirm against the project headers.
 * NOTE(review): naming and arithmetic look like an FFTW genfft "hb" codelet
 * of radix 5 -- confirm the data layout with the caller.
 */
static void hb_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* Numerically: 1/4, sin(pi/5), sin(2*pi/5), sqrt(5)/4. */
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     INT m;

     /* Skip to the coefficient row used by the first index. */
     W += (mb - 1) * 8;

     for (m = mb; m < me; ++m, cr += ms, ci -= ms, W += 8, MAKE_VOLATILE_STRIDE(rs)) {
	  /* "lo" values derive from cr[0..2] / ci[0..1], "hi" values from
	     ci[2..4] / cr[3..4]. */
	  E p, q;
	  E lo0, lo_sum1, lo_dif1, lo_sum2, lo_dif2;
	  E lo_half, lo_xa, lo_xb, lo_all, lo_mid;
	  E hi0, hi_sum1, hi_dif1, hi_sum2, hi_dif2;
	  E hi_half, hi_xa, hi_xb, hi_all, hi_mid;
	  E base, side, out_a, out_b, wr, wi;

	  /* ---- first input group ---- */
	  lo0 = cr[0];

	  p = cr[WS(rs, 1)];
	  q = ci[0];
	  lo_sum1 = p + q;
	  lo_dif1 = p - q;

	  p = cr[WS(rs, 2)];
	  q = ci[WS(rs, 1)];
	  lo_sum2 = p + q;
	  lo_dif2 = p - q;

	  lo_half = KP559016994 * (lo_sum1 - lo_sum2);
	  lo_xa = FMA(KP951056516, lo_dif1, KP587785252 * lo_dif2);
	  lo_xb = FNMS(KP951056516, lo_dif2, KP587785252 * lo_dif1);
	  lo_all = lo_sum1 + lo_sum2;
	  lo_mid = FNMS(KP250000000, lo_all, lo0);

	  /* ---- second input group ---- */
	  hi0 = ci[WS(rs, 4)];

	  p = ci[WS(rs, 3)];
	  q = cr[WS(rs, 4)];
	  hi_dif1 = p - q;
	  hi_sum1 = p + q;

	  p = ci[WS(rs, 2)];
	  q = cr[WS(rs, 3)];
	  hi_dif2 = p - q;
	  hi_sum2 = p + q;

	  hi_xb = FNMS(KP951056516, hi_sum2, KP587785252 * hi_sum1);
	  hi_xa = FMA(KP951056516, hi_sum1, KP587785252 * hi_sum2);
	  hi_half = KP559016994 * (hi_dif1 - hi_dif2);
	  hi_all = hi_dif1 + hi_dif2;
	  hi_mid = FNMS(KP250000000, hi_all, hi0);

	  /* Every input has been read; from here on the arrays are overwritten. */
	  cr[0] = lo0 + lo_all;
	  ci[0] = hi0 + hi_all;

	  /* ---- slots 2 and 3: built from the "minus half" terms ---- */
	  base = lo_mid - lo_half;
	  side = hi_mid - hi_half;

	  out_a = base - hi_xb;
	  out_b = lo_xb + side;
	  wr = W[2];
	  wi = W[3];
	  cr[WS(rs, 2)] = FNMS(wi, out_b, wr * out_a);
	  ci[WS(rs, 2)] = FMA(wr, out_b, wi * out_a);

	  out_a = base + hi_xb;
	  out_b = side - lo_xb;
	  wr = W[4];
	  wi = W[5];
	  cr[WS(rs, 3)] = FNMS(wi, out_b, wr * out_a);
	  ci[WS(rs, 3)] = FMA(wr, out_b, wi * out_a);

	  /* ---- slots 1 and 4: built from the "plus half" terms ---- */
	  base = lo_half + lo_mid;
	  side = hi_half + hi_mid;

	  out_a = base - hi_xa;
	  out_b = lo_xa + side;
	  wr = W[0];
	  wi = W[1];
	  cr[WS(rs, 1)] = FNMS(wi, out_b, wr * out_a);
	  ci[WS(rs, 1)] = FMA(wr, out_b, wi * out_a);

	  out_a = base + hi_xa;
	  out_b = side - lo_xa;
	  wr = W[6];
	  wi = W[7];
	  cr[WS(rs, 4)] = FNMS(wi, out_b, wr * out_a);
	  ci[WS(rs, 4)] = FMA(wr, out_b, wi * out_a);
     }
}
Ejemplo n.º 30
0
/*
 * hc2cb_16 -- in-place 16-pair butterfly over four strided arrays.
 *
 * For every m in [mb, me) the body loads the eight elements at offsets
 * WS(rs, 0) .. WS(rs, 7) from each of Rp, Ip, Rm and Im (all 32 loads happen
 * before the first store), combines them through add/subtract stages and the
 * three DK constants, and stores 32 results back over the same locations.
 * Every (Rp[k], Rm[k]) and (Ip[k], Im[k]) result pair except (Rp[0], Rm[0])
 * is first combined with one coefficient pair (W[2j], W[2j+1]), j = 0..14.
 *
 * Between iterations Rp and Ip move forward by ms, Rm and Im move backward
 * by ms, and W moves forward by 30 entries; W is pre-offset by
 * (mb - 1) * 30 before the first iteration.
 *
 * DK, E, INT, stride, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined
 * outside this chunk.  The comments below assume FMA(a, b, c) is a*b + c and
 * FNMS(a, b, c) is c - a*b -- TODO confirm against the project headers.
 *
 * NOTE(review): the naming and arithmetic look like an FFTW genfft "hc2cb"
 * codelet of radix 16.  The arithmetic is consistent with treating
 * Rp[k] + i*Ip[k] as input k and Rm[k] - i*Im[k] as input 15 - k, with
 * (Rp[k], Rm[k]) receiving output 2k and (Ip[k], Im[k]) output 2k + 1 --
 * verify against the caller's data layout.
 */
static void hc2cb_16(float *Rp, float *Ip, float *Rm, float *Im, const float *W, stride rs, INT mb, INT me, INT ms)
{
     /* Numerically: sin(pi/8), cos(pi/8), sqrt(1/2). */
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     INT m;
     for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(rs)) {
	  /* Values that survive from the four load stages into the store stages. */
	  E T7, T2K, T2W, Tw, T17, T1S, T2k, T1w, Te, TD, T1x, T10, T2n, T2L, T1Z;
	  E T2X, Tm, T1z, TN, T19, T2e, T2p, T2P, T2Z, Tt, T1A, TW, T1a, T27, T2q;
	  E T2S, T30;
	  /* Load stage 1: Rp/Ip elements 0 and 4 paired with Rm/Im elements 7
	     and 3; pairwise sums and differences, then one more add/subtract
	     layer across the two pairs. */
	  {
	       E T3, T1Q, T13, T2j, T6, T2i, T16, T1R;
	       {
		    E T1, T2, T11, T12;
		    T1 = Rp[0];
		    T2 = Rm[WS(rs, 7)];
		    T3 = T1 + T2;
		    T1Q = T1 - T2;
		    T11 = Ip[0];
		    T12 = Im[WS(rs, 7)];
		    T13 = T11 - T12;
		    T2j = T11 + T12;
	       }
	       {
		    E T4, T5, T14, T15;
		    T4 = Rp[WS(rs, 4)];
		    T5 = Rm[WS(rs, 3)];
		    T6 = T4 + T5;
		    T2i = T4 - T5;
		    T14 = Ip[WS(rs, 4)];
		    T15 = Im[WS(rs, 3)];
		    T16 = T14 - T15;
		    T1R = T14 + T15;
	       }
	       T7 = T3 + T6;
	       T2K = T1Q + T1R;
	       T2W = T2j - T2i;
	       Tw = T3 - T6;
	       T17 = T13 - T16;
	       T1S = T1Q - T1R;
	       T2k = T2i + T2j;
	       T1w = T13 + T16;
	  }
	  /* Load stage 2: Rp/Ip elements 2 and 6 paired with Rm/Im elements 5
	     and 1.  The cross terms (T2n, T2L, T1Z, T2X) are scaled by
	     KP707106781. */
	  {
	       E Ta, T1T, TC, T1U, Td, T1W, Tz, T1X;
	       {
		    E T8, T9, TA, TB;
		    T8 = Rp[WS(rs, 2)];
		    T9 = Rm[WS(rs, 5)];
		    Ta = T8 + T9;
		    T1T = T8 - T9;
		    TA = Ip[WS(rs, 2)];
		    TB = Im[WS(rs, 5)];
		    TC = TA - TB;
		    T1U = TA + TB;
	       }
	       {
		    E Tb, Tc, Tx, Ty;
		    Tb = Rm[WS(rs, 1)];
		    Tc = Rp[WS(rs, 6)];
		    Td = Tb + Tc;
		    T1W = Tb - Tc;
		    Tx = Ip[WS(rs, 6)];
		    Ty = Im[WS(rs, 1)];
		    Tz = Tx - Ty;
		    T1X = Tx + Ty;
	       }
	       Te = Ta + Td;
	       TD = Tz - TC;
	       T1x = TC + Tz;
	       T10 = Ta - Td;
	       {
		    E T2l, T2m, T1V, T1Y;
		    T2l = T1T + T1U;
		    T2m = T1W + T1X;
		    T2n = KP707106781 * (T2l - T2m);
		    T2L = KP707106781 * (T2l + T2m);
		    T1V = T1T - T1U;
		    T1Y = T1W - T1X;
		    T1Z = KP707106781 * (T1V + T1Y);
		    T2X = KP707106781 * (T1V - T1Y);
	       }
	  }
	  /* Load stage 3: Rp/Ip elements 1 and 5 paired with Rm/Im elements 6
	     and 2.  The difference terms are combined with the
	     KP923879532 / KP382683432 pair. */
	  {
	       E Ti, T2b, TI, T29, Tl, T28, TL, T2c, TF, TM;
	       {
		    E Tg, Th, TG, TH;
		    Tg = Rp[WS(rs, 1)];
		    Th = Rm[WS(rs, 6)];
		    Ti = Tg + Th;
		    T2b = Tg - Th;
		    TG = Ip[WS(rs, 1)];
		    TH = Im[WS(rs, 6)];
		    TI = TG - TH;
		    T29 = TG + TH;
	       }
	       {
		    E Tj, Tk, TJ, TK;
		    Tj = Rp[WS(rs, 5)];
		    Tk = Rm[WS(rs, 2)];
		    Tl = Tj + Tk;
		    T28 = Tj - Tk;
		    TJ = Ip[WS(rs, 5)];
		    TK = Im[WS(rs, 2)];
		    TL = TJ - TK;
		    T2c = TJ + TK;
	       }
	       Tm = Ti + Tl;
	       T1z = TI + TL;
	       TF = Ti - Tl;
	       TM = TI - TL;
	       TN = TF - TM;
	       T19 = TF + TM;
	       {
		    E T2a, T2d, T2N, T2O;
		    T2a = T28 + T29;
		    T2d = T2b - T2c;
		    T2e = FMA(KP923879532, T2a, KP382683432 * T2d);
		    T2p = FNMS(KP382683432, T2a, KP923879532 * T2d);
		    T2N = T2b + T2c;
		    T2O = T29 - T28;
		    T2P = FNMS(KP923879532, T2O, KP382683432 * T2N);
		    T2Z = FMA(KP382683432, T2O, KP923879532 * T2N);
	       }
	  }
	  /* Load stage 4: Rp/Ip elements 7 and 3 paired with Rm/Im elements 0
	     and 4; same shape as stage 3.  This is the last stage that reads
	     the arrays. */
	  {
	       E Tp, T24, TR, T22, Ts, T21, TU, T25, TO, TV;
	       {
		    E Tn, To, TP, TQ;
		    Tn = Rm[0];
		    To = Rp[WS(rs, 7)];
		    Tp = Tn + To;
		    T24 = Tn - To;
		    TP = Ip[WS(rs, 7)];
		    TQ = Im[0];
		    TR = TP - TQ;
		    T22 = TP + TQ;
	       }
	       {
		    E Tq, Tr, TS, TT;
		    Tq = Rp[WS(rs, 3)];
		    Tr = Rm[WS(rs, 4)];
		    Ts = Tq + Tr;
		    T21 = Tq - Tr;
		    TS = Ip[WS(rs, 3)];
		    TT = Im[WS(rs, 4)];
		    TU = TS - TT;
		    T25 = TS + TT;
	       }
	       Tt = Tp + Ts;
	       T1A = TR + TU;
	       TO = Tp - Ts;
	       TV = TR - TU;
	       TW = TO + TV;
	       T1a = TV - TO;
	       {
		    E T23, T26, T2Q, T2R;
		    T23 = T21 - T22;
		    T26 = T24 - T25;
		    T27 = FNMS(KP382683432, T26, KP923879532 * T23);
		    T2q = FMA(KP382683432, T23, KP923879532 * T26);
		    T2Q = T24 + T25;
		    T2R = T21 + T22;
		    T2S = FNMS(KP923879532, T2R, KP382683432 * T2Q);
		    T30 = FMA(KP382683432, T2R, KP923879532 * T2Q);
	       }
	  }
	  /* Store stage: Rp[0]/Rm[0] receive the plain totals of the sum terms
	     (no W factor); Rp[4]/Rm[4] receive the alternating totals combined
	     with W[14], W[15]. */
	  {
	       E Tf, Tu, T1u, T1y, T1B, T1C, T1t, T1v;
	       Tf = T7 + Te;
	       Tu = Tm + Tt;
	       T1u = Tf - Tu;
	       T1y = T1w + T1x;
	       T1B = T1z + T1A;
	       T1C = T1y - T1B;
	       Rp[0] = Tf + Tu;
	       Rm[0] = T1y + T1B;
	       T1t = W[14];
	       T1v = W[15];
	       Rp[WS(rs, 4)] = FNMS(T1v, T1C, T1t * T1u);
	       Rm[WS(rs, 4)] = FMA(T1v, T1u, T1t * T1C);
	  }
	  /* Ip/Im slots 5 and 1, using W[20..21] and W[4..5]. */
	  {
	       E T2U, T34, T32, T36;
	       {
		    E T2M, T2T, T2Y, T31;
		    T2M = T2K - T2L;
		    T2T = T2P + T2S;
		    T2U = T2M - T2T;
		    T34 = T2M + T2T;
		    T2Y = T2W + T2X;
		    T31 = T2Z - T30;
		    T32 = T2Y - T31;
		    T36 = T2Y + T31;
	       }
	       {
		    E T2J, T2V, T33, T35;
		    T2J = W[20];
		    T2V = W[21];
		    Ip[WS(rs, 5)] = FNMS(T2V, T32, T2J * T2U);
		    Im[WS(rs, 5)] = FMA(T2V, T2U, T2J * T32);
		    T33 = W[4];
		    T35 = W[5];
		    Ip[WS(rs, 1)] = FNMS(T35, T36, T33 * T34);
		    Im[WS(rs, 1)] = FMA(T35, T34, T33 * T36);
	       }
	  }
	  /* Ip/Im slots 3 and 7, using W[12..13] and W[28..29]. */
	  {
	       E T3a, T3g, T3e, T3i;
	       {
		    E T38, T39, T3c, T3d;
		    T38 = T2K + T2L;
		    T39 = T2Z + T30;
		    T3a = T38 - T39;
		    T3g = T38 + T39;
		    T3c = T2W - T2X;
		    T3d = T2P - T2S;
		    T3e = T3c + T3d;
		    T3i = T3c - T3d;
	       }
	       {
		    E T37, T3b, T3f, T3h;
		    T37 = W[12];
		    T3b = W[13];
		    Ip[WS(rs, 3)] = FNMS(T3b, T3e, T37 * T3a);
		    Im[WS(rs, 3)] = FMA(T37, T3e, T3b * T3a);
		    T3f = W[28];
		    T3h = W[29];
		    Ip[WS(rs, 7)] = FNMS(T3h, T3i, T3f * T3g);
		    Im[WS(rs, 7)] = FMA(T3f, T3i, T3h * T3g);
	       }
	  }
	  /* Rp/Rm slots 5 and 1, using W[18..19] and W[2..3]. */
	  {
	       E TY, T1e, T1c, T1g;
	       {
		    E TE, TX, T18, T1b;
		    TE = Tw + TD;
		    TX = KP707106781 * (TN + TW);
		    TY = TE - TX;
		    T1e = TE + TX;
		    T18 = T10 + T17;
		    T1b = KP707106781 * (T19 + T1a);
		    T1c = T18 - T1b;
		    T1g = T18 + T1b;
	       }
	       {
		    E Tv, TZ, T1d, T1f;
		    Tv = W[18];
		    TZ = W[19];
		    Rp[WS(rs, 5)] = FNMS(TZ, T1c, Tv * TY);
		    Rm[WS(rs, 5)] = FMA(TZ, TY, Tv * T1c);
		    T1d = W[2];
		    T1f = W[3];
		    Rp[WS(rs, 1)] = FNMS(T1f, T1g, T1d * T1e);
		    Rm[WS(rs, 1)] = FMA(T1f, T1e, T1d * T1g);
	       }
	  }
	  /* Rp/Rm slots 7 and 3, using W[26..27] and W[10..11]. */
	  {
	       E T1k, T1q, T1o, T1s;
	       {
		    E T1i, T1j, T1m, T1n;
		    T1i = Tw - TD;
		    T1j = KP707106781 * (T1a - T19);
		    T1k = T1i - T1j;
		    T1q = T1i + T1j;
		    T1m = T17 - T10;
		    T1n = KP707106781 * (TN - TW);
		    T1o = T1m - T1n;
		    T1s = T1m + T1n;
	       }
	       {
		    E T1h, T1l, T1p, T1r;
		    T1h = W[26];
		    T1l = W[27];
		    Rp[WS(rs, 7)] = FNMS(T1l, T1o, T1h * T1k);
		    Rm[WS(rs, 7)] = FMA(T1h, T1o, T1l * T1k);
		    T1p = W[10];
		    T1r = W[11];
		    Rp[WS(rs, 3)] = FNMS(T1r, T1s, T1p * T1q);
		    Rm[WS(rs, 3)] = FMA(T1p, T1s, T1r * T1q);
	       }
	  }
	  /* Ip/Im slots 6 and 2, using W[24..25] and W[8..9]. */
	  {
	       E T2g, T2u, T2s, T2w;
	       {
		    E T20, T2f, T2o, T2r;
		    T20 = T1S - T1Z;
		    T2f = T27 - T2e;
		    T2g = T20 - T2f;
		    T2u = T20 + T2f;
		    T2o = T2k - T2n;
		    T2r = T2p - T2q;
		    T2s = T2o - T2r;
		    T2w = T2o + T2r;
	       }
	       {
		    E T1P, T2h, T2t, T2v;
		    T1P = W[24];
		    T2h = W[25];
		    Ip[WS(rs, 6)] = FNMS(T2h, T2s, T1P * T2g);
		    Im[WS(rs, 6)] = FMA(T2h, T2g, T1P * T2s);
		    T2t = W[8];
		    T2v = W[9];
		    Ip[WS(rs, 2)] = FNMS(T2v, T2w, T2t * T2u);
		    Im[WS(rs, 2)] = FMA(T2v, T2u, T2t * T2w);
	       }
	  }
	  /* Ip/Im slots 4 and 0, using W[16..17] and W[0..1]. */
	  {
	       E T2A, T2G, T2E, T2I;
	       {
		    E T2y, T2z, T2C, T2D;
		    T2y = T1S + T1Z;
		    T2z = T2p + T2q;
		    T2A = T2y - T2z;
		    T2G = T2y + T2z;
		    T2C = T2k + T2n;
		    T2D = T2e + T27;
		    T2E = T2C - T2D;
		    T2I = T2C + T2D;
	       }
	       {
		    E T2x, T2B, T2F, T2H;
		    T2x = W[16];
		    T2B = W[17];
		    Ip[WS(rs, 4)] = FNMS(T2B, T2E, T2x * T2A);
		    Im[WS(rs, 4)] = FMA(T2x, T2E, T2B * T2A);
		    T2F = W[0];
		    T2H = W[1];
		    Ip[0] = FNMS(T2H, T2I, T2F * T2G);
		    Im[0] = FMA(T2F, T2I, T2H * T2G);
	       }
	  }
	  /* Rp/Rm slots 6 and 2, using W[22..23] and W[6..7]. */
	  {
	       E T1G, T1M, T1K, T1O;
	       {
		    E T1E, T1F, T1I, T1J;
		    T1E = T7 - Te;
		    T1F = T1A - T1z;
		    T1G = T1E - T1F;
		    T1M = T1E + T1F;
		    T1I = T1w - T1x;
		    T1J = Tm - Tt;
		    T1K = T1I - T1J;
		    T1O = T1J + T1I;
	       }
	       {
		    E T1D, T1H, T1L, T1N;
		    T1D = W[22];
		    T1H = W[23];
		    Rp[WS(rs, 6)] = FNMS(T1H, T1K, T1D * T1G);
		    Rm[WS(rs, 6)] = FMA(T1D, T1K, T1H * T1G);
		    T1L = W[6];
		    T1N = W[7];
		    Rp[WS(rs, 2)] = FNMS(T1N, T1O, T1L * T1M);
		    Rm[WS(rs, 2)] = FMA(T1L, T1O, T1N * T1M);
	       }
	  }
     }
}