static void r2cbIII_5(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); DK(KP500000000, +0.500000000000000000000000000000000000000000000); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); DK(KP618033988, +0.618033988749894848204586834365638117720309180); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E T1, T2, T3, Tc, Ta, T8, T9; T8 = Ci[WS(csi, 1)]; T9 = Ci[0]; T1 = Cr[WS(csr, 2)]; T2 = Cr[WS(csr, 1)]; T3 = Cr[0]; Tc = FMS(KP618033988, T8, T9); Ta = FMA(KP618033988, T9, T8); { E T6, T4, T5, T7, Tb; T6 = T3 - T2; T4 = T2 + T3; R0[0] = FMA(KP2_000000000, T4, T1); T5 = FNMS(KP500000000, T4, T1); T7 = FNMS(KP1_118033988, T6, T5); Tb = FMA(KP1_118033988, T6, T5); R0[WS(rs, 2)] = FNMS(KP1_902113032, Ta, T7); R1[0] = -(FMA(KP1_902113032, Ta, T7)); R1[WS(rs, 1)] = FMS(KP1_902113032, Tc, Tb); R0[WS(rs, 1)] = FMA(KP1_902113032, Tc, Tb); } } } }
/*
 * hc2rIII_5 -- straight-line 5-output kernel (FMA-style formulation).
 *
 * Each of the v iterations reads ri[0], ri[WS(ris,1)], ri[WS(ris,2)],
 * ii[0] and ii[WS(iis,1)], and writes O[0] and O[WS(os,1..4)].
 * After every iteration ri/ii advance by ivs and O advances by ovs.
 *
 * NOTE(review): presumably an FFTW "hc2r type III" codelet of size 5 --
 * confirm against the generator output.
 */
static void hc2rIII_5(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, INT v, INT ivs, INT ovs)
{
     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
     INT left;
     for (left = v; left > 0; --left, ri += ivs, ii += ivs, O += ovs,
          MAKE_VOLATILE_STRIDE(ris), MAKE_VOLATILE_STRIDE(iis), MAKE_VOLATILE_STRIDE(os)) {
          /* Every input is read before the first output is written. */
          E im1 = ii[WS(iis, 1)];
          E im0 = ii[0];
          E re2 = ri[WS(ris, 2)];
          E re1 = ri[WS(ris, 1)];
          E re0 = ri[0];

          /* Combinations of the two ii inputs. */
          E rotB = FMS(KP618033988, im1, im0);
          E rotA = FMA(KP618033988, im0, im1);

          /* Combinations of the three ri inputs. */
          E diff = re0 - re1;
          E sum = re1 + re0;
          E mid = FNMS(KP500000000, sum, re2);
          E low = FNMS(KP1_118033988, diff, mid);
          E high = FMA(KP1_118033988, diff, mid);

          /* Outputs, stored in the original order. */
          O[0] = FMA(KP2_000000000, sum, re2);
          O[WS(os, 4)] = FNMS(KP1_902113032, rotA, low);
          O[WS(os, 1)] = -(FMA(KP1_902113032, rotA, low));
          O[WS(os, 3)] = FMS(KP1_902113032, rotB, high);
          O[WS(os, 2)] = FMA(KP1_902113032, rotB, high);
     }
}
/*
 * Prototype for the selection helper defined later in this file.  Without it
 * the calls below would rely on an implicit declaration, which is not valid
 * since C99 and would wrongly assume an int return type.
 */
double FMS(int a[], int m, int b[], int n, const int k);

/*
 * Median of the union of two ascending-sorted arrays.
 *
 * A, m: first sorted array and its length.
 * B, n: second sorted array and its length.
 *
 * Returns the middle element when m + n is odd, or the mean of the two middle
 * elements when it is even.  When both arrays are empty (m + n <= 0) there is
 * no median; 0.0 is returned instead of reading out of bounds.
 */
double findMedianSortedArrays(int A[], int m, int B[], int n)
{
    int total = m + n;
    double upper;
    double lower;

    if (total <= 0) {
        return 0.0;
    }

    /* Element at 1-based rank total/2 + 1 is needed in both parities. */
    upper = FMS(A, m, B, n, total / 2 + 1);
    if (total % 2 == 1) {
        return upper;
    }

    lower = FMS(A, m, B, n, total / 2);
    return (upper + lower) / 2;
}
static void r2cfII_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP414213562, +0.414213562373095048801688724209698078569671875); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E Te, T8, Td, T5, Tj, Tl, Tf, Tb; { E T1, Th, T9, Ti, T4, Ta; T1 = R0[0]; Th = R0[WS(rs, 2)]; { E T2, T3, T6, T7; T2 = R0[WS(rs, 1)]; T3 = R0[WS(rs, 3)]; T6 = R1[0]; T7 = R1[WS(rs, 2)]; T9 = R1[WS(rs, 3)]; Ti = T2 + T3; T4 = T2 - T3; Te = FMA(KP414213562, T6, T7); T8 = FNMS(KP414213562, T7, T6); Ta = R1[WS(rs, 1)]; } Td = FNMS(KP707106781, T4, T1); T5 = FMA(KP707106781, T4, T1); Tj = FMA(KP707106781, Ti, Th); Tl = FNMS(KP707106781, Ti, Th); Tf = FMA(KP414213562, T9, Ta); Tb = FMS(KP414213562, Ta, T9); } { E Tk, Tg, Tc, Tm; Tk = Te + Tf; Tg = Te - Tf; Tc = T8 + Tb; Tm = Tb - T8; Cr[WS(csr, 1)] = FMA(KP923879532, Tg, Td); Cr[WS(csr, 2)] = FNMS(KP923879532, Tg, Td); Ci[WS(csi, 3)] = FNMS(KP923879532, Tk, Tj); Ci[0] = -(FMA(KP923879532, Tk, Tj)); Ci[WS(csi, 1)] = FMA(KP923879532, Tm, Tl); Ci[WS(csi, 2)] = FMS(KP923879532, Tm, Tl); Cr[0] = FMA(KP923879532, Tc, T5); Cr[WS(csr, 3)] = FNMS(KP923879532, Tc, T5); } } } }
/*
 * Select the k-th smallest element (1-based) of the union of two
 * ascending-sorted int arrays.
 *
 * a, m: first sorted array and its length.
 * b, n: second sorted array and its length.
 * k:    rank to select, valid for 1 <= k <= m + n.
 *
 * Returns the selected element converted to double.  If a length is negative
 * or k lies outside [1, m + n], no such element exists and 0.0 is returned
 * rather than reading outside the arrays.
 *
 * Each round discards a prefix of one array that cannot contain the answer,
 * so roughly half of the remaining rank is eliminated per step.  The search
 * is written as a loop and uses no external min() helper.
 *
 * NOTE(review): other functions in this file invoke FMS(x, y, z) with three
 * arguments as a macro; if that macro is visible here the two uses clash --
 * confirm.
 */
double FMS(int a[], int m , int b[], int n, const int k)
{
    int rank = k;

    /* k - m > n is k > m + n without risking overflow of m + n. */
    if (m < 0 || n < 0 || k < 1 || k - m > n) {
        return 0.0;
    }

    for (;;) {
        int take_a;
        int take_b;

        /* Keep a as the shorter array so take_a never exceeds its length. */
        if (m > n) {
            int *tmp_ptr = a;
            int tmp_len = m;
            a = b;
            m = n;
            b = tmp_ptr;
            n = tmp_len;
        }

        if (m == 0) {
            return b[rank - 1];
        }
        if (rank == 1) {
            return (a[0] < b[0]) ? a[0] : b[0];
        }

        take_a = (m < rank / 2) ? m : rank / 2;
        take_b = rank - take_a;

        if (a[take_a - 1] < b[take_b - 1]) {
            /* The first take_a elements of a all rank below the target. */
            a += take_a;
            m -= take_a;
            rank -= take_a;
        } else {
            /* The first take_b elements of b all rank at or below the target. */
            b += take_b;
            n -= take_b;
            rank -= take_b;
        }
    }
}
/*
 * hc2rIII_5 -- straight-line 5-output kernel (multiply/add formulation,
 * int-typed counts and strides).
 *
 * Each of the v iterations reads ri[0], ri[WS(ris,1)], ri[WS(ris,2)],
 * ii[0] and ii[WS(iis,1)], and writes O[0] and O[WS(os,1..4)].
 * After every iteration ri/ii advance by ivs and O advances by ovs.
 *
 * NOTE(review): presumably an FFTW "hc2r type III" codelet of size 5 --
 * confirm against the generator output.
 */
static void hc2rIII_5(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
{
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);
     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
     int left;
     for (left = v; left > 0; --left, ri += ivs, ii += ivs, O += ovs) {
          /* Every input is read before the first output is written. */
          E im1 = ii[WS(iis, 1)];
          E im0 = ii[0];
          E rotA = FMA(KP1_902113032, im1, KP1_175570504 * im0);
          E rotB = FNMS(KP1_902113032, im0, KP1_175570504 * im1);
          E re2 = ri[WS(ris, 2)];
          E re1 = ri[WS(ris, 1)];
          E re0 = ri[0];

          /* Combinations of the three ri inputs. */
          E sum = re1 + re0;
          E half = FMS(KP500000000, sum, re2);
          E scaled = KP1_118033988 * (re0 - re1);
          E minus = scaled - half;
          E plus = half + scaled;

          /* Outputs, stored in the original order. */
          O[0] = FMA(KP2_000000000, sum, re2);
          O[WS(os, 2)] = minus + rotB;
          O[WS(os, 3)] = rotB - minus;
          O[WS(os, 1)] = plus - rotA;
          O[WS(os, 4)] = -(plus + rotA);
     }
}
/*
 * r2cfII_6 -- straight-line kernel with 6 inputs and 6 outputs.
 *
 * Each of the v iterations reads R0[0], R0[WS(rs,1)], R0[WS(rs,2)], R1[0],
 * R1[WS(rs,1)] and R1[WS(rs,2)], and writes Cr[0], Cr[WS(csr,1)],
 * Cr[WS(csr,2)], Ci[0], Ci[WS(csi,1)] and Ci[WS(csi,2)].  After every
 * iteration R0/R1 advance by ivs and Cr/Ci advance by ovs.
 *
 * NOTE(review): presumably an FFTW "r2cf type II" codelet of size 6 --
 * confirm against the generator output.
 */
static void r2cfII_6(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
{
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
          INT left;
          for (left = v; left > 0; --left, R0 += ivs, R1 += ivs, Cr += ovs, Ci += ovs,
               MAKE_VOLATILE_STRIDE(24, rs), MAKE_VOLATILE_STRIDE(24, csr), MAKE_VOLATILE_STRIDE(24, csi)) {
               /* Every input is read before the first output is written. */
               E a0 = R0[0];
               E b1 = R1[WS(rs, 1)];
               E a2 = R0[WS(rs, 2)];
               E a1 = R0[WS(rs, 1)];
               E b2 = R1[WS(rs, 2)];
               E b0 = R1[0];

               /* Pairwise sums and differences. */
               E aSum = a2 + a1;
               E aDiff = a1 - a2;
               E bSum = b2 + b0;
               E bDiff = b2 - b0;
               E reBase = FMA(KP500000000, aDiff, a0);
               E imBase = FMA(KP500000000, bSum, b1);

               /* Outputs, stored in the original order. */
               Cr[WS(csr, 1)] = a0 + a2 - a1;
               Ci[WS(csi, 1)] = b1 - bSum;
               Cr[WS(csr, 2)] = FMA(KP866025403, bDiff, reBase);
               Cr[0] = FNMS(KP866025403, bDiff, reBase);
               Ci[WS(csi, 2)] = FMS(KP866025403, aSum, imBase);
               Ci[0] = -(FMA(KP866025403, aSum, imBase));
          }
     }
}
static void r2cbIII_5(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); DK(KP500000000, +0.500000000000000000000000000000000000000000000); DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E Ta, Tc, T1, T4, T5, T6, Tb, T7; { E T8, T9, T2, T3; T8 = Ci[WS(csi, 1)]; T9 = Ci[0]; Ta = FMA(KP1_902113032, T8, KP1_175570504 * T9); Tc = FNMS(KP1_902113032, T9, KP1_175570504 * T8); T1 = Cr[WS(csr, 2)]; T2 = Cr[WS(csr, 1)]; T3 = Cr[0]; T4 = T2 + T3; T5 = FMS(KP500000000, T4, T1); T6 = KP1_118033988 * (T3 - T2); } R0[0] = FMA(KP2_000000000, T4, T1); Tb = T6 - T5; R0[WS(rs, 1)] = Tb + Tc; R1[WS(rs, 1)] = Tc - Tb; T7 = T5 + T6; R1[0] = T7 - Ta; R0[WS(rs, 2)] = -(T7 + Ta); } } }
/*
 * r2hcII_6 -- straight-line kernel with 6 inputs and 6 outputs.
 *
 * Each of the v iterations reads I[0] and I[WS(is,1..5)], and writes ro[0],
 * ro[WS(ros,1)], ro[WS(ros,2)], io[0], io[WS(ios,1)] and io[WS(ios,2)].
 * After every iteration I advances by ivs and ro/io advance by ovs.
 *
 * NOTE(review): presumably an FFTW "r2hc type II" codelet of size 6 --
 * confirm against the generator output.
 */
static void r2hcII_6(const R *I, R *ro, R *io, stride is, stride ros, stride ios, INT v, INT ivs, INT ovs)
{
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT left;
     for (left = v; left > 0; --left, I += ivs, ro += ovs, io += ovs,
          MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(ros), MAKE_VOLATILE_STRIDE(ios)) {
          /* Every input is read before the first output is written. */
          E x0 = I[0];
          E x3 = I[WS(is, 3)];
          E x4 = I[WS(is, 4)];
          E x2 = I[WS(is, 2)];
          E x5 = I[WS(is, 5)];
          E x1 = I[WS(is, 1)];

          /* Pairwise sums and differences. */
          E evenSum = x4 + x2;
          E evenDiff = x2 - x4;
          E oddSum = x5 + x1;
          E oddDiff = x5 - x1;
          E reBase = FMA(KP500000000, evenDiff, x0);
          E imBase = FMA(KP500000000, oddSum, x3);

          /* Outputs, stored in the original order. */
          ro[WS(ros, 1)] = x0 + x4 - x2;
          io[WS(ios, 1)] = x3 - oddSum;
          ro[WS(ros, 2)] = FMA(KP866025403, oddDiff, reBase);
          ro[0] = FNMS(KP866025403, oddDiff, reBase);
          io[WS(ios, 2)] = FMS(KP866025403, evenSum, imBase);
          io[0] = -(FMA(KP866025403, evenSum, imBase));
     }
}
static void r2cbIII_7(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP1_949855824, +1.949855824363647214036263365987862434465571601); DK(KP1_801937735, +1.801937735804838252472204639014890102331838324); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); DK(KP692021471, +0.692021471630095869627814897002069140197260599); DK(KP801937735, +0.801937735804838252472204639014890102331838324); DK(KP356895867, +0.356895867892209443894399510021300583399127187); DK(KP554958132, +0.554958132087371191422194871006410481067288862); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E Tn, Td, Tg, Ti, Tl, T8; { E T1, T9, Tb, Ta, T2, T4, Th, Tm, Tc, T3, Te; T1 = Cr[WS(csr, 3)]; T9 = Ci[WS(csi, 1)]; Tb = Ci[0]; Ta = Ci[WS(csi, 2)]; T2 = Cr[WS(csr, 2)]; T4 = Cr[0]; Th = FMA(KP554958132, T9, Tb); Tm = FNMS(KP554958132, Ta, T9); Tc = FMA(KP554958132, Tb, Ta); T3 = Cr[WS(csr, 1)]; Te = FNMS(KP356895867, T2, T4); Tn = FNMS(KP801937735, Tm, Tb); { E Tf, Tk, T7, T5, Tj, T6; Td = FMA(KP801937735, Tc, T9); T5 = T2 + T3 + T4; Tj = FNMS(KP356895867, T4, T3); T6 = FNMS(KP356895867, T3, T2); Tf = FNMS(KP692021471, Te, T3); R0[0] = FMA(KP2_000000000, T5, T1); Tk = FNMS(KP692021471, Tj, T2); T7 = FNMS(KP692021471, T6, T4); Tg = FNMS(KP1_801937735, Tf, T1); Ti = FNMS(KP801937735, Th, Ta); Tl = FNMS(KP1_801937735, Tk, T1); T8 = FNMS(KP1_801937735, T7, T1); } } R1[WS(rs, 2)] = FMS(KP1_949855824, Ti, Tg); R0[WS(rs, 1)] = FMA(KP1_949855824, Ti, Tg); R0[WS(rs, 2)] = FNMS(KP1_949855824, Tn, Tl); R1[WS(rs, 1)] = -(FMA(KP1_949855824, Tn, Tl)); R0[WS(rs, 3)] = FNMS(KP1_949855824, Td, T8); R1[0] = -(FMA(KP1_949855824, Td, T8)); } } }
static void r2cf_8(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP707106781, +0.707106781186547524400844362104849039284835938); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E T4, T7, T3, Tj, Td, T5, T8, T9; { E T1, T2, Tb, Tc; T1 = R0[0]; T2 = R0[WS(rs, 2)]; Tb = R1[WS(rs, 3)]; Tc = R1[WS(rs, 1)]; T4 = R0[WS(rs, 1)]; T7 = T1 - T2; T3 = T1 + T2; Tj = Tb + Tc; Td = Tb - Tc; T5 = R0[WS(rs, 3)]; T8 = R1[0]; T9 = R1[WS(rs, 2)]; } { E T6, Tf, Ta, Ti; T6 = T4 + T5; Tf = T4 - T5; Ta = T8 - T9; Ti = T8 + T9; { E Th, Tk, Te, Tg; Th = T3 + T6; Cr[WS(csr, 2)] = T3 - T6; Tk = Ti + Tj; Ci[WS(csi, 2)] = Tj - Ti; Te = Ta + Td; Tg = Td - Ta; Cr[0] = Th + Tk; Cr[WS(csr, 4)] = Th - Tk; Ci[WS(csi, 3)] = FMA(KP707106781, Tg, Tf); Ci[WS(csi, 1)] = FMS(KP707106781, Tg, Tf); Cr[WS(csr, 1)] = FMA(KP707106781, Te, T7); Cr[WS(csr, 3)] = FNMS(KP707106781, Te, T7); } } } } }
/*
 * hc2cf2_20 -- in-place straight-line kernel over four arrays with a compact
 * coefficient table.
 *
 * Loop: m runs from mb up to (but not including) me.  Before the first
 * iteration W is advanced by (mb - 1) * 8; after each iteration Rp and Ip
 * advance by ms, Rm and Im step back by ms, and W advances by 8.
 *
 * Per iteration:
 *   - W[0]..W[7] are read, and further coefficients are derived from them by
 *     products and FMA/FNMS combinations (T7, Tb, Tm, Tq, ... below).
 *   - Rp, Rm, Ip and Im are each read at offsets WS(rs, 0) .. WS(rs, 9) and
 *     combined with those coefficients.
 *   - Results are written back into the same four arrays at offsets
 *     WS(rs, 0) .. WS(rs, 9).
 *
 * Loads and stores are interleaved, so the statement order must be kept
 * as is when editing.
 *
 * NOTE(review): the name suggests an FFTW hc2c forward codelet of size 20
 * using the "twiddle 2" scheme -- confirm against the generator output.
 */
static void hc2cf2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP618033988, +0.618033988749894848204586834365638117720309180); { INT m; for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(80, rs)) { E T59, T5i, T5k, T5e, T5c, T5d, T5j, T5f; { E T2, Th, Tf, T6, T5, Tl, T1p, T1n, Ti, T3, Tt, Tv, T24, T1f, T1D; E Tb, T1P, Tm, T21, T1b, T7, T1A, Tw, T1H, T13, TA, T1L, T17, T1S, Tq; E T1o, T2g, T1t, T2c, TO, TK; { E T1e, Ta, Tk, Tg; T2 = W[0]; Th = W[3]; Tf = W[2]; T6 = W[5]; T5 = W[1]; Tk = T2 * Th; Tg = T2 * Tf; T1e = Tf * T6; Ta = T2 * T6; Tl = FMA(T5, Tf, Tk); T1p = FNMS(T5, Tf, Tk); T1n = FMA(T5, Th, Tg); Ti = FNMS(T5, Th, Tg); T3 = W[4]; Tt = W[6]; Tv = W[7]; { E Tp, Tj, TN, TJ; Tp = Ti * T6; T24 = FMA(Th, T3, T1e); T1f = FNMS(Th, T3, T1e); T1D = FNMS(T5, T3, Ta); Tb = FMA(T5, T3, Ta); Tj = Ti * T3; { E T1a, T4, Tu, T1G; T1a = Tf * T3; T4 = T2 * T3; Tu = Ti * Tt; T1G = T2 * Tt; { E T12, Tz, T1K, T16; T12 = Tf * Tt; Tz = Ti * Tv; T1K = T2 * Tv; T16 = Tf * Tv; T1P = FNMS(Tl, T6, Tj); Tm = FMA(Tl, T6, Tj); T21 = FNMS(Th, T6, T1a); T1b = FMA(Th, T6, T1a); T7 = FNMS(T5, T6, T4); T1A = FMA(T5, T6, T4); Tw = FMA(Tl, Tv, Tu); T1H = FMA(T5, Tv, T1G); T13 = FMA(Th, Tv, T12); TA = FNMS(Tl, Tt, Tz); T1L = FNMS(T5, Tt, T1K); T17 = FNMS(Th, Tt, T16); T1S = FMA(Tl, T3, Tp); Tq = FNMS(Tl, T3, Tp); } } T1o = T1n * T3; T2g = T1n * Tv; TN = Tm * Tv; TJ = Tm * Tt; T1t = T1n * T6; T2c = T1n * Tt; TO = FNMS(Tq, Tt, TN); TK = FMA(Tq, Tv, TJ); } } { E Te, T2C, T4L, T57, T58, TD, T2H, T4H, T3J, T3Z, T11, T2v, T2P, T3P, T4d; E T4z, T3n, T43, T2r, T2z, T3b, T3T, T4n, T4v, T3u, T42, T20, T2y, T34, T3S; E T4k, T4w, T1c, T19, T1d, T3y, T1w, T2U, T1g, T1j, T1l; { E 
T2d, T2h, T2k, T1q, T1u, T2n, TL, TI, TM, T3F, TZ, T2N, TP, TS, TU; { E T1, T4K, T8, T9, Tc; T1 = Rp[0]; T4K = Rm[0]; T8 = Rp[WS(rs, 5)]; T2d = FMA(T1p, Tv, T2c); T2h = FNMS(T1p, Tt, T2g); T2k = FMA(T1p, T6, T1o); T1q = FNMS(T1p, T6, T1o); T1u = FMA(T1p, T3, T1t); T2n = FNMS(T1p, T3, T1t); T9 = T7 * T8; Tc = Rm[WS(rs, 5)]; { E Tx, Ts, T2F, TC, T2E; { E Tn, Tr, To, T2D, T4J, Ty, TB, Td, T4I; Tn = Ip[WS(rs, 2)]; Tr = Im[WS(rs, 2)]; Tx = Ip[WS(rs, 7)]; Td = FMA(Tb, Tc, T9); T4I = T7 * Tc; To = Tm * Tn; T2D = Tm * Tr; Te = T1 + Td; T2C = T1 - Td; T4J = FNMS(Tb, T8, T4I); Ty = Tw * Tx; TB = Im[WS(rs, 7)]; Ts = FMA(Tq, Tr, To); T4L = T4J + T4K; T57 = T4K - T4J; T2F = Tw * TB; TC = FMA(TA, TB, Ty); T2E = FNMS(Tq, Tn, T2D); } { E TF, TG, TH, TW, TY, T2G, T3E, TX, T2M; TF = Rp[WS(rs, 2)]; T2G = FNMS(TA, Tx, T2F); T58 = Ts - TC; TD = Ts + TC; TG = Ti * TF; T2H = T2E - T2G; T4H = T2E + T2G; TH = Rm[WS(rs, 2)]; TW = Ip[WS(rs, 9)]; TY = Im[WS(rs, 9)]; TL = Rp[WS(rs, 7)]; TI = FMA(Tl, TH, TG); T3E = Ti * TH; TX = Tt * TW; T2M = Tt * TY; TM = TK * TL; T3F = FNMS(Tl, TF, T3E); TZ = FMA(Tv, TY, TX); T2N = FNMS(Tv, TW, T2M); TP = Rm[WS(rs, 7)]; TS = Ip[WS(rs, 4)]; TU = Im[WS(rs, 4)]; } } } { E T27, T26, T28, T3j, T2p, T39, T29, T2e, T2i; { E T22, T23, T25, T2l, T2o, T3i, T2m, T38; { E TR, T2J, T3H, TV, T2L, T4b, T3I; T22 = Rp[WS(rs, 6)]; { E TQ, T3G, TT, T2K; TQ = FMA(TO, TP, TM); T3G = TK * TP; TT = T3 * TS; T2K = T3 * TU; TR = TI + TQ; T2J = TI - TQ; T3H = FNMS(TO, TL, T3G); TV = FMA(T6, TU, TT); T2L = FNMS(T6, TS, T2K); T23 = T21 * T22; } T4b = T3F + T3H; T3I = T3F - T3H; { E T10, T3D, T4c, T2O; T10 = TV + TZ; T3D = TZ - TV; T4c = T2L + T2N; T2O = T2L - T2N; T3J = T3D - T3I; T3Z = T3I + T3D; T11 = TR - T10; T2v = TR + T10; T2P = T2J - T2O; T3P = T2J + T2O; T4d = T4b + T4c; T4z = T4c - T4b; T25 = Rm[WS(rs, 6)]; } } T2l = Ip[WS(rs, 3)]; T2o = Im[WS(rs, 3)]; T27 = Rp[WS(rs, 1)]; T26 = FMA(T24, T25, T23); T3i = T21 * T25; T2m = T2k * T2l; T38 = T2k * T2o; T28 = T1n * T27; T3j = 
FNMS(T24, T22, T3i); T2p = FMA(T2n, T2o, T2m); T39 = FNMS(T2n, T2l, T38); T29 = Rm[WS(rs, 1)]; T2e = Ip[WS(rs, 8)]; T2i = Im[WS(rs, 8)]; } { E T1I, T1F, T1J, T3q, T1Y, T32, T1M, T1Q, T1T; { E T1B, T1C, T1E, T1V, T1X, T3p, T1W, T31; { E T2b, T35, T3l, T2j, T37, T4l, T3m; T1B = Rp[WS(rs, 4)]; { E T2a, T3k, T2f, T36; T2a = FMA(T1p, T29, T28); T3k = T1n * T29; T2f = T2d * T2e; T36 = T2d * T2i; T2b = T26 + T2a; T35 = T26 - T2a; T3l = FNMS(T1p, T27, T3k); T2j = FMA(T2h, T2i, T2f); T37 = FNMS(T2h, T2e, T36); T1C = T1A * T1B; } T4l = T3j + T3l; T3m = T3j - T3l; { E T2q, T3h, T4m, T3a; T2q = T2j + T2p; T3h = T2p - T2j; T4m = T37 + T39; T3a = T37 - T39; T3n = T3h - T3m; T43 = T3m + T3h; T2r = T2b - T2q; T2z = T2b + T2q; T3b = T35 - T3a; T3T = T35 + T3a; T4n = T4l + T4m; T4v = T4m - T4l; T1E = Rm[WS(rs, 4)]; } } T1V = Ip[WS(rs, 1)]; T1X = Im[WS(rs, 1)]; T1I = Rp[WS(rs, 9)]; T1F = FMA(T1D, T1E, T1C); T3p = T1A * T1E; T1W = Tf * T1V; T31 = Tf * T1X; T1J = T1H * T1I; T3q = FNMS(T1D, T1B, T3p); T1Y = FMA(Th, T1X, T1W); T32 = FNMS(Th, T1V, T31); T1M = Rm[WS(rs, 9)]; T1Q = Ip[WS(rs, 6)]; T1T = Im[WS(rs, 6)]; } { E T14, T15, T18, T1r, T1v, T3x, T1s, T2T; { E T1O, T2Y, T3s, T1U, T30, T4i, T3t; T14 = Rp[WS(rs, 8)]; { E T1N, T3r, T1R, T2Z; T1N = FMA(T1L, T1M, T1J); T3r = T1H * T1M; T1R = T1P * T1Q; T2Z = T1P * T1T; T1O = T1F + T1N; T2Y = T1F - T1N; T3s = FNMS(T1L, T1I, T3r); T1U = FMA(T1S, T1T, T1R); T30 = FNMS(T1S, T1Q, T2Z); T15 = T13 * T14; } T4i = T3q + T3s; T3t = T3q - T3s; { E T1Z, T3o, T4j, T33; T1Z = T1U + T1Y; T3o = T1Y - T1U; T4j = T30 + T32; T33 = T30 - T32; T3u = T3o - T3t; T42 = T3t + T3o; T20 = T1O - T1Z; T2y = T1O + T1Z; T34 = T2Y - T33; T3S = T2Y + T33; T4k = T4i + T4j; T4w = T4j - T4i; T18 = Rm[WS(rs, 8)]; } } T1r = Ip[WS(rs, 5)]; T1v = Im[WS(rs, 5)]; T1c = Rp[WS(rs, 3)]; T19 = FMA(T17, T18, T15); T3x = T13 * T18; T1s = T1q * T1r; T2T = T1q * T1v; T1d = T1b * T1c; T3y = FNMS(T17, T14, T3x); T1w = FMA(T1u, T1v, T1s); T2U = FNMS(T1u, T1r, T2T); T1g = Rm[WS(rs, 3)]; T1j = 
Ip[0]; T1l = Im[0]; } } } } { E T3C, T40, T2W, T3Q, T4M, T4E, T4F, T4U, T4S; { E T4W, T2u, T2w, T4g, T4V, T4D, T4B, T54, T56, T4Y, T4u, T4C; { E T4x, TE, T53, T1z, T2s, T52, T4A, T4t, T4s, T2t; { E T1i, T2Q, T3A, T1m, T2S; T4x = T4v - T4w; T4W = T4w + T4v; { E T1h, T3z, T1k, T2R; T1h = FMA(T1f, T1g, T1d); T3z = T1b * T1g; T1k = T2 * T1j; T2R = T2 * T1l; T1i = T19 + T1h; T2Q = T19 - T1h; T3A = FNMS(T1f, T1c, T3z); T1m = FMA(T5, T1l, T1k); T2S = FNMS(T5, T1j, T2R); } TE = Te - TD; T2u = Te + TD; { E T4e, T3B, T1x, T3w; T4e = T3y + T3A; T3B = T3y - T3A; T1x = T1m + T1w; T3w = T1w - T1m; { E T4f, T2V, T1y, T4y; T4f = T2S + T2U; T2V = T2S - T2U; T3C = T3w - T3B; T40 = T3B + T3w; T1y = T1i - T1x; T2w = T1i + T1x; T2W = T2Q - T2V; T3Q = T2Q + T2V; T4g = T4e + T4f; T4y = T4f - T4e; T53 = T1y - T11; T1z = T11 + T1y; T2s = T20 + T2r; T52 = T20 - T2r; T4V = T4z + T4y; T4A = T4y - T4z; } } } T4t = T1z - T2s; T2t = T1z + T2s; T4D = FMA(KP618033988, T4x, T4A); T4B = FNMS(KP618033988, T4A, T4x); T54 = FMA(KP618033988, T53, T52); T56 = FNMS(KP618033988, T52, T53); Rm[WS(rs, 9)] = TE + T2t; T4s = FNMS(KP250000000, T2t, TE); T4Y = T4L - T4H; T4M = T4H + T4L; T4u = FNMS(KP559016994, T4t, T4s); T4C = FMA(KP559016994, T4t, T4s); } { E T2x, T4Q, T4p, T4r, T4R, T2A, T51, T55; { E T4h, T50, T4X, T4o, T4Z; T4E = T4d + T4g; T4h = T4d - T4g; Rm[WS(rs, 1)] = FMA(KP951056516, T4B, T4u); Rp[WS(rs, 2)] = FNMS(KP951056516, T4B, T4u); Rp[WS(rs, 6)] = FMA(KP951056516, T4D, T4C); Rm[WS(rs, 5)] = FNMS(KP951056516, T4D, T4C); T50 = T4W - T4V; T4X = T4V + T4W; T4o = T4k - T4n; T4F = T4k + T4n; T2x = T2v + T2w; T4Q = T2v - T2w; Im[WS(rs, 9)] = T4X - T4Y; T4Z = FMA(KP250000000, T4X, T4Y); T4p = FMA(KP618033988, T4o, T4h); T4r = FNMS(KP618033988, T4h, T4o); T4R = T2z - T2y; T2A = T2y + T2z; T51 = FNMS(KP559016994, T50, T4Z); T55 = FMA(KP559016994, T50, T4Z); } { E T49, T48, T2B, T4a, T4q; T2B = T2x + T2A; T49 = T2x - T2A; Ip[WS(rs, 2)] = FMA(KP951056516, T54, T51); Im[WS(rs, 1)] = FMS(KP951056516, T54, 
T51); Ip[WS(rs, 6)] = FMA(KP951056516, T56, T55); Im[WS(rs, 5)] = FMS(KP951056516, T56, T55); Rp[0] = T2u + T2B; T48 = FNMS(KP250000000, T2B, T2u); T4a = FMA(KP559016994, T49, T48); T4q = FNMS(KP559016994, T49, T48); T4U = FMA(KP618033988, T4Q, T4R); T4S = FNMS(KP618033988, T4R, T4Q); Rm[WS(rs, 3)] = FMA(KP951056516, T4p, T4a); Rp[WS(rs, 4)] = FNMS(KP951056516, T4p, T4a); Rp[WS(rs, 8)] = FMA(KP951056516, T4r, T4q); Rm[WS(rs, 7)] = FNMS(KP951056516, T4r, T4q); } } } { E T3O, T5u, T5w, T5o, T5q, T5n; { E T5m, T5l, T2I, T4O, T3N, T3L, T2X, T5s, T4N, T5t, T3c, T3v, T3K, T4G; T5m = T3u + T3n; T3v = T3n - T3u; T3K = T3C - T3J; T5l = T3J + T3C; T3O = T2C + T2H; T2I = T2C - T2H; T4O = T4E - T4F; T4G = T4E + T4F; T3N = FMA(KP618033988, T3v, T3K); T3L = FNMS(KP618033988, T3K, T3v); T2X = T2P + T2W; T5s = T2P - T2W; Ip[0] = T4G + T4M; T4N = FNMS(KP250000000, T4G, T4M); T5t = T34 - T3b; T3c = T34 + T3b; { E T3f, T3e, T4P, T4T, T3d, T3M, T3g; T4P = FMA(KP559016994, T4O, T4N); T4T = FNMS(KP559016994, T4O, T4N); T3f = T2X - T3c; T3d = T2X + T3c; Ip[WS(rs, 4)] = FMA(KP951056516, T4S, T4P); Im[WS(rs, 3)] = FMS(KP951056516, T4S, T4P); Ip[WS(rs, 8)] = FMA(KP951056516, T4U, T4T); Im[WS(rs, 7)] = FMS(KP951056516, T4U, T4T); Rm[WS(rs, 4)] = T2I + T3d; T3e = FNMS(KP250000000, T3d, T2I); T5u = FMA(KP618033988, T5t, T5s); T5w = FNMS(KP618033988, T5s, T5t); T5o = T58 + T57; T59 = T57 - T58; T3M = FMA(KP559016994, T3f, T3e); T3g = FNMS(KP559016994, T3f, T3e); Rp[WS(rs, 7)] = FNMS(KP951056516, T3L, T3g); Rp[WS(rs, 3)] = FMA(KP951056516, T3L, T3g); Rm[0] = FNMS(KP951056516, T3N, T3M); Rm[WS(rs, 8)] = FMA(KP951056516, T3N, T3M); T5q = T5l - T5m; T5n = T5l + T5m; } } { E T5a, T5b, T47, T45, T5h, T5g, T3V, T3X, T41, T44, T5p, T3W, T46, T3Y; T5a = T3Z + T40; T41 = T3Z - T40; T44 = T42 - T43; T5b = T42 + T43; Im[WS(rs, 4)] = T5n - T5o; T5p = FMA(KP250000000, T5n, T5o); T47 = FNMS(KP618033988, T41, T44); T45 = FMA(KP618033988, T44, T41); { E T5r, T5v, T3R, T3U; T5r = FNMS(KP559016994, T5q, T5p); T5v 
= FMA(KP559016994, T5q, T5p); T3R = T3P + T3Q; T5h = T3P - T3Q; T5g = T3S - T3T; T3U = T3S + T3T; Im[0] = -(FMA(KP951056516, T5u, T5r)); Im[WS(rs, 8)] = FMS(KP951056516, T5u, T5r); Ip[WS(rs, 7)] = FMA(KP951056516, T5w, T5v); Ip[WS(rs, 3)] = FNMS(KP951056516, T5w, T5v); T3V = T3R + T3U; T3X = T3R - T3U; } Rp[WS(rs, 5)] = T3O + T3V; T3W = FNMS(KP250000000, T3V, T3O); T5i = FNMS(KP618033988, T5h, T5g); T5k = FMA(KP618033988, T5g, T5h); T46 = FNMS(KP559016994, T3X, T3W); T3Y = FMA(KP559016994, T3X, T3W); Rp[WS(rs, 9)] = FNMS(KP951056516, T45, T3Y); Rp[WS(rs, 1)] = FMA(KP951056516, T45, T3Y); Rm[WS(rs, 2)] = FNMS(KP951056516, T47, T46); Rm[WS(rs, 6)] = FMA(KP951056516, T47, T46); T5e = T5a - T5b; T5c = T5a + T5b; } } } } } Ip[WS(rs, 5)] = T5c + T59; T5d = FNMS(KP250000000, T5c, T59); T5j = FMA(KP559016994, T5e, T5d); T5f = FNMS(KP559016994, T5e, T5d); Im[WS(rs, 2)] = -(FMA(KP951056516, T5i, T5f)); Im[WS(rs, 6)] = FMS(KP951056516, T5i, T5f); Ip[WS(rs, 9)] = FMA(KP951056516, T5k, T5j); Ip[WS(rs, 1)] = FNMS(KP951056516, T5k, T5j); } } }
/*
 * hf_15 -- in-place straight-line kernel over two arrays with a 28-entry
 * coefficient table per iteration.
 *
 * Loop: i starts at m - 2 and decreases by 2 while it is positive.  After
 * each iteration rio advances by dist, iio steps back by dist, and W advances
 * by 28.
 *
 * Per iteration:
 *   - W[0]..W[27] are read as 14 coefficient pairs.
 *   - rio is read at offsets WS(ios, 0) .. WS(ios, 14) and iio at offsets
 *     -WS(ios, 0) .. -WS(ios, 14); each pair other than rio[0] /
 *     iio[-WS(ios, 14)] is combined with one coefficient pair.
 *   - Results are written back into rio (non-negative offsets) and iio
 *     (non-positive offsets).
 *
 * Loads and stores are interleaved, so the statement order must be kept
 * as is when editing.
 *
 * Returns W as advanced by the loop, i.e. the position following the last
 * coefficient block consumed.
 *
 * NOTE(review): the name suggests an FFTW halfcomplex forward twiddle codelet
 * of size 15 -- confirm against the generator output.
 */
static const R *hf_15(R *rio, R *iio, const R *W, stride ios, INT m, INT dist) { DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP618033988, +0.618033988749894848204586834365638117720309180); DK(KP866025403, +0.866025403784438646763723170752936183471402627); DK(KP500000000, +0.500000000000000000000000000000000000000000000); INT i; for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 28, MAKE_VOLATILE_STRIDE(ios)) { E T2d, T2O, T2Q, T2m, T2k, T2l, T2P, T2n; { E T1G, T3x, T3k, T3w, T1B, Tf, T37, T1y, T2V, T2M, T2a, T2i, T39, Tz, T2X; E T2t, T1O, T2e, T36, T1e, TF, T2U, T2G, T23, T2h, T2v, TN, TQ, TP, T1R; E TL, T1S, TO; { E T2I, T1k, T1m, T1p, T1o, T28, T1w, T25, T1n; { E T1, T3j, T9, Tc, Tb, T1D, T7, T1E, Ta, T1j, T1i, T1h; T1 = rio[0]; T3j = iio[-WS(ios, 14)]; { E T3, T6, T2, T5, T1C, T4, T8; T3 = rio[WS(ios, 5)]; T6 = iio[-WS(ios, 9)]; T2 = W[8]; T5 = W[9]; T9 = rio[WS(ios, 10)]; Tc = iio[-WS(ios, 4)]; T1C = T2 * T6; T4 = T2 * T3; T8 = W[18]; Tb = W[19]; T1D = FNMS(T5, T3, T1C); T7 = FMA(T5, T6, T4); T1E = T8 * Tc; Ta = T8 * T9; } { E T1g, T1F, Td, T1f, T3i, Te, T2H; T1g = rio[WS(ios, 9)]; T1j = iio[-WS(ios, 5)]; T1F = FNMS(Tb, T9, T1E); Td = FMA(Tb, Tc, Ta); T1f = W[16]; T1i = W[17]; T1G = T1D - T1F; T3i = T1D + T1F; T3x = Td - T7; Te = T7 + Td; T2H = T1f * T1j; T1h = T1f * T1g; T3k = T3i + T3j; T3w = FNMS(KP500000000, T3i, T3j); T1B = FNMS(KP500000000, Te, T1); Tf = T1 + Te; T2I = FNMS(T1i, T1g, T2H); } T1k = FMA(T1i, T1j, T1h); { E T1s, T1v, T1r, T1u, T27, T1t, T1l; T1s = rio[WS(ios, 4)]; T1v = iio[-WS(ios, 10)]; T1r = W[6]; T1u = W[7]; T1m = rio[WS(ios, 14)]; T1p = iio[0]; T27 = T1r * T1v; T1t = T1r * T1s; T1l = W[26]; T1o = W[27]; T28 = FNMS(T1u, T1s, T27); T1w = FMA(T1u, T1v, T1t); T25 = T1l * T1p; T1n = T1l * T1m; } } { E Tl, T2p, Tn, Tq, Tp, T1M, Tx, T1J, To; { E Th, Tk, T26, 
T1q, Tg, Tj; Th = rio[WS(ios, 3)]; Tk = iio[-WS(ios, 11)]; T26 = FNMS(T1o, T1m, T25); T1q = FMA(T1o, T1p, T1n); Tg = W[4]; Tj = W[5]; { E T29, T2J, T1x, T2L; T29 = T26 - T28; T2J = T26 + T28; T1x = T1q + T1w; T2L = T1q - T1w; { E T2o, Ti, T2K, T24; T2o = Tg * Tk; Ti = Tg * Th; T2K = FNMS(KP500000000, T2J, T2I); T37 = T2I + T2J; T24 = FNMS(KP500000000, T1x, T1k); T1y = T1k + T1x; Tl = FMA(Tj, Tk, Ti); T2V = FMA(KP866025403, T2L, T2K); T2M = FNMS(KP866025403, T2L, T2K); T2a = FNMS(KP866025403, T29, T24); T2i = FMA(KP866025403, T29, T24); T2p = FNMS(Tj, Th, T2o); } } } { E Tt, Tw, Ts, Tv, T1L, Tu, Tm; Tt = rio[WS(ios, 13)]; Tw = iio[-WS(ios, 1)]; Ts = W[24]; Tv = W[25]; Tn = rio[WS(ios, 8)]; Tq = iio[-WS(ios, 6)]; T1L = Ts * Tw; Tu = Ts * Tt; Tm = W[14]; Tp = W[15]; T1M = FNMS(Tv, Tt, T1L); Tx = FMA(Tv, Tw, Tu); T1J = Tm * Tq; To = Tm * Tn; } { E T10, T2C, T12, T15, T14, T21, T1c, T1Y, T13; { E TW, TZ, T1K, Tr, TV, TY; TW = rio[WS(ios, 6)]; TZ = iio[-WS(ios, 8)]; T1K = FNMS(Tp, Tn, T1J); Tr = FMA(Tp, Tq, To); TV = W[10]; TY = W[11]; { E T1N, T2q, Ty, T2s; T1N = T1K - T1M; T2q = T1K + T1M; Ty = Tr + Tx; T2s = Tr - Tx; { E T2B, TX, T2r, T1I; T2B = TV * TZ; TX = TV * TW; T2r = FNMS(KP500000000, T2q, T2p); T39 = T2p + T2q; T1I = FNMS(KP500000000, Ty, Tl); Tz = Tl + Ty; T10 = FMA(TY, TZ, TX); T2X = FMA(KP866025403, T2s, T2r); T2t = FNMS(KP866025403, T2s, T2r); T1O = FNMS(KP866025403, T1N, T1I); T2e = FMA(KP866025403, T1N, T1I); T2C = FNMS(TY, TW, T2B); } } } { E T18, T1b, T17, T1a, T20, T19, T11; T18 = rio[WS(ios, 1)]; T1b = iio[-WS(ios, 13)]; T17 = W[0]; T1a = W[1]; T12 = rio[WS(ios, 11)]; T15 = iio[-WS(ios, 3)]; T20 = T17 * T1b; T19 = T17 * T18; T11 = W[20]; T14 = W[21]; T21 = FNMS(T1a, T18, T20); T1c = FMA(T1a, T1b, T19); T1Y = T11 * T15; T13 = T11 * T12; } { E TB, TE, T1Z, T16, TA, TD; TB = rio[WS(ios, 12)]; TE = iio[-WS(ios, 2)]; T1Z = FNMS(T14, T12, T1Y); T16 = FMA(T14, T15, T13); TA = W[22]; TD = W[23]; { E T22, T2D, T1d, T2F; T22 = T1Z - T21; T2D = T1Z + T21; T1d = 
T16 + T1c; T2F = T16 - T1c; { E T2u, TC, T2E, T1X; T2u = TA * TE; TC = TA * TB; T2E = FNMS(KP500000000, T2D, T2C); T36 = T2C + T2D; T1X = FNMS(KP500000000, T1d, T10); T1e = T10 + T1d; TF = FMA(TD, TE, TC); T2U = FMA(KP866025403, T2F, T2E); T2G = FNMS(KP866025403, T2F, T2E); T23 = FNMS(KP866025403, T22, T1X); T2h = FMA(KP866025403, T22, T1X); T2v = FNMS(TD, TB, T2u); } } } { E TH, TK, TG, TJ, T1Q, TI, TM; TH = rio[WS(ios, 2)]; TK = iio[-WS(ios, 12)]; TG = W[2]; TJ = W[3]; TN = rio[WS(ios, 7)]; TQ = iio[-WS(ios, 7)]; T1Q = TG * TK; TI = TG * TH; TM = W[12]; TP = W[13]; T1R = FNMS(TJ, TH, T1Q); TL = FMA(TJ, TK, TI); T1S = TM * TQ; TO = TM * TN; } } } } { E T2z, T2f, T3J, T3I, T32, T30, T1H, T1W, T3O, T3P, T2b; { E T3g, T38, T1T, TR, T3o, T1z; T3g = T36 + T37; T38 = T36 - T37; T1T = FNMS(TP, TN, T1S); TR = FMA(TP, TQ, TO); T3o = T1y - T1e; T1z = T1e + T1y; { E T3f, T2Y, T1V, T3c, T3e, T3q, T3s, T1A, T34, T3r, T3n; { E T1U, TS, T2y, T2x, T3a, T2w; T1U = T1R - T1T; T2w = T1R + T1T; TS = TL + TR; T2y = TL - TR; T2x = FNMS(KP500000000, T2w, T2v); T3a = T2v + T2w; { E TT, T1P, T3b, TU, T3p; TT = TF + TS; T1P = FNMS(KP500000000, TS, TF); T3b = T39 - T3a; T3f = T39 + T3a; T2z = FNMS(KP866025403, T2y, T2x); T2Y = FMA(KP866025403, T2y, T2x); TU = Tz + TT; T3p = TT - Tz; T2f = FMA(KP866025403, T1U, T1P); T1V = FNMS(KP866025403, T1U, T1P); T3c = FNMS(KP618033988, T3b, T38); T3e = FMA(KP618033988, T38, T3b); T3q = FNMS(KP618033988, T3p, T3o); T3s = FMA(KP618033988, T3o, T3p); T1A = TU + T1z; T34 = TU - T1z; } } { E T2W, T33, T3m, T3h, T2Z, T3d, T35, T3l; T3J = T2U + T2V; T2W = T2U - T2V; rio[0] = Tf + T1A; T33 = FNMS(KP250000000, T1A, Tf); T3m = T3f - T3g; T3h = T3f + T3g; T2Z = T2X - T2Y; T3I = T2X + T2Y; T3d = FMA(KP559016994, T34, T33); T35 = FNMS(KP559016994, T34, T33); iio[0] = T3h + T3k; T3l = FNMS(KP250000000, T3h, T3k); rio[WS(ios, 3)] = FMA(KP951056516, T3c, T35); iio[-WS(ios, 12)] = FNMS(KP951056516, T3c, T35); rio[WS(ios, 6)] = FMA(KP951056516, T3e, T3d); iio[-WS(ios, 
9)] = FNMS(KP951056516, T3e, T3d); T3r = FMA(KP559016994, T3m, T3l); T3n = FNMS(KP559016994, T3m, T3l); T32 = FMA(KP618033988, T2W, T2Z); T30 = FNMS(KP618033988, T2Z, T2W); } iio[-WS(ios, 3)] = FMA(KP951056516, T3q, T3n); rio[WS(ios, 12)] = FMS(KP951056516, T3q, T3n); iio[-WS(ios, 6)] = FMA(KP951056516, T3s, T3r); rio[WS(ios, 9)] = FMS(KP951056516, T3s, T3r); T2d = FMA(KP866025403, T1G, T1B); T1H = FNMS(KP866025403, T1G, T1B); T1W = T1O + T1V; T3O = T1O - T1V; T3P = T23 - T2a; T2b = T23 + T2a; } } { E T3H, T3y, T2S, T3Q, T3S, T2R, T2c; T3H = FNMS(KP866025403, T3x, T3w); T3y = FMA(KP866025403, T3x, T3w); T2c = T1W + T2b; T2S = T1W - T2b; T3Q = FMA(KP618033988, T3P, T3O); T3S = FNMS(KP618033988, T3O, T3P); rio[WS(ios, 5)] = T1H + T2c; T2R = FNMS(KP250000000, T2c, T1H); { E T2g, T2j, T3G, T3E, T2A, T2N, T3v, T3A, T3M, T3L, T3z, T3F, T3B; { E T3C, T3D, T31, T2T, T3K; T2g = T2e + T2f; T3C = T2e - T2f; T3D = T2h - T2i; T2j = T2h + T2i; T31 = FMA(KP559016994, T2S, T2R); T2T = FNMS(KP559016994, T2S, T2R); T3K = T3I + T3J; T3M = T3J - T3I; iio[-WS(ios, 8)] = FMA(KP951056516, T30, T2T); rio[WS(ios, 2)] = FNMS(KP951056516, T30, T2T); iio[-WS(ios, 11)] = FMA(KP951056516, T32, T31); iio[-WS(ios, 14)] = FNMS(KP951056516, T32, T31); iio[-WS(ios, 5)] = T3K + T3H; T3L = FNMS(KP250000000, T3K, T3H); T3G = FNMS(KP618033988, T3C, T3D); T3E = FMA(KP618033988, T3D, T3C); } { E T3N, T3R, T3t, T3u; T3N = FNMS(KP559016994, T3M, T3L); T3R = FMA(KP559016994, T3M, T3L); T3t = T2t + T2z; T2A = T2t - T2z; T2N = T2G - T2M; T3u = T2G + T2M; rio[WS(ios, 14)] = -(FMA(KP951056516, T3Q, T3N)); rio[WS(ios, 11)] = FMS(KP951056516, T3Q, T3N); iio[-WS(ios, 2)] = FMA(KP951056516, T3S, T3R); rio[WS(ios, 8)] = FMS(KP951056516, T3S, T3R); T3v = T3t + T3u; T3A = T3t - T3u; } rio[WS(ios, 10)] = -(T3v + T3y); T3z = FNMS(KP250000000, T3v, T3y); T2O = FMA(KP618033988, T2N, T2A); T2Q = FNMS(KP618033988, T2A, T2N); T3F = FNMS(KP559016994, T3A, T3z); T3B = FMA(KP559016994, T3A, T3z); iio[-WS(ios, 4)] = 
FMA(KP951056516, T3E, T3B); iio[-WS(ios, 1)] = FNMS(KP951056516, T3E, T3B); iio[-WS(ios, 7)] = FMA(KP951056516, T3G, T3F); rio[WS(ios, 13)] = FMS(KP951056516, T3G, T3F); T2m = T2g - T2j; T2k = T2g + T2j; } } } } iio[-WS(ios, 10)] = T2d + T2k; T2l = FNMS(KP250000000, T2k, T2d); T2P = FNMS(KP559016994, T2m, T2l); T2n = FMA(KP559016994, T2m, T2l); rio[WS(ios, 1)] = FMA(KP951056516, T2O, T2n); rio[WS(ios, 4)] = FNMS(KP951056516, T2O, T2n); iio[-WS(ios, 13)] = FMA(KP951056516, T2Q, T2P); rio[WS(ios, 7)] = FNMS(KP951056516, T2Q, T2P); } return W; }
/*
 * hc2cf2_16 -- fully unrolled, in-place 16-point butterfly with twiddle
 * multiplication, operating on four arrays at once.
 * NOTE(review): the name and shape suggest an FFTW genfft "hc2c" forward
 * codelet of the compressed-twiddle ("2") kind -- confirm against the generator.
 *
 * Parameters:
 *   Rp, Ip - read and overwritten at offsets WS(rs, 0..7); both pointers are
 *            advanced by +ms after every iteration.
 *   Rm, Im - read and overwritten at offsets WS(rs, 0..7); both pointers are
 *            moved by -ms after every iteration.
 *   W      - read-only twiddle table.  It is offset by (mb - 1) * 8 before the
 *            first iteration and advanced by 8 per iteration; only W[0]..W[7]
 *            are read in each iteration.
 *   rs     - stride argument handed to WS() for every element access.
 *   mb, me - the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms     - per-iteration pointer step (see Rp/Ip/Rm/Im above).
 *
 * Every iteration reads the 32 values Rp/Ip/Rm/Im[WS(rs, 0..7)] and stores 32
 * results back to the same 32 locations.  Nothing is returned.
 *
 * Constants: KP707106781 ~= sqrt(1/2), KP923879532 ~= cos(pi/8),
 * KP414213562 ~= tan(pi/8).
 */
static void hc2cf2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); { INT m; for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) { E T3S, T3R; { E T2, Tf, TM, TO, T3, Tg, TN, TS, T4, Tp, T6, T5, Th; T2 = W[0]; Tf = W[2]; TM = W[6]; TO = W[7]; T3 = W[4]; Tg = T2 * Tf; TN = T2 * TM; TS = T2 * TO; T4 = T2 * T3; Tp = Tf * T3; T6 = W[5]; T5 = W[1]; Th = W[3]; /* W[0..7] are the only table entries loaded; every other twiddle multiplier used below is computed from them. */ { E TZ, Te, T1U, T3A, T3L, T2D, T1G, T2B, T3h, T1R, T2w, T2I, T3i, Tx, T3M; E T1Z, T3w, TL, T26, T25, T37, T1d, T2o, T2l, T3c, T1s, T2m, T2t, T3d, TX; E T10, TV, T2a, TY, T2b; { E TF, TP, TT, Tq, TW, Tz, Tu, TI, TC, T1m, T1f, T1p, T1j, Tr, Ts; E Tv, To, T1W; { E Ti, Tm, T1L, T1O, T1D, T1A, T1x, T2z, T1F, T2y; { E T1, T7, Tb, T3z, T8, T1z, T9, Tc; { E T1i, T1e, T1C, T1y, Tt, Ta, Tl; T1 = Rp[0]; Tt = Tf * T6; Ta = T2 * T6; T7 = FMA(T5, T6, T4); TF = FNMS(T5, T6, T4); TP = FMA(T5, TO, TN); TT = FNMS(T5, TM, TS); Tq = FNMS(Th, T6, Tp); TW = FMA(Th, T6, Tp); Tz = FMA(T5, Th, Tg); Ti = FNMS(T5, Th, Tg); Tl = T2 * Th; Tu = FMA(Th, T3, Tt); TZ = FNMS(Th, T3, Tt); TI = FMA(T5, T3, Ta); Tb = FNMS(T5, T3, Ta); T1i = Ti * T6; T1e = Ti * T3; T1C = Tz * T6; T1y = Tz * T3; Tm = FMA(T5, Tf, Tl); TC = FNMS(T5, Tf, Tl); T3z = Rm[0]; T8 = Rp[WS(rs, 4)]; T1m = FNMS(Tm, T6, T1e); T1f = FMA(Tm, T6, T1e); T1p = FMA(Tm, T3, T1i); T1j = FNMS(Tm, T3, T1i); T1L = FNMS(TC, T6, T1y); T1z = FMA(TC, T6, T1y); T1O = FMA(TC, T3, T1C); T1D = FNMS(TC, T3, T1C); T9 = T7 * T8; Tc = Rm[WS(rs, 4)]; } { E T1u, T1w, T1v, T2x, T3y, T1B, T1E, Td, T3x; T1u = Ip[WS(rs, 7)]; T1w = Im[WS(rs, 7)]; T1A = Ip[WS(rs, 3)]; Td = FMA(Tb, Tc, T9); T3x = T7 * Tc; T1v = TM * T1u; T2x = TM * T1w; Te = T1 + Td; T1U = T1 - Td; T3y = FNMS(Tb, T8, T3x); 
T1B = T1z * T1A; T1E = Im[WS(rs, 3)]; T1x = FMA(TO, T1w, T1v); T3A = T3y + T3z; T3L = T3z - T3y; T2z = T1z * T1E; T1F = FMA(T1D, T1E, T1B); T2y = FNMS(TO, T1u, T2x); } } { E T1H, T1I, T1J, T1M, T1P, T2A; T1H = Ip[WS(rs, 1)]; T2A = FNMS(T1D, T1A, T2z); T2D = T1x - T1F; T1G = T1x + T1F; T1I = Tf * T1H; T2B = T2y - T2A; T3h = T2y + T2A; T1J = Im[WS(rs, 1)]; T1M = Ip[WS(rs, 5)]; T1P = Im[WS(rs, 5)]; { E Tj, Tk, Tn, T1V; { E T1K, T2F, T1Q, T2H, T2E, T1N, T2G; Tj = Rp[WS(rs, 2)]; T1K = FMA(Th, T1J, T1I); T2E = Tf * T1J; T1N = T1L * T1M; T2G = T1L * T1P; Tk = Ti * Tj; T2F = FNMS(Th, T1H, T2E); T1Q = FMA(T1O, T1P, T1N); T2H = FNMS(T1O, T1M, T2G); Tn = Rm[WS(rs, 2)]; Tr = Rp[WS(rs, 6)]; T1R = T1K + T1Q; T2w = T1Q - T1K; T2I = T2F - T2H; T3i = T2F + T2H; T1V = Ti * Tn; Ts = Tq * Tr; Tv = Rm[WS(rs, 6)]; } To = FMA(Tm, Tn, Tk); T1W = FNMS(Tm, Tj, T1V); } } } { E T19, T1b, T18, T2i, T1a, T2j; { E TE, T22, TK, T24; { E TA, TD, TB, T21, TG, TJ, TH, T23, T1Y, Tw, T1X; TA = Rp[WS(rs, 1)]; Tw = FMA(Tu, Tv, Ts); T1X = Tq * Tv; TD = Rm[WS(rs, 1)]; TB = Tz * TA; Tx = To + Tw; T3M = To - Tw; T1Y = FNMS(Tu, Tr, T1X); T21 = Tz * TD; TG = Rp[WS(rs, 5)]; TJ = Rm[WS(rs, 5)]; T1Z = T1W - T1Y; T3w = T1W + T1Y; TH = TF * TG; T23 = TF * TJ; TE = FMA(TC, TD, TB); T22 = FNMS(TC, TA, T21); TK = FMA(TI, TJ, TH); T24 = FNMS(TI, TG, T23); } { E T15, T17, T16, T2h; T15 = Ip[0]; T17 = Im[0]; TL = TE + TK; T26 = TE - TK; T25 = T22 - T24; T37 = T22 + T24; T16 = T2 * T15; T2h = T2 * T17; T19 = Ip[WS(rs, 4)]; T1b = Im[WS(rs, 4)]; T18 = FMA(T5, T17, T16); T2i = FNMS(T5, T15, T2h); T1a = T3 * T19; T2j = T3 * T1b; } } { E T1n, T1q, T1l, T2q, T1o, T2r; { E T1g, T1k, T1h, T2p, T1c, T2k; T1g = Ip[WS(rs, 2)]; T1k = Im[WS(rs, 2)]; T1c = FMA(T6, T1b, T1a); T2k = FNMS(T6, T19, T2j); T1h = T1f * T1g; T2p = T1f * T1k; T1d = T18 + T1c; T2o = T18 - T1c; T2l = T2i - T2k; T3c = T2i + T2k; T1n = Ip[WS(rs, 6)]; T1q = Im[WS(rs, 6)]; T1l = FMA(T1j, T1k, T1h); T2q = FNMS(T1j, T1g, T2p); T1o = T1m * T1n; T2r = T1m * T1q; } { E 
TQ, TU, TR, T29, T1r, T2s; TQ = Rp[WS(rs, 7)]; TU = Rm[WS(rs, 7)]; T1r = FMA(T1p, T1q, T1o); T2s = FNMS(T1p, T1n, T2r); TR = TP * TQ; T29 = TP * TU; T1s = T1l + T1r; T2m = T1l - T1r; T2t = T2q - T2s; T3d = T2q + T2s; TX = Rp[WS(rs, 3)]; T10 = Rm[WS(rs, 3)]; TV = FMA(TT, TU, TR); T2a = FNMS(TT, TQ, T29); TY = TW * TX; T2b = TW * T10; } } } } { E T36, T3G, T3b, T3g, T28, T2d, T3F, T39, T3e, T3q, T3C, T3j, T3u, T3t; { E T3D, T1T, T3r, T14, T3E, T3s; { E Ty, T3B, T11, T2c, T13, T3v; T36 = Te - Tx; Ty = Te + Tx; T3B = T3w + T3A; T3G = T3A - T3w; T11 = FMA(TZ, T10, TY); T2c = FNMS(TZ, TX, T2b); { E T1t, T1S, T12, T38; T3b = T1d - T1s; T1t = T1d + T1s; T1S = T1G + T1R; T3g = T1G - T1R; T12 = TV + T11; T28 = TV - T11; T2d = T2a - T2c; T38 = T2a + T2c; T3D = T1S - T1t; T1T = T1t + T1S; T13 = TL + T12; T3F = T12 - TL; T39 = T37 - T38; T3v = T37 + T38; } T3e = T3c - T3d; T3r = T3c + T3d; T3q = Ty - T13; T14 = Ty + T13; T3E = T3B - T3v; T3C = T3v + T3B; T3s = T3h + T3i; T3j = T3h - T3i; } Rm[WS(rs, 7)] = T14 - T1T; Rp[0] = T14 + T1T; Im[WS(rs, 3)] = T3D - T3E; T3u = T3r + T3s; T3t = T3r - T3s; Ip[WS(rs, 4)] = T3D + T3E; } { E T3m, T3a, T3J, T3H; Ip[0] = T3u + T3C; Im[WS(rs, 7)] = T3u - T3C; Rp[WS(rs, 4)] = T3q + T3t; Rm[WS(rs, 3)] = T3q - T3t; T3m = T36 - T39; T3a = T36 + T39; T3J = T3G - T3F; T3H = T3F + T3G; { E T2Q, T20, T3N, T3T, T2J, T2C, T3O, T2f, T34, T30, T2W, T2V, T3U, T2T, T2N; E T2v; { E T2R, T27, T2e, T2S; { E T3n, T3f, T3o, T3k; T2Q = T1U + T1Z; T20 = T1U - T1Z; T3n = T3e - T3b; T3f = T3b + T3e; T3o = T3g + T3j; T3k = T3g - T3j; T3N = T3L - T3M; T3T = T3M + T3L; { E T3p, T3I, T3K, T3l; T3p = T3n - T3o; T3I = T3n + T3o; T3K = T3k - T3f; T3l = T3f + T3k; Rp[WS(rs, 6)] = FMA(KP707106781, T3p, T3m); Rm[WS(rs, 1)] = FNMS(KP707106781, T3p, T3m); Ip[WS(rs, 2)] = FMA(KP707106781, T3I, T3H); Im[WS(rs, 5)] = FMS(KP707106781, T3I, T3H); Ip[WS(rs, 6)] = FMA(KP707106781, T3K, T3J); Im[WS(rs, 1)] = FMS(KP707106781, T3K, T3J); Rp[WS(rs, 2)] = FMA(KP707106781, T3l, T3a); 
Rm[WS(rs, 5)] = FNMS(KP707106781, T3l, T3a); T2R = T26 + T25; T27 = T25 - T26; T2e = T28 + T2d; T2S = T28 - T2d; } } { E T2Y, T2Z, T2n, T2u; T2J = T2D - T2I; T2Y = T2D + T2I; T2Z = T2B + T2w; T2C = T2w - T2B; T3O = T27 + T2e; T2f = T27 - T2e; T34 = FMA(KP414213562, T2Y, T2Z); T30 = FNMS(KP414213562, T2Z, T2Y); T2W = T2l - T2m; T2n = T2l + T2m; T2u = T2o - T2t; T2V = T2o + T2t; T3U = T2S - T2R; T2T = T2R + T2S; T2N = FNMS(KP414213562, T2n, T2u); T2v = FMA(KP414213562, T2u, T2n); } } { E T33, T2X, T3X, T3Y; { E T2M, T2g, T2O, T2K, T3V, T3W, T2P, T2L; T2M = FNMS(KP707106781, T2f, T20); T2g = FMA(KP707106781, T2f, T20); T33 = FNMS(KP414213562, T2V, T2W); T2X = FMA(KP414213562, T2W, T2V); T2O = FNMS(KP414213562, T2C, T2J); T2K = FMA(KP414213562, T2J, T2C); T3V = FMA(KP707106781, T3U, T3T); T3X = FNMS(KP707106781, T3U, T3T); T3W = T2O - T2N; T2P = T2N + T2O; T3Y = T2K - T2v; T2L = T2v + T2K; Ip[WS(rs, 3)] = FMA(KP923879532, T3W, T3V); Im[WS(rs, 4)] = FMS(KP923879532, T3W, T3V); Rp[WS(rs, 3)] = FMA(KP923879532, T2L, T2g); Rm[WS(rs, 4)] = FNMS(KP923879532, T2L, T2g); Rm[0] = FMA(KP923879532, T2P, T2M); Rp[WS(rs, 7)] = FNMS(KP923879532, T2P, T2M); } { E T32, T3P, T3Q, T35, T2U, T31; T32 = FNMS(KP707106781, T2T, T2Q); T2U = FMA(KP707106781, T2T, T2Q); T31 = T2X + T30; T3S = T30 - T2X; T3R = FNMS(KP707106781, T3O, T3N); T3P = FMA(KP707106781, T3O, T3N); Ip[WS(rs, 7)] = FMA(KP923879532, T3Y, T3X); Im[0] = FMS(KP923879532, T3Y, T3X); Rp[WS(rs, 1)] = FMA(KP923879532, T31, T2U); Rm[WS(rs, 6)] = FNMS(KP923879532, T31, T2U); T3Q = T33 + T34; T35 = T33 - T34; Ip[WS(rs, 1)] = FMA(KP923879532, T3Q, T3P); Im[WS(rs, 6)] = FMS(KP923879532, T3Q, T3P); Rp[WS(rs, 5)] = FMA(KP923879532, T35, T32); Rm[WS(rs, 2)] = FNMS(KP923879532, T35, T32); } } } } } } } Ip[WS(rs, 5)] = FMA(KP923879532, T3S, T3R); Im[WS(rs, 2)] = FMS(KP923879532, T3S, T3R); } } }
/*
 * hf2_8 -- fully unrolled, in-place 8-point butterfly with twiddle
 * multiplication over a pair of arrays.
 * NOTE(review): the name and shape suggest an FFTW genfft half-complex forward
 * twiddle codelet of the compressed-twiddle ("2") kind -- confirm.
 *
 * Parameters:
 *   rio  - read and overwritten at rio[WS(ios, k)], k = 0..7; advanced by
 *          +dist after every iteration.
 *   iio  - read and overwritten at iio[-WS(ios, k)], k = 0..7 (note the
 *          negative offsets); moved by -dist after every iteration.
 *   W    - read-only twiddle table; W[0]..W[5] are read in each iteration and
 *          W is then advanced by 6.
 *   ios  - stride argument handed to WS() for every element access.
 *   m    - seeds the loop counter: i starts at m - 2 and decreases by 2 while
 *          i > 0, so no iteration runs when m <= 2.
 *   dist - per-iteration pointer step (see rio/iio above).
 *
 * Returns the final value of W, i.e. the incoming W advanced by 6 for every
 * iteration executed.
 *
 * Constant: KP707106781 ~= sqrt(1/2).
 */
static const R *hf2_8(R *rio, R *iio, const R *W, stride ios, INT m, INT dist) { DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT i; for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 6, MAKE_VOLATILE_STRIDE(ios)) { E T1m, T1l, T1k, Tw, T1w, T1u, T1v, TT, T1n, T1o; { E T2, T3, Tl, Tn, T5, T4, Tm, Tr, T6; T2 = W[0]; T3 = W[2]; Tl = W[4]; Tn = W[5]; T5 = W[1]; T4 = T2 * T3; Tm = T2 * Tl; Tr = T2 * Tn; T6 = W[3]; /* W[0..5] are the only table entries loaded; the other twiddle multipliers used below are computed from them. */ { E T1, T1s, TG, TC, Tu, TY, Tk, TW, Td, T1r, Tx, TH, T1d, T14, TS; E T18, T12, TD, TA; { E To, Ts, Tf, T7, Tp, Ti, Tb, Tq, Tt, Ta, TF, TB, T8, T9, Tc; T1 = rio[0]; To = FMA(T5, Tn, Tm); Ts = FNMS(T5, Tl, Tr); Tf = FMA(T5, T6, T4); T7 = FNMS(T5, T6, T4); Ta = T2 * T6; T1s = iio[-WS(ios, 7)]; Tp = rio[WS(ios, 6)]; TF = Tf * Tn; TB = Tf * Tl; Ti = FNMS(T5, T3, Ta); Tb = FMA(T5, T3, Ta); Tq = To * Tp; Tt = iio[-WS(ios, 1)]; TG = FNMS(Ti, Tl, TF); TC = FMA(Ti, Tn, TB); { E Tg, TX, Tj, Th, TV; Tg = rio[WS(ios, 2)]; Tu = FMA(Ts, Tt, Tq); TX = To * Tt; Tj = iio[-WS(ios, 5)]; Th = Tf * Tg; T8 = rio[WS(ios, 4)]; TY = FNMS(Ts, Tp, TX); TV = Tf * Tj; Tk = FMA(Ti, Tj, Th); T9 = T7 * T8; Tc = iio[-WS(ios, 3)]; TW = FNMS(Ti, Tg, TV); } { E TK, TQ, TL, T1b, TM, TO, T1q, Ty, Tz; TK = rio[WS(ios, 7)]; TQ = iio[-WS(ios, 4)]; Td = FMA(Tb, Tc, T9); T1q = T7 * Tc; TL = Tl * TK; T1b = T3 * TQ; TM = iio[0]; T1r = FNMS(Tb, T8, T1q); TO = rio[WS(ios, 3)]; { E TN, T1c, T1a, TR, T19, TP; Tx = rio[WS(ios, 1)]; TN = FMA(Tn, TM, TL); T19 = Tl * TM; T1c = FNMS(T6, TO, T1b); TP = T3 * TO; Ty = T2 * Tx; T1a = FNMS(Tn, TK, T19); TH = iio[-WS(ios, 2)]; TR = FMA(T6, TQ, TP); Tz = iio[-WS(ios, 6)]; T1m = T1a + T1c; T1d = T1a - T1c; T14 = TC * TH; TS = TN + TR; T18 = TN - TR; T12 = T2 * Tz; TD = rio[WS(ios, 5)]; } TA = FMA(T5, Tz, Ty); } } { E Te, T1p, T1t, T10, T1g, Tv, TJ, T1i, T1e, T1z, T1B, T17, T1h; { E T1x, T16, T1y, T11; { E TU, T13, T15, TE, TZ, TI; Te = T1 + Td; TU = T1 - Td; T13 = FNMS(T5, Tx, T12); T15 = FNMS(TG, TD, T14); TE = TC * TD; 
T1p = TW + TY; TZ = TW - TY; T1x = T1s - T1r; T1t = T1r + T1s; T1l = T13 + T15; T16 = T13 - T15; TI = FMA(TG, TH, TE); T10 = TU + TZ; T1g = TU - TZ; Tv = Tk + Tu; T1y = Tk - Tu; T11 = TA - TI; TJ = TA + TI; } T1i = T18 + T1d; T1e = T18 - T1d; T1z = T1x - T1y; T1B = T1y + T1x; T17 = T11 + T16; T1h = T16 - T11; } { E T1j, T1A, T1f, T1C; T1j = T1h - T1i; T1A = T1h + T1i; T1f = T17 + T1e; T1C = T1e - T17; iio[-WS(ios, 1)] = FMA(KP707106781, T1A, T1z); rio[WS(ios, 5)] = FMS(KP707106781, T1A, T1z); rio[WS(ios, 3)] = FMA(KP707106781, T1j, T1g); iio[-WS(ios, 7)] = FNMS(KP707106781, T1j, T1g); iio[-WS(ios, 3)] = FMA(KP707106781, T1C, T1B); rio[WS(ios, 7)] = FMS(KP707106781, T1C, T1B); rio[WS(ios, 1)] = FMA(KP707106781, T1f, T10); iio[-WS(ios, 5)] = FNMS(KP707106781, T1f, T10); T1k = Te - Tv; Tw = Te + Tv; } T1w = T1t - T1p; T1u = T1p + T1t; T1v = TS - TJ; TT = TJ + TS; } } } iio[-WS(ios, 2)] = T1v + T1w; rio[WS(ios, 6)] = T1v - T1w; rio[0] = Tw + TT; iio[-WS(ios, 4)] = Tw - TT; T1n = T1l - T1m; T1o = T1l + T1m; iio[0] = T1o + T1u; rio[WS(ios, 4)] = T1o - T1u; rio[WS(ios, 2)] = T1k + T1n; iio[-WS(ios, 6)] = T1k - T1n; } return W; }
/*
 * r2cfII_25 -- fully unrolled 25-input transform that reads two input arrays
 * and writes two output arrays (out of place with respect to these pointers).
 * NOTE(review): the name suggests an FFTW genfft real-to-complex forward
 * codelet of "type II" for n = 25 -- confirm against the generator.
 *
 * Parameters:
 *   R0           - input; read at R0[WS(rs, k)], k = 0..12 (13 values).
 *   R1           - input; read at R1[WS(rs, k)], k = 0..11 (12 values).
 *   Cr           - output; written at Cr[WS(csr, k)], k = 0..12.
 *   Ci           - output; written at Ci[WS(csi, k)], k = 0..11.
 *   rs, csr, csi - stride arguments handed to WS() for R0/R1, Cr and Ci
 *                  respectively.
 *   v            - iteration count: the body runs for i = v, v - 1, ..., 1.
 *   ivs          - step added to R0 and R1 after every iteration.
 *   ovs          - step added to Cr and Ci after every iteration.
 *
 * R0/R1 are only read and Cr/Ci are only written inside this function.
 *
 * NOTE(review): MAKE_VOLATILE_STRIDE is invoked here with two arguments
 * (100, x), whereas hc2cf2_16, hf2_8 and hf_10 in this file invoke it with a
 * single argument.  The macro definition is not visible here -- confirm which
 * arity the header in use actually provides.
 */
static void r2cfII_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP1_996053456, +1.996053456856543123904673613726901106673810439); DK(KP062790519, +0.062790519529313376076178224565631133122484832); DK(KP125581039, +0.125581039058626752152356449131262266244969664); DK(KP998026728, +0.998026728428271561952336806863450553336905220); DK(KP1_369094211, +1.369094211857377347464566715242418539779038465); DK(KP728968627, +0.728968627421411523146730319055259111372571664); DK(KP963507348, +0.963507348203430549974383005744259307057084020); DK(KP876306680, +0.876306680043863587308115903922062583399064238); DK(KP497379774, +0.497379774329709576484567492012895936835134813); DK(KP968583161, +0.968583161128631119490168375464735813836012403); DK(KP1_457937254, +1.457937254842823046293460638110518222745143328); DK(KP684547105, +0.684547105928688673732283357621209269889519233); DK(KP1_752613360, +1.752613360087727174616231807844125166798128477); DK(KP481753674, +0.481753674101715274987191502872129653528542010); DK(KP1_937166322, +1.937166322257262238980336750929471627672024806); DK(KP248689887, +0.248689887164854788242283746006447968417567406); DK(KP992114701, +0.992114701314477831049793042785778521453036709); DK(KP250666467, +0.250666467128608490746237519633017587885836494); DK(KP1_809654104, +1.809654104932039055427337295865395187940827822); DK(KP425779291, +0.425779291565072648862502445744251703979973042); DK(KP1_541026485, +1.541026485551578461606019272792355694543335344); DK(KP637423989, +0.637423989748689710176712811676016195434917298); DK(KP1_688655851, +1.688655851004030157097116127933363010763318483); DK(KP535826794, +0.535826794978996618271308767867639978063575346); DK(KP851558583, +0.851558583130145297725004891488503407959946084); DK(KP904827052, +0.904827052466019527713668647932697593970413911); DK(KP1_984229402, +1.984229402628955662099586085571557042906073418); DK(KP125333233, 
+0.125333233564304245373118759816508793942918247); DK(KP1_274847979, +1.274847979497379420353425623352032390869834596); DK(KP770513242, +0.770513242775789230803009636396177847271667672); DK(KP844327925, +0.844327925502015078548558063966681505381659241); DK(KP1_071653589, +1.071653589957993236542617535735279956127150691); DK(KP293892626, +0.293892626146236564584352977319536384298826219); DK(KP475528258, +0.475528258147576786058219666689691071702849317); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP587785252, +0.587785252292473129168705954639072768597652438); DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP559016994, +0.559016994374947424102293417182819058860154590); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) { E TE, TR, T2i, T1z, TL, TS, TB, T2d, T1l, T1i, T2c, T9, T23, TZ, TW; E T22, Ti, T26, T16, T13, T25, Ts, T2a, T1e, T1b, T29, TP, TQ; { E TK, T1y, TH, T1x; TE = R0[0]; { E TI, TJ, TF, TG; TI = R0[WS(rs, 10)]; TJ = R1[WS(rs, 2)]; TK = TI - TJ; T1y = TI + TJ; TF = R0[WS(rs, 5)]; TG = R1[WS(rs, 7)]; TH = TF - TG; T1x = TF + TG; } TR = KP559016994 * (TH - TK); T2i = FNMS(KP587785252, T1x, KP951056516 * T1y); T1z = FMA(KP951056516, T1x, KP587785252 * T1y); TL = TH + TK; TS = FNMS(KP250000000, TL, TE); } { E Tt, Tw, Tz, TA, T1k, T1j, T1g, T1h; Tt = R0[WS(rs, 3)]; { E Tu, Tv, Tx, Ty; Tu = R0[WS(rs, 8)]; Tv = R1[WS(rs, 10)]; Tw = Tu - Tv; Tx = R1[0]; Ty = R1[WS(rs, 5)]; Tz = Tx + Ty; TA = Tw - Tz; T1k = Ty - Tx; T1j = Tu + Tv; } TB = Tt + TA; T2d = FNMS(KP293892626, T1j, KP475528258 * T1k); T1l = FMA(KP475528258, T1j, KP293892626 * T1k); T1g = FNMS(KP250000000, TA, Tt); T1h = KP559016994 * (Tw + Tz); T1i = T1g + T1h; T2c = T1g - T1h; } { E T1, T4, T7, T8, TY, TX, TU, TV; T1 = R0[WS(rs, 1)]; { E T2, T3, T5, T6; T2 = R0[WS(rs, 6)]; T3 = R1[WS(rs, 8)]; T4 = T2 - T3; T5 = 
R0[WS(rs, 11)]; T6 = R1[WS(rs, 3)]; T7 = T5 - T6; T8 = T4 + T7; TY = T5 + T6; TX = T2 + T3; } T9 = T1 + T8; T23 = FNMS(KP293892626, TX, KP475528258 * TY); TZ = FMA(KP475528258, TX, KP293892626 * TY); TU = KP559016994 * (T4 - T7); TV = FNMS(KP250000000, T8, T1); TW = TU + TV; T22 = TV - TU; } { E Ta, Td, Tg, Th, T15, T14, T11, T12; Ta = R0[WS(rs, 4)]; { E Tb, Tc, Te, Tf; Tb = R0[WS(rs, 9)]; Tc = R1[WS(rs, 11)]; Td = Tb - Tc; Te = R1[WS(rs, 1)]; Tf = R1[WS(rs, 6)]; Tg = Te + Tf; Th = Td - Tg; T15 = Tf - Te; T14 = Tb + Tc; } Ti = Ta + Th; T26 = FNMS(KP293892626, T14, KP475528258 * T15); T16 = FMA(KP475528258, T14, KP293892626 * T15); T11 = FNMS(KP250000000, Th, Ta); T12 = KP559016994 * (Td + Tg); T13 = T11 + T12; T25 = T11 - T12; } { E Tk, Tn, Tq, Tr, T1d, T1c, T19, T1a; Tk = R0[WS(rs, 2)]; { E Tl, Tm, To, Tp; Tl = R0[WS(rs, 7)]; Tm = R1[WS(rs, 9)]; Tn = Tl - Tm; To = R0[WS(rs, 12)]; Tp = R1[WS(rs, 4)]; Tq = To - Tp; Tr = Tn + Tq; T1d = To + Tp; T1c = Tl + Tm; } Ts = Tk + Tr; T2a = FNMS(KP293892626, T1c, KP475528258 * T1d); T1e = FMA(KP475528258, T1c, KP293892626 * T1d); T19 = KP559016994 * (Tn - Tq); T1a = FNMS(KP250000000, Tr, Tk); T1b = T19 + T1a; T29 = T1a - T19; } /* Ci[2], Ci[7], Cr[12], Cr[2] and Cr[7] below depend only on T9, Ti, Ts, TB and TE + TL. */ TP = TB - Ts; TQ = T9 - Ti; Ci[WS(csi, 2)] = FNMS(KP951056516, TQ, KP587785252 * TP); Ci[WS(csi, 7)] = FMA(KP587785252, TQ, KP951056516 * TP); { E TM, TD, TN, Tj, TC, TO; TM = TE + TL; Tj = T9 + Ti; TC = Ts + TB; TD = KP559016994 * (Tj - TC); TN = Tj + TC; Cr[WS(csr, 12)] = TM + TN; TO = FNMS(KP250000000, TN, TM); Cr[WS(csr, 2)] = TD + TO; Cr[WS(csr, 7)] = TO - TD; } { E TT, T1J, T1Y, T1U, T1X, T1P, T1V, T1M, T1W, T1A, T1B, T1r, T1C, T1v, T18; E T1n, T1o, T1G, T1D; TT = TR + TS; { E T1H, T1I, T1S, T1T; T1H = FNMS(KP844327925, TW, KP1_071653589 * TZ); T1I = FNMS(KP1_274847979, T16, KP770513242 * T13); T1J = T1H - T1I; T1Y = T1H + T1I; T1S = FMA(KP125333233, T1i, KP1_984229402 * T1l); T1T = FMA(KP904827052, T1b, KP851558583 * T1e); T1U = T1S - T1T; T1X = T1T + T1S; } { E T1N, T1O, T1K, T1L; T1N = 
FMA(KP535826794, TW, KP1_688655851 * TZ); T1O = FMA(KP637423989, T13, KP1_541026485 * T16); T1P = T1N - T1O; T1V = T1N + T1O; T1K = FNMS(KP1_809654104, T1e, KP425779291 * T1b); T1L = FNMS(KP992114701, T1i, KP250666467 * T1l); T1M = T1K - T1L; T1W = T1K + T1L; } { E T1p, T1q, T1t, T1u; T1p = FMA(KP844327925, T13, KP1_071653589 * T16); T1q = FMA(KP248689887, TW, KP1_937166322 * TZ); T1A = T1q + T1p; T1t = FMA(KP481753674, T1b, KP1_752613360 * T1e); T1u = FMA(KP684547105, T1i, KP1_457937254 * T1l); T1B = T1t + T1u; T1r = T1p - T1q; T1C = T1A + T1B; T1v = T1t - T1u; } { E T10, T17, T1f, T1m; T10 = FNMS(KP497379774, TZ, KP968583161 * TW); T17 = FNMS(KP1_688655851, T16, KP535826794 * T13); T18 = T10 + T17; T1f = FNMS(KP963507348, T1e, KP876306680 * T1b); T1m = FNMS(KP1_369094211, T1l, KP728968627 * T1i); T1n = T1f + T1m; T1o = T18 + T1n; T1G = T10 - T17; T1D = T1f - T1m; } { E T1R, T1Q, T20, T1Z; Cr[0] = TT + T1o; Ci[0] = -(T1z + T1C); T1R = KP559016994 * (T1P + T1M); T1Q = FMA(KP250000000, T1M - T1P, TT); Cr[WS(csr, 4)] = FMA(KP951056516, T1J, T1Q) + FMA(KP587785252, T1U, T1R); Cr[WS(csr, 9)] = FMA(KP951056516, T1U, T1Q) + FNMA(KP587785252, T1J, T1R); T20 = KP559016994 * (T1Y + T1X); T1Z = FMA(KP250000000, T1X - T1Y, T1z); Ci[WS(csi, 9)] = FMA(KP587785252, T1V, KP951056516 * T1W) + T1Z - T20; Ci[WS(csi, 4)] = FMA(KP587785252, T1W, T1Z) + FNMS(KP951056516, T1V, T20); { E T1E, T1F, T1s, T1w; T1E = FMS(KP250000000, T1C, T1z); T1F = KP559016994 * (T1B - T1A); Ci[WS(csi, 5)] = FMA(KP951056516, T1D, T1E) + FNMA(KP587785252, T1G, T1F); Ci[WS(csi, 10)] = FMA(KP951056516, T1G, KP587785252 * T1D) + T1E + T1F; T1s = FNMS(KP250000000, T1o, TT); T1w = KP559016994 * (T18 - T1n); Cr[WS(csr, 5)] = FMA(KP587785252, T1r, T1s) + FMS(KP951056516, T1v, T1w); Cr[WS(csr, 10)] = T1w + FMA(KP587785252, T1v, T1s) - (KP951056516 * T1r); } } } { E T21, T2z, T2L, T2K, T2M, T2F, T2P, T2C, T2Q, T2l, T2o, T2p, T2w, T2u, T28; E T2f, T2g, T2s, T2h; T21 = TS - TR; { E T2x, T2y, T2I, T2J; T2x = 
FNMS(KP844327925, T29, KP1_071653589 * T2a); T2y = FNMS(KP125581039, T2d, KP998026728 * T2c); T2z = T2x + T2y; T2L = T2y - T2x; T2I = FNMS(KP481753674, T22, KP1_752613360 * T23); T2J = FMA(KP904827052, T25, KP851558583 * T26); T2K = T2I + T2J; T2M = T2I - T2J; } { E T2D, T2E, T2A, T2B; T2D = FMA(KP535826794, T29, KP1_688655851 * T2a); T2E = FMA(KP062790519, T2c, KP1_996053456 * T2d); T2F = T2D + T2E; T2P = T2E - T2D; T2A = FMA(KP876306680, T22, KP963507348 * T23); T2B = FNMS(KP425779291, T25, KP1_809654104 * T26); T2C = T2A + T2B; T2Q = T2A - T2B; } { E T2j, T2k, T2m, T2n; T2j = FNMS(KP125333233, T25, KP1_984229402 * T26); T2k = FMA(KP684547105, T22, KP1_457937254 * T23); T2l = T2j - T2k; T2m = FNMS(KP770513242, T2c, KP1_274847979 * T2d); T2n = FMA(KP998026728, T29, KP125581039 * T2a); T2o = T2m - T2n; T2p = T2l + T2o; T2w = T2k + T2j; T2u = T2n + T2m; } { E T24, T27, T2b, T2e; T24 = FNMS(KP1_369094211, T23, KP728968627 * T22); T27 = FMA(KP992114701, T25, KP250666467 * T26); T28 = T24 - T27; T2b = FNMS(KP1_996053456, T2a, KP062790519 * T29); T2e = FMA(KP637423989, T2c, KP1_541026485 * T2d); T2f = T2b - T2e; T2g = T28 + T2f; T2s = T24 + T27; T2h = T2b + T2e; } { E T2H, T2G, T2O, T2N; Cr[WS(csr, 1)] = T21 + T2g; Ci[WS(csi, 1)] = T2p - T2i; T2H = KP559016994 * (T2C - T2F); T2G = FNMS(KP250000000, T2C + T2F, T21); Cr[WS(csr, 8)] = FMA(KP951056516, T2z, T2G) + FNMA(KP587785252, T2K, T2H); Cr[WS(csr, 3)] = FMA(KP951056516, T2K, KP587785252 * T2z) + T2G + T2H; T2O = KP559016994 * (T2M + T2L); T2N = FMA(KP250000000, T2L - T2M, T2i); Ci[WS(csi, 3)] = T2N + FMA(KP587785252, T2P, T2O) - (KP951056516 * T2Q); Ci[WS(csi, 8)] = FMA(KP587785252, T2Q, T2N) + FMS(KP951056516, T2P, T2O); { E T2t, T2v, T2q, T2r; T2t = FNMS(KP250000000, T2g, T21); T2v = KP559016994 * (T28 - T2f); Cr[WS(csr, 6)] = FMA(KP951056516, T2u, T2t) + FNMA(KP587785252, T2w, T2v); Cr[WS(csr, 11)] = FMA(KP951056516, T2w, T2v) + FMA(KP587785252, T2u, T2t); T2q = KP250000000 * T2p; T2r = KP559016994 * (T2l - T2o); 
Ci[WS(csi, 6)] = FMS(KP951056516, T2h, T2i + T2q) + FNMA(KP587785252, T2s, T2r); Ci[WS(csi, 11)] = FMA(KP951056516, T2s, KP587785252 * T2h) + T2r - (T2i + T2q); } } } } } }
/*
 * hf_10 -- fully unrolled, in-place 10-point butterfly with twiddle
 * multiplication over a pair of arrays.
 * NOTE(review): the name and shape suggest an FFTW genfft half-complex forward
 * twiddle codelet for radix 10 -- confirm against the generator.
 *
 * Parameters:
 *   cr     - read and overwritten at cr[WS(rs, k)], k = 0..9; advanced by +ms
 *            after every iteration.
 *   ci     - read and overwritten at ci[WS(rs, k)], k = 0..9; moved by -ms
 *            after every iteration.
 *   W      - read-only twiddle table.  It is offset by (mb - 1) * 18 before
 *            the first iteration and advanced by 18 per iteration; W[0]..W[17]
 *            are read in each iteration.
 *   rs     - stride argument handed to WS() for every element access.
 *   mb, me - the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms     - per-iteration pointer step (see cr/ci above).
 *
 * The elements at offset 0 (cr[0], ci[0]) are used without any W factor; each
 * of the other nine (cr, ci) pairs is combined with its own pair of W entries.
 * All 20 locations are overwritten.  Nothing is returned.
 */
static void hf_10(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP618033988, +0.618033988749894848204586834365638117720309180); INT m; for (m = mb, W = W + ((mb - 1) * 18); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 18, MAKE_VOLATILE_STRIDE(rs)) { E T29, T2d, T2c, T2e; { E T23, T1U, T8, T12, T1y, T1P, T25, T1H, T2b, T18, T10, T1Y, T1I, Tl, T13; E T1J, Ty, T14, T1n, T1O, T24, T1K; { E T1, T1R, T3, T6, T2, T5; T1 = cr[0]; T1R = ci[0]; T3 = cr[WS(rs, 5)]; T6 = ci[WS(rs, 5)]; T2 = W[8]; T5 = W[9]; /* W[8], W[9] are the factors applied to the (cr, ci) pair at offset 5 (T3, T6). */ { E T1p, TY, T1x, T1F, TM, T16, T1r, TS; { E TF, T1w, TO, TR, T1u, TL, TN, TQ, T1q, TP; { E TU, TX, TT, TW; { E TB, TE, T1S, T4, TA, TD; TB = cr[WS(rs, 4)]; TE = ci[WS(rs, 4)]; T1S = T2 * T6; T4 = T2 * T3; TA = W[6]; TD = W[7]; { E T1T, T7, T1v, TC; T1T = FNMS(T5, T3, T1S); T7 = FMA(T5, T6, T4); T1v = TA * TE; TC = TA * TB; T23 = T1T + T1R; T1U = T1R - T1T; T8 = T1 - T7; T12 = T1 + T7; TF = FMA(TD, TE, TC); T1w = FNMS(TD, TB, T1v); } } TU = cr[WS(rs, 1)]; TX = ci[WS(rs, 1)]; TT = W[0]; TW = W[1]; { E TH, TK, TJ, T1t, TI, T1o, TV, TG; TH = cr[WS(rs, 9)]; TK = ci[WS(rs, 9)]; T1o = TT * TX; TV = TT * TU; TG = W[16]; TJ = W[17]; T1p = FNMS(TW, TU, T1o); TY = FMA(TW, TX, TV); T1t = TG * TK; TI = TG * TH; TO = cr[WS(rs, 6)]; TR = ci[WS(rs, 6)]; T1u = FNMS(TJ, TH, T1t); TL = FMA(TJ, TK, TI); TN = W[10]; TQ = W[11]; } } T1x = T1u - T1w; T1F = T1w + T1u; TM = TF - TL; T16 = TF + TL; T1q = TN * TR; TP = TN * TO; T1r = FNMS(TQ, TO, T1q); TS = FMA(TQ, TR, TP); } { E T1l, Te, T1e, Tx, Tn, Tq, Tp, T1j, Tk, T1f, To; { E Tt, Tw, Tv, T1d, Tu; { E Ta, Td, T9, Tc, T1k, Tb, Ts; Ta = cr[WS(rs, 2)]; Td = ci[WS(rs, 2)]; { E T1G, T1s, TZ, T17; T1G = T1r + T1p; T1s = T1p - T1r; TZ = TS - TY; T17 = TS + TY; T1y = T1s - T1x; T1P = T1x + T1s; T25 = T1F + T1G; T1H = T1F - 
T1G; T2b = T16 - T17; T18 = T16 + T17; T10 = TM + TZ; T1Y = TZ - TM; T9 = W[2]; } Tc = W[3]; Tt = cr[WS(rs, 3)]; Tw = ci[WS(rs, 3)]; T1k = T9 * Td; Tb = T9 * Ta; Ts = W[4]; Tv = W[5]; T1l = FNMS(Tc, Ta, T1k); Te = FMA(Tc, Td, Tb); T1d = Ts * Tw; Tu = Ts * Tt; } { E Tg, Tj, Tf, Ti, T1i, Th, Tm; Tg = cr[WS(rs, 7)]; Tj = ci[WS(rs, 7)]; T1e = FNMS(Tv, Tt, T1d); Tx = FMA(Tv, Tw, Tu); Tf = W[12]; Ti = W[13]; Tn = cr[WS(rs, 8)]; Tq = ci[WS(rs, 8)]; T1i = Tf * Tj; Th = Tf * Tg; Tm = W[14]; Tp = W[15]; T1j = FNMS(Ti, Tg, T1i); Tk = FMA(Ti, Tj, Th); T1f = Tm * Tq; To = Tm * Tn; } } { E T1m, T1g, Tr, T1h; T1m = T1j - T1l; T1I = T1l + T1j; Tl = Te - Tk; T13 = Te + Tk; T1g = FNMS(Tp, Tn, T1f); Tr = FMA(Tp, Tq, To); T1J = T1g + T1e; T1h = T1e - T1g; Ty = Tr - Tx; T14 = Tr + Tx; T1n = T1h - T1m; T1O = T1m + T1h; } } } } T24 = T1I + T1J; T1K = T1I - T1J; { E T2a, T15, Tz, T1Z; T2a = T13 - T14; T15 = T13 + T14; Tz = Tl + Ty; T1Z = Ty - Tl; { E T1L, T1N, T1E, T1M; { E T19, T1D, T1C, T11, T1b; T19 = T15 + T18; T1D = T15 - T18; T11 = Tz + T10; T1b = Tz - T10; { E T1B, T1z, T1a, T1A, T1c; T1B = FNMS(KP618033988, T1n, T1y); T1z = FMA(KP618033988, T1y, T1n); ci[WS(rs, 4)] = T8 + T11; T1a = FNMS(KP250000000, T11, T8); T1A = FNMS(KP559016994, T1b, T1a); T1c = FMA(KP559016994, T1b, T1a); T1C = FNMS(KP250000000, T19, T12); T1L = FNMS(KP618033988, T1K, T1H); T1N = FMA(KP618033988, T1H, T1K); cr[WS(rs, 1)] = FMA(KP951056516, T1z, T1c); ci[0] = FNMS(KP951056516, T1z, T1c); cr[WS(rs, 3)] = FMA(KP951056516, T1B, T1A); ci[WS(rs, 2)] = FNMS(KP951056516, T1B, T1A); } cr[0] = T12 + T19; T1E = FNMS(KP559016994, T1D, T1C); T1M = FMA(KP559016994, T1D, T1C); } { E T1X, T21, T20, T22, T1Q, T1W, T1V, T26, T28, T27; T1Q = T1O + T1P; T1W = T1P - T1O; ci[WS(rs, 3)] = FMA(KP951056516, T1N, T1M); cr[WS(rs, 4)] = FNMS(KP951056516, T1N, T1M); ci[WS(rs, 1)] = FMA(KP951056516, T1L, T1E); cr[WS(rs, 2)] = FNMS(KP951056516, T1L, T1E); T1V = FMA(KP250000000, T1Q, T1U); cr[WS(rs, 5)] = T1Q - T1U; T1X = FNMS(KP559016994, 
T1W, T1V); T21 = FMA(KP559016994, T1W, T1V); T20 = FNMS(KP618033988, T1Z, T1Y); T22 = FMA(KP618033988, T1Y, T1Z); T26 = T24 + T25; T28 = T24 - T25; ci[WS(rs, 8)] = FMA(KP951056516, T22, T21); cr[WS(rs, 9)] = FMS(KP951056516, T22, T21); ci[WS(rs, 6)] = FMA(KP951056516, T20, T1X); cr[WS(rs, 7)] = FMS(KP951056516, T20, T1X); T27 = FNMS(KP250000000, T26, T23); ci[WS(rs, 9)] = T26 + T23; T29 = FMA(KP559016994, T28, T27); T2d = FNMS(KP559016994, T28, T27); T2c = FMA(KP618033988, T2b, T2a); T2e = FNMS(KP618033988, T2a, T2b); } } } } ci[WS(rs, 7)] = FMA(KP951056516, T2e, T2d); cr[WS(rs, 8)] = FMS(KP951056516, T2e, T2d); ci[WS(rs, 5)] = FMA(KP951056516, T2c, T29); cr[WS(rs, 6)] = FMS(KP951056516, T2c, T29); } }
static void r2cf_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP792626838, +0.792626838241819413632131824093538848057784557); DK(KP876091699, +0.876091699473550838204498029706869638173524346); DK(KP809385824, +0.809385824416008241660603814668679683846476688); DK(KP860541664, +0.860541664367944677098261680920518816412804187); DK(KP681693190, +0.681693190061530575150324149145440022633095390); DK(KP560319534, +0.560319534973832390111614715371676131169633784); DK(KP997675361, +0.997675361079556513670859573984492383596555031); DK(KP237294955, +0.237294955877110315393888866460840817927895961); DK(KP897376177, +0.897376177523557693138608077137219684419427330); DK(KP923225144, +0.923225144846402650453449441572664695995209956); DK(KP956723877, +0.956723877038460305821989399535483155872969262); DK(KP949179823, +0.949179823508441261575555465843363271711583843); DK(KP669429328, +0.669429328479476605641803240971985825917022098); DK(KP570584518, +0.570584518783621657366766175430996792655723863); DK(KP262346850, +0.262346850930607871785420028382979691334784273); DK(KP876306680, +0.876306680043863587308115903922062583399064238); DK(KP906616052, +0.906616052148196230441134447086066874408359177); DK(KP683113946, +0.683113946453479238701949862233725244439656928); DK(KP559154169, +0.559154169276087864842202529084232643714075927); DK(KP921078979, +0.921078979742360627699756128143719920817673854); DK(KP904508497, +0.904508497187473712051146708591409529430077295); DK(KP999754674, +0.999754674276473633366203429228112409535557487); DK(KP968583161, +0.968583161128631119490168375464735813836012403); DK(KP242145790, +0.242145790282157779872542093866183953459003101); DK(KP904730450, +0.904730450839922351881287709692877908104763647); DK(KP845997307, +0.845997307939530944175097360758058292389769300); DK(KP855719849, +0.855719849902058969314654733608091555096772472); DK(KP982009705, +0.982009705009746369461829878184175962711969869); 
DK(KP916574801, +0.916574801383451584742370439148878693530976769); DK(KP690983005, +0.690983005625052575897706582817180941139845410); DK(KP952936919, +0.952936919628306576880750665357914584765951388); DK(KP998026728, +0.998026728428271561952336806863450553336905220); DK(KP831864738, +0.831864738706457140726048799369896829771167132); DK(KP803003575, +0.803003575438660414833440593570376004635464850); DK(KP522616830, +0.522616830205754336872861364785224694908468440); DK(KP829049696, +0.829049696159252993975487806364305442437946767); DK(KP999544308, +0.999544308746292983948881682379742149196758193); DK(KP772036680, +0.772036680810363904029489473607579825330539880); DK(KP763932022, +0.763932022500210303590826331268723764559381640); DK(KP992114701, +0.992114701314477831049793042785778521453036709); DK(KP447417479, +0.447417479732227551498980015410057305749330693); DK(KP734762448, +0.734762448793050413546343770063151342619912334); DK(KP894834959, +0.894834959464455102997960030820114611498661386); DK(KP867381224, +0.867381224396525206773171885031575671309956167); DK(KP958953096, +0.958953096729998668045963838399037225970891871); DK(KP912575812, +0.912575812670962425556968549836277086778922727); DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP244189809, +0.244189809627953270309879511234821255780225091); DK(KP269969613, +0.269969613759572083574752974412347470060951301); DK(KP522847744, +0.522847744331509716623755382187077770911012542); DK(KP578046249, +0.578046249379945007321754579646815604023525655); DK(KP603558818, +0.603558818296015001454675132653458027918768137); DK(KP667278218, +0.667278218140296670899089292254759909713898805); DK(KP447533225, +0.447533225982656890041886979663652563063114397); DK(KP494780565, +0.494780565770515410344588413655324772219443730); DK(KP987388751, +0.987388751065621252324603216482382109400433949); DK(KP893101515, +0.893101515366181661711202267938416198338079437); DK(KP132830569, 
+0.132830569247582714407653942074819768844536507); DK(KP120146378, +0.120146378570687701782758537356596213647956445); DK(KP059835404, +0.059835404262124915169548397419498386427871950); DK(KP066152395, +0.066152395967733048213034281011006031460903353); DK(KP786782374, +0.786782374965295178365099601674911834788448471); DK(KP869845200, +0.869845200362138853122720822420327157933056305); DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP618033988, +0.618033988749894848204586834365638117720309180); INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E T2H, T2w, T2x, T2A, T2C, T2v, T2M, T2y, T2B, T2N; { E T2u, TJ, T1O, T39, T2t, TB, T21, T1M, T2e, T26, T1B, T1r, T1k, T1c, T9; E T1X, T1R, T2k, T29, T1z, T1v, T1h, TX, Ti, T13, T2a, T2j, T1U, T1Y, TQ; E T1g, T1u, T1y, T12, Ts, T11, T1I; { E Tt, Tw, T16, Tx, Ty; { E T2p, TG, TH, TD, TE, TI, T2r; T2p = R0[0]; TG = R0[WS(rs, 5)]; TH = R1[WS(rs, 7)]; TD = R1[WS(rs, 2)]; TE = R0[WS(rs, 10)]; Tt = R1[WS(rs, 1)]; TI = TG - TH; T2r = TG + TH; { E TF, T2q, Tu, Tv, T2s; TF = TD - TE; T2q = TD + TE; Tu = R0[WS(rs, 4)]; Tv = R1[WS(rs, 11)]; T2u = T2q - T2r; T2s = T2q + T2r; TJ = FMA(KP618033988, TI, TF); T1O = FNMS(KP618033988, TF, TI); T39 = T2p + T2s; T2t = FNMS(KP250000000, T2s, T2p); Tw = Tu + Tv; T16 = Tv - Tu; Tx = R1[WS(rs, 6)]; Ty = R0[WS(rs, 9)]; } } { E T1P, TW, TS, TR; { E T1, T5, T1L, T18, T1a, TA, T4, TU, T6, T19; T1 = R0[WS(rs, 2)]; { E T2, T17, Tz, T3; T2 = R1[WS(rs, 4)]; T17 = Tx - Ty; Tz = Tx + Ty; T3 = R0[WS(rs, 12)]; T5 = R0[WS(rs, 7)]; T1L = FMA(KP618033988, T16, T17); T18 = FNMS(KP618033988, T17, T16); T1a = Tz - Tw; TA = Tw + Tz; T4 = T2 + T3; TU = T3 - T2; T6 = R1[WS(rs, 9)]; } TB = Tt + TA; T19 = FNMS(KP250000000, TA, Tt); { E T7, TV, T1b, T1K, T8; T7 = T5 + T6; TV = T5 - T6; T1b = FNMS(KP559016994, 
T1a, T19); T1K = FMA(KP559016994, T1a, T19); T1P = FMA(KP618033988, TU, TV); TW = FNMS(KP618033988, TV, TU); TS = T4 - T7; T8 = T4 + T7; T21 = FMA(KP869845200, T1K, T1L); T1M = FNMS(KP786782374, T1L, T1K); T2e = FMA(KP066152395, T1K, T1L); T26 = FNMS(KP059835404, T1L, T1K); T1B = FMA(KP120146378, T18, T1b); T1r = FNMS(KP132830569, T1b, T18); T1k = FMA(KP893101515, T18, T1b); T1c = FNMS(KP987388751, T1b, T18); T9 = T1 + T8; TR = FMS(KP250000000, T8, T1); } } { E Ta, Te, TK, Td, Tf; Ta = R1[0]; { E Tb, Tc, T1Q, TT; Tb = R0[WS(rs, 3)]; Tc = R1[WS(rs, 10)]; T1Q = FMA(KP559016994, TS, TR); TT = FNMS(KP559016994, TS, TR); Te = R1[WS(rs, 5)]; TK = Tb - Tc; Td = Tb + Tc; T1X = FNMS(KP120146378, T1P, T1Q); T1R = FMA(KP132830569, T1Q, T1P); T2k = FMA(KP494780565, T1Q, T1P); T29 = FNMS(KP447533225, T1P, T1Q); T1z = FMA(KP869845200, TT, TW); T1v = FNMS(KP786782374, TW, TT); T1h = FNMS(KP667278218, TT, TW); TX = FMA(KP603558818, TW, TT); Tf = R0[WS(rs, 8)]; } { E Tk, T1S, TM, TO, Tn, TZ, TN, T10, Tq, To, Th, Tp, TP, T1T, Tr; Tk = R0[WS(rs, 1)]; { E Tl, TL, Tg, Tm; Tl = R1[WS(rs, 3)]; TL = Tf - Te; Tg = Te + Tf; Tm = R0[WS(rs, 11)]; To = R0[WS(rs, 6)]; T1S = FMA(KP618033988, TK, TL); TM = FNMS(KP618033988, TL, TK); TO = Td - Tg; Th = Td + Tg; Tn = Tl + Tm; TZ = Tm - Tl; Tp = R1[WS(rs, 8)]; } Ti = Ta + Th; TN = FNMS(KP250000000, Th, Ta); T10 = Tp - To; Tq = To + Tp; TP = FMA(KP559016994, TO, TN); T1T = FNMS(KP559016994, TO, TN); Tr = Tn + Tq; T13 = Tn - Tq; T2a = FMA(KP578046249, T1T, T1S); T2j = FNMS(KP522847744, T1S, T1T); T1U = FNMS(KP987388751, T1T, T1S); T1Y = FMA(KP893101515, T1S, T1T); TQ = FMA(KP269969613, TP, TM); T1g = FNMS(KP244189809, TM, TP); T1u = FNMS(KP603558818, TM, TP); T1y = FMA(KP667278218, TP, TM); T12 = FMS(KP250000000, Tr, Tk); Ts = Tk + Tr; T11 = FMA(KP618033988, T10, TZ); T1I = FNMS(KP618033988, TZ, T10); } } } } { E T2f, T27, T1j, T15, T2K, T2J, T2I, T2T, T1Z, T2X, T1N, T1V, T2W, T2U, T22; E T1G; { E T3a, T3b, T20, T1J, T1C, T1s; { E Tj, TC, T1H, T14; 
T3a = T9 + Ti; Tj = T9 - Ti; TC = Ts - TB; T3b = Ts + TB; T1H = FMA(KP559016994, T13, T12); T14 = FNMS(KP559016994, T13, T12); Ci[WS(csi, 10)] = KP951056516 * (FMA(KP618033988, Tj, TC)); Ci[WS(csi, 5)] = KP951056516 * (FNMS(KP618033988, TC, Tj)); T20 = FNMS(KP066152395, T1H, T1I); T1J = FMA(KP059835404, T1I, T1H); T2f = FMA(KP667278218, T1H, T1I); T27 = FNMS(KP603558818, T1I, T1H); T1C = FNMS(KP494780565, T14, T11); T1s = FMA(KP447533225, T11, T14); T1j = FNMS(KP522847744, T11, T14); T15 = FMA(KP578046249, T14, T11); } { E T1A, T1t, T1w, T3c, T3e, T1D, T1x, T3d, T1E, T1F; T1A = FNMS(KP912575812, T1z, T1y); T2K = FMA(KP912575812, T1z, T1y); T2J = FNMS(KP958953096, T1s, T1r); T1t = FMA(KP958953096, T1s, T1r); T1w = FMA(KP912575812, T1v, T1u); T2H = FNMS(KP912575812, T1v, T1u); T3c = T3a + T3b; T3e = T3a - T3b; T2I = FMA(KP867381224, T1C, T1B); T1D = FNMS(KP867381224, T1C, T1B); T1x = FNMS(KP894834959, T1w, T1t); T2T = FMA(KP734762448, T1Y, T1X); T1Z = FNMS(KP734762448, T1Y, T1X); T3d = FNMS(KP250000000, T3c, T39); Cr[0] = T3c + T39; T1E = FMA(KP447417479, T1w, T1D); Ci[WS(csi, 4)] = KP951056516 * (FMA(KP992114701, T1x, TJ)); Cr[WS(csr, 10)] = FNMS(KP559016994, T3e, T3d); Cr[WS(csr, 5)] = FMA(KP559016994, T3e, T3d); T1F = FMA(KP763932022, T1E, T1t); T2X = FMA(KP772036680, T1M, T1J); T1N = FNMS(KP772036680, T1M, T1J); T1V = FMA(KP734762448, T1U, T1R); T2W = FNMS(KP734762448, T1U, T1R); T2U = FNMS(KP772036680, T21, T20); T22 = FMA(KP772036680, T21, T20); T1G = FMA(KP999544308, T1F, T1A); } } { E T1i, T1l, T2l, T2R, T2g, T2Q, T28, T32, T1f, T1n, T1p, T33, T2b; { E T24, TY, T1d, T1W, T23, T25, T1m, T1e; T2w = FMA(KP829049696, T1h, T1g); T1i = FNMS(KP829049696, T1h, T1g); T1W = FNMS(KP992114701, T1V, T1O); T23 = FNMS(KP522616830, T1V, T22); Ci[WS(csi, 9)] = KP951056516 * (FNMS(KP803003575, T1G, TJ)); T2x = FNMS(KP831864738, T1k, T1j); T1l = FMA(KP831864738, T1k, T1j); Ci[WS(csi, 3)] = KP998026728 * (FNMS(KP952936919, T1W, T1N)); T24 = FMA(KP690983005, T23, T1N); TY = 
FNMS(KP916574801, TX, TQ); T2A = FMA(KP916574801, TX, TQ); T2C = FNMS(KP831864738, T1c, T15); T1d = FMA(KP831864738, T1c, T15); T2l = FNMS(KP982009705, T2k, T2j); T2R = FMA(KP982009705, T2k, T2j); T25 = FNMS(KP855719849, T24, T1Z); T2g = FMA(KP845997307, T2f, T2e); T2Q = FNMS(KP845997307, T2f, T2e); T1m = FMA(KP904730450, T1d, TY); T1e = FNMS(KP904730450, T1d, TY); Ci[WS(csi, 8)] = -(KP951056516 * (FNMS(KP992114701, T25, T1O))); T28 = FNMS(KP845997307, T27, T26); T32 = FMA(KP845997307, T27, T26); T1f = FNMS(KP242145790, T1e, TJ); Ci[WS(csi, 1)] = -(KP951056516 * (FMA(KP968583161, T1e, TJ))); T1n = FNMS(KP999754674, T1m, T1l); T1p = FNMS(KP904508497, T1m, T1i); T33 = FMA(KP921078979, T2a, T29); T2b = FNMS(KP921078979, T2a, T29); } { E T2P, T2Z, T2V, T2O; { E T2d, T2n, T2i, T2Y, T2m, T2o; T2P = FNMS(KP559016994, T2u, T2t); T2v = FMA(KP559016994, T2u, T2t); { E T1o, T1q, T2h, T2c; T1o = FNMS(KP559154169, T1n, T1i); T1q = FMA(KP683113946, T1p, T1l); T2h = FMA(KP906616052, T2b, T28); T2c = FNMS(KP906616052, T2b, T28); Ci[WS(csi, 6)] = -(KP951056516 * (FMA(KP968583161, T1o, T1f))); Ci[WS(csi, 11)] = -(KP951056516 * (FMA(KP876306680, T1q, T1f))); T2d = FMA(KP262346850, T2c, T1O); Ci[WS(csi, 2)] = -(KP998026728 * (FNMS(KP952936919, T1O, T2c))); T2n = T2g + T2h; T2i = FMA(KP618033988, T2h, T2g); } T2m = FMA(KP570584518, T2l, T2i); T2o = FNMS(KP669429328, T2n, T2l); Ci[WS(csi, 12)] = KP951056516 * (FNMS(KP949179823, T2m, T2d)); Ci[WS(csi, 7)] = KP951056516 * (FNMS(KP876306680, T2o, T2d)); T2V = FMA(KP956723877, T2U, T2T); T2Y = FMA(KP522616830, T2T, T2X); T2Z = FNMS(KP763932022, T2Y, T2U); } Cr[WS(csr, 3)] = FMA(KP992114701, T2V, T2P); { E T30, T34, T2S, T31, T35; T30 = FMA(KP855719849, T2Z, T2W); T34 = FNMS(KP923225144, T2R, T2Q); T2S = FMA(KP923225144, T2R, T2Q); Cr[WS(csr, 8)] = FNMS(KP897376177, T30, T2P); T31 = FNMS(KP237294955, T2S, T2P); Cr[WS(csr, 2)] = FMA(KP949179823, T2S, T2P); T35 = FNMS(KP997675361, T34, T33); { E T37, T36, T38, T2L; T37 = FNMS(KP904508497, T34, 
T32); T36 = FMA(KP560319534, T35, T32); T38 = FNMS(KP681693190, T37, T33); Cr[WS(csr, 12)] = FNMS(KP949179823, T36, T31); Cr[WS(csr, 7)] = FNMS(KP860541664, T38, T31); T2O = FNMS(KP809385824, T2K, T2I); T2L = FNMS(KP447417479, T2K, T2J); T2M = FNMS(KP690983005, T2L, T2I); } } Cr[WS(csr, 4)] = FNMS(KP992114701, T2O, T2v); } } } } T2y = FNMS(KP904730450, T2x, T2w); T2B = FMA(KP904730450, T2x, T2w); T2N = FNMS(KP999544308, T2M, T2H); { E T2z, T2D, T2F, T2E, T2G; T2z = FNMS(KP242145790, T2y, T2v); Cr[WS(csr, 1)] = FMA(KP968583161, T2y, T2v); T2D = FMA(KP904730450, T2C, T2B); T2F = T2A + T2B; Cr[WS(csr, 9)] = FNMS(KP803003575, T2N, T2v); T2E = FNMS(KP618033988, T2D, T2A); T2G = FMA(KP683113946, T2F, T2C); Cr[WS(csr, 6)] = FNMS(KP876091699, T2E, T2z); Cr[WS(csr, 11)] = FNMS(KP792626838, T2G, T2z); } } }
static void hf_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP642787609, +0.642787609686539326322643409907263432907559884); DK(KP766044443, +0.766044443118978035202392650555416673935832457); DK(KP939692620, +0.939692620785908384054109277324731469936208134); DK(KP342020143, +0.342020143325668733044099614682259580763083368); DK(KP984807753, +0.984807753012208059366743024589523013670643252); DK(KP173648177, +0.173648177666930348851716626769314796000375677); DK(KP500000000, +0.500000000000000000000000000000000000000000000); DK(KP866025403, +0.866025403784438646763723170752936183471402627); INT m; for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(rs)) { E T1, T1B, TQ, T1A, Tc, TN, T1C, T1D, TL, T1x, T19, T1o, T1c, T1n, Tu; E T1w, TW, T1k, T11, T1l; { E T6, TO, Tb, TP; T1 = cr[0]; T1B = ci[0]; { E T3, T5, T2, T4; T3 = cr[WS(rs, 3)]; T5 = ci[WS(rs, 3)]; T2 = W[4]; T4 = W[5]; T6 = FMA(T2, T3, T4 * T5); TO = FNMS(T4, T3, T2 * T5); } { E T8, Ta, T7, T9; T8 = cr[WS(rs, 6)]; Ta = ci[WS(rs, 6)]; T7 = W[10]; T9 = W[11]; Tb = FMA(T7, T8, T9 * Ta); TP = FNMS(T9, T8, T7 * Ta); } TQ = KP866025403 * (TO - TP); T1A = KP866025403 * (Tb - T6); Tc = T6 + Tb; TN = FNMS(KP500000000, Tc, T1); T1C = TO + TP; T1D = FNMS(KP500000000, T1C, T1B); } { E Tz, T13, TE, T14, TJ, T15, TK, T16; { E Tw, Ty, Tv, Tx; Tw = cr[WS(rs, 2)]; Ty = ci[WS(rs, 2)]; Tv = W[2]; Tx = W[3]; Tz = FMA(Tv, Tw, Tx * Ty); T13 = FNMS(Tx, Tw, Tv * Ty); } { E TB, TD, TA, TC; TB = cr[WS(rs, 5)]; TD = ci[WS(rs, 5)]; TA = W[8]; TC = W[9]; TE = FMA(TA, TB, TC * TD); T14 = FNMS(TC, TB, TA * TD); } { E TG, TI, TF, TH; TG = cr[WS(rs, 8)]; TI = ci[WS(rs, 8)]; TF = W[14]; TH = W[15]; TJ = FMA(TF, TG, TH * TI); T15 = FNMS(TH, TG, TF * TI); } TK = TE + TJ; T16 = T14 + T15; TL = Tz + TK; T1x = T13 + T16; { E T17, T18, T1a, T1b; T17 = FNMS(KP500000000, T16, T13); T18 = KP866025403 * (TJ - TE); T19 = T17 - T18; T1o = T18 + T17; T1a = 
FNMS(KP500000000, TK, Tz); T1b = KP866025403 * (T14 - T15); T1c = T1a - T1b; T1n = T1a + T1b; } } { E Ti, TX, Tn, TT, Ts, TU, Tt, TY; { E Tf, Th, Te, Tg; Tf = cr[WS(rs, 1)]; Th = ci[WS(rs, 1)]; Te = W[0]; Tg = W[1]; Ti = FMA(Te, Tf, Tg * Th); TX = FNMS(Tg, Tf, Te * Th); } { E Tk, Tm, Tj, Tl; Tk = cr[WS(rs, 4)]; Tm = ci[WS(rs, 4)]; Tj = W[6]; Tl = W[7]; Tn = FMA(Tj, Tk, Tl * Tm); TT = FNMS(Tl, Tk, Tj * Tm); } { E Tp, Tr, To, Tq; Tp = cr[WS(rs, 7)]; Tr = ci[WS(rs, 7)]; To = W[12]; Tq = W[13]; Ts = FMA(To, Tp, Tq * Tr); TU = FNMS(Tq, Tp, To * Tr); } Tt = Tn + Ts; TY = TT + TU; Tu = Ti + Tt; T1w = TX + TY; { E TS, TV, TZ, T10; TS = FNMS(KP500000000, Tt, Ti); TV = KP866025403 * (TT - TU); TW = TS - TV; T1k = TS + TV; TZ = FNMS(KP500000000, TY, TX); T10 = KP866025403 * (Ts - Tn); T11 = TZ - T10; T1l = T10 + TZ; } } { E T1y, Td, TM, T1v; T1y = KP866025403 * (T1w - T1x); Td = T1 + Tc; TM = Tu + TL; T1v = FNMS(KP500000000, TM, Td); cr[0] = Td + TM; cr[WS(rs, 3)] = T1v + T1y; ci[WS(rs, 2)] = T1v - T1y; } { E TR, T1I, T1e, T1K, T1i, T1H, T1f, T1J; TR = TN - TQ; T1I = T1D - T1A; { E T12, T1d, T1g, T1h; T12 = FMA(KP173648177, TW, KP984807753 * T11); T1d = FNMS(KP939692620, T1c, KP342020143 * T19); T1e = T12 + T1d; T1K = KP866025403 * (T1d - T12); T1g = FNMS(KP984807753, TW, KP173648177 * T11); T1h = FMA(KP342020143, T1c, KP939692620 * T19); T1i = KP866025403 * (T1g + T1h); T1H = T1g - T1h; } cr[WS(rs, 2)] = TR + T1e; ci[WS(rs, 6)] = T1H + T1I; T1f = FNMS(KP500000000, T1e, TR); ci[0] = T1f - T1i; ci[WS(rs, 3)] = T1f + T1i; T1J = FMS(KP500000000, T1H, T1I); cr[WS(rs, 5)] = T1J - T1K; cr[WS(rs, 8)] = T1K + T1J; } { E T1L, T1M, T1N, T1O; T1L = KP866025403 * (TL - Tu); T1M = T1C + T1B; T1N = T1w + T1x; T1O = FNMS(KP500000000, T1N, T1M); cr[WS(rs, 6)] = T1L - T1O; ci[WS(rs, 8)] = T1N + T1M; ci[WS(rs, 5)] = T1L + T1O; } { E T1j, T1E, T1q, T1z, T1u, T1F, T1r, T1G; T1j = TN + TQ; T1E = T1A + T1D; { E T1m, T1p, T1s, T1t; T1m = FMA(KP766044443, T1k, KP642787609 * T1l); T1p = 
FMA(KP173648177, T1n, KP984807753 * T1o); T1q = T1m + T1p; T1z = KP866025403 * (T1p - T1m); T1s = FNMS(KP642787609, T1k, KP766044443 * T1l); T1t = FNMS(KP984807753, T1n, KP173648177 * T1o); T1u = KP866025403 * (T1s - T1t); T1F = T1s + T1t; } cr[WS(rs, 1)] = T1j + T1q; T1r = FNMS(KP500000000, T1q, T1j); ci[WS(rs, 1)] = T1r - T1u; cr[WS(rs, 4)] = T1r + T1u; ci[WS(rs, 7)] = T1F + T1E; T1G = FNMS(KP500000000, T1F, T1E); cr[WS(rs, 7)] = T1z - T1G; ci[WS(rs, 4)] = T1z + T1G; } } }
static void r2cbIII_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP968583161, +0.968583161128631119490168375464735813836012403); DK(KP248689887, +0.248689887164854788242283746006447968417567406); DK(KP684547105, +0.684547105928688673732283357621209269889519233); DK(KP728968627, +0.728968627421411523146730319055259111372571664); DK(KP062790519, +0.062790519529313376076178224565631133122484832); DK(KP998026728, +0.998026728428271561952336806863450553336905220); DK(KP876306680, +0.876306680043863587308115903922062583399064238); DK(KP481753674, +0.481753674101715274987191502872129653528542010); DK(KP535826794, +0.535826794978996618271308767867639978063575346); DK(KP844327925, +0.844327925502015078548558063966681505381659241); DK(KP904827052, +0.904827052466019527713668647932697593970413911); DK(KP425779291, +0.425779291565072648862502445744251703979973042); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP587785252, +0.587785252292473129168705954639072768597652438); DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP500000000, +0.500000000000000000000000000000000000000000000); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) { E TS, T1O, T5, TP, T1N, TI, TH, Te, T17, T2h, T1y, T1V, T10, T2g, T1x; E T1S, Tz, Ty, Tn, T1m, T2e, T1B, T22, T1f, T2d, T1A, T1Z, TQ, TR; TQ = Ci[WS(csi, 2)]; TR = Ci[WS(csi, 7)]; TS = FNMS(KP1_175570504, TR, KP1_902113032 * TQ); T1O = FMA(KP1_902113032, TR, KP1_175570504 * 
TQ); { E T1, T4, TN, T2, T3, TO; T1 = Cr[WS(csr, 12)]; T2 = Cr[WS(csr, 7)]; T3 = Cr[WS(csr, 2)]; T4 = T2 + T3; TN = KP1_118033988 * (T3 - T2); T5 = FMA(KP2_000000000, T4, T1); TO = FMS(KP500000000, T4, T1); TP = TN - TO; T1N = TO + TN; } { E T6, Td, T15, TU, T14, T11, TX, TY; T6 = Cr[WS(csr, 11)]; TI = Ci[WS(csi, 11)]; { E T7, T8, T9, Ta, Tb, Tc; T7 = Cr[WS(csr, 6)]; T8 = Cr[WS(csr, 8)]; T9 = T7 + T8; Ta = Cr[WS(csr, 1)]; Tb = Cr[WS(csr, 3)]; Tc = Ta + Tb; Td = T9 + Tc; T15 = Ta - Tb; TU = KP559016994 * (Tc - T9); T14 = T8 - T7; } { E TB, TC, TD, TE, TF, TG; TB = Ci[WS(csi, 6)]; TC = Ci[WS(csi, 8)]; TD = TB - TC; TE = Ci[WS(csi, 1)]; TF = Ci[WS(csi, 3)]; TG = TE - TF; TH = TD + TG; T11 = KP559016994 * (TD - TG); TX = TB + TC; TY = TE + TF; } Te = T6 + Td; { E T16, T1T, T13, T1U, T12; T16 = FMA(KP587785252, T14, KP951056516 * T15); T1T = FNMS(KP587785252, T15, KP951056516 * T14); T12 = FNMS(KP250000000, TH, TI); T13 = T11 - T12; T1U = T11 + T12; T17 = T13 - T16; T2h = T1T - T1U; T1y = T16 + T13; T1V = T1T + T1U; } { E TZ, T1R, TW, T1Q, TV; TZ = FNMS(KP951056516, TY, KP587785252 * TX); T1R = FMA(KP951056516, TX, KP587785252 * TY); TV = FMS(KP250000000, Td, T6); TW = TU - TV; T1Q = TV + TU; T10 = TW + TZ; T2g = T1Q + T1R; T1x = TZ - TW; T1S = T1Q - T1R; } } { E Tf, Tm, T1k, T19, T1j, T1g, T1c, T1d; Tf = Cr[WS(csr, 10)]; Tz = Ci[WS(csi, 10)]; { E Tg, Th, Ti, Tj, Tk, Tl; Tg = Cr[WS(csr, 5)]; Th = Cr[WS(csr, 9)]; Ti = Tg + Th; Tj = Cr[0]; Tk = Cr[WS(csr, 4)]; Tl = Tj + Tk; Tm = Ti + Tl; T1k = Tj - Tk; T19 = KP559016994 * (Tl - Ti); T1j = Th - Tg; } { E Ts, Tt, Tu, Tv, Tw, Tx; Ts = Ci[WS(csi, 4)]; Tt = Ci[0]; Tu = Ts - Tt; Tv = Ci[WS(csi, 5)]; Tw = Ci[WS(csi, 9)]; Tx = Tv - Tw; Ty = Tu - Tx; T1g = KP559016994 * (Tx + Tu); T1c = Tv + Tw; T1d = Tt + Ts; } Tn = Tf + Tm; { E T1l, T20, T1i, T21, T1h; T1l = FMA(KP587785252, T1j, KP951056516 * T1k); T20 = FNMS(KP587785252, T1k, KP951056516 * T1j); T1h = FMA(KP250000000, Ty, Tz); T1i = T1g - T1h; T21 = T1g + T1h; T1m = T1i - T1l; 
T2e = T21 - T20; T1B = T1l + T1i; T22 = T20 + T21; } { E T1e, T1Y, T1b, T1X, T1a; T1e = FNMS(KP951056516, T1d, KP587785252 * T1c); T1Y = FMA(KP951056516, T1c, KP587785252 * T1d); T1a = FMS(KP250000000, Tm, Tf); T1b = T19 - T1a; T1X = T1a + T19; T1f = T1b + T1e; T2d = T1X + T1Y; T1A = T1e - T1b; T1Z = T1X - T1Y; } } { E Tq, To, Tp, TK, TM, TA, TJ, TL, Tr; Tq = KP1_118033988 * (Tn - Te); To = Te + Tn; Tp = FMS(KP500000000, To, T5); TA = Ty - Tz; TJ = TH + TI; TK = FNMS(KP1_902113032, TJ, KP1_175570504 * TA); TM = FMA(KP1_175570504, TJ, KP1_902113032 * TA); R0[0] = FMA(KP2_000000000, To, T5); TL = Tq - Tp; R0[WS(rs, 5)] = TL + TM; R1[WS(rs, 7)] = TM - TL; Tr = Tp + Tq; R1[WS(rs, 2)] = Tr + TK; R0[WS(rs, 10)] = TK - Tr; } { E T2q, T2s, T2k, T2j, T2l, T2m, T2r, T2n; { E T2o, T2p, T2f, T2i; T2o = FNMS(KP904827052, T2d, KP425779291 * T2e); T2p = FNMS(KP535826794, T2h, KP844327925 * T2g); T2q = FNMS(KP1_902113032, T2p, KP1_175570504 * T2o); T2s = FMA(KP1_175570504, T2p, KP1_902113032 * T2o); T2k = T1N + T1O; T2f = FMA(KP425779291, T2d, KP904827052 * T2e); T2i = FMA(KP535826794, T2g, KP844327925 * T2h); T2j = T2f - T2i; T2l = FMA(KP500000000, T2j, T2k); T2m = KP1_118033988 * (T2i + T2f); } R0[WS(rs, 2)] = FMS(KP2_000000000, T2j, T2k); T2r = T2m - T2l; R0[WS(rs, 7)] = T2r + T2s; R1[WS(rs, 9)] = T2s - T2r; T2n = T2l + T2m; R1[WS(rs, 4)] = T2n + T2q; R0[WS(rs, 12)] = T2q - T2n; } { E T1u, T1w, TT, T1o, T1p, T1q, T1v, T1r; { E T1s, T1t, T18, T1n; T1s = FMA(KP481753674, T10, KP876306680 * T17); T1t = FMA(KP844327925, T1f, KP535826794 * T1m); T1u = FMA(KP1_902113032, T1s, KP1_175570504 * T1t); T1w = FNMS(KP1_175570504, T1s, KP1_902113032 * T1t); TT = TP - TS; T18 = FNMS(KP481753674, T17, KP876306680 * T10); T1n = FNMS(KP844327925, T1m, KP535826794 * T1f); T1o = T18 + T1n; T1p = FMS(KP500000000, T1o, TT); T1q = KP1_118033988 * (T1n - T18); } R0[WS(rs, 1)] = FMA(KP2_000000000, T1o, TT); T1v = T1q - T1p; R0[WS(rs, 6)] = T1v + T1w; R1[WS(rs, 8)] = T1w - T1v; T1r = T1p + T1q; 
R1[WS(rs, 3)] = T1r + T1u; R0[WS(rs, 11)] = T1u - T1r; } { E T1H, T1L, T1E, T1D, T1I, T1J, T1M, T1K; { E T1F, T1G, T1z, T1C; T1F = FNMS(KP062790519, T1B, KP998026728 * T1A); T1G = FNMS(KP684547105, T1x, KP728968627 * T1y); T1H = FNMS(KP1_902113032, T1G, KP1_175570504 * T1F); T1L = FMA(KP1_175570504, T1G, KP1_902113032 * T1F); T1E = TP + TS; T1z = FMA(KP728968627, T1x, KP684547105 * T1y); T1C = FMA(KP062790519, T1A, KP998026728 * T1B); T1D = T1z + T1C; T1I = FMA(KP500000000, T1D, T1E); T1J = KP1_118033988 * (T1C - T1z); } R1[WS(rs, 1)] = FMS(KP2_000000000, T1D, T1E); T1M = T1J - T1I; R0[WS(rs, 9)] = T1L - T1M; R1[WS(rs, 6)] = T1L + T1M; T1K = T1I + T1J; R1[WS(rs, 11)] = T1H - T1K; R0[WS(rs, 4)] = T1H + T1K; } { E T2a, T2c, T1P, T24, T25, T26, T2b, T27; { E T28, T29, T1W, T23; T28 = FMA(KP248689887, T1S, KP968583161 * T1V); T29 = FMA(KP481753674, T1Z, KP876306680 * T22); T2a = FMA(KP1_902113032, T28, KP1_175570504 * T29); T2c = FNMS(KP1_175570504, T28, KP1_902113032 * T29); T1P = T1N - T1O; T1W = FNMS(KP248689887, T1V, KP968583161 * T1S); T23 = FNMS(KP481753674, T22, KP876306680 * T1Z); T24 = T1W + T23; T25 = FMS(KP500000000, T24, T1P); T26 = KP1_118033988 * (T23 - T1W); } R1[0] = FMA(KP2_000000000, T24, T1P); T2b = T26 - T25; R1[WS(rs, 5)] = T2b + T2c; R0[WS(rs, 8)] = T2c - T2b; T27 = T25 + T26; R0[WS(rs, 3)] = T27 + T2a; R1[WS(rs, 10)] = T2a - T27; } } } }
static void hc2cf_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs)) { E T1g, T1f, T1e, Tm, T1q, T1o, T1p, TN, T1h, T1i; { E T1, T1m, T1l, T7, TS, Tk, TQ, Te, To, Tr, T17, TM, T12, Tu, TW; E Tp, Tx, Tt, Tq, Tw; { E T3, T6, T2, T5; T1 = Rp[0]; T1m = Rm[0]; T3 = Rp[WS(rs, 2)]; T6 = Rm[WS(rs, 2)]; T2 = W[6]; T5 = W[7]; { E Ta, Td, T9, Tc; { E Tg, Tj, Ti, TR, Th, T1k, T4, Tf; Tg = Rp[WS(rs, 3)]; Tj = Rm[WS(rs, 3)]; T1k = T2 * T6; T4 = T2 * T3; Tf = W[10]; Ti = W[11]; T1l = FNMS(T5, T3, T1k); T7 = FMA(T5, T6, T4); TR = Tf * Tj; Th = Tf * Tg; Ta = Rp[WS(rs, 1)]; Td = Rm[WS(rs, 1)]; TS = FNMS(Ti, Tg, TR); Tk = FMA(Ti, Tj, Th); T9 = W[2]; Tc = W[3]; } { E TB, TE, TH, T13, TC, TK, TG, TD, TJ, TP, Tb, TA, Tn; TB = Ip[WS(rs, 3)]; TE = Im[WS(rs, 3)]; TP = T9 * Td; Tb = T9 * Ta; TA = W[12]; TH = Ip[WS(rs, 1)]; TQ = FNMS(Tc, Ta, TP); Te = FMA(Tc, Td, Tb); T13 = TA * TE; TC = TA * TB; TK = Im[WS(rs, 1)]; TG = W[4]; TD = W[13]; TJ = W[5]; { E T14, TF, T16, TL, T15, TI; To = Ip[0]; T15 = TG * TK; TI = TG * TH; T14 = FNMS(TD, TB, T13); TF = FMA(TD, TE, TC); T16 = FNMS(TJ, TH, T15); TL = FMA(TJ, TK, TI); Tr = Im[0]; Tn = W[0]; T17 = T14 - T16; T1g = T14 + T16; TM = TF + TL; T12 = TF - TL; } Tu = Ip[WS(rs, 2)]; TW = Tn * Tr; Tp = Tn * To; Tx = Im[WS(rs, 2)]; Tt = W[8]; Tq = W[1]; Tw = W[9]; } } } { E T8, T1j, T1n, Tz, T1a, TU, Tl, T1b, T1c, T1v, T1t, T1w, T19, T1u, T1d; { E T1r, T10, TV, T1s, T11, T18; { E TO, TX, Ts, TZ, Ty, TT, TY, Tv; T8 = T1 + T7; TO = T1 - T7; TY = Tt * Tx; Tv = Tt * Tu; TX = FNMS(Tq, To, TW); Ts = FMA(Tq, Tr, Tp); TZ = FNMS(Tw, Tu, TY); Ty = FMA(Tw, Tx, Tv); TT = TQ - TS; T1j = TQ + TS; T1n = T1l + T1m; T1r = T1m - T1l; T10 = TX - TZ; T1f = TX + TZ; Tz = Ts + Ty; TV = Ts - Ty; T1a = TO - TT; TU = TO + TT; T1s = Te 
- Tk; Tl = Te + Tk; } T1b = T10 - TV; T11 = TV + T10; T18 = T12 - T17; T1c = T12 + T17; T1v = T1s + T1r; T1t = T1r - T1s; T1w = T18 - T11; T19 = T11 + T18; } Ip[WS(rs, 3)] = FMA(KP707106781, T1w, T1v); Im[0] = FMS(KP707106781, T1w, T1v); Rp[WS(rs, 1)] = FMA(KP707106781, T19, TU); Rm[WS(rs, 2)] = FNMS(KP707106781, T19, TU); T1u = T1b + T1c; T1d = T1b - T1c; Ip[WS(rs, 1)] = FMA(KP707106781, T1u, T1t); Im[WS(rs, 2)] = FMS(KP707106781, T1u, T1t); Rp[WS(rs, 3)] = FMA(KP707106781, T1d, T1a); Rm[0] = FNMS(KP707106781, T1d, T1a); T1e = T8 - Tl; Tm = T8 + Tl; T1q = T1n - T1j; T1o = T1j + T1n; T1p = TM - Tz; TN = Tz + TM; } } Ip[WS(rs, 2)] = T1p + T1q; Im[WS(rs, 1)] = T1p - T1q; Rp[0] = Tm + TN; Rm[WS(rs, 3)] = Tm - TN; T1h = T1f - T1g; T1i = T1f + T1g; Ip[0] = T1i + T1o; Im[WS(rs, 3)] = T1i - T1o; Rp[WS(rs, 2)] = T1e + T1h; Rm[WS(rs, 1)] = T1e - T1h; } }
static void r2cf_32(float *R0, float *R1, float *Cr, float *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP414213562, +0.414213562373095048801688724209698078569671875); INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E T1x, T1M, T1I, T1E, T1J, T1H; { E Tv, T1h, T7, T2b, Te, T2n, Ty, T1i, T1l, TF, T2d, Tt, T1k, TC, T2c; E Tm, T2j, T1Z, T2k, T22, TK, T1B, T19, T1C, T1e, TO, TV, T1T, TN, TP; E T2g, T1S; { E TD, Tp, Tq, Tr; { E T1, T2, T4, T5; T1 = R0[0]; T2 = R0[WS(rs, 8)]; T4 = R0[WS(rs, 4)]; T5 = R0[WS(rs, 12)]; { E Ta, Tw, Tx, Td, Tn, To; { E T8, T3, T6, T9, Tb, Tc; T8 = R0[WS(rs, 2)]; Tv = T1 - T2; T3 = T1 + T2; T1h = T4 - T5; T6 = T4 + T5; T9 = R0[WS(rs, 10)]; Tb = R0[WS(rs, 14)]; Tc = R0[WS(rs, 6)]; T7 = T3 + T6; T2b = T3 - T6; Ta = T8 + T9; Tw = T8 - T9; Tx = Tb - Tc; Td = Tb + Tc; } Tn = R0[WS(rs, 15)]; To = R0[WS(rs, 7)]; Te = Ta + Td; T2n = Td - Ta; Ty = Tw + Tx; T1i = Tx - Tw; TD = Tn - To; Tp = Tn + To; Tq = R0[WS(rs, 3)]; Tr = R0[WS(rs, 11)]; } } { E Tj, TA, Ti, Tk; { E Tg, Th, TE, Ts; Tg = R0[WS(rs, 1)]; Th = R0[WS(rs, 9)]; Tj = R0[WS(rs, 5)]; TE = Tq - Tr; Ts = Tq + Tr; TA = Tg - Th; Ti = Tg + Th; T1l = FNMS(KP414213562, TD, TE); TF = FMA(KP414213562, TE, TD); T2d = Tp - Ts; Tt = Tp + Ts; Tk = R0[WS(rs, 13)]; } { E T11, T15, T1c, T20, T14, T16, T1X, T1Y, T1Q, T1R; { E T1a, T1b, T12, T13; { E TZ, T10, TB, Tl; TZ = R1[WS(rs, 15)]; T10 = R1[WS(rs, 7)]; T1a = R1[WS(rs, 11)]; TB = Tj - Tk; Tl = Tj + Tk; T1X = TZ 
+ T10; T11 = TZ - T10; T1k = FMA(KP414213562, TA, TB); TC = FNMS(KP414213562, TB, TA); T2c = Ti - Tl; Tm = Ti + Tl; T1b = R1[WS(rs, 3)]; } T12 = R1[WS(rs, 1)]; T13 = R1[WS(rs, 9)]; T15 = R1[WS(rs, 13)]; T1Y = T1b + T1a; T1c = T1a - T1b; T20 = T12 + T13; T14 = T12 - T13; T16 = R1[WS(rs, 5)]; } T2j = T1X - T1Y; T1Z = T1X + T1Y; { E TT, TU, TL, TM; { E TI, T21, T17, TJ, T18, T1d; TI = R1[0]; T21 = T15 + T16; T17 = T15 - T16; TJ = R1[WS(rs, 8)]; TT = R1[WS(rs, 4)]; T2k = T21 - T20; T22 = T20 + T21; T18 = T14 + T17; T1d = T17 - T14; T1Q = TI + TJ; TK = TI - TJ; T1B = FNMS(KP707106781, T18, T11); T19 = FMA(KP707106781, T18, T11); T1C = FNMS(KP707106781, T1d, T1c); T1e = FMA(KP707106781, T1d, T1c); TU = R1[WS(rs, 12)]; } TL = R1[WS(rs, 2)]; TM = R1[WS(rs, 10)]; TO = R1[WS(rs, 14)]; T1R = TT + TU; TV = TT - TU; T1T = TL + TM; TN = TL - TM; TP = R1[WS(rs, 6)]; } T2g = T1Q - T1R; T1S = T1Q + T1R; } } } { E T1P, T25, T23, T2h, T1W, T1y, TS, T1z, TX, T27, T2a; { E Tf, Tu, T29, T28; { E T1U, TQ, T1V, TR, TW; T1P = T7 - Te; Tf = T7 + Te; T1U = TO + TP; TQ = TO - TP; Tu = Tm + Tt; T25 = Tt - Tm; T23 = T1Z - T22; T29 = T1Z + T22; T2h = T1U - T1T; T1V = T1T + T1U; TR = TN + TQ; TW = TN - TQ; T27 = Tf + Tu; T1W = T1S - T1V; T28 = T1S + T1V; T1y = FNMS(KP707106781, TR, TK); TS = FMA(KP707106781, TR, TK); T1z = FNMS(KP707106781, TW, TV); TX = FMA(KP707106781, TW, TV); T2a = T28 + T29; } Cr[WS(csr, 8)] = Tf - Tu; Ci[WS(csi, 8)] = T29 - T28; } Cr[0] = T27 + T2a; Cr[WS(csr, 16)] = T27 - T2a; { E T2s, T2i, T2v, T2f, T2r, T2p, T2l, T2t; { E T2o, T2e, T26, T24; T2o = T2d - T2c; T2e = T2c + T2d; T2s = FNMS(KP414213562, T2g, T2h); T2i = FMA(KP414213562, T2h, T2g); T26 = T23 - T1W; T24 = T1W + T23; T2v = FNMS(KP707106781, T2e, T2b); T2f = FMA(KP707106781, T2e, T2b); T2r = FMA(KP707106781, T2o, T2n); T2p = FNMS(KP707106781, T2o, T2n); Ci[WS(csi, 4)] = FMA(KP707106781, T26, T25); Ci[WS(csi, 12)] = FMS(KP707106781, T26, T25); Cr[WS(csr, 4)] = FMA(KP707106781, T24, T1P); Cr[WS(csr, 12)] = 
FNMS(KP707106781, T24, T1P); T2l = FNMS(KP414213562, T2k, T2j); T2t = FMA(KP414213562, T2j, T2k); } { E T1v, T1G, TH, T1s, T1F, T1w, T1o, T1g, T1p, T1n; { E T1f, TY, T1t, T1u, T1j, T1m; { E Tz, TG, T1q, T1r; T1v = FNMS(KP707106781, Ty, Tv); Tz = FMA(KP707106781, Ty, Tv); { E T2q, T2m, T2w, T2u; T2q = T2l - T2i; T2m = T2i + T2l; T2w = T2t - T2s; T2u = T2s + T2t; Ci[WS(csi, 10)] = FMA(KP923879532, T2q, T2p); Ci[WS(csi, 6)] = FMS(KP923879532, T2q, T2p); Cr[WS(csr, 2)] = FMA(KP923879532, T2m, T2f); Cr[WS(csr, 14)] = FNMS(KP923879532, T2m, T2f); Cr[WS(csr, 10)] = FNMS(KP923879532, T2w, T2v); Cr[WS(csr, 6)] = FMA(KP923879532, T2w, T2v); Ci[WS(csi, 2)] = FMA(KP923879532, T2u, T2r); Ci[WS(csi, 14)] = FMS(KP923879532, T2u, T2r); TG = TC + TF; T1G = TF - TC; } T1f = FNMS(KP198912367, T1e, T19); T1q = FMA(KP198912367, T19, T1e); T1r = FMA(KP198912367, TS, TX); TY = FNMS(KP198912367, TX, TS); T1t = FNMS(KP923879532, TG, Tz); TH = FMA(KP923879532, TG, Tz); T1u = T1r + T1q; T1s = T1q - T1r; T1F = FMA(KP707106781, T1i, T1h); T1j = FNMS(KP707106781, T1i, T1h); T1m = T1k + T1l; T1w = T1k - T1l; } Cr[WS(csr, 7)] = FMA(KP980785280, T1u, T1t); T1o = T1f - TY; T1g = TY + T1f; T1p = FMA(KP923879532, T1m, T1j); T1n = FNMS(KP923879532, T1m, T1j); Cr[WS(csr, 9)] = FNMS(KP980785280, T1u, T1t); } Cr[WS(csr, 1)] = FMA(KP980785280, T1g, TH); Cr[WS(csr, 15)] = FNMS(KP980785280, T1g, TH); Ci[WS(csi, 1)] = FMS(KP980785280, T1s, T1p); Ci[WS(csi, 15)] = FMA(KP980785280, T1s, T1p); Ci[WS(csi, 9)] = FMS(KP980785280, T1o, T1n); Ci[WS(csi, 7)] = FMA(KP980785280, T1o, T1n); { E T1A, T1D, T1N, T1O, T1K, T1L; T1A = FMA(KP668178637, T1z, T1y); T1K = FNMS(KP668178637, T1y, T1z); T1L = FNMS(KP668178637, T1B, T1C); T1D = FMA(KP668178637, T1C, T1B); T1N = FNMS(KP923879532, T1w, T1v); T1x = FMA(KP923879532, T1w, T1v); T1O = T1K + T1L; T1M = T1K - T1L; Cr[WS(csr, 5)] = FNMS(KP831469612, T1O, T1N); T1I = T1D - T1A; T1E = T1A + T1D; T1J = FMA(KP923879532, T1G, T1F); T1H = FNMS(KP923879532, T1G, T1F); Cr[WS(csr, 
11)] = FMA(KP831469612, T1O, T1N); } } } } } Ci[WS(csi, 3)] = FMA(KP831469612, T1M, T1J); Cr[WS(csr, 3)] = FMA(KP831469612, T1E, T1x); Ci[WS(csi, 13)] = FMS(KP831469612, T1M, T1J); Cr[WS(csr, 13)] = FNMS(KP831469612, T1E, T1x); Ci[WS(csi, 11)] = FMA(KP831469612, T1I, T1H); Ci[WS(csi, 5)] = FMS(KP831469612, T1I, T1H); } }
/*
 * r2cf_25 -- 25-point real-input kernel, repeated v times.
 *
 * Each iteration reads the 25 real samples R0[WS(rs, 0..12)] and
 * R1[WS(rs, 0..11)] and writes Cr[WS(csr, 0..12)] and Ci[WS(csi, 1..12)].
 * Ci[0] is never written.  All loads are done before the first store.  After
 * every iteration R0/R1 advance by ivs and Cr/Ci by ovs.
 *
 * Reading R0[k] as sample x[2k] and R1[k] as sample x[2k+1], the samples are
 * gathered into five groups by n mod 5; each group goes through a 5-point
 * butterfly, and the group results are then rotated by the constants declared
 * below and recombined.
 * NOTE(review): the arithmetic is consistent with
 *     Cr[k] =  sum_n x[n] * cos(2*pi*k*n/25)
 *     Ci[k] = -sum_n x[n] * sin(2*pi*k*n/25)
 * -- confirm against the project's transform definition.
 *
 * DK, WS, FMA, FNMS, FMS, FNMA and MAKE_VOLATILE_STRIDE are project macros
 * defined outside this block.
 */
static void r2cf_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
{
    DK(KP998026728, +0.998026728428271561952336806863450553336905220);
    DK(KP125581039, +0.125581039058626752152356449131262266244969664);
    DK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
    DK(KP062790519, +0.062790519529313376076178224565631133122484832);
    DK(KP809016994, +0.809016994374947424102293417182819058860154590);
    DK(KP309016994, +0.309016994374947424102293417182819058860154590);
    DK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
    DK(KP728968627, +0.728968627421411523146730319055259111372571664);
    DK(KP963507348, +0.963507348203430549974383005744259307057084020);
    DK(KP876306680, +0.876306680043863587308115903922062583399064238);
    DK(KP497379774, +0.497379774329709576484567492012895936835134813);
    DK(KP968583161, +0.968583161128631119490168375464735813836012403);
    DK(KP684547105, +0.684547105928688673732283357621209269889519233);
    DK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
    DK(KP481753674, +0.481753674101715274987191502872129653528542010);
    DK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
    DK(KP248689887, +0.248689887164854788242283746006447968417567406);
    DK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
    DK(KP992114701, +0.992114701314477831049793042785778521453036709);
    DK(KP250666467, +0.250666467128608490746237519633017587885836494);
    DK(KP425779291, +0.425779291565072648862502445744251703979973042);
    DK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
    DK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
    DK(KP770513242, +0.770513242775789230803009636396177847271667672);
    DK(KP844327925, +0.844327925502015078548558063966681505381659241);
    DK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
    DK(KP125333233, +0.125333233564304245373118759816508793942918247);
    DK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
    DK(KP904827052, +0.904827052466019527713668647932697593970413911);
    DK(KP851558583, +0.851558583130145297725004891488503407959946084);
    DK(KP637423989, +0.637423989748689710176712811676016195434917298);
    DK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
    DK(KP535826794, +0.535826794978996618271308767867639978063575346);
    DK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
    DK(KP293892626, +0.293892626146236564584352977319536384298826219);
    DK(KP475528258, +0.475528258147576786058219666689691071702849317);
    DK(KP250000000, +0.250000000000000000000000000000000000000000000);
    DK(KP559016994, +0.559016994374947424102293417182819058860154590);
    DK(KP587785252, +0.587785252292473129168705954639072768597652438);
    DK(KP951056516, +0.951056516295153572116439333379382143405698634);
    INT i;
    for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {
        E T8, T1j, T1V, T1l, T7, T9, Ta, T12, T2u, T1O, T19, T1P, Ti, T2r, T1K;
        E Tp, T1L, Tx, T2q, T1H, TE, T1I, TN, T2t, T1R, TU, T1S, T6, T1k, T3;
        E T2s, T2v;
        /* Group n = 0 (mod 5): R0[0], R1[2], R0[5], R1[7], R0[10]. */
        T8 = R0[0];
        {
            E T4, T5, T1, T2;
            T4 = R0[WS(rs, 5)];
            T5 = R1[WS(rs, 7)];
            T6 = T4 + T5;
            T1k = T4 - T5;
            T1 = R1[WS(rs, 2)];
            T2 = R0[WS(rs, 10)];
            T3 = T1 + T2;
            T1j = T1 - T2;
        }
        T1V = KP951056516 * T1k;
        T1l = FMA(KP951056516, T1j, KP587785252 * T1k);
        T7 = KP559016994 * (T3 - T6);
        T9 = T3 + T6;
        Ta = FNMS(KP250000000, T9, T8);
        /* Group n = 3 (mod 5): R1[1], R0[4], R1[6], R0[9], R1[11]. */
        {
            E T16, T13, T14, TY, T17, T11, T15, T18;
            T16 = R1[WS(rs, 1)];
            {
                E TW, TX, TZ, T10;
                TW = R0[WS(rs, 4)];
                TX = R1[WS(rs, 11)];
                T13 = TW + TX;
                TZ = R1[WS(rs, 6)];
                T10 = R0[WS(rs, 9)];
                T14 = TZ + T10;
                TY = TW - TX;
                T17 = T13 + T14;
                T11 = TZ - T10;
            }
            T12 = FMA(KP475528258, TY, KP293892626 * T11);
            T2u = T16 + T17;
            T1O = FNMS(KP293892626, TY, KP475528258 * T11);
            T15 = KP559016994 * (T13 - T14);
            T18 = FNMS(KP250000000, T17, T16);
            T19 = T15 + T18;
            T1P = T18 - T15;
        }
        /* Group n = 1 (mod 5): R1[0], R0[3], R1[5], R0[8], R1[10]. */
        {
            E Tm, Tj, Tk, Te, Tn, Th, Tl, To;
            Tm = R1[0];
            {
                E Tc, Td, Tf, Tg;
                Tc = R0[WS(rs, 3)];
                Td = R1[WS(rs, 10)];
                Tj = Tc + Td;
                Tf = R1[WS(rs, 5)];
                Tg = R0[WS(rs, 8)];
                Tk = Tf + Tg;
                Te = Tc - Td;
                Tn = Tj + Tk;
                Th = Tf - Tg;
            }
            Ti = FMA(KP475528258, Te, KP293892626 * Th);
            T2r = Tm + Tn;
            T1K = FNMS(KP293892626, Te, KP475528258 * Th);
            Tl = KP559016994 * (Tj - Tk);
            To = FNMS(KP250000000, Tn, Tm);
            Tp = Tl + To;
            T1L = To - Tl;
        }
        /* Group n = 4 (mod 5): R0[2], R1[4], R0[7], R1[9], R0[12]. */
        {
            E TB, Ty, Tz, Tt, TC, Tw, TA, TD;
            TB = R0[WS(rs, 2)];
            {
                E Tr, Ts, Tu, Tv;
                Tr = R1[WS(rs, 4)];
                Ts = R0[WS(rs, 12)];
                Ty = Tr + Ts;
                Tu = R0[WS(rs, 7)];
                Tv = R1[WS(rs, 9)];
                Tz = Tu + Tv;
                Tt = Tr - Ts;
                TC = Ty + Tz;
                Tw = Tu - Tv;
            }
            Tx = FMA(KP475528258, Tt, KP293892626 * Tw);
            T2q = TB + TC;
            T1H = FNMS(KP293892626, Tt, KP475528258 * Tw);
            TA = KP559016994 * (Ty - Tz);
            TD = FNMS(KP250000000, TC, TB);
            TE = TA + TD;
            T1I = TD - TA;
        }
        /* Group n = 2 (mod 5): R0[1], R1[3], R0[6], R1[8], R0[11]. */
        {
            E TR, TO, TP, TJ, TS, TM, TQ, TT;
            TR = R0[WS(rs, 1)];
            {
                E TH, TI, TK, TL;
                TH = R1[WS(rs, 3)];
                TI = R0[WS(rs, 11)];
                TO = TH + TI;
                TK = R0[WS(rs, 6)];
                TL = R1[WS(rs, 8)];
                TP = TK + TL;
                TJ = TH - TI;
                TS = TO + TP;
                TM = TK - TL;
            }
            TN = FMA(KP475528258, TJ, KP293892626 * TM);
            T2t = TR + TS;
            T1R = FNMS(KP293892626, TJ, KP475528258 * TM);
            TQ = KP559016994 * (TO - TP);
            TT = FNMS(KP250000000, TS, TR);
            TU = TQ + TT;
            T1S = TT - TQ;
        }
        /* Outputs 0, 5, 10: a 5-point butterfly over the five group totals. */
        T2s = T2q - T2r;
        T2v = T2t - T2u;
        Ci[WS(csi, 5)] = FNMS(KP587785252, T2v, KP951056516 * T2s);
        Ci[WS(csi, 10)] = FMA(KP587785252, T2s, KP951056516 * T2v);
        {
            E T2z, T2y, T2A, T2w, T2x, T2B;
            T2z = T8 + T9;
            T2w = T2r + T2q;
            T2x = T2t + T2u;
            T2y = KP559016994 * (T2w - T2x);
            T2A = T2w + T2x;
            Cr[0] = T2z + T2A;
            T2B = FNMS(KP250000000, T2A, T2z);
            Cr[WS(csr, 5)] = T2y + T2B;
            Cr[WS(csr, 10)] = T2B - T2y;
        }
        /* Outputs 1, 4, 6, 9, 11: built from Tb, T1l and the per-group values Ti/Tp, Tx/TE, TN/TU, T12/T19. */
        {
            E Tb, Tq, TF, TG, T1E, T1F, T1G, T1B, T1C, T1D, TV, T1a, T1b, T1o, T1r;
            E T1s, T1z, T1x, T1e, T1h, T1i, T1u, T1t;
            Tb = T7 + Ta;
            Tq = FMA(KP1_688655851, Ti, KP535826794 * Tp);
            TF = FMA(KP1_541026485, Tx, KP637423989 * TE);
            TG = Tq - TF;
            T1E = FMA(KP851558583, TN, KP904827052 * TU);
            T1F = FMA(KP1_984229402, T12, KP125333233 * T19);
            T1G = T1E + T1F;
            T1B = FNMS(KP844327925, Tp, KP1_071653589 * Ti);
            T1C = FNMS(KP1_274847979, Tx, KP770513242 * TE);
            T1D = T1B + T1C;
            TV = FNMS(KP425779291, TU, KP1_809654104 * TN);
            T1a = FNMS(KP992114701, T19, KP250666467 * T12);
            T1b = TV + T1a;
            {
                E T1m, T1n, T1p, T1q;
                T1m = FMA(KP1_937166322, Ti, KP248689887 * Tp);
                T1n = FMA(KP1_071653589, Tx, KP844327925 * TE);
                T1o = T1m + T1n;
                T1p = FMA(KP1_752613360, TN, KP481753674 * TU);
                T1q = FMA(KP1_457937254, T12, KP684547105 * T19);
                T1r = T1p + T1q;
                T1s = T1o + T1r;
                T1z = T1q - T1p;
                T1x = T1n - T1m;
            }
            {
                E T1c, T1d, T1f, T1g;
                T1c = FNMS(KP497379774, Ti, KP968583161 * Tp);
                T1d = FNMS(KP1_688655851, Tx, KP535826794 * TE);
                T1e = T1c + T1d;
                T1f = FNMS(KP963507348, TN, KP876306680 * TU);
                T1g = FNMS(KP1_369094211, T12, KP728968627 * T19);
                T1h = T1f + T1g;
                T1i = T1e + T1h;
                T1u = T1f - T1g;
                T1t = T1d - T1c;
            }
            Cr[WS(csr, 1)] = Tb + T1i;
            Ci[WS(csi, 1)] = -(T1l + T1s);
            Cr[WS(csr, 4)] = Tb + TG + T1b;
            Ci[WS(csi, 4)] = T1l + T1D - T1G;
            Ci[WS(csi, 9)] = FMA(KP309016994, T1D, T1l) + FMA(KP587785252, T1a - TV, KP809016994 * T1G) - (KP951056516 * (Tq + TF));
            Cr[WS(csr, 9)] = FMA(KP309016994, TG, Tb) + FMA(KP951056516, T1B - T1C, KP587785252 * (T1F - T1E)) - (KP809016994 * T1b);
            {
                E T1v, T1w, T1y, T1A;
                T1v = FMS(KP250000000, T1s, T1l);
                T1w = KP559016994 * (T1r - T1o);
                Ci[WS(csi, 11)] = FMA(KP587785252, T1t, KP951056516 * T1u) + T1v - T1w;
                Ci[WS(csi, 6)] = FMA(KP951056516, T1t, T1v) + FNMS(KP587785252, T1u, T1w);
                T1y = FNMS(KP250000000, T1i, Tb);
                T1A = KP559016994 * (T1e - T1h);
                Cr[WS(csr, 11)] = FMA(KP587785252, T1x, T1y) + FNMA(KP951056516, T1z, T1A);
                Cr[WS(csr, 6)] = FMA(KP951056516, T1x, T1A) + FMA(KP587785252, T1z, T1y);
            }
        }
        /* Outputs 2, 3, 7, 8, 12: built from T1X, T1W and the per-group values T1K/T1L, T1H/T1I, T1R/T1S, T1O/T1P. */
        {
            E T1W, T1X, T1J, T1M, T1N, T21, T22, T23, T1Q, T1T, T1U, T1Y, T1Z, T20, T26;
            E T29, T2a, T2k, T2j, T2l, T2m, T2d, T2o, T2i;
            T1W = FNMS(KP587785252, T1j, T1V);
            T1X = Ta - T7;
            T1J = FNMS(KP125333233, T1I, KP1_984229402 * T1H);
            T1M = FMA(KP1_457937254, T1K, KP684547105 * T1L);
            T1N = T1J - T1M;
            T21 = FNMS(KP1_996053456, T1R, KP062790519 * T1S);
            T22 = FMA(KP1_541026485, T1O, KP637423989 * T1P);
            T23 = T21 - T22;
            T1Q = FNMS(KP770513242, T1P, KP1_274847979 * T1O);
            T1T = FMA(KP125581039, T1R, KP998026728 * T1S);
            T1U = T1Q - T1T;
            T1Y = FNMS(KP1_369094211, T1K, KP728968627 * T1L);
            T1Z = FMA(KP250666467, T1H, KP992114701 * T1I);
            T20 = T1Y - T1Z;
            {
                E T24, T25, T27, T28;
                T24 = FNMS(KP481753674, T1L, KP1_752613360 * T1K);
                T25 = FMA(KP851558583, T1H, KP904827052 * T1I);
                T26 = T24 - T25;
                T27 = FNMS(KP844327925, T1S, KP1_071653589 * T1R);
                T28 = FNMS(KP998026728, T1P, KP125581039 * T1O);
                T29 = T27 + T28;
                T2a = T26 + T29;
                T2k = T27 - T28;
                T2j = T24 + T25;
            }
            {
                E T2b, T2c, T2g, T2h;
                T2b = FNMS(KP425779291, T1I, KP1_809654104 * T1H);
                T2c = FMA(KP963507348, T1K, KP876306680 * T1L);
                T2l = T2c + T2b;
                T2g = FMA(KP1_688655851, T1R, KP535826794 * T1S);
                T2h = FMA(KP1_996053456, T1O, KP062790519 * T1P);
                T2m = T2g + T2h;
                T2d = T2b - T2c;
                T2o = T2l + T2m;
                T2i = T2g - T2h;
            }
            Ci[WS(csi, 2)] = T1W + T2a;
            Cr[WS(csr, 2)] = T1X + T2o;
            Ci[WS(csi, 3)] = T1N + T1U - T1W;
            Cr[WS(csr, 3)] = T1X + T20 + T23;
            Cr[WS(csr, 8)] = FMA(KP309016994, T20, T1X) + FNMA(KP809016994, T23, KP587785252 * (T1T + T1Q)) - (KP951056516 * (T1M + T1J));
            Ci[WS(csi, 8)] = FNMS(KP587785252, T21 + T22, KP309016994 * T1N) + FNMA(KP809016994, T1U, KP951056516 * (T1Y + T1Z)) - T1W;
            {
                E T2e, T2f, T2n, T2p;
                T2e = KP559016994 * (T26 - T29);
                T2f = FNMS(KP250000000, T2a, T1W);
                Ci[WS(csi, 7)] = FMA(KP951056516, T2d, T2e) + FNMS(KP587785252, T2i, T2f);
                Ci[WS(csi, 12)] = FMA(KP587785252, T2d, T2f) + FMS(KP951056516, T2i, T2e);
                T2n = KP559016994 * (T2l - T2m);
                T2p = FNMS(KP250000000, T2o, T1X);
                Cr[WS(csr, 7)] = FMA(KP951056516, T2j, KP587785252 * T2k) + T2n + T2p;
                Cr[WS(csr, 12)] = FMA(KP587785252, T2j, T2p) + FNMA(KP951056516, T2k, T2n);
            }
        }
    }
}
/*
 * hf_25 -- in-place radix-25 twiddle pass over the paired arrays cr / ci.
 *
 * Parameters:
 *   cr, ci  data arrays, updated in place.  Each loop iteration reads
 *           cr[WS(rs, k)] and ci[WS(rs, k)] for k = 0..24 and then writes
 *           every one of those 50 slots exactly once.
 *   W       twiddle table.  It is first advanced by (mb - 1) * 48 and then
 *           by 48 per iteration; each iteration reads W[0] .. W[47].
 *   rs      stride handed to WS() to address the 25 slots.
 *   mb, me  the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms      per-iteration step: cr moves forward by ms, ci moves BACKWARD
 *           by ms.
 *
 * Structure of one iteration:
 *   1. Slot 0 is used as is.  Every slot k >= 1 is first combined with the
 *      pair (wr, wi) = (W[2 * (k - 1)], W[2 * (k - 1) + 1]) as
 *          re = wr * cr[k] + wi * ci[k]
 *          im = wr * ci[k] - wi * cr[k]
 *   2. The 25 (re, im) pairs are combined through two layers of 5-point
 *      butterflies (constants KP250000000, KP559016994, KP618033988,
 *      KP951056516) joined by fixed rotations expressed with the remaining
 *      KP* constants.
 *   3. Results are stored back into cr / ci; several stores are negated or
 *      use FMS, so the output layout is not simply "real part in cr,
 *      imaginary part in ci".
 *
 * The ten temporaries declared at the top of the loop body (T7i, T6o, T6m,
 * T7o, T7m, T7h, T6n, T6f, T7j, T7n) are assigned inside the nested scopes
 * and consumed by the final stores after those scopes have closed.
 *
 * NOTE(review): the naming and shape look like FFTW genfft output for a
 * forward half-complex twiddle codelet -- confirm against the generator
 * before editing by hand; statement order here is scheduler-produced.
 * NOTE(review): this loop passes ONE argument to MAKE_VOLATILE_STRIDE while
 * hb_9 in the same file passes two -- confirm which macro signature is in
 * effect for this translation unit.
 */
static void hf_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP949179823, +0.949179823508441261575555465843363271711583843); DK(KP860541664, +0.860541664367944677098261680920518816412804187); DK(KP621716863, +0.621716863012209892444754556304102309693593202); DK(KP614372930, +0.614372930789563808870829930444362096004872855); DK(KP557913902, +0.557913902031834264187699648465567037992437152); DK(KP249506682, +0.249506682107067890488084201715862638334226305); DK(KP560319534, +0.560319534973832390111614715371676131169633784); DK(KP681693190, +0.681693190061530575150324149145440022633095390); DK(KP906616052, +0.906616052148196230441134447086066874408359177); DK(KP968479752, +0.968479752739016373193524836781420152702090879); DK(KP845997307, +0.845997307939530944175097360758058292389769300); DK(KP998026728, +0.998026728428271561952336806863450553336905220); DK(KP994076283, +0.994076283785401014123185814696322018529298887); DK(KP734762448, +0.734762448793050413546343770063151342619912334); DK(KP772036680, +0.772036680810363904029489473607579825330539880); DK(KP062914667, +0.062914667253649757225485955897349402364686947); DK(KP833417178, +0.833417178328688677408962550243238843138996060); DK(KP921177326, +0.921177326965143320250447435415066029359282231); DK(KP541454447, +0.541454447536312777046285590082819509052033189); DK(KP803003575, +0.803003575438660414833440593570376004635464850); DK(KP943557151, +0.943557151597354104399655195398983005179443399); DK(KP554608978, +0.554608978404018097464974850792216217022558774); DK(KP242145790, +0.242145790282157779872542093866183953459003101); DK(KP559154169, +0.559154169276087864842202529084232643714075927); DK(KP683113946, +0.683113946453479238701949862233725244439656928); DK(KP248028675, +0.248028675328619457762448260696444630363259177); DK(KP968583161, +0.968583161128631119490168375464735813836012403); DK(KP525970792, +0.525970792408939708442463226536226366643874659); DK(KP726211448, 
+0.726211448929902658173535992263577167607493062); DK(KP904730450, +0.904730450839922351881287709692877908104763647); DK(KP831864738, +0.831864738706457140726048799369896829771167132); DK(KP871714437, +0.871714437527667770979999223229522602943903653); DK(KP549754652, +0.549754652192770074288023275540779861653779767); DK(KP992114701, +0.992114701314477831049793042785778521453036709); DK(KP939062505, +0.939062505817492352556001843133229685779824606); DK(KP256756360, +0.256756360367726783319498520922669048172391148); DK(KP851038619, +0.851038619207379630836264138867114231259902550); DK(KP912575812, +0.912575812670962425556968549836277086778922727); DK(KP912018591, +0.912018591466481957908415381764119056233607330); DK(KP634619297, +0.634619297544148100711287640319130485732531031); DK(KP470564281, +0.470564281212251493087595091036643380879947982); DK(KP827271945, +0.827271945972475634034355757144307982555673741); DK(KP126329378, +0.126329378446108174786050455341811215027378105); DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP618033988, +0.618033988749894848204586834365638117720309180); INT m; for (m = mb, W = W + ((mb - 1) * 48); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 48, MAKE_VOLATILE_STRIDE(rs)) { E T7i, T6o, T6m, T7o, T7m, T7h, T6n, T6f, T7j, T7n; { E T6W, T5G, T3Y, T3M, T7q, T70, T6V, T7P, Tt, T3L, T5T, T45, T5Q, T4c, T3G; E T2G, T5P, T49, T5S, T42, T65, T4H, T68, T4A, T2Z, T11, T67, T4x, T64, T4E; E T5Y, T4W, T61, T4P, T3d, T1z, T60, T4M, T5X, T4T, T3g, T1G, T3q, T4q, T4j; E T26, T3i, T1M, T3k, T1S; { E T3u, T2e, T3E, T44, T4b, T2E, T3w, T2k, T3y, T2q; { E T1, T6R, T3P, T7, T3W, Tq, T9, Tc, Tb, T3U, Tk, T3Q, Ta; { E T3, T6, T2, T5; T1 = cr[0]; T6R = ci[0]; T3 = cr[WS(rs, 5)]; T6 = ci[WS(rs, 5)]; T2 = W[8]; T5 = W[9]; { E Tm, Tp, To, T3V, Tn, T3O, T4, Tl; Tm = cr[WS(rs, 15)]; Tp = ci[WS(rs, 
15)]; T3O = T2 * T6; T4 = T2 * T3; Tl = W[28]; To = W[29]; T3P = FNMS(T5, T3, T3O); T7 = FMA(T5, T6, T4); T3V = Tl * Tp; Tn = Tl * Tm; { E Tg, Tj, Tf, Ti, T3T, Th, T8; Tg = cr[WS(rs, 10)]; Tj = ci[WS(rs, 10)]; T3W = FNMS(To, Tm, T3V); Tq = FMA(To, Tp, Tn); Tf = W[18]; Ti = W[19]; T9 = cr[WS(rs, 20)]; Tc = ci[WS(rs, 20)]; T3T = Tf * Tj; Th = Tf * Tg; T8 = W[38]; Tb = W[39]; T3U = FNMS(Ti, Tg, T3T); Tk = FMA(Ti, Tj, Th); T3Q = T8 * Tc; Ta = T8 * T9; } } } { E T6T, T3X, T6Y, Tr, T3R, Td; T6T = T3U + T3W; T3X = T3U - T3W; T6Y = Tk - Tq; Tr = Tk + Tq; T3R = FNMS(Tb, T9, T3Q); Td = FMA(Tb, Tc, Ta); { E T3S, T6Z, Te, T6U, T6S, Ts; T3S = T3P - T3R; T6S = T3P + T3R; T6Z = T7 - Td; Te = T7 + Td; T6W = T6S - T6T; T6U = T6S + T6T; T5G = FNMS(KP618033988, T3S, T3X); T3Y = FMA(KP618033988, T3X, T3S); T3M = Te - Tr; Ts = Te + Tr; T7q = FMA(KP618033988, T6Y, T6Z); T70 = FNMS(KP618033988, T6Z, T6Y); T6V = FNMS(KP250000000, T6U, T6R); T7P = T6U + T6R; Tt = T1 + Ts; T3L = FNMS(KP250000000, Ts, T1); } } } { E T2g, T2j, T2m, T3v, T2h, T2p, T2l, T2i, T2o, T3x, T2n; { E T2a, T2d, T29, T2c; T2a = cr[WS(rs, 3)]; T2d = ci[WS(rs, 3)]; T29 = W[4]; T2c = W[5]; { E T2t, T2w, T2z, T3A, T2u, T2C, T2y, T2v, T2B, T3t, T2b, T2s, T2f; T2t = cr[WS(rs, 13)]; T2w = ci[WS(rs, 13)]; T3t = T29 * T2d; T2b = T29 * T2a; T2s = W[24]; T2z = cr[WS(rs, 18)]; T3u = FNMS(T2c, T2a, T3t); T2e = FMA(T2c, T2d, T2b); T3A = T2s * T2w; T2u = T2s * T2t; T2C = ci[WS(rs, 18)]; T2y = W[34]; T2v = W[25]; T2B = W[35]; { E T3B, T2x, T3D, T2D, T3C, T2A; T2g = cr[WS(rs, 8)]; T3C = T2y * T2C; T2A = T2y * T2z; T3B = FNMS(T2v, T2t, T3A); T2x = FMA(T2v, T2w, T2u); T3D = FNMS(T2B, T2z, T3C); T2D = FMA(T2B, T2C, T2A); T2j = ci[WS(rs, 8)]; T2f = W[14]; T3E = T3B + T3D; T44 = T3D - T3B; T4b = T2x - T2D; T2E = T2x + T2D; } T2m = cr[WS(rs, 23)]; T3v = T2f * T2j; T2h = T2f * T2g; T2p = ci[WS(rs, 23)]; T2l = W[44]; T2i = W[15]; T2o = W[45]; } } T3x = T2l * T2p; T2n = T2l * T2m; T3w = FNMS(T2i, T2g, T3v); T2k = FMA(T2i, T2j, T2h); T3y = 
FNMS(T2o, T2m, T3x); T2q = FMA(T2o, T2p, T2n); } { E T2N, Tz, T2X, T4G, T4z, TZ, T2P, TF, T2R, TL; { E TB, TE, TH, T2O, TC, TK, TG, TD, TJ, T2Q, TI; { E Tv, Ty, Tu, Tx; { E T48, T41, T47, T40, T43, T3z; Tv = cr[WS(rs, 1)]; T43 = T3y - T3w; T3z = T3w + T3y; { E T4a, T2r, T3F, T2F; T4a = T2k - T2q; T2r = T2k + T2q; T5T = FNMS(KP618033988, T43, T44); T45 = FMA(KP618033988, T44, T43); T3F = T3z + T3E; T48 = T3E - T3z; T5Q = FNMS(KP618033988, T4a, T4b); T4c = FMA(KP618033988, T4b, T4a); T2F = T2r + T2E; T41 = T2E - T2r; T3G = T3u + T3F; T47 = FNMS(KP250000000, T3F, T3u); T2G = T2e + T2F; T40 = FNMS(KP250000000, T2F, T2e); Ty = ci[WS(rs, 1)]; } T5P = FMA(KP559016994, T48, T47); T49 = FNMS(KP559016994, T48, T47); T5S = FMA(KP559016994, T41, T40); T42 = FNMS(KP559016994, T41, T40); Tu = W[0]; } Tx = W[1]; { E TO, TR, TU, T2T, TP, TX, TT, TQ, TW, T2M, Tw, TN, TA; TO = cr[WS(rs, 11)]; TR = ci[WS(rs, 11)]; T2M = Tu * Ty; Tw = Tu * Tv; TN = W[20]; TU = cr[WS(rs, 16)]; T2N = FNMS(Tx, Tv, T2M); Tz = FMA(Tx, Ty, Tw); T2T = TN * TR; TP = TN * TO; TX = ci[WS(rs, 16)]; TT = W[30]; TQ = W[21]; TW = W[31]; { E T2U, TS, T2W, TY, T2V, TV; TB = cr[WS(rs, 6)]; T2V = TT * TX; TV = TT * TU; T2U = FNMS(TQ, TO, T2T); TS = FMA(TQ, TR, TP); T2W = FNMS(TW, TU, T2V); TY = FMA(TW, TX, TV); TE = ci[WS(rs, 6)]; TA = W[10]; T2X = T2U + T2W; T4G = T2W - T2U; T4z = TY - TS; TZ = TS + TY; } TH = cr[WS(rs, 21)]; T2O = TA * TE; TC = TA * TB; TK = ci[WS(rs, 21)]; TG = W[40]; TD = W[11]; TJ = W[41]; } } T2Q = TG * TK; TI = TG * TH; T2P = FNMS(TD, TB, T2O); TF = FMA(TD, TE, TC); T2R = FNMS(TJ, TH, T2Q); TL = FMA(TJ, TK, TI); } { E T31, T17, T3b, T4V, T4O, T1x, T33, T1d, T35, T1j; { E T19, T1c, T1f, T32, T1a, T1i, T1e, T1b, T1h, T34, T1g; { E T13, T16, T12, T15; { E T4w, T4D, T4v, T4C, T4F, T2S; T13 = cr[WS(rs, 4)]; T4F = T2P - T2R; T2S = T2P + T2R; { E T4y, TM, T2Y, T10; T4y = TL - TF; TM = TF + TL; T65 = FMA(KP618033988, T4F, T4G); T4H = FNMS(KP618033988, T4G, T4F); T2Y = T2S + T2X; T4w = T2S - T2X; T68 = 
FNMS(KP618033988, T4y, T4z); T4A = FMA(KP618033988, T4z, T4y); T10 = TM + TZ; T4D = TM - TZ; T2Z = T2N + T2Y; T4v = FNMS(KP250000000, T2Y, T2N); T11 = Tz + T10; T4C = FNMS(KP250000000, T10, Tz); T16 = ci[WS(rs, 4)]; } T67 = FNMS(KP559016994, T4w, T4v); T4x = FMA(KP559016994, T4w, T4v); T64 = FNMS(KP559016994, T4D, T4C); T4E = FMA(KP559016994, T4D, T4C); T12 = W[6]; } T15 = W[7]; { E T1m, T1p, T1s, T37, T1n, T1v, T1r, T1o, T1u, T30, T14, T1l, T18; T1m = cr[WS(rs, 14)]; T1p = ci[WS(rs, 14)]; T30 = T12 * T16; T14 = T12 * T13; T1l = W[26]; T1s = cr[WS(rs, 19)]; T31 = FNMS(T15, T13, T30); T17 = FMA(T15, T16, T14); T37 = T1l * T1p; T1n = T1l * T1m; T1v = ci[WS(rs, 19)]; T1r = W[36]; T1o = W[27]; T1u = W[37]; { E T38, T1q, T3a, T1w, T39, T1t; T19 = cr[WS(rs, 9)]; T39 = T1r * T1v; T1t = T1r * T1s; T38 = FNMS(T1o, T1m, T37); T1q = FMA(T1o, T1p, T1n); T3a = FNMS(T1u, T1s, T39); T1w = FMA(T1u, T1v, T1t); T1c = ci[WS(rs, 9)]; T18 = W[16]; T3b = T38 + T3a; T4V = T3a - T38; T4O = T1w - T1q; T1x = T1q + T1w; } T1f = cr[WS(rs, 24)]; T32 = T18 * T1c; T1a = T18 * T19; T1i = ci[WS(rs, 24)]; T1e = W[46]; T1b = W[17]; T1h = W[47]; } } T34 = T1e * T1i; T1g = T1e * T1f; T33 = FNMS(T1b, T19, T32); T1d = FMA(T1b, T1c, T1a); T35 = FNMS(T1h, T1f, T34); T1j = FMA(T1h, T1i, T1g); } { E T1I, T1L, T1O, T3h, T1J, T1R, T1N, T1K, T1Q, T3j, T1P; { E T1C, T1F, T1B, T1E; { E T4L, T4S, T4K, T4R, T4U, T36; T1C = cr[WS(rs, 2)]; T4U = T35 - T33; T36 = T33 + T35; { E T4N, T1k, T3c, T1y; T4N = T1j - T1d; T1k = T1d + T1j; T5Y = FNMS(KP618033988, T4U, T4V); T4W = FMA(KP618033988, T4V, T4U); T3c = T36 + T3b; T4L = T3b - T36; T61 = FNMS(KP618033988, T4N, T4O); T4P = FMA(KP618033988, T4O, T4N); T1y = T1k + T1x; T4S = T1k - T1x; T3d = T31 + T3c; T4K = FNMS(KP250000000, T3c, T31); T1z = T17 + T1y; T4R = FNMS(KP250000000, T1y, T17); T1F = ci[WS(rs, 2)]; } T60 = FMA(KP559016994, T4L, T4K); T4M = FNMS(KP559016994, T4L, T4K); T5X = FNMS(KP559016994, T4S, T4R); T4T = FMA(KP559016994, T4S, T4R); T1B = W[2]; } T1E = 
W[3]; { E T1V, T1Y, T21, T3m, T1W, T24, T20, T1X, T23, T3f, T1D, T1U, T1H; T1V = cr[WS(rs, 12)]; T1Y = ci[WS(rs, 12)]; T3f = T1B * T1F; T1D = T1B * T1C; T1U = W[22]; T21 = cr[WS(rs, 17)]; T3g = FNMS(T1E, T1C, T3f); T1G = FMA(T1E, T1F, T1D); T3m = T1U * T1Y; T1W = T1U * T1V; T24 = ci[WS(rs, 17)]; T20 = W[32]; T1X = W[23]; T23 = W[33]; { E T3n, T1Z, T3p, T25, T3o, T22; T1I = cr[WS(rs, 7)]; T3o = T20 * T24; T22 = T20 * T21; T3n = FNMS(T1X, T1V, T3m); T1Z = FMA(T1X, T1Y, T1W); T3p = FNMS(T23, T21, T3o); T25 = FMA(T23, T24, T22); T1L = ci[WS(rs, 7)]; T1H = W[12]; T3q = T3n + T3p; T4q = T3n - T3p; T4j = T25 - T1Z; T26 = T1Z + T25; } T1O = cr[WS(rs, 22)]; T3h = T1H * T1L; T1J = T1H * T1I; T1R = ci[WS(rs, 22)]; T1N = W[42]; T1K = W[13]; T1Q = W[43]; } } T3j = T1N * T1R; T1P = T1N * T1O; T3i = FNMS(T1K, T1I, T3h); T1M = FMA(T1K, T1L, T1J); T3k = FNMS(T1Q, T1O, T3j); T1S = FMA(T1Q, T1R, T1P); } } } } { E T7Q, T5M, T5J, T7R, T5I, T5L, T7X, T7W, T5F, T6X, T5u, T7M, T7O, T5C, T5E; E T5t, T7J, T7N; { E T4r, T4k, T4h, T4o, T3K, T3I, T1A, T2H, T28; { E T3e, T4g, T4n, T4f, T4m, T3H, T4p, T3l; T7Q = T2Z + T3d; T3e = T2Z - T3d; T4p = T3k - T3i; T3l = T3i + T3k; { E T4i, T1T, T3r, T27, T3s; T4i = T1S - T1M; T1T = T1M + T1S; T5M = FMA(KP618033988, T4p, T4q); T4r = FNMS(KP618033988, T4q, T4p); T3r = T3l + T3q; T4g = T3q - T3l; T5J = FNMS(KP618033988, T4i, T4j); T4k = FMA(KP618033988, T4j, T4i); T27 = T1T + T26; T4n = T26 - T1T; T3s = T3g + T3r; T4f = FNMS(KP250000000, T3r, T3g); T28 = T1G + T27; T4m = FNMS(KP250000000, T27, T1G); T3H = T3s - T3G; T7R = T3s + T3G; } T5I = FMA(KP559016994, T4g, T4f); T4h = FNMS(KP559016994, T4g, T4f); T5L = FMA(KP559016994, T4n, T4m); T4o = FNMS(KP559016994, T4n, T4m); T3K = FNMS(KP618033988, T3e, T3H); T3I = FMA(KP618033988, T3H, T3e); } T1A = T11 + T1z; T7X = T1z - T11; T7W = T28 - T2G; T2H = T28 + T2G; { E T3Z, T5d, T7r, T7D, T5h, T5i, T5m, T5l, T59, T7K, T56, T7L, T7I, T7G, T52; E T50, T5w, T5g, T5q, T5A, T3N, T7p; T3N = FMA(KP559016994, T3M, T3L); 
T5F = FNMS(KP559016994, T3M, T3L); T6X = FNMS(KP559016994, T6W, T6V); T7p = FMA(KP559016994, T6W, T6V); { E T5o, T5p, T57, T4e, T4Y, T55, T4l, T4s, T4B, T5f, T5e, T4I; { E T46, T2K, T2J, T4d, T2I; T46 = FMA(KP951056516, T45, T42); T5o = FNMS(KP951056516, T45, T42); T2I = T1A + T2H; T2K = T1A - T2H; T3Z = FNMS(KP951056516, T3Y, T3N); T5d = FMA(KP951056516, T3Y, T3N); T7r = FNMS(KP951056516, T7q, T7p); T7D = FMA(KP951056516, T7q, T7p); cr[0] = Tt + T2I; T2J = FNMS(KP250000000, T2I, Tt); T5p = FNMS(KP951056516, T4c, T49); T4d = FMA(KP951056516, T4c, T49); { E T4Q, T4X, T2L, T3J; T4Q = FNMS(KP951056516, T4P, T4M); T5h = FMA(KP951056516, T4P, T4M); T5i = FNMS(KP951056516, T4W, T4T); T4X = FMA(KP951056516, T4W, T4T); T2L = FMA(KP559016994, T2K, T2J); T3J = FNMS(KP559016994, T2K, T2J); T57 = FMA(KP126329378, T46, T4d); T4e = FNMS(KP126329378, T4d, T46); cr[WS(rs, 5)] = FMA(KP951056516, T3I, T2L); ci[WS(rs, 4)] = FNMS(KP951056516, T3I, T2L); ci[WS(rs, 9)] = FMA(KP951056516, T3K, T3J); cr[WS(rs, 10)] = FNMS(KP951056516, T3K, T3J); T4Y = FMA(KP827271945, T4X, T4Q); T55 = FNMS(KP827271945, T4Q, T4X); } } T4l = FNMS(KP951056516, T4k, T4h); T5m = FMA(KP951056516, T4k, T4h); T5l = FNMS(KP951056516, T4r, T4o); T4s = FMA(KP951056516, T4r, T4o); T4B = FNMS(KP951056516, T4A, T4x); T5f = FMA(KP951056516, T4A, T4x); T5e = FMA(KP951056516, T4H, T4E); T4I = FNMS(KP951056516, T4H, T4E); { E T4u, T4Z, T4t, T58; T4t = FNMS(KP470564281, T4s, T4l); T58 = FMA(KP470564281, T4l, T4s); { E T4J, T54, T7E, T7F; T4J = FMA(KP634619297, T4I, T4B); T54 = FNMS(KP634619297, T4B, T4I); T59 = FNMS(KP912018591, T58, T57); T7E = FMA(KP912018591, T58, T57); T7K = FMA(KP912018591, T4t, T4e); T4u = FNMS(KP912018591, T4t, T4e); T56 = FMA(KP912575812, T55, T54); T7F = FNMS(KP912575812, T55, T54); T7L = FMA(KP912575812, T4Y, T4J); T4Z = FNMS(KP912575812, T4Y, T4J); T7I = FNMS(KP851038619, T7F, T7E); T7G = FMA(KP851038619, T7F, T7E); } T52 = FMA(KP851038619, T4Z, T4u); T50 = FNMS(KP851038619, T4Z, T4u); } T5w = 
FNMS(KP256756360, T5e, T5f); T5g = FMA(KP256756360, T5f, T5e); T5q = FMA(KP939062505, T5p, T5o); T5A = FNMS(KP939062505, T5o, T5p); } { E T5y, T7z, T5B, T7y, T7w, T7u, T5s; { E T5k, T5r, T5j, T5x; cr[WS(rs, 4)] = FNMS(KP992114701, T50, T3Z); T5j = FMA(KP634619297, T5i, T5h); T5x = FNMS(KP634619297, T5h, T5i); { E T5n, T5z, T7s, T7t; T5n = FMA(KP549754652, T5m, T5l); T5z = FNMS(KP549754652, T5l, T5m); T5y = FMA(KP871714437, T5x, T5w); T7s = FNMS(KP871714437, T5x, T5w); T7z = FNMS(KP871714437, T5j, T5g); T5k = FMA(KP871714437, T5j, T5g); T5B = FNMS(KP831864738, T5A, T5z); T7t = FMA(KP831864738, T5A, T5z); T7y = FNMS(KP831864738, T5q, T5n); T5r = FMA(KP831864738, T5q, T5n); T7w = FNMS(KP904730450, T7t, T7s); T7u = FMA(KP904730450, T7t, T7s); } ci[WS(rs, 20)] = FNMS(KP992114701, T7G, T7D); T5u = FNMS(KP904730450, T5r, T5k); T5s = FMA(KP904730450, T5r, T5k); } { E T5a, T5c, T7A, T7C, T7v, T53, T5b, T51, T7H, T7x, T7B; T5a = FNMS(KP726211448, T59, T56); T5c = FMA(KP525970792, T56, T59); ci[WS(rs, 23)] = FMA(KP968583161, T7u, T7r); cr[WS(rs, 1)] = FMA(KP968583161, T5s, T5d); T51 = FMA(KP248028675, T50, T3Z); T7A = FNMS(KP683113946, T7z, T7y); T7C = FMA(KP559154169, T7y, T7z); T7v = FNMS(KP242145790, T7u, T7r); T53 = FMA(KP554608978, T52, T51); T5b = FNMS(KP554608978, T52, T51); T7M = FNMS(KP525970792, T7L, T7K); T7O = FMA(KP726211448, T7K, T7L); ci[WS(rs, 10)] = FNMS(KP943557151, T5c, T5b); ci[WS(rs, 5)] = FMA(KP943557151, T5c, T5b); ci[0] = FMA(KP803003575, T5a, T53); cr[WS(rs, 9)] = FNMS(KP803003575, T5a, T53); T7x = FNMS(KP541454447, T7w, T7v); T7B = FMA(KP541454447, T7w, T7v); T7H = FMA(KP248028675, T7G, T7D); cr[WS(rs, 21)] = -(FMA(KP921177326, T7C, T7B)); ci[WS(rs, 18)] = FNMS(KP921177326, T7C, T7B); ci[WS(rs, 13)] = FMA(KP833417178, T7A, T7x); cr[WS(rs, 16)] = FMS(KP833417178, T7A, T7x); T5C = FMA(KP559154169, T5B, T5y); T5E = FNMS(KP683113946, T5y, T5B); T5t = FNMS(KP242145790, T5s, T5d); T7J = FNMS(KP554608978, T7I, T7H); T7N = FMA(KP554608978, T7I, T7H); } } } } 
{ E T7Y, T80, T5v, T5D; cr[WS(rs, 24)] = -(FMA(KP803003575, T7O, T7N)); ci[WS(rs, 15)] = FNMS(KP803003575, T7O, T7N); cr[WS(rs, 19)] = FMS(KP943557151, T7M, T7J); cr[WS(rs, 14)] = -(FMA(KP943557151, T7M, T7J)); T5v = FMA(KP541454447, T5u, T5t); T5D = FNMS(KP541454447, T5u, T5t); cr[WS(rs, 11)] = FNMS(KP833417178, T5E, T5D); ci[WS(rs, 8)] = FMA(KP833417178, T5E, T5D); cr[WS(rs, 6)] = FMA(KP921177326, T5C, T5v); ci[WS(rs, 3)] = FNMS(KP921177326, T5C, T5v); T7Y = FMA(KP618033988, T7X, T7W); T80 = FNMS(KP618033988, T7W, T7X); { E T6t, T6p, T5H, T7d, T71, T6u, T6y, T6x, T6l, T7k, T6i, T7l, T7g, T6c, T6e; E T6s, T6L, T6J, T6C; { E T6A, T6B, T5O, T6j, T6h, T6a, T6q, T5R, T5U, T6r, T5Z, T62; { E T5K, T7U, T7T, T5N, T7S; T6t = FNMS(KP951056516, T5J, T5I); T5K = FMA(KP951056516, T5J, T5I); T7U = T7Q - T7R; T7S = T7Q + T7R; T6p = FNMS(KP951056516, T5G, T5F); T5H = FMA(KP951056516, T5G, T5F); T7d = FNMS(KP951056516, T70, T6X); T71 = FMA(KP951056516, T70, T6X); ci[WS(rs, 24)] = T7S + T7P; T7T = FNMS(KP250000000, T7S, T7P); T5N = FMA(KP951056516, T5M, T5L); T6u = FNMS(KP951056516, T5M, T5L); { E T66, T69, T7Z, T7V; T6A = FMA(KP951056516, T65, T64); T66 = FNMS(KP951056516, T65, T64); T69 = FMA(KP951056516, T68, T67); T6B = FNMS(KP951056516, T68, T67); T7Z = FMA(KP559016994, T7U, T7T); T7V = FNMS(KP559016994, T7U, T7T); T5O = FMA(KP062914667, T5N, T5K); T6j = FNMS(KP062914667, T5K, T5N); ci[WS(rs, 14)] = FMA(KP951056516, T7Y, T7V); cr[WS(rs, 15)] = FMS(KP951056516, T7Y, T7V); ci[WS(rs, 19)] = FMA(KP951056516, T80, T7Z); cr[WS(rs, 20)] = FMS(KP951056516, T80, T7Z); T6h = FNMS(KP939062505, T66, T69); T6a = FMA(KP939062505, T69, T66); } } T6q = FMA(KP951056516, T5Q, T5P); T5R = FNMS(KP951056516, T5Q, T5P); T5U = FNMS(KP951056516, T5T, T5S); T6r = FMA(KP951056516, T5T, T5S); T6y = FMA(KP951056516, T5Y, T5X); T5Z = FNMS(KP951056516, T5Y, T5X); T62 = FMA(KP951056516, T61, T60); T6x = FNMS(KP951056516, T61, T60); { E T5W, T6b, T6k, T5V; T6k = FMA(KP827271945, T5R, T5U); T5V = 
FNMS(KP827271945, T5U, T5R); { E T6g, T63, T7e, T7f; T6g = FMA(KP126329378, T5Z, T62); T63 = FNMS(KP126329378, T62, T5Z); T7e = FMA(KP772036680, T6k, T6j); T6l = FNMS(KP772036680, T6k, T6j); T5W = FMA(KP772036680, T5V, T5O); T7k = FNMS(KP772036680, T5V, T5O); T7f = FNMS(KP734762448, T6h, T6g); T6i = FMA(KP734762448, T6h, T6g); T6b = FNMS(KP734762448, T6a, T63); T7l = FMA(KP734762448, T6a, T63); T7g = FMA(KP994076283, T7f, T7e); T7i = FNMS(KP994076283, T7f, T7e); } T6c = FNMS(KP994076283, T6b, T5W); T6e = FMA(KP994076283, T6b, T5W); } T6s = FMA(KP062914667, T6r, T6q); T6L = FNMS(KP062914667, T6q, T6r); T6J = FNMS(KP549754652, T6A, T6B); T6C = FMA(KP549754652, T6B, T6A); } { E T6N, T78, T6K, T79, T74, T76, T6E, T6G; { E T6w, T6D, T6M, T6v; cr[WS(rs, 3)] = FMA(KP998026728, T6c, T5H); T6M = FNMS(KP634619297, T6t, T6u); T6v = FMA(KP634619297, T6u, T6t); { E T6I, T6z, T72, T73; T6I = FMA(KP470564281, T6x, T6y); T6z = FNMS(KP470564281, T6y, T6x); T72 = FMA(KP845997307, T6M, T6L); T6N = FNMS(KP845997307, T6M, T6L); T6w = FMA(KP845997307, T6v, T6s); T78 = FNMS(KP845997307, T6v, T6s); T73 = FNMS(KP968479752, T6J, T6I); T6K = FMA(KP968479752, T6J, T6I); T6D = FMA(KP968479752, T6C, T6z); T79 = FNMS(KP968479752, T6C, T6z); T74 = FMA(KP906616052, T73, T72); T76 = FNMS(KP906616052, T73, T72); } ci[WS(rs, 21)] = FNMS(KP998026728, T7g, T7d); T6E = FMA(KP906616052, T6D, T6w); T6G = FNMS(KP906616052, T6D, T6w); } { E T7c, T7a, T6Q, T6O, T6F, T7b, T77, T75, T6d, T6P, T6H; T7c = FMA(KP681693190, T78, T79); T7a = FNMS(KP560319534, T79, T78); ci[WS(rs, 22)] = FNMS(KP998026728, T74, T71); cr[WS(rs, 2)] = FMA(KP998026728, T6E, T6p); T75 = FMA(KP249506682, T74, T71); T6Q = FNMS(KP560319534, T6K, T6N); T6O = FMA(KP681693190, T6N, T6K); T6F = FNMS(KP249506682, T6E, T6p); T7b = FMA(KP557913902, T76, T75); T77 = FNMS(KP557913902, T76, T75); T6o = FMA(KP614372930, T6i, T6l); T6m = FNMS(KP621716863, T6l, T6i); cr[WS(rs, 22)] = FMS(KP860541664, T7c, T7b); ci[WS(rs, 17)] = FMA(KP860541664, T7c, 
T7b); ci[WS(rs, 12)] = FNMS(KP949179823, T7a, T77); cr[WS(rs, 17)] = -(FMA(KP949179823, T7a, T77)); T6P = FMA(KP557913902, T6G, T6F); T6H = FNMS(KP557913902, T6G, T6F); T6d = FNMS(KP249506682, T6c, T5H); ci[WS(rs, 7)] = FMA(KP949179823, T6Q, T6P); cr[WS(rs, 12)] = FNMS(KP949179823, T6Q, T6P); cr[WS(rs, 7)] = FMA(KP860541664, T6O, T6H); ci[WS(rs, 2)] = FNMS(KP860541664, T6O, T6H); T7o = FMA(KP621716863, T7k, T7l); T7m = FNMS(KP614372930, T7l, T7k); T7h = FMA(KP249506682, T7g, T7d); T6n = FMA(KP557913902, T6e, T6d); T6f = FNMS(KP557913902, T6e, T6d); } } } } } } ci[WS(rs, 6)] = FNMS(KP949179823, T6o, T6n); ci[WS(rs, 11)] = FMA(KP949179823, T6o, T6n); cr[WS(rs, 8)] = FMA(KP943557151, T6m, T6f); ci[WS(rs, 1)] = FNMS(KP943557151, T6m, T6f); T7j = FNMS(KP557913902, T7i, T7h); T7n = FMA(KP557913902, T7i, T7h); cr[WS(rs, 23)] = -(FMA(KP943557151, T7o, T7n)); ci[WS(rs, 16)] = FNMS(KP943557151, T7o, T7n); cr[WS(rs, 18)] = FMS(KP949179823, T7m, T7j); cr[WS(rs, 13)] = -(FMA(KP949179823, T7m, T7j)); } }
static void hb_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP954188894, +0.954188894138671133499268364187245676532219158); DK(KP852868531, +0.852868531952443209628250963940074071936020296); DK(KP492403876, +0.492403876506104029683371512294761506835321626); DK(KP984807753, +0.984807753012208059366743024589523013670643252); DK(KP777861913, +0.777861913430206160028177977318626690410586096); DK(KP839099631, +0.839099631177280011763127298123181364687434283); DK(KP363970234, +0.363970234266202361351047882776834043890471784); DK(KP176326980, +0.176326980708464973471090386868618986121633062); DK(KP866025403, +0.866025403784438646763723170752936183471402627); DK(KP500000000, +0.500000000000000000000000000000000000000000000); { INT m; for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) { E T1X, T1S, T1U, T1P, T1Y, T1T; { E T5, Tl, TQ, T1y, T1b, T1J, Tg, TE, TW, T13, T10, Tz, Tw, TT, T1K; E T1B, T1L, T1E; { E T1, Th, T2, T3, Ti, Tj; T1 = cr[0]; Th = ci[WS(rs, 8)]; T2 = cr[WS(rs, 3)]; T3 = ci[WS(rs, 2)]; Ti = ci[WS(rs, 5)]; Tj = cr[WS(rs, 6)]; { E T12, Tb, TZ, TY, Ta, Tq, T11, Tr, Ts, TS, Te, Tt; { E T6, Tm, Tn, To, T9, Tc, Td, Tp; { E T7, T8, T1a, T4; T6 = cr[WS(rs, 1)]; T1a = T2 - T3; T4 = T2 + T3; { E TP, Tk, TO, T19; TP = Ti + Tj; Tk = Ti - Tj; T7 = cr[WS(rs, 4)]; T5 = T1 + T4; TO = FNMS(KP500000000, T4, T1); Tl = Th + Tk; T19 = FNMS(KP500000000, Tk, Th); TQ = FNMS(KP866025403, TP, TO); T1y = FMA(KP866025403, TP, TO); T1b = FMA(KP866025403, T1a, T19); T1J = FNMS(KP866025403, T1a, T19); T8 = ci[WS(rs, 1)]; } Tm = ci[WS(rs, 7)]; Tn = ci[WS(rs, 4)]; To = cr[WS(rs, 7)]; T9 = T7 + T8; T12 = T7 - T8; } Tb = cr[WS(rs, 2)]; TZ = Tn + To; Tp = Tn - To; TY = FNMS(KP500000000, T9, T6); Ta = T6 + T9; Tc = ci[WS(rs, 3)]; Td = ci[0]; Tq = Tm + Tp; T11 = FMS(KP500000000, Tp, Tm); Tr = ci[WS(rs, 6)]; Ts = cr[WS(rs, 5)]; TS = Td - Tc; Te = Tc + Td; Tt = cr[WS(rs, 8)]; } { E T1C, Tv, TR, T1D, 
T1z, T1A; { E TU, Tu, TV, Tf; TU = FNMS(KP500000000, Te, Tb); Tf = Tb + Te; Tu = Ts + Tt; TV = Ts - Tt; Tg = Ta + Tf; TE = Ta - Tf; TW = FMA(KP866025403, TV, TU); T1C = FNMS(KP866025403, TV, TU); Tv = Tr - Tu; TR = FMA(KP500000000, Tu, Tr); } T1z = FMA(KP866025403, T12, T11); T13 = FNMS(KP866025403, T12, T11); T10 = FNMS(KP866025403, TZ, TY); T1A = FMA(KP866025403, TZ, TY); Tz = Tv - Tq; Tw = Tq + Tv; T1D = FMA(KP866025403, TS, TR); TT = FNMS(KP866025403, TS, TR); T1K = FNMS(KP176326980, T1z, T1A); T1B = FMA(KP176326980, T1A, T1z); T1L = FNMS(KP363970234, T1C, T1D); T1E = FMA(KP363970234, T1D, T1C); } } } { E T1d, T14, T1c, TX; cr[0] = T5 + Tg; T1d = FNMS(KP839099631, T10, T13); T14 = FMA(KP839099631, T13, T10); T1c = FMA(KP176326980, TT, TW); TX = FNMS(KP176326980, TW, TT); ci[0] = Tl + Tw; { E TL, TK, TJ, Ty, TD; Ty = FNMS(KP500000000, Tg, T5); TD = FNMS(KP500000000, Tw, Tl); { E Tx, TC, TA, TI, TF; Tx = W[10]; TC = W[11]; TA = FNMS(KP866025403, Tz, Ty); TI = FMA(KP866025403, Tz, Ty); TF = FNMS(KP866025403, TE, TD); TL = FMA(KP866025403, TE, TD); { E TH, TB, TG, TM; TH = W[4]; TB = Tx * TA; TK = W[5]; TG = Tx * TF; TM = TH * TL; TJ = TH * TI; cr[WS(rs, 6)] = FNMS(TC, TF, TB); ci[WS(rs, 6)] = FMA(TC, TA, TG); ci[WS(rs, 3)] = FMA(TK, TI, TM); } } cr[WS(rs, 3)] = FNMS(TK, TL, TJ); { E T1k, T1p, T1l, T1q, T1m; { E T1e, T1j, T15, T1o; T1e = FNMS(KP777861913, T1d, T1c); T1j = FMA(KP777861913, T1d, T1c); T15 = FNMS(KP777861913, T14, TX); T1o = FMA(KP777861913, T14, TX); { E TN, T16, T1f, T17, T1s, T1v, T18, T1i, T1n, T1r, T1u; TN = W[0]; T16 = FNMS(KP984807753, T15, TQ); T1i = FMA(KP492403876, T15, TQ); T1f = FMA(KP984807753, T1e, T1b); T1n = FNMS(KP492403876, T1e, T1b); T17 = TN * T16; T1s = FMA(KP852868531, T1j, T1i); T1k = FNMS(KP852868531, T1j, T1i); T1v = FMA(KP852868531, T1o, T1n); T1p = FNMS(KP852868531, T1o, T1n); T18 = W[1]; T1r = W[6]; T1u = W[7]; { E T1h, T1g, T1w, T1t; T1h = W[12]; cr[WS(rs, 1)] = FNMS(T18, T1f, T17); T1g = T18 * T16; T1w = T1r * T1v; T1t = 
T1r * T1s; T1l = T1h * T1k; ci[WS(rs, 1)] = FMA(TN, T1f, T1g); ci[WS(rs, 4)] = FMA(T1u, T1s, T1w); cr[WS(rs, 4)] = FNMS(T1u, T1v, T1t); T1q = T1h * T1p; } T1m = W[13]; } } { E T1F, T1W, T1R, T1V, T1N, T1M, T1x, T1I; T1F = FNMS(KP954188894, T1E, T1B); T1W = FMA(KP954188894, T1E, T1B); T1M = FNMS(KP954188894, T1L, T1K); T1R = FMA(KP954188894, T1L, T1K); ci[WS(rs, 7)] = FMA(T1m, T1k, T1q); cr[WS(rs, 7)] = FNMS(T1m, T1p, T1l); T1V = FNMS(KP492403876, T1M, T1J); T1N = FMA(KP984807753, T1M, T1J); T1x = W[2]; T1I = W[3]; { E T23, T22, T20, T1Z, T24, T21; T1X = FMA(KP852868531, T1W, T1V); T23 = FNMS(KP852868531, T1W, T1V); { E T1G, T1Q, T1O, T1H; T1G = FMA(KP984807753, T1F, T1y); T1Q = FNMS(KP492403876, T1F, T1y); T1O = T1x * T1N; T22 = W[15]; T1H = T1x * T1G; T20 = FMA(KP852868531, T1R, T1Q); T1S = FNMS(KP852868531, T1R, T1Q); ci[WS(rs, 2)] = FMA(T1I, T1G, T1O); cr[WS(rs, 2)] = FNMS(T1I, T1N, T1H); T1Z = W[14]; T24 = T22 * T20; } T1U = W[9]; T21 = T1Z * T20; ci[WS(rs, 8)] = FMA(T1Z, T23, T24); T1P = W[8]; T1Y = T1U * T1S; cr[WS(rs, 8)] = FNMS(T22, T23, T21); } } } } } } T1T = T1P * T1S; ci[WS(rs, 5)] = FMA(T1P, T1X, T1Y); cr[WS(rs, 5)] = FNMS(T1U, T1X, T1T); } } }
static void e01_8(const R *I, R *O, stride is, stride os, INT v, INT ivs, INT ovs) { DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); INT i; for (i = v; i > 0; i = i - 1, I = I + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) { E T8, Td, Th, T7, Tp, Tl, Te, Tb; { E Tj, T3, Tk, T6, T9, Ta; { E T1, T2, T4, T5; T1 = I[0]; T2 = I[WS(is, 4)]; T4 = I[WS(is, 2)]; T5 = I[WS(is, 6)]; T8 = I[WS(is, 1)]; Tj = FNMS(KP1_414213562, T2, T1); T3 = FMA(KP1_414213562, T2, T1); Tk = FMS(KP414213562, T4, T5); T6 = FMA(KP414213562, T5, T4); Td = I[WS(is, 7)]; T9 = I[WS(is, 5)]; Ta = I[WS(is, 3)]; } Th = FNMS(KP1_847759065, T6, T3); T7 = FMA(KP1_847759065, T6, T3); Tp = FNMS(KP1_847759065, Tk, Tj); Tl = FMA(KP1_847759065, Tk, Tj); Te = Ta - T9; Tb = T9 + Ta; } { E Tn, Tf, Tc, Tm; Tn = FNMS(KP707106781, Te, Td); Tf = FMA(KP707106781, Te, Td); Tc = FMA(KP707106781, Tb, T8); Tm = FNMS(KP707106781, Tb, T8); { E Tq, To, Tg, Ti; Tq = FMA(KP668178637, Tm, Tn); To = FNMS(KP668178637, Tn, Tm); Tg = FMA(KP198912367, Tf, Tc); Ti = FNMS(KP198912367, Tc, Tf); O[WS(os, 1)] = FMA(KP1_662939224, To, Tl); O[WS(os, 6)] = FNMS(KP1_662939224, To, Tl); O[WS(os, 2)] = FMA(KP1_662939224, Tq, Tp); O[WS(os, 5)] = FNMS(KP1_662939224, Tq, Tp); O[WS(os, 4)] = FMA(KP1_961570560, Ti, Th); O[WS(os, 3)] = FNMS(KP1_961570560, Ti, Th); O[0] = FMA(KP1_961570560, Tg, T7); O[WS(os, 7)] = FNMS(KP1_961570560, Tg, T7); } } } }
static void r2cfII_10(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP618033988, +0.618033988749894848204586834365638117720309180); INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E Tq, Ti, Tk, Tu, Tw, Tp, Tb, Tj, Tr, Tv; { E T1, To, Ts, Tt, T8, Ta, Te, Tm, Tl, Th, Tn, T9; T1 = R0[0]; To = R1[WS(rs, 2)]; { E T2, T3, T5, T6; T2 = R0[WS(rs, 2)]; T3 = R0[WS(rs, 3)]; T5 = R0[WS(rs, 4)]; T6 = R0[WS(rs, 1)]; { E Tc, T4, T7, Td, Tf, Tg; Tc = R1[0]; Ts = T2 + T3; T4 = T2 - T3; Tt = T5 + T6; T7 = T5 - T6; Td = R1[WS(rs, 4)]; Tf = R1[WS(rs, 1)]; Tg = R1[WS(rs, 3)]; T8 = T4 + T7; Ta = T4 - T7; Te = Tc - Td; Tm = Tc + Td; Tl = Tf + Tg; Th = Tf - Tg; } } Cr[WS(csr, 2)] = T1 + T8; Tn = Tl - Tm; Tq = Tm + Tl; Ti = FMA(KP618033988, Th, Te); Tk = FNMS(KP618033988, Te, Th); Ci[WS(csi, 2)] = Tn - To; T9 = FNMS(KP250000000, T8, T1); Tu = FMA(KP618033988, Tt, Ts); Tw = FNMS(KP618033988, Ts, Tt); Tp = FMA(KP250000000, Tn, To); Tb = FMA(KP559016994, Ta, T9); Tj = FNMS(KP559016994, Ta, T9); } Tr = FMA(KP559016994, Tq, Tp); Tv = FNMS(KP559016994, Tq, Tp); Cr[WS(csr, 1)] = FNMS(KP951056516, Tk, Tj); Cr[WS(csr, 3)] = FMA(KP951056516, Tk, Tj); Cr[0] = FMA(KP951056516, Ti, Tb); Cr[WS(csr, 4)] = FNMS(KP951056516, Ti, Tb); Ci[WS(csi, 1)] = FNMS(KP951056516, Tw, Tv); Ci[WS(csi, 3)] = FMA(KP951056516, Tw, Tv); Ci[WS(csi, 4)] = FMS(KP951056516, Tu, Tr); Ci[0] = -(FMA(KP951056516, Tu, Tr)); } }
static void r2cfII_12(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP866025403, +0.866025403784438646763723170752936183471402627); DK(KP500000000, +0.500000000000000000000000000000000000000000000); INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E TD, TB, Tp, T9, Tq, Tr, TE, To, Ts, TC; { E T8, T1, Tv, Tm, TF, Tz, Tl, Ta, Tb, Tt, TA, T4, Tc; { E Tx, Th, Ti, Tj, Ty, T6, T7, T2, T3, Tk; Tx = R0[WS(rs, 3)]; T6 = R0[WS(rs, 5)]; T7 = R0[WS(rs, 1)]; Th = R1[WS(rs, 4)]; Ti = R1[WS(rs, 2)]; Tj = R1[0]; Ty = T6 + T7; T8 = T6 - T7; T1 = R0[0]; Tv = Ti - Tj - Th; Tk = Ti - Tj; Tm = Ti + Tj; TF = Tx - Ty; Tz = FMA(KP500000000, Ty, Tx); T2 = R0[WS(rs, 2)]; T3 = R0[WS(rs, 4)]; Tl = FMA(KP500000000, Tk, Th); Ta = R1[WS(rs, 1)]; Tb = R1[WS(rs, 3)]; Tt = T1 + T3 - T2; TA = T3 + T2; T4 = T2 - T3; Tc = R1[WS(rs, 5)]; } { E Tn, Tg, T5, Tu; TD = FNMS(KP866025403, TA, Tz); TB = FMA(KP866025403, TA, Tz); T5 = FMA(KP500000000, T4, T1); Tu = Ta + Tc - Tb; { E Td, Tf, TG, Tw, Te; Td = Tb - Tc; Tf = Tc + Tb; Tp = FMA(KP866025403, T8, T5); T9 = FNMS(KP866025403, T8, T5); TG = Tv - Tu; Tw = Tu + Tv; Te = FMA(KP500000000, Td, Ta); Tq = FMA(KP866025403, Tm, Tl); Tn = FNMS(KP866025403, Tm, Tl); Ci[WS(csi, 1)] = FMA(KP707106781, TG, TF); Ci[WS(csi, 4)] = FMS(KP707106781, TG, TF); Cr[WS(csr, 4)] = FMA(KP707106781, Tw, Tt); Cr[WS(csr, 1)] = FNMS(KP707106781, Tw, Tt); Tg = FNMS(KP866025403, Tf, Te); Tr = FMA(KP866025403, Tf, Te); } TE = Tg + Tn; To = Tg - Tn; } } Ci[WS(csi, 2)] = FMS(KP707106781, TE, TD); Ci[WS(csi, 3)] = FMA(KP707106781, TE, TD); Cr[0] = FMA(KP707106781, To, T9); Cr[WS(csr, 5)] = FNMS(KP707106781, To, T9); Ts = Tq - Tr; TC = Tr + Tq; Ci[0] = -(FMA(KP707106781, TC, TB)); Ci[WS(csi, 5)] = FNMS(KP707106781, TC, TB); Cr[WS(csr, 2)] = 
FMA(KP707106781, Ts, Tp); Cr[WS(csr, 3)] = FNMS(KP707106781, Ts, Tp); } }
static void r2cbIII_25(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP979740652, +0.979740652857618686258237536568998933733477632); DK(KP1_752613360, +1.752613360087727174616231807844125166798128477); DK(KP438153340, +0.438153340021931793654057951961031291699532119); DK(KP963507348, +0.963507348203430549974383005744259307057084020); DK(KP595480289, +0.595480289600000014706716770488118292997907308); DK(KP641441904, +0.641441904830606407298806329068862424939687989); DK(KP1_606007150, +1.606007150877320829666881187140752009270929701); DK(KP1_721083328, +1.721083328735889354196523361841037632825608373); DK(KP1_011627398, +1.011627398597394192215998921771049272931807941); DK(KP1_809654104, +1.809654104932039055427337295865395187940827822); DK(KP452413526, +0.452413526233009763856834323966348796985206956); DK(KP933137358, +0.933137358350283770603023973254446451924190884); DK(KP576710603, +0.576710603632765877371579268136471017090111488); DK(KP662318342, +0.662318342759882818626911127577439236802190210); DK(KP470564281, +0.470564281212251493087595091036643380879947982); DK(KP634619297, +0.634619297544148100711287640319130485732531031); DK(KP1_842354653, +1.842354653930286640500894870830132058718564461); DK(KP1_666834356, +1.666834356657377354817925100486477686277992119); DK(KP1_082908895, +1.082908895072625554092571180165639018104066379); DK(KP1_937166322, +1.937166322257262238980336750929471627672024806); DK(KP484291580, +0.484291580564315559745084187732367906918006201); DK(KP904730450, +0.904730450839922351881287709692877908104763647); DK(KP683113946, +0.683113946453479238701949862233725244439656928); DK(KP559154169, +0.559154169276087864842202529084232643714075927); DK(KP549754652, +0.549754652192770074288023275540779861653779767); DK(KP256756360, +0.256756360367726783319498520922669048172391148); DK(KP1_386580726, +1.386580726567734802700860150804827247498955921); DK(KP1_898359647, 
+1.898359647016882523151110931686726543423167685); DK(KP1_115827804, +1.115827804063668528375399296931134075984874304); DK(KP1_996053456, +1.996053456856543123904673613726901106673810439); DK(KP499013364, +0.499013364214135780976168403431725276668452610); DK(KP730409924, +0.730409924561256563751459444999838399157094302); DK(KP451418159, +0.451418159099103183892477933432151804893354132); DK(KP846146756, +0.846146756728608505452954290121135880883743802); DK(KP062914667, +0.062914667253649757225485955897349402364686947); DK(KP939062505, +0.939062505817492352556001843133229685779824606); DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); DK(KP500000000, +0.500000000000000000000000000000000000000000000); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); DK(KP618033988, +0.618033988749894848204586834365638117720309180); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(100, rs), MAKE_VOLATILE_STRIDE(100, csr), MAKE_VOLATILE_STRIDE(100, csi)) { E TS, T1O, T5, TP, T1N, Tz, Ty, Te, T17, T2i, T1B, T1V, T10, T2h, T1C; E T1S, TI, TH, Tn, T1m, T2e, T1y, T1Z, T1f, T2f, T1z, T22, TQ, TR; TQ = Ci[WS(csi, 7)]; TR = Ci[WS(csi, 2)]; TS = FMA(KP618033988, TR, TQ); T1O = FNMS(KP618033988, TQ, TR); { E T1, T4, TO, T2, T3, TN; T1 = Cr[WS(csr, 12)]; T2 = Cr[WS(csr, 7)]; T3 = Cr[WS(csr, 2)]; T4 = T2 + T3; TO = T3 - T2; T5 = FMA(KP2_000000000, T4, T1); TN = FNMS(KP500000000, T4, T1); TP = FNMS(KP1_118033988, TO, TN); T1N = FMA(KP1_118033988, TO, TN); } { E T6, Td, T15, TV, T14, T12, TX, TY; T6 = Cr[WS(csr, 11)]; Tz = Ci[WS(csi, 11)]; { E T7, T8, T9, Ta, Tb, Tc; T7 = Cr[WS(csr, 6)]; T8 = Cr[WS(csr, 8)]; T9 = T7 + T8; Ta = 
Cr[WS(csr, 1)]; Tb = Cr[WS(csr, 3)]; Tc = Ta + Tb; Td = T9 + Tc; T15 = Tb - Ta; TV = Tc - T9; T14 = T8 - T7; } { E Ts, Tt, Tu, Tv, Tw, Tx; Ts = Ci[WS(csi, 8)]; Tt = Ci[WS(csi, 6)]; Tu = Ts - Tt; Tv = Ci[WS(csi, 3)]; Tw = Ci[WS(csi, 1)]; Tx = Tv - Tw; Ty = Tu + Tx; T12 = Tx - Tu; TX = Tt + Ts; TY = Tw + Tv; } Te = T6 + Td; { E T16, T1U, T13, T1T, T11; T16 = FMA(KP618033988, T15, T14); T1U = FNMS(KP618033988, T14, T15); T11 = FMA(KP250000000, Ty, Tz); T13 = FMA(KP559016994, T12, T11); T1T = FNMS(KP559016994, T12, T11); T17 = FMA(KP951056516, T16, T13); T2i = FMA(KP951056516, T1U, T1T); T1B = FNMS(KP951056516, T16, T13); T1V = FNMS(KP951056516, T1U, T1T); } { E TZ, T1R, TW, T1Q, TU; TZ = FMA(KP618033988, TY, TX); T1R = FNMS(KP618033988, TX, TY); TU = FMS(KP250000000, Td, T6); TW = FMA(KP559016994, TV, TU); T1Q = FNMS(KP559016994, TV, TU); T10 = FNMS(KP951056516, TZ, TW); T2h = FNMS(KP951056516, T1R, T1Q); T1C = FMA(KP951056516, TZ, TW); T1S = FMA(KP951056516, T1R, T1Q); } } { E Tf, Tm, T1k, T1a, T1j, T1h, T1c, T1d; Tf = Cr[WS(csr, 10)]; TI = Ci[WS(csi, 10)]; { E Tg, Th, Ti, Tj, Tk, Tl; Tg = Cr[WS(csr, 5)]; Th = Cr[WS(csr, 9)]; Ti = Tg + Th; Tj = Cr[0]; Tk = Cr[WS(csr, 4)]; Tl = Tj + Tk; Tm = Ti + Tl; T1k = Tk - Tj; T1a = Ti - Tl; T1j = Tg - Th; } { E TB, TC, TD, TE, TF, TG; TB = Ci[WS(csi, 9)]; TC = Ci[WS(csi, 5)]; TD = TB - TC; TE = Ci[WS(csi, 4)]; TF = Ci[0]; TG = TE - TF; TH = TD + TG; T1h = TD - TG; T1c = TC + TB; T1d = TF + TE; } Tn = Tf + Tm; { E T1l, T1Y, T1i, T1X, T1g; T1l = FNMS(KP618033988, T1k, T1j); T1Y = FMA(KP618033988, T1j, T1k); T1g = FMA(KP250000000, TH, TI); T1i = FNMS(KP559016994, T1h, T1g); T1X = FMA(KP559016994, T1h, T1g); T1m = FNMS(KP951056516, T1l, T1i); T2e = FMA(KP951056516, T1Y, T1X); T1y = FMA(KP951056516, T1l, T1i); T1Z = FNMS(KP951056516, T1Y, T1X); } { E T1e, T21, T1b, T20, T19; T1e = FMA(KP618033988, T1d, T1c); T21 = FNMS(KP618033988, T1c, T1d); T19 = FMS(KP250000000, Tm, Tf); T1b = FNMS(KP559016994, T1a, T19); T20 = FMA(KP559016994, 
T1a, T19); T1f = FNMS(KP951056516, T1e, T1b); T2f = FNMS(KP951056516, T21, T20); T1z = FMA(KP951056516, T1e, T1b); T22 = FMA(KP951056516, T21, T20); } } { E Tq, To, Tp, TK, TM, TA, TJ, TL, Tr; Tq = Tn - Te; To = Te + Tn; Tp = FNMS(KP500000000, To, T5); TA = Ty - Tz; TJ = TH - TI; TK = FMA(KP618033988, TJ, TA); TM = FNMS(KP618033988, TA, TJ); R0[0] = FMA(KP2_000000000, To, T5); TL = FMA(KP1_118033988, Tq, Tp); R0[WS(rs, 5)] = FMA(KP1_902113032, TM, TL); R1[WS(rs, 7)] = FMS(KP1_902113032, TM, TL); Tr = FNMS(KP1_118033988, Tq, Tp); R1[WS(rs, 2)] = FMS(KP1_902113032, TK, Tr); R0[WS(rs, 10)] = FMA(KP1_902113032, TK, Tr); } { E T2q, T2s, T2d, T2k, T2l, T2m, T2r, T2n; { E T2o, T2p, T2g, T2j; T2o = FMA(KP939062505, T2h, T2i); T2p = FMA(KP062914667, T2e, T2f); T2q = FMA(KP846146756, T2p, T2o); T2s = FNMS(KP451418159, T2o, T2p); T2d = FMA(KP1_902113032, T1O, T1N); T2g = FNMS(KP062914667, T2f, T2e); T2j = FNMS(KP939062505, T2i, T2h); T2k = FNMS(KP730409924, T2j, T2g); T2l = FNMS(KP499013364, T2k, T2d); T2m = FMA(KP730409924, T2j, T2g); } R1[WS(rs, 1)] = -(FMA(KP1_996053456, T2k, T2d)); T2r = FMA(KP1_115827804, T2m, T2l); R1[WS(rs, 6)] = FMS(KP1_898359647, T2s, T2r); R0[WS(rs, 9)] = FMA(KP1_898359647, T2s, T2r); T2n = FNMS(KP1_115827804, T2m, T2l); R0[WS(rs, 4)] = FMA(KP1_386580726, T2q, T2n); R1[WS(rs, 11)] = FMS(KP1_386580726, T2q, T2n); } { E T1u, T1w, TT, T1o, T1p, T1q, T1v, T1r; { E T1s, T1t, T18, T1n; T1s = FMA(KP256756360, T10, T17); T1t = FMA(KP549754652, T1f, T1m); T1u = FMA(KP559154169, T1t, T1s); T1w = FNMS(KP683113946, T1s, T1t); TT = FMA(KP1_902113032, TS, TP); T18 = FNMS(KP256756360, T17, T10); T1n = FNMS(KP549754652, T1m, T1f); T1o = FMA(KP904730450, T1n, T18); T1p = FMA(KP484291580, T1o, TT); T1q = FNMS(KP904730450, T1n, T18); } R1[0] = FMS(KP1_937166322, T1o, TT); T1v = FMA(KP1_082908895, T1q, T1p); R1[WS(rs, 5)] = FMS(KP1_666834356, T1w, T1v); R0[WS(rs, 8)] = FMA(KP1_666834356, T1w, T1v); T1r = FNMS(KP1_082908895, T1q, T1p); R0[WS(rs, 3)] = FMA(KP1_842354653, 
T1u, T1r); R1[WS(rs, 10)] = FMS(KP1_842354653, T1u, T1r); } { E T1K, T1M, T1x, T1E, T1F, T1G, T1L, T1H; { E T1I, T1J, T1A, T1D; T1I = FMA(KP634619297, T1B, T1C); T1J = FNMS(KP470564281, T1y, T1z); T1K = FMA(KP662318342, T1J, T1I); T1M = FNMS(KP576710603, T1I, T1J); T1x = FNMS(KP1_902113032, TS, TP); T1A = FMA(KP470564281, T1z, T1y); T1D = FNMS(KP634619297, T1C, T1B); T1E = FMA(KP933137358, T1D, T1A); T1F = FNMS(KP452413526, T1E, T1x); T1G = FNMS(KP933137358, T1D, T1A); } R0[WS(rs, 2)] = FMA(KP1_809654104, T1E, T1x); T1L = FMA(KP1_011627398, T1G, T1F); R0[WS(rs, 7)] = FNMS(KP1_721083328, T1M, T1L); R1[WS(rs, 9)] = -(FMA(KP1_721083328, T1M, T1L)); T1H = FNMS(KP1_011627398, T1G, T1F); R1[WS(rs, 4)] = -(FMA(KP1_606007150, T1K, T1H)); R0[WS(rs, 12)] = FNMS(KP1_606007150, T1K, T1H); } { E T2a, T2c, T1P, T24, T25, T26, T2b, T27; { E T28, T29, T1W, T23; T28 = FMA(KP634619297, T1Z, T22); T29 = FMA(KP549754652, T1S, T1V); T2a = FNMS(KP641441904, T29, T28); T2c = FMA(KP595480289, T28, T29); T1P = FNMS(KP1_902113032, T1O, T1N); T1W = FNMS(KP549754652, T1V, T1S); T23 = FNMS(KP634619297, T22, T1Z); T24 = FNMS(KP963507348, T23, T1W); T25 = FMA(KP438153340, T24, T1P); T26 = FMA(KP963507348, T23, T1W); } R0[WS(rs, 1)] = FNMS(KP1_752613360, T24, T1P); T2b = FNMS(KP979740652, T26, T25); R0[WS(rs, 11)] = FNMS(KP1_666834356, T2c, T2b); R1[WS(rs, 3)] = -(FMA(KP1_666834356, T2c, T2b)); T27 = FMA(KP979740652, T26, T25); R0[WS(rs, 6)] = FNMS(KP1_606007150, T2a, T27); R1[WS(rs, 8)] = -(FMA(KP1_606007150, T2a, T27)); } } } }
static void hf_9(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP777861913, +0.777861913430206160028177977318626690410586096); DK(KP852868531, +0.852868531952443209628250963940074071936020296); DK(KP839099631, +0.839099631177280011763127298123181364687434283); DK(KP492403876, +0.492403876506104029683371512294761506835321626); DK(KP984807753, +0.984807753012208059366743024589523013670643252); DK(KP954188894, +0.954188894138671133499268364187245676532219158); DK(KP363970234, +0.363970234266202361351047882776834043890471784); DK(KP176326980, +0.176326980708464973471090386868618986121633062); DK(KP866025403, +0.866025403784438646763723170752936183471402627); DK(KP500000000, +0.500000000000000000000000000000000000000000000); INT m; for (m = mb, W = W + ((mb - 1) * 16); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 16, MAKE_VOLATILE_STRIDE(rs)) { E T20, T1Z; { E T1, T1P, T1Q, T10, T1S, Te, TB, T1d, T1a, T19, T1M, TE, T1c, Tz, T1n; E TC, TH, TK, T1k, TR, TG, TJ, TD; T1 = cr[0]; T1P = ci[0]; { E T9, Tc, TY, Ta, Tb, TX, T7; { E T3, T6, T8, TW, T4, T2, T5; T3 = cr[WS(rs, 3)]; T6 = ci[WS(rs, 3)]; T2 = W[4]; T9 = cr[WS(rs, 6)]; Tc = ci[WS(rs, 6)]; T8 = W[10]; TW = T2 * T6; T4 = T2 * T3; T5 = W[5]; TY = T8 * Tc; Ta = T8 * T9; Tb = W[11]; TX = FNMS(T5, T3, TW); T7 = FMA(T5, T6, T4); } { E Th, Tk, Ti, T12, Tn, Tq, Tp, T17, Tx, T14, To, Tj, TZ, Td, Tg; E TA, Tl, Ty; Th = cr[WS(rs, 1)]; TZ = FNMS(Tb, T9, TY); Td = FMA(Tb, Tc, Ta); Tk = ci[WS(rs, 1)]; Tg = W[0]; T1Q = TX + TZ; T10 = TX - TZ; T1S = Td - T7; Te = T7 + Td; Ti = Tg * Th; T12 = Tg * Tk; { E Tt, Tw, Ts, Tv, T16, Tu, Tm; Tt = cr[WS(rs, 7)]; Tw = ci[WS(rs, 7)]; Ts = W[12]; Tv = W[13]; Tn = cr[WS(rs, 4)]; Tq = ci[WS(rs, 4)]; T16 = Ts * Tw; Tu = Ts * Tt; Tm = W[6]; Tp = W[7]; T17 = FNMS(Tv, Tt, T16); Tx = FMA(Tv, Tw, Tu); T14 = Tm * Tq; To = Tm * Tn; } Tj = W[1]; TB = cr[WS(rs, 2)]; { E T15, Tr, T13, T18; T15 = FNMS(Tp, Tn, T14); Tr = FMA(Tp, Tq, To); T13 = FNMS(Tj, Th, T12); Tl = FMA(Tj, Tk, Ti); 
T18 = T15 + T17; T1d = T15 - T17; Ty = Tr + Tx; T1a = Tr - Tx; T19 = FNMS(KP500000000, T18, T13); T1M = T13 + T18; TE = ci[WS(rs, 2)]; } T1c = FNMS(KP500000000, Ty, Tl); Tz = Tl + Ty; TA = W[2]; { E TN, TQ, TP, T1j, TO, TM; TN = cr[WS(rs, 8)]; TQ = ci[WS(rs, 8)]; TM = W[14]; T1n = TA * TE; TC = TA * TB; TP = W[15]; T1j = TM * TQ; TO = TM * TN; TH = cr[WS(rs, 5)]; TK = ci[WS(rs, 5)]; T1k = FNMS(TP, TN, T1j); TR = FMA(TP, TQ, TO); TG = W[8]; TJ = W[9]; } TD = W[3]; } } { E TV, Tf, T21, T1R, T1l, T1r, T1q, T1N, TT, T1g; { E T1o, TF, T1i, TL, T1h, TI, TS, T1p; TV = FNMS(KP500000000, Te, T1); Tf = T1 + Te; T1h = TG * TK; TI = TG * TH; T1o = FNMS(TD, TB, T1n); TF = FMA(TD, TE, TC); T1i = FNMS(TJ, TH, T1h); TL = FMA(TJ, TK, TI); T21 = T1Q + T1P; T1R = FNMS(KP500000000, T1Q, T1P); T1p = T1i + T1k; T1l = T1i - T1k; TS = TL + TR; T1r = TR - TL; T1q = FNMS(KP500000000, T1p, T1o); T1N = T1o + T1p; TT = TF + TS; T1g = FNMS(KP500000000, TS, TF); } { E T11, T1z, T1E, T1D, T1X, T1T, T1I, T1C, T1Y, T1y, T1u, T24, TU; T24 = TT - Tz; TU = Tz + TT; { E T22, T1O, T1L, T23; T22 = T1M + T1N; T1O = T1M - T1N; T11 = FNMS(KP866025403, T10, TV); T1z = FMA(KP866025403, T10, TV); T1L = FNMS(KP500000000, TU, Tf); cr[0] = Tf + TU; T23 = FNMS(KP500000000, T22, T21); ci[WS(rs, 8)] = T22 + T21; cr[WS(rs, 3)] = FMA(KP866025403, T1O, T1L); ci[WS(rs, 2)] = FNMS(KP866025403, T1O, T1L); ci[WS(rs, 5)] = FMA(KP866025403, T24, T23); cr[WS(rs, 6)] = FMS(KP866025403, T24, T23); } { E T1B, T1m, T1w, T1f, T1s, T1A, T1b, T1e, T1x, T1t; T1E = FNMS(KP866025403, T1a, T19); T1b = FMA(KP866025403, T1a, T19); T1e = FNMS(KP866025403, T1d, T1c); T1D = FMA(KP866025403, T1d, T1c); T1B = FMA(KP866025403, T1l, T1g); T1m = FNMS(KP866025403, T1l, T1g); T1X = FNMS(KP866025403, T1S, T1R); T1T = FMA(KP866025403, T1S, T1R); T1w = FNMS(KP176326980, T1b, T1e); T1f = FMA(KP176326980, T1e, T1b); T1s = FNMS(KP866025403, T1r, T1q); T1A = FMA(KP866025403, T1r, T1q); T1x = FMA(KP363970234, T1m, T1s); T1t = FNMS(KP363970234, T1s, T1m); 
T1I = FNMS(KP176326980, T1A, T1B); T1C = FMA(KP176326980, T1B, T1A); T1Y = FMA(KP954188894, T1x, T1w); T1y = FNMS(KP954188894, T1x, T1w); T20 = FMA(KP954188894, T1t, T1f); T1u = FNMS(KP954188894, T1t, T1f); } { E T1F, T1J, T1v, T1U, T1K; ci[WS(rs, 6)] = FNMS(KP984807753, T1Y, T1X); T1v = FNMS(KP492403876, T1u, T11); cr[WS(rs, 2)] = FMA(KP984807753, T1u, T11); T1F = FMA(KP839099631, T1E, T1D); T1J = FNMS(KP839099631, T1D, T1E); ci[WS(rs, 3)] = FNMS(KP852868531, T1y, T1v); ci[0] = FMA(KP852868531, T1y, T1v); T1U = FNMS(KP777861913, T1J, T1I); T1K = FMA(KP777861913, T1J, T1I); { E T1G, T1W, T1V, T1H; T1G = FMA(KP777861913, T1F, T1C); T1W = FNMS(KP777861913, T1F, T1C); T1Z = FMA(KP492403876, T1Y, T1X); T1V = FMA(KP492403876, T1U, T1T); ci[WS(rs, 7)] = FNMS(KP984807753, T1U, T1T); T1H = FNMS(KP492403876, T1G, T1z); cr[WS(rs, 1)] = FMA(KP984807753, T1G, T1z); ci[WS(rs, 4)] = FMA(KP852868531, T1W, T1V); cr[WS(rs, 7)] = FMS(KP852868531, T1W, T1V); cr[WS(rs, 4)] = FMA(KP852868531, T1K, T1H); ci[WS(rs, 1)] = FNMS(KP852868531, T1K, T1H); } } } } } cr[WS(rs, 8)] = -(FMA(KP852868531, T20, T1Z)); cr[WS(rs, 5)] = FMS(KP852868531, T20, T1Z); } }