/*
 * r2cfII_20 -- fully unrolled butterfly kernel: 20 real inputs -> 10 (Cr, Ci)
 * output pairs per pass.
 *
 * One pass of the loop
 *   - loads R1[WS(rs, 0..9)] and R0[WS(rs, 0..9)] (all loads come before the
 *     first store),
 *   - combines them with the DK() constants declared at the top,
 *   - stores Cr[WS(csr, 0..9)] and Ci[WS(csi, 0..9)].
 *
 * Parameters:
 *   R0, R1    input arrays, both indexed with stride rs; advanced by ivs
 *             after every pass
 *   Cr, Ci    output arrays, indexed with csr and csi; advanced by ovs
 *             after every pass
 *   v         number of passes; the loop body is skipped when v <= 0
 *   ivs, ovs  per-pass pointer increments for the inputs / outputs
 *
 * The kernel returns nothing; its only effect is the stores to Cr and Ci.
 *
 * NOTE(review): the name and constant table suggest this is a generated
 * size-20 real-to-complex "type II" codelet -- confirm against the generator
 * before editing the arithmetic by hand.
 * NOTE(review): MAKE_VOLATILE_STRIDE is invoked with one argument here but
 * with two arguments in other kernels of this file -- confirm which macro
 * definition this translation unit is built against.
 */
static void r2cfII_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP572061402, +0.572061402817684297600072783580302076536153377); DK(KP218508012, +0.218508012224410535399650602527877556893735408); DK(KP309016994, +0.309016994374947424102293417182819058860154590); DK(KP809016994, +0.809016994374947424102293417182819058860154590); DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP587785252, +0.587785252292473129168705954639072768597652438); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP176776695, +0.176776695296636881100211090526212259821208984); DK(KP395284707, +0.395284707521047416499861693054089816714944392); DK(KP672498511, +0.672498511963957326960058968885748755876783111); DK(KP415626937, +0.415626937777453428589967464113135184222253485); DK(KP707106781, +0.707106781186547524400844362104849039284835938); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E T8, TD, Tm, TN, T9, TC, TY, TE, Te, TF, Tl, TK, T12, TL, Tk; E TM, T1, T6, Tq, T1l, T1c, Tp, T1f, T1e, T1d, Ty, TW, T1g, T1m, Tx; E Tu; T8 = R1[WS(rs, 2)]; TD = KP707106781 * T8; Tm = R1[WS(rs, 7)]; TN = KP707106781 * Tm; { E Ta, TA, Td, TB, Tb, Tc; T9 = R1[WS(rs, 6)]; Ta = R1[WS(rs, 8)]; TA = T9 + Ta; Tb = R1[0]; Tc = R1[WS(rs, 4)]; Td = Tb + Tc; TB = Tb - Tc; TC = FMA(KP415626937, TA, KP672498511 * TB); TY = FNMS(KP415626937, TB, KP672498511 * TA); TE = KP395284707 * (Ta - Td); Te = Ta + Td; TF = KP176776695 * Te; } { E Tg, TJ, Tj, TI, Th, Ti; Tg = R1[WS(rs, 1)]; Tl = R1[WS(rs, 3)]; TJ = Tg + Tl; Th = R1[WS(rs, 5)]; Ti = R1[WS(rs, 9)]; Tj = Th + Ti; TI = Th - Ti; TK = FNMS(KP415626937, TJ, KP672498511 * TI); T12 = FMA(KP415626937, TI, KP672498511 * TJ); TL = KP395284707 * (Tg - Tj); Tk = Tg + Tj; TM = KP176776695 * Tk; 
} { E T2, T5, T3, T4, T1a, T1b; T1 = R0[0]; T2 = R0[WS(rs, 6)]; T5 = R0[WS(rs, 8)]; T3 = R0[WS(rs, 2)]; T4 = R0[WS(rs, 4)]; T1a = T4 + T2; T1b = T5 + T3; T6 = T2 + T3 - (T4 + T5); Tq = FMA(KP250000000, T6, T1); T1l = FNMS(KP951056516, T1b, KP587785252 * T1a); T1c = FMA(KP951056516, T1a, KP587785252 * T1b); Tp = KP559016994 * (T5 + T2 - (T4 + T3)); } T1f = R0[WS(rs, 5)]; { E Tv, Tw, Ts, Tt; Tv = R0[WS(rs, 9)]; Tw = R0[WS(rs, 1)]; Tx = Tv - Tw; T1e = Tv + Tw; Ts = R0[WS(rs, 3)]; Tt = R0[WS(rs, 7)]; Tu = Ts - Tt; T1d = Ts + Tt; } Ty = FMA(KP951056516, Tu, KP587785252 * Tx); TW = FNMS(KP951056516, Tx, KP587785252 * Tu); T1g = FMA(KP809016994, T1d, KP309016994 * T1e) + T1f; T1m = FNMS(KP809016994, T1e, T1f) - (KP309016994 * T1d); { E T7, T1r, To, T1q, Tf, Tn; T7 = T1 - T6; T1r = T1e + T1f - T1d; Tf = T8 + (T9 - Te); Tn = (Tk - Tl) - Tm; To = KP707106781 * (Tf + Tn); T1q = KP707106781 * (Tf - Tn); Cr[WS(csr, 2)] = T7 - To; Ci[WS(csi, 2)] = T1q - T1r; Cr[WS(csr, 7)] = T7 + To; Ci[WS(csi, 7)] = T1q + T1r; } { E T1h, T1j, TX, T15, T10, T16, T13, T17, TV, TZ, T11; T1h = T1c - T1g; T1j = T1c + T1g; TV = Tq - Tp; TX = TV - TW; T15 = TV + TW; TZ = FMA(KP218508012, T9, TD) + TF - TE; T10 = TY + TZ; T16 = TZ - TY; T11 = FNMS(KP218508012, Tl, TL) - (TM + TN); T13 = T11 - T12; T17 = T11 + T12; { E T14, T19, T18, T1i; T14 = T10 + T13; Cr[WS(csr, 5)] = TX - T14; Cr[WS(csr, 4)] = TX + T14; T19 = T17 - T16; Ci[WS(csi, 5)] = T19 - T1h; Ci[WS(csi, 4)] = T19 + T1h; T18 = T16 + T17; Cr[WS(csr, 9)] = T15 - T18; Cr[0] = T15 + T18; T1i = T13 - T10; Ci[0] = T1i - T1j; Ci[WS(csi, 9)] = T1i + T1j; } } { E T1n, T1p, Tz, TR, TH, TS, TP, TT, Tr, TG, TO; T1n = T1l + T1m; T1p = T1m - T1l; Tr = Tp + Tq; Tz = Tr + Ty; TR = Tr - Ty; TG = TD + TE + FNMS(KP572061402, T9, TF); TH = TC + TG; TS = TC - TG; TO = TL + TM + FNMS(KP572061402, Tl, TN); TP = TK - TO; TT = TK + TO; { E TQ, T1o, TU, T1k; TQ = TH + TP; Cr[WS(csr, 6)] = Tz - TQ; Cr[WS(csr, 3)] = Tz + TQ; T1o = TT - TS; Ci[WS(csi, 6)] = T1o - T1p; 
Ci[WS(csi, 3)] = T1o + T1p; TU = TS + TT; Cr[WS(csr, 8)] = TR - TU; Cr[WS(csr, 1)] = TR + TU; T1k = TP - TH; Ci[WS(csi, 8)] = T1k - T1n; Ci[WS(csi, 1)] = T1k + T1n; } } } } }
/*
 * hc2cfdft_8 -- fully unrolled in-place kernel over four arrays with twiddle
 * multiplication.
 *
 * One pass of the loop
 *   - loads Rp, Ip, Rm and Im at WS(rs, 0..3) and the 14 twiddle values
 *     W[0..13] (every load precedes the first store),
 *   - multiplies sums/differences of the inputs by twiddle pairs
 *     (W[2k], W[2k+1]) using FMA/FNMS,
 *   - writes all sixteen locations Rp/Ip/Rm/Im[WS(rs, 0..3)] back, scaled by
 *     KP500000000 (0.5) or KP353553390 (0.5 / sqrt(2)).
 *
 * Parameters:
 *   Rp, Ip   arrays read and overwritten; advanced by +ms per pass
 *   Rm, Im   arrays read and overwritten; advanced by -ms per pass
 *   W        twiddle table; offset by (mb - 1) * 14 on entry to the loop and
 *            advanced by 14 per pass
 *   rs       element stride inside each array
 *   mb, me   the loop runs for m = mb .. me - 1 (no pass when mb >= me)
 *   ms       per-pass pointer step
 *
 * NOTE(review): the name suggests a generated size-8 halfcomplex-to-complex
 * forward DFT codelet -- confirm against the generator before hand-editing.
 */
static void hc2cfdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP353553390, +0.353553390593273762200422181052424519642417969); DK(KP500000000, +0.500000000000000000000000000000000000000000000); { INT m; for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { E Tv, TX, Ts, TY, TE, T1a, TJ, T19, T1l, T1m, T9, T10, Ti, T11, TP; E T16, TU, T17, T1i, T1j; { E Tt, Tu, TD, Tz, TA, TB, Tn, TI, Tr, TG, Tk, To; Tt = Ip[0]; Tu = Im[0]; TD = Tt + Tu; Tz = Rm[0]; TA = Rp[0]; TB = Tz - TA; { E Tl, Tm, Tp, Tq; Tl = Ip[WS(rs, 2)]; Tm = Im[WS(rs, 2)]; Tn = Tl - Tm; TI = Tl + Tm; Tp = Rp[WS(rs, 2)]; Tq = Rm[WS(rs, 2)]; Tr = Tp + Tq; TG = Tp - Tq; } Tv = Tt - Tu; TX = TA + Tz; Tk = W[6]; To = W[7]; Ts = FNMS(To, Tr, Tk * Tn); TY = FMA(Tk, Tr, To * Tn); { E Ty, TC, TF, TH; Ty = W[0]; TC = W[1]; TE = FNMS(TC, TD, Ty * TB); T1a = FMA(TC, TB, Ty * TD); TF = W[8]; TH = W[9]; TJ = FMA(TF, TG, TH * TI); T19 = FNMS(TH, TG, TF * TI); } T1l = TJ + TE; T1m = T1a - T19; } { E T4, TO, T8, TM, Td, TT, Th, TR; { E T2, T3, T6, T7; T2 = Ip[WS(rs, 1)]; T3 = Im[WS(rs, 1)]; T4 = T2 - T3; TO = T2 + T3; T6 = Rp[WS(rs, 1)]; T7 = Rm[WS(rs, 1)]; T8 = T6 + T7; TM = T6 - T7; } { E Tb, Tc, Tf, Tg; Tb = Ip[WS(rs, 3)]; Tc = Im[WS(rs, 3)]; Td = Tb - Tc; TT = Tb + Tc; Tf = Rp[WS(rs, 3)]; Tg = Rm[WS(rs, 3)]; Th = Tf + Tg; TR = Tf - Tg; } { E T1, T5, Ta, Te; T1 = W[2]; T5 = W[3]; T9 = FNMS(T5, T8, T1 * T4); T10 = FMA(T1, T8, T5 * T4); Ta = W[10]; Te = W[11]; Ti = FNMS(Te, Th, Ta * Td); T11 = FMA(Ta, Th, Te * Td); { E TL, TN, TQ, TS; TL = W[4]; TN = W[5]; TP = FMA(TL, TM, TN * TO); T16 = FNMS(TN, TM, TL * TO); TQ = W[12]; TS = W[13]; TU = FMA(TQ, TR, TS * TT); T17 = FNMS(TS, TR, TQ * TT); } T1i = T17 - T16; T1j = TP - TU; } } { E T1h, T1t, T1w, T1y, T1o, T1s, T1r, T1x; { E T1f, T1g, T1u, T1v; T1f = Tv - Ts; T1g = T10 - T11; T1h = KP500000000 * (T1f - T1g); T1t = KP500000000 * 
(T1g + T1f); T1u = T1i - T1j; T1v = T1l + T1m; T1w = KP353553390 * (T1u - T1v); T1y = KP353553390 * (T1u + T1v); } { E T1k, T1n, T1p, T1q; T1k = T1i + T1j; T1n = T1l - T1m; T1o = KP353553390 * (T1k + T1n); T1s = KP353553390 * (T1n - T1k); T1p = TX - TY; T1q = T9 - Ti; T1r = KP500000000 * (T1p - T1q); T1x = KP500000000 * (T1p + T1q); } Ip[WS(rs, 1)] = T1h + T1o; Rp[WS(rs, 1)] = T1x + T1y; Im[WS(rs, 2)] = T1o - T1h; Rm[WS(rs, 2)] = T1x - T1y; Rm[0] = T1r - T1s; Im[0] = T1w - T1t; Rp[WS(rs, 3)] = T1r + T1s; Ip[WS(rs, 3)] = T1t + T1w; } { E Tx, T15, T1c, T1e, TW, T14, T13, T1d; { E Tj, Tw, T18, T1b; Tj = T9 + Ti; Tw = Ts + Tv; Tx = Tj + Tw; T15 = Tw - Tj; T18 = T16 + T17; T1b = T19 + T1a; T1c = T18 - T1b; T1e = T18 + T1b; } { E TK, TV, TZ, T12; TK = TE - TJ; TV = TP + TU; TW = TK - TV; T14 = TV + TK; TZ = TX + TY; T12 = T10 + T11; T13 = TZ - T12; T1d = TZ + T12; } Ip[0] = KP500000000 * (Tx + TW); Rp[0] = KP500000000 * (T1d + T1e); Im[WS(rs, 3)] = KP500000000 * (TW - Tx); Rm[WS(rs, 3)] = KP500000000 * (T1d - T1e); Rm[WS(rs, 1)] = KP500000000 * (T13 - T14); Im[WS(rs, 1)] = KP500000000 * (T1c - T15); Rp[WS(rs, 2)] = KP500000000 * (T13 + T14); Ip[WS(rs, 2)] = KP500000000 * (T15 + T1c); } } } }
/*
 * hc2r_9 -- unrolled kernel turning five "real" and four "imaginary" input
 * samples into nine outputs.
 *
 * Per pass it reads ri[WS(ris, 0..4)] and ii[WS(iis, 1..4)] (ii[0] is never
 * touched) and writes O[WS(os, 0..8)].  Every input is loaded before the
 * first output is stored.
 *
 *   ri, ii    input arrays, advanced by ivs after each pass
 *   O         output array, advanced by ovs after each pass
 *   ris, iis  element strides of ri and ii
 *   os        element stride of O
 *   v         number of passes (no work when v <= 0)
 *
 * NOTE(review): the name suggests a size-9 halfcomplex-to-real codelet in
 * generated form; v/ivs/ovs are plain int here whereas the neighbouring
 * kernels use INT -- confirm that this matches the caller's prototype.
 */
static void hc2r_9(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
{
     DK(KP984807753, +0.984807753012208059366743024589523013670643252);
     DK(KP173648177, +0.173648177666930348851716626769314796000375677);
     DK(KP300767466, +0.300767466360870593278543795225003852144476517);
     DK(KP1_705737063, +1.705737063904886419256501927880148143872040591);
     DK(KP642787609, +0.642787609686539326322643409907263432907559884);
     DK(KP766044443, +0.766044443118978035202392650555416673935832457);
     DK(KP1_326827896, +1.326827896337876792410842639271782594433726619);
     DK(KP1_113340798, +1.113340798452838732905825904094046265936583811);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
     int i;

     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) {
          E dc, armPlus, armMinus;      /* built from inputs 0 and 3 */
          E im1, imDiff, mid;           /* built from inputs 1, 2 and 4 */
          E xLo, xHi, yLo, yHi;         /* operands of the two rotations */

          /* Inputs with index 0 and 3. */
          {
               E im3 = ii[WS(iis, 3)];
               E im3Scaled = KP1_732050807 * im3;
               E re0 = ri[0];
               E re3 = ri[WS(ris, 3)];
               E reDiff = re0 - re3;

               dc = FMA(KP2_000000000, re3, re0);
               armPlus = reDiff + im3Scaled;
               armMinus = reDiff - im3Scaled;
          }

          /* Inputs with index 1, 2 and 4. */
          {
               E re1 = ri[WS(ris, 1)];
               E re4 = ri[WS(ris, 4)];
               E re2 = ri[WS(ris, 2)];
               E im4 = ii[WS(iis, 4)];
               E im2 = ii[WS(iis, 2)];
               E reSum = re4 + re2;
               E reRot = KP866025403 * (re4 - re2);
               E imRot = KP866025403 * (im4 + im2);
               E reHalf, imHalf;

               im1 = ii[WS(iis, 1)];
               imDiff = im2 - im4;
               mid = re1 + reSum;

               imHalf = FMA(KP500000000, imDiff, im1);
               yHi = reRot + imHalf;
               yLo = imHalf - reRot;

               reHalf = FNMS(KP500000000, reSum, re1);
               xLo = reHalf - imRot;
               xHi = reHalf + imRot;
          }

          /* Outputs 0, 3 and 6. */
          O[0] = FMA(KP2_000000000, mid, dc);
          {
               E base = dc - mid;
               E swing = KP1_732050807 * (im1 - imDiff);

               O[WS(os, 3)] = base - swing;
               O[WS(os, 6)] = base + swing;
          }

          /* Outputs 1, 4 and 7. */
          {
               E quad = FMA(KP1_113340798, xLo, KP1_326827896 * yHi);
               E inph = FNMS(KP642787609, yHi, KP766044443 * xLo);
               E rest = armMinus - inph;

               O[WS(os, 1)] = FMA(KP2_000000000, inph, armMinus);
               O[WS(os, 7)] = rest + quad;
               O[WS(os, 4)] = rest - quad;
          }

          /* Outputs 2, 5 and 8. */
          {
               E quad = FMA(KP1_705737063, xHi, KP300767466 * yLo);
               E inph = FNMS(KP984807753, yLo, KP173648177 * xHi);
               E rest = armPlus - inph;

               O[WS(os, 2)] = FMA(KP2_000000000, inph, armPlus);
               O[WS(os, 8)] = rest + quad;
               O[WS(os, 5)] = rest - quad;
          }
     }
}
/*
 * hb2_16 -- fully unrolled in-place kernel on 16 (cr, ci) pairs with twiddle
 * multiplication.
 *
 * One pass of the loop
 *   - loads W[0..7] and forms further multiplier pairs from products of
 *     W[0..5] (T1p/T1t, T25/T27, TB/T17, T1N/T1P, T1H/T1L, T1X/T21,
 *     T2b/T2F, T2R/T2T, T35/T3j, T3t/T3x) plus T1x/T1B from W[0], W[1],
 *     W[6], W[7], so only eight table entries are read per pass;
 *   - loads cr[WS(rs, 0..15)] and ci[WS(rs, 0..15)] (all loads precede the
 *     first store);
 *   - writes cr[0] and ci[0] without a multiplier, and every other
 *     cr[WS(rs, k)] / ci[WS(rs, k)] pair as an FNMS/FMA combination with one
 *     of the multiplier pairs above.
 *
 * Parameters:
 *   cr      array read and overwritten; advanced by +ms per pass
 *   ci      array read and overwritten; advanced by -ms per pass
 *   W       twiddle table; offset by (mb - 1) * 8 on entry to the loop and
 *           advanced by 8 per pass
 *   rs      element stride inside cr and ci
 *   mb, me  the loop runs for m = mb .. me - 1 (no pass when mb >= me)
 *   ms      per-pass pointer step
 *
 * NOTE(review): the name suggests a generated size-16 halfcomplex backward
 * codelet with a compressed ("2") twiddle table -- confirm against the
 * generator before hand-editing.
 */
static void hb2_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP382683432, +0.382683432365089771728459984030398866761344562); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP707106781, +0.707106781186547524400844362104849039284835938); { INT m; for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(32, rs)) { E Tv, Ty, T1l, T1n, T1p, T1t, T27, T25, Tz, Tw, TB, T21, T1P, T1H, T1X; E T17, T1L, T1N, T1v, T1w, T1x, T1B, T2F, T2T, T2b, T2R, T3j, T3x, T35, T3t; { E TA, T1J, T15, T1G, Tx, T1K, T16, T1F; { E T1m, T1s, T1o, T1r; Tv = W[0]; Ty = W[1]; T1l = W[2]; T1n = W[3]; T1m = Tv * T1l; T1s = Ty * T1l; T1o = Ty * T1n; T1r = Tv * T1n; T1p = T1m + T1o; T1t = T1r - T1s; T27 = T1r + T1s; T25 = T1m - T1o; Tz = W[5]; TA = Ty * Tz; T1J = T1l * Tz; T15 = Tv * Tz; T1G = T1n * Tz; Tw = W[4]; Tx = Tv * Tw; T1K = T1n * Tw; T16 = Ty * Tw; T1F = T1l * Tw; } TB = Tx - TA; T21 = T1J + T1K; T1P = T15 - T16; T1H = T1F + T1G; T1X = T1F - T1G; T17 = T15 + T16; T1L = T1J - T1K; T1N = Tx + TA; T1v = W[6]; T1w = W[7]; T1x = FMA(Tv, T1v, Ty * T1w); T1B = FNMS(Ty, T1v, Tv * T1w); { E T2D, T2E, T29, T2a; T2D = T25 * Tz; T2E = T27 * Tw; T2F = T2D + T2E; T2T = T2D - T2E; T29 = T25 * Tw; T2a = T27 * Tz; T2b = T29 - T2a; T2R = T29 + T2a; } { E T3h, T3i, T33, T34; T3h = T1p * Tz; T3i = T1t * Tw; T3j = T3h + T3i; T3x = T3h - T3i; T33 = T1p * Tw; T34 = T1t * Tz; T35 = T33 - T34; T3t = T33 + T34; } } { E T7, T36, T3k, TC, T1f, T2e, T2I, T1Q, Te, TJ, T1R, T18, T2L, T37, T2l; E T3l, Tm, T1T, TT, T1h, T2A, T2N, T3b, T3n, Tt, T1U, T12, T1i, T2t, T2O; E T3e, T3o; { E T3, T2c, T1e, T2d, T6, T2G, T1b, T2H; { E T1, T2, T1c, T1d; T1 = cr[0]; T2 = ci[WS(rs, 7)]; T3 = T1 + T2; T2c = T1 - T2; T1c = ci[WS(rs, 11)]; T1d = cr[WS(rs, 12)]; T1e = T1c - T1d; T2d = T1c + T1d; } { E T4, T5, T19, T1a; T4 = cr[WS(rs, 4)]; T5 = ci[WS(rs, 3)]; T6 = T4 + T5; T2G = T4 - T5; T19 = ci[WS(rs, 15)]; T1a = cr[WS(rs, 8)]; T1b = 
T19 - T1a; T2H = T19 + T1a; } T7 = T3 + T6; T36 = T2c + T2d; T3k = T2H - T2G; TC = T3 - T6; T1f = T1b - T1e; T2e = T2c - T2d; T2I = T2G + T2H; T1Q = T1b + T1e; } { E Ta, T2f, TI, T2g, Td, T2i, TF, T2j; { E T8, T9, TG, TH; T8 = cr[WS(rs, 2)]; T9 = ci[WS(rs, 5)]; Ta = T8 + T9; T2f = T8 - T9; TG = ci[WS(rs, 13)]; TH = cr[WS(rs, 10)]; TI = TG - TH; T2g = TG + TH; } { E Tb, Tc, TD, TE; Tb = ci[WS(rs, 1)]; Tc = cr[WS(rs, 6)]; Td = Tb + Tc; T2i = Tb - Tc; TD = ci[WS(rs, 9)]; TE = cr[WS(rs, 14)]; TF = TD - TE; T2j = TD + TE; } Te = Ta + Td; TJ = TF - TI; T1R = TI + TF; T18 = Ta - Td; { E T2J, T2K, T2h, T2k; T2J = T2f + T2g; T2K = T2i + T2j; T2L = KP707106781 * (T2J - T2K); T37 = KP707106781 * (T2J + T2K); T2h = T2f - T2g; T2k = T2i - T2j; T2l = KP707106781 * (T2h + T2k); T3l = KP707106781 * (T2h - T2k); } } { E Ti, T2x, TR, T2y, Tl, T2u, TO, T2v, TL, TS; { E Tg, Th, TP, TQ; Tg = cr[WS(rs, 1)]; Th = ci[WS(rs, 6)]; Ti = Tg + Th; T2x = Tg - Th; TP = ci[WS(rs, 10)]; TQ = cr[WS(rs, 13)]; TR = TP - TQ; T2y = TP + TQ; } { E Tj, Tk, TM, TN; Tj = cr[WS(rs, 5)]; Tk = ci[WS(rs, 2)]; Tl = Tj + Tk; T2u = Tj - Tk; TM = ci[WS(rs, 14)]; TN = cr[WS(rs, 9)]; TO = TM - TN; T2v = TM + TN; } Tm = Ti + Tl; T1T = TO + TR; TL = Ti - Tl; TS = TO - TR; TT = TL - TS; T1h = TL + TS; { E T2w, T2z, T39, T3a; T2w = T2u + T2v; T2z = T2x - T2y; T2A = FMA(KP923879532, T2w, KP382683432 * T2z); T2N = FNMS(KP382683432, T2w, KP923879532 * T2z); T39 = T2x + T2y; T3a = T2v - T2u; T3b = FNMS(KP923879532, T3a, KP382683432 * T39); T3n = FMA(KP382683432, T3a, KP923879532 * T39); } } { E Tp, T2q, T10, T2r, Ts, T2n, TX, T2o, TU, T11; { E Tn, To, TY, TZ; Tn = ci[0]; To = cr[WS(rs, 7)]; Tp = Tn + To; T2q = Tn - To; TY = ci[WS(rs, 12)]; TZ = cr[WS(rs, 11)]; T10 = TY - TZ; T2r = TY + TZ; } { E Tq, Tr, TV, TW; Tq = cr[WS(rs, 3)]; Tr = ci[WS(rs, 4)]; Ts = Tq + Tr; T2n = Tq - Tr; TV = ci[WS(rs, 8)]; TW = cr[WS(rs, 15)]; TX = TV - TW; T2o = TV + TW; } Tt = Tp + Ts; T1U = TX + T10; TU = Tp - Ts; T11 = TX - T10; T12 = TU + T11; 
T1i = T11 - TU; { E T2p, T2s, T3c, T3d; T2p = T2n - T2o; T2s = T2q - T2r; T2t = FNMS(KP382683432, T2s, KP923879532 * T2p); T2O = FMA(KP382683432, T2p, KP923879532 * T2s); T3c = T2q + T2r; T3d = T2n + T2o; T3e = FNMS(KP923879532, T3d, KP382683432 * T3c); T3o = FMA(KP382683432, T3d, KP923879532 * T3c); } } { E Tf, Tu, T1O, T1S, T1V, T1W; Tf = T7 + Te; Tu = Tm + Tt; T1O = Tf - Tu; T1S = T1Q + T1R; T1V = T1T + T1U; T1W = T1S - T1V; cr[0] = Tf + Tu; ci[0] = T1S + T1V; cr[WS(rs, 8)] = FNMS(T1P, T1W, T1N * T1O); ci[WS(rs, 8)] = FMA(T1P, T1O, T1N * T1W); } { E T3g, T3r, T3q, T3s; { E T38, T3f, T3m, T3p; T38 = T36 - T37; T3f = T3b + T3e; T3g = T38 - T3f; T3r = T38 + T3f; T3m = T3k + T3l; T3p = T3n - T3o; T3q = T3m - T3p; T3s = T3m + T3p; } cr[WS(rs, 11)] = FNMS(T3j, T3q, T35 * T3g); ci[WS(rs, 11)] = FMA(T3j, T3g, T35 * T3q); cr[WS(rs, 3)] = FNMS(T1n, T3s, T1l * T3r); ci[WS(rs, 3)] = FMA(T1n, T3r, T1l * T3s); } { E T3w, T3B, T3A, T3C; { E T3u, T3v, T3y, T3z; T3u = T36 + T37; T3v = T3n + T3o; T3w = T3u - T3v; T3B = T3u + T3v; T3y = T3k - T3l; T3z = T3b - T3e; T3A = T3y + T3z; T3C = T3y - T3z; } cr[WS(rs, 7)] = FNMS(T3x, T3A, T3t * T3w); ci[WS(rs, 7)] = FMA(T3t, T3A, T3x * T3w); cr[WS(rs, 15)] = FNMS(T1w, T3C, T1v * T3B); ci[WS(rs, 15)] = FMA(T1v, T3C, T1w * T3B); } { E T14, T1q, T1k, T1u; { E TK, T13, T1g, T1j; TK = TC + TJ; T13 = KP707106781 * (TT + T12); T14 = TK - T13; T1q = TK + T13; T1g = T18 + T1f; T1j = KP707106781 * (T1h + T1i); T1k = T1g - T1j; T1u = T1g + T1j; } cr[WS(rs, 10)] = FNMS(T17, T1k, TB * T14); ci[WS(rs, 10)] = FMA(T17, T14, TB * T1k); cr[WS(rs, 2)] = FNMS(T1t, T1u, T1p * T1q); ci[WS(rs, 2)] = FMA(T1t, T1q, T1p * T1u); } { E T1A, T1I, T1E, T1M; { E T1y, T1z, T1C, T1D; T1y = TC - TJ; T1z = KP707106781 * (T1i - T1h); T1A = T1y - T1z; T1I = T1y + T1z; T1C = T1f - T18; T1D = KP707106781 * (TT - T12); T1E = T1C - T1D; T1M = T1C + T1D; } cr[WS(rs, 14)] = FNMS(T1B, T1E, T1x * T1A); ci[WS(rs, 14)] = FMA(T1x, T1E, T1B * T1A); cr[WS(rs, 6)] = FNMS(T1L, T1M, T1H * 
T1I); ci[WS(rs, 6)] = FMA(T1H, T1M, T1L * T1I); } { E T2C, T2S, T2Q, T2U; { E T2m, T2B, T2M, T2P; T2m = T2e - T2l; T2B = T2t - T2A; T2C = T2m - T2B; T2S = T2m + T2B; T2M = T2I - T2L; T2P = T2N - T2O; T2Q = T2M - T2P; T2U = T2M + T2P; } cr[WS(rs, 13)] = FNMS(T2F, T2Q, T2b * T2C); ci[WS(rs, 13)] = FMA(T2F, T2C, T2b * T2Q); cr[WS(rs, 5)] = FNMS(T2T, T2U, T2R * T2S); ci[WS(rs, 5)] = FMA(T2T, T2S, T2R * T2U); } { E T2X, T31, T30, T32; { E T2V, T2W, T2Y, T2Z; T2V = T2e + T2l; T2W = T2N + T2O; T2X = T2V - T2W; T31 = T2V + T2W; T2Y = T2I + T2L; T2Z = T2A + T2t; T30 = T2Y - T2Z; T32 = T2Y + T2Z; } cr[WS(rs, 9)] = FNMS(Tz, T30, Tw * T2X); ci[WS(rs, 9)] = FMA(Tw, T30, Tz * T2X); cr[WS(rs, 1)] = FNMS(Ty, T32, Tv * T31); ci[WS(rs, 1)] = FMA(Tv, T32, Ty * T31); } { E T20, T26, T24, T28; { E T1Y, T1Z, T22, T23; T1Y = T7 - Te; T1Z = T1U - T1T; T20 = T1Y - T1Z; T26 = T1Y + T1Z; T22 = T1Q - T1R; T23 = Tm - Tt; T24 = T22 - T23; T28 = T23 + T22; } cr[WS(rs, 12)] = FNMS(T21, T24, T1X * T20); ci[WS(rs, 12)] = FMA(T1X, T24, T21 * T20); cr[WS(rs, 4)] = FNMS(T27, T28, T25 * T26); ci[WS(rs, 4)] = FMA(T25, T28, T27 * T26); } } } } }
/*
 * q1_3 -- in-place kernel on a 3 x 3 tile of (rio, iio) pairs.
 *
 * The tile element (row j, column k) lives at offset WS(vs, j) + WS(rs, k).
 * Per pass the kernel
 *   1. loads all nine pairs and runs a 3-point butterfly along each row j
 *      (sum, plus two outputs built from KP500000000 and KP866025403);
 *   2. stores the results at the swapped position: output k of row j goes
 *      to offset WS(vs, k) + WS(rs, j);
 *   3. multiplies output 1 by the pair (W[0], W[1]) and output 2 by
 *      (W[2], W[3]) on the way out; output 0 is stored unscaled.
 * All loads from rio/iio happen before the first store.
 *
 *   rio, iio  arrays read and overwritten; advanced by ms per pass
 *   W         twiddle table; offset by mb * 4 on entry, advanced by 4 per pass
 *   rs, vs    the two strides of the tile
 *   mb, me    the loop runs for m = mb .. me - 1 (no pass when mb >= me)
 *
 * NOTE(review): the name suggests a generated FFTW "q1" (twiddle + transpose)
 * codelet of size 3; the data pointers are float* while the other kernels in
 * this file use R* -- confirm R is float in this build.
 */
static void q1_3(float *rio, float *iio, const float *W, stride rs, stride vs, INT mb, INT me, INT ms)
{
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT m;

     for (m = mb, W = W + (mb * 4); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 4, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(vs)) {
          /* Row 0 (offset 0), row 1 (offset WS(vs, 1)), row 2 (offset WS(vs, 2)). */
          E a0r, aSumR, aMidR, aRotR, a0i, aSumI, aMidI, aRotI;
          E b0r, bSumR, bMidR, bRotR, b0i, bSumI, bMidI, bRotI;
          E c0r, cSumR, cMidR, cRotR, c0i, cSumI, cMidI, cRotI;

          /* Butterfly along row 0. */
          {
               E r1 = rio[WS(rs, 1)];
               E r2 = rio[WS(rs, 2)];
               E i1 = iio[WS(rs, 1)];
               E i2 = iio[WS(rs, 2)];

               a0r = rio[0];
               aSumR = r1 + r2;
               aMidR = FNMS(KP500000000, aSumR, a0r);
               aRotR = KP866025403 * (r2 - r1);

               a0i = iio[0];
               aSumI = i1 + i2;
               aRotI = KP866025403 * (i1 - i2);
               aMidI = FNMS(KP500000000, aSumI, a0i);
          }

          /* Butterfly along row 1. */
          {
               E r1 = rio[WS(vs, 1) + WS(rs, 1)];
               E r2 = rio[WS(vs, 1) + WS(rs, 2)];
               E i1 = iio[WS(vs, 1) + WS(rs, 1)];
               E i2 = iio[WS(vs, 1) + WS(rs, 2)];

               b0r = rio[WS(vs, 1)];
               bSumR = r1 + r2;
               bMidR = FNMS(KP500000000, bSumR, b0r);
               bRotR = KP866025403 * (r2 - r1);

               b0i = iio[WS(vs, 1)];
               bSumI = i1 + i2;
               bRotI = KP866025403 * (i1 - i2);
               bMidI = FNMS(KP500000000, bSumI, b0i);
          }

          /* Butterfly along row 2. */
          {
               E i1 = iio[WS(vs, 2) + WS(rs, 1)];
               E i2 = iio[WS(vs, 2) + WS(rs, 2)];
               E r1 = rio[WS(vs, 2) + WS(rs, 1)];
               E r2 = rio[WS(vs, 2) + WS(rs, 2)];

               c0i = iio[WS(vs, 2)];
               cSumI = i1 + i2;
               cRotI = KP866025403 * (i1 - i2);
               cMidI = FNMS(KP500000000, cSumI, c0i);

               c0r = rio[WS(vs, 2)];
               cSumR = r1 + r2;
               cMidR = FNMS(KP500000000, cSumR, c0r);
               cRotR = KP866025403 * (r2 - r1);
          }

          /* Output 0 of every row: plain sums, stored along the first row. */
          rio[0] = a0r + aSumR;
          iio[0] = a0i + aSumI;
          rio[WS(rs, 1)] = b0r + bSumR;
          iio[WS(rs, 1)] = b0i + bSumI;
          iio[WS(rs, 2)] = c0i + cSumI;
          rio[WS(rs, 2)] = c0r + cSumR;

          /* Row 0, output 1. */
          {
               E re = aMidR + aRotI;
               E im = aRotR + aMidI;
               E wr = W[0];
               E wi = W[1];

               rio[WS(vs, 1)] = FMA(wr, re, wi * im);
               iio[WS(vs, 1)] = FNMS(wi, re, wr * im);
          }

          /* Row 2, output 2. */
          {
               E re = cMidR - cRotI;
               E im = cMidI - cRotR;
               E wr = W[2];
               E wi = W[3];

               rio[WS(vs, 2) + WS(rs, 2)] = FMA(wr, re, wi * im);
               iio[WS(vs, 2) + WS(rs, 2)] = FNMS(wi, re, wr * im);
          }

          /* Row 1, output 2. */
          {
               E re = bMidR - bRotI;
               E im = bMidI - bRotR;
               E wr = W[2];
               E wi = W[3];

               rio[WS(vs, 2) + WS(rs, 1)] = FMA(wr, re, wi * im);
               iio[WS(vs, 2) + WS(rs, 1)] = FNMS(wi, re, wr * im);
          }

          /* Row 1, output 1. */
          {
               E re = bMidR + bRotI;
               E im = bRotR + bMidI;
               E wr = W[0];
               E wi = W[1];

               rio[WS(vs, 1) + WS(rs, 1)] = FMA(wr, re, wi * im);
               iio[WS(vs, 1) + WS(rs, 1)] = FNMS(wi, re, wr * im);
          }

          /* Row 2, output 1. */
          {
               E re = cMidR + cRotI;
               E im = cRotR + cMidI;
               E wr = W[0];
               E wi = W[1];

               rio[WS(vs, 1) + WS(rs, 2)] = FMA(wr, re, wi * im);
               iio[WS(vs, 1) + WS(rs, 2)] = FNMS(wi, re, wr * im);
          }

          /* Row 0, output 2. */
          {
               E re = aMidR - aRotI;
               E im = aMidI - aRotR;
               E wr = W[2];
               E wi = W[3];

               rio[WS(vs, 2)] = FMA(wr, re, wi * im);
               iio[WS(vs, 2)] = FNMS(wi, re, wr * im);
          }
     }
}
/*
 * hc2r_15 -- fully unrolled kernel turning eight "real" and seven "imaginary"
 * input samples into fifteen outputs.
 *
 * One pass of the loop reads ri[WS(ris, 0..7)] and ii[WS(iis, 1..7)]
 * (ii[0] is never touched) and writes O[WS(os, 0..14)].  Every input is
 * loaded before the first output is stored.
 *
 * Parameters:
 *   ri, ii    input arrays; advanced by ivs after every pass
 *   O         output array; advanced by ovs after every pass
 *   ris, iis  element strides of ri and ii
 *   os        element stride of O
 *   v         number of passes; the loop body is skipped when v <= 0
 *
 * NOTE(review): the name suggests a generated size-15 halfcomplex-to-real
 * codelet -- confirm against the generator before hand-editing.
 * NOTE(review): MAKE_VOLATILE_STRIDE is invoked with one argument here but
 * with two arguments in other kernels of this file -- confirm which macro
 * definition this translation unit is built against.
 */
static void hc2r_15(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, INT v, INT ivs, INT ovs) { DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP866025403, +0.866025403784438646763723170752936183471402627); DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); DK(KP618033988, +0.618033988749894848204586834365638117720309180); DK(KP500000000, +0.500000000000000000000000000000000000000000000); DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); INT i; for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(ris), MAKE_VOLATILE_STRIDE(iis), MAKE_VOLATILE_STRIDE(os)) { E TL, Tz, TM, TK; { E T3, Th, Tt, TD, TI, TH, TY, TC, TZ, Tu, Tm, Tv, Tr, Te, TW; E Tg, T1, T2, T12, T10, TV; Tg = ii[WS(iis, 5)]; T1 = ri[0]; T2 = ri[WS(ris, 5)]; { E T4, TA, T9, TF, T7, Tj, Tc, Tk, TG, Tq, Tf, Tl, TB; T4 = ri[WS(ris, 3)]; TA = ii[WS(iis, 3)]; T9 = ri[WS(ris, 6)]; Tf = T1 - T2; T3 = FMA(KP2_000000000, T2, T1); TF = ii[WS(iis, 6)]; { E Ta, Tb, T5, T6, To, Tp; T5 = ri[WS(ris, 7)]; T6 = ri[WS(ris, 2)]; Th = FMA(KP1_732050807, Tg, Tf); Tt = FNMS(KP1_732050807, Tg, Tf); Ta = ri[WS(ris, 4)]; TD = T5 - T6; T7 = T5 + T6; Tb = ri[WS(ris, 1)]; To = ii[WS(iis, 4)]; Tp = ii[WS(iis, 1)]; Tj = ii[WS(iis, 7)]; Tc = Ta + Tb; TI = Ta - Tb; Tk = ii[WS(iis, 2)]; TG = Tp - To; Tq = To + Tp; } Tl = Tj - Tk; TB = Tj + Tk; TH = FNMS(KP500000000, TG, TF); TY = TG + TF; TC = FMA(KP500000000, TB, TA); TZ = TA - TB; { E Ti, T8, Td, Tn; Ti = FNMS(KP2_000000000, T4, T7); T8 = T4 + T7; Td = T9 + Tc; Tn = FNMS(KP2_000000000, T9, Tc); Tu = FNMS(KP1_732050807, Tl, Ti); Tm = FMA(KP1_732050807, Tl, Ti); Tv = FNMS(KP1_732050807, Tq, Tn); Tr = FMA(KP1_732050807, Tq, Tn); Te = T8 + Td; TW = T8 - Td; } } T12 = 
FMA(KP618033988, TY, TZ); T10 = FNMS(KP618033988, TZ, TY); TV = FNMS(KP500000000, Te, T3); O[0] = FMA(KP2_000000000, Te, T3); { E TJ, TE, TT, TP, TU, TS, Ty, Tw, Tx; { E TO, Ts, TQ, TN, TR, T11, TX; TO = Tr - Tm; Ts = Tm + Tr; T11 = FMA(KP1_118033988, TW, TV); TX = FNMS(KP1_118033988, TW, TV); TQ = FNMS(KP866025403, TI, TH); TJ = FMA(KP866025403, TI, TH); TN = FMA(KP250000000, Ts, Th); O[WS(os, 6)] = FNMS(KP1_902113032, T12, T11); O[WS(os, 9)] = FMA(KP1_902113032, T12, T11); O[WS(os, 12)] = FMA(KP1_902113032, T10, TX); O[WS(os, 3)] = FNMS(KP1_902113032, T10, TX); TR = FNMS(KP866025403, TD, TC); TE = FMA(KP866025403, TD, TC); O[WS(os, 5)] = Th - Ts; TT = FMA(KP559016994, TO, TN); TP = FNMS(KP559016994, TO, TN); TU = FMA(KP618033988, TQ, TR); TS = FNMS(KP618033988, TR, TQ); } Ty = Tv - Tu; Tw = Tu + Tv; O[WS(os, 14)] = FMA(KP1_902113032, TU, TT); O[WS(os, 11)] = FNMS(KP1_902113032, TU, TT); O[WS(os, 2)] = FMA(KP1_902113032, TS, TP); O[WS(os, 8)] = FNMS(KP1_902113032, TS, TP); Tx = FMA(KP250000000, Tw, Tt); O[WS(os, 10)] = Tt - Tw; TL = FNMS(KP559016994, Ty, Tx); Tz = FMA(KP559016994, Ty, Tx); TM = FNMS(KP618033988, TE, TJ); TK = FMA(KP618033988, TJ, TE); } } O[WS(os, 7)] = FMA(KP1_902113032, TM, TL); O[WS(os, 13)] = FNMS(KP1_902113032, TM, TL); O[WS(os, 4)] = FMA(KP1_902113032, TK, Tz); O[WS(os, 1)] = FNMS(KP1_902113032, TK, Tz); } }
/*
 * r2cbIII_32 -- fully unrolled butterfly kernel: 16 (Cr, Ci) input pairs ->
 * 32 real outputs per pass.
 *
 * One pass of the loop
 *   - loads Cr[WS(csr, 0..15)] and Ci[WS(csi, 0..15)] (all loads come before
 *     the first store),
 *   - combines them with the DK() constants declared at the top,
 *   - stores R0[WS(rs, 0..15)] and R1[WS(rs, 0..15)].
 *
 * Note the direction: here Cr/Ci are the INPUTS and R0/R1 the OUTPUTS, so the
 * per-pass pointer steps are ivs for Cr/Ci and ovs for R0/R1.
 *
 * Parameters:
 *   R0, R1    output arrays, both indexed with stride rs; advanced by ovs
 *             after every pass
 *   Cr, Ci    input arrays, indexed with csr and csi; advanced by ivs
 *             after every pass
 *   v         number of passes; the loop body is skipped when v <= 0
 *
 * NOTE(review): the name suggests a generated size-32 complex-to-real
 * "type III" backward codelet -- confirm against the generator before
 * hand-editing.
 */
static void r2cbIII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); DK(KP580569354, +0.580569354508924735272384751634790549382952557); DK(KP942793473, +0.942793473651995297112775251810508755314920638); DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); DK(KP1_268786568, +1.268786568327290996430343226450986741351374190); DK(KP196034280, +0.196034280659121203988391127777283691722273346); DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); DK(KP765366864, +0.765366864730179543456919968060797733522689125); DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); DK(KP390180644, +0.390180644032256535696569736954044481855383236); DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); DK(KP382683432, +0.382683432365089771728459984030398866761344562); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP707106781, +0.707106781186547524400844362104849039284835938); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { E T7, T2i, T2F, Tz, T1k, T1I, T1Z, T1x, Te, T22, T2E, T2j, T1f, T1y, TK; E T1J, Tm, T2B, TW, T1a, T1C, T1L, T28, T2l, Tt, T2A, T17, T1b, T1F, T1M; E T2d, T2m; { E T3, Tv, T1j, T2h, T6, T1g, Ty, T2g; { E T1, T2, T1h, T1i; T1 = Cr[0]; T2 = Cr[WS(csr, 15)]; T3 = T1 + T2; Tv = T1 - T2; T1h = Ci[0]; T1i = Ci[WS(csi, 15)]; T1j = T1h + T1i; T2h = T1i - T1h; } { E T4, T5, Tw, Tx; T4 = Cr[WS(csr, 8)]; 
T5 = Cr[WS(csr, 7)]; T6 = T4 + T5; T1g = T4 - T5; Tw = Ci[WS(csi, 8)]; Tx = Ci[WS(csi, 7)]; Ty = Tw + Tx; T2g = Tw - Tx; } T7 = T3 + T6; T2i = T2g + T2h; T2F = T2h - T2g; Tz = Tv - Ty; T1k = T1g + T1j; T1I = T1g - T1j; T1Z = T3 - T6; T1x = Tv + Ty; } { E Ta, TA, TD, T21, Td, TF, TI, T20; { E T8, T9, TB, TC; T8 = Cr[WS(csr, 4)]; T9 = Cr[WS(csr, 11)]; Ta = T8 + T9; TA = T8 - T9; TB = Ci[WS(csi, 4)]; TC = Ci[WS(csi, 11)]; TD = TB + TC; T21 = TB - TC; } { E Tb, Tc, TG, TH; Tb = Cr[WS(csr, 3)]; Tc = Cr[WS(csr, 12)]; Td = Tb + Tc; TF = Tb - Tc; TG = Ci[WS(csi, 3)]; TH = Ci[WS(csi, 12)]; TI = TG + TH; T20 = TH - TG; } Te = Ta + Td; T22 = T20 - T21; T2E = T21 + T20; T2j = Ta - Td; { E T1d, T1e, TE, TJ; T1d = TA + TD; T1e = TF + TI; T1f = KP707106781 * (T1d - T1e); T1y = KP707106781 * (T1d + T1e); TE = TA - TD; TJ = TF - TI; TK = KP707106781 * (TE + TJ); T1J = KP707106781 * (TE - TJ); } } { E Ti, TM, TU, T25, Tl, TR, TP, T26, TQ, TV; { E Tg, Th, TS, TT; Tg = Cr[WS(csr, 2)]; Th = Cr[WS(csr, 13)]; Ti = Tg + Th; TM = Tg - Th; TS = Ci[WS(csi, 2)]; TT = Ci[WS(csi, 13)]; TU = TS + TT; T25 = TS - TT; } { E Tj, Tk, TN, TO; Tj = Cr[WS(csr, 10)]; Tk = Cr[WS(csr, 5)]; Tl = Tj + Tk; TR = Tj - Tk; TN = Ci[WS(csi, 10)]; TO = Ci[WS(csi, 5)]; TP = TN + TO; T26 = TN - TO; } Tm = Ti + Tl; T2B = T26 + T25; TQ = TM - TP; TV = TR + TU; TW = FNMS(KP382683432, TV, KP923879532 * TQ); T1a = FMA(KP382683432, TQ, KP923879532 * TV); { E T1A, T1B, T24, T27; T1A = TM + TP; T1B = TU - TR; T1C = FNMS(KP923879532, T1B, KP382683432 * T1A); T1L = FMA(KP923879532, T1A, KP382683432 * T1B); T24 = Ti - Tl; T27 = T25 - T26; T28 = T24 - T27; T2l = T24 + T27; } } { E Tp, TX, T15, T2a, Ts, T12, T10, T2b, T11, T16; { E Tn, To, T13, T14; Tn = Cr[WS(csr, 1)]; To = Cr[WS(csr, 14)]; Tp = Tn + To; TX = Tn - To; T13 = Ci[WS(csi, 1)]; T14 = Ci[WS(csi, 14)]; T15 = T13 + T14; T2a = T14 - T13; } { E Tq, Tr, TY, TZ; Tq = Cr[WS(csr, 6)]; Tr = Cr[WS(csr, 9)]; Ts = Tq + Tr; T12 = Tq - Tr; TY = Ci[WS(csi, 6)]; TZ = Ci[WS(csi, 9)]; 
T10 = TY + TZ; T2b = TY - TZ; } Tt = Tp + Ts; T2A = T2b + T2a; T11 = TX - T10; T16 = T12 - T15; T17 = FMA(KP923879532, T11, KP382683432 * T16); T1b = FNMS(KP382683432, T11, KP923879532 * T16); { E T1D, T1E, T29, T2c; T1D = TX + T10; T1E = T12 + T15; T1F = FNMS(KP923879532, T1E, KP382683432 * T1D); T1M = FMA(KP923879532, T1D, KP382683432 * T1E); T29 = Tp - Ts; T2c = T2a - T2b; T2d = T29 + T2c; T2m = T2c - T29; } } { E Tf, Tu, T2L, T2M, T2N, T2O; Tf = T7 + Te; Tu = Tm + Tt; T2L = Tf - Tu; T2M = T2B + T2A; T2N = T2F - T2E; T2O = T2M + T2N; R0[0] = KP2_000000000 * (Tf + Tu); R0[WS(rs, 8)] = KP2_000000000 * (T2N - T2M); R0[WS(rs, 4)] = KP1_414213562 * (T2L + T2O); R0[WS(rs, 12)] = KP1_414213562 * (T2O - T2L); } { E T2t, T2x, T2w, T2y; { E T2r, T2s, T2u, T2v; T2r = T1Z - T22; T2s = KP707106781 * (T2m - T2l); T2t = T2r + T2s; T2x = T2r - T2s; T2u = T2j + T2i; T2v = KP707106781 * (T28 - T2d); T2w = T2u - T2v; T2y = T2v + T2u; } R0[WS(rs, 3)] = FMA(KP1_662939224, T2t, KP1_111140466 * T2w); R0[WS(rs, 15)] = FNMS(KP1_961570560, T2x, KP390180644 * T2y); R0[WS(rs, 11)] = FNMS(KP1_111140466, T2t, KP1_662939224 * T2w); R0[WS(rs, 7)] = FMA(KP390180644, T2x, KP1_961570560 * T2y); } { E T2D, T2J, T2I, T2K; { E T2z, T2C, T2G, T2H; T2z = T7 - Te; T2C = T2A - T2B; T2D = T2z + T2C; T2J = T2z - T2C; T2G = T2E + T2F; T2H = Tm - Tt; T2I = T2G - T2H; T2K = T2H + T2G; } R0[WS(rs, 2)] = FMA(KP1_847759065, T2D, KP765366864 * T2I); R0[WS(rs, 14)] = FNMS(KP1_847759065, T2J, KP765366864 * T2K); R0[WS(rs, 10)] = FNMS(KP765366864, T2D, KP1_847759065 * T2I); R0[WS(rs, 6)] = FMA(KP765366864, T2J, KP1_847759065 * T2K); } { E T19, T1n, T1m, T1o; { E TL, T18, T1c, T1l; TL = Tz + TK; T18 = TW + T17; T19 = TL + T18; T1n = TL - T18; T1c = T1a + T1b; T1l = T1f + T1k; T1m = T1c + T1l; T1o = T1c - T1l; } R1[0] = FNMS(KP196034280, T1m, KP1_990369453 * T19); R1[WS(rs, 12)] = FNMS(KP1_546020906, T1n, KP1_268786568 * T1o); R1[WS(rs, 8)] = -(FMA(KP196034280, T19, KP1_990369453 * T1m)); R1[WS(rs, 4)] = 
FMA(KP1_268786568, T1n, KP1_546020906 * T1o); } { E T1r, T1v, T1u, T1w; { E T1p, T1q, T1s, T1t; T1p = Tz - TK; T1q = T1b - T1a; T1r = T1p + T1q; T1v = T1p - T1q; T1s = T1f - T1k; T1t = TW - T17; T1u = T1s - T1t; T1w = T1t + T1s; } R1[WS(rs, 2)] = FMA(KP1_763842528, T1r, KP942793473 * T1u); R1[WS(rs, 14)] = FNMS(KP1_913880671, T1v, KP580569354 * T1w); R1[WS(rs, 10)] = FNMS(KP942793473, T1r, KP1_763842528 * T1u); R1[WS(rs, 6)] = FMA(KP580569354, T1v, KP1_913880671 * T1w); } { E T1T, T1X, T1W, T1Y; { E T1R, T1S, T1U, T1V; T1R = T1x + T1y; T1S = T1L + T1M; T1T = T1R - T1S; T1X = T1R + T1S; T1U = T1J + T1I; T1V = T1C - T1F; T1W = T1U - T1V; T1Y = T1V + T1U; } R1[WS(rs, 3)] = FMA(KP1_546020906, T1T, KP1_268786568 * T1W); R1[WS(rs, 15)] = FNMS(KP1_990369453, T1X, KP196034280 * T1Y); R1[WS(rs, 11)] = FNMS(KP1_268786568, T1T, KP1_546020906 * T1W); R1[WS(rs, 7)] = FMA(KP196034280, T1X, KP1_990369453 * T1Y); } { E T2f, T2p, T2o, T2q; { E T23, T2e, T2k, T2n; T23 = T1Z + T22; T2e = KP707106781 * (T28 + T2d); T2f = T23 + T2e; T2p = T23 - T2e; T2k = T2i - T2j; T2n = KP707106781 * (T2l + T2m); T2o = T2k - T2n; T2q = T2n + T2k; } R0[WS(rs, 1)] = FMA(KP1_961570560, T2f, KP390180644 * T2o); R0[WS(rs, 13)] = FNMS(KP1_662939224, T2p, KP1_111140466 * T2q); R0[WS(rs, 9)] = FNMS(KP390180644, T2f, KP1_961570560 * T2o); R0[WS(rs, 5)] = FMA(KP1_111140466, T2p, KP1_662939224 * T2q); } { E T1H, T1P, T1O, T1Q; { E T1z, T1G, T1K, T1N; T1z = T1x - T1y; T1G = T1C + T1F; T1H = T1z + T1G; T1P = T1z - T1G; T1K = T1I - T1J; T1N = T1L - T1M; T1O = T1K - T1N; T1Q = T1N + T1K; } R1[WS(rs, 1)] = FMA(KP1_913880671, T1H, KP580569354 * T1O); R1[WS(rs, 13)] = FNMS(KP1_763842528, T1P, KP942793473 * T1Q); R1[WS(rs, 9)] = FNMS(KP580569354, T1H, KP1_913880671 * T1O); R1[WS(rs, 5)] = FMA(KP942793473, T1P, KP1_763842528 * T1Q); } } } }
/*
 * r2cb_20: straight-line 20-point kernel, applied to v consecutive vectors.
 * (The name suggests a size-20 "real-to-complex, backward" codelet produced
 * by a generator -- presumably genfft; confirm against the build scripts.)
 *
 * Per iteration it
 *   - reads  Cr[0], Cr[WS(csr, 1..10)] and Ci[WS(csi, 1..9)];
 *   - writes R0[0], R0[WS(rs, 1..9)] and R1[0], R1[WS(rs, 1..9)].
 * Ci[0] and Ci[WS(csi, 10)] are never read.
 *
 * Parameters:
 *   R0, R1        output arrays, indexed through stride rs, advanced by ovs
 *                 after each vector;
 *   Cr, Ci        input arrays, indexed through csr / csi, advanced by ivs
 *                 after each vector;
 *   v             number of vectors to process (loop runs while i > 0);
 *   ivs, ovs      per-vector pointer increments (note: inputs use ivs,
 *                 outputs use ovs).
 *
 * Structure of the body: the Cr[5]/Ci[5]/Cr[0]/Cr[10] terms are combined
 * first (T6, TF, Tm, Tt); the remaining inputs are paired into sums and
 * differences; four outputs are the plain "2 * sum + base" terms and the
 * other sixteen come from four groups that mix a KP500000000 /
 * KP1_118033988 term with a KP1_175570504 / KP1_902113032 rotation.
 *
 * This variant spells the rotation with two separate constants.
 * NOTE(review): a second function with the same name follows in this file;
 * it computes the same expressions with the rotation factored through
 * KP618033988.  Only one of the two can be compiled into a translation unit.
 *
 * E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined outside this
 * block.
 */
static void r2cb_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); DK(KP500000000, +0.500000000000000000000000000000000000000000000); DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E T6, TF, Tm, Tt, TQ, T1n, T1f, T12, T1m, TV, T13, T1c, Td, Tk, Tl; E Ty, TD, TE, Tn, To, Tp, TG, TH, TI; { E T5, Ts, T3, Tq; { E T4, Tr, T1, T2; T4 = Cr[WS(csr, 5)]; T5 = KP2_000000000 * T4; Tr = Ci[WS(csi, 5)]; Ts = KP2_000000000 * Tr; T1 = Cr[0]; T2 = Cr[WS(csr, 10)]; T3 = T1 + T2; Tq = T1 - T2; } T6 = T3 - T5; TF = Tq - Ts; Tm = T3 + T5; Tt = Tq + Ts; } { E T9, Tu, TO, T1b, Tc, T1a, Tx, TP, Tg, Tz, TT, T1e, Tj, T1d, TC; E TU; { E T7, T8, TM, TN; T7 = Cr[WS(csr, 4)]; T8 = Cr[WS(csr, 6)]; T9 = T7 + T8; Tu = T7 - T8; TM = Ci[WS(csi, 4)]; TN = Ci[WS(csi, 6)]; TO = TM - TN; T1b = TM + TN; } { E Ta, Tb, Tv, Tw; Ta = Cr[WS(csr, 9)]; Tb = Cr[WS(csr, 1)]; Tc = Ta + Tb; T1a = Ta - Tb; Tv = Ci[WS(csi, 9)]; Tw = Ci[WS(csi, 1)]; Tx = Tv + Tw; TP = Tv - Tw; } { E Te, Tf, TR, TS; Te = Cr[WS(csr, 8)]; Tf = Cr[WS(csr, 2)]; Tg = Te + Tf; Tz = Te - Tf; TR = Ci[WS(csi, 8)]; TS = Ci[WS(csi, 2)]; TT = TR - TS; T1e = TR + TS; } { E Th, Ti, TA, TB; Th = Cr[WS(csr, 7)]; Ti = Cr[WS(csr, 3)]; Tj = Th + Ti; T1d = Th - Ti; TA = Ci[WS(csi, 7)]; TB = Ci[WS(csi, 3)]; TC = TA + TB; TU = TB - TA; } TQ = TO - TP; T1n = T1e - T1d; T1f = T1d + T1e; T12 = TP + TO; T1m = T1b - T1a; TV = TT - TU; T13 = TU + TT; T1c = T1a + T1b; Td = T9 - Tc; Tk = Tg - Tj; Tl = Td + Tk; Ty = Tu + Tx; TD = Tz - TC; TE = Ty + TD; Tn = T9 + Tc; To = Tg + Tj; Tp = Tn + To; TG = Tu - Tx; TH = Tz + TC; 
TI = TG + TH; } R0[WS(rs, 5)] = FMA(KP2_000000000, Tl, T6); R1[WS(rs, 7)] = FMA(KP2_000000000, TE, Tt); R1[WS(rs, 2)] = FMA(KP2_000000000, TI, TF); R0[0] = FMA(KP2_000000000, Tp, Tm); { E TW, TY, TL, TX, TJ, TK; TW = FNMS(KP1_902113032, TV, KP1_175570504 * TQ); TY = FMA(KP1_902113032, TQ, KP1_175570504 * TV); TJ = FNMS(KP500000000, Tl, T6); TK = KP1_118033988 * (Td - Tk); TL = TJ - TK; TX = TK + TJ; R0[WS(rs, 1)] = TL - TW; R0[WS(rs, 7)] = TX + TY; R0[WS(rs, 9)] = TL + TW; R0[WS(rs, 3)] = TX - TY; } { E T1g, T1i, T19, T1h, T17, T18; T1g = FNMS(KP1_902113032, T1f, KP1_175570504 * T1c); T1i = FMA(KP1_902113032, T1c, KP1_175570504 * T1f); T17 = FNMS(KP500000000, TI, TF); T18 = KP1_118033988 * (TG - TH); T19 = T17 - T18; T1h = T18 + T17; R1[WS(rs, 8)] = T19 - T1g; R1[WS(rs, 4)] = T1h + T1i; R1[WS(rs, 6)] = T19 + T1g; R1[0] = T1h - T1i; } { E T1o, T1q, T1l, T1p, T1j, T1k; T1o = FNMS(KP1_902113032, T1n, KP1_175570504 * T1m); T1q = FMA(KP1_902113032, T1m, KP1_175570504 * T1n); T1j = FNMS(KP500000000, TE, Tt); T1k = KP1_118033988 * (Ty - TD); T1l = T1j - T1k; T1p = T1k + T1j; R1[WS(rs, 3)] = T1l - T1o; R1[WS(rs, 9)] = T1p + T1q; R1[WS(rs, 1)] = T1l + T1o; R1[WS(rs, 5)] = T1p - T1q; } { E T14, T16, T11, T15, TZ, T10; T14 = FNMS(KP1_902113032, T13, KP1_175570504 * T12); T16 = FMA(KP1_902113032, T12, KP1_175570504 * T13); TZ = FNMS(KP500000000, Tp, Tm); T10 = KP1_118033988 * (Tn - To); T11 = TZ - T10; T15 = T10 + TZ; R0[WS(rs, 6)] = T11 - T14; R0[WS(rs, 2)] = T15 + T16; R0[WS(rs, 4)] = T11 + T14; R0[WS(rs, 8)] = T15 - T16; } } }
/*
 * r2cb_20 (FMA-factored variant): straight-line 20-point kernel, applied to
 * v consecutive vectors.  (The name suggests a size-20 "real-to-complex,
 * backward" codelet -- presumably generated; confirm against the build.)
 *
 * Per iteration it
 *   - reads  Cr[0], Cr[WS(csr, 1..10)] and Ci[WS(csi, 1..9)];
 *   - writes R0[0], R0[WS(rs, 1..9)] and R1[0], R1[WS(rs, 1..9)].
 * Ci[0] and Ci[WS(csi, 10)] are never read.
 *
 * Parameters:
 *   R0, R1        output arrays, indexed through stride rs, advanced by ovs
 *                 after each vector;
 *   Cr, Ci        input arrays, indexed through csr / csi, advanced by ivs
 *                 after each vector;
 *   v             number of vectors to process (loop runs while i > 0).
 *
 * Every arithmetic step other than a plain add/subtract is written as a
 * single FMA/FNMS call: the rotation is expressed as
 * KP1_902113032 * (a +/- KP618033988 * b) and the KP1_118033988 /
 * KP500000000 terms are folded into FMA/FNMS as well.  Loads are hoisted
 * ahead of their first use and the last eight stores are issued after the
 * inner scopes close, so the statement order does not follow output index.
 *
 * NOTE(review): this file contains another definition named r2cb_20; only
 * one of them can be compiled into a translation unit.
 *
 * E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined outside this
 * block.
 */
static void r2cb_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); DK(KP500000000, +0.500000000000000000000000000000000000000000000); DK(KP618033988, +0.618033988749894848204586834365638117720309180); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E TY, T1o, T1m, T14, T12, TX, T1n, T1j, TZ, T13; { E Tr, TD, Tl, T5, T1a, T1l, T1d, T1k, TT, T10, TO, T11, TE, TF, Tk; E TI, TC, T1i, To, TG, T16; { E T4, Tq, T1, T2; T4 = Cr[WS(csr, 5)]; Tq = Ci[WS(csi, 5)]; T1 = Cr[0]; T2 = Cr[WS(csr, 10)]; { E Ts, T8, T19, TR, T18, Tb, TS, Tv, Tx, Tf, Ty, T1c, TM, T1b, Ti; E Tz, Tt, Tu, TN, TA; { E TP, TQ, T9, Ta; { E T6, T7, Tp, T3; T6 = Cr[WS(csr, 4)]; T7 = Cr[WS(csr, 6)]; TP = Ci[WS(csi, 4)]; Tp = T1 - T2; T3 = T1 + T2; Ts = T6 - T7; T8 = T6 + T7; Tr = FMA(KP2_000000000, Tq, Tp); TD = FNMS(KP2_000000000, Tq, Tp); Tl = FMA(KP2_000000000, T4, T3); T5 = FNMS(KP2_000000000, T4, T3); TQ = Ci[WS(csi, 6)]; } T9 = Cr[WS(csr, 9)]; Ta = Cr[WS(csr, 1)]; Tt = Ci[WS(csi, 9)]; T19 = TP + TQ; TR = TP - TQ; T18 = T9 - Ta; Tb = T9 + Ta; Tu = Ci[WS(csi, 1)]; } { E TK, TL, Td, Te, Tg, Th; Td = Cr[WS(csr, 8)]; Te = Cr[WS(csr, 2)]; TK = Ci[WS(csi, 8)]; TS = Tt - Tu; Tv = Tt + Tu; Tx = Td - Te; Tf = Td + Te; TL = Ci[WS(csi, 2)]; Tg = Cr[WS(csr, 7)]; Th = Cr[WS(csr, 3)]; Ty = Ci[WS(csi, 7)]; T1c = TK + TL; TM = TK - TL; T1b = Tg - Th; Ti = Tg + Th; Tz = Ci[WS(csi, 3)]; } T1a = T18 + T19; T1l = T19 - T18; T1d = T1b + T1c; T1k = T1c - T1b; TT = TR - TS; T10 = TS + TR; TN = Tz - Ty; TA = Ty + Tz; TO = TM - TN; T11 = TN + TM; { E Tm, Tc, Tj, Tn, Tw, TB; Tm = T8 + Tb; Tc = T8 - Tb; Tj = Tf - Ti; Tn = Tf + Ti; TE = Ts - Tv; Tw = Ts + 
Tv; TB = Tx - TA; TF = Tx + TA; Tk = Tc + Tj; TI = Tc - Tj; TC = Tw + TB; T1i = Tw - TB; TY = Tm - Tn; To = Tm + Tn; } } } R0[WS(rs, 5)] = FMA(KP2_000000000, Tk, T5); R1[WS(rs, 7)] = FMA(KP2_000000000, TC, Tr); TG = TE + TF; T16 = TE - TF; R0[0] = FMA(KP2_000000000, To, Tl); { E TU, TW, T1g, T1e, T15, TV, TJ, TH, T1h, T1f, T17; TU = FNMS(KP618033988, TT, TO); TW = FMA(KP618033988, TO, TT); R1[WS(rs, 2)] = FMA(KP2_000000000, TG, TD); TH = FNMS(KP500000000, Tk, T5); T1g = FNMS(KP618033988, T1a, T1d); T1e = FMA(KP618033988, T1d, T1a); T15 = FNMS(KP500000000, TG, TD); TV = FMA(KP1_118033988, TI, TH); TJ = FNMS(KP1_118033988, TI, TH); T1o = FMA(KP618033988, T1k, T1l); T1m = FNMS(KP618033988, T1l, T1k); R0[WS(rs, 3)] = FNMS(KP1_902113032, TW, TV); R0[WS(rs, 7)] = FMA(KP1_902113032, TW, TV); R0[WS(rs, 1)] = FMA(KP1_902113032, TU, TJ); R0[WS(rs, 9)] = FNMS(KP1_902113032, TU, TJ); T1f = FNMS(KP1_118033988, T16, T15); T17 = FMA(KP1_118033988, T16, T15); T1h = FNMS(KP500000000, TC, Tr); R1[WS(rs, 6)] = FNMS(KP1_902113032, T1g, T1f); R1[WS(rs, 8)] = FMA(KP1_902113032, T1g, T1f); R1[WS(rs, 4)] = FMA(KP1_902113032, T1e, T17); R1[0] = FNMS(KP1_902113032, T1e, T17); T14 = FNMS(KP618033988, T10, T11); T12 = FMA(KP618033988, T11, T10); TX = FNMS(KP500000000, To, Tl); T1n = FMA(KP1_118033988, T1i, T1h); T1j = FNMS(KP1_118033988, T1i, T1h); } } R1[WS(rs, 5)] = FNMS(KP1_902113032, T1o, T1n); R1[WS(rs, 9)] = FMA(KP1_902113032, T1o, T1n); R1[WS(rs, 3)] = FMA(KP1_902113032, T1m, T1j); R1[WS(rs, 1)] = FNMS(KP1_902113032, T1m, T1j); TZ = FMA(KP1_118033988, TY, TX); T13 = FNMS(KP1_118033988, TY, TX); R0[WS(rs, 4)] = FNMS(KP1_902113032, T14, T13); R0[WS(rs, 6)] = FMA(KP1_902113032, T14, T13); R0[WS(rs, 2)] = FMA(KP1_902113032, T12, TZ); R0[WS(rs, 8)] = FNMS(KP1_902113032, T12, TZ); } }
/*
 * hc2cbdft_8: in-place 8-point twiddle kernel over four arrays.
 * (The name suggests a halfcomplex-to-complex backward DFT codelet --
 * presumably generated; confirm against the build.)
 *
 * Loop: m runs from mb up to (not including) me.  Before the first
 * iteration W is moved forward by (mb - 1) * 14 entries; after every
 * iteration W advances by 14, Rp and Ip advance by +ms, and Rm and Im move
 * by -ms (the "m" arrays are walked in the opposite direction).
 *
 * Per iteration it reads Rp, Ip, Rm, Im at offsets 0 and WS(rs, 1..3)
 * (16 values), reads W[0..13], and overwrites the same 16 locations.
 *
 * Structure of the body:
 *   1. sums/differences of mirrored pairs, e.g. Rp[0] with Rm[WS(rs, 3)]
 *      and Rp[WS(rs, 2)] with Rm[WS(rs, 1)];
 *   2. the four cross terms TB, T1e, Tw, T1i are scaled by KP707106781;
 *   3. each output group k = 0..3 combines two intermediate pairs, one
 *      rotated by (W[4k], W[4k+1]) -- except group 0, which uses its first
 *      pair unrotated -- and one rotated by (W[4k-2], W[4k-1]) shifted
 *      accordingly: group 0 uses W[0..1], group 1 W[2..5], group 2 W[6..9],
 *      group 3 W[10..13].
 *
 * NOTE(review): this file contains another definition named hc2cbdft_8;
 * only one of them can be compiled into a translation unit.
 *
 * E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined outside this
 * block.
 */
static void hc2cbdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs)) { E T7, T1d, T1h, Tl, TG, T14, T19, TO, Te, TL, T18, T15, TB, T1e, Tw; E T1i; { E T3, TC, Tk, TM, T6, Th, TF, TN; { E T1, T2, Ti, Tj; T1 = Rp[0]; T2 = Rm[WS(rs, 3)]; T3 = T1 + T2; TC = T1 - T2; Ti = Ip[0]; Tj = Im[WS(rs, 3)]; Tk = Ti + Tj; TM = Ti - Tj; } { E T4, T5, TD, TE; T4 = Rp[WS(rs, 2)]; T5 = Rm[WS(rs, 1)]; T6 = T4 + T5; Th = T4 - T5; TD = Ip[WS(rs, 2)]; TE = Im[WS(rs, 1)]; TF = TD + TE; TN = TD - TE; } T7 = T3 + T6; T1d = Tk - Th; T1h = TC + TF; Tl = Th + Tk; TG = TC - TF; T14 = T3 - T6; T19 = TM - TN; TO = TM + TN; } { E Ta, Tm, Tp, TJ, Td, Tr, Tu, TK; { E T8, T9, Tn, To; T8 = Rp[WS(rs, 1)]; T9 = Rm[WS(rs, 2)]; Ta = T8 + T9; Tm = T8 - T9; Tn = Ip[WS(rs, 1)]; To = Im[WS(rs, 2)]; Tp = Tn + To; TJ = Tn - To; } { E Tb, Tc, Ts, Tt; Tb = Rm[0]; Tc = Rp[WS(rs, 3)]; Td = Tb + Tc; Tr = Tb - Tc; Ts = Im[0]; Tt = Ip[WS(rs, 3)]; Tu = Ts + Tt; TK = Tt - Ts; } Te = Ta + Td; TL = TJ + TK; T18 = Ta - Td; T15 = TK - TJ; { E Tz, TA, Tq, Tv; Tz = Tm - Tp; TA = Tr - Tu; TB = KP707106781 * (Tz + TA); T1e = KP707106781 * (Tz - TA); Tq = Tm + Tp; Tv = Tr + Tu; Tw = KP707106781 * (Tq - Tv); T1i = KP707106781 * (Tq + Tv); } } { E Tf, TP, TI, TQ; Tf = T7 + Te; TP = TL + TO; { E Tx, TH, Tg, Ty; Tx = Tl + Tw; TH = TB + TG; Tg = W[0]; Ty = W[1]; TI = FMA(Tg, Tx, Ty * TH); TQ = FNMS(Ty, Tx, Tg * TH); } Rp[0] = Tf - TI; Ip[0] = TP + TQ; Rm[0] = Tf + TI; Im[0] = TQ - TP; } { E T1r, T1x, T1w, T1y; { E T1o, T1q, T1n, T1p; T1o = T14 - T15; T1q = T19 - T18; T1n = W[10]; T1p = W[11]; T1r = FNMS(T1p, T1q, T1n * T1o); T1x = FMA(T1p, T1o, T1n * T1q); } { E T1t, T1v, T1s, T1u; T1t = T1d - T1e; T1v = T1i + T1h; T1s = W[12]; T1u = W[13]; T1w = FMA(T1s, T1t, T1u * T1v); T1y = 
FNMS(T1u, T1t, T1s * T1v); } Rp[WS(rs, 3)] = T1r - T1w; Ip[WS(rs, 3)] = T1x + T1y; Rm[WS(rs, 3)] = T1r + T1w; Im[WS(rs, 3)] = T1y - T1x; } { E TV, T11, T10, T12; { E TS, TU, TR, TT; TS = T7 - Te; TU = TO - TL; TR = W[6]; TT = W[7]; TV = FNMS(TT, TU, TR * TS); T11 = FMA(TT, TS, TR * TU); } { E TX, TZ, TW, TY; TX = Tl - Tw; TZ = TG - TB; TW = W[8]; TY = W[9]; T10 = FMA(TW, TX, TY * TZ); T12 = FNMS(TY, TX, TW * TZ); } Rp[WS(rs, 2)] = TV - T10; Ip[WS(rs, 2)] = T11 + T12; Rm[WS(rs, 2)] = TV + T10; Im[WS(rs, 2)] = T12 - T11; } { E T1b, T1l, T1k, T1m; { E T16, T1a, T13, T17; T16 = T14 + T15; T1a = T18 + T19; T13 = W[2]; T17 = W[3]; T1b = FNMS(T17, T1a, T13 * T16); T1l = FMA(T17, T16, T13 * T1a); } { E T1f, T1j, T1c, T1g; T1f = T1d + T1e; T1j = T1h - T1i; T1c = W[4]; T1g = W[5]; T1k = FMA(T1c, T1f, T1g * T1j); T1m = FNMS(T1g, T1f, T1c * T1j); } Rp[WS(rs, 1)] = T1b - T1k; Ip[WS(rs, 1)] = T1l + T1m; Rm[WS(rs, 1)] = T1b + T1k; Im[WS(rs, 1)] = T1m - T1l; } } }
/*
 * hc2cbdft_8 (FMA-factored variant): in-place 8-point twiddle kernel over
 * four arrays.  (The name suggests a halfcomplex-to-complex backward DFT
 * codelet -- presumably generated; confirm against the build.)
 *
 * Loop: m runs from mb up to (not including) me.  Before the first
 * iteration W is moved forward by (mb - 1) * 14 entries; after every
 * iteration W advances by 14, Rp and Ip advance by +ms, and Rm and Im move
 * by -ms.
 *
 * Per iteration it reads Rp, Ip, Rm, Im at offsets 0 and WS(rs, 1..3)
 * (16 values), reads W[0..13], and overwrites the same 16 locations.
 *
 * All 16 inputs are loaded before any store.  The KP707106781 scalings are
 * folded into FMA/FNMS calls, and each twiddle rotation is split into a
 * plain product (e.g. TR = Tg * TI) followed by one FMA/FNMS that adds the
 * second product.  Stores are issued group by group in the order
 * 0, 3, 2, 1; the temporaries for group 1 (T1m, T1r, T1i, T1u, T1o, T1v,
 * T1n) are declared at loop scope because that group is finished after the
 * inner scopes close.
 *
 * NOTE(review): this file contains another definition named hc2cbdft_8;
 * only one of them can be compiled into a translation unit.
 *
 * E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined outside this
 * block.
 */
static void hc2cbdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs)) { E T1m, T1r, T1i, T1u, T1o, T1v, T1n, T1w, T1s; { E T1k, Tl, T1p, TE, TP, T1g, TM, T1b, T1f, T1a, TU, Tf, T1l, TH, Tw; E T1q; { E TA, T3, TN, Tk, Th, T6, TO, TD, Tb, Tm, Ta, TK, Tp, Tc, Ts; E Tt; { E T4, T5, TB, TC; { E T1, T2, Ti, Tj; T1 = Rp[0]; T2 = Rm[WS(rs, 3)]; Ti = Ip[0]; Tj = Im[WS(rs, 3)]; T4 = Rp[WS(rs, 2)]; TA = T1 - T2; T3 = T1 + T2; TN = Ti - Tj; Tk = Ti + Tj; T5 = Rm[WS(rs, 1)]; TB = Ip[WS(rs, 2)]; TC = Im[WS(rs, 1)]; } { E T8, T9, Tn, To; T8 = Rp[WS(rs, 1)]; Th = T4 - T5; T6 = T4 + T5; TO = TB - TC; TD = TB + TC; T9 = Rm[WS(rs, 2)]; Tn = Ip[WS(rs, 1)]; To = Im[WS(rs, 2)]; Tb = Rm[0]; Tm = T8 - T9; Ta = T8 + T9; TK = Tn - To; Tp = Tn + To; Tc = Rp[WS(rs, 3)]; Ts = Im[0]; Tt = Ip[WS(rs, 3)]; } } { E Tr, Td, Tu, TL, Te, T7; T1k = Tk - Th; Tl = Th + Tk; Tr = Tb - Tc; Td = Tb + Tc; TL = Tt - Ts; Tu = Ts + Tt; T1p = TA + TD; TE = TA - TD; TP = TN + TO; T1g = TN - TO; TM = TK + TL; T1b = TL - TK; T1f = Ta - Td; Te = Ta + Td; T1a = T3 - T6; T7 = T3 + T6; { E Tq, TF, TG, Tv; Tq = Tm + Tp; TF = Tm - Tp; TG = Tr - Tu; Tv = Tr + Tu; TU = T7 - Te; Tf = T7 + Te; T1l = TF - TG; TH = TF + TG; Tw = Tq - Tv; T1q = Tq + Tv; } } } { E TX, T10, T1c, T13, T1h, T1E, T1H, T1C, T1K, T1G, T1L, T1F; { E TQ, Tx, T1y, TI, Tg, Tz; TX = TP - TM; TQ = TM + TP; Tx = FMA(KP707106781, Tw, Tl); T10 = FNMS(KP707106781, Tw, Tl); T1c = T1a + T1b; T1y = T1a - T1b; T13 = FNMS(KP707106781, TH, TE); TI = FMA(KP707106781, TH, TE); Tg = W[0]; Tz = W[1]; { E T1B, T1A, T1x, T1J, T1z, T1D; { E TR, Ty, TS, TJ; T1B = T1g - T1f; T1h = T1f + T1g; T1A = W[11]; TR = Tg * TI; Ty = Tg * Tx; T1x = W[10]; T1J = T1A * T1y; TS = FNMS(Tz, Tx, TR); TJ = FMA(Tz, TI, Ty); T1z = T1x * T1y; T1m 
= FMA(KP707106781, T1l, T1k); T1E = FNMS(KP707106781, T1l, T1k); Im[0] = TS - TQ; Ip[0] = TQ + TS; Rm[0] = Tf + TJ; Rp[0] = Tf - TJ; T1H = FMA(KP707106781, T1q, T1p); T1r = FNMS(KP707106781, T1q, T1p); T1D = W[12]; } T1C = FNMS(T1A, T1B, T1z); T1K = FMA(T1x, T1B, T1J); T1G = W[13]; T1L = T1D * T1H; T1F = T1D * T1E; } } { E TY, T16, T12, T17, T11; { E TW, TT, T15, TV, TZ, T1M, T1I; TW = W[7]; T1M = FNMS(T1G, T1E, T1L); T1I = FMA(T1G, T1H, T1F); TT = W[6]; T15 = TW * TU; Im[WS(rs, 3)] = T1M - T1K; Ip[WS(rs, 3)] = T1K + T1M; Rm[WS(rs, 3)] = T1C + T1I; Rp[WS(rs, 3)] = T1C - T1I; TV = TT * TU; TZ = W[8]; TY = FNMS(TW, TX, TV); T16 = FMA(TT, TX, T15); T12 = W[9]; T17 = TZ * T13; T11 = TZ * T10; } { E T1e, T19, T1t, T1d, T1j, T18, T14; T1e = W[3]; T18 = FNMS(T12, T10, T17); T14 = FMA(T12, T13, T11); T19 = W[2]; T1t = T1e * T1c; Im[WS(rs, 2)] = T18 - T16; Ip[WS(rs, 2)] = T16 + T18; Rm[WS(rs, 2)] = TY + T14; Rp[WS(rs, 2)] = TY - T14; T1d = T19 * T1c; T1j = W[4]; T1i = FNMS(T1e, T1h, T1d); T1u = FMA(T19, T1h, T1t); T1o = W[5]; T1v = T1j * T1r; T1n = T1j * T1m; } } } } T1w = FNMS(T1o, T1m, T1v); T1s = FMA(T1o, T1r, T1n); Im[WS(rs, 1)] = T1w - T1u; Ip[WS(rs, 1)] = T1u + T1w; Rm[WS(rs, 1)] = T1i + T1s; Rp[WS(rs, 1)] = T1i - T1s; } }
/*
 * t2_8 (FMA-factored variant): in-place 8-point kernel on split real /
 * imaginary arrays that stores only three twiddle pairs per iteration and
 * derives the rest by multiplication.  (Presumably a generated
 * decimation-in-time twiddle codelet -- confirm against the build.)
 *
 * Loop: m runs from mb up to (not including) me.  W starts at
 * W + mb * 6 and advances by 6 per iteration; ri and ii advance by ms.
 *
 * Per iteration it reads W[0..5], reads ri/ii at offsets 0 and
 * WS(rs, 1..7), and overwrites those same 16 locations.
 *
 * Twiddle handling, as written in the code:
 *   - (W[0], W[1]) is applied to element 1, (W[2], W[3]) to element 3 and
 *     (W[4], W[5]) to element 7;
 *   - products of the W[0..3] entries give the pairs (T7, Tb) for element 4
 *     and (Tf, Ti) for element 2;
 *   - (To, Ts), built from W[0], W[1], W[4], W[5], is applied to element 6;
 *   - (TC, TG), built from (Tf, Ti) and W[4], W[5], is applied to element 5;
 *   - element 0 is used unmodified.
 * Each application has the form re' = a*re + b*im, im' = a*im - b*re,
 * split into one plain product followed by one FMA/FNMS.
 *
 * The odd-indexed outputs carry a KP707106781 factor folded into
 * FMA/FNMS; the even-indexed outputs are pure sums and differences and are
 * stored last, after the inner scopes close.
 *
 * NOTE(review): this file contains another definition named t2_8; only one
 * of them can be compiled into a translation unit.
 *
 * E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined outside this
 * block.
 */
static void t2_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP707106781, +0.707106781186547524400844362104849039284835938); { INT m; for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { E TS, T1m, TJ, T1l, T1k, Tw, T1w, T1u; { E T2, T3, Tl, Tn, T5, T4, Tm, Tr, T6; T2 = W[0]; T3 = W[2]; Tl = W[4]; Tn = W[5]; T5 = W[1]; T4 = T2 * T3; Tm = T2 * Tl; Tr = T2 * Tn; T6 = W[3]; { E T1, T1s, TG, Td, T1r, Tu, TY, Tk, TW, T18, T1d, TD, TH, TA, T13; E TE, T14; { E To, Ts, Tf, T7, T8, Ti, Tb, T9, Tc, TC, Ta, TF, TB, Tg, Th; E Tj; T1 = ri[0]; To = FMA(T5, Tn, Tm); Ts = FNMS(T5, Tl, Tr); Tf = FMA(T5, T6, T4); T7 = FNMS(T5, T6, T4); Ta = T2 * T6; T1s = ii[0]; T8 = ri[WS(rs, 4)]; TF = Tf * Tn; TB = Tf * Tl; Ti = FNMS(T5, T3, Ta); Tb = FMA(T5, T3, Ta); T9 = T7 * T8; Tc = ii[WS(rs, 4)]; TG = FNMS(Ti, Tl, TF); TC = FMA(Ti, Tn, TB); { E Tp, T1q, Tt, Tq, TX; Tp = ri[WS(rs, 6)]; Td = FMA(Tb, Tc, T9); T1q = T7 * Tc; Tt = ii[WS(rs, 6)]; Tq = To * Tp; Tg = ri[WS(rs, 2)]; T1r = FNMS(Tb, T8, T1q); TX = To * Tt; Tu = FMA(Ts, Tt, Tq); Th = Tf * Tg; Tj = ii[WS(rs, 2)]; TY = FNMS(Ts, Tp, TX); } { E TO, TQ, TN, TP, T1a, T1b; { E TK, TM, TL, T19, TV; TK = ri[WS(rs, 7)]; TM = ii[WS(rs, 7)]; Tk = FMA(Ti, Tj, Th); TV = Tf * Tj; TL = Tl * TK; T19 = Tl * TM; TO = ri[WS(rs, 3)]; TW = FNMS(Ti, Tg, TV); TQ = ii[WS(rs, 3)]; TN = FMA(Tn, TM, TL); TP = T3 * TO; T1a = FNMS(Tn, TK, T19); T1b = T3 * TQ; } { E Tx, Tz, Ty, T12, T1c, TR; Tx = ri[WS(rs, 1)]; TR = FMA(T6, TQ, TP); Tz = ii[WS(rs, 1)]; T1c = FNMS(T6, TO, T1b); Ty = T2 * Tx; T18 = TN - TR; TS = TN + TR; T12 = T2 * Tz; T1d = T1a - T1c; T1m = T1a + T1c; TD = ri[WS(rs, 5)]; TH = ii[WS(rs, 5)]; TA = FMA(T5, Tz, Ty); T13 = FNMS(T5, Tx, T12); TE = TC * TD; T14 = TC * TH; } } } { E Te, T1p, T1t, Tv; { E T1g, T10, T1z, T1B, T1A, T1j, T1C, T1f; { E T1x, T11, T16, T1y; { E TU, TZ, TI, T15; Te = T1 + Td; TU = T1 - Td; TZ = TW - TY; T1p = TW + TY; TI = FMA(TG, TH, TE); T15 = 
FNMS(TG, TD, T14); T1t = T1r + T1s; T1x = T1s - T1r; T1g = TU - TZ; T10 = TU + TZ; T11 = TA - TI; TJ = TA + TI; T1l = T13 + T15; T16 = T13 - T15; T1y = Tk - Tu; Tv = Tk + Tu; } { E T1i, T1e, T17, T1h; T1i = T18 + T1d; T1e = T18 - T1d; T17 = T11 + T16; T1h = T16 - T11; T1z = T1x - T1y; T1B = T1y + T1x; T1A = T1h + T1i; T1j = T1h - T1i; T1C = T1e - T17; T1f = T17 + T1e; } } ri[WS(rs, 7)] = FNMS(KP707106781, T1j, T1g); ii[WS(rs, 7)] = FNMS(KP707106781, T1C, T1B); ri[WS(rs, 1)] = FMA(KP707106781, T1f, T10); ri[WS(rs, 5)] = FNMS(KP707106781, T1f, T10); ii[WS(rs, 1)] = FMA(KP707106781, T1A, T1z); ii[WS(rs, 5)] = FNMS(KP707106781, T1A, T1z); ri[WS(rs, 3)] = FMA(KP707106781, T1j, T1g); ii[WS(rs, 3)] = FMA(KP707106781, T1C, T1B); } T1k = Te - Tv; Tw = Te + Tv; T1w = T1t - T1p; T1u = T1p + T1t; } } } { E TT, T1v, T1n, T1o; TT = TJ + TS; T1v = TS - TJ; T1n = T1l - T1m; T1o = T1l + T1m; ii[WS(rs, 2)] = T1v + T1w; ii[WS(rs, 6)] = T1w - T1v; ri[0] = Tw + TT; ri[WS(rs, 4)] = Tw - TT; ii[0] = T1o + T1u; ii[WS(rs, 4)] = T1u - T1o; ri[WS(rs, 2)] = T1k + T1n; ri[WS(rs, 6)] = T1k - T1n; } } } }
/*
 * t2_8: in-place 8-point kernel on split real / imaginary arrays that
 * stores only three twiddle pairs per iteration and derives the rest by
 * multiplication.  (Presumably a generated decimation-in-time twiddle
 * codelet -- confirm against the build.)
 *
 * Loop: m runs from mb up to (not including) me.  W starts at
 * W + mb * 6 and advances by 6 per iteration; ri and ii advance by ms.
 *
 * Per iteration it reads W[0..5], reads ri/ii at offsets 0 and
 * WS(rs, 1..7), and overwrites those same 16 locations.
 *
 * Step 1 (first inner scope) builds the derived factor pairs:
 *     (T8, Tc) = (W0*W2 - W1*W3, W0*W3 + W1*W2)   used for element 4
 *     (Tg, Ti) = (W0*W2 + W1*W3, W0*W3 - W1*W2)   used for element 2
 *     (Tn, Tp) = (W0*W4 + W1*W5, W0*W5 - W1*W4)   used for element 6
 *     (Tx, Tz) = (Tg*W4 + Ti*W5, Tg*W5 - Ti*W4)   used for element 5
 *   while (W0, W1), (W2, W3) and (W4, W5) are applied directly to
 *   elements 1, 3 and 7.  Element 0 is used unmodified.
 * Step 2 applies each pair (a, b) as re' = a*re + b*im, im' = a*im - b*re
 *   and forms sums/differences of elements {0,4}, {7,3}, {2,6}, {1,5}.
 * Step 3 writes the even-indexed outputs from pure sums/differences and
 *   the odd-indexed outputs with an extra KP707106781 * (x +/- y) term.
 *
 * NOTE(review): this file contains another definition named t2_8; only one
 * of them can be compiled into a translation unit.
 *
 * E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined outside this
 * block.
 */
static void t2_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP707106781, +0.707106781186547524400844362104849039284835938); { INT m; for (m = mb, W = W + (mb * 6); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { E T2, T5, T3, T6, T8, Tc, Tg, Ti, Tl, Tm, Tn, Tz, Tp, Tx; { E T4, Tb, T7, Ta; T2 = W[0]; T5 = W[1]; T3 = W[2]; T6 = W[3]; T4 = T2 * T3; Tb = T5 * T3; T7 = T5 * T6; Ta = T2 * T6; T8 = T4 - T7; Tc = Ta + Tb; Tg = T4 + T7; Ti = Ta - Tb; Tl = W[4]; Tm = W[5]; Tn = FMA(T2, Tl, T5 * Tm); Tz = FNMS(Ti, Tl, Tg * Tm); Tp = FNMS(T5, Tl, T2 * Tm); Tx = FMA(Tg, Tl, Ti * Tm); } { E Tf, T1i, TL, T1d, TJ, T17, TV, TY, Ts, T1j, TO, T1a, TC, T16, TQ; E TT; { E T1, T1c, Te, T1b, T9, Td; T1 = ri[0]; T1c = ii[0]; T9 = ri[WS(rs, 4)]; Td = ii[WS(rs, 4)]; Te = FMA(T8, T9, Tc * Td); T1b = FNMS(Tc, T9, T8 * Td); Tf = T1 + Te; T1i = T1c - T1b; TL = T1 - Te; T1d = T1b + T1c; } { E TF, TW, TI, TX; { E TD, TE, TG, TH; TD = ri[WS(rs, 7)]; TE = ii[WS(rs, 7)]; TF = FMA(Tl, TD, Tm * TE); TW = FNMS(Tm, TD, Tl * TE); TG = ri[WS(rs, 3)]; TH = ii[WS(rs, 3)]; TI = FMA(T3, TG, T6 * TH); TX = FNMS(T6, TG, T3 * TH); } TJ = TF + TI; T17 = TW + TX; TV = TF - TI; TY = TW - TX; } { E Tk, TM, Tr, TN; { E Th, Tj, To, Tq; Th = ri[WS(rs, 2)]; Tj = ii[WS(rs, 2)]; Tk = FMA(Tg, Th, Ti * Tj); TM = FNMS(Ti, Th, Tg * Tj); To = ri[WS(rs, 6)]; Tq = ii[WS(rs, 6)]; Tr = FMA(Tn, To, Tp * Tq); TN = FNMS(Tp, To, Tn * Tq); } Ts = Tk + Tr; T1j = Tk - Tr; TO = TM - TN; T1a = TM + TN; } { E Tw, TR, TB, TS; { E Tu, Tv, Ty, TA; Tu = ri[WS(rs, 1)]; Tv = ii[WS(rs, 1)]; Tw = FMA(T2, Tu, T5 * Tv); TR = FNMS(T5, Tu, T2 * Tv); Ty = ri[WS(rs, 5)]; TA = ii[WS(rs, 5)]; TB = FMA(Tx, Ty, Tz * TA); TS = FNMS(Tz, Ty, Tx * TA); } TC = Tw + TB; T16 = TR + TS; TQ = Tw - TB; TT = TR - TS; } { E Tt, TK, T1f, T1g; Tt = Tf + Ts; TK = TC + TJ; ri[WS(rs, 4)] = Tt - TK; ri[0] = Tt + TK; { E T19, T1e, T15, T18; T19 = T16 + T17; T1e = T1a + T1d; ii[0] = T19 + T1e; ii[WS(rs, 4)] = T1e 
- T19; T15 = Tf - Ts; T18 = T16 - T17; ri[WS(rs, 6)] = T15 - T18; ri[WS(rs, 2)] = T15 + T18; } T1f = TJ - TC; T1g = T1d - T1a; ii[WS(rs, 2)] = T1f + T1g; ii[WS(rs, 6)] = T1g - T1f; { E T11, T1k, T14, T1h, T12, T13; T11 = TL - TO; T1k = T1i - T1j; T12 = TT - TQ; T13 = TV + TY; T14 = KP707106781 * (T12 - T13); T1h = KP707106781 * (T12 + T13); ri[WS(rs, 7)] = T11 - T14; ii[WS(rs, 5)] = T1k - T1h; ri[WS(rs, 3)] = T11 + T14; ii[WS(rs, 1)] = T1h + T1k; } { E TP, T1m, T10, T1l, TU, TZ; TP = TL + TO; T1m = T1j + T1i; TU = TQ + TT; TZ = TV - TY; T10 = KP707106781 * (TU + TZ); T1l = KP707106781 * (TZ - TU); ri[WS(rs, 5)] = TP - T10; ii[WS(rs, 7)] = T1m - T1l; ri[WS(rs, 1)] = TP + T10; ii[WS(rs, 3)] = T1l + T1m; } } } } } }
/*
 * r2cfII_20: straight-line 20-point kernel, applied to v consecutive
 * vectors.  (The name suggests a size-20 "real-to-complex forward, type II"
 * codelet -- presumably generated; confirm against the build.)
 *
 * Per iteration it
 *   - reads  R0[0], R0[WS(rs, 1..9)] and R1[0], R1[WS(rs, 1..9)];
 *   - writes Cr[0], Cr[WS(csr, 1..9)] and Ci[0], Ci[WS(csi, 1..9)].
 *
 * Parameters:
 *   R0, R1        input arrays, indexed through stride rs, advanced by ivs
 *                 after each vector;
 *   Cr, Ci        output arrays, indexed through csr / csi, advanced by ovs
 *                 after each vector;
 *   v             number of vectors to process (loop runs while i > 0).
 *
 * Structure of the body:
 *   - the R0 inputs feed the quantities that end up un-scaled in the
 *     outputs (T19, T1w, TT, T15, T1s, T1u, TN, Th, T1l, T1n);
 *   - the R1 inputs feed the quantities that are multiplied by
 *     KP707106781 in the final FMA/FNMS/FMS of every store;
 *   - all multiplications are folded into FMA/FNMS/FMS calls, so the
 *     declared constants are the factored ones (KP381966011, KP618033988,
 *     KP447213595, KP552786404, KP690983005, ...).
 * Outputs are stored in three bursts: indices {7, 2}, then {1, 8, 3, 6},
 * then {4, 5, 9, 0} after the inner scopes close.  Several Ci outputs are
 * written with an explicit leading negation.
 *
 * E, DK, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE are defined outside
 * this block.
 */
static void r2cfII_20(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP690983005, +0.690983005625052575897706582817180941139845410); DK(KP552786404, +0.552786404500042060718165266253744752911876328); DK(KP447213595, +0.447213595499957939281834733746255247088123672); DK(KP809016994, +0.809016994374947424102293417182819058860154590); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP618033988, +0.618033988749894848204586834365638117720309180); DK(KP381966011, +0.381966011250105151795413165634361882279690820); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E Tv, TK, TN, Th, T1l, T1n, Ts, TH; { E Ti, T1d, T1f, T1e, T1g, T1p, TS, Tg, To, T8, T7, T19, T1r, T1k, Tx; E Tp, TX, Ty, TF, Tr, TV, Tz, TA, TI; { E Ta, Tb, Td, Te; Ti = R1[WS(rs, 2)]; T1d = R0[WS(rs, 5)]; Ta = R0[WS(rs, 9)]; Tb = R0[WS(rs, 1)]; Td = R0[WS(rs, 3)]; Te = R0[WS(rs, 7)]; { E T1, T2, T5, T3, T4, T1i, Tc, Tf; T1 = R0[0]; T1f = Ta + Tb; Tc = Ta - Tb; T1e = Td + Te; Tf = Td - Te; T2 = R0[WS(rs, 4)]; T5 = R0[WS(rs, 6)]; T1g = FMA(KP381966011, T1f, T1e); T1p = FMA(KP381966011, T1e, T1f); TS = FMA(KP618033988, Tc, Tf); Tg = FNMS(KP618033988, Tf, Tc); T3 = R0[WS(rs, 8)]; T4 = R0[WS(rs, 2)]; T1i = T2 + T5; { E Tj, Tu, Tm, Tt, Tn, Tq, TU; Tj = R1[WS(rs, 8)]; To = R1[WS(rs, 6)]; { E T6, T1j, Tk, Tl; T6 = T2 + T3 - T4 - T5; T8 = (T3 + T5 - T2) - T4; T1j = T3 + T4; Tk = R1[0]; Tl = R1[WS(rs, 4)]; T7 = FNMS(KP250000000, T6, T1); T19 = T1 + T6; T1r = FNMS(KP618033988, T1i, T1j); T1k = FMA(KP618033988, T1j, T1i); Tu = Tk - Tl; Tm = Tk + Tl; } Tt = To + Tj; Tx = R1[WS(rs, 7)]; Tn = Tj - Tm; Tp = Tj + Tm; Tv = 
FNMS(KP618033988, Tu, Tt); TX = FMA(KP618033988, Tt, Tu); Tq = FMA(KP809016994, Tp, To); TU = FMA(KP447213595, Tp, Tn); Ty = R1[WS(rs, 1)]; TF = R1[WS(rs, 3)]; Tr = FNMS(KP552786404, Tq, Tn); TV = FNMS(KP690983005, TU, To); Tz = R1[WS(rs, 5)]; TA = R1[WS(rs, 9)]; TI = TF + Ty; } } } { E T1w, TJ, TB, T1a; T1w = T1f + T1d - T1e; TJ = Tz - TA; TB = Tz + TA; T1a = Ti + To - Tp; { E T9, T12, TT, T15, TG, TD, T1s, T1u, TW, T11, T10, T1h; { E TE, TC, TR, T1b; T9 = FNMS(KP559016994, T8, T7); TR = FMA(KP559016994, T8, T7); TK = FMA(KP618033988, TJ, TI); T12 = FNMS(KP618033988, TI, TJ); TE = Ty - TB; TC = Ty + TB; TT = FMA(KP951056516, TS, TR); T15 = FNMS(KP951056516, TS, TR); TG = FNMS(KP552786404, TF, TE); T1b = TC - TF - Tx; { E TZ, T1q, T1c, T1x; TZ = FMA(KP447213595, TC, TE); TD = FMA(KP250000000, TC, Tx); T1q = FNMS(KP809016994, T1p, T1d); T1c = T1a + T1b; T1x = T1a - T1b; T10 = FNMS(KP690983005, TZ, TF); T1s = FNMS(KP951056516, T1r, T1q); T1u = FMA(KP951056516, T1r, T1q); Ci[WS(csi, 7)] = FMA(KP707106781, T1x, T1w); Ci[WS(csi, 2)] = FMS(KP707106781, T1x, T1w); Cr[WS(csr, 7)] = FMA(KP707106781, T1c, T19); Cr[WS(csr, 2)] = FNMS(KP707106781, T1c, T19); } } TW = FNMS(KP809016994, TV, Ti); T11 = FNMS(KP809016994, T10, Tx); T1h = FMA(KP809016994, T1g, T1d); { E T17, TY, T16, T13; T17 = FNMS(KP951056516, TX, TW); TY = FMA(KP951056516, TX, TW); T16 = FMA(KP951056516, T12, T11); T13 = FNMS(KP951056516, T12, T11); TN = FMA(KP951056516, Tg, T9); Th = FNMS(KP951056516, Tg, T9); { E T18, T1v, T1t, T14; T18 = T16 - T17; T1v = T17 + T16; T1t = TY + T13; T14 = TY - T13; Cr[WS(csr, 1)] = FMA(KP707106781, T18, T15); Cr[WS(csr, 8)] = FNMS(KP707106781, T18, T15); Ci[WS(csi, 3)] = FMA(KP707106781, T1v, T1u); Ci[WS(csi, 6)] = FMS(KP707106781, T1v, T1u); Ci[WS(csi, 1)] = FNMS(KP707106781, T1t, T1s); Ci[WS(csi, 8)] = -(FMA(KP707106781, T1t, T1s)); Cr[WS(csr, 3)] = FMA(KP707106781, T14, TT); Cr[WS(csr, 6)] = FNMS(KP707106781, T14, TT); T1l = FMA(KP951056516, T1k, T1h); T1n = FNMS(KP951056516, 
T1k, T1h); } } Ts = FNMS(KP559016994, Tr, Ti); TH = FNMS(KP559016994, TG, TD); } } } { E TO, Tw, TP, TL; TO = FMA(KP951056516, Tv, Ts); Tw = FNMS(KP951056516, Tv, Ts); TP = FMA(KP951056516, TK, TH); TL = FNMS(KP951056516, TK, TH); { E TQ, T1m, T1o, TM; TQ = TO - TP; T1m = TO + TP; T1o = Tw + TL; TM = Tw - TL; Cr[WS(csr, 4)] = FMA(KP707106781, TQ, TN); Cr[WS(csr, 5)] = FNMS(KP707106781, TQ, TN); Ci[WS(csi, 9)] = FNMS(KP707106781, T1m, T1l); Ci[0] = -(FMA(KP707106781, T1m, T1l)); Ci[WS(csi, 5)] = FNMS(KP707106781, T1o, T1n); Ci[WS(csi, 4)] = -(FMA(KP707106781, T1o, T1n)); Cr[0] = FMA(KP707106781, TM, Th); Cr[WS(csr, 9)] = FNMS(KP707106781, TM, Th); } } } } }
/*
 * t1_8: in-place 8-point kernel on split real / imaginary arrays with one
 * stored twiddle pair per non-zero element.  (Presumably a generated
 * decimation-in-time twiddle codelet -- confirm against the build.)
 *
 * Loop: m runs from mb up to (not including) me.  W starts at
 * W + mb * 14 and advances by 14 per iteration; ri and ii advance by ms.
 *
 * Per iteration it reads W[0..13], reads ri/ii at offsets 0 and
 * WS(rs, 1..7), and overwrites those same 16 locations.
 *
 * Twiddle handling: element k (k = 1..7) is combined with the pair
 * (W[2k-2], W[2k-1]) as re' = a*re + b*im, im' = a*im - b*re, each written
 * as one plain product followed by one FMA/FNMS.  Element 0 is used
 * unmodified.
 *
 * The odd-indexed outputs carry a KP707106781 factor folded into
 * FMA/FNMS; the even-indexed outputs are pure sums and differences and are
 * stored last, after the inner scopes close.
 *
 * NOTE(review): the pointer parameters are spelled `float` here, whereas
 * the sibling kernels in this file use the `R` typedef, and
 * MAKE_VOLATILE_STRIDE is called with one argument here but with two in
 * t2_8 and hc2cfdft2_16.  Confirm that R is float in this build and which
 * macro arity the surrounding headers expect.
 *
 * E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined outside this
 * block.
 */
static void t1_8(float *ri, float *ii, const float *W, stride rs, INT mb, INT me, INT ms) { DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + (mb * 14); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs)) { E T1g, T1f, T1e, Tm, T1q, T1o, T1p, TN, T1h, T1i; { E T1, T1m, T1l, T7, TS, Tk, TQ, Te, To, Tr, T17, TM, T12, Tu, TW; E Tp, Tx, Tt, Tq, Tw; { E T3, T6, T2, T5; T1 = ri[0]; T1m = ii[0]; T3 = ri[WS(rs, 4)]; T6 = ii[WS(rs, 4)]; T2 = W[6]; T5 = W[7]; { E Ta, Td, T9, Tc; { E Tg, Tj, Ti, TR, Th, T1k, T4, Tf; Tg = ri[WS(rs, 6)]; Tj = ii[WS(rs, 6)]; T1k = T2 * T6; T4 = T2 * T3; Tf = W[10]; Ti = W[11]; T1l = FNMS(T5, T3, T1k); T7 = FMA(T5, T6, T4); TR = Tf * Tj; Th = Tf * Tg; Ta = ri[WS(rs, 2)]; Td = ii[WS(rs, 2)]; TS = FNMS(Ti, Tg, TR); Tk = FMA(Ti, Tj, Th); T9 = W[2]; Tc = W[3]; } { E TB, TE, TH, T13, TC, TK, TG, TD, TJ, TP, Tb, TA, Tn; TB = ri[WS(rs, 7)]; TE = ii[WS(rs, 7)]; TP = T9 * Td; Tb = T9 * Ta; TA = W[12]; TH = ri[WS(rs, 3)]; TQ = FNMS(Tc, Ta, TP); Te = FMA(Tc, Td, Tb); T13 = TA * TE; TC = TA * TB; TK = ii[WS(rs, 3)]; TG = W[4]; TD = W[13]; TJ = W[5]; { E T14, TF, T16, TL, T15, TI; To = ri[WS(rs, 1)]; T15 = TG * TK; TI = TG * TH; T14 = FNMS(TD, TB, T13); TF = FMA(TD, TE, TC); T16 = FNMS(TJ, TH, T15); TL = FMA(TJ, TK, TI); Tr = ii[WS(rs, 1)]; Tn = W[0]; T17 = T14 - T16; T1g = T14 + T16; TM = TF + TL; T12 = TF - TL; } Tu = ri[WS(rs, 5)]; TW = Tn * Tr; Tp = Tn * To; Tx = ii[WS(rs, 5)]; Tt = W[8]; Tq = W[1]; Tw = W[9]; } } } { E T8, T1j, T1n, Tz, T1a, TU, Tl, T1b, T1c, T1v, T1t, T1w, T19, T1u, T1d; { E T1r, T10, TV, T1s, T11, T18; { E TO, TX, Ts, TZ, Ty, TT, TY, Tv; T8 = T1 + T7; TO = T1 - T7; TY = Tt * Tx; Tv = Tt * Tu; TX = FNMS(Tq, To, TW); Ts = FMA(Tq, Tr, Tp); TZ = FNMS(Tw, Tu, TY); Ty = FMA(Tw, Tx, Tv); TT = TQ - TS; T1j = TQ + TS; T1n = T1l + T1m; T1r = T1m - T1l; T10 = TX - TZ; T1f = TX + TZ; Tz = Ts + Ty; TV = Ts - Ty; T1a = TO - TT; TU = TO + TT; T1s = Te - Tk; Tl = Te + Tk; } 
T1b = T10 - TV; T11 = TV + T10; T18 = T12 - T17; T1c = T12 + T17; T1v = T1s + T1r; T1t = T1r - T1s; T1w = T18 - T11; T19 = T11 + T18; } ii[WS(rs, 3)] = FMA(KP707106781, T1w, T1v); ii[WS(rs, 7)] = FNMS(KP707106781, T1w, T1v); ri[WS(rs, 1)] = FMA(KP707106781, T19, TU); ri[WS(rs, 5)] = FNMS(KP707106781, T19, TU); T1u = T1b + T1c; T1d = T1b - T1c; ii[WS(rs, 1)] = FMA(KP707106781, T1u, T1t); ii[WS(rs, 5)] = FNMS(KP707106781, T1u, T1t); ri[WS(rs, 3)] = FMA(KP707106781, T1d, T1a); ri[WS(rs, 7)] = FNMS(KP707106781, T1d, T1a); T1e = T8 - Tl; Tm = T8 + Tl; T1q = T1n - T1j; T1o = T1j + T1n; T1p = TM - Tz; TN = Tz + TM; } } ii[WS(rs, 2)] = T1p + T1q; ii[WS(rs, 6)] = T1q - T1p; ri[0] = Tm + TN; ri[WS(rs, 4)] = Tm - TN; T1h = T1f - T1g; T1i = T1f + T1g; ii[0] = T1i + T1o; ii[WS(rs, 4)] = T1o - T1i; ri[WS(rs, 2)] = T1e + T1h; ri[WS(rs, 6)] = T1e - T1h; } }
/*
 * hc2cfdft2_16: in-place 16-point twiddle kernel over four arrays that
 * stores four twiddle pairs per iteration and derives the others by
 * multiplication.  (The name suggests a halfcomplex-to-complex forward DFT
 * codelet with compressed twiddles -- presumably generated; confirm against
 * the build.)
 *
 * Loop: m runs from mb up to (not including) me.  Before the first
 * iteration W is moved forward by (mb - 1) * 8 entries; after every
 * iteration W advances by 8, Rp and Ip advance by +ms, and Rm and Im move
 * by -ms.
 *
 * Per iteration it reads W[0..7], reads Rp, Ip, Rm, Im at offsets 0 and
 * WS(rs, 1..7) (32 values), and overwrites the same 32 locations.
 *
 * Structure of the body:
 *   1. W[0..7] are loaded into T1, T4, T2, T5, Th, Tj, Tw, Ty and
 *      pairwise products are combined with FMA/FNMS into the derived
 *      factors (TF, Tk, Tz, T18, TL, TY, T6, TR, T1d, Tq, T1o, Tc, T13,
 *      T2h, T25, T2k, T29, T1H, T1A, T1K, T1E, T1j);
 *   2. each Ip/Im pair and each Rp/Rm pair at the same offset is turned
 *      into a sum and a difference, and those are multiplied by the
 *      stored or derived factors;
 *   3. the results are combined through add/subtract stages using
 *      KP707106781, KP414213562 and KP923879532 folded into FMA/FNMS;
 *   4. every stored value is multiplied by KP500000000, and several Im
 *      outputs are written with an explicit leading negation.
 * The final four stores (Rp[3], Rm[4], Im[4], Ip[3]) use T4p, T4o, T4n,
 * T4s, which are declared at loop scope because they are written after the
 * inner scopes close.
 *
 * E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined outside this
 * block.
 */
static void hc2cfdft2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP500000000, +0.500000000000000000000000000000000000000000000); { INT m; for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { E T4p, T4o, T4n, T4s; { E T1, T2, Tw, Ty, Th, T3, Tx, TE, Ti, TK, Tj, T4, T5; T1 = W[0]; T2 = W[2]; Tw = W[6]; Ty = W[7]; Th = W[4]; T3 = T1 * T2; Tx = T1 * Tw; TE = T1 * Ty; Ti = T1 * Th; TK = T2 * Th; Tj = W[5]; T4 = W[1]; T5 = W[3]; { E T1v, T2q, T1s, T2s, T38, T3T, T1Y, T3P, T17, T1h, T2x, T2v, T33, T3Q, T3S; E T1N, Tv, T3A, T2E, T3B, T3L, T2c, T3I, T2S, TW, T3E, T3J, T2n, T3D, T2J; E T3M, T2X; { E TF, Tk, Tz, TL, T6, TR, Tq, Tc, T2h, T25, T2k, T29, T1G, T1M, T2P; E T2R; { E T18, TY, T1d, T13, T1H, T1A, T1K, T1E, T37, T1R, T35, T1X; { E T1j, T1o, T1W, T1p, T1m, T1Q, T1U, T1q; { E T1k, T1l, T1S, T1T; { E T1t, T28, T24, T1D, T1z, T1u, TQ, Tp, Tb; T1t = Ip[0]; TQ = T2 * Tj; Tp = T1 * Tj; TF = FNMS(T4, Tw, TE); T1j = FMA(T4, Tj, Ti); Tk = FNMS(T4, Tj, Ti); Tz = FMA(T4, Ty, Tx); T18 = FNMS(T5, Tj, TK); TL = FMA(T5, Tj, TK); TY = FNMS(T4, T5, T3); T6 = FMA(T4, T5, T3); Tb = T1 * T5; TR = FNMS(T5, Th, TQ); T1d = FMA(T5, Th, TQ); Tq = FMA(T4, Th, Tp); T1o = FNMS(T4, Th, Tp); T28 = T6 * Tj; T24 = T6 * Th; T1D = TY * Tj; T1z = TY * Th; Tc = FNMS(T4, T2, Tb); T13 = FMA(T4, T2, Tb); T1u = Im[0]; T1k = Ip[WS(rs, 4)]; T2h = FMA(Tc, Tj, T24); T25 = FNMS(Tc, Tj, T24); T2k = FNMS(Tc, Th, T28); T29 = FMA(Tc, Th, T28); T1H = FNMS(T13, Tj, T1z); T1A = FMA(T13, Tj, T1z); T1K = FMA(T13, Th, T1D); T1E = FNMS(T13, Th, T1D); T1W = T1t + T1u; T1v = T1t - T1u; T1l = Im[WS(rs, 4)]; } T1S = Rm[0]; T1T = Rp[0]; T1p = Rp[WS(rs, 4)]; T1m = T1k - T1l; T1Q = T1k + T1l; 
T2q = T1T + T1S; T1U = T1S - T1T; T1q = Rm[WS(rs, 4)]; } { E T36, T1V, T1O, T1r, T1n, T1P, T34, T2r; T36 = T4 * T1U; T1V = T1 * T1U; T1O = T1q - T1p; T1r = T1p + T1q; T1n = T1j * T1m; T37 = FMA(T1, T1W, T36); T2r = T1j * T1r; T1P = Th * T1O; T34 = Tj * T1O; T1s = FNMS(T1o, T1r, T1n); T2s = FMA(T1o, T1m, T2r); T1R = FNMS(Tj, T1Q, T1P); T35 = FMA(Th, T1Q, T34); T1X = FNMS(T4, T1W, T1V); } } { E T1F, T11, T1e, T16, T1L, T1b, T1f, T1C, T2Z; { E T14, T15, TZ, T10, T19, T1a, T1B; TZ = Ip[WS(rs, 2)]; T10 = Im[WS(rs, 2)]; T38 = T35 + T37; T3T = T37 - T35; T1Y = T1R + T1X; T3P = T1X - T1R; T1F = TZ + T10; T11 = TZ - T10; T14 = Rp[WS(rs, 2)]; T15 = Rm[WS(rs, 2)]; T19 = Ip[WS(rs, 6)]; T1a = Im[WS(rs, 6)]; T1e = Rp[WS(rs, 6)]; T16 = T14 + T15; T1B = T15 - T14; T1L = T19 + T1a; T1b = T19 - T1a; T1f = Rm[WS(rs, 6)]; T1C = T1A * T1B; T2Z = T1E * T1B; } { E T1J, T31, T2u, T30, T32; { E T12, T1g, T1I, T1c, T2w; T12 = TY * T11; T1g = T1e + T1f; T1I = T1f - T1e; T1c = T18 * T1b; T17 = FNMS(T13, T16, T12); T2w = T18 * T1g; T1J = T1H * T1I; T31 = T1K * T1I; T1h = FNMS(T1d, T1g, T1c); T2x = FMA(T1d, T1b, T2w); } T2u = TY * T16; T30 = FMA(T1A, T1F, T2Z); T32 = FMA(T1H, T1L, T31); T1G = FNMS(T1E, T1F, T1C); T2v = FMA(T13, T11, T2u); T1M = FNMS(T1K, T1L, T1J); T33 = T30 + T32; T3Q = T30 - T32; } } } { E Tl, T22, T9, T20, Tf, T2O, Ta, T21, T2A, Tm, Tr, Ts; { E T7, T8, Td, Te; T7 = Ip[WS(rs, 1)]; T3S = T1G - T1M; T1N = T1G + T1M; T8 = Im[WS(rs, 1)]; Td = Rp[WS(rs, 1)]; Te = Rm[WS(rs, 1)]; Tl = Ip[WS(rs, 5)]; T22 = T7 + T8; T9 = T7 - T8; T20 = Td - Te; Tf = Td + Te; T2O = T2 * T22; Ta = T6 * T9; T21 = T2 * T20; T2A = T6 * Tf; Tm = Im[WS(rs, 5)]; Tr = Rp[WS(rs, 5)]; Ts = Rm[WS(rs, 5)]; } { E Tg, T2a, Tn, T26, T2Q, T27, T2C, T2B, Tu, Tt, To, T23, T2D, T2b; Tg = FNMS(Tc, Tf, Ta); T2a = Tl + Tm; Tn = Tl - Tm; T26 = Tr - Ts; Tt = Tr + Ts; T2Q = T25 * T2a; To = Tk * Tn; T27 = T25 * T26; T2C = Tk * Tt; T2B = FMA(Tc, T9, T2A); Tu = FNMS(Tq, Tt, To); T23 = FMA(T5, T22, T21); T2D = FMA(Tq, Tn, T2C); 
T2b = FMA(T29, T2a, T27); Tv = Tg + Tu; T3A = Tg - Tu; T2P = FNMS(T5, T20, T2O); T2E = T2B + T2D; T3B = T2B - T2D; T3L = T2b - T23; T2c = T23 + T2b; T2R = FNMS(T29, T26, T2Q); } } { E T2f, TC, T2T, TD, T2d, TI, TS, T2e, T2F, T2l, TO, TT; { E TG, TH, TA, TB, TM, TN; TA = Ip[WS(rs, 7)]; TB = Im[WS(rs, 7)]; TG = Rp[WS(rs, 7)]; T3I = T2R - T2P; T2S = T2P + T2R; T2f = TA + TB; TC = TA - TB; TH = Rm[WS(rs, 7)]; TM = Ip[WS(rs, 3)]; T2T = Tw * T2f; TD = Tz * TC; T2d = TG - TH; TI = TG + TH; TN = Im[WS(rs, 3)]; TS = Rp[WS(rs, 3)]; T2e = Tw * T2d; T2F = Tz * TI; T2l = TM + TN; TO = TM - TN; TT = Rm[WS(rs, 3)]; } { E TJ, T2V, TP, T2i, TU, T2G; TJ = FNMS(TF, TI, TD); T2V = T2h * T2l; TP = TL * TO; T2i = TS - TT; TU = TS + TT; T2G = FMA(TF, TC, T2F); { E T2g, T2j, TV, T2H; T2g = FMA(Ty, T2f, T2e); T2j = T2h * T2i; TV = FNMS(TR, TU, TP); T2H = TL * TU; { E T2U, T2m, T2I, T2W; T2U = FNMS(Ty, T2d, T2T); T2m = FMA(T2k, T2l, T2j); TW = TJ + TV; T3E = TJ - TV; T2I = FMA(TR, TO, T2H); T2W = FNMS(T2k, T2i, T2V); T3J = T2m - T2g; T2n = T2g + T2m; T3D = T2G - T2I; T2J = T2G + T2I; T3M = T2U - T2W; T2X = T2U + T2W; } } } } } { E T3Y, T3x, T3X, T3y, T3r, T3q, T3p, T3u; { E T2Y, T3o, TX, T3s, T3i, T39, T3t, T3l, T3e, T1x, T2M, T2p, T3d, T2K, T2t; E T2y; { E T2o, T1Z, T3j, T3k, T1i, T1w, T3g, T3h; T2Y = T2S + T2X; T3g = T2X - T2S; T3h = T2c - T2n; T2o = T2c + T2n; T1Z = T1N + T1Y; T3j = T1Y - T1N; T3o = Tv - TW; TX = Tv + TW; T3s = T3g - T3h; T3i = T3g + T3h; T3k = T38 - T33; T39 = T33 + T38; T3Y = T17 - T1h; T1i = T17 + T1h; T1w = T1s + T1v; T3x = T1v - T1s; T3t = T3j + T3k; T3l = T3j - T3k; T3e = T1w - T1i; T1x = T1i + T1w; T2M = T2o + T1Z; T2p = T1Z - T2o; T3d = T2J - T2E; T2K = T2E + T2J; T3X = T2q - T2s; T2t = T2q + T2s; T2y = T2v + T2x; T3y = T2v - T2x; } { E T2N, T3c, T3a, T3n, T3b, T2L, T2z, T1y; T2N = T1x - TX; T1y = TX + T1x; T3c = T2Y + T39; T3a = T2Y - T39; T3n = T2t - T2y; T2z = T2t + T2y; Ip[0] = KP500000000 * (T1y + T2p); Im[WS(rs, 7)] = KP500000000 * (T2p - T1y); T3b = T2z + 
T2K; T2L = T2z - T2K; { E T3f, T3m, T3v, T3w; T3r = T3e - T3d; T3f = T3d + T3e; Im[WS(rs, 3)] = KP500000000 * (T3a - T2N); Ip[WS(rs, 4)] = KP500000000 * (T2N + T3a); Rp[WS(rs, 4)] = KP500000000 * (T2L + T2M); Rm[WS(rs, 3)] = KP500000000 * (T2L - T2M); Rp[0] = KP500000000 * (T3b + T3c); Rm[WS(rs, 7)] = KP500000000 * (T3b - T3c); T3m = T3i + T3l; T3q = T3l - T3i; T3p = T3n - T3o; T3v = T3n + T3o; T3w = T3s + T3t; T3u = T3s - T3t; Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP707106781, T3m, T3f))); Ip[WS(rs, 2)] = KP500000000 * (FMA(KP707106781, T3m, T3f)); Rp[WS(rs, 2)] = KP500000000 * (FMA(KP707106781, T3w, T3v)); Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP707106781, T3w, T3v)); } } } { E T3R, T4b, T3z, T4q, T4g, T3U, T40, T41, T4r, T4j, T4m, T3G, T46, T3O, T4l; E T3Z, T4c; { E T3K, T3N, T4h, T4i, T3C, T3F, T4e, T4f; Rp[WS(rs, 6)] = KP500000000 * (FMA(KP707106781, T3q, T3p)); Rm[WS(rs, 1)] = KP500000000 * (FNMS(KP707106781, T3q, T3p)); Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP707106781, T3u, T3r))); Ip[WS(rs, 6)] = KP500000000 * (FMA(KP707106781, T3u, T3r)); T3K = T3I + T3J; T4e = T3I - T3J; T4f = T3M - T3L; T3N = T3L + T3M; T3R = T3P - T3Q; T4h = T3Q + T3P; T4b = T3y + T3x; T3z = T3x - T3y; T4q = FNMS(KP414213562, T4e, T4f); T4g = FMA(KP414213562, T4f, T4e); T4i = T3T - T3S; T3U = T3S + T3T; T40 = T3B + T3A; T3C = T3A - T3B; T3F = T3D + T3E; T41 = T3D - T3E; T4r = FNMS(KP414213562, T4h, T4i); T4j = FMA(KP414213562, T4i, T4h); T4m = T3C - T3F; T3G = T3C + T3F; T46 = FNMS(KP414213562, T3K, T3N); T3O = FMA(KP414213562, T3N, T3K); T4l = T3X - T3Y; T3Z = T3X + T3Y; } { E T45, T3H, T42, T47, T3V; T45 = FNMS(KP707106781, T3G, T3z); T3H = FMA(KP707106781, T3G, T3z); T4c = T41 - T40; T42 = T40 + T41; T47 = FMA(KP414213562, T3R, T3U); T3V = FNMS(KP414213562, T3U, T3R); { E T49, T43, T48, T4a, T44, T3W; T49 = FMA(KP707106781, T42, T3Z); T43 = FNMS(KP707106781, T42, T3Z); T48 = T46 - T47; T4a = T46 + T47; T44 = T3V - T3O; T3W = T3O + T3V; Rp[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, 
T4a, T49)); Rm[WS(rs, 6)] = KP500000000 * (FNMS(KP923879532, T4a, T49)); Rp[WS(rs, 5)] = KP500000000 * (FMA(KP923879532, T44, T43)); Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP923879532, T44, T43)); Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP923879532, T3W, T3H))); Ip[WS(rs, 1)] = KP500000000 * (FMA(KP923879532, T3W, T3H)); Ip[WS(rs, 5)] = KP500000000 * (FMA(KP923879532, T48, T45)); Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP923879532, T48, T45))); } } { E T4d, T4k, T4t, T4u; T4p = FMA(KP707106781, T4c, T4b); T4d = FNMS(KP707106781, T4c, T4b); T4k = T4g - T4j; T4o = T4g + T4j; T4n = FMA(KP707106781, T4m, T4l); T4t = FNMS(KP707106781, T4m, T4l); T4u = T4q + T4r; T4s = T4q - T4r; Im[0] = -(KP500000000 * (FNMS(KP923879532, T4k, T4d))); Ip[WS(rs, 7)] = KP500000000 * (FMA(KP923879532, T4k, T4d)); Rm[0] = KP500000000 * (FMA(KP923879532, T4u, T4t)); Rp[WS(rs, 7)] = KP500000000 * (FNMS(KP923879532, T4u, T4t)); } } } } } Rp[WS(rs, 3)] = KP500000000 * (FMA(KP923879532, T4o, T4n)); Rm[WS(rs, 4)] = KP500000000 * (FNMS(KP923879532, T4o, T4n)); Im[WS(rs, 4)] = -(KP500000000 * (FNMS(KP923879532, T4s, T4p))); Ip[WS(rs, 3)] = KP500000000 * (FMA(KP923879532, T4s, T4p)); } } }
static void hc2r_15(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, INT v, INT ivs, INT ovs) { DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); DK(KP500000000, +0.500000000000000000000000000000000000000000000); DK(KP866025403, +0.866025403784438646763723170752936183471402627); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); INT i; for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs, MAKE_VOLATILE_STRIDE(ris), MAKE_VOLATILE_STRIDE(iis), MAKE_VOLATILE_STRIDE(os)) { E T3, Tu, Ti, TB, TZ, T10, TE, TG, TJ, Tn, Tv, Ts, Tw, T8, Td; E Te; { E Th, T1, T2, Tf, Tg; Tg = ii[WS(iis, 5)]; Th = KP1_732050807 * Tg; T1 = ri[0]; T2 = ri[WS(ris, 5)]; Tf = T1 - T2; T3 = FMA(KP2_000000000, T2, T1); Tu = Tf - Th; Ti = Tf + Th; } { E T4, TD, T9, TI, T5, T6, T7, Ta, Tb, Tc, Tr, TH, Tm, TC, Tj; E To; T4 = ri[WS(ris, 3)]; TD = ii[WS(iis, 3)]; T9 = ri[WS(ris, 6)]; TI = ii[WS(iis, 6)]; T5 = ri[WS(ris, 7)]; T6 = ri[WS(ris, 2)]; T7 = T5 + T6; Ta = ri[WS(ris, 4)]; Tb = ri[WS(ris, 1)]; Tc = Ta + Tb; { E Tp, Tq, Tk, Tl; Tp = ii[WS(iis, 4)]; Tq = ii[WS(iis, 1)]; Tr = KP866025403 * (Tp + Tq); TH = Tp - Tq; Tk = ii[WS(iis, 7)]; Tl = ii[WS(iis, 2)]; Tm = KP866025403 * (Tk - Tl); TC = Tk + Tl; } TB = KP866025403 * (T5 - T6); TZ = TD - TC; T10 = TI - TH; TE = FMA(KP500000000, TC, TD); TG = KP866025403 * (Ta - Tb); TJ = FMA(KP500000000, TH, TI); Tj = FNMS(KP500000000, T7, T4); Tn = Tj - Tm; Tv = Tj + Tm; To = FNMS(KP500000000, Tc, T9); Ts = To - Tr; Tw = To + Tr; T8 = T4 + T7; Td = T9 + Tc; Te = T8 + Td; } O[0] = FMA(KP2_000000000, Te, T3); { E T11, T13, TY, T12, TW, TX; T11 = FNMS(KP1_902113032, T10, KP1_175570504 * TZ); T13 = FMA(KP1_902113032, TZ, KP1_175570504 * T10); TW = FNMS(KP500000000, Te, T3); TX = KP1_118033988 
* (T8 - Td); TY = TW - TX; T12 = TX + TW; O[WS(os, 12)] = TY - T11; O[WS(os, 9)] = T12 + T13; O[WS(os, 3)] = TY + T11; O[WS(os, 6)] = T12 - T13; } { E TP, Tt, TO, TT, TV, TR, TS, TU, TQ; TP = KP1_118033988 * (Tn - Ts); Tt = Tn + Ts; TO = FNMS(KP500000000, Tt, Ti); TR = TE - TB; TS = TJ - TG; TT = FNMS(KP1_902113032, TS, KP1_175570504 * TR); TV = FMA(KP1_902113032, TR, KP1_175570504 * TS); O[WS(os, 5)] = FMA(KP2_000000000, Tt, Ti); TU = TP + TO; O[WS(os, 11)] = TU - TV; O[WS(os, 14)] = TU + TV; TQ = TO - TP; O[WS(os, 2)] = TQ - TT; O[WS(os, 8)] = TQ + TT; } { E Tz, Tx, Ty, TL, TN, TF, TK, TM, TA; Tz = KP1_118033988 * (Tv - Tw); Tx = Tv + Tw; Ty = FNMS(KP500000000, Tx, Tu); TF = TB + TE; TK = TG + TJ; TL = FNMS(KP1_902113032, TK, KP1_175570504 * TF); TN = FMA(KP1_902113032, TF, KP1_175570504 * TK); O[WS(os, 10)] = FMA(KP2_000000000, Tx, Tu); TM = Tz + Ty; O[WS(os, 1)] = TM - TN; O[WS(os, 4)] = TM + TN; TA = Ty - Tz; O[WS(os, 7)] = TA - TL; O[WS(os, 13)] = TA + TL; } } }
/*
 * hc2cfdft2_16 -- in-place twiddle kernel over four strided arrays, eight
 * slots each (32 values read and 32 values written per iteration).
 * NOTE(review): judging by the name this is presumably a generated size-16
 * "halfcomplex -> complex, forward DFT" codelet that uses a compact
 * twiddle table -- confirm with the generator/caller.
 *
 * Parameters:
 *   Rp, Ip, Rm, Im  data arrays; each is read and later overwritten at
 *                   offsets WS(rs, 0) .. WS(rs, 7).
 *   W               twiddle table.  Before the first iteration the pointer
 *                   is moved to W + (mb - 1) * 8; each iteration consumes
 *                   W[0] .. W[7] and then advances W by 8.
 *   rs              stride handed to WS() to address slot k of every array.
 *   mb, me          half-open iteration range: the body runs for
 *                   m = mb, mb + 1, ..., me - 1.
 *   ms              per-iteration step: Rp and Ip move by +ms, Rm and Im
 *                   move by -ms.
 *
 * One iteration, in order:
 *   1. Load W[0..7] (T1, T4, T2, T5, Ti, Tk, Ty, Tz) and derive the other
 *      twiddle factors from products of those values (T7, Td, T12, TY, Tm,
 *      T1l, T1b, TL, T1h, Ts, TR, T17, TA, TE, T1L, T1Q, T1H, T1O, T24,
 *      T2d, T20, T2b) rather than loading them.
 *   2. Four load blocks (slots 4/0, 2/6, 1/5, 7/3): form the sum and
 *      difference of each Ip/Im and Rp/Rm pair and multiply by the twiddle
 *      factors.
 *   3. Combine the partial results and store.  All loads of an iteration
 *      happen before its first store.
 *
 * Every stored value carries a factor of one half, either explicitly
 * (KP500000000) or folded into a constant: KP353553390, KP191341716 and
 * KP461939766 are numerically 0.5*sqrt(2)/2, 0.5*sin(pi/8) and
 * 0.5*cos(pi/8).
 */
static void hc2cfdft2_16(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP461939766, +0.461939766255643378064091594698394143411208313); DK(KP191341716, +0.191341716182544885864229992015199433380672281); DK(KP353553390, +0.353553390593273762200422181052424519642417969); DK(KP500000000, +0.500000000000000000000000000000000000000000000); { INT m; for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) { E T1, T4, T2, T5, T7, Td, T12, TY, Tk, Ti, Tm, T1l, T1b, TL, T1h; E Ts, TR, T17, Ty, Tz, TA, TE, T1L, T1Q, T1H, T1O, T24, T2d, T20, T2b; { E Tl, TP, Tq, TK, Tj, TQ, Tr, TJ; { E T3, Tc, T6, Tb; T1 = W[0]; T4 = W[1]; T2 = W[2]; T5 = W[3]; T3 = T1 * T2; Tc = T4 * T2; T6 = T4 * T5; Tb = T1 * T5; T7 = T3 + T6; Td = Tb - Tc; T12 = Tb + Tc; TY = T3 - T6; Tk = W[5]; Tl = T4 * Tk; TP = T2 * Tk; Tq = T1 * Tk; TK = T5 * Tk; Ti = W[4]; Tj = T1 * Ti; TQ = T5 * Ti; Tr = T4 * Ti; TJ = T2 * Ti; } Tm = Tj - Tl; T1l = Tq - Tr; T1b = TP + TQ; TL = TJ + TK; T1h = Tj + Tl; Ts = Tq + Tr; TR = TP - TQ; T17 = TJ - TK; Ty = W[6]; Tz = W[7]; TA = FMA(T1, Ty, T4 * Tz); TE = FNMS(T4, Ty, T1 * Tz); { E T1J, T1K, T1F, T1G; T1J = TY * Tk; T1K = T12 * Ti; T1L = T1J - T1K; T1Q = T1J + T1K; T1F = TY * Ti; T1G = T12 * Tk; T1H = T1F + T1G; T1O = T1F - T1G; } { E T22, T23, T1Y, T1Z; T22 = T7 * Tk; T23 = Td * Ti; T24 = T22 + T23; T2d = T22 - T23; T1Y = T7 * Ti; T1Z = Td * Tk; T20 = T1Y - T1Z; T2b = T1Y + T1Z; } } { E T1t, T3i, T2l, T3B, T1E, T3t, T2M, T3x, T1g, T3C, T2J, T3u, T1T, T3w, T2o; E T3j, Tx, T3b, T2C, T3q, T27, T3m, T2s, T3c, TW, T3f, T2F, T3n, T2g, T3p; E T2v, T3e; { E T1k, T1C, T1o, T1B, T1s, T1z, T1y, T2j, T1p, T2k; { E T1i, T1j, T1m, T1n; T1i = Ip[WS(rs, 4)]; T1j = Im[WS(rs, 4)]; T1k = T1i - T1j; T1C = T1i + T1j; T1m = Rp[WS(rs, 4)]; T1n = Rm[WS(rs, 4)]; T1o = T1m + T1n; T1B = T1m - T1n; } { E T1q, T1r, T1w, T1x; T1q = Ip[0]; T1r = Im[0]; T1s = T1q - T1r; T1z = T1q 
+ T1r; T1w = Rm[0]; T1x = Rp[0]; T1y = T1w - T1x; T2j = T1x + T1w; } T1p = FNMS(T1l, T1o, T1h * T1k); T1t = T1p + T1s; T3i = T1s - T1p; T2k = FMA(T1h, T1o, T1l * T1k); T2l = T2j + T2k; T3B = T2j - T2k; { E T1A, T1D, T2K, T2L; T1A = FNMS(T4, T1z, T1 * T1y); T1D = FMA(Ti, T1B, Tk * T1C); T1E = T1A - T1D; T3t = T1D + T1A; T2K = FNMS(Tk, T1B, Ti * T1C); T2L = FMA(T4, T1y, T1 * T1z); T2M = T2K + T2L; T3x = T2L - T2K; } } { E T11, T1M, T15, T1I, T1a, T1R, T1e, T1P; { E TZ, T10, T13, T14; TZ = Ip[WS(rs, 2)]; T10 = Im[WS(rs, 2)]; T11 = TZ - T10; T1M = TZ + T10; T13 = Rp[WS(rs, 2)]; T14 = Rm[WS(rs, 2)]; T15 = T13 + T14; T1I = T13 - T14; } { E T18, T19, T1c, T1d; T18 = Ip[WS(rs, 6)]; T19 = Im[WS(rs, 6)]; T1a = T18 - T19; T1R = T18 + T19; T1c = Rp[WS(rs, 6)]; T1d = Rm[WS(rs, 6)]; T1e = T1c + T1d; T1P = T1c - T1d; } { E T16, T1f, T2H, T2I; T16 = FNMS(T12, T15, TY * T11); T1f = FNMS(T1b, T1e, T17 * T1a); T1g = T16 + T1f; T3C = T16 - T1f; T2H = FNMS(T1L, T1I, T1H * T1M); T2I = FNMS(T1Q, T1P, T1O * T1R); T2J = T2H + T2I; T3u = T2H - T2I; } { E T1N, T1S, T2m, T2n; T1N = FMA(T1H, T1I, T1L * T1M); T1S = FMA(T1O, T1P, T1Q * T1R); T1T = T1N + T1S; T3w = T1S - T1N; T2m = FMA(TY, T15, T12 * T11); T2n = FMA(T17, T1e, T1b * T1a); T2o = T2m + T2n; T3j = T2m - T2n; } } { E Ta, T1W, Tg, T1V, Tp, T25, Tv, T21; { E T8, T9, Te, Tf; T8 = Ip[WS(rs, 1)]; T9 = Im[WS(rs, 1)]; Ta = T8 - T9; T1W = T8 + T9; Te = Rp[WS(rs, 1)]; Tf = Rm[WS(rs, 1)]; Tg = Te + Tf; T1V = Te - Tf; } { E Tn, To, Tt, Tu; Tn = Ip[WS(rs, 5)]; To = Im[WS(rs, 5)]; Tp = Tn - To; T25 = Tn + To; Tt = Rp[WS(rs, 5)]; Tu = Rm[WS(rs, 5)]; Tv = Tt + Tu; T21 = Tt - Tu; } { E Th, Tw, T2A, T2B; Th = FNMS(Td, Tg, T7 * Ta); Tw = FNMS(Ts, Tv, Tm * Tp); Tx = Th + Tw; T3b = Th - Tw; T2A = FNMS(T5, T1V, T2 * T1W); T2B = FNMS(T24, T21, T20 * T25); T2C = T2A + T2B; T3q = T2A - T2B; } { E T1X, T26, T2q, T2r; T1X = FMA(T2, T1V, T5 * T1W); T26 = FMA(T20, T21, T24 * T25); T27 = T1X + T26; T3m = T26 - T1X; T2q = FMA(T7, Tg, Td * Ta); T2r = FMA(Tm, Tv, Ts 
* Tp); T2s = T2q + T2r; T3c = T2q - T2r; } } { E TD, T29, TH, T28, TO, T2e, TU, T2c; { E TB, TC, TF, TG; TB = Ip[WS(rs, 7)]; TC = Im[WS(rs, 7)]; TD = TB - TC; T29 = TB + TC; TF = Rp[WS(rs, 7)]; TG = Rm[WS(rs, 7)]; TH = TF + TG; T28 = TF - TG; } { E TM, TN, TS, TT; TM = Ip[WS(rs, 3)]; TN = Im[WS(rs, 3)]; TO = TM - TN; T2e = TM + TN; TS = Rp[WS(rs, 3)]; TT = Rm[WS(rs, 3)]; TU = TS + TT; T2c = TS - TT; } { E TI, TV, T2D, T2E; TI = FNMS(TE, TH, TA * TD); TV = FNMS(TR, TU, TL * TO); TW = TI + TV; T3f = TI - TV; T2D = FNMS(Tz, T28, Ty * T29); T2E = FNMS(T2d, T2c, T2b * T2e); T2F = T2D + T2E; T3n = T2D - T2E; } { E T2a, T2f, T2t, T2u; T2a = FMA(Ty, T28, Tz * T29); T2f = FMA(T2b, T2c, T2d * T2e); T2g = T2a + T2f; T3p = T2f - T2a; T2t = FMA(TA, TH, TE * TD); T2u = FMA(TL, TU, TR * TO); T2v = T2t + T2u; T3e = T2t - T2u; } } { E T1v, T2z, T2O, T2Q, T2i, T2y, T2x, T2P; { E TX, T1u, T2G, T2N; TX = Tx + TW; T1u = T1g + T1t; T1v = TX + T1u; T2z = T1u - TX; T2G = T2C + T2F; T2N = T2J + T2M; T2O = T2G - T2N; T2Q = T2G + T2N; } { E T1U, T2h, T2p, T2w; T1U = T1E - T1T; T2h = T27 + T2g; T2i = T1U - T2h; T2y = T2h + T1U; T2p = T2l + T2o; T2w = T2s + T2v; T2x = T2p - T2w; T2P = T2p + T2w; } Ip[0] = KP500000000 * (T1v + T2i); Rp[0] = KP500000000 * (T2P + T2Q); Im[WS(rs, 7)] = KP500000000 * (T2i - T1v); Rm[WS(rs, 7)] = KP500000000 * (T2P - T2Q); Rm[WS(rs, 3)] = KP500000000 * (T2x - T2y); Im[WS(rs, 3)] = KP500000000 * (T2O - T2z); Rp[WS(rs, 4)] = KP500000000 * (T2x + T2y); Ip[WS(rs, 4)] = KP500000000 * (T2z + T2O); } { E T2T, T35, T33, T39, T2W, T36, T2Z, T37; { E T2R, T2S, T31, T32; T2R = T2v - T2s; T2S = T1t - T1g; T2T = KP500000000 * (T2R + T2S); T35 = KP500000000 * (T2S - T2R); T31 = T2l - T2o; T32 = Tx - TW; T33 = KP500000000 * (T31 - T32); T39 = KP500000000 * (T31 + T32); } { E T2U, T2V, T2X, T2Y; T2U = T2F - T2C; T2V = T27 - T2g; T2W = T2U + T2V; T36 = T2U - T2V; T2X = T1T + T1E; T2Y = T2M - T2J; T2Z = T2X - T2Y; T37 = T2X + T2Y; } { E T30, T3a, T34, T38; T30 = KP353553390 * (T2W + 
T2Z); Ip[WS(rs, 2)] = T2T + T30; Im[WS(rs, 5)] = T30 - T2T; T3a = KP353553390 * (T36 + T37); Rm[WS(rs, 5)] = T39 - T3a; Rp[WS(rs, 2)] = T39 + T3a; T34 = KP353553390 * (T2Z - T2W); Rm[WS(rs, 1)] = T33 - T34; Rp[WS(rs, 6)] = T33 + T34; T38 = KP353553390 * (T36 - T37); Ip[WS(rs, 6)] = T35 + T38; Im[WS(rs, 1)] = T38 - T35; } } { E T3k, T3Q, T3Z, T3D, T3h, T40, T3X, T45, T3G, T3P, T3s, T3K, T3U, T44, T3z; E T3L; { E T3d, T3g, T3o, T3r; T3k = KP500000000 * (T3i - T3j); T3Q = KP500000000 * (T3j + T3i); T3Z = KP500000000 * (T3B - T3C); T3D = KP500000000 * (T3B + T3C); T3d = T3b - T3c; T3g = T3e + T3f; T3h = KP353553390 * (T3d + T3g); T40 = KP353553390 * (T3d - T3g); { E T3V, T3W, T3E, T3F; T3V = T3u + T3t; T3W = T3x - T3w; T3X = FNMS(KP461939766, T3W, KP191341716 * T3V); T45 = FMA(KP461939766, T3V, KP191341716 * T3W); T3E = T3c + T3b; T3F = T3e - T3f; T3G = KP353553390 * (T3E + T3F); T3P = KP353553390 * (T3F - T3E); } T3o = T3m + T3n; T3r = T3p - T3q; T3s = FMA(KP191341716, T3o, KP461939766 * T3r); T3K = FNMS(KP191341716, T3r, KP461939766 * T3o); { E T3S, T3T, T3v, T3y; T3S = T3n - T3m; T3T = T3q + T3p; T3U = FMA(KP461939766, T3S, KP191341716 * T3T); T44 = FNMS(KP461939766, T3T, KP191341716 * T3S); T3v = T3t - T3u; T3y = T3w + T3x; T3z = FNMS(KP191341716, T3y, KP461939766 * T3v); T3L = FMA(KP191341716, T3v, KP461939766 * T3y); } } { E T3l, T3A, T3N, T3O; T3l = T3h + T3k; T3A = T3s + T3z; Ip[WS(rs, 1)] = T3l + T3A; Im[WS(rs, 6)] = T3A - T3l; T3N = T3D + T3G; T3O = T3K + T3L; Rm[WS(rs, 6)] = T3N - T3O; Rp[WS(rs, 1)] = T3N + T3O; } { E T3H, T3I, T3J, T3M; T3H = T3D - T3G; T3I = T3z - T3s; Rm[WS(rs, 2)] = T3H - T3I; Rp[WS(rs, 5)] = T3H + T3I; T3J = T3k - T3h; T3M = T3K - T3L; Ip[WS(rs, 5)] = T3J + T3M; Im[WS(rs, 2)] = T3M - T3J; } { E T3R, T3Y, T47, T48; T3R = T3P + T3Q; T3Y = T3U + T3X; Ip[WS(rs, 3)] = T3R + T3Y; Im[WS(rs, 4)] = T3Y - T3R; T47 = T3Z + T40; T48 = T44 + T45; Rm[WS(rs, 4)] = T47 - T48; Rp[WS(rs, 3)] = T47 + T48; } { E T41, T42, T43, T46; T41 = T3Z - T40; T42 = 
T3X - T3U; Rm[0] = T41 - T42; Rp[WS(rs, 7)] = T41 + T42; T43 = T3Q - T3P; T46 = T44 - T45; Ip[WS(rs, 7)] = T43 + T46; Im[0] = T46 - T43; } } } } } }
/*
 * r2cbIII_32 -- straight-line kernel: reads sixteen values from Cr and
 * sixteen from Ci and writes sixteen values to R0 and sixteen to R1, once
 * per loop iteration.
 * NOTE(review): judging by the name this is presumably a generated size-32
 * real "backward" codelet of the type-III (shifted) flavour -- confirm
 * with the generator/caller.
 *
 * Parameters:
 *   R0, R1        output arrays, written at WS(rs, 0) .. WS(rs, 15).
 *   Cr, Ci        input arrays, read at WS(csr, 0..15) and WS(csi, 0..15).
 *   rs, csr, csi  strides handed to WS() for the respective arrays.
 *   v             iteration count (the loop runs i = v down to 1).
 *   ivs           step added to Cr and Ci after each iteration.
 *   ovs           step added to R0 and R1 after each iteration.
 *                 (Note the pairing: the inputs step by ivs, the outputs
 *                 by ovs.)
 *
 * One iteration, in order:
 *   1. The first nested block loads all 32 inputs and reduces them to
 *      pairwise sums and differences; no store is issued before the last
 *      load.
 *   2. The following blocks combine those partial results with FMA/FNMS
 *      and store each output as (constant) * (combination); several
 *      outputs are additionally negated.
 *   3. The last four stores (R1 slots 5, 13, 9, 1) sit after the nested
 *      blocks close; their operands T1N, T1K, T1Q and T1H are therefore
 *      declared at loop-body scope together with T1O and T1P.
 */
static void r2cbIII_32(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs) { DK(KP534511135, +0.534511135950791641089685961295362908582039528); DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); DK(KP303346683, +0.303346683607342391675883946941299872384187453); DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); DK(KP098491403, +0.098491403357164253077197521291327432293052451); DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); DK(KP820678790, +0.820678790828660330972281985331011598767386482); DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); DK(KP414213562, +0.414213562373095048801688724209698078569671875); { INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(128, rs), MAKE_VOLATILE_STRIDE(128, csr), MAKE_VOLATILE_STRIDE(128, csi)) { E T1N, T1K, T1Q, T1H, T1O, T1P; { E T1I, T1e, T1Z, T7, T2E, T2i, T1x, Tz, Te, T2j, T22, T2F, T1h, T1y, TK; E T1J, Tm, T2B, TX, Tp, T2m, T28, T1M, T1C, T1k, TW, TY, T2a, T14, T15; E Ts, TZ; { E TE, T1g, TJ, T1f; { E T4, Tv, T3, T2g, T1d, T5, Tw, Tx; { E T1, T2, T1b, T1c; T1 = Cr[0]; T2 = Cr[WS(csr, 15)]; T1b = Ci[0]; T1c = Ci[WS(csi, 15)]; T4 = Cr[WS(csr, 8)]; Tv = T1 - T2; T3 = T1 + T2; T2g = T1c - T1b; T1d = T1b + T1c; T5 = Cr[WS(csr, 7)]; Tw = Ci[WS(csi, 8)]; 
Tx = Ci[WS(csi, 7)]; } { E Tb, TA, Ta, T20, TD, Tc, TG, TH; { E T8, T9, TB, TC; T8 = Cr[WS(csr, 4)]; { E T1a, T6, T2h, Ty; T1a = T4 - T5; T6 = T4 + T5; T2h = Tx - Tw; Ty = Tw + Tx; T1I = T1a - T1d; T1e = T1a + T1d; T1Z = T3 - T6; T7 = T3 + T6; T2E = T2h + T2g; T2i = T2g - T2h; T1x = Tv + Ty; Tz = Tv - Ty; T9 = Cr[WS(csr, 11)]; } TB = Ci[WS(csi, 4)]; TC = Ci[WS(csi, 11)]; Tb = Cr[WS(csr, 3)]; TA = T8 - T9; Ta = T8 + T9; T20 = TC - TB; TD = TB + TC; Tc = Cr[WS(csr, 12)]; TG = Ci[WS(csi, 3)]; TH = Ci[WS(csi, 12)]; } { E TF, Td, T21, TI; TE = TA - TD; T1g = TA + TD; TF = Tb - Tc; Td = Tb + Tc; T21 = TG - TH; TI = TG + TH; Te = Ta + Td; T2j = Ta - Td; T22 = T20 - T21; T2F = T20 + T21; TJ = TF - TI; T1f = TF + TI; } } } { E TM, Ti, TN, T25, TU, TR, Tl, TO; { E TS, TT, Tg, Th, Tj, Tk; Tg = Cr[WS(csr, 2)]; Th = Cr[WS(csr, 13)]; T1h = T1f - T1g; T1y = T1g + T1f; TK = TE + TJ; T1J = TE - TJ; TM = Tg - Th; Ti = Tg + Th; TS = Ci[WS(csi, 2)]; TT = Ci[WS(csi, 13)]; Tj = Cr[WS(csr, 10)]; Tk = Cr[WS(csr, 5)]; TN = Ci[WS(csi, 10)]; T25 = TS - TT; TU = TS + TT; TR = Tj - Tk; Tl = Tj + Tk; TO = Ci[WS(csi, 5)]; } { E T12, T13, Tq, Tr; { E Tn, T1A, TV, T24, T26, TP, To, T27, T1B, TQ; Tn = Cr[WS(csr, 1)]; T1A = TR - TU; TV = TR + TU; T24 = Ti - Tl; Tm = Ti + Tl; T26 = TN - TO; TP = TN + TO; To = Cr[WS(csr, 14)]; T12 = Ci[WS(csi, 1)]; T27 = T25 - T26; T2B = T26 + T25; T1B = TM + TP; TQ = TM - TP; TX = Tn - To; Tp = Tn + To; T2m = T24 + T27; T28 = T24 - T27; T1M = FNMS(KP414213562, T1A, T1B); T1C = FMA(KP414213562, T1B, T1A); T1k = FMA(KP414213562, TQ, TV); TW = FNMS(KP414213562, TV, TQ); T13 = Ci[WS(csi, 14)]; } Tq = Cr[WS(csr, 6)]; Tr = Cr[WS(csr, 9)]; TY = Ci[WS(csi, 6)]; T2a = T13 - T12; T14 = T12 + T13; T15 = Tq - Tr; Ts = Tq + Tr; TZ = Ci[WS(csi, 9)]; } } } { E T1L, T1F, T23, T2n, T2k, T2e, T1p, T1t, T1s, T1i, T1o, T19, T1l, T1q; { E T2z, T2G, T2H, T2C, T1j, T17, T2r, T2s, T2u, T2v, T2K, T2D; { E T2L, T2d, T2l, T2O; { E Tf, T2N, Tu, T2M; { E T1D, T16, T29, Tt, T2b, T10; T2z = T7 - 
Te; Tf = T7 + Te; T1D = T15 + T14; T16 = T14 - T15; T29 = Tp - Ts; Tt = Tp + Ts; T2b = TY - TZ; T10 = TY + TZ; T2N = T2F + T2E; T2G = T2E - T2F; T2H = Tm - Tt; Tu = Tm + Tt; { E T2c, T2A, T1E, T11; T2c = T2a - T2b; T2A = T2b + T2a; T1E = TX + T10; T11 = TX - T10; T2L = Tf - Tu; T2d = T29 + T2c; T2l = T29 - T2c; T2C = T2A - T2B; T2M = T2B + T2A; T1L = FMA(KP414213562, T1D, T1E); T1F = FNMS(KP414213562, T1E, T1D); T1j = FMA(KP414213562, T11, T16); T17 = FNMS(KP414213562, T16, T11); T2O = T2M + T2N; } } R0[0] = KP2_000000000 * (Tf + Tu); R0[WS(rs, 8)] = KP2_000000000 * (T2N - T2M); } T23 = T1Z + T22; T2r = T1Z - T22; R0[WS(rs, 12)] = KP1_414213562 * (T2O - T2L); R0[WS(rs, 4)] = KP1_414213562 * (T2L + T2O); T2s = T2m + T2l; T2n = T2l - T2m; T2k = T2i - T2j; T2u = T2j + T2i; T2v = T28 - T2d; T2e = T28 + T2d; } { E T2y, T2t, T2x, T2w; T2y = FMA(KP707106781, T2s, T2r); T2t = FNMS(KP707106781, T2s, T2r); T2x = FMA(KP707106781, T2v, T2u); T2w = FNMS(KP707106781, T2v, T2u); R0[WS(rs, 7)] = KP1_961570560 * (FMA(KP198912367, T2y, T2x)); R0[WS(rs, 15)] = -(KP1_961570560 * (FNMS(KP198912367, T2x, T2y))); R0[WS(rs, 11)] = KP1_662939224 * (FNMS(KP668178637, T2t, T2w)); R0[WS(rs, 3)] = KP1_662939224 * (FMA(KP668178637, T2w, T2t)); T2K = T2z - T2C; T2D = T2z + T2C; } { E TL, T18, T2J, T2I; T1p = FNMS(KP707106781, TK, Tz); TL = FMA(KP707106781, TK, Tz); T18 = TW + T17; T1t = TW - T17; T1s = FMA(KP707106781, T1h, T1e); T1i = FNMS(KP707106781, T1h, T1e); T2J = T2H + T2G; T2I = T2G - T2H; T1o = FNMS(KP923879532, T18, TL); T19 = FMA(KP923879532, T18, TL); R0[WS(rs, 6)] = KP1_847759065 * (FMA(KP414213562, T2K, T2J)); R0[WS(rs, 14)] = -(KP1_847759065 * (FNMS(KP414213562, T2J, T2K))); R0[WS(rs, 10)] = KP1_847759065 * (FNMS(KP414213562, T2D, T2I)); R0[WS(rs, 2)] = KP1_847759065 * (FMA(KP414213562, T2I, T2D)); T1l = T1j - T1k; T1q = T1k + T1j; } } { E T1z, T1U, T1Y, T1T, T1V, T1G; { E T1w, T1r, T1n, T1m; T1n = FMA(KP923879532, T1l, T1i); T1m = FNMS(KP923879532, T1l, T1i); T1w = 
FMA(KP923879532, T1q, T1p); T1r = FNMS(KP923879532, T1q, T1p); R1[WS(rs, 4)] = -(KP1_546020906 * (FNMS(KP820678790, T1o, T1n))); R1[WS(rs, 12)] = -(KP1_546020906 * (FMA(KP820678790, T1n, T1o))); R1[WS(rs, 8)] = -(KP1_990369453 * (FMA(KP098491403, T19, T1m))); R1[0] = KP1_990369453 * (FNMS(KP098491403, T1m, T19)); { E T1R, T1S, T1v, T1u; T1z = FNMS(KP707106781, T1y, T1x); T1R = FMA(KP707106781, T1y, T1x); T1S = T1M + T1L; T1N = T1L - T1M; T1K = FNMS(KP707106781, T1J, T1I); T1U = FMA(KP707106781, T1J, T1I); T1v = FNMS(KP923879532, T1t, T1s); T1u = FMA(KP923879532, T1t, T1s); T1Y = FMA(KP923879532, T1S, T1R); T1T = FNMS(KP923879532, T1S, T1R); R1[WS(rs, 6)] = -(KP1_913880671 * (FNMS(KP303346683, T1w, T1v))); R1[WS(rs, 14)] = -(KP1_913880671 * (FMA(KP303346683, T1v, T1w))); R1[WS(rs, 10)] = -(KP1_763842528 * (FMA(KP534511135, T1r, T1u))); R1[WS(rs, 2)] = KP1_763842528 * (FNMS(KP534511135, T1u, T1r)); T1V = T1C + T1F; T1G = T1C - T1F; } } { E T2q, T2f, T1X, T1W, T2p, T2o; T1X = FMA(KP923879532, T1V, T1U); T1W = FNMS(KP923879532, T1V, T1U); T2q = FNMS(KP707106781, T2e, T23); T2f = FMA(KP707106781, T2e, T23); R1[WS(rs, 7)] = KP1_990369453 * (FMA(KP098491403, T1Y, T1X)); R1[WS(rs, 15)] = -(KP1_990369453 * (FNMS(KP098491403, T1X, T1Y))); R1[WS(rs, 11)] = KP1_546020906 * (FNMS(KP820678790, T1T, T1W)); R1[WS(rs, 3)] = KP1_546020906 * (FMA(KP820678790, T1W, T1T)); T2p = FNMS(KP707106781, T2n, T2k); T2o = FMA(KP707106781, T2n, T2k); T1Q = FNMS(KP923879532, T1G, T1z); T1H = FMA(KP923879532, T1G, T1z); R0[WS(rs, 5)] = KP1_662939224 * (FMA(KP668178637, T2q, T2p)); R0[WS(rs, 13)] = -(KP1_662939224 * (FNMS(KP668178637, T2p, T2q))); R0[WS(rs, 9)] = KP1_961570560 * (FNMS(KP198912367, T2f, T2o)); R0[WS(rs, 1)] = KP1_961570560 * (FMA(KP198912367, T2o, T2f)); } } } } T1O = FMA(KP923879532, T1N, T1K); T1P = FNMS(KP923879532, T1N, T1K); R1[WS(rs, 5)] = KP1_763842528 * (FMA(KP534511135, T1Q, T1P)); R1[WS(rs, 13)] = -(KP1_763842528 * (FNMS(KP534511135, T1P, T1Q))); R1[WS(rs, 9)] = 
KP1_913880671 * (FNMS(KP303346683, T1H, T1O)); R1[WS(rs, 1)] = KP1_913880671 * (FMA(KP303346683, T1O, T1H)); } } }
static void hc2cfdft_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP500000000, +0.500000000000000000000000000000000000000000000); { INT m; for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { E Tc, Tr, Tk, Tx, T9, Ts, Tp, Tw; { E Ta, Tb, Tj, Tf, Tg, Th, Te, Ti; Ta = Ip[0]; Tb = Im[0]; Tj = Ta + Tb; Tf = Rm[0]; Tg = Rp[0]; Th = Tf - Tg; Tc = Ta - Tb; Tr = Tg + Tf; Te = W[0]; Ti = W[1]; Tk = FNMS(Ti, Tj, Te * Th); Tx = FMA(Ti, Th, Te * Tj); } { E T4, To, T8, Tm; { E T2, T3, T6, T7; T2 = Ip[WS(rs, 1)]; T3 = Im[WS(rs, 1)]; T4 = T2 - T3; To = T2 + T3; T6 = Rp[WS(rs, 1)]; T7 = Rm[WS(rs, 1)]; T8 = T6 + T7; Tm = T6 - T7; } { E T1, T5, Tl, Tn; T1 = W[2]; T5 = W[3]; T9 = FNMS(T5, T8, T1 * T4); Ts = FMA(T1, T8, T5 * T4); Tl = W[4]; Tn = W[5]; Tp = FMA(Tl, Tm, Tn * To); Tw = FNMS(Tn, Tm, Tl * To); } } { E Td, Tq, Tz, TA; Td = T9 + Tc; Tq = Tk - Tp; Ip[0] = KP500000000 * (Td + Tq); Im[WS(rs, 1)] = KP500000000 * (Tq - Td); Tz = Tr + Ts; TA = Tw + Tx; Rm[WS(rs, 1)] = KP500000000 * (Tz - TA); Rp[0] = KP500000000 * (Tz + TA); } { E Tt, Tu, Tv, Ty; Tt = Tr - Ts; Tu = Tp + Tk; Rm[0] = KP500000000 * (Tt - Tu); Rp[WS(rs, 1)] = KP500000000 * (Tt + Tu); Tv = Tc - T9; Ty = Tw - Tx; Ip[WS(rs, 1)] = KP500000000 * (Tv + Ty); Im[0] = KP500000000 * (Ty - Tv); } } } }
/*
 * hb2_16 -- in-place twiddle kernel over two strided arrays, sixteen slots
 * each (32 values read and 32 values written per iteration).
 * NOTE(review): judging by the name this is presumably a generated size-16
 * "halfcomplex backward" codelet that uses a compact twiddle table --
 * confirm with the generator/caller.
 *
 * Parameters:
 *   cr, ci  data arrays; each is read and later overwritten at offsets
 *           WS(rs, 0) .. WS(rs, 15).
 *   W       twiddle table.  Before the first iteration the pointer is
 *           moved to W + (mb - 1) * 8; each iteration consumes
 *           W[0] .. W[7] and then advances W by 8.
 *   rs      stride handed to WS() to address slot k of cr and ci.
 *   mb, me  half-open iteration range: the body runs for
 *           m = mb, mb + 1, ..., me - 1.
 *   ms      per-iteration step: cr moves by +ms, ci moves by -ms.
 *
 * One iteration, in order:
 *   1. Load W[0..7] (Tv, Ty, Tw, Tz, TB, TF, T2z, T2C) and derive the
 *      remaining twiddle factors from products of those values (T1V, TA,
 *      T2G, T3Q, T3C, T3g, T3L, T30, T3m, T3z, T3w, T3s, T1X, T1Y, T2u,
 *      T2c, T2p, TE, TG, T1G, T1o, T1D).
 *   2. Two load blocks read all 32 inputs and reduce them to sums and
 *      differences; all loads happen before the first store.
 *   3. cr[0] and ci[0] are stored without a twiddle multiply; each other
 *      slot k stores cr[WS(rs, k)] = a*x - b*y and ci[WS(rs, k)] =
 *      a*y + b*x style combinations (via FNMS/FMA) of a pair of partial
 *      results with one pair of twiddle factors.
 *
 * KP707106781, KP923879532 and KP414213562 are numerically sqrt(2)/2,
 * cos(pi/8) and sqrt(2) - 1.
 */
static void hb2_16(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP414213562, +0.414213562373095048801688724209698078569671875); { INT m; for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(32, rs)) { E Tv, Tw, T2z, T2C, TB, TF, Ty, Tz, T1V, TA, T2G, T3Q, T3C, T3g, T3L; E T30, T3m, T3z, T3w, T3s, T1X, T1Y, T2u, T2c, T2p, TE, TG, T1G, T1o, T1D; { E T3f, T3l, T2F, T3r, T2Z, T3v, TD, Tx; Tv = W[0]; Tw = W[2]; Tx = Tv * Tw; T2z = W[6]; T3f = Tv * T2z; T2C = W[7]; T3l = Tv * T2C; TB = W[4]; T2F = Tv * TB; T3r = Tw * TB; TF = W[5]; T2Z = Tv * TF; T3v = Tw * TF; Ty = W[1]; Tz = W[3]; TD = Tv * Tz; T1V = FMA(Ty, Tz, Tx); TA = FNMS(Ty, Tz, Tx); T2G = FNMS(Ty, TF, T2F); T3Q = FMA(Tz, TB, T3v); T3C = FNMS(Ty, TB, T2Z); T3g = FMA(Ty, T2C, T3f); T3L = FNMS(Tz, TF, T3r); T30 = FMA(Ty, TB, T2Z); T3m = FNMS(Ty, T2z, T3l); T3z = FMA(Ty, TF, T2F); T3w = FNMS(Tz, TB, T3v); T3s = FMA(Tz, TF, T3r); { E T1W, T2b, TC, T1n; T1W = T1V * TB; T2b = T1V * TF; T1X = FNMS(Ty, Tw, TD); T1Y = FNMS(T1X, TF, T1W); T2u = FNMS(T1X, TB, T2b); T2c = FMA(T1X, TB, T2b); T2p = FMA(T1X, TF, T1W); TC = TA * TB; T1n = TA * TF; TE = FMA(Ty, Tw, TD); TG = FNMS(TE, TF, TC); T1G = FNMS(TE, TB, T1n); T1o = FMA(TE, TB, T1n); T1D = FMA(TE, TF, TC); } } { E TL, T1Z, T2d, T1t, T31, T34, T3n, T3D, T3E, T3R, T1w, T20, Tf, T3M, T2L; E T3h, TW, T2e, T3G, T3H, T3N, T2Q, T36, T2V, T37, Tu, T3S, T18, T1z, T24; E T2g, T27, T2h, T1j, T1y; { E T3, TH, TU, T2I, T1s, T32, T6, T1p, Ta, TM, TK, T33, TP, T2J, Td; E TR; { E T1, T2, TS, TT; T1 = cr[0]; T2 = ci[WS(rs, 7)]; T3 = T1 + T2; TH = T1 - T2; TS = ci[WS(rs, 9)]; TT = cr[WS(rs, 14)]; TU = TS + TT; T2I = TS - TT; } { E T1q, T1r, T4, T5; T1q = ci[WS(rs, 15)]; T1r = cr[WS(rs, 8)]; T1s = T1q + T1r; T32 = T1q - T1r; T4 = cr[WS(rs, 4)]; T5 = ci[WS(rs, 3)]; T6 = T4 + T5; 
T1p = T4 - T5; } { E T8, T9, TI, TJ; T8 = cr[WS(rs, 2)]; T9 = ci[WS(rs, 5)]; Ta = T8 + T9; TM = T8 - T9; TI = ci[WS(rs, 11)]; TJ = cr[WS(rs, 12)]; TK = TI + TJ; T33 = TI - TJ; } { E TN, TO, Tb, Tc; TN = ci[WS(rs, 13)]; TO = cr[WS(rs, 10)]; TP = TN + TO; T2J = TN - TO; Tb = ci[WS(rs, 1)]; Tc = cr[WS(rs, 6)]; Td = Tb + Tc; TR = Tb - Tc; } TL = TH - TK; T1Z = TH + TK; T2d = T1s - T1p; T1t = T1p + T1s; T31 = Ta - Td; T34 = T32 - T33; T3n = T34 - T31; { E T1u, T1v, T7, Te; T3D = T32 + T33; T3E = T2J + T2I; T3R = T3D - T3E; T1u = TM + TP; T1v = TR + TU; T1w = T1u - T1v; T20 = T1u + T1v; T7 = T3 + T6; Te = Ta + Td; Tf = T7 + Te; T3M = T7 - Te; { E T2H, T2K, TQ, TV; T2H = T3 - T6; T2K = T2I - T2J; T2L = T2H + T2K; T3h = T2H - T2K; TQ = TM - TP; TV = TR - TU; TW = TQ + TV; T2e = TQ - TV; } } } { E Ti, T1e, T1c, T2N, T1h, T2O, Tl, T19, Tp, T13, T11, T2S, T16, T2T, Ts; E TY, T2M, T2P; { E Tg, Th, T1a, T1b; Tg = cr[WS(rs, 1)]; Th = ci[WS(rs, 6)]; Ti = Tg + Th; T1e = Tg - Th; T1a = ci[WS(rs, 14)]; T1b = cr[WS(rs, 9)]; T1c = T1a + T1b; T2N = T1a - T1b; } { E T1f, T1g, Tj, Tk; T1f = ci[WS(rs, 10)]; T1g = cr[WS(rs, 13)]; T1h = T1f + T1g; T2O = T1f - T1g; Tj = cr[WS(rs, 5)]; Tk = ci[WS(rs, 2)]; Tl = Tj + Tk; T19 = Tj - Tk; } { E Tn, To, TZ, T10; Tn = ci[0]; To = cr[WS(rs, 7)]; Tp = Tn + To; T13 = Tn - To; TZ = ci[WS(rs, 8)]; T10 = cr[WS(rs, 15)]; T11 = TZ + T10; T2S = TZ - T10; } { E T14, T15, Tq, Tr; T14 = ci[WS(rs, 12)]; T15 = cr[WS(rs, 11)]; T16 = T14 + T15; T2T = T14 - T15; Tq = cr[WS(rs, 3)]; Tr = ci[WS(rs, 4)]; Ts = Tq + Tr; TY = Tq - Tr; } T3G = T2N + T2O; T3H = T2S + T2T; T3N = T3H - T3G; T2M = Ti - Tl; T2P = T2N - T2O; T2Q = T2M - T2P; T36 = T2M + T2P; { E T2R, T2U, Tm, Tt; T2R = Tp - Ts; T2U = T2S - T2T; T2V = T2R + T2U; T37 = T2U - T2R; Tm = Ti + Tl; Tt = Tp + Ts; Tu = Tm + Tt; T3S = Tm - Tt; } { E T12, T17, T22, T23; T12 = TY - T11; T17 = T13 - T16; T18 = FNMS(KP414213562, T17, T12); T1z = FMA(KP414213562, T12, T17); T22 = T1c - T19; T23 = T1e + T1h; T24 = 
FNMS(KP414213562, T23, T22); T2g = FMA(KP414213562, T22, T23); } { E T25, T26, T1d, T1i; T25 = TY + T11; T26 = T13 + T16; T27 = FNMS(KP414213562, T26, T25); T2h = FMA(KP414213562, T25, T26); T1d = T19 + T1c; T1i = T1e - T1h; T1j = FMA(KP414213562, T1i, T1d); T1y = FNMS(KP414213562, T1d, T1i); } } cr[0] = Tf + Tu; { E T3B, T3K, T3F, T3I, T3J, T3A; T3A = Tf - Tu; T3B = T3z * T3A; T3K = T3C * T3A; T3F = T3D + T3E; T3I = T3G + T3H; T3J = T3F - T3I; ci[0] = T3F + T3I; ci[WS(rs, 8)] = FMA(T3z, T3J, T3K); cr[WS(rs, 8)] = FNMS(T3C, T3J, T3B); } { E T3O, T3P, T3T, T3U; T3O = T3M - T3N; T3P = T3L * T3O; T3T = T3R - T3S; T3U = T3L * T3T; cr[WS(rs, 12)] = FNMS(T3Q, T3T, T3P); ci[WS(rs, 12)] = FMA(T3Q, T3O, T3U); } { E T3V, T3W, T3X, T3Y; T3V = T3M + T3N; T3W = TA * T3V; T3X = T3S + T3R; T3Y = TA * T3X; cr[WS(rs, 4)] = FNMS(TE, T3X, T3W); ci[WS(rs, 4)] = FMA(TE, T3V, T3Y); } { E T3j, T3t, T3p, T3x, T3i, T3o; T3i = T37 - T36; T3j = FNMS(KP707106781, T3i, T3h); T3t = FMA(KP707106781, T3i, T3h); T3o = T2Q - T2V; T3p = FNMS(KP707106781, T3o, T3n); T3x = FMA(KP707106781, T3o, T3n); { E T3k, T3q, T3u, T3y; T3k = T3g * T3j; cr[WS(rs, 14)] = FNMS(T3m, T3p, T3k); T3q = T3g * T3p; ci[WS(rs, 14)] = FMA(T3m, T3j, T3q); T3u = T3s * T3t; cr[WS(rs, 6)] = FNMS(T3w, T3x, T3u); T3y = T3s * T3x; ci[WS(rs, 6)] = FMA(T3w, T3t, T3y); } } { E T2X, T3b, T39, T3d, T2W, T35, T38; T2W = T2Q + T2V; T2X = FNMS(KP707106781, T2W, T2L); T3b = FMA(KP707106781, T2W, T2L); T35 = T31 + T34; T38 = T36 + T37; T39 = FNMS(KP707106781, T38, T35); T3d = FMA(KP707106781, T38, T35); { E T2Y, T3a, T3c, T3e; T2Y = T2G * T2X; cr[WS(rs, 10)] = FNMS(T30, T39, T2Y); T3a = T30 * T2X; ci[WS(rs, 10)] = FMA(T2G, T39, T3a); T3c = T1V * T3b; cr[WS(rs, 2)] = FNMS(T1X, T3d, T3c); T3e = T1X * T3b; ci[WS(rs, 2)] = FMA(T1V, T3d, T3e); } } { E T29, T2l, T2j, T2n; { E T21, T28, T2f, T2i; T21 = FNMS(KP707106781, T20, T1Z); T28 = T24 + T27; T29 = FMA(KP923879532, T28, T21); T2l = FNMS(KP923879532, T28, T21); T2f = FMA(KP707106781, T2e, T2d); 
T2i = T2g - T2h; T2j = FNMS(KP923879532, T2i, T2f); T2n = FMA(KP923879532, T2i, T2f); } { E T2a, T2k, T2m, T2o; T2a = T1Y * T29; cr[WS(rs, 11)] = FNMS(T2c, T2j, T2a); T2k = T2c * T29; ci[WS(rs, 11)] = FMA(T1Y, T2j, T2k); T2m = Tw * T2l; cr[WS(rs, 3)] = FNMS(Tz, T2n, T2m); T2o = Tz * T2l; ci[WS(rs, 3)] = FMA(Tw, T2n, T2o); } } { E T1l, T1E, T1B, T1H; { E TX, T1k, T1x, T1A; TX = FNMS(KP707106781, TW, TL); T1k = T18 - T1j; T1l = FNMS(KP923879532, T1k, TX); T1E = FMA(KP923879532, T1k, TX); T1x = FNMS(KP707106781, T1w, T1t); T1A = T1y - T1z; T1B = FNMS(KP923879532, T1A, T1x); T1H = FMA(KP923879532, T1A, T1x); } { E T1m, T1C, T1F, T1I; T1m = TG * T1l; cr[WS(rs, 13)] = FNMS(T1o, T1B, T1m); T1C = T1o * T1l; ci[WS(rs, 13)] = FMA(TG, T1B, T1C); T1F = T1D * T1E; cr[WS(rs, 5)] = FNMS(T1G, T1H, T1F); T1I = T1G * T1E; ci[WS(rs, 5)] = FMA(T1D, T1H, T1I); } } { E T2s, T2A, T2x, T2D; { E T2q, T2r, T2v, T2w; T2q = FMA(KP707106781, T20, T1Z); T2r = T2g + T2h; T2s = FNMS(KP923879532, T2r, T2q); T2A = FMA(KP923879532, T2r, T2q); T2v = FNMS(KP707106781, T2e, T2d); T2w = T27 - T24; T2x = FMA(KP923879532, T2w, T2v); T2D = FNMS(KP923879532, T2w, T2v); } { E T2t, T2y, T2B, T2E; T2t = T2p * T2s; cr[WS(rs, 7)] = FNMS(T2u, T2x, T2t); T2y = T2p * T2x; ci[WS(rs, 7)] = FMA(T2u, T2s, T2y); T2B = T2z * T2A; cr[WS(rs, 15)] = FNMS(T2C, T2D, T2B); T2E = T2z * T2D; ci[WS(rs, 15)] = FMA(T2C, T2A, T2E); } } { E T1L, T1R, T1P, T1T; { E T1J, T1K, T1N, T1O; T1J = FMA(KP707106781, TW, TL); T1K = T1y + T1z; T1L = FNMS(KP923879532, T1K, T1J); T1R = FMA(KP923879532, T1K, T1J); T1N = FMA(KP707106781, T1w, T1t); T1O = T1j + T18; T1P = FNMS(KP923879532, T1O, T1N); T1T = FMA(KP923879532, T1O, T1N); } { E T1M, T1Q, T1S, T1U; T1M = TB * T1L; cr[WS(rs, 9)] = FNMS(TF, T1P, T1M); T1Q = TB * T1P; ci[WS(rs, 9)] = FMA(TF, T1L, T1Q); T1S = Tv * T1R; cr[WS(rs, 1)] = FNMS(Ty, T1T, T1S); T1U = Tv * T1T; ci[WS(rs, 1)] = FMA(Ty, T1R, T1U); } } } } } }
static void hc2cfdft_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP500000000, +0.500000000000000000000000000000000000000000000); { INT m; for (m = mb, W = W + ((mb - 1) * 6); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 6, MAKE_VOLATILE_STRIDE(16, rs)) { E Td, Tu, Tr, T4, Tm, To, T9, T5, TA, Tp, Tv, TD, T6, Tq; { E Tk, Tl, Tf, TC, Tj, T7, T8, T1, Tn, Tb, Tc; Tb = Ip[0]; Tc = Im[0]; { E Ti, Tg, Th, T2, T3; Tg = Rm[0]; Th = Rp[0]; Tk = W[1]; Tl = Tb + Tc; Td = Tb - Tc; Tu = Th + Tg; Ti = Tg - Th; Tf = W[0]; T2 = Ip[WS(rs, 1)]; T3 = Im[WS(rs, 1)]; TC = Tk * Ti; Tj = Tf * Ti; T7 = Rp[WS(rs, 1)]; Tr = T2 + T3; T4 = T2 - T3; T8 = Rm[WS(rs, 1)]; T1 = W[2]; Tn = W[4]; } Tm = FNMS(Tk, Tl, Tj); To = T7 - T8; T9 = T7 + T8; T5 = T1 * T4; TA = Tn * Tr; Tp = Tn * To; Tv = T1 * T9; TD = FMA(Tf, Tl, TC); T6 = W[3]; Tq = W[5]; } { E Tw, Ta, TB, Ts; Tw = FMA(T6, T4, Tv); Ta = FNMS(T6, T9, T5); TB = FNMS(Tq, To, TA); Ts = FMA(Tq, Tr, Tp); { E TF, Tx, Te, Tz; TF = Tu + Tw; Tx = Tu - Tw; Te = Ta + Td; Tz = Td - Ta; { E TG, TE, Tt, Ty; TG = TB + TD; TE = TB - TD; Tt = Tm - Ts; Ty = Ts + Tm; Im[0] = KP500000000 * (TE - Tz); Ip[WS(rs, 1)] = KP500000000 * (Tz + TE); Rp[0] = KP500000000 * (TF + TG); Rm[WS(rs, 1)] = KP500000000 * (TF - TG); Rp[WS(rs, 1)] = KP500000000 * (Tx + Ty); Rm[0] = KP500000000 * (Tx - Ty); Im[WS(rs, 1)] = KP500000000 * (Tt - Te); Ip[0] = KP500000000 * (Te + Tt); } } } } } }
static void r2hc_11(const R *I, R *ro, R *io, stride is, stride ros, stride ios, INT v, INT ivs, INT ovs) { DK(KP959492973, +0.959492973614497389890368057066327699062454848); DK(KP876768831, +0.876768831002589333891339807079336796764054852); DK(KP918985947, +0.918985947228994779780736114132655398124909697); DK(KP989821441, +0.989821441880932732376092037776718787376519372); DK(KP778434453, +0.778434453334651800608337670740821884709317477); DK(KP830830026, +0.830830026003772851058548298459246407048009821); DK(KP715370323, +0.715370323453429719112414662767260662417897278); DK(KP634356270, +0.634356270682424498893150776899916060542806975); DK(KP342584725, +0.342584725681637509502641509861112333758894680); DK(KP521108558, +0.521108558113202722944698153526659300680427422); INT i; for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(ros), MAKE_VOLATILE_STRIDE(ios)) { E T1, Tg, TF, TB, TI, TL, Tz, TA; { E T4, TC, TE, T7, TD, Ta, TS, TG, TJ, Td, TP, TM, Ty, Tq, Th; E Tt, Tl; T1 = I[0]; { E Tb, Tc, Tx, Tp; { E T2, T3, Te, Tf; T2 = I[WS(is, 1)]; T3 = I[WS(is, 10)]; Te = I[WS(is, 5)]; Tf = I[WS(is, 6)]; { E T5, T6, T8, T9; T5 = I[WS(is, 2)]; T4 = T2 + T3; TC = T3 - T2; Tg = Te + Tf; TE = Tf - Te; T6 = I[WS(is, 9)]; T8 = I[WS(is, 3)]; T9 = I[WS(is, 8)]; Tb = I[WS(is, 4)]; T7 = T5 + T6; TD = T5 - T6; Ta = T8 + T9; TF = T9 - T8; Tc = I[WS(is, 7)]; } } TS = FMA(KP521108558, TC, TD); TG = FMA(KP521108558, TF, TE); TJ = FMA(KP521108558, TE, TC); Td = Tb + Tc; TB = Tb - Tc; Tx = FNMS(KP342584725, Ta, T7); Tp = FNMS(KP342584725, T4, Ta); TP = FNMS(KP521108558, TB, TF); TM = FNMS(KP521108558, TD, TB); Ty = FNMS(KP634356270, Tx, Td); Tq = FNMS(KP634356270, Tp, Tg); Th = FNMS(KP342584725, Tg, Td); Tt = FNMS(KP342584725, Td, T4); Tl = FNMS(KP342584725, T7, Tg); } { E Tu, Ts, TN, Tv; { E Tm, TU, Tj, Ti, TT; TT = FMA(KP715370323, TS, TF); Ti = FNMS(KP634356270, Th, Ta); Tu = FNMS(KP634356270, Tt, T7); Tm = FNMS(KP634356270, 
Tl, T4); TU = FMA(KP830830026, TT, TB); Tj = FNMS(KP778434453, Ti, T7); { E Tk, TR, To, Tn, TQ, Tr; TQ = FMA(KP715370323, TP, TC); Tn = FNMS(KP778434453, Tm, Ta); io[WS(ios, 5)] = KP989821441 * (FMA(KP918985947, TU, TE)); Tk = FNMS(KP876768831, Tj, T4); TR = FNMS(KP830830026, TQ, TE); To = FNMS(KP876768831, Tn, Td); Tr = FNMS(KP778434453, Tq, Td); ro[WS(ros, 5)] = FNMS(KP959492973, Tk, T1); io[WS(ios, 4)] = KP989821441 * (FNMS(KP918985947, TR, TD)); ro[WS(ros, 4)] = FNMS(KP959492973, To, T1); Ts = FNMS(KP876768831, Tr, T7); } } TN = FNMS(KP715370323, TM, TE); Tv = FNMS(KP778434453, Tu, Tg); ro[0] = T1 + T4 + T7 + Ta + Td + Tg; ro[WS(ros, 3)] = FNMS(KP959492973, Ts, T1); { E TO, Tw, TH, TK; TO = FNMS(KP830830026, TN, TF); Tw = FNMS(KP876768831, Tv, Ta); TH = FMA(KP715370323, TG, TD); TK = FNMS(KP715370323, TJ, TB); io[WS(ios, 3)] = KP989821441 * (FNMS(KP918985947, TO, TC)); ro[WS(ros, 2)] = FNMS(KP959492973, Tw, T1); TI = FNMS(KP830830026, TH, TC); TL = FMA(KP830830026, TK, TD); Tz = FNMS(KP778434453, Ty, T4); } } } io[WS(ios, 2)] = KP989821441 * (FMA(KP918985947, TI, TB)); io[WS(ios, 1)] = KP989821441 * (FNMS(KP918985947, TL, TF)); TA = FNMS(KP876768831, Tz, Tg); ro[WS(ros, 1)] = FNMS(KP959492973, TA, T1); } }
/*
 * hf_25 -- in-place, twiddled 25-element transform over the paired arrays
 * cr and ci.  The body is straight-line, machine-generated-looking code.
 *
 * Loop: m runs over [mb, me).  Before the first iteration W is offset by
 * (mb - 1) * 48; after each iteration W advances by 48, cr steps forward by
 * ms and ci steps backward by ms.
 *
 * Per iteration:
 *   - cr[WS(rs, k)] and ci[WS(rs, k)] are loaded for k = 0..24;
 *   - for k = 1..24 the (cr, ci) pair is combined with the twiddle pair
 *     (W[2k-2], W[2k-1]) through one FMA and one FNMS; element 0 is used
 *     without a twiddle;
 *   - the twiddled inputs are reduced in five groups of five
 *     (k = 0,5,..,20; 3,8,..,23; 1,6,..,21; 4,9,..,24; 2,7,..,22) using the
 *     KP618033988 / KP559016994 / KP250000000 / KP951056516 constants, and
 *     the group results are then merged with the remaining constants;
 *   - exactly one value is stored to each cr[WS(rs, k)] and ci[WS(rs, k)],
 *     k = 0..24, so the inputs are overwritten.
 *
 * NOTE(review): FMA(a, b, c), FNMS(a, b, c) and FMS(a, b, c) are presumed to
 * be a*b + c, c - a*b and a*b - c -- confirm against the codelet header.
 * NOTE(review): a second definition named hf_25 appears later in this file;
 * confirm that both are not compiled into the same translation unit.
 */
static void hf_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP949179823, +0.949179823508441261575555465843363271711583843); DK(KP860541664, +0.860541664367944677098261680920518816412804187); DK(KP621716863, +0.621716863012209892444754556304102309693593202); DK(KP614372930, +0.614372930789563808870829930444362096004872855); DK(KP557913902, +0.557913902031834264187699648465567037992437152); DK(KP249506682, +0.249506682107067890488084201715862638334226305); DK(KP560319534, +0.560319534973832390111614715371676131169633784); DK(KP681693190, +0.681693190061530575150324149145440022633095390); DK(KP906616052, +0.906616052148196230441134447086066874408359177); DK(KP968479752, +0.968479752739016373193524836781420152702090879); DK(KP845997307, +0.845997307939530944175097360758058292389769300); DK(KP998026728, +0.998026728428271561952336806863450553336905220); DK(KP994076283, +0.994076283785401014123185814696322018529298887); DK(KP734762448, +0.734762448793050413546343770063151342619912334); DK(KP772036680, +0.772036680810363904029489473607579825330539880); DK(KP062914667, +0.062914667253649757225485955897349402364686947); DK(KP833417178, +0.833417178328688677408962550243238843138996060); DK(KP921177326, +0.921177326965143320250447435415066029359282231); DK(KP541454447, +0.541454447536312777046285590082819509052033189); DK(KP803003575, +0.803003575438660414833440593570376004635464850); DK(KP943557151, +0.943557151597354104399655195398983005179443399); DK(KP554608978, +0.554608978404018097464974850792216217022558774); DK(KP242145790, +0.242145790282157779872542093866183953459003101); DK(KP559154169, +0.559154169276087864842202529084232643714075927); DK(KP683113946, +0.683113946453479238701949862233725244439656928); DK(KP248028675, +0.248028675328619457762448260696444630363259177); DK(KP968583161, +0.968583161128631119490168375464735813836012403); DK(KP525970792, +0.525970792408939708442463226536226366643874659); DK(KP726211448, 
+0.726211448929902658173535992263577167607493062); DK(KP904730450, +0.904730450839922351881287709692877908104763647); DK(KP831864738, +0.831864738706457140726048799369896829771167132); DK(KP871714437, +0.871714437527667770979999223229522602943903653); DK(KP549754652, +0.549754652192770074288023275540779861653779767); DK(KP992114701, +0.992114701314477831049793042785778521453036709); DK(KP939062505, +0.939062505817492352556001843133229685779824606); DK(KP256756360, +0.256756360367726783319498520922669048172391148); DK(KP851038619, +0.851038619207379630836264138867114231259902550); DK(KP912575812, +0.912575812670962425556968549836277086778922727); DK(KP912018591, +0.912018591466481957908415381764119056233607330); DK(KP634619297, +0.634619297544148100711287640319130485732531031); DK(KP470564281, +0.470564281212251493087595091036643380879947982); DK(KP827271945, +0.827271945972475634034355757144307982555673741); DK(KP126329378, +0.126329378446108174786050455341811215027378105); DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP559016994, +0.559016994374947424102293417182819058860154590); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP618033988, +0.618033988749894848204586834365638117720309180); { INT m; for (m = mb, W = W + ((mb - 1) * 48); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) { E T7i, T6o, T6m, T7o, T7m, T7h, T6n, T6f, T7j, T7n; { E T6W, T5G, T3Y, T3M, T7q, T70, T6V, T7P, Tt, T3L, T5T, T45, T5Q, T4c, T3G; E T2G, T5P, T49, T5S, T42, T65, T4H, T68, T4A, T2Z, T11, T67, T4x, T64, T4E; E T5Y, T4W, T61, T4P, T3d, T1z, T60, T4M, T5X, T4T, T3g, T1G, T3q, T4q, T4j; E T26, T3i, T1M, T3k, T1S; { E T3u, T2e, T3E, T44, T4b, T2E, T3w, T2k, T3y, T2q; { E T1, T6R, T3P, T7, T3W, Tq, T9, Tc, Tb, T3U, Tk, T3Q, Ta; { E T3, T6, T2, T5; T1 = cr[0]; T6R = ci[0]; T3 = cr[WS(rs, 5)]; T6 = ci[WS(rs, 5)]; T2 = W[8]; T5 = W[9]; { E Tm, Tp, To, T3V, Tn, T3O, T4, Tl; Tm = cr[WS(rs, 15)]; Tp = 
ci[WS(rs, 15)]; T3O = T2 * T6; T4 = T2 * T3; Tl = W[28]; To = W[29]; T3P = FNMS(T5, T3, T3O); T7 = FMA(T5, T6, T4); T3V = Tl * Tp; Tn = Tl * Tm; { E Tg, Tj, Tf, Ti, T3T, Th, T8; Tg = cr[WS(rs, 10)]; Tj = ci[WS(rs, 10)]; T3W = FNMS(To, Tm, T3V); Tq = FMA(To, Tp, Tn); Tf = W[18]; Ti = W[19]; T9 = cr[WS(rs, 20)]; Tc = ci[WS(rs, 20)]; T3T = Tf * Tj; Th = Tf * Tg; T8 = W[38]; Tb = W[39]; T3U = FNMS(Ti, Tg, T3T); Tk = FMA(Ti, Tj, Th); T3Q = T8 * Tc; Ta = T8 * T9; } } } { E T6T, T3X, T6Y, Tr, T3R, Td; T6T = T3U + T3W; T3X = T3U - T3W; T6Y = Tk - Tq; Tr = Tk + Tq; T3R = FNMS(Tb, T9, T3Q); Td = FMA(Tb, Tc, Ta); { E T3S, T6Z, Te, T6U, T6S, Ts; T3S = T3P - T3R; T6S = T3P + T3R; T6Z = T7 - Td; Te = T7 + Td; T6W = T6S - T6T; T6U = T6S + T6T; T5G = FNMS(KP618033988, T3S, T3X); T3Y = FMA(KP618033988, T3X, T3S); T3M = Te - Tr; Ts = Te + Tr; T7q = FMA(KP618033988, T6Y, T6Z); T70 = FNMS(KP618033988, T6Z, T6Y); T6V = FNMS(KP250000000, T6U, T6R); T7P = T6U + T6R; Tt = T1 + Ts; T3L = FNMS(KP250000000, Ts, T1); } } } { E T2g, T2j, T2m, T3v, T2h, T2p, T2l, T2i, T2o, T3x, T2n; { E T2a, T2d, T29, T2c; T2a = cr[WS(rs, 3)]; T2d = ci[WS(rs, 3)]; T29 = W[4]; T2c = W[5]; { E T2t, T2w, T2z, T3A, T2u, T2C, T2y, T2v, T2B, T3t, T2b, T2s, T2f; T2t = cr[WS(rs, 13)]; T2w = ci[WS(rs, 13)]; T3t = T29 * T2d; T2b = T29 * T2a; T2s = W[24]; T2z = cr[WS(rs, 18)]; T3u = FNMS(T2c, T2a, T3t); T2e = FMA(T2c, T2d, T2b); T3A = T2s * T2w; T2u = T2s * T2t; T2C = ci[WS(rs, 18)]; T2y = W[34]; T2v = W[25]; T2B = W[35]; { E T3B, T2x, T3D, T2D, T3C, T2A; T2g = cr[WS(rs, 8)]; T3C = T2y * T2C; T2A = T2y * T2z; T3B = FNMS(T2v, T2t, T3A); T2x = FMA(T2v, T2w, T2u); T3D = FNMS(T2B, T2z, T3C); T2D = FMA(T2B, T2C, T2A); T2j = ci[WS(rs, 8)]; T2f = W[14]; T3E = T3B + T3D; T44 = T3D - T3B; T4b = T2x - T2D; T2E = T2x + T2D; } T2m = cr[WS(rs, 23)]; T3v = T2f * T2j; T2h = T2f * T2g; T2p = ci[WS(rs, 23)]; T2l = W[44]; T2i = W[15]; T2o = W[45]; } } T3x = T2l * T2p; T2n = T2l * T2m; T3w = FNMS(T2i, T2g, T3v); T2k = FMA(T2i, T2j, T2h); 
T3y = FNMS(T2o, T2m, T3x); T2q = FMA(T2o, T2p, T2n); } { E T2N, Tz, T2X, T4G, T4z, TZ, T2P, TF, T2R, TL; { E TB, TE, TH, T2O, TC, TK, TG, TD, TJ, T2Q, TI; { E Tv, Ty, Tu, Tx; { E T48, T41, T47, T40, T43, T3z; Tv = cr[WS(rs, 1)]; T43 = T3y - T3w; T3z = T3w + T3y; { E T4a, T2r, T3F, T2F; T4a = T2k - T2q; T2r = T2k + T2q; T5T = FNMS(KP618033988, T43, T44); T45 = FMA(KP618033988, T44, T43); T3F = T3z + T3E; T48 = T3E - T3z; T5Q = FNMS(KP618033988, T4a, T4b); T4c = FMA(KP618033988, T4b, T4a); T2F = T2r + T2E; T41 = T2E - T2r; T3G = T3u + T3F; T47 = FNMS(KP250000000, T3F, T3u); T2G = T2e + T2F; T40 = FNMS(KP250000000, T2F, T2e); Ty = ci[WS(rs, 1)]; } T5P = FMA(KP559016994, T48, T47); T49 = FNMS(KP559016994, T48, T47); T5S = FMA(KP559016994, T41, T40); T42 = FNMS(KP559016994, T41, T40); Tu = W[0]; } Tx = W[1]; { E TO, TR, TU, T2T, TP, TX, TT, TQ, TW, T2M, Tw, TN, TA; TO = cr[WS(rs, 11)]; TR = ci[WS(rs, 11)]; T2M = Tu * Ty; Tw = Tu * Tv; TN = W[20]; TU = cr[WS(rs, 16)]; T2N = FNMS(Tx, Tv, T2M); Tz = FMA(Tx, Ty, Tw); T2T = TN * TR; TP = TN * TO; TX = ci[WS(rs, 16)]; TT = W[30]; TQ = W[21]; TW = W[31]; { E T2U, TS, T2W, TY, T2V, TV; TB = cr[WS(rs, 6)]; T2V = TT * TX; TV = TT * TU; T2U = FNMS(TQ, TO, T2T); TS = FMA(TQ, TR, TP); T2W = FNMS(TW, TU, T2V); TY = FMA(TW, TX, TV); TE = ci[WS(rs, 6)]; TA = W[10]; T2X = T2U + T2W; T4G = T2W - T2U; T4z = TY - TS; TZ = TS + TY; } TH = cr[WS(rs, 21)]; T2O = TA * TE; TC = TA * TB; TK = ci[WS(rs, 21)]; TG = W[40]; TD = W[11]; TJ = W[41]; } } T2Q = TG * TK; TI = TG * TH; T2P = FNMS(TD, TB, T2O); TF = FMA(TD, TE, TC); T2R = FNMS(TJ, TH, T2Q); TL = FMA(TJ, TK, TI); } { E T31, T17, T3b, T4V, T4O, T1x, T33, T1d, T35, T1j; { E T19, T1c, T1f, T32, T1a, T1i, T1e, T1b, T1h, T34, T1g; { E T13, T16, T12, T15; { E T4w, T4D, T4v, T4C, T4F, T2S; T13 = cr[WS(rs, 4)]; T4F = T2P - T2R; T2S = T2P + T2R; { E T4y, TM, T2Y, T10; T4y = TL - TF; TM = TF + TL; T65 = FMA(KP618033988, T4F, T4G); T4H = FNMS(KP618033988, T4G, T4F); T2Y = T2S + T2X; T4w = T2S - T2X; 
T68 = FNMS(KP618033988, T4y, T4z); T4A = FMA(KP618033988, T4z, T4y); T10 = TM + TZ; T4D = TM - TZ; T2Z = T2N + T2Y; T4v = FNMS(KP250000000, T2Y, T2N); T11 = Tz + T10; T4C = FNMS(KP250000000, T10, Tz); T16 = ci[WS(rs, 4)]; } T67 = FNMS(KP559016994, T4w, T4v); T4x = FMA(KP559016994, T4w, T4v); T64 = FNMS(KP559016994, T4D, T4C); T4E = FMA(KP559016994, T4D, T4C); T12 = W[6]; } T15 = W[7]; { E T1m, T1p, T1s, T37, T1n, T1v, T1r, T1o, T1u, T30, T14, T1l, T18; T1m = cr[WS(rs, 14)]; T1p = ci[WS(rs, 14)]; T30 = T12 * T16; T14 = T12 * T13; T1l = W[26]; T1s = cr[WS(rs, 19)]; T31 = FNMS(T15, T13, T30); T17 = FMA(T15, T16, T14); T37 = T1l * T1p; T1n = T1l * T1m; T1v = ci[WS(rs, 19)]; T1r = W[36]; T1o = W[27]; T1u = W[37]; { E T38, T1q, T3a, T1w, T39, T1t; T19 = cr[WS(rs, 9)]; T39 = T1r * T1v; T1t = T1r * T1s; T38 = FNMS(T1o, T1m, T37); T1q = FMA(T1o, T1p, T1n); T3a = FNMS(T1u, T1s, T39); T1w = FMA(T1u, T1v, T1t); T1c = ci[WS(rs, 9)]; T18 = W[16]; T3b = T38 + T3a; T4V = T3a - T38; T4O = T1w - T1q; T1x = T1q + T1w; } T1f = cr[WS(rs, 24)]; T32 = T18 * T1c; T1a = T18 * T19; T1i = ci[WS(rs, 24)]; T1e = W[46]; T1b = W[17]; T1h = W[47]; } } T34 = T1e * T1i; T1g = T1e * T1f; T33 = FNMS(T1b, T19, T32); T1d = FMA(T1b, T1c, T1a); T35 = FNMS(T1h, T1f, T34); T1j = FMA(T1h, T1i, T1g); } { E T1I, T1L, T1O, T3h, T1J, T1R, T1N, T1K, T1Q, T3j, T1P; { E T1C, T1F, T1B, T1E; { E T4L, T4S, T4K, T4R, T4U, T36; T1C = cr[WS(rs, 2)]; T4U = T35 - T33; T36 = T33 + T35; { E T4N, T1k, T3c, T1y; T4N = T1j - T1d; T1k = T1d + T1j; T5Y = FNMS(KP618033988, T4U, T4V); T4W = FMA(KP618033988, T4V, T4U); T3c = T36 + T3b; T4L = T3b - T36; T61 = FNMS(KP618033988, T4N, T4O); T4P = FMA(KP618033988, T4O, T4N); T1y = T1k + T1x; T4S = T1k - T1x; T3d = T31 + T3c; T4K = FNMS(KP250000000, T3c, T31); T1z = T17 + T1y; T4R = FNMS(KP250000000, T1y, T17); T1F = ci[WS(rs, 2)]; } T60 = FMA(KP559016994, T4L, T4K); T4M = FNMS(KP559016994, T4L, T4K); T5X = FNMS(KP559016994, T4S, T4R); T4T = FMA(KP559016994, T4S, T4R); T1B = W[2]; } T1E 
= W[3]; { E T1V, T1Y, T21, T3m, T1W, T24, T20, T1X, T23, T3f, T1D, T1U, T1H; T1V = cr[WS(rs, 12)]; T1Y = ci[WS(rs, 12)]; T3f = T1B * T1F; T1D = T1B * T1C; T1U = W[22]; T21 = cr[WS(rs, 17)]; T3g = FNMS(T1E, T1C, T3f); T1G = FMA(T1E, T1F, T1D); T3m = T1U * T1Y; T1W = T1U * T1V; T24 = ci[WS(rs, 17)]; T20 = W[32]; T1X = W[23]; T23 = W[33]; { E T3n, T1Z, T3p, T25, T3o, T22; T1I = cr[WS(rs, 7)]; T3o = T20 * T24; T22 = T20 * T21; T3n = FNMS(T1X, T1V, T3m); T1Z = FMA(T1X, T1Y, T1W); T3p = FNMS(T23, T21, T3o); T25 = FMA(T23, T24, T22); T1L = ci[WS(rs, 7)]; T1H = W[12]; T3q = T3n + T3p; T4q = T3n - T3p; T4j = T25 - T1Z; T26 = T1Z + T25; } T1O = cr[WS(rs, 22)]; T3h = T1H * T1L; T1J = T1H * T1I; T1R = ci[WS(rs, 22)]; T1N = W[42]; T1K = W[13]; T1Q = W[43]; } } T3j = T1N * T1R; T1P = T1N * T1O; T3i = FNMS(T1K, T1I, T3h); T1M = FMA(T1K, T1L, T1J); T3k = FNMS(T1Q, T1O, T3j); T1S = FMA(T1Q, T1R, T1P); } } } } { E T7Q, T5M, T5J, T7R, T5I, T5L, T7X, T7W, T5F, T6X, T5u, T7M, T7O, T5C, T5E; E T5t, T7J, T7N; { E T4r, T4k, T4h, T4o, T3K, T3I, T1A, T2H, T28; { E T3e, T4g, T4n, T4f, T4m, T3H, T4p, T3l; T7Q = T2Z + T3d; T3e = T2Z - T3d; T4p = T3k - T3i; T3l = T3i + T3k; { E T4i, T1T, T3r, T27, T3s; T4i = T1S - T1M; T1T = T1M + T1S; T5M = FMA(KP618033988, T4p, T4q); T4r = FNMS(KP618033988, T4q, T4p); T3r = T3l + T3q; T4g = T3q - T3l; T5J = FNMS(KP618033988, T4i, T4j); T4k = FMA(KP618033988, T4j, T4i); T27 = T1T + T26; T4n = T26 - T1T; T3s = T3g + T3r; T4f = FNMS(KP250000000, T3r, T3g); T28 = T1G + T27; T4m = FNMS(KP250000000, T27, T1G); T3H = T3s - T3G; T7R = T3s + T3G; } T5I = FMA(KP559016994, T4g, T4f); T4h = FNMS(KP559016994, T4g, T4f); T5L = FMA(KP559016994, T4n, T4m); T4o = FNMS(KP559016994, T4n, T4m); T3K = FNMS(KP618033988, T3e, T3H); T3I = FMA(KP618033988, T3H, T3e); } T1A = T11 + T1z; T7X = T1z - T11; T7W = T28 - T2G; T2H = T28 + T2G; { E T3Z, T5d, T7r, T7D, T5h, T5i, T5m, T5l, T59, T7K, T56, T7L, T7I, T7G, T52; E T50, T5w, T5g, T5q, T5A, T3N, T7p; T3N = FMA(KP559016994, T3M, T3L); 
T5F = FNMS(KP559016994, T3M, T3L); T6X = FNMS(KP559016994, T6W, T6V); T7p = FMA(KP559016994, T6W, T6V); { E T5o, T5p, T57, T4e, T4Y, T55, T4l, T4s, T4B, T5f, T5e, T4I; { E T46, T2K, T2J, T4d, T2I; T46 = FMA(KP951056516, T45, T42); T5o = FNMS(KP951056516, T45, T42); T2I = T1A + T2H; T2K = T1A - T2H; T3Z = FNMS(KP951056516, T3Y, T3N); T5d = FMA(KP951056516, T3Y, T3N); T7r = FNMS(KP951056516, T7q, T7p); T7D = FMA(KP951056516, T7q, T7p); cr[0] = Tt + T2I; T2J = FNMS(KP250000000, T2I, Tt); T5p = FNMS(KP951056516, T4c, T49); T4d = FMA(KP951056516, T4c, T49); { E T4Q, T4X, T2L, T3J; T4Q = FNMS(KP951056516, T4P, T4M); T5h = FMA(KP951056516, T4P, T4M); T5i = FNMS(KP951056516, T4W, T4T); T4X = FMA(KP951056516, T4W, T4T); T2L = FMA(KP559016994, T2K, T2J); T3J = FNMS(KP559016994, T2K, T2J); T57 = FMA(KP126329378, T46, T4d); T4e = FNMS(KP126329378, T4d, T46); cr[WS(rs, 5)] = FMA(KP951056516, T3I, T2L); ci[WS(rs, 4)] = FNMS(KP951056516, T3I, T2L); ci[WS(rs, 9)] = FMA(KP951056516, T3K, T3J); cr[WS(rs, 10)] = FNMS(KP951056516, T3K, T3J); T4Y = FMA(KP827271945, T4X, T4Q); T55 = FNMS(KP827271945, T4Q, T4X); } } T4l = FNMS(KP951056516, T4k, T4h); T5m = FMA(KP951056516, T4k, T4h); T5l = FNMS(KP951056516, T4r, T4o); T4s = FMA(KP951056516, T4r, T4o); T4B = FNMS(KP951056516, T4A, T4x); T5f = FMA(KP951056516, T4A, T4x); T5e = FMA(KP951056516, T4H, T4E); T4I = FNMS(KP951056516, T4H, T4E); { E T4u, T4Z, T4t, T58; T4t = FNMS(KP470564281, T4s, T4l); T58 = FMA(KP470564281, T4l, T4s); { E T4J, T54, T7E, T7F; T4J = FMA(KP634619297, T4I, T4B); T54 = FNMS(KP634619297, T4B, T4I); T59 = FNMS(KP912018591, T58, T57); T7E = FMA(KP912018591, T58, T57); T7K = FMA(KP912018591, T4t, T4e); T4u = FNMS(KP912018591, T4t, T4e); T56 = FMA(KP912575812, T55, T54); T7F = FNMS(KP912575812, T55, T54); T7L = FMA(KP912575812, T4Y, T4J); T4Z = FNMS(KP912575812, T4Y, T4J); T7I = FNMS(KP851038619, T7F, T7E); T7G = FMA(KP851038619, T7F, T7E); } T52 = FMA(KP851038619, T4Z, T4u); T50 = FNMS(KP851038619, T4Z, T4u); } T5w = 
FNMS(KP256756360, T5e, T5f); T5g = FMA(KP256756360, T5f, T5e); T5q = FMA(KP939062505, T5p, T5o); T5A = FNMS(KP939062505, T5o, T5p); } { E T5y, T7z, T5B, T7y, T7w, T7u, T5s; { E T5k, T5r, T5j, T5x; cr[WS(rs, 4)] = FNMS(KP992114701, T50, T3Z); T5j = FMA(KP634619297, T5i, T5h); T5x = FNMS(KP634619297, T5h, T5i); { E T5n, T5z, T7s, T7t; T5n = FMA(KP549754652, T5m, T5l); T5z = FNMS(KP549754652, T5l, T5m); T5y = FMA(KP871714437, T5x, T5w); T7s = FNMS(KP871714437, T5x, T5w); T7z = FNMS(KP871714437, T5j, T5g); T5k = FMA(KP871714437, T5j, T5g); T5B = FNMS(KP831864738, T5A, T5z); T7t = FMA(KP831864738, T5A, T5z); T7y = FNMS(KP831864738, T5q, T5n); T5r = FMA(KP831864738, T5q, T5n); T7w = FNMS(KP904730450, T7t, T7s); T7u = FMA(KP904730450, T7t, T7s); } ci[WS(rs, 20)] = FNMS(KP992114701, T7G, T7D); T5u = FNMS(KP904730450, T5r, T5k); T5s = FMA(KP904730450, T5r, T5k); } { E T5a, T5c, T7A, T7C, T7v, T53, T5b, T51, T7H, T7x, T7B; T5a = FNMS(KP726211448, T59, T56); T5c = FMA(KP525970792, T56, T59); ci[WS(rs, 23)] = FMA(KP968583161, T7u, T7r); cr[WS(rs, 1)] = FMA(KP968583161, T5s, T5d); T51 = FMA(KP248028675, T50, T3Z); T7A = FNMS(KP683113946, T7z, T7y); T7C = FMA(KP559154169, T7y, T7z); T7v = FNMS(KP242145790, T7u, T7r); T53 = FMA(KP554608978, T52, T51); T5b = FNMS(KP554608978, T52, T51); T7M = FNMS(KP525970792, T7L, T7K); T7O = FMA(KP726211448, T7K, T7L); ci[WS(rs, 10)] = FNMS(KP943557151, T5c, T5b); ci[WS(rs, 5)] = FMA(KP943557151, T5c, T5b); ci[0] = FMA(KP803003575, T5a, T53); cr[WS(rs, 9)] = FNMS(KP803003575, T5a, T53); T7x = FNMS(KP541454447, T7w, T7v); T7B = FMA(KP541454447, T7w, T7v); T7H = FMA(KP248028675, T7G, T7D); cr[WS(rs, 21)] = -(FMA(KP921177326, T7C, T7B)); ci[WS(rs, 18)] = FNMS(KP921177326, T7C, T7B); ci[WS(rs, 13)] = FMA(KP833417178, T7A, T7x); cr[WS(rs, 16)] = FMS(KP833417178, T7A, T7x); T5C = FMA(KP559154169, T5B, T5y); T5E = FNMS(KP683113946, T5y, T5B); T5t = FNMS(KP242145790, T5s, T5d); T7J = FNMS(KP554608978, T7I, T7H); T7N = FMA(KP554608978, T7I, T7H); } } } } 
{ E T7Y, T80, T5v, T5D; cr[WS(rs, 24)] = -(FMA(KP803003575, T7O, T7N)); ci[WS(rs, 15)] = FNMS(KP803003575, T7O, T7N); cr[WS(rs, 19)] = FMS(KP943557151, T7M, T7J); cr[WS(rs, 14)] = -(FMA(KP943557151, T7M, T7J)); T5v = FMA(KP541454447, T5u, T5t); T5D = FNMS(KP541454447, T5u, T5t); cr[WS(rs, 11)] = FNMS(KP833417178, T5E, T5D); ci[WS(rs, 8)] = FMA(KP833417178, T5E, T5D); cr[WS(rs, 6)] = FMA(KP921177326, T5C, T5v); ci[WS(rs, 3)] = FNMS(KP921177326, T5C, T5v); T7Y = FMA(KP618033988, T7X, T7W); T80 = FNMS(KP618033988, T7W, T7X); { E T6t, T6p, T5H, T7d, T71, T6u, T6y, T6x, T6l, T7k, T6i, T7l, T7g, T6c, T6e; E T6s, T6L, T6J, T6C; { E T6A, T6B, T5O, T6j, T6h, T6a, T6q, T5R, T5U, T6r, T5Z, T62; { E T5K, T7U, T7T, T5N, T7S; T6t = FNMS(KP951056516, T5J, T5I); T5K = FMA(KP951056516, T5J, T5I); T7U = T7Q - T7R; T7S = T7Q + T7R; T6p = FNMS(KP951056516, T5G, T5F); T5H = FMA(KP951056516, T5G, T5F); T7d = FNMS(KP951056516, T70, T6X); T71 = FMA(KP951056516, T70, T6X); ci[WS(rs, 24)] = T7S + T7P; T7T = FNMS(KP250000000, T7S, T7P); T5N = FMA(KP951056516, T5M, T5L); T6u = FNMS(KP951056516, T5M, T5L); { E T66, T69, T7Z, T7V; T6A = FMA(KP951056516, T65, T64); T66 = FNMS(KP951056516, T65, T64); T69 = FMA(KP951056516, T68, T67); T6B = FNMS(KP951056516, T68, T67); T7Z = FMA(KP559016994, T7U, T7T); T7V = FNMS(KP559016994, T7U, T7T); T5O = FMA(KP062914667, T5N, T5K); T6j = FNMS(KP062914667, T5K, T5N); ci[WS(rs, 14)] = FMA(KP951056516, T7Y, T7V); cr[WS(rs, 15)] = FMS(KP951056516, T7Y, T7V); ci[WS(rs, 19)] = FMA(KP951056516, T80, T7Z); cr[WS(rs, 20)] = FMS(KP951056516, T80, T7Z); T6h = FNMS(KP939062505, T66, T69); T6a = FMA(KP939062505, T69, T66); } } T6q = FMA(KP951056516, T5Q, T5P); T5R = FNMS(KP951056516, T5Q, T5P); T5U = FNMS(KP951056516, T5T, T5S); T6r = FMA(KP951056516, T5T, T5S); T6y = FMA(KP951056516, T5Y, T5X); T5Z = FNMS(KP951056516, T5Y, T5X); T62 = FMA(KP951056516, T61, T60); T6x = FNMS(KP951056516, T61, T60); { E T5W, T6b, T6k, T5V; T6k = FMA(KP827271945, T5R, T5U); T5V = 
FNMS(KP827271945, T5U, T5R); { E T6g, T63, T7e, T7f; T6g = FMA(KP126329378, T5Z, T62); T63 = FNMS(KP126329378, T62, T5Z); T7e = FMA(KP772036680, T6k, T6j); T6l = FNMS(KP772036680, T6k, T6j); T5W = FMA(KP772036680, T5V, T5O); T7k = FNMS(KP772036680, T5V, T5O); T7f = FNMS(KP734762448, T6h, T6g); T6i = FMA(KP734762448, T6h, T6g); T6b = FNMS(KP734762448, T6a, T63); T7l = FMA(KP734762448, T6a, T63); T7g = FMA(KP994076283, T7f, T7e); T7i = FNMS(KP994076283, T7f, T7e); } T6c = FNMS(KP994076283, T6b, T5W); T6e = FMA(KP994076283, T6b, T5W); } T6s = FMA(KP062914667, T6r, T6q); T6L = FNMS(KP062914667, T6q, T6r); T6J = FNMS(KP549754652, T6A, T6B); T6C = FMA(KP549754652, T6B, T6A); } { E T6N, T78, T6K, T79, T74, T76, T6E, T6G; { E T6w, T6D, T6M, T6v; cr[WS(rs, 3)] = FMA(KP998026728, T6c, T5H); T6M = FNMS(KP634619297, T6t, T6u); T6v = FMA(KP634619297, T6u, T6t); { E T6I, T6z, T72, T73; T6I = FMA(KP470564281, T6x, T6y); T6z = FNMS(KP470564281, T6y, T6x); T72 = FMA(KP845997307, T6M, T6L); T6N = FNMS(KP845997307, T6M, T6L); T6w = FMA(KP845997307, T6v, T6s); T78 = FNMS(KP845997307, T6v, T6s); T73 = FNMS(KP968479752, T6J, T6I); T6K = FMA(KP968479752, T6J, T6I); T6D = FMA(KP968479752, T6C, T6z); T79 = FNMS(KP968479752, T6C, T6z); T74 = FMA(KP906616052, T73, T72); T76 = FNMS(KP906616052, T73, T72); } ci[WS(rs, 21)] = FNMS(KP998026728, T7g, T7d); T6E = FMA(KP906616052, T6D, T6w); T6G = FNMS(KP906616052, T6D, T6w); } { E T7c, T7a, T6Q, T6O, T6F, T7b, T77, T75, T6d, T6P, T6H; T7c = FMA(KP681693190, T78, T79); T7a = FNMS(KP560319534, T79, T78); ci[WS(rs, 22)] = FNMS(KP998026728, T74, T71); cr[WS(rs, 2)] = FMA(KP998026728, T6E, T6p); T75 = FMA(KP249506682, T74, T71); T6Q = FNMS(KP560319534, T6K, T6N); T6O = FMA(KP681693190, T6N, T6K); T6F = FNMS(KP249506682, T6E, T6p); T7b = FMA(KP557913902, T76, T75); T77 = FNMS(KP557913902, T76, T75); T6o = FMA(KP614372930, T6i, T6l); T6m = FNMS(KP621716863, T6l, T6i); cr[WS(rs, 22)] = FMS(KP860541664, T7c, T7b); ci[WS(rs, 17)] = FMA(KP860541664, T7c, 
T7b); ci[WS(rs, 12)] = FNMS(KP949179823, T7a, T77); cr[WS(rs, 17)] = -(FMA(KP949179823, T7a, T77)); T6P = FMA(KP557913902, T6G, T6F); T6H = FNMS(KP557913902, T6G, T6F); T6d = FNMS(KP249506682, T6c, T5H); ci[WS(rs, 7)] = FMA(KP949179823, T6Q, T6P); cr[WS(rs, 12)] = FNMS(KP949179823, T6Q, T6P); cr[WS(rs, 7)] = FMA(KP860541664, T6O, T6H); ci[WS(rs, 2)] = FNMS(KP860541664, T6O, T6H); T7o = FMA(KP621716863, T7k, T7l); T7m = FNMS(KP614372930, T7l, T7k); T7h = FMA(KP249506682, T7g, T7d); T6n = FMA(KP557913902, T6e, T6d); T6f = FNMS(KP557913902, T6e, T6d); } } } } } } ci[WS(rs, 6)] = FNMS(KP949179823, T6o, T6n); ci[WS(rs, 11)] = FMA(KP949179823, T6o, T6n); cr[WS(rs, 8)] = FMA(KP943557151, T6m, T6f); ci[WS(rs, 1)] = FNMS(KP943557151, T6m, T6f); T7j = FNMS(KP557913902, T7i, T7h); T7n = FMA(KP557913902, T7i, T7h); cr[WS(rs, 23)] = -(FMA(KP943557151, T7o, T7n)); ci[WS(rs, 16)] = FNMS(KP943557151, T7o, T7n); cr[WS(rs, 18)] = FMS(KP949179823, T7m, T7j); cr[WS(rs, 13)] = -(FMA(KP949179823, T7m, T7j)); } } }
/*
 * q1_3 -- three 3-point butterflies with twiddles, stored transposed.
 *
 * For every m in [mb, me) the routine treats rio/iio as a 3 x 3 tile.  Block
 * b (b = 0, 1, 2) is read from indices WS(vs, b) + WS(rs, j), j = 0, 1, 2.
 * Its three results are written to indices WS(vs, k) + WS(rs, b),
 * k = 0, 1, 2, i.e. with the roles of rs and vs exchanged.  Result k = 0 is
 * stored as is; results k = 1 and k = 2 are first combined with the twiddle
 * pairs (W[0], W[1]) and (W[2], W[3]) respectively.  All 18 inputs are read
 * before the first store.  Between iterations rio and iio advance by ms and
 * W advances by 4; W is offset by mb * 4 before the first iteration.
 *
 * NOTE(review): FMA(a, b, c) and FNMS(a, b, c) are presumed to evaluate
 * a*b + c and c - a*b -- confirm against the codelet header.
 */
static void q1_3(float *rio, float *iio, const float *W, stride rs, stride vs, INT mb, INT me, INT ms)
{
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT m;

     for (m = mb, W = W + (mb * 4); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 4, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(vs)) {
          /* Twiddle pairs for results k = 1 (w0, w1) and k = 2 (w2, w3). */
          E w0 = W[0];
          E w1 = W[1];
          E w2 = W[2];
          E w3 = W[3];

          /* Block 0 inputs. */
          E ar0 = rio[0];
          E ar1 = rio[WS(rs, 1)];
          E ar2 = rio[WS(rs, 2)];
          E ai0 = iio[0];
          E ai1 = iio[WS(rs, 1)];
          E ai2 = iio[WS(rs, 2)];

          /* Block 1 inputs. */
          E br0 = rio[WS(vs, 1)];
          E br1 = rio[WS(vs, 1) + WS(rs, 1)];
          E br2 = rio[WS(vs, 1) + WS(rs, 2)];
          E bi0 = iio[WS(vs, 1)];
          E bi1 = iio[WS(vs, 1) + WS(rs, 1)];
          E bi2 = iio[WS(vs, 1) + WS(rs, 2)];

          /* Block 2 inputs. */
          E cr0 = rio[WS(vs, 2)];
          E cr1 = rio[WS(vs, 2) + WS(rs, 1)];
          E cr2 = rio[WS(vs, 2) + WS(rs, 2)];
          E ci0 = iio[WS(vs, 2)];
          E ci1 = iio[WS(vs, 2) + WS(rs, 1)];
          E ci2 = iio[WS(vs, 2) + WS(rs, 2)];

          /* Block 0 butterfly. */
          E a_rsum = ar1 + ar2;
          E a_rdif = ar2 - ar1;
          E a_idif = ai1 - ai2;
          E a_isum = ai1 + ai2;
          E a_rmid = FNMS(KP500000000, a_rsum, ar0);
          E a_imid = FNMS(KP500000000, a_isum, ai0);
          E a1r = FMA(KP866025403, a_idif, a_rmid);
          E a1i = FMA(KP866025403, a_rdif, a_imid);
          E a2r = FNMS(KP866025403, a_idif, a_rmid);
          E a2i = FNMS(KP866025403, a_rdif, a_imid);

          /* Block 1 butterfly. */
          E b_rsum = br1 + br2;
          E b_rdif = br2 - br1;
          E b_idif = bi1 - bi2;
          E b_isum = bi1 + bi2;
          E b_rmid = FNMS(KP500000000, b_rsum, br0);
          E b_imid = FNMS(KP500000000, b_isum, bi0);
          E b1r = FMA(KP866025403, b_idif, b_rmid);
          E b1i = FMA(KP866025403, b_rdif, b_imid);
          E b2r = FNMS(KP866025403, b_idif, b_rmid);
          E b2i = FNMS(KP866025403, b_rdif, b_imid);

          /* Block 2 butterfly. */
          E c_rsum = cr1 + cr2;
          E c_rdif = cr2 - cr1;
          E c_idif = ci1 - ci2;
          E c_isum = ci1 + ci2;
          E c_rmid = FNMS(KP500000000, c_rsum, cr0);
          E c_imid = FNMS(KP500000000, c_isum, ci0);
          E c1r = FMA(KP866025403, c_idif, c_rmid);
          E c1i = FMA(KP866025403, c_rdif, c_imid);
          E c2r = FNMS(KP866025403, c_idif, c_rmid);
          E c2i = FNMS(KP866025403, c_rdif, c_imid);

          /* First half of each twiddle product, kept as a separate rounding step. */
          E a1i_w0 = w0 * a1i;
          E a1r_w0 = w0 * a1r;
          E a2i_w2 = w2 * a2i;
          E a2r_w2 = w2 * a2r;
          E b1i_w0 = w0 * b1i;
          E b1r_w0 = w0 * b1r;
          E b2i_w2 = w2 * b2i;
          E b2r_w2 = w2 * b2r;
          E c1i_w0 = w0 * c1i;
          E c1r_w0 = w0 * c1r;
          E c2i_w2 = w2 * c2i;
          E c2r_w2 = w2 * c2r;

          /* k = 0 results need no twiddle. */
          rio[0] = ar0 + a_rsum;
          iio[0] = ai0 + a_isum;
          rio[WS(rs, 1)] = br0 + b_rsum;
          iio[WS(rs, 1)] = bi0 + b_isum;
          iio[WS(rs, 2)] = ci0 + c_isum;
          rio[WS(rs, 2)] = cr0 + c_rsum;

          /* Twiddled results, written to the transposed positions. */
          iio[WS(vs, 1)] = FNMS(w1, a1r, a1i_w0);
          rio[WS(vs, 1)] = FMA(w1, a1i, a1r_w0);
          iio[WS(vs, 2) + WS(rs, 2)] = FNMS(w3, c2r, c2i_w2);
          rio[WS(vs, 2) + WS(rs, 2)] = FMA(w3, c2i, c2r_w2);
          iio[WS(vs, 2) + WS(rs, 1)] = FNMS(w3, b2r, b2i_w2);
          rio[WS(vs, 2) + WS(rs, 1)] = FMA(w3, b2i, b2r_w2);
          iio[WS(vs, 1) + WS(rs, 1)] = FNMS(w1, b1r, b1i_w0);
          rio[WS(vs, 1) + WS(rs, 1)] = FMA(w1, b1i, b1r_w0);
          iio[WS(vs, 1) + WS(rs, 2)] = FNMS(w1, c1r, c1i_w0);
          rio[WS(vs, 1) + WS(rs, 2)] = FMA(w1, c1i, c1r_w0);
          iio[WS(vs, 2)] = FNMS(w3, a2r, a2i_w2);
          rio[WS(vs, 2)] = FMA(w3, a2i, a2r_w2);
     }
}
/*
 * hf_25 -- straight-line size-25 butterfly applied in place to the paired
 * arrays cr/ci, using coefficient pairs taken from W.
 * NOTE(review): the name and shape suggest a generated FFTW-style
 * half-complex forward twiddle codelet -- confirm against the generator
 * before editing by hand.
 *
 * Parameters:
 *   cr, ci - data arrays; slots WS(rs, 0..24) of each are read and then
 *            overwritten.
 *   W      - coefficient table; 48 entries (W[0..47]) are used per pass.
 *   rs     - stride passed to WS() to locate slot k.
 *   mb, me - passes run for m = mb .. me - 1.
 *   ms     - per-pass step: cr moves forward by ms, ci moves backward by ms.
 *
 * W is offset by (mb - 1) * 48 before the first pass and by 48 after each.
 *
 * Each pass:
 *   1. Takes cr[0] / ci[0] as-is and, for k = 1..24, combines the pair
 *      (cr[WS(rs, k)], ci[WS(rs, k)]) with (W[2k-2], W[2k-1]) through one
 *      FMA and one FNMS (macros defined elsewhere; assumed to be a*b + c
 *      and c - a*b respectively).
 *   2. Reduces the inputs in five groups of five -- slot indices
 *      {0,5,10,15,20}, {3,8,13,18,23}, {1,6,11,16,21}, {4,9,14,19,24} and
 *      {2,7,12,17,22} -- using KP559016994, KP250000000, KP951056516 and
 *      KP587785252.
 *   3. Combines the group results with the remaining KP constants and
 *      stores each of cr[WS(rs, 0..24)] and ci[WS(rs, 0..24)] exactly once.
 *
 * Every load from cr/ci is issued before the first store; because the
 * update is in place, statements must not be moved across that boundary.
 */
static void hf_25(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP998026728, +0.998026728428271561952336806863450553336905220); DK(KP062790519, +0.062790519529313376076178224565631133122484832); DK(KP684547105, +0.684547105928688673732283357621209269889519233); DK(KP728968627, +0.728968627421411523146730319055259111372571664); DK(KP481753674, +0.481753674101715274987191502872129653528542010); DK(KP876306680, +0.876306680043863587308115903922062583399064238); DK(KP248689887, +0.248689887164854788242283746006447968417567406); DK(KP968583161, +0.968583161128631119490168375464735813836012403); DK(KP992114701, +0.992114701314477831049793042785778521453036709); DK(KP125333233, +0.125333233564304245373118759816508793942918247); DK(KP425779291, +0.425779291565072648862502445744251703979973042); DK(KP904827052, +0.904827052466019527713668647932697593970413911); DK(KP637423989, +0.637423989748689710176712811676016195434917298); DK(KP770513242, +0.770513242775789230803009636396177847271667672); DK(KP844327925, +0.844327925502015078548558063966681505381659241); DK(KP535826794, +0.535826794978996618271308767867639978063575346); DK(KP587785252, +0.587785252292473129168705954639072768597652438); DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP559016994, +0.559016994374947424102293417182819058860154590); { INT m; for (m = mb, W = W + ((mb - 1) * 48); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 48, MAKE_VOLATILE_STRIDE(50, rs)) { E T1, T6b, T2l, T6g, To, T2m, T6e, T6f, T6a, T6H, T2u, T4I, T2i, T60, T3S; E T5D, T4r, T58, T3Z, T5C, T4q, T5b, TS, T5W, T2G, T5s, T4g, T4M, T2R, T5t; E T4h, T4P, T1l, T5X, T37, T5v, T4k, T4T, T3e, T5w, T4j, T4W, T1P, T5Z, T3v; E T5A, T4o, T54, T3C, T5z, T4n, T51; { E T6, T2o, Tb, T2p, Tc, T6c, Th, T2r, Tm, T2s, Tn, T6d; T1 = cr[0]; T6b = ci[0]; { E T3, T5, T2, T4; T3 = cr[WS(rs, 5)]; T5 = ci[WS(rs, 5)]; T2 = W[8]; T4 = W[9]; T6 = 
FMA(T2, T3, T4 * T5); T2o = FNMS(T4, T3, T2 * T5); } { E T8, Ta, T7, T9; T8 = cr[WS(rs, 20)]; Ta = ci[WS(rs, 20)]; T7 = W[38]; T9 = W[39]; Tb = FMA(T7, T8, T9 * Ta); T2p = FNMS(T9, T8, T7 * Ta); } Tc = T6 + Tb; T6c = T2o + T2p; { E Te, Tg, Td, Tf; Te = cr[WS(rs, 10)]; Tg = ci[WS(rs, 10)]; Td = W[18]; Tf = W[19]; Th = FMA(Td, Te, Tf * Tg); T2r = FNMS(Tf, Te, Td * Tg); } { E Tj, Tl, Ti, Tk; Tj = cr[WS(rs, 15)]; Tl = ci[WS(rs, 15)]; Ti = W[28]; Tk = W[29]; Tm = FMA(Ti, Tj, Tk * Tl); T2s = FNMS(Tk, Tj, Ti * Tl); } Tn = Th + Tm; T6d = T2r + T2s; T2l = KP559016994 * (Tc - Tn); T6g = KP559016994 * (T6c - T6d); To = Tc + Tn; T2m = FNMS(KP250000000, To, T1); T6e = T6c + T6d; T6f = FNMS(KP250000000, T6e, T6b); { E T68, T69, T2q, T2t; T68 = Th - Tm; T69 = T6 - Tb; T6a = FNMS(KP587785252, T69, KP951056516 * T68); T6H = FMA(KP951056516, T69, KP587785252 * T68); T2q = T2o - T2p; T2t = T2r - T2s; T2u = FMA(KP951056516, T2q, KP587785252 * T2t); T4I = FNMS(KP587785252, T2q, KP951056516 * T2t); } } { E T1U, T3O, T3E, T3F, T3X, T3W, T3J, T3M, T3P, T25, T2g, T2h; { E T1R, T1T, T1Q, T1S; T1R = cr[WS(rs, 3)]; T1T = ci[WS(rs, 3)]; T1Q = W[4]; T1S = W[5]; T1U = FMA(T1Q, T1R, T1S * T1T); T3O = FNMS(T1S, T1R, T1Q * T1T); } { E T1Z, T3H, T2f, T3L, T24, T3I, T2a, T3K; { E T1W, T1Y, T1V, T1X; T1W = cr[WS(rs, 8)]; T1Y = ci[WS(rs, 8)]; T1V = W[14]; T1X = W[15]; T1Z = FMA(T1V, T1W, T1X * T1Y); T3H = FNMS(T1X, T1W, T1V * T1Y); } { E T2c, T2e, T2b, T2d; T2c = cr[WS(rs, 18)]; T2e = ci[WS(rs, 18)]; T2b = W[34]; T2d = W[35]; T2f = FMA(T2b, T2c, T2d * T2e); T3L = FNMS(T2d, T2c, T2b * T2e); } { E T21, T23, T20, T22; T21 = cr[WS(rs, 23)]; T23 = ci[WS(rs, 23)]; T20 = W[44]; T22 = W[45]; T24 = FMA(T20, T21, T22 * T23); T3I = FNMS(T22, T21, T20 * T23); } { E T27, T29, T26, T28; T27 = cr[WS(rs, 13)]; T29 = ci[WS(rs, 13)]; T26 = W[24]; T28 = W[25]; T2a = FMA(T26, T27, T28 * T29); T3K = FNMS(T28, T27, T26 * T29); } T3E = T1Z - T24; T3F = T2a - T2f; T3X = T3K - T3L; T3W = T3H - T3I; T3J = T3H + T3I; T3M = T3K + 
T3L; T3P = T3J + T3M; T25 = T1Z + T24; T2g = T2a + T2f; T2h = T25 + T2g; } T2i = T1U + T2h; T60 = T3O + T3P; { E T3G, T57, T3R, T56, T3N, T3Q; T3G = FMA(KP951056516, T3E, KP587785252 * T3F); T57 = FNMS(KP587785252, T3E, KP951056516 * T3F); T3N = KP559016994 * (T3J - T3M); T3Q = FNMS(KP250000000, T3P, T3O); T3R = T3N + T3Q; T56 = T3Q - T3N; T3S = T3G + T3R; T5D = T57 + T56; T4r = T3R - T3G; T58 = T56 - T57; } { E T3Y, T5a, T3V, T59, T3T, T3U; T3Y = FMA(KP951056516, T3W, KP587785252 * T3X); T5a = FNMS(KP587785252, T3W, KP951056516 * T3X); T3T = KP559016994 * (T25 - T2g); T3U = FNMS(KP250000000, T2h, T1U); T3V = T3T + T3U; T59 = T3U - T3T; T3Z = T3V - T3Y; T5C = T59 - T5a; T4q = T3V + T3Y; T5b = T59 + T5a; } } { E Tu, T2N, T2B, T2E, T2I, T2H, T2K, T2L, T2O, TF, TQ, TR; { E Tr, Tt, Tq, Ts; Tr = cr[WS(rs, 1)]; Tt = ci[WS(rs, 1)]; Tq = W[0]; Ts = W[1]; Tu = FMA(Tq, Tr, Ts * Tt); T2N = FNMS(Ts, Tr, Tq * Tt); } { E Tz, T2z, TP, T2D, TE, T2A, TK, T2C; { E Tw, Ty, Tv, Tx; Tw = cr[WS(rs, 6)]; Ty = ci[WS(rs, 6)]; Tv = W[10]; Tx = W[11]; Tz = FMA(Tv, Tw, Tx * Ty); T2z = FNMS(Tx, Tw, Tv * Ty); } { E TM, TO, TL, TN; TM = cr[WS(rs, 16)]; TO = ci[WS(rs, 16)]; TL = W[30]; TN = W[31]; TP = FMA(TL, TM, TN * TO); T2D = FNMS(TN, TM, TL * TO); } { E TB, TD, TA, TC; TB = cr[WS(rs, 21)]; TD = ci[WS(rs, 21)]; TA = W[40]; TC = W[41]; TE = FMA(TA, TB, TC * TD); T2A = FNMS(TC, TB, TA * TD); } { E TH, TJ, TG, TI; TH = cr[WS(rs, 11)]; TJ = ci[WS(rs, 11)]; TG = W[20]; TI = W[21]; TK = FMA(TG, TH, TI * TJ); T2C = FNMS(TI, TH, TG * TJ); } T2B = T2z - T2A; T2E = T2C - T2D; T2I = TK - TP; T2H = Tz - TE; T2K = T2z + T2A; T2L = T2C + T2D; T2O = T2K + T2L; TF = Tz + TE; TQ = TK + TP; TR = TF + TQ; } TS = Tu + TR; T5W = T2N + T2O; { E T2F, T4L, T2y, T4K, T2w, T2x; T2F = FMA(KP951056516, T2B, KP587785252 * T2E); T4L = FNMS(KP587785252, T2B, KP951056516 * T2E); T2w = KP559016994 * (TF - TQ); T2x = FNMS(KP250000000, TR, Tu); T2y = T2w + T2x; T4K = T2x - T2w; T2G = T2y - T2F; T5s = T4K - T4L; T4g = T2y + 
T2F; T4M = T4K + T4L; } { E T2J, T4O, T2Q, T4N, T2M, T2P; T2J = FMA(KP951056516, T2H, KP587785252 * T2I); T4O = FNMS(KP587785252, T2H, KP951056516 * T2I); T2M = KP559016994 * (T2K - T2L); T2P = FNMS(KP250000000, T2O, T2N); T2Q = T2M + T2P; T4N = T2P - T2M; T2R = T2J + T2Q; T5t = T4O + T4N; T4h = T2Q - T2J; T4P = T4N - T4O; } } { E TX, T33, T2T, T2U, T3c, T3b, T2Y, T31, T34, T18, T1j, T1k; { E TU, TW, TT, TV; TU = cr[WS(rs, 4)]; TW = ci[WS(rs, 4)]; TT = W[6]; TV = W[7]; TX = FMA(TT, TU, TV * TW); T33 = FNMS(TV, TU, TT * TW); } { E T12, T2W, T1i, T30, T17, T2X, T1d, T2Z; { E TZ, T11, TY, T10; TZ = cr[WS(rs, 9)]; T11 = ci[WS(rs, 9)]; TY = W[16]; T10 = W[17]; T12 = FMA(TY, TZ, T10 * T11); T2W = FNMS(T10, TZ, TY * T11); } { E T1f, T1h, T1e, T1g; T1f = cr[WS(rs, 19)]; T1h = ci[WS(rs, 19)]; T1e = W[36]; T1g = W[37]; T1i = FMA(T1e, T1f, T1g * T1h); T30 = FNMS(T1g, T1f, T1e * T1h); } { E T14, T16, T13, T15; T14 = cr[WS(rs, 24)]; T16 = ci[WS(rs, 24)]; T13 = W[46]; T15 = W[47]; T17 = FMA(T13, T14, T15 * T16); T2X = FNMS(T15, T14, T13 * T16); } { E T1a, T1c, T19, T1b; T1a = cr[WS(rs, 14)]; T1c = ci[WS(rs, 14)]; T19 = W[26]; T1b = W[27]; T1d = FMA(T19, T1a, T1b * T1c); T2Z = FNMS(T1b, T1a, T19 * T1c); } T2T = T17 - T12; T2U = T1d - T1i; T3c = T2Z - T30; T3b = T2W - T2X; T2Y = T2W + T2X; T31 = T2Z + T30; T34 = T2Y + T31; T18 = T12 + T17; T1j = T1d + T1i; T1k = T18 + T1j; } T1l = TX + T1k; T5X = T33 + T34; { E T2V, T4S, T36, T4R, T32, T35; T2V = FNMS(KP587785252, T2U, KP951056516 * T2T); T4S = FMA(KP587785252, T2T, KP951056516 * T2U); T32 = KP559016994 * (T2Y - T31); T35 = FNMS(KP250000000, T34, T33); T36 = T32 + T35; T4R = T35 - T32; T37 = T2V - T36; T5v = T4S + T4R; T4k = T2V + T36; T4T = T4R - T4S; } { E T3d, T4V, T3a, T4U, T38, T39; T3d = FMA(KP951056516, T3b, KP587785252 * T3c); T4V = FNMS(KP587785252, T3b, KP951056516 * T3c); T38 = KP559016994 * (T18 - T1j); T39 = FNMS(KP250000000, T1k, TX); T3a = T38 + T39; T4U = T39 - T38; T3e = T3a - T3d; T5w = T4U - T4V; T4j = T3a + 
T3d; T4W = T4U + T4V; } } { E T1r, T3r, T3h, T3i, T3A, T3z, T3m, T3p, T3s, T1C, T1N, T1O; { E T1o, T1q, T1n, T1p; T1o = cr[WS(rs, 2)]; T1q = ci[WS(rs, 2)]; T1n = W[2]; T1p = W[3]; T1r = FMA(T1n, T1o, T1p * T1q); T3r = FNMS(T1p, T1o, T1n * T1q); } { E T1w, T3k, T1M, T3o, T1B, T3l, T1H, T3n; { E T1t, T1v, T1s, T1u; T1t = cr[WS(rs, 7)]; T1v = ci[WS(rs, 7)]; T1s = W[12]; T1u = W[13]; T1w = FMA(T1s, T1t, T1u * T1v); T3k = FNMS(T1u, T1t, T1s * T1v); } { E T1J, T1L, T1I, T1K; T1J = cr[WS(rs, 17)]; T1L = ci[WS(rs, 17)]; T1I = W[32]; T1K = W[33]; T1M = FMA(T1I, T1J, T1K * T1L); T3o = FNMS(T1K, T1J, T1I * T1L); } { E T1y, T1A, T1x, T1z; T1y = cr[WS(rs, 22)]; T1A = ci[WS(rs, 22)]; T1x = W[42]; T1z = W[43]; T1B = FMA(T1x, T1y, T1z * T1A); T3l = FNMS(T1z, T1y, T1x * T1A); } { E T1E, T1G, T1D, T1F; T1E = cr[WS(rs, 12)]; T1G = ci[WS(rs, 12)]; T1D = W[22]; T1F = W[23]; T1H = FMA(T1D, T1E, T1F * T1G); T3n = FNMS(T1F, T1E, T1D * T1G); } T3h = T1w - T1B; T3i = T1H - T1M; T3A = T3n - T3o; T3z = T3k - T3l; T3m = T3k + T3l; T3p = T3n + T3o; T3s = T3m + T3p; T1C = T1w + T1B; T1N = T1H + T1M; T1O = T1C + T1N; } T1P = T1r + T1O; T5Z = T3r + T3s; { E T3j, T53, T3u, T52, T3q, T3t; T3j = FMA(KP951056516, T3h, KP587785252 * T3i); T53 = FNMS(KP587785252, T3h, KP951056516 * T3i); T3q = KP559016994 * (T3m - T3p); T3t = FNMS(KP250000000, T3s, T3r); T3u = T3q + T3t; T52 = T3t - T3q; T3v = T3j + T3u; T5A = T53 + T52; T4o = T3u - T3j; T54 = T52 - T53; } { E T3B, T50, T3y, T4Z, T3w, T3x; T3B = FMA(KP951056516, T3z, KP587785252 * T3A); T50 = FNMS(KP587785252, T3z, KP951056516 * T3A); T3w = KP559016994 * (T1C - T1N); T3x = FNMS(KP250000000, T1O, T1r); T3y = T3w + T3x; T4Z = T3x - T3w; T3C = T3y - T3B; T5z = T4Z - T50; T4n = T3y + T3B; T51 = T4Z + T50; } } { E T62, T64, Tp, T2k, T5T, T5U, T63, T5V; { E T5Y, T61, T1m, T2j; T5Y = T5W - T5X; T61 = T5Z - T60; T62 = FMA(KP951056516, T5Y, KP587785252 * T61); T64 = FNMS(KP587785252, T5Y, KP951056516 * T61); Tp = T1 + To; T1m = TS + T1l; T2j = T1P + T2i; T2k = 
T1m + T2j; T5T = KP559016994 * (T1m - T2j); T5U = FNMS(KP250000000, T2k, Tp); } cr[0] = Tp + T2k; T63 = T5U - T5T; cr[WS(rs, 10)] = T63 - T64; ci[WS(rs, 9)] = T63 + T64; T5V = T5T + T5U; ci[WS(rs, 4)] = T5V - T62; cr[WS(rs, 5)] = T5V + T62; } { E T2v, T4f, T6I, T6U, T42, T6Z, T43, T6Y, T4A, T6N, T4D, T6L, T4u, T6E, T4v; E T6D, T48, T6V, T4b, T6T, T2n, T6G; T2n = T2l + T2m; T2v = T2n - T2u; T4f = T2n + T2u; T6G = T6g + T6f; T6I = T6G - T6H; T6U = T6H + T6G; { E T2S, T3f, T3g, T3D, T40, T41; T2S = FMA(KP535826794, T2G, KP844327925 * T2R); T3f = FNMS(KP637423989, T3e, KP770513242 * T37); T3g = T2S + T3f; T3D = FNMS(KP425779291, T3C, KP904827052 * T3v); T40 = FNMS(KP992114701, T3Z, KP125333233 * T3S); T41 = T3D + T40; T42 = T3g + T41; T6Z = T3D - T40; T43 = KP559016994 * (T3g - T41); T6Y = T3f - T2S; } { E T4y, T4z, T6J, T4B, T4C, T6K; T4y = FNMS(KP248689887, T4g, KP968583161 * T4h); T4z = FNMS(KP844327925, T4j, KP535826794 * T4k); T6J = T4y + T4z; T4B = FNMS(KP481753674, T4n, KP876306680 * T4o); T4C = FNMS(KP684547105, T4q, KP728968627 * T4r); T6K = T4B + T4C; T4A = T4y - T4z; T6N = KP559016994 * (T6J - T6K); T4D = T4B - T4C; T6L = T6J + T6K; } { E T4i, T4l, T4m, T4p, T4s, T4t; T4i = FMA(KP968583161, T4g, KP248689887 * T4h); T4l = FMA(KP535826794, T4j, KP844327925 * T4k); T4m = T4i + T4l; T4p = FMA(KP876306680, T4n, KP481753674 * T4o); T4s = FMA(KP728968627, T4q, KP684547105 * T4r); T4t = T4p + T4s; T4u = T4m + T4t; T6E = T4p - T4s; T4v = KP559016994 * (T4m - T4t); T6D = T4l - T4i; } { E T46, T47, T6R, T49, T4a, T6S; T46 = FNMS(KP844327925, T2G, KP535826794 * T2R); T47 = FMA(KP770513242, T3e, KP637423989 * T37); T6R = T46 + T47; T49 = FMA(KP125333233, T3Z, KP992114701 * T3S); T4a = FMA(KP904827052, T3C, KP425779291 * T3v); T6S = T4a + T49; T48 = T46 - T47; T6V = T6R - T6S; T4b = T49 - T4a; T6T = KP559016994 * (T6R + T6S); } cr[WS(rs, 4)] = T2v + T42; ci[WS(rs, 23)] = T6L + T6I; ci[WS(rs, 20)] = T6V + T6U; cr[WS(rs, 1)] = T4f + T4u; { E T4c, T4e, T45, T4d, T44; T4c = 
FMA(KP951056516, T48, KP587785252 * T4b); T4e = FNMS(KP587785252, T48, KP951056516 * T4b); T44 = FNMS(KP250000000, T42, T2v); T45 = T43 + T44; T4d = T44 - T43; ci[0] = T45 - T4c; ci[WS(rs, 5)] = T4d + T4e; cr[WS(rs, 9)] = T45 + T4c; ci[WS(rs, 10)] = T4d - T4e; } { E T6F, T6P, T6O, T6Q, T6M; T6F = FMA(KP587785252, T6D, KP951056516 * T6E); T6P = FNMS(KP587785252, T6E, KP951056516 * T6D); T6M = FNMS(KP250000000, T6L, T6I); T6O = T6M - T6N; T6Q = T6N + T6M; cr[WS(rs, 16)] = T6F - T6O; ci[WS(rs, 18)] = T6P + T6Q; ci[WS(rs, 13)] = T6F + T6O; cr[WS(rs, 21)] = T6P - T6Q; } { E T70, T71, T6X, T72, T6W; T70 = FMA(KP587785252, T6Y, KP951056516 * T6Z); T71 = FNMS(KP587785252, T6Z, KP951056516 * T6Y); T6W = FNMS(KP250000000, T6V, T6U); T6X = T6T - T6W; T72 = T6T + T6W; cr[WS(rs, 14)] = T6X - T70; ci[WS(rs, 15)] = T71 + T72; cr[WS(rs, 19)] = T70 + T6X; cr[WS(rs, 24)] = T71 - T72; } { E T4E, T4G, T4x, T4F, T4w; T4E = FMA(KP951056516, T4A, KP587785252 * T4D); T4G = FNMS(KP587785252, T4A, KP951056516 * T4D); T4w = FNMS(KP250000000, T4u, T4f); T4x = T4v + T4w; T4F = T4w - T4v; ci[WS(rs, 3)] = T4x - T4E; ci[WS(rs, 8)] = T4F + T4G; cr[WS(rs, 6)] = T4x + T4E; cr[WS(rs, 11)] = T4F - T4G; } } { E T75, T7d, T76, T79, T7a, T7b, T7e, T7c; { E T73, T74, T77, T78; T73 = T1l - TS; T74 = T1P - T2i; T75 = FMA(KP587785252, T73, KP951056516 * T74); T7d = FNMS(KP587785252, T74, KP951056516 * T73); T76 = T6e + T6b; T77 = T5W + T5X; T78 = T5Z + T60; T79 = T77 + T78; T7a = FNMS(KP250000000, T79, T76); T7b = KP559016994 * (T77 - T78); } ci[WS(rs, 24)] = T79 + T76; T7e = T7b + T7a; cr[WS(rs, 20)] = T7d - T7e; ci[WS(rs, 19)] = T7d + T7e; T7c = T7a - T7b; cr[WS(rs, 15)] = T75 - T7c; ci[WS(rs, 14)] = T75 + T7c; } { E T4J, T5r, T6i, T6u, T5e, T6z, T5f, T6y, T5M, T6n, T5P, T6l, T5G, T66, T5H; E T65, T5k, T6v, T5n, T6t, T4H, T6h; T4H = T2m - T2l; T4J = T4H + T4I; T5r = T4H - T4I; T6h = T6f - T6g; T6i = T6a + T6h; T6u = T6h - T6a; { E T4Q, T4X, T4Y, T55, T5c, T5d; T4Q = FMA(KP728968627, T4M, KP684547105 * 
T4P); T4X = FNMS(KP992114701, T4W, KP125333233 * T4T); T4Y = T4Q + T4X; T55 = FMA(KP062790519, T51, KP998026728 * T54); T5c = FNMS(KP637423989, T5b, KP770513242 * T58); T5d = T55 + T5c; T5e = T4Y + T5d; T6z = T55 - T5c; T5f = KP559016994 * (T4Y - T5d); T6y = T4X - T4Q; } { E T5K, T5L, T6j, T5N, T5O, T6k; T5K = FNMS(KP481753674, T5s, KP876306680 * T5t); T5L = FMA(KP904827052, T5w, KP425779291 * T5v); T6j = T5K - T5L; T5N = FNMS(KP844327925, T5z, KP535826794 * T5A); T5O = FNMS(KP998026728, T5C, KP062790519 * T5D); T6k = T5N + T5O; T5M = T5K + T5L; T6n = KP559016994 * (T6j - T6k); T5P = T5N - T5O; T6l = T6j + T6k; } { E T5u, T5x, T5y, T5B, T5E, T5F; T5u = FMA(KP876306680, T5s, KP481753674 * T5t); T5x = FNMS(KP425779291, T5w, KP904827052 * T5v); T5y = T5u + T5x; T5B = FMA(KP535826794, T5z, KP844327925 * T5A); T5E = FMA(KP062790519, T5C, KP998026728 * T5D); T5F = T5B + T5E; T5G = T5y + T5F; T66 = T5B - T5E; T5H = KP559016994 * (T5y - T5F); T65 = T5x - T5u; } { E T5i, T5j, T6r, T5l, T5m, T6s; T5i = FNMS(KP684547105, T4M, KP728968627 * T4P); T5j = FMA(KP125333233, T4W, KP992114701 * T4T); T6r = T5i - T5j; T5l = FNMS(KP998026728, T51, KP062790519 * T54); T5m = FMA(KP770513242, T5b, KP637423989 * T58); T6s = T5l - T5m; T5k = T5i + T5j; T6v = T6r + T6s; T5n = T5l + T5m; T6t = KP559016994 * (T6r - T6s); } cr[WS(rs, 3)] = T4J + T5e; ci[WS(rs, 22)] = T6l + T6i; ci[WS(rs, 21)] = T6v + T6u; cr[WS(rs, 2)] = T5r + T5G; { E T67, T6p, T6o, T6q, T6m; T67 = FMA(KP587785252, T65, KP951056516 * T66); T6p = FNMS(KP587785252, T66, KP951056516 * T65); T6m = FNMS(KP250000000, T6l, T6i); T6o = T6m - T6n; T6q = T6n + T6m; cr[WS(rs, 17)] = T67 - T6o; ci[WS(rs, 17)] = T6p + T6q; ci[WS(rs, 12)] = T67 + T6o; cr[WS(rs, 22)] = T6p - T6q; } { E T5Q, T5S, T5J, T5R, T5I; T5Q = FMA(KP951056516, T5M, KP587785252 * T5P); T5S = FNMS(KP587785252, T5M, KP951056516 * T5P); T5I = FNMS(KP250000000, T5G, T5r); T5J = T5H + T5I; T5R = T5I - T5H; ci[WS(rs, 2)] = T5J - T5Q; ci[WS(rs, 7)] = T5R + T5S; cr[WS(rs, 7)] = 
T5J + T5Q; cr[WS(rs, 12)] = T5R - T5S; } { E T5o, T5q, T5h, T5p, T5g; T5o = FMA(KP951056516, T5k, KP587785252 * T5n); T5q = FNMS(KP587785252, T5k, KP951056516 * T5n); T5g = FNMS(KP250000000, T5e, T4J); T5h = T5f + T5g; T5p = T5g - T5f; ci[WS(rs, 1)] = T5h - T5o; ci[WS(rs, 6)] = T5p + T5q; cr[WS(rs, 8)] = T5h + T5o; ci[WS(rs, 11)] = T5p - T5q; } { E T6A, T6B, T6x, T6C, T6w; T6A = FMA(KP587785252, T6y, KP951056516 * T6z); T6B = FNMS(KP587785252, T6z, KP951056516 * T6y); T6w = FNMS(KP250000000, T6v, T6u); T6x = T6t - T6w; T6C = T6t + T6w; cr[WS(rs, 13)] = T6x - T6A; ci[WS(rs, 16)] = T6B + T6C; cr[WS(rs, 18)] = T6A + T6x; cr[WS(rs, 23)] = T6B - T6C; } } } } }
/*
 * hc2cfdft_8 -- straight-line in-place butterfly over four arrays
 * (Rp, Ip, Rm, Im), four slots each, using 14 coefficients from W per pass.
 * NOTE(review): looks like a generated FFTW-style codelet; the exact
 * transform it implements is not evident from this block alone -- confirm
 * against the generator before editing by hand.
 *
 * Parameters:
 *   Rp, Ip, Rm, Im - data arrays; slots WS(rs, 0..3) of each are read and
 *                    then overwritten.
 *   W              - coefficient table; W[0..13] are used per pass.
 *   rs             - stride passed to WS() to locate slot k.
 *   mb, me         - passes run for m = mb .. me - 1.
 *   ms             - per-pass step: Rp and Ip move forward by ms, Rm and Im
 *                    move backward by ms.
 *
 * W is offset by (mb - 1) * 14 before the first pass and by 14 after each.
 *
 * Each pass:
 *   1. For every slot k, forms the sum and difference of Ip[k]/Im[k] and of
 *      Rp[k]/Rm[k].
 *   2. Multiplies those by coefficient pairs: W[0..1] for slot 0,
 *      W[2..5] for slot 1, W[6..9] for slot 2 and W[10..13] for slot 3.
 *      Ip[0] - Im[0] and Rp[0] + Rm[0] enter the butterfly unscaled.
 *   3. Stores all 16 slots, each scaled by KP500000000; the values written
 *      to Im[0] and Im[WS(rs, 2)] are additionally negated.
 *
 * All 16 loads are issued before the first store; because the update is in
 * place, statements must not be moved across that boundary.
 */
static void hc2cfdft_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP500000000, +0.500000000000000000000000000000000000000000000); { INT m; for (m = mb, W = W + ((mb - 1) * 14); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 14, MAKE_VOLATILE_STRIDE(32, rs)) { E T1A, T1w, T1z, T1x, T1H, T1v, T1L, T1F; { E Ty, T14, TO, T1o, Tv, TG, T16, T1m, Ta, T19, T1h, TV, T10, TX, TZ; E Tk, T1i, TY, T1b, TF, TB, T1l; { E TH, TN, TK, TM; { E Tw, Tx, TI, TJ; Tw = Ip[0]; Tx = Im[0]; TI = Rm[0]; TJ = Rp[0]; TH = W[0]; Ty = Tw - Tx; TN = Tw + Tx; T14 = TJ + TI; TK = TI - TJ; TM = W[1]; } { E Ts, Tp, Tt, Tm, Tr; { E Tn, To, TL, T1n; Tn = Ip[WS(rs, 2)]; To = Im[WS(rs, 2)]; TL = TH * TK; T1n = TM * TK; Ts = Rp[WS(rs, 2)]; TF = Tn + To; Tp = Tn - To; TO = FNMS(TM, TN, TL); T1o = FMA(TH, TN, T1n); Tt = Rm[WS(rs, 2)]; } Tm = W[6]; Tr = W[7]; { E TE, TD, T15, TC, Tu, Tq; TB = W[8]; TC = Tt - Ts; Tu = Ts + Tt; Tq = Tm * Tp; TE = W[9]; TD = TB * TC; T15 = Tm * Tu; Tv = FNMS(Tr, Tu, Tq); T1l = TE * TC; TG = FNMS(TE, TF, TD); T16 = FMA(Tr, Tp, T15); } } } { E TU, TR, TT, T1g, TS; { E T2, T3, T7, T8; T2 = Ip[WS(rs, 1)]; T1m = FMA(TB, TF, T1l); T3 = Im[WS(rs, 1)]; T7 = Rp[WS(rs, 1)]; T8 = Rm[WS(rs, 1)]; { E T1, T4, T9, T6, T5, TQ, T18; T1 = W[2]; TU = T2 + T3; T4 = T2 - T3; TR = T7 - T8; T9 = T7 + T8; T6 = W[3]; T5 = T1 * T4; TQ = W[4]; T18 = T1 * T9; TT = W[5]; Ta = FNMS(T6, T9, T5); T1g = TQ * TU; TS = TQ * TR; T19 = FMA(T6, T4, T18); } } { E Tc, Td, Th, Ti; Tc = Ip[WS(rs, 3)]; T1h = FNMS(TT, TR, T1g); TV = FMA(TT, TU, TS); Td = Im[WS(rs, 3)]; Th = Rp[WS(rs, 3)]; Ti = Rm[WS(rs, 3)]; { E Tb, Te, Tj, Tg, Tf, TW, T1a; Tb = W[10]; T10 = Tc + Td; Te = Tc - Td; TX = Th - Ti; Tj = Th + Ti; Tg = W[11]; Tf = Tb * Te; TW = W[12]; T1a = Tb * Tj; TZ = W[13]; Tk = FNMS(Tg, Tj, Tf); T1i = TW * T10; TY = TW * TX; T1b = FMA(Tg, Te, T1a); } } } { E T1E, T1t, TA, T1s, T1D, 
T1u, T1e, T13, T1r, T1d; { E TP, T1f, T1q, T12, T17, T1c; { E Tl, T11, Tz, T1p, T1k, T1j; T1E = Ta - Tk; Tl = Ta + Tk; T1j = FNMS(TZ, TX, T1i); T11 = FMA(TZ, T10, TY); Tz = Tv + Ty; T1t = Ty - Tv; T1A = T1o - T1m; T1p = T1m + T1o; T1k = T1h + T1j; T1w = T1j - T1h; T1z = TO - TG; TP = TG + TO; T1f = Tz - Tl; TA = Tl + Tz; T1s = T1k + T1p; T1q = T1k - T1p; T12 = TV + T11; T1x = TV - T11; T1D = T14 - T16; T17 = T14 + T16; T1c = T19 + T1b; T1u = T19 - T1b; } Im[WS(rs, 1)] = KP500000000 * (T1q - T1f); T1e = T12 + TP; T13 = TP - T12; T1r = T17 + T1c; T1d = T17 - T1c; Ip[WS(rs, 2)] = KP500000000 * (T1f + T1q); } Im[WS(rs, 3)] = KP500000000 * (T13 - TA); Ip[0] = KP500000000 * (TA + T13); Rm[WS(rs, 3)] = KP500000000 * (T1r - T1s); Rp[0] = KP500000000 * (T1r + T1s); Rp[WS(rs, 2)] = KP500000000 * (T1d + T1e); Rm[WS(rs, 1)] = KP500000000 * (T1d - T1e); T1H = T1u + T1t; T1v = T1t - T1u; T1L = T1D + T1E; T1F = T1D - T1E; } } { E T1y, T1I, T1B, T1J; T1y = T1w + T1x; T1I = T1w - T1x; T1B = T1z - T1A; T1J = T1z + T1A; { E T1M, T1K, T1C, T1G; T1M = T1I + T1J; T1K = T1I - T1J; T1C = T1y + T1B; T1G = T1B - T1y; Im[0] = -(KP500000000 * (FNMS(KP707106781, T1K, T1H))); Ip[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1K, T1H)); Rp[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1M, T1L)); Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP707106781, T1M, T1L)); Rp[WS(rs, 3)] = KP500000000 * (FMA(KP707106781, T1G, T1F)); Rm[0] = KP500000000 * (FNMS(KP707106781, T1G, T1F)); Im[WS(rs, 2)] = -(KP500000000 * (FNMS(KP707106781, T1C, T1v))); Ip[WS(rs, 1)] = KP500000000 * (FMA(KP707106781, T1C, T1v)); } } } } }
/*
 * t1_8 -- in-place size-8 butterfly over the paired arrays ri/ii
 * (presumably real and imaginary parts -- confirm with the caller).
 *
 * Passes run for m = mb .. me - 1.  W starts at W + mb * 14; after each
 * pass ri and ii advance by ms and W advances by 14.
 *
 * Each pass has three phases:
 *   1. Gather: element 0 is taken as-is; element k (k = 1..7) at offset
 *      WS(rs, k) is combined with the coefficient pair (W[2k-2], W[2k-1])
 *      via one FMA and one FNMS (macros defined elsewhere; assumed to be
 *      a*b + c and c - a*b).
 *   2. Pairwise sums/differences of elements (0,4), (7,3), (2,6), (1,5).
 *   3. Combine and store all 16 slots.  Every load happens in phase 1,
 *      before any store, because the update is in place.
 */
static void t1_8(float *ri, float *ii, const float *W, stride rs, INT mb, INT me, INT ms)
{
    DK(KP707106781, +0.707106781186547524400844362104849039284835938);
    INT m;

    W += mb * 14;
    for (m = mb; m < me; ++m, ri += ms, ii += ms, W += 14, MAKE_VOLATILE_STRIDE(rs)) {
        E sum04_re, sum04_im, dif04_re, dif04_im;
        E sum73_re, sum73_im, dif73_re, dif73_im;
        E sum26_re, sum26_im, dif26_re, dif26_im;
        E sum15_re, sum15_im, dif15_re, dif15_im;

        /* Elements 0 and 4. */
        {
            const E re0 = ri[0];
            const E im0 = ii[0];
            const E xr = ri[WS(rs, 4)];
            const E xi = ii[WS(rs, 4)];
            const E wr = W[6];
            const E wi = W[7];
            const E re4 = FMA(wr, xr, wi * xi);
            const E im4 = FNMS(wi, xr, wr * xi);

            sum04_re = re0 + re4;
            dif04_im = im0 - im4;
            dif04_re = re0 - re4;
            sum04_im = im4 + im0;
        }

        /* Elements 7 and 3. */
        {
            E re7, im7, re3, im3;
            {
                const E xr = ri[WS(rs, 7)];
                const E xi = ii[WS(rs, 7)];
                const E wr = W[12];
                const E wi = W[13];
                re7 = FMA(wr, xr, wi * xi);
                im7 = FNMS(wi, xr, wr * xi);
            }
            {
                const E xr = ri[WS(rs, 3)];
                const E xi = ii[WS(rs, 3)];
                const E wr = W[4];
                const E wi = W[5];
                re3 = FMA(wr, xr, wi * xi);
                im3 = FNMS(wi, xr, wr * xi);
            }
            sum73_re = re7 + re3;
            sum73_im = im7 + im3;
            dif73_re = re7 - re3;
            dif73_im = im7 - im3;
        }

        /* Elements 2 and 6. */
        {
            E re2, im2, re6, im6;
            {
                const E xr = ri[WS(rs, 2)];
                const E xi = ii[WS(rs, 2)];
                const E wr = W[2];
                const E wi = W[3];
                re2 = FMA(wr, xr, wi * xi);
                im2 = FNMS(wi, xr, wr * xi);
            }
            {
                const E xr = ri[WS(rs, 6)];
                const E xi = ii[WS(rs, 6)];
                const E wr = W[10];
                const E wi = W[11];
                re6 = FMA(wr, xr, wi * xi);
                im6 = FNMS(wi, xr, wr * xi);
            }
            sum26_re = re2 + re6;
            dif26_re = re2 - re6;
            dif26_im = im2 - im6;
            sum26_im = im2 + im6;
        }

        /* Elements 1 and 5. */
        {
            E re1, im1, re5, im5;
            {
                const E xr = ri[WS(rs, 1)];
                const E xi = ii[WS(rs, 1)];
                const E wr = W[0];
                const E wi = W[1];
                re1 = FMA(wr, xr, wi * xi);
                im1 = FNMS(wi, xr, wr * xi);
            }
            {
                const E xr = ri[WS(rs, 5)];
                const E xi = ii[WS(rs, 5)];
                const E wr = W[8];
                const E wi = W[9];
                re5 = FMA(wr, xr, wi * xi);
                im5 = FNMS(wi, xr, wr * xi);
            }
            sum15_re = re1 + re5;
            sum15_im = im1 + im5;
            dif15_re = re1 - re5;
            dif15_im = im1 - im5;
        }

        /* Slots 0, 4, 2 and 6: built from the pairwise sums only. */
        {
            const E even_re = sum04_re + sum26_re;
            const E odd_re = sum15_re + sum73_re;
            const E odd_im = sum15_im + sum73_im;
            const E even_im = sum26_im + sum04_im;
            const E half_re = sum04_re - sum26_re;
            const E cross_im = sum15_im - sum73_im;
            const E cross_re = sum73_re - sum15_re;
            const E half_im = sum04_im - sum26_im;

            ri[WS(rs, 4)] = even_re - odd_re;
            ri[0] = even_re + odd_re;
            ii[0] = odd_im + even_im;
            ii[WS(rs, 4)] = even_im - odd_im;
            ri[WS(rs, 6)] = half_re - cross_im;
            ri[WS(rs, 2)] = half_re + cross_im;
            ii[WS(rs, 2)] = cross_re + half_im;
            ii[WS(rs, 6)] = half_im - cross_re;
        }

        /* ri slots 7 and 3, ii slots 5 and 1. */
        {
            const E base_re = dif04_re - dif26_im;
            const E base_im = dif04_im - dif26_re;
            const E p = dif15_im - dif15_re;
            const E q = dif73_re + dif73_im;
            const E rot_re = KP707106781 * (p - q);
            const E rot_im = KP707106781 * (p + q);

            ri[WS(rs, 7)] = base_re - rot_re;
            ii[WS(rs, 5)] = base_im - rot_im;
            ri[WS(rs, 3)] = base_re + rot_re;
            ii[WS(rs, 1)] = rot_im + base_im;
        }

        /* ri slots 5 and 1, ii slots 7 and 3. */
        {
            const E base_re = dif04_re + dif26_im;
            const E base_im = dif26_re + dif04_im;
            const E p = dif15_re + dif15_im;
            const E q = dif73_re - dif73_im;
            const E rot_re = KP707106781 * (p + q);
            const E rot_im = KP707106781 * (q - p);

            ri[WS(rs, 5)] = base_re - rot_re;
            ii[WS(rs, 7)] = base_im - rot_im;
            ri[WS(rs, 1)] = base_re + rot_re;
            ii[WS(rs, 3)] = rot_im + base_im;
        }
    }
}
static void hb_5(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms) { DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP587785252, +0.587785252292473129168705954639072768597652438); DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP559016994, +0.559016994374947424102293417182819058860154590); INT m; for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) { E T1, Tj, TG, Ts, T8, Ti, T9, Tn, TD, Tu, Tg, Tt; { E T4, Tq, T7, Tr; T1 = cr[0]; { E T2, T3, T5, T6; T2 = cr[WS(rs, 1)]; T3 = ci[0]; T4 = T2 + T3; Tq = T2 - T3; T5 = cr[WS(rs, 2)]; T6 = ci[WS(rs, 1)]; T7 = T5 + T6; Tr = T5 - T6; } Tj = KP559016994 * (T4 - T7); TG = FMA(KP951056516, Tq, KP587785252 * Tr); Ts = FNMS(KP951056516, Tr, KP587785252 * Tq); T8 = T4 + T7; Ti = FNMS(KP250000000, T8, T1); } { E Tc, Tl, Tf, Tm; T9 = ci[WS(rs, 4)]; { E Ta, Tb, Td, Te; Ta = ci[WS(rs, 3)]; Tb = cr[WS(rs, 4)]; Tc = Ta - Tb; Tl = Ta + Tb; Td = ci[WS(rs, 2)]; Te = cr[WS(rs, 3)]; Tf = Td - Te; Tm = Td + Te; } Tn = FNMS(KP951056516, Tm, KP587785252 * Tl); TD = FMA(KP951056516, Tl, KP587785252 * Tm); Tu = KP559016994 * (Tc - Tf); Tg = Tc + Tf; Tt = FNMS(KP250000000, Tg, T9); } cr[0] = T1 + T8; ci[0] = T9 + Tg; { E To, Ty, Tw, TA, Tk, Tv; Tk = Ti - Tj; To = Tk - Tn; Ty = Tk + Tn; Tv = Tt - Tu; Tw = Ts + Tv; TA = Tv - Ts; { E Th, Tp, Tx, Tz; Th = W[2]; Tp = W[3]; cr[WS(rs, 2)] = FNMS(Tp, Tw, Th * To); ci[WS(rs, 2)] = FMA(Th, Tw, Tp * To); Tx = W[4]; Tz = W[5]; cr[WS(rs, 3)] = FNMS(Tz, TA, Tx * Ty); ci[WS(rs, 3)] = FMA(Tx, TA, Tz * Ty); } } { E TE, TK, TI, TM, TC, TH; TC = Tj + Ti; TE = TC - TD; TK = TC + TD; TH = Tu + Tt; TI = TG + TH; TM = TH - TG; { E TB, TF, TJ, TL; TB = W[0]; TF = W[1]; cr[WS(rs, 1)] = FNMS(TF, TI, TB * TE); ci[WS(rs, 1)] = FMA(TB, TI, TF * TE); TJ = W[6]; TL = W[7]; cr[WS(rs, 4)] = FNMS(TL, TM, TJ * TK); ci[WS(rs, 4)] = FMA(TJ, TM, TL * TK); } } } }
/*
 * hc2cb_16 -- straight-line in-place butterfly over four arrays
 * (Rp, Ip, Rm, Im), eight slots each, followed by a multiply with
 * coefficient pairs taken from W.
 * NOTE(review): looks like a generated FFTW-style codelet; the exact
 * transform it implements is not evident from this block alone -- confirm
 * against the generator before editing by hand.
 *
 * Parameters:
 *   Rp, Ip, Rm, Im - data arrays; slots WS(rs, 0..7) of each are read and
 *                    then overwritten.
 *   W              - coefficient table; W[0..29] are used per pass.
 *   rs             - stride passed to WS() to locate slot k.
 *   mb, me         - passes run for m = mb .. me - 1.
 *   ms             - per-pass step: Rp and Ip move forward by ms, Rm and Im
 *                    move backward by ms.
 *
 * W is offset by (mb - 1) * 30 before the first pass and by 30 after each.
 *
 * Each pass:
 *   1. Loads all 32 inputs in four groups, forming sums and differences of
 *      Rp/Rm and Ip/Im pairs and scaling some of them by KP707106781,
 *      KP923879532 and KP382683432.
 *   2. Stores Rp[0] and Rm[0] as plain sums (no coefficient).
 *   3. Stores every other slot as an FMA/FNMS pair with two W entries:
 *      Rp[WS(rs, k)] / Rm[WS(rs, k)] use (W[4k-2], W[4k-1]) for k = 1..7,
 *      and Ip[WS(rs, k)] / Im[WS(rs, k)] use (W[4k], W[4k+1]) for k = 0..7.
 *      FMA / FNMS are macros defined elsewhere (assumed a*b + c, c - a*b).
 *
 * All loads from the data arrays are issued before the first store;
 * because the update is in place, statements must not be moved across
 * that boundary.
 */
static void hc2cb_16(float *Rp, float *Ip, float *Rm, float *Im, const float *W, stride rs, INT mb, INT me, INT ms) { DK(KP382683432, +0.382683432365089771728459984030398866761344562); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + ((mb - 1) * 30); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 30, MAKE_VOLATILE_STRIDE(rs)) { E T7, T2K, T2W, Tw, T17, T1S, T2k, T1w, Te, TD, T1x, T10, T2n, T2L, T1Z; E T2X, Tm, T1z, TN, T19, T2e, T2p, T2P, T2Z, Tt, T1A, TW, T1a, T27, T2q; E T2S, T30; { E T3, T1Q, T13, T2j, T6, T2i, T16, T1R; { E T1, T2, T11, T12; T1 = Rp[0]; T2 = Rm[WS(rs, 7)]; T3 = T1 + T2; T1Q = T1 - T2; T11 = Ip[0]; T12 = Im[WS(rs, 7)]; T13 = T11 - T12; T2j = T11 + T12; } { E T4, T5, T14, T15; T4 = Rp[WS(rs, 4)]; T5 = Rm[WS(rs, 3)]; T6 = T4 + T5; T2i = T4 - T5; T14 = Ip[WS(rs, 4)]; T15 = Im[WS(rs, 3)]; T16 = T14 - T15; T1R = T14 + T15; } T7 = T3 + T6; T2K = T1Q + T1R; T2W = T2j - T2i; Tw = T3 - T6; T17 = T13 - T16; T1S = T1Q - T1R; T2k = T2i + T2j; T1w = T13 + T16; } { E Ta, T1T, TC, T1U, Td, T1W, Tz, T1X; { E T8, T9, TA, TB; T8 = Rp[WS(rs, 2)]; T9 = Rm[WS(rs, 5)]; Ta = T8 + T9; T1T = T8 - T9; TA = Ip[WS(rs, 2)]; TB = Im[WS(rs, 5)]; TC = TA - TB; T1U = TA + TB; } { E Tb, Tc, Tx, Ty; Tb = Rm[WS(rs, 1)]; Tc = Rp[WS(rs, 6)]; Td = Tb + Tc; T1W = Tb - Tc; Tx = Ip[WS(rs, 6)]; Ty = Im[WS(rs, 1)]; Tz = Tx - Ty; T1X = Tx + Ty; } Te = Ta + Td; TD = Tz - TC; T1x = TC + Tz; T10 = Ta - Td; { E T2l, T2m, T1V, T1Y; T2l = T1T + T1U; T2m = T1W + T1X; T2n = KP707106781 * (T2l - T2m); T2L = KP707106781 * (T2l + T2m); T1V = T1T - T1U; T1Y = T1W - T1X; T1Z = KP707106781 * (T1V + T1Y); T2X = KP707106781 * (T1V - T1Y); } } { E Ti, T2b, TI, T29, Tl, T28, TL, T2c, TF, TM; { E Tg, Th, TG, TH; Tg = Rp[WS(rs, 1)]; Th = Rm[WS(rs, 6)]; Ti = Tg + Th; T2b = Tg - Th; TG = Ip[WS(rs, 1)]; TH = Im[WS(rs, 6)]; TI = TG - TH; T29 = TG + TH; } { E Tj, Tk, 
TJ, TK; Tj = Rp[WS(rs, 5)]; Tk = Rm[WS(rs, 2)]; Tl = Tj + Tk; T28 = Tj - Tk; TJ = Ip[WS(rs, 5)]; TK = Im[WS(rs, 2)]; TL = TJ - TK; T2c = TJ + TK; } Tm = Ti + Tl; T1z = TI + TL; TF = Ti - Tl; TM = TI - TL; TN = TF - TM; T19 = TF + TM; { E T2a, T2d, T2N, T2O; T2a = T28 + T29; T2d = T2b - T2c; T2e = FMA(KP923879532, T2a, KP382683432 * T2d); T2p = FNMS(KP382683432, T2a, KP923879532 * T2d); T2N = T2b + T2c; T2O = T29 - T28; T2P = FNMS(KP923879532, T2O, KP382683432 * T2N); T2Z = FMA(KP382683432, T2O, KP923879532 * T2N); } } { E Tp, T24, TR, T22, Ts, T21, TU, T25, TO, TV; { E Tn, To, TP, TQ; Tn = Rm[0]; To = Rp[WS(rs, 7)]; Tp = Tn + To; T24 = Tn - To; TP = Ip[WS(rs, 7)]; TQ = Im[0]; TR = TP - TQ; T22 = TP + TQ; } { E Tq, Tr, TS, TT; Tq = Rp[WS(rs, 3)]; Tr = Rm[WS(rs, 4)]; Ts = Tq + Tr; T21 = Tq - Tr; TS = Ip[WS(rs, 3)]; TT = Im[WS(rs, 4)]; TU = TS - TT; T25 = TS + TT; } Tt = Tp + Ts; T1A = TR + TU; TO = Tp - Ts; TV = TR - TU; TW = TO + TV; T1a = TV - TO; { E T23, T26, T2Q, T2R; T23 = T21 - T22; T26 = T24 - T25; T27 = FNMS(KP382683432, T26, KP923879532 * T23); T2q = FMA(KP382683432, T23, KP923879532 * T26); T2Q = T24 + T25; T2R = T21 + T22; T2S = FNMS(KP923879532, T2R, KP382683432 * T2Q); T30 = FMA(KP382683432, T2R, KP923879532 * T2Q); } } { E Tf, Tu, T1u, T1y, T1B, T1C, T1t, T1v; Tf = T7 + Te; Tu = Tm + Tt; T1u = Tf - Tu; T1y = T1w + T1x; T1B = T1z + T1A; T1C = T1y - T1B; Rp[0] = Tf + Tu; Rm[0] = T1y + T1B; T1t = W[14]; T1v = W[15]; Rp[WS(rs, 4)] = FNMS(T1v, T1C, T1t * T1u); Rm[WS(rs, 4)] = FMA(T1v, T1u, T1t * T1C); } { E T2U, T34, T32, T36; { E T2M, T2T, T2Y, T31; T2M = T2K - T2L; T2T = T2P + T2S; T2U = T2M - T2T; T34 = T2M + T2T; T2Y = T2W + T2X; T31 = T2Z - T30; T32 = T2Y - T31; T36 = T2Y + T31; } { E T2J, T2V, T33, T35; T2J = W[20]; T2V = W[21]; Ip[WS(rs, 5)] = FNMS(T2V, T32, T2J * T2U); Im[WS(rs, 5)] = FMA(T2V, T2U, T2J * T32); T33 = W[4]; T35 = W[5]; Ip[WS(rs, 1)] = FNMS(T35, T36, T33 * T34); Im[WS(rs, 1)] = FMA(T35, T34, T33 * T36); } } { E T3a, T3g, T3e, T3i; { E 
T38, T39, T3c, T3d; T38 = T2K + T2L; T39 = T2Z + T30; T3a = T38 - T39; T3g = T38 + T39; T3c = T2W - T2X; T3d = T2P - T2S; T3e = T3c + T3d; T3i = T3c - T3d; } { E T37, T3b, T3f, T3h; T37 = W[12]; T3b = W[13]; Ip[WS(rs, 3)] = FNMS(T3b, T3e, T37 * T3a); Im[WS(rs, 3)] = FMA(T37, T3e, T3b * T3a); T3f = W[28]; T3h = W[29]; Ip[WS(rs, 7)] = FNMS(T3h, T3i, T3f * T3g); Im[WS(rs, 7)] = FMA(T3f, T3i, T3h * T3g); } } { E TY, T1e, T1c, T1g; { E TE, TX, T18, T1b; TE = Tw + TD; TX = KP707106781 * (TN + TW); TY = TE - TX; T1e = TE + TX; T18 = T10 + T17; T1b = KP707106781 * (T19 + T1a); T1c = T18 - T1b; T1g = T18 + T1b; } { E Tv, TZ, T1d, T1f; Tv = W[18]; TZ = W[19]; Rp[WS(rs, 5)] = FNMS(TZ, T1c, Tv * TY); Rm[WS(rs, 5)] = FMA(TZ, TY, Tv * T1c); T1d = W[2]; T1f = W[3]; Rp[WS(rs, 1)] = FNMS(T1f, T1g, T1d * T1e); Rm[WS(rs, 1)] = FMA(T1f, T1e, T1d * T1g); } } { E T1k, T1q, T1o, T1s; { E T1i, T1j, T1m, T1n; T1i = Tw - TD; T1j = KP707106781 * (T1a - T19); T1k = T1i - T1j; T1q = T1i + T1j; T1m = T17 - T10; T1n = KP707106781 * (TN - TW); T1o = T1m - T1n; T1s = T1m + T1n; } { E T1h, T1l, T1p, T1r; T1h = W[26]; T1l = W[27]; Rp[WS(rs, 7)] = FNMS(T1l, T1o, T1h * T1k); Rm[WS(rs, 7)] = FMA(T1h, T1o, T1l * T1k); T1p = W[10]; T1r = W[11]; Rp[WS(rs, 3)] = FNMS(T1r, T1s, T1p * T1q); Rm[WS(rs, 3)] = FMA(T1p, T1s, T1r * T1q); } } { E T2g, T2u, T2s, T2w; { E T20, T2f, T2o, T2r; T20 = T1S - T1Z; T2f = T27 - T2e; T2g = T20 - T2f; T2u = T20 + T2f; T2o = T2k - T2n; T2r = T2p - T2q; T2s = T2o - T2r; T2w = T2o + T2r; } { E T1P, T2h, T2t, T2v; T1P = W[24]; T2h = W[25]; Ip[WS(rs, 6)] = FNMS(T2h, T2s, T1P * T2g); Im[WS(rs, 6)] = FMA(T2h, T2g, T1P * T2s); T2t = W[8]; T2v = W[9]; Ip[WS(rs, 2)] = FNMS(T2v, T2w, T2t * T2u); Im[WS(rs, 2)] = FMA(T2v, T2u, T2t * T2w); } } { E T2A, T2G, T2E, T2I; { E T2y, T2z, T2C, T2D; T2y = T1S + T1Z; T2z = T2p + T2q; T2A = T2y - T2z; T2G = T2y + T2z; T2C = T2k + T2n; T2D = T2e + T27; T2E = T2C - T2D; T2I = T2C + T2D; } { E T2x, T2B, T2F, T2H; T2x = W[16]; T2B = W[17]; Ip[WS(rs, 4)] = 
FNMS(T2B, T2E, T2x * T2A); Im[WS(rs, 4)] = FMA(T2x, T2E, T2B * T2A); T2F = W[0]; T2H = W[1]; Ip[0] = FNMS(T2H, T2I, T2F * T2G); Im[0] = FMA(T2F, T2I, T2H * T2G); } } { E T1G, T1M, T1K, T1O; { E T1E, T1F, T1I, T1J; T1E = T7 - Te; T1F = T1A - T1z; T1G = T1E - T1F; T1M = T1E + T1F; T1I = T1w - T1x; T1J = Tm - Tt; T1K = T1I - T1J; T1O = T1J + T1I; } { E T1D, T1H, T1L, T1N; T1D = W[22]; T1H = W[23]; Rp[WS(rs, 6)] = FNMS(T1H, T1K, T1D * T1G); Rm[WS(rs, 6)] = FMA(T1D, T1K, T1H * T1G); T1L = W[6]; T1N = W[7]; Rp[WS(rs, 2)] = FNMS(T1N, T1O, T1L * T1M); Rm[WS(rs, 2)] = FMA(T1L, T1O, T1N * T1M); } } } }