/*
 * Tail of the t1bv_9 codelet (its beginning lies outside this fragment),
 * followed by the tables that describe and register it.
 *
 *  - Visible statements: the last results are written into x[] at
 *    WS(ios, k) offsets with ST, after which the function returns W.
 *  - twinstr: entries VTW(1) .. VTW(8), closed by {TW_NEXT, VL, 0}.
 *  - desc: ct_desc carrying the name "t1bv_9", twinstr and &GENUS.  The
 *    leading 9 matches the numeric suffix of the name (presumably the
 *    radix -- confirm against the ct_desc declaration).  {20, 20, 34, 0}
 *    looks like an operation tally -- TODO confirm.
 *  - X(codelet_t1bv_9): hands t1bv_9 and &desc to X(kdft_dit_register).
 */
Tt = VFNMS(LDK(KP939692620), Ts, T7); ST(&(x[WS(ios, 5)]), VFNMSI(TK, TG), dist, &(x[WS(ios, 1)])); ST(&(x[WS(ios, 4)]), VFMAI(TK, TG), dist, &(x[0])); ST(&(x[WS(ios, 2)]), VFMAI(Ty, Tt), dist, &(x[0])); ST(&(x[WS(ios, 7)]), VFNMSI(Ty, Tt), dist, &(x[WS(ios, 1)])); } } } } } } return W; } static const tw_instr twinstr[] = { VTW(1), VTW(2), VTW(3), VTW(4), VTW(5), VTW(6), VTW(7), VTW(8), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 9, "t1bv_9", twinstr, &GENUS, {20, 20, 34, 0}, 0, 0, 0 }; void X(codelet_t1bv_9) (planner *p) { X(kdft_dit_register) (p, t1bv_9, &desc); }
/*
 * Tail of the t1bv_6 codelet (its beginning lies outside this fragment),
 * followed by the tables that describe and register it.
 *
 *  - Visible statements: odd-indexed outputs x[WS(ios, 1|3|5)] are stored
 *    first, then Tn/Tl/Tm are formed and the even-indexed outputs
 *    x[0], x[WS(ios, 2)], x[WS(ios, 4)] are stored.  END_SIMD() is
 *    invoked before the function returns W.
 *  - twinstr: entries VTW(1) .. VTW(5), closed by {TW_NEXT, VL, 0}.
 *  - desc: ct_desc carrying the name "t1bv_6" and twinstr.  In this
 *    fragment the brace list {21, 12, 2, 0} precedes &GENUS.  The leading
 *    6 matches the name suffix (presumably the radix -- confirm); the
 *    brace list looks like an operation tally -- TODO confirm.
 *  - X(codelet_t1bv_6): hands t1bv_6 and &desc to X(kdft_dit_register).
 */
Th = VFNMS(LDK(KP500000000), Tg, Tf); ST(&(x[WS(ios, 1)]), VADD(Tb, Th), dist, &(x[WS(ios, 1)])); ST(&(x[WS(ios, 3)]), VADD(Tf, Tg), dist, &(x[WS(ios, 1)])); ST(&(x[WS(ios, 5)]), VSUB(Th, Tb), dist, &(x[WS(ios, 1)])); Tn = VBYI(VMUL(LDK(KP866025403), VSUB(Tj, Tk))); Tl = VADD(Tj, Tk); Tm = VFNMS(LDK(KP500000000), Tl, Ti); ST(&(x[WS(ios, 2)]), VSUB(Tm, Tn), dist, &(x[0])); ST(&(x[0]), VADD(Ti, Tl), dist, &(x[0])); ST(&(x[WS(ios, 4)]), VADD(Tn, Tm), dist, &(x[0])); } } END_SIMD(); return W; } static const tw_instr twinstr[] = { VTW(1), VTW(2), VTW(3), VTW(4), VTW(5), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 6, "t1bv_6", twinstr, {21, 12, 2, 0}, &GENUS, 0, 0, 0 }; void X(codelet_t1bv_6) (planner *p) { X(kdft_dit_register) (p, t1bv_6, &desc); }
T2d = VBYI(VSUB(T2a, T27)); ST(&(x[WS(ios, 23)]), VSUB(T2c, T2d), dist, &(x[WS(ios, 1)])); ST(&(x[WS(ios, 9)]), VADD(T2c, T2d), dist, &(x[WS(ios, 1)])); T2g = VBYI(VSUB(T2e, T2f)); T2j = VSUB(T2h, T2i); ST(&(x[WS(ios, 15)]), VADD(T2g, T2j), dist, &(x[WS(ios, 1)])); ST(&(x[WS(ios, 17)]), VSUB(T2j, T2g), dist, &(x[WS(ios, 1)])); } } } END_SIMD(); return W; } static const tw_instr twinstr[] = { VTW(1), VTW(2), VTW(3), VTW(4), VTW(5), VTW(6), VTW(7), VTW(8), VTW(9), VTW(10), VTW(11), VTW(12), VTW(13), VTW(14), VTW(15), VTW(16),
ST(&(x[WS(rs, 11)]), VFMAI(T2j, T2g), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 27)]), VFMAI(T2l, T2k), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 5)]), VFNMSI(T2l, T2k), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 3)]), VFMAI(T2d, T2c), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 29)]), VFNMSI(T2d, T2c), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 19)]), VFMAI(T2b, T24), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 13)]), VFNMSI(T2b, T24), ms, &(x[WS(rs, 1)])); } } } } VLEAVE(); } static const tw_instr twinstr[] = { VTW(0, 1), VTW(0, 2), VTW(0, 3), VTW(0, 4), VTW(0, 5), VTW(0, 6), VTW(0, 7), VTW(0, 8), VTW(0, 9), VTW(0, 10), VTW(0, 11), VTW(0, 12), VTW(0, 13), VTW(0, 14), VTW(0, 15), VTW(0, 16),
/*
 * Body tail of the hc2cbdftv_2 codelet (its beginning lies outside this
 * fragment), followed by the tables that describe and register it.
 *
 *  - Visible statements: T2 is loaded from Rp[0], T3 from Rm[0] (with
 *    stride argument -ms) and T1 from W[0] via LDW.  T6 = T4 + T5 is
 *    stored to Rp[0]; T7, the VCONJ of (T5 - T4), is stored to Rm[0].
 *    VLEAVE() is invoked before the function closes.
 *  - twinstr: the single entry VTW(1, 1), closed by {TW_NEXT, VL, 0}.
 *  - desc: hc2c_desc carrying XSIMD_STRING("hc2cbdftv_2"), twinstr and
 *    &GENUS.  The leading 2 matches the name suffix (presumably the
 *    transform size -- confirm); {3, 2, 2, 0} looks like an operation
 *    tally -- TODO confirm.
 *  - XSIMD(codelet_hc2cbdftv_2): passes hc2cbdftv_2, &desc and
 *    HC2C_VIA_DFT to X(khc2c_register).
 *  - The text after "#else" opens the alternative (non-HAVE_FMA) variant;
 *    its header comment continues past the end of this fragment.
 */
T2 = LD(&(Rp[0]), ms, &(Rp[0])); T3 = LD(&(Rm[0]), -ms, &(Rm[0])); T1 = LDW(&(W[0])); T5 = VFMACONJ(T3, T2); T4 = VZMULI(T1, VFNMSCONJ(T3, T2)); T7 = VCONJ(VSUB(T5, T4)); T6 = VADD(T4, T5); ST(&(Rm[0]), T7, -ms, &(Rm[0])); ST(&(Rp[0]), T6, ms, &(Rp[0])); } } VLEAVE(); } static const tw_instr twinstr[] = { VTW(1, 1), {TW_NEXT, VL, 0} }; static const hc2c_desc desc = { 2, XSIMD_STRING("hc2cbdftv_2"), twinstr, &GENUS, {3, 2, 2, 0} }; void XSIMD(codelet_hc2cbdftv_2) (planner *p) { X(khc2c_register) (p, hc2cbdftv_2, &desc, HC2C_VIA_DFT); } #else /* HAVE_FMA */ /* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 2 -dif -sign 1 -name hc2cbdftv_2 -include hc2cbv.h */ /* * This function contains 5 FP additions, 2 FP multiplications, * (or, 5 additions, 2 multiplications, 0 fused multiply/add),
/*
 * Tail of the t1fuv_7 codelet (its beginning lies outside this fragment),
 * followed by the tables that describe and register it.
 *
 *  - Visible statements: VFMAI/VFNMSI results are stored pairwise into
 *    x[WS(rs, 5)], x[WS(rs, 1)]/x[WS(rs, 6)] and x[WS(rs, 3)]/x[WS(rs, 4)];
 *    VLEAVE() is invoked before the function closes.
 *  - twinstr: entries VTW(0, 1) .. VTW(0, 6), closed by {TW_NEXT, VL, 0}.
 *  - desc: ct_desc carrying XSIMD_STRING("t1fuv_7"), twinstr and &GENUS.
 *    The leading 7 matches the name suffix (presumably the radix --
 *    confirm); {15, 15, 21, 0} looks like an operation tally -- TODO
 *    confirm.
 *  - XSIMD(codelet_t1fuv_7): hands t1fuv_7 and &desc to
 *    X(kdft_dit_register).
 *  - The trailing "#else" opens the alternative (non-HAVE_FMA) variant,
 *    which is not part of this fragment.
 */
ST(&(x[WS(rs, 5)]), VFNMSI(To, Tj), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 1)]), VFMAI(Tt, Tr), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 6)]), VFNMSI(Tt, Tr), ms, &(x[0])); ST(&(x[WS(rs, 3)]), VFMAI(Ty, Tw), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 4)]), VFNMSI(Ty, Tw), ms, &(x[0])); } } } } } } VLEAVE(); } static const tw_instr twinstr[] = { VTW(0, 1), VTW(0, 2), VTW(0, 3), VTW(0, 4), VTW(0, 5), VTW(0, 6), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 7, XSIMD_STRING("t1fuv_7"), twinstr, &GENUS, {15, 15, 21, 0}, 0, 0, 0 }; void XSIMD(codelet_t1fuv_7) (planner *p) { X(kdft_dit_register) (p, t1fuv_7, &desc); } #else /* HAVE_FMA */
/*
 * Tail of the hc2cfdftv_6 codelet (its beginning lies outside this
 * fragment), followed by the tables that describe and register it.
 *
 *  - Visible statements: the remaining outputs are scaled by
 *    KP500000000; values destined for Rm[] are additionally passed
 *    through VCONJ and stored with stride argument -ms, values destined
 *    for Rp[] are stored with ms.  VLEAVE() is invoked before the
 *    function closes.
 *  - twinstr: entries VTW(1, 1) .. VTW(1, 5), closed by {TW_NEXT, VL, 0}.
 *  - desc: hc2c_desc carrying XSIMD_STRING("hc2cfdftv_6"), twinstr and
 *    &GENUS.  The leading 6 matches the name suffix (presumably the
 *    transform size -- confirm); {17, 18, 12, 0} looks like an operation
 *    tally -- TODO confirm.
 *  - XSIMD(codelet_hc2cfdftv_6): passes hc2cfdftv_6, &desc and
 *    HC2C_VIA_DFT to X(khc2c_register).
 *  - The text after "#else" opens the alternative (non-HAVE_FMA) variant.
 */
Ty = VCONJ(VMUL(LDK(KP500000000), VFNMSI(Tx, Tw))); ST(&(Rm[WS(rs, 2)]), Tv, -ms, &(Rm[0])); Tp = VMUL(LDK(KP500000000), VFNMSI(To, Tn)); Tq = VCONJ(VMUL(LDK(KP500000000), VFMAI(To, Tn))); ST(&(Rp[0]), Tm, ms, &(Rp[0])); ST(&(Rp[WS(rs, 1)]), Tz, ms, &(Rp[WS(rs, 1)])); ST(&(Rm[0]), Ty, -ms, &(Rm[0])); ST(&(Rm[WS(rs, 1)]), Tq, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[WS(rs, 2)]), Tp, ms, &(Rp[0])); } } VLEAVE(); } static const tw_instr twinstr[] = { VTW(1, 1), VTW(1, 2), VTW(1, 3), VTW(1, 4), VTW(1, 5), {TW_NEXT, VL, 0} }; static const hc2c_desc desc = { 6, XSIMD_STRING("hc2cfdftv_6"), twinstr, &GENUS, {17, 18, 12, 0} }; void XSIMD(codelet_hc2cfdftv_6) (planner *p) { X(khc2c_register) (p, hc2cfdftv_6, &desc, HC2C_VIA_DFT); } #else /* HAVE_FMA */ /* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 6 -dit -name hc2cfdftv_6 -include hc2cfv.h */
/*
 * Tail of the hc2cfdftv_4 codelet (its beginning lies outside this
 * fragment), followed by the tables that describe and register it.
 *
 *  - Visible statements: T8, Td and Th are combined, each result is
 *    scaled by KP500000000 (the Rm[] values are also passed through
 *    VCONJ), and the four results are stored to Rm[0], Rp[WS(rs, 1)],
 *    Rp[0] and Rm[WS(rs, 1)].  No VLEAVE() call appears in this fragment.
 *  - twinstr: entries VTW(1, 1) .. VTW(1, 3), closed by {TW_NEXT, VL, 0}.
 *  - desc: hc2c_desc carrying the name "hc2cfdftv_4", twinstr and &GENUS.
 *    The leading 4 matches the name suffix (presumably the transform
 *    size -- confirm); {9, 10, 6, 0} looks like an operation tally --
 *    TODO confirm.
 *  - X(codelet_hc2cfdftv_4): passes hc2cfdftv_4, &desc and HC2C_VIA_DFT
 *    to X(khc2c_register).
 *  - The text after "#else" opens the alternative (non-HAVE_FMA) variant;
 *    its header comment continues past the end of this fragment.
 */
T8 = VSUB(T3, T7); Td = VSUB(Ta, Tc); Th = VADD(Tc, Ta); Tf = VCONJ(VMUL(LDK(KP500000000), VFMAI(Td, T8))); Te = VMUL(LDK(KP500000000), VFNMSI(Td, T8)); Ti = VMUL(LDK(KP500000000), VSUB(Tg, Th)); Tj = VCONJ(VMUL(LDK(KP500000000), VADD(Th, Tg))); ST(&(Rm[0]), Tf, -ms, &(Rm[0])); ST(&(Rp[WS(rs, 1)]), Te, ms, &(Rp[WS(rs, 1)])); ST(&(Rp[0]), Ti, ms, &(Rp[0])); ST(&(Rm[WS(rs, 1)]), Tj, -ms, &(Rm[WS(rs, 1)])); } } static const tw_instr twinstr[] = { VTW(1, 1), VTW(1, 2), VTW(1, 3), {TW_NEXT, VL, 0} }; static const hc2c_desc desc = { 4, "hc2cfdftv_4", twinstr, &GENUS, {9, 10, 6, 0} }; void X(codelet_hc2cfdftv_4) (planner *p) { X(khc2c_register) (p, hc2cfdftv_4, &desc, HC2C_VIA_DFT); } #else /* HAVE_FMA */ /* Generated by: ../../../genfft/gen_hc2cdft_c -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 4 -dit -name hc2cfdftv_4 -include hc2cfv.h */ /*
T17 = VCONJ(VSUB(T14, T16)); ST(&(Rm[WS(rs, 4)]), T17, -ms, &(Rm[0])); T1A = VCONJ(VSUB(T1r, T1z)); ST(&(Rm[WS(rs, 2)]), T1A, -ms, &(Rm[0])); T1B = VADD(T1r, T1z); ST(&(Rp[WS(rs, 2)]), T1B, ms, &(Rp[0])); T18 = VADD(T14, T16); ST(&(Rp[WS(rs, 4)]), T18, ms, &(Rp[0])); } } } VLEAVE(); } static const tw_instr twinstr[] = { VTW(1, 1), VTW(1, 2), VTW(1, 3), VTW(1, 4), VTW(1, 5), VTW(1, 6), VTW(1, 7), VTW(1, 8), VTW(1, 9), VTW(1, 10), VTW(1, 11), VTW(1, 12), VTW(1, 13), VTW(1, 14), VTW(1, 15), {TW_NEXT, VL, 0}
/*
 * Tail of the t1sv_2 codelet (its beginning lies outside this fragment),
 * followed by the tables that describe and register it.
 *
 *  - Visible statements: T9 = T2*T6 - T5*T3 and T7 = T2*T3 + T5*T6 are
 *    formed with VMUL/VFNMS/VFMA; Ta +/- T9 is stored into ii[] and
 *    T1 +/- T7 into ri[] (separate ri/ii arrays are written here, unlike
 *    the single x[] of the other fragments).  The function returns W.
 *  - twinstr: the single entry VTW(1), closed by {TW_NEXT, (2 * VL), 0};
 *    note the (2 * VL) where the other tables in this file use VL.
 *  - desc: ct_desc carrying the name "t1sv_2", twinstr and &GENUS.  The
 *    leading 2 matches the name suffix (presumably the radix -- confirm);
 *    {4, 2, 2, 0} looks like an operation tally -- TODO confirm.
 *  - X(codelet_t1sv_2): hands t1sv_2 and &desc to X(kdft_dit_register).
 *  - The text after "#else" opens the alternative (non-HAVE_FMA) variant;
 *    its header comment continues past the end of this fragment.
 */
V T8, T4, T9, T7; T8 = VMUL(T2, T6); T4 = VMUL(T2, T3); T9 = VFNMS(T5, T3, T8); T7 = VFMA(T5, T6, T4); ST(&(ii[0]), VADD(T9, Ta), dist, &(ii[0])); ST(&(ii[WS(ios, 1)]), VSUB(Ta, T9), dist, &(ii[WS(ios, 1)])); ST(&(ri[0]), VADD(T1, T7), dist, &(ri[0])); ST(&(ri[WS(ios, 1)]), VSUB(T1, T7), dist, &(ri[WS(ios, 1)])); } } return W; } static const tw_instr twinstr[] = { VTW(1), {TW_NEXT, (2 * VL), 0} }; static const ct_desc desc = { 2, "t1sv_2", twinstr, &GENUS, {4, 2, 2, 0}, 0, 0, 0 }; void X(codelet_t1sv_2) (planner *p) { X(kdft_dit_register) (p, t1sv_2, &desc); } #else /* HAVE_FMA */ /* Generated by: ../../../genfft/gen_twiddle -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1sv_2 -include ts.h */ /* * This function contains 6 FP additions, 4 FP multiplications, * (or, 4 additions, 2 multiplications, 2 fused multiply/add),
/*
 * Tail of the t1bv_12 codelet (its beginning lies outside this fragment),
 * followed by its twiddle table and descriptor.  The registration
 * function is not part of this fragment.
 *
 *  - Visible statements: TM, TN and TP are formed, then the outputs
 *    x[WS(ios, 2)], x[WS(ios, 8)], x[WS(ios, 10)] and x[WS(ios, 4)] are
 *    stored.  END_SIMD() is invoked before the function returns W.
 *  - twinstr: entries VTW(1) .. VTW(11), closed by {TW_NEXT, VL, 0}.
 *  - desc: ct_desc carrying the name "t1bv_12" and twinstr.  In this
 *    fragment the brace list {55, 26, 4, 0} precedes &GENUS.  The leading
 *    12 matches the name suffix (presumably the radix -- confirm); the
 *    brace list looks like an operation tally -- TODO confirm.
 */
TM = VADD(Tz, TD); TN = VSUB(TL, TM); TP = VADD(TL, TM); } ST(&(x[WS(ios, 2)]), VADD(TK, TN), dist, &(x[0])); ST(&(x[WS(ios, 8)]), VSUB(TP, TO), dist, &(x[0])); ST(&(x[WS(ios, 10)]), VSUB(TN, TK), dist, &(x[0])); ST(&(x[WS(ios, 4)]), VADD(TO, TP), dist, &(x[0])); } } END_SIMD(); return W; } static const tw_instr twinstr[] = { VTW(1), VTW(2), VTW(3), VTW(4), VTW(5), VTW(6), VTW(7), VTW(8), VTW(9), VTW(10), VTW(11), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 12, "t1bv_12", twinstr, {55, 26, 4, 0}, &GENUS, 0, 0, 0 };
/*
 * Tail of the t3fv_16 codelet (its beginning lies outside this fragment),
 * followed by the tables that describe and register it.
 *
 *  - Visible statements: VFNMSI/VFMAI results are stored into
 *    x[WS(ios, 14)], x[WS(ios, 2)], x[WS(ios, 10)] and x[WS(ios, 6)];
 *    the function then returns W.
 *  - twinstr: only VTW(1), VTW(3), VTW(9) and VTW(15) are listed before
 *    the {TW_NEXT, VL, 0} terminator -- fewer entries than the
 *    consecutive VTW runs of the t1 and t2 tables elsewhere in this
 *    file.  The generator command line quoted at the end of this line
 *    includes -twiddle-log3, which is presumably why -- confirm.
 *  - desc: ct_desc carrying the name "t3fv_16", twinstr and &GENUS.  The
 *    leading 16 matches the name suffix (presumably the radix --
 *    confirm); {64, 52, 34, 0} looks like an operation tally -- TODO
 *    confirm.
 *  - X(codelet_t3fv_16): hands t3fv_16 and &desc to X(kdft_dit_register).
 *  - The text after "#else" opens the alternative (non-HAVE_FMA) variant.
 */
ST(&(x[WS(ios, 14)]), VFNMSI(T12, T11), dist, &(x[0])); ST(&(x[WS(ios, 2)]), VFMAI(T12, T11), dist, &(x[0])); ST(&(x[WS(ios, 10)]), VFMAI(T10, TK), dist, &(x[0])); ST(&(x[WS(ios, 6)]), VFNMSI(T10, TK), dist, &(x[0])); } } } } } } } return W; } static const tw_instr twinstr[] = { VTW(1), VTW(3), VTW(9), VTW(15), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 16, "t3fv_16", twinstr, &GENUS, {64, 52, 34, 0}, 0, 0, 0 }; void X(codelet_t3fv_16) (planner *p) { X(kdft_dit_register) (p, t3fv_16, &desc); } #else /* HAVE_FMA */ /* Generated by: ../../../genfft/gen_twiddle_c -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 16 -name t3fv_16 -include t3f.h */
/*
 * Body of the q1fv_2 codelet (the function header lies outside this
 * fragment), followed by the tables that describe and register it.
 *
 *  - x is set to ri and the loop runs i = 0 .. m-1 in steps of VL,
 *    advancing x by VL * dist and W by TWVL * 2 per iteration.
 *  - Each iteration loads two pairs: (x[0], x[WS(is, 1)]) and
 *    (x[WS(vs, 1)], x[WS(vs, 1) + WS(is, 1)]).  The pair differences are
 *    passed through BYTWJ with &(W[0]) and written to the WS(vs, 1)
 *    positions; the pair sums are written to x[0] and x[WS(is, 1)].
 *    The sum of the second pair (loaded at a vs offset) lands at an is
 *    offset, so the stores are not simply in place.
 *  - END_SIMD() is invoked after the loop and the function returns W.
 *  - twinstr: the single entry VTW(1), closed by {TW_NEXT, VL, 0}.
 *  - desc: ct_desc carrying the name "q1fv_2" and twinstr; the brace
 *    list {6, 4, 0, 0} precedes &GENUS in this fragment and looks like
 *    an operation tally -- TODO confirm.  The leading 2 matches the name
 *    suffix (presumably the radix -- confirm).
 *  - X(codelet_q1fv_2): hands q1fv_2 and &desc to
 *    X(kdft_difsq_register).
 */
R *x; x = ri; BEGIN_SIMD(); for (i = 0; i < m; i = i + VL, x = x + (VL * dist), W = W + (TWVL * 2)) { V T1, T2, T3, T4, T5, T6; T1 = LD(&(x[0]), dist, &(x[0])); T2 = LD(&(x[WS(is, 1)]), dist, &(x[WS(is, 1)])); T3 = BYTWJ(&(W[0]), VSUB(T1, T2)); T4 = LD(&(x[WS(vs, 1)]), dist, &(x[WS(vs, 1)])); T5 = LD(&(x[WS(vs, 1) + WS(is, 1)]), dist, &(x[WS(vs, 1) + WS(is, 1)])); T6 = BYTWJ(&(W[0]), VSUB(T4, T5)); ST(&(x[WS(vs, 1)]), T3, dist, &(x[WS(vs, 1)])); ST(&(x[WS(vs, 1) + WS(is, 1)]), T6, dist, &(x[WS(vs, 1) + WS(is, 1)])); ST(&(x[0]), VADD(T1, T2), dist, &(x[0])); ST(&(x[WS(is, 1)]), VADD(T4, T5), dist, &(x[WS(is, 1)])); } END_SIMD(); return W; } static const tw_instr twinstr[] = { VTW(1), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 2, "q1fv_2", twinstr, {6, 4, 0, 0}, &GENUS, 0, 0, 0 }; void X(codelet_q1fv_2) (planner *p) { X(kdft_difsq_register) (p, q1fv_2, &desc); }
/*
 * Tail of the t2bv_5 codelet (its beginning lies outside this fragment),
 * followed by the tables that describe and register it.
 *
 *  - Visible statements: x[0] receives T1 + Tc.  Td = T1 - 0.25*Tc
 *    (VFNMS with KP250000000) is then combined with Te scaled by
 *    KP559016994 into Tj and Tf, and VFNMSI/VFMAI results are stored
 *    into x[WS(rs, 2)], x[WS(rs, 3)], x[WS(rs, 4)] and x[WS(rs, 1)].
 *    Neither a return statement nor VLEAVE() appears in this fragment.
 *  - twinstr: entries VTW(0, 1) .. VTW(0, 4), closed by {TW_NEXT, VL, 0}.
 *  - desc: ct_desc carrying the name "t2bv_5", twinstr and &GENUS.  The
 *    leading 5 matches the name suffix (presumably the radix -- confirm);
 *    {11, 10, 9, 0} looks like an operation tally -- TODO confirm.
 *  - X(codelet_t2bv_5): hands t2bv_5 and &desc to X(kdft_dit_register).
 *  - The text after "#else" opens the alternative (non-HAVE_FMA) variant.
 */
Td = VFNMS(LDK(KP250000000), Tc, T1); ST(&(x[0]), VADD(T1, Tc), ms, &(x[0])); Tj = VFNMS(LDK(KP559016994), Te, Td); Tf = VFMA(LDK(KP559016994), Te, Td); ST(&(x[WS(rs, 2)]), VFNMSI(Tk, Tj), ms, &(x[0])); ST(&(x[WS(rs, 3)]), VFMAI(Tk, Tj), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 4)]), VFNMSI(Ti, Tf), ms, &(x[0])); ST(&(x[WS(rs, 1)]), VFMAI(Ti, Tf), ms, &(x[WS(rs, 1)])); } } } } } static const tw_instr twinstr[] = { VTW(0, 1), VTW(0, 2), VTW(0, 3), VTW(0, 4), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 5, "t2bv_5", twinstr, &GENUS, {11, 10, 9, 0}, 0, 0, 0 }; void X(codelet_t2bv_5) (planner *p) { X(kdft_dit_register) (p, t2bv_5, &desc); } #else /* HAVE_FMA */ /* Generated by: ../../../genfft/gen_twiddle_c -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t2bv_5 -include t2b.h -sign 1 */
ST(&(x[WS(rs, 2)]), VFMAI(TN, TM), ms, &(x[0])); ST(&(x[WS(rs, 10)]), VFMAI(TL, Ty), ms, &(x[0])); ST(&(x[WS(rs, 6)]), VFNMSI(TL, Ty), ms, &(x[0])); } } } } } } } } VLEAVE(); } static const tw_instr twinstr[] = { VTW(0, 1), VTW(0, 2), VTW(0, 3), VTW(0, 4), VTW(0, 5), VTW(0, 6), VTW(0, 7), VTW(0, 8), VTW(0, 9), VTW(0, 10), VTW(0, 11), VTW(0, 12), VTW(0, 13), VTW(0, 14), VTW(0, 15), {TW_NEXT, VL, 0}
/*
 * Tail of the t2bv_10 codelet (its beginning lies outside this fragment),
 * followed by its twiddle table, descriptor and the start of its
 * registration function.
 *
 *  - Visible statements: VFNMSI/VFMAI results are stored into
 *    x[WS(rs, 4)], x[WS(rs, 9)], x[WS(rs, 1)], x[WS(rs, 7)] and
 *    x[WS(rs, 3)]; VLEAVE() is invoked before the function closes.
 *  - twinstr: entries VTW(0, 1) .. VTW(0, 9), closed by {TW_NEXT, VL, 0}.
 *  - desc: ct_desc carrying XSIMD_STRING("t2bv_10"), twinstr and &GENUS.
 *    The leading 10 matches the name suffix (presumably the radix --
 *    confirm); {33, 22, 18, 0} looks like an operation tally -- TODO
 *    confirm.
 *  - XSIMD(codelet_t2bv_10): hands t2bv_10 and &desc to
 *    X(kdft_dit_register).  The function's closing brace is not on this
 *    line; the fragment is cut off there.
 */
ST(&(x[WS(rs, 4)]), VFNMSI(TP, TO), ms, &(x[0])); ST(&(x[WS(rs, 9)]), VFNMSI(Tx, Tu), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 1)]), VFMAI(Tx, Tu), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 7)]), VFNMSI(Tz, Ty), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 3)]), VFMAI(Tz, Ty), ms, &(x[WS(rs, 1)])); } } } } } } VLEAVE(); } static const tw_instr twinstr[] = { VTW(0, 1), VTW(0, 2), VTW(0, 3), VTW(0, 4), VTW(0, 5), VTW(0, 6), VTW(0, 7), VTW(0, 8), VTW(0, 9), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 10, XSIMD_STRING("t2bv_10"), twinstr, &GENUS, {33, 22, 18, 0}, 0, 0, 0 }; void XSIMD(codelet_t2bv_10) (planner *p) { X(kdft_dit_register) (p, t2bv_10, &desc);
/*
 * Inner block and tail of the t1buv_3 codelet (its beginning, including
 * the definitions of T1, T2 and T4, lies outside this fragment),
 * followed by the tables that describe and register it.
 *
 *  - Visible statements: T2 and T4 are passed through BYTW with
 *    &(W[0]) and &(W[TWVL * 2]) respectively, giving T3 and T5.
 *    T6 = T3 + T5, T7 = T1 - 0.5*T6 (VFNMS with KP500000000) and
 *    T8 = KP866025403 * (T3 - T5).  x[0] receives T1 + T6;
 *    x[WS(rs, 2)] and x[WS(rs, 1)] receive VFNMSI(T8, T7) and
 *    VFMAI(T8, T7).  No VLEAVE() call appears in this fragment.
 *  - twinstr: entries VTW(0, 1) and VTW(0, 2), closed by
 *    {TW_NEXT, VL, 0}.
 *  - desc: ct_desc carrying the name "t1buv_3", twinstr and &GENUS.  The
 *    leading 3 matches the name suffix (presumably the radix -- confirm);
 *    {5, 5, 3, 0} looks like an operation tally -- TODO confirm.
 *  - X(codelet_t1buv_3): hands t1buv_3 and &desc to X(kdft_dit_register).
 *  - The text after "#else" opens the alternative (non-HAVE_FMA) variant;
 *    its header comment continues past the end of this fragment.
 */
{ V T3, T5, T8, T6, T7; T3 = BYTW(&(W[0]), T2); T5 = BYTW(&(W[TWVL * 2]), T4); T8 = VMUL(LDK(KP866025403), VSUB(T3, T5)); T6 = VADD(T3, T5); T7 = VFNMS(LDK(KP500000000), T6, T1); ST(&(x[0]), VADD(T1, T6), ms, &(x[0])); ST(&(x[WS(rs, 2)]), VFNMSI(T8, T7), ms, &(x[0])); ST(&(x[WS(rs, 1)]), VFMAI(T8, T7), ms, &(x[WS(rs, 1)])); } } } static const tw_instr twinstr[] = { VTW(0, 1), VTW(0, 2), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 3, "t1buv_3", twinstr, &GENUS, {5, 5, 3, 0}, 0, 0, 0 }; void X(codelet_t1buv_3) (planner *p) { X(kdft_dit_register) (p, t1buv_3, &desc); } #else /* HAVE_FMA */ /* Generated by: ../../../genfft/gen_twiddle_c -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name t1buv_3 -include t1bu.h -sign 1 */ /* * This function contains 8 FP additions, 6 FP multiplications,
/*
 * Tail of the t3bv_10 codelet (its beginning lies outside this fragment),
 * followed by the tables that describe and register it.
 *
 *  - Visible statements: VFNMSI/VFMAI results are stored into
 *    x[WS(rs, 9)], x[WS(rs, 1)], x[WS(rs, 7)] and x[WS(rs, 3)];
 *    VLEAVE() is invoked before the function closes.
 *  - twinstr: only VTW(0, 1), VTW(0, 3) and VTW(0, 9) are listed before
 *    the {TW_NEXT, VL, 0} terminator.  The generator command line quoted
 *    at the end of this line includes -twiddle-log3, which is presumably
 *    why the table is shorter than a t2-style table -- confirm.
 *  - desc: ct_desc carrying XSIMD_STRING("t3bv_10"), twinstr and &GENUS.
 *    The leading 10 matches the name suffix (presumably the radix --
 *    confirm); {39, 34, 18, 0} looks like an operation tally -- TODO
 *    confirm.
 *  - XSIMD(codelet_t3bv_10): hands t3bv_10 and &desc to
 *    X(kdft_dit_register).
 *  - The text after "#else" opens the alternative (non-HAVE_FMA) variant;
 *    its header comment continues past the end of this fragment.
 */
ST(&(x[WS(rs, 9)]), VFNMSI(TG, TD), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 1)]), VFMAI(TG, TD), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 7)]), VFNMSI(TI, TH), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 3)]), VFMAI(TI, TH), ms, &(x[WS(rs, 1)])); } } } } } } } VLEAVE(); } static const tw_instr twinstr[] = { VTW(0, 1), VTW(0, 3), VTW(0, 9), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 10, XSIMD_STRING("t3bv_10"), twinstr, &GENUS, {39, 34, 18, 0}, 0, 0, 0 }; void XSIMD(codelet_t3bv_10) (planner *p) { X(kdft_dit_register) (p, t3bv_10, &desc); } #else /* HAVE_FMA */ /* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 10 -name t3bv_10 -include t3b.h -sign 1 */ /*
/*
 * Loop and tail of the t1fv_3 codelet (the function header and the
 * initialisation of x lie outside this fragment), followed by the tables
 * that describe and register it.
 *
 *  - The loop counts i down from m in steps of VL while i > 0, advancing
 *    x by VL * dist and W by TWVL * 4 per iteration.
 *  - Each iteration loads x[0], x[WS(ios, 1)] and x[WS(ios, 2)]; the
 *    latter two are passed through BYTWJ with &(W[0]) and
 *    &(W[TWVL * 2]), giving T3 and T5.  With T6 = T3 + T5,
 *    T7 = T1 - 0.5*T6 and T8 = VBYI(KP866025403 * (T5 - T3)), the
 *    outputs are x[0] = T1 + T6, x[WS(ios, 2)] = T7 - T8 and
 *    x[WS(ios, 1)] = T7 + T8.
 *  - END_SIMD() is invoked after the loop and the function returns W.
 *  - twinstr: entries VTW(1) and VTW(2), closed by {TW_NEXT, VL, 0}.
 *  - desc: ct_desc carrying the name "t1fv_3" and twinstr; the brace
 *    list {7, 5, 1, 0} precedes &GENUS in this fragment and looks like
 *    an operation tally -- TODO confirm.  The leading 3 matches the name
 *    suffix (presumably the radix -- confirm).
 *  - X(codelet_t1fv_3): hands t1fv_3 and &desc to X(kdft_dit_register).
 */
BEGIN_SIMD(); for (i = m; i > 0; i = i - VL, x = x + (VL * dist), W = W + (TWVL * 4)) { V T1, T3, T5, T6, T2, T4, T7, T8; T1 = LD(&(x[0]), dist, &(x[0])); T2 = LD(&(x[WS(ios, 1)]), dist, &(x[WS(ios, 1)])); T3 = BYTWJ(&(W[0]), T2); T4 = LD(&(x[WS(ios, 2)]), dist, &(x[0])); T5 = BYTWJ(&(W[TWVL * 2]), T4); T6 = VADD(T3, T5); ST(&(x[0]), VADD(T1, T6), dist, &(x[0])); T7 = VFNMS(LDK(KP500000000), T6, T1); T8 = VBYI(VMUL(LDK(KP866025403), VSUB(T5, T3))); ST(&(x[WS(ios, 2)]), VSUB(T7, T8), dist, &(x[0])); ST(&(x[WS(ios, 1)]), VADD(T7, T8), dist, &(x[WS(ios, 1)])); } END_SIMD(); return W; } static const tw_instr twinstr[] = { VTW(1), VTW(2), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 3, "t1fv_3", twinstr, {7, 5, 1, 0}, &GENUS, 0, 0, 0 }; void X(codelet_t1fv_3) (planner *p) { X(kdft_dit_register) (p, t1fv_3, &desc); }
/*
 * Tail of the hc2cbdftv_10 codelet (its beginning lies outside this
 * fragment), followed by its twiddle table, descriptor and the start of
 * its registration function.
 *
 *  - Visible statements: ten previously computed values are stored, five
 *    into Rp[] (stride argument ms) and five into Rm[] (stride argument
 *    -ms), at offsets 0 .. WS(rs, 4).  No VLEAVE() call appears in this
 *    fragment.
 *  - twinstr: entries VTW(1, 1) .. VTW(1, 9), closed by {TW_NEXT, VL, 0}.
 *  - desc: hc2c_desc carrying the name "hc2cbdftv_10", twinstr and
 *    &GENUS.  The leading 10 matches the name suffix (presumably the
 *    transform size -- confirm); {33, 22, 28, 0} looks like an operation
 *    tally -- TODO confirm.
 *  - X(codelet_hc2cbdftv_10): passes hc2cbdftv_10, &desc and
 *    HC2C_VIA_DFT to X(khc2c_register).  The function's closing brace is
 *    not on this line; the fragment is cut off there.
 */
ST(&(Rp[WS(rs, 2)]), TX, ms, &(Rp[0])); ST(&(Rm[WS(rs, 2)]), TY, -ms, &(Rm[0])); ST(&(Rp[0]), T18, ms, &(Rp[0])); ST(&(Rm[0]), T19, -ms, &(Rm[0])); ST(&(Rm[WS(rs, 4)]), TQ, -ms, &(Rm[0])); ST(&(Rp[WS(rs, 4)]), TP, ms, &(Rp[0])); ST(&(Rp[WS(rs, 3)]), T13, ms, &(Rp[WS(rs, 1)])); ST(&(Rm[WS(rs, 3)]), T14, -ms, &(Rm[WS(rs, 1)])); ST(&(Rm[WS(rs, 1)]), TI, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[WS(rs, 1)]), TH, ms, &(Rp[WS(rs, 1)])); } } } static const tw_instr twinstr[] = { VTW(1, 1), VTW(1, 2), VTW(1, 3), VTW(1, 4), VTW(1, 5), VTW(1, 6), VTW(1, 7), VTW(1, 8), VTW(1, 9), {TW_NEXT, VL, 0} }; static const hc2c_desc desc = { 10, "hc2cbdftv_10", twinstr, &GENUS, {33, 22, 28, 0} }; void X(codelet_hc2cbdftv_10) (planner *p) { X(khc2c_register) (p, hc2cbdftv_10, &desc, HC2C_VIA_DFT);
/*
 * Tail of the t3bv_8 codelet (its beginning lies outside this fragment),
 * followed by the tables that describe and register it.
 *
 *  - Visible statements: VFMAI/VFNMSI results are stored into the
 *    odd-indexed outputs x[WS(rs, 1)], x[WS(rs, 7)], x[WS(rs, 5)] and
 *    x[WS(rs, 3)]; VLEAVE() is invoked before the function closes.
 *  - twinstr: only VTW(0, 1), VTW(0, 3) and VTW(0, 7) are listed before
 *    the {TW_NEXT, VL, 0} terminator.  The generator command line quoted
 *    at the end of this line includes -twiddle-log3, which is presumably
 *    why the table is shorter than a t2-style table -- confirm.
 *  - desc: ct_desc carrying XSIMD_STRING("t3bv_8"), twinstr and &GENUS.
 *    The leading 8 matches the name suffix (presumably the radix --
 *    confirm); {27, 22, 10, 0} looks like an operation tally -- TODO
 *    confirm.
 *  - XSIMD(codelet_t3bv_8): hands t3bv_8 and &desc to
 *    X(kdft_dit_register).
 *  - The text after "#else" opens the alternative (non-HAVE_FMA) variant;
 *    its header comment continues past the end of this fragment.
 */
ST(&(x[WS(rs, 1)]), VFMAI(Tw, Tv), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 7)]), VFNMSI(Tw, Tv), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 5)]), VFMAI(Tu, Tm), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 3)]), VFNMSI(Tu, Tm), ms, &(x[WS(rs, 1)])); } } } } } } } VLEAVE(); } static const tw_instr twinstr[] = { VTW(0, 1), VTW(0, 3), VTW(0, 7), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 8, XSIMD_STRING("t3bv_8"), twinstr, &GENUS, {27, 22, 10, 0}, 0, 0, 0 }; void XSIMD(codelet_t3bv_8) (planner *p) { X(kdft_dit_register) (p, t3bv_8, &desc); } #else /* HAVE_FMA */ /* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 8 -name t3bv_8 -include t3b.h -sign 1 */ /*
/*
 * Body tail of the q1fv_2 codelet in its XSIMD form (the function header
 * and the load of T1 lie outside this fragment), followed by the tables
 * that describe and register it.
 *
 *  - Visible statements: T2, T4 and T5 are loaded from x[WS(rs, 1)],
 *    x[WS(vs, 1)] and x[WS(vs, 1) + WS(rs, 1)].  The pair sums T1 + T2
 *    and T4 + T5 are stored to x[0] and x[WS(rs, 1)]; the pair
 *    differences are passed through BYTWJ with &(W[0]) and stored to
 *    x[WS(vs, 1)] and x[WS(vs, 1) + WS(rs, 1)].  The sum of the pair
 *    loaded at the vs offset lands at an rs offset, so the stores are
 *    not simply in place.  VLEAVE() is invoked before the function
 *    closes.
 *  - twinstr: the single entry VTW(0, 1), closed by {TW_NEXT, VL, 0}.
 *  - desc: ct_desc carrying XSIMD_STRING("q1fv_2"), twinstr and &GENUS.
 *    The leading 2 matches the name suffix (presumably the radix --
 *    confirm); {6, 4, 0, 0} looks like an operation tally -- TODO
 *    confirm.
 *  - XSIMD(codelet_q1fv_2): hands q1fv_2 and &desc to
 *    X(kdft_difsq_register).
 *  - The text after "#else" opens the alternative (non-HAVE_FMA) variant;
 *    its header comment continues past the end of this fragment.
 */
T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)])); T4 = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)])); T5 = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)])); ST(&(x[0]), VADD(T1, T2), ms, &(x[0])); T3 = BYTWJ(&(W[0]), VSUB(T1, T2)); ST(&(x[WS(rs, 1)]), VADD(T4, T5), ms, &(x[WS(rs, 1)])); T6 = BYTWJ(&(W[0]), VSUB(T4, T5)); ST(&(x[WS(vs, 1)]), T3, ms, &(x[WS(vs, 1)])); ST(&(x[WS(vs, 1) + WS(rs, 1)]), T6, ms, &(x[WS(vs, 1) + WS(rs, 1)])); } } VLEAVE(); } static const tw_instr twinstr[] = { VTW(0, 1), {TW_NEXT, VL, 0} }; static const ct_desc desc = { 2, XSIMD_STRING("q1fv_2"), twinstr, &GENUS, {6, 4, 0, 0}, 0, 0, 0 }; void XSIMD(codelet_q1fv_2) (planner *p) { X(kdft_difsq_register) (p, q1fv_2, &desc); } #else /* HAVE_FMA */ /* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -dif -name q1fv_2 -include q1f.h */ /* * This function contains 6 FP additions, 4 FP multiplications, * (or, 6 additions, 4 multiplications, 0 fused multiply/add),
/*
 * Tail of the t1sv_4 codelet (its beginning lies outside this fragment),
 * followed by the tables that describe and register it.
 *
 *  - Visible statements: sums and differences of previously computed
 *    values are stored into ri[] and ii[] at offsets 0 .. WS(ios, 3)
 *    (separate ri/ii arrays are written here, unlike the single x[] of
 *    the other fragments).  The function then returns W.
 *  - twinstr: entries VTW(1) .. VTW(3), closed by
 *    {TW_NEXT, (2 * VL), 0}; note the (2 * VL) where the other tables in
 *    this file use VL.
 *  - desc: ct_desc carrying the name "t1sv_4", twinstr and &GENUS.  The
 *    leading 4 matches the name suffix (presumably the radix -- confirm);
 *    {16, 6, 6, 0} looks like an operation tally -- TODO confirm.
 *  - X(codelet_t1sv_4): hands t1sv_4 and &desc to X(kdft_dit_register).
 *  - The text after "#else" opens the alternative (non-HAVE_FMA) variant;
 *    its header comment continues past the end of this fragment.
 */
ST(&(ri[WS(ios, 3)]), VSUB(Tm, Tr), dist, &(ri[WS(ios, 1)])); ST(&(ii[WS(ios, 2)]), VSUB(Tw, Ts), dist, &(ii[0])); ST(&(ii[0]), VADD(Ts, Tw), dist, &(ii[0])); ST(&(ii[WS(ios, 3)]), VADD(Ty, Tx), dist, &(ii[WS(ios, 1)])); ST(&(ii[WS(ios, 1)]), VSUB(Tx, Ty), dist, &(ii[WS(ios, 1)])); ST(&(ri[0]), VADD(T8, Tl), dist, &(ri[0])); ST(&(ri[WS(ios, 2)]), VSUB(T8, Tl), dist, &(ri[0])); } } } } return W; } static const tw_instr twinstr[] = { VTW(1), VTW(2), VTW(3), {TW_NEXT, (2 * VL), 0} }; static const ct_desc desc = { 4, "t1sv_4", twinstr, &GENUS, {16, 6, 6, 0}, 0, 0, 0 }; void X(codelet_t1sv_4) (planner *p) { X(kdft_dit_register) (p, t1sv_4, &desc); } #else /* HAVE_FMA */ /* Generated by: ../../../genfft/gen_twiddle -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1sv_4 -include ts.h */ /*