/* NOTE(review): continuation of an initializer list that opens outside this
 * view; the entries are reproduced unchanged and end with a
 * {TW_NEXT, VL, 0} entry. */
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     VTW(0, 9),
     VTW(0, 10),
     VTW(0, 11),
     VTW(0, 12),
     VTW(0, 13),
     VTW(0, 14),
     VTW(0, 15),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 16, the name
 * string "t1fv_16", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 16, XSIMD_STRING("t1fv_16"), twinstr, &GENUS, {53, 30, 34, 0}, 0, 0, 0 };

/* Registers t1fv_16 together with its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t1fv_16) (planner *p) {
     X(kdft_dit_register) (p, t1fv_16, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t1fv_16 -include t1f.h */

/*
 * This function contains 87 FP additions, 42 FP multiplications,
 * (or, 83 additions, 38 multiplications, 4 fused multiply/add),
 * 36 stack variables, 3 constants, and 32 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t1f.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged. */
               T4 = VZMULI(T1, VFNMSCONJ(T3, T2));
               T7 = VCONJ(VSUB(T5, T4));
               T6 = VADD(T4, T5);
               /* T7 goes to Rm[0] with stride argument -ms, T6 to Rp[0] with ms. */
               ST(&(Rm[0]), T7, -ms, &(Rm[0]));
               ST(&(Rp[0]), T6, ms, &(Rp[0]));
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: one VTW(1, 1) entry followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(1, 1),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 2, the name
 * string "hc2cbdftv_2", the twinstr table, &GENUS and a four-value group.
 * NOTE(review): the four-value group presumably holds operation counts --
 * confirm against the hc2c_desc declaration. */
static const hc2c_desc desc = { 2, XSIMD_STRING("hc2cbdftv_2"), twinstr, &GENUS, {3, 2, 2, 0} };

/* Registers hc2cbdftv_2 and its descriptor on planner p through
 * X(khc2c_register), passing HC2C_VIA_DFT as the last argument. */
void XSIMD(codelet_hc2cbdftv_2) (planner *p) {
     X(khc2c_register) (p, hc2cbdftv_2, &desc, HC2C_VIA_DFT);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 2 -dif -sign 1 -name hc2cbdftv_2 -include hc2cbv.h */

/*
 * This function contains 5 FP additions, 2 FP multiplications,
 * (or, 5 additions, 2 multiplications, 0 fused multiply/add),
 * 9 stack variables, 0 constants, and 4 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "hc2cbv.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged. */
               T3 = BYTWJ(&(W[0]), VSUB(T1, T2));
               ST(&(x[WS(rs, 1)]), VADD(T4, T5), ms, &(x[WS(rs, 1)]));
               T6 = BYTWJ(&(W[0]), VSUB(T4, T5));
               ST(&(x[WS(vs, 1)]), T3, ms, &(x[WS(vs, 1)]));
               ST(&(x[WS(vs, 1) + WS(rs, 1)]), T6, ms, &(x[WS(vs, 1) + WS(rs, 1)]));
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: one VTW(0, 1) entry followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 2, the name
 * string "q1fv_2", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 2, XSIMD_STRING("q1fv_2"), twinstr, &GENUS, {6, 4, 0, 0}, 0, 0, 0 };

/* Registers q1fv_2 and its descriptor on planner p through
 * X(kdft_difsq_register). */
void XSIMD(codelet_q1fv_2) (planner *p) {
     X(kdft_difsq_register) (p, q1fv_2, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -dif -name q1fv_2 -include q1f.h */

/*
 * This function contains 6 FP additions, 4 FP multiplications,
 * (or, 6 additions, 4 multiplications, 0 fused multiply/add),
 * 8 stack variables, 0 constants, and 8 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "q1f.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged.
 * The visible ST calls target xo slots 8, 2, 3, 7, 9 and 1. */
                                   ST(&(xo[WS(os, 8)]), VFNMSI(TE, TB), ovs, &(xo[0]));
                                   ST(&(xo[WS(os, 2)]), VFMAI(TE, TB), ovs, &(xo[0]));
                                   ST(&(xo[WS(os, 3)]), VFNMSI(Tq, Tp), ovs, &(xo[WS(os, 1)]));
                                   ST(&(xo[WS(os, 7)]), VFMAI(Tq, Tp), ovs, &(xo[WS(os, 1)]));
                                   ST(&(xo[WS(os, 9)]), VFMAI(To, Tl), ovs, &(xo[WS(os, 1)]));
                                   ST(&(xo[WS(os, 1)]), VFNMSI(To, Tl), ovs, &(xo[WS(os, 1)]));
                              }
                         }
                    }
               }
          }
     }
     VLEAVE();
}

/* Descriptor passed to the registration call below: the value 10, the name
 * string "n1fv_10", a four-value group, &GENUS and four zeros.
 * NOTE(review): the four-value group presumably holds operation counts --
 * confirm against the kdft_desc declaration. */
static const kdft_desc desc = { 10, XSIMD_STRING("n1fv_10"), {24, 4, 18, 0}, &GENUS, 0, 0, 0, 0 };

/* Registers n1fv_10 and its descriptor on planner p through
 * X(kdft_register). */
void XSIMD(codelet_n1fv_10) (planner *p) {
     X(kdft_register) (p, n1fv_10, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name n1fv_10 -include n1f.h */

/*
 * This function contains 42 FP additions, 12 FP multiplications,
 * (or, 36 additions, 6 multiplications, 6 fused multiply/add),
 * 33 stack variables, 4 constants, and 20 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "n1f.h"
/* NOTE(review): the brace below closes a definition that begins outside this
 * view. */
}

/* Twiddle instruction table: VTW(0, k) for k = 1..9 followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     VTW(0, 9),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 10, the name
 * string "t2bv_10", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 10, XSIMD_STRING("t2bv_10"), twinstr, &GENUS, {33, 22, 18, 0}, 0, 0, 0 };

/* Registers t2bv_10 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t2bv_10) (planner *p) {
     X(kdft_dit_register) (p, t2bv_10, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t2bv_10 -include t2b.h -sign 1 */

/*
 * This function contains 51 FP additions, 30 FP multiplications,
 * (or, 45 additions, 24 multiplications, 6 fused multiply/add),
 * 32 stack variables, 4 constants, and 20 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t2b.h"
/* Twiddle instruction table: VTW(0, k) for k = 1..11 followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     VTW(0, 9),
     VTW(0, 10),
     VTW(0, 11),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 12, the name
 * string "t1fv_12", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 12, XSIMD_STRING("t1fv_12"), twinstr, &GENUS, {41, 24, 18, 0}, 0, 0, 0 };

/* Registers t1fv_12 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t1fv_12) (planner *p) {
     X(kdft_dit_register) (p, t1fv_12, &desc);
}
/* Other branch of a conditional that opens outside this view.
 * NOTE(review): the controlling condition is not visible here. */
#else

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 12 -name t1fv_12 -include dft/simd/t1f.h */

/*
 * This function contains 59 FP additions, 30 FP multiplications,
 * (or, 55 additions, 26 multiplications, 4 fused multiply/add),
 * 28 stack variables, 2 constants, and 24 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "dft/simd/t1f.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged. */
               /* Load x[0] and x[WS(rs, 1)]; the second value is passed
                * through BYTWJ with W[0] before being combined. */
               T1 = LD(&(x[0]), ms, &(x[0]));
               T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
               T3 = BYTWJ(&(W[0]), T2);
               /* VADD result goes back to x[0], VSUB result to x[WS(rs, 1)]. */
               ST(&(x[0]), VADD(T1, T3), ms, &(x[0]));
               ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)]));
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: one VTW(0, 1) entry followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 2, the name
 * string "t1fuv_2", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 2, XSIMD_STRING("t1fuv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 };

/* Registers t1fuv_2 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t1fuv_2) (planner *p) {
     X(kdft_dit_register) (p, t1fuv_2, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1fuv_2 -include t1fu.h */

/*
 * This function contains 3 FP additions, 2 FP multiplications,
 * (or, 3 additions, 2 multiplications, 0 fused multiply/add),
 * 5 stack variables, 0 constants, and 4 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t1fu.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged.
 * The Rp stores pass ms and the Rm store passes -ms as stride argument. */
               ST(&(Rp[WS(rs, 1)]), Te, ms, &(Rp[WS(rs, 1)]));
               ST(&(Rp[0]), Ti, ms, &(Rp[0]));
               ST(&(Rm[WS(rs, 1)]), Tj, -ms, &(Rm[WS(rs, 1)]));
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(1, k) for k = 1..3 followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(1, 1),
     VTW(1, 2),
     VTW(1, 3),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 4, the name
 * string "hc2cfdftv_4", the twinstr table, &GENUS and a four-value group.
 * NOTE(review): the four-value group presumably holds operation counts --
 * confirm against the hc2c_desc declaration. */
static const hc2c_desc desc = { 4, XSIMD_STRING("hc2cfdftv_4"), twinstr, &GENUS, {9, 10, 6, 0} };

/* Registers hc2cfdftv_4 and its descriptor on planner p through
 * X(khc2c_register), passing HC2C_VIA_DFT as the last argument. */
void XSIMD(codelet_hc2cfdftv_4) (planner *p) {
     X(khc2c_register) (p, hc2cfdftv_4, &desc, HC2C_VIA_DFT);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 4 -dit -name hc2cfdftv_4 -include hc2cfv.h */

/*
 * This function contains 15 FP additions, 10 FP multiplications,
 * (or, 15 additions, 10 multiplications, 0 fused multiply/add),
 * 23 stack variables, 1 constants, and 8 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "hc2cfv.h"
/* NOTE(review): the braces and VLEAVE() below close a function whose
 * beginning is outside this view. */
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(0, k) for k = 1..7 followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 8, the name
 * string "t1bv_8", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 8, XSIMD_STRING("t1bv_8"), twinstr, &GENUS, {23, 14, 10, 0}, 0, 0, 0 };

/* Registers t1bv_8 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t1bv_8) (planner *p) {
     X(kdft_dit_register) (p, t1bv_8, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1bv_8 -include t1b.h -sign 1 */

/*
 * This function contains 33 FP additions, 16 FP multiplications,
 * (or, 33 additions, 16 multiplications, 0 fused multiply/add),
 * 24 stack variables, 1 constants, and 16 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t1b.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged.
 * The visible ST calls target xo slots 2, 5, 1, 6, 3 and 4. */
                                   ST(&(xo[WS(os, 2)]), VFMAI(Ti, Td), ovs, &(xo[0]));
                                   ST(&(xo[WS(os, 5)]), VFNMSI(Ti, Td), ovs, &(xo[WS(os, 1)]));
                                   ST(&(xo[WS(os, 1)]), VFMAI(Tn, Tl), ovs, &(xo[WS(os, 1)]));
                                   ST(&(xo[WS(os, 6)]), VFNMSI(Tn, Tl), ovs, &(xo[0]));
                                   ST(&(xo[WS(os, 3)]), VFMAI(Ts, Tq), ovs, &(xo[WS(os, 1)]));
                                   ST(&(xo[WS(os, 4)]), VFNMSI(Ts, Tq), ovs, &(xo[0]));
                              }
                         }
                    }
               }
          }
     }
     VLEAVE();
}

/* Descriptor passed to the registration call below: the value 7, the name
 * string "n1fv_7", a four-value group, &GENUS and four zeros.
 * NOTE(review): the four-value group presumably holds operation counts --
 * confirm against the kdft_desc declaration. */
static const kdft_desc desc = { 7, XSIMD_STRING("n1fv_7"), {9, 3, 21, 0}, &GENUS, 0, 0, 0, 0 };

/* Registers n1fv_7 and its descriptor on planner p through
 * X(kdft_register). */
void XSIMD(codelet_n1fv_7) (planner *p) {
     X(kdft_register) (p, n1fv_7, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name n1fv_7 -include n1f.h */

/*
 * This function contains 30 FP additions, 18 FP multiplications,
 * (or, 18 additions, 6 multiplications, 12 fused multiply/add),
 * 24 stack variables, 6 constants, and 14 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "n1f.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged. */
                         ST(&(x[WS(rs, 3)]), VFMAI(Tc, T7), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 1)]), VFNMSI(Tc, T7), ms, &(x[WS(rs, 1)]));
                    }
               }
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(0, 1) and VTW(0, 3) followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 3),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 4, the name
 * string "t3fv_4", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 4, XSIMD_STRING("t3fv_4"), twinstr, &GENUS, {10, 8, 2, 0}, 0, 0, 0 };

/* Registers t3fv_4 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t3fv_4) (planner *p) {
     X(kdft_dit_register) (p, t3fv_4, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 4 -name t3fv_4 -include t3f.h */

/*
 * This function contains 12 FP additions, 8 FP multiplications,
 * (or, 12 additions, 8 multiplications, 0 fused multiply/add),
 * 16 stack variables, 0 constants, and 8 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t3f.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged.
 * The visible ST calls target xo slots 3, 7, 6, 11 and 2. */
                                        ST(&(xo[WS(os, 3)]), VFNMSI(T1i, T1h), ovs, &(xo[WS(os, 1)]));
                                        ST(&(xo[WS(os, 7)]), VFNMSI(T1o, T1l), ovs, &(xo[WS(os, 1)]));
                                        ST(&(xo[WS(os, 6)]), VFMAI(T1o, T1l), ovs, &(xo[0]));
                                        ST(&(xo[WS(os, 11)]), VFNMSI(T1q, T1p), ovs, &(xo[WS(os, 1)]));
                                        ST(&(xo[WS(os, 2)]), VFMAI(T1q, T1p), ovs, &(xo[0]));
                                   }
                              }
                         }
                    }
               }
          }
     }
     VLEAVE();
}

/* Descriptor passed to the registration call below: the value 13, the name
 * string "n1bv_13", a four-value group, &GENUS and four zeros.
 * NOTE(review): the four-value group presumably holds operation counts --
 * confirm against the kdft_desc declaration. */
static const kdft_desc desc = { 13, XSIMD_STRING("n1bv_13"), {31, 6, 57, 0}, &GENUS, 0, 0, 0, 0 };

/* Registers n1bv_13 and its descriptor on planner p through
 * X(kdft_register). */
void XSIMD(codelet_n1bv_13) (planner *p) {
     X(kdft_register) (p, n1bv_13, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 13 -name n1bv_13 -include n1b.h */

/*
 * This function contains 88 FP additions, 34 FP multiplications,
 * (or, 69 additions, 15 multiplications, 19 fused multiply/add),
 * 60 stack variables, 20 constants, and 26 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "n1b.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged.
 * Each value is first handed to STM2 for a single xo slot and then to STN2
 * together with a previously computed value. */
                              STN2(&(xo[32]), T1P, T1Y, ovs);
                              T1Z = VFMAI(T10, TZ);
                              STM2(&(xo[14]), T1Z, ovs, &(xo[2]));
                              STN2(&(xo[12]), T1M, T1Z, ovs);
                              T20 = VFNMSI(T10, TZ);
                              STM2(&(xo[26]), T20, ovs, &(xo[2]));
                              STN2(&(xo[24]), T1R, T20, ovs);
                         }
                    }
               }
          }
     }
     VLEAVE();
}

/* Descriptor passed to the registration call below: the value 20, the name
 * string "n2fv_20", a four-value group, &GENUS and the trailing values
 * 0, 2, 0, 0.  NOTE(review): the four-value group presumably holds operation
 * counts, and the trailing 2 presumably matches the generator option
 * "-with-ostride 2" -- confirm against the kdft_desc declaration. */
static const kdft_desc desc = { 20, XSIMD_STRING("n2fv_20"), {58, 4, 46, 0}, &GENUS, 0, 2, 0, 0 };

/* Registers n2fv_20 and its descriptor on planner p through
 * X(kdft_register). */
void XSIMD(codelet_n2fv_20) (planner *p) {
     X(kdft_register) (p, n2fv_20, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name n2fv_20 -with-ostride 2 -include n2f.h -store-multiple 2 */

/*
 * This function contains 104 FP additions, 24 FP multiplications,
 * (or, 92 additions, 12 multiplications, 12 fused multiply/add),
 * 57 stack variables, 4 constants, and 50 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "n2f.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged.
 * The visible ST calls write to x at offset WS(vs, 2) plus WS(rs, 1..3). */
               ST(&(x[WS(vs, 2) + WS(rs, 1)]), Tm, ms, &(x[WS(vs, 2) + WS(rs, 1)]));
               ST(&(x[WS(vs, 2) + WS(rs, 2)]), Tx, ms, &(x[WS(vs, 2)]));
               ST(&(x[WS(vs, 2) + WS(rs, 3)]), TI, ms, &(x[WS(vs, 2) + WS(rs, 1)]));
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(0, k) for k = 1..3 followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 4, the name
 * string "q1fv_4", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 4, XSIMD_STRING("q1fv_4"), twinstr, &GENUS, {36, 24, 8, 0}, 0, 0, 0 };

/* Registers q1fv_4 and its descriptor on planner p through
 * X(kdft_difsq_register). */
void XSIMD(codelet_q1fv_4) (planner *p) {
     X(kdft_difsq_register) (p, q1fv_4, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -dif -name q1fv_4 -include q1f.h */

/*
 * This function contains 44 FP additions, 24 FP multiplications,
 * (or, 44 additions, 24 multiplications, 0 fused multiply/add),
 * 22 stack variables, 0 constants, and 32 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "q1f.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged.
 * The visible ST calls target xo slots 11, 1, 8, 4, 2 and 10. */
                                   ST(&(xo[WS(os, 11)]), VFNMSI(TK, TH), ovs, &(xo[WS(os, 1)]));
                                   ST(&(xo[WS(os, 1)]), VFMAI(TK, TH), ovs, &(xo[WS(os, 1)]));
                                   ST(&(xo[WS(os, 8)]), VFNMSI(TE, TD), ovs, &(xo[0]));
                                   ST(&(xo[WS(os, 4)]), VFMAI(TE, TD), ovs, &(xo[0]));
                                   ST(&(xo[WS(os, 2)]), VFMAI(TC, Tv), ovs, &(xo[0]));
                                   ST(&(xo[WS(os, 10)]), VFNMSI(TC, Tv), ovs, &(xo[0]));
                              }
                         }
                    }
               }
          }
     }
     VLEAVE();
}

/* Descriptor passed to the registration call below: the value 12, the name
 * string "n1bv_12", a four-value group, &GENUS and four zeros.
 * NOTE(review): the four-value group presumably holds operation counts --
 * confirm against the kdft_desc declaration. */
static const kdft_desc desc = { 12, XSIMD_STRING("n1bv_12"), {30, 2, 18, 0}, &GENUS, 0, 0, 0, 0 };

/* Registers n1bv_12 and its descriptor on planner p through
 * X(kdft_register). */
void XSIMD(codelet_n1bv_12) (planner *p) {
     X(kdft_register) (p, n1bv_12, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 12 -name n1bv_12 -include n1b.h */

/*
 * This function contains 48 FP additions, 8 FP multiplications,
 * (or, 44 additions, 4 multiplications, 4 fused multiply/add),
 * 27 stack variables, 2 constants, and 24 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "n1b.h"
/* NOTE(review): the braces and VLEAVE() below close a function whose
 * beginning is outside this view. */
               }
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(0, k) for k = 1..5 followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 6, the name
 * string "t1bv_6", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 6, XSIMD_STRING("t1bv_6"), twinstr, &GENUS, {17, 12, 6, 0}, 0, 0, 0 };

/* Registers t1bv_6 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t1bv_6) (planner *p) {
     X(kdft_dit_register) (p, t1bv_6, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name t1bv_6 -include t1b.h -sign 1 */

/*
 * This function contains 23 FP additions, 14 FP multiplications,
 * (or, 21 additions, 12 multiplications, 2 fused multiply/add),
 * 19 stack variables, 2 constants, and 12 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t1b.h"
/* NOTE(review): the braces and VLEAVE() below close a function whose
 * beginning is outside this view. */
                    }
               }
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(0, k) for k = 1..4 followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 5, the name
 * string "t1fv_5", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 5, XSIMD_STRING("t1fv_5"), twinstr, &GENUS, {11, 10, 9, 0}, 0, 0, 0 };

/* Registers t1fv_5 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t1fv_5) (planner *p) {
     X(kdft_dit_register) (p, t1fv_5, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t1fv_5 -include t1f.h */

/*
 * This function contains 20 FP additions, 14 FP multiplications,
 * (or, 17 additions, 11 multiplications, 3 fused multiply/add),
 * 20 stack variables, 4 constants, and 10 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t1f.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged. */
               V T1, T2, T3, T6, T4, T5;
               /* Load the three inputs xi[0], xi[WS(is, 1)], xi[WS(is, 2)]. */
               T1 = LD(&(xi[0]), ivs, &(xi[0]));
               T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
               T3 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
               /* T6 scales (T3 - T2) by the constant KP866025403; T4 is T2 + T3;
                * T5 combines KP500000000, T4 and T1 through VFNMS. */
               T6 = VMUL(LDK(KP866025403), VSUB(T3, T2));
               T4 = VADD(T2, T3);
               T5 = VFNMS(LDK(KP500000000), T4, T1);
               /* Three outputs: T1 + T4 to slot 0, then the VFMAI / VFNMSI
                * combinations of T6 and T5 to slots 1 and 2. */
               ST(&(xo[0]), VADD(T1, T4), ovs, &(xo[0]));
               ST(&(xo[WS(os, 1)]), VFMAI(T6, T5), ovs, &(xo[WS(os, 1)]));
               ST(&(xo[WS(os, 2)]), VFNMSI(T6, T5), ovs, &(xo[0]));
          }
     }
     VLEAVE();
}

/* Descriptor passed to the registration call below: the value 3, the name
 * string "n1fv_3", a four-value group, &GENUS and four zeros.
 * NOTE(review): the four-value group presumably holds operation counts --
 * confirm against the kdft_desc declaration. */
static const kdft_desc desc = { 3, XSIMD_STRING("n1fv_3"), {3, 1, 3, 0}, &GENUS, 0, 0, 0, 0 };

/* Registers n1fv_3 and its descriptor on planner p through
 * X(kdft_register). */
void XSIMD(codelet_n1fv_3) (planner *p) {
     X(kdft_register) (p, n1fv_3, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name n1fv_3 -include n1f.h */

/*
 * This function contains 6 FP additions, 2 FP multiplications,
 * (or, 5 additions, 1 multiplications, 1 fused multiply/add),
 * 11 stack variables, 2 constants, and 6 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "n1f.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged. */
                    ST(&(x[WS(rs, 3)]), VFMAI(T9, T4), ms, &(x[WS(rs, 1)]));
                    ST(&(x[WS(rs, 1)]), VFNMSI(T9, T4), ms, &(x[WS(rs, 1)]));
               }
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(0, k) for k = 1..3 followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 4, the name
 * string "t1fuv_4", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 4, XSIMD_STRING("t1fuv_4"), twinstr, &GENUS, {9, 6, 2, 0}, 0, 0, 0 };

/* Registers t1fuv_4 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t1fuv_4) (planner *p) {
     X(kdft_dit_register) (p, t1fuv_4, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1fuv_4 -include t1fu.h */

/*
 * This function contains 11 FP additions, 6 FP multiplications,
 * (or, 11 additions, 6 multiplications, 0 fused multiply/add),
 * 13 stack variables, 0 constants, and 8 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t1fu.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged.
 * Th and Tb are both formed with VFNMS and the constant KP500000000; the
 * six ST calls cover xo slots 0 through 5. */
                         Th = VFNMS(LDK(KP500000000), Tg, Td);
                         ST(&(xo[0]), VADD(Td, Tg), ovs, &(xo[0]));
                         Tb = VFNMS(LDK(KP500000000), Ta, T3);
                         ST(&(xo[WS(os, 3)]), VADD(T3, Ta), ovs, &(xo[WS(os, 1)]));
                         ST(&(xo[WS(os, 4)]), VFMAI(Ti, Th), ovs, &(xo[0]));
                         ST(&(xo[WS(os, 2)]), VFNMSI(Ti, Th), ovs, &(xo[0]));
                         ST(&(xo[WS(os, 5)]), VFNMSI(Tc, Tb), ovs, &(xo[WS(os, 1)]));
                         ST(&(xo[WS(os, 1)]), VFMAI(Tc, Tb), ovs, &(xo[WS(os, 1)]));
                    }
               }
          }
     }
     VLEAVE();
}

/* Descriptor passed to the registration call below: the value 6, the name
 * string "n1bv_6", a four-value group, &GENUS and four zeros.
 * NOTE(review): the four-value group presumably holds operation counts --
 * confirm against the kdft_desc declaration. */
static const kdft_desc desc = { 6, XSIMD_STRING("n1bv_6"), {12, 2, 6, 0}, &GENUS, 0, 0, 0, 0 };

/* Registers n1bv_6 and its descriptor on planner p through
 * X(kdft_register). */
void XSIMD(codelet_n1bv_6) (planner *p) {
     X(kdft_register) (p, n1bv_6, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 6 -name n1bv_6 -include n1b.h */

/*
 * This function contains 18 FP additions, 4 FP multiplications,
 * (or, 16 additions, 2 multiplications, 2 fused multiply/add),
 * 19 stack variables, 2 constants, and 12 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "n1b.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged. */
                    ST(&(x[WS(rs, 13)]), VFNMSI(T1C, T1B), ms, &(x[WS(rs, 1)]));
               }
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(0, k) for k = 1, 3, 9 and 19 only,
 * followed by the {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 3),
     VTW(0, 9),
     VTW(0, 19),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 20, the name
 * string "t3fv_20", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 20, XSIMD_STRING("t3fv_20"), twinstr, &GENUS, {92, 72, 46, 0}, 0, 0, 0 };

/* Registers t3fv_20 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t3fv_20) (planner *p) {
     X(kdft_dit_register) (p, t3fv_20, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 20 -name t3fv_20 -include t3f.h */

/*
 * This function contains 138 FP additions, 92 FP multiplications,
 * (or, 126 additions, 80 multiplications, 12 fused multiply/add),
 * 73 stack variables, 4 constants, and 40 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t3f.h"
/* NOTE(review): the braces and VLEAVE() below close a function whose
 * beginning is outside this view. */
                         }
                    }
               }
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(0, k) for k = 1, 3 and 7 only, followed by
 * the {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 3),
     VTW(0, 7),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 8, the name
 * string "t3bv_8", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 8, XSIMD_STRING("t3bv_8"), twinstr, &GENUS, {27, 22, 10, 0}, 0, 0, 0 };

/* Registers t3bv_8 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t3bv_8) (planner *p) {
     X(kdft_dit_register) (p, t3bv_8, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 8 -name t3bv_8 -include t3b.h -sign 1 */

/*
 * This function contains 37 FP additions, 24 FP multiplications,
 * (or, 37 additions, 24 multiplications, 0 fused multiply/add),
 * 31 stack variables, 1 constants, and 16 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t3b.h"
/* NOTE(review): VLEAVE() and the brace below close a function whose
 * beginning is outside this view. */
     VLEAVE();
}

/* Twiddle instruction table: VTW(0, k) for k = 1..8 followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 9, the name
 * string "t1fuv_9", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 9, XSIMD_STRING("t1fuv_9"), twinstr, &GENUS, {20, 20, 34, 0}, 0, 0, 0 };

/* Registers t1fuv_9 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t1fuv_9) (planner *p) {
     X(kdft_dit_register) (p, t1fuv_9, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name t1fuv_9 -include t1fu.h */

/*
 * This function contains 54 FP additions, 42 FP multiplications,
 * (or, 38 additions, 26 multiplications, 16 fused multiply/add),
 * 38 stack variables, 14 constants, and 18 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t1fu.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose header and leading declarations are outside this view;
 * reproduced unchanged. */
          xo = ro;
          /* Each pass decrements i by VL and advances xi and xo by VL * ivs
           * and VL * ovs; the loop runs while i > 0. */
          for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(4, is), MAKE_VOLATILE_STRIDE(4, os)) {
               V T1, T2, T3, T4;
               /* Load the two inputs. */
               T1 = LD(&(xi[0]), ivs, &(xi[0]));
               T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
               /* T3 = T1 + T2 is handed to STM2 for xo[0]; T4 = T1 - T2 is
                * handed to STM2 for xo[2]; STN2 then receives both values. */
               T3 = VADD(T1, T2);
               STM2(&(xo[0]), T3, ovs, &(xo[0]));
               T4 = VSUB(T1, T2);
               STM2(&(xo[2]), T4, ovs, &(xo[2]));
               STN2(&(xo[0]), T3, T4, ovs);
          }
     }
     VLEAVE();
}

/* Descriptor passed to the registration call below: the value 2, the name
 * string "n2fv_2", a four-value group, &GENUS and the trailing values
 * 0, 2, 0, 0.  NOTE(review): the four-value group presumably holds operation
 * counts, and the trailing 2 presumably matches the generator option
 * "-with-ostride 2" -- confirm against the kdft_desc declaration. */
static const kdft_desc desc = { 2, XSIMD_STRING("n2fv_2"), {2, 0, 0, 0}, &GENUS, 0, 2, 0, 0 };

/* Registers n2fv_2 and its descriptor on planner p through
 * X(kdft_register). */
void XSIMD(codelet_n2fv_2) (planner *p) {
     X(kdft_register) (p, n2fv_2, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name n2fv_2 -with-ostride 2 -include n2f.h -store-multiple 2 */

/*
 * This function contains 2 FP additions, 0 FP multiplications,
 * (or, 2 additions, 0 multiplications, 0 fused multiply/add),
 * 7 stack variables, 0 constants, and 5 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "n2f.h"
/* NOTE(review): the braces and VLEAVE() below close a function whose
 * beginning is outside this view. */
                         }
                    }
               }
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(0, k) for k = 1, 3 and 9 only, followed by
 * the {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 3),
     VTW(0, 9),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 10, the name
 * string "t3bv_10", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 10, XSIMD_STRING("t3bv_10"), twinstr, &GENUS, {39, 34, 18, 0}, 0, 0, 0 };

/* Registers t3bv_10 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t3bv_10) (planner *p) {
     X(kdft_dit_register) (p, t3bv_10, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 10 -name t3bv_10 -include t3b.h -sign 1 */

/*
 * This function contains 57 FP additions, 42 FP multiplications,
 * (or, 51 additions, 36 multiplications, 6 fused multiply/add),
 * 41 stack variables, 4 constants, and 20 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t3b.h"
/* NOTE(review): the braces and VLEAVE() below close a function whose
 * beginning is outside this view. */
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(1, k) for k = 1..7 followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(1, 1),
     VTW(1, 2),
     VTW(1, 3),
     VTW(1, 4),
     VTW(1, 5),
     VTW(1, 6),
     VTW(1, 7),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 8, the name
 * string "hc2cfdftv_8", the twinstr table, &GENUS and a four-value group.
 * NOTE(review): the four-value group presumably holds operation counts --
 * confirm against the hc2c_desc declaration. */
static const hc2c_desc desc = { 8, XSIMD_STRING("hc2cfdftv_8"), twinstr, &GENUS, {23, 22, 18, 0} };

/* Registers hc2cfdftv_8 and its descriptor on planner p through
 * X(khc2c_register), passing HC2C_VIA_DFT as the last argument. */
void XSIMD(codelet_hc2cfdftv_8) (planner *p) {
     X(khc2c_register) (p, hc2cfdftv_8, &desc, HC2C_VIA_DFT);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 8 -dit -name hc2cfdftv_8 -include hc2cfv.h */

/*
 * This function contains 41 FP additions, 23 FP multiplications,
 * (or, 41 additions, 23 multiplications, 0 fused multiply/add),
 * 57 stack variables, 3 constants, and 16 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "hc2cfv.h"
/* NOTE(review): the braces and VLEAVE() below close a function whose
 * beginning is outside this view. */
          }
     }
     VLEAVE();
}

/* Twiddle instruction table: VTW(0, k) for k = 1..6 followed by the
 * {TW_NEXT, VL, 0} entry. */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 7, the name
 * string "t1fuv_7", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 7, XSIMD_STRING("t1fuv_7"), twinstr, &GENUS, {15, 15, 21, 0}, 0, 0, 0 };

/* Registers t1fuv_7 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t1fuv_7) (planner *p) {
     X(kdft_dit_register) (p, t1fuv_7, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name t1fuv_7 -include t1fu.h */

/*
 * This function contains 36 FP additions, 30 FP multiplications,
 * (or, 24 additions, 18 multiplications, 12 fused multiply/add),
 * 21 stack variables, 6 constants, and 14 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t1fu.h"
/* NOTE(review): continuation of an initializer list that opens outside this
 * view; the entries are reproduced unchanged and end with a
 * {TW_NEXT, VL, 0} entry. */
     VTW(0, 20),
     VTW(0, 21),
     VTW(0, 22),
     VTW(0, 23),
     VTW(0, 24),
     VTW(0, 25),
     VTW(0, 26),
     VTW(0, 27),
     VTW(0, 28),
     VTW(0, 29),
     VTW(0, 30),
     VTW(0, 31),
     {TW_NEXT, VL, 0}
};

/* Descriptor passed to the registration call below: the value 32, the name
 * string "t2fv_32", the twinstr table, &GENUS, a four-value group and three
 * zeros.  NOTE(review): the four-value group presumably holds operation
 * counts -- confirm against the ct_desc declaration. */
static const ct_desc desc = { 32, XSIMD_STRING("t2fv_32"), twinstr, &GENUS, {119, 62, 98, 0}, 0, 0, 0 };

/* Registers t2fv_32 and its descriptor on planner p through
 * X(kdft_dit_register). */
void XSIMD(codelet_t2fv_32) (planner *p) {
     X(kdft_dit_register) (p, t2fv_32, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t2fv_32 -include t2f.h */

/*
 * This function contains 217 FP additions, 104 FP multiplications,
 * (or, 201 additions, 88 multiplications, 16 fused multiply/add),
 * 59 stack variables, 7 constants, and 64 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "t2f.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged.
 * The visible ST calls target xo slots 11, 4, 14 and 1. */
                                             ST(&(xo[WS(os, 11)]), VFMAI(TQ, TP), ovs, &(xo[WS(os, 1)]));
                                             ST(&(xo[WS(os, 4)]), VFNMSI(TQ, TP), ovs, &(xo[0]));
                                             ST(&(xo[WS(os, 14)]), VFNMSI(TO, TD), ovs, &(xo[0]));
                                             ST(&(xo[WS(os, 1)]), VFMAI(TO, TD), ovs, &(xo[WS(os, 1)]));
                                        }
                                   }
                              }
                         }
                    }
               }
          }
     }
     VLEAVE();
}

/* Descriptor passed to the registration call below: the value 15, the name
 * string "n1bv_15", a four-value group, &GENUS and four zeros.
 * NOTE(review): the four-value group presumably holds operation counts --
 * confirm against the kdft_desc declaration. */
static const kdft_desc desc = { 15, XSIMD_STRING("n1bv_15"), {36, 7, 42, 0}, &GENUS, 0, 0, 0, 0 };

/* Registers n1bv_15 and its descriptor on planner p through
 * X(kdft_register). */
void XSIMD(codelet_n1bv_15) (planner *p) {
     X(kdft_register) (p, n1bv_15, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 15 -name n1bv_15 -include n1b.h */

/*
 * This function contains 78 FP additions, 25 FP multiplications,
 * (or, 64 additions, 11 multiplications, 14 fused multiply/add),
 * 55 stack variables, 10 constants, and 30 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "n1b.h"
/* NOTE(review): the statements up to the brace after VLEAVE() are the end of
 * a function whose beginning is outside this view; reproduced unchanged.
 * The visible ST calls target xo slots 2, 14, 10 and 6. */
                                             ST(&(xo[WS(os, 2)]), VFMAI(Ty, Tx), ovs, &(xo[0]));
                                             ST(&(xo[WS(os, 14)]), VFNMSI(Ty, Tx), ovs, &(xo[0]));
                                             ST(&(xo[WS(os, 10)]), VFMAI(Tw, Tn), ovs, &(xo[0]));
                                             ST(&(xo[WS(os, 6)]), VFNMSI(Tw, Tn), ovs, &(xo[0]));
                                        }
                                   }
                              }
                         }
                    }
               }
          }
     }
     VLEAVE();
}

/* Descriptor passed to the registration call below: the value 16, the name
 * string "n1bv_16", a four-value group, &GENUS and four zeros.
 * NOTE(review): the four-value group presumably holds operation counts --
 * confirm against the kdft_desc declaration. */
static const kdft_desc desc = { 16, XSIMD_STRING("n1bv_16"), {38, 0, 34, 0}, &GENUS, 0, 0, 0, 0 };

/* Registers n1bv_16 and its descriptor on planner p through
 * X(kdft_register). */
void XSIMD(codelet_n1bv_16) (planner *p) {
     X(kdft_register) (p, n1bv_16, &desc);
}
/* Other branch of a conditional that opens outside this view. */
#else /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 16 -name n1bv_16 -include n1b.h */

/*
 * This function contains 72 FP additions, 12 FP multiplications,
 * (or, 68 additions, 8 multiplications, 4 fused multiply/add),
 * 30 stack variables, 3 constants, and 32 memory accesses
 *
 * NOTE(review): the function these statistics describe is not visible in
 * this chunk.
 */
#include "n1b.h"