Пример #1
0
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     VTW(0, 9),
     VTW(0, 10),
     VTW(0, 11),
     VTW(0, 12),
     VTW(0, 13),
     VTW(0, 14),
     VTW(0, 15),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t1fv_16: leading value 16, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {53, 30, 34, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 16, XSIMD_STRING("t1fv_16"), twinstr, &GENUS, {53, 30, 34, 0}, 0, 0, 0 };

/* Registration entry point: passes t1fv_16 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t1fv_16) (planner *p) {
     X(kdft_dit_register) (p, t1fv_16, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t1fv_16 -include t1f.h */

/*
 * This function contains 87 FP additions, 42 FP multiplications,
 * (or, 83 additions, 38 multiplications, 4 fused multiply/add),
 * 36 stack variables, 3 constants, and 32 memory accesses
 */
#include "t1f.h"
Пример #2
0
	       T4 = VZMULI(T1, VFNMSCONJ(T3, T2));
	       T7 = VCONJ(VSUB(T5, T4));
	       T6 = VADD(T4, T5);
	       ST(&(Rm[0]), T7, -ms, &(Rm[0]));
	       ST(&(Rp[0]), T6, ms, &(Rp[0]));
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for hc2cbdftv_2: a single VTW(1, 1) entry,
 * closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(1, 1),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for hc2cbdftv_2: leading value 2, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {3, 2, 2, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against hc2c_desc.
 */
static const hc2c_desc desc = { 2, XSIMD_STRING("hc2cbdftv_2"), twinstr, &GENUS, {3, 2, 2, 0} };

/*
 * Registration entry point: passes hc2cbdftv_2, &desc and the HC2C_VIA_DFT
 * flag to khc2c_register for planner p.
 */
void XSIMD(codelet_hc2cbdftv_2) (planner *p) {
     X(khc2c_register) (p, hc2cbdftv_2, &desc, HC2C_VIA_DFT);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 2 -dif -sign 1 -name hc2cbdftv_2 -include hc2cbv.h */

/*
 * This function contains 5 FP additions, 2 FP multiplications,
 * (or, 5 additions, 2 multiplications, 0 fused multiply/add),
 * 9 stack variables, 0 constants, and 4 memory accesses
 */
#include "hc2cbv.h"
Пример #3
0
	       T3 = BYTWJ(&(W[0]), VSUB(T1, T2));
	       ST(&(x[WS(rs, 1)]), VADD(T4, T5), ms, &(x[WS(rs, 1)]));
	       T6 = BYTWJ(&(W[0]), VSUB(T4, T5));
	       ST(&(x[WS(vs, 1)]), T3, ms, &(x[WS(vs, 1)]));
	       ST(&(x[WS(vs, 1) + WS(rs, 1)]), T6, ms, &(x[WS(vs, 1) + WS(rs, 1)]));
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for q1fv_2: a single VTW(0, 1) entry, closed by
 * a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for q1fv_2: leading value 2, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {6, 4, 0, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 2, XSIMD_STRING("q1fv_2"), twinstr, &GENUS, {6, 4, 0, 0}, 0, 0, 0 };

/* Registration entry point: passes q1fv_2 and &desc to kdft_difsq_register for planner p. */
void XSIMD(codelet_q1fv_2) (planner *p) {
     X(kdft_difsq_register) (p, q1fv_2, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -dif -name q1fv_2 -include q1f.h */

/*
 * This function contains 6 FP additions, 4 FP multiplications,
 * (or, 6 additions, 4 multiplications, 0 fused multiply/add),
 * 8 stack variables, 0 constants, and 8 memory accesses
 */
#include "q1f.h"
				   ST(&(xo[WS(os, 8)]), VFNMSI(TE, TB), ovs, &(xo[0]));
				   ST(&(xo[WS(os, 2)]), VFMAI(TE, TB), ovs, &(xo[0]));
				   ST(&(xo[WS(os, 3)]), VFNMSI(Tq, Tp), ovs, &(xo[WS(os, 1)]));
				   ST(&(xo[WS(os, 7)]), VFMAI(Tq, Tp), ovs, &(xo[WS(os, 1)]));
				   ST(&(xo[WS(os, 9)]), VFMAI(To, Tl), ovs, &(xo[WS(os, 1)]));
				   ST(&(xo[WS(os, 1)]), VFNMSI(To, Tl), ovs, &(xo[WS(os, 1)]));
			      }
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Descriptor for n1fv_10: leading value 10, the codelet name string, a
 * four-element count array and a pointer to GENUS; no twiddle table is used.
 * NOTE(review): {24, 4, 18, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against kdft_desc.
 */
static const kdft_desc desc = { 10, XSIMD_STRING("n1fv_10"), {24, 4, 18, 0}, &GENUS, 0, 0, 0, 0 };

/* Registration entry point: passes n1fv_10 and &desc to kdft_register for planner p. */
void XSIMD(codelet_n1fv_10) (planner *p) {
     X(kdft_register) (p, n1fv_10, &desc);
}

#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name n1fv_10 -include n1f.h */

/*
 * This function contains 42 FP additions, 12 FP multiplications,
 * (or, 36 additions, 6 multiplications, 6 fused multiply/add),
 * 33 stack variables, 4 constants, and 20 memory accesses
 */
#include "n1f.h"
}

/*
 * Twiddle instruction table for t2bv_10: one VTW(0, k) entry for each
 * k = 1..9, closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     VTW(0, 9),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t2bv_10: leading value 10, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {33, 22, 18, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 10, XSIMD_STRING("t2bv_10"), twinstr, &GENUS, {33, 22, 18, 0}, 0, 0, 0 };

/* Registration entry point: passes t2bv_10 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t2bv_10) (planner *p) {
     X(kdft_dit_register) (p, t2bv_10, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 10 -name t2bv_10 -include t2b.h -sign 1 */

/*
 * This function contains 51 FP additions, 30 FP multiplications,
 * (or, 45 additions, 24 multiplications, 6 fused multiply/add),
 * 32 stack variables, 4 constants, and 20 memory accesses
 */
#include "t2b.h"
Пример #6
0
/*
 * Twiddle instruction table for t1fv_12: one VTW(0, k) entry for each
 * k = 1..11, closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     VTW(0, 9),
     VTW(0, 10),
     VTW(0, 11),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t1fv_12: leading value 12, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {41, 24, 18, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 12, XSIMD_STRING("t1fv_12"), twinstr, &GENUS, {41, 24, 18, 0}, 0, 0, 0 };

/* Registration entry point: passes t1fv_12 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t1fv_12) (planner *p) {
     X(kdft_dit_register) (p, t1fv_12, &desc);
}
#else

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 12 -name t1fv_12 -include dft/simd/t1f.h */

/*
 * This function contains 59 FP additions, 30 FP multiplications,
 * (or, 55 additions, 26 multiplications, 4 fused multiply/add),
 * 28 stack variables, 2 constants, and 24 memory accesses
 */
#include "dft/simd/t1f.h"
Пример #7
0
	       T1 = LD(&(x[0]), ms, &(x[0]));
	       T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
	       T3 = BYTWJ(&(W[0]), T2);
	       ST(&(x[0]), VADD(T1, T3), ms, &(x[0]));
	       ST(&(x[WS(rs, 1)]), VSUB(T1, T3), ms, &(x[WS(rs, 1)]));
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for t1fuv_2: a single VTW(0, 1) entry, closed by
 * a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t1fuv_2: leading value 2, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {3, 2, 0, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 2, XSIMD_STRING("t1fuv_2"), twinstr, &GENUS, {3, 2, 0, 0}, 0, 0, 0 };

/* Registration entry point: passes t1fuv_2 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t1fuv_2) (planner *p) {
     X(kdft_dit_register) (p, t1fuv_2, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1fuv_2 -include t1fu.h */

/*
 * This function contains 3 FP additions, 2 FP multiplications,
 * (or, 3 additions, 2 multiplications, 0 fused multiply/add),
 * 5 stack variables, 0 constants, and 4 memory accesses
 */
#include "t1fu.h"
Пример #8
0
	       ST(&(Rp[WS(rs, 1)]), Te, ms, &(Rp[WS(rs, 1)]));
	       ST(&(Rp[0]), Ti, ms, &(Rp[0]));
	       ST(&(Rm[WS(rs, 1)]), Tj, -ms, &(Rm[WS(rs, 1)]));
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for hc2cfdftv_4: one VTW(1, k) entry for each
 * k = 1..3, closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(1, 1),
     VTW(1, 2),
     VTW(1, 3),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for hc2cfdftv_4: leading value 4, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {9, 10, 6, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against hc2c_desc.
 */
static const hc2c_desc desc = { 4, XSIMD_STRING("hc2cfdftv_4"), twinstr, &GENUS, {9, 10, 6, 0} };

/*
 * Registration entry point: passes hc2cfdftv_4, &desc and the HC2C_VIA_DFT
 * flag to khc2c_register for planner p.
 */
void XSIMD(codelet_hc2cfdftv_4) (planner *p) {
     X(khc2c_register) (p, hc2cfdftv_4, &desc, HC2C_VIA_DFT);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 4 -dit -name hc2cfdftv_4 -include hc2cfv.h */

/*
 * This function contains 15 FP additions, 10 FP multiplications,
 * (or, 15 additions, 10 multiplications, 0 fused multiply/add),
 * 23 stack variables, 1 constant, and 8 memory accesses
 */
#include "hc2cfv.h"
Пример #9
0
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for t1bv_8: one VTW(0, k) entry for each
 * k = 1..7, closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t1bv_8: leading value 8, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {23, 14, 10, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 8, XSIMD_STRING("t1bv_8"), twinstr, &GENUS, {23, 14, 10, 0}, 0, 0, 0 };

/* Registration entry point: passes t1bv_8 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t1bv_8) (planner *p) {
     X(kdft_dit_register) (p, t1bv_8, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 8 -name t1bv_8 -include t1b.h -sign 1 */

/*
 * This function contains 33 FP additions, 16 FP multiplications,
 * (or, 33 additions, 16 multiplications, 0 fused multiply/add),
 * 24 stack variables, 1 constant, and 16 memory accesses
 */
#include "t1b.h"
Пример #10
0
				   ST(&(xo[WS(os, 2)]), VFMAI(Ti, Td), ovs, &(xo[0]));
				   ST(&(xo[WS(os, 5)]), VFNMSI(Ti, Td), ovs, &(xo[WS(os, 1)]));
				   ST(&(xo[WS(os, 1)]), VFMAI(Tn, Tl), ovs, &(xo[WS(os, 1)]));
				   ST(&(xo[WS(os, 6)]), VFNMSI(Tn, Tl), ovs, &(xo[0]));
				   ST(&(xo[WS(os, 3)]), VFMAI(Ts, Tq), ovs, &(xo[WS(os, 1)]));
				   ST(&(xo[WS(os, 4)]), VFNMSI(Ts, Tq), ovs, &(xo[0]));
			      }
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Descriptor for n1fv_7: leading value 7, the codelet name string, a
 * four-element count array and a pointer to GENUS; no twiddle table is used.
 * NOTE(review): {9, 3, 21, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against kdft_desc.
 */
static const kdft_desc desc = { 7, XSIMD_STRING("n1fv_7"), {9, 3, 21, 0}, &GENUS, 0, 0, 0, 0 };

/* Registration entry point: passes n1fv_7 and &desc to kdft_register for planner p. */
void XSIMD(codelet_n1fv_7) (planner *p) {
     X(kdft_register) (p, n1fv_7, &desc);
}

#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name n1fv_7 -include n1f.h */

/*
 * This function contains 30 FP additions, 18 FP multiplications,
 * (or, 18 additions, 6 multiplications, 12 fused multiply/add),
 * 24 stack variables, 6 constants, and 14 memory accesses
 */
#include "n1f.h"
Пример #11
0
			 ST(&(x[WS(rs, 3)]), VFMAI(Tc, T7), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 1)]), VFNMSI(Tc, T7), ms, &(x[WS(rs, 1)]));
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for t3fv_4: VTW entries for indices 1 and 3 only
 * (index 2 is not listed), closed by a {TW_NEXT, VL, 0} entry.
 * NOTE(review): the sparse index set presumably comes from the -twiddle-log3
 * generator option seen on this codelet's command line -- confirm.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 3),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t3fv_4: leading value 4, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {10, 8, 2, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 4, XSIMD_STRING("t3fv_4"), twinstr, &GENUS, {10, 8, 2, 0}, 0, 0, 0 };

/* Registration entry point: passes t3fv_4 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t3fv_4) (planner *p) {
     X(kdft_dit_register) (p, t3fv_4, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 4 -name t3fv_4 -include t3f.h */

/*
 * This function contains 12 FP additions, 8 FP multiplications,
 * (or, 12 additions, 8 multiplications, 0 fused multiply/add),
 * 16 stack variables, 0 constants, and 8 memory accesses
 */
#include "t3f.h"
Пример #12
0
					ST(&(xo[WS(os, 3)]), VFNMSI(T1i, T1h), ovs, &(xo[WS(os, 1)]));
					ST(&(xo[WS(os, 7)]), VFNMSI(T1o, T1l), ovs, &(xo[WS(os, 1)]));
					ST(&(xo[WS(os, 6)]), VFMAI(T1o, T1l), ovs, &(xo[0]));
					ST(&(xo[WS(os, 11)]), VFNMSI(T1q, T1p), ovs, &(xo[WS(os, 1)]));
					ST(&(xo[WS(os, 2)]), VFMAI(T1q, T1p), ovs, &(xo[0]));
				   }
			      }
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Descriptor for n1bv_13: leading value 13, the codelet name string, a
 * four-element count array and a pointer to GENUS; no twiddle table is used.
 * NOTE(review): {31, 6, 57, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against kdft_desc.
 */
static const kdft_desc desc = { 13, XSIMD_STRING("n1bv_13"), {31, 6, 57, 0}, &GENUS, 0, 0, 0, 0 };

/* Registration entry point: passes n1bv_13 and &desc to kdft_register for planner p. */
void XSIMD(codelet_n1bv_13) (planner *p) {
     X(kdft_register) (p, n1bv_13, &desc);
}

#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 13 -name n1bv_13 -include n1b.h */

/*
 * This function contains 88 FP additions, 34 FP multiplications,
 * (or, 69 additions, 15 multiplications, 19 fused multiply/add),
 * 60 stack variables, 20 constants, and 26 memory accesses
 */
#include "n1b.h"
Пример #13
0
			      STN2(&(xo[32]), T1P, T1Y, ovs);
			      T1Z = VFMAI(T10, TZ);
			      STM2(&(xo[14]), T1Z, ovs, &(xo[2]));
			      STN2(&(xo[12]), T1M, T1Z, ovs);
			      T20 = VFNMSI(T10, TZ);
			      STM2(&(xo[26]), T20, ovs, &(xo[2]));
			      STN2(&(xo[24]), T1R, T20, ovs);
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Descriptor for n2fv_20: leading value 20, the codelet name string, a
 * four-element count array and a pointer to GENUS; no twiddle table is used.
 * NOTE(review): {58, 4, 46, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other), and the 2 in the trailing
 * fields presumably mirrors the "-with-ostride 2" generator option --
 * confirm both against kdft_desc.
 */
static const kdft_desc desc = { 20, XSIMD_STRING("n2fv_20"), {58, 4, 46, 0}, &GENUS, 0, 2, 0, 0 };

/* Registration entry point: passes n2fv_20 and &desc to kdft_register for planner p. */
void XSIMD(codelet_n2fv_20) (planner *p) {
     X(kdft_register) (p, n2fv_20, &desc);
}

#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name n2fv_20 -with-ostride 2 -include n2f.h -store-multiple 2 */

/*
 * This function contains 104 FP additions, 24 FP multiplications,
 * (or, 92 additions, 12 multiplications, 12 fused multiply/add),
 * 57 stack variables, 4 constants, and 50 memory accesses
 */
#include "n2f.h"
Пример #14
0
	       ST(&(x[WS(vs, 2) + WS(rs, 1)]), Tm, ms, &(x[WS(vs, 2) + WS(rs, 1)]));
	       ST(&(x[WS(vs, 2) + WS(rs, 2)]), Tx, ms, &(x[WS(vs, 2)]));
	       ST(&(x[WS(vs, 2) + WS(rs, 3)]), TI, ms, &(x[WS(vs, 2) + WS(rs, 1)]));
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for q1fv_4: one VTW(0, k) entry for each
 * k = 1..3, closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for q1fv_4: leading value 4, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {36, 24, 8, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 4, XSIMD_STRING("q1fv_4"), twinstr, &GENUS, {36, 24, 8, 0}, 0, 0, 0 };

/* Registration entry point: passes q1fv_4 and &desc to kdft_difsq_register for planner p. */
void XSIMD(codelet_q1fv_4) (planner *p) {
     X(kdft_difsq_register) (p, q1fv_4, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -dif -name q1fv_4 -include q1f.h */

/*
 * This function contains 44 FP additions, 24 FP multiplications,
 * (or, 44 additions, 24 multiplications, 0 fused multiply/add),
 * 22 stack variables, 0 constants, and 32 memory accesses
 */
#include "q1f.h"
Пример #15
0
                            ST(&(xo[WS(os, 11)]), VFNMSI(TK, TH), ovs, &(xo[WS(os, 1)]));
                            ST(&(xo[WS(os, 1)]), VFMAI(TK, TH), ovs, &(xo[WS(os, 1)]));
                            ST(&(xo[WS(os, 8)]), VFNMSI(TE, TD), ovs, &(xo[0]));
                            ST(&(xo[WS(os, 4)]), VFMAI(TE, TD), ovs, &(xo[0]));
                            ST(&(xo[WS(os, 2)]), VFMAI(TC, Tv), ovs, &(xo[0]));
                            ST(&(xo[WS(os, 10)]), VFNMSI(TC, Tv), ovs, &(xo[0]));
                        }
                    }
                }
            }
        }
    }
    VLEAVE();
}

/*
 * Descriptor for n1bv_12: leading value 12, the codelet name string, a
 * four-element count array and a pointer to GENUS; no twiddle table is used.
 * NOTE(review): {30, 2, 18, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against kdft_desc.
 */
static const kdft_desc desc = { 12, XSIMD_STRING("n1bv_12"), {30, 2, 18, 0}, &GENUS, 0, 0, 0, 0 };

/* Registration entry point: passes n1bv_12 and &desc to kdft_register for planner p. */
void XSIMD(codelet_n1bv_12) (planner *p) {
    X(kdft_register) (p, n1bv_12, &desc);
}

#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 12 -name n1bv_12 -include n1b.h */

/*
 * This function contains 48 FP additions, 8 FP multiplications,
 * (or, 44 additions, 4 multiplications, 4 fused multiply/add),
 * 27 stack variables, 2 constants, and 24 memory accesses
 */
#include "n1b.h"
Пример #16
0
	       }
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for t1bv_6: one VTW(0, k) entry for each
 * k = 1..5, closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t1bv_6: leading value 6, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {17, 12, 6, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 6, XSIMD_STRING("t1bv_6"), twinstr, &GENUS, {17, 12, 6, 0}, 0, 0, 0 };

/* Registration entry point: passes t1bv_6 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t1bv_6) (planner *p) {
     X(kdft_dit_register) (p, t1bv_6, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 6 -name t1bv_6 -include t1b.h -sign 1 */

/*
 * This function contains 23 FP additions, 14 FP multiplications,
 * (or, 21 additions, 12 multiplications, 2 fused multiply/add),
 * 19 stack variables, 2 constants, and 12 memory accesses
 */
#include "t1b.h"
Пример #17
0
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for t1fv_5: one VTW(0, k) entry for each
 * k = 1..4, closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t1fv_5: leading value 5, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {11, 10, 9, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 5, XSIMD_STRING("t1fv_5"), twinstr, &GENUS, {11, 10, 9, 0}, 0, 0, 0 };

/* Registration entry point: passes t1fv_5 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t1fv_5) (planner *p) {
     X(kdft_dit_register) (p, t1fv_5, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t1fv_5 -include t1f.h */

/*
 * This function contains 20 FP additions, 14 FP multiplications,
 * (or, 17 additions, 11 multiplications, 3 fused multiply/add),
 * 20 stack variables, 4 constants, and 10 memory accesses
 */
#include "t1f.h"
Пример #18
0
	       V T1, T2, T3, T6, T4, T5;
	       T1 = LD(&(xi[0]), ivs, &(xi[0]));
	       T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
	       T3 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
	       T6 = VMUL(LDK(KP866025403), VSUB(T3, T2));
	       T4 = VADD(T2, T3);
	       T5 = VFNMS(LDK(KP500000000), T4, T1);
	       ST(&(xo[0]), VADD(T1, T4), ovs, &(xo[0]));
	       ST(&(xo[WS(os, 1)]), VFMAI(T6, T5), ovs, &(xo[WS(os, 1)]));
	       ST(&(xo[WS(os, 2)]), VFNMSI(T6, T5), ovs, &(xo[0]));
	  }
     }
     VLEAVE();
}

/*
 * Descriptor for n1fv_3: leading value 3, the codelet name string, a
 * four-element count array and a pointer to GENUS; no twiddle table is used.
 * NOTE(review): {3, 1, 3, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against kdft_desc.
 */
static const kdft_desc desc = { 3, XSIMD_STRING("n1fv_3"), {3, 1, 3, 0}, &GENUS, 0, 0, 0, 0 };

/* Registration entry point: passes n1fv_3 and &desc to kdft_register for planner p. */
void XSIMD(codelet_n1fv_3) (planner *p) {
     X(kdft_register) (p, n1fv_3, &desc);
}

#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name n1fv_3 -include n1f.h */

/*
 * This function contains 6 FP additions, 2 FP multiplications,
 * (or, 5 additions, 1 multiplication, 1 fused multiply/add),
 * 11 stack variables, 2 constants, and 6 memory accesses
 */
#include "n1f.h"
Пример #19
0
                ST(&(x[WS(rs, 3)]), VFMAI(T9, T4), ms, &(x[WS(rs, 1)]));
                ST(&(x[WS(rs, 1)]), VFNMSI(T9, T4), ms, &(x[WS(rs, 1)]));
            }
        }
    }
    VLEAVE();
}

/*
 * Twiddle instruction table for t1fuv_4: one VTW(0, k) entry for each
 * k = 1..3, closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
    VTW(0, 1),
    VTW(0, 2),
    VTW(0, 3),
    {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t1fuv_4: leading value 4, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {9, 6, 2, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 4, XSIMD_STRING("t1fuv_4"), twinstr, &GENUS, {9, 6, 2, 0}, 0, 0, 0 };

/* Registration entry point: passes t1fuv_4 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t1fuv_4) (planner *p) {
    X(kdft_dit_register) (p, t1fuv_4, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1fuv_4 -include t1fu.h */

/*
 * This function contains 11 FP additions, 6 FP multiplications,
 * (or, 11 additions, 6 multiplications, 0 fused multiply/add),
 * 13 stack variables, 0 constants, and 8 memory accesses
 */
#include "t1fu.h"
Пример #20
0
			 Th = VFNMS(LDK(KP500000000), Tg, Td);
			 ST(&(xo[0]), VADD(Td, Tg), ovs, &(xo[0]));
			 Tb = VFNMS(LDK(KP500000000), Ta, T3);
			 ST(&(xo[WS(os, 3)]), VADD(T3, Ta), ovs, &(xo[WS(os, 1)]));
			 ST(&(xo[WS(os, 4)]), VFMAI(Ti, Th), ovs, &(xo[0]));
			 ST(&(xo[WS(os, 2)]), VFNMSI(Ti, Th), ovs, &(xo[0]));
			 ST(&(xo[WS(os, 5)]), VFNMSI(Tc, Tb), ovs, &(xo[WS(os, 1)]));
			 ST(&(xo[WS(os, 1)]), VFMAI(Tc, Tb), ovs, &(xo[WS(os, 1)]));
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Descriptor for n1bv_6: leading value 6, the codelet name string, a
 * four-element count array and a pointer to GENUS; no twiddle table is used.
 * NOTE(review): {12, 2, 6, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against kdft_desc.
 */
static const kdft_desc desc = { 6, XSIMD_STRING("n1bv_6"), {12, 2, 6, 0}, &GENUS, 0, 0, 0, 0 };

/* Registration entry point: passes n1bv_6 and &desc to kdft_register for planner p. */
void XSIMD(codelet_n1bv_6) (planner *p) {
     X(kdft_register) (p, n1bv_6, &desc);
}

#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 6 -name n1bv_6 -include n1b.h */

/*
 * This function contains 18 FP additions, 4 FP multiplications,
 * (or, 16 additions, 2 multiplications, 2 fused multiply/add),
 * 19 stack variables, 2 constants, and 12 memory accesses
 */
#include "n1b.h"
Пример #21
0
		    ST(&(x[WS(rs, 13)]), VFNMSI(T1C, T1B), ms, &(x[WS(rs, 1)]));
	       }
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for t3fv_20: VTW entries for indices 1, 3, 9 and
 * 19 only, closed by a {TW_NEXT, VL, 0} entry.
 * NOTE(review): the sparse index set presumably comes from the -twiddle-log3
 * generator option seen on this codelet's command line -- confirm.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 3),
     VTW(0, 9),
     VTW(0, 19),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t3fv_20: leading value 20, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {92, 72, 46, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 20, XSIMD_STRING("t3fv_20"), twinstr, &GENUS, {92, 72, 46, 0}, 0, 0, 0 };

/* Registration entry point: passes t3fv_20 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t3fv_20) (planner *p) {
     X(kdft_dit_register) (p, t3fv_20, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 20 -name t3fv_20 -include t3f.h */

/*
 * This function contains 138 FP additions, 92 FP multiplications,
 * (or, 126 additions, 80 multiplications, 12 fused multiply/add),
 * 73 stack variables, 4 constants, and 40 memory accesses
 */
#include "t3f.h"
Пример #22
0
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for t3bv_8: VTW entries for indices 1, 3 and 7
 * only, closed by a {TW_NEXT, VL, 0} entry.
 * NOTE(review): the sparse index set presumably comes from the -twiddle-log3
 * generator option seen on this codelet's command line -- confirm.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 3),
     VTW(0, 7),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t3bv_8: leading value 8, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {27, 22, 10, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 8, XSIMD_STRING("t3bv_8"), twinstr, &GENUS, {27, 22, 10, 0}, 0, 0, 0 };

/* Registration entry point: passes t3bv_8 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t3bv_8) (planner *p) {
     X(kdft_dit_register) (p, t3bv_8, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 8 -name t3bv_8 -include t3b.h -sign 1 */

/*
 * This function contains 37 FP additions, 24 FP multiplications,
 * (or, 37 additions, 24 multiplications, 0 fused multiply/add),
 * 31 stack variables, 1 constant, and 16 memory accesses
 */
#include "t3b.h"
Пример #23
0
     VLEAVE();
}

/*
 * Twiddle instruction table for t1fuv_9: one VTW(0, k) entry for each
 * k = 1..8, closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t1fuv_9: leading value 9, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {20, 20, 34, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 9, XSIMD_STRING("t1fuv_9"), twinstr, &GENUS, {20, 20, 34, 0}, 0, 0, 0 };

/* Registration entry point: passes t1fuv_9 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t1fuv_9) (planner *p) {
     X(kdft_dit_register) (p, t1fuv_9, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 9 -name t1fuv_9 -include t1fu.h */

/*
 * This function contains 54 FP additions, 42 FP multiplications,
 * (or, 38 additions, 26 multiplications, 16 fused multiply/add),
 * 38 stack variables, 14 constants, and 18 memory accesses
 */
#include "t1fu.h"
Пример #24
0
	  xo = ro;
	  for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(4, is), MAKE_VOLATILE_STRIDE(4, os)) {
	       V T1, T2, T3, T4;
	       T1 = LD(&(xi[0]), ivs, &(xi[0]));
	       T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
	       T3 = VADD(T1, T2);
	       STM2(&(xo[0]), T3, ovs, &(xo[0]));
	       T4 = VSUB(T1, T2);
	       STM2(&(xo[2]), T4, ovs, &(xo[2]));
	       STN2(&(xo[0]), T3, T4, ovs);
	  }
     }
     VLEAVE();
}

/*
 * Descriptor for n2fv_2: leading value 2, the codelet name string, a
 * four-element count array and a pointer to GENUS; no twiddle table is used.
 * NOTE(review): {2, 0, 0, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other), and the 2 in the trailing
 * fields presumably mirrors the "-with-ostride 2" generator option --
 * confirm both against kdft_desc.
 */
static const kdft_desc desc = { 2, XSIMD_STRING("n2fv_2"), {2, 0, 0, 0}, &GENUS, 0, 2, 0, 0 };

/* Registration entry point: passes n2fv_2 and &desc to kdft_register for planner p. */
void XSIMD(codelet_n2fv_2) (planner *p) {
     X(kdft_register) (p, n2fv_2, &desc);
}

#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name n2fv_2 -with-ostride 2 -include n2f.h -store-multiple 2 */

/*
 * This function contains 2 FP additions, 0 FP multiplications,
 * (or, 2 additions, 0 multiplications, 0 fused multiply/add),
 * 7 stack variables, 0 constants, and 5 memory accesses
 */
#include "n2f.h"
Пример #25
0
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for t3bv_10: VTW entries for indices 1, 3 and 9
 * only, closed by a {TW_NEXT, VL, 0} entry.
 * NOTE(review): the sparse index set presumably comes from the -twiddle-log3
 * generator option seen on this codelet's command line -- confirm.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 3),
     VTW(0, 9),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t3bv_10: leading value 10, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {39, 34, 18, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 10, XSIMD_STRING("t3bv_10"), twinstr, &GENUS, {39, 34, 18, 0}, 0, 0, 0 };

/* Registration entry point: passes t3bv_10 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t3bv_10) (planner *p) {
     X(kdft_dit_register) (p, t3bv_10, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 10 -name t3bv_10 -include t3b.h -sign 1 */

/*
 * This function contains 57 FP additions, 42 FP multiplications,
 * (or, 51 additions, 36 multiplications, 6 fused multiply/add),
 * 41 stack variables, 4 constants, and 20 memory accesses
 */
#include "t3b.h"
Пример #26
0
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for hc2cfdftv_8: one VTW(1, k) entry for each
 * k = 1..7, closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(1, 1),
     VTW(1, 2),
     VTW(1, 3),
     VTW(1, 4),
     VTW(1, 5),
     VTW(1, 6),
     VTW(1, 7),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for hc2cfdftv_8: leading value 8, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {23, 22, 18, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against hc2c_desc.
 */
static const hc2c_desc desc = { 8, XSIMD_STRING("hc2cfdftv_8"), twinstr, &GENUS, {23, 22, 18, 0} };

/*
 * Registration entry point: passes hc2cfdftv_8, &desc and the HC2C_VIA_DFT
 * flag to khc2c_register for planner p.
 */
void XSIMD(codelet_hc2cfdftv_8) (planner *p) {
     X(khc2c_register) (p, hc2cfdftv_8, &desc, HC2C_VIA_DFT);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 8 -dit -name hc2cfdftv_8 -include hc2cfv.h */

/*
 * This function contains 41 FP additions, 23 FP multiplications,
 * (or, 41 additions, 23 multiplications, 0 fused multiply/add),
 * 57 stack variables, 3 constants, and 16 memory accesses
 */
#include "hc2cfv.h"
Пример #27
0
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for t1fuv_7: one VTW(0, k) entry for each
 * k = 1..6, closed by a {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t1fuv_7: leading value 7, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {15, 15, 21, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 7, XSIMD_STRING("t1fuv_7"), twinstr, &GENUS, {15, 15, 21, 0}, 0, 0, 0 };

/* Registration entry point: passes t1fuv_7 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t1fuv_7) (planner *p) {
     X(kdft_dit_register) (p, t1fuv_7, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 7 -name t1fuv_7 -include t1fu.h */

/*
 * This function contains 36 FP additions, 30 FP multiplications,
 * (or, 24 additions, 18 multiplications, 12 fused multiply/add),
 * 21 stack variables, 6 constants, and 14 memory accesses
 */
#include "t1fu.h"
Пример #28
0
     VTW(0, 20),
     VTW(0, 21),
     VTW(0, 22),
     VTW(0, 23),
     VTW(0, 24),
     VTW(0, 25),
     VTW(0, 26),
     VTW(0, 27),
     VTW(0, 28),
     VTW(0, 29),
     VTW(0, 30),
     VTW(0, 31),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for t2fv_32: leading value 32, the codelet name string, the
 * twinstr table and a pointer to GENUS.
 * NOTE(review): {119, 62, 98, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against ct_desc.
 */
static const ct_desc desc = { 32, XSIMD_STRING("t2fv_32"), twinstr, &GENUS, {119, 62, 98, 0}, 0, 0, 0 };

/* Registration entry point: passes t2fv_32 and &desc to kdft_dit_register for planner p. */
void XSIMD(codelet_t2fv_32) (planner *p) {
     X(kdft_dit_register) (p, t2fv_32, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t2fv_32 -include t2f.h */

/*
 * This function contains 217 FP additions, 104 FP multiplications,
 * (or, 201 additions, 88 multiplications, 16 fused multiply/add),
 * 59 stack variables, 7 constants, and 64 memory accesses
 */
#include "t2f.h"
Пример #29
0
					     ST(&(xo[WS(os, 11)]), VFMAI(TQ, TP), ovs, &(xo[WS(os, 1)]));
					     ST(&(xo[WS(os, 4)]), VFNMSI(TQ, TP), ovs, &(xo[0]));
					     ST(&(xo[WS(os, 14)]), VFNMSI(TO, TD), ovs, &(xo[0]));
					     ST(&(xo[WS(os, 1)]), VFMAI(TO, TD), ovs, &(xo[WS(os, 1)]));
					}
				   }
			      }
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Descriptor for n1bv_15: leading value 15, the codelet name string, a
 * four-element count array and a pointer to GENUS; no twiddle table is used.
 * NOTE(review): {36, 7, 42, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against kdft_desc.
 */
static const kdft_desc desc = { 15, XSIMD_STRING("n1bv_15"), {36, 7, 42, 0}, &GENUS, 0, 0, 0, 0 };

/* Registration entry point: passes n1bv_15 and &desc to kdft_register for planner p. */
void XSIMD(codelet_n1bv_15) (planner *p) {
     X(kdft_register) (p, n1bv_15, &desc);
}

#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 15 -name n1bv_15 -include n1b.h */

/*
 * This function contains 78 FP additions, 25 FP multiplications,
 * (or, 64 additions, 11 multiplications, 14 fused multiply/add),
 * 55 stack variables, 10 constants, and 30 memory accesses
 */
#include "n1b.h"
Пример #30
0
					     ST(&(xo[WS(os, 2)]), VFMAI(Ty, Tx), ovs, &(xo[0]));
					     ST(&(xo[WS(os, 14)]), VFNMSI(Ty, Tx), ovs, &(xo[0]));
					     ST(&(xo[WS(os, 10)]), VFMAI(Tw, Tn), ovs, &(xo[0]));
					     ST(&(xo[WS(os, 6)]), VFNMSI(Tw, Tn), ovs, &(xo[0]));
					}
				   }
			      }
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Descriptor for n1bv_16: leading value 16, the codelet name string, a
 * four-element count array and a pointer to GENUS; no twiddle table is used.
 * NOTE(review): {38, 0, 34, 0} looks like the operation counts (additions,
 * multiplications, fused multiply/adds, other) -- confirm against kdft_desc.
 */
static const kdft_desc desc = { 16, XSIMD_STRING("n1bv_16"), {38, 0, 34, 0}, &GENUS, 0, 0, 0, 0 };

/* Registration entry point: passes n1bv_16 and &desc to kdft_register for planner p. */
void XSIMD(codelet_n1bv_16) (planner *p) {
     X(kdft_register) (p, n1bv_16, &desc);
}

#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 16 -name n1bv_16 -include n1b.h */

/*
 * This function contains 72 FP additions, 12 FP multiplications,
 * (or, 68 additions, 8 multiplications, 4 fused multiply/add),
 * 30 stack variables, 3 constants, and 32 memory accesses
 */
#include "n1b.h"