Beispiel #1
0
				   Tt = VFNMS(LDK(KP939692620), Ts, T7);
				   ST(&(x[WS(ios, 5)]), VFNMSI(TK, TG), dist, &(x[WS(ios, 1)]));
				   ST(&(x[WS(ios, 4)]), VFMAI(TK, TG), dist, &(x[0]));
				   ST(&(x[WS(ios, 2)]), VFMAI(Ty, Tt), dist, &(x[0]));
				   ST(&(x[WS(ios, 7)]), VFNMSI(Ty, Tt), dist, &(x[WS(ios, 1)]));
			      }
			 }
		    }
	       }
	  }
     }
     return W;
}

/*
 * Twiddle instruction table for the t1bv_9 codelet: one VTW entry for each
 * index 1 through 8, closed by a {TW_NEXT, VL, 0} entry (presumably the
 * end-of-list/advance marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(1),
     VTW(2),
     VTW(3),
     VTW(4),
     VTW(5),
     VTW(6),
     VTW(7),
     VTW(8),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the t1bv_9 codelet. Positional fields: the literal 9
 * (presumably the radix, matching the codelet name), the name string, the
 * twinstr table, &GENUS, and a four-number group (presumably operation
 * counts -- confirm against the ct_desc declaration). The meaning of the
 * three trailing zeros is not visible in this file.
 */
static const ct_desc desc = { 9, "t1bv_9", twinstr, &GENUS, {20, 20, 34, 0}, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the t1bv_9 codelet and its
 * descriptor to X(kdft_dit_register).
 */
void X(codelet_t1bv_9) (planner *p) {
     X(kdft_dit_register) (p, t1bv_9, &desc);
}
	       Th = VFNMS(LDK(KP500000000), Tg, Tf);
	       ST(&(x[WS(ios, 1)]), VADD(Tb, Th), dist, &(x[WS(ios, 1)]));
	       ST(&(x[WS(ios, 3)]), VADD(Tf, Tg), dist, &(x[WS(ios, 1)]));
	       ST(&(x[WS(ios, 5)]), VSUB(Th, Tb), dist, &(x[WS(ios, 1)]));
	       Tn = VBYI(VMUL(LDK(KP866025403), VSUB(Tj, Tk)));
	       Tl = VADD(Tj, Tk);
	       Tm = VFNMS(LDK(KP500000000), Tl, Ti);
	       ST(&(x[WS(ios, 2)]), VSUB(Tm, Tn), dist, &(x[0]));
	       ST(&(x[0]), VADD(Ti, Tl), dist, &(x[0]));
	       ST(&(x[WS(ios, 4)]), VADD(Tn, Tm), dist, &(x[0]));
	  }
     }
     END_SIMD();
     return W;
}

/*
 * Twiddle instruction table for the t1bv_6 codelet: one VTW entry for each
 * index 1 through 5, closed by a {TW_NEXT, VL, 0} entry (presumably the
 * end-of-list/advance marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(1),
     VTW(2),
     VTW(3),
     VTW(4),
     VTW(5),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the t1bv_6 codelet. Positional fields: the literal 6
 * (presumably the radix), the name string, the twinstr table, a four-number
 * group (presumably operation counts), then &GENUS and three zeros.
 * NOTE(review): the number group precedes &GENUS in this initializer; the
 * positional order must match the ct_desc declaration this file is built
 * against -- confirm.
 */
static const ct_desc desc = { 6, "t1bv_6", twinstr, {21, 12, 2, 0}, &GENUS, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the t1bv_6 codelet and its
 * descriptor to X(kdft_dit_register).
 */
void X(codelet_t1bv_6) (planner *p) {
     X(kdft_dit_register) (p, t1bv_6, &desc);
}
		    T2d = VBYI(VSUB(T2a, T27));
		    ST(&(x[WS(ios, 23)]), VSUB(T2c, T2d), dist, &(x[WS(ios, 1)]));
		    ST(&(x[WS(ios, 9)]), VADD(T2c, T2d), dist, &(x[WS(ios, 1)]));
		    T2g = VBYI(VSUB(T2e, T2f));
		    T2j = VSUB(T2h, T2i);
		    ST(&(x[WS(ios, 15)]), VADD(T2g, T2j), dist, &(x[WS(ios, 1)]));
		    ST(&(x[WS(ios, 17)]), VSUB(T2j, T2g), dist, &(x[WS(ios, 1)]));
	       }
	  }
     }
     END_SIMD();
     return W;
}

static const tw_instr twinstr[] = {
     VTW(1),
     VTW(2),
     VTW(3),
     VTW(4),
     VTW(5),
     VTW(6),
     VTW(7),
     VTW(8),
     VTW(9),
     VTW(10),
     VTW(11),
     VTW(12),
     VTW(13),
     VTW(14),
     VTW(15),
     VTW(16),
Beispiel #4
0
			 ST(&(x[WS(rs, 11)]), VFMAI(T2j, T2g), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 27)]), VFMAI(T2l, T2k), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 5)]), VFNMSI(T2l, T2k), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 3)]), VFMAI(T2d, T2c), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 29)]), VFNMSI(T2d, T2c), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 19)]), VFMAI(T2b, T24), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 13)]), VFNMSI(T2b, T24), ms, &(x[WS(rs, 1)]));
		    }
	       }
	  }
     }
     VLEAVE();
}

static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     VTW(0, 9),
     VTW(0, 10),
     VTW(0, 11),
     VTW(0, 12),
     VTW(0, 13),
     VTW(0, 14),
     VTW(0, 15),
     VTW(0, 16),
Beispiel #5
0
	       T2 = LD(&(Rp[0]), ms, &(Rp[0]));
	       T3 = LD(&(Rm[0]), -ms, &(Rm[0]));
	       T1 = LDW(&(W[0]));
	       T5 = VFMACONJ(T3, T2);
	       T4 = VZMULI(T1, VFNMSCONJ(T3, T2));
	       T7 = VCONJ(VSUB(T5, T4));
	       T6 = VADD(T4, T5);
	       ST(&(Rm[0]), T7, -ms, &(Rm[0]));
	       ST(&(Rp[0]), T6, ms, &(Rp[0]));
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for the hc2cbdftv_2 codelet: a single
 * VTW(1, 1) entry, closed by a {TW_NEXT, VL, 0} entry (presumably the
 * end-of-list/advance marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(1, 1),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the hc2cbdftv_2 codelet. Positional fields: the literal 2
 * (presumably the radix), the name wrapped in XSIMD_STRING, the twinstr
 * table, &GENUS, and a four-number group (presumably operation counts --
 * confirm against the hc2c_desc declaration).
 */
static const hc2c_desc desc = { 2, XSIMD_STRING("hc2cbdftv_2"), twinstr, &GENUS, {3, 2, 2, 0} };

/*
 * Registration hook: passes planner p, the hc2cbdftv_2 codelet, its
 * descriptor and the HC2C_VIA_DFT constant to X(khc2c_register).
 */
void XSIMD(codelet_hc2cbdftv_2) (planner *p) {
     X(khc2c_register) (p, hc2cbdftv_2, &desc, HC2C_VIA_DFT);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 2 -dif -sign 1 -name hc2cbdftv_2 -include hc2cbv.h */

/*
 * This function contains 5 FP additions, 2 FP multiplications,
 * (or, 5 additions, 2 multiplications, 0 fused multiply/add),
Beispiel #6
0
				   ST(&(x[WS(rs, 5)]), VFNMSI(To, Tj), ms, &(x[WS(rs, 1)]));
				   ST(&(x[WS(rs, 1)]), VFMAI(Tt, Tr), ms, &(x[WS(rs, 1)]));
				   ST(&(x[WS(rs, 6)]), VFNMSI(Tt, Tr), ms, &(x[0]));
				   ST(&(x[WS(rs, 3)]), VFMAI(Ty, Tw), ms, &(x[WS(rs, 1)]));
				   ST(&(x[WS(rs, 4)]), VFNMSI(Ty, Tw), ms, &(x[0]));
			      }
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for the t1fuv_7 codelet: VTW(0, k) for each
 * k from 1 through 6, closed by a {TW_NEXT, VL, 0} entry (presumably the
 * end-of-list/advance marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the t1fuv_7 codelet. Positional fields: the literal 7
 * (presumably the radix), the name wrapped in XSIMD_STRING, the twinstr
 * table, &GENUS, and a four-number group (presumably operation counts --
 * confirm against the ct_desc declaration). The meaning of the three
 * trailing zeros is not visible in this file.
 */
static const ct_desc desc = { 7, XSIMD_STRING("t1fuv_7"), twinstr, &GENUS, {15, 15, 21, 0}, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the t1fuv_7 codelet and its
 * descriptor to X(kdft_dit_register).
 */
void XSIMD(codelet_t1fuv_7) (planner *p) {
     X(kdft_dit_register) (p, t1fuv_7, &desc);
}
#else				/* HAVE_FMA */
Beispiel #7
0
	       Ty = VCONJ(VMUL(LDK(KP500000000), VFNMSI(Tx, Tw)));
	       ST(&(Rm[WS(rs, 2)]), Tv, -ms, &(Rm[0]));
	       Tp = VMUL(LDK(KP500000000), VFNMSI(To, Tn));
	       Tq = VCONJ(VMUL(LDK(KP500000000), VFMAI(To, Tn)));
	       ST(&(Rp[0]), Tm, ms, &(Rp[0]));
	       ST(&(Rp[WS(rs, 1)]), Tz, ms, &(Rp[WS(rs, 1)]));
	       ST(&(Rm[0]), Ty, -ms, &(Rm[0]));
	       ST(&(Rm[WS(rs, 1)]), Tq, -ms, &(Rm[WS(rs, 1)]));
	       ST(&(Rp[WS(rs, 2)]), Tp, ms, &(Rp[0]));
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for the hc2cfdftv_6 codelet: VTW(1, k) for
 * each k from 1 through 5, closed by a {TW_NEXT, VL, 0} entry (presumably
 * the end-of-list/advance marker -- confirm against the tw_instr
 * definition).
 */
static const tw_instr twinstr[] = {
     VTW(1, 1),
     VTW(1, 2),
     VTW(1, 3),
     VTW(1, 4),
     VTW(1, 5),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the hc2cfdftv_6 codelet. Positional fields: the literal 6
 * (presumably the radix), the name wrapped in XSIMD_STRING, the twinstr
 * table, &GENUS, and a four-number group (presumably operation counts --
 * confirm against the hc2c_desc declaration).
 */
static const hc2c_desc desc = { 6, XSIMD_STRING("hc2cfdftv_6"), twinstr, &GENUS, {17, 18, 12, 0} };

/*
 * Registration hook: passes planner p, the hc2cfdftv_6 codelet, its
 * descriptor and the HC2C_VIA_DFT constant to X(khc2c_register).
 */
void XSIMD(codelet_hc2cfdftv_6) (planner *p) {
     X(khc2c_register) (p, hc2cfdftv_6, &desc, HC2C_VIA_DFT);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_hc2cdft_c.native -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 6 -dit -name hc2cfdftv_6 -include hc2cfv.h */
	  T8 = VSUB(T3, T7);
	  Td = VSUB(Ta, Tc);
	  Th = VADD(Tc, Ta);
	  Tf = VCONJ(VMUL(LDK(KP500000000), VFMAI(Td, T8)));
	  Te = VMUL(LDK(KP500000000), VFNMSI(Td, T8));
	  Ti = VMUL(LDK(KP500000000), VSUB(Tg, Th));
	  Tj = VCONJ(VMUL(LDK(KP500000000), VADD(Th, Tg)));
	  ST(&(Rm[0]), Tf, -ms, &(Rm[0]));
	  ST(&(Rp[WS(rs, 1)]), Te, ms, &(Rp[WS(rs, 1)]));
	  ST(&(Rp[0]), Ti, ms, &(Rp[0]));
	  ST(&(Rm[WS(rs, 1)]), Tj, -ms, &(Rm[WS(rs, 1)]));
     }
}

/*
 * Twiddle instruction table for the hc2cfdftv_4 codelet: VTW(1, k) for
 * each k from 1 through 3, closed by a {TW_NEXT, VL, 0} entry (presumably
 * the end-of-list/advance marker -- confirm against the tw_instr
 * definition).
 */
static const tw_instr twinstr[] = {
     VTW(1, 1),
     VTW(1, 2),
     VTW(1, 3),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the hc2cfdftv_4 codelet. Positional fields: the literal 4
 * (presumably the radix), the name string, the twinstr table, &GENUS, and
 * a four-number group (presumably operation counts -- confirm against the
 * hc2c_desc declaration).
 */
static const hc2c_desc desc = { 4, "hc2cfdftv_4", twinstr, &GENUS, {9, 10, 6, 0} };

/*
 * Registration hook: passes planner p, the hc2cfdftv_4 codelet, its
 * descriptor and the HC2C_VIA_DFT constant to X(khc2c_register).
 */
void X(codelet_hc2cfdftv_4) (planner *p) {
     X(khc2c_register) (p, hc2cfdftv_4, &desc, HC2C_VIA_DFT);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_hc2cdft_c -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 4 -dit -name hc2cfdftv_4 -include hc2cfv.h */

/*
Beispiel #9
0
		    T17 = VCONJ(VSUB(T14, T16));
		    ST(&(Rm[WS(rs, 4)]), T17, -ms, &(Rm[0]));
		    T1A = VCONJ(VSUB(T1r, T1z));
		    ST(&(Rm[WS(rs, 2)]), T1A, -ms, &(Rm[0]));
		    T1B = VADD(T1r, T1z);
		    ST(&(Rp[WS(rs, 2)]), T1B, ms, &(Rp[0]));
		    T18 = VADD(T14, T16);
		    ST(&(Rp[WS(rs, 4)]), T18, ms, &(Rp[0]));
	       }
	  }
     }
     VLEAVE();
}

static const tw_instr twinstr[] = {
     VTW(1, 1),
     VTW(1, 2),
     VTW(1, 3),
     VTW(1, 4),
     VTW(1, 5),
     VTW(1, 6),
     VTW(1, 7),
     VTW(1, 8),
     VTW(1, 9),
     VTW(1, 10),
     VTW(1, 11),
     VTW(1, 12),
     VTW(1, 13),
     VTW(1, 14),
     VTW(1, 15),
     {TW_NEXT, VL, 0}
Beispiel #10
0
	       V T8, T4, T9, T7;
	       T8 = VMUL(T2, T6);
	       T4 = VMUL(T2, T3);
	       T9 = VFNMS(T5, T3, T8);
	       T7 = VFMA(T5, T6, T4);
	       ST(&(ii[0]), VADD(T9, Ta), dist, &(ii[0]));
	       ST(&(ii[WS(ios, 1)]), VSUB(Ta, T9), dist, &(ii[WS(ios, 1)]));
	       ST(&(ri[0]), VADD(T1, T7), dist, &(ri[0]));
	       ST(&(ri[WS(ios, 1)]), VSUB(T1, T7), dist, &(ri[WS(ios, 1)]));
	  }
     }
     return W;
}

/*
 * Twiddle instruction table for the t1sv_2 codelet: a single VTW(1) entry,
 * closed by a {TW_NEXT, (2 * VL), 0} entry. Note the closing entry uses
 * 2 * VL rather than VL (presumably the end-of-list/advance marker --
 * confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(1),
     {TW_NEXT, (2 * VL), 0}
};

/*
 * Descriptor for the t1sv_2 codelet. Positional fields: the literal 2
 * (presumably the radix), the name string, the twinstr table, &GENUS, and
 * a four-number group (presumably operation counts -- confirm against the
 * ct_desc declaration). The meaning of the three trailing zeros is not
 * visible in this file.
 */
static const ct_desc desc = { 2, "t1sv_2", twinstr, &GENUS, {4, 2, 2, 0}, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the t1sv_2 codelet and its
 * descriptor to X(kdft_dit_register).
 */
void X(codelet_t1sv_2) (planner *p) {
     X(kdft_dit_register) (p, t1sv_2, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle -simd -compact -variables 4 -pipeline-latency 8 -n 2 -name t1sv_2 -include ts.h */

/*
 * This function contains 6 FP additions, 4 FP multiplications,
 * (or, 4 additions, 2 multiplications, 2 fused multiply/add),
Beispiel #11
0
		    TM = VADD(Tz, TD);
		    TN = VSUB(TL, TM);
		    TP = VADD(TL, TM);
	       }
	       ST(&(x[WS(ios, 2)]), VADD(TK, TN), dist, &(x[0]));
	       ST(&(x[WS(ios, 8)]), VSUB(TP, TO), dist, &(x[0]));
	       ST(&(x[WS(ios, 10)]), VSUB(TN, TK), dist, &(x[0]));
	       ST(&(x[WS(ios, 4)]), VADD(TO, TP), dist, &(x[0]));
	  }
     }
     END_SIMD();
     return W;
}

/*
 * Twiddle instruction table for the t1bv_12 codelet: one VTW entry for
 * each index 1 through 11, closed by a {TW_NEXT, VL, 0} entry (presumably
 * the end-of-list/advance marker -- confirm against the tw_instr
 * definition).
 */
static const tw_instr twinstr[] = {
     VTW(1),
     VTW(2),
     VTW(3),
     VTW(4),
     VTW(5),
     VTW(6),
     VTW(7),
     VTW(8),
     VTW(9),
     VTW(10),
     VTW(11),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the t1bv_12 codelet. Positional fields: the literal 12
 * (presumably the radix), the name string, the twinstr table, a four-number
 * group (presumably operation counts), then &GENUS and three zeros.
 * NOTE(review): the number group precedes &GENUS in this initializer; the
 * positional order must match the ct_desc declaration this file is built
 * against -- confirm.
 */
static const ct_desc desc = { 12, "t1bv_12", twinstr, {55, 26, 4, 0}, &GENUS, 0, 0, 0 };
Beispiel #12
0
					ST(&(x[WS(ios, 14)]), VFNMSI(T12, T11), dist, &(x[0]));
					ST(&(x[WS(ios, 2)]), VFMAI(T12, T11), dist, &(x[0]));
					ST(&(x[WS(ios, 10)]), VFMAI(T10, TK), dist, &(x[0]));
					ST(&(x[WS(ios, 6)]), VFNMSI(T10, TK), dist, &(x[0]));
				   }
			      }
			 }
		    }
	       }
	  }
     }
     return W;
}

/*
 * Twiddle instruction table for the t3fv_16 codelet: VTW entries for the
 * indices 1, 3, 9 and 15 only, closed by a {TW_NEXT, VL, 0} entry
 * (presumably the end-of-list/advance marker -- confirm against the
 * tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(1),
     VTW(3),
     VTW(9),
     VTW(15),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the t3fv_16 codelet. Positional fields: the literal 16
 * (presumably the radix), the name string, the twinstr table, &GENUS, and
 * a four-number group (presumably operation counts -- confirm against the
 * ct_desc declaration). The meaning of the three trailing zeros is not
 * visible in this file.
 */
static const ct_desc desc = { 16, "t3fv_16", twinstr, &GENUS, {64, 52, 34, 0}, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the t3fv_16 codelet and its
 * descriptor to X(kdft_dit_register).
 */
void X(codelet_t3fv_16) (planner *p) {
     X(kdft_dit_register) (p, t3fv_16, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 16 -name t3fv_16 -include t3f.h */
     R *x;
     x = ri;
     BEGIN_SIMD();
     for (i = 0; i < m; i = i + VL, x = x + (VL * dist), W = W + (TWVL * 2)) {
	  V T1, T2, T3, T4, T5, T6;
	  T1 = LD(&(x[0]), dist, &(x[0]));
	  T2 = LD(&(x[WS(is, 1)]), dist, &(x[WS(is, 1)]));
	  T3 = BYTWJ(&(W[0]), VSUB(T1, T2));
	  T4 = LD(&(x[WS(vs, 1)]), dist, &(x[WS(vs, 1)]));
	  T5 = LD(&(x[WS(vs, 1) + WS(is, 1)]), dist, &(x[WS(vs, 1) + WS(is, 1)]));
	  T6 = BYTWJ(&(W[0]), VSUB(T4, T5));
	  ST(&(x[WS(vs, 1)]), T3, dist, &(x[WS(vs, 1)]));
	  ST(&(x[WS(vs, 1) + WS(is, 1)]), T6, dist, &(x[WS(vs, 1) + WS(is, 1)]));
	  ST(&(x[0]), VADD(T1, T2), dist, &(x[0]));
	  ST(&(x[WS(is, 1)]), VADD(T4, T5), dist, &(x[WS(is, 1)]));
     }
     END_SIMD();
     return W;
}

/*
 * Twiddle instruction table for the q1fv_2 codelet: a single VTW(1) entry,
 * closed by a {TW_NEXT, VL, 0} entry (presumably the end-of-list/advance
 * marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(1),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the q1fv_2 codelet. Positional fields: the literal 2
 * (presumably the radix), the name string, the twinstr table, a four-number
 * group (presumably operation counts), then &GENUS and three zeros.
 * NOTE(review): the number group precedes &GENUS in this initializer; the
 * positional order must match the ct_desc declaration this file is built
 * against -- confirm.
 */
static const ct_desc desc = { 2, "q1fv_2", twinstr, {6, 4, 0, 0}, &GENUS, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the q1fv_2 codelet and its
 * descriptor to X(kdft_difsq_register).
 */
void X(codelet_q1fv_2) (planner *p) {
     X(kdft_difsq_register) (p, q1fv_2, &desc);
}
Beispiel #14
0
			 Td = VFNMS(LDK(KP250000000), Tc, T1);
			 ST(&(x[0]), VADD(T1, Tc), ms, &(x[0]));
			 Tj = VFNMS(LDK(KP559016994), Te, Td);
			 Tf = VFMA(LDK(KP559016994), Te, Td);
			 ST(&(x[WS(rs, 2)]), VFNMSI(Tk, Tj), ms, &(x[0]));
			 ST(&(x[WS(rs, 3)]), VFMAI(Tk, Tj), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 4)]), VFNMSI(Ti, Tf), ms, &(x[0]));
			 ST(&(x[WS(rs, 1)]), VFMAI(Ti, Tf), ms, &(x[WS(rs, 1)]));
		    }
	       }
	  }
     }
}

/*
 * Twiddle instruction table for the t2bv_5 codelet: VTW(0, k) for each
 * k from 1 through 4, closed by a {TW_NEXT, VL, 0} entry (presumably the
 * end-of-list/advance marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the t2bv_5 codelet. Positional fields: the literal 5
 * (presumably the radix), the name string, the twinstr table, &GENUS, and
 * a four-number group (presumably operation counts -- confirm against the
 * ct_desc declaration). The meaning of the three trailing zeros is not
 * visible in this file.
 */
static const ct_desc desc = { 5, "t2bv_5", twinstr, &GENUS, {11, 10, 9, 0}, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the t2bv_5 codelet and its
 * descriptor to X(kdft_dit_register).
 */
void X(codelet_t2bv_5) (planner *p) {
     X(kdft_dit_register) (p, t2bv_5, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c -simd -compact -variables 4 -pipeline-latency 8 -n 5 -name t2bv_5 -include t2b.h -sign 1 */
Beispiel #15
0
					     ST(&(x[WS(rs, 2)]), VFMAI(TN, TM), ms, &(x[0]));
					     ST(&(x[WS(rs, 10)]), VFMAI(TL, Ty), ms, &(x[0]));
					     ST(&(x[WS(rs, 6)]), VFNMSI(TL, Ty), ms, &(x[0]));
					}
				   }
			      }
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     VTW(0, 9),
     VTW(0, 10),
     VTW(0, 11),
     VTW(0, 12),
     VTW(0, 13),
     VTW(0, 14),
     VTW(0, 15),
     {TW_NEXT, VL, 0}
				   ST(&(x[WS(rs, 4)]), VFNMSI(TP, TO), ms, &(x[0]));
				   ST(&(x[WS(rs, 9)]), VFNMSI(Tx, Tu), ms, &(x[WS(rs, 1)]));
				   ST(&(x[WS(rs, 1)]), VFMAI(Tx, Tu), ms, &(x[WS(rs, 1)]));
				   ST(&(x[WS(rs, 7)]), VFNMSI(Tz, Ty), ms, &(x[WS(rs, 1)]));
				   ST(&(x[WS(rs, 3)]), VFMAI(Tz, Ty), ms, &(x[WS(rs, 1)]));
			      }
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for the t2bv_10 codelet: VTW(0, k) for each
 * k from 1 through 9, closed by a {TW_NEXT, VL, 0} entry (presumably the
 * end-of-list/advance marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     VTW(0, 9),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the t2bv_10 codelet. Positional fields: the literal 10
 * (presumably the radix), the name wrapped in XSIMD_STRING, the twinstr
 * table, &GENUS, and a four-number group (presumably operation counts --
 * confirm against the ct_desc declaration). The meaning of the three
 * trailing zeros is not visible in this file.
 */
static const ct_desc desc = { 10, XSIMD_STRING("t2bv_10"), twinstr, &GENUS, {33, 22, 18, 0}, 0, 0, 0 };

void XSIMD(codelet_t2bv_10) (planner *p) {
     X(kdft_dit_register) (p, t2bv_10, &desc);
Beispiel #17
0
        {
            V T3, T5, T8, T6, T7;
            T3 = BYTW(&(W[0]), T2);
            T5 = BYTW(&(W[TWVL * 2]), T4);
            T8 = VMUL(LDK(KP866025403), VSUB(T3, T5));
            T6 = VADD(T3, T5);
            T7 = VFNMS(LDK(KP500000000), T6, T1);
            ST(&(x[0]), VADD(T1, T6), ms, &(x[0]));
            ST(&(x[WS(rs, 2)]), VFNMSI(T8, T7), ms, &(x[0]));
            ST(&(x[WS(rs, 1)]), VFMAI(T8, T7), ms, &(x[WS(rs, 1)]));
        }
    }
}

/*
 * Twiddle instruction table for the t1buv_3 codelet: VTW(0, 1) and
 * VTW(0, 2), closed by a {TW_NEXT, VL, 0} entry (presumably the
 * end-of-list/advance marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
    VTW(0, 1),
    VTW(0, 2),
    {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the t1buv_3 codelet. Positional fields: the literal 3
 * (presumably the radix), the name string, the twinstr table, &GENUS, and
 * a four-number group (presumably operation counts -- confirm against the
 * ct_desc declaration). The meaning of the three trailing zeros is not
 * visible in this file.
 */
static const ct_desc desc = { 3, "t1buv_3", twinstr, &GENUS, {5, 5, 3, 0}, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the t1buv_3 codelet and its
 * descriptor to X(kdft_dit_register).
 */
void X(codelet_t1buv_3) (planner *p) {
    X(kdft_dit_register) (p, t1buv_3, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c -simd -compact -variables 4 -pipeline-latency 8 -n 3 -name t1buv_3 -include t1bu.h -sign 1 */

/*
 * This function contains 8 FP additions, 6 FP multiplications,
Beispiel #18
0
					ST(&(x[WS(rs, 9)]), VFNMSI(TG, TD), ms, &(x[WS(rs, 1)]));
					ST(&(x[WS(rs, 1)]), VFMAI(TG, TD), ms, &(x[WS(rs, 1)]));
					ST(&(x[WS(rs, 7)]), VFNMSI(TI, TH), ms, &(x[WS(rs, 1)]));
					ST(&(x[WS(rs, 3)]), VFMAI(TI, TH), ms, &(x[WS(rs, 1)]));
				   }
			      }
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for the t3bv_10 codelet: VTW(0, k) for
 * k = 1, 3 and 9 only, closed by a {TW_NEXT, VL, 0} entry (presumably the
 * end-of-list/advance marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 3),
     VTW(0, 9),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the t3bv_10 codelet. Positional fields: the literal 10
 * (presumably the radix), the name wrapped in XSIMD_STRING, the twinstr
 * table, &GENUS, and a four-number group (presumably operation counts --
 * confirm against the ct_desc declaration). The meaning of the three
 * trailing zeros is not visible in this file.
 */
static const ct_desc desc = { 10, XSIMD_STRING("t3bv_10"), twinstr, &GENUS, {39, 34, 18, 0}, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the t3bv_10 codelet and its
 * descriptor to X(kdft_dit_register).
 */
void XSIMD(codelet_t3bv_10) (planner *p) {
     X(kdft_dit_register) (p, t3bv_10, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 10 -name t3bv_10 -include t3b.h -sign 1 */

/*
Beispiel #19
0
     BEGIN_SIMD();
     for (i = m; i > 0; i = i - VL, x = x + (VL * dist), W = W + (TWVL * 4)) {
	  V T1, T3, T5, T6, T2, T4, T7, T8;
	  T1 = LD(&(x[0]), dist, &(x[0]));
	  T2 = LD(&(x[WS(ios, 1)]), dist, &(x[WS(ios, 1)]));
	  T3 = BYTWJ(&(W[0]), T2);
	  T4 = LD(&(x[WS(ios, 2)]), dist, &(x[0]));
	  T5 = BYTWJ(&(W[TWVL * 2]), T4);
	  T6 = VADD(T3, T5);
	  ST(&(x[0]), VADD(T1, T6), dist, &(x[0]));
	  T7 = VFNMS(LDK(KP500000000), T6, T1);
	  T8 = VBYI(VMUL(LDK(KP866025403), VSUB(T5, T3)));
	  ST(&(x[WS(ios, 2)]), VSUB(T7, T8), dist, &(x[0]));
	  ST(&(x[WS(ios, 1)]), VADD(T7, T8), dist, &(x[WS(ios, 1)]));
     }
     END_SIMD();
     return W;
}

/*
 * Twiddle instruction table for the t1fv_3 codelet: VTW(1) and VTW(2),
 * closed by a {TW_NEXT, VL, 0} entry (presumably the end-of-list/advance
 * marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(1),
     VTW(2),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the t1fv_3 codelet. Positional fields: the literal 3
 * (presumably the radix), the name string, the twinstr table, a four-number
 * group (presumably operation counts), then &GENUS and three zeros.
 * NOTE(review): the number group precedes &GENUS in this initializer; the
 * positional order must match the ct_desc declaration this file is built
 * against -- confirm.
 */
static const ct_desc desc = { 3, "t1fv_3", twinstr, {7, 5, 1, 0}, &GENUS, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the t1fv_3 codelet and its
 * descriptor to X(kdft_dit_register).
 */
void X(codelet_t1fv_3) (planner *p) {
     X(kdft_dit_register) (p, t1fv_3, &desc);
}
	       ST(&(Rp[WS(rs, 2)]), TX, ms, &(Rp[0]));
	       ST(&(Rm[WS(rs, 2)]), TY, -ms, &(Rm[0]));
	       ST(&(Rp[0]), T18, ms, &(Rp[0]));
	       ST(&(Rm[0]), T19, -ms, &(Rm[0]));
	       ST(&(Rm[WS(rs, 4)]), TQ, -ms, &(Rm[0]));
	       ST(&(Rp[WS(rs, 4)]), TP, ms, &(Rp[0]));
	       ST(&(Rp[WS(rs, 3)]), T13, ms, &(Rp[WS(rs, 1)]));
	       ST(&(Rm[WS(rs, 3)]), T14, -ms, &(Rm[WS(rs, 1)]));
	       ST(&(Rm[WS(rs, 1)]), TI, -ms, &(Rm[WS(rs, 1)]));
	       ST(&(Rp[WS(rs, 1)]), TH, ms, &(Rp[WS(rs, 1)]));
	  }
     }
}

/*
 * Twiddle instruction table for the hc2cbdftv_10 codelet: VTW(1, k) for
 * each k from 1 through 9, closed by a {TW_NEXT, VL, 0} entry (presumably
 * the end-of-list/advance marker -- confirm against the tw_instr
 * definition).
 */
static const tw_instr twinstr[] = {
     VTW(1, 1),
     VTW(1, 2),
     VTW(1, 3),
     VTW(1, 4),
     VTW(1, 5),
     VTW(1, 6),
     VTW(1, 7),
     VTW(1, 8),
     VTW(1, 9),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the hc2cbdftv_10 codelet. Positional fields: the literal
 * 10 (presumably the radix), the name string, the twinstr table, &GENUS,
 * and a four-number group (presumably operation counts -- confirm against
 * the hc2c_desc declaration).
 */
static const hc2c_desc desc = { 10, "hc2cbdftv_10", twinstr, &GENUS, {33, 22, 28, 0} };

void X(codelet_hc2cbdftv_10) (planner *p) {
     X(khc2c_register) (p, hc2cbdftv_10, &desc, HC2C_VIA_DFT);
Beispiel #21
0
					ST(&(x[WS(rs, 1)]), VFMAI(Tw, Tv), ms, &(x[WS(rs, 1)]));
					ST(&(x[WS(rs, 7)]), VFNMSI(Tw, Tv), ms, &(x[WS(rs, 1)]));
					ST(&(x[WS(rs, 5)]), VFMAI(Tu, Tm), ms, &(x[WS(rs, 1)]));
					ST(&(x[WS(rs, 3)]), VFNMSI(Tu, Tm), ms, &(x[WS(rs, 1)]));
				   }
			      }
			 }
		    }
	       }
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for the t3bv_8 codelet: VTW(0, k) for
 * k = 1, 3 and 7 only, closed by a {TW_NEXT, VL, 0} entry (presumably the
 * end-of-list/advance marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 3),
     VTW(0, 7),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the t3bv_8 codelet. Positional fields: the literal 8
 * (presumably the radix), the name wrapped in XSIMD_STRING, the twinstr
 * table, &GENUS, and a four-number group (presumably operation counts --
 * confirm against the ct_desc declaration). The meaning of the three
 * trailing zeros is not visible in this file.
 */
static const ct_desc desc = { 8, XSIMD_STRING("t3bv_8"), twinstr, &GENUS, {27, 22, 10, 0}, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the t3bv_8 codelet and its
 * descriptor to X(kdft_dit_register).
 */
void XSIMD(codelet_t3bv_8) (planner *p) {
     X(kdft_dit_register) (p, t3bv_8, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 8 -name t3bv_8 -include t3b.h -sign 1 */

/*
Beispiel #22
0
	       T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
	       T4 = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)]));
	       T5 = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)]));
	       ST(&(x[0]), VADD(T1, T2), ms, &(x[0]));
	       T3 = BYTWJ(&(W[0]), VSUB(T1, T2));
	       ST(&(x[WS(rs, 1)]), VADD(T4, T5), ms, &(x[WS(rs, 1)]));
	       T6 = BYTWJ(&(W[0]), VSUB(T4, T5));
	       ST(&(x[WS(vs, 1)]), T3, ms, &(x[WS(vs, 1)]));
	       ST(&(x[WS(vs, 1) + WS(rs, 1)]), T6, ms, &(x[WS(vs, 1) + WS(rs, 1)]));
	  }
     }
     VLEAVE();
}

/*
 * Twiddle instruction table for the q1fv_2 codelet: a single VTW(0, 1)
 * entry, closed by a {TW_NEXT, VL, 0} entry (presumably the
 * end-of-list/advance marker -- confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor for the q1fv_2 codelet. Positional fields: the literal 2
 * (presumably the radix), the name wrapped in XSIMD_STRING, the twinstr
 * table, &GENUS, and a four-number group (presumably operation counts --
 * confirm against the ct_desc declaration). The meaning of the three
 * trailing zeros is not visible in this file.
 */
static const ct_desc desc = { 2, XSIMD_STRING("q1fv_2"), twinstr, &GENUS, {6, 4, 0, 0}, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the q1fv_2 codelet and its
 * descriptor to X(kdft_difsq_register).
 */
void XSIMD(codelet_q1fv_2) (planner *p) {
     X(kdft_difsq_register) (p, q1fv_2, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twidsq_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 2 -dif -name q1fv_2 -include q1f.h */

/*
 * This function contains 6 FP additions, 4 FP multiplications,
 * (or, 6 additions, 4 multiplications, 0 fused multiply/add),
Beispiel #23
0
			 ST(&(ri[WS(ios, 3)]), VSUB(Tm, Tr), dist, &(ri[WS(ios, 1)]));
			 ST(&(ii[WS(ios, 2)]), VSUB(Tw, Ts), dist, &(ii[0]));
			 ST(&(ii[0]), VADD(Ts, Tw), dist, &(ii[0]));
			 ST(&(ii[WS(ios, 3)]), VADD(Ty, Tx), dist, &(ii[WS(ios, 1)]));
			 ST(&(ii[WS(ios, 1)]), VSUB(Tx, Ty), dist, &(ii[WS(ios, 1)]));
			 ST(&(ri[0]), VADD(T8, Tl), dist, &(ri[0]));
			 ST(&(ri[WS(ios, 2)]), VSUB(T8, Tl), dist, &(ri[0]));
		    }
	       }
	  }
     }
     return W;
}

/*
 * Twiddle instruction table for the t1sv_4 codelet: VTW(1), VTW(2) and
 * VTW(3), closed by a {TW_NEXT, (2 * VL), 0} entry. Note the closing entry
 * uses 2 * VL rather than VL (presumably the end-of-list/advance marker --
 * confirm against the tw_instr definition).
 */
static const tw_instr twinstr[] = {
     VTW(1),
     VTW(2),
     VTW(3),
     {TW_NEXT, (2 * VL), 0}
};

/*
 * Descriptor for the t1sv_4 codelet. Positional fields: the literal 4
 * (presumably the radix), the name string, the twinstr table, &GENUS, and
 * a four-number group (presumably operation counts -- confirm against the
 * ct_desc declaration). The meaning of the three trailing zeros is not
 * visible in this file.
 */
static const ct_desc desc = { 4, "t1sv_4", twinstr, &GENUS, {16, 6, 6, 0}, 0, 0, 0 };

/*
 * Registration hook: passes planner p, the t1sv_4 codelet and its
 * descriptor to X(kdft_dit_register).
 */
void X(codelet_t1sv_4) (planner *p) {
     X(kdft_dit_register) (p, t1sv_4, &desc);
}
#else				/* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle -simd -compact -variables 4 -pipeline-latency 8 -n 4 -name t1sv_4 -include ts.h */

/*