static void hc2cfdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DVK(KP500000000, +0.500000000000000000000000000000000000000000000); { INT m; for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 6)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(16, rs)) { V T1, T2, Tb, T5, T6, T4, T9, T3, Tc, T7, Ta, Tg, T8, Td, Th; V Tf, Te, Ti, Tj; T1 = LD(&(Rp[0]), ms, &(Rp[0])); T2 = LD(&(Rm[0]), -ms, &(Rm[0])); Tb = LDW(&(W[0])); T5 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); T6 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); T4 = LDW(&(W[TWVL * 2])); T9 = LDW(&(W[TWVL * 4])); T3 = VFMACONJ(T2, T1); Tc = VZMULIJ(Tb, VFNMSCONJ(T2, T1)); T7 = VZMULJ(T4, VFMACONJ(T6, T5)); Ta = VZMULIJ(T9, VFNMSCONJ(T6, T5)); Tg = VADD(T3, T7); T8 = VSUB(T3, T7); Td = VSUB(Ta, Tc); Th = VADD(Tc, Ta); Tf = VCONJ(VMUL(LDK(KP500000000), VFMAI(Td, T8))); Te = VMUL(LDK(KP500000000), VFNMSI(Td, T8)); Ti = VMUL(LDK(KP500000000), VSUB(Tg, Th)); Tj = VCONJ(VMUL(LDK(KP500000000), VADD(Th, Tg))); ST(&(Rm[0]), Tf, -ms, &(Rm[0])); ST(&(Rp[WS(rs, 1)]), Te, ms, &(Rp[WS(rs, 1)])); ST(&(Rp[0]), Ti, ms, &(Rp[0])); ST(&(Rm[WS(rs, 1)]), Tj, -ms, &(Rm[WS(rs, 1)])); } } VLEAVE(); }
static void hc2cfdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DVK(KP500000000, +0.500000000000000000000000000000000000000000000); INT m; for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 6)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(rs)) { V T4, Tc, T9, Te, T1, T3, T2, Tb, T6, T8, T7, T5, Td, Tg, Th; V Ta, Tf, Tk, Tl, Ti, Tj; T1 = LD(&(Rp[0]), ms, &(Rp[0])); T2 = LD(&(Rm[0]), -ms, &(Rm[0])); T3 = VCONJ(T2); T4 = VADD(T1, T3); Tb = LDW(&(W[0])); Tc = VZMULIJ(Tb, VSUB(T3, T1)); T6 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); T7 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); T8 = VCONJ(T7); T5 = LDW(&(W[TWVL * 2])); T9 = VZMULJ(T5, VADD(T6, T8)); Td = LDW(&(W[TWVL * 4])); Te = VZMULIJ(Td, VSUB(T8, T6)); Ta = VSUB(T4, T9); Tf = VBYI(VSUB(Tc, Te)); Tg = VMUL(LDK(KP500000000), VSUB(Ta, Tf)); Th = VCONJ(VMUL(LDK(KP500000000), VADD(Ta, Tf))); ST(&(Rp[WS(rs, 1)]), Tg, ms, &(Rp[WS(rs, 1)])); ST(&(Rm[0]), Th, -ms, &(Rm[0])); Ti = VADD(T4, T9); Tj = VADD(Tc, Te); Tk = VCONJ(VMUL(LDK(KP500000000), VSUB(Ti, Tj))); Tl = VMUL(LDK(KP500000000), VADD(Ti, Tj)); ST(&(Rm[WS(rs, 1)]), Tk, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[0]), Tl, ms, &(Rp[0])); } }
/*
 * hc2cfdftv_6 (explicit VCONJ/VBYI flavour) -- in-place vector butterfly over
 * three Rp/Rm row pairs.
 *
 * Each loop iteration:
 *   - loads Rp[0], Rp[WS(rs, 1)], Rp[WS(rs, 2)] and the matching Rm entries
 *     (every LD is issued before the first ST);
 *   - loads five twiddle vectors: W[0], W[TWVL * 2], ... W[TWVL * 8];
 *   - conjugates the Rm inputs with VCONJ, forms sums/differences against the
 *     Rp inputs, multiplies them by the twiddles (VZMULJ / VZMULIJ) and
 *     combines them using the constants 0.25, 0.5 and 0.866025...
 *     (numerically sqrt(3)/2);
 *   - stores six vectors back to the same Rp/Rm slots; everything written to
 *     Rm goes through VCONJ first.
 *
 * Iteration: m runs from mb while m < me in steps of VL.  Rp and Ip advance
 * by VL * ms, Rm and Im step back by VL * ms, and W advances by TWVL * 10
 * after an initial offset of (mb - 1) * ((TWVL / VL) * 10).  Ip and Im are
 * only stepped, never dereferenced.  VLEAVE() is invoked once after the loop.
 *
 * NOTE(review): name and structure suggest a machine-generated FFT codelet;
 * another definition with this name exists in the file, presumably selected
 * by conditional compilation that is not visible here -- confirm.
 * NOTE(review): the last argument of LD/ST looks like an alignment hint; its
 * meaning is defined by the SIMD macro header, not by this code.
 */
static void hc2cfdftv_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DVK(KP250000000, +0.250000000000000000000000000000000000000000000); DVK(KP866025403, +0.866025403784438646763723170752936183471402627); DVK(KP500000000, +0.500000000000000000000000000000000000000000000); { INT m; for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 10)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(24, rs)) { V Ta, Tu, Tn, Tw, Ti, Tv, T1, T8, Tg, Tf, T7, T3, Te, T6, T2; V T4, T9, T5, Tk, Tm, Tj, Tl, Tc, Th, Tb, Td, Tr, Tp, Tq, To; V Tt, Ts, TA, Ty, Tz, Tx, TC, TB; T1 = LD(&(Rp[0]), ms, &(Rp[0])); T8 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); Tg = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); Te = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); Tf = VCONJ(Te); T6 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); T7 = VCONJ(T6); T2 = LD(&(Rm[0]), -ms, &(Rm[0])); T3 = VCONJ(T2); T4 = VADD(T1, T3); T5 = LDW(&(W[TWVL * 4])); T9 = VZMULIJ(T5, VSUB(T7, T8)); Ta = VADD(T4, T9); Tu = VSUB(T4, T9); Tj = LDW(&(W[0])); Tk = VZMULIJ(Tj, VSUB(T3, T1)); Tl = LDW(&(W[TWVL * 6])); Tm = VZMULJ(Tl, VADD(Tf, Tg)); Tn = VADD(Tk, Tm); Tw = VSUB(Tm, Tk); Tb = LDW(&(W[TWVL * 2])); Tc = VZMULJ(Tb, VADD(T7, T8)); Td = LDW(&(W[TWVL * 8])); Th = VZMULIJ(Td, VSUB(Tf, Tg)); Ti = VADD(Tc, Th); Tv = VSUB(Tc, Th); Tr = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VSUB(Tn, Ti)))); To = VADD(Ti, Tn); Tp = VMUL(LDK(KP500000000), VADD(Ta, To)); Tq = VFNMS(LDK(KP250000000), To, VMUL(LDK(KP500000000), Ta)); ST(&(Rp[0]), Tp, ms, &(Rp[0])); Tt = VCONJ(VADD(Tq, Tr)); ST(&(Rm[WS(rs, 1)]), Tt, -ms, &(Rm[WS(rs, 1)])); Ts = VSUB(Tq, Tr); ST(&(Rp[WS(rs, 2)]), Ts, ms, &(Rp[0])); TA = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VSUB(Tw, Tv)))); Tx = VADD(Tv, Tw); Ty = VCONJ(VMUL(LDK(KP500000000), VADD(Tu, Tx))); Tz = VFNMS(LDK(KP250000000), Tx, VMUL(LDK(KP500000000), Tu)); ST(&(Rm[WS(rs, 2)]), Ty, -ms, 
&(Rm[0])); TC = VADD(Tz, TA); ST(&(Rp[WS(rs, 1)]), TC, ms, &(Rp[WS(rs, 1)])); TB = VCONJ(VSUB(Tz, TA)); ST(&(Rm[0]), TB, -ms, &(Rm[0])); } } VLEAVE(); }
/*
 * hc2cfdftv_6 (VFMACONJ/VFNMSCONJ flavour) -- in-place vector butterfly over
 * three Rp/Rm row pairs.
 *
 * Each loop iteration:
 *   - loads Rp[0], Rp[WS(rs, 1)], Rp[WS(rs, 2)] and the matching Rm entries,
 *     plus five twiddle vectors W[0], W[TWVL * 2], ... W[TWVL * 8]; every
 *     LD/LDW is issued before the first ST;
 *   - combines each Rp/Rm pair with VFMACONJ and VFNMSCONJ, multiplies by the
 *     twiddles (VZMULJ / VZMULIJ) and mixes the results with the constants
 *     0.5 and 0.866025... (numerically sqrt(3)/2) via VFNMS / VFMAI / VFNMSI;
 *   - stores six vectors back to the same Rp/Rm slots; everything written to
 *     Rm goes through VCONJ first.
 *
 * Iteration: m runs from mb while m < me in steps of VL.  Rp and Ip advance
 * by VL * ms, Rm and Im step back by VL * ms, and W advances by TWVL * 10
 * after an initial offset of (mb - 1) * ((TWVL / VL) * 10).  Ip and Im are
 * only stepped, never dereferenced.  VLEAVE() is invoked once after the loop.
 *
 * NOTE(review): name and structure suggest a machine-generated FFT codelet;
 * another definition with this name exists in the file, presumably selected
 * by conditional compilation that is not visible here -- confirm.
 * NOTE(review): the last argument of LD/ST looks like an alignment hint; its
 * meaning is defined by the SIMD macro header, not by this code.
 */
static void hc2cfdftv_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DVK(KP500000000, +0.500000000000000000000000000000000000000000000); DVK(KP866025403, +0.866025403784438646763723170752936183471402627); { INT m; for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 10)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(24, rs)) { V T5, T6, T3, Tj, T4, T9, Te, Th, T1, T2, Ti, Tc, Td, Tb, Tg; V T7, Ta, Tt, Tk, Tr, T8, Ts, Tf, Tx, Tu, To, Tl, Tw, Tv, Tn; V Tm, Tz, Ty, Tp, Tq; T1 = LD(&(Rp[0]), ms, &(Rp[0])); T2 = LD(&(Rm[0]), -ms, &(Rm[0])); Ti = LDW(&(W[0])); Tc = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); Td = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); Tb = LDW(&(W[TWVL * 8])); Tg = LDW(&(W[TWVL * 6])); T5 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); T6 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); T3 = VFMACONJ(T2, T1); Tj = VZMULIJ(Ti, VFNMSCONJ(T2, T1)); T4 = LDW(&(W[TWVL * 4])); T9 = LDW(&(W[TWVL * 2])); Te = VZMULIJ(Tb, VFNMSCONJ(Td, Tc)); Th = VZMULJ(Tg, VFMACONJ(Td, Tc)); T7 = VZMULIJ(T4, VFNMSCONJ(T6, T5)); Ta = VZMULJ(T9, VFMACONJ(T6, T5)); Tt = VADD(Tj, Th); Tk = VSUB(Th, Tj); Tr = VADD(T3, T7); T8 = VSUB(T3, T7); Ts = VADD(Ta, Te); Tf = VSUB(Ta, Te); Tx = VMUL(LDK(KP866025403), VSUB(Tt, Ts)); Tu = VADD(Ts, Tt); To = VMUL(LDK(KP866025403), VSUB(Tk, Tf)); Tl = VADD(Tf, Tk); Tw = VFNMS(LDK(KP500000000), Tu, Tr); Tv = VCONJ(VMUL(LDK(KP500000000), VADD(Tr, Tu))); Tn = VFNMS(LDK(KP500000000), Tl, T8); Tm = VMUL(LDK(KP500000000), VADD(T8, Tl)); Tz = VMUL(LDK(KP500000000), VFMAI(Tx, Tw)); Ty = VCONJ(VMUL(LDK(KP500000000), VFNMSI(Tx, Tw))); ST(&(Rm[WS(rs, 2)]), Tv, -ms, &(Rm[0])); Tp = VMUL(LDK(KP500000000), VFNMSI(To, Tn)); Tq = VCONJ(VMUL(LDK(KP500000000), VFMAI(To, Tn))); ST(&(Rp[0]), Tm, ms, &(Rp[0])); ST(&(Rp[WS(rs, 1)]), Tz, ms, &(Rp[WS(rs, 1)])); ST(&(Rm[0]), Ty, -ms, &(Rm[0])); ST(&(Rm[WS(rs, 1)]), Tq, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[WS(rs, 
2)]), Tp, ms, &(Rp[0])); } } VLEAVE(); }
/*
 * hc2cfdftv_2 -- in-place vector butterfly over a single Rp/Rm pair.
 *
 * Per iteration it loads Rp[0], Rm[0] and one twiddle vector W[0], combines
 * the pair with VFMACONJ / VFNMSCONJ (the latter multiplied by the twiddle
 * via VZMULIJ), and writes half of their sum (conjugated) to Rm[0] and half
 * of their difference to Rp[0].
 *
 * m runs from mb while m < me in steps of VL; Rp/Ip move forward and Rm/Im
 * move backward by VL * ms, and W advances by TWVL * 2.  Ip and Im are only
 * stepped, never dereferenced.
 *
 * NOTE(review): the final argument of LD/ST looks like an alignment hint --
 * confirm against the SIMD macro header.
 */
static void hc2cfdftv_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT m;

     /* Position W for the starting index (offset is proportional to mb - 1). */
     W = W + ((mb - 1) * ((TWVL / VL) * 2));

     for (m = mb; m < me;
          m += VL, Rp += (VL * ms), Ip += (VL * ms), Rm -= (VL * ms), Im -= (VL * ms),
          W += (TWVL * 2), MAKE_VOLATILE_STRIDE(rs)) {
          V rp0 = LD(&(Rp[0]), ms, &(Rp[0]));
          V rm0 = LD(&(Rm[0]), -ms, &(Rm[0]));
          V tw0 = LDW(&(W[0]));

          V acc = VFMACONJ(rm0, rp0);
          V rot = VZMULIJ(tw0, VFNMSCONJ(rm0, rp0));

          V out_m0 = VCONJ(VMUL(LDK(KP500000000), VADD(acc, rot)));
          V out_p0 = VMUL(LDK(KP500000000), VSUB(acc, rot));

          /* Rm is written before Rp, as in the generated code. */
          ST(&(Rm[0]), out_m0, -ms, &(Rm[0]));
          ST(&(Rp[0]), out_p0, ms, &(Rp[0]));
     }
}
/*
 * hc2cfdftv_8 (VFMACONJ/VFNMSCONJ flavour) -- in-place vector butterfly over
 * four Rp/Rm row pairs.
 *
 * Each loop iteration:
 *   - loads Rp[WS(rs, k)] and Rm[WS(rs, k)] for k = 0..3 (index 0 is written
 *     as Rp[0] / Rm[0]) plus seven twiddle vectors W[0], W[TWVL * 2], ...
 *     W[TWVL * 12]; every LD/LDW is issued before the first ST;
 *   - combines each Rp/Rm pair with VFMACONJ and VFNMSCONJ, multiplies by the
 *     twiddles (VZMULJ / VZMULIJ) and mixes the results with the constants
 *     0.5 and 0.707106... (numerically sqrt(2)/2) via VFMA / VFNMS / VFMAI /
 *     VFNMSI;
 *   - stores eight vectors back to the same Rp/Rm slots; everything written
 *     to Rm goes through VCONJ first.
 *
 * Iteration: m runs from mb while m < me in steps of VL.  Rp and Ip advance
 * by VL * ms, Rm and Im step back by VL * ms, and W advances by TWVL * 14
 * after an initial offset of (mb - 1) * ((TWVL / VL) * 14).  Ip and Im are
 * only stepped, never dereferenced.  VLEAVE() is invoked once after the loop.
 *
 * NOTE(review): name and structure suggest a machine-generated FFT codelet;
 * another definition with this name exists in the file, presumably selected
 * by conditional compilation that is not visible here -- confirm.
 * NOTE(review): the last argument of LD/ST (Rp[0] for even rows, Rp[WS(rs, 1)]
 * for odd rows) looks like an alignment hint -- confirm against the SIMD
 * macro header.
 */
static void hc2cfdftv_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DVK(KP500000000, +0.500000000000000000000000000000000000000000000); DVK(KP707106781, +0.707106781186547524400844362104849039284835938); { INT m; for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 14)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(32, rs)) { V T3, Tc, Tl, Ts, Tf, Tg, Te, Tp, T7, Ta, T1, T2, Tb, Tj, Tk; V Ti, Tr, T5, T6, T4, T9, Th, Tq, TC, T8, Td, TF, Tm, TG, TD; V Tt, Tu, Tn, TH, TL, TE, TK, Tz, Tv, Ty, To, TJ, TI, TN, TM; V TB, TA, Tx, Tw; T1 = LD(&(Rp[0]), ms, &(Rp[0])); T2 = LD(&(Rm[0]), -ms, &(Rm[0])); Tb = LDW(&(W[0])); Tj = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); Tk = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); Ti = LDW(&(W[TWVL * 12])); Tr = LDW(&(W[TWVL * 10])); T5 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); T6 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); T3 = VFMACONJ(T2, T1); Tc = VZMULIJ(Tb, VFNMSCONJ(T2, T1)); T4 = LDW(&(W[TWVL * 6])); T9 = LDW(&(W[TWVL * 8])); Tl = VZMULIJ(Ti, VFNMSCONJ(Tk, Tj)); Ts = VZMULJ(Tr, VFMACONJ(Tk, Tj)); Tf = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); Tg = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); Te = LDW(&(W[TWVL * 4])); Tp = LDW(&(W[TWVL * 2])); T7 = VZMULJ(T4, VFMACONJ(T6, T5)); Ta = VZMULIJ(T9, VFNMSCONJ(T6, T5)); Th = VZMULIJ(Te, VFNMSCONJ(Tg, Tf)); Tq = VZMULJ(Tp, VFMACONJ(Tg, Tf)); TC = VADD(T3, T7); T8 = VSUB(T3, T7); Td = VSUB(Ta, Tc); TF = VADD(Tc, Ta); Tm = VSUB(Th, Tl); TG = VADD(Th, Tl); TD = VADD(Tq, Ts); Tt = VSUB(Tq, Ts); Tu = VSUB(Tm, Td); Tn = VADD(Td, Tm); TH = VSUB(TF, TG); TL = VADD(TF, TG); TE = VSUB(TC, TD); TK = VADD(TC, TD); Tz = VFMA(LDK(KP707106781), Tu, Tt); Tv = VFNMS(LDK(KP707106781), Tu, Tt); Ty = VFNMS(LDK(KP707106781), Tn, T8); To = VFMA(LDK(KP707106781), Tn, T8); TJ = VCONJ(VMUL(LDK(KP500000000), VFNMSI(TH, TE))); TI = VMUL(LDK(KP500000000), VFMAI(TH, TE)); TN = 
VCONJ(VMUL(LDK(KP500000000), VADD(TL, TK))); TM = VMUL(LDK(KP500000000), VSUB(TK, TL)); TB = VMUL(LDK(KP500000000), VFMAI(Tz, Ty)); TA = VCONJ(VMUL(LDK(KP500000000), VFNMSI(Tz, Ty))); Tx = VCONJ(VMUL(LDK(KP500000000), VFMAI(Tv, To))); Tw = VMUL(LDK(KP500000000), VFNMSI(Tv, To)); ST(&(Rm[WS(rs, 1)]), TJ, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[WS(rs, 2)]), TI, ms, &(Rp[0])); ST(&(Rm[WS(rs, 3)]), TN, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[0]), TM, ms, &(Rp[0])); ST(&(Rp[WS(rs, 3)]), TB, ms, &(Rp[WS(rs, 1)])); ST(&(Rm[WS(rs, 2)]), TA, -ms, &(Rm[0])); ST(&(Rm[0]), Tx, -ms, &(Rm[0])); ST(&(Rp[WS(rs, 1)]), Tw, ms, &(Rp[WS(rs, 1)])); } } VLEAVE(); }
/*
 * hc2cfdftv_8 (explicit VCONJ/VBYI flavour) -- in-place vector butterfly over
 * four Rp/Rm row pairs.
 *
 * Each loop iteration:
 *   - loads Rp[WS(rs, k)] and Rm[WS(rs, k)] for k = 0..3 (index 0 is written
 *     as Rp[0] / Rm[0]) plus seven twiddle vectors W[0], W[TWVL * 2], ...
 *     W[TWVL * 12]; all LDs are issued before the first ST;
 *   - conjugates the Rm inputs with VCONJ, forms sums/differences against the
 *     Rp inputs, multiplies them by the twiddles (VZMULJ / VZMULIJ) and mixes
 *     the results with the constants 0.5, 0.353553... (numerically sqrt(2)/4)
 *     and 0.707106... (numerically sqrt(2)/2);
 *   - stores eight vectors back to the same Rp/Rm slots, interleaved with the
 *     remaining arithmetic; everything written to Rm goes through VCONJ first.
 *
 * Iteration: m runs from mb while m < me in steps of VL.  Rp and Ip advance
 * by VL * ms, Rm and Im step back by VL * ms, and W advances by TWVL * 14
 * after an initial offset of (mb - 1) * ((TWVL / VL) * 14).  Ip and Im are
 * only stepped, never dereferenced.  VLEAVE() is invoked once after the loop.
 *
 * NOTE(review): name and structure suggest a machine-generated FFT codelet;
 * another definition with this name exists in the file, presumably selected
 * by conditional compilation that is not visible here -- confirm.
 * NOTE(review): the last argument of LD/ST looks like an alignment hint; its
 * meaning is defined by the SIMD macro header, not by this code.
 */
static void hc2cfdftv_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DVK(KP707106781, +0.707106781186547524400844362104849039284835938); DVK(KP353553390, +0.353553390593273762200422181052424519642417969); DVK(KP500000000, +0.500000000000000000000000000000000000000000000); { INT m; for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 14)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(32, rs)) { V Ta, TE, Tr, TF, Tl, TK, Tw, TG, T1, T6, T3, T8, T2, T7, T4; V T9, T5, To, Tq, Tn, Tp, Tc, Th, Te, Tj, Td, Ti, Tf, Tk, Tb; V Tg, Tt, Tv, Ts, Tu, Ty, Tz, Tm, Tx, TC, TD, TA, TB, TI, TO; V TL, TP, TH, TJ, TM, TR, TN, TQ; T1 = LD(&(Rp[0]), ms, &(Rp[0])); T6 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); T2 = LD(&(Rm[0]), -ms, &(Rm[0])); T3 = VCONJ(T2); T7 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); T8 = VCONJ(T7); T4 = VADD(T1, T3); T5 = LDW(&(W[TWVL * 6])); T9 = VZMULJ(T5, VADD(T6, T8)); Ta = VADD(T4, T9); TE = VMUL(LDK(KP500000000), VSUB(T4, T9)); Tn = LDW(&(W[0])); To = VZMULIJ(Tn, VSUB(T3, T1)); Tp = LDW(&(W[TWVL * 8])); Tq = VZMULIJ(Tp, VSUB(T8, T6)); Tr = VADD(To, Tq); TF = VSUB(To, Tq); Tc = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); Th = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); Td = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); Te = VCONJ(Td); Ti = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); Tj = VCONJ(Ti); Tb = LDW(&(W[TWVL * 2])); Tf = VZMULJ(Tb, VADD(Tc, Te)); Tg = LDW(&(W[TWVL * 10])); Tk = VZMULJ(Tg, VADD(Th, Tj)); Tl = VADD(Tf, Tk); TK = VSUB(Tf, Tk); Ts = LDW(&(W[TWVL * 4])); Tt = VZMULIJ(Ts, VSUB(Te, Tc)); Tu = LDW(&(W[TWVL * 12])); Tv = VZMULIJ(Tu, VSUB(Tj, Th)); Tw = VADD(Tt, Tv); TG = VSUB(Tv, Tt); Tm = VADD(Ta, Tl); Tx = VADD(Tr, Tw); Ty = VCONJ(VMUL(LDK(KP500000000), VSUB(Tm, Tx))); Tz = VMUL(LDK(KP500000000), VADD(Tm, Tx)); ST(&(Rm[WS(rs, 3)]), Ty, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[0]), Tz, ms, &(Rp[0])); TA = VSUB(Ta, Tl); TB = VBYI(VSUB(Tw, 
Tr)); TC = VCONJ(VMUL(LDK(KP500000000), VSUB(TA, TB))); TD = VMUL(LDK(KP500000000), VADD(TA, TB)); ST(&(Rm[WS(rs, 1)]), TC, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[WS(rs, 2)]), TD, ms, &(Rp[0])); TH = VMUL(LDK(KP353553390), VADD(TF, TG)); TI = VADD(TE, TH); TO = VSUB(TE, TH); TJ = VMUL(LDK(KP707106781), VSUB(TG, TF)); TL = VMUL(LDK(KP500000000), VBYI(VSUB(TJ, TK))); TP = VMUL(LDK(KP500000000), VBYI(VADD(TK, TJ))); TM = VCONJ(VSUB(TI, TL)); ST(&(Rm[0]), TM, -ms, &(Rm[0])); TR = VADD(TO, TP); ST(&(Rp[WS(rs, 3)]), TR, ms, &(Rp[WS(rs, 1)])); TN = VADD(TI, TL); ST(&(Rp[WS(rs, 1)]), TN, ms, &(Rp[WS(rs, 1)])); TQ = VCONJ(VSUB(TO, TP)); ST(&(Rm[WS(rs, 2)]), TQ, -ms, &(Rm[0])); } } VLEAVE(); }
/*
 * hc2cfdftv_12 (VFMACONJ/VFNMSCONJ flavour) -- in-place vector butterfly over
 * six Rp/Rm row pairs.
 *
 * Each loop iteration:
 *   - loads Rp[WS(rs, k)] and Rm[WS(rs, k)] for k = 0..5 (index 0 is written
 *     as Rp[0] / Rm[0]) plus eleven twiddle vectors W[0], W[TWVL * 2], ...
 *     W[TWVL * 20]; every LD/LDW is issued before the first ST;
 *   - combines each Rp/Rm pair with VFMACONJ and VFNMSCONJ, multiplies by the
 *     twiddles (VZMULJ / VZMULIJ) and mixes the results with the constants
 *     0.5 and 0.866025... (numerically sqrt(3)/2) via VFMA / VFNMS / VFMAI /
 *     VFNMSI;
 *   - stores twelve vectors back to the same Rp/Rm slots; everything written
 *     to Rm goes through VCONJ first.
 * The nested brace block only opens a second declaration scope for the later
 * temporaries; it does not change control flow.
 *
 * Iteration: m runs from mb while m < me in steps of VL.  Rp and Ip advance
 * by VL * ms, Rm and Im step back by VL * ms, and W advances by TWVL * 22
 * after an initial offset of (mb - 1) * ((TWVL / VL) * 22).  Ip and Im are
 * only stepped, never dereferenced.
 *
 * NOTE(review): name and structure suggest a machine-generated FFT codelet;
 * another definition with this name exists in the file, presumably selected
 * by conditional compilation that is not visible here -- confirm.
 * NOTE(review): the last argument of LD/ST looks like an alignment hint; its
 * meaning is defined by the SIMD macro header, not by this code.
 */
static void hc2cfdftv_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DVK(KP866025403, +0.866025403784438646763723170752936183471402627); DVK(KP500000000, +0.500000000000000000000000000000000000000000000); INT m; for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 22)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(rs)) { V T3, T7, TH, TE, Th, TC, Tq, T11, TU, Tx, Tb, Tz, Tu, Tw, Tp; V Tl, T9, Ta, T8, Ty, Tn, To, Tm, TG, T1, T2, Tt, T5, T6, T4; V Tv, Tj, Tk, Ti, TD, Tf, Tg, Te, TB, TT, TF, TR, Tr; T1 = LD(&(Rp[0]), ms, &(Rp[0])); T2 = LD(&(Rm[0]), -ms, &(Rm[0])); Tt = LDW(&(W[0])); T5 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); T6 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); T4 = LDW(&(W[TWVL * 6])); Tv = LDW(&(W[TWVL * 8])); Tn = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); To = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); T3 = VFMACONJ(T2, T1); Tu = VZMULIJ(Tt, VFNMSCONJ(T2, T1)); Tm = LDW(&(W[TWVL * 2])); TG = LDW(&(W[TWVL * 4])); T7 = VZMULJ(T4, VFMACONJ(T6, T5)); Tw = VZMULIJ(Tv, VFNMSCONJ(T6, T5)); Tj = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); Tk = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)])); Ti = LDW(&(W[TWVL * 18])); TD = LDW(&(W[TWVL * 20])); Tp = VZMULJ(Tm, VFMACONJ(To, Tn)); TH = VZMULIJ(TG, VFNMSCONJ(To, Tn)); Tf = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); Tg = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); Te = LDW(&(W[TWVL * 10])); TB = LDW(&(W[TWVL * 12])); Tl = VZMULJ(Ti, VFMACONJ(Tk, Tj)); TE = VZMULIJ(TD, VFNMSCONJ(Tk, Tj)); T9 = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); Ta = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); T8 = LDW(&(W[TWVL * 14])); Ty = LDW(&(W[TWVL * 16])); Th = VZMULJ(Te, VFMACONJ(Tg, Tf)); TC = VZMULIJ(TB, VFNMSCONJ(Tg, Tf)); Tq = VADD(Tl, Tp); T11 = VSUB(Tp, Tl); TU = VSUB(Tu, Tw); Tx = VADD(Tu, Tw); Tb = VZMULJ(T8, VFMACONJ(Ta, T9)); Tz = VZMULIJ(Ty, VFNMSCONJ(Ta, T9)); TT = VSUB(TC, TE); TF = VADD(TC, TE); TR = 
VFNMS(LDK(KP500000000), Tq, Th); Tr = VADD(Th, Tq); { V TX, TA, T1d, TV, TY, TI, T1e, T12, TQ, Td, T10, Tc, T1a, TN, TJ; V T1j, T1f, T1b, TS, TM, Ts, T17, T13, TZ, T1i, T1c, T16, TW, TP, TO; V TL, TK, T1k, T1l, T1h, T1g, T18, T19, T15, T14; T10 = VSUB(Tb, T7); Tc = VADD(T7, Tb); TX = VFNMS(LDK(KP500000000), Tx, Tz); TA = VADD(Tx, Tz); T1d = VADD(TU, TT); TV = VSUB(TT, TU); TY = VFNMS(LDK(KP500000000), TF, TH); TI = VADD(TF, TH); T1e = VADD(T10, T11); T12 = VSUB(T10, T11); TQ = VFNMS(LDK(KP500000000), Tc, T3); Td = VADD(T3, Tc); T1a = VADD(TX, TY); TZ = VSUB(TX, TY); TN = VADD(TA, TI); TJ = VSUB(TA, TI); T1j = VMUL(LDK(KP866025403), VADD(T1d, T1e)); T1f = VMUL(LDK(KP866025403), VSUB(T1d, T1e)); T1b = VADD(TQ, TR); TS = VSUB(TQ, TR); TM = VADD(Td, Tr); Ts = VSUB(Td, Tr); T17 = VFMA(LDK(KP866025403), T12, TZ); T13 = VFNMS(LDK(KP866025403), T12, TZ); T1i = VSUB(T1b, T1a); T1c = VADD(T1a, T1b); T16 = VFNMS(LDK(KP866025403), TV, TS); TW = VFMA(LDK(KP866025403), TV, TS); TP = VCONJ(VMUL(LDK(KP500000000), VADD(TN, TM))); TO = VMUL(LDK(KP500000000), VSUB(TM, TN)); TL = VCONJ(VMUL(LDK(KP500000000), VFNMSI(TJ, Ts))); TK = VMUL(LDK(KP500000000), VFMAI(TJ, Ts)); T1k = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T1j, T1i))); T1l = VMUL(LDK(KP500000000), VFMAI(T1j, T1i)); T1h = VMUL(LDK(KP500000000), VFMAI(T1f, T1c)); T1g = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T1f, T1c))); T18 = VMUL(LDK(KP500000000), VFNMSI(T17, T16)); T19 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T17, T16))); T15 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T13, TW))); T14 = VMUL(LDK(KP500000000), VFNMSI(T13, TW)); ST(&(Rm[WS(rs, 5)]), TP, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[0]), TO, ms, &(Rp[0])); ST(&(Rm[WS(rs, 2)]), TL, -ms, &(Rm[0])); ST(&(Rp[WS(rs, 3)]), TK, ms, &(Rp[WS(rs, 1)])); ST(&(Rm[WS(rs, 3)]), T1k, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[WS(rs, 4)]), T1l, ms, &(Rp[0])); ST(&(Rp[WS(rs, 2)]), T1h, ms, &(Rp[0])); ST(&(Rm[WS(rs, 1)]), T1g, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[WS(rs, 5)]), T18, ms, &(Rp[WS(rs, 1)])); ST(&(Rm[WS(rs, 4)]), 
T19, -ms, &(Rm[0])); ST(&(Rm[0]), T15, -ms, &(Rm[0])); ST(&(Rp[WS(rs, 1)]), T14, ms, &(Rp[WS(rs, 1)])); } } }
/*
 * hc2cfdftv_12 (explicit VCONJ/VBYI flavour) -- in-place vector butterfly
 * over six Rp/Rm row pairs.
 *
 * The loop body is split into two brace blocks:
 *   1. the first block loads Rp[WS(rs, k)] and Rm[WS(rs, k)] for k = 0..5
 *      (index 0 is written as Rp[0] / Rm[0]) plus eleven twiddle vectors
 *      W[0], W[TWVL * 2], ... W[TWVL * 20], conjugates the Rm inputs with
 *      VCONJ, applies the twiddles (VZMULJ / VZMULIJ) and leaves sixteen
 *      intermediate vectors in the variables declared just outside it;
 *   2. the second block combines those intermediates using the constants
 *      0.25, 0.5, 0.433012... (numerically sqrt(3)/4) and 0.866025...
 *      (numerically sqrt(3)/2) and stores twelve vectors back to the same
 *      Rp/Rm slots.  Everything written to Rm goes through VCONJ first.
 * All loads therefore happen before any store.
 *
 * Iteration: m runs from mb while m < me in steps of VL.  Rp and Ip advance
 * by VL * ms, Rm and Im step back by VL * ms, and W advances by TWVL * 22
 * after an initial offset of (mb - 1) * ((TWVL / VL) * 22).  Ip and Im are
 * only stepped, never dereferenced.
 *
 * NOTE(review): name and structure suggest a machine-generated FFT codelet;
 * another definition with this name exists in the file, presumably selected
 * by conditional compilation that is not visible here -- confirm.
 * NOTE(review): the last argument of LD/ST looks like an alignment hint; its
 * meaning is defined by the SIMD macro header, not by this code.
 */
static void hc2cfdftv_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DVK(KP433012701, +0.433012701892219323381861585376468091735701313); DVK(KP866025403, +0.866025403784438646763723170752936183471402627); DVK(KP250000000, +0.250000000000000000000000000000000000000000000); DVK(KP500000000, +0.500000000000000000000000000000000000000000000); INT m; for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 22)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(rs)) { V TX, T13, T4, Tf, TZ, TD, TF, T17, TW, T14, Tw, Tl, T10, TL, TN; V T16; { V T1, T3, TA, Tb, Td, Te, T9, TC, T2, Tz, Tc, Ta, T6, T8, T7; V T5, TB, TE, Ti, Tk, TI, Ts, Tu, Tv, Tq, TK, Tj, TH, Tt, Tr; V Tn, Tp, To, Tm, TJ, Th, TM; T1 = LD(&(Rp[0]), ms, &(Rp[0])); T2 = LD(&(Rm[0]), -ms, &(Rm[0])); T3 = VCONJ(T2); Tz = LDW(&(W[0])); TA = VZMULIJ(Tz, VSUB(T3, T1)); Tb = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); Tc = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); Td = VCONJ(Tc); Ta = LDW(&(W[TWVL * 14])); Te = VZMULJ(Ta, VADD(Tb, Td)); T6 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); T7 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); T8 = VCONJ(T7); T5 = LDW(&(W[TWVL * 6])); T9 = VZMULJ(T5, VADD(T6, T8)); TB = LDW(&(W[TWVL * 8])); TC = VZMULIJ(TB, VSUB(T8, T6)); TX = VSUB(TC, TA); T13 = VSUB(Te, T9); T4 = VADD(T1, T3); Tf = VADD(T9, Te); TZ = VFNMS(LDK(KP250000000), Tf, VMUL(LDK(KP500000000), T4)); TD = VADD(TA, TC); TE = LDW(&(W[TWVL * 16])); TF = VZMULIJ(TE, VSUB(Td, Tb)); T17 = VFNMS(LDK(KP500000000), TD, TF); Ti = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); Tj = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); Tk = VCONJ(Tj); TH = LDW(&(W[TWVL * 12])); TI = VZMULIJ(TH, VSUB(Tk, Ti)); Ts = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); Tt = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); Tu = VCONJ(Tt); Tr = LDW(&(W[TWVL * 2])); Tv = VZMULJ(Tr, VADD(Ts, Tu)); Tn = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)])); To = LD(&(Rm[WS(rs, 5)]), 
-ms, &(Rm[WS(rs, 1)])); Tp = VCONJ(To); Tm = LDW(&(W[TWVL * 18])); Tq = VZMULJ(Tm, VADD(Tn, Tp)); TJ = LDW(&(W[TWVL * 20])); TK = VZMULIJ(TJ, VSUB(Tp, Tn)); TW = VSUB(TK, TI); T14 = VSUB(Tv, Tq); Tw = VADD(Tq, Tv); Th = LDW(&(W[TWVL * 10])); Tl = VZMULJ(Th, VADD(Ti, Tk)); T10 = VFNMS(LDK(KP250000000), Tw, VMUL(LDK(KP500000000), Tl)); TL = VADD(TI, TK); TM = LDW(&(W[TWVL * 4])); TN = VZMULIJ(TM, VSUB(Tu, Ts)); T16 = VFNMS(LDK(KP500000000), TL, TN); } { V Ty, TS, TP, TT, Tg, Tx, TG, TO, TQ, TV, TR, TU, T1i, T1o, T1l; V T1p, T1g, T1h, T1j, T1k, T1m, T1r, T1n, T1q, T12, T1c, T19, T1d, TY, T11; V T15, T18, T1a, T1f, T1b, T1e; Tg = VADD(T4, Tf); Tx = VADD(Tl, Tw); Ty = VADD(Tg, Tx); TS = VSUB(Tg, Tx); TG = VADD(TD, TF); TO = VADD(TL, TN); TP = VADD(TG, TO); TT = VBYI(VSUB(TO, TG)); TQ = VCONJ(VMUL(LDK(KP500000000), VSUB(Ty, TP))); ST(&(Rm[WS(rs, 5)]), TQ, -ms, &(Rm[WS(rs, 1)])); TV = VMUL(LDK(KP500000000), VADD(TS, TT)); ST(&(Rp[WS(rs, 3)]), TV, ms, &(Rp[WS(rs, 1)])); TR = VMUL(LDK(KP500000000), VADD(Ty, TP)); ST(&(Rp[0]), TR, ms, &(Rp[0])); TU = VCONJ(VMUL(LDK(KP500000000), VSUB(TS, TT))); ST(&(Rm[WS(rs, 2)]), TU, -ms, &(Rm[0])); T1g = VADD(TX, TW); T1h = VADD(T13, T14); T1i = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VSUB(T1g, T1h)))); T1o = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VADD(T1g, T1h)))); T1j = VADD(TZ, T10); T1k = VMUL(LDK(KP500000000), VADD(T17, T16)); T1l = VSUB(T1j, T1k); T1p = VADD(T1j, T1k); T1m = VADD(T1i, T1l); ST(&(Rp[WS(rs, 2)]), T1m, ms, &(Rp[0])); T1r = VCONJ(VSUB(T1p, T1o)); ST(&(Rm[WS(rs, 3)]), T1r, -ms, &(Rm[WS(rs, 1)])); T1n = VCONJ(VSUB(T1l, T1i)); ST(&(Rm[WS(rs, 1)]), T1n, -ms, &(Rm[WS(rs, 1)])); T1q = VADD(T1o, T1p); ST(&(Rp[WS(rs, 4)]), T1q, ms, &(Rp[0])); TY = VMUL(LDK(KP433012701), VSUB(TW, TX)); T11 = VSUB(TZ, T10); T12 = VADD(TY, T11); T1c = VSUB(T11, TY); T15 = VMUL(LDK(KP866025403), VSUB(T13, T14)); T18 = VSUB(T16, T17); T19 = VMUL(LDK(KP500000000), VBYI(VSUB(T15, T18))); T1d = VMUL(LDK(KP500000000), 
VBYI(VADD(T15, T18))); T1a = VCONJ(VSUB(T12, T19)); ST(&(Rm[0]), T1a, -ms, &(Rm[0])); T1f = VCONJ(VADD(T1c, T1d)); ST(&(Rm[WS(rs, 4)]), T1f, -ms, &(Rm[0])); T1b = VADD(T12, T19); ST(&(Rp[WS(rs, 1)]), T1b, ms, &(Rp[WS(rs, 1)])); T1e = VSUB(T1c, T1d); ST(&(Rp[WS(rs, 5)]), T1e, ms, &(Rp[WS(rs, 1)])); } } }
/*
 * hc2cfdftv_10 (VFMACONJ/VFNMSCONJ flavour) -- in-place vector butterfly over
 * five Rp/Rm row pairs.
 *
 * Each loop iteration:
 *   - loads Rp[WS(rs, k)] and Rm[WS(rs, k)] for k = 0..4 (index 0 is written
 *     as Rp[0] / Rm[0]) plus nine twiddle vectors W[0], W[TWVL * 2], ...
 *     W[TWVL * 16]; every LD/LDW is issued before the first ST;
 *   - combines each Rp/Rm pair with VFMACONJ and VFNMSCONJ, multiplies by the
 *     twiddles (VZMULJ / VZMULIJ) and mixes the results with the constants
 *     0.25, 0.5, 0.559016... (numerically sqrt(5)/4), 0.618033...
 *     (numerically (sqrt(5) - 1)/2) and 0.951056... (numerically
 *     sin(2*pi/5)) via VFMA / VFNMS / VFMAI / VFNMSI;
 *   - stores ten vectors back to the same Rp/Rm slots; everything written to
 *     Rm goes through VCONJ first.
 * The nested brace block only opens a second declaration scope for the later
 * temporaries; it does not change control flow.
 *
 * Iteration: m runs from mb while m < me in steps of VL.  Rp and Ip advance
 * by VL * ms, Rm and Im step back by VL * ms, and W advances by TWVL * 18
 * after an initial offset of (mb - 1) * ((TWVL / VL) * 18).  Ip and Im are
 * only stepped, never dereferenced.  VLEAVE() is invoked once after the loop.
 *
 * NOTE(review): name and structure suggest a machine-generated FFT codelet;
 * another definition with this name exists in the file, presumably selected
 * by conditional compilation that is not visible here -- confirm.
 * NOTE(review): the last argument of LD/ST looks like an alignment hint; its
 * meaning is defined by the SIMD macro header, not by this code.
 */
static void hc2cfdftv_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DVK(KP559016994, +0.559016994374947424102293417182819058860154590); DVK(KP500000000, +0.500000000000000000000000000000000000000000000); DVK(KP250000000, +0.250000000000000000000000000000000000000000000); DVK(KP618033988, +0.618033988749894848204586834365638117720309180); DVK(KP951056516, +0.951056516295153572116439333379382143405698634); { INT m; for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 18)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(40, rs)) { V T5, T6, Tw, Tr, Tc, Tj, Tl, Tm, Tk, Ts, Tg, Ty, T3, T4, T1; V T2, Tv, Tq, Ta, Tb, T9, Ti, Te, Tf, Td, Tx, Tn, Tt, Th, TQ; V TT, Tz, T7, TR, To, Tu, TU; T1 = LD(&(Rp[0]), ms, &(Rp[0])); T2 = LD(&(Rm[0]), -ms, &(Rm[0])); Tv = LDW(&(W[0])); T5 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); T6 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); Tq = LDW(&(W[TWVL * 6])); Ta = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); Tb = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); T9 = LDW(&(W[TWVL * 2])); Ti = LDW(&(W[TWVL * 4])); Tw = VZMULIJ(Tv, VFNMSCONJ(T2, T1)); Te = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); Tf = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); Tr = VZMULJ(Tq, VFMACONJ(T6, T5)); Td = LDW(&(W[TWVL * 12])); Tx = LDW(&(W[TWVL * 10])); Tc = VZMULJ(T9, VFMACONJ(Tb, Ta)); Tj = VZMULIJ(Ti, VFNMSCONJ(Tb, Ta)); Tl = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); Tm = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); Tk = LDW(&(W[TWVL * 14])); Ts = LDW(&(W[TWVL * 16])); Tg = VZMULIJ(Td, VFNMSCONJ(Tf, Te)); Ty = VZMULJ(Tx, VFMACONJ(Tf, Te)); T3 = VFMACONJ(T2, T1); T4 = LDW(&(W[TWVL * 8])); Tn = VZMULJ(Tk, VFMACONJ(Tm, Tl)); Tt = VZMULIJ(Ts, VFNMSCONJ(Tm, Tl)); Th = VSUB(Tc, Tg); TQ = VADD(Tc, Tg); TT = VADD(Tw, Ty); Tz = VSUB(Tw, Ty); T7 = VZMULIJ(T4, VFNMSCONJ(T6, T5)); TR = VADD(Tj, Tn); To = VSUB(Tj, Tn); Tu = VSUB(Tr, Tt); TU = VADD(Tr, Tt); { V TP, T8, TS, T11, 
Tp, TH, TA, TG, TV, T12, TE, TB, TM, TI, TZ; V TW, T17, T13, TD, TC, TY, TX, TL, TF, T10, T16, TN, TO, TK, TJ; V T18, T19, T15, T14; TP = VADD(T3, T7); T8 = VSUB(T3, T7); TS = VADD(TQ, TR); T11 = VSUB(TQ, TR); Tp = VSUB(Th, To); TH = VADD(Th, To); TA = VSUB(Tu, Tz); TG = VADD(Tz, Tu); TV = VADD(TT, TU); T12 = VSUB(TU, TT); TE = VSUB(Tp, TA); TB = VADD(Tp, TA); TM = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TG, TH)); TI = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TH, TG)); TZ = VSUB(TS, TV); TW = VADD(TS, TV); T17 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T11, T12)); T13 = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T12, T11)); TD = VFNMS(LDK(KP250000000), TB, T8); TC = VMUL(LDK(KP500000000), VADD(T8, TB)); TY = VFNMS(LDK(KP250000000), TW, TP); TX = VCONJ(VMUL(LDK(KP500000000), VADD(TP, TW))); TL = VFMA(LDK(KP559016994), TE, TD); TF = VFNMS(LDK(KP559016994), TE, TD); ST(&(Rp[0]), TC, ms, &(Rp[0])); T10 = VFMA(LDK(KP559016994), TZ, TY); T16 = VFNMS(LDK(KP559016994), TZ, TY); ST(&(Rm[WS(rs, 4)]), TX, -ms, &(Rm[0])); TN = VCONJ(VMUL(LDK(KP500000000), VFNMSI(TM, TL))); TO = VMUL(LDK(KP500000000), VFMAI(TM, TL)); TK = VMUL(LDK(KP500000000), VFMAI(TI, TF)); TJ = VCONJ(VMUL(LDK(KP500000000), VFNMSI(TI, TF))); T18 = VMUL(LDK(KP500000000), VFNMSI(T17, T16)); T19 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T17, T16))); T15 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T13, T10))); T14 = VMUL(LDK(KP500000000), VFNMSI(T13, T10)); ST(&(Rm[WS(rs, 3)]), TN, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[WS(rs, 4)]), TO, ms, &(Rp[0])); ST(&(Rp[WS(rs, 2)]), TK, ms, &(Rp[0])); ST(&(Rm[WS(rs, 1)]), TJ, -ms, &(Rm[WS(rs, 1)])); ST(&(Rp[WS(rs, 3)]), T18, ms, &(Rp[WS(rs, 1)])); ST(&(Rm[WS(rs, 2)]), T19, -ms, &(Rm[0])); ST(&(Rm[0]), T15, -ms, &(Rm[0])); ST(&(Rp[WS(rs, 1)]), T14, ms, &(Rp[WS(rs, 1)])); } } } VLEAVE(); }
/*
 * hc2cfdftv_10 (explicit VCONJ/VBYI flavour) -- in-place vector butterfly
 * over five Rp/Rm row pairs.
 *
 * Each loop iteration:
 *   - inside the nested brace block, loads Rp[WS(rs, k)] and Rm[WS(rs, k)]
 *     for k = 0..4 (index 0 is written as Rp[0] / Rm[0]) plus nine twiddle
 *     vectors W[0], W[TWVL * 2], ... W[TWVL * 16], conjugates the Rm inputs
 *     with VCONJ, applies the twiddles (VZMULJ / VZMULIJ) and leaves the
 *     partial sums in variables declared outside that block;
 *   - after the block, mixes those partial sums with the constants 0.125,
 *     0.5, 0.279508... (numerically sqrt(5)/8), 0.587785... (numerically
 *     sin(pi/5)) and 0.951056... (numerically sin(2*pi/5)) and stores ten
 *     vectors back to the same Rp/Rm slots.  Everything written to Rm goes
 *     through VCONJ first.
 * All loads therefore happen before any store.
 *
 * Iteration: m runs from mb while m < me in steps of VL.  Rp and Ip advance
 * by VL * ms, Rm and Im step back by VL * ms, and W advances by TWVL * 18
 * after an initial offset of (mb - 1) * ((TWVL / VL) * 18).  Ip and Im are
 * only stepped, never dereferenced.  VLEAVE() is invoked once after the loop.
 *
 * NOTE(review): name and structure suggest a machine-generated FFT codelet;
 * another definition with this name exists in the file, presumably selected
 * by conditional compilation that is not visible here -- confirm.
 * NOTE(review): the last argument of LD/ST looks like an alignment hint; its
 * meaning is defined by the SIMD macro header, not by this code.
 */
static void hc2cfdftv_10(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms) { DVK(KP125000000, +0.125000000000000000000000000000000000000000000); DVK(KP279508497, +0.279508497187473712051146708591409529430077295); DVK(KP587785252, +0.587785252292473129168705954639072768597652438); DVK(KP951056516, +0.951056516295153572116439333379382143405698634); DVK(KP500000000, +0.500000000000000000000000000000000000000000000); { INT m; for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 18)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 18), MAKE_VOLATILE_STRIDE(40, rs)) { V Tl, Tt, Tu, TY, TZ, T10, Tz, TE, TF, TV, TW, TX, Ta, TU, TN; V TR, TH, TQ, TK, TL, TM, TI, TG, TJ, TT, TO, TP, TS, T18, T1c; V T12, T1b, T15, T16, T17, T14, T11, T13, T1e, T19, T1a, T1d; { V T1, T3, Ty, T8, T7, TB, Tf, Ts, Tk, Tw, Tq, TD, T2, Tx, T6; V TA, Tc, Te, Td, Tb, Tr, Tj, Ti, Th, Tg, Tv, Tn, Tp, To, Tm; V TC, T4, T9, T5; T1 = LD(&(Rp[0]), ms, &(Rp[0])); T2 = LD(&(Rm[0]), -ms, &(Rm[0])); T3 = VCONJ(T2); Tx = LDW(&(W[0])); Ty = VZMULIJ(Tx, VSUB(T3, T1)); T8 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0])); T6 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0])); T7 = VCONJ(T6); TA = LDW(&(W[TWVL * 6])); TB = VZMULJ(TA, VADD(T7, T8)); Tc = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)])); Td = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)])); Te = VCONJ(Td); Tb = LDW(&(W[TWVL * 2])); Tf = VZMULJ(Tb, VADD(Tc, Te)); Tr = LDW(&(W[TWVL * 4])); Ts = VZMULIJ(Tr, VSUB(Te, Tc)); Tj = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)])); Th = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)])); Ti = VCONJ(Th); Tg = LDW(&(W[TWVL * 12])); Tk = VZMULIJ(Tg, VSUB(Ti, Tj)); Tv = LDW(&(W[TWVL * 10])); Tw = VZMULJ(Tv, VADD(Ti, Tj)); Tn = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0])); To = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0])); Tp = VCONJ(To); Tm = LDW(&(W[TWVL * 14])); Tq = VZMULJ(Tm, VADD(Tn, Tp)); TC = LDW(&(W[TWVL * 16])); TD = VZMULIJ(TC, VSUB(Tp, Tn)); Tl = VSUB(Tf, Tk); Tt = VSUB(Tq, 
Ts); Tu = VADD(Tl, Tt); TY = VADD(Ty, Tw); TZ = VADD(TB, TD); T10 = VADD(TY, TZ); Tz = VSUB(Tw, Ty); TE = VSUB(TB, TD); TF = VADD(Tz, TE); TV = VADD(Tf, Tk); TW = VADD(Ts, Tq); TX = VADD(TV, TW); T4 = VADD(T1, T3); T5 = LDW(&(W[TWVL * 8])); T9 = VZMULIJ(T5, VSUB(T7, T8)); Ta = VSUB(T4, T9); TU = VADD(T4, T9); } TL = VSUB(Tl, Tt); TM = VSUB(TE, Tz); TN = VMUL(LDK(KP500000000), VBYI(VFMA(LDK(KP951056516), TL, VMUL(LDK(KP587785252), TM)))); TR = VMUL(LDK(KP500000000), VBYI(VFNMS(LDK(KP587785252), TL, VMUL(LDK(KP951056516), TM)))); TI = VMUL(LDK(KP279508497), VSUB(Tu, TF)); TG = VADD(Tu, TF); TJ = VFNMS(LDK(KP125000000), TG, VMUL(LDK(KP500000000), Ta)); TH = VCONJ(VMUL(LDK(KP500000000), VADD(Ta, TG))); TQ = VSUB(TJ, TI); TK = VADD(TI, TJ); ST(&(Rm[WS(rs, 4)]), TH, -ms, &(Rm[0])); TT = VCONJ(VADD(TQ, TR)); ST(&(Rm[WS(rs, 2)]), TT, -ms, &(Rm[0])); TO = VSUB(TK, TN); ST(&(Rp[WS(rs, 1)]), TO, ms, &(Rp[WS(rs, 1)])); TP = VCONJ(VADD(TK, TN)); ST(&(Rm[0]), TP, -ms, &(Rm[0])); TS = VSUB(TQ, TR); ST(&(Rp[WS(rs, 3)]), TS, ms, &(Rp[WS(rs, 1)])); T16 = VSUB(TZ, TY); T17 = VSUB(TV, TW); T18 = VMUL(LDK(KP500000000), VBYI(VFNMS(LDK(KP587785252), T17, VMUL(LDK(KP951056516), T16)))); T1c = VMUL(LDK(KP500000000), VBYI(VFMA(LDK(KP951056516), T17, VMUL(LDK(KP587785252), T16)))); T14 = VMUL(LDK(KP279508497), VSUB(TX, T10)); T11 = VADD(TX, T10); T13 = VFNMS(LDK(KP125000000), T11, VMUL(LDK(KP500000000), TU)); T12 = VMUL(LDK(KP500000000), VADD(TU, T11)); T1b = VADD(T14, T13); T15 = VSUB(T13, T14); ST(&(Rp[0]), T12, ms, &(Rp[0])); T1e = VADD(T1b, T1c); ST(&(Rp[WS(rs, 4)]), T1e, ms, &(Rp[0])); T19 = VCONJ(VSUB(T15, T18)); ST(&(Rm[WS(rs, 1)]), T19, -ms, &(Rm[WS(rs, 1)])); T1a = VADD(T15, T18); ST(&(Rp[WS(rs, 2)]), T1a, ms, &(Rp[0])); T1d = VCONJ(VSUB(T1b, T1c)); ST(&(Rm[WS(rs, 3)]), T1d, -ms, &(Rm[WS(rs, 1)])); } } VLEAVE(); }