예제 #1
0
파일: rader.c 프로젝트: Pinkii-/PCA
/* compute n^m mod p, where m >= 0 and p > 0. */
static int power_mod(int n, int m, int p)
{
     if (m == 0)
	  return 1;
     else if (m % 2 == 0) {
	  int x = power_mod(n, m / 2, p);
	  return MULMOD(x, x, p);
     }
     else
	  return MULMOD(n, power_mod(n, m - 1, p), p);
}
예제 #2
0
INT X(power_mod)(INT n, INT m, INT p)
{
     A(p > 0);
     if (m == 0)
	  return 1;
     else if (m % 2 == 0) {
	  INT x = X(power_mod)(n, m / 2, p);
	  return MULMOD(x, x, p);
     }
     else
	  return MULMOD(n, X(power_mod)(n, m - 1, p), p);
}
예제 #3
0
파일: fourstep.c 프로젝트: LANJr4D/FRED
/* forward transform, sign = -1; transform length = 3 * 2^n */
int
four_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
{
	mpd_size_t R = 3; /* number of rows */
	mpd_size_t C = n / 3; /* number of columns */
	mpd_uint_t w3table[3];
	mpd_uint_t kernel, w0, w1, wstep;
	mpd_uint_t *s, *p0, *p1, *p2;
	mpd_uint_t umod;
#ifdef PPRO
	double dmod;
	uint32_t dinvmod[3];
#endif
	mpd_size_t i, k;


	assert(n >= 48);
	assert(n <= 3*MPD_MAXTRANSFORM_2N);


	SETMODULUS(modnum);
	_mpd_init_w3table(w3table, -1, modnum);
	/* size three ntt on the columns */
	for (p0=a, p1=p0+C, p2=p0+2*C; p0<a+C; p0++,p1++,p2++) {

		SIZE3_NTT(p0, p1, p2, w3table);
	}


	kernel = _mpd_getkernel(n, -1, modnum);
	for (i = 1; i < R; i++) {
		w0 = 1;
		w1 = POWMOD(kernel, i);
		wstep = MULMOD(w1, w1);
		for (k = 0; k < C-1; k += 2) {
			mpd_uint_t x0 = a[i*C+k];
			mpd_uint_t x1 = a[i*C+k+1];
			MULMOD2(&x0, w0, &x1, w1);
			MULMOD2C(&w0, &w1, wstep);
			a[i*C+k] = x0;
			a[i*C+k+1] = x1;
		}
	}

	/* transform rows */
	for (s = a; s < a+n; s += C) {
		if (!six_step_fnt(s, C, modnum)) {
			return 0;
		}
	}

#if 0
	/* An unordered transform is sufficient for convolution. */
	if (ordered) {
		transpose_3xpow2(a, R, C);
	}
#endif

	return 1;
}
예제 #4
0
파일: rader.c 프로젝트: exic/last.fm-dbus
static R *mkomega(enum wakefulness wakefulness, plan *p_, INT n, INT ginv)
{
     plan_dft *p = (plan_dft *) p_;
     R *omega;
     INT i, gpower;
     trigreal scale;
     triggen *t;

     if ((omega = X(rader_tl_find)(n, n, ginv, omegas)))
	  return omega;

     omega = (R *)MALLOC(sizeof(R) * (n - 1) * 2, TWIDDLES);

     scale = n - 1.0; /* normalization for convolution */

     t = X(mktriggen)(wakefulness, n);
     for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) {
	  trigreal w[2];
	  t->cexpl(t, gpower, w);
	  omega[2*i] = w[0] / scale;
	  omega[2*i+1] = FFT_SIGN * w[1] / scale;
     }
     X(triggen_destroy)(t);
     A(gpower == 1);

     p->apply(p_, omega, omega + 1, omega, omega + 1);

     X(rader_tl_insert)(n, n, ginv, omega, &omegas);
     return omega;
}
예제 #5
0
/* backward transform, sign = 1; transform length = 3 * 2**n */
int
inv_four_step_fnt(const mpd_context_t *ctx, mpd_uint_t *a, mpd_size_t n, int modnum)
{
    mpd_size_t R = 3; /* number of rows */
    mpd_size_t C = n / 3; /* number of columns */
    mpd_uint_t w3table[3];
    mpd_uint_t kernel, w0, w1, wstep;
    mpd_uint_t *s, *p0, *p1, *p2;
    mpd_uint_t umod;
#ifdef PPRO
    double dmod;
    uint32_t dinvmod[3];
#endif
    mpd_size_t i, k;


    assert(n >= 48);
    assert(n <= 3*MPD_MAXTRANSFORM_2N);


#if 0
    /* An unordered transform is sufficient for convolution. */
    /* Transpose the matrix, producing an R*C matrix. */
    transpose_3xpow2(a, C, R);
#endif

    /* Length C transform on the rows. */
    for (s = a; s < a+n; s += C) {
        if (!inv_six_step_fnt(ctx, s, C, modnum)) {
            return 0;
        }
    }

    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
    SETMODULUS(modnum);
    kernel = _mpd_getkernel(n, 1, modnum);
    for (i = 1; i < R; i++) {
        w0 = 1;
        w1 = POWMOD(kernel, i);
        wstep = MULMOD(w1, w1);
        for (k = 0; k < C; k += 2) {
            mpd_uint_t x0 = a[i*C+k];
            mpd_uint_t x1 = a[i*C+k+1];
            MULMOD2(&x0, w0, &x1, w1);
            MULMOD2C(&w0, &w1, wstep);
            a[i*C+k] = x0;
            a[i*C+k+1] = x1;
        }
    }

    /* Length R transform on the columns. */
    _mpd_init_w3table(w3table, 1, modnum);
    for (p0=a, p1=p0+C, p2=p0+2*C; p0<a+C; p0++,p1++,p2++) {

        SIZE3_NTT(p0, p1, p2, w3table);
    }

    return 1;
}
예제 #6
0
파일: fourstep.c 프로젝트: LANJr4D/FRED
static inline void
std_size3_ntt(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3,
              mpd_uint_t w3table[3], mpd_uint_t umod)
{
	mpd_uint_t r1, r2;
	mpd_uint_t w;
	mpd_uint_t s, tmp;


	/* k = 0 -> w = 1 */
	s = *x1;
	s = addmod(s, *x2, umod);
	s = addmod(s, *x3, umod);

	r1 = s;

	/* k = 1 */
	s = *x1;

	w = w3table[1];
	tmp = MULMOD(*x2, w);
	s = addmod(s, tmp, umod);

	w = w3table[2];
	tmp = MULMOD(*x3, w);
	s = addmod(s, tmp, umod);

	r2 = s;

	/* k = 2 */
	s = *x1;

	w = w3table[2];
	tmp = MULMOD(*x2, w);
	s = addmod(s, tmp, umod);

	w = w3table[1];
	tmp = MULMOD(*x3, w);
	s = addmod(s, tmp, umod);

	*x3 = s;
	*x2 = r2;
	*x1 = r1;
}
예제 #7
0
파일: rader.c 프로젝트: Pinkii-/PCA
/*
 * Find the period of n in the multiplicative group mod p (p prime).
 * That is, return the smallest m such that n^m == 1 mod p.
 */
static int period(int n, int p)
{
     int prod = n, period = 1;

     while (prod != 1) {
	  prod = MULMOD(prod, n, p);
	  ++period;
	  if (prod == 0)
	       fftw_die("non-prime order in Rader\n");
     }
     return period;
}
예제 #8
0
static void apply_dit(const plan *ego_, R *ri, R *ii, R *ro, R *io)
{
     const P_dit *ego_dit = (const P_dit *) ego_;
     const P *ego;
     plan *cld1, *cld2;
     int os, osm;
     int j, k, gpower, g, ginv, r, m;
     R *buf;
     const R *omega, *W;

     {
	   plan *cld0 = ego_dit->cld;
	   plan_dft *cld = (plan_dft *) cld0;
	   cld->apply(cld0, ri, ii, ro, io);
     }

     ego = (const P *) ego_;
     cld1 = ego->cld1;
     cld2 = ego->cld2;
     r = ego->n;
     m = ego_dit->m;
     g = ego->g; 
     ginv = ego->ginv;
     omega = ego->omega;
     W = ego_dit->W;
     os = ego_dit->os;
     osm = ego->os;
     gpower = 1;

     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);

     for (j = 0; j < m; ++j, ro += os, io += os, W += 2*(r - 1)) {
	  /* First, permute the input and multiply by W, storing in buf: */
	  A(gpower == 1);
	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
	       E rA, iA, rW, iW;
	       rA = ro[gpower * osm];
	       iA = io[gpower * osm];
	       rW = W[2*k];
	       iW = W[2*k+1];
	       buf[2*k] = rW * rA - iW * iA;
	       buf[2*k + 1] = rW * iA + iW * rA;
	  }
	  /* gpower == g^(r-1) mod r == 1 */;
	  
	  apply_aux(r, ginv, cld1, cld2, omega, 
		    buf, ro[0], io[0], ro, io, osm);
     }

     X(ifree)(buf);
}
예제 #9
0
static void apply_aux(int r, int ginv, plan *cld1,plan *cld2, const R *omega,
		      R *buf, R r0, R i0, R *ro, R *io, int os)
{
     int gpower, k;

     /* compute DFT of buf, storing in output (except DC): */
     {
	    plan_dft *cld = (plan_dft *) cld1;
	    cld->apply(cld1, buf, buf+1, ro+os, io+os);
     }

     /* set output DC component: */
     ro[0] = r0 + ro[os];
     io[0] = i0 + io[os];

     /* now, multiply by omega: */
     for (k = 0; k < r - 1; ++k) {
	  E rB, iB, rW, iW;
	  rW = omega[2*k];
	  iW = omega[2*k+1];
	  rB = ro[(k+1)*os];
	  iB = io[(k+1)*os];
	  ro[(k+1)*os] = rW * rB - iW * iB;
	  io[(k+1)*os] = -(rW * iB + iW * rB);
     }
     
     /* this will add input[0] to all of the outputs after the ifft */
     ro[os] += r0;
     io[os] -= i0;

     /* inverse FFT: */
     {
	    plan_dft *cld = (plan_dft *) cld2;
	    cld->apply(cld2, ro+os, io+os, buf, buf+1);
     }
     
     /* finally, do inverse permutation to unshuffle the output: */
     gpower = 1;
     for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
	  ro[gpower * os] = buf[2*k];
	  io[gpower * os] = -buf[2*k+1];
     }
     A(gpower == 1);
}
예제 #10
0
/* initialize transform parameters */
struct fnt_params *
_mpd_init_fnt_params(mpd_size_t n, int sign, int modnum)
{
	struct fnt_params *tparams;
	mpd_uint_t umod;
#ifdef PPRO
	double dmod;
	uint32_t dinvmod[3];
#endif
	mpd_uint_t kernel, imag, w;
	mpd_uint_t i;
	mpd_size_t nhalf;

	assert(ispower2(n));
	assert(sign == -1 || sign == 1);
	assert(P1 <= modnum && modnum <= P3);

	nhalf = n/2;
	tparams = mpd_sh_alloc(sizeof *tparams, nhalf, sizeof (mpd_uint_t));
	if (tparams == NULL) {
		return NULL;
	}

	SETMODULUS(modnum);
	kernel = _mpd_getkernel(n, sign, modnum);
	imag = _mpd_getkernel(4, -sign, modnum);

	tparams->modnum = modnum;
	tparams->modulus = umod;
	tparams->imag = imag;
	tparams->kernel = kernel;

	w  = 1;
	for (i = 0; i < nhalf; i++) {
		tparams->wtable[i] = w;
		w = MULMOD(w, kernel);
	}

	return tparams;
}
예제 #11
0
static R *mkomega(enum wakefulness wakefulness,
		  plan *p_, INT n, INT npad, INT ginv)
{
     plan_rdft *p = (plan_rdft *) p_;
     R *omega;
     INT i, gpower;
     trigreal scale;
     triggen *t;

     if ((omega = X(rader_tl_find)(n, npad + 1, ginv, omegas))) 
	  return omega;

     omega = (R *)MALLOC(sizeof(R) * npad, TWIDDLES);

     scale = npad; /* normalization for convolution */

     t = X(mktriggen)(wakefulness, n);
     for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) {
	  trigreal w[2];
	  t->cexpl(t, gpower, w);
	  omega[i] = (w[0] + w[1]) / scale;
     }
     X(triggen_destroy)(t);
     A(gpower == 1);

     A(npad == n - 1 || npad >= 2*(n - 1) - 1);

     for (; i < npad; ++i)
	  omega[i] = K(0.0);
     if (npad > n - 1)
	  for (i = 1; i < n-1; ++i)
	       omega[npad - i] = omega[n - 1 - i];

     p->apply(p_, omega, omega);

     X(rader_tl_insert)(n, npad + 1, ginv, omega, &omegas);
     return omega;
}
예제 #12
0
static R *mktwiddle(int m, int r, int g)
{
     int i, j, gpower;
     int n = r * m;
     R *W;

     if ((W = X(rader_tl_find)(m, r, g, twiddles)) != 0 )
	  return W;

     W = (R *)MALLOC(sizeof(R) * (r - 1) * m * 2, TWIDDLES);
     for (i = 0; i < m; ++i) {
	  for (gpower = 1, j = 0; j < r - 1;
	       ++j, gpower = MULMOD(gpower, g, r)) {
	       int k = i * (r - 1) + j;
	       W[2*k] = X(cos2pi)(i * gpower, n);
	       W[2*k+1] = FFT_SIGN * X(sin2pi)(i * gpower, n);
	  }
	  A(gpower == 1);
     }

     X(rader_tl_insert)(m, r, g, W, &twiddles);
     return W;
}
예제 #13
0
static void awake(plan *ego_, enum wakefulness wakefulness)
{
     P *ego = (P *) ego_;

     X(plan_awake)(ego->cld1, wakefulness);
     X(plan_awake)(ego->cld2, wakefulness);
     X(plan_awake)(ego->cld_omega, wakefulness);

     switch (wakefulness) {
	 case SLEEPY:
	      free_omega(ego->omega);
	      ego->omega = 0;
	      break;
	 default:
	      ego->g = X(find_generator)(ego->n);
	      ego->ginv = X(power_mod)(ego->g, ego->n - 2, ego->n);
	      A(MULMOD(ego->g, ego->ginv, ego->n) == 1);

	      A(!ego->omega);
	      ego->omega = mkomega(wakefulness, 
				   ego->cld_omega,ego->n,ego->npad,ego->ginv);
	      break;
     }
}
예제 #14
0
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
{
     const P *ego = (const P *) ego_;
     int is;
     int k, gpower, g, r;
     R *buf;

     r = ego->n; is = ego->is; g = ego->g; 
     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);

     /* First, permute the input, storing in buf: */
     for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
	  R rA, iA;
	  rA = ri[gpower * is];
	  iA = ii[gpower * is];
	  buf[2*k] = rA; buf[2*k + 1] = iA;
     }
     /* gpower == g^(r-1) mod r == 1 */;

     apply_aux(r, ego->ginv, ego->cld1, ego->cld2, ego->omega, 
	       buf, ri[0], ii[0], ro, io, ego->os);

     X(ifree)(buf);
}
예제 #15
0
파일: rader.c 프로젝트: Pinkii-/PCA
void fftw_twiddle_rader(fftw_complex *A, const fftw_complex *W,
			int m, int r, int stride,
			fftw_rader_data * d)
{
     fftw_complex *tmp = (fftw_complex *)
     fftw_malloc((r - 1) * sizeof(fftw_complex));
     int i, k, gpower = 1, g = d->g, ginv = d->ginv;
     fftw_real a0r, a0i;
     fftw_complex *omega = d->omega;

     for (i = 0; i < m; ++i, A += stride, W += r - 1) {
	  /* 
	   * Here, we fft W[k-1] * A[k*(m*stride)], using Rader.
	   * (Actually, W is pre-permuted to match the permutation that we 
	   * will do on A.) 
	   */

	  /* First, permute the input and multiply by W, storing in tmp: */
	  /* gpower == g^k mod r in the following loop */
	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
	       fftw_real rA, iA, rW, iW;
	       rW = c_re(W[k]);
	       iW = c_im(W[k]);
	       rA = c_re(A[gpower * (m * stride)]);
	       iA = c_im(A[gpower * (m * stride)]);
	       c_re(tmp[k]) = rW * rA - iW * iA;
	       c_im(tmp[k]) = rW * iA + iW * rA;
	  }

	  WHEN_DEBUG( {
		     if (gpower != 1)
		     fftw_die("incorrect generator in Rader\n");
		     }
	  );

	  /* FFT tmp to A: */
	  fftw_executor_simple(r - 1, tmp, A + (m * stride),
			       d->plan->root, 1, m * stride,
			       d->plan->recurse_kind);

	  /* set output DC component: */
	  a0r = c_re(A[0]);
	  a0i = c_im(A[0]);
	  c_re(A[0]) += c_re(A[(m * stride)]);
	  c_im(A[0]) += c_im(A[(m * stride)]);

	  /* now, multiply by omega: */
	  for (k = 0; k < r - 1; ++k) {
	       fftw_real rA, iA, rW, iW;
	       rW = c_re(omega[k]);
	       iW = c_im(omega[k]);
	       rA = c_re(A[(k + 1) * (m * stride)]);
	       iA = c_im(A[(k + 1) * (m * stride)]);
	       c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA;
	       c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA);
	  }

	  /* this will add A[0] to all of the outputs after the ifft */
	  c_re(A[(m * stride)]) += a0r;
	  c_im(A[(m * stride)]) -= a0i;

	  /* inverse FFT: */
	  fftw_executor_simple(r - 1, A + (m * stride), tmp,
			       d->plan->root, m * stride, 1,
			       d->plan->recurse_kind);

	  /* finally, do inverse permutation to unshuffle the output: */
	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
	       c_re(A[gpower * (m * stride)]) = c_re(tmp[k]);
	       c_im(A[gpower * (m * stride)]) = -c_im(tmp[k]);
	  }

	  WHEN_DEBUG( {
		     if (gpower != 1)
		     fftw_die("incorrect generator in Rader\n");
		     }
	  );
예제 #16
0
파일: sixstep.c 프로젝트: 524777134/cpython
/* forward transform with sign = -1 */
int
six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
{
    struct fnt_params *tparams;
    mpd_size_t log2n, C, R;
    mpd_uint_t kernel;
    mpd_uint_t umod;
#ifdef PPRO
    double dmod;
    uint32_t dinvmod[3];
#endif
    mpd_uint_t *x, w0, w1, wstep;
    mpd_size_t i, k;


    assert(ispower2(n));
    assert(n >= 16);
    assert(n <= MPD_MAXTRANSFORM_2N);

    log2n = mpd_bsr(n);
    C = ((mpd_size_t)1) << (log2n / 2);  /* number of columns */
    R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */


    /* Transpose the matrix. */
    if (!transpose_pow2(a, R, C)) {
        return 0;
    }

    /* Length R transform on the rows. */
    if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) {
        return 0;
    }
    for (x = a; x < a+n; x += R) {
        fnt_dif2(x, R, tparams);
    }

    /* Transpose the matrix. */
    if (!transpose_pow2(a, C, R)) {
        mpd_free(tparams);
        return 0;
    }

    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
    SETMODULUS(modnum);
    kernel = _mpd_getkernel(n, -1, modnum);
    for (i = 1; i < R; i++) {
        w0 = 1;                  /* r**(i*0): initial value for k=0 */
        w1 = POWMOD(kernel, i);  /* r**(i*1): initial value for k=1 */
        wstep = MULMOD(w1, w1);  /* r**(2*i) */
        for (k = 0; k < C; k += 2) {
            mpd_uint_t x0 = a[i*C+k];
            mpd_uint_t x1 = a[i*C+k+1];
            MULMOD2(&x0, w0, &x1, w1);
            MULMOD2C(&w0, &w1, wstep);  /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */
            a[i*C+k] = x0;
            a[i*C+k+1] = x1;
        }
    }

    /* Length C transform on the rows. */
    if (C != R) {
        mpd_free(tparams);
        if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) {
            return 0;
        }
    }
    for (x = a; x < a+n; x += C) {
        fnt_dif2(x, C, tparams);
    }
    mpd_free(tparams);

#if 0
    /* An unordered transform is sufficient for convolution. */
    /* Transpose the matrix. */
    if (!transpose_pow2(a, R, C)) {
        return 0;
    }
#endif

    return 1;
}
예제 #17
0
파일: rader.c 프로젝트: exic/last.fm-dbus
static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
{
     const P *ego = (const P *) ego_;
     INT is, os;
     INT k, gpower, g, r;
     R *buf;
     R r0 = ri[0], i0 = ii[0];

     r = ego->n; is = ego->is; os = ego->os; g = ego->g; 
     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);

     /* First, permute the input, storing in buf: */
     for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
	  R rA, iA;
	  rA = ri[gpower * is];
	  iA = ii[gpower * is];
	  buf[2*k] = rA; buf[2*k + 1] = iA;
     }
     /* gpower == g^(r-1) mod r == 1 */;


     /* compute DFT of buf, storing in output (except DC): */
     {
	    plan_dft *cld = (plan_dft *) ego->cld1;
	    cld->apply(ego->cld1, buf, buf+1, ro+os, io+os);
     }

     /* set output DC component: */
     {
	  ro[0] = r0 + ro[os];
	  io[0] = i0 + io[os];
     }

     /* now, multiply by omega: */
     {
	  const R *omega = ego->omega;
	  for (k = 0; k < r - 1; ++k) {
	       E rB, iB, rW, iW;
	       rW = omega[2*k];
	       iW = omega[2*k+1];
	       rB = ro[(k+1)*os];
	       iB = io[(k+1)*os];
	       ro[(k+1)*os] = rW * rB - iW * iB;
	       io[(k+1)*os] = -(rW * iB + iW * rB);
	  }
     }
     
     /* this will add input[0] to all of the outputs after the ifft */
     ro[os] += r0;
     io[os] -= i0;

     /* inverse FFT: */
     {
	    plan_dft *cld = (plan_dft *) ego->cld2;
	    cld->apply(ego->cld2, ro+os, io+os, buf, buf+1);
     }
     
     /* finally, do inverse permutation to unshuffle the output: */
     {
	  INT ginv = ego->ginv;
	  gpower = 1;
	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
	       ro[gpower * os] = buf[2*k];
	       io[gpower * os] = -buf[2*k+1];
	  }
	  A(gpower == 1);
     }


     X(ifree)(buf);
}
예제 #18
0
파일: rader.c 프로젝트: exic/last.fm-dbus
static int mkP(P *pln, INT n, INT is, INT os, R *ro, R *io,
	       planner *plnr)
{
     plan *cld1 = (plan *) 0;
     plan *cld2 = (plan *) 0;
     plan *cld_omega = (plan *) 0;
     R *buf = (R *) 0;

     /* initial allocation for the purpose of planning */
     buf = (R *) MALLOC(sizeof(R) * (n - 1) * 2, BUFFERS);

     cld1 = X(mkplan_f_d)(plnr, 
			  X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, os),
					     X(mktensor_1d)(1, 0, 0),
					     buf, buf + 1, ro + os, io + os),
			  NO_SLOW, 0, 0);
     if (!cld1) goto nada;

     cld2 = X(mkplan_f_d)(plnr, 
			  X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, os, 2),
					     X(mktensor_1d)(1, 0, 0),
					     ro + os, io + os, buf, buf + 1),
			  NO_SLOW, 0, 0);

     if (!cld2) goto nada;

     /* plan for omega array */
     cld_omega = X(mkplan_f_d)(plnr, 
			       X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, 2),
						  X(mktensor_1d)(1, 0, 0),
						  buf, buf + 1, buf, buf + 1),
			       NO_SLOW, ESTIMATE, 0);
     if (!cld_omega) goto nada;

     /* deallocate buffers; let awake() or apply() allocate them for real */
     X(ifree)(buf);
     buf = 0;

     pln->cld1 = cld1;
     pln->cld2 = cld2;
     pln->cld_omega = cld_omega;
     pln->omega = 0;
     pln->n = n;
     pln->is = is;
     pln->os = os;
     pln->g = X(find_generator)(n);
     pln->ginv = X(power_mod)(pln->g, n - 2, n);
     A(MULMOD(pln->g, pln->ginv, n) == 1);

     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
     pln->super.super.ops.other += (n - 1) * (4 * 2 + 6) + 6;
     pln->super.super.ops.add += (n - 1) * 2 + 4;
     pln->super.super.ops.mul += (n - 1) * 4;

     return 1;

 nada:
     X(ifree0)(buf);
     X(plan_destroy_internal)(cld_omega);
     X(plan_destroy_internal)(cld2);
     X(plan_destroy_internal)(cld1);
     return 0;
}
예제 #19
0
파일: rader.c 프로젝트: Pinkii-/PCA
static fftw_rader_data *create_rader_aux(int p, int flags)
{
     fftw_complex *omega, *work;
     int g, ginv, gpower;
     int i;
     FFTW_TRIG_REAL twoPiOverN;
     fftw_real scale = 1.0 / (p - 1);	/* for convolution */
     fftw_plan plan;
     fftw_rader_data *d;

     if (p < 2)
	  fftw_die("non-prime order in Rader\n");

     flags &= ~FFTW_IN_PLACE;

     d = (fftw_rader_data *) fftw_malloc(sizeof(fftw_rader_data));

     g = find_generator(p);
     ginv = power_mod(g, p - 2, p);

     omega = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex));

     plan = fftw_create_plan(p - 1, FFTW_FORWARD,
			     flags & ~FFTW_NO_VECTOR_RECURSE);

     work = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex));

     twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) p;
     gpower = 1;
     for (i = 0; i < p - 1; ++i) {
	  c_re(work[i]) = scale * FFTW_TRIG_COS(twoPiOverN * gpower);
	  c_im(work[i]) = FFTW_FORWARD * scale * FFTW_TRIG_SIN(twoPiOverN 
							       * gpower);
	  gpower = MULMOD(gpower, ginv, p);
     }

     /* fft permuted roots of unity */
     fftw_executor_simple(p - 1, work, omega, plan->root, 1, 1,
			  plan->recurse_kind);

     fftw_free(work);

     d->plan = plan;
     d->omega = omega;
     d->g = g;
     d->ginv = ginv;
     d->p = p;
     d->flags = flags;
     d->refcount = 1;
     d->next = NULL;

     d->cdesc = (fftw_codelet_desc *) fftw_malloc(sizeof(fftw_codelet_desc));
     d->cdesc->name = NULL;
     d->cdesc->codelet = NULL;
     d->cdesc->size = p;
     d->cdesc->dir = FFTW_FORWARD;
     d->cdesc->type = FFTW_RADER;
     d->cdesc->signature = g;
     d->cdesc->ntwiddle = 0;
     d->cdesc->twiddle_order = NULL;
     return d;
}
예제 #20
0
파일: twiddle.c 프로젝트: Bedoustani/fftw3
static R *compute(enum wakefulness wakefulness,
                  const tw_instr *instr, INT n, INT r, INT m)
{
    INT ntwiddle, j, vl;
    R *W, *W0;
    const tw_instr *p;
    triggen *t = X(mktriggen)(wakefulness, n);

    p = instr;
    ntwiddle = twlen0(r, p, &vl);

    A(m % vl == 0);

    W0 = W = (R *)MALLOC((ntwiddle * (m / vl)) * sizeof(R), TWIDDLES);

    for (j = 0; j < m; j += vl) {
        for (p = instr; p->op != TW_NEXT; ++p) {
            switch (p->op) {
            case TW_FULL: {
                INT i;
                for (i = 1; i < r; ++i) {
                    A((j + (INT)p->v) * i < n);
                    A((j + (INT)p->v) * i > -n);
                    t->cexp(t, (j + (INT)p->v) * i, W);
                    W += 2;
                }
                break;
            }

            case TW_HALF: {
                INT i;
                A((r % 2) == 1);
                for (i = 1; i + i < r; ++i) {
                    t->cexp(t, MULMOD(i, (j + (INT)p->v), n), W);
                    W += 2;
                }
                break;
            }

            case TW_COS: {
                R d[2];

                A((j + (INT)p->v) * p->i < n);
                A((j + (INT)p->v) * p->i > -n);
                t->cexp(t, (j + (INT)p->v) * (INT)p->i, d);
                *W++ = d[0];
                break;
            }

            case TW_SIN: {
                R d[2];

                A((j + (INT)p->v) * p->i < n);
                A((j + (INT)p->v) * p->i > -n);
                t->cexp(t, (j + (INT)p->v) * (INT)p->i, d);
                *W++ = d[1];
                break;
            }

            case TW_CEXP:
                A((j + (INT)p->v) * p->i < n);
                A((j + (INT)p->v) * p->i > -n);
                t->cexp(t, (j + (INT)p->v) * (INT)p->i, W);
                W += 2;
                break;
            }
        }
    }

    X(triggen_destroy)(t);
    return W0;
}
예제 #21
0
static void apply(const plan *ego_, R *I, R *O)
{
     const P *ego = (const P *) ego_;
     INT n = ego->n; /* prime */
     INT npad = ego->npad; /* == n - 1 for unpadded Rader; always even */
     INT is = ego->is, os;
     INT k, gpower, g;
     R *buf, *omega;
     R r0;

     buf = (R *) MALLOC(sizeof(R) * npad, BUFFERS);

     /* First, permute the input, storing in buf: */
     g = ego->g; 
     for (gpower = 1, k = 0; k < n - 1; ++k, gpower = MULMOD(gpower, g, n)) {
	  buf[k] = I[gpower * is];
     }
     /* gpower == g^(n-1) mod n == 1 */;

     A(n - 1 <= npad);
     for (k = n - 1; k < npad; ++k) /* optionally, zero-pad convolution */
	  buf[k] = 0;

     os = ego->os;

     /* compute RDFT of buf, storing in buf (i.e., in-place): */
     {
	    plan_rdft *cld = (plan_rdft *) ego->cld1;
	    cld->apply((plan *) cld, buf, buf);
     }

     /* set output DC component: */
     O[0] = (r0 = I[0]) + buf[0];

     /* now, multiply by omega: */
     omega = ego->omega;
     buf[0] *= omega[0];
     for (k = 1; k < npad/2; ++k) {
	  E rB, iB, rW, iW, a, b;
	  rW = omega[k];
	  iW = omega[npad - k];
	  rB = buf[k];
	  iB = buf[npad - k];
	  a = rW * rB - iW * iB;
	  b = rW * iB + iW * rB;
#if R2HC_ONLY_CONV
	  buf[k] = a + b;
	  buf[npad - k] = a - b;
#else
	  buf[k] = a;
	  buf[npad - k] = b;
#endif
     }
     /* Nyquist component: */
     A(k + k == npad); /* since npad is even */
     buf[k] *= omega[k];
     
     /* this will add input[0] to all of the outputs after the ifft */
     buf[0] += r0;

     /* inverse FFT: */
     {
	    plan_rdft *cld = (plan_rdft *) ego->cld2;
	    cld->apply((plan *) cld, buf, buf);
     }

     /* do inverse permutation to unshuffle the output: */
     A(gpower == 1);
#if R2HC_ONLY_CONV
     O[os] = buf[0];
     gpower = g = ego->ginv;
     A(npad == n - 1 || npad/2 >= n - 1);
     if (npad == n - 1) {
	  for (k = 1; k < npad/2; ++k, gpower = MULMOD(gpower, g, n)) {
	       O[gpower * os] = buf[k] + buf[npad - k];
	  }
	  O[gpower * os] = buf[k];
	  ++k, gpower = MULMOD(gpower, g, n);
	  for (; k < npad; ++k, gpower = MULMOD(gpower, g, n)) {
	       O[gpower * os] = buf[npad - k] - buf[k];
	  }
     }
     else {
	  for (k = 1; k < n - 1; ++k, gpower = MULMOD(gpower, g, n)) {
	       O[gpower * os] = buf[k] + buf[npad - k];
	  }
     }
#else
     g = ego->ginv;
     for (k = 0; k < n - 1; ++k, gpower = MULMOD(gpower, g, n)) {
	  O[gpower * os] = buf[k];
     }
#endif
     A(gpower == 1);

     X(ifree)(buf);
}
예제 #22
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_rdft *p = (const problem_rdft *) p_;
     P *pln;
     INT n, npad;
     INT is, os;
     plan *cld1 = (plan *) 0;
     plan *cld2 = (plan *) 0;
     plan *cld_omega = (plan *) 0;
     float *buf = (float *) 0;
     problem *cldp;

     static const plan_adt padt = {
	  fftwf_rdft_solve, awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr))
	  return (plan *) 0;

     n = p->sz->dims[0].n;
     is = p->sz->dims[0].is;
     os = p->sz->dims[0].os;

     if (ego->pad)
	  npad = choose_transform_size(2 * (n - 1) - 1);
     else
	  npad = n - 1;

     /* initial allocation for the purpose of planning */
     buf = (float *) MALLOC(sizeof(float) * npad, BUFFERS);

     cld1 = fftwf_mkplan_f_d(plnr,fftwf_mkproblem_rdft_1_d(fftwf_mktensor_1d(npad, 1, 1),
						fftwf_mktensor_1d(1, 0, 0),
						buf, buf,
						R2HC),  NO_SLOW, 0, 0);
     if (!cld1) goto nada;

     cldp =
          fftwf_mkproblem_rdft_1_d(
               fftwf_mktensor_1d(npad, 1, 1),
               fftwf_mktensor_1d(1, 0, 0),
	       buf, buf,
#if R2HC_ONLY_CONV
	       R2HC
#else
	       HC2R
#endif
	       );
     if (!(cld2 = fftwf_mkplan_f_d(plnr, cldp, NO_SLOW, 0, 0)))
	  goto nada;

     /* plan for omega */
     cld_omega = fftwf_mkplan_f_d(plnr,
			       fftwf_mkproblem_rdft_1_d(
				    fftwf_mktensor_1d(npad, 1, 1),
				    fftwf_mktensor_1d(1, 0, 0),
				    buf, buf, R2HC),
			       NO_SLOW, ESTIMATE, 0);
     if (!cld_omega) goto nada;

     /* deallocate buffers; let awake() or apply() allocate them for real */
     fftwf_ifree(buf);
     buf = 0;

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->cld1 = cld1;
     pln->cld2 = cld2;
     pln->cld_omega = cld_omega;
     pln->omega = 0;
     pln->n = n;
     pln->npad = npad;
     pln->is = is;
     pln->os = os;
     pln->g = fftwf_find_generator(n);
     pln->ginv = fftwf_power_mod(pln->g, n - 2, n);
     A(MULMOD(pln->g, pln->ginv, n) == 1);

     fftwf_ops_add(&cld1->ops, &cld2->ops, &pln->super.super.ops);
     pln->super.super.ops.other += (npad/2-1)*6 + npad + n + (n-1) * ego->pad;
     pln->super.super.ops.add += (npad/2-1)*2 + 2 + (n-1) * ego->pad;
     pln->super.super.ops.mul += (npad/2-1)*4 + 2 + ego->pad;
#if R2HC_ONLY_CONV
     pln->super.super.ops.other += n-2 - ego->pad;
     pln->super.super.ops.add += (npad/2-1)*2 + (n-2) - ego->pad;
#endif

     return &(pln->super.super);

 nada:
     fftwf_ifree0(buf);
     fftwf_plan_destroy_internal(cld_omega);
     fftwf_plan_destroy_internal(cld2);
     fftwf_plan_destroy_internal(cld1);
     return 0;
}