Пример #1
0
/*
 * Run the transform described by `plan`.
 *
 * With HAVE_LIBFFTW3 defined the FFTW plan stored in plan->plan is executed;
 * otherwise the KISS FFT N-d configuration plan->cfg is applied, reading from
 * plan->in and writing to plan->out.
 */
void fft_plan_exec(const fft_plan plan) {
#ifdef HAVE_LIBFFTW3
    fftw_execute(plan->plan);
#else
    /* The plan's buffers are handed to KISS FFT as complex arrays. */
    const kiss_fft_cpx *src = (const kiss_fft_cpx *)plan->in;
    kiss_fft_cpx *dst = (kiss_fft_cpx *)plan->out;

    kiss_fftnd(plan->cfg, src, dst);
#endif
}
Пример #2
0
/*
 * Stream N-dimensional complex FFTs from `fin` to `fout`.
 *
 * Repeatedly reads one full block of prod(dims[0..ndims-1]) kiss_fft_cpx
 * samples from `fin`, transforms it in place with kiss_fftnd(), and writes the
 * result to `fout`.  A trailing partial block (fewer samples than one full
 * block) is not transformed and not written.
 *
 * fin, fout  - already opened input / output streams
 * dims       - size of every dimension, `ndims` entries
 * ndims      - number of dimensions
 * isinverse  - passed through to kiss_fftnd_alloc() as the inverse flag
 *
 * On invalid dimensions, allocation failure or a short write a message is
 * printed to stderr and the function returns early.
 */
static
void fft_filend(FILE * fin, FILE * fout, int *dims, int ndims, int isinverse)
{
    kiss_fftnd_cfg st = NULL;
    kiss_fft_cpx *buf = NULL;
    size_t dimprod = 1;
    int i;

    for (i = 0; i < ndims; ++i) {
        /* Reject non-positive sizes and products whose byte count would wrap. */
        if (dims[i] <= 0 ||
            dimprod > ((size_t)-1 / sizeof(kiss_fft_cpx)) / (size_t)dims[i]) {
            fprintf(stderr, "fft_filend: invalid or too large dimension %d\n", dims[i]);
            return;
        }
        dimprod *= (size_t)dims[i];
    }

    buf = (kiss_fft_cpx *) malloc(sizeof(kiss_fft_cpx) * dimprod);
    st = kiss_fftnd_alloc(dims, ndims, isinverse, 0, 0);
    if (buf == NULL || st == NULL) {
        fprintf(stderr, "fft_filend: out of memory\n");
        free(st);
        free(buf);
        return;
    }

    /* One fread item == one complete N-d block, so partial blocks read as 0 items. */
    while (fread(buf, sizeof(kiss_fft_cpx) * dimprod, 1, fin) > 0) {
        kiss_fftnd(st, buf, buf);
        if (fwrite(buf, sizeof(kiss_fft_cpx), dimprod, fout) != dimprod) {
            fprintf(stderr, "fft_filend: short write\n");
            break;
        }
    }
    free(st);
    free(buf);
}
Пример #3
0
/*
 * Release helper for memory obtained through KISS_FFT_MALLOC (directly, or
 * indirectly through the kiss_*_alloc functions).  When the KISS FFT header
 * provides a matching KISS_FFT_FREE it is used; otherwise plain free() is used.
 */
#ifdef KISS_FFT_FREE
#define BENCH_BUF_FREE(p) KISS_FFT_FREE(p)
#else
#define BENCH_BUF_FREE(p) free(p)
#endif

/*
 * KISS FFT benchmark driver.
 *
 * Options:
 *   -n <dims>  transform dimensions, parsed by getdims() (default: 1024)
 *   -x <num>   number of transforms to run (default: 1000)
 *   -i         run inverse transforms
 *   -r         run real-input transforms instead of complex ones
 *
 * Runs `numffts` transforms of the requested kind on a zero-filled input
 * buffer, then prints the configuration and the pstats report to stderr.
 * Returns 0 on success and 1 when the dimensions are invalid or an
 * allocation / configuration step fails.
 */
int main(int argc, char ** argv)
{
    int k;
    int nfft[32];
    int ndims = 1;
    int isinverse = 0;
    int numffts = 1000, i;
    kiss_fft_cpx * buf;
    kiss_fft_cpx * bufout;
    int real = 0;
    int cfg_ok = 1;
    size_t nbytes;

    nfft[0] = 1024;// default

    while (1) {
        int c = getopt(argc, argv, "n:ix:r");
        if (c == -1)
            break;
        switch (c) {
        case 'r':
            real = 1;
            break;
        case 'n':
            ndims = getdims(nfft, optarg);
            if (nfft[0] != kiss_fft_next_fast_size(nfft[0])) {
                int ng = kiss_fft_next_fast_size(nfft[0]);
                fprintf(stderr, "warning: %d might be a better choice for speed than %d\n", ng, nfft[0]);
            }
            break;
        case 'x':
            numffts = atoi(optarg);
            break;
        case 'i':
            isinverse = 1;
            break;
        }
    }

    if (ndims < 1 || ndims > (int)(sizeof(nfft) / sizeof(nfft[0]))) {
        fprintf(stderr, "error: number of dimensions must be between 1 and %d\n",
                (int)(sizeof(nfft) / sizeof(nfft[0])));
        return 1;
    }

    /* Buffer size in bytes; computed in size_t with an explicit wrap check. */
    nbytes = sizeof(kiss_fft_cpx);
    for (k = 0; k < ndims; ++k) {
        if (nfft[k] <= 0 || nbytes > (size_t)-1 / (size_t)nfft[k]) {
            fprintf(stderr, "error: invalid or too large dimension %d\n", nfft[k]);
            return 1;
        }
        nbytes *= (size_t)nfft[k];
    }

#ifdef USE_SIMD
    numffts /= 4;
    fprintf(stderr, "since SIMD implementation does 4 ffts at a time, numffts is being reduced to %d\n", numffts);
#endif

    buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes);
    bufout = (kiss_fft_cpx*)KISS_FFT_MALLOC(nbytes);
    if (buf == NULL || bufout == NULL) {
        fprintf(stderr, "error: out of memory\n");
        if (buf != NULL)
            BENCH_BUF_FREE(buf);
        if (bufout != NULL)
            BENCH_BUF_FREE(bufout);
        return 1;
    }
    memset(buf, 0, nbytes);

    pstats_init();

    /* Each branch bails out (cfg_ok = 0) instead of running on a NULL config. */
    if (ndims == 1) {
        if (real) {
            kiss_fftr_cfg st = kiss_fftr_alloc(nfft[0] , isinverse , 0, 0);
            if (st == NULL) {
                cfg_ok = 0;
            } else {
                if (isinverse)
                    for (i = 0; i < numffts; ++i)
                        kiss_fftri(st , (kiss_fft_cpx*)buf, (kiss_fft_scalar*)bufout);
                else
                    for (i = 0; i < numffts; ++i)
                        kiss_fftr(st , (kiss_fft_scalar*)buf, (kiss_fft_cpx*)bufout);
                BENCH_BUF_FREE(st);
            }
        } else {
            kiss_fft_cfg st = kiss_fft_alloc(nfft[0] , isinverse , 0, 0);
            if (st == NULL) {
                cfg_ok = 0;
            } else {
                for (i = 0; i < numffts; ++i)
                    kiss_fft(st , buf, bufout);
                BENCH_BUF_FREE(st);
            }
        }
    } else {
        if (real) {
            kiss_fftndr_cfg st = kiss_fftndr_alloc(nfft, ndims , isinverse , 0, 0);
            if (st == NULL) {
                cfg_ok = 0;
            } else {
                if (isinverse)
                    for (i = 0; i < numffts; ++i)
                        kiss_fftndri(st , (kiss_fft_cpx*)buf, (kiss_fft_scalar*)bufout);
                else
                    for (i = 0; i < numffts; ++i)
                        kiss_fftndr(st , (kiss_fft_scalar*)buf, (kiss_fft_cpx*)bufout);
                BENCH_BUF_FREE(st);
            }
        } else {
            kiss_fftnd_cfg st = kiss_fftnd_alloc(nfft, ndims, isinverse , 0, 0);
            if (st == NULL) {
                cfg_ok = 0;
            } else {
                for (i = 0; i < numffts; ++i)
                    kiss_fftnd(st , buf, bufout);
                BENCH_BUF_FREE(st);
            }
        }
    }

    BENCH_BUF_FREE(buf);
    BENCH_BUF_FREE(bufout);

    if (!cfg_ok) {
        fprintf(stderr, "error: could not create the FFT configuration\n");
        kiss_fft_cleanup();
        return 1;
    }

    fprintf(stderr, "KISS\tnfft=");
    for (k = 0; k < ndims; ++k)
        fprintf(stderr, "%d,", nfft[k]);
    fprintf(stderr, "\tnumffts=%d\n" , numffts);
    pstats_report();

    kiss_fft_cleanup();

    return 0;
}
Пример #4
0
/* generic complex <N>d-transform.
 *
 * Tcl command implementation shared by cfftf_2d/cfftb_2d, cfftf_3d/cfftb_3d
 * and cfftf_4d/cfftb_4d; direction and dimensionality are selected from the
 * command name in objv[0].  The single argument is a nested list holding the
 * data.  On success the interpreter result is a nested list with the
 * transformed data and TCL_OK is returned; on any failure TCL_ERROR is
 * returned.  The whole call runs with myFftMutex held, and every exit path
 * releases the mutex, the reference on objv[1] and all intermediate storage.
 */
int tcl_cfft_nd(ClientData nodata, Tcl_Interp *interp,
                int objc, Tcl_Obj *const objv[]) 
{
    Tcl_Obj *result, **tdata[FFT_MAX_DIM];
    
    const char *name;
    kiss_fft_cpx *input  = NULL;
    kiss_fft_cpx *output = NULL;
    kiss_fftnd_cfg work  = NULL;
    
    int dir, ndim, alldim, ndat[FFT_MAX_DIM];
    int i;
    int status    = TCL_ERROR;  /* pessimistic default, set to TCL_OK on success */
    int data_held = 0;          /* non-zero while we own a reference on objv[1] */

    Tcl_MutexLock(&myFftMutex);

    /* set defaults: */
    dir   = FFT_FORWARD;
    ndim  = -1;
        
    /* Parse arguments:
     *
     * usage: cfftf_nd <data>
     *    or: cfftb_nd <data>
     * 
     * cfftf_nd   : is the Nd complex forward transform.
     * cfftb_nd   : is the Nd complex backward transform.
     * <data>     : list containing data to be transformed. this can either a real 
     *              or a list with two reals interpreted as complex.
     */

    name = Tcl_GetString(objv[0]);
    if (strcmp(name,"cfftf_2d") == 0) {
        dir = FFT_FORWARD;
        ndim = 2;
    } else if (strcmp(name,"cfftb_2d") == 0) {
        dir = FFT_BACKWARD;
        ndim = 2;
    } else if (strcmp(name,"cfftf_3d") == 0) {
        dir = FFT_FORWARD;
        ndim = 3;
    } else if (strcmp(name,"cfftb_3d") == 0) {
        dir = FFT_BACKWARD;
        ndim = 3;
    } else if (strcmp(name,"cfftf_4d") == 0) {
        dir = FFT_FORWARD;
        ndim = 4;
    } else if (strcmp(name,"cfftb_4d") == 0) {
        dir = FFT_BACKWARD;
        ndim = 4;
    } else {
        Tcl_AppendResult(interp, name, ": unknown fft command.", NULL);
        goto cleanup;
    }

    if (objc != 2) {
        Tcl_WrongNumArgs(interp, 1, objv, "<data>");
        goto cleanup;
    }
    
    /* mark data as busy and check */
    Tcl_IncrRefCount(objv[1]);
    data_held = 1;
    if (Tcl_ListObjGetElements(interp, objv[1], &(ndat[0]), &(tdata[0])) != TCL_OK) {
        goto cleanup;
    }
    if ((ndat[0] < 0) || (ndim > FFT_MAX_DIM)) { /* this should not happen, but... */
        Tcl_AppendResult(interp, name, ": illegal or unsupported data array.", NULL);
        goto cleanup;
    }
    if (ndat[0] == 0) {         /* no effect for empty array */
        /* Hand the object to the interpreter before our own reference is
         * dropped in the cleanup section, so it can never be freed in between. */
        Tcl_SetObjResult(interp, objv[1]);
        status = TCL_OK;
        goto cleanup;
    }

    check_thread_count(interp,"fftcmds");

    /* determine size of each dimension for storage size and parsing/checking. */
    alldim=ndat[0];
    for (i=1; i<ndim; ++i) { 
        if (Tcl_ListObjGetElements(interp, tdata[i-1][0], &(ndat[i]), &(tdata[i])) != TCL_OK) {
            goto cleanup;
        }
        /* The next iteration reads tdata[i][0], so an empty nested list
         * must be rejected here. */
        if (ndat[i] <= 0) {
            Tcl_AppendResult(interp, name, ": illegal data array.", NULL);
            goto cleanup;
        }
        alldim *= ndat[i];
    }
    input  = (kiss_fft_cpx *)Tcl_Alloc(alldim*sizeof(kiss_fft_cpx));
    output = (kiss_fft_cpx *)Tcl_Alloc(alldim*sizeof(kiss_fft_cpx));
    work   = kiss_fftnd_alloc(ndat, ndim, dir, NULL, NULL);
    if (work == NULL) {
        Tcl_AppendResult(interp, name, ": could not set up transform.", NULL);
        goto cleanup;
    }

    /* parse/copy data list through recursive function and release original data. */
    alldim=0;
    for (i=0; i<ndat[0]; ++i) {
        if (read_list_list(interp, tdata[0][i], 1, ndim, ndat, input, &alldim) != TCL_OK) {
            Tcl_AppendResult(interp, name, ": illegal data array.", NULL);
            goto cleanup;
        }
    }
    Tcl_DecrRefCount(objv[1]);
    data_held = 0;
    
    /* finally run the transform */
    kiss_fftnd(work, input, output);
    
    /* build result list(s) recursively */
    result = Tcl_NewListObj(0, NULL);
    alldim = 0;
    for (i=0; i<ndat[0]; ++i) {
        make_list_list(interp, result, 1, ndim, ndat, output, &alldim);
    }
    Tcl_SetObjResult(interp, result);
    status = TCL_OK;

cleanup:
    /* single exit path: release the data reference and intermediate storage */
    if (data_held) {
        Tcl_DecrRefCount(objv[1]);
    }
    if (input != NULL) {
        Tcl_Free((char *)input);
    }
    if (output != NULL) {
        Tcl_Free((char *)output);
    }
    if (work != NULL) {
        kiss_fft_free(work);
        kiss_fft_cleanup();
    }

    Tcl_MutexUnlock(&myFftMutex);
    return status;
}
// Forward 2-D FFT of a bitmap.
//
// Every pixel of `src` (read as Tsrc) becomes the real part of a complex
// sample with zero imaginary part; scanlines are visited from y = height-1
// down to 0 and packed contiguously, so the transform is set up with
// dims = {rows, cols}.  The transformed samples are written, in the same
// scanline order, into a newly allocated FIT_COMPLEX bitmap.
//
// Returns the new bitmap, or NULL when the FFT configuration or the
// destination bitmap could not be allocated.  All scratch buffers are
// released before returning.
template<class Tsrc> FIBITMAP* 
FFT2D<Tsrc>::FFT(FIBITMAP *src)
{
	int height, width;
	int x, y;
	int dims[2];
	int ndims = 2;
	size_t bufsize;
	size_t i = 0;
	Tsrc *bits;
	FICOMPLEX *outbits;
	FIBITMAP *dst = NULL;

	kiss_fftnd_cfg st;
	kiss_fft_cpx* fftbuf;
	kiss_fft_cpx* fftoutbuf;

	// Dims needs to be {rows, cols}, if you have contiguous rows.
	dims[0] = height = FreeImage_GetHeight(src);
	dims[1] = width = FreeImage_GetWidth(src);

	// Widen before multiplying so the pixel count is not computed in int.
	bufsize = (size_t) width * (size_t) height * sizeof(kiss_fft_cpx);
	fftbuf = (kiss_fft_cpx*) malloc(bufsize);
	fftoutbuf = (kiss_fft_cpx*) malloc(bufsize);

	CheckMemory(fftbuf);
	CheckMemory(fftoutbuf);

	st = kiss_fftnd_alloc(dims, ndims, 0, 0, 0);

	// Without a valid configuration there is nothing to transform.
	if (st != NULL) {

		// Pack the image into the input buffer; every element is written
		// here, so no prior zero-fill is needed.
		for (y = height - 1; y >= 0; y--) {

			bits = (Tsrc *) FreeImage_GetScanLine(src, y);

			for (x = 0; x < width; x++) {
				fftbuf[i].r = (float) bits[x];
				fftbuf[i].i = 0.0;
				i++;
			}
		}

		kiss_fftnd(st, fftbuf, fftoutbuf);

		dst = FreeImage_AllocateT(FIT_COMPLEX, width, height, 32, 0, 0, 0);

		if (dst != NULL) {

			// Walk the output one packed row per scanline, same order as above.
			const kiss_fft_cpx *row = fftoutbuf;

			for (y = height - 1; y >= 0; y--) {

				outbits = (FICOMPLEX *) FreeImage_GetScanLine(dst, y);

				for (x = 0; x < width; x++) {
					outbits[x].r = (double) row[x].r;
					outbits[x].i = (double) row[x].i;
				}

				row += width;
			}
		}
	}

	free(fftbuf);
	free(fftoutbuf);
	free(st);

	return dst;
}
Пример #6
0
/* Abort the whole MPI job with a diagnostic if a CUDA runtime call failed. */
static void fft3d_compare_cuda_check(cudaError_t err, const char *what)
{
    if (err != cudaSuccess)
    {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}

/* Compare a 3d FFT against a reference FFT
 *
 * Every rank builds the same pseudo-random global input (fixed seed),
 * transforms it completely with kiss_fftnd() as the reference, and then runs
 * the distributed CUDA transform on its own local_nx x local_ny x local_nz
 * sub-block.  Each local output value is compared with the matching entry of
 * the reference using CHECK_SMALL / CHECK_CLOSE.
 */
void test_distributed_fft_3d_compare()
{
    int s,p;
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &s);

    int nd = 3;
    int *pdim;
    pdim = (int *) malloc(sizeof(int)*nd);
    /* choose a decomposition */
    int r = powf(p,1.0/(double)nd)+0.5;
    int root = 1;
    while (root < r) root*=2;
    int ptot = 1;
    if (!s) printf("Processor grid: ");
    int i;
    for (i = 0; i < nd-1; ++i)
    {
        pdim[i] = (((ptot*root) > p) ? 1 : root);
        ptot *= pdim[i];
        if (!s) printf("%d x ",pdim[i]);
    }
    pdim[nd-1] = p/ptot;
    if (!s) printf("%d\n", pdim[nd-1]);

    /* determine processor index */
    int *pidx;
    pidx = (int*)malloc(nd*sizeof(int));
    int idx = s;
    for (i = nd-1; i >= 0; --i)
    {
        pidx[i] = idx % pdim[i];
        idx /= pdim[i];
    }

    int *dim_glob;
    dim_glob = (int *) malloc(sizeof(int)*nd);

    // Do a pdim[0]*4 x pdim[1]* 8 x pdim[2] * 16 FFT (powers of two)
    int local_nx = 4;
    int local_ny = 8;
    int local_nz = 16;
    dim_glob[0] = pdim[0]*local_nx;
    dim_glob[1] = pdim[1]*local_ny;
    dim_glob[2] = pdim[2]*local_nz;

    for (i = 0; i < nd-1; ++i)
        if (!s) printf("%d x ",dim_glob[i]);
    if (!s) printf("%d matrix\n", dim_glob[nd-1]);

    float scale = dim_glob[0]*dim_glob[1]*dim_glob[2];
    /* assume 0.5 sig digit loss per addition/twiddling (empirical)*/
    float sig_digits = 7.0-0.5*logf(scale)/logf(2.0);
    double tol = powf(10.0,-sig_digits);
    double abs_tol = 1.0*tol;
    printf("Testing with %f sig digits, rel precision %f, abs precision %f\n", sig_digits,  tol, abs_tol);

    kiss_fft_cpx *in_kiss;
    in_kiss = (kiss_fft_cpx *)malloc(sizeof(kiss_fft_cpx)*dim_glob[0]*dim_glob[1]*dim_glob[2]);

    srand(12345);

    // fill table with complex random numbers in row major order
    int x,y,z;
    int nx = dim_glob[0];
    int ny = dim_glob[1];
    int nz = dim_glob[2];
    for (x = 0; x < dim_glob[0]; ++x)
        for (y = 0; y < dim_glob[1]; ++y)
            for (z = 0; z < dim_glob[2]; ++z)
            {
                // row-major indexing: z is the fastest-varying index
                in_kiss[z+nz*(y+ny*x)].r = (float)rand()/(float)RAND_MAX;
                in_kiss[z+nz*(y+ny*x)].i =(float)rand()/(float)RAND_MAX;
            }

    kiss_fft_cpx *out_kiss;
    out_kiss = (kiss_fft_cpx *)malloc(sizeof(kiss_fft_cpx)*dim_glob[0]*dim_glob[1]*dim_glob[2]);

    // construct forward transform
    kiss_fftnd_cfg cfg = kiss_fftnd_alloc(dim_glob,3,0,NULL,NULL);

    // carry out conventional FFT
    kiss_fftnd(cfg, in_kiss, out_kiss);

    // compare to distributed FFT
    size_t local_bytes = sizeof(cuda_cpx_t)*local_nx*local_ny*local_nz;
    cuda_cpx_t * in_d, *in_h;
    fft3d_compare_cuda_check(cudaMalloc((void **)&in_d, local_bytes), "cudaMalloc(in_d)");
    in_h = (cuda_cpx_t *) malloc(local_bytes);

    // copy this rank's sub-block of the global input into the local host buffer
    int x_local, y_local, z_local;
    for (x = 0; x < nx; ++x)
        for (y = 0; y < ny; ++y)
            for (z = 0; z < nz; ++z)
            {
                if (x>=pidx[0]*local_nx && x < (pidx[0]+1)*local_nx &&
                        y>=pidx[1]*local_ny && y < (pidx[1]+1)*local_ny &&
                        z>=pidx[2]*local_nz && z < (pidx[2]+1)*local_nz)
                {
                    x_local = x - pidx[0]*local_nx;
                    y_local = y - pidx[1]*local_ny;
                    z_local = z - pidx[2]*local_nz;

                    CUDA_RE(in_h[z_local+local_nz*(y_local+local_ny*x_local)]) =
                        in_kiss[z+nz*(y+ny*x)].r;
                    CUDA_IM(in_h[z_local+local_nz*(y_local+local_ny*x_local)]) =
                        in_kiss[z+nz*(y+ny*x)].i;
                }
            }

    cuda_cpx_t *out_d, *out_h;
    fft3d_compare_cuda_check(cudaMalloc((void **)&out_d, local_bytes), "cudaMalloc(out_d)");
    out_h = (cuda_cpx_t *) malloc(local_bytes);

    dfft_plan plan;
    dfft_cuda_create_plan(&plan,3, dim_glob, NULL, NULL, pdim, pidx,0, 0, 0, MPI_COMM_WORLD, proc_map);
    dfft_cuda_check_errors(&plan,1);

    /* copy data to device */
    fft3d_compare_cuda_check(cudaMemcpy(in_d, in_h, local_bytes, cudaMemcpyDefault),
                             "cudaMemcpy(host to device)");

    // forward transform
    dfft_cuda_execute(in_d, out_d, 0, &plan);

    /* copy data back to host */
    fft3d_compare_cuda_check(cudaMemcpy(out_h, out_d, local_bytes, cudaMemcpyDefault),
                             "cudaMemcpy(device to host)");

    // do comparison
    int n_wave_local = local_nx * local_ny * local_nz;
    for (i = 0; i < n_wave_local; ++i)
    {
        // recover the local 3d index from the flat row-major index
        x_local = i / local_ny / local_nz;
        y_local = (i - x_local*local_ny*local_nz)/local_nz;
        z_local = i % local_nz;

        x = pidx[0]*local_nx + x_local;
        y = pidx[1]*local_ny + y_local;
        z = pidx[2]*local_nz + z_local;

        double re = CUDA_RE(out_h[i]);
        double im = CUDA_IM(out_h[i]);
        double re_kiss = out_kiss[z+nz*(y+ny*x)].r;
        double im_kiss = out_kiss[z+nz*(y+ny*x)].i;

        // near-zero reference values are checked absolutely, others relatively
        if (fabs(re_kiss) < abs_tol)
        {
            CHECK_SMALL(re,2*abs_tol);
        }
        else
        {
            CHECK_CLOSE(re,re_kiss, tol);
        }

        if (fabs(im_kiss) < abs_tol)
        {
            CHECK_SMALL(im,2*abs_tol);
        }
        else
        {
            CHECK_CLOSE(im, im_kiss, tol);
        }
    }
    free(in_kiss);
    free(out_kiss);
    free(cfg);
    fft3d_compare_cuda_check(cudaFree(out_d), "cudaFree(out_d)");
    fft3d_compare_cuda_check(cudaFree(in_d), "cudaFree(in_d)");
    free(in_h);
    free(out_h);
    free(pidx);
    free(pdim);
    free(dim_glob);
    dfft_cuda_destroy_plan(plan);
}