Ejemplo n.º 1
0
/*
 * GPU FFT accuracy/timing test.
 *
 * Command line: log2_N [jobs [loops]]
 *   argv[1]  log2 of the FFT length (required)
 *   argv[2]  transforms per batch, must be >= 1 (default 1)
 *   argv[3]  test repetitions,     must be >= 1 (default 1)
 *
 * For each repetition every job's input is set to a single real tone at
 * bin j+1 (and its mirror bin N-(j+1)), the batch is executed with
 * GPU_FFT_REV, and the output is compared against cos(2*pi*freq*i/N).
 * The relative RMS error and the time per transform are printed.
 *
 * Returns 0 on success, -1 on bad arguments or if gpu_fft_prepare fails.
 */
int main(int argc, char *argv[]) {
    int i, j, k, ret, loops, freq, log2_N, jobs, N, mb;
    unsigned t[2];
    double tsq[2];

    GPU_FFT_COMPLEX *base;
    struct GPU_FFT *fft;

    log2_N = argc>1? atoi(argv[1]) : 12; // 8 <= log2_N <= 21
    jobs   = argc>2? atoi(argv[2]) : 1;  // transforms per batch
    loops  = argc>3? atoi(argv[3]) : 1;  // test repetitions

    if (argc<2 || jobs<1 || loops<1) {
        // Never pass a non-literal as the format string.
        printf("%s", Usage);
        return -1;
    }

    // Open the mailbox only once the arguments are known to be usable, so
    // the usage path above does not leave it open.
    mb = mbox_open();
    ret = gpu_fft_prepare(mb, log2_N, GPU_FFT_REV, jobs, &fft); // call once

    switch(ret) {
        case -1: printf("Unable to enable V3D. Please check your firmware is up to date.\n"); return -1;
        case -2: printf("log2_N=%d not supported.  Try between 8 and 21.\n", log2_N);         return -1;
        case -3: printf("Out of memory.  Try a smaller batch or increase GPU memory.\n");     return -1;
        case -4: printf("Unable to map Videocore peripherals into ARM memory space.\n");      return -1;
        default:
            // Any other negative code is still a failure; fft would be
            // uninitialized below, so do not continue.
            if (ret < 0) {
                printf("gpu_fft_prepare failed with error %d.\n", ret);
                return -1;
            }
            break;
    }

    // Computed only after gpu_fft_prepare accepted log2_N, so the shift
    // count is known to be in range (an unchecked value could make the
    // shift undefined).
    N = 1<<log2_N; // FFT length

    for (k=0; k<loops; k++) {

        // NOTE(review): the tone placement below assumes jobs is small
        // enough that freq and N-freq are distinct indices inside the
        // job's N-point buffer (jobs < N/2) -- this is not checked here.
        for (j=0; j<jobs; j++) {
            base = fft->in + j*fft->step; // input buffer
            for (i=0; i<N; i++) base[i][0] = base[i][1] = 0;
            freq = j+1;
            base[freq][0] = base[N-freq][0] = 0.5;
        }

        usleep(1); // Yield to OS
        t[0] = Microseconds();
        gpu_fft_execute(fft); // call one or many times
        t[1] = Microseconds();

        // tsq[0] accumulates the reference signal energy, tsq[1] the
        // squared error between reference and GPU output.
        tsq[0]=tsq[1]=0;
        for (j=0; j<jobs; j++) {
            base = fft->out + j*fft->step; // output buffer
            freq = j+1;
            for (i=0; i<N; i++) {
                double re = cos(2*GPU_FFT_PI*freq*i/N);
                tsq[0] += pow(re, 2);
                tsq[1] += pow(re - base[i][0], 2) + pow(base[i][1], 2);
            }
        }

        // (t[1]-t[0])/jobs is unsigned, so it is printed with %u.
        printf("rel_rms_err = %0.2g, usecs = %u, k = %d\n",
            sqrt(tsq[1]/tsq[0]), (t[1]-t[0])/jobs, k);
    }

    gpu_fft_release(fft); // Videocore memory lost if not freed !
    return 0;
}
Ejemplo n.º 2
0
/*
 * Transform `buffer` in place through the file-level `fft` handle.
 *
 * The first N values of `buffer` are loaded as the real parts of the
 * transform input (imaginary parts set to zero), gpu_fft_execute() is
 * run, and each of the N values in `buffer` is then overwritten with the
 * magnitude sqrt(re^2 + im^2) of the corresponding output element.
 *
 * buffer: at least N doubles; read and then overwritten.
 */
void do_fft(double *buffer) {
	unsigned int idx;

	// Load the samples as purely real input
	idx = 0;
	while (idx < N) {
		fft->in[idx].re = buffer[idx];
		fft->in[idx].im = 0.0;
		idx++;
	}

	// Run the transform
	gpu_fft_execute(fft);

	// Replace each sample with the magnitude of its output bin
	for (idx = 0; idx < N; ++idx) {
		double re_sq = pow(fft->out[idx].re, 2);
		double im_sq = pow(fft->out[idx].im, 2);
		buffer[idx] = sqrt(re_sq + im_sq);
	}
}
Ejemplo n.º 3
0
Archivo: main.c Proyecto: alexburov/fft
/*
 * Time one forward FFT with log2_N = 12 (a single job) and print the
 * elapsed microseconds.
 *
 * NOTE(review): nothing is written to fftinfo->in before the transform
 * is executed and the output is never read, so this measures execution
 * time only.
 *
 * Returns 0 on success, -1 if gpu_fft_prepare reports an error.
 */
int main(int argc, char *argv[]) {
     unsigned t[2];
     struct GPU_FFT *fftinfo;
     int ret;

     int mb = mbox_open();
     ret = gpu_fft_prepare(mb,12,GPU_FFT_FWD,1,&fftinfo);
     if (ret < 0) {
          // fftinfo is not valid after a failed prepare; do not use it.
          printf("gpu_fft_prepare failed with error %d\n", ret);
          return -1;
     }

     usleep(1); // Yield to OS
     t[0] = Microseconds();
     gpu_fft_execute(fftinfo); // call one or many times
     t[1] = Microseconds();

     // The difference of two unsigned values is unsigned: print with %u.
     printf("usecs = %u\n", (t[1]-t[0]));

     gpu_fft_release(fftinfo); // Videocore memory lost if not freed !
     return 0;
}