int main(int argc, char *argv[]) { int i, j, k, ret, loops, freq, log2_N, jobs, N, mb = mbox_open(); unsigned t[2]; double tsq[2]; GPU_FFT_COMPLEX *base; struct GPU_FFT *fft; log2_N = argc>1? atoi(argv[1]) : 12; // 8 <= log2_N <= 21 jobs = argc>2? atoi(argv[2]) : 1; // transforms per batch loops = argc>3? atoi(argv[3]) : 1; // test repetitions if (argc<2 || jobs<1 || loops<1) { printf(Usage); return -1; } N = 1<<log2_N; // FFT length ret = gpu_fft_prepare(mb, log2_N, GPU_FFT_REV, jobs, &fft); // call once switch(ret) { case -1: printf("Unable to enable V3D. Please check your firmware is up to date.\n"); return -1; case -2: printf("log2_N=%d not supported. Try between 8 and 21.\n", log2_N); return -1; case -3: printf("Out of memory. Try a smaller batch or increase GPU memory.\n"); return -1; case -4: printf("Unable to map Videocore peripherals into ARM memory space.\n"); return -1; } for (k=0; k<loops; k++) { for (j=0; j<jobs; j++) { base = fft->in + j*fft->step; // input buffer for (i=0; i<N; i++) base[i][0] = base[i][1] = 0; freq = j+1; base[freq][0] = base[N-freq][0] = 0.5; } usleep(1); // Yield to OS t[0] = Microseconds(); gpu_fft_execute(fft); // call one or many times t[1] = Microseconds(); tsq[0]=tsq[1]=0; for (j=0; j<jobs; j++) { base = fft->out + j*fft->step; // output buffer freq = j+1; for (i=0; i<N; i++) { double re = cos(2*GPU_FFT_PI*freq*i/N); tsq[0] += pow(re, 2); tsq[1] += pow(re - base[i][0], 2) + pow(base[i][1], 2); } } printf("rel_rms_err = %0.2g, usecs = %d, k = %d\n", sqrt(tsq[1]/tsq[0]), (t[1]-t[0])/jobs, k); } gpu_fft_release(fft); // Videocore memory lost if not freed ! return 0; }
void do_fft(double *buffer) { unsigned int ctr = 0; // Copy the buffer into the fft data structure for (ctr = 0; ctr < N; ctr++) { fft->in[ctr].re = buffer[ctr]; fft->in[ctr].im = 0.0; } // Push to the GPU gpu_fft_execute(fft); // Copy back the square magnitudes for welching for (ctr = 0; ctr < N; ctr++) { buffer[ctr] = sqrt( pow(fft->out[ctr].re,2) + pow(fft->out[ctr].im,2)); } }
/*
 * Time one execution of a single-job GPU FFT with log2_N = 12 and print the
 * elapsed microseconds as reported by Microseconds().
 *
 * The input buffer is not filled in; only the execution time is measured.
 *
 * Returns 0 on success, -1 when gpu_fft_prepare reports an error.
 */
int main(int argc, char *argv[]) {
    unsigned t[2];
    struct GPU_FFT *fftinfo;
    int mb, ret;

    // Command-line arguments are not used by this test.
    (void)argc;
    (void)argv;

    mb = mbox_open();

    // fftinfo is only valid when prepare succeeds, so check before using it.
    ret = gpu_fft_prepare(mb, 12, GPU_FFT_FWD, 1, &fftinfo);
    if (ret < 0) {
        printf("gpu_fft_prepare failed (error %d).\n", ret);
        return -1;
    }

    usleep(1); // Yield to OS
    t[0] = Microseconds();
    gpu_fft_execute(fftinfo); // call one or many times
    t[1] = Microseconds();

    // The difference of two unsigned values is unsigned: print with %u.
    printf("usecs = %u\n", t[1]-t[0]);

    gpu_fft_release(fftinfo); // Videocore memory lost if not freed !
    return 0;
}