/**
 * Smooth raw_vec by convolving it with this->weights in the frequency domain,
 * using FFTPACK's real transforms (rffti / rfftf / rfftb).
 *
 * Outline (n = raw_vec.size(), M = weights.size(), M_mid = M / 2):
 *   1. Zero-pad the signal to N = n + M_mid + 1 samples so the circular
 *      convolution does not wrap kernel taps back onto the first n outputs.
 *   2. Lay the kernel out in wrap-around order: the centre tap at index 0,
 *      taps right of centre at indices 1..M_mid, taps left of centre at
 *      indices N-1..N-M_mid.
 *   3. Forward-transform both arrays, multiply the spectra as complex
 *      numbers, inverse-transform, and scale by 1/N (FFTPACK's transforms
 *      are unnormalised: rfftb(rfftf(x)) == N * x).
 *
 * @param raw_vec  input samples.
 * @param out_vec  destination. It is NOT resized here: the caller must size
 *                 it beforehand, and at most N elements are written.
 * @param test     unused.
 *
 * When built without HAVE_FFTPACK this throws a C string literal.
 */
void ChromSmoother::smooth_vect_fft( const std::vector<float> & raw_vec, std::vector<float> & out_vec, bool test) {
    (void)test;
#ifdef HAVE_FFTPACK
    int n = static_cast<int>(raw_vec.size());
    std::cerr << "n % 2" << n % 2 << " , n: " << n << std::endl;

    const int M = static_cast<int>(this->weights.size());
    assert( M % 2 == 1 );
    const int M_mid = M / 2;
    int N = n + M_mid + 1;

    // Signal, zero-padded up to the transform length.
    std::vector<float> input_padded(raw_vec);
    input_padded.resize(N, 0.0f);

    // Kernel in wrap-around order. The bound is inclusive so that the two
    // outermost taps (weights[0] and weights[M-1]) are not dropped.
    std::vector<float> weights_padded(N, 0.0f);
    weights_padded[0] = this->weights[M_mid];
    for ( int i = 1 ; i <= M_mid ; i++ ) {
        weights_padded[N-i] = this->weights[M_mid-i];
        weights_padded[i]   = this->weights[M_mid+i];
    }

    // One work table serves every length-N transform, forward and backward.
    // The allocation keeps the original (generous) 8*N+15 size.
    std::vector<float> wsave(8*N+15, 0.0f);
    std::vector<float> fft_prod(N, 0.0f);
    std::cerr << "finished padding" << std::endl;

    rffti( N, &(wsave[0]) );

    rfftf( N, &(input_padded[0]), &(wsave[0]) );
    std::cerr << "weights fft 1" << std::endl;
    rfftf( N, &(weights_padded[0]), &(wsave[0]) );

    // rfftf returns a packed half-complex spectrum:
    //   [0]            real DC term
    //   [2k-1], [2k]   real / imaginary part of bin k, k = 1..(N-1)/2
    //   [N-1]          real Nyquist term (only when N is even)
    // so the spectra must be multiplied as complex numbers, not element-wise.
    fft_prod[0] = input_padded[0] * weights_padded[0];
    const int n_complex_bins = (N - 1) / 2;
    for ( int k = 1 ; k <= n_complex_bins ; k++ ) {
        const int re = 2*k - 1;
        const int im = 2*k;
        const float a = input_padded[re];
        const float b = input_padded[im];
        const float c = weights_padded[re];
        const float d = weights_padded[im];
        fft_prod[re] = a*c - b*d;
        fft_prod[im] = a*d + b*c;
    }
    if ( N % 2 == 0 ) {
        fft_prod[N-1] = input_padded[N-1] * weights_padded[N-1];
    }
    std::cerr << "product 1" << std::endl;

    rfftb( N, &(fft_prod[0]), &(wsave[0]) );
    std::cerr << "backwards fft" << std::endl;

    // Undo FFTPACK's factor of N while copying, and never read past the
    // N samples that were actually computed.
    const float inv_N = 1.0f / static_cast<float>(N);
    const std::vector<float>::size_type computed = fft_prod.size();
    const std::vector<float>::size_type n_copy =
        out_vec.size() < computed ? out_vec.size() : computed;
    for ( std::vector<float>::size_type i = 0 ; i < n_copy ; i++ ) {
        out_vec[i] = fft_prod[i] * inv_N;
    }
    std::cerr << "out_vec filled" << std::endl;
#else
    throw("Forget about trying to call smooth_vect_fft if you don't have FFTPACK");
#endif
}
int main(int, char **) { int N = 1; int W = 1001; int H = 1000; int F_In = 6; int F_Out = 4; int K_W = 5; int K_H = 3; // Note that Halide indices are reversed Halide::Buffer<int> parameters(7); Halide::Buffer<float> input_padded(F_In, H + K_H - 1, W + K_W - 1, N); Halide::Buffer<float> input_col(K_H, K_W, F_In, H, W, N); Halide::Buffer<float> kernel(F_Out, K_H, K_W, F_In); Halide::Buffer<float> output(F_Out, H, W, N); Halide::Buffer<float> output_test(F_Out, H, W, N); parameters(0) = N; parameters(1) = W; parameters(2) = H; parameters(3) = F_In; parameters(4) = F_Out; parameters(5) = K_W; parameters(6) = K_H; init_buffer(input_padded, (float)0); init_buffer(kernel, (float)1); // With decimal values test might fail due to floating point arithmetic. for (int n = 0; n < N; n++) { for (int x = 0; x < W; x++) { for (int y = 0; y < H; y++) { for (int c = 0; c < F_In; c++) { input_padded(c, y + (K_H - 1) / 2, x + (K_W - 1) / 2, n) = 1; } } } } for (int f_out = 0; f_out < F_Out; f_out++) { for (int k_x = 0; k_x < K_W; k_x++) { for (int k_y = 0; k_y < K_H; k_y++) { for (int f_in = 0; f_in < F_In; f_in++) { kernel(f_out, k_y, k_x, f_in) = 1; } } } } bool test = true; if (test) { init_buffer(output_test, (float)0); for (int n = 0; n < N; n++) { for (int x = 0; x < W; x++) { for (int y = 0; y < H; y++) { for (int k_x = 0; k_x < K_W; k_x++) { for (int k_y = 0; k_y < K_H; k_y++) { for (int f_in = 0; f_in < F_In; f_in++) { for (int f_out = 0; f_out < F_Out; f_out++) { output_test(f_out, y, x, n) += input_padded(f_in, y + k_y, x + k_x, n) * kernel(f_out, k_y, k_x, f_in); } } } } } } } } std::cout << "Buffers initialized" << std::endl; gemm_conv(parameters.raw_buffer(), input_padded.raw_buffer(), input_col.raw_buffer(), input_col.raw_buffer(), kernel.raw_buffer(), output.raw_buffer()); if (test) { compare_buffers("convs", output, output_test); } bool print = false; if (print) { for (int y = 0; y < H; y++) { for (int x = 0; x < W; x++) { std::printf("%3.1f ", input_col(0, 0, 0, y, 
x, 0)); } std::cout << std::endl; } for (int y = 0; y < H; y++) { for (int x = 0; x < W; x++) { std::printf("%3.1f ", output(0, y, x, 0)); } std::cout << std::endl; } } return 0; }