static decltype(auto) call(std::vector<neu::layer::any_layer>& layers, InputRange const& initial_delta, OutputRange& result_prev_delta, boost::compute::command_queue& queue) { gpu_vector delta(initial_delta.begin(), initial_delta.end(), queue); gpu_vector prev_delta(queue.get_context()); for(int i = layers.size()-1; i >= 0; --i) { auto& l = layers.at(i); prev_delta.resize(::neu::layer::whole_input_size(l), queue); auto prev_delta_range = range::to_range(prev_delta); #ifdef NEU_BENCHMARK_ENABLE boost::timer t; #endif //NEU_BENCHMARK_ENABLE l.backward( range::to_range(delta), prev_delta_range, queue); #ifdef NEU_BENCHMARK_ENABLE queue.finish(); std::cout << "layer" << i << "\tbackward\t" << t.elapsed() << " secs" << std::endl; #endif //NEU_BENCHMARK_ENABLE delta.swap(prev_delta); } range::copy(delta, result_prev_delta, queue); }
static decltype(auto) call( std::vector<neu::layer::any_layer>& layers, int batch_size, InputRange const& initial_input, OutputRange& result_output, boost::compute::command_queue& queue) { gpu_vector input(initial_input.begin(), initial_input.end(), queue); gpu_vector output(queue.get_context()); int i = 0; for(auto& l : layers) { output.resize(::neu::layer::output_dim(l)*batch_size, queue); /* std::cout << "whole" << ::neu::layer::whole_output_size(l) << std::endl; std::cout << "i" << i << std::endl; std::cout << "aa" << output.size() << std::endl; */ auto output_range = range::to_range(output); #ifdef NEU_BENCHMARK_ENABLE boost::timer t; #endif //NEU_BENCHMARK_ENABLE l.test_forward(batch_size, range::to_range(input), output_range, queue); #ifdef NEU_BENCHMARK_ENABLE queue.finish(); std::cout << "layer" << i << "\ttest_forward\t" << t.elapsed() << " secs" << std::endl; #endif //NEU_BENCHMARK_ENABLE input.swap(output); ++i; } range::copy(input, result_output, queue); }
// Builds the table from a whole range by handing the range's begin()/end()
// iterators to initialize(). Declared explicit, so a range is never
// implicitly converted into a table.
explicit element_position_bitmask_table(const InputRange& sequence)
{
    auto first = sequence.begin();
    auto last = sequence.end();
    initialize(first, last);
}