/*
 * memcpy() - copy @n bytes from @from to @to, returning @to.
 *
 * Short copies go straight to the hand-written assembly routine;
 * copies of LARGE_COPY_CUTOFF bytes or more are routed through
 * fast_copy(), which is handed __memcpy_asm as its fallback worker.
 */
void *memcpy(void *to, const void *from, __kernel_size_t n)
{
	if (n >= LARGE_COPY_CUTOFF)
		return (void *)fast_copy(to, from, n, __memcpy_asm);

	return (void *)__memcpy_asm(to, from, n);
}
/*
 * __copy_from_user_zeroing() - copy @n bytes from user space to @to.
 *
 * Dispatches on size exactly like memcpy(): small requests use the
 * assembly routine directly, large ones go through fast_copy() with
 * __copy_from_user_zeroing_asm as the underlying worker.  The return
 * value is whatever the worker reports (conventionally the byte count
 * left uncopied — confirm against the asm implementation).
 */
unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
				       unsigned long n)
{
	return (n < LARGE_COPY_CUTOFF)
		? __copy_from_user_zeroing_asm(to, from, n)
		: fast_copy(to, from, n, __copy_from_user_zeroing_asm);
}
/*
 * __copy_to_user_inatomic() - copy @n bytes from @from out to user space.
 *
 * Same size-based dispatch as the other copy wrappers in this file:
 * below LARGE_COPY_CUTOFF the assembly routine is called directly,
 * otherwise fast_copy() drives __copy_to_user_inatomic_asm.
 */
unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
				      unsigned long n)
{
	if (n >= LARGE_COPY_CUTOFF)
		return fast_copy(to, from, n, __copy_to_user_inatomic_asm);

	return __copy_to_user_inatomic_asm(to, from, n);
}
void prepare_householder_vector( viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& A, viennacl::vector<SCALARTYPE, ALIGNMENT>& D, vcl_size_t size, vcl_size_t row_start, vcl_size_t col_start, vcl_size_t start, bool is_column ) { boost::numeric::ublas::vector<SCALARTYPE> tmp = boost::numeric::ublas::scalar_vector<SCALARTYPE>(size, 0); copy_vec(A, D, row_start, col_start, is_column); fast_copy(D.begin(), D.begin() + vcl_ptrdiff_t(size - start), tmp.begin() + start); //std::cout << "1: " << tmp << "\n"; detail::householder_vector(tmp, start); fast_copy(tmp, D); //std::cout << "2: " << D << "\n"; }
// Trains `network` on `ds` for `epochs` passes of online (per-sample)
// backpropagation, printing the average RMS error after each epoch.
//
// NOTE(review): the per-layer scratch buffers below are allocated with
// `new` and push_back'd but never freed here — calling train() twice
// would also re-append to sigmas_/outputs_/derivatives_.  Verify
// ownership/cleanup happens elsewhere (e.g. the destructor).
void BackpropTrainer::train(FeedForwardNetwork& network, LabeledDataset& ds, int epochs) {
  // Allocate one sigma/output/derivative buffer per layer, sized to the
  // layer's unit count k.
  FeedForwardNetwork::LayerIterator lit;
  for (lit = network.layers_begin(); lit != network.layers_end(); ++lit) {
    int k = lit->get_k();
    sigmas_.push_back(lit->clone());
    outputs_.push_back(new viennacl::vector<float>(k));
    derivatives_.push_back(new viennacl::vector<float>(k));
  }
  // Sample indices 0..size-1; reshuffled each epoch for stochastic order.
  std::vector<int> indexes;
  for (int i = 0; i < ds.size(); ++i) { indexes.push_back(i); }
  viennacl::vector<float> error;
  std::vector<float> host_error(ds.output_size(), 0);
  for (int i = 0; i < epochs; ++i) {
    float avg_rms_error = 0.0f;
    // NOTE(review): std::random_shuffle is deprecated in C++14 and
    // removed in C++17 — consider std::shuffle if the toolchain moves on.
    std::random_shuffle(indexes.begin(), indexes.end());
    for (std::vector<int>::iterator it = indexes.begin(); it != indexes.end(); ++it) {
      const viennacl::vector<float>& input = ds.get_input(*it);
      const viennacl::vector<float>& output = ds.get_output(*it);
      // Forward pass.
      network.get_input_layer().get_value() = input;
      for (int j = 0; j < network.get_num_connections(); ++j) {
        Layer& layer = network.get_layer(j);
        Layer& next_layer = network.get_layer(j + 1);
        Connection& conn = network.get_connection(j);
        // Activate layer j, capturing its activation derivative for the
        // backward pass, then propagate into layer j+1.
        layer.activate(derivatives_[j]);
        conn.layer_propogate(layer, next_layer);
        // Block until the queued OpenCL work completes before reusing buffers.
        viennacl::ocl::get_queue().finish();
        // NOTE(review): outputs_ was filled with new'd vector pointers
        // above, yet this assigns get_value() to the element directly —
        // verify outputs_'s element type (possible missing dereference
        // and/or leak of the allocated vectors).
        outputs_[j] = layer.get_value();
      }
      Layer& output_layer = network.get_output_layer();
      output_layer.activate(derivatives_.back());
      // Backwards pass.
      // Output-layer delta: elementwise (target - actual) * f'(net).
      error = output - output_layer.get_value();
      sigmas_.back().get_value() = viennacl::linalg::element_prod(derivatives_.back(), error);
      // Walk layers from the output back toward the input, propagating
      // sigmas and applying weight/bias updates via the delegate.
      for (int j = network.get_num_layers() - 1; j > 0; --j) {
        Layer& prev_layer = network.get_layer(j - 1);  // NOTE(review): unused — confirm intent.
        Layer& layer = network.get_layer(j);
        Connection& conn = network.get_connection(j - 1);
        delegate_->backpropogate(sigmas_[j - 1], conn, sigmas_[j], derivatives_[j - 1]);
        viennacl::ocl::get_queue().finish();
        delegate_->update(conn, sigmas_[j].get_value(), outputs_[j - 1]);
        viennacl::ocl::get_queue().finish();
        delegate_->update(layer, sigmas_[j].get_value());
        viennacl::ocl::get_queue().finish();
      }
      // Copy the device-side error to the host and accumulate its RMS.
      fast_copy(error, host_error);
      float rms_error = 0.0f;
      for (size_t j = 0; j < host_error.size(); ++j) {
        rms_error += host_error[j] * host_error[j];
      }
      rms_error = sqrt(rms_error / host_error.size());
      avg_rms_error += rms_error;
    }
    avg_rms_error /= ds.size();
    std::cout << "Epoch=" << i << " Avg-RMS=" << avg_rms_error << std::endl;
  }
}