void BiasChannelLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // TODO(gpapan): write a CUDA kernel for this case
  const BiasChannelParameter_LabelType label_type =
      this->layer_param_.bias_channel_param().label_type();
  if (label_type == BiasChannelParameter_LabelType_PIXEL) {
    Forward_cpu(bottom, top);
    return;
  }
  caffe_copy(bottom[0]->count(), bottom[0]->gpu_data(),
      top[0]->mutable_gpu_data());
  for (int n = 0; n < num_; ++n) {
    for (int j = 0; j < max_labels_; ++j) {
      const int label = static_cast<int>(*bottom[1]->cpu_data_at(n, j));
      if (ignore_label_.count(label) != 0) {
        continue;
      } else if (label >= 0 && label < channels_) {
        // Bias the foreground or background scores
        const Dtype bias = (label == 0) ? bg_bias_ : fg_bias_;
        caffe_gpu_add_scalar(height_ * width_, bias,
            top[0]->mutable_gpu_data_at(n, label));
      } else {
        LOG(FATAL) << "Unexpected label " << label;
      }
    }
  }
}
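// Note: mutable_gpu_data_at(n, label) is an accessor specific to this fork.
// A minimal sketch of the same per-(n, label) plane addressing on a stock
// Caffe Blob, assuming only the standard offset() helper (illustrative, not
// the original code):
Dtype* plane = top[0]->mutable_gpu_data() + top[0]->offset(n, label);
caffe_gpu_add_scalar(height_ * width_, bias, plane);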
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->cpu_data();
      const Dtype* loss_weights = top[top_id]->cpu_diff();
      loss += caffe_cpu_dot(count, data, loss_weights);
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->gpu_data();
      const Dtype* loss_weights = top[top_id]->gpu_diff();
      Dtype blob_loss = 0;
      caffe_gpu_dot(count, data, loss_weights, &blob_loss);
      loss += blob_loss;
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  return loss;
}
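// For reference, the per-blob loss reduction above is a plain dot product:
// loss layers store their loss weight in the top blob's diff, and each blob
// contributes dot(top_data, loss_weights). A minimal standalone sketch of the
// same accumulation (hypothetical buffers, STL instead of caffe_cpu_dot):
#include <numeric>
#include <vector>

float blob_loss(const std::vector<float>& top_data,
                const std::vector<float>& loss_weights) {
  // Equivalent of caffe_cpu_dot(count, data, loss_weights).
  return std::inner_product(top_data.begin(), top_data.end(),
                            loss_weights.begin(), 0.0f);
}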
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Lock during forward to ensure sequential forward
  Lock();
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->cpu_data();
      const Dtype* loss_weights = top[top_id]->cpu_diff();
      loss += caffe_cpu_dot(count, data, loss_weights);
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    if (device_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
      for (int top_id = 0; top_id < top.size(); ++top_id) {
        if (!this->loss(top_id)) { continue; }
        const int count = top[top_id]->count();
        const Dtype* data = top[top_id]->gpu_data();
        const Dtype* loss_weights = top[top_id]->gpu_diff();
        Dtype blob_loss = 0;
        caffe_gpu_dot(count, data, loss_weights, &blob_loss);
        loss += blob_loss;
      }
#endif  // USE_CUDA
    } else {
#ifdef USE_GREENTEA
      for (int top_id = 0; top_id < top.size(); ++top_id) {
        if (!this->loss(top_id)) { continue; }
        const int count = top[top_id]->count();
        cl_mem data = (cl_mem) (top[top_id]->gpu_data());
        cl_mem loss_weights = (cl_mem) (top[top_id]->gpu_diff());
        Dtype blob_loss = 0;
        greentea_gpu_dot(this->device_->id(), count, data, 0,
                         loss_weights, 0, &blob_loss);
        loss += blob_loss;
      }
#endif  // USE_GREENTEA
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  Unlock();
  return loss;
}
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Lock during forward to ensure sequential forward
  Lock();
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->cpu_data();
      const Dtype* loss_weights = top[top_id]->cpu_diff();
      loss += caffe_cpu_dot(count, data, loss_weights);
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->gpu_data();
      const Dtype* loss_weights = top[top_id]->gpu_diff();
      Dtype blob_loss = 0;
      caffe_gpu_dot(count, data, loss_weights, &blob_loss);
      loss += blob_loss;
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  Unlock();
  return loss;
}
void ReconstructionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Fill the tops once via the base DummyDataLayer, then disable refilling
  // and register the top blobs as this layer's blobs.
  allow_refill_ = true;
  DummyDataLayer<Dtype>::LayerSetUp(bottom, top);
  Forward_cpu(bottom, top);
  allow_refill_ = false;
  this->blobs_.resize(top.size());
  for (int i = 0; i < top.size(); ++i) {
    this->blobs_[i].reset(top[i]);
  }
}
inline void Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
void TanHLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  // Delegate to the element-wise helper, which computes
  // top[i] = (exp(2x) - 1) / (exp(2x) + 1) = tanh(bottom[i]).
  Forward_cpu(count, bottom_data, top_data);
}
void LibProcessLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  vector<const void*> data_bottom;
  vector<void*> data_top;
  if (iface_.forward_gpu) {
    for (int i = 0; i < bottom.size(); i++) {
      data_bottom.push_back(bottom[i]->gpu_data());
    }
    for (int i = 0; i < top.size(); i++) {
      data_top.push_back(top[i]->mutable_gpu_data());
    }
    iface_.forward_gpu(libuserdata_, data_bottom, data_top);
  } else {
    Forward_cpu(bottom, top);
  }
}
void MaximumCaffe<T>::Forward(const std::vector<caffe::Blob<T>*>& bottom,
                              const std::vector<caffe::Blob<T>*>& top) {
  try {
    // CUDA
    #ifdef USE_CUDA
      Forward_gpu(bottom, top);
    // OpenCL or CPU
    #else
      // CPU Version is already very fast (4ms)
      Forward_cpu(bottom, top);
    #endif
  } catch (const std::exception& e) {
    error(e.what(), __LINE__, __FUNCTION__, __FILE__);
  }
}
/**
 * @brief Using the GPU device, compute the layer output.
 *        Fall back to Forward_cpu() if unavailable.
 */
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // LOG(WARNING) << "Using CPU code as backup.";
  return Forward_cpu(bottom, top);
}
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Forward_cpu(bottom, top);
}
void EuclideanLossHeatmapLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Forward_cpu(bottom, top);
}
void MultiStageCRFLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Forward_cpu(bottom, top);
}
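// The three snippets above implement the GPU-to-CPU fallback by hand; with a
// base class providing the default Forward_gpu() shown earlier, a CPU-only
// layer can simply omit it. A minimal sketch under that assumption
// (hypothetical MyCpuOnlyLayer, standard Caffe layer interface):
template <typename Dtype>
class MyCpuOnlyLayer : public Layer<Dtype> {
 public:
  explicit MyCpuOnlyLayer(const LayerParameter& param) : Layer<Dtype>(param) {}
  virtual inline const char* type() const { return "MyCpuOnly"; }
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    top[0]->ReshapeLike(*bottom[0]);
  }

 protected:
  // Only the CPU path is implemented; in GPU mode the base-class defaults for
  // Forward_gpu()/Backward_gpu() route back to these methods.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    // Identity mapping as a placeholder body.
    caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(),
        top[0]->mutable_cpu_data());
  }
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    if (propagate_down[0]) {
      caffe_copy(top[0]->count(), top[0]->cpu_diff(),
          bottom[0]->mutable_cpu_diff());
    }
  }
};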