template <typename Dtype>
Dtype Blob<Dtype>::sumsq_diff() const {
  Dtype sumsq;
  const Dtype* diff;
  if (!diff_) { return 0; }
  // Compute the sum of squares on whichever device currently owns the diff.
  switch (diff_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    diff = cpu_diff();
    sumsq = caffe_cpu_dot(count_, diff, diff);
    break;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    diff = gpu_diff();
    caffe_gpu_dot(count_, diff, diff, &sumsq);
    break;
#else
    NO_GPU;
#endif
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
  }
  return sumsq;
}
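// A minimal standalone sketch, not Caffe code, of what the CPU branch of
// sumsq_diff() computes: dotting a buffer with itself yields the sum of its
// squared elements. The helper name sumsq_cpu is hypothetical.
#include <cstdio>
#include <numeric>
#include <vector>

template <typename Dtype>
Dtype sumsq_cpu(const std::vector<Dtype>& x) {
  // Equivalent to caffe_cpu_dot(x.size(), x.data(), x.data()).
  return std::inner_product(x.begin(), x.end(), x.begin(), Dtype(0));
}

int main() {
  const std::vector<float> diff = {1.0f, -2.0f, 3.0f};
  std::printf("%f\n", sumsq_cpu(diff));  // 14.000000 = 1 + 4 + 9
  return 0;
}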
template <typename Dtype>
Dtype Blob<Dtype>::sumsq_data() const {
  Dtype sumsq;
  const Dtype* data;
  if (!data_) { return 0; }
  switch (data_->head()) {
  // The private (prv) buffer may hold a different element count than count_.
  case SyncedMemory::SYNCED_PRV:
  case SyncedMemory::HEAD_AT_PRV:
    data = prv_data();
    sumsq = caffe_cpu_dot(prv_data_count(), data, data);
    break;
  case SyncedMemory::HEAD_AT_CPU:
    data = cpu_data();
    sumsq = caffe_cpu_dot(count_, data, data);
    break;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    data = gpu_data();
    caffe_gpu_dot(count_, data, data, &sumsq);
#else
    NO_GPU;
#endif
    break;
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
  }
  return sumsq;
}
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);
    // Each loss-bearing top contributes the dot product of its data with its
    // loss weights, which are stored in the top blob's diff.
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->cpu_data();
      const Dtype* loss_weights = top[top_id]->cpu_diff();
      loss += caffe_cpu_dot(count, data, loss_weights);
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->gpu_data();
      const Dtype* loss_weights = top[top_id]->gpu_diff();
      Dtype blob_loss = 0;
      caffe_gpu_dot(count, data, loss_weights, &blob_loss);
      loss += blob_loss;
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  return loss;
}
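// Standalone sketch, not Caffe code, of the loss accumulation in Forward():
// each loss-bearing top contributes the dot product of its data with its
// loss weights, which Caffe keeps in that top blob's diff. The helper name
// weighted_loss is hypothetical.
#include <cstdio>
#include <vector>

float weighted_loss(const std::vector<float>& top_data,
                    const std::vector<float>& loss_weights) {
  // Same reduction as caffe_cpu_dot(count, data, loss_weights).
  float loss = 0.0f;
  for (size_t i = 0; i < top_data.size(); ++i) {
    loss += top_data[i] * loss_weights[i];
  }
  return loss;
}

int main() {
  // A scalar loss top with the default loss weight of 1.
  const std::vector<float> top_data = {0.693f};
  const std::vector<float> loss_weights = {1.0f};
  std::printf("loss = %f\n", weighted_loss(top_data, loss_weights));
  return 0;
}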
template <typename Dtype>
Dtype Tensor<Dtype>::DotPFrom(const Tensor& source) {
  if (source.count() != count_) {
    ASSERT(false, "Trying to dot blobs of different counts: "
        << source.count() << " != " << count_);
  }
  Dtype result;
  switch (mode()) {
  case Caffe::CPU:
    result = caffe_cpu_dot(count_, source.cpu_mem(), this->cpu_mem());
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    caffe_gpu_dot(count_, source.gpu_mem(), this->gpu_mem(), &result);
#else
    NO_GPU;  // Fatal in CPU-only builds, so result is never read uninitialized.
#endif
    break;
  default:
    ASSERT(false, "Unknown caffe mode.");
  }
  return result;
}
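// Standalone sketch, not from the Tensor class above, of the same contract:
// refuse to dot buffers of different counts, then reduce. All names here are
// illustrative.
#include <cassert>
#include <cstdio>
#include <vector>

template <typename Dtype>
Dtype dot_checked(const std::vector<Dtype>& a, const std::vector<Dtype>& b) {
  // Mirrors the count check guarding DotPFrom().
  assert(a.size() == b.size() && "Trying to dot blobs of different counts");
  Dtype result = 0;
  for (size_t i = 0; i < a.size(); ++i) {
    result += a[i] * b[i];
  }
  return result;
}

int main() {
  const std::vector<double> a = {1, 2, 3};
  const std::vector<double> b = {4, 5, 6};
  std::printf("%f\n", dot_checked(a, b));  // 32.000000
  return 0;
}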
template <typename Dtype>
Dtype Tensor<Dtype>::sumsq() const {
  Dtype sumsq;
  const Dtype* data;
  if (!mem_) { return 0; }
  switch (mem_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    data = cpu_mem();
    sumsq = caffe_cpu_dot(count_, data, data);
    break;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    data = gpu_mem();
    caffe_gpu_dot(count_, data, data, &sumsq);
#else
    NO_GPU;
#endif
    break;
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << mem_->head();
  }
  return sumsq;
}
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Lock during forward to ensure sequential forward.
  Lock();
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->cpu_data();
      const Dtype* loss_weights = top[top_id]->cpu_diff();
      loss += caffe_cpu_dot(count, data, loss_weights);
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    // Dispatch the loss reduction to the backend that owns the device.
    if (device_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
      for (int top_id = 0; top_id < top.size(); ++top_id) {
        if (!this->loss(top_id)) { continue; }
        const int count = top[top_id]->count();
        const Dtype* data = top[top_id]->gpu_data();
        const Dtype* loss_weights = top[top_id]->gpu_diff();
        Dtype blob_loss = 0;
        caffe_gpu_dot(count, data, loss_weights, &blob_loss);
        loss += blob_loss;
      }
#endif  // USE_CUDA
    } else {
#ifdef USE_GREENTEA
      for (int top_id = 0; top_id < top.size(); ++top_id) {
        if (!this->loss(top_id)) { continue; }
        const int count = top[top_id]->count();
        cl_mem data = (cl_mem) (top[top_id]->gpu_data());
        cl_mem loss_weights = (cl_mem) (top[top_id]->gpu_diff());
        Dtype blob_loss = 0;
        greentea_gpu_dot(this->device_->id(), count, data, 0,
                         loss_weights, 0, &blob_loss);
        loss += blob_loss;
      }
#endif  // USE_GREENTEA
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  Unlock();
  return loss;
}
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Lock during forward to ensure sequential forward.
  Lock();
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->cpu_data();
      const Dtype* loss_weights = top[top_id]->cpu_diff();
      loss += caffe_cpu_dot(count, data, loss_weights);
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->gpu_data();
      const Dtype* loss_weights = top[top_id]->gpu_diff();
      Dtype blob_loss = 0;
      caffe_gpu_dot(count, data, loss_weights, &blob_loss);
      loss += blob_loss;
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  Unlock();
  return loss;
}
template <typename Dtype>
void EuclideanLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  caffe_gpu_sub(count, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
      diff_.mutable_gpu_data());
  Dtype dot;
  caffe_gpu_dot(count, diff_.gpu_data(), diff_.gpu_data(), &dot);
  Dtype loss = dot / bottom[0]->num() / Dtype(2);
  top[0]->mutable_cpu_data()[0] = loss;
}
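// Standalone numerical sketch, not Caffe code, of the Euclidean loss formula
// used above: loss = ||a - b||^2 / (2 * num), where num is the batch size.
// The names euclidean_loss, pred, and label are illustrative.
#include <cstdio>
#include <vector>

float euclidean_loss(const std::vector<float>& pred,
                     const std::vector<float>& label, int num) {
  float dot = 0.0f;
  for (size_t i = 0; i < pred.size(); ++i) {
    const float d = pred[i] - label[i];  // plays the role of diff_ above
    dot += d * d;
  }
  return dot / num / 2.0f;
}

int main() {
  // One sample (num = 1) with two outputs.
  const std::vector<float> pred = {1.0f, 2.0f};
  const std::vector<float> label = {0.0f, 0.0f};
  std::printf("loss = %f\n", euclidean_loss(pred, label, 1));  // 2.500000
  return 0;
}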
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Log the shape of every bottom blob before taking the forward lock.
  for (int i = 0; i < bottom.size(); ++i) {
    LOG(INFO) << bottom[i]->shape_string();
  }
  // Lock during forward to ensure sequential forward.
  Lock();
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  // The CPU path is disabled in this variant; only GPU mode is supported.
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->gpu_data();
      const Dtype* loss_weights = top[top_id]->gpu_diff();
      Dtype blob_loss = 0;
      caffe_gpu_dot(count, data, loss_weights, &blob_loss);
      loss += blob_loss;
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  Unlock();
  return loss;
}
template <typename Dtype>
Dtype Blob<Dtype>::sumsq_diff() const {
  Dtype sumsq;
  const Dtype* diff;
  if (!diff_) { return 0; }
  switch (diff_->head()) {
  case SyncedMemory::HEAD_AT_CPU: {
    diff = cpu_diff();
    sumsq = caffe_cpu_dot(count_, diff, diff);
    break;
  }
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED: {
#ifndef CPU_ONLY
    diff = gpu_diff();
    // Dispatch the reduction to the backend that owns the device.
    if (device_->backend() == Backend::BACKEND_CUDA) {
#ifdef USE_CUDA
      caffe_gpu_dot(count_, diff, diff, &sumsq);
#endif
    } else {
#ifdef USE_GREENTEA
      greentea_gpu_dot(device_->id(), count_, (cl_mem) diff, 0,
                       (cl_mem) diff, 0, &sumsq);
#endif
    }
#else
    NO_GPU;
#endif
    break;
  }
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
  }
  return sumsq;
}