// Copies the contents of `source` into this blob.
//
// source    - blob to copy from.
// copy_diff - when true the diff buffer is copied, otherwise the data buffer.
// reshape   - when true this blob is reshaped like `source` if the element
//             count or the shape differ; when false such a mismatch is fatal.
//
// The copy is dispatched on Caffe::mode(). In GPU mode the backend of device_
// selects between the CUDA path and the Greentea path; when USE_GREENTEA is
// not defined the non-CUDA branch is empty and nothing is copied.
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  const bool layout_differs =
      source.count() != count_ || source.shape() != shape_;
  if (layout_differs) {
    if (reshape) {
      ReshapeLike(source);
    } else {
      LOG(FATAL)<< "Trying to copy blobs of different sizes.";
    }
  }

  switch (Caffe::mode()) {
    case Caffe::CPU: {
      const Dtype* src = copy_diff ? source.cpu_diff() : source.cpu_data();
      Dtype* dst = static_cast<Dtype*>(
          copy_diff ? diff_->mutable_cpu_data() : data_->mutable_cpu_data());
      caffe_cpu_copy(count_, src, dst);
      break;
    }
    case Caffe::GPU: {
      if (device_->backend() == BACKEND_CUDA) {
        const Dtype* src = copy_diff ? source.gpu_diff() : source.gpu_data();
        Dtype* dst = static_cast<Dtype*>(
            copy_diff ? diff_->mutable_gpu_data() : data_->mutable_gpu_data());
        caffe_copy(count_, src, dst);
      } else {
#ifdef USE_GREENTEA
        // The device pointers are handed to Greentea as cl_mem handles.
        cl_mem src_mem =
            (cl_mem) (copy_diff ? source.gpu_diff() : source.gpu_data());
        cl_mem dst_mem =
            (cl_mem) (copy_diff ? diff_->mutable_gpu_data()
                                : data_->mutable_gpu_data());
        greentea_copy<Dtype>(
            count_, src_mem, 0, dst_mem, 0,
            &viennacl::ocl::get_context(device_->id()));
#endif
      }
      break;
    }
    default:
      LOG(FATAL)<< "Unknown caffe mode.";
  }
}
// Computes the objective used by the gradient checker and writes its gradient
// into the diffs of the top blobs.
//
// top_id < 0 : the objective is half of the sum of squares of all top values;
//              each top diff receives a copy of the corresponding top data.
// top_id >= 0: the objective is element top_data_id of blob top_id times a
//              fixed weight of 2; all top diffs are zeroed and only that one
//              element's diff is set to the weight.
//
// `layer` is not used by the body. Returns the objective value.
Dtype GradientChecker<Dtype>::GetObjAndGradient(const Layer<Dtype>& layer,
    const vector<Blob<Dtype>*>& top, int_tp top_id, int_tp top_data_id) {
  if (top_id >= 0) {
    // Single-element objective: clear every diff first.
    for (int_tp b = 0; b < top.size(); ++b) {
      Blob<Dtype>* blob = top[b];
      Dtype* blob_diff = blob->mutable_cpu_diff();
      caffe_set(blob->count(), Dtype(0), blob_diff);
    }
    const Dtype loss_weight = 2;
    const Dtype single_loss =
        top[top_id]->cpu_data()[top_data_id] * loss_weight;
    top[top_id]->mutable_cpu_diff()[top_data_id] = loss_weight;
    return single_loss;
  }

  // Sum-of-squares objective over all top blobs.
  Dtype loss = 0;
  for (int_tp b = 0; b < top.size(); ++b) {
    Blob<Dtype>* blob = top[b];
    const Dtype* values = blob->cpu_data();
    Dtype* grads = blob->mutable_cpu_diff();
    const int_tp count = blob->count();
    for (int_tp k = 0; k < count; ++k) {
      loss += values[k] * values[k];
    }
    // The gradient of the half sum of squares is the data itself.
    caffe_cpu_copy(blob->count(), values, grads);
  }
  loss /= 2.;
  return loss;
}
// Computes the SGD update for the learnable parameter `param_id`.
//
// param_id - index into the net's learnable parameters and into history_.
// rate     - global learning rate; it is multiplied by the parameter's own
//            learning-rate multiplier before use.
//
// The update is accumulated into the history blob and then made available in
// the parameter's diff. The GPU path is compiled out under CPU_ONLY.
void SGDSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype momentum = this->param_.momentum();
  Dtype local_rate = rate * net_params_lr[param_id];
  Blob<Dtype>* param = net_params[param_id];

  switch (Caffe::mode()) {
    case Caffe::GPU: {
#ifndef CPU_ONLY
      sgd_update_gpu(this->device_, param->count(),
                     param->mutable_gpu_diff(),
                     history_[param_id]->mutable_gpu_data(),
                     momentum, local_rate);
#else
      NO_GPU;
#endif
      break;
    }
    case Caffe::CPU: {
      // Update the history first, then copy it into the parameter diff.
      caffe_cpu_axpby(param->count(), local_rate,
                      param->cpu_diff(), momentum,
                      history_[param_id]->mutable_cpu_data());
      caffe_cpu_copy(param->count(),
                     history_[param_id]->cpu_data(),
                     param->mutable_cpu_diff());
      break;
    }
    default: {
      LOG(FATAL)<< "Unknown caffe mode: " << Caffe::mode();
    }
  }
}
// Computes the hinge loss on the CPU.
//
// bottom[0] holds the predictions and bottom[1] one label per item. The diff
// buffer of bottom[0] is used as scratch space for the per-element margins.
// The scalar loss is written to top[0]; it is either the sum of the margins
// (L1) or the sum of their squares (L2), divided by the number of items.
void HingeLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* scores = bottom[0]->cpu_data();
  Dtype* margins = bottom[0]->mutable_cpu_diff();
  const Dtype* label = bottom[1]->cpu_data();
  int_tp num = bottom[0]->num();
  int_tp count = bottom[0]->count();
  int_tp dim = count / num;

  caffe_cpu_copy(count, scores, margins);

  // Flip the sign of the entry that belongs to the labelled class.
  for (int_tp item = 0; item < num; ++item) {
    const int_tp labelled = item * dim + static_cast<int_tp>(label[item]);
    margins[labelled] *= -1;
  }

  // margin = max(0, 1 + value) for every element.
  for (int_tp item = 0; item < num; ++item) {
    for (int_tp k = 0; k < dim; ++k) {
      const int_tp idx = item * dim + k;
      margins[idx] = std::max(Dtype(0), 1 + margins[idx]);
    }
  }

  Dtype* loss = top[0]->mutable_cpu_data();
  switch (this->layer_param_.hinge_loss_param().norm()) {
    case HingeLossParameter_Norm_L2:
      loss[0] = caffe_cpu_dot(count, margins, margins) / num;
      break;
    case HingeLossParameter_Norm_L1:
      loss[0] = caffe_cpu_asum(count, margins) / num;
      break;
    default:
      LOG(FATAL) << "Unknown Norm";
  }
}
// Backward pass of the softmax loss on the CPU.
//
// Propagating to the label input (bottom[1]) is a fatal error. For bottom[0]
// the gradient starts as a copy of the stored probabilities (prob_); for each
// position 1 is subtracted at the labelled class, or the whole class column
// is zeroed when the label equals the ignore label. The result is scaled by
// top[0]'s diff divided by the normalizer of the counted positions.
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (!propagate_down[0]) {
    return;
  }

  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const Dtype* prob_data = prob_.cpu_data();
  caffe_cpu_copy(prob_.count(), prob_data, bottom_diff);
  const Dtype* label = bottom[1]->cpu_data();
  int_tp dim = prob_.count() / outer_num_;

  // Number of positions that contribute to the loss (not ignored).
  int_tp valid_count = 0;
  for (int_tp outer = 0; outer < outer_num_; ++outer) {
    for (int_tp inner = 0; inner < inner_num_; ++inner) {
      const int_tp label_value =
          static_cast<int_tp>(label[outer * inner_num_ + inner]);
      const bool ignored = has_ignore_label_ && label_value == ignore_label_;
      if (!ignored) {
        bottom_diff[outer * dim + label_value * inner_num_ + inner] -= 1;
        ++valid_count;
        continue;
      }
      // Ignored position: no gradient for any class.
      for (int_tp c = 0; c < bottom[0]->shape(softmax_axis_); ++c) {
        bottom_diff[outer * dim + c * inner_num_ + inner] = 0;
      }
    }
  }

  // Scale gradient
  Dtype loss_weight = top[0]->cpu_diff()[0] /
                      get_normalizer(normalization_, valid_count);
  caffe_scal(prob_.count(), loss_weight, bottom_diff);
}
// Verifies that caffe_cpu_copy reproduces the bottom blob's data element by
// element in the top blob.
TYPED_TEST(CPUMathFunctionsTest, TestCopy) {
  const int n = this->blob_bottom_->count();
  const TypeParam* source_values = this->blob_bottom_->cpu_data();
  TypeParam* copied_values = this->blob_top_->mutable_cpu_data();

  caffe_cpu_copy(n, source_values, copied_values);

  for (int idx = 0; idx < n; ++idx) {
    EXPECT_EQ(source_values[idx], copied_values[idx]);
  }
}
// Copies bottom[0] (data) and bottom[1] (labels) into the layer's internal
// blobs, item by item, and then calls SaveBlobs().
//
// Requires at least two bottoms whose num() agree. `top` is not used.
void HDF5OutputLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  CHECK_GE(bottom.size(), 2);
  CHECK_EQ(bottom[0]->num(), bottom[1]->num());

  Blob<Dtype>* data_in = bottom[0];
  Blob<Dtype>* label_in = bottom[1];

  data_blob_.Reshape(data_in->num(), data_in->channels(),
                     data_in->height(), data_in->width());
  label_blob_.Reshape(label_in->num(), label_in->channels(),
                      label_in->height(), label_in->width());

  // Number of elements per item in each input.
  const int_tp data_datum_dim = data_in->count() / data_in->num();
  const int_tp label_datum_dim = label_in->count() / label_in->num();

  for (int_tp item = 0; item < data_in->num(); ++item) {
    caffe_cpu_copy(data_datum_dim,
        data_in->cpu_data() + item * data_datum_dim,
        data_blob_.mutable_cpu_data() + item * data_datum_dim);
    caffe_cpu_copy(label_datum_dim,
        label_in->cpu_data() + item * label_datum_dim,
        label_blob_.mutable_cpu_data() + item * label_datum_dim);
  }
  SaveBlobs();
}
// Takes one batch from the queue of filled batches, copies its data (and, if
// this layer outputs labels, its labels) into the top blobs, and hands the
// batch back to the queue of free batches. `bottom` is not used.
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  Batch<Dtype>* batch = prefetch_full_.pop("Data layer prefetch queue empty");

  // Data: match the shape of the loaded batch, then copy.
  Blob<Dtype>* data_out = top[0];
  data_out->ReshapeLike(batch->data_);
  caffe_cpu_copy(batch->data_.count(), batch->data_.cpu_data(),
                 data_out->mutable_cpu_data());
  DLOG(INFO) << "Prefetch copied";

  if (this->output_labels_) {
    // Labels: same procedure on the second top blob.
    Blob<Dtype>* label_out = top[1];
    label_out->ReshapeLike(batch->label_);
    caffe_cpu_copy(batch->label_.count(), batch->label_.cpu_data(),
                   label_out->mutable_cpu_data());
  }

  prefetch_free_.push(batch);
}
// Joins the prefetch thread, copies the prefetched data (and, if this layer
// outputs labels, the prefetched labels) into the top blobs, and then starts
// a new prefetch thread. `bottom` is not used.
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The prefetch buffers are only read after the thread has been joined.
  JoinPrefetchThread();
  DLOG(INFO) << "Thread joined";

  // Data: match the shape of the prefetched blob, then copy.
  Blob<Dtype>* data_out = top[0];
  data_out->ReshapeLike(prefetch_data_);
  caffe_cpu_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
                 data_out->mutable_cpu_data());
  DLOG(INFO) << "Prefetch copied";

  if (this->output_labels_) {
    // Labels: same procedure on the second top blob.
    Blob<Dtype>* label_out = top[1];
    label_out->ReshapeLike(prefetch_label_);
    caffe_cpu_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
                   label_out->mutable_cpu_data());
  }

  // Kick off loading of the next batch.
  DLOG(INFO) << "CreatePrefetchThread";
  CreatePrefetchThread();
}
// Backward pass of y = (shift + scale * x)^power on the CPU.
//
// Nothing is done unless propagate_down[0] is set. The local derivative dy/dx
// is first written into bottom[0]'s diff, choosing the cheapest formula for
// the current parameters, and is then multiplied element-wise by top[0]'s
// diff (skipped when diff_scale_ is zero).
void PowerLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }

  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const int count = bottom[0]->count();
  const Dtype* top_diff = top[0]->cpu_diff();

  if (diff_scale_ == Dtype(0) || power_ == Dtype(1)) {
    // The derivative does not depend on x: fill with diff_scale_.
    caffe_set(count, diff_scale_, bottom_diff);
  } else {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    // General form:
    //   dy/dx = scale * power * (shift + scale * x)^(power - 1)
    //         = diff_scale * y / (shift + scale * x)
    if (power_ == Dtype(2)) {
      // y = (shift + scale * x)^2
      //   -> dy/dx = 2 * scale * (shift + scale * x)
      //            = diff_scale * shift + diff_scale * scale * x
      caffe_cpu_axpby(count, diff_scale_ * scale_, bottom_data,
          Dtype(0), bottom_diff);
      if (shift_ != Dtype(0)) {
        caffe_add_scalar(count, diff_scale_ * shift_, bottom_diff);
      }
    } else if (shift_ == Dtype(0)) {
      // y = (scale * x)^power
      //   -> dy/dx = scale * power * (scale * x)^(power - 1)
      //            = scale * power * (scale * x)^power * (scale * x)^(-1)
      //            = power * y / x
      const Dtype* top_data = top[0]->cpu_data();
      caffe_div(count, top_data, bottom_data, bottom_diff);
      caffe_scal(count, power_, bottom_diff);
    } else {
      // Build (shift + scale * x) in place, divide y by it, then scale.
      caffe_cpu_copy(count, bottom_data, bottom_diff);
      if (scale_ != Dtype(1)) {
        caffe_scal(count, scale_, bottom_diff);
      }
      if (shift_ != Dtype(0)) {
        caffe_add_scalar(count, shift_, bottom_diff);
      }
      const Dtype* top_data = top[0]->cpu_data();
      caffe_div<Dtype>(count, top_data, bottom_diff, bottom_diff);
      if (diff_scale_ != Dtype(1)) {
        caffe_scal(count, diff_scale_, bottom_diff);
      }
    }
  }

  // Chain rule: multiply by the incoming gradient.
  if (diff_scale_ != Dtype(0)) {
    caffe_mul(count, top_diff, bottom_diff, bottom_diff);
  }
}
// Backward pass of the split layer on the CPU: the gradient of bottom[0] is
// the sum of the diffs of all top blobs.
//
// Nothing is done unless propagate_down[0] is set. With a single top blob the
// diff is copied as is.
void SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }

  if (top.size() == 1) {
    caffe_cpu_copy(count_, top[0]->cpu_diff(),
                   bottom[0]->mutable_cpu_diff());
    return;
  }

  // Start with the sum of the first two top diffs ...
  caffe_add(count_, top[0]->cpu_diff(), top[1]->cpu_diff(),
            bottom[0]->mutable_cpu_diff());

  // ... then accumulate the remaining ones.
  for (int t = 2; t < top.size(); ++t) {
    const Dtype* extra_diff = top[t]->cpu_diff();
    Dtype* accumulated = bottom[0]->mutable_cpu_diff();
    caffe_axpy(count_, Dtype(1.), extra_diff, accumulated);
  }
}
// Forward pass of the filter layer on the CPU.
//
// For every top blob t, the items of bottom[t] whose indices are listed in
// indices_to_forward_ are copied, in that order, into consecutive items of
// top[t]. Only top.size() bottoms are read, so the selector (the last bottom)
// is not forwarded.
void FilterLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  int new_tops_num = indices_to_forward_.size();

  for (int t = 0; t < top.size(); ++t) {
    const Dtype* src_base = bottom[t]->cpu_data();
    Dtype* dst_base = top[t]->mutable_cpu_data();
    // Number of elements in one item of this bottom.
    int dim = bottom[t]->count() / bottom[t]->shape(0);

    for (int n = 0; n < new_tops_num; ++n) {
      int dst_offset = n * dim;
      int src_offset = indices_to_forward_[n] * bottom[t]->count(1);
      caffe_cpu_copy(dim, src_base + src_offset, dst_base + dst_offset);
    }
  }
}
// Forward pass of dropout on the CPU.
//
// In the TRAIN phase a fresh Bernoulli mask is drawn into rand_vec_ and each
// output is input * mask * scale_. In any other phase the input is copied to
// the output unchanged.
void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  uint_tp* mask = rand_vec_.mutable_cpu_data();
  const int_tp count = bottom[0]->count();

  if (this->phase_ != TRAIN) {
    caffe_cpu_copy(bottom[0]->count(), bottom_data, top_data);
    return;
  }

  // Draw the mask, then apply it together with the scale.
  caffe_rng_bernoulli(count, 1. - threshold_, mask);
  for (int_tp k = 0; k < count; ++k) {
    top_data[k] = bottom_data[k] * mask[k] * scale_;
  }
}
// Backward pass of dropout on the CPU.
//
// Nothing is done unless propagate_down[0] is set. In the TRAIN phase the
// gradient is top_diff * mask * scale_, using the mask stored in rand_vec_;
// in any other phase the top diff is copied to the bottom diff unchanged.
void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }

  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  if (this->phase_ != TRAIN) {
    caffe_cpu_copy(top[0]->count(), top_diff, bottom_diff);
    return;
  }

  const uint_tp* mask = rand_vec_.cpu_data();
  const int_tp count = bottom[0]->count();
  for (int_tp k = 0; k < count; ++k) {
    bottom_diff[k] = top_diff[k] * mask[k] * scale_;
  }
}
// Forward pass of concatenation on the CPU.
//
// With a single bottom nothing is done. Otherwise, for each of the
// num_concats_ outer slices, the chunk of every bottom is copied into the top
// blob at the running offset along the concat axis.
void ConcatLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  if (bottom.size() == 1) { return; }

  Dtype* top_data = top[0]->mutable_cpu_data();
  const int top_concat_axis = top[0]->shape(concat_axis_);
  // Position along the concat axis where the current bottom starts.
  int offset_concat_axis = 0;

  for (int b = 0; b < bottom.size(); ++b) {
    const Dtype* bottom_data = bottom[b]->cpu_data();
    const int bottom_concat_axis = bottom[b]->shape(concat_axis_);

    for (int n = 0; n < num_concats_; ++n) {
      const Dtype* src =
          bottom_data + n * bottom_concat_axis * concat_input_size_;
      Dtype* dst = top_data +
          (n * top_concat_axis + offset_concat_axis) * concat_input_size_;
      caffe_cpu_copy(bottom_concat_axis * concat_input_size_, src, dst);
    }
    offset_concat_axis += bottom_concat_axis;
  }
}
// Backward pass of slicing on the CPU.
//
// Nothing is done when propagate_down[0] is false or when there is a single
// top blob. Otherwise the diff of every top blob is copied back into its
// slice of bottom[0]'s diff, for each of the num_slices_ outer slices.
void SliceLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0] || top.size() == 1) { return; }

  // Position along the slice axis where the current top starts.
  int_tp offset_slice_axis = 0;
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const int_tp bottom_slice_axis = bottom[0]->shape(slice_axis_);

  for (int_tp t = 0; t < top.size(); ++t) {
    const Dtype* top_diff = top[t]->cpu_diff();
    const int_tp top_slice_axis = top[t]->shape(slice_axis_);

    for (int_tp n = 0; n < num_slices_; ++n) {
      const int_tp src_offset = n * top_slice_axis * slice_size_;
      const int_tp dst_offset =
          (n * bottom_slice_axis + offset_slice_axis) * slice_size_;
      caffe_cpu_copy(top_slice_axis * slice_size_,
                     top_diff + src_offset, bottom_diff + dst_offset);
    }
    offset_slice_axis += top_slice_axis;
  }
}
// Forward pass of slicing on the CPU.
//
// With a single top blob nothing is done. Otherwise, for each of the
// num_slices_ outer slices, the part of bottom[0] that belongs to a top blob
// is copied into that top blob, walking along the slice axis.
void SliceLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  if (top.size() == 1) { return; }

  // Position along the slice axis where the current top starts.
  int_tp offset_slice_axis = 0;
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const int_tp bottom_slice_axis = bottom[0]->shape(slice_axis_);

  for (int_tp t = 0; t < top.size(); ++t) {
    Dtype* top_data = top[t]->mutable_cpu_data();
    const int_tp top_slice_axis = top[t]->shape(slice_axis_);

    for (int_tp n = 0; n < num_slices_; ++n) {
      const int_tp dst_offset = n * top_slice_axis * slice_size_;
      const int_tp src_offset =
          (n * bottom_slice_axis + offset_slice_axis) * slice_size_;
      caffe_cpu_copy(top_slice_axis * slice_size_,
                     bottom_data + src_offset, top_data + dst_offset);
    }
    offset_slice_axis += top_slice_axis;
  }
}
// Backward pass of concatenation on the CPU.
//
// With a single bottom nothing is done. Otherwise the part of top[0]'s diff
// that belongs to each bottom is copied into that bottom's diff, but only for
// bottoms whose propagate_down flag is set. The offset along the concat axis
// advances for every bottom, whether or not it receives a gradient.
void ConcatLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (bottom.size() == 1) { return; }

  const Dtype* top_diff = top[0]->cpu_diff();
  const int top_concat_axis = top[0]->shape(concat_axis_);
  int offset_concat_axis = 0;

  for (int b = 0; b < bottom.size(); ++b) {
    const int bottom_concat_axis = bottom[b]->shape(concat_axis_);
    if (propagate_down[b]) {
      Dtype* bottom_diff = bottom[b]->mutable_cpu_diff();
      for (int n = 0; n < num_concats_; ++n) {
        const Dtype* src = top_diff +
            (n * top_concat_axis + offset_concat_axis) * concat_input_size_;
        Dtype* dst =
            bottom_diff + n * bottom_concat_axis * concat_input_size_;
        caffe_cpu_copy(bottom_concat_axis * concat_input_size_, src, dst);
      }
    }
    offset_concat_axis += bottom_concat_axis;
  }
}
// Forward pass of the embedding lookup on the CPU.
//
// Each of the M_ inputs is converted to an integer index and the weight row
// of length N_ at that index is copied to the matching output row. Debug
// checks require the index to lie in [0, K_) and the input to be integral.
// When bias_term_ is set, a GEMM with bias_multiplier_ and blobs_[1]
// accumulates into the output.
void EmbedLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();

  for (int row = 0; row < M_; ++row) {
    const int index = static_cast<int>(bottom_data[row]);
    DCHECK_GE(index, 0);
    DCHECK_LT(index, K_);
    DCHECK_EQ(static_cast<Dtype>(index), bottom_data[row])
        << "non-integer input";
    const Dtype* weight_row = weight + index * N_;
    Dtype* out_row = top_data + row * N_;
    caffe_cpu_copy(N_, weight_row, out_row);
  }

  if (bias_term_) {
    const Dtype* bias = this->blobs_[1]->cpu_data();
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, Dtype(1),
        bias_multiplier_.cpu_data(), bias, Dtype(1), top_data);
  }
}
// Backward pass of the log layer on the CPU.
//
// Nothing is done unless propagate_down[0] is set. bottom[0]'s diff is built
// in place from the input: scaled by input_scale_ and shifted by input_shift_
// (each step skipped when it is the identity), raised to the power -1, scaled
// by backward_num_scale_ and finally multiplied by top[0]'s diff.
void LogLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }

  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  // Work on a copy of the input held in the diff buffer.
  caffe_cpu_copy(count, bottom_data, bottom_diff);

  const bool needs_scale = input_scale_ != Dtype(1);
  if (needs_scale) {
    caffe_scal(count, input_scale_, bottom_diff);
  }
  const bool needs_shift = input_shift_ != Dtype(0);
  if (needs_shift) {
    caffe_add_scalar(count, input_shift_, bottom_diff);
  }

  caffe_powx(count, bottom_diff, Dtype(-1), bottom_diff);

  if (backward_num_scale_ != Dtype(1)) {
    caffe_scal(count, backward_num_scale_, bottom_diff);
  }

  // Chain rule: multiply by the incoming gradient.
  caffe_mul(count, top_diff, bottom_diff, bottom_diff);
}
// Forward pass of the log layer on the CPU.
//
// When the input needs neither scaling nor shifting the logarithm is taken
// directly from bottom to top. Otherwise the input is copied to the top
// buffer, scaled and/or shifted in place, and the logarithm is taken in place.
// The result is finally scaled by base_scale_ unless that is 1.
void LogLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();

  const bool plain_input =
      input_scale_ == Dtype(1) && input_shift_ == Dtype(0);
  if (plain_input) {
    caffe_log(count, bottom_data, top_data);
  } else {
    caffe_cpu_copy(count, bottom_data, top_data);
    if (input_scale_ != Dtype(1)) {
      caffe_scal(count, input_scale_, top_data);
    }
    if (input_shift_ != Dtype(0)) {
      caffe_add_scalar(count, input_shift_, top_data);
    }
    caffe_log(count, top_data, top_data);
  }

  if (base_scale_ != Dtype(1)) {
    caffe_scal(count, base_scale_, top_data);
  }
}
// Forward pass of y = (shift + scale * x)^power on the CPU.
//
// When diff_scale_ is zero the input is not read: every output is set to 1 if
// power_ is zero, and to shift_^power_ otherwise. In the general case the
// input is copied to the top buffer and scaled, shifted and raised to the
// power in place, skipping each step that is the identity.
void PowerLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();

  // Special case where we can ignore the input: scale or power is 0.
  if (diff_scale_ == Dtype(0)) {
    Dtype value;
    if (power_ == 0) {
      value = Dtype(1);
    } else {
      value = pow(shift_, power_);
    }
    caffe_set(count, value, top_data);
    return;
  }

  const Dtype* bottom_data = bottom[0]->cpu_data();
  caffe_cpu_copy(count, bottom_data, top_data);

  const bool needs_scale = scale_ != Dtype(1);
  if (needs_scale) {
    caffe_scal(count, scale_, top_data);
  }
  const bool needs_shift = shift_ != Dtype(0);
  if (needs_shift) {
    caffe_add_scalar(count, shift_, top_data);
  }
  const bool needs_power = power_ != Dtype(1);
  if (needs_power) {
    caffe_powx(count, top_data, power_, top_data);
  }
}
// Backward pass of the filter layer on the CPU.
//
// Propagating to the selector (the last bottom) is a fatal error. For every
// other bottom i with propagate_down[i] set, each item n of bottom[i] receives
// either the diff of the top item it was forwarded to, or zeros if it was not
// forwarded. The walk assumes indices_to_forward_ is in increasing order
// (it is consumed front to back while n increases) -- TODO confirm against
// the code that fills it.
void FilterLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[bottom.size() - 1]) {
    LOG(FATAL) << this->type()
               << "Layer cannot backpropagate to filter index inputs";
  }
  // bottom[last] is the selector and never needs backpropagation, so iterating
  // over top is enough because top.size() == bottom.size() - 1.
  for (int i = 0; i < top.size(); i++) {
    if (!propagate_down[i]) {
      continue;
    }
    // Elements per item. Taken from bottom (as Forward_cpu does for the source
    // offset) rather than top[i]->count() / top[i]->shape(0): the latter is an
    // integer division by zero if top holds no items (presumably the case when
    // nothing was forwarded -- confirm against Reshape).
    const int dim = bottom[i]->count(1);
    // Index of the next entry of indices_to_forward_ still to be matched; it
    // is also the index of the corresponding item in top[i].
    int next_to_backward_offset = 0;

    for (int n = 0; n < bottom[i]->shape(0); n++) {
      Dtype* bottom_item = bottom[i]->mutable_cpu_diff() + n * dim;
      const bool forwarded =
          next_to_backward_offset < indices_to_forward_.size() &&
          indices_to_forward_[next_to_backward_offset] == n;
      if (forwarded) {
        // Top's diff is only read here, so use the const accessor instead of
        // mutable_cpu_diff().
        const Dtype* top_item =
            top[i]->cpu_diff() + next_to_backward_offset * dim;
        caffe_cpu_copy(dim, top_item, bottom_item);
        next_to_backward_offset++;  // point to next forwarded item index
      } else {
        // Either all forwarded items were already visited or this item was
        // filtered out: it gets no gradient.
        caffe_set(dim, Dtype(0), bottom_item);
      }
    }
  }
}
// Forward pass of PReLU on the CPU:
//   top = max(x, 0) + slope[c] * min(x, 0)
//
// The slope index c is derived from the element index via the size of the
// axes from 2 onwards (dim) and the channel count; with channel_shared_ it is
// always 0. When running in place (bottom[0] == top[0]) the input is first
// saved into bottom_memory_.
void PReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  const int dim = bottom[0]->count(2);
  const int channels = bottom[0]->channels();
  const Dtype* slope_data = this->blobs_[0]->cpu_data();

  // For in-place computation
  const bool in_place = bottom[0] == top[0];
  if (in_place) {
    caffe_cpu_copy(count, bottom_data, bottom_memory_.mutable_cpu_data());
  }

  // Dividing by the channel count collapses every channel index to zero when
  // the slope is shared across channels.
  const int div_factor = channel_shared_ ? channels : 1;
  for (int idx = 0; idx < count; ++idx) {
    int c = (idx / dim) % channels / div_factor;
    top_data[idx] = std::max(bottom_data[idx], Dtype(0))
        + slope_data[c] * std::min(bottom_data[idx], Dtype(0));
  }
}
// Multiplies a 2x3 matrix by a 3x4 matrix with the CPU GEMM and then with the
// GPU GEMM of the default device's backend, and compares C's CPU view with
// the expected 2x4 product after each call. The four transpose combinations
// are covered: (A, B), (A^T, B), (A^T, B^T) and (A, B^T).
TYPED_TEST(GemmTest, TestGemmCPUGPU) {
  DeviceContext *dc = Caffe::GetDefaultDeviceContext();

  Blob<TypeParam> A(1, 1, 2, 3, Caffe::GetDefaultDeviceContext());
  Blob<TypeParam> B(1, 1, 3, 4, Caffe::GetDefaultDeviceContext());
  Blob<TypeParam> C(1, 1, 2, 4, Caffe::GetDefaultDeviceContext());
  TypeParam data[12] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  // Transposed layouts of A and B.
  TypeParam A_reshape_data[6] = {1, 4, 2, 5, 3, 6};
  TypeParam B_reshape_data[12] = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12};
  TypeParam result[8] = {38, 44, 50, 56, 83, 98, 113, 128};
  caffe_cpu_copy(6, data, A.mutable_cpu_data());
  caffe_cpu_copy(12, data, B.mutable_cpu_data());

  // Case 1: no transposition.
  // [1, 2, 3; 4 5 6] * [1, 2, 3, 4; 5, 6, 7, 8; 9, 10, 11, 12];
  caffe_cpu_gemm<TypeParam>(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1.,
      A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
  for (int k = 0; k < 8; ++k) {
    EXPECT_EQ(C.cpu_data()[k], result[k]);
  }

  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    caffe_gpu_gemm<TypeParam>(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1.,
        A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    greentea_gpu_gemm<TypeParam>(dc->id(), CblasNoTrans, CblasNoTrans,
        2, 4, 3, 1.,
        (cl_mem)(A.gpu_data()), 0, (cl_mem)(B.gpu_data()), 0, 0.,
        (cl_mem)(C.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }
  for (int k = 0; k < 8; ++k) {
    EXPECT_EQ(C.cpu_data()[k], result[k]);
  }

  // Case 2: A is stored transposed.
  A.Reshape(1, 1, 3, 2);
  caffe_cpu_copy(6, A_reshape_data, A.mutable_cpu_data());
  caffe_cpu_gemm<TypeParam>(CblasTrans, CblasNoTrans, 2, 4, 3, 1.,
      A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
  for (int k = 0; k < 8; ++k) {
    EXPECT_EQ(C.cpu_data()[k], result[k]);
  }

  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    caffe_gpu_gemm<TypeParam>(CblasTrans, CblasNoTrans, 2, 4, 3, 1.,
        A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    greentea_gpu_gemm<TypeParam>(dc->id(), CblasTrans, CblasNoTrans,
        2, 4, 3, 1.,
        (cl_mem)(A.gpu_data()), 0, (cl_mem)(B.gpu_data()), 0, 0.,
        (cl_mem)(C.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }
  for (int k = 0; k < 8; ++k) {
    EXPECT_EQ(C.cpu_data()[k], result[k]);
  }

  // Case 3: both A and B are stored transposed.
  B.Reshape(1, 1, 4, 3);
  caffe_cpu_copy(12, B_reshape_data, B.mutable_cpu_data());
  caffe_cpu_gemm<TypeParam>(CblasTrans, CblasTrans, 2, 4, 3, 1.,
      A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
  for (int k = 0; k < 8; ++k) {
    EXPECT_EQ(C.cpu_data()[k], result[k]);
  }

  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    caffe_gpu_gemm<TypeParam>(CblasTrans, CblasTrans, 2, 4, 3, 1.,
        A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    greentea_gpu_gemm<TypeParam>(dc->id(), CblasTrans, CblasTrans,
        2, 4, 3, 1.,
        (cl_mem)(A.gpu_data()), 0, (cl_mem)(B.gpu_data()), 0, 0.,
        (cl_mem)(C.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }
  for (int k = 0; k < 8; ++k) {
    EXPECT_EQ(C.cpu_data()[k], result[k]);
  }

  // Case 4: only B is stored transposed.
  A.Reshape(1, 1, 2, 3);
  caffe_cpu_copy(6, data, A.mutable_cpu_data());
  caffe_cpu_gemm<TypeParam>(CblasNoTrans, CblasTrans, 2, 4, 3, 1.,
      A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
  for (int k = 0; k < 8; ++k) {
    EXPECT_EQ(C.cpu_data()[k], result[k]);
  }

  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    caffe_gpu_gemm<TypeParam>(CblasNoTrans, CblasTrans, 2, 4, 3, 1.,
        A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    greentea_gpu_gemm<TypeParam>(dc->id(), CblasNoTrans, CblasTrans,
        2, 4, 3, 1.,
        (cl_mem)(A.gpu_data()), 0, (cl_mem)(B.gpu_data()), 0, 0.,
        (cl_mem)(C.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }
  for (int k = 0; k < 8; ++k) {
    EXPECT_EQ(C.cpu_data()[k], result[k]);
  }
}
// Multiplies a 2x3 matrix by a vector with the CPU GEMV and then with the GPU
// GEMV of the default device's backend, and compares the CPU view of the
// output with the expected values after each call. Covers the plain case
// (y = A * x) and the transposed case (x = A^T * y).
TYPED_TEST(GemmTest, TestGemvCPUGPU) {
  DeviceContext *dc = Caffe::GetDefaultDeviceContext();

  Blob<TypeParam> A(1, 1, 2, 3, Caffe::GetDefaultDeviceContext());
  Blob<TypeParam> x(1, 1, 1, 3, Caffe::GetDefaultDeviceContext());
  Blob<TypeParam> y(1, 1, 1, 2, Caffe::GetDefaultDeviceContext());
  TypeParam data[6] = {1, 2, 3, 4, 5, 6};
  TypeParam result_2[2] = {14, 32};
  TypeParam result_3[3] = {9, 12, 15};
  caffe_cpu_copy(6, data, A.mutable_cpu_data());
  caffe_cpu_copy(3, data, x.mutable_cpu_data());

  // Plain case: y = A * x.
  caffe_cpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.cpu_data(),
      x.cpu_data(), 0., y.mutable_cpu_data());
  for (int k = 0; k < 2; ++k) {
    EXPECT_EQ(y.cpu_data()[k], result_2[k]);
  }

  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    caffe_gpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.gpu_data(),
        x.gpu_data(), 0., y.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    greentea_gpu_gemv<TypeParam>(dc->id(), CblasNoTrans, 2, 3, 1.,
        (cl_mem)(A.gpu_data()), 0,
        (cl_mem)(x.gpu_data()), 0, 0.,
        (cl_mem)(y.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }
  for (int k = 0; k < 2; ++k) {
    EXPECT_EQ(y.cpu_data()[k], result_2[k]);
  }

  // Test transpose case
  caffe_cpu_copy(2, data, y.mutable_cpu_data());
  caffe_cpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.cpu_data(),
      y.cpu_data(), 0., x.mutable_cpu_data());
  for (int k = 0; k < 3; ++k) {
    EXPECT_EQ(x.cpu_data()[k], result_3[k]);
  }

  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    caffe_gpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.gpu_data(),
        y.gpu_data(), 0., x.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    greentea_gpu_gemv<TypeParam>(dc->id(), CblasTrans, 2, 3, 1.,
        (cl_mem)(A.gpu_data()), 0,
        (cl_mem)(y.gpu_data()), 0, 0.,
        (cl_mem)(x.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }
  for (int k = 0; k < 3; ++k) {
    EXPECT_EQ(x.cpu_data()[k], result_3[k]);
  }
}
// Checks the gradients computed by layer->Backward against central finite
// differences of the objective returned by GetObjAndGradient.
//
// layer         - layer under test.
// bottom, top   - input and output blobs of the layer.
// check_bottom  - -1 checks every bottom blob, a value >= 0 checks only that
//                 bottom blob; for any other value no bottom blob is checked.
//                 The layer's own parameter blobs are always checked.
// top_id, top_data_id - select the objective (see GetObjAndGradient).
// element_wise  - when true the layer must have no parameter blobs and every
//                 bottom must have the same count as top[top_id]; finite
//                 differencing is then done only for feat_id == top_data_id
//                 and every other element is expected to have gradient 0.
//
// The random seed is reset before every Forward call so that each pass sees
// the same random numbers.
void GradientChecker<Dtype>::CheckGradientSingle(
    Layer<Dtype>* layer, const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top, int_tp check_bottom, int_tp top_id,
    int_tp top_data_id,
    bool element_wise) {
  if (element_wise) {
    CHECK_EQ(0, layer->blobs().size());
    CHECK_LE(0, top_id);
    CHECK_LE(0, top_data_id);
    const int_tp top_count = top[top_id]->count();
    for (int_tp b = 0; b < bottom.size(); ++b) {
      CHECK_EQ(top_count, bottom[b]->count());
    }
  }

  // Collect the blobs to check. Parameter blobs come first and get their
  // diffs zero-initialized.
  vector<Blob<Dtype>*> blobs_to_check;
  vector<bool> propagate_down(bottom.size(), check_bottom == -1);
  for (int_tp p = 0; p < layer->blobs().size(); ++p) {
    Blob<Dtype>* param_blob = layer->blobs()[p].get();
    caffe_set(param_blob->count(), static_cast<Dtype>(0),
              param_blob->mutable_cpu_diff());
    blobs_to_check.push_back(param_blob);
  }
  if (check_bottom == -1) {
    for (int_tp b = 0; b < bottom.size(); ++b) {
      blobs_to_check.push_back(bottom[b]);
    }
  } else if (check_bottom >= 0) {
    CHECK_LT(check_bottom, bottom.size());
    blobs_to_check.push_back(bottom[check_bottom]);
    propagate_down[check_bottom] = true;
  }
  CHECK_GT(blobs_to_check.size(), 0)<< "No blobs to check.";

  // Analytic gradient via Backward. The loss returned by the layer itself is
  // ignored (it's just the weighted sum of the losses from the top blobs,
  // whose gradients we may want to test individually).
  Caffe::set_random_seed(seed_, Caffe::GetDefaultDevice());
  layer->Forward(bottom, top);
  // Get additional loss from the objective
  GetObjAndGradient(*layer, top, top_id, top_data_id);
  layer->Backward(top, propagate_down, bottom);

  // Keep a copy of the analytic gradients: the diffs are overwritten by the
  // objective evaluations below.
  vector<shared_ptr<Blob<Dtype> > > computed_gradient_blobs(
      blobs_to_check.size());
  for (int_tp blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
    Blob<Dtype>* checked_blob = blobs_to_check[blob_id];
    computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
    computed_gradient_blobs[blob_id]->ReshapeLike(*checked_blob);
    const int_tp count = blobs_to_check[blob_id]->count();
    const Dtype* diff = blobs_to_check[blob_id]->cpu_diff();
    Dtype* stored_gradients =
        computed_gradient_blobs[blob_id]->mutable_cpu_data();
    caffe_cpu_copy(count, diff, stored_gradients);
  }

  // Numeric gradient of the objective w.r.t. every checked element using
  // finite differencing.
  for (int_tp blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
    const Dtype* computed_gradients =
        computed_gradient_blobs[blob_id]->cpu_data();

    for (int_tp feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
      // For an element-wise layer, finite differencing is only needed for
      // feat_id == top_data_id; for every other element the derivative is 0
      // by definition and the zero estimate below is compared as is.
      Dtype estimated_gradient = 0;
      Dtype positive_objective = 0;
      Dtype negative_objective = 0;
      if (!element_wise || (feat_id == top_data_id)) {
        // Objective with stepsize_ added to the input.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        Caffe::set_random_seed(seed_, Caffe::GetDefaultDevice());
        layer->Forward(bottom, top);
        positive_objective = GetObjAndGradient(*layer, top, top_id,
                                               top_data_id);
        // Objective with stepsize_ subtracted from the input.
        current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
        Caffe::set_random_seed(seed_, Caffe::GetDefaultDevice());
        layer->Forward(bottom, top);
        negative_objective = GetObjAndGradient(*layer, top, top_id,
                                               top_data_id);
        // Recover original input value.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        estimated_gradient = (positive_objective - negative_objective)
            / stepsize_ / 2.;
      }

      Dtype computed_gradient = computed_gradients[feat_id];
      Dtype feature = current_blob->cpu_data()[feat_id];

      // Elements whose magnitude lies within kink_range_ of kink_ are not
      // compared.
      const bool outside_kink = kink_ - kink_range_ > fabs(feature)
          || fabs(feature) > kink_ + kink_range_;
      if (!outside_kink) {
        continue;
      }

      // We check relative accuracy, but for too small values, we threshold
      // the scale factor by 1.
      Dtype scale = std::max<Dtype>(
          std::max(fabs(computed_gradient), fabs(estimated_gradient)),
          Dtype(1.));
      EXPECT_NEAR(computed_gradient, estimated_gradient, threshold_ * scale)
          << "debug: (top_id, top_data_id, blob_id, feat_id)="
          << top_id << "," << top_data_id << "," << blob_id << "," << feat_id
          << "; feat = " << feature
          << "; objective+ = " << positive_objective
          << "; objective- = " << negative_objective;
    }
  }
}