// Moves one prefetched batch into the top blobs.
//
// A batch is popped from prefetch_full_, top[0] (and top[1] when
// output_labels_ is set) is reshaped to the batch's blobs and filled from
// them, and the batch is then pushed onto prefetch_free_.
// `bottom` is not used.
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  Batch<Dtype>* const loaded =
      prefetch_full_.pop("Data layer prefetch queue empty");

  // Data: size the output like the loaded blob, then copy its contents.
  Blob<Dtype>* const data_top = top[0];
  data_top->ReshapeLike(loaded->data_);
  caffe_copy(loaded->data_.count(), loaded->data_.cpu_data(),
             data_top->mutable_cpu_data());
  DLOG(INFO) << "Prefetch copied";

  // Labels: same treatment, only when this layer emits them.
  if (this->output_labels_) {
    Blob<Dtype>* const label_top = top[1];
    label_top->ReshapeLike(loaded->label_);
    caffe_copy(loaded->label_.count(), loaded->label_.cpu_data(),
               label_top->mutable_cpu_data());
  }

  // Hand the batch back so it can be refilled.
  prefetch_free_.push(loaded);
}
// Copies the top diff unchanged into the bottom diff.
//
// Nothing is done when top[0] and bottom[0] are the same blob or when
// propagate_down[0] is false.
void LabelSpecificAutoLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (top[0] == bottom[0] || !propagate_down[0]) {
    return;
  }
  const Dtype* src = top[0]->cpu_diff();
  Dtype* dst = bottom[0]->mutable_cpu_diff();
  const int n = bottom[0]->count();
  caffe_copy(n, src, dst);
}
// Compares a PReLU layer built from a default LayerParameter against a ReLU
// layer with negative_slope 0.25: forward outputs and backward bottom diffs
// are expected to be exactly equal element by element.
TYPED_TEST(NeuronLayerTest, TestPReLUConsistencyReLU) {
  typedef typename TypeParam::Dtype Dtype;
  LayerParameter prelu_layer_param;
  LayerParameter relu_layer_param;
  relu_layer_param.mutable_relu_param()->set_negative_slope(0.25);
  PReLULayer<Dtype> prelu(prelu_layer_param);
  ReLULayer<Dtype> relu(relu_layer_param);
  // Set up blobs: the ReLU layer gets its own bottom/top pair. Its bottom is
  // filled from the fixture's bottom blob via CopyFrom.
  vector<Blob<Dtype>*> blob_bottom_vec_2;
  vector<Blob<Dtype>*> blob_top_vec_2;
  shared_ptr<Blob<Dtype> > blob_bottom_2(new Blob<Dtype>());
  shared_ptr<Blob<Dtype> > blob_top_2(new Blob<Dtype>());
  blob_bottom_vec_2.push_back(blob_bottom_2.get());
  blob_top_vec_2.push_back(blob_top_2.get());
  blob_bottom_2->CopyFrom(*this->blob_bottom_, false, true);
  // SetUp layers
  prelu.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
  relu.SetUp(blob_bottom_vec_2, blob_top_vec_2);
  // Check forward. Each layer runs on the bottom vector it was set up with
  // (and is later back-propagated through); both bottoms hold the same data.
  prelu.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
  relu.Forward(blob_bottom_vec_2, blob_top_vec_2);
  for (int s = 0; s < blob_top_2->count(); ++s) {
    EXPECT_EQ(this->blob_top_->cpu_data()[s], blob_top_2->cpu_data()[s]);
  }
  // Check backward: feed the same Gaussian-filled values as top diff to both
  // layers and compare the resulting bottom diffs.
  shared_ptr<Blob<Dtype> > tmp_blob(new Blob<Dtype>());
  tmp_blob->ReshapeLike(*blob_top_2.get());
  FillerParameter filler_param;
  GaussianFiller<Dtype> filler(filler_param);
  filler.Fill(tmp_blob.get());
  caffe_copy(blob_top_2->count(), tmp_blob->cpu_data(),
      this->blob_top_->mutable_cpu_diff());
  caffe_copy(blob_top_2->count(), tmp_blob->cpu_data(),
      blob_top_2->mutable_cpu_diff());
  vector<bool> propagate_down;
  propagate_down.push_back(true);
  prelu.Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
  relu.Backward(blob_top_vec_2, propagate_down, blob_bottom_vec_2);
  for (int s = 0; s < blob_bottom_2->count(); ++s) {
    EXPECT_EQ(this->blob_bottom_->cpu_diff()[s], blob_bottom_2->cpu_diff()[s]);
  }
}
// Concatenates the data of the first childlayer_num_ bottom blobs, in order,
// into top[0].
//
// Each bottom is written immediately after the previous one. The visible code
// only copies data; it does not fill any label-index blob here.
void UnifiedLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int write_pos = 0;  // number of elements already written into top[0]
  for (int child = 0; child < childlayer_num_; ++child) {
    Blob<Dtype>* const src = bottom[child];
    caffe_copy(src->count(), src->cpu_data(),
               top[0]->mutable_cpu_data() + write_pos);
    write_pos += src->count();
  }
}
// GPU backward pass for y = (shift + scale * x)^power.
//
// When propagate_down[0] is set, bottom diff is filled with dy/dx and then
// multiplied element-wise by the top diff. Several special cases avoid the
// general division-based formula.
void PowerLayer<Dtype>::Backward_gpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  Dtype* dx = bottom[0]->mutable_gpu_diff();
  const int n = bottom[0]->count();
  const Dtype* dy = top[0]->gpu_diff();

  if (diff_scale_ == Dtype(0) || power_ == Dtype(1)) {
    // The derivative does not depend on x: fill with diff_scale_.
    caffe_gpu_set(n, diff_scale_, dx);
  } else {
    const Dtype* x = bottom[0]->gpu_data();
    // General form:
    //   dy/dx = scale * power * (shift + scale * x)^(power - 1)
    //         = diff_scale * y / (shift + scale * x)
    if (power_ == Dtype(2)) {
      // y = (shift + scale * x)^2
      //   -> dy/dx = 2 * scale * (shift + scale * x)
      //            = diff_scale * shift + diff_scale * scale * x
      caffe_gpu_axpby(n, diff_scale_ * scale_, x, Dtype(0), dx);
      if (shift_ != Dtype(0)) {
        caffe_gpu_add_scalar(n, diff_scale_ * shift_, dx);
      }
    } else if (shift_ == Dtype(0)) {
      // y = (scale * x)^power
      //   -> dy/dx = scale * power * (scale * x)^(power - 1)
      //            = scale * power * (scale * x)^power * (scale * x)^(-1)
      //            = power * y / x
      const Dtype* y = top[0]->gpu_data();
      caffe_gpu_div(n, y, x, dx);
      caffe_gpu_scal(n, power_, dx);
    } else {
      // Build (shift + scale * x) in dx, then form diff_scale * y / dx.
      caffe_copy(n, x, dx);
      if (scale_ != Dtype(1)) {
        caffe_gpu_scal(n, scale_, dx);
      }
      if (shift_ != Dtype(0)) {
        caffe_gpu_add_scalar(n, shift_, dx);
      }
      const Dtype* y = top[0]->gpu_data();
      caffe_gpu_div<Dtype>(n, y, dx, dx);
      if (diff_scale_ != Dtype(1)) {
        caffe_gpu_scal(n, diff_scale_, dx);
      }
    }
  }
  // Chain rule: multiply the local derivative by the incoming gradient.
  caffe_gpu_mul(n, dy, dx, dx);
}
// Writes the element-wise sum of all bottom blobs into top[0].
//
// top[0] starts as a copy of bottom[0]; every further bottom is then
// accumulated with axpby using coefficients (1, 1). The element count of
// bottom[0] is used for every bottom.
void SumLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* first = bottom[0]->cpu_data();
  Dtype* sum = top[0]->mutable_cpu_data();
  caffe_copy(bottom[0]->count(), first, sum);
  for (int k = 1; k < bottom.size(); ++k) {
    const Dtype* addend = bottom[k]->cpu_data();
    // sum = 1 * addend + 1 * sum
    caffe_cpu_axpby(bottom[0]->count(), Dtype(1.0), addend, Dtype(1.0), sum);
  }
}
// Publishes the prefetched batch as this layer's output.
//
// Joins the prefetch thread, copies prefetch_data_ into (*top)[0] and, when
// output_labels_ is set, prefetch_label_ into (*top)[1], then creates a new
// prefetch thread. Always returns Dtype(0.). `bottom` is not used.
Dtype DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  // Wait for the running prefetch thread to finish.
  JoinPrefetchThread();

  // Copy the prefetched data to the tops, i.e. the outputs of this layer.
  vector<Blob<Dtype>*>& outputs = *top;
  caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
             outputs[0]->mutable_cpu_data());
  if (output_labels_) {
    caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
               outputs[1]->mutable_cpu_data());
  }

  // Start a new prefetch thread.
  CreatePrefetchThread();
  return Dtype(0.);
}
// Copies bottom[0] (data) and bottom[1] (labels) into data_blob_ and
// label_blob_, item by item, and then calls SaveBlobs().
//
// Requires at least two bottoms with equal num(). `top` is not used.
void HDF5OutputLayer<Dtype, MItype, MOtype>::Forward_cpu(
    const vector<Blob<MItype>*>& bottom, const vector<Blob<MOtype>*>& top) {
  CHECK_GE(bottom.size(), 2);
  CHECK_EQ(bottom[0]->num(), bottom[1]->num());

  Blob<MItype>* const data_in = bottom[0];
  Blob<MItype>* const label_in = bottom[1];

  // Give the staging blobs the same 4-D shape as their sources.
  data_blob_.Reshape(data_in->num(), data_in->channels(),
                     data_in->height(), data_in->width());
  label_blob_.Reshape(label_in->num(), label_in->channels(),
                      label_in->height(), label_in->width());

  // Number of elements per item along the num axis.
  const int_tp data_datum_dim = data_in->count() / data_in->num();
  const int_tp label_datum_dim = label_in->count() / label_in->num();

  for (int_tp item = 0; item < data_in->num(); ++item) {
    const int_tp data_at = item * data_datum_dim;
    const int_tp label_at = item * label_datum_dim;
    caffe_copy(data_datum_dim, data_in->cpu_data() + data_at,
               data_blob_.mutable_cpu_data() + data_at);
    caffe_copy(label_datum_dim, label_in->cpu_data() + label_at,
               label_blob_.mutable_cpu_data() + label_at);
  }
  SaveBlobs();
}
// Publishes the batch loaded by the prefetch thread.
//
// Joins the prefetch thread, reshapes the tops to the prefetched blobs, copies
// prefetch_data_ into top[0] and (when output_labels_ is set) prefetch_label_
// into top[1], then starts the next prefetch thread. `bottom` is not used.
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // First, join the thread
  JoinPrefetchThread();
  DLOG(INFO) << "Thread joined";
  // Reshape to loaded data.
  top[0]->Reshape(this->prefetch_data_.num(), this->prefetch_data_.channels(),
      this->prefetch_data_.height(), this->prefetch_data_.width());
  // Copy the data
  caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
             top[0]->mutable_cpu_data());
  DLOG(INFO) << "Prefetch copied";
  if (this->output_labels_) {
    // Reshape to loaded labels as well: the copy below writes
    // prefetch_label_.count() elements, so top[1] must be able to hold
    // exactly that many. This is a no-op when the shapes already agree.
    top[1]->ReshapeLike(this->prefetch_label_);
    // Copy the labels.
    caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
               top[1]->mutable_cpu_data());
  }
  // Start a new prefetch thread
  DLOG(INFO) << "CreatePrefetchThread";
  CreatePrefetchThread();
}
// Routes the top gradient into the last slice (index shape(0) - 1 along
// axis 0) of the bottom diff.
//
// The whole bottom diff is zeroed first: only shape(1) * shape(2) elements
// are written below, so without clearing, values left over from an earlier
// pass would remain in the rest of the buffer. Nothing is done when
// propagate_down[0] is false.
void LastRowLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const int num = bottom[0]->shape(0);
  const int num1 = bottom[0]->shape(1);
  const int channels = bottom[0]->shape(2);
  // Elements outside the last slice receive no gradient.
  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);
  // Write the incoming gradient at the start of the last slice.
  bottom_diff += bottom[0]->offset(num - 1);
  caffe_copy(channels * num1, top_diff, bottom_diff);
}
// Copies the bottom blob's data to the top blob through caffe_copy on GPU
// pointers, then verifies equality element by element on the CPU side.
TYPED_TEST(MathFunctionsTest, TestCopyGPU) {
  const int n = this->blob_bottom_->count();
  // Device pointers are obtained before switching to GPU mode.
  const TypeParam* gpu_src = this->blob_bottom_->gpu_data();
  TypeParam* gpu_dst = this->blob_top_->mutable_gpu_data();
  Caffe::set_mode(Caffe::GPU);
  caffe_copy(n, gpu_src, gpu_dst);
  // Read both blobs back through their CPU accessors for the comparison.
  const TypeParam* bottom_data = this->blob_bottom_->cpu_data();
  TypeParam* top_data = this->blob_top_->mutable_cpu_data();
  for (int i = 0; i < n; ++i) {
    EXPECT_EQ(bottom_data[i], top_data[i]);
  }
}
// Copies bottom[1] into top[0], then overlays the ignore label taken from
// bottom[0].
//
// Every element of bottom[0] is converted to int; wherever that value equals
// ignore_label_, the matching element of top[0] is overwritten with it.
void IgnoreOverlayLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  caffe_copy(bottom[1]->count(), bottom[1]->cpu_data(),
             top[0]->mutable_cpu_data());

  const Dtype* overlay = bottom[0]->cpu_data();
  Dtype* out = top[0]->mutable_cpu_data();
  for (int idx = 0; idx < bottom[0]->count(); ++idx) {
    // The Dtype element is implicitly converted to int for the comparison.
    const int value = overlay[idx];
    if (value == ignore_label_) {
      out[idx] = static_cast<Dtype>(value);
    }
  }
}
// Repeats each chunk of inner_dim_ elements of bottom[0] tiles_ times in a
// row in top[0], for outer_dim_ consecutive chunks.
void TileLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* src = bottom[0]->cpu_data();
  Dtype* dst = top[0]->mutable_cpu_data();
  for (int outer = 0; outer < outer_dim_; ++outer, src += inner_dim_) {
    // Emit the current source chunk tiles_ times back to back.
    for (int rep = 0; rep < tiles_; ++rep, dst += inner_dim_) {
      caffe_copy(inner_dim_, src, dst);
    }
  }
}
// For each of the outer_dim_ chunks, copies inner_dim_ elements from the
// bottom chosen by the selector blob bottom[num_cand_] into top[0].
//
// Selector values are cast to int and debug-checked to lie in [0, num_cand_).
// Source and destination use the same chunk offset.
void SelectLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  Dtype* const out = top[0]->mutable_cpu_data();
  const Dtype* select_data = bottom[num_cand_]->cpu_data();
  int chunk_start = 0;  // equals inner_dim_ * i
  for (int i = 0; i < outer_dim_; ++i, chunk_start += inner_dim_) {
    const int index = static_cast<int>(select_data[i]);
    DCHECK_GE(index, 0);
    DCHECK_LT(index, num_cand_);
    caffe_copy(inner_dim_, bottom[index]->cpu_data() + chunk_start,
               out + chunk_start);
  }
}
// Copies the top diff into the diff of every bottom whose propagate_down
// flag is set; bottoms with the flag cleared are left untouched.
void SumLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  for (int k = 0; k < bottom.size(); ++k) {
    if (!propagate_down[k]) {
      continue;
    }
    const Dtype* src = top[0]->cpu_diff();
    Dtype* dst = bottom[k]->mutable_cpu_diff();
    caffe_copy(top[0]->count(), src, dst);
  }
}
// Copies `count` elements from `source`, starting at element `other_offset`,
// into this tensor starting at element `this_offset`.
//
// Both ranges are asserted to fit inside their tensors. The copy goes through
// the CPU or GPU memory accessors depending on mode(); any other mode is a
// fatal error.
void Tensor<Dtype>::CopyChunkFrom(const Tensor& source, int count,
    int this_offset, int other_offset) {
  // Bounds checks: source range first, then destination range.
  ASSERT(source.count() >= count + other_offset,
      "Chunk exceeds source memory: "
      << count << " + " << other_offset << " > " << source.count());
  ASSERT(this->count() >= count + this_offset,
      "Chunk exceeds target memory: "
      << count << " + " << this_offset << " > " << this->count());

  switch (mode()) {
  case Caffe::CPU: {
    const Dtype* from = source.cpu_mem() + other_offset;
    Dtype* to = mutable_cpu_mem() + this_offset;
    caffe_copy(count, from, to);
    break;
  }
  case Caffe::GPU: {
    const Dtype* from = source.gpu_mem() + other_offset;
    Dtype* to = mutable_gpu_mem() + this_offset;
    caffe_copy(count, from, to);
    break;
  }
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
// Publishes the prefetched blobs to the tops and restarts prefetching.
//
// Top layout as written by this function:
//   [0] prefetch_data_            [1] prefetch_label_ (only if output_labels_)
//   [2] prefetch_itact_data_      [3] prefetch_itact_label_
//   [4] prefetch_itact_count_
// `bottom` is not used.
void BasePrefetchingInteractionDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  // LOG(INFO) << "Forward_cpu";
  // Wait for the prefetch thread before reading its buffers.
  JoinPrefetchThread();

  vector<Blob<Dtype>*>& outputs = *top;
  caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
             outputs[0]->mutable_cpu_data());
  if (this->output_labels_) {
    caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
               outputs[1]->mutable_cpu_data());
  }
  // The interaction blobs are copied unconditionally.
  caffe_copy(prefetch_itact_data_.count(), prefetch_itact_data_.cpu_data(),
             outputs[2]->mutable_cpu_data());
  caffe_copy(prefetch_itact_label_.count(), prefetch_itact_label_.cpu_data(),
             outputs[3]->mutable_cpu_data());
  caffe_copy(prefetch_itact_count_.count(), prefetch_itact_count_.cpu_data(),
             outputs[4]->mutable_cpu_data());

  // Kick off loading of the next batch.
  CreatePrefetchThread();
}
// Copies the last slice along axis 0 of bottom[0] into top[0].
//
// The source starts at bottom[0]->offset(shape(0) - 1) and
// shape(1) * shape(2) elements are copied.
void LastRowLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Blob<Dtype>* const in = bottom[0];
  const Dtype* src = in->cpu_data();
  Dtype* dst = top[0]->mutable_cpu_data();

  const int last = in->shape(0) - 1;
  const int slice_len = in->shape(2) * in->shape(1);
  caffe_copy(slice_len, src + in->offset(last), dst);
}
virtual void SetUp() { Caffe::set_random_seed(1701); blob_bottom_->Reshape(2, 5, 2, 2); Dtype* bottom_data = blob_bottom_->mutable_cpu_data(); Dtype data[] = { 7, 4, 29, 22, 20, 26, 10, 21, 36, 39, 12, 11, 24, 37, 15, 8, 31, 34, 27, 5, 0, 30, 14, 16, 1, 6, 13, 3, 23, 28, 9, 2, 32, 38, 19, 17, 25, 35, 18, 33 }; caffe_copy(blob_bottom_->count(), data, bottom_data); blob_bottom_vec_.push_back(blob_bottom_); blob_top_vec_.push_back(blob_top_); Dtype t1_data[] = { 0, 0, 29, 22, 0, 0, 0, 0, 36, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 38, 19, 0, 0, 0, 0, 33 }; Dtype t3_data[] = { 0, 0, 29, 22, 0, 0, 0, 21, 36, 39, 0, 11, 24, 37, 15, 0, 31, 34, 27, 0, 0, 30, 14, 16, 0, 0, 0, 0, 23, 0, 0, 0, 32, 38, 19, 17, 25, 35, 18, 33 }; t1->ReshapeLike(*blob_bottom_); t3->ReshapeLike(*blob_bottom_); t5->ReshapeLike(*blob_bottom_); caffe_copy(t1->count(), t1_data, t1->mutable_cpu_data()); caffe_copy(t3->count(), t3_data, t3->mutable_cpu_data()); caffe_copy(t5->count(), data, t5->mutable_cpu_data()); }
// Forward pass of the convolution using quantized copies of the weights.
//
// Steps, in order:
//   1. In TEST phase, quantize every bottom blob in place.
//   2. Copy blobs_[0] (and blobs_[1] when bias_term_ is set) into
//      weights_quantized_ and quantize them; the configured rounding_ is used
//      in TEST phase, stochastic rounding otherwise.
//   3. Run the per-sample gemm (plus bias) for each bottom/top pair.
//   4. In TEST phase, quantize each top blob after it has been computed.
void ConvolutionRistrettoLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const bool testing = (this->phase_ == TEST);

  // Trim layer input
  if (testing) {
    for (int b = 0; b < bottom.size(); ++b) {
      this->QuantizeLayerInputs_cpu(bottom[b]->mutable_cpu_data(),
          bottom[b]->count());
    }
  }

  // Trim weights: work on copies so blobs_ itself is not quantized here.
  caffe_copy(this->blobs_[0]->count(), this->blobs_[0]->cpu_data(),
      this->weights_quantized_[0]->mutable_cpu_data());
  if (this->bias_term_) {
    caffe_copy(this->blobs_[1]->count(), this->blobs_[1]->cpu_data(),
        this->weights_quantized_[1]->mutable_cpu_data());
  }
  int rounding = QuantizationParameter_Rounding_STOCHASTIC;
  if (testing) {
    rounding = this->rounding_;
  }
  this->QuantizeWeights_cpu(this->weights_quantized_, rounding,
      this->bias_term_);

  // Do forward propagation
  const Dtype* weight = this->weights_quantized_[0]->cpu_data();
  for (int b = 0; b < bottom.size(); ++b) {
    const Dtype* in = bottom[b]->cpu_data();
    Dtype* out = top[b]->mutable_cpu_data();
    for (int n = 0; n < this->num_; ++n) {
      Dtype* const out_n = out + n * this->top_dim_;
      this->forward_cpu_gemm(in + n * this->bottom_dim_, weight, out_n);
      if (this->bias_term_) {
        const Dtype* bias = this->weights_quantized_[1]->cpu_data();
        this->forward_cpu_bias(out_n, bias);
      }
    }
    // Trim layer output
    if (testing) {
      this->QuantizeLayerOutputs_cpu(out, top[b]->count());
    }
  }
}
// Recursively copies the cropped region one innermost-axis run at a time.
//
// For every axis before the last, the function loops over top[0]'s extent on
// that axis, records the index in `indices` and recurses. At the last axis it
// copies top[0]->shape(cur_dim) contiguous elements between the bottom
// position (indices + offsets) and the top position (indices).
//
// is_forward == true : src_data is addressed like bottom, dest_data like top.
// is_forward == false: src_data is addressed like top (top diff), dest_data
//                      like bottom (bottom diff).
// `indices` is taken by value, so each recursion level edits its own copy.
void CropLayer<Dtype>::crop_copy(const vector<Blob<Dtype>*>& bottom,
             const vector<Blob<Dtype>*>& top,
             const int* offsets,
             vector<int> indices,
             int cur_dim,
             const Dtype* src_data,
             Dtype* dest_data,
             bool is_forward) {
  const bool at_last_axis = !(cur_dim + 1 < top[0]->num_axes());
  if (!at_last_axis) {
    // Not yet at the final dimension: fix this axis and descend.
    for (int pos = 0; pos < top[0]->shape(cur_dim); ++pos) {
      indices[cur_dim] = pos;
      crop_copy(bottom, top, offsets, indices, cur_dim + 1,
                src_data, dest_data, is_forward);
    }
    return;
  }

  // Last dimension, stored contiguously in memory. Build the index into top
  // (reduced, cur_dim entries) and the index into bottom (shifted by the
  // offsets, cur_dim + 1 entries).
  std::vector<int> ind_red(cur_dim, 0);
  std::vector<int> ind_off(cur_dim + 1, 0);
  for (int axis = 0; axis < cur_dim; ++axis) {
    ind_red[axis] = indices[axis];
    ind_off[axis] = indices[axis] + offsets[axis];
  }
  ind_off[cur_dim] = offsets[cur_dim];

  const int run = top[0]->shape(cur_dim);
  if (is_forward) {
    caffe_copy(run,
        src_data + bottom[0]->offset(ind_off),
        dest_data + top[0]->offset(ind_red));
  } else {
    // In the backward pass src_data is the top diff and dest_data is the
    // bottom diff.
    caffe_copy(run,
        src_data + top[0]->offset(ind_red),
        dest_data + bottom[0]->offset(ind_off));
  }
}
// Backward pass producing gradients for the bottom data, the weight blob
// blobs_[0] and, when bias_term_ is set, the bias blob blobs_[1].
//
// stable_prod_grad_ selects between a product-based formulation and a
// division-based one; per the original note the latter is less stable and
// kept for comparison/debugging. All operations run over top[0]->count()
// elements. The bias gradient is a plain copy of the top diff either way.
void MaskingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  CHECK_GE(this->blobs_.size(), 1);
  // TODO: check gradient formulas (http://ufldl.stanford.edu/tutorial/supervised/MultiLayerNeuralNetworks/)
  const int n = top[0]->count();

  if (stable_prod_grad_) {
    if (propagate_down[0]) {
      // Gradient with respect to bottom data: d_i = d_(i+1) .* w
      caffe_mul(n, this->blobs_[0]->cpu_data(), top[0]->cpu_diff(),
          bottom[0]->mutable_cpu_diff());
    }
    // Gradient with respect to weights: d_i = d_(i+1) .* in
    caffe_mul(n, bottom[0]->cpu_data(), top[0]->cpu_diff(),
        this->blobs_[0]->mutable_cpu_diff());
  } else {
    // Less stable gradient computation inspired by the elementwise layer;
    // this is just for comparison/debugging purposes.
    if (propagate_down[0]) {
      // Gradient with respect to bottom data: (out ./ in) .* d_(i+1)
      caffe_div(n, top[0]->cpu_data(), bottom[0]->cpu_data(),
          bottom[0]->mutable_cpu_diff());
      caffe_mul(n, bottom[0]->cpu_diff(), top[0]->cpu_diff(),
          bottom[0]->mutable_cpu_diff());
    }
    // Gradient with respect to weights: (out ./ w) .* d_(i+1)
    caffe_div(n, top[0]->cpu_data(), this->blobs_[0]->cpu_data(),
        this->blobs_[0]->mutable_cpu_diff());
    caffe_mul(n, this->blobs_[0]->cpu_diff(), top[0]->cpu_diff(),
        this->blobs_[0]->mutable_cpu_diff());
  }

  // Gradient with respect to bias: d_i = d_(i+1)
  if (bias_term_) {
    // TODO: check whether there are any smart pointer tricks which can
    // replace the copying overhead
    caffe_copy(n, top[0]->cpu_diff(), this->blobs_[1]->mutable_cpu_diff());
  }
}
// Feeds each recurrent output blob back into the matching recurrent input
// blob, then runs the unrolled net on its prefilled inputs.
//
// `bottom` and `top` are not touched directly by this function.
void RecurrentLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  DCHECK_EQ(recur_input_blobs_.size(), recur_output_blobs_.size());
  for (int i = 0; i < recur_input_blobs_.size(); ++i) {
    const int count = recur_input_blobs_[i]->count();
    DCHECK_EQ(count, recur_output_blobs_[i]->count());
    // Source: recurrent output blob; destination: recurrent input blob.
    const Dtype* carried_over = recur_output_blobs_[i]->cpu_data();
    Dtype* next_start = recur_input_blobs_[i]->mutable_cpu_data();
    caffe_copy(count, carried_over, next_start);
  }
  unrolled_net_->ForwardPrefilled();
}
// Splits the top diff back into the first childlayer_num_ bottom blobs and
// rescales each piece.
//
// Consecutive segments of top[0]'s diff are copied into each bottom's diff,
// which is then scaled by label_index_sum_ / label_index_[i].
// `propagate_down` is not consulted.
void UnifiedLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  int read_pos = 0;  // number of top diff elements already dispatched
  for (int child = 0; child < childlayer_num_; ++child) {
    Blob<Dtype>* const dst = bottom[child];
    caffe_copy(dst->count(), top[0]->cpu_diff() + read_pos,
               dst->mutable_cpu_diff());
    // Scale back: diff * (label_index_sum_ / label_index_[child]).
    dst->scale_diff(
        static_cast<Dtype>(label_index_sum_) / label_index_[child]);
    read_pos += dst->count();
  }
}
// Checks caffe_cpu_gemv and caffe_gpu_gemv on a 2x3 matrix A = [1 2 3; 4 5 6].
//
// Non-transposed: A * [1 2 3]^T is expected to be [14 32].
// Transposed:     A^T * [1 2]^T is expected to be [9 12 15].
// The checks are skipped (with an error log) unless TypeParam is 4 bytes wide
// or the CUDA device's major version is at least 2.
TYPED_TEST(GemmTest, TestGemvCPUGPU) {
  Blob<TypeParam> A(1, 1, 2, 3);
  Blob<TypeParam> x(1, 1, 1, 3);
  Blob<TypeParam> y(1, 1, 1, 2);
  TypeParam data[6] = {1, 2, 3, 4, 5, 6};
  TypeParam result_2[2] = {14, 32};
  TypeParam result_3[3] = {9, 12, 15};
  // A takes all six values; x takes the first three.
  caffe_copy(6, data, A.mutable_cpu_data());
  caffe_copy(3, data, x.mutable_cpu_data());

  const bool supported =
      sizeof(TypeParam) == 4 || CAFFE_TEST_CUDA_PROP.major >= 2;
  if (!supported) {
    LOG(ERROR) << "Skipping test due to old architecture.";
    return;
  }

  // y = A * x on CPU, then on GPU.
  caffe_cpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.cpu_data(),
      x.cpu_data(), 0., y.mutable_cpu_data());
  for (int i = 0; i < 2; ++i) {
    EXPECT_EQ(y.cpu_data()[i], result_2[i]);
  }
  caffe_gpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.gpu_data(),
      x.gpu_data(), 0., y.mutable_gpu_data());
  for (int i = 0; i < 2; ++i) {
    EXPECT_EQ(y.cpu_data()[i], result_2[i]);
  }

  // Test transpose case: x = A^T * y with y = [1 2].
  caffe_copy(2, data, y.mutable_cpu_data());
  caffe_cpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.cpu_data(),
      y.cpu_data(), 0., x.mutable_cpu_data());
  for (int i = 0; i < 3; ++i) {
    EXPECT_EQ(x.cpu_data()[i], result_3[i]);
  }
  caffe_gpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.gpu_data(),
      y.gpu_data(), 0., x.mutable_gpu_data());
  for (int i = 0; i < 3; ++i) {
    EXPECT_EQ(x.cpu_data()[i], result_3[i]);
  }
}
// Copies either the data or the diff of `source` into this blob.
//
// copy_diff: true copies source's diff into this blob's diff; false copies
//            data into data.
// reshape:   when the 4-D shapes differ, true reshapes this blob to match
//            the source; false makes the mismatch a fatal error.
// Copying data (copy_diff == false) into a blob whose blob_mode_ is GLOBAL
// is a fatal error. The CPU or GPU accessors are used according to
// Caffe::mode().
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  // TODO: support CopyFrom( copy_diff == false ) for GLOBAL blobs
  if (blob_mode_ == BlobProto_BlobMode_GLOBAL && !copy_diff) {
    LOG(FATAL) << "Currently Petuum Caffe does not support "
               << "copying data to blobs with GLOBAL mode";
  }

  const bool same_shape =
      num_ == source.num() && channels_ == source.channels() &&
      height_ == source.height() && width_ == source.width();
  if (!same_shape) {
    if (!reshape) {
      LOG(FATAL) << "Trying to copy blobs of different sizes.";
    } else {
      Reshape(source.num(), source.channels(), source.height(),
          source.width());
    }
  }

  switch (Caffe::mode()) {
  case Caffe::CPU:
    if (copy_diff) {
      caffe_copy(count_, source.cpu_diff(),
          static_cast<Dtype*>(diff_->mutable_cpu_data()));
    } else {
      caffe_copy(count_, source.cpu_data(),
          static_cast<Dtype*>(data_->mutable_cpu_data()));
    }
    break;
  case Caffe::GPU:
    if (copy_diff) {
      caffe_copy(count_, source.gpu_diff(),
          static_cast<Dtype*>(diff_->mutable_gpu_data()));
    } else {
      caffe_copy(count_, source.gpu_data(),
          static_cast<Dtype*>(data_->mutable_gpu_data()));
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
// Copies the cropped region one innermost-axis run at a time, without
// recursion.
//
// The loop runs over every combination of indices on the axes before the
// last one (top[0]->count() / top[0]->shape(last_dim) combinations). Each
// iteration decodes its flat counter into per-axis indices, adds the crop
// offsets for the bottom side, and copies top[0]->shape(last_dim) contiguous
// elements. The loop is an OpenMP parallel-for when _OPENMP is defined; the
// index vectors are local to each iteration.
//
// NOTE(review): `is_forward` is used below but is not one of this function's
// parameters, so it must resolve to a name declared elsewhere (presumably a
// class member) - confirm.
void CropLayer<Dtype>::crop_copy(const vector<Blob<Dtype>*>& bottom,
             const vector<Blob<Dtype>*>& top,
             const vector<int>& offsets,
             const Dtype* src_data,
             Dtype* dest_data) {
  int last_dim = top[0]->num_axes() - 1;
  int copy_count = top[0]->count() / top[0]->shape(last_dim);
#ifdef _OPENMP
  #pragma omp parallel for
#endif
  for (int i = 0; i < copy_count; ++i) {
    // Index into top (reduced, last_dim entries) and into bottom (shifted by
    // the offsets, last_dim + 1 entries).
    std::vector<int> ind_red(last_dim, 0);
    std::vector<int> ind_off(last_dim + 1, 0);

    // Decode the flat counter, innermost leading axis first.
    int remaining = i;
    for (int axis = last_dim - 1; axis >= 0; --axis) {
      const int pos = remaining % top[0]->shape(axis);
      remaining /= top[0]->shape(axis);
      ind_red[axis] = pos;
      ind_off[axis] = pos + offsets[axis];
    }
    ind_off[last_dim] = offsets[last_dim];

    // The last dimension is stored contiguously in memory: copy it whole.
    if (is_forward) {
      caffe_copy(top[0]->shape(last_dim),
          src_data + bottom[0]->offset(ind_off),
          dest_data + top[0]->offset(ind_red));
    } else {
      // In the backward pass src_data is the top diff and dest_data is the
      // bottom diff.
      caffe_copy(top[0]->shape(last_dim),
          src_data + top[0]->offset(ind_red),
          dest_data + bottom[0]->offset(ind_off));
    }
  }
}
// CPU softmax over the axis softmax_axis_.
//
// top[0] starts as a copy of bottom[0]. For each of the outer_num_ slabs the
// per-position maximum over the channels is subtracted, the result is
// exponentiated, and every channel plane is divided by the per-position sum.
// scale_ is only scratch storage for the max and then the sum.
void SoftmaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* input = bottom[0]->cpu_data();
  Dtype* output = top[0]->mutable_cpu_data();
  Dtype* scratch = scale_.mutable_cpu_data();
  const int channels = bottom[0]->shape(softmax_axis_);
  const int dim = bottom[0]->count() / outer_num_;
  caffe_copy(bottom[0]->count(), input, output);

  // We need to subtract the max to avoid numerical issues, compute the exp,
  // and then normalize.
  for (int outer = 0; outer < outer_num_; ++outer) {
    // Note: inner_num_ and dim are different quantities; dim is presumably
    // channels * inner_num_ (the calls below treat one slab that way).
    const Dtype* slab = input + outer * dim;

    // Initialize the scratch buffer to the first plane, then take the
    // maximum along the channel axis. Only the first inner_num_ elements of
    // the scratch buffer are updated in each outer iteration.
    caffe_copy(inner_num_, slab, scratch);
    for (int c = 0; c < channels; ++c) {
      const Dtype* plane = slab + c * inner_num_;
      for (int k = 0; k < inner_num_; ++k) {
        scratch[k] = std::max(scratch[k], plane[k]);
      }
    }

    // Subtraction: the gemm updates a channels x inner_num_ block starting
    // at `output`; the calls below work on the same block.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels, inner_num_,
        1, -1., sum_multiplier_.cpu_data(), scratch, 1., output);
    // Exponentiation.
    caffe_exp<Dtype>(dim, output, output);
    // Sum after exp.
    caffe_cpu_gemv<Dtype>(CblasTrans, channels, inner_num_, 1.,
        output, sum_multiplier_.cpu_data(), 0., scratch);
    // Division, one channel plane at a time; `output` is advanced so that it
    // points at the next slab when this loop finishes.
    for (int c = 0; c < channels; ++c) {
      caffe_div(inner_num_, output, scratch, output);
      output += inner_num_;
    }
  }
}
// Backward pass of the L2 normalization layer.
//
// With y = top data, dy = top diff and s = norm_[i] for item i, each item's
// bottom diff is computed as
//   dx = s * dy - (dot(y, dy) * s) * y
// over d = count / num elements. `propagate_down` is not consulted.
void L2NormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* dy = top[0]->cpu_diff();
  const Dtype* y = top[0]->cpu_data();
  const Dtype* scale = norm_.cpu_data();
  Dtype* dx = bottom[0]->mutable_cpu_diff();
  const int n = top[0]->num();
  const int d = top[0]->count() / n;

  // Start from dx = dy; the axpby below rescales it in place.
  caffe_copy(bottom[0]->count(), dy, dx);
  for (int item = 0; item < n; ++item) {
    const int at = item * d;
    Dtype a = caffe_cpu_dot(d, y + at, dy + at);
    // dx = (-a * s) * y + s * dx
    caffe_cpu_axpby(d, Dtype(-1) * a * scale[item], y + at,
                    scale[item], dx + at);
  }
}
// Copies a window of bottom[0] into top[0], row by row.
//
// For every (n, c, h) of top[0], one row of top[0]->width() elements is taken
// from bottom[0] at (n, c, crop_h_ + h, crop_w_).
void CropLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Blob<Dtype>* const in = bottom[0];
  Blob<Dtype>* const out = top[0];
  const Dtype* src = in->cpu_data();
  Dtype* dst = out->mutable_cpu_data();

  for (int n = 0; n < out->num(); ++n) {
    for (int c = 0; c < out->channels(); ++c) {
      for (int row = 0; row < out->height(); ++row) {
        const Dtype* src_row = src + in->offset(n, c, crop_h_ + row, crop_w_);
        Dtype* dst_row = dst + out->offset(n, c, row);
        caffe_copy(out->width(), src_row, dst_row);
      }
    }
  }
}